A data visualization makeover: riffing off of other people's COVID-19 figures (CC144)

September 9, 2021 • PD Schloss • 23 min read

Pat gives a COVID-19 data visualization a makeover using the ggplot2 R package by combining what he likes best of two figures generated by the researchers at Ipsos and chartr. Along the way he’ll review a number of concepts from the past few episodes of Code Club including fonts, colors, and customizing axis labels. The data depict the percentage of people in 15 countries who would be willing to receive the COVID-19 vaccine as of August and October of 2020.

Pat uses functions from the showtext package and the tidyverse including functions from the ggplot2, dplyr, ggtext and glue packages in RStudio.

Code

Final R script

library(tidyverse)
library(glue)
library(ggtext)
library(showtext)

font_add_google(family="patua-one", "Patua One")
font_add_google(family="roboto", "Roboto")

showtext_auto()

data <- read_csv("august_october_2020.csv") %>%
  rename(country = X.1,
         percent_august = "Total Agree - August 2020",
         percent_october = "Total Agree - October 2020") %>%
  mutate(bump_august = case_when(percent_august <= percent_october ~
                                   percent_august - 2,
                                 percent_august > percent_october ~
                                   percent_august + 2),
         bump_october = case_when(percent_august <= percent_october ~
                                    percent_october + 2,
                                  percent_august > percent_october ~
                                    percent_october - 2),
         y_position = rev(1:nrow(.))) %>%
  mutate(country = recode(country,
                          "South Korea" = "S. Korea",
                          "South Africa" = "S. Africa",
                          "United Kingdom" = "UK",
                          "United States" = "USA")) %>%
  filter(country != "Total")

strip_data <- data %>%
  select(country, y_position) %>%
  mutate(xmin = 50, xmax=100,
         ymin = y_position - 0.5,
         ymax = y_position + 0.5,
         fill = c(rep(c("a", "b"), length.out=nrow(.)))) %>%
  pivot_longer(cols=c(xmin, xmax), values_to="x", names_to="xmin_xmax") %>%
  select(-xmin_xmax)

data %>%
  pivot_longer(cols = -c(country, y_position),
               names_to=c(".value", "month"),
               names_sep = "_") %>%
  drop_na() %>%
  ggplot(aes(x=percent, y=y_position, color=month, group=y_position)) +
  geom_ribbon(data = strip_data,
              aes(x = x, ymin=ymin, ymax = ymax, group=y_position, fill=fill),
              inherit.aes = FALSE,
              show.legend=FALSE) +
  geom_line(color="#888888", size=1, show.legend = FALSE) +
  geom_point(size=3, show.legend = TRUE) +
  geom_text(aes(label=glue("{percent}%"), x=bump),
            size=2,
            color="#686868",
            family="roboto",
            show.legend = FALSE) +
  scale_color_manual(name="If a vaccine for COVID-19 were\navailable, I agree I would get it...",
                     breaks=c("august", "october"),
                     values=c("#ABB3FF", "#263AFF"),
                     labels=c("August '20", "October '20"),
                     guide = guide_legend(override.aes = list(size=4))) +
  scale_fill_manual(name=NULL,
                    breaks=c("a", "b"),
                    values=c("#F8F8F8", "#FFFFFF"),
                    labels=c("a", "b")) +
  scale_x_continuous(limits=c(50, 100),
                     breaks=NULL,
                     labels=NULL,
                     expand = c(0, 0)) +
  scale_y_continuous(breaks = data$y_position,
                     labels = data$country,
                     expand = c(0, 0),
                     limits=c(0.5, 16.5)) +
  labs(x=NULL, y=NULL,
       title="COVID-19 vaccination intent\nis decreasing globally",
       caption="<i>Base: 18,526 online adults aged 16-74 across 15 countries</i><br>Source: Ipsos")+
  theme(
    text = element_text(family = "roboto"),
    plot.title.position = "plot",
    plot.title = element_text(face="bold", size=28,
                              color="#000000", family = "patua-one",
                              margin = margin(t=10, b=20)),
    plot.caption = element_markdown(hjust=0, color="darkgray"),
    plot.caption.position = "plot",
    plot.background = element_rect(fill="#FFFFFF"),
    plot.margin = margin(l=5, r=15),
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(color="#686868", size=6),
    axis.text.y = element_text(face="bold"),
    legend.position = c(0, 1.0),
    legend.margin = margin(l=0),
    legend.direction = "horizontal",
    legend.justification = "left",
    legend.background = element_blank(),
    legend.title = element_text(size=9, lineheight = 1.3, margin = margin(r=35)),
    legend.key = element_blank(),
    legend.key.width = unit(3, "pt"),
    legend.text = element_markdown(margin = margin(r=10))
    ) +
  guides(fill="none") +
  coord_cartesian(clip="off")


ggsave("august_october_2020_makeover.tiff", width=5, height=5)

Initial R scripts

august_october_2020_chartr.R

library(tidyverse)
library(glue)
library(ggtext)
library(showtext)

font_add_google(family="josefin-slab", "Josefin Slab")
font_add_google(family="josefin-sans", "Josefin Sans")

showtext_auto()

data <- read_csv("august_october_2020.csv") %>%
  rename(country = X.1,
         percent_august = "Total Agree - August 2020",
         percent_october = "Total Agree - October 2020") %>%
  mutate(bump_august = case_when(percent_august < percent_october ~
                                   percent_august - 2.5,
                                 percent_august > percent_october ~
                                   percent_august + 2.5,
                                 TRUE ~ NA_real_),
         bump_october = case_when(percent_august < percent_october ~
                                    percent_october + 2.5,
                                  percent_august > percent_october ~
                                    percent_october - 2.5,
                                  TRUE ~ percent_october + 2.5),
         y_position = rev(1:nrow(.))) %>%
  mutate(country = recode(country,
                          "South Korea" = "S. Korea",
                          "South Africa" = "S. Africa",
                          "United Kingdom" = "UK",
                          "United States" = "USA")) %>%
  filter(country != "Total")

strip_data <- data %>%
  select(country, y_position) %>%
  mutate(xmin = 50, xmax=100,
         ymin = y_position - 0.5,
         ymax = y_position + 0.5,
         fill = c(rep(c("a", "b"), length.out=nrow(.)))) %>%
  pivot_longer(cols=c(xmin, xmax), values_to="x", names_to="xmin_xmax") %>%
  select(-xmin_xmax)

arrows_data <- data %>%
  filter(abs(percent_august - percent_october) > 1) %>%
  mutate(midpoint = (percent_august + 2*percent_october)/3) %>%
  select(country, y_position, percent_august, midpoint) %>%
  pivot_longer(c(percent_august, midpoint), names_to="type", values_to="x")

data %>%
  pivot_longer(cols = -c(country, y_position),
               names_to=c(".value", "month"),
               names_sep = "_") %>%
  drop_na() %>%
  ggplot(aes(x=percent, y=y_position, color=month, group=y_position)) +
  geom_ribbon(data = strip_data,
              aes(x = x, ymin=ymin, ymax = ymax, group=y_position, fill=fill),
              inherit.aes = FALSE,
              show.legend=FALSE) +
  geom_line(color="#153744", size=0.75, show.legend = FALSE) +
  geom_path(data=arrows_data, aes(x=x, y=y_position, group=y_position),
            color="#153744",
            size=0.75,
            arrow = arrow(angle = 45, length=unit(0.1, "in"), type="open"),
            show.legend = FALSE,
            inherit.aes = FALSE) +
  geom_point(size=3, show.legend = TRUE) +
  geom_text(aes(label=glue("{percent}%"), x=bump),
            size=3,
            color="#686868", family="josefin-sans",
            show.legend = FALSE) +
  scale_color_manual(name="If a vaccine for COVID-19 were\navailable, I totally argree I would get it...",
                     breaks=c("october", "august"),
                     values=c("#59AC74", "#153744"),
                     labels=c("<span style='color:#59AC74'>October '20</span>",
                              "<span style='color:#153744'>August '20</span>"),
                     guide = guide_legend(override.aes = list(size=4))) +
  scale_fill_manual(name=NULL,
                    breaks=c("a", "b", "c"),
                    values=c("#DFEAF9", "#EDF4F7", "#F3FAFE"),
                    labels=c("a", "b", "c")) +
  scale_x_continuous(limits=c(50, 100),
                     breaks=seq(50, 100, by=5),
                     labels=glue("{seq(50, 100, 5)}%"),
                     expand = c(0, 0)) +
  scale_y_continuous(breaks = c(data$y_position, 0.5, data$y_position+0.5,
                                length(data$y_position) + 1.5),
                     labels = c(data$country, rep("", length(data$y_position)+2)),
                     expand = c(0, 0),
                     limits=c(0.5, 16.5)) +
  labs(x="<span style='color:#4DA6BE;'>chart</span><span style='color:#E9B388;'>r</span>", y=NULL,
       title="Vaccine Skepticism by Country",
       caption="Source: Ipsos")+
  theme(
    text = element_text(family = "josefin-sans"),
    plot.title.position = "plot",
    plot.title = element_text(face="bold", margin= margin(b=25, t=15), size=26,
                              color="#2E737B", family = "josefin-slab"),
    plot.caption = element_markdown(hjust=0, color="darkgray",
                                    margin = margin(t=-10)),
    plot.caption.position = "plot",
    plot.background = element_rect(fill="#F3FAFE"),
    plot.margin = margin(l=5, r=15),
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_line(color = c(rep(NA, nrow(data)),
                                          rep("darkgray", nrow(data)+2)),
                                size=0.2),
    axis.text.x = element_text(color="#686868", size=6),
    axis.text.y = element_text(face="bold"),
    axis.title.x = element_markdown(family="josefin-slab", face="bold", size=25),
    axis.line = element_line(color="darkgray", size=0.2),
    legend.background = element_blank(),
    legend.position = c(0, 1.0),
    legend.direction = "horizontal",
    legend.title = element_text(size=9, lineheight = 1.3),
    legend.justification = "left",
    legend.key = element_blank(),
    legend.key.width = unit(3, "pt"),
    legend.text = element_markdown(margin = margin(r=10))
    ) +
  guides(fill="none") +
  coord_cartesian(clip="off")


ggsave("august_october_2020_chartr.tiff", width=5, height=5)

august_october_2020_ipsos.R

library(tidyverse)
library(glue)
library(ggtext)

data <- read_csv("august_october_2020.csv") %>%
  rename(country = X.1,
         percent_august = "Total Agree - August 2020",
         percent_october = "Total Agree - October 2020") %>%
  mutate(bump_august = if_else(percent_august < percent_october,
                               percent_august - 2,
                               percent_august + 2),
         bump_october = if_else(percent_august < percent_october,
                               percent_october + 2,
                               percent_october - 2))

main_plot <- data %>%
  pivot_longer(cols = -country, names_to=c(".value", "month"),
               names_sep = "_") %>%
  mutate(country = factor(country, levels = rev(data$country))) %>%
  ggplot(aes(x=percent, y=country, color=month)) +
  geom_line(color="#e6e6e6", size=1.75, show.legend = FALSE) +
  geom_point(size=2, show.legend = FALSE) +
  geom_text(aes(label=glue("{percent}%"), x=bump),size=3, show.legend = FALSE) +
  scale_color_manual(name=NULL,
                     breaks=c("august", "october"),
                     values=c("#727272", "#15607a"),
                     labels=c("August", "October")) +
  scale_x_continuous(limits=c(50, 100),
                     breaks=seq(50, 100, by=5),
                     labels=glue("{seq(50, 100, 5)}%")) +
  labs(x=NULL, y=NULL,
       title="If a vaccine for COVID-19 were available, I would get it",
       caption="<i>Base: 18,526 online adults aged 16-74 across 15 countries</i><br>Source: Ipsos")+
  theme(
    plot.title.position = "plot",
    plot.title = element_text(face="bold", margin= margin(b=20)),
    plot.caption = element_markdown(hjust=0, color="darkgray"),
    plot.caption.position = "plot",
    panel.background = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(color="darkgray"),
    panel.grid.major.x = element_line(color="gray", size=0.1),
    panel.grid.major.y = element_line(color="gray", size=0.1, linetype="dotted")
  )

total <- data %>%
  filter(country == "Total") %>%
  pivot_longer(cols = -country, names_to=c(".value", "month"),
             names_sep = "_") %>%
  mutate(pretty = if_else(month == "august",
                          "Total Agree -<br>August 2020",
                          "Total Agree -<br>October 2020"),
         align = if_else(month == "august", 0, 1))

main_plot +
  coord_cartesian(clip="off") +
  geom_textbox(data=total,
               aes(x=percent, y =country, color=month, label=pretty, hjust=align),
               size=2,
               box.color=NA,
               width=NULL,
               vjust=-0.5,
               box.padding=margin(0,0,0,0),
               fill=NA,
               show.legend=FALSE)

ggsave("august_october_2020_ipsos.tiff", width=6, height=4)

Data

X.1,Total Agree - August 2020,Total Agree - October 2020
Total,77,73
India,87,87
China,97,85
South Korea,84,83
Brazil,88,81
Australia,88,79
United Kingdom,85,79
Mexico,75,78
Canada,76,76
Germany,67,69
Japan,75,69
South Africa,64,68
Italy,67,65
Spain,72,64
United States,67,64
France,59,54