Showing the change in the US death rate since before the COVID-19 pandemic with R's tidyverse (CC356)

April 23, 2025 • PD Schloss • 8 min read

Pat uses R to recreate a plot a provocative New York Times article on 30 things that have changed since the start of the COVID-19 pandemic. The plot Pat selected shows the change in the weekly death rate across the United States using functions from the tidyverse. To pull this off he uses the dplyr, ggplot2, showtext, and ggtext packages. The functions he uses from these packages include aes, annotate, col_date, cols, coord_cartesian, count, element_blank, element_line, element_rect, element_text, element_textbox_simple, filter, font_add_google, full_join, geom_hline, geom_line, geom_rect, ggplot, ggsave, if_else, is.na, labs, library, margin, mean, mutate, print, pull, read_csv, rename, scale_color_manual, scale_x_date, scale_y_continuous, select, showtext_auto, showtext_opts, sum, summarize, theme, unit, and ymd. The newsletter describing this visualization at a 30,000 ft view can be found here. You can find the original article here. The two CDC websites are available here and here. If you have a figure that you would like to see me discuss in a future newsletter and episode of Code Club, email me at pat@riffomonas.org!

Code

library(tidyverse)
library(showtext)
library(ggtext)

font_add_google("Libre Franklin", "franklin")
showtext_opts(dpi = 300)
showtext_auto()

pre <- read_csv("Weekly_Counts_of_Deaths_by_Jurisdiction_and_Age_20250421.csv", 
         col_types = cols(`Week Ending Date` = col_date(format = "%m/%d/%Y"))) %>%
  filter(Jurisdiction == "United States" & Type == "Unweighted") %>%
  # filter(`Age Group` == "25-44 years", `Week Ending Date` == "2015-01-10") %>%
  # print(width = Inf)
  # count(`Age Group`, `Week Ending Date`)
  summarize(pre_deaths = sum(`Number of Deaths`), .by = `Week Ending Date`) %>%
  rename(date = `Week Ending Date`)
  
post <- read_csv("Provisional_COVID-19_Death_Counts_by_Week_Ending_Date_and_State_20250421.csv",
         col_types = cols(`End Date` = col_date(format = "%m/%d/%Y"))) %>%
  filter(State == "United States" & Group == "By Week") %>%
  select(date = `End Date`,
         post_deaths = `Total Deaths`)

deaths_by_week <- full_join(pre, post, by = "date") %>%
  mutate(deaths = if_else(is.na(post_deaths), pre_deaths, post_deaths)) %>%
  select(date, deaths) %>%
  mutate(pre_post = if_else(date < "2020-03-01", "pre", "post")) %>%
  filter(date >= "2015-03-01" & date <= "2025-03-01")

pre_deaths <- deaths_by_week %>%
  filter(pre_post == "pre") %>%
  summarize(mean = mean(deaths)) %>%
  pull(mean)

deaths_by_week %>%
  ggplot(aes(x = date, y = deaths, color = pre_post)) +
  geom_rect(xmin = ymd("2020-03-01"), xmax = ymd("2026-03-01"),
            ymin = 0, ymax = 1e6, fill = "#FCF0E9", color = "white") +
  geom_hline(yintercept = c(60000, 80000, pre_deaths),
             linewidth = c(0.2, 0.2, 0.4),
             color = c("gray80", "gray80", "black"),
             linetype = c(1, 1, 2)) +
  geom_line(show.legend = FALSE,
            linewidth = 0.75) +
  annotate(geom = "text", hjust = 0, vjust = -0.2,
           family = "franklin", size = 8.5, size.unit = "pt",
           x = ymd("2015-03-01"),
           y = c(60000, 80000),
           label = c("60,000", "80,000 deaths per week")) +
  annotate(geom = "text", family = "franklin",
           x = ymd("2022-09-01"),
           y = pre_deaths * 0.98,
           label = "2015-19 average") +
  coord_cartesian(expand = FALSE, clip = "off") +
  scale_y_continuous(breaks = c(60000, 80000),
                     limits = c(45000, 89000)) +
  scale_x_date(breaks = ymd(c("2016-01-01", "2018-01-01", "2020-03-01",
                              "2022-01-01", "2024-01-01")),
               labels = c("2016", "2018", "March 2020", "2022", "2024")) +
  scale_color_manual(breaks = c("pre", "post"),
                     values = c("#58595B", "#F05A27")) +
  labs(x = NULL,
       y = NULL,
       title = "Total U.S. deaths") +
  theme(
    text = element_text(family = "franklin"),
    plot.background = element_rect(fill = "#EEEEEE"),
    plot.title = element_textbox_simple(face = "bold", size = 11.5,
                                        fill = "white", width = NULL,
                                        padding = margin(5, 4, 5, 4),
                                        hjust = 0),
    plot.margin = margin(4, 15, 10, 10),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(size = 8.5, margin = margin(t = 3),
                               color = "black"),
    axis.ticks.length.x = unit(4, "pt"),
    axis.ticks.x = element_line(linewidth = 0.4),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_rect(fill = NA)
  )

ggsave("pre_post_covid.png", width = 6, height = 4.26)