Data viz makeover from bar plot to dot plot with ggplot2 in R (CC309)

October 24, 2024 • PD Schloss • 7 min read

Pat does a data viz makeover by converting a grouped bar plot to a dot plot using ggplot2 in R. He customizes the appearance using the geom_point, geom_hline, scale_fill_manual, scale_y_discrete, coord_cartesian, labs, and theme functions. You can find the code he developed in this episode at https://www.riffomonas.org/code_club/2024-10-24-dotplot. The original bar plot was included in a slide deck reporting the results of a study performed by Oxford University Press. You can find their press release at https://corp.oup.com/news/how-are-researchers-responding-to-ai/ and the slide deck at https://fdslive.oup.com/www.oup.com/academic/pdf/Researchers-and-AI-survey-findings.pdf. The figure he recreates in this episode is on slide 20. The newsletter describing how he would go about generating the figure can be found at https://shop.riffomonas.org/posts/whatever-you-do-don-t-use-ai-to-figure-out-this-plot.

Code

library(tidyverse)
library(glue)
library(forcats)
library(ggtext)

# Researcher cohort names. The order here is reused as the factor level order
# for the `cohort` column, so it also determines which fill colour each cohort
# receives and the order of the legend entries.
cohorts <- c(
  "Pioneers",
  "Careful optimists",
  "Cautious time savers",
  "Neutrals",
  "Wary observers",
  "Concerned pessimists",
  "Sceptics",
  "Challengers"
)

# Hand-entered data for the plot, in long format: one row per
# (question, cohort) pair, with rows grouped by question and cohorts cycling
# in the order given by `cohorts` within each question.
oup_data <- tibble(
  # Each question label is repeated once per cohort. The embedded "\n" breaks
  # the label onto two lines when it is drawn.
  question = rep(
    c(
      "Generating ideas for\nresearch",
      "Discovering existing\nresearch",
      "Summarising existing\nresearch",
      "Designing the\nresearch",
      "Collecting research\ndata",
      "Analysing research\ndata",
      "Writing up the\nresearch",
      "Editing write-up of\nresearch",
      "Creating marketing/\noutreach content",
      "Another stage"
    ),
    each = length(cohorts)
  ),
  # The full cohort list is cycled once per question (10 questions); the
  # levels are pinned to the order of `cohorts` rather than alphabetical.
  cohort = factor(rep(cohorts, times = 10), levels = cohorts),
  # Per-question count that is shown as "N=" in the axis label
  # (presumably the number of respondents for that question -- TODO confirm).
  positives = rep(
    c(383, 636, 531, 159, 373, 410, 293, 580, 135, 194),
    each = length(cohorts)
  ),
  # One line per question; within a line the values follow the cohort order.
  rate = c(
    39, 34, 26, 25, 17,  5, 12,  6,  # Generating ideas for research
    50, 49, 44, 40, 43, 26, 32, 19,  # Discovering existing research
    57, 47, 37, 31, 25, 11, 11,  8,  # Summarising existing research
    20, 17,  7, 11,  5,  1,  2,  1,  # Designing the research
    24, 29, 25, 24, 24, 15, 27, 27,  # Collecting research data
    43, 30, 26, 24, 17,  7, 24, 14,  # Analysing research data
    31, 21, 18, 22, 10, 13,  8, 13,  # Writing up the research
    59, 48, 38, 38, 26, 15, 23,  6,  # Editing write-up of research
    15, 10,  8,  7,  6,  5,  5,  8,  # Creating marketing/outreach content
     8,  5, 13, 11, 11, 35, 15, 34   # Another stage
  ),
  # Axis label combining the question text with its N, stored as a factor
  # whose levels are sorted by `positives`.
  pretty = fct_reorder(
    .f = glue("{question} (N={positives})"),
    .x = positives
  )
)

# Dot plot of `rate` for every question, with one filled point per cohort.
ggplot(oup_data, aes(x = rate, y = pretty, fill = cohort)) +
  # Shape 21 is a circle with a separate outline and fill, so `fill` carries
  # the cohort colour. position_dodge() offsets the cohorts' points from one
  # another within each question.
  geom_point(shape = 21, position = position_dodge(width = 0.6)) +
  # Discrete y levels sit at 1, 2, ..., 10, so lines at the half-integers
  # separate neighbouring questions and close off the top and bottom.
  geom_hline(
    yintercept = seq(from = 0.5, to = 10.5, by = 1),
    linewidth = 0.25,
    color = "gray"
  ) +
  # One colour per cohort, assigned in factor-level order.
  scale_fill_manual(
    values = c(
      "#b2182b",
      "#d6604d",
      "#f4a582",
      "#fddbc7",
      "#d1e5f0",
      "#92c5de",
      "#4393c3",
      "#2166ac"
    )
  ) +
  # Remove the scale's padding; the visible range is set explicitly below.
  scale_y_discrete(expand = c(0, 0)) +
  coord_cartesian(
    xlim = c(0, 70),
    ylim = c(0.5, 10.5),
    clip = "off"
  ) +
  labs(
    title = "AI has most commonly been used to support the discovery of existing research, followed by editing the research write-up",
    caption = "Multiple-response question, therefore percentages do not sum to 100",
    x = "Percent of researchers in favor",
    y = NULL,
    fill = NULL
  ) +
  theme_classic() +
  theme(
    # Title: ggtext text box so the long title wraps, aligned to the whole
    # plot rather than just the panel.
    plot.title.position = "plot",
    plot.title = element_textbox_simple(
      face = "bold",
      size = 15,
      margin = margin(t = 0, b = 15)
    ),
    # Caption: small, left-aligned to the whole plot.
    plot.caption.position = "plot",
    plot.caption = element_text(size = 6, hjust = 0),
    # Y axis: left-aligned labels without tick marks.
    axis.text.y = element_text(hjust = 0),
    axis.ticks.y = element_blank(),
    # Legend: compact, boxed, and placed inside the panel at relative
    # coordinates (0.8, 0.15).
    legend.position = "inside",
    legend.position.inside = c(0.8, 0.15),
    legend.background = element_rect(color = "black"),
    legend.key.size = unit(6, "pt"),
    legend.text = element_text(size = 6)
  )

# Write the plot built above to disk; width and height use ggsave()'s default
# units.
ggsave(filename = "ai_dotplot.png", plot = last_plot(), width = 5, height = 5)