Recreating a grouped and labelled barplot in R with ggplot2 (CC308)

October 21, 2024 • PD Schloss • 3 min read

Pat recreates a grouped bar plot in which each bar is annotated with its value, using the geom_col and geom_text functions from ggplot2 in R. He customizes the appearance to match the original figure using the scale_fill_manual, scale_x_discrete, scale_y_continuous, coord_cartesian, labs, and theme functions. The plot was included in a slide deck reporting the results of a study performed by Oxford University Press. You can find their press release at https://corp.oup.com/news/how-are-researchers-responding-to-ai/ and the slide deck at https://fdslive.oup.com/www.oup.com/academic/pdf/Researchers-and-AI-survey-findings.pdf. The figure recreated in this episode is on slide 20. The newsletter describing how Pat would go about generating the figure can be found at https://shop.riffomonas.org/posts/whatever-you-do-don-t-use-ai-to-figure-out-this-plot.

Code

library(tidyverse)
library(ggtext)
library(glue)

# Survey questions, listed in the order they are used as factor levels.
# Embedded "\n" characters hard-wrap the text; note that the final two
# entries also end with a trailing "\n".
questions <- c(
  "Generating ideas for\nresearch",
  "Discovering existing\nresearch",
  "Summarising existing\nresearch",
  "Designing the\nresearch",
  "Collecting research\ndata",
  "Analysing research\ndata",
  "Writing up the\nresearch",
  "Editing write-up of\nresearch",
  "Creating marketing /\noutreach content\n",
  "Another stage\n"
)

  
# Cohort names, defined once and reused both to build the `cohort` column and
# to fix its factor level order (mirrors how `questions` is handled).
cohorts <- c("Pioneers", "Careful optimists", "Cautious time savers",
             "Neutrals", "Wary observers", "Concerned pessimists",
             "Sceptics", "Challengers")

# Long-format data: one row per question x cohort combination. Rows are
# ordered by question, with the cohorts cycling within each question.
oup_data <- tibble(
  question = rep(questions, each = length(cohorts)),
  cohort = rep(cohorts, times = length(questions)),
  # One count per question, repeated for every cohort of that question.
  positives = rep(
    c(383, 636, 531, 159, 373, 410, 293, 580, 135, 194),
    each = length(cohorts)),
  # Percentages laid out as one line per question, one value per cohort,
  # in the same order as `questions` and `cohorts`.
  rate = c(39, 34, 26, 25, 17, 5, 12, 6,
           50, 49, 44, 40, 43, 26, 32, 19,
           57, 47, 37, 31, 25, 11, 11, 8,
           20, 17, 7, 11, 5, 1, 2, 1,
           24, 29, 25, 24, 24, 15, 27, 27,
           43, 30, 26, 24, 17, 7, 24, 14,
           31, 21, 18, 22, 10, 13, 8, 13,
           59, 48, 38, 38, 26, 15, 23, 6,
           15, 10, 8, 7, 6, 5, 5, 8,
           8, 5, 13, 11, 11, 35, 15, 34)
) %>%
  mutate(
    # Explicit levels keep the data-entry order instead of alphabetical order.
    question = factor(question, levels = questions),
    cohort = factor(cohort, levels = cohorts),
    # Text version of the rate with a percent sign, e.g. "39%".
    pretty_rate = glue("{rate}%")
  )

# Build one label per question of the form "<question> (N=<positives>)".
# Rows come back in order of first appearance in `oup_data`.
pretty_label <- oup_data %>%
  distinct(question, positives) %>%
  transmute(pretty = glue("{question} (N={positives})")) %>%
  pull(pretty)


# Grouped bar plot: one cluster of bars per question, one bar per cohort,
# with each bar's percentage printed just above it.
oup_data %>%
  ggplot(aes(x = question, y = rate, fill = cohort,
             label = pretty_rate)) +
  geom_col(position = "dodge") +
  # Labels are dodged like the bars and nudged up by 1 unit so they sit on
  # top of each bar rather than overlapping it.
  geom_text(aes(y = rate + 1), position = position_dodge(width = 0.9),
            vjust = 0, size = 1.5, fontface = "bold") +
  # Short gray ticks at the half-integer positions between question groups.
  # They extend below y = 0, so they rely on clip = "off" further down.
  annotate("segment", x = seq(0.5, 10.5, 1), xend = seq(0.5, 10.5, 1),
           y = 0, yend = -1,
           linewidth = 0.25, color = "gray") +
  # One color per cohort, in cohort factor-level order.
  scale_fill_manual(values = c("#742FA0", "#021F62", "#056FBF", "#08AEF0",
                               "#94D052", "#FFFF04", "#FFBF05", "#C10100")) +
  # `rate` is already on a 0-100 scale, hence scale = 1 for the % labels.
  scale_y_continuous(breaks = seq(0, 70, by = 10),
                     labels = scales::label_percent(scale = 1),
                     expand = c(0, 0)) +
  # Labels are supplied positionally, so `pretty_label` must be in the same
  # order as the levels of `question`.
  scale_x_discrete(expand = expansion(add = 0.5),
                   labels = pretty_label) +
  coord_cartesian(clip = "off", ylim = c(0, 70)) +
  guides(fill = guide_legend(nrow = 1)) +
  labs(fill = NULL, x = NULL, y = NULL,
       title = "AI has most commonly been used to support the discovery of existing research, followed by editing the research write-up",
       caption = "Multiple-response question, therefore percentages do not sum to 100") +
  theme_classic() +
  theme(legend.position = "top",
        legend.text = element_text(size = 5, margin = margin(l = 1)),
        # Spell out `units`; the original `unit = "pt"` only worked through
        # partial argument matching.
        legend.key.size = unit(5, units = "pt"),
        axis.text = element_text(size = 5, face = "bold"),
        axis.line = element_line(linewidth = 0.25, color = "gray"),
        axis.ticks.y = element_line(linewidth = 0.25, color = "gray"),
        # Default x ticks are hidden; the annotate() segments replace them.
        axis.ticks.x = element_blank(),
        plot.title.position = "plot",
        # ggtext textbox so the long title wraps instead of being cut off.
        plot.title = element_textbox_simple(face = "bold",
                                            margin = margin(b = 15)),
        plot.caption.position = "plot",
        plot.caption = element_text(size = 5, hjust = 0)
        )

# Save the most recently created plot; no `plot` is passed, so ggsave() falls
# back to its default. Width and height use ggsave()'s default units.
ggsave(filename = "ai_barplot.png", width = 8, height = 3.5)