Data viz makeover from bar plot to dot plot with ggplot2 in R (CC309)
Pat does a data viz makeover by converting a grouped bar plot to a dot plot using ggplot2 in R. He customizes the appearance using the geom_point, geom_hline, scale_fill_manual, scale_y_discrete, coord_cartesian, labs, and theme functions. You can find the code he developed in this episode at https://www.riffomonas.org/code_club/2024-10-24-dotplot. The original bar plot was included in a slide deck reporting the results of a study performed by Oxford University Press. You can find their press release at https://corp.oup.com/news/how-are-researchers-responding-to-ai/ and the slide deck at https://fdslive.oup.com/www.oup.com/academic/pdf/Researchers-and-AI-survey-findings.pdf. The figure Pat recreates in this episode is on slide 20. The newsletter describing how he would go about generating the figure can be found at https://shop.riffomonas.org/posts/whatever-you-do-don-t-use-ai-to-figure-out-this-plot.
Code
library(tidyverse)
library(glue)
library(forcats)
library(ggtext)
# Cohort names. The order matters: this vector is also used as the factor
# levels of the `cohort` column built below.
cohorts <- c(
  "Pioneers",
  "Careful optimists",
  "Cautious time savers",
  "Neutrals",
  "Wary observers",
  "Concerned pessimists",
  "Sceptics",
  "Challengers"
)
# Data behind the figure, in long format: one row per question x cohort
# combination.
#
# Columns:
#   question  - question text, with "\n" line breaks for the axis label
#   cohort    - factor whose levels follow the order of `cohorts`
#   positives - per-question count, repeated for every cohort; shown as "N="
#               in the row label (NOTE(review): presumably the number of
#               respondents for that question - confirm against the slides)
#   rate      - one value per question x cohort; mapped to the x axis
#   pretty    - "<question> (N=<positives>)" label, as a factor ordered by
#               `positives`
#
# The repetition counts are derived from the lengths of the input vectors
# instead of being hard-coded, so `question`, `cohort` and `positives` stay
# aligned if a cohort or question is added or removed. `rate` must still
# supply exactly one value per question x cohort; tibble() errors otherwise.
survey_questions <- c(
  "Generating ideas for\nresearch",
  "Discovering existing\nresearch",
  "Summarising existing\nresearch",
  "Designing the\nresearch",
  "Collecting research\ndata",
  "Analysing research\ndata",
  "Writing up the\nresearch",
  "Editing write-up of\nresearch",
  "Creating marketing/\noutreach content",
  "Another stage"
)

# One count per entry of `survey_questions`, in the same order.
question_counts <- c(383, 636, 531, 159, 373, 410, 293, 580, 135, 194)
stopifnot(length(question_counts) == length(survey_questions))

n_cohorts <- length(cohorts)
n_questions <- length(survey_questions)

oup_data <- tibble(
  question = rep(survey_questions, each = n_cohorts),
  cohort = factor(rep(cohorts, times = n_questions), levels = cohorts),
  positives = rep(question_counts, each = n_cohorts),
  # One line of values per question (same order as `survey_questions`);
  # within a line, one value per cohort (same order as `cohorts`).
  rate = c(
    39, 34, 26, 25, 17, 5, 12, 6,
    50, 49, 44, 40, 43, 26, 32, 19,
    57, 47, 37, 31, 25, 11, 11, 8,
    20, 17, 7, 11, 5, 1, 2, 1,
    24, 29, 25, 24, 24, 15, 27, 27,
    43, 30, 26, 24, 17, 7, 24, 14,
    31, 21, 18, 22, 10, 13, 8, 13,
    59, 48, 38, 38, 26, 15, 23, 6,
    15, 10, 8, 7, 6, 5, 5, 8,
    8, 5, 13, 11, 11, 35, 15, 34
  ),
  # Earlier columns are visible to later ones inside tibble(), so the label
  # can be built from `question` and `positives` directly.
  pretty = fct_reorder(glue("{question} (N={positives})"), positives)
)
# Dot plot of `rate` for each question, with one dodged point per cohort,
# written to "ai_dotplot.png".
#
# The plot is stored in a variable and passed to ggsave() explicitly rather
# than relying on ggsave()'s default of saving whichever plot was made last.

# Fill colours, assigned to the levels of `cohort` in order.
cohort_palette <- c(
  "#b2182b", "#d6604d", "#f4a582", "#fddbc7",
  "#d1e5f0", "#92c5de", "#4393c3", "#2166ac"
)

# Number of rows on the discrete y axis; used to place the separator lines
# and to fix the y limits instead of hard-coding the row count.
n_rows <- nlevels(oup_data$pretty)

ai_dotplot <- oup_data %>%
  ggplot(aes(x = rate, y = pretty, fill = cohort)) +
  # Shape 21 is a circle with a separate fill, so `fill` colours the points.
  geom_point(shape = 21, position = position_dodge(width = 0.6)) +
  # Thin separator lines halfway between adjacent rows.
  geom_hline(
    yintercept = seq(0.5, n_rows + 0.5, 1),
    linewidth = 0.25,
    color = "gray"
  ) +
  scale_fill_manual(values = cohort_palette) +
  # No padding on the y axis; the limits below frame the rows exactly.
  scale_y_discrete(expand = c(0, 0)) +
  coord_cartesian(
    clip = "off",
    ylim = c(0.5, n_rows + 0.5),
    xlim = c(0, 70)
  ) +
  labs(
    x = "Percent of researchers in favor",
    y = NULL,
    fill = NULL,
    title = "AI has most commonly been used to support the discovery of existing research, followed by editing the research write-up",
    caption = "Multiple-response question, therefore percentages do not sum to 100"
  ) +
  theme_classic() +
  theme(
    axis.text.y = element_text(hjust = 0),
    axis.ticks.y = element_blank(),
    # Compact, boxed legend placed inside the panel.
    legend.key.size = unit(6, "pt"),
    legend.text = element_text(size = 6),
    legend.background = element_rect(color = "black"),
    legend.position = "inside",
    legend.position.inside = c(0.8, 0.15),
    # Align title and caption with the whole plot, not just the panel.
    plot.title.position = "plot",
    # ggtext text box so the long title wraps to the plot width.
    plot.title = element_textbox_simple(
      face = "bold",
      size = 15,
      margin = margin(t = 0, b = 15)
    ),
    plot.caption.position = "plot",
    plot.caption = element_text(size = 6, hjust = 0)
  )

# Top-level reference so the plot is still displayed when the script is run
# interactively, as the unassigned pipeline did.
ai_dotplot

ggsave("ai_dotplot.png", plot = ai_dotplot, width = 5, height = 5)