Recreating a grouped and labelled barplot in R with ggplot2 (CC308)
Pat recreates a grouped bar plot whose text annotations give the value of each bar, using the geom_col and geom_text functions from ggplot2 in R. He customizes the appearance to match the original figure using the scale_fill_manual, scale_x_discrete, scale_y_continuous, coord_cartesian, labs, and theme functions. The plot was included in a slide deck reporting the results of a study performed by Oxford University Press. The press release, the slide deck, and the newsletter describing how he would go about generating the figure were linked in the original description (the URLs are missing from this copy). The figure recreated in this episode is on slide 20 of the slide deck.
# Research stages shown along the x-axis, in display order. This vector is
# also used as the factor levels for `question` further down, so its order
# determines the left-to-right order of the bar groups.
# The embedded "\n" characters force line breaks inside the axis labels.
# NOTE(review): the trailing "\n" on the last two entries presumably pushes
# the "(N=...)" suffix that is appended later onto its own line -- confirm.
questions <- c(
  "Generating ideas for\nresearch",
  "Discovering existing\nresearch",
  "Summarising existing\nresearch",
  "Designing the\nresearch",
  "Collecting research\ndata",
  "Analysing research\ndata",
  "Writing up the\nresearch",
  "Editing write-up of\nresearch",
  "Creating marketing /\noutreach content\n",
  "Another stage\n"
)
# Cohort names in legend order. Defined once and reused both for the data
# column and for the factor levels so the two cannot drift apart.
cohorts <- c("Pioneers", "Careful optimists", "Cautious time savers",
             "Neutrals", "Wary observers", "Concerned pessimists",
             "Sceptics", "Challengers")

# Long-format plotting data: one row per question x cohort combination
# (10 questions x 8 cohorts = 80 rows).
#   question    - research stage, repeated once per cohort
#   cohort      - respondent cohort, cycled within each question
#   positives   - per-question count (shown as "N=" in the axis labels)
#   rate        - percentage for that question/cohort; each row of numbers
#                 below is one question, in cohort order
#   pretty_rate - `rate` formatted with a trailing "%" for the bar labels
oup_data <- tibble(
  question = rep(questions, each = length(cohorts)),
  cohort = rep(cohorts, times = length(questions)),
  positives = rep(
    c(383, 636, 531, 159, 373, 410, 293, 580, 135, 194),
    each = length(cohorts)
  ),
  rate = c(39, 34, 26, 25, 17, 5, 12, 6,
           50, 49, 44, 40, 43, 26, 32, 19,
           57, 47, 37, 31, 25, 11, 11, 8,
           20, 17, 7, 11, 5, 1, 2, 1,
           24, 29, 25, 24, 24, 15, 27, 27,
           43, 30, 26, 24, 17, 7, 24, 14,
           31, 21, 18, 22, 10, 13, 8, 13,
           59, 48, 38, 38, 26, 15, 23, 6,
           15, 10, 8, 7, 6, 5, 5, 8,
           8, 5, 13, 11, 11, 35, 15, 34)
) %>%
  mutate(
    # Explicit factor levels fix the plotting order (x-axis groups and legend
    # entries) instead of falling back to alphabetical order.
    question = factor(question, levels = questions),
    cohort = factor(cohort, levels = cohorts),
    pretty_rate = glue("{rate}%")
  )
# Lookup of x-axis labels: a named character vector whose names are the
# question values and whose values are "<question> (N=<positives>)".
# scale_x_discrete(labels = ...) matches a named vector against the breaks
# by name, so the names must equal the question strings exactly.
pretty_label <- oup_data %>%
  distinct(question, positives) %>%
  mutate(
    # Plain character (not factor) so the vector names are the label text.
    question = as.character(question),
    pretty = as.character(glue("{question} (N={positives})"))
  ) %>%
  select(question, pretty) %>%
  # Two-column tibble -> named vector (first column = names, second = values).
  deframe()

# Grouped bar chart of rate by research stage (x) and cohort (fill), with each
# bar's percentage printed just above it, then saved to ai_barplot.png.
oup_data %>%
  ggplot(aes(x = question, y = rate, fill = cohort,
             label = pretty_rate)) +
  geom_col(position = "dodge") +
  # Dodge the labels by 0.9 (the default bar width) so each label sits over
  # its own bar; y is shifted up one unit to leave a gap above the bar top.
  geom_text(aes(y = rate + 1), position = position_dodge(width = 0.9),
            vjust = 0, size = 1.5, fontface = "bold") +
  # Short gray ticks at the boundaries between question groups
  # (x = 0.5, 1.5, ..., 10.5). They extend below y = 0, which is outside the
  # panel, so they rely on clip = "off" in coord_cartesian() below.
  annotate("segment", x = seq(0.5, 10.5, 1), xend = seq(0.5, 10.5, 1),
           y = 0, yend = -1,
           linewidth = 0.25, color = "gray") +
  # One color per cohort, in factor-level order.
  scale_fill_manual(values = c("#742FA0", "#021F62", "#056FBF", "#08AEF0",
                               "#94D052", "#FFFF04", "#FFBF05", "#C10100")) +
  # `rate` is already in percent units, hence scale = 1; no axis padding so
  # the bars start exactly on the x-axis line.
  scale_y_continuous(breaks = seq(0, 70, by = 10),
                     labels = scales::label_percent(scale = 1),
                     expand = c(0, 0)) +
  scale_x_discrete(expand = expansion(add = 0.5),
                   labels = pretty_label) +
  coord_cartesian(clip = "off", ylim = c(0, 70)) +
  # Keep all legend entries on a single row above the plot.
  guides(fill = guide_legend(nrow = 1)) +
  labs(fill = NULL, x = NULL, y = NULL,
       title = "AI has most commonly been used to support the discovery of existing research, followed by editing the research write-up",
       caption = "Multiple-response question, therefore percentages do not sum to 100") +
  theme_classic() +
  theme(
    legend.position = "top",
    legend.text = element_text(size = 5, margin = margin(l = 1)),
    legend.key.size = unit(5, unit = "pt"),
    axis.text = element_text(size = 5, face = "bold"),
    axis.line = element_line(linewidth = 0.25, color = "gray"),
    axis.ticks.y = element_line(linewidth = 0.25, color = "gray"),
    # Default x ticks are removed; the annotate() segments above replace them.
    axis.ticks.x = element_blank(),
    plot.title.position = "plot",
    # ggtext text box so the long title wraps to the plot width.
    plot.title = element_textbox_simple(face = "bold",
                                        margin = margin(b = 15)),
    plot.caption.position = "plot",
    plot.caption = element_text(size = 5, hjust = 0)
  )

# No `plot` argument is given, so ggsave() writes the last plot created.
ggsave("ai_barplot.png", width = 8, height = 3.5)