Using functions from the ggtext R package to customize the appearance of text in figures (CC115)
Code
This is where we started before the episode
library(tidyverse)
library(readxl)
library(glue)
library(ggtext)
# Build a lookup table mapping each OTU to a markdown-formatted label.
#
# Reads the taxonomy file, removes the parenthesized numbers that follow each
# rank, splits the lineage on ";" into six named ranks, and produces `taxon`,
# a label of the form "*Genus*<br>(OTU n)" (or "Unclassified<br>*Name*<br>(OTU
# n)" when the genus ends in "_unclassified"). The asterisks and <br> are
# markdown/HTML intended for rendering with ggtext::element_markdown().
# Result: a table with the columns `otu` and `taxon`.
taxonomy <- read_tsv("raw_data/schubert.cons.taxonomy") %>%
  select("OTU", "Taxonomy") %>%
  # rename_with() is the current replacement for the superseded rename_all()
  rename_with(tolower) %>%
  mutate(
    # strip "(<digits>)" annotations, e.g. "(100)", from every rank
    taxonomy = str_replace_all(taxonomy, "\\(\\d+\\)", ""),
    # drop the trailing ";" before splitting on ";"
    taxonomy = str_replace(taxonomy, ";$", "")
  ) %>%
  separate(
    taxonomy,
    into = c("kingdom", "phylum", "class", "order", "family", "genus"),
    sep = ";"
  ) %>%
  mutate(
    # e.g. "Otu0001" -> "OTU 1": the leading zeros are swallowed by "0*"
    pretty_otu = str_replace(
      string = otu,
      pattern = "tu0*",
      replacement = "TU "
    ),
    # wrap the whole genus name in asterisks (markdown italics)
    genus = str_replace(
      string = genus,
      pattern = "(.*)",
      replacement = "*\\1*"
    ),
    # "*Name_unclassified*" -> "Unclassified<br>*Name*"
    genus = str_replace(
      string = genus,
      pattern = "\\*(.*)_unclassified\\*",
      replacement = "Unclassified<br>*\\1*"
    ),
    taxon = glue("{genus}<br>({pretty_otu})")
  ) %>%
  select(otu, taxon)
# Sample metadata: keep only the sample identifier and disease status, and
# discard samples whose disease status is missing.
metadata <- read_excel(path = "raw_data/schubert.metadata.xlsx", na = "NA") %>%
  select(sample_id, disease_stat) %>%
  drop_na(disease_stat)

# Subsampled shared file; its `Group` column holds the sample identifiers
# used as the join key below.
shared_file <- read_tsv("raw_data/schubert.subsample.shared")

# Attach each sample's disease status to its shared-file row. Being an inner
# join, rows without a match in the other table are dropped.
shared_design <- shared_file %>%
  inner_join(metadata, by = c("Group" = "sample_id"))
# Run mothur's lefse command on the samples belonging to two disease groups.
#
# Arguments:
#   x, y    Values of `disease_stat` identifying the two groups to compare.
#   tag     Short label used to name the files written to processed_data/.
#   shared  Table holding a `Group` column, a `disease_stat` column and the
#           remaining shared-file columns. Defaults to the script-level
#           `shared_design`, so existing three-argument calls are unchanged.
#
# Side effects: writes schubert.{tag}.shared and schubert.{tag}.design into
# processed_data/ and runs the executable at mothur/mothur.
#
# Returns the path of the lefse summary file for this comparison. Emits a
# warning if mothur exits with a non-zero status and stops with an error if
# the summary file does not exist afterwards, instead of silently handing
# back a path to a missing file.
run_lefse <- function(x, y, tag, shared = shared_design) {
  x_y <- shared %>%
    filter(disease_stat %in% c(x, y))

  # shared file for mothur: everything except the grouping variable
  x_y %>%
    select(-disease_stat) %>%
    write_tsv(glue("processed_data/schubert.{tag}.shared"))

  # design file for mothur: sample identifier and its group
  x_y %>%
    select(Group, disease_stat) %>%
    write_tsv(glue("processed_data/schubert.{tag}.design"))

  command <- glue('mothur/mothur "#lefse(shared=schubert.{tag}.shared, design=schubert.{tag}.design, inputdir=processed_data)"')

  # system() returns the exit status of the command; surface failures
  status <- system(command)
  if (status != 0) {
    warning(
      "mothur exited with status ", status, " for tag '", tag, "'",
      call. = FALSE
    )
  }

  summary_file <- glue("processed_data/schubert.{tag}.0.03.lefse_summary")
  if (!file.exists(summary_file)) {
    stop(
      "lefse summary not found after running mothur: ", summary_file,
      call. = FALSE
    )
  }

  summary_file
}
# Pairwise lefse comparisons between the disease-status groups. Each variable
# receives the value returned by run_lefse() for that comparison.
ndc_dc <- run_lefse(
  x = "NonDiarrhealControl",
  y = "DiarrhealControl",
  tag = "ndc_dc"
)
ndc_case <- run_lefse(
  x = "NonDiarrhealControl",
  y = "Case",
  tag = "ndc_case"
)
dc_case <- run_lefse(
  x = "DiarrhealControl",
  y = "Case",
  tag = "dc_case"
)
# Bar chart of the OTUs with an LDA score above 4 in the NonDiarrhealControl
# vs. DiarrhealControl comparison, labelled with the markdown `taxon` strings
# from `taxonomy`, then saved to ndc_dc.pdf.
read_tsv(ndc_dc) %>%
  drop_na(LDA) %>%
  filter(LDA > 4) %>%
  inner_join(taxonomy, by = c("OTU" = "otu")) %>%
  mutate(
    # negate the score for NonDiarrhealControl so the two classes point in
    # opposite directions along the x axis
    LDA = if_else(Class == "NonDiarrhealControl", -LDA, LDA),
    # order the y axis by the signed score
    taxon = fct_reorder(taxon, LDA)
  ) %>%
  ggplot(aes(x = LDA, y = taxon, fill = Class)) +
  geom_col() +
  scale_x_continuous(breaks = seq(-6, 6, by = 2), limits = c(-6, 6)) +
  scale_fill_manual(
    name = NULL,
    breaks = c("NonDiarrhealControl", "DiarrhealControl"),
    labels = c("Healthy", "Diarrhea,<br>*C. difficile* negative"),
    values = c("gray", "blue")
  ) +
  labs(x = "LDA Score (log 10)", y = NULL) +
  theme_classic() +
  theme(
    # axis and legend labels contain markdown/HTML (<br>, *italics*), so they
    # are drawn with ggtext's markdown element
    legend.text = element_markdown(),
    axis.text.y = element_markdown()
  )

# no plot argument is given, so ggsave() writes the last plot
ggsave(filename = "ndc_dc.pdf", width = 5, height = 4)