Using functions from the ggtext R package to customize the appearance of text in figures (CC115)

June 14, 2021 • PD Schloss • 5 min read

Code

This is where we started before the episode:

library(tidyverse)
library(readxl)
library(glue)
library(ggtext)

# Build a lookup table with one row per OTU and a markdown-formatted `taxon`
# label of the form "*Genus*<br>(OTU n)".
taxonomy <- read_tsv("raw_data/schubert.cons.taxonomy") %>%
  select("OTU", "Taxonomy") %>%
  # rename_with() replaces the superseded scoped verb rename_all()
  rename_with(tolower) %>%
  # remove every parenthesised run of digits (presumably per-level confidence
  # values -- confirm against the taxonomy file) and the trailing semicolon
  mutate(taxonomy = str_replace_all(taxonomy, "\\(\\d+\\)", ""),
         taxonomy = str_replace(taxonomy, ";$", "")) %>%
  # split the semicolon-delimited lineage into one column per rank
  separate(taxonomy,
           into = c("kingdom", "phylum", "class", "order", "family", "genus"),
           sep = ";") %>%
  mutate(
    # e.g. an id such as "Otu0001" becomes "OTU 1"
    pretty_otu = str_replace(string = otu,
                             pattern = "tu0*",
                             replacement = "TU "),
    # wrap the genus name in asterisks (markdown emphasis)
    genus = str_replace(string = genus,
                        pattern = "(.*)",
                        replacement = "*\\1*"),
    # "*Foo_unclassified*" is rewritten as "Unclassified<br>*Foo*"
    genus = str_replace(string = genus,
                        pattern = "\\*(.*)_unclassified\\*",
                        replacement = "Unclassified<br>*\\1*"),
    taxon = glue("{genus}<br>({pretty_otu})")
  ) %>%
  select(otu, taxon)

# OTU table; its Group column is matched against the metadata sample_id below.
shared_file <- read_tsv("raw_data/schubert.subsample.shared")

# Sample metadata, reduced to the sample id and disease status columns and
# restricted to samples whose disease status is not missing.
metadata <- read_excel("raw_data/schubert.metadata.xlsx", na = "NA") %>%
  select(sample_id, disease_stat) %>%
  drop_na(disease_stat)

# Keep only the samples present in both tables.
shared_design <- shared_file %>%
  inner_join(metadata, by = c("Group" = "sample_id"))

# Run mothur's lefse command on the samples from two disease-status groups.
#
# Arguments:
#   x, y  values of `disease_stat` identifying the two groups to compare
#   tag   label inserted into the names of the files written and read
#
# Reads the global `shared_design` data frame. Writes
# processed_data/schubert.{tag}.shared and processed_data/schubert.{tag}.design,
# then invokes mothur/mothur on them.
#
# Returns the path processed_data/schubert.{tag}.0.03.lefse_summary.
# Stops with an error if mothur exits with a non-zero status or if that
# summary file does not exist afterwards.
run_lefse <- function(x, y, tag) {

  x_y <- shared_design %>%
    filter(disease_stat %in% c(x, y))

  # shared file: everything except the grouping column
  x_y %>%
    select(-disease_stat) %>%
    write_tsv(glue("processed_data/schubert.{tag}.shared"))

  # design file: sample id and its group
  x_y %>%
    select(Group, disease_stat) %>%
    write_tsv(glue("processed_data/schubert.{tag}.design"))

  command <- glue('mothur/mothur "#lefse(shared=schubert.{tag}.shared, design=schubert.{tag}.design, inputdir=processed_data)"')

  # system() returns the command's exit status; do not ignore a failure
  status <- system(command)
  if (status != 0) {
    stop(
      sprintf("mothur exited with status %d for tag '%s'", status, tag),
      call. = FALSE
    )
  }

  summary_file <- glue("processed_data/schubert.{tag}.0.03.lefse_summary")
  if (!file.exists(summary_file)) {
    stop(
      sprintf("expected lefse output not found: %s", summary_file),
      call. = FALSE
    )
  }

  summary_file
}

# One lefse run per pair of disease-status groups; each variable holds the
# path returned by run_lefse().
ndc_dc <- run_lefse(x = "NonDiarrhealControl",
                    y = "DiarrhealControl",
                    tag = "ndc_dc")
ndc_case <- run_lefse(x = "NonDiarrhealControl",
                      y = "Case",
                      tag = "ndc_case")
dc_case <- run_lefse(x = "DiarrhealControl",
                     y = "Case",
                     tag = "dc_case")

# Bar chart of the OTUs with an LDA score above 4 in the ndc_dc comparison.
read_tsv(ndc_dc) %>%
  # keep rows with a non-missing LDA score greater than 4
  filter(!is.na(LDA), LDA > 4) %>%
  inner_join(taxonomy, by = c("OTU" = "otu")) %>%
  mutate(
    # negate the NonDiarrhealControl scores so the two classes plot on
    # opposite sides of zero
    LDA = if_else(Class == "NonDiarrhealControl", -1 * LDA, LDA),
    # order the taxa along the y-axis by their signed score
    taxon = fct_reorder(taxon, LDA)
  ) %>%
  ggplot(aes(x = LDA, y = taxon, fill = Class)) +
  geom_col() +
  labs(x = "LDA Score (log 10)", y = NULL) +
  scale_x_continuous(
    limits = c(-6, 6),
    breaks = seq(-6, 6, by = 2)
  ) +
  scale_fill_manual(
    name = NULL,
    breaks = c("NonDiarrhealControl", "DiarrhealControl"),
    labels = c("Healthy", "Diarrhea,<br>*C. difficile* negative"),
    values = c("gray", "blue")
  ) +
  theme_classic() +
  # render the markdown/HTML in the taxon labels and legend entries
  theme(axis.text.y = element_markdown(),
        legend.text = element_markdown())

# write the plot built above to disk
ggsave("ndc_dc.pdf", plot = last_plot(), width = 5, height = 4)

Installations