library(tidyverse)
library(here)
library(phyloseq)
library(microViz)
Correlation and Ordination
In this session, we’re going to take a look at beta diversity, and at the microViz package for working with phyloseq objects.
Have this microviz documentation open
First let’s load the libraries:
<- readRDS("data/instructional/sample_data_instructional.rds")
sample_df <- readRDS("data/instructional/count_table_instructional.rds")
count_tab <- readRDS("data/instructional/tax_table_instructional.rds") tax_tab
# make a data frame of only the vaginal samples
<- sample_df %>%
vaginal_samples_to_keep filter(sample_type == "vaginal")
<- tax_tab %>%
asv_df data.frame() %>%
rownames_to_column(var = "asv") %>%
mutate(asv_len = nchar(asv))
<- asv_df %>%
keepers filter(asv_len > 385 & asv_len < 460) %>%
# Careful below! Don't drop NA unless you intend to
filter(Order != "Chloroplast" | is.na(Order)) %>%
filter(Family != "Mitochondria" | is.na(Family)) %>%
pull(asv) %>% # pull() goes from column to vector
unique()
# filter the count table to only include vaginal samples
# and to only include ASVs who are non-zero in at least one sample
<- count_tab %>%
filtered_counts # convert to data frame
as.data.frame() %>%
# change rownames to a column (https://tibble.tidyverse.org/reference/rownames.html)
rownames_to_column("amplicon_sample_id") %>%
# make the table "longer" to be in tidy format
# (https://r4ds.hadley.nz/data-tidy.html#sec-tidy-data)
pivot_longer(-amplicon_sample_id,names_to="asv", values_to = "count") %>%
# semi_join() returns all rows from x with a match in y.
semi_join(vaginal_samples_to_keep) %>%
# get rid of asvs that are zero in all samples
group_by(asv) %>%
filter(sum(count > 0) > 0) %>%
ungroup() %>%
#get rid of ASVs that aren't in the keepers vector
filter(asv %in% keepers)
# we need to convert the count table back into a matrix
# to make a phyloseq object
<- filtered_counts %>%
filtered_counts_matrix pivot_wider(names_from = asv, values_from = count) %>%
column_to_rownames("amplicon_sample_id") %>%
as.matrix()
# lastly, remove asv tax assignments if they are
# not present in any of the vaginal samples
# also remove Label because microviz doesn't like it
<- tax_tab %>%
filtered_taxonomy as.data.frame() %>%
rownames_to_column("asv") %>%
as_tibble() %>%
semi_join(filtered_counts) %>%
select(-Label) %>%
column_to_rownames("asv") %>%
as.matrix()
<- phyloseq(sample_data(sample_df %>%
ps filter(sample_type == "vaginal") %>%
column_to_rownames(var = "amplicon_sample_id")),
otu_table(filtered_counts_matrix, taxa_are_rows = FALSE),
tax_table(filtered_taxonomy)) %>%
prune_samples(sample_sums(.) > 100, .) %>%
tax_fix() %>% # this propogates higher taxonomy when lower is NA
tax_mutate(genus_species = str_c(Genus, Species, sep=" ")) %>%
tax_rename(rank = "genus_species")
<- ps %>%
ps_manual_taxonomy tax_fix() %>%
tax_mutate(Species = case_when(
== "acidophilus/casei/crispatus/gallinarum" ~ "crispatus",
Species == "crispatus/gasseri/helveticus/johnsonii/kefiranofaciens" ~ "crispatus",
Species == "animalis/apodemi/crispatus/murinus" ~ "crispatus",
Species .default = Species)) %>%
#also remake genus_species to fix those taxa
tax_mutate(genus_species = str_c(Genus, Species, sep = " ")) %>%
tax_rename(rank = "genus_species")
Why manual taxonomy? Well, if we look at the original dataframe we can see these taxa where our taxonomic database SILVA didn’t know which species it was exactly, but instead gave a list of possible species. However, we know these are vaginal samples, and that 99% of the time, these ASVs correspond to L. crispatus because the other possible species are very rare in vaginal samples.
%>%
ps tax_fix() %>%
tax_agg("genus_species") %>%
ps_seriate() %>% # this changes the order of the samples to be sorted by similarity
comp_barplot(tax_level = "genus_species", sample_order = "asis", n_taxa = 10) +
facet_wrap(vars(arm, time_point), scales="free_x")
Ok, now we’ll switch to live-coding for the rest of the session.