Set knitting options
# global knitting options for automatic saving of all plots as .png and .pdf. Also sets cache directory.
knitr::opts_chunk$set(
dev = c("png", "pdf"), fig.keep = "all",
dev.args = list(pdf = list(encoding = "WinAnsi", useDingbats = FALSE)),
fig.path = file.path("fig_output/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input()))),
cache.path = file.path("cache/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input())))
)
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rlang)
##
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
##
## %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
## flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
## splice
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
library(latex2exp)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# source all relevant scripting files
source(file.path("scripts", "ampliverse_05.R"))
source(file.path("scripts", "plotting_functions.R"))
Load DADA2 output data
# taxa and sequence tables obtained from https://github.com/danote/Samail_16S_compilation
seqtab_OM19 <- read_rds("data_raw/16S_sequencing_data/seqtab_nochim_OM19_processed_20200803.rds")
taxtab_OM19 <- read_rds("data_raw/16S_sequencing_data/taxa_OM19_processed_20200803.rds")
Load metadata
meta_map_OM19 <- read_delim("data_raw/16S_sequencing_data/map_for_compilation_OM19.txt", delim = "\t",
col_types = cols(
sample_id = col_character(),
barcode_sequence = col_character(),
forward_linker_primer_sequence = col_character(),
reverse_primer_sequence = col_character(),
sample_type = col_character(),
nucleic_acid_type = col_character(),
sampling_site = col_character(),
year_sampled = col_double(),
month_sampled = col_double(),
day_sampled = col_double(),
depth_fluid_intake_mbct = col_double(),
notes = col_character(),
sampling_method = col_character(),
upper_packer_inflated = col_logical(),
upper_packer_depth_mbct = col_double(),
lower_packer_inflated = col_logical(),
lower_packer_depth_mbct = col_double(),
well_depth_mbgl = col_double(),
casing_extent_mbct = col_double(),
casing_height_magl = col_double(),
screened_interval_mbct = col_character(),
depth_to_water_mbct = col_double()
)
) %>% select(1:22)
ampli_data_OM19 <- ampli_tidy_dada2(seqtab_OM19, taxtab_OM19) %>% ampli_concat_tax() %>% ampli_concat_tax(tax_levels = 6, concat_tax_col_name = taxonomy_6) %>% ampli_join_metadata_map(meta_map_OM19)
ampli_data_OM19_sum <- ampli_data_OM19 %>% ampli_tally_reads(c("year_sampled","sample_type"))
# sort by read counts
ampli_data_OM19_sum %>% arrange(desc(reads_sum))
# generate summary stats of read counts
summary(ampli_data_OM19_sum %>% select(reads_sum))
## reads_sum
## Min. : 92
## 1st Qu.:43423
## Median :66501
## Mean :55686
## 3rd Qu.:73624
## Max. :86307
Plot read counts
Oman groundwater samples have significantly higher read counts than extraction or PCR controls, which is good.
plot_reads_sums_1 <- ampli_data_OM19_sum %>% ggplot(aes(
x = fct_reorder(sample_id, desc(reads_sum)),
y = reads_sum,
fill = sample_type
)) +
geom_bar(stat = "identity") +
scale_y_continuous(name = "Reads") +
scale_x_discrete(name = "Sample ID") +
scale_fill_discrete(name = "Sample type") +
theme_bw()+
theme(
axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1),
legend.position = "bottom"
)
plot_reads_sums_1
# Focus on data of interest ## Filter for only desired samples
# define sample set ID's
samples_to_keep <- c("14NP", "B71")
# keep just those samples
ampli_data_OM19_focus_samples <- ampli_data_OM19 %>% ampli_filter_strings(col_to_filter = sample_id, strings_to_filter = samples_to_keep, detection_method = "complete", action = "keep") %>%
# remove taxa with zero reads (messes up plotting later if kept)
ampli_rm_0_read_taxa()
## 468 zero-read taxa removed from data set.
Filter out mitochondria, chloroplasts, eukaryotes, and sequences not assigned taxonomy at the the domain level
ampli_data_OM19_focus_samples_taxa_filtered <- ampli_data_OM19_focus_samples %>% ampli_filter_strings(col_to_filter = taxonomy, strings_to_filter = c("Chloroplast", "Mitochondria", "Eukaryota", "k__NA"), detection_method = "substring", action = "remove")
Tally reads per sample
ampli_data_OM19_focus_samples_taxa_filtered_sum <- ampli_data_OM19_focus_samples_taxa_filtered %>% ampli_tally_reads(c("year_sampled","sample_type"))
# sort by read counts
ampli_data_OM19_focus_samples_taxa_filtered_sum %>% arrange(desc(reads_sum))
# generate summary stats of read counts
summary(ampli_data_OM19_focus_samples_taxa_filtered_sum %>% select(reads_sum))
## reads_sum
## Min. :67256
## 1st Qu.:68841
## Median :70426
## Mean :70426
## 3rd Qu.:72010
## Max. :73595
Plot read counts
plot_reads_sums_2 <- ampli_data_OM19_focus_samples_taxa_filtered_sum %>% ggplot(aes(
x = fct_reorder(sample_id, desc(reads_sum)),
y = reads_sum,
label = reads_sum
)) +
geom_bar(stat = "identity") +
geom_text(nudge_y = 1200) +
scale_x_discrete(name = "Sample ID") +
scale_y_continuous(name = "Reads") +
theme_bw()+
theme(
axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1),
legend.position = "bottom",
panel.grid = element_blank()
)
plot_reads_sums_2
ampli_data_OM19_focus_samples_taxa_filtered <- ampli_data_OM19_focus_samples_taxa_filtered %>% ampli_calc_rel_abund()
ampli_data_OM19_focus_samples_taxa_filtered %>% head()
Check that relative abundances add up to 1, as expected
ampli_data_OM19_focus_samples_taxa_filtered %>% group_by(sample_id) %>% summarise(rel_abund_sum = sum(rel_abund), .groups = "drop") %>% summary()
## sample_id rel_abund_sum
## Length:2 Min. :1
## Class :character 1st Qu.:1
## Mode :character Median :1
## Mean :1
## 3rd Qu.:1
## Max. :1
OM19_heat <- ampli_data_OM19_focus_samples_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = sampling_site, text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", top_n = 10, y_taxa_arrangement = "abund")
OM19_heat +
# plot geometry
geom_text(parse = FALSE, size = 2.8) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance /\n[%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = NULL, expand = c(0,0)) +
scale_y_discrete(name = NULL, expand = c(0,0)) +
theme_bw(base_size = 8.124) +
theme(
legend.position = "right",
panel.grid = element_blank(),
legend.box.spacing = unit(2, "pt"),
legend.title = element_text(size = 7),
plot.margin = margin(2,0,1,1)
)
OM19_heat_tax_6 <- ampli_data_OM19_focus_samples_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = sampling_site, text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", top_n = 7, y_taxa_arrangement = "abund", y_taxa_col = taxonomy_6)
OM19_heat_tax_6 +
# plot geometry
geom_text(parse = FALSE, size = 2.8) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance /\n[%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = NULL, expand = c(0,0)) +
scale_y_discrete(name = NULL, expand = c(0,0)) +
theme_bw(base_size = 9) +
theme(
legend.position = "right",
panel.grid = element_blank(),
legend.box.spacing = unit(2, "pt"),
legend.title = element_text(size = 7),
plot.margin = margin(2,0,1,1)
)
OM19_heat_tax_6_names_tbl <- ampli_data_OM19_focus_samples_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = sampling_site, text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", top_n = 7, y_taxa_arrangement = "abund", y_taxa_col = taxonomy_6, return_plot_data_tbl = TRUE)
OM19_heat_tax_6_names_tbl
OM19_heat_tax_6_names_tbl_edit <- OM19_heat_tax_6_names_tbl %>% mutate(tax_short = case_when(
taxonomy_6 == "k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobacterium" ~ "g. $\\textit{Methanobacterium}$",
taxonomy_6 == "k__Bacteria; p__Acetothermia; c__Acetothermiia; o__NA; f__NA; g__NA" ~ "c. Acetothermiia",
taxonomy_6 == "k__Bacteria; p__Deinococcota; c__Deinococci; o__Thermales; f__Thermaceae; g__Meiothermus" ~ "g. $\\textit{Meiothermus}$",
taxonomy_6 == "k__Bacteria; p__Nitrospirota; c__Thermodesulfovibrionia; o__NA; f__NA; g__NA" ~ "c. Thermodesulfovibrionia",
taxonomy_6 == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Burkholderiales; f__Comamonadaceae; g__Brachymonas" ~ "g. $\\textit{Brachymonas}$",
taxonomy_6 == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Burkholderiales; f__Comamonadaceae; g__Hydrogenophaga" ~ "g. $\\textit{Hydrogenophaga}$",
taxonomy_6 == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Xanthomonadaceae; g__Silanimonas" ~ "g. $\\textit{Silanimonas}$",
taxonomy_6 == "Other taxa" ~ "Other taxa"
))
OM19_heat_tax_6_names_tbl_edit
OM19_heat_tax_6 <- ampli_data_OM19_focus_samples_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = sampling_site, text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", top_n = 7, y_taxa_arrangement = "abund", y_taxa_col = taxonomy_6, custom_taxa_names_tbl = OM19_heat_tax_6_names_tbl_edit, custom_taxa_names_col = tax_short,)
OM19_heat_tax_6 +
# plot geometry
geom_text(parse = FALSE) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance /\n[%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = "Well", expand = c(0,0)) +
scale_y_discrete(name = "Deepest taxonomic assignment\n(maximum of genus)", expand = c(0,0), labels = TeX) +
theme_bw(base_size = 10) +
theme(
legend.position = "right",
panel.grid = element_blank(),
legend.box.spacing = unit(0, "pt"),
legend.box.margin = margin(1,1,1,1),
plot.margin = margin(2,-5,1,1),
legend.title = element_text(size = 9)
)
CH4_taxa <- c("Methanobacteri", "Methanomicrobia", "Methanopyrales", "Methanocellales", "Methanoplasmatales", "Methanosarcinales", "Methanomassiliicocc", "Methylococc", "Methylocystis", "Methylosinus", "Methylocella", "Methylocapsa", "Methylacidiphil", "Methylomirabilis", "ANME")
ampli_data_OM19_focus_samples_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = sampling_site, taxa_selection_method = "custom_taxa_char_vector", custom_taxa_char_vector = CH4_taxa, plot_other_taxa_bin = FALSE, y_taxa_arrangement = "alpha", text_label_scalar = 100, text_label_decimal_places = 1, text_label_format = "scientific") +
# plot geometry
geom_text(parse = TRUE, size = 3) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = NULL, expand = c(0,0)) +
scale_y_discrete(name = NULL, expand = c(0,0)) +
theme_bw(base_size = 8) +
theme(
legend.position = "right",
panel.grid = element_blank()
)
## Warning: `as_character()` is deprecated as of rlang 0.4.0
## Please use `vctrs::vec_cast()` instead.
## This warning is displayed once per session.
ampli_data_OM19_focus_samples_taxa_filtered %>%
ampli_heat_map(y_taxa_col = Genus, x_sample_group_col = sampling_site, taxa_selection_method = "custom_taxa_char_vector", custom_taxa_char_vector = CH4_taxa, plot_other_taxa_bin = FALSE, y_taxa_arrangement = "alpha", text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round") +
# plot geometry
geom_text(parse = FALSE, size = 3) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = NULL, expand = c(0,0)) +
scale_y_discrete(name = NULL, expand = c(0,0)) +
theme_bw(base_size = 8) +
theme(
legend.position = "right",
panel.grid = element_blank()
)