Set knitting options
# global knitting options for automatic saving of all plots as .png and .pdf. Also sets cache directory.
knitr::opts_chunk$set(
dev = c("png", "pdf"), fig.keep = "all",
dev.args = list(pdf = list(encoding = "WinAnsi", useDingbats = FALSE)),
fig.path = file.path("fig_output/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input()))),
cache.path = file.path("cache/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input())))
)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rlang)
##
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
##
## %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
## flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
## splice
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
library(latex2exp)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# source all relevant scripting files
source(file.path("scripts", "ampliverse_07.R"))
source(file.path("scripts", "plotting_functions.R"))
Load DADA2 output data
# taxa and sequence tables obtained from https://github.com/danote/Samail_16S_compilation
seqtab_OM17 <- read_rds("data_raw/16S_sequencing_data/seqtab_nochim_OM17_processed_20200929_3.rds")
taxtab_OM17 <- read_rds("data_raw/16S_sequencing_data/taxa_OM17_processed_20200929_2.rds")
seqtab_OM18 <- read_rds("data_raw/16S_sequencing_data/seqtab_nochim_OM18_processed_20200929.rds")
taxtab_OM18 <- read_rds("data_raw/16S_sequencing_data/taxa_OM18_processed_20200929.rds")
seqtab_OM19 <- read_rds("data_raw/16S_sequencing_data/seqtab_nochim_OM19_processed_20200803.rds")
taxtab_OM19 <- read_rds("data_raw/16S_sequencing_data/taxa_OM19_processed_20200803.rds")
Load metadata
meta_map_OM17 <- read_delim("data_raw/16S_sequencing_data/map_for_compilation_OM17.txt", delim = "\t",
col_types = cols(
sample_id = col_character(),
barcode_sequence = col_character(),
forward_linker_primer_sequence = col_character(),
reverse_primer_sequence = col_character(),
sample_type = col_character(),
nucleic_acid_type = col_character(),
sampling_site = col_character(),
year_sampled = col_double(),
month_sampled = col_double(),
day_sampled = col_double(),
depth_fluid_intake_mbct = col_double(),
notes = col_character(),
sampling_method = col_character(),
upper_packer_inflated = col_logical(),
upper_packer_depth_mbct = col_double(),
lower_packer_inflated = col_logical(),
lower_packer_depth_mbct = col_double(),
well_depth_mbgl = col_double(),
casing_extent_mbct = col_double(),
casing_height_magl = col_double(),
screened_interval_mbct = col_character(),
depth_to_water_mbct = col_double()
)
) %>%
# select columns 1 through 22 because read_delim is generating some extra dummy columns (X23, X24, etc.)
# (there are only 22 actual data columns in the spreadsheet)
select(1:22)
## New names:
## * `` -> ...23
## * `` -> ...24
## * `` -> ...25
## * `` -> ...26
## * `` -> ...27
## * ...
meta_map_OM18 <- read_delim("data_raw/16S_sequencing_data/map_for_compilation_OM18.txt", delim = "\t",
col_types = cols(
sample_id = col_character(),
barcode_sequence = col_character(),
forward_linker_primer_sequence = col_character(),
reverse_primer_sequence = col_character(),
sample_type = col_character(),
nucleic_acid_type = col_character(),
sampling_site = col_character(),
year_sampled = col_double(),
month_sampled = col_double(),
day_sampled = col_double(),
depth_fluid_intake_mbct = col_double(),
notes = col_character(),
sampling_method = col_character(),
upper_packer_inflated = col_logical(),
upper_packer_depth_mbct = col_double(),
lower_packer_inflated = col_logical(),
lower_packer_depth_mbct = col_double(),
well_depth_mbgl = col_double(),
casing_extent_mbct = col_double(),
casing_height_magl = col_double(),
screened_interval_mbct = col_character(),
depth_to_water_mbct = col_double()
)
) %>% select(1:22)
meta_map_OM19 <- read_delim("data_raw/16S_sequencing_data/map_for_compilation_OM19.txt", delim = "\t",
col_types = cols(
sample_id = col_character(),
barcode_sequence = col_character(),
forward_linker_primer_sequence = col_character(),
reverse_primer_sequence = col_character(),
sample_type = col_character(),
nucleic_acid_type = col_character(),
sampling_site = col_character(),
year_sampled = col_double(),
month_sampled = col_double(),
day_sampled = col_double(),
depth_fluid_intake_mbct = col_double(),
notes = col_character(),
sampling_method = col_character(),
upper_packer_inflated = col_logical(),
upper_packer_depth_mbct = col_double(),
lower_packer_inflated = col_logical(),
lower_packer_depth_mbct = col_double(),
well_depth_mbgl = col_double(),
casing_extent_mbct = col_double(),
casing_height_magl = col_double(),
screened_interval_mbct = col_character(),
depth_to_water_mbct = col_double()
)
) %>% select(1:22)
ampli_data_OM17 <- ampli_tidy_dada2(seqtab_OM17, taxtab_OM17) %>% ampli_concat_tax() %>% ampli_join_metadata_map(meta_map_OM17)
ampli_OM17_thru_19 <- ampli_tidy_dada2(seqtab_OM18, taxtab_OM18) %>% ampli_concat_tax() %>% ampli_join_metadata_map(meta_map_OM18)
ampli_data_OM19 <- ampli_tidy_dada2(seqtab_OM19, taxtab_OM19) %>% ampli_concat_tax() %>% ampli_join_metadata_map(meta_map_OM19)
ampli_OM17_thru_19 <- ampli_join_ampli_tibbles(lst(ampli_data_OM17, ampli_OM17_thru_19, ampli_data_OM19))
## Joining, by = c("sample_id", "sequence", "reads", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species", "taxonomy", "barcode_sequence", "forward_linker_primer_sequence", "reverse_primer_sequence", "sample_type", "nucleic_acid_type", "sampling_site", "year_sampled", "month_sampled", "day_sampled", "depth_fluid_intake_mbct", "notes", "sampling_method", "upper_packer_inflated", "upper_packer_depth_mbct", "lower_packer_inflated", "lower_packer_depth_mbct", "well_depth_mbgl", "casing_extent_mbct", "casing_height_magl", "screened_interval_mbct", "depth_to_water_mbct")
## Joining, by = c("sample_id", "sequence", "reads", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species", "taxonomy", "barcode_sequence", "forward_linker_primer_sequence", "reverse_primer_sequence", "sample_type", "nucleic_acid_type", "sampling_site", "year_sampled", "month_sampled", "day_sampled", "depth_fluid_intake_mbct", "notes", "sampling_method", "upper_packer_inflated", "upper_packer_depth_mbct", "lower_packer_inflated", "lower_packer_depth_mbct", "well_depth_mbgl", "casing_extent_mbct", "casing_height_magl", "screened_interval_mbct", "depth_to_water_mbct")
ampli_OM17_thru_19_sum <- ampli_OM17_thru_19 %>% ampli_tally_reads(c("year_sampled","sample_type"))
# sort by read counts
ampli_OM17_thru_19_sum %>% arrange(desc(reads_sum))
# generate summary stats of read counts
summary(ampli_OM17_thru_19_sum %>% select(reads_sum))
## reads_sum
## Min. : 0.0
## 1st Qu.: 903.2
## Median : 5864.0
## Mean :11758.8
## 3rd Qu.:15034.8
## Max. :86307.0
Plot read counts
Oman groundwater samples have significantly higher read counts than extraction or PCR controls, which is good.
plot_reads_sums_1 <- ampli_OM17_thru_19_sum %>% ggplot(aes(
x = fct_reorder(sample_id, desc(reads_sum)),
y = reads_sum,
fill = sample_type
)) +
geom_bar(stat = "identity") +
scale_y_continuous(name = "Reads") +
scale_x_discrete(name = "Sample ID") +
scale_fill_discrete(name = "Sample type") +
theme_bw(base_size = 7)+
facet_grid(cols = vars(year_sampled), scales = "free", space = "free") +
theme(
panel.grid = element_blank(),
axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1),
legend.position = "bottom"
)
plot_reads_sums_1
# keep just those samples
ampli_OM17_thru_19_focus_samples <- ampli_OM17_thru_19 %>% ampli_filter_strings(col_to_filter = sampling_site, strings_to_filter = c("BA1A", "BA1D"), detection_method = "complete", action = "keep") %>%
# remove taxa with zero reads (messes up plotting later if kept)
ampli_rm_0_read_taxa()
## 7961 zero-read taxa removed from data set.
Filter out mitochondria, chloroplasts, eukaryotes, and sequences not assigned taxonomy at the the domain level
ampli_OM17_thru_19_focus_samples_taxa_filtered <- ampli_OM17_thru_19_focus_samples %>% ampli_filter_strings(col_to_filter = taxonomy, strings_to_filter = c("Chloroplast", "Mitochondria", "Eukaryota", "k__NA"), detection_method = "substring", action = "remove")
Tally reads per sample
ampli_OM17_thru_19_focus_samples_taxa_filtered_sum <- ampli_OM17_thru_19_focus_samples_taxa_filtered %>% ampli_tally_reads(c("year_sampled","sample_type"))
# sort by read counts
ampli_OM17_thru_19_focus_samples_taxa_filtered_sum %>% arrange(desc(reads_sum))
# generate summary stats of read counts
summary(ampli_OM17_thru_19_focus_samples_taxa_filtered_sum %>% select(reads_sum))
## reads_sum
## Min. : 106
## 1st Qu.: 1701
## Median :25811
## Mean :25583
## 3rd Qu.:37294
## Max. :86183
Plot read counts
plot_reads_sums_2 <- ampli_OM17_thru_19_focus_samples_taxa_filtered_sum %>% ggplot(aes(
x = fct_reorder(sample_id, desc(reads_sum)),
y = reads_sum,
label = reads_sum
)) +
geom_bar(stat = "identity") +
geom_text(nudge_y = 2500, size = 2.5) +
scale_y_continuous(name = "Reads") +
scale_x_discrete(name = "Sample ID") +
scale_fill_discrete(name = "Sample type") +
theme_bw(base_size = 11)+
facet_grid(cols = vars(year_sampled), scales = "free", space = "free") +
theme(
panel.grid = element_blank(),
axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1),
legend.position = "bottom"
)
plot_reads_sums_2
ampli_OM17_thru_19_focus_samples_taxa_filtered <- ampli_OM17_thru_19_focus_samples_taxa_filtered %>% ampli_calc_rel_abund()
ampli_OM17_thru_19_focus_samples_taxa_filtered %>% head()
Check that relative abundances add up to 1, as expected
ampli_OM17_thru_19_focus_samples_taxa_filtered %>% group_by(sample_id) %>% summarise(rel_abund_sum = sum(rel_abund), .groups = "drop") %>% summary()
## sample_id rel_abund_sum
## Length:21 Min. :1
## Class :character 1st Qu.:1
## Mode :character Median :1
## Mean :1
## 3rd Qu.:1
## Max. :1
OM17_thru_19_heat <- ampli_OM17_thru_19_focus_samples_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = sample_id, text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", top_n = 20, y_taxa_arrangement = "abund", facet_grid_sample_group_col = year_sampled)
OM17_thru_19_heat +
# plot geometry
geom_text(parse = FALSE, size = 2.25) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = NULL, expand = c(0,0)) +
scale_y_discrete(name = NULL, expand = c(0,0)) +
theme_bw(base_size = 8) +
theme(
legend.position = "bottom",
panel.grid = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
plot.margin = margin(1,1,1,1)
)
# keep just those samples
ampli_packer_main_text_taxa_filtered <- ampli_OM17_thru_19_focus_samples_taxa_filtered %>% ampli_filter_strings(col_to_filter = sample_id, strings_to_filter = c("BA1A100_22_2D", "BA1A5566_22_1C", "A108", "A30", "A41", "D102", "D45"), detection_method = "complete", action = "keep") %>%
# remove taxa with zero reads (messes up plotting later if kept)
ampli_rm_0_read_taxa()
## 219 zero-read taxa removed from data set.
# if packers were used, generate names for packed intervals
# in this code, the packer sample sites have their intervals separated by a new line character. This is for plotting clarity.
ampli_packer_main_text_taxa_filtered <- ampli_packer_main_text_taxa_filtered %>% mutate(site_packer_interval = if_else(str_detect(sampling_method, "packer") == FALSE, sampling_site, paste0(sampling_site, "\n", if_else(upper_packer_inflated == FALSE, 0, upper_packer_depth_mbct), "-", if_else(lower_packer_inflated == FALSE, well_depth_mbgl, lower_packer_depth_mbct))))
ampli_packer_main_text_taxa_filtered <- ampli_packer_main_text_taxa_filtered %>% mutate(year_sampling_site = paste(year_sampled, sampling_site))
ampli_packer_main_text_taxa_filtered <- ampli_packer_main_text_taxa_filtered %>% mutate(packer_interval = if_else(str_detect(sampling_method, "packer") == FALSE, NA_character_, paste0(if_else(upper_packer_inflated == FALSE, 0, upper_packer_depth_mbct), "-", if_else(lower_packer_inflated == FALSE, well_depth_mbgl, lower_packer_depth_mbct))))
add factor levels for plot organization
ampli_packer_main_text_taxa_filtered$packer_interval <- factor(ampli_packer_main_text_taxa_filtered$packer_interval, levels = c(
"0-30",
"41-65",
"45-75",
"55-66",
"100-400",
"102-132",
"108-132"
))
packer_main_text_heat <- ampli_packer_main_text_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = packer_interval, text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", top_n = 25, y_taxa_arrangement = "abund", facet_grid_sample_group_col = year_sampling_site)
packer_main_text_heat +
# plot geometry
geom_text(parse = FALSE, size = 2.5) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = "Depth of sampling interval / [m] grouped by\nyear of sampling and well name", expand = c(0,0)) +
scale_y_discrete(name = "Deepest taxonomic assignment", expand = c(0,0)) +
theme_bw(base_size = 8.5) +
theme(
panel.grid = element_blank(),
legend.position = "bottom",
plot.margin = margin(1,1,1,1)
)
packer_main_text_heat_names_tbl <- ampli_packer_main_text_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = packer_interval, text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", top_n = 23, y_taxa_arrangement = "abund", facet_grid_sample_group_col = year_sampling_site, return_plot_data_tbl = TRUE)
packer_main_text_heat_names_tbl
packer_main_text_heat_names_taxonomy_vector <- packer_main_text_heat_names_tbl %>% group_by(taxonomy) %>% summarise(taxonomy = first(taxonomy)) %>% mutate(taxonomy_char = as.character(taxonomy)) %>% pull(taxonomy_char)
packer_main_text_heat_names_taxonomy_vector
## [1] "Other taxa"
## [2] "k__Bacteria; p__Firmicutes; c__Dethiobacteria; o__Dethiobacterales; f__Dethiobacteraceae; g__Dethiobacter; s__NA"
## [3] "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Burkholderiales; f__Hydrogenophilaceae; g__Thiobacillus; s__thioparus"
## [4] "k__Bacteria; p__Bacteroidota; c__Kryptonia; o__Kryptoniales; f__BSV26; g__NA; s__NA"
## [5] "k__Bacteria; p__Patescibacteria; c__ABY1; o__Candidatus_Magasanikbacteria; f__NA; g__NA; s__NA"
## [6] "k__Bacteria; p__Acetothermia; c__Acetothermiia; o__NA; f__NA; g__NA; s__NA"
## [7] "k__Bacteria; p__Nitrospirota; c__Nitrospiria; o__Nitrospirales; f__Nitrospiraceae; g__Nitrospira; s__NA"
## [8] "k__Bacteria; p__Patescibacteria; c__Parcubacteria; o__NA; f__NA; g__NA; s__NA"
## [9] "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Burkholderiales; f__Comamonadaceae; g__Azohydromonas; s__NA"
## [10] "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Xanthomonadaceae; g__NA; s__NA"
## [11] "k__Bacteria; p__Patescibacteria; c__CPR2; o__NA; f__NA; g__NA; s__NA"
## [12] "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Xanthomonadaceae; g__Silanimonas; s__NA"
## [13] "k__Archaea; p__Crenarchaeota; c__Nitrososphaeria; o__Nitrososphaerales; f__Nitrososphaeraceae; g__Candidatus_Nitrososphaera; s__NA"
## [14] "k__Bacteria; p__Verrucomicrobiota; c__Verrucomicrobiae; o__Opitutales; f__Opitutaceae; g__Lacunisphaera; s__NA"
## [15] "k__Bacteria; p__NA; c__NA; o__NA; f__NA; g__NA; s__NA"
## [16] "k__Bacteria; p__Bacteroidota; c__Ignavibacteria; o__Ignavibacteriales; f__SM1H02; g__NA; s__NA"
## [17] "k__Archaea; p__Crenarchaeota; c__Nitrososphaeria; o__Nitrosopumilales; f__Nitrosopumilaceae; g__Candidatus_Nitrosotenuis; s__NA"
## [18] "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Burkholderiales; f__Sutterellaceae; g__NA; s__NA"
## [19] "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__NA; f__NA; g__NA; s__NA"
## [20] "k__Bacteria; p__GAL15; c__NA; o__NA; f__NA; g__NA; s__NA"
## [21] "k__Bacteria; p__Deinococcota; c__Deinococci; o__Thermales; f__Thermaceae; g__Meiothermus; s__NA"
## [22] "k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Parvibaculales; f__Parvibaculaceae; g__Parvibaculum; s__lavamentivorans"
## [23] "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Burkholderiales; f__Comamonadaceae; g__Brachymonas; s__NA"
## [24] "k__Bacteria; p__Nitrospirota; c__Thermodesulfovibrionia; o__NA; f__NA; g__NA; s__NA"
ampli_packer_main_text_taxa_filtered %>% ampli_filter_strings(col_to_filter = taxonomy, strings_to_filter = packer_main_text_heat_names_taxonomy_vector, detection_method = "complete", action = "keep") %>%
# remove taxa with zero reads
ampli_rm_0_read_taxa() %>% group_by(taxonomy) %>% summarise(Kingdom = first(Kingdom), Phylum = first(Phylum), Class = first(Class), Order = first(Order), Family = first(Family), Genus = first(Genus), Species = first(Species))
## 1 zero-read taxa removed from data set.
packer_main_text_heat_names_tbl_edit <- packer_main_text_heat_names_tbl %>% mutate(tax_short = case_when(
taxonomy == "k__Archaea; p__Crenarchaeota; c__Nitrososphaeria; o__Nitrosopumilales; f__Nitrosopumilaceae; g__Candidatus_Nitrosotenuis; s__NA" ~ "g. \\textit{Ca.} Nitrosotenuis",
taxonomy == "k__Archaea; p__Crenarchaeota; c__Nitrososphaeria; o__Nitrososphaerales; f__Nitrososphaeraceae; g__Candidatus_Nitrososphaera; s__NA" ~ "g. \\textit{Ca.} Nitrososphaera",
taxonomy == "k__Bacteria; p__Acetothermia; c__Acetothermiia; o__NA; f__NA; g__NA; s__NA" ~ "c. Acetothermiia",
taxonomy == "k__Bacteria; p__Bacteroidota; c__Ignavibacteria; o__Ignavibacteriales; f__SM1H02; g__NA; s__NA" ~ "f. SM1H02",
taxonomy == "k__Bacteria; p__Bacteroidota; c__Kryptonia; o__Kryptoniales; f__BSV26; g__NA; s__NA" ~ "f. BSV26",
taxonomy == "k__Bacteria; p__Chloroflexi; c__P2-11E; o__NA; f__NA; g__NA; s__NA" ~ "c. P2-11E",
taxonomy == "k__Bacteria; p__Deinococcota; c__Deinococci; o__Thermales; f__Thermaceae; g__Meiothermus; s__NA" ~ "g. \\textit{Meiothermus}",
taxonomy == "k__Bacteria; p__Firmicutes; c__Dethiobacteria; o__Dethiobacterales; f__Dethiobacteraceae; g__Dethiobacter; s__NA" ~ "g. \\textit{Dethiobacter}",
taxonomy == "k__Bacteria; p__GAL15; c__NA; o__NA; f__NA; g__NA; s__NA" ~ "p. GAL15",
taxonomy == "k__Bacteria; p__NA; c__NA; o__NA; f__NA; g__NA; s__NA" ~ "k. Bacteria",
taxonomy == "k__Bacteria; p__Nitrospirota; c__Nitrospiria; o__Nitrospirales; f__Nitrospiraceae; g__Nitrospira; s__NA" ~ "g. \\textit{Nitrospira}",
taxonomy == "k__Bacteria; p__Nitrospirota; c__Thermodesulfovibrionia; o__NA; f__NA; g__NA; s__NA" ~ "c. Thermodesulfovibrionia",
taxonomy == "k__Bacteria; p__Patescibacteria; c__ABY1; o__Candidatus_Magasanikbacteria; f__NA; g__NA; s__NA" ~ "o. \\textit{Ca.} Magasanikbacteria",
taxonomy == "k__Bacteria; p__Patescibacteria; c__CPR2; o__NA; f__NA; g__NA; s__NA" ~ "c. CPR2",
taxonomy == "k__Bacteria; p__Patescibacteria; c__Parcubacteria; o__NA; f__NA; g__NA; s__NA" ~ "c. Parcubacteria",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Parvibaculales; f__Parvibaculaceae; g__Parvibaculum; s__lavamentivorans" ~ "s. \\textit{lavamentivorans}",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Burkholderiales; f__Comamonadaceae; g__Azohydromonas; s__NA" ~ "g. \\textit{Azohydromonas}",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Burkholderiales; f__Comamonadaceae; g__Brachymonas; s__NA" ~ "g. \\textit{Brachymonas}",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Burkholderiales; f__Hydrogenophilaceae; g__Thiobacillus; s__thioparus" ~ "s. \\textit{thioparus}",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Burkholderiales; f__Sutterellaceae; g__NA; s__NA" ~ "f. Sutterellaceae",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__NA; f__NA; g__NA; s__NA" ~ "c. Gammaproteobacteria",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Xanthomonadaceae; g__NA; s__NA" ~ "f. Xanthomonadaceae",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Xanthomonadaceae; g__Silanimonas; s__NA" ~ "g. \\textit{Silanimonas}",
taxonomy == "k__Bacteria; p__Verrucomicrobiota; c__Verrucomicrobiae; o__Opitutales; f__Opitutaceae; g__Lacunisphaera; s__NA" ~ "g. \\textit{Lacunisphaera}",
taxonomy == "Other taxa" ~ "Other taxa"))
packer_main_text_heat_names_tbl_edit
packer_main_text_heat <- ampli_packer_main_text_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = packer_interval, text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", top_n = 23, y_taxa_arrangement = "abund", facet_grid_sample_group_col = year_sampling_site, custom_taxa_names_tbl = packer_main_text_heat_names_tbl_edit, custom_taxa_names_col = tax_short)
packer_main_text_heat +
# plot geometry
geom_text(parse = FALSE, size = 3) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = "Depth of sampling interval / [m] grouped by\nyear of sampling and well name", expand = c(0,0)) +
scale_y_discrete(name = "Deepest taxonomic assignment", expand = c(0,0), labels = TeX) +
theme_bw(base_size = 11) +
theme(
panel.grid = element_blank(),
# legend.position = "bottom",
plot.margin = margin(1,1,1,1)
)
ampli_OM17_thru_19_focus_samples_taxa_filtered %>%
# remove samples taken prior to 2018
filter(year_sampled == 2018) %>%
# add column for packer interval
mutate(packer_interval = if_else(str_detect(sampling_method, "packer") == FALSE, NA_character_, paste0(if_else(upper_packer_inflated == FALSE, 0, upper_packer_depth_mbct), "-", if_else(lower_packer_inflated == FALSE, well_depth_mbgl, lower_packer_depth_mbct)))) %>%
# add column for filter pore diameter
mutate(filter_diam =
case_when(
str_detect(sample_id, "_10_") == TRUE ~ "0.10",
str_detect(sample_id, "_45_") == TRUE ~ "0.45",
TRUE ~ "0.22"
)) %>%
ampli_heat_map(x_sample_group_col = filter_diam, plot_other_taxa_bin = TRUE, top_n = 20, y_taxa_arrangement = "abund", text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", facet_grid_sample_group_col = packer_interval) +
# plot geometry
geom_text(parse = FALSE, size = 2.5) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = "Filter pore diameter / [µm] grouped by\ndepth of sampling interval / [m] ", expand = c(0,0)) +
scale_y_discrete(name = "Deepest taxonomic assignment", expand = c(0,0)) +
theme_bw(base_size = 9) +
theme(
legend.position = "bottom",
panel.grid = element_blank(),
plot.margin = margin(1,1,1,1)
)
CH4_taxa <- c("Methanobacteria", "Methanomicrobia", "Methanopyrales", "Methanocellales", "Methanoplasmatales", "Methanosarcinales", "Methanomassiliicocc", "Methylococc", "Methylocystis", "Methylosinus", "Methylocella", "Methylocapsa", "Methylacidiphil", "Methylomirabilis", "ANME")
CH4_plotted_taxa_names_tbl <- ampli_OM17_thru_19_focus_samples_taxa_filtered %>%
# remove samples taken prior to 2018
filter(year_sampled > 2017) %>%
# add column combining year and well
mutate(year_sampling_site = paste(year_sampled, sampling_site)) %>%
# add column combining site, packer_interval, and filter pore diamter
mutate(packer_interval_filter_diam = paste(if_else(str_detect(sampling_method, "packer") == FALSE, NA_character_, paste0(if_else(upper_packer_inflated == FALSE, 0, upper_packer_depth_mbct), "-", if_else(lower_packer_inflated == FALSE, well_depth_mbgl, lower_packer_depth_mbct))),# "\n0.22 µm")
case_when(
str_detect(sample_id, "_10_") == TRUE ~ "\n0.10",
str_detect(sample_id, "_45_") == TRUE ~ "\n0.45",
TRUE ~ "\n0.22"
))
) %>%
ampli_heat_map(x_sample_group_col = packer_interval_filter_diam, taxa_selection_method = "custom_taxa_char_vector", custom_taxa_char_vector = CH4_taxa, plot_other_taxa_bin = FALSE, y_taxa_arrangement = "alpha", text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", top_n = 20, facet_grid_sample_group_col = year_sampling_site, return_plot_data_tbl = TRUE)
## Warning: `as_character()` is deprecated as of rlang 0.4.0
## Please use `vctrs::vec_cast()` instead.
## This warning is displayed once per session.
CH4_plotted_taxa_names_tbl
CH4_plotted_taxa_names_tbl_edit <- CH4_plotted_taxa_names_tbl %>% mutate(tax_short = case_when(
taxonomy == "k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobacterium; s__NA" ~ "g. $\\textit{Methanobacterium}$",
taxonomy == "k__Bacteria; p__Verrucomicrobiota; c__Verrucomicrobiae; o__Methylacidiphilales; f__Methylacidiphilaceae; g__NA; s__NA" ~ "f. Methylacidiphilaceae",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Methylococcales; f__Methylococcaceae; g__Methylocaldum; s__NA" ~ "g. \\textit{Methylocaldum}"
))
CH4_plotted_taxa_names_tbl_edit
CH4_plotted_taxa_names_tbl_edit %>% group_by(year_sampling_site, packer_interval_filter_diam) %>% summarise()
## `summarise()` has grouped output by 'year_sampling_site'. You can override using the `.groups` argument.
ampli_OM18_thru_19_focus_samples_taxa_filtered_packer_interval_filter_diam <- ampli_OM17_thru_19_focus_samples_taxa_filtered %>%
# remove samples taken prior to 2018
filter(year_sampled > 2017) %>%
# add column combining year and well
mutate(year_sampling_site = paste(year_sampled, sampling_site)) %>%
# add column combining site, packer_interval, and filter pore diameter
mutate(packer_interval_filter_diam = paste(if_else(str_detect(sampling_method, "packer") == FALSE, NA_character_, paste0(if_else(upper_packer_inflated == FALSE, 0, upper_packer_depth_mbct), "-", if_else(lower_packer_inflated == FALSE, well_depth_mbgl, lower_packer_depth_mbct))),# "\n0.22 µm")
case_when(
str_detect(sample_id, "_10_") == TRUE ~ "\n0.10",
str_detect(sample_id, "_45_") == TRUE ~ "\n0.45",
TRUE ~ "\n0.22"
))
)
# sort levels for plotting
ampli_OM18_thru_19_focus_samples_taxa_filtered_packer_interval_filter_diam$packer_interval_filter_diam <- factor(ampli_OM18_thru_19_focus_samples_taxa_filtered_packer_interval_filter_diam$packer_interval_filter_diam, levels = c(
"55-66 \n0.10",
"55-66 \n0.22",
"55-66 \n0.45",
"100-400 \n0.10",
"100-400 \n0.22",
"100-400 \n0.45",
"0-30 \n0.22",
"41-65 \n0.22",
"108-132 \n0.22",
"45-75 \n0.22",
"102-132 \n0.22"
))
ampli_OM18_thru_19_focus_samples_taxa_filtered_packer_interval_filter_diam %>%
ampli_heat_map(x_sample_group_col = packer_interval_filter_diam, taxa_selection_method = "custom_taxa_char_vector", custom_taxa_char_vector = CH4_taxa, plot_other_taxa_bin = FALSE, y_taxa_arrangement = "alpha", text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", top_n = 20, facet_grid_sample_group_col = year_sampling_site, custom_taxa_names_tbl = CH4_plotted_taxa_names_tbl_edit, custom_taxa_names_col = tax_short) +
# plot geometry
geom_text(parse = FALSE) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "white", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = "Depth of sampling interval / [m] and filter pore diameter / [µm]\ngrouped by year of sampling and well name", expand = c(0,0)) +
scale_y_discrete(name = "Deepest\ntaxonomic assignment", expand = c(0,0), labels = TeX) +
theme_bw(base_size = 10.5) +
theme(
legend.position = "none",
panel.grid = element_blank(),
plot.margin = margin(1,1,1,1)
)
# input SCA-consuming taxa from Singh et al. (2017), Laso-Pérez et al. (2019), and Shennan et al. (2006)
all_SCA_taxa_oman <- c("BuS5", "Syntrophoarchaeum", "HotSeep-1", "Argoarchaeum", "ethanivorans", "Methanoliparia", "GoM", "Actinomyces", "Arthrobacter", "Brevibacterium", "Corynebacterium", "Gordonia", "Mycobacterium", "Nocardia", "Nocardiodes", "Rhodococcus", "Acinetobacter", "cepacia", "Pseudomonas", "Ralstonia")
ampli_OM17_thru_19_focus_samples_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = sampling_site, taxa_selection_method = "custom_taxa_char_vector", custom_taxa_char_vector = all_SCA_taxa_oman, plot_other_taxa_bin = FALSE, y_taxa_arrangement = "alpha", text_label_scalar = 100, text_label_decimal_places = 1, text_label_format = "scientific") +
# plot geometry
geom_text(parse = TRUE, size = 2.1) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = NULL, expand = c(0,0)) +
scale_y_discrete(name = NULL, expand = c(0,0)) +
theme_bw(base_size = 6.5) +
theme(
legend.position = "bottom",
panel.grid = element_blank()
)
There were no species-level assignments to known short-chain alkane oxidizing taxa. The genera shown above contain organisms capable of many metabolic processes unrelated to short-chain alkane oxidation, so their presence is not strong evidence for microbial short-chain alkane oxidation.
hydrogenophaga_taxa_oman <- c("Hydrogenophaga")
ampli_OM18_thru_19_focus_samples_taxa_filtered_packer_interval_filter_diam %>%
ampli_heat_map(x_sample_group_col = packer_interval_filter_diam, taxa_selection_method = "custom_taxa_char_vector", custom_taxa_char_vector = hydrogenophaga_taxa_oman, plot_other_taxa_bin = FALSE, text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", top_n = 50, facet_grid_sample_group_col = year_sampling_site, y_taxa_col = Genus) +
# plot geometry
geom_text(parse = FALSE) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "white", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = "Depth of sampling interval / [m] and filter pore diameter / [µm]\ngrouped by year of sampling and well name", expand = c(0,0)) +
scale_y_discrete(name = NULL, expand = c(0,0)) +
theme_bw(base_size = 11) +
theme(
legend.position = "none",
panel.grid = element_blank(),
plot.margin = margin(1,1,1,1),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)
## Warning: Unknown levels in `f`: Other taxa
ampli_OM17_thru_19_focus_samples_taxa_filtered %>%
# filter for drill foam samples
filter(sample_type == "drill foam emerged from well") %>%
ampli_heat_map(x_sample_group_col = sample_id, plot_other_taxa_bin = TRUE, top_n = 20, y_taxa_arrangement = "abund", text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", facet_grid_sample_group_col = nucleic_acid_type) +
# plot geometry
geom_text(parse = FALSE, size = 2.5) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = "Replicate grouped by nucleic acid", expand = c(0,0)) +
scale_y_discrete(name = "Deepest taxonomic assignment", expand = c(0,0)) +
theme_bw(base_size = 8) +
theme(
legend.position = "bottom",
panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
plot.margin = margin(1,1,1,1)
)
Brachymonas and Parvibaculum were not detected. I also included Pseudomonas in the search here just so that the plot would still have an output.
ampli_OM17_thru_19_focus_samples_taxa_filtered %>%
# filter for drill foam samples
filter(sample_type == "drill foam emerged from well") %>%
ampli_heat_map(x_sample_group_col = sampling_site, taxa_selection_method = "custom_taxa_char_vector", custom_taxa_char_vector = c("Brachymonas", "Parvibaculum", "Pseudomonas"), plot_other_taxa_bin = FALSE, y_taxa_arrangement = "alpha", text_label_scalar = 100, text_label_decimal_places = 1, text_label_format = "scientific") +
# plot geometry
geom_text(parse = TRUE, size = 2.1) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = NULL, expand = c(0,0)) +
scale_y_discrete(name = NULL, expand = c(0,0)) +
theme_bw(base_size = 8) +
theme(
legend.position = "bottom",
panel.grid = element_blank()
)