1 Setup

1.1 Load code libraries

# load libraries
library(tidyverse) # dplyr, tidyr, ggplot
library(readxl) # reading excel files
library(modelr) # adding model predictions to data frames
library(knitr) # generating reports
library(scales) # making log scales with exponents
library(ggrepel) # algorithm to avoid overlapping labels
library(cowplot) # extracting legends from plots
library(lemon) # repeat axis lines for paneled figures in ggplot
library(ggpmisc) # adding linear regression equations to ggplot
library(latex2exp) # plot labels with latex

# global knitting options for automatic saving of all plots as .png and .pdf
knitr::opts_chunk$set(
  dev = c("png", "pdf"), fig.keep = "all",
  dev.args = list(pdf = list(encoding = "WinAnsi", useDingbats = FALSE)),
  fig.path = file.path("fig_output/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input()))),
  cache.path = file.path("cache/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input())))
)
# source all relevant scripting files
source(file.path("scripts", "plotting_functions.R")) # includes LaTeX labelling function

1.2 Load data

Load in Oman well data

# Load in Oman aqueous geochemistry etc.
data <- read_xlsx("data_raw/Summary_metadata/Oman_Geochem_2012-2019_packers.xlsx", na = "NA") # read in data from excel file

# add site column for plotting against other literature data
data <- data %>% mutate(site = "Oman/UAE")

# create a column to identify packer samples by site, year, and interval
data <-  data %>% mutate(packer_site_interval = if_else(sampling_method == "packer - Solexperts", paste0(sampling_site, "_", if_else(is.na(upper_packer_depth_mbct), 0, upper_packer_depth_mbct), "-", if_else(is.na(lower_packer_depth_mbct), well_depth_mbgl, lower_packer_depth_mbct)), NA_character_))

# create a column to identify packer samples by site, year, and interval
data <-  data %>% mutate(packer_site_year_interval = if_else(sampling_method == "packer - Solexperts", paste0(sampling_site, "_", year_sampled, "_", if_else(is.na(upper_packer_depth_mbct), 0, upper_packer_depth_mbct), "-", if_else(is.na(lower_packer_depth_mbct), well_depth_mbgl, lower_packer_depth_mbct)), "Other Samail Ophiolite"))

# create levels (for plotting)
data$packer_site_year_interval <- factor(data$packer_site_year_interval, levels = c("BA1A_2018_55-66", "BA1A_2018_100-400", "BA1A_2019_0-30", "BA1A_2019_41-65", "BA1A_2019_108-132", "BA1D_2019_45-75", "BA1D_2019_102-132", "Other Samail Ophiolite"))
  
# print data
data

Load in comparison data from the literature

# read comparison data from literature
data_literature <-  read_excel("data_raw/Summary_metadata/CH4_isotope_literature_data.xlsx", na = "NA")

# Filter for only literature data from Oman and add an empty column for packer_site_year_interval, which will be useful later for plotting
data_literature <-  data_literature %>% filter(site == "Oman/UAE") %>%
  mutate(packer_site_year_interval = "Other Samail Ophiolite")

# print
data_literature

1.3 Set plotting aesthetics

# colors

microbial_color <- "#009E73" # forest/mint green
thermogenic_color <- "#F0E442" # yellow
abiotic_color <- "#999999" # grey

microbial_opacity <- 0.05
thermo_opacity <- 0.1
abiotic_opacity <- 0.08

gabbro_color <- "#CC79A7" # magenta
Mg_HCO3_color <- "#E69F00" # gold
Ca_OH_color <- "#0072B2" # dark blue

cbPalette_water_types <- c(gabbro_color, Mg_HCO3_color, Ca_OH_color)
names(cbPalette_water_types) <- c("gabbro", "Mg_HCO3", "Ca_OH")

color_rock_crush_named <- c("rock crushing" = "#D55E00", "vent/well fluid" = "black")
color_rock_crush_named_new_line <- c("rock\ncrushing" = "#D55E00", "vent/well\nfluid" = "black")

# shapes

shapes_all_CH4_iso_sites_named <- c("Other Samail Ophiolite" = 5, "BA1A_2019_108-132" = 15, "BA1D_2019_45-75" = 17,  "BA1D_2019_102-132" = 19)
colors_all_CH4_iso_sites_named <- c("Other Samail Ophiolite" = "#999999", "BA1A_2019_108-132" = "#E69F00", "BA1D_2019_45-75" = "#56B4E9",  "BA1D_2019_102-132" = "#CC79A7")

shapes_all_packer_site_year_interval_named <- c("BA1A_2018_55-66" = 4, "BA1A_2018_100-400" = 6, "BA1A_2019_0-30" = 18, "BA1A_2019_41-65" = 3, "BA1A_2019_108-132" = 15, "BA1D_2019_45-75" = 17,  "BA1D_2019_102-132" = 19, "Other Samail Ophiolite" = 5)
colors_all_packer_site_year_interval_named <- c("BA1A_2018_55-66" = "#0072B2", "BA1A_2018_100-400" = "#D55E00", "BA1A_2019_0-30" = "#F0E442", "BA1A_2019_41-65" = "#009E73", "BA1A_2019_108-132" = "#E69F00", "BA1D_2019_45-75" = "#56B4E9",  "BA1D_2019_102-132" = "#CC79A7", "Other Samail Ophiolite" = "#999999")
# read mixing data from Leong et al., 2020
data_Leong_mix_8umolal_DIC <-  read_excel("data_raw/Summary_metadata/mixing_leong_et_al_JGRmanuscript.xlsx", sheet = "ctl_brc_di_cal_8umolalDIC") %>% rename(g_surface_fluid_added_to_kg_high_pH = `g destroyed`,  mixing_extent = `Mixing Extent`, Ca_molal = Ca, Cl_molal = Cl, C_molal = C, Mg_molal = Mg, Na_molal = Na, Si_molal = Si)

data_Leong_mix_10umolal_DIC <-  read_excel("data_raw/Summary_metadata/mixing_leong_et_al_JGRmanuscript.xlsx", sheet = "ctl_brc_cal_10umolalDIC") %>% rename(g_surface_fluid_added_to_kg_high_pH = `surface fluid added (g) to 1 kg of high pH`,  mixing_extent = `Mixing Extent`, Ca_molal = `Ca (molal)`, Cl_molal = `Cl (molal)`, C_molal = `C (molal)`, Mg_molal = `Mg (molal)`, Na_molal = `Na (molal)`, Si_molal = `Si (molal)`)

data_Leong_mix_20umolal_DIC <-  read_excel("data_raw/Summary_metadata/mixing_leong_et_al_JGRmanuscript.xlsx", sheet = "ctl_brc_cal_20umolalDIC") %>% rename(g_surface_fluid_added_to_kg_high_pH = `surface fluid added (g) to 1 kg of high pH`,  mixing_extent = `Mixing Extent`, Ca_molal = `Ca (molal)`, Cl_molal = `Cl (molal)`, C_molal = `C (molal)`, Mg_molal = `Mg (molal)`, Na_molal = `Na (molal)`, Si_molal = `Si (molal)`)
# read context Oman fluid data from Leong et al., 2020
context_data_Leong_et_al_2020 <-  read_excel("data_raw/Summary_metadata/Leong_et_al_data_for_si_pH.xlsx")

2 Water stable isotopes plot

2.1 Get and summarize water stable isotope data

# select water isotope data from full dataset
data_water_isotopes <- data %>% filter(!is.na(H2O_dD_permil_VSMOW)) %>% select(sampling_site, year_sampled, H2O_dD_permil_VSMOW, H2O_d18O_permil_VSMOW, packer_site_year_interval)

# print
data_water_isotopes

Summarize water stable isotope data by well and year

# group water isotope data by well and sample year and compute summary data
data_water_isotopes_summ_well_year <- data_water_isotopes %>% group_by(sampling_site, year_sampled) %>% summarize(n = n(), H2O_dD_permil_VSMOW_mean_well_year = mean(H2O_dD_permil_VSMOW), H2O_dD_permil_VSMOW_sd = sd(H2O_dD_permil_VSMOW), H2O_d18O_permil_VSMOW_mean_well_year = mean(H2O_d18O_permil_VSMOW), H2O_d18O_permil_VSMOW_sd = sd(H2O_d18O_permil_VSMOW), first(packer_site_year_interval))
## `summarise()` has grouped output by 'sampling_site'. You can override using the `.groups` argument.
# print
data_water_isotopes_summ_well_year

2.2 Plot water stable isotope data

# create plot
MWL_plot <- data_water_isotopes %>% ggplot(aes(
  x = H2O_d18O_permil_VSMOW,
  y = H2O_dD_permil_VSMOW,
  color = packer_site_year_interval,
  shape = packer_site_year_interval)) +

  scale_color_manual(name = "Sample ID", values = colors_all_packer_site_year_interval_named) +
  scale_shape_manual(name = "Sample ID", values = shapes_all_packer_site_year_interval_named) +
    
  # add meteoric water lines
  geom_abline(slope = 7.91, intercept = 8.72, color = "grey80", linetype = "dashed") + # GMWL, Terzer et al. 2013
  annotate("text", label = "GMWL", x = -4.3, y = -23, angle = 52, color = "grey60") +
  geom_abline(slope = 5.0, intercept = 10.7, color = "grey40") + # LMWL-N
  annotate("text", label = "LMWL-N", x = -4, y = -7, angle = 40) +
  geom_abline(slope = 7.2, intercept = -1.1, color = "grey40") + # LMWL-S
  annotate("text", label = "LMWL-S", x = -2.4, y = -20, angle = 52) +

  # plot Oman well data
  geom_point(size = 3.5, alpha = 1) +

  # # symbol styling
  # scale_shape_manual(values=c(1, 4, 0, 6, 5, 17, 3, 18, 16, 12, 18), name = "well", guide = guide_legend(order = 1))+
  # scale_color_manual(name = "Year Sampled", values = c("2012" = "black", "2014" = "grey", "2018" = "#E69F00", "2019" = "#0072B2"), guide = guide_legend(order = 2)) +
  
  # axis styling
  scale_x_continuous(limits = c(-5, 1), name = latex2exp::TeX("$\\delta^{18}O\\, /\\, \\[\U2030  \\, VSMOW \\]$")) +
  scale_y_continuous(limits = c(-25, 10), name = latex2exp::TeX("$\\delta D\\, /\\, \\[\U2030  \\, VSMOW \\]$")) +
  
  # geom_text_repel(force = 30, size = 4, show.legend = FALSE) +
  
  # design
  theme_classic(base_size = 11)

# print plot
MWL_plot 

3 \(\delta\text{D}_{\text{CH}_4}\) vs. \(\delta^{13}\text{C}_{\text{CH}_4}\) (“CD”) plot

3.1 Prepare data

# make data frame of CH4 d13C and dD data for all wells
data_CD <- data %>% select(sampling_site, notes, year_sampled, depth_fluid_intake_mbct, water_type, CH4_d13C_permil_VPDB_LBNL, CH4_dD_permil_VSMOW_LBNL,
CH4_d13C_permil_VPDB_CUB, CH4_dD_permil_VSMOW_CUB, CH4_d13C_permil_VPDB_MIT, CH4_dD_permil_VSMOW_MIT, CH4_d13C_permil_VPDB_UCLA, CH4_dD_permil_VSMOW_UCLA, site, packer_site_year_interval)

# compute means between analyses in multiple labs
data_CD <- data_CD %>%
    rowwise() %>%
    mutate(CH4_d13C_permil_VPDB_interlab_mean  = mean(c(CH4_d13C_permil_VPDB_LBNL, CH4_d13C_permil_VPDB_CUB, CH4_d13C_permil_VPDB_MIT, CH4_d13C_permil_VPDB_UCLA), na.rm = TRUE))

data_CD <- data_CD %>%
    rowwise() %>%
    mutate(CH4_dD_permil_VSMOW_interlab_mean  = mean(c(CH4_dD_permil_VSMOW_LBNL, CH4_dD_permil_VSMOW_CUB, CH4_dD_permil_VSMOW_MIT, CH4_dD_permil_VSMOW_UCLA), na.rm = TRUE))

# remove samples without C and D isotope data
data_CD <- data_CD %>% filter(!is.nan(CH4_d13C_permil_VPDB_interlab_mean) & !is.nan(CH4_dD_permil_VSMOW_interlab_mean))

# make data frame of CH4 d13C data
data_C_longer <- data_CD %>% select(sampling_site, notes, year_sampled, water_type, CH4_d13C_permil_VPDB_CUB, CH4_d13C_permil_VPDB_MIT, CH4_d13C_permil_VPDB_UCLA, CH4_d13C_permil_VPDB_LBNL, site, depth_fluid_intake_mbct, packer_site_year_interval) %>%
  pivot_longer(cols = c(-sampling_site, -notes, -water_type, -year_sampled, -site, -depth_fluid_intake_mbct, -packer_site_year_interval), names_to = "parameter", values_to = "CH4_d13C_permil_VPDB")

# make a column for analytical laboratory
data_C_longer<- data_C_longer %>% mutate(lab = case_when(
str_detect(parameter, "LBNL") == TRUE ~ "LBNL",
str_detect(parameter, "CUB") == TRUE ~ "CUB",
str_detect(parameter, "MIT") == TRUE ~ "MIT",
str_detect(parameter, "UCLA") == TRUE ~ "UCLA"
))  %>%
  select(-parameter)

# repeat the above for dD
data_D_longer <- data_CD %>% select(sampling_site, notes, water_type, year_sampled, CH4_dD_permil_VSMOW_CUB, CH4_dD_permil_VSMOW_MIT, CH4_dD_permil_VSMOW_UCLA, CH4_dD_permil_VSMOW_LBNL, packer_site_year_interval) %>%
  pivot_longer(cols = c(-sampling_site, -notes, -water_type, -year_sampled, -packer_site_year_interval), names_to = "parameter", values_to = "CH4_dD_permil_VSMOW")

data_D_longer <- data_D_longer %>% mutate(lab = case_when(
str_detect(parameter, "LBNL") == TRUE ~ "LBNL",
str_detect(parameter, "CUB") == TRUE ~ "CUB",
str_detect(parameter, "MIT") == TRUE ~ "MIT",
str_detect(parameter, "UCLA") == TRUE ~ "UCLA"
))  %>%  select(-parameter)

# combine 13C and D data
data_CD_longer <- full_join(data_C_longer, data_D_longer) %>%
  filter(!is.na(CH4_d13C_permil_VPDB) | !is.na(CH4_dD_permil_VSMOW))
## Joining, by = c("sampling_site", "notes", "year_sampled", "water_type", "packer_site_year_interval", "lab")
data_CD_longer

3.2 Plot

set factor levels for plotting

data_CD_longer$packer_site_year_interval <- factor(data_CD_longer$packer_site_year_interval, levels = c("BA1A_2019_108-132", "BA1D_2019_45-75", "BA1D_2019_102-132", "Other Samail Ophiolite"))

3.2.1 Plot data

# Set coordinates of fields of origins (Milkov and Etiope 2018)

primary_microbial <- data.frame(
  x = c(-90, -50, -50, -60, -60, -90),
  y = c(-450,-450,-250, -250, -100, -100)
)

secondary_microbial <- data.frame(
  x = c(-60,-35,-35, -60),
  y = c(-350, -200, -150, -175)
)

thermogenic <- data.frame(
  x = c(-75, -60, -40, -15, -40),
  y = c(-350, -350, -300, -150, -100)
)

abiotic <- data.frame(
  x = c(-50, -10, 10, 10, -20, -50),
  y = c(-450,-450, -350, -50, -50, -300)
)

# create plot
CD_plot <-
  ggplot() +

  # add origin fields
  geom_polygon(data = primary_microbial, aes(x=x, y=y), fill = microbial_color, alpha = microbial_opacity, color = microbial_color, size = .2) +
  geom_polygon(data = secondary_microbial, aes(x=x, y=y), alpha = 0.1, fill = NA, color = "black", size = .2, linetype = "dashed") +
  geom_polygon(data = thermogenic, aes(x=x, y=y), alpha = thermo_opacity, fill = thermogenic_color, color = thermogenic_color, size = .2) +
  geom_polygon(data = abiotic, aes(x=x, y=y), alpha = abiotic_opacity, fill = abiotic_color, color = abiotic_color, size = .2) +

  # # plot literature data
  # geom_point(data = data_literature %>% filter(sample_type != "laboratory synthesis" ),  aes(
  #   x = CH4_d13C_permil_VPDB,
  #   y = CH4_dD_permil_VSMOW,
  #   shape = packer_site_year_interval,
  #   color = packer_site_year_interval
  #   ), alpha = 1) +

  # plot Oman well data
  geom_point(data = data_CD_longer,  aes(
    x = CH4_d13C_permil_VPDB,
    y = CH4_dD_permil_VSMOW,
    shape = packer_site_year_interval,
    color = packer_site_year_interval
    ), alpha = 1, size = 3) +

  # axis styling
  scale_y_continuous(name = latex2exp::TeX("$\\delta D_{CH_4}\\,/\\,\\[\U2030  \\, VSMOW \\]$"), limits = c(-450, 100), expand = c(0,0)) +
  scale_x_continuous(name = latex2exp::TeX("$\\delta ^{13}C_{CH_4}\\,/\\,\\[\U2030  \\, VPDB \\]$"), limits = c(-90, 30), expand = c(0,0)) +
  
  # symbol styling
  scale_color_manual(name = "Sample ID", values = colors_all_CH4_iso_sites_named) +
  scale_shape_manual(name = "Sample ID", values = shapes_all_CH4_iso_sites_named) +

  # annotations
  annotate("text", y = -200, x= -80, size = 2.8, label = "PM", color = microbial_color) +
  annotate("text", y = -185, x= -56.5, size = 2.8, label = "SM", color = "grey20") +
  annotate("text", y = -130, x= -39, size = 2.8, label = "T", color = "grey20") +
  annotate("text", y = -360, x= -25, size = 2.8, label = "A", color = "grey20") +

  # design
  theme_classic(base_size = 11)+
  theme(
    plot.margin = margin(5, 5, 0, 0, "pt")
    )

# print
CD_plot

Show what literature data were used in this plot.

data_literature %>% filter(!is.na(CH4_dD_permil_VSMOW)) %>% select(author, year_published, title_of_publication)

4 C\(_1\) / (C\(_2\) + C\(_3\)) vs. \(\delta^{13}\text{C}_{\text{CH}_4}\) (“Bernard”) Plot

4.1 Prepare data

Prepare Oman well data

# select samples with quantifiable C2H6
data_C1_C2_C3 <- data %>% filter(!is.na(C2H6_uM)) %>%  select(sampling_site, notes, year_sampled, depth_fluid_intake_mbct, water_type, CH4_uM, CH4_was_measured, CH4_LQ_uM, CH4_unc_uM, C2H6_uM, C2H6_was_measured, C2H6_LQ_uM, C2H6_unc_uM, C3H8_uM, C3H8_was_measured, C3H8_LQ_uM, C3H8_unc_uM,
CH4_d13C_permil_VPDB_CUB, CH4_d13C_unc_permil_CUB, packer_site_year_interval) # select relevant data for CH4, C2H6, C3H8

data_C1_C2_C3  <- data_C1_C2_C3  %>%  filter(CH4_uM > 0.5) # filter for samples for with greater than 0.5 µmol/L CH4

# calculate C1/(C2+C3)
data_C1_C2_C3 <- data_C1_C2_C3 %>% mutate(C1_over_C2_plus_C3 = CH4_uM / (C2H6_uM + if_else(!is.na(C3H8_uM), C3H8_uM, 0)))

data_C1_C2_C3 <- data_C1_C2_C3 %>% mutate(site = "Oman/UAE")

# print
data_C1_C2_C3

Get literature data

# select samples with quantifiable C2H6
data_C1_C2_C3_lit <- data_literature %>% filter(!is.na(C2H6_conc)) %>%  select(site, sample_type, CH4_conc, C2H6_conc, C3H8_conc,
CH4_d13C_permil_VPDB, packer_site_year_interval, author, year_published) # select relevant data for CH4, C2H6, C3H8

# calculate C1/(C2+C3)
data_C1_C2_C3_lit <- data_C1_C2_C3_lit %>% mutate(C1_over_C2_plus_C3 = CH4_conc / (C2H6_conc + if_else(!is.na(C3H8_conc), C3H8_conc, 0)))

# print
data_C1_C2_C3_lit

4.2 Plot

set factor levels for plotting

data_C1_C2_C3$packer_site_year_interval <- factor(data_C1_C2_C3$packer_site_year_interval, levels = c("BA1A_2019_108-132", "BA1D_2019_45-75", "BA1D_2019_102-132", "Other Samail Ophiolite"))
# Set coordinates of fields of origins (Milkov and Etiope 2018)
primary_microbial_bernard <- data.frame(
  x = c(-90,-70,-50, -50, -90),
  y = c(2e2,2e2,7e2, 1e5, 1e5)
)

secondary_microbial_bernard <- data.frame(
  x = c(-55, -50, -45, -35, -40, -55),
  y = c(4e3, 4e0, 2e0, 7e0, 1e5, 1e5)
)

thermogenic_bernard <- data.frame(
  x = c(-50, -65, -70, -50, -47, -42, -40, -40, -15, -15, -30, -50),
  y = c(1e-1, 1e0, 2e2, 7e2, 2e2, 1e2, 2e2, 1e5, 1e5, 5e2, 1e-1, 1e-1)
)

abiotic_bernard <- data.frame(
  x = c(-50, -30, -10, 10, -30),
  y = c(8e1, 1e-1, 1e-1, 1e5, 1e5)
)

# create plot
C1_C2_C3_plot <-
  ggplot() +
  
  # add origin fields
  geom_polygon(data = primary_microbial_bernard, aes(x=x, y=y), fill = microbial_color, alpha = microbial_opacity, color = microbial_color, size = 0.2) +
  geom_polygon(data = secondary_microbial_bernard, aes(x=x, y=y), fill = NA, color = "black", size = 0.2, linetype = "dashed") +
  geom_polygon(data = thermogenic_bernard, aes(x=x, y=y), alpha = thermo_opacity, fill = thermogenic_color, color = thermogenic_color, size = 0.5) +
  geom_polygon(data = abiotic_bernard, aes(x=x, y=y), alpha= abiotic_opacity, fill = abiotic_color, color = abiotic_color, size = .2) +

  # add text annotations
  annotate("text", y = 3e4, x = -80, size = 3, label = "PM", color = microbial_color) +
  annotate("text", y = 1e4, x = -45, size = 3, label = "SM", color = "grey20") +
  annotate("text", y = 1e0, x = -56, size = 3, label = "T", color = "grey20") +
  annotate("text", y = 3e0, x = -15, size = 3, label = "A", color = "grey20") +

  # # plot literature data
  # geom_point(data = data_C1_C2_C3_lit,
  #   aes(
  #   x = CH4_d13C_permil_VPDB,
  #   y = C1_over_C2_plus_C3,
  #   shape = packer_site_year_interval,
  #   color = packer_site_year_interval,
  #   ), alpha = 1) +

  # plot Oman well data
  geom_point(data = data_C1_C2_C3,
    aes(
    x = CH4_d13C_permil_VPDB_CUB ,
    y = C1_over_C2_plus_C3,
    shape = packer_site_year_interval,
    color = packer_site_year_interval
    ), alpha = 1, size = 3) +

  # axis styling
  scale_y_continuous(trans = 'log10',
    breaks = trans_breaks('log10', function(x) 10 ^ x, n=5),
    labels = trans_format('log10', math_format(10^.x)),
    limits = c(.1, 1e5), expand = c(0,0),
    name = latex2exp::TeX("$C_1$/($C_{2}$+$C_{3}$)"))+
    scale_x_continuous(name = latex2exp::TeX("$\\delta ^{13}C_{CH_4}$$\\,$/$\\,$$\\lbrack$‰ VPDB$\\rbrack$"), limits = c(-90, 30), expand = c(0,0)) +

  # symbol styling
  scale_color_manual(name = "Sample ID", values = colors_all_CH4_iso_sites_named) +
  scale_shape_manual(name = "Sample ID", values = shapes_all_CH4_iso_sites_named) +
  
  # design
  theme_classic(base_size = 11)+
  theme(
    plot.margin = margin(5, 7, 5, 1, "pt")
    )

# print plot
C1_C2_C3_plot

5 Alkanes \(\delta^{13}\)C plot

5.1 Prepare data

# create data frame of CH4, C2H6, and C3H8 d13C data
data_alkanes_d13C <- data %>% filter(!is.na(C2H6_d13C_permil_VPDB)) %>% select(sampling_site, year_sampled, depth_fluid_intake_mbct, CH4_d13C_permil_VPDB_CUB, CH4_d13C_unc_permil_CUB, CH4_d13C_permil_VPDB_MIT, CH4_d13C_unc_permil_MIT, C2H6_d13C_permil_VPDB, C2H6_d13C_unc_permil, C3H8_d13C_permil_VPDB, C3H8_d13C_unc_permil, packer_site_year_interval)

# print
data_alkanes_d13C

Gather alkane \(\delta^{13}\text{C}\) data so it can be plotted

# gather data making a column for the compound
data_alkanes_d13C_longer <- data_alkanes_d13C  %>% select(-depth_fluid_intake_mbct) %>% gather("compound_lab", "d13C_permil_VPDB", -sampling_site, -year_sampled, -CH4_d13C_unc_permil_CUB, -CH4_d13C_unc_permil_MIT, -C2H6_d13C_unc_permil, -C3H8_d13C_unc_permil, -packer_site_year_interval)

# make a column that gives the # of C atoms in the compound based on the compounds chemcal formula
data_alkanes_d13C_longer  <- data_alkanes_d13C_longer %>% mutate(C_atoms = case_when(
str_detect(compound_lab, "CH") == TRUE ~ 1,
str_detect(compound_lab, "C2") == TRUE ~ 2,
str_detect(compound_lab, "C3") == TRUE ~ 3
))

# make another column for the uncertainty of each d13C analysis
data_alkanes_d13C_longer  <- data_alkanes_d13C_longer %>% mutate(uncertainty_d13C_permil = case_when(
compound_lab == "CH4_d13C_permil_VPDB_CUB" ~ CH4_d13C_unc_permil_CUB,
compound_lab == "CH4_d13C_permil_VPDB_MIT" ~ CH4_d13C_unc_permil_MIT,
compound_lab == "C2H6_d13C_permil_VPDB" ~ C2H6_d13C_unc_permil,
compound_lab == "C3H8_d13C_permil_VPDB" ~ C3H8_d13C_unc_permil
))

# select only the most relevant columns
data_alkanes_d13C_longer <- data_alkanes_d13C_longer %>% select(sampling_site, year_sampled, C_atoms, compound_lab, d13C_permil_VPDB, uncertainty_d13C_permil, packer_site_year_interval) %>% filter(!is.na(d13C_permil_VPDB))

# print
data_alkanes_d13C_longer

Get literature data

# get relevant data for C1-C5 alkane analyses
data_alkanes_d13C_lit <- data_literature %>% filter(!is.na(C2H6_d13C_permil_VPDB)) %>% filter(!is.na(C2H6_d13C_permil_VPDB)) %>% select(site, Sample, sample_type, CH4_d13C_permil_VPDB, C2H6_d13C_permil_VPDB, C3H8_d13C_permil_VPDB,  n_C4H10_d13C_permil_VPDB, n_C5H12_d13C_permil_VPDB, packer_site_year_interval)

# print
data_alkanes_d13C_lit

As previously done for Oman well data, gather literature data by compound for plotting purposes

# select most relevant data from literature
data_alkanes_d13C_lit <- data_alkanes_d13C_lit %>%  select(site, Sample, sample_type, CH4_d13C_permil_VPDB, C2H6_d13C_permil_VPDB, C3H8_d13C_permil_VPDB,  n_C4H10_d13C_permil_VPDB, n_C5H12_d13C_permil_VPDB, packer_site_year_interval)

# gather by compound
data_alkanes_d13C_longer_lit <- data_alkanes_d13C_lit %>% gather("compound", "d13C_permil_VPDB", -site, -Sample, -sample_type, -packer_site_year_interval)

# assign number of C atoms per molecule based on the compound formula
data_alkanes_d13C_longer_lit <- data_alkanes_d13C_longer_lit %>% mutate(C_atoms = case_when(
str_detect(compound, "CH4") == TRUE ~ 1,
str_detect(compound, "C2H6") == TRUE ~ 2,
str_detect(compound, "C3H8") == TRUE ~ 3,
str_detect(compound, "C4H10") == TRUE ~ 4,
str_detect(compound, "C5H12") == TRUE ~ 5
))

# change sample type to have new line character for plotting
data_alkanes_d13C_longer_lit  <- data_alkanes_d13C_longer_lit %>% mutate(sample_type = if_else(sample_type == "vent/well fluid", "vent/well\nfluid", "rock\ncrushing"))

# print
data_alkanes_d13C_longer_lit

5.2 Plot

5.2.1 Plot data

# create plot
alkanes_d13C_oman_wells_and_lit_plot <- ggplot() +
  
  # Add NSHQ14 label
  annotate("text", label = "NSHQ14", x = 2.75, y = -2, label.size = 0.22, color = "#999999") +
  annotate("text", label = "BA1D_2019_102-132", x = 1.7, y = 3.5, label.size = 0.22, color = "#CC79A7") +
    annotate("text", label = "Nizwa", x = 1.3, y = -11, label.size = 0.22, color = "#999999") +
  
  # Plot literature data
  # lines
  geom_line(data = data_alkanes_d13C_longer_lit %>% filter(C_atoms < 4),
    mapping =  aes(
    x = C_atoms,
    y = d13C_permil_VPDB,
    color = packer_site_year_interval,
    shape = packer_site_year_interval,
    group = Sample)) +
  
  # points
  geom_point(data = data_alkanes_d13C_longer_lit %>% filter(C_atoms < 4),
    mapping =  aes(
    x = C_atoms,
    y = d13C_permil_VPDB,
    shape = packer_site_year_interval,
    color = packer_site_year_interval), size = 2.5) +
  
  # Plot Oman well data
  # lines
  geom_line(data = data_alkanes_d13C_longer %>% filter(C_atoms < 4),
    mapping =  aes(
    x = C_atoms,
    y = d13C_permil_VPDB,
    # group = year_sampled,
    color = packer_site_year_interval)) +

  # error bars
  geom_linerange(data = data_alkanes_d13C_longer,
    mapping =  aes(
    x = C_atoms,
    ymax = d13C_permil_VPDB + uncertainty_d13C_permil,
    ymin = d13C_permil_VPDB - uncertainty_d13C_permil,
    color = packer_site_year_interval), show.legend = FALSE) +
  # points
  geom_point(data = data_alkanes_d13C_longer,
    mapping =  aes(
    x = C_atoms,
    y = d13C_permil_VPDB,
    shape = packer_site_year_interval,
    color = packer_site_year_interval), size = 2.5) +
  
  # symbol styling
  scale_color_manual(name = NULL, values = colors_all_CH4_iso_sites_named, guide = guide_legend(ncol = 2)) +
  scale_shape_manual(name = NULL, values = shapes_all_CH4_iso_sites_named, guide = guide_legend(ncol = 2)) +

  # axis styling
  scale_x_continuous(name = "C atoms in alkane", breaks = c(1,2,3), expand = expand_scale(add = c(0.04)))+
  scale_y_continuous(name = latex2exp::TeX("$\\delta ^{13}C$$\\,$/$\\,$\\lbrack$$‰ VPDB$\\rbrack$"))+
  
  # design
  theme_classic(base_size = 11) +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    legend.direction = "vertical",
    legend.position = "none",
    legend.text = element_text(margin = margin(t=3, b=3)),
    plot.margin = margin(3, 3, 0, 1, "pt")
    )

# print plot
alkanes_d13C_oman_wells_and_lit_plot

6 Sulfate

data$water_type <- factor(data$water_type, levels = c("alluvium", "gabbro", "Mg_HCO3", "Ca_OH"))
# create plot
sulfate_plot <- ggplot() +
  
  #   # points
  #   geom_jitter(data = data %>% filter(water_type == "Ca_OH" | water_type == "gabbro" | water_type == "Mg_HCO3") %>% filter(packer_site_year_interval == "Other Samail Ophiolite"),
  #   mapping =  aes(
  #   x = water_type,
  #   y = Sulfate_uM/1000,
  #   # shape = packer_site_year_interval,
  #   fill = packer_site_year_interval),
  #   width = 0.2, shape = 21, alpha = 0.4) +
  # 
  # # points
  # geom_jitter(data = data %>% filter(water_type == "Ca_OH" | water_type == "gabbro" | water_type == "Mg_HCO3") %>% filter(packer_site_year_interval != "Other Samail Ophiolite"),
  #   mapping =  aes(
  #   x = water_type ,
  #   y = Sulfate_uM/1000,
  #   # shape = packer_site_year_interval,
  #   fill = packer_site_year_interval),
  #   width = 0.25, shape = 21) +
  
  
  geom_jitter(data = data %>% filter(water_type != "alluvium"),
    mapping =  aes(
    x = water_type ,
    y = Sulfate_uM/1000,
    shape = packer_site_year_interval,
    color = packer_site_year_interval),
    width = 0.27, size = 3.5) +
  
  scale_color_manual(name = "Sample ID", values = colors_all_packer_site_year_interval_named) +
  scale_shape_manual(name = "Sample ID", values = shapes_all_packer_site_year_interval_named) +
  scale_y_continuous(name = latex2exp::TeX("$\\textit{c}_{SO_{4}^{2-}}$ / $\\lbrack$mmol $\\cdot$ L$^{-1}$$\\rbrack$")) +
  scale_x_discrete(name = "Water type", labels = unname(latex2exp::TeX(c("gabbro", "Mg$^{2+}$ - HCO$_{3}^{-}$", "Ca$^{2+}$ - OH$^{-}$"))))+
  theme_classic()

# print plot
sulfate_plot

7 Si

7.1 Prepare data for plotting

# Join data frames for all endmembers of Leong et al. 2020
data_Leong_mix_all_endmembers <- (data_Leong_mix_8umolal_DIC %>% mutate(DIC_endmember_conc_umolal = 8)) %>%
  bind_rows((data_Leong_mix_10umolal_DIC %>% mutate(DIC_endmember_conc_umolal = 10))) %>%
  bind_rows((data_Leong_mix_20umolal_DIC %>% mutate(DIC_endmember_conc_umolal = 20)))
# Get mixing extents of at various percents for each endmember case
data_Leong_mix_all_endmembers_01_percent_mix <- data_Leong_mix_all_endmembers %>% group_by(DIC_endmember_conc_umolal) %>% filter(abs(mixing_extent - 0.01) == min(abs(mixing_extent - 0.01))) %>% ungroup()

data_Leong_mix_all_endmembers_05_percent_mix <- data_Leong_mix_all_endmembers %>% group_by(DIC_endmember_conc_umolal) %>% filter(abs(mixing_extent - 0.05) == min(abs(mixing_extent - 0.05))) %>% ungroup()

data_Leong_mix_all_endmembers_10_percent_mix <- data_Leong_mix_all_endmembers %>% group_by(DIC_endmember_conc_umolal) %>% filter(abs(mixing_extent - 0.1) == min(abs(mixing_extent - 0.1))) %>% ungroup()

data_Leong_mix_all_endmembers_50_percent_mix <- data_Leong_mix_all_endmembers %>% group_by(DIC_endmember_conc_umolal) %>% filter(abs(mixing_extent - 0.5) == min(abs(mixing_extent - 0.5))) %>% ungroup()

data_Leong_mix_all_endmembers_90_percent_mix <- data_Leong_mix_all_endmembers %>% group_by(DIC_endmember_conc_umolal) %>% filter(abs(mixing_extent - 0.9) == min(abs(mixing_extent - 0.9))) %>% ungroup()

Set reaction path adapted from Leong et al. 2020

Leong_rxn_path <- tribble(
  ~pH, ~Si_total_umolal,
  5.5,   1,
  7.9,   303,
  10.1,  0.003,
  12.3,   1.3
)

7.2 Plot

# create plot
Si_pH_plot <- ggplot() +
  
    # reaction path from Leong et al.
    geom_line(data = Leong_rxn_path,
    mapping =  aes(
    x = pH,
    y = Si_total_umolal),
    size = 3.5,
    color = "blue",
    lineend = "round") +
  
    # mixing path from Leong et al., 8 umolal DIC
    geom_line(data = data_Leong_mix_8umolal_DIC,
    mapping =  aes(
    x = pH,
    y = Si_molal * 1e6),
    linetype = "dotted",
    size = 0.3,
    color = "blue") +
  
      # mixing path from Leong et al., 10 umolal DIC
    geom_line(data = data_Leong_mix_10umolal_DIC,
    mapping =  aes(
    x = pH,
    y = Si_molal * 1e6),
    linetype = "dotted",
    size = 0.3,
    color = "blue") +
  
      # mixing path from Leong et al., 20 umolal DIC
    geom_line(data = data_Leong_mix_20umolal_DIC,
    mapping =  aes(
    x = pH,
    y = Si_molal * 1e6),
    linetype = "dotted",
    size = 0.3,
    color = "blue") +
  
    # 1% mixing from Leong et al.
    geom_line(data = data_Leong_mix_all_endmembers_01_percent_mix,
    mapping =  aes(
    x = pH,
    y = Si_molal * 1e6),
    linetype = "dashed",
    size = 0.3,
    color = "blue") +
  
      # 1% mixing from Leong et al.
    geom_line(data = data_Leong_mix_all_endmembers_01_percent_mix,
    mapping =  aes(
    x = pH,
    y = Si_molal * 1e6),
    linetype = "dashed",
    size = 0.3,
    color = "blue") +
  
      # 5% mixing from Leong et al.
    geom_line(data = data_Leong_mix_all_endmembers_05_percent_mix,
    mapping =  aes(
    x = pH,
    y = Si_molal * 1e6),
    linetype = "dashed",
    size = 0.3,
    color = "blue") +
  
      # 10% mixing from Leong et al.
    geom_line(data = data_Leong_mix_all_endmembers_10_percent_mix,
    mapping =  aes(
    x = pH,
    y = Si_molal * 1e6),
    linetype = "dashed",
    size = 0.3,
    color = "blue") +
  
      # 50% mixing from Leong et al.
    geom_line(data = data_Leong_mix_all_endmembers_50_percent_mix,
    mapping =  aes(
    x = pH,
    y = Si_molal * 1e6),
    linetype = "dashed",
    size = 0.3,
    color = "blue") +
  
      # 90% mixing from Leong et al.
    geom_line(data = data_Leong_mix_all_endmembers_90_percent_mix,
    mapping =  aes(
    x = pH,
    y = Si_molal * 1e6),
    linetype = "dashed",
    size = 0.3,
    color = "blue") +
  
    # add directionality to leong rxn path
    geom_line(data = tribble(
    ~pH, ~Si_total_uM,
  6.2,   5.5,
  7.2,   60),
    mapping =  aes(
    x = pH,
    y = Si_total_uM),
    linetype = "solid",
    arrow = arrow(angle = 30, length = unit(0.06, "inches"),
      ends = "last", type = "open"),
     size = 0.6,
    color = "white") +
  
      geom_line(data = tribble(
    ~pH, ~Si_total_uM,
  8.55,   10,
  9.5,   0.07),
    mapping =  aes(
    x = pH,
    y = Si_total_uM),
    linetype = "solid",
    arrow = arrow(angle = 30, length = unit(0.06, "inches"),
      ends = "last", type = "open"),
     size = 0.6,
    color = "white") +
  
      geom_line(data = tribble(
    ~pH, ~Si_total_uM,
  10.3,   0.005,
  11.3,   0.08),
    mapping =  aes(
    x = pH,
    y = Si_total_uM),
    linetype = "solid",
    arrow = arrow(angle = 30, length = unit(0.06, "inches"),
      ends = "last", type = "open"),
     size = 0.6,
    color = "white") +
  

  # points

# context data Leong, canovas, stanger, chavagnac
  geom_point(data = context_data_Leong_et_al_2020 %>% filter(Lithology == "peridotite"),
    mapping =  aes(
    x = pH,
    y = Si_mol_kg * 1e6),     shape = 5,
    color = "#999999", size = 3.5) +

      # context data Nothaft, rempfert
  geom_point(data = data %>% filter(water_type == "Ca_OH" | water_type == "Mg_HCO3") %>% filter(str_detect(packer_site_year_interval, "BA1") == FALSE),
    mapping =  aes(
    x = pH,
    y = Si_total_uM,
    shape = packer_site_year_interval,
    color = packer_site_year_interval), size = 3.5) +
  
    # BA1A / BA1D data
  geom_point(data = data %>% filter(water_type == "Ca_OH" | water_type == "Mg_HCO3") %>% filter(str_detect(packer_site_year_interval, "BA1") == TRUE),
    mapping =  aes(
    x = pH,
    y = Si_total_uM,
    shape = packer_site_year_interval,
    color = packer_site_year_interval), size = 3.5) +
  
  scale_color_manual(name = "Sample ID", values = colors_all_packer_site_year_interval_named) +
  scale_shape_manual(name = "Sample ID", values = shapes_all_packer_site_year_interval_named) +
  
  # annotations
  ggplot2::annotate("text", label = "Rain", x = 5.8,  y = 0.4, color = "black", size = 3.2) +
  ggplot2::annotate("text", label = TeX("Mg$^{2+}$ - HCO$_{3}^{-}$"), x = 7.6,  y = 700, angle = 0, color = "black", parse = TRUE, size = 3.2) +
  ggplot2::annotate("text", label = "Intermediate", x = 8.9,  y = 0.3, angle = -63, color = "black", size = 3.2) +
  ggplot2::annotate("text", label = TeX("Ca$^{2+}$ - OH$^{-}$"), x = 11.8,  y = 0.09, angle = 50, color = "black", parse = TRUE, size = 3.2) +
  ggplot2::annotate("text", label = "Mix", x = 11,  y = 400, angle = 0, color = "black", size = 3.2) +

    ggplot2::annotate("text", label = "1%", x = 12.7,  y = data_Leong_mix_all_endmembers_01_percent_mix %>% summarise(Si_umolal = max(Si_molal) * 1e6) %>% as.numeric(), angle = 0, color = "blue", size = 3.2) +
    ggplot2::annotate("text", label = "5%", x = 12.7,  y = data_Leong_mix_all_endmembers_05_percent_mix %>% summarise(Si_umolal = min(Si_molal) * 1e6) %>% as.numeric(), angle = 0, color = "blue", size = 3.2) +
    ggplot2::annotate("text", label = "10%", x = 12.7,  y = data_Leong_mix_all_endmembers_10_percent_mix %>% summarise(Si_umolal = max(Si_molal) * 1e6) %>% as.numeric(), angle = 0, color = "blue", size = 3.2) +
    ggplot2::annotate("text", label = "50%", x = 12.45,  y = 170, angle = 0, color = "blue", size = 3.2) +
  ggplot2::annotate("text", label = "90%", x = 10,  y = 450, angle = 0, color = "blue", size = 3.2) +

  ggplot2::annotate("text", label = "Intermediate", x = 8.9,  y = 0.3, angle = -63, color = "black", size = 3.2) +
  ggplot2::annotate("text", label = TeX("Ca$^{2+}$ - OH$^{-}$"), x = 11.8,  y = 0.09, angle = 50, color = "black", parse = TRUE, size = 3.2) +
  ggplot2::annotate("text", label = "Mix", x = 11,  y = 400, angle = 0, color = "black", size = 3.2) +

    ggplot2::annotate("text", label = "1%", x = 12.7,  y = data_Leong_mix_all_endmembers_01_percent_mix %>% summarise(Si_umolal = max(Si_molal) * 1e6) %>% as.numeric(), angle = 0, color = "blue", size = 3.2) +
    ggplot2::annotate("text", label = "5%", x = 12.7,  y = data_Leong_mix_all_endmembers_05_percent_mix %>% summarise(Si_umolal = min(Si_molal) * 1e6) %>% as.numeric(), angle = 0, color = "blue", size = 3.2) +
    ggplot2::annotate("text", label = "10%", x = 12.7,  y = data_Leong_mix_all_endmembers_10_percent_mix %>% summarise(Si_umolal = max(Si_molal) * 1e6) %>% as.numeric(), angle = 0, color = "blue", size = 3.2) +
    ggplot2::annotate("text", label = "50%", x = 12.45,  y = 170, angle = 0, color = "blue", size = 3.2) +
    ggplot2::annotate("text", label = "90%", x = 10,  y = 450, angle = 0, color = "blue", size = 3.2) +
  

    # axis styling
  scale_y_continuous(trans = 'log10',
    breaks = trans_breaks('log10', function(x) 10 ^ x, n=5),
    labels = trans_format('log10', math_format(10^.x)),
    limits = c(1e-3, 2e3), expand = c(0,0),
    name = latex2exp::TeX("$\\textit{c}_{\\sum Si$} / $\\lbrack$µmol $\\cdot$ L$^{-1}$$\\rbrack$"))+
  scale_x_continuous(breaks = c(6,8,10,12))+

  # design
  theme_classic(base_size = 11)+
  theme(
    plot.margin = margin(1, 1, 0, 1, "pt")
  )

# print plot
Si_pH_plot

7.3 Calculate mixing

Using 20 umolal DIC end member and create linear model of Si vs. mix extent

# change from molal to µmolar, and change to name fitting with the Oman field data
data_Leong_mix_20umolal_DIC <- data_Leong_mix_20umolal_DIC %>% mutate(Si_total_uM = Si_molal * 1e6)

# create linear model of mixing extent vs Si conc. in umolal
Leong_mix_lm <- lm(mixing_extent ~ Si_total_uM, data = data_Leong_mix_20umolal_DIC)

Predict mixing from BA1 data

# Get relevant BA1 data
data_BA1_Si <- data %>% filter(str_detect(packer_site_year_interval, "BA1") == TRUE) %>% select(packer_site_year_interval, Si_total_uM)

# predict mixing extent from Si using linear model
add_predictions(data_BA1_Si, Leong_mix_lm, var = "mix_extent_frac_Mg_HCO3") %>% mutate(mix_extent_percent_Mg_HCO3 = mix_extent_frac_Mg_HCO3 * 100) %>% select(-mix_extent_frac_Mg_HCO3)