1 Oman 2019 CH\(_4\) \(\delta^{2}\)H - Introduction

CH\(_4\) isotopic composition was measured by GC-P-IRMS as described in the methods section of the manuscript. The following code steps through the data processing and calibration to convert measured values to values vs. the international reference standard (Vienna Standard Mean Ocean Water, VSMOW) using CH\(_4\) isotope standards from USGS and Airgas. The gas samples processed in this document were collected in Oman in January 2019.

2 Load libraries

# load libraries
library(tidyverse) # dplyr, tidyr, ggplot
library(isoreader) # reading isotope data files
library(isoprocessor) # processing isotope data files

# global knitting options for automatic saving of all plots as .png and .pdf. Also sets cache directory.
knitr::opts_chunk$set(
  dev = c("png", "pdf"), fig.keep = "all",
  dev.args = list(pdf = list(encoding = "WinAnsi", useDingbats = FALSE)),
  fig.path = file.path("fig_output/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input()))),
  cache.path = file.path("cache/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input())))
)

Data processed using the packages isoreader version 1.2.1 and isoprocessor version 0.6.7.

3 Load Data

# read files
iso_files_raw <- 
  file.path(
    "data_raw/191130_OM19_CH4_dD", "191130_OM19_CH4_dD.cf.rds"
  ) %>% 
    # read data files in parallel for fast read
  iso_read_continuous_flow(parallel = TRUE)

4 Process file info & peak table

# Filter out initial H2 zero and H3 factor tests prior to the analytical session
iso_files_raw <- iso_files_raw %>% iso_filter_files(`Identifier 1` != "H2 zero" & `Identifier 1` != "H3 F")
## Info: applying file filter, keeping 37 of 41 files
# Filter out files with peak amplitude out of linearity range. WAB71 was the result of an initial mis-calculation of moles on column. NSHQ14 was result of inefficient gas transfer with the small syringe volume initially used.

iso_files_raw <- iso_files_raw %>% iso_filter_files(file_id != "BF5936__WAB71_BS1B_1000.dxf" & file_id != "BF5938__NSHQ14_BS1B_0015.dxf")
## Info: applying file filter, keeping 35 of 37 files
# process
iso_files <- iso_files_raw %>% 
  # set peak table from vendor data table
  iso_set_peak_table_from_auto_vendor_data_table() %>% 
  # rename key file info columns
  iso_rename_file_info(id1 = `Identifier 1`, inject_type = `Identifier 2`) %>%
  # parse text info into numbers
  iso_parse_file_info(number = Analysis) %>%
  # process other file information that is specific to the naming conventions
  # of this particular sequence
  iso_mutate_file_info(
    # what was the injection volume?
    inject_volume = parse_number(Comment) %>% iso_double_with_units("uL"),
    # what folder are the data files in? (assuming folder = sequence)
    folder = basename(dirname(file_path))
  )
## Warning: 'iso_set_peak_table_from_auto_vendor_data_table' has been renamed
## to 'iso_set_peak_table_automatically_from_vendor_data_table'. Please call the
## latter function directly to avoid this warning.
## Info: setting peak table for 35 file(s) from vendor data table with the following renames:
## - for 35 file(s): 'Nr.'->'peak_nr', 'Is Ref.?'->'is_ref', 'Start'->'rt_start', 'Rt'->'rt', 'End'->'rt_end', 'Ampl 2'->'amp2', 'Ampl 3'->'amp3', 'BGD 2'->'bgrd2_start', 'BGD 3'->'bgrd3_start', 'BGD 2'->'bgrd2_end', 'BGD 3'->'bgrd3_end', 'rIntensity 2'->'area2', 'rIntensity 3'->'area3', 'rR 3H2/2H2'->'r3/2', 'rd 3H2/2H2'->'rd3/2', 'd 3H2/2H2'->'d3/2', 'd 2H/1H'->'d2H', 'AT% 2H/1H'->'at2H'
## Info: renaming the following file info across 35 data file(s): 'Identifier 1'->'id1', 'Identifier 2'->'inject_type'
## Info: parsing 1 file info columns for 35 data file(s):
##  - to number: 'Analysis'
## Info: mutating file info for 35 data file(s)

4.1 Chromatograms

Display chromatograms of samples and standards for CH\(_4\) analyses.

# plot the chromatograms
chroms <- iso_files %>%
  iso_plot_continuous_flow_data(
     # select data and aesthetics
    data = c(2),
    color = id1
  )

# print
chroms

4.2 Peak maps

# set peak map
peak_maps <- 
  tibble::tribble(
    ~compound, ~ref_nr, ~`rt`,
    # peak map data (row-by-row)
    "H2",          1,      29,
    "H2",          2,      64,
    "H2",          3,      100,
    "H2",          4,      130,
    "CH4",          NA,    273
  )

# print
peak_maps %>% knitr::kable(digits = 0)
compound ref_nr rt
H2 1 29
H2 2 64
H2 3 100
H2 4 130
CH4 NA 273

4.3 Fetch peak table

# identify peaks
peak_table_w_ids <- iso_files %>% 
  iso_map_peaks(peak_maps) %>%
  # peak table
  iso_get_peak_table(include_file_info = everything())
## Warning in max(..n_overlapping..): no non-missing arguments to max; returning -
## Inf
## Warning in max(..n_matches..): no non-missing arguments to max; returning -Inf
## Info: 175 of 175 peaks in 35 files were successfully mapped using a single peak map.
## Info: aggregating peak table from 35 data file(s), including file info 'everything()'

Display an example chromatogram for CH\(_4\).

chrom_mapped <- iso_files %>%
  iso_filter_files(id1 == "NSHQ14_BS1B") %>% 
  iso_plot_continuous_flow_data(
    # select data and aesthetics
    data = c(2),
    color = id1,
    # provide our peak table with ids
    peak_table = peak_table_w_ids, 
    # define peak labels, this can be any valid expression
    peak_label = iso_format(id = peak_info), 
    # specify which labels to show (removing the !is_identified or putting a 
    # restriction by retention time can help a lot with busy chromatograms)
    peak_label_filter = is_identified | !is_identified | is_missing
  )+
  theme(legend.position = "bottom")
## Info: applying file filter, keeping 1 of 35 files
chrom_mapped

4.4 Select analyte peaks

# focus on analyte peaks
peak_table_analytes <- peak_table_w_ids %>% 
  filter(compound == "CH4")

# We separately tested and determined the limit of quantitation for our intrument setup. For peaks below 1.0 V m/z 2, the effect of peak amplitude on dD CH4 increases sharply. In addition, repeatability measured as standard deviation of dD CH4 measurements increases sharply below this threshold.

peak_table_analytes <- peak_table_analytes %>%
filter(amp2 > 1000)

# print
peak_table_analytes

5 Isotope standard values

5.1 Load isotope standards

standards <- 
  tibble::tribble(
    ~id1,                  ~true_d2H,
    "T-ISO1",                    -157,
    "H-ISO1",                    -156,
    "Bio 1.0 Mid",           -240,
    "Beecher Island II",   -224.3,
    "516256",              -64,
    "043332T",             -160.3
  ) %>% 
  mutate(
    true_d2H = iso_double_with_units(true_d2H, "permil")
  )
standards %>% knitr::kable()
id1 true_d2H
T-ISO1 -157.0
H-ISO1 -156.0
Bio 1.0 Mid -240.0
Beecher Island II -224.3
516256 -64.0
043332T -160.3

5.2 Add isotope standards

peak_table_w_stds <- 
  peak_table_analytes %>% 
  iso_add_standards(stds = standards, match_by = c("id1")) 
## Info: matching standards by 'id1' - added 6 standard entries to 30 out of 34 rows, added new column 'is_std_peak' to identify standard peaks

5.3 Initial overview plots of data

Plot of standards, accepted versus measured \(\delta^{2}\text{H}\). Coefficients and \(\text{r}^2\) value of regression reported further below.

# generate plot
plot_stnds_accepted_measured <- peak_table_w_stds %>% filter(is_std_peak == TRUE) %>% 
  # use generic data plot function
  iso_plot_data(
    x = true_d2H,
    y = d2H,
    color = id1,
    points = TRUE
  )

# print
plot_stnds_accepted_measured

Standards cover the sample peak amplitude range.

# generate plot
plot_amp_d2H <- peak_table_w_stds %>% 
  # use generic data plot function
  iso_plot_data(
    x = amp2, y = d2H, color = is_std_peak,
    points = TRUE
  )

# print
plot_amp_d2H

Standards cover the sample time range. Standard delta analyses appear pretty constant over time.

# generate plot
plot_d2H_time <- peak_table_w_stds %>% 
  # use generic data plot function
  iso_plot_data(
    x = file_datetime, y = d2H, color = is_std_peak,
    points = TRUE
  )

# print
plot_d2H_time

6 Calibration

6.1 Generate a calibration with linear regression

global_calibs <- peak_table_w_stds %>%
  # prepare for calibration
  iso_prepare_for_calibration() %>% 
  # run calibrations
  iso_generate_calibration(
    model = c(
  # reference scale correction
      delta_only = lm(d2H ~ true_d2H),
      # multivariate with delta and amplitude
      delta_and_ampl = lm(d2H ~ true_d2H + amp2),
      # + the delta and amplitude cross term
      delta_cross_ampl = lm(d2H ~ true_d2H * amp2),
      # multivariate with delta and the datetime (i.e. checking for temporal drift)
      delta_and_time = lm(d2H ~ true_d2H + file_datetime),
      delta_cross_time = lm(d2H ~ true_d2H * file_datetime),
      # multivariate with delta, amplitude and datetime
      delta_and_ampl_and_time = lm(d2H ~ true_d2H + amp2 + file_datetime),
      # multivariate with delta cross amplitude and datetime
      delta_cross_ampl_and_time = lm(d2H ~ true_d2H * amp2 + file_datetime)
    ), 
    # specify which peaks to include in the calibration, here:
    # - all std_peaks (this filter should always be included!)
    use_in_calib = is_std_peak
  ) 
## Info: preparing data for calibration by nesting the entire dataset
## Info: generating calibration based on 7 models (delta_only = 'lm(d2H ~ true_d2H)', delta_and_ampl = 'lm(d2H ~ true_d2H + amp2)', delta_cross_ampl = 'lm(d2H ~ true_d2H * amp2)', delta_and_time = 'lm(d2H ~ true_d2H + file_datetime)', delta_cross_time = 'lm(d2H ~ true_d2H * file_datetime)', delta_and_ampl_and_time = 'lm(d2H ~ true_d2H + amp2 + file_datetime)', delta_cross_ampl_and_time = 'lm(d2H ~ true_d2H * amp2 + file_datetime)') for 1 data group(s) with standards filter 'is_std_peak'. Storing residuals in new column 'resid'. Storing calibration info in new column 'in_calib'.

6.2 Coefficients

# look at coefficients and summary
global_calibs %>% 
  # unnest calibration parameters
  iso_get_calibration_parameters(
    select_from_coefs = 
      c(term, estimate, SE = std.error, signif),
    select_from_summary = 
      c(fit_R2 = adj.r.squared, fit_RMSD = deviance, residual_df = df.residual)) %>%
  arrange(term) %>% 
  knitr::kable(digits = 4)
## Info: retrieving coefficient column(s) 'c(term, estimate, SE = std.error, signif)' for calibration
## Info: retrieving summary column(s) 'c(fit_R2 = adj.r.squared, fit_RMSD = deviance, residual_df = df.residual)' for calibration
calib calib_ok calib_points term estimate SE signif fit_R2 fit_RMSD residual_df
delta_only TRUE 30 (Intercept) 317.9747 4.3703 *** (p < 0.001) 0.9892 1689.9089 28
delta_and_ampl TRUE 30 (Intercept) 341.7586 4.4711 *** (p < 0.001) 0.9958 633.5522 27
delta_cross_ampl TRUE 30 (Intercept) 347.7830 8.7063 *** (p < 0.001) 0.9958 618.0219 26
delta_and_time TRUE 30 (Intercept) 315531.6535 185300.9306 - 0.9899 1526.3258 27
delta_cross_time TRUE 30 (Intercept) 771284.6767 922270.6094 - 0.9896 1511.5145 26
delta_and_ampl_and_time TRUE 30 (Intercept) 100020.2107 125160.3503 - 0.9957 618.4649 26
delta_cross_ampl_and_time TRUE 30 (Intercept) 123766.3435 127608.5502 - 0.9957 595.7318 25
delta_and_ampl TRUE 30 amp2 -0.0095 0.0014 *** (p < 0.001) 0.9958 633.5522 27
delta_cross_ampl TRUE 30 amp2 -0.0121 0.0035 ** (p < 0.01) 0.9958 618.0219 26
delta_and_ampl_and_time TRUE 30 amp2 -0.0092 0.0015 *** (p < 0.001) 0.9957 618.4649 26
delta_cross_ampl_and_time TRUE 30 amp2 -0.0123 0.0035 ** (p < 0.01) 0.9957 595.7318 25
delta_and_time TRUE 30 file_datetime -0.0002 0.0001 - 0.9899 1526.3258 27
delta_cross_time TRUE 30 file_datetime -0.0005 0.0006 - 0.9896 1511.5145 26
delta_and_ampl_and_time TRUE 30 file_datetime -0.0001 0.0001 - 0.9957 618.4649 26
delta_cross_ampl_and_time TRUE 30 file_datetime -0.0001 0.0001 - 0.9957 595.7318 25
delta_only TRUE 30 true_d2H 1.2459 0.0242 *** (p < 0.001) 0.9892 1689.9089 28
delta_and_ampl TRUE 30 true_d2H 1.2578 0.0152 *** (p < 0.001) 0.9958 633.5522 27
delta_cross_ampl TRUE 30 true_d2H 1.2944 0.0477 *** (p < 0.001) 0.9958 618.0219 26
delta_and_time TRUE 30 true_d2H 1.2459 0.0234 *** (p < 0.001) 0.9899 1526.3258 27
delta_cross_time TRUE 30 true_d2H 2593.2998 5135.2990 - 0.9896 1511.5145 26
delta_and_ampl_and_time TRUE 30 true_d2H 1.2574 0.0153 *** (p < 0.001) 0.9957 618.4649 26
delta_cross_ampl_and_time TRUE 30 true_d2H 1.3024 0.0485 *** (p < 0.001) 0.9957 595.7318 25
delta_cross_ampl TRUE 30 true_d2H:amp2 0.0000 0.0000 - 0.9958 618.0219 26
delta_cross_ampl_and_time TRUE 30 true_d2H:amp2 0.0000 0.0000 - 0.9957 595.7318 25
delta_cross_time TRUE 30 true_d2H:file_datetime 0.0000 0.0000 - 0.9896 1511.5145 26

6.3 Visualize Calibration Parameters

The visualization of the calibration parameters reveals that as expected \(^{2}\delta_{true}\) (the scale contraction) and \(A_{2}\) (the linear intensity term) are statistically relevant (*** = p.value < 0.001). DT is not significant. See residuals below for additional information on these multivariate calibration regressions.

global_calibs %>% iso_plot_calibration_parameters()

6.4 Apply global calibration

Apply the calibrations as discussed above. Inversion of the calibration is done with the investr package. Standard errors for each data point based on the calibration are calculated using binomial proportion confidence intervals (Wald intervals).

global_calibs_applied <- 
  global_calibs %>% 
  # which calibration to use? can include multiple if desired to see the result
  # in this case, the amplitude- and time-conscious calibrations are not necessary
  filter(calib == "delta_and_ampl") %>% 
  # apply calibration indication what should be calculated
  iso_apply_calibration(true_d2H, calculate_error = TRUE)
## Info: applying calibration to infer 'true_d2H' for 1 data group(s); storing resulting value in new column 'true_d2H_pred' and estimated error in new column 'true_d2H_pred_se'. This may take a moment... finished.
# calibration ranges
global_calibs_with_ranges <-
  global_calibs_applied %>% 
  # evaluate calibration range for the measured amplitude and predicted d2H
  iso_evaluate_calibration_range(amp2, true_d2H_pred) 
## Info: evaluating range for terms 'amp2' and 'true_d2H_pred' in calibration for 1 data group(s); storing resulting summary for each data entry in new column 'in_range'.
# show calibration ranges
global_calibs_with_ranges %>% 
  iso_get_calibration_range() %>% 
  iso_remove_list_columns() %>% 
  knitr::kable(d = 2)
## Info: retrieving all calibration range information for calibration
calib calib_ok calib_points term units min max
delta_and_ampl TRUE 30 amp2 mV 1124.32 3474.23
delta_and_ampl TRUE 30 true_d2H_pred permil -243.26 -55.26
# create calibrated peak table
peak_table_calibrated <- global_calibs_with_ranges %>% 
  iso_get_calibration_data()
## Info: retrieving all data

7 Inspect Calibration

7.1 Overview

Some samples are outside the \(\delta^{2}\text{H}\) range covered by the standards. It is hard to find standards that covers such a wide range of \(\delta^{2}\text{H}\) values as these samples, so there is not much that can be done about that. Anyway, the calibration is pretty linear and covers quite a wide range of \(\delta^{2}\text{H}\) values, so applying this calibration to the samples should still be valid.

# replicate earlier overview plot but now with the calibrated delta values
# and with a highlight of the calibration ranges and which points are in range
peak_table_calibrated %>% 
  # focus on identified peaks (comment out this line to see ALL peaks)
  filter(!is.na(compound)) %>% 
  # visualize with convenience function iso_plot_data
  iso_plot_data(
    # choose x and y (multiple y possible)
    x = amp2, y = true_d2H_pred,
    # choose aesthetics
    color = in_range, shape = is_std_peak, label = compound, size = 3,
    # decide what geoms to include
    points = TRUE
  ) %>% 
  # highlight calibration range
  iso_mark_calibration_range() +
  # legend
  theme(legend.position = "bottom", legend.direction = "vertical")

7.2 Summary

# generate data summary
peak_data <- 
  peak_table_calibrated %>% 
  # focus on identified peaks in the samples
  filter(!is.na(compound))

# summarize replicates
peak_data_summary <- 
  peak_data %>% 
  # summarize for each sample and compound
  group_by(id1, compound) %>% 
  iso_summarize_data_table(amp2, true_d2H_pred, true_d2H_pred_se) %>% select(-`true_d2H_pred_se sd`)

# add column in which d2H is rounded to permil
peak_data_summary <- peak_data_summary %>% mutate(`d2H rounded to integer` = round(`true_d2H_pred mean`, 0))

# print
peak_data_summary %>% iso_make_units_explicit() %>% knitr::kable(d = 2)
id1 compound n amp2 mean [mV] amp2 sd true_d2H_pred mean [permil] true_d2H_pred sd true_d2H_pred_se mean [permil] d2H rounded to integer [permil]
043332T CH4 3 2156.45 52.74 -163.64 2.79 3.92 -164
516256 CH4 5 2367.93 944.83 -62.53 5.05 4.23 -63
BA1A108132_BS2A CH4 1 1219.12 NA 44.75 NA 4.93 45
BA1D102132_BS2A CH4 1 1766.61 NA -112.31 NA 4.03 -112
BA1D4575_BS2A CH4 1 1870.08 NA -111.16 NA 4.02 -111
Beecher Island II CH4 7 2169.00 728.10 -221.82 4.90 4.03 -222
Bio 1.0 Mid CH4 5 2208.53 745.13 -241.11 1.92 4.07 -241
H-ISO1 CH4 7 2399.41 646.92 -156.83 1.65 3.98 -157
NSHQ14_BS1B CH4 1 2228.88 NA -263.11 NA 4.07 -263
NSHQ14NP_HS2 CH4 1 2172.97 NA -250.61 NA 4.03 -251
T-ISO1 CH4 3 2477.91 49.73 -158.11 1.69 3.92 -158
WAB56_BS1A CH4 1 2554.03 NA -346.62 NA 4.48 -347
WAB71_BS1B CH4 1 1496.00 NA -303.87 NA 4.28 -304

8 Export

Save data to xlsx spreadsheet.

# export the global calibration with all its information and data to Excel
peak_table_calibrated %>% 
  iso_export_calibration_to_excel(
    filepath = format(Sys.Date(), "data_output/%Y%m%d_OM19_CH4_H_data_calibrated.xlsx"),
    # include data summary as an additional useful tab
    `data summary` = peak_data_summary
  )
## Info: exporting calibrations into Excel '20200812_OM19_CH4_H_data_calibrated.xlsx'...
## Info: retrieving all data
## Info: retrieving all coefficient information for calibration
## Info: retrieving all summary information for calibration
## Info: retrieving all calibration range information for calibration
## Info: export complete, created tabs 'data summary', 'all data', 'calib coefs', 'calib summary' and 'calib range'.