CH\(_4\) isotopic composition was measured by GC-P-IRMS as described in the methods section of the manuscript. The following code steps through the data processing and calibration to convert measured values to values vs. the international reference standard (Vienna Standard Mean Ocean Water, VSMOW) using CH\(_4\) isotope standards from USGS and Airgas. The gas samples processed in this document were collected in Oman in January 2019.
# load libraries
library(tidyverse) # dplyr, tidyr, ggplot
library(isoreader) # reading isotope data files
library(isoprocessor) # processing isotope data files
# global knitting options for automatic saving of all plots as .png and .pdf. Also sets cache directory.
knitr::opts_chunk$set(
dev = c("png", "pdf"), fig.keep = "all",
dev.args = list(pdf = list(encoding = "WinAnsi", useDingbats = FALSE)),
fig.path = file.path("fig_output/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input()))),
cache.path = file.path("cache/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input())))
)
Data processed using the packages isoreader version 1.2.1 and isoprocessor version 0.6.7.
# read files
iso_files_raw <-
file.path(
"data_raw/191130_OM19_CH4_dD", "191130_OM19_CH4_dD.cf.rds"
) %>%
# read data files in parallel for fast read
iso_read_continuous_flow(parallel = TRUE)
# Filter out initial H2 zero and H3 factor tests prior to the analytical session
iso_files_raw <- iso_files_raw %>% iso_filter_files(`Identifier 1` != "H2 zero" & `Identifier 1` != "H3 F")
## Info: applying file filter, keeping 37 of 41 files
# Filter out files with peak amplitude out of linearity range. WAB71 was the result of an initial mis-calculation of moles on column. NSHQ14 was result of inefficient gas transfer with the small syringe volume initially used.
iso_files_raw <- iso_files_raw %>% iso_filter_files(file_id != "BF5936__WAB71_BS1B_1000.dxf" & file_id != "BF5938__NSHQ14_BS1B_0015.dxf")
## Info: applying file filter, keeping 35 of 37 files
# process
iso_files <- iso_files_raw %>%
# set peak table from vendor data table
iso_set_peak_table_from_auto_vendor_data_table() %>%
# rename key file info columns
iso_rename_file_info(id1 = `Identifier 1`, inject_type = `Identifier 2`) %>%
# parse text info into numbers
iso_parse_file_info(number = Analysis) %>%
# process other file information that is specific to the naming conventions
# of this particular sequence
iso_mutate_file_info(
# what was the injection volume?
inject_volume = parse_number(Comment) %>% iso_double_with_units("uL"),
# what folder are the data files in? (assuming folder = sequence)
folder = basename(dirname(file_path))
)
## Warning: 'iso_set_peak_table_from_auto_vendor_data_table' has been renamed
## to 'iso_set_peak_table_automatically_from_vendor_data_table'. Please call the
## latter function directly to avoid this warning.
## Info: setting peak table for 35 file(s) from vendor data table with the following renames:
## - for 35 file(s): 'Nr.'->'peak_nr', 'Is Ref.?'->'is_ref', 'Start'->'rt_start', 'Rt'->'rt', 'End'->'rt_end', 'Ampl 2'->'amp2', 'Ampl 3'->'amp3', 'BGD 2'->'bgrd2_start', 'BGD 3'->'bgrd3_start', 'BGD 2'->'bgrd2_end', 'BGD 3'->'bgrd3_end', 'rIntensity 2'->'area2', 'rIntensity 3'->'area3', 'rR 3H2/2H2'->'r3/2', 'rd 3H2/2H2'->'rd3/2', 'd 3H2/2H2'->'d3/2', 'd 2H/1H'->'d2H', 'AT% 2H/1H'->'at2H'
## Info: renaming the following file info across 35 data file(s): 'Identifier 1'->'id1', 'Identifier 2'->'inject_type'
## Info: parsing 1 file info columns for 35 data file(s):
## - to number: 'Analysis'
## Info: mutating file info for 35 data file(s)
Display chromatograms of samples and standards for CH\(_4\) analyses.
# plot the chromatograms
chroms <- iso_files %>%
iso_plot_continuous_flow_data(
# select data and aesthetics
data = c(2),
color = id1
)
# print
chroms
# set peak map
peak_maps <-
tibble::tribble(
~compound, ~ref_nr, ~`rt`,
# peak map data (row-by-row)
"H2", 1, 29,
"H2", 2, 64,
"H2", 3, 100,
"H2", 4, 130,
"CH4", NA, 273
)
# print
peak_maps %>% knitr::kable(digits = 0)
compound | ref_nr | rt |
---|---|---|
H2 | 1 | 29 |
H2 | 2 | 64 |
H2 | 3 | 100 |
H2 | 4 | 130 |
CH4 | NA | 273 |
# identify peaks
peak_table_w_ids <- iso_files %>%
iso_map_peaks(peak_maps) %>%
# peak table
iso_get_peak_table(include_file_info = everything())
## Warning in max(..n_overlapping..): no non-missing arguments to max; returning -
## Inf
## Warning in max(..n_matches..): no non-missing arguments to max; returning -Inf
## Info: 175 of 175 peaks in 35 files were successfully mapped using a single peak map.
## Info: aggregating peak table from 35 data file(s), including file info 'everything()'
Display an example chromatogram for CH\(_4\).
chrom_mapped <- iso_files %>%
iso_filter_files(id1 == "NSHQ14_BS1B") %>%
iso_plot_continuous_flow_data(
# select data and aesthetics
data = c(2),
color = id1,
# provide our peak table with ids
peak_table = peak_table_w_ids,
# define peak labels, this can be any valid expression
peak_label = iso_format(id = peak_info),
# specify which labels to show (removing the !is_identified or putting a
# restriction by retention time can help a lot with busy chromatograms)
peak_label_filter = is_identified | !is_identified | is_missing
)+
theme(legend.position = "bottom")
## Info: applying file filter, keeping 1 of 35 files
chrom_mapped
# focus on analyte peaks
peak_table_analytes <- peak_table_w_ids %>%
filter(compound == "CH4")
# We separately tested and determined the limit of quantitation for our intrument setup. For peaks below 1.0 V m/z 2, the effect of peak amplitude on dD CH4 increases sharply. In addition, repeatability measured as standard deviation of dD CH4 measurements increases sharply below this threshold.
peak_table_analytes <- peak_table_analytes %>%
filter(amp2 > 1000)
# print
peak_table_analytes
standards <-
tibble::tribble(
~id1, ~true_d2H,
"T-ISO1", -157,
"H-ISO1", -156,
"Bio 1.0 Mid", -240,
"Beecher Island II", -224.3,
"516256", -64,
"043332T", -160.3
) %>%
mutate(
true_d2H = iso_double_with_units(true_d2H, "permil")
)
standards %>% knitr::kable()
id1 | true_d2H |
---|---|
T-ISO1 | -157.0 |
H-ISO1 | -156.0 |
Bio 1.0 Mid | -240.0 |
Beecher Island II | -224.3 |
516256 | -64.0 |
043332T | -160.3 |
peak_table_w_stds <-
peak_table_analytes %>%
iso_add_standards(stds = standards, match_by = c("id1"))
## Info: matching standards by 'id1' - added 6 standard entries to 30 out of 34 rows, added new column 'is_std_peak' to identify standard peaks
Plot of standards, accepted versus measured \(\delta^{2}\text{H}\). Coefficients and \(\text{r}^2\) value of regression reported further below.
# generate plot
plot_stnds_accepted_measured <- peak_table_w_stds %>% filter(is_std_peak == TRUE) %>%
# use generic data plot function
iso_plot_data(
x = true_d2H,
y = d2H,
color = id1,
points = TRUE
)
# print
plot_stnds_accepted_measured
Standards cover the sample peak amplitude range.
# generate plot
plot_amp_d2H <- peak_table_w_stds %>%
# use generic data plot function
iso_plot_data(
x = amp2, y = d2H, color = is_std_peak,
points = TRUE
)
# print
plot_amp_d2H
Standards cover the sample time range. Standard delta analyses appear pretty constant over time.
# generate plot
plot_d2H_time <- peak_table_w_stds %>%
# use generic data plot function
iso_plot_data(
x = file_datetime, y = d2H, color = is_std_peak,
points = TRUE
)
# print
plot_d2H_time
global_calibs <- peak_table_w_stds %>%
# prepare for calibration
iso_prepare_for_calibration() %>%
# run calibrations
iso_generate_calibration(
model = c(
# reference scale correction
delta_only = lm(d2H ~ true_d2H),
# multivariate with delta and amplitude
delta_and_ampl = lm(d2H ~ true_d2H + amp2),
# + the delta and amplitude cross term
delta_cross_ampl = lm(d2H ~ true_d2H * amp2),
# multivariate with delta and the datetime (i.e. checking for temporal drift)
delta_and_time = lm(d2H ~ true_d2H + file_datetime),
delta_cross_time = lm(d2H ~ true_d2H * file_datetime),
# multivariate with delta, amplitude and datetime
delta_and_ampl_and_time = lm(d2H ~ true_d2H + amp2 + file_datetime),
# multivariate with delta cross amplitude and datetime
delta_cross_ampl_and_time = lm(d2H ~ true_d2H * amp2 + file_datetime)
),
# specify which peaks to include in the calibration, here:
# - all std_peaks (this filter should always be included!)
use_in_calib = is_std_peak
)
## Info: preparing data for calibration by nesting the entire dataset
## Info: generating calibration based on 7 models (delta_only = 'lm(d2H ~ true_d2H)', delta_and_ampl = 'lm(d2H ~ true_d2H + amp2)', delta_cross_ampl = 'lm(d2H ~ true_d2H * amp2)', delta_and_time = 'lm(d2H ~ true_d2H + file_datetime)', delta_cross_time = 'lm(d2H ~ true_d2H * file_datetime)', delta_and_ampl_and_time = 'lm(d2H ~ true_d2H + amp2 + file_datetime)', delta_cross_ampl_and_time = 'lm(d2H ~ true_d2H * amp2 + file_datetime)') for 1 data group(s) with standards filter 'is_std_peak'. Storing residuals in new column 'resid'. Storing calibration info in new column 'in_calib'.
# look at coefficients and summary
global_calibs %>%
# unnest calibration parameters
iso_get_calibration_parameters(
select_from_coefs =
c(term, estimate, SE = std.error, signif),
select_from_summary =
c(fit_R2 = adj.r.squared, fit_RMSD = deviance, residual_df = df.residual)) %>%
arrange(term) %>%
knitr::kable(digits = 4)
## Info: retrieving coefficient column(s) 'c(term, estimate, SE = std.error, signif)' for calibration
## Info: retrieving summary column(s) 'c(fit_R2 = adj.r.squared, fit_RMSD = deviance, residual_df = df.residual)' for calibration
calib | calib_ok | calib_points | term | estimate | SE | signif | fit_R2 | fit_RMSD | residual_df |
---|---|---|---|---|---|---|---|---|---|
delta_only | TRUE | 30 | (Intercept) | 317.9747 | 4.3703 | *** (p < 0.001) | 0.9892 | 1689.9089 | 28 |
delta_and_ampl | TRUE | 30 | (Intercept) | 341.7586 | 4.4711 | *** (p < 0.001) | 0.9958 | 633.5522 | 27 |
delta_cross_ampl | TRUE | 30 | (Intercept) | 347.7830 | 8.7063 | *** (p < 0.001) | 0.9958 | 618.0219 | 26 |
delta_and_time | TRUE | 30 | (Intercept) | 315531.6535 | 185300.9306 | - | 0.9899 | 1526.3258 | 27 |
delta_cross_time | TRUE | 30 | (Intercept) | 771284.6767 | 922270.6094 | - | 0.9896 | 1511.5145 | 26 |
delta_and_ampl_and_time | TRUE | 30 | (Intercept) | 100020.2107 | 125160.3503 | - | 0.9957 | 618.4649 | 26 |
delta_cross_ampl_and_time | TRUE | 30 | (Intercept) | 123766.3435 | 127608.5502 | - | 0.9957 | 595.7318 | 25 |
delta_and_ampl | TRUE | 30 | amp2 | -0.0095 | 0.0014 | *** (p < 0.001) | 0.9958 | 633.5522 | 27 |
delta_cross_ampl | TRUE | 30 | amp2 | -0.0121 | 0.0035 | ** (p < 0.01) | 0.9958 | 618.0219 | 26 |
delta_and_ampl_and_time | TRUE | 30 | amp2 | -0.0092 | 0.0015 | *** (p < 0.001) | 0.9957 | 618.4649 | 26 |
delta_cross_ampl_and_time | TRUE | 30 | amp2 | -0.0123 | 0.0035 | ** (p < 0.01) | 0.9957 | 595.7318 | 25 |
delta_and_time | TRUE | 30 | file_datetime | -0.0002 | 0.0001 | - | 0.9899 | 1526.3258 | 27 |
delta_cross_time | TRUE | 30 | file_datetime | -0.0005 | 0.0006 | - | 0.9896 | 1511.5145 | 26 |
delta_and_ampl_and_time | TRUE | 30 | file_datetime | -0.0001 | 0.0001 | - | 0.9957 | 618.4649 | 26 |
delta_cross_ampl_and_time | TRUE | 30 | file_datetime | -0.0001 | 0.0001 | - | 0.9957 | 595.7318 | 25 |
delta_only | TRUE | 30 | true_d2H | 1.2459 | 0.0242 | *** (p < 0.001) | 0.9892 | 1689.9089 | 28 |
delta_and_ampl | TRUE | 30 | true_d2H | 1.2578 | 0.0152 | *** (p < 0.001) | 0.9958 | 633.5522 | 27 |
delta_cross_ampl | TRUE | 30 | true_d2H | 1.2944 | 0.0477 | *** (p < 0.001) | 0.9958 | 618.0219 | 26 |
delta_and_time | TRUE | 30 | true_d2H | 1.2459 | 0.0234 | *** (p < 0.001) | 0.9899 | 1526.3258 | 27 |
delta_cross_time | TRUE | 30 | true_d2H | 2593.2998 | 5135.2990 | - | 0.9896 | 1511.5145 | 26 |
delta_and_ampl_and_time | TRUE | 30 | true_d2H | 1.2574 | 0.0153 | *** (p < 0.001) | 0.9957 | 618.4649 | 26 |
delta_cross_ampl_and_time | TRUE | 30 | true_d2H | 1.3024 | 0.0485 | *** (p < 0.001) | 0.9957 | 595.7318 | 25 |
delta_cross_ampl | TRUE | 30 | true_d2H:amp2 | 0.0000 | 0.0000 | - | 0.9958 | 618.0219 | 26 |
delta_cross_ampl_and_time | TRUE | 30 | true_d2H:amp2 | 0.0000 | 0.0000 | - | 0.9957 | 595.7318 | 25 |
delta_cross_time | TRUE | 30 | true_d2H:file_datetime | 0.0000 | 0.0000 | - | 0.9896 | 1511.5145 | 26 |
The visualization of the calibration parameters reveals that as expected \(^{2}\delta_{true}\) (the scale contraction) and \(A_{2}\) (the linear intensity term) are statistically relevant (***
= p.value < 0.001). DT is not significant. See residuals below for additional information on these multivariate calibration regressions.
global_calibs %>% iso_plot_calibration_parameters()
Apply the calibrations as discussed above. Inversion of the calibration is done with the investr
package. Standard errors for each data point based on the calibration are calculated using binomial proportion confidence intervals (Wald intervals).
global_calibs_applied <-
global_calibs %>%
# which calibration to use? can include multiple if desired to see the result
# in this case, the amplitude- and time-conscious calibrations are not necessary
filter(calib == "delta_and_ampl") %>%
# apply calibration indication what should be calculated
iso_apply_calibration(true_d2H, calculate_error = TRUE)
## Info: applying calibration to infer 'true_d2H' for 1 data group(s); storing resulting value in new column 'true_d2H_pred' and estimated error in new column 'true_d2H_pred_se'. This may take a moment... finished.
# calibration ranges
global_calibs_with_ranges <-
global_calibs_applied %>%
# evaluate calibration range for the measured amplitude and predicted d2H
iso_evaluate_calibration_range(amp2, true_d2H_pred)
## Info: evaluating range for terms 'amp2' and 'true_d2H_pred' in calibration for 1 data group(s); storing resulting summary for each data entry in new column 'in_range'.
# show calibration ranges
global_calibs_with_ranges %>%
iso_get_calibration_range() %>%
iso_remove_list_columns() %>%
knitr::kable(d = 2)
## Info: retrieving all calibration range information for calibration
calib | calib_ok | calib_points | term | units | min | max |
---|---|---|---|---|---|---|
delta_and_ampl | TRUE | 30 | amp2 | mV | 1124.32 | 3474.23 |
delta_and_ampl | TRUE | 30 | true_d2H_pred | permil | -243.26 | -55.26 |
# create calibrated peak table
peak_table_calibrated <- global_calibs_with_ranges %>%
iso_get_calibration_data()
## Info: retrieving all data
Some samples are outside the \(\delta^{2}\text{H}\) range covered by the standards. It is hard to find standards that covers such a wide range of \(\delta^{2}\text{H}\) values as these samples, so there is not much that can be done about that. Anyway, the calibration is pretty linear and covers quite a wide range of \(\delta^{2}\text{H}\) values, so applying this calibration to the samples should still be valid.
# replicate earlier overview plot but now with the calibrated delta values
# and with a highlight of the calibration ranges and which points are in range
peak_table_calibrated %>%
# focus on identified peaks (comment out this line to see ALL peaks)
filter(!is.na(compound)) %>%
# visualize with convenience function iso_plot_data
iso_plot_data(
# choose x and y (multiple y possible)
x = amp2, y = true_d2H_pred,
# choose aesthetics
color = in_range, shape = is_std_peak, label = compound, size = 3,
# decide what geoms to include
points = TRUE
) %>%
# highlight calibration range
iso_mark_calibration_range() +
# legend
theme(legend.position = "bottom", legend.direction = "vertical")
# generate data summary
peak_data <-
peak_table_calibrated %>%
# focus on identified peaks in the samples
filter(!is.na(compound))
# summarize replicates
peak_data_summary <-
peak_data %>%
# summarize for each sample and compound
group_by(id1, compound) %>%
iso_summarize_data_table(amp2, true_d2H_pred, true_d2H_pred_se) %>% select(-`true_d2H_pred_se sd`)
# add column in which d2H is rounded to permil
peak_data_summary <- peak_data_summary %>% mutate(`d2H rounded to integer` = round(`true_d2H_pred mean`, 0))
# print
peak_data_summary %>% iso_make_units_explicit() %>% knitr::kable(d = 2)
id1 | compound | n | amp2 mean [mV] | amp2 sd | true_d2H_pred mean [permil] | true_d2H_pred sd | true_d2H_pred_se mean [permil] | d2H rounded to integer [permil] |
---|---|---|---|---|---|---|---|---|
043332T | CH4 | 3 | 2156.45 | 52.74 | -163.64 | 2.79 | 3.92 | -164 |
516256 | CH4 | 5 | 2367.93 | 944.83 | -62.53 | 5.05 | 4.23 | -63 |
BA1A108132_BS2A | CH4 | 1 | 1219.12 | NA | 44.75 | NA | 4.93 | 45 |
BA1D102132_BS2A | CH4 | 1 | 1766.61 | NA | -112.31 | NA | 4.03 | -112 |
BA1D4575_BS2A | CH4 | 1 | 1870.08 | NA | -111.16 | NA | 4.02 | -111 |
Beecher Island II | CH4 | 7 | 2169.00 | 728.10 | -221.82 | 4.90 | 4.03 | -222 |
Bio 1.0 Mid | CH4 | 5 | 2208.53 | 745.13 | -241.11 | 1.92 | 4.07 | -241 |
H-ISO1 | CH4 | 7 | 2399.41 | 646.92 | -156.83 | 1.65 | 3.98 | -157 |
NSHQ14_BS1B | CH4 | 1 | 2228.88 | NA | -263.11 | NA | 4.07 | -263 |
NSHQ14NP_HS2 | CH4 | 1 | 2172.97 | NA | -250.61 | NA | 4.03 | -251 |
T-ISO1 | CH4 | 3 | 2477.91 | 49.73 | -158.11 | 1.69 | 3.92 | -158 |
WAB56_BS1A | CH4 | 1 | 2554.03 | NA | -346.62 | NA | 4.48 | -347 |
WAB71_BS1B | CH4 | 1 | 1496.00 | NA | -303.87 | NA | 4.28 | -304 |
Save data to xlsx spreadsheet.
# export the global calibration with all its information and data to Excel
peak_table_calibrated %>%
iso_export_calibration_to_excel(
filepath = format(Sys.Date(), "data_output/%Y%m%d_OM19_CH4_H_data_calibrated.xlsx"),
# include data summary as an additional useful tab
`data summary` = peak_data_summary
)
## Info: exporting calibrations into Excel '20200812_OM19_CH4_H_data_calibrated.xlsx'...
## Info: retrieving all data
## Info: retrieving all coefficient information for calibration
## Info: retrieving all summary information for calibration
## Info: retrieving all calibration range information for calibration
## Info: export complete, created tabs 'data summary', 'all data', 'calib coefs', 'calib summary' and 'calib range'.