1 Oman 2019 C\(_1\)-C\(_6\) alkane \(\delta^{13}\)C - Introduction

C\(_1\)-C\(_6\) alkane \(\delta^{13}\)C was measured by GC-C-IRMS as described in the methods section of the manuscript. The following code steps through the data processing and calibration to convert measured values to values vs. the international reference standard (Vienna Pee Dee Belemnite, VPDB) using CH\(_4\) isotope standards from USGS and Airgas. The gas samples processed in this document were collected in Oman in January 2019.

2 Load libraries

# load libraries
library(tidyverse) # dplyr, tidyr, ggplot
library(isoreader) # reading isotope data files
library(isoprocessor) # processing isotope data files

# global knitting options for automatic saving of all plots as .png and .pdf. Also sets cache directory.
knitr::opts_chunk$set(
  dev = c("png", "pdf"), fig.keep = "all",
  dev.args = list(pdf = list(encoding = "WinAnsi", useDingbats = FALSE)),
  fig.path = file.path("fig_output/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input()))),
  cache.path = file.path("cache/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input())))
)

Data processed using the packages isoreader version 1.2.1 and isoprocessor version 0.6.7.

3 Load Data

# read files
iso_files_raw <-
  file.path(
    "data_raw/191218_OM19_C1-C3-HCs_d13C", "191218_OM19_C1-C3-HCs_d13C.cf.rds"
  ) %>%
    # read data files in parallel for fast read
  iso_read_continuous_flow(parallel = TRUE)

4 Process file info & peak table

# filter out unecessary files
iso_files_raw <- iso_files_raw %>% iso_filter_files(file_id != "BF6013__100 ppm C1-C6 mix_1000.dxf" & file_id !=  "BF6014__CO2 zero_.dxf" & file_id != "BF6015__CO2 zero_.dxf" & file_id !=  "BF6016__CO2 zero_.dxf" & file_id !=  "BF6017__Linearity CO2_.dxf" & file_id !=  "BF6018__H-ISO1_00200.dxf" & file_id !=  "BF6023__516256_00100.dxf" & file_id !=  "BF6027__NSHQ14_BS1B_COG16001_00700.dxf")

## Info: applying file filter, keeping 19 of 27 files

# process
iso_files <- iso_files_raw %>%
  # set peak table from vendor data table
  iso_set_peak_table_from_auto_vendor_data_table() %>%
  # rename key file info columns
  iso_rename_file_info(id1 = `Identifier 1`, id2 = `Identifier 2`) %>%
  # parse text info into numbers
  iso_parse_file_info(number = Analysis) %>%
  # process other file information that is specific to the naming conventions
  # of this particular sequence
  iso_mutate_file_info(
    # was there seed oxidation?
    seed_oxidation = ifelse(`Seed Oxidation` == "1", "yes", "no"),
    # what was the injection volume?
    inject_volume = parse_number(Comment) %>% iso_double_with_units("uL"),
    # what folder are the data files in? (assuming folder = sequence)
    folder = basename(dirname(file_path))
  )

## Warning: 'iso_set_peak_table_from_auto_vendor_data_table' has been renamed
## to 'iso_set_peak_table_automatically_from_vendor_data_table'. Please call the
## latter function directly to avoid this warning.

## Info: setting peak table for 19 file(s) from vendor data table with the following renames:
## - for 19 file(s): 'Nr.'->'peak_nr', 'Is Ref.?'->'is_ref', 'Start'->'rt_start', 'Rt'->'rt', 'End'->'rt_end', 'Ampl 44'->'amp44', 'Ampl 45'->'amp45', 'Ampl 46'->'amp46', 'BGD 44'->'bgrd44_start', 'BGD 45'->'bgrd45_start', 'BGD 46'->'bgrd46_start', 'BGD 44'->'bgrd44_end', 'BGD 45'->'bgrd45_end', 'BGD 46'->'bgrd46_end', 'rIntensity 44'->'area44', 'rIntensity 45'->'area45', 'rIntensity 46'->'area46', 'rR 45CO2/44CO2'->'r45/44', 'rR 46CO2/44CO2'->'r46/44', 'rd 45CO2/44CO2'->'rd45/44', 'rd 46CO2/44CO2'->'rd46/44', 'd 45CO2/44CO2'->'d45/44', 'd 46CO2/44CO2'->'d46/44', 'd 13C/12C'->'d13C', 'd 18O/16O'->'d18O', 'd 17O/16O'->'d17O', 'AT% 13C/12C'->'at13C', 'AT% 18O/16O'->'at18O'

## Info: renaming the following file info across 19 data file(s): 'Identifier 1'->'id1', 'Identifier 2'->'id2'

## Warning: 'rename = c(id1 = `Identifier 1`, id2 = `Identifier 2`)' refers to unknown column(s) in data frame 'file_info':
## - Can't subset columns that don't exist. x Column `Identifier 2` doesn't exist.

## Info: parsing 1 file info columns for 19 data file(s):
##  - to number: 'Analysis'

## Info: mutating file info for 19 data file(s)

4.1 Chromatograms

Display chromatograms of samples and standards for CH\(_4\) analyses.

# plot the chromatograms
chroms_C1 <- iso_files %>%
  # select CH4 analyses
  iso_filter_files(str_detect(`GC Method`, "C2") == FALSE) %>%
  iso_plot_continuous_flow_data(
     # select data and aesthetics
    data = c(44),
    color = id1
  )

## Info: applying file filter, keeping 17 of 19 files

# plot
chroms_C1

Display chromatograms of samples for C\(_2\)-C\(_6\) alkane analyses.

# plot the chromatograms
chroms_C2_C6 <- iso_files %>%
    # select C2-C6 alkane analyses
  iso_filter_files(str_detect(`GC Method`, "C2") == TRUE) %>%
  iso_plot_continuous_flow_data(
    # select data and aesthetics
    data = c(44),
    color = id1
  )

## Info: applying file filter, keeping 2 of 19 files

# plot
chroms_C2_C6

4.2 Peak maps

# set peak maps
peak_maps <-
  tibble::tribble(
    ~compound, ~ref_nr, ~`rt`,
    # peak map data (row-by-row)
    "CO2",          1,      39,
    "CO2",          2,      74,
    "CO2",          3,      108,
    "CO2",          4,      143,
    "CH4",          NA,     284,
    "CO2",          NA,     723,
    "C2",           NA,     1402,
    "C3",           NA,     2525,
    "C4-iso",       NA,     3082,
    "C4-n",         NA,     3167,
    "C5-iso",       NA,     3638,
    "C5-n",         NA,     3692,
    "C6",           NA,     4145
  )

# print peak maps
peak_maps %>% knitr::kable(digits = 0)

compound	ref_nr	rt
CO2	1	39
CO2	2	74
CO2	3	108
CO2	4	143
CH4	NA	284
CO2	NA	723
C2	NA	1402
C3	NA	2525
C4-iso	NA	3082
C4-n	NA	3167
C5-iso	NA	3638
C5-n	NA	3692
C6	NA	4145

4.3 Fetch peak table

# identify peaks
peak_table_w_ids <- iso_files %>%
  iso_map_peaks(peak_maps) %>%
  # peak table
  iso_get_peak_table(include_file_info = everything())

## Warning in max(..n_overlapping..): no non-missing arguments to max; returning -
## Inf

## Warning in max(..n_matches..): no non-missing arguments to max; returning -Inf

## Info: 105 of 119 peaks in 19 files were successfully mapped using a single peak map. 14 peak(s) could not be mapped. 123 expected peak(s) are missing.

## Info: aggregating peak table from 19 data file(s), including file info 'everything()'

Display an example chromatogram for CH\(_4\) with mapped peaks. Note: the small, late peak is CO\(_2\), which was partially backflushed.

# make plot
chroms_C1_mapped <- iso_files %>%
  iso_filter_files(str_detect(id1, "WAB71_BS1B_COG16000") == TRUE & str_detect(`GC Method`, "C2") == FALSE) %>%
  iso_plot_continuous_flow_data(
    # select data and aesthetics
    data = c(44),
    color = id1,
    # provide our peak table with ids
    peak_table = peak_table_w_ids,
    # define peak labels, this can be any valid expression
    peak_label = iso_format(id = peak_info),
    # specify which labels to show (removing the !is_identified or putting a
    # restriction by retention time can help a lot with busy chromatograms)
    peak_label_filter = is_identified | !is_identified | is_missing
  )

## Info: applying file filter, keeping 1 of 19 files

# print plot
chroms_C1_mapped

Display an example chromatogram of C\(_2\)-C\(_6\) alkane analyses with mapped peaks.

# plot the chromatograms
chroms_C2_C6_mapped <- iso_files %>%
    # select C2-C6 alkane analyses
  iso_filter_files(file_id == "BF6029__NSHQ14_BS1B_COG16001_03000.dxf") %>%
  iso_plot_continuous_flow_data(
    # select data and aesthetics
    data = c(44),
    color = file_id,
    # zoom in on time interval
    time_interval = c(1250, 4200),
    # provide our peak table with ids
    peak_table = peak_table_w_ids,
    # define peak labels, this can be any valid expression
    peak_label = iso_format(id = peak_info),
    # specify which labels to show (removing the !is_identified or putting a
    # restriction by retention time can help a lot with busy chromatograms)
    peak_label_filter = is_identified
  ) +
  # customize resulting ggplot
  theme(legend.position = "bottom")

## Info: applying file filter, keeping 1 of 19 files

# print
chroms_C2_C6_mapped

4.4 Select analyte peaks

# focus on analyte peaks

 # We separately tested and determined the limit of quantitation (defined here as amplitude yielding d13C sample standard deviation of 0.6 per mil over studied amplitude range) for our intrument setup.

peak_table_analytes <- peak_table_w_ids %>% 
    filter(amp44 > 300)

  # omit reference peaks for downstream processing (i.e. analyte peaks only) and select only peaks where good chromatographic separation has been demonstrated. Note: baseline separation of C5+ alkanes was not comprehensively determined for the chromatographic method used for these samples, so only C1-C4 alkane data are used for downstream processing.

peak_table_analytes <- peak_table_analytes %>%  
  filter(compound == "CH4" | compound == "C2" | compound == "C3" | compound == "C4-n" | compound == "C4-iso")

# print
peak_table_analytes

5 Isotope standard values

5.1 Load isotope standards

# add stnd accepted values
standards <-
  tibble::tribble(
    ~id1,                 ~true_d13C,
    "T-ISO1",                   -38.3,
    "H-ISO1",                   -23.9,
    "Bio 1.0 mid",          -68.6,
    "Beecher Island II",  -61.39,
    "516256",             -43.8,
    "043332T",            -39.82,
    
  ) %>%
  mutate(
    true_d13C = iso_double_with_units(true_d13C, "permil")
  )

# print
standards %>% knitr::kable(digits = 2)

id1	true_d13C
T-ISO1	-38.30
H-ISO1	-23.90
Bio 1.0 mid	-68.60
Beecher Island II	-61.39
516256	-43.80
043332T	-39.82

5.2 Add isotope standards

peak_table_w_stds <-
  peak_table_analytes %>%
  iso_add_standards(stds = standards, match_by = c("id1"))

## Info: matching standards by 'id1' - added 5 standard entries to 10 out of 20 rows, added new column 'is_std_peak' to identify standard peaks

5.3 Initial overview plots of data

Accepted \(\delta^{13}\text{C}\) (VPDB) plotted against measured \(\delta^{13}\text{C}\) (vs. lab CO\(_2\) tank) of standards. Coefficients and \(\text{r}^2\) value of regression reported further below.

# generate plot
plot_stnds_accepted_measured <- peak_table_w_stds %>% filter(is_std_peak == TRUE) %>%
  # use generic data plot function
  iso_plot_data(
    x = true_d13C,
    y = d13C,
    color = id1,
    points = TRUE
  )

# print
plot_stnds_accepted_measured

Standards are in small amplitude range, but separate tests showed that there is an acceptably small trend of \(\delta^{13}\)C vs. peak amplitude over the range of peak amplitudes in sample analyses reported here.

# generate plot
plot_amp_d13C <- peak_table_w_stds %>%
  # use generic data plot function
  iso_plot_data(
    x = amp44, y = d13C, color = is_std_peak,
    points = TRUE
  )

# print
plot_amp_d13C

Standards cover the sample time range. Standard delta analyses appear pretty constant over time.

# generate plot
plot_d13C_time <- peak_table_w_stds %>%
  # use generic data plot function
  iso_plot_data(
    x = file_datetime, y = d13C, color = is_std_peak,
    points = TRUE
  )

# print
plot_d13C_time

6 Calibration

6.1 Generate a calibration with linear regression

Note: Separate testing indicated that the effect of peak amplitude on \(\delta^{13}\)C (i.e. linearity) was within a repeatability of 0.6 per mil over the range of peak amplitudes measured here. Since only CH\(_4\) isotope standards were available to us for testing, and the peak amplitude/\(\delta^{13}\)C effect observed for these standards may differ from those observed for \(\text{C}_{2}-\text{C}_6\) alkanes, which have different peak shapes than CH\(_4\), calibrations based on peak amplitude are not applied here.

calibs <- peak_table_w_stds %>%
  # prepare for calibration
  iso_prepare_for_calibration() %>%
  # run calibrations
  iso_generate_calibration(
    model = c(
      # reference scale correction
      delta_only = lm(d13C ~ true_d13C),
      # multivariate with delta and the datetime (i.e. checking for temporal drift)
      delta_and_time = lm(d13C ~ true_d13C + file_datetime)
    ),
    # specify which peaks to include in the calibration, here:
    # - all std_peaks (this filter should always be included!)
    use_in_calib = is_std_peak
  )

## Info: preparing data for calibration by nesting the entire dataset

## Info: generating calibration based on 2 models (delta_only = 'lm(d13C ~ true_d13C)', delta_and_time = 'lm(d13C ~ true_d13C + file_datetime)') for 1 data group(s) with standards filter 'is_std_peak'. Storing residuals in new column 'resid'. Storing calibration info in new column 'in_calib'.

6.2 Coefficients

# look at coefficients and summary
calibs %>%
  # unnest calibration parameters
  iso_get_calibration_parameters(
    select_from_coefs =
      c(term, estimate, SE = std.error, signif),
    select_from_summary =
      c(fit_R2 = adj.r.squared, fit_RMSD = deviance, residual_df = df.residual)) %>%
  arrange(term) %>%
  knitr::kable(digits = 4)

## Info: retrieving coefficient column(s) 'c(term, estimate, SE = std.error, signif)' for calibration

## Info: retrieving summary column(s) 'c(fit_R2 = adj.r.squared, fit_RMSD = deviance, residual_df = df.residual)' for calibration

calib	calib_ok	calib_points	term	estimate	SE	signif	fit_R2	fit_RMSD	residual_df
delta_only	TRUE	10	(Intercept)	9.7972	0.4556	*** (p < 0.001)	0.9989	1.2908	8
delta_and_time	TRUE	10	(Intercept)	6534.0817	10100.5566	-	0.9988	1.2182	7
delta_and_time	TRUE	10	file_datetime	0.0000	0.0000	-	0.9988	1.2182	7
delta_only	TRUE	10	true_d13C	0.9600	0.0106	*** (p < 0.001)	0.9989	1.2908	8
delta_and_time	TRUE	10	true_d13C	0.9597	0.0110	*** (p < 0.001)	0.9988	1.2182	7

6.3 Visualize Calibration Parameters

The visualization of the calibration parameters reveals that as expected the scale contraction is statistically relevant (*** = p.value < 0.001). Dt (drift) is not significant.

calibs %>% iso_plot_calibration_parameters()

6.4 Apply global calibration

calibs_applied <-
  calibs %>%
  # which calibration to use? can include multiple if desired to see the result
  # in this case, the amplitude- and time-conscious calibrations are not necessary
  filter(calib == "delta_only") %>%
  # apply calibration indication what should be calculated
  iso_apply_calibration(true_d13C, calculate_error = TRUE)

## Info: applying calibration to infer 'true_d13C' for 1 data group(s); storing resulting value in new column 'true_d13C_pred' and estimated error in new column 'true_d13C_pred_se'. This may take a moment... finished.

# calibration ranges
calibs_with_ranges <-
  calibs_applied %>%
  # evaluate calibration range for the measured amplitude and predicted d13C
  iso_evaluate_calibration_range(amp44, true_d13C_pred)

## Info: evaluating range for terms 'amp44' and 'true_d13C_pred' in calibration for 1 data group(s); storing resulting summary for each data entry in new column 'in_range'.

# show calibration ranges
calibs_with_ranges %>%
  iso_get_calibration_range() %>%
  iso_remove_list_columns() %>%
  knitr::kable(d = 2)

## Info: retrieving all calibration range information for calibration

calib	calib_ok	calib_points	term	units	min	max
delta_only	TRUE	10	amp44	mV	738.73	1086.04
delta_only	TRUE	10	true_d13C_pred	permil	-61.55	-23.51

# create calibrated peak table
peak_table_calibrated <- calibs_with_ranges %>%
  iso_get_calibration_data()

## Info: retrieving all data

7 Inspect Calibration

7.1 Overview

As noted above, standards are in small amplitude range, but separate tests showed that there is an acceptably small trend of \(\delta^{13}\)C vs. peak amplitude over this range of analyses, which make all the sample analyses valid on the basis of their peak amplitudes. Several points are above calibrated delta range, but it is hard to find standards as \(^{13}\)C-enriched as these samples, so there is not much that can be done about that. Anyway, the calibration is quite linear over a fairly large delta range, so these data should be fine.

# replicate earlier overview plot but now with the calibrated delta values
# and with a highlight of the calibration ranges and which points are in range
peak_table_calibrated %>%
  # focus on identified peaks (comment out this line to see ALL peaks)
  filter(!is.na(compound)) %>%
  # visualize with convenience function iso_plot_data
  iso_plot_data(
    # choose x and y (multiple y possible)
    x = amp44, y = true_d13C_pred,
    # choose aesthetics
    color = in_range, shape = is_std_peak, label = compound, size = 3,
    # decide what geoms to include
    points = TRUE
  ) %>%
  # highlight calibration range
  iso_mark_calibration_range() +
  # legend
  theme(legend.position = "bottom", legend.direction = "vertical")

7.2 Summary

# generate data summary and filter out suspect data
peak_data <-
  peak_table_calibrated %>%
  # focus on identified peaks in the samples
  filter(!is.na(compound)) %>% filter((file_id == "BF6030__NSHQ14_BS1B_COG16001_10000.dxf" & compound != "C2") | file_id != "BF6030__NSHQ14_BS1B_COG16001_10000.dxf") # filter this 10 mL inject of NSHQ14 for ethane because there was large background due to CH4 during elution of ethane

# summarize replicates
peak_data_summary <-
  peak_data %>%
  # summarize for each sample and compound
  group_by(id1, compound) %>%
  iso_summarize_data_table(amp44, true_d13C_pred, true_d13C_pred_se) %>% select(-`true_d13C_pred_se sd`)

# add column in which d13C is rounded to tenth of permil place
peak_data_summary <- peak_data_summary %>% mutate(`d13C rounded tenth` = round(`true_d13C_pred mean`, 1))

# print
peak_data_summary %>% iso_make_units_explicit() %>% knitr::kable(d = 2)

id1	compound	n	amp44 mean [mV]	amp44 sd	true_d13C_pred mean [permil]	true_d13C_pred sd	true_d13C_pred_se mean [permil]	d13C rounded tenth [permil]
043332T	CH4	2	866.74	11.54	-39.36	0.25	0.44	-39.4
516256	CH4	2	871.68	32.93	-43.82	0.14	0.44	-43.8
Beecher Island II	CH4	2	764.29	36.15	-61.38	0.24	0.49	-61.4
Bio 1.0 Mid	CH4	2	978.86	107.39	-69.19	0.55	0.53	-69.2
H-ISO1	CH4	2	1013.38	75.83	-23.85	0.47	0.48	-23.8
NSHQ14_BS1B_COG16001	C2	1	312.90	NA	-5.54	NA	0.59	-5.5
NSHQ14_BS1B_COG16001	C3	1	389.93	NA	4.85	NA	0.67	4.9
NSHQ14_BS1B_COG16001	CH4	1	2562.98	NA	7.21	NA	0.69	7.2
NSHQ14NP_HS2_COG16006	CH4	1	3303.07	NA	4.48	NA	0.67	4.5
T-ISO1	CH4	2	1027.00	83.50	-38.80	0.35	0.44	-38.8
WAB105_BS1A_COG16007	CH4	1	304.84	NA	-3.97	NA	0.60	-4.0
WAB56_BS1A_COG16003	CH4	1	2060.97	NA	-21.78	NA	0.49	-21.8
WAB71_BS1B_COG16000	CH4	1	1274.71	NA	7.04	NA	0.69	7.0

8 Export

Save data to xlsx spreadsheet.

# export the global calibration with all its information and data to Excel
peak_table_calibrated %>%
  iso_export_calibration_to_excel(
    filepath = format(Sys.Date(), "data_output/%Y%m%d_OM19_C1-C6_HC_d13C_data_calibrated.xlsx"),
    # include data summary as an additional useful tab
    `data summary` = peak_data_summary
  )

## Info: exporting calibrations into Excel '20200812_OM19_C1-C6_HC_d13C_data_calibrated.xlsx'...

## Info: retrieving all data

## Info: retrieving all coefficient information for calibration

## Info: retrieving all summary information for calibration

## Info: retrieving all calibration range information for calibration

## Info: export complete, created tabs 'data summary', 'all data', 'calib coefs', 'calib summary' and 'calib range'.

Methane and short-chain alkane C isotope data processing for samples obtained in Oman in 2019

Source file: 191218_OM19_C1-C3-HCs_d13C.Rmd

Daniel Nothaft

12 Aug 2020