Evaluate viral MGS protocols: Dissociation treatments and Amicon MWCOs

R qPCR
Michael R. McLaren true
2023-06-24

This experiment compares two different Amicon molecular weight cut-offs (MWCOs), 30 kDa and 100 kDa, and 4 dissociation treatments (None, NaCl, Tween-20, and a method using beef extract (BE).

Drive folder

library(tidyverse)
library(fs)
library(here)

library(knitr)

library(broom)

# plotting helpers
library(cowplot)
library(patchwork)
library(ggbeeswarm)

theme_set(theme_cowplot())

# Okabe Ito color scheme with amber for yellow; see https://easystats.github.io/see/reference/scale_color_okabeito.html
colors_oi <- grDevices::palette.colors()  
colors_oi['yellow'] <- "#F5C710"

# Custon qPCR helpers
source('_functions.R')

Data import

fns <- '_data' %>%
  dir_ls(recurse = TRUE, glob = '*_Results_*.csv')
fns %>% path_file
[1] "2023-06-15_OSH-16S-Amicon_Results_20230616 113441.csv"          
[2] "2023-06-15_OSH_CrA-amicon_Results_20230616 114121.csv"          
[3] "2023-06-15_OSH-Phg-Amicon_Results_20230616 121105.csv"          
[4] "2023-06-15_OSH-Cov2-Amicon_Raw_Results_20230616 105748.csv"     
[5] "2023-06-15_OSH-Cov2-Amicon_Adjusted_Results_20230616 105816.csv"

I’ll just use the ‘Adjusted’ SARS2 results when I import,

fns_filt <- fns %>% str_subset(negate = TRUE, 'Raw')
fns_filt %>% path_file
[1] "2023-06-15_OSH-16S-Amicon_Results_20230616 113441.csv"          
[2] "2023-06-15_OSH_CrA-amicon_Results_20230616 114121.csv"          
[3] "2023-06-15_OSH-Phg-Amicon_Results_20230616 121105.csv"          
[4] "2023-06-15_OSH-Cov2-Amicon_Adjusted_Results_20230616 105816.csv"

We can use the following snippet to pull out the target name from the file path,

base <- fns %>% path_common
base_num <- base %>% path_split %>% .[[1]] %>% length
fns %>% path_split %>% map(tail, -base_num) %>% map_chr(head, 1)
[1] "[2023-06-15] 16S"        "[2023-06-15] Crassphage"
[3] "[2023-06-15] Phagemid"   "[2023-06-15] SARS-CoV-2"
[5] "[2023-06-15] SARS-CoV-2"
# Pattern for extracting experimental metadata from sample names; 
# Experimental metadata stored in qpcr sample name as '<sewer_system>_<amicon_mwco>_<dissoc_treatment>'
pat <- '(N|S)_(30|100)_(None|Tween|BE|NaCl)'

results <- tibble(file = fns_filt) %>%
  mutate(
    dir_name = file %>% path_split %>% map(tail, -base_num) %>% map_chr(head, 1),
    data = map(file, read_qpcr_results_csv)
  ) %>%
  # select(-file) %>%
  separate(dir_name, into = c('dir_date', 'dir_target'), remove = FALSE, sep = ' ') %>%
  mutate(
    dir_date = dir_date %>% lubridate::ymd(),
  ) %>%
  unnest(data) %>%
  # Extract experimental sample metadata from sample names
  mutate(
    match = str_match(sample, pat),
  ) %>%
  mutate(.keep = 'unused',
    sample_experiment = match[,1],
    sewer_system = match[,2],
    amicon_mwco = match[,3] %>% fct_relevel('30'),
    dissoc_treatment = match[,4] %>% fct_relevel('None'),
  ) %>%
  mutate(
    cq = ifelse(is.na(cq), 40, cq)
  ) %>%
  glimpse
Rows: 264
Columns: 32
$ file                  <chr> "_data/[2023-06-15] 16S/2023-06-15_OSH…
$ dir_name              <chr> "[2023-06-15] 16S", "[2023-06-15] 16S"…
$ dir_date              <date> 2023-06-15, 2023-06-15, 2023-06-15, 2…
$ dir_target            <chr> "16S", "16S", "16S", "16S", "16S", "16…
$ well                  <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 14, 15,…
$ well_position         <chr> "A1", "A2", "A3", "A4", "A5", "A6", "A…
$ row                   <ord> A, A, A, A, A, A, A, A, A, B, B, B, B,…
$ column                <ord> 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4,…
$ omit                  <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
$ sample                <chr> "N_30_None", "N_30_None", "N_30_None",…
$ target                <chr> "16S", "16S", "16S", "16S", "16S", "16…
$ task                  <chr> "UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKN…
$ reporter              <chr> "FAM", "FAM", "FAM", "FAM", "FAM", "FA…
$ quencher              <chr> "NFQ-MGB", "NFQ-MGB", "NFQ-MGB", "NFQ-…
$ amp_status            <chr> "Amp", "Amp", "Amp", "Amp", "Amp", "No…
$ amp_score             <dbl> 1.5151167, 1.5159783, 1.4994412, 1.228…
$ curve_quality         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ result_quality_issues <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ cq                    <dbl> 21.95765, 21.82037, 21.87888, 31.92359…
$ cq_confidence         <dbl> 0.9866581, 0.9902164, 0.9869926, 0.938…
$ cq_mean               <dbl> 21.88563, 21.88563, 21.88563, 31.79154…
$ cq_sd                 <dbl> 0.06889292, 0.06889292, 0.06889292, 0.…
$ auto_threshold        <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
$ threshold             <dbl> 0.1680576, 0.1680576, 0.1680576, 0.168…
$ auto_baseline         <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
$ baseline_start        <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
$ baseline_end          <int> 17, 16, 17, 25, 25, 39, 14, 24, 15, 16…
$ cq_status             <chr> "Determined", "Determined", "Determine…
$ sample_experiment     <chr> "N_30_None", "N_30_None", "N_30_None",…
$ sewer_system          <chr> "N", "N", "N", "N", "N", "N", NA, NA, …
$ amicon_mwco           <fct> 30, 30, 30, 30, 30, 30, NA, NA, NA, 30…
$ dissoc_treatment      <fct> None, None, None, BE, BE, BE, NA, NA, …

Note, in cases where the Cq value could not be determined, I set the value to 40.

results %>% count(dir_target)
# A tibble: 4 × 2
  dir_target     n
  <chr>      <int>
1 16S           66
2 Crassphage    66
3 Phagemid      69
4 SARS-CoV-2    63
results %>% count(dir_target, file %>% path_file)
# A tibble: 4 × 3
  dir_target `file %>% path_file`                                    n
  <chr>      <chr>                                               <int>
1 16S        2023-06-15_OSH-16S-Amicon_Results_20230616 113441.…    66
2 Crassphage 2023-06-15_OSH_CrA-amicon_Results_20230616 114121.…    66
3 Phagemid   2023-06-15_OSH-Phg-Amicon_Results_20230616 121105.…    69
4 SARS-CoV-2 2023-06-15_OSH-Cov2-Amicon_Adjusted_Results_202306…    63
# results %>% count(target)
results %>% count(dir_target, target)
# A tibble: 4 × 3
  dir_target target       n
  <chr>      <chr>    <int>
1 16S        16S         66
2 Crassphage CrA         66
3 Phagemid   Target 1    69
4 SARS-CoV-2 Target 1    63
# results %>% count(sample) %>% print(n=Inf)
results %>% count(sample) %>% deframe
          100.0          1000.0         10000.0        100000.0 
              3               3               3               3 
           10E3            10E4            10E5            10E6 
              3               3               3               3 
           10E7              75             750            7500 
              3               3               3               3 
          75000          750000 CPT ultrafilter        N_100_BE 
              3               3               3              12 
     N_100_NaCl      N_100_None     N_100_Tween         N_30_BE 
             12              12              12              12 
      N_30_NaCl       N_30_None      N_30_Tween        S_100_BE 
             12              12              12              12 
     S_100_NaCl      S_100_None     S_100_Tween         S_30_BE 
             12              12              12              12 
      S_30_NaCl       S_30_None      S_30_Tween           Std 1 
             12              12              12               3 
          Std 2           Std 3           Std 4           Std 5 
              3               3               3               3 
           <NA> 
             12 
results %>% count(sample_experiment) %>% deframe
   N_100_BE  N_100_NaCl  N_100_None N_100_Tween     N_30_BE 
         12          12          12          12          12 
  N_30_NaCl   N_30_None  N_30_Tween    S_100_BE  S_100_NaCl 
         12          12          12          12          12 
 S_100_None S_100_Tween     S_30_BE   S_30_NaCl   S_30_None 
         12          12          12          12          12 
 S_30_Tween        <NA> 
         12          72 

TODO: Check status of samples with no name; are these blanks?

Load amplification data,

fns_amp <- '_data' %>%
  dir_ls(recurse = TRUE, glob = '*_Amplification Data_*.csv') %>%
  str_subset(negate = TRUE, 'Raw')

amp <- tibble(file = fns_amp) %>%
  mutate(
    dir_name = file %>% path_split %>% map(tail, -base_num) %>% map_chr(head, 1),
    data = map(file, read_qpcr_amplification_csv)
  ) %>%
  # select(-file) %>%
  separate(dir_name, into = c('dir_date', 'dir_target'), remove = FALSE, sep = ' ') %>%
  mutate(
    dir_date = dir_date %>% lubridate::ymd(),
  ) %>%
  unnest(data) %>%
  glimpse
Rows: 10,680
Columns: 14
$ file          <chr> "_data/[2023-06-15] 16S/2023-06-15_OSH-16S-Ami…
$ dir_name      <chr> "[2023-06-15] 16S", "[2023-06-15] 16S", "[2023…
$ dir_date      <date> 2023-06-15, 2023-06-15, 2023-06-15, 2023-06-1…
$ dir_target    <chr> "16S", "16S", "16S", "16S", "16S", "16S", "16S…
$ well          <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ well_position <chr> "A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1"…
$ row           <ord> A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A…
$ column        <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ cycle_number  <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,…
$ target        <chr> "16S", "16S", "16S", "16S", "16S", "16S", "16S…
$ rn            <dbl> 2.798063, 2.799998, 2.801923, 2.808614, 2.8092…
$ d_rn          <dbl> -0.0024182955, -0.0022691886, -0.0021315257, 0…
$ sample        <chr> "N_30_None", "N_30_None", "N_30_None", "N_30_N…
$ omit          <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
results_min <- results %>%
  select(dir_name, well, task:dissoc_treatment)
amp <- amp %>% 
  left_join(results_min, by = c('dir_name', 'well'))

TODO: find a better way to import the amplification data and results at the same time

Get the baseline coordinates for plotting,

baselines <- results %>%
  pivot_longer(
    cols = c(baseline_start, baseline_end),
    names_to = 'baseline_boundary',
    values_to = 'cycle_number',
    names_prefix = 'baseline_',
  ) %>%
  left_join(
    amp %>% select(well_position, cycle_number, rn, d_rn), 
    by = c('well_position', 'cycle_number')
  ) %>%
  glimpse
Rows: 2,094
Columns: 34
$ file                  <chr> "_data/[2023-06-15] 16S/2023-06-15_OSH…
$ dir_name              <chr> "[2023-06-15] 16S", "[2023-06-15] 16S"…
$ dir_date              <date> 2023-06-15, 2023-06-15, 2023-06-15, 2…
$ dir_target            <chr> "16S", "16S", "16S", "16S", "16S", "16…
$ well                  <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,…
$ well_position         <chr> "A1", "A1", "A1", "A1", "A1", "A1", "A…
$ row                   <ord> A, A, A, A, A, A, A, A, A, A, A, A, A,…
$ column                <ord> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,…
$ omit                  <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
$ sample                <chr> "N_30_None", "N_30_None", "N_30_None",…
$ target                <chr> "16S", "16S", "16S", "16S", "16S", "16…
$ task                  <chr> "UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKN…
$ reporter              <chr> "FAM", "FAM", "FAM", "FAM", "FAM", "FA…
$ quencher              <chr> "NFQ-MGB", "NFQ-MGB", "NFQ-MGB", "NFQ-…
$ amp_status            <chr> "Amp", "Amp", "Amp", "Amp", "Amp", "Am…
$ amp_score             <dbl> 1.515117, 1.515117, 1.515117, 1.515117…
$ curve_quality         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ result_quality_issues <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ cq                    <dbl> 21.95765, 21.95765, 21.95765, 21.95765…
$ cq_confidence         <dbl> 0.9866581, 0.9866581, 0.9866581, 0.986…
$ cq_mean               <dbl> 21.88563, 21.88563, 21.88563, 21.88563…
$ cq_sd                 <dbl> 0.06889292, 0.06889292, 0.06889292, 0.…
$ auto_threshold        <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
$ threshold             <dbl> 0.1680576, 0.1680576, 0.1680576, 0.168…
$ auto_baseline         <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
$ cq_status             <chr> "Determined", "Determined", "Determine…
$ sample_experiment     <chr> "N_30_None", "N_30_None", "N_30_None",…
$ sewer_system          <chr> "N", "N", "N", "N", "N", "N", "N", "N"…
$ amicon_mwco           <fct> 30, 30, 30, 30, 30, 30, 30, 30, 30, 30…
$ dissoc_treatment      <fct> None, None, None, None, None, None, No…
$ baseline_boundary     <chr> "start", "start", "start", "start", "e…
$ cycle_number          <int> 3, 3, 3, 3, 17, 17, 17, 17, 3, 3, 3, 3…
$ rn                    <dbl> 2.8019226, 2.9759669, 5.9895606, 0.921…
$ d_rn                  <dbl> -0.0021315257, 0.0002468450, -0.014340…

Analysis

Three types of conditions:

  1. Sewer system (N or S)
  2. Amicon filter pore size / molecular weight cutoff
  3. Dissociation treatment
results %>%
  count(sewer_system, amicon_mwco, dissoc_treatment)
# A tibble: 17 × 4
   sewer_system amicon_mwco dissoc_treatment     n
   <chr>        <fct>       <fct>            <int>
 1 N            30          None                12
 2 N            30          BE                  12
 3 N            30          NaCl                12
 4 N            30          Tween               12
 5 N            100         None                12
 6 N            100         BE                  12
 7 N            100         NaCl                12
 8 N            100         Tween               12
 9 S            30          None                12
10 S            30          BE                  12
11 S            30          NaCl                12
12 S            30          Tween               12
13 S            100         None                12
14 S            100         BE                  12
15 S            100         NaCl                12
16 S            100         Tween               12
17 <NA>         <NA>        <NA>                72
results_main <- results %>%
  filter(!is.na(sample_experiment))
results_main %>%
  ggplot(aes(x = cq, y = amicon_mwco, color = dissoc_treatment)) +
  # theme_minimal_vgrid() +
  theme(
    legend.position = 'bottom',
    panel.spacing = unit(1, 'char'),
  ) +
  labs(
    y = 'Amicon MWCO (kDa)',
    x = 'Cq value',
    color = 'Dissoc. treatment',
  ) +
  facet_grid(sewer_system ~ dir_target, scales = 'free') +
  scale_color_manual(values = colors_oi %>% unname) +
  geom_vline(xintercept = 40, color = 'grey', size = 0.3) + 
  geom_quasirandom()

This figure splits out the qPCR results by target, Amicon MWCO and sewer system (N vs. S), with the points colors by dissociation treatment. The three points of the same color for a given MWCO and facet panel are qPCR replicates.

We can see that the impact of the BE treatment is highly variable. BE tends to increase the Cq of 16S (decrease abundance) relative to the other treatments, but the effect is minor relative to None in the N-100 sample and large in the other three. BE decreases Crassphage and phagemid Cq values in the N-100 and S-100 samples (relative to other treatments), but increases the Cq in the N-30 and S-30 samples. There is an extreme effect (increase in Cq) of BE on Crassphage in the S-30 sample, with two qPCR replicates failing to yield Cq values below 40. For SARS2, we see BE appears to decrease abundance to a large extent across all four samples, to the extent that we fail to get Cq values for any replicates except one N-100 replicate.

From these results, I suggest that we remove the BE treatment from further consideration, due to its high variance and strong negative effect on SARS2 measurement (along with the technical difficulty of applying it in the lab).

Inspect SARS2 amplification curves

condition_vars <- c('sewer_system', 'amicon_mwco', 'dissoc_treatment')
results %>%
  filter(dir_target == 'SARS-CoV-2') %>%
  summarize(.by = c(sample, condition_vars), num_na = sum(is.na(cq))) %>%
  arrange(dissoc_treatment) %>%
  print(n=Inf)
# A tibble: 21 × 5
   sample      sewer_system amicon_mwco dissoc_treatment num_na
   <chr>       <chr>        <fct>       <fct>             <int>
 1 N_30_None   N            30          None                  0
 2 S_30_None   S            30          None                  0
 3 N_100_None  N            100         None                  0
 4 S_100_None  S            100         None                  0
 5 N_30_BE     N            30          BE                    0
 6 S_30_BE     S            30          BE                    0
 7 N_100_BE    N            100         BE                    0
 8 S_100_BE    S            100         BE                    0
 9 N_30_NaCl   N            30          NaCl                  0
10 S_30_NaCl   S            30          NaCl                  0
11 N_100_NaCl  N            100         NaCl                  0
12 S_100_NaCl  S            100         NaCl                  0
13 N_30_Tween  N            30          Tween                 0
14 S_30_Tween  S            30          Tween                 0
15 N_100_Tween N            100         Tween                 0
16 S_100_Tween S            100         Tween                 0
17 100000.0    <NA>         <NA>        <NA>                  0
18 10000.0     <NA>         <NA>        <NA>                  0
19 1000.0      <NA>         <NA>        <NA>                  0
20 100.0       <NA>         <NA>        <NA>                  0
21 <NA>        <NA>         <NA>        <NA>                  0
  # arrange(desc(num_na))

HERE. Check amplification.

delta_rn_min <- 1e-3
ct_threshold <- results %>% filter(dir_target == 'SARS-CoV-2') %>% pull(threshold) %>% unique
stopifnot(length(ct_threshold) == 1)

amp %>%
  filter(
    dir_target == 'SARS-CoV-2',
    !is.na(sewer_system)
  ) %>%
  ggplot(aes(cycle_number, pmax(d_rn, delta_rn_min), color = dissoc_treatment)) +
  facet_grid(sewer_system ~ amicon_mwco, scales = 'free') +
  scale_color_manual(values = colors_oi %>% unname) +
  scale_y_log10() +
  geom_line(aes(group = well)) +
  geom_hline(yintercept = ct_threshold, alpha = 0.3) +
  # scale_color_brewer(type = 'qual') +
  # geom_point(data = baselines, aes(shape = baseline_boundary), size = 3) +
  # scale_shape_manual(values = c(1, 4)) +
  labs(y = 'Delta Rn', x = 'Cycle', color = 'Target')

delta_rn_min <- 1e-3
ct_threshold <- results %>% filter(dir_target == 'SARS-CoV-2') %>% pull(threshold) %>% unique
stopifnot(length(ct_threshold) == 1)

amp %>%
  filter(
    dir_target == 'SARS-CoV-2',
    !is.na(sewer_system)
  ) %>%
  ggplot(aes(cycle_number, pmax(rn, delta_rn_min), color = dissoc_treatment)) +
  facet_grid(sewer_system ~ amicon_mwco, scales = 'free') +
  scale_color_manual(values = colors_oi %>% unname) +
  scale_y_log10() +
  geom_line(aes(group = well)) +
  # scale_color_brewer(type = 'qual') +
  # geom_point(data = baselines, aes(shape = baseline_boundary), size = 3) +
  # scale_shape_manual(values = c(1, 4)) +
  labs(y = 'Rn', x = 'Cycle', color = 'Target')

Qubit measurements

url <- 'https://docs.google.com/spreadsheets/d/1OUp0DBVb3QOrELZ32-4C6Ia3QH1x_G95nCJFW2h8I4k'
qubit <- googlesheets4::read_sheet(url, col_types = 'ccnccc') %>%
  janitor::clean_names() %>%
  rename(
    sewer_system = system,
    amicon_mwco = mwco,
    dissoc_treatment = treatment,
    qubit_rna_raw = qubit_hs_rna_ng_ul,
    qubit_dna_raw = qubit_br_dna_ng_ul,
  ) %>%
  glimpse
Rows: 16
Columns: 6
$ sample           <chr> "N_30_Tween", "N_30_None", "N_30_NaCl", "N_…
$ sewer_system     <chr> "N", "N", "N", "N", "N", "N", "N", "N", "S"…
$ amicon_mwco      <dbl> 30, 30, 30, 30, 100, 100, 100, 100, 30, 30,…
$ dissoc_treatment <chr> "Tween", "None", "NaCl", "BE", "Tween", "No…
$ qubit_rna_raw    <chr> "0.52", "0.284", "0.24", "OOR (>10 ng/uL)",…
$ qubit_dna_raw    <chr> "OOR (<4 ng/uL)", "OOR (<4 ng/uL)", "OOR (<…
qubit_raw <- qubit
qubit <- qubit_raw %>%
  mutate(
    across(matches('qubit_(rna|dna)_raw'), 
      ~ifelse(str_detect(.x, 'OOR'), NA, .x) %>% as.numeric,
      .names = "{str_replace(.col, '_raw', '')}"
    ),
    qubit_rna_adj = case_when(
      qubit_rna_raw == 'OOR (>10 ng/uL)' ~ 10,
      qubit_rna_raw == 'OOR (<0.2 ng/uL)' ~ 0.2,
      TRUE ~ qubit_rna
    ),
    qubit_dna_adj = case_when(
      qubit_dna_raw == 'OOR (<4 ng/uL)' ~ 4,
      TRUE ~ qubit_dna
    ),
    across(amicon_mwco, as.factor),
    across(dissoc_treatment, ~fct_relevel(.x, 'None')),
  ) %>%
  glimpse
Rows: 16
Columns: 10
$ sample           <chr> "N_30_Tween", "N_30_None", "N_30_NaCl", "N_…
$ sewer_system     <chr> "N", "N", "N", "N", "N", "N", "N", "N", "S"…
$ amicon_mwco      <fct> 30, 30, 30, 30, 100, 100, 100, 100, 30, 30,…
$ dissoc_treatment <fct> Tween, None, NaCl, BE, Tween, None, NaCl, B…
$ qubit_rna_raw    <chr> "0.52", "0.284", "0.24", "OOR (>10 ng/uL)",…
$ qubit_dna_raw    <chr> "OOR (<4 ng/uL)", "OOR (<4 ng/uL)", "OOR (<…
$ qubit_rna        <dbl> 0.520, 0.284, 0.240, NA, 3.530, 1.300, 0.51…
$ qubit_dna        <dbl> NA, NA, NA, 146, NA, NA, NA, 333, NA, NA, N…
$ qubit_rna_adj    <dbl> 0.520, 0.284, 0.240, 10.000, 3.530, 1.300, …
$ qubit_dna_adj    <dbl> 4, 4, 4, 146, 4, 4, 4, 333, 4, 4, 4, 188, 4…
qubit %>%
  select(sewer_system:dissoc_treatment, qubit_rna_raw, qubit_dna_raw) %>%
  knitr::kable()
sewer_system amicon_mwco dissoc_treatment qubit_rna_raw qubit_dna_raw
N 30 Tween 0.52 OOR (<4 ng/uL)
N 30 None 0.284 OOR (<4 ng/uL)
N 30 NaCl 0.24 OOR (<4 ng/uL)
N 30 BE OOR (>10 ng/uL) 146
N 100 Tween 3.53 OOR (<4 ng/uL)
N 100 None 1.3 OOR (<4 ng/uL)
N 100 NaCl 0.511 OOR (<4 ng/uL)
N 100 BE OOR (>10 ng/uL) 333
S 30 Tween 1.97 OOR (<4 ng/uL)
S 30 None 2.73 OOR (<4 ng/uL)
S 30 NaCl OOR (<0.2 ng/uL) OOR (<4 ng/uL)
S 30 BE OOR (>10 ng/uL) 188
S 100 Tween 5.5 OOR (<4 ng/uL)
S 100 None 1.2 OOR (<4 ng/uL)
S 100 NaCl 0.351 OOR (<4 ng/uL)
S 100 BE OOR (>10 ng/uL) 379
qubit %>%
  select(sample:dissoc_treatment, matches('qubit_(rna|dna)_adj')) %>%
  pivot_longer(matches('qubit_(rna|dna)_adj')) %>%
  ggplot(aes(x = value, y = amicon_mwco, color = dissoc_treatment)) +
  # theme_minimal_vgrid() +
  theme(
    legend.position = 'bottom',
    panel.spacing = unit(1, 'char'),
  ) +
  labs(
    y = 'Amicon MWCO (kDa)',
    x = 'Concentration (ng/uL)',
    color = 'Dissoc. treatment',
  ) +
  facet_grid(sewer_system ~ name, scales = 'free') +
  scale_color_manual(values = colors_oi %>% unname) +
  # geom_vline(xintercept = 40, color = 'grey', size = 0.3) + 
  scale_x_log10() +
  # expand_limits(x = 0.1) +
  geom_point()

We see high (out of range) values for the BE-treated samples, which may be caused by bovine NA in the beef extract. We also see that the DNA measurements can’t distinguish the other treatments, due to the relatively high lower limit of the BR assay that was used.

Compared to our 2022 tests using DNA extractions, we have concentrated more wastewater than in our previous experiments (40 mL instead of 10 mL per sample). Those extractions used a different extraction kit. I would need to revisit those results to see if I’d expect to be seeing <4 ng/uL.

Let’s look at the RNA measurements of the non-BE treatments, where the Qubit results are most informative. Note that I have set the measurement that is OOR (below lower limit) to the lower limit value of 0.2.

qubit %>%
  filter(dissoc_treatment != 'BE') %>%
  ggplot(aes(x = qubit_rna_adj, y = amicon_mwco, color = dissoc_treatment)) +
  # theme_minimal_vgrid() +
  theme(
    legend.position = 'bottom',
    panel.spacing = unit(1, 'char'),
  ) +
  labs(
    y = 'Amicon MWCO (kDa)',
    x = 'Concentration (ng/uL)',
    color = 'Dissoc. treatment',
  ) +
  facet_grid(sewer_system ~ ., scales = 'free') +
  scale_x_log10() +
  # expand_limits(x = 0.1) +
  scale_color_manual(values = colors_oi %>% unname) +
  geom_vline(xintercept = 0.2, color = 'grey', size = 0.3) + 
  geom_point()

This evidence suggests that the NaCl treatment tends to give lower yields than the other two treatments, and that the Tween treatment tends to give higher yields than the None treatment, though the evidence is not that strong (particularly for the advantage of Tween over None) due to limited sample size.

Linear modeling

This was a warm-up exercise I did before looking at data carefully and making the above exploratory plots. Care should be taken in interpretting these results, including my quick attempts at interpreting them below.

Fit linear model to assess the effect of conditions on the Cq value of a single target.

fits <- results_main  %>%
  summarize(
    .by = c(sample_experiment, dir_target, sewer_system, amicon_mwco, dissoc_treatment),
    across(cq, mean)
  ) %>%
  nest(.by = dir_target) %>%
  mutate(.keep = 'unused',
    fit = map(data, ~lm(data = .x, cq ~ sewer_system + amicon_mwco + dissoc_treatment)),
    # fit_tidy = map(fit, broom::tidy)
  )
fits_summary <- fits %>%
  mutate(.keep = 'unused',
    tidy = map(fit, broom::tidy)
  ) %>%
  unnest(tidy)

Impact of dissoc treatment:

fits_summary %>%
  filter(str_detect(term, 'dissoc')) %>%
  select(dir_target, term, estimate, std.error, p.value) 
# A tibble: 12 × 5
   dir_target term                  estimate std.error    p.value
   <chr>      <chr>                    <dbl>     <dbl>      <dbl>
 1 16S        dissoc_treatmentBE       7.23      1.52  0.000787  
 2 16S        dissoc_treatmentNaCl    -0.255     1.52  0.870     
 3 16S        dissoc_treatmentTween   -1.25      1.52  0.429     
 4 Crassphage dissoc_treatmentBE       2.93      3.24  0.386     
 5 Crassphage dissoc_treatmentNaCl    -0.307     3.24  0.926     
 6 Crassphage dissoc_treatmentTween   -1.44      3.24  0.666     
 7 Phagemid   dissoc_treatmentBE       0.491     0.639 0.460     
 8 Phagemid   dissoc_treatmentNaCl     0.698     0.639 0.300     
 9 Phagemid   dissoc_treatmentTween   -0.192     0.639 0.770     
10 SARS-CoV-2 dissoc_treatmentBE       5.73      0.611 0.00000285
11 SARS-CoV-2 dissoc_treatmentNaCl    -0.119     0.611 0.850     
12 SARS-CoV-2 dissoc_treatmentTween   -1.10      0.611 0.101     

Thus it appears that BE helps by reducing bacteria. I don’t think we expected this result, and I’d want to understand it better.

Impact of Amicon filter type:

fits_summary %>%
  filter(str_detect(term, 'amicon')) %>%
  select(dir_target, term, estimate, std.error, p.value) 
# A tibble: 4 × 5
  dir_target term           estimate std.error p.value
  <chr>      <chr>             <dbl>     <dbl>   <dbl>
1 16S        amicon_mwco100 -0.400       1.08    0.718
2 Crassphage amicon_mwco100 -1.79        2.29    0.453
3 Phagemid   amicon_mwco100  0.00536     0.452   0.991
4 SARS-CoV-2 amicon_mwco100  0.491       0.432   0.283

No clear effect (large standard errors relative to average effect). For viruses except SASR2, data consistent with effects on the order of roughly 2-4-fold in either direction. For SARS2, data consistent with a 1-4-fold decrease with a mean est of 2-fold decrease.

Now, fit model to lo at cq differences of viruses relative to bacteria

Session info

Click for session info
sessioninfo::session_info()
─ Session info ─────────────────────────────────────────────────────────
 setting  value
 version  R version 4.3.0 (2023-04-21)
 os       Arch Linux
 system   x86_64, linux-gnu
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/New_York
 date     2023-06-24
 pandoc   3.0.1 @ /usr/bin/ (via rmarkdown)

─ Packages ─────────────────────────────────────────────────────────────
 package       * version date (UTC) lib source
 askpass         1.1     2019-01-13 [1] CRAN (R 4.0.0)
 backports       1.4.1   2021-12-13 [1] CRAN (R 4.1.2)
 beeswarm        0.4.0   2021-06-01 [1] CRAN (R 4.1.0)
 bit             4.0.5   2022-11-15 [1] CRAN (R 4.3.0)
 bit64           4.0.5   2020-08-30 [1] CRAN (R 4.0.2)
 bookdown        0.34    2023-05-09 [1] CRAN (R 4.3.0)
 broom         * 1.0.5   2023-06-09 [1] CRAN (R 4.3.0)
 bslib           0.5.0   2023-06-09 [1] CRAN (R 4.3.0)
 cachem          1.0.8   2023-05-01 [1] CRAN (R 4.3.0)
 cellranger      1.1.0   2016-07-27 [1] CRAN (R 4.0.0)
 cli             3.6.1   2023-03-23 [1] CRAN (R 4.3.0)
 codetools       0.2-19  2023-02-01 [2] CRAN (R 4.3.0)
 colorspace      2.1-0   2023-01-23 [1] CRAN (R 4.3.0)
 cowplot       * 1.1.1   2021-08-27 [1] Github (wilkelab/cowplot@555c9ae)
 crayon          1.5.2   2022-09-29 [1] CRAN (R 4.2.1)
 curl            5.0.1   2023-06-07 [1] CRAN (R 4.3.0)
 digest          0.6.31  2022-12-11 [1] CRAN (R 4.3.0)
 distill         1.5.2   2022-11-10 [1] Github (rstudio/distill@9c1a1a2)
 downlit         0.4.2   2022-07-05 [1] CRAN (R 4.2.1)
 dplyr         * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
 evaluate        0.21    2023-05-05 [1] CRAN (R 4.3.0)
 fansi           1.0.4   2023-01-22 [1] CRAN (R 4.3.0)
 farver          2.1.1   2022-07-06 [1] CRAN (R 4.2.1)
 fastmap         1.1.1   2023-02-24 [1] CRAN (R 4.3.0)
 fontawesome     0.5.1   2023-04-18 [1] CRAN (R 4.3.0)
 forcats       * 1.0.0   2023-01-29 [1] CRAN (R 4.3.0)
 fs            * 1.6.2   2023-04-25 [1] CRAN (R 4.3.0)
 gargle          1.5.0   2023-06-10 [1] CRAN (R 4.3.0)
 generics        0.1.3   2022-07-05 [1] CRAN (R 4.2.1)
 ggbeeswarm    * 0.7.2   2023-04-29 [1] CRAN (R 4.3.0)
 ggplot2       * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
 glue            1.6.2   2022-02-24 [1] CRAN (R 4.2.0)
 googledrive     2.1.1   2023-06-11 [1] CRAN (R 4.3.0)
 googlesheets4   1.1.1   2023-06-11 [1] CRAN (R 4.3.0)
 gtable          0.3.3   2023-03-21 [1] CRAN (R 4.3.0)
 here          * 1.0.1   2020-12-13 [1] CRAN (R 4.0.5)
 highr           0.10    2022-12-22 [1] CRAN (R 4.3.0)
 hms             1.1.3   2023-03-21 [1] CRAN (R 4.3.0)
 htmltools       0.5.5   2023-03-23 [1] CRAN (R 4.3.0)
 httr            1.4.6   2023-05-08 [1] CRAN (R 4.3.0)
 janitor         2.2.0   2023-02-02 [1] CRAN (R 4.3.0)
 jquerylib       0.1.4   2021-04-26 [1] CRAN (R 4.1.0)
 jsonlite        1.8.5   2023-06-05 [1] CRAN (R 4.3.0)
 knitr         * 1.43    2023-05-25 [1] CRAN (R 4.3.0)
 labeling        0.4.2   2020-10-20 [1] CRAN (R 4.0.3)
 lifecycle       1.0.3   2022-10-07 [1] CRAN (R 4.2.1)
 lubridate     * 1.9.2   2023-02-10 [1] CRAN (R 4.3.0)
 magrittr        2.0.3   2022-03-30 [1] CRAN (R 4.2.0)
 memoise         2.0.1   2021-11-26 [1] CRAN (R 4.1.2)
 munsell         0.5.0   2018-06-12 [1] CRAN (R 4.0.0)
 nvimcom       * 0.9-144 2023-05-01 [1] local
 openssl         2.0.6   2023-03-09 [1] CRAN (R 4.2.3)
 patchwork     * 1.1.2   2022-08-19 [1] CRAN (R 4.2.1)
 pillar          1.9.0   2023-03-22 [1] CRAN (R 4.3.0)
 pkgconfig       2.0.3   2019-09-22 [1] CRAN (R 4.0.0)
 purrr         * 1.0.1   2023-01-10 [1] CRAN (R 4.2.3)
 R6              2.5.1   2021-08-19 [1] CRAN (R 4.1.1)
 rappdirs        0.3.3   2021-01-31 [1] CRAN (R 4.0.4)
 readr         * 2.1.4   2023-02-10 [1] CRAN (R 4.3.0)
 rlang           1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
 rmarkdown     * 2.22    2023-06-01 [1] CRAN (R 4.3.0)
 rprojroot       2.0.3   2022-04-02 [1] CRAN (R 4.2.2)
 rstudioapi      0.14    2022-08-22 [1] CRAN (R 4.2.1)
 sass            0.4.6   2023-05-03 [1] CRAN (R 4.3.0)
 scales          1.2.1   2022-08-20 [1] CRAN (R 4.2.1)
 sessioninfo     1.2.2   2021-12-06 [1] CRAN (R 4.1.2)
 snakecase       0.11.0  2019-05-25 [1] CRAN (R 4.0.0)
 stringi         1.7.12  2023-01-11 [1] CRAN (R 4.3.0)
 stringr       * 1.5.0   2022-12-02 [1] CRAN (R 4.3.0)
 tibble        * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
 tidyr         * 1.3.0   2023-01-24 [1] CRAN (R 4.3.0)
 tidyselect      1.2.0   2022-10-10 [1] CRAN (R 4.2.1)
 tidyverse     * 2.0.0   2023-02-22 [1] CRAN (R 4.3.0)
 timechange      0.2.0   2023-01-11 [1] CRAN (R 4.3.0)
 tzdb            0.4.0   2023-05-12 [1] CRAN (R 4.3.0)
 utf8            1.2.3   2023-01-31 [1] CRAN (R 4.3.0)
 vctrs           0.6.2   2023-04-19 [1] CRAN (R 4.3.0)
 vipor           0.4.5   2017-03-22 [1] CRAN (R 4.0.0)
 vroom           1.6.3   2023-04-28 [1] CRAN (R 4.3.0)
 withr           2.5.0   2022-03-03 [1] CRAN (R 4.2.0)
 xfun            0.39    2023-04-20 [1] CRAN (R 4.3.0)
 xml2            1.3.4   2023-04-27 [1] CRAN (R 4.3.0)
 yaml            2.3.7   2023-01-23 [1] CRAN (R 4.3.0)

 [1] /home/michael/.local/lib/R/library
 [2] /usr/lib/R/library

────────────────────────────────────────────────────────────────────────

References