This experiment compares two different Amicon molecular weight cut-offs (MWCOs), 30 kDa and 100 kDa, and 4 dissociation treatments (None, NaCl, Tween-20, and a method using beef extract (BE).
library(tidyverse)
library(fs)
library(here)
library(knitr)
library(broom)
# plotting helpers
library(cowplot)
library(patchwork)
library(ggbeeswarm)
theme_set(theme_cowplot())
# Okabe Ito color scheme with amber for yellow; see https://easystats.github.io/see/reference/scale_color_okabeito.html
colors_oi <- grDevices::palette.colors()
colors_oi['yellow'] <- "#F5C710"
# Custon qPCR helpers
source('_functions.R')
[1] "2023-06-15_OSH-16S-Amicon_Results_20230616 113441.csv"
[2] "2023-06-15_OSH_CrA-amicon_Results_20230616 114121.csv"
[3] "2023-06-15_OSH-Phg-Amicon_Results_20230616 121105.csv"
[4] "2023-06-15_OSH-Cov2-Amicon_Raw_Results_20230616 105748.csv"
[5] "2023-06-15_OSH-Cov2-Amicon_Adjusted_Results_20230616 105816.csv"
I’ll just use the ‘Adjusted’ SARS2 results when I import,
fns_filt <- fns %>% str_subset(negate = TRUE, 'Raw')
fns_filt %>% path_file
[1] "2023-06-15_OSH-16S-Amicon_Results_20230616 113441.csv"
[2] "2023-06-15_OSH_CrA-amicon_Results_20230616 114121.csv"
[3] "2023-06-15_OSH-Phg-Amicon_Results_20230616 121105.csv"
[4] "2023-06-15_OSH-Cov2-Amicon_Adjusted_Results_20230616 105816.csv"
We can use the following snippet to pull out the target name from the file path,
base <- fns %>% path_common
base_num <- base %>% path_split %>% .[[1]] %>% length
fns %>% path_split %>% map(tail, -base_num) %>% map_chr(head, 1)
[1] "[2023-06-15] 16S" "[2023-06-15] Crassphage"
[3] "[2023-06-15] Phagemid" "[2023-06-15] SARS-CoV-2"
[5] "[2023-06-15] SARS-CoV-2"
# Pattern for extracting experimental metadata from sample names;
# Experimental metadata stored in qpcr sample name as '<sewer_system>_<amicon_mwco>_<dissoc_treatment>'
pat <- '(N|S)_(30|100)_(None|Tween|BE|NaCl)'
results <- tibble(file = fns_filt) %>%
mutate(
dir_name = file %>% path_split %>% map(tail, -base_num) %>% map_chr(head, 1),
data = map(file, read_qpcr_results_csv)
) %>%
# select(-file) %>%
separate(dir_name, into = c('dir_date', 'dir_target'), remove = FALSE, sep = ' ') %>%
mutate(
dir_date = dir_date %>% lubridate::ymd(),
) %>%
unnest(data) %>%
# Extract experimental sample metadata from sample names
mutate(
match = str_match(sample, pat),
) %>%
mutate(.keep = 'unused',
sample_experiment = match[,1],
sewer_system = match[,2],
amicon_mwco = match[,3] %>% fct_relevel('30'),
dissoc_treatment = match[,4] %>% fct_relevel('None'),
) %>%
mutate(
cq = ifelse(is.na(cq), 40, cq)
) %>%
glimpse
Rows: 264
Columns: 32
$ file <chr> "_data/[2023-06-15] 16S/2023-06-15_OSH…
$ dir_name <chr> "[2023-06-15] 16S", "[2023-06-15] 16S"…
$ dir_date <date> 2023-06-15, 2023-06-15, 2023-06-15, 2…
$ dir_target <chr> "16S", "16S", "16S", "16S", "16S", "16…
$ well <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 14, 15,…
$ well_position <chr> "A1", "A2", "A3", "A4", "A5", "A6", "A…
$ row <ord> A, A, A, A, A, A, A, A, A, B, B, B, B,…
$ column <ord> 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4,…
$ omit <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
$ sample <chr> "N_30_None", "N_30_None", "N_30_None",…
$ target <chr> "16S", "16S", "16S", "16S", "16S", "16…
$ task <chr> "UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKN…
$ reporter <chr> "FAM", "FAM", "FAM", "FAM", "FAM", "FA…
$ quencher <chr> "NFQ-MGB", "NFQ-MGB", "NFQ-MGB", "NFQ-…
$ amp_status <chr> "Amp", "Amp", "Amp", "Amp", "Amp", "No…
$ amp_score <dbl> 1.5151167, 1.5159783, 1.4994412, 1.228…
$ curve_quality <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ result_quality_issues <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ cq <dbl> 21.95765, 21.82037, 21.87888, 31.92359…
$ cq_confidence <dbl> 0.9866581, 0.9902164, 0.9869926, 0.938…
$ cq_mean <dbl> 21.88563, 21.88563, 21.88563, 31.79154…
$ cq_sd <dbl> 0.06889292, 0.06889292, 0.06889292, 0.…
$ auto_threshold <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
$ threshold <dbl> 0.1680576, 0.1680576, 0.1680576, 0.168…
$ auto_baseline <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
$ baseline_start <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
$ baseline_end <int> 17, 16, 17, 25, 25, 39, 14, 24, 15, 16…
$ cq_status <chr> "Determined", "Determined", "Determine…
$ sample_experiment <chr> "N_30_None", "N_30_None", "N_30_None",…
$ sewer_system <chr> "N", "N", "N", "N", "N", "N", NA, NA, …
$ amicon_mwco <fct> 30, 30, 30, 30, 30, 30, NA, NA, NA, 30…
$ dissoc_treatment <fct> None, None, None, BE, BE, BE, NA, NA, …
Note, in cases where the Cq value could not be determined, I set the value to 40.
# A tibble: 4 × 2
dir_target n
<chr> <int>
1 16S 66
2 Crassphage 66
3 Phagemid 69
4 SARS-CoV-2 63
# A tibble: 4 × 3
dir_target `file %>% path_file` n
<chr> <chr> <int>
1 16S 2023-06-15_OSH-16S-Amicon_Results_20230616 113441.… 66
2 Crassphage 2023-06-15_OSH_CrA-amicon_Results_20230616 114121.… 66
3 Phagemid 2023-06-15_OSH-Phg-Amicon_Results_20230616 121105.… 69
4 SARS-CoV-2 2023-06-15_OSH-Cov2-Amicon_Adjusted_Results_202306… 63
# A tibble: 4 × 3
dir_target target n
<chr> <chr> <int>
1 16S 16S 66
2 Crassphage CrA 66
3 Phagemid Target 1 69
4 SARS-CoV-2 Target 1 63
# results %>% count(sample) %>% print(n=Inf)
100.0 1000.0 10000.0 100000.0
3 3 3 3
10E3 10E4 10E5 10E6
3 3 3 3
10E7 75 750 7500
3 3 3 3
75000 750000 CPT ultrafilter N_100_BE
3 3 3 12
N_100_NaCl N_100_None N_100_Tween N_30_BE
12 12 12 12
N_30_NaCl N_30_None N_30_Tween S_100_BE
12 12 12 12
S_100_NaCl S_100_None S_100_Tween S_30_BE
12 12 12 12
S_30_NaCl S_30_None S_30_Tween Std 1
12 12 12 3
Std 2 Std 3 Std 4 Std 5
3 3 3 3
<NA>
12
N_100_BE N_100_NaCl N_100_None N_100_Tween N_30_BE
12 12 12 12 12
N_30_NaCl N_30_None N_30_Tween S_100_BE S_100_NaCl
12 12 12 12 12
S_100_None S_100_Tween S_30_BE S_30_NaCl S_30_None
12 12 12 12 12
S_30_Tween <NA>
12 72
TODO: Check status of samples with no name; are these blanks?
Load amplification data,
fns_amp <- '_data' %>%
dir_ls(recurse = TRUE, glob = '*_Amplification Data_*.csv') %>%
str_subset(negate = TRUE, 'Raw')
amp <- tibble(file = fns_amp) %>%
mutate(
dir_name = file %>% path_split %>% map(tail, -base_num) %>% map_chr(head, 1),
data = map(file, read_qpcr_amplification_csv)
) %>%
# select(-file) %>%
separate(dir_name, into = c('dir_date', 'dir_target'), remove = FALSE, sep = ' ') %>%
mutate(
dir_date = dir_date %>% lubridate::ymd(),
) %>%
unnest(data) %>%
glimpse
Rows: 10,680
Columns: 14
$ file <chr> "_data/[2023-06-15] 16S/2023-06-15_OSH-16S-Ami…
$ dir_name <chr> "[2023-06-15] 16S", "[2023-06-15] 16S", "[2023…
$ dir_date <date> 2023-06-15, 2023-06-15, 2023-06-15, 2023-06-1…
$ dir_target <chr> "16S", "16S", "16S", "16S", "16S", "16S", "16S…
$ well <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ well_position <chr> "A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1"…
$ row <ord> A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A…
$ column <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ cycle_number <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,…
$ target <chr> "16S", "16S", "16S", "16S", "16S", "16S", "16S…
$ rn <dbl> 2.798063, 2.799998, 2.801923, 2.808614, 2.8092…
$ d_rn <dbl> -0.0024182955, -0.0022691886, -0.0021315257, 0…
$ sample <chr> "N_30_None", "N_30_None", "N_30_None", "N_30_N…
$ omit <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
TODO: find a better way to import the amplification data and results at the same time
Get the baseline coordinates for plotting,
baselines <- results %>%
pivot_longer(
cols = c(baseline_start, baseline_end),
names_to = 'baseline_boundary',
values_to = 'cycle_number',
names_prefix = 'baseline_',
) %>%
left_join(
amp %>% select(well_position, cycle_number, rn, d_rn),
by = c('well_position', 'cycle_number')
) %>%
glimpse
Rows: 2,094
Columns: 34
$ file <chr> "_data/[2023-06-15] 16S/2023-06-15_OSH…
$ dir_name <chr> "[2023-06-15] 16S", "[2023-06-15] 16S"…
$ dir_date <date> 2023-06-15, 2023-06-15, 2023-06-15, 2…
$ dir_target <chr> "16S", "16S", "16S", "16S", "16S", "16…
$ well <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,…
$ well_position <chr> "A1", "A1", "A1", "A1", "A1", "A1", "A…
$ row <ord> A, A, A, A, A, A, A, A, A, A, A, A, A,…
$ column <ord> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,…
$ omit <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
$ sample <chr> "N_30_None", "N_30_None", "N_30_None",…
$ target <chr> "16S", "16S", "16S", "16S", "16S", "16…
$ task <chr> "UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKN…
$ reporter <chr> "FAM", "FAM", "FAM", "FAM", "FAM", "FA…
$ quencher <chr> "NFQ-MGB", "NFQ-MGB", "NFQ-MGB", "NFQ-…
$ amp_status <chr> "Amp", "Amp", "Amp", "Amp", "Amp", "Am…
$ amp_score <dbl> 1.515117, 1.515117, 1.515117, 1.515117…
$ curve_quality <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ result_quality_issues <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ cq <dbl> 21.95765, 21.95765, 21.95765, 21.95765…
$ cq_confidence <dbl> 0.9866581, 0.9866581, 0.9866581, 0.986…
$ cq_mean <dbl> 21.88563, 21.88563, 21.88563, 21.88563…
$ cq_sd <dbl> 0.06889292, 0.06889292, 0.06889292, 0.…
$ auto_threshold <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
$ threshold <dbl> 0.1680576, 0.1680576, 0.1680576, 0.168…
$ auto_baseline <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
$ cq_status <chr> "Determined", "Determined", "Determine…
$ sample_experiment <chr> "N_30_None", "N_30_None", "N_30_None",…
$ sewer_system <chr> "N", "N", "N", "N", "N", "N", "N", "N"…
$ amicon_mwco <fct> 30, 30, 30, 30, 30, 30, 30, 30, 30, 30…
$ dissoc_treatment <fct> None, None, None, None, None, None, No…
$ baseline_boundary <chr> "start", "start", "start", "start", "e…
$ cycle_number <int> 3, 3, 3, 3, 17, 17, 17, 17, 3, 3, 3, 3…
$ rn <dbl> 2.8019226, 2.9759669, 5.9895606, 0.921…
$ d_rn <dbl> -0.0021315257, 0.0002468450, -0.014340…
Three types of conditions:
# A tibble: 17 × 4
sewer_system amicon_mwco dissoc_treatment n
<chr> <fct> <fct> <int>
1 N 30 None 12
2 N 30 BE 12
3 N 30 NaCl 12
4 N 30 Tween 12
5 N 100 None 12
6 N 100 BE 12
7 N 100 NaCl 12
8 N 100 Tween 12
9 S 30 None 12
10 S 30 BE 12
11 S 30 NaCl 12
12 S 30 Tween 12
13 S 100 None 12
14 S 100 BE 12
15 S 100 NaCl 12
16 S 100 Tween 12
17 <NA> <NA> <NA> 72
results_main %>%
ggplot(aes(x = cq, y = amicon_mwco, color = dissoc_treatment)) +
# theme_minimal_vgrid() +
theme(
legend.position = 'bottom',
panel.spacing = unit(1, 'char'),
) +
labs(
y = 'Amicon MWCO (kDa)',
x = 'Cq value',
color = 'Dissoc. treatment',
) +
facet_grid(sewer_system ~ dir_target, scales = 'free') +
scale_color_manual(values = colors_oi %>% unname) +
geom_vline(xintercept = 40, color = 'grey', size = 0.3) +
geom_quasirandom()
This figure splits out the qPCR results by target, Amicon MWCO and sewer system (N vs. S), with the points colors by dissociation treatment. The three points of the same color for a given MWCO and facet panel are qPCR replicates.
We can see that the impact of the BE treatment is highly variable. BE tends to increase the Cq of 16S (decrease abundance) relative to the other treatments, but the effect is minor relative to None in the N-100 sample and large in the other three. BE decreases Crassphage and phagemid Cq values in the N-100 and S-100 samples (relative to other treatments), but increases the Cq in the N-30 and S-30 samples. There is an extreme effect (increase in Cq) of BE on Crassphage in the S-30 sample, with two qPCR replicates failing to yield Cq values below 40. For SARS2, we see BE appears to decrease abundance to a large extent across all four samples, to the extent that we fail to get Cq values for any replicates except one N-100 replicate.
From these results, I suggest that we remove the BE treatment from further consideration, due to its high variance and strong negative effect on SARS2 measurement (along with the technical difficulty of applying it in the lab).
condition_vars <- c('sewer_system', 'amicon_mwco', 'dissoc_treatment')
results %>%
filter(dir_target == 'SARS-CoV-2') %>%
summarize(.by = c(sample, condition_vars), num_na = sum(is.na(cq))) %>%
arrange(dissoc_treatment) %>%
print(n=Inf)
# A tibble: 21 × 5
sample sewer_system amicon_mwco dissoc_treatment num_na
<chr> <chr> <fct> <fct> <int>
1 N_30_None N 30 None 0
2 S_30_None S 30 None 0
3 N_100_None N 100 None 0
4 S_100_None S 100 None 0
5 N_30_BE N 30 BE 0
6 S_30_BE S 30 BE 0
7 N_100_BE N 100 BE 0
8 S_100_BE S 100 BE 0
9 N_30_NaCl N 30 NaCl 0
10 S_30_NaCl S 30 NaCl 0
11 N_100_NaCl N 100 NaCl 0
12 S_100_NaCl S 100 NaCl 0
13 N_30_Tween N 30 Tween 0
14 S_30_Tween S 30 Tween 0
15 N_100_Tween N 100 Tween 0
16 S_100_Tween S 100 Tween 0
17 100000.0 <NA> <NA> <NA> 0
18 10000.0 <NA> <NA> <NA> 0
19 1000.0 <NA> <NA> <NA> 0
20 100.0 <NA> <NA> <NA> 0
21 <NA> <NA> <NA> <NA> 0
# arrange(desc(num_na))
HERE. Check amplification.
delta_rn_min <- 1e-3
ct_threshold <- results %>% filter(dir_target == 'SARS-CoV-2') %>% pull(threshold) %>% unique
stopifnot(length(ct_threshold) == 1)
amp %>%
filter(
dir_target == 'SARS-CoV-2',
!is.na(sewer_system)
) %>%
ggplot(aes(cycle_number, pmax(d_rn, delta_rn_min), color = dissoc_treatment)) +
facet_grid(sewer_system ~ amicon_mwco, scales = 'free') +
scale_color_manual(values = colors_oi %>% unname) +
scale_y_log10() +
geom_line(aes(group = well)) +
geom_hline(yintercept = ct_threshold, alpha = 0.3) +
# scale_color_brewer(type = 'qual') +
# geom_point(data = baselines, aes(shape = baseline_boundary), size = 3) +
# scale_shape_manual(values = c(1, 4)) +
labs(y = 'Delta Rn', x = 'Cycle', color = 'Target')
delta_rn_min <- 1e-3
ct_threshold <- results %>% filter(dir_target == 'SARS-CoV-2') %>% pull(threshold) %>% unique
stopifnot(length(ct_threshold) == 1)
amp %>%
filter(
dir_target == 'SARS-CoV-2',
!is.na(sewer_system)
) %>%
ggplot(aes(cycle_number, pmax(rn, delta_rn_min), color = dissoc_treatment)) +
facet_grid(sewer_system ~ amicon_mwco, scales = 'free') +
scale_color_manual(values = colors_oi %>% unname) +
scale_y_log10() +
geom_line(aes(group = well)) +
# scale_color_brewer(type = 'qual') +
# geom_point(data = baselines, aes(shape = baseline_boundary), size = 3) +
# scale_shape_manual(values = c(1, 4)) +
labs(y = 'Rn', x = 'Cycle', color = 'Target')
url <- 'https://docs.google.com/spreadsheets/d/1OUp0DBVb3QOrELZ32-4C6Ia3QH1x_G95nCJFW2h8I4k'
qubit <- googlesheets4::read_sheet(url, col_types = 'ccnccc') %>%
janitor::clean_names() %>%
rename(
sewer_system = system,
amicon_mwco = mwco,
dissoc_treatment = treatment,
qubit_rna_raw = qubit_hs_rna_ng_ul,
qubit_dna_raw = qubit_br_dna_ng_ul,
) %>%
glimpse
Rows: 16
Columns: 6
$ sample <chr> "N_30_Tween", "N_30_None", "N_30_NaCl", "N_…
$ sewer_system <chr> "N", "N", "N", "N", "N", "N", "N", "N", "S"…
$ amicon_mwco <dbl> 30, 30, 30, 30, 100, 100, 100, 100, 30, 30,…
$ dissoc_treatment <chr> "Tween", "None", "NaCl", "BE", "Tween", "No…
$ qubit_rna_raw <chr> "0.52", "0.284", "0.24", "OOR (>10 ng/uL)",…
$ qubit_dna_raw <chr> "OOR (<4 ng/uL)", "OOR (<4 ng/uL)", "OOR (<…
qubit_raw <- qubit
qubit <- qubit_raw %>%
mutate(
across(matches('qubit_(rna|dna)_raw'),
~ifelse(str_detect(.x, 'OOR'), NA, .x) %>% as.numeric,
.names = "{str_replace(.col, '_raw', '')}"
),
qubit_rna_adj = case_when(
qubit_rna_raw == 'OOR (>10 ng/uL)' ~ 10,
qubit_rna_raw == 'OOR (<0.2 ng/uL)' ~ 0.2,
TRUE ~ qubit_rna
),
qubit_dna_adj = case_when(
qubit_dna_raw == 'OOR (<4 ng/uL)' ~ 4,
TRUE ~ qubit_dna
),
across(amicon_mwco, as.factor),
across(dissoc_treatment, ~fct_relevel(.x, 'None')),
) %>%
glimpse
Rows: 16
Columns: 10
$ sample <chr> "N_30_Tween", "N_30_None", "N_30_NaCl", "N_…
$ sewer_system <chr> "N", "N", "N", "N", "N", "N", "N", "N", "S"…
$ amicon_mwco <fct> 30, 30, 30, 30, 100, 100, 100, 100, 30, 30,…
$ dissoc_treatment <fct> Tween, None, NaCl, BE, Tween, None, NaCl, B…
$ qubit_rna_raw <chr> "0.52", "0.284", "0.24", "OOR (>10 ng/uL)",…
$ qubit_dna_raw <chr> "OOR (<4 ng/uL)", "OOR (<4 ng/uL)", "OOR (<…
$ qubit_rna <dbl> 0.520, 0.284, 0.240, NA, 3.530, 1.300, 0.51…
$ qubit_dna <dbl> NA, NA, NA, 146, NA, NA, NA, 333, NA, NA, N…
$ qubit_rna_adj <dbl> 0.520, 0.284, 0.240, 10.000, 3.530, 1.300, …
$ qubit_dna_adj <dbl> 4, 4, 4, 146, 4, 4, 4, 333, 4, 4, 4, 188, 4…
sewer_system | amicon_mwco | dissoc_treatment | qubit_rna_raw | qubit_dna_raw |
---|---|---|---|---|
N | 30 | Tween | 0.52 | OOR (<4 ng/uL) |
N | 30 | None | 0.284 | OOR (<4 ng/uL) |
N | 30 | NaCl | 0.24 | OOR (<4 ng/uL) |
N | 30 | BE | OOR (>10 ng/uL) | 146 |
N | 100 | Tween | 3.53 | OOR (<4 ng/uL) |
N | 100 | None | 1.3 | OOR (<4 ng/uL) |
N | 100 | NaCl | 0.511 | OOR (<4 ng/uL) |
N | 100 | BE | OOR (>10 ng/uL) | 333 |
S | 30 | Tween | 1.97 | OOR (<4 ng/uL) |
S | 30 | None | 2.73 | OOR (<4 ng/uL) |
S | 30 | NaCl | OOR (<0.2 ng/uL) | OOR (<4 ng/uL) |
S | 30 | BE | OOR (>10 ng/uL) | 188 |
S | 100 | Tween | 5.5 | OOR (<4 ng/uL) |
S | 100 | None | 1.2 | OOR (<4 ng/uL) |
S | 100 | NaCl | 0.351 | OOR (<4 ng/uL) |
S | 100 | BE | OOR (>10 ng/uL) | 379 |
qubit %>%
select(sample:dissoc_treatment, matches('qubit_(rna|dna)_adj')) %>%
pivot_longer(matches('qubit_(rna|dna)_adj')) %>%
ggplot(aes(x = value, y = amicon_mwco, color = dissoc_treatment)) +
# theme_minimal_vgrid() +
theme(
legend.position = 'bottom',
panel.spacing = unit(1, 'char'),
) +
labs(
y = 'Amicon MWCO (kDa)',
x = 'Concentration (ng/uL)',
color = 'Dissoc. treatment',
) +
facet_grid(sewer_system ~ name, scales = 'free') +
scale_color_manual(values = colors_oi %>% unname) +
# geom_vline(xintercept = 40, color = 'grey', size = 0.3) +
scale_x_log10() +
# expand_limits(x = 0.1) +
geom_point()
We see high (out of range) values for the BE-treated samples, which may be caused by bovine NA in the beef extract. We also see that the DNA measurements can’t distinguish the other treatments, due to the relatively high lower limit of the BR assay that was used.
Compared to our 2022 tests using DNA extractions, we have concentrated more wastewater than in our previous experiments (40 mL instead of 10 mL per sample). Those extractions used a different extraction kit. I would need to revisit those results to see if I’d expect to be seeing <4 ng/uL.
Let’s look at the RNA measurements of the non-BE treatments, where the Qubit results are most informative. Note that I have set the measurement that is OOR (below lower limit) to the lower limit value of 0.2.
qubit %>%
filter(dissoc_treatment != 'BE') %>%
ggplot(aes(x = qubit_rna_adj, y = amicon_mwco, color = dissoc_treatment)) +
# theme_minimal_vgrid() +
theme(
legend.position = 'bottom',
panel.spacing = unit(1, 'char'),
) +
labs(
y = 'Amicon MWCO (kDa)',
x = 'Concentration (ng/uL)',
color = 'Dissoc. treatment',
) +
facet_grid(sewer_system ~ ., scales = 'free') +
scale_x_log10() +
# expand_limits(x = 0.1) +
scale_color_manual(values = colors_oi %>% unname) +
geom_vline(xintercept = 0.2, color = 'grey', size = 0.3) +
geom_point()
This evidence suggests that the NaCl treatment tends to give lower yields than the other two treatments, and that the Tween treatment tends to give higher yields than the None treatment, though the evidence is not that strong (particularly for the advantage of Tween over None) due to limited sample size.
This was a warm-up exercise I did before looking at data carefully and making the above exploratory plots. Care should be taken in interpretting these results, including my quick attempts at interpreting them below.
Fit linear model to assess the effect of conditions on the Cq value of a single target.
fits <- results_main %>%
summarize(
.by = c(sample_experiment, dir_target, sewer_system, amicon_mwco, dissoc_treatment),
across(cq, mean)
) %>%
nest(.by = dir_target) %>%
mutate(.keep = 'unused',
fit = map(data, ~lm(data = .x, cq ~ sewer_system + amicon_mwco + dissoc_treatment)),
# fit_tidy = map(fit, broom::tidy)
)
fits_summary <- fits %>%
mutate(.keep = 'unused',
tidy = map(fit, broom::tidy)
) %>%
unnest(tidy)
Impact of dissoc treatment:
fits_summary %>%
filter(str_detect(term, 'dissoc')) %>%
select(dir_target, term, estimate, std.error, p.value)
# A tibble: 12 × 5
dir_target term estimate std.error p.value
<chr> <chr> <dbl> <dbl> <dbl>
1 16S dissoc_treatmentBE 7.23 1.52 0.000787
2 16S dissoc_treatmentNaCl -0.255 1.52 0.870
3 16S dissoc_treatmentTween -1.25 1.52 0.429
4 Crassphage dissoc_treatmentBE 2.93 3.24 0.386
5 Crassphage dissoc_treatmentNaCl -0.307 3.24 0.926
6 Crassphage dissoc_treatmentTween -1.44 3.24 0.666
7 Phagemid dissoc_treatmentBE 0.491 0.639 0.460
8 Phagemid dissoc_treatmentNaCl 0.698 0.639 0.300
9 Phagemid dissoc_treatmentTween -0.192 0.639 0.770
10 SARS-CoV-2 dissoc_treatmentBE 5.73 0.611 0.00000285
11 SARS-CoV-2 dissoc_treatmentNaCl -0.119 0.611 0.850
12 SARS-CoV-2 dissoc_treatmentTween -1.10 0.611 0.101
Thus it appears that BE helps by reducing bacteria. I don’t think we expected this result, and I’d want to understand it better.
Impact of Amicon filter type:
fits_summary %>%
filter(str_detect(term, 'amicon')) %>%
select(dir_target, term, estimate, std.error, p.value)
# A tibble: 4 × 5
dir_target term estimate std.error p.value
<chr> <chr> <dbl> <dbl> <dbl>
1 16S amicon_mwco100 -0.400 1.08 0.718
2 Crassphage amicon_mwco100 -1.79 2.29 0.453
3 Phagemid amicon_mwco100 0.00536 0.452 0.991
4 SARS-CoV-2 amicon_mwco100 0.491 0.432 0.283
No clear effect (large standard errors relative to average effect). For viruses except SASR2, data consistent with effects on the order of roughly 2-4-fold in either direction. For SARS2, data consistent with a 1-4-fold decrease with a mean est of 2-fold decrease.
Now, fit model to lo at cq differences of viruses relative to bacteria
sessioninfo::session_info()
─ Session info ─────────────────────────────────────────────────────────
setting value
version R version 4.3.0 (2023-04-21)
os Arch Linux
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/New_York
date 2023-06-24
pandoc 3.0.1 @ /usr/bin/ (via rmarkdown)
─ Packages ─────────────────────────────────────────────────────────────
package * version date (UTC) lib source
askpass 1.1 2019-01-13 [1] CRAN (R 4.0.0)
backports 1.4.1 2021-12-13 [1] CRAN (R 4.1.2)
beeswarm 0.4.0 2021-06-01 [1] CRAN (R 4.1.0)
bit 4.0.5 2022-11-15 [1] CRAN (R 4.3.0)
bit64 4.0.5 2020-08-30 [1] CRAN (R 4.0.2)
bookdown 0.34 2023-05-09 [1] CRAN (R 4.3.0)
broom * 1.0.5 2023-06-09 [1] CRAN (R 4.3.0)
bslib 0.5.0 2023-06-09 [1] CRAN (R 4.3.0)
cachem 1.0.8 2023-05-01 [1] CRAN (R 4.3.0)
cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.0.0)
cli 3.6.1 2023-03-23 [1] CRAN (R 4.3.0)
codetools 0.2-19 2023-02-01 [2] CRAN (R 4.3.0)
colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.3.0)
cowplot * 1.1.1 2021-08-27 [1] Github (wilkelab/cowplot@555c9ae)
crayon 1.5.2 2022-09-29 [1] CRAN (R 4.2.1)
curl 5.0.1 2023-06-07 [1] CRAN (R 4.3.0)
digest 0.6.31 2022-12-11 [1] CRAN (R 4.3.0)
distill 1.5.2 2022-11-10 [1] Github (rstudio/distill@9c1a1a2)
downlit 0.4.2 2022-07-05 [1] CRAN (R 4.2.1)
dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)
evaluate 0.21 2023-05-05 [1] CRAN (R 4.3.0)
fansi 1.0.4 2023-01-22 [1] CRAN (R 4.3.0)
farver 2.1.1 2022-07-06 [1] CRAN (R 4.2.1)
fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.0)
fontawesome 0.5.1 2023-04-18 [1] CRAN (R 4.3.0)
forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.3.0)
fs * 1.6.2 2023-04-25 [1] CRAN (R 4.3.0)
gargle 1.5.0 2023-06-10 [1] CRAN (R 4.3.0)
generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.1)
ggbeeswarm * 0.7.2 2023-04-29 [1] CRAN (R 4.3.0)
ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)
glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0)
googledrive 2.1.1 2023-06-11 [1] CRAN (R 4.3.0)
googlesheets4 1.1.1 2023-06-11 [1] CRAN (R 4.3.0)
gtable 0.3.3 2023-03-21 [1] CRAN (R 4.3.0)
here * 1.0.1 2020-12-13 [1] CRAN (R 4.0.5)
highr 0.10 2022-12-22 [1] CRAN (R 4.3.0)
hms 1.1.3 2023-03-21 [1] CRAN (R 4.3.0)
htmltools 0.5.5 2023-03-23 [1] CRAN (R 4.3.0)
httr 1.4.6 2023-05-08 [1] CRAN (R 4.3.0)
janitor 2.2.0 2023-02-02 [1] CRAN (R 4.3.0)
jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.1.0)
jsonlite 1.8.5 2023-06-05 [1] CRAN (R 4.3.0)
knitr * 1.43 2023-05-25 [1] CRAN (R 4.3.0)
labeling 0.4.2 2020-10-20 [1] CRAN (R 4.0.3)
lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.1)
lubridate * 1.9.2 2023-02-10 [1] CRAN (R 4.3.0)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0)
memoise 2.0.1 2021-11-26 [1] CRAN (R 4.1.2)
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.0.0)
nvimcom * 0.9-144 2023-05-01 [1] local
openssl 2.0.6 2023-03-09 [1] CRAN (R 4.2.3)
patchwork * 1.1.2 2022-08-19 [1] CRAN (R 4.2.1)
pillar 1.9.0 2023-03-22 [1] CRAN (R 4.3.0)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.0)
purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.2.3)
R6 2.5.1 2021-08-19 [1] CRAN (R 4.1.1)
rappdirs 0.3.3 2021-01-31 [1] CRAN (R 4.0.4)
readr * 2.1.4 2023-02-10 [1] CRAN (R 4.3.0)
rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)
rmarkdown * 2.22 2023-06-01 [1] CRAN (R 4.3.0)
rprojroot 2.0.3 2022-04-02 [1] CRAN (R 4.2.2)
rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.1)
sass 0.4.6 2023-05-03 [1] CRAN (R 4.3.0)
scales 1.2.1 2022-08-20 [1] CRAN (R 4.2.1)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.1.2)
snakecase 0.11.0 2019-05-25 [1] CRAN (R 4.0.0)
stringi 1.7.12 2023-01-11 [1] CRAN (R 4.3.0)
stringr * 1.5.0 2022-12-02 [1] CRAN (R 4.3.0)
tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)
tidyr * 1.3.0 2023-01-24 [1] CRAN (R 4.3.0)
tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.1)
tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.3.0)
timechange 0.2.0 2023-01-11 [1] CRAN (R 4.3.0)
tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.3.0)
utf8 1.2.3 2023-01-31 [1] CRAN (R 4.3.0)
vctrs 0.6.2 2023-04-19 [1] CRAN (R 4.3.0)
vipor 0.4.5 2017-03-22 [1] CRAN (R 4.0.0)
vroom 1.6.3 2023-04-28 [1] CRAN (R 4.3.0)
withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0)
xfun 0.39 2023-04-20 [1] CRAN (R 4.3.0)
xml2 1.3.4 2023-04-27 [1] CRAN (R 4.3.0)
yaml 2.3.7 2023-01-23 [1] CRAN (R 4.3.0)
[1] /home/michael/.local/lib/R/library
[2] /usr/lib/R/library
────────────────────────────────────────────────────────────────────────