Concentration tests

R

Tests of wastewater concentration methods performed by Ari in May 2023.

Michael R. McLaren
2023-06-03
Show code
library(tidyverse)
library(fs)
library(here)

library(furrr)
plan(multisession, workers = 3)

# plotting helpers
library(cowplot)
library(patchwork)
library(ggbeeswarm)

# Make the cowplot theme the default for all plots created below.
theme_set(new = theme_cowplot())

# Okabe Ito color scheme with amber for yellow; see https://easystats.github.io/see/reference/scale_color_okabeito.html
colors_oi <- grDevices::palette.colors(palette = 'Okabe-Ito')
# Name the colors explicitly rather than relying on `palette.colors()` to do
# so: recent R versions (>= 4.3.0) return an unnamed vector unless
# `names = TRUE` is passed, in which case assigning to 'yellow' below would
# append a tenth color instead of replacing the palette's yellow. Setting the
# names here works on both older and newer R versions.
names(colors_oi) <- c(
  'black', 'orange', 'skyblue', 'bluishgreen', 'yellow',
  'blue', 'vermillion', 'reddishpurple', 'gray'
)
colors_oi['yellow'] <- "#F5C710"

# Current date as an ISO 8601 (YYYY-MM-DD) string.
today <- format(Sys.time(), format = '%Y-%m-%d')

Import data

Tests 1 and 2

Show code
# Google Sheet with the raw results of Tests 1 and 2 (one worksheet per test).
sheet_url <- 'https://docs.google.com/spreadsheets/d/1hRsWlFYTywcZvYOLiidpFj7ywYbag2YJ8oVDO5MTRFs'

# Column types are given explicitly: an integer column followed by character
# columns (three for Test 1, two for Test 2).
test1_raw <- googlesheets4::read_sheet(
  ss = sheet_url,
  sheet = 'Test 1',
  col_types = 'iccc'
)
test2_raw <- googlesheets4::read_sheet(
  ss = sheet_url,
  sheet = 'Test 2',
  col_types = 'icc'
)
glimpse(test2_raw)
Rows: 3
Columns: 3
$ Round    <int> 1, 2, 3
$ Amicon   <chr> "160 & 170", "210 & 260", "335 & 350"
$ Vivaspin <chr> "400 & 450", "1000 & 1340", "2150 & 2480"

Let’s reshape these results into tidy format by pivoting the treatments from columns to rows and splitting the replicate measurements into their own rows.

Show code
# Split cells that hold several '&'-separated values into their pieces.
#
# @param x Character vector; each element holds one or more values separated
#   by '&', e.g. '160 & 170'.
# @return A list with one character vector per element of `x`. The pieces of
#   each element are named by position ('1', '2', ...).
parse_values <- function(x) {
  x %>%
    # Drop leading/trailing whitespace so it does not end up in the pieces.
    str_trim() %>%
    # Accept any amount of whitespace (including none) around '&', so cells
    # typed as '160&170' or '160  & 170' are split as well.
    str_split('\\s*&\\s*') %>%
    map(function(pieces) set_names(pieces, nm = seq_along(pieces)))
}
# parse_values('123 & 523')
# Reshape a raw results table into tidy format.
#
# @param x Data frame with a `Round` column and one column per treatment,
#   whose cells hold the replicate measurements in the form understood by
#   `parse_values()`.
# @return A data frame with one row per round, treatment and replicate, and
#   columns `round`, `treatment`, `volume` (numeric) and `replicate_id`.
format_results <- function(x) {
  # One row per round x treatment; cells still hold the unsplit text.
  long <- x %>%
    rename(round = Round) %>%
    pivot_longer(cols = -round, names_to = 'treatment', values_to = 'volume')

  # Split every cell into its replicate values; the names of the pieces
  # become the replicate ids.
  pieces <- parse_values(long$volume)

  long %>%
    mutate(
      volume = pieces,
      replicate_id = map(pieces, names)
    ) %>%
    unnest(cols = c(replicate_id, volume)) %>%
    mutate(volume = as.numeric(volume))
}
# Tidy version of the Test 1 results; print an overview of the result.
test1 <- glimpse(format_results(test1_raw))
Rows: 24
Columns: 4
$ round        <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,…
$ treatment    <chr> "Influent without Reagents", "Influent without …
$ volume       <dbl> 150, 150, 170, 170, 150, 150, 160, 160, 190, 19…
$ replicate_id <chr> "1", "2", "1", "2", "1", "2", "1", "2", "1", "2…
Show code
# Tidy version of the Test 2 results; print an overview of the result.
test2 <- glimpse(format_results(test2_raw))
Rows: 12
Columns: 4
$ round        <int> 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3
$ treatment    <chr> "Amicon", "Amicon", "Vivaspin", "Vivaspin", "Am…
$ volume       <dbl> 160, 170, 400, 450, 210, 260, 1000, 1340, 335, …
$ replicate_id <chr> "1", "2", "1", "2", "1", "2", "1", "2", "1", "2…

Test 3

The results for this test follow a different format.

Show code
# Google Sheet with the Test 3 data, spread over three worksheets.
sheet_url <- 'https://docs.google.com/spreadsheets/d/1ChpXwS0azsDmoeHfyPYLBD158bHq7op-ld2EeukE_5g'

# Sample metadata: six character columns.
test3_samples_raw <- googlesheets4::read_sheet(
  ss = sheet_url,
  sheet = 'Samples',
  col_types = 'cccccc'
)
# Per-round information: an integer column and a numeric column.
test3_rounds_raw <- googlesheets4::read_sheet(
  ss = sheet_url,
  sheet = 'Centrifugal filtration rounds',
  col_types = 'in'
)
# Measurements: integer, character and numeric columns.
test3_results_raw <- googlesheets4::read_sheet(
  ss = sheet_url,
  sheet = 'Results',
  col_types = 'icn'
)

TODO: Improve

Show code
# Sample metadata for Test 3: snake_case column names, a shorter name for the
# filter size column, and the treatment group and filter size as factors.
test3_samples <- janitor::clean_names(test3_samples_raw) %>%
  rename(filter_size = filter_size_k_da) %>%
  mutate(
    treatment_group_id = factor(treatment_group_id),
    filter_size = factor(filter_size)
  )
glimpse(test3_samples)
Rows: 6
Columns: 6
$ sample_id          <chr> "Amicon_30_None", "Amicon_30_Tween", "Ami…
$ treatment_group_id <fct> 1, 2, 3, 4, 5, 6
$ filter_type        <chr> "Amicon", "Amicon", "Amicon", "Amicon", "…
$ filter_size        <fct> 30, 30, 30, 50, 50, 50
$ reagent            <chr> "None", "Tween", "BE+NaNO3", "None", "Twe…
$ notes              <chr> NA, NA, NA, NA, NA, NA
Show code
# test3_rounds_raw
# Test 3 measurements: snake_case column names and a shorter name for the
# retentate volume column.
test3_results <- rename(
  janitor::clean_names(test3_results_raw),
  retentate_volume = retentate_volume_u_l
)
glimpse(test3_results)
Rows: 30
Columns: 3
$ round            <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3…
$ sample_id        <chr> "Amicon_30_None", "Amicon_30_Tween", "Amico…
$ retentate_volume <dbl> 190, 210, 400, 100, 160, 200, 330, 200, 580…
Show code
# Attach the sample metadata to every Test 3 measurement via `sample_id`.
# Idea not yet implemented: add a combined treatment label, e.g.
#   treatment = str_c(reagent, filter_type, filter_size, sep = ':')
test3 <- left_join(test3_results, test3_samples, by = 'sample_id')
glimpse(test3)
Rows: 30
Columns: 8
$ round              <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3,…
$ sample_id          <chr> "Amicon_30_None", "Amicon_30_Tween", "Ami…
$ retentate_volume   <dbl> 190, 210, 400, 100, 160, 200, 330, 200, 5…
$ treatment_group_id <fct> 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2,…
$ filter_type        <chr> "Amicon", "Amicon", "Amicon", "Amicon", "…
$ filter_size        <fct> 30, 30, 30, 50, 50, 50, 30, 30, 30, 50, 5…
$ reagent            <chr> "None", "Tween", "BE+NaNO3", "None", "Twe…
$ notes              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…

Plots

Show code
# Test 1: volume of every replicate by round, one facet per treatment.
test1 %>%
  mutate(round = as.ordered(round)) %>%
  ggplot(mapping = aes(x = volume, y = round, color = replicate_id)) +
  # Layers: include 0 on the x axis, then a grey reference line at 150 drawn
  # underneath the points.
  expand_limits(x = 0) +
  geom_vline(xintercept = 150, color = 'grey') +
  geom_quasirandom(groupOnX = FALSE) +
  # Scales, facets and labels. The color values are unnamed so they are
  # assigned to the replicates by position.
  scale_color_manual(values = unname(colors_oi[2:3])) +
  facet_wrap(vars(treatment), ncol = 1) +
  labs(x = 'Volume (uL)', y = 'Round', color = 'Replicate') +
  # Theme: the legend position must be set after the complete theme.
  theme_minimal_hgrid() +
  theme(legend.position = 'bottom') +
  plot_annotation(title = 'Test 1')

Show code
# Test 2: volume of every replicate by round, one facet per treatment.
test2 %>%
  mutate(round = as.ordered(round)) %>%
  ggplot(mapping = aes(x = volume, y = round, color = replicate_id)) +
  # Layers: include 0 on the x axis, then a grey reference line at 150 drawn
  # underneath the points.
  expand_limits(x = 0) +
  geom_vline(xintercept = 150, color = 'grey') +
  geom_quasirandom(groupOnX = FALSE) +
  # Scales, facets and labels. The color values are unnamed so they are
  # assigned to the replicates by position.
  scale_color_manual(values = unname(colors_oi[2:3])) +
  facet_wrap(vars(treatment), ncol = 1) +
  labs(x = 'Volume (uL)', y = 'Round', color = 'Replicate') +
  # Theme: the legend position must be set after the complete theme.
  theme_minimal_hgrid() +
  theme(legend.position = 'bottom') +
  plot_annotation(title = 'Test 2')

Show code
# Test 3: retentate volume by round, one facet per combination of filter size
# and reagent. No color aesthetic is mapped (there is no replicate column
# here), so this plot has no color scale, color label or legend settings.
test3 %>%
  mutate(
    across(round, as.ordered),
    # Both variables must be factors: the facet expression below applies `:`
    # to them, which for two factors yields their interaction.
    across(c(filter_size, reagent), as.factor)
  ) %>%
  ggplot(aes(y = round, x = retentate_volume)) +
  theme_minimal_hgrid() +
  expand_limits(x = 0) +
  facet_wrap(~ filter_size : reagent, ncol = 1) +
  labs(x = 'Volume (uL)', y = 'Round') +
  plot_annotation('Test 3') +
  # Grey reference line at 150, drawn underneath the points.
  geom_vline(xintercept = 150, color = 'grey') +
  geom_quasirandom(groupOnX = FALSE)

TODO: standardize column names

References