library(dplyr)
library(zooper)
library(lubridate)
library(ggplot2)
library(readr)
library(tidyr)
source("functions/region_assigner.R") #loads function to analyze regional data

Zooplankton

Data

zoop_sal<-zooper::zoopEnvComb %>%
  filter(!is.na(SalSurf) & Source=="EMP" & year(Date)>=1980)%>%
  mutate(salbin=case_when(SalSurf<1 ~ "< 1 PPT",
                          SalSurf>1 & SalSurf <6 ~ "1-6 PPT",
                          SalSurf >6 ~ ">6 PPT"),
         Month=month(Date),
         Year=year(Date),
         salbin=factor(salbin, levels=c("< 1 PPT", "1-6 PPT", ">6 PPT")))%>%
  group_by(Month, Year, salbin)%>%
  summarise(N_stations=n_distinct(Station), .groups="drop")

zoop_geo<-zooper::zoopEnvComb%>%
  filter(!is.na(Latitude) & Source=="EMP" & year(Date)>=1980)%>%
  region_assigner(analysis = "annual", plot = FALSE)%>%
  mutate(Month=month(Date),
         Year=year(Date),
         Region=case_match(Region, "West" ~ "Suisun", "North" ~ "Sacramento", "South" ~ "San Joaquin"),
         Region=factor(Region, levels=c("Suisun", "Sacramento", "San Joaquin")))%>%
  group_by(Month, Year, Region)%>%
  summarise(N_stations=n_distinct(Station), .groups="drop")
## Loading required package: sf
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar

Sampling effort plots all dates

Salinity

ggplot(zoop_sal, aes(y=Month, x=Year, fill=N_stations))+
  geom_tile()+
  scale_y_continuous(breaks=1:12)+
  scale_fill_viridis_c()+
  facet_wrap(~salbin)+
  theme_bw()+
  theme(legend.position = "bottom")

Geography

ggplot(zoop_geo, aes(y=Month, x=Year, fill=N_stations))+
  geom_tile()+
  scale_y_continuous(breaks=1:12)+
  scale_fill_viridis_c()+
  facet_wrap(~Region)+
  theme_bw()+
  theme(legend.position = "bottom")

Sampling effort plots 1995 - present

Salinity

ggplot(filter(zoop_sal, Year>=1995), aes(y=Month, x=Year, fill=N_stations))+
  geom_tile()+
  geom_text(aes(label=N_stations, color=N_stations<5), size=2, show.legend = F)+
  scale_color_manual(values=c("black", "white"))+
  scale_y_continuous(breaks=1:12)+
  scale_fill_viridis_c()+
  facet_wrap(~salbin)+
  theme_bw()+
  theme(legend.position = "bottom")

Geography

ggplot(filter(zoop_geo, Year>=1995), aes(y=Month, x=Year, fill=N_stations))+
  geom_tile()+
  geom_text(aes(label=N_stations, color=N_stations<5), size=2, show.legend = F)+
  scale_color_manual(values=c("black", "white"))+
  scale_y_continuous(breaks=1:12)+
  scale_fill_viridis_c()+
  facet_wrap(~Region)+
  theme_bw()+
  theme(legend.position = "bottom")

Clean Benthic Data

Pre-process

# read in amphipod biomass conversion data
df_amphi_biomass <- read_csv('data/data_in/amphipod_biomass_conversions.csv') %>%
  select(SpeciesName, Month, Biomass_mean)
## Rows: 84 Columns: 5
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): SpeciesName
## dbl (4): Month, Biomass_mean, Biomass_sd, N
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# vectors for amphipod and clam genus
vec_amph <- c('Ampelisca', 'Monocorophium', 'Sinocorophium', 'Americorophium', 'Crangonyx', 'Gammarus', 'Hyalella')
vec_clam <- c('Potamocorbula', 'Corbicula')

# format raw benthic data
df_ben_raw <-
  data.table::fread(
    'data_raw/DWR Benthic raw count data 1975-2020.csv',
    colClasses = c('Year' = 'character')
    ) %>% 
  filter(Genus %in% c(vec_clam, vec_amph)) %>%
  select(SampleDate, Year, Month, StationCode, Grab, OrganismCode, Count, Order_level, Genus, Latitude, Longitude) %>%
  mutate(
    Month = match(Month, month.name),
    CPUE = Count/0.052,
    Year = as.numeric(Year)
  ) %>%

  # join amphipod biomass and benthic data
  left_join(df_amphi_biomass, by=c('Genus'= 'SpeciesName', 'Month')) %>%
  mutate(BPUE = CPUE*Biomass_mean) %>%
  select(-Biomass_mean) %>%

  # finish cleaning benthic data  
  mutate(Month = as.character(sprintf('%02d', Month))) %>%
  group_by(across(-c(Count, Grab, CPUE, BPUE))) %>%
  summarise(across(c(CPUE, BPUE), mean), .groups='drop') %>%
  mutate(Latitude = if_else(StationCode == 'D7-C', 38.1171292, Latitude),
         Longitude = if_else(StationCode == 'D7-C', -122.0395539, Longitude)) %>%
  separate(SampleDate,into = c('Date','Time'), sep = 'T', convert = TRUE, remove = FALSE) %>%
  separate(StationCode, into = c('Station', NA), sep = '-', remove = FALSE)
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 63504 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
df_ben_raw$Date <- ymd(df_ben_raw$Date)

# assign annual regions
df_ben_lt <- df_ben_raw %>%
  region_assigner(analysis = 'annual')
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar

Add Salinity

# read in WQ data (for SpC data)
df_wq <- read_csv('https://portal.edirepository.org/nis/dataviewer?packageid=edi.458.9&entityid=cf231071093ac2861893793517db26f3') %>%
  select(Date, Station, SpCndSurface) %>%
  separate(Date, into = c('Year','Month',NA), sep = '-', remove = FALSE) %>%
  drop_na() %>%
  distinct() %>%
  mutate(Year = as.numeric(Year)) %>%
  filter(Year >= 1980)
## Rows: 17366 Columns: 73
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (27): Station, SampleDescription, Flag, FlagDescription, FieldNotes, We...
## dbl  (44): AirTemp, WindVelocity, WindDirection, NorthLat, WestLong, Chla, P...
## date  (1): Date
## time  (1): Time
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# join two data frames; if multiple WQ entries for a month, choose closest one
df_ben_lt <- df_ben_lt %>%
  filter(Year >= 1980) %>%
  left_join(df_wq, by = c('Year', 'Month', 'Station'), relationship = 'many-to-many') %>%
  mutate(date_diff = abs(Date.x - Date.y),
         date_same_diff = (Date.x - Date.y)) %>%
  group_by_at(vars(-date_diff,-date_same_diff,-SpCndSurface,-Date.y)) %>%
  filter((date_diff == min(date_diff)) %>% replace_na(TRUE)) %>%
  ungroup()

# if multiple WQ entries are the same number of days apart, choose the one that is after the sampling event
df_ben_lt <- df_ben_lt %>%
  group_by_at(vars(-date_diff,-date_same_diff,-SpCndSurface,-Date.y)) %>%
  filter((date_same_diff == min(date_same_diff)) %>% replace_na(TRUE)) %>%
  ungroup() %>%
  select(c(-date_diff,-date_same_diff,-Date.y)) %>%
  rename(Date = Date.x)

# map Spc to salinity
df_ben_lt <- df_ben_lt %>%
  mutate(Salinity = wql::ec2pss(.data$SpCndSurface / 1000, t = 25)) # equation taken from discretewq package

# average by StationCode, Month/Year, and Genus (Region/Lat/Lon/Order_level added so they're included in the df)
df_ben_lt <- df_ben_lt %>%
  group_by(StationCode, Region, Order_level, Genus, Month, Year, Latitude, Longitude) %>%
  summarize(across(c(CPUE, BPUE, Salinity), ~mean(., na.rm = TRUE)), .groups = 'drop') %>% 
  pivot_wider(names_from = Order_level, values_from = c(CPUE, BPUE),
    names_glue = '{Order_level}_{.value}')

Benthic Amphipods

Data

# filter data by relevant genus
df_amph <- df_ben_lt %>% filter(Genus %in% vec_amph)

# add salinity bins
df_amph_sal <- df_amph %>%
  filter(!is.na(Salinity)) %>%
  mutate(
    salbin = case_when(Salinity < 1 ~ '< 1 PPT',
                       Salinity >= 1 & Salinity <= 6 ~ '1-6 PPT',
                       Salinity > 6 ~ '> 6 PPT'),
         salbin=factor(
           salbin, levels=c('< 1 PPT', '1-6 PPT', '> 6 PPT'))
    ) %>%
  group_by(Month, Year, salbin) %>%
  summarise(N_stations = n_distinct(StationCode), .groups='drop') %>%
  mutate(N_stations = factor(N_stations, levels = sort(unique(N_stations))))

# add regions
df_amph_geo <- df_amph %>%
  filter(Year >= 1980) %>%
  rename(Orig_Region = Region) %>%
  region_assigner(analysis = 'annual', plot = FALSE) %>%
  mutate(
    Region = case_when(Orig_Region == 'West' ~ 'Suisun',
                       Orig_Region == 'North' ~ 'Sacramento',
                       Orig_Region == 'South' ~ 'San Joaquin'),
    Region = factor(Region, levels = c('Suisun', 'Sacramento', 'San Joaquin'))
    ) %>%
  group_by(Month, Year, Region) %>%
  summarise(N_stations = n_distinct(StationCode), .groups='drop') %>%
  mutate(N_stations = factor(N_stations, levels = sort(unique(N_stations))))
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar

Sampling effort plots all dates

Salinity

ggplot(df_amph_sal, aes(y = Month, x = Year, fill = N_stations)) +
  geom_tile() +
  scale_fill_viridis_d() +
  facet_wrap(~salbin) +
  theme_bw() +
  theme(legend.position = 'bottom',
        legend.spacing.x = unit(0, 'cm')) +
  guides(
    fill = guide_legend(nrow = 1,
                        label.position = 'bottom')
    )

Geography

ggplot(df_amph_geo, aes(y = Month, x = Year, fill = N_stations)) +
  geom_tile() +
  scale_fill_viridis_d() +
  facet_wrap(~Region) +
  theme_bw() +
  theme(legend.position = 'bottom',
        legend.spacing.x = unit(0, 'cm')) +
  guides(
    fill = guide_legend(nrow = 1,
                        label.position = 'bottom')
    )

Sampling effort plots 1995 - present

Salinity

ggplot(filter(df_amph_sal, Year >= 1995), aes(y = Month, x = Year, fill = N_stations)) +
  geom_tile() +
  geom_text(aes(label=N_stations, color=N_stations<5), size=2, show.legend = F) +
  scale_color_manual(values=c("black", "white")) +
  scale_fill_viridis_d() +
  facet_wrap(~salbin) +
  theme_bw() +
  theme(legend.position = 'bottom',
        legend.spacing.x = unit(0, 'cm')) +
  guides(
    fill = guide_legend(nrow = 1,
                        label.position = 'bottom')
    )
## Warning in Ops.factor(N_stations, 5): '<' not meaningful for factors

Geography

ggplot(filter(df_amph_geo, Year >= 1995), aes(y = Month, x = Year, fill = N_stations)) +
  geom_tile() +
  geom_text(aes(label=N_stations, color=N_stations<5), size=2, show.legend = F) +
  scale_color_manual(values=c("black", "white")) +
  scale_fill_viridis_d()+
  facet_wrap(~Region) +
  theme_bw() +
  theme(legend.position = 'bottom',
        legend.spacing.x = unit(0, 'cm')) +
  guides(
    fill = guide_legend(nrow = 1,
                        label.position = 'bottom')
    )
## Warning in Ops.factor(N_stations, 5): '<' not meaningful for factors

Benthic Clams

Data

# filter data by relevant genus
df_clam <- df_ben_lt %>% filter(Genus %in% vec_clam)

# add salinity bins
df_clam_sal <- df_clam %>%
  filter(!is.na(Salinity)) %>%
  mutate(
    salbin = case_when(Salinity < 1 ~ '< 1 PPT',
                       Salinity >= 1 & Salinity <= 6 ~ '1-6 PPT',
                       Salinity > 6 ~ '> 6 PPT'),
         salbin=factor(
           salbin, levels=c('< 1 PPT', '1-6 PPT', '> 6 PPT'))
    ) %>%
  group_by(Month, Year, salbin) %>%
  summarise(N_stations = n_distinct(StationCode), .groups='drop') %>%
  mutate(N_stations = factor(N_stations, levels = sort(unique(N_stations))))

# add regions
df_clam_geo <- df_clam %>%
  filter(Year >= 1980) %>%
  rename(Orig_Region = Region) %>%
  region_assigner(analysis = 'annual', plot = FALSE) %>%
  mutate(
    Region = case_when(Orig_Region == 'West' ~ 'Suisun',
                       Orig_Region == 'North' ~ 'Sacramento',
                       Orig_Region == 'South' ~ 'San Joaquin'),
    Region = factor(Region, levels = c('Suisun', 'Sacramento', 'San Joaquin'))
    ) %>%
  group_by(Month, Year, Region) %>%
  summarise(N_stations = n_distinct(StationCode), .groups='drop') %>%
  mutate(N_stations = factor(N_stations, levels = sort(unique(N_stations))))
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar

Sampling effort plots all dates

Salinity

ggplot(df_clam_sal, aes(y = Month, x = Year, fill = N_stations)) +
  geom_tile() +
  scale_fill_viridis_d() +
  facet_wrap(~salbin) +
  theme_bw() +
  theme(legend.position = 'bottom',
        legend.spacing.x = unit(0, 'cm')) +
  guides(
    fill = guide_legend(nrow = 1,
                        label.position = 'bottom')
    )

Geography

ggplot(df_clam_geo, aes(y = Month, x = Year, fill = N_stations)) +
  geom_tile() +
  scale_fill_viridis_d() +
  facet_wrap(~Region) +
  theme_bw() +
  theme(legend.position = 'bottom',
        legend.spacing.x = unit(0, 'cm')) +
  guides(
    fill = guide_legend(nrow = 1,
                        label.position = 'bottom')
    )

Sampling effort plots 1995 - present

Salinity

ggplot(filter(df_clam_sal, Year >= 1995), aes(y = Month, x = Year, fill = N_stations)) +
  geom_tile() +
  geom_text(aes(label=N_stations, color=N_stations<5), size=2, show.legend = F) +
  scale_color_manual(values=c("black", "white")) +
  scale_fill_viridis_d() +
  facet_wrap(~salbin) +
  theme_bw() +
  theme(legend.position = 'bottom',
        legend.spacing.x = unit(0, 'cm')) +
  guides(
    fill = guide_legend(nrow = 1,
                        label.position = 'bottom')
    )
## Warning in Ops.factor(N_stations, 5): '<' not meaningful for factors

Geography

ggplot(filter(df_clam_geo, Year >= 1995), aes(y = Month, x = Year, fill = N_stations)) +
  geom_tile() +
  geom_text(aes(label=N_stations, color=N_stations<5), size=2, show.legend = F) +
  scale_color_manual(values=c("black", "white")) +
  scale_fill_viridis_d()+
  facet_wrap(~Region) +
  theme_bw() +
  theme(legend.position = 'bottom',
        legend.spacing.x = unit(0, 'cm')) +
  guides(
    fill = guide_legend(nrow = 1,
                        label.position = 'bottom')
    )
## Warning in Ops.factor(N_stations, 5): '<' not meaningful for factors