Benthic sampling includes both clam and amphipod data that we’re interested in.

Redoing benthic script with correct stations that were continuously monitored over the time series

Load packages

Download benthic data if it’s not already found locally in the data_raw folder

if(!file.exists(file.path("data_raw", "DWR Benthic raw count data 1975-2020.csv"))){
  if(!dir.exists("data_raw")){
    dir.create("data_raw")
  }
  timeout<-getOption('timeout')
  options(timeout=300)
  download.file("https://portal.edirepository.org/nis/dataviewer?packageid=edi.1036.1&entityid=e65a3e7da08744a8cd6114d4e5f78c53", method = "libcurl",
                destfile=file.path("data_raw", "DWR Benthic raw count data 1975-2020.csv"))
  options(timeout=timeout)
}

Clean and reprocess data

Read in amphipod biomass conversion data

amphipod_biomass<-read_csv("data/data_in/amphipod_biomass_conversions.csv")%>%
  select(SpeciesName, Month, Biomass_mean)

## Rows: 84 Columns: 5

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): SpeciesName
## dbl (4): Month, Biomass_mean, Biomass_sd, N

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

Read in raw benthic count data

Benthic file does not exist locally so can get it from KMB: https://knb.ecoinformatics.org/knb/d1/mn/v2/object/urn:uuid:25b7befd-e931-4a50-9bf6-14715561f64a

Benthic survey used to sample on the L, R, or C of a site location historically, and eventually picked a side to monitor continuously, so need to remove the “Left, Right, Center” from the station names so can combine sites over the years.
Want to make sure that we use all of the data from that site, despite which side it was sampled on
Still want to keep A as a station designation
Create a monthly time step to see which stations were sampled continuously over the years
Take mean of lat/ long for each site since some sites moved slightly over the years based on the L, R, C

ben <- read.csv("data_raw/DWR Benthic raw count data 1975-2020.csv")  %>% 
  filter(Genus %in% c("Potamocorbula", "Corbicula", "Ampelisca", 
                      "Monocorophium", "Sinocorophium", "Americorophium", 
                      "Crangonyx", "Gammarus", "Hyalella"))%>%
  select(SampleDate, Year, Month, StationCode, Grab, OrganismCode, Count, Order_level, Genus, Latitude, Longitude)%>%
  mutate(mon = ym(paste0(Year, Month, sep = "-")), #create a month time step
         CPUE=Count/0.052,
         Month=month(mon))%>%
  left_join(amphipod_biomass, by=c("Genus"="SpeciesName", "Month"))%>%
  mutate(BPUE=CPUE*Biomass_mean)%>%
  select(-Biomass_mean)%>%
  group_by(across(-c(Count, Grab, CPUE, BPUE)))%>%
  summarise(across(c(CPUE, BPUE), mean), .groups="drop")%>%
  mutate(Latitude=if_else(StationCode=="D7-C", 38.1171292, Latitude),
         Longitude=if_else(StationCode=="D7-C", -122.0395539, Longitude))

Graph which stations were sampled continuously over the time series

#sites and number of months sampled to look at which were sampled continuously for analysis

sites_mon <- ben  %>% 
  distinct(StationCode, Latitude, Longitude, mon)

#graph of monthly sampling of each site over the time series

ggplot(sites_mon, aes(x = mon, y = StationCode), size = 3) +
  geom_point() +
  theme_classic()

only three stations were sampled consistently from 1977 to present.Stations D7 (Grizzly Bay), D4 (Chipps), D28A (franks Tract)

Additional stations picked up in the late 1990s

May want to eliminate clams from annual analysis based on small coverage with just those 3 sites, or combine sites close together. Need to explore.

Map of sites colored with stations sampled continuously from 1980s, and 1990s. Scaled by months sampled

First specify which stations are from the 1980s and which are from the 1990s

#stations sampled continuously since the 1980s

ben_1980 = c("D7-C", 
             "D4-L", 
             "D28A-L")

#stations sampled continuously since the 1990s

ben_1990 = c("P8-R", 
             "D6-R", 
             "D41-C", 
             "D41A-C", 
             "D24-L", 
             "D16-L", 
             "C9-L")

Map of all stations

Red = sites sampled continuously from the 1980s
Blue = from the 1990s
Black = sites not sampled continuously

sites <- ben %>% 
  group_by(StationCode, Latitude, Longitude) %>% 
  summarise(n_months = length(unique(mon)), .groups  =  "drop")

leaflet(sites) %>% 
  addTiles() %>% #adds the default base map
  addCircleMarkers(data= sites, #all benthic sites 
                   lat = ~Latitude,
                   lng = ~Longitude, 
                   label = ~StationCode, 
                   radius = ~n_months/ 50, #arbitrary scaling to show number of months each site was sampled 
                   color = "black",
                   fillColor = "black",
                   weight = 0.25, 
                   fillOpacity = 1) %>% 
  addCircleMarkers(data = sites %>% filter(StationCode %in% ben_1980), #sites continuously from the 1980s
                   lat = ~Latitude,
                   lng = ~Longitude, 
                   label = ~StationCode, 
                   radius = ~n_months/ 50, 
                   color = "black",
                   fillColor = "red",
                   weight = 0.25, 
                   fillOpacity = 1) %>% 
  addCircleMarkers(data = sites %>% filter(StationCode %in% ben_1990), #sites from the 1990s
                   lat = ~Latitude,
                   lng = ~Longitude, 
                   label = ~StationCode, 
                   radius = ~n_months/ 50, 
                   color = "black",
                   fillColor = "blue",
                   weight = 0.25, 
                   fillOpacity = 1)

Calculate summarised CPUE

Do all critters first and then filter out later for what we want
Benthic CPUE (organisms/ m2) = (Sum of organisms per visit/ # of grabs per visit)/ (0.052 grabs/ m2)
Do CPUE with 1980 stations first for annual analysis. Need to explore other stations that we might need to combine to expand spatial coverage for annual data
Calculate average CPUE for
1. Clam and Amphipod Annual w/o Regions
1. Clam and Amphipod Annual with Regions and
1. Clam and Amphipod monthly with Regions

ben_LT <- ben %>% 
  filter(StationCode %in% ben_1980)%>%
  region_assigner(analysis = "annual")

## Loading required package: sf

## Warning: package 'sf' was built under R version 4.1.3

## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE

#verified grab totals by comparing to old EMP benthic CPUE file from Betsy Wells

Calculate Annual CPUE for Clams

Uses only the 1980s stations, and both clam genera

LT_clams_year_noregions <- ben_LT%>%
  filter(Genus %in% c("Potamocorbula", "Corbicula"))%>%
  group_by(Genus, Year) %>%
  summarize(CPUE_Total = mean(CPUE), .groups = "drop") %>% 
  pivot_wider(names_from = Genus, values_from = CPUE_Total) %>% 
  rename(Corbicula_cpue = Corbicula,
         Potamocorbula_cpue = Potamocorbula)

write.csv(LT_clams_year_noregions, "data/annual_averages/clams_annual_noregions.csv",
          row.names=FALSE)

LT_clams_year_regions <- ben_LT%>%
  filter(Genus %in% c("Potamocorbula", "Corbicula"))%>% 
  group_by(Region, Genus, Year) %>%
  summarize(CPUE_Total = mean(CPUE), .groups = "drop") %>% 
  pivot_wider(names_from = Genus, values_from = CPUE_Total) %>% 
  rename(Corbicula_cpue = Corbicula,
         Potamocorbula_cpue = Potamocorbula)

write.csv(LT_clams_year_regions, "data/annual_averages/clams_annual_regions.csv",
          row.names=FALSE)

Export annual station list to data/stations folder

## Use ben_LT so the stations saved here match those used in the annual and annual 
## regional datasets saved above.
ben_LT %>% 
  rename(Station=StationCode) %>%
  select(Station, Latitude, Longitude) %>%
  distinct() %>%
  mutate(Survey="EMP Benthic") %>%
  write.csv(file=file.path("data/stations/stations_benthic_annual.csv"),
            row.names=FALSE)

Calculate Annual CPUE for Amphipods

1980s stations
For all amphipod data, only include amphipods routinely found in diets per data from the CDFW Diet Study: Ampelisca, Monocorophium, Sinocorophium, Gammarus, Americorophium, Crangonyx, and Hyalella
Other amphipod species found by the EMP Benthic study are either not found in diets regularly, is rare overall, or found in areas not included in our fish monitoring data (e.g. Eogammarus is typically found in marshes and eaten by Salmon) and likely only caught by EMP due to high flow or transport by vegetation.

LT_amph_year_noregions <- ben_LT%>%
  filter(Genus %in% c("Ampelisca", "Monocorophium", "Sinocorophium", "Americorophium", "Crangonyx", "Gammarus",
                      "Hyalella"))%>%
  group_by(Order_level, Year) %>%
  summarize(across(c(CPUE, BPUE),  mean), .groups = "drop") %>% 
  pivot_wider(names_from = Order_level, values_from = c(CPUE, BPUE),
    names_glue = "{Order_level}_{.value}")

write.csv(LT_amph_year_noregions, "data/annual_averages/amphipod_annual_noregions.csv",
          row.names=FALSE)

LT_amph_year_regions <- ben_LT%>%
  filter(Genus %in% c("Ampelisca", "Monocorophium", "Sinocorophium", "Americorophium", "Crangonyx", "Gammarus",
                      "Hyalella"))%>%
  group_by(Region, Order_level, Year) %>%
  summarize(across(c(CPUE, BPUE),  mean), .groups = "drop") %>% 
  pivot_wider(names_from = Order_level, values_from = c(CPUE, BPUE),
    names_glue = "{Order_level}_{.value}")

write.csv(LT_amph_year_regions, "data/annual_averages/amphipod_annual_regions.csv",
          row.names=FALSE)

Monthly CPUE for monthly/ regional analysis

Now need to calc monthly CPUE for the stations sampled in the 1990s for the monthly/ regional short term analysis Stations: D41, D41A, C9, D16, D24, D6, P8, D7, D4, D28A

CPUE with 10 ST stations, and limit to 1997 to present since D16 did not start till 1997

ben_ST <- filter(ben, StationCode %in% c(ben_1990, ben_1980) & Year >1996)%>%
  region_assigner(analysis = "monthly")

Calc monthly CPUE for just clams at the 1990 stations

ST_clams_mon <- ben_ST %>%
  filter(Genus %in% c("Potamocorbula", "Corbicula"))%>% 
  group_by(Region, Genus, Year, Month) %>%
  summarize(CPUE_Total = mean(CPUE), .groups = "drop") %>% 
  pivot_wider(names_from = Genus, values_from = CPUE_Total) %>% 
  rename(Corbicula_cpue = Corbicula,
         Potamocorbula_cpue = Potamocorbula)

write.csv(ST_clams_mon, "data/monthly_averages/clams_monthly_regions.csv",
          row.names=FALSE)

Amphipod monthly CPUE

Monthly analysis also only includes amphipods routinely found in diets per data from the CDFW Diet Study: Ampelisca, Monocorophium, Sinocorophium, Gammarus, Americorophium, Crangonyx, and Hyalella.

ST_amph_mon <- ben_ST%>%
  filter(Genus %in% c("Ampelisca", "Monocorophium", "Sinocorophium", "Americorophium", "Crangonyx", "Gammarus",
                      "Hyalella"))%>%
  group_by(Region, Order_level, Year, Month)%>%
  summarize(across(c(CPUE, BPUE),  mean), .groups = "drop") %>% 
  pivot_wider(names_from = Order_level, values_from = c(CPUE, BPUE),
    names_glue = "{Order_level}_{.value}")

write.csv(ST_amph_mon, "data/monthly_averages/amphipod_monthly_regions.csv",
          row.names=FALSE)

Export monthly station list to data/stations folder

## Use ben_ST so the stations saved here match those used in the monthly dataset saved
## above.
ben_ST %>% 
  rename(Station=StationCode) %>%  
  select(Station, Latitude, Longitude) %>%
  distinct() %>%
  mutate(Survey="EMP Benthic") %>%
  write.csv(file=file.path("data/stations/stations_benthic_monthly.csv"),
            row.names=FALSE)

EMP Benthic Data Analysis

Christina Burdi

11/30/2021