# read in data with monthly = FALSE (the legacy code further down re-reads it
# with monthly = TRUE)
df_wq <- read_wq_data(monthly = FALSE)

## Rows: 69132 Columns: 3

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (3): Time_PST, Month, Correction

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Overwrite each dissolved-nutrient column with the output of replace_rl()
# (presumably a reporting-limit substitution -- confirm against its definition).
df_wq$DissAmmonia <- replace_rl(df_wq, 'DissAmmonia')
df_wq$DissNitrateNitrite <- replace_rl(df_wq, 'DissNitrateNitrite')
df_wq$DissOrthophos <- replace_rl(df_wq, 'DissOrthophos')

df_wq <- combine_wq_stations(df_wq)

# Copy before dropping columns; a silly step, but needed because of how
# replace_rl() currently works.
df_wq_fill <- df_wq

# Drop the *_Sign columns. The negated selection is passed directly: naming the
# argument `select =` (as in base::subset()) is not how dplyr::select() takes
# its columns. all_of() errors if any of the listed columns is missing.
sign_cols <- c(
  'DissAmmonia_Sign',
  'DissNitrateNitrite_Sign',
  'DissOrthophos_Sign'
)
df_wq_fill <- df_wq_fill %>%
  select(-all_of(sign_cols))

station_list <- c(
  'P8', 'MD10A', 'D8', 'D7', 'D6', 'D41', 'D4', 'D28A', 'D26',
  'C3', 'C3A', 'C10', 'C10A'
)
# NOTE(review): combine_wq_stations() was already applied to df_wq above, so
# this second call looks redundant -- confirm it is idempotent before removing.
df_wq_fill <- combine_wq_stations(df_wq_fill)
df_wq_fill <- df_wq_fill[df_wq_fill$Station %in% station_list, ]

# Long format: one row per MonthYear / Station / Analyte.
df_wq_fill <- df_wq_fill %>%
  pivot_longer(
    c(Temperature, Secchi, Chlorophyll:TKN),
    names_to = 'Analyte',
    values_to = 'Value'
  ) %>%
  select('MonthYear', 'Station', 'Analyte', 'Value')

# Keep only the analytes that are exported below.
keep_analytes <- c(
  'Chlorophyll', 'DissNitrateNitrite', 'DissAmmonia', 'Salinity',
  'Secchi', 'Temperature', 'DissOrthophos'
)
df_wq_fill <- df_wq_fill[df_wq_fill$Analyte %in% keep_analytes, ]

# One column per station; values sharing a MonthYear/Analyte/Station cell are
# averaged.
df_wq_fill <- df_wq_fill %>%
  pivot_wider(
    names_from = 'Station',
    values_from = 'Value',
    values_fn = mean
  )

# Write one CSV per analyte.
# NOTE(review): the repeated 'missing_data/' directory in the path looks
# unintentional -- confirm against the repository layout before changing it.
for (analyte in unique(df_wq_fill$Analyte)) {
  df <- df_wq_fill %>% filter(Analyte == analyte)

  write_csv(
    df,
    paste0('data/missing_data/missing_data/missingdat_edi_', analyte, '.csv')
  )
}

Switch to the arima_imputation_*.Rmd files at this point. The rest of the code below is legacy code.

# read in data (monthly = TRUE)
df_wq <- read_wq_data(monthly = TRUE)

# overwrite each dissolved-nutrient column with the output of replace_rl(),
# one column at a time so every call sees the previous replacement
for (rl_col in c('DissAmmonia', 'DissNitrateNitrite', 'DissOrthophos')) {
  df_wq[[rl_col]] <- replace_rl(df_wq, rl_col)
}

df_wq <- df_wq %>% clean_df()

Check Temporal Coverage

# check temporal coverage; the outer parentheses auto-print the assigned object
(plt <- check_temporal_coverage(df_wq))

Core stations are:

P8
MD10A
D8
D7
D6
D41
D4
D28A
D26
C3/C3A?
C10/C10A?

(D10, D12, D16, D22 are missing decades of nutrient data)

Subset out Relevant Stations

# subset stations: keep only rows whose Station is in the core list
station_list <- c(
  'P8', 'MD10A', 'D8', 'D7', 'D6', 'D41', 'D4', 'D28A', 'D26',
  'C3', 'C3A', 'C10', 'C10A'
)
df_wq <- df_wq[is.element(df_wq$Station, station_list), ]

# re-check temporal coverage on the reduced data; parentheses auto-print
(plt <- check_temporal_coverage(df_wq))

Check Spatial Coverage

# build the station map from the subsetted data; parentheses auto-print it
(map <- create_station_map(df_wq))

Check if C3/C3A and C10/C10A can be combined

# For each candidate station pair, emit a heading plus one line plot per
# analyte so the two stations' records can be compared by eye.
analytes <- unique(df_wq$Analyte)
stations_list <- list(c('C10','C10A'), c('C3','C3A'))

for (station_pair in stations_list) {
  # level-2 heading for the pair, written out as raw text with tabset options
  cat('\n##', station_pair, '{.tabset .tabset-fade .tabset-pills}')

  for (analyte in analytes) {
    # level-3 heading: one per analyte
    cat('\n###', analyte, '\n')

    df_pair <- filter(df_wq, Station %in% station_pair, Analyte == analyte)
    plt <- ggplot(df_pair, aes(Date, Value, color = Station)) +
      geom_line() +
      labs(y = analyte)

    plot(plt)
    cat('\n')
  }
}

C10 C10A

Temperature

Chlorophyll

DissAmmonia

DissNitrateNitrite

DissOrthophos

Salinity

Secchi

## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

C3 C3A

Temperature

Chlorophyll

DissAmmonia

DissNitrateNitrite

DissOrthophos

Salinity

Secchi

Looks good to me, so I will combine the stations.

df_wq <- combine_wq_stations(df_wq)

# check temporal coverage after combining; parentheses auto-print
(plt <- check_temporal_coverage(df_wq))

# Drop DissOrthophos values above 4 and DissNitrateNitrite values above 10
# (presumably outliers -- confirm the thresholds with the data owner).
# Rows for which the condition evaluates to NA are dropped by subset() as well.
df_wq <- subset(df_wq, Analyte != 'DissOrthophos' | Value <= 4)
df_wq <- subset(df_wq, Analyte != 'DissNitrateNitrite' | Value <= 10)

# scatter of every remaining value over time, one panel per analyte
ggplot(df_wq, aes(Date, Value, color = Station)) +
  geom_point() +
  facet_wrap(~ Analyte, ncol = 2, scales = 'free_y')

df_wq <- region_assigner(df_wq, analysis = 'monthly')

## Loading required package: sf

## Warning: package 'sf' was built under R version 4.1.3

## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE

# one row per distinct Station / Latitude / Longitude combination
df_stations <- distinct(
  subset(df_wq, select = c(Station, Latitude, Longitude))
)

# subset out coords for merged stations
# NOTE(review): latitudes are matched by exact floating-point equality, which
# silently stops matching if the stored coordinates change precision.
merged_lats <- c(38.04381, 37.67575, 38.34575)
df_stations <- df_stations[!is.element(df_stations$Latitude, merged_lats), ]

# derive calendar fields and round values to 3 decimals before averaging
df_wq <- df_wq %>%
  mutate(
    Year = lubridate::year(Date),
    Month = lubridate::month(Date),
    Value = round(Value, 3)
  )

# mean of Value within each combination of the supplied grouping columns
monthly_mean <- function(data, ...) {
  data %>%
    group_by(...) %>%
    summarize(Value = mean(Value), .groups = 'drop')
}

# average by region, and across all regions
df_wq_regions <- monthly_mean(df_wq, Year, Month, Region, Analyte)
df_wq_noregions <- monthly_mean(df_wq, Year, Month, Analyte)

# wide format: one column per analyte
df_wq_wide_regions <- df_wq_regions %>%
  pivot_wider(
    id_cols = c(Year, Month, Region),
    names_from = Analyte,
    values_from = Value
  )

df_wq_wide_noregions <- df_wq_noregions %>%
  pivot_wider(
    id_cols = c(Year, Month),
    names_from = Analyte,
    values_from = Value
  )

write_csv(
  df_wq_wide_regions,
  'data/monthly_averages/nutrient_data_monthly_regions.csv'
)
write_csv(
  df_wq_wide_noregions,
  'data/monthly_averages/nutrient_data_monthly_noregions.csv'
)
write_csv(df_stations, 'data/stations/stations_nutrients_monthly.csv')

Nutrient Data - Monthly

Check Temporal Coverage

Subset out Relevant Stations

Check Spatial Coverage

Check if C3/C3A and C10/C10A can be combined

C10 C10A

Temperature

Chlorophyll

DissAmmonia

DissNitrateNitrite

DissOrthophos

Salinity

Secchi

C3 C3A

Temperature

Chlorophyll

DissAmmonia

DissNitrateNitrite

DissOrthophos

Salinity

Secchi