# R script to compile 5 years of SOTW data and reformat for presentation view
#load libraries

library(readxl)
library(dplyr)


############################################################################
#user specifications

# Working directory for this run. Every relative path used below (the
# outputs/ workbooks and the master station table) is resolved against it,
# so adjust it for the local machine before running.
setwd("C:/Users/Michael Palmer/Association to Preserve Cape Cod, Inc/Programs & Projects - STATE of the Waters/Data Analysis/CCC_data_processing/final_2015_2024")

# Station-level sheet ("avg5yr_station_ei") from each yearly output workbook.
station_2021 <- read_excel(
  path = "outputs/CCC_CE_output_2021.xlsx",
  sheet = "avg5yr_station_ei"
)
station_2022 <- read_excel(
  path = "outputs/CCC_CE_output_2022.xlsx",
  sheet = "avg5yr_station_ei"
)
station_2023 <- read_excel(
  path = "outputs/CCC_CE_output_2023.xlsx",
  sheet = "avg5yr_station_ei"
)
station_2024 <- read_excel(
  path = "outputs/CCC_CE_output_2024.xlsx",
  sheet = "avg5yr_station_ei"
)
station_2025 <- read_excel(
  path = "outputs/CCC_CE_output_2025.xlsx",
  sheet = "avg5yr_station_ei"
)

# Embayment-level sheet from the 2025 workbook.
# NOTE(review): this object is not referenced again in the visible script.
ebayment_2025 <- read_excel(
  path = "outputs/CCC_CE_output_2025.xlsx",
  sheet = "embayment_status"
)

# Master station table; both station key columns are stored as character
# before they are used as join keys further down.
stations <- mutate(
  read_excel(path = "Master_Coastal_Embayment_table_20251009.xlsx"),
  across(c(Station, Station_number), as.character)
)


# Combine the 2025 station table with the EI_5yr_avg column of each earlier
# year. Full joins keep stations that appear in any of the five tables.
# Wrapped in local() so the helper and key vector stay out of the workspace.
station_all <- local({
  keys <- c("Station", "Station_number")

  # Keep the two key columns plus EI_5yr_avg, renaming the latter to
  # EI_5yr_avg_<year>.
  with_year <- function(tbl, year) {
    picked <- select(tbl, Station, Station_number, EI_5yr_avg)
    names(picked)[3] <- paste0("EI_5yr_avg_", year)
    picked
  }

  station_2025 %>%
    full_join(with_year(station_2021, 2021), by = keys) %>%
    full_join(with_year(station_2022, 2022), by = keys) %>%
    full_join(with_year(station_2023, 2023), by = keys) %>%
    full_join(with_year(station_2024, 2024), by = keys)
})

# Build the presentation table: one row per row of station_all, with the
# station attributes from the master table followed by the yearly EI columns.
result <- station_all %>%
  # Keep only the keys and EI-related columns; the un-suffixed columns come
  # from the 2025 table, so they are given a _2025 suffix here.
  select(
    Station, Station_number,
    EI_5yr_avg_2021, EI_5yr_avg_2022,
    EI_5yr_avg_2023, EI_5yr_avg_2024,
    EI_5yr_avg_2025 = EI_5yr_avg,
    Years_included_2025 = Years_included,
    Status_2025 = Status
  ) %>%
  # Attach location and embayment attributes from the master table; stations
  # without a match keep their row with NA attributes.
  left_join(
    select(
      stations,
      Station, Station_number,
      Latitude, Longitude, Salt_marsh, BBC_site,
      Embayment_name, Embayment_ID, Town
    ),
    by = c("Station", "Station_number")
  ) %>%
  # Final column order for the output workbook.
  select(
    Station, Station_number,
    Latitude, Longitude,
    Salt_marsh, BBC_site,
    Embayment_name, Embayment_ID, Town,
    EI_5yr_avg_2021, EI_5yr_avg_2022,
    EI_5yr_avg_2023, EI_5yr_avg_2024,
    EI_5yr_avg_2025,
    Years_included_2025,
    Status_2025
  )

# Rows with no EI value in any of the five years.
# NOTE(review): this table is not used again in the visible script; it looks
# like an inspection aid.
null_records <- result %>%
  filter(
    is.na(EI_5yr_avg_2021) &
      is.na(EI_5yr_avg_2022) &
      is.na(EI_5yr_avg_2023) &
      is.na(EI_5yr_avg_2024) &
      is.na(EI_5yr_avg_2025)
  )

# Add Embayment_status: within each embayment, exactly one row (the one with
# the lowest 2025 EI average) carries its Status_2025 value; every other row,
# and every row of an embayment with no 2025 EI value, gets NA.
# NOTE(review): rows whose Embayment_name and Embayment_ID are both NA are
# grouped together and treated like a single embayment here.
result_flagged <- result %>%
  group_by(Embayment_name, Embayment_ID) %>%
  mutate(
    Embayment_status = if (any(!is.na(EI_5yr_avg_2025))) {
      # Positions (within the group) holding the lowest 2025 EI average
      lowest <- which(EI_5yr_avg_2025 == min(EI_5yr_avg_2025, na.rm = TRUE))

      # Ties go to the smallest Station_number, then to the earliest row.
      # NOTE(review): if Station_number is character (the master table's copy
      # is coerced to character when loaded), min() compares as text, so "10"
      # sorts before "9" - confirm that is acceptable.
      tied_ids <- Station_number[lowest]
      flagged <- min(lowest[tied_ids == min(tied_ids)])

      # Only the flagged row carries the status
      if_else(seq_along(Status_2025) == flagged, Status_2025, NA_character_)
    } else {
      # No 2025 EI value anywhere in this embayment
      NA_character_
    }
  ) %>%
  ungroup() %>%
  # Done after Embayment_status is derived, so the placeholder label never
  # leaks into Embayment_status.
  mutate(
    Status_2025 = if_else(
      is.na(Status_2025),
      "Insufficient Data",
      as.character(Status_2025)
    )
  )


# Write the final flagged table to the outputs folder (path is relative to
# the working directory). The call is namespace-qualified because the
# library() calls at the top of this script attach only readxl and dplyr,
# neither of which provides write_xlsx(); the writexl package must be
# installed.
writexl::write_xlsx(
  result_flagged,
  path = "outputs/SOTW_coastal_embayments_2025_final_results.xlsx"
)
