packages <- c("tidyverse", "lubridate", "writexl", "knitr",
              "MMWRweek")

if (length(setdiff(packages, rownames(installed.packages()))) > 0) {
  install.packages(setdiff(packages, rownames(installed.packages())), repos = "http://cran.us.r-project.org")  
}

options(knitr.kable.NA = '')

library(tidyverse)
library(lubridate)
library(writexl)
library(knitr)
library(MMWRweek)

nat <- read_csv("data/outputs/national_and_state_summary.csv") %>% 
  filter(state=="US.agg") %>% 
  rename(covid19.nchs=covid.death.hybrid)

dates <- MMWRweek(nat$week_start_date)

nat$year <- dates$MMWRyear
nat$day <- dates$MMWRday
nat$week <- dates$MMWRweek
weeks_in <- 1:19
weeks_march <- 10:19

natlsum_ad <- nat %>%
  filter(year==2020) %>% 
  filter(week %in% weeks_in)

national_weekly <- natlsum_ad %>% 
  select(week, week_end_date, year,
                     all_cause_deaths, expected_all_cause=baseline_all_cause,
                     expected_all_cause_upper=baseline_all_cause_upper, 
                     expected_all_cause_lower=baseline_all_cause_lower,
                     excess_all_cause_deaths, covid19.nchs)

natlsum_ad %>% 
  ggplot(aes(week_end_date,all_cause_deaths)) +
  geom_ribbon(aes(ymin=baseline_all_cause_lower, ymax=baseline_all_cause_upper), fill="gray70", alpha=.5) +
  geom_ribbon(aes(ymin=baseline_all_cause, ymax=all_cause_deaths), fill="sienna1", alpha=.5) +
  geom_ribbon(aes(ymin=all_cause_deaths-covid19.nchs, ymax=all_cause_deaths), fill="tomato3", alpha=.5) +
  geom_line(data=natlsum_ad, aes(x=week_end_date, y=all_cause_deaths),color="sienna1", alpha=.4) +
  geom_line(color="black", size=1) +
  theme_minimal() +
  labs(title="U.S. Deaths", y="Weekly deaths", x="") 

state_lag <- read_csv("data/outputs/NobBs.complete.csv")

state.abb <- c(state.abb, "DC", "NYC")
state.name <- c(state.name, "District of Columbia", "New York City")
state_names <- data.frame(state.abb, state.name)

state_lag_long <- state_lag %>%
  filter(week==4) %>% 
  pivot_longer(cols=3:ncol(state_lag),
               names_to="state",
               values_to="lag") %>% 
  mutate(quantile_rank=ntile(lag, 3)) %>% 
  select(-X1, -week) %>% 
  left_join(state_names, by=c("state"="state.name")) %>% 
  select(-state) %>% 
  rename(state=state.abb)

states <- read_csv("data/outputs/national_and_state_summary.csv") %>% 
  filter(state!="US" & state!="US.agg") %>% 
  rename(covid19.nchs=covid.death.hybrid) %>% 
  ungroup() %>% 
  left_join(state_lag_long)

orders <- read_csv("data/archive/stay_at_home_orders.csv") %>%   
  left_join(state_names, by=c("state"="state.name")) %>% 
  select(-state, state=state.abb)

states <- left_join(states, orders)

states <- states %>% 
  mutate(restriction_type=gsub("_variance", "", restrictions))
states$restriction_type = ifelse(states$state=="NYC", "major", states$restriction_type)
states_18 <- states %>% 
  filter(year==2020) %>% 
  filter(week==18) %>% 
  mutate(flag = case_when(
    all_cause_deaths < baseline_all_cause_lower ~ "Lower than baseline range",
    all_cause_deaths > baseline_all_cause_upper ~ "Higher than baseline range",
    TRUE ~ "Within range"
  )) %>% 
  select(state, flag18=flag)

states_19 <- states %>% 
  filter(year==2020) %>% 
  filter(week==19) %>% 
  mutate(flag = case_when(
    all_cause_deaths < baseline_all_cause_lower ~ "Lower than baseline range",
    all_cause_deaths > baseline_all_cause_upper ~ "Higher than baseline range",
    TRUE ~ "Within range"
  )) %>% 
  select(state, flag19=flag)
march <- states %>% 
  filter(year==2020) %>% 
  filter(week %in% weeks_march) %>% 
  group_by(state) %>%
  summarize(all_cause_deaths=sum(all_cause_deaths, na.rm=T),
            expected=sum(baseline_all_cause, na.rm=T),
            excess_deaths=sum(excess_all_cause_deaths, na.rm=T),
            covid_deaths=sum(covid19.nchs, na.rm=T),
            x_excess_covid=round(excess_deaths/covid_deaths,1),
            excess_minus_covid=excess_deaths-covid_deaths
            ) %>% 
  mutate(percent_covid_in_excess=round(covid_deaths/excess_deaths*100)) %>% 
  ungroup() %>% 
  left_join(states_18) %>% 
  left_join(states_19) 


march$percent_covid_in_excess <- ifelse(march$percent_covid_in_excess >100, NA, march$percent_covid_in_excess)
march$percent_covid_in_excess <- ifelse(march$percent_covid_in_excess <0, NA, march$percent_covid_in_excess)

march <- unique(march)
march <- march %>% 
  mutate(percent_more=round((all_cause_deaths-expected)/expected*100,2))
march <- march %>% 
  left_join(state_lag_long) %>% 
  left_join(orders)  %>% 
  mutate(restriction_type=gsub("_variance", "", restrictions))

higher <- march %>% 
  filter(flag19=="Higher than baseline range") %>% 
  pull(state)

within <- march %>% 
  filter(flag19=="Within range") %>% 
  pull(state)

lower <- march %>% 
  filter(flag19=="Lower than baseline range") %>% 
  pull(state)
march$restriction_type = ifelse(march$state=="NYC", "major", march$restriction_type)


major <- march %>% 
  filter(restriction_type=="major") %>% 
  pull(state)

minor <- march %>% 
  filter(restriction_type=="minor") %>% 
  pull(state)

moderate <- march %>% 
  filter(restriction_type=="moderate") %>% 
  pull(state)


none <- march %>% 
  filter(restriction_type=="none") %>% 
  pull(state)
nat_solo <- national_weekly %>% 
  filter(year==2020) %>% 
  filter(week %in% weeks_march) %>% 
  #filter(week %in% 10:14) %>% 
  ungroup() %>% 
  summarize(all_deaths=sum(all_cause_deaths),
            excess_deaths=sum(excess_all_cause_deaths),
            covid19.nchs=sum(covid19.nchs)) %>% 
  mutate(percent_covid_in_excess=round(covid19.nchs/excess_deaths*100)) %>% 
  mutate(Place="U.S.") %>% 
  select(Place, all_deaths, excess_deaths, covid19.nchs, percent_covid_in_excess)

flagged_states <- march %>% 
# change flag here maybe
    filter(flag18=="Higher than baseline range" & flag19=="Higher than baseline range") %>% 
  filter(excess_deaths >0) %>% 
  filter(x_excess_covid >1) %>% 
  select(Place=state, all_deaths=all_cause_deaths,
         excess_deaths=excess_deaths, covid19.nchs=covid_deaths,
         percent_covid_in_excess) 

nice_table <- rbind(nat_solo, flagged_states) %>% 
  mutate(excess_minus=excess_deaths-covid19.nchs) %>% 
  rename(`All deaths`=all_deaths,
         `Excess deaths`=excess_deaths,
         `Covid-19 deaths`=covid19.nchs,
         `Covid-19 % of excess` = percent_covid_in_excess,
         `Excess deaths minus covid-19`=excess_minus)

other_states <- march %>% 
  filter(flag18!="Higher than baseline range" | flag19!="Higher than baseline range")
other_states$percent_covid_in_excess <- ifelse(other_states$percent_covid_in_excess > 100, NA, other_states$percent_covid_in_excess)
other_states$percent_covid_in_excess <- ifelse(other_states$percent_covid_in_excess == 0, NA, other_states$percent_covid_in_excess)
other_states$excess_minus_covid <- ifelse(other_states$excess_minus_covid<0, NA, other_states$excess_minus_covid)

other_states <- other_states %>%   
  select(Place=state, `All deaths`=all_cause_deaths,
         `Excess deaths`=excess_deaths, `Covid-19 deaths`=covid_deaths,
         `Covid-19 % of excess`=percent_covid_in_excess, `Week 18`=flag18, `Week 19`=flag19) 

There were more than 101,600 excess deaths between March and early May, about 25 percent more than were publicly attributed to covid-19 at the time.

This updated model adjusts not just for seasonality, population, flu, but also accounts for delay in reporting from individual states.

More recent data suggests that the gap between unattributed and attributed covid-19 deaths is shrinking.

Deaths through May 9, 2020

kable(nice_table %>% filter(Place=="U.S."))
Place All deaths Excess deaths Covid-19 deaths Covid-19 % of excess Excess deaths minus covid-19
U.S. 618298 101619 75451 74 26168

States with major stay-at-home restrictions

statessum20 <- states %>%
  filter(state %in% major) %>% 
  filter(year==2020) %>% 
  filter(week %in% weeks_in) #%>% 
  #filter(quantile_rank==3)

statessum_ad <- states %>% 
  filter(state %in% major) %>% 
  filter(year!=2020) %>% 
  filter(week %in% weeks_in) #%>% 
  #filter(quantile_rank==3)

if (nrow(statessum20) > 0) {
statessum20  %>% 
  ggplot(aes(week_end_date,all_cause_deaths)) +
  geom_ribbon(aes(ymin=baseline_all_cause_lower, ymax=baseline_all_cause_upper), fill="gray70", alpha=.5) +
  geom_ribbon(aes(ymin=baseline_all_cause, ymax=all_cause_deaths), fill="sienna1", alpha=.5) +
  geom_ribbon(aes(ymin=all_cause_deaths-covid19.nchs, ymax=all_cause_deaths), fill="tomato", alpha=.5) +
  geom_line(aes(x=week_end_date, y=all_cause_deaths),color="sienna1", alpha=.4) +
  geom_line(color="black", size=.5) +
  facet_wrap(~state, scales="free_y", ncol=5) +
  theme_minimal() +
  labs(title="States with major stay-at-home orders")#, 
       #subtitle="Exceeds the expected range and with the least delays in reporting")
}

States with moderate stay-at-home restrictions

statessum20 <- states %>%
  filter(state %in% moderate) %>% 
  filter(year==2020) %>% 
  filter(week %in% weeks_in) #%>% 
  #filter(quantile_rank==3)

statessum_ad <- states %>% 
  filter(state %in% moderate) %>% 
  filter(year!=2020) %>% 
  filter(week %in% weeks_in) #%>% 
  #filter(quantile_rank==3)

if (nrow(statessum20) > 0) {
statessum20  %>% 
  ggplot(aes(week_end_date,all_cause_deaths)) +
  geom_ribbon(aes(ymin=baseline_all_cause_lower, ymax=baseline_all_cause_upper), fill="gray70", alpha=.5) +
  geom_ribbon(aes(ymin=baseline_all_cause, ymax=all_cause_deaths), fill="sienna1", alpha=.5) +
  geom_ribbon(aes(ymin=all_cause_deaths-covid19.nchs, ymax=all_cause_deaths), fill="tomato", alpha=.5) +
  geom_line(aes(x=week_end_date, y=all_cause_deaths),color="sienna1", alpha=.4) +
  geom_line(color="black", size=.5) +
  facet_wrap(~state, scales="free_y", ncol=5) +
  theme_minimal() +
  labs(title="States with no stay-at-home orders")#, 
       #subtitle="Exceeds the expected range and with the least delays in reporting")
}

States with minor stay-at-home restrictions

statessum20 <- states %>%
  filter(state %in% minor) %>% 
  filter(year==2020) %>% 
  filter(week %in% weeks_in) #%>% 
  #filter(quantile_rank==3)

statessum_ad <- states %>% 
  filter(state %in% minor) %>% 
  filter(year!=2020) %>% 
  filter(week %in% weeks_in) #%>% 
  #filter(quantile_rank==3)

if (nrow(statessum20) > 0) {
statessum20  %>% 
  ggplot(aes(week_end_date,all_cause_deaths)) +
  geom_ribbon(aes(ymin=baseline_all_cause_lower, ymax=baseline_all_cause_upper), fill="gray70", alpha=.5) +
  geom_ribbon(aes(ymin=baseline_all_cause, ymax=all_cause_deaths), fill="sienna1", alpha=.5) +
  geom_ribbon(aes(ymin=all_cause_deaths-covid19.nchs, ymax=all_cause_deaths), fill="tomato", alpha=.5) +
  geom_line(aes(x=week_end_date, y=all_cause_deaths),color="sienna1", alpha=.4) +
  geom_line(color="black", size=.5) +
  facet_wrap(~state, scales="free_y", ncol=5) +
  theme_minimal() +
  labs(title="States with minor stay-at-home orders")#, 
       #subtitle="Exceeds the expected range and with the least delays in reporting")
}

States with no stay-at-home restrictions

statessum20 <- states %>%
  filter(state %in% none) %>% 
  filter(year==2020) %>% 
  filter(week %in% weeks_in) #%>% 
  #filter(quantile_rank==3)

statessum_ad <- states %>% 
  filter(state %in% none) %>% 
  filter(year!=2020) %>% 
  filter(week %in% weeks_in) #%>% 
  #filter(quantile_rank==3)

if (nrow(statessum20) > 0) {
statessum20  %>% 
  ggplot(aes(week_end_date,all_cause_deaths)) +
  geom_ribbon(aes(ymin=baseline_all_cause_lower, ymax=baseline_all_cause_upper), fill="gray70", alpha=.5) +
  geom_ribbon(aes(ymin=baseline_all_cause, ymax=all_cause_deaths), fill="sienna1", alpha=.5) +
  geom_ribbon(aes(ymin=all_cause_deaths-covid19.nchs, ymax=all_cause_deaths), fill="tomato", alpha=.5) +
  geom_line(aes(x=week_end_date, y=all_cause_deaths),color="sienna1", alpha=.4) +
  geom_line(color="black", size=.5) +
  facet_wrap(~state, scales="free_y", ncol=5) +
  theme_minimal() +
  labs(title="States with none stay-at-home orders")#, 
       #subtitle="Exceeds the expected range and with the least delays in reporting")
}

march$percent_covid_in_excess <- ifelse(march$percent_covid_in_excess > 100, NA, march$percent_covid_in_excess)
march$percent_covid_in_excess <- ifelse(march$percent_covid_in_excess == 0, NA, march$percent_covid_in_excess)
march$excess_minus_covid <- ifelse(march$excess_minus_covid<0, NA, march$excess_minus_covid)

march <- march %>%   
  mutate(lag=case_when(
    quantile_rank==1 ~ "Significant reporting lag",
    quantile_rank==2 ~ "Some reporting lag",
    quantile_rank==3 ~ "Least reporting lag"
  )) %>% 
  select(Place=state, `All deaths`=all_cause_deaths,
         `Excess deaths`=excess_deaths, `Covid-19 deaths`=covid_deaths,
         `Covid-19 % of excess`=percent_covid_in_excess, `Week 18`=flag18, `Week 19`=flag19, lag, order=restriction_type) 

kable(march %>% arrange(`Covid-19 % of excess`))
Place All deaths Excess deaths Covid-19 deaths Covid-19 % of excess Week 18 Week 19 lag order
VT 1242 152.0 46 30 Within range Within range Least reporting lag moderate
SC 10514 1087.0 332 31 Higher than baseline range Higher than baseline range Some reporting lag minor
AZ 13374 1424.0 573 40 Higher than baseline range Higher than baseline range Some reporting lag minor
TX 40978 2870.0 1134 40 Higher than baseline range Higher than baseline range Significant reporting lag minor
TN 14931 622.0 260 42 Within range Within range Some reporting lag minor
DE 1936 483.0 221 46 Higher than baseline range Higher than baseline range Significant reporting lag major
MS 6762 836.0 414 50 Higher than baseline range Higher than baseline range Some reporting lag minor
VA 15055 1845.0 981 53 Higher than baseline range Higher than baseline range Some reporting lag moderate
CA 56590 4752.5 2771 58 Higher than baseline range Higher than baseline range Least reporting lag major
IL 25700 5352.0 3330 62 Higher than baseline range Higher than baseline range Least reporting lag moderate
AL 10480 738.0 469 64 Higher than baseline range Higher than baseline range Significant reporting lag minor
GA 17338 2006.0 1286 64 Higher than baseline range Higher than baseline range Significant reporting lag minor
OR 7209 247.0 159 64 Within range Within range Significant reporting lag moderate
FL 44306 2750.0 1838 67 Higher than baseline range Higher than baseline range Least reporting lag minor
MD 12330 2642.0 1759 67 Higher than baseline range Higher than baseline range Some reporting lag major
LA 10561 2766.0 1887 68 Higher than baseline range Higher than baseline range Significant reporting lag moderate
MO 12537 775.0 526 68 Higher than baseline range Higher than baseline range Significant reporting lag minor
UT 3922 102.0 69 68 Within range Within range Least reporting lag minor
AR 6233 118.0 86 73 Within range Higher than baseline range Some reporting lag minor
NJ 28130 13972.0 10130 73 Higher than baseline range Higher than baseline range Least reporting lag major
NYC 33897 23615.0 17135 73 Higher than baseline range Higher than baseline range Least reporting lag major
CO 9368 1506.0 1118 74 Higher than baseline range Higher than baseline range Least reporting lag moderate
MI 24107 5363.0 3986 74 Higher than baseline range Higher than baseline range Some reporting lag major
DC 1479 354.0 274 77 Higher than baseline range Higher than baseline range Significant reporting lag moderate
NY 29856 10668.0 8258 77 Higher than baseline range Higher than baseline range Some reporting lag major
WI 11160 552.0 430 78 Higher than baseline range Higher than baseline range Least reporting lag minor
NH 2590 168.0 135 80 Within range Higher than baseline range Least reporting lag moderate
WA 12095 1062.0 866 82 Within range Higher than baseline range Least reporting lag moderate
MA 17453 5907.0 4971 84 Higher than baseline range Higher than baseline range Least reporting lag moderate
IN 14386 1730.0 1482 86 Higher than baseline range Higher than baseline range Some reporting lag moderate
AK 737 -25.0 10 Within range Within range Significant reporting lag none
HI 2206 -30.5 15 Within range Within range Least reporting lag moderate
IA 6035 79.0 269 Higher than baseline range Higher than baseline range Least reporting lag moderate
ID 2865 49.0 64 Within range Within range Some reporting lag moderate
KS 5216 60.0 186 Within range Within range Least reporting lag moderate
KY 8964 261.0 321 Within range Within range Significant reporting lag moderate
ME 2931 -16.0 59 Within range Within range Least reporting lag moderate
MN 9308 572.0 597 Higher than baseline range Higher than baseline range Some reporting lag major
MT 1969 1.0 20 Within range Within range Some reporting lag minor
ND 1063 -406.5 21 Lower than baseline range Lower than baseline range Some reporting lag minor
NE 3350 73.0 97 Within range Within range Significant reporting lag moderate
NM 3631 131.0 195 Within range Higher than baseline range Significant reporting lag major
NV 5284 169.0 304 Within range Within range Some reporting lag moderate
OH 23818 883.0 1277 Higher than baseline range Higher than baseline range Significant reporting lag moderate
OK 7069 -213.0 245 Lower than baseline range Lower than baseline range Significant reporting lag minor
PA 28651 3345.5 4333 Higher than baseline range Higher than baseline range Some reporting lag moderate
RI 2228 322.0 398 Higher than baseline range Higher than baseline range Significant reporting lag moderate
SD 1578 -26.0 45 Within range Within range Some reporting lag minor
WV 3962 6.0 66 Within range Lower than baseline range Significant reporting lag minor
WY 914 -6.5 3 Lower than baseline range Within range Significant reporting lag minor
states <- states %>% 
  filter(year==2020) %>% 
  mutate(adjusted_expected=all_cause_deaths-baseline_all_cause,
         adjusted_expected_minus_covid=adjusted_expected-covid19.nchs)

state_lag_narrow <- state_lag_long %>% 
  mutate(state=as.character(state)) %>% 
  select(Place=state, lag_index=lag) %>% 
  ungroup()

march <- left_join(march, state_lag_narrow)

write_csv(march, "data/outputs/states_summary_table.csv", na="")
write_csv(national_weekly, "data/outputs/national_weekly_table.csv", na="")
write_csv(states, "data/outputs/states_weekly_table.csv", na="")
write_xlsx(list(`beautiful table`= nice_table,
                `states summary table` = march,
                `national weekly` = national_weekly,
                `states weekly`=states), 
           "data/outputs/beautiful_table.xlsx")