packages <- c("tidyverse", "lubridate", "writexl", "knitr",
"MMWRweek")
if (length(setdiff(packages, rownames(installed.packages()))) > 0) {
install.packages(setdiff(packages, rownames(installed.packages())), repos = "http://cran.us.r-project.org")
}
options(knitr.kable.NA = '')
library(tidyverse)
library(lubridate)
library(writexl)
library(knitr)
library(MMWRweek)
nat <- read_csv("data/outputs/national_and_state_summary.csv") %>%
filter(state=="US.agg") %>%
rename(covid19.nchs=covid.death.hybrid)
dates <- MMWRweek(nat$week_start_date)
nat$year <- dates$MMWRyear
nat$day <- dates$MMWRday
nat$week <- dates$MMWRweek
weeks_in <- 1:19
weeks_march <- 10:19
natlsum_ad <- nat %>%
filter(year==2020) %>%
filter(week %in% weeks_in)
national_weekly <- natlsum_ad %>%
select(week, week_end_date, year,
all_cause_deaths, expected_all_cause=baseline_all_cause,
expected_all_cause_upper=baseline_all_cause_upper,
expected_all_cause_lower=baseline_all_cause_lower,
excess_all_cause_deaths, covid19.nchs)
natlsum_ad %>%
ggplot(aes(week_end_date,all_cause_deaths)) +
geom_ribbon(aes(ymin=baseline_all_cause_lower, ymax=baseline_all_cause_upper), fill="gray70", alpha=.5) +
geom_ribbon(aes(ymin=baseline_all_cause, ymax=all_cause_deaths), fill="sienna1", alpha=.5) +
geom_ribbon(aes(ymin=all_cause_deaths-covid19.nchs, ymax=all_cause_deaths), fill="tomato3", alpha=.5) +
geom_line(data=natlsum_ad, aes(x=week_end_date, y=all_cause_deaths),color="sienna1", alpha=.4) +
geom_line(color="black", size=1) +
theme_minimal() +
labs(title="U.S. Deaths", y="Weekly deaths", x="")
state_lag <- read_csv("data/outputs/NobBs.complete.csv")
state.abb <- c(state.abb, "DC", "NYC")
state.name <- c(state.name, "District of Columbia", "New York City")
state_names <- data.frame(state.abb, state.name)
state_lag_long <- state_lag %>%
filter(week==4) %>%
pivot_longer(cols=3:ncol(state_lag),
names_to="state",
values_to="lag") %>%
mutate(quantile_rank=ntile(lag, 3)) %>%
select(-X1, -week) %>%
left_join(state_names, by=c("state"="state.name")) %>%
select(-state) %>%
rename(state=state.abb)
states <- read_csv("data/outputs/national_and_state_summary.csv") %>%
filter(state!="US" & state!="US.agg") %>%
rename(covid19.nchs=covid.death.hybrid) %>%
ungroup() %>%
left_join(state_lag_long)
orders <- read_csv("data/archive/stay_at_home_orders.csv") %>%
left_join(state_names, by=c("state"="state.name")) %>%
select(-state, state=state.abb)
states <- left_join(states, orders)
states <- states %>%
mutate(restriction_type=gsub("_variance", "", restrictions))
states$restriction_type = ifelse(states$state=="NYC", "major", states$restriction_type)
states_18 <- states %>%
filter(year==2020) %>%
filter(week==18) %>%
mutate(flag = case_when(
all_cause_deaths < baseline_all_cause_lower ~ "Lower than baseline range",
all_cause_deaths > baseline_all_cause_upper ~ "Higher than baseline range",
TRUE ~ "Within range"
)) %>%
select(state, flag18=flag)
states_19 <- states %>%
filter(year==2020) %>%
filter(week==19) %>%
mutate(flag = case_when(
all_cause_deaths < baseline_all_cause_lower ~ "Lower than baseline range",
all_cause_deaths > baseline_all_cause_upper ~ "Higher than baseline range",
TRUE ~ "Within range"
)) %>%
select(state, flag19=flag)
march <- states %>%
filter(year==2020) %>%
filter(week %in% weeks_march) %>%
group_by(state) %>%
summarize(all_cause_deaths=sum(all_cause_deaths, na.rm=T),
expected=sum(baseline_all_cause, na.rm=T),
excess_deaths=sum(excess_all_cause_deaths, na.rm=T),
covid_deaths=sum(covid19.nchs, na.rm=T),
x_excess_covid=round(excess_deaths/covid_deaths,1),
excess_minus_covid=excess_deaths-covid_deaths
) %>%
mutate(percent_covid_in_excess=round(covid_deaths/excess_deaths*100)) %>%
ungroup() %>%
left_join(states_18) %>%
left_join(states_19)
march$percent_covid_in_excess <- ifelse(march$percent_covid_in_excess >100, NA, march$percent_covid_in_excess)
march$percent_covid_in_excess <- ifelse(march$percent_covid_in_excess <0, NA, march$percent_covid_in_excess)
march <- unique(march)
march <- march %>%
mutate(percent_more=round((all_cause_deaths-expected)/expected*100,2))
march <- march %>%
left_join(state_lag_long) %>%
left_join(orders) %>%
mutate(restriction_type=gsub("_variance", "", restrictions))
higher <- march %>%
filter(flag19=="Higher than baseline range") %>%
pull(state)
within <- march %>%
filter(flag19=="Within range") %>%
pull(state)
lower <- march %>%
filter(flag19=="Lower than baseline range") %>%
pull(state)
march$restriction_type = ifelse(march$state=="NYC", "major", march$restriction_type)
major <- march %>%
filter(restriction_type=="major") %>%
pull(state)
minor <- march %>%
filter(restriction_type=="minor") %>%
pull(state)
moderate <- march %>%
filter(restriction_type=="moderate") %>%
pull(state)
none <- march %>%
filter(restriction_type=="none") %>%
pull(state)
nat_solo <- national_weekly %>%
filter(year==2020) %>%
filter(week %in% weeks_march) %>%
#filter(week %in% 10:14) %>%
ungroup() %>%
summarize(all_deaths=sum(all_cause_deaths),
excess_deaths=sum(excess_all_cause_deaths),
covid19.nchs=sum(covid19.nchs)) %>%
mutate(percent_covid_in_excess=round(covid19.nchs/excess_deaths*100)) %>%
mutate(Place="U.S.") %>%
select(Place, all_deaths, excess_deaths, covid19.nchs, percent_covid_in_excess)
flagged_states <- march %>%
# change flag here maybe
filter(flag18=="Higher than baseline range" & flag19=="Higher than baseline range") %>%
filter(excess_deaths >0) %>%
filter(x_excess_covid >1) %>%
select(Place=state, all_deaths=all_cause_deaths,
excess_deaths=excess_deaths, covid19.nchs=covid_deaths,
percent_covid_in_excess)
nice_table <- rbind(nat_solo, flagged_states) %>%
mutate(excess_minus=excess_deaths-covid19.nchs) %>%
rename(`All deaths`=all_deaths,
`Excess deaths`=excess_deaths,
`Covid-19 deaths`=covid19.nchs,
`Covid-19 % of excess` = percent_covid_in_excess,
`Excess deaths minus covid-19`=excess_minus)
other_states <- march %>%
filter(flag18!="Higher than baseline range" | flag19!="Higher than baseline range")
other_states$percent_covid_in_excess <- ifelse(other_states$percent_covid_in_excess > 100, NA, other_states$percent_covid_in_excess)
other_states$percent_covid_in_excess <- ifelse(other_states$percent_covid_in_excess == 0, NA, other_states$percent_covid_in_excess)
other_states$excess_minus_covid <- ifelse(other_states$excess_minus_covid<0, NA, other_states$excess_minus_covid)
other_states <- other_states %>%
select(Place=state, `All deaths`=all_cause_deaths,
`Excess deaths`=excess_deaths, `Covid-19 deaths`=covid_deaths,
`Covid-19 % of excess`=percent_covid_in_excess, `Week 18`=flag18, `Week 19`=flag19)
There were more than 101,600 excess deaths between March and early May, about 25 percent more than were publicly attributed to covid-19 at the time.
This updated model adjusts not just for seasonality, population, flu, but also accounts for delay in reporting from individual states.
More recent data suggests that the gap between unattributed and attributed covid-19 deaths is shrinking.
kable(nice_table %>% filter(Place=="U.S."))
Place | All deaths | Excess deaths | Covid-19 deaths | Covid-19 % of excess | Excess deaths minus covid-19 |
---|---|---|---|---|---|
U.S. | 618298 | 101619 | 75451 | 74 | 26168 |
statessum20 <- states %>%
filter(state %in% major) %>%
filter(year==2020) %>%
filter(week %in% weeks_in) #%>%
#filter(quantile_rank==3)
statessum_ad <- states %>%
filter(state %in% major) %>%
filter(year!=2020) %>%
filter(week %in% weeks_in) #%>%
#filter(quantile_rank==3)
if (nrow(statessum20) > 0) {
statessum20 %>%
ggplot(aes(week_end_date,all_cause_deaths)) +
geom_ribbon(aes(ymin=baseline_all_cause_lower, ymax=baseline_all_cause_upper), fill="gray70", alpha=.5) +
geom_ribbon(aes(ymin=baseline_all_cause, ymax=all_cause_deaths), fill="sienna1", alpha=.5) +
geom_ribbon(aes(ymin=all_cause_deaths-covid19.nchs, ymax=all_cause_deaths), fill="tomato", alpha=.5) +
geom_line(aes(x=week_end_date, y=all_cause_deaths),color="sienna1", alpha=.4) +
geom_line(color="black", size=.5) +
facet_wrap(~state, scales="free_y", ncol=5) +
theme_minimal() +
labs(title="States with major stay-at-home orders")#,
#subtitle="Exceeds the expected range and with the least delays in reporting")
}
statessum20 <- states %>%
filter(state %in% moderate) %>%
filter(year==2020) %>%
filter(week %in% weeks_in) #%>%
#filter(quantile_rank==3)
statessum_ad <- states %>%
filter(state %in% moderate) %>%
filter(year!=2020) %>%
filter(week %in% weeks_in) #%>%
#filter(quantile_rank==3)
if (nrow(statessum20) > 0) {
statessum20 %>%
ggplot(aes(week_end_date,all_cause_deaths)) +
geom_ribbon(aes(ymin=baseline_all_cause_lower, ymax=baseline_all_cause_upper), fill="gray70", alpha=.5) +
geom_ribbon(aes(ymin=baseline_all_cause, ymax=all_cause_deaths), fill="sienna1", alpha=.5) +
geom_ribbon(aes(ymin=all_cause_deaths-covid19.nchs, ymax=all_cause_deaths), fill="tomato", alpha=.5) +
geom_line(aes(x=week_end_date, y=all_cause_deaths),color="sienna1", alpha=.4) +
geom_line(color="black", size=.5) +
facet_wrap(~state, scales="free_y", ncol=5) +
theme_minimal() +
labs(title="States with no stay-at-home orders")#,
#subtitle="Exceeds the expected range and with the least delays in reporting")
}
statessum20 <- states %>%
filter(state %in% minor) %>%
filter(year==2020) %>%
filter(week %in% weeks_in) #%>%
#filter(quantile_rank==3)
statessum_ad <- states %>%
filter(state %in% minor) %>%
filter(year!=2020) %>%
filter(week %in% weeks_in) #%>%
#filter(quantile_rank==3)
if (nrow(statessum20) > 0) {
statessum20 %>%
ggplot(aes(week_end_date,all_cause_deaths)) +
geom_ribbon(aes(ymin=baseline_all_cause_lower, ymax=baseline_all_cause_upper), fill="gray70", alpha=.5) +
geom_ribbon(aes(ymin=baseline_all_cause, ymax=all_cause_deaths), fill="sienna1", alpha=.5) +
geom_ribbon(aes(ymin=all_cause_deaths-covid19.nchs, ymax=all_cause_deaths), fill="tomato", alpha=.5) +
geom_line(aes(x=week_end_date, y=all_cause_deaths),color="sienna1", alpha=.4) +
geom_line(color="black", size=.5) +
facet_wrap(~state, scales="free_y", ncol=5) +
theme_minimal() +
labs(title="States with minor stay-at-home orders")#,
#subtitle="Exceeds the expected range and with the least delays in reporting")
}
statessum20 <- states %>%
filter(state %in% none) %>%
filter(year==2020) %>%
filter(week %in% weeks_in) #%>%
#filter(quantile_rank==3)
statessum_ad <- states %>%
filter(state %in% none) %>%
filter(year!=2020) %>%
filter(week %in% weeks_in) #%>%
#filter(quantile_rank==3)
if (nrow(statessum20) > 0) {
statessum20 %>%
ggplot(aes(week_end_date,all_cause_deaths)) +
geom_ribbon(aes(ymin=baseline_all_cause_lower, ymax=baseline_all_cause_upper), fill="gray70", alpha=.5) +
geom_ribbon(aes(ymin=baseline_all_cause, ymax=all_cause_deaths), fill="sienna1", alpha=.5) +
geom_ribbon(aes(ymin=all_cause_deaths-covid19.nchs, ymax=all_cause_deaths), fill="tomato", alpha=.5) +
geom_line(aes(x=week_end_date, y=all_cause_deaths),color="sienna1", alpha=.4) +
geom_line(color="black", size=.5) +
facet_wrap(~state, scales="free_y", ncol=5) +
theme_minimal() +
labs(title="States with none stay-at-home orders")#,
#subtitle="Exceeds the expected range and with the least delays in reporting")
}
march$percent_covid_in_excess <- ifelse(march$percent_covid_in_excess > 100, NA, march$percent_covid_in_excess)
march$percent_covid_in_excess <- ifelse(march$percent_covid_in_excess == 0, NA, march$percent_covid_in_excess)
march$excess_minus_covid <- ifelse(march$excess_minus_covid<0, NA, march$excess_minus_covid)
march <- march %>%
mutate(lag=case_when(
quantile_rank==1 ~ "Significant reporting lag",
quantile_rank==2 ~ "Some reporting lag",
quantile_rank==3 ~ "Least reporting lag"
)) %>%
select(Place=state, `All deaths`=all_cause_deaths,
`Excess deaths`=excess_deaths, `Covid-19 deaths`=covid_deaths,
`Covid-19 % of excess`=percent_covid_in_excess, `Week 18`=flag18, `Week 19`=flag19, lag, order=restriction_type)
kable(march %>% arrange(`Covid-19 % of excess`))
Place | All deaths | Excess deaths | Covid-19 deaths | Covid-19 % of excess | Week 18 | Week 19 | lag | order |
---|---|---|---|---|---|---|---|---|
VT | 1242 | 152.0 | 46 | 30 | Within range | Within range | Least reporting lag | moderate |
SC | 10514 | 1087.0 | 332 | 31 | Higher than baseline range | Higher than baseline range | Some reporting lag | minor |
AZ | 13374 | 1424.0 | 573 | 40 | Higher than baseline range | Higher than baseline range | Some reporting lag | minor |
TX | 40978 | 2870.0 | 1134 | 40 | Higher than baseline range | Higher than baseline range | Significant reporting lag | minor |
TN | 14931 | 622.0 | 260 | 42 | Within range | Within range | Some reporting lag | minor |
DE | 1936 | 483.0 | 221 | 46 | Higher than baseline range | Higher than baseline range | Significant reporting lag | major |
MS | 6762 | 836.0 | 414 | 50 | Higher than baseline range | Higher than baseline range | Some reporting lag | minor |
VA | 15055 | 1845.0 | 981 | 53 | Higher than baseline range | Higher than baseline range | Some reporting lag | moderate |
CA | 56590 | 4752.5 | 2771 | 58 | Higher than baseline range | Higher than baseline range | Least reporting lag | major |
IL | 25700 | 5352.0 | 3330 | 62 | Higher than baseline range | Higher than baseline range | Least reporting lag | moderate |
AL | 10480 | 738.0 | 469 | 64 | Higher than baseline range | Higher than baseline range | Significant reporting lag | minor |
GA | 17338 | 2006.0 | 1286 | 64 | Higher than baseline range | Higher than baseline range | Significant reporting lag | minor |
OR | 7209 | 247.0 | 159 | 64 | Within range | Within range | Significant reporting lag | moderate |
FL | 44306 | 2750.0 | 1838 | 67 | Higher than baseline range | Higher than baseline range | Least reporting lag | minor |
MD | 12330 | 2642.0 | 1759 | 67 | Higher than baseline range | Higher than baseline range | Some reporting lag | major |
LA | 10561 | 2766.0 | 1887 | 68 | Higher than baseline range | Higher than baseline range | Significant reporting lag | moderate |
MO | 12537 | 775.0 | 526 | 68 | Higher than baseline range | Higher than baseline range | Significant reporting lag | minor |
UT | 3922 | 102.0 | 69 | 68 | Within range | Within range | Least reporting lag | minor |
AR | 6233 | 118.0 | 86 | 73 | Within range | Higher than baseline range | Some reporting lag | minor |
NJ | 28130 | 13972.0 | 10130 | 73 | Higher than baseline range | Higher than baseline range | Least reporting lag | major |
NYC | 33897 | 23615.0 | 17135 | 73 | Higher than baseline range | Higher than baseline range | Least reporting lag | major |
CO | 9368 | 1506.0 | 1118 | 74 | Higher than baseline range | Higher than baseline range | Least reporting lag | moderate |
MI | 24107 | 5363.0 | 3986 | 74 | Higher than baseline range | Higher than baseline range | Some reporting lag | major |
DC | 1479 | 354.0 | 274 | 77 | Higher than baseline range | Higher than baseline range | Significant reporting lag | moderate |
NY | 29856 | 10668.0 | 8258 | 77 | Higher than baseline range | Higher than baseline range | Some reporting lag | major |
WI | 11160 | 552.0 | 430 | 78 | Higher than baseline range | Higher than baseline range | Least reporting lag | minor |
NH | 2590 | 168.0 | 135 | 80 | Within range | Higher than baseline range | Least reporting lag | moderate |
WA | 12095 | 1062.0 | 866 | 82 | Within range | Higher than baseline range | Least reporting lag | moderate |
MA | 17453 | 5907.0 | 4971 | 84 | Higher than baseline range | Higher than baseline range | Least reporting lag | moderate |
IN | 14386 | 1730.0 | 1482 | 86 | Higher than baseline range | Higher than baseline range | Some reporting lag | moderate |
AK | 737 | -25.0 | 10 | Within range | Within range | Significant reporting lag | none | |
HI | 2206 | -30.5 | 15 | Within range | Within range | Least reporting lag | moderate | |
IA | 6035 | 79.0 | 269 | Higher than baseline range | Higher than baseline range | Least reporting lag | moderate | |
ID | 2865 | 49.0 | 64 | Within range | Within range | Some reporting lag | moderate | |
KS | 5216 | 60.0 | 186 | Within range | Within range | Least reporting lag | moderate | |
KY | 8964 | 261.0 | 321 | Within range | Within range | Significant reporting lag | moderate | |
ME | 2931 | -16.0 | 59 | Within range | Within range | Least reporting lag | moderate | |
MN | 9308 | 572.0 | 597 | Higher than baseline range | Higher than baseline range | Some reporting lag | major | |
MT | 1969 | 1.0 | 20 | Within range | Within range | Some reporting lag | minor | |
ND | 1063 | -406.5 | 21 | Lower than baseline range | Lower than baseline range | Some reporting lag | minor | |
NE | 3350 | 73.0 | 97 | Within range | Within range | Significant reporting lag | moderate | |
NM | 3631 | 131.0 | 195 | Within range | Higher than baseline range | Significant reporting lag | major | |
NV | 5284 | 169.0 | 304 | Within range | Within range | Some reporting lag | moderate | |
OH | 23818 | 883.0 | 1277 | Higher than baseline range | Higher than baseline range | Significant reporting lag | moderate | |
OK | 7069 | -213.0 | 245 | Lower than baseline range | Lower than baseline range | Significant reporting lag | minor | |
PA | 28651 | 3345.5 | 4333 | Higher than baseline range | Higher than baseline range | Some reporting lag | moderate | |
RI | 2228 | 322.0 | 398 | Higher than baseline range | Higher than baseline range | Significant reporting lag | moderate | |
SD | 1578 | -26.0 | 45 | Within range | Within range | Some reporting lag | minor | |
WV | 3962 | 6.0 | 66 | Within range | Lower than baseline range | Significant reporting lag | minor | |
WY | 914 | -6.5 | 3 | Lower than baseline range | Within range | Significant reporting lag | minor |
states <- states %>%
filter(year==2020) %>%
mutate(adjusted_expected=all_cause_deaths-baseline_all_cause,
adjusted_expected_minus_covid=adjusted_expected-covid19.nchs)
state_lag_narrow <- state_lag_long %>%
mutate(state=as.character(state)) %>%
select(Place=state, lag_index=lag) %>%
ungroup()
march <- left_join(march, state_lag_narrow)
write_csv(march, "data/outputs/states_summary_table.csv", na="")
write_csv(national_weekly, "data/outputs/national_weekly_table.csv", na="")
write_csv(states, "data/outputs/states_weekly_table.csv", na="")
write_xlsx(list(`beautiful table`= nice_table,
`states summary table` = march,
`national weekly` = national_weekly,
`states weekly`=states),
"data/outputs/beautiful_table.xlsx")