library(tidyverse)
library(here) # path control
library(fixest) # estimations with fixed effects
library(knitr)
library(tictoc)
tic("Total render time")
source("fun_dynamic_bins.R")
mem.maxVSize(vsize = 40000)
[1] 40000
I run all the robustness/sensitivity checks here.
library(tidyverse)
library(here) # path control
library(fixest) # estimations with fixed effects
library(knitr)
library(tictoc)
tic("Total render time")
source("fun_dynamic_bins.R")
mem.maxVSize(vsize = 40000)
[1] 40000
Import the full data to test pre-post pandemic and also the pre-pandemic data.
data_full <-
read_rds(here("..", "output", "data_reports.rds"))
data_full <- data_full %>%
mutate(prec_quintile = ntile(prec, 5),
rh_quintile = ntile(rh, 5),
wsp_quintile = ntile(wsp, 5))
data <-
read_rds(here("..", "output", "data_reports.rds")) %>%
filter(covid_phase == "Pre-pandemic")
data <- data %>%
mutate(prec_quintile = ntile(prec, 5),
rh_quintile = ntile(rh, 5),
wsp_quintile = ntile(wsp, 5))
Here are the pandemic phases. I don’t remember exactly how i defined it, but i researched it… Pretty useless now since i do pre pandemic only, but oh well
data_full %>%
distinct(date, covid_phase) %>%
group_by(covid_phase) %>%
summarise(nr_days = n(), min_date = min(date), max_date = max(date)) %>%
arrange(min_date) %>%
kable()
covid_phase | nr_days | min_date | max_date |
---|---|---|---|
Pre-pandemic | 1506 | 2016-01-01 | 2020-02-14 |
Pandemic | 625 | 2020-02-15 | 2021-10-31 |
Post-pandemic | 1035 | 2021-11-01 | 2024-08-31 |
Look at share of missing variables per phase (in pc), we have very few and not more in old data than in new one.
data_full %>%
select(reports_dv, tmean,
prec_quintile, rh_quintile, wsp_quintile,
covid_phase) %>%
group_by(covid_phase) %>%
summarise(across(everything(), ~ 100*sum(is.na(.)/n()))) %>%
kable()
covid_phase | reports_dv | tmean | prec_quintile | rh_quintile | wsp_quintile |
---|---|---|---|---|---|
Pandemic | 0 | 0.0146892 | 0.0000000 | 0.0214739 | 0.0058625 |
Post-pandemic | 0 | 0.1003574 | 0.1932367 | 0.1014712 | 0.1057671 |
Pre-pandemic | 0 | 0.0016949 | 0.0000000 | 0.0028704 | 0.0009295 |
Run the regressions:
reg_baseline <- data_full %>%
filter(covid_phase == "Pre-pandemic") %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
reg_pandemic <- data_full %>%
filter(covid_phase == "Pandemic") %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
reg_post <- data_full %>%
filter(covid_phase == "Post-pandemic") %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
reg_all_periods <- data_full %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
etable(reg_baseline, reg_pandemic, reg_post, reg_all_periods) %>%
kable()
reg_baseline | reg_pandemic | reg_post | reg_all_periods | |
---|---|---|---|---|
Dependent Var.: | Reports | Reports | Reports | Reports |
Temperature | 0.0278*** (0.0033) | 0.0345*** (0.0063) | 0.0294*** (0.0035) | 0.0264*** (0.0020) |
Fixed-Effects: | —————— | —————— | —————— | —————— |
Precipitation | Yes | Yes | Yes | Yes |
Humidity | Yes | Yes | Yes | Yes |
Windspeed | Yes | Yes | Yes | Yes |
Neighborhood | Yes | Yes | Yes | Yes |
Year-Month | Yes | Yes | Yes | Yes |
Day of Week | Yes | Yes | Yes | Yes |
Day of Year | Yes | Yes | Yes | Yes |
_______________ | __________________ | __________________ | __________________ | __________________ |
S.E.: Clustered | by: Neighborhood | by: Neighborhood | by: Neighborhood | by: Neighborhood |
Observations | 3,624,826 | 1,496,546 | 2,496,071 | 7,675,815 |
Squared Cor. | 0.01333 | 0.02134 | 0.02326 | 0.01987 |
Pseudo R2 | 0.05615 | 0.06571 | 0.06768 | 0.06628 |
BIC | 804,167.0 | 472,347.1 | 838,974.5 | 2,047,204.3 |
Split the sample by weekend days and weekdays
reg_weekend <- data %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_year,
cluster = ~ ageb,
fsplit = ~ weekend_dummy)
etable(reg_weekend) %>%
kable()
reg_weekend.1 | reg_weekend.2 | reg_weekend.3 | |
---|---|---|---|
Sample (weekend_dummy) | Full sample | Weekdays | Weekend |
Dependent Var.: | Reports | Reports | Reports |
Temperature | 0.0275*** (0.0033) | 0.0281*** (0.0042) | 0.0244* (0.0103) |
Fixed-Effects: | —————— | —————— | —————- |
Precipitation | Yes | Yes | Yes |
Humidity | Yes | Yes | Yes |
Windspeed | Yes | Yes | Yes |
Neighborhood | Yes | Yes | Yes |
Year-Month | Yes | Yes | Yes |
Day of Year | Yes | Yes | Yes |
______________________ | __________________ | __________________ | ________________ |
S.E.: Clustered | by: Neighborhood | by: Neighborhood | by: Neighborhood |
Observations | 3,624,826 | 2,570,480 | 1,006,168 |
Squared Cor. | 0.01307 | 0.01252 | 0.01662 |
Pseudo R2 | 0.05542 | 0.05448 | 0.06153 |
BIC | 804,658.0 | 563,581.8 | 273,075.1 |
Or interact tmean
with the weekend dummy
reg_weekend_interacted <- data %>%
fepois(reports_dv ~ i(weekend_dummy, tmean) |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_year,
cluster = ~ ageb)
etable(reg_weekend_interacted) %>%
kable()
reg_weekend_inte.. | |
---|---|
Dependent Var.: | Reports |
Temperature x weekend_dummy = Weekdays | 0.0258*** (0.0033) |
Temperature x weekend_dummy = Weekend | 0.0352*** (0.0034) |
Fixed-Effects: | —————— |
Precipitation | Yes |
Humidity | Yes |
Windspeed | Yes |
Neighborhood | Yes |
Year-Month | Yes |
Day of Year | Yes |
______________________________________ | __________________ |
S.E.: Clustered | by: Neighborhood |
Observations | 3,624,826 |
Squared Cor. | 0.01324 |
Pseudo R2 | 0.05590 |
BIC | 804,292.2 |
[1] NA
library(car)
linearHypothesis(reg_weekend_interacted, "weekend_dummy::Weekend:tmean = weekend_dummy::Weekdays:tmean")
Linear hypothesis test:
- weekend_dummy::Weekdays:tmean + weekend_dummy::Weekend:tmean = 0
Model 1: restricted model
Model 2: reports_dv ~ i(weekend_dummy, tmean) | prec_quintile + rh_quintile +
wsp_quintile + ageb + year^month + day_of_year
Res.Df Df Chisq Pr(>Chisq)
1 3621992
2 3621991 1 348.04 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
I first want to look at the quintiles of the weather controls, how the distributions and thresholds look like.
First look at humidity:
rh_quintile | mean(rh) | min(rh) | max(rh) |
---|---|---|---|
1 | 33.41951 | 9.616082 | 41.16994 |
2 | 46.31722 | 41.169948 | 51.10550 |
3 | 55.07530 | 51.105503 | 58.86873 |
4 | 62.29113 | 58.868738 | 65.98550 |
5 | 71.56688 | 65.985513 | 97.41909 |
NA | NA | NA | NA |
Precipitation
prec_quintile | mean(prec) | min(prec) | max(prec) |
---|---|---|---|
1 | 0.0000000 | 0.0000000 | 0.0000000 |
2 | 0.0000000 | 0.0000000 | 0.0000000 |
3 | 0.0783131 | 0.0000000 | 0.3326569 |
4 | 1.3807250 | 0.3326613 | 3.0022805 |
5 | 7.9591730 | 3.0022835 | 69.8145571 |
and windspeed
wsp_quintile | mean(wsp) | min(wsp) | max(wsp) |
---|---|---|---|
1 | 1.550541 | 0.8699657 | 1.720390 |
2 | 1.830577 | 1.7203905 | 1.931156 |
3 | 2.025837 | 1.9311567 | 2.127877 |
4 | 2.263307 | 2.1278774 | 2.425652 |
5 | 2.836768 | 2.4256529 | 9.170833 |
NA | NA | NA | NA |
Now, run some regs
reg_weather_controls_linear <- data %>%
fepois(reports_dv ~ tmean + prec + rh + wsp |
#prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
reg_no_weather_controls <- data %>%
fepois(reports_dv ~ tmean |
#prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
reg_only_prec <- data %>%
fepois(reports_dv ~ tmean |
prec_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
reg_only_rh <- data %>%
fepois(reports_dv ~ tmean |
rh_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
reg_only_wsp <- data %>%
fepois(reports_dv ~ tmean |
wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
etable(reg_baseline,
reg_no_weather_controls,
reg_only_prec, reg_only_rh, reg_only_wsp) %>%
kable()
reg_baseline | reg_no_weather_c.. | reg_only_prec | reg_only_rh | reg_only_wsp | |
---|---|---|---|---|---|
Dependent Var.: | Reports | Reports | Reports | Reports | Reports |
Temperature | 0.0278*** (0.0033) | 0.0335*** (0.0027) | 0.0307*** (0.0029) | 0.0286*** (0.0033) | 0.0332*** (0.0028) |
Fixed-Effects: | —————— | —————— | —————— | —————— | —————— |
Precipitation | Yes | No | Yes | No | No |
Humidity | Yes | No | No | Yes | No |
Windspeed | Yes | No | No | No | Yes |
Neighborhood | Yes | Yes | Yes | Yes | Yes |
Year-Month | Yes | Yes | Yes | Yes | Yes |
Day of Week | Yes | Yes | Yes | Yes | Yes |
Day of Year | Yes | Yes | Yes | Yes | Yes |
_______________ | __________________ | __________________ | __________________ | __________________ | __________________ |
S.E.: Clustered | by: Neighborhood | by: Neighborhood | by: Neighborhood | by: Neighborhood | by: Neighborhood |
Observations | 3,624,826 | 3,624,883 | 3,624,883 | 3,624,826 | 3,624,883 |
Squared Cor. | 0.01333 | 0.01332 | 0.01333 | 0.01333 | 0.01332 |
Pseudo R2 | 0.05615 | 0.05611 | 0.05612 | 0.05614 | 0.05612 |
BIC | 804,167.0 | 804,018.0 | 804,066.2 | 804,053.3 | 804,072.4 |
Export the table as reg_weather_controls.tex
I aggregate the data at the municipality level, only 16 in mexico city. I weight by population size of AGEB.
data_mun <- data %>%
left_join(read_rds(here("..", "output", "ageb_attributes.rds")) %>%
select(ageb, nom_mun, pobtot),
by = "ageb") %>%
group_by(nom_mun, date) %>%
summarise(reports_dv = sum(reports_dv, na.rm = TRUE),
tmean = weighted.mean(tmean, pobtot, na.rm = TRUE),
prec = weighted.mean(prec, pobtot, na.rm = TRUE),
rh = weighted.mean(rh, pobtot, na.rm = TRUE),
wsp = weighted.mean(wsp, pobtot, na.rm = TRUE),
year = first(year),
month = first(month),
day_of_week = first(day_of_week),
day_of_year = first(day_of_year),
covid_phase = first(covid_phase)
) %>%
ungroup() %>%
mutate(prec_quintile = ntile(prec, 5),
rh_quintile = ntile(rh, 5),
wsp_quintile = ntile(wsp, 5))
reg_mun <- data_mun %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
nom_mun + year^month + day_of_week + day_of_year,
cluster = ~ nom_mun)
etable(reg_baseline, reg_mun) %>%
kable()
reg_baseline | reg_mun | |
---|---|---|
Dependent Var.: | Reports | Reports |
Temperature | 0.0278*** (0.0033) | 0.0259*** (0.0048) |
Fixed-Effects: | —————— | —————— |
Precipitation | Yes | Yes |
Humidity | Yes | Yes |
Windspeed | Yes | Yes |
Neighborhood | Yes | No |
Year-Month | Yes | Yes |
Day of Week | Yes | Yes |
Day of Year | Yes | Yes |
nom_mun | No | Yes |
_______________ | __________________ | __________________ |
S.E.: Clustered | by: Neighborhood | by: nom_mun |
Observations | 3,624,826 | 24,096 |
Squared Cor. | 0.01333 | 0.60481 |
Pseudo R2 | 0.05615 | 0.27008 |
BIC | 804,167.0 | 97,833.9 |
Since removing the FE for day of week makes little difference, I run a sample split for each day. There is not much happening there….
Note, its fsplit
so estimate for NA is for the full sample.
I try with different sets of fixed effects. This takes forever…
reg_fe_no_neighborhood <- data %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
year^month + day_of_week + day_of_year,
cluster = ~ ageb)
# reg_fe_no_temporal <- data %>%
# filter(covid_phase == "Pre-pandemic") %>%
# fepois(reports_dv ~ tmean |
# prec_quintile + rh_quintile + wsp_quintile +
# ageb, #+ year^month + day_of_week + day_of_year,
# cluster = ~ ageb)
reg_fe_drop_year <- data %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
ageb + month + day_of_week + day_of_year,
cluster = ~ ageb)
reg_fe_drop_month <- data %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year + day_of_week + day_of_year,
cluster = ~ ageb)
reg_fe_drop_month_and_doy <- data %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year + day_of_week, # + day_of_year + month
cluster = ~ ageb)
reg_fe_drop_doy <- data %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week, # + day_of_year,
cluster = ~ ageb)
reg_fe_drop_dow <- data %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_year,# + day_of_week + ,
cluster = ~ ageb)
etable(reg_baseline,
reg_fe_no_neighborhood,
reg_fe_drop_month,
reg_fe_drop_doy,
reg_fe_drop_month_and_doy,
reg_fe_drop_dow) %>%
kable()
reg_baseline | reg_fe_no_neig.. | reg_fe_drop_month | reg_fe_drop_doy | reg_fe_drop_mont.. | reg_fe_drop_dow | |
---|---|---|---|---|---|---|
Dependent Var.: | Reports | Reports | Reports | Reports | Reports | Reports |
Temperature | 0.0278*** (0.0033) | 0.0141* (0.0062) | 0.0245*** (0.0031) | 0.0286*** (0.0028) | 0.0348*** (0.0019) | 0.0275*** (0.0033) |
Fixed-Effects: | —————— | —————- | —————— | —————— | —————— | —————— |
Precipitation | Yes | Yes | Yes | Yes | Yes | Yes |
Humidity | Yes | Yes | Yes | Yes | Yes | Yes |
Windspeed | Yes | Yes | Yes | Yes | Yes | Yes |
Neighborhood | Yes | No | Yes | Yes | Yes | Yes |
Year-Month | Yes | Yes | No | Yes | No | Yes |
Day of Week | Yes | Yes | Yes | Yes | Yes | No |
Day of Year | Yes | Yes | Yes | No | No | Yes |
Year | No | No | Yes | No | Yes | No |
_______________ | __________________ | ________________ | __________________ | __________________ | __________________ | __________________ |
S.E.: Clustered | by: Neighborhood | by: Neighborhood | by: Neighborhood | by: Neighborhood | by: Neighborhood | by: Neighborhood |
Observations | 3,624,826 | 3,657,955 | 3,624,826 | 3,624,826 | 3,624,826 | 3,624,826 |
Squared Cor. | 0.01333 | 0.00116 | 0.01328 | 0.01304 | 0.01297 | 0.01307 |
Pseudo R2 | 0.05615 | 0.00533 | 0.05599 | 0.05534 | 0.05512 | 0.05542 |
BIC | 804,167.0 | 810,344.2 | 803,611.2 | 799,303.5 | 798,799.3 | 804,658.0 |
Export the table as reg_varying_fixef.tex
If I do ols, i will need this in the table
# and a function computing the effect size in pc(i.e. 100*temp coef/mean DV)
my_fun_effsize_1 =
function(x) paste0(round(
100*coef(x)[1]/mean(model.matrix(x, type = "lhs")), 2),"%")
extralines_register("tmean_eff_size_1", my_fun_effsize_1, "__Effect size (1)")
my_fun_effsize_2 =
function(x) ifelse(is.na(coef(x)[2]),
"--",
paste0(round(100*coef(x)[2]/mean(model.matrix(x, type = "lhs")), 2),"%"))
extralines_register("tmean_eff_size_2", my_fun_effsize_2, "__Effect size (2)")
Rendered on: 2025-04-03 20:58:05
Total render time: 690.984 sec elapsed