Robustness

Note

I run all the robustness/sensitivity checks here.

  • weekend vs weekdays
  • weather controls (add and remove them)
  • pre-post pandemic
  • Aggregate at mun level
  • Set of fixed effects
  • Sample split by day of week
library(tidyverse)
library(here) # path control
library(fixest) # estimations with fixed effects
library(knitr)
library(tictoc)
tic("Total render time")

source("fun_dynamic_bins.R")

mem.maxVSize(vsize = 40000)
[1] 40000

Import the full data to test pre-post pandemic and also the pre-pandemic data.

data_full <-
  read_rds(here("..", "output", "data_reports.rds"))

data_full <- data_full %>% 
  mutate(prec_quintile = ntile(prec, 5),
         rh_quintile = ntile(rh, 5),
         wsp_quintile = ntile(wsp, 5))

data <-
  read_rds(here("..", "output", "data_reports.rds")) %>% 
  filter(covid_phase == "Pre-pandemic")

data <- data %>% 
  mutate(prec_quintile = ntile(prec, 5),
         rh_quintile = ntile(rh, 5),
         wsp_quintile = ntile(wsp, 5))

Pre, post, during PANDEMIC

Here are the pandemic phases. I don’t remember exactly how i defined it, but i researched it… Pretty useless now since i do pre pandemic only, but oh well

data_full %>% 
  distinct(date, covid_phase) %>%
  group_by(covid_phase) %>%
  summarise(nr_days = n(), min_date = min(date), max_date = max(date)) %>% 
  arrange(min_date) %>% 
  kable()
covid_phase nr_days min_date max_date
Pre-pandemic 1506 2016-01-01 2020-02-14
Pandemic 625 2020-02-15 2021-10-31
Post-pandemic 1035 2021-11-01 2024-08-31

Look at share of missing variables per phase (in pc), we have very few and not more in old data than in new one.

data_full %>%
  select(reports_dv, tmean,
         prec_quintile, rh_quintile, wsp_quintile,
         covid_phase) %>% 
  group_by(covid_phase) %>% 
  summarise(across(everything(), ~ 100*sum(is.na(.)/n()))) %>% 
  kable()
covid_phase reports_dv tmean prec_quintile rh_quintile wsp_quintile
Pandemic 0 0.0146892 0.0000000 0.0214739 0.0058625
Post-pandemic 0 0.1003574 0.1932367 0.1014712 0.1057671
Pre-pandemic 0 0.0016949 0.0000000 0.0028704 0.0009295

Run the regressions:

reg_baseline <- data_full %>% 
  filter(covid_phase == "Pre-pandemic") %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

reg_pandemic <- data_full %>% 
  filter(covid_phase == "Pandemic") %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

reg_post <- data_full %>% 
  filter(covid_phase == "Post-pandemic") %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)


reg_all_periods <- data_full %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)


etable(reg_baseline, reg_pandemic, reg_post, reg_all_periods) %>% 
  kable()
reg_baseline reg_pandemic reg_post reg_all_periods
Dependent Var.: Reports Reports Reports Reports
Temperature 0.0278*** (0.0033) 0.0345*** (0.0063) 0.0294*** (0.0035) 0.0264*** (0.0020)
Fixed-Effects: —————— —————— —————— ——————
Precipitation Yes Yes Yes Yes
Humidity Yes Yes Yes Yes
Windspeed Yes Yes Yes Yes
Neighborhood Yes Yes Yes Yes
Year-Month Yes Yes Yes Yes
Day of Week Yes Yes Yes Yes
Day of Year Yes Yes Yes Yes
_______________ __________________ __________________ __________________ __________________
S.E.: Clustered by: Neighborhood by: Neighborhood by: Neighborhood by: Neighborhood
Observations 3,624,826 1,496,546 2,496,071 7,675,815
Squared Cor. 0.01333 0.02134 0.02326 0.01987
Pseudo R2 0.05615 0.06571 0.06768 0.06628
BIC 804,167.0 472,347.1 838,974.5 2,047,204.3
rm(data_full)

Weekend vs weekdays

Split the sample by weekend days and weekdays

reg_weekend <- data %>%
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_year,
         cluster = ~ ageb,
         fsplit = ~ weekend_dummy)

etable(reg_weekend) %>% 
  kable()
reg_weekend.1 reg_weekend.2 reg_weekend.3
Sample (weekend_dummy) Full sample Weekdays Weekend
Dependent Var.: Reports Reports Reports
Temperature 0.0275*** (0.0033) 0.0281*** (0.0042) 0.0244* (0.0103)
Fixed-Effects: —————— —————— —————-
Precipitation Yes Yes Yes
Humidity Yes Yes Yes
Windspeed Yes Yes Yes
Neighborhood Yes Yes Yes
Year-Month Yes Yes Yes
Day of Year Yes Yes Yes
______________________ __________________ __________________ ________________
S.E.: Clustered by: Neighborhood by: Neighborhood by: Neighborhood
Observations 3,624,826 2,570,480 1,006,168
Squared Cor. 0.01307 0.01252 0.01662
Pseudo R2 0.05542 0.05448 0.06153
BIC 804,658.0 563,581.8 273,075.1

Or interact tmean with the weekend dummy

reg_weekend_interacted <- data %>%
  fepois(reports_dv ~ i(weekend_dummy, tmean) |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_year,
         cluster = ~ ageb)

etable(reg_weekend_interacted) %>% 
  kable()
reg_weekend_inte..
Dependent Var.: Reports
Temperature x weekend_dummy = Weekdays 0.0258*** (0.0033)
Temperature x weekend_dummy = Weekend 0.0352*** (0.0034)
Fixed-Effects: ——————
Precipitation Yes
Humidity Yes
Windspeed Yes
Neighborhood Yes
Year-Month Yes
Day of Year Yes
______________________________________ __________________
S.E.: Clustered by: Neighborhood
Observations 3,624,826
Squared Cor. 0.01324
Pseudo R2 0.05590
BIC 804,292.2
wald(reg_weekend_interacted, "weekend_dummy::Weekend:tmean = weekend_dummy::Weekdays:tmean")
[1] NA
library(car)
linearHypothesis(reg_weekend_interacted, "weekend_dummy::Weekend:tmean = weekend_dummy::Weekdays:tmean")

Linear hypothesis test:
- weekend_dummy::Weekdays:tmean  + weekend_dummy::Weekend:tmean = 0

Model 1: restricted model
Model 2: reports_dv ~ i(weekend_dummy, tmean) | prec_quintile + rh_quintile + 
    wsp_quintile + ageb + year^month + day_of_year

   Res.Df Df  Chisq Pr(>Chisq)    
1 3621992                         
2 3621991  1 348.04  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Weather controls

I first want to look at the quintiles of the weather controls, how the distributions and thresholds look like.

First look at humidity:

rh_quintile mean(rh) min(rh) max(rh)
1 33.41951 9.616082 41.16994
2 46.31722 41.169948 51.10550
3 55.07530 51.105503 58.86873
4 62.29113 58.868738 65.98550
5 71.56688 65.985513 97.41909
NA NA NA NA

Precipitation

prec_quintile mean(prec) min(prec) max(prec)
1 0.0000000 0.0000000 0.0000000
2 0.0000000 0.0000000 0.0000000
3 0.0783131 0.0000000 0.3326569
4 1.3807250 0.3326613 3.0022805
5 7.9591730 3.0022835 69.8145571

and windspeed

wsp_quintile mean(wsp) min(wsp) max(wsp)
1 1.550541 0.8699657 1.720390
2 1.830577 1.7203905 1.931156
3 2.025837 1.9311567 2.127877
4 2.263307 2.1278774 2.425652
5 2.836768 2.4256529 9.170833
NA NA NA NA

Now, run some regs

  • entering controls linearly instead of quintiles
  • excluding them altogether
  • only including one at a time
reg_weather_controls_linear <- data %>% 
  fepois(reports_dv ~ tmean + prec + rh + wsp |
           #prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

reg_no_weather_controls <- data %>% 
  fepois(reports_dv ~ tmean |
           #prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

reg_only_prec <- data %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

reg_only_rh <- data %>% 
  fepois(reports_dv ~ tmean |
           rh_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

reg_only_wsp <- data %>% 
  fepois(reports_dv ~ tmean |
           wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

etable(reg_baseline,
       reg_no_weather_controls,
       reg_only_prec, reg_only_rh, reg_only_wsp) %>% 
  kable()
reg_baseline reg_no_weather_c.. reg_only_prec reg_only_rh reg_only_wsp
Dependent Var.: Reports Reports Reports Reports Reports
Temperature 0.0278*** (0.0033) 0.0335*** (0.0027) 0.0307*** (0.0029) 0.0286*** (0.0033) 0.0332*** (0.0028)
Fixed-Effects: —————— —————— —————— —————— ——————
Precipitation Yes No Yes No No
Humidity Yes No No Yes No
Windspeed Yes No No No Yes
Neighborhood Yes Yes Yes Yes Yes
Year-Month Yes Yes Yes Yes Yes
Day of Week Yes Yes Yes Yes Yes
Day of Year Yes Yes Yes Yes Yes
_______________ __________________ __________________ __________________ __________________ __________________
S.E.: Clustered by: Neighborhood by: Neighborhood by: Neighborhood by: Neighborhood by: Neighborhood
Observations 3,624,826 3,624,883 3,624,883 3,624,826 3,624,883
Squared Cor. 0.01333 0.01332 0.01333 0.01333 0.01332
Pseudo R2 0.05615 0.05611 0.05612 0.05614 0.05612
BIC 804,167.0 804,018.0 804,066.2 804,053.3 804,072.4

Export the table as reg_weather_controls.tex

Aggregate at mun level

I aggregate the data at the municipality level, only 16 in mexico city. I weight by population size of AGEB.

data_mun <- data %>% 
  left_join(read_rds(here("..", "output", "ageb_attributes.rds")) %>% 
              select(ageb, nom_mun, pobtot),
            by = "ageb") %>%
  group_by(nom_mun, date) %>% 
  summarise(reports_dv = sum(reports_dv, na.rm = TRUE),
            tmean = weighted.mean(tmean, pobtot, na.rm = TRUE),
            prec = weighted.mean(prec, pobtot, na.rm = TRUE),
            rh = weighted.mean(rh, pobtot, na.rm = TRUE),
            wsp = weighted.mean(wsp, pobtot, na.rm = TRUE),
            year = first(year),
            month = first(month),
            day_of_week = first(day_of_week),
            day_of_year = first(day_of_year),
            covid_phase = first(covid_phase)
  ) %>% 
  ungroup() %>% 
  mutate(prec_quintile = ntile(prec, 5),
         rh_quintile = ntile(rh, 5),
         wsp_quintile = ntile(wsp, 5))

reg_mun <- data_mun %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           nom_mun + year^month + day_of_week + day_of_year,
         cluster = ~ nom_mun)

etable(reg_baseline, reg_mun) %>% 
  kable()
reg_baseline reg_mun
Dependent Var.: Reports Reports
Temperature 0.0278*** (0.0033) 0.0259*** (0.0048)
Fixed-Effects: —————— ——————
Precipitation Yes Yes
Humidity Yes Yes
Windspeed Yes Yes
Neighborhood Yes No
Year-Month Yes Yes
Day of Week Yes Yes
Day of Year Yes Yes
nom_mun No Yes
_______________ __________________ __________________
S.E.: Clustered by: Neighborhood by: nom_mun
Observations 3,624,826 24,096
Squared Cor. 0.01333 0.60481
Pseudo R2 0.05615 0.27008
BIC 804,167.0 97,833.9

Sample split by day of week

Since removing the FE for day of week makes little difference, I run a sample split for each day. There is not much happening there….

reg_split_day_of_week <- data %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_year, 
         cluster = ~ ageb,
         fsplit = ~ day_of_week)

Note, its fsplit so estimate for NA is for the full sample.

Set of fixed effects

I try with different sets of fixed effects. This takes forever…

reg_fe_no_neighborhood <- data %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

# reg_fe_no_temporal <- data %>% 
#   filter(covid_phase == "Pre-pandemic") %>% 
#   fepois(reports_dv ~ tmean |
#            prec_quintile + rh_quintile + wsp_quintile +
#            ageb, #+ year^month + day_of_week + day_of_year,
#          cluster = ~ ageb)

reg_fe_drop_year <- data %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + month + day_of_week + day_of_year,
         cluster = ~ ageb)

reg_fe_drop_month <- data %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year + day_of_week + day_of_year,
         cluster = ~ ageb)

reg_fe_drop_month_and_doy <- data %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year + day_of_week, # + day_of_year + month
         cluster = ~ ageb)

reg_fe_drop_doy <- data %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week, # + day_of_year,
         cluster = ~ ageb)

reg_fe_drop_dow <- data %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_year,# + day_of_week + ,
         cluster = ~ ageb)

etable(reg_baseline,
       reg_fe_no_neighborhood,
       reg_fe_drop_month,
       reg_fe_drop_doy,
       reg_fe_drop_month_and_doy,
       reg_fe_drop_dow) %>% 
  kable()
reg_baseline reg_fe_no_neig.. reg_fe_drop_month reg_fe_drop_doy reg_fe_drop_mont.. reg_fe_drop_dow
Dependent Var.: Reports Reports Reports Reports Reports Reports
Temperature 0.0278*** (0.0033) 0.0141* (0.0062) 0.0245*** (0.0031) 0.0286*** (0.0028) 0.0348*** (0.0019) 0.0275*** (0.0033)
Fixed-Effects: —————— —————- —————— —————— —————— ——————
Precipitation Yes Yes Yes Yes Yes Yes
Humidity Yes Yes Yes Yes Yes Yes
Windspeed Yes Yes Yes Yes Yes Yes
Neighborhood Yes No Yes Yes Yes Yes
Year-Month Yes Yes No Yes No Yes
Day of Week Yes Yes Yes Yes Yes No
Day of Year Yes Yes Yes No No Yes
Year No No Yes No Yes No
_______________ __________________ ________________ __________________ __________________ __________________ __________________
S.E.: Clustered by: Neighborhood by: Neighborhood by: Neighborhood by: Neighborhood by: Neighborhood by: Neighborhood
Observations 3,624,826 3,657,955 3,624,826 3,624,826 3,624,826 3,624,826
Squared Cor. 0.01333 0.00116 0.01328 0.01304 0.01297 0.01307
Pseudo R2 0.05615 0.00533 0.05599 0.05534 0.05512 0.05542
BIC 804,167.0 810,344.2 803,611.2 799,303.5 798,799.3 804,658.0

Export the table as reg_varying_fixef.tex

Modelling the LHS

If I do ols, i will need this in the table

# and a function computing the effect size in pc(i.e. 100*temp coef/mean DV)
my_fun_effsize_1 =
  function(x) paste0(round(
    100*coef(x)[1]/mean(model.matrix(x, type = "lhs")), 2),"%")
extralines_register("tmean_eff_size_1", my_fun_effsize_1, "__Effect size (1)")

my_fun_effsize_2 =
  function(x) ifelse(is.na(coef(x)[2]),
                     "--",
                     paste0(round(100*coef(x)[2]/mean(model.matrix(x, type = "lhs")), 2),"%"))
extralines_register("tmean_eff_size_2", my_fun_effsize_2, "__Effect size (2)")
Rendered on: 2025-04-03 20:58:05
Total render time: 690.984 sec elapsed