Pollution

Note

I run all the regressions that focus on pollution

start_time <- Sys.time()

library(tidyverse)
library(here) # path control
library(lubridate) # handle dates
library(fixest) # estimations with fixed effects
library(modelsummary) # for the modelplot figure
library(tictoc)
library(knitr)
library(patchwork)
tic("Total render time")

mem.maxVSize(vsize = 40000)
[1] 40000
data <-
  read_rds(here("..", "output", "data_reports.rds")) %>% 
  filter(covid_phase == "Pre-pandemic")

# quintiles are computed last minute, on the selected period
data <- data %>% 
  mutate(prec_quintile = ntile(prec, 5),
         rh_quintile = ntile(rh, 5),
         wsp_quintile = ntile(wsp, 5))

Plot associations between pollution and reports

data %>% 
  group_by(pm25_mean = round(pm25_mean)) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_mean, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_light()

data %>% 
  group_by(pm25_min = round(pm25_min)) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_min, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_light()

data %>% 
  group_by(pm25_max = round(pm25_max)) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_max, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_light()

data %>% 
  group_by(pm25_h_above_aqg) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_h_above_aqg, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_light()

data %>% 
  group_by(pm25_h_above_it3) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_h_above_it3, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_light()

data %>% 
  group_by(pm25_h_above_it2) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_h_above_it2, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_light()

data %>% 
  group_by(pm25_h_above_it1) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_h_above_it1, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_light()

Run the regs

# hoffman use "number of hours in which fine particulate matter exceeded the
#WHO’s IT1, IT2, IT3, or AQG in locality 푙 of municipality 푚 on day 푡 in week 푤."
reg_pollution_simple <- data %>% 
  fepois(reports_dv ~ tmean + 
           sw(pm25_mean, pm25_min, pm25_max) |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

reg_pollution_hours_per_day <- data %>% 
  fepois(reports_dv ~ tmean + 
           sw(pm25_h_above_aqg, pm25_h_above_it3, pm25_h_above_it2, pm25_h_above_it1) |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

etable(reg_pollution_simple, reg_pollution_hours_per_day) %>% 
    kable()
reg_pollution_s..1 reg_pollution_s..2 reg_pollution_s..3 reg_pollution_h..1 reg_pollution_h..2 reg_pollution_h..3 reg_pollution_h..4
Dependent Var.: reports_dv reports_dv reports_dv reports_dv reports_dv reports_dv reports_dv
tmean 0.0282*** (0.0034) 0.0276*** (0.0034) 0.0279*** (0.0034) 0.0275*** (0.0034) 0.0276*** (0.0034) 0.0272*** (0.0034) 0.0273*** (0.0034)
pm25_mean -0.0013* (0.0006)
pm25_min -0.0004 (0.0008)
pm25_max -0.0008* (0.0003)
pm25_h_above_aqg -0.0003 (0.0008)
pm25_h_above_it3 -0.0017 (0.0012)
pm25_h_above_it2 -0.0041* (0.0020)
pm25_h_above_it1 -0.0121** (0.0042)
Fixed-Effects: —————— —————— —————— —————— —————— —————— ——————
prec_quintile Yes Yes Yes Yes Yes Yes Yes
rh_quintile Yes Yes Yes Yes Yes Yes Yes
wsp_quintile Yes Yes Yes Yes Yes Yes Yes
ageb Yes Yes Yes Yes Yes Yes Yes
year-month Yes Yes Yes Yes Yes Yes Yes
day_of_week Yes Yes Yes Yes Yes Yes Yes
day_of_year Yes Yes Yes Yes Yes Yes Yes
________________ __________________ __________________ __________________ __________________ __________________ __________________ __________________
S.E.: Clustered by: ageb by: ageb by: ageb by: ageb by: ageb by: ageb by: ageb
Observations 3,622,101 3,622,101 3,622,101 3,622,101 3,622,101 3,622,101 3,622,101
Squared Cor. 0.01333 0.01333 0.01333 0.01333 0.01333 0.01333 0.01334
Pseudo R2 0.05613 0.05612 0.05613 0.05612 0.05612 0.05613 0.05613
BIC 803,787.0 803,791.1 803,785.8 803,791.2 803,789.1 803,786.5 803,782.5

Plot the results

coefplot(reg_pollution_hours_per_day, only.params = T,
         dict = c(pm25_h_above_aqg = "AQG",
                  pm25_h_above_it3 = "IT3",
                  pm25_h_above_it2 = "IT2",
                  pm25_h_above_it1 = "IT1")
         )$prms %>% 
  filter(estimate_names_raw != "tmean") %>% 
  mutate(estimate_names = fct_reorder(estimate_names, id)) %>%  # Reorder by id
  ggplot(aes(x = estimate_names, y = estimate, ymin = ci_low, ymax = ci_high, group = 1)) +
  geom_linerange(color = "blue") +
  geom_point(color = "blue", size = 2) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_light() +
  labs(y = "Effect on Domestic Violence",
       x = "PM 2.5 Pollution Treshold") +
  scale_y_continuous()

ggsave(path = here("..", "output", "figures"),
       filename = "reg_pm25_thresholds.png",
       width = 6, height = 4)
Rendered on: 2025-07-27 03:41:59
Total render time: 450.63 sec elapsed