Pollution

Note

I run all the regressions that focus on pollution

library(tidyverse)
library(here) # path control
library(lubridate) # handle dates
library(fixest) # estimations with fixed effects
library(modelsummary) # for the modelplot figure

library(patchwork)

mem.maxVSize(vsize = 40000)


data <-
  readRDS(here("output", "final_data_for_analysis.rds")) 
# QUINTILES!!!!

source(here("codes", "my_functions.R"))


# Plot associations between pollution and reports ---- 

data %>% 
  group_by(pm25_mean = round(pm25_mean)) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_mean, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()

data %>% 
  group_by(pm25_min = round(pm25_min)) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_min, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()

data %>% 
  group_by(pm25_max = round(pm25_max)) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_max, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()

data %>% 
  group_by(pm25_h_above_aqg) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_h_above_aqg, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()

data %>% 
  group_by(pm25_h_above_it3) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_h_above_it3, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()

data %>% 
  group_by(pm25_h_above_it2) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_h_above_it2, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()

data %>% 
  group_by(pm25_h_above_it1) %>% 
  summarise(mean(reports_dv), n()) %>% 
  ggplot(aes(x = pm25_h_above_it1, y = `mean(reports_dv)`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()


# Run the regs ------------------
# hoffman use "number of hours in which fine particulate matter exceeded the
#WHO’s IT1, IT2, IT3, or AQG in locality 푙 of municipality 푚 on day 푡 in week 푤."
reg_pollution_simple <- data %>% 
  filter(covid_phase == "Pre-pandemic") %>% 
  fepois(reports_dv ~ tmean + 
           sw(pm25_mean, pm25_min, pm25_max) |
           prec_quintile + rh_quintile + wsp_quintile +
           col_id + year^month + day_of_week + day_of_year,
         cluster = ~ col_id)

reg_pollution_hours_per_day <- data %>% 
  filter(covid_phase == "Pre-pandemic") %>% 
  fepois(reports_dv ~ tmean + 
           sw(pm25_h_above_aqg, pm25_h_above_it3, pm25_h_above_it2, pm25_h_above_it1) |
           prec_quintile + rh_quintile + wsp_quintile +
           col_id + year^month + day_of_week + day_of_year,
         cluster = ~ col_id)


# I find exatcly the same as Hoffman Rud.
# I still need to comapre magnitude, but this is big
etable(reg_baseline, reg_pollution_simple, reg_pollution_hours_per_day)
# pollution as a mobility restrictor


# Plot the results ------------------

coefplot(reg_pollution_hours_per_day, only.params = T,
         dict = c(pm25_h_above_aqg = "AQG",
                  pm25_h_above_it3 = "IT3",
                  pm25_h_above_it2 = "IT2",
                  pm25_h_above_it1 = "IT1")
         )$prms %>% 
  filter(estimate_names_raw != "tmean") %>% 
  mutate(estimate_names = fct_reorder(estimate_names, id)) %>%  # Reorder by id
  ggplot(aes(x = estimate_names, y = estimate, ymin = ci_low, ymax = ci_high, group = 1)) +
  geom_linerange(color = "blue") +
  geom_point(color = "blue", size = 2) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_minimal() +
  labs(y = "Effect on Domestic Violence",
       x = "PM 2.5 Pollution Treshold") +
  scale_y_continuous()


ggsave(path = here("output", "figures"),
       filename = "reg_pm25_thresholds.png",
       width = 6, height = 4)