Heterogeneity: poverty

Note

I run the heterogeneity on poverty. I have a categorical variable thats not great, but I also have a continuous variable that is the share of people living in poverty.

From the continuous, I bin population into ntiles and run similar regressions to the ones on income.

EVENTUALLY, I DECIDED NOT TO USE POVERTY, SO THIS SCRIPT IS JUST FOR REFERENCE

start_time <- Sys.time()

library(tidyverse)
library(here) # path control
library(fixest) # estimations with fixed effects
library(knitr)
library(sf)
library(tictoc)
tic("Total render time")

source("fun_dynamic_bins.R")
source("fixest_etable_options.R")

mem.maxVSize(vsize = 40000)
data <-
  read_rds(here("..", "output", "data_reports.rds")) %>% 
  filter(covid_phase == "Pre-pandemic")

data <- data %>% 
  mutate(prec_quintile = ntile(prec, 5),
         rh_quintile = ntile(rh, 5),
         wsp_quintile = ntile(wsp, 5))

ageb_attributes <- 
  readRDS(here("..", "output", "ageb_attributes.rds"))

poligonos <-  
  read_sf(here("..",
                "output",
                "ageb_poligonos.geojson"),
          quiet = TRUE)

Compute ntiles for poverty and detail the data

poverty_nbi is defined as the share of “Población pobre por Necesidades Básicas Insatisfechas (NBI)” where poverty is defined considering both income and social deprivation… I also have a more raw variable for poverty, that is category of share of ppl living in poverty, named pobreza. I discuss both these variables and their source on this page (AGEBs in Mexico City).

I can relate the continuous and categorical poverty variables to log(income).

ageb_attributes %>% 
  # scatter poverty_nbi vs income_std
  ggplot(aes(x = poverty_nbi, y = income_log, color = pobreza)) +
  geom_point() +
  scale_color_brewer(palette = "RdYlBu", direction = -1, na.value = "grey90") +
  theme_light()

And some statistics by category of poverty.

ageb_attributes %>% 
  group_by(pobreza) %>%
  summarise(
    mean(poverty_nbi, na.rm = T),
    mean(income, na.rm = T),
    pobtot = sum(pobtot)
    ) %>% 
  mutate(share_pob = 100*pobtot/sum(pobtot)) %>% 
  drop_na() %>% 
  kable() 

Compute some ntiles of poverty_nbi, we weight by AGEB’s population.

ageb_attributes <- ageb_attributes %>% 
  arrange(poverty_nbi) %>% 
  mutate(
    # Population-Weighted Deciles
    poverty_nbi_decile = ifelse(
      is.na(income) | is.na(pobtot), NA, 
      as_factor(cut(
        cumsum(pobtot[!is.na(poverty_nbi)]) / sum(pobtot[!is.na(poverty_nbi)], na.rm = TRUE), 
        breaks = seq(0, 1, by = 0.1), 
        labels = 1:10, 
        include.lowest = TRUE
      ))
    )) %>% 
  mutate(
    poverty_nbi_quintile = case_when(
      poverty_nbi_decile %in% c("1", "2") ~ "1",
      poverty_nbi_decile %in% c("3", "4") ~ "2",
      poverty_nbi_decile %in% c("5", "6") ~ "3",
      poverty_nbi_decile %in% c("7", "8") ~ "4",
      poverty_nbi_decile %in% c("9", "10") ~ "5",
      TRUE ~ NA),
    poverty_nbi_median = case_when(
      poverty_nbi_decile %in% c("1", "2", "3", "4", "5") ~ "Below Median",
      poverty_nbi_decile %in% c("6", "7", "8", "9", "10") ~ "Above Median",
      TRUE ~ NA)
  ) %>% 
  mutate(
    poverty_nbi_decile = as_factor(poverty_nbi_decile),
    poverty_nbi_quintile = as_factor(poverty_nbi_quintile),
    poverty_nbi_median = as_factor(poverty_nbi_median)
  )

I plot those new poverty quintiles. Now, the number of AGEB in each is much more balanced (altough not equal since we bin by population). We see that it does not correspond to the quintiles of income (would be horizontal cuts on the plot).

ageb_attributes %>% 
  # scatter poverty_nbi vs income_std
  ggplot(aes(x = poverty_nbi, y = income_log, color = poverty_nbi_quintile)) +
  geom_point() +
  scale_color_brewer(palette = "RdYlBu", direction = -1, na.value = "grey90") +
  theme_light() 

I join these attributes to the main panel

data <- 
  left_join(data, ageb_attributes)

Baseline regressions for reference

First the baseline regressions for reference:

baseline_linear <- data %>% 
  fepois(reports_dv ~ tmean |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

baseline_bins <- data %>% 
  mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>% 
  fepois(reports_dv ~ i(bins_tmean_one, ref = "[16") |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

Regs with pobreza (categorical only)

Either temp enters linearly:

reg_pobreza_linear <- data %>% 
  fepois(reports_dv ~ i(pobreza, tmean) |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

reg_pobreza_linear %>% 
  etable() %>% 
  kable()

Or temp enters semi parametrically:

reg_pobreza_bins <- data %>% 
  mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>% 
  fepois(reports_dv ~ i(pobreza, bins_tmean_one, ref2 = "[16") |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

I plot these results.

Regs with poverty_nbi (continuous)

This variable is continuous, so I can use it as a continuous variable or bin it.

Gradient-style heterogeneity

Because we can, but not interpretable as such.

reg_poverty_cont_linear <- data %>% 
  fepois(reports_dv ~ tmean * poverty_nbi |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

etable(baseline_linear, reg_poverty_cont_linear) %>% 
  kable()

N-tiles regressions

With the n-tiles I defined based on the continuous variable poverty_nbi.

Below and above median

First the simple linear version:

reg_above_below_linear <- data %>%
  fepois(reports_dv ~ i(poverty_nbi_median, tmean) |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

etable(baseline_linear, reg_above_below_linear) %>% 
  kable()

Or semi parametric with bins:

reg_above_below_bins <- data %>%
  mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>% 
  fepois(reports_dv ~ i(poverty_nbi_median, bins_tmean_one, ref2 = "[16") |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

I can plot these results and save as reg_poverty_median.png

Quintile regressions

I first run the model imposing linearity on tmean and interacting it with poverty quintiles.

reg_quintile_linear <- data %>%
  fepois(reports_dv ~ i(poverty_nbi_quintile, tmean) |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)

etable(reg_quintile_linear) %>% 
  kable()

Export the table as reg_poverty_quintile.tex

I then allow for possible non-linearities in the relationship between temperature and DV. With the quintiles, its a bit of a crazy estimation…

tic("quintile")
reg_quintile_bins <- data %>%
  mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>% 
  fepois(reports_dv ~ i(poverty_nbi_quintile, bins_tmean_one, ref2 = "[16") |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)
toc()

I plot these results.

I save the full results as reg_poverty_quintiles_bins_full.png.

And only the two extreme quintiles as reg_poverty_quintiles_bins_extreme.png

Decile

tic("decile")
reg_decile_linear <- data %>%
  fepois(reports_dv ~ i(poverty_nbi_decile, tmean) |
           prec_quintile + rh_quintile + wsp_quintile +
           ageb + year^month + day_of_week + day_of_year,
         cluster = ~ ageb)
toc()
etable(reg_decile_linear) %>% 
  kable()