start_time <- Sys.time()
library(tidyverse)
library(here) # path control
library(fixest) # estimations with fixed effects
library(knitr)
library(sf)
library(tictoc)
tic("Total render time")
source("fun_dynamic_bins.R")
source("fixest_etable_options.R")
mem.maxVSize(vsize = 40000)Heterogeneity: poverty
I run the heterogeneity on poverty. I have a categorical variable thats not great, but I also have a continuous variable that is the share of people living in poverty.
From the continuous, I bin population into ntiles and run similar regressions to the ones on income.
EVENTUALLY, I DECIDED NOT TO USE POVERTY, SO THIS SCRIPT IS JUST FOR REFERENCE
data <-
read_rds(here("..", "output", "data_reports.rds")) %>%
filter(covid_phase == "Pre-pandemic")
data <- data %>%
mutate(prec_quintile = ntile(prec, 5),
rh_quintile = ntile(rh, 5),
wsp_quintile = ntile(wsp, 5))
ageb_attributes <-
readRDS(here("..", "output", "ageb_attributes.rds"))
poligonos <-
read_sf(here("..",
"output",
"ageb_poligonos.geojson"),
quiet = TRUE)Compute ntiles for poverty and detail the data
poverty_nbi is defined as the share of “Población pobre por Necesidades Básicas Insatisfechas (NBI)” where poverty is defined considering both income and social deprivation… I also have a more raw variable for poverty, that is category of share of ppl living in poverty, named pobreza. I discuss both these variables and their source on this page (AGEBs in Mexico City).
I can relate the continuous and categorical poverty variables to log(income).
ageb_attributes %>%
# scatter poverty_nbi vs income_std
ggplot(aes(x = poverty_nbi, y = income_log, color = pobreza)) +
geom_point() +
scale_color_brewer(palette = "RdYlBu", direction = -1, na.value = "grey90") +
theme_light()And some statistics by category of poverty.
ageb_attributes %>%
group_by(pobreza) %>%
summarise(
mean(poverty_nbi, na.rm = T),
mean(income, na.rm = T),
pobtot = sum(pobtot)
) %>%
mutate(share_pob = 100*pobtot/sum(pobtot)) %>%
drop_na() %>%
kable() Compute some ntiles of poverty_nbi, we weight by AGEB’s population.
ageb_attributes <- ageb_attributes %>%
arrange(poverty_nbi) %>%
mutate(
# Population-Weighted Deciles
poverty_nbi_decile = ifelse(
is.na(income) | is.na(pobtot), NA,
as_factor(cut(
cumsum(pobtot[!is.na(poverty_nbi)]) / sum(pobtot[!is.na(poverty_nbi)], na.rm = TRUE),
breaks = seq(0, 1, by = 0.1),
labels = 1:10,
include.lowest = TRUE
))
)) %>%
mutate(
poverty_nbi_quintile = case_when(
poverty_nbi_decile %in% c("1", "2") ~ "1",
poverty_nbi_decile %in% c("3", "4") ~ "2",
poverty_nbi_decile %in% c("5", "6") ~ "3",
poverty_nbi_decile %in% c("7", "8") ~ "4",
poverty_nbi_decile %in% c("9", "10") ~ "5",
TRUE ~ NA),
poverty_nbi_median = case_when(
poverty_nbi_decile %in% c("1", "2", "3", "4", "5") ~ "Below Median",
poverty_nbi_decile %in% c("6", "7", "8", "9", "10") ~ "Above Median",
TRUE ~ NA)
) %>%
mutate(
poverty_nbi_decile = as_factor(poverty_nbi_decile),
poverty_nbi_quintile = as_factor(poverty_nbi_quintile),
poverty_nbi_median = as_factor(poverty_nbi_median)
)I plot those new poverty quintiles. Now, the number of AGEB in each is much more balanced (altough not equal since we bin by population). We see that it does not correspond to the quintiles of income (would be horizontal cuts on the plot).
ageb_attributes %>%
# scatter poverty_nbi vs income_std
ggplot(aes(x = poverty_nbi, y = income_log, color = poverty_nbi_quintile)) +
geom_point() +
scale_color_brewer(palette = "RdYlBu", direction = -1, na.value = "grey90") +
theme_light() I join these attributes to the main panel
data <-
left_join(data, ageb_attributes)Baseline regressions for reference
First the baseline regressions for reference:
baseline_linear <- data %>%
fepois(reports_dv ~ tmean |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
baseline_bins <- data %>%
mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>%
fepois(reports_dv ~ i(bins_tmean_one, ref = "[16") |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)Regs with pobreza (categorical only)
Either temp enters linearly:
reg_pobreza_linear <- data %>%
fepois(reports_dv ~ i(pobreza, tmean) |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
reg_pobreza_linear %>%
etable() %>%
kable()Or temp enters semi parametrically:
reg_pobreza_bins <- data %>%
mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>%
fepois(reports_dv ~ i(pobreza, bins_tmean_one, ref2 = "[16") |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)I plot these results.
Regs with poverty_nbi (continuous)
This variable is continuous, so I can use it as a continuous variable or bin it.
Gradient-style heterogeneity
Because we can, but not interpretable as such.
reg_poverty_cont_linear <- data %>%
fepois(reports_dv ~ tmean * poverty_nbi |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
etable(baseline_linear, reg_poverty_cont_linear) %>%
kable()N-tiles regressions
With the n-tiles I defined based on the continuous variable poverty_nbi.
Below and above median
First the simple linear version:
reg_above_below_linear <- data %>%
fepois(reports_dv ~ i(poverty_nbi_median, tmean) |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
etable(baseline_linear, reg_above_below_linear) %>%
kable()Or semi parametric with bins:
reg_above_below_bins <- data %>%
mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>%
fepois(reports_dv ~ i(poverty_nbi_median, bins_tmean_one, ref2 = "[16") |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)I can plot these results and save as reg_poverty_median.png
Quintile regressions
I first run the model imposing linearity on tmean and interacting it with poverty quintiles.
reg_quintile_linear <- data %>%
fepois(reports_dv ~ i(poverty_nbi_quintile, tmean) |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
etable(reg_quintile_linear) %>%
kable()Export the table as reg_poverty_quintile.tex
I then allow for possible non-linearities in the relationship between temperature and DV. With the quintiles, its a bit of a crazy estimation…
tic("quintile")
reg_quintile_bins <- data %>%
mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>%
fepois(reports_dv ~ i(poverty_nbi_quintile, bins_tmean_one, ref2 = "[16") |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
toc()I plot these results.
I save the full results as reg_poverty_quintiles_bins_full.png.
And only the two extreme quintiles as reg_poverty_quintiles_bins_extreme.png
Decile
tic("decile")
reg_decile_linear <- data %>%
fepois(reports_dv ~ i(poverty_nbi_decile, tmean) |
prec_quintile + rh_quintile + wsp_quintile +
ageb + year^month + day_of_week + day_of_year,
cluster = ~ ageb)
toc()
etable(reg_decile_linear) %>%
kable()