<- Sys.time()
start_time
library(tidyverse)
library(here) # path control
library(fixest) # estimations with fixed effects
library(knitr)
library(sf)
library(tictoc)
tic("Total render time")
source("fun_dynamic_bins.R")
source("fixest_etable_options.R")
mem.maxVSize(vsize = 40000)
Heterogeneity: poverty
I run the heterogeneity on poverty. I have a categorical variable thats not great, but I also have a continuous variable that is the share of people living in poverty.
From the continuous, I bin population into ntiles and run similar regressions to the ones on income.
EVENTUALLY, I DECIDED NOT TO USE POVERTY, SO THIS SCRIPT IS JUST FOR REFERENCE
<-
data read_rds(here("..", "output", "data_reports.rds")) %>%
filter(covid_phase == "Pre-pandemic")
<- data %>%
data mutate(prec_quintile = ntile(prec, 5),
rh_quintile = ntile(rh, 5),
wsp_quintile = ntile(wsp, 5))
<-
ageb_attributes readRDS(here("..", "output", "ageb_attributes.rds"))
<-
poligonos read_sf(here("..",
"output",
"ageb_poligonos.geojson"),
quiet = TRUE)
Compute ntiles for poverty and detail the data
poverty_nbi
is defined as the share of “Población pobre por Necesidades Básicas Insatisfechas (NBI)” where poverty is defined considering both income and social deprivation… I also have a more raw variable for poverty, that is category of share of ppl living in poverty, named pobreza
. I discuss both these variables and their source on this page (AGEBs in Mexico City).
I can relate the continuous and categorical poverty variables to log(income).
%>%
ageb_attributes # scatter poverty_nbi vs income_std
ggplot(aes(x = poverty_nbi, y = income_log, color = pobreza)) +
geom_point() +
scale_color_brewer(palette = "RdYlBu", direction = -1, na.value = "grey90") +
theme_light()
And some statistics by category of poverty.
%>%
ageb_attributes group_by(pobreza) %>%
summarise(
mean(poverty_nbi, na.rm = T),
mean(income, na.rm = T),
pobtot = sum(pobtot)
%>%
) mutate(share_pob = 100*pobtot/sum(pobtot)) %>%
drop_na() %>%
kable()
Compute some ntiles of poverty_nbi, we weight by AGEB’s population.
<- ageb_attributes %>%
ageb_attributes arrange(poverty_nbi) %>%
mutate(
# Population-Weighted Deciles
poverty_nbi_decile = ifelse(
is.na(income) | is.na(pobtot), NA,
as_factor(cut(
cumsum(pobtot[!is.na(poverty_nbi)]) / sum(pobtot[!is.na(poverty_nbi)], na.rm = TRUE),
breaks = seq(0, 1, by = 0.1),
labels = 1:10,
include.lowest = TRUE
))%>%
)) mutate(
poverty_nbi_quintile = case_when(
%in% c("1", "2") ~ "1",
poverty_nbi_decile %in% c("3", "4") ~ "2",
poverty_nbi_decile %in% c("5", "6") ~ "3",
poverty_nbi_decile %in% c("7", "8") ~ "4",
poverty_nbi_decile %in% c("9", "10") ~ "5",
poverty_nbi_decile TRUE ~ NA),
poverty_nbi_median = case_when(
%in% c("1", "2", "3", "4", "5") ~ "Below Median",
poverty_nbi_decile %in% c("6", "7", "8", "9", "10") ~ "Above Median",
poverty_nbi_decile TRUE ~ NA)
%>%
) mutate(
poverty_nbi_decile = as_factor(poverty_nbi_decile),
poverty_nbi_quintile = as_factor(poverty_nbi_quintile),
poverty_nbi_median = as_factor(poverty_nbi_median)
)
I plot those new poverty quintiles. Now, the number of AGEB in each is much more balanced (altough not equal since we bin by population). We see that it does not correspond to the quintiles of income (would be horizontal cuts on the plot).
%>%
ageb_attributes # scatter poverty_nbi vs income_std
ggplot(aes(x = poverty_nbi, y = income_log, color = poverty_nbi_quintile)) +
geom_point() +
scale_color_brewer(palette = "RdYlBu", direction = -1, na.value = "grey90") +
theme_light()
I join these attributes to the main panel
<-
data left_join(data, ageb_attributes)
Baseline regressions for reference
First the baseline regressions for reference:
<- data %>%
baseline_linear fepois(reports_dv ~ tmean |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
<- data %>%
baseline_bins mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>%
fepois(reports_dv ~ i(bins_tmean_one, ref = "[16") |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
Regs with pobreza
(categorical only)
Either temp enters linearly:
<- data %>%
reg_pobreza_linear fepois(reports_dv ~ i(pobreza, tmean) |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
%>%
reg_pobreza_linear etable() %>%
kable()
Or temp enters semi parametrically:
<- data %>%
reg_pobreza_bins mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>%
fepois(reports_dv ~ i(pobreza, bins_tmean_one, ref2 = "[16") |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
I plot these results.
Regs with poverty_nbi
(continuous)
This variable is continuous, so I can use it as a continuous variable or bin it.
Gradient-style heterogeneity
Because we can, but not interpretable as such.
<- data %>%
reg_poverty_cont_linear fepois(reports_dv ~ tmean * poverty_nbi |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
etable(baseline_linear, reg_poverty_cont_linear) %>%
kable()
N-tiles regressions
With the n-tiles I defined based on the continuous variable poverty_nbi
.
Below and above median
First the simple linear version:
<- data %>%
reg_above_below_linear fepois(reports_dv ~ i(poverty_nbi_median, tmean) |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
etable(baseline_linear, reg_above_below_linear) %>%
kable()
Or semi parametric with bins:
<- data %>%
reg_above_below_bins mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>%
fepois(reports_dv ~ i(poverty_nbi_median, bins_tmean_one, ref2 = "[16") |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
I can plot these results and save as reg_poverty_median.png
Quintile regressions
I first run the model imposing linearity on tmean and interacting it with poverty quintiles.
<- data %>%
reg_quintile_linear fepois(reports_dv ~ i(poverty_nbi_quintile, tmean) |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
etable(reg_quintile_linear) %>%
kable()
Export the table as reg_poverty_quintile.tex
I then allow for possible non-linearities in the relationship between temperature and DV. With the quintiles, its a bit of a crazy estimation…
tic("quintile")
<- data %>%
reg_quintile_bins mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>%
fepois(reports_dv ~ i(poverty_nbi_quintile, bins_tmean_one, ref2 = "[16") |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
toc()
I plot these results.
I save the full results as reg_poverty_quintiles_bins_full.png
.
And only the two extreme quintiles as reg_poverty_quintiles_bins_extreme.png
Decile
tic("decile")
<- data %>%
reg_decile_linear fepois(reports_dv ~ i(poverty_nbi_decile, tmean) |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
toc()
etable(reg_decile_linear) %>%
kable()