<- Sys.time()
start_time
library(tidyverse)
library(here) # path control
library(fixest) # estimations with fixed effects
library(knitr)
library(sf)
library(tictoc)
tic("Total render time")
source("fun_dynamic_bins.R")
source("fixest_etable_options.R")
Heterogeneity: income
I run the heterogeneity on income.
I interact with above/below median, quintiles and deciles.
- I save a plot for bins and above/below median
- I save a table for linear quintiles
- I save two plots for bins and quintiles
Generally, the quintiles is much more interesting than the above/below median
There is a clear gradient in the quintiles in the linear reg From 3.1% for the poorest to 2% for the richest with the bins, its hard to see, comparing dozens of coefficients
So i do bins and above below median. The linear is not very striking I get 2.9% for the poor, 2.4% for the rich However, with the bins, there is a small difference in “slope” which is kind of interesting
So probably the best is to show the table for quintiles and the plots for the above/below median with the bins
<-
data read_rds(here("..", "output", "data_reports.rds")) %>%
filter(covid_phase == "Pre-pandemic")
<- data %>%
data mutate(prec_quintile = ntile(prec, 5),
rh_quintile = ntile(rh, 5),
wsp_quintile = ntile(wsp, 5))
<-
ageb_attributes readRDS(here("..", "output", "ageb_attributes.rds"))
<-
poligonos read_sf(here("..",
"output",
"ageb_poligonos.geojson"),
quiet = TRUE)
Compute ntiles for income
I bin AGEBs into income deciles, quintiles and above/below median. I weight by population so that there are the same number of ppl in each decile and not the same number of AGEBs.
<- ageb_attributes %>%
ageb_attributes arrange(income) %>%
mutate(
# Population-Weighted Deciles
income_decile = ifelse(
is.na(income) | is.na(pobtot), NA,
as_factor(cut(
cumsum(pobtot[!is.na(income)]) / sum(pobtot[!is.na(income)], na.rm = TRUE),
breaks = seq(0, 1, by = 0.1),
labels = 1:10,
include.lowest = TRUE
))%>%
)) mutate(
income_quintile = case_when(
%in% c("1", "2") ~ "1",
income_decile %in% c("3", "4") ~ "2",
income_decile %in% c("5", "6") ~ "3",
income_decile %in% c("7", "8") ~ "4",
income_decile %in% c("9", "10") ~ "5",
income_decile TRUE ~ NA),
income_median = case_when(
%in% c("1", "2", "3", "4", "5") ~ "Below Median",
income_decile %in% c("6", "7", "8", "9", "10") ~ "Above Median",
income_decile TRUE ~ NA)
%>%
) mutate(
income_decile = as_factor(income_decile),
income_quintile = as_factor(income_quintile),
income_median = as_factor(income_median)
)
Visualize income quintiles:
%>%
ageb_attributes ggplot(aes(x = income_decile, y = income, color = income_quintile)) +
geom_boxplot() +
labs(title = "Income distribution within deciles (ageb level)",
x = "Income Decile", y = "Income",) +
guides(color = "none") +
theme_light()
full_join(poligonos, ageb_attributes) %>%
ggplot() +
geom_sf(aes(fill = income_quintile), color = NA) +
scale_fill_brewer(palette = "RdYlBu", direction = 1, na.value = "grey90") +
labs(title = "Income Quintiles by AGEB", fill = "Quintile") +
theme_light()
%>%
ageb_attributes filter(!is.na(income_quintile)) %>%
group_by(income_quintile) %>%
summarise(
mean_income = mean(income, na.rm = TRUE),
pobtot = sum(pobtot, na.rm = TRUE)
%>%
) kable()
income_quintile | mean_income | pobtot |
---|---|---|
1 | 24511.90 | 1802283 |
2 | 30623.98 | 1802272 |
3 | 37101.17 | 1798938 |
4 | 45775.35 | 1804070 |
5 | 80920.06 | 1805085 |
Join these ageb attributes to the main panel:
# Merge with data
<-
data left_join(data, ageb_attributes)
Average level of crime per income quintile:
%>%
data # filter(!is.na(income_quintile)) %>%
group_by(income_quintile) %>%
summarise(
mean_reports = mean(reports_dv, na.rm = TRUE),
total_reports = sum(reports_dv, na.rm = TRUE)
%>%
) kable()
income_quintile | mean_reports | total_reports |
---|---|---|
1 | 0.0249909 | 15318 |
2 | 0.0281556 | 18233 |
3 | 0.0246617 | 17976 |
4 | 0.0234682 | 17000 |
5 | 0.0162459 | 14533 |
NA | 0.0232806 | 1157 |
Baseline regressions for reference
First the baseline regressions for reference:
<- data %>%
baseline_linear fepois(reports_dv ~ tmean |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
<- data %>%
baseline_bins mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>%
fepois(reports_dv ~ i(bins_tmean_one, ref = "[16") |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
Above/below median income
I can also do simple categories; above and below median.
Imposing linearity, its only two estimates, I don’t export the table
<- data %>%
reg_above_below_linear fepois(reports_dv ~ i(income_median, tmean) |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
etable(baseline_linear, reg_above_below_linear) %>%
kable()
baseline_linear | reg_above_below_.. | |
---|---|---|
Dependent Var.: | Reports for DV | Reports for DV |
Temperature | 0.0274*** (0.0034) | |
Temperature x income_median = BelowMedian | 0.0289*** (0.0037) | |
Temperature x income_median = AboveMedian | 0.0251*** (0.0038) | |
Fixed-Effects: | —————— | —————— |
Prec, hum, wsp quintiles | Yes | Yes |
Neighborhood | Yes | Yes |
Year-Month | Yes | Yes |
Day of Week | Yes | Yes |
Day of Year | Yes | Yes |
________________________________________ | __________________ | __________________ |
S.E.: Clustered | by: Neighborhood | by: Neighborhood |
Observations | 3,624,826 | 3,578,253 |
Or semi parametric with bins:
<- data %>%
reg_above_below_bins mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>%
fepois(reports_dv ~ i(income_median, bins_tmean_one, ref2 = "[16") |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
I can plot these results and save as reg_income_median.png
Income quintile regressions
I first run the model imposing linearity on tmean and interacting it with income quintiles.
<- data %>%
reg_quintile_linear fepois(reports_dv ~ i(income_quintile, tmean) |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
etable(reg_quintile_linear) %>%
kable()
reg_quintile_lin.. | |
---|---|
Dependent Var.: | Reports for DV |
Temperature x income_quintile = 1 | 0.0342*** (0.0048) |
Temperature x income_quintile = 2 | 0.0297*** (0.0044) |
Temperature x income_quintile = 3 | 0.0254*** (0.0045) |
Temperature x income_quintile = 4 | 0.0239*** (0.0046) |
Temperature x income_quintile = 5 | 0.0216*** (0.0048) |
Fixed-Effects: | —————— |
Prec, hum, wsp quintiles | Yes |
Neighborhood | Yes |
Year-Month | Yes |
Day of Week | Yes |
Day of Year | Yes |
_________________________________ | __________________ |
S.E.: Clustered | by: Neighborhood |
Observations | 3,578,253 |
Export the table as reg_income_quintile.tex
I then allow for possible non-linearities in the relationship between temperature and DV. With the quintiles, its a bit of a crazy estimation…
tic("quintile")
<- data %>%
reg_quintile_bins mutate(bins_tmean_one = create_dynamic_bins(tmean, width = 1, 0.01)) %>%
fepois(reports_dv ~ i(income_quintile, bins_tmean_one, ref2 = "[16") |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
toc()
quintile: 2423.325 sec elapsed
I plot these results.
I save the full results as reg_income_quintiles_bins_full.png
.
And only the two extreme quintiles as reg_income_quintiles_bins_extreme.png
Income decile
I also tried using decile instead of quintile, but it doesnt add much else than noise and it takes too long to run.
tic("decile")
<- data %>%
reg_decile_linear fepois(reports_dv ~ i(income_decile, tmean) |
+ rh_quintile + wsp_quintile +
prec_quintile + year^month + day_of_week + day_of_year,
ageb cluster = ~ ageb)
toc()
decile: 194.053 sec elapsed
etable(reg_decile_linear) %>%
kable()
reg_decile_linear | |
---|---|
Dependent Var.: | Reports for DV |
Temperature x income_decile = 1 | 0.0377*** (0.0063) |
Temperature x income_decile = 2 | 0.0314*** (0.0059) |
Temperature x income_decile = 3 | 0.0263*** (0.0057) |
Temperature x income_decile = 4 | 0.0332*** (0.0053) |
Temperature x income_decile = 5 | 0.0182** (0.0058) |
Temperature x income_decile = 6 | 0.0328*** (0.0054) |
Temperature x income_decile = 7 | 0.0233*** (0.0057) |
Temperature x income_decile = 8 | 0.0245*** (0.0059) |
Temperature x income_decile = 9 | 0.0198*** (0.0058) |
Temperature x income_decile = 10 | 0.0237*** (0.0063) |
Fixed-Effects: | —————— |
Prec, hum, wsp quintiles | Yes |
Neighborhood | Yes |
Year-Month | Yes |
Day of Week | Yes |
Day of Year | Yes |
________________________________ | __________________ |
S.E.: Clustered | by: Neighborhood |
Observations | 3,578,253 |
Rendered on: 2025-07-27 03:01:11
Total render time: 3397.264 sec elapsed