library(tidyverse)
library(here)
library(ggrepel)
library(patchwork)
<-
data_reports read_rds(here("..", "output",
"data_reports.rds"))
<-
data_calls read_rds(here("..", "output",
"data_calls.rds"))
source(here("fun_dynamic_bins.R"))
Raw associations
In this script, I plot some raw associations between temperature and DV.
Raw in the sense that there is no regression, fixed effects etc. Simple averages and scatterplots.
1 Scatterplot of monthly average DV and temp
I simply want to compute monthly averages, and scatterplot. To see if warmer months have more DV, in general. It is the case.
First, compute avg temp per month, and avg sum of crimes per month
<- data_reports %>%
data_monthly group_by(year, month) %>%
summarise(reports = sum(reports_dv),
tmean = mean(tmean, na.rm = T),
.groups = "drop")
<- data_monthly %>%
data_monthly full_join(data_calls %>%
group_by(year, month) %>%
summarise(calls = sum(calls),
.groups = "drop"))
<- data_monthly %>%
data_monthly_avg group_by(month) %>%
summarise(reports = mean(reports),
calls = mean(calls, na.rm = T),
tmean = mean(tmean),
.groups = "drop")
And then plot for both reports and calls.
Warmer months are associated with a higher nr of DV events on avg.
ggplot(data_monthly_avg, aes(x = tmean, y = reports)) +
geom_point() +
geom_text_repel(aes(label = month)) +
geom_line(stat = "smooth", method = "lm", se = FALSE,
size = 1.5, alpha = 0.3, color = "blue") +
scale_x_continuous(breaks = seq(0, 30, 1)) +
labs(y = "Reports for DV", x = "Temperature") +
theme_light()
ggsave(here("..", "output", "figures", "assoc_monthly_reports.png"),
width = 6, height = 4, dpi = 300)
ggplot(data_monthly_avg, aes(x = tmean, y = calls)) +
geom_point() +
geom_text_repel(aes(label = month)) +
geom_line(stat = "smooth", method = "lm", se = FALSE,
size = 1.5, alpha = 0.3, color = "blue") +
scale_x_continuous(breaks = seq(0, 30, 1)) +
labs(y = "Calls for DV", x = "Temperature") +
theme_light()
ggsave(here("..", "output", "figures", "assoc_monthly_calls.png"),
width = 6, height = 4, dpi = 300)
Now we can do the same but facet wrap per year, to see if the trend holds.
Holds in every year but in 2020 pandemic year for reports
ggplot(data_monthly, aes(x = tmean, y = reports)) +
geom_point() +
#geom_text_repel(aes(label = month)) +
geom_line(stat = "smooth", method = "lm", se = FALSE,
size = 1.5, alpha = 0.3, color = "blue") +
scale_x_continuous(breaks = seq(0, 30, 1)) +
labs(y = "Reports for DV", x = "Temperature") +
theme_light() +
facet_wrap(~year, scales = "free_y")
Not quite for calls, but there is still sth.
%>% drop_na() %>%
data_monthly group_by(year) %>%
# keep only years for which we have all month
filter(n() == 12) %>%
ggplot(aes(x = tmean, y = calls)) +
geom_point() +
#geom_text_repel(aes(label = month)) +
geom_line(stat = "smooth", method = "lm", se = FALSE,
size = 1.5, alpha = 0.3, color = "blue") +
scale_x_continuous(breaks = seq(0, 30, 1)) +
labs(y = "Reports for DV", x = "Temperature") +
theme_light() +
facet_wrap(~year)
rm(data_monthly, data_monthly_avg)
2 Raw data, AGEB per day
Now, I want the plot the raw data. I want to plot the avg “number of crime per day per AGEB”, by the temperature bin the obs falls in.
First compute the bins in which each observation falls, and then compute the avg number of DV crime by temperature bin. I computed sd, se, and 95% CI so i can plot it. I also computed the support of each bin.
<- data_reports %>%
bins_reports filter(!is.na(reports_dv)) %>%
# customize the treshold to have more or less bins
group_by(bins_tmean_one = create_dynamic_bins(tmean, 1, 0.001)) %>%
filter(!is.na(bins_tmean_one)) %>%
summarise(mean = mean(reports_dv),
count = n(),
sd = sd(reports_dv),
se = sd / sqrt(count)) %>%
filter(!is.na(sd)) %>% #to compute the sd, I need more than 1 crime
mutate(prop_in_pc = 100*count/sum(count)) %>%
mutate(lower_ci = mean - qt(0.975, count-1) * se,
upper_ci = mean + qt(0.975, count-1) * se) %>%
print(n=100)
# A tibble: 17 × 8
bins_tmean_one mean count sd se prop_in_pc lower_ci upper_ci
<fct> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 <9 0.0184 13100 0.139 0.00121 0.170 0.0160 0.0208
2 [9,10) 0.0200 18345 0.145 0.00107 0.239 0.0178 0.0221
3 [10,11) 0.0232 36595 0.157 0.000820 0.476 0.0216 0.0248
4 [11,12) 0.0244 83254 0.163 0.000563 1.08 0.0233 0.0255
5 [12,13) 0.0257 177915 0.166 0.000394 2.31 0.0249 0.0265
6 [13,14) 0.0275 321991 0.172 0.000303 4.19 0.0269 0.0281
7 [14,15) 0.0282 543747 0.174 0.000236 7.07 0.0278 0.0287
8 [15,16) 0.0289 878511 0.176 0.000188 11.4 0.0285 0.0293
9 [16,17) 0.0298 1143799 0.179 0.000167 14.9 0.0294 0.0301
10 [17,18) 0.0302 1328636 0.180 0.000157 17.3 0.0298 0.0305
11 [18,19) 0.0315 1189582 0.184 0.000169 15.5 0.0311 0.0318
12 [19,20) 0.0333 847487 0.190 0.000207 11.0 0.0329 0.0337
13 [20,21) 0.0350 523838 0.195 0.000269 6.81 0.0345 0.0355
14 [21,22) 0.0354 325928 0.197 0.000345 4.24 0.0347 0.0360
15 [22,23) 0.0376 151525 0.202 0.000520 1.97 0.0366 0.0386
16 [23,24) 0.0421 71816 0.216 0.000806 0.934 0.0406 0.0437
17 >=24 0.0487 31337 0.237 0.00134 0.408 0.0460 0.0513
%>%
bins_reports ggplot(aes(bins_tmean_one, mean, color = prop_in_pc)) +
geom_linerange(aes(ymin = lower_ci, ymax = upper_ci),
#color = "gray50"
+
) geom_point(stat = "identity", size = 2) +
scale_x_discrete(guide = guide_axis(n.dodge = 1, angle = 45)) +
scale_colour_viridis_c(option = "D", direction = -1,
limits = c(0, 5),
na.value = "black",
breaks = c(0, 1, 2, 3, 4, 5),
labels = c("0%", "1%", "2%", "3%", "4%", ">5%")) +
labs(#title = "Avg. count of DV reports per day per neighborhood, by temp. bin",
x = element_blank(), y = "DV reports", color = "Support of temperature bin (% of obs)") +
theme_minimal() +
theme(legend.position = "bottom") +
guides(colour = guide_colourbar(title.position = "top",
barwidth = 13))
ggsave(here("..", "output", "figures", "assoc_bins_reports_color.png"),
width = 6, height = 4, dpi = 300)
Same figure with call data:
Note that the bins are not the same because they are created dynamically, based on the data, and the time coverage for calls and reports is different.
<- data_calls %>%
bins_calls filter(!is.na(calls)) %>%
# customize the treshold to have more or less bins
group_by(bins_tmean_one = create_dynamic_bins(tmean, 1, 0.001)) %>%
filter(!is.na(bins_tmean_one)) %>%
summarise(mean = mean(calls),
count = n(),
sd = sd(calls),
se = sd / sqrt(count)) %>%
filter(!is.na(sd)) %>% #to compute the sd, I need more than 1 crime
mutate(prop_in_pc = 100*count/sum(count)) %>%
mutate(lower_ci = mean - qt(0.975, count-1) * se,
upper_ci = mean + qt(0.975, count-1) * se)
%>%
bins_calls ggplot(aes(bins_tmean_one, mean, color = prop_in_pc)) +
geom_linerange(aes(ymin = lower_ci, ymax = upper_ci),
#color = "gray50"
+
) geom_point(stat = "identity", size = 2) +
scale_x_discrete(guide = guide_axis(n.dodge = 1, angle = 45)) +
scale_colour_viridis_c(option = "D", direction = -1,
limits = c(0, 5),
na.value = "black",
breaks = c(0, 1, 2, 3, 4, 5),
labels = c("0%", "1%", "2%", "3%", "4%", ">5%")) +
labs(x = element_blank(), y = "Calls", color = "Support of temperature bin (% of obs)") +
theme_minimal() +
theme(legend.position = "bottom") +
guides(colour = guide_colourbar(title.position = "top",
barwidth = 13))
ggsave(here("..", "output", "figures", "assoc_bins_calls_color.png"),
width = 6, height = 4, dpi = 300)
Same figures; but for different types of crime
To compare to other types of crime, to see if the association is specific to DV.
<- data_reports %>%
bins_homicides filter(!is.na(reports_homicide)) %>%
# customize the treshold to have more or less bins
group_by(bins_tmean_one = create_dynamic_bins(tmean, 1, 0.001)) %>%
filter(!is.na(bins_tmean_one)) %>%
summarise(mean = mean(reports_homicide),
count = n(),
sd = sd(reports_homicide),
se = sd / sqrt(count)) %>%
filter(!is.na(sd)) %>% #to compute the sd, I need more than 1 crime
mutate(prop_in_pc = 100*count/sum(count)) %>%
mutate(lower_ci = mean - qt(0.975, count-1) * se,
upper_ci = mean + qt(0.975, count-1) * se)
<- data_reports %>%
bins_thefts filter(!is.na(reports_theft)) %>%
# customize the treshold to have more or less bins
group_by(bins_tmean_one = create_dynamic_bins(tmean, 1, 0.001)) %>%
filter(!is.na(bins_tmean_one)) %>%
summarise(mean = mean(reports_theft),
count = n(),
sd = sd(reports_theft),
se = sd / sqrt(count)) %>%
filter(!is.na(sd)) %>% #to compute the sd, I need more than 1 crime
mutate(prop_in_pc = 100*count/sum(count)) %>%
mutate(lower_ci = mean - qt(0.975, count-1) * se,
upper_ci = mean + qt(0.975, count-1) * se)
<- data_reports %>%
bins_frauds filter(!is.na(reports_fraud)) %>%
# customize the treshold to have more or less bins
group_by(bins_tmean_one = create_dynamic_bins(tmean, 1, 0.001)) %>%
filter(!is.na(bins_tmean_one)) %>%
summarise(mean = mean(reports_fraud),
count = n(),
sd = sd(reports_fraud),
se = sd / sqrt(count)) %>%
filter(!is.na(sd)) %>% #to compute the sd, I need more than 1 crime
mutate(prop_in_pc = 100*count/sum(count)) %>%
mutate(lower_ci = mean - qt(0.975, count-1) * se,
upper_ci = mean + qt(0.975, count-1) * se)
Plot for the different types of crime
### plots ----
<- bins_reports %>%
plot_reports ggplot(aes(bins_tmean_one, mean)) +
geom_linerange(aes(ymin = lower_ci, ymax = upper_ci)) +
geom_point(stat = "identity", size = 2) +
scale_x_discrete(guide = guide_axis(n.dodge = 1, angle = 45)) +
labs(title = "Domestic violence (reports)",
x = NULL, y = "Reports per day-AGEB") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
<- bins_calls %>%
plot_calls ggplot(aes(bins_tmean_one, mean)) +
geom_linerange(aes(ymin = lower_ci, ymax = upper_ci)) +
geom_point(stat = "identity", size = 2) +
scale_x_discrete(guide = guide_axis(n.dodge = 1, angle = 45)) +
labs(title = "Domestic violence (calls)",
x = NULL, y = "Calls per day-neighborhood") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
<- bins_thefts %>%
plot_thefts ggplot(aes(bins_tmean_one, mean)) +
geom_linerange(aes(ymin = lower_ci, ymax = upper_ci)) +
geom_point(stat = "identity", size = 2) +
scale_x_discrete(guide = guide_axis(n.dodge = 1, angle = 45)) +
labs(title = "Thefts",
x = NULL, y = "Reports per day-AGEB") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
<- bins_frauds %>%
plot_frauds ggplot(aes(bins_tmean_one, mean)) +
geom_linerange(aes(ymin = lower_ci, ymax = upper_ci)) +
geom_point(stat = "identity", size = 2) +
scale_x_discrete(guide = guide_axis(n.dodge = 1, angle = 45)) +
labs(title = "Frauds",
x = NULL, y = "Reports per day-AGEB") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
<- bins_homicides %>%
plot_homicides ggplot(aes(bins_tmean_one, mean)) +
geom_linerange(aes(ymin = lower_ci, ymax = upper_ci)) +
geom_point(stat = "identity", size = 2) +
scale_x_discrete(guide = guide_axis(n.dodge = 1, angle = 45)) +
labs(title = "Homicides",
x = NULL, y = "Reports per day-AGEB") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
Patchwork them:
<-
plot_combined | plot_calls) / (plot_thefts | plot_frauds | plot_homicides)
(plot_reports
plot_combined
ggsave(plot = plot_combined,
here("..", "output", "figures", "assoc_bins_type_combined.png"),
width = 14, height = 8, dpi = 300)