Baltimore Ceasefire 365 is a city-wide call asking Baltimore residents to avoid having any murders through quarterly Ceasefires and Peace Challenges (February, May, August, and November).
In this post, we use open data and R to look at the distribution of shootings in space and time, and model the impact of the Ceasefires.
Baltimore releases detailed data on issues relevant to the city, including crime. This allows us to get a good idea of the distribution of shootings in Baltimore.
Shootings per day appear to have increased over time, but the time series is complicated. Here’s how we got the data and created the above figure:
library(tidyverse)
library(scales)
bpd <- read_csv("https://raw.githubusercontent.com/peterphalen/ceasefire/master/BPD_Part_1_Victim_Based_Crime_Data.csv")
# subset to shootings or homicides with a firearm
bpd <- subset(bpd, Description == "SHOOTING" |
(Description == "HOMICIDE" & Weapon == "FIREARM"))
bpd$CrimeDate <- as.Date(bpd$CrimeDate, format = "%m/%d/%Y")
# there are many crimes per day. collapse to daily counts
daily <- bpd %>% group_by(CrimeDate) %>% summarise(shootings = n())
# fill missing dates, because some had no shootings
full.ts <- data.frame(CrimeDate = seq(daily$CrimeDate[1],
daily$CrimeDate[nrow(daily)], by="day"))
daily <- full_join(full.ts,daily)
daily <- daily %>% group_by(CrimeDate) %>% mutate_all(funs(ifelse(is.na(.),0,.)))
ggplot(daily) +
aes(x=CrimeDate, y=shootings) +
geom_point(alpha=.2) +
xlab("date") +
ylab("shootings") +
scale_y_continuous(breaks=c(0,4,8,12)) +
scale_x_date(labels = date_format("%b %Y")) +
ggtitle(" ",
subtitle="Baltimore (2012-present)")
These shootings disproportionately affect black communities. You can tap areas of the map to see which neighborhoods are most impacted.
Here is code to produce the above map—as well as a population-adjusted version (not run):
library(geojsonio)
library(leaflet)
bpd$Neighborhood <- as.character(bpd$Neighborhood)
bpd <- subset(bpd, !is.na(Neighborhood))
count <- bpd %>%
group_by(Neighborhood) %>%
summarise(total.count=n())
# get polygon data to draw neighborhoods.
# these shapes downloaded from Baltimore Open Data at https://gis-baltimore.opendata.arcgis.com/datasets/1ca93e68f11541d4b59a63243725c4b7_0.geojson
# but I'm pulling from a github backup for stability
nbds <- geojsonio::geojson_read("https://raw.githubusercontent.com/peterphalen/ceasefire/master/Neighborhoods.geojson", what = "sp")
get_shooting_count <- function(neighborhood){
nbd <- as.character(neighborhood)
if(nbd %in% count$Neighborhood){
count <- count[count$Neighborhood == nbd,]$total.count
return(count)
}
if(!(nbd %in% count$Neighborhood)){
return(0)
}
}
nbds$count <- sapply(nbds$Name, get_shooting_count)
# draw legend
range.count <- range(nbds$count,na.rm=T)
labs <- c(0,50,100,150,200,250)
pal.crime <- colorNumeric(colorRamp(c('#ccccff', 'red')), labs)
leaflet(nbds) %>%
addProviderTiles(providers$CartoDB.PositronNoLabels) %>%
addPolygons(stroke=T,
weight=1,
popup=paste0(nbds$Name,"<br/>Shootings: ",nbds$count),
color=~pal.crime(count),
fillOpacity=.5) %>%
addLegend("bottomright",title="# of Shootings (2012-present)",colors=~pal.crime(labs),labels=~labs)
#--------- population-adjusted --------------#
nbds$per1k <- nbds$count / nbds$Population * 1000
nbds$per1k <- round(nbds$per1k)
nbds$per1k <- ifelse(nbds$Population == 0, NA, nbds$per1k)
labs <- c(0,20,40,60)
pal.crime <- colorNumeric(colorRamp(c('#ccccff', 'red')),
labs,
na.color = "#b2b2b2")
countlabel <- paste0(nbds$Name,"<br/>",nbds$count," shootings among ",nbds$Population," residents")
nbds$countlabel <- ifelse(nbds$Population == 0, paste0(nbds$Name,":<br/>","No residents"), countlabel)
leaflet(nbds) %>% #draw population-adjusted map,
#areas with 0 residents are greyed
#out but can still be clicked
addProviderTiles(providers$CartoDB.PositronNoLabels) %>%
addPolygons(stroke=T,
weight=1,
popup=nbds$countlabel,
color=~pal.crime(per1k),
fillOpacity=.6) %>%
addLegend("bottomright",title="Shootings per one</br>thousand residents</br>(2012-present)",colors=~pal.crime(labs),labels=~labs)
Ceasefires have been called four times per year since August 2017. These are “ceasefire weekends” but their impact often extends well beyond a few days. One lasted twelve.
Here are the first days of each ceasefire (all Fridays).
# first day (Friday) of ceasefire weekends
ceasefire.initial <-
as.Date(
c("08/04/2017",
"11/03/2017",
"02/02/2018",
"05/11/2018",
"08/03/2018",
"11/02/2018",
"02/01/2019",
"05/10/2019",
"08/02/2019"),
format="%m/%d/%Y")
We determine the weekends of each ceasefire so we can see whether these weekends had fewer shootings, after accounting for other trends and seasonal effects.
ceasefire.weekends <-
lapply(ceasefire.initial,
function(x){
seq(from=x,
by="day",
length.out=3)
}
)
ceasefire.weekends <- do.call("c",
ceasefire.weekends)
First we’re going to use the prophet
package to check the rough effect of the ceasefire (coded as a “holiday”) while accounting for time trends and yearly and weekly seasonality. The prophet
package was designed to be a good first pass: it gives you decent forecasts without a lot of manual effort.
We feed the ceasefire weekend dates into the model as special days or “holidays.”
ceasefires <- data_frame(
holiday = 'ceasefire',
ds = ceasefire.initial, # Fridays
lower_window = 0,
upper_window = 2 # for Sat and Sun
)
Then fit the model, accounting for general time trends as well as yearly and weekly seasonality.
library(prophet)
ts <- data.frame(ds=daily$CrimeDate,
y=daily$shootings)
m1 <- prophet(ts,
yearly.seasonality = T,
weekly.seasonality = T,
mcmc.samples = 500,
holidays = ceasefires,
cores=4)
We plot the model against the data. There are more shootings in the past few years, but clear seasonality. And check out the multiple downward blue spikes at the bottom-right of the plot…
Here is the decomposition of the above time series. The “holidays” are ceasefires (those downward spikes). Ceasefires appear to be associated with fewer shootings, even after accounting for weekly seasonality, yearly seasonality, and overall time trends.
The prophet
code is really simple, which is nice. Here’s how we produced the above decomposition plots:
future <- make_future_dataframe(m1, periods = 30)
forecast <- predict(m1, future)
plot(m1, forecast)
prophet_plot_components(m1, forecast)
The prophet
package is user-friendly but it doesn’t let us drill down into the various effects very easily, and it doesn’t allow us to use a poisson link function, which we want because the outcome is a count and has almost identical mean and standard deviation. So we’re going to fit our own Bayesian model in Stan using the more flexible rstanarm
package.
We want to include information about the date, the day of the week (Mon-Sun), the day of the year (1-365), and a binary variable indicating whether a date occurs during ceasefire.
library(lubridate)
# the julian calendar is a simple system for numeric dates
daily$jul <- julian(daily$CrimeDate)
daily$weekday <- factor(weekdays(daily$CrimeDate),
levels=c("Monday","Tuesday","Wednesday","Thursday",
"Friday","Saturday","Sunday"))
daily$day.of.year <- yday(daily$CrimeDate)
daily$ceasefire <- factor(ifelse(daily$CrimeDate %in% ceasefire.weekends, 1, 0),
labels=c("Regular Day","Ceasefire Weekend"))
We’ll predict shootings using a spline time trend, a cyclical spline for yearly seasonality, random effects for day of the week, and a binary indicator for the ceasefire. We use a Poisson link function because the outcome is a count and the mean
is about the same as sd
.
library(rstanarm)
m2 <- stan_gamm4(shootings ~
s(jul) +
s(day.of.year,
bs="cc") + #cyclical constraint
ceasefire,
random= ~ (1 | weekday),
data=daily,
cores=4,
iter=1000,
family=poisson)
Here is a plot of the model against the observations. Ceasefires are visible as seven dramatic downward red spikes beginning in 2017. The results cohere well with prophet
, but this model fits the data better in many ways. For example, we no longer see impossible predictions of less than zero shootings.
Here’s how we created the above plot:
daily$Estimate <- apply(posterior_linpred(m2, transform=TRUE),
2, median)
# 80% posterior predictive interval for main plot
preds <- posterior_predict(m2, transform=TRUE)
preds <- apply(preds, 2, function(x){quantile(x, prob=c(.1, .9))})
daily$high <- preds["90%",]
daily$low <- preds["10%",]
daily %>%
ggplot(aes(x = CrimeDate, y = shootings)) +
geom_point(alpha=.2) +
geom_line(aes(y = Estimate), alpha=.5, color="red") +
geom_ribbon(aes(ymin=low, ymax=high), alpha=.2) +
scale_y_continuous(breaks=c(0,4,8,12)) +
xlab("date") +
theme_bw()
Here are the marginal seasonal and time trend effects, showing the components that make up the above time series. The specific numbers of shootings are to some extent dependent upon the reference point, but these figures give you the right idea of the shape of the trends.
Our model specification has slower-moving seasonal trends. Summers still show up as particularly bad.
Finally, we can use this model to measure the effect of Ceasefires on shootings per day, after accounting for all these trends and seasonalities. The effect of the Ceasefire (plotted here as an odds ratio) is classically statistically significant and suggest an approximate 60% reduction in shootings during ceasefire weekends:
ceasefire.effect <- as.array(m2, regex_pars = "ceasefire")
# scale to an odds ratio by exponentiating
ceasefire.effect <- exp(ceasefire.effect)
library(bayesplot)
mcmc_intervals(ceasefire.effect) +
scale_y_discrete(labels="ceasefire effect") +
xlab("odds ratio") +
xlim(c(0,1))
We can also use this model to see the impact of the ceasefire at specific points in time. For example, here is the model-estimated impact of the ceasefire on Friday May 10th, 2019.
Without a ceasefire, we would expect about three or four people to get shot on the first day of the weekend, on average. But this will be a Ceasefire weekend, so the model expects about two fewer shootings per day.
#----- code to produce final decomposition figures ------
### Day of year plot
doy.frame <- with(daily, # Ref: regular day in mid-2018
data.frame(
jul=julian(as.Date("2018-08-01"))[1],
weekday=0, # weekday not used for this prediction
ceasefire="Regular Day",
day.of.year=1:365))
post <- posterior_linpred(m2,
newdata=doy.frame,
transform=TRUE,
re.form = NA)
doy.frame$Estimate <- apply(post,2, median)
# 95% CI
ci <- apply(post,2,function(x){quantile(x, prob=c(.025, .975))})
doy.frame$low <- ci["2.5%",]
doy.frame$high <- ci["97.5%",]
doy.axis.dates <- seq(as.Date("0-01-01"),by="month",length.out=12)
doy.plot <-
doy.frame %>%
ggplot() +
aes(x=day.of.year, y=Estimate) +
geom_line(aes(y = Estimate), alpha=.5) +
geom_ribbon(aes(ymin=low, ymax=high), alpha=.2) +
xlab("Day of year") +
ylab("Shootings") +
ggtitle(" ") +
scale_x_continuous(
breaks=yday(c(doy.axis.dates, as.Date("0-12-31"))),
labels=date_format("%b %d")(c(doy.axis.dates, as.Date("0-01-01")))
) +
theme_bw()
### Day of week plot
wday.frame <- with(daily, # Ref: regular day in August 2018
data.frame(
jul=julian(as.Date("2018-08-01"))[1],
weekday=unique(daily$weekday),
ceasefire="Regular Day",
day.of.year=yday(as.Date("2018-08-01"))))
post <- posterior_linpred(m2,
newdata=wday.frame,
transform=TRUE)
wday.frame$Estimate <- apply(post,2, median)
# 95% CI
ci <- apply(post,2,function(x){quantile(x, prob=c(.025, .975))})
wday.frame$low <- ci["2.5%",]
wday.frame$high <- ci["97.5%",]
wday.plot <-
wday.frame %>%
ggplot() +
aes(x=weekday, y=Estimate) +
geom_point(size=2) +
geom_errorbar(aes(ymin=low, ymax=high),
width=.2) +
xlab("Day of week") +
ylab("Shootings") +
ggtitle(" ") +
theme_bw()
### Time trend plot
time.frame <- with(daily, # Ref: August
data.frame(
jul=jul,
weekday=0, # not used for this prediction
ceasefire="Regular Day",
day.of.year=yday(as.Date("2018-08-01"))))
post <- posterior_linpred(m2,
newdata=time.frame,
transform=TRUE,
re.form = NA)
time.frame$Estimate <- apply(post,2, median)
# 95% CI
ci <- apply(post,2,function(x){quantile(x, prob=c(.025, .975))})
time.frame$low <- ci["2.5%",]
time.frame$high <- ci["97.5%",]
trend.axis.dates <- seq(from=as.Date("2012-01-01"),
by="year",
length.out=9)
time.plot <-
time.frame %>%
ggplot() +
aes(x=jul, y=Estimate) +
geom_line(aes(y = Estimate), alpha=.5) +
geom_ribbon(aes(ymin=low, ymax=high), alpha=.2) +
xlab("Time trend") +
ylab("Shootings") +
ggtitle(" ") +
scale_x_continuous(
breaks=julian(trend.axis.dates),
labels=date_format("%m-%Y")(trend.axis.dates)) +
theme_bw()
# Display above plots together
library(gridExtra)
grid.arrange(time.plot, wday.plot, doy.plot)
### Ceasefire plot
pred.day <- as.Date("2019-05-10")
ceasefire.frame <- with(daily,
data.frame(
jul=julian(pred.day)[1],
weekday="Friday",
ceasefire=factor(c("Regular Day",
"Ceasefire Weekend"),
levels=c("Regular Day",
"Ceasefire Weekend")),
day.of.year=yday(pred.day)))
post <- posterior_linpred(m2,
newdata=ceasefire.frame,
transform=TRUE)
ceasefire.frame$Estimate <- apply(post,2, median)
# 95% CI
ci <- apply(post,2,function(x){quantile(x, prob=c(.25, .75))})
ceasefire.frame$low <- ci["25%",]
ceasefire.frame$high <- ci["75%",]
# 50% posterior predictive interval for main plot
preds <- posterior_predict(m2,
newdata=ceasefire.frame,
transform=TRUE)
ceasefire.frame$high.ppd <- apply(preds,2,function(x){quantile(x, prob=c(.75), na.rm=T)})
ceasefire.frame$low.ppd <- apply(preds,2,function(x){quantile(x, prob=c(.25), na.rm=T)})
ceasefire.frame %>%
ggplot() +
aes(x=ceasefire, y=Estimate) +
geom_point(aes(y = low.ppd), col="blue", shape=95, size=5) +
geom_point(aes(y = high.ppd), col="blue", shape=95, size=5) +
geom_point(aes(y = Estimate),
size=2) +
geom_errorbar(aes(ymin=low, ymax=high),
width=.2) +
xlab("") +
ylab("Shootings") +
ggtitle("Predicted shooting count for Friday May 10, 2019",
subtitle="with 50% credible intervals (black) and posterior predictive intervals (blue)") +
theme_bw()