Sunday, August 16, 2015

Booking Recidivism code and more

Code to look at 'Booking Recidivism'. The political piece is here. The R-based animation is here.

# Notes on Data
# The Whatcom County Inmate press releases were used to compile this data. Unique identifiers were created by pasting together the First, Middle, and Last names. In R:

# paste(Booked201x$FirstMiddle,Booked201x$Last)

# Unique numerical IDs weren't appended to the press releases until 2013. Data may be subject to revision. For R code, see here. This code is messy, overcomplicated, uses some side effects, etc. Can you do better than this?

library(lattice)
library(dplyr)

#load data for Crimes (Charges Booked)

# Read the per-year charge extracts and stack them into `Crimes`.
# Each file is addressed by its full path rather than by calling setwd()
# before every read, so the session's working directory is left untouched.
bookings_dir <- "C:/JoyGilfilen/Bookings"

# Read one year's CrimeASCII.csv. The files have no header row, are comma
# separated, are read with quoting disabled (quote = "") and with surrounding
# white space stripped. fill = FALSE keeps read.csv from padding short rows
# with blank fields.
read_crimes <- function(year) {
  read.csv(
    file.path(bookings_dir, year, "CrimeASCII.csv"),
    fill = FALSE, header = FALSE, strip.white = TRUE,
    sep = ",", quote = "", stringsAsFactors = FALSE
  )
}

# The per-year tables are kept as separate objects because the yearly
# comparison further down counts their rows individually.
Crimes2011 <- read_crimes("2011")
Crimes2012 <- read_crimes("2012")
Crimes2013 <- read_crimes("2013")
Crimes2014 <- read_crimes("2014")
Crimes2015 <- read_crimes("2015")

# All years combined; the two columns are the court and the charge text.
Crimes <- rbind(Crimes2011, Crimes2012, Crimes2013, Crimes2014, Crimes2015)
colnames(Crimes) <- c("Court", "Charge")

#load data for Bookings
# Read the per-year booking lists as-is (no column names yet); these raw
# tables are only used for the row counts in the yearly comparison.
# Files are addressed by full path instead of calling setwd() before every
# read, so the session's working directory is left untouched.
bookings_dir <- "C:/JoyGilfilen/Bookings"

# Read one year's headerless Booked.csv. fill = FALSE keeps read.csv from
# padding short rows with blank fields.
read_bookings_raw <- function(year) {
  read.csv(
    file.path(bookings_dir, year, "Booked.csv"),
    stringsAsFactors = FALSE, header = FALSE, fill = FALSE
  )
}

Booked2011 <- read_bookings_raw("2011")
Booked2012 <- read_bookings_raw("2012")
Booked2013 <- read_bookings_raw("2013")
Booked2014 <- read_bookings_raw("2014")
Booked2015 <- read_bookings_raw("2015")

# Compare Crimes, Booked, Unique
# download.file("http://www.census.gov/popest/data/counties/asrh/2014/files/CC-EST2014-ALLDATA-53.csv","CC-EST2014-ALLDATA-53.csv")
# Yearly comparison table: charges, bookings and unique booking rows per
# year, next to the county population.
year_labels <- c("2011", "2012", "2013", "2014", "2015")

# Build a one-column data frame (column V1, row names = year labels) from
# five yearly counts. The 2015 count is doubled; the plot captions below call
# 2015 "projected" (presumably the 2015 files cover half a year - confirm).
yearly_totals <- function(n2011, n2012, n2013, n2014, n2015) {
  totals <- c(n2011, n2012, n2013, n2014, n2015 * 2)
  as.data.frame(matrix(totals, ncol = 1, dimnames = list(year_labels, NULL)))
}

Years <- matrix(c(2011, 2012, 2013, 2014, 2015), ncol = 1)

# from CC-EST2014-ALLDATA-53.csv 2015 estimated
TotPop <- matrix(
  c(203329, 204827, 206248, 208351, 210454),
  ncol = 1,
  dimnames = list(year_labels, NULL)
)

Charge <- yearly_totals(
  nrow(Crimes2011), nrow(Crimes2012), nrow(Crimes2013),
  nrow(Crimes2014), nrow(Crimes2015)
)
Booked <- yearly_totals(
  nrow(Booked2011), nrow(Booked2012), nrow(Booked2013),
  nrow(Booked2014), nrow(Booked2015)
)
# "Unique" counts distinct rows of each raw booking table.
Unique <- yearly_totals(
  nrow(unique(Booked2011)), nrow(unique(Booked2012)),
  nrow(unique(Booked2013)), nrow(unique(Booked2014)),
  nrow(unique(Booked2015))
)

CBU <- cbind(Years, Charge, Booked, Unique, TotPop)
colnames(CBU) <- c("Years", "Charge", "Booked", "Unique", "TotPop")
rownames(CBU) <- c(1, 2, 3, 4, 5)

# Per-capita rates (each count divided by total population), as a matrix.
CBU_D <- cbind(
  Years = CBU$Years,
  "Charge.TotPop" = CBU$Charge / CBU$TotPop,
  "Booked.TotPop" = CBU$Booked / CBU$TotPop,
  "Unique.TotPop" = CBU$Unique / CBU$TotPop
)

# Counts and rates side by side (the Years column appears twice).
CBU_A <- cbind(CBU, CBU_D)

CBU
CBU_D
CBU_A

# Snapshot of the CBU and CBU_A tables from a recorded run, echoed verbatim
# with cat(). The text below is a string literal; it is not recomputed from
# the data loaded above.
cat(' 
CBU
  Years Charge Booked Unique TotPop
1  2011 11333   6860   4989 203329
2  2012  9650   6523   4723 204827
3  2013  9577   6677   4890 206248
4  2014 10520   6348   4466 208351
5  2015 13304   6458   5190 210454
CBU_A
  Years Charge Booked Unique TotPop Years Charge.TotPop Booked.TotPop Unique.TotPop
1  2011 11333   6860   4989 203329  2011   0.05573725    0.03373842    0.02453659
2  2012  9650   6523   4723 204827  2012   0.04711293    0.03184639    0.02305848
3  2013  9577   6677   4890 206248  2013   0.04643439    0.03237365    0.02370932
4  2014 10520   6348   4466 208351  2014   0.05049172    0.03046782    0.02143498
5  2015 13304   6458   5190 210454  2015   0.06321571    0.03068604    0.02466097
')

# Plot comparisons
# Three line charts over the yearly table CBU. In each chart the first
# series is drawn with plot(), which fixes the axes, labels and y-range; the
# remaining series are overlaid with lines(). lines() only receives drawing
# parameters (type, col, lwd): axis arguments such as xlab/ylab/ylim have no
# effect there and only trigger "not a graphical parameter" warnings.

# 1. Raw yearly counts.
plot(CBU$Years, CBU$Booked, type = "b", xlab = "Year", ylab = "",
     ylim = c(4000, 14000), col = "red", lwd = 5)
lines(CBU$Years, CBU$Charge, type = "b", col = "blue", lwd = 5)
lines(CBU$Years, CBU$Unique, type = "b", col = "orange", lwd = 5)
mtext("Booked (red), Crime (blue), Unique (orange). 2015 projected")

# 2. Charges per booking and charges per unique booking row.
plot(CBU$Years, CBU$Charge / CBU$Booked, type = "b", xlab = "Year", ylab = "",
     ylim = c(0, 3), col = "purple", lwd = 5)
lines(CBU$Years, CBU$Charge / CBU$Unique, type = "b", col = "violet", lwd = 5)
mtext("Crime/Booked (purple) and Crime/Unique (violet). 2015 projected")

# 3. Each count as a share of total population.
plot(CBU$Years, CBU$Charge / CBU$TotPop, type = "b", xlab = "Year", ylab = "",
     ylim = c(0, .08), col = "purple", lwd = 5)
lines(CBU$Years, CBU$Unique / CBU$TotPop, type = "b", col = "violet", lwd = 5)
lines(CBU$Years, CBU$Booked / CBU$TotPop, type = "b", col = "blue", lwd = 5)
mtext("Crime/TotPop (purple), Unique/TotPop(violet) and Booked/TotPop (blue). 2015 projected")

#Concat First, Middle, Last
# Re-read each year's booking list, label its columns, and build one
# "FirstMiddle Last" string per booking row (paste_<year>).
# Files are addressed by full path instead of calling setwd() before every
# read, so the session's working directory is left untouched.
bookings_dir <- "C:/JoyGilfilen/Bookings"

# Read one year's headerless Booked.csv and apply the given column names.
read_named_bookings <- function(year, col_names) {
  booked <- read.csv(
    file.path(bookings_dir, year, "Booked.csv"),
    stringsAsFactors = FALSE, header = FALSE
  )
  colnames(booked) <- col_names
  booked
}

# Full-name key: first/middle names followed by the last name, separated by
# a single space (paste's default separator).
full_name <- function(booked) {
  paste(booked$FirstMiddle, booked$Last)
}

# The 2011-2012 files have two columns; from 2013 on a leading ID column is
# present as well.
cols_without_id <- c("Last", "FirstMiddle")
cols_with_id <- c("ID", "Last", "FirstMiddle")

Booked2011 <- read_named_bookings("2011", cols_without_id)
paste_2011 <- full_name(Booked2011)

Booked2012 <- read_named_bookings("2012", cols_without_id)
paste_2012 <- full_name(Booked2012)

Booked2013 <- read_named_bookings("2013", cols_with_id)
paste_2013 <- full_name(Booked2013)

Booked2014 <- read_named_bookings("2014", cols_with_id)
paste_2014 <- full_name(Booked2014)

Booked2015 <- read_named_bookings("2015", cols_with_id)
paste_2015 <- full_name(Booked2015)

# rbind Names for all Years
# One row per booking, all years stacked in chronological file order, with
# the full-name string in the single column FullName.
# FullName is built as a factor explicitly. Converting a character matrix
# with as.data.frame() only produced a factor under the pre-R-4.0
# stringsAsFactors default, and the timeline plots further down pass FullName
# to points() as the y-coordinate, which needs the factor's integer codes
# (one per distinct name) rather than raw strings.
Names <- data.frame(
  FullName = factor(
    c(paste_2011, paste_2012, paste_2013, paste_2014, paste_2015)
  )
)

# some recidivism subsets
# Bookings per full name, most frequently booked first. Computed once and
# reused for every threshold below instead of re-running the same count()
# for each subset.
name_counts <- count(Names, FullName, sort = TRUE)

# Individuals at or above each booking threshold. The numeric suffix is the
# minimum number of bookings, except NamesGTR1, which (n > 1) holds everyone
# with two or more bookings.
NamesGTR15 <- subset(name_counts, n > 14)
NamesGTR13 <- subset(name_counts, n > 12)
NamesGTR10 <- subset(name_counts, n > 9)
NamesGTR5 <- subset(name_counts, n > 4)
NamesGTR3 <- subset(name_counts, n > 2)
NamesGTR1 <- subset(name_counts, n > 1)

# Print the total number of bookings, the size of each subset, and the
# number of distinct names.
nrow(Names)
nrow(NamesGTR15)
nrow(NamesGTR13)
nrow(NamesGTR10)
nrow(NamesGTR5)
nrow(NamesGTR3)
nrow(NamesGTR1)
length(unique(Names$FullName))

# Summary: number of individuals at or above each threshold.
Tgtr <- rbind(
  "15orGTR" = nrow(NamesGTR15),
  "13orGTR" = nrow(NamesGTR13),
  "10orGTR" = nrow(NamesGTR10),
  "5orGTR" = nrow(NamesGTR5),
  "3orGTR" = nrow(NamesGTR3),
  "2orGTR" = nrow(NamesGTR1)
)
Tgtr
# some charts
# Bar charts of bookings-per-individual: first the full distribution, then
# one chart per recidivism threshold. The calls stay at top level so each
# lattice chart is auto-printed.
# NOTE(review): barchart() is a lattice function while mtext() belongs to
# base graphics; confirm the annotations render as intended on the target
# device.

# Full distribution, annotated with overall totals and averages over the
# 4.5-year window.
barchart(count(Names, FullName, sort = TRUE)[, 2])
mtext("Recidivism: Full Spectrum for 4.5 years", line = 0)
mtext(paste("Individual Bookings = ", nrow(Names), "Unique Names = ", length(unique(Names$FullName))), line = -1)
mtext(paste("Bookings Per Day Average = ", as.integer(nrow(Names) / (4.5 * 365)), "Individual Bookings each day."), line = -2)
mtext(paste("Bookings Per Year Average = ", as.integer(length(unique(Names$FullName)) / 4.5), "Unique Names each year."), line = -3)

barchart(NamesGTR15[, 2], horizontal = TRUE)
mtext("Recidivism > 14")
mtext(paste(nrow(NamesGTR15), " = Individuals with 15 Bookings or more in 4.5 years."), line = 2)

# The caption count comes from NamesGTR13, matching the subset that is
# charted (it previously reported the size of NamesGTR10).
barchart(NamesGTR13[, 2], horizontal = TRUE)
mtext("Recidivism > 12")
mtext(paste(nrow(NamesGTR13), " = Individuals with 13 Bookings or more in 4.5 years."), line = 2)

barchart(NamesGTR10[, 2], horizontal = TRUE)
mtext("Recidivism > 9")
mtext(paste(nrow(NamesGTR10), " = Individuals with 10 Bookings or more in 4.5 years."), line = 2)

barchart(NamesGTR5[, 2], horizontal = TRUE)
mtext("Recidivism > 4")
mtext(paste(nrow(NamesGTR5), " = Individuals with 5 Bookings or more in 4.5 years."), line = 2)

barchart(NamesGTR3[, 2], horizontal = TRUE)
mtext("Recidivism > 2")
mtext(paste(nrow(NamesGTR3), " = Individuals with 3 Bookings or more in 4.5 years."), line = 2)

barchart(NamesGTR1[, 2], horizontal = TRUE)
mtext("Recidivism > 1")
mtext(paste(nrow(NamesGTR1), " = Individuals with 2 Bookings or more in 4.5 years."), line = 2)

# Time(x) vs. Users(y) for bookings at various recidivism levels
# Scatter of bookings for a set of individuals.
#   x: row name of the booking within `Names` (its position in the stacked,
#      year-ordered booking list)
#   y: the individual's FullName value as passed to points(); numeric
#      plotting relies on FullName being a factor (its integer code is used)
# Each individual is drawn in its own random colour.
#
# full_names  names (character or factor) of the individuals to draw
# caption     optional text written above the plot with mtext()
# xlab, ylab  axis labels
# The axis ranges are fixed at 1..30000 bookings and 1..17256 names.
plot_booking_timeline <- function(full_names,
                                  caption = NULL,
                                  xlab = "4.5 years Bookings",
                                  ylab = "4.5 years of unique names") {
  # A single point at the origin lies outside both axis ranges, so this call
  # effectively just sets up the empty plotting frame.
  plot.default(0, 0, xlab = xlab, ylab = ylab,
               xlim = c(1, 30000), ylim = c(1, 17256))
  for (person in as.character(full_names)) {
    rows <- Names[Names$FullName == person, , drop = FALSE]
    points(row.names(rows), rows$FullName,
           col = rgb(runif(1), runif(1), runif(1)))
  }
  if (!is.null(caption)) {
    mtext(caption)
  }
  invisible(NULL)
}

# One plot per recidivism level, from the most frequently booked group down
# to everyone booked at least twice.
plot_booking_timeline(NamesGTR15$FullName, "Individuals with 15 Bookings or More.")
plot_booking_timeline(NamesGTR13$FullName, "Individuals with 13 Bookings or More.")
plot_booking_timeline(NamesGTR10$FullName, "Individuals with 10 Bookings or More.")
plot_booking_timeline(NamesGTR5$FullName, "Individuals with 5 Bookings or More.")
plot_booking_timeline(NamesGTR3$FullName, "Individuals with 3 Bookings or More.")
plot_booking_timeline(NamesGTR1$FullName, "Individuals with 2 Bookings or More.")

# Every distinct name, with descriptive axis labels and no caption.
plot_booking_timeline(
  unique(Names$FullName),
  xlab = "1/1/2011 - 7/1/2015: 4.5 years, 29,637 Bookings into WC Jail.",
  ylab = "17,256 unique names"
)

# Time(x) vs. Charge(y) [Can be specific] for bookings at various recidivism levels
# Arbitrary terms for most common crimes
# Count the charges whose text contains a keyword (regular-expression match
# via grepl). Returns a one-row, one-column data frame, as produced by
# summarise(), holding the number of matching rows of Crimes.
tally_charges <- function(keyword) {
  hits <- Crimes[grepl(keyword, Crimes$Charge), ]
  summarise(hits, length(Charge))
}

DUI_ <- tally_charges('DUI')
ASSAULT_ <- tally_charges('ASSAULT')
DWLS_ <- tally_charges('DWLS')
THEFT_ <- tally_charges('THEFT')
CONT_ <- tally_charges('CONT')
FTA_ <- tally_charges('FTA')
DOC_ <- tally_charges('DOC')
VIOL_ <- tally_charges('VIOL')
ORDER_ <- tally_charges('ORDER')
DRUG_ <- tally_charges('DRUG')
BURGLARY_ <- tally_charges('BURGLARY')
ROBBERY_ <- tally_charges('ROBBERY')

# Build the sample
# Table of charge keywords and how many charges matched each one, sorted
# from most to least frequent. Every keyword appears exactly once and is
# paired with its own count: BURGLARY and ROBBERY previously reused the VIOL
# and ORDER counts, and CONT was listed twice.
Sample <- data.frame(
  rbind(
    cbind("Cat" = "DUI", "Amount" = DUI_),
    cbind("Cat" = "ASSAULT", "Amount" = ASSAULT_),
    cbind("Cat" = "DWLS", "Amount" = DWLS_),
    cbind("Cat" = "THEFT", "Amount" = THEFT_),
    cbind("Cat" = "CONT", "Amount" = CONT_),
    cbind("Cat" = "FTA", "Amount" = FTA_),
    cbind("Cat" = "DOC", "Amount" = DOC_),
    cbind("Cat" = "VIOL", "Amount" = VIOL_),
    cbind("Cat" = "ORDER", "Amount" = ORDER_),
    cbind("Cat" = "DRUG", "Amount" = DRUG_),
    cbind("Cat" = "BURGLARY", "Amount" = BURGLARY_),
    cbind("Cat" = "ROBBERY", "Amount" = ROBBERY_)
  )
)
colnames(Sample) <- c("Category", "Charged")
Sample <- arrange(Sample, desc(Charged))

# For every category keyword in Sample, plot the charges whose text matches
# it: x is the charge's row name within Crimes, y is the running index of the
# match (1, 2, 3, ...). Each keyword gets its own random colour.
# NOTE(review): the y-axis is limited to 1..(number of categories) and
# labelled "Arbitrary Categories", while y is the running match index; points
# beyond that range fall outside the plot. Confirm whether y was meant to be
# the category's position instead.
categories <- as.character(Sample$Category)

# A single point at the origin lies outside both axis ranges, so this call
# effectively just sets up the empty plotting frame.
plot.default(0, 0, xlab = "4.5 years Events", ylab = "Arbitrary Categories",
             xlim = c(1, 15000), ylim = c(1, length(categories)))

for (category in categories) {
  matched <- subset(Crimes, grepl(category, Charge))
  # seq_len() yields an empty index when nothing matched, where 1:0 would
  # give c(1, 0) and make points() fail on mismatched x/y lengths.
  points(row.names(matched), seq_len(nrow(matched)),
         lwd = 2, pch = 3, col = rgb(runif(1), runif(1), runif(1)))
}

# More calculations
# Math of Recidivism and Unique names
# count(Names,FullName,sort=TRUE)
# Bookings per individual, most frequently booked first.
c1 <- count(Names, FullName, sort = TRUE)

# x is a 22 x 1 matrix. Row k holds the total number of bookings contributed
# by individuals with exactly (23 - k) bookings, i.e. (number of such
# individuals) * (bookings each), for 22 bookings down to 1.
# The upper bound of 22 is hard-coded: if anyone had more bookings, sum(x)
# below would fall short of nrow(Names).
x <- matrix(
  vapply(22:1, function(k) sum(c1$n == k) * k, integer(1)),
  ncol = 1
)
x

# Disabled: the statement below referenced `r1`, which is not defined
# anywhere in this script. It reset x to NULL and then failed, which broke
# every later use of x.
# x <- NULL; for(i in c(1:10)) {x <- rbind(x,nrow(filter(r1,n == i)) * i)}

barchart(c(22:1) ~ x, origin = 0, las = 2, xlab = "Count",
         ylab = "Booked Individuals Categorized by Number of Bookings")
mtext("Recidivism by Booking Count per Individual")
plot(density(x, kernel = "gaussian"))

# Consistency checks: sum(x) should equal the total number of bookings, and
# the last row of x (people booked exactly once) is separated from the rest.
sum(x)
nrow(Names)
length(unique(Names$FullName))
nrow(Names) - length(unique(Names$FullName))
sum(x)
sum(x[c(1:21)])
sum(x) - sum(x[c(1:21)])


# Recorded console transcript of the booking-count calculation (x and the
# sums derived from it), echoed verbatim with cat(). The text below is a
# string literal; nothing in it is recomputed when the script runs.
cat('
c1 <- count(Names,FullName,sort=TRUE)
x <- NULL; for(i in c(22:1)) {x <- rbind(x,nrow(filter(c1,n == i)) * i)}
x
       [,1]
 [1,]    22
 [2,]     0
 [3,]     0
 [4,]     0
 [5,]    18
 [6,]    51
 [7,]    48
 [8,]    45
 [9,]   168
[10,]   208
[11,]   168
[12,]   308
[13,]   400
[14,]   621
[15,]   712
[16,]   987
[17,]  1296
[18,]  1635
[19,]  2268
[20,]  3516
[21,]  5224
[22,] 11942
sum(x)
[1] 29637
nrow(Names)
[1] 29637
length(unique(Names$FullName))
[1] 17256
nrow(Names) - length(unique(Names$FullName))
[1] 12381
sum(x)
[1] 29637
sum(x[c(1:21)])
[1] 17695
sum(x) - sum(x[c(1:21)])
[1] 11942
17695 + 11942
[1] 29637
(17695 + 11942) - 12381
[1] 17256
')

# c2: one row per booking count (22 down to 1) with the total bookings
# contributed by individuals at that count (the first 22 entries of x).
booking_totals <- cbind("Number" = c(22:1), "Count" = x[1:22])
c2 <- data.frame(booking_totals)

# c3: c2 preceded by Users, the number of individuals at each booking count
# (total bookings divided by bookings per individual).
c3 <- cbind("Users" = c2$Count / c2$Number, c2)

# Recorded console transcript showing c3 and the sums of its Users and Count
# columns, echoed verbatim with cat(). The text below is a string literal;
# nothing in it is recomputed when the script runs.
cat('
> sum(c3$Users)
[1] 17256
> c3
   Users Number Count
1      1     22    22
2      0     21     0
3      0     20     0
4      0     19     0
5      1     18    18
6      3     17    51
7      3     16    48
8      3     15    45
9     12     14   168
10    16     13   208
11    14     12   168
12    28     11   308
13    40     10   400
14    69      9   621
15    89      8   712
16   141      7   987
17   216      6  1296
18   327      5  1635
19   567      4  2268
20  1172      3  3516
21  2612      2  5224
22 11942      1 11942
> sum(c3$Count)
[1] 29637
')
# Overlay two kernel density estimates from c3: Users (red) drawn first to
# set up the axes, then Count (blue) added on top. The columns are referenced
# through with() so the calls stay density(Users) / density(Count).
with(c3, {
  plot(density(Users), col = "red")
  lines(density(Count), col = "blue")
})
mtext("Density plots for Users in red and Count in blue.")

No comments:

Post a Comment