Wednesday, August 5, 2015

An Analysis of Whatcom County Jail Press Releases: Word Counts, Word Clouds, Relational Graphs, Criminogenic Cycle: Part II

Political piece is here.


# Code to examine booking press release information for Whatcom County Jail.
# Examines crime patterns using text-mining algorithms; does not contain the web query routines.
# For more, see Rdatamining.com.
# The graph and Rgraphviz libraries come from Bioconductor.
# 8:33 PM Tuesday, July 28, 2015 -RMF

library(tm)
library(wordcloud)
library(graph)
library(Rgraphviz)

# Inspect the bookings data; `Crimes` must already exist in the workspace
# (this script does not create or load it).
str(Crimes)

# One corpus document per booking charge string.
myCorpus <- Corpus(VectorSource(Crimes$Charge))

# Term-document matrix over the charges. wordLengths = c(1, Inf) keeps terms
# of every length instead of tm's default minimum of 3 characters, so short
# charge abbreviations are not discarded.
myTdm <- TermDocumentMatrix(myCorpus, control = list(wordLengths = c(1, Inf)))

# Terms occurring at least 20 times; the outer parentheses print the result.
(freq.terms <- findFreqTerms(myTdm, lowfreq = 20))

# Correlation graph of the frequent terms. `terms` is spelled out in full
# (the previous `term=` only worked through partial argument matching) and
# TRUE replaces the reassignable shorthand `T`.
plot(myTdm, terms = freq.terms, corThreshold = 0.1, weighting = TRUE)
# Lower thresholds draw many more edges and take much longer to render:
# plot(myTdm, terms = freq.terms, corThreshold = 0.01, weighting = TRUE) # long
# plot(myTdm, terms = freq.terms, corThreshold = 0.001, weighting = TRUE) # very long ; intensive

# Terms whose total frequency is at least each floor (findFreqTerms treats
# `lowfreq` as an inclusive lower bound).
# Fix: f1000 previously used lowfreq = 2000 and therefore duplicated f2000.
# The nested `freq.terms <-` assignments were removed: they only overwrote a
# variable that is reassigned again before its next use.
f5000 <- findFreqTerms(myTdm, lowfreq = 5000)
f4000 <- findFreqTerms(myTdm, lowfreq = 4000)
f3000 <- findFreqTerms(myTdm, lowfreq = 3000)
f2000 <- findFreqTerms(myTdm, lowfreq = 2000)
f1000 <- findFreqTerms(myTdm, lowfreq = 1000)

# Print each list, from the highest floor to the lowest.
f5000
f4000
f3000
f2000
f1000

# Correlation graph of terms occurring at least 2000 times. The outer
# parentheses print the term list, so the separate bare `freq.terms` print
# that used to follow was redundant and has been dropped.
# NOTE(review): findFreqTerms' bound is inclusive (>=), while the plot title
# says "> 2000"; the title text is left unchanged here.
(freq.terms <- findFreqTerms(myTdm, lowfreq = 2000))
# The very low corThreshold keeps an edge for almost any non-zero
# correlation. `terms` and TRUE are spelled out rather than relying on
# partial argument matching and the reassignable `T`.
plot(myTdm, terms = freq.terms, corThreshold = 0.0001, weighting = TRUE)
# Negative `line` places the title inside the plot region.
mtext("Frequency relationships between Booking charge terms with mentions > 2000", cex = 1.5, line = -1, col = "blue")

# Same graph for the larger set of terms occurring at least 1000 times.
(freq.terms <- findFreqTerms(myTdm, lowfreq = 1000))
plot(myTdm, terms = freq.terms, corThreshold = 0.0001, weighting = TRUE)
mtext("Frequency relationships between Booking charge terms with mentions > 1000", cex = 1.5, line = -1, col = "blue")


# Term associations: for each query term, list the other terms whose
# correlation with it meets the given lower limit. Wrapping the result in
# data.frame() prints it as a one-column table.

# "dv" at progressively looser correlation limits.
data.frame(findAssocs(myTdm, terms = "dv", corlimit = 0.2))
data.frame(findAssocs(myTdm, terms = "dv", corlimit = 0.1))
data.frame(findAssocs(myTdm, terms = "dv", corlimit = 0.01))

# "dui" and "dwls" at very loose limits.
data.frame(findAssocs(myTdm, terms = "dui", corlimit = 0.001))
data.frame(findAssocs(myTdm, terms = "dwls", corlimit = 0.01))

# "assault" at progressively looser correlation limits.
data.frame(findAssocs(myTdm, terms = "assault", corlimit = 0.2))
data.frame(findAssocs(myTdm, terms = "assault", corlimit = 0.1))
data.frame(findAssocs(myTdm, terms = "assault", corlimit = 0.01))

# Total occurrences of each term across all documents, most frequent first.
# Note: as.matrix() converts the term-document matrix to a dense matrix.
m <- as.matrix(myTdm)
freq <- sort(rowSums(m), decreasing = TRUE)

# Plain word cloud of terms occurring at least 1000 times.
# random.order = FALSE plots words in decreasing order of frequency.
# wordcloud(words = names(freq), freq = freq, min.freq = 8, random.order = FALSE)
wordcloud(words = names(freq), freq = freq, min.freq = 1000, random.order = FALSE)

# Captioned clouds at three frequency floors. `colors` is wordcloud's actual
# argument name (the previous `col=` only reached it through partial
# matching), and TRUE/FALSE replace the reassignable T/F.
# In mtext(), side = 3 is the top margin and side = 1 the bottom; negative
# `line` values pull the caption inside the plot region.
wordcloud(words = names(freq), freq = freq, scale = c(6.25, 1.25), min.freq = 1000, random.order = FALSE, colors = "red")
mtext("Whatcom County Jail Word Cloud:", line = -7, side = 3, cex = 1.75)
mtext("Terms mentioned > 1K times each in 47,732 Bookings Charges from 1/1/11 - 7/1/15.", line = -7, side = 1, cex = 1.5, col = "blue")

wordcloud(words = names(freq), freq = freq, scale = c(4.25, 0.7), min.freq = 10, random.order = FALSE, colors = "red")
mtext("Whatcom County Jail Word Cloud:", line = -1.5, side = 3, cex = 1.75)
mtext("Terms mentioned > 10 times each in 47,732 Bookings Charges from 1/1/11 - 7/1/15.", line = -1.5, side = 1, cex = 1.5, col = "blue")

wordcloud(words = names(freq), freq = freq, scale = c(5.5, 1.25), min.freq = 100, random.order = FALSE, colors = "red")
mtext("Whatcom County Jail Word Cloud:", line = -1.5, side = 3, cex = 1.75)
mtext("Terms mentioned > 100 times each in 47,732 Bookings Charges from 1/1/11 - 7/1/15.", line = -1.5, side = 1, cex = 1.5, col = "blue")






No comments:

Post a Comment