R, Julia, SQL, Octave and others: Personal notes on data analysis, computation, and data access, most especially for querying voter history, Census, PDC, and other election data. Readers are advised to paste the code text into Notepad++.
Friday, December 11, 2015
Gun Deaths in America: Suicides, Homicides, "Mass Shooting Tracker" Data
Gun Deaths in America: Suicides, Homicides, "Mass Shooting Tracker" Data. Code below. Political piece here.
library(dplyr)
library(lubridate)
library(lattice)
setwd("C:/Politics")
##12/24/2015 This list is now maintained by Gun Violence Archive at http://www.shootingtracker.com/wiki/Main_Page
# These downloads no longer work:
# download.file("http://shootingtracker.com/tracker/2015CURRENT.csv", "2015CURRENT.csv")
# download.file("http://shootingtracker.com/tracker/2014MASTER.csv", "2014MASTER.csv")
# download.file("http://shootingtracker.com/tracker/2013MASTER.csv", "2013MASTER.csv")
GV_2015 <- read.csv("2015CURRENT.csv")
GV_2014 <- read.csv("2014MASTER.csv")
GV_2013 <- read.csv("2013MASTER.csv")
GV_2013 <- GV_2013[,2:6]
GV_2014 <- GV_2014[,2:6]
GV_2015 <- GV_2015[,2:6]
colnames(GV_2013) <- c("Date","Shooter","Dead","Injured","Location")
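# Optional sanity check (a sketch, not part of the original workflow): rbind() below
# needs identical column names in all three frames, so confirm the 2014 and 2015
# files already carry the headers just set on GV_2013.
stopifnot(identical(colnames(GV_2014), colnames(GV_2013)),
          identical(colnames(GV_2015), colnames(GV_2013)))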
GV <- rbind(GV_2013,GV_2014,GV_2015)
# GV$Date <- mdy(GV$Date)
## Fix a few things from a database perspective, found through examination of the data.
# State capitalization fixes for Tx, Az, Ga, Il, Mi, Ca, In, Ma, Mo, Ok and others.
# Keep the ", " separator in the replacement so the later city/state split still works.
# grep(", Il",GV$Location,value=TRUE)
GV$Location <- sub(', Il',', IL', GV$Location)
# grep(', Mi',GV$Location,value=TRUE)
GV$Location <- sub(', Mi',', MI', GV$Location)
# grep(', Ca',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Ca',', CA', GV$Location)
# grep(', In',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', In',', IN', GV$Location)
# grep(', Tx',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Tx',', TX', GV$Location)
# grep(', Az',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Az',', AZ', GV$Location)
# grep(', Ga',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Ga',', GA', GV$Location)
# grep(', Ma',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Ma',', MA', GV$Location)
# grep(', Mo',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Mo',', MO', GV$Location)
# grep(', Ok',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Ok',', OK', GV$Location)
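# Single-pass alternative (a sketch, assuming every state appears as a trailing
# two-letter code after ", "): one perl regex uppercases the state in place of
# the per-state sub() calls above. Left commented out since those already ran.
# GV$Location <- gsub(", ([A-Za-z]{2})$", ", \\U\\1", GV$Location, perl = TRUE)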
#Fix City names
# grep('Cleaveland',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub('Cleaveland','Cleveland', GV$Location)
# grep('San Bernadino,',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub('San Bernadino,','San Bernardino,', GV$Location)
# Fix 'Shooter' "Unknown" misspellings
# grep('unknown',GV$Shooter,perl=TRUE,value=TRUE)
GV$Shooter <- sub('unknown','Unknown', GV$Shooter)
# grep('Unkown',GV$Shooter,perl=TRUE,value=TRUE)
GV$Shooter <- sub('Unkown','Unknown', GV$Shooter)
# If you have an existing conglomerated, fixed spreadsheet, you can import it here.
# GV <- read.csv("GV.csv")
setwd("C:/Politics")
library(dplyr)
library(lubridate)
library(lattice)
# GV <- read.csv("GV.csv")
(count(GV,Shooter,sort=TRUE))
(count(GV,Location,sort=TRUE))
(count(GV,(Dead + Injured),sort=TRUE))
tbl_df(arrange(as.data.frame(xtabs(Dead ~ Shooter,data=GV)),desc(Freq)))
tbl_df(arrange(as.data.frame(xtabs(Dead ~ Location,data=GV)),desc(Freq)))
tbl_df(arrange(as.data.frame(xtabs(cbind(Dead + Injured) ~ Shooter,data=GV)),desc(Freq)))
tbl_df(arrange(as.data.frame(xtabs(cbind(Dead + Injured) ~ Location,data=GV)),desc(Freq)))
arrange(tbl_df(count(GV,Location,"DI"=(Dead + Injured))),desc(DI))
arrange(tbl_df(count(GV,Location,"DI"=(Dead + Injured))),desc(n))
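# Equivalent dplyr summary (a sketch): sum Dead + Injured by Location and sort
# descending, mirroring the xtabs() tables above with a readable column name.
GV %>%
  group_by(Location) %>%
  summarise(DI = sum(Dead + Injured)) %>%
  arrange(desc(DI))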
par(mfrow=(c(1,2)),oma=c(0,0,3,0)) # two panels side by side; outer margin leaves room for a shared title
L <-{};for(i in GV$Location) (L <- rbind(L,strsplit(i,",")[[1]][2]))
colnames(L)= "State";L <- count(as.data.frame(L),State,sort=TRUE)
df1 <- (arrange(as.data.frame(xtabs(n ~ State,data=L)),desc(Freq)))
with(df1[1:25,],barplot(Freq, names.arg=State,ylim=c(0,150),cex.names=.65,las=2,col="blue"))
L <-{};for(i in GV$Location) (L <- rbind(L,strsplit(i,",")[[1]][1]))
colnames(L)= "City";L <- count(as.data.frame(L),City,sort=TRUE)
df1 <- (arrange(as.data.frame(xtabs(n ~ City,data=L)),desc(Freq)))
with(df1[1:25,],barplot(Freq, names.arg=City,ylim=c(0,75),cex.names=.65,las=2,col="blue"))
mtext("Aggregated Top 25 State and City Mentions for 'Mass Shooting Tracker'",outer=TRUE,cex= 1.5)
par(mfrow=(c(1,1)))
# Manually fix ('fix' or 'edit') these items here before exporting and converting the dates with lubridate (vectors and factors only).
# fix(GV)
subset(GV, !grepl(", ",Location))
subset(GV, nchar(sub("^[^,]*,\\s*","",Location)) > 2) # flag rows whose state part is not a two-letter code
subset(GV, is.na(Location))
GV$Date <- mdy(GV$Date)
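# Quick check (a sketch): mdy() returns NA for any date string it cannot parse,
# so confirm nothing was lost before writing the file out.
sum(is.na(GV$Date))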
write.csv(GV, "GV.csv", row.names=FALSE)
# Charts
# Chart creation Macro
jpeg_create <- function() {
  systime <- as.numeric(Sys.time())
  # dev.new()
  jpeg(filename = paste0(systime, ".jpg"),  # give jpeg() a character file name with the extension
       width = 1224, height = 968, units = "px", pointsize = 16,
       quality = 100, bg = "white", res = NA, family = "", restoreConsole = TRUE,
       type = c("windows"))
  Sys.sleep(2)
}
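# Alternative sketch (save_jpeg is illustrative, not part of the original workflow):
# save a single lattice chart and close its device right away, so no manual
# graphics.off() step is needed afterwards.
save_jpeg <- function(chart, file = paste0(round(as.numeric(Sys.time())), ".jpg")) {
  jpeg(filename = file, width = 1224, height = 968, units = "px",
       pointsize = 16, quality = 100)
  on.exit(dev.off())
  print(chart)  # lattice objects need an explicit print() when drawn to a file device
}
# Example: save_jpeg(xyplot(Date ~ (Dead + Injured), cex = 1.25, data = GV))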
# Create charts, then run graphics.off() to close the devices and write the .jpg files
jpeg_create()
xyplot(Date ~ (Dead + Injured),cex=1.25, data=GV)
jpeg_create()
with(GV,plot((Dead + Injured) ~ Date,cex=1.25))
with(GV,lines(lowess((Dead + Injured) ~ Date),cex=1.25,col="blue",lwd=5))
df1 <- (arrange(as.data.frame(xtabs(cbind(Dead + Injured) ~ Location,data=GV)),desc(Freq)))
jpeg_create()
with(df1[1:25,],barplot(Freq, names.arg=Location,ylim=c(0,250),cex.names=.65,las=2,col="blue"))
jpeg_create()
barchart(Location ~ Freq,cex=1.25,data=df1[1:25,])
# Manual charts (same plots drawn on screen instead of to a jpeg device)
xyplot(Date ~ (Dead + Injured),cex=1.25, data=GV)
with(GV,plot((Dead + Injured) ~ Date,cex=1.25))
with(GV,lines(lowess((Dead + Injured) ~ Date),cex=1.25,col="blue",lwd=5))
df1 <- (arrange(as.data.frame(xtabs(cbind(Dead + Injured) ~ Location,data=GV)),desc(Freq)))
with(df1[1:25,],barplot(Freq, names.arg=Location,ylim=c(0,250),cex.names=.65,las=2,col="blue"))
barchart(Location ~ Freq,cex=1.25,data=df1[1:25,])