R, Julia, SQL, Octave and others: Personal notes on data analysis, computation, and data access, most especially for querying voter history, Census, PDC, and other election data. Readers are advised to paste the code text into Notepad++.
Friday, December 11, 2015
Gun Deaths in America: Suicides, Homicides, "Mass Shooting Tracker" Data
Gun Deaths in America: Suicides, Homicides, "Mass Shooting Tracker" Data. Code below. Political piece here.
library(dplyr)
library(lubridate)
library(lattice)
setwd("C:/Politics")
##12/24/2015 This list is now maintained by Gun Violence Archive at http://www.shootingtracker.com/wiki/Main_Page
# These downloads no longer work:
# download.file("http://shootingtracker.com/tracker/2015CURRENT.csv", "2015CURRENT.csv")
# download.file("http://shootingtracker.com/tracker/2014MASTER.csv", "2014MASTER.csv")
# download.file("http://shootingtracker.com/tracker/2013MASTER.csv", "2013MASTER.csv")
GV_2015 <- read.csv("2015CURRENT.csv")
GV_2014 <- read.csv("2014MASTER.csv")
GV_2013 <- read.csv("2013MASTER.csv")
GV_2013 <- GV_2013[,2:6]
GV_2014 <- GV_2014[,2:6]
GV_2015 <- GV_2015[,2:6]
colnames(GV_2013) <- c("Date","Shooter","Dead","Injured","Location")
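# Optional sanity check (a sketch, not part of the original workflow): rbind() below
# needs identical column names in all three frames, so confirm the 2014 and 2015
# files already carry the headers just set on GV_2013.
stopifnot(identical(colnames(GV_2014), colnames(GV_2013)),
          identical(colnames(GV_2015), colnames(GV_2013)))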
GV <- rbind(GV_2013,GV_2014,GV_2015)
# GV$Date <- mdy(GV$Date)
## Fix a few things from a database perspective, found through examination of the data.
# State capitalization fixes for Tx, Az, Ga, Il, Mi, Ca, In, Ma, Mo, Ok and others.
# Keep the ", " separator in the replacement so the later city/state split still works.
# grep(", Il",GV$Location,value=TRUE)
GV$Location <- sub(', Il',', IL', GV$Location)
# grep(', Mi',GV$Location,value=TRUE)
GV$Location <- sub(', Mi',', MI', GV$Location)
# grep(', Ca',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Ca',', CA', GV$Location)
# grep(', In',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', In',', IN', GV$Location)
# grep(', Tx',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Tx',', TX', GV$Location)
# grep(', Az',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Az',', AZ', GV$Location)
# grep(', Ga',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Ga',', GA', GV$Location)
# grep(', Ma',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Ma',', MA', GV$Location)
# grep(', Mo',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Mo',', MO', GV$Location)
# grep(', Ok',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub(', Ok',', OK', GV$Location)
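# Single-pass alternative (a sketch, assuming every state appears as a trailing
# two-letter code after ", "): one perl regex uppercases the state in place of
# the per-state sub() calls above. Left commented out since those already ran.
# GV$Location <- gsub(", ([A-Za-z]{2})$", ", \\U\\1", GV$Location, perl = TRUE)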
#Fix City names
# grep('Cleaveland',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub('Cleaveland','Cleveland', GV$Location)
# grep('San Bernadino,',GV$Location,perl=TRUE,value=TRUE)
GV$Location <- sub('San Bernadino,','San Bernardino,', GV$Location)
# Fix 'Shooter' "Unknown" misspellings
# grep('unknown',GV$Shooter,perl=TRUE,value=TRUE)
GV$Shooter <- sub('unknown','Unknown', GV$Shooter)
# grep('Unkown',GV$Shooter,perl=TRUE,value=TRUE)
GV$Shooter <- sub('Unkown','Unknown', GV$Shooter)
# If you have an existing conglomerated, fixed spreadsheet, you can import it here.
# GV <- read.csv("GV.csv")
setwd("C:/Politics")
library(dplyr)
library(lubridate)
library(lattice)
# GV <- read.csv("GV.csv")
(count(GV,Shooter,sort=TRUE))
(count(GV,Location,sort=TRUE))
(count(GV,(Dead + Injured),sort=TRUE))
tbl_df(arrange(as.data.frame(xtabs(Dead ~ Shooter,data=GV)),desc(Freq)))
tbl_df(arrange(as.data.frame(xtabs(Dead ~ Location,data=GV)),desc(Freq)))
tbl_df(arrange(as.data.frame(xtabs(cbind(Dead + Injured) ~ Shooter,data=GV)),desc(Freq)))
tbl_df(arrange(as.data.frame(xtabs(cbind(Dead + Injured) ~ Location,data=GV)),desc(Freq)))
arrange(tbl_df(count(GV,Location,"DI"=(Dead + Injured))),desc(DI))
arrange(tbl_df(count(GV,Location,"DI"=(Dead + Injured))),desc(n))
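# Equivalent dplyr summary (a sketch): sum Dead + Injured by Location and sort
# descending, mirroring the xtabs() tables above with a readable column name.
GV %>%
  group_by(Location) %>%
  summarise(DI = sum(Dead + Injured)) %>%
  arrange(desc(DI))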
par(mfrow=(c(1,2)),oma=c(0,0,3,0)) # two panels side by side; outer margin leaves room for a shared title
L <-{};for(i in GV$Location) (L <- rbind(L,strsplit(i,",")[[1]][2]))
colnames(L)= "State";L <- count(as.data.frame(L),State,sort=TRUE)
df1 <- (arrange(as.data.frame(xtabs(n ~ State,data=L)),desc(Freq)))
with(df1[1:25,],barplot(Freq, names.arg=State,ylim=c(0,150),cex.names=.65,las=2,col="blue"))
L <-{};for(i in GV$Location) (L <- rbind(L,strsplit(i,",")[[1]][1]))
colnames(L)= "City";L <- count(as.data.frame(L),City,sort=TRUE)
df1 <- (arrange(as.data.frame(xtabs(n ~ City,data=L)),desc(Freq)))
with(df1[1:25,],barplot(Freq, names.arg=City,ylim=c(0,75),cex.names=.65,las=2,col="blue"))
mtext("Aggregated Top 25 State and City Mentions for 'Mass Shooting Tracker'",outer=TRUE,cex= 1.5)
par(mfrow=(c(1,1)))
# Manually fix ('fix' or 'edit') these items here before exporting and converting the dates with lubridate (vectors and factors only).
# fix(GV)
subset(GV, !grepl(", ",Location))
subset(GV, nchar(sub("^[^,]*,\\s*","",Location)) > 2) # flag rows whose state part is not a two-letter code
subset(GV, is.na(Location))
GV$Date <- mdy(GV$Date)
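# Quick check (a sketch): mdy() returns NA for any date string it cannot parse,
# so confirm nothing was lost before writing the file out.
sum(is.na(GV$Date))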
write.csv(GV, "GV.csv", row.names=FALSE)
# Charts
# Chart creation Macro
jpeg_create <- function() {
  systime <- as.numeric(Sys.time())
  # dev.new()
  jpeg(filename = paste0(systime, ".jpg"),  # give jpeg() a character file name with the extension
       width = 1224, height = 968, units = "px", pointsize = 16,
       quality = 100, bg = "white", res = NA, family = "", restoreConsole = TRUE,
       type = c("windows"))
  Sys.sleep(2)
}
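# Alternative sketch (save_jpeg is illustrative, not part of the original workflow):
# save a single lattice chart and close its device right away, so no manual
# graphics.off() step is needed afterwards.
save_jpeg <- function(chart, file = paste0(round(as.numeric(Sys.time())), ".jpg")) {
  jpeg(filename = file, width = 1224, height = 968, units = "px",
       pointsize = 16, quality = 100)
  on.exit(dev.off())
  print(chart)  # lattice objects need an explicit print() when drawn to a file device
}
# Example: save_jpeg(xyplot(Date ~ (Dead + Injured), cex = 1.25, data = GV))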
# Create charts, then run graphics.off() to close the devices and write the .jpg files
jpeg_create()
xyplot(Date ~ (Dead + Injured),cex=1.25, data=GV)
jpeg_create()
with(GV,plot((Dead + Injured) ~ Date,cex=1.25))
with(GV,lines(lowess((Dead + Injured) ~ Date),cex=1.25,col="blue",lwd=5))
df1 <- (arrange(as.data.frame(xtabs(cbind(Dead + Injured) ~ Location,data=GV)),desc(Freq)))
jpeg_create()
with(df1[1:25,],barplot(Freq, names.arg=Location,ylim=c(0,250),cex.names=.65,las=2,col="blue"))
jpeg_create()
barchart(Location ~ Freq,cex=1.25,data=df1[1:25,])
# Manual charts (same plots drawn on screen instead of to a jpeg device)
xyplot(Date ~ (Dead + Injured),cex=1.25, data=GV)
with(GV,plot((Dead + Injured) ~ Date,cex=1.25))
with(GV,lines(lowess((Dead + Injured) ~ Date),cex=1.25,col="blue",lwd=5))
df1 <- (arrange(as.data.frame(xtabs(cbind(Dead + Injured) ~ Location,data=GV)),desc(Freq)))
with(df1[1:25,],barplot(Freq, names.arg=Location,ylim=c(0,250),cex.names=.65,las=2,col="blue"))
barchart(Location ~ Freq,cex=1.25,data=df1[1:25,])