R, Julia, SQL, Octave and others: Personal notes on data analysis, computation, and data access, most especially for querying voter history, Census, PDC, and other election data. Readers are advised to simply paste the code text into Notepad++.
Monday, April 25, 2016
The Bernie Effect
Political piece here. Click to enlarge chart.
#Looking at net change, flux, other characteristics between voterdb pulls in Whatcom County.
# 12:12 PM 4/25/2016
library(data.table)
library(dplyr)
library(lubridate)
# utility Function
# End(): draw eight unit-height bars, coloured from the current palette(),
# with the y axis suppressed. The script calls it after each group of charts.
# Returns whatever barplot() returns (invisibly).
End <- function() {
  swatches <- palette()
  barplot(rep(1, 8), yaxt = "n", col = swatches)
}
# Read each voter-database pull with fread() and key it on RegistrationNumber
# straight away, one pull at a time.
vdb2014 <- fread("C:/Politics/11.22.2014/11.22.2014.txt")
setkeyv(vdb2014, "RegistrationNumber")

vdb2015 <- fread("C:/Politics/RyanFerrisList_20151028/10.28.15.txt")
setkeyv(vdb2015, "RegistrationNumber")

vdb2016 <- fread("C:/Politics/RyanFerris_20160415_094952_JessieS/RyanFerris_20160415_094952_JessieS.txt")
setkeyv(vdb2016, "RegistrationNumber")
#Populate the years as appropriate with fun1()
#vdb <- vdb2014
#vdb <- vdb2015
#vdb <- vdb2016
# fun1(): derive year columns for the voter pull currently held in the global
# `vdb` and publish the results back to the global environment.
#
# Reads:   global `vdb` (set by the caller first, e.g. `vdb <- vdb2016`).
# Writes:  global `vdb`  - same rows plus RegistrationYear, LastUpdateYear and
#                          BirthYear, keyed on RegistrationNumber;
#          global `vdbA` - the rows of that table with StatusCode == "A";
#          global `vdbI` - the rows of that table with StatusCode == "I".
# Returns: the three tables as a named list, invisibly.
# Errors:  stops if `vdb` lacks any column the function needs.
fun1 <- function() {
  # copy() so the by-reference `:=` below can never alter the table that
  # `vdb` was assigned from (vdb2014 / vdb2015 / vdb2016).
  voters <- copy(as.data.table(vdb))

  needed <- c(
    "RegistrationNumber", "RegistrationDate", "LastUpdateDate",
    "BirthDate", "StatusCode"
  )
  absent <- setdiff(needed, names(voters))
  if (length(absent) > 0) {
    stop(
      "vdb is missing required column(s): ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # Parse each date column exactly once; the status subsets taken afterwards
  # inherit the derived columns instead of re-parsing the same strings.
  voters[, `:=`(
    RegistrationYear = year(mdy(RegistrationDate)),
    LastUpdateYear   = year(ymd_hms(LastUpdateDate)),
    BirthYear        = year(mdy(BirthDate))
  )]
  setkey(voters, RegistrationNumber)

  status_a <- voters[StatusCode == "A"]
  status_i <- voters[StatusCode == "I"]

  # The rest of the script reads these three tables as globals, so the
  # super-assignments are part of this function's contract.
  vdb <<- voters
  vdbA <<- status_a
  vdbI <<- status_i

  invisible(list(vdb = voters, vdbA = status_a, vdbI = status_i))
}
# For each pull: point the global `vdb` at it, let fun1() add the year columns
# and build vdbA / vdbI, then tally rows per value (count) and sort the tally
# in descending order of that value.
# Prefixes: rY = RegistrationYear, lY = LastUpdateYear, bY = BirthYear,
#           rN = RegistrationNumber (all rows), rNA / rNI = same from vdbA / vdbI.

# ---- 2016 pull ----
vdb <- vdb2016
fun1()
rY2016 <- vdb %>%
  count(RegistrationYear) %>% arrange(desc(RegistrationYear))
lY2016 <- vdb %>%
  count(LastUpdateYear) %>% arrange(desc(LastUpdateYear))
bY2016 <- vdb %>%
  count(BirthYear) %>% arrange(desc(BirthYear))
rN2016 <- vdb %>%
  count(RegistrationNumber) %>% arrange(desc(RegistrationNumber))
rNA2016 <- vdbA %>%
  count(RegistrationNumber) %>% arrange(desc(RegistrationNumber))
rNI2016 <- vdbI %>%
  count(RegistrationNumber) %>% arrange(desc(RegistrationNumber))

# ---- 2015 pull ----
vdb <- vdb2015
fun1()
rY2015 <- vdb %>%
  count(RegistrationYear) %>% arrange(desc(RegistrationYear))
lY2015 <- vdb %>%
  count(LastUpdateYear) %>% arrange(desc(LastUpdateYear))
bY2015 <- vdb %>%
  count(BirthYear) %>% arrange(desc(BirthYear))
rN2015 <- vdb %>%
  count(RegistrationNumber) %>% arrange(desc(RegistrationNumber))
rNA2015 <- vdbA %>%
  count(RegistrationNumber) %>% arrange(desc(RegistrationNumber))
rNI2015 <- vdbI %>%
  count(RegistrationNumber) %>% arrange(desc(RegistrationNumber))

# ---- 2014 pull ----
vdb <- vdb2014
fun1()
rY2014 <- vdb %>%
  count(RegistrationYear) %>% arrange(desc(RegistrationYear))
lY2014 <- vdb %>%
  count(LastUpdateYear) %>% arrange(desc(LastUpdateYear))
bY2014 <- vdb %>%
  count(BirthYear) %>% arrange(desc(BirthYear))
rN2014 <- vdb %>%
  count(RegistrationNumber) %>% arrange(desc(RegistrationNumber))
rNA2014 <- vdbA %>%
  count(RegistrationNumber) %>% arrange(desc(RegistrationNumber))
rNI2014 <- vdbI %>%
  count(RegistrationNumber) %>% arrange(desc(RegistrationNumber))
# Some Computations
# %in%: for each ordered pair of pulls, tally how many status-"A" registration
# numbers of the first pull are (TRUE) / are not (FALSE) present in the second.
count(data.table(rNA2014$RegistrationNumber %in% rNA2015$RegistrationNumber), V1)
count(data.table(rNA2015$RegistrationNumber %in% rNA2016$RegistrationNumber), V1)
count(data.table(rNA2014$RegistrationNumber %in% rNA2016$RegistrationNumber), V1)
count(data.table(rNA2015$RegistrationNumber %in% rNA2014$RegistrationNumber), V1)
count(data.table(rNA2016$RegistrationNumber %in% rNA2015$RegistrationNumber), V1)
count(data.table(rNA2016$RegistrationNumber %in% rNA2014$RegistrationNumber), V1)
library(Hmisc)
# %nin% (from Hmisc) is the negation of %in%: here TRUE tallies the
# registration numbers of the first pull that are absent from the second.
count(data.table(rNA2014$RegistrationNumber %nin% rNA2015$RegistrationNumber), V1)
count(data.table(rNA2015$RegistrationNumber %nin% rNA2016$RegistrationNumber), V1)
count(data.table(rNA2014$RegistrationNumber %nin% rNA2016$RegistrationNumber), V1)
# The three reverse-direction comparisons previously used %in% here, which
# merely repeated the %in% section above; they now use %nin% like the rest
# of this section.
count(data.table(rNA2015$RegistrationNumber %nin% rNA2014$RegistrationNumber), V1)
count(data.table(rNA2016$RegistrationNumber %nin% rNA2015$RegistrationNumber), V1)
count(data.table(rNA2016$RegistrationNumber %nin% rNA2014$RegistrationNumber), V1)
# Status-"A" rows of the 2016 and 2014 pulls.
t1 <- vdb2016[StatusCode == "A"]
t2 <- vdb2014[StatusCode == "A"]

# `new`: registration numbers in t1 that do not occur in t2.
# `old`: registration numbers in t2 that do not occur in t1.
new <- t1[
  RegistrationNumber %nin% t2$RegistrationNumber,
  .(RegistrationNumber, BirthDate, PrecinctID, RegistrationDate)
]
old <- t2[
  RegistrationNumber %nin% t1$RegistrationNumber,
  .(RegistrationNumber, BirthDate, PrecinctID, RegistrationDate)
]

# Number of `new` registrants per birth year, sorted by birth year descending.
BirthYear <- aggregate(
  RegistrationNumber ~ year(mdy(BirthDate)),
  data = new,
  FUN = length
)
names(BirthYear) <- c("BirthYear", "Count")
newAge <- BirthYear %>%
  arrange(desc(BirthYear)) %>%
  data.table()

# Number of `new` registrants per precinct, sorted by PrecinctID descending.
PrecinctID <- aggregate(
  RegistrationNumber ~ PrecinctID,
  data = new,
  FUN = length
)
names(PrecinctID) <- c("PrecinctID", "Count")
newPrecinct <- PrecinctID %>%
  arrange(desc(PrecinctID)) %>%
  data.table()

# Headline figures used in the chart captions:
#   N = all new registrants,
#   P = those whose PrecinctID lies strictly between 199 and 300,
#   A = those with BirthYear after 1966.
N <- nrow(new)
P <- newPrecinct[PrecinctID > 199 & PrecinctID < 300, sum(Count)]
A <- newAge[BirthYear > 1966, sum(Count)]

# Birth years of the new and the dropped registrants, for the density plots.
newbd <- year(mdy(new$BirthDate))
oldbd <- year(mdy(old$BirthDate))
# Some Charts

# Side by side: new registrants per birth year (blue) and per precinct (red).
par(mfrow = c(1, 2))
plot(newAge, col = "blue")
plot(newPrecinct, col = "red")
# Switch back to a single panel before writing the two caption lines.
par(mfrow = c(1, 1))
mtext(
  paste0(
    prettyNum(N, big.mark = ","),
    " new Whatcom County Registrants by Birth Year and PrecinctID added between 11/22/2014 and 4/15/2016."
  ),
  cex = 1.25,
  line = 1
)
mtext(
  paste0(
    prettyNum(P, big.mark = ","),
    " new registrants come from the 200 (Bellingham) series. ",
    prettyNum(A, big.mark = ","),
    " new registrants are less than 50 years old (e.g BirthYear > 1966)."
  ),
  cex = 1.25,
  line = 0
)

# Birth-year densities on identical axes: `old` in blue, `new` in red.
par(mfrow = c(1, 2))
plot(density((oldbd)), xlim = c(1900, 2010), ylim = c(0, 0.06), lwd = 2, col = "blue")
plot(density((newbd)), xlim = c(1900, 2010), ylim = c(0, 0.06), lwd = 2, col = "red")
par(mfrow = c(1, 1))
End()

# Complicated to interpret but interesting
# Three panels (birth year, last-update year, registration year); in each,
# 2016 is red, 2015 is blue and 2014 is green.
par(mfrow = c(1, 3))
plot(bY2016, type = "l", col = "red1")
lines(bY2015, col = "blue2")
lines(bY2014, col = "green3")
plot(lY2016, type = "l", col = "red1")
lines(lY2015, col = "blue2")
lines(lY2014, col = "green3")
plot(rY2016, type = "l", col = "red1")
lines(rY2015, col = "blue2")
lines(rY2014, col = "green3")
par(mfrow = c(1, 1))

# Registration numbers (sorted descending) plotted against row index:
# all rows, then the vdbA-derived and vdbI-derived tallies, same colour scheme.
par(mfrow = c(1, 3))
plot(rN2016[, RegistrationNumber, ], type = "l", col = "red1")
lines(rN2015[, RegistrationNumber, ], col = "blue2")
lines(rN2014[, RegistrationNumber, ], col = "green3")
plot(rNA2016[, RegistrationNumber, ], type = "l", col = "red1")
lines(rNA2015[, RegistrationNumber, ], col = "blue2")
lines(rNA2014[, RegistrationNumber, ], col = "green3")
plot(rNI2016[, RegistrationNumber, ], type = "l", col = "red1")
lines(rNI2015[, RegistrationNumber, ], col = "blue2")
lines(rNI2014[, RegistrationNumber, ], col = "green3")
par(mfrow = c(1, 1))
End()
No comments:
Post a Comment