R, Julia, SQL, Octave and others: Personal notes on data analysis, computation, and data access, most especially for querying voter history, Census, PDC, and other election data. Readers are advised to paste the code text into Notepad++.
Monday, May 16, 2016
Tale of Threes...
This code experiments with using data.table to group and select data. Political piece is here.
## Looks at Precincts in groups...
library(data.table)
# Utility function: draw one unit-height bar per colour of the active
# palette() so the current colour scheme can be inspected at a glance.
# Takes no arguments; the value is whatever barplot() returns (the bar
# midpoints, invisibly).
End <- function() {
  swatches <- palette()
  # Size the chart from the palette itself instead of assuming 8 entries,
  # so a custom (longer or shorter) palette is shown in full.
  barplot(rep(1, length(swatches)), yaxt = "n", col = swatches)
}
# Read the voter-database extract and key it on RegistrationNumber.
voterdb041516 <- fread(
  "C:/Politics/RyanFerris_20160415_094952_JessieS/RyanFerris_20160415_094952_JessieS.txt"
)
setkey(voterdb041516, RegistrationNumber)
# Short working alias used by the rest of the script.
vdb <- voterdb041516

# Head-counts: all rows, then one count per StatusCode value of interest.
Total <- nrow(vdb)
Active <- vdb[StatusCode == "A", .N]
Inactive <- vdb[StatusCode == "I", .N]
Cancelled <- vdb[StatusCode == "C", .N]

# One-row summary table of the four counts.
S <- data.table(
  Total = Total,
  Active = Active,
  Inactive = Inactive,
  Cancelled = Cancelled
)
print(S)

# Row counts for every combination of precinct, zip code, precinct portion
# and the three BallotCounted flags, listed in PrecinctID order.
BC <- vdb[, .N, by = .(
  PrecinctID,
  ResidenceZipCode,
  PrecinctPortion,
  BallotCounted1 = BallotCounted_1,
  BallotCounted2 = BallotCounted_2,
  BallotCounted3 = BallotCounted_3
)][order(PrecinctID)]
BC
# Echo a pasted sample of what printing BC looked like (first and last rows),
# one element per output line; the leading "" reproduces the opening blank line.
writeLines(c(
  "",
  "PrecinctID ResidenceZipCode PrecinctPortion BallotCounted1 BallotCounted2 BallotCounted3 N",
  "1: 101 98281 NA 1 1 1 263",
  "2: 101 98281 NA NA NA NA 147",
  "3: 101 98281 NA 0 1 1 78",
  "4: 101 98281 NA 0 0 0 294",
  "5: 101 98281 NA NA 0 0 36",
  "---",
  "5531: 801 98295 NA 0 NA 0 3",
  "5532: 801 98295 NA 1 NA NA 11",
  "5533: 801 98295 NA 1 0 NA 3",
  "5534: 801 98295 NA NA 0 NA 5",
  "5535: 801 98295 NA 0 1 NA 8"
))
## Next four paragraphs of code handle PIDBCIII: Active Registrants in the LD 42nd who have either:
# (1) Voted in last three consecutive General Elections (GE: 2013, 2014, 2015)
# (2) Active, but not voted in the last three GE
# (3) Not able to vote in the last three GE (e.g. new or returning registrants
#     who were not registered for the last three GE)

## Stats for those who voted in the last three general elections (2013 : 2015), all Status:
# Per-precinct row counts where all three BallotCounted flags equal "1".
BC_1a <- vdb[
  BallotCounted_1 == "1" & BallotCounted_2 == "1" & BallotCounted_3 == "1",
  .N,
  by = .(PrecinctID,
         BallotCounted1 = BallotCounted_1,
         BallotCounted2 = BallotCounted_2,
         BallotCounted3 = BallotCounted_3)
][order(PrecinctID)]

# As above, but only 42nd LD:
BC_1b <- vdb[DistrictName_1 == "42nd Legislative District"][
  BallotCounted_1 == "1" & BallotCounted_2 == "1" & BallotCounted_3 == "1",
  .N,
  by = .(PrecinctID,
         BallotCounted1 = BallotCounted_1,
         BallotCounted2 = BallotCounted_2,
         BallotCounted3 = BallotCounted_3)
][order(PrecinctID)]

# As above but only Active 42nd LD
BC_1c <- vdb[StatusCode == "A" & DistrictName_1 == "42nd Legislative District"][
  BallotCounted_1 == "1" & BallotCounted_2 == "1" & BallotCounted_3 == "1",
  .N,
  by = .(PrecinctID,
         BallotCounted1 = BallotCounted_1,
         BallotCounted2 = BallotCounted_2,
         BallotCounted3 = BallotCounted_3)
][order(PrecinctID)]

# Active and All Status 42nd LD voted last three general elections (2013 : 2015)
# N.x is the all-status count (BC_1b), N.y the active-only count (BC_1c).
# all = TRUE keeps precincts present on only one side, so All42_active (and
# PCT) is NA for a precinct that has no matching active rows.
BC_1d <- merge(BC_1b, BC_1c, by = "PrecinctID", all = TRUE)[
  , .(PIDBCIII = PrecinctID,
      All42_status = N.x,
      All42_active = N.y,
      PCT = (N.y / N.x) * 100)
]
names(BC_1d)
# [1] "PIDBCIII" "All42_status" "All42_active" "PCT"

# na.rm = TRUE so an unmatched precinct (NA) does not turn the total into NA.
BC_1d[, sum(All42_active, na.rm = TRUE)]
# [1] 30851
vdb[DistrictName_1 == "42nd Legislative District" & StatusCode == "A", .N]
# 89488

# Active three-time voters per precinct (closing parenthesis of with() restored).
with(
  BC_1d,
  barplot(All42_active, names.arg = PIDBCIII, las = 2,
          col = rainbow(length(PIDBCIII)))
)
## Stats for those who did not vote ("0") in last three general elections (2013 : 2015) all Status:
# Per-precinct row counts where all three BallotCounted flags equal "0".
BC_2a <- vdb[
  BallotCounted_1 == "0" & BallotCounted_2 == "0" & BallotCounted_3 == "0",
  .N,
  by = .(PrecinctID,
         BallotCounted1 = BallotCounted_1,
         BallotCounted2 = BallotCounted_2,
         BallotCounted3 = BallotCounted_3)
][order(PrecinctID)]

# As above, but only 42nd LD:
BC_2b <- vdb[DistrictName_1 == "42nd Legislative District"][
  BallotCounted_1 == "0" & BallotCounted_2 == "0" & BallotCounted_3 == "0",
  .N,
  by = .(PrecinctID,
         BallotCounted1 = BallotCounted_1,
         BallotCounted2 = BallotCounted_2,
         BallotCounted3 = BallotCounted_3)
][order(PrecinctID)]

# As above but only Active 42nd LD
BC_2c <- vdb[StatusCode == "A" & DistrictName_1 == "42nd Legislative District"][
  BallotCounted_1 == "0" & BallotCounted_2 == "0" & BallotCounted_3 == "0",
  .N,
  by = .(PrecinctID,
         BallotCounted1 = BallotCounted_1,
         BallotCounted2 = BallotCounted_2,
         BallotCounted3 = BallotCounted_3)
][order(PrecinctID)]

# Active and All Status 42nd LD who did NOT vote in the last three general
# elections (2013 : 2015).
# N.x is the all-status count (BC_2b), N.y the active-only count (BC_2c).
# all = TRUE keeps precincts present on only one side, so All42_active (and
# PCT) is NA for a precinct that has no matching active rows.
BC_2d <- merge(BC_2b, BC_2c, by = "PrecinctID", all = TRUE)[
  , .(PIDBCIII = PrecinctID,
      All42_status = N.x,
      All42_active = N.y,
      PCT = (N.y / N.x) * 100)
]
names(BC_2d)

# na.rm = TRUE so an unmatched precinct (NA) does not turn the total into NA.
BC_2d[, sum(All42_active, na.rm = TRUE)]
vdb[DistrictName_1 == "42nd Legislative District" & StatusCode == "A", .N]

# Active non-voters per precinct (closing parenthesis of with() restored).
with(
  BC_2d,
  barplot(All42_active, names.arg = PIDBCIII, las = 2,
          col = rainbow(length(PIDBCIII)))
)
## Registrants with no ballot record (NA) for any of the last three general
## elections (2013 : 2015), every status:
BC_3a <- vdb[
  is.na(BallotCounted_1) & is.na(BallotCounted_2) & is.na(BallotCounted_3),
  .N,
  by = .(PrecinctID,
         BallotCounted1 = BallotCounted_1,
         BallotCounted2 = BallotCounted_2,
         BallotCounted3 = BallotCounted_3)
][order(PrecinctID)]

# Same count restricted to the 42nd LD.
BC_3b <- vdb[
  DistrictName_1 == "42nd Legislative District" &
    is.na(BallotCounted_1) & is.na(BallotCounted_2) & is.na(BallotCounted_3),
  .N,
  by = .(PrecinctID,
         BallotCounted1 = BallotCounted_1,
         BallotCounted2 = BallotCounted_2,
         BallotCounted3 = BallotCounted_3)
][order(PrecinctID)]

# Same count restricted to Active registrants of the 42nd LD.
BC_3c <- vdb[
  StatusCode == "A" & DistrictName_1 == "42nd Legislative District" &
    is.na(BallotCounted_1) & is.na(BallotCounted_2) & is.na(BallotCounted_3),
  .N,
  by = .(PrecinctID,
         BallotCounted1 = BallotCounted_1,
         BallotCounted2 = BallotCounted_2,
         BallotCounted3 = BallotCounted_3)
][order(PrecinctID)]

# Side-by-side per precinct: all-status count (N.x), active-only count (N.y)
# and the active share in percent, for those with NA in all three GE.
BC_3d <- merge(BC_3b, BC_3c, by = "PrecinctID", all = TRUE)[
  , .(PIDBCIII = PrecinctID,
      All42_status = N.x,
      All42_active = N.y,
      PCT = (N.y / N.x) * 100)
]
## Chart
# Chart data: active LD 42 registrants in each voting-history group.
# BC_1d/BC_2d/BC_3d come from merge(..., all = TRUE), so All42_active can hold
# NA for a precinct with no active match. Dropping NA per precinct keeps each
# group total (and the titles below) numeric; applying na.rm only to the outer
# sum would instead discard an entire group total that had become NA.
n1 <- sum(BC_1d$All42_active, na.rm = TRUE)
n2 <- sum(BC_2d$All42_active, na.rm = TRUE)
n3 <- sum(BC_3d$All42_active, na.rm = TRUE)
all_n <- sum(n1, n2, n3)

# Total active LD 42 registrants, and the ordered precinct IDs they fall in.
all42 <- vdb[DistrictName_1 == "42nd Legislative District" & StatusCode == "A", .N]
PID42 <- vdb[
  DistrictName_1 == "42nd Legislative District" & StatusCode == "A",
  .N,
  by = PrecinctID
][order(PrecinctID)][, PrecinctID]
lPID42 <- length(PID42)

# Chart: one point per precinct and group, drawn on a shared set of axes.
with(BC_1d, matplot(PIDBCIII, All42_active, las = 2, pch = 1:4, col = "blue"))
with(BC_2d, matpoints(PIDBCIII, All42_active, las = 2, pch = 1:4, col = "red"))
with(BC_3d, matpoints(PIDBCIII, All42_active, las = 2, pch = 1:4, col = "black"))
# Faint vertical guide at every precinct ID; abline() accepts a vector for v.
abline(v = PID42, lwd = .5, col = rgb(.5, .1, .5, .5))

# Titles
mtext(paste0("WC Voterdb 4/15/2016 : ",all_n," Registered, Active Voters, Non Voters and New Voters in the LD 42nd by Precincts."))
mtext(paste0("Total Active LD 42 (",lPID42, " Precincts) = ",all42," ."),line=-1)
mtext(paste0("Voted last three GE (blue): ",n1),line=-2)
mtext(paste0("Active, but did not vote in last three GE (red): ",n2),line=-3)
mtext(paste0("Newcomers: Could not vote in the last three GE (black): ",n3),line=-4)
No comments:
Post a Comment