Monday, May 16, 2016

Tale of Threes...

Voters in each 42nd LD precinct sorted into three groups: voted in all of the last three GEs (2013, 2014, 2015); active but with no votes in the last three GEs; and NA for the last three GEs. Click to enlarge. Based on the 4/15/2016 WC voterdb.

This code experiments with using data.table to group and select data. The political piece is here.


## Looks at Precincts in groups...
library(data.table)
# Utility function: draws a bar plot of eight bars of height 1, colored with
# the current palette() and with the y axis suppressed. Takes no arguments and
# returns whatever barplot() returns.
End <- function() {
  unit_bars <- rep(1, 8)
  barplot(unit_bars, yaxt = "n", col = palette())
}

# Read the 4/15/2016 voter database export into a data.table.
# NOTE(review): the path is a hard-coded local Windows location.
voterdb041516 <- fread(
  "C:/Politics/RyanFerris_20160415_094952_JessieS/RyanFerris_20160415_094952_JessieS.txt"
)
# Key the table on the registration number.
setkeyv(voterdb041516, "RegistrationNumber")
# Short alias used by the rest of the script.
vdb <- voterdb041516

# Registrant counts: the whole file, then one count per StatusCode value
# ("A", "I", "C").
Total <- nrow(vdb)
Active <- nrow(vdb[StatusCode == "A"])
Inactive <- nrow(vdb[StatusCode == "I"])
Cancelled <- nrow(vdb[StatusCode == "C"])

# One-row summary table of the four counts.
S <- data.table(
  Total = Total,
  Active = Active,
  Inactive = Inactive,
  Cancelled = Cancelled
)

print(S)


# Row counts for every combination of precinct, residence zip code, precinct
# portion and the three BallotCounted flags, ordered by precinct.
BC <- vdb[
  ,
  .N,
  by = list(
    PrecinctID,
    ResidenceZipCode,
    PrecinctPortion,
    BallotCounted1 = BallotCounted_1,
    BallotCounted2 = BallotCounted_2,
    BallotCounted3 = BallotCounted_3
  )
][order(PrecinctID)]
BC
# Writes a fixed text listing to the console; it appears to be the recorded
# printout of `BC` (first and last five rows) from the 4/15/2016 file. The
# string is literal output, so its whitespace must be left untouched.
cat(' 
     PrecinctID ResidenceZipCode PrecinctPortion BallotCounted1 BallotCounted2 BallotCounted3   N
   1:        101            98281              NA              1              1              1 263
   2:        101            98281              NA             NA             NA             NA 147
   3:        101            98281              NA              0              1              1  78
   4:        101            98281              NA              0              0              0 294
   5:        101            98281              NA             NA              0              0  36
  ---                                                                                             
5531:        801            98295              NA              0             NA              0   3
5532:        801            98295              NA              1             NA             NA  11
5533:        801            98295              NA              1              0             NA   3
5534:        801            98295              NA             NA              0             NA   5
5535:        801            98295              NA              0              1             NA   8
')

## The next four paragraphs of code handle PIDBCIII: Active Registrants in the 42nd LD who have either:
#(1) Voted in each of the last three consecutive General Elections (GE: 2013, 2014, 2015)
#(2) Active status, but did not vote in any of the last three GEs
#(3) Not been able to vote in the last three GEs (e.g. new or returning registrants who were not registered for the last three GEs)

## Group 1: voted ("1") in all three of the last general elections (2013 : 2015).
# Each table counts registrants per PrecinctID. The BallotCounted* grouping
# columns are constant after the filter and are carried along only as labels.

# Whole file, all StatusCode values:
BC_1a <- vdb[
  BallotCounted_1 == "1" & BallotCounted_2 == "1" & BallotCounted_3 == "1",
  .N,
  by = .(
    PrecinctID,
    BallotCounted1 = BallotCounted_1,
    BallotCounted2 = BallotCounted_2,
    BallotCounted3 = BallotCounted_3
  )
][order(PrecinctID)]

# As above, but only the 42nd LD:
BC_1b <- vdb[DistrictName_1 == "42nd Legislative District"][
  BallotCounted_1 == "1" & BallotCounted_2 == "1" & BallotCounted_3 == "1",
  .N,
  by = .(
    PrecinctID,
    BallotCounted1 = BallotCounted_1,
    BallotCounted2 = BallotCounted_2,
    BallotCounted3 = BallotCounted_3
  )
][order(PrecinctID)]

# As above, but only Active registrants in the 42nd LD:
BC_1c <- vdb[StatusCode == "A" & DistrictName_1 == "42nd Legislative District"][
  BallotCounted_1 == "1" & BallotCounted_2 == "1" & BallotCounted_3 == "1",
  .N,
  by = .(
    PrecinctID,
    BallotCounted1 = BallotCounted_1,
    BallotCounted2 = BallotCounted_2,
    BallotCounted3 = BallotCounted_3
  )
][order(PrecinctID)]

# Per precinct: all-status count, active count, and active as a percentage of
# all-status. all = TRUE keeps precincts that appear in only one input, so
# All42_active is NA for a precinct with no active rows in this group.
BC_1d <- merge(BC_1b, BC_1c, by = "PrecinctID", all = TRUE)[
  ,
  .(
    PIDBCIII = PrecinctID,
    All42_status = N.x,
    All42_active = N.y,
    PCT = (N.y / N.x) * 100
  )
]

names(BC_1d)
# [1] "PIDBCIII"     "All42_status" "All42_active" "PCT"
# na.rm = TRUE so one precinct without active rows cannot turn the total into NA.
BC_1d[, sum(All42_active, na.rm = TRUE)]
# [1] 30851
vdb[DistrictName_1 == "42nd Legislative District" & StatusCode == "A", .N]
# 89488
with(
  BC_1d,
  barplot(All42_active, names.arg = PIDBCIII, las = 2, col = rainbow(length(PIDBCIII)))
)

## Group 2: did not vote ("0") in any of the last three general elections (2013 : 2015).
# Each table counts registrants per PrecinctID. The BallotCounted* grouping
# columns are constant after the filter and are carried along only as labels.

# Whole file, all StatusCode values:
BC_2a <- vdb[
  BallotCounted_1 == "0" & BallotCounted_2 == "0" & BallotCounted_3 == "0",
  .N,
  by = .(
    PrecinctID,
    BallotCounted1 = BallotCounted_1,
    BallotCounted2 = BallotCounted_2,
    BallotCounted3 = BallotCounted_3
  )
][order(PrecinctID)]

# As above, but only the 42nd LD:
BC_2b <- vdb[DistrictName_1 == "42nd Legislative District"][
  BallotCounted_1 == "0" & BallotCounted_2 == "0" & BallotCounted_3 == "0",
  .N,
  by = .(
    PrecinctID,
    BallotCounted1 = BallotCounted_1,
    BallotCounted2 = BallotCounted_2,
    BallotCounted3 = BallotCounted_3
  )
][order(PrecinctID)]

# As above, but only Active registrants in the 42nd LD:
BC_2c <- vdb[StatusCode == "A" & DistrictName_1 == "42nd Legislative District"][
  BallotCounted_1 == "0" & BallotCounted_2 == "0" & BallotCounted_3 == "0",
  .N,
  by = .(
    PrecinctID,
    BallotCounted1 = BallotCounted_1,
    BallotCounted2 = BallotCounted_2,
    BallotCounted3 = BallotCounted_3
  )
][order(PrecinctID)]

# Per precinct, for 42nd LD registrants who did NOT vote in the last three
# general elections: all-status count, active count, and active as a
# percentage of all-status. all = TRUE keeps precincts that appear in only one
# input, so All42_active is NA for a precinct with no active rows in this group.
BC_2d <- merge(BC_2b, BC_2c, by = "PrecinctID", all = TRUE)[
  ,
  .(
    PIDBCIII = PrecinctID,
    All42_status = N.x,
    All42_active = N.y,
    PCT = (N.y / N.x) * 100
  )
]

names(BC_2d)
# na.rm = TRUE so one precinct without active rows cannot turn the total into NA.
BC_2d[, sum(All42_active, na.rm = TRUE)]
vdb[DistrictName_1 == "42nd Legislative District" & StatusCode == "A", .N]
with(
  BC_2d,
  barplot(All42_active, names.arg = PIDBCIII, las = 2, col = rainbow(length(PIDBCIII)))
)

## Group 3: BallotCounted is NA for all three of the last general elections (2013 : 2015).
# Each table counts registrants per PrecinctID; the BallotCounted* grouping
# columns are all NA after the filter and are carried along only as labels.

# Whole file, all StatusCode values:
BC_3a <- vdb[
  is.na(BallotCounted_1) & is.na(BallotCounted_2) & is.na(BallotCounted_3),
  .N,
  by = list(
    PrecinctID,
    BallotCounted1 = BallotCounted_1,
    BallotCounted2 = BallotCounted_2,
    BallotCounted3 = BallotCounted_3
  )
][order(PrecinctID)]

# Restricted to the 42nd LD:
BC_3b <- vdb[DistrictName_1 == "42nd Legislative District"][
  is.na(BallotCounted_1) & is.na(BallotCounted_2) & is.na(BallotCounted_3),
  .N,
  by = list(
    PrecinctID,
    BallotCounted1 = BallotCounted_1,
    BallotCounted2 = BallotCounted_2,
    BallotCounted3 = BallotCounted_3
  )
][order(PrecinctID)]

# Restricted to Active registrants in the 42nd LD:
BC_3c <- vdb[StatusCode == "A" & DistrictName_1 == "42nd Legislative District"][
  is.na(BallotCounted_1) & is.na(BallotCounted_2) & is.na(BallotCounted_3),
  .N,
  by = list(
    PrecinctID,
    BallotCounted1 = BallotCounted_1,
    BallotCounted2 = BallotCounted_2,
    BallotCounted3 = BallotCounted_3
  )
][order(PrecinctID)]

# Per precinct, for 42nd LD registrants with no ballot record in the last
# three general elections: all-status count, active count, and active as a
# percentage of all-status.
BC_3d <- merge(BC_3b, BC_3c, by = "PrecinctID", all = TRUE)[
  ,
  list(
    PIDBCIII = PrecinctID,
    All42_status = N.x,
    All42_active = N.y,
    PCT = (N.y / N.x) * 100
  )
]

## Chart
# Chart data
# Active-registrant totals for each of the three groups. The BC_*d tables come
# from merges with all = TRUE, so All42_active can hold NA for a precinct that
# has no active rows in a group; na.rm = TRUE keeps each subtotal a number.
n1 <- sum(BC_1d$All42_active, na.rm = TRUE)
n2 <- sum(BC_2d$All42_active, na.rm = TRUE)
n3 <- sum(BC_3d$All42_active, na.rm = TRUE)
# Grand total built from the same subtotals that are printed in the titles, so
# the headline figure always equals n1 + n2 + n3.
all_n <- n1 + n2 + n3
# Number of Active registrants in the 42nd LD.
all42 <- vdb[DistrictName_1 == "42nd Legislative District" & StatusCode == "A", .N]
# Ordered precinct IDs that have at least one Active 42nd LD registrant, and
# how many such precincts there are.
PID42 <- vdb[
  DistrictName_1 == "42nd Legislative District" & StatusCode == "A",
  .N,
  by = PrecinctID
][order(PrecinctID)][, PrecinctID]
lPID42 <- length(PID42)

# Chart
# Axis limits are taken across all three series. Without them the first
# matplot() call sizes the plot to BC_1d alone and any red/black point outside
# that range is silently clipped.
# NOTE(review): assumes PIDBCIII (PrecinctID) is numeric -- confirm.
chart_xlim <- range(BC_1d$PIDBCIII, BC_2d$PIDBCIII, BC_3d$PIDBCIII, na.rm = TRUE)
chart_ylim <- range(
  BC_1d$All42_active, BC_2d$All42_active, BC_3d$All42_active,
  na.rm = TRUE
)

# Blue: voted in the last three GE; red: active but did not vote;
# black: NA for the last three GE.
with(
  BC_1d,
  matplot(
    PIDBCIII, All42_active,
    las = 2, pch = 1:4, col = "blue",
    xlim = chart_xlim, ylim = chart_ylim
  )
)
with(BC_2d, matpoints(PIDBCIII, All42_active, las = 2, pch = 1:4, col = "red"))
with(BC_3d, matpoints(PIDBCIII, All42_active, las = 2, pch = 1:4, col = "black"))

# One thin vertical guide line per precinct; abline() accepts a vector for v.
abline(v = PID42, lwd = .5, col = rgb(.5, .1, .5, .5))

# Titles
# Five stacked lines of margin text on the current plot: the headline on
# margin line 0 and one line per total below it, on lines -1 to -4.
mtext(
  c(
    paste0("WC Voterdb 4/15/2016 : ", all_n, " Registered, Active Voters, Non Voters and New Voters in the LD 42nd by Precincts."),
    paste0("Total Active LD 42 (", lPID42, " Precincts)  = ", all42, " ."),
    paste0("Voted last three GE (blue): ", n1),
    paste0("Active, but did not vote in last three GE (red): ", n2),
    paste0("Newcomers: Could not vote in the last three GE (black): ", n3)
  ),
  line = c(0, -1, -2, -3, -4)
)
  

No comments:

Post a Comment