Wednesday, November 4, 2015

Matchback Code for GE 2015

Political pieces:  Matchbacks dated October/November 2015


# RMF Matchback Code 10:38 AM Wednesday, November 04, 2015
# Whatcom County MatchBacks look like this:

> as.matrix(names(x))
      [,1]                
 [1,] "StateVoterID"      
 [2,] "RegistrationNumber"
 [3,] "LastName"          
 [4,] "FirstName"         
 [5,] "MiddleName"        
 [6,] "NameSuffix"        
 [7,] "ResidenceCity"     
 [8,] "ResidenceState"    
 [9,] "ResidenceZipCode"  
[10,] "MailAddress1"      
[11,] "MailAddress2"      
[12,] "PrecinctID"        
[13,] "AVReturnedDate"    
[14,] "AVReturnStatus"    
[15,] "AVReturnChallenge" 


library(dplyr)
# Prototype Matchback Code. Change the file name as needed
MB.10.23.15 <- read.delim("MB.10.23.15.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
MB.10.23.15_status <- subset(MB.10.23.15, select = c(RegistrationNumber,PrecinctID,AVReturnStatus,AVReturnChallenge))
count(MB.10.23.15_status,"AVReturnStatus")
count(MB.10.23.15_status,"AVReturnChallenge")
MB.10.23.15_Good <- subset(MB.10.23.15_status, AVReturnStatus == "Good")
arrange(data.frame(with(MB.10.23.15_Good,(table(PrecinctID)))),desc(Freq))
data.frame(with(MB.10.23.15_Good,(table(PrecinctID))))
MB.10.23.15_Undeliverable <- subset(MB.10.23.15_status, AVReturnStatus == "Undeliverable")
arrange(data.frame(with(MB.10.23.15_Undeliverable,(table(PrecinctID)))),desc(Freq))

# Previoius MatchBacks
MBGE2008 <- read.delim("MBGE2008.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
MBGE2012 <- read.delim("MBGE2012.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
MBGE2013 <- read.delim("MBGE2013.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)

(as.data.frame(count(MBGE2008, AVReturnedDate)))
(as.data.frame(count(MBGE2012, AVReturnedDate)))
(as.data.frame(count(MBGE2013, AVReturnedDate)))
(as.data.frame(count(MBacc, AVReturnedDate)))

# Matchback Function
diag_MB <- function() {
library(dplyr)
x <- read.delim("MB.11.03.15.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
print(paste0("Total Registered (Active) Voters: ",nrow(x)))
print(count(x, AVReturnStatus))
print(count(x, AVReturnChallenge))
print(as.data.frame(count(x, AVReturnedDate)))
Good <- subset(x, AVReturnStatus == "Good")
Challenged <- subset(x, AVReturnStatus == "Challenged")
Undeliverable <- subset(x, AVReturnStatus == "Undeliverable")
Void <- subset(x, AVReturnStatus == "Void")
t1 <- arrange(data.frame(with(x,(table(PrecinctID)))),desc(PrecinctID))
t2 <- arrange(data.frame(with(Good,(table(PrecinctID)))),desc(PrecinctID))
t3 <- arrange(data.frame(with(Challenged,(table(PrecinctID)))),desc(PrecinctID))
t4 <- arrange(data.frame(with(Undeliverable,(table(PrecinctID)))),desc(PrecinctID))
t5 <- arrange(data.frame(with(Void,(table(PrecinctID)))),desc(PrecinctID))
m1 <- merge(t1,t2,by="PrecinctID"); m1 <- with(m1,cbind(m1,scales::percent(Freq.y/Freq.x)));colnames(m1) <- c("PrecinctID","Total_Registered", "Good", "PCT")
m1$PrecinctID <- as.numeric(as.character(m1$PrecinctID))
m2 <- merge(t1,t2,by="PrecinctID"); m2 <- with(m2,cbind(m2,(Freq.y/Freq.x)));colnames(m2) <- c("PrecinctID","Total_Registered", "Good", "PCT")
m2$PrecinctID <- as.numeric(as.character(m2$PrecinctID))
print(m1)
print("Challenged:");print(t3)
print("Undeliverable:");print(t4)
print("Void:");print(t5)
}

#Matchback without function

library(dplyr)
x <- read.delim("MB.11.03.15.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
print(paste0("Total Registered (Active) Voters: ",nrow(x)))
print(count(x, AVReturnStatus))
print(count(x, AVReturnChallenge))
print(as.data.frame(count(x, AVReturnedDate)))
Good <- subset(x, AVReturnStatus == "Good")
Challenged <- subset(x, AVReturnStatus == "Challenged")
Undeliverable <- subset(x, AVReturnStatus == "Undeliverable")
Void <- subset(x, AVReturnStatus == "Void")
t1 <- arrange(data.frame(with(x,(table(PrecinctID)))),desc(PrecinctID))
t2 <- arrange(data.frame(with(Good,(table(PrecinctID)))),desc(PrecinctID))
t3 <- arrange(data.frame(with(Challenged,(table(PrecinctID)))),desc(PrecinctID))
t4 <- arrange(data.frame(with(Undeliverable,(table(PrecinctID)))),desc(PrecinctID))
t5 <- arrange(data.frame(with(Void,(table(PrecinctID)))),desc(PrecinctID))
m1 <- merge(t1,t2,by="PrecinctID"); m1 <- with(m1,cbind(m1,scales::percent(Freq.y/Freq.x)));colnames(m1) <- c("PrecinctID","Total_Registered", "Good", "PCT")
m1$PrecinctID <- as.numeric(as.character(m1$PrecinctID))
m2 <- merge(t1,t2,by="PrecinctID"); m2 <- with(m2,cbind(m2,(Freq.y/Freq.x)));colnames(m2) <- c("PrecinctID","Total_Registered", "Good", "PCT")
m2$PrecinctID <- as.numeric(as.character(m2$PrecinctID))

# Top 25
sum(arrange(m1,desc(Good))[1:25,]$Good)
sum(arrange(m1,desc(Good))[1:25,]$Total_Registered)
sum(arrange(m1,desc(Good))[1:25,]$Good) / sum(arrange(m1,desc(Good))[1:25,]$Total_Registered)


# All 2014
MB.10.29.14 <- read.delim("MB.10.29.14.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
MB.10.30.14 <- read.delim("MB.10.30.14.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
MB.10.31.14 <- read.delim("MB.10.31.14.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
MB.11.03.14 <- read.delim("MB.11.03.14.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
MB.11.04.14 <- read.delim("MB.11.04.14.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
MB.11.05.14 <- read.delim("MB.11.05.14.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
MB.11.06.14 <- read.delim("MB.11.06.14.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
MB.11.07.14 <- read.delim("MB.11.07.14.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
MB.11.08.14 <- read.delim("MB.11.08.14.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
MB.11.11.14 <- read.delim("MB.11.11.14.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
# Accumulates all MatchBacks for General Election
MBacc <- arrange(rbind(MB.10.29.14,MB.10.30.14,MB.10.31.14,MB.11.03.14,MB.11.04.14,MB.11.05.14,MB.11.06.14,MB.11.07.14,MB.11.08.14,MB.11.11.14),PrecinctID)
MBCon2014 <- read.delim("MB_2014_Consolidated.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)

diag_MB <- function() {
library(dplyr)
x <- MBacc
print(paste0("Total Registered (Active) Voters: ",nrow(x)))
print(count(x, AV_ReturnStatus))
print(count(x, AVReturnChallenge))
print(as.data.frame(count(x, AVReturnedDate)))
Good <- subset(x, AVReturnStatus == "Good")
Challenged <- subset(x, AVReturnStatus == "Challenged")
Undeliverable <- subset(x, AVReturnStatus == "Undeliverable")
Void <- subset(x, AVReturnStatus == "Void")
t1 <- arrange(data.frame(with(x,(table(PrecinctID)))),desc(PrecinctID))
t2 <- arrange(data.frame(with(Good,(table(PrecinctID)))),desc(PrecinctID))
t3 <- arrange(data.frame(with(Challenged,(table(PrecinctID)))),desc(PrecinctID))
t4 <- arrange(data.frame(with(Undeliverable,(table(PrecinctID)))),desc(PrecinctID))
t5 <- arrange(data.frame(with(Void,(table(PrecinctID)))),desc(PrecinctID))
m1 <- merge(t1,t2,by="PrecinctID"); m1 <- with(m1,cbind(m1,scales::percent(Freq.y/Freq.x)));colnames(m1) <- c("PrecinctID","Total_Voted", "Votes_Good", "PCT")
print(m1)
print("Challenged:");print(t3)
print("Undeliverable:");print(t4)
print("Void:");print(t5)
}


# Charts
library(dplyr)
par(mfrow=c(1,1))
par(mfrow=c(1,2))
p1 <- count(MBacc, AVReturnedDate);p1$AVReturnedDate <- mdy(p1$AVReturnedDate)
p2 <- count(x, AVReturnedDate);p2$AVReturnedDate <- mdy(p2$AVReturnedDate)
plot(p1,type="b",las=2,ylim=c(0,20000),ylab="",col="red")
plot(p2[-1,],type="b",las=2,ylim=c(0,20000),ylab="",col="blue")
par(mfrow=c(1,1))
mtext("Ballots by Return Date for 2014 (left) and 2015 to date (right)",line=2,col="red",cex=1.5)
par(mfrow=c(1,1))

# Chart  not working
par(mfrow=c(1,1))
p1 <- count(MBacc, AVReturnedDate);p1$AVReturnedDate <- mdy(p1$AVReturnedDate)
p2 <- count(x, AVReturnedDate);p2$AVReturnedDate <- mdy(p2$AVReturnedDate)
plot(p2[-1,],las=2, ylim=c(0,20000),ylab="",col="blue")
par(new=FALSE)
lines(p1,type="b",las=2,ylim=c(0,20000),ylab="",col="red")
par(new=TRUE)
mtext("Ballots by Return Date for 2014 (red) and 2015 to date (blue)",line=2,col="red",cex=1.5)

# By MB date. Change the dates as needed
d1 <- data.frame(xtabs(~PrecinctID,data=subset(MBacc, mdy(AVReturnedDate) <= mdy("11/04/2014"),select=c(PrecinctID))))
d2 <- data.frame(xtabs(~PrecinctID,data=subset(x, mdy(AVReturnedDate) <= mdy("11/03/2015"),select=c(PrecinctID))))
d3 <- merge(d1,d2,by="PrecinctID");d3$PrecinctID <- as.integer(as.character(d3$PrecinctID))
colnames(d3) <- c("PrecinctID","2014","2015");colSums(d3[,2:3])

d4 <- rbind(
"UnIncCounty1"=colSums(subset(d3,PrecinctID >= 101 & PrecinctID <= 150)[,2:3]),
"UnIncCounty2"=colSums(subset(d3,PrecinctID >= 151 & PrecinctID <= 199)[,2:3]),
"Bham"=colSums(subset(d3,PrecinctID >= 200 & PrecinctID <= 299)[,2:3]),
"SmallCities1"=colSums(subset(d3,PrecinctID >= 300 & PrecinctID <= 599)[,2:3]),
"SmallCities2"=colSums(subset(d3,PrecinctID >= 600 & PrecinctID <= 900)[,2:3])
)
colnames(d4) <- c("2014","2015")
colSums(d4)
d4

d4 <- cbind(d4,
rbind(
nrow(subset(d3,PrecinctID >= 101 & PrecinctID <= 150)),
nrow(subset(d3,PrecinctID >= 151 & PrecinctID <= 199)),
nrow(subset(d3,PrecinctID >= 200 & PrecinctID <= 299)),
nrow(subset(d3,PrecinctID >= 300 & PrecinctID <= 599)),
nrow(subset(d3,PrecinctID >= 600 & PrecinctID <= 900))
)
)

d4 <- cbind(d4,
rbind(
nrow(subset(x,PrecinctID >= 101 & PrecinctID <= 150)),
nrow(subset(x,PrecinctID >= 151 & PrecinctID <= 199)),
nrow(subset(x,PrecinctID >= 200 & PrecinctID <= 299)),
nrow(subset(x,PrecinctID >= 300 & PrecinctID <= 599)),
nrow(subset(x,PrecinctID >= 600 & PrecinctID <= 900))
)
)

colnames(d4) <- c("x2014","x2015","NPre","NVoters")
d4 <- as.data.frame(d4)
d4 <- with(d4,as.data.frame(cbind(d4,"NV_x2015"=NVoters - x2015,"PCT_Voted"=scales::percent(x2015/NVoters))))
d4


# Bellingham Only. Change the dates as needed
d1 <- data.frame(xtabs(~PrecinctID,data=subset(MBacc, mdy(AVReturnedDate) <= mdy("11/04/2014"),select=c(PrecinctID))))
d2 <- data.frame(xtabs(~PrecinctID,data=subset(x, mdy(AVReturnedDate) <= mdy("11/03/2015"),select=c(PrecinctID))))
d3 <- merge(d1,d2,by="PrecinctID");d3$PrecinctID <- as.integer(as.character(d3$PrecinctID))
colnames(d3) <- c("PrecinctID","2014","2015");colSums(d3[,2:3])

d4 <- rbind(

"Bham1"=colSums(subset(d3,PrecinctID >= 200 & PrecinctID <= 209)[,2:3]),
"Bham2"=colSums(subset(d3,PrecinctID >= 210 & PrecinctID <= 219)[,2:3]),
"Bham3"=colSums(subset(d3,PrecinctID >= 220 & PrecinctID <= 229)[,2:3]),
"Bham4"=colSums(subset(d3,PrecinctID >= 230 & PrecinctID <= 239)[,2:3]),
"Bham5"=colSums(subset(d3,PrecinctID >= 240 & PrecinctID <= 249)[,2:3]),
"Bham6"=colSums(subset(d3,PrecinctID >= 250 & PrecinctID <= 259)[,2:3]),
"Bham7"=colSums(subset(d3,PrecinctID >= 260 & PrecinctID <= 299)[,2:3])

)
colnames(d4) <- c("2014","2015")
colSums(d4)
d4

d4 <- cbind(d4,
rbind(
nrow(subset(d3,PrecinctID >= 200 & PrecinctID <= 209)),
nrow(subset(d3,PrecinctID >= 210 & PrecinctID <= 219)),
nrow(subset(d3,PrecinctID >= 220 & PrecinctID <= 229)),
nrow(subset(d3,PrecinctID >= 230 & PrecinctID <= 239)),
nrow(subset(d3,PrecinctID >= 240 & PrecinctID <= 249)),
nrow(subset(d3,PrecinctID >= 250 & PrecinctID <= 259)),
nrow(subset(d3,PrecinctID >= 260 & PrecinctID <= 299))
)
)

d4 <- cbind(d4,
rbind(
nrow(subset(x,PrecinctID >= 200 & PrecinctID <= 209)),
nrow(subset(x,PrecinctID >= 210 & PrecinctID <= 219)),
nrow(subset(x,PrecinctID >= 220 & PrecinctID <= 229)),
nrow(subset(x,PrecinctID >= 230 & PrecinctID <= 239)),
nrow(subset(x,PrecinctID >= 240 & PrecinctID <= 249)),
nrow(subset(x,PrecinctID >= 250 & PrecinctID <= 259)),
nrow(subset(x,PrecinctID >= 260 & PrecinctID <= 299))
)
)

colnames(d4) <- c("x2014","x2015","NPre","NVoters")
d4 <- as.data.frame(d4)
d4 <- with(d4,as.data.frame(cbind(d4,"NV_x2015"=NVoters - x2015,"PCT_Voted"=scales::percent(x2015/NVoters))))
d4

# Grouping Bellingham
b1 = as.data.frame(
cbind(

rbind(
matrix(
c(
"b.199.300",
"b.204.256",
"b.209.254",
"b.214.252",
"b.219.250",
"b.223.248",
"b.227.246"
)
)
)
,

rbind(
nrow(count(subset(x, PrecinctID > 199 & PrecinctID < 300 & AVReturnStatus == ""),PrecinctID)),
nrow(count(subset(x, PrecinctID > 204 & PrecinctID < 256 & AVReturnStatus == ""),PrecinctID)),
nrow(count(subset(x, PrecinctID > 209 & PrecinctID < 254 & AVReturnStatus == ""),PrecinctID)),
nrow(count(subset(x, PrecinctID > 214 & PrecinctID < 252 & AVReturnStatus == ""),PrecinctID)),
nrow(count(subset(x, PrecinctID > 219 & PrecinctID < 250 & AVReturnStatus == ""),PrecinctID)),
nrow(count(subset(x, PrecinctID > 223 & PrecinctID < 248 & AVReturnStatus == ""),PrecinctID)),
nrow(count(subset(x, PrecinctID > 227 & PrecinctID < 246 & AVReturnStatus == ""),PrecinctID))
)
,

rbind(
nrow(subset(x, PrecinctID > 199 & PrecinctID < 300 & AVReturnStatus == "")),
nrow(subset(x, PrecinctID > 204 & PrecinctID < 256 & AVReturnStatus == "")),
nrow(subset(x, PrecinctID > 209 & PrecinctID < 254 & AVReturnStatus == "")),
nrow(subset(x, PrecinctID > 214 & PrecinctID < 252 & AVReturnStatus == "")),
nrow(subset(x, PrecinctID > 219 & PrecinctID < 250 & AVReturnStatus == "")),
nrow(subset(x, PrecinctID > 223 & PrecinctID < 248 & AVReturnStatus == "")),
nrow(subset(x, PrecinctID > 227 & PrecinctID < 246 & AVReturnStatus == ""))
)
)
)

colnames(b1) <- c("PrecinctRange","NPrecincts","NumberNotVoted")
b1$NPrecincts <- as.numeric(as.character(b1$NPrecincts))
b1$NumberNotVoted <- as.numeric(as.character(b1$NumberNotVoted))
b1 <- with(b1,cbind(b1,"NPRE/NNV"=scales::percent(NPrecincts/NumberNotVoted)))
b1

#This could has bit of a 'equal row' problem. I solved through deletion and added zeros when I used a spreadsheet. -RMF

# Percentage from Bham each AVReturnedDate
c1 <- cbind(
as.data.frame(xtabs(~AVReturnedDate,data=(subset(x,PrecinctID > 199 & PrecinctID < 300,select=c(RegistrationNumber,AVReturnedDate,AVReturnStatus,PrecinctID))))),
as.data.frame(xtabs(~AVReturnedDate,data=(subset(x,select=c(RegistrationNumber,AVReturnedDate,AVReturnStatus,PrecinctID)))))
);colnames(c1) <- c("BhamDate","BhamFreq","AllDate","AllFreq")
c1 <- arrange(c1 <- c1[-1,])

# Percentage from County each AVReturnedDate
c2 <- cbind(
as.data.frame(xtabs(~AVReturnedDate,data=(subset(x,PrecinctID > 0 & PrecinctID < 200,select=c(RegistrationNumber,AVReturnedDate,AVReturnStatus,PrecinctID))),na.action=na.omit)),
as.data.frame(xtabs(~AVReturnedDate,data=(subset(x,select=c(RegistrationNumber,AVReturnedDate,AVReturnStatus,PrecinctID)))))[-3,]
);colnames(c2) <- c("CountyDate","CountyFreq","AllDate","AllFreq")
c2 <- arrange(c2 <- c2[-1,])

# Percentage from Small Cities each AVReturnedDate
c3 <- cbind(
as.data.frame(xtabs(~AVReturnedDate,data=(subset(x,PrecinctID > 299 & PrecinctID < 900,select=c(RegistrationNumber,AVReturnedDate,AVReturnStatus,PrecinctID))))),
as.data.frame(xtabs(~AVReturnedDate,data=(subset(x,select=c(RegistrationNumber,AVReturnedDate,AVReturnStatus,PrecinctID)))))[c(-3,-5,-13),]
);colnames(c3) <- c("SmallCityDate","SmallCityFreq","AllDate","AllFreq")
c3 <- arrange(c3 <- c3[-1,])

par(mfrow=c(1,3))
par(cex=1.25)
par(lwd=2)
par(pch=19)
with(c1,plot(AllFreq,type="l",col="grey",xlab="Bellingham",ylab="Votes",))
with(c1,lines(BhamFreq,type="l",col="blue"))
with(c2,plot(AllFreq,type="l",col="grey",xlab="UnIncorp County",ylab="Votes"))
with(c2,lines(CountyFreq,type="l",col="red"))
with(c3,plot(AllFreq,type="l",col="grey",xlab="Small Cities",ylab="Votes"))
with(c3,lines(SmallCityFreq,type="l",col="purple"))
par(mfrow=c(1,1))
mtext("AVReturns ~ AVReturnedDate as of 11/04/2015 for Bellingham(blue), UnIncCounty(red), SmallCities(purple), AllVotes(grey)",cex=1.25,line=1)
mtext(sum(c1$BhamFreq),col="blue",line=-3,cex=1.5)
mtext(sum(c2$CountyFreq),col="red",line=-4,cex=1.5)
mtext(sum(c3$SmallCityFreq),col="purple",line=-5,cex=1.5)
mtext(sum(c1$AllFreq),col="grey",line=-6,cex=1.5)
par(mfrow=c(1,1))


par(mfrow=c(1,1))
par(cex=1.25)
par(lwd=2)
par(pch=19)
with(c1,plot(AllFreq,type="b",col="grey",xlab="AVReturnedDate Range",ylab="Votes",xlim=c(1,26)))
with(c1,lines(BhamFreq,col="blue"))
with(c1,points(BhamFreq,col="blue"))
with(c2,lines(CountyFreq,col="red"))
with(c2,points(CountyFreq,col="red"))
with(c3,lines(SmallCityFreq,col="purple"))
with(c3,points(SmallCityFreq,col="purple"))
mtext("AVReturns ~ AVReturnedDate as of 11/04/2015 for Bellingham(blue), UnIncCounty(red), SmallCities(purple), AllVotes(grey)",cex=1.25,line=1)
mtext(sum(c1$BhamFreq),col="blue",line=-2,cex=1.5)
mtext(sum(c2$CountyFreq),col="red",line=-3,cex=1.5)
mtext(sum(c3$SmallCityFreq),col="purple",line=-4,cex=1.5)
mtext(sum(c1$AllFreq),col="grey",line=-5,cex=1.5)
par(mfrow=c(1,1))

No comments:

Post a Comment