Wednesday, August 6, 2014

More Matchback Code

Political piece for this code is here. "Match backs" are the database rendition of the first part of the Vote By Mail process in Whatcom County. Before your ballots are run through a Sequoia 400C Optical Scanner, the outer envelope is processed: verification and validation procedures are applied to the incoming ballot. A number of interesting fields are returned:
> as.matrix(names(MB_08.01.2014))

      [,1]                
 [1,] "RegistrationNumber"
 [2,] "LastName"          
 [3,] "FirstName"         
 [4,] "MiddleName"        
 [5,] "NameSuffix"        
 [6,] "ResidenceAddress"  
 [7,] "ResidenceCity"     
 [8,] "ResidenceState"    
 [9,] "ResidenceZipCode"  
[10,] "MailAddress1"      
[11,] "MailAddress2"      
[12,] "MailAddress3"      
[13,] "MailAddress4"      
[14,] "PrecinctID"        
[15,] "AVReturnedDate"    
[16,] "AVReturnStatus"    
[17,] "AVReturnChallenge" 

Most interesting are the "AVReturnStatus" and "AVReturnChallenge":


> count(MB_08.01.2014_status,"AVReturnStatus")
  AVReturnStatus  freq
1     Challenged   190
2           Good 22350
3  Undeliverable  1552
> count(MB_08.01.2014_status,"AVReturnChallenge")
        AVReturnChallenge  freq
1                         22350
2               No Ballot     2
3            No Signature    53
4      No Signature Match   113
5           Undeliverable  1552
6 Wrong Voter's Signature    18
7            Z - Deceased     1
8               Z - Other     3

When we subset for the "Undeliverable" (return to sender) ballots, we can construct a list or map of ballots that quite possibly need address changes. I sort them by precinct:

MB_08_01.2014_Undeliverable <- subset(MB_08.01.2014_status, AVReturnStatus == "Undeliverable")
arrange(data.frame(with(MB_08_01.2014_Undeliverable,(table(PrecinctID)))),desc(Freq))

    PrecinctID Freq
1          245   78
2          228   61
3          226   37
4          208   36
5          231   35
6          252   33
7          201   32
8          137   30
9          227   30
10         249   29
11         253   28
12         257   26
13         229   25
14         230   25
15         247   25
16         206   24
17         246   24
....

library(plyr)

library(sqldf)
library(lattice)

setwd("C:/Politics")
jpeg_create <- function() {
 systime <- as.numeric(Sys.time())
 # dev.new()
 jpeg(filename = systime,
          width = 1024, height = 768, units = "px", pointsize = 12,
          quality = 100, bg = "white", res = NA, family = "", restoreConsole = TRUE,
          type = c("windows"))
 Sys.sleep(2)
   }
 
# Accumulates all MatchBacks
C08_01_08_04_05_06_07_08 <- arrange(rbind(MB_08.01.2014,MB_08.04.2014,MB_08.05.2014,MB_08.06.2014,MB_08.07.2014,MB_08.08.2014),LastName)
C08_01_08_04_05_06_07_08_status <- subset(C08_01_08_04_05_06_07_08, select = c(RegistrationNumber,PrecinctID,AVReturnStatus,AVReturnChallenge))
C08_01_08_04_05_06_07_08_Good <- subset(C08_01_08_04_05_06_07_08_status, AVReturnStatus == "Good")
VDB <- read.delim("08.01.2014.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)

# Under Construction. Uses sqldf to select out precinct ranges
par(mfrow=c(1,2))
P1 <- data.frame(with(C08_01_08_04_05_06_07_08_Good,(table(PrecinctID))))
MB_200 <- sqldf("Select * from P1 where PrecinctID > 199  and PrecinctID < 300 Order By Freq DESC")
barplot(MB_200$Freq, names.arg=MB_200$PrecinctID,xlab=nrow(MB_200),ylab=sum(MB_200$Freq),ylim=c(0,600),col=rgb(0,0,1,.75))
MB_LT_200 <- sqldf("Select * from P1 where PrecinctID < 200 Order By Freq DESC")
MB_GTR_299 <- sqldf("Select * from P1 where PrecinctID > 299 Order By Freq DESC")
MB_ROP <- arrange(rbind(MB_LT_200,MB_GTR_299),desc(Freq))
barplot(MB_ROP$Freq, names.arg=MB_ROP$PrecinctID,xlab=nrow(MB_ROP),ylab=sum(MB_ROP$Freq),ylim=c(0,600),col=rgb(1,0,0,.75))
nrow(MB_200)
nrow(MB_ROP)
sum(MB_200$Freq)
sum(MB_ROP$Freq)

# Use %in% and sqldf to plot 42nd LD only.
# Requires a list of precincts named Precinct42.csv

par(mfrow=c(1,2))
LD42 <- read.csv('Precinct42.csv') # 119 Precincts for 2014
CLD42_Good <- subset(C08_01_08_04_05_06_07_08_Good, PrecinctID %in% LD42$PrecinctID42)
P2 <- arrange(data.frame(with(CLD42_Good,(table(PrecinctID)))),desc(Freq))
MB_LD42_200 <- sqldf("Select * from P2 where PrecinctID > 199  and PrecinctID < 300 Order By Freq DESC")
barplot(MB_LD42_200$Freq, names.arg=MB_LD42_200$PrecinctID,xlab=nrow(MB_LD42_200),ylab=sum(MB_LD42_200$Freq),ylim=c(0,600),col=rgb(0,0,1,.75))
MB_LD42_LT_200 <- sqldf("Select * from P2 where PrecinctID < 200 Order By Freq DESC")
MB_LD42_GTR_299 <- sqldf("Select * from P2 where PrecinctID > 299 Order By Freq DESC")
MB_LD42_ROP <- arrange(rbind(MB_LD42_LT_200,MB_LD42_GTR_299),desc(Freq))
barplot(MB_LD42_ROP$Freq, names.arg=MB_LD42_ROP$PrecinctID,xlab=nrow(MB_LD42_ROP),ylab=sum(MB_LD42_ROP$Freq),ylim=c(0,600),col=rgb(1,0,0,.75))
nrow(MB_LD42_200)
nrow(MB_LD42_ROP)
sum(MB_LD42_200$Freq)
sum(MB_LD42_ROP$Freq)
MB_LD42_200A <- MB_LD42_200
MB_LD42_ROPA <- MB_LD42_ROP

# require(sqldf)
par(mfrow=c(1,2))
LD42 <- read.csv('Precinct42.csv') # 119 Precincts for 2014
CLD42_Good <- subset(VDB, PrecinctID %in% LD42$PrecinctID42)
P2 <- arrange(data.frame(with(CLD42_Good,(table(PrecinctID)))),desc(Freq))
MB_LD42_200 <- sqldf("Select * from P2 where PrecinctID > 199  and PrecinctID < 300 Order By Freq DESC")
barplot(MB_LD42_200$Freq, names.arg=MB_LD42_200$PrecinctID,xlab=nrow(MB_LD42_200),ylab=sum(MB_LD42_200$Freq),ylim=c(0,1500),col=rgb(0,0,1,.75))
MB_LD42_LT_200 <- sqldf("Select * from P2 where PrecinctID < 200 Order By Freq DESC")
MB_LD42_GTR_299 <- sqldf("Select * from P2 where PrecinctID > 299 Order By Freq DESC")
MB_LD42_ROP <- arrange(rbind(MB_LD42_LT_200,MB_LD42_GTR_299),desc(Freq))
barplot(MB_LD42_ROP$Freq, names.arg=MB_LD42_ROP$PrecinctID,xlab=nrow(MB_LD42_ROP),ylab=sum(MB_LD42_ROP$Freq),ylim=c(0,1500),col=rgb(1,0,0,.75))
nrow(MB_LD42_200)
nrow(MB_LD42_ROP)
sum(MB_LD42_200$Freq)
sum(MB_LD42_ROP$Freq)
MB_LD42_200B <- MB_LD42_200
MB_LD42_ROPB <- MB_LD42_ROP

par(mfrow=c(1,1))
barplot(MB_LD42_200B$Freq,ylim=c(0,1400),col=rgb(1,0,0,.75))
par(new=t)
barplot(MB_LD42_200A$Freq,ylim=c(0,1400),col=rgb(0,0,1,.75))
mtext("Precincts:",side = 3, line = 2)
mtext(nrow(MB_LD42_200B),side = 3, line = 1)
options(digits=2)
mtext(sum(MB_LD42_200A$Freq)/sum(MB_LD42_200B$Freq),side = 1, line = 3)
mtext(sum(MB_LD42_200A$Freq),side = 1, line = 2)
mtext(sum(MB_LD42_200B$Freq),side = 1, line = 1)
mtext("RED = Active Voters as of 08/01/2014 in the 200 Series 42LD ; BLUE = Primary Voters in the 200 Series 42LD",side = 1, line = 0)
par(new=F)

par(mfrow=c(1,1))
barplot(MB_LD42_ROPB$Freq,ylim=c(0,1400),col=rgb(1,0,0,.75))
par(new=t)
barplot(MB_LD42_ROPA$Freq,ylim=c(0,1400),col=rgb(0,0,1,.75))
mtext("Precincts:",side = 3, line = 2)
mtext(nrow(MB_LD42_ROPB),side = 3, line = 1)
options(digits=2)
mtext(sum(MB_LD42_ROPA$Freq)/sum(MB_LD42_ROPB$Freq),side = 1, line = 3)
mtext(sum(MB_LD42_ROPA$Freq),side = 1, line = 2)
mtext(sum(MB_LD42_ROPB$Freq),side = 1, line = 1)
mtext("RED = Active Voters as of 08/01/2014 in the ROP 42LD ; BLUE = Primary Voters in the ROP 42LD",side = 1, line = 0)
par(new=F)

X <- arrange(MB_LD42_200A,desc(PrecinctID))
Y <- arrange(MB_LD42_200B,desc(PrecinctID))
LD200 <- data.frame(cbind(X,Y))
LD200 <- with(LD200,(data.frame(PrecinctID,Primary=Freq,General=Freq.1)))
LD200 <- with(LD200,(data.frame(LD200,Primary/General)))
(arrange(LD200,desc(Primary.General)))


X <- arrange(MB_LD42_ROPA,desc(PrecinctID))
Y <- arrange(MB_LD42_ROPB,desc(PrecinctID))
LDROP <- data.frame(cbind(X,Y))
LDROP <- with(LDROP,(data.frame(PrecinctID,Primary=Freq,General=Freq.1)))
LDROP <- with(LDROP,(data.frame(LDROP,Primary/General)))
(arrange(LDROP,desc(Primary.General)))
library(plyr)
library(sqldf)
library(lattice)
# Accumulates all MatchBacks
C08_01_08_04_05_06_07_08 <- arrange(rbind(MB_08.01.2014,MB_08.04.2014,MB_08.05.2014,MB_08.06.2014,MB_08.07.2014,MB_08.08.2014),LastName)
C08_01_08_04_05_06_07_08_status <- subset(C08_01_08_04_05_06_07_08, select = c(RegistrationNumber,PrecinctID,AVReturnStatus,AVReturnChallenge))
C08_01_08_04_05_06_07_08_Good <- subset(C08_01_08_04_05_06_07_08_status, AVReturnStatus == "Good")
VDB <- read.delim("08.01.2014.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)

# Under Construction. Uses sqldf to select out precinct ranges
par(mfrow=c(1,2))
P1 <- data.frame(with(C08_01_08_04_05_06_07_08_Good,(table(PrecinctID))))
MB_200 <- sqldf("Select * from P1 where PrecinctID > 199  and PrecinctID < 300 Order By Freq DESC")
barplot(MB_200$Freq, names.arg=MB_200$PrecinctID,xlab=nrow(MB_200),ylab=sum(MB_200$Freq),ylim=c(0,600),col=rgb(0,0,1,.75))
MB_LT_200 <- sqldf("Select * from P1 where PrecinctID < 200 Order By Freq DESC")
MB_GTR_299 <- sqldf("Select * from P1 where PrecinctID > 299 Order By Freq DESC")
MB_ROP <- arrange(rbind(MB_LT_200,MB_GTR_299),desc(Freq))
barplot(MB_ROP$Freq, names.arg=MB_ROP$PrecinctID,xlab=nrow(MB_ROP),ylab=sum(MB_ROP$Freq),ylim=c(0,600),col=rgb(1,0,0,.75))
nrow(MB_200)
nrow(MB_ROP)
sum(MB_200$Freq)
sum(MB_ROP$Freq)

# Use %in% and sqldf to plot 42nd LD only.
# Requires a list of precincts named Precinct42.csv

par(mfrow=c(1,2))
LD42 <- read.csv('Precinct42.csv') # 119 Precincts for 2014
CLD42_Good <- subset(C08_01_08_04_05_06_07_08_Good, PrecinctID %in% LD42$PrecinctID42)
P2 <- arrange(data.frame(with(CLD42_Good,(table(PrecinctID)))),desc(Freq))
MB_LD42_200 <- sqldf("Select * from P2 where PrecinctID > 199  and PrecinctID < 300 Order By Freq DESC")
barplot(MB_LD42_200$Freq, names.arg=MB_LD42_200$PrecinctID,xlab=nrow(MB_LD42_200),ylab=sum(MB_LD42_200$Freq),ylim=c(0,600),col=rgb(0,0,1,.75))
MB_LD42_LT_200 <- sqldf("Select * from P2 where PrecinctID < 200 Order By Freq DESC")
MB_LD42_GTR_299 <- sqldf("Select * from P2 where PrecinctID > 299 Order By Freq DESC")
MB_LD42_ROP <- arrange(rbind(MB_LD42_LT_200,MB_LD42_GTR_299),desc(Freq))
barplot(MB_LD42_ROP$Freq, names.arg=MB_LD42_ROP$PrecinctID,xlab=nrow(MB_LD42_ROP),ylab=sum(MB_LD42_ROP$Freq),ylim=c(0,600),col=rgb(1,0,0,.75))
nrow(MB_LD42_200)
nrow(MB_LD42_ROP)
sum(MB_LD42_200$Freq)
sum(MB_LD42_ROP$Freq)
MB_LD42_200A <- MB_LD42_200
MB_LD42_ROPA <- MB_LD42_ROP

# require(sqldf)
par(mfrow=c(1,2))
LD42 <- read.csv('Precinct42.csv') # 119 Precincts for 2014
CLD42_Good <- subset(VDB, PrecinctID %in% LD42$PrecinctID42)
P2 <- arrange(data.frame(with(CLD42_Good,(table(PrecinctID)))),desc(Freq))
MB_LD42_200 <- sqldf("Select * from P2 where PrecinctID > 199  and PrecinctID < 300 Order By Freq DESC")
barplot(MB_LD42_200$Freq, names.arg=MB_LD42_200$PrecinctID,xlab=nrow(MB_LD42_200),ylab=sum(MB_LD42_200$Freq),ylim=c(0,1500),col=rgb(0,0,1,.75))
MB_LD42_LT_200 <- sqldf("Select * from P2 where PrecinctID < 200 Order By Freq DESC")
MB_LD42_GTR_299 <- sqldf("Select * from P2 where PrecinctID > 299 Order By Freq DESC")
MB_LD42_ROP <- arrange(rbind(MB_LD42_LT_200,MB_LD42_GTR_299),desc(Freq))
barplot(MB_LD42_ROP$Freq, names.arg=MB_LD42_ROP$PrecinctID,xlab=nrow(MB_LD42_ROP),ylab=sum(MB_LD42_ROP$Freq),ylim=c(0,1500),col=rgb(1,0,0,.75))
nrow(MB_LD42_200)
nrow(MB_LD42_ROP)
sum(MB_LD42_200$Freq)
sum(MB_LD42_ROP$Freq)
MB_LD42_200B <- MB_LD42_200
MB_LD42_ROPB <- MB_LD42_ROP

par(mfrow=c(1,1))
barplot(MB_LD42_200B$Freq,ylim=c(0,1400),col=rgb(1,0,0,.75))
par(new=t)
barplot(MB_LD42_200A$Freq,ylim=c(0,1400),col=rgb(0,0,1,.75))
mtext("Precincts:",side = 3, line = 2)
mtext(nrow(MB_LD42_200B),side = 3, line = 1)
options(digits=2)
mtext(sum(MB_LD42_200A$Freq)/sum(MB_LD42_200B$Freq),side = 1, line = 3)
mtext(sum(MB_LD42_200A$Freq),side = 1, line = 2)
mtext(sum(MB_LD42_200B$Freq),side = 1, line = 1)
mtext("RED = Active Voters as of 08/01/2014 in the 200 Series 42LD ; BLUE = Primary Voters in the 200 Series 42LD",side = 1, line = 0)
par(new=F)

par(mfrow=c(1,1))
barplot(MB_LD42_ROPB$Freq,ylim=c(0,1400),col=rgb(1,0,0,.75))
par(new=t)
barplot(MB_LD42_ROPA$Freq,ylim=c(0,1400),col=rgb(0,0,1,.75))
mtext("Precincts:",side = 3, line = 2)
mtext(nrow(MB_LD42_ROPB),side = 3, line = 1)
options(digits=2)
mtext(sum(MB_LD42_ROPA$Freq)/sum(MB_LD42_ROPB$Freq),side = 1, line = 3)
mtext(sum(MB_LD42_ROPA$Freq),side = 1, line = 2)
mtext(sum(MB_LD42_ROPB$Freq),side = 1, line = 1)
mtext("RED = Active Voters as of 08/01/2014 in the ROP 42LD ; BLUE = Primary Voters in the ROP 42LD",side = 1, line = 0)
par(new=F)

X <- arrange(MB_LD42_200A,desc(PrecinctID))
Y <- arrange(MB_LD42_200B,desc(PrecinctID))
LD200 <- data.frame(cbind(X,Y))
LD200 <- with(LD200,(data.frame(PrecinctID,Primary=Freq,General=Freq.1)))
LD200 <- with(LD200,(data.frame(LD200,Primary/General)))
(arrange(LD200,desc(Primary.General)))

X <- arrange(MB_LD42_ROPA,desc(PrecinctID))
Y <- arrange(MB_LD42_ROPB,desc(PrecinctID))
LDROP <- data.frame(cbind(X,Y))
LDROP <- with(LDROP,(data.frame(PrecinctID,Primary=Freq,General=Freq.1)))
LDROP <- with(LDROP,(data.frame(LDROP,Primary/General)))
(arrange(LDROP,desc(Primary.General)))

library(lattice)

barchart(with(LD200,(General ~ PrecinctID)),col=rgb(0,0,1,.75),ylab="Active Voters 08/01/2014")
barchart(with(LD200,(Primary ~ PrecinctID)),col=rgb(1,0,0,.75),ylab="2014 Primary Results") 
barchart(with(LD200,(Primary.General ~ PrecinctID)),col=rgb(0,1,0,.75),ylab="Primary / Active")

barchart(with(LD200,(~ General | PrecinctID)),col=rgb(0,0,1,.75),ylab="Active Voters 08/01/2014")
barchart(with(LD200,(~ Primary | PrecinctID)),col=rgb(1,0,0,.75),ylab="2014 Primary Results") 
barchart(with(LD200,(~ Primary.General | PrecinctID)),col=rgb(0,1,0,.75),ylab="Primary / Active (Percentage)")

barchart(with(LDROP,(General ~ PrecinctID)),col=rgb(0,0,1,.75),ylab="Active Voters 08/01/2014")
barchart(with(LDROP,(Primary ~ PrecinctID)),col=rgb(1,0,0,.75),ylab="2014 Primary Results") 
barchart(with(LDROP,(Primary.General ~ PrecinctID)),col=rgb(0,1,0,.75),ylab="Primary / Active")

barchart(with(LDROP,(~ General | PrecinctID)),col=rgb(0,0,1,.75),ylab="Active Voters 08/01/2014")
barchart(with(LDROP,(~ Primary | PrecinctID)),col=rgb(1,0,0,.75),ylab="2014 Primary Results") 
barchart(with(LDROP,(~ Primary.General | PrecinctID)),col=rgb(0,1,0,.75),ylab="Primary / Active (Percentage)")

jpeg_create()
barchart(with(LD200,(General ~ PrecinctID)),col=rgb(0,0,1,.75),ylab="Active Voters 08/01/2014")
jpeg_create()
barchart(with(LD200,(Primary ~ PrecinctID)),col=rgb(1,0,0,.75),ylab="2014 Primary Results") 
jpeg_create()
barchart(with(LD200,(Primary.General ~ PrecinctID)),col=rgb(0,1,0,.75),ylab="Primary / Active")

jpeg_create()
barchart(with(LD200,(~ General | PrecinctID)),col=rgb(0,0,1,.75),ylab="Active Voters 08/01/2014")
jpeg_create()
barchart(with(LD200,(~ Primary | PrecinctID)),col=rgb(1,0,0,.75),ylab="2014 Primary Results") 
jpeg_create()
barchart(with(LD200,(~ Primary.General | PrecinctID)),col=rgb(0,1,0,.75),ylab="Primary / Active (Percentage)")

jpeg_create()
barchart(with(LDROP,(General ~ PrecinctID)),col=rgb(0,0,1,.75),ylab="Active Voters 08/01/2014")
jpeg_create()
barchart(with(LDROP,(Primary ~ PrecinctID)),col=rgb(1,0,0,.75),ylab="2014 Primary Results") 
jpeg_create()
barchart(with(LDROP,(Primary.General ~ PrecinctID)),col=rgb(0,1,0,.75),ylab="Primary / Active")

jpeg_create()
barchart(with(LDROP,(~ General | PrecinctID)),col=rgb(0,0,1,.75),ylab="Active Voters 08/01/2014")
jpeg_create()
barchart(with(LDROP,(~ Primary | PrecinctID)),col=rgb(1,0,0,.75),ylab="2014 Primary Results") 
jpeg_create()
barchart(with(LDROP,(~ Primary.General | PrecinctID)),col=rgb(0,1,0,.75),ylab="Primary / Active (Percentage)")
graphics.off()

par(mfrow=c(1,1))
barplot(MB_LD42_ROPB$Freq,ylim=c(0,1400),col=rgb(1,.1,0,.75))
par(new=t)
barplot(MB_LD42_ROPA$Freq,ylim=c(0,1400),col=rgb(0,.1,1,.75))
par(new=t)
barplot(MB_LD42_200B$Freq,ylim=c(0,1400),col=rgb(1,.3,0,.75))
par(new=t)
barplot(MB_LD42_200A$Freq,ylim=c(0,1400),col=rgb(0,.3,1,.75))
par(new=F)

par(mfrow=c(1,1))
barplot(MB_LD42_ROPB$Freq,ylim=c(0,1400),col=rgb(1,.1,0,.75))
par(new=t)
barplot(MB_LD42_ROPA$Freq,ylim=c(0,1400),col=rgb(0,.1,1,.75))
par(new=t)
barplot(MB_LD42_200B$Freq,ylim=c(0,1400),col=rgb(1,.3,0,.75))
par(new=t)
barplot(MB_LD42_200A$Freq,ylim=c(0,1400),col=rgb(0,.3,1,.75))
par(new=F)

No comments:

Post a Comment