Friday, December 5, 2014

Analysis of Final Matchbacks for 2014

Analysis of Final Matchbacks for 2014. Political piece here.
# End Run MatchBacks Cumulative File
# Load the consolidated tab-delimited file, keep the four status-related
# columns, and print per-precinct row counts for each return status.
library(plyr)

MB_2014_Consolidated <- read.delim(
  "MB_2014_Consolidated.txt",
  header = TRUE,
  sep = "\t",
  quote = "",
  strip.white = TRUE,
  stringsAsFactors = FALSE
)

MB_2014_Consolidated_status <- subset(
  MB_2014_Consolidated,
  select = c(RegistrationNumber, PrecinctID, AVReturnStatus, AVReturnChallenge)
)

# Overall frequency of each return status and each challenge value.
count(MB_2014_Consolidated_status, "AVReturnStatus")
count(MB_2014_Consolidated_status, "AVReturnChallenge")

# Rows per precinct (columns PrecinctID and Freq), largest count first.
precinct_freq <- function(returns) {
  arrange(data.frame(table(PrecinctID = returns$PrecinctID)), desc(Freq))
}

MB_2014_Consolidated_Good <- subset(
  MB_2014_Consolidated_status, AVReturnStatus == "Good"
)
precinct_freq(MB_2014_Consolidated_Good)

MB_2014_Consolidated_Undeliverable <- subset(
  MB_2014_Consolidated_status, AVReturnStatus == "Undeliverable"
)
precinct_freq(MB_2014_Consolidated_Undeliverable)

MB_2014_Consolidated_Challenged <- subset(
  MB_2014_Consolidated_status, AVReturnStatus == "Challenged"
)
precinct_freq(MB_2014_Consolidated_Challenged)

# Descriptive Statistics for LD 42nd
# Restrict the "Good" returns to the precincts listed in Precinct42.txt, then
# print the total, its share of all "Good" returns, and counts by ID range.
LD42 <- read.delim("Precinct42.txt", header = TRUE)
MB42_Good <- MB_2014_Consolidated_Good[
  MB_2014_Consolidated_Good$PrecinctID %in% LD42$precincts42,
]

nrow(MB42_Good)
nrow(MB42_Good) / nrow(MB_2014_Consolidated_Good)

# Counts for precinct IDs 1-199, 200-299 and 300-802 respectively.
with(MB42_Good, sum(PrecinctID > 0 & PrecinctID < 200, na.rm = TRUE))
with(MB42_Good, sum(PrecinctID > 199 & PrecinctID < 300, na.rm = TRUE))
with(MB42_Good, sum(PrecinctID > 299 & PrecinctID < 803, na.rm = TRUE))

# Summarizing
# Build a wide table: one row per precinct, one column per AVReturnStatus
# value, then add combined and percentage columns.
Test <- xtabs(~ PrecinctID + AVReturnStatus, data = MB_2014_Consolidated_status)

# Drop the table class explicitly so data.frame() yields the wide
# precinct-by-status layout (a classed table would be converted to the long
# PrecinctID / AVReturnStatus / Freq layout instead). This replaces the
# head(Test, length(Test)) trick, which only worked when head() happened to
# strip the class.
Test <- data.frame(unclass(Test))

# NOTE(review): assumes the first status column is the blank AVReturnStatus
# (rows with no return recorded) -- confirm against the data.
names(Test)[1] <- "NoVote"
Test$PrecinctID <- rownames(Test)

Test <- with(Test, {
  # Denominator leaves out the Challenged column (presumably what the "_nC"
  # suffix stands for -- TODO confirm).
  counted <- NoVote + Good + Undeliverable + Void
  cbind(
    Test,
    "Undel_Void" = Undeliverable + Void,
    "PctGood_nC" = (Good / counted) * 100,
    # Fixed: the numerator previously used Undeliverable alone, which did not
    # match the column name or the Undel_Void column above.
    "PctUndelVoid_nC" = ((Undeliverable + Void) / counted) * 100
  )
})

# Summarizing 42nd
# Print LD 42 precinct summaries and plot the precincts with more than 300
# "NoVote" rows.
library(plyr)
ld42 <- read.csv("Precinct42.csv")

# LD 42 precincts where NoVote exceeds Good, largest NoVote first.
# Fixed: membership is tested against the precincts42 column; the original
# compared PrecinctID with the whole ld42 data frame, which does not match
# individual precinct IDs.
arrange(
  subset(Test, NoVote > Good & PrecinctID %in% ld42$precincts42),
  desc(NoVote)
)

# All LD 42 precincts, largest NoVote first.
arrange(subset(Test, PrecinctID %in% ld42$precincts42), desc(NoVote))

Test42NoVote <- arrange(
  subset(Test, NoVote > 300 & PrecinctID %in% ld42$precincts42),
  desc(NoVote)
)

# One bar per precinct, labels rotated (las = 2); the margin text carries the
# title on line 1 and the summed NoVote count on line 2.
with(Test42NoVote, barplot(NoVote, names.arg = PrecinctID, las = 2))
mtext("Who did not vote from LD 42 precincts (with more than 300 'not voting') in 2014 General Election", side = 3, line = 1)
mtext(sum(Test42NoVote$NoVote), side = 3, line = 2)

library(sqldf)

# Registration numbers that appear on more than one row of the consolidated
# file, with their occurrence counts.
duplicates <- sqldf(
  "Select RegistrationNumber,Count(RegistrationNumber) from MB_2014_Consolidated Group By RegistrationNumber having Count(RegistrationNumber) > 1"
)

# Every consolidated row that carries one of those registration numbers.
duplicated <- MB_2014_Consolidated[
  MB_2014_Consolidated$RegistrationNumber %in% duplicates$RegistrationNumber,
]

# Number of distinct registration numbers among those rows.
length(unique(duplicated[["RegistrationNumber"]]))

No comments:

Post a Comment