# Thursday, August 15, 2019

# Code for the age tables from the post at http://www.bellinghampoliticsandeconomics.com/2019/08/election-summary-mostly-for-whatcom.html
#
# Code for the age tables from the post at http://www.bellinghampoliticsandeconomics.com/2019/08/election-summary-mostly-for-whatcom.html

# Make sure the text 'unwraps' so that each line breaks on an appropriate delimiter (e.g. ',')

library(data.table)
library(lubridate)

# State matchback code ----
# Load the August 2019 statewide voter registration database (VRDB) extract.
# The most recent VRDB drop was hard to parse, so it is read with base
# read.csv() in a permissive mode: quoting disabled (quote = ""), embedded
# NULs skipped (skipNul = TRUE) and short rows padded (fill = TRUE).
setwd("D:/Politics/CumulativeBallotStatus_Primary_Aug7/August_8_VRDB")
t1 <- read.csv(
  "201908_VRDB_Extract_001.csv",
  header = TRUE,
  sep = ",",
  quote = "",
  fill = TRUE,
  skipNul = TRUE,
  strip.white = TRUE,
  stringsAsFactors = FALSE
)

# as.data.table() on a data.frame already returns a new table, so wrapping it
# in copy() only duplicated the full extract a second time.
t2 <- as.data.table(t1)

# Keep rows flagged "Active" in either StatusCode or Column.35.
# NOTE(review): Column.35 presumably carries the status for rows whose fields
# were shifted by the permissive parse above -- confirm against the raw file.
t2 <- t2[StatusCode == "Active" | Column.35 == "Active"]

# Round-trip the filtered rows through disk and re-read them with fread().
# NOTE(review): presumably done so fread() re-infers the column types -- confirm.
fwrite(t2, "t2.csv")
t2 <- fread("t2.csv")
#t2 <- t2[as.numeric(StateVoterID),]

# Read in the state ballot-return matchbacks from
# https://www.sos.wa.gov/elections/research/2019-primary-data.aspx
setwd("D:\\Politics\\CumulativeBallotStatus_Primary_Aug7")
stateBR <- fread("ballotreturnsexport-080619.csv")

# Strip spaces from the column names. setnames() renames by reference, which
# is the data.table way; the base `colnames<-` replacement does not.
setnames(stateBR, gsub(" ", "", names(stateBR)))
setkey(stateBR, BallotID)

# County.CC: County name and County Code 2 column list
load("County.CC")

# Attach the county code to every ballot return, then keep only the
# registrations (t2) that have a ballot-return record (inner merge).
stateBR_en <- merge(stateBR, County.CC, by = "County")
t3 <- merge(t2, stateBR_en, by.x = "StateVoterID", by.y = "VoterID")

# Build a "<precinct code><county code>" key on both tables, added by
# reference. The precinct code is the rounded numeric value of Split (t3,
# ballot returns) and of PrecinctPart (t2, registrations).
t3[, PrecinctCode := round(as.numeric(Split))][
  , Combination := paste0(PrecinctCode, CountyCode.y)]
t2[, PrecinctCode_ := round(as.numeric(PrecinctPart))][
  , Combination := paste0(PrecinctCode_, CountyCode)]

gc()

# Get only the participating county + precinct-code combinations.
# t3 is the inner merge of t2 with the ballot returns, so every t3 row already
# corresponds to a t2 registration. Joining t3 back onto all of t2 (which also
# relied on the key merge() set implicitly and on StateVoterID being t2's
# first column) adds nothing: the distinct combinations can be read straight
# from t3. Rows whose precinct code could not be parsed are excluded.
uniqComb <- unique(t3[!is.na(PrecinctCode), Combination])

# Registrations living in a participating precinct; filter once and report
# the row count from the result.
t4 <- t2[Combination %in% uniqComb]
t4[, .N]

# t2[t3,.(PrecinctCode,PrecinctCode_),by=.EACHI][,.(Comb=paste0(PrecinctCode,PrecinctCode_))]


# Insert an age field.
# Age is approximated as (election year - birth year). The reference year is
# pinned to the 2019 primary these files belong to, so the age tables stay
# reproducible when the script is re-run in a later year (the previous version
# took the year from Sys.time() via a string round-trip through ymd_hms()).
election_year <- 2019
t2[, Age := election_year - year(ymd(Birthdate))]
t3[, Age := election_year - year(ymd(Birthdate))]
t4[, Age := election_year - year(ymd(Birthdate))]



# State-wide turnout by age band.
#   Voted      = non-rejected ballot returns (t3) in the band
#   Registered = registrations in participating precincts (t4) in the band
#   PCT        = Voted / Registered as a percentage, rounded to 2 decimals
# Each count is computed explicitly for its own band. The previous version
# attached the labels to a grouped t4 result by position, but data.table
# returns groups in order of first appearance, so labels could land on the
# wrong counts (and a single NA age added a fourth group).
# local() keeps the helper vectors out of the global environment; its value
# (the table ordered by PCT) is printed at top level as before.
local({
  band_labels <- c("LT30", "BTW30.60", "GTR60")

  voted <- c(
    t3[BallotStatus != "Rejected" & Age < 30, .N],
    t3[BallotStatus != "Rejected" &
         data.table::between(Age, 30, 60, incbounds = TRUE), .N],
    t3[BallotStatus != "Rejected" & Age > 60, .N]
  )

  registered <- c(
    t4[Age < 30, .N],
    t4[data.table::between(Age, 30, 60, incbounds = TRUE), .N],
    t4[Age > 60, .N]
  )

  turnout <- data.table(
    Ages = band_labels,
    Voted = voted,
    Registered = registered,
    PCT = round(voted / registered, 4) * 100
  )
  turnout[order(PCT)]
})
#


library(data.table)
library(lubridate)

# Whatcom County matchback code ----
setwd("D:\\Politics\\MatchBacks2019PRI")
MB_08.07.19 <- fread("MB_08.07.19.csv")

# Strip spaces from the column names by reference (setnames() is the
# data.table way; the base `colnames<-` replacement does not rename in place).
setnames(MB_08.07.19, gsub(" ", "", names(MB_08.07.19)))

# Age is approximated as (election year - year of birth). The reference year
# is pinned to the 2019 primary so the table is reproducible when re-run in a
# later year, instead of being taken from the wall clock.
election_year <- 2019
MB_08.07.19[, Age := election_year - as.integer(YOB)]

# Keep active registrants whose ballot was not canceled.
MB_ <- MB_08.07.19[BallotStatus != "Canceled" & RegistrantStatus == "Active"]

# Whatcom County turnout by age band.
#   Title   : band labels, in the same row order as Numbers
#   Numbers : Voted      = accepted ballots in the band
#             Registered = distinct registrants (first row per RegistrantID
#                          within MB_) in the band
#             PCT        = Voted / Registered as a percentage, 1 decimal
# Counts are built as plain length-3 vectors so a band with no accepted
# ballots yields 0 instead of silently dropping a row, and PCT is computed
# column-wise rather than by grouping on (Voted, Registered), which could
# reorder rows relative to Title when two bands share the same pair.
Title <- data.table(
  Age = c("Less Than 30", "Between 30 and 60", "Greater Than 60")
)

Numbers <- local({
  is_accepted <- MB_[, BallotStatus == "Accepted"]
  is_first <- MB_[, !duplicated(RegistrantID)]

  voted <- c(
    MB_[is_accepted & Age < 30, .N],
    MB_[is_accepted & data.table::between(Age, 30, 60, incbounds = TRUE), .N],
    MB_[is_accepted & Age > 60, .N]
  )

  registered <- c(
    MB_[is_first & Age < 30, .N],
    MB_[is_first & data.table::between(Age, 30, 60, incbounds = TRUE), .N],
    MB_[is_first & Age > 60, .N]
  )

  data.table(
    Voted = voted,
    Registered = registered,
    PCT = round(as.numeric(voted) / as.numeric(registered), 3) * 100
  )
})

cbind(Title, Numbers)

# No comments:
#
# Post a Comment