Saturday, October 3, 2015

Code to establish voter roll 'flux' in time series...





 11:53 AM 10/3/2015 - RMF Political piece here.
### Voter net gain and loss per precinct, year over year and from the last general (11.22.2014 snapshot) to the current snapshot (10.02.2015)
## Also, code to establish 'flux' from a voterdb time series
## Status Codes: A = Active; I = Inactive; C = Cancelled
## Registration Numbers, the unit of change here, can (sometimes but not often) be recycled to other users in the Whatcom County voterdb
## Ryan M. Ferris 11:53 AM 10/3/2015
## Load six voterdbs
## 5.20.2014
## 11.22.2014
## 12.19.2014
## 02.27.2015
## 06.26.2015
## 10.02.2015

## Read one tab-delimited voter database export.
##   path: location of the export file.
## Returns the data.frame built by read.delim: the first line supplies the
## column names, leading/trailing white space is stripped from fields, quote
## characters are treated as ordinary text (quote = ""), and character columns
## stay character rather than becoming factors.
read_voterdb <- function(path) {
  read.delim(
    path,
    header = TRUE,
    strip.white = TRUE,
    sep = "\t",
    quote = "",
    stringsAsFactors = FALSE
  )
}

## Each snapshot is loaded in full (voterdbN) and then reduced to its Active
## registrations (voterdbNA). subset() also drops rows whose StatusCode is NA.

## 05.20.2014
voterdb1 <- read_voterdb("C:/Politics/05.20.2014voterdb_w.ERIC/Ryan Ferris Voter List w.ERIC.txt")
voterdb1A <- subset(voterdb1, StatusCode == "A")

## 11.22.2014
voterdb2 <- read_voterdb("C:/Politics/11.22.2014.txt")
voterdb2A <- subset(voterdb2, StatusCode == "A")

## 12.19.2014
voterdb3 <- read_voterdb("C:/Politics/12.19.2014_001.txt")
voterdb3A <- subset(voterdb3, StatusCode == "A")

## 02.27.2015
voterdb4 <- read_voterdb("C:/Politics/02.27.2015.txt")
voterdb4A <- subset(voterdb4, StatusCode == "A")

## 06.26.2015 -- the active subset must be taken from voterdb5 itself, not from
## the previous snapshot, or every 06.26.2015 figure repeats 02.27.2015.
voterdb5 <- read_voterdb("C:/Politics/06.26.2015.txt")
voterdb5A <- subset(voterdb5, StatusCode == "A")

## Most current for all status
## 10.02.2015 -- split into Active, Inactive and Cancelled registrations.
voterdb6 <- read_voterdb("C:/Politics/10.02.2015.txt")
voterdb6A <- subset(voterdb6, StatusCode == "A")
voterdb6I <- subset(voterdb6, StatusCode == "I")
voterdb6C <- subset(voterdb6, StatusCode == "C")

# Compute differences
# Row counts of the active-voter subsets (StatusCode == "A"), printed in order
# from voterdb1A through voterdb6A. Comparing consecutive counts gives only the
# net change in roll size; it does not show how many individual registrations
# entered or left between snapshots.
# NOTE(review): confirm each voterdbNA above is subset from its own voterdbN.
nrow(voterdb1A)
nrow(voterdb2A)
nrow(voterdb3A)
nrow(voterdb4A)
nrow(voterdb5A)
nrow(voterdb6A)

# Current Active, Inactive, Cancelled Voters
# All three subsets are taken from voterdb6 (the 10.02.2015 file).
nrow(voterdb6A)
nrow(voterdb6I)
nrow(voterdb6C)

## The row counts alone do not capture 'flux' from one snapshot to another.
## Flux is measured by comparing registration numbers as sets: intersect
## (retained), loss and gain, all relative to a point of origin.
## Intersection from one point of origin - voters retained. e.g. intersect(x,y) =  "In x and in y"
## Each count is the number of distinct registration numbers that are active in
## the origin snapshot and also active in the later snapshot.
## point of origin here: 05.20.2014 (voterdb1A)

length(intersect(voterdb1A$RegistrationNumber,voterdb2A$RegistrationNumber))
length(intersect(voterdb1A$RegistrationNumber,voterdb3A$RegistrationNumber))
length(intersect(voterdb1A$RegistrationNumber,voterdb4A$RegistrationNumber))
length(intersect(voterdb1A$RegistrationNumber,voterdb5A$RegistrationNumber))
length(intersect(voterdb1A$RegistrationNumber,voterdb6A$RegistrationNumber))

##  setdiff from one point of origin - voters lost. e.g. setdiff(x,y) =  "In x not in y"
##  Each count: registration numbers active in voterdb1A that are missing from
##  the later active subset. "Lost" therefore means "no longer listed as
##  Active"; the registration may still be present under another status code.
length(setdiff(voterdb1A$RegistrationNumber,voterdb2A$RegistrationNumber))
length(setdiff(voterdb1A$RegistrationNumber,voterdb3A$RegistrationNumber))
length(setdiff(voterdb1A$RegistrationNumber,voterdb4A$RegistrationNumber))
length(setdiff(voterdb1A$RegistrationNumber,voterdb5A$RegistrationNumber))
length(setdiff(voterdb1A$RegistrationNumber,voterdb6A$RegistrationNumber))

##  setdiff from one point of origin - voters gained. e.g. setdiff(y,x) =  "In y not in x"
##  Each count: registration numbers active in the later subset that were not
##  active in voterdb1A.
length(setdiff(voterdb2A$RegistrationNumber,voterdb1A$RegistrationNumber))
length(setdiff(voterdb3A$RegistrationNumber,voterdb1A$RegistrationNumber))
length(setdiff(voterdb4A$RegistrationNumber,voterdb1A$RegistrationNumber))
length(setdiff(voterdb5A$RegistrationNumber,voterdb1A$RegistrationNumber))
length(setdiff(voterdb6A$RegistrationNumber,voterdb1A$RegistrationNumber))

## difference between periods
##  setdiff between consecutive snapshots - voters lost in each 'flux' period. e.g. setdiff(x,y) =  "In x not in y"
##  Each line uses the earlier active subset as x and the next one as y, so the
##  count is the number of registration numbers that dropped out of the active
##  roll during that single period. The first line is the same comparison
##  (voterdb1A vs voterdb2A) already printed in the from-origin section.
length(setdiff(voterdb1A$RegistrationNumber,voterdb2A$RegistrationNumber))
length(setdiff(voterdb2A$RegistrationNumber,voterdb3A$RegistrationNumber))
length(setdiff(voterdb3A$RegistrationNumber,voterdb4A$RegistrationNumber))
length(setdiff(voterdb4A$RegistrationNumber,voterdb5A$RegistrationNumber))
length(setdiff(voterdb5A$RegistrationNumber,voterdb6A$RegistrationNumber))

##  setdiff from an arbitrary point of origin showing significant 'flux' - voters lost and gained between these periods.
## Comparison points here are 11.22.2014 (voterdb2A) to 10.02.2015 (voterdb6A)
## e.g. setdiff(x,y) =  "In x not in y"
## First line: active on 11.22.2014 but not on 10.02.2015 (lost).
## Second line: active on 10.02.2015 but not on 11.22.2014 (gained).
length(setdiff(voterdb2A$RegistrationNumber,voterdb6A$RegistrationNumber))
length(setdiff(voterdb6A$RegistrationNumber,voterdb2A$RegistrationNumber))

# tabled difference
# Per-precinct net change in active voters between 11.22.2014 (voterdb2A) and
# 10.02.2015 (voterdb6A).
#
# Both snapshots are tabulated over the union of their precinct IDs. Subtracting
# two plain table() results is only valid when both contain exactly the same
# precincts: a precinct present in just one snapshot either makes the
# subtraction fail (different lengths) or silently subtracts counts belonging
# to different precincts (same length, different IDs). With shared factor
# levels a precinct missing from one snapshot is counted as 0 there.
precinct_levels <- sort(unique(c(voterdb2A$PrecinctID, voterdb6A$PrecinctID)))
# table(factor(...)) is kept inline so the result has no dimension name and
# as.data.frame() keeps producing the columns Var1 / Freq.
precinct_counts_2 <- table(factor(voterdb2A$PrecinctID, levels = precinct_levels))
precinct_counts_6 <- table(factor(voterdb6A$PrecinctID, levels = precinct_levels))

# table_lost$Freq   = 11.22.2014 count minus 10.02.2015 count (positive = net loss)
# table_gained$Freq = 10.02.2015 count minus 11.22.2014 count (positive = net gain)
# These are net differences in counts, not numbers of individual voters.
table_lost <- as.data.frame(precinct_counts_2 - precinct_counts_6)
table_gained <- as.data.frame(precinct_counts_6 - precinct_counts_2)
WhatISLost <- cbind("lost"=table_lost,"gained"=table_gained)
WhatISLost

# cumulative gain/loss for this arbitrary period
# GAIN is the same per-precinct net gain as table_gained.
GAIN <- table_gained

# Registration numbers that left / entered the active roll, plotted as value
# against position in the setdiff() result.
# NOTE(review): plot() on these vectors assumes RegistrationNumber is numeric.
plot(setdiff(voterdb2A$RegistrationNumber,voterdb6A$RegistrationNumber))
mtext("Voter Registration IDs lost from 11.22.2014 to 10.02.2015",line=2)
plot(setdiff(voterdb6A$RegistrationNumber,voterdb2A$RegistrationNumber))
mtext("Voters Registration IDs gained from 11.22.2014 to 10.02.2015",line=2)

# One bar per precinct; bars below zero are precincts with a net loss.
with(GAIN,barplot(Freq,names.arg=Var1,las=2,cex.names=.65))
mtext("Net precinct gain or loss from 11.22.2014 to 10.02.2015",line=-1)


No comments:

Post a Comment