R, Julia, SQL, Octave and others: Personal notes on data analysis, computation, data access most especially for querying voter history, Census, PDC, and other election data. Reader is advised to just paste the code text into Notepad++.
Saturday, April 2, 2016
Using data.table, dplyr to understand "The Bernie Effect" on Whatcom County voterdb
Political piece here.
# Part I . Most output cat()ed.
library(data.table)
library(dplyr)
voterdb033016 <- read.delim("C:/Politics/RyanFerrisVoterList_20160330_104658/RyanFerrisVoterList_20160330_104658_JessieS.txt", header = TRUE,fill=FALSE,strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
t(head(voterdb033016,1))
vdb <- data.table(voterdb033016)
unique(vdb$PrecinctPortion)
setkey(vdb,RegistrationNumber)
vdbA <- data.table(vdb[StatusCode == "A",])
setkey(vdbA,RegistrationNumber)
x2012 <- arrange(vdbA[year(mdy(RegistrationDate))== 2012,xtabs(~month(mdy(RegistrationDate))),by=month(mdy(RegistrationDate))],month)
x2013 <- arrange(vdbA[year(mdy(RegistrationDate))== 2013,xtabs(~month(mdy(RegistrationDate))),by=month(mdy(RegistrationDate))],month)
x2014 <- arrange(vdbA[year(mdy(RegistrationDate))== 2014,xtabs(~month(mdy(RegistrationDate))),by=month(mdy(RegistrationDate))],month)
x2015 <- arrange(vdbA[year(mdy(RegistrationDate))== 2015,xtabs(~month(mdy(RegistrationDate))),by=month(mdy(RegistrationDate))],month)
x2016 <- arrange(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~month(mdy(RegistrationDate))),by=month(mdy(RegistrationDate))],month)
# cbind(x2012,x2013,x2014,x2015,x2016)
cbind("2012"=x2012,"2013"=x2013,"2014"=x2014,"2015"=x2015,"2016"=x2016)
cat('
2012.month 2012.V1 2013.month 2013.V1 2014.month 2014.V1 2015.month 2015.V1 2016.month 2016.V1
1: 1 500 1 376 1 405 1 577 1 1468
2: 2 458 2 267 2 380 2 588 2 1624
3: 3 795 3 346 3 428 3 591 3 2774
4: 4 352 4 323 4 380 4 633 1 1468
5: 5 562 5 361 5 407 5 675 2 1624
6: 6 870 6 404 6 622 6 676 3 2774
7: 7 844 7 617 7 774 7 934 1 1468
8: 8 1001 8 612 8 1087 8 1128 2 1624
9: 9 1932 9 1020 9 1784 9 2623 3 2774
10: 10 2091 10 898 10 1787 10 1874 1 1468
11: 11 380 11 355 11 897 11 936 2 1624
12: 12 734 12 427 12 650 12 802 3 2774
')
m1 <- merge(x2012,x2013,by="month")
m2 <- merge(x2014,x2015,by="month")
m3 <- merge(m1,m2,by="month")
m4 <- merge(m3,x2016,by="month")
barplot(rowSums(t(m4)[,1:3])[2:6],ylim=c(0,6000),col=c("black","light grey","grey","dark grey","gold"))
mtext(" 'RegistrationDate' totals for the first three months of 2012 - 2016. From Whatcom County Voterdb 03/30/2016")
# Below covers January 1, 2016 - March 30, 2016
arrange(data.table(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~StatusReason)]),desc(N))
cat('
StatusReason N
1: Added by WEI Statewide Online Reg 3464
2: Re-registration; no changes 918
3: Original Registration 831
4: Re-registration due to Address change 274
5: Updated by WEI Statewide ACS 136
6: Confirm by voter 134
7: Re-registration due to Name change 33
8: Address Updated by WEI Addr Chg 26
9: Third Party Change of Address (In-County) 18
10: Verification Notice Returned 18
11: Re-Registration due to Name & Address Change 12
12: 2016-03-17 11:27:33.570000000 1
13: Name Updated by WEI Statewide Addr Chg 1
')
BirthYear_2016 <- arrange(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~year(mdy(BirthDate))),by=year(mdy(BirthDate))],desc(year))
BirthYear_2016
cat('
year V1
1: 1998 177
2: 1997 362
3: 1996 298
4: 1995 330
5: 1994 242
6: 1993 249
7: 1992 249
8: 1991 222
9: 1990 190
10: 1989 163
11: 1988 161
12: 1987 135
13: 1986 142
14: 1985 144
15: 1984 132
16: 1983 109
17: 1982 92
18: 1981 96
19: 1980 90
20: 1979 93
21: 1978 81
22: 1977 77
23: 1976 73
24: 1975 58
')
arrange(data.table(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~ResidenceCity)]),desc(N))
cat('
ResidenceCity N
1: Bellingham 3963
2: Ferndale 503
3: Blaine 421
4: Lynden 377
5: Everson 162
6: Maple Falls 85
7: Sedro-Woolley 65
8: Deming 61
9: Custer 54
10: Sumas 47
11: Lummi Island 34
12: Pt Roberts 33
13: Nooksack 30
14: Acme 16
15: Glacier 11
16: Rockport 3
')
BirthYear_2016 <- arrange(data.table(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~ResidenceCity)]),desc(N))
arrange(data.table(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~PrecinctID)]),desc(N))
cat('
PrecinctID N
1: 253 147
2: 225 122
3: 247 114
4: 231 98
5: 229 90
---
175: 167 4
176: 183 3
177: 267 3
178: 268 1
')
Precinct_2016 <- arrange(data.table(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~PrecinctID)]),desc(N))
cat('
head(Precinct_2016,40)
PrecinctID N
1: 253 147
2: 225 122
3: 247 114
4: 231 98
5: 229 90
6: 228 85
7: 206 84
8: 246 84
9: 226 82
10: 257 81
11: 230 76
12: 208 70
13: 244 66
14: 263 66
15: 175 65
16: 182 63
17: 108 61
18: 204 60
19: 211 60
20: 505 60
21: 220 59
22: 227 59
23: 239 58
24: 248 56
25: 249 56
26: 135 55
27: 203 52
28: 212 52
29: 262 52
30: 301 52
31: 178 50
32: 148 49
33: 219 49
34: 258 49
35: 201 46
36: 214 43
37: 302 42
38: 169 41
39: 213 41
40: 508 41
PrecinctID N
')
# Part II
# Scratch Not all cat()ed output
# working
voterdb033016 <- read.delim("C:/Politics/RyanFerrisVoterList_20160330_104658/RyanFerrisVoterList_20160330_104658_JessieS.txt", header = TRUE,fill=FALSE,strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
vdb <- data.table(voterdb033016)
setkey(vdb,RegistrationNumber)
vdbA <- data.table(vdb[StatusCode == "A",])
setkey(vdbA,RegistrationNumber)
voterdb102815 <- read.delim("C:/Politics/RyanFerrisList_20151028/10.28.15.txt", header = TRUE,fill=FALSE,strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
vdb_ <- data.table(voterdb102815)
setkey(vdb_,RegistrationNumber)
vdbA_ <- data.table(vdb_[StatusCode == "A",])
setkey(vdbA_,RegistrationNumber)
vdbA$RegistrationYear <- with(vdbA,year(mdy(RegistrationDate)))
vdbA$LastUpdateDateYear <- with(vdbA,year(ymd_hms(LastUpdateDate)))
vdbA$RegistrationMonth <- with(vdbA,month(mdy(RegistrationDate)))
vdbA$LastUpdateDateMonth <- with(vdbA,month(ymd_hms(LastUpdateDate)))
vdbA$BirthYear <- with(vdbA,year(mdy(BirthDate)))
vdbA_$RegistrationYear <- with(vdbA_,year(mdy(RegistrationDate)))
vdbA_$LastUpdateDateYear <- with(vdbA_,year(ymd_hms(LastUpdateDate)))
vdbA_$RegistrationMonth <- with(vdbA_,month(mdy(RegistrationDate)))
vdbA_$LastUpdateDateMonth <- with(vdbA_,month(ymd_hms(LastUpdateDate)))
vdbA_$BirthYear <- with(vdbA_,year(mdy(BirthDate)))
count(data.table(V1 = vdbA$RegistrationNumber %in% vdbA_$RegistrationNumber),V1)
Source: local data table [2 x 2]
cat('
V1 n
(lgl) (int)
1 TRUE 124831
2 FALSE 7790
')
count(data.table(V1 = vdbA_$RegistrationNumber %in% vdbA$RegistrationNumber),V1)
cat('
Source: local data table [2 x 2]
V1 n
(lgl) (int)
1 TRUE 124831
2 FALSE 4471
')
head(subset(vdbA,(vdbA_$RegistrationNumber %in% RegistrationNumber),2))
cat('
RegistrationNumber
1: 13
2: 16
3: 19
4: 24
5: 25
6: 31
')
cat('
print("Not Useful")
nrow(subset(vdbA_, !(vdbA$RegistrationNumber %in% RegistrationNumber)))
#[1] 7790
nrow(subset(vdbA_, !(vdbA$RegistrationNumber %in% RegistrationNumber),
select=c("RegistrationNumber","BirthDate","PrecinctID","RegistrationDate","LastUpdateDate","StatusReason")))
#[1] 7790
d1 <- data.table(subset(vdbA_,!(vdbA$RegistrationNumber %in% RegistrationNumber),
select=c("RegistrationNumber","BirthDate","PrecinctID","RegistrationDate","LastUpdateDate","StatusReason")))
head(d1,1)
')
nrow(subset(vdbA, !(vdbA_$RegistrationNumber %in% RegistrationNumber)))
nrow(subset(vdbA, !(vdbA_$RegistrationNumber %in% RegistrationNumber),
select=c("RegistrationNumber","BirthDate","PrecinctID","RegistrationDate","LastUpdateDate","StatusReason")))
d1 <- data.table(subset(vdbA,!(vdbA_$RegistrationNumber %in% RegistrationNumber),
select=c("RegistrationNumber","BirthDate","PrecinctID","RegistrationDate","LastUpdateDate","StatusReason")))
head(d1,1)
setkey(d1,RegistrationNumber)
d1$RegistrationYear <- with(d1,year(mdy(RegistrationDate)))
d1$LastUpdateDateYear <- with(d1,year(ymd_hms(LastUpdateDate)))
d1$BirthYear <- with(d1,year(mdy(BirthDate)))
# Scratch Not cat()ed output
data.table(vdbA[RegistrationYear == 2015 & StatusReason == "Original Registration",xtabs(~RegistrationMonth)])
RegistrationMonth N
1: 1 58
2: 2 61
3: 3 62
4: 4 68
5: 5 81
6: 6 94
7: 7 174
8: 8 157
9: 9 1254
10: 10 541
11: 11 107
12: 12 104
data.table(vdbA[RegistrationYear == 2016 & StatusReason == "Original Registration",xtabs(~RegistrationMonth)])
RegistrationMonth N
1: 1 176
2: 2 264
3: 3 391
data.table(vdbA[RegistrationYear == 2016 & StatusReason == "Added by WEI Statewide Online Reg",xtabs(~RegistrationMonth)])
RegistrationMonth N
1: 1 723
2: 2 883
3: 3 1858
data.table(vdbA[RegistrationYear == 2015 & StatusReason == "Added by WEI Statewide Online Reg",xtabs(~RegistrationMonth)])
RegistrationMonth N
1: 1 317
2: 2 252
3: 3 312
4: 4 342
5: 5 342
6: 6 397
7: 7 528
8: 8 633
9: 9 848
10: 10 756
11: 11 512
12: 12 538
> arrange(data.table(vdbA[,xtabs(~StatusReason)]),desc(N))
StatusReason N
1: Added by WEI Statewide Online Reg 19438
2: Third Party Change of Address (In-County) 18417
3: Re-registration; no changes 15260
4: 14543
5: Original Registration 13066
6: Address Updated by WEI Addr Chg 10424
7: Updated by WEI Statewide ACS 10252
8: Re-registration due to Address change 9611
9: A - Phone/Email update from ballot envelope 9256
10: Office Correction 5742
11: Precinct Line Adjustment 2200
12: Re-registration due to Name change 1092
13: Name Updated by WEI Statewide Addr Chg 1020
14: Name/Address Updated by WEI Statewide Addr Chg 832
15: Confirm by voter 575
16: Verification Notice Returned 283
17: Confirmed 3PCOA (In-County) 236
18: Re-Registration due to Name & Address Change 181
19: Z - Re-registration due to Seasonal Update Card 154
20: Re-registration; Signed Petition 24
21: Cancel 45 Day No Contact 4
22: Confirmed 3PCOA (Undeliverable) 3
23: Third Party Change of Address (Out-of-County) 3
24: Third Party Change of Address (Undeliverable) 2
25: 2016-03-17 11:27:33.570000000 1
26: Registered in another State 1
27: Requested by Voter 1
StatusReason N
> arrange(data.table(vdbA[RegistrationYear == 2016,xtabs(~StatusReason)]),desc(N))
StatusReason N
1: Added by WEI Statewide Online Reg 3464
2: Re-registration; no changes 918
3: Original Registration 831
4: Re-registration due to Address change 274
5: Updated by WEI Statewide ACS 136
6: Confirm by voter 134
7: Re-registration due to Name change 33
8: Address Updated by WEI Addr Chg 26
9: Third Party Change of Address (In-County) 18
10: Verification Notice Returned 18
11: Re-Registration due to Name & Address Change 12
12: 2016-03-17 11:27:33.570000000 1
13: Name Updated by WEI Statewide Addr Chg 1
> arrange(data.table(vdbA[RegistrationYear == 2015,xtabs(~StatusReason)]),desc(N))
StatusReason N
1: Added by WEI Statewide Online Reg 5777
2: Original Registration 2761
3: Re-registration; no changes 1045
4: Re-registration due to Address change 827
5: Address Updated by WEI Addr Chg 396
6: Third Party Change of Address (In-County) 394
7: Updated by WEI Statewide ACS 327
8: Confirm by voter 318
9: Re-registration due to Name change 75
10: Verification Notice Returned 52
11: Re-Registration due to Name & Address Change 35
12: Name Updated by WEI Statewide Addr Chg 22
13: Name/Address Updated by WEI Statewide Addr Chg 6
14: Office Correction 2
No comments:
Post a Comment