Saturday, April 2, 2016

Using data.table, dplyr to understand "The Bernie Effect" on Whatcom County voterdb



Political piece here.



# Part I . Most output cat()ed.

library(data.table)
library(dplyr)

voterdb033016 <- read.delim("C:/Politics/RyanFerrisVoterList_20160330_104658/RyanFerrisVoterList_20160330_104658_JessieS.txt", header = TRUE,fill=FALSE,strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
t(head(voterdb033016,1))
vdb <- data.table(voterdb033016)
unique(vdb$PrecinctPortion)

setkey(vdb,RegistrationNumber)
vdbA <- data.table(vdb[StatusCode == "A",])
setkey(vdbA,RegistrationNumber)

x2012 <- arrange(vdbA[year(mdy(RegistrationDate))== 2012,xtabs(~month(mdy(RegistrationDate))),by=month(mdy(RegistrationDate))],month)
x2013 <- arrange(vdbA[year(mdy(RegistrationDate))== 2013,xtabs(~month(mdy(RegistrationDate))),by=month(mdy(RegistrationDate))],month)
x2014 <- arrange(vdbA[year(mdy(RegistrationDate))== 2014,xtabs(~month(mdy(RegistrationDate))),by=month(mdy(RegistrationDate))],month)
x2015 <- arrange(vdbA[year(mdy(RegistrationDate))== 2015,xtabs(~month(mdy(RegistrationDate))),by=month(mdy(RegistrationDate))],month)
x2016 <- arrange(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~month(mdy(RegistrationDate))),by=month(mdy(RegistrationDate))],month)
# cbind(x2012,x2013,x2014,x2015,x2016)


cbind("2012"=x2012,"2013"=x2013,"2014"=x2014,"2015"=x2015,"2016"=x2016)

cat('
    2012.month 2012.V1 2013.month 2013.V1 2014.month 2014.V1 2015.month 2015.V1 2016.month 2016.V1
 1:          1     500          1     376          1     405          1     577          1    1468
 2:          2     458          2     267          2     380          2     588          2    1624
 3:          3     795          3     346          3     428          3     591          3    2774
 4:          4     352          4     323          4     380          4     633          1    1468
 5:          5     562          5     361          5     407          5     675          2    1624
 6:          6     870          6     404          6     622          6     676          3    2774
 7:          7     844          7     617          7     774          7     934          1    1468
 8:          8    1001          8     612          8    1087          8    1128          2    1624
 9:          9    1932          9    1020          9    1784          9    2623          3    2774
10:         10    2091         10     898         10    1787         10    1874          1    1468
11:         11     380         11     355         11     897         11     936          2    1624
12:         12     734         12     427         12     650         12     802          3    2774
')


m1 <- merge(x2012,x2013,by="month")
m2 <- merge(x2014,x2015,by="month")
m3 <- merge(m1,m2,by="month")
m4 <- merge(m3,x2016,by="month")

barplot(rowSums(t(m4)[,1:3])[2:6],ylim=c(0,6000),col=c("black","light grey","grey","dark grey","gold"))
mtext(" 'RegistrationDate' totals for the first three months of 2012 - 2016. From Whatcom County Voterdb 03/30/2016")

# Below covers January 1, 2016 - March 30, 2016
arrange(data.table(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~StatusReason)]),desc(N))
cat('
                                    StatusReason    N
 1:            Added by WEI Statewide Online Reg 3464
 2:                  Re-registration; no changes  918
 3:                        Original Registration  831
 4:        Re-registration due to Address change  274
 5:                 Updated by WEI Statewide ACS  136
 6:                             Confirm by voter  134
 7:           Re-registration due to Name change   33
 8:              Address Updated by WEI Addr Chg   26
 9:    Third Party Change of Address (In-County)   18
10:                 Verification Notice Returned   18
11: Re-Registration due to Name & Address Change   12
12:                2016-03-17 11:27:33.570000000    1
13:       Name Updated by WEI Statewide Addr Chg    1
')


BirthYear_2016 <- arrange(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~year(mdy(BirthDate))),by=year(mdy(BirthDate))],desc(year))

BirthYear_2016
cat('
    year  V1
 1: 1998 177
 2: 1997 362
 3: 1996 298
 4: 1995 330
 5: 1994 242
 6: 1993 249
 7: 1992 249
 8: 1991 222
 9: 1990 190
10: 1989 163
11: 1988 161
12: 1987 135
13: 1986 142
14: 1985 144
15: 1984 132
16: 1983 109
17: 1982  92
18: 1981  96
19: 1980  90
20: 1979  93
21: 1978  81
22: 1977  77
23: 1976  73
24: 1975  58
')

arrange(data.table(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~ResidenceCity)]),desc(N))
cat(' 
               ResidenceCity    N
 1:                Bellingham 3963
 2:                  Ferndale  503
 3:                    Blaine  421
 4:                    Lynden  377
 5:                   Everson  162
 6:               Maple Falls   85
 7:             Sedro-Woolley   65
 8:                    Deming   61
 9:                    Custer   54
10:                     Sumas   47
11:              Lummi Island   34
12:                Pt Roberts   33
13:                  Nooksack   30
14:                      Acme   16
15:                   Glacier   11
16:                  Rockport    3
')

BirthYear_2016 <- arrange(data.table(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~ResidenceCity)]),desc(N))
arrange(data.table(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~PrecinctID)]),desc(N))
cat('
     PrecinctID   N
  1:        253 147
  2:        225 122
  3:        247 114
  4:        231  98
  5:        229  90
 ---               
175:        167   4
176:        183   3
177:        267   3
178:        268   1
')

Precinct_2016 <- arrange(data.table(vdbA[year(mdy(RegistrationDate))== 2016,xtabs(~PrecinctID)]),desc(N))
cat('
head(Precinct_2016,40)
    PrecinctID   N
 1:        253 147
 2:        225 122
 3:        247 114
 4:        231  98
 5:        229  90
 6:        228  85
 7:        206  84
 8:        246  84
 9:        226  82
10:        257  81
11:        230  76
12:        208  70
13:        244  66
14:        263  66
15:        175  65
16:        182  63
17:        108  61
18:        204  60
19:        211  60
20:        505  60
21:        220  59
22:        227  59
23:        239  58
24:        248  56
25:        249  56
26:        135  55
27:        203  52
28:        212  52
29:        262  52
30:        301  52
31:        178  50
32:        148  49
33:        219  49
34:        258  49
35:        201  46
36:        214  43
37:        302  42
38:        169  41
39:        213  41
40:        508  41
    PrecinctID   N
')


# Part II
# Scratch Not all cat()ed output

# working

voterdb033016 <- read.delim("C:/Politics/RyanFerrisVoterList_20160330_104658/RyanFerrisVoterList_20160330_104658_JessieS.txt", header = TRUE,fill=FALSE,strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
vdb <- data.table(voterdb033016)
setkey(vdb,RegistrationNumber)
vdbA <- data.table(vdb[StatusCode == "A",])
setkey(vdbA,RegistrationNumber)

voterdb102815 <- read.delim("C:/Politics/RyanFerrisList_20151028/10.28.15.txt", header = TRUE,fill=FALSE,strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
vdb_ <- data.table(voterdb102815)
setkey(vdb_,RegistrationNumber)
vdbA_ <- data.table(vdb_[StatusCode == "A",])
setkey(vdbA_,RegistrationNumber)

vdbA$RegistrationYear <- with(vdbA,year(mdy(RegistrationDate)))
vdbA$LastUpdateDateYear <- with(vdbA,year(ymd_hms(LastUpdateDate)))
vdbA$RegistrationMonth <- with(vdbA,month(mdy(RegistrationDate)))
vdbA$LastUpdateDateMonth <- with(vdbA,month(ymd_hms(LastUpdateDate)))
vdbA$BirthYear <- with(vdbA,year(mdy(BirthDate)))

vdbA_$RegistrationYear <- with(vdbA_,year(mdy(RegistrationDate)))
vdbA_$LastUpdateDateYear <- with(vdbA_,year(ymd_hms(LastUpdateDate)))
vdbA_$RegistrationMonth <- with(vdbA_,month(mdy(RegistrationDate)))
vdbA_$LastUpdateDateMonth <- with(vdbA_,month(ymd_hms(LastUpdateDate)))
vdbA_$BirthYear <- with(vdbA_,year(mdy(BirthDate)))

count(data.table(V1 = vdbA$RegistrationNumber %in% vdbA_$RegistrationNumber),V1)
Source: local data table [2 x 2]

cat('
     V1      n
  (lgl)  (int)
1  TRUE 124831
2 FALSE   7790
')

count(data.table(V1 = vdbA_$RegistrationNumber %in% vdbA$RegistrationNumber),V1)
cat('
Source: local data table [2 x 2]

     V1      n
  (lgl)  (int)
1  TRUE 124831
2 FALSE   4471
')

head(subset(vdbA,(vdbA_$RegistrationNumber %in% RegistrationNumber),2))
cat('
   RegistrationNumber
1:                 13
2:                 16
3:                 19
4:                 24
5:                 25
6:                 31
')


cat('
print("Not Useful")
nrow(subset(vdbA_, !(vdbA$RegistrationNumber %in% RegistrationNumber)))
#[1] 7790
nrow(subset(vdbA_, !(vdbA$RegistrationNumber %in% RegistrationNumber),
select=c("RegistrationNumber","BirthDate","PrecinctID","RegistrationDate","LastUpdateDate","StatusReason")))
#[1] 7790
d1 <- data.table(subset(vdbA_,!(vdbA$RegistrationNumber %in% RegistrationNumber),
select=c("RegistrationNumber","BirthDate","PrecinctID","RegistrationDate","LastUpdateDate","StatusReason")))
head(d1,1)
')


nrow(subset(vdbA, !(vdbA_$RegistrationNumber %in% RegistrationNumber)))

nrow(subset(vdbA, !(vdbA_$RegistrationNumber %in% RegistrationNumber),
select=c("RegistrationNumber","BirthDate","PrecinctID","RegistrationDate","LastUpdateDate","StatusReason")))

d1 <- data.table(subset(vdbA,!(vdbA_$RegistrationNumber %in% RegistrationNumber),
select=c("RegistrationNumber","BirthDate","PrecinctID","RegistrationDate","LastUpdateDate","StatusReason")))

head(d1,1)

setkey(d1,RegistrationNumber)
d1$RegistrationYear <- with(d1,year(mdy(RegistrationDate)))
d1$LastUpdateDateYear <- with(d1,year(ymd_hms(LastUpdateDate)))
d1$BirthYear <- with(d1,year(mdy(BirthDate)))

# Scratch Not cat()ed output

 data.table(vdbA[RegistrationYear == 2015 & StatusReason == "Original Registration",xtabs(~RegistrationMonth)])
    RegistrationMonth    N
 1:                 1   58
 2:                 2   61
 3:                 3   62
 4:                 4   68
 5:                 5   81
 6:                 6   94
 7:                 7  174
 8:                 8  157
 9:                 9 1254
10:                10  541
11:                11  107
12:                12  104
 data.table(vdbA[RegistrationYear == 2016 & StatusReason == "Original Registration",xtabs(~RegistrationMonth)])
   RegistrationMonth   N
1:                 1 176
2:                 2 264
3:                 3 391

 data.table(vdbA[RegistrationYear == 2016 & StatusReason == "Added by WEI Statewide Online Reg",xtabs(~RegistrationMonth)])
   RegistrationMonth    N
1:                 1  723
2:                 2  883
3:                 3 1858
 data.table(vdbA[RegistrationYear == 2015 & StatusReason == "Added by WEI Statewide Online Reg",xtabs(~RegistrationMonth)])
    RegistrationMonth   N
 1:                 1 317
 2:                 2 252
 3:                 3 312
 4:                 4 342
 5:                 5 342
 6:                 6 397
 7:                 7 528
 8:                 8 633
 9:                 9 848
10:                10 756
11:                11 512
12:                12 538

> arrange(data.table(vdbA[,xtabs(~StatusReason)]),desc(N))
                                       StatusReason     N
 1:               Added by WEI Statewide Online Reg 19438
 2:       Third Party Change of Address (In-County) 18417
 3:                     Re-registration; no changes 15260
 4:                                                 14543
 5:                           Original Registration 13066
 6:                 Address Updated by WEI Addr Chg 10424
 7:                    Updated by WEI Statewide ACS 10252
 8:           Re-registration due to Address change  9611
 9:     A - Phone/Email update from ballot envelope  9256
10:                               Office Correction  5742
11:                        Precinct Line Adjustment  2200
12:              Re-registration due to Name change  1092
13:          Name Updated by WEI Statewide Addr Chg  1020
14:  Name/Address Updated by WEI Statewide Addr Chg   832
15:                                Confirm by voter   575
16:                    Verification Notice Returned   283
17:                     Confirmed 3PCOA (In-County)   236
18:    Re-Registration due to Name & Address Change   181
19: Z - Re-registration due to Seasonal Update Card   154
20:                Re-registration; Signed Petition    24
21:                        Cancel 45 Day No Contact     4
22:                 Confirmed 3PCOA (Undeliverable)     3
23:   Third Party Change of Address (Out-of-County)     3
24:   Third Party Change of Address (Undeliverable)     2
25:                   2016-03-17 11:27:33.570000000     1
26:                     Registered in another State     1
27:                              Requested by Voter     1
                                       StatusReason     N
> arrange(data.table(vdbA[RegistrationYear == 2016,xtabs(~StatusReason)]),desc(N))
                                    StatusReason    N
 1:            Added by WEI Statewide Online Reg 3464
 2:                  Re-registration; no changes  918
 3:                        Original Registration  831
 4:        Re-registration due to Address change  274
 5:                 Updated by WEI Statewide ACS  136
 6:                             Confirm by voter  134
 7:           Re-registration due to Name change   33
 8:              Address Updated by WEI Addr Chg   26
 9:    Third Party Change of Address (In-County)   18
10:                 Verification Notice Returned   18
11: Re-Registration due to Name & Address Change   12
12:                2016-03-17 11:27:33.570000000    1
13:       Name Updated by WEI Statewide Addr Chg    1
> arrange(data.table(vdbA[RegistrationYear == 2015,xtabs(~StatusReason)]),desc(N))
                                      StatusReason    N
 1:              Added by WEI Statewide Online Reg 5777
 2:                          Original Registration 2761
 3:                    Re-registration; no changes 1045
 4:          Re-registration due to Address change  827
 5:                Address Updated by WEI Addr Chg  396
 6:      Third Party Change of Address (In-County)  394
 7:                   Updated by WEI Statewide ACS  327
 8:                               Confirm by voter  318
 9:             Re-registration due to Name change   75
10:                   Verification Notice Returned   52
11:   Re-Registration due to Name & Address Change   35
12:         Name Updated by WEI Statewide Addr Chg   22
13: Name/Address Updated by WEI Statewide Addr Chg    6
14:                              Office Correction    2


No comments:

Post a Comment