# Read the tab-delimited voter export. Quote characters are treated as ordinary
# text (quote = "") and text columns stay character vectors.
voterdb100215 <- read.delim(
  "C:/Politics/10.02.2015.txt",
  header = TRUE,
  sep = "\t",
  quote = "",
  strip.white = TRUE,
  stringsAsFactors = FALSE
)

# One data frame per StatusCode value: "A", "I" and "C".
voterdbA <- subset(voterdb100215, StatusCode == "A")
voterdbI <- subset(voterdb100215, StatusCode == "I")
voterdbC <- subset(voterdb100215, StatusCode == "C")

# Working set for the profile below: the "A" rows ordered by LastUpdateDate.
voterdb <- arrange(voterdbA, LastUpdateDate)
## Voterdb as of October 2, 2015
# Number of records per status code.
# Active
print(nrow(voterdbA))
# Inactive
print(nrow(voterdbI))
# Cancelled
print(nrow(voterdbC))

## Profile for Active Voters Only
# Each frequency table is sorted by descending count and converted with
# as.data.frame() so that every row is printed.
# Last Update Year
print(as.data.frame(
  count(voterdb, year(ymd_hms(as.character(voterdb$LastUpdateDate))), sort = TRUE)
))
# Status Reason
print(as.data.frame(count(voterdb, StatusReason, sort = TRUE)))
# Residence City
print(as.data.frame(count(voterdb, ResidenceCity, sort = TRUE)))
# Residence Zip
print(as.data.frame(count(voterdb, ResidenceZipCode, sort = TRUE)))
# Gender
print(as.data.frame(count(voterdb, Gender, sort = TRUE)))
# Share of female voters per precinct.
#   Females    - rows whose Gender is "F"
#   Count      - rows with a non-missing RegistrationNumber
#   PCT_female - Females / Count
# Both totals come from a single aggregate() call so they always describe the
# same precinct. Binding two separate aggregate() results side by side only
# works when both return the same precincts in the same order, and
# as.logical(Gender) counted females only because "F" parses as FALSE while
# "M" becomes NA and is dropped.
gender_flags <- data.frame(
  PrecinctID = voterdb$PrecinctID,
  Females = as.integer(voterdb$Gender %in% "F"),
  Count = as.integer(!is.na(voterdb$RegistrationNumber))
)
MF <- aggregate(cbind(Females, Count) ~ PrecinctID, data = gender_flags, FUN = sum)
# A precinct without any counted registration has no defined percentage.
MF <- MF[MF$Count > 0, , drop = FALSE]
MF$PCT_female <- MF$Females / MF$Count
barchart(PrecinctID ~ PCT_female, data = MF)
# Frequency tables sorted by descending count; as.data.frame() makes every row print.
# Birth Year
print(as.data.frame(
  count(voterdb, year(mdy(as.character(voterdb$BirthDate))), sort = TRUE)
))
# PrecinctID
print(as.data.frame(count(voterdb, PrecinctID, sort = TRUE)))
# Age Buckets
# t1 holds the number of active voters per birth year. An age bucket is the set
# of birth years 2015 - age, and its size is the sum of the matching counts.
t1 <- as.data.frame(
  count(voterdb, year(mdy(as.character(voterdb$BirthDate))), sort = TRUE)
)
colnames(t1) <- c("BirthYear", "Count")
# Ages 17 - 35
l1 <- 2015 - 17:35
r1 <- subset(t1, BirthYear %in% l1)
print(sum(r1$Count))
# Ages 36 - 54
l2 <- 2015 - 36:54
r2 <- subset(t1, BirthYear %in% l2)
print(sum(r2$Count))
# Ages 55 - 110
l3 <- 2015 - 55:110
r3 <- subset(t1, BirthYear %in% l3)
print(sum(r3$Count))
# Ages 17 - 22 e.g. Student Vote
sl1 <- 2015 - 17:22
s1 <- subset(t1, BirthYear %in% sl1)
print(sum(s1$Count))
# Top 25 precincts by number of active voters in each birth-year cohort.
# The birth year is parsed once and reused; a1/a2 are reassigned per cohort, so
# each ranking is printed before the next cohort overwrites it.
birth_year <- year(mdy(as.character(voterdb$BirthDate)))

# Birth year <= 1979; e.g. Age 36 or older.
# The cutoff used to be `<= 1980`, which put birth year 1980 into both this
# cohort and the `>= 1980` cohort below.
a1 <- subset(voterdb, birth_year <= 1979, select = c(PrecinctID, BirthDate))
a2 <- aggregate(BirthDate ~ PrecinctID, data = a1, FUN = length)
colnames(a2) <- c("PrecinctID", "Count")
# head() returns fewer rows instead of NA padding when there are under 25 precincts.
print(head(arrange(a2, desc(Count)), 25))

# Birth year >= 1980; e.g. Age 35 or younger.
a1 <- subset(voterdb, birth_year >= 1980, select = c(PrecinctID, BirthDate))
a2 <- aggregate(BirthDate ~ PrecinctID, data = a1, FUN = length)
colnames(a2) <- c("PrecinctID", "Count")
print(head(arrange(a2, desc(Count)), 25))

# Birth year >= 1993; e.g. Age 22 or younger.
a1 <- subset(voterdb, birth_year >= 1993, select = c(PrecinctID, BirthDate))
a2 <- aggregate(BirthDate ~ PrecinctID, data = a1, FUN = length)
colnames(a2) <- c("PrecinctID", "Count")
print(head(arrange(a2, desc(Count)), 25))
# General Elections 2014:2010 (descending). Key: 1 = ballot counted, 0 = ballot not counted, N/A = not applicable or maybe "not around" at that time.
print(count(voterdb, BallotCounted_1))
print(count(voterdb, BallotCounted_2))
print(count(voterdb, BallotCounted_3))
print(count(voterdb, BallotCounted_4))
print(count(voterdb, BallotCounted_5))

# Note no NA listed with aggregate functions below: the formula interface of
# aggregate() omits rows with missing values by default.
# Each cohort tallies BallotCounted_1 (labelled BallotCounted2014) and prints
# the tally before a1/a2 are reused for the next cohort.
birth_year <- year(mdy(as.character(voterdb$BirthDate)))

# 2014 ballots for birth year <= 1979; e.g. Age 36 or older.
# The cutoff used to be `<= 1980`, which put birth year 1980 into both this
# cohort and the `>= 1980` cohort below.
a1 <- subset(voterdb, birth_year <= 1979, select = c(BallotCounted_1, BirthDate))
a2 <- aggregate(BirthDate ~ BallotCounted_1, data = a1, FUN = length)
colnames(a2) <- c("BallotCounted2014", "Count")
print(arrange(a2, desc(Count)))

# 2014 ballots for birth year >= 1980; e.g. Age 35 or younger.
a1 <- subset(voterdb, birth_year >= 1980, select = c(BallotCounted_1, BirthDate))
a2 <- aggregate(BirthDate ~ BallotCounted_1, data = a1, FUN = length)
colnames(a2) <- c("BallotCounted2014", "Count")
print(arrange(a2, desc(Count)))

# 2014 ballots for birth year >= 1993; e.g. Age 22 or younger.
a1 <- subset(voterdb, birth_year >= 1993, select = c(BallotCounted_1, BirthDate))
a2 <- aggregate(BirthDate ~ BallotCounted_1, data = a1, FUN = length)
colnames(a2) <- c("BallotCounted2014", "Count")
print(arrange(a2, desc(Count)))
# in tbl_df format
> library(dplyr)
> library(lattice)
> library(lubridate)
> voterdb100215 <- read.delim("C:/Politics/10.02.2015.txt", header = TRUE, strip.white = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
> voterdb <- arrange(subset(voterdb100215,StatusCode == 'A'),LastUpdateDate)
> voterdbA <- subset(voterdb100215, StatusCode == "A")
> voterdbI <- subset(voterdb100215, StatusCode == "I")
> voterdbC <- subset(voterdb100215, StatusCode == "C")
> ## Voterdb as of October 2, 2015
> # Active
> nrow(voterdbA)
[1] 128572
> # Inactive
> nrow(voterdbI)
[1] 12481
> # Cancelled
> nrow(voterdbC)
[1] 24532
> ## Profile for Active Voters Only
> # Last Update Year
year(ymd_hms(as.character(voterdb$Las... n
1 2014 29374
2 2015 20241
3 2005 19503
4 2013 13899
5 2008 11496
6 2012 10018
7 2010 7516
8 2011 6305
9 2009 4381
10 2006 3003
11 2007 2836
> # Status Reason
StatusReason n
1 Third Party Change of Address (In-County) 17600
2 Added by WEI Statewide Online Reg 16520
3 15255
4 Re-registration; no changes 14870
5 Original Registration 13255
6 A - Phone/Email update from ballot envelope 10064
7 Re-registration due to Address change 9593
8 Updated by WEI Statewide ACS 9569
9 Address Updated by WEI Addr Chg 8974
10 Office Correction 6283
11 Precinct Line Adjustment 2379
12 Re-registration due to Name change 1104
13 Name Updated by WEI Statewide Addr Chg 965
14 Name/Address Updated by WEI Statewide Addr Chg 777
15 Confirm by voter 408
16 Verification Notice Returned 287
17 Confirmed 3PCOA (In-County) 276
18 Re-Registration due to Name & Address Change 179
19 Z - Re-registration due to Seasonal Update Card 169
20 Re-registration; Signed Petition 29
21 Cancel 45 Day No Contact 5
22 Confirmed 3PCOA (Undeliverable) 3
23 Third Party Change of Address (Undeliverable) 3
24 Third Party Change of Address (Out-of-County) 2
25 ID provided (no longer FTFV) 1
26 Registered in another State 1
27 Requested by Voter 1
> # Residence City
ResidenceCity n
1 Bellingham 74768
2 Ferndale 14151
3 Lynden 13107
4 Blaine 10332
5 Everson 4958
6 Custer 1922
7 Maple Falls 1816
8 Deming 1807
9 Sumas 1445
10 Sedro-Woolley 1212
11 Pt Roberts 909
12 Nooksack 794
13 Lummi Island 713
14 Acme 412
15 Glacier 196
16 Rockport 30
> # Residence Zip
ResidenceZipCode n
1 98225 28156
2 98226 26467
3 98229 20145
4 98248 14151
5 98264 13107
6 98230 10332
7 98247 4958
8 98244 2003
9 98240 1922
10 98266 1816
11 98295 1445
12 98284 1212
13 98281 909
14 98276 794
15 98262 713
16 98220 412
17 98283 30
> # Gender
Gender n
1 F 66984
2 M 61581
3 7
> # Look at Top 25 Counts of Precints for Gender ; e.g.
> MF <- cbind(aggregate(as.integer(as.logical(Gender)) ~ PrecinctID,data=voterdb,length),
+ aggregate(RegistrationNumber ~ PrecinctID,data=voterdb,length))
> colnames(MF) <- c("PrecinctID", "Females","PrecinctID", "Count")
> MF <- MF[,c(1,2,4)]
> MF <- with(MF,cbind(MF,"PCT_female"=Females/Count))
> plot(density(MF$PCT_female))
> plot(density(sort(MF$PCT_female)))
> barchart(PrecinctID ~ MF$PCT_female,data=MF)
> # Birth Year
year(mdy(as.character(voterdb$BirthDa... n
1 1954 2474
2 1952 2450
3 1953 2450
4 1994 2420
5 1951 2409
6 1947 2400
7 1992 2371
8 1993 2370
9 1950 2351
10 1957 2328
11 1955 2299
12 1949 2288
13 1948 2286
14 1991 2263
15 1959 2262
16 1956 2252
17 1958 2221
18 1960 2213
19 1961 2204
20 1990 2196
21 1970 2161
22 1989 2147
23 1962 2133
24 1984 2123
25 1963 2111
26 1969 2102
27 1964 2080
28 1981 2077
29 1980 2073
30 1995 2056
31 1985 2051
32 1996 2051
33 1979 2050
34 1946 2042
35 1983 2021
36 1982 2014
37 1988 2000
38 1978 1997
39 1968 1990
40 1971 1975
41 1986 1968
42 1977 1959
43 1965 1949
44 1987 1925
45 1967 1866
46 1975 1841
47 1974 1810
48 1976 1780
49 1966 1776
50 1972 1762
51 1945 1692
52 1973 1673
53 1943 1602
54 1944 1600
55 1942 1458
56 1941 1268
57 1940 1162
58 1997 1124
59 1939 1055
60 1938 972
61 1937 892
62 1936 841
63 1935 785
64 1934 728
65 1933 624
66 1932 594
67 1930 586
68 1931 551
69 1929 493
70 1928 410
71 1927 372
72 1926 333
73 1925 286
74 1924 247
75 1923 213
76 1921 147
77 1922 146
78 1920 108
79 1919 64
80 1918 61
81 1916 32
82 1917 28
83 1915 12
84 1914 8
85 1909 2
86 1911 2
87 1913 2
88 1905 1
89 1912 1
> # PrecinctID
PrecinctID n
1 245 1385
2 201 1284
3 208 1223
4 253 1183
5 182 1175
6 140 1143
7 302 1082
8 169 1079
9 225 1070
10 231 1051
11 601 1050
12 108 1045
13 505 1041
14 206 1034
15 229 1032
16 146 1019
17 126 1017
18 134 1012
19 127 999
20 181 998
21 213 979
22 508 978
23 301 969
24 137 968
25 133 967
26 148 962
27 239 955
28 222 953
29 107 952
30 250 944
31 151 936
32 150 928
33 609 927
34 141 925
35 118 924
36 166 924
37 178 920
38 131 914
39 101 909
40 211 909
41 145 903
42 168 901
43 160 891
44 175 889
45 203 885
46 220 880
47 249 875
48 610 871
49 171 866
50 210 866
51 227 864
52 144 862
53 506 862
54 135 860
55 258 859
56 602 854
57 240 853
58 604 850
59 115 849
60 153 840
61 503 840
62 247 837
63 125 829
64 147 826
65 136 824
66 142 821
67 606 821
68 263 818
69 111 815
70 233 815
71 234 812
72 230 803
73 232 800
74 129 796
75 219 796
76 228 791
77 701 789
78 215 788
79 106 786
80 209 784
81 303 784
82 152 776
83 216 776
84 221 766
85 501 760
86 174 756
87 502 753
88 246 751
89 244 749
90 608 749
91 256 744
92 243 740
93 218 737
94 266 737
95 120 726
96 163 718
97 264 715
98 110 714
99 138 714
100 262 713
101 603 713
102 801 706
103 507 705
104 607 702
105 103 698
106 162 697
107 402 677
108 251 672
109 113 670
110 132 668
111 509 659
112 241 649
113 226 624
114 102 622
115 180 621
116 214 619
117 254 619
118 173 616
119 257 605
120 238 603
121 260 601
122 176 600
123 611 599
124 202 597
125 204 595
126 149 589
127 248 577
128 255 575
129 205 574
130 177 571
131 217 569
132 261 567
133 143 565
134 401 565
135 117 561
136 265 558
137 155 553
138 121 552
139 223 550
140 161 549
141 154 544
142 122 538
143 237 536
144 170 534
145 123 530
146 207 527
147 130 519
148 242 516
149 172 514
150 252 505
151 116 494
152 235 478
153 259 476
154 605 475
155 124 472
156 212 469
157 114 466
158 504 446
159 119 441
160 157 439
161 104 433
162 165 425
163 158 405
164 105 402
165 112 396
166 139 368
167 156 367
168 236 355
169 224 332
170 304 313
171 159 267
172 164 255
173 179 193
174 167 132
175 267 98
176 268 54
177 183 32
> # Age Buckets
> t1 <-,year(mdy(as.character(voterdb$BirthDate))),sort=TRUE))
> colnames(t1) <- c("BirthYear","Count")
> attach(t1)
> # Ages 17 - 35
> l1 <- as.list(2015 - c(17:35))
> r1 <- subset(t1, BirthYear %in% l1)
> sum(r1$Count)
[1] 37250
> # Ages 36 - 54
> l2 <- as.list(2015 - c(36:54))
> r2 <- subset(t1, BirthYear %in% l2)
> sum(r2$Count)
[1] 37219
> # Ages 55 - 110
> l3 <- as.list(2015 - c(55:110))
> r3 <- subset(t1, BirthYear %in% l3)
> sum(r3$Count)
[1] 54103
> # Ages 17 - 22 e.g. Student Vote
> sl1 <- as.list(2015 - c(17:22))
> s1 <- subset(t1, BirthYear %in% sl1)
> sum(s1$Count)
[1] 10021
> detach(t1)
> # Look at Top 25 Counts of Precincts for BirthDate <= 1980; e.g. Age 35 or older
> a1 <- subset(voterdb,year(mdy(as.character(BirthDate))) <= 1980,select=c(PrecinctID,BirthDate))
> a2 <- aggregate(as.integer(year(mdy(as.character(BirthDate)))) ~ PrecinctID,data = a1,length);colnames(a2) <- c("PrecinctID","Count")
> arrange(a2,desc(Count))[1:25,]
PrecinctID Count
1 201 935
2 182 890
3 601 871
4 208 849
5 169 846
6 126 840
7 302 828
8 140 815
9 107 813
10 101 810
11 181 806
12 250 801
13 134 800
14 213 783
15 127 782
16 108 763
17 146 763
18 505 758
19 168 757
20 301 752
21 222 750
22 148 740
23 131 734
24 145 731
25 303 731
> # Look at Top 25 Counts of Precincts for BirthDate >= 1980; e.g. Age 35 or younger
> a1 <- subset(voterdb,year(mdy(as.character(BirthDate))) >= 1980,select=c(PrecinctID,BirthDate))
> a2 <- aggregate(as.integer(year(mdy(as.character(BirthDate)))) ~ PrecinctID,data = a1,length);colnames(a2) <- c("PrecinctID","Count")
> arrange(a2,desc(Count))[1:25,]
PrecinctID Count
1 245 1385
2 253 942
3 247 577
4 225 522
5 252 497
6 231 493
7 229 461
8 228 437
9 226 427
10 257 423
11 230 401
12 208 398
13 246 397
14 220 382
15 263 371
16 201 370
17 206 369
18 140 351
19 258 347
20 137 310
21 227 309
22 182 305
23 505 303
24 508 303
25 604 303
> # Look at Top 25 Counts of Precincts for BirthDate >= 1993; e.g. Age 22 or younger
> a1 <- subset(voterdb,year(mdy(as.character(BirthDate))) >= 1993,select=c(PrecinctID,BirthDate))
> a2 <- aggregate(as.integer(year(mdy(as.character(BirthDate)))) ~ PrecinctID,data = a1,length); colnames(a2) <- c("PrecinctID","Count")
> arrange(a2,desc(Count))[1:25,]
PrecinctID Count
1 245 1359
2 252 486
3 253 485
4 257 260
5 247 207
6 226 180
7 246 171
8 263 165
9 258 151
10 228 146
11 231 107
12 140 100
13 248 89
14 229 84
15 230 84
16 208 78
17 227 78
18 225 77
19 255 76
20 169 69
21 144 68
22 508 68
23 142 67
24 118 66
25 137 66
> # General Elections 2014:2010 (descending). Key: 1 = ballot counted, 0 = ballot not counted, N/A = not applicable or maybe "not around" at that time.
> count(voterdb,BallotCounted_1)
Source: local data frame [3 x 2]
BallotCounted_1 n
1 0 44958
2 1 73882
3 NA 9732
> count(voterdb,BallotCounted_2)
Source: local data frame [3 x 2]
BallotCounted_2 n
1 0 45543
2 1 65161
3 NA 17868
> count(voterdb,BallotCounted_3)
Source: local data frame [3 x 2]
BallotCounted_3 n
1 0 14741
2 1 90354
3 NA 23477
> count(voterdb,BallotCounted_4)
Source: local data frame [3 x 2]
BallotCounted_4 n
1 0 34784
2 1 60208
3 NA 33580
> count(voterdb,BallotCounted_5)
Source: local data frame [3 x 2]
BallotCounted_5 n
1 0 19140
2 1 71692
3 NA 37740
> # Note no NA listed with aggregate functions below
> # Look at Counts of ballots counted in the last General Election (2014) for BirthDate <= 1980; e.g. Age 35 or older
> a1 <- subset(voterdb,year(mdy(as.character(BirthDate))) <= 1980,select=c(BallotCounted_1,BirthDate))
> a2 <- aggregate(as.integer(year(mdy(as.character(BirthDate)))) ~ BallotCounted_1,data = a1,length);colnames(a2) <- c("BallotCounted2014","Count")
> arrange(a2,desc(Count))
BallotCounted2014 Count
1 1 63385
2 0 26409
> # Look at Top 25 Counts of registered last General Election (2014) for BirthDate >= 1980; e.g. Age 35 or younger
> a1 <- subset(voterdb,year(mdy(as.character(BirthDate))) >= 1980,select=c(BallotCounted_1,BirthDate))
> a2 <- aggregate(as.integer(year(mdy(as.character(BirthDate)))) ~ BallotCounted_1,data = a1,length);colnames(a2) <- c("BallotCounted2014","Count")
> arrange(a2,desc(Count))
BallotCounted2014 Count
1 0 19516
2 1 11453
> # Look at Top 25 Counts of those registered last General Election (2014) for BirthDate >= 1993; e.g. Age 22 or younger
> a1 <- subset(voterdb,year(mdy(as.character(BirthDate))) >= 1993,select=c(BallotCounted_1,BirthDate))
> a2 <- aggregate(as.integer(year(mdy(as.character(BirthDate)))) ~ BallotCounted_1,data = a1,length); colnames(a2) <- c("BallotCounted2014","Count")
> arrange(a2,desc(Count))
BallotCounted2014 Count
1 0 4445
2 1 2211
> # in tbl_df format
> (count(voterdb,year(ymd_hms(as.character(voterdb$LastUpdateDate))),sort=TRUE))
Source: local data frame [11 x 2]
year(ymd_hms(as.character(voterdb$Las... n
1 2014 29374
2 2015 20241
3 2005 19503
4 2013 13899
5 2008 11496
6 2012 10018
7 2010 7516
8 2011 6305
9 2009 4381
10 2006 3003
11 2007 2836
> (count(voterdb,StatusReason,sort=TRUE))
Source: local data frame [27 x 2]
StatusReason n
1 Third Party Change of Address (In-County) 17600
2 Added by WEI Statewide Online Reg 16520
3 15255
4 Re-registration; no changes 14870
5 Original Registration 13255
6 A - Phone/Email update from ballot envelope 10064
7 Re-registration due to Address change 9593
8 Updated by WEI Statewide ACS 9569
9 Address Updated by WEI Addr Chg 8974
10 Office Correction 6283
.. ... ...
> (count(voterdb,ResidenceCity,sort=TRUE))
Source: local data frame [16 x 2]
ResidenceCity n
1 Bellingham 74768
2 Ferndale 14151
3 Lynden 13107
4 Blaine 10332
5 Everson 4958
6 Custer 1922
7 Maple Falls 1816
8 Deming 1807
9 Sumas 1445
10 Sedro-Woolley 1212
11 Pt Roberts 909
12 Nooksack 794
13 Lummi Island 713
14 Acme 412
15 Glacier 196
16 Rockport 30
> (count(voterdb,ResidenceZipCode,sort=TRUE))
Source: local data frame [17 x 2]
ResidenceZipCode n
1 98225 28156
2 98226 26467
3 98229 20145
4 98248 14151
5 98264 13107
6 98230 10332
7 98247 4958
8 98244 2003
9 98240 1922
10 98266 1816
11 98295 1445
12 98284 1212
13 98281 909
14 98276 794
15 98262 713
16 98220 412
17 98283 30
> (count(voterdb,Gender,sort=TRUE))
Source: local data frame [3 x 2]
Gender n
1 F 66984
2 M 61581
3 7
> (count(voterdb,year(mdy(as.character(voterdb$BirthDate))),sort=TRUE))
Source: local data frame [89 x 2]
year(mdy(as.character(voterdb$BirthDa... n
1 1954 2474
2 1952 2450
3 1953 2450
4 1994 2420
5 1951 2409
6 1947 2400
7 1992 2371
8 1993 2370
9 1950 2351
10 1957 2328
.. ... ...
> (count(voterdb,PrecinctID,sort=TRUE))
Source: local data frame [177 x 2]
PrecinctID n
1 245 1385
2 201 1284
3 208 1223
4 253 1183
5 182 1175
6 140 1143
7 302 1082
8 169 1079
9 225 1070
10 231 1051
.. ... ...
> count(voterdb,BallotCounted_1)
Source: local data frame [3 x 2]
BallotCounted_1 n
1 0 44958
2 1 73882
3 NA 9732
> count(voterdb,BallotCounted_2)
Source: local data frame [3 x 2]
BallotCounted_2 n
1 0 45543
2 1 65161
3 NA 17868
> count(voterdb,BallotCounted_3)
Source: local data frame [3 x 2]
BallotCounted_3 n
1 0 14741
2 1 90354
3 NA 23477
> count(voterdb,BallotCounted_4)
Source: local data frame [3 x 2]
BallotCounted_4 n
1 0 34784
2 1 60208
3 NA 33580
> count(voterdb,BallotCounted_5)
Source: local data frame [3 x 2]
BallotCounted_5 n
1 0 19140
2 1 71692
3 NA 37740
No comments:
Post a Comment