The corresponding political piece for this article is here.
The active voter Whatcom County database as of 05.20.2014 is (row) 36 * (column) 126283 = (ncell) 4546188. On an 8 GB I-5 laptop on running 64 bit R 3.1 this isn't much of a problem. There are a number of different approaches/packaged I have found to using R for large data:
- data.table
- RPostgres
- plyr
- sqldf
b2009__ <-(subset(BallotCounted2009,grepl("NA",BallotCounted2009),select = freq))[2,]
BallotCounted is the table of summarized votes for the years 2009 - 2013 (General Election only). I subset using grepl and then am forced to subset again apparently because either grepl is seeing '0' as equivalent to 'NA' or the subset function itself reads a data.frame column as one line?
> BallotCounted
x freq x freq x freq x freq x freq
1 0 53080 0 17044 0 38248 0 21075 0 38042
2 1 69012 1 97212 1 63444 1 75750 1 52735
3 NA 4190 NA 12026 NA 24590 NA 29457 NA 35505
> BallotCounted2009
x freq
1 0 38042
2 1 52735
3 NA 35505
freq
1 38042
3 35505
> subset(BallotCounted2009,grepl("NA",BallotCounted2009),select = freq)[2,]
[1] 35505
Nothing I try here helps:
> subset(BallotCounted2009,grepl("NA",BallotCounted2009,useBytes=TRUE))
x freq
1 0 38042
3 NA 35505
> subset(BallotCounted2009,grepl("NA",BallotCounted2009,fixed=TRUE))
x freq
1 0 38042
3 NA 35505
> subset(BallotCounted2009,grepl("NA",BallotCounted2009,perl=TRUE))
x freq
1 0 38042
3 NA 35505
Clearly, some of this code is less than optimized.
#require(sqldf)
require(plyr)
setwd("C:/Politics")
VDB_05.20.2014 <- read.delim("05.20.2014.txt")
VDB <- VDB_05.20.2014
VDB_Voted <- VDB[,c(1,32:36)]
#sqldf("Select Count(*) from VDB_Voted where BallotCounted_5 = 1")
#sqldf("Select Count(*) from VDB_Voted where BallotCounted_4 = 1")
#sqldf("Select Count(*) from VDB_Voted where BallotCounted_3 = 1")
#sqldf("Select Count(*) from VDB_Voted where BallotCounted_2 = 1")
#sqldf("Select Count(*) from VDB_Voted where BallotCounted_1 = 1")
BallotCounted2013 <- (count(VDB_Voted$BallotCounted_1))
BallotCounted2012 <- (count(VDB_Voted$BallotCounted_2))
BallotCounted2011 <- (count(VDB_Voted$BallotCounted_3))
BallotCounted2010 <- (count(VDB_Voted$BallotCounted_4))
BallotCounted2009 <- (count(VDB_Voted$BallotCounted_5))
BallotCounted <- cbind(BallotCounted2013,BallotCounted2012,BallotCounted2011,BallotCounted2010,BallotCounted2009)
par(mfrow=c(1,4))
b2009_ <-(subset(BallotCounted2009,x == 1,select = freq)[1,])
b2010_ <-(subset(BallotCounted2010,x == 1,select = freq)[1,])
b2011_ <-(subset(BallotCounted2011,x == 1,select = freq)[1,])
b2012_ <-(subset(BallotCounted2012,x == 1,select = freq)[1,])
b2013_ <-(subset(BallotCounted2013,x == 1,select = freq)[1,])
barplot( c(b2009_,b2010_,b2011_,b2012_,b2013_),names.arg=c("2009","2010","2011","2012","2013"),xlab="Voted",ylim=c(0,100000),col="red")
b2009 <-(subset(BallotCounted2009,x == 0,select = freq)[1,])
b2010 <-(subset(BallotCounted2010,x == 0,select = freq)[1,])
b2011 <-(subset(BallotCounted2011,x == 0,select = freq)[1,])
b2012 <-(subset(BallotCounted2012,x == 0,select = freq)[1,])
b2013 <-(subset(BallotCounted2013,x == 0,select = freq)[1,])
barplot( c(b2009,b2010,b2011,b2012,b2013),names.arg=c("2009","2010","2011","2012","2013"),xlab="Not Voted",ylim=c(0,100000),col="blue")
barplot( c((b2009_/(b2009 + b2009_)),
(b2010_/(b2010 + b2010_)),
(b2011_/(b2011 + b2011_)),
(b2012_/(b2012 + b2012_)),
(b2013_/(b2013 + b2013_)) ),
names.arg=c("2009","2010","2011","2012","2013"),xlab="Voted/(Voted + Not Voted)",col="dark gray")
b2009__ <-(subset(BallotCounted2009,grepl("NA",BallotCounted2009),select = freq))[2,]
b2010__ <-(subset(BallotCounted2010,grepl("NA",BallotCounted2009),select = freq))[2,]
b2011__ <-(subset(BallotCounted2011,grepl("NA",BallotCounted2009),select = freq))[2,]
b2012__ <-(subset(BallotCounted2012,grepl("NA",BallotCounted2009),select = freq))[2,]
b2013__ <-(subset(BallotCounted2013,grepl("NA",BallotCounted2009),select = freq))[2,]
barplot( c(b2009__,b2010__,b2011__,b2012__,b2013__),names.arg=c("2009","2010","2011","2012","2013"),ylab="N/A : 05.20.2014 voters not registered in these elections",ylim=c(0,100000),col="light gray")
mtext("Whatcom County General Elections 2009 - 2013 for voters active May 20 2014 only",side=3,outer=TRUE,line=-1)
mtext("Totals for Voters active May 20 2014 only", side=1, outer=TRUE,line=-1)
par(mfrow=c(1,2))
b2009_ <-(subset(BallotCounted2009,x == 1,select = freq)[1,])
b2010_ <-(subset(BallotCounted2010,x == 1,select = freq)[1,])
b2011_ <-(subset(BallotCounted2011,x == 1,select = freq)[1,])
b2012_ <-(subset(BallotCounted2012,x == 1,select = freq)[1,])
b2013_ <-(subset(BallotCounted2013,x == 1,select = freq)[1,])
barplot( c(b2009_,b2010_,b2011_,b2012_,b2013_),names.arg=c("2009","2010","2011","2012","2013"),ylim=c(0,100000),xlab="Voted",col="red")
b2009 <-(subset(BallotCounted2009,x == 0,select = freq)[1,])
b2010 <-(subset(BallotCounted2010,x == 0,select = freq)[1,])
b2011 <-(subset(BallotCounted2011,x == 0,select = freq)[1,])
b2012 <-(subset(BallotCounted2012,x == 0,select = freq)[1,])
b2013 <-(subset(BallotCounted2013,x == 0,select = freq)[1,])
barplot( c(b2009,b2010,b2011,b2012,b2013),names.arg=c("2009","2010","2011","2012","2013"),xlab="Not Voted",ylim=c(0,100000),col="blue")
mtext("Whatcom County General Elections 2009 - 2013 for voters active May 20 2014 only",side=3,outer=TRUE,line=-1)
par(mfrow=c(1,2))
plot( c(b2009_,b2010_,b2011_,b2012_,b2013_),type="p",pch=15,cex=5,xlab="2009 - 2013 General Election : Voted(Blue) Not Voted(Red)",ylab="Votes",ylim=c(0,100000),col.lab="red",col="blue")
points( c(b2009,b2010,b2011,b2012,b2013),type="p",pch=15,cex=5,col="red")
barplot( c((b2009_/(b2009 + b2009_)),
(b2010_/(b2010 + b2010_)),
(b2011_/(b2011 + b2011_)),
(b2012_/(b2012 + b2012_)),
(b2013_/(b2013 + b2013_)) ),
names.arg=c("2009","2010","2011","2012","2013"),xlab="Voted/(Voted + Not Voted)",col="dark gray")
mtext("Whatcom County General Elections 2009 - 2013 for voters active May 20 2014 only",side=3,outer=TRUE,line=-1)
par(mfrow=c(1,2))
b2009__ <-(subset(BallotCounted2009,grepl("NA",BallotCounted2009),select = freq))[2,]
b2010__ <-(subset(BallotCounted2010,grepl("NA",BallotCounted2009),select = freq))[2,]
b2011__ <-(subset(BallotCounted2011,grepl("NA",BallotCounted2009),select = freq))[2,]
b2012__ <-(subset(BallotCounted2012,grepl("NA",BallotCounted2009),select = freq))[2,]
b2013__ <-(subset(BallotCounted2013,grepl("NA",BallotCounted2009),select = freq))[2,]
barplot( c(b2009__,b2010__,b2011__,b2012__,b2013__),names.arg=c("2009","2010","2011","2012","2013"),xlab="N/A : 05.20.2014 voters not registered in these elections with total votes in red",ylim=c(0,100000),col="light gray")
points( c(b2009_,b2010_,b2011_,b2012_,b2013_),type="p",pch=15,cex=5,col="red")
# mtext("Whatcom County General Elections 2009 - 2013 for voters active May 20 2014 only",side=3,outer=TRUE,line=-1)
barplot( c((b2009 + b2009_),
(b2010 + b2010_),
(b2011 + b2011_),
(b2012 + b2012_),
(b2013 + b2013_) ),
names.arg=c("2009","2010","2011","2012","2013"),xlab="Voted + Not Voted",col="light blue")
mtext("Whatcom County General Elections 2009 - 2013 for voters active May 20 2014 only",side=3,outer=TRUE,line=-1)
No comments:
Post a Comment