Friday, February 6, 2015

R code to look at Voter Age in Buckets


This is R code to look at voter age in buckets: it tabulates registered voters by age, draws a bar chart of the counts with a lowess smoothing curve, and adds a locfit density graph.

## To look at Voter database history in age bucket with smoothing and (locfit) density curve

## Note: Age is computed as the current calendar year, as.numeric(year(now())),
## minus the birth year. A fixed year such as 2014 (the year of the 12.19.14
## extract) may be more appropriate.

library(plyr)
library(lubridate)
library(locfit)

# Load the tab-delimited voter extract. quote = "" turns quote handling off so
# quote characters inside fields are read literally; text columns stay
# character rather than factor.
voterdb121914 <- read.delim(
  "C:/Politics/12.19.14_001.txt",
  header = TRUE,
  strip.white = TRUE,
  sep = "\t",
  quote = "",
  stringsAsFactors = FALSE
)
voterdb <- voterdb121914

# Age = current calendar year minus birth year, with BirthDate parsed as
# month/day/year. Rows whose BirthDate cannot be parsed get Age = NA.
# The column is assigned by name (not cbind-ed) so that an Age column already
# present in the extract is overwritten instead of duplicated; with a duplicate,
# voterdb$Age would silently return the file's column, not this one.
voterdb$Age <- as.numeric(
  year(now()) - year(mdy(as.character(voterdb$BirthDate)))
)

# Voter counts per age. print() makes the table appear when the script is run
# with source() as well as when it is pasted into the console.
print(as.data.frame(with(voterdb, xtabs(~Age))))

# To look at data for just one Precinct
# as.data.frame(with(subset(voterdb,PrecinctID == "208"),xtabs(~Age)))

# voterdb <- subset(voterdb,PrecinctID == "208")

# Tabulate voters by age once and take both columns from the same table.
age_table <- data.frame(xtabs(~Age, data = voterdb))

# NOTE: despite the names, `y` holds the age labels (first column of the
# table) and `x` holds the number of voters at each age (second column).
y <- age_table[, 1]
x <- age_table[, 2]

# Per-voter vectors: one entry per row of voterdb.
AGE <- voterdb$Age
RN <- voterdb$RegistrationNumber

# barplot(data.frame(xtabs(~Age, data=voterdb3))[,2],names.arg=data.frame(xtabs(~Age, data=voterdb3))[,1],las=2,cex.names=.75
# Bar chart of voter counts by age: `x` are the counts (bar heights) and `y`
# the age labels. barplot() returns the x-coordinates of the bar midpoints;
# keep them so the smoothing curve can be drawn over the bars it describes.
bar_mids <- barplot(
  x,
  names.arg = y,
  las = 2,
  cex.names = 0.75,
  cex = 1.25,
  xlab = ""
)

# Total number of voter records, shown above the plot.
mtext(length(RN), side = 3, line = 0)

# Lowess smooth of the counts, positioned at the bar midpoints. Passing only
# the counts would place the curve at x = 1, 2, 3, ... while the bars sit at
# 0.7, 1.9, 3.1, ... (default width and spacing), so the curve would drift
# further left of the bars with every bucket. `delta` is measured on the x
# axis, so it is scaled from the midpoints rather than from the counts.
lines(
  stats::lowess(
    bar_mids,
    x,
    iter = 100,
    delta = 0.001 * diff(range(bar_mids))
  ),
  col = "red"
)

# Local-likelihood density estimate of voter age, using the Age column of
# voterdb. The formula previously read `~x`; voterdb has no visible `x`
# column, so that resolved to the global vector of per-age counts and the
# fit described the distribution of bucket sizes instead of ages.
# alpha = 0.5 is the smoothing parameter passed to locfit.
Age.fit.et <- locfit(~Age, data = voterdb, alpha = 0.5)

# Plot the fitted density curve only (get.data = FALSE leaves the data points
# off); FALSE is spelled out because `F` is an ordinary, reassignable variable.
plot(Age.fit.et, get.data = FALSE, pch = 19, cex = 0.25, col = "blue")

# Summary statistics for voter age.
#
# AGE contains NA for every record whose BirthDate could not be parsed.
# Without na.rm = TRUE a single such record turns the mean, standard
# deviation, median, MAD and range into NA and makes quantile() stop with an
# error, so missing ages are excluded here and reported once up front.
n_missing_age <- sum(is.na(AGE))
if (n_missing_age > 0) {
  warning(
    n_missing_age,
    " voter record(s) have a missing Age and are excluded from the statistics",
    call. = FALSE
  )
}

# Youngest and oldest age, computed once and reused below.
r.x <- range(AGE, na.rm = TRUE)

# One-row table of statistics. SampleSize counts every record in voterdb,
# including those with a missing Age. data.frame() rewrites the labels
# "Min Age"/"Max Age" to the syntactic names Min.Age/Max.Age.
dff <- data.frame(cbind(
  "SampleSize" = nrow(voterdb),
  "Mean" = mean(AGE, na.rm = TRUE),
  "Stdev" = sd(AGE, na.rm = TRUE),
  "Median" = median(AGE, na.rm = TRUE),
  "MAD" = mad(AGE, na.rm = TRUE),
  "RangeDiff" = diff(r.x),
  "Min Age" = r.x[1],
  "Max Age" = r.x[2]
))

# Transposed so each statistic prints on its own row.
print(t(dff))
# summary() reports the number of NA values itself.
print(summary(AGE))
# Deciles of age (0%, 10%, ..., 100%).
print(quantile(AGE, probs = seq(0, 1, 0.1), na.rm = TRUE))

No comments:

Post a Comment