Notes on Scientific and Mathematical Programming: Scripts for PDC Data:10.07.2014 Contributions to WA LD 42

Political piece is here.

## Scripts for PDC Data
## 10.07.2014 Contributions to WA LD 42
# Candidates Alias
#JM
#SS
#VKB
#LVW
#SMF
#DJE

# Candidate Columns
#Contributor "character"
#Date "character"
#Amount "numeric"
#P.G "character"
#City "character"
#State "character"
#Zip "integer"
#Employer "character"
#Occupation "character"
#CN "factor"

# graph print function
setwd("C:/Politics/10.09.2014FinancialContributions")
jpeg_create <- function() {
systime <- as.numeric(Sys.time())
# dev.new()
jpeg(filename = systime,
width = 1024, height = 768, units = "px", pointsize = 12,
quality = 100, bg = "white", res = NA, family = "", restoreConsole = TRUE,
type = c("windows"))
Sys.sleep(2)
}

## start LD script here
library(plyr)
library(lattice)
library(sqldf)
PDC_10.09.2014 <- read.csv("PDC_10.09.2014.csv", header = TRUE, strip.white = TRUE, quote = "", stringsAsFactors = FALSE)
PDC <- PDC_10.09.2014
TOPLD <- arrange(as.data.frame(with(PDC,(xtabs(Contributions ~ District)))),desc(Freq))
jpeg_create()
with(arrange(TOPLD[1:10,],desc(Freq)),(barchart(~ Freq | as.factor(District),as.table=TRUE,xlab="Top 10 WA LD Districts")))
jpeg_create()
with(TOPLD[1:10,],(barplot(Freq/1000, names.arg=as.factor(District),xlab="Contributions to Top 10 WA LD Districts",ylab="Contributions in 1000s")))
with(TOPLD[1:10,],(mtext(sum(Freq),side=1,line=2)))
TOPLD[1:10,]

## start WC candidate script here
# setwd("C:/Politics")
cL <- list("JM.csv","SS.csv","VKB.csv","LVW.csv","SMF.csv","DJE.csv")

JM <- read.csv("JM.csv", header = TRUE, skip=4, strip.white = TRUE, quote = "", stringsAsFactors = FALSE)
SS <- read.csv("SS.csv", header = TRUE, skip=4, strip.white = TRUE, quote = "", stringsAsFactors = FALSE)
VKB <- read.csv("VKB.csv", header = TRUE, skip=4, strip.white = TRUE, quote = "", stringsAsFactors = FALSE)
LVW <- read.csv("LVW.csv", header = TRUE, skip=4, strip.white = TRUE, quote = "", stringsAsFactors = FALSE)
SMF <- read.csv("SMF.csv", header = TRUE, skip=4, strip.white = TRUE, quote = "", stringsAsFactors = FALSE)
DJE <- read.csv("DJE.csv", header = TRUE, skip=4, strip.white = TRUE, quote = "", stringsAsFactors = FALSE)

# Bind Candidate name to Alias and Party Affiliation
JM <- cbind(JM,"CN"='JM',"PA"='D')
SS <- cbind(SS,"CN"='SS',"PA"='D')
VKB <- cbind(VKB,"CN"='VKB',"PA"='R')
LVW <- cbind(LVW,"CN"='LVW',"PA"='R')
SMF <- cbind(SMF,"CN"='SMF',"PA"='D')
DJE <- cbind(DJE,"CN"='DJE',"PA"='R')
ALL <- rbind(JM,SS,VKB,LVW,SMF,DJE)

# tables still working on tables...
tAmount <-arrange(as.data.frame(with(ALL,(table(Amount=Amount)),stringsAsFactors = FALSE)),desc(Freq))
tCandA <- arrange(subset(as.data.frame(with(ALL,(table(Amount=Amount,Candidate=CN)),stringsAsFactors = FALSE)),Freq != 0),desc(Freq))
tContribA <- arrange(subset(as.data.frame(with(ALL,(table(Amount=Amount,Contributor=Contributor)),stringsAsFactors = FALSE)),Freq != 0),desc(Freq))

tCityA <- arrange(subset(as.data.frame(with(ALL,(table(Amount=Amount,City=City)),stringsAsFactors = FALSE)),Freq != 0),desc(Freq))
tCityAR <- arrange(subset(as.data.frame(with(subset(ALL,PA=='R'),(table(Amount=Amount,City=City)),stringsAsFactors = FALSE)),Freq != 0),desc(Freq))
tCityAD <- arrange(subset(as.data.frame(with(subset(ALL,PA=='D'),(table(Amount=Amount,City=City)),stringsAsFactors = FALSE)),Freq != 0),desc(Freq))

tStateA <- arrange(subset(as.data.frame(with(ALL,(table(Amount=Amount,State=State)),stringsAsFactors = FALSE)),Freq != 0),desc(Freq))
tStateAR <- arrange(subset(as.data.frame(with(subset(ALL,PA=='R'),(table(Amount=Amount,State=State)),stringsAsFactors = FALSE)),Freq != 0),desc(Freq))
tStateAD <- arrange(subset(as.data.frame(with(subset(ALL,PA=='D'),(table(Amount=Amount,State=State)),stringsAsFactors = FALSE)),Freq != 0),desc(Freq))

# xtabs
xtContribA <- arrange(as.data.frame(with(ALL,(xtabs(Amount ~ Contributor)))),desc(Freq))
xtContribAR <- arrange(as.data.frame(with(subset(ALL,PA=='R'),(xtabs(Amount ~ Contributor)))),desc(Freq))
xtContribAD <- arrange(as.data.frame(with(subset(ALL,PA=='D'),(xtabs(Amount ~ Contributor)))),desc(Freq))

xtCityA <- arrange(as.data.frame(with(ALL,(xtabs(Amount ~ City)))),desc(Freq))
xtCityAR <- arrange(as.data.frame(with(subset(ALL,PA=='R'),(xtabs(Amount ~ City)))),desc(Freq))
xtCityAD <- arrange(as.data.frame(with(subset(ALL,PA=='D'),(xtabs(Amount ~ City)))),desc(Freq))
xtStateA <- arrange(as.data.frame(with(ALL,(xtabs(Amount ~ State)))),desc(Freq))
xtStateAR <- arrange(as.data.frame(with(subset(ALL,PA=='R'),(xtabs(Amount ~ State)))),desc(Freq))
xtStateAD <- arrange(as.data.frame(with(subset(ALL,PA=='D'),(xtabs(Amount ~ State)))),desc(Freq))

xtCandA <- arrange(as.data.frame(with(ALL,(xtabs(Amount ~ CN)))),desc(Freq))
xtZipA <- arrange(as.data.frame(with(ALL,(xtabs(Amount ~ Zip)))),desc(Freq))
xtEmployA <- arrange(as.data.frame(with(ALL,(xtabs(Amount ~ Employer)))),desc(Freq))
xtOccupA <- arrange(as.data.frame(with(ALL,(xtabs(Amount ~ Occupation)))),desc(Freq))
# xtAmountA <- arrange(as.data.frame(with(ALL,(xtabs(Amount ~ as.numeric(row.names(ALL)))))),desc(Freq))

# Lists
xtCandA[1:6,]
tAmount[1:20,]
tCandA[1:50,]
xtContribA[1:50,]
xtCityA[1:50,]
xtStateA[1:50,]
xtZipA[1:50,]
xtEmployA[1:50,]
xtOccupA[1:50,]

# Charts
#jpeg_create()
with(xtCandA,(barplot(Freq/1000,names.arg=CN,xlab="Sums to Candidates",ylab="Contributions in $1000s")))
with(xtCandA,(mtext(c(sum(Freq)),side=1,line=2)))
#jpeg_create()
with(xtContribA,(barplot(Freq,xlab="Contributors",ylab="Contributions in $")))
with(xtContribA,(mtext(c(length(Contributor)),side=1,line=2)))
jpeg_create()
par(mfrow=c(1,2))
with(xtContribAD,(barplot(Freq,xlab="Contributions from Contributors",xlim=c(0,1157),ylim=c(0,3500),ylab="Democrat Contributions in $",col=rgb(0,0,1,1))))
with(xtContribAD,(mtext(c(length(Contributor)),side=1,line=2)))
with(xtContribAD,(mtext(c(sum(Freq)),side=1,line=1)))
with(xtContribAR,(barplot(Freq,xlab="Contributions from Contributors",xlim=c(0,1157),ylim=c(0,3500),ylab="Republican Contributions in $",col=rgb(1,0,0,1))))
with(xtContribAR,(mtext(c(length(Contributor)),side=1,line=2)))
with(xtContribAR,(mtext(c(sum(Freq)),side=1,line=1)))
par(mfrow=c(1,1))
jpeg_create()
with(subset(xtStateA, State !="WA"),(barplot(Freq/1000,names.arg=State,xlab="Sums of States not Washington",ylab="Contributions in $1000s",las=2)))
with(xtStateA,(mtext(sum(subset(xtStateA, State !="WA",select=Freq)),side=1,line=2)))
# with(xtContribA,(mtext(c(length(Contributor)),side=1,line=2)))
jpeg_create()
with(subset(xtStateAR, State !="WA"),(barplot(Freq/1000,names.arg=State,xlab="Sums of States not Washington",ylab="Republican Contributions in $1000s",las=2)))
with(xtStateAR,(mtext(sum(subset(xtStateAR, State !="WA",select=Freq)),side=1,line=2)))
jpeg_create()
with(subset(xtStateAD, State !="WA"),(barplot(Freq/1000,names.arg=State,xlab="Sums of States not Washington",ylab="DemocratContributions in $1000s",las=2)))
with(xtStateAD,(mtext(sum(subset(xtStateAD, State !="WA",select=Freq)),side=1,line=2)))

jpeg_create()
with(subset(xtCityA[1:30,], City !="BELLINGHAM"),(barplot(Freq/1000,names.arg=City,cex.names=.675,ylab="Contributions in $1000s",las=2)))
mtext("Sums of Top 30 Cities not Bellingham",side=3,line=1)
with(xtCityA,(mtext(sum(subset(xtCityA[1:30,], City !="BELLINGHAM",select=Freq)),side=3,line=2)))
jpeg_create()
with(subset(xtCityAR[1:30,], City !="BELLINGHAM"),(barplot(Freq/1000,names.arg=City,cex.names=.675,ylab="Republican Contributions in $1000s",las=2)))
mtext("Sums of Top 30 Cities not Bellingham",side=3,line=1)
with(xtCityAR,(mtext(sum(subset(xtCityAR[1:30,], City !="BELLINGHAM",select=Freq)),side=3,line=2)))
jpeg_create()
with(subset(xtCityAD[1:30,], City !="BELLINGHAM"),(barplot(Freq/1000,names.arg=City,cex.names=.675,ylab="Democrat Contributions in $1000s",las=2)))
mtext("Sums of Top 30 Cities not Bellingham",side=3,line=1)
with(xtCityAD,(mtext(sum(subset(xtCityAD[1:30,], City !="BELLINGHAM",select=Freq)),side=3,line=2)))
graphics.off()

#populist profile data
library(lattice)
library(plyr)
D <- with(ALL,(subset(Amount,PA == "D")));sum(D);length(D)
R <- with(ALL,(subset(Amount,PA == "R")));sum(R);length(R)
# Top Donations of high frequency count
R10 <- arrange(arrange(count(R),desc(freq))[1:10,],desc(x))
D10 <- arrange(arrange(count(D),desc(freq))[1:10,],desc(x))
names(R10)
colnames(D10) <- c("DDonation","DCount")
colnames(R10) <- c("RDonation","RCount")
RD10 <- (cbind(R10,D10))
RD10
#jpeg_create()
with(RD10,(barchart(~ RCount + DCount | as.factor(c(RDonation,DDonation)),as.table=TRUE,col=c("red","blue"),xlab="Top 10 Donation Frequencies resorted by Amount")))

R20 <- arrange(arrange(count(R),desc(freq))[1:20,],desc(x))
D20 <- arrange(arrange(count(D),desc(freq))[1:20,],desc(x))
names(R20)
colnames(D20) <- c("DDonation","DCount")
colnames(R20) <- c("RDonation","RCount")
RD20 <- (cbind(R20,D20))
RD20
#jpeg_create()
with(RD20,(barchart(~ RCount + DCount | as.factor(c(RDonation,DDonation)),as.table=TRUE,col=c("red","blue"),xlab="Top 20 Donation Frequencies resorted by Amount")))
#jpeg_create()
barchart(with(ALL,(quantile(subset(Amount,PA == "D"),probs = seq(0, .95, 0.05),type=7))),col=rgb(1,0,0,.75),xlab="")
Ejpeg_create()
barchart(with(ALL,(quantile(subset(Amount,PA == "R"),probs = seq(0, .95, 0.05),type=7))),col=rgb(0,0,1,.75),xlab="")

#jpeg_create()
barchart(~ R | as.factor(length(R)),col=c("red"))
#jpeg_create()
barchart(~ D | as.factor(length(D)),col=c("blue"))
#jpeg_create()
barchart(~ R + D | as.factor(length(D)),col=c("red","blue"))

quantile(ALL$Amount,probs = seq(0, 1, 0.05),type=7)
with(ALL,(quantile(subset(Amount,PA == "D"),probs = seq(0, 1, 0.05),type=7)))
with(ALL,(quantile(subset(Amount,PA == "R"),probs = seq(0, 1, 0.05),type=7)))

Q <- data.frame("RQuant"=with(ALL,(quantile(subset(Amount,PA == "R"),probs = seq(0, .95, 0.05),type=7))))
Q <- cbind(data.frame(Q,"DQuant"=with(ALL,(quantile(subset(Amount,PA == "D"),probs = seq(0, .95, 0.05),type=7)))))
Q <- cbind(data.frame(Q,"AllQuant"=with(ALL,(quantile(Amount,probs = seq(0, .95, 0.05),type=7)))))

#jpeg_create()
row.names(Q) <- gsub('%','',row.names(Q))
with(Q,(barchart(RQuant + AllQuant + DQuant ~ as.factor(row.names(Q)),col=c(rgb(1,0,0,.75),"grey",rgb(0,0,1,.75)),xlab="Quantile Percentage",ylab="Donation Amount")))

#sqldf queries for the top 50
C_all <- ALL
SUM_CN_D_Donation <- sqldf("Select Distinct(Contributor),CN,SUM(Amount)as Donation from C_all where PA = 'D' Group By Contributor,CN Order By Donation DESC LIMIT 50")
SUM_CN_R_Donation <- sqldf("Select Distinct(Contributor),CN,SUM(Amount)as Donation from C_all where PA = 'R' Group By Contributor,CN Order By Donation DESC LIMIT 50")
SUM_D_Donation <- sqldf("Select Distinct(Contributor),SUM(Amount) as Donation from C_all where PA = 'D' Group By Contributor Order By Donation DESC LIMIT 50")
SUM_R_Donation <- sqldf("Select Distinct(Contributor),SUM(Amount) as Donation from C_all where PA = 'R' Group By Contributor Order By Donation DESC LIMIT 50")

par(mgp=c(-1,-14, -1))
with(SUM_D_Donation[2:26,],(barplot(Donation,names.arg=Contributor,las=2,horiz=TRUE,xlab="Top 25 Democrat contributors by Amount",cex.axis=.65,cex.names=.65,col=rgb(0,0,1,.50))))
with(SUM_R_Donation[1:25,],(barplot(Donation,names.arg=Contributor,las=2,horiz=TRUE,xlab="Top 25 Republican contributors by Amount",cex.axis=.65,cex.names=.65,col=rgb(1,0,0,.50))))
with(SUM_R_Donation[1:25,],(barplot(Donation,names.arg=Contributor,las=2,horiz=TRUE,xlab="Top 25 Republican contributors by Amount",cex.axis=.65,cex.names=.65,col=rgb(1,0,0,.50))))

par(mgp=c(3, 1, 0))
with(SUM_R_Donation,(barplot(Donation,ylim=c(0,max(Donation)),las=2,col=rgb(1,0,0,.75))))
par(new=TRUE)
with(SUM_D_Donation,(barplot(Donation,ylim=c(0,max(Donation)),las=2,col=rgb(0,0,1,.75))))
mtext("Profile of Top 50 Contributors for Democrats (blue) and Republicans (red) in the WA 42 LD.",side=3,line=1)
par(new=FALSE)

# exploratory code for data
require(plyr)
ByPA <-
ddply(ALL, .(PA),plyr::summarize,
SUM = sum(Amount),
CONTRIB = length(Contributor),
CONTRIB.UNIQ = length(unique(Contributor)),
CITY.UNIQ = length(unique(City)),
STATE.UNIQ = length(unique(State)),
ZIP.UNIQ = length(unique(Zip))
)

ByCN <-
ddply(ALL, .(CN),plyr::summarize,
SUM = sum(Amount),
CONTRIB = length(Contributor),
CONTRIB.UNIQ = length(unique(Contributor)),
CITY.UNIQ = length(unique(City)),
STATE.UNIQ = length(unique(State)),
ZIP.UNIQ = length(unique(Zip))
)

ByState <-
ddply(ALL, .(State),plyr::summarize,
SUM = sum(Amount),
CONTRIB = length(Contributor),
CONTRIB.UNIQ = length(unique(Contributor)),
CITY.UNIQ = length(unique(City)),
ZIP.UNIQ = length(unique(Zip))
)

ByCity <-
ddply(ALL, .(City),plyr::summarize,
SUM = sum(Amount),
CONTRIB = length(Contributor),
CONTRIB.UNIQ = length(unique(Contributor)),
STATE.UNIQ = length(unique(State)),
ZIP.UNIQ = length(unique(Zip))
)

ByCityD <-
ddply(subset(ALL,PA=="D"), .(City),plyr::summarize,
SUM = sum(Amount),
CONTRIB = length(Contributor),
CONTRIB.UNIQ = length(unique(Contributor)),
STATE.UNIQ = length(unique(State)),
ZIP.UNIQ = length(unique(Zip))
)

ByCityR <-
ddply(subset(ALL,PA=="R"), .(City),plyr::summarize,
SUM = sum(Amount),
CONTRIB = length(Contributor),
CONTRIB.UNIQ = length(unique(Contributor)),
STATE.UNIQ = length(unique(State)),
ZIP.UNIQ = length(unique(Zip))
)

arrange(ByCityD,desc(SUM))
arrange(ByCityD,desc(SUM))[1:10,]
arrange(ByCityR,desc(SUM))[1:10,]

ByDonation <-
ddply(ALL, .(Amount),plyr::summarize,
SUM = sum(Amount),
CONTRIB = length(Contributor),
CONTRIB.UNIQ = length(unique(Contributor)),
STATE.UNIQ = length(unique(State)),
PA.UNIQ = length(unique(PA))
)

arrange(ByCN, desc(SUM))
arrange(ByPA, desc(SUM))
arrange(ByState, desc(SUM))
arrange(ByCity, desc(SUM))

# more populist profile analysis code
R_All <- subset(ALL, PA=="R")
D_All <- subset(ALL, PA=="D")

R <- sum(subset(ALL, PA=="R",select=c(Amount)))
D <- sum(subset(ALL, PA=="D",select=c(Amount)))
R_lt_100 <- sum(subset(R_All, Amount <= 100,select=c(Amount)))
D_lt_100 <- sum(subset(D_All, Amount <= 100,select=c(Amount)))

D1 <- sum(SUM_D_Donation$Donation)
R1 <- sum(SUM_R_Donation$Donation)
D2 <- length(SUM_D_Donation$Donation)
R2 <- length(SUM_R_Donation$Donation)
R + D
R1 + D1
(R1 + D1) / (R + D)
R1/R
D1/D

BHAM <- (subset(ALL, City == "BELLINGHAM",select=c(Amount,PA)))
sum(subset(BHAM, PA =="D",select=Amount))
sum(subset(BHAM, PA =="R",select=Amount))
BHAM1 <- (subset(ALL, City == "BELLLINGHAM",select=c(Amount,PA)))
sum(subset(BHAM1, PA =="D",select=Amount))
sum(subset(BHAM1, PA =="R",select=Amount))
SEATTLE <- (subset(ALL, City == "SEATTLE",select=c(Amount,PA)))
sum(subset(SEATTLE, PA =="D",select=Amount))
sum(subset(SEATTLE, PA =="R",select=Amount))
OLYMPIA <- (subset(ALL, City == "OLYMPIA",select=c(Amount,PA)))
sum(subset(OLYMPIA, PA =="D",select=Amount))
sum(subset(OLYMPIA, PA =="R",select=Amount))
OLYMPIA1 <- (subset(ALL, City == "OLYMPIIA",select=c(Amount,PA)))
sum(subset(OLYMPIA1, PA =="D",select=Amount))
sum(subset(OLYMPIA1, PA =="R",select=Amount))

nrow(subset(xtContribAR, Freq <= 100))
nrow(subset(xtContribAD, Freq <= 100))
sum(subset(xtContribAR, Freq <= 100,select=Freq))
sum(subset(xtContribAD, Freq <= 100,select=Freq))

nrow(subset(xtContribAR, Freq >= 1000))
nrow(subset(xtContribAD, Freq >= 1000))
sum(subset(xtContribAR, Freq >= 1000,select=Freq))
sum(subset(xtContribAD, Freq >= 1000,select=Freq))

nrow(subset(xtContribAR, Freq <= 1000))
nrow(subset(xtContribAD, Freq <= 1000))
sum(subset(xtContribAR, Freq <= 1000,select=Freq))
sum(subset(xtContribAD, Freq <= 1000,select=Freq))

Notes on Scientific and Mathematical Programming

Friday, October 10, 2014

Scripts for PDC Data:10.07.2014 Contributions to WA LD 42

No comments:

Post a Comment