Thursday, September 25, 2014

avg(age::int4) OVER (partition by precinctid)

Political piece is here.

 precinctid | activeaverage | inactiveaverage | diffaverage
------------+---------------+-----------------+-------------
        268 |  53.6         |  36.1           |  17.5
        203 |  52.9         |  36.4           |  16.5
        139 |  45.9         |  30.6           |  15.4
        202 |  60.1         |  45.3           |  14.8
        167 |  53.9         |  39.2           |  14.7
        103 |  58.9         |  44.5           |  14.4
        205 |  50.7         |  36.4           |  14.3
        201 |  52.5         |  39.2           |  13.3
        144 |  49.6         |  36.6           |  13.1
        131 |  52.2         |  39.5           |  12.7
....

There are several new Postgres moves here for me. At this point, I expect my joins to become more sophisticated in the future.  Postgres syntax:

avg(age::int4) OVER (partition by precinctid)

allows a statistical slice of a factor similar to xtabs  (cross tabulation) in R.  Quite frankly, I think R does this with greater fluidity and less code.



--- Age Difference by Precinct in Active,Inactive voters
--- Active vs Inactive by precinctid average age
drop view Joinview1;
drop view InactPrAge;
drop view ActPrAge;
drop view Inactive;
drop view Active;

Create view Active as 
Select precinctid,(2014 - extract(year from birthdate)) as AGE
From voterdb092314
where statuscode = 'A';

Create view Inactive as 
Select precinctid,(2014 - extract(year from birthdate)) as AGE
From voterdb092314
where statuscode = 'I';

Create view ActPrAge as
Select Distinct(precinctid),avg(age::int4)
OVER (partition by precinctid)
From Active 
where precinctid in (Select precincts42 from ld42)
Order by avg;

Create view InactPrAge as
Select Distinct(precinctid),avg(age::int4)
OVER (partition by precinctid)
From Inactive 
Where precinctid in (Select precincts42 from ld42)
Order by avg;

create view Joinview1 as
Select 
ActPrAge.precinctid,
ActPrAge.Avg as ActiveAverage,
InactPrAge.Avg as InactiveAverage,
(ActPrAge.Avg - InactPrAge.Avg) as DiffAverage
From InactPrAge,ActPrAge
  Where 
ActPrAge.precinctid =  InactPrAge.precinctid 
Order By DiffAverage DESC;

Select  to_char(activeaverage::real,'99D9') as ActiveAverage,
to_char(inactiveaverage::real,'99D9') as InactiveAverage,
to_chardiffaverage::real,'99D9') as DiffAverage
From Joinview1 
Order By DiffAverage DESC;


# This is R code that uses the DBI compliant RPostgresSQL interface to chart data from SQL queries

library(RPostgreSQL)
library(lattice)
library(plyr)

jpeg_create <- function() {
 systime <- as.numeric(Sys.time())
 # dev.new()
 jpeg(filename = systime,
          width = 1024, height = 768, units = "px", pointsize = 12,
          quality = 100, bg = "white", res = NA, family = "", restoreConsole = TRUE,
          type = c("windows"))
 Sys.sleep(2)
   }

con <- dbConnect(PostgreSQL(), user= "postgres", password="password", port="5432", host="localhost", dbname="dbname")

AgeGroupsld42 <- dbGetQuery(con, "Select age,Count(age) from Active where precinctid in (Select precincts42 from ld42) Group By Age Order by Age")
AgeGroupsld40 <- dbGetQuery(con, "Select age,Count(age) from Active where precinctid not in (Select precincts42 from ld42) Group By Age Order by Age")

jpeg_create()
with(AgeGroupsld42,(barplot(count,names.arg=age,xlab="Age By Count Active Voters 9/23/2014 Blue=WC 40th LD Red=42nd LD",xlim=c(0,110),ylim=c(0,1800),col=rgb(1,0,0,.75))));par(new=T)
with(AgeGroupsld40,(barplot(count,names.arg=age,xlim=c(0,110),ylim=c(0,1800),col=rgb(0,0,1,.75))));par(new=F)

AgeGroupsld42 <- dbGetQuery(con, "Select age,Count(age) from Inactive where precinctid in (Select precincts42 from ld42) Group By Age Order by Age")
AgeGroupsld40 <- dbGetQuery(con, "Select age,Count(age) from Inactive where precinctid not in (Select precincts42 from ld42) Group By Age Order by Age")

jpeg_create()
with(AgeGroupsld42,(barplot(count,names.arg=age,xlab="Age By Count Inactive Voters 9/23/2014 Blue=WC 40th LD Red=42nd LD",xlim=c(0,110),ylim=c(0,500),col=rgb(1,0,0,.75))));par(new=T)
with(AgeGroupsld40,(barplot(count,names.arg=age,xlim=c(0,110),ylim=c(0,500),col=rgb(0,0,1,.75))));par(new=F)

graphics.off()

No comments:

Post a Comment