Tuesday, September 23, 2014

SQL for comparing Active vs Inactive voters over time

/* 6:20 PM 9/23/2014 -RMF Political piece for this code is here.
I create query to dump out the various (voter history) databases I wish to compare to Postgres 'views'.  A 'view' in Postgres is essentially and 'in memory' table. There's more to than that but...:

Select * from voterdb where statuscode = 'I'; -- 'I' for 'inactive' 

The unique voter registration number isn't quite reliable as a primary key over time (in my humble opinion) so I use a 'unique tuple' (ARRAY[lastname,firstname,middlename]) of my own invention.
Something like ARRAY[lastname,firstname,middlename,registrationnumber::TEXT] would be even more unique.  You also need the 119 precincts of LD 42.
*/


--By Age
drop view Inactive;
Create view Inactive as 
Select ARRAY[lastname,firstname,middlename],precinctid,(2014 - extract(year from birthdate)) as AGE 
From inactive092314 
Where ARRAY[lastname,firstname,middlename] 
not in 
(Select ARRAY[lastname,firstname,middlename]from inactive090514);

Select count(age),age
From Inactive 
Group by age order by count DESC LIMIT 10;

Select count(age),age
From Inactive 
Where precinctid in (Select precincts42 from ld42)
Group by age order by count DESC LIMIT 10;

-- By Precinct
drop view Inactive;
Create view Inactive as
Select ARRAY[lastname,firstname,middlename],precinctid 
from inactive092314 where ARRAY[lastname,firstname,middlename]
not in
(Select ARRAY[lastname,firstname,middlename] from inactive090514);

Select count(precinctid),precinctid 
From Inactive 
Group by precinctid order by count DESC LIMIT 10;

Select count(precinctid),precinctid 
From Inactive 
Where precinctid in (Select precincts42 from ld42) 
Group by precinctid order by count DESC LIMIT 10;



/* This query is too computationally intensive for an entire voter database (nrow * ncol = 5M plus records):

Select Count(RegistrationNumber) from voterdb091014 where statuscode = 'A' and
registrationnumber NOT IN (Select RegistrationNumber from voterdb091914);

I found using select Postgres 'views' (as below) with 'NOT EXISTS' more optimized.
*/

SELECT Count(registrationnumber) FROM voterdb090514 where statuscode = 'A';
SELECT Count(registrationnumber) FROM voterdb091914 where statuscode = 'A';
Select ((SELECT Count(registrationnumber) FROM voterdb091014 where statuscode = 'A')
- (SELECT Count(registrationnumber) FROM voterdb091914 where statuscode = 'A'))  as difference;

SELECT Count(registrationnumber) 
FROM voterdb090514
WHERE statuscode = 'A'
AND NOT EXISTS (
  SELECT *
  -- may want 'WHERE statuscode= 'A''
  FROM voterdb091914
  where voterdb091014.registrationnumber = voterdb091914.registrationnumber
  );

SELECT precinctid,Count(registrationnumber)
FROM voterdb090514 
WHERE statuscode= 'A'
AND NOT EXISTS (
  SELECT *
  FROM voterdb091914
  -- may want 'WHERE statuscode= 'A''
  WHERE voterdb090514.registrationnumber = voterdb091914.registrationnumber
  ) Group By PrecinctID Order By Count DESC LIMIT 20;

drop view UsersGone42nd;
drop view UsersGone;
drop view UserArray;
drop view UserArray1;
Create View UserArray AS  
SELECT statuscode,precinctid,ARRAY[lastname,firstname,middlename] as array
FROM voterdb090514
WHERE statuscode = 'A';

Create View UserArray1 AS  
SELECT statuscode,precinctid, ARRAY[lastname,firstname,middlename] as array
FROM voterdb091914
WHERE statuscode = 'A';

Create View UsersGone AS
Select *  
FROM Userarray
where NOT EXISTS (
  SELECT *
  FROM Userarray1
  where UserArray.array = UserArray1.array
  );
  
Create View UsersGone42nd as 
Select precinctid,Count(*) 
from UsersGone where precinctid in
(Select precincts42 from ld42)
Group By PrecinctID Order By Count DESC; 

 --- with addresses 
drop view UG42nd;
drop view UsersGone42nd;
drop view UsersGone;
drop view UserArray;
drop view UserArray1;
Create View UserArray AS  
SELECT *, ARRAY[lastname,firstname,middlename] as array
FROM voterdb090514
WHERE statuscode = 'A';

Create View UserArray1 AS  
SELECT *, ARRAY[lastname,firstname,middlename] as array
FROM voterdb091914
WHERE statuscode = 'A';

Create View UsersGone AS
Select *  
FROM Userarray
where NOT EXISTS (
  SELECT *
  FROM Userarray1
  where UserArray.array = UserArray1.array
  ) Order By precinctid DESC;

Create View UG42nd as 
Select *
from UsersGone where precinctid in
(Select precincts42 from ld42)
Order By precinctid DESC;   
  Create View UsersGone42nd as 
Select precinctid,Count(*) 
from UsersGone where precinctid in
(Select precincts42 from ld42)
Group By PrecinctID Order By Count DESC; 

 Select Sum(ballotcounted_1::INT) from UsersGone where ballotcounted_1 = '1';
 Select Sum(ballotcounted_1::INT) from UG42nd where ballotcounted_1 = '1';
 ---

No comments:

Post a Comment