Compare commits

..

1 Commits

Author SHA1 Message Date
Stuart Lynn
01eef5ee9e inital attempt at contours 2016-03-15 17:01:03 -04:00
9 changed files with 63 additions and 146 deletions

View File

@@ -137,53 +137,6 @@ BEGIN
END;
$$
LANGUAGE plpgsql VOLATILE;
CREATE OR REPLACE FUNCTION
cdb_create_segment (
segment_name TEXT,
table_name TEXT,
column_name TEXT,
geoid_column TEXT DEFAULT 'geoid',
census_table TEXT DEFAULT 'block_groups'
)
RETURNS NUMERIC
AS $$
from crankshaft.segmentation import create_segemnt
# TODO: use named parameters or a dictionary
return create_segment('table')
$$ LANGUAGE plpythonu;
CREATE OR REPLACE FUNCTION
cdb_predict_segment (
segment_name TEXT,
geoid_column TEXT DEFAULT 'geoid',
census_table TEXT DEFAULT 'block_groups'
)
RETURNS TABLE(geoid TEXT, prediction NUMERIC)
AS $$
from crankshaft.segmentation import create_segemnt
# TODO: use named parameters or a dictionary
return create_segment('table')
$$ LANGUAGE plpythonu;
CREATE OR REPLACE FUNCTION
cdb_adaptive_histogram (
table_name TEXT,
column_name TEXT
)
RETURNS TABLE (bin_start numeric,bin_end numeric,value numeric)
AS $$
from crankshaft.bayesian_blocks import adaptive_histogram
return adaptive_histogram(table_name,column_name)
$$ LANGUAGE plpythonu;
CREATE OR REPLACE FUNCTION
cdb_simple_test (
)
RETURNS NUMERIC
AS $$
return 5
$$ LANGUAGE plpythonu;
-- Make sure by default there are no permissions for publicuser
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;

View File

@@ -1,11 +0,0 @@
CREATE OR REPLACE FUNCTION
cdb_adaptive_histogram (
table_name TEXT,
column_name TEXT
)
RETURNS TABLE (bin_start numeric,bin_end numeric,value numeric)
AS $$
from crankshaft.bayesian_blocks import adaptive_histogram
return adaptive_histogram(table_name,column_name)
$$ LANGUAGE plpythonu;

View File

@@ -0,0 +1,12 @@
CREATE OR REPLACE FUNCTION
cdb_contours_count (
query TEXT,
levels NUMERIC[]
)
RETURNS TABLE (the_geom geometry , level Numeric)
AS $$
from crankshaft.contours import create_countours_count
return create_countours_count(query,levels)
$$ LANGUAGE plpythonu;

View File

@@ -1,3 +1,3 @@
import random_seeds
import clustering
import bayesian_blocks
import contours

View File

@@ -1 +0,0 @@
from bayesian_blocks import *

View File

@@ -1,84 +0,0 @@
import plpy
import numpy as np
def adaptive_histogram(table_name,column_name):
data = plpy.execute("select {column_name} from {table_name}".format(**locals()))
data = [float(d['count']) for d in data]
plpy.notice(data)
vals, bins = np.histogram( data, bins=_bayesian_blocks(data))
return zip(vals,bins, bins[1:])
def _bayesian_blocks(t):
"""Bayesian Blocks Implementation
By Jake Vanderplas. License: BSD
Based on algorithm outlined in http://adsabs.harvard.edu/abs/2012arXiv1207.5578S
Parameters
----------
t : ndarray, length N
data to be histogrammed
Returns
-------
bins : ndarray
array containing the (N+1) bin edges
Notes
-----
This is an incomplete implementation: it may fail for some
datasets. Alternate fitness functions and prior forms can
be found in the paper listed above.
"""
# copy and sort the array
t = np.sort(t)
N = t.size
# create length-(N + 1) array of cell edges
edges = np.concatenate([t[:1],
0.5 * (t[1:] + t[:-1]),
t[-1:]])
block_length = t[-1] - edges
# arrays needed for the iteration
nn_vec = np.ones(N)
best = np.zeros(N, dtype=float)
last = np.zeros(N, dtype=int)
#-----------------------------------------------------------------
# Start with first data cell; add one cell at each iteration
#-----------------------------------------------------------------
for K in range(N):
# Compute the width and count of the final bin for all possible
# locations of the K^th changepoint
width = block_length[:K + 1] - block_length[K + 1]
count_vec = np.cumsum(nn_vec[:K + 1][::-1])[::-1]
# evaluate fitness function for these possibilities
fit_vec = count_vec * (np.log(count_vec) - np.log(width))
fit_vec -= 4 # 4 comes from the prior on the number of changepoints
fit_vec[1:] += best[:K]
# find the max of the fitness: this is the K^th changepoint
i_max = np.argmax(fit_vec)
last[K] = i_max
best[K] = fit_vec[i_max]
#-----------------------------------------------------------------
# Recover changepoints by iteratively peeling off the last block
#-----------------------------------------------------------------
change_points = np.zeros(N, dtype=int)
i_cp = N
ind = N
while True:
i_cp -= 1
change_points[i_cp] = ind
if ind == 0:
break
ind = last[ind - 1]
change_points = change_points[i_cp:]
return edges[change_points]

View File

@@ -0,0 +1 @@
from contours import *

View File

@@ -0,0 +1,47 @@
import matplotlib.pyplot as plt
import numpy as np
import plpy
def contour_to_polygon(contour):
plpy.notice('appending contour ')
c = np.append(contour, [contour[0]], axis=0)
points =','.join( [ " ".join(str(a) for a in b) for b in c])
return "POLYGON(({points}))::geometry".format(points=points)
def create_countours_count(query,levels,mesh_size=20):
qresult = plpy.execute( "select ST_X(the_geom)::Numeric as x, ST_Y(the_geom)::Numeric as y from ({query}) a ".format(query=query))
x =[]
y =[]
for a in qresult:
if a['x'] and a['y']:
x.append(float(a['x']))
y.append(float(a['y']))
plpy.notice(np.shape(x))
plpy.notice(np.shape(y))
if None in x:
plpy.notice("NULL IN LIST X ")
if None in y:
plpy.notice("NULL IN LIST Y ")
x_min,x_max = np.min(x), np.max(x)
y_min,y_max = np.min(y), np.max(y)
plpy.notice(x_min)
plpy.notice(x_max)
plpy.notice(y_min)
plpy.notice(y_max)
plpy.notice(mesh_size)
x_grid = np.linspace(x_min,x_max, mesh_size)
y_grid = np.linspace(y_min,y_max, mesh_size)
range = [[x_min,x_max],[y_min,y_max]]
a, xedges, yedges= np.histogram2d(x,y,bins=(mesh_size,mesh_size), range=range)
a = np.swapaxes(a,0,1)
plpy.notice("here about to create the contours")
CS = plt.contour(xedges[1:],yedges[1:] ,a,4,linewidths=0.5, colors='b')
plpy.notice(levels)
return[(contour_to_polygon(CS.Cntr.trace((level))[0]), float(level)) for level in levels]

View File

@@ -40,9 +40,9 @@ setup(
# The choice of component versions is dictated by what's
# provisioned in the production servers.
install_requires=['pysal==1.11.0','numpy==1.10.1','scipy==0.17.0'],
install_requires=['pysal==1.11.0','numpy==1.10.1','scipy==0.17.0', 'matplotlib==1.4.3'],
requires=['pysal', 'numpy'],
requires=['pysal', 'numpy', 'matplotlib'],
test_suite='test'
)