initial attempt at contours

2016-03-15 17:01:03 -04:00
9 changed files with 63 additions and 146 deletions
--- a/pg/crankshaft--0.0.1.sql
+++ b/pg/crankshaft--0.0.1.sql
@@ -137,53 +137,6 @@ BEGIN
 END;
 $$
 LANGUAGE plpgsql VOLATILE;
-CREATE OR REPLACE FUNCTION
-  cdb_create_segment (
-      segment_name TEXT,
-      table_name TEXT,
-  	  column_name TEXT,
-      geoid_column TEXT DEFAULT 'geoid',
-      census_table TEXT DEFAULT 'block_groups'
-  )
-RETURNS NUMERIC
-AS $$
-  from crankshaft.segmentation import create_segemnt
-  # TODO: use named parameters or a dictionary
-  return create_segment('table')
-$$ LANGUAGE plpythonu;
-
-CREATE OR REPLACE FUNCTION
-  cdb_predict_segment (
-      segment_name TEXT,
-      geoid_column TEXT DEFAULT 'geoid',
-      census_table TEXT DEFAULT 'block_groups'
-  )
-RETURNS TABLE(geoid TEXT, prediction NUMERIC)
-AS $$
-  from crankshaft.segmentation import create_segemnt
-  # TODO: use named parameters or a dictionary
-  return create_segment('table')
-$$ LANGUAGE plpythonu;
-CREATE OR REPLACE FUNCTION
-  cdb_adaptive_histogram (
-      table_name  TEXT,
-      column_name TEXT
-  )
-RETURNS TABLE (bin_start numeric,bin_end numeric,value numeric)
-
-AS $$
-  from crankshaft.bayesian_blocks import adaptive_histogram
-  return adaptive_histogram(table_name,column_name)
-$$ LANGUAGE plpythonu;
-
-CREATE OR REPLACE FUNCTION
-  cdb_simple_test (
-  )
-RETURNS NUMERIC
-
-AS $$
-  return 5
-$$ LANGUAGE plpythonu;
 -- Make sure by default there are no permissions for publicuser
 -- NOTE: this happens at extension creation time, as part of an implicit transaction.
 -- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
--- a/pg/sql/0.0.1/06_bayesian_blocks.sql
+++ b/pg/sql/0.0.1/06_bayesian_blocks.sql
@@ -1,11 +0,0 @@
-CREATE OR REPLACE FUNCTION
-  cdb_adaptive_histogram (
-      table_name  TEXT,
-      column_name TEXT
-  )
-RETURNS TABLE (bin_start numeric,bin_end numeric,value numeric)
-
-AS $$
-  from crankshaft.bayesian_blocks import adaptive_histogram
-  return adaptive_histogram(table_name,column_name)
-$$ LANGUAGE plpythonu;
--- a/pg/sql/0.0.1/07_contours.sql
+++ b/pg/sql/0.0.1/07_contours.sql
@@ -0,0 +1,12 @@
+-- cdb_contours_count(query, levels): PL/Python wrapper that passes both arguments to
+-- crankshaft.contours.create_countours_count and returns its rows as (the_geom, level).
+CREATE OR REPLACE FUNCTION
+  cdb_contours_count (
+      query TEXT,
+      levels NUMERIC[]
+  )
+RETURNS TABLE (the_geom geometry , level Numeric)
+AS $$
+  from crankshaft.contours import create_countours_count
+  return create_countours_count(query,levels)
+$$ LANGUAGE plpythonu;
--- a/python/crankshaft/crankshaft/init.py
+++ b/python/crankshaft/crankshaft/init.py
@@ -1,3 +1,3 @@
 import random_seeds
 import clustering
-import bayesian_blocks
+import contours
--- a/python/crankshaft/crankshaft/bayesian_blocks/init.py
+++ b/python/crankshaft/crankshaft/bayesian_blocks/init.py
@@ -1 +0,0 @@
-from bayesian_blocks import *
--- a/python/crankshaft/crankshaft/bayesian_blocks/bayesian_blocks.py
+++ b/python/crankshaft/crankshaft/bayesian_blocks/bayesian_blocks.py
@@ -1,84 +0,0 @@
-import plpy
-import numpy as np
-
-
-def adaptive_histogram(table_name,column_name):
-    data = plpy.execute("select {column_name} from {table_name}".format(**locals()))
-
-    data = [float(d['count']) for d in data]
-    plpy.notice(data)
-    vals, bins = np.histogram( data, bins=_bayesian_blocks(data))
-    return zip(vals,bins, bins[1:])
-
-
-def _bayesian_blocks(t):
-    """Bayesian Blocks Implementation
-
-    By Jake Vanderplas.  License: BSD
-    Based on algorithm outlined in http://adsabs.harvard.edu/abs/2012arXiv1207.5578S
-
-    Parameters
-    ----------
-    t : ndarray, length N
-        data to be histogrammed
-
-    Returns
-    -------
-    bins : ndarray
-        array containing the (N+1) bin edges
-
-    Notes
-    -----
-    This is an incomplete implementation: it may fail for some
-    datasets.  Alternate fitness functions and prior forms can
-    be found in the paper listed above.
-    """
-    # copy and sort the array
-    t = np.sort(t)
-    N = t.size
-
-    # create length-(N + 1) array of cell edges
-    edges = np.concatenate([t[:1],
-                            0.5 * (t[1:] + t[:-1]),
-                            t[-1:]])
-    block_length = t[-1] - edges
-
-    # arrays needed for the iteration
-    nn_vec = np.ones(N)
-    best = np.zeros(N, dtype=float)
-    last = np.zeros(N, dtype=int)
-
-    #-----------------------------------------------------------------
-    # Start with first data cell; add one cell at each iteration
-    #-----------------------------------------------------------------
-    for K in range(N):
-        # Compute the width and count of the final bin for all possible
-        # locations of the K^th changepoint
-        width = block_length[:K + 1] - block_length[K + 1]
-        count_vec = np.cumsum(nn_vec[:K + 1][::-1])[::-1]
-
-        # evaluate fitness function for these possibilities
-        fit_vec = count_vec * (np.log(count_vec) - np.log(width))
-        fit_vec -= 4  # 4 comes from the prior on the number of changepoints
-        fit_vec[1:] += best[:K]
-
-        # find the max of the fitness: this is the K^th changepoint
-        i_max = np.argmax(fit_vec)
-        last[K] = i_max
-        best[K] = fit_vec[i_max]
-
-    #-----------------------------------------------------------------
-    # Recover changepoints by iteratively peeling off the last block
-    #-----------------------------------------------------------------
-    change_points =  np.zeros(N, dtype=int)
-    i_cp = N
-    ind = N
-    while True:
-        i_cp -= 1
-        change_points[i_cp] = ind
-        if ind == 0:
-            break
-        ind = last[ind - 1]
-    change_points = change_points[i_cp:]
-
-    return edges[change_points]
--- a/python/crankshaft/crankshaft/contours/init.py
+++ b/python/crankshaft/crankshaft/contours/init.py
@@ -0,0 +1 @@
+from contours import * 
--- a/python/crankshaft/crankshaft/contours/contours.py
+++ b/python/crankshaft/crankshaft/contours/contours.py
@@ -0,0 +1,47 @@
+
+import matplotlib.pyplot as plt
+import numpy as np
+import plpy
+
+def contour_to_polygon(contour):
+    """Return the WKT POLYGON for `contour` (ring closed; no SQL '::geometry' cast, which is not WKT)."""
+    plpy.notice('appending contour ')
+    ring = np.append(contour, [contour[0]], axis=0)  # repeat the first vertex to close the ring
+    points = ','.join(" ".join(str(a) for a in b) for b in ring)
+    return "POLYGON(({points}))".format(points=points)
+
+def create_countours_count(query,levels,mesh_size=20):
+    """Contour the point density of the rows returned by `query`.
+
+    ST_X/ST_Y of each row's the_geom are binned on a mesh_size x mesh_size grid;
+    returns one (polygon_text, level) tuple per entry of `levels`, or [] without points.
+    """
+    # NOTE(review): `query` is spliced into the SQL verbatim -- trusted callers only.
+    qresult = plpy.execute( "select ST_X(the_geom)::Numeric as x, ST_Y(the_geom)::Numeric as y from ({query}) a ".format(query=query))
+    x =[]
+    y =[]
+    for a in qresult:
+        # Compare against None: a plain truth test also dropped points with x == 0 or y == 0.
+        if a['x'] is not None and a['y'] is not None:
+            x.append(float(a['x']))
+            y.append(float(a['y']))
+    plpy.notice(np.shape(x))
+    plpy.notice(np.shape(y))
+    if not x:
+        return []  # np.min/np.max raise ValueError on empty input
+    x_min,x_max = np.min(x), np.max(x)
+    y_min,y_max = np.min(y), np.max(y)
+    plpy.notice(x_min)
+    plpy.notice(x_max)
+    plpy.notice(y_min)
+    plpy.notice(y_max)
+    plpy.notice(mesh_size)
+    extent = [[x_min,x_max],[y_min,y_max]]  # was named `range`, shadowing the builtin
+    a, xedges, yedges= np.histogram2d(x,y,bins=(mesh_size,mesh_size), range=extent)
+    a = np.swapaxes(a,0,1)  # histogram2d is indexed [x, y]; contour expects [y, x]
+    plpy.notice("here about to create the contours")
+    CS = plt.contour(xedges[1:],yedges[1:] ,a,4,linewidths=0.5, colors='b')
+    plpy.notice(levels)
+    polygons = [(contour_to_polygon(CS.Cntr.trace((level))[0]), float(level)) for level in levels]
+    plt.close()  # pyplot keeps every figure alive until it is explicitly closed
+    return polygons
--- a/python/crankshaft/setup.py
+++ b/python/crankshaft/setup.py
@@ -40,9 +40,9 @@ setup(

    # The choice of component versions is dictated by what's
    # provisioned in the production servers.
-    install_requires=['pysal==1.11.0','numpy==1.10.1','scipy==0.17.0'],
+    install_requires=['pysal==1.11.0','numpy==1.10.1','scipy==0.17.0', 'matplotlib==1.4.3'],

-    requires=['pysal', 'numpy'],
+    requires=['pysal', 'numpy', 'matplotlib'],

    test_suite='test'
 )