initial attempt at contours

Merge pull request #5 from CartoDB/4-pgxs-fix
Adapt Makefile of the extension for some PGXS versions
2016-03-15 17:01:03 -04:00 · 2016-02-29 16:35:04 +01:00 · 2016-02-26 19:09:17 +01:00 · 2016-02-26 19:02:18 +01:00 · 2016-02-23 17:23:33 +01:00 · 2016-02-23 17:18:19 +01:00
14 changed files with 112 additions and 146 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -12,7 +12,7 @@ name must be created.
 ### Version numbers

 The version of both the SQL extension and the Python package shall
-follow the[Semantic Versioning 2.0](http://semver.org/) guidelines:
+follow the [Semantic Versioning 2.0](http://semver.org/) guidelines:

 * When backwards incompatibility is introduced the major number is incremented
 * When functionally is added (in a backwards-compatible manner) the minor number
--- a/README.md
+++ b/README.md
@@ -7,8 +7,6 @@ CartoDB Spatial Analysis extension for PostgreSQL.
 * *pg* contains the PostgreSQL extension source code
 * *python* Python module

-FIXME: should it be `./extension` and `./lib/python' ?
-
 ## Requirements

 * pip
--- a/pg/Makefile
+++ b/pg/Makefile
@@ -28,3 +28,6 @@ REGRESS_OPTS = --inputdir='$(TEST_DIR)' --outputdir='$(TEST_DIR)'
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
 include $(PGXS)
+
+# This seems to be needed at least for PG 9.3.11
+all: $(DATA)
--- a/pg/crankshaft--0.0.1.sql
+++ b/pg/crankshaft--0.0.1.sql
@@ -1,3 +1,6 @@
+--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
+-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
 -- Internal function.
 -- Set the seeds of the RNGs (Random Number Generators)
 -- used internally.
@@ -133,4 +136,13 @@ BEGIN
  RETURN ST_Collect(points);
 END;
 $$
-LANGUAGE plpgsql VOLATILE
+LANGUAGE plpgsql VOLATILE;
+-- Make sure by default there are no permissions for publicuser
+-- NOTE: this happens at extension creation time, as part of an implicit transaction.
+-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
+
+-- Grant permissions on the schema to publicuser (but just the schema)
+GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
+
+-- Revoke execute permissions on all functions in the schema by default
+-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
--- a/pg/sql/0.0.1/00_header.sql
+++ b/pg/sql/0.0.1/00_header.sql
@@ -0,0 +1,3 @@
+--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
+-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
--- a/pg/sql/0.0.1/04_dot_density.sql
+++ b/pg/sql/0.0.1/04_dot_density.sql
@@ -51,4 +51,4 @@ BEGIN
  RETURN ST_Collect(points);
 END;
 $$
-LANGUAGE plpgsql VOLATILE
+LANGUAGE plpgsql VOLATILE;
--- a/pg/sql/0.0.1/07_contours.sql
+++ b/pg/sql/0.0.1/07_contours.sql
@@ -0,0 +1,12 @@
+
+
+-- Thin PL/Python wrapper: the work is delegated to
+-- crankshaft.contours.create_countours_count(query, levels) and whatever
+-- it returns is handed back as rows of (the_geom, level).
+--   query  : SQL text, forwarded unchanged to the Python implementation
+--   levels : array of numeric levels, forwarded unchanged
+CREATE OR REPLACE FUNCTION cdb_contours_count(query TEXT, levels NUMERIC[])
+RETURNS TABLE (the_geom geometry, level NUMERIC)
+LANGUAGE plpythonu
+AS $$
+  from crankshaft.contours import create_countours_count
+  return create_countours_count(query, levels)
+$$;
--- a/pg/sql/0.0.1/90_permissions.sql
+++ b/pg/sql/0.0.1/90_permissions.sql
@@ -0,0 +1,9 @@
+-- Make sure by default there are no permissions for publicuser
+-- NOTE: this happens at extension creation time, as part of an implicit transaction.
+-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
+
+-- Grant permissions on the schema to publicuser (but just the schema)
+GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
+
+-- Revoke execute permissions on all functions in the schema by default
+-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
--- a/pg/sql/0.0.1/population.sql
+++ b/pg/sql/0.0.1/population.sql
@@ -1,138 +0,0 @@
-- Function to obtain an estimate of the population living inside
-- an area (polygon) from the CartoDB Data Observatory
-CREATE OR REPLACE FUNCTION cdb_population(area geometry)
-RETURNS NUMERIC AS $$
-DECLARE
-  georef_column TEXT;
-  table_id TEXT;
-  tag_value TEXT;
-  table_name TEXT;
-  column_name TEXT;
-  population NUMERIC;
-BEGIN
-
-  -- Note: comments contain pseudo-code that should be implemented
-
-  -- Register metadata tables:
-  -- This would require super-user privileges
-  /*
-  SELECT cdb_add_remote_table('observatory', 'bmd_column_table');
-  SELECT cdb_add_remote_table('observatory', 'bmd_column_2_column');
-  SELECT cdb_add_remote_table('observatory', 'bmd_table');
-  SELECT cdb_add_remote_table('observatory', 'bmd_column_table');
-  SELECT cdb_add_remote_table('observatory', 'bmd_column_tag');
-  SELECT cdb_add_remote_table('observatory', 'bmd_tag');
-  */
-
-  tag_value := 'population';
-
-
-  -- Determine the georef column id to be used: it must have type 'geometry',
-  -- the maximum weight.
-  -- TODO: in general, multiple columns with maximal weight could be found;
-  -- we should use the timespan of the table to disambiguate (choose the
-  -- most recent). Also a rank of geometry columns should be introduced to
-  -- find select the greatest resolution available.
-  /*
-  WITH selected_tables AS (
-    -- Find tables that have population columns and cover the input area
-    SELECT tab.id AS id
-    FROM observatory.bmd_column col,
-         observatory.bmd_column_table coltab,
-         observatory.bmd_table tab,
-         observatory.bmd_tag tag,
-         observatory.bmd_column_tag coltag
-    WHERE coltab.column_id = col.id
-      AND coltab.table_id = tab.id
-      AND coltag.tag_id = tag.id
-      AND coltag.column_id = col.id
-      AND tag.name ILIKE tag_value
-      AND tab.id = table_id
-      AND tab.bounds && area;
-  )
-  SELECT
-    FROM bmd_column col
-    JOIN bmd_table tab ON col.table_id = tab.id
-    WHERE type = 'geometry'
-      AND tab.id IN (selected_tables)
-    ORDER BY weight DESC LIMIT 1;
-  */
-  georef_column := '"us.census.tiger".block_group_2013';
-
-  -- Now we will query the metadata to find which actual tables correspond
-  -- to this datasource and resolution/timespan
-  -- and choose the 'parent' or more general of them.
-  /*
-  SELECT from_table_geoid.id data_table_id
-  FROM observatory.bmd_column_table from_column_table_geoid,
-       observatory.bmd_column_table to_column_table_geoid,
-       observatory.bmd_column_2_column rel,
-       observatory.bmd_column_table to_column_table_geom,
-       observatory.bmd_table from_table_geoid,
-       observatory.bmd_table to_table_geoid,
-       observatory.bmd_table to_table_geom
-  WHERE from_column_table_geoid.column_id = to_column_table_geoid.column_id
-    AND to_column_table_geoid.column_id = rel.from_id
-    AND rel.reltype = 'geom_ref'
-    AND rel.to_id = to_column_table_geom.column_id
-    AND to_column_table_geom.column_id = georef_column
-    AND from_table_geoid.id = from_column_table_geoid.table_id
-    AND to_table_geoid.id = to_column_table_geoid.table_id
-    AND to_table_geom.id = to_column_table_geom.table_id
-    AND from_table_geoid.bounds && area
-  ORDER by from_table_geoid.timespan desc
-  INTO table_id;
-  */
-  table_id := '"us.census.acs".extract_2013_5yr_block_group';
-
-  -- Next will fetch the columns of that table that are tagged as population:
-  -- and get the more general one (not having a parent or denominator)
-  /*
-  WITH column_ids AS (
-    SELECT col.id AS id
-    FROM observatory.bmd_column col,
-         observatory.bmd_column_table coltab,
-         observatory.bmd_table tab,
-         observatory.bmd_tag tag,
-         observatory.bmd_column_tag coltag
-    WHERE coltab.column_id = col.id
-      AND coltab.table_id = tab.id
-      AND coltag.tag_id = tag.id
-      AND coltag.column_id = col.id
-      AND tag.name ILIKE tag_value
-      AND tab.id = table_id;
-  ),
-  excluded_column_ids AS (
-    SELECT from_id AS id
-    FROM observatory.bmd_column_2_column
-    WHERE from_id in (column_ids)
-      AND reltype in ('parent', 'denominator')
-      AND to_id in (column_ids)
-  ),
-  SELECT bmd_table.tablename, bmd_column_table.colname
-  FROM observatory.bmd_column_table,
-       observatory.bmd_table
-  WHERE bmd_column_table.table_id = bmd_table.id
-    AND bmd_column_table.column_id IN (column_ids)
-    AND NOT bmd_column_table.column_id IN (exclude_column_ids)
-  INTO (table_name, column_name);
-  */
-  table_name := 'us_census_acs2013_5yr_block_group';
-  column_name := 'total_pop';
-
-  -- Register the foreign table
-  -- This would require super-user privileges
-  -- SELECT cdb_add_remote_table('observatory', table_name);
-
-  -- Perform the query
-  SELECT cdb_crankshaft.cdb_overlap_sum(
-    area,
-    table_name,
-    column_name,
-    schema_name := 'observatory')
-  INTO population;
-
-  RETURN population;
-END;
-$$
-LANGUAGE plpgsql VOLATILE
--- a/pg/test/0.0.1/sql/90_permissions.sql
+++ b/pg/test/0.0.1/sql/90_permissions.sql
@@ -0,0 +1,18 @@
+SELECT cdb_crankshaft._cdb_random_seeds(1234);
+
+-- Use regular user role
+SET ROLE test_regular_user;
+
+-- Add to the search path the schema
+SET search_path TO public,cartodb,cdb_crankshaft;
+
+-- Exercise public functions
+SELECT ppoints.code, m.quads
+  FROM ppoints
+  JOIN cdb_moran_local('ppoints', 'value') m
+    ON ppoints.cartodb_id = m.ids
+  ORDER BY ppoints.code;
+SELECT round(cdb_overlap_sum(
+  '0106000020E61000000100000001030000000100000004000000FFFFFFFFFF3604C09A0B9ECEC42E444000000000C060FBBF30C7FD70E01D44400000000040AD02C06481F1C8CD034440FFFFFFFFFF3604C09A0B9ECEC42E4440'::geometry,
+  'values', 'value'
+), 2);
--- a/python/crankshaft/crankshaft/init.py
+++ b/python/crankshaft/crankshaft/init.py
@@ -1,2 +1,3 @@
 import random_seeds
 import clustering
+import contours
--- a/python/crankshaft/crankshaft/contours/init.py
+++ b/python/crankshaft/crankshaft/contours/init.py
@@ -0,0 +1 @@
+from contours import * 
--- a/python/crankshaft/crankshaft/contours/contours.py
+++ b/python/crankshaft/crankshaft/contours/contours.py
@@ -0,0 +1,47 @@
+
+import matplotlib.pyplot as plt
+import numpy as np
+import plpy
+
+def contour_to_polygon(contour):
+    """Serialize one contour path as a closed WKT polygon string.
+
+    contour: sequence of vertex rows accepted by np.append; presumably an
+             (N, 2) array of x/y pairs coming from the contour tracer
+             -- TODO confirm against the caller.
+
+    Returns text of the form 'POLYGON((x y,x y,...))'. The first vertex is
+    appended again at the end so the ring is closed.
+    """
+    plpy.notice('appending contour ')
+    # Close the ring: repeat the first vertex after the last one.
+    ring = np.append(contour, [contour[0]], axis=0)
+    points = ','.join(' '.join(str(coord) for coord in vertex) for vertex in ring)
+
+    # Emit bare WKT. A trailing '::geometry' is SQL cast syntax, not part of
+    # the WKT text, so it must not be embedded in a value that is handed
+    # back for conversion into a geometry column.
+    return "POLYGON(({points}))".format(points=points)
+
+def create_countours_count(query,levels,mesh_size=20):
+    """Trace contour polygons over a 2D histogram of point counts.
+
+    query:     SQL text; it is interpolated verbatim as a subquery and must
+               expose a point column named the_geom.
+               NOTE(review): the text is not escaped in any way, so only
+               trusted SQL may be passed here.
+    levels:    iterable of levels; one polygon is produced per level.
+    mesh_size: number of histogram bins along each axis (default 20).
+
+    Returns a list of (polygon_text, level) tuples, or an empty list when
+    the query yields no usable points.
+    """
+    qresult = plpy.execute( "select ST_X(the_geom)::Numeric as x, ST_Y(the_geom)::Numeric as y from ({query}) a ".format(query=query))
+    x = []
+    y = []
+    for row in qresult:
+        # Compare against None explicitly: a coordinate that is exactly 0 is
+        # falsy and would otherwise be discarded together with the NULLs.
+        if row['x'] is not None and row['y'] is not None:
+            x.append(float(row['x']))
+            y.append(float(row['y']))
+
+    plpy.notice(np.shape(x))
+    plpy.notice(np.shape(y))
+
+    # np.min/np.max raise on empty input; no points means no contours.
+    if not x:
+        return []
+
+    x_min,x_max = np.min(x), np.max(x)
+    y_min,y_max = np.min(y), np.max(y)
+    plpy.notice(x_min)
+    plpy.notice(x_max)
+    plpy.notice(y_min)
+    plpy.notice(y_max)
+    plpy.notice(mesh_size)
+
+    # Named 'extent' so the builtin range() is not shadowed.
+    extent = [[x_min,x_max],[y_min,y_max]]
+    counts, xedges, yedges = np.histogram2d(x,y,bins=(mesh_size,mesh_size), range=extent)
+    # histogram2d bins x along the first axis; swap so that rows follow y,
+    # which is the layout the contour call is given together with
+    # (xedges, yedges).
+    counts = np.swapaxes(counts,0,1)
+    plpy.notice("here about to create the contours")
+
+    CS = plt.contour(xedges[1:],yedges[1:] ,counts,4,linewidths=0.5, colors='b')
+    plpy.notice(levels)
+    # Only the first path traced at each level is converted to a polygon.
+    return [(contour_to_polygon(CS.Cntr.trace((level))[0]), float(level)) for level in levels]
--- a/python/crankshaft/setup.py
+++ b/python/crankshaft/setup.py
@@ -10,7 +10,7 @@ from setuptools import setup, find_packages
 setup(
    name='crankshaft',

-    version='0.0.01',
+    version='0.0.1',

    description='CartoDB Spatial Analysis Python Library',

@@ -40,9 +40,9 @@ setup(

    # The choice of component versions is dictated by what's
    # provisioned in the production servers.
-    install_requires=['pysal==1.11.0','numpy==1.6.1','scipy==0.17.0'],
+    install_requires=['pysal==1.11.0','numpy==1.10.1','scipy==0.17.0', 'matplotlib==1.4.3'],

-    requires=['pysal', 'numpy'],
+    requires=['pysal', 'numpy', 'matplotlib'],

    test_suite='test'
 )
Author	SHA1	Message	Date
Stuart Lynn	01eef5ee9e	initial attempt at contours	2016-03-15 17:01:03 -04:00
Javier Goizueta	46c66476b5	Merge pull request #5 from CartoDB/4-pgxs-fix Adapt Makefile of the extension for some PGXS versions	2016-02-29 16:35:04 +01:00
Javier Goizueta	e03aac4d8f	Fix typo	2016-02-26 19:09:17 +01:00
Javier Goizueta	d885c16db2	Adapt Makefile of the extension for some PGXS versions PostgreSQL 9.3.11 doesn't generate $DATA by default. fixes #4	2016-02-26 19:02:18 +01:00
Rafa de la Torre	abfda1c75e	Update CONTRIBUTING.md minor change (just a space)	2016-02-23 17:23:33 +01:00
Rafa de la Torre	8f478ef22c	Update README.md Remove FIXME that should be already fixed.	2016-02-23 17:18:19 +01:00
Javier Goizueta	c7bb50be5a	Fix: Make extension publicly available	2016-02-22 17:39:58 +01:00
Javier Goizueta	ef17e2fe4c	Add header	2016-02-22 16:14:52 +01:00
Javier Goizueta	f3b8546063	Fix syntax	2016-02-22 16:14:28 +01:00