From a451fb5b6ac94b2849cabd6a039d73a96786d7a6 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 29 Aug 2016 15:50:19 -0400 Subject: [PATCH 01/96] minor ordering changes --- src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py index 4622925..a588795 100644 --- a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py +++ b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -58,9 +58,9 @@ def query_attr_select(params): else: ## if moran's analysis attrs = [k for k in params - if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs', 'subquery')] + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')] - for idx, val in enumerate(sorted(attrs)): + for idx, val in enumerate(attrs): attr_string += template % {"col": params[val], "alias_num": idx + 1} return attr_string From 623613aa5cd08cb9ddefbcadfb5adf6cd49389d7 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 29 Aug 2016 16:46:49 -0400 Subject: [PATCH 02/96] adding ordered dict to tests --- src/py/crankshaft/test/test_pysal_utils.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/py/crankshaft/test/test_pysal_utils.py b/src/py/crankshaft/test/test_pysal_utils.py index 171fdbc..393adf6 100644 --- a/src/py/crankshaft/test/test_pysal_utils.py +++ b/src/py/crankshaft/test/test_pysal_utils.py @@ -2,18 +2,19 @@ import unittest import crankshaft.pysal_utils as pu from crankshaft import random_seeds +from collections import OrderedDict class PysalUtilsTest(unittest.TestCase): """Testing class for utility functions related to PySAL integrations""" def setUp(self): - self.params = {"id_col": "cartodb_id", - "attr1": "andy", - "attr2": "jay_z", - "subquery": "SELECT * FROM a_list", - "geom_col": "the_geom", - "num_ngbrs": 321} + self.params = OrderedDict([("id_col", 
"cartodb_id"), + ("attr1": "andy"), + ("attr2": "jay_z"), + ("subquery": "SELECT * FROM a_list"), + ("geom_col": "the_geom"), + ("num_ngbrs": 321)]) self.params_array = {"id_col": "cartodb_id", "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"], @@ -69,7 +70,7 @@ class PysalUtilsTest(unittest.TestCase): "i.\"jay_z\" IS NOT NULL AND " \ "i.\"jay_z\" <> 0 " \ "ORDER BY i.\"cartodb_id\" ASC;" - + ans_array = "SELECT i.\"cartodb_id\" As id, " \ "i.\"_2013_dec\"::numeric As attr1, " \ "i.\"_2014_jan\"::numeric As attr2, " \ From 622235d787faa179d57dc241602f28cc48386f0a Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 29 Aug 2016 16:52:40 -0400 Subject: [PATCH 03/96] :P adding commas --- src/py/crankshaft/test/test_pysal_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/py/crankshaft/test/test_pysal_utils.py b/src/py/crankshaft/test/test_pysal_utils.py index 393adf6..82c38f1 100644 --- a/src/py/crankshaft/test/test_pysal_utils.py +++ b/src/py/crankshaft/test/test_pysal_utils.py @@ -10,11 +10,11 @@ class PysalUtilsTest(unittest.TestCase): def setUp(self): self.params = OrderedDict([("id_col", "cartodb_id"), - ("attr1": "andy"), - ("attr2": "jay_z"), - ("subquery": "SELECT * FROM a_list"), - ("geom_col": "the_geom"), - ("num_ngbrs": 321)]) + ("attr1", "andy"), + ("attr2", "jay_z"), + ("subquery", "SELECT * FROM a_list"), + ("geom_col", "the_geom"), + ("num_ngbrs", 321)]) self.params_array = {"id_col": "cartodb_id", "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"], From 40481f128610bb2acf9ed0c3cd49d5792ae06b41 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 29 Aug 2016 17:10:58 -0400 Subject: [PATCH 04/96] adding more tests --- src/py/crankshaft/test/test_pysal_utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/py/crankshaft/test/test_pysal_utils.py b/src/py/crankshaft/test/test_pysal_utils.py index 82c38f1..9af8522 100644 --- a/src/py/crankshaft/test/test_pysal_utils.py +++ 
b/src/py/crankshaft/test/test_pysal_utils.py @@ -16,6 +16,13 @@ class PysalUtilsTest(unittest.TestCase): ("geom_col", "the_geom"), ("num_ngbrs", 321)]) + self.params2 = OrderedDict([("id_col", "cartodb_id"), + ("attr1", "price"), + ("attr2", "sq_meters"), + ("subquery", "SELECT * FROM a_list"), + ("geom_col", "the_geom"), + ("num_ngbrs", 321)]) + self.params_array = {"id_col": "cartodb_id", "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"], "subquery": "SELECT * FROM a_list", @@ -28,11 +35,15 @@ class PysalUtilsTest(unittest.TestCase): ans = "i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, " + ans2 = "i.\"price\"::numeric As attr1, " \ + "i.\"sq_meters\"::numeric As attr2, " + ans_array = "i.\"_2013_dec\"::numeric As attr1, " \ "i.\"_2014_jan\"::numeric As attr2, " \ "i.\"_2014_feb\"::numeric As attr3, " self.assertEqual(pu.query_attr_select(self.params), ans) + self.assertEqual(pu.query_attr_select(self.params2), ans2) self.assertEqual(pu.query_attr_select(self.params_array), ans_array) def test_query_attr_where(self): From 44dc5811b58dba58e5674ddb24d8a1eae524655a Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 1 Sep 2016 16:47:57 -0400 Subject: [PATCH 05/96] updating tests for query ordering error --- src/pg/test/expected/02_moran_test.out | 215 ++++++++++++------------- 1 file changed, 106 insertions(+), 109 deletions(-) diff --git a/src/pg/test/expected/02_moran_test.out b/src/pg/test/expected/02_moran_test.out index 3ba5bfd..03e842a 100644 --- a/src/pg/test/expected/02_moran_test.out +++ b/src/pg/test/expected/02_moran_test.out @@ -143,135 +143,132 @@ _cdb_random_seeds (1 row) code|quads -01|LL -02|LH -03|HH -04|HH -05|LL -06|HH -07|LL -08|LL -09|LL -10|HH -11|HH -12|HL -13|LL -14|HH +01|HH +02|HL +03|LL +04|LL +05|LH +06|LL +07|HH +08|HH +09|HH +10|LL +11|LL +12|LL +13|HL +14|LL 15|LL -16|LL -17|LL -18|LH -19|LL -20|LL -21|HH -22|LL -23|HL +16|HH +17|HH +18|LL +19|HH +20|HH +21|LL +22|HH +23|LL 24|LL -25|LL -26|LL +25|HH +26|HH 
27|LL -28|LL -29|LH -30|HH -31|LL +28|HH +29|LL +30|LL +31|HH 32|LL -33|LL -34|LL -35|LH -36|HL -37|LH -38|LH -39|LL -40|LL -41|LH -42|HL -43|LL -44|HL -45|LL -46|HL +33|HL +34|LH +35|LL +36|LL +37|HL +38|HL +39|HH +40|HH +41|HL +42|LH +43|LH +44|LL +45|LH +46|LL 47|LL -48|LL -49|HL -50|LL -51|HH +48|HH +49|LL +50|HH +51|LL (51 rows) _cdb_random_seeds (1 row) code|quads -03|HH -04|HH -06|HH -10|HH -11|HH -12|HL -14|HH -21|HH -23|HL -30|HH -36|HL -42|HL -44|HL -46|HL -49|HL -51|HH -(16 rows) +01|HH +02|HL +07|HH +08|HH +09|HH +13|HL +16|HH +17|HH +19|HH +20|HH +22|HH +25|HH +26|HH +28|HH +31|HH +33|HL +37|HL +38|HL +39|HH +40|HH +41|HL +48|HH +50|HH +(23 rows) _cdb_random_seeds (1 row) code|quads -01|LL -02|LH -05|LL -07|LL -08|LL -09|LL -13|LL +03|LL +04|LL +05|LH +06|LL +10|LL +11|LL +12|LL +14|LL 15|LL -16|LL -17|LL -18|LH -19|LL -20|LL -22|LL +18|LL +21|LL +23|LL 24|LL -25|LL -26|LL 27|LL -28|LL -29|LH -31|LL +29|LL +30|LL 32|LL -33|LL -34|LL -35|LH -37|LH -38|LH -39|LL -40|LL -41|LH -43|LL -45|LL +34|LH +35|LL +36|LL +42|LH +43|LH +44|LL +45|LH +46|LL 47|LL -48|LL -50|LL -(35 rows) +49|LL +51|LL +(28 rows) _cdb_random_seeds (1 row) code|quads -02|LH -12|HL -18|LH -23|HL -29|LH -35|LH -36|HL -37|LH -38|LH -41|LH -42|HL -44|HL -46|HL -49|HL -(14 rows) +02|HL +05|LH +13|HL +33|HL +34|LH +37|HL +38|HL +41|HL +42|LH +43|LH +45|LH +(11 rows) From e29f6f2861b271bd04192d064ddc9411e553164d Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 6 Sep 2016 09:23:39 -0400 Subject: [PATCH 06/96] add more comments --- src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py index a588795..f0c73ba 100644 --- a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py +++ b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -44,6 +44,13 @@ def query_attr_select(params): Create portion of SELECT 
statement for attributes inolved in query. @param params: dict of information used in query (column names, table name, etc.) + Example: + OrderedDict([('attr1', 'numerator'), + ('attr2', 'denominator'), + ('subquery', 'SELECT * FROM interesting_data')]) + Output: + "i.\"numerator\"::numeric As attr1, " \ + "i.\"denominator\"::numeric As attr2, " """ attr_string = "" From 1148aa417ae5da2d7beaa5d8e0d120e3410095f3 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 6 Sep 2016 09:23:59 -0400 Subject: [PATCH 07/96] additional test on alphabetical ordering --- src/py/crankshaft/test/test_pysal_utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/py/crankshaft/test/test_pysal_utils.py b/src/py/crankshaft/test/test_pysal_utils.py index 9af8522..aad9b20 100644 --- a/src/py/crankshaft/test/test_pysal_utils.py +++ b/src/py/crankshaft/test/test_pysal_utils.py @@ -23,6 +23,13 @@ class PysalUtilsTest(unittest.TestCase): ("geom_col", "the_geom"), ("num_ngbrs", 321)]) + self.params3 = OrderedDict([("id_col", "cartodb_id"), + ("attr1", "numerator"), + ("attr2", "denominator"), + ("subquery", "SELECT * FROM pecan"), + ("geom_col", "the_geom"), + ("num_ngbrs", 321)]) + self.params_array = {"id_col": "cartodb_id", "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"], "subquery": "SELECT * FROM a_list", @@ -38,12 +45,16 @@ class PysalUtilsTest(unittest.TestCase): ans2 = "i.\"price\"::numeric As attr1, " \ "i.\"sq_meters\"::numeric As attr2, " + ans3 = "i.\"numerator\"::numeric As attr1, " \ + "i.\"denominator\"::numeric As attr2, " + ans_array = "i.\"_2013_dec\"::numeric As attr1, " \ "i.\"_2014_jan\"::numeric As attr2, " \ "i.\"_2014_feb\"::numeric As attr3, " self.assertEqual(pu.query_attr_select(self.params), ans) self.assertEqual(pu.query_attr_select(self.params2), ans2) + self.assertEqual(pu.query_attr_select(self.params3), ans3) self.assertEqual(pu.query_attr_select(self.params_array), ans_array) def test_query_attr_where(self): From 
60f52633face6b4803e241d28657a4494601740e Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 9 Sep 2016 11:11:32 -0400 Subject: [PATCH 08/96] adds hotspot/coldspot function --- src/pg/sql/16_getis.sql | 16 ++++++ .../crankshaft/clustering/__init__.py | 1 + .../crankshaft/crankshaft/clustering/getis.py | 51 +++++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 src/pg/sql/16_getis.sql create mode 100644 src/py/crankshaft/crankshaft/clustering/getis.py diff --git a/src/pg/sql/16_getis.sql b/src/pg/sql/16_getis.sql new file mode 100644 index 0000000..37f288b --- /dev/null +++ b/src/pg/sql/16_getis.sql @@ -0,0 +1,16 @@ +-- Getis-Ord's G +-- Hotspot/Coldspot Analysis tool +CREATE OR REPLACE FUNCTION + CDB_GetisOrdsG( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS TABLE (z_val NUMERIC, p_val NUMERIC, rowid BIGINT) +AS $$ + from crankshaft.clustering import getis + return getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; diff --git a/src/py/crankshaft/crankshaft/clustering/__init__.py b/src/py/crankshaft/crankshaft/clustering/__init__.py index ed34fe0..d9682fa 100644 --- a/src/py/crankshaft/crankshaft/clustering/__init__.py +++ b/src/py/crankshaft/crankshaft/clustering/__init__.py @@ -1,3 +1,4 @@ """Import all functions from for clustering""" from moran import * from kmeans import * +from getis import * diff --git a/src/py/crankshaft/crankshaft/clustering/getis.py b/src/py/crankshaft/crankshaft/clustering/getis.py new file mode 100644 index 0000000..29a2c50 --- /dev/null +++ b/src/py/crankshaft/crankshaft/clustering/getis.py @@ -0,0 +1,51 @@ +""" +Moran's I geostatistics (global clustering & outliers presence) +""" + +# TODO: Fill in local neighbors which have null/NoneType values with the +# average of the their neighborhood + +import pysal as ps +import plpy +from collections import OrderedDict + +# 
crankshaft module +import crankshaft.pysal_utils as pu + +# High level interface --------------------------------------- + +def getis_ord(subquery, attr, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Getis-Ord's G + Implementation building neighbors with a PostGIS database and Getis-Ord's G + hotspot/coldspot analysis with PySAL. + Andy Eschbacher + """ + + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors if kNN is chosen + + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(3) + except plpy.SPIError, e: + plpy.error('Query failed: %s' % e) + + attr_vals = pu.get_attributes(result) + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + getis = ps.esda.getisord(attr_vals, weight, star=True) + + return zip(getis.z_sim, getis.p_sim, weight.id_order) From ccccf680662446974d7c71a14965baaac0b09d40 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 12 Sep 2016 11:39:21 -0400 Subject: [PATCH 09/96] fix module call --- src/pg/sql/16_getis.sql | 13 ++++++------- src/py/crankshaft/crankshaft/clustering/getis.py | 8 ++++---- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/pg/sql/16_getis.sql b/src/pg/sql/16_getis.sql index 37f288b..23547f1 100644 --- a/src/pg/sql/16_getis.sql +++ b/src/pg/sql/16_getis.sql @@ -4,13 +4,12 @@ CREATE OR REPLACE FUNCTION CDB_GetisOrdsG( subquery TEXT, column_name TEXT, - w_type TEXT, - num_ngbrs INT, - permutations INT, - geom_col TEXT, - id_col TEXT) + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (z_val NUMERIC, p_val NUMERIC, rowid BIGINT) AS $$ - from 
crankshaft.clustering import getis - return getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + from crankshaft.clustering import getis_ord + return getis_ord(subquery, column_name, w_type, num_ngbrs, geom_col, id_col) $$ LANGUAGE plpythonu; diff --git a/src/py/crankshaft/crankshaft/clustering/getis.py b/src/py/crankshaft/crankshaft/clustering/getis.py index 29a2c50..8cbbd56 100644 --- a/src/py/crankshaft/crankshaft/clustering/getis.py +++ b/src/py/crankshaft/crankshaft/clustering/getis.py @@ -15,7 +15,7 @@ import crankshaft.pysal_utils as pu # High level interface --------------------------------------- def getis_ord(subquery, attr, - w_type, num_ngbrs, permutations, geom_col, id_col): + w_type, num_ngbrs, geom_col, id_col): """ Getis-Ord's G Implementation building neighbors with a PostGIS database and Getis-Ord's G @@ -39,13 +39,13 @@ def getis_ord(subquery, attr, # if there are no neighbors, exit if len(result) == 0: return pu.empty_zipped_array(3) - except plpy.SPIError, e: - plpy.error('Query failed: %s' % e) + except plpy.SPIError, err: + plpy.error('Query failed: %s' % err) attr_vals = pu.get_attributes(result) weight = pu.get_weight(result, w_type, num_ngbrs) # calculate LISA values - getis = ps.esda.getisord(attr_vals, weight, star=True) + getis = ps.esda.getisord.G_Local(attr_vals, weight, star=True) return zip(getis.z_sim, getis.p_sim, weight.id_order) From ce4cc637aeacb2ccee24372496a3e7f4d8575a29 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 13 Sep 2016 09:05:24 -0400 Subject: [PATCH 10/96] adding permutations to interface --- src/pg/sql/16_getis.sql | 5 ++-- .../crankshaft/crankshaft/clustering/getis.py | 24 +++++++++---------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/pg/sql/16_getis.sql b/src/pg/sql/16_getis.sql index 23547f1..dc3a25d 100644 --- a/src/pg/sql/16_getis.sql +++ b/src/pg/sql/16_getis.sql @@ -6,10 +6,11 @@ CREATE OR REPLACE FUNCTION column_name TEXT, w_type TEXT 
DEFAULT 'knn', num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') -RETURNS TABLE (z_val NUMERIC, p_val NUMERIC, rowid BIGINT) +RETURNS TABLE (z_val NUMERIC, p_val NUMERIC, p_z_sim NUMERIC, rowid BIGINT) AS $$ from crankshaft.clustering import getis_ord - return getis_ord(subquery, column_name, w_type, num_ngbrs, geom_col, id_col) + return getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; diff --git a/src/py/crankshaft/crankshaft/clustering/getis.py b/src/py/crankshaft/crankshaft/clustering/getis.py index 8cbbd56..1ee425a 100644 --- a/src/py/crankshaft/crankshaft/clustering/getis.py +++ b/src/py/crankshaft/crankshaft/clustering/getis.py @@ -1,10 +1,7 @@ """ -Moran's I geostatistics (global clustering & outliers presence) +Getis-Ord's G geostatistics (hotspot/coldspot analysis) """ -# TODO: Fill in local neighbors which have null/NoneType values with the -# average of the their neighborhood - import pysal as ps import plpy from collections import OrderedDict @@ -15,11 +12,11 @@ import crankshaft.pysal_utils as pu # High level interface --------------------------------------- def getis_ord(subquery, attr, - w_type, num_ngbrs, geom_col, id_col): + w_type, num_ngbrs, permutations, geom_col, id_col): """ - Getis-Ord's G - Implementation building neighbors with a PostGIS database and Getis-Ord's G - hotspot/coldspot analysis with PySAL. + Getis-Ord's G* + Implementation building neighbors with a PostGIS database and PySAL's Getis-Ord's G* + hotspot/coldspot module. 
Andy Eschbacher """ @@ -38,14 +35,17 @@ def getis_ord(subquery, attr, result = plpy.execute(query) # if there are no neighbors, exit if len(result) == 0: - return pu.empty_zipped_array(3) + return pu.empty_zipped_array(4) except plpy.SPIError, err: plpy.error('Query failed: %s' % err) attr_vals = pu.get_attributes(result) + + ## build PySAL weight object weight = pu.get_weight(result, w_type, num_ngbrs) - # calculate LISA values - getis = ps.esda.getisord.G_Local(attr_vals, weight, star=True) + # calculate Getis-Ord's G* z- and p-values + getis = ps.esda.getisord.G_Local(attr_vals, weight, + star=True, permutations=permutations) - return zip(getis.z_sim, getis.p_sim, weight.id_order) + return zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order) From b71152a884b354cf57552fc89861d472e767c104 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 13 Sep 2016 09:06:09 -0400 Subject: [PATCH 11/96] adds fixtures and tests --- src/py/crankshaft/test/fixtures/getis.json | 1 + .../test/fixtures/neighbors_getis.json | 1 + .../crankshaft/test/test_clustering_getis.py | 38 +++++++++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 src/py/crankshaft/test/fixtures/getis.json create mode 100644 src/py/crankshaft/test/fixtures/neighbors_getis.json create mode 100644 src/py/crankshaft/test/test_clustering_getis.py diff --git a/src/py/crankshaft/test/fixtures/getis.json b/src/py/crankshaft/test/fixtures/getis.json new file mode 100644 index 0000000..f2f7d88 --- /dev/null +++ b/src/py/crankshaft/test/fixtures/getis.json @@ -0,0 +1 @@ +[[0.43099999999999999, -0.4491160909028587, 0.32667395669425392], [0.021999999999999999, -0.93788225530474956, 0.17415246162388542], [0.44600000000000001, -0.4491160909028587, 0.32667395669425392], [0.216, -0.64751176059715954, 0.25865039275726565], [0.11700000000000001, -0.96347562762330119, 0.1676544447116961], [0.049000000000000002, -0.20152426147617952, 0.42014433032225806], [0.072999999999999995, -0.46619525535633877, 
0.32053787260380839], [0.16300000000000001, -0.74976349152754251, 0.22669858024734424], [0.052999999999999999, -0.64751176059715954, 0.25865039275726565], [0.156, -0.56083197633324544, 0.28745604294545346], [0.29599999999999999, -0.24280805846929457, 0.40407704630238783], [0.29099999999999998, -0.26928692847374969, 0.39385444517601476], [0.44, -0.67722330498207772, 0.24913214171492104], [0.16500000000000001, -0.011684382726338413, 0.49533871177388633], [0.378, -0.56473075182761978, 0.28612845711175672], [0.042999999999999997, -0.99072103490325258, 0.1609109084310375], [0.45600000000000002, -0.28743876271061125, 0.38688819224593796], [0.156, 0.13295980219309725, 0.4471125856868039], [0.19500000000000001, 0.0063207047268819955, 0.49747842043259183], [0.0070000000000000001, -1.0415760797098768, 0.1488041317793396], [0.111, -0.36942477246818262, 0.35590556804466089], [0.22500000000000001, 0.17506551624725114, 0.43051407691606425], [0.26900000000000002, -0.44363455341924385, 0.32865340985373048], [0.33800000000000002, -0.43284736233536597, 0.33256283131987785], [0.189, -0.60291193010388022, 0.27328363953070622], [0.20799999999999999, 0.035856268747985663, 0.48569848295465989], [0.123, 0.089678570182411793, 0.4642713229152271], [0.0030000000000000001, -1.1057224660076166, 0.13442333147768415], [0.16800000000000001, 0.11677761660057258, 0.45351814086906583], [0.29999999999999999, -0.013808110669475293, 0.49449153588626749], [0.378, -0.23290337417697893, 0.40791821913691739], [0.41699999999999998, 0.22945579816510672, 0.40925733604961323], [0.002, 6.5207630666532674, 3.4975355944766306e-11], [0.46200000000000002, -0.14672827921301743, 0.44167324829783083], [0.126, 0.22908126100922532, 0.40940287888500482], [0.047, -1.0840432095769956, 0.13917282042962942], [0.017000000000000001, 4.0493273511601489, 2.5682527508119612e-05], [0.067000000000000004, -0.93058949009641589, 0.17603297655968531], [0.435, -0.39105578866749485, 0.34787799959221344], [0.0030000000000000001, 
6.4632623511010854, 5.1234683162704187e-11], [0.098000000000000004, 0.29440403358766087, 0.38422459253128149], [0.38400000000000001, -0.42044605311189681, 0.33707981556243372], [0.38700000000000001, 0.056785871984068648, 0.47735788415038005], [0.36899999999999999, -0.15875022725747437, 0.43693283214176271], [0.20399999999999999, 0.14086541129997771, 0.44398813370614665], [0.014, 6.0345506436163996, 7.9702822119998018e-10], [0.13400000000000001, -0.85883340027064359, 0.19521621870003536], [0.14499999999999999, -0.93901580426004294, 0.17386131505073421], [0.029000000000000001, 2.052209368389633, 0.020074659704574338], [0.45300000000000001, -0.2873860843780221, 0.38690835753455055], [0.27200000000000002, 0.11238736962962483, 0.45525813472736054], [0.47599999999999998, -0.22667990193583551, 0.41033632813474463], [0.060999999999999999, -0.66318887096788159, 0.25360479911357758], [0.47599999999999998, -0.41451366242647014, 0.33924898169485895], [0.14499999999999999, -0.65907426867863195, 0.25492403906163263], [0.127, -0.56550838435631712, 0.28586401153505392], [0.311, -0.019571757025783815, 0.49219249707263191], [0.27700000000000002, -0.049735022936677371, 0.48016677335804769], [0.20899999999999999, 0.34564692985400752, 0.36480403618322277], [0.26200000000000001, -0.055301366746933138, 0.47794918666392716], [0.315, -0.11227385176628404, 0.45530313698210811], [0.39900000000000002, -0.23091105262392728, 0.4086919520030482], [0.38, -0.05314541432928175, 0.47880802357740249], [0.29499999999999998, -0.048726126739086051, 0.48056877723722968], [0.29999999999999999, -0.65725648028245498, 0.25550800720533962], [0.35499999999999998, -0.29942829331962884, 0.38230663846055501], [0.39000000000000001, -0.38409317960223854, 0.35045469320011735], [0.249, -0.52740757430541685, 0.29895529238584306], [0.246, 0.34740441796104815, 0.36414375737078841], [0.089999999999999997, 0.47786308046182885, 0.31637383012047438], [0.23400000000000001, 0.0047163311140149293, 0.49811846308566454], [0.307, 
-0.098930796747205507, 0.46059660828759452], [0.40500000000000003, -0.37158786444006753, 0.35509986295795459], [0.45800000000000002, 0.60261000309518942, 0.27338408212548115], [0.44600000000000001, 0.046168582994860159, 0.48158794144995187], [0.28499999999999998, -0.049232649167827866, 0.48036694626611731], [0.050000000000000003, 1.0035582996961303, 0.15779578142846606], [0.218, 0.52664582141189142, 0.29921978282283035]] diff --git a/src/py/crankshaft/test/fixtures/neighbors_getis.json b/src/py/crankshaft/test/fixtures/neighbors_getis.json new file mode 100644 index 0000000..61e0c0e --- /dev/null +++ b/src/py/crankshaft/test/fixtures/neighbors_getis.json @@ -0,0 +1 @@ +[{"neighbors": [2, 6, 5, 10, 3], "hr8893": 1.624458, "id": 1}, {"neighbors": [4, 7, 9, 14, 16], "hr8893": 2.2554919999999998, "id": 2}, {"neighbors": [6, 0, 3, 8, 10], "hr8893": 1.4678899999999999, "id": 3}, {"neighbors": [8, 12, 2, 4, 6], "hr8893": 2.4842559999999998, "id": 4}, {"neighbors": [9, 12, 1, 3, 8], "hr8893": 0.0, "id": 5}, {"neighbors": [11, 10, 0, 15, 6], "hr8893": 9.0486730000000009, "id": 6}, {"neighbors": [2, 10, 0, 8, 17], "hr8893": 6.0294889999999999, "id": 7}, {"neighbors": [14, 1, 22, 16, 9], "hr8893": 1.8003849999999999, "id": 8}, {"neighbors": [3, 12, 19, 2, 18], "hr8893": 4.581251, "id": 9}, {"neighbors": [4, 16, 12, 1, 20], "hr8893": 3.7906070000000001, "id": 10}, {"neighbors": [17, 6, 5, 15, 0], "hr8893": 1.4474359999999999, "id": 11}, {"neighbors": [15, 5, 13, 21, 27], "hr8893": 1.1919660000000001, "id": 12}, {"neighbors": [8, 19, 3, 9, 4], "hr8893": 0.0, "id": 13}, {"neighbors": [21, 11, 28, 27, 15], "hr8893": 1.608017, "id": 14}, {"neighbors": [7, 16, 22, 1, 29], "hr8893": 1.9498120000000001, "id": 15}, {"neighbors": [11, 27, 26, 5, 10], "hr8893": 0.74509000000000003, "id": 16}, {"neighbors": [25, 9, 14, 29, 20], "hr8893": 4.1733180000000001, "id": 17}, {"neighbors": [31, 10, 18, 26, 6], "hr8893": 3.7832520000000001, "id": 18}, {"neighbors": [32, 17, 23, 19, 8], "hr8893": 
2.0851359999999999, "id": 19}, {"neighbors": [23, 12, 20, 8, 18], "hr8893": 2.1763020000000002, "id": 20}, {"neighbors": [25, 23, 19, 34, 9], "hr8893": 6.3093469999999998, "id": 21}, {"neighbors": [13, 28, 27, 11, 35], "hr8893": 10.855743, "id": 22}, {"neighbors": [30, 14, 29, 24, 7], "hr8893": 4.211354, "id": 23}, {"neighbors": [19, 20, 34, 39, 36], "hr8893": 0.80481000000000003, "id": 24}, {"neighbors": [30, 41, 22, 43, 52], "hr8893": 3.2153309999999999, "id": 25}, {"neighbors": [20, 33, 16, 34, 29], "hr8893": 2.8336640000000002, "id": 26}, {"neighbors": [38, 31, 27, 15, 17], "hr8893": 1.5920399999999999, "id": 27}, {"neighbors": [35, 15, 21, 28, 26], "hr8893": 1.5711580000000001, "id": 28}, {"neighbors": [21, 37, 35, 27, 13], "hr8893": 3.1275900000000001, "id": 29}, {"neighbors": [33, 22, 30, 42, 16], "hr8893": 4.4168960000000004, "id": 30}, {"neighbors": [43, 22, 24, 29, 41], "hr8893": 3.0174859999999999, "id": 31}, {"neighbors": [40, 17, 26, 32, 49], "hr8893": 9.9242450000000009, "id": 32}, {"neighbors": [45, 39, 18, 31, 23], "hr8893": 7.9739570000000004, "id": 33}, {"neighbors": [25, 29, 44, 42, 34], "hr8893": 5.0054639999999999, "id": 34}, {"neighbors": [36, 20, 25, 23, 39], "hr8893": 2.4638909999999998, "id": 35}, {"neighbors": [27, 46, 37, 28, 38], "hr8893": 0.0, "id": 36}, {"neighbors": [39, 34, 50, 48, 23], "hr8893": 7.377974, "id": 37}, {"neighbors": [47, 28, 35, 46, 21], "hr8893": 1.0038750000000001, "id": 38}, {"neighbors": [51, 26, 35, 40, 27], "hr8893": 3.1900469999999999, "id": 39}, {"neighbors": [36, 45, 48, 32, 23], "hr8893": 45.905405999999999, "id": 40}, {"neighbors": [49, 31, 38, 45, 57], "hr8893": 2.447597, "id": 41}, {"neighbors": [52, 43, 30, 24, 53], "hr8893": 1.2949580000000001, "id": 42}, {"neighbors": [43, 44, 33, 53, 29], "hr8893": 5.9330980000000002, "id": 43}, {"neighbors": [53, 42, 30, 41, 60], "hr8893": 4.1339969999999999, "id": 44}, {"neighbors": [33, 42, 59, 58, 34], "hr8893": 4.298311, "id": 45}, {"neighbors": [48, 39, 32, 56, 
40], "hr8893": 27.483827000000002, "id": 46}, {"neighbors": [35, 55, 47, 54, 37], "hr8893": 0.96979099999999996, "id": 47}, {"neighbors": [37, 54, 46, 35, 55], "hr8893": 0.0, "id": 48}, {"neighbors": [45, 50, 39, 62, 56], "hr8893": 2.934466, "id": 49}, {"neighbors": [40, 57, 51, 56, 45], "hr8893": 4.4564269999999997, "id": 50}, {"neighbors": [48, 36, 63, 59, 39], "hr8893": 4.629264, "id": 51}, {"neighbors": [61, 38, 55, 49, 57], "hr8893": 4.9415329999999997, "id": 52}, {"neighbors": [41, 64, 53, 43, 60], "hr8893": 3.9900410000000002, "id": 53}, {"neighbors": [43, 60, 64, 41, 42], "hr8893": 2.064324, "id": 54}, {"neighbors": [47, 55, 46, 37, 35], "hr8893": 3.0402529999999999, "id": 55}, {"neighbors": [54, 46, 61, 51, 67], "hr8893": 3.905411, "id": 56}, {"neighbors": [66, 62, 48, 45, 57], "hr8893": 4.3328389999999999, "id": 57}, {"neighbors": [49, 65, 61, 56, 51], "hr8893": 3.8941110000000001, "id": 58}, {"neighbors": [68, 59, 60, 44, 42], "hr8893": 6.8287940000000003, "id": 59}, {"neighbors": [69, 58, 63, 50, 44], "hr8893": 3.2639469999999999, "id": 60}, {"neighbors": [53, 68, 64, 58, 43], "hr8893": 3.2821630000000002, "id": 61}, {"neighbors": [67, 51, 55, 57, 65], "hr8893": 3.2957619999999999, "id": 62}, {"neighbors": [63, 48, 56, 66, 70], "hr8893": 7.2496790000000004, "id": 63}, {"neighbors": [62, 70, 69, 59, 50], "hr8893": 3.041846, "id": 64}, {"neighbors": [60, 53, 52, 71, 41], "hr8893": 1.618018, "id": 65}, {"neighbors": [57, 72, 66, 67, 75], "hr8893": 4.9108010000000002, "id": 66}, {"neighbors": [56, 75, 62, 74, 65], "hr8893": 1.991457, "id": 67}, {"neighbors": [61, 72, 65, 55, 57], "hr8893": 3.1461920000000001, "id": 68}, {"neighbors": [60, 58, 76, 71, 73], "hr8893": 7.2666500000000003, "id": 69}, {"neighbors": [73, 63, 59, 70, 77], "hr8893": 3.1109040000000001, "id": 70}, {"neighbors": [74, 63, 77, 69, 62], "hr8893": 2.9802710000000001, "id": 71}, {"neighbors": [68, 64, 76, 60, 53], "hr8893": 3.8667669999999998, "id": 72}, {"neighbors": [65, 67, 75, 61, 57], 
"hr8893": 1.8684080000000001, "id": 73}, {"neighbors": [69, 76, 77, 68, 59], "hr8893": 12.577033999999999, "id": 74}, {"neighbors": [75, 70, 66, 77, 62], "hr8893": 7.8035990000000002, "id": 75}, {"neighbors": [74, 66, 72, 65, 70], "hr8893": 3.4714900000000002, "id": 76}, {"neighbors": [68, 73, 71, 69, 60], "hr8893": 4.334822, "id": 77}, {"neighbors": [70, 74, 69, 73, 63], "hr8893": 8.4515370000000001, "id": 78}] diff --git a/src/py/crankshaft/test/test_clustering_getis.py b/src/py/crankshaft/test/test_clustering_getis.py new file mode 100644 index 0000000..fc26fc7 --- /dev/null +++ b/src/py/crankshaft/test/test_clustering_getis.py @@ -0,0 +1,38 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file + +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class GetisTest(unittest.TestCase): + """Testing class for Getis-Ord's G funtion""" + + def setUp(self): + plpy._reset() + print(help(cc)) + self.neighbors_data = json.loads(open(fixture_file('neighbors_getis.json')).read()) + self.getis_data = json.loads(open(fixture_file('getis.json')).read()) + + def test_getis_ord(self): + """Test Getis-Ord's G*""" + data = [ { 'id': d['id'], + 'attr1': d['hr8893'], + 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.getis_ord('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = [(row[0], row[1]) for row in result] + expected = self.getis_data + for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + self.assertEqual(res_quad, exp_quad) From 4e42625d7915919b835d4453df661c36f1561eed Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 13 Sep 2016 14:29:06 -0400 Subject: [PATCH 12/96] fix indexing of fixture 
--- .../test/fixtures/neighbors_getis.json | 861 +++++++++++++++++- 1 file changed, 860 insertions(+), 1 deletion(-) diff --git a/src/py/crankshaft/test/fixtures/neighbors_getis.json b/src/py/crankshaft/test/fixtures/neighbors_getis.json index 61e0c0e..878b955 100644 --- a/src/py/crankshaft/test/fixtures/neighbors_getis.json +++ b/src/py/crankshaft/test/fixtures/neighbors_getis.json @@ -1 +1,860 @@ -[{"neighbors": [2, 6, 5, 10, 3], "hr8893": 1.624458, "id": 1}, {"neighbors": [4, 7, 9, 14, 16], "hr8893": 2.2554919999999998, "id": 2}, {"neighbors": [6, 0, 3, 8, 10], "hr8893": 1.4678899999999999, "id": 3}, {"neighbors": [8, 12, 2, 4, 6], "hr8893": 2.4842559999999998, "id": 4}, {"neighbors": [9, 12, 1, 3, 8], "hr8893": 0.0, "id": 5}, {"neighbors": [11, 10, 0, 15, 6], "hr8893": 9.0486730000000009, "id": 6}, {"neighbors": [2, 10, 0, 8, 17], "hr8893": 6.0294889999999999, "id": 7}, {"neighbors": [14, 1, 22, 16, 9], "hr8893": 1.8003849999999999, "id": 8}, {"neighbors": [3, 12, 19, 2, 18], "hr8893": 4.581251, "id": 9}, {"neighbors": [4, 16, 12, 1, 20], "hr8893": 3.7906070000000001, "id": 10}, {"neighbors": [17, 6, 5, 15, 0], "hr8893": 1.4474359999999999, "id": 11}, {"neighbors": [15, 5, 13, 21, 27], "hr8893": 1.1919660000000001, "id": 12}, {"neighbors": [8, 19, 3, 9, 4], "hr8893": 0.0, "id": 13}, {"neighbors": [21, 11, 28, 27, 15], "hr8893": 1.608017, "id": 14}, {"neighbors": [7, 16, 22, 1, 29], "hr8893": 1.9498120000000001, "id": 15}, {"neighbors": [11, 27, 26, 5, 10], "hr8893": 0.74509000000000003, "id": 16}, {"neighbors": [25, 9, 14, 29, 20], "hr8893": 4.1733180000000001, "id": 17}, {"neighbors": [31, 10, 18, 26, 6], "hr8893": 3.7832520000000001, "id": 18}, {"neighbors": [32, 17, 23, 19, 8], "hr8893": 2.0851359999999999, "id": 19}, {"neighbors": [23, 12, 20, 8, 18], "hr8893": 2.1763020000000002, "id": 20}, {"neighbors": [25, 23, 19, 34, 9], "hr8893": 6.3093469999999998, "id": 21}, {"neighbors": [13, 28, 27, 11, 35], "hr8893": 10.855743, "id": 22}, {"neighbors": [30, 14, 
29, 24, 7], "hr8893": 4.211354, "id": 23}, {"neighbors": [19, 20, 34, 39, 36], "hr8893": 0.80481000000000003, "id": 24}, {"neighbors": [30, 41, 22, 43, 52], "hr8893": 3.2153309999999999, "id": 25}, {"neighbors": [20, 33, 16, 34, 29], "hr8893": 2.8336640000000002, "id": 26}, {"neighbors": [38, 31, 27, 15, 17], "hr8893": 1.5920399999999999, "id": 27}, {"neighbors": [35, 15, 21, 28, 26], "hr8893": 1.5711580000000001, "id": 28}, {"neighbors": [21, 37, 35, 27, 13], "hr8893": 3.1275900000000001, "id": 29}, {"neighbors": [33, 22, 30, 42, 16], "hr8893": 4.4168960000000004, "id": 30}, {"neighbors": [43, 22, 24, 29, 41], "hr8893": 3.0174859999999999, "id": 31}, {"neighbors": [40, 17, 26, 32, 49], "hr8893": 9.9242450000000009, "id": 32}, {"neighbors": [45, 39, 18, 31, 23], "hr8893": 7.9739570000000004, "id": 33}, {"neighbors": [25, 29, 44, 42, 34], "hr8893": 5.0054639999999999, "id": 34}, {"neighbors": [36, 20, 25, 23, 39], "hr8893": 2.4638909999999998, "id": 35}, {"neighbors": [27, 46, 37, 28, 38], "hr8893": 0.0, "id": 36}, {"neighbors": [39, 34, 50, 48, 23], "hr8893": 7.377974, "id": 37}, {"neighbors": [47, 28, 35, 46, 21], "hr8893": 1.0038750000000001, "id": 38}, {"neighbors": [51, 26, 35, 40, 27], "hr8893": 3.1900469999999999, "id": 39}, {"neighbors": [36, 45, 48, 32, 23], "hr8893": 45.905405999999999, "id": 40}, {"neighbors": [49, 31, 38, 45, 57], "hr8893": 2.447597, "id": 41}, {"neighbors": [52, 43, 30, 24, 53], "hr8893": 1.2949580000000001, "id": 42}, {"neighbors": [43, 44, 33, 53, 29], "hr8893": 5.9330980000000002, "id": 43}, {"neighbors": [53, 42, 30, 41, 60], "hr8893": 4.1339969999999999, "id": 44}, {"neighbors": [33, 42, 59, 58, 34], "hr8893": 4.298311, "id": 45}, {"neighbors": [48, 39, 32, 56, 40], "hr8893": 27.483827000000002, "id": 46}, {"neighbors": [35, 55, 47, 54, 37], "hr8893": 0.96979099999999996, "id": 47}, {"neighbors": [37, 54, 46, 35, 55], "hr8893": 0.0, "id": 48}, {"neighbors": [45, 50, 39, 62, 56], "hr8893": 2.934466, "id": 49}, {"neighbors": [40, 57, 
51, 56, 45], "hr8893": 4.4564269999999997, "id": 50}, {"neighbors": [48, 36, 63, 59, 39], "hr8893": 4.629264, "id": 51}, {"neighbors": [61, 38, 55, 49, 57], "hr8893": 4.9415329999999997, "id": 52}, {"neighbors": [41, 64, 53, 43, 60], "hr8893": 3.9900410000000002, "id": 53}, {"neighbors": [43, 60, 64, 41, 42], "hr8893": 2.064324, "id": 54}, {"neighbors": [47, 55, 46, 37, 35], "hr8893": 3.0402529999999999, "id": 55}, {"neighbors": [54, 46, 61, 51, 67], "hr8893": 3.905411, "id": 56}, {"neighbors": [66, 62, 48, 45, 57], "hr8893": 4.3328389999999999, "id": 57}, {"neighbors": [49, 65, 61, 56, 51], "hr8893": 3.8941110000000001, "id": 58}, {"neighbors": [68, 59, 60, 44, 42], "hr8893": 6.8287940000000003, "id": 59}, {"neighbors": [69, 58, 63, 50, 44], "hr8893": 3.2639469999999999, "id": 60}, {"neighbors": [53, 68, 64, 58, 43], "hr8893": 3.2821630000000002, "id": 61}, {"neighbors": [67, 51, 55, 57, 65], "hr8893": 3.2957619999999999, "id": 62}, {"neighbors": [63, 48, 56, 66, 70], "hr8893": 7.2496790000000004, "id": 63}, {"neighbors": [62, 70, 69, 59, 50], "hr8893": 3.041846, "id": 64}, {"neighbors": [60, 53, 52, 71, 41], "hr8893": 1.618018, "id": 65}, {"neighbors": [57, 72, 66, 67, 75], "hr8893": 4.9108010000000002, "id": 66}, {"neighbors": [56, 75, 62, 74, 65], "hr8893": 1.991457, "id": 67}, {"neighbors": [61, 72, 65, 55, 57], "hr8893": 3.1461920000000001, "id": 68}, {"neighbors": [60, 58, 76, 71, 73], "hr8893": 7.2666500000000003, "id": 69}, {"neighbors": [73, 63, 59, 70, 77], "hr8893": 3.1109040000000001, "id": 70}, {"neighbors": [74, 63, 77, 69, 62], "hr8893": 2.9802710000000001, "id": 71}, {"neighbors": [68, 64, 76, 60, 53], "hr8893": 3.8667669999999998, "id": 72}, {"neighbors": [65, 67, 75, 61, 57], "hr8893": 1.8684080000000001, "id": 73}, {"neighbors": [69, 76, 77, 68, 59], "hr8893": 12.577033999999999, "id": 74}, {"neighbors": [75, 70, 66, 77, 62], "hr8893": 7.8035990000000002, "id": 75}, {"neighbors": [74, 66, 72, 65, 70], "hr8893": 3.4714900000000002, "id": 76}, 
{"neighbors": [68, 73, 71, 69, 60], "hr8893": 4.334822, "id": 77}, {"neighbors": [70, 74, 69, 73, 63], "hr8893": 8.4515370000000001, "id": 78}] +[ + { + "neighbors": [ + 2, + 6, + 5, + 10, + 3 + ], + "id": 0, + "value": 1.624458 + }, + { + "neighbors": [ + 4, + 7, + 9, + 14, + 16 + ], + "id": 1, + "value": 2.255492 + }, + { + "neighbors": [ + 6, + 0, + 3, + 8, + 10 + ], + "id": 2, + "value": 1.46789 + }, + { + "neighbors": [ + 8, + 12, + 2, + 4, + 6 + ], + "id": 3, + "value": 2.484256 + }, + { + "neighbors": [ + 9, + 12, + 1, + 3, + 8 + ], + "id": 4, + "value": 0 + }, + { + "neighbors": [ + 11, + 10, + 0, + 15, + 6 + ], + "id": 5, + "value": 9.048673 + }, + { + "neighbors": [ + 2, + 10, + 0, + 8, + 17 + ], + "id": 6, + "value": 6.029489 + }, + { + "neighbors": [ + 14, + 1, + 22, + 16, + 9 + ], + "id": 7, + "value": 1.800385 + }, + { + "neighbors": [ + 3, + 12, + 19, + 2, + 18 + ], + "id": 8, + "value": 4.581251 + }, + { + "neighbors": [ + 4, + 16, + 12, + 1, + 20 + ], + "id": 9, + "value": 3.790607 + }, + { + "neighbors": [ + 17, + 6, + 5, + 15, + 0 + ], + "id": 10, + "value": 1.447436 + }, + { + "neighbors": [ + 15, + 5, + 13, + 21, + 27 + ], + "id": 11, + "value": 1.191966 + }, + { + "neighbors": [ + 8, + 19, + 3, + 9, + 4 + ], + "id": 12, + "value": 0 + }, + { + "neighbors": [ + 21, + 11, + 28, + 27, + 15 + ], + "id": 13, + "value": 1.608017 + }, + { + "neighbors": [ + 7, + 16, + 22, + 1, + 29 + ], + "id": 14, + "value": 1.949812 + }, + { + "neighbors": [ + 11, + 27, + 26, + 5, + 10 + ], + "id": 15, + "value": 0.74509 + }, + { + "neighbors": [ + 25, + 9, + 14, + 29, + 20 + ], + "id": 16, + "value": 4.173318 + }, + { + "neighbors": [ + 31, + 10, + 18, + 26, + 6 + ], + "id": 17, + "value": 3.783252 + }, + { + "neighbors": [ + 32, + 17, + 23, + 19, + 8 + ], + "id": 18, + "value": 2.085136 + }, + { + "neighbors": [ + 23, + 12, + 20, + 8, + 18 + ], + "id": 19, + "value": 2.176302 + }, + { + "neighbors": [ + 25, + 23, + 19, + 34, + 9 + ], + "id": 20, + "value": 
6.309347 + }, + { + "neighbors": [ + 13, + 28, + 27, + 11, + 35 + ], + "id": 21, + "value": 10.855743 + }, + { + "neighbors": [ + 30, + 14, + 29, + 24, + 7 + ], + "id": 22, + "value": 4.211354 + }, + { + "neighbors": [ + 19, + 20, + 34, + 39, + 36 + ], + "id": 23, + "value": 0.80481 + }, + { + "neighbors": [ + 30, + 41, + 22, + 43, + 52 + ], + "id": 24, + "value": 3.215331 + }, + { + "neighbors": [ + 20, + 33, + 16, + 34, + 29 + ], + "id": 25, + "value": 2.833664 + }, + { + "neighbors": [ + 38, + 31, + 27, + 15, + 17 + ], + "id": 26, + "value": 1.59204 + }, + { + "neighbors": [ + 35, + 15, + 21, + 28, + 26 + ], + "id": 27, + "value": 1.571158 + }, + { + "neighbors": [ + 21, + 37, + 35, + 27, + 13 + ], + "id": 28, + "value": 3.12759 + }, + { + "neighbors": [ + 33, + 22, + 30, + 42, + 16 + ], + "id": 29, + "value": 4.416896 + }, + { + "neighbors": [ + 43, + 22, + 24, + 29, + 41 + ], + "id": 30, + "value": 3.017486 + }, + { + "neighbors": [ + 40, + 17, + 26, + 32, + 49 + ], + "id": 31, + "value": 9.924245 + }, + { + "neighbors": [ + 45, + 39, + 18, + 31, + 23 + ], + "id": 32, + "value": 7.973957 + }, + { + "neighbors": [ + 25, + 29, + 44, + 42, + 34 + ], + "id": 33, + "value": 5.005464 + }, + { + "neighbors": [ + 36, + 20, + 25, + 23, + 39 + ], + "id": 34, + "value": 2.463891 + }, + { + "neighbors": [ + 27, + 46, + 37, + 28, + 38 + ], + "id": 35, + "value": 0 + }, + { + "neighbors": [ + 39, + 34, + 50, + 48, + 23 + ], + "id": 36, + "value": 7.377974 + }, + { + "neighbors": [ + 47, + 28, + 35, + 46, + 21 + ], + "id": 37, + "value": 1.003875 + }, + { + "neighbors": [ + 51, + 26, + 35, + 40, + 27 + ], + "id": 38, + "value": 3.190047 + }, + { + "neighbors": [ + 36, + 45, + 48, + 32, + 23 + ], + "id": 39, + "value": 45.905406 + }, + { + "neighbors": [ + 49, + 31, + 38, + 45, + 57 + ], + "id": 40, + "value": 2.447597 + }, + { + "neighbors": [ + 52, + 43, + 30, + 24, + 53 + ], + "id": 41, + "value": 1.294958 + }, + { + "neighbors": [ + 43, + 44, + 33, + 53, + 29 + ], + "id": 
42, + "value": 5.933098 + }, + { + "neighbors": [ + 53, + 42, + 30, + 41, + 60 + ], + "id": 43, + "value": 4.133997 + }, + { + "neighbors": [ + 33, + 42, + 59, + 58, + 34 + ], + "id": 44, + "value": 4.298311 + }, + { + "neighbors": [ + 48, + 39, + 32, + 56, + 40 + ], + "id": 45, + "value": 27.483827 + }, + { + "neighbors": [ + 35, + 55, + 47, + 54, + 37 + ], + "id": 46, + "value": 0.969791 + }, + { + "neighbors": [ + 37, + 54, + 46, + 35, + 55 + ], + "id": 47, + "value": 0 + }, + { + "neighbors": [ + 45, + 50, + 39, + 62, + 56 + ], + "id": 48, + "value": 2.934466 + }, + { + "neighbors": [ + 40, + 57, + 51, + 56, + 45 + ], + "id": 49, + "value": 4.456427 + }, + { + "neighbors": [ + 48, + 36, + 63, + 59, + 39 + ], + "id": 50, + "value": 4.629264 + }, + { + "neighbors": [ + 61, + 38, + 55, + 49, + 57 + ], + "id": 51, + "value": 4.941533 + }, + { + "neighbors": [ + 41, + 64, + 53, + 43, + 60 + ], + "id": 52, + "value": 3.990041 + }, + { + "neighbors": [ + 43, + 60, + 64, + 41, + 42 + ], + "id": 53, + "value": 2.064324 + }, + { + "neighbors": [ + 47, + 55, + 46, + 37, + 35 + ], + "id": 54, + "value": 3.040253 + }, + { + "neighbors": [ + 54, + 46, + 61, + 51, + 67 + ], + "id": 55, + "value": 3.905411 + }, + { + "neighbors": [ + 66, + 62, + 48, + 45, + 57 + ], + "id": 56, + "value": 4.332839 + }, + { + "neighbors": [ + 49, + 65, + 61, + 56, + 51 + ], + "id": 57, + "value": 3.894111 + }, + { + "neighbors": [ + 68, + 59, + 60, + 44, + 42 + ], + "id": 58, + "value": 6.828794 + }, + { + "neighbors": [ + 69, + 58, + 63, + 50, + 44 + ], + "id": 59, + "value": 3.263947 + }, + { + "neighbors": [ + 53, + 68, + 64, + 58, + 43 + ], + "id": 60, + "value": 3.282163 + }, + { + "neighbors": [ + 67, + 51, + 55, + 57, + 65 + ], + "id": 61, + "value": 3.295762 + }, + { + "neighbors": [ + 63, + 48, + 56, + 66, + 70 + ], + "id": 62, + "value": 7.249679 + }, + { + "neighbors": [ + 62, + 70, + 69, + 59, + 50 + ], + "id": 63, + "value": 3.041846 + }, + { + "neighbors": [ + 60, + 53, + 52, + 71, 
+ 41 + ], + "id": 64, + "value": 1.618018 + }, + { + "neighbors": [ + 57, + 72, + 66, + 67, + 75 + ], + "id": 65, + "value": 4.910801 + }, + { + "neighbors": [ + 56, + 75, + 62, + 74, + 65 + ], + "id": 66, + "value": 1.991457 + }, + { + "neighbors": [ + 61, + 72, + 65, + 55, + 57 + ], + "id": 67, + "value": 3.146192 + }, + { + "neighbors": [ + 60, + 58, + 76, + 71, + 73 + ], + "id": 68, + "value": 7.26665 + }, + { + "neighbors": [ + 73, + 63, + 59, + 70, + 77 + ], + "id": 69, + "value": 3.110904 + }, + { + "neighbors": [ + 74, + 63, + 77, + 69, + 62 + ], + "id": 70, + "value": 2.980271 + }, + { + "neighbors": [ + 68, + 64, + 76, + 60, + 53 + ], + "id": 71, + "value": 3.866767 + }, + { + "neighbors": [ + 65, + 67, + 75, + 61, + 57 + ], + "id": 72, + "value": 1.868408 + }, + { + "neighbors": [ + 69, + 76, + 77, + 68, + 59 + ], + "id": 73, + "value": 12.577034 + }, + { + "neighbors": [ + 75, + 70, + 66, + 77, + 62 + ], + "id": 74, + "value": 7.803599 + }, + { + "neighbors": [ + 74, + 66, + 72, + 65, + 70 + ], + "id": 75, + "value": 3.47149 + }, + { + "neighbors": [ + 68, + 73, + 71, + 69, + 60 + ], + "id": 76, + "value": 4.334822 + }, + { + "neighbors": [ + 70, + 74, + 69, + 73, + 63 + ], + "id": 77, + "value": 8.451537 + } +] From c392aec98a48ff9c56e05e238b1287fef3e7f19b Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 13 Sep 2016 15:32:32 -0400 Subject: [PATCH 13/96] re-ordered columns --- src/py/crankshaft/test/fixtures/getis.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/test/fixtures/getis.json b/src/py/crankshaft/test/fixtures/getis.json index f2f7d88..20fe1a9 100644 --- a/src/py/crankshaft/test/fixtures/getis.json +++ b/src/py/crankshaft/test/fixtures/getis.json @@ -1 +1 @@ -[[0.43099999999999999, -0.4491160909028587, 0.32667395669425392], [0.021999999999999999, -0.93788225530474956, 0.17415246162388542], [0.44600000000000001, -0.4491160909028587, 0.32667395669425392], [0.216, -0.64751176059715954, 
0.25865039275726565], [0.11700000000000001, -0.96347562762330119, 0.1676544447116961], [0.049000000000000002, -0.20152426147617952, 0.42014433032225806], [0.072999999999999995, -0.46619525535633877, 0.32053787260380839], [0.16300000000000001, -0.74976349152754251, 0.22669858024734424], [0.052999999999999999, -0.64751176059715954, 0.25865039275726565], [0.156, -0.56083197633324544, 0.28745604294545346], [0.29599999999999999, -0.24280805846929457, 0.40407704630238783], [0.29099999999999998, -0.26928692847374969, 0.39385444517601476], [0.44, -0.67722330498207772, 0.24913214171492104], [0.16500000000000001, -0.011684382726338413, 0.49533871177388633], [0.378, -0.56473075182761978, 0.28612845711175672], [0.042999999999999997, -0.99072103490325258, 0.1609109084310375], [0.45600000000000002, -0.28743876271061125, 0.38688819224593796], [0.156, 0.13295980219309725, 0.4471125856868039], [0.19500000000000001, 0.0063207047268819955, 0.49747842043259183], [0.0070000000000000001, -1.0415760797098768, 0.1488041317793396], [0.111, -0.36942477246818262, 0.35590556804466089], [0.22500000000000001, 0.17506551624725114, 0.43051407691606425], [0.26900000000000002, -0.44363455341924385, 0.32865340985373048], [0.33800000000000002, -0.43284736233536597, 0.33256283131987785], [0.189, -0.60291193010388022, 0.27328363953070622], [0.20799999999999999, 0.035856268747985663, 0.48569848295465989], [0.123, 0.089678570182411793, 0.4642713229152271], [0.0030000000000000001, -1.1057224660076166, 0.13442333147768415], [0.16800000000000001, 0.11677761660057258, 0.45351814086906583], [0.29999999999999999, -0.013808110669475293, 0.49449153588626749], [0.378, -0.23290337417697893, 0.40791821913691739], [0.41699999999999998, 0.22945579816510672, 0.40925733604961323], [0.002, 6.5207630666532674, 3.4975355944766306e-11], [0.46200000000000002, -0.14672827921301743, 0.44167324829783083], [0.126, 0.22908126100922532, 0.40940287888500482], [0.047, -1.0840432095769956, 0.13917282042962942], 
[0.017000000000000001, 4.0493273511601489, 2.5682527508119612e-05], [0.067000000000000004, -0.93058949009641589, 0.17603297655968531], [0.435, -0.39105578866749485, 0.34787799959221344], [0.0030000000000000001, 6.4632623511010854, 5.1234683162704187e-11], [0.098000000000000004, 0.29440403358766087, 0.38422459253128149], [0.38400000000000001, -0.42044605311189681, 0.33707981556243372], [0.38700000000000001, 0.056785871984068648, 0.47735788415038005], [0.36899999999999999, -0.15875022725747437, 0.43693283214176271], [0.20399999999999999, 0.14086541129997771, 0.44398813370614665], [0.014, 6.0345506436163996, 7.9702822119998018e-10], [0.13400000000000001, -0.85883340027064359, 0.19521621870003536], [0.14499999999999999, -0.93901580426004294, 0.17386131505073421], [0.029000000000000001, 2.052209368389633, 0.020074659704574338], [0.45300000000000001, -0.2873860843780221, 0.38690835753455055], [0.27200000000000002, 0.11238736962962483, 0.45525813472736054], [0.47599999999999998, -0.22667990193583551, 0.41033632813474463], [0.060999999999999999, -0.66318887096788159, 0.25360479911357758], [0.47599999999999998, -0.41451366242647014, 0.33924898169485895], [0.14499999999999999, -0.65907426867863195, 0.25492403906163263], [0.127, -0.56550838435631712, 0.28586401153505392], [0.311, -0.019571757025783815, 0.49219249707263191], [0.27700000000000002, -0.049735022936677371, 0.48016677335804769], [0.20899999999999999, 0.34564692985400752, 0.36480403618322277], [0.26200000000000001, -0.055301366746933138, 0.47794918666392716], [0.315, -0.11227385176628404, 0.45530313698210811], [0.39900000000000002, -0.23091105262392728, 0.4086919520030482], [0.38, -0.05314541432928175, 0.47880802357740249], [0.29499999999999998, -0.048726126739086051, 0.48056877723722968], [0.29999999999999999, -0.65725648028245498, 0.25550800720533962], [0.35499999999999998, -0.29942829331962884, 0.38230663846055501], [0.39000000000000001, -0.38409317960223854, 0.35045469320011735], [0.249, -0.52740757430541685, 
0.29895529238584306], [0.246, 0.34740441796104815, 0.36414375737078841], [0.089999999999999997, 0.47786308046182885, 0.31637383012047438], [0.23400000000000001, 0.0047163311140149293, 0.49811846308566454], [0.307, -0.098930796747205507, 0.46059660828759452], [0.40500000000000003, -0.37158786444006753, 0.35509986295795459], [0.45800000000000002, 0.60261000309518942, 0.27338408212548115], [0.44600000000000001, 0.046168582994860159, 0.48158794144995187], [0.28499999999999998, -0.049232649167827866, 0.48036694626611731], [0.050000000000000003, 1.0035582996961303, 0.15779578142846606], [0.218, 0.52664582141189142, 0.29921978282283035]] +[[-0.4491160909028587, 0.43099999999999999, 0.32667395669425392], [-0.93788225530474956, 0.021999999999999999, 0.17415246162388542], [-0.4491160909028587, 0.44600000000000001, 0.32667395669425392], [-0.64751176059715954, 0.216, 0.25865039275726565], [-0.96347562762330119, 0.11700000000000001, 0.1676544447116961], [-0.20152426147617952, 0.049000000000000002, 0.42014433032225806], [-0.46619525535633877, 0.072999999999999995, 0.32053787260380839], [-0.74976349152754251, 0.16300000000000001, 0.22669858024734424], [-0.64751176059715954, 0.052999999999999999, 0.25865039275726565], [-0.56083197633324544, 0.156, 0.28745604294545346], [-0.24280805846929457, 0.29599999999999999, 0.40407704630238783], [-0.26928692847374969, 0.29099999999999998, 0.39385444517601476], [-0.67722330498207772, 0.44, 0.24913214171492104], [-0.011684382726338413, 0.16500000000000001, 0.49533871177388633], [-0.56473075182761978, 0.378, 0.28612845711175672], [-0.99072103490325258, 0.042999999999999997, 0.1609109084310375], [-0.28743876271061125, 0.45600000000000002, 0.38688819224593796], [0.13295980219309725, 0.156, 0.4471125856868039], [0.0063207047268819955, 0.19500000000000001, 0.49747842043259183], [-1.0415760797098768, 0.0070000000000000001, 0.1488041317793396], [-0.36942477246818262, 0.111, 0.35590556804466089], [0.17506551624725114, 0.22500000000000001, 
0.43051407691606425], [-0.44363455341924385, 0.26900000000000002, 0.32865340985373048], [-0.43284736233536597, 0.33800000000000002, 0.33256283131987785], [-0.60291193010388022, 0.189, 0.27328363953070622], [0.035856268747985663, 0.20799999999999999, 0.48569848295465989], [0.089678570182411793, 0.123, 0.4642713229152271], [-1.1057224660076166, 0.0030000000000000001, 0.13442333147768415], [0.11677761660057258, 0.16800000000000001, 0.45351814086906583], [-0.013808110669475293, 0.29999999999999999, 0.49449153588626749], [-0.23290337417697893, 0.378, 0.40791821913691739], [0.22945579816510672, 0.41699999999999998, 0.40925733604961323], [6.5207630666532674, 0.002, 3.4975355944766306e-11], [-0.14672827921301743, 0.46200000000000002, 0.44167324829783083], [0.22908126100922532, 0.126, 0.40940287888500482], [-1.0840432095769956, 0.047, 0.13917282042962942], [4.0493273511601489, 0.017000000000000001, 2.5682527508119612e-05], [-0.93058949009641589, 0.067000000000000004, 0.17603297655968531], [-0.39105578866749485, 0.435, 0.34787799959221344], [6.4632623511010854, 0.0030000000000000001, 5.1234683162704187e-11], [0.29440403358766087, 0.098000000000000004, 0.38422459253128149], [-0.42044605311189681, 0.38400000000000001, 0.33707981556243372], [0.056785871984068648, 0.38700000000000001, 0.47735788415038005], [-0.15875022725747437, 0.36899999999999999, 0.43693283214176271], [0.14086541129997771, 0.20399999999999999, 0.44398813370614665], [6.0345506436163996, 0.014, 7.9702822119998018e-10], [-0.85883340027064359, 0.13400000000000001, 0.19521621870003536], [-0.93901580426004294, 0.14499999999999999, 0.17386131505073421], [2.052209368389633, 0.029000000000000001, 0.020074659704574338], [-0.2873860843780221, 0.45300000000000001, 0.38690835753455055], [0.11238736962962483, 0.27200000000000002, 0.45525813472736054], [-0.22667990193583551, 0.47599999999999998, 0.41033632813474463], [-0.66318887096788159, 0.060999999999999999, 0.25360479911357758], [-0.41451366242647014, 
0.47599999999999998, 0.33924898169485895], [-0.65907426867863195, 0.14499999999999999, 0.25492403906163263], [-0.56550838435631712, 0.127, 0.28586401153505392], [-0.019571757025783815, 0.311, 0.49219249707263191], [-0.049735022936677371, 0.27700000000000002, 0.48016677335804769], [0.34564692985400752, 0.20899999999999999, 0.36480403618322277], [-0.055301366746933138, 0.26200000000000001, 0.47794918666392716], [-0.11227385176628404, 0.315, 0.45530313698210811], [-0.23091105262392728, 0.39900000000000002, 0.4086919520030482], [-0.05314541432928175, 0.38, 0.47880802357740249], [-0.048726126739086051, 0.29499999999999998, 0.48056877723722968], [-0.65725648028245498, 0.29999999999999999, 0.25550800720533962], [-0.29942829331962884, 0.35499999999999998, 0.38230663846055501], [-0.38409317960223854, 0.39000000000000001, 0.35045469320011735], [-0.52740757430541685, 0.249, 0.29895529238584306], [0.34740441796104815, 0.246, 0.36414375737078841], [0.47786308046182885, 0.089999999999999997, 0.31637383012047438], [0.0047163311140149293, 0.23400000000000001, 0.49811846308566454], [-0.098930796747205507, 0.307, 0.46059660828759452], [-0.37158786444006753, 0.40500000000000003, 0.35509986295795459], [0.60261000309518942, 0.45800000000000002, 0.27338408212548115], [0.046168582994860159, 0.44600000000000001, 0.48158794144995187], [-0.049232649167827866, 0.28499999999999998, 0.48036694626611731], [1.0035582996961303, 0.050000000000000003, 0.15779578142846606], [0.52664582141189142, 0.218, 0.29921978282283035]] \ No newline at end of file From 2937c97fea8f2d55ba7787c979879ad8a363dd4b Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 13 Sep 2016 17:47:30 -0400 Subject: [PATCH 14/96] including correct fixtures --- src/py/crankshaft/test/fixtures/getis.json | 79 +++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/src/py/crankshaft/test/fixtures/getis.json b/src/py/crankshaft/test/fixtures/getis.json index 20fe1a9..71d7728 100644 --- 
a/src/py/crankshaft/test/fixtures/getis.json +++ b/src/py/crankshaft/test/fixtures/getis.json @@ -1 +1,78 @@ -[[-0.4491160909028587, 0.43099999999999999, 0.32667395669425392], [-0.93788225530474956, 0.021999999999999999, 0.17415246162388542], [-0.4491160909028587, 0.44600000000000001, 0.32667395669425392], [-0.64751176059715954, 0.216, 0.25865039275726565], [-0.96347562762330119, 0.11700000000000001, 0.1676544447116961], [-0.20152426147617952, 0.049000000000000002, 0.42014433032225806], [-0.46619525535633877, 0.072999999999999995, 0.32053787260380839], [-0.74976349152754251, 0.16300000000000001, 0.22669858024734424], [-0.64751176059715954, 0.052999999999999999, 0.25865039275726565], [-0.56083197633324544, 0.156, 0.28745604294545346], [-0.24280805846929457, 0.29599999999999999, 0.40407704630238783], [-0.26928692847374969, 0.29099999999999998, 0.39385444517601476], [-0.67722330498207772, 0.44, 0.24913214171492104], [-0.011684382726338413, 0.16500000000000001, 0.49533871177388633], [-0.56473075182761978, 0.378, 0.28612845711175672], [-0.99072103490325258, 0.042999999999999997, 0.1609109084310375], [-0.28743876271061125, 0.45600000000000002, 0.38688819224593796], [0.13295980219309725, 0.156, 0.4471125856868039], [0.0063207047268819955, 0.19500000000000001, 0.49747842043259183], [-1.0415760797098768, 0.0070000000000000001, 0.1488041317793396], [-0.36942477246818262, 0.111, 0.35590556804466089], [0.17506551624725114, 0.22500000000000001, 0.43051407691606425], [-0.44363455341924385, 0.26900000000000002, 0.32865340985373048], [-0.43284736233536597, 0.33800000000000002, 0.33256283131987785], [-0.60291193010388022, 0.189, 0.27328363953070622], [0.035856268747985663, 0.20799999999999999, 0.48569848295465989], [0.089678570182411793, 0.123, 0.4642713229152271], [-1.1057224660076166, 0.0030000000000000001, 0.13442333147768415], [0.11677761660057258, 0.16800000000000001, 0.45351814086906583], [-0.013808110669475293, 0.29999999999999999, 0.49449153588626749], 
[-0.23290337417697893, 0.378, 0.40791821913691739], [0.22945579816510672, 0.41699999999999998, 0.40925733604961323], [6.5207630666532674, 0.002, 3.4975355944766306e-11], [-0.14672827921301743, 0.46200000000000002, 0.44167324829783083], [0.22908126100922532, 0.126, 0.40940287888500482], [-1.0840432095769956, 0.047, 0.13917282042962942], [4.0493273511601489, 0.017000000000000001, 2.5682527508119612e-05], [-0.93058949009641589, 0.067000000000000004, 0.17603297655968531], [-0.39105578866749485, 0.435, 0.34787799959221344], [6.4632623511010854, 0.0030000000000000001, 5.1234683162704187e-11], [0.29440403358766087, 0.098000000000000004, 0.38422459253128149], [-0.42044605311189681, 0.38400000000000001, 0.33707981556243372], [0.056785871984068648, 0.38700000000000001, 0.47735788415038005], [-0.15875022725747437, 0.36899999999999999, 0.43693283214176271], [0.14086541129997771, 0.20399999999999999, 0.44398813370614665], [6.0345506436163996, 0.014, 7.9702822119998018e-10], [-0.85883340027064359, 0.13400000000000001, 0.19521621870003536], [-0.93901580426004294, 0.14499999999999999, 0.17386131505073421], [2.052209368389633, 0.029000000000000001, 0.020074659704574338], [-0.2873860843780221, 0.45300000000000001, 0.38690835753455055], [0.11238736962962483, 0.27200000000000002, 0.45525813472736054], [-0.22667990193583551, 0.47599999999999998, 0.41033632813474463], [-0.66318887096788159, 0.060999999999999999, 0.25360479911357758], [-0.41451366242647014, 0.47599999999999998, 0.33924898169485895], [-0.65907426867863195, 0.14499999999999999, 0.25492403906163263], [-0.56550838435631712, 0.127, 0.28586401153505392], [-0.019571757025783815, 0.311, 0.49219249707263191], [-0.049735022936677371, 0.27700000000000002, 0.48016677335804769], [0.34564692985400752, 0.20899999999999999, 0.36480403618322277], [-0.055301366746933138, 0.26200000000000001, 0.47794918666392716], [-0.11227385176628404, 0.315, 0.45530313698210811], [-0.23091105262392728, 0.39900000000000002, 0.4086919520030482], 
[-0.05314541432928175, 0.38, 0.47880802357740249], [-0.048726126739086051, 0.29499999999999998, 0.48056877723722968], [-0.65725648028245498, 0.29999999999999999, 0.25550800720533962], [-0.29942829331962884, 0.35499999999999998, 0.38230663846055501], [-0.38409317960223854, 0.39000000000000001, 0.35045469320011735], [-0.52740757430541685, 0.249, 0.29895529238584306], [0.34740441796104815, 0.246, 0.36414375737078841], [0.47786308046182885, 0.089999999999999997, 0.31637383012047438], [0.0047163311140149293, 0.23400000000000001, 0.49811846308566454], [-0.098930796747205507, 0.307, 0.46059660828759452], [-0.37158786444006753, 0.40500000000000003, 0.35509986295795459], [0.60261000309518942, 0.45800000000000002, 0.27338408212548115], [0.046168582994860159, 0.44600000000000001, 0.48158794144995187], [-0.049232649167827866, 0.28499999999999998, 0.48036694626611731], [1.0035582996961303, 0.050000000000000003, 0.15779578142846606], [0.52664582141189142, 0.218, 0.29921978282283035]] \ No newline at end of file +[[-0.37378594228210338, 0.436, 0.35428178954558454], + [-0.95225687055925445, 0.065000000000000002, 0.17048336261993491], + [-0.69155363829052829, 0.313, 0.2446088425605718], + [-0.91005743729633215, 0.085999999999999993, 0.18139610975939735], + [-1.0132869736739361, 0.13200000000000001, 0.1554615538579156], + [-0.51711928006609076, 0.058999999999999997, 0.30253644777604194], + [-0.59915606178126646, 0.125, 0.27453440990643974], + [-0.65270328962244883, 0.314, 0.25697379065855031], + [-1.035818892716347, 0.016, 0.15014332094164029], + [-0.77022454873249124, 0.14899999999999999, 0.22058335212423197], + [-0.33280112478125556, 0.40500000000000003, 0.36964220456990138], + [-0.16619696958659569, 0.28000000000000003, 0.43400097210156352], + [-1.0189197578270577, 0.114, 0.15412053143437432], + [-0.58736356248618182, 0.378, 0.27847978008948393], + [-0.60815535737753856, 0.34699999999999998, 0.27154222002447759], + [-0.83654622542342216, 0.22, 0.20142384153812443], + 
[-0.27623528229314609, 0.48099999999999998, 0.39118367986278957], + [-0.17751012385821857, 0.40600000000000003, 0.42955385691066428], + [-0.42339868568527872, 0.499, 0.33600220182364238], + [-0.81090507212288232, 0.17000000000000001, 0.2087100936430748], + [-0.63864423309742102, 0.089999999999999997, 0.26152719947142589], + [-0.64036180904066409, 0.0070000000000000001, 0.2609687028452613], + [-0.62209633811403076, 0.217, 0.26693926135549806], + [2.6802178678493869, 0.019, 0.0036787128295844296], + [-0.53304848636954871, 0.314, 0.29700000849115427], + [-0.15325607655300719, 0.33900000000000002, 0.43909816861874729], + [-0.46599673142949988, 0.5, 0.32060892008190012], + [-0.67328453973424696, 0.34000000000000002, 0.25038314326507338], + [-0.65374072057172605, 0.27300000000000002, 0.25663943186717253], + [-0.042646376611931783, 0.33900000000000002, 0.48299171295314569], + [-0.50268629316736713, 0.36899999999999999, 0.30759242403716802], + [0.20061122903056294, 0.41199999999999998, 0.42050128853782709], + [4.7529210422438366, 0.002, 1.0024933114749501e-06], + [-0.17112791409785774, 0.45500000000000002, 0.43206159366732011], + [2.7269760223697093, 0.025000000000000001, 0.0031958841790815651], + [-1.2443986350856617, 0.023, 0.1066764425086173], + [2.6146417757460672, 0.058999999999999997, 0.0044660540776073621], + [-0.81089418923393874, 0.217, 0.20871321876749027], + [-0.96842037905996448, 0.051999999999999998, 0.16641723288102916], + [4.632217640632744, 0.014999999999999999, 1.8088477418132243e-06], + [1.7098997741760702, 0.066000000000000003, 0.043642204110986293], + [-0.68576671544237189, 0.30399999999999999, 0.24643011428027894], + [-0.10705643473845843, 0.46100000000000002, 0.45737210425657626], + [-0.54280332475246651, 0.29499999999999998, 0.29363261191793877], + [0.031036400804321389, 0.26000000000000001, 0.48762025500133288], + [4.5324686370772209, 0.021999999999999999, 2.9149167000142029e-06], + [-1.3114835366024926, 0.0089999999999999993, 0.09484722615634944], 
+ [-1.3114835366024926, 0.0080000000000000002, 0.09484722615634944], + [4.6361327065826368, 0.001, 1.7749405375466765e-06], + [1.4367662910941781, 0.081000000000000003, 0.075392205783569644], + [2.8306701087322108, 0.031, 0.0023225301610166893], + [-0.26132309263094761, 0.47599999999999998, 0.39692167988511706], + [-0.78055710491931951, 0.13400000000000001, 0.21753151443265129], + [-0.64234747392787916, 0.33300000000000002, 0.26032379813179585], + [-1.3114835366024926, 0.002, 0.09484722615634944], + [-0.57318180672306673, 0.26400000000000001, 0.28326080073223758], + [1.4602423896649199, 0.085999999999999993, 0.072111734510341252], + [-0.10852292180607989, 0.33700000000000002, 0.45679044945885094], + [0.25007822707788863, 0.25800000000000001, 0.4012634266959908], + [-0.15535540716918589, 0.36799999999999999, 0.43827056859443592], + [-0.15387021291338146, 0.34300000000000003, 0.43885603550771368], + [-0.23212287899595097, 0.434, 0.40822128763053078], + [-0.34331698296028734, 0.255, 0.36568000291448455], + [-0.21917003612352226, 0.38300000000000001, 0.41325879787572739], + [-0.79956517297385543, 0.20000000000000001, 0.21198138620767248], + [-0.57435424239923683, 0.20100000000000001, 0.28286405516217616], + [0.17090357402483336, 0.17599999999999999, 0.43214979317279412], + [-0.45071423902325392, 0.39500000000000002, 0.32609775926146312], + [0.76813695849036678, 0.16500000000000001, 0.22120291089357114], + [0.43164294033005113, 0.14899999999999999, 0.33300047214912831], + [0.3756136313798501, 0.16, 0.35360207763890095], + [-0.35027449210479344, 0.47599999999999998, 0.36306635326063197], + [-0.48157903675663827, 0.45500000000000002, 0.31505251051764338], + [0.82850242294957743, 0.187, 0.20369301166122333], + [0.32654794875356141, 0.23599999999999999, 0.3720049088953894], + [-0.30807434754194035, 0.497, 0.37901288012280243], + [0.50368346846574807, 0.159, 0.30724191359295261], + [0.75454835573966283, 0.13500000000000001, 0.22526001043744448]] From 
5d109acd8df8e481ce136b1a001ed7fea605b61e Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 13 Sep 2016 17:59:15 -0400 Subject: [PATCH 15/96] remove debug messages --- src/py/crankshaft/test/test_clustering_getis.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/py/crankshaft/test/test_clustering_getis.py b/src/py/crankshaft/test/test_clustering_getis.py index fc26fc7..c10c8ab 100644 --- a/src/py/crankshaft/test/test_clustering_getis.py +++ b/src/py/crankshaft/test/test_clustering_getis.py @@ -19,20 +19,19 @@ class GetisTest(unittest.TestCase): def setUp(self): plpy._reset() - print(help(cc)) self.neighbors_data = json.loads(open(fixture_file('neighbors_getis.json')).read()) self.getis_data = json.loads(open(fixture_file('getis.json')).read()) def test_getis_ord(self): """Test Getis-Ord's G*""" data = [ { 'id': d['id'], - 'attr1': d['hr8893'], + 'value': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] plpy._define_result('select', data) random_seeds.set_random_seeds(1234) - result = cc.getis_ord('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = cc.getis_ord('subquery', 'value', 'knn', 5, 999, 'the_geom', 'id') result = [(row[0], row[1]) for row in result] - expected = self.getis_data + expected = np.array(self.getis_data)[:, 0:2] for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected): - self.assertAlmostEqual(res_val, exp_val) - self.assertEqual(res_quad, exp_quad) + self.assertAlmostEqual(res_z, exp_z, delta=1e-2) + self.assertEqual(res_p, exp_p, delta=1e-2) From b5445da3030ddfc4d7f93e6d08544d3247a7d6f5 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 14 Sep 2016 12:45:43 +0000 Subject: [PATCH 16/96] remove kinks in test --- src/py/crankshaft/test/test_clustering_getis.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/py/crankshaft/test/test_clustering_getis.py b/src/py/crankshaft/test/test_clustering_getis.py index 
fc26fc7..0f3eb3f 100644 --- a/src/py/crankshaft/test/test_clustering_getis.py +++ b/src/py/crankshaft/test/test_clustering_getis.py @@ -19,20 +19,19 @@ class GetisTest(unittest.TestCase): def setUp(self): plpy._reset() - print(help(cc)) self.neighbors_data = json.loads(open(fixture_file('neighbors_getis.json')).read()) self.getis_data = json.loads(open(fixture_file('getis.json')).read()) def test_getis_ord(self): """Test Getis-Ord's G*""" data = [ { 'id': d['id'], - 'attr1': d['hr8893'], + 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] plpy._define_result('select', data) random_seeds.set_random_seeds(1234) - result = cc.getis_ord('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = cc.getis_ord('subquery', 'value', 'knn', 5, 999, 'the_geom', 'cartodb_id') result = [(row[0], row[1]) for row in result] - expected = self.getis_data + expected = np.array(self.getis_data)[:, 0:2] for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected): - self.assertAlmostEqual(res_val, exp_val) - self.assertEqual(res_quad, exp_quad) + self.assertAlmostEqual(res_z, exp_z, delta=1e-2) + self.assertAlmostEqual(res_p, exp_p, delta=1e-2) From 11176b71b319e00a3ae177cd133e38e3207bbcdb Mon Sep 17 00:00:00 2001 From: Stuart Lynn Date: Mon, 19 Sep 2016 10:47:59 -0400 Subject: [PATCH 17/96] Update PULL_REQUEST_TEMPLATE.md --- .github/PULL_REQUEST_TEMPLATE.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 9bb2e75..3de84c4 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,6 +2,8 @@ - [ ] All declared geometries are `geometry(Geometry, 4326)` for general geoms, or `geometry(Point, 4326)` - [ ] Existing functions in crankshaft python library called from the extension are kept at least from version N to version N+1 (to avoid breakage during upgrades). 
- [ ] Docs for public-facing functions are written -- [ ] New functions follow the naming conventions: `CDB_NameOfFunction`. Where internal functions begin with an underscore `_`. -- [ ] If appropriate, new functions accepts an arbitrary query as an input (see [Crankshaft Issue #6](https://github.com/CartoDB/crankshaft/issues/6) for more information) - +- [ ] New functions follow the naming conventions: `CDB_NameOfFunction`. Where internal functions begin with an underscore +- [ ] Video explaining the analysis and showing examples +- [ ] Analysis Documentation written [template](https://docs.google.com/a/cartodb.com/document/d/1X2KOtaiEBKWNMp8UjwcLB-kE9aIOw09aOjX3oaCjeME/edit?usp=sharing) +- [ ] Smoke test written + From 06f0cb0dc43f8366f65d1f9db99544c416fc587f Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 19 Sep 2016 15:45:10 +0000 Subject: [PATCH 18/96] updating how p values are tested --- src/py/crankshaft/test/test_clustering_getis.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/py/crankshaft/test/test_clustering_getis.py b/src/py/crankshaft/test/test_clustering_getis.py index 0f3eb3f..2fe0c54 100644 --- a/src/py/crankshaft/test/test_clustering_getis.py +++ b/src/py/crankshaft/test/test_clustering_getis.py @@ -15,8 +15,11 @@ from crankshaft import random_seeds import json class GetisTest(unittest.TestCase): - """Testing class for Getis-Ord's G funtion""" - + """Testing class for Getis-Ord's G funtion + This test replicates the work done in PySAL documentation: + https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/autocorrelation.html#local-g-and-g + """ + def setUp(self): plpy._reset() self.neighbors_data = json.loads(open(fixture_file('neighbors_getis.json')).read()) @@ -34,4 +37,6 @@ class GetisTest(unittest.TestCase): expected = np.array(self.getis_data)[:, 0:2] for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected): self.assertAlmostEqual(res_z, exp_z, delta=1e-2) - self.assertAlmostEqual(res_p, 
exp_p, delta=1e-2) + if exp_p <= 0.05: + self.assertTrue(res_p < 0.05) + From 2ede55d1658e3a499990eb7a5c7b88a69b480c97 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 19 Sep 2016 12:17:01 -0400 Subject: [PATCH 19/96] pep8 updates --- src/py/crankshaft/crankshaft/clustering/getis.py | 9 +++++---- src/py/crankshaft/test/test_clustering_getis.py | 16 +++++++++------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/getis.py b/src/py/crankshaft/crankshaft/clustering/getis.py index 1ee425a..a593e64 100644 --- a/src/py/crankshaft/crankshaft/clustering/getis.py +++ b/src/py/crankshaft/crankshaft/clustering/getis.py @@ -11,12 +11,13 @@ import crankshaft.pysal_utils as pu # High level interface --------------------------------------- + def getis_ord(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col): """ Getis-Ord's G* - Implementation building neighbors with a PostGIS database and PySAL's Getis-Ord's G* - hotspot/coldspot module. + Implementation building neighbors with a PostGIS database and PySAL's + Getis-Ord's G* hotspot/coldspot module. 
Andy Eschbacher """ @@ -41,11 +42,11 @@ def getis_ord(subquery, attr, attr_vals = pu.get_attributes(result) - ## build PySAL weight object + # build PySAL weight object weight = pu.get_weight(result, w_type, num_ngbrs) # calculate Getis-Ord's G* z- and p-values getis = ps.esda.getisord.G_Local(attr_vals, weight, - star=True, permutations=permutations) + star=True, permutations=permutations) return zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order) diff --git a/src/py/crankshaft/test/test_clustering_getis.py b/src/py/crankshaft/test/test_clustering_getis.py index 2fe0c54..f56d5fb 100644 --- a/src/py/crankshaft/test/test_clustering_getis.py +++ b/src/py/crankshaft/test/test_clustering_getis.py @@ -14,29 +14,31 @@ import crankshaft.pysal_utils as pu from crankshaft import random_seeds import json + class GetisTest(unittest.TestCase): """Testing class for Getis-Ord's G funtion This test replicates the work done in PySAL documentation: https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/autocorrelation.html#local-g-and-g """ - + def setUp(self): plpy._reset() - self.neighbors_data = json.loads(open(fixture_file('neighbors_getis.json')).read()) + self.neighbors_data = json.loads( + open(fixture_file('neighbors_getis.json')).read()) self.getis_data = json.loads(open(fixture_file('getis.json')).read()) def test_getis_ord(self): """Test Getis-Ord's G*""" - data = [ { 'id': d['id'], - 'attr1': d['value'], - 'neighbors': d['neighbors'] } for d in self.neighbors_data] + data = [{'id': d['id'], + 'attr1': d['value'], + 'neighbors': d['neighbors']} for d in self.neighbors_data] plpy._define_result('select', data) random_seeds.set_random_seeds(1234) - result = cc.getis_ord('subquery', 'value', 'knn', 5, 999, 'the_geom', 'cartodb_id') + result = cc.getis_ord('subquery', 'value', + 'knn', 5, 999, 'the_geom', 'cartodb_id') result = [(row[0], row[1]) for row in result] expected = np.array(self.getis_data)[:, 0:2] for ([res_z, res_p], [exp_z, exp_p]) in zip(result, 
expected): self.assertAlmostEqual(res_z, exp_z, delta=1e-2) if exp_p <= 0.05: self.assertTrue(res_p < 0.05) - From ee4eb795b7e765262dc1a4448529c43cdea4f8c9 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 19 Sep 2016 19:24:23 +0000 Subject: [PATCH 20/96] adding getis fixture file --- src/pg/test/fixtures/getis_data.sql | 103 ++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 src/pg/test/fixtures/getis_data.sql diff --git a/src/pg/test/fixtures/getis_data.sql b/src/pg/test/fixtures/getis_data.sql new file mode 100644 index 0000000..e7b2f69 --- /dev/null +++ b/src/pg/test/fixtures/getis_data.sql @@ -0,0 +1,103 @@ +SET client_min_messages TO WARNING; +\set ECHO none + +-- +-- Getis-Ord's G* test dataset, subsetted from PySAL examples: +-- https://github.com/pysal/pysal/tree/952ea04029165048a774d9a1846cf86ad000c096/pysal/examples/stl +-- + + +CREATE TABLE getis_data ( + cartodb_id integer, + the_geom geometry(Geometry,4326), + hr8893 numeric +); + + +ALTER TABLE getis_data OWNER TO contrib_regression; + + +COPY getis_data (cartodb_id, the_geom, hr8893) FROM stdin; +22 0106000020E61000000100000001030000000100000007000000000000E0B10056C0000000C0B8964340FFFFFFFF4C1756C00000002054964340000000A00F1E56C00000004072964340000000C02D1E56C0000000A0439B434000000060381E56C00000000036B04340000000E0E20056C0000000608CB04340000000E0B10056C0000000C0B8964340 10.8557430000000004 +32 0106000020E6100000010000000103000000010000000B000000FFFFFF1FC26656C0FFFFFFBFE25E4340000000A0D86656C0000000E0976F4340000000A03A6956C0000000C0966F434000000020526956C0000000E08A7F4340000000E0F26556C000000000C87F4340000000E0066656C0000000209C834340000000407F5056C0000000803C83434000000020635056C0000000E016814340000000A0F45056C0000000A0F980434000000060D25056C000000060FA5E4340FFFFFF1FC26656C0FFFFFFBFE25E4340 9.92424500000000087 +10 
0106000020E610000001000000010300000001000000170000000000000002CD56C000000080CDCC4340000000A054D456C000000020CCD74340000000607ED756C000000000C1DC434000000020E6D756C00000006071E143400000004007BB56C00000000007E2434000000080FABA56C0000000A079EC43400000000040B856C0000000E0D6EB4340FFFFFF3FEEA456C0000000A037EC4340000000C0A9A556C0000000A0ADE7434000000040F3A656C0000000E09FE543400000004063A956C0000000E034DA4340FFFFFF9F04A956C00000008005D74340000000402FA756C00000008069D243400000004046A556C00000002068C84340000000009EA556C0FFFFFF7F3CC34340000000C0C3A756C000000080BCB543400000006082A756C00000004051B2434000000040AABC56C00000006046B343400000006053C256C0FFFFFF7FE2B84340000000E01EC456C000000080ABBC4340000000C0FDC556C0000000E0B3C3434000000000FFC956C000000060BBC643400000000002CD56C000000080CDCC4340 3.79060700000000006 +43 0106000020E6100000010000000103000000010000000F0000000000004025D856C000000020FA1A43400000008092E256C000000060481B434000000060BCE256C0000000C023144340000000A0D7E856C0FFFFFF1F1B14434000000020BEE956C0000000C030144340FFFFFF9FB0E956C0FFFFFF1F1425434000000000D4E956C0000000C00D5A4340000000A0D3E956C0000000202C5A43400000000004E656C00000004066574340000000E0EEE356C0000000A0E35643400000008099DF56C0000000601B5A43400000000033DB56C0000000804A5B43400000004001D856C00000006079594340000000E0A7D756C0000000E0553543400000004025D856C000000020FA1A4340 5.93309800000000021 +6 0106000020E6100000010000000103000000010000000F000000000000A00F4256C0000000E008D4434000000000674956C00000004015D44340000000608C4956C00000004098E64340FFFFFFBF434C56C0FFFFFF3F77E84340000000004C4E56C000000020E5E74340000000C0624E56C0FFFFFF3F97F5434000000020B44956C000000000AFF54340000000E0C64956C00000004009074440FFFFFF1F523056C0FFFFFFDF91074440000000C0EB2F56C000000040BBE54340000000E0B93056C0000000E09FE54340000000E0D63056C0000000007DDE4340000000E0213456C0000000005ADE4340000000802E3456C000000020F7D34340000000A00F4256C0000000E008D44340 9.04867300000000085 +16 
0106000020E6100000010000000103000000010000001500000000000020D73356C000000060729B4340000000201F4956C000000000BE9B4340000000A0E34856C000000060CCAC434000000040094256C0FFFFFFDFB1AC4340000000A00F4256C0000000E008D44340000000802E3456C000000020F7D34340000000E0083456C0000000E0ADCA4340000000801D2E56C0000000A06FCA4340FFFFFF7F132E56C00000000079C34340000000607F2956C0000000402EC3434000000080652956C0000000A0EAC04340FFFFFF5FF22756C000000060E5C0434000000080F52756C000000080DDBE434000000020B32656C0000000E0E7BE434000000000AC2656C0FFFFFFFF38BD4340FFFFFFDFC12556C0000000C026BD434000000060C72556C000000060BAB9434000000040441E56C000000020C9B9434000000060381E56C00000000036B04340000000C02D1E56C0000000A0439B434000000020D73356C000000060729B4340 0.74509000000000003 +29 0106000020E610000001000000010300000001000000080000000000002025FD55C0FFFFFF1F7F6D434000000080C61056C0000000A04C6D4340000000A0631756C000000000D56D4340000000C05E1756C0FFFFFF5F24754340FFFFFFFF4C1756C00000002054964340000000E0B10056C0000000C0B89643400000006029FD55C000000080C09643400000002025FD55C0FFFFFF1F7F6D4340 3.12759000000000009 +54 0106000020E6100000010000000103000000010000000F000000FFFFFF1F090C57C0000000202E024340000000E0AA0B57C00000004000154340000000A0C10C57C0000000802015434000000060850C57C000000080EC244340FFFFFF9FB0E956C0FFFFFF1F1425434000000020BEE956C0000000C030144340000000A0D7E856C0FFFFFF1F1B144340FFFFFF3FF2E856C0000000A0A3064340FFFFFFDFD2F956C000000040E2064340000000C01EFB56C0000000201406434000000060BBFB56C0000000809A044340000000605AFD56C0000000E059054340000000405FFE56C0000000E06B014340000000A0F60157C0000000C081014340FFFFFF1F090C57C0000000202E024340 2.06432400000000005 +13 
0106000020E6100000010000000103000000010000000F000000000000009EA556C0FFFFFF7F3CC343400000004046A556C00000002068C84340000000402FA756C00000008069D24340FFFFFF9F04A956C00000008005D743400000004063A956C0000000E034DA434000000040F3A656C0000000E09FE54340000000E0F09E56C0000000005AE54340FFFFFF3F069F56C000000040B5E04340FFFFFF1F0E9856C000000020A3E0434000000060109856C0000000E07ED5434000000040E29556C0000000A077D54340FFFFFFDFDC9556C0000000A0EDD1434000000080399356C0000000A0D8D1434000000020699356C0000000E029C34340000000009EA556C0FFFFFF7F3CC34340 0 +28 0106000020E61000000100000001030000000100000008000000000000C05E1756C0FFFFFF5F24754340000000C0DD2C56C0000000407A75434000000080DE3356C0000000406375434000000020D73356C000000060729B4340000000C02D1E56C0000000A0439B4340000000A00F1E56C00000004072964340FFFFFFFF4C1756C00000002054964340000000C05E1756C0FFFFFF5F24754340 1.57115800000000005 +36 0106000020E6100000010000000103000000010000000D00000000000000EE2C56C000000060424E434000000040F72C56C000000000486A4340000000C0DD2C56C0000000407A754340000000C05E1756C0FFFFFF5F24754340000000A0631756C000000000D56D434000000080C61056C0000000A04C6D4340000000C0BE1056C0000000A0065F4340000000407C1256C0FFFFFF7FA75E434000000000BD1156C000000020A954434000000040D01256C0000000605C524340000000404F1256C000000040734F434000000040011156C000000000AF4D434000000000EE2C56C000000060424E4340 0 +68 0106000020E61000000100000001030000000100000006000000000000809F2D56C00000002078CD4240000000E0F64256C0FFFFFFDF38CD424000000000CE4956C00000004053CD4240000000C0E94956C000000080CDEE424000000020682D56C0FFFFFF7F00EF4240000000809F2D56C00000002078CD4240 3.1461920000000001 +27 
0106000020E6100000010000000103000000010000000D000000000000407F5056C0000000803C83434000000060615056C000000040A99B4340000000201F4956C000000000BE9B434000000020D73356C000000060729B434000000080DE3356C00000004063754340000000C0DD2C56C0000000407A75434000000040F72C56C000000000486A4340000000202B4956C000000080AC69434000000040454956C000000040E25E434000000060D25056C000000060FA5E4340000000A0F45056C0000000A0F980434000000020635056C0000000E016814340000000407F5056C0000000803C834340 1.5920399999999999 +40 0106000020E6100000010000000103000000010000000F00000000000000B89056C0000000C03144434000000000419456C0000000A03D4A434000000020609356C000000020CC55434000000020578F56C0000000A0725D4340000000004D8C56C0000000C05E614340000000E0F48A56C000000020B164434000000060CA8756C00000002077664340000000C0A68856C0000000C08A64434000000020758A56C0000000E0F4624340000000A0948C56C0000000E0AA5C4340FFFFFF7FF18C56C000000080A5594340000000C0BF8B56C0000000A052544340000000E0C18B56C0000000601D4E4340000000A06B8F56C0000000000A48434000000000B89056C0000000C031444340 45.9054059999999993 +52 0106000020E61000000100000001030000000100000008000000000000E0562D56C0000000208910434000000020864856C00000000054104340FFFFFF9FB84956C0000000409110434000000080A84956C0000000A05B1B4340000000803D4956C0FFFFFF9FB63C434000000080062D56C0FFFFFF7FAE3C434000000060492D56C0000000603E214340000000E0562D56C00000002089104340 4.94153299999999973 +59 0106000020E6100000010000000103000000010000000F0000000000006010C756C000000020A7DE4240FFFFFF9FF7C956C0FFFFFF7F75DE424000000060FBC956C0000000A01ED94240000000C080CA56C0000000001BD94240000000C086D456C000000040CED942400000000065D456C00000004041E442400000008044E256C0000000E0BEE44240FFFFFFFF04E256C0FFFFFFBFD013434000000060BCE256C0000000C0231443400000008092E256C000000060481B43400000004025D856C000000020FA1A4340FFFFFF5F2CD856C0000000E0381A43400000000009D656C0FFFFFFDF1C1A4340000000A08DC656C000000000241A43400000006010C756C000000020A7DE4240 6.82879400000000025 +1 
0106000020E61000000100000001030000000100000010000000FFFFFF3F746556C00000002049FD434000000080316556C0000000A0240C4440000000C0A76656C000000000330C444000000000C76656C0000000606C11444000000000106756C000000000A0294440000000C0375156C0000000402F2A444000000020305156C0000000E08D244440000000E0E54956C00000004094244440000000E0C64956C0000000400907444000000020B44956C000000000AFF54340000000C0624E56C0FFFFFF3F97F54340000000E05B5A56C00000006082F5434000000060655A56C00000002040F7434000000060B15F56C0000000807CF74340000000008A5F56C0000000C076FD4340FFFFFF3F746556C00000002049FD4340 1.62445799999999996 +2 0106000020E6100000010000000103000000010000001000000000000080FABA56C0000000A079EC43400000004007BB56C00000000007E2434000000020E6D756C00000006071E14340000000006ED856C000000000E2E64340FFFFFF7FBFDC56C00000006078EE434000000000DDDC56C0000000A04FF1434000000080C7DB56C0000000206FF34340000000808BDB56C0000000C0FEF54340000000A09FDC56C0000000A018F94340000000C02FDF56C000000080BC004440000000A041E056C0000000008A084440FFFFFF3F08E156C0000000C038114440000000406BE056C0000000A0A8194440FFFFFFBFF5BA56C0000000E02A1944400000004014BB56C000000060E70D444000000080FABA56C0000000A079EC4340 2.25549199999999983 +3 0106000020E61000000100000001030000000100000018000000000000A0D87F56C000000080FEF3434000000020048056C0FFFFFF7FB00E444000000080CB7D56C0000000E0F411444000000080237B56C0000000800812444000000060227956C0000000400F114440000000A04A7856C00000002084124440000000C0667756C0000000E0B810444000000080187656C0000000A001124440000000A0037556C0FFFFFF5F36104440000000005C7356C0000000206510444000000080D27156C00000004081114440000000A0657056C000000040EE104440000000A0EF6D56C000000060AE12444000000000BE6C56C0000000E073124440000000802A6A56C0000000402115444000000000C76656C0000000606C114440000000C0A76656C000000000330C444000000080316556C0000000A0240C4440FFFFFF3F746556C00000002049FD4340000000602C6D56C00000004009FD4340FFFFFF5F786D56C0FFFFFFDFC7F54340000000E0447156C000000080A6F54340000000A09C7156C0FFFFFF9F41F44340000000A0D87F56C000000080FEF34340 
1.46788999999999992 +4 0106000020E610000001000000010300000001000000160000000000008076A556C000000020BCF0434000000060BCA256C0FFFFFF5F7BF5434000000060F5A056C000000040AEFE4340000000204C9C56C00000008095024440000000A0609B56C000000060EC084440FFFFFFFFC49956C0000000C00C0A444000000000E19856C000000040490F444000000040A79656C0000000805C10444000000080199456C0000000E0120E4440FFFFFFBF879256C00000006094084440000000A0699156C00000006012084440000000807E9056C000000000F408444000000060378F56C00000000049074440000000A0E98C56C0FFFFFF5FE4074440000000E0F48B56C0000000201009444000000020618856C000000040ED08444000000020A88556C000000060CF0A4440FFFFFFDF0C8456C000000080250D444000000020048056C0FFFFFF7FB00E4440000000A0D87F56C000000080FEF34340FFFFFF3FDF7F56C0000000C047F043400000008076A556C000000020BCF04340 2.4842559999999998 +5 0106000020E61000000100000001030000000100000012000000FFFFFF3FEEA456C0000000A037EC43400000000040B856C0000000E0D6EB434000000080FABA56C0000000A079EC43400000004014BB56C000000060E70D444000000020B9AC56C0000000409F0D4440000000A094AD56C0000000005B0A4440000000A07FAB56C0000000A0C1094440FFFFFFBFA8AB56C000000060DD054440000000E04BA956C0FFFFFFFFB30344400000002042A756C0FFFFFFBF900344400000002005A756C0000000409502444000000060D2A756C000000040EE014440000000C0D9A656C0000000008E004440000000E0C0A656C0000000E0F1FD434000000060F5A056C000000040AEFE434000000060BCA256C0FFFFFF5F7BF543400000008076A556C000000020BCF04340FFFFFF3FEEA456C0000000A037EC4340 0 +15 0106000020E6100000010000000103000000010000000B000000000000806CDC56C00000006020A9434000000080A4ED56C000000020E3A94340000000A050EE56C0FFFFFFDF8BAB434000000000C4ED56C00000000058CD4340000000A039EE56C0000000C062CD4340000000C041EE56C00000006068D44340000000E031EE56C000000080D5D74340000000A054D456C000000020CCD743400000000002CD56C000000080CDCC434000000040CDDD56C0000000407EBA4340000000806CDC56C00000006020A94340 1.9498120000000001 +7 
0106000020E6100000010000000103000000010000001C000000000000C00D6D56C0000000A096C34340000000E0847B56C00000000075C3434000000060747B56C00000002076C74340000000601B7F56C0000000203CDA4340FFFFFF3FDF7F56C0000000C047F04340000000A0D87F56C000000080FEF34340000000A09C7156C0FFFFFF9F41F44340000000E0447156C000000080A6F54340FFFFFF5F786D56C0FFFFFFDFC7F54340000000602C6D56C00000004009FD4340FFFFFF3F746556C00000002049FD4340000000008A5F56C0000000C076FD434000000060B15F56C0000000807CF7434000000060655A56C00000002040F74340000000E05B5A56C00000006082F54340000000C0624E56C0FFFFFF3F97F54340000000004C4E56C000000020E5E7434000000080B55056C0000000C010E94340FFFFFFDF215256C0000000C096E5434000000020475556C0000000A0D3E14340000000A0245A56C0000000200DDF434000000060E75B56C000000000C0DF434000000000F25B56C0000000C0DBD74340FFFFFF5F635F56C0FFFFFFFF91D74340000000E04F5F56C000000020C1D24340FFFFFF7F996256C00000004094D24340000000A08E6256C0000000C0AAC34340000000C00D6D56C0000000A096C34340 6.02948899999999988 +8 0106000020E6100000010000000103000000010000000E000000FFFFFF1F72F656C000000080A1D443400000004048F656C0000000008EF94340000000A09FDC56C0000000A018F94340000000808BDB56C0000000C0FEF5434000000080C7DB56C0000000206FF3434000000000DDDC56C0000000A04FF14340FFFFFF7FBFDC56C00000006078EE4340000000006ED856C000000000E2E6434000000020E6D756C00000006071E14340000000607ED756C000000000C1DC4340000000A054D456C000000020CCD74340000000E031EE56C000000080D5D74340000000C041EE56C00000006068D44340FFFFFF1F72F656C000000080A1D44340 1.8003849999999999 +9 
0106000020E6100000010000000103000000010000001200000000000040E88956C00000004046C3434000000020699356C0000000E029C3434000000080399356C0000000A0D8D14340FFFFFFDFDC9556C0000000A0EDD1434000000040E29556C0000000A077D5434000000060109856C0000000E07ED54340FFFFFF1F0E9856C000000020A3E04340FFFFFF3F069F56C000000040B5E04340000000E0F09E56C0000000005AE5434000000040F3A656C0000000E09FE54340000000C0A9A556C0000000A0ADE74340FFFFFF3FEEA456C0000000A037EC43400000008076A556C000000020BCF04340FFFFFF3FDF7F56C0000000C047F04340000000601B7F56C0000000203CDA434000000060747B56C00000002076C74340000000E0847B56C00000000075C3434000000040E88956C00000004046C34340 4.58125099999999996 +30 0106000020E6100000010000000103000000010000000D0000000000000033DB56C0000000804A5B43400000008099DF56C0000000601B5A4340000000E0EEE356C0000000A0E35643400000000004E656C00000004066574340000000A0D3E956C0000000202C5A4340000000A02EE956C0FFFFFF1F09884340000000A0D9E856C0FFFFFFDF03934340000000A0B8DA56C000000060E592434000000080F4D056C0000000605F9243400000000038D156C0000000A0817F434000000000A0D156C0000000A0FF6B4340000000E0C8DA56C000000080986C43400000000033DB56C0000000804A5B4340 4.41689600000000038 +11 0106000020E61000000100000001030000000100000013000000000000A08E6256C0000000C0AAC34340FFFFFF7F996256C00000004094D24340000000E04F5F56C000000020C1D24340FFFFFF5F635F56C0FFFFFFFF91D7434000000000F25B56C0000000C0DBD7434000000060E75B56C000000000C0DF4340000000A0245A56C0000000200DDF434000000020475556C0000000A0D3E14340FFFFFFDF215256C0000000C096E5434000000080B55056C0000000C010E94340000000004C4E56C000000020E5E74340FFFFFFBF434C56C0FFFFFF3F77E84340000000608C4956C00000004098E6434000000000674956C00000004015D44340000000A00F4256C0000000E008D4434000000040094256C0FFFFFFDFB1AC4340000000A0E34856C000000060CCAC434000000020666256C0FFFFFF9FBFAC4340000000A08E6256C0000000C0AAC34340 1.44743599999999994 +12 
0106000020E6100000010000000103000000010000001400000000000040441E56C000000020C9B9434000000060C72556C000000060BAB94340FFFFFFDFC12556C0000000C026BD434000000000AC2656C0FFFFFFFF38BD434000000020B32656C0000000E0E7BE434000000080F52756C000000080DDBE4340FFFFFF5FF22756C000000060E5C0434000000080652956C0000000A0EAC04340000000607F2956C0000000402EC34340FFFFFF7F132E56C00000000079C34340000000801D2E56C0000000A06FCA4340000000E0083456C0000000E0ADCA4340000000802E3456C000000020F7D34340000000E0213456C0000000005ADE4340000000E0D63056C0000000007DDE4340000000E0B93056C0000000E09FE54340000000C0EB2F56C000000040BBE54340FFFFFF5F741E56C00000008040E54340000000605A1E56C0000000E042D3434000000040441E56C000000020C9B94340 1.19196600000000008 +14 0106000020E6100000010000000103000000010000000A000000000000E0E20056C0000000608CB0434000000060381E56C00000000036B0434000000040441E56C000000020C9B94340000000605A1E56C0000000E042D3434000000040BB0356C00000000030D4434000000060CF0356C000000080C9D74340FFFFFFBFFDFD55C0000000E02BD84340000000A0C5FD55C0000000400DBE434000000040E60056C0FFFFFF9F20BE4340000000E0E20056C0000000608CB04340 1.60801700000000003 +17 0106000020E61000000100000001030000000100000011000000000000A0B8DA56C000000060E5924340000000806CDC56C00000006020A9434000000040CDDD56C0000000407EBA43400000000002CD56C000000080CDCC434000000000FFC956C000000060BBC64340000000C0FDC556C0000000E0B3C34340000000E01EC456C000000080ABBC43400000006053C256C0FFFFFF7FE2B8434000000040AABC56C00000006046B34340000000806EB656C0000000A0DBAC4340000000C0E0B156C0000000A0FDA54340000000C03CAF56C000000040B89F4340000000A0DEAE56C0FFFFFF7FC49C4340000000E0F3CB56C0FFFFFF9F039D4340000000205CCC56C0000000805392434000000080F4D056C0000000605F924340000000A0B8DA56C000000060E5924340 4.17331800000000008 +18 
0106000020E6100000010000000103000000010000000E000000FFFFFF3FF56C56C000000020977F4340000000A07A6D56C0000000605DAD4340000000800F6D56C0000000A06CAD4340000000C00D6D56C0000000A096C34340000000A08E6256C0000000C0AAC3434000000020666256C0FFFFFF9FBFAC4340000000A0E34856C000000060CCAC4340000000201F4956C000000000BE9B434000000060615056C000000040A99B4340000000407F5056C0000000803C834340000000E0066656C0000000209C834340000000E0F26556C000000000C87F434000000020526956C0000000E08A7F4340FFFFFF3FF56C56C000000020977F4340 3.78325200000000006 +19 0106000020E61000000100000001030000000100000009000000000000A0B78956C0000000A0BD7F4340000000C0C08956C0000000000BA1434000000040E88956C00000004046C34340000000E0847B56C00000000075C34340000000C00D6D56C0000000A096C34340000000800F6D56C0000000A06CAD4340000000A07A6D56C0000000605DAD4340FFFFFF3FF56C56C000000020977F4340000000A0B78956C0000000A0BD7F4340 2.08513599999999988 +20 0106000020E61000000100000001030000000100000015000000000000C0C08956C0000000000BA1434000000060188D56C00000008040A04340000000C0278D56C0000000E0E29C4340000000E04C9456C000000080CC9C4340000000A0579456C0000000E0B4964340FFFFFFFF819F56C0000000806D9643400000006068A056C0FFFFFF1FBB944340000000203AA156C0000000E0CA9743400000002061A456C000000080AE974340000000C08CA556C0000000A0BB964340FFFFFF9F44A556C0000000609794434000000040EFA656C0000000C00C8F43400000004050A756C0FFFFFFBFEA9343400000000062A656C0FFFFFFFF6A9B434000000080DCA756C0000000208AAE43400000006082A756C00000004051B24340000000C0C3A756C000000080BCB54340000000009EA556C0FFFFFF7F3CC3434000000020699356C0000000E029C3434000000040E88956C00000004046C34340000000C0C08956C0000000000BA14340 2.17630200000000018 +21 
0106000020E6100000010000000103000000010000001B00000000000040EFA656C0000000C00C8F4340FFFFFF5F1DA756C000000020C58D434000000020E1A456C0000000E00F84434000000020D1A456C0000000A0B2804340FFFFFFDF3AA356C000000040247C4340000000800BA256C0FFFFFF5F987A434000000020409F56C000000000CD7B4340000000E0119E56C000000060C67A434000000080F2A156C000000040207243400000004080A456C0000000A0876F43400000004024A856C0000000E0BD704340FFFFFFDFCEAA56C000000060B67743400000004030AD56C000000060D6844340FFFFFF1F49AD56C00000006072874340000000802FAC56C000000060FE8B434000000000DFAD56C00000008075924340FFFFFFDFF6AD56C00000006012994340000000A0DEAE56C0FFFFFF7FC49C4340000000C03CAF56C000000040B89F4340000000C0E0B156C0000000A0FDA54340000000806EB656C0000000A0DBAC434000000040AABC56C00000006046B343400000006082A756C00000004051B2434000000080DCA756C0000000208AAE43400000000062A656C0FFFFFFFF6A9B43400000004050A756C0FFFFFFBFEA93434000000040EFA656C0000000C00C8F4340 6.30934699999999982 +23 0106000020E6100000010000000103000000010000000B000000FFFFFF1F491457C000000060E99F4340FFFFFF5F1C1457C00000000075AC4340000000A050EE56C0FFFFFFDF8BAB434000000080A4ED56C000000020E3A94340000000806CDC56C00000006020A94340000000A0B8DA56C000000060E5924340000000A0D9E856C0FFFFFFDF03934340000000A02EE956C0FFFFFF1F0988434000000020290757C0FFFFFF3F92884340000000A0B00657C000000080FE9E4340FFFFFF1F491457C000000060E99F4340 4.21135400000000004 +24 
0106000020E61000000100000001030000000100000019000000000000A0B78956C0000000A0BD7F4340FFFFFF1FDD9156C000000060B47F434000000000DA9156C0000000205D764340000000A0769456C00000006063764340000000C06F9A56C0000000A02D7B4340000000E0119E56C000000060C67A434000000020409F56C000000000CD7B4340000000800BA256C0FFFFFF5F987A4340FFFFFFDF3AA356C000000040247C434000000020D1A456C0000000A0B280434000000020E1A456C0000000E00F844340FFFFFF5F1DA756C000000020C58D434000000040EFA656C0000000C00C8F4340FFFFFF9F44A556C00000006097944340000000C08CA556C0000000A0BB9643400000002061A456C000000080AE974340000000203AA156C0000000E0CA9743400000006068A056C0FFFFFF1FBB944340FFFFFFFF819F56C0000000806D964340000000A0579456C0000000E0B4964340000000E04C9456C000000080CC9C4340000000C0278D56C0000000E0E29C434000000060188D56C00000008040A04340000000C0C08956C0000000000BA14340000000A0B78956C0000000A0BD7F4340 0.804810000000000025 +47 0106000020E61000000100000001030000000100000009000000FFFFFF9FD60956C0000000003521434000000020F21756C000000040DA20434000000060492D56C0000000603E21434000000080062D56C0FFFFFF7FAE3C434000000000EE2C56C000000060424E434000000040011156C000000000AF4D434000000040780956C0000000005A4D4340000000808A0956C000000080C1494340FFFFFF9FD60956C00000000035214340 0.969790999999999959 +25 
0106000020E6100000010000000103000000010000001E000000000000A0491957C0FFFFFF9F355E434000000020381A57C0000000A05B61434000000000201957C0000000803D654340000000802D1957C000000040E867434000000060B41B57C0000000C078694340000000A0611E57C000000040A06E4340000000C0FF1F57C00000006083754340000000803C2457C0000000E0ED7B4340000000A0332457C00000004047804340FFFFFF9FB41B57C0FFFFFF5F08A04340FFFFFF1F491457C000000060E99F4340000000A0B00657C000000080FE9E434000000020290757C0FFFFFF3F92884340FFFFFFBFC70857C000000020EC874340000000C0F20957C000000020DE764340000000C0E50A57C000000040E172434000000000260A57C0000000A03A71434000000000750A57C000000080796F4340FFFFFF7F310957C00000000064684340FFFFFF3FFF0B57C000000020765E434000000000060E57C000000000B45B434000000000640E57C00000000022594340000000A0C40C57C0000000801D57434000000040600C57C0000000C049544340FFFFFF5F100E57C000000000B851434000000060091157C0000000E05353434000000020B21257C0FFFFFFDF48554340FFFFFF3FC11657C00000004060564340000000005F1657C0FFFFFF5FD95B4340000000A0491957C0FFFFFF9F355E4340 3.21533099999999994 +26 
0106000020E6100000010000000103000000010000001D000000000000008BBD56C0000000608A6F43400000006075C756C0000000A0EE6F43400000000093C756C0FFFFFF3FF3764340FFFFFFBFD7CC56C0FFFFFF7F7277434000000020BECC56C0FFFFFFFF1B7F43400000000038D156C0000000A0817F434000000080F4D056C0000000605F924340000000205CCC56C00000008053924340000000E0F3CB56C0FFFFFF9F039D4340000000A0DEAE56C0FFFFFF7FC49C4340FFFFFFDFF6AD56C0000000601299434000000000DFAD56C00000008075924340000000802FAC56C000000060FE8B4340FFFFFF1F49AD56C000000060728743400000004030AD56C000000060D6844340FFFFFFDFCEAA56C000000060B6774340FFFFFFDF6EAC56C000000060557743400000002088AC56C000000060887543400000000071AD56C000000080B07543400000004047AD56C0000000C04C744340000000C0FDAE56C0000000406D754340000000C0DAAE56C0000000802B774340000000C070B256C0000000A0FE754340FFFFFFBF86B356C000000040A3744340000000A00EB456C0000000A0907043400000008058B856C000000000FB7143400000004020BC56C0000000C09A714340000000C058BD56C0000000C09E724340000000008BBD56C0000000608A6F4340 2.83366400000000018 +38 0106000020E6100000010000000103000000010000000E000000000000605FFD55C000000020A5494340000000808A0956C000000080C149434000000040780956C0000000005A4D434000000040011156C000000000AF4D4340000000404F1256C000000040734F434000000040D01256C0000000605C52434000000000BD1156C000000020A9544340000000407C1256C0FFFFFF7FA75E4340000000C0BE1056C0000000A0065F434000000080C61056C0000000A04C6D43400000002025FD55C0FFFFFF1F7F6D434000000080B9FA55C0000000607E6D434000000020A9FA55C00000008093494340000000605FFD55C000000020A5494340 1.00387500000000007 +31 
0106000020E6100000010000000103000000010000001B000000FFFFFF5F100E57C000000000B851434000000040600C57C0000000C049544340000000A0C40C57C0000000801D57434000000000640E57C0000000002259434000000000060E57C000000000B45B4340FFFFFF3FFF0B57C000000020765E4340FFFFFF7F310957C0000000006468434000000000750A57C000000080796F434000000000260A57C0000000A03A714340000000C0E50A57C000000040E1724340000000C0F20957C000000020DE764340FFFFFFBFC70857C000000020EC87434000000020290757C0FFFFFF3F92884340000000A02EE956C0FFFFFF1F09884340000000A0D3E956C0000000202C5A434000000000D4E956C0000000C00D5A4340FFFFFF7F6BEF56C0000000A0605A4340FFFFFFFFACF056C000000040975843400000008054F356C0FFFFFFDFFB56434000000060C1F656C0000000E0725643400000000014FD56C000000000744C4340000000E005FF56C000000020904B434000000040220257C0000000405C484340000000E0880657C0FFFFFFDFF147434000000000EA0A57C0000000407C4A4340000000E0B10C57C000000020054D4340FFFFFF5F100E57C000000000B8514340 3.01748599999999989 +33 0106000020E6100000010000000103000000010000001400000000000060CA8756C00000002077664340000000603D8756C0000000C04C6A434000000000808856C0FFFFFF1F306D4340000000809C8F56C0000000A00E75434000000000DA9156C0000000205D764340FFFFFF1FDD9156C000000060B47F4340000000A0B78956C0000000A0BD7F4340FFFFFF3FF56C56C000000020977F434000000020526956C0000000E08A7F4340000000A03A6956C0000000C0966F4340000000A0D86656C0000000E0976F4340FFFFFF1FC26656C0FFFFFFBFE25E434000000000AD6656C0000000408054434000000060BA6D56C00000006031544340000000C0BF8B56C0000000A052544340FFFFFF7FF18C56C000000080A5594340000000A0948C56C0000000E0AA5C434000000020758A56C0000000E0F4624340000000C0A68856C0000000C08A64434000000060CA8756C00000002077664340 7.97395700000000041 +34 
0106000020E61000000100000001030000000100000015000000FFFFFF5FFEBD56C0000000C0ED454340000000C0E0C056C0000000401048434000000040E0C356C0000000C0AC4D434000000060B0C556C000000000094E4340000000201BC956C0000000E0D74C4340FFFFFF3F1ECD56C0000000204D4E4340000000806BCE56C0000000600150434000000020DDCF56C0000000A015544340000000E0F7D256C0000000801D584340000000E067D556C000000080E65943400000004001D856C000000060795943400000000033DB56C0000000804A5B4340000000E0C8DA56C000000080986C434000000000A0D156C0000000A0FF6B43400000000038D156C0000000A0817F434000000020BECC56C0FFFFFFFF1B7F4340FFFFFFBFD7CC56C0FFFFFF7F727743400000000093C756C0FFFFFF3FF37643400000006075C756C0000000A0EE6F4340000000008BBD56C0000000608A6F4340FFFFFF5FFEBD56C0000000C0ED454340 5.00546399999999991 +35 0106000020E61000000100000001030000000100000031000000FFFFFF9F36AF56C0000000E037514340000000602AB256C0FFFFFFDFE74943400000008057B356C000000020CF4A434000000060A8B456C0000000C07D4A4340000000005FBA56C00000006033454340FFFFFF5FFEBD56C0000000C0ED454340000000008BBD56C0000000608A6F4340000000C058BD56C0000000C09E7243400000004020BC56C0000000C09A7143400000008058B856C000000000FB714340000000A00EB456C0000000A090704340FFFFFFBF86B356C000000040A3744340000000C070B256C0000000A0FE754340000000C0DAAE56C0000000802B774340000000C0FDAE56C0000000406D7543400000004047AD56C0000000C04C7443400000000071AD56C000000080B07543400000002088AC56C00000006088754340FFFFFFDF6EAC56C00000006055774340FFFFFFDFCEAA56C000000060B67743400000004024A856C0000000E0BD7043400000004080A456C0000000A0876F434000000080F2A156C00000004020724340000000E0119E56C000000060C67A4340000000C06F9A56C0000000A02D7B4340000000A0769456C0000000606376434000000000DA9156C0000000205D764340000000809C8F56C0000000A00E75434000000000808856C0FFFFFF1F306D4340000000603D8756C0000000C04C6A434000000060CA8756C0000000207766434000000060A28856C0000000A04C69434000000020CB8C56C0000000409A69434000000080AB9056C0000000608F6D4340FFFFFFFF6D9256C0000000204F714340000000C05C9456C000000040DD714340000000E0A39556C0000000A065704340000000400B9756C
000000040A26A434000000040CC9956C0000000A0AA694340FFFFFFFFBA9B56C000000060646A4340000000A0EF9C56C0FFFFFF1FCB694340000000605F9F56C0000000202B614340000000402CA256C000000000995C4340000000C00CA356C00000006094584340000000A08CA656C0000000E04C57434000000000F0A856C000000020B5584340000000A086AB56C0FFFFFF5FCD5643400000004012AC56C0000000004C544340FFFFFF9F36AF56C0000000E037514340 2.46389099999999983 +37 0106000020E6100000010000000103000000010000002A0000000000006054AF56C0000000C0573B4340FFFFFF9F36AF56C0000000E0375143400000004012AC56C0000000004C544340000000A086AB56C0FFFFFF5FCD56434000000000F0A856C000000020B5584340000000A08CA656C0000000E04C574340000000C00CA356C00000006094584340000000402CA256C000000000995C4340000000605F9F56C0000000202B614340000000A0EF9C56C0FFFFFF1FCB694340FFFFFFFFBA9B56C000000060646A434000000040CC9956C0000000A0AA694340000000400B9756C000000040A26A4340000000E0A39556C0000000A065704340000000C05C9456C000000040DD714340FFFFFFFF6D9256C0000000204F71434000000080AB9056C0000000608F6D434000000020CB8C56C0000000409A69434000000060A28856C0000000A04C69434000000060CA8756C00000002077664340000000E0F48A56C000000020B1644340000000004D8C56C0000000C05E61434000000020578F56C0000000A0725D434000000020609356C000000020CC55434000000000419456C0000000A03D4A434000000000B89056C0000000C031444340000000A0029156C00000006064424340FFFFFF5F519356C0000000A0B336434000000020BC9556C00000004007324340000000406F9656C00000008005364340FFFFFF1FAA9556C0000000A07439434000000040129A56C0000000C07E3A4340FFFFFF1FE19A56C000000040623D4340FFFFFF3F2B9A56C000000000263E4340000000E0379A56C00000002001404340FFFFFFFFF5A556C0000000801A404340000000C049A756C0000000A07B3C4340000000801EAA56C000000020A63D4340FFFFFFDFD4AA56C000000060A538434000000060CFAB56C000000040BE384340000000202CAC56C000000040A33B43400000006054AF56C0000000C0573B4340 7.37797400000000003 +39 
0106000020E61000000100000001030000000100000008000000000000A04E4956C0000000E03140434000000040454956C000000040E25E4340000000202B4956C000000080AC69434000000040F72C56C000000000486A434000000000EE2C56C000000060424E434000000080062D56C0FFFFFF7FAE3C4340000000803D4956C0FFFFFF9FB63C4340000000A04E4956C0000000E031404340 3.19004699999999985 +60 0106000020E6100000010000000103000000010000000A0000000000006098A956C0000000C019DE42400000006010C756C000000020A7DE4240000000A08DC656C000000000241A43400000002017B256C0000000209F1A434000000080FEAB56C0000000C0710E434000000080D3AB56C0FFFFFF5F2F0C434000000080FFA956C000000080EA0C43400000006018AA56C0000000E0FF0A434000000060E4A856C000000080440A43400000006098A956C0000000C019DE4240 3.26394699999999993 +41 0106000020E6100000010000000103000000010000001700000000000040454956C000000040E25E4340000000A04E4956C0000000E031404340000000A0FB5056C0000000001A414340000000E00D5356C0000000004C404340000000A0DA5656C000000040A4414340000000A07D5956C0000000A0833E4340000000608F5B56C0000000C0273F4340000000E0405D56C0000000803A3E4340000000E0CB5E56C000000040AD3B4340000000E08A6156C0000000A0793D434000000060B56256C0000000A09A3C4340000000A0A06456C0000000A0963D434000000020996756C000000060AB3B4340000000E0156856C0000000A085394340FFFFFF3F536956C0000000C06A384340FFFFFF5F876A56C0000000809638434000000080DD6A56C000000020BB364340000000806B6D56C0000000E09E35434000000060BA6D56C0000000603154434000000000AD6656C00000004080544340FFFFFF1FC26656C0FFFFFFBFE25E434000000060D25056C000000060FA5E434000000040454956C000000040E25E4340 2.44759700000000002 +42 
0106000020E610000001000000010300000001000000210000000000000014FD56C000000000744C4340000000C0D70157C000000020CE464340000000E0CC0057C0000000A0ED404340FFFFFF1F3C0257C0000000A0CE3C434000000060C80457C0000000E02D3C4340000000A0020757C000000000763A4340000000602E0957C000000060EF3B434000000020B90A57C000000060DE3B434000000060340A57C0000000402D38434000000060310857C00000004012354340000000E0F50757C0000000E095324340000000E0D40857C000000080DD304340000000E0690B57C0FFFFFF3F3B30434000000000520C57C000000060732E4340000000208D0C57C000000000AB2A434000000060D80E57C000000080CC2A4340000000C05C1057C0000000209229434000000040131257C000000040C52A4340000000C0281A57C000000080262B4340000000E0D81957C000000000F6354340FFFFFF9FB91F57C0FFFFFFDF7D364340000000A0491957C0FFFFFF9F355E4340000000005F1657C0FFFFFF5FD95B4340FFFFFF3FC11657C0000000406056434000000020B21257C0FFFFFFDF4855434000000060091157C0000000E053534340FFFFFF5F100E57C000000000B8514340000000E0B10C57C000000020054D434000000000EA0A57C0000000407C4A4340000000E0880657C0FFFFFFDFF147434000000040220257C0000000405C484340000000E005FF56C000000020904B43400000000014FD56C000000000744C4340 1.29495800000000005 +44 0106000020E6100000010000000103000000010000001700000000000060850C57C000000080EC244340000000208D0C57C000000000AB2A434000000000520C57C000000060732E4340000000E0690B57C0FFFFFF3F3B304340000000E0D40857C000000080DD304340000000E0F50757C0000000E09532434000000060310857C0000000401235434000000060340A57C0000000402D38434000000020B90A57C000000060DE3B4340000000602E0957C000000060EF3B4340000000A0020757C000000000763A434000000060C80457C0000000E02D3C4340FFFFFF1F3C0257C0000000A0CE3C4340000000E0CC0057C0000000A0ED404340000000C0D70157C000000020CE4643400000000014FD56C000000000744C434000000060C1F656C0000000E0725643400000008054F356C0FFFFFFDFFB564340FFFFFFFFACF056C00000004097584340FFFFFF7F6BEF56C0000000A0605A434000000000D4E956C0000000C00D5A4340FFFFFF9FB0E956C0FFFFFF1F1425434000000060850C57C000000080EC244340 4.13399699999999992 +45 
0106000020E610000001000000010300000001000000190000000000002017B256C0000000209F1A4340000000A08DC656C000000000241A43400000000009D656C0FFFFFFDF1C1A4340FFFFFF5F2CD856C0000000E0381A43400000004025D856C000000020FA1A4340000000E0A7D756C0000000E0553543400000004001D856C00000006079594340000000E067D556C000000080E6594340000000E0F7D256C0000000801D58434000000020DDCF56C0000000A015544340000000806BCE56C00000006001504340FFFFFF3F1ECD56C0000000204D4E4340000000201BC956C0000000E0D74C434000000060B0C556C000000000094E434000000040E0C356C0000000C0AC4D4340000000C0E0C056C00000004010484340FFFFFF5FFEBD56C0000000C0ED454340000000005FBA56C0000000603345434000000060A8B456C0000000C07D4A43400000008057B356C000000020CF4A4340000000602AB256C0FFFFFFDFE7494340FFFFFF9F36AF56C0000000E0375143400000006054AF56C0000000C0573B43400000006067AF56C000000040593243400000002017B256C0000000209F1A4340 4.29831099999999999 +46 0106000020E6100000010000000103000000010000001200000000000060DE7956C0000000E08D1C4340000000C0897B56C0000000806323434000000060177B56C0000000807E244340000000204D7A56C0000000A0CD234340FFFFFF7FB97A56C0000000409227434000000000028256C0000000E0EB27434000000020048256C0000000002F2A4340000000A04E8956C00000002053344340000000C05A8956C000000020A5364340000000A0029156C0000000606442434000000000B89056C0000000C031444340000000A06B8F56C0000000000A484340000000E0C18B56C0000000601D4E4340000000C0BF8B56C0000000A05254434000000060BA6D56C00000006031544340000000806B6D56C0000000E09E35434000000080B86D56C0FFFFFFFF081C434000000060DE7956C0000000E08D1C4340 27.4838270000000016 +48 
0106000020E6100000010000000103000000010000000E000000FFFFFF9FD60956C00000000035214340000000808A0956C000000080C1494340000000605FFD55C000000020A549434000000060C8FC55C0FFFFFF5FDF444340000000C0AAFD55C0000000808E3F434000000080F9FC55C000000060DA394340000000E068FD55C000000060D537434000000000E6FC55C0000000C05E364340000000E0A8FE55C0000000C056334340FFFFFFFFA6FE55C00000004061304340000000E0ACFD55C0000000E0D02C4340000000407CFD55C000000040D92543400000006042FF55C00000004040214340FFFFFF9FD60956C00000000035214340 0 +49 0106000020E6100000010000000103000000010000001600000000000060DE7956C0000000E08D1C434000000020918256C0000000809A1C4340000000207D8256C0000000008611434000000020488D56C000000040610B434000000080429056C000000040A30F434000000060899256C0000000405A154340000000C08C9556C0000000C02718434000000060589756C000000080FD1D434000000060A39756C0000000606A294340FFFFFFBFF49656C0FFFFFF1FC32E434000000020BC9556C00000004007324340FFFFFF5F519356C0000000A0B3364340000000A0029156C00000006064424340000000C05A8956C000000020A5364340000000A04E8956C0000000205334434000000020048256C0000000002F2A434000000000028256C0000000E0EB274340FFFFFF7FB97A56C00000004092274340000000204D7A56C0000000A0CD23434000000060177B56C0000000807E244340000000C0897B56C0000000806323434000000060DE7956C0000000E08D1C4340 2.93446600000000002 +50 
0106000020E6100000010000000103000000010000001600000000000020396656C0000000A0C31B434000000080B86D56C0FFFFFFFF081C4340000000806B6D56C0000000E09E35434000000080DD6A56C000000020BB364340FFFFFF5F876A56C00000008096384340FFFFFF3F536956C0000000C06A384340000000E0156856C0000000A08539434000000020996756C000000060AB3B4340000000A0A06456C0000000A0963D434000000060B56256C0000000A09A3C4340000000E08A6156C0000000A0793D4340000000E0CB5E56C000000040AD3B4340000000E0405D56C0000000803A3E4340000000608F5B56C0000000C0273F4340000000A07D5956C0000000A0833E4340000000A0DA5656C000000040A4414340000000E00D5356C0000000004C404340000000A0FB5056C0000000001A414340000000A04E4956C0000000E031404340000000803D4956C0FFFFFF9FB63C434000000080A84956C0000000A05B1B434000000020396656C0000000A0C31B4340 4.45642699999999969 +51 0106000020E6100000010000000103000000010000002400000000000060E4A856C000000080440A43400000006018AA56C0000000E0FF0A434000000080FFA956C000000080EA0C434000000080D3AB56C0FFFFFF5F2F0C434000000080FEAB56C0000000C0710E43400000002017B256C0000000209F1A43400000006067AF56C000000040593243400000006054AF56C0000000C0573B4340000000202CAC56C000000040A33B434000000060CFAB56C000000040BE384340FFFFFFDFD4AA56C000000060A5384340000000801EAA56C000000020A63D4340000000C049A756C0000000A07B3C4340FFFFFFFFF5A556C0000000801A404340000000E0379A56C00000002001404340FFFFFF3F2B9A56C000000000263E4340FFFFFF1FE19A56C000000040623D434000000040129A56C0000000C07E3A4340FFFFFF1FAA9556C0000000A074394340000000406F9656C0000000800536434000000020BC9556C00000004007324340FFFFFFBFF49656C0FFFFFF1FC32E434000000060A39756C0000000606A29434000000060589756C000000080FD1D4340000000C08C9556C0000000C02718434000000060899256C0000000405A15434000000080429056C000000040A30F434000000060029356C000000020C60B4340FFFFFFDF129556C000000060CA0C4340000000E0919A56C0000000E0CE054340FFFFFF7F92A656C0FFFFFFBF5400434000000060F6A756C0000000E03B014340FFFFFF9F39A756C0000000E0A3024340000000E08CA756C0000000A00E064340000000A003A756C0000000C05F09434000000060E4A856C000000080440A4340 
4.62926400000000005 +53 0106000020E6100000010000000103000000010000001F00000000000040E21957C0000000A0A6024340000000E0092157C0000000E02C03434000000000242157C0000000608104434000000060892357C0000000405C064340FFFFFF7FC12357C0000000A0EA074340000000A0D82457C00000000027084340FFFFFF7FDB2457C0000000E0450C434000000080C22557C0000000A0750C434000000020B82557C000000040160E434000000020652657C0000000402B0E434000000020632657C0000000A05C114340FFFFFF9F102757C00000006080114340000000A0F62657C0FFFFFFDF91154340000000C0FE2857C0FFFFFF3FEE154340000000A0342957C000000080811A4340FFFFFFDFD72C57C0000000A03B1C4340000000C0662C57C000000040292C434000000060CC2857C0FFFFFFBF582C4340000000800C2857C0FFFFFFBF01374340FFFFFF9FB91F57C0FFFFFFDF7D364340000000E0D81957C000000000F6354340000000C0281A57C000000080262B434000000040131257C000000040C52A4340000000C05C1057C0000000209229434000000060D80E57C000000080CC2A4340000000208D0C57C000000000AB2A434000000060850C57C000000080EC244340000000A0C10C57C00000008020154340000000E0AA0B57C00000004000154340FFFFFF1F090C57C0000000202E02434000000040E21957C0000000A0A6024340 3.99004100000000017 +55 
0106000020E6100000010000000103000000010000002200000000000000181856C000000020FEF4424000000020F21756C000000040DA204340FFFFFF9FD60956C000000000352143400000006042FF55C00000004040214340FFFFFF9FB8FE55C0000000E0DB1E4340000000C01AFF55C0000000600E1E43400000006096FE55C0FFFFFFFFB0194340000000A0AAFB55C0000000A0E7154340FFFFFFBFA5FB55C0000000E02914434000000020D6FC55C00000006086114340000000E04DFE55C000000080DD104340000000E02F0156C000000000390D434000000000CA0056C000000000D20B434000000060C0FD55C000000040620C4340000000406BFE55C0000000206209434000000000390256C000000040EC06434000000000C20256C000000080C605434000000080A70256C000000020E704434000000080630156C0000000C04A044340000000A0DE0156C0000000E00D014340000000A0630156C0FFFFFF9FCEFC424000000080B80256C0000000E066FA4240FFFFFF5FAC0256C0000000A09DF74240FFFFFFBF220456C00000002003F74240000000600D0556C000000000D5F8424000000040600556C0000000803AF64240000000C0F20156C0000000A073F54240000000A0B30156C0000000E0EFF34240FFFFFF1FDF0256C000000040B0F24240FFFFFFBF670656C0FFFFFF5FFDF34240000000407E0656C00000006099F24240000000A0390956C0000000E0E8F54240000000E0CA0956C0000000201CF5424000000000181856C000000020FEF44240 3.04025299999999987 +56 0106000020E6100000010000000103000000010000000600000000000000181856C000000020FEF4424000000020572D56C0FFFFFF7F74F44240000000E0562D56C0000000208910434000000060492D56C0000000603E21434000000020F21756C000000040DA20434000000000181856C000000020FEF44240 3.90541099999999997 +57 
0106000020E6100000010000000103000000010000001700000000000020488D56C000000040610B4340000000207D8256C0000000008611434000000020918256C0000000809A1C434000000060DE7956C0000000E08D1C434000000080B86D56C0FFFFFFFF081C434000000020396656C0000000A0C31B434000000000936656C0000000601DFA424000000040B46A56C0000000607CEB424000000060E56B56C0FFFFFFFF16EA4240000000403A6C56C00000008003E74240000000E09E6E56C0000000A0A5EB424000000080827656C000000020D9F34240FFFFFF5F1B7756C000000000E7F34240FFFFFFDF797756C00000000029F24240FFFFFF9FA27956C0000000A01DF0424000000020067C56C0000000C063F0424000000080A67E56C0000000A0B8F44240000000A0537D56C00000006058FB424000000020B18056C0000000A012FC4240000000E0AE8256C0FFFFFF5F21FF424000000040A38756C00000008021044340000000209F8856C0000000E0E706434000000020488D56C000000040610B4340 4.33283899999999988 +58 0106000020E6100000010000000103000000010000000B00000000000020864856C00000000054104340000000C0FA4856C0FFFFFF3FC70D434000000000E64756C000000060EA0B4340FFFFFFFFF54856C0000000E00F06434000000080504956C0000000C0DEFE4240000000E0744B56C0000000A07CF9424000000000936656C0000000601DFA424000000020396656C0000000A0C31B434000000080A84956C0000000A05B1B4340FFFFFF9FB84956C0000000409110434000000020864856C00000000054104340 3.8941110000000001 +61 0106000020E61000000100000001030000000100000013000000000000A0F60157C0000000C081014340000000405FFE56C0000000E06B014340000000605AFD56C0000000E05905434000000060BBFB56C0000000809A044340000000C01EFB56C00000002014064340FFFFFFDFD2F956C000000040E2064340FFFFFF3FF2E856C0000000A0A3064340000000A0D7E856C0FFFFFF1F1B14434000000060BCE256C0000000C023144340FFFFFFFF04E256C0FFFFFFBFD01343400000008044E256C0000000E0BEE44240000000803CF456C000000040BEE4424000000080DBF356C0FFFFFF9F97DF42400000004063F456C0FFFFFFBF64DB4240000000007EF456C000000000A6CC4240000000A0000257C00000002054CD424000000080730157C0000000C094E3424000000080EA0157C0000000409BE44240000000A0F60157C0000000C081014340 3.28216300000000016 +62 
0106000020E6100000010000000103000000010000000C00000000000020572D56C0FFFFFF7F74F4424000000020682D56C0FFFFFF7F00EF4240000000C0E94956C000000080CDEE4240000000E0EC4956C0000000607AF94240000000E0744B56C0000000A07CF9424000000080504956C0000000C0DEFE4240FFFFFFFFF54856C0000000E00F06434000000000E64756C000000060EA0B4340000000C0FA4856C0FFFFFF3FC70D434000000020864856C00000000054104340000000E0562D56C0000000208910434000000020572D56C0FFFFFF7F74F44240 3.29576199999999986 +63 0106000020E61000000100000001030000000100000013000000000000E0718756C00000000011D64240FFFFFF5F418A56C0FFFFFF3F72DA4240FFFFFFDFFD8C56C00000004087D64240000000E0A59D56C0FFFFFF5FA4F0424000000000CE9456C0000000603BFE4240000000E0919A56C0000000E0CE054340FFFFFFDF129556C000000060CA0C434000000060029356C000000020C60B434000000080429056C000000040A30F434000000020488D56C000000040610B4340000000209F8856C0000000E0E706434000000040A38756C00000008021044340000000E0AE8256C0FFFFFF5F21FF424000000020B18056C0000000A012FC4240000000A0537D56C00000006058FB424000000080A67E56C0000000A0B8F4424000000020067C56C0000000C063F04240000000E0798056C0000000E0DEE84240000000E0718756C00000000011D64240 7.24967900000000043 +64 0106000020E6100000010000000103000000010000001100000000000060BC8956C00000004054D24240FFFFFF7F82A256C00000008046D2424000000040D6A956C0000000C024D242400000006098A956C0000000C019DE424000000060E4A856C000000080440A4340000000A003A756C0000000C05F094340000000E08CA756C0000000A00E064340FFFFFF9F39A756C0000000E0A302434000000060F6A756C0000000E03B014340FFFFFF7F92A656C0FFFFFFBF54004340000000E0919A56C0000000E0CE05434000000000CE9456C0000000603BFE4240000000E0A59D56C0FFFFFF5FA4F04240FFFFFFDFFD8C56C00000004087D64240FFFFFF5F418A56C0FFFFFF3F72DA4240000000E0718756C00000000011D6424000000060BC8956C00000004054D24240 3.04184600000000005 +65 
0106000020E6100000010000000103000000010000000B000000000000E0F40F57C0000000C0B5CD4240FFFFFF9FF10F57C0000000000DD34240000000403A1A57C0000000E038DB4240000000001E1A57C00000002044EE424000000040E21957C0000000A0A6024340FFFFFF1F090C57C0000000202E024340000000A0F60157C0000000C08101434000000080EA0157C0000000409BE4424000000080730157C0000000C094E34240000000A0000257C00000002054CD4240000000E0F40F57C0000000C0B5CD4240 1.61801799999999996 +66 0106000020E6100000010000000103000000010000001400000000000000CE4956C00000004053CD4240000000C0655D56C0000000A09ECD4240FFFFFF7F825D56C0FFFFFF1FA9CA4240FFFFFF7F835E56C000000080FAC8424000000020996156C0000000E035C9424000000000DB6056C0FFFFFFBFD6CE4240000000403A6156C0000000803FD3424000000020DB6056C00000000005D74240000000A0606156C000000020EFD84240FFFFFF3F366556C0000000A061DA424000000040A76A56C0000000006BDF424000000040416B56C00000002059E44240000000403A6C56C00000008003E7424000000060E56B56C0FFFFFFFF16EA424000000040B46A56C0000000607CEB424000000000936656C0000000601DFA4240000000E0744B56C0000000A07CF94240000000E0EC4956C0000000607AF94240000000C0E94956C000000080CDEE424000000000CE4956C00000004053CD4240 4.91080100000000019 +67 
0106000020E6100000010000000103000000010000002000000000000060BC8956C00000004054D24240000000E0718756C00000000011D64240000000E0798056C0000000E0DEE8424000000020067C56C0000000C063F04240FFFFFF9FA27956C0000000A01DF04240FFFFFFDF797756C00000000029F24240FFFFFF5F1B7756C000000000E7F3424000000080827656C000000020D9F34240000000E09E6E56C0000000A0A5EB4240000000403A6C56C00000008003E7424000000040416B56C00000002059E4424000000040A76A56C0000000006BDF4240FFFFFF3F366556C0000000A061DA4240000000A0606156C000000020EFD8424000000020DB6056C00000000005D74240000000403A6156C0000000803FD3424000000000DB6056C0FFFFFFBFD6CE424000000020996156C0000000E035C94240000000A0D56556C0FFFFFF3F7FC9424000000020FD6656C00000006065CC4240000000A08C6856C000000080A3CB424000000020126956C000000000DBCC424000000060DB6B56C0000000801BCB4240000000E03A6C56C00000008035CC424000000020756D56C000000020BACC4240000000A0296E56C000000080E9CB4240000000C0F16E56C0000000E0A9CC4240000000E0817156C0FFFFFF3F6FCB4240000000A0D07356C000000080EECC424000000060627756C000000060F8CC424000000000908956C0000000E0B3CC424000000060BC8956C00000004054D24240 1.99145700000000003 +69 0106000020E6100000010000000103000000010000001100000000000060E6E956C000000040FFB54240000000C0B1F056C00000006063B6424000000060EAF056C0000000E033CC4240000000007EF456C000000000A6CC42400000004063F456C0FFFFFFBF64DB424000000080DBF356C0FFFFFF9F97DF4240000000803CF456C000000040BEE442400000008044E256C0000000E0BEE442400000000065D456C00000004041E44240000000C086D456C000000040CED94240000000C080CA56C0000000001BD942400000008089CA56C00000008095CB4240000000408AD456C000000000C3CB4240000000A067D456C000000060BDC042400000008032CE56C00000006039C04240000000C049CE56C0000000E0D8B4424000000060E6E956C000000040FFB54240 7.26665000000000028 +70 
0106000020E61000000100000001030000000100000014000000FFFFFF7F82A256C00000008046D24240FFFFFF1FC4A256C00000000069CC4240000000E078A356C00000002057CC4240000000408CA356C0000000C0F4A74240FFFFFF7FD8A356C0000000A004A34240000000808AAF56C0000000E0CBA24240FFFFFF3F5BB056C000000020F4A24240000000800DB056C00000002043AF424000000040D5B156C00000004066AF424000000080CAB156C0000000600DCD4240FFFFFF7F5EC056C0000000805BCD4240000000C00AC756C00000000081CB42400000008089CA56C00000008095CB4240000000C080CA56C0000000001BD9424000000060FBC956C0000000A01ED94240FFFFFF9FF7C956C0FFFFFF7F75DE42400000006010C756C000000020A7DE42400000006098A956C0000000C019DE424000000040D6A956C0000000C024D24240FFFFFF7F82A256C00000008046D24240 3.11090400000000011 +71 0106000020E6100000010000000103000000010000000900000000000060378E56C0000000A0EBA74240000000408CA356C0000000C0F4A74240000000E078A356C00000002057CC4240FFFFFF1FC4A256C00000000069CC4240FFFFFF7F82A256C00000008046D2424000000060BC8956C00000004054D2424000000000908956C0000000E0B3CC424000000040968956C000000040EAA7424000000060378E56C0000000A0EBA74240 2.98027100000000011 +72 0106000020E6100000010000000103000000010000000D00000000000080CD0557C0000000407487424000000000A11057C000000060E7874240000000A0F00F57C000000080AAA04240FFFFFF7F761057C0000000A0EEA0424000000000221057C0000000C028BD4240000000E0F40F57C0000000C0B5CD4240000000A0000257C00000002054CD4240000000007EF456C000000000A6CC424000000060EAF056C0000000E033CC4240000000C0B1F056C00000006063B6424000000060E6E956C000000040FFB5424000000000B0EA56C0000000004286424000000080CD0557C00000004074874240 3.86676699999999984 +73 
0106000020E6100000010000000103000000010000000E000000000000E0FF5D56C0000000C071AB424000000020E35B56C00000002088AD4240000000605D5B56C0000000409CB4424000000020085D56C00000000002BA424000000080AA5F56C0000000E0F0BE424000000020996156C0000000E035C94240FFFFFF7F835E56C000000080FAC84240FFFFFF7F825D56C0FFFFFF1FA9CA4240000000C0655D56C0000000A09ECD424000000000CE4956C00000004053CD4240000000E0F64256C0FFFFFFDF38CD4240000000002E4356C0FFFFFFFF29AB4240000000C0BC4F56C0000000C03CAB4240000000E0FF5D56C0000000C071AB4240 1.86840800000000007 +74 0106000020E61000000100000001030000000100000024000000000000808AAF56C0000000E0CBA24240000000A093AF56C0000000E0549542400000006089B056C00000002032954240000000E0B1B056C0FFFFFFFF0E924240000000202CB256C00000002007924240000000C071B256C000000060B4864240FFFFFFBF2FBE56C0000000A04E874240000000802ABE56C000000080B48C4240FFFFFF7F90C156C0FFFFFF9FCB8C424000000080BAC156C0000000C003924240000000E09DC256C0000000A01B92424000000020C3C256C0000000807895424000000020D4C456C00000008020954240000000C0EAC456C0FFFFFF7F84964240000000E019C656C0000000409A964240000000E035C656C000000080E899424000000000C3C856C000000000D7994240000000A0D9C856C0000000C0899E42400000006055C856C0FFFFFF1FAB9E42400000000047C856C0000000804BA04240000000005BCA56C0000000A0D1A04240000000805DCA56C0000000204EA84240000000E096CB56C0000000E080A8424000000080B0CB56C000000040A1B44240000000C049CE56C0000000E0D8B442400000008032CE56C00000006039C04240000000A067D456C000000060BDC04240000000408AD456C000000000C3CB42400000008089CA56C00000008095CB4240000000C00AC756C00000000081CB4240FFFFFF7F5EC056C0000000805BCD424000000080CAB156C0000000600DCD424000000040D5B156C00000004066AF4240000000800DB056C00000002043AF4240FFFFFF3F5BB056C000000020F4A24240000000808AAF56C0000000E0CBA24240 12.5770339999999994 +75 
0106000020E6100000010000000103000000010000000D000000000000A0A47756C0000000C0DA904240000000E07A7D56C000000060D090424000000060B67D56C00000000057884240000000E0BD7F56C00000008017884240000000C0D87F56C0000000805986424000000040568756C00000000039864240000000605B8756C0000000E0188B4240000000E02F8E56C0000000A0058B424000000060378E56C0000000A0EBA7424000000040968956C000000040EAA7424000000000908956C0000000E0B3CC424000000060627756C000000060F8CC4240000000A0A47756C0000000C0DA904240 7.80359900000000017 +76 0106000020E6100000010000000103000000010000002800000000000060627756C000000060F8CC4240000000A0D07356C000000080EECC4240000000E0817156C0FFFFFF3F6FCB4240000000C0F16E56C0000000E0A9CC4240000000A0296E56C000000080E9CB424000000020756D56C000000020BACC4240000000E03A6C56C00000008035CC424000000060DB6B56C0000000801BCB424000000020126956C000000000DBCC4240000000A08C6856C000000080A3CB424000000020FD6656C00000006065CC4240000000A0D56556C0FFFFFF3F7FC9424000000020996156C0000000E035C9424000000080AA5F56C0000000E0F0BE424000000020085D56C00000000002BA4240000000605D5B56C0000000409CB4424000000020E35B56C00000002088AD4240000000E0FF5D56C0000000C071AB424000000080096056C0FFFFFF1F2BAA424000000080E36056C00000000009A7424000000080E36056C0FFFFFF1F61A3424000000080555F56C0000000A0C4A0424000000060D06056C000000000079F4240000000C0F46556C000000000279E424000000000FE6556C0000000A01B9D424000000040236856C000000020EE9C4240FFFFFF7F196856C000000000C59B424000000000CD6956C0000000E06A9B424000000060CC6956C00000008027994240000000A0336C56C00000000008994240FFFFFFFF326C56C0FFFFFF9F2C974240FFFFFF7F876D56C0FFFFFF5FD296424000000020906D56C0000000208094424000000020866E56C0000000806194424000000080986E56C0000000E0FC924240000000A0A07056C000000080BF924240000000A0B27056C000000060B790424000000060BB7156C000000060A7904240000000A0A47756C0000000C0DA90424000000060627756C000000060F8CC4240 3.47149000000000019 +77 
0106000020E6100000010000000103000000010000001800000000000000B0EA56C0000000004286424000000060E6E956C000000040FFB54240000000C049CE56C0000000E0D8B4424000000080B0CB56C000000040A1B44240000000E096CB56C0000000E080A84240000000805DCA56C0000000204EA84240000000005BCA56C0000000A0D1A042400000000047C856C0000000804BA042400000006055C856C0FFFFFF1FAB9E4240000000A0D9C856C0000000C0899E424000000000C3C856C000000000D7994240000000E035C656C000000080E8994240000000E019C656C0000000409A964240000000C0EAC456C0FFFFFF7F8496424000000020D4C456C0000000802095424000000020C3C256C00000008078954240000000E09DC256C0000000A01B92424000000080BAC156C0000000C003924240FFFFFF7F90C156C0FFFFFF9FCB8C4240000000E031C756C000000080E88A4240000000806ECE56C0000000E0E18A424000000040AFCE56C000000020DF704240000000C0B7EA56C0000000C06A71424000000000B0EA56C00000000042864240 4.33482199999999995 +78 0106000020E6100000010000000103000000010000001F000000000000E08FAB56C0000000807A76424000000040ABAC56C000000080A176424000000020BFAC56C0000000A0BC7B4240FFFFFFDFF6AD56C000000000E37B424000000000FEAD56C0000000604F7F42400000004052AF56C000000000847F42400000000061AF56C0FFFFFFBF6B864240000000C071B256C000000060B4864240000000202CB256C00000002007924240000000E0B1B056C0FFFFFFFF0E9242400000006089B056C00000002032954240000000A093AF56C0000000E054954240000000808AAF56C0000000E0CBA24240FFFFFF7FD8A356C0000000A004A34240000000408CA356C0000000C0F4A7424000000060378E56C0000000A0EBA74240000000E02F8E56C0000000A0058B4240000000605B8756C0000000E0188B424000000040568756C0000000003986424000000060BC8956C0000000604186424000000060E68956C0000000809C81424000000060D28A56C0000000207C81424000000000F58A56C000000060D07E424000000000108C56C0000000A0A07E424000000060338C56C000000040987C4240FFFFFFFF318D56C000000080777C424000000040438D56C0000000A0217B4240000000607A8E56C000000080E27A424000000000938E56C000000080EC77424000000040D09056C00000004054764240000000E08FAB56C0000000807A764240 8.45153700000000008 +\. 
+ + +CREATE INDEX getis_data_gix ON getis_data USING GIST(the_geom); + From 1d09eac3e7f8ef2ead4c2a7c2bf89db502aab4c5 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 19 Sep 2016 16:10:29 -0400 Subject: [PATCH 21/96] adding pgsql tests --- src/pg/test/expected/16_getis_test.out | 23 +++++++++++++++++++++++ src/pg/test/sql/16_getis_test.sql | 13 +++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 src/pg/test/expected/16_getis_test.out create mode 100644 src/pg/test/sql/16_getis_test.sql diff --git a/src/pg/test/expected/16_getis_test.out b/src/pg/test/expected/16_getis_test.out new file mode 100644 index 0000000..ba5ee87 --- /dev/null +++ b/src/pg/test/expected/16_getis_test.out @@ -0,0 +1,23 @@ +\pset format unaligned +\set ECHO all +\i test/fixtures/ppoints.sql +SET client_min_messages TO WARNING; +\set ECHO none +_cdb_random_seeds + +(1 row) +z_score|p_value +-1.0358|0.0160 +-0.6404|0.0070 +2.6802|0.0190 +4.7529|0.0020 +2.7270|0.0250 +-1.2444|0.0230 +4.6322|0.0150 +4.5325|0.0220 +-1.3115|0.0090 +-1.3115|0.0080 +4.6361|0.0010 +2.8307|0.0310 +-1.3115|0.0020 +(13 rows) diff --git a/src/pg/test/sql/16_getis_test.sql b/src/pg/test/sql/16_getis_test.sql new file mode 100644 index 0000000..38977a3 --- /dev/null +++ b/src/pg/test/sql/16_getis_test.sql @@ -0,0 +1,13 @@ +\pset format unaligned +\set ECHO all +\i test/fixtures/getis_data.sql + +-- set random seed +SELECT cdb_crankshaft._cdb_random_seeds(1234); + +-- test against PySAL example dataset +SELECT z_score, p_val +FROM cdb_crankshaft.CDB_GetisOrdsG( + 'select * from ppoints2', + 'ratio') As cdb_getisordsg(z_score, p_val, p_z_sim) +WHERE p_val <= 0.05; From dcb364c3eec992bed6a2f3681d693f4306ccc457 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 19 Sep 2016 17:16:53 -0400 Subject: [PATCH 22/96] up default number of permutations --- src/pg/sql/16_getis.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/sql/16_getis.sql b/src/pg/sql/16_getis.sql index 
dc3a25d..6e52d50 100644 --- a/src/pg/sql/16_getis.sql +++ b/src/pg/sql/16_getis.sql @@ -6,7 +6,7 @@ CREATE OR REPLACE FUNCTION column_name TEXT, w_type TEXT DEFAULT 'knn', num_ngbrs INT DEFAULT 5, - permutations INT DEFAULT 99, + permutations INT DEFAULT 999, geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (z_val NUMERIC, p_val NUMERIC, p_z_sim NUMERIC, rowid BIGINT) From eff548dec92ceaef5ff90b5afa901434352686f6 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 19 Sep 2016 17:17:51 -0400 Subject: [PATCH 23/96] aligning parameters for fistures and tests --- src/pg/test/sql/16_getis_test.sql | 9 ++- src/py/crankshaft/test/fixtures/getis.json | 79 +------------------ .../crankshaft/test/test_clustering_getis.py | 19 +++++ 3 files changed, 25 insertions(+), 82 deletions(-) diff --git a/src/pg/test/sql/16_getis_test.sql b/src/pg/test/sql/16_getis_test.sql index 38977a3..03cfcc6 100644 --- a/src/pg/test/sql/16_getis_test.sql +++ b/src/pg/test/sql/16_getis_test.sql @@ -6,8 +6,9 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); -- test against PySAL example dataset -SELECT z_score, p_val +SELECT z_score, p_value FROM cdb_crankshaft.CDB_GetisOrdsG( - 'select * from ppoints2', - 'ratio') As cdb_getisordsg(z_score, p_val, p_z_sim) -WHERE p_val <= 0.05; + 'select * from getis_data', + 'hr8893', 'knn', 5, 999, + 'the_geom', 'cartodb_id') As cdb_getisordsg(z_score, p_value, p_z_sim) +WHERE p_value <= 0.05; diff --git a/src/py/crankshaft/test/fixtures/getis.json b/src/py/crankshaft/test/fixtures/getis.json index 71d7728..4d5837f 100644 --- a/src/py/crankshaft/test/fixtures/getis.json +++ b/src/py/crankshaft/test/fixtures/getis.json @@ -1,78 +1 @@ -[[-0.37378594228210338, 0.436, 0.35428178954558454], - [-0.95225687055925445, 0.065000000000000002, 0.17048336261993491], - [-0.69155363829052829, 0.313, 0.2446088425605718], - [-0.91005743729633215, 0.085999999999999993, 0.18139610975939735], - [-1.0132869736739361, 0.13200000000000001, 
0.1554615538579156], - [-0.51711928006609076, 0.058999999999999997, 0.30253644777604194], - [-0.59915606178126646, 0.125, 0.27453440990643974], - [-0.65270328962244883, 0.314, 0.25697379065855031], - [-1.035818892716347, 0.016, 0.15014332094164029], - [-0.77022454873249124, 0.14899999999999999, 0.22058335212423197], - [-0.33280112478125556, 0.40500000000000003, 0.36964220456990138], - [-0.16619696958659569, 0.28000000000000003, 0.43400097210156352], - [-1.0189197578270577, 0.114, 0.15412053143437432], - [-0.58736356248618182, 0.378, 0.27847978008948393], - [-0.60815535737753856, 0.34699999999999998, 0.27154222002447759], - [-0.83654622542342216, 0.22, 0.20142384153812443], - [-0.27623528229314609, 0.48099999999999998, 0.39118367986278957], - [-0.17751012385821857, 0.40600000000000003, 0.42955385691066428], - [-0.42339868568527872, 0.499, 0.33600220182364238], - [-0.81090507212288232, 0.17000000000000001, 0.2087100936430748], - [-0.63864423309742102, 0.089999999999999997, 0.26152719947142589], - [-0.64036180904066409, 0.0070000000000000001, 0.2609687028452613], - [-0.62209633811403076, 0.217, 0.26693926135549806], - [2.6802178678493869, 0.019, 0.0036787128295844296], - [-0.53304848636954871, 0.314, 0.29700000849115427], - [-0.15325607655300719, 0.33900000000000002, 0.43909816861874729], - [-0.46599673142949988, 0.5, 0.32060892008190012], - [-0.67328453973424696, 0.34000000000000002, 0.25038314326507338], - [-0.65374072057172605, 0.27300000000000002, 0.25663943186717253], - [-0.042646376611931783, 0.33900000000000002, 0.48299171295314569], - [-0.50268629316736713, 0.36899999999999999, 0.30759242403716802], - [0.20061122903056294, 0.41199999999999998, 0.42050128853782709], - [4.7529210422438366, 0.002, 1.0024933114749501e-06], - [-0.17112791409785774, 0.45500000000000002, 0.43206159366732011], - [2.7269760223697093, 0.025000000000000001, 0.0031958841790815651], - [-1.2443986350856617, 0.023, 0.1066764425086173], - [2.6146417757460672, 0.058999999999999997, 
0.0044660540776073621], - [-0.81089418923393874, 0.217, 0.20871321876749027], - [-0.96842037905996448, 0.051999999999999998, 0.16641723288102916], - [4.632217640632744, 0.014999999999999999, 1.8088477418132243e-06], - [1.7098997741760702, 0.066000000000000003, 0.043642204110986293], - [-0.68576671544237189, 0.30399999999999999, 0.24643011428027894], - [-0.10705643473845843, 0.46100000000000002, 0.45737210425657626], - [-0.54280332475246651, 0.29499999999999998, 0.29363261191793877], - [0.031036400804321389, 0.26000000000000001, 0.48762025500133288], - [4.5324686370772209, 0.021999999999999999, 2.9149167000142029e-06], - [-1.3114835366024926, 0.0089999999999999993, 0.09484722615634944], - [-1.3114835366024926, 0.0080000000000000002, 0.09484722615634944], - [4.6361327065826368, 0.001, 1.7749405375466765e-06], - [1.4367662910941781, 0.081000000000000003, 0.075392205783569644], - [2.8306701087322108, 0.031, 0.0023225301610166893], - [-0.26132309263094761, 0.47599999999999998, 0.39692167988511706], - [-0.78055710491931951, 0.13400000000000001, 0.21753151443265129], - [-0.64234747392787916, 0.33300000000000002, 0.26032379813179585], - [-1.3114835366024926, 0.002, 0.09484722615634944], - [-0.57318180672306673, 0.26400000000000001, 0.28326080073223758], - [1.4602423896649199, 0.085999999999999993, 0.072111734510341252], - [-0.10852292180607989, 0.33700000000000002, 0.45679044945885094], - [0.25007822707788863, 0.25800000000000001, 0.4012634266959908], - [-0.15535540716918589, 0.36799999999999999, 0.43827056859443592], - [-0.15387021291338146, 0.34300000000000003, 0.43885603550771368], - [-0.23212287899595097, 0.434, 0.40822128763053078], - [-0.34331698296028734, 0.255, 0.36568000291448455], - [-0.21917003612352226, 0.38300000000000001, 0.41325879787572739], - [-0.79956517297385543, 0.20000000000000001, 0.21198138620767248], - [-0.57435424239923683, 0.20100000000000001, 0.28286405516217616], - [0.17090357402483336, 0.17599999999999999, 0.43214979317279412], - 
[-0.45071423902325392, 0.39500000000000002, 0.32609775926146312], - [0.76813695849036678, 0.16500000000000001, 0.22120291089357114], - [0.43164294033005113, 0.14899999999999999, 0.33300047214912831], - [0.3756136313798501, 0.16, 0.35360207763890095], - [-0.35027449210479344, 0.47599999999999998, 0.36306635326063197], - [-0.48157903675663827, 0.45500000000000002, 0.31505251051764338], - [0.82850242294957743, 0.187, 0.20369301166122333], - [0.32654794875356141, 0.23599999999999999, 0.3720049088953894], - [-0.30807434754194035, 0.497, 0.37901288012280243], - [0.50368346846574807, 0.159, 0.30724191359295261], - [0.75454835573966283, 0.13500000000000001, 0.22526001043744448]] +[[-0.37337341596824414, 0.43099999999999999, 0.35443527112672846], [-0.95217153199599269, 0.091999999999999998, 0.17050499804382691], [-0.69132084390888426, 0.30299999999999999, 0.24468196785399454], [-0.90994823039851658, 0.088999999999999996, 0.18142490623820451], [-1.0132361542216282, 0.129, 0.15547368761207747], [-0.51678782422627401, 0.058999999999999997, 0.30265214024759912], [-0.59887100659663139, 0.13400000000000001, 0.27462945349842682], [-0.65244852117509244, 0.32900000000000001, 0.2570559358979031], [-1.0357808174963727, 0.0070000000000000001, 0.1501522043425858], [-0.77003625124250874, 0.13600000000000001, 0.22063919456432224], [-0.33236541712916268, 0.375, 0.3698066743983055], [-0.16566702930134228, 0.28699999999999998, 0.43420949712949208], [-1.0188721243221961, 0.113, 0.15413183958086896], [-0.58707183737030266, 0.40100000000000002, 0.27857773047946677], [-0.60787539226595488, 0.34100000000000003, 0.27163506072349475], [-0.8363954400004282, 0.20599999999999999, 0.2014662385347713], [-0.27576758055135975, 0.49399999999999999, 0.39136329273990866], [-0.17698658238285753, 0.38100000000000001, 0.42975946445365665], [-0.42301422073305678, 0.48599999999999999, 0.33614244280579275], [-0.81073978385996837, 0.161, 0.20875756070128504], [-0.63838151273605281, 0.107, 0.26161268117180692], 
[-0.64010006015388854, 0.0040000000000000001, 0.26105377495417703], [-0.62182425813185527, 0.22700000000000001, 0.2670287167605806], [2.6823577629697111, 0.014, 0.0036552616083728173], [-0.53272604021530889, 0.39400000000000002, 0.29711161877609882], [-0.15271881679589203, 0.34699999999999998, 0.43931001058875374], [-0.46563636027021654, 0.48499999999999999, 0.32073790602545205], [-0.67304141220538694, 0.32200000000000001, 0.25046047260054227], [-0.65348653890355268, 0.27000000000000002, 0.25672133238529804], [-0.042046555130192398, 0.36199999999999999, 0.48323079264157687], [-0.50234667391554988, 0.39400000000000002, 0.30771184147044006], [0.20134863894573138, 0.41199999999999998, 0.42021298643169991], [4.7562332747386709, 0.001, 9.8619270438859985e-07], [-0.17060076279397593, 0.44400000000000001, 0.43226884904800156], [2.7291423642735309, 0.02, 0.0031749642026865921], [-1.244478534224277, 0.036999999999999998, 0.10666174742715517], [2.6167445805079668, 0.045999999999999999, 0.0044386370442521805], [-0.81072889481557631, 0.20799999999999999, 0.20876068801239311], [-0.96834418270546307, 0.049000000000000002, 0.16643625291733932], [4.6354616023251367, 0.017000000000000001, 1.7807091640120731e-06], [1.711490849677441, 0.058999999999999997, 0.043495269461387376], [-0.68553064793121166, 0.317, 0.24650456386912545], [-0.10649304409706747, 0.44, 0.45759558701679381], [-0.54248639601227799, 0.29299999999999998, 0.29374173830183614], [0.031677897850072946, 0.311, 0.48736446049996618], [4.5356561799411086, 0.019, 2.8712332330593782e-06], [-1.3116013794941181, 0.01, 0.094827333672427794], [-1.3116013794941181, 0.0089999999999999993, 0.094827333672427794], [4.6393788826674198, 0.002, 1.7472893316661242e-06], [1.4382028801285995, 0.090999999999999998, 0.07518824913828126], [2.8328951008347012, 0.033000000000000002, 0.0023064260400579295], [-0.26084695643627254, 0.46500000000000002, 0.39710526578910033], [-0.78037465160516584, 0.122, 0.21758519184573832], [-0.64208684814893169, 
0.307, 0.2604083973990674], [-1.3116013794941181, 0.001, 0.094827333672427794], [-0.57288206029351252, 0.314, 0.28336227604534636], [1.4616922569671136, 0.10000000000000001, 0.071912780017027833], [-0.10796036062141957, 0.38200000000000001, 0.45701356800520454], [0.25084361591996374, 0.29299999999999998, 0.40096750937264047], [-0.15481933481013774, 0.34699999999999998, 0.43848187401104366], [-0.15333330051666849, 0.35699999999999998, 0.43906772057781862], [-0.23163022692856874, 0.41699999999999998, 0.40841261410676866], [-0.34288722316108555, 0.28399999999999997, 0.36584165208609076], [-0.21867005782535454, 0.38200000000000001, 0.41345353745151792], [-0.79939347077393408, 0.215, 0.21203114760123465], [-0.57405515910861116, 0.19800000000000001, 0.28296523788747729], [0.17162418105442584, 0.151, 0.43186649789407894], [-0.45034522396490928, 0.435, 0.3262307674101308], [0.76919536546476719, 0.122, 0.22088867064513418], [0.43251102361572391, 0.14999999999999999, 0.33268502099692632], [0.37645002404345906, 0.155, 0.3532911812599], [-0.34984866752801247, 0.47699999999999998, 0.36322613637735446], [-0.48122747907298502, 0.439, 0.31517741642583419], [0.8295949731100305, 0.16700000000000001, 0.20338390993964683], [0.32735658947745261, 0.24399999999999999, 0.37169909789396838], [-0.30762465422828866, 0.47899999999999998, 0.37918397900139522], [0.50459229844619902, 0.14399999999999999, 0.30692261012051003], [0.7555990768924421, 0.14999999999999999, 0.22494480477952961]] diff --git a/src/py/crankshaft/test/test_clustering_getis.py b/src/py/crankshaft/test/test_clustering_getis.py index f56d5fb..137e8aa 100644 --- a/src/py/crankshaft/test/test_clustering_getis.py +++ b/src/py/crankshaft/test/test_clustering_getis.py @@ -14,6 +14,25 @@ import crankshaft.pysal_utils as pu from crankshaft import random_seeds import json +# Fixture files produced as follows +# +# import pysal as ps +# import numpy as np +# f = ps.open(ps.examples.get_path("stl_hom.txt")) +# y = 
np.array(f.by_col['HR8893']) +# w = ps.knnW_from_shapefile(ps.examples.get_path("stl_hom.shp"), k=5) +# +# out = [{"id": index, "neighbors": w.neighbors[index], "value": val} +# for index, val in enumerate(y)] +# with open('neighbors_getis.json', 'w') as f: +# f.write(str(out)) +# +# np.random.seed(1234) +# lgstar = ps.esda.getisord.G_Local(y, w, star=True, permutations=999) +# +# with open('getis_data.json', 'w') as f: +# f.write(str(zip(lgstar.z_sim, lgstar.p_sim, lgstar.p_z_sim))) + class GetisTest(unittest.TestCase): """Testing class for Getis-Ord's G funtion From 29de72de332d9381d3b1c4de078d58070887abcd Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 20 Sep 2016 09:55:13 -0400 Subject: [PATCH 24/96] output column renaming --- src/pg/sql/16_getis.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pg/sql/16_getis.sql b/src/pg/sql/16_getis.sql index 6e52d50..578f15a 100644 --- a/src/pg/sql/16_getis.sql +++ b/src/pg/sql/16_getis.sql @@ -9,8 +9,10 @@ CREATE OR REPLACE FUNCTION permutations INT DEFAULT 999, geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') -RETURNS TABLE (z_val NUMERIC, p_val NUMERIC, p_z_sim NUMERIC, rowid BIGINT) +RETURNS TABLE (z_score NUMERIC, p_value NUMERIC, p_z_sim NUMERIC, rowid BIGINT) AS $$ from crankshaft.clustering import getis_ord return getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; + +-- TODO: make a version that accepts the values as arrays From 166e9e223fac6de76e2d73dc143436d051f35216 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 20 Sep 2016 09:55:33 -0400 Subject: [PATCH 25/96] minor formatting changes --- src/pg/test/sql/16_getis_test.sql | 2 +- src/py/crankshaft/test/test_clustering_getis.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/pg/test/sql/16_getis_test.sql b/src/pg/test/sql/16_getis_test.sql index 03cfcc6..2d01798 100644 --- a/src/pg/test/sql/16_getis_test.sql +++ 
b/src/pg/test/sql/16_getis_test.sql @@ -10,5 +10,5 @@ SELECT z_score, p_value FROM cdb_crankshaft.CDB_GetisOrdsG( 'select * from getis_data', 'hr8893', 'knn', 5, 999, - 'the_geom', 'cartodb_id') As cdb_getisordsg(z_score, p_value, p_z_sim) + 'the_geom', 'cartodb_id') As t(z_score, p_value, p_z_sim) WHERE p_value <= 0.05; diff --git a/src/py/crankshaft/test/test_clustering_getis.py b/src/py/crankshaft/test/test_clustering_getis.py index 137e8aa..1bcc02d 100644 --- a/src/py/crankshaft/test/test_clustering_getis.py +++ b/src/py/crankshaft/test/test_clustering_getis.py @@ -28,14 +28,16 @@ import json # f.write(str(out)) # # np.random.seed(1234) +# # need to do random.seed(1234) too? # lgstar = ps.esda.getisord.G_Local(y, w, star=True, permutations=999) # -# with open('getis_data.json', 'w') as f: +# with open('getis.json', 'w') as f: # f.write(str(zip(lgstar.z_sim, lgstar.p_sim, lgstar.p_z_sim))) +# class GetisTest(unittest.TestCase): - """Testing class for Getis-Ord's G funtion + """Testing class for Getis-Ord's G* funtion This test replicates the work done in PySAL documentation: https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/autocorrelation.html#local-g-and-g """ From 258322fcca9f08b4999b71d62f5b451473f2af21 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 21 Sep 2016 11:46:07 -0400 Subject: [PATCH 26/96] update tests to queen weights from knn --- src/pg/test/expected/16_getis_test.out | 28 +- src/pg/test/sql/16_getis_test.sql | 11 +- src/py/crankshaft/test/fixtures/getis.json | 2 +- .../test/fixtures/neighbors_getis.json | 861 +----------------- .../crankshaft/test/test_clustering_getis.py | 37 +- 5 files changed, 45 insertions(+), 894 deletions(-) diff --git a/src/pg/test/expected/16_getis_test.out b/src/pg/test/expected/16_getis_test.out index ba5ee87..d0cb4f8 100644 --- a/src/pg/test/expected/16_getis_test.out +++ b/src/pg/test/expected/16_getis_test.out @@ -6,18 +6,16 @@ SET client_min_messages TO WARNING; _cdb_random_seeds (1 row) 
-z_score|p_value --1.0358|0.0160 --0.6404|0.0070 -2.6802|0.0190 -4.7529|0.0020 -2.7270|0.0250 --1.2444|0.0230 -4.6322|0.0150 -4.5325|0.0220 --1.3115|0.0090 --1.3115|0.0080 -4.6361|0.0010 -2.8307|0.0310 --1.3115|0.0020 -(13 rows) +rowid|z_score|p_value +9|-0.7862|0.0500 +22|-0.3955|0.0330 +33|2.7045|0.0050 +35|1.9524|0.0130 +36|-1.2056|0.0170 +37|3.4785|0.0020 +38|-1.4622|0.0020 +40|5.7098|0.0030 +46|3.4704|0.0120 +47|-0.9994|0.0320 +48|-1.3650|0.0340 +(11 rows) diff --git a/src/pg/test/sql/16_getis_test.sql b/src/pg/test/sql/16_getis_test.sql index 2d01798..90fb34d 100644 --- a/src/pg/test/sql/16_getis_test.sql +++ b/src/pg/test/sql/16_getis_test.sql @@ -5,10 +5,11 @@ -- set random seed SELECT cdb_crankshaft._cdb_random_seeds(1234); --- test against PySAL example dataset -SELECT z_score, p_value +-- test against PySAL example dataset 'stl_hom' +SELECT rowid, round(z_score, 4) As z_score, round(p_value, 4) As p_value FROM cdb_crankshaft.CDB_GetisOrdsG( 'select * from getis_data', - 'hr8893', 'knn', 5, 999, - 'the_geom', 'cartodb_id') As t(z_score, p_value, p_z_sim) -WHERE p_value <= 0.05; + 'hr8893', 'queen', NULL, 999, + 'the_geom', 'cartodb_id') As t(z_score, p_value, p_z_sim, rowid) +WHERE round(p_value, 4) <= 0.05 +ORDER BY rowid ASC; diff --git a/src/py/crankshaft/test/fixtures/getis.json b/src/py/crankshaft/test/fixtures/getis.json index 4d5837f..ae67a3c 100644 --- a/src/py/crankshaft/test/fixtures/getis.json +++ b/src/py/crankshaft/test/fixtures/getis.json @@ -1 +1 @@ -[[-0.37337341596824414, 0.43099999999999999, 0.35443527112672846], [-0.95217153199599269, 0.091999999999999998, 0.17050499804382691], [-0.69132084390888426, 0.30299999999999999, 0.24468196785399454], [-0.90994823039851658, 0.088999999999999996, 0.18142490623820451], [-1.0132361542216282, 0.129, 0.15547368761207747], [-0.51678782422627401, 0.058999999999999997, 0.30265214024759912], [-0.59887100659663139, 0.13400000000000001, 0.27462945349842682], [-0.65244852117509244, 0.32900000000000001, 
0.2570559358979031], [-1.0357808174963727, 0.0070000000000000001, 0.1501522043425858], [-0.77003625124250874, 0.13600000000000001, 0.22063919456432224], [-0.33236541712916268, 0.375, 0.3698066743983055], [-0.16566702930134228, 0.28699999999999998, 0.43420949712949208], [-1.0188721243221961, 0.113, 0.15413183958086896], [-0.58707183737030266, 0.40100000000000002, 0.27857773047946677], [-0.60787539226595488, 0.34100000000000003, 0.27163506072349475], [-0.8363954400004282, 0.20599999999999999, 0.2014662385347713], [-0.27576758055135975, 0.49399999999999999, 0.39136329273990866], [-0.17698658238285753, 0.38100000000000001, 0.42975946445365665], [-0.42301422073305678, 0.48599999999999999, 0.33614244280579275], [-0.81073978385996837, 0.161, 0.20875756070128504], [-0.63838151273605281, 0.107, 0.26161268117180692], [-0.64010006015388854, 0.0040000000000000001, 0.26105377495417703], [-0.62182425813185527, 0.22700000000000001, 0.2670287167605806], [2.6823577629697111, 0.014, 0.0036552616083728173], [-0.53272604021530889, 0.39400000000000002, 0.29711161877609882], [-0.15271881679589203, 0.34699999999999998, 0.43931001058875374], [-0.46563636027021654, 0.48499999999999999, 0.32073790602545205], [-0.67304141220538694, 0.32200000000000001, 0.25046047260054227], [-0.65348653890355268, 0.27000000000000002, 0.25672133238529804], [-0.042046555130192398, 0.36199999999999999, 0.48323079264157687], [-0.50234667391554988, 0.39400000000000002, 0.30771184147044006], [0.20134863894573138, 0.41199999999999998, 0.42021298643169991], [4.7562332747386709, 0.001, 9.8619270438859985e-07], [-0.17060076279397593, 0.44400000000000001, 0.43226884904800156], [2.7291423642735309, 0.02, 0.0031749642026865921], [-1.244478534224277, 0.036999999999999998, 0.10666174742715517], [2.6167445805079668, 0.045999999999999999, 0.0044386370442521805], [-0.81072889481557631, 0.20799999999999999, 0.20876068801239311], [-0.96834418270546307, 0.049000000000000002, 0.16643625291733932], [4.6354616023251367, 
0.017000000000000001, 1.7807091640120731e-06], [1.711490849677441, 0.058999999999999997, 0.043495269461387376], [-0.68553064793121166, 0.317, 0.24650456386912545], [-0.10649304409706747, 0.44, 0.45759558701679381], [-0.54248639601227799, 0.29299999999999998, 0.29374173830183614], [0.031677897850072946, 0.311, 0.48736446049996618], [4.5356561799411086, 0.019, 2.8712332330593782e-06], [-1.3116013794941181, 0.01, 0.094827333672427794], [-1.3116013794941181, 0.0089999999999999993, 0.094827333672427794], [4.6393788826674198, 0.002, 1.7472893316661242e-06], [1.4382028801285995, 0.090999999999999998, 0.07518824913828126], [2.8328951008347012, 0.033000000000000002, 0.0023064260400579295], [-0.26084695643627254, 0.46500000000000002, 0.39710526578910033], [-0.78037465160516584, 0.122, 0.21758519184573832], [-0.64208684814893169, 0.307, 0.2604083973990674], [-1.3116013794941181, 0.001, 0.094827333672427794], [-0.57288206029351252, 0.314, 0.28336227604534636], [1.4616922569671136, 0.10000000000000001, 0.071912780017027833], [-0.10796036062141957, 0.38200000000000001, 0.45701356800520454], [0.25084361591996374, 0.29299999999999998, 0.40096750937264047], [-0.15481933481013774, 0.34699999999999998, 0.43848187401104366], [-0.15333330051666849, 0.35699999999999998, 0.43906772057781862], [-0.23163022692856874, 0.41699999999999998, 0.40841261410676866], [-0.34288722316108555, 0.28399999999999997, 0.36584165208609076], [-0.21867005782535454, 0.38200000000000001, 0.41345353745151792], [-0.79939347077393408, 0.215, 0.21203114760123465], [-0.57405515910861116, 0.19800000000000001, 0.28296523788747729], [0.17162418105442584, 0.151, 0.43186649789407894], [-0.45034522396490928, 0.435, 0.3262307674101308], [0.76919536546476719, 0.122, 0.22088867064513418], [0.43251102361572391, 0.14999999999999999, 0.33268502099692632], [0.37645002404345906, 0.155, 0.3532911812599], [-0.34984866752801247, 0.47699999999999998, 0.36322613637735446], [-0.48122747907298502, 0.439, 0.31517741642583419], 
[0.8295949731100305, 0.16700000000000001, 0.20338390993964683], [0.32735658947745261, 0.24399999999999999, 0.37169909789396838], [-0.30762465422828866, 0.47899999999999998, 0.37918397900139522], [0.50459229844619902, 0.14399999999999999, 0.30692261012051003], [0.7555990768924421, 0.14999999999999999, 0.22494480477952961]] +[(0.004793783909323601, 0.17999999999999999, 0.49808756424021061), (-1.0701189472090842, 0.079000000000000001, 0.14228288580832316), (-0.67867750971877305, 0.42099999999999999, 0.24867110969448558), (-0.67407386707620487, 0.246, 0.25013217644612995), (-0.79495689068870035, 0.33200000000000002, 0.21331928959090596), (-0.49279481022182703, 0.058999999999999997, 0.31107878905057329), (-0.38075627530057132, 0.28399999999999997, 0.35169205342069643), (-0.86710921611314895, 0.23699999999999999, 0.19294108571294855), (-0.78618647240956485, 0.050000000000000003, 0.2158791250244505), (-0.76108527223116984, 0.064000000000000001, 0.22330306830813684), (-0.13340753531942209, 0.247, 0.44693554317763651), (-0.57584545722033043, 0.48999999999999999, 0.28235982246156488), (-0.78882694661192831, 0.433, 0.2151065788731219), (-0.38769767950046219, 0.375, 0.34911988661484239), (-0.56057819488052207, 0.41399999999999998, 0.28754255985169652), (-0.41354017495644935, 0.45500000000000002, 0.339605447117173), (-0.23993577722243081, 0.49099999999999999, 0.40519002230969337), (-0.1389080156677496, 0.40400000000000003, 0.44476141839645233), (-0.25485737510500855, 0.376, 0.39941662953554224), (-0.71218610582902353, 0.17399999999999999, 0.23817476979886087), (-0.54533105995872144, 0.13700000000000001, 0.2927629228714812), (-0.39547917847510977, 0.033000000000000002, 0.34624464252424236), (-0.43052658996257548, 0.35399999999999998, 0.33340631435564982), (-0.37296719193774736, 0.40300000000000002, 0.35458643102865428), (-0.66482612169465694, 0.31900000000000001, 0.25308085650392698), (-0.13772133540823422, 0.34699999999999998, 0.44523032843016275), (-0.6765304487868502, 
0.20999999999999999, 0.24935196033890672), (-0.64518763494323472, 0.32200000000000001, 0.25940279912025543), (-0.5078622084312413, 0.41099999999999998, 0.30577498972600159), (-0.12652006733772059, 0.42899999999999999, 0.44966013262301163), (-0.32691133022814595, 0.498, 0.37186747562269029), (0.25533848511500978, 0.42399999999999999, 0.39923083899077472), (2.7045138116476508, 0.0050000000000000001, 0.0034202212972238577), (-0.1551614486076057, 0.44400000000000001, 0.43834701985429037), (1.9524487722567723, 0.012999999999999999, 0.025442473674991528), (-1.2055816465306763, 0.017000000000000001, 0.11398941970467646), (3.478472976017831, 0.002, 0.00025213964072468009), (-1.4621715757903719, 0.002, 0.071847099325659136), (-0.84010307600180256, 0.085000000000000006, 0.20042529779230778), (5.7097646237318243, 0.0030000000000000001, 5.6566262784940591e-09), (1.5082367956567375, 0.065000000000000002, 0.065746966514827365), (-0.58337270103430816, 0.44, 0.27982121546450034), (-0.083271860457022437, 0.45100000000000001, 0.46681768733385554), (-0.46872337815000953, 0.34599999999999997, 0.31963368715684204), (0.18490279849545319, 0.23799999999999999, 0.42665263797981101), (3.470424529947997, 0.012, 0.00025981817437825683), (-0.99942612137154796, 0.032000000000000001, 0.15879415560388499), (-1.3650387953594485, 0.034000000000000002, 0.08612042845912049), (1.8617160516432014, 0.081000000000000003, 0.03132156240215267), (1.1321188945775384, 0.11600000000000001, 0.12879222611766061), (0.064116686050580601, 0.27300000000000002, 0.4744386578180424), (-0.42032194540259099, 0.29999999999999999, 0.33712514016213468), (-0.79581215423980922, 0.123, 0.21307061309098785), (-0.42792753720906046, 0.45600000000000002, 0.33435193892883741), (-1.0629378527428395, 0.051999999999999998, 0.14390506780140866), (-0.54164761752225477, 0.33700000000000002, 0.29403064095211839), (1.0934778886820793, 0.13700000000000001, 0.13709201601893539), (-0.094068785378413719, 0.38200000000000001, 
0.46252725802998929), (0.13482026574801856, 0.36799999999999999, 0.44637699118865737), (-0.13976995315653129, 0.34699999999999998, 0.44442087706276601), (-0.051047663924746682, 0.32000000000000001, 0.47964376985626245), (-0.21468297736730158, 0.41699999999999998, 0.41500724761906527), (-0.20873154637330626, 0.38800000000000001, 0.41732890604390893), (-0.32427876152583485, 0.49199999999999999, 0.37286349875557478), (-0.65254842943280977, 0.374, 0.25702372075306734), (-0.48611858196118796, 0.23300000000000001, 0.31344154643990074), (-0.14482354344529477, 0.32600000000000001, 0.44242509660469886), (-0.51052030974200002, 0.439, 0.30484349480873729), (0.56814382285283538, 0.14999999999999999, 0.28496865660103166), (0.58680919931668207, 0.161, 0.27866592887231878), (0.013390357044409013, 0.25800000000000001, 0.49465818005865647), (-0.19050728887961568, 0.41399999999999998, 0.4244558160399462), (-0.60531777422216049, 0.35199999999999998, 0.2724839368239631), (1.0899331115425805, 0.127, 0.13787130480311838), (0.17015055382651084, 0.36899999999999999, 0.43244586845546418), (-0.21738337124409801, 0.40600000000000003, 0.41395479459421991), (1.0329303331079593, 0.079000000000000001, 0.15081825117169467), (1.0218317101096221, 0.104, 0.15343027913308094)] \ No newline at end of file diff --git a/src/py/crankshaft/test/fixtures/neighbors_getis.json b/src/py/crankshaft/test/fixtures/neighbors_getis.json index 878b955..5c162d7 100644 --- a/src/py/crankshaft/test/fixtures/neighbors_getis.json +++ b/src/py/crankshaft/test/fixtures/neighbors_getis.json @@ -1,860 +1 @@ -[ - { - "neighbors": [ - 2, - 6, - 5, - 10, - 3 - ], - "id": 0, - "value": 1.624458 - }, - { - "neighbors": [ - 4, - 7, - 9, - 14, - 16 - ], - "id": 1, - "value": 2.255492 - }, - { - "neighbors": [ - 6, - 0, - 3, - 8, - 10 - ], - "id": 2, - "value": 1.46789 - }, - { - "neighbors": [ - 8, - 12, - 2, - 4, - 6 - ], - "id": 3, - "value": 2.484256 - }, - { - "neighbors": [ - 9, - 12, - 1, - 3, - 8 - ], - "id": 4, - "value": 
0 - }, - { - "neighbors": [ - 11, - 10, - 0, - 15, - 6 - ], - "id": 5, - "value": 9.048673 - }, - { - "neighbors": [ - 2, - 10, - 0, - 8, - 17 - ], - "id": 6, - "value": 6.029489 - }, - { - "neighbors": [ - 14, - 1, - 22, - 16, - 9 - ], - "id": 7, - "value": 1.800385 - }, - { - "neighbors": [ - 3, - 12, - 19, - 2, - 18 - ], - "id": 8, - "value": 4.581251 - }, - { - "neighbors": [ - 4, - 16, - 12, - 1, - 20 - ], - "id": 9, - "value": 3.790607 - }, - { - "neighbors": [ - 17, - 6, - 5, - 15, - 0 - ], - "id": 10, - "value": 1.447436 - }, - { - "neighbors": [ - 15, - 5, - 13, - 21, - 27 - ], - "id": 11, - "value": 1.191966 - }, - { - "neighbors": [ - 8, - 19, - 3, - 9, - 4 - ], - "id": 12, - "value": 0 - }, - { - "neighbors": [ - 21, - 11, - 28, - 27, - 15 - ], - "id": 13, - "value": 1.608017 - }, - { - "neighbors": [ - 7, - 16, - 22, - 1, - 29 - ], - "id": 14, - "value": 1.949812 - }, - { - "neighbors": [ - 11, - 27, - 26, - 5, - 10 - ], - "id": 15, - "value": 0.74509 - }, - { - "neighbors": [ - 25, - 9, - 14, - 29, - 20 - ], - "id": 16, - "value": 4.173318 - }, - { - "neighbors": [ - 31, - 10, - 18, - 26, - 6 - ], - "id": 17, - "value": 3.783252 - }, - { - "neighbors": [ - 32, - 17, - 23, - 19, - 8 - ], - "id": 18, - "value": 2.085136 - }, - { - "neighbors": [ - 23, - 12, - 20, - 8, - 18 - ], - "id": 19, - "value": 2.176302 - }, - { - "neighbors": [ - 25, - 23, - 19, - 34, - 9 - ], - "id": 20, - "value": 6.309347 - }, - { - "neighbors": [ - 13, - 28, - 27, - 11, - 35 - ], - "id": 21, - "value": 10.855743 - }, - { - "neighbors": [ - 30, - 14, - 29, - 24, - 7 - ], - "id": 22, - "value": 4.211354 - }, - { - "neighbors": [ - 19, - 20, - 34, - 39, - 36 - ], - "id": 23, - "value": 0.80481 - }, - { - "neighbors": [ - 30, - 41, - 22, - 43, - 52 - ], - "id": 24, - "value": 3.215331 - }, - { - "neighbors": [ - 20, - 33, - 16, - 34, - 29 - ], - "id": 25, - "value": 2.833664 - }, - { - "neighbors": [ - 38, - 31, - 27, - 15, - 17 - ], - "id": 26, - "value": 1.59204 - }, - { - 
"neighbors": [ - 35, - 15, - 21, - 28, - 26 - ], - "id": 27, - "value": 1.571158 - }, - { - "neighbors": [ - 21, - 37, - 35, - 27, - 13 - ], - "id": 28, - "value": 3.12759 - }, - { - "neighbors": [ - 33, - 22, - 30, - 42, - 16 - ], - "id": 29, - "value": 4.416896 - }, - { - "neighbors": [ - 43, - 22, - 24, - 29, - 41 - ], - "id": 30, - "value": 3.017486 - }, - { - "neighbors": [ - 40, - 17, - 26, - 32, - 49 - ], - "id": 31, - "value": 9.924245 - }, - { - "neighbors": [ - 45, - 39, - 18, - 31, - 23 - ], - "id": 32, - "value": 7.973957 - }, - { - "neighbors": [ - 25, - 29, - 44, - 42, - 34 - ], - "id": 33, - "value": 5.005464 - }, - { - "neighbors": [ - 36, - 20, - 25, - 23, - 39 - ], - "id": 34, - "value": 2.463891 - }, - { - "neighbors": [ - 27, - 46, - 37, - 28, - 38 - ], - "id": 35, - "value": 0 - }, - { - "neighbors": [ - 39, - 34, - 50, - 48, - 23 - ], - "id": 36, - "value": 7.377974 - }, - { - "neighbors": [ - 47, - 28, - 35, - 46, - 21 - ], - "id": 37, - "value": 1.003875 - }, - { - "neighbors": [ - 51, - 26, - 35, - 40, - 27 - ], - "id": 38, - "value": 3.190047 - }, - { - "neighbors": [ - 36, - 45, - 48, - 32, - 23 - ], - "id": 39, - "value": 45.905406 - }, - { - "neighbors": [ - 49, - 31, - 38, - 45, - 57 - ], - "id": 40, - "value": 2.447597 - }, - { - "neighbors": [ - 52, - 43, - 30, - 24, - 53 - ], - "id": 41, - "value": 1.294958 - }, - { - "neighbors": [ - 43, - 44, - 33, - 53, - 29 - ], - "id": 42, - "value": 5.933098 - }, - { - "neighbors": [ - 53, - 42, - 30, - 41, - 60 - ], - "id": 43, - "value": 4.133997 - }, - { - "neighbors": [ - 33, - 42, - 59, - 58, - 34 - ], - "id": 44, - "value": 4.298311 - }, - { - "neighbors": [ - 48, - 39, - 32, - 56, - 40 - ], - "id": 45, - "value": 27.483827 - }, - { - "neighbors": [ - 35, - 55, - 47, - 54, - 37 - ], - "id": 46, - "value": 0.969791 - }, - { - "neighbors": [ - 37, - 54, - 46, - 35, - 55 - ], - "id": 47, - "value": 0 - }, - { - "neighbors": [ - 45, - 50, - 39, - 62, - 56 - ], - "id": 48, - "value": 2.934466 
- }, - { - "neighbors": [ - 40, - 57, - 51, - 56, - 45 - ], - "id": 49, - "value": 4.456427 - }, - { - "neighbors": [ - 48, - 36, - 63, - 59, - 39 - ], - "id": 50, - "value": 4.629264 - }, - { - "neighbors": [ - 61, - 38, - 55, - 49, - 57 - ], - "id": 51, - "value": 4.941533 - }, - { - "neighbors": [ - 41, - 64, - 53, - 43, - 60 - ], - "id": 52, - "value": 3.990041 - }, - { - "neighbors": [ - 43, - 60, - 64, - 41, - 42 - ], - "id": 53, - "value": 2.064324 - }, - { - "neighbors": [ - 47, - 55, - 46, - 37, - 35 - ], - "id": 54, - "value": 3.040253 - }, - { - "neighbors": [ - 54, - 46, - 61, - 51, - 67 - ], - "id": 55, - "value": 3.905411 - }, - { - "neighbors": [ - 66, - 62, - 48, - 45, - 57 - ], - "id": 56, - "value": 4.332839 - }, - { - "neighbors": [ - 49, - 65, - 61, - 56, - 51 - ], - "id": 57, - "value": 3.894111 - }, - { - "neighbors": [ - 68, - 59, - 60, - 44, - 42 - ], - "id": 58, - "value": 6.828794 - }, - { - "neighbors": [ - 69, - 58, - 63, - 50, - 44 - ], - "id": 59, - "value": 3.263947 - }, - { - "neighbors": [ - 53, - 68, - 64, - 58, - 43 - ], - "id": 60, - "value": 3.282163 - }, - { - "neighbors": [ - 67, - 51, - 55, - 57, - 65 - ], - "id": 61, - "value": 3.295762 - }, - { - "neighbors": [ - 63, - 48, - 56, - 66, - 70 - ], - "id": 62, - "value": 7.249679 - }, - { - "neighbors": [ - 62, - 70, - 69, - 59, - 50 - ], - "id": 63, - "value": 3.041846 - }, - { - "neighbors": [ - 60, - 53, - 52, - 71, - 41 - ], - "id": 64, - "value": 1.618018 - }, - { - "neighbors": [ - 57, - 72, - 66, - 67, - 75 - ], - "id": 65, - "value": 4.910801 - }, - { - "neighbors": [ - 56, - 75, - 62, - 74, - 65 - ], - "id": 66, - "value": 1.991457 - }, - { - "neighbors": [ - 61, - 72, - 65, - 55, - 57 - ], - "id": 67, - "value": 3.146192 - }, - { - "neighbors": [ - 60, - 58, - 76, - 71, - 73 - ], - "id": 68, - "value": 7.26665 - }, - { - "neighbors": [ - 73, - 63, - 59, - 70, - 77 - ], - "id": 69, - "value": 3.110904 - }, - { - "neighbors": [ - 74, - 63, - 77, - 69, - 62 - ], - "id": 
70, - "value": 2.980271 - }, - { - "neighbors": [ - 68, - 64, - 76, - 60, - 53 - ], - "id": 71, - "value": 3.866767 - }, - { - "neighbors": [ - 65, - 67, - 75, - 61, - 57 - ], - "id": 72, - "value": 1.868408 - }, - { - "neighbors": [ - 69, - 76, - 77, - 68, - 59 - ], - "id": 73, - "value": 12.577034 - }, - { - "neighbors": [ - 75, - 70, - 66, - 77, - 62 - ], - "id": 74, - "value": 7.803599 - }, - { - "neighbors": [ - 74, - 66, - 72, - 65, - 70 - ], - "id": 75, - "value": 3.47149 - }, - { - "neighbors": [ - 68, - 73, - 71, - 69, - 60 - ], - "id": 76, - "value": 4.334822 - }, - { - "neighbors": [ - 70, - 74, - 69, - 73, - 63 - ], - "id": 77, - "value": 8.451537 - } -] +[{'neighbors': [3, 6, 7], 'id': 1, 'value': 1.624458}, {'neighbors': [10, 5, 8], 'id': 2, 'value': 2.2554919999999998}, {'neighbors': [1, 4, 7], 'id': 3, 'value': 1.4678899999999999}, {'neighbors': [9, 3, 5, 7], 'id': 4, 'value': 2.4842559999999998}, {'neighbors': [9, 2, 4, 10], 'id': 5, 'value': 0.0}, {'neighbors': [1, 11, 12, 7, 16], 'id': 6, 'value': 9.0486730000000009}, {'neighbors': [1, 3, 4, 6, 9, 11, 18, 19], 'id': 7, 'value': 6.0294889999999999}, {'neighbors': [2, 15, 10], 'id': 8, 'value': 1.8003849999999999}, {'neighbors': [4, 5, 7, 10, 13, 19, 20], 'id': 9, 'value': 4.581251}, {'neighbors': [2, 5, 8, 9, 13, 15, 17, 20, 21], 'id': 10, 'value': 3.7906070000000001}, {'neighbors': [18, 6, 7, 16], 'id': 11, 'value': 1.4474359999999999}, {'neighbors': [16, 6, 14], 'id': 12, 'value': 1.1919660000000001}, {'neighbors': [9, 10, 20], 'id': 13, 'value': 0.0}, {'neighbors': [12, 22, 16], 'id': 14, 'value': 1.608017}, {'neighbors': [17, 10, 23, 8], 'id': 15, 'value': 1.9498120000000001}, {'neighbors': [6, 11, 12, 14, 18, 22, 27, 28], 'id': 16, 'value': 0.74509000000000003}, {'neighbors': [10, 15, 21, 23, 26, 30], 'id': 17, 'value': 4.1733180000000001}, {'neighbors': [33, 7, 11, 16, 19, 27, 32], 'id': 18, 'value': 3.7832520000000001}, {'neighbors': [33, 7, 9, 18, 20, 24], 'id': 19, 'value': 
2.0851359999999999}, {'neighbors': [9, 10, 13, 19, 21, 24], 'id': 20, 'value': 2.1763020000000002}, {'neighbors': [35, 10, 17, 20, 24, 26], 'id': 21, 'value': 6.3093469999999998}, {'neighbors': [28, 29, 14, 16], 'id': 22, 'value': 10.855743}, {'neighbors': [17, 25, 31, 30, 15], 'id': 23, 'value': 4.211354}, {'neighbors': [33, 19, 20, 21, 35], 'id': 24, 'value': 0.80481000000000003}, {'neighbors': [42, 31, 23], 'id': 25, 'value': 3.2153309999999999}, {'neighbors': [17, 34, 35, 21, 30], 'id': 26, 'value': 2.8336640000000002}, {'neighbors': [36, 39, 41, 16, 18, 28, 32], 'id': 27, 'value': 1.5920399999999999}, {'neighbors': [27, 36, 29, 22, 16], 'id': 28, 'value': 1.5711580000000001}, {'neighbors': [36, 28, 22, 38], 'id': 29, 'value': 3.1275900000000001}, {'neighbors': [34, 43, 17, 23, 26, 31], 'id': 30, 'value': 4.4168960000000004}, {'neighbors': [42, 43, 44, 23, 25, 30], 'id': 31, 'value': 3.0174859999999999}, {'neighbors': [33, 18, 27, 41], 'id': 32, 'value': 9.9242450000000009}, {'neighbors': [35, 37, 40, 41, 46, 18, 19, 24, 32], 'id': 33, 'value': 7.9739570000000004}, {'neighbors': [26, 35, 43, 45, 30], 'id': 34, 'value': 5.0054639999999999}, {'neighbors': [33, 34, 37, 40, 45, 21, 24, 26], 'id': 35, 'value': 2.4638909999999998}, {'neighbors': [38, 39, 47, 27, 28, 29], 'id': 36, 'value': 0.0}, {'neighbors': [33, 35, 40, 45, 46, 49, 51], 'id': 37, 'value': 7.377974}, {'neighbors': [36, 29, 47, 48], 'id': 38, 'value': 1.0038750000000001}, {'neighbors': [36, 41, 47, 50, 52, 27], 'id': 39, 'value': 3.1900469999999999}, {'neighbors': [33, 35, 37, 46], 'id': 40, 'value': 45.905405999999999}, {'neighbors': [33, 39, 46, 50, 27, 32], 'id': 41, 'value': 2.447597}, {'neighbors': [25, 44, 53, 31], 'id': 42, 'value': 1.2949580000000001}, {'neighbors': [34, 44, 45, 54, 59, 61, 30, 31], 'id': 43, 'value': 5.9330980000000002}, {'neighbors': [42, 43, 53, 54, 31], 'id': 44, 'value': 4.1339969999999999}, {'neighbors': [34, 35, 37, 43, 51, 59, 60], 'id': 45, 'value': 4.298311}, 
{'neighbors': [33, 37, 40, 41, 49, 50, 57], 'id': 46, 'value': 27.483827000000002}, {'neighbors': [36, 38, 39, 48, 52, 55, 56], 'id': 47, 'value': 0.96979099999999996}, {'neighbors': [55, 38, 47], 'id': 48, 'value': 0.0}, {'neighbors': [57, 51, 37, 46, 63], 'id': 49, 'value': 2.934466}, {'neighbors': [39, 41, 46, 52, 57, 58], 'id': 50, 'value': 4.4564269999999997}, {'neighbors': [37, 45, 49, 60, 63, 64], 'id': 51, 'value': 4.629264}, {'neighbors': [39, 47, 50, 56, 58, 62], 'id': 52, 'value': 4.9415329999999997}, {'neighbors': [65, 42, 44, 54], 'id': 53, 'value': 3.9900410000000002}, {'neighbors': [65, 61, 43, 44, 53], 'id': 54, 'value': 2.064324}, {'neighbors': [56, 47, 48], 'id': 55, 'value': 3.0402529999999999}, {'neighbors': [52, 55, 47, 62], 'id': 56, 'value': 3.905411}, {'neighbors': [66, 67, 46, 49, 50, 58, 63], 'id': 57, 'value': 4.3328389999999999}, {'neighbors': [57, 50, 52, 62, 66], 'id': 58, 'value': 3.8941110000000001}, {'neighbors': [69, 70, 43, 45, 60, 61], 'id': 59, 'value': 6.8287940000000003}, {'neighbors': [51, 64, 45, 59, 70], 'id': 60, 'value': 3.2639469999999999}, {'neighbors': [65, 69, 72, 43, 54, 59], 'id': 61, 'value': 3.2821630000000002}, {'neighbors': [58, 68, 52, 66, 56], 'id': 62, 'value': 3.2957619999999999}, {'neighbors': [49, 57, 51, 67, 64], 'id': 63, 'value': 7.2496790000000004}, {'neighbors': [67, 70, 71, 51, 60, 63], 'id': 64, 'value': 3.041846}, {'neighbors': [61, 53, 54, 72], 'id': 65, 'value': 1.618018}, {'neighbors': [67, 68, 73, 76, 57, 58, 62], 'id': 66, 'value': 4.9108010000000002}, {'neighbors': [66, 71, 73, 75, 76, 57, 63, 64], 'id': 67, 'value': 1.991457}, {'neighbors': [73, 66, 62], 'id': 68, 'value': 3.1461920000000001}, {'neighbors': [70, 72, 74, 77, 59, 61], 'id': 69, 'value': 7.2666500000000003}, {'neighbors': [69, 71, 74, 78, 59, 60, 64], 'id': 70, 'value': 3.1109040000000001}, {'neighbors': [67, 75, 70, 78, 64], 'id': 71, 'value': 2.9802710000000001}, {'neighbors': [65, 69, 61, 77], 'id': 72, 'value': 
3.8667669999999998}, {'neighbors': [76, 66, 67, 68], 'id': 73, 'value': 1.8684080000000001}, {'neighbors': [77, 69, 70, 78], 'id': 74, 'value': 12.577033999999999}, {'neighbors': [67, 76, 78, 71], 'id': 75, 'value': 7.8035990000000002}, {'neighbors': [73, 66, 67, 75], 'id': 76, 'value': 3.4714900000000002}, {'neighbors': [74, 69, 72], 'id': 77, 'value': 4.334822}, {'neighbors': [74, 75, 70, 71], 'id': 78, 'value': 8.4515370000000001}] \ No newline at end of file diff --git a/src/py/crankshaft/test/test_clustering_getis.py b/src/py/crankshaft/test/test_clustering_getis.py index 1bcc02d..8acb562 100644 --- a/src/py/crankshaft/test/test_clustering_getis.py +++ b/src/py/crankshaft/test/test_clustering_getis.py @@ -18,22 +18,28 @@ import json # # import pysal as ps # import numpy as np -# f = ps.open(ps.examples.get_path("stl_hom.txt")) +# import random +# +# # setup variables +# f = ps.open(ps.examples.get_path("stl_hom.dbf")) # y = np.array(f.by_col['HR8893']) -# w = ps.knnW_from_shapefile(ps.examples.get_path("stl_hom.shp"), k=5) +# w_queen = ps.queen_from_shapefile(ps.examples.get_path("stl_hom.shp")) # -# out = [{"id": index, "neighbors": w.neighbors[index], "value": val} -# for index, val in enumerate(y)] -# with open('neighbors_getis.json', 'w') as f: -# f.write(str(out)) +# out_queen = [{"id": index + 1, +# "neighbors": [x+1 for x in w_queen.neighbors[index]], +# "value": val} for index, val in enumerate(y)] # +# with open('neighbors_queen_getis.json', 'w') as f: +# f.write(str(out_queen)) +# +# random.seed(1234) # np.random.seed(1234) -# # need to do random.seed(1234) too? 
-# lgstar = ps.esda.getisord.G_Local(y, w, star=True, permutations=999) -# -# with open('getis.json', 'w') as f: -# f.write(str(zip(lgstar.z_sim, lgstar.p_sim, lgstar.p_z_sim))) +# lgstar_queen = ps.esda.getisord.G_Local(y, w_queen, star=True, +# permutations=999) # +# with open('getis_queen.json', 'w') as f: +# f.write(str(zip(lgstar_queen.z_sim, +# lgstar_queen.p_sim, lgstar_queen.p_z_sim))) class GetisTest(unittest.TestCase): @@ -44,9 +50,14 @@ class GetisTest(unittest.TestCase): def setUp(self): plpy._reset() + + # load raw data for analysis self.neighbors_data = json.loads( open(fixture_file('neighbors_getis.json')).read()) - self.getis_data = json.loads(open(fixture_file('getis.json')).read()) + + # load pre-computed/known values + self.getis_data = json.loads( + open(fixture_file('getis.json')).read()) def test_getis_ord(self): """Test Getis-Ord's G*""" @@ -56,7 +67,7 @@ class GetisTest(unittest.TestCase): plpy._define_result('select', data) random_seeds.set_random_seeds(1234) result = cc.getis_ord('subquery', 'value', - 'knn', 5, 999, 'the_geom', 'cartodb_id') + 'queen', None, 999, 'the_geom', 'cartodb_id') result = [(row[0], row[1]) for row in result] expected = np.array(self.getis_data)[:, 0:2] for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected): From e5ea83649327672d758a3b112f433f343543eb80 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 21 Sep 2016 11:53:17 -0400 Subject: [PATCH 27/96] fix json format --- src/py/crankshaft/test/fixtures/getis.json | 2 +- src/py/crankshaft/test/fixtures/neighbors_getis.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/test/fixtures/getis.json b/src/py/crankshaft/test/fixtures/getis.json index ae67a3c..02566fc 100644 --- a/src/py/crankshaft/test/fixtures/getis.json +++ b/src/py/crankshaft/test/fixtures/getis.json @@ -1 +1 @@ -[(0.004793783909323601, 0.17999999999999999, 0.49808756424021061), (-1.0701189472090842, 0.079000000000000001, 0.14228288580832316), 
(-0.67867750971877305, 0.42099999999999999, 0.24867110969448558), (-0.67407386707620487, 0.246, 0.25013217644612995), (-0.79495689068870035, 0.33200000000000002, 0.21331928959090596), (-0.49279481022182703, 0.058999999999999997, 0.31107878905057329), (-0.38075627530057132, 0.28399999999999997, 0.35169205342069643), (-0.86710921611314895, 0.23699999999999999, 0.19294108571294855), (-0.78618647240956485, 0.050000000000000003, 0.2158791250244505), (-0.76108527223116984, 0.064000000000000001, 0.22330306830813684), (-0.13340753531942209, 0.247, 0.44693554317763651), (-0.57584545722033043, 0.48999999999999999, 0.28235982246156488), (-0.78882694661192831, 0.433, 0.2151065788731219), (-0.38769767950046219, 0.375, 0.34911988661484239), (-0.56057819488052207, 0.41399999999999998, 0.28754255985169652), (-0.41354017495644935, 0.45500000000000002, 0.339605447117173), (-0.23993577722243081, 0.49099999999999999, 0.40519002230969337), (-0.1389080156677496, 0.40400000000000003, 0.44476141839645233), (-0.25485737510500855, 0.376, 0.39941662953554224), (-0.71218610582902353, 0.17399999999999999, 0.23817476979886087), (-0.54533105995872144, 0.13700000000000001, 0.2927629228714812), (-0.39547917847510977, 0.033000000000000002, 0.34624464252424236), (-0.43052658996257548, 0.35399999999999998, 0.33340631435564982), (-0.37296719193774736, 0.40300000000000002, 0.35458643102865428), (-0.66482612169465694, 0.31900000000000001, 0.25308085650392698), (-0.13772133540823422, 0.34699999999999998, 0.44523032843016275), (-0.6765304487868502, 0.20999999999999999, 0.24935196033890672), (-0.64518763494323472, 0.32200000000000001, 0.25940279912025543), (-0.5078622084312413, 0.41099999999999998, 0.30577498972600159), (-0.12652006733772059, 0.42899999999999999, 0.44966013262301163), (-0.32691133022814595, 0.498, 0.37186747562269029), (0.25533848511500978, 0.42399999999999999, 0.39923083899077472), (2.7045138116476508, 0.0050000000000000001, 0.0034202212972238577), (-0.1551614486076057, 
0.44400000000000001, 0.43834701985429037), (1.9524487722567723, 0.012999999999999999, 0.025442473674991528), (-1.2055816465306763, 0.017000000000000001, 0.11398941970467646), (3.478472976017831, 0.002, 0.00025213964072468009), (-1.4621715757903719, 0.002, 0.071847099325659136), (-0.84010307600180256, 0.085000000000000006, 0.20042529779230778), (5.7097646237318243, 0.0030000000000000001, 5.6566262784940591e-09), (1.5082367956567375, 0.065000000000000002, 0.065746966514827365), (-0.58337270103430816, 0.44, 0.27982121546450034), (-0.083271860457022437, 0.45100000000000001, 0.46681768733385554), (-0.46872337815000953, 0.34599999999999997, 0.31963368715684204), (0.18490279849545319, 0.23799999999999999, 0.42665263797981101), (3.470424529947997, 0.012, 0.00025981817437825683), (-0.99942612137154796, 0.032000000000000001, 0.15879415560388499), (-1.3650387953594485, 0.034000000000000002, 0.08612042845912049), (1.8617160516432014, 0.081000000000000003, 0.03132156240215267), (1.1321188945775384, 0.11600000000000001, 0.12879222611766061), (0.064116686050580601, 0.27300000000000002, 0.4744386578180424), (-0.42032194540259099, 0.29999999999999999, 0.33712514016213468), (-0.79581215423980922, 0.123, 0.21307061309098785), (-0.42792753720906046, 0.45600000000000002, 0.33435193892883741), (-1.0629378527428395, 0.051999999999999998, 0.14390506780140866), (-0.54164761752225477, 0.33700000000000002, 0.29403064095211839), (1.0934778886820793, 0.13700000000000001, 0.13709201601893539), (-0.094068785378413719, 0.38200000000000001, 0.46252725802998929), (0.13482026574801856, 0.36799999999999999, 0.44637699118865737), (-0.13976995315653129, 0.34699999999999998, 0.44442087706276601), (-0.051047663924746682, 0.32000000000000001, 0.47964376985626245), (-0.21468297736730158, 0.41699999999999998, 0.41500724761906527), (-0.20873154637330626, 0.38800000000000001, 0.41732890604390893), (-0.32427876152583485, 0.49199999999999999, 0.37286349875557478), (-0.65254842943280977, 0.374, 
0.25702372075306734), (-0.48611858196118796, 0.23300000000000001, 0.31344154643990074), (-0.14482354344529477, 0.32600000000000001, 0.44242509660469886), (-0.51052030974200002, 0.439, 0.30484349480873729), (0.56814382285283538, 0.14999999999999999, 0.28496865660103166), (0.58680919931668207, 0.161, 0.27866592887231878), (0.013390357044409013, 0.25800000000000001, 0.49465818005865647), (-0.19050728887961568, 0.41399999999999998, 0.4244558160399462), (-0.60531777422216049, 0.35199999999999998, 0.2724839368239631), (1.0899331115425805, 0.127, 0.13787130480311838), (0.17015055382651084, 0.36899999999999999, 0.43244586845546418), (-0.21738337124409801, 0.40600000000000003, 0.41395479459421991), (1.0329303331079593, 0.079000000000000001, 0.15081825117169467), (1.0218317101096221, 0.104, 0.15343027913308094)] \ No newline at end of file +[[0.004793783909323601, 0.17999999999999999, 0.49808756424021061], [-1.0701189472090842, 0.079000000000000001, 0.14228288580832316], [-0.67867750971877305, 0.42099999999999999, 0.24867110969448558], [-0.67407386707620487, 0.246, 0.25013217644612995], [-0.79495689068870035, 0.33200000000000002, 0.21331928959090596], [-0.49279481022182703, 0.058999999999999997, 0.31107878905057329], [-0.38075627530057132, 0.28399999999999997, 0.35169205342069643], [-0.86710921611314895, 0.23699999999999999, 0.19294108571294855], [-0.78618647240956485, 0.050000000000000003, 0.2158791250244505], [-0.76108527223116984, 0.064000000000000001, 0.22330306830813684], [-0.13340753531942209, 0.247, 0.44693554317763651], [-0.57584545722033043, 0.48999999999999999, 0.28235982246156488], [-0.78882694661192831, 0.433, 0.2151065788731219], [-0.38769767950046219, 0.375, 0.34911988661484239], [-0.56057819488052207, 0.41399999999999998, 0.28754255985169652], [-0.41354017495644935, 0.45500000000000002, 0.339605447117173], [-0.23993577722243081, 0.49099999999999999, 0.40519002230969337], [-0.1389080156677496, 0.40400000000000003, 0.44476141839645233], [-0.25485737510500855, 
0.376, 0.39941662953554224], [-0.71218610582902353, 0.17399999999999999, 0.23817476979886087], [-0.54533105995872144, 0.13700000000000001, 0.2927629228714812], [-0.39547917847510977, 0.033000000000000002, 0.34624464252424236], [-0.43052658996257548, 0.35399999999999998, 0.33340631435564982], [-0.37296719193774736, 0.40300000000000002, 0.35458643102865428], [-0.66482612169465694, 0.31900000000000001, 0.25308085650392698], [-0.13772133540823422, 0.34699999999999998, 0.44523032843016275], [-0.6765304487868502, 0.20999999999999999, 0.24935196033890672], [-0.64518763494323472, 0.32200000000000001, 0.25940279912025543], [-0.5078622084312413, 0.41099999999999998, 0.30577498972600159], [-0.12652006733772059, 0.42899999999999999, 0.44966013262301163], [-0.32691133022814595, 0.498, 0.37186747562269029], [0.25533848511500978, 0.42399999999999999, 0.39923083899077472], [2.7045138116476508, 0.0050000000000000001, 0.0034202212972238577], [-0.1551614486076057, 0.44400000000000001, 0.43834701985429037], [1.9524487722567723, 0.012999999999999999, 0.025442473674991528], [-1.2055816465306763, 0.017000000000000001, 0.11398941970467646], [3.478472976017831, 0.002, 0.00025213964072468009], [-1.4621715757903719, 0.002, 0.071847099325659136], [-0.84010307600180256, 0.085000000000000006, 0.20042529779230778], [5.7097646237318243, 0.0030000000000000001, 5.6566262784940591e-09], [1.5082367956567375, 0.065000000000000002, 0.065746966514827365], [-0.58337270103430816, 0.44, 0.27982121546450034], [-0.083271860457022437, 0.45100000000000001, 0.46681768733385554], [-0.46872337815000953, 0.34599999999999997, 0.31963368715684204], [0.18490279849545319, 0.23799999999999999, 0.42665263797981101], [3.470424529947997, 0.012, 0.00025981817437825683], [-0.99942612137154796, 0.032000000000000001, 0.15879415560388499], [-1.3650387953594485, 0.034000000000000002, 0.08612042845912049], [1.8617160516432014, 0.081000000000000003, 0.03132156240215267], [1.1321188945775384, 0.11600000000000001, 
0.12879222611766061], [0.064116686050580601, 0.27300000000000002, 0.4744386578180424], [-0.42032194540259099, 0.29999999999999999, 0.33712514016213468], [-0.79581215423980922, 0.123, 0.21307061309098785], [-0.42792753720906046, 0.45600000000000002, 0.33435193892883741], [-1.0629378527428395, 0.051999999999999998, 0.14390506780140866], [-0.54164761752225477, 0.33700000000000002, 0.29403064095211839], [1.0934778886820793, 0.13700000000000001, 0.13709201601893539], [-0.094068785378413719, 0.38200000000000001, 0.46252725802998929], [0.13482026574801856, 0.36799999999999999, 0.44637699118865737], [-0.13976995315653129, 0.34699999999999998, 0.44442087706276601], [-0.051047663924746682, 0.32000000000000001, 0.47964376985626245], [-0.21468297736730158, 0.41699999999999998, 0.41500724761906527], [-0.20873154637330626, 0.38800000000000001, 0.41732890604390893], [-0.32427876152583485, 0.49199999999999999, 0.37286349875557478], [-0.65254842943280977, 0.374, 0.25702372075306734], [-0.48611858196118796, 0.23300000000000001, 0.31344154643990074], [-0.14482354344529477, 0.32600000000000001, 0.44242509660469886], [-0.51052030974200002, 0.439, 0.30484349480873729], [0.56814382285283538, 0.14999999999999999, 0.28496865660103166], [0.58680919931668207, 0.161, 0.27866592887231878], [0.013390357044409013, 0.25800000000000001, 0.49465818005865647], [-0.19050728887961568, 0.41399999999999998, 0.4244558160399462], [-0.60531777422216049, 0.35199999999999998, 0.2724839368239631], [1.0899331115425805, 0.127, 0.13787130480311838], [0.17015055382651084, 0.36899999999999999, 0.43244586845546418], [-0.21738337124409801, 0.40600000000000003, 0.41395479459421991], [1.0329303331079593, 0.079000000000000001, 0.15081825117169467], [1.0218317101096221, 0.104, 0.15343027913308094]] diff --git a/src/py/crankshaft/test/fixtures/neighbors_getis.json b/src/py/crankshaft/test/fixtures/neighbors_getis.json index 5c162d7..be367ff 100644 --- a/src/py/crankshaft/test/fixtures/neighbors_getis.json +++ 
b/src/py/crankshaft/test/fixtures/neighbors_getis.json @@ -1 +1 @@ -[{'neighbors': [3, 6, 7], 'id': 1, 'value': 1.624458}, {'neighbors': [10, 5, 8], 'id': 2, 'value': 2.2554919999999998}, {'neighbors': [1, 4, 7], 'id': 3, 'value': 1.4678899999999999}, {'neighbors': [9, 3, 5, 7], 'id': 4, 'value': 2.4842559999999998}, {'neighbors': [9, 2, 4, 10], 'id': 5, 'value': 0.0}, {'neighbors': [1, 11, 12, 7, 16], 'id': 6, 'value': 9.0486730000000009}, {'neighbors': [1, 3, 4, 6, 9, 11, 18, 19], 'id': 7, 'value': 6.0294889999999999}, {'neighbors': [2, 15, 10], 'id': 8, 'value': 1.8003849999999999}, {'neighbors': [4, 5, 7, 10, 13, 19, 20], 'id': 9, 'value': 4.581251}, {'neighbors': [2, 5, 8, 9, 13, 15, 17, 20, 21], 'id': 10, 'value': 3.7906070000000001}, {'neighbors': [18, 6, 7, 16], 'id': 11, 'value': 1.4474359999999999}, {'neighbors': [16, 6, 14], 'id': 12, 'value': 1.1919660000000001}, {'neighbors': [9, 10, 20], 'id': 13, 'value': 0.0}, {'neighbors': [12, 22, 16], 'id': 14, 'value': 1.608017}, {'neighbors': [17, 10, 23, 8], 'id': 15, 'value': 1.9498120000000001}, {'neighbors': [6, 11, 12, 14, 18, 22, 27, 28], 'id': 16, 'value': 0.74509000000000003}, {'neighbors': [10, 15, 21, 23, 26, 30], 'id': 17, 'value': 4.1733180000000001}, {'neighbors': [33, 7, 11, 16, 19, 27, 32], 'id': 18, 'value': 3.7832520000000001}, {'neighbors': [33, 7, 9, 18, 20, 24], 'id': 19, 'value': 2.0851359999999999}, {'neighbors': [9, 10, 13, 19, 21, 24], 'id': 20, 'value': 2.1763020000000002}, {'neighbors': [35, 10, 17, 20, 24, 26], 'id': 21, 'value': 6.3093469999999998}, {'neighbors': [28, 29, 14, 16], 'id': 22, 'value': 10.855743}, {'neighbors': [17, 25, 31, 30, 15], 'id': 23, 'value': 4.211354}, {'neighbors': [33, 19, 20, 21, 35], 'id': 24, 'value': 0.80481000000000003}, {'neighbors': [42, 31, 23], 'id': 25, 'value': 3.2153309999999999}, {'neighbors': [17, 34, 35, 21, 30], 'id': 26, 'value': 2.8336640000000002}, {'neighbors': [36, 39, 41, 16, 18, 28, 32], 'id': 27, 'value': 1.5920399999999999}, 
{'neighbors': [27, 36, 29, 22, 16], 'id': 28, 'value': 1.5711580000000001}, {'neighbors': [36, 28, 22, 38], 'id': 29, 'value': 3.1275900000000001}, {'neighbors': [34, 43, 17, 23, 26, 31], 'id': 30, 'value': 4.4168960000000004}, {'neighbors': [42, 43, 44, 23, 25, 30], 'id': 31, 'value': 3.0174859999999999}, {'neighbors': [33, 18, 27, 41], 'id': 32, 'value': 9.9242450000000009}, {'neighbors': [35, 37, 40, 41, 46, 18, 19, 24, 32], 'id': 33, 'value': 7.9739570000000004}, {'neighbors': [26, 35, 43, 45, 30], 'id': 34, 'value': 5.0054639999999999}, {'neighbors': [33, 34, 37, 40, 45, 21, 24, 26], 'id': 35, 'value': 2.4638909999999998}, {'neighbors': [38, 39, 47, 27, 28, 29], 'id': 36, 'value': 0.0}, {'neighbors': [33, 35, 40, 45, 46, 49, 51], 'id': 37, 'value': 7.377974}, {'neighbors': [36, 29, 47, 48], 'id': 38, 'value': 1.0038750000000001}, {'neighbors': [36, 41, 47, 50, 52, 27], 'id': 39, 'value': 3.1900469999999999}, {'neighbors': [33, 35, 37, 46], 'id': 40, 'value': 45.905405999999999}, {'neighbors': [33, 39, 46, 50, 27, 32], 'id': 41, 'value': 2.447597}, {'neighbors': [25, 44, 53, 31], 'id': 42, 'value': 1.2949580000000001}, {'neighbors': [34, 44, 45, 54, 59, 61, 30, 31], 'id': 43, 'value': 5.9330980000000002}, {'neighbors': [42, 43, 53, 54, 31], 'id': 44, 'value': 4.1339969999999999}, {'neighbors': [34, 35, 37, 43, 51, 59, 60], 'id': 45, 'value': 4.298311}, {'neighbors': [33, 37, 40, 41, 49, 50, 57], 'id': 46, 'value': 27.483827000000002}, {'neighbors': [36, 38, 39, 48, 52, 55, 56], 'id': 47, 'value': 0.96979099999999996}, {'neighbors': [55, 38, 47], 'id': 48, 'value': 0.0}, {'neighbors': [57, 51, 37, 46, 63], 'id': 49, 'value': 2.934466}, {'neighbors': [39, 41, 46, 52, 57, 58], 'id': 50, 'value': 4.4564269999999997}, {'neighbors': [37, 45, 49, 60, 63, 64], 'id': 51, 'value': 4.629264}, {'neighbors': [39, 47, 50, 56, 58, 62], 'id': 52, 'value': 4.9415329999999997}, {'neighbors': [65, 42, 44, 54], 'id': 53, 'value': 3.9900410000000002}, {'neighbors': [65, 61, 43, 44, 
53], 'id': 54, 'value': 2.064324}, {'neighbors': [56, 47, 48], 'id': 55, 'value': 3.0402529999999999}, {'neighbors': [52, 55, 47, 62], 'id': 56, 'value': 3.905411}, {'neighbors': [66, 67, 46, 49, 50, 58, 63], 'id': 57, 'value': 4.3328389999999999}, {'neighbors': [57, 50, 52, 62, 66], 'id': 58, 'value': 3.8941110000000001}, {'neighbors': [69, 70, 43, 45, 60, 61], 'id': 59, 'value': 6.8287940000000003}, {'neighbors': [51, 64, 45, 59, 70], 'id': 60, 'value': 3.2639469999999999}, {'neighbors': [65, 69, 72, 43, 54, 59], 'id': 61, 'value': 3.2821630000000002}, {'neighbors': [58, 68, 52, 66, 56], 'id': 62, 'value': 3.2957619999999999}, {'neighbors': [49, 57, 51, 67, 64], 'id': 63, 'value': 7.2496790000000004}, {'neighbors': [67, 70, 71, 51, 60, 63], 'id': 64, 'value': 3.041846}, {'neighbors': [61, 53, 54, 72], 'id': 65, 'value': 1.618018}, {'neighbors': [67, 68, 73, 76, 57, 58, 62], 'id': 66, 'value': 4.9108010000000002}, {'neighbors': [66, 71, 73, 75, 76, 57, 63, 64], 'id': 67, 'value': 1.991457}, {'neighbors': [73, 66, 62], 'id': 68, 'value': 3.1461920000000001}, {'neighbors': [70, 72, 74, 77, 59, 61], 'id': 69, 'value': 7.2666500000000003}, {'neighbors': [69, 71, 74, 78, 59, 60, 64], 'id': 70, 'value': 3.1109040000000001}, {'neighbors': [67, 75, 70, 78, 64], 'id': 71, 'value': 2.9802710000000001}, {'neighbors': [65, 69, 61, 77], 'id': 72, 'value': 3.8667669999999998}, {'neighbors': [76, 66, 67, 68], 'id': 73, 'value': 1.8684080000000001}, {'neighbors': [77, 69, 70, 78], 'id': 74, 'value': 12.577033999999999}, {'neighbors': [67, 76, 78, 71], 'id': 75, 'value': 7.8035990000000002}, {'neighbors': [73, 66, 67, 75], 'id': 76, 'value': 3.4714900000000002}, {'neighbors': [74, 69, 72], 'id': 77, 'value': 4.334822}, {'neighbors': [74, 75, 70, 71], 'id': 78, 'value': 8.4515370000000001}] \ No newline at end of file +[{"neighbors": [3, 6, 7], "id": 1, "value": 1.624458}, {"neighbors": [10, 5, 8], "id": 2, "value": 2.2554919999999998}, {"neighbors": [1, 4, 7], "id": 3, "value": 
1.4678899999999999}, {"neighbors": [9, 3, 5, 7], "id": 4, "value": 2.4842559999999998}, {"neighbors": [9, 2, 4, 10], "id": 5, "value": 0.0}, {"neighbors": [1, 11, 12, 7, 16], "id": 6, "value": 9.0486730000000009}, {"neighbors": [1, 3, 4, 6, 9, 11, 18, 19], "id": 7, "value": 6.0294889999999999}, {"neighbors": [2, 15, 10], "id": 8, "value": 1.8003849999999999}, {"neighbors": [4, 5, 7, 10, 13, 19, 20], "id": 9, "value": 4.581251}, {"neighbors": [2, 5, 8, 9, 13, 15, 17, 20, 21], "id": 10, "value": 3.7906070000000001}, {"neighbors": [18, 6, 7, 16], "id": 11, "value": 1.4474359999999999}, {"neighbors": [16, 6, 14], "id": 12, "value": 1.1919660000000001}, {"neighbors": [9, 10, 20], "id": 13, "value": 0.0}, {"neighbors": [12, 22, 16], "id": 14, "value": 1.608017}, {"neighbors": [17, 10, 23, 8], "id": 15, "value": 1.9498120000000001}, {"neighbors": [6, 11, 12, 14, 18, 22, 27, 28], "id": 16, "value": 0.74509000000000003}, {"neighbors": [10, 15, 21, 23, 26, 30], "id": 17, "value": 4.1733180000000001}, {"neighbors": [33, 7, 11, 16, 19, 27, 32], "id": 18, "value": 3.7832520000000001}, {"neighbors": [33, 7, 9, 18, 20, 24], "id": 19, "value": 2.0851359999999999}, {"neighbors": [9, 10, 13, 19, 21, 24], "id": 20, "value": 2.1763020000000002}, {"neighbors": [35, 10, 17, 20, 24, 26], "id": 21, "value": 6.3093469999999998}, {"neighbors": [28, 29, 14, 16], "id": 22, "value": 10.855743}, {"neighbors": [17, 25, 31, 30, 15], "id": 23, "value": 4.211354}, {"neighbors": [33, 19, 20, 21, 35], "id": 24, "value": 0.80481000000000003}, {"neighbors": [42, 31, 23], "id": 25, "value": 3.2153309999999999}, {"neighbors": [17, 34, 35, 21, 30], "id": 26, "value": 2.8336640000000002}, {"neighbors": [36, 39, 41, 16, 18, 28, 32], "id": 27, "value": 1.5920399999999999}, {"neighbors": [27, 36, 29, 22, 16], "id": 28, "value": 1.5711580000000001}, {"neighbors": [36, 28, 22, 38], "id": 29, "value": 3.1275900000000001}, {"neighbors": [34, 43, 17, 23, 26, 31], "id": 30, "value": 4.4168960000000004}, 
{"neighbors": [42, 43, 44, 23, 25, 30], "id": 31, "value": 3.0174859999999999}, {"neighbors": [33, 18, 27, 41], "id": 32, "value": 9.9242450000000009}, {"neighbors": [35, 37, 40, 41, 46, 18, 19, 24, 32], "id": 33, "value": 7.9739570000000004}, {"neighbors": [26, 35, 43, 45, 30], "id": 34, "value": 5.0054639999999999}, {"neighbors": [33, 34, 37, 40, 45, 21, 24, 26], "id": 35, "value": 2.4638909999999998}, {"neighbors": [38, 39, 47, 27, 28, 29], "id": 36, "value": 0.0}, {"neighbors": [33, 35, 40, 45, 46, 49, 51], "id": 37, "value": 7.377974}, {"neighbors": [36, 29, 47, 48], "id": 38, "value": 1.0038750000000001}, {"neighbors": [36, 41, 47, 50, 52, 27], "id": 39, "value": 3.1900469999999999}, {"neighbors": [33, 35, 37, 46], "id": 40, "value": 45.905405999999999}, {"neighbors": [33, 39, 46, 50, 27, 32], "id": 41, "value": 2.447597}, {"neighbors": [25, 44, 53, 31], "id": 42, "value": 1.2949580000000001}, {"neighbors": [34, 44, 45, 54, 59, 61, 30, 31], "id": 43, "value": 5.9330980000000002}, {"neighbors": [42, 43, 53, 54, 31], "id": 44, "value": 4.1339969999999999}, {"neighbors": [34, 35, 37, 43, 51, 59, 60], "id": 45, "value": 4.298311}, {"neighbors": [33, 37, 40, 41, 49, 50, 57], "id": 46, "value": 27.483827000000002}, {"neighbors": [36, 38, 39, 48, 52, 55, 56], "id": 47, "value": 0.96979099999999996}, {"neighbors": [55, 38, 47], "id": 48, "value": 0.0}, {"neighbors": [57, 51, 37, 46, 63], "id": 49, "value": 2.934466}, {"neighbors": [39, 41, 46, 52, 57, 58], "id": 50, "value": 4.4564269999999997}, {"neighbors": [37, 45, 49, 60, 63, 64], "id": 51, "value": 4.629264}, {"neighbors": [39, 47, 50, 56, 58, 62], "id": 52, "value": 4.9415329999999997}, {"neighbors": [65, 42, 44, 54], "id": 53, "value": 3.9900410000000002}, {"neighbors": [65, 61, 43, 44, 53], "id": 54, "value": 2.064324}, {"neighbors": [56, 47, 48], "id": 55, "value": 3.0402529999999999}, {"neighbors": [52, 55, 47, 62], "id": 56, "value": 3.905411}, {"neighbors": [66, 67, 46, 49, 50, 58, 63], "id": 57, "value": 
4.3328389999999999}, {"neighbors": [57, 50, 52, 62, 66], "id": 58, "value": 3.8941110000000001}, {"neighbors": [69, 70, 43, 45, 60, 61], "id": 59, "value": 6.8287940000000003}, {"neighbors": [51, 64, 45, 59, 70], "id": 60, "value": 3.2639469999999999}, {"neighbors": [65, 69, 72, 43, 54, 59], "id": 61, "value": 3.2821630000000002}, {"neighbors": [58, 68, 52, 66, 56], "id": 62, "value": 3.2957619999999999}, {"neighbors": [49, 57, 51, 67, 64], "id": 63, "value": 7.2496790000000004}, {"neighbors": [67, 70, 71, 51, 60, 63], "id": 64, "value": 3.041846}, {"neighbors": [61, 53, 54, 72], "id": 65, "value": 1.618018}, {"neighbors": [67, 68, 73, 76, 57, 58, 62], "id": 66, "value": 4.9108010000000002}, {"neighbors": [66, 71, 73, 75, 76, 57, 63, 64], "id": 67, "value": 1.991457}, {"neighbors": [73, 66, 62], "id": 68, "value": 3.1461920000000001}, {"neighbors": [70, 72, 74, 77, 59, 61], "id": 69, "value": 7.2666500000000003}, {"neighbors": [69, 71, 74, 78, 59, 60, 64], "id": 70, "value": 3.1109040000000001}, {"neighbors": [67, 75, 70, 78, 64], "id": 71, "value": 2.9802710000000001}, {"neighbors": [65, 69, 61, 77], "id": 72, "value": 3.8667669999999998}, {"neighbors": [76, 66, 67, 68], "id": 73, "value": 1.8684080000000001}, {"neighbors": [77, 69, 70, 78], "id": 74, "value": 12.577033999999999}, {"neighbors": [67, 76, 78, 71], "id": 75, "value": 7.8035990000000002}, {"neighbors": [73, 66, 67, 75], "id": 76, "value": 3.4714900000000002}, {"neighbors": [74, 69, 72], "id": 77, "value": 4.334822}, {"neighbors": [74, 75, 70, 71], "id": 78, "value": 8.4515370000000001}] From 795413e46dc77ab473a6a78fe97fd4b4749a1eb4 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 21 Sep 2016 12:01:42 -0400 Subject: [PATCH 28/96] cleaning test files --- src/pg/test/expected/16_getis_test.out | 2 +- src/pg/test/fixtures/getis_data.sql | 9 ++------- src/py/crankshaft/test/test_clustering_getis.py | 2 -- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git 
a/src/pg/test/expected/16_getis_test.out b/src/pg/test/expected/16_getis_test.out index d0cb4f8..416844b 100644 --- a/src/pg/test/expected/16_getis_test.out +++ b/src/pg/test/expected/16_getis_test.out @@ -1,6 +1,6 @@ \pset format unaligned \set ECHO all -\i test/fixtures/ppoints.sql +\i test/fixtures/getis_data.sql SET client_min_messages TO WARNING; \set ECHO none _cdb_random_seeds diff --git a/src/pg/test/fixtures/getis_data.sql b/src/pg/test/fixtures/getis_data.sql index e7b2f69..3b47013 100644 --- a/src/pg/test/fixtures/getis_data.sql +++ b/src/pg/test/fixtures/getis_data.sql @@ -3,20 +3,16 @@ SET client_min_messages TO WARNING; -- -- Getis-Ord's G* test dataset, subsetted from PySAL examples: --- https://github.com/pysal/pysal/tree/952ea04029165048a774d9a1846cf86ad000c096/pysal/examples/stl +-- https://github.com/pysal/pysal/tree/952ea04029165048a774d9a1846cf86ad000c096/pysal/examples/stl -- CREATE TABLE getis_data ( cartodb_id integer, the_geom geometry(Geometry,4326), - hr8893 numeric + hr8893 numeric ); - -ALTER TABLE getis_data OWNER TO contrib_regression; - - COPY getis_data (cartodb_id, the_geom, hr8893) FROM stdin; 22 0106000020E61000000100000001030000000100000007000000000000E0B10056C0000000C0B8964340FFFFFFFF4C1756C00000002054964340000000A00F1E56C00000004072964340000000C02D1E56C0000000A0439B434000000060381E56C00000000036B04340000000E0E20056C0000000608CB04340000000E0B10056C0000000C0B8964340 10.8557430000000004 32 0106000020E6100000010000000103000000010000000B000000FFFFFF1FC26656C0FFFFFFBFE25E4340000000A0D86656C0000000E0976F4340000000A03A6956C0000000C0966F434000000020526956C0000000E08A7F4340000000E0F26556C000000000C87F4340000000E0066656C0000000209C834340000000407F5056C0000000803C83434000000020635056C0000000E016814340000000A0F45056C0000000A0F980434000000060D25056C000000060FA5E4340FFFFFF1FC26656C0FFFFFFBFE25E4340 9.92424500000000087 @@ -100,4 +96,3 @@ COPY getis_data (cartodb_id, the_geom, hr8893) FROM stdin; CREATE INDEX getis_data_gix ON getis_data USING 
GIST(the_geom); - diff --git a/src/py/crankshaft/test/test_clustering_getis.py b/src/py/crankshaft/test/test_clustering_getis.py index 8acb562..835a121 100644 --- a/src/py/crankshaft/test/test_clustering_getis.py +++ b/src/py/crankshaft/test/test_clustering_getis.py @@ -72,5 +72,3 @@ class GetisTest(unittest.TestCase): expected = np.array(self.getis_data)[:, 0:2] for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected): self.assertAlmostEqual(res_z, exp_z, delta=1e-2) - if exp_p <= 0.05: - self.assertTrue(res_p < 0.05) From 5443b674707c92b77778fabd949051ce0c52857d Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 22 Sep 2016 08:58:22 -0400 Subject: [PATCH 29/96] adding docs for getis ord's g --- doc/16_getis_ord_gstar.md | 40 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 doc/16_getis_ord_gstar.md diff --git a/doc/16_getis_ord_gstar.md b/doc/16_getis_ord_gstar.md new file mode 100644 index 0000000..1b12ac9 --- /dev/null +++ b/doc/16_getis_ord_gstar.md @@ -0,0 +1,40 @@ +## Getis-Ord's G\* + +Getis-Ord's G\* is a geo-statistical measurement of the intensity of clustering of high or low values. The clustering of high values can be referred to as "hotspots" because these are areas of high activity or large (relative to the global mean) measurement values. Coldspots are clustered areas with low activity or small measurement values. + +### CDB_GetisOrdsG(subquery text, column_name text) + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| subquery | text | A query of the data you want to pass to the function. It must include `column_name`, a geometry column (usually `the_geom`) and an id column (usually `cartodb_id`) | +| column_name | text | This is the column of interest for performing this analysis on. This column should be a numeric type. | +| w_type (optional) | text | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. 
Read more about weight types in [PySAL's weights documentation.](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html) | +| num_ngbrs (optional) | integer | Default: 5. If `knn` is chosen, this will set the number of neighbors. If `knn` is not chosen, any entered value will be ignored. Use `NULL` if not choosing `knn`. | +| permutations (optional) | integer | The number of permutations for calculating p-values. Default: 999 | +| geom_col (optional) | text | The column where the geometry information is stored. The format must be PostGIS Geometry type (SRID 4326). Default: `the_geom`. | +| id_col (optional) | text | The column that has the unique row identifier. | + +### Returns + +Returns a table with the following columns. + +| Name | Type | Description | +|------|------|-------------| +| z_score | numeric | z-score, a measure of the intensity of clustering of high values (hotspots) or low values (coldspots). Positive values represent 'hotspots', while negative values represent 'coldspots'. | +| p_value | numeric | p-value, a measure of the significance of the intensity of clustering | +| p_z_sim | numeric | p-value based on standard normal approximation from permutations | +| rowid | integer | The original `id_col` that can be used to associate the outputs with the original geometry and inputs | + +#### Example Usage + +The following query returns the original table augmented with the values calculated from the Getis-Ord's G\* analysis. 
+ +```sql +SELECT i.*, m.z_score, m.p_value + FROM cdb_crankshaft.CDB_GetisOrdsG('SELECT * FROM incident_reports_clustered', + 'num_incidents') As m + JOIN incident_reports_clustered As i + ON i.cartodb_id = m.rowid; +``` From 06452562b905f21dc0adb6a0fa403cda333bb737 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 26 Sep 2016 10:10:52 -0400 Subject: [PATCH 30/96] fix ordering problems in input columns --- src/py/crankshaft/test/test_pysal_utils.py | 81 ++++++++++------------ 1 file changed, 38 insertions(+), 43 deletions(-) diff --git a/src/py/crankshaft/test/test_pysal_utils.py b/src/py/crankshaft/test/test_pysal_utils.py index aad9b20..92b528b 100644 --- a/src/py/crankshaft/test/test_pysal_utils.py +++ b/src/py/crankshaft/test/test_pysal_utils.py @@ -9,23 +9,23 @@ class PysalUtilsTest(unittest.TestCase): """Testing class for utility functions related to PySAL integrations""" def setUp(self): - self.params = OrderedDict([("id_col", "cartodb_id"), - ("attr1", "andy"), - ("attr2", "jay_z"), - ("subquery", "SELECT * FROM a_list"), - ("geom_col", "the_geom"), - ("num_ngbrs", 321)]) - - self.params2 = OrderedDict([("id_col", "cartodb_id"), - ("attr1", "price"), - ("attr2", "sq_meters"), + self.params1 = OrderedDict([("id_col", "cartodb_id"), + ("attr1", "andy"), + ("attr2", "jay_z"), ("subquery", "SELECT * FROM a_list"), ("geom_col", "the_geom"), ("num_ngbrs", 321)]) + self.params2 = OrderedDict([("id_col", "cartodb_id"), + ("numerator", "price"), + ("denominator", "sq_meters"), + ("subquery", "SELECT * FROM pecan"), + ("geom_col", "the_geom"), + ("num_ngbrs", 321)]) + self.params3 = OrderedDict([("id_col", "cartodb_id"), - ("attr1", "numerator"), - ("attr2", "denominator"), + ("numerator", "sq_meters"), + ("denominator", "price"), ("subquery", "SELECT * FROM pecan"), ("geom_col", "the_geom"), ("num_ngbrs", 321)]) @@ -39,20 +39,20 @@ class PysalUtilsTest(unittest.TestCase): def test_query_attr_select(self): """Test query_attr_select""" - ans = 
"i.\"andy\"::numeric As attr1, " \ - "i.\"jay_z\"::numeric As attr2, " + ans1 = ("i.\"andy\"::numeric As attr1, " + "i.\"jay_z\"::numeric As attr2, ") - ans2 = "i.\"price\"::numeric As attr1, " \ - "i.\"sq_meters\"::numeric As attr2, " + ans2 = ("i.\"price\"::numeric As attr1, " + "i.\"sq_meters\"::numeric As attr2, ") - ans3 = "i.\"numerator\"::numeric As attr1, " \ - "i.\"denominator\"::numeric As attr2, " + ans3 = ("i.\"sq_meters\"::numeric As attr1, " + "i.\"price\"::numeric As attr2, ") - ans_array = "i.\"_2013_dec\"::numeric As attr1, " \ - "i.\"_2014_jan\"::numeric As attr2, " \ - "i.\"_2014_feb\"::numeric As attr3, " + ans_array = ("i.\"_2013_dec\"::numeric As attr1, " + "i.\"_2014_jan\"::numeric As attr2, " + "i.\"_2014_feb\"::numeric As attr3, ") - self.assertEqual(pu.query_attr_select(self.params), ans) + self.assertEqual(pu.query_attr_select(self.params1), ans1) self.assertEqual(pu.query_attr_select(self.params2), ans2) self.assertEqual(pu.query_attr_select(self.params3), ans3) self.assertEqual(pu.query_attr_select(self.params_array), ans_array) @@ -60,21 +60,20 @@ class PysalUtilsTest(unittest.TestCase): def test_query_attr_where(self): """Test pu.query_attr_where""" - ans = "idx_replace.\"andy\" IS NOT NULL AND " \ - "idx_replace.\"jay_z\" IS NOT NULL AND " \ - "idx_replace.\"jay_z\" <> 0" + ans1 = ("idx_replace.\"andy\" IS NOT NULL AND " + "idx_replace.\"jay_z\" IS NOT NULL") - ans_array = "idx_replace.\"_2013_dec\" IS NOT NULL AND " \ - "idx_replace.\"_2014_jan\" IS NOT NULL AND " \ - "idx_replace.\"_2014_feb\" IS NOT NULL" + ans_array = ("idx_replace.\"_2013_dec\" IS NOT NULL AND " + "idx_replace.\"_2014_jan\" IS NOT NULL AND " + "idx_replace.\"_2014_feb\" IS NOT NULL") - self.assertEqual(pu.query_attr_where(self.params), ans) + self.assertEqual(pu.query_attr_where(self.params1), ans1) self.assertEqual(pu.query_attr_where(self.params_array), ans_array) def test_knn(self): """Test knn neighbors constructor""" - ans = "SELECT i.\"cartodb_id\" As id, 
" \ + ans1 = "SELECT i.\"cartodb_id\" As id, " \ "i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, " \ "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ @@ -82,15 +81,13 @@ class PysalUtilsTest(unittest.TestCase): "WHERE " \ "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ "j.\"andy\" IS NOT NULL AND " \ - "j.\"jay_z\" IS NOT NULL AND " \ - "j.\"jay_z\" <> 0 " \ + "j.\"jay_z\" IS NOT NULL " \ "ORDER BY " \ "j.\"the_geom\" <-> i.\"the_geom\" ASC " \ "LIMIT 321)) As neighbors " \ "FROM (SELECT * FROM a_list) As i " \ "WHERE i.\"andy\" IS NOT NULL AND " \ - "i.\"jay_z\" IS NOT NULL AND " \ - "i.\"jay_z\" <> 0 " \ + "i.\"jay_z\" IS NOT NULL " \ "ORDER BY i.\"cartodb_id\" ASC;" ans_array = "SELECT i.\"cartodb_id\" As id, " \ @@ -111,13 +108,13 @@ class PysalUtilsTest(unittest.TestCase): "i.\"_2014_feb\" IS NOT NULL "\ "ORDER BY i.\"cartodb_id\" ASC;" - self.assertEqual(pu.knn(self.params), ans) + self.assertEqual(pu.knn(self.params1), ans1) self.assertEqual(pu.knn(self.params_array), ans_array) def test_queen(self): """Test queen neighbors constructor""" - ans = "SELECT i.\"cartodb_id\" As id, " \ + ans1 = "SELECT i.\"cartodb_id\" As id, " \ "i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, " \ "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ @@ -127,23 +124,21 @@ class PysalUtilsTest(unittest.TestCase): "ST_Touches(i.\"the_geom\", " \ "j.\"the_geom\") AND " \ "j.\"andy\" IS NOT NULL AND " \ - "j.\"jay_z\" IS NOT NULL AND " \ - "j.\"jay_z\" <> 0)" \ + "j.\"jay_z\" IS NOT NULL)" \ ") As neighbors " \ "FROM (SELECT * FROM a_list) As i " \ "WHERE i.\"andy\" IS NOT NULL AND " \ - "i.\"jay_z\" IS NOT NULL AND " \ - "i.\"jay_z\" <> 0 " \ + "i.\"jay_z\" IS NOT NULL " \ "ORDER BY i.\"cartodb_id\" ASC;" - self.assertEqual(pu.queen(self.params), ans) + self.assertEqual(pu.queen(self.params1), ans1) def test_construct_neighbor_query(self): """Test construct_neighbor_query""" # Compare to raw knn query - self.assertEqual(pu.construct_neighbor_query('knn', 
self.params), - pu.knn(self.params)) + self.assertEqual(pu.construct_neighbor_query('knn', self.params1), + pu.knn(self.params1)) def test_get_attributes(self): """Test get_attributes""" From f1d420a6f7fc83cfc58862065c3a845bf31a5ba6 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 26 Sep 2016 10:11:16 -0400 Subject: [PATCH 31/96] ordering fixes --- .../crankshaft/pysal_utils/pysal_utils.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py index f0c73ba..c0ec19d 100644 --- a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py +++ b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -42,15 +42,16 @@ def get_weight(query_res, w_type='knn', num_ngbrs=5): def query_attr_select(params): """ Create portion of SELECT statement for attributes inolved in query. + Defaults to order in the params @param params: dict of information used in query (column names, table name, etc.) 
Example: - OrderedDict([('attr1', 'numerator'), - ('attr2', 'denominator'), + OrderedDict([('numerator', 'price'), + ('denominator', 'sq_meters'), ('subquery', 'SELECT * FROM interesting_data')]) Output: - "i.\"numerator\"::numeric As attr1, " \ - "i.\"denominator\"::numeric As attr2, " + "i.\"price\"::numeric As attr1, " \ + "i.\"sq_meters\"::numeric As attr2, " """ attr_string = "" @@ -81,7 +82,8 @@ def query_attr_where(params): 'numerator': 'data1', 'denominator': 'data2', '': ...} - Output: 'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL' + Output: + 'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL' Input: {'subquery': ..., 'time_cols': ['time1', 'time2', 'time3'], @@ -102,14 +104,16 @@ def query_attr_where(params): ## moran where clauses # get keys - attrs = sorted([k for k in params - if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs', 'subquery')]) + attrs = [k for k in params + if k not in ('id_col', 'geom_col', 'subquery', + 'num_ngbrs', 'subquery')] # add values to template for attr in attrs: attr_string.append(template % params[attr]) - if len(attrs) == 2: - attr_string.append("idx_replace.\"%s\" <> 0" % params[attrs[1]]) + if 'denominator' in attrs: + attr_string.append( + "idx_replace.\"%s\" <> 0" % params['denominator']) out = " AND ".join(attr_string) From 1ef3f864749a8b6f89a0c3a9e35e6ab9ed00f659 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 26 Sep 2016 10:13:27 -0400 Subject: [PATCH 32/96] small updates after ordering fix --- src/pg/test/expected/02_moran_test.out | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/pg/test/expected/02_moran_test.out b/src/pg/test/expected/02_moran_test.out index e7aef8b..95ce189 100644 --- a/src/pg/test/expected/02_moran_test.out +++ b/src/pg/test/expected/02_moran_test.out @@ -197,10 +197,11 @@ code|quads 46|LL 47|LL 48|HH -49|LL +49|LH 50|HH 51|LL -(51 rows) +52|LL +(52 rows) _cdb_random_seeds (1 row) @@ -259,9 +260,10 @@ 
code|quads 45|LH 46|LL 47|LL -49|LL +49|LH 51|LL -(28 rows) +52|LL +(29 rows) _cdb_random_seeds (1 row) @@ -277,4 +279,5 @@ code|quads 42|LH 43|LH 45|LH -(11 rows) +49|LH +(12 rows) From aaa36569ded69399f73848440e18329f7457354b Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 26 Sep 2016 16:26:34 -0400 Subject: [PATCH 33/96] first add --- src/pg/sql/18_outliers.sql | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 src/pg/sql/18_outliers.sql diff --git a/src/pg/sql/18_outliers.sql b/src/pg/sql/18_outliers.sql new file mode 100644 index 0000000..48588ce --- /dev/null +++ b/src/pg/sql/18_outliers.sql @@ -0,0 +1,52 @@ +CREATE OR REPLACE FUNCTION CDB_StaticOutlier(attr numeric, threshold numeric) +RETURNS numeric +AS $$ +BEGIN + + RETURN attr > threshold; + +END; +$$ LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION CDB_PercentOutlier(attr numeric[], outlier_fraction numeric, ids int[]) +RETURNS TABLE(outlier boolean, rowid int) +AS $$ +DECLARE + avg_val numeric; + out_vals boolean[]; +BEGIN + + SELECT avg(i) INTO avg_val FROM unnest(attr) As x(i); + + SELECT array_agg( i > avg_val * outlier_fraction) INTO out_vals + FROM unnest(attr) As x(i); + + RETURN QUERY + SELECT unnest(out_vals) As outlier, + unnest(ids) As rowid; + +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_StdDevOutlier(attrs numeric[], num_deviations numeric, ids int[]) +RETURNS TABLE(outlier boolean, rowid int) +AS $$ +DECLARE + stddev_val numeric; + avg_val numeric; + out_vals boolean[]; +BEGIN + + SELECT stddev(i), avg(i) INTO stddev_val, avg_val + FROM unnest(attrs) As x(i); + + SELECT array_agg(abs(i - avg_val) / stddev_val > num_deviations) INTO out_vals + FROM unnest(attrs) As x(i); + + + RETURN QUERY + SELECT unnest(out_vals) As outlier, + unnest(ids) As rowid; +END; +$$ LANGUAGE plpgsql; From f2bb0b496bc9f86d2d09db583cb4677d73d94a50 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 26 Sep 2016 16:51:22 -0400 Subject: 
[PATCH 34/96] small fixes --- src/pg/sql/18_outliers.sql | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/pg/sql/18_outliers.sql b/src/pg/sql/18_outliers.sql index 48588ce..7d4b3b2 100644 --- a/src/pg/sql/18_outliers.sql +++ b/src/pg/sql/18_outliers.sql @@ -1,5 +1,8 @@ + +-- Find outliers using a static threshold +-- CREATE OR REPLACE FUNCTION CDB_StaticOutlier(attr numeric, threshold numeric) -RETURNS numeric +RETURNS boolean AS $$ BEGIN @@ -8,6 +11,7 @@ BEGIN END; $$ LANGUAGE plpgsql; +-- Find outliers by a percentage above the threshold CREATE OR REPLACE FUNCTION CDB_PercentOutlier(attr numeric[], outlier_fraction numeric, ids int[]) RETURNS TABLE(outlier boolean, rowid int) @@ -29,6 +33,8 @@ BEGIN END; $$ LANGUAGE plpgsql; +-- Find outliers above a given number of standard deviations from the mean + CREATE OR REPLACE FUNCTION CDB_StdDevOutlier(attrs numeric[], num_deviations numeric, ids int[]) RETURNS TABLE(outlier boolean, rowid int) AS $$ From b8accb48fc15fa6ed235700f363e33a7483621c0 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 28 Sep 2016 15:55:56 -0400 Subject: [PATCH 35/96] adds tests --- src/pg/sql/18_outliers.sql | 16 +++++-- src/pg/test/expected/18_outliers.out | 16 +++++++ src/pg/test/sql/18_outliers.sql | 70 ++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 src/pg/test/expected/18_outliers.out create mode 100644 src/pg/test/sql/18_outliers.sql diff --git a/src/pg/sql/18_outliers.sql b/src/pg/sql/18_outliers.sql index 7d4b3b2..3ae7a5e 100644 --- a/src/pg/sql/18_outliers.sql +++ b/src/pg/sql/18_outliers.sql @@ -12,6 +12,7 @@ END; $$ LANGUAGE plpgsql; -- Find outliers by a percentage above the threshold +-- TODO: add symmetric option? 
`symmetric boolean DEFAULT false` CREATE OR REPLACE FUNCTION CDB_PercentOutlier(attr numeric[], outlier_fraction numeric, ids int[]) RETURNS TABLE(outlier boolean, rowid int) @@ -21,10 +22,15 @@ DECLARE out_vals boolean[]; BEGIN - SELECT avg(i) INTO avg_val FROM unnest(attr) As x(i); + SELECT avg(i) INTO avg_val + FROM unnest(attr) As x(i); - SELECT array_agg( i > avg_val * outlier_fraction) INTO out_vals - FROM unnest(attr) As x(i); + SELECT array_agg( + CASE WHEN avg_val = 0 THEN null + ELSE outlier_fraction > i / avg_val + END + ) INTO out_vals + FROM unnest(attr) As x(i); RETURN QUERY SELECT unnest(out_vals) As outlier, @@ -45,10 +51,10 @@ DECLARE BEGIN SELECT stddev(i), avg(i) INTO stddev_val, avg_val - FROM unnest(attrs) As x(i); + FROM unnest(attrs) As x(i); SELECT array_agg(abs(i - avg_val) / stddev_val > num_deviations) INTO out_vals - FROM unnest(attrs) As x(i); + FROM unnest(attrs) As x(i); RETURN QUERY diff --git a/src/pg/test/expected/18_outliers.out b/src/pg/test/expected/18_outliers.out new file mode 100644 index 0000000..1c0f186 --- /dev/null +++ b/src/pg/test/expected/18_outliers.out @@ -0,0 +1,16 @@ +outlier|rowid +t|11 +t|16 +t|17 +outlier|rowid +t|16 +t|17 +outlier|rowid +t|8 +t|11 +t|16 +outlier|rowid +t|8 +t|9 +t|11 +t|15 diff --git a/src/pg/test/sql/18_outliers.sql b/src/pg/test/sql/18_outliers.sql new file mode 100644 index 0000000..432d4c1 --- /dev/null +++ b/src/pg/test/sql/18_outliers.sql @@ -0,0 +1,70 @@ +SET client_min_messages TO WARNING; +\set ECHO none +\pset format unaligned + +-- +-- postgres=# select round(avg(i), 3) as avg, +-- round(stddev(i), 3) as stddev, +-- round(avg(i) + stddev(i), 3) as one_stddev, +-- round(avg(i) + 2 * stddev(i), 3) As two_stddev +-- from unnest(ARRAY[1,3,2,3,5,1,2,32,12,3,57,2,1,4,2,100]) As x(i); +-- avg | stddev | one_stddev | two_stddev +-- --------+--------+------------+------------ +-- 14.375 | 27.322 | 41.697 | 69.020 + + +-- With an threshold of 1.0 standard deviation, ids 11, 16, and 17 are 
outliers +WITH a AS ( + SELECT + ARRAY[1,3,2,3,5,1,2,32,12, 3,57, 2, 1, 4, 2,100,-100]::numeric[] As vals, ARRAY[1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15, 16, 17]::int[] As ids +), b As ( + SELECT + (cdb_StdDevOutlier(vals, 1.0, ids)).* + FROM a + ORDER BY ids) +SELECT * +FROM b +WHERE outlier IS TRUE; + +-- With a threshold of 2.0 standard deviations, id 16 is the only outlier +WITH a AS ( + SELECT + ARRAY[1,3,2,3,5,1,2,32,12, 3,57, 2, 1, 4, 2,100,-100]::numeric[] As vals, + ARRAY[1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15, 16, 17]::int[] As ids +), b As ( + SELECT + (CDB_StdDevOutlier(vals, 2.0, ids)).* + FROM a + ORDER BY ids) +SELECT * +FROM b +WHERE outlier IS TRUE; + +-- With a ratio threshold of 2.0 threshold (100% above or below the mean) +-- which is greater than ~21, which are values +WITH a AS ( + SELECT + ARRAY[1,3,2,3,5,1,2,32,12, 3,57, 2, 1, 4, 2,100,-100]::numeric[] As vals, + ARRAY[1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15, 16, 17]::int[] As ids +), b As ( + SELECT + (CDB_PercentOutlier(vals, 2.0, ids)).* + FROM a + ORDER BY ids) +SELECT * + FROM b + WHERE outlier IS TRUE; + +-- With a static threshold of 11, what are the outliers +WITH a AS ( + SELECT + ARRAY[1,3,2,3,5,1,2,32,12, 3,57, 2, 1, 4, 2,100,-100]::numeric[] As vals, + ARRAY[1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15, 16, 17]::int[] As ids + ), b As ( + SELECT unnest(vals) As v, unnest(ids) as i + FROM a + ) +SELECT CDB_StaticOutlier(v, 11.0), i + FROM b +WHERE CDB_StaticOutlier(v, 11.0) is True +ORDER BY i; From acde384157bf0ffe516c6ab3c5504e61fd28dce9 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 28 Sep 2016 16:27:41 -0400 Subject: [PATCH 36/96] update tests --- src/pg/sql/18_outliers.sql | 2 +- .../{18_outliers.out => 18_outliers_test.out} | 8 +++++++- .../sql/{18_outliers.sql => 18_outliers_test.sql} | 11 ++++++----- 3 files changed, 14 insertions(+), 7 deletions(-) rename src/pg/test/expected/{18_outliers.out => 18_outliers_test.out} (54%) rename src/pg/test/sql/{18_outliers.sql => 
18_outliers_test.sql} (86%) diff --git a/src/pg/sql/18_outliers.sql b/src/pg/sql/18_outliers.sql index 3ae7a5e..6b6d943 100644 --- a/src/pg/sql/18_outliers.sql +++ b/src/pg/sql/18_outliers.sql @@ -27,7 +27,7 @@ BEGIN SELECT array_agg( CASE WHEN avg_val = 0 THEN null - ELSE outlier_fraction > i / avg_val + ELSE outlier_fraction < i::numeric / avg_val::numeric END ) INTO out_vals FROM unnest(attr) As x(i); diff --git a/src/pg/test/expected/18_outliers.out b/src/pg/test/expected/18_outliers_test.out similarity index 54% rename from src/pg/test/expected/18_outliers.out rename to src/pg/test/expected/18_outliers_test.out index 1c0f186..a329e07 100644 --- a/src/pg/test/expected/18_outliers.out +++ b/src/pg/test/expected/18_outliers_test.out @@ -1,16 +1,22 @@ +SET client_min_messages TO WARNING; +\set ECHO none outlier|rowid t|11 t|16 t|17 +(3 rows) outlier|rowid t|16 t|17 +(2 rows) outlier|rowid t|8 t|11 t|16 +(3 rows) outlier|rowid t|8 t|9 t|11 -t|15 +t|16 +(4 rows) diff --git a/src/pg/test/sql/18_outliers.sql b/src/pg/test/sql/18_outliers_test.sql similarity index 86% rename from src/pg/test/sql/18_outliers.sql rename to src/pg/test/sql/18_outliers_test.sql index 432d4c1..c12c889 100644 --- a/src/pg/test/sql/18_outliers.sql +++ b/src/pg/test/sql/18_outliers_test.sql @@ -19,7 +19,7 @@ WITH a AS ( ARRAY[1,3,2,3,5,1,2,32,12, 3,57, 2, 1, 4, 2,100,-100]::numeric[] As vals, ARRAY[1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15, 16, 17]::int[] As ids ), b As ( SELECT - (cdb_StdDevOutlier(vals, 1.0, ids)).* + (cdb_crankshaft.cdb_StdDevOutlier(vals, 1.0, ids)).* FROM a ORDER BY ids) SELECT * @@ -33,7 +33,7 @@ WITH a AS ( ARRAY[1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15, 16, 17]::int[] As ids ), b As ( SELECT - (CDB_StdDevOutlier(vals, 2.0, ids)).* + (cdb_crankshaft.CDB_StdDevOutlier(vals, 2.0, ids)).* FROM a ORDER BY ids) SELECT * @@ -48,7 +48,7 @@ WITH a AS ( ARRAY[1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15, 16, 17]::int[] As ids ), b As ( SELECT - (CDB_PercentOutlier(vals, 2.0, ids)).* + 
(cdb_crankshaft.CDB_PercentOutlier(vals, 2.0, ids)).* FROM a ORDER BY ids) SELECT * @@ -64,7 +64,8 @@ WITH a AS ( SELECT unnest(vals) As v, unnest(ids) as i FROM a ) -SELECT CDB_StaticOutlier(v, 11.0), i +SELECT cdb_crankshaft.CDB_StaticOutlier(v, 11.0) As outlier, i As rowid FROM b -WHERE CDB_StaticOutlier(v, 11.0) is True +WHERE cdb_crankshaft.CDB_StaticOutlier(v, 11.0) is True ORDER BY i; + From b54c62890f58281aea399ec2f3d0e271783e8e01 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 29 Sep 2016 08:48:22 -0400 Subject: [PATCH 37/96] adds hand-off doc line --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3de84c4..fbc5c8d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -6,4 +6,5 @@ - [ ] Video explaining the analysis and showing examples - [ ] Analysis Documentation written [template](https://docs.google.com/a/cartodb.com/document/d/1X2KOtaiEBKWNMp8UjwcLB-kE9aIOw09aOjX3oaCjeME/edit?usp=sharing) - [ ] Smoke test written +- [ ] Hand-off document for camshaft node written From 8bc6f69a1bf992dce2668652e722c42de06715d0 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 29 Sep 2016 10:12:32 -0400 Subject: [PATCH 38/96] adding exceptions to improve robustness --- src/pg/sql/18_outliers.sql | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/pg/sql/18_outliers.sql b/src/pg/sql/18_outliers.sql index 3ae7a5e..1fa8d93 100644 --- a/src/pg/sql/18_outliers.sql +++ b/src/pg/sql/18_outliers.sql @@ -14,7 +14,7 @@ $$ LANGUAGE plpgsql; -- Find outliers by a percentage above the threshold -- TODO: add symmetric option? 
`symmetric boolean DEFAULT false` -CREATE OR REPLACE FUNCTION CDB_PercentOutlier(attr numeric[], outlier_fraction numeric, ids int[]) +CREATE OR REPLACE FUNCTION CDB_PercentOutlier(column_values numeric[], outlier_fraction numeric, ids int[]) RETURNS TABLE(outlier boolean, rowid int) AS $$ DECLARE @@ -23,14 +23,15 @@ DECLARE BEGIN SELECT avg(i) INTO avg_val - FROM unnest(attr) As x(i); + FROM unnest(column_values) As x(i); + + IF avg_val = 0 THEN + RAISE EXCEPTION 'Mean value is zero. Try another outlier method.'; + END IF; SELECT array_agg( - CASE WHEN avg_val = 0 THEN null - ELSE outlier_fraction > i / avg_val - END - ) INTO out_vals - FROM unnest(attr) As x(i); + outlier_fraction > i / avg_val) INTO out_vals + FROM unnest(column_values) As x(i); RETURN QUERY SELECT unnest(out_vals) As outlier, @@ -53,7 +54,12 @@ BEGIN SELECT stddev(i), avg(i) INTO stddev_val, avg_val FROM unnest(attrs) As x(i); - SELECT array_agg(abs(i - avg_val) / stddev_val > num_deviations) INTO out_vals + IF stddev_val = 0 THEN + RAISE EXCEPTION 'Standard deviation of input data is zero'; + END IF; + + SELECT array_agg( + abs(i - avg_val) / stddev_val > num_deviations) INTO out_vals FROM unnest(attrs) As x(i); From 5754087140da57a5e716a1e259294053ab229ede Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 29 Sep 2016 11:09:10 -0400 Subject: [PATCH 39/96] adds symmetric option for stddev outlier --- src/pg/sql/18_outliers.sql | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/pg/sql/18_outliers.sql b/src/pg/sql/18_outliers.sql index 1fa8d93..454e383 100644 --- a/src/pg/sql/18_outliers.sql +++ b/src/pg/sql/18_outliers.sql @@ -12,7 +12,7 @@ END; $$ LANGUAGE plpgsql; -- Find outliers by a percentage above the threshold --- TODO: add symmetric option? `symmetric boolean DEFAULT false` +-- TODO: add symmetric option? 
`is_symmetric boolean DEFAULT false` CREATE OR REPLACE FUNCTION CDB_PercentOutlier(column_values numeric[], outlier_fraction numeric, ids int[]) RETURNS TABLE(outlier boolean, rowid int) @@ -42,7 +42,7 @@ $$ LANGUAGE plpgsql; -- Find outliers above a given number of standard deviations from the mean -CREATE OR REPLACE FUNCTION CDB_StdDevOutlier(attrs numeric[], num_deviations numeric, ids int[]) +CREATE OR REPLACE FUNCTION CDB_StdDevOutlier(attrs numeric[], num_deviations numeric, ids int[], is_symmetric boolean DEFAULT true) RETURNS TABLE(outlier boolean, rowid int) AS $$ DECLARE @@ -58,10 +58,15 @@ BEGIN RAISE EXCEPTION 'Standard deviation of input data is zero'; END IF; - SELECT array_agg( - abs(i - avg_val) / stddev_val > num_deviations) INTO out_vals - FROM unnest(attrs) As x(i); - + IF is_symmetric THEN + SELECT array_agg( + abs(i - avg_val) / stddev_val > num_deviations) INTO out_vals + FROM unnest(attrs) As x(i); + ELSE + SELECT array_agg( + (i - avg_val) / stddev_val > num_deviations) INTO out_vals + FROM unnest(attrs) As x(i); + END IF; RETURN QUERY SELECT unnest(out_vals) As outlier, From bd05e7739d7bafc15ad7d3f0c9045953103f7210 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 29 Sep 2016 11:10:54 -0400 Subject: [PATCH 40/96] add test to produce error --- src/pg/test/sql/18_outliers.sql | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/pg/test/sql/18_outliers.sql b/src/pg/test/sql/18_outliers.sql index 432d4c1..c84c614 100644 --- a/src/pg/test/sql/18_outliers.sql +++ b/src/pg/test/sql/18_outliers.sql @@ -40,6 +40,21 @@ SELECT * FROM b WHERE outlier IS TRUE; +-- With a Stddev of zero, should throw back error +-- With a threshold of 2.0 standard deviations, id 16 is the only outlier +WITH a AS ( + SELECT + ARRAY[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]::numeric[] As vals, + ARRAY[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]::int[] As ids +), b As ( + SELECT + (CDB_StdDevOutlier(vals, 1.0, ids)).* + FROM a + ORDER BY ids) +SELECT * +FROM b 
+WHERE outlier IS TRUE; + -- With a ratio threshold of 2.0 threshold (100% above or below the mean) -- which is greater than ~21, which are values WITH a AS ( From 99856ce95608101553ee11402ea6e81822c61e53 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 29 Sep 2016 11:37:22 -0400 Subject: [PATCH 41/96] flip inequality --- src/pg/sql/18_outliers.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/sql/18_outliers.sql b/src/pg/sql/18_outliers.sql index 454e383..b726bf6 100644 --- a/src/pg/sql/18_outliers.sql +++ b/src/pg/sql/18_outliers.sql @@ -30,7 +30,7 @@ BEGIN END IF; SELECT array_agg( - outlier_fraction > i / avg_val) INTO out_vals + outlier_fraction < i / avg_val) INTO out_vals FROM unnest(column_values) As x(i); RETURN QUERY From 23b2ad57c538b0eba0eaa8dfdf121993464d5c7c Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 29 Sep 2016 11:37:42 -0400 Subject: [PATCH 42/96] test updates --- src/pg/test/expected/18_outliers_test.out | 1 + src/pg/test/sql/18_outliers_test.sql | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pg/test/expected/18_outliers_test.out b/src/pg/test/expected/18_outliers_test.out index a329e07..417933b 100644 --- a/src/pg/test/expected/18_outliers_test.out +++ b/src/pg/test/expected/18_outliers_test.out @@ -9,6 +9,7 @@ outlier|rowid t|16 t|17 (2 rows) +ERROR: Standard deviation of input data is zero outlier|rowid t|8 t|11 diff --git a/src/pg/test/sql/18_outliers_test.sql b/src/pg/test/sql/18_outliers_test.sql index 6f18bf1..3b56125 100644 --- a/src/pg/test/sql/18_outliers_test.sql +++ b/src/pg/test/sql/18_outliers_test.sql @@ -48,7 +48,7 @@ WITH a AS ( ARRAY[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]::int[] As ids ), b As ( SELECT - (CDB_StdDevOutlier(vals, 1.0, ids)).* + (cdb_crankshaft.CDB_StdDevOutlier(vals, 1.0, ids)).* FROM a ORDER BY ids) SELECT * From 6846014a4fb1ccb1d738cbde48fbc17d33617394 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 29 Sep 2016 11:42:11 -0400 
Subject: [PATCH 43/96] adding docs --- doc/18_outliers.md | 163 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 doc/18_outliers.md diff --git a/doc/18_outliers.md b/doc/18_outliers.md new file mode 100644 index 0000000..f1aa862 --- /dev/null +++ b/doc/18_outliers.md @@ -0,0 +1,163 @@ +## Outlier Detection + +This set of functions detects the presence of outliers. There are three functions for finding outliers from non-spatial data: + +1. Static Outliers +1. Percentage Outliers +1. Standard Deviation Outliers + +### CDB_StaticOutlier(column_value numeric, threshold numeric) + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| column_value | numeric | The column of values on which to apply the threshold | +| threshold | numeric | The static threshold which is used to indicate whether a `column_value` is an outlier or not | + +### Returns + +Returns a boolean (true/false) depending on whether a value is above or below (or equal to) the threshold + +| Name | Type | Description | +|------|------|-------------| +| outlier | boolean | classification of whether a row is an outlier or not | + +#### Example Usage + +With a table `website_visits`: + +``` +| id | visits_10k | +|----|------------| +| 1 | 1 | +| 2 | 3 | +| 3 | 5 | +| 4 | 1 | +| 5 | 32 | +| 6 | 3 | +| 7 | 57 | +| 8 | 2 | +``` + +```sql +SELECT + id, + CDB_StaticOutlier(visits_10k, 11.0) As outlier, + visits_10k +FROM website_visits +``` + +``` +| id | outlier | visits_10k | +|----|---------|------------| +| 1 | f | 1 | +| 2 | f | 3 | +| 3 | f | 5 | +| 4 | f | 1 | +| 5 | t | 32 | +| 6 | f | 3 | +| 7 | t | 57 | +| 8 | f | 2 | +``` + +### CDB_PercentOutlier(column_values numeric[], outlier_fraction numeric, ids int[]) + +`CDB_PercentOutlier` calculates whether or not a value falls above a given threshold based on a percentage above the mean value of the input values.
+ +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| column_values | numeric[] | An array of the values to calculate the outlier classification on | +| outlier_fraction | numeric | The threshold above which a column value divided by the mean of all values is considered an outlier | +| ids | int[] | An array of the unique row ids of the input data (usually `cartodb_id`) | + +### Returns + +Returns a table of the outlier classification with the following columns + +| Name | Type | Description | +|------|------|-------------| +| outlier | boolean | classification of whether a row is an outlier or not | +| rowid | int | original row id (e.g., input `cartodb_id`) of the row which has the outlier classification | + +#### Example Usage + +This example finds outliers which are more than 100% larger than the average (that is, more than 2.0 times the average). + +```sql +WITH cte As ( + SELECT + unnest(Array[1,2,3,4,5,6,7,8]) As id, + unnest(Array[1,3,5,1,32,3,57,2]) As visits_10k + ) +SELECT + (CDB_PercentOutlier(array_agg(visits_10k), 2.0, array_agg(id))).* +FROM cte; +``` + +Output +``` +| outlier | rowid | +|---------+-------| +| f | 1 | +| f | 2 | +| f | 3 | +| f | 4 | +| t | 5 | +| f | 6 | +| t | 7 | +| f | 8 | +``` + +### CDB_StdDevOutlier(column_values numeric[], num_deviations numeric, ids int[], is_symmetric boolean DEFAULT true) + +`CDB_StdDevOutlier` calculates whether or not a value falls above or below a given threshold based on the number of standard deviations from the mean.
+ +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| column_values | numeric[] | An array of the values to calculate the outlier classification on | +| num_deviations | numeric | The threshold in units of standard deviation | +| ids | int[] | An array of the unique row ids of the input data (usually `cartodb_id`) | +| is_symmetric (optional) | boolean | Consider outliers that are symmetric about the mean (default: true) | + +### Returns + +Returns a table of the outlier classification with the following columns + +| Name | Type | Description | +|------|------|-------------| +| outlier | boolean | classification of whether a row is an outlier or not | +| rowid | int | original row id (e.g., input `cartodb_id`) of the row which has the outlier classification | + +#### Example Usage + +This example finds outliers which are more than 2.0 standard deviations away from the mean (above or below it, since `is_symmetric` defaults to true). + +```sql +WITH cte As ( + SELECT + unnest(Array[1,2,3,4,5,6,7,8]) As id, + unnest(Array[1,3,5,1,32,3,57,2]) As visits_10k + ) +SELECT + (CDB_StdDevOutlier(array_agg(visits_10k), 2.0, array_agg(id))).* +FROM cte; +``` + +Output +``` +| outlier | rowid | +|---------+-------| +| f | 1 | +| f | 2 | +| f | 3 | +| f | 4 | +| f | 5 | +| f | 6 | +| t | 7 | +| f | 8 | +``` From ecc9814a8831503528980007ffa885013ef33485 Mon Sep 17 00:00:00 2001 From: Javier Goizueta Date: Fri, 30 Sep 2016 11:31:57 +0200 Subject: [PATCH 44/96] Reorder package installation Fixes #138 It seems that package postgresql-9.5-postgis-2.2 is now indirectly depending on postgresql-9.5-postgis-2.3-scripts which is not compatible with the packages in cartodb launchpad repos --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a165028..bc85047 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,8 +41,8 @@ before_install: - sudo apt-get -y install postgresql-9.5=9.5.2-2ubuntu1 - sudo apt-get -y install
postgresql-server-dev-9.5=9.5.2-2ubuntu1 - sudo apt-get -y install postgresql-plpython-9.5=9.5.2-2ubuntu1 - - sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2 - sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2 + - sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2 # configure it to accept local connections from postgres - echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \ From 0a53a6e71d5e5dceca1ce266b1519d0f9a8df50d Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 6 Oct 2016 08:19:57 -0400 Subject: [PATCH 45/96] fix error variable name bug, pep8 updates --- .../crankshaft/space_time_dynamics/markov.py | 47 +++++++++++-------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py index ae788d7..b1c09ba 100644 --- a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py +++ b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py @@ -8,12 +8,14 @@ import pysal as ps import plpy import crankshaft.pysal_utils as pu + def spatial_markov_trend(subquery, time_cols, num_classes=7, w_type='knn', num_ngbrs=5, permutations=0, geom_col='the_geom', id_col='cartodb_id'): """ Predict the trends of a unit based on: - 1. history of its transitions to different classes (e.g., 1st quantile -> 2nd quantile) + 1. history of its transitions to different classes (e.g., 1st quantile + -> 2nd quantile) 2. 
average class of its neighbors Inputs: @@ -56,16 +58,15 @@ def spatial_markov_trend(subquery, time_cols, num_classes=7, ) if len(query_result) == 0: return zip([None], [None], [None], [None], [None]) - except plpy.SPIError, e: - plpy.debug('Query failed with exception %s: %s' % (err, pu.construct_neighbor_query(w_type, qvals))) - plpy.error('Analysis failed: %s' % e) + except plpy.SPIError, err: + plpy.error('Analysis failed: %s' % err) return zip([None], [None], [None], [None], [None]) - ## build weight + # build weight weights = pu.get_weight(query_result, w_type) weights.transform = 'r' - ## prep time data + # prep time data t_data = get_time_data(query_result, time_cols) plpy.debug('shape of t_data %d, %d' % t_data.shape) @@ -78,23 +79,26 @@ def spatial_markov_trend(subquery, time_cols, num_classes=7, fixed=False, permutations=permutations) - ## get lag classes + # get lag classes lag_classes = ps.Quantiles( ps.lag_spatial(weights, t_data[:, -1]), k=num_classes).yb - ## look up probablity distribution for each unit according to class and lag class + # look up probablity distribution for each unit according to class and lag + # class prob_dist = get_prob_dist(sp_markov_result.P, lag_classes, sp_markov_result.classes[:, -1]) - ## find the ups and down and overall distribution of each cell - trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, - sp_markov_result.classes[:, -1]) + # find the ups and down and overall distribution of each cell + trend_up, trend_down, trend, volatility = get_prob_stats( + prob_dist, + sp_markov_result.classes[:, -1]) - ## output the results + # output the results return zip(trend, trend_up, trend_down, volatility, weights.id_order) + def get_time_data(markov_data, time_cols): """ Extract the time columns and bin appropriately @@ -103,7 +107,8 @@ def get_time_data(markov_data, time_cols): return np.array([[x['attr' + str(i)] for x in markov_data] for i in range(1, num_attrs+1)], dtype=float).transpose() -## not currently 
used + +# not currently used def rebin_data(time_data, num_time_per_bin): """ Convert an n x l matrix into an (n/m) x l matrix where the values are @@ -131,14 +136,16 @@ def rebin_data(time_data, num_time_per_bin): """ if time_data.shape[1] % num_time_per_bin == 0: - ## if fit is perfect, then use it + # if fit is perfect, then use it n_max = time_data.shape[1] / num_time_per_bin else: - ## fit remainders into an additional column + # fit remainders into an additional column n_max = time_data.shape[1] / num_time_per_bin + 1 - return np.array([time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1) - for i in range(n_max)]).T + return np.array( + [time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1) + for i in range(n_max)]).T + def get_prob_dist(transition_matrix, lag_indices, unit_indices): """ @@ -157,6 +164,7 @@ def get_prob_dist(transition_matrix, lag_indices, unit_indices): return np.array([transition_matrix[(lag_indices[i], unit_indices[i])] for i in range(len(lag_indices))]) + def get_prob_stats(prob_dist, unit_indices): """ get the statistics of the probability distributions @@ -179,11 +187,12 @@ def get_prob_stats(prob_dist, unit_indices): trend_up[i] = prob_dist[i, (unit_indices[i]+1):].sum() trend_down[i] = prob_dist[i, :unit_indices[i]].sum() if prob_dist[i, unit_indices[i]] > 0.0: - trend[i] = (trend_up[i] - trend_down[i]) / prob_dist[i, unit_indices[i]] + trend[i] = ((trend_up[i] - trend_down[i]) / + (prob_dist[i, unit_indices[i]])) else: trend[i] = None - ## calculate volatility of distribution + # calculate volatility of distribution volatility = prob_dist.std(axis=1) return trend_up, trend_down, trend, volatility From 11c33ce3fa99926eeafc6091b321f2fddd827435 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 6 Oct 2016 08:56:14 -0400 Subject: [PATCH 46/96] adds pep8 check item --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index fbc5c8d..529ca2c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,4 +7,4 @@ - [ ] Analysis Documentation written [template](https://docs.google.com/a/cartodb.com/document/d/1X2KOtaiEBKWNMp8UjwcLB-kE9aIOw09aOjX3oaCjeME/edit?usp=sharing) - [ ] Smoke test written - [ ] Hand-off document for camshaft node written - +- [ ] If function is in Python, code conforms to [PEP8 Style Guide](https://www.python.org/dev/peps/pep-0008/) From c7f5c2451071e813ede30eff5f5f9f3f005cc7f0 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 6 Oct 2016 09:53:22 -0400 Subject: [PATCH 47/96] update signature names --- doc/18_outliers.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/18_outliers.md b/doc/18_outliers.md index f1aa862..f557529 100644 --- a/doc/18_outliers.md +++ b/doc/18_outliers.md @@ -25,7 +25,7 @@ Returns a boolean (true/false) depending on whether a value is above or below (o #### Example Usage -With a table `website_visits`: +With a table `website_visits` and a column of the number of website visits in units of 10,000 visits: ``` | id | visits_10k | @@ -61,7 +61,7 @@ FROM website_visits | 8 | f | 2 | ``` -### CDB_PercentOutlier(column_values numeric[], ratio_threshold numeric, ids int[]) +### CDB_PercentOutlier(column_values numeric[], outlier_fraction numeric, ids int[]) `CDB_PercentOutlier` calculates whether or not a value falls above a given threshold based on a percentage above the mean value of the input values. 
@@ -79,7 +79,7 @@ Returns a table of the outlier classification with the following columns | Name | Type | Description | |------|------|-------------| -| outlier | boolean | classification of whether a row is an outlier or not | +| is_outlier | boolean | classification of whether a row is an outlier or not | | rowid | int | original row id (e.g., input `cartodb_id`) of the row which has the outlier classification | #### Example Usage @@ -111,7 +111,7 @@ Output | f | 8 | ``` -### CDB_StdDevOutlier(column_values numeric[], ratio_threshold numeric, ids int[], is_symmetric boolean DEFAULT true) +### CDB_StdDevOutlier(column_values numeric[], num_deviations numeric, ids int[], is_symmetric boolean DEFAULT true) `CDB_StdDevOutlier` calculates whether or not a value falls above or below a given threshold based on the number of standard deviations from the mean. @@ -130,7 +130,7 @@ Returns a table of the outlier classification with the following columns | Name | Type | Description | |------|------|-------------| -| outlier | boolean | classification of whether a row is an outlier or not | +| is_outlier | boolean | classification of whether a row is an outlier or not | | rowid | int | original row id (e.g., input `cartodb_id`) of the row which has the outlier classification | #### Example Usage From da1449331cec83b8e3598c63630f63df2510e944 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 6 Oct 2016 09:53:38 -0400 Subject: [PATCH 48/96] update signature variable names --- src/pg/sql/18_outliers.sql | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/pg/sql/18_outliers.sql b/src/pg/sql/18_outliers.sql index b726bf6..5b23c77 100644 --- a/src/pg/sql/18_outliers.sql +++ b/src/pg/sql/18_outliers.sql @@ -1,12 +1,12 @@ -- Find outliers using a static threshold -- -CREATE OR REPLACE FUNCTION CDB_StaticOutlier(attr numeric, threshold numeric) +CREATE OR REPLACE FUNCTION CDB_StaticOutlier(column_value numeric, threshold numeric) 
RETURNS boolean AS $$ BEGIN - RETURN attr > threshold; + RETURN column_value > threshold; END; $$ LANGUAGE plpgsql; @@ -15,7 +15,7 @@ $$ LANGUAGE plpgsql; -- TODO: add symmetric option? `is_symmetric boolean DEFAULT false` CREATE OR REPLACE FUNCTION CDB_PercentOutlier(column_values numeric[], outlier_fraction numeric, ids int[]) -RETURNS TABLE(outlier boolean, rowid int) +RETURNS TABLE(is_outlier boolean, rowid int) AS $$ DECLARE avg_val numeric; @@ -34,7 +34,7 @@ BEGIN FROM unnest(column_values) As x(i); RETURN QUERY - SELECT unnest(out_vals) As outlier, + SELECT unnest(out_vals) As is_outlier, unnest(ids) As rowid; END; @@ -42,8 +42,8 @@ $$ LANGUAGE plpgsql; -- Find outliers above a given number of standard deviations from the mean -CREATE OR REPLACE FUNCTION CDB_StdDevOutlier(attrs numeric[], num_deviations numeric, ids int[], is_symmetric boolean DEFAULT true) -RETURNS TABLE(outlier boolean, rowid int) +CREATE OR REPLACE FUNCTION CDB_StdDevOutlier(column_values numeric[], num_deviations numeric, ids int[], is_symmetric boolean DEFAULT true) +RETURNS TABLE(is_outlier boolean, rowid int) AS $$ DECLARE stddev_val numeric; @@ -52,7 +52,7 @@ DECLARE BEGIN SELECT stddev(i), avg(i) INTO stddev_val, avg_val - FROM unnest(attrs) As x(i); + FROM unnest(column_values) As x(i); IF stddev_val = 0 THEN RAISE EXCEPTION 'Standard deviation of input data is zero'; @@ -61,15 +61,15 @@ BEGIN IF is_symmetric THEN SELECT array_agg( abs(i - avg_val) / stddev_val > num_deviations) INTO out_vals - FROM unnest(attrs) As x(i); + FROM unnest(column_values) As x(i); ELSE SELECT array_agg( (i - avg_val) / stddev_val > num_deviations) INTO out_vals - FROM unnest(attrs) As x(i); + FROM unnest(column_values) As x(i); END IF; RETURN QUERY - SELECT unnest(out_vals) As outlier, + SELECT unnest(out_vals) As is_outlier, unnest(ids) As rowid; END; $$ LANGUAGE plpgsql; From c7e690980f6165c303de91edc7a463c1497971b6 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 6 Oct 2016 10:29:52 -0400 
Subject: [PATCH 49/96] update column names in tests --- src/pg/test/expected/18_outliers_test.out | 8 ++++---- src/pg/test/sql/18_outliers_test.sql | 11 +++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/pg/test/expected/18_outliers_test.out b/src/pg/test/expected/18_outliers_test.out index 417933b..0798e0c 100644 --- a/src/pg/test/expected/18_outliers_test.out +++ b/src/pg/test/expected/18_outliers_test.out @@ -1,21 +1,21 @@ SET client_min_messages TO WARNING; \set ECHO none -outlier|rowid +is_outlier|rowid t|11 t|16 t|17 (3 rows) -outlier|rowid +is_outlier|rowid t|16 t|17 (2 rows) ERROR: Standard deviation of input data is zero -outlier|rowid +is_outlier|rowid t|8 t|11 t|16 (3 rows) -outlier|rowid +is_outlier|rowid t|8 t|9 t|11 diff --git a/src/pg/test/sql/18_outliers_test.sql b/src/pg/test/sql/18_outliers_test.sql index 3b56125..9a6b87d 100644 --- a/src/pg/test/sql/18_outliers_test.sql +++ b/src/pg/test/sql/18_outliers_test.sql @@ -24,7 +24,7 @@ WITH a AS ( ORDER BY ids) SELECT * FROM b -WHERE outlier IS TRUE; +WHERE is_outlier IS TRUE; -- With a threshold of 2.0 standard deviations, id 16 is the only outlier WITH a AS ( @@ -38,7 +38,7 @@ WITH a AS ( ORDER BY ids) SELECT * FROM b -WHERE outlier IS TRUE; +WHERE is_outlier IS TRUE; -- With a Stddev of zero, should throw back error -- With a threshold of 2.0 standard deviations, id 16 is the only outlier @@ -53,7 +53,7 @@ WITH a AS ( ORDER BY ids) SELECT * FROM b -WHERE outlier IS TRUE; +WHERE is_outlier IS TRUE; -- With a ratio threshold of 2.0 threshold (100% above or below the mean) -- which is greater than ~21, which are values @@ -68,7 +68,7 @@ WITH a AS ( ORDER BY ids) SELECT * FROM b - WHERE outlier IS TRUE; + WHERE is_outlier IS TRUE; -- With a static threshold of 11, what are the outliers WITH a AS ( @@ -79,8 +79,7 @@ WITH a AS ( SELECT unnest(vals) As v, unnest(ids) as i FROM a ) -SELECT cdb_crankshaft.CDB_StaticOutlier(v, 11.0) As outlier, i As rowid +SELECT 
cdb_crankshaft.CDB_StaticOutlier(v, 11.0) As is_outlier, i As rowid FROM b WHERE cdb_crankshaft.CDB_StaticOutlier(v, 11.0) is True ORDER BY i; - From 215e61396a76a4ae2943e5c6a101f022db311053 Mon Sep 17 00:00:00 2001 From: Javier Villar Date: Fri, 7 Oct 2016 13:45:09 +0200 Subject: [PATCH 50/96] Creating requirements.txt file for python --- src/py/crankshaft/requirements.txt | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 src/py/crankshaft/requirements.txt diff --git a/src/py/crankshaft/requirements.txt b/src/py/crankshaft/requirements.txt new file mode 100644 index 0000000..3f50cd7 --- /dev/null +++ b/src/py/crankshaft/requirements.txt @@ -0,0 +1,5 @@ +joblib==0.8.3 +numpy==1.6.1 +scipy==0.14.0 +pysal==1.11.2 +scikit-learn==0.14.1 From 83219270aea0601859058d7c45eece84e093cae8 Mon Sep 17 00:00:00 2001 From: Javier Villar Date: Fri, 7 Oct 2016 16:47:28 +0200 Subject: [PATCH 51/96] Copying requirements.txt to python 0.4.2 folder --- release/python/0.4.2/crankshaft/requirements.txt | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 release/python/0.4.2/crankshaft/requirements.txt diff --git a/release/python/0.4.2/crankshaft/requirements.txt b/release/python/0.4.2/crankshaft/requirements.txt new file mode 100644 index 0000000..3f50cd7 --- /dev/null +++ b/release/python/0.4.2/crankshaft/requirements.txt @@ -0,0 +1,5 @@ +joblib==0.8.3 +numpy==1.6.1 +scipy==0.14.0 +pysal==1.11.2 +scikit-learn==0.14.1 From 947d6ba798e5cc9f23d0b666be40b5073629c99b Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 11 Oct 2016 16:38:18 -0400 Subject: [PATCH 52/96] first add --- src/pg/sql/11_kmeans.sql | 39 ++++++---- .../crankshaft/clustering/kmeans.py | 71 ++++++++++++++++--- 2 files changed, 86 insertions(+), 24 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index f20942f..4985c2f 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -1,21 +1,34 @@ -CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters 
integer,no_init integer default 20) -RETURNS table (cartodb_id integer, cluster_no integer) as $$ - - from crankshaft.clustering import kmeans - return kmeans(query,no_clusters,no_init) +-- Spatial k-means clustering -$$ language plpythonu; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer, no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query, no_clusters, no_init) + +$$ LANGUAGE plpythonu; + +-- Non-spatial k-means clustering +-- query: sql query to retrieve all the needed data + +CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial(query TEXT, col_names TEXT[], no_clusters INTEGER, id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE(rowid BIGINT, cluster_no INTEGER, ) + +from crankshaft.clustering import kmeans_nonspatial +return kmeans_nonspatial(query, colnames, num_clusters, id_col) + +$$ LANGUAGE plpythonu; CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) -RETURNS Numeric[] AS +RETURNS Numeric[] AS $$ -DECLARE +DECLARE newX NUMERIC; newY NUMERIC; newW NUMERIC; BEGIN - IF weight IS NULL OR the_geom IS NULL THEN + IF weight IS NULL OR the_geom IS NULL THEN newX = state[1]; newY = state[2]; newW = state[3]; @@ -30,12 +43,12 @@ END $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) -RETURNS GEOMETRY AS +RETURNS GEOMETRY AS $$ BEGIN - IF state[3] = 0 THEN + IF state[3] = 0 THEN RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); - ELSE + ELSE RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); END IF; END @@ -56,7 +69,7 @@ BEGIN SFUNC = CDB_WeightedMeanS, FINALFUNC = CDB_WeightedMeanF, STYPE = Numeric[], - INITCOND = "{0.0,0.0,0.0}" + INITCOND = "{0.0,0.0,0.0}" ); END IF; END diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 4134062..ee2f304 100644 --- 
a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -1,18 +1,67 @@ from sklearn.cluster import KMeans import plpy -def kmeans(query, no_clusters, no_init=20): - data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids, - array_agg(ST_X(the_geom) order by cartodb_id) xs, - array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a - where the_geom is not null - '''.format(query=query)) - xs = data[0]['xs'] - ys = data[0]['ys'] +def kmeans(query, no_clusters, no_init=20): + """ + + """ + full_query = ''' + SELECT array_agg(cartodb_id ORDER BY cartodb_id) as ids, + array_agg(ST_X(the_geom) ORDER BY cartodb_id) xs, + array_agg(ST_Y(the_geom) ORDER BY cartodb_id) + FROM ({query}) As a + WHERE the_geom IS NOT NULL + '''.format(query=query) + try: + data = plpy.execute(full_query) + except plpy.SPIError, err: + plpy.error("KMeans cluster failed: %s" % err) + + xs = data[0]['xs'] + ys = data[0]['ys'] ids = data[0]['ids'] - km = KMeans(n_clusters= no_clusters, n_init=no_init) - labels = km.fit_predict(zip(xs,ys)) - return zip(ids,labels) + km = KMeans(n_clusters=no_clusters, n_init=no_init) + labels = km.fit_predict(zip(xs, ys)) + return zip(ids, labels) + +def kmeans_nonspatial(query, colnames, num_clusters=5, id_col='cartodb_id'): + """ + query (string): A SQL query to retrieve the data required to do the + k-means clustering analysis, like so: + SELECT * FROM iris_flower_data + colnames (list): a list of the column names which contain the data of + interest, like so: ["sepal_width", "petal_width", + "sepal_length", "petal_length"] + num_clusters (int): number of clusters (greater than zero) + id_col (string): name of the input id_column + """ + + id_colname = 'rowids' + + full_query = ''' + SELECT {cols}, array_agg({id_col}) As {id_colname} + FROM ({query}) As a + '''.format(query=query, + id_col=id_col, + id_colname=id_colname, + cols=', '.join(['array_agg({0}) As col{1}'.format(val, 
idx) + for idx, val in enumerate(colnames)])) + + try: + data = plpy.execute(full_query) + plpy.notice('query: %s' % full_query) + + # fill array with values for kmeans clustering + data = np.array([d[c] for c in d if c != 'id_colname'], + dtype=float).T + except plpy.SPIError, err: + plpy.error('KMeans cluster failed: %s' % err) + + kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(data) + + # zip(ids, labels, means) + return zip(kmeans.labels_, map(str, kmeans.cluster_centers_), + d[0]['rowids']) From 3e1cef9958984e85ed5a6f859f7cb9374c50d9cb Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 11 Oct 2016 16:48:22 -0400 Subject: [PATCH 53/96] fix output signature --- src/pg/sql/11_kmeans.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index 4985c2f..fe078b5 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -12,7 +12,7 @@ $$ LANGUAGE plpythonu; -- query: sql query to retrieve all the needed data CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial(query TEXT, col_names TEXT[], no_clusters INTEGER, id_col TEXT DEFAULT 'cartodb_id') -RETURNS TABLE(rowid BIGINT, cluster_no INTEGER, ) +RETURNS TABLE(cluster_label text, cluster_center text, rowid bigint) from crankshaft.clustering import kmeans_nonspatial return kmeans_nonspatial(query, colnames, num_clusters, id_col) From c47116571f0d6284dea600fbccb052b428909308 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 12 Oct 2016 14:19:19 -0400 Subject: [PATCH 54/96] properly close plpgsql function --- src/pg/sql/11_kmeans.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index fe078b5..59fcf59 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -12,7 +12,7 @@ $$ LANGUAGE plpythonu; -- query: sql query to retrieve all the needed data CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial(query TEXT, col_names TEXT[], no_clusters 
INTEGER, id_col TEXT DEFAULT 'cartodb_id') -RETURNS TABLE(cluster_label text, cluster_center text, rowid bigint) +RETURNS TABLE(cluster_label text, cluster_center text, rowid bigint) AS $$ from crankshaft.clustering import kmeans_nonspatial return kmeans_nonspatial(query, colnames, num_clusters, id_col) From 361505fca9d7767886c9e290f545b49921337b29 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 12 Oct 2016 21:13:51 +0000 Subject: [PATCH 55/96] fixes syntax errors --- src/pg/sql/11_kmeans.sql | 2 +- src/py/crankshaft/crankshaft/clustering/kmeans.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index 59fcf59..2db57e0 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -11,7 +11,7 @@ $$ LANGUAGE plpythonu; -- Non-spatial k-means clustering -- query: sql query to retrieve all the needed data -CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial(query TEXT, col_names TEXT[], no_clusters INTEGER, id_col TEXT DEFAULT 'cartodb_id') +CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial(query TEXT, colnames TEXT[], num_clusters INTEGER, id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE(cluster_label text, cluster_center text, rowid bigint) AS $$ from crankshaft.clustering import kmeans_nonspatial diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index ee2f304..091e87b 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -38,7 +38,7 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, id_col='cartodb_id'): num_clusters (int): number of clusters (greater than zero) id_col (string): name of the input id_column """ - + import numpy as np id_colname = 'rowids' full_query = ''' @@ -55,13 +55,14 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, id_col='cartodb_id'): plpy.notice('query: %s' % full_query) # fill array with values for 
kmeans clustering - data = np.array([d[c] for c in d if c != 'id_colname'], - dtype=float).T + cluster_columns = np.array([data[0][c] for c in data.colnames() + if c != 'id_colname'], + dtype=float).T except plpy.SPIError, err: plpy.error('KMeans cluster failed: %s' % err) - kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(data) + kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(cluster_columns) # zip(ids, labels, means) return zip(kmeans.labels_, map(str, kmeans.cluster_centers_), - d[0]['rowids']) + data[0]['rowids']) From c2e2359e6520a71016bc15fe66613a67fbd526f4 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 12 Oct 2016 17:16:52 -0400 Subject: [PATCH 56/96] addes minmax scaling for variables --- .../crankshaft/clustering/kmeans.py | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index ee2f304..aa2239c 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -38,6 +38,7 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, id_col='cartodb_id'): num_clusters (int): number of clusters (greater than zero) id_col (string): name of the input id_column """ + import numpy as np id_colname = 'rowids' @@ -53,15 +54,26 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, id_col='cartodb_id'): try: data = plpy.execute(full_query) plpy.notice('query: %s' % full_query) - - # fill array with values for kmeans clustering - data = np.array([d[c] for c in d if c != 'id_colname'], - dtype=float).T except plpy.SPIError, err: plpy.error('KMeans cluster failed: %s' % err) + # fill array with values for kmeans clustering + cluster_columns = scale_data( + np.array([data[0][c] for c in data.colnames() + if c != id_col], + dtype=float).T) + kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(data) - # zip(ids, labels, means) return 
zip(kmeans.labels_, map(str, kmeans.cluster_centers_), - d[0]['rowids']) + data[0]['rowids']) + + +def scale_data(input_data): + """ + Scale all input columns from 0 to 1 so that k-means puts them on equal + footing + """ + from sklearn.preprocessing import MinMaxScaler + min_max_scaler = MinMaxScaler() + return min_max_scaler.fit_transform(input_data) From a95423174cae1b1f3a0f30aca643a06a54a66e49 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 13 Oct 2016 10:50:48 -0400 Subject: [PATCH 57/96] adds back alias for kmeans removed by accident --- src/py/crankshaft/crankshaft/clustering/kmeans.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 1c1f178..c99ded1 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -9,7 +9,7 @@ def kmeans(query, no_clusters, no_init=20): full_query = ''' SELECT array_agg(cartodb_id ORDER BY cartodb_id) as ids, array_agg(ST_X(the_geom) ORDER BY cartodb_id) xs, - array_agg(ST_Y(the_geom) ORDER BY cartodb_id) + array_agg(ST_Y(the_geom) ORDER BY cartodb_id) ys FROM ({query}) As a WHERE the_geom IS NOT NULL '''.format(query=query) @@ -87,8 +87,7 @@ def extract_columns(db_resp, id_col): def scale_data(features): """ - Scale all input columns from 0 to 1 so that k-means puts them on equal - footing + Scale all input columns to center on 0 with a standard devation of 1 input_data (numpy array): an array of dimension (n_features, n_samples) """ from sklearn.preprocessing import StandardScaler From 5d2a1881b1ed52b8bc4cc555762b60ceb6c16164 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 13 Oct 2016 15:00:28 +0000 Subject: [PATCH 58/96] make numpy with global scope in module --- src/py/crankshaft/crankshaft/clustering/kmeans.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index c99ded1..18a711f 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -1,6 +1,6 @@ from sklearn.cluster import KMeans import plpy - +import numpy as np def kmeans(query, no_clusters, no_init=20): """ @@ -39,7 +39,6 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, num_clusters (int): number of clusters (greater than zero) id_col (string): name of the input id_column """ - import numpy as np out_id_colname = 'rowids' # TODO: need a random seed? From 0feaf36cf62f8963b05014d50be477778e7e818f Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 13 Oct 2016 15:52:00 +0000 Subject: [PATCH 59/96] outputting consistent labels and centers --- src/py/crankshaft/crankshaft/clustering/kmeans.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 18a711f..86e8931 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -53,14 +53,13 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, try: db_resp = plpy.execute(full_query) - plpy.notice('query: %s' % full_query) except plpy.SPIError, err: plpy.error('k-means cluster analysis failed: %s' % err) # fill array with values for kmeans clustering if standarize: cluster_columns = scale_data( - extract_columns(db_resp, id_col='cartodb_id')) + extract_columns(db_resp, id_col=out_id_colname)) else: cluster_columns = extract_columns(db_resp) @@ -69,7 +68,8 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(cluster_columns) - return zip(kmeans.labels_, map(str, kmeans.cluster_centers_), + return zip(kmeans.labels_, + map(str, kmeans.cluster_centers_[kmeans.labels_]), 
db_resp[0][out_id_colname]) From b255fd3e0656ea8c47a26b16f1e6299329ae0057 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 13 Oct 2016 12:50:46 -0400 Subject: [PATCH 60/96] make private functions more explictly private --- .../crankshaft/crankshaft/clustering/kmeans.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index c99ded1..ac0ce4d 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -60,23 +60,27 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, # fill array with values for kmeans clustering if standarize: - cluster_columns = scale_data( - extract_columns(db_resp, id_col='cartodb_id')) + cluster_columns = _scale_data( + _extract_columns(db_resp, id_col='cartodb_id')) else: - cluster_columns = extract_columns(db_resp) + cluster_columns = _extract_columns(db_resp) # TODO: decide on optimal parameters for most cases # Are there ways of deciding parameters based on inputs? 
kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(cluster_columns) - return zip(kmeans.labels_, map(str, kmeans.cluster_centers_), + return zip(kmeans.predict(X), + map(str, kmeans.cluster_centers_[kmeans.labels_]), db_resp[0][out_id_colname]) -def extract_columns(db_resp, id_col): +def _extract_columns(db_resp, id_col): """ Extract the features from the query and pack them into a NumPy array + db_resp (plpy data object): result of the kmeans request + id_col (string): name of column which has the row id (not a feature of + the analysis) """ return np.array([db_resp[0][c] for c in db_resp.colnames() if c != id_col], @@ -85,10 +89,10 @@ def extract_columns(db_resp, id_col): # -- Preprocessing steps -def scale_data(features): +def _scale_data(features): """ Scale all input columns to center on 0 with a standard devation of 1 - input_data (numpy array): an array of dimension (n_features, n_samples) + features (numpy array): an array of dimension (n_features, n_samples) """ from sklearn.preprocessing import StandardScaler return StandardScaler().fit_transform(features) From a370a2da52632c50611abe74c9094bccea8546ef Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 18 Oct 2016 11:50:59 -0600 Subject: [PATCH 61/96] pep8 updates of test file --- src/py/crankshaft/test/test_cluster_kmeans.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/py/crankshaft/test/test_cluster_kmeans.py b/src/py/crankshaft/test/test_cluster_kmeans.py index aba8e07..f1b738a 100644 --- a/src/py/crankshaft/test/test_cluster_kmeans.py +++ b/src/py/crankshaft/test/test_cluster_kmeans.py @@ -14,25 +14,25 @@ import crankshaft.pysal_utils as pu from crankshaft import random_seeds import json + class KMeansTest(unittest.TestCase): """Testing class for Moran's I functions""" def setUp(self): plpy._reset() - self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read()) + self.cluster_data = json.loads( + 
open(fixture_file('kmeans.json')).read()) self.params = {"subquery": "select * from table", - "no_clusters": "10" - } + "no_clusters": "10"} def test_kmeans(self): data = self.cluster_data - plpy._define_result('select' ,data) + plpy._define_result('select', data) clusters = cc.kmeans('subquery', 2) - labels = [a[1] for a in clusters] - c1 = [a for a in clusters if a[1]==0] - c2 = [a for a in clusters if a[1]==1] - - self.assertEqual(len(np.unique(labels)),2) - self.assertEqual(len(c1),20) - self.assertEqual(len(c2),20) + labels = [a[1] for a in clusters] + c1 = [a for a in clusters if a[1] == 0] + c2 = [a for a in clusters if a[1] == 1] + self.assertEqual(len(np.unique(labels)), 2) + self.assertEqual(len(c1), 20) + self.assertEqual(len(c2), 20) From da23b002cfa62256dd2ebe26cb5d5638ba3998d6 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 18 Oct 2016 11:51:53 -0600 Subject: [PATCH 62/96] rename to match submodule name --- .../test/{test_cluster_kmeans.py => test_clustering_kmeans.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/py/crankshaft/test/{test_cluster_kmeans.py => test_clustering_kmeans.py} (100%) diff --git a/src/py/crankshaft/test/test_cluster_kmeans.py b/src/py/crankshaft/test/test_clustering_kmeans.py similarity index 100% rename from src/py/crankshaft/test/test_cluster_kmeans.py rename to src/py/crankshaft/test/test_clustering_kmeans.py From 54bbd18b02bf74fa2f04e732088407745fb7affb Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 18 Oct 2016 12:12:38 -0600 Subject: [PATCH 63/96] remove unneeded modules from test script --- src/py/crankshaft/test/test_clustering_kmeans.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/py/crankshaft/test/test_clustering_kmeans.py b/src/py/crankshaft/test/test_clustering_kmeans.py index f1b738a..0d531ab 100644 --- a/src/py/crankshaft/test/test_clustering_kmeans.py +++ b/src/py/crankshaft/test/test_clustering_kmeans.py @@ -8,15 +8,12 @@ import numpy as np # 
import sys # sys.modules['plpy'] = plpy from helper import plpy, fixture_file -import numpy as np import crankshaft.clustering as cc -import crankshaft.pysal_utils as pu -from crankshaft import random_seeds import json class KMeansTest(unittest.TestCase): - """Testing class for Moran's I functions""" + """Testing class for k-means spatial""" def setUp(self): plpy._reset() From f800a35fd16ccab47e5417b9f6fdd8f277bba272 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 18 Oct 2016 13:01:31 -0600 Subject: [PATCH 64/96] new format for input data --- src/py/crankshaft/test/test_clustering_kmeans.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/py/crankshaft/test/test_clustering_kmeans.py b/src/py/crankshaft/test/test_clustering_kmeans.py index 0d531ab..8ab300d 100644 --- a/src/py/crankshaft/test/test_clustering_kmeans.py +++ b/src/py/crankshaft/test/test_clustering_kmeans.py @@ -23,7 +23,12 @@ class KMeansTest(unittest.TestCase): "no_clusters": "10"} def test_kmeans(self): - data = self.cluster_data + """ + """ + data = [{'xs': d['xs'], + 'ys': d['ys'], + 'id': d['id']} for d in self.cluster_data] + plpy._define_result('select', data) clusters = cc.kmeans('subquery', 2) labels = [a[1] for a in clusters] From f0c6cca76604c10952d45fad11d4610bf496c75b Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 18 Oct 2016 13:05:56 -0600 Subject: [PATCH 65/96] fix key name --- src/py/crankshaft/test/test_clustering_kmeans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/test/test_clustering_kmeans.py b/src/py/crankshaft/test/test_clustering_kmeans.py index 8ab300d..e130da4 100644 --- a/src/py/crankshaft/test/test_clustering_kmeans.py +++ b/src/py/crankshaft/test/test_clustering_kmeans.py @@ -27,7 +27,7 @@ class KMeansTest(unittest.TestCase): """ data = [{'xs': d['xs'], 'ys': d['ys'], - 'id': d['id']} for d in self.cluster_data] + 'ids': d['ids']} for d in self.cluster_data] 
plpy._define_result('select', data) clusters = cc.kmeans('subquery', 2) From 5d8641732f81c444a92d470e6e34abe1ef51a0da Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 18 Oct 2016 19:30:09 +0000 Subject: [PATCH 66/96] change string formatting --- src/py/crankshaft/crankshaft/clustering/kmeans.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 84f83f7..e44b742 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -7,13 +7,11 @@ def kmeans(query, no_clusters, no_init=20): """ """ - full_query = ''' - SELECT array_agg(cartodb_id ORDER BY cartodb_id) as ids, - array_agg(ST_X(the_geom) ORDER BY cartodb_id) xs, - array_agg(ST_Y(the_geom) ORDER BY cartodb_id) ys - FROM ({query}) As a - WHERE the_geom IS NOT NULL - '''.format(query=query) + full_query = ("SELECT array_agg(cartodb_id ORDER BY cartodb_id) as ids," + "array_agg(ST_X(the_geom) ORDER BY cartodb_id) xs," + "array_agg(ST_Y(the_geom) ORDER BY cartodb_id) ys " + "FROM ({query}) As a " + "WHERE the_geom IS NOT NULL").format(query=query) try: data = plpy.execute(full_query) except plpy.SPIError, err: From 3e0dba35221cdbc5a66f4e9064ba73dba81c8775 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 18 Oct 2016 21:13:34 -0600 Subject: [PATCH 67/96] update comments --- src/py/crankshaft/crankshaft/clustering/kmeans.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 84f83f7..21d76ea 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -5,7 +5,8 @@ import numpy as np def kmeans(query, no_clusters, no_init=20): """ - + find centers based on clusteres of latitude/longitude pairs + query: SQL query that has 
a WGS84 geometry (the_geom) """ full_query = ''' SELECT array_agg(cartodb_id ORDER BY cartodb_id) as ids, @@ -17,8 +18,9 @@ def kmeans(query, no_clusters, no_init=20): try: data = plpy.execute(full_query) except plpy.SPIError, err: - plpy.error("KMeans cluster failed: %s" % err) + plpy.error("k-means (spatial) cluster analysis failed: %s" % err) + # Unpack query response xs = data[0]['xs'] ys = data[0]['ys'] ids = data[0]['ids'] @@ -55,9 +57,9 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, try: db_resp = plpy.execute(full_query) except plpy.SPIError, err: - plpy.error('k-means cluster analysis failed: %s' % err) + plpy.error("k-means (non-spatial) cluster analysis failed: %s" % err) - # fill array with values for kmeans clustering + # fill array with values for k-means clustering if standarize: cluster_columns = _scale_data( _extract_columns(db_resp, id_col=out_id_colname)) From 4389c9538d9c65c459dfaf4430683669362c4938 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 21 Oct 2016 10:13:21 -0600 Subject: [PATCH 68/96] small updates for readability --- src/py/crankshaft/crankshaft/clustering/kmeans.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 0df9ce0..df024a1 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -74,15 +74,15 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, db_resp[0][out_id_colname]) -def _extract_columns(db_resp, id_col): +def _extract_columns(db_resp, id_col_name): """ Extract the features from the query and pack them into a NumPy array db_resp (plpy data object): result of the kmeans request - id_col (string): name of column which has the row id (not a feature of - the analysis) + id_col_name (string): name of column which has the row id (not a + feature of the analysis) """ return np.array([db_resp[0][c] for c 
in db_resp.colnames() - if c != id_col], + if c != id_col_name], dtype=float).T # -- Preprocessing steps @@ -91,7 +91,8 @@ def _extract_columns(db_resp, id_col): def _scale_data(features): """ Scale all input columns to center on 0 with a standard devation of 1 - features (numpy array): an array of dimension (n_features, n_samples) + + features (numpy matrix): features of dimension (n_features, n_samples) """ from sklearn.preprocessing import StandardScaler return StandardScaler().fit_transform(features) From a188b2e10415b91670f46401610d62f4ffa35316 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 21 Oct 2016 15:51:54 -0600 Subject: [PATCH 69/96] adds missing arguments --- src/pg/sql/11_kmeans.sql | 14 ++++++++++---- src/py/crankshaft/crankshaft/clustering/kmeans.py | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index 2db57e0..6a9d1a9 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -11,12 +11,18 @@ $$ LANGUAGE plpythonu; -- Non-spatial k-means clustering -- query: sql query to retrieve all the needed data -CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial(query TEXT, colnames TEXT[], num_clusters INTEGER, id_col TEXT DEFAULT 'cartodb_id') +CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial( + query TEXT, + colnames TEXT[], + num_clusters INTEGER, + id_col TEXT DEFAULT 'cartodb_id', + standarize BOOLEAN DEFAULT true +) RETURNS TABLE(cluster_label text, cluster_center text, rowid bigint) AS $$ -from crankshaft.clustering import kmeans_nonspatial -return kmeans_nonspatial(query, colnames, num_clusters, id_col) - + from crankshaft.clustering import kmeans_nonspatial + return kmeans_nonspatial(query, colnames, num_clusters, + id_col, standarize) $$ LANGUAGE plpythonu; diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index df024a1..6e972e5 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ 
b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -62,7 +62,7 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, cluster_columns = _scale_data( _extract_columns(db_resp, id_col=out_id_colname)) else: - cluster_columns = _extract_columns(db_resp) + cluster_columns = _extract_columns(db_resp, id_col=out_id_colname) # TODO: decide on optimal parameters for most cases # Are there ways of deciding parameters based on inputs? From 64c4b6611c866853c7fb079d3465c7ee10e84f3c Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 10 Nov 2016 16:56:04 +0000 Subject: [PATCH 70/96] changes cluster centers to json --- src/pg/sql/11_kmeans.sql | 6 +++--- src/py/crankshaft/crankshaft/clustering/kmeans.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index 6a9d1a9..175ab6b 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -15,14 +15,14 @@ CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial( query TEXT, colnames TEXT[], num_clusters INTEGER, - id_col TEXT DEFAULT 'cartodb_id', + id_colname TEXT DEFAULT 'cartodb_id', standarize BOOLEAN DEFAULT true ) -RETURNS TABLE(cluster_label text, cluster_center text, rowid bigint) AS $$ +RETURNS TABLE(cluster_label text, cluster_center json, rowid bigint) AS $$ from crankshaft.clustering import kmeans_nonspatial return kmeans_nonspatial(query, colnames, num_clusters, - id_col, standarize) + id_colname, standarize) $$ LANGUAGE plpythonu; diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 6e972e5..5bd7830 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -40,6 +40,7 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, num_clusters (int): number of clusters (greater than zero) id_col (string): name of the input id_column """ + import json out_id_colname = 'rowids' # TODO: need a random seed? 
@@ -60,7 +61,7 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, # fill array with values for k-means clustering if standarize: cluster_columns = _scale_data( - _extract_columns(db_resp, id_col=out_id_colname)) + _extract_columns(db_resp, out_id_colname)) else: cluster_columns = _extract_columns(db_resp, id_col=out_id_colname) @@ -69,8 +70,9 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(cluster_columns) + centers = [json.dumps(dict(zip(colnames, c))) for c in kmeans.cluster_centers_[kmeans.labels_]] return zip(kmeans.labels_, - map(str, kmeans.cluster_centers_[kmeans.labels_]), + centers, db_resp[0][out_id_colname]) From b6dae5e3801e7f33360c16172b41b4c62077614e Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 15 Nov 2016 00:15:23 +0100 Subject: [PATCH 71/96] adding silhouette --- src/py/crankshaft/crankshaft/clustering/kmeans.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 5bd7830..52139d1 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -41,6 +41,8 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, id_col (string): name of the input id_column """ import json + from sklearn import metrics + out_id_colname = 'rowids' # TODO: need a random seed? 
@@ -70,7 +72,13 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(cluster_columns) - centers = [json.dumps(dict(zip(colnames, c))) for c in kmeans.cluster_centers_[kmeans.labels_]] + centers = [json.dumps(dict(zip(colnames, c))) + for c in kmeans.cluster_centers_[kmeans.labels_]] + + silhouettes = metrics.silhouette_samples(cluster_columns, + labels, + metric='sqeuclidean') + return zip(kmeans.labels_, centers, db_resp[0][out_id_colname]) From af536757febee26ae7a699d755209e35fc57c246 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 14 Nov 2016 23:29:38 +0000 Subject: [PATCH 72/96] adds silhouettes to output --- src/pg/sql/11_kmeans.sql | 2 +- src/py/crankshaft/crankshaft/clustering/kmeans.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index 175ab6b..c9ae131 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -18,7 +18,7 @@ CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial( id_colname TEXT DEFAULT 'cartodb_id', standarize BOOLEAN DEFAULT true ) -RETURNS TABLE(cluster_label text, cluster_center json, rowid bigint) AS $$ +RETURNS TABLE(cluster_label text, cluster_center json, silhouettes numeric, rowid bigint) AS $$ from crankshaft.clustering import kmeans_nonspatial return kmeans_nonspatial(query, colnames, num_clusters, diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 52139d1..d070bf9 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -76,11 +76,12 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, for c in kmeans.cluster_centers_[kmeans.labels_]] silhouettes = metrics.silhouette_samples(cluster_columns, - labels, + kmeans.labels_, metric='sqeuclidean') return zip(kmeans.labels_, centers, + silhouettes, db_resp[0][out_id_colname]) From 
cbe8571546e2f89170ad7d294d4af7022112f3fe Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 15 Nov 2016 10:10:07 +0100 Subject: [PATCH 73/96] fixes argument in not-standardize --- src/py/crankshaft/crankshaft/clustering/kmeans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index d070bf9..383584e 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -65,7 +65,7 @@ def kmeans_nonspatial(query, colnames, num_clusters=5, cluster_columns = _scale_data( _extract_columns(db_resp, out_id_colname)) else: - cluster_columns = _extract_columns(db_resp, id_col=out_id_colname) + cluster_columns = _extract_columns(db_resp, out_id_colname) # TODO: decide on optimal parameters for most cases # Are there ways of deciding parameters based on inputs? From 0867e69d1f21cae22c611612cf9fbe197c09c284 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 15 Nov 2016 11:19:15 +0100 Subject: [PATCH 74/96] replace plpy method colnames --- src/py/crankshaft/crankshaft/clustering/kmeans.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 383584e..e4fc7e5 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -92,7 +92,9 @@ def _extract_columns(db_resp, id_col_name): id_col_name (string): name of column which has the row id (not a feature of the analysis) """ - return np.array([db_resp[0][c] for c in db_resp.colnames() + keys = [k for k in db_resp[0].keys()] + + return np.array([db_resp[0][c] for c in keys if c != id_col_name], dtype=float).T From 0d40080f6cdd1b5a47e70377b0317493d660081f Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 15 Nov 2016 12:02:42 +0100 Subject: [PATCH 75/96] move back to 
colnames --- src/py/crankshaft/crankshaft/clustering/kmeans.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index e4fc7e5..383584e 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -92,9 +92,7 @@ def _extract_columns(db_resp, id_col_name): id_col_name (string): name of column which has the row id (not a feature of the analysis) """ - keys = [k for k in db_resp[0].keys()] - - return np.array([db_resp[0][c] for c in keys + return np.array([db_resp[0][c] for c in db_resp.colnames() if c != id_col_name], dtype=float).T From ded26dc46bac37f13c17461ec6280d0109d62be3 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 15 Nov 2016 12:03:24 +0100 Subject: [PATCH 76/96] adding class for database response --- src/py/crankshaft/test/helper.py | 1 + src/py/crankshaft/test/mock_plpy.py | 27 +++++++++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/py/crankshaft/test/helper.py b/src/py/crankshaft/test/helper.py index 7d28b94..b273354 100644 --- a/src/py/crankshaft/test/helper.py +++ b/src/py/crankshaft/test/helper.py @@ -2,6 +2,7 @@ import unittest from mock_plpy import MockPlPy plpy = MockPlPy() +from mock_plpy import MockDBResponse import sys sys.modules['plpy'] = plpy diff --git a/src/py/crankshaft/test/mock_plpy.py b/src/py/crankshaft/test/mock_plpy.py index a982ebe..05d0f21 100644 --- a/src/py/crankshaft/test/mock_plpy.py +++ b/src/py/crankshaft/test/mock_plpy.py @@ -1,12 +1,13 @@ import re + class MockCursor: def __init__(self, data): self.cursor_pos = 0 self.data = data def fetch(self, batch_size): - batch = self.data[self.cursor_pos : self.cursor_pos + batch_size] + batch = self.data[self.cursor_pos:self.cursor_pos + batch_size] self.cursor_pos += batch_size return batch @@ -45,8 +46,22 @@ class MockPlPy: data = self.execute(query) 
return MockCursor(data) - def execute(self, query): # TODO: additional arguments - for result in self.results: - if result[0].match(query): - return result[1] - return [] + # TODO: additional arguments + def execute(self, query): + for result in self.results: + if result[0].match(query): + return result[1] + return [] + + +class MockDBResponse: + def __init__(self, data, colnames=None): + self.data = data + if colnames is None: + self.colnames = data[0].keys() + else: + self.colnames = colnames + + + def colnames(self): + return self.colnames From 84d33d841f84e6ff8aafdd6c2dcf5b0727cc95c6 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 15 Nov 2016 12:03:54 +0100 Subject: [PATCH 77/96] tests for new class --- .../crankshaft/test/test_clustering_kmeans.py | 38 ++++++++++++++++++- .../crankshaft/test/test_clustering_moran.py | 15 +++++--- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/src/py/crankshaft/test/test_clustering_kmeans.py b/src/py/crankshaft/test/test_clustering_kmeans.py index e130da4..03cbd0a 100644 --- a/src/py/crankshaft/test/test_clustering_kmeans.py +++ b/src/py/crankshaft/test/test_clustering_kmeans.py @@ -7,9 +7,10 @@ import numpy as np # # import sys # sys.modules['plpy'] = plpy -from helper import plpy, fixture_file +from helper import plpy, fixture_file, MockDBResponse import crankshaft.clustering as cc import json +from collections import OrderedDict class KMeansTest(unittest.TestCase): @@ -38,3 +39,38 @@ class KMeansTest(unittest.TestCase): self.assertEqual(len(np.unique(labels)), 2) self.assertEqual(len(c1), 20) self.assertEqual(len(c2), 20) + + +class KMeansNonspatialTest(unittest.TestCase): + """Testing class for k-means non-spatial""" + + def setUp(self): + plpy._reset() + + # self.cluster_data = json.loads( + # open(fixture_file('kmeans-nonspatial.json')).read()) + + self.params = {"subquery": "SELECT * FROM TABLE", + "n_clusters": 5} + + def test_kmeans_nonspatial(self): + """ + test for k-means non-spatial + """ 
+ data_raw = [OrderedDict([("col1", [1, 1, 1, 4, 4, 4]), + ("col2", [2, 4, 0, 2, 4, 0]), + ("rowids", [1, 2, 3, 4, 5, 6])])] + + data_obj = MockDBResponse(data_raw, [k for k in data_raw[0] + if k != 'rowids']) + plpy._define_result('select', data_obj) + clusters = cc.kmeans_nonspatial('subquery', ['col1', 'col2'], 4) + + cl1 = clusters[0][1] + cl2 = clusters[3][1] + + for idx, val in enumerate(clusters): + if idx < 3: + self.assertEqual(val[1], cl1) + else: + self.assertEqual(val[1], cl2) diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index cb54902..83256ad 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -7,13 +7,13 @@ import numpy as np # # import sys # sys.modules['plpy'] = plpy -from helper import plpy, fixture_file +from helper import plpy, fixture_file, MockDBResponse import crankshaft.clustering as cc import crankshaft.pysal_utils as pu from crankshaft import random_seeds import json - +from collections import OrderedDict class MoranTest(unittest.TestCase): """Testing class for Moran's I functions""" @@ -58,11 +58,14 @@ class MoranTest(unittest.TestCase): def test_moran_local(self): """Test Moran's I local""" - data = [{'id': d['id'], - 'attr1': d['value'], - 'neighbors': d['neighbors']} for d in self.neighbors_data] + data = [OrderedDict([('id', d['id']), + ('attr1', d['value']), + ('neighbors', d['neighbors'])]) + for d in self.neighbors_data] - plpy._define_result('select', data) + db_resp = MockDBResponse(data) + + plpy._define_result('select', db_resp) random_seeds.set_random_seeds(1234) result = cc.moran_local('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') From 7eee4faac1d5fb966ed74a8d4afdbb05d7c3f20c Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 18 Nov 2016 17:22:02 +0000 Subject: [PATCH 78/96] rename to match numbering elsewhere --- src/pg/test/sql/{05_kmeans_test.sql => 11_kmeans_test.sql} | 0 
1 file changed, 0 insertions(+), 0 deletions(-) rename src/pg/test/sql/{05_kmeans_test.sql => 11_kmeans_test.sql} (100%) diff --git a/src/pg/test/sql/05_kmeans_test.sql b/src/pg/test/sql/11_kmeans_test.sql similarity index 100% rename from src/pg/test/sql/05_kmeans_test.sql rename to src/pg/test/sql/11_kmeans_test.sql From 83f1900512715705dc44871b70215f048cdfbc00 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 18 Nov 2016 17:24:18 +0000 Subject: [PATCH 79/96] creates class-based approach to analysis methods --- src/pg/sql/11_kmeans.sql | 10 +- .../crankshaft/clustering/kmeans.py | 180 +++++++++++------- .../crankshaft/test/test_clustering_kmeans.py | 45 +++-- 3 files changed, 143 insertions(+), 92 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index c9ae131..1dc6d00 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -3,8 +3,9 @@ CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer, no_init integer default 20) RETURNS table (cartodb_id integer, cluster_no integer) as $$ - from crankshaft.clustering import kmeans - return kmeans(query, no_clusters, no_init) + from crankshaft.clustering import Kmeans + kmeans = Kmeans() + return kmeans.spatial(query, no_clusters, no_init) $$ LANGUAGE plpythonu; @@ -20,8 +21,9 @@ CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial( ) RETURNS TABLE(cluster_label text, cluster_center json, silhouettes numeric, rowid bigint) AS $$ - from crankshaft.clustering import kmeans_nonspatial - return kmeans_nonspatial(query, colnames, num_clusters, + from crankshaft.clustering import Kmeans + kmeans = Kmeans() + return kmeans.nonspatial(query, colnames, num_clusters, id_colname, standarize) $$ LANGUAGE plpythonu; diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 383584e..48b9bd3 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ 
-3,101 +3,135 @@ import plpy import numpy as np -def kmeans(query, no_clusters, no_init=20): - """ - find centers based on clusteres of latitude/longitude pairs - query: SQL query that has a WGS84 geometry (the_geom) - """ - full_query = ("SELECT array_agg(cartodb_id ORDER BY cartodb_id) as ids," - "array_agg(ST_X(the_geom) ORDER BY cartodb_id) xs," - "array_agg(ST_Y(the_geom) ORDER BY cartodb_id) ys " - "FROM ({query}) As a " - "WHERE the_geom IS NOT NULL").format(query=query) - try: - data = plpy.execute(full_query) - except plpy.SPIError, err: - plpy.error("k-means (spatial) cluster analysis failed: %s" % err) +class QueryRunner: + def get_moran(self, query): + """fetch data for moran's i analyses""" + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + except plpy.SPIError, e: + plpy.error('Analysis failed: %s' % e) + return pu.empty_zipped_array(2) - # Unpack query response - xs = data[0]['xs'] - ys = data[0]['ys'] - ids = data[0]['ids'] + def get_columns(self, query, standarize): + """fetch data for non-spatial kmeans""" + try: + db_resp = plpy.execute(query) + except plpy.SPIError, err: + plpy.error('Analysis failed: %s' % err) - km = KMeans(n_clusters=no_clusters, n_init=no_init) - labels = km.fit_predict(zip(xs, ys)) - return zip(ids, labels) + return db_resp + + def get_result(self, query): + """fetch data for spatial kmeans""" + try: + data = plpy.execute(query) + except plpy.SPIError, err: + plpy.error("Analysis failed: %s" % err) + return data -def kmeans_nonspatial(query, colnames, num_clusters=5, - id_col='cartodb_id', standarize=True): - """ - query (string): A SQL query to retrieve the data required to do the - k-means clustering analysis, like so: - SELECT * FROM iris_flower_data - colnames (list): a list of the column names which contain the data of - interest, like so: ["sepal_width", "petal_width", - "sepal_length", "petal_length"] - num_clusters (int): number of 
clusters (greater than zero) - id_col (string): name of the input id_column - """ - import json - from sklearn import metrics +class Kmeans: + def __init__(self, query_runner=None): + if query_runner is None: + self.query_runner = QueryRunner() + else: + self.query_runner = query_runner - out_id_colname = 'rowids' - # TODO: need a random seed? + def spatial(self, query, no_clusters, no_init=20): + """ + find centers based on clusters of latitude/longitude pairs + query: SQL query that has a WGS84 geometry (the_geom) + """ + full_query = ("SELECT " + "array_agg(cartodb_id ORDER BY cartodb_id) as ids," + "array_agg(ST_X(the_geom) ORDER BY cartodb_id) xs," + "array_agg(ST_Y(the_geom) ORDER BY cartodb_id) ys " + "FROM ({query}) As a " + "WHERE the_geom IS NOT NULL").format(query=query) - full_query = ''' - SELECT {cols}, array_agg({id_col}) As {out_id_colname} - FROM ({query}) As a - '''.format(query=query, - id_col=id_col, - out_id_colname=out_id_colname, - cols=', '.join(['array_agg({0}) As col{1}'.format(val, idx) - for idx, val in enumerate(colnames)])) + data = self.query_runner.get_result(full_query) - try: - db_resp = plpy.execute(full_query) - except plpy.SPIError, err: - plpy.error("k-means (non-spatial) cluster analysis failed: %s" % err) + # Unpack query response + xs = data[0]['xs'] + ys = data[0]['ys'] + ids = data[0]['ids'] - # fill array with values for k-means clustering - if standarize: - cluster_columns = _scale_data( - _extract_columns(db_resp, out_id_colname)) - else: - cluster_columns = _extract_columns(db_resp, out_id_colname) + km = KMeans(n_clusters=no_clusters, n_init=no_init) + labels = km.fit_predict(zip(xs, ys)) + return zip(ids, labels) - # TODO: decide on optimal parameters for most cases - # Are there ways of deciding parameters based on inputs? 
- kmeans = KMeans(n_clusters=num_clusters, - random_state=0).fit(cluster_columns) + def nonspatial(self, query, colnames, num_clusters=5, + id_col='cartodb_id', standarize=True): + """ + query (string): A SQL query to retrieve the data required to do the + k-means clustering analysis, like so: + SELECT * FROM iris_flower_data + colnames (list): a list of the column names which contain the data + of interest, like so: ["sepal_width", + "petal_width", + "sepal_length", + "petal_length"] + num_clusters (int): number of clusters (greater than zero) + id_col (string): name of the input id_column + """ + import json + from sklearn import metrics - centers = [json.dumps(dict(zip(colnames, c))) - for c in kmeans.cluster_centers_[kmeans.labels_]] + out_id_colname = 'rowids' + # TODO: need a random seed? - silhouettes = metrics.silhouette_samples(cluster_columns, - kmeans.labels_, - metric='sqeuclidean') + full_query = ''' + SELECT {cols}, array_agg({id_col}) As {out_id_colname} + FROM ({query}) As a + '''.format(query=query, + id_col=id_col, + out_id_colname=out_id_colname, + cols=', '.join(['array_agg({0}) As col{1}'.format(val, idx) + for idx, val in enumerate(colnames)])) - return zip(kmeans.labels_, - centers, - silhouettes, - db_resp[0][out_id_colname]) + db_resp = self.query_runner.get_columns(full_query, standarize) + + # fill array with values for k-means clustering + if standarize: + cluster_columns = _scale_data( + _extract_columns(db_resp, colnames)) + else: + cluster_columns = _extract_columns(db_resp, colnames) + + print str(cluster_columns) + # TODO: decide on optimal parameters for most cases + # Are there ways of deciding parameters based on inputs? 
+ kmeans = KMeans(n_clusters=num_clusters, + random_state=0).fit(cluster_columns) + + centers = [json.dumps(dict(zip(colnames, c))) + for c in kmeans.cluster_centers_[kmeans.labels_]] + + silhouettes = metrics.silhouette_samples(cluster_columns, + kmeans.labels_, + metric='sqeuclidean') + + return zip(kmeans.labels_, + centers, + silhouettes, + db_resp[0][out_id_colname]) -def _extract_columns(db_resp, id_col_name): +# -- Preprocessing steps + +def _extract_columns(db_resp, colnames): """ Extract the features from the query and pack them into a NumPy array db_resp (plpy data object): result of the kmeans request id_col_name (string): name of column which has the row id (not a feature of the analysis) """ - return np.array([db_resp[0][c] for c in db_resp.colnames() - if c != id_col_name], + return np.array([db_resp[0][c] for c in colnames], dtype=float).T -# -- Preprocessing steps - def _scale_data(features): """ diff --git a/src/py/crankshaft/test/test_clustering_kmeans.py b/src/py/crankshaft/test/test_clustering_kmeans.py index 03cbd0a..8e5c9b4 100644 --- a/src/py/crankshaft/test/test_clustering_kmeans.py +++ b/src/py/crankshaft/test/test_clustering_kmeans.py @@ -7,17 +7,31 @@ import numpy as np # # import sys # sys.modules['plpy'] = plpy -from helper import plpy, fixture_file, MockDBResponse +from helper import plpy, fixture_file +from crankshaft.clustering import Kmeans +from crankshaft.clustering import QueryRunner import crankshaft.clustering as cc + +from crankshaft import random_seeds import json from collections import OrderedDict +class FakeQueryRunner(QueryRunner): + def __init__(self, mocked_result): + self.mocked_result = mocked_result + + def get_result(self, query): + return self.mocked_result + + def get_columns(self, query, standarize): + return self.mocked_result + + class KMeansTest(unittest.TestCase): """Testing class for k-means spatial""" def setUp(self): - plpy._reset() self.cluster_data = json.loads( open(fixture_file('kmeans.json')).read()) 
self.params = {"subquery": "select * from table", @@ -30,8 +44,9 @@ class KMeansTest(unittest.TestCase): 'ys': d['ys'], 'ids': d['ids']} for d in self.cluster_data] - plpy._define_result('select', data) - clusters = cc.kmeans('subquery', 2) + random_seeds.set_random_seeds(1234) + kmeans = Kmeans(FakeQueryRunner(data)) + clusters = kmeans.spatial('subquery', 2) labels = [a[1] for a in clusters] c1 = [a for a in clusters if a[1] == 0] c2 = [a for a in clusters if a[1] == 1] @@ -47,9 +62,6 @@ class KMeansNonspatialTest(unittest.TestCase): def setUp(self): plpy._reset() - # self.cluster_data = json.loads( - # open(fixture_file('kmeans-nonspatial.json')).read()) - self.params = {"subquery": "SELECT * FROM TABLE", "n_clusters": 5} @@ -57,20 +69,23 @@ class KMeansNonspatialTest(unittest.TestCase): """ test for k-means non-spatial """ + # data from: + # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn-cluster-kmeans data_raw = [OrderedDict([("col1", [1, 1, 1, 4, 4, 4]), ("col2", [2, 4, 0, 2, 4, 0]), ("rowids", [1, 2, 3, 4, 5, 6])])] - data_obj = MockDBResponse(data_raw, [k for k in data_raw[0] - if k != 'rowids']) - plpy._define_result('select', data_obj) - clusters = cc.kmeans_nonspatial('subquery', ['col1', 'col2'], 4) + random_seeds.set_random_seeds(1234) + kmeans = Kmeans(FakeQueryRunner(data_raw)) + print 'asfasdfasd' + clusters = kmeans.nonspatial('subquery', ['col1', 'col2'], 2) + print str([c[0] for c in clusters]) - cl1 = clusters[0][1] - cl2 = clusters[3][1] + cl1 = clusters[0][0] + cl2 = clusters[3][0] for idx, val in enumerate(clusters): if idx < 3: - self.assertEqual(val[1], cl1) + self.assertEqual(val[0], cl1) else: - self.assertEqual(val[1], cl2) + self.assertEqual(val[0], cl2) From a9add4b49c49c5483f5aed13598ac076b376d8c1 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 18 Nov 2016 17:40:57 +0000 Subject: [PATCH 80/96] rename results file --- src/pg/test/expected/{05_kmeans_test.out => 11_kmeans_test.out} | 0 1 
file changed, 0 insertions(+), 0 deletions(-) rename src/pg/test/expected/{05_kmeans_test.out => 11_kmeans_test.out} (100%) diff --git a/src/pg/test/expected/05_kmeans_test.out b/src/pg/test/expected/11_kmeans_test.out similarity index 100% rename from src/pg/test/expected/05_kmeans_test.out rename to src/pg/test/expected/11_kmeans_test.out From a8bd122762c321a14263a528d6265089dbdaba93 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 18 Nov 2016 17:46:29 +0000 Subject: [PATCH 81/96] remove mock plpy dependencies --- src/py/crankshaft/test/helper.py | 1 - src/py/crankshaft/test/mock_plpy.py | 13 ----- .../crankshaft/test/test_clustering_kmeans.py | 4 +- .../crankshaft/test/test_clustering_moran.py | 58 +++++++++++-------- 4 files changed, 36 insertions(+), 40 deletions(-) diff --git a/src/py/crankshaft/test/helper.py b/src/py/crankshaft/test/helper.py index b273354..7d28b94 100644 --- a/src/py/crankshaft/test/helper.py +++ b/src/py/crankshaft/test/helper.py @@ -2,7 +2,6 @@ import unittest from mock_plpy import MockPlPy plpy = MockPlPy() -from mock_plpy import MockDBResponse import sys sys.modules['plpy'] = plpy diff --git a/src/py/crankshaft/test/mock_plpy.py b/src/py/crankshaft/test/mock_plpy.py index 05d0f21..e8a279d 100644 --- a/src/py/crankshaft/test/mock_plpy.py +++ b/src/py/crankshaft/test/mock_plpy.py @@ -52,16 +52,3 @@ class MockPlPy: if result[0].match(query): return result[1] return [] - - -class MockDBResponse: - def __init__(self, data, colnames=None): - self.data = data - if colnames is None: - self.colnames = data[0].keys() - else: - self.colnames = colnames - - - def colnames(self): - return self.colnames diff --git a/src/py/crankshaft/test/test_clustering_kmeans.py b/src/py/crankshaft/test/test_clustering_kmeans.py index 8e5c9b4..830ee9d 100644 --- a/src/py/crankshaft/test/test_clustering_kmeans.py +++ b/src/py/crankshaft/test/test_clustering_kmeans.py @@ -7,7 +7,7 @@ import numpy as np # # import sys # sys.modules['plpy'] = plpy -from 
helper import plpy, fixture_file +from helper import fixture_file from crankshaft.clustering import Kmeans from crankshaft.clustering import QueryRunner import crankshaft.clustering as cc @@ -60,8 +60,6 @@ class KMeansNonspatialTest(unittest.TestCase): """Testing class for k-means non-spatial""" def setUp(self): - plpy._reset() - self.params = {"subquery": "SELECT * FROM TABLE", "n_clusters": 5} diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index 83256ad..0a320fb 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -7,19 +7,30 @@ import numpy as np # # import sys # sys.modules['plpy'] = plpy -from helper import plpy, fixture_file, MockDBResponse - -import crankshaft.clustering as cc +from helper import fixture_file +from crankshaft.clustering import Moran +from crankshaft.clustering import QueryRunner import crankshaft.pysal_utils as pu from crankshaft import random_seeds import json from collections import OrderedDict + +class FakeQueryRunner(QueryRunner): + def __init__(self, mocked_result): + self.mocked_result = mocked_result + + def get_result(self, query): + return self.mocked_result + + def get_columns(self, query): + return self.mocked_result + + class MoranTest(unittest.TestCase): """Testing class for Moran's I functions""" def setUp(self): - plpy._reset() self.params = {"id_col": "cartodb_id", "attr1": "andy", "attr2": "jay_z", @@ -39,36 +50,36 @@ class MoranTest(unittest.TestCase): def test_map_quads(self): """Test map_quads""" - self.assertEqual(cc.map_quads(1), 'HH') - self.assertEqual(cc.map_quads(2), 'LH') - self.assertEqual(cc.map_quads(3), 'LL') - self.assertEqual(cc.map_quads(4), 'HL') - self.assertEqual(cc.map_quads(33), None) - self.assertEqual(cc.map_quads('andy'), None) + from crankshaft.clustering import map_quads + self.assertEqual(map_quads(1), 'HH') + self.assertEqual(map_quads(2), 'LH') + 
self.assertEqual(map_quads(3), 'LL') + self.assertEqual(map_quads(4), 'HL') + self.assertEqual(map_quads(33), None) + self.assertEqual(map_quads('andy'), None) def test_quad_position(self): """Test lisa_sig_vals""" + from crankshaft.clustering import quad_position quads = np.array([1, 2, 3, 4], np.int) ans = np.array(['HH', 'LH', 'LL', 'HL']) - test_ans = cc.quad_position(quads) + test_ans = quad_position(quads) self.assertTrue((test_ans == ans).all()) - def test_moran_local(self): + def test_local_stat(self): """Test Moran's I local""" data = [OrderedDict([('id', d['id']), ('attr1', d['value']), ('neighbors', d['neighbors'])]) for d in self.neighbors_data] - db_resp = MockDBResponse(data) - - plpy._define_result('select', db_resp) + moran = Moran(FakeQueryRunner(data)) random_seeds.set_random_seeds(1234) - result = cc.moran_local('subquery', 'value', - 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = moran.local_stat('subquery', 'value', + 'knn', 5, 99, 'the_geom', 'cartodb_id') result = [(row[0], row[1]) for row in result] zipped_values = zip(result, self.moran_data) @@ -83,10 +94,10 @@ class MoranTest(unittest.TestCase): 'attr2': 1, 'neighbors': d['neighbors']} for d in self.neighbors_data] - plpy._define_result('select', data) random_seeds.set_random_seeds(1234) - result = cc.moran_local_rate('subquery', 'numerator', 'denominator', - 'knn', 5, 99, 'the_geom', 'cartodb_id') + moran = Moran(FakeQueryRunner(data)) + result = moran.local_rate_stat('subquery', 'numerator', 'denominator', + 'knn', 5, 99, 'the_geom', 'cartodb_id') result = [(row[0], row[1]) for row in result] zipped_values = zip(result, self.moran_data) @@ -99,10 +110,11 @@ class MoranTest(unittest.TestCase): data = [{'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors']} for d in self.neighbors_data] - plpy._define_result('select', data) random_seeds.set_random_seeds(1235) - result = cc.moran('table', 'value', - 'knn', 5, 99, 'the_geom', 'cartodb_id') + moran = 
Moran(FakeQueryRunner(data)) + result = moran.global_stat('table', 'value', + 'knn', 5, 99, 'the_geom', + 'cartodb_id') result_moran = result[0][0] expected_moran = np.array([row[0] for row in self.moran_data]).mean() From 2738c1f29cd559fb0db174fad74fcde46f463844 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 18 Nov 2016 17:46:55 +0000 Subject: [PATCH 82/96] move to class-based module --- src/pg/sql/10_moran.sql | 23 +- .../crankshaft/crankshaft/clustering/moran.py | 295 ++++++++---------- 2 files changed, 151 insertions(+), 167 deletions(-) diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index 070392d..bd3f96d 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -10,9 +10,11 @@ CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran NUMERIC, significance NUMERIC) AS $$ - from crankshaft.clustering import moran + from crankshaft.clustering import Moran # TODO: use named parameters or a dictionary - return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + moran = Moran() + return moran.global_stat(subquery, column_name, w_type, + num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- Moran's I Local (internal function) @@ -27,9 +29,11 @@ CREATE OR REPLACE FUNCTION id_col TEXT) RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - from crankshaft.clustering import moran_local + from crankshaft.clustering import Moran + moran = Moran() # TODO: use named parameters or a dictionary - return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + return moran.local_stat(subquery, column_name, w_type, + num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- Moran's I Local (public-facing function) @@ -120,9 +124,11 @@ CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran FLOAT, significance FLOAT) AS $$ - from crankshaft.clustering import 
moran_local + from crankshaft.clustering import Moran + moran = Moran() # TODO: use named parameters or a dictionary - return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + return moran.global_rate_stat(subquery, numerator, denominator, w_type, + num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; @@ -140,9 +146,10 @@ CREATE OR REPLACE FUNCTION RETURNS TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - from crankshaft.clustering import moran_local_rate + from crankshaft.clustering import Moran + moran = Moran() # TODO: use named parameters or a dictionary - return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + return moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- Moran's I Local Rate (public-facing function) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 4e7086e..ee82932 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -15,204 +15,181 @@ import crankshaft.pysal_utils as pu # High level interface --------------------------------------- -def moran(subquery, attr_name, - w_type, num_ngbrs, permutations, geom_col, id_col): - """ - Moran's I (global) - Implementation building neighbors with a PostGIS database and Moran's I - core clusters with PySAL. 
- Andy Eschbacher - """ - qvals = OrderedDict([("id_col", id_col), - ("attr1", attr_name), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) - - query = pu.construct_neighbor_query(w_type, qvals) - - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(2) - except plpy.SPIError, e: - plpy.error('Analysis failed: %s' % e) - return pu.empty_zipped_array(2) - - # collect attributes - attr_vals = pu.get_attributes(result) - - # calculate weights - weight = pu.get_weight(result, w_type, num_ngbrs) - - # calculate moran global - moran_global = ps.esda.moran.Moran(attr_vals, weight, - permutations=permutations) - - return zip([moran_global.I], [moran_global.EI]) +class QueryRunner: + def get_result(self, query): + try: + data = plpy.execute(query) + except plpy.SPIError, err: + plpy.error("k-means (spatial) cluster analysis failed: %s" % err) + return data -def moran_local(subquery, attr, - w_type, num_ngbrs, permutations, geom_col, id_col): - """ - Moran's I implementation for PL/Python - Andy Eschbacher - """ +class Moran: + def __init__(self, query_runner=None): + if query_runner is None: + self.query_runner = QueryRunner() + else: + self.query_runner = query_runner - # geometries with attributes that are null are ignored - # resulting in a collection of not as near neighbors + def global_stat(self, subquery, attr_name, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I (global) + Implementation building neighbors with a PostGIS database and Moran's I + core clusters with PySAL. 
+ Andy Eschbacher + """ + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr_name), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - qvals = OrderedDict([("id_col", id_col), - ("attr1", attr), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + query = pu.construct_neighbor_query(w_type, qvals) - query = pu.construct_neighbor_query(w_type, qvals) + result = self.query_runner.get_result(query) - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(5) - except plpy.SPIError, e: - plpy.error('Analysis failed: %s' % e) - return pu.empty_zipped_array(5) + # collect attributes + attr_vals = pu.get_attributes(result) - attr_vals = pu.get_attributes(result) - weight = pu.get_weight(result, w_type, num_ngbrs) + # calculate weights + weight = pu.get_weight(result, w_type, num_ngbrs) - # calculate LISA values - lisa = ps.esda.moran.Moran_Local(attr_vals, weight, - permutations=permutations) + # calculate moran global + moran_global = ps.esda.moran.Moran(attr_vals, weight, + permutations=permutations) - # find quadrants for each geometry - quads = quad_position(lisa.q) + return zip([moran_global.I], [moran_global.EI]) - return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + def local_stat(self, subquery, attr, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I implementation for PL/Python + Andy Eschbacher + """ + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors -def moran_rate(subquery, numerator, denominator, - w_type, num_ngbrs, permutations, geom_col, id_col): - """ - Moran's I Rate (global) - Andy Eschbacher - """ - qvals = OrderedDict([("id_col", id_col), - ("attr1", numerator), - ("attr2", denominator) - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + qvals = OrderedDict([("id_col", id_col), + ("attr1", 
attr), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - query = pu.construct_neighbor_query(w_type, qvals) + query = pu.construct_neighbor_query(w_type, qvals) - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(2) - except plpy.SPIError, e: - plpy.error('Analysis failed: %s' % e) - return pu.empty_zipped_array(2) + result = self.query_runner.get_result(query) - # collect attributes - numer = pu.get_attributes(result, 1) - denom = pu.get_attributes(result, 2) + attr_vals = pu.get_attributes(result) + weight = pu.get_weight(result, w_type, num_ngbrs) - weight = pu.get_weight(result, w_type, num_ngbrs) - - # calculate moran global rate - lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, + # calculate LISA values + lisa = ps.esda.moran.Moran_Local(attr_vals, weight, permutations=permutations) - return zip([lisa_rate.I], [lisa_rate.EI]) + # find quadrants for each geometry + quads = quad_position(lisa.q) + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) -def moran_local_rate(subquery, numerator, denominator, - w_type, num_ngbrs, permutations, geom_col, id_col): - """ - Moran's I Local Rate + def global_rate_stat(self, subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Rate (global) Andy Eschbacher - """ - # geometries with values that are null are ignored - # resulting in a collection of not as near neighbors + """ + qvals = OrderedDict([("id_col", id_col), + ("attr1", numerator), + ("attr2", denominator) + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - qvals = OrderedDict([("id_col", id_col), - ("numerator", numerator), - ("denominator", denominator), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + query = pu.construct_neighbor_query(w_type, qvals) - query = pu.construct_neighbor_query(w_type, qvals) + result = 
self.query_runner.get_result(query) - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(5) - except plpy.SPIError, e: - plpy.error('Analysis failed: %s' % e) - return pu.empty_zipped_array(5) + # collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) - # collect attributes - numer = pu.get_attributes(result, 1) - denom = pu.get_attributes(result, 2) + weight = pu.get_weight(result, w_type, num_ngbrs) - weight = pu.get_weight(result, w_type, num_ngbrs) + # calculate moran global rate + lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, + permutations=permutations) - # calculate LISA values - lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, - permutations=permutations) + return zip([lisa_rate.I], [lisa_rate.EI]) - # find quadrants for each geometry - quads = quad_position(lisa.q) + def local_rate_stat(self, subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Local Rate + Andy Eschbacher + """ + # geometries with values that are null are ignored + # resulting in a collection of not as near neighbors - return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + qvals = OrderedDict([("id_col", id_col), + ("numerator", numerator), + ("denominator", denominator), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + query = pu.construct_neighbor_query(w_type, qvals) -def moran_local_bv(subquery, attr1, attr2, - permutations, geom_col, id_col, w_type, num_ngbrs): - """ - Moran's I (local) Bivariate (untested) - """ + result = self.query_runner.get_result(query) - qvals = OrderedDict([("id_col", id_col), - ("attr1", attr1), - ("attr2", attr2), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + # collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) - query = 
pu.construct_neighbor_query(w_type, qvals) + weight = pu.get_weight(result, w_type, num_ngbrs) - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(4) - except plpy.SPIError: - plpy.error("Error: areas of interest query failed, " - "check input parameters") - return pu.empty_zipped_array(4) + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, + permutations=permutations) - # collect attributes - attr1_vals = pu.get_attributes(result, 1) - attr2_vals = pu.get_attributes(result, 2) + # find quadrants for each geometry + quads = quad_position(lisa.q) - # create weights - weight = pu.get_weight(result, w_type, num_ngbrs) + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) - # calculate LISA values - lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, - permutations=permutations) + def local_bivariate_stat(self, subquery, attr1, attr2, + permutations, geom_col, id_col, + w_type, num_ngbrs): + """ + Moran's I (local) Bivariate (untested) + """ - # find clustering of significance - lisa_sig = quad_position(lisa.q) + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr1), + ("attr2", attr2), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) + query = pu.construct_neighbor_query(w_type, qvals) + + result = self.query_runner.get_result(query) + + # collect attributes + attr1_vals = pu.get_attributes(result, 1) + attr2_vals = pu.get_attributes(result, 2) + + # create weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, + permutations=permutations) + + # find clustering of significance + lisa_sig = quad_position(lisa.q) + + return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) # Low level functions 
---------------------------------------- From 224fbc2fc5f6ebe1cc671da2635dbe2d73b43288 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Sat, 19 Nov 2016 09:05:35 +0000 Subject: [PATCH 83/96] move to class based markov --- src/pg/sql/11_markov.sql | 5 +- .../crankshaft/space_time_dynamics/markov.py | 177 ++++--- .../test/test_space_time_dynamics.py | 501 ++++++++++-------- 3 files changed, 366 insertions(+), 317 deletions(-) diff --git a/src/pg/sql/11_markov.sql b/src/pg/sql/11_markov.sql index 1124abd..da02c66 100644 --- a/src/pg/sql/11_markov.sql +++ b/src/pg/sql/11_markov.sql @@ -22,10 +22,11 @@ CREATE OR REPLACE FUNCTION RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT) AS $$ - from crankshaft.space_time_dynamics import spatial_markov_trend + from crankshaft.space_time_dynamics import Markov + markov = Markov() ## TODO: use named parameters or a dictionary - return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col) + return markov.spatial_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- input table format: identical to above but in a predictable format diff --git a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py index ae788d7..7984e0c 100644 --- a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py +++ b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py @@ -8,92 +8,104 @@ import pysal as ps import plpy import crankshaft.pysal_utils as pu -def spatial_markov_trend(subquery, time_cols, num_classes=7, - w_type='knn', num_ngbrs=5, permutations=0, - geom_col='the_geom', id_col='cartodb_id'): - """ - Predict the trends of a unit based on: - 1. history of its transitions to different classes (e.g., 1st quantile -> 2nd quantile) - 2. 
average class of its neighbors - Inputs: - @param subquery string: e.g., SELECT the_geom, cartodb_id, - interesting_time_column FROM table_name - @param time_cols list of strings: list of strings of column names - @param num_classes (optional): number of classes to break distribution - of values into. Currently uses quantile bins. - @param w_type string (optional): weight type ('knn' or 'queen') - @param num_ngbrs int (optional): number of neighbors (if knn type) - @param permutations int (optional): number of permutations for test - stats - @param geom_col string (optional): name of column which contains the - geometries - @param id_col string (optional): name of column which has the ids of - the table +class QueryRunner: + def get_result(self, query): + try: + data = plpy.execute(query) - Outputs: - @param trend_up float: probablity that a geom will move to a higher - class - @param trend_down float: probablity that a geom will move to a lower - class - @param trend float: (trend_up - trend_down) / trend_static - @param volatility float: a measure of the volatility based on - probability stddev(prob array) - """ + if len(data) == 0: + return zip([None], [None], [None], [None], [None]) - if len(time_cols) < 2: - plpy.error('More than one time column needs to be passed') + return data + except plpy.SPIError, err: + plpy.error('Analysis failed: %s' % err) - qvals = {"id_col": id_col, - "time_cols": time_cols, - "geom_col": geom_col, - "subquery": subquery, - "num_ngbrs": num_ngbrs} - try: - query_result = plpy.execute( - pu.construct_neighbor_query(w_type, qvals) - ) - if len(query_result) == 0: - return zip([None], [None], [None], [None], [None]) - except plpy.SPIError, e: - plpy.debug('Query failed with exception %s: %s' % (err, pu.construct_neighbor_query(w_type, qvals))) - plpy.error('Analysis failed: %s' % e) - return zip([None], [None], [None], [None], [None]) +class Markov: + def __init__(self, query_runner=None): + if query_runner is None: + 
self.query_runner = QueryRunner() + else: + self.query_runner = query_runner - ## build weight - weights = pu.get_weight(query_result, w_type) - weights.transform = 'r' + def spatial_trend(self, subquery, time_cols, num_classes=7, + w_type='knn', num_ngbrs=5, permutations=0, + geom_col='the_geom', id_col='cartodb_id'): + """ + Predict the trends of a unit based on: + 1. history of its transitions to different classes (e.g., 1st + quantile -> 2nd quantile) + 2. average class of its neighbors - ## prep time data - t_data = get_time_data(query_result, time_cols) + Inputs: + @param subquery string: e.g., SELECT the_geom, cartodb_id, + interesting_time_column FROM table_name + @param time_cols list of strings: list of strings of column names + @param num_classes (optional): number of classes to break + distribution of values into. Currently uses quantile bins. + @param w_type string (optional): weight type ('knn' or 'queen') + @param num_ngbrs int (optional): number of neighbors (if knn type) + @param permutations int (optional): number of permutations for test + stats + @param geom_col string (optional): name of column which contains + the geometries + @param id_col string (optional): name of column which has the ids + of the table - plpy.debug('shape of t_data %d, %d' % t_data.shape) - plpy.debug('number of weight objects: %d, %d' % (weights.sparse).shape) - plpy.debug('first num elements: %f' % t_data[0, 0]) + Outputs: + @param trend_up float: probablity that a geom will move to a higher + class + @param trend_down float: probablity that a geom will move to a + lower class + @param trend float: (trend_up - trend_down) / trend_static + @param volatility float: a measure of the volatility based on + probability stddev(prob array) + """ - sp_markov_result = ps.Spatial_Markov(t_data, - weights, - k=num_classes, - fixed=False, - permutations=permutations) + if len(time_cols) < 2: + plpy.error('More than one time column needs to be passed') - ## get lag classes - 
lag_classes = ps.Quantiles( - ps.lag_spatial(weights, t_data[:, -1]), - k=num_classes).yb + qvals = {"id_col": id_col, + "time_cols": time_cols, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} - ## look up probablity distribution for each unit according to class and lag class - prob_dist = get_prob_dist(sp_markov_result.P, - lag_classes, - sp_markov_result.classes[:, -1]) + query = pu.construct_neighbor_query(w_type, qvals) - ## find the ups and down and overall distribution of each cell - trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, - sp_markov_result.classes[:, -1]) + query_result = self.query_runner.get_result(query) + + # build weight + weights = pu.get_weight(query_result, w_type) + weights.transform = 'r' + + # prep time data + t_data = get_time_data(query_result, time_cols) + + sp_markov_result = ps.Spatial_Markov(t_data, + weights, + k=num_classes, + fixed=False, + permutations=permutations) + + # get lag classes + lag_classes = ps.Quantiles( + ps.lag_spatial(weights, t_data[:, -1]), + k=num_classes).yb + + # look up probablity distribution for each unit according to class and + # lag class + prob_dist = get_prob_dist(sp_markov_result.P, + lag_classes, + sp_markov_result.classes[:, -1]) + + # find the ups and down and overall distribution of each cell + trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, sp_markov_result.classes[:, -1]) + + # output the results + return zip(trend, trend_up, trend_down, volatility, weights.id_order) - ## output the results - return zip(trend, trend_up, trend_down, volatility, weights.id_order) def get_time_data(markov_data, time_cols): """ @@ -103,7 +115,8 @@ def get_time_data(markov_data, time_cols): return np.array([[x['attr' + str(i)] for x in markov_data] for i in range(1, num_attrs+1)], dtype=float).transpose() -## not currently used + +# not currently used def rebin_data(time_data, num_time_per_bin): """ Convert an n x l matrix into an (n/m) x l matrix 
where the values are @@ -131,14 +144,16 @@ def rebin_data(time_data, num_time_per_bin): """ if time_data.shape[1] % num_time_per_bin == 0: - ## if fit is perfect, then use it + # if fit is perfect, then use it n_max = time_data.shape[1] / num_time_per_bin else: - ## fit remainders into an additional column + # fit remainders into an additional column n_max = time_data.shape[1] / num_time_per_bin + 1 - return np.array([time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1) - for i in range(n_max)]).T + return np.array( + [time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1) + for i in range(n_max)]).T + def get_prob_dist(transition_matrix, lag_indices, unit_indices): """ @@ -157,6 +172,7 @@ def get_prob_dist(transition_matrix, lag_indices, unit_indices): return np.array([transition_matrix[(lag_indices[i], unit_indices[i])] for i in range(len(lag_indices))]) + def get_prob_stats(prob_dist, unit_indices): """ get the statistics of the probability distributions @@ -179,11 +195,12 @@ def get_prob_stats(prob_dist, unit_indices): trend_up[i] = prob_dist[i, (unit_indices[i]+1):].sum() trend_down[i] = prob_dist[i, :unit_indices[i]].sum() if prob_dist[i, unit_indices[i]] > 0.0: - trend[i] = (trend_up[i] - trend_down[i]) / prob_dist[i, unit_indices[i]] + trend[i] = (trend_up[i] - trend_down[i]) / ( + prob_dist[i, unit_indices[i]]) else: trend[i] = None - ## calculate volatility of distribution + # calculate volatility of distribution volatility = prob_dist.std(axis=1) return trend_up, trend_down, trend, volatility diff --git a/src/py/crankshaft/test/test_space_time_dynamics.py b/src/py/crankshaft/test/test_space_time_dynamics.py index 54ffc9d..21f3afc 100644 --- a/src/py/crankshaft/test/test_space_time_dynamics.py +++ b/src/py/crankshaft/test/test_space_time_dynamics.py @@ -9,81 +9,100 @@ import unittest # # import sys # sys.modules['plpy'] = plpy -from helper import plpy, fixture_file +from helper import fixture_file +from 
crankshaft.space_time_dynamics import Markov import crankshaft.space_time_dynamics as std from crankshaft import random_seeds +from crankshaft.clustering import QueryRunner import json + +class FakeQueryRunner(QueryRunner): + def __init__(self, data): + self.mock_result = data + + def get_result(self, query): + return self.mock_result + + class SpaceTimeTests(unittest.TestCase): """Testing class for Markov Functions.""" def setUp(self): - plpy._reset() + # plpy._reset() self.params = {"id_col": "cartodb_id", "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'], "subquery": "SELECT * FROM a_list", "geom_col": "the_geom", "num_ngbrs": 321} - self.neighbors_data = json.loads(open(fixture_file('neighbors_markov.json')).read()) + self.neighbors_data = json.loads( + open(fixture_file('neighbors_markov.json')).read()) self.markov_data = json.loads(open(fixture_file('markov.json')).read()) - self.time_data = np.array([i * np.ones(10, dtype=float) for i in range(10)]).T + self.time_data = np.array([i * np.ones(10, dtype=float) + for i in range(10)]).T self.transition_matrix = np.array([ - [[ 0.96341463, 0.0304878 , 0.00609756, 0. , 0. ], - [ 0.06040268, 0.83221477, 0.10738255, 0. , 0. ], - [ 0. , 0.14 , 0.74 , 0.12 , 0. ], - [ 0. , 0.03571429, 0.32142857, 0.57142857, 0.07142857], - [ 0. , 0. , 0. , 0.16666667, 0.83333333]], - [[ 0.79831933, 0.16806723, 0.03361345, 0. , 0. ], - [ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ], - [ 0.00537634, 0.06989247, 0.8655914 , 0.05913978, 0. ], - [ 0. , 0. , 0.06372549, 0.90196078, 0.03431373], - [ 0. , 0. , 0. , 0.19444444, 0.80555556]], - [[ 0.84693878, 0.15306122, 0. , 0. , 0. ], - [ 0.08133971, 0.78947368, 0.1291866 , 0. , 0. ], - [ 0.00518135, 0.0984456 , 0.79274611, 0.0984456 , 0.00518135], - [ 0. , 0. , 0.09411765, 0.87058824, 0.03529412], - [ 0. , 0. , 0. , 0.10204082, 0.89795918]], - [[ 0.8852459 , 0.09836066, 0. , 0.01639344, 0. ], - [ 0.03875969, 0.81395349, 0.13953488, 0. 
, 0.00775194], - [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ], - [ 0. , 0.02339181, 0.12865497, 0.75438596, 0.09356725], - [ 0. , 0. , 0. , 0.09661836, 0.90338164]], - [[ 0.33333333, 0.66666667, 0. , 0. , 0. ], - [ 0.0483871 , 0.77419355, 0.16129032, 0.01612903, 0. ], - [ 0.01149425, 0.16091954, 0.74712644, 0.08045977, 0. ], - [ 0. , 0.01036269, 0.06217617, 0.89637306, 0.03108808], - [ 0. , 0. , 0. , 0.02352941, 0.97647059]]] + [[0.96341463, 0.0304878, 0.00609756, 0., 0.], + [0.06040268, 0.83221477, 0.10738255, 0., 0.], + [0., 0.14, 0.74, 0.12, 0.], + [0., 0.03571429, 0.32142857, 0.57142857, 0.07142857], + [0., 0., 0., 0.16666667, 0.83333333]], + [[0.79831933, 0.16806723, 0.03361345, 0., 0.], + [0.0754717, 0.88207547, 0.04245283, 0., 0.], + [0.00537634, 0.06989247, 0.8655914, 0.05913978, 0.], + [0., 0., 0.06372549, 0.90196078, 0.03431373], + [0., 0., 0., 0.19444444, 0.80555556]], + [[0.84693878, 0.15306122, 0., 0., 0.], + [0.08133971, 0.78947368, 0.1291866, 0., 0.], + [0.00518135, 0.0984456, 0.79274611, 0.0984456, 0.00518135], + [0., 0., 0.09411765, 0.87058824, 0.03529412], + [0., 0., 0., 0.10204082, 0.89795918]], + [[0.8852459, 0.09836066, 0., 0.01639344, 0.], + [0.03875969, 0.81395349, 0.13953488, 0., 0.00775194], + [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505], + [0., 0.02339181, 0.12865497, 0.75438596, 0.09356725], + [0., 0., 0., 0.09661836, 0.90338164]], + [[0.33333333, 0.66666667, 0., 0., 0.], + [0.0483871, 0.77419355, 0.16129032, 0.01612903, 0.], + [0.01149425, 0.16091954, 0.74712644, 0.08045977, 0.], + [0., 0.01036269, 0.06217617, 0.89637306, 0.03108808], + [0., 0., 0., 0.02352941, 0.97647059]]] ) def test_spatial_markov(self): """Test Spatial Markov.""" - data = [ { 'id': d['id'], - 'attr1': d['y1995'], - 'attr2': d['y1996'], - 'attr3': d['y1997'], - 'attr4': d['y1998'], - 'attr5': d['y1999'], - 'attr6': d['y2000'], - 'attr7': d['y2001'], - 'attr8': d['y2002'], - 'attr9': d['y2003'], - 'attr10': d['y2004'], - 'attr11': 
d['y2005'], - 'attr12': d['y2006'], - 'attr13': d['y2007'], - 'attr14': d['y2008'], - 'attr15': d['y2009'], - 'neighbors': d['neighbors'] } for d in self.neighbors_data] - print(str(data[0])) - plpy._define_result('select', data) + data = [{'id': d['id'], + 'attr1': d['y1995'], + 'attr2': d['y1996'], + 'attr3': d['y1997'], + 'attr4': d['y1998'], + 'attr5': d['y1999'], + 'attr6': d['y2000'], + 'attr7': d['y2001'], + 'attr8': d['y2002'], + 'attr9': d['y2003'], + 'attr10': d['y2004'], + 'attr11': d['y2005'], + 'attr12': d['y2006'], + 'attr13': d['y2007'], + 'attr14': d['y2008'], + 'attr15': d['y2009'], + 'neighbors': d['neighbors']} for d in self.neighbors_data] + # print(str(data[0])) + markov = Markov(FakeQueryRunner(data)) random_seeds.set_random_seeds(1234) - result = std.spatial_markov_trend('subquery', ['y1995', 'y1996', 'y1997', 'y1998', 'y1999', 'y2000', 'y2001', 'y2002', 'y2003', 'y2004', 'y2005', 'y2006', 'y2007', 'y2008', 'y2009'], 5, 'knn', 5, 0, 'the_geom', 'cartodb_id') + result = markov.spatial_trend('subquery', + ['y1995', 'y1996', 'y1997', 'y1998', + 'y1999', 'y2000', 'y2001', 'y2002', + 'y2003', 'y2004', 'y2005', 'y2006', + 'y2007', 'y2008', 'y2009'], + 5, 'knn', 5, 0, 'the_geom', + 'cartodb_id') - self.assertTrue(result != None) + self.assertTrue(result is not None) result = [(row[0], row[1], row[2], row[3], row[4]) for row in result] print result[0] expected = self.markov_data @@ -94,173 +113,178 @@ class SpaceTimeTests(unittest.TestCase): def test_get_time_data(self): """Test get_time_data""" - data = [ { 'attr1': d['y1995'], - 'attr2': d['y1996'], - 'attr3': d['y1997'], - 'attr4': d['y1998'], - 'attr5': d['y1999'], - 'attr6': d['y2000'], - 'attr7': d['y2001'], - 'attr8': d['y2002'], - 'attr9': d['y2003'], - 'attr10': d['y2004'], - 'attr11': d['y2005'], - 'attr12': d['y2006'], - 'attr13': d['y2007'], - 'attr14': d['y2008'], - 'attr15': d['y2009'] } for d in self.neighbors_data] + data = [{'attr1': d['y1995'], + 'attr2': d['y1996'], + 'attr3': 
d['y1997'], + 'attr4': d['y1998'], + 'attr5': d['y1999'], + 'attr6': d['y2000'], + 'attr7': d['y2001'], + 'attr8': d['y2002'], + 'attr9': d['y2003'], + 'attr10': d['y2004'], + 'attr11': d['y2005'], + 'attr12': d['y2006'], + 'attr13': d['y2007'], + 'attr14': d['y2008'], + 'attr15': d['y2009']} for d in self.neighbors_data] - result = std.get_time_data(data, ['y1995', 'y1996', 'y1997', 'y1998', 'y1999', 'y2000', 'y2001', 'y2002', 'y2003', 'y2004', 'y2005', 'y2006', 'y2007', 'y2008', 'y2009']) + result = std.get_time_data(data, ['y1995', 'y1996', 'y1997', 'y1998', + 'y1999', 'y2000', 'y2001', 'y2002', + 'y2003', 'y2004', 'y2005', 'y2006', + 'y2007', 'y2008', 'y2009']) - ## expected was prepared from PySAL example: - ### f = ps.open(ps.examples.get_path("usjoin.csv")) - ### pci = np.array([f.by_col[str(y)] for y in range(1995, 2010)]).transpose() - ### rpci = pci / (pci.mean(axis = 0)) + # expected was prepared from PySAL example: + # f = ps.open(ps.examples.get_path("usjoin.csv")) + # pci = np.array([f.by_col[str(y)] + # for y in range(1995, 2010)]).transpose() + # rpci = pci / (pci.mean(axis = 0)) - expected = np.array([[ 0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154, 0.83271652 - , 0.83786314, 0.85012593, 0.85509656, 0.86416612, 0.87119375, 0.86302631 - , 0.86148267, 0.86252252, 0.86746356], - [ 0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388, 0.90746978 - , 0.89830489, 0.89431991, 0.88924794, 0.89815176, 0.91832091, 0.91706054 - , 0.90139505, 0.87897455, 0.86216858], - [ 0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522, 0.78964559 - , 0.80584442, 0.8084998, 0.82258551, 0.82668196, 0.82373724, 0.81814804 - , 0.83675961, 0.83574199, 0.84647177], - [ 1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841, 1.14506948 - , 1.12151133, 1.11160697, 1.10888621, 1.11399806, 1.12168029, 1.13164797 - , 1.12958508, 1.11371818, 1.09936775], - [ 1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025, 1.16898201 - , 1.17212488, 1.14752303, 
1.11843284, 1.11024964, 1.11943471, 1.11736468 - , 1.10863242, 1.09642516, 1.07762337], - [ 1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684, 1.44184737 - , 1.44782832, 1.41978227, 1.39092208, 1.4059372, 1.40788646, 1.44052766 - , 1.45241216, 1.43306098, 1.4174431 ], - [ 1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149, 1.10888138 - , 1.11856629, 1.13062931, 1.11944984, 1.12446239, 1.11671008, 1.10880034 - , 1.08401709, 1.06959206, 1.07875225], - [ 1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545, 0.99854316 - , 0.9880258, 0.99669587, 0.99327676, 1.01400905, 1.03176742, 1.040511 - , 1.01749645, 0.9936394, 0.98279746], - [ 0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845, 0.99127006 - , 0.97925917, 0.9683482, 0.95335147, 0.93694787, 0.94308213, 0.92232874 - , 0.91284091, 0.89689833, 0.88928858], - [ 0.87418391, 0.86416601, 0.84425695, 0.8404494, 0.83903044, 0.8578708 - , 0.86036185, 0.86107306, 0.8500772, 0.86981998, 0.86837929, 0.87204141 - , 0.86633032, 0.84946077, 0.83287146], - [ 1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624, 1.14450183 - , 1.12349752, 1.12596664, 1.12213996, 1.1119989, 1.10257792, 1.10491258 - , 1.11059842, 1.10509795, 1.10020097], - [ 0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687, 0.95831051 - , 0.94480909, 0.94804195, 0.95430286, 0.94103989, 0.92122519, 0.91010201 - , 0.89280392, 0.89298243, 0.89165385], - [ 0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647, 0.9480927 - , 0.93539182, 0.95388718, 0.94597005, 0.96918424, 0.94781281, 0.93466815 - , 0.94281559, 0.96520315, 0.96715441], - [ 0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897, 0.98687073 - , 0.99237486, 0.98209969, 0.9877653, 0.97399471, 0.96910087, 0.98416665 - , 0.98423613, 0.99823861, 0.99545704], - [ 0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012, 0.86191535 - , 0.84981451, 0.85472102, 0.84564835, 0.83998883, 0.83478547, 0.82803648 - , 0.8198736, 0.82265395, 0.8399404 ], - [ 0.87022047, 
0.85996258, 0.85961813, 0.85689572, 0.83947136, 0.82785597 - , 0.86008789, 0.86776298, 0.86720209, 0.8676334, 0.89179317, 0.94202108 - , 0.9422231, 0.93902708, 0.94479184], - [ 0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238, 0.90906632 - , 0.92693339, 0.93695966, 0.94242697, 0.94338265, 0.91981796, 0.91108804 - , 0.90543476, 0.91737138, 0.94793657], - [ 1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723, 1.20172869 - , 1.21328691, 1.22624778, 1.22397075, 1.23857042, 1.24419893, 1.23929384 - , 1.23418676, 1.23626739, 1.26754398], - [ 1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667, 1.34790023 - , 1.34399863, 1.32575181, 1.30795492, 1.30544841, 1.30303302, 1.32107766 - , 1.32936244, 1.33001241, 1.33288462], - [ 1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093, 1.05059016 - , 1.03405057, 1.02747623, 1.03162734, 0.9961416, 0.97356208, 0.94241549 - , 0.92754547, 0.92549227, 0.92138102], - [ 1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264, 1.13889622 - , 1.12442212, 1.13367018, 1.13982256, 1.14029944, 1.11979401, 1.10905389 - , 1.10577769, 1.11166825, 1.09985155], - [ 0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284, 0.74480073 - , 0.76098396, 0.76156903, 0.76651952, 0.76533288, 0.78205934, 0.76842416 - , 0.77487118, 0.77768683, 0.78801192], - [ 0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803, 0.97370819 - , 0.96419154, 0.97209861, 0.97441313, 0.96356162, 0.94745352, 0.93965462 - , 0.93069645, 0.94020973, 0.94358232], - [ 0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801, 0.80071489 - , 0.83358256, 0.83451613, 0.85175032, 0.85954307, 0.86790024, 0.87170334 - , 0.87863799, 0.87497981, 0.87888675], - [ 0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619, 0.98733195 - , 0.99644997, 0.99669587, 1.02559097, 1.01116651, 0.99988024, 0.97906749 - , 0.99323123, 1.00204939, 0.99602148], - [ 1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683, 1.08312397 - , 1.05192626, 1.04230892, 
1.05577278, 1.08569751, 1.12443486, 1.08891079 - , 1.08603695, 1.05997314, 1.02160943], - [ 1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272, 1.18257029 - , 1.16226243, 1.16009196, 1.14467789, 1.14820235, 1.12386598, 1.12680236 - , 1.12357937, 1.1159258, 1.12570828], - [ 1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667, 1.31210239 - , 1.29989156, 1.29203193, 1.27183516, 1.26830786, 1.2617743, 1.28656675 - , 1.29734097, 1.29390205, 1.29345446], - [ 0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864, 0.78772975 - , 0.82848011, 0.8259679, 0.82435705, 0.83108634, 0.84373784, 0.83891093 - , 0.84349247, 0.85637272, 0.86539395], - [ 1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626, 1.2256767 - , 1.21126648, 1.19377804, 1.18355337, 1.19674434, 1.21536573, 1.23653297 - , 1.27962009, 1.27968392, 1.25907738], - [ 0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282, 0.96480308 - , 0.94686376, 0.93679073, 0.92540049, 0.92988835, 0.93442917, 0.92100464 - , 0.91475304, 0.90249622, 0.9021363 ], - [ 0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368, 0.88937573 - , 0.894401, 0.90448993, 0.95495898, 0.92698333, 0.94745352, 0.92562488 - , 0.96635366, 1.02520312, 1.0394296 ], - [ 1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073, 1.00759019 - , 0.99192968, 0.99747298, 0.99550759, 0.97583768, 0.9610168, 0.94779638 - , 0.93759089, 0.93353431, 0.94121705], - [ 0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613, 0.83434854 - , 0.85813595, 0.84667961, 0.84374558, 0.85951183, 0.87194227, 0.89455097 - , 0.88283929, 0.90349491, 0.90600675], - [ 1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086, 1.00581626 - , 0.98850522, 0.99291168, 0.98983209, 0.97511924, 0.96134615, 0.96382634 - , 0.95011401, 0.9434686, 0.94637765], - [ 1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857, 1.04800023 - , 1.03024941, 1.04200483, 1.0402554, 1.03296979, 1.02191682, 1.02476275 - , 1.02347523, 1.02517684, 1.04359571], - [ 1.07084189, 
1.06669497, 1.07937623, 1.07387988, 1.0794043, 1.0531801 - , 1.07452771, 1.09383478, 1.1052447, 1.10322136, 1.09167939, 1.08772756 - , 1.08859544, 1.09177338, 1.1096083 ], - [ 0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809, 0.86287327 - , 0.85169796, 0.85411285, 0.84886336, 0.84517414, 0.84843858, 0.84488343 - , 0.83374329, 0.82812044, 0.82878599], - [ 0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286, 0.92652175 - , 0.94278865, 0.93682452, 0.98655146, 0.992237, 0.9798497, 0.93869677 - , 0.96947771, 1.00362626, 0.98102351], - [ 0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967, 0.93092109 - , 0.92662519, 0.93412152, 0.93501274, 0.92879506, 0.92110542, 0.91035556 - , 0.90430364, 0.89994694, 0.90073864], - [ 0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824, 0.98882205 - , 0.97662234, 0.95601578, 0.94905385, 0.94934888, 0.97152609, 0.97163004 - , 0.9700702, 0.97158948, 0.95884908], - [ 0.83980439, 0.84726737, 0.85747, 0.85467221, 0.8556751, 0.84818516 - , 0.85265681, 0.84502402, 0.82645665, 0.81743586, 0.83550406, 0.83338919 - , 0.83511679, 0.82136617, 0.80921874], - [ 0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441, 0.95440787 - , 0.96364363, 0.96804412, 0.97136214, 0.97583768, 0.95571724, 0.96895368 - , 0.97001634, 0.97082733, 0.98782366], - [ 1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249, 1.10558188 - , 1.1214086, 1.12292577, 1.13021031, 1.13342735, 1.14686068, 1.14502975 - , 1.14474747, 1.14084037, 1.16142926], - [ 1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863, 1.11856702 - , 1.09764283, 1.08815849, 1.08044313, 1.09278827, 1.07003204, 1.08398066 - , 1.09831768, 1.09298232, 1.09176125], - [ 0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744, 0.77751194 - , 0.79902974, 0.81437881, 0.80788828, 0.79603865, 0.78966436, 0.79949807 - , 0.80172182, 0.82168155, 0.85587911], - [ 1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561, 1.00162979 - , 0.99860739, 1.00814981, 1.00574316, 
0.99030032, 0.97682565, 0.97292596 - , 0.96519561, 0.96173403, 0.95890284], - [ 0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289, 0.96608031 - , 0.99727185, 1.00781194, 1.03484236, 1.05333619, 1.0983263, 1.1704974 - , 1.17025154, 1.18730553, 1.14242645]]) + expected = np.array( + [[0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154, + 0.83271652, 0.83786314, 0.85012593, 0.85509656, 0.86416612, + 0.87119375, 0.86302631, 0.86148267, 0.86252252, 0.86746356], + [0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388, + 0.90746978, 0.89830489, 0.89431991, 0.88924794, 0.89815176, + 0.91832091, 0.91706054, 0.90139505, 0.87897455, 0.86216858], + [0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522, + 0.78964559, 0.80584442, 0.8084998, 0.82258551, 0.82668196, + 0.82373724, 0.81814804, 0.83675961, 0.83574199, 0.84647177], + [1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841, + 1.14506948, 1.12151133, 1.11160697, 1.10888621, 1.11399806, + 1.12168029, 1.13164797, 1.12958508, 1.11371818, 1.09936775], + [1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025, + 1.16898201, 1.17212488, 1.14752303, 1.11843284, 1.11024964, + 1.11943471, 1.11736468, 1.10863242, 1.09642516, 1.07762337], + [1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684, + 1.44184737, 1.44782832, 1.41978227, 1.39092208, 1.4059372, + 1.40788646, 1.44052766, 1.45241216, 1.43306098, 1.4174431], + [1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149, + 1.10888138, 1.11856629, 1.13062931, 1.11944984, 1.12446239, + 1.11671008, 1.10880034, 1.08401709, 1.06959206, 1.07875225], + [1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545, + 0.99854316, 0.9880258, 0.99669587, 0.99327676, 1.01400905, + 1.03176742, 1.040511, 1.01749645, 0.9936394, 0.98279746], + [0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845, + 0.99127006, 0.97925917, 0.9683482, 0.95335147, 0.93694787, + 0.94308213, 0.92232874, 0.91284091, 0.89689833, 0.88928858], + [0.87418391, 0.86416601, 
0.84425695, 0.8404494, 0.83903044, + 0.8578708, 0.86036185, 0.86107306, 0.8500772, 0.86981998, + 0.86837929, 0.87204141, 0.86633032, 0.84946077, 0.83287146], + [1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624, + 1.14450183, 1.12349752, 1.12596664, 1.12213996, 1.1119989, + 1.10257792, 1.10491258, 1.11059842, 1.10509795, 1.10020097], + [0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687, + 0.95831051, 0.94480909, 0.94804195, 0.95430286, 0.94103989, + 0.92122519, 0.91010201, 0.89280392, 0.89298243, 0.89165385], + [0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647, + 0.9480927, 0.93539182, 0.95388718, 0.94597005, 0.96918424, + 0.94781281, 0.93466815, 0.94281559, 0.96520315, 0.96715441], + [0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897, + 0.98687073, 0.99237486, 0.98209969, 0.9877653, 0.97399471, + 0.96910087, 0.98416665, 0.98423613, 0.99823861, 0.99545704], + [0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012, + 0.86191535, 0.84981451, 0.85472102, 0.84564835, 0.83998883, + 0.83478547, 0.82803648, 0.8198736, 0.82265395, 0.8399404], + [0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136, + 0.82785597, 0.86008789, 0.86776298, 0.86720209, 0.8676334, + 0.89179317, 0.94202108, 0.9422231, 0.93902708, 0.94479184], + [0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238, + 0.90906632, 0.92693339, 0.93695966, 0.94242697, 0.94338265, + 0.91981796, 0.91108804, 0.90543476, 0.91737138, 0.94793657], + [1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723, + 1.20172869, 1.21328691, 1.22624778, 1.22397075, 1.23857042, + 1.24419893, 1.23929384, 1.23418676, 1.23626739, 1.26754398], + [1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667, + 1.34790023, 1.34399863, 1.32575181, 1.30795492, 1.30544841, + 1.30303302, 1.32107766, 1.32936244, 1.33001241, 1.33288462], + [1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093, + 1.05059016, 1.03405057, 1.02747623, 1.03162734, 0.9961416, + 0.97356208, 0.94241549, 
0.92754547, 0.92549227, 0.92138102], + [1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264, + 1.13889622, 1.12442212, 1.13367018, 1.13982256, 1.14029944, + 1.11979401, 1.10905389, 1.10577769, 1.11166825, 1.09985155], + [0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284, + 0.74480073, 0.76098396, 0.76156903, 0.76651952, 0.76533288, + 0.78205934, 0.76842416, 0.77487118, 0.77768683, 0.78801192], + [0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803, + 0.97370819, 0.96419154, 0.97209861, 0.97441313, 0.96356162, + 0.94745352, 0.93965462, 0.93069645, 0.94020973, 0.94358232], + [0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801, + 0.80071489, 0.83358256, 0.83451613, 0.85175032, 0.85954307, + 0.86790024, 0.87170334, 0.87863799, 0.87497981, 0.87888675], + [0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619, + 0.98733195, 0.99644997, 0.99669587, 1.02559097, 1.01116651, + 0.99988024, 0.97906749, 0.99323123, 1.00204939, 0.99602148], + [1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683, + 1.08312397, 1.05192626, 1.04230892, 1.05577278, 1.08569751, + 1.12443486, 1.08891079, 1.08603695, 1.05997314, 1.02160943], + [1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272, + 1.18257029, 1.16226243, 1.16009196, 1.14467789, 1.14820235, + 1.12386598, 1.12680236, 1.12357937, 1.1159258, 1.12570828], + [1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667, + 1.31210239, 1.29989156, 1.29203193, 1.27183516, 1.26830786, + 1.2617743, 1.28656675, 1.29734097, 1.29390205, 1.29345446], + [0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864, + 0.78772975, 0.82848011, 0.8259679, 0.82435705, 0.83108634, + 0.84373784, 0.83891093, 0.84349247, 0.85637272, 0.86539395], + [1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626, + 1.2256767, 1.21126648, 1.19377804, 1.18355337, 1.19674434, + 1.21536573, 1.23653297, 1.27962009, 1.27968392, 1.25907738], + [0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282, + 0.96480308, 0.94686376, 
0.93679073, 0.92540049, 0.92988835, + 0.93442917, 0.92100464, 0.91475304, 0.90249622, 0.9021363], + [0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368, + 0.88937573, 0.894401, 0.90448993, 0.95495898, 0.92698333, + 0.94745352, 0.92562488, 0.96635366, 1.02520312, 1.0394296], + [1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073, + 1.00759019, 0.99192968, 0.99747298, 0.99550759, 0.97583768, + 0.9610168, 0.94779638, 0.93759089, 0.93353431, 0.94121705], + [0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613, + 0.83434854, 0.85813595, 0.84667961, 0.84374558, 0.85951183, + 0.87194227, 0.89455097, 0.88283929, 0.90349491, 0.90600675], + [1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086, + 1.00581626, 0.98850522, 0.99291168, 0.98983209, 0.97511924, + 0.96134615, 0.96382634, 0.95011401, 0.9434686, 0.94637765], + [1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857, + 1.04800023, 1.03024941, 1.04200483, 1.0402554, 1.03296979, + 1.02191682, 1.02476275, 1.02347523, 1.02517684, 1.04359571], + [1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043, + 1.0531801, 1.07452771, 1.09383478, 1.1052447, 1.10322136, + 1.09167939, 1.08772756, 1.08859544, 1.09177338, 1.1096083], + [0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809, + 0.86287327, 0.85169796, 0.85411285, 0.84886336, 0.84517414, + 0.84843858, 0.84488343, 0.83374329, 0.82812044, 0.82878599], + [0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286, + 0.92652175, 0.94278865, 0.93682452, 0.98655146, 0.992237, + 0.9798497, 0.93869677, 0.96947771, 1.00362626, 0.98102351], + [0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967, + 0.93092109, 0.92662519, 0.93412152, 0.93501274, 0.92879506, + 0.92110542, 0.91035556, 0.90430364, 0.89994694, 0.90073864], + [0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824, + 0.98882205, 0.97662234, 0.95601578, 0.94905385, 0.94934888, + 0.97152609, 0.97163004, 0.9700702, 0.97158948, 0.95884908], + [0.83980439, 0.84726737, 0.85747, 
0.85467221, 0.8556751, + 0.84818516, 0.85265681, 0.84502402, 0.82645665, 0.81743586, + 0.83550406, 0.83338919, 0.83511679, 0.82136617, 0.80921874], + [0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441, + 0.95440787, 0.96364363, 0.96804412, 0.97136214, 0.97583768, + 0.95571724, 0.96895368, 0.97001634, 0.97082733, 0.98782366], + [1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249, + 1.10558188, 1.1214086, 1.12292577, 1.13021031, 1.13342735, + 1.14686068, 1.14502975, 1.14474747, 1.14084037, 1.16142926], + [1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863, + 1.11856702, 1.09764283, 1.08815849, 1.08044313, 1.09278827, + 1.07003204, 1.08398066, 1.09831768, 1.09298232, 1.09176125], + [0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744, + 0.77751194, 0.79902974, 0.81437881, 0.80788828, 0.79603865, + 0.78966436, 0.79949807, 0.80172182, 0.82168155, 0.85587911], + [1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561, + 1.00162979, 0.99860739, 1.00814981, 1.00574316, 0.99030032, + 0.97682565, 0.97292596, 0.96519561, 0.96173403, 0.95890284], + [0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289, + 0.96608031, 0.99727185, 1.00781194, 1.03484236, 1.05333619, + 1.0983263, 1.1704974, 1.17025154, 1.18730553, 1.14242645]]) self.assertTrue(np.allclose(result, expected)) self.assertTrue(type(result) == type(expected)) @@ -268,32 +292,35 @@ class SpaceTimeTests(unittest.TestCase): def test_rebin_data(self): """Test rebin_data""" - ## sample in double the time (even case since 10 % 2 = 0): - ## (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2 - ## = 0.5, 2.5, 4.5, 6.5, 8.5 + # sample in double the time (even case since 10 % 2 = 0): + # (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2 + # = 0.5, 2.5, 4.5, 6.5, 8.5 ans_even = np.array([(i + 0.5) * np.ones(10, dtype=float) for i in range(0, 10, 2)]).T - self.assertTrue(np.array_equal(std.rebin_data(self.time_data, 2), ans_even)) + self.assertTrue( + np.array_equal(std.rebin_data(self.time_data, 2), 
ans_even)) - ## sample in triple the time (uneven since 10 % 3 = 1): - ## (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1 - ## = 1, 4, 7, 9 - ans_odd = np.array([i * np.ones(10, dtype=float) - for i in (1, 4, 7, 9)]).T - self.assertTrue(np.array_equal(std.rebin_data(self.time_data, 3), ans_odd)) + # sample in triple the time (uneven since 10 % 3 = 1): + # (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1 + # = 1, 4, 7, 9 + ans_odd = np.array([i * np.ones(10, dtype=float) + for i in (1, 4, 7, 9)]).T + self.assertTrue( + np.array_equal(std.rebin_data(self.time_data, 3), ans_odd)) def test_get_prob_dist(self): """Test get_prob_dist""" lag_indices = np.array([1, 2, 3, 4]) unit_indices = np.array([1, 3, 2, 4]) answer = np.array([ - [ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ], - [ 0. , 0. , 0.09411765, 0.87058824, 0.03529412], - [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ], - [ 0. , 0. , 0. , 0.02352941, 0.97647059] + [0.0754717, 0.88207547, 0.04245283, 0., 0.], + [0., 0., 0.09411765, 0.87058824, 0.03529412], + [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505], + [0., 0., 0., 0.02352941, 0.97647059] ]) - result = std.get_prob_dist(self.transition_matrix, lag_indices, unit_indices) + result = std.get_prob_dist(self.transition_matrix, + lag_indices, unit_indices) self.assertTrue(np.array_equal(result, answer)) @@ -301,16 +328,20 @@ class SpaceTimeTests(unittest.TestCase): """Test get_prob_stats""" probs = np.array([ - [ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ], - [ 0. , 0. , 0.09411765, 0.87058824, 0.03529412], - [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ], - [ 0. , 0. , 0. 
, 0.02352941, 0.97647059] + [0.0754717, 0.88207547, 0.04245283, 0., 0.], + [0., 0., 0.09411765, 0.87058824, 0.03529412], + [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505], + [0., 0., 0., 0.02352941, 0.97647059] ]) unit_indices = np.array([1, 3, 2, 4]) answer_up = np.array([0.04245283, 0.03529412, 0.12376238, 0.]) answer_down = np.array([0.0754717, 0.09411765, 0.0990099, 0.02352941]) - answer_trend = np.array([-0.03301887 / 0.88207547, -0.05882353 / 0.87058824, 0.02475248 / 0.77722772, -0.02352941 / 0.97647059]) - answer_volatility = np.array([ 0.34221495, 0.33705421, 0.29226542, 0.38834223]) + answer_trend = np.array([-0.03301887 / 0.88207547, + -0.05882353 / 0.87058824, + 0.02475248 / 0.77722772, + -0.02352941 / 0.97647059]) + answer_volatility = np.array([0.34221495, 0.33705421, + 0.29226542, 0.38834223]) result = std.get_prob_stats(probs, unit_indices) result_up = result[0] From c8f5448b7c263fce8dfd7e05b68b67bfa95e4c7d Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Sat, 19 Nov 2016 14:20:06 +0000 Subject: [PATCH 84/96] seprates out query runner --- src/py/crankshaft/crankshaft/__init__.py | 1 + .../crankshaft/clustering/kmeans.py | 35 ++------------- .../crankshaft/crankshaft/clustering/moran.py | 20 +++------ src/py/crankshaft/crankshaft/query_runner.py | 43 +++++++++++++++++++ .../crankshaft/space_time_dynamics/markov.py | 16 +------ .../crankshaft/test/test_clustering_kmeans.py | 6 +-- .../crankshaft/test/test_clustering_moran.py | 17 ++------ .../test/test_space_time_dynamics.py | 10 +---- 8 files changed, 64 insertions(+), 84 deletions(-) create mode 100644 src/py/crankshaft/crankshaft/query_runner.py diff --git a/src/py/crankshaft/crankshaft/__init__.py b/src/py/crankshaft/crankshaft/__init__.py index 4e06bc5..a03b040 100644 --- a/src/py/crankshaft/crankshaft/__init__.py +++ b/src/py/crankshaft/crankshaft/__init__.py @@ -3,3 +3,4 @@ import crankshaft.random_seeds import crankshaft.clustering import crankshaft.space_time_dynamics import 
crankshaft.segmentation +import query_runner diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 48b9bd3..06c6527 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -1,36 +1,7 @@ from sklearn.cluster import KMeans -import plpy import numpy as np - -class QueryRunner: - def get_moran(self, query): - """fetch data for moran's i analyses""" - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(2) - except plpy.SPIError, e: - plpy.error('Analysis failed: %s' % e) - return pu.empty_zipped_array(2) - - def get_columns(self, query, standarize): - """fetch data for non-spatial kmeans""" - try: - db_resp = plpy.execute(query) - except plpy.SPIError, err: - plpy.error('Analysis failed: %s' % err) - - return db_resp - - def get_result(self, query): - """fetch data for spatial kmeans""" - try: - data = plpy.execute(query) - except plpy.SPIError, err: - plpy.error("Analysis failed: %s" % err) - return data +from crankshaft.query_runner import QueryRunner class Kmeans: @@ -52,7 +23,7 @@ class Kmeans: "FROM ({query}) As a " "WHERE the_geom IS NOT NULL").format(query=query) - data = self.query_runner.get_result(full_query) + data = self.query_runner.get_spatial_kmeans(full_query) # Unpack query response xs = data[0]['xs'] @@ -92,7 +63,7 @@ class Kmeans: cols=', '.join(['array_agg({0}) As col{1}'.format(val, idx) for idx, val in enumerate(colnames)])) - db_resp = self.query_runner.get_columns(full_query, standarize) + db_resp = self.query_runner.get_nonspatial_kmeans(full_query, standarize) # fill array with values for k-means clustering if standarize: diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index ee82932..d2c99d6 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ 
b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -8,6 +8,7 @@ Moran's I geostatistics (global clustering & outliers presence) import pysal as ps import plpy from collections import OrderedDict +from crankshaft.query_runner import QueryRunner # crankshaft module import crankshaft.pysal_utils as pu @@ -15,15 +16,6 @@ import crankshaft.pysal_utils as pu # High level interface --------------------------------------- -class QueryRunner: - def get_result(self, query): - try: - data = plpy.execute(query) - except plpy.SPIError, err: - plpy.error("k-means (spatial) cluster analysis failed: %s" % err) - return data - - class Moran: def __init__(self, query_runner=None): if query_runner is None: @@ -47,7 +39,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_result(query) + result = self.query_runner.get_moran(query) # collect attributes attr_vals = pu.get_attributes(result) @@ -79,7 +71,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_result(query) + result = self.query_runner.get_moran(query) attr_vals = pu.get_attributes(result) weight = pu.get_weight(result, w_type, num_ngbrs) @@ -108,7 +100,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_result(query) + result = self.query_runner.get_moran(query) # collect attributes numer = pu.get_attributes(result, 1) @@ -140,7 +132,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_result(query) + result = self.query_runner.get_moran(query) # collect attributes numer = pu.get_attributes(result, 1) @@ -173,7 +165,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_result(query) + result = self.query_runner.get_moran(query) # collect attributes attr1_vals = pu.get_attributes(result, 1) diff --git a/src/py/crankshaft/crankshaft/query_runner.py 
b/src/py/crankshaft/crankshaft/query_runner.py new file mode 100644 index 0000000..5775e72 --- /dev/null +++ b/src/py/crankshaft/crankshaft/query_runner.py @@ -0,0 +1,43 @@ +"""class for fetching data""" +import plpy + + +class QueryRunner: + def get_markov(self, query): + try: + data = plpy.execute(query) + + if len(data) == 0: + return pu.empty_zipped_array(4) + + return data + except plpy.SPIError, err: + plpy.error('Analysis failed: %s' % err) + + def get_moran(self, query): + """fetch data for moran's i analyses""" + try: + data = plpy.execute(query) + # if there are no neighbors, exit + if len(data) == 0: + return pu.empty_zipped_array(2) + return data + except plpy.SPIError, err: + plpy.error('Analysis failed: %s' % e) + return pu.empty_zipped_array(2) + + def get_nonspatial_kmeans(self, query): + """fetch data for non-spatial kmeans""" + try: + data = plpy.execute(query) + return data + except plpy.SPIError, err: + plpy.error('Analysis failed: %s' % err) + + def get_spatial_kmeans(self, query): + """fetch data for spatial kmeans""" + try: + data = plpy.execute(query) + return data + except plpy.SPIError, err: + plpy.error("Analysis failed: %s" % err) diff --git a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py index 7984e0c..ea8dd32 100644 --- a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py +++ b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py @@ -7,19 +7,7 @@ import numpy as np import pysal as ps import plpy import crankshaft.pysal_utils as pu - - -class QueryRunner: - def get_result(self, query): - try: - data = plpy.execute(query) - - if len(data) == 0: - return zip([None], [None], [None], [None], [None]) - - return data - except plpy.SPIError, err: - plpy.error('Analysis failed: %s' % err) +from crankshaft.query_runner import QueryRunner class Markov: @@ -74,7 +62,7 @@ class Markov: query = pu.construct_neighbor_query(w_type, qvals) - query_result = 
self.query_runner.get_result(query) + query_result = self.query_runner.get_markov(query) # build weight weights = pu.get_weight(query_result, w_type) diff --git a/src/py/crankshaft/test/test_clustering_kmeans.py b/src/py/crankshaft/test/test_clustering_kmeans.py index 830ee9d..9fd2504 100644 --- a/src/py/crankshaft/test/test_clustering_kmeans.py +++ b/src/py/crankshaft/test/test_clustering_kmeans.py @@ -9,7 +9,7 @@ import numpy as np # sys.modules['plpy'] = plpy from helper import fixture_file from crankshaft.clustering import Kmeans -from crankshaft.clustering import QueryRunner +from crankshaft.query_runner import QueryRunner import crankshaft.clustering as cc from crankshaft import random_seeds @@ -21,10 +21,10 @@ class FakeQueryRunner(QueryRunner): def __init__(self, mocked_result): self.mocked_result = mocked_result - def get_result(self, query): + def get_spatial_kmeans(self, query): return self.mocked_result - def get_columns(self, query, standarize): + def get_nonspatial_kmeans(self, query, standarize): return self.mocked_result diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index 0a320fb..37cf7d0 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -1,12 +1,6 @@ import unittest import numpy as np - -# from mock_plpy import MockPlPy -# plpy = MockPlPy() -# -# import sys -# sys.modules['plpy'] = plpy from helper import fixture_file from crankshaft.clustering import Moran from crankshaft.clustering import QueryRunner @@ -17,14 +11,11 @@ from collections import OrderedDict class FakeQueryRunner(QueryRunner): - def __init__(self, mocked_result): - self.mocked_result = mocked_result + def __init__(self, mock_data): + self.mock_result = mock_data - def get_result(self, query): - return self.mocked_result - - def get_columns(self, query): - return self.mocked_result + def get_moran(self, query): + return self.mock_result class 
MoranTest(unittest.TestCase): diff --git a/src/py/crankshaft/test/test_space_time_dynamics.py b/src/py/crankshaft/test/test_space_time_dynamics.py index 21f3afc..e58c7d4 100644 --- a/src/py/crankshaft/test/test_space_time_dynamics.py +++ b/src/py/crankshaft/test/test_space_time_dynamics.py @@ -4,17 +4,12 @@ import numpy as np import unittest -# from mock_plpy import MockPlPy -# plpy = MockPlPy() -# -# import sys -# sys.modules['plpy'] = plpy from helper import fixture_file from crankshaft.space_time_dynamics import Markov import crankshaft.space_time_dynamics as std from crankshaft import random_seeds -from crankshaft.clustering import QueryRunner +from crankshaft.query_runner import QueryRunner import json @@ -22,7 +17,7 @@ class FakeQueryRunner(QueryRunner): def __init__(self, data): self.mock_result = data - def get_result(self, query): + def get_markov(self, query): return self.mock_result @@ -30,7 +25,6 @@ class SpaceTimeTests(unittest.TestCase): """Testing class for Markov Functions.""" def setUp(self): - # plpy._reset() self.params = {"id_col": "cartodb_id", "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'], "subquery": "SELECT * FROM a_list", From 538ab9a071e6178c7196c4b793daf3f366520d9e Mon Sep 17 00:00:00 2001 From: Mario de Frutos Date: Mon, 21 Nov 2016 16:14:48 +0100 Subject: [PATCH 85/96] Changed to the last postgresql-9.5 package --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index bc85047..a5f544a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,7 +38,7 @@ before_install: - sudo apt-get -y remove --purge postgis - sudo apt-get -y autoremove - - sudo apt-get -y install postgresql-9.5=9.5.2-2ubuntu1 + - sudo apt-get -y install postgresql-9.5=9.5.2-3cdb1 - sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-2ubuntu1 - sudo apt-get -y install postgresql-plpython-9.5=9.5.2-2ubuntu1 - sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2 From 
2f27622a6dbbe44d068bdbf7c1672d74112399b7 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 21 Nov 2016 16:19:54 +0000 Subject: [PATCH 86/96] strips out kmeans non spatial --- src/pg/sql/11_kmeans.sql | 18 ----- .../crankshaft/clustering/kmeans.py | 79 ------------------- .../crankshaft/test/test_clustering_kmeans.py | 33 -------- 3 files changed, 130 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index 1dc6d00..0899e81 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -9,24 +9,6 @@ RETURNS table (cartodb_id integer, cluster_no integer) as $$ $$ LANGUAGE plpythonu; --- Non-spatial k-means clustering --- query: sql query to retrieve all the needed data - -CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial( - query TEXT, - colnames TEXT[], - num_clusters INTEGER, - id_colname TEXT DEFAULT 'cartodb_id', - standarize BOOLEAN DEFAULT true -) -RETURNS TABLE(cluster_label text, cluster_center json, silhouettes numeric, rowid bigint) AS $$ - - from crankshaft.clustering import Kmeans - kmeans = Kmeans() - return kmeans.nonspatial(query, colnames, num_clusters, - id_colname, standarize) -$$ LANGUAGE plpythonu; - CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) RETURNS Numeric[] AS diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 06c6527..200dc52 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -33,82 +33,3 @@ class Kmeans: km = KMeans(n_clusters=no_clusters, n_init=no_init) labels = km.fit_predict(zip(xs, ys)) return zip(ids, labels) - - def nonspatial(self, query, colnames, num_clusters=5, - id_col='cartodb_id', standarize=True): - """ - query (string): A SQL query to retrieve the data required to do the - k-means clustering analysis, like so: - SELECT * FROM iris_flower_data - colnames (list): a list of the column 
names which contain the data - of interest, like so: ["sepal_width", - "petal_width", - "sepal_length", - "petal_length"] - num_clusters (int): number of clusters (greater than zero) - id_col (string): name of the input id_column - """ - import json - from sklearn import metrics - - out_id_colname = 'rowids' - # TODO: need a random seed? - - full_query = ''' - SELECT {cols}, array_agg({id_col}) As {out_id_colname} - FROM ({query}) As a - '''.format(query=query, - id_col=id_col, - out_id_colname=out_id_colname, - cols=', '.join(['array_agg({0}) As col{1}'.format(val, idx) - for idx, val in enumerate(colnames)])) - - db_resp = self.query_runner.get_nonspatial_kmeans(full_query, standarize) - - # fill array with values for k-means clustering - if standarize: - cluster_columns = _scale_data( - _extract_columns(db_resp, colnames)) - else: - cluster_columns = _extract_columns(db_resp, colnames) - - print str(cluster_columns) - # TODO: decide on optimal parameters for most cases - # Are there ways of deciding parameters based on inputs? 
- kmeans = KMeans(n_clusters=num_clusters, - random_state=0).fit(cluster_columns) - - centers = [json.dumps(dict(zip(colnames, c))) - for c in kmeans.cluster_centers_[kmeans.labels_]] - - silhouettes = metrics.silhouette_samples(cluster_columns, - kmeans.labels_, - metric='sqeuclidean') - - return zip(kmeans.labels_, - centers, - silhouettes, - db_resp[0][out_id_colname]) - - -# -- Preprocessing steps - -def _extract_columns(db_resp, colnames): - """ - Extract the features from the query and pack them into a NumPy array - db_resp (plpy data object): result of the kmeans request - id_col_name (string): name of column which has the row id (not a - feature of the analysis) - """ - return np.array([db_resp[0][c] for c in colnames], - dtype=float).T - - -def _scale_data(features): - """ - Scale all input columns to center on 0 with a standard devation of 1 - - features (numpy matrix): features of dimension (n_features, n_samples) - """ - from sklearn.preprocessing import StandardScaler - return StandardScaler().fit_transform(features) diff --git a/src/py/crankshaft/test/test_clustering_kmeans.py b/src/py/crankshaft/test/test_clustering_kmeans.py index 9fd2504..879dab7 100644 --- a/src/py/crankshaft/test/test_clustering_kmeans.py +++ b/src/py/crankshaft/test/test_clustering_kmeans.py @@ -54,36 +54,3 @@ class KMeansTest(unittest.TestCase): self.assertEqual(len(np.unique(labels)), 2) self.assertEqual(len(c1), 20) self.assertEqual(len(c2), 20) - - -class KMeansNonspatialTest(unittest.TestCase): - """Testing class for k-means non-spatial""" - - def setUp(self): - self.params = {"subquery": "SELECT * FROM TABLE", - "n_clusters": 5} - - def test_kmeans_nonspatial(self): - """ - test for k-means non-spatial - """ - # data from: - # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn-cluster-kmeans - data_raw = [OrderedDict([("col1", [1, 1, 1, 4, 4, 4]), - ("col2", [2, 4, 0, 2, 4, 0]), - ("rowids", [1, 2, 3, 4, 5, 6])])] - - 
random_seeds.set_random_seeds(1234) - kmeans = Kmeans(FakeQueryRunner(data_raw)) - print 'asfasdfasd' - clusters = kmeans.nonspatial('subquery', ['col1', 'col2'], 2) - print str([c[0] for c in clusters]) - - cl1 = clusters[0][0] - cl2 = clusters[3][0] - - for idx, val in enumerate(clusters): - if idx < 3: - self.assertEqual(val[0], cl1) - else: - self.assertEqual(val[0], cl2) From bb3ff43f0fd4d898c6bee9b16d8cf78c8c3ec655 Mon Sep 17 00:00:00 2001 From: Mario de Frutos Date: Mon, 21 Nov 2016 17:25:08 +0100 Subject: [PATCH 87/96] Update .travis.yml --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index a5f544a..2e7ef27 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,8 +39,8 @@ before_install: - sudo apt-get -y autoremove - sudo apt-get -y install postgresql-9.5=9.5.2-3cdb1 - - sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-2ubuntu1 - - sudo apt-get -y install postgresql-plpython-9.5=9.5.2-2ubuntu1 + - sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb1 + - sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb1 - sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2 - sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2 From 280a5193efc06ad5720c735380e1247bc8e63c3b Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 22 Nov 2016 09:32:39 -0500 Subject: [PATCH 88/96] rename queryrunner to analysisdataprovider --- src/py/crankshaft/crankshaft/__init__.py | 2 +- ...ry_runner.py => analysis_data_provider.py} | 2 +- .../crankshaft/clustering/kmeans.py | 12 +++++------ .../crankshaft/crankshaft/clustering/moran.py | 20 +++++++++---------- .../crankshaft/space_time_dynamics/markov.py | 12 +++++------ 5 files changed, 24 insertions(+), 24 deletions(-) rename src/py/crankshaft/crankshaft/{query_runner.py => analysis_data_provider.py} (97%) diff --git a/src/py/crankshaft/crankshaft/__init__.py b/src/py/crankshaft/crankshaft/__init__.py index a03b040..a8060f8 
100644 --- a/src/py/crankshaft/crankshaft/__init__.py +++ b/src/py/crankshaft/crankshaft/__init__.py @@ -3,4 +3,4 @@ import crankshaft.random_seeds import crankshaft.clustering import crankshaft.space_time_dynamics import crankshaft.segmentation -import query_runner +import analysis_data_provider diff --git a/src/py/crankshaft/crankshaft/query_runner.py b/src/py/crankshaft/crankshaft/analysis_data_provider.py similarity index 97% rename from src/py/crankshaft/crankshaft/query_runner.py rename to src/py/crankshaft/crankshaft/analysis_data_provider.py index 5775e72..ad572ec 100644 --- a/src/py/crankshaft/crankshaft/query_runner.py +++ b/src/py/crankshaft/crankshaft/analysis_data_provider.py @@ -2,7 +2,7 @@ import plpy -class QueryRunner: +class AnalysisDataProvider: def get_markov(self, query): try: data = plpy.execute(query) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 200dc52..e59c25b 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -1,15 +1,15 @@ from sklearn.cluster import KMeans import numpy as np -from crankshaft.query_runner import QueryRunner +from crankshaft.analysis_data_provider import AnalysisDataProvider class Kmeans: - def __init__(self, query_runner=None): - if query_runner is None: - self.query_runner = QueryRunner() + def __init__(self, data_provider=None): + if data_provider is None: + self.data_provider = AnalysisDataProvider() else: - self.query_runner = query_runner + self.data_provider = data_provider def spatial(self, query, no_clusters, no_init=20): """ @@ -23,7 +23,7 @@ class Kmeans: "FROM ({query}) As a " "WHERE the_geom IS NOT NULL").format(query=query) - data = self.query_runner.get_spatial_kmeans(full_query) + data = self.data_provider.get_spatial_kmeans(full_query) # Unpack query response xs = data[0]['xs'] diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py 
b/src/py/crankshaft/crankshaft/clustering/moran.py index d2c99d6..7cc9ba5 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -8,7 +8,7 @@ Moran's I geostatistics (global clustering & outliers presence) import pysal as ps import plpy from collections import OrderedDict -from crankshaft.query_runner import QueryRunner +from crankshaft.analysis_data_provider import AnalysisDataProvider # crankshaft module import crankshaft.pysal_utils as pu @@ -17,11 +17,11 @@ import crankshaft.pysal_utils as pu class Moran: - def __init__(self, query_runner=None): - if query_runner is None: - self.query_runner = QueryRunner() + def __init__(self, data_provider=None): + if data_provider is None: + self.data_provider = AnalysisDataProvider() else: - self.query_runner = query_runner + self.data_provider = data_provider def global_stat(self, subquery, attr_name, w_type, num_ngbrs, permutations, geom_col, id_col): @@ -39,7 +39,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_moran(query) + result = self.data_provider.get_moran(query) # collect attributes attr_vals = pu.get_attributes(result) @@ -71,7 +71,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_moran(query) + result = self.data_provider.get_moran(query) attr_vals = pu.get_attributes(result) weight = pu.get_weight(result, w_type, num_ngbrs) @@ -100,7 +100,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_moran(query) + result = self.data_provider.get_moran(query) # collect attributes numer = pu.get_attributes(result, 1) @@ -132,7 +132,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_moran(query) + result = self.data_provider.get_moran(query) # collect attributes numer = pu.get_attributes(result, 1) @@ -165,7 +165,7 @@ class Moran: query = 
pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_moran(query) + result = self.data_provider.get_moran(query) # collect attributes attr1_vals = pu.get_attributes(result, 1) diff --git a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py index ea8dd32..51db0ef 100644 --- a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py +++ b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py @@ -7,15 +7,15 @@ import numpy as np import pysal as ps import plpy import crankshaft.pysal_utils as pu -from crankshaft.query_runner import QueryRunner +from crankshaft.analysis_data_provider import AnalysisDataProvider class Markov: - def __init__(self, query_runner=None): - if query_runner is None: - self.query_runner = QueryRunner() + def __init__(self, data_provider=None): + if data_provider is None: + self.data_provider = AnalysisDataProvider() else: - self.query_runner = query_runner + self.data_provider = data_provider def spatial_trend(self, subquery, time_cols, num_classes=7, w_type='knn', num_ngbrs=5, permutations=0, @@ -62,7 +62,7 @@ class Markov: query = pu.construct_neighbor_query(w_type, qvals) - query_result = self.query_runner.get_markov(query) + query_result = self.data_provider.get_markov(query) # build weight weights = pu.get_weight(query_result, w_type) From 6fe4fc96689934690ed30a241298293f5a39c14b Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 22 Nov 2016 09:57:47 -0500 Subject: [PATCH 89/96] rename queryrunner in tests --- src/py/crankshaft/test/test_clustering_kmeans.py | 8 ++++---- src/py/crankshaft/test/test_clustering_moran.py | 12 ++++++------ src/py/crankshaft/test/test_space_time_dynamics.py | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/py/crankshaft/test/test_clustering_kmeans.py b/src/py/crankshaft/test/test_clustering_kmeans.py index 879dab7..04f99f6 100644 --- 
a/src/py/crankshaft/test/test_clustering_kmeans.py +++ b/src/py/crankshaft/test/test_clustering_kmeans.py @@ -9,7 +9,7 @@ import numpy as np # sys.modules['plpy'] = plpy from helper import fixture_file from crankshaft.clustering import Kmeans -from crankshaft.query_runner import QueryRunner +from crankshaft.analysis_data_provider import AnalysisDataProvider import crankshaft.clustering as cc from crankshaft import random_seeds @@ -17,11 +17,11 @@ import json from collections import OrderedDict -class FakeQueryRunner(QueryRunner): +class FakeDataProvider(AnalysisDataProvider): def __init__(self, mocked_result): self.mocked_result = mocked_result - def get_spatial_kmeans(self, query): + def get_spatial_kmeans(self, w_type, params): return self.mocked_result def get_nonspatial_kmeans(self, query, standarize): @@ -45,7 +45,7 @@ class KMeansTest(unittest.TestCase): 'ids': d['ids']} for d in self.cluster_data] random_seeds.set_random_seeds(1234) - kmeans = Kmeans(FakeQueryRunner(data)) + kmeans = Kmeans(FakeDataProvider(data)) clusters = kmeans.spatial('subquery', 2) labels = [a[1] for a in clusters] c1 = [a for a in clusters if a[1] == 0] diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index 37cf7d0..5c8c5c9 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -3,18 +3,18 @@ import numpy as np from helper import fixture_file from crankshaft.clustering import Moran -from crankshaft.clustering import QueryRunner +from crankshaft.clustering import AnalysisDataProvider import crankshaft.pysal_utils as pu from crankshaft import random_seeds import json from collections import OrderedDict -class FakeQueryRunner(QueryRunner): +class FakeDataProvider(AnalysisDataProvider): def __init__(self, mock_data): self.mock_result = mock_data - def get_moran(self, query): + def get_moran(self, w_type, params): return self.mock_result @@ -67,7 +67,7 @@ class 
MoranTest(unittest.TestCase): ('neighbors', d['neighbors'])]) for d in self.neighbors_data] - moran = Moran(FakeQueryRunner(data)) + moran = Moran(FakeDataProvider(data)) random_seeds.set_random_seeds(1234) result = moran.local_stat('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') @@ -86,7 +86,7 @@ class MoranTest(unittest.TestCase): 'neighbors': d['neighbors']} for d in self.neighbors_data] random_seeds.set_random_seeds(1234) - moran = Moran(FakeQueryRunner(data)) + moran = Moran(FakeDataProvider(data)) result = moran.local_rate_stat('subquery', 'numerator', 'denominator', 'knn', 5, 99, 'the_geom', 'cartodb_id') result = [(row[0], row[1]) for row in result] @@ -102,7 +102,7 @@ class MoranTest(unittest.TestCase): 'attr1': d['value'], 'neighbors': d['neighbors']} for d in self.neighbors_data] random_seeds.set_random_seeds(1235) - moran = Moran(FakeQueryRunner(data)) + moran = Moran(FakeDataProvider(data)) result = moran.global_stat('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') diff --git a/src/py/crankshaft/test/test_space_time_dynamics.py b/src/py/crankshaft/test/test_space_time_dynamics.py index e58c7d4..20e659c 100644 --- a/src/py/crankshaft/test/test_space_time_dynamics.py +++ b/src/py/crankshaft/test/test_space_time_dynamics.py @@ -9,11 +9,11 @@ from helper import fixture_file from crankshaft.space_time_dynamics import Markov import crankshaft.space_time_dynamics as std from crankshaft import random_seeds -from crankshaft.query_runner import QueryRunner +from crankshaft.analysis_data_provider import AnalysisDataProvider import json -class FakeQueryRunner(QueryRunner): +class FakeDataProvider(AnalysisDataProvider): def __init__(self, data): self.mock_result = data @@ -85,7 +85,7 @@ class SpaceTimeTests(unittest.TestCase): 'attr15': d['y2009'], 'neighbors': d['neighbors']} for d in self.neighbors_data] # print(str(data[0])) - markov = Markov(FakeQueryRunner(data)) + markov = Markov(FakeDataProvider(data)) 
random_seeds.set_random_seeds(1234) result = markov.spatial_trend('subquery', From db501a2f025534550e8e113e5cba8340d7efac36 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 22 Nov 2016 15:20:14 +0000 Subject: [PATCH 90/96] move query generation to inside analysis data provider class --- .../crankshaft/analysis_data_provider.py | 8 +- .../crankshaft/crankshaft/clustering/moran.py | 76 ++++++++----------- .../crankshaft/space_time_dynamics/markov.py | 14 ++-- .../crankshaft/test/test_clustering_kmeans.py | 2 +- .../test/test_space_time_dynamics.py | 2 +- 5 files changed, 47 insertions(+), 55 deletions(-) diff --git a/src/py/crankshaft/crankshaft/analysis_data_provider.py b/src/py/crankshaft/crankshaft/analysis_data_provider.py index ad572ec..6af37f5 100644 --- a/src/py/crankshaft/crankshaft/analysis_data_provider.py +++ b/src/py/crankshaft/crankshaft/analysis_data_provider.py @@ -1,10 +1,12 @@ """class for fetching data""" import plpy +import pysal_utils as pu class AnalysisDataProvider: - def get_markov(self, query): + def get_markov(self, w_type, params): try: + query = pu.construct_neighbor_query(w_type, params) data = plpy.execute(query) if len(data) == 0: @@ -14,10 +16,12 @@ class AnalysisDataProvider: except plpy.SPIError, err: plpy.error('Analysis failed: %s' % err) - def get_moran(self, query): + def get_moran(self, w_type, params): """fetch data for moran's i analyses""" try: + query = pu.construct_neighbor_query(w_type, params) data = plpy.execute(query) + # if there are no neighbors, exit if len(data) == 0: return pu.empty_zipped_array(2) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 7cc9ba5..70a8501 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -31,15 +31,13 @@ class Moran: core clusters with PySAL. 
Andy Eschbacher """ - qvals = OrderedDict([("id_col", id_col), - ("attr1", attr_name), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + params = OrderedDict([("id_col", id_col), + ("attr1", attr_name), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - query = pu.construct_neighbor_query(w_type, qvals) - - result = self.data_provider.get_moran(query) + result = self.data_provider.get_moran(w_type, params) # collect attributes attr_vals = pu.get_attributes(result) @@ -63,15 +61,13 @@ class Moran: # geometries with attributes that are null are ignored # resulting in a collection of not as near neighbors - qvals = OrderedDict([("id_col", id_col), - ("attr1", attr), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + params = OrderedDict([("id_col", id_col), + ("attr1", attr), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - query = pu.construct_neighbor_query(w_type, qvals) - - result = self.data_provider.get_moran(query) + result = self.data_provider.get_moran(w_type, params) attr_vals = pu.get_attributes(result) weight = pu.get_weight(result, w_type, num_ngbrs) @@ -91,16 +87,14 @@ class Moran: Moran's I Rate (global) Andy Eschbacher """ - qvals = OrderedDict([("id_col", id_col), - ("attr1", numerator), - ("attr2", denominator) - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + params = OrderedDict([("id_col", id_col), + ("attr1", numerator), + ("attr2", denominator) + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - query = pu.construct_neighbor_query(w_type, qvals) - - result = self.data_provider.get_moran(query) + result = self.data_provider.get_moran(w_type, params) # collect attributes numer = pu.get_attributes(result, 1) @@ -123,16 +117,14 @@ class Moran: # geometries with values that are null are ignored # resulting in a collection of not as near neighbors - qvals = 
OrderedDict([("id_col", id_col), - ("numerator", numerator), - ("denominator", denominator), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + params = OrderedDict([("id_col", id_col), + ("numerator", numerator), + ("denominator", denominator), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - query = pu.construct_neighbor_query(w_type, qvals) - - result = self.data_provider.get_moran(query) + result = self.data_provider.get_moran(w_type, params) # collect attributes numer = pu.get_attributes(result, 1) @@ -156,16 +148,14 @@ class Moran: Moran's I (local) Bivariate (untested) """ - qvals = OrderedDict([("id_col", id_col), - ("attr1", attr1), - ("attr2", attr2), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + params = OrderedDict([("id_col", id_col), + ("attr1", attr1), + ("attr2", attr2), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - query = pu.construct_neighbor_query(w_type, qvals) - - result = self.data_provider.get_moran(query) + result = self.data_provider.get_moran(w_type, params) # collect attributes attr1_vals = pu.get_attributes(result, 1) diff --git a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py index 51db0ef..a1f0edb 100644 --- a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py +++ b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py @@ -54,15 +54,13 @@ class Markov: if len(time_cols) < 2: plpy.error('More than one time column needs to be passed') - qvals = {"id_col": id_col, - "time_cols": time_cols, - "geom_col": geom_col, - "subquery": subquery, - "num_ngbrs": num_ngbrs} + params = {"id_col": id_col, + "time_cols": time_cols, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} - query = pu.construct_neighbor_query(w_type, qvals) - - query_result = self.data_provider.get_markov(query) + query_result = 
self.data_provider.get_markov(w_type, params) # build weight weights = pu.get_weight(query_result, w_type) diff --git a/src/py/crankshaft/test/test_clustering_kmeans.py b/src/py/crankshaft/test/test_clustering_kmeans.py index 04f99f6..93633b0 100644 --- a/src/py/crankshaft/test/test_clustering_kmeans.py +++ b/src/py/crankshaft/test/test_clustering_kmeans.py @@ -21,7 +21,7 @@ class FakeDataProvider(AnalysisDataProvider): def __init__(self, mocked_result): self.mocked_result = mocked_result - def get_spatial_kmeans(self, w_type, params): + def get_spatial_kmeans(self, query): return self.mocked_result def get_nonspatial_kmeans(self, query, standarize): diff --git a/src/py/crankshaft/test/test_space_time_dynamics.py b/src/py/crankshaft/test/test_space_time_dynamics.py index 20e659c..d14563e 100644 --- a/src/py/crankshaft/test/test_space_time_dynamics.py +++ b/src/py/crankshaft/test/test_space_time_dynamics.py @@ -17,7 +17,7 @@ class FakeDataProvider(AnalysisDataProvider): def __init__(self, data): self.mock_result = data - def get_markov(self, query): + def get_markov(self, w_type, params): return self.mock_result From 7c63b66fddd8bc549d089229ee345e3d9327e698 Mon Sep 17 00:00:00 2001 From: Mario de Frutos Date: Tue, 29 Nov 2016 15:52:29 +0100 Subject: [PATCH 91/96] Update travis yml to the new postgres-9.5 package --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index a5f544a..6fdd8f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,9 +38,9 @@ before_install: - sudo apt-get -y remove --purge postgis - sudo apt-get -y autoremove - - sudo apt-get -y install postgresql-9.5=9.5.2-3cdb1 - - sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-2ubuntu1 - - sudo apt-get -y install postgresql-plpython-9.5=9.5.2-2ubuntu1 + - sudo apt-get -y install postgresql-9.5=9.5.2-3cdb2 + - sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb2 + - sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb2 - sudo 
apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2 - sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2 From 6ab1c285d946ed14835972fc2456756b8c16234c Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 30 Nov 2016 10:08:36 -0500 Subject: [PATCH 92/96] places query gen in kmeans data provider --- .../crankshaft/crankshaft/analysis_data_provider.py | 8 +++++++- src/py/crankshaft/crankshaft/clustering/kmeans.py | 11 ++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/py/crankshaft/crankshaft/analysis_data_provider.py b/src/py/crankshaft/crankshaft/analysis_data_provider.py index 6af37f5..1d1cf2b 100644 --- a/src/py/crankshaft/crankshaft/analysis_data_provider.py +++ b/src/py/crankshaft/crankshaft/analysis_data_provider.py @@ -38,8 +38,14 @@ class AnalysisDataProvider: except plpy.SPIError, err: plpy.error('Analysis failed: %s' % err) - def get_spatial_kmeans(self, query): + def get_spatial_kmeans(self, params): """fetch data for spatial kmeans""" + query = ("SELECT " + "array_agg({id_col} ORDER BY {id_col}) as ids," + "array_agg(ST_X({geom_col}) ORDER BY {id_col}) As xs," + "array_agg(ST_Y({geom_col}) ORDER BY {id_col}) As ys " + "FROM ({subquery}) As a " + "WHERE {geom_col} IS NOT NULL").format(**params) try: data = plpy.execute(query) return data diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index e59c25b..1e49115 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -16,14 +16,11 @@ class Kmeans: find centers based on clusters of latitude/longitude pairs query: SQL query that has a WGS84 geometry (the_geom) """ - full_query = ("SELECT " - "array_agg(cartodb_id ORDER BY cartodb_id) as ids," - "array_agg(ST_X(the_geom) ORDER BY cartodb_id) xs," - "array_agg(ST_Y(the_geom) ORDER BY cartodb_id) ys " - "FROM ({query}) As a " - "WHERE the_geom IS NOT NULL").format(query=query) + 
params = {"subquery": query, + "geom_col": "the_geom", + "id_col": "cartodb_id"} - data = self.data_provider.get_spatial_kmeans(full_query) + data = self.data_provider.get_spatial_kmeans(params) # Unpack query response xs = data[0]['xs'] From 59dc9434f710f432c391de79281e2bc4c1d9c2e4 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Thu, 1 Dec 2016 17:06:21 -0500 Subject: [PATCH 93/96] moves getis to class-based framework --- src/pg/sql/16_getis.sql | 5 +- .../crankshaft/analysis_data_provider.py | 16 ++++- .../crankshaft/crankshaft/clustering/getis.py | 64 +++++++++---------- .../crankshaft/crankshaft/clustering/moran.py | 1 - .../crankshaft/space_time_dynamics/markov.py | 1 + .../crankshaft/test/test_clustering_getis.py | 30 +++++---- .../crankshaft/test/test_clustering_moran.py | 2 +- 7 files changed, 68 insertions(+), 51 deletions(-) diff --git a/src/pg/sql/16_getis.sql b/src/pg/sql/16_getis.sql index 578f15a..e520227 100644 --- a/src/pg/sql/16_getis.sql +++ b/src/pg/sql/16_getis.sql @@ -11,8 +11,9 @@ CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (z_score NUMERIC, p_value NUMERIC, p_z_sim NUMERIC, rowid BIGINT) AS $$ - from crankshaft.clustering import getis_ord - return getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + from crankshaft.clustering import Getis + getis = Getis() + return getis.getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- TODO: make a version that accepts the values as arrays diff --git a/src/py/crankshaft/crankshaft/analysis_data_provider.py b/src/py/crankshaft/crankshaft/analysis_data_provider.py index 1d1cf2b..cbc27bc 100644 --- a/src/py/crankshaft/crankshaft/analysis_data_provider.py +++ b/src/py/crankshaft/crankshaft/analysis_data_provider.py @@ -4,7 +4,21 @@ import pysal_utils as pu class AnalysisDataProvider: + def get_getis(self, w_type, params): + """fetch data for getis ord's g""" + try: + query = 
pu.construct_neighbor_query(w_type, params) + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(4) + else: + return result + except plpy.SPIError, err: + plpy.error('Analysis failed: %s' % err) + def get_markov(self, w_type, params): + """fetch data for spatial markov""" try: query = pu.construct_neighbor_query(w_type, params) data = plpy.execute(query) @@ -50,4 +64,4 @@ class AnalysisDataProvider: data = plpy.execute(query) return data except plpy.SPIError, err: - plpy.error("Analysis failed: %s" % err) + plpy.error('Analysis failed: %s' % err) diff --git a/src/py/crankshaft/crankshaft/clustering/getis.py b/src/py/crankshaft/crankshaft/clustering/getis.py index a593e64..bef8f50 100644 --- a/src/py/crankshaft/crankshaft/clustering/getis.py +++ b/src/py/crankshaft/crankshaft/clustering/getis.py @@ -3,50 +3,48 @@ Getis-Ord's G geostatistics (hotspot/coldspot analysis) """ import pysal as ps -import plpy from collections import OrderedDict -# crankshaft module +# crankshaft modules import crankshaft.pysal_utils as pu +from crankshaft.analysis_data_provider import AnalysisDataProvider # High level interface --------------------------------------- -def getis_ord(subquery, attr, - w_type, num_ngbrs, permutations, geom_col, id_col): - """ - Getis-Ord's G* - Implementation building neighbors with a PostGIS database and PySAL's - Getis-Ord's G* hotspot/coldspot module. 
- Andy Eschbacher - """ +class Getis: + def __init__(self, data_provider=None): + if data_provider is None: + self.data_provider = AnalysisDataProvider() + else: + self.data_provider = data_provider - # geometries with attributes that are null are ignored - # resulting in a collection of not as near neighbors if kNN is chosen + def getis_ord(self, subquery, attr, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Getis-Ord's G* + Implementation building neighbors with a PostGIS database and PySAL's + Getis-Ord's G* hotspot/coldspot module. + Andy Eschbacher + """ - qvals = OrderedDict([("id_col", id_col), - ("attr1", attr), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors if kNN is chosen - query = pu.construct_neighbor_query(w_type, qvals) + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(4) - except plpy.SPIError, err: - plpy.error('Query failed: %s' % err) + result = self.data_provider.get_getis(w_type, qvals) + attr_vals = pu.get_attributes(result) - attr_vals = pu.get_attributes(result) + # build PySAL weight object + weight = pu.get_weight(result, w_type, num_ngbrs) - # build PySAL weight object - weight = pu.get_weight(result, w_type, num_ngbrs) + # calculate Getis-Ord's G* z- and p-values + getis = ps.esda.getisord.G_Local(attr_vals, weight, + star=True, permutations=permutations) - # calculate Getis-Ord's G* z- and p-values - getis = ps.esda.getisord.G_Local(attr_vals, weight, - star=True, permutations=permutations) - - return zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order) + return zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order) diff --git 
a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 70a8501..a42a981 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -6,7 +6,6 @@ Moran's I geostatistics (global clustering & outliers presence) # average of the their neighborhood import pysal as ps -import plpy from collections import OrderedDict from crankshaft.analysis_data_provider import AnalysisDataProvider diff --git a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py index f0c0b4a..3ad8273 100644 --- a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py +++ b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py @@ -2,6 +2,7 @@ Spatial dynamics measurements using Spatial Markov """ +# TODO: remove all plpy dependencies import numpy as np import pysal as ps diff --git a/src/py/crankshaft/test/test_clustering_getis.py b/src/py/crankshaft/test/test_clustering_getis.py index 835a121..61add11 100644 --- a/src/py/crankshaft/test/test_clustering_getis.py +++ b/src/py/crankshaft/test/test_clustering_getis.py @@ -1,18 +1,13 @@ import unittest import numpy as np +from helper import fixture_file -# from mock_plpy import MockPlPy -# plpy = MockPlPy() -# -# import sys -# sys.modules['plpy'] = plpy -from helper import plpy, fixture_file - -import crankshaft.clustering as cc +from crankshaft.clustering import Getis import crankshaft.pysal_utils as pu from crankshaft import random_seeds import json +from crankshaft.analysis_data_provider import AnalysisDataProvider # Fixture files produced as follows # @@ -42,6 +37,14 @@ import json # lgstar_queen.p_sim, lgstar_queen.p_z_sim))) +class FakeDataProvider(AnalysisDataProvider): + def __init__(self, mock_data): + self.mock_result = mock_data + + def get_getis(self, w_type, param): + return self.mock_result + + class GetisTest(unittest.TestCase): """Testing class for 
Getis-Ord's G* funtion This test replicates the work done in PySAL documentation: @@ -49,8 +52,6 @@ class GetisTest(unittest.TestCase): """ def setUp(self): - plpy._reset() - # load raw data for analysis self.neighbors_data = json.loads( open(fixture_file('neighbors_getis.json')).read()) @@ -64,10 +65,13 @@ class GetisTest(unittest.TestCase): data = [{'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors']} for d in self.neighbors_data] - plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) - result = cc.getis_ord('subquery', 'value', - 'queen', None, 999, 'the_geom', 'cartodb_id') + getis = Getis(FakeDataProvider(data)) + + result = getis.getis_ord('subquery', 'value', + 'queen', None, 999, 'the_geom', + 'cartodb_id') result = [(row[0], row[1]) for row in result] expected = np.array(self.getis_data)[:, 0:2] for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected): diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index 5c8c5c9..cc1930e 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -3,7 +3,7 @@ import numpy as np from helper import fixture_file from crankshaft.clustering import Moran -from crankshaft.clustering import AnalysisDataProvider +from crankshaft.analysis_data_provider import AnalysisDataProvider import crankshaft.pysal_utils as pu from crankshaft import random_seeds import json From 3c8ac7d45d30338a657cacb4086b0a3aade7f2b8 Mon Sep 17 00:00:00 2001 From: Mario de Frutos Date: Fri, 2 Dec 2016 12:36:11 +0100 Subject: [PATCH 94/96] Remove default postgres-9.5 from travis --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 6fdd8f0..e3c1883 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,6 +35,7 @@ before_install: - sudo apt-get -y remove --purge postgresql-9.2 - sudo apt-get -y remove --purge postgresql-9.3 - sudo apt-get -y remove --purge 
postgresql-9.4 + - sudo apt-get -y remove --purge postgresql-9.5 - sudo apt-get -y remove --purge postgis - sudo apt-get -y autoremove From 5c34e08c7d08430cd48161591c6fff46aad9d014 Mon Sep 17 00:00:00 2001 From: Mario de Frutos Date: Fri, 2 Dec 2016 12:49:32 +0100 Subject: [PATCH 95/96] Remove old configuration for postgresql 9.5 in travis --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e3c1883..ac2686d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,7 +36,10 @@ before_install: - sudo apt-get -y remove --purge postgresql-9.3 - sudo apt-get -y remove --purge postgresql-9.4 - sudo apt-get -y remove --purge postgresql-9.5 - - sudo apt-get -y remove --purge postgis + - sudo rm -rf /var/lib/postgresql/ + - sudo rm -rf /var/log/postgresql/ + - sudo rm -rf /etc/postgresql/ + - sudo apt-get -y remove --purge postgis-2.2 - sudo apt-get -y autoremove - sudo apt-get -y install postgresql-9.5=9.5.2-3cdb2 From f8739b6a686bc8fabe45c82f4fca727b465d65f3 Mon Sep 17 00:00:00 2001 From: Mario de Frutos Date: Fri, 2 Dec 2016 13:35:43 +0100 Subject: [PATCH 96/96] Version 0.5.0 release artifacts --- NEWS.md | 8 + release/crankshaft--0.4.2--0.5.0.sql | 1965 ++++++++++++++++ release/crankshaft--0.5.0.sql | 2070 +++++++++++++++++ release/crankshaft.control | 2 +- .../0.5.0/crankshaft/crankshaft/__init__.py | 6 + .../crankshaft/analysis_data_provider.py | 67 + .../crankshaft/clustering/__init__.py | 4 + .../crankshaft/crankshaft/clustering/getis.py | 50 + .../crankshaft/clustering/kmeans.py | 32 + .../crankshaft/crankshaft/clustering/moran.py | 208 ++ .../crankshaft/pysal_utils/__init__.py | 2 + .../crankshaft/pysal_utils/pysal_utils.py | 211 ++ .../crankshaft/crankshaft/random_seeds.py | 11 + .../crankshaft/segmentation/__init__.py | 1 + .../crankshaft/segmentation/segmentation.py | 176 ++ .../space_time_dynamics/__init__.py | 2 + .../crankshaft/space_time_dynamics/markov.py | 194 ++ 
.../python/0.5.0/crankshaft/requirements.txt | 5 + release/python/0.5.0/crankshaft/setup.py | 49 + .../0.5.0/crankshaft/test/fixtures/getis.json | 1 + .../crankshaft/test/fixtures/kmeans.json | 1 + .../crankshaft/test/fixtures/markov.json | 1 + .../0.5.0/crankshaft/test/fixtures/moran.json | 52 + .../crankshaft/test/fixtures/neighbors.json | 54 + .../test/fixtures/neighbors_getis.json | 1 + .../test/fixtures/neighbors_markov.json | 1 + .../python/0.5.0/crankshaft/test/helper.py | 13 + .../python/0.5.0/crankshaft/test/mock_plpy.py | 54 + .../crankshaft/test/test_clustering_getis.py | 78 + .../crankshaft/test/test_clustering_kmeans.py | 56 + .../crankshaft/test/test_clustering_moran.py | 112 + .../0.5.0/crankshaft/test/test_pysal_utils.py | 160 ++ .../crankshaft/test/test_segmentation.py | 64 + .../test/test_space_time_dynamics.py | 349 +++ src/pg/crankshaft.control | 2 +- 35 files changed, 6060 insertions(+), 2 deletions(-) create mode 100644 release/crankshaft--0.4.2--0.5.0.sql create mode 100644 release/crankshaft--0.5.0.sql create mode 100644 release/python/0.5.0/crankshaft/crankshaft/__init__.py create mode 100644 release/python/0.5.0/crankshaft/crankshaft/analysis_data_provider.py create mode 100644 release/python/0.5.0/crankshaft/crankshaft/clustering/__init__.py create mode 100644 release/python/0.5.0/crankshaft/crankshaft/clustering/getis.py create mode 100644 release/python/0.5.0/crankshaft/crankshaft/clustering/kmeans.py create mode 100644 release/python/0.5.0/crankshaft/crankshaft/clustering/moran.py create mode 100644 release/python/0.5.0/crankshaft/crankshaft/pysal_utils/__init__.py create mode 100644 release/python/0.5.0/crankshaft/crankshaft/pysal_utils/pysal_utils.py create mode 100644 release/python/0.5.0/crankshaft/crankshaft/random_seeds.py create mode 100644 release/python/0.5.0/crankshaft/crankshaft/segmentation/__init__.py create mode 100644 release/python/0.5.0/crankshaft/crankshaft/segmentation/segmentation.py create mode 100644 
release/python/0.5.0/crankshaft/crankshaft/space_time_dynamics/__init__.py create mode 100644 release/python/0.5.0/crankshaft/crankshaft/space_time_dynamics/markov.py create mode 100644 release/python/0.5.0/crankshaft/requirements.txt create mode 100644 release/python/0.5.0/crankshaft/setup.py create mode 100644 release/python/0.5.0/crankshaft/test/fixtures/getis.json create mode 100644 release/python/0.5.0/crankshaft/test/fixtures/kmeans.json create mode 100644 release/python/0.5.0/crankshaft/test/fixtures/markov.json create mode 100644 release/python/0.5.0/crankshaft/test/fixtures/moran.json create mode 100644 release/python/0.5.0/crankshaft/test/fixtures/neighbors.json create mode 100644 release/python/0.5.0/crankshaft/test/fixtures/neighbors_getis.json create mode 100644 release/python/0.5.0/crankshaft/test/fixtures/neighbors_markov.json create mode 100644 release/python/0.5.0/crankshaft/test/helper.py create mode 100644 release/python/0.5.0/crankshaft/test/mock_plpy.py create mode 100644 release/python/0.5.0/crankshaft/test/test_clustering_getis.py create mode 100644 release/python/0.5.0/crankshaft/test/test_clustering_kmeans.py create mode 100644 release/python/0.5.0/crankshaft/test/test_clustering_moran.py create mode 100644 release/python/0.5.0/crankshaft/test/test_pysal_utils.py create mode 100644 release/python/0.5.0/crankshaft/test/test_segmentation.py create mode 100644 release/python/0.5.0/crankshaft/test/test_space_time_dynamics.py diff --git a/NEWS.md b/NEWS.md index db04a2a..7779201 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +0.5.0 (2016-12-15) +------------------ +* Updated PULL_REQUEST_TEMPLATE +* Fixed a bug that flipped the order of the numerator and denominator when calculating Moran Local Rate, because the code previously sorted the keys alphabetically. +* Add new CDB_GetisOrdsG functions.
Getis-Ord's G\* is a geo-statistical measurement of the intensity of clustering of high or low values +* Add new outlier detection functions: CDB_StaticOutlier, CDB_PercentOutlier and CDB_StdDevOutlier +* Updates in the framework for accessing the Python functions. + 0.4.2 (2016-09-22) ------------------ * Bugfix for cdb_areasofinterestglobal: import correct modules diff --git a/release/crankshaft--0.4.2--0.5.0.sql b/release/crankshaft--0.4.2--0.5.0.sql new file mode 100644 index 0000000..4c68ffc --- /dev/null +++ b/release/crankshaft--0.4.2--0.5.0.sql @@ -0,0 +1,1965 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.4.2'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instance +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally.
+CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION + CDB_PyAggS(current_state Numeric[], current_row Numeric[]) + returns NUMERIC[] as $$ + BEGIN + if array_upper(current_state,1) is null then + RAISE NOTICE 'setting state %',array_upper(current_row,1); + current_state[1] = array_upper(current_row,1); + end if; + return array_cat(current_state,current_row) ; + END + $$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_pyagg' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) ( + SFUNC = CDB_PyAggS, + STYPE = Numeric[], + INITCOND = "{}" + ); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment( + target NUMERIC[], + features NUMERIC[], + target_features NUMERIC[], + target_ids NUMERIC[], + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC) +AS $$ + import numpy as np + import plpy + + from crankshaft.segmentation import create_and_predict_segment_agg + model_params = {'n_estimators': n_estimators, + 'max_depth': max_depth, + 'subsample': subsample, + 'learning_rate': learning_rate, + 'min_samples_leaf': min_samples_leaf} + + def unpack2D(data): + dimension = data.pop(0) + a = np.array(data, dtype=float) + return a.reshape(len(a)/dimension, dimension) + + return create_and_predict_segment_agg(np.array(target, dtype=float), + unpack2D(features), + unpack2D(target_features), + target_ids, + model_params) + +$$ LANGUAGE plpythonu; 
+ +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment ( + query TEXT, + variable_name TEXT, + target_table TEXT, + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC) +AS $$ + from crankshaft.segmentation import create_and_predict_segment + model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf} + return create_and_predict_segment(query,variable_name,target_table, model_params) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN target_query text, + IN weight_column text, + IN source_query text, + IN pop_column text, + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_id bigint[]; + t_geom geometry[]; + t_weight numeric[]; + s_id bigint[]; + s_geom geometry[]; + s_pop numeric[]; +BEGIN + EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight; + EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop; + RETURN QUERY + SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g; +END; +$$ language plpgsql; + +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN t_id bigint[], + IN t_geom geometry[], + IN t_weight numeric[], + IN s_id bigint[], + IN s_geom geometry[], + IN s_pop numeric[], + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id 
bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_type text; + s_type text; + t_center geometry[]; + s_center geometry[]; +BEGIN + t_type := GeometryType(t_geom[1]); + s_type := GeometryType(s_geom[1]); + IF t_type = 'POINT' THEN + t_center := t_geom; + ELSE + WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp; + END IF; + IF s_type = 'POINT' THEN + s_center := s_geom; + ELSE + WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp; + END IF; + RETURN QUERY + with target0 as( + SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td + ), + source0 as( + SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp + ), + prev0 as( + SELECT + source0.sg, + source0.sd as sourc_id, + coalesce(source0.sp,0) as sp, + target.td as targ_id, + coalesce(target.tw,0) as tw, + GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance + FROM source0 + CROSS JOIN LATERAL + ( + SELECT + * + FROM target0 + WHERE tw > minval + AND ST_DWithin(geography(source0.sc), geography(tc), radius) + ) AS target + ), + deno as( + SELECT + sourc_id, + sum(tw/distance) as h_deno + FROM + prev0 + GROUP BY sourc_id + ) + SELECT + p.sg as the_geom, + p.sourc_id as source_id, + p.targ_id as target_id, + case when p.distance > 1 then p.distance else 0.0 end as dist, + 100*(p.tw/p.distance)/d.h_deno as h, + p.sp*(p.tw/p.distance)/d.h_deno as hpop + FROM + prev0 p, + deno d + WHERE + p.targ_id = target AND + p.sourc_id = d.sourc_id; +END; +$$ language plpgsql; +-- 0: nearest neighbor(s) +-- 1: barymetric +-- 2: IDW +-- 3: krigin ---> TO DO + + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN query text, + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + 
output numeric; +BEGIN + EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs; + SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a; + + RETURN output; +END; +$$ +language plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN geomin geometry[], + IN colin numeric[], + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + gs2 geometry[]; + vs2 numeric[]; + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- output := -999.999; + + -- nearest neighbors + -- p1: limit the number of neighbors, 0-> closest one + IF method = 0 THEN + + IF p1 = 0 THEN + p1 := 1; + END IF; + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.v as v FROM a ORDER BY point<->a.g LIMIT p1::integer) + SELECT avg(b.v) INTO output FROM b; + RETURN output; + + -- barymetric + ELSIF method = 1 THEN + WITH a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b), + d as (SELECT v FROM c WHERE ST_Within(point, v)) + SELECT v INTO g FROM d; + IF g is null THEN + -- out of the realm of the input data + RETURN -888.888; + END IF; + -- vertex of the selected cell + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + 
SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg); + RETURN output; + + -- IDW + -- p1: limit the number of neighbors, 0->no limit + -- p2: order of distance decay, 0-> order 1 + ELSIF method = 2 THEN + + IF p2 = 0 THEN + p2 := 1; + END IF; + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g) + SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b; + IF p1::integer>0 THEN + gs2:=gs; + vs2:=vs; + FOR i IN 1..p1 + LOOP + gs2 := gs2 || gs[i]; + vs2 := vs2 || vs[i]; + END LOOP; + ELSE + gs2:=gs; + vs2:=vs; + END IF; + + WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v), + b as ( + SELECT + (1/ST_distance(point, a.g)^p2::integer) as k, + (a.v/ST_distance(point, a.g)^p2::integer) as f + FROM a + ) + SELECT sum(b.f)/sum(b.k) INTO output FROM b; + RETURN output; + + -- krigin + ELSIF method = 3 THEN + + -- TO DO + + END IF; + + RETURN -777.777; + +END; +$$ +language plpgsql IMMUTABLE; +-- ============================================================================================= +-- +-- CDB_Voronoi +-- +-- ============================================================================================= +CREATE OR REPLACE FUNCTION CDB_voronoi( + IN geomin geometry[], + IN buffer numeric DEFAULT 0.5, + IN tolerance numeric DEFAULT 1e-9 + ) +RETURNS geometry AS $$ +DECLARE + geomout geometry; +BEGIN + -- we need to make the geometry calculations in (pseudo)meters!!! 
+ with a as ( + SELECT unnest(geomin) as g1 + ), + b as( + SELECT st_transform(g1, 3857) g2 from a + ) + SELECT array_agg(g2) INTO geomin from b; + + WITH + convexhull_1 as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ (st_area(ST_ConvexHull(ST_Collect(geomin)))/PI()) as r + ), + clipper as( + SELECT + st_buffer(ST_MinimumBoundingCircle(a.g), buffer*a.r) as g + FROM convexhull_1 a + ), + env0 as ( + SELECT + (st_dumppoints(st_expand(a.g, buffer*a.r))).geom as e + FROM convexhull_1 a + ), + env as ( + SELECT + array_agg(env0.e) as e + FROM env0 + ), + sample AS ( + SELECT + ST_Collect(geomin || env.e) as geom + FROM env + ), + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as cg + ), + tin as ( + SELECT + ST_Dump(ST_DelaunayTriangles(geom, tolerance, 0)) as gd + FROM + sample + ), + tin_polygons as ( + SELECT + (gd).Path as id, + (gd).Geom as pg, + ST_Centroid(ST_MinimumBoundingCircle((gd).Geom, 180)) as ct + FROM tin + ), + tin_lines as ( + SELECT + id, + ST_ExteriorRing(pg) as lg + FROM tin_polygons + ), + tin_nodes as ( + SELECT + id, + ST_PointN(lg,1) p1, + ST_PointN(lg,2) p2, + ST_PointN(lg,3) p3 + FROM tin_lines + ), + tin_edges AS ( + SELECT + p.id, + UNNEST(ARRAY[ + ST_MakeLine(n.p1,n.p2) , + ST_MakeLine(n.p2,n.p3) , + ST_MakeLine(n.p3,n.p1)]) as Edge, + ST_Force2D(cdb_crankshaft._Find_Circle(n.p1,n.p2,n.p3)) as ct, + CASE WHEN st_distance(p.ct, ST_ExteriorRing(p.pg)) < tolerance THEN + TRUE + ELSE FALSE END AS ctx, + p.pg, + ST_within(p.ct, convexhull.cg) as ctin + FROM + tin_polygons p, + tin_nodes n, + convexhull + WHERE p.id = n.id + ), + voro_nodes as ( + SELECT + CASE WHEN x.ctx = TRUE THEN + ST_Centroid(x.edge) + ELSE + x.ct + END as xct, + CASE WHEN y.id is null THEN + CASE WHEN x.ctin = TRUE THEN + ST_SetSRID(ST_MakePoint( + ST_X(x.ct) + ((ST_X(ST_Centroid(x.edge)) - ST_X(x.ct)) * (1+buffer)), + ST_Y(x.ct) + ((ST_Y(ST_Centroid(x.edge)) - ST_Y(x.ct)) * (1+buffer)) + ), ST_SRID(x.ct)) + END + ELSE + y.ct + END as yct 
+ FROM + tin_edges x + LEFT OUTER JOIN + tin_edges y + ON x.id <> y.id AND ST_Equals(x.edge, y.edge) + ), + voro_edges as( + SELECT + ST_LineMerge(ST_Collect(ST_MakeLine(xct, yct))) as v + FROM + voro_nodes + ), + voro_cells as( + SELECT + ST_Polygonize( + ST_Node( + ST_LineMerge( + ST_Union(v, ST_ExteriorRing( + ST_Convexhull(v) + ) + ) + ) + ) + ) as g + FROM + voro_edges + ), + voro_set as( + SELECT + (st_dump(v.g)).geom as g + FROM voro_cells v + ), + clipped_voro as( + SELECT + ST_intersection(c.g, v.g) as g + FROM + voro_set v, + clipper c + WHERE + ST_GeometryType(v.g) = 'ST_Polygon' + ) + SELECT + st_collect( + ST_Transform( + ST_ConvexHull(g), + 4326 + ) + ) + INTO geomout + FROM + clipped_voro; + RETURN geomout; +END; +$$ language plpgsql IMMUTABLE; + +/** ---------------------------------------------------------------------------------------- + * @function : FindCircle + * @precis : Function that determines if three points form a circle. If so a table containing + * centre and radius is returned. If not, a null table is returned. + * @version : 1.0.1 + * @param : p_pt1 : First point in curve + * @param : p_pt2 : Second point in curve + * @param : p_pt3 : Third point in curve + * @return : geometry : In which X,Y ordinates are the centre X, Y and the Z being the radius of found circle + * or NULL if three points do not form a circle. + * @history : Simon Greener - Feb 2012 - Original coding. + * Rafa de la Torre - Aug 2016 - Small fix for type checking + * @copyright : Simon Greener @ 2012 + * Licensed under a Creative Commons Attribution-Share Alike 2.5 Australia License. 
(http://creativecommons.org/licenses/by-sa/2.5/au/) +**/ +CREATE OR REPLACE FUNCTION _Find_Circle( + IN p_pt1 geometry, + IN p_pt2 geometry, + IN p_pt3 geometry) + RETURNS geometry AS +$BODY$ +DECLARE + v_Centre geometry; + v_radius NUMERIC; + v_CX NUMERIC; + v_CY NUMERIC; + v_dA NUMERIC; + v_dB NUMERIC; + v_dC NUMERIC; + v_dD NUMERIC; + v_dE NUMERIC; + v_dF NUMERIC; + v_dG NUMERIC; +BEGIN + IF ( p_pt1 IS NULL OR p_pt2 IS NULL OR p_pt3 IS NULL ) THEN + RAISE EXCEPTION 'All supplied points must be not null.'; + RETURN NULL; + END IF; + IF ( ST_GeometryType(p_pt1) <> 'ST_Point' OR + ST_GeometryType(p_pt2) <> 'ST_Point' OR + ST_GeometryType(p_pt3) <> 'ST_Point' ) THEN + RAISE EXCEPTION 'All supplied geometries must be points.'; + RETURN NULL; + END IF; + v_dA := ST_X(p_pt2) - ST_X(p_pt1); + v_dB := ST_Y(p_pt2) - ST_Y(p_pt1); + v_dC := ST_X(p_pt3) - ST_X(p_pt1); + v_dD := ST_Y(p_pt3) - ST_Y(p_pt1); + v_dE := v_dA * (ST_X(p_pt1) + ST_X(p_pt2)) + v_dB * (ST_Y(p_pt1) + ST_Y(p_pt2)); + v_dF := v_dC * (ST_X(p_pt1) + ST_X(p_pt3)) + v_dD * (ST_Y(p_pt1) + ST_Y(p_pt3)); + v_dG := 2.0 * (v_dA * (ST_Y(p_pt3) - ST_Y(p_pt2)) - v_dB * (ST_X(p_pt3) - ST_X(p_pt2))); + -- If v_dG is zero then the three points are collinear and no finite-radius + -- circle through them exists. 
+ IF ( v_dG = 0 ) THEN + RETURN NULL; + ELSE + v_CX := (v_dD * v_dE - v_dB * v_dF) / v_dG; + v_CY := (v_dA * v_dF - v_dC * v_dE) / v_dG; + v_Radius := SQRT(POWER(ST_X(p_pt1) - v_CX,2) + POWER(ST_Y(p_pt1) - v_CY,2) ); + END IF; + RETURN ST_SetSRID(ST_MakePoint(v_CX, v_CY, v_radius),ST_Srid(p_pt1)); +END; +$BODY$ + LANGUAGE plpgsql VOLATILE STRICT; + +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, 
geom_col, id_col);
+
+$$ LANGUAGE SQL;
+
+-- Moran's I only for HH and HL (public-facing function)
+-- Filters the rows of _CDB_AreasOfInterestLocal to quadrants 'HH' and 'HL'.
+CREATE OR REPLACE FUNCTION
+  CDB_GetSpatialHotspots(
+      subquery TEXT,
+      column_name TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+  RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
+  WHERE quads IN ('HH', 'HL');
+
+$$ LANGUAGE SQL;
+
+-- Moran's I only for LL and LH (public-facing function)
+-- Filters the rows of _CDB_AreasOfInterestLocal to quadrants 'LL' and 'LH'.
+CREATE OR REPLACE FUNCTION
+  CDB_GetSpatialColdspots(
+      subquery TEXT,
+      attr TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+  RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
+  WHERE quads IN ('LL', 'LH');
+
+$$ LANGUAGE SQL;
+
+-- Moran's I only for LH and HL (public-facing function)
+-- Filters the rows of _CDB_AreasOfInterestLocal to quadrants 'HL' and 'LH'.
+CREATE OR REPLACE FUNCTION
+  CDB_GetSpatialOutliers(
+      subquery TEXT,
+      attr TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+  RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
+  WHERE quads IN ('HL', 'LH');
+
+$$ LANGUAGE SQL;
+
+-- Moran's I Global Rate (public-facing function)
+-- Delegates to crankshaft.clustering.moran_rate with the numerator and
+-- denominator column names; returns (moran, significance).
+CREATE OR REPLACE FUNCTION
+  CDB_AreasOfInterestGlobalRate(
+      subquery TEXT,
+      numerator TEXT,
+      denominator TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS TABLE (moran FLOAT, significance FLOAT)
+AS $$
+  # Import the function that is actually called below; importing
+  # moran_local here left moran_rate undefined (NameError on every call).
+  from crankshaft.clustering import moran_rate
+  # TODO: use named parameters or a dictionary
+  return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
+$$ LANGUAGE plpythonu;
+
+
+-- Moran's I Local Rate (internal function)
+-- Delegates to crankshaft.clustering.moran_local_rate; no parameter defaults.
+CREATE OR REPLACE FUNCTION
+  _CDB_AreasOfInterestLocalRate(
+      subquery TEXT,
+      numerator TEXT,
+      denominator TEXT,
+      w_type TEXT,
+      num_ngbrs INT,
+      permutations INT,
+      geom_col TEXT,
+      id_col TEXT)
+RETURNS
+TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+  from crankshaft.clustering import moran_local_rate
+  # TODO: use named parameters or a dictionary
+  return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
+$$ LANGUAGE plpythonu;
+
+-- Moran's I Local Rate (public-facing function)
+-- Supplies defaults and forwards to the internal _CDB_AreasOfInterestLocalRate.
+CREATE OR REPLACE FUNCTION
+  CDB_AreasOfInterestLocalRate(
+      subquery TEXT,
+      numerator TEXT,
+      denominator TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS
+TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col);
+
+$$ LANGUAGE SQL;
+
+-- Moran's I Local Rate only for HH and HL (public-facing function)
+CREATE OR REPLACE FUNCTION
+  CDB_GetSpatialHotspotsRate(
+      subquery TEXT,
+      numerator TEXT,
+      denominator TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS
+TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
+  WHERE quads IN ('HH', 'HL');
+
+$$ LANGUAGE SQL;
+
+-- Moran's I Local Rate only for LL and LH (public-facing function)
+CREATE OR REPLACE FUNCTION
+  CDB_GetSpatialColdspotsRate(
+      subquery TEXT,
+      numerator TEXT,
+      denominator TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS
+TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
+  WHERE quads IN ('LL', 'LH');
+
+$$ LANGUAGE SQL;
+
+-- Moran's I Local Rate only for LH and HL (public-facing function)
+CREATE OR REPLACE FUNCTION
+  CDB_GetSpatialOutliersRate(
+      subquery TEXT,
+      numerator TEXT,
+      denominator TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS
+TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
+  WHERE quads IN ('HL', 'LH');
+
+$$ LANGUAGE SQL;
+-- K-means wrapper: forwards query, no_clusters and no_init to
+-- crankshaft.clustering.kmeans and returns (cartodb_id, cluster_no) rows.
+CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
+RETURNS table (cartodb_id integer, cluster_no integer) as $$
+
+    from crankshaft.clustering import kmeans
+    return kmeans(query,no_clusters,no_init)
+
+$$ language plpythonu;
+
+
+-- State-transition function of the CDB_WeightedMean aggregate.
+-- state is [sum(x*weight), sum(y*weight), sum(weight)]; rows with a NULL
+-- weight or geometry leave the state unchanged.
+CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
+RETURNS Numeric[] AS
+$$
+DECLARE
+    newX NUMERIC;
+    newY NUMERIC;
+    newW NUMERIC;
+BEGIN
+    IF weight IS NULL OR the_geom IS NULL THEN
+        newX = state[1];
+        newY = state[2];
+        newW = state[3];
+    ELSE
+        newX = state[1] + ST_X(the_geom)*weight;
+        newY = state[2] + ST_Y(the_geom)*weight;
+        newW = state[3] + weight;
+    END IF;
+    RETURN Array[newX,newY,newW];
+
+END
+$$ LANGUAGE plpgsql;
+
+-- Final function of the CDB_WeightedMean aggregate: divides the weighted
+-- coordinate sums by the total weight and returns a point with SRID 4326.
+-- When the total weight is 0 the un-normalised sums are used, which avoids
+-- a division by zero.
+CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
+RETURNS GEOMETRY AS
+$$
+BEGIN
+    IF state[3] = 0 THEN
+        RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326);
+    ELSE
+        RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326);
+    END IF;
+END
+$$ LANGUAGE plpgsql;
+
+-- Create aggregate if it did not exist
+DO $$
+BEGIN
+    IF NOT EXISTS (
+        SELECT *
+        FROM pg_catalog.pg_proc p
+            LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
+        WHERE n.nspname = 'cdb_crankshaft'
+            AND p.proname = 'cdb_weightedmean'
+            AND p.proisagg)
+    THEN
+        CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
+            SFUNC = CDB_WeightedMeanS,
+            FINALFUNC = CDB_WeightedMeanF,
+            STYPE = Numeric[],
+            INITCOND = "{0.0,0.0,0.0}"
+        );
+    END IF;
+END
+$$ LANGUAGE plpgsql;
+-- Spatial Markov
+
+-- input table format:
+-- id | geom | date_1 | date_2 | date_3
+-- 1 | Pt1 | 12.3 | 13.1 | 14.2
+-- 2 | Pt2 | 11.0 | 13.2 | 12.5
+-- ...
+-- Sample function call:
+-- SELECT CDB_SpatialMarkovTrend('SELECT * FROM real_estate',
+--                               Array['date_1', 'date_2', 'date_3'])
+
+-- Thin PL/Python wrapper: hands every argument, in declaration order, to
+-- crankshaft.space_time_dynamics.spatial_markov_trend and returns its rows
+-- as (trend, trend_up, trend_down, volatility, rowid).
+CREATE OR REPLACE FUNCTION
+  CDB_SpatialMarkovTrend (
+      subquery TEXT,
+      time_cols TEXT[],
+      num_classes INT DEFAULT 7,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT)
+AS $$
+
+  from crankshaft.space_time_dynamics import spatial_markov_trend
+
+  ## TODO: use named parameters or a dictionary
+  markov_args = (subquery, time_cols, num_classes, w_type,
+                 num_ngbrs, permutations, geom_col, id_col)
+  return spatial_markov_trend(*markov_args)
+$$ LANGUAGE plpythonu;
+
+-- input table format: identical to above but in a predictable format
+-- Sample function call:
+-- SELECT cdb_spatial_markov('SELECT * FROM real_estate',
+--                           'date_1')
+
+
+-- CREATE OR REPLACE FUNCTION
+--   cdb_spatial_markov (
+--       subquery TEXT,
+--       time_col_min text,
+--       time_col_max text,
+--       date_format text, -- '_YYYY_MM_DD'
+--       num_time_per_bin INT DEFAULT 1,
+--       permutations INT DEFAULT 99,
+--       geom_column TEXT DEFAULT 'the_geom',
+--       id_col TEXT DEFAULT 'cartodb_id',
+--       w_type TEXT DEFAULT 'knn',
+--       num_ngbrs int DEFAULT 5)
+--   RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
+-- AS $$
+--   plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
+--   from crankshaft.clustering import moran_local
+--   # TODO: use named parameters or a dictionary
+--   return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
+-- $$ LANGUAGE plpythonu;
+--
+-- -- input table format:
+-- -- id | geom | date | measurement
+-- -- 1 | Pt1 | 12/3 | 13.2
+-- -- 2 | Pt2 | 11/5 | 11.3
+-- -- 3 | Pt1 | 11/13 | 12.9
+-- -- 4 | Pt3 | 12/19 | 10.1
+-- -- ...
+--
+-- CREATE OR REPLACE FUNCTION
+--   cdb_spatial_markov (
+--       subquery TEXT,
+--       time_col text,
+--       num_time_per_bin INT DEFAULT 1,
+--       permutations INT DEFAULT 99,
+--       geom_column TEXT DEFAULT 'the_geom',
+--       id_col TEXT DEFAULT 'cartodb_id',
+--       w_type TEXT DEFAULT 'knn',
+--       num_ngbrs int DEFAULT 5)
+--   RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
+-- AS $$
+--   plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
+--   from crankshaft.clustering import moran_local
+--   # TODO: use named parameters or a dictionary
+--   return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
+-- $$ LANGUAGE plpythonu;
+-- Based on:
+-- https://github.com/mapbox/polylabel/blob/master/index.js
+-- https://sites.google.com/site/polesofinaccessibility/
+-- Requires: https://github.com/CartoDB/cartodb-postgresql
+
+-- Pole of inaccessibility: searches for the point inside `polygon` that is
+-- farthest from its rings, refining a grid of square cells until no cell
+-- can beat the best distance found by more than `tolerance`.
+--
+-- @param polygon   input polygon; it is transformed to SRID 3857 internally
+-- @param tolerance stop refining a cell when its best possible distance
+--                  exceeds the current best by no more than this value
+--
+-- Returns: the centroid of the best cell (or the polygon centroid when no
+--          cell beats it), transformed to SRID 4326.
+CREATE OR REPLACE FUNCTION CDB_PIA(
+    IN polygon geometry,
+    IN tolerance numeric DEFAULT 1.0
+    )
+RETURNS geometry AS $$
+DECLARE
+    env geometry[];
+    cells geometry[];
+    cell geometry;
+    best_c geometry;
+    best_d numeric;
+    test_d numeric;
+    test_mx numeric;
+    test_h numeric;
+    test_cells geometry[];
+    width numeric;
+    height numeric;
+    h numeric;
+    i integer;
+    n integer;
+    sqr numeric;
+    p geometry;
+BEGIN
+    sqr := |/2;
+    polygon := ST_Transform(polygon, 3857);
+
+    -- grid #0 cell size
+    height := ST_YMax(polygon) - ST_YMin(polygon);
+    width := ST_XMax(polygon) - ST_XMin(polygon);
+    h := 0.5*LEAST(height, width);
+
+    -- grid #0
+    with c1 as(
+        SELECT cdb_crankshaft.CDB_RectangleGrid(polygon, h, h) as c
+    )
+    SELECT array_agg(c) INTO cells FROM c1;
+
+    -- 1st guess: centroid. Remember the geometry as well as the distance,
+    -- otherwise best_c stays NULL when no cell improves on the centroid.
+    best_c := ST_Centroid(polygon);
+    best_d := cdb_crankshaft._Signed_Dist(polygon, best_c);
+
+    -- looping the loop
+    -- coalesce: with a NULL cell array "i > n" is NULL and EXIT never fires
+    n := coalesce(array_length(cells,1), 0);
+    i := 1;
+    LOOP
+
+        EXIT WHEN i > n;
+
+        cell := cells[i];
+        i := i+1;
+
+        -- cell side size, it's square
+        test_h := ST_XMax(cell) - ST_XMin(cell) ;
+
+        -- check distance
+        test_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(cell));
+        IF test_d > best_d THEN
+            best_d := test_d;
+            -- keep the cell just tested; i already points at the next one
+            best_c := cell;
+        END IF;
+
+        -- longest distance within the cell
+        test_mx := test_d + (test_h/2 * sqr);
+
+        -- if the cell has no chance to contain the desired point, continue
+        CONTINUE WHEN test_mx - best_d <= tolerance;
+
+        -- resample the cell
+        with c1 as(
+            SELECT cdb_crankshaft.CDB_RectangleGrid(cell, test_h/2, test_h/2) as c
+        )
+        SELECT array_agg(c) INTO test_cells FROM c1;
+
+        -- concat the new cells to the former array
+        cells := cells || test_cells;
+
+        -- prepare next iteration
+        n := array_length(cells,1);
+
+    END LOOP;
+
+    RETURN ST_transform(ST_Centroid(best_c), 4326);
+
+END;
+$$ language plpgsql IMMUTABLE;
+
+
+-- signed distance point to polygon with holes
+-- negative if the point is outside the polygon
+--
+-- The distance is the minimum over the exterior ring and every interior
+-- ring; the sign comes from ST_Within(point, polygon).
+CREATE OR REPLACE FUNCTION _Signed_Dist(
+    IN polygon geometry,
+    IN point geometry
+    )
+RETURNS numeric AS $$
+DECLARE
+    i integer;
+    within integer;
+    holes integer;
+    dist numeric;
+BEGIN
+    dist := 1e999;
+    SELECT LEAST(dist, ST_distance(point, ST_ExteriorRing(polygon))::numeric) INTO dist;
+    SELECT CASE WHEN ST_Within(point,polygon) THEN 1 ELSE -1 END INTO within;
+    SELECT ST_NumInteriorRings(polygon) INTO holes;
+    IF holes > 0 THEN
+        FOR i IN 1..holes
+        LOOP
+            SELECT LEAST(dist, ST_distance(point, ST_InteriorRingN(polygon, i))::numeric) INTO dist;
+        END LOOP;
+    END IF;
+    dist := dist * within::numeric;
+    RETURN dist;
+END;
+$$ language plpgsql IMMUTABLE;
+--
+-- Iterative densification of a set of points using Delaunay triangulation
+-- the new points have as assigned value the average value of the 3 vertex (centroid)
+--
+-- @param geomin - array of geometries (points)
+--
+-- @param colin - array of numeric values in that points
+--
+-- @param iterations -
integer, number of iterations
+--
+--
+-- Returns: TABLE(geomout geometry, colout numeric)
+--
+--
+CREATE OR REPLACE FUNCTION CDB_Densify(
+    IN geomin geometry[],
+    IN colin numeric[],
+    IN iterations integer
+    )
+RETURNS TABLE(geomout geometry, colout numeric) AS $$
+DECLARE
+    geotemp geometry[];
+    coltemp numeric[];
+    i integer;
+    gs geometry[];
+    g geometry;
+    vertex geometry[];
+    va numeric;
+    vb numeric;
+    vc numeric;
+    center geometry;
+    centerval numeric;
+    tmp integer;
+BEGIN
+    -- working copies that grow by one point/value per triangle
+    geotemp := geomin;
+    coltemp := colin;
+    FOR i IN 1..iterations
+    LOOP
+        -- generate TIN
+        WITH a as (SELECT unnest(geotemp) AS e),
+        b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
+        c as (SELECT (ST_Dump(t)).geom AS v FROM b)
+        SELECT array_agg(v) INTO gs FROM c;
+        -- loop cells
+        FOREACH g IN ARRAY gs
+        LOOP
+            -- append centroid
+            SELECT ST_Centroid(g) INTO center;
+            geotemp := array_append(geotemp, center);
+            -- retrieve the value of each vertex
+            -- (values are matched to vertices by ST_Equals on the geometry)
+            WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v)
+            SELECT array_agg(v) INTO vertex FROM a;
+            WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c)
+            SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
+            WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c)
+            SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
+            WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c)
+            SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
+            -- calc the value at the center
+            centerval := (va + vb + vc) / 3;
+            -- append the value
+            coltemp := array_append(coltemp, centerval);
+        END LOOP;
+    END LOOP;
+    RETURN QUERY SELECT unnest(geotemp ) as geomout, unnest(coltemp ) as colout;
+END;
+$$ language plpgsql IMMUTABLE;
+-- Builds a TIN over the densified point set: densifies (geomin, colin) with
+-- cdb_crankshaft.CDB_Densify, triangulates the result and returns one row
+-- per triangle with the mean of its three vertex values.
+CREATE OR REPLACE FUNCTION CDB_TINmap(
+    IN geomin geometry[],
+    IN colin numeric[],
+    IN iterations integer
+    )
+RETURNS TABLE(geomout geometry, colout numeric) AS $$
+DECLARE
+    p geometry[];
+    vals numeric[];
+    gs geometry[];
+    g geometry;
+    vertex geometry[];
+    centerval numeric;
+    va numeric;
+    vb numeric;
+    vc numeric;
+    coltemp numeric[];
+BEGIN
+    SELECT array_agg(dens.geomout), array_agg(dens.colout) INTO p, vals FROM cdb_crankshaft.CDB_Densify(geomin, colin, iterations) dens;
+    WITH a as (SELECT unnest(p) AS e),
+    b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
+    c as (SELECT (ST_Dump(t)).geom AS v FROM b)
+    SELECT array_agg(v) INTO gs FROM c;
+    FOREACH g IN ARRAY gs
+    LOOP
+        -- retrieve the vertex of each triangle
+        WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v)
+        SELECT array_agg(v) INTO vertex FROM a;
+        -- retrieve the value of each vertex
+        WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c)
+        SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
+        WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c)
+        SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
+        WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c)
+        SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
+        -- calc the value at the center
+        centerval := (va + vb + vc) / 3;
+        -- append the value
+        coltemp := array_append(coltemp, centerval);
+    END LOOP;
+    RETURN QUERY SELECT unnest(gs) as geomout, unnest(coltemp ) as colout;
+END;
+$$ language plpgsql IMMUTABLE;
+-- Contour map: interpolates `colin` over a rectangular grid covering the
+-- buffered convex hull of `geomin`, classifies the cell values into `steps`
+-- bins and returns one unioned geometry per bin with min/max/avg value.
+--
+-- @param buffer      multiplier applied to the hull-derived radius used to
+--                    expand the convex hull
+-- @param intmethod   1 = barycentric on a precomputed TIN (_interp_in_tin);
+--                    any other value is passed to CDB_SpatialInterpolation
+-- @param classmethod 0 = equal interval, 1 = heads/tails, 2 = Jenks,
+--                    otherwise quantiles
+-- @param steps       number of bins
+-- @param max_time    feeds the cell-count estimate below; its sign is
+--                    flipped into `resolution` (see the issue #121 note)
+CREATE OR REPLACE FUNCTION CDB_Contour(
+    IN geomin geometry[],
+    IN colin numeric[],
+    IN buffer numeric,
+    IN intmethod integer,
+    IN classmethod integer,
+    IN steps integer,
+    IN max_time integer DEFAULT 60000
+    )
+RETURNS TABLE(
+    the_geom geometry,
+    bin integer,
+    min_value numeric,
+    max_value numeric,
+    avg_value numeric
+) AS $$
+DECLARE
+    cell_count integer;
+    tin geometry[];
+    resolution integer;
+BEGIN
+
+    -- nasty trick to override issue #121
+    IF max_time = 0 THEN
+        max_time = -90;
+    END IF;
+    resolution := max_time;
+    max_time := -1 * resolution;
+
+    -- calc the optimal number of cells for the current dataset
+    SELECT
+        CASE intmethod
+            WHEN 0 THEN round(3.7745903782 * max_time - 9.4399210051 * array_length(geomin,1) - 1350.8778213073)
+            WHEN 1 THEN round(2.2855592156 * max_time - 87.285217133 * array_length(geomin,1) + 17255.7085601797)
+            WHEN 2 THEN round(0.9799471999 * max_time - 127.0334085369 * array_length(geomin,1) + 22707.9579721218)
+            ELSE 10000
+        END INTO cell_count;
+
+    -- we don't have iterative barycentric interpolation in CDB_interpolation,
+    -- and it's a costly function, so let's make a custom one here till
+    -- we update the code
+    -- tin := ARRAY[]::geometry[];
+    IF intmethod=1 THEN
+        WITH
+            a as (SELECT unnest(geomin) AS e),
+            b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
+            c as (SELECT (ST_Dump(t)).geom as v FROM b)
+        SELECT array_agg(v) INTO tin FROM c;
+    END IF;
+    -- Delaunay stuff performed just ONCE!!
+
+    -- magic
+    RETURN QUERY
+    WITH
+    convexhull as (
+        SELECT
+            ST_ConvexHull(ST_Collect(geomin)) as g,
+            buffer * |/ st_area(ST_ConvexHull(ST_Collect(geomin)))/PI() as r
+    ),
+    envelope as (
+        SELECT
+            st_expand(a.g, a.r) as e
+        FROM convexhull a
+    ),
+    envelope3857 as(
+        SELECT
+            ST_Transform(e, 3857) as geom
+        FROM envelope
+    ),
+    -- cell size: derived from cell_count unless a positive resolution is set
+    resolution as(
+        SELECT
+            CASE WHEN resolution <= 0 THEN
+                round(|/ (
+                    ST_area(geom) / abs(cell_count)
+                ))
+            ELSE
+                resolution
+            END AS cell
+        FROM envelope3857
+    ),
+    grid as(
+        SELECT
+            ST_Transform(cdb_crankshaft.CDB_RectangleGrid(e.geom, r.cell, r.cell), 4326) as geom
+        FROM envelope3857 e, resolution r
+    ),
+    interp as(
+        SELECT
+            geom,
+            CASE
+                WHEN intmethod=1 THEN cdb_crankshaft._interp_in_tin(geomin, colin, tin, ST_Centroid(geom))
+                ELSE cdb_crankshaft.CDB_SpatialInterpolation(geomin, colin, ST_Centroid(geom), intmethod)
+            END as val
+        FROM grid
+    ),
+    classes as(
+        SELECT CASE
+            WHEN classmethod = 0 THEN
+                cdb_crankshaft.CDB_EqualIntervalBins(array_agg(val), steps)
+            WHEN classmethod = 1 THEN
+                cdb_crankshaft.CDB_HeadsTailsBins(array_agg(val), steps)
+            WHEN classmethod = 2 THEN
+                cdb_crankshaft.CDB_JenksBins(array_agg(val), steps)
+            ELSE
+                cdb_crankshaft.CDB_QuantileBins(array_agg(val), steps)
+            END as b
+        FROM interp
+        where val is not null
+    ),
+    classified as(
+        SELECT
+            i.*,
+            width_bucket(i.val, c.b) as bucket
+        FROM interp i left join classes c
+        ON 1=1
+    ),
+    -- fold the top bucket (values at the last break) into the one below it
+    classified2 as(
+        SELECT
+            geom,
+            val,
+            CASE
+                WHEN bucket = steps THEN bucket - 1
+                ELSE bucket
+            END as b
+        FROM classified
+    ),
+    final as(
+        SELECT
+            st_union(geom) as the_geom,
+            b as bin,
+            min(val) as min_value,
+            max(val) as max_value,
+            avg(val) as avg_value
+        FROM classified2
+        GROUP BY bin
+    )
+    SELECT
+        *
+    FROM final
+    where final.bin is not null
+    ;
+END;
+$$ language plpgsql;
+
+
+
+-- =====================================================================
+-- Interp in grid, so we can use barycentric with a precalculated tin (NNI)
+-- =====================================================================
+-- Finds the triangle of `tin` containing `point`, looks up the values of its
+-- three vertices in (geomin, colin) and returns their area-weighted mean.
+-- Returns NULL when the point lies in no triangle.
+CREATE OR REPLACE FUNCTION _interp_in_tin(
+    IN geomin geometry[],
+    IN colin numeric[],
+    IN tin geometry[],
+    IN point geometry
+    )
+RETURNS numeric AS
+$$
+DECLARE
+    g geometry;
+    vertex geometry[];
+    sg numeric;
+    sa numeric;
+    sb numeric;
+    sc numeric;
+    va numeric;
+    vb numeric;
+    vc numeric;
+    output numeric;
+BEGIN
+    -- get the cell the point is within
+    WITH
+        a as (SELECT unnest(tin) as v),
+        b as (SELECT v FROM a WHERE ST_Within(point, v))
+    SELECT v INTO g FROM b;
+
+    -- if we're out of the data realm,
+    -- return null
+    IF g is null THEN
+        RETURN null;
+    END IF;
+
+    -- vertex of the selected cell
+    WITH a AS (
+        SELECT (ST_DumpPoints(g)).geom AS v
+    )
+    SELECT array_agg(v) INTO vertex FROM a;
+
+    -- retrieve the value of each vertex
+    WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
+    SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
+
+    WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
+    SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
+
+    WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
+    SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
+
+    -- calc the areas
+    -- (sg: whole triangle; sa/sb/sc: sub-triangle opposite vertex 1/2/3)
+    SELECT
+        ST_area(g),
+        ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))),
+        ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))),
+        ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc;
+
+    output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg,1);
+    RETURN output;
+END;
+$$
+language plpgsql;
+-- Function by Stuart Lynn for a simple interpolation of a value
+-- from a polygon table over an arbitrary polygon
+-- (weighted by the area proportion overlapped)
+-- Areal weighting is a very simple form of areal interpolation.
+--
+-- Parameters:
+-- * geom a Polygon geometry which defines the area where a value will be
+--   estimated as the area-weighted sum of a given table/column
+-- * target_table_name table name of the table that provides the values
+-- * target_column column name of the column that provides the values
+-- * schema_name optional parameter to define the schema the target table
+--   belongs to, which is necessary if it's not in the search_path.
+--   Note that target_table_name should never include the schema in it.
+-- Return value:
+--   Areal-weighted interpolation of the column values over the geometry
+--
+-- Both the schema and the table name are quoted as identifiers (%I), so a
+-- name containing spaces, upper-case letters or SQL fragments is always
+-- treated as a single identifier and never spliced into the query text.
+CREATE OR REPLACE
+FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
+  RETURNS numeric AS
+$$
+DECLARE
+  result numeric;
+  qualified_name text;
+BEGIN
+  IF schema_name IS NULL THEN
+    qualified_name := Format('%I', target_table_name);
+  ELSE
+    -- %I for the table too: %s left it unquoted in the qualified branch only
+    qualified_name := Format('%I.%I', schema_name, target_table_name);
+  END IF;
+  -- qualified_name is already quoted above, hence %s here; geom is bound as $1
+  EXECUTE Format('
+    SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
+    FROM %s AS a
+    WHERE $1 && a.the_geom
+  ', target_column, qualified_name)
+  USING geom
+  INTO result;
+  RETURN result;
+END;
+$$ LANGUAGE plpgsql;
+--
+-- Creates N points randomly distributed around the polygon
+--
+-- @param g - the geometry to be turned in to points
+--
+-- @param no_points - the number of points to generate
+--
+-- @params max_iter_per_point - the function generates points in the polygon's bounding box
+-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
+-- misses per point the function accepts before giving up.
+--
+-- Returns: Multipoint with the requested points
+--
+-- Each point is produced by drawing a horizontal scan line at a random
+-- height across the geometry's bounding box, intersecting it with the
+-- geometry and picking a random position along the intersection.
+-- NOTE(review): max_iter_per_point is accepted but not used by this
+-- implementation.
+CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
+RETURNS GEOMETRY AS $$
+DECLARE
+  extent GEOMETRY;
+  test_point Geometry;
+  width NUMERIC;
+  height NUMERIC;
+  x0 NUMERIC;
+  y0 NUMERIC;
+  xp NUMERIC;
+  yp NUMERIC;
+  no_left INTEGER;
+  remaining_iterations INTEGER;
+  points GEOMETRY[];
+  bbox_line GEOMETRY;
+  intersection_line GEOMETRY;
+  geom_srid INTEGER;
+BEGIN
+  extent := ST_Envelope(geom);
+  width := ST_XMax(extent) - ST_XMIN(extent);
+  height := ST_YMax(extent) - ST_YMIN(extent);
+  x0 := ST_XMin(extent);
+  y0 := ST_YMin(extent);
+  no_left := no_points;
+  -- build the scan line in the input's own SRID; a hard-coded 4326 made
+  -- ST_Intersection fail for any other SRID
+  geom_srid := ST_SRID(geom);
+
+  LOOP
+    -- "<= 0" so that a negative no_points cannot loop forever
+    if(no_left <= 0) THEN
+      EXIT;
+    END IF;
+    yp = y0 + height*random();
+    -- ST_MakePoint takes (x, y): horizontal line at y = yp spanning the bbox
+    bbox_line = ST_MakeLine(
+      ST_SetSRID(ST_MakePoint(x0, yp), geom_srid),
+      ST_SetSRID(ST_MakePoint(x0+width, yp), geom_srid)
+    );
+    intersection_line = ST_Intersection(bbox_line,geom);
+    test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
+    points := points || test_point;
+    no_left = no_left - 1 ;
+  END LOOP;
+  RETURN ST_Collect(points);
+END;
+$$
+LANGUAGE plpgsql VOLATILE;
+-- Make sure by default there are no permissions for publicuser
+-- NOTE: this happens at extension creation time, as part of an implicit transaction.
+-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
+
+-- Grant permissions on the schema to publicuser (but just the schema)
+GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
+
+-- Revoke execute permissions on all functions in the schema by default
+-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
+--
+-- Fill given extent with a rectangular coverage
+--
+-- @param ext Extent to fill. Only rectangles with center point falling
+--            inside the extent (or at the lower or leftmost edge) will
+--            be emitted. The returned rectangles will have the same SRID
+--            as this extent.
+--
+-- @param width Width of each rectangle
+--
+-- @param height Height of each rectangle
+--
+-- @param origin Optional origin to allow for exact tiling.
+--               If omitted the origin will be 0,0.
+--               The parameter is checked for having the same SRID
+--               as the extent.
+--
+--
+CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL)
+RETURNS SETOF GEOMETRY
+AS $$
+DECLARE
+  h GEOMETRY; -- rectangle cell
+  hstep FLOAT8; -- horizontal step
+  vstep FLOAT8; -- vertical step
+  hw FLOAT8; -- half width
+  hh FLOAT8; -- half height
+  vstart FLOAT8;
+  hstart FLOAT8;
+  hend FLOAT8;
+  vend FLOAT8;
+  xoff FLOAT8;
+  yoff FLOAT8;
+  xgrd FLOAT8;
+  ygrd FLOAT8;
+  x FLOAT8;
+  y FLOAT8;
+  srid INTEGER;
+BEGIN
+
+  srid := ST_SRID(ext);
+
+  xoff := 0;
+  yoff := 0;
+
+  IF origin IS NOT NULL THEN
+    IF ST_SRID(origin) != srid THEN
+      RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin);
+    END IF;
+    xoff := ST_X(origin);
+    yoff := ST_Y(origin);
+  END IF;
+
+  --RAISE DEBUG 'X offset: %', xoff;
+  --RAISE DEBUG 'Y offset: %', yoff;
+
+  hw := width/2.0;
+  hh := height/2.0;
+
+  -- cell centres are snapped to multiples of half a cell from the origin
+  xgrd := hw;
+  ygrd := hh;
+  --RAISE DEBUG 'X grid size: %', xgrd;
+  --RAISE DEBUG 'Y grid size: %', ygrd;
+
+  hstep := width;
+  vstep := height;
+
+  -- Tweak horizontal start on hstep grid from origin
+  hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep;
+  --RAISE DEBUG 'hstart: %', hstart;
+
+  -- Tweak vertical start on vstep grid from origin
+  vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep;
+  --RAISE DEBUG 'vstart: %', vstart;
+
+  hend := ST_XMax(ext);
+  vend := ST_YMax(ext);
+
+  --RAISE DEBUG 'hend: %', hend;
+  --RAISE DEBUG 'vend: %', vend;
+
+  x := hstart;
+  WHILE x < hend LOOP -- over X
+    y := vstart;
+    -- first cell of the column, centred on (x, y); later cells are translated
+    h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid);
+    WHILE y < vend LOOP -- over Y
+      RETURN NEXT h;
+      h := ST_Translate(h, 0, vstep);
+      y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid
+    END LOOP;
+    x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid
+  END LOOP;
+
+  RETURN;
+END
+$$ LANGUAGE 'plpgsql' IMMUTABLE;
+
+--
+-- Calculate the equal interval bins for a given column
+--
+-- @param in_array A numeric array of numbers to determine the best
+--                 to determine the bin boundary
+--
+-- @param breaks The number of bins you want to find.
+--
+--
+-- Returns: upper edges of bins
+--
+--
+
+CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$
+DECLARE
+    diff numeric;
+    min_val numeric;
+    max_val numeric;
+    tmp_val numeric;
+    i INT := 1;
+    reply numeric[];
+BEGIN
+    -- NULL elements are ignored when taking the range
+    SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL;
+    -- width of one bin
+    diff = (max_val - min_val) / breaks::numeric;
+    LOOP
+        IF i < breaks THEN
+            tmp_val = min_val + i::numeric * diff;
+            reply = array_append(reply, tmp_val);
+            i := i+1;
+        ELSE
+            -- last edge is the maximum itself rather than a computed value
+            reply = array_append(reply, max_val);
+            EXIT;
+        END IF;
+    END LOOP;
+    RETURN reply;
+END;
+$$ language plpgsql IMMUTABLE;
+
+--
+-- Determine the Heads/Tails classifications from a numeric array
+--
+-- @param in_array A numeric array of numbers to determine the best
+--                 bins based on the Heads/Tails method.
+--
+-- @param breaks The number of bins you want to find.
+--
+-- Returns: the mean of the whole array followed by the mean of the values
+--          above each previous break (at most `breaks` values); the sorted
+--          input itself when it has fewer elements than `breaks`; NULL for
+--          an empty or NULL input.
+--
+
+CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$
+DECLARE
+    element_count INT4;
+    arr_mean numeric;
+    i INT := 2;
+    reply numeric[];
+BEGIN
+    -- get the total size of our row
+    -- (array_length counts the elements; array_upper - array_lower was one
+    -- short, so an array with exactly `breaks` elements was returned unbinned)
+    element_count := array_length(in_array, 1);
+    -- ensure the ordering of in_array
+    SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x;
+    -- stop if no rows
+    IF element_count IS NULL THEN
+        RETURN NULL;
+    END IF;
+    -- stop if our breaks are more than our input array size
+    IF element_count < breaks THEN
+        RETURN in_array;
+    END IF;
+
+    -- get our mean value
+    SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x;
+
+    reply = Array[arr_mean];
+    -- slice our bread
+    LOOP
+        IF i > breaks THEN EXIT; END IF;
+        -- mean of the "head": everything above the previous break
+        SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1];
+        -- no values left above the last break: nothing to append
+        IF arr_mean IS NOT NULL THEN
+            reply = array_append(reply, arr_mean);
+        END IF;
+        i := i+1;
+    END LOOP;
+    RETURN reply;
+END;
+$$ language plpgsql IMMUTABLE;
+
+--
+-- Determine the Jenks classifications from a numeric array
+--
+-- @param in_array A numeric array of numbers to determine the best
+--                 bins based on the Jenks method.
+--
+-- @param breaks The number of bins you want to find.
+--
+-- @param iterations The number of different starting positions to test.
+--
+-- @param invert Optional whether to return the top of each bin (default)
+--               or the bottom. BOOLEAN, default=FALSE.
+-- +-- + + +CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean NUMERIC; + bot INT; + top INT; + tops INT[]; + classes INT[][]; + i INT := 1; j INT := 1; + curr_result NUMERIC[]; + best_result NUMERIC[]; + seedtarget TEXT; + quant NUMERIC[]; + shuffles INT; +BEGIN + -- get the total size of our row + element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int; + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + -- assume best is actually Quantile + SELECT cdb_crankshaft.CDB_QuantileBins(in_array, breaks) INTO quant; + + -- if data is very very large, just return quant and be done + IF element_count > 5000000 THEN + RETURN quant; + END IF; + + -- change quant into bottom, top markers + LOOP + IF i = 1 THEN + bot = 1; + ELSE + -- use last top to find this bot + bot = top+1; + END IF; + IF i = breaks THEN + top = element_count; + ELSE + SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i]; + END IF; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + IF i > breaks THEN EXIT; END IF; + i = i+1; + END LOOP; + + best_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + --set the seed so we can ensure the same results + SELECT setseed(0.4567) INTO seedtarget; + 
--loop through random starting positions + LOOP + IF j > iterations-1 THEN EXIT; END IF; + i = 1; + tops = ARRAY[element_count]; + LOOP + IF i = breaks THEN EXIT; END IF; + SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1; + i = array_length(tops, 1); + END LOOP; + i = 1; + LOOP + IF i > breaks THEN EXIT; END IF; + IF i = 1 THEN + bot = 1; + ELSE + bot = top+1; + END IF; + top = tops[i]; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + i := i+1; + END LOOP; + curr_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + IF curr_result[1] > best_result[1] THEN + best_result = curr_result; + j = j-1; -- if we found a better result, add one more search + END IF; + j = j+1; + END LOOP; + + RETURN (best_result)[2:array_upper(best_result, 1)]; +END; +$$ language plpgsql IMMUTABLE; + + + +-- +-- Perform a single iteration of the Jenks classification +-- + +CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$ +DECLARE + tmp_val numeric; + new_classes int[][]; + tmp_class int[]; + i INT := 1; + j INT := 1; + side INT := 2; + sdam numeric; + gvf numeric := 0.0; + new_gvf numeric; + arr_gvf numeric[]; + class_avg numeric; + class_max_i INT; + class_min_i INT; + class_max numeric; + class_min numeric; + reply numeric[]; +BEGIN + + -- Calculate the sum of squared deviations from the array mean (SDAM). 
+ SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x; + --Identify the breaks for the lowest GVF + LOOP + i = 1; + LOOP + -- get our mean + SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x; + -- find the deviation + SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x; + IF i = 1 THEN + arr_gvf = ARRAY[tmp_val]; + -- init our min/max map for later + class_max = arr_gvf[i]; + class_min = arr_gvf[i]; + class_min_i = 1; + class_max_i = 1; + ELSE + arr_gvf = array_append(arr_gvf, tmp_val); + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + -- calculate our new GVF + SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x; + -- if no improvement was made, exit + IF new_gvf < gvf THEN EXIT; END IF; + gvf = new_gvf; + IF j > max_search THEN EXIT; END IF; + j = j+1; + i = 1; + LOOP + --establish directionality (uppward through classes or downward) + IF arr_gvf[i] < class_min THEN + class_min = arr_gvf[i]; + class_min_i = i; + END IF; + IF arr_gvf[i] > class_max THEN + class_max = arr_gvf[i]; + class_max_i = i; + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + IF class_max_i > class_min_i THEN + class_min_i = class_max_i - 1; + ELSE + class_min_i = class_max_i + 1; + END IF; + --Move from higher class to a lower gid order + IF class_max_i > class_min_i THEN + classes[class_max_i][1] = classes[class_max_i][1] + 1; + classes[class_min_i][2] = classes[class_min_i][2] + 1; + ELSE -- Move from lower class UP into a higher class by gid + classes[class_max_i][2] = classes[class_max_i][2] - 1; + classes[class_min_i][1] = classes[class_min_i][1] - 1; + END IF; + END LOOP; + + i = 1; + LOOP + IF invert = TRUE THEN + side = 1; --default returns bottom side of breaks, invert returns top side + END IF; + reply = array_append(reply, in_array[classes[i][side]]); + i = i+1; + IF i > breaks THEN EXIT; END 
IF;
+    END LOOP;
+
+    RETURN array_prepend(gvf, reply);
+
+END;
+$$ language plpgsql IMMUTABLE;
+
+
+--
+-- Compute quantile class breaks for an array of numbers
+--
+-- @param in_array The numeric values to classify. They are sorted
+--            ascending inside the function.
+--
+-- @param breaks The number of bins; one upper bound is returned per bin.
+--
+-- Returns: NUMERIC[] with "breaks" elements, the last one being the
+--          maximum of in_array.
+--
+CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$
+DECLARE
+    n_values INT4;
+    step numeric;
+    cut numeric;
+    bin INT := 1;
+    cuts numeric[];
+BEGIN
+    -- order the input ascending so that positions correspond to ranks
+    SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x;
+    -- number of values, and the (possibly fractional) number of values per bin
+    n_values := array_length(in_array, 1);
+    step := n_values::numeric / breaks;
+    -- one upper bound per bin; a NULL or non-positive "breaks" never enters the loop
+    WHILE bin <= breaks LOOP
+        IF bin = breaks THEN
+            -- the last bin is closed by the largest value
+            SELECT max(e) INTO cut FROM ( SELECT unnest(in_array) e ) x;
+        ELSIF step * bin % 1 > 0 THEN
+            -- fractional position: take the value at the rounded-up position
+            SELECT e INTO cut FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(step * bin) - 1) x;
+        ELSE
+            -- otherwise: average the value at that position with the next one
+            SELECT avg(e) INTO cut FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(step * bin) - 1 ) x;
+        END IF;
+
+        cuts = array_append(cuts, cut);
+        bin := bin + 1;
+    END LOOP;
+    RETURN cuts;
+END;
+$$ language plpgsql IMMUTABLE;
diff --git a/release/crankshaft--0.5.0.sql b/release/crankshaft--0.5.0.sql
new file mode 100644
index 0000000..023b265
--- /dev/null
+++ b/release/crankshaft--0.5.0.sql
@@ -0,0 +1,2070 @@
+--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
+-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION crankshaft" to load this file.
\quit
+-- Version number of the extension release
+CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
+RETURNS text AS $$
+  SELECT '0.5.0'::text;
+$$ language 'sql' STABLE STRICT;
+
+-- Internal identifier of the installed extension instance
+-- e.g. 'dev' for current development version
+CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
+RETURNS text AS $$
+  SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL;
+$$ language 'sql' STABLE STRICT;
+-- Internal function.
+-- Set the seeds of the RNGs (Random Number Generators)
+-- used internally.
+CREATE OR REPLACE FUNCTION
+_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
+AS $$
+  from crankshaft import random_seeds
+  random_seeds.set_random_seeds(seed_value)
+$$ LANGUAGE plpythonu;
+-- State transition function of the CDB_PyAgg aggregate.
+-- Flattens the rows it receives into one array. When the state is still
+-- empty, the length of the first row is stored as element 1 so that a
+-- consumer can rebuild the two-dimensional layout.
+CREATE OR REPLACE FUNCTION
+  CDB_PyAggS(current_state Numeric[], current_row Numeric[])
+  returns NUMERIC[] as $$
+  BEGIN
+    if array_upper(current_state,1) is null then
+      RAISE NOTICE 'setting state %',array_upper(current_row,1);
+      current_state[1] = array_upper(current_row,1);
+    end if;
+    return array_cat(current_state,current_row) ;
+  END
+  $$ LANGUAGE plpgsql;
+
+-- Create aggregate if it did not exist
+DO $$
+BEGIN
+  IF NOT EXISTS (
+    SELECT *
+    FROM pg_catalog.pg_proc p
+    LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
+    WHERE n.nspname = 'cdb_crankshaft'
+      AND p.proname = 'cdb_pyagg'
+      AND p.proisagg)
+  THEN
+    CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
+      SFUNC = CDB_PyAggS,
+      STYPE = Numeric[],
+      INITCOND = "{}"
+    );
+  END IF;
+END
+$$ LANGUAGE plpgsql;
+
+-- Segmentation model working on pre-aggregated arrays.
+--
+-- @param target          values to fit
+-- @param features        flattened feature rows; element 1 holds the row
+--                        width (the layout produced by CDB_PyAggS)
+-- @param target_features flattened rows to predict on, same layout
+-- @param target_ids      ids passed through to the model code
+-- The remaining parameters are forwarded as model parameters.
+--
+-- Returns: whatever crankshaft.segmentation.create_and_predict_segment_agg
+--          yields, exposed as (cartodb_id, prediction, accuracy).
+CREATE OR REPLACE FUNCTION
+  CDB_CreateAndPredictSegment(
+    target NUMERIC[],
+    features NUMERIC[],
+    target_features NUMERIC[],
+    target_ids NUMERIC[],
+    n_estimators INTEGER DEFAULT 1200,
+    max_depth INTEGER DEFAULT 3,
+    subsample DOUBLE PRECISION DEFAULT 0.5,
+    learning_rate DOUBLE PRECISION DEFAULT 0.01,
+    min_samples_leaf INTEGER DEFAULT 1)
+RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC)
+AS $$
+    import numpy as np
+    import plpy
+
+    from crankshaft.segmentation import create_and_predict_segment_agg
+    model_params = {'n_estimators': n_estimators,
+                    'max_depth': max_depth,
+                    'subsample': subsample,
+                    'learning_rate': learning_rate,
+                    'min_samples_leaf': min_samples_leaf}
+
+    def unpack2D(data):
+        # The first element is the row width. It is converted to int and
+        # integer division is used, because reshape() needs integer
+        # dimensions. The input list is left unmodified.
+        dimension = int(data[0])
+        a = np.array(data[1:], dtype=float)
+        return a.reshape(len(a) // dimension, dimension)
+
+    return create_and_predict_segment_agg(np.array(target, dtype=float),
+                                          unpack2D(features),
+                                          unpack2D(target_features),
+                                          target_ids,
+                                          model_params)
+
+$$ LANGUAGE plpythonu;
+
+-- Segmentation model working on a query and a target table; all arguments
+-- are forwarded to crankshaft.segmentation.create_and_predict_segment.
+CREATE OR REPLACE FUNCTION
+  CDB_CreateAndPredictSegment (
+      query TEXT,
+      variable_name TEXT,
+      target_table TEXT,
+      n_estimators INTEGER DEFAULT 1200,
+      max_depth INTEGER DEFAULT 3,
+      subsample DOUBLE PRECISION DEFAULT 0.5,
+      learning_rate DOUBLE PRECISION DEFAULT 0.01,
+      min_samples_leaf INTEGER DEFAULT 1)
+RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC)
+AS $$
+  from crankshaft.segmentation import create_and_predict_segment
+  model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf}
+  return create_and_predict_segment(query,variable_name,target_table, model_params)
+$$ LANGUAGE plpythonu;
+-- Gravity model, query flavour.
+-- Runs target_query and source_query, aggregates their cartodb_id,
+-- the_geom and weight/population columns into arrays and delegates to the
+-- array flavour of CDB_Gravity.
+--
+-- NOTE(review): both queries and both column names are concatenated into
+-- dynamic SQL as-is, so they must come from a trusted caller.
+CREATE OR REPLACE FUNCTION CDB_Gravity(
+    IN target_query text,
+    IN weight_column text,
+    IN source_query text,
+    IN pop_column text,
+    IN target bigint,
+    IN radius integer,
+    IN minval numeric DEFAULT -10e307
+    )
+RETURNS TABLE(
+    the_geom geometry,
+    source_id bigint,
+    target_id bigint,
+    dist numeric,
+    h numeric,
+    hpop numeric) AS $$
+DECLARE
+    t_id bigint[];
+    t_geom geometry[];
+    t_weight numeric[];
+    s_id bigint[];
+    s_geom geometry[];
+    s_pop numeric[];
+BEGIN
+    -- "||" is the text concatenation operator; "+" is not defined for text
+    EXECUTE 'WITH foo as(' || target_query || ') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight;
+    EXECUTE 'WITH foo as(' || source_query || ') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop;
+    -- the arrays are plain variables, so no other relation is needed in FROM
+    RETURN QUERY
+    SELECT g.* FROM CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g;
+END;
+$$ language plpgsql;
+
+-- Gravity model, array flavour.
+--
+-- @param t_id, t_geom, t_weight  ids, geometries and weights of the targets
+-- @param s_id, s_geom, s_pop     ids, geometries and populations of the sources
+-- @param target   id of the target whose rows are returned
+-- @param radius   search distance handed to ST_DWithin on geographies
+-- @param minval   targets whose weight is not above this value are ignored
+--
+-- Returns one row per source that has the requested target within radius:
+--   dist - geography distance (reported as 0.0 when it is not above 1)
+--   h    - 100 * (weight/distance) of the target divided by the sum of
+--          weight/distance over all targets in range of that source
+--   hpop - the same share multiplied by the source population
+CREATE OR REPLACE FUNCTION CDB_Gravity(
+    IN t_id bigint[],
+    IN t_geom geometry[],
+    IN t_weight numeric[],
+    IN s_id bigint[],
+    IN s_geom geometry[],
+    IN s_pop numeric[],
+    IN target bigint,
+    IN radius integer,
+    IN minval numeric DEFAULT -10e307
+    )
+RETURNS TABLE(
+    the_geom geometry,
+    source_id bigint,
+    target_id bigint,
+    dist numeric,
+    h numeric,
+    hpop numeric) AS $$
+DECLARE
+    t_type text;
+    s_type text;
+    t_center geometry[];
+    s_center geometry[];
+BEGIN
+    -- the type of the first element decides whether centroids are needed
+    t_type := GeometryType(t_geom[1]);
+    s_type := GeometryType(s_geom[1]);
+    IF t_type = 'POINT' THEN
+        t_center := t_geom;
+    ELSE
+        WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp;
+    END IF;
+    IF s_type = 'POINT' THEN
+        s_center := s_geom;
+    ELSE
+        WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp;
+    END IF;
+    RETURN QUERY
+        with target0 as(
+            SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td
+        ),
+        source0 as(
+            SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp
+        ),
+        -- every (source, target in range) pair; distance is floored at 1.0
+        prev0 as(
+            SELECT
+                source0.sg,
+                source0.sd as sourc_id,
+                coalesce(source0.sp,0) as sp,
+                target.td as targ_id,
+                coalesce(target.tw,0) as tw,
+                GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance
+            FROM source0
+            CROSS JOIN LATERAL
+            (
+                SELECT
+                    *
+                FROM target0
+                    WHERE tw > minval
+                    AND ST_DWithin(geography(source0.sc), geography(tc), radius)
+            ) AS target
+        ),
+        -- per-source denominator of the share
+        deno as(
+            SELECT
+                sourc_id,
+                sum(tw/distance) as h_deno
+            FROM
+                prev0
+            GROUP BY sourc_id
+        )
+        SELECT
+            p.sg as the_geom,
+            p.sourc_id as source_id,
+            p.targ_id as target_id,
+            case when p.distance > 1 then p.distance else 0.0 end as dist,
+            100*(p.tw/p.distance)/d.h_deno as h,
+            p.sp*(p.tw/p.distance)/d.h_deno as hpop
+        FROM
+            prev0 p,
+            deno d
+        WHERE
+            p.targ_id = target AND
+            p.sourc_id = d.sourc_id;
+END;
+$$ language plpgsql;
+-- Interpolation methods:
+-- 0: nearest neighbor(s)
+-- 1: barycentric
+-- 2: IDW
+-- 3: kriging ---> TO DO
+
+
+-- Query flavour: runs the query, aggregates its the_geom and attrib
+-- columns into arrays and delegates to the array flavour below.
+-- NOTE(review): the query text is concatenated into dynamic SQL as-is.
+CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
+    IN query text,
+    IN point geometry,
+    IN method integer DEFAULT 1,
+    IN p1 numeric DEFAULT 0,
+    IN p2 numeric DEFAULT 0
+    )
+RETURNS numeric AS
+$$
+DECLARE
+    gs geometry[];
+    vs numeric[];
+    output numeric;
+BEGIN
+    EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs;
+    -- the CTE "a" only exists inside the EXECUTE above, so it cannot be
+    -- referenced here; the arrays are plain variables
+    SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output;
+
+    RETURN output;
+END;
+$$
+language plpgsql IMMUTABLE;
+
+-- Array flavour.
+--
+-- @param geomin  sample geometries
+-- @param colin   sample values, in the same order as geomin
+-- @param point   location to interpolate at
+-- @param method  0 nearest neighbours, 1 barycentric, 2 IDW
+-- @param p1      methods 0 and 2: number of neighbours to use
+--                (method 0: 0 means the closest one; method 2: 0 means all)
+-- @param p2      method 2: order of the distance decay (0 means 1)
+--
+-- Returns the interpolated value; -888.888 when method 1 finds no
+-- triangle containing the point; -777.777 for any other method.
+CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
+    IN geomin geometry[],
+    IN colin numeric[],
+    IN point geometry,
+    IN method integer DEFAULT 1,
+    IN p1 numeric DEFAULT 0,
+    IN p2 numeric DEFAULT 0
+    )
+RETURNS numeric AS
+$$
+DECLARE
+    gs geometry[];
+    vs numeric[];
+    gs2 geometry[];
+    vs2 numeric[];
+    g geometry;
+    vertex geometry[];
+    sg numeric;
+    sa numeric;
+    sb numeric;
+    sc numeric;
+    va numeric;
+    vb numeric;
+    vc numeric;
+    output numeric;
+BEGIN
+    -- output := -999.999;
+
+    -- nearest neighbors
+    -- p1: limit the number of neighbors, 0-> closest one
+    IF method = 0 THEN
+
+        IF p1 = 0 THEN
+            p1 := 1;
+        END IF;
+
+        WITH    a as (SELECT unnest(geomin) as g, unnest(colin) as v),
+                b as (SELECT a.v as v FROM a ORDER BY point<->a.g LIMIT p1::integer)
+        SELECT avg(b.v) INTO output FROM b;
+        RETURN output;
+
+    -- barycentric
+    ELSIF method = 1 THEN
+        -- triangle of the Delaunay triangulation that contains the point
+        WITH    a as (SELECT unnest(geomin) AS e),
+                b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
+                c as (SELECT (ST_Dump(t)).geom as v FROM b),
+                d as (SELECT v FROM c WHERE ST_Within(point, v))
+        SELECT v INTO g FROM d;
+        IF g is null THEN
+            -- out of the realm of the input data
+            RETURN -888.888;
+        END IF;
+        -- vertex of the selected cell
+        WITH    a AS (SELECT (ST_DumpPoints(g)).geom AS v)
+        SELECT array_agg(v) INTO vertex FROM a;
+
+        -- retrieve the value of each vertex
+        WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
+        SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
+        WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
+        SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
+        WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
+        SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
+
+        -- area of the cell and of the three sub-triangles opposite each vertex
+        SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc;
+
+        -- area-weighted mean; a zero-area cell yields NULL instead of a
+        -- division-by-zero error
+        output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / NULLIF(sg, 0);
+        RETURN output;
+
+    -- IDW
+    -- p1: limit the number of neighbors, 0->no limit
+    -- p2: order of distance decay, 0-> order 1
+    ELSIF method = 2 THEN
+
+        IF p2 = 0 THEN
+            p2 := 1;
+        END IF;
+
+        -- samples ordered by distance to the point
+        WITH    a as (SELECT unnest(geomin) as g, unnest(colin) as v),
+                b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g)
+        SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b;
+        IF p1::integer>0 THEN
+            -- keep only the p1 closest samples
+            gs2 := gs[1:p1::integer];
+            vs2 := vs[1:p1::integer];
+        ELSE
+            gs2:=gs;
+            vs2:=vs;
+        END IF;
+
+        -- NOTE(review): a sample located exactly at "point" has distance 0
+        -- and makes the divisions below fail
+        WITH    a as (SELECT unnest(gs2) as g, unnest(vs2) as v),
+                b as (
+                    SELECT
+                        (1/ST_distance(point, a.g)^p2::integer) as k,
+                        (a.v/ST_distance(point, a.g)^p2::integer) as f
+                    FROM a
+                )
+        SELECT sum(b.f)/sum(b.k) INTO output FROM b;
+        RETURN output;
+
+    -- kriging
+    ELSIF method = 3 THEN
+
+        -- TO DO
+
+    END IF;
+
+    RETURN -777.777;
+
+END;
+$$
+language plpgsql IMMUTABLE;
+-- 
=============================================================================================
+--
+-- CDB_Voronoi
+--
+-- Builds Voronoi cells for an array of geometries out of a Delaunay
+-- triangulation.
+--
+-- @param geomin     input geometries; they are transformed to EPSG:3857
+--                   for the calculations
+-- @param buffer     factor used to size the clipping circle, the helper
+--                   envelope points and the outward projection of open edges
+-- @param tolerance  tolerance handed to ST_DelaunayTriangles; also the
+--                   threshold of the "centre lies on the ring" test
+--
+-- Returns: a collection, in EPSG:4326, of the convex hulls of the clipped cells
+--
+-- =============================================================================================
+CREATE OR REPLACE FUNCTION CDB_voronoi(
+    IN geomin geometry[],
+    IN buffer numeric DEFAULT 0.5,
+    IN tolerance numeric DEFAULT 1e-9
+    )
+RETURNS geometry AS $$
+DECLARE
+    geomout geometry;
+BEGIN
+    -- we need to make the geometry calculations in (pseudo)meters!!!
+    with a as (
+        SELECT unnest(geomin) as g1
+    ),
+    b as(
+        SELECT st_transform(g1, 3857) g2 from a
+    )
+    SELECT array_agg(g2) INTO geomin from b;
+
+    WITH
+    -- convex hull of the input and r = buffer * sqrt(hull area / pi)
+    convexhull_1 as (
+        SELECT
+            ST_ConvexHull(ST_Collect(geomin)) as g,
+            buffer * |/ (st_area(ST_ConvexHull(ST_Collect(geomin)))/PI()) as r
+    ),
+    -- clipping shape: minimum bounding circle of the hull, grown by buffer*r
+    clipper as(
+        SELECT
+            st_buffer(ST_MinimumBoundingCircle(a.g), buffer*a.r) as g
+        FROM convexhull_1 a
+    ),
+    -- points of the hull's bounding box expanded by buffer*r
+    env0 as (
+        SELECT
+            (st_dumppoints(st_expand(a.g, buffer*a.r))).geom as e
+        FROM convexhull_1 a
+    ),
+    env as (
+        SELECT
+            array_agg(env0.e) as e
+        FROM env0
+    ),
+    -- input geometries plus the helper envelope points
+    sample AS (
+        SELECT
+            ST_Collect(geomin || env.e) as geom
+        FROM env
+    ),
+    convexhull as (
+        SELECT
+            ST_ConvexHull(ST_Collect(geomin)) as cg
+    ),
+    -- Delaunay triangles of the sample
+    tin as (
+        SELECT
+            ST_Dump(ST_DelaunayTriangles(geom, tolerance, 0)) as gd
+        FROM
+            sample
+    ),
+    -- per triangle: id, polygon and centre of its minimum bounding circle
+    tin_polygons as (
+        SELECT
+            (gd).Path as id,
+            (gd).Geom as pg,
+            ST_Centroid(ST_MinimumBoundingCircle((gd).Geom, 180)) as ct
+        FROM tin
+    ),
+    tin_lines as (
+        SELECT
+            id,
+            ST_ExteriorRing(pg) as lg
+        FROM tin_polygons
+    ),
+    -- the three vertices of each triangle
+    tin_nodes as (
+        SELECT
+            id,
+            ST_PointN(lg,1) p1,
+            ST_PointN(lg,2) p2,
+            ST_PointN(lg,3) p3
+        FROM tin_lines
+    ),
+    -- one row per triangle edge, with the 2D centre returned by _Find_Circle
+    -- (ct), whether the bounding-circle centre lies on the triangle ring
+    -- (ctx) and whether it lies inside the input hull (ctin)
+    tin_edges AS (
+        SELECT
+            p.id,
+            UNNEST(ARRAY[
+            ST_MakeLine(n.p1,n.p2) ,
+            ST_MakeLine(n.p2,n.p3) ,
+            ST_MakeLine(n.p3,n.p1)]) as Edge,
+            ST_Force2D(cdb_crankshaft._Find_Circle(n.p1,n.p2,n.p3)) as ct,
+            CASE WHEN st_distance(p.ct, ST_ExteriorRing(p.pg)) < tolerance THEN
+                TRUE
+            ELSE FALSE END AS ctx,
+            p.pg,
+            ST_within(p.ct, convexhull.cg) as ctin
+        FROM
+            tin_polygons p,
+            tin_nodes n,
+            convexhull
+        WHERE p.id = n.id
+    ),
+    -- end points of each Voronoi edge: xct is taken from the triangle
+    -- itself, yct from the other triangle sharing the same edge. When no
+    -- other triangle shares the edge, yct is projected from the centre
+    -- through the edge centroid (only if ctin is true, otherwise NULL)
+    voro_nodes as (
+        SELECT
+            CASE WHEN x.ctx = TRUE THEN
+                ST_Centroid(x.edge)
+            ELSE
+                x.ct
+            END as xct,
+            CASE WHEN y.id is null THEN
+                CASE WHEN x.ctin = TRUE THEN
+                    ST_SetSRID(ST_MakePoint(
+                        ST_X(x.ct) + ((ST_X(ST_Centroid(x.edge)) - ST_X(x.ct)) * (1+buffer)),
+                        ST_Y(x.ct) + ((ST_Y(ST_Centroid(x.edge)) - ST_Y(x.ct)) * (1+buffer))
+                    ), ST_SRID(x.ct))
+                END
+            ELSE
+                y.ct
+            END as yct
+        FROM
+            tin_edges x
+        LEFT OUTER JOIN
+            tin_edges y
+        ON x.id <> y.id AND ST_Equals(x.edge, y.edge)
+    ),
+    voro_edges as(
+        SELECT
+            ST_LineMerge(ST_Collect(ST_MakeLine(xct, yct))) as v
+        FROM
+            voro_nodes
+    ),
+    -- polygonize the edges together with the ring of their convex hull
+    voro_cells as(
+        SELECT
+            ST_Polygonize(
+                ST_Node(
+                    ST_LineMerge(
+                        ST_Union(v, ST_ExteriorRing(
+                            ST_Convexhull(v)
+                            )
+                        )
+                    )
+                )
+            ) as g
+        FROM
+            voro_edges
+    ),
+    voro_set as(
+        SELECT
+            (st_dump(v.g)).geom as g
+        FROM voro_cells v
+    ),
+    -- keep polygons only and cut them with the clipping circle
+    clipped_voro as(
+        SELECT
+            ST_intersection(c.g, v.g) as g
+        FROM
+            voro_set v,
+            clipper c
+        WHERE
+            ST_GeometryType(v.g) = 'ST_Polygon'
+    )
+    SELECT
+        st_collect(
+            ST_Transform(
+                ST_ConvexHull(g),
+                4326
+            )
+        )
+    INTO geomout
+    FROM
+        clipped_voro;
+    RETURN geomout;
+END;
+$$ language plpgsql IMMUTABLE;
+
+/** ----------------------------------------------------------------------------------------
+  * @function   : FindCircle
+  * @precis     : Function that determines if three points form a circle. If so a geometry holding
+  *               centre and radius is returned. If not, NULL is returned.
+  * @version    : 1.0.1
+  * @param      : p_pt1        : First point in curve
+  * @param      : p_pt2        : Second point in curve
+  * @param      : p_pt3        : Third point in curve
+  * @return     : geometry     : In which X,Y ordinates are the centre X, Y and the Z being the radius of found circle
+  *                              or NULL if three points do not form a circle.
+  * @history    : Simon Greener - Feb 2012 - Original coding.
+ * Rafa de la Torre - Aug 2016 - Small fix for type checking
+  * @copyright  : Simon Greener @ 2012
+  *               Licensed under a Creative Commons Attribution-Share Alike 2.5 Australia License. (http://creativecommons.org/licenses/by-sa/2.5/au/)
+**/
+-- Returns a point whose X/Y are the centre of the circle through the three
+-- input points and whose Z is its radius, with the SRID of p_pt1.
+-- Returns NULL when the points are collinear. Raises an exception when an
+-- argument is not an ST_Point. The function is STRICT, so a NULL argument
+-- yields NULL without running the body.
+CREATE OR REPLACE FUNCTION _Find_Circle(
+    IN p_pt1 geometry,
+    IN p_pt2 geometry,
+    IN p_pt3 geometry)
+  RETURNS geometry AS
+$BODY$
+DECLARE
+   v_radius NUMERIC;
+   v_CX     NUMERIC;
+   v_CY     NUMERIC;
+   v_dA     NUMERIC;
+   v_dB     NUMERIC;
+   v_dC     NUMERIC;
+   v_dD     NUMERIC;
+   v_dE     NUMERIC;
+   v_dF     NUMERIC;
+   v_dG     NUMERIC;
+BEGIN
+   -- RAISE EXCEPTION aborts the function, so nothing needs to follow it
+   IF ( p_pt1 IS NULL OR p_pt2 IS NULL OR p_pt3 IS NULL ) THEN
+      RAISE EXCEPTION 'All supplied points must be not null.';
+   END IF;
+   IF ( ST_GeometryType(p_pt1) <> 'ST_Point' OR
+        ST_GeometryType(p_pt2) <> 'ST_Point' OR
+        ST_GeometryType(p_pt3) <> 'ST_Point' ) THEN
+      RAISE EXCEPTION 'All supplied geometries must be points.';
+   END IF;
+   v_dA := ST_X(p_pt2) - ST_X(p_pt1);
+   v_dB := ST_Y(p_pt2) - ST_Y(p_pt1);
+   v_dC := ST_X(p_pt3) - ST_X(p_pt1);
+   v_dD := ST_Y(p_pt3) - ST_Y(p_pt1);
+   v_dE := v_dA * (ST_X(p_pt1) + ST_X(p_pt2)) + v_dB * (ST_Y(p_pt1) + ST_Y(p_pt2));
+   v_dF := v_dC * (ST_X(p_pt1) + ST_X(p_pt3)) + v_dD * (ST_Y(p_pt1) + ST_Y(p_pt3));
+   v_dG := 2.0 * (v_dA * (ST_Y(p_pt3) - ST_Y(p_pt2)) - v_dB * (ST_X(p_pt3) - ST_X(p_pt2)));
+   -- If v_dG is zero then the three points are collinear and no finite-radius
+   -- circle through them exists.
+   IF ( v_dG = 0 ) THEN
+      RETURN NULL;
+   END IF;
+   v_CX := (v_dD * v_dE - v_dB * v_dF) / v_dG;
+   v_CY := (v_dA * v_dF - v_dC * v_dE) / v_dG;
+   v_radius := SQRT(POWER(ST_X(p_pt1) - v_CX,2) + POWER(ST_Y(p_pt1) - v_CY,2) );
+   RETURN ST_SetSRID(ST_MakePoint(v_CX, v_CY, v_radius),ST_Srid(p_pt1));
+END;
+$BODY$
+  LANGUAGE plpgsql VOLATILE STRICT;
+
+-- Moran's I Global Measure (public-facing)
+-- Forwards all arguments to Moran.global_stat of crankshaft.clustering.
+CREATE OR REPLACE FUNCTION
+  CDB_AreasOfInterestGlobal(
+      subquery TEXT,
+      column_name TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS TABLE (moran NUMERIC, significance NUMERIC)
+AS $$
+  from crankshaft.clustering import Moran
+  # TODO: use named parameters or a dictionary
+  moran = Moran()
+  return moran.global_stat(subquery, column_name, w_type,
+                           num_ngbrs, permutations, geom_col, id_col)
+$$ LANGUAGE plpythonu;
+
+-- Moran's I Local (internal function)
+-- Forwards all arguments to Moran.local_stat of crankshaft.clustering.
+CREATE OR REPLACE FUNCTION
+  _CDB_AreasOfInterestLocal(
+      subquery TEXT,
+      column_name TEXT,
+      w_type TEXT,
+      num_ngbrs INT,
+      permutations INT,
+      geom_col TEXT,
+      id_col TEXT)
+RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+  from crankshaft.clustering import Moran
+  moran = Moran()
+  # TODO: use named parameters or a dictionary
+  return moran.local_stat(subquery, column_name, w_type,
+                          num_ngbrs, permutations, geom_col, id_col)
+$$ LANGUAGE plpythonu;
+
+-- Moran's I Local (public-facing function)
+-- Same result as the internal function, with defaults for the optional arguments.
+CREATE OR REPLACE FUNCTION
+  CDB_AreasOfInterestLocal(
+    subquery TEXT,
+    column_name TEXT,
+    w_type TEXT DEFAULT 'knn',
+    num_ngbrs INT DEFAULT 5,
+    permutations INT DEFAULT 99,
+    geom_col TEXT DEFAULT 'the_geom',
+    id_col TEXT DEFAULT 'cartodb_id')
+RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col);
+
+$$ LANGUAGE SQL;
+
+-- Moran's I only for HH and HL (public-facing function)
+CREATE OR REPLACE FUNCTION
+  CDB_GetSpatialHotspots(
+    subquery TEXT,
+    column_name TEXT,
+    w_type TEXT DEFAULT 'knn',
+    num_ngbrs INT DEFAULT 5,
+    permutations INT DEFAULT 99,
+    geom_col TEXT DEFAULT 'the_geom',
+    id_col TEXT DEFAULT 'cartodb_id')
+ RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
+  WHERE quads IN ('HH', 'HL');
+
+$$ LANGUAGE SQL;
+
+-- Moran's I only for LL and LH (public-facing function)
+CREATE OR REPLACE FUNCTION
+  CDB_GetSpatialColdspots(
+    subquery TEXT,
+    attr TEXT,
+    w_type TEXT DEFAULT 'knn',
+    num_ngbrs INT DEFAULT 5,
+    permutations INT DEFAULT 99,
+    geom_col TEXT DEFAULT 'the_geom',
+    id_col TEXT DEFAULT 'cartodb_id')
+ RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
+  WHERE quads IN ('LL', 'LH');
+
+$$ LANGUAGE SQL;
+
+-- Moran's I only for LH and HL (public-facing function)
+CREATE OR REPLACE FUNCTION
+  CDB_GetSpatialOutliers(
+    subquery TEXT,
+    attr TEXT,
+    w_type TEXT DEFAULT 'knn',
+    num_ngbrs INT DEFAULT 5,
+    permutations INT DEFAULT 99,
+    geom_col TEXT DEFAULT 'the_geom',
+    id_col TEXT DEFAULT 'cartodb_id')
+ RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
+  WHERE quads IN ('HL', 'LH');
+
+$$ LANGUAGE SQL;
+
+-- Moran's I Global Rate (public-facing 
function)
+-- Forwards all arguments to Moran.global_rate_stat of crankshaft.clustering.
+CREATE OR REPLACE FUNCTION
+  CDB_AreasOfInterestGlobalRate(
+      subquery TEXT,
+      numerator TEXT,
+      denominator TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS TABLE (moran FLOAT, significance FLOAT)
+AS $$
+  from crankshaft.clustering import Moran
+  moran = Moran()
+  # TODO: use named parameters or a dictionary
+  return moran.global_rate_stat(subquery, numerator, denominator, w_type,
+                                num_ngbrs, permutations, geom_col, id_col)
+$$ LANGUAGE plpythonu;
+
+
+-- Moran's I Local Rate (internal function)
+-- Forwards all arguments to Moran.local_rate_stat of crankshaft.clustering.
+CREATE OR REPLACE FUNCTION
+  _CDB_AreasOfInterestLocalRate(
+      subquery TEXT,
+      numerator TEXT,
+      denominator TEXT,
+      w_type TEXT,
+      num_ngbrs INT,
+      permutations INT,
+      geom_col TEXT,
+      id_col TEXT)
+RETURNS
+TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+  from crankshaft.clustering import Moran
+  moran = Moran()
+  # TODO: use named parameters or a dictionary
+  return moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
+$$ LANGUAGE plpythonu;
+
+-- Moran's I Local Rate (public-facing function)
+-- Same result as the internal function, with defaults for the optional arguments.
+CREATE OR REPLACE FUNCTION
+  CDB_AreasOfInterestLocalRate(
+      subquery TEXT,
+      numerator TEXT,
+      denominator TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS
+TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col);
+
+$$ LANGUAGE SQL;
+
+-- Moran's I Local Rate only for HH and HL (public-facing function)
+CREATE OR REPLACE FUNCTION
+  CDB_GetSpatialHotspotsRate(
+      subquery TEXT,
+      numerator TEXT,
+      denominator TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS
+TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
+  WHERE quads IN ('HH', 'HL');
+
+$$ LANGUAGE SQL;
+
+-- Moran's I Local Rate only for LL and LH (public-facing function)
+CREATE OR REPLACE FUNCTION
+  CDB_GetSpatialColdspotsRate(
+      subquery TEXT,
+      numerator TEXT,
+      denominator TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS
+TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
+  WHERE quads IN ('LL', 'LH');
+
+$$ LANGUAGE SQL;
+
+-- Moran's I Local Rate only for LH and HL (public-facing function)
+CREATE OR REPLACE FUNCTION
+  CDB_GetSpatialOutliersRate(
+      subquery TEXT,
+      numerator TEXT,
+      denominator TEXT,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS
+TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
+AS $$
+
+  SELECT moran, quads, significance, rowid, vals
+  FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
+  WHERE quads IN ('HL', 'LH');
+
+$$ LANGUAGE SQL;
+-- Spatial k-means clustering
+-- Forwards the query, the number of clusters and the number of
+-- initialisations to Kmeans.spatial of crankshaft.clustering.
+
+CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer, no_init integer default 20)
+RETURNS table (cartodb_id integer, cluster_no integer) as $$
+
+    from crankshaft.clustering import Kmeans
+    kmeans = Kmeans()
+    return kmeans.spatial(query, no_clusters, no_init)
+
+$$ LANGUAGE plpythonu;
+
+
+-- State transition function of the CDB_WeightedMean aggregate.
+-- The state is {sum of x*weight, sum of y*weight, sum of weights}; rows
+-- with a NULL geometry or a NULL weight leave the state unchanged.
+CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
+RETURNS Numeric[] AS
+$$
+DECLARE
+    newX NUMERIC;
+    newY NUMERIC;
+    newW NUMERIC;
+BEGIN
+    IF weight IS NULL OR the_geom IS NULL THEN
+        newX = state[1];
+        newY = state[2];
+        newW = state[3];
+    ELSE
+        newX = state[1] + ST_X(the_geom)*weight;
+        newY = state[2] + ST_Y(the_geom)*weight;
+        newW = state[3] + weight;
+    END IF;
+    RETURN Array[newX,newY,newW];
+
+END
+$$ LANGUAGE plpgsql;
+
+-- Final function of the CDB_WeightedMean aggregate.
+-- Divides the weighted coordinate sums by the total weight and returns the
+-- point in SRID 4326; when the total weight is 0 the sums are used undivided.
+CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
+RETURNS GEOMETRY AS
+$$
+BEGIN
+    IF state[3] = 0 THEN
+        RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326);
+    ELSE
+        RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326);
+    END IF;
+END
+$$ LANGUAGE plpgsql;
+
+-- Create aggregate if it did not exist
+DO $$
+BEGIN
+    IF NOT EXISTS (
+        SELECT *
+        FROM pg_catalog.pg_proc p
+            LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
+        WHERE n.nspname = 'cdb_crankshaft'
+            AND p.proname = 'cdb_weightedmean'
+            AND p.proisagg)
+    THEN
+        CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
+            SFUNC = CDB_WeightedMeanS,
+            FINALFUNC = CDB_WeightedMeanF,
+            STYPE = Numeric[],
+            INITCOND = "{0.0,0.0,0.0}"
+        );
+    END IF;
+END
+$$ LANGUAGE plpgsql;
+-- Spatial Markov
+
+-- input table format:
+-- id | geom | date_1 | date_2 | date_3
+--  1 | Pt1  | 12.3   |  13.1  | 14.2
+--  2 | Pt2  | 11.0   |  13.2  | 12.5
+-- ...
+-- Sample function call:
+-- SELECT CDB_SpatialMarkovTrend('SELECT * FROM real_estate',
+--                               Array['date_1', 'date_2', 'date_3'])
+--
+-- Hands every argument, in declaration order, to Markov.spatial_trend of
+-- crankshaft.space_time_dynamics and returns its result.
+
+CREATE OR REPLACE FUNCTION
+  CDB_SpatialMarkovTrend (
+      subquery TEXT,
+      time_cols TEXT[],
+      num_classes INT DEFAULT 7,
+      w_type TEXT DEFAULT 'knn',
+      num_ngbrs INT DEFAULT 5,
+      permutations INT DEFAULT 99,
+      geom_col TEXT DEFAULT 'the_geom',
+      id_col TEXT DEFAULT 'cartodb_id')
+RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT)
+AS $$
+
+  from crankshaft.space_time_dynamics import Markov
+
+  ## TODO: use named parameters or a dictionary
+  trend_args = (subquery, time_cols, num_classes, w_type,
+                num_ngbrs, permutations, geom_col, id_col)
+  return Markov().spatial_trend(*trend_args)
+$$ LANGUAGE plpythonu;
+
+-- input table format: identical to above but in a predictable format
+-- Sample function call:
+-- SELECT cdb_spatial_markov('SELECT * FROM real_estate',
+--                          'date_1')
+
+
+-- CREATE OR REPLACE FUNCTION
+--     cdb_spatial_markov (
+--         subquery TEXT,
+--         time_col_min text,
+--         time_col_max text,
+--         date_format text, -- '_YYYY_MM_DD'
+--         num_time_per_bin INT DEFAULT 1,
+--         permutations INT DEFAULT 99,
+--         geom_column TEXT DEFAULT 'the_geom',
+--         id_col TEXT DEFAULT 'cartodb_id',
+--         w_type TEXT DEFAULT 'knn',
+--         num_ngbrs int DEFAULT 5)
+-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
+-- AS $$
+--   plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
+--   from crankshaft.clustering import moran_local
+--   # TODO: use named parameters or a dictionary
+--   return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
+-- $$ LANGUAGE plpythonu;
+--
+-- -- input table format:
+-- -- id | geom | date  | measurement
+-- --  1 | Pt1  | 12/3  | 13.2
+-- --  2 | Pt2  | 11/5  | 11.3
+-- --  3 | Pt1  | 11/13 | 12.9
+-- --  4 | Pt3  | 12/19 | 10.1
+-- -- ...
+--
+-- CREATE OR REPLACE FUNCTION
+--     cdb_spatial_markov (
+--         subquery TEXT,
+--         time_col text,
+--         num_time_per_bin INT DEFAULT 1,
+--         permutations INT DEFAULT 99,
+--         geom_column TEXT DEFAULT 'the_geom',
+--         id_col TEXT DEFAULT 'cartodb_id',
+--         w_type TEXT DEFAULT 'knn',
+--         num_ngbrs int DEFAULT 5)
+-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
+-- AS $$
+--   plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
+--   from crankshaft.clustering import moran_local
+--   # TODO: use named parameters or a dictionary
+--   return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
+-- $$ LANGUAGE plpythonu;
+-- Based on:
+-- https://github.com/mapbox/polylabel/blob/master/index.js
+-- https://sites.google.com/site/polesofinaccessibility/
+-- Requires: https://github.com/CartoDB/cartodb-postgresql
+
+-- Based on:
+-- https://github.com/mapbox/polylabel/blob/master/index.js
+-- https://sites.google.com/site/polesofinaccessibility/
+-- Requires: https://github.com/CartoDB/cartodb-postgresql
+
+-- Pole of inaccessibility of a polygon.
+--
+-- @param polygon    polygon to analyse; it is transformed to EPSG:3857
+-- @param tolerance  a cell is only subdivided while it could still improve
+--                   the best signed distance by more than this amount
+--
+-- Returns: the centroid (EPSG:4326) of the best candidate found, where the
+--          first candidate is the centroid of the polygon itself.
+CREATE OR REPLACE FUNCTION CDB_PIA(
+    IN polygon geometry,
+    IN tolerance numeric DEFAULT 1.0
+    )
+RETURNS geometry AS $$
+DECLARE
+    cells geometry[];
+    cell geometry;
+    best_c geometry;
+    best_d numeric;
+    test_d numeric;
+    test_mx numeric;
+    test_h numeric;
+    test_cells geometry[];
+    width numeric;
+    height numeric;
+    h numeric;
+    i integer;
+    n integer;
+    sqr numeric;
+BEGIN
+    sqr := |/2;
+    polygon := ST_Transform(polygon, 3857);
+
+    -- grid #0 cell size
+    height := ST_YMax(polygon) - ST_YMin(polygon);
+    width := ST_XMax(polygon) - ST_XMin(polygon);
+    h := 0.5*LEAST(height, width);
+
+    -- grid #0
+    with c1 as(
+        SELECT cdb_crankshaft.CDB_RectangleGrid(polygon, h, h) as c
+    )
+    SELECT array_agg(c) INTO cells FROM c1;
+
+    -- 1st guess: centroid. It is also kept as the best candidate so that a
+    -- result exists when no cell improves on it.
+    best_c := ST_Centroid(polygon);
+    best_d := cdb_crankshaft._Signed_Dist(polygon, best_c);
+
+    -- looping the loop
+    -- an empty grid gives a NULL length; treat it as 0 so the loop ends
+    n := coalesce(array_length(cells,1), 0);
+    i := 1;
+    LOOP
+
+        EXIT WHEN i > n;
+
+        cell := cells[i];
+        i := i+1;
+
+        -- cell side size, it's square
+        test_h := ST_XMax(cell) - ST_XMin(cell) ;
+
+        -- check distance
+        test_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(cell));
+        IF test_d > best_d THEN
+            best_d := test_d;
+            -- i already points at the next cell, so store the cell just tested
+            best_c := cell;
+        END IF;
+
+        -- longest distance within the cell
+        test_mx := test_d + (test_h/2 * sqr);
+
+        -- if the cell has no chance to contain the desired point, continue
+        CONTINUE WHEN test_mx - best_d <= tolerance;
+
+        -- resample the cell
+        with c1 as(
+            SELECT cdb_crankshaft.CDB_RectangleGrid(cell, test_h/2, test_h/2) as c
+        )
+        SELECT array_agg(c) INTO test_cells FROM c1;
+
+        -- concat the new cells to the former array
+        cells := cells || test_cells;
+
+        -- prepare next iteration
+        n := coalesce(array_length(cells,1), 0);
+
+    END LOOP;
+
+    RETURN ST_transform(ST_Centroid(best_c), 4326);
+
+END;
+$$ language plpgsql IMMUTABLE;
+
+
+-- signed distance point to polygon with holes
+-- negative if the point is outside the polygon
+-- (the distance is the smallest one to the exterior ring or to any
+-- interior ring)
+CREATE OR REPLACE FUNCTION _Signed_Dist(
+    IN polygon geometry,
+    IN point geometry
+    )
+RETURNS numeric AS $$
+DECLARE
+    i integer;
+    within integer;
+    holes integer;
+    dist numeric;
+BEGIN
+    dist := 1e999;
+    SELECT LEAST(dist, ST_distance(point, ST_ExteriorRing(polygon))::numeric) INTO dist;
+    SELECT CASE WHEN ST_Within(point,polygon) THEN 1 ELSE -1 END INTO within;
+    SELECT ST_NumInteriorRings(polygon) INTO holes;
+    IF holes > 0 THEN
+        FOR i IN 1..holes
+        LOOP
+            SELECT LEAST(dist, ST_distance(point, ST_InteriorRingN(polygon, i))::numeric) INTO dist;
+        END LOOP;
+    END IF;
+    dist := dist * within::numeric;
+    RETURN dist;
+END;
+$$ language plpgsql IMMUTABLE;
+--
+-- Iterative densification of a set of points using Delaunay triangulation
+-- the new points have as assigned value the average value of the 3 vertex (centroid)
+--
+-- @param geomin - array of geometries (points)
+--
+-- @param colin - array of numeric values in that points
+--
+-- @param iterations - 
integer, number of iterations +-- +-- +-- Returns: TABLE(geomout geometry, colout numeric) +-- +-- +CREATE OR REPLACE FUNCTION CDB_Densify( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + geotemp geometry[]; + coltemp numeric[]; + i integer; + gs geometry[]; + g geometry; + vertex geometry[]; + va numeric; + vb numeric; + vc numeric; + center geometry; + centerval numeric; + tmp integer; +BEGIN + geotemp := geomin; + coltemp := colin; + FOR i IN 1..iterations + LOOP + -- generate TIN + WITH a as (SELECT unnest(geotemp) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + -- loop cells + FOREACH g IN ARRAY gs + LOOP + -- append centroid + SELECT ST_Centroid(g) INTO center; + geotemp := array_append(geotemp, center); + -- retrieve the value of each vertex + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + END LOOP; + RETURN QUERY SELECT unnest(geotemp ) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_TINmap( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + p geometry[]; + vals numeric[]; + gs geometry[]; + g geometry; + vertex geometry[]; + centerval numeric; 
+ va numeric; + vb numeric; + vc numeric; + coltemp numeric[]; +BEGIN + SELECT array_agg(dens.geomout), array_agg(dens.colout) INTO p, vals FROM cdb_crankshaft.CDB_Densify(geomin, colin, iterations) dens; + WITH a as (SELECT unnest(p) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + FOREACH g IN ARRAY gs + LOOP + -- retrieve the vertex of each triangle + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + RETURN QUERY SELECT unnest(gs) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +-- Getis-Ord's G +-- Hotspot/Coldspot Analysis tool +CREATE OR REPLACE FUNCTION + CDB_GetisOrdsG( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 999, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (z_score NUMERIC, p_value NUMERIC, p_z_sim NUMERIC, rowid BIGINT) +AS $$ + from crankshaft.clustering import Getis + getis = Getis() + return getis.getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- TODO: make a version that accepts the values as arrays + +-- Find outliers using a static threshold +-- +CREATE OR REPLACE FUNCTION CDB_StaticOutlier(column_value numeric, threshold numeric) +RETURNS boolean +AS $$ 
+BEGIN + + RETURN column_value > threshold; + +END; +$$ LANGUAGE plpgsql; + +-- Find outliers by a percentage above the threshold +-- TODO: add symmetric option? `is_symmetric boolean DEFAULT false` + +CREATE OR REPLACE FUNCTION CDB_PercentOutlier(column_values numeric[], outlier_fraction numeric, ids int[]) +RETURNS TABLE(is_outlier boolean, rowid int) +AS $$ +DECLARE + avg_val numeric; + out_vals boolean[]; +BEGIN + + SELECT avg(i) INTO avg_val + FROM unnest(column_values) As x(i); + + IF avg_val = 0 THEN + RAISE EXCEPTION 'Mean value is zero. Try another outlier method.'; + END IF; + + SELECT array_agg( + outlier_fraction < i / avg_val) INTO out_vals + FROM unnest(column_values) As x(i); + + RETURN QUERY + SELECT unnest(out_vals) As is_outlier, + unnest(ids) As rowid; + +END; +$$ LANGUAGE plpgsql; + +-- Find outliers above a given number of standard deviations from the mean + +CREATE OR REPLACE FUNCTION CDB_StdDevOutlier(column_values numeric[], num_deviations numeric, ids int[], is_symmetric boolean DEFAULT true) +RETURNS TABLE(is_outlier boolean, rowid int) +AS $$ +DECLARE + stddev_val numeric; + avg_val numeric; + out_vals boolean[]; +BEGIN + + SELECT stddev(i), avg(i) INTO stddev_val, avg_val + FROM unnest(column_values) As x(i); + + IF stddev_val = 0 THEN + RAISE EXCEPTION 'Standard deviation of input data is zero'; + END IF; + + IF is_symmetric THEN + SELECT array_agg( + abs(i - avg_val) / stddev_val > num_deviations) INTO out_vals + FROM unnest(column_values) As x(i); + ELSE + SELECT array_agg( + (i - avg_val) / stddev_val > num_deviations) INTO out_vals + FROM unnest(column_values) As x(i); + END IF; + + RETURN QUERY + SELECT unnest(out_vals) As is_outlier, + unnest(ids) As rowid; +END; +$$ LANGUAGE plpgsql; +CREATE OR REPLACE FUNCTION CDB_Contour( + IN geomin geometry[], + IN colin numeric[], + IN buffer numeric, + IN intmethod integer, + IN classmethod integer, + IN steps integer, + IN max_time integer DEFAULT 60000 + ) +RETURNS TABLE( + the_geom 
geometry, + bin integer, + min_value numeric, + max_value numeric, + avg_value numeric +) AS $$ +DECLARE + cell_count integer; + tin geometry[]; + resolution integer; +BEGIN + + -- nasty trick to override issue #121 + IF max_time = 0 THEN + max_time = -90; + END IF; + resolution := max_time; + max_time := -1 * resolution; + + -- calc the optimal number of cells for the current dataset + SELECT + CASE intmethod + WHEN 0 THEN round(3.7745903782 * max_time - 9.4399210051 * array_length(geomin,1) - 1350.8778213073) + WHEN 1 THEN round(2.2855592156 * max_time - 87.285217133 * array_length(geomin,1) + 17255.7085601797) + WHEN 2 THEN round(0.9799471999 * max_time - 127.0334085369 * array_length(geomin,1) + 22707.9579721218) + ELSE 10000 + END INTO cell_count; + + -- we don't have iterative barycentric interpolation in CDB_interpolation, + -- and it's a costy function, so let's make a custom one here till + -- we update the code + -- tin := ARRAY[]::geometry[]; + IF intmethod=1 THEN + WITH + a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b) + SELECT array_agg(v) INTO tin FROM c; + END IF; + -- Delaunay stuff performed just ONCE!! 
+ + -- magic + RETURN QUERY + WITH + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ st_area(ST_ConvexHull(ST_Collect(geomin)))/PI() as r + ), + envelope as ( + SELECT + st_expand(a.g, a.r) as e + FROM convexhull a + ), + envelope3857 as( + SELECT + ST_Transform(e, 3857) as geom + FROM envelope + ), + resolution as( + SELECT + CASE WHEN resolution <= 0 THEN + round(|/ ( + ST_area(geom) / abs(cell_count) + )) + ELSE + resolution + END AS cell + FROM envelope3857 + ), + grid as( + SELECT + ST_Transform(cdb_crankshaft.CDB_RectangleGrid(e.geom, r.cell, r.cell), 4326) as geom + FROM envelope3857 e, resolution r + ), + interp as( + SELECT + geom, + CASE + WHEN intmethod=1 THEN cdb_crankshaft._interp_in_tin(geomin, colin, tin, ST_Centroid(geom)) + ELSE cdb_crankshaft.CDB_SpatialInterpolation(geomin, colin, ST_Centroid(geom), intmethod) + END as val + FROM grid + ), + classes as( + SELECT CASE + WHEN classmethod = 0 THEN + cdb_crankshaft.CDB_EqualIntervalBins(array_agg(val), steps) + WHEN classmethod = 1 THEN + cdb_crankshaft.CDB_HeadsTailsBins(array_agg(val), steps) + WHEN classmethod = 2 THEN + cdb_crankshaft.CDB_JenksBins(array_agg(val), steps) + ELSE + cdb_crankshaft.CDB_QuantileBins(array_agg(val), steps) + END as b + FROM interp + where val is not null + ), + classified as( + SELECT + i.*, + width_bucket(i.val, c.b) as bucket + FROM interp i left join classes c + ON 1=1 + ), + classified2 as( + SELECT + geom, + val, + CASE + WHEN bucket = steps THEN bucket - 1 + ELSE bucket + END as b + FROM classified + ), + final as( + SELECT + st_union(geom) as the_geom, + b as bin, + min(val) as min_value, + max(val) as max_value, + avg(val) as avg_value + FROM classified2 + GROUP BY bin + ) + SELECT + * + FROM final + where final.bin is not null + ; +END; +$$ language plpgsql; + + + +-- ===================================================================== +-- Interp in grid, so we can use barycentric with a precalculated tin (NNI) +-- 
===================================================================== +CREATE OR REPLACE FUNCTION _interp_in_tin( + IN geomin geometry[], + IN colin numeric[], + IN tin geometry[], + IN point geometry + ) +RETURNS numeric AS +$$ +DECLARE + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- get the cell the point is within + WITH + a as (SELECT unnest(tin) as v), + b as (SELECT v FROM a WHERE ST_Within(point, v)) + SELECT v INTO g FROM b; + + -- if we're out of the data realm, + -- return null + IF g is null THEN + RETURN null; + END IF; + + -- vertex of the selected cell + WITH a AS ( + SELECT (ST_DumpPoints(g)).geom AS v + ) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + -- calc the areas + SELECT + ST_area(g), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg,1); + RETURN output; +END; +$$ +language plpgsql; +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. 
+--
+-- Parameters:
+-- * geom a Polygon geometry which defines the area where a value will be
+--   estimated as the area-weighted sum of a given table/column
+-- * target_table_name table name of the table that provides the values
+-- * target_column column name of the column that provides the values
+-- * schema_name optional parameter to define the schema the target table
+--   belongs to, which is necessary if it's not in the search_path.
+--   Note that target_table_name should never include the schema in it.
+-- Return value:
+--   Areal-weighted interpolation of the column values over the geometry
+CREATE OR REPLACE
+FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
+  RETURNS numeric AS
+$$
+DECLARE
+  result numeric;
+  qualified_name text;
+BEGIN
+  IF schema_name IS NULL THEN
+    qualified_name := Format('%I', target_table_name);
+  ELSE
+    qualified_name := Format('%I.%I', schema_name, target_table_name); -- quote both parts as identifiers
+  END IF;
+  EXECUTE Format('
+    SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
+    FROM %s AS a
+    WHERE $1 && a.the_geom
+    ', target_column, qualified_name)
+    USING geom
+    INTO result;
+  RETURN result;
+END;
+$$ LANGUAGE plpgsql;
+--
+-- Creates N points randomly distributed around the polygon
+--
+-- @param geom - the geometry to be turned in to points
+--
+-- @param no_points - the number of points to generate
+--
+-- @param max_iter_per_point - not read by the current implementation, which places each
+-- point on a random line across the geometry's bounding box, clipped to the geometry,
+-- instead of rejecting points that miss the polygon.
+--
+-- Returns: Multipoint with the requested points
+CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
+RETURNS GEOMETRY AS $$
+DECLARE
+  extent GEOMETRY;
+  test_point Geometry;
+  width NUMERIC;
+  height NUMERIC;
+  x0 NUMERIC;
+  y0 NUMERIC;
+  xp NUMERIC;
+  yp NUMERIC;
+  no_left INTEGER;
+  remaining_iterations INTEGER;
+  points GEOMETRY[];
+  bbox_line GEOMETRY;
+  intersection_line GEOMETRY;
+BEGIN
+  extent := ST_Envelope(geom);
+  width := ST_XMax(extent) - ST_XMIN(extent);
+  height := ST_YMax(extent) - ST_YMIN(extent);
+  x0 := ST_XMin(extent);
+  y0 := ST_YMin(extent);
+  no_left := no_points;
+
+  LOOP
+    IF coalesce(no_left, 0) <= 0 THEN -- also terminates for NULL or negative no_points
+      EXIT;
+    END IF;
+    yp = y0 + height*random(); -- random y inside the bounding box
+    bbox_line = ST_MakeLine( -- horizontal line across the bbox at y = yp, in geom's own SRID
+      ST_SetSRID(ST_MakePoint(x0, yp), ST_SRID(geom)),
+      ST_SetSRID(ST_MakePoint(x0+width, yp), ST_SRID(geom))
+    );
+    intersection_line = ST_Intersection(bbox_line,geom);
+    test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
+    points := points || test_point;
+    no_left = no_left - 1 ;
+  END LOOP;
+  RETURN ST_Collect(points);
+END;
+$$
+LANGUAGE plpgsql VOLATILE;
+-- Make sure by default there are no permissions for publicuser
+-- NOTE: this happens at extension creation time, as part of an implicit transaction.
+-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
+
+-- Grant permissions on the schema to publicuser (but just the schema)
+GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
+
+-- Revoke execute permissions on all functions in the schema by default
+-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
+--
+-- Fill given extent with a rectangular coverage
+--
+-- @param ext Extent to fill. Only rectangles with center point falling
+--            inside the extent (or at the lower or leftmost edge) will
+--            be emitted. The returned rectangles will have the same SRID
+--            as this extent.
+-- +-- @param width With of each rectangle +-- +-- @param height Height of each rectangle +-- +-- @param origin Optional origin to allow for exact tiling. +-- If omitted the origin will be 0,0. +-- The parameter is checked for having the same SRID +-- as the extent. +-- +-- +CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) +RETURNS SETOF GEOMETRY +AS $$ +DECLARE + h GEOMETRY; -- rectangle cell + hstep FLOAT8; -- horizontal step + vstep FLOAT8; -- vertical step + hw FLOAT8; -- half width + hh FLOAT8; -- half height + vstart FLOAT8; + hstart FLOAT8; + hend FLOAT8; + vend FLOAT8; + xoff FLOAT8; + yoff FLOAT8; + xgrd FLOAT8; + ygrd FLOAT8; + x FLOAT8; + y FLOAT8; + srid INTEGER; +BEGIN + + srid := ST_SRID(ext); + + xoff := 0; + yoff := 0; + + IF origin IS NOT NULL THEN + IF ST_SRID(origin) != srid THEN + RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); + END IF; + xoff := ST_X(origin); + yoff := ST_Y(origin); + END IF; + + --RAISE DEBUG 'X offset: %', xoff; + --RAISE DEBUG 'Y offset: %', yoff; + + hw := width/2.0; + hh := height/2.0; + + xgrd := hw; + ygrd := hh; + --RAISE DEBUG 'X grid size: %', xgrd; + --RAISE DEBUG 'Y grid size: %', ygrd; + + hstep := width; + vstep := height; + + -- Tweak horizontal start on hstep grid from origin + hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; + --RAISE DEBUG 'hstart: %', hstart; + + -- Tweak vertical start on vstep grid from origin + vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; + --RAISE DEBUG 'vstart: %', vstart; + + hend := ST_XMax(ext); + vend := ST_YMax(ext); + + --RAISE DEBUG 'hend: %', hend; + --RAISE DEBUG 'vend: %', vend; + + x := hstart; + WHILE x < hend LOOP -- over X + y := vstart; + h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); + WHILE y < vend LOOP -- over Y + RETURN NEXT h; + h := ST_Translate(h, 0, vstep); + y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid + END 
LOOP; + x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid + END LOOP; + + RETURN; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE; + +-- +-- Calculate the equal interval bins for a given column +-- +-- @param in_array A numeric array of numbers to determine the best +-- to determine the bin boundary +-- +-- @param breaks The number of bins you want to find. +-- +-- +-- Returns: upper edges of bins +-- +-- + +CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$ +DECLARE + diff numeric; + min_val numeric; + max_val numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL; + diff = (max_val - min_val) / breaks::numeric; + LOOP + IF i < breaks THEN + tmp_val = min_val + i::numeric * diff; + reply = array_append(reply, tmp_val); + i := i+1; + ELSE + reply = array_append(reply, max_val); + EXIT; + END IF; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Heads/Tails classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Heads/Tails method. +-- +-- @param breaks The number of bins you want to find. 
+--
+--
+
+CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$
+DECLARE
+    element_count INT4;
+    arr_mean numeric;
+    i INT := 2;
+    reply numeric[];
+BEGIN
+    -- get the total size of our row (upper - lower would undercount by one)
+    element_count := array_length(in_array, 1);
+    -- ensure the ordering of in_array
+    SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x;
+    -- stop if no rows
+    IF element_count IS NULL THEN
+        RETURN NULL;
+    END IF;
+    -- stop if our breaks are more than our input array size
+    IF element_count < breaks THEN
+        RETURN in_array;
+    END IF;
+
+    -- get our mean value
+    SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x;
+
+    reply = Array[arr_mean];
+    -- slice our bread: each further break is the mean of the values above the previous break
+    LOOP
+        IF i > breaks THEN EXIT; END IF;
+        SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1];
+        IF arr_mean IS NOT NULL THEN
+            reply = array_append(reply, arr_mean);
+        END IF;
+        i := i+1;
+    END LOOP;
+    RETURN reply;
+END;
+$$ language plpgsql IMMUTABLE;
+
+--
+-- Determine the Jenks classifications from a numeric array
+--
+-- @param in_array A numeric array of numbers to determine the best
+--          bins based on the Jenks method.
+--
+-- @param breaks The number of bins you want to find.
+--
+-- @param iterations The number of different starting positions to test.
+--
+-- @param invert Optional whether to return the top of each bin (default)
+--          or the bottom. BOOLEAN, default=FALSE.
+-- +-- + + +CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean NUMERIC; + bot INT; + top INT; + tops INT[]; + classes INT[][]; + i INT := 1; j INT := 1; + curr_result NUMERIC[]; + best_result NUMERIC[]; + seedtarget TEXT; + quant NUMERIC[]; + shuffles INT; +BEGIN + -- get the total size of our row + element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int; + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + -- assume best is actually Quantile + SELECT cdb_crankshaft.CDB_QuantileBins(in_array, breaks) INTO quant; + + -- if data is very very large, just return quant and be done + IF element_count > 5000000 THEN + RETURN quant; + END IF; + + -- change quant into bottom, top markers + LOOP + IF i = 1 THEN + bot = 1; + ELSE + -- use last top to find this bot + bot = top+1; + END IF; + IF i = breaks THEN + top = element_count; + ELSE + SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i]; + END IF; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + IF i > breaks THEN EXIT; END IF; + i = i+1; + END LOOP; + + best_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + --set the seed so we can ensure the same results + SELECT setseed(0.4567) INTO seedtarget; + 
--loop through random starting positions + LOOP + IF j > iterations-1 THEN EXIT; END IF; + i = 1; + tops = ARRAY[element_count]; + LOOP + IF i = breaks THEN EXIT; END IF; + SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1; + i = array_length(tops, 1); + END LOOP; + i = 1; + LOOP + IF i > breaks THEN EXIT; END IF; + IF i = 1 THEN + bot = 1; + ELSE + bot = top+1; + END IF; + top = tops[i]; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + i := i+1; + END LOOP; + curr_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + IF curr_result[1] > best_result[1] THEN + best_result = curr_result; + j = j-1; -- if we found a better result, add one more search + END IF; + j = j+1; + END LOOP; + + RETURN (best_result)[2:array_upper(best_result, 1)]; +END; +$$ language plpgsql IMMUTABLE; + + + +-- +-- Perform a single iteration of the Jenks classification +-- + +CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$ +DECLARE + tmp_val numeric; + new_classes int[][]; + tmp_class int[]; + i INT := 1; + j INT := 1; + side INT := 2; + sdam numeric; + gvf numeric := 0.0; + new_gvf numeric; + arr_gvf numeric[]; + class_avg numeric; + class_max_i INT; + class_min_i INT; + class_max numeric; + class_min numeric; + reply numeric[]; +BEGIN + + -- Calculate the sum of squared deviations from the array mean (SDAM). 
+ SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x; + --Identify the breaks for the lowest GVF + LOOP + i = 1; + LOOP + -- get our mean + SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x; + -- find the deviation + SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x; + IF i = 1 THEN + arr_gvf = ARRAY[tmp_val]; + -- init our min/max map for later + class_max = arr_gvf[i]; + class_min = arr_gvf[i]; + class_min_i = 1; + class_max_i = 1; + ELSE + arr_gvf = array_append(arr_gvf, tmp_val); + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + -- calculate our new GVF + SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x; + -- if no improvement was made, exit + IF new_gvf < gvf THEN EXIT; END IF; + gvf = new_gvf; + IF j > max_search THEN EXIT; END IF; + j = j+1; + i = 1; + LOOP + --establish directionality (uppward through classes or downward) + IF arr_gvf[i] < class_min THEN + class_min = arr_gvf[i]; + class_min_i = i; + END IF; + IF arr_gvf[i] > class_max THEN + class_max = arr_gvf[i]; + class_max_i = i; + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + IF class_max_i > class_min_i THEN + class_min_i = class_max_i - 1; + ELSE + class_min_i = class_max_i + 1; + END IF; + --Move from higher class to a lower gid order + IF class_max_i > class_min_i THEN + classes[class_max_i][1] = classes[class_max_i][1] + 1; + classes[class_min_i][2] = classes[class_min_i][2] + 1; + ELSE -- Move from lower class UP into a higher class by gid + classes[class_max_i][2] = classes[class_max_i][2] - 1; + classes[class_min_i][1] = classes[class_min_i][1] - 1; + END IF; + END LOOP; + + i = 1; + LOOP + IF invert = TRUE THEN + side = 1; --default returns bottom side of breaks, invert returns top side + END IF; + reply = array_append(reply, in_array[classes[i][side]]); + i = i+1; + IF i > breaks THEN EXIT; END 
IF; + END LOOP; + + RETURN array_prepend(gvf, reply); + +END; +$$ language plpgsql IMMUTABLE; + + +-- +-- Determine the Quantile classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Quantile method. +-- +-- @param breaks The number of bins you want to find. +-- +-- +CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + break_size numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + -- sort our values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; + -- get the total size of our data + element_count := array_length(in_array, 1); + break_size := element_count::numeric / breaks; + -- slice our bread + LOOP + IF i < breaks THEN + IF break_size * i % 1 > 0 THEN + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; + ELSE + SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; + END IF; + ELSIF i = breaks THEN + -- select the last value + SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; + ELSE + EXIT; + END IF; + + reply = array_append(reply, tmp_val); + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; diff --git a/release/crankshaft.control b/release/crankshaft.control index 1e02d92..ec456b4 100644 --- a/release/crankshaft.control +++ b/release/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.4.2' +default_version = '0.5.0' requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft diff --git a/release/python/0.5.0/crankshaft/crankshaft/__init__.py b/release/python/0.5.0/crankshaft/crankshaft/__init__.py new file mode 100644 index 0000000..a8060f8 --- /dev/null +++ b/release/python/0.5.0/crankshaft/crankshaft/__init__.py @@ -0,0 +1,6 @@ +"""Import all 
modules"""
+import crankshaft.random_seeds
+import crankshaft.clustering
+import crankshaft.space_time_dynamics
+import crankshaft.segmentation
+import analysis_data_provider
diff --git a/release/python/0.5.0/crankshaft/crankshaft/analysis_data_provider.py b/release/python/0.5.0/crankshaft/crankshaft/analysis_data_provider.py
new file mode 100644
index 0000000..cbc27bc
--- /dev/null
+++ b/release/python/0.5.0/crankshaft/crankshaft/analysis_data_provider.py
@@ -0,0 +1,67 @@
+"""class for fetching data"""
+import plpy
+import pysal_utils as pu
+
+
+class AnalysisDataProvider:
+    def get_getis(self, w_type, params):
+        """fetch data for getis ord's g"""
+        try:
+            query = pu.construct_neighbor_query(w_type, params)
+            result = plpy.execute(query)
+            # if there are no neighbors, exit
+            if len(result) == 0:
+                return pu.empty_zipped_array(4)
+            else:
+                return result
+        except plpy.SPIError, err:
+            plpy.error('Analysis failed: %s' % err)
+
+    def get_markov(self, w_type, params):
+        """fetch data for spatial markov"""
+        try:
+            query = pu.construct_neighbor_query(w_type, params)
+            data = plpy.execute(query)
+
+            if len(data) == 0:
+                return pu.empty_zipped_array(4)
+
+            return data
+        except plpy.SPIError, err:
+            plpy.error('Analysis failed: %s' % err)
+
+    def get_moran(self, w_type, params):
+        """fetch neighbor data for moran's i analyses; empty 2-column result if no rows"""
+        try:
+            query = pu.construct_neighbor_query(w_type, params)
+            data = plpy.execute(query)
+
+            # if there are no neighbors, exit
+            if len(data) == 0:
+                return pu.empty_zipped_array(2)
+            return data
+        except plpy.SPIError, err:
+            plpy.error('Analysis failed: %s' % err)
+            return pu.empty_zipped_array(2)
+
+    def get_nonspatial_kmeans(self, query):
+        """fetch data for non-spatial kmeans"""
+        try:
+            data = plpy.execute(query)
+            return data
+        except plpy.SPIError, err:
+            plpy.error('Analysis failed: %s' % err)
+
+    def get_spatial_kmeans(self, params):
+        """fetch data for spatial kmeans"""
+        query = ("SELECT "
+                 "array_agg({id_col} ORDER BY {id_col}) as ids,"
+
"array_agg(ST_X({geom_col}) ORDER BY {id_col}) As xs," + "array_agg(ST_Y({geom_col}) ORDER BY {id_col}) As ys " + "FROM ({subquery}) As a " + "WHERE {geom_col} IS NOT NULL").format(**params) + try: + data = plpy.execute(query) + return data + except plpy.SPIError, err: + plpy.error('Analysis failed: %s' % err) diff --git a/release/python/0.5.0/crankshaft/crankshaft/clustering/__init__.py b/release/python/0.5.0/crankshaft/crankshaft/clustering/__init__.py new file mode 100644 index 0000000..d9682fa --- /dev/null +++ b/release/python/0.5.0/crankshaft/crankshaft/clustering/__init__.py @@ -0,0 +1,4 @@ +"""Import all functions from for clustering""" +from moran import * +from kmeans import * +from getis import * diff --git a/release/python/0.5.0/crankshaft/crankshaft/clustering/getis.py b/release/python/0.5.0/crankshaft/crankshaft/clustering/getis.py new file mode 100644 index 0000000..bef8f50 --- /dev/null +++ b/release/python/0.5.0/crankshaft/crankshaft/clustering/getis.py @@ -0,0 +1,50 @@ +""" +Getis-Ord's G geostatistics (hotspot/coldspot analysis) +""" + +import pysal as ps +from collections import OrderedDict + +# crankshaft modules +import crankshaft.pysal_utils as pu +from crankshaft.analysis_data_provider import AnalysisDataProvider + +# High level interface --------------------------------------- + + +class Getis: + def __init__(self, data_provider=None): + if data_provider is None: + self.data_provider = AnalysisDataProvider() + else: + self.data_provider = data_provider + + def getis_ord(self, subquery, attr, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Getis-Ord's G* + Implementation building neighbors with a PostGIS database and PySAL's + Getis-Ord's G* hotspot/coldspot module. 
+ Andy Eschbacher + """ + + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors if kNN is chosen + + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + result = self.data_provider.get_getis(w_type, qvals) + attr_vals = pu.get_attributes(result) + + # build PySAL weight object + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate Getis-Ord's G* z- and p-values + getis = ps.esda.getisord.G_Local(attr_vals, weight, + star=True, permutations=permutations) + + return zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order) diff --git a/release/python/0.5.0/crankshaft/crankshaft/clustering/kmeans.py b/release/python/0.5.0/crankshaft/crankshaft/clustering/kmeans.py new file mode 100644 index 0000000..1e49115 --- /dev/null +++ b/release/python/0.5.0/crankshaft/crankshaft/clustering/kmeans.py @@ -0,0 +1,32 @@ +from sklearn.cluster import KMeans +import numpy as np + +from crankshaft.analysis_data_provider import AnalysisDataProvider + + +class Kmeans: + def __init__(self, data_provider=None): + if data_provider is None: + self.data_provider = AnalysisDataProvider() + else: + self.data_provider = data_provider + + def spatial(self, query, no_clusters, no_init=20): + """ + find centers based on clusters of latitude/longitude pairs + query: SQL query that has a WGS84 geometry (the_geom) + """ + params = {"subquery": query, + "geom_col": "the_geom", + "id_col": "cartodb_id"} + + data = self.data_provider.get_spatial_kmeans(params) + + # Unpack query response + xs = data[0]['xs'] + ys = data[0]['ys'] + ids = data[0]['ids'] + + km = KMeans(n_clusters=no_clusters, n_init=no_init) + labels = km.fit_predict(zip(xs, ys)) + return zip(ids, labels) diff --git a/release/python/0.5.0/crankshaft/crankshaft/clustering/moran.py b/release/python/0.5.0/crankshaft/crankshaft/clustering/moran.py new file mode 100644 index 
class Moran:
    """
    Moran's I statistics (global, local, rate and bivariate variants).

    Every method builds an ordered parameter mapping, asks the data provider
    for attribute values plus neighbor lists, builds a PySAL weights object
    from that result, and hands both to the matching PySAL estimator.
    """

    def __init__(self, data_provider=None):
        """
        @param data_provider: object exposing `get_moran(w_type, params)`;
            an `AnalysisDataProvider` is created when none is supplied
        """
        if data_provider is None:
            self.data_provider = AnalysisDataProvider()
        else:
            self.data_provider = data_provider

    def global_stat(self, subquery, attr_name,
                    w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I (global)
        Implementation building neighbors with a PostGIS database and Moran's
        I core clusters with PySAL.
        Andy Eschbacher

        @param subquery: query that supplies the rows to analyse
        @param attr_name: name of the attribute column
        @param w_type: weight type handed to the provider and weight builder
        @param num_ngbrs: number of neighbors
        @param permutations: number of permutations handed to PySAL
        @param geom_col: name of the geometry column
        @param id_col: name of the id column
        Output:
            zip of a single (I, EI) pair
        """
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr_name),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        attr_vals = pu.get_attributes(result)

        # calculate weights
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate moran global
        moran_global = ps.esda.moran.Moran(attr_vals, weight,
                                           permutations=permutations)

        return zip([moran_global.I], [moran_global.EI])

    def local_stat(self, subquery, attr,
                   w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I implementation for PL/Python
        Andy Eschbacher

        Parameters are the same as for `global_stat`.
        Output:
            zip of (local I, quadrant label, p_sim, id, attribute value)
        """

        # geometries with attributes that are null are ignored
        # resulting in a collection of not as near neighbors

        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        attr_vals = pu.get_attributes(result)
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
                                         permutations=permutations)

        # find quadrants for each geometry
        quads = quad_position(lisa.q)

        return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)

    def global_rate_stat(self, subquery, numerator, denominator,
                         w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Rate (global)
        Andy Eschbacher

        @param numerator: name of the numerator column (queried as attr1)
        @param denominator: name of the denominator column (queried as attr2)
        Remaining parameters are the same as for `global_stat`.
        Output:
            zip of a single (I, EI) pair
        """
        # NOTE: every pair must be comma separated; without the comma after
        # the "attr2" entry Python tries to call that tuple and raises
        # TypeError before any query is issued.
        params = OrderedDict([("id_col", id_col),
                              ("attr1", numerator),
                              ("attr2", denominator),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        numer = pu.get_attributes(result, 1)
        denom = pu.get_attributes(result, 2)

        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate moran global rate
        lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
                                             permutations=permutations)

        return zip([lisa_rate.I], [lisa_rate.EI])

    def local_rate_stat(self, subquery, numerator, denominator,
                        w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Local Rate
        Andy Eschbacher

        Parameters are the same as for `global_rate_stat`.
        Output:
            zip of (local I, quadrant label, p_sim, id, rate value)
        """
        # geometries with values that are null are ignored
        # resulting in a collection of not as near neighbors

        # NOTE(review): the "numerator"/"denominator" key names (rather than
        # attr1/attr2) look deliberate -- presumably the query builder keys
        # on 'denominator' to exclude zero values; confirm before renaming.
        params = OrderedDict([("id_col", id_col),
                              ("numerator", numerator),
                              ("denominator", denominator),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        numer = pu.get_attributes(result, 1)
        denom = pu.get_attributes(result, 2)

        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
                                              permutations=permutations)

        # find quadrants for each geometry
        quads = quad_position(lisa.q)

        return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)

    def local_bivariate_stat(self, subquery, attr1, attr2,
                             permutations, geom_col, id_col,
                             w_type, num_ngbrs):
        """
        Moran's I (local) Bivariate (untested)

        Note that the parameter order differs from the other methods:
        `w_type` and `num_ngbrs` come last.

        @param attr1: name of the first attribute column
        @param attr2: name of the second attribute column
        Output:
            zip of (local I, quadrant label, p_sim, id)
        """

        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr1),
                              ("attr2", attr2),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        attr1_vals = pu.get_attributes(result, 1)
        attr2_vals = pu.get_attributes(result, 2)

        # create weights
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
                                            permutations=permutations)

        # find clustering of significance
        lisa_sig = quad_position(lisa.q)

        return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
def get_weight(query_res, w_type='knn', num_ngbrs=5):
    """
    Construct a row-standardized PySAL weight object from query results.

    @param query_res dict-like: query results with attributes and
        neighbors; every row must provide 'id' and 'neighbors'
    @param w_type text: kept for interface compatibility, currently unused
    @param num_ngbrs int: kept for interface compatibility, currently unused
    Output:
        PySAL `W` object with its transform set to 'r'
    """
    # map each row id onto the list of ids of its neighbors
    neighbors = {x['id']: x['neighbors'] for x in query_res}

    built_weight = ps.W(neighbors)
    # 'r' asks PySAL to row-standardize the weights
    built_weight.transform = 'r'

    return built_weight
def query_attr_select(params):
    """
    Build the attribute part of a SELECT clause, aliasing every attribute
    column as attr1, attr2, ... in the order it appears in `params`.

    @param params: dict of information used in query (column names,
                   table name, etc.). When it has a 'time_cols' entry, that
                   list names the columns; otherwise every entry whose key
                   is not one of 'id_col', 'geom_col', 'subquery' or
                   'num_ngbrs' names a column.
    Example:
        OrderedDict([('numerator', 'price'),
                     ('denominator', 'sq_meters'),
                     ('subquery', 'SELECT * FROM interesting_data')])
    Output:
        "i.\"price\"::numeric As attr1, " \
        "i.\"sq_meters\"::numeric As attr2, "
    """
    template = "i.\"%(col)s\"::numeric As attr%(alias_num)s, "

    if 'time_cols' in params:
        # markov analysis: the column names are listed explicitly
        columns = params['time_cols']
    else:
        # moran's analysis: anything that is not a bookkeeping key
        bookkeeping = ('id_col', 'geom_col', 'subquery', 'num_ngbrs')
        columns = [params[key] for key in params if key not in bookkeeping]

    # aliases are numbered from 1
    return "".join(template % {"col": column, "alias_num": position}
                   for position, column in enumerate(columns, 1))
def knn(params):
    """
    SQL query for k-nearest neighbors.

    @param params: dict of values to fill the template; 'id_col',
        'geom_col', 'subquery' and 'num_ngbrs' are substituted directly,
        and the attribute clauses are derived from the same dict
    Output:
        query string returning, per row, its id, its attribute columns and
        an array of the ids of its `num_ngbrs` nearest rows
    """
    select_clause = query_attr_select(params)
    where_clause = query_attr_where(params)

    template = (
        'SELECT '
        'i."{id_col}" As id, '
        '%(attr_select)s'
        '(SELECT ARRAY(SELECT j."{id_col}" '
        'FROM ({subquery}) As j '
        'WHERE '
        'i."{id_col}" <> j."{id_col}" AND '
        '%(attr_where_j)s '
        'ORDER BY '
        'j."{geom_col}" <-> i."{geom_col}" ASC '
        'LIMIT {num_ngbrs})'
        ') As neighbors '
        'FROM ({subquery}) As i '
        'WHERE '
        '%(attr_where_i)s '
        'ORDER BY i."{id_col}" ASC;'
    )

    # first pass: drop in the attribute clauses, aliased for the outer (i)
    # and inner (j) copies of the subquery
    with_attrs = template % {
        "attr_select": select_clause,
        "attr_where_i": where_clause.replace("idx_replace", "i"),
        "attr_where_j": where_clause.replace("idx_replace", "j"),
    }

    # second pass: fill the {named} placeholders from params
    return with_attrs.format(**params)
def get_attributes(query_res, attr_num=1):
    """
    Extract one attribute column from query results as a float array.

    @param query_res: query results with attributes and neighbors; every
        row must provide the key 'attr<attr_num>'
    @param attr_num: attribute number (1, 2, ...)
    Output:
        1-D numpy array of floats, one entry per row of `query_res`
    """
    attr_key = 'attr' + str(attr_num)
    # `np.float` was only an alias of the builtin `float` and no longer
    # exists in current NumPy releases; the builtin works on every version
    return np.array([row[attr_key] for row in query_res], dtype=float)
def replace_nan_with_mean(array):
    """
    Fill NaN entries with the mean of the non-NaN values, in place.

    Input:
        @param array: a 1-D or 2-D array of floats which may have
            null-valued (NaN) entries
    Output:
        the same array object with NaNs filled in: a 2-D array is filled
        column by column with that column's mean, a 1-D array with the
        mean of the whole vector
    """
    nan_mask = np.isnan(array)
    if not nan_mask.any():
        return array

    if array.ndim == 1:
        # a row/column index pair does not exist for vectors, so they are
        # handled separately instead of failing on tuple unpacking
        array[nan_mask] = np.mean(array[~nan_mask])
        return array

    # only visit columns that actually contain a NaN; each column mean is
    # computed once and written to all of that column's NaN rows
    for col in np.unique(np.where(nan_mask)[1]):
        missing_rows = nan_mask[:, col]
        array[missing_rows, col] = np.mean(array[~missing_rows, col])

    return array
a'''.format( + variable=variable, + columns=columns, + query=query)) + except Exception, e: + plpy.error('Failed to access data to build segmentation model: %s' % e) + + # extract target data from plpy object + target = np.array(data[0]['target']) + + # put n feature data arrays into an n x m array of arrays + features = np.column_stack([np.array(data[0][col], dtype=float) for col in feature_columns]) + + return replace_nan_with_mean(target), replace_nan_with_mean(features) + +# High level interface +# -------------------- + +def create_and_predict_segment_agg(target, features, target_features, target_ids, model_parameters): + """ + Version of create_and_predict_segment that works on arrays that come stright form the SQL calling + the function. + + Input: + @param target: The 1D array of lenth NSamples containing the target variable we want the model to predict + @param features: Thw 2D array of size NSamples * NFeatures that form the imput to the model + @param target_ids: A 1D array of target_ids that will be used to associate the results of the prediction with the rows which they come from + @param model_parameters: A dictionary containing parameters for the model. 
def train_model(target, features, model_params, test_split):
    """
    Train the Gradient Boosting model on the provided data and calculate
    the accuracy of the model on the withheld part of that data.

    Input:
        @param target: 1D Array of the variable that the model is to be
            trained to predict
        @param features: 2D Array NSamples * NFeatures to use in training
            the model
        @param model_params: A dictionary of model parameters, the full
            specification can be found on the scikit learn page for
            [GradientBoostingRegressor](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html)
        @param test_split: The fraction of the data to be withheld for
            testing the model / calculating the accuracy
    Output:
        (model, accuracy): the fitted regressor and the value returned by
        `calculate_model_accuracy` for the withheld samples
    """
    features_train, features_test, target_train, target_test = \
        train_test_split(features, target, test_size=test_split)

    model = GradientBoostingRegressor(**model_params)
    model.fit(features_train, target_train)

    # score only on samples the model has not seen during fitting; scoring
    # on the full dataset would mostly measure fit to the training data
    accuracy = calculate_model_accuracy(model, features_test, target_test)
    return model, accuracy
This will cause weird effects + batch = replace_nan_with_mean(batch) + prediction = model.predict(batch) + results.append(prediction) + + try: + cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids'] + except Exception, e: + plpy.error('Failed to build segmentation model: %s' % e) + + return cartodb_ids, np.concatenate(results) diff --git a/release/python/0.5.0/crankshaft/crankshaft/space_time_dynamics/__init__.py b/release/python/0.5.0/crankshaft/crankshaft/space_time_dynamics/__init__.py new file mode 100644 index 0000000..a439286 --- /dev/null +++ b/release/python/0.5.0/crankshaft/crankshaft/space_time_dynamics/__init__.py @@ -0,0 +1,2 @@ +"""Import all functions from clustering libraries.""" +from markov import * diff --git a/release/python/0.5.0/crankshaft/crankshaft/space_time_dynamics/markov.py b/release/python/0.5.0/crankshaft/crankshaft/space_time_dynamics/markov.py new file mode 100644 index 0000000..3ad8273 --- /dev/null +++ b/release/python/0.5.0/crankshaft/crankshaft/space_time_dynamics/markov.py @@ -0,0 +1,194 @@ +""" +Spatial dynamics measurements using Spatial Markov +""" + +# TODO: remove all plpy dependencies + +import numpy as np +import pysal as ps +import plpy +import crankshaft.pysal_utils as pu +from crankshaft.analysis_data_provider import AnalysisDataProvider + + +class Markov: + def __init__(self, data_provider=None): + if data_provider is None: + self.data_provider = AnalysisDataProvider() + else: + self.data_provider = data_provider + + def spatial_trend(self, subquery, time_cols, num_classes=7, + w_type='knn', num_ngbrs=5, permutations=0, + geom_col='the_geom', id_col='cartodb_id'): + """ + Predict the trends of a unit based on: + 1. history of its transitions to different classes (e.g., 1st + quantile -> 2nd quantile) + 2. 
average class of its neighbors + + Inputs: + @param subquery string: e.g., SELECT the_geom, cartodb_id, + interesting_time_column FROM table_name + @param time_cols list of strings: list of strings of column names + @param num_classes (optional): number of classes to break + distribution of values into. Currently uses quantile bins. + @param w_type string (optional): weight type ('knn' or 'queen') + @param num_ngbrs int (optional): number of neighbors (if knn type) + @param permutations int (optional): number of permutations for test + stats + @param geom_col string (optional): name of column which contains + the geometries + @param id_col string (optional): name of column which has the ids + of the table + + Outputs: + @param trend_up float: probablity that a geom will move to a higher + class + @param trend_down float: probablity that a geom will move to a + lower class + @param trend float: (trend_up - trend_down) / trend_static + @param volatility float: a measure of the volatility based on + probability stddev(prob array) + """ + + if len(time_cols) < 2: + plpy.error('More than one time column needs to be passed') + + params = {"id_col": id_col, + "time_cols": time_cols, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query_result = self.data_provider.get_markov(w_type, params) + + # build weight + weights = pu.get_weight(query_result, w_type) + weights.transform = 'r' + + # prep time data + t_data = get_time_data(query_result, time_cols) + + sp_markov_result = ps.Spatial_Markov(t_data, + weights, + k=num_classes, + fixed=False, + permutations=permutations) + + # get lag classes + lag_classes = ps.Quantiles( + ps.lag_spatial(weights, t_data[:, -1]), + k=num_classes).yb + + # look up probablity distribution for each unit according to class and + # lag class + prob_dist = get_prob_dist(sp_markov_result.P, + lag_classes, + sp_markov_result.classes[:, -1]) + + # find the ups and down and overall distribution of each cell + trend_up, 
def get_time_data(markov_data, time_cols):
    """
    Collect the attr1..attrN values of every row into a float matrix.

    @param markov_data: sequence of dict-like rows holding the keys
        'attr1' ... 'attrN', where N is the number of time columns
    @param time_cols: list of time column names; only its length is used
    Output:
        ndarray with one row per entry of `markov_data` and one column per
        time column
    """
    attr_keys = ['attr%d' % position
                 for position in range(1, len(time_cols) + 1)]

    # gather one list per attribute, then flip so that rows are the units
    per_attribute = [[record[key] for record in markov_data]
                     for key in attr_keys]

    return np.array(per_attribute, dtype=float).T
def get_prob_stats(prob_dist, unit_indices):
    """
    Summarise each unit's probability distribution over classes.

    Inputs:
        @param prob_dist ndarray: one row of class probabilities per unit
        @param unit_indices: class index of each unit (one per row)
    Outputs:
        @param trend_up ndarray(float): sum of probabilities for upward
            movement (classes above the unit's own class)
        @param trend_down ndarray(float): sum of probabilities for downward
            movement (classes below the unit's own class)
        @param trend ndarray(float): (trend_up - trend_down) divided by the
            probability of staying in the own class; NaN when that
            probability is not positive
        @param volatility ndarray(float): standard deviation of each row
    """
    count = len(unit_indices)
    moving_up = np.empty(count, dtype=float)
    moving_down = np.empty(count, dtype=float)
    net_trend = np.empty(count, dtype=float)

    for row, own_class in enumerate(unit_indices):
        moving_up[row] = prob_dist[row, (own_class + 1):].sum()
        moving_down[row] = prob_dist[row, :own_class].sum()

        staying = prob_dist[row, own_class]
        # None stored into a float array ends up as NaN
        net_trend[row] = ((moving_up[row] - moving_down[row]) / staying
                          if staying > 0.0 else None)

    # spread of each unit's distribution across classes
    spread = prob_dist.std(axis=1)

    return moving_up, moving_down, net_trend, spread
+ install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'], + + requires=['pysal', 'numpy', 'sklearn'], + + test_suite='test' +) diff --git a/release/python/0.5.0/crankshaft/test/fixtures/getis.json b/release/python/0.5.0/crankshaft/test/fixtures/getis.json new file mode 100644 index 0000000..02566fc --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/fixtures/getis.json @@ -0,0 +1 @@ +[[0.004793783909323601, 0.17999999999999999, 0.49808756424021061], [-1.0701189472090842, 0.079000000000000001, 0.14228288580832316], [-0.67867750971877305, 0.42099999999999999, 0.24867110969448558], [-0.67407386707620487, 0.246, 0.25013217644612995], [-0.79495689068870035, 0.33200000000000002, 0.21331928959090596], [-0.49279481022182703, 0.058999999999999997, 0.31107878905057329], [-0.38075627530057132, 0.28399999999999997, 0.35169205342069643], [-0.86710921611314895, 0.23699999999999999, 0.19294108571294855], [-0.78618647240956485, 0.050000000000000003, 0.2158791250244505], [-0.76108527223116984, 0.064000000000000001, 0.22330306830813684], [-0.13340753531942209, 0.247, 0.44693554317763651], [-0.57584545722033043, 0.48999999999999999, 0.28235982246156488], [-0.78882694661192831, 0.433, 0.2151065788731219], [-0.38769767950046219, 0.375, 0.34911988661484239], [-0.56057819488052207, 0.41399999999999998, 0.28754255985169652], [-0.41354017495644935, 0.45500000000000002, 0.339605447117173], [-0.23993577722243081, 0.49099999999999999, 0.40519002230969337], [-0.1389080156677496, 0.40400000000000003, 0.44476141839645233], [-0.25485737510500855, 0.376, 0.39941662953554224], [-0.71218610582902353, 0.17399999999999999, 0.23817476979886087], [-0.54533105995872144, 0.13700000000000001, 0.2927629228714812], [-0.39547917847510977, 0.033000000000000002, 0.34624464252424236], [-0.43052658996257548, 0.35399999999999998, 0.33340631435564982], [-0.37296719193774736, 0.40300000000000002, 0.35458643102865428], [-0.66482612169465694, 
0.31900000000000001, 0.25308085650392698], [-0.13772133540823422, 0.34699999999999998, 0.44523032843016275], [-0.6765304487868502, 0.20999999999999999, 0.24935196033890672], [-0.64518763494323472, 0.32200000000000001, 0.25940279912025543], [-0.5078622084312413, 0.41099999999999998, 0.30577498972600159], [-0.12652006733772059, 0.42899999999999999, 0.44966013262301163], [-0.32691133022814595, 0.498, 0.37186747562269029], [0.25533848511500978, 0.42399999999999999, 0.39923083899077472], [2.7045138116476508, 0.0050000000000000001, 0.0034202212972238577], [-0.1551614486076057, 0.44400000000000001, 0.43834701985429037], [1.9524487722567723, 0.012999999999999999, 0.025442473674991528], [-1.2055816465306763, 0.017000000000000001, 0.11398941970467646], [3.478472976017831, 0.002, 0.00025213964072468009], [-1.4621715757903719, 0.002, 0.071847099325659136], [-0.84010307600180256, 0.085000000000000006, 0.20042529779230778], [5.7097646237318243, 0.0030000000000000001, 5.6566262784940591e-09], [1.5082367956567375, 0.065000000000000002, 0.065746966514827365], [-0.58337270103430816, 0.44, 0.27982121546450034], [-0.083271860457022437, 0.45100000000000001, 0.46681768733385554], [-0.46872337815000953, 0.34599999999999997, 0.31963368715684204], [0.18490279849545319, 0.23799999999999999, 0.42665263797981101], [3.470424529947997, 0.012, 0.00025981817437825683], [-0.99942612137154796, 0.032000000000000001, 0.15879415560388499], [-1.3650387953594485, 0.034000000000000002, 0.08612042845912049], [1.8617160516432014, 0.081000000000000003, 0.03132156240215267], [1.1321188945775384, 0.11600000000000001, 0.12879222611766061], [0.064116686050580601, 0.27300000000000002, 0.4744386578180424], [-0.42032194540259099, 0.29999999999999999, 0.33712514016213468], [-0.79581215423980922, 0.123, 0.21307061309098785], [-0.42792753720906046, 0.45600000000000002, 0.33435193892883741], [-1.0629378527428395, 0.051999999999999998, 0.14390506780140866], [-0.54164761752225477, 0.33700000000000002, 
0.29403064095211839], [1.0934778886820793, 0.13700000000000001, 0.13709201601893539], [-0.094068785378413719, 0.38200000000000001, 0.46252725802998929], [0.13482026574801856, 0.36799999999999999, 0.44637699118865737], [-0.13976995315653129, 0.34699999999999998, 0.44442087706276601], [-0.051047663924746682, 0.32000000000000001, 0.47964376985626245], [-0.21468297736730158, 0.41699999999999998, 0.41500724761906527], [-0.20873154637330626, 0.38800000000000001, 0.41732890604390893], [-0.32427876152583485, 0.49199999999999999, 0.37286349875557478], [-0.65254842943280977, 0.374, 0.25702372075306734], [-0.48611858196118796, 0.23300000000000001, 0.31344154643990074], [-0.14482354344529477, 0.32600000000000001, 0.44242509660469886], [-0.51052030974200002, 0.439, 0.30484349480873729], [0.56814382285283538, 0.14999999999999999, 0.28496865660103166], [0.58680919931668207, 0.161, 0.27866592887231878], [0.013390357044409013, 0.25800000000000001, 0.49465818005865647], [-0.19050728887961568, 0.41399999999999998, 0.4244558160399462], [-0.60531777422216049, 0.35199999999999998, 0.2724839368239631], [1.0899331115425805, 0.127, 0.13787130480311838], [0.17015055382651084, 0.36899999999999999, 0.43244586845546418], [-0.21738337124409801, 0.40600000000000003, 0.41395479459421991], [1.0329303331079593, 0.079000000000000001, 0.15081825117169467], [1.0218317101096221, 0.104, 0.15343027913308094]] diff --git a/release/python/0.5.0/crankshaft/test/fixtures/kmeans.json b/release/python/0.5.0/crankshaft/test/fixtures/kmeans.json new file mode 100644 index 0000000..8f31c79 --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/fixtures/kmeans.json @@ -0,0 +1 @@ +[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 
10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}] \ No newline at end of file diff --git a/release/python/0.5.0/crankshaft/test/fixtures/markov.json b/release/python/0.5.0/crankshaft/test/fixtures/markov.json new file mode 100644 index 0000000..d60e4e0 --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/fixtures/markov.json @@ -0,0 +1 @@ +[[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 
0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], 
[-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]] diff --git a/release/python/0.5.0/crankshaft/test/fixtures/moran.json b/release/python/0.5.0/crankshaft/test/fixtures/moran.json new file mode 100644 index 0000000..2f75cf1 --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/fixtures/moran.json @@ -0,0 +1,52 @@ +[[0.9319096128346788, "HH"], +[-1.135787401862846, "HL"], +[0.11732030672508517, "LL"], +[0.6152779669180425, "LL"], +[-0.14657336660125297, "LH"], +[0.6967858120189607, "LL"], +[0.07949310115714454, "HH"], +[0.4703198759258987, "HH"], +[0.4421125200498064, "HH"], 
+[0.5724288737143592, "LL"], +[0.8970743435692062, "LL"], +[0.18327334401918674, "LL"], +[-0.01466729201304962, "HL"], +[0.3481559372544409, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329988, "HH"], +[0.4373841193538136, "HH"], +[0.15971286468915544, "LL"], +[1.0543588860308968, "HH"], +[1.7372866900020818, "HH"], +[1.091998586053999, "LL"], +[0.1171572584252222, "HH"], +[0.08438455015300014, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329985, "HH"], +[1.1627044812890683, "HH"], +[0.06547094736902978, "LL"], +[0.795275137550483, "HH"], +[0.18562939195219, "LL"], +[0.3010757406693439, "LL"], +[2.8205795942839376, "HH"], +[0.11259190602909264, "LL"], +[-0.07116352791516614, "HL"], +[-0.09945240794119009, "LH"], +[0.18562939195219, "LL"], +[0.1832733440191868, "LL"], +[-0.39054253768447705, "HL"], +[-0.1672071289487642, "HL"], +[0.3337669247916343, "HH"], +[0.2584386102554792, "HH"], +[-0.19733845476322634, "HL"], +[-0.9379282899805409, "LH"], +[-0.028770969951095866, "LH"], +[0.051367269430983485, "LL"], +[-0.2172548045913472, "LH"], +[0.05136726943098351, "LL"], +[0.04191046803899837, "LL"], +[0.7482357030403517, "HH"], +[-0.014585767863118111, "LH"], +[0.5410013139159929, "HH"], +[1.0223932668429925, "LL"], +[1.4179402898927476, "LL"]] \ No newline at end of file diff --git a/release/python/0.5.0/crankshaft/test/fixtures/neighbors.json b/release/python/0.5.0/crankshaft/test/fixtures/neighbors.json new file mode 100644 index 0000000..055b359 --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/fixtures/neighbors.json @@ -0,0 +1,54 @@ +[ + {"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5}, + {"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7}, + {"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2}, + {"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1}, + {"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3}, + {"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05}, + {"neighbors": [8, 17, 43, 25, 12], "id": 7, 
"value": 0.4}, + {"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7}, + {"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5}, + {"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04}, + {"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08}, + {"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2}, + {"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4}, + {"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2}, + {"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3}, + {"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4}, + {"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6}, + {"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3}, + {"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7}, + {"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8}, + {"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1}, + {"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4}, + {"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1}, + {"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3}, + {"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4}, + {"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6}, + {"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3}, + {"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8}, + {"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3}, + {"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1}, + {"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9}, + {"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3}, + {"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4}, + {"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3}, + {"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3}, + {"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2}, + {"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5}, + {"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4}, + {"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 
0.6}, + {"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5}, + {"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4}, + {"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2}, + {"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3}, + {"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2}, + {"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3}, + {"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2}, + {"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3}, + {"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5}, + {"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2}, + {"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6}, + {"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01}, + {"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01} + ] diff --git a/release/python/0.5.0/crankshaft/test/fixtures/neighbors_getis.json b/release/python/0.5.0/crankshaft/test/fixtures/neighbors_getis.json new file mode 100644 index 0000000..be367ff --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/fixtures/neighbors_getis.json @@ -0,0 +1 @@ +[{"neighbors": [3, 6, 7], "id": 1, "value": 1.624458}, {"neighbors": [10, 5, 8], "id": 2, "value": 2.2554919999999998}, {"neighbors": [1, 4, 7], "id": 3, "value": 1.4678899999999999}, {"neighbors": [9, 3, 5, 7], "id": 4, "value": 2.4842559999999998}, {"neighbors": [9, 2, 4, 10], "id": 5, "value": 0.0}, {"neighbors": [1, 11, 12, 7, 16], "id": 6, "value": 9.0486730000000009}, {"neighbors": [1, 3, 4, 6, 9, 11, 18, 19], "id": 7, "value": 6.0294889999999999}, {"neighbors": [2, 15, 10], "id": 8, "value": 1.8003849999999999}, {"neighbors": [4, 5, 7, 10, 13, 19, 20], "id": 9, "value": 4.581251}, {"neighbors": [2, 5, 8, 9, 13, 15, 17, 20, 21], "id": 10, "value": 3.7906070000000001}, {"neighbors": [18, 6, 7, 16], "id": 11, "value": 1.4474359999999999}, {"neighbors": [16, 6, 14], "id": 12, "value": 1.1919660000000001}, {"neighbors": [9, 10, 20], "id": 13, "value": 0.0}, {"neighbors": 
[12, 22, 16], "id": 14, "value": 1.608017}, {"neighbors": [17, 10, 23, 8], "id": 15, "value": 1.9498120000000001}, {"neighbors": [6, 11, 12, 14, 18, 22, 27, 28], "id": 16, "value": 0.74509000000000003}, {"neighbors": [10, 15, 21, 23, 26, 30], "id": 17, "value": 4.1733180000000001}, {"neighbors": [33, 7, 11, 16, 19, 27, 32], "id": 18, "value": 3.7832520000000001}, {"neighbors": [33, 7, 9, 18, 20, 24], "id": 19, "value": 2.0851359999999999}, {"neighbors": [9, 10, 13, 19, 21, 24], "id": 20, "value": 2.1763020000000002}, {"neighbors": [35, 10, 17, 20, 24, 26], "id": 21, "value": 6.3093469999999998}, {"neighbors": [28, 29, 14, 16], "id": 22, "value": 10.855743}, {"neighbors": [17, 25, 31, 30, 15], "id": 23, "value": 4.211354}, {"neighbors": [33, 19, 20, 21, 35], "id": 24, "value": 0.80481000000000003}, {"neighbors": [42, 31, 23], "id": 25, "value": 3.2153309999999999}, {"neighbors": [17, 34, 35, 21, 30], "id": 26, "value": 2.8336640000000002}, {"neighbors": [36, 39, 41, 16, 18, 28, 32], "id": 27, "value": 1.5920399999999999}, {"neighbors": [27, 36, 29, 22, 16], "id": 28, "value": 1.5711580000000001}, {"neighbors": [36, 28, 22, 38], "id": 29, "value": 3.1275900000000001}, {"neighbors": [34, 43, 17, 23, 26, 31], "id": 30, "value": 4.4168960000000004}, {"neighbors": [42, 43, 44, 23, 25, 30], "id": 31, "value": 3.0174859999999999}, {"neighbors": [33, 18, 27, 41], "id": 32, "value": 9.9242450000000009}, {"neighbors": [35, 37, 40, 41, 46, 18, 19, 24, 32], "id": 33, "value": 7.9739570000000004}, {"neighbors": [26, 35, 43, 45, 30], "id": 34, "value": 5.0054639999999999}, {"neighbors": [33, 34, 37, 40, 45, 21, 24, 26], "id": 35, "value": 2.4638909999999998}, {"neighbors": [38, 39, 47, 27, 28, 29], "id": 36, "value": 0.0}, {"neighbors": [33, 35, 40, 45, 46, 49, 51], "id": 37, "value": 7.377974}, {"neighbors": [36, 29, 47, 48], "id": 38, "value": 1.0038750000000001}, {"neighbors": [36, 41, 47, 50, 52, 27], "id": 39, "value": 3.1900469999999999}, {"neighbors": [33, 35, 37, 46], 
"id": 40, "value": 45.905405999999999}, {"neighbors": [33, 39, 46, 50, 27, 32], "id": 41, "value": 2.447597}, {"neighbors": [25, 44, 53, 31], "id": 42, "value": 1.2949580000000001}, {"neighbors": [34, 44, 45, 54, 59, 61, 30, 31], "id": 43, "value": 5.9330980000000002}, {"neighbors": [42, 43, 53, 54, 31], "id": 44, "value": 4.1339969999999999}, {"neighbors": [34, 35, 37, 43, 51, 59, 60], "id": 45, "value": 4.298311}, {"neighbors": [33, 37, 40, 41, 49, 50, 57], "id": 46, "value": 27.483827000000002}, {"neighbors": [36, 38, 39, 48, 52, 55, 56], "id": 47, "value": 0.96979099999999996}, {"neighbors": [55, 38, 47], "id": 48, "value": 0.0}, {"neighbors": [57, 51, 37, 46, 63], "id": 49, "value": 2.934466}, {"neighbors": [39, 41, 46, 52, 57, 58], "id": 50, "value": 4.4564269999999997}, {"neighbors": [37, 45, 49, 60, 63, 64], "id": 51, "value": 4.629264}, {"neighbors": [39, 47, 50, 56, 58, 62], "id": 52, "value": 4.9415329999999997}, {"neighbors": [65, 42, 44, 54], "id": 53, "value": 3.9900410000000002}, {"neighbors": [65, 61, 43, 44, 53], "id": 54, "value": 2.064324}, {"neighbors": [56, 47, 48], "id": 55, "value": 3.0402529999999999}, {"neighbors": [52, 55, 47, 62], "id": 56, "value": 3.905411}, {"neighbors": [66, 67, 46, 49, 50, 58, 63], "id": 57, "value": 4.3328389999999999}, {"neighbors": [57, 50, 52, 62, 66], "id": 58, "value": 3.8941110000000001}, {"neighbors": [69, 70, 43, 45, 60, 61], "id": 59, "value": 6.8287940000000003}, {"neighbors": [51, 64, 45, 59, 70], "id": 60, "value": 3.2639469999999999}, {"neighbors": [65, 69, 72, 43, 54, 59], "id": 61, "value": 3.2821630000000002}, {"neighbors": [58, 68, 52, 66, 56], "id": 62, "value": 3.2957619999999999}, {"neighbors": [49, 57, 51, 67, 64], "id": 63, "value": 7.2496790000000004}, {"neighbors": [67, 70, 71, 51, 60, 63], "id": 64, "value": 3.041846}, {"neighbors": [61, 53, 54, 72], "id": 65, "value": 1.618018}, {"neighbors": [67, 68, 73, 76, 57, 58, 62], "id": 66, "value": 4.9108010000000002}, {"neighbors": [66, 71, 73, 
75, 76, 57, 63, 64], "id": 67, "value": 1.991457}, {"neighbors": [73, 66, 62], "id": 68, "value": 3.1461920000000001}, {"neighbors": [70, 72, 74, 77, 59, 61], "id": 69, "value": 7.2666500000000003}, {"neighbors": [69, 71, 74, 78, 59, 60, 64], "id": 70, "value": 3.1109040000000001}, {"neighbors": [67, 75, 70, 78, 64], "id": 71, "value": 2.9802710000000001}, {"neighbors": [65, 69, 61, 77], "id": 72, "value": 3.8667669999999998}, {"neighbors": [76, 66, 67, 68], "id": 73, "value": 1.8684080000000001}, {"neighbors": [77, 69, 70, 78], "id": 74, "value": 12.577033999999999}, {"neighbors": [67, 76, 78, 71], "id": 75, "value": 7.8035990000000002}, {"neighbors": [73, 66, 67, 75], "id": 76, "value": 3.4714900000000002}, {"neighbors": [74, 69, 72], "id": 77, "value": 4.334822}, {"neighbors": [74, 75, 70, 71], "id": 78, "value": 8.4515370000000001}] diff --git a/release/python/0.5.0/crankshaft/test/fixtures/neighbors_markov.json b/release/python/0.5.0/crankshaft/test/fixtures/neighbors_markov.json new file mode 100644 index 0000000..45a20e7 --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/fixtures/neighbors_markov.json @@ -0,0 +1 @@ +[{"neighbors": [10, 7, 21, 23, 1], "y1995": 0.87654416055651474, "y1997": 0.85637566664752718, "y1996": 0.8631470006766887, "y1999": 0.84461540228037335, "y1998": 0.84811668329242784, "y2006": 0.86302631339545688, "y2007": 0.86148266513456728, "y2004": 0.86416611731111015, "y2005": 0.87119374831581786, "y2002": 0.85012592862683589, "y2003": 0.8550965633336135, "y2000": 0.83271652434603094, "y2001": 0.83786313566577242, "id": 0, "y2008": 0.86252252380501315, "y2009": 0.86746356478544273}, {"neighbors": [5, 7, 22, 29, 3], "y1995": 0.91889509774542122, "y1997": 0.92333257900976462, "y1996": 0.91757931190043385, "y1999": 0.92552387732371888, "y1998": 0.92517289327379471, "y2006": 0.91706053906277052, "y2007": 0.90139504820726424, "y2004": 0.89815175749309051, "y2005": 0.91832090781161113, "y2002": 0.89431990798552208, "y2003": 
0.88924793576523797, "y2000": 0.90746978227271013, "y2001": 0.89830489127332913, "id": 1, "y2008": 0.87897455159080617, "y2009": 0.86216858051752643}, {"neighbors": [11, 8, 13, 18, 17], "y1995": 0.82591007476914713, "y1997": 0.81989792988843901, "y1996": 0.82548595539161707, "y1999": 0.81731522200916285, "y1998": 0.81503235035017918, "y2006": 0.81814804358939286, "y2007": 0.83675961003285626, "y2004": 0.82668195534569056, "y2005": 0.82373723764184559, "y2002": 0.80849979516360859, "y2003": 0.82258550658074148, "y2000": 0.78964559168205917, "y2001": 0.8058444152731008, "id": 2, "y2008": 0.8357419865626442, "y2009": 0.84647177436289112}, {"neighbors": [4, 14, 9, 5, 12], "y1995": 1.0908817638059434, "y1997": 1.0845641754849344, "y1996": 1.0853768890893893, "y1999": 1.098988414417104, "y1998": 1.0841540389418189, "y2006": 1.1316479722785828, "y2007": 1.1295850763954971, "y2004": 1.1139980568106316, "y2005": 1.1216802898290368, "y2002": 1.1116069731657288, "y2003": 1.1088862051501811, "y2000": 1.1450694824791507, "y2001": 1.1215113292620285, "id": 3, "y2008": 1.1137181812756343, "y2009": 1.0993677488645406}, {"neighbors": [14, 3, 9, 31, 12], "y1995": 1.1073144618319228, "y1997": 1.1328363804627946, "y1996": 1.1137394350312471, "y1999": 1.1591002514611153, "y1998": 1.144725587086376, "y2006": 1.1173646811350333, "y2007": 1.1086324218539598, "y2004": 1.1102496406140896, "y2005": 1.11943471361418, "y2002": 1.1475230282561595, "y2003": 1.1184328424005199, "y2000": 1.1689820101690329, "y2001": 1.1721248787169682, "id": 4, "y2008": 1.0964251552643696, "y2009": 1.0776233718455337}, {"neighbors": [29, 1, 22, 7, 4], "y1995": 1.422697571371182, "y1997": 1.4427350196405593, "y1996": 1.4211843379728528, "y1999": 1.4440068434166562, "y1998": 1.4357757095632602, "y2006": 1.4405276647793266, "y2007": 1.4524121586440921, "y2004": 1.4059372049179741, "y2005": 1.4078864636665769, "y2002": 1.4197822680667809, "y2003": 1.3909220829548647, "y2000": 1.4418473669388905, "y2001": 
1.4478283203013527, "id": 5, "y2008": 1.4330609762040207, "y2009": 1.4174430982377491}, {"neighbors": [12, 47, 9, 25, 20], "y1995": 1.1307388498039153, "y1997": 1.1107470843142355, "y1996": 1.1311051255854685, "y1999": 1.130881491772973, "y1998": 1.1336463608751246, "y2006": 1.1088003408832796, "y2007": 1.0840170924825394, "y2004": 1.1244623853593112, "y2005": 1.1167100811401538, "y2002": 1.1306293052597198, "y2003": 1.1194498381213465, "y2000": 1.1088813841947593, "y2001": 1.1185662918783175, "id": 6, "y2008": 1.0695920556329086, "y2009": 1.0787522517402164}, {"neighbors": [21, 1, 22, 10, 0], "y1995": 1.0470612357366649, "y1997": 1.0425337165747406, "y1996": 1.0451683097376836, "y1999": 1.0207254480945218, "y1998": 1.0323998680588111, "y2006": 1.0405109962442973, "y2007": 1.0174964540280445, "y2004": 1.0140090547678748, "y2005": 1.0317674181861733, "y2002": 0.99669586934394627, "y2003": 0.99327675611171373, "y2000": 0.99854316295509526, "y2001": 0.98802579761429143, "id": 7, "y2008": 0.9936394033949828, "y2009": 0.98279746069218921}, {"neighbors": [11, 13, 17, 18, 15], "y1995": 0.98996985668705595, "y1997": 0.99491000469481983, "y1996": 1.0014356415938011, "y1999": 1.0045584503565237, "y1998": 1.0018840754492748, "y2006": 0.92232873520447411, "y2007": 0.91284090705064902, "y2004": 0.93694786512729977, "y2005": 0.94308212820743131, "y2002": 0.96834820215592055, "y2003": 0.95335147249088092, "y2000": 0.99127006477048718, "y2001": 0.97925917470464008, "id": 8, "y2008": 0.89689832627117483, "y2009": 0.88928857608264111}, {"neighbors": [12, 6, 4, 3, 14], "y1995": 0.87418390853652306, "y1997": 0.84425695187978567, "y1996": 0.86416601430334228, "y1999": 0.83903043942542854, "y1998": 0.8404493987171674, "y2006": 0.87204140839730271, "y2007": 0.86633032299764789, "y2004": 0.86981997840756087, "y2005": 0.86837929279319737, "y2002": 0.86107306112852877, "y2003": 0.85007719735663123, "y2000": 0.85787080050645603, "y2001": 0.86036185149249467, "id": 9, "y2008": 
0.84946077011565357, "y2009": 0.83287145944123797}, {"neighbors": [0, 7, 21, 23, 22], "y1995": 1.1419611801631209, "y1997": 1.1489271154554144, "y1996": 1.146602624490825, "y1999": 1.1443662376135306, "y1998": 1.1490959392942743, "y2006": 1.1049125811637337, "y2007": 1.1105984164317646, "y2004": 1.1119989015058092, "y2005": 1.1025779214946556, "y2002": 1.1259666377127024, "y2003": 1.1221399558345004, "y2000": 1.144501826035474, "y2001": 1.1234975172649961, "id": 10, "y2008": 1.1050979494645479, "y2009": 1.1002009697391872}, {"neighbors": [8, 13, 18, 17, 2], "y1995": 0.97282462974938089, "y1997": 0.96252588061647382, "y1996": 0.96700147279313231, "y1999": 0.96057686787383312, "y1998": 0.96538780087103548, "y2006": 0.91010201260822066, "y2007": 0.89280392121658247, "y2004": 0.94103988614185807, "y2005": 0.9212251863828258, "y2002": 0.94804194711420009, "y2003": 0.9543028555845573, "y2000": 0.95831051250950716, "y2001": 0.94480908623936988, "id": 11, "y2008": 0.89298242828382146, "y2009": 0.89165384824292859}, {"neighbors": [33, 9, 6, 25, 31], "y1995": 0.94325467991401402, "y1997": 0.96455242154753429, "y1996": 0.96436902092427723, "y1999": 0.94117647058823528, "y1998": 0.95243008993884537, "y2006": 0.9346681464882507, "y2007": 0.94281559150403071, "y2004": 0.96918424441756057, "y2005": 0.94781280876672958, "y2002": 0.95388717527096822, "y2003": 0.94597005193649519, "y2000": 0.94809269652332606, "y2001": 0.93539181553564288, "id": 12, "y2008": 0.965203150896216, "y2009": 0.967154410723015}, {"neighbors": [18, 17, 11, 8, 19], "y1995": 0.97478408425654373, "y1997": 0.98712808751954773, "y1996": 0.98169225257738801, "y1999": 0.985598971191053, "y1998": 0.98474769442356791, "y2006": 0.98416665248276058, "y2007": 0.98423613480079708, "y2004": 0.97399471186978948, "y2005": 0.96910087128357136, "y2002": 0.9820996926750224, "y2003": 0.98776529543110569, "y2000": 0.98687072733199255, "y2001": 0.99237486444837619, "id": 13, "y2008": 0.99823861244053191, "y2009": 
0.99545704236827348}, {"neighbors": [4, 31, 3, 29, 12], "y1995": 0.85570268988941878, "y1997": 0.85986131704895119, "y1996": 0.85575915188345031, "y1999": 0.85380119644969055, "y1998": 0.85693406055397725, "y2006": 0.82803647591954255, "y2007": 0.81987360180979219, "y2004": 0.83998883284341452, "y2005": 0.83478547261894065, "y2002": 0.85472102128186755, "y2003": 0.84564834502399988, "y2000": 0.86191535266765262, "y2001": 0.84981450830432048, "id": 14, "y2008": 0.82265395167873867, "y2009": 0.83994039782937002}, {"neighbors": [19, 8, 17, 16, 13], "y1995": 0.87022046646521634, "y1997": 0.85961813213722393, "y1996": 0.85996258309339635, "y1999": 0.8394713575455558, "y1998": 0.85689572413110093, "y2006": 0.94202108334913126, "y2007": 0.94222309998743192, "y2004": 0.86763340229291142, "y2005": 0.89179316746010362, "y2002": 0.86776297543511893, "y2003": 0.86720209304280604, "y2000": 0.82785596604704892, "y2001": 0.86008789452656809, "id": 15, "y2008": 0.93902708112840494, "y2009": 0.94479183757120588}, {"neighbors": [28, 26, 15, 19, 32], "y1995": 0.90134907329491731, "y1997": 0.90403990934606904, "y1996": 0.904077381347274, "y1999": 0.90399237579083946, "y1998": 0.90201769385650832, "y2006": 0.91108803862404764, "y2007": 0.90543476309316473, "y2004": 0.94338264626469681, "y2005": 0.91981795862151561, "y2002": 0.93695966482853577, "y2003": 0.94242697007039, "y2000": 0.90906631602055099, "y2001": 0.92693339421265908, "id": 16, "y2008": 0.91737137682250491, "y2009": 0.94793657442067902}, {"neighbors": [13, 18, 11, 19, 8], "y1995": 1.1977611005602815, "y1997": 1.1843915817489725, "y1996": 1.1822256425225894, "y1999": 1.1928672308275252, "y1998": 1.1826786457339149, "y2006": 1.2392938410349985, "y2007": 1.2341867605077472, "y2004": 1.2385704217423759, "y2005": 1.2441989281116201, "y2002": 1.2262477774195681, "y2003": 1.2239707531714479, "y2000": 1.2017286912636342, "y2001": 1.2132869128474402, "id": 17, "y2008": 1.2362673914436095, "y2009": 1.2675439750795283}, {"neighbors": 
[13, 17, 11, 8, 19], "y1995": 1.2491967813733067, "y1997": 1.2699116090397236, "y1996": 1.2575477330927329, "y1999": 1.3062566740535762, "y1998": 1.2802065055312271, "y2006": 1.3210776560048689, "y2007": 1.329362443219563, "y2004": 1.3054484140490119, "y2005": 1.3030330249408666, "y2002": 1.3257518058685978, "y2003": 1.3079549159235695, "y2000": 1.3479002255103918, "y2001": 1.3439986302151703, "id": 18, "y2008": 1.3300124123891741, "y2009": 1.3328846185074705}, {"neighbors": [26, 17, 28, 15, 16], "y1995": 1.0676800411188558, "y1997": 1.0363730321443168, "y1996": 1.0379927554499979, "y1999": 1.0329609259280523, "y1998": 1.027684488045026, "y2006": 0.94241549375546196, "y2007": 0.92754546923532677, "y2004": 0.99614160423102482, "y2005": 0.97356208269708677, "y2002": 1.0274762326434594, "y2003": 1.0316273366809443, "y2000": 1.0505901631347052, "y2001": 1.0340505678899605, "id": 19, "y2008": 0.92549226593721745, "y2009": 0.92138101880290568}, {"neighbors": [30, 25, 24, 37, 47], "y1995": 1.0947561397632881, "y1997": 1.1165429913770684, "y1996": 1.1152679554712275, "y1999": 1.1314326394231322, "y1998": 1.1310394841195361, "y2006": 1.1090538904302065, "y2007": 1.1057776900012568, "y2004": 1.1402994437897009, "y2005": 1.1197940058085571, "y2002": 1.133670175399079, "y2003": 1.139822558851451, "y2000": 1.1388962186541665, "y2001": 1.1244221220249986, "id": 20, "y2008": 1.1116682481010467, "y2009": 1.0998515545336902}, {"neighbors": [23, 22, 7, 10, 34], "y1995": 0.76530058421804126, "y1997": 0.76542450966153397, "y1996": 0.76612841163904621, "y1999": 0.76014283909933289, "y1998": 0.7672268310234307, "y2006": 0.76842416021983684, "y2007": 0.77487117798086069, "y2004": 0.76533287692895391, "y2005": 0.78205934309410463, "y2002": 0.76156903267949927, "y2003": 0.76651951668098528, "y2000": 0.74480073263159763, "y2001": 0.76098396210261965, "id": 21, "y2008": 0.77768682781054099, "y2009": 0.78801192267396702}, {"neighbors": [21, 34, 5, 7, 29], "y1995": 0.98391336093764348, 
"y1997": 0.98295341320156315, "y1996": 0.98075815675295552, "y1999": 0.96913802803963667, "y1998": 0.97386015032669815, "y2006": 0.93965462091114671, "y2007": 0.93069644684632924, "y2004": 0.9635616201227476, "y2005": 0.94745351657235244, "y2002": 0.97209860866113018, "y2003": 0.97441312580606143, "y2000": 0.97370819354423843, "y2001": 0.96419154157867693, "id": 22, "y2008": 0.94020973488297466, "y2009": 0.94358232339833159}, {"neighbors": [21, 10, 22, 34, 7], "y1995": 0.83561828119099946, "y1997": 0.81738501913392403, "y1996": 0.82298088022609361, "y1999": 0.80904800725677739, "y1998": 0.81748588141426259, "y2006": 0.87170334233473346, "y2007": 0.8786379876833581, "y2004": 0.85954307066870839, "y2005": 0.86790023653402792, "y2002": 0.83451612857812574, "y2003": 0.85175031934895873, "y2000": 0.80071489233375537, "y2001": 0.83358255807316928, "id": 23, "y2008": 0.87497981001981484, "y2009": 0.87888675419592222}, {"neighbors": [27, 20, 30, 32, 47], "y1995": 0.98845573274970278, "y1997": 0.99665282989553183, "y1996": 1.0209242772035507, "y1999": 0.99386618594343845, "y1998": 0.99141823200404444, "y2006": 0.97906748937234156, "y2007": 0.9932312332800689, "y2004": 1.0111665058188304, "y2005": 0.9998802359352077, "y2002": 0.99669586934394627, "y2003": 1.0255909749831356, "y2000": 0.98733194819247994, "y2001": 0.99644997431653437, "id": 24, "y2008": 1.0020493856497013, "y2009": 0.99602148231561483}, {"neighbors": [20, 33, 6, 30, 12], "y1995": 1.1493091345649815, "y1997": 1.143009615936718, "y1996": 1.1524194939429724, "y1999": 1.1398468268822266, "y1998": 1.1426554202510555, "y2006": 1.0889107875354573, "y2007": 1.0860369499254896, "y2004": 1.0856975145267398, "y2005": 1.1244348633192611, "y2002": 1.0423089214343333, "y2003": 1.0557727834721793, "y2000": 1.0831239730629278, "y2001": 1.0519262599166714, "id": 25, "y2008": 1.0599731384290745, "y2009": 1.0216094265950888}, {"neighbors": [28, 19, 16, 32, 17], "y1995": 1.1136826889802023, "y1997": 1.1189343096757198, "y1996": 
1.1057147027213501, "y1999": 1.1432271991365353, "y1998": 1.1377866945457653, "y2006": 1.1268023587150906, "y2007": 1.1235793669317915, "y2004": 1.1482023546040769, "y2005": 1.1238659840114973, "y2002": 1.1600919581655105, "y2003": 1.1446778932605579, "y2000": 1.1825702862895446, "y2001": 1.1622624279436105, "id": 26, "y2008": 1.115925801617498, "y2009": 1.1257082797404696}, {"neighbors": [32, 24, 36, 16, 28], "y1995": 1.303794309231981, "y1997": 1.3120636604057812, "y1996": 1.3075218596998686, "y1999": 1.3062566740535762, "y1998": 1.3153226688859194, "y2006": 1.2865667454509278, "y2007": 1.2973409698906584, "y2004": 1.2683078569016086, "y2005": 1.2617743046198988, "y2002": 1.2920319347677043, "y2003": 1.2718351646774422, "y2000": 1.3121023910310281, "y2001": 1.2998915587009874, "id": 27, "y2008": 1.2939020510829768, "y2009": 1.2934544564717687}, {"neighbors": [26, 16, 19, 32, 27], "y1995": 0.83953719020532513, "y1997": 0.82006005316292385, "y1996": 0.82701447583159737, "y1999": 0.80294863992835086, "y1998": 0.8118887636743225, "y2006": 0.8389109342655191, "y2007": 0.84349246817602375, "y2004": 0.83108634437662732, "y2005": 0.84373783646216949, "y2002": 0.82596790474192727, "y2003": 0.82435704751379402, "y2000": 0.78772975118465016, "y2001": 0.82848010958278628, "id": 28, "y2008": 0.85637272428125033, "y2009": 0.86539395164519117}, {"neighbors": [5, 39, 22, 14, 31], "y1995": 1.2345008725695852, "y1997": 1.2353793515744536, "y1996": 1.2426021999018138, "y1999": 1.2452262575926329, "y1998": 1.2358129278404693, "y2006": 1.2365329681906834, "y2007": 1.2796200872578414, "y2004": 1.1967443443492951, "y2005": 1.2153657295128597, "y2002": 1.1937780418204111, "y2003": 1.1835533748469893, "y2000": 1.2256766974812463, "y2001": 1.2112664802237314, "id": 29, "y2008": 1.2796839248335934, "y2009": 1.2590773758694083}, {"neighbors": [37, 20, 24, 25, 27], "y1995": 0.97696620404861145, "y1997": 0.98035944080980575, "y1996": 0.9740071914763756, "y1999": 0.95543282313901556, "y1998": 
0.97581530789338955, "y2006": 0.92100464312607799, "y2007": 0.9147530387633086, "y2004": 0.9298883479571457, "y2005": 0.93442917452618346, "y2002": 0.93679072759857129, "y2003": 0.92540049332494034, "y2000": 0.96480308308405971, "y2001": 0.9468637634838194, "id": 30, "y2008": 0.90249622070947177, "y2009": 0.90213630440783921}, {"neighbors": [35, 14, 33, 12, 4], "y1995": 0.84986885942491119, "y1997": 0.84295996568390696, "y1996": 0.89868510090623221, "y1999": 0.85659367787716301, "y1998": 0.87280533962476625, "y2006": 0.92562487931452408, "y2007": 0.96635366357254426, "y2004": 0.92698332540482575, "y2005": 0.94745351657235244, "y2002": 0.90448992922937876, "y2003": 0.95495898185605821, "y2000": 0.88937573313051443, "y2001": 0.89440100450887505, "id": 31, "y2008": 1.025203118044723, "y2009": 1.0394296020754366}, {"neighbors": [36, 27, 28, 16, 26], "y1995": 1.0192280751235561, "y1997": 1.0097442843101825, "y1996": 1.0025820319237864, "y1999": 0.99765073314119712, "y1998": 1.0030341681355639, "y2006": 0.94779637858468868, "y2007": 0.93759089358493275, "y2004": 0.97583768316642261, "y2005": 0.96101679691008712, "y2002": 0.99747298060178258, "y2003": 0.99550758543481688, "y2000": 1.0075901875261932, "y2001": 0.99192968437874551, "id": 32, "y2008": 0.93353431146829191, "y2009": 0.94121705123804411}, {"neighbors": [44, 25, 12, 35, 31], "y1995": 0.86367410708901315, "y1997": 0.85544345781923936, "y1996": 0.85558931627900803, "y1999": 0.84336613427334628, "y1998": 0.85103025143102673, "y2006": 0.89455097373003656, "y2007": 0.88283929116469462, "y2004": 0.85951183386707053, "y2005": 0.87194227372077004, "y2002": 0.84667960913556228, "y2003": 0.84374557883664714, "y2000": 0.83434853662160158, "y2001": 0.85813595114434105, "id": 33, "y2008": 0.90349490610221961, "y2009": 0.9060067497610369}, {"neighbors": [22, 39, 21, 29, 23], "y1995": 1.0094753356447226, "y1997": 1.0069881886439402, "y1996": 1.0041105523637666, "y1999": 0.99291086334982948, "y1998": 0.99513686502304577, 
"y2006": 0.96382634438484593, "y2007": 0.95011400973122428, "y2004": 0.975119236728752, "y2005": 0.96134614808826613, "y2002": 0.99291167539274383, "y2003": 0.98983209318633369, "y2000": 1.0058162611397035, "y2001": 0.98850522230466298, "id": 34, "y2008": 0.94346860300667812, "y2009": 0.9463776450423077}, {"neighbors": [31, 38, 44, 33, 14], "y1995": 1.0571257066143651, "y1997": 1.0575301194645879, "y1996": 1.0545941857842291, "y1999": 1.0510385688532684, "y1998": 1.0488078570498685, "y2006": 1.0247627521629479, "y2007": 1.0234752320591773, "y2004": 1.0329697933620496, "y2005": 1.0219168238570018, "y2002": 1.0420048344203974, "y2003": 1.0402553971511816, "y2000": 1.0480002306104303, "y2001": 1.030249414987729, "id": 35, "y2008": 1.0251768368501768, "y2009": 1.0435957064486703}, {"neighbors": [32, 43, 27, 28, 42], "y1995": 1.070841888164505, "y1997": 1.0793762307014196, "y1996": 1.0666949726007404, "y1999": 1.0794043012481198, "y1998": 1.0738798776109699, "y2006": 1.087727556316465, "y2007": 1.0885954360198933, "y2004": 1.1032213602455734, "y2005": 1.0916793915985508, "y2002": 1.0938347765734742, "y2003": 1.1052447043433509, "y2000": 1.0531800956589803, "y2001": 1.0745277096056161, "id": 36, "y2008": 1.0917733838297285, "y2009": 1.1096083021948762}, {"neighbors": [30, 40, 20, 42, 41], "y1995": 0.8671922185905101, "y1997": 0.86675155621455668, "y1996": 0.86628895935887062, "y1999": 0.86511809486628932, "y1998": 0.86425631732335095, "y2006": 0.84488343470424199, "y2007": 0.83374328958471722, "y2004": 0.84517414191529749, "y2005": 0.84843857600526962, "y2002": 0.85411284725399572, "y2003": 0.84886336375435456, "y2000": 0.86287327291635718, "y2001": 0.8516979624450659, "id": 37, "y2008": 0.82812044014430564, "y2009": 0.82878598934619596}, {"neighbors": [35, 31, 45, 39, 44], "y1995": 0.8838921149583755, "y1997": 0.90282398478743275, "y1996": 0.92288667453925455, "y1999": 0.92023285988219217, "y1998": 0.91229185518735723, "y2006": 0.93869676706720051, "y2007": 
0.96947770975097391, "y2004": 0.99223700402629367, "y2005": 0.97984969609868555, "y2002": 0.93682451504456421, "y2003": 0.98655146182882891, "y2000": 0.92652175166361039, "y2001": 0.94278865361566122, "id": 38, "y2008": 1.0036262573224608, "y2009": 0.98102350657197357}, {"neighbors": [29, 34, 38, 22, 35], "y1995": 0.970820642185237, "y1997": 0.94534081352108112, "y1996": 0.95320232993219844, "y1999": 0.93967000034446724, "y1998": 0.94215592860799646, "y2006": 0.91035556215514757, "y2007": 0.90430364292511256, "y2004": 0.92879505989982103, "y2005": 0.9211054223180335, "y2002": 0.93412151936513388, "y2003": 0.93501274320242933, "y2000": 0.93092108910210503, "y2001": 0.92662519262599163, "id": 39, "y2008": 0.89994694483851023, "y2009": 0.9007386435858511}, {"neighbors": [41, 37, 42, 30, 45], "y1995": 0.95861858457245008, "y1997": 0.98254810501535106, "y1996": 0.95774543235102894, "y1999": 0.98684823919808018, "y1998": 0.98919471947721893, "y2006": 0.97163003599581876, "y2007": 0.97007020126757271, "y2004": 0.9493488753775261, "y2005": 0.97152609359561659, "y2002": 0.95601578436851964, "y2003": 0.94905384541254967, "y2000": 0.98882204635713133, "y2001": 0.97662233890759653, "id": 40, "y2008": 0.97158948117089283, "y2009": 0.95884908006927827}, {"neighbors": [40, 45, 44, 37, 42], "y1995": 0.83980438854721107, "y1997": 0.85746999875029983, "y1996": 0.84726737166133714, "y1999": 0.85567509846023126, "y1998": 0.85467221160427542, "y2006": 0.8333891885768886, "y2007": 0.83511679264592342, "y2004": 0.81743586206088703, "y2005": 0.83550405700769481, "y2002": 0.84502402428191115, "y2003": 0.82645665158259707, "y2000": 0.84818516243622177, "y2001": 0.85265681182580899, "id": 41, "y2008": 0.82136617314598481, "y2009": 0.80921873783836296}, {"neighbors": [43, 40, 46, 37, 36], "y1995": 0.95118156405662746, "y1997": 0.94688098462868708, "y1996": 0.9466212002600608, "y1999": 0.95124410099780687, "y1998": 0.95085829660091703, "y2006": 0.96895367966714574, "y2007": 0.9700163384024274, 
"y2004": 0.97583768316642261, "y2005": 0.95571723704302525, "y2002": 0.96804411514198463, "y2003": 0.97136213864358201, "y2000": 0.95440787445922959, "y2001": 0.96364362764682376, "id": 42, "y2008": 0.97082732652905901, "y2009": 0.9878236640328002}, {"neighbors": [36, 42, 32, 27, 46], "y1995": 1.0891004415267045, "y1997": 1.0849289528525252, "y1996": 1.0824896838138709, "y1999": 1.0945424900391545, "y1998": 1.0865692335830259, "y2006": 1.1450297539219478, "y2007": 1.1447474729339102, "y2004": 1.1334273474293739, "y2005": 1.1468606844516303, "y2002": 1.1229257675733433, "y2003": 1.1302103089739621, "y2000": 1.1055818811158884, "y2001": 1.1214085953998059, "id": 43, "y2008": 1.1408403740471014, "y2009": 1.1614292649793569}, {"neighbors": [33, 41, 45, 35, 40], "y1995": 1.0633603345917013, "y1997": 1.0869149629649646, "y1996": 1.0736582323828732, "y1999": 1.1166986255755473, "y1998": 1.0976484597942771, "y2006": 1.0839806574563229, "y2007": 1.0983176831786272, "y2004": 1.0927882684985315, "y2005": 1.0700320368873319, "y2002": 1.0881584856466706, "y2003": 1.0804431312806149, "y2000": 1.1185670222649935, "y2001": 1.0976428286056732, "id": 44, "y2008": 1.0929823187788443, "y2009": 1.0917612486217978}, {"neighbors": [41, 44, 40, 35, 33], "y1995": 0.79772064970019041, "y1997": 0.7858115114280021, "y1996": 0.78829195801876151, "y1999": 0.77035744221561353, "y1998": 0.77615921755360906, "y2006": 0.79949806580432425, "y2007": 0.80172181625581262, "y2004": 0.79603865293896003, "y2005": 0.78966436120841943, "y2002": 0.81437881076636964, "y2003": 0.80788827809912023, "y2000": 0.77751193519846906, "y2001": 0.79902973574567659, "id": 45, "y2008": 0.82168154748053679, "y2009": 0.85587910681858015}, {"neighbors": [42, 43, 40, 36, 37], "y1995": 1.0052446952315301, "y1997": 1.0047589936197736, "y1996": 1.0000769567582628, "y1999": 1.0063956091903872, "y1998": 1.0061394183885444, "y2006": 0.97292595590233411, "y2007": 0.96519561197191939, "y2004": 0.99030032232474696, "y2005": 
0.97682565346267858, "y2002": 1.0081498135355325, "y2003": 1.0057431552702318, "y2000": 1.0016297948675874, "y2001": 0.99860738542320637, "id": 46, "y2008": 0.9617340332161447, "y2009": 0.95890283625473927}, {"neighbors": [20, 6, 24, 25, 30], "y1995": 0.95808418788867844, "y1997": 0.9654440995572009, "y1996": 0.93825679674127938, "y1999": 0.96987289157318213, "y1998": 0.95561201303757848, "y2006": 1.1704973973021624, "y2007": 1.1702515395802287, "y2004": 1.0533361880299275, "y2005": 1.0983262971945267, "y2002": 1.0078119390756035, "y2003": 1.0348423554112989, "y2000": 0.96608031008233231, "y2001": 0.99727184521431422, "id": 47, "y2008": 1.1873055260044207, "y2009": 1.1424264534188653}] diff --git a/release/python/0.5.0/crankshaft/test/helper.py b/release/python/0.5.0/crankshaft/test/helper.py new file mode 100644 index 0000000..7d28b94 --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/helper.py @@ -0,0 +1,13 @@ +import unittest + +from mock_plpy import MockPlPy +plpy = MockPlPy() + +import sys +sys.modules['plpy'] = plpy + +import os + +def fixture_file(name): + dir = os.path.dirname(os.path.realpath(__file__)) + return os.path.join(dir, 'fixtures', name) diff --git a/release/python/0.5.0/crankshaft/test/mock_plpy.py b/release/python/0.5.0/crankshaft/test/mock_plpy.py new file mode 100644 index 0000000..e8a279d --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/mock_plpy.py @@ -0,0 +1,54 @@ +import re + + +class MockCursor: + def __init__(self, data): + self.cursor_pos = 0 + self.data = data + + def fetch(self, batch_size): + batch = self.data[self.cursor_pos:self.cursor_pos + batch_size] + self.cursor_pos += batch_size + return batch + + +class MockPlPy: + def __init__(self): + self._reset() + + def _reset(self): + self.infos = [] + self.notices = [] + self.debugs = [] + self.logs = [] + self.warnings = [] + self.errors = [] + self.fatals = [] + self.executes = [] + self.results = [] + self.prepares = [] + self.results = [] + + def 
_define_result(self, query, result): + pattern = re.compile(query, re.IGNORECASE | re.MULTILINE) + self.results.append([pattern, result]) + + def notice(self, msg): + self.notices.append(msg) + + def debug(self, msg): + self.notices.append(msg) + + def info(self, msg): + self.infos.append(msg) + + def cursor(self, query): + data = self.execute(query) + return MockCursor(data) + + # TODO: additional arguments + def execute(self, query): + for result in self.results: + if result[0].match(query): + return result[1] + return [] diff --git a/release/python/0.5.0/crankshaft/test/test_clustering_getis.py b/release/python/0.5.0/crankshaft/test/test_clustering_getis.py new file mode 100644 index 0000000..61add11 --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/test_clustering_getis.py @@ -0,0 +1,78 @@ +import unittest +import numpy as np + +from helper import fixture_file + +from crankshaft.clustering import Getis +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json +from crankshaft.analysis_data_provider import AnalysisDataProvider + +# Fixture files produced as follows +# +# import pysal as ps +# import numpy as np +# import random +# +# # setup variables +# f = ps.open(ps.examples.get_path("stl_hom.dbf")) +# y = np.array(f.by_col['HR8893']) +# w_queen = ps.queen_from_shapefile(ps.examples.get_path("stl_hom.shp")) +# +# out_queen = [{"id": index + 1, +# "neighbors": [x+1 for x in w_queen.neighbors[index]], +# "value": val} for index, val in enumerate(y)] +# +# with open('neighbors_queen_getis.json', 'w') as f: +# f.write(str(out_queen)) +# +# random.seed(1234) +# np.random.seed(1234) +# lgstar_queen = ps.esda.getisord.G_Local(y, w_queen, star=True, +# permutations=999) +# +# with open('getis_queen.json', 'w') as f: +# f.write(str(zip(lgstar_queen.z_sim, +# lgstar_queen.p_sim, lgstar_queen.p_z_sim))) + + +class FakeDataProvider(AnalysisDataProvider): + def __init__(self, mock_data): + self.mock_result = mock_data + + def 
get_getis(self, w_type, param): + return self.mock_result + + +class GetisTest(unittest.TestCase): + """Testing class for Getis-Ord's G* funtion + This test replicates the work done in PySAL documentation: + https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/autocorrelation.html#local-g-and-g + """ + + def setUp(self): + # load raw data for analysis + self.neighbors_data = json.loads( + open(fixture_file('neighbors_getis.json')).read()) + + # load pre-computed/known values + self.getis_data = json.loads( + open(fixture_file('getis.json')).read()) + + def test_getis_ord(self): + """Test Getis-Ord's G*""" + data = [{'id': d['id'], + 'attr1': d['value'], + 'neighbors': d['neighbors']} for d in self.neighbors_data] + + random_seeds.set_random_seeds(1234) + getis = Getis(FakeDataProvider(data)) + + result = getis.getis_ord('subquery', 'value', + 'queen', None, 999, 'the_geom', + 'cartodb_id') + result = [(row[0], row[1]) for row in result] + expected = np.array(self.getis_data)[:, 0:2] + for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected): + self.assertAlmostEqual(res_z, exp_z, delta=1e-2) diff --git a/release/python/0.5.0/crankshaft/test/test_clustering_kmeans.py b/release/python/0.5.0/crankshaft/test/test_clustering_kmeans.py new file mode 100644 index 0000000..93633b0 --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/test_clustering_kmeans.py @@ -0,0 +1,56 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import fixture_file +from crankshaft.clustering import Kmeans +from crankshaft.analysis_data_provider import AnalysisDataProvider +import crankshaft.clustering as cc + +from crankshaft import random_seeds +import json +from collections import OrderedDict + + +class FakeDataProvider(AnalysisDataProvider): + def __init__(self, mocked_result): + self.mocked_result = mocked_result + + def get_spatial_kmeans(self, query): + return 
self.mocked_result + + def get_nonspatial_kmeans(self, query, standarize): + return self.mocked_result + + +class KMeansTest(unittest.TestCase): + """Testing class for k-means spatial""" + + def setUp(self): + self.cluster_data = json.loads( + open(fixture_file('kmeans.json')).read()) + self.params = {"subquery": "select * from table", + "no_clusters": "10"} + + def test_kmeans(self): + """ + """ + data = [{'xs': d['xs'], + 'ys': d['ys'], + 'ids': d['ids']} for d in self.cluster_data] + + random_seeds.set_random_seeds(1234) + kmeans = Kmeans(FakeDataProvider(data)) + clusters = kmeans.spatial('subquery', 2) + labels = [a[1] for a in clusters] + c1 = [a for a in clusters if a[1] == 0] + c2 = [a for a in clusters if a[1] == 1] + + self.assertEqual(len(np.unique(labels)), 2) + self.assertEqual(len(c1), 20) + self.assertEqual(len(c2), 20) diff --git a/release/python/0.5.0/crankshaft/test/test_clustering_moran.py b/release/python/0.5.0/crankshaft/test/test_clustering_moran.py new file mode 100644 index 0000000..cc1930e --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/test_clustering_moran.py @@ -0,0 +1,112 @@ +import unittest +import numpy as np + +from helper import fixture_file +from crankshaft.clustering import Moran +from crankshaft.analysis_data_provider import AnalysisDataProvider +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json +from collections import OrderedDict + + +class FakeDataProvider(AnalysisDataProvider): + def __init__(self, mock_data): + self.mock_result = mock_data + + def get_moran(self, w_type, params): + return self.mock_result + + +class MoranTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.params_markov = {"id_col": "cartodb_id", + "time_cols": ["_2013_dec", "_2014_jan", + "_2014_feb"], + 
"subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.neighbors_data = json.loads( + open(fixture_file('neighbors.json')).read()) + self.moran_data = json.loads( + open(fixture_file('moran.json')).read()) + + def test_map_quads(self): + """Test map_quads""" + from crankshaft.clustering import map_quads + self.assertEqual(map_quads(1), 'HH') + self.assertEqual(map_quads(2), 'LH') + self.assertEqual(map_quads(3), 'LL') + self.assertEqual(map_quads(4), 'HL') + self.assertEqual(map_quads(33), None) + self.assertEqual(map_quads('andy'), None) + + def test_quad_position(self): + """Test lisa_sig_vals""" + from crankshaft.clustering import quad_position + + quads = np.array([1, 2, 3, 4], np.int) + + ans = np.array(['HH', 'LH', 'LL', 'HL']) + test_ans = quad_position(quads) + + self.assertTrue((test_ans == ans).all()) + + def test_local_stat(self): + """Test Moran's I local""" + data = [OrderedDict([('id', d['id']), + ('attr1', d['value']), + ('neighbors', d['neighbors'])]) + for d in self.neighbors_data] + + moran = Moran(FakeDataProvider(data)) + random_seeds.set_random_seeds(1234) + result = moran.local_stat('subquery', 'value', + 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = [(row[0], row[1]) for row in result] + zipped_values = zip(result, self.moran_data) + + for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values: + self.assertAlmostEqual(res_val, exp_val) + self.assertEqual(res_quad, exp_quad) + + def test_moran_local_rate(self): + """Test Moran's I rate""" + data = [{'id': d['id'], + 'attr1': d['value'], + 'attr2': 1, + 'neighbors': d['neighbors']} for d in self.neighbors_data] + + random_seeds.set_random_seeds(1234) + moran = Moran(FakeDataProvider(data)) + result = moran.local_rate_stat('subquery', 'numerator', 'denominator', + 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = [(row[0], row[1]) for row in result] + + zipped_values = zip(result, self.moran_data) + + for ([res_val, res_quad], [exp_val, exp_quad]) in 
zipped_values: + self.assertAlmostEqual(res_val, exp_val) + + def test_moran(self): + """Test Moran's I global""" + data = [{'id': d['id'], + 'attr1': d['value'], + 'neighbors': d['neighbors']} for d in self.neighbors_data] + random_seeds.set_random_seeds(1235) + moran = Moran(FakeDataProvider(data)) + result = moran.global_stat('table', 'value', + 'knn', 5, 99, 'the_geom', + 'cartodb_id') + + result_moran = result[0][0] + expected_moran = np.array([row[0] for row in self.moran_data]).mean() + self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2) diff --git a/release/python/0.5.0/crankshaft/test/test_pysal_utils.py b/release/python/0.5.0/crankshaft/test/test_pysal_utils.py new file mode 100644 index 0000000..92b528b --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/test_pysal_utils.py @@ -0,0 +1,160 @@ +import unittest + +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +from collections import OrderedDict + + +class PysalUtilsTest(unittest.TestCase): + """Testing class for utility functions related to PySAL integrations""" + + def setUp(self): + self.params1 = OrderedDict([("id_col", "cartodb_id"), + ("attr1", "andy"), + ("attr2", "jay_z"), + ("subquery", "SELECT * FROM a_list"), + ("geom_col", "the_geom"), + ("num_ngbrs", 321)]) + + self.params2 = OrderedDict([("id_col", "cartodb_id"), + ("numerator", "price"), + ("denominator", "sq_meters"), + ("subquery", "SELECT * FROM pecan"), + ("geom_col", "the_geom"), + ("num_ngbrs", 321)]) + + self.params3 = OrderedDict([("id_col", "cartodb_id"), + ("numerator", "sq_meters"), + ("denominator", "price"), + ("subquery", "SELECT * FROM pecan"), + ("geom_col", "the_geom"), + ("num_ngbrs", 321)]) + + self.params_array = {"id_col": "cartodb_id", + "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"], + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + + def test_query_attr_select(self): + """Test query_attr_select""" + + ans1 = ("i.\"andy\"::numeric As 
attr1, " + "i.\"jay_z\"::numeric As attr2, ") + + ans2 = ("i.\"price\"::numeric As attr1, " + "i.\"sq_meters\"::numeric As attr2, ") + + ans3 = ("i.\"sq_meters\"::numeric As attr1, " + "i.\"price\"::numeric As attr2, ") + + ans_array = ("i.\"_2013_dec\"::numeric As attr1, " + "i.\"_2014_jan\"::numeric As attr2, " + "i.\"_2014_feb\"::numeric As attr3, ") + + self.assertEqual(pu.query_attr_select(self.params1), ans1) + self.assertEqual(pu.query_attr_select(self.params2), ans2) + self.assertEqual(pu.query_attr_select(self.params3), ans3) + self.assertEqual(pu.query_attr_select(self.params_array), ans_array) + + def test_query_attr_where(self): + """Test pu.query_attr_where""" + + ans1 = ("idx_replace.\"andy\" IS NOT NULL AND " + "idx_replace.\"jay_z\" IS NOT NULL") + + ans_array = ("idx_replace.\"_2013_dec\" IS NOT NULL AND " + "idx_replace.\"_2014_jan\" IS NOT NULL AND " + "idx_replace.\"_2014_feb\" IS NOT NULL") + + self.assertEqual(pu.query_attr_where(self.params1), ans1) + self.assertEqual(pu.query_attr_where(self.params_array), ans_array) + + def test_knn(self): + """Test knn neighbors constructor""" + + ans1 = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL " \ + "ORDER BY " \ + "j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + ans_array = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"_2013_dec\"::numeric As attr1, " \ + "i.\"_2014_jan\"::numeric As attr2, " \ + "i.\"_2014_feb\"::numeric As attr3, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE i.\"cartodb_id\" <> 
j.\"cartodb_id\" AND " \ + "j.\"_2013_dec\" IS NOT NULL AND " \ + "j.\"_2014_jan\" IS NOT NULL AND " \ + "j.\"_2014_feb\" IS NOT NULL " \ + "ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"_2013_dec\" IS NOT NULL AND " \ + "i.\"_2014_jan\" IS NOT NULL AND " \ + "i.\"_2014_feb\" IS NOT NULL "\ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.knn(self.params1), ans1) + self.assertEqual(pu.knn(self.params_array), ans_array) + + def test_queen(self): + """Test queen neighbors constructor""" + + ans1 = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "ST_Touches(i.\"the_geom\", " \ + "j.\"the_geom\") AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL)" \ + ") As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.queen(self.params1), ans1) + + def test_construct_neighbor_query(self): + """Test construct_neighbor_query""" + + # Compare to raw knn query + self.assertEqual(pu.construct_neighbor_query('knn', self.params1), + pu.knn(self.params1)) + + def test_get_attributes(self): + """Test get_attributes""" + + ## need to add tests + + self.assertEqual(True, True) + + def test_get_weight(self): + """Test get_weight""" + + self.assertEqual(True, True) + + def test_empty_zipped_array(self): + """Test empty_zipped_array""" + ans2 = [(None, None)] + ans4 = [(None, None, None, None)] + self.assertEqual(pu.empty_zipped_array(2), ans2) + self.assertEqual(pu.empty_zipped_array(4), ans4) diff --git a/release/python/0.5.0/crankshaft/test/test_segmentation.py b/release/python/0.5.0/crankshaft/test/test_segmentation.py new file mode 
100644 index 0000000..d02e8b1 --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/test_segmentation.py @@ -0,0 +1,64 @@ +import unittest +import numpy as np +from helper import plpy, fixture_file +import crankshaft.segmentation as segmentation +import json + +class SegmentationTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + + def generate_random_data(self,n_samples,random_state, row_type=False): + x1 = random_state.uniform(size=n_samples) + x2 = random_state.uniform(size=n_samples) + x3 = random_state.randint(0, 4, size=n_samples) + + y = x1+x2*x2+x3 + cartodb_id = range(len(x1)) + + if row_type: + return [ {'features': vals} for vals in zip(x1,x2,x3)], y + else: + return [dict( zip(['x1','x2','x3','target', 'cartodb_id'],[x1,x2,x3,y,cartodb_id]))] + + def test_replace_nan_with_mean(self): + test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan]) + + def test_create_and_predict_segment(self): + n_samples = 1000 + + random_state_train = np.random.RandomState(13) + random_state_test = np.random.RandomState(134) + training_data = self.generate_random_data(n_samples, random_state_train) + test_data, test_y = self.generate_random_data(n_samples, random_state_test, row_type=True) + + + ids = [{'cartodb_ids': range(len(test_data))}] + rows = [{'x1': 0,'x2':0,'x3':0,'y':0,'cartodb_id':0}] + + plpy._define_result('select \* from \(select \* from training\) a limit 1',rows) + plpy._define_result('.*from \(select \* from training\) as a' ,training_data) + plpy._define_result('select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',ids) + plpy._define_result('.*select \* from test.*' ,test_data) + + model_parameters = {'n_estimators': 1200, + 'max_depth': 3, + 'subsample' : 0.5, + 'learning_rate': 0.01, + 'min_samples_leaf': 1} + + result = segmentation.create_and_predict_segment( + 'select * from training', + 'target', + 'select * from test', + model_parameters) + + prediction = 
[r[1] for r in result] + + accuracy =np.sqrt(np.mean( np.square( np.array(prediction) - np.array(test_y)))) + + self.assertEqual(len(result),len(test_data)) + self.assertTrue( result[0][2] < 0.01) + self.assertTrue( accuracy < 0.5*np.mean(test_y) ) diff --git a/release/python/0.5.0/crankshaft/test/test_space_time_dynamics.py b/release/python/0.5.0/crankshaft/test/test_space_time_dynamics.py new file mode 100644 index 0000000..d14563e --- /dev/null +++ b/release/python/0.5.0/crankshaft/test/test_space_time_dynamics.py @@ -0,0 +1,349 @@ +import unittest +import numpy as np + +import unittest + + +from helper import fixture_file + +from crankshaft.space_time_dynamics import Markov +import crankshaft.space_time_dynamics as std +from crankshaft import random_seeds +from crankshaft.analysis_data_provider import AnalysisDataProvider +import json + + +class FakeDataProvider(AnalysisDataProvider): + def __init__(self, data): + self.mock_result = data + + def get_markov(self, w_type, params): + return self.mock_result + + +class SpaceTimeTests(unittest.TestCase): + """Testing class for Markov Functions.""" + + def setUp(self): + self.params = {"id_col": "cartodb_id", + "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'], + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.neighbors_data = json.loads( + open(fixture_file('neighbors_markov.json')).read()) + self.markov_data = json.loads(open(fixture_file('markov.json')).read()) + + self.time_data = np.array([i * np.ones(10, dtype=float) + for i in range(10)]).T + + self.transition_matrix = np.array([ + [[0.96341463, 0.0304878, 0.00609756, 0., 0.], + [0.06040268, 0.83221477, 0.10738255, 0., 0.], + [0., 0.14, 0.74, 0.12, 0.], + [0., 0.03571429, 0.32142857, 0.57142857, 0.07142857], + [0., 0., 0., 0.16666667, 0.83333333]], + [[0.79831933, 0.16806723, 0.03361345, 0., 0.], + [0.0754717, 0.88207547, 0.04245283, 0., 0.], + [0.00537634, 0.06989247, 0.8655914, 0.05913978, 0.], + [0., 0., 0.06372549, 
0.90196078, 0.03431373], + [0., 0., 0., 0.19444444, 0.80555556]], + [[0.84693878, 0.15306122, 0., 0., 0.], + [0.08133971, 0.78947368, 0.1291866, 0., 0.], + [0.00518135, 0.0984456, 0.79274611, 0.0984456, 0.00518135], + [0., 0., 0.09411765, 0.87058824, 0.03529412], + [0., 0., 0., 0.10204082, 0.89795918]], + [[0.8852459, 0.09836066, 0., 0.01639344, 0.], + [0.03875969, 0.81395349, 0.13953488, 0., 0.00775194], + [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505], + [0., 0.02339181, 0.12865497, 0.75438596, 0.09356725], + [0., 0., 0., 0.09661836, 0.90338164]], + [[0.33333333, 0.66666667, 0., 0., 0.], + [0.0483871, 0.77419355, 0.16129032, 0.01612903, 0.], + [0.01149425, 0.16091954, 0.74712644, 0.08045977, 0.], + [0., 0.01036269, 0.06217617, 0.89637306, 0.03108808], + [0., 0., 0., 0.02352941, 0.97647059]]] + ) + + def test_spatial_markov(self): + """Test Spatial Markov.""" + data = [{'id': d['id'], + 'attr1': d['y1995'], + 'attr2': d['y1996'], + 'attr3': d['y1997'], + 'attr4': d['y1998'], + 'attr5': d['y1999'], + 'attr6': d['y2000'], + 'attr7': d['y2001'], + 'attr8': d['y2002'], + 'attr9': d['y2003'], + 'attr10': d['y2004'], + 'attr11': d['y2005'], + 'attr12': d['y2006'], + 'attr13': d['y2007'], + 'attr14': d['y2008'], + 'attr15': d['y2009'], + 'neighbors': d['neighbors']} for d in self.neighbors_data] + # print(str(data[0])) + markov = Markov(FakeDataProvider(data)) + random_seeds.set_random_seeds(1234) + + result = markov.spatial_trend('subquery', + ['y1995', 'y1996', 'y1997', 'y1998', + 'y1999', 'y2000', 'y2001', 'y2002', + 'y2003', 'y2004', 'y2005', 'y2006', + 'y2007', 'y2008', 'y2009'], + 5, 'knn', 5, 0, 'the_geom', + 'cartodb_id') + + self.assertTrue(result is not None) + result = [(row[0], row[1], row[2], row[3], row[4]) for row in result] + print result[0] + expected = self.markov_data + for ([res_trend, res_up, res_down, res_vol, res_id], + [exp_trend, exp_up, exp_down, exp_vol, exp_id] + ) in zip(result, expected): + self.assertAlmostEqual(res_trend, 
exp_trend) + + def test_get_time_data(self): + """Test get_time_data""" + data = [{'attr1': d['y1995'], + 'attr2': d['y1996'], + 'attr3': d['y1997'], + 'attr4': d['y1998'], + 'attr5': d['y1999'], + 'attr6': d['y2000'], + 'attr7': d['y2001'], + 'attr8': d['y2002'], + 'attr9': d['y2003'], + 'attr10': d['y2004'], + 'attr11': d['y2005'], + 'attr12': d['y2006'], + 'attr13': d['y2007'], + 'attr14': d['y2008'], + 'attr15': d['y2009']} for d in self.neighbors_data] + + result = std.get_time_data(data, ['y1995', 'y1996', 'y1997', 'y1998', + 'y1999', 'y2000', 'y2001', 'y2002', + 'y2003', 'y2004', 'y2005', 'y2006', + 'y2007', 'y2008', 'y2009']) + + # expected was prepared from PySAL example: + # f = ps.open(ps.examples.get_path("usjoin.csv")) + # pci = np.array([f.by_col[str(y)] + # for y in range(1995, 2010)]).transpose() + # rpci = pci / (pci.mean(axis = 0)) + + expected = np.array( + [[0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154, + 0.83271652, 0.83786314, 0.85012593, 0.85509656, 0.86416612, + 0.87119375, 0.86302631, 0.86148267, 0.86252252, 0.86746356], + [0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388, + 0.90746978, 0.89830489, 0.89431991, 0.88924794, 0.89815176, + 0.91832091, 0.91706054, 0.90139505, 0.87897455, 0.86216858], + [0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522, + 0.78964559, 0.80584442, 0.8084998, 0.82258551, 0.82668196, + 0.82373724, 0.81814804, 0.83675961, 0.83574199, 0.84647177], + [1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841, + 1.14506948, 1.12151133, 1.11160697, 1.10888621, 1.11399806, + 1.12168029, 1.13164797, 1.12958508, 1.11371818, 1.09936775], + [1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025, + 1.16898201, 1.17212488, 1.14752303, 1.11843284, 1.11024964, + 1.11943471, 1.11736468, 1.10863242, 1.09642516, 1.07762337], + [1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684, + 1.44184737, 1.44782832, 1.41978227, 1.39092208, 1.4059372, + 1.40788646, 1.44052766, 1.45241216, 1.43306098, 
1.4174431], + [1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149, + 1.10888138, 1.11856629, 1.13062931, 1.11944984, 1.12446239, + 1.11671008, 1.10880034, 1.08401709, 1.06959206, 1.07875225], + [1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545, + 0.99854316, 0.9880258, 0.99669587, 0.99327676, 1.01400905, + 1.03176742, 1.040511, 1.01749645, 0.9936394, 0.98279746], + [0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845, + 0.99127006, 0.97925917, 0.9683482, 0.95335147, 0.93694787, + 0.94308213, 0.92232874, 0.91284091, 0.89689833, 0.88928858], + [0.87418391, 0.86416601, 0.84425695, 0.8404494, 0.83903044, + 0.8578708, 0.86036185, 0.86107306, 0.8500772, 0.86981998, + 0.86837929, 0.87204141, 0.86633032, 0.84946077, 0.83287146], + [1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624, + 1.14450183, 1.12349752, 1.12596664, 1.12213996, 1.1119989, + 1.10257792, 1.10491258, 1.11059842, 1.10509795, 1.10020097], + [0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687, + 0.95831051, 0.94480909, 0.94804195, 0.95430286, 0.94103989, + 0.92122519, 0.91010201, 0.89280392, 0.89298243, 0.89165385], + [0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647, + 0.9480927, 0.93539182, 0.95388718, 0.94597005, 0.96918424, + 0.94781281, 0.93466815, 0.94281559, 0.96520315, 0.96715441], + [0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897, + 0.98687073, 0.99237486, 0.98209969, 0.9877653, 0.97399471, + 0.96910087, 0.98416665, 0.98423613, 0.99823861, 0.99545704], + [0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012, + 0.86191535, 0.84981451, 0.85472102, 0.84564835, 0.83998883, + 0.83478547, 0.82803648, 0.8198736, 0.82265395, 0.8399404], + [0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136, + 0.82785597, 0.86008789, 0.86776298, 0.86720209, 0.8676334, + 0.89179317, 0.94202108, 0.9422231, 0.93902708, 0.94479184], + [0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238, + 0.90906632, 0.92693339, 0.93695966, 0.94242697, 0.94338265, + 
0.91981796, 0.91108804, 0.90543476, 0.91737138, 0.94793657], + [1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723, + 1.20172869, 1.21328691, 1.22624778, 1.22397075, 1.23857042, + 1.24419893, 1.23929384, 1.23418676, 1.23626739, 1.26754398], + [1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667, + 1.34790023, 1.34399863, 1.32575181, 1.30795492, 1.30544841, + 1.30303302, 1.32107766, 1.32936244, 1.33001241, 1.33288462], + [1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093, + 1.05059016, 1.03405057, 1.02747623, 1.03162734, 0.9961416, + 0.97356208, 0.94241549, 0.92754547, 0.92549227, 0.92138102], + [1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264, + 1.13889622, 1.12442212, 1.13367018, 1.13982256, 1.14029944, + 1.11979401, 1.10905389, 1.10577769, 1.11166825, 1.09985155], + [0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284, + 0.74480073, 0.76098396, 0.76156903, 0.76651952, 0.76533288, + 0.78205934, 0.76842416, 0.77487118, 0.77768683, 0.78801192], + [0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803, + 0.97370819, 0.96419154, 0.97209861, 0.97441313, 0.96356162, + 0.94745352, 0.93965462, 0.93069645, 0.94020973, 0.94358232], + [0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801, + 0.80071489, 0.83358256, 0.83451613, 0.85175032, 0.85954307, + 0.86790024, 0.87170334, 0.87863799, 0.87497981, 0.87888675], + [0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619, + 0.98733195, 0.99644997, 0.99669587, 1.02559097, 1.01116651, + 0.99988024, 0.97906749, 0.99323123, 1.00204939, 0.99602148], + [1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683, + 1.08312397, 1.05192626, 1.04230892, 1.05577278, 1.08569751, + 1.12443486, 1.08891079, 1.08603695, 1.05997314, 1.02160943], + [1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272, + 1.18257029, 1.16226243, 1.16009196, 1.14467789, 1.14820235, + 1.12386598, 1.12680236, 1.12357937, 1.1159258, 1.12570828], + [1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667, 
+ 1.31210239, 1.29989156, 1.29203193, 1.27183516, 1.26830786, + 1.2617743, 1.28656675, 1.29734097, 1.29390205, 1.29345446], + [0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864, + 0.78772975, 0.82848011, 0.8259679, 0.82435705, 0.83108634, + 0.84373784, 0.83891093, 0.84349247, 0.85637272, 0.86539395], + [1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626, + 1.2256767, 1.21126648, 1.19377804, 1.18355337, 1.19674434, + 1.21536573, 1.23653297, 1.27962009, 1.27968392, 1.25907738], + [0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282, + 0.96480308, 0.94686376, 0.93679073, 0.92540049, 0.92988835, + 0.93442917, 0.92100464, 0.91475304, 0.90249622, 0.9021363], + [0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368, + 0.88937573, 0.894401, 0.90448993, 0.95495898, 0.92698333, + 0.94745352, 0.92562488, 0.96635366, 1.02520312, 1.0394296], + [1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073, + 1.00759019, 0.99192968, 0.99747298, 0.99550759, 0.97583768, + 0.9610168, 0.94779638, 0.93759089, 0.93353431, 0.94121705], + [0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613, + 0.83434854, 0.85813595, 0.84667961, 0.84374558, 0.85951183, + 0.87194227, 0.89455097, 0.88283929, 0.90349491, 0.90600675], + [1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086, + 1.00581626, 0.98850522, 0.99291168, 0.98983209, 0.97511924, + 0.96134615, 0.96382634, 0.95011401, 0.9434686, 0.94637765], + [1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857, + 1.04800023, 1.03024941, 1.04200483, 1.0402554, 1.03296979, + 1.02191682, 1.02476275, 1.02347523, 1.02517684, 1.04359571], + [1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043, + 1.0531801, 1.07452771, 1.09383478, 1.1052447, 1.10322136, + 1.09167939, 1.08772756, 1.08859544, 1.09177338, 1.1096083], + [0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809, + 0.86287327, 0.85169796, 0.85411285, 0.84886336, 0.84517414, + 0.84843858, 0.84488343, 0.83374329, 0.82812044, 0.82878599], + 
[0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286, + 0.92652175, 0.94278865, 0.93682452, 0.98655146, 0.992237, + 0.9798497, 0.93869677, 0.96947771, 1.00362626, 0.98102351], + [0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967, + 0.93092109, 0.92662519, 0.93412152, 0.93501274, 0.92879506, + 0.92110542, 0.91035556, 0.90430364, 0.89994694, 0.90073864], + [0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824, + 0.98882205, 0.97662234, 0.95601578, 0.94905385, 0.94934888, + 0.97152609, 0.97163004, 0.9700702, 0.97158948, 0.95884908], + [0.83980439, 0.84726737, 0.85747, 0.85467221, 0.8556751, + 0.84818516, 0.85265681, 0.84502402, 0.82645665, 0.81743586, + 0.83550406, 0.83338919, 0.83511679, 0.82136617, 0.80921874], + [0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441, + 0.95440787, 0.96364363, 0.96804412, 0.97136214, 0.97583768, + 0.95571724, 0.96895368, 0.97001634, 0.97082733, 0.98782366], + [1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249, + 1.10558188, 1.1214086, 1.12292577, 1.13021031, 1.13342735, + 1.14686068, 1.14502975, 1.14474747, 1.14084037, 1.16142926], + [1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863, + 1.11856702, 1.09764283, 1.08815849, 1.08044313, 1.09278827, + 1.07003204, 1.08398066, 1.09831768, 1.09298232, 1.09176125], + [0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744, + 0.77751194, 0.79902974, 0.81437881, 0.80788828, 0.79603865, + 0.78966436, 0.79949807, 0.80172182, 0.82168155, 0.85587911], + [1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561, + 1.00162979, 0.99860739, 1.00814981, 1.00574316, 0.99030032, + 0.97682565, 0.97292596, 0.96519561, 0.96173403, 0.95890284], + [0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289, + 0.96608031, 0.99727185, 1.00781194, 1.03484236, 1.05333619, + 1.0983263, 1.1704974, 1.17025154, 1.18730553, 1.14242645]]) + + self.assertTrue(np.allclose(result, expected)) + self.assertTrue(type(result) == type(expected)) + self.assertTrue(result.shape == 
expected.shape)
+
+    def test_rebin_data(self):
+        """Test rebin_data
+
+        Calls std.rebin_data on self.time_data with bin widths 2 and 3 and
+        requires exact equality (np.array_equal) with hand-built arrays.
+
+        NOTE(review): self.time_data is defined outside this view; the
+        arithmetic in the comments below presumes each of its 10 rows
+        holds the values 0..9 -- confirm against setUp.
+        """
+        # sample in double the time (even case since 10 % 2 = 0):
+        #   (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2
+        #     = 0.5, 2.5, 4.5, 6.5, 8.5
+        # The comprehension builds five constant length-10 rows; .T turns
+        # them into a 10 x 5 array whose every row is 0.5, 2.5, ..., 8.5.
+        ans_even = np.array([(i + 0.5) * np.ones(10, dtype=float)
+                             for i in range(0, 10, 2)]).T
+
+        self.assertTrue(
+            np.array_equal(std.rebin_data(self.time_data, 2), ans_even))
+
+        # sample in triple the time (uneven since 10 % 3 = 1):
+        #   (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1
+        #     = 1, 4, 7, 9
+        # The leftover tenth column forms a bin of its own, hence (9)/1.
+        # After .T the expected array is 10 x 4 with rows 1, 4, 7, 9.
+        ans_odd = np.array([i * np.ones(10, dtype=float)
+                            for i in (1, 4, 7, 9)]).T
+        self.assertTrue(
+            np.array_equal(std.rebin_data(self.time_data, 3), ans_odd))
+
+    def test_get_prob_dist(self):
+        """Test get_prob_dist
+
+        Passes self.transition_matrix (defined outside this view) together
+        with four lag indices and four unit indices to std.get_prob_dist
+        and expects a 4 x 5 array.  The comparison uses np.array_equal,
+        so the returned values must match the literals below exactly, not
+        merely within a tolerance.
+        """
+        lag_indices = np.array([1, 2, 3, 4])
+        unit_indices = np.array([1, 3, 2, 4])
+        answer = np.array([
+            [0.0754717, 0.88207547, 0.04245283, 0., 0.],
+            [0., 0., 0.09411765, 0.87058824, 0.03529412],
+            [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
+            [0., 0., 0., 0.02352941, 0.97647059]
+        ])
+        result = std.get_prob_dist(self.transition_matrix,
+                                   lag_indices, unit_indices)
+
+        self.assertTrue(np.array_equal(result, answer))
+
+    def test_get_prob_stats(self):
+        """Test get_prob_stats
+
+        Feeds a fixed 4 x 5 array of row probabilities and one unit index
+        per row to std.get_prob_stats, then compares the four returned
+        items, by position, with hand-computed values (np.allclose):
+
+          result[0]  "up":         sum of the row's entries after the
+                                   unit index
+          result[1]  "down":       sum of the row's entries before the
+                                   unit index
+          result[2]  "trend":      (up - down) / row[unit index]
+          result[3]  "volatility": matches the population standard
+                                   deviation of the row
+
+        These relationships are read off the expected literals below; the
+        implementation of get_prob_stats is outside this view.
+        """
+
+        probs = np.array([
+            [0.0754717, 0.88207547, 0.04245283, 0., 0.],
+            [0., 0., 0.09411765, 0.87058824, 0.03529412],
+            [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
+            [0., 0., 0., 0.02352941, 0.97647059]
+        ])
+        unit_indices = np.array([1, 3, 2, 4])
+        answer_up = np.array([0.04245283, 0.03529412, 0.12376238, 0.])
+        answer_down = np.array([0.0754717, 0.09411765, 0.0990099, 0.02352941])
+        # Each numerator is up - down for that row; each denominator is
+        # the row's probability at its own unit index.
+        answer_trend = np.array([-0.03301887 / 0.88207547,
+                                 -0.05882353 / 0.87058824,
+                                 0.02475248 / 0.77722772,
+                                 -0.02352941 / 0.97647059])
+        answer_volatility = np.array([0.34221495, 0.33705421,
+                                      0.29226542, 0.38834223])
+
+        result = std.get_prob_stats(probs, unit_indices)
+        # The return value is unpacked by position: up, down, trend,
+        # volatility.
+        result_up = result[0]
+        result_down = result[1]
+        result_trend = result[2]
+        result_volatility = result[3]
+
+        self.assertTrue(np.allclose(result_up, answer_up))
+        self.assertTrue(np.allclose(result_down, answer_down))
+        self.assertTrue(np.allclose(result_trend, answer_trend))
+        self.assertTrue(np.allclose(result_volatility, answer_volatility))
diff --git a/src/pg/crankshaft.control b/src/pg/crankshaft.control
index 1e02d92..ec456b4 100644
--- a/src/pg/crankshaft.control
+++ b/src/pg/crankshaft.control
@@ -1,5 +1,5 @@
 comment = 'CartoDB Spatial Analysis extension'
-default_version = '0.4.2'
+default_version = '0.5.0'
 requires = 'plpythonu, postgis'
 superuser = true
 schema = cdb_crankshaft