diff --git a/pg/sql/0.0.1/moran.sql b/pg/sql/0.0.1/moran.sql new file mode 100644 index 0000000..e70dad1 --- /dev/null +++ b/pg/sql/0.0.1/moran.sql @@ -0,0 +1,22 @@ +CREATE OR REPLACE FUNCTION +cdb_moran_local ( + t TEXT, + attr TEXT, + significance float DEFAULT 0.05, + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_column TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn' +) +RETURNS TABLE ( + moran FLOAT, + quads TEXT, + significance FLOAT, + ids INT +) +AS $$ + from crankshaft.clustering import moran_local + -- TODO: use named parameters or a dictionary + return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type) +$$ LANGUAGE plpython2u; diff --git a/python/crankshaft/crankshaft/clustering/moran.py b/python/crankshaft/crankshaft/clustering/moran.py index 54045bd..e6d9707 100644 --- a/python/crankshaft/crankshaft/clustering/moran.py +++ b/python/crankshaft/crankshaft/clustering/moran.py @@ -7,6 +7,54 @@ Moran's I geostatistics (global clustering & outliers presence) import numpy as np import pysal as ps +import plpy + +# High level interface --------------------------------------- + +def moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type): + """ + Moran's I implementation for PL/Python + Andy Eschbacher + """ + # TODO: ensure that the significance output can be smaller that 1e-3 (0.001) + # TODO: make a wishlist of output features (zscores, pvalues, raw local lisa, what else?) + + plpy.notice('** Constructing query') + + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors + + qvals = {"id_col": id_col, + "attr1": attr, + "geom_col": geom_column, + "table": t, + "num_ngbrs": num_ngbrs} + + q = get_query(w_type, qvals) + + try: + r = plpy.execute(q) + plpy.notice('** Query returned with %d rows' % len(r)) + except plpy.SPIError: + plpy.notice('** Query failed: "%s"' % q) + plpy.notice('** Exiting function') + return zip([None], [None], [None], [None]) + + y = get_attributes(r, 1) + w = get_weight(r, w_type) + + # calculate LISA values + lisa = ps.Moran_Local(y, w) + + # find units of significance + lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance) + + plpy.notice('** Finished calculations') + + return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order) + + +# Low level functions ---------------------------------------- def map_quads(coord): """ @@ -165,3 +213,20 @@ def quad_position(quads): lisa_sig = np.array([map_quads(q) for q in quads]) return lisa_sig + +def lisa_sig_vals(pvals, quads, threshold): + """ + Produce Moran's I classification based of n + """ + + sig = (pvals <= threshold) + + lisa_sig = np.empty(len(sig), np.chararray) + + for idx, val in enumerate(sig): + if val: + lisa_sig[idx] = map_quads(quads[idx]) + else: + lisa_sig[idx] = 'Not significant' + + return lisa_sig diff --git a/python/crankshaft/test/test_clustering_moran.py b/python/crankshaft/test/test_clustering_moran.py index f758c37..5a2b0d8 100644 --- a/python/crankshaft/test/test_clustering_moran.py +++ b/python/crankshaft/test/test_clustering_moran.py @@ -19,6 +19,7 @@ class MoranTest(unittest.TestCase): """Testing class for Moran's I functions.""" def setUp(self): + plpy._reset() self.params = {"id_col": "cartodb_id", "attr1": "andy", "attr2": "jay_z", @@ -116,3 +117,19 @@ class MoranTest(unittest.TestCase): test_ans = cc.quad_position(quads) self.assertTrue((test_ans == ans).all()) + + def test_moran_local(self): + """Test Moran's I local""" + plpy._define_result('select', [ + { 'id': 1, 'attr1': 100.0, 'neighbors': [2,4,5,7,8] }, + { 'id': 2, 'attr1': 110.0, 'neighbors': [1,4,5,6,7] }, + { 'id': 3, 'attr1': 90.0, 'neighbors': [1,4,5,7,8] }, + { 'id': 4, 'attr1': 100.0, 'neighbors': [1,2,5,7,8] }, + { 'id': 5, 'attr1': 100.0, 'neighbors': [1,2,3,7,8] }, + { 'id': 6, 'attr1': 105.0, 'neighbors': [1,2,3,7,8] }, + { 'id': 7, 'attr1': 105.0, 'neighbors': [1,2,3,6,8] }, + { 'id': 8, 'attr1': 105.0, 'neighbors': [1,2,3,6,7] }, + { 'id': 9, 'attr1': 120.0, 'neighbors': [1,2,5,6,7] } + ]) + result = cc.moran_local('table', 'value', 0.05, 5, 99, 'the_geom', 'cartodb_id', 'knn') + # TODO: check results!