Compare commits
23 Commits
0.0
...
find_conto
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1dbbb5ecaa | ||
|
|
a00c8df201 | ||
|
|
a216a06cbc | ||
|
|
874b5318ff | ||
|
|
e59befae82 | ||
|
|
633b63bccc | ||
|
|
ea02f36235 | ||
|
|
22b6aed7c1 | ||
|
|
f6e8524669 | ||
|
|
02b74813ac | ||
|
|
4c243bf1d3 | ||
|
|
b0150d4fec | ||
|
|
6bb4f36df5 | ||
|
|
5a46f65e59 | ||
|
|
e56519f599 | ||
|
|
8dd8ab37a5 | ||
|
|
06f5cf9951 | ||
|
|
bc67ae8f69 | ||
|
|
eecbe39547 | ||
|
|
1578b17eb8 | ||
|
|
3eda8ecd16 | ||
|
|
0aa4d0a50e | ||
|
|
3b31da783a |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,2 +1,3 @@
|
||||
envs/
|
||||
*.pyc
|
||||
.DS_Store
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Development process
|
||||
|
||||
Please read the Working Process/Quickstart Guide in README.md first.
|
||||
Please read the Working Process/Quickstart Guide in [README.md](https://github.com/CartoDB/crankshaft/blob/master/README.md) first.
|
||||
|
||||
For any modification of crankshaft, such as adding new features,
|
||||
refactoring or bug-fixing, topic branch must be created out of the `develop`
|
||||
|
||||
10
README.md
10
README.md
@@ -14,7 +14,7 @@ CartoDB Spatial Analysis extension for PostgreSQL.
|
||||
## Requirements
|
||||
|
||||
* pip, virtualenv, PostgreSQL
|
||||
* python-scipy system package (see src/py/README.md)
|
||||
* python-scipy system package (see [src/py/README.md](https://github.com/CartoDB/crankshaft/blob/master/src/py/README.md))
|
||||
|
||||
# Working Process -- Quickstart Guide
|
||||
|
||||
@@ -33,7 +33,7 @@ deployed in production.
|
||||
Developers shall create a new topic branch from `develop` for any new feature
|
||||
or bugfix and commit their changes to it and eventually merge back into
|
||||
the `develop` branch. When a new release is required a Pull Request
|
||||
will be open againt the `develop` branch.
|
||||
will be open against the `develop` branch.
|
||||
|
||||
The `develop` pull requests will be handled by the release manage,
|
||||
who will merge into master where new releases are prepared and tagged.
|
||||
@@ -43,7 +43,7 @@ and developers must not commit or merge into it.
|
||||
## Development Guidelines
|
||||
|
||||
For a detailed description of the development process please see
|
||||
the CONTRIBUTING.md guide.
|
||||
the [CONTRIBUTING.md](https://github.com/CartoDB/crankshaft/blob/master/CONTRIBUTING.md) guide.
|
||||
|
||||
Any modification to the source code (`src/pg/sql` for the SQL extension,
|
||||
`src/py/crankshaft` for the Python package) shall always be done
|
||||
@@ -52,7 +52,7 @@ in a topic branch created from the `develop` branch.
|
||||
Tests, documentation and peer code reviewing are required for all
|
||||
modifications.
|
||||
|
||||
The tests (both for SQL and Pyhton) are executed by running,
|
||||
The tests (both for SQL and Python) are executed by running,
|
||||
from the top directory:
|
||||
|
||||
```
|
||||
@@ -67,5 +67,5 @@ branch.
|
||||
## Release
|
||||
|
||||
The release and deployment process is described in the
|
||||
RELEASE.md guide and it is the responsibility of the designated
|
||||
[RELEASE.md](https://github.com/CartoDB/crankshaft/blob/master/RELEASE.md) guide and it is the responsibility of the designated
|
||||
release manager.
|
||||
|
||||
@@ -1,37 +1,89 @@
|
||||
-- Moran's I
|
||||
-- Moran's I (global)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
cdb_moran_local (
|
||||
t TEXT,
|
||||
attr TEXT,
|
||||
significance float DEFAULT 0.05,
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_column TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id',
|
||||
w_type TEXT DEFAULT 'knn')
|
||||
RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
|
||||
CDB_AreasOfInterest_Global (
|
||||
subquery TEXT,
|
||||
attr_name TEXT,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id',
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5)
|
||||
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
|
||||
AS $$
|
||||
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran(subquery, attr, num_ngbrs, permutations, geom_col, id_col, w_type)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterest_Local(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id',
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5)
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Rate (global)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterest_Global_Rate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id',
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5)
|
||||
RETURNS TABLE (moran FLOAT, significance FLOAT)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
-- Moran's I Local Rate
|
||||
CREATE OR REPLACE FUNCTION
|
||||
cdb_moran_local_rate(t TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
significance FLOAT DEFAULT 0.05,
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_column TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id',
|
||||
w_type TEXT DEFAULT 'knn')
|
||||
RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric)
|
||||
CDB_AreasOfInterest_Local_Rate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id',
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5)
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC)
|
||||
AS $$
|
||||
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
from crankshaft.clustering import moran_local_rate
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- -- Moran's I Local Bivariate
|
||||
-- CREATE OR REPLACE FUNCTION
|
||||
-- cdb_moran_local_bv(
|
||||
-- subquery TEXT,
|
||||
-- attr1 TEXT,
|
||||
-- attr2 TEXT,
|
||||
-- permutations INT DEFAULT 99,
|
||||
-- geom_col TEXT DEFAULT 'the_geom',
|
||||
-- id_col TEXT DEFAULT 'cartodb_id',
|
||||
-- w_type TEXT DEFAULT 'knn',
|
||||
-- num_ngbrs INT DEFAULT 5)
|
||||
-- RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric)
|
||||
-- AS $$
|
||||
-- from crankshaft.clustering import moran_local_bv
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return moran_local_bv(t, attr1, attr2, permutations, geom_col, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
|
||||
32
src/pg/sql/50_contours.sql
Normal file
32
src/pg/sql/50_contours.sql
Normal file
@@ -0,0 +1,32 @@
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_Contours (
|
||||
subquery TEXT,
|
||||
grid_size NUMERIC DEFAULT 100,
|
||||
bandwidth NUMERIC DEFAULT 0.0001,
|
||||
levels NUMERIC[] DEFAULT null
|
||||
)
|
||||
RETURNS table (level Numeric, geom_text text )
|
||||
AS $$
|
||||
from crankshaft.contours import cdb_generate_contours
|
||||
# TODO: use named parameters or a dictionary
|
||||
return cdb_generate_contours(subquery, grid_size, bandwidth, levels)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_Contours (
|
||||
subquery TEXT,
|
||||
grid_size NUMERIC DEFAULT 100,
|
||||
bandwidth NUMERIC DEFAULT 0.0001,
|
||||
levels NUMERIC[] DEFAULT null
|
||||
)
|
||||
RETURNS table (level Numeric, geom geometry )
|
||||
AS $$
|
||||
BEGIN
|
||||
|
||||
RETURN QUERY
|
||||
select cont.level as level, ST_GeomFromText(cont.geom_text, 4326)::geometry as geom from _CDB_Contours(subquery,grid_size,bandwidth,levels) as cont;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
@@ -110,7 +110,7 @@ INSERT INTO ppoints2 VALUES
|
||||
(24,'0101000020E61000009C5F91C5095C17C0C78784B15A4F4540'::geometry,'24','07',0.3, 1.0),
|
||||
(29,'0101000020E6100000C34D4A5B48E712C092E680892C684240'::geometry,'29','01',0.3, 1.0),
|
||||
(52,'0101000020E6100000406A545EB29A07C04E5F0BDA39A54140'::geometry,'52','19',0.0, 1.01)
|
||||
-- Moral functions perform some nondeterministic computations
|
||||
-- Areas of Interest functions perform some nondeterministic computations
|
||||
-- (to estimate the significance); we will set the seeds for the RNGs
|
||||
-- that affect those results to have repeateble results
|
||||
SELECT cdb_crankshaft._cdb_random_seeds(1234);
|
||||
@@ -121,18 +121,64 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234);
|
||||
|
||||
SELECT ppoints.code, m.quads
|
||||
FROM ppoints
|
||||
JOIN cdb_crankshaft.cdb_moran_local('SELECT * FROM ppoints', 'value') m
|
||||
JOIN cdb_crankshaft.CDB_AreasOfInterest_Local('SELECT * FROM ppoints', 'value') m
|
||||
ON ppoints.cartodb_id = m.ids
|
||||
ORDER BY ppoints.code;
|
||||
NOTICE: ** Constructing query
|
||||
CONTEXT: PL/Python function "cdb_moran_local"
|
||||
NOTICE: ** Query failed: "SELECT i."cartodb_id" As id, i."value"::numeric As attr1, (SELECT ARRAY(SELECT j."cartodb_id" FROM "(SELECT * FROM ppoints)" As j WHERE j."value" IS NOT NULL ORDER BY j."the_geom" <-> i."the_geom" ASC LIMIT 5 OFFSET 1 ) ) As neighbors FROM "(SELECT * FROM ppoints)" As i WHERE i."value" IS NOT NULL ORDER BY i."cartodb_id" ASC;"
|
||||
CONTEXT: PL/Python function "cdb_moran_local"
|
||||
NOTICE: ** Exiting function
|
||||
CONTEXT: PL/Python function "cdb_moran_local"
|
||||
code | quads
|
||||
------+-------
|
||||
(0 rows)
|
||||
01 | HH
|
||||
02 | HL
|
||||
03 | LL
|
||||
04 | LL
|
||||
05 | LH
|
||||
06 | LL
|
||||
07 | HH
|
||||
08 | HH
|
||||
09 | HH
|
||||
10 | LL
|
||||
11 | LL
|
||||
12 | LL
|
||||
13 | HL
|
||||
14 | LL
|
||||
15 | LL
|
||||
16 | HH
|
||||
17 | HH
|
||||
18 | LL
|
||||
19 | HH
|
||||
20 | HH
|
||||
21 | LL
|
||||
22 | HH
|
||||
23 | LL
|
||||
24 | LL
|
||||
25 | HH
|
||||
26 | HH
|
||||
27 | LL
|
||||
28 | HH
|
||||
29 | LL
|
||||
30 | LL
|
||||
31 | HH
|
||||
32 | LL
|
||||
33 | HL
|
||||
34 | LH
|
||||
35 | LL
|
||||
36 | LL
|
||||
37 | HL
|
||||
38 | HL
|
||||
39 | HH
|
||||
40 | HH
|
||||
41 | HL
|
||||
42 | LH
|
||||
43 | LH
|
||||
44 | LL
|
||||
45 | LH
|
||||
46 | LL
|
||||
47 | LL
|
||||
48 | HH
|
||||
49 | LH
|
||||
50 | HH
|
||||
51 | LL
|
||||
52 | LL
|
||||
(52 rows)
|
||||
|
||||
SELECT cdb_crankshaft._cdb_random_seeds(1234);
|
||||
_cdb_random_seeds
|
||||
@@ -142,17 +188,61 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234);
|
||||
|
||||
SELECT ppoints2.code, m.quads
|
||||
FROM ppoints2
|
||||
JOIN cdb_crankshaft.cdb_moran_local_rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m
|
||||
JOIN cdb_crankshaft.CDB_AreasOfInterest_Local_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m
|
||||
ON ppoints2.cartodb_id = m.ids
|
||||
ORDER BY ppoints2.code;
|
||||
NOTICE: ** Constructing query
|
||||
CONTEXT: PL/Python function "cdb_moran_local_rate"
|
||||
NOTICE: ** Query failed: "SELECT i."cartodb_id" As id, i."denominator"::numeric As attr1, i."numerator"::numeric As attr2, (SELECT ARRAY(SELECT j."cartodb_id" FROM "(SELECT * FROM ppoints2)" As j WHERE j."denominator" IS NOT NULL AND j."numerator" IS NOT NULL AND j."numerator" <> 0 ORDER BY j."the_geom" <-> i."the_geom" ASC LIMIT 5 OFFSET 1 ) ) As neighbors FROM "(SELECT * FROM ppoints2)" As i WHERE i."denominator" IS NOT NULL AND i."numerator" IS NOT NULL AND i."numerator" <> 0 ORDER BY i."cartodb_id" ASC;"
|
||||
CONTEXT: PL/Python function "cdb_moran_local_rate"
|
||||
NOTICE: ** Error: <class 'plpy.SPIError'>
|
||||
CONTEXT: PL/Python function "cdb_moran_local_rate"
|
||||
NOTICE: ** Exiting function
|
||||
CONTEXT: PL/Python function "cdb_moran_local_rate"
|
||||
ERROR: length of returned sequence did not match number of columns in row
|
||||
CONTEXT: while creating return value
|
||||
PL/Python function "cdb_moran_local_rate"
|
||||
code | quads
|
||||
------+-------
|
||||
01 | LL
|
||||
02 | LH
|
||||
03 | HH
|
||||
04 | HH
|
||||
05 | LL
|
||||
06 | HH
|
||||
07 | LL
|
||||
08 | LL
|
||||
09 | LL
|
||||
10 | HH
|
||||
11 | HH
|
||||
12 | HL
|
||||
13 | LL
|
||||
14 | HH
|
||||
15 | LL
|
||||
16 | LL
|
||||
17 | LL
|
||||
18 | LH
|
||||
19 | LL
|
||||
20 | LL
|
||||
21 | HH
|
||||
22 | LL
|
||||
23 | HL
|
||||
24 | LL
|
||||
25 | LL
|
||||
26 | LL
|
||||
27 | LL
|
||||
28 | LL
|
||||
29 | LH
|
||||
30 | HH
|
||||
31 | LL
|
||||
32 | LL
|
||||
33 | LL
|
||||
34 | LL
|
||||
35 | LH
|
||||
36 | HL
|
||||
37 | LH
|
||||
38 | LH
|
||||
39 | LL
|
||||
40 | LL
|
||||
41 | LH
|
||||
42 | HL
|
||||
43 | LL
|
||||
44 | HL
|
||||
45 | LL
|
||||
46 | HL
|
||||
47 | LL
|
||||
48 | LL
|
||||
49 | HL
|
||||
50 | LL
|
||||
51 | HH
|
||||
(51 rows)
|
||||
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
\i test/fixtures/ppoints.sql
|
||||
\i test/fixtures/ppoints2.sql
|
||||
|
||||
-- Moral functions perform some nondeterministic computations
|
||||
-- Areas of Interest functions perform some nondeterministic computations
|
||||
-- (to estimate the significance); we will set the seeds for the RNGs
|
||||
-- that affect those results to have repeateble results
|
||||
SELECT cdb_crankshaft._cdb_random_seeds(1234);
|
||||
|
||||
SELECT ppoints.code, m.quads
|
||||
FROM ppoints
|
||||
JOIN cdb_crankshaft.cdb_moran_local('SELECT * FROM ppoints', 'value') m
|
||||
JOIN cdb_crankshaft.CDB_AreasOfInterest_Local('SELECT * FROM ppoints', 'value') m
|
||||
ON ppoints.cartodb_id = m.ids
|
||||
ORDER BY ppoints.code;
|
||||
|
||||
@@ -16,6 +16,6 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234);
|
||||
|
||||
SELECT ppoints2.code, m.quads
|
||||
FROM ppoints2
|
||||
JOIN cdb_crankshaft.cdb_moran_local_rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m
|
||||
JOIN cdb_crankshaft.CDB_AreasOfInterest_Local_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m
|
||||
ON ppoints2.cartodb_id = m.ids
|
||||
ORDER BY ppoints2.code;
|
||||
|
||||
@@ -9,7 +9,7 @@ SET search_path TO public,cartodb,cdb_crankshaft;
|
||||
-- Exercise public functions
|
||||
SELECT ppoints.code, m.quads
|
||||
FROM ppoints
|
||||
JOIN cdb_moran_local('ppoints', 'value') m
|
||||
JOIN CDB_AreasOfInterest_Local('ppoints', 'value') m
|
||||
ON ppoints.cartodb_id = m.ids
|
||||
ORDER BY ppoints.code;
|
||||
SELECT round(cdb_overlap_sum(
|
||||
|
||||
@@ -8,7 +8,7 @@ cd crankshaft
|
||||
nosetests test/
|
||||
```
|
||||
|
||||
## Notes about python dependencies
|
||||
## Notes about Python dependencies
|
||||
* This extension is targeted at production databases. Therefore certain restrictions must be assumed about the production environment vs other experimental environments.
|
||||
* We're using `pip` and `virtualenv` to generate a suitable isolated environment for python code that has all the dependencies
|
||||
* Every dependency should be:
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
import random_seeds
|
||||
import clustering
|
||||
import contours
|
||||
|
||||
@@ -5,143 +5,226 @@ Moran's I geostatistics (global clustering & outliers presence)
|
||||
# TODO: Fill in local neighbors which have null/NoneType values with the
|
||||
# average of the their neighborhood
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
import plpy
|
||||
|
||||
# crankshaft module
|
||||
import crankshaft.pysal_utils as pu
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
def moran_local(subquery, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type):
|
||||
def moran(subquery, attr_name,
|
||||
permutations, geom_col, id_col, w_type, num_ngbrs):
|
||||
"""
|
||||
Moran's I (global)
|
||||
Implementation building neighbors with a PostGIS database and Moran's I
|
||||
core clusters with PySAL.
|
||||
Andy Eschbacher
|
||||
"""
|
||||
qvals = {"id_col": id_col,
|
||||
"attr1": attr_name,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs}
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
plpy.notice('** Query: %s' % query)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(2)
|
||||
plpy.notice('** Query returned with %d rows' % len(result))
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
return pu.empty_zipped_array(2)
|
||||
|
||||
## collect attributes
|
||||
attr_vals = pu.get_attributes(result)
|
||||
|
||||
## calculate weights
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
## calculate moran global
|
||||
moran_global = ps.esda.moran.Moran(attr_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
return zip([moran_global.I], [moran_global.EI])
|
||||
|
||||
def moran_local(subquery, attr,
|
||||
permutations, geom_col, id_col, w_type, num_ngbrs):
|
||||
"""
|
||||
Moran's I implementation for PL/Python
|
||||
Andy Eschbacher
|
||||
"""
|
||||
# TODO: ensure that the significance output can be smaller that 1e-3 (0.001)
|
||||
# TODO: make a wishlist of output features (zscores, pvalues, raw local lisa, what else?)
|
||||
|
||||
plpy.notice('** Constructing query')
|
||||
|
||||
# geometries with attributes that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
|
||||
qvals = {"id_col": id_col,
|
||||
"attr1": attr,
|
||||
"geom_col": geom_column,
|
||||
"attr1": attr,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs}
|
||||
|
||||
q = get_query(w_type, qvals)
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
try:
|
||||
r = plpy.execute(q)
|
||||
plpy.notice('** Query returned with %d rows' % len(r))
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(5)
|
||||
except plpy.SPIError:
|
||||
plpy.notice('** Query failed: "%s"' % q)
|
||||
plpy.notice('** Exiting function')
|
||||
return zip([None], [None], [None], [None])
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
return pu.empty_zipped_array(5)
|
||||
|
||||
y = get_attributes(r, 1)
|
||||
w = get_weight(r, w_type)
|
||||
attr_vals = pu.get_attributes(result)
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.Moran_Local(y, w)
|
||||
lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
# find units of significance
|
||||
lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance)
|
||||
# find quadrants for each geometry
|
||||
quads = quad_position(lisa.q)
|
||||
|
||||
plpy.notice('** Finished calculations')
|
||||
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order)
|
||||
|
||||
|
||||
def moran_local_rate(subquery, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type):
|
||||
def moran_rate(subquery, numerator, denominator,
|
||||
permutations, geom_col, id_col, w_type, num_ngbrs):
|
||||
"""
|
||||
Moran's I Local Rate
|
||||
Moran's I Rate (global)
|
||||
Andy Eschbacher
|
||||
"""
|
||||
|
||||
plpy.notice('** Constructing query')
|
||||
|
||||
# geometries with attributes that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
|
||||
qvals = {"id_col": id_col,
|
||||
"numerator": numerator,
|
||||
"denominator": denominator,
|
||||
"geom_col": geom_column,
|
||||
"attr1": numerator,
|
||||
"attr2": denominator,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs}
|
||||
|
||||
q = get_query(w_type, qvals)
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
plpy.notice('** Query: %s' % query)
|
||||
|
||||
try:
|
||||
r = plpy.execute(q)
|
||||
plpy.notice('** Query returned with %d rows' % len(r))
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(2)
|
||||
plpy.notice('** Query returned with %d rows' % len(result))
|
||||
except plpy.SPIError:
|
||||
plpy.notice('** Query failed: "%s"' % q)
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
plpy.notice('** Exiting function')
|
||||
return zip([None], [None], [None], [None])
|
||||
|
||||
plpy.notice('r.nrows() = %d' % r.nrows())
|
||||
return pu.empty_zipped_array(2)
|
||||
|
||||
## collect attributes
|
||||
numer = get_attributes(r, 1)
|
||||
denom = get_attributes(r, 2)
|
||||
numer = pu.get_attributes(result, 1)
|
||||
denom = pu.get_attributes(result, 2)
|
||||
|
||||
w = get_weight(r, w_type, num_ngbrs)
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
## calculate moran global rate
|
||||
lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
|
||||
return zip([lisa_rate.I], [lisa_rate.EI])
|
||||
|
||||
def moran_local_rate(subquery, numerator, denominator,
|
||||
permutations, geom_col, id_col, w_type, num_ngbrs):
|
||||
"""
|
||||
Moran's I Local Rate
|
||||
Andy Eschbacher
|
||||
"""
|
||||
# geometries with values that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
|
||||
query = pu.construct_neighbor_query(w_type,
|
||||
{"id_col": id_col,
|
||||
"numerator": numerator,
|
||||
"denominator": denominator,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs})
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(5)
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
return pu.empty_zipped_array(5)
|
||||
|
||||
## collect attributes
|
||||
numer = pu.get_attributes(result, 1)
|
||||
denom = pu.get_attributes(result, 2)
|
||||
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, w, permutations=permutations)
|
||||
lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
|
||||
# find units of significance
|
||||
lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance)
|
||||
quads = quad_position(lisa.q)
|
||||
|
||||
plpy.notice('** Finished calculations')
|
||||
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
## TODO: Decide on which return values here
|
||||
return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order, lisa.y)
|
||||
|
||||
def moran_local_bv(t, attr1, attr2, significance, num_ngbrs, permutations, geom_column, id_col, w_type):
|
||||
def moran_local_bv(subquery, attr1, attr2,
|
||||
permutations, geom_col, id_col, w_type, num_ngbrs):
|
||||
"""
|
||||
Moran's I (local) Bivariate (untested)
|
||||
"""
|
||||
plpy.notice('** Constructing query')
|
||||
|
||||
qvals = {"num_ngbrs": num_ngbrs,
|
||||
"attr1": attr1,
|
||||
"attr2": attr2,
|
||||
"table": t,
|
||||
"geom_col": geom_column,
|
||||
"subquery": subquery,
|
||||
"geom_col": geom_col,
|
||||
"id_col": id_col}
|
||||
|
||||
q = get_query(w_type, qvals)
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
try:
|
||||
r = plpy.execute(q)
|
||||
plpy.notice('** Query returned with %d rows' % len(r))
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(4)
|
||||
except plpy.SPIError:
|
||||
plpy.notice('** Query failed: "%s"' % q)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
plpy.notice('** Exiting function')
|
||||
return zip([None], [None], [None], [None])
|
||||
plpy.error("Error: areas of interest query failed, " \
|
||||
"check input parameters")
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
return pu.empty_zipped_array(4)
|
||||
|
||||
## collect attributes
|
||||
attr1_vals = get_attributes(r, 1)
|
||||
attr2_vals = get_attributes(r, 2)
|
||||
attr1_vals = pu.get_attributes(result, 1)
|
||||
attr2_vals = pu.get_attributes(result, 2)
|
||||
|
||||
# create weights
|
||||
w = get_weight(r, w_type, num_ngbrs)
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, w)
|
||||
lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
plpy.notice("len of Is: %d" % len(lisa.Is))
|
||||
|
||||
# find clustering of significance
|
||||
lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance)
|
||||
lisa_sig = quad_position(lisa.q)
|
||||
|
||||
plpy.notice('** Finished calculations')
|
||||
|
||||
return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order)
|
||||
|
||||
return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
|
||||
|
||||
# Low level functions ----------------------------------------
|
||||
|
||||
@@ -150,7 +233,9 @@ def map_quads(coord):
|
||||
Map a quadrant number to Moran's I designation
|
||||
HH=1, LH=2, LL=3, HL=4
|
||||
Input:
|
||||
:param coord (int): quadrant of a specific measurement
|
||||
@param coord (int): quadrant of a specific measurement
|
||||
Output:
|
||||
classification (one of 'HH', 'LH', 'LL', or 'HL')
|
||||
"""
|
||||
if coord == 1:
|
||||
return 'HH'
|
||||
@@ -163,159 +248,13 @@ def map_quads(coord):
|
||||
else:
|
||||
return None
|
||||
|
||||
def query_attr_select(params):
|
||||
"""
|
||||
Create portion of SELECT statement for attributes inolved in query.
|
||||
:param params: dict of information used in query (column names,
|
||||
table name, etc.)
|
||||
"""
|
||||
|
||||
attrs = [k for k in params
|
||||
if k not in ('id_col', 'geom_col', 'table', 'num_ngbrs', 'subquery')]
|
||||
|
||||
template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, "
|
||||
|
||||
attr_string = ""
|
||||
|
||||
for idx, val in enumerate(sorted(attrs)):
|
||||
attr_string += template % {"col": val, "alias_num": idx + 1}
|
||||
|
||||
return attr_string
|
||||
|
||||
def query_attr_where(params):
|
||||
"""
|
||||
Create portion of WHERE clauses for weeding out NULL-valued geometries
|
||||
"""
|
||||
attrs = sorted([k for k in params
|
||||
if k not in ('id_col', 'geom_col', 'table', 'num_ngbrs', 'subquery')])
|
||||
|
||||
attr_string = []
|
||||
|
||||
for attr in attrs:
|
||||
attr_string.append("idx_replace.\"{%s}\" IS NOT NULL" % attr)
|
||||
|
||||
if len(attrs) == 2:
|
||||
attr_string.append("idx_replace.\"{%s}\" <> 0" % attrs[1])
|
||||
|
||||
out = " AND ".join(attr_string)
|
||||
|
||||
return out
|
||||
|
||||
def knn(params):
|
||||
"""SQL query for k-nearest neighbors.
|
||||
:param vars: dict of values to fill template
|
||||
"""
|
||||
|
||||
attr_select = query_attr_select(params)
|
||||
attr_where = query_attr_where(params)
|
||||
|
||||
replacements = {"attr_select": attr_select,
|
||||
"attr_where_i": attr_where.replace("idx_replace", "i"),
|
||||
"attr_where_j": attr_where.replace("idx_replace", "j")}
|
||||
|
||||
query = "SELECT " \
|
||||
"i.\"{id_col}\" As id, " \
|
||||
"%(attr_select)s" \
|
||||
"(SELECT ARRAY(SELECT j.\"{id_col}\" " \
|
||||
"FROM \"({subquery})\" As j " \
|
||||
"WHERE %(attr_where_j)s " \
|
||||
"ORDER BY j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \
|
||||
"LIMIT {num_ngbrs} OFFSET 1 ) " \
|
||||
") As neighbors " \
|
||||
"FROM \"({subquery})\" As i " \
|
||||
"WHERE " \
|
||||
"%(attr_where_i)s " \
|
||||
"ORDER BY i.\"{id_col}\" ASC;" % replacements
|
||||
|
||||
return query.format(**params)
|
||||
|
||||
## SQL query for finding queens neighbors (all contiguous polygons)
|
||||
def queen(params):
|
||||
"""SQL query for queen neighbors.
|
||||
:param params: dict of information to fill query
|
||||
"""
|
||||
attr_select = query_attr_select(params)
|
||||
attr_where = query_attr_where(params)
|
||||
|
||||
replacements = {"attr_select": attr_select,
|
||||
"attr_where_i": attr_where.replace("idx_replace", "i"),
|
||||
"attr_where_j": attr_where.replace("idx_replace", "j")}
|
||||
|
||||
query = "SELECT " \
|
||||
"i.\"{id_col}\" As id, " \
|
||||
"%(attr_select)s" \
|
||||
"(SELECT ARRAY(SELECT j.\"{id_col}\" " \
|
||||
"FROM \"({subquery})\" As j " \
|
||||
"WHERE ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \
|
||||
"%(attr_where_j)s)" \
|
||||
") As neighbors " \
|
||||
"FROM \"({subquery})\" As i " \
|
||||
"WHERE " \
|
||||
"%(attr_where_i)s " \
|
||||
"ORDER BY i.\"{id_col}\" ASC;" % replacements
|
||||
|
||||
return query.format(**params)
|
||||
|
||||
## to add more weight methods open a ticket or pull request
|
||||
|
||||
def get_query(w_type, query_vals):
|
||||
"""Return requested query.
|
||||
:param w_type: type of neighbors to calculate (knn or queen)
|
||||
:param query_vals: values used to construct the query
|
||||
"""
|
||||
|
||||
if w_type == 'knn':
|
||||
return knn(query_vals)
|
||||
else:
|
||||
return queen(query_vals)
|
||||
|
||||
def get_attributes(query_res, attr_num):
|
||||
"""
|
||||
:param query_res: query results with attributes and neighbors
|
||||
:param attr_num: attribute number (1, 2, ...)
|
||||
"""
|
||||
return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float)
|
||||
|
||||
## Build weight object
|
||||
def get_weight(query_res, w_type='queen', num_ngbrs=5):
|
||||
"""
|
||||
Construct PySAL weight from return value of query
|
||||
:param query_res: query results with attributes and neighbors
|
||||
"""
|
||||
if w_type == 'knn':
|
||||
row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs
|
||||
weights = {x['id']: row_normed_weights for x in query_res}
|
||||
elif w_type == 'queen':
|
||||
weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors'])
|
||||
if len(x['neighbors']) > 0
|
||||
else [] for x in query_res}
|
||||
|
||||
neighbors = {x['id']: x['neighbors'] for x in query_res}
|
||||
|
||||
return ps.W(neighbors, weights)
|
||||
|
||||
def quad_position(quads):
|
||||
"""
|
||||
Produce Moran's I classification based of n
|
||||
Input:
|
||||
@param quads ndarray: an array of quads classified by
|
||||
1-4 (PySAL default)
|
||||
Output:
|
||||
@param list: an array of quads classied by 'HH', 'LL', etc.
|
||||
"""
|
||||
|
||||
lisa_sig = np.array([map_quads(q) for q in quads])
|
||||
|
||||
return lisa_sig
|
||||
|
||||
def lisa_sig_vals(pvals, quads, threshold):
|
||||
"""
|
||||
Produce Moran's I classification based of n
|
||||
"""
|
||||
|
||||
sig = (pvals <= threshold)
|
||||
|
||||
lisa_sig = np.empty(len(sig), np.chararray)
|
||||
|
||||
for idx, val in enumerate(sig):
|
||||
if val:
|
||||
lisa_sig[idx] = map_quads(quads[idx])
|
||||
else:
|
||||
lisa_sig[idx] = 'Not significant'
|
||||
|
||||
return lisa_sig
|
||||
return [map_quads(q) for q in quads]
|
||||
|
||||
1
src/py/crankshaft/crankshaft/contours/__init__.py
Normal file
1
src/py/crankshaft/crankshaft/contours/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from contours import *
|
||||
58
src/py/crankshaft/crankshaft/contours/contours.py
Normal file
58
src/py/crankshaft/crankshaft/contours/contours.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from scipy.stats import gaussian_kde
|
||||
from scipy.interpolate import griddata
|
||||
import numpy as np
|
||||
from sklearn.neighbors import KernelDensity
|
||||
from skimage.measure import find_contours
|
||||
import plpy
|
||||
|
||||
def cdb_generate_contours(query, grid_size, bandwidth, levels):
|
||||
plpy.notice('one')
|
||||
data = plpy.execute( 'select ST_X(the_geom) as x , ST_Y(the_geom) as y from ({0}) as a '.format(query))
|
||||
plpy.notice('two')
|
||||
|
||||
xs = [d['x'] for d in data]
|
||||
ys = [d['y'] for d in data]
|
||||
plpy.notice('three')
|
||||
return generate_contours(xs,ys,grid_size,bandwidth,levels)
|
||||
|
||||
def scale_coord(coord, x_range,y_range,grid_size):
|
||||
plpy.notice('ranges %, % ', x_range, y_range)
|
||||
return [coord[0]*(x_range[1]-x_range[0])/float(grid_size)+x_range[0],
|
||||
coord[1]*(y_range[1]-y_range[0])/float(grid_size)+y_range[0]]
|
||||
|
||||
def make_wkt(data,x_range, y_range, grid_size):
|
||||
joined = ','.join([' '.join(map(str,scale_coord(coord_pair, x_range, y_range, grid_size))) for coord_pair in data])
|
||||
return '({0})'.format(joined)
|
||||
|
||||
def make_multi_line(data,x_range,y_range, grid_size):
|
||||
joined = ','.join([ make_wkt(ring,x_range,y_range,grid_size) for ring in data ])
|
||||
return 'MULTILINESTRING({0})'.format(joined)
|
||||
|
||||
def generate_contours(xs,ys, grid_res=100, bandwidth=0.001, levels=None):
|
||||
plpy.notice("HERE")
|
||||
max_y, min_y = np.max(ys), np.min(ys)
|
||||
max_x, min_x = np.max(xs), np.min(xs)
|
||||
positions = np.vstack([ys,xs]).T
|
||||
grid_x,grid_y = np.meshgrid(np.linspace(min_x, max_x , grid_res), np.linspace(min_y, max_y, grid_res))
|
||||
xy = np.vstack([grid_y.ravel(), grid_x.ravel()]).T
|
||||
xy *= np.pi / 180.
|
||||
|
||||
plpy.notice(" Generating kernel density")
|
||||
kde = KernelDensity(bandwidth=bandwidth, metric='haversine',
|
||||
kernel='gaussian', algorithm='ball_tree')
|
||||
kde.fit(positions*np.pi/180.)
|
||||
results = np.exp(kde.score_samples(xy))
|
||||
results = results.reshape((grid_x.shape[0], grid_y.shape[0]))
|
||||
|
||||
if not levels:
|
||||
levels = np.linspace(results.min(), results.max(),60)
|
||||
plpy.notice(' finding contours')
|
||||
CS = [find_contours(results, level) for level in levels]
|
||||
|
||||
vertices = []
|
||||
for contours,level in zip(CS,levels):
|
||||
if len(contours)>0:
|
||||
multiline = make_multi_line(contours, (min_x,max_x), (min_y, max_y), grid_res)
|
||||
vertices.append([level, multiline ])
|
||||
plpy.notice('generated vertices retunring ?')
|
||||
return vertices
|
||||
1
src/py/crankshaft/crankshaft/pysal_utils/__init__.py
Normal file
1
src/py/crankshaft/crankshaft/pysal_utils/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from pysal_utils import *
|
||||
152
src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py
Normal file
152
src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py
Normal file
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Utilities module for generic PySAL functionality, mainly centered on translating queries into numpy arrays or PySAL weights objects
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
|
||||
def construct_neighbor_query(w_type, query_vals):
|
||||
"""Return query (a string) used for finding neighbors
|
||||
@param w_type text: type of neighbors to calculate ('knn' or 'queen')
|
||||
@param query_vals dict: values used to construct the query
|
||||
"""
|
||||
|
||||
if w_type == 'knn':
|
||||
return knn(query_vals)
|
||||
else:
|
||||
return queen(query_vals)
|
||||
|
||||
## Build weight object
|
||||
def get_weight(query_res, w_type='knn', num_ngbrs=5):
|
||||
"""
|
||||
Construct PySAL weight from return value of query
|
||||
@param query_res: query results with attributes and neighbors
|
||||
"""
|
||||
if w_type == 'knn':
|
||||
row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs
|
||||
weights = {x['id']: row_normed_weights for x in query_res}
|
||||
else:
|
||||
weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors'])
|
||||
if len(x['neighbors']) > 0
|
||||
else [] for x in query_res}
|
||||
|
||||
neighbors = {x['id']: x['neighbors'] for x in query_res}
|
||||
|
||||
return ps.W(neighbors, weights)
|
||||
|
||||
def query_attr_select(params):
|
||||
"""
|
||||
Create portion of SELECT statement for attributes inolved in query.
|
||||
@param params: dict of information used in query (column names,
|
||||
table name, etc.)
|
||||
"""
|
||||
|
||||
attrs = [k for k in params
|
||||
if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')]
|
||||
|
||||
template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, "
|
||||
|
||||
attr_string = ""
|
||||
|
||||
for idx, val in enumerate(sorted(attrs)):
|
||||
attr_string += template % {"col": val, "alias_num": idx + 1}
|
||||
|
||||
return attr_string
|
||||
|
||||
def query_attr_where(params):
|
||||
"""
|
||||
Create portion of WHERE clauses for weeding out NULL-valued geometries
|
||||
"""
|
||||
attrs = sorted([k for k in params
|
||||
if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')])
|
||||
|
||||
attr_string = []
|
||||
|
||||
for attr in attrs:
|
||||
attr_string.append("idx_replace.\"{%s}\" IS NOT NULL" % attr)
|
||||
|
||||
if len(attrs) == 2:
|
||||
attr_string.append("idx_replace.\"{%s}\" <> 0" % attrs[1])
|
||||
|
||||
out = " AND ".join(attr_string)
|
||||
|
||||
return out
|
||||
|
||||
def knn(params):
|
||||
"""SQL query for k-nearest neighbors.
|
||||
@param vars: dict of values to fill template
|
||||
"""
|
||||
|
||||
attr_select = query_attr_select(params)
|
||||
attr_where = query_attr_where(params)
|
||||
|
||||
replacements = {"attr_select": attr_select,
|
||||
"attr_where_i": attr_where.replace("idx_replace", "i"),
|
||||
"attr_where_j": attr_where.replace("idx_replace", "j")}
|
||||
|
||||
query = "SELECT " \
|
||||
"i.\"{id_col}\" As id, " \
|
||||
"%(attr_select)s" \
|
||||
"(SELECT ARRAY(SELECT j.\"{id_col}\" " \
|
||||
"FROM ({subquery}) As j " \
|
||||
"WHERE " \
|
||||
"i.\"{id_col}\" <> j.\"{id_col}\" AND " \
|
||||
"%(attr_where_j)s " \
|
||||
"ORDER BY " \
|
||||
"j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \
|
||||
"LIMIT {num_ngbrs})" \
|
||||
") As neighbors " \
|
||||
"FROM ({subquery}) As i " \
|
||||
"WHERE " \
|
||||
"%(attr_where_i)s " \
|
||||
"ORDER BY i.\"{id_col}\" ASC;" % replacements
|
||||
|
||||
return query.format(**params)
|
||||
|
||||
## SQL query for finding queens neighbors (all contiguous polygons)
|
||||
def queen(params):
|
||||
"""SQL query for queen neighbors.
|
||||
@param params dict: information to fill query
|
||||
"""
|
||||
attr_select = query_attr_select(params)
|
||||
attr_where = query_attr_where(params)
|
||||
|
||||
replacements = {"attr_select": attr_select,
|
||||
"attr_where_i": attr_where.replace("idx_replace", "i"),
|
||||
"attr_where_j": attr_where.replace("idx_replace", "j")}
|
||||
|
||||
query = "SELECT " \
|
||||
"i.\"{id_col}\" As id, " \
|
||||
"%(attr_select)s" \
|
||||
"(SELECT ARRAY(SELECT j.\"{id_col}\" " \
|
||||
"FROM ({subquery}) As j " \
|
||||
"WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \
|
||||
"ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \
|
||||
"%(attr_where_j)s)" \
|
||||
") As neighbors " \
|
||||
"FROM ({subquery}) As i " \
|
||||
"WHERE " \
|
||||
"%(attr_where_i)s " \
|
||||
"ORDER BY i.\"{id_col}\" ASC;" % replacements
|
||||
|
||||
return query.format(**params)
|
||||
|
||||
## to add more weight methods open a ticket or pull request
|
||||
|
||||
def get_attributes(query_res, attr_num=1):
|
||||
"""
|
||||
@param query_res: query results with attributes and neighbors
|
||||
@param attr_num: attribute number (1, 2, ...)
|
||||
"""
|
||||
return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float)
|
||||
|
||||
def empty_zipped_array(num_nones):
|
||||
"""
|
||||
prepare return values for cases of empty weights objects (no neighbors)
|
||||
Input:
|
||||
@param num_nones int: number of columns (e.g., 4)
|
||||
Output:
|
||||
[(None, None, None, None)]
|
||||
"""
|
||||
|
||||
return [tuple([None] * num_nones)]
|
||||
@@ -42,7 +42,7 @@ setup(
|
||||
# provisioned in the production servers.
|
||||
install_requires=['pysal==1.9.1'],
|
||||
|
||||
requires=['pysal', 'numpy' ],
|
||||
requires=['pysal', 'numpy', 'sklearn', 'scikit-image'],
|
||||
|
||||
test_suite='test'
|
||||
)
|
||||
|
||||
70
src/py/crankshaft/test/fixtures/moran.json
vendored
70
src/py/crankshaft/test/fixtures/moran.json
vendored
@@ -1,52 +1,52 @@
|
||||
[[0.9319096128346788, "HH"],
|
||||
[-1.135787401862846, "HL"],
|
||||
[0.11732030672508517, "Not significant"],
|
||||
[0.6152779669180425, "Not significant"],
|
||||
[-0.14657336660125297, "Not significant"],
|
||||
[0.6967858120189607, "Not significant"],
|
||||
[0.07949310115714454, "Not significant"],
|
||||
[0.4703198759258987, "Not significant"],
|
||||
[0.4421125200498064, "Not significant"],
|
||||
[0.5724288737143592, "Not significant"],
|
||||
[0.11732030672508517, "LL"],
|
||||
[0.6152779669180425, "LL"],
|
||||
[-0.14657336660125297, "LH"],
|
||||
[0.6967858120189607, "LL"],
|
||||
[0.07949310115714454, "HH"],
|
||||
[0.4703198759258987, "HH"],
|
||||
[0.4421125200498064, "HH"],
|
||||
[0.5724288737143592, "LL"],
|
||||
[0.8970743435692062, "LL"],
|
||||
[0.18327334401918674, "Not significant"],
|
||||
[-0.01466729201304962, "Not significant"],
|
||||
[0.3481559372544409, "Not significant"],
|
||||
[0.06547094736902978, "Not significant"],
|
||||
[0.18327334401918674, "LL"],
|
||||
[-0.01466729201304962, "HL"],
|
||||
[0.3481559372544409, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329988, "HH"],
|
||||
[0.4373841193538136, "Not significant"],
|
||||
[0.15971286468915544, "Not significant"],
|
||||
[1.0543588860308968, "Not significant"],
|
||||
[0.4373841193538136, "HH"],
|
||||
[0.15971286468915544, "LL"],
|
||||
[1.0543588860308968, "HH"],
|
||||
[1.7372866900020818, "HH"],
|
||||
[1.091998586053999, "LL"],
|
||||
[0.1171572584252222, "Not significant"],
|
||||
[0.08438455015300014, "Not significant"],
|
||||
[0.06547094736902978, "Not significant"],
|
||||
[0.1171572584252222, "HH"],
|
||||
[0.08438455015300014, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329985, "HH"],
|
||||
[1.1627044812890683, "HH"],
|
||||
[0.06547094736902978, "Not significant"],
|
||||
[0.795275137550483, "Not significant"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.795275137550483, "HH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.3010757406693439, "Not significant"],
|
||||
[0.3010757406693439, "LL"],
|
||||
[2.8205795942839376, "HH"],
|
||||
[0.11259190602909264, "Not significant"],
|
||||
[-0.07116352791516614, "Not significant"],
|
||||
[-0.09945240794119009, "Not significant"],
|
||||
[0.11259190602909264, "LL"],
|
||||
[-0.07116352791516614, "HL"],
|
||||
[-0.09945240794119009, "LH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.1832733440191868, "Not significant"],
|
||||
[-0.39054253768447705, "Not significant"],
|
||||
[0.1832733440191868, "LL"],
|
||||
[-0.39054253768447705, "HL"],
|
||||
[-0.1672071289487642, "HL"],
|
||||
[0.3337669247916343, "Not significant"],
|
||||
[0.2584386102554792, "Not significant"],
|
||||
[0.3337669247916343, "HH"],
|
||||
[0.2584386102554792, "HH"],
|
||||
[-0.19733845476322634, "HL"],
|
||||
[-0.9379282899805409, "LH"],
|
||||
[-0.028770969951095866, "Not significant"],
|
||||
[0.051367269430983485, "Not significant"],
|
||||
[-0.028770969951095866, "LH"],
|
||||
[0.051367269430983485, "LL"],
|
||||
[-0.2172548045913472, "LH"],
|
||||
[0.05136726943098351, "Not significant"],
|
||||
[0.04191046803899837, "Not significant"],
|
||||
[0.05136726943098351, "LL"],
|
||||
[0.04191046803899837, "LL"],
|
||||
[0.7482357030403517, "HH"],
|
||||
[-0.014585767863118111, "Not significant"],
|
||||
[0.5410013139159929, "Not significant"],
|
||||
[-0.014585767863118111, "LH"],
|
||||
[0.5410013139159929, "HH"],
|
||||
[1.0223932668429925, "LL"],
|
||||
[1.4179402898927476, "LL"]]
|
||||
[1.4179402898927476, "LL"]]
|
||||
@@ -1,8 +1,6 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
import unittest
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
@@ -12,11 +10,12 @@ import unittest
|
||||
from helper import plpy, fixture_file
|
||||
|
||||
import crankshaft.clustering as cc
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
|
||||
class MoranTest(unittest.TestCase):
|
||||
"""Testing class for Moran's I functions."""
|
||||
"""Testing class for Moran's I functions"""
|
||||
|
||||
def setUp(self):
|
||||
plpy._reset()
|
||||
@@ -30,7 +29,7 @@ class MoranTest(unittest.TestCase):
|
||||
self.moran_data = json.loads(open(fixture_file('moran.json')).read())
|
||||
|
||||
def test_map_quads(self):
|
||||
"""Test map_quads."""
|
||||
"""Test map_quads"""
|
||||
self.assertEqual(cc.map_quads(1), 'HH')
|
||||
self.assertEqual(cc.map_quads(2), 'LH')
|
||||
self.assertEqual(cc.map_quads(3), 'LL')
|
||||
@@ -38,80 +37,8 @@ class MoranTest(unittest.TestCase):
|
||||
self.assertEqual(cc.map_quads(33), None)
|
||||
self.assertEqual(cc.map_quads('andy'), None)
|
||||
|
||||
def test_query_attr_select(self):
|
||||
"""Test query_attr_select."""
|
||||
|
||||
ans = "i.\"{attr1}\"::numeric As attr1, " \
|
||||
"i.\"{attr2}\"::numeric As attr2, "
|
||||
|
||||
self.assertEqual(cc.query_attr_select(self.params), ans)
|
||||
|
||||
def test_query_attr_where(self):
|
||||
"""Test query_attr_where."""
|
||||
|
||||
ans = "idx_replace.\"{attr1}\" IS NOT NULL AND "\
|
||||
"idx_replace.\"{attr2}\" IS NOT NULL AND "\
|
||||
"idx_replace.\"{attr2}\" <> 0"
|
||||
|
||||
self.assertEqual(cc.query_attr_where(self.params), ans)
|
||||
|
||||
def test_knn(self):
|
||||
"""Test knn function."""
|
||||
|
||||
ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \
|
||||
"i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT j.\"cartodb_id\" " \
|
||||
"FROM \"(SELECT * FROM a_list)\" As j WHERE j.\"andy\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 ORDER BY " \
|
||||
"j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 OFFSET 1 ) ) " \
|
||||
"As neighbors FROM \"(SELECT * FROM a_list)\" As i WHERE i.\"andy\" IS NOT " \
|
||||
"NULL AND i.\"jay_z\" IS NOT NULL AND i.\"jay_z\" <> 0 ORDER " \
|
||||
"BY i.\"cartodb_id\" ASC;"
|
||||
|
||||
self.assertEqual(cc.knn(self.params), ans)
|
||||
|
||||
def test_queen(self):
|
||||
"""Test queen neighbors function."""
|
||||
|
||||
ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \
|
||||
"i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \
|
||||
"j.\"cartodb_id\" FROM \"(SELECT * FROM a_list)\" As j WHERE ST_Touches(" \
|
||||
"i.\"the_geom\", j.\"the_geom\") AND j.\"andy\" IS NOT NULL " \
|
||||
"AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0)) As " \
|
||||
"neighbors FROM \"(SELECT * FROM a_list)\" As i WHERE i.\"andy\" IS NOT NULL " \
|
||||
"AND i.\"jay_z\" IS NOT NULL AND i.\"jay_z\" <> 0 ORDER BY " \
|
||||
"i.\"cartodb_id\" ASC;"
|
||||
|
||||
self.assertEqual(cc.queen(self.params), ans)
|
||||
|
||||
def test_get_query(self):
|
||||
"""Test get_query."""
|
||||
|
||||
ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \
|
||||
"i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \
|
||||
"j.\"cartodb_id\" FROM \"(SELECT * FROM a_list)\" As j WHERE j.\"andy\" IS " \
|
||||
"NOT NULL AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 " \
|
||||
"ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 " \
|
||||
"OFFSET 1 ) ) As neighbors FROM \"(SELECT * FROM a_list)\" As i WHERE " \
|
||||
"i.\"andy\" IS NOT NULL AND i.\"jay_z\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" <> 0 ORDER BY i.\"cartodb_id\" ASC;"
|
||||
|
||||
self.assertEqual(cc.get_query('knn', self.params), ans)
|
||||
|
||||
def test_get_attributes(self):
|
||||
"""Test get_attributes."""
|
||||
|
||||
## need to add tests
|
||||
|
||||
self.assertEqual(True, True)
|
||||
|
||||
def test_get_weight(self):
|
||||
"""Test get_weight."""
|
||||
|
||||
self.assertEqual(True, True)
|
||||
|
||||
|
||||
def test_quad_position(self):
|
||||
"""Test lisa_sig_vals."""
|
||||
"""Test lisa_sig_vals"""
|
||||
|
||||
quads = np.array([1, 2, 3, 4], np.int)
|
||||
|
||||
@@ -125,7 +52,7 @@ class MoranTest(unittest.TestCase):
|
||||
data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1234)
|
||||
result = cc.moran_local('table', 'value', 0.05, 5, 99, 'the_geom', 'cartodb_id', 'knn')
|
||||
result = cc.moran_local('subquery', 'value', 99, 'the_geom', 'cartodb_id', 'knn', 5)
|
||||
result = [(row[0], row[1]) for row in result]
|
||||
expected = self.moran_data
|
||||
for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected):
|
||||
@@ -137,8 +64,20 @@ class MoranTest(unittest.TestCase):
|
||||
data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1234)
|
||||
result = cc.moran_local_rate('table', 'numerator', 'denominator', 0.05, 5, 99, 'the_geom', 'cartodb_id', 'knn')
|
||||
result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 99, 'the_geom', 'cartodb_id', 'knn', 5)
|
||||
print 'result == None? ', result == None
|
||||
result = [(row[0], row[1]) for row in result]
|
||||
expected = self.moran_data
|
||||
for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected):
|
||||
self.assertAlmostEqual(res_val, exp_val)
|
||||
|
||||
def test_moran(self):
|
||||
"""Test Moran's I global"""
|
||||
data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1235)
|
||||
result = cc.moran('table', 'value', 99, 'the_geom', 'cartodb_id', 'knn', 5)
|
||||
print 'result == None?', result == None
|
||||
result_moran = result[0][0]
|
||||
expected_moran = np.array([row[0] for row in self.moran_data]).mean()
|
||||
self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)
|
||||
|
||||
107
src/py/crankshaft/test/test_pysal_utils.py
Normal file
107
src/py/crankshaft/test/test_pysal_utils.py
Normal file
@@ -0,0 +1,107 @@
|
||||
import unittest
|
||||
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
|
||||
|
||||
class PysalUtilsTest(unittest.TestCase):
|
||||
"""Testing class for utility functions related to PySAL integrations"""
|
||||
|
||||
def setUp(self):
|
||||
self.params = {"id_col": "cartodb_id",
|
||||
"attr1": "andy",
|
||||
"attr2": "jay_z",
|
||||
"subquery": "SELECT * FROM a_list",
|
||||
"geom_col": "the_geom",
|
||||
"num_ngbrs": 321}
|
||||
|
||||
def test_query_attr_select(self):
|
||||
"""Test query_attr_select"""
|
||||
|
||||
ans = "i.\"{attr1}\"::numeric As attr1, " \
|
||||
"i.\"{attr2}\"::numeric As attr2, "
|
||||
|
||||
self.assertEqual(pu.query_attr_select(self.params), ans)
|
||||
|
||||
def test_query_attr_where(self):
|
||||
"""Test pu.query_attr_where"""
|
||||
|
||||
ans = "idx_replace.\"{attr1}\" IS NOT NULL AND " \
|
||||
"idx_replace.\"{attr2}\" IS NOT NULL AND " \
|
||||
"idx_replace.\"{attr2}\" <> 0"
|
||||
|
||||
self.assertEqual(pu.query_attr_where(self.params), ans)
|
||||
|
||||
def test_knn(self):
|
||||
"""Test knn neighbors constructor"""
|
||||
|
||||
ans = "SELECT i.\"cartodb_id\" As id, " \
|
||||
"i.\"andy\"::numeric As attr1, " \
|
||||
"i.\"jay_z\"::numeric As attr2, " \
|
||||
"(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
|
||||
"FROM (SELECT * FROM a_list) As j " \
|
||||
"WHERE " \
|
||||
"i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
|
||||
"j.\"andy\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" <> 0 " \
|
||||
"ORDER BY " \
|
||||
"j.\"the_geom\" <-> i.\"the_geom\" ASC " \
|
||||
"LIMIT 321)) As neighbors " \
|
||||
"FROM (SELECT * FROM a_list) As i " \
|
||||
"WHERE i.\"andy\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" <> 0 " \
|
||||
"ORDER BY i.\"cartodb_id\" ASC;"
|
||||
|
||||
self.assertEqual(pu.knn(self.params), ans)
|
||||
|
||||
def test_queen(self):
|
||||
"""Test queen neighbors constructor"""
|
||||
|
||||
ans = "SELECT i.\"cartodb_id\" As id, " \
|
||||
"i.\"andy\"::numeric As attr1, " \
|
||||
"i.\"jay_z\"::numeric As attr2, " \
|
||||
"(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
|
||||
"FROM (SELECT * FROM a_list) As j " \
|
||||
"WHERE " \
|
||||
"i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
|
||||
"ST_Touches(i.\"the_geom\", " \
|
||||
"j.\"the_geom\") AND " \
|
||||
"j.\"andy\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" <> 0)" \
|
||||
") As neighbors " \
|
||||
"FROM (SELECT * FROM a_list) As i " \
|
||||
"WHERE i.\"andy\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" <> 0 " \
|
||||
"ORDER BY i.\"cartodb_id\" ASC;"
|
||||
|
||||
self.assertEqual(pu.queen(self.params), ans)
|
||||
|
||||
def test_construct_neighbor_query(self):
|
||||
"""Test construct_neighbor_query"""
|
||||
|
||||
# Compare to raw knn query
|
||||
self.assertEqual(pu.construct_neighbor_query('knn', self.params),
|
||||
pu.knn(self.params))
|
||||
|
||||
def test_get_attributes(self):
|
||||
"""Test get_attributes"""
|
||||
|
||||
## need to add tests
|
||||
|
||||
self.assertEqual(True, True)
|
||||
|
||||
def test_get_weight(self):
|
||||
"""Test get_weight"""
|
||||
|
||||
self.assertEqual(True, True)
|
||||
|
||||
def test_empty_zipped_array(self):
|
||||
"""Test empty_zipped_array"""
|
||||
ans2 = [(None, None)]
|
||||
ans4 = [(None, None, None, None)]
|
||||
self.assertEqual(pu.empty_zipped_array(2), ans2)
|
||||
self.assertEqual(pu.empty_zipped_array(4), ans4)
|
||||
Reference in New Issue
Block a user