Compare commits

..

3 Commits

Author SHA1 Message Date
Javier Goizueta
9e2f2b7c2d Fix the tests for adjacent union function 2016-02-26 11:55:20 +01:00
Stuart Lynn
69b6aa6aca cdb_union_adjacent documentation 2016-02-24 13:59:25 -05:00
Stuart Lynn
e84b467cbc cdb_union_adgacent 2016-02-24 13:51:18 -05:00
50 changed files with 313 additions and 164 deletions

View File

@@ -1,5 +1,5 @@
EXT_DIR = src/pg
PYP_DIR = src/py
EXT_DIR = pg
PYP_DIR = python
.PHONY: install
.PHONY: run_tests

View File

@@ -4,87 +4,9 @@ CartoDB Spatial Analysis extension for PostgreSQL.
## Code organization
* *doc* documentation
* *src* source code
* - *src/pg* contains the PostgreSQL extension source code
* - *src/py* Python module source code
* *release* reselesed versions
* *pg* contains the PostgreSQL extension source code
* *python* Python module
## Requirements
* pip, virtualenv, PostgreSQL
# Working Process
## Development
Work in `src/pg/sql`, `src/py/crankshaft`;
use topic branch.
Update local installation with `sudo make install`
(this will update the 'dev' version of the extension in 'src/pg/')
Run the tests with `PGUSER=postgres make test`
Update extension in working database with
* `ALTER EXTENSION crankshaft VERSION TO 'current';`
`ALTER EXTENSION crankshaft VERSION TO 'dev';`
Note: we keep the current development version install as 'dev' always;
we update through the 'current' alias to allow changing the extension
contents but not the version identifier. This will fail if the
changes involve incompatible function changes such as a different
return type; in that case the offending function (or the whole extension)
should be dropped manually before the update.
If the extension has not previously been installed in a database
we can:
Add tests...
* `CREATE EXTENSION crankshaft WITH VERSION 'dev';`
Test
Commit, push, create PR, wait for CI tests, CR, ...
## Release
To release current development version
(working directory should be clean in dev branch)
(process to be gradually automated)
For backwards compatible changes (no return value, num of arguments, etc. changes...)
new version number increasing either patch level (no new functionality)
or minor level (new functionality) => 'X.Y.Z'.
Update version in src/pg/crankshaft.control
Copy release/crankshaft--current.sql to release/crankshaft--X.Y.Z.sql
Prepare incremental downgrade, upgrade scripts....
Python: ...
Install the new release
`make install-release`
Test the new release
`make test-release`
Push the release
Wait for CI tests
Merge into master
Deploy: install extension and python to production hosts,
update extension in databases (limited to team users, data observatory, ...)
Release manager role: ...
.sql release scripts
commit
tests: staging....
merge, tag, deploy...
* pip

3
pg/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
regression.diffs
regression.out
results/

30
pg/Makefile Normal file
View File

@@ -0,0 +1,30 @@
# Makefile to generate the extension out of separate sql source files.
# Once a version is released, it is not meant to be changed. E.g: once version 0.0.1 is out, it SHALL NOT be changed.
EXTENSION = crankshaft
EXTVERSION = $(shell grep default_version $(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
# The new version to be generated from templates
NEW_EXTENSION_ARTIFACT = $(EXTENSION)--$(EXTVERSION).sql
# DATA is a special variable used by postgres build infrastructure
# These are the files to be installed in the server shared dir,
# for installation from scratch, upgrades and downgrades.
# @see http://www.postgresql.org/docs/current/static/extend-pgxs.html
DATA = $(NEW_EXTENSION_ARTIFACT)
SOURCES_DATA_DIR = sql/$(EXTVERSION)
SOURCES_DATA = $(wildcard sql/$(EXTVERSION)/*.sql)
# The extension installation artifacts are stored in the base subdirectory
$(NEW_EXTENSION_ARTIFACT): $(SOURCES_DATA)
rm -f $@
cat $(SOURCES_DATA_DIR)/*.sql >> $@
REGRESS = $(notdir $(basename $(wildcard test/$(EXTVERSION)/sql/*test.sql)))
TEST_DIR = test/$(EXTVERSION)
REGRESS_OPTS = --inputdir='$(TEST_DIR)' --outputdir='$(TEST_DIR)'
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)

148
pg/crankshaft--0.0.1.sql Normal file
View File

@@ -0,0 +1,148 @@
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
-- Internal function.
-- Set the seeds of the RNGs (Random Number Generators)
-- used internally.
CREATE OR REPLACE FUNCTION
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
AS $$
from crankshaft import random_seeds
random_seeds.set_random_seeds(seed_value)
$$ LANGUAGE plpythonu;
-- Moran's I
CREATE OR REPLACE FUNCTION
cdb_moran_local (
t TEXT,
attr TEXT,
significance float DEFAULT 0.05,
num_ngbrs INT DEFAULT 5,
permutations INT DEFAULT 99,
geom_column TEXT DEFAULT 'the_geom',
id_col TEXT DEFAULT 'cartodb_id',
w_type TEXT DEFAULT 'knn')
RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
AS $$
from crankshaft.clustering import moran_local
# TODO: use named parameters or a dictionary
return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
$$ LANGUAGE plpythonu;
-- Moran's I Local Rate
CREATE OR REPLACE FUNCTION
cdb_moran_local_rate(t TEXT,
numerator TEXT,
denominator TEXT,
significance FLOAT DEFAULT 0.05,
num_ngbrs INT DEFAULT 5,
permutations INT DEFAULT 99,
geom_column TEXT DEFAULT 'the_geom',
id_col TEXT DEFAULT 'cartodb_id',
w_type TEXT DEFAULT 'knn')
RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric)
AS $$
from crankshaft.clustering import moran_local_rate
# TODO: use named parameters or a dictionary
return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
$$ LANGUAGE plpythonu;
-- Function by Stuart Lynn for a simple interpolation of a value
-- from a polygon table over an arbitrary polygon
-- (weighted by the area proportion overlapped)
-- Aereal weighting is a very simple form of aereal interpolation.
--
-- Parameters:
-- * geom a Polygon geometry which defines the area where a value will be
-- estimated as the area-weighted sum of a given table/column
-- * target_table_name table name of the table that provides the values
-- * target_column column name of the column that provides the values
-- * schema_name optional parameter to defina the schema the target table
-- belongs to, which is necessary if its not in the search_path.
-- Note that target_table_name should never include the schema in it.
-- Return value:
-- Aereal-weighted interpolation of the column values over the geometry
CREATE OR REPLACE
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
RETURNS numeric AS
$$
DECLARE
result numeric;
qualified_name text;
BEGIN
IF schema_name IS NULL THEN
qualified_name := Format('%I', target_table_name);
ELSE
qualified_name := Format('%I.%s', schema_name, target_table_name);
END IF;
EXECUTE Format('
SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
FROM %s AS a
WHERE $1 && a.the_geom
', target_column, qualified_name)
USING geom
INTO result;
RETURN result;
END;
$$ LANGUAGE plpgsql;
--
-- Creates N points randomly distributed arround the polygon
--
-- @param g - the geometry to be turned in to points
--
-- @param no_points - the number of points to generate
--
-- @params max_iter_per_point - the function generates points in the polygon's bounding box
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
-- misses per point the funciton accepts before giving up.
--
-- Returns: Multipoint with the requested points
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
RETURNS GEOMETRY AS $$
DECLARE
extent GEOMETRY;
test_point Geometry;
width NUMERIC;
height NUMERIC;
x0 NUMERIC;
y0 NUMERIC;
xp NUMERIC;
yp NUMERIC;
no_left INTEGER;
remaining_iterations INTEGER;
points GEOMETRY[];
bbox_line GEOMETRY;
intersection_line GEOMETRY;
BEGIN
extent := ST_Envelope(geom);
width := ST_XMax(extent) - ST_XMIN(extent);
height := ST_YMax(extent) - ST_YMIN(extent);
x0 := ST_XMin(extent);
y0 := ST_YMin(extent);
no_left := no_points;
LOOP
if(no_left=0) THEN
EXIT;
END IF;
yp = y0 + height*random();
bbox_line = ST_MakeLine(
ST_SetSRID(ST_MakePoint(yp, x0),4326),
ST_SetSRID(ST_MakePoint(yp, x0+width),4326)
);
intersection_line = ST_Intersection(bbox_line,geom);
test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
points := points || test_point;
no_left = no_left - 1 ;
END LOOP;
RETURN ST_Collect(points);
END;
$$
LANGUAGE plpgsql VOLATILE;
-- Make sure by default there are no permissions for publicuser
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
-- Grant permissions on the schema to publicuser (but just the schema)
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
-- Revoke execute permissions on all functions in the schema by default
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;

View File

@@ -0,0 +1,25 @@
### Union Adjacent
This is an aggregate function that will take a set of polygons and return a geometry array
of regions where the polygons are continuous. Basically it combines polygons
which are touching in to single polygons.
It takes a single value:
* `geometry` a list of geometries to be clustered and joined
and returns
* `geometry[]` an array of the joined geometries.
An example usage would be something like:
```postgresql
with joined_polygons as (
select cdb_union_adjacent(the_geom) regions from some_table
)
select unnest(region) the_geom from joined_polygons
```
which will produce a table with regions of continuous polygons from the original
table.

View File

@@ -4,7 +4,6 @@
CREATE OR REPLACE FUNCTION
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
AS $$
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
from crankshaft import random_seeds
random_seeds.set_random_seeds(seed_value)
$$ LANGUAGE plpythonu;

View File

@@ -11,7 +11,6 @@ CREATE OR REPLACE FUNCTION
w_type TEXT DEFAULT 'knn')
RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
AS $$
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
from crankshaft.clustering import moran_local
# TODO: use named parameters or a dictionary
return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
@@ -30,7 +29,6 @@ CREATE OR REPLACE FUNCTION
w_type TEXT DEFAULT 'knn')
RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric)
AS $$
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
from crankshaft.clustering import moran_local_rate
# TODO: use named parameters or a dictionary
return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type)

View File

@@ -0,0 +1,43 @@
CREATE OR REPLACE FUNCTION _cdb_final_union_adjacent( joined_geoms geometry[] )
RETURNS geometry[] AS $$
BEGIN
RETURN joined_geoms;
END
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION _cdb_state_update_union_adjacent(clusters geometry[], new_geom geometry)
RETURNS geometry[] AS $$
DECLARE
joins geometry[] :='{}';
unjoined geometry[] :='{}';
i integer;
combined geometry;
BEGIN
joins := (select array_agg(g)
from unnest(clusters) a(g)
where ST_TOUCHES(g, new_geom));
unjoined := (select array_agg(g)
from unnest(clusters) a(g)
where ST_TOUCHES(g, new_geom) = false);
IF array_length(joins, 1) > 0 THEN
joins := array_append(joins, new_geom);
combined := ST_UNION(joins);
ELSE
combined := new_geom;
END IF;
unjoined := array_append(unjoined, combined);
RETURN unjoined;
END
$$
LANGUAGE plpgsql;
CREATE AGGREGATE cdb_union_adjacent(geometry)(
SFUNC=_cdb_state_update_union_adjacent,
STYPE=geometry[],
FINALFUNC=_cdb_final_union_adjacent,
INITCOND='{}'
);

View File

@@ -3,4 +3,4 @@ CREATE EXTENSION plpythonu;
CREATE EXTENSION postgis;
CREATE EXTENSION cartodb;
-- Install the extension
CREATE EXTENSION crankshaft VERSION 'dev';
CREATE EXTENSION crankshaft;

View File

@@ -0,0 +1,22 @@
\i test/fixtures/touching_polygons.sql
-- test table (polygons, some of which touch and some which dont)
CREATE TABLE touching_polygons(cartodb_id integer, the_geom geometry);
INSERT INTO touching_polygons VALUES
(1, ST_GeomFromText('POLYGON ((0 0, 1 0,1 1, 0 1, 0 0 ))')),
(2, ST_GeomFromText('POLYGON ((1 0, 2 0, 2 1, 1 1, 1 0))')),
(1, ST_GeomFromText('POLYGON ((0 1, 1 1,1 2, 0 2, 0 1 ))')),
(4, ST_GeomFromText('POLYGON ((3 0, 4 0, 4 1, 3 1, 3 0))')),
(5, ST_GeomFromText('POLYGON ((3 1, 4 1, 4 2, 3 2, 3 1))'));
WITH joined_polygons AS (
SELECT cdb_crankshaft.cdb_union_adjacent(the_geom) the_geom FROM touching_polygons
),
unnested_polygons as (
select unnest(joined_polygons.the_geom) the_geom from joined_polygons
)
select ST_ASTEXT(unnested_polygons.the_geom) from unnested_polygons;
st_astext
------------------------------------------------
POLYGON((1 0,0 0,0 1,0 2,1 2,1 1,2 1,2 0,1 0))
POLYGON((4 1,4 0,3 0,3 1,3 2,4 2,4 1))
(2 rows)

View File

@@ -0,0 +1,6 @@
-- Install dependencies
CREATE EXTENSION plpythonu;
CREATE EXTENSION postgis;
CREATE EXTENSION cartodb;
-- Install the extension
CREATE EXTENSION crankshaft;

View File

@@ -4,4 +4,4 @@ CREATE EXTENSION postgis;
CREATE EXTENSION cartodb;
-- Install the extension
CREATE EXTENSION crankshaft VERSION 'dev';
CREATE EXTENSION crankshaft;

View File

@@ -0,0 +1,9 @@
\i test/fixtures/touching_polygons.sql
WITH joined_polygons AS (
SELECT cdb_crankshaft.cdb_union_adjacent(the_geom) the_geom FROM touching_polygons
),
unnested_polygons as (
select unnest(joined_polygons.the_geom) the_geom from joined_polygons
)
select ST_ASTEXT(unnested_polygons.the_geom) from unnested_polygons;

View File

@@ -0,0 +1,8 @@
-- test table (polygons, some of which touch and some which dont)
CREATE TABLE touching_polygons(cartodb_id integer, the_geom geometry);
INSERT INTO touching_polygons VALUES
(1, ST_GeomFromText('POLYGON ((0 0, 1 0,1 1, 0 1, 0 0 ))')),
(2, ST_GeomFromText('POLYGON ((1 0, 2 0, 2 1, 1 1, 1 0))')),
(1, ST_GeomFromText('POLYGON ((0 1, 1 1,1 2, 0 2, 0 1 ))')),
(4, ST_GeomFromText('POLYGON ((3 0, 4 0, 4 1, 3 1, 3 0))')),
(5, ST_GeomFromText('POLYGON ((3 1, 4 1, 4 2, 3 2, 3 1))'));

View File

@@ -1,2 +1 @@
*.pyc
dev/

11
python/Makefile Normal file
View File

@@ -0,0 +1,11 @@
# Install the package (needs root privileges)
install:
pip install ./crankshaft --upgrade
# Test from source code
test:
(cd crankshaft && nosetests test/)
# Test currently installed package
testinstalled:
nosetests crankshaft/test/

View File

@@ -10,7 +10,7 @@ from setuptools import setup, find_packages
setup(
name='crankshaft',
version='0.0.1',
version='0.0.01',
description='CartoDB Spatial Analysis Python Library',

6
src/pg/.gitignore vendored
View File

@@ -1,6 +0,0 @@
regression.diffs
regression.out
results/
crankshaft--dev.sql
crankshaft--dev--current.sql
crankshaft--current--dev.sql

View File

@@ -1,41 +0,0 @@
# Generation of a new development version 'dev' (with an alias 'current' for
# updating easily by upgrading to 'current', then 'dev')
# sudo make install -- generate the 'dev' version from current source
# and make it available to PostgreSQL
# PGUSER=postgres make installcheck -- test the 'dev' extension
EXTENSION = crankshaft
DATA = $(EXTENSION)--dev.sql \
$(EXTENSION)--current--dev.sql \
$(EXTENSION)--dev--current.sql
SOURCES_DATA_DIR = sql
SOURCES_DATA = $(wildcard $(SOURCES_DATA_DIR)/*.sql)
$(DATA): $(SOURCES_DATA)
cat $(SOURCES_DATA_DIR)/*.sql > $@
TEST_DIR = test
REGRESS = $(notdir $(basename $(wildcard $(TEST_DIR)/sql/*test.sql)))
REGRESS_OPTS = --inputdir='$(TEST_DIR)' --outputdir='$(TEST_DIR)'
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
# This seems to be needed at least for PG 9.3.11
all: $(DATA)
# WIP: goals for releasing the extension...
EXTVERSION = $(shell grep default_version $(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
../release/$(EXTENSION).control: $(EXTENSION).control
cp $< $@
release: ../release/$(EXTENSION).control
cp $(EXTENSION)--dev.sql $(EXTENSION)--$(EXTVERSION).sql
# pending: create upgrade/downgrade scripts,
# commit, push, tag....

View File

@@ -1,18 +0,0 @@
-- Use the crankshaft python module
CREATE OR REPLACE FUNCTION _cdb_crankshaft_activate_py()
RETURNS VOID
AS $$
# activate virtualenv
# TODO: parameterize with environment variables or something
venv_path = '/home/ubuntu/crankshaft/src/py/dev'
activate_path = venv_path + '/bin/activate_this.py'
exec(open(activate_path).read(),
dict(__file__=activate_path))
# import something from virtualenv
# from crankshaft import random_seeds
# do some stuff
# random_seeds.set_random_seeds(123)
# plpy.notice('here we are')
$$ LANGUAGE plpythonu;

View File

@@ -1,9 +0,0 @@
# Install the package locally for development
install:
virtualenv dev
./dev/bin/pip install ./crankshaft --upgrade
./dev/bin/pip install nose
# Test develpment install
testinstalled:
./dev/bin/nosetests crankshaft/test/