Compare commits
5 Commits
CDB_UNION_
...
bayesian_b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0e24d542b3 | ||
|
|
79bd319366 | ||
|
|
46c66476b5 | ||
|
|
e03aac4d8f | ||
|
|
d885c16db2 |
@@ -28,3 +28,6 @@ REGRESS_OPTS = --inputdir='$(TEST_DIR)' --outputdir='$(TEST_DIR)'
|
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS)
|
||||
|
||||
# This seems to be needed at least for PG 9.3.11
|
||||
all: $(DATA)
|
||||
|
||||
@@ -137,6 +137,53 @@ BEGIN
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql VOLATILE;
|
||||
-- cdb_create_segment: PL/Python wrapper that delegates to
-- crankshaft.segmentation.create_segment and returns its result as NUMERIC.
--
-- Parameters:
--   segment_name  TEXT
--   table_name    TEXT
--   column_name   TEXT
--   geoid_column  TEXT, defaults to 'geoid'
--   census_table  TEXT, defaults to 'block_groups'
CREATE OR REPLACE FUNCTION
  cdb_create_segment (
      segment_name TEXT,
      table_name TEXT,
      column_name TEXT,
      geoid_column TEXT DEFAULT 'geoid',
      census_table TEXT DEFAULT 'block_groups'
  )
RETURNS NUMERIC
AS $$
    # The imported name must match the name called below; the previous
    # spelling ("create_segemnt") made every call fail before doing any work.
    from crankshaft.segmentation import create_segment
    # TODO: use named parameters or a dictionary
    # NOTE(review): the literal 'table' is passed and none of the SQL
    # arguments are forwarded yet -- confirm create_segment's signature
    # before wiring them through.
    return create_segment('table')
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- cdb_predict_segment: PL/Python wrapper declared to return one
-- (geoid, prediction) row per result.
--
-- Parameters:
--   segment_name  TEXT
--   geoid_column  TEXT, defaults to 'geoid'
--   census_table  TEXT, defaults to 'block_groups'
CREATE OR REPLACE FUNCTION
  cdb_predict_segment (
      segment_name TEXT,
      geoid_column TEXT DEFAULT 'geoid',
      census_table TEXT DEFAULT 'block_groups'
  )
RETURNS TABLE(geoid TEXT, prediction NUMERIC)
AS $$
    # The imported name must match the name called below; the previous
    # spelling ("create_segemnt") made every call fail before doing any work.
    from crankshaft.segmentation import create_segment
    # TODO: use named parameters or a dictionary
    # NOTE(review): this calls create_segment with the literal 'table', the
    # same as cdb_create_segment. Presumably a prediction entry point is
    # intended here -- confirm which crankshaft.segmentation function should
    # be used and which SQL arguments it needs.
    return create_segment('table')
$$ LANGUAGE plpythonu;
|
||||
-- cdb_adaptive_histogram: PL/Python wrapper that forwards the table and
-- column names to crankshaft.bayesian_blocks.adaptive_histogram and exposes
-- the result as rows of (bin_start, bin_end, value).
CREATE OR REPLACE FUNCTION cdb_adaptive_histogram (
    table_name  TEXT,
    column_name TEXT
)
RETURNS TABLE (
    bin_start numeric,
    bin_end   numeric,
    value     numeric
)
AS $$
    from crankshaft.bayesian_blocks import adaptive_histogram
    return adaptive_histogram(table_name,column_name)
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- cdb_simple_test: takes no arguments and returns the constant 5 from a
-- PL/Python body.
CREATE OR REPLACE FUNCTION cdb_simple_test ()
RETURNS NUMERIC
AS $$
    return 5
$$ LANGUAGE plpythonu;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
### Union Adjacent
|
||||
|
||||
This is an aggregate function that will take a set of polygons and return a geometry array
|
||||
of regions where the polygons are contiguous. Basically it combines polygons
|
||||
which are touching into single polygons.
|
||||
|
||||
It takes a single value:
|
||||
|
||||
* `geometry` a list of geometries to be clustered and joined
|
||||
|
||||
and returns
|
||||
|
||||
* `geometry[]` an array of the joined geometries.
|
||||
|
||||
An example usage would be something like:
|
||||
|
||||
```postgresql
|
||||
with joined_polygons as (
|
||||
select cdb_union_adjacent(the_geom) regions from some_table
|
||||
)
|
||||
select unnest(regions) the_geom from joined_polygons
|
||||
```
|
||||
|
||||
which will produce a table with regions of contiguous polygons from the original
|
||||
table.
|
||||
@@ -1,43 +0,0 @@
|
||||
-- Final function for the cdb_union_adjacent aggregate: hands back the
-- accumulated geometry array unchanged.
CREATE OR REPLACE FUNCTION _cdb_final_union_adjacent(joined_geoms geometry[])
RETURNS geometry[]
AS $$
BEGIN
    RETURN joined_geoms;
END
$$ LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- State-transition function for the cdb_union_adjacent aggregate.
--
-- Parameters:
--   clusters  geometry[]  clusters accumulated so far
--   new_geom  geometry    geometry of the row being folded in
--
-- Returns the updated cluster array: every existing cluster that touches
-- new_geom is unioned with it into a single geometry; all other clusters are
-- carried over unchanged.
CREATE OR REPLACE FUNCTION _cdb_state_update_union_adjacent(clusters geometry[], new_geom geometry)
RETURNS geometry[] AS $$
DECLARE
    joins    geometry[];
    unjoined geometry[];
    combined geometry;
BEGIN
    -- A NULL geometry has nothing to merge. Without this guard both filters
    -- below evaluate to NULL for every cluster, so the whole accumulated
    -- state would be replaced by an array holding a single NULL.
    IF new_geom IS NULL THEN
        RETURN clusters;
    END IF;

    -- Clusters adjacent to the incoming geometry.
    joins := (
        SELECT array_agg(g)
        FROM unnest(clusters) AS a(g)
        WHERE ST_Touches(g, new_geom)
    );

    -- Everything else. IS NOT TRUE (rather than "= false") keeps clusters for
    -- which ST_Touches yields NULL, so the two filters partition the state.
    unjoined := (
        SELECT array_agg(g)
        FROM unnest(clusters) AS a(g)
        WHERE ST_Touches(g, new_geom) IS NOT TRUE
    );

    -- array_agg over zero rows is NULL, so array_length is NULL and the
    -- ELSE branch handles the "touches nothing" case.
    IF array_length(joins, 1) > 0 THEN
        combined := ST_Union(array_append(joins, new_geom));
    ELSE
        combined := new_geom;
    END IF;

    RETURN array_append(unjoined, combined);
END
$$
LANGUAGE plpgsql;
|
||||
|
||||
-- Aggregate over geometry values: the state is a geometry[] that starts
-- empty, is updated per row by _cdb_state_update_union_adjacent and is
-- passed through _cdb_final_union_adjacent to produce the result.
CREATE AGGREGATE cdb_union_adjacent (geometry) (
    SFUNC     = _cdb_state_update_union_adjacent,
    STYPE     = geometry[],
    FINALFUNC = _cdb_final_union_adjacent,
    INITCOND  = '{}'
);
|
||||
11
pg/sql/0.0.1/06_bayesian_blocks.sql
Normal file
11
pg/sql/0.0.1/06_bayesian_blocks.sql
Normal file
@@ -0,0 +1,11 @@
|
||||
-- cdb_adaptive_histogram: PL/Python wrapper that forwards the table and
-- column names to crankshaft.bayesian_blocks.adaptive_histogram and exposes
-- the result as rows of (bin_start, bin_end, value).
CREATE OR REPLACE FUNCTION cdb_adaptive_histogram (
    table_name  TEXT,
    column_name TEXT
)
RETURNS TABLE (
    bin_start numeric,
    bin_end   numeric,
    value     numeric
)
AS $$
    from crankshaft.bayesian_blocks import adaptive_histogram
    return adaptive_histogram(table_name,column_name)
$$ LANGUAGE plpythonu;
|
||||
@@ -1,22 +0,0 @@
|
||||
\i test/fixtures/touching_polygons.sql
|
||||
-- test table (polygons, some of which touch and some which dont)
|
||||
CREATE TABLE touching_polygons(cartodb_id integer, the_geom geometry);
|
||||
INSERT INTO touching_polygons VALUES
|
||||
(1, ST_GeomFromText('POLYGON ((0 0, 1 0,1 1, 0 1, 0 0 ))')),
|
||||
(2, ST_GeomFromText('POLYGON ((1 0, 2 0, 2 1, 1 1, 1 0))')),
|
||||
(1, ST_GeomFromText('POLYGON ((0 1, 1 1,1 2, 0 2, 0 1 ))')),
|
||||
(4, ST_GeomFromText('POLYGON ((3 0, 4 0, 4 1, 3 1, 3 0))')),
|
||||
(5, ST_GeomFromText('POLYGON ((3 1, 4 1, 4 2, 3 2, 3 1))'));
|
||||
WITH joined_polygons AS (
|
||||
SELECT cdb_crankshaft.cdb_union_adjacent(the_geom) the_geom FROM touching_polygons
|
||||
),
|
||||
unnested_polygons as (
|
||||
select unnest(joined_polygons.the_geom) the_geom from joined_polygons
|
||||
)
|
||||
select ST_ASTEXT(unnested_polygons.the_geom) from unnested_polygons;
|
||||
st_astext
|
||||
------------------------------------------------
|
||||
POLYGON((1 0,0 0,0 1,0 2,1 2,1 1,2 1,2 0,1 0))
|
||||
POLYGON((4 1,4 0,3 0,3 1,3 2,4 2,4 1))
|
||||
(2 rows)
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
\i test/fixtures/touching_polygons.sql
|
||||
|
||||
WITH joined_polygons AS (
|
||||
SELECT cdb_crankshaft.cdb_union_adjacent(the_geom) the_geom FROM touching_polygons
|
||||
),
|
||||
unnested_polygons as (
|
||||
select unnest(joined_polygons.the_geom) the_geom from joined_polygons
|
||||
)
|
||||
select ST_ASTEXT(unnested_polygons.the_geom) from unnested_polygons;
|
||||
8
pg/test/fixtures/touching_polygons.sql
vendored
8
pg/test/fixtures/touching_polygons.sql
vendored
@@ -1,8 +0,0 @@
|
||||
-- test table (polygons, some of which touch and some which dont)
|
||||
CREATE TABLE touching_polygons(cartodb_id integer, the_geom geometry);
|
||||
INSERT INTO touching_polygons VALUES
|
||||
(1, ST_GeomFromText('POLYGON ((0 0, 1 0,1 1, 0 1, 0 0 ))')),
|
||||
(2, ST_GeomFromText('POLYGON ((1 0, 2 0, 2 1, 1 1, 1 0))')),
|
||||
(1, ST_GeomFromText('POLYGON ((0 1, 1 1,1 2, 0 2, 0 1 ))')),
|
||||
(4, ST_GeomFromText('POLYGON ((3 0, 4 0, 4 1, 3 1, 3 0))')),
|
||||
(5, ST_GeomFromText('POLYGON ((3 1, 4 1, 4 2, 3 2, 3 1))'));
|
||||
@@ -1,2 +1,3 @@
|
||||
import random_seeds
|
||||
import clustering
|
||||
import bayesian_blocks
|
||||
|
||||
1
python/crankshaft/crankshaft/bayesian_blocks/__init__.py
Normal file
1
python/crankshaft/crankshaft/bayesian_blocks/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from bayesian_blocks import *
|
||||
@@ -0,0 +1,84 @@
|
||||
import plpy
|
||||
import numpy as np
|
||||
|
||||
|
||||
def adaptive_histogram(table_name, column_name):
    """Histogram one column of a table using Bayesian-blocks bin edges.

    Parameters
    ----------
    table_name : str
        Table to read from; interpolated into the query text as-is.
    column_name : str
        Column (or expression) to histogram; interpolated as-is.

    Returns
    -------
    list of (bin_start, bin_end, value) tuples of floats, one per bin, in
    the column order declared by the SQL wrapper's RETURNS TABLE clause.
    An empty list is returned when the column holds no non-NULL values.
    """
    # NOTE(review): both identifiers are spliced into the SQL text without
    # quoting, so callers must only pass trusted names. Quoting is left to
    # the caller because table_name may be schema-qualified.
    query = "select {column_name} as bb_value from {table_name}".format(
        column_name=column_name, table_name=table_name)
    rows = plpy.execute(query)

    # The fixed alias lets the value be read back whatever column was
    # requested (the key used to be hard-coded to 'count'). NULLs are
    # skipped because float(None) raises.
    data = [float(row['bb_value']) for row in rows
            if row['bb_value'] is not None]
    if not data:
        return []

    vals, bins = np.histogram(data, bins=_bayesian_blocks(data))

    # Emit plain Python floats ordered as (bin_start, bin_end, value).
    return [(float(lo), float(hi), float(v))
            for lo, hi, v in zip(bins[:-1], bins[1:], vals)]
|
||||
|
||||
|
||||
def _bayesian_blocks(t):
|
||||
"""Bayesian Blocks Implementation
|
||||
|
||||
By Jake Vanderplas. License: BSD
|
||||
Based on algorithm outlined in http://adsabs.harvard.edu/abs/2012arXiv1207.5578S
|
||||
|
||||
Parameters
|
||||
----------
|
||||
t : ndarray, length N
|
||||
data to be histogrammed
|
||||
|
||||
Returns
|
||||
-------
|
||||
bins : ndarray
|
||||
array containing the (N+1) bin edges
|
||||
|
||||
Notes
|
||||
-----
|
||||
This is an incomplete implementation: it may fail for some
|
||||
datasets. Alternate fitness functions and prior forms can
|
||||
be found in the paper listed above.
|
||||
"""
|
||||
# copy and sort the array
|
||||
t = np.sort(t)
|
||||
N = t.size
|
||||
|
||||
# create length-(N + 1) array of cell edges
|
||||
edges = np.concatenate([t[:1],
|
||||
0.5 * (t[1:] + t[:-1]),
|
||||
t[-1:]])
|
||||
block_length = t[-1] - edges
|
||||
|
||||
# arrays needed for the iteration
|
||||
nn_vec = np.ones(N)
|
||||
best = np.zeros(N, dtype=float)
|
||||
last = np.zeros(N, dtype=int)
|
||||
|
||||
#-----------------------------------------------------------------
|
||||
# Start with first data cell; add one cell at each iteration
|
||||
#-----------------------------------------------------------------
|
||||
for K in range(N):
|
||||
# Compute the width and count of the final bin for all possible
|
||||
# locations of the K^th changepoint
|
||||
width = block_length[:K + 1] - block_length[K + 1]
|
||||
count_vec = np.cumsum(nn_vec[:K + 1][::-1])[::-1]
|
||||
|
||||
# evaluate fitness function for these possibilities
|
||||
fit_vec = count_vec * (np.log(count_vec) - np.log(width))
|
||||
fit_vec -= 4 # 4 comes from the prior on the number of changepoints
|
||||
fit_vec[1:] += best[:K]
|
||||
|
||||
# find the max of the fitness: this is the K^th changepoint
|
||||
i_max = np.argmax(fit_vec)
|
||||
last[K] = i_max
|
||||
best[K] = fit_vec[i_max]
|
||||
|
||||
#-----------------------------------------------------------------
|
||||
# Recover changepoints by iteratively peeling off the last block
|
||||
#-----------------------------------------------------------------
|
||||
change_points = np.zeros(N, dtype=int)
|
||||
i_cp = N
|
||||
ind = N
|
||||
while True:
|
||||
i_cp -= 1
|
||||
change_points[i_cp] = ind
|
||||
if ind == 0:
|
||||
break
|
||||
ind = last[ind - 1]
|
||||
change_points = change_points[i_cp:]
|
||||
|
||||
return edges[change_points]
|
||||
@@ -10,7 +10,7 @@ from setuptools import setup, find_packages
|
||||
setup(
|
||||
name='crankshaft',
|
||||
|
||||
version='0.0.01',
|
||||
version='0.0.1',
|
||||
|
||||
description='CartoDB Spatial Analysis Python Library',
|
||||
|
||||
@@ -40,7 +40,7 @@ setup(
|
||||
|
||||
# The choice of component versions is dictated by what's
|
||||
# provisioned in the production servers.
|
||||
install_requires=['pysal==1.11.0','numpy==1.6.1','scipy==0.17.0'],
|
||||
install_requires=['pysal==1.11.0','numpy==1.10.1','scipy==0.17.0'],
|
||||
|
||||
requires=['pysal', 'numpy'],
|
||||
|
||||
|
||||
Reference in New Issue
Block a user