Compare commits

...

10 Commits

Author SHA1 Message Date
Stuart Lynn
0e24d542b3 renaming to fix typo 2016-03-07 12:55:25 -05:00
Stuart Lynn
79bd319366 bayesian_blocks function 2016-03-07 11:49:57 -05:00
Javier Goizueta
46c66476b5 Merge pull request #5 from CartoDB/4-pgxs-fix
Adapt Makefile of the extension for some PGXS versions
2016-02-29 16:35:04 +01:00
Javier Goizueta
e03aac4d8f Fix typo 2016-02-26 19:09:17 +01:00
Javier Goizueta
d885c16db2 Adapt Makefile of the extension for some PGXS versions
PostgreSQL 9.3.11 doesn't generate $DATA by default.
fixes #4
2016-02-26 19:02:18 +01:00
Rafa de la Torre
abfda1c75e Update CONTRIBUTING.md
minor change (just a space)
2016-02-23 17:23:33 +01:00
Rafa de la Torre
8f478ef22c Update README.md
Remove FIXME that should be already fixed.
2016-02-23 17:18:19 +01:00
Javier Goizueta
c7bb50be5a Fix: Make extension publicly available 2016-02-22 17:39:58 +01:00
Javier Goizueta
ef17e2fe4c Add header 2016-02-22 16:14:52 +01:00
Javier Goizueta
f3b8546063 Fix syntax 2016-02-22 16:14:28 +01:00
13 changed files with 194 additions and 7 deletions

View File

@@ -12,7 +12,7 @@ name must be created.
### Version numbers
The version of both the SQL extension and the Python package shall
follow the[Semantic Versioning 2.0](http://semver.org/) guidelines:
follow the [Semantic Versioning 2.0](http://semver.org/) guidelines:
* When backwards incompatibility is introduced the major number is incremented
* When functionality is added (in a backwards-compatible manner) the minor number

View File

@@ -7,8 +7,6 @@ CartoDB Spatial Analysis extension for PostgreSQL.
* *pg* contains the PostgreSQL extension source code
* *python* Python module
FIXME: should it be `./extension` and `./lib/python' ?
## Requirements
* pip

View File

@@ -28,3 +28,6 @@ REGRESS_OPTS = --inputdir='$(TEST_DIR)' --outputdir='$(TEST_DIR)'
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
# This seems to be needed at least for PG 9.3.11
all: $(DATA)

View File

@@ -1,3 +1,6 @@
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
-- Internal function.
-- Set the seeds of the RNGs (Random Number Generators)
-- used internally.
@@ -133,4 +136,60 @@ BEGIN
RETURN ST_Collect(points);
END;
$$
LANGUAGE plpgsql VOLATILE
LANGUAGE plpgsql VOLATILE;
-- cdb_create_segment: PL/Python entry point that delegates to
-- crankshaft.segmentation.create_segment and returns its result as NUMERIC.
--
-- Parameters:
--   segment_name, table_name, column_name  TEXT
--   geoid_column  TEXT, defaults to 'geoid'
--   census_table  TEXT, defaults to 'block_groups'
--
-- NOTE(review): none of the SQL arguments are forwarded yet; the Python
-- routine is called with the literal string 'table' (see the TODO below).
CREATE OR REPLACE FUNCTION
  cdb_create_segment (
      segment_name TEXT,
      table_name TEXT,
      column_name TEXT,
      geoid_column TEXT DEFAULT 'geoid',
      census_table TEXT DEFAULT 'block_groups'
  )
RETURNS NUMERIC
AS $$
    # The imported name has to be the same name that is called below,
    # otherwise the call cannot resolve.
    from crankshaft.segmentation import create_segment
    # TODO: use named parameters or a dictionary
    return create_segment('table')
$$ LANGUAGE plpythonu;
-- cdb_predict_segment: PL/Python entry point declared to return a set of
-- (geoid TEXT, prediction NUMERIC) rows.
--
-- Parameters:
--   segment_name  TEXT
--   geoid_column  TEXT, defaults to 'geoid'
--   census_table  TEXT, defaults to 'block_groups'
--
-- NOTE(review): the body calls the *create* routine with the literal string
-- 'table' and forwards none of the SQL arguments; this looks like a
-- placeholder copied from cdb_create_segment -- confirm which Python
-- function should back the prediction.
CREATE OR REPLACE FUNCTION
  cdb_predict_segment (
      segment_name TEXT,
      geoid_column TEXT DEFAULT 'geoid',
      census_table TEXT DEFAULT 'block_groups'
  )
RETURNS TABLE(geoid TEXT, prediction NUMERIC)
AS $$
    # The imported name has to be the same name that is called below,
    # otherwise the call cannot resolve.
    from crankshaft.segmentation import create_segment
    # TODO: use named parameters or a dictionary
    return create_segment('table')
$$ LANGUAGE plpythonu;
-- cdb_adaptive_histogram: thin SQL wrapper around the Python routine
-- crankshaft.bayesian_blocks.adaptive_histogram.
--
-- Parameters:
--   table_name   TEXT  passed through unchanged to the Python routine
--   column_name  TEXT  passed through unchanged to the Python routine
-- Returns:
--   a set of rows (bin_start NUMERIC, bin_end NUMERIC, value NUMERIC)
CREATE OR REPLACE FUNCTION cdb_adaptive_histogram (table_name TEXT, column_name TEXT)
RETURNS TABLE (
    bin_start NUMERIC,
    bin_end NUMERIC,
    value NUMERIC
)
AS $$
    from crankshaft import bayesian_blocks
    histogram_rows = bayesian_blocks.adaptive_histogram(table_name, column_name)
    return histogram_rows
$$ LANGUAGE plpythonu;
-- cdb_simple_test: takes no arguments and always returns the constant 5.
CREATE OR REPLACE FUNCTION cdb_simple_test ()
RETURNS NUMERIC
AS $$
    constant_result = 5
    return constant_result
$$ LANGUAGE plpythonu;
-- Make sure by default there are no permissions for publicuser
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
-- Grant permissions on the schema to publicuser (but just the schema)
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
-- Revoke execute permissions on all functions in the schema by default
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;

View File

@@ -0,0 +1,3 @@
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit

View File

@@ -51,4 +51,4 @@ BEGIN
RETURN ST_Collect(points);
END;
$$
LANGUAGE plpgsql VOLATILE
LANGUAGE plpgsql VOLATILE;

View File

@@ -0,0 +1,11 @@
-- cdb_adaptive_histogram: thin SQL wrapper around the Python routine
-- crankshaft.bayesian_blocks.adaptive_histogram.
--
-- Parameters:
--   table_name   TEXT  passed through unchanged to the Python routine
--   column_name  TEXT  passed through unchanged to the Python routine
-- Returns:
--   a set of rows (bin_start NUMERIC, bin_end NUMERIC, value NUMERIC)
CREATE OR REPLACE FUNCTION cdb_adaptive_histogram (table_name TEXT, column_name TEXT)
RETURNS TABLE (
    bin_start NUMERIC,
    bin_end NUMERIC,
    value NUMERIC
)
AS $$
    from crankshaft import bayesian_blocks
    histogram_rows = bayesian_blocks.adaptive_histogram(table_name, column_name)
    return histogram_rows
$$ LANGUAGE plpythonu;

View File

@@ -0,0 +1,9 @@
-- Make sure by default there are no permissions for publicuser
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
-- Grant permissions on the schema to publicuser (but just the schema)
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
-- Revoke execute permissions on all functions in the schema by default
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;

View File

@@ -0,0 +1,18 @@
SELECT cdb_crankshaft._cdb_random_seeds(1234);
-- Use regular user role
SET ROLE test_regular_user;
-- Add to the search path the schema
SET search_path TO public,cartodb,cdb_crankshaft;
-- Exercise public functions
SELECT ppoints.code, m.quads
FROM ppoints
JOIN cdb_moran_local('ppoints', 'value') m
ON ppoints.cartodb_id = m.ids
ORDER BY ppoints.code;
SELECT round(cdb_overlap_sum(
'0106000020E61000000100000001030000000100000004000000FFFFFFFFFF3604C09A0B9ECEC42E444000000000C060FBBF30C7FD70E01D44400000000040AD02C06481F1C8CD034440FFFFFFFFFF3604C09A0B9ECEC42E4440'::geometry,
'values', 'value'
), 2);

View File

@@ -1,2 +1,3 @@
import random_seeds
import clustering
import bayesian_blocks

View File

@@ -0,0 +1 @@
from bayesian_blocks import *

View File

@@ -0,0 +1,84 @@
import plpy
import numpy as np
def adaptive_histogram(table_name, column_name):
    """Histogram one column of a table using Bayesian-blocks bin edges.

    Parameters
    ----------
    table_name : str
        Name of the table to read from.
    column_name : str
        Name of the column whose values are histogrammed.

    Returns
    -------
    list of (bin_start, bin_end, value) tuples
        One tuple per bin, in the column order declared by the SQL wrapper
        ``cdb_adaptive_histogram``. ``value`` is the number of data points
        that fall in the bin. An empty list is returned when the column has
        no non-NULL values.
    """
    # NOTE(review): both identifiers are interpolated into the statement
    # verbatim. Callers must pass trusted identifiers only; confirm whether
    # quoting (e.g. plpy.quote_ident) can be applied without breaking
    # schema-qualified table names.
    #
    # The column is aliased so every row can be read under one fixed key,
    # whatever the requested column is called.
    query = "select {column_name} as hist_value from {table_name}".format(
        column_name=column_name, table_name=table_name)
    rows = plpy.execute(query)

    # NULLs cannot be converted to float and carry no information for a
    # histogram, so they are skipped.
    data = [float(row['hist_value'])
            for row in rows
            if row['hist_value'] is not None]
    if not data:
        return []

    counts, edges = np.histogram(data, bins=_bayesian_blocks(data))

    # Emit plain Python numbers, ordered as (bin_start, bin_end, value).
    return [(float(start), float(end), int(count))
            for start, end, count in zip(edges, edges[1:], counts)]
def _bayesian_blocks(t):
    """Bayesian Blocks Implementation

    By Jake Vanderplas.  License: BSD
    Based on algorithm outlined in http://adsabs.harvard.edu/abs/2012arXiv1207.5578S

    Parameters
    ----------
    t : ndarray, length N
        data to be histogrammed

    Returns
    -------
    bins : ndarray
        array containing the bin edges selected as change points; these are
        a subset of the (N+1) cell edges built from the sorted data, and
        always include the first and the last one. Empty when ``t`` is empty.

    Notes
    -----
    This is an incomplete implementation: it may fail for some
    datasets.  Alternate fitness functions and prior forms can
    be found in the paper listed above.
    """
    # copy and sort the array
    t = np.sort(t)
    N = t.size

    # nothing to partition: there are no cells and therefore no edges
    if N == 0:
        return np.empty(0, dtype=float)

    # create length-(N + 1) array of cell edges
    edges = np.concatenate([t[:1],
                            0.5 * (t[1:] + t[:-1]),
                            t[-1:]])
    block_length = t[-1] - edges

    # arrays needed for the iteration
    nn_vec = np.ones(N)
    best = np.zeros(N, dtype=float)
    last = np.zeros(N, dtype=int)

    #-----------------------------------------------------------------
    # Start with first data cell; add one cell at each iteration
    #-----------------------------------------------------------------
    for K in range(N):
        # Compute the width and count of the final bin for all possible
        # locations of the K^th changepoint
        width = block_length[:K + 1] - block_length[K + 1]
        count_vec = np.cumsum(nn_vec[:K + 1][::-1])[::-1]

        # evaluate fitness function for these possibilities
        fit_vec = count_vec * (np.log(count_vec) - np.log(width))
        fit_vec -= 4  # 4 comes from the prior on the number of changepoints
        fit_vec[1:] += best[:K]

        # find the max of the fitness: this is the K^th changepoint
        i_max = np.argmax(fit_vec)
        last[K] = i_max
        best[K] = fit_vec[i_max]

    #-----------------------------------------------------------------
    # Recover changepoints by iteratively peeling off the last block
    #-----------------------------------------------------------------
    # Up to N + 1 change points (every cell edge) can be recorded, so the
    # buffer needs N + 1 slots; with only N the write index would reach -1
    # and overwrite the final entry.
    change_points = np.zeros(N + 1, dtype=int)
    i_cp = N + 1
    ind = N
    while True:
        i_cp -= 1
        change_points[i_cp] = ind
        if ind == 0:
            break
        ind = last[ind - 1]
    change_points = change_points[i_cp:]

    return edges[change_points]

View File

@@ -10,7 +10,7 @@ from setuptools import setup, find_packages
setup(
name='crankshaft',
version='0.0.01',
version='0.0.1',
description='CartoDB Spatial Analysis Python Library',
@@ -40,7 +40,7 @@ setup(
# The choice of component versions is dictated by what's
# provisioned in the production servers.
install_requires=['pysal==1.11.0','numpy==1.6.1','scipy==0.17.0'],
install_requires=['pysal==1.11.0','numpy==1.10.1','scipy==0.17.0'],
requires=['pysal', 'numpy'],