Compare commits
44 Commits
model-stor
...
develop
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
494c047563 | ||
|
|
0e9736500f | ||
|
|
02a58873b2 | ||
|
|
78fd788180 | ||
|
|
e162547a09 | ||
|
|
3783e343a3 | ||
|
|
641332c9b8 | ||
|
|
1563c4eace | ||
|
|
d48f6d0236 | ||
|
|
7aafe83ffc | ||
|
|
0699ab09e7 | ||
|
|
462d03edbf | ||
|
|
a0476b1eca | ||
|
|
9b88db8475 | ||
|
|
6bc963aada | ||
|
|
632ce2ad5e | ||
|
|
a626822d4e | ||
|
|
c6c006eb22 | ||
|
|
ede701202f | ||
|
|
1985c1273b | ||
|
|
30ac9af1f9 | ||
|
|
bc365acc01 | ||
|
|
f5afd7c43f | ||
|
|
fb0b9d51b1 | ||
|
|
32b2068648 | ||
|
|
550e514726 | ||
|
|
e721830bc7 | ||
|
|
26ed991550 | ||
|
|
6d50c125f9 | ||
|
|
dfdbacfb30 | ||
|
|
c89805b56d | ||
|
|
c321caaefd | ||
|
|
b3c5fd756c | ||
|
|
dcf4e4beb5 | ||
|
|
cfebe6e854 | ||
|
|
bce2fc5f1f | ||
|
|
54646acc56 | ||
|
|
698f8e1a60 | ||
|
|
c86b989527 | ||
|
|
a24d7861d8 | ||
|
|
3e660a4d5f | ||
|
|
7c43596512 | ||
|
|
19397eec5b | ||
|
|
87c3fa8fc5 |
@@ -1,3 +0,0 @@
|
||||
{
|
||||
"sbruchmann.staticpreview.basepath": "/home/carto/Projects/crankshaft/"
|
||||
}
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -2,4 +2,3 @@ envs/
|
||||
*.pyc
|
||||
.DS_Store
|
||||
.idea/
|
||||
.*.sw[nop]
|
||||
|
||||
74
.travis.yml
74
.travis.yml
@@ -1,60 +1,48 @@
|
||||
language: c
|
||||
dist: precise
|
||||
sudo: required
|
||||
|
||||
env:
|
||||
global:
|
||||
- PAGER=cat
|
||||
- PGUSER=postgres
|
||||
- PGDATABASE=postgres
|
||||
- PGOPTIONS='-c client_min_messages=NOTICE'
|
||||
|
||||
jobs:
|
||||
include:
|
||||
- env: POSTGRESQL_VERSION="9.6" POSTGIS_VERSION="2.5"
|
||||
dist: xenial
|
||||
- env: POSTGRESQL_VERSION="10" POSTGIS_VERSION="2.5"
|
||||
dist: xenial
|
||||
- env: POSTGRESQL_VERSION="11" POSTGIS_VERSION="2.5"
|
||||
dist: xenial
|
||||
- env: POSTGRESQL_VERSION="12" POSTGIS_VERSION="3"
|
||||
dist: bionic
|
||||
|
||||
before_install:
|
||||
- ./check-up-to-date-with-master.sh
|
||||
- sudo apt-get -y install python-pip
|
||||
|
||||
- sudo apt-get -y install python-software-properties
|
||||
- sudo add-apt-repository -y ppa:cartodb/sci
|
||||
- sudo add-apt-repository -y ppa:cartodb/postgresql-9.5
|
||||
- sudo add-apt-repository -y ppa:cartodb/gis
|
||||
- sudo add-apt-repository -y ppa:cartodb/gis-testing
|
||||
- sudo apt-get update
|
||||
- sudo apt-get install -y --allow-unauthenticated --no-install-recommends --no-install-suggests postgresql-$POSTGRESQL_VERSION postgresql-client-$POSTGRESQL_VERSION postgresql-server-dev-$POSTGRESQL_VERSION postgresql-common
|
||||
- if [[ $POSTGRESQL_VERSION == '9.6' ]]; then sudo apt-get install -y postgresql-contrib-9.6; fi;
|
||||
- sudo apt-get install -y --allow-unauthenticated postgresql-$POSTGRESQL_VERSION-postgis-$POSTGIS_VERSION postgresql-$POSTGRESQL_VERSION-postgis-$POSTGIS_VERSION-scripts postgis
|
||||
|
||||
- sudo apt-get -y install python-joblib=0.8.3-1-cdb1
|
||||
- sudo apt-get -y install python-numpy=1:1.6.1-6ubuntu1
|
||||
# For pre12, install plpython2. For PG12 install plpython3
|
||||
- if [[ $POSTGRESQL_VERSION != '12' ]]; then sudo apt-get install -y postgresql-plpython-$POSTGRESQL_VERSION python python-pip python-software-properties python-joblib python-nose python-setuptools; else sudo apt-get install -y postgresql-plpython3-12 python3 python3-pip python3-software-properties python3-joblib python3-nose python3-setuptools; fi;
|
||||
- if [[ $POSTGRESQL_VERSION == '12' ]]; then echo -e "joblib==0.11\nnumpy==1.13.3\nscipy==0.19.1\npysal==1.14.3\nscikit-learn==0.19.1" > ./src/py/crankshaft/requirements.txt && sed -i -e "s/.*install_requires.*$/ install_requires=['joblib==0.11.0', 'numpy==1.13.3', 'scipy==0.19.1', 'pysal==1.14.3', 'scikit-learn==0.19.1'],/g" ./src/py/crankshaft/setup.py; fi;
|
||||
|
||||
- sudo apt-get -y install python-scipy=0.14.0-2-cdb6
|
||||
- sudo apt-get -y --no-install-recommends install python-sklearn-lib=0.14.1-3-cdb2
|
||||
- sudo apt-get -y --no-install-recommends install python-sklearn=0.14.1-3-cdb2
|
||||
- sudo apt-get -y --no-install-recommends install python-scikits-learn=0.14.1-3-cdb2
|
||||
|
||||
# Force installation of libgeos-3.5.0 (presumably needed because of existing version of postgis)
|
||||
- sudo apt-get -y install libgeos-3.5.0=3.5.0-1cdb2
|
||||
|
||||
# Install postgres db and build deps
|
||||
- sudo /etc/init.d/postgresql stop # stop travis default instance
|
||||
- sudo apt-get -y remove --purge postgresql-9.1
|
||||
- sudo apt-get -y remove --purge postgresql-9.2
|
||||
- sudo apt-get -y remove --purge postgresql-9.3
|
||||
- sudo apt-get -y remove --purge postgresql-9.4
|
||||
- sudo apt-get -y remove --purge postgresql-9.5
|
||||
- sudo rm -rf /var/lib/postgresql/
|
||||
- sudo rm -rf /var/log/postgresql/
|
||||
- sudo rm -rf /etc/postgresql/
|
||||
- sudo apt-get -y remove --purge postgis-2.2
|
||||
- sudo apt-get -y autoremove
|
||||
|
||||
- sudo apt-get -y install postgresql-9.5=9.5.2-3cdb3
|
||||
- sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb3
|
||||
- sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb3
|
||||
- sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2
|
||||
- sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2
|
||||
|
||||
# configure it to accept local connections from postgres
|
||||
- echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \
|
||||
| sudo tee /etc/postgresql/9.5/main/pg_hba.conf
|
||||
- sudo /etc/init.d/postgresql restart 9.5
|
||||
- sudo pg_dropcluster --stop $POSTGRESQL_VERSION main
|
||||
- sudo rm -rf /etc/postgresql/$POSTGRESQL_VERSION /var/lib/postgresql/$POSTGRESQL_VERSION
|
||||
- sudo pg_createcluster -u postgres $POSTGRESQL_VERSION main --start -- -A trust
|
||||
- export PGPORT=$(pg_lsclusters | grep $POSTGRESQL_VERSION | awk '{print $3}')
|
||||
|
||||
install:
|
||||
- sudo make install
|
||||
|
||||
script:
|
||||
- make test || { cat src/pg/test/regression.diffs; false; }
|
||||
- make test
|
||||
- ./check-compatibility.sh
|
||||
|
||||
after_failure:
|
||||
- pg_lsclusters
|
||||
- cat src/pg/test/regression.diffs
|
||||
- echo $PGPORT
|
||||
- cat /var/log/postgresql/postgresql-$POSTGRESQL_VERSION-main.log
|
||||
|
||||
@@ -39,9 +39,7 @@ ALTER EXTENSION crankshaft UPDATE TO 'dev';
|
||||
If the extension has not previously been installed in a database,
|
||||
it can be installed directly with:
|
||||
```sql
|
||||
CREATE EXTENSION IF NOT EXISTS plpythonu;
|
||||
CREATE EXTENSION IF NOT EXISTS postgis;
|
||||
CREATE EXTENSION crankshaft WITH VERSION 'dev';
|
||||
CREATE EXTENSION crankshaft WITH VERSION 'dev' CASCADE;
|
||||
```
|
||||
|
||||
Once the feature or bugfix is completed and all the tests are passing
|
||||
|
||||
4
Makefile
4
Makefile
@@ -23,7 +23,7 @@ test: ## Run the tests for the development version of the extension
|
||||
$(MAKE) -C $(EXT_DIR) test
|
||||
|
||||
# Generate a new release into release
|
||||
release: ## Generate a new release of the extension. Only for telease manager
|
||||
release: ## Generate a new release of the extension.
|
||||
$(MAKE) -C $(EXT_DIR) release
|
||||
$(MAKE) -C $(PYP_DIR) release
|
||||
|
||||
@@ -31,7 +31,7 @@ release: ## Generate a new release of the extension. Only for telease manager
|
||||
# Requires sudo.
|
||||
# Use the RELEASE_VERSION environment variable to deploy a specific version:
|
||||
# sudo make deploy RELEASE_VERSION=1.0.0
|
||||
deploy: ## Deploy a released extension. Only for release manager. Requires sudo.
|
||||
deploy:
|
||||
$(MAKE) -C $(EXT_DIR) deploy
|
||||
$(MAKE) -C $(PYP_DIR) deploy
|
||||
|
||||
|
||||
@@ -3,9 +3,21 @@ EXTENSION = crankshaft
|
||||
PACKAGE = crankshaft
|
||||
EXTVERSION = $(shell grep default_version $(SELF_DIR)/src/pg/$(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
|
||||
RELEASE_VERSION ?= $(EXTVERSION)
|
||||
|
||||
SED = sed
|
||||
PIP = pip
|
||||
NOSETESTS = nosetests
|
||||
AWK = awk
|
||||
|
||||
PG_CONFIG = pg_config
|
||||
PG_PARALLEL := $(shell $(PG_CONFIG) --version | ($(AWK) '{$$2*=1000; if ($$2 >= 9600) print 1; else print 0;}' 2> /dev/null || echo 0))
|
||||
PG_VERSION_1000 := $(shell $(PG_CONFIG) --version | $(AWK) '{$$2*=1000; print $$2}')
|
||||
PG_PARALLEL := $(shell [ $(PG_VERSION_1000) -ge 9600 ] && echo true)
|
||||
|
||||
PG_12plus := $(shell [ $(PG_VERSION_1000) -ge 12000 ] && echo true)
|
||||
PYTHON3 ?= $(PG_12plus)
|
||||
|
||||
ifeq ($(PYTHON3), true)
|
||||
PIP := python3 -m pip
|
||||
NOSETESTS = nosetests3
|
||||
else
|
||||
PIP := python2 -m pip
|
||||
NOSETESTS = nosetests
|
||||
endif
|
||||
|
||||
11
NEWS.md
11
NEWS.md
@@ -1,3 +1,14 @@
|
||||
0.9.0 (2019-12-23)
|
||||
------------------
|
||||
* Compatibility with PG12.
|
||||
* Compatibility with python3 (enable with PYTHON3=true env variable, default in PG12+).
|
||||
|
||||
0.8.2 (2019-02-07)
|
||||
------------------
|
||||
* Update dependencies to match what is being used in production.
|
||||
* Update travis to xenial, PG10 and 11, and postgis 2.5
|
||||
* Compatibility with PG11
|
||||
|
||||
0.8.1 (2018-03-12)
|
||||
------------------
|
||||
* Adds improperly added version files
|
||||
|
||||
14
README.md
14
README.md
@@ -8,28 +8,21 @@ CARTO Spatial Analysis extension for PostgreSQL.
|
||||
* `src/` source code
|
||||
- `pg/` contains the PostgreSQL extension source code
|
||||
- `py/` Python module source code
|
||||
* `release` reseleased versions
|
||||
* `release` released versions
|
||||
|
||||
## Requirements
|
||||
|
||||
* PostgreSQL
|
||||
* plpythonu and postgis extensions
|
||||
* plpythonu (for PG12+, plpython3u) and postgis extensions
|
||||
* python-scipy system package (see [src/py/README.md](https://github.com/CartoDB/crankshaft/blob/develop/src/py/README.md))
|
||||
|
||||
# Development Process
|
||||
|
||||
We distinguish two roles:
|
||||
|
||||
* *developers* will implement new functionality and bugfixes into
|
||||
the codebase.
|
||||
* A *release manager* will handle the release process.
|
||||
|
||||
We use the branch `develop` as the main integration branch for development. The `master` is reserved to handle releases.
|
||||
|
||||
The process is as follows:
|
||||
|
||||
1. Create a new **topic branch** from `develop` for any new feature
|
||||
or bugfix and commit their changes to it:
|
||||
1. Create a new **topic branch** from `develop` for any new feature or bugfix and commit their changes to it:
|
||||
|
||||
```shell
|
||||
git fetch && git checkout -b my-cool-feature origin/develop
|
||||
@@ -39,7 +32,6 @@ or bugfix and commit their changes to it:
|
||||
1. Update the [NEWS.md](https://github.com/CartoDB/crankshaft/blob/develop/NEWS.md) doc.
|
||||
1. Create a pull request and mention relevant people for a **peer review**.
|
||||
1. Address the comments and improvements you get from the peer review.
|
||||
1. Mention `@CartoDB/dataservices` in the PR to get it merged into `develop`.
|
||||
|
||||
In order for a pull request to be accepted, the following criteria should be met:
|
||||
* The peer review should pass and no major issue should be left unaddressed.
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# Release & Deployment Process
|
||||
|
||||
The release process of a new version of the extension
|
||||
shall be performed by the designated *Release Manager*.
|
||||
:warning: Do not forget about updating dependencies in `cartodb-platform` and `carto-postgres-artifacts` :warning:
|
||||
|
||||
## Release steps
|
||||
* Make sure `develop` branch passes all the tests.
|
||||
|
||||
20
carto-package.json
Normal file
20
carto-package.json
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"name": "crankshaft",
|
||||
"current_version": {
|
||||
"requires": {
|
||||
"postgres": ">=9.5.0",
|
||||
"postgis": ">=2.2.0.0",
|
||||
"python": ">=2.7.0",
|
||||
"joblib": "0.8.3",
|
||||
"numpy": "1.6.1",
|
||||
"scipy": "0.14.0",
|
||||
"pysal": "1.14.3",
|
||||
"scikit-learn": "0.14.1"
|
||||
},
|
||||
"works_with": {
|
||||
}
|
||||
},
|
||||
|
||||
"exceptional_versions": {
|
||||
}
|
||||
}
|
||||
@@ -25,10 +25,6 @@ psql -c "SELECT * FROM pg_available_extension_versions WHERE name LIKE 'cranksha
|
||||
|
||||
# Install in the fresh DB
|
||||
psql $DBNAME <<'EOF'
|
||||
-- Install dependencies
|
||||
CREATE EXTENSION plpythonu;
|
||||
CREATE EXTENSION postgis VERSION '2.2.2';
|
||||
|
||||
-- Create role publicuser if it does not exist
|
||||
DO
|
||||
$$
|
||||
@@ -44,30 +40,53 @@ END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Install the default version
|
||||
CREATE EXTENSION crankshaft;
|
||||
CREATE EXTENSION crankshaft CASCADE;
|
||||
\dx
|
||||
EOF
|
||||
|
||||
|
||||
# Check PG version
|
||||
PG_VERSION=`psql -q -t -c "SELECT current_setting('server_version_num')"`
|
||||
|
||||
# Save public function signatures
|
||||
psql $DBNAME <<'EOF'
|
||||
CREATE TABLE release_function_signatures AS
|
||||
SELECT
|
||||
p.proname as name,
|
||||
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
||||
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
||||
CASE
|
||||
WHEN p.proisagg THEN 'agg'
|
||||
WHEN p.proiswindow THEN 'window'
|
||||
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
||||
ELSE 'normal'
|
||||
END as type
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE
|
||||
n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname LIKE 'cdb_%'
|
||||
ORDER BY 1, 2, 4;
|
||||
EOF
|
||||
if [[ "$PG_VERSION" -lt 110000 ]]; then
|
||||
psql $DBNAME -c "
|
||||
CREATE TABLE release_function_signatures AS
|
||||
SELECT
|
||||
p.proname as name,
|
||||
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
||||
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
||||
CASE
|
||||
WHEN p.proisagg THEN 'agg'
|
||||
WHEN p.proiswindow THEN 'window'
|
||||
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
||||
ELSE 'normal'
|
||||
END as type
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE
|
||||
n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname LIKE 'cdb_%'
|
||||
ORDER BY 1, 2, 4;"
|
||||
else
|
||||
psql $DBNAME -c "
|
||||
CREATE TABLE release_function_signatures AS
|
||||
SELECT
|
||||
p.proname as name,
|
||||
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
||||
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
||||
CASE WHEN p.prokind = 'a' THEN 'agg'
|
||||
WHEN p.prokind = 'w' THEN 'window'
|
||||
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
||||
ELSE 'normal'
|
||||
END as type
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE
|
||||
n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname LIKE 'cdb_%'
|
||||
ORDER BY 1, 2, 4;"
|
||||
fi
|
||||
|
||||
# Deploy current dev branch
|
||||
make clean-dev || die "Could not clean dev files"
|
||||
@@ -76,26 +95,42 @@ sudo make install || die "Could not deploy current dev branch"
|
||||
# Check it can be upgraded
|
||||
psql $DBNAME -c "ALTER EXTENSION crankshaft update to 'dev';" || die "Cannot upgrade to dev version"
|
||||
|
||||
# Check against saved public function signatures
|
||||
psql $DBNAME <<'EOF'
|
||||
CREATE TABLE dev_function_signatures AS
|
||||
SELECT
|
||||
p.proname as name,
|
||||
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
||||
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
||||
CASE
|
||||
WHEN p.proisagg THEN 'agg'
|
||||
WHEN p.proiswindow THEN 'window'
|
||||
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
||||
ELSE 'normal'
|
||||
END as type
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE
|
||||
n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname LIKE 'cdb_%'
|
||||
ORDER BY 1, 2, 4;
|
||||
EOF
|
||||
if [[ $PG_VERSION -lt 110000 ]]; then
|
||||
psql $DBNAME -c "
|
||||
CREATE TABLE dev_function_signatures AS
|
||||
SELECT p.proname as name,
|
||||
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
||||
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
||||
CASE WHEN p.proisagg THEN 'agg'
|
||||
WHEN p.proiswindow THEN 'window'
|
||||
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
||||
ELSE 'normal'
|
||||
END as type
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE
|
||||
n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname LIKE 'cdb_%'
|
||||
ORDER BY 1, 2, 4;"
|
||||
else
|
||||
psql $DBNAME -c "
|
||||
CREATE TABLE dev_function_signatures AS
|
||||
SELECT p.proname as name,
|
||||
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
||||
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
||||
CASE WHEN p.prokind = 'a' THEN 'agg'
|
||||
WHEN p.prokind = 'w' THEN 'window'
|
||||
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
||||
ELSE 'normal'
|
||||
END as type
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE
|
||||
n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname LIKE 'cdb_%'
|
||||
ORDER BY 1, 2, 4;"
|
||||
fi
|
||||
|
||||
|
||||
echo "Functions in development not in latest release (ok):"
|
||||
psql $DBNAME -c "SELECT * FROM dev_function_signatures EXCEPT SELECT * FROM release_function_signatures;"
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.9.1'::text;
|
||||
SELECT '0.8.2'::text;
|
||||
$$ language 'sql' IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
-- Internal identifier of the installed extension instance
|
||||
@@ -35,25 +35,16 @@ CREATE OR REPLACE FUNCTION
|
||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
-- Create aggregate if it did not exist
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname = 'cdb_pyagg'
|
||||
AND p.proisagg)
|
||||
THEN
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||
SFUNC = CDB_PyAggS,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
DO $$ BEGIN
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||
SFUNC = CDB_PyAggS,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{}"
|
||||
);
|
||||
EXCEPTION
|
||||
WHEN duplicate_function THEN NULL;
|
||||
END $$;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
@@ -98,7 +89,6 @@ CREATE OR REPLACE FUNCTION
|
||||
query TEXT,
|
||||
variable_name TEXT,
|
||||
target_table TEXT,
|
||||
model_name text DEFAULT NULL,
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
@@ -115,59 +105,24 @@ AS $$
|
||||
'learning_rate': learning_rate,
|
||||
'min_samples_leaf': min_samples_leaf
|
||||
}
|
||||
all_cols = list(plpy.execute('''
|
||||
feature_cols = set(plpy.execute('''
|
||||
select * from ({query}) as _w limit 0
|
||||
'''.format(query=query)).colnames())
|
||||
feature_cols = [a for a in all_cols
|
||||
if a not in [variable_name, 'cartodb_id', ]]
|
||||
'''.format(query=query)).colnames()) - set([variable_name, 'cartodb_id', ])
|
||||
return seg.create_and_predict_segment(
|
||||
query,
|
||||
variable_name,
|
||||
feature_cols,
|
||||
target_table,
|
||||
model_params,
|
||||
model_name=model_name
|
||||
model_params
|
||||
)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_RetrieveModelParams(
|
||||
model_name text,
|
||||
param_name text
|
||||
)
|
||||
RETURNS TABLE(param numeric, feature_name text) AS $$
|
||||
|
||||
import pickle
|
||||
from collections import Iterable
|
||||
|
||||
plan = plpy.prepare('''
|
||||
SELECT model, feature_names FROM model_storage
|
||||
WHERE name = $1;
|
||||
''', ['text', ])
|
||||
|
||||
try:
|
||||
model_encoded = plpy.execute(plan, [model_name, ])
|
||||
except plpy.SPIError as err:
|
||||
plpy.error('ERROR: {}'.format(err))
|
||||
plpy.notice(model_encoded[0]['feature_names'])
|
||||
model = pickle.loads(
|
||||
model_encoded[0]['model']
|
||||
)
|
||||
|
||||
res = getattr(model, param_name)
|
||||
if not isinstance(res, Iterable):
|
||||
raise Exception('Cannot return `{}` as a table'.format(param_name))
|
||||
return zip(res, model_encoded[0]['feature_names'])
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
query TEXT,
|
||||
variable TEXT,
|
||||
feature_columns TEXT[],
|
||||
target_query TEXT,
|
||||
model_name TEXT DEFAULT NULL,
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
@@ -189,8 +144,7 @@ AS $$
|
||||
variable,
|
||||
feature_columns,
|
||||
target_query,
|
||||
model_params,
|
||||
model_name=model_name
|
||||
model_params
|
||||
)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
||||
@@ -1150,27 +1104,19 @@ BEGIN
|
||||
END
|
||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
|
||||
-- Create aggregate if it did not exist
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname = 'cdb_weightedmean'
|
||||
AND p.proisagg)
|
||||
THEN
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
DO $$ BEGIN
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
EXCEPTION
|
||||
WHEN duplicate_function THEN NULL;
|
||||
END $$;
|
||||
-- Spatial Markov
|
||||
|
||||
-- input table format:
|
||||
@@ -4,7 +4,7 @@
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.9.1'::text;
|
||||
SELECT '0.9.0'::text;
|
||||
$$ language 'sql' IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
-- Internal identifier of the installed extension instance
|
||||
@@ -21,7 +21,7 @@ _cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||
AS $$
|
||||
from crankshaft import random_seeds
|
||||
random_seeds.set_random_seeds(seed_value)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
|
||||
returns NUMERIC[] as $$
|
||||
@@ -35,25 +35,16 @@ CREATE OR REPLACE FUNCTION
|
||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
-- Create aggregate if it did not exist
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname = 'cdb_pyagg'
|
||||
AND p.proisagg)
|
||||
THEN
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||
SFUNC = CDB_PyAggS,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
DO $$ BEGIN
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||
SFUNC = CDB_PyAggS,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{}"
|
||||
);
|
||||
EXCEPTION
|
||||
WHEN duplicate_function THEN NULL;
|
||||
END $$;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
@@ -91,14 +82,13 @@ AS $$
|
||||
target_ids,
|
||||
model_params)
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL RESTRICTED;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL RESTRICTED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
query TEXT,
|
||||
variable_name TEXT,
|
||||
target_table TEXT,
|
||||
model_name text DEFAULT NULL,
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
@@ -115,51 +105,17 @@ AS $$
|
||||
'learning_rate': learning_rate,
|
||||
'min_samples_leaf': min_samples_leaf
|
||||
}
|
||||
all_cols = list(plpy.execute('''
|
||||
feature_cols = set(plpy.execute('''
|
||||
select * from ({query}) as _w limit 0
|
||||
'''.format(query=query)).colnames())
|
||||
feature_cols = [a for a in all_cols
|
||||
if a not in [variable_name, 'cartodb_id', ]]
|
||||
'''.format(query=query)).colnames()) - set([variable_name, 'cartodb_id', ])
|
||||
return seg.create_and_predict_segment(
|
||||
query,
|
||||
variable_name,
|
||||
feature_cols,
|
||||
target_table,
|
||||
model_params,
|
||||
model_name=model_name
|
||||
model_params
|
||||
)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_RetrieveModelParams(
|
||||
model_name text,
|
||||
param_name text
|
||||
)
|
||||
RETURNS TABLE(param numeric, feature_name text) AS $$
|
||||
|
||||
import pickle
|
||||
from collections import Iterable
|
||||
|
||||
plan = plpy.prepare('''
|
||||
SELECT model, feature_names FROM model_storage
|
||||
WHERE name = $1;
|
||||
''', ['text', ])
|
||||
|
||||
try:
|
||||
model_encoded = plpy.execute(plan, [model_name, ])
|
||||
except plpy.SPIError as err:
|
||||
plpy.error('ERROR: {}'.format(err))
|
||||
plpy.notice(model_encoded[0]['feature_names'])
|
||||
model = pickle.loads(
|
||||
model_encoded[0]['model']
|
||||
)
|
||||
|
||||
res = getattr(model, param_name)
|
||||
if not isinstance(res, Iterable):
|
||||
raise Exception('Cannot return `{}` as a table'.format(param_name))
|
||||
return zip(res, model_encoded[0]['feature_names'])
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
@@ -167,7 +123,6 @@ CREATE OR REPLACE FUNCTION
|
||||
variable TEXT,
|
||||
feature_columns TEXT[],
|
||||
target_query TEXT,
|
||||
model_name TEXT DEFAULT NULL,
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
@@ -189,10 +144,9 @@ AS $$
|
||||
variable,
|
||||
feature_columns,
|
||||
target_query,
|
||||
model_params,
|
||||
model_name=model_name
|
||||
model_params
|
||||
)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
||||
IN target_query text,
|
||||
IN weight_column text,
|
||||
@@ -702,7 +656,7 @@ AS $$
|
||||
moran = Moran()
|
||||
return moran.global_stat(subquery, column_name, w_type,
|
||||
num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- Moran's I Local (internal function) - DEPRECATED
|
||||
CREATE OR REPLACE FUNCTION
|
||||
@@ -727,7 +681,7 @@ AS $$
|
||||
num_ngbrs, permutations, geom_col, id_col)
|
||||
# remove spatial lag
|
||||
return [(r[6], r[0], r[1], r[7], r[5]) for r in result]
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- Moran's I Local (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
@@ -755,7 +709,7 @@ moran = Moran()
|
||||
return moran.local_stat(subquery, column_name, w_type,
|
||||
num_ngbrs, permutations, geom_col, id_col)
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
|
||||
-- Moran's I Local (public-facing function)
|
||||
@@ -882,7 +836,7 @@ AS $$
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran.global_rate_stat(subquery, numerator, denominator, w_type,
|
||||
num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
|
||||
-- Moran's I Local Rate (internal function) - DEPRECATED
|
||||
@@ -910,7 +864,7 @@ AS $$
|
||||
result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
# remove spatial lag
|
||||
return [(r[6], r[0], r[1], r[7], r[4]) for r in result]
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function) - DEPRECATED
|
||||
CREATE OR REPLACE FUNCTION
|
||||
@@ -966,7 +920,7 @@ return moran.local_rate_stat(
|
||||
geom_col,
|
||||
id_col
|
||||
)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- Moran's I Rate
|
||||
-- Replaces CDB_AreasOfInterestLocalRate
|
||||
@@ -1079,7 +1033,7 @@ from crankshaft.clustering import Kmeans
|
||||
kmeans = Kmeans()
|
||||
return kmeans.spatial(query, no_clusters, no_init)
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- Non-spatial k-means clustering
|
||||
-- query: sql query to retrieve all the needed data
|
||||
@@ -1109,7 +1063,7 @@ kmeans = Kmeans()
|
||||
return kmeans.nonspatial(query, colnames, no_clusters,
|
||||
standardize=standardize,
|
||||
id_col=id_col)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(
|
||||
@@ -1150,27 +1104,19 @@ BEGIN
|
||||
END
|
||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
|
||||
-- Create aggregate if it did not exist
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname = 'cdb_weightedmean'
|
||||
AND p.proisagg)
|
||||
THEN
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
DO $$ BEGIN
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
EXCEPTION
|
||||
WHEN duplicate_function THEN NULL;
|
||||
END $$;
|
||||
-- Spatial Markov
|
||||
|
||||
-- input table format:
|
||||
@@ -1200,7 +1146,7 @@ AS $$
|
||||
|
||||
## TODO: use named parameters or a dictionary
|
||||
return markov.spatial_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- input table format: identical to above but in a predictable format
|
||||
-- Sample function call:
|
||||
@@ -1226,7 +1172,7 @@ $$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
-- from crankshaft.clustering import moran_local
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
-- $$ LANGUAGE plpython3u;
|
||||
--
|
||||
-- -- input table format:
|
||||
-- -- id | geom | date | measurement
|
||||
@@ -1252,7 +1198,7 @@ $$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
-- from crankshaft.clustering import moran_local
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
-- $$ LANGUAGE plpython3u;
|
||||
-- Based on:
|
||||
-- https://github.com/mapbox/polylabel/blob/master/index.js
|
||||
-- https://sites.google.com/site/polesofinaccessibility/
|
||||
@@ -1522,7 +1468,7 @@ AS $$
|
||||
from crankshaft.clustering import Getis
|
||||
getis = Getis()
|
||||
return getis.getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- TODO: make a version that accepts the values as arrays
|
||||
|
||||
@@ -1862,7 +1808,7 @@ gwr = GWR()
|
||||
|
||||
return gwr.gwr(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
@@ -1880,7 +1826,7 @@ gwr = GWR()
|
||||
|
||||
return gwr.gwr_predict(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
--
|
||||
-- Creates N points randomly distributed around the polygon
|
||||
--
|
||||
@@ -4,7 +4,7 @@
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.9.0'::text;
|
||||
SELECT '0.8.2'::text;
|
||||
$$ language 'sql' IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
-- Internal identifier of the installed extension instance
|
||||
@@ -35,25 +35,16 @@ CREATE OR REPLACE FUNCTION
|
||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
-- Create aggregate if it did not exist
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname = 'cdb_pyagg'
|
||||
AND p.proisagg)
|
||||
THEN
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||
SFUNC = CDB_PyAggS,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
DO $$ BEGIN
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||
SFUNC = CDB_PyAggS,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{}"
|
||||
);
|
||||
EXCEPTION
|
||||
WHEN duplicate_function THEN NULL;
|
||||
END $$;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
@@ -98,7 +89,6 @@ CREATE OR REPLACE FUNCTION
|
||||
query TEXT,
|
||||
variable_name TEXT,
|
||||
target_table TEXT,
|
||||
model_name text DEFAULT NULL,
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
@@ -115,59 +105,24 @@ AS $$
|
||||
'learning_rate': learning_rate,
|
||||
'min_samples_leaf': min_samples_leaf
|
||||
}
|
||||
all_cols = list(plpy.execute('''
|
||||
feature_cols = set(plpy.execute('''
|
||||
select * from ({query}) as _w limit 0
|
||||
'''.format(query=query)).colnames())
|
||||
feature_cols = [a for a in all_cols
|
||||
if a not in [variable_name, 'cartodb_id', ]]
|
||||
'''.format(query=query)).colnames()) - set([variable_name, 'cartodb_id', ])
|
||||
return seg.create_and_predict_segment(
|
||||
query,
|
||||
variable_name,
|
||||
feature_cols,
|
||||
target_table,
|
||||
model_params,
|
||||
model_name=model_name
|
||||
model_params
|
||||
)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_RetrieveModelParams(
|
||||
model_name text,
|
||||
param_name text
|
||||
)
|
||||
RETURNS TABLE(param numeric, feature_name text) AS $$
|
||||
|
||||
import pickle
|
||||
from collections import Iterable
|
||||
|
||||
plan = plpy.prepare('''
|
||||
SELECT model, feature_names FROM model_storage
|
||||
WHERE name = $1;
|
||||
''', ['text', ])
|
||||
|
||||
try:
|
||||
model_encoded = plpy.execute(plan, [model_name, ])
|
||||
except plpy.SPIError as err:
|
||||
plpy.error('ERROR: {}'.format(err))
|
||||
plpy.notice(model_encoded[0]['feature_names'])
|
||||
model = pickle.loads(
|
||||
model_encoded[0]['model']
|
||||
)
|
||||
|
||||
res = getattr(model, param_name)
|
||||
if not isinstance(res, Iterable):
|
||||
raise Exception('Cannot return `{}` as a table'.format(param_name))
|
||||
return zip(res, model_encoded[0]['feature_names'])
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
query TEXT,
|
||||
variable TEXT,
|
||||
feature_columns TEXT[],
|
||||
target_query TEXT,
|
||||
model_name TEXT DEFAULT NULL,
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
@@ -189,8 +144,7 @@ AS $$
|
||||
variable,
|
||||
feature_columns,
|
||||
target_query,
|
||||
model_params,
|
||||
model_name=model_name
|
||||
model_params
|
||||
)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
||||
@@ -1150,27 +1104,19 @@ BEGIN
|
||||
END
|
||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
|
||||
-- Create aggregate if it did not exist
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname = 'cdb_weightedmean'
|
||||
AND p.proisagg)
|
||||
THEN
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
DO $$ BEGIN
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
EXCEPTION
|
||||
WHEN duplicate_function THEN NULL;
|
||||
END $$;
|
||||
-- Spatial Markov
|
||||
|
||||
-- input table format:
|
||||
@@ -21,7 +21,7 @@ _cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||
AS $$
|
||||
from crankshaft import random_seeds
|
||||
random_seeds.set_random_seeds(seed_value)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
|
||||
returns NUMERIC[] as $$
|
||||
@@ -35,25 +35,16 @@ CREATE OR REPLACE FUNCTION
|
||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
-- Create aggregate if it did not exist
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname = 'cdb_pyagg'
|
||||
AND p.proisagg)
|
||||
THEN
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||
SFUNC = CDB_PyAggS,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
DO $$ BEGIN
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||
SFUNC = CDB_PyAggS,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{}"
|
||||
);
|
||||
EXCEPTION
|
||||
WHEN duplicate_function THEN NULL;
|
||||
END $$;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
@@ -91,14 +82,13 @@ AS $$
|
||||
target_ids,
|
||||
model_params)
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL RESTRICTED;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL RESTRICTED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
query TEXT,
|
||||
variable_name TEXT,
|
||||
target_table TEXT,
|
||||
model_name text DEFAULT NULL,
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
@@ -115,51 +105,17 @@ AS $$
|
||||
'learning_rate': learning_rate,
|
||||
'min_samples_leaf': min_samples_leaf
|
||||
}
|
||||
all_cols = list(plpy.execute('''
|
||||
feature_cols = set(plpy.execute('''
|
||||
select * from ({query}) as _w limit 0
|
||||
'''.format(query=query)).colnames())
|
||||
feature_cols = [a for a in all_cols
|
||||
if a not in [variable_name, 'cartodb_id', ]]
|
||||
'''.format(query=query)).colnames()) - set([variable_name, 'cartodb_id', ])
|
||||
return seg.create_and_predict_segment(
|
||||
query,
|
||||
variable_name,
|
||||
feature_cols,
|
||||
target_table,
|
||||
model_params,
|
||||
model_name=model_name
|
||||
model_params
|
||||
)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_RetrieveModelParams(
|
||||
model_name text,
|
||||
param_name text
|
||||
)
|
||||
RETURNS TABLE(param numeric, feature_name text) AS $$
|
||||
|
||||
import pickle
|
||||
from collections import Iterable
|
||||
|
||||
plan = plpy.prepare('''
|
||||
SELECT model, feature_names FROM model_storage
|
||||
WHERE name = $1;
|
||||
''', ['text', ])
|
||||
|
||||
try:
|
||||
model_encoded = plpy.execute(plan, [model_name, ])
|
||||
except plpy.SPIError as err:
|
||||
plpy.error('ERROR: {}'.format(err))
|
||||
plpy.notice(model_encoded[0]['feature_names'])
|
||||
model = pickle.loads(
|
||||
model_encoded[0]['model']
|
||||
)
|
||||
|
||||
res = getattr(model, param_name)
|
||||
if not isinstance(res, Iterable):
|
||||
raise Exception('Cannot return `{}` as a table'.format(param_name))
|
||||
return zip(res, model_encoded[0]['feature_names'])
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
@@ -167,7 +123,6 @@ CREATE OR REPLACE FUNCTION
|
||||
variable TEXT,
|
||||
feature_columns TEXT[],
|
||||
target_query TEXT,
|
||||
model_name TEXT DEFAULT NULL,
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
@@ -189,10 +144,9 @@ AS $$
|
||||
variable,
|
||||
feature_columns,
|
||||
target_query,
|
||||
model_params,
|
||||
model_name=model_name
|
||||
model_params
|
||||
)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
||||
IN target_query text,
|
||||
IN weight_column text,
|
||||
@@ -702,7 +656,7 @@ AS $$
|
||||
moran = Moran()
|
||||
return moran.global_stat(subquery, column_name, w_type,
|
||||
num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- Moran's I Local (internal function) - DEPRECATED
|
||||
CREATE OR REPLACE FUNCTION
|
||||
@@ -727,7 +681,7 @@ AS $$
|
||||
num_ngbrs, permutations, geom_col, id_col)
|
||||
# remove spatial lag
|
||||
return [(r[6], r[0], r[1], r[7], r[5]) for r in result]
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- Moran's I Local (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
@@ -755,7 +709,7 @@ moran = Moran()
|
||||
return moran.local_stat(subquery, column_name, w_type,
|
||||
num_ngbrs, permutations, geom_col, id_col)
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
|
||||
-- Moran's I Local (public-facing function)
|
||||
@@ -882,7 +836,7 @@ AS $$
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran.global_rate_stat(subquery, numerator, denominator, w_type,
|
||||
num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
|
||||
-- Moran's I Local Rate (internal function) - DEPRECATED
|
||||
@@ -910,7 +864,7 @@ AS $$
|
||||
result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
# remove spatial lag
|
||||
return [(r[6], r[0], r[1], r[7], r[4]) for r in result]
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function) - DEPRECATED
|
||||
CREATE OR REPLACE FUNCTION
|
||||
@@ -966,7 +920,7 @@ return moran.local_rate_stat(
|
||||
geom_col,
|
||||
id_col
|
||||
)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- Moran's I Rate
|
||||
-- Replaces CDB_AreasOfInterestLocalRate
|
||||
@@ -1079,7 +1033,7 @@ from crankshaft.clustering import Kmeans
|
||||
kmeans = Kmeans()
|
||||
return kmeans.spatial(query, no_clusters, no_init)
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- Non-spatial k-means clustering
|
||||
-- query: sql query to retrieve all the needed data
|
||||
@@ -1109,7 +1063,7 @@ kmeans = Kmeans()
|
||||
return kmeans.nonspatial(query, colnames, no_clusters,
|
||||
standardize=standardize,
|
||||
id_col=id_col)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(
|
||||
@@ -1150,27 +1104,19 @@ BEGIN
|
||||
END
|
||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
|
||||
-- Create aggregate if it did not exist
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname = 'cdb_weightedmean'
|
||||
AND p.proisagg)
|
||||
THEN
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
DO $$ BEGIN
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
PARALLEL = SAFE,
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
EXCEPTION
|
||||
WHEN duplicate_function THEN NULL;
|
||||
END $$;
|
||||
-- Spatial Markov
|
||||
|
||||
-- input table format:
|
||||
@@ -1200,7 +1146,7 @@ AS $$
|
||||
|
||||
## TODO: use named parameters or a dictionary
|
||||
return markov.spatial_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- input table format: identical to above but in a predictable format
|
||||
-- Sample function call:
|
||||
@@ -1226,7 +1172,7 @@ $$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
-- from crankshaft.clustering import moran_local
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
-- $$ LANGUAGE plpython3u;
|
||||
--
|
||||
-- -- input table format:
|
||||
-- -- id | geom | date | measurement
|
||||
@@ -1252,7 +1198,7 @@ $$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
-- from crankshaft.clustering import moran_local
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
-- $$ LANGUAGE plpython3u;
|
||||
-- Based on:
|
||||
-- https://github.com/mapbox/polylabel/blob/master/index.js
|
||||
-- https://sites.google.com/site/polesofinaccessibility/
|
||||
@@ -1522,7 +1468,7 @@ AS $$
|
||||
from crankshaft.clustering import Getis
|
||||
getis = Getis()
|
||||
return getis.getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- TODO: make a version that accepts the values as arrays
|
||||
|
||||
@@ -1862,7 +1808,7 @@ gwr = GWR()
|
||||
|
||||
return gwr.gwr(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
@@ -1880,7 +1826,7 @@ gwr = GWR()
|
||||
|
||||
return gwr.gwr_predict(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)
|
||||
|
||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
||||
--
|
||||
-- Creates N points randomly distributed around the polygon
|
||||
--
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
comment = 'CartoDB Spatial Analysis extension'
|
||||
default_version = '0.9.5'
|
||||
requires = 'plpythonu, postgis'
|
||||
default_version = '0.9.0'
|
||||
requires = 'plpython3u, postgis'
|
||||
superuser = true
|
||||
schema = cdb_crankshaft
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -2,14 +2,11 @@
|
||||
Segmentation creation and prediction
|
||||
"""
|
||||
|
||||
import pickle
|
||||
import plpy
|
||||
import numpy as np
|
||||
from sklearn.ensemble import GradientBoostingRegressor
|
||||
from sklearn import metrics
|
||||
from sklearn.cross_validation import train_test_split
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
from crankshaft import model_storage
|
||||
|
||||
# NOTE: added optional param here
|
||||
|
||||
@@ -54,7 +51,6 @@ class Segmentation(object):
|
||||
|
||||
def create_and_predict_segment(self, query, variable, feature_columns,
|
||||
target_query, model_params,
|
||||
model_name=None,
|
||||
id_col='cartodb_id'):
|
||||
"""
|
||||
generate a segment with machine learning
|
||||
@@ -74,25 +70,15 @@ class Segmentation(object):
|
||||
(target, features, target_mean,
|
||||
feature_means) = self.clean_data(query, variable, feature_columns)
|
||||
|
||||
if model_name:
|
||||
model_storage.create_model_table()
|
||||
|
||||
# find model if it exists and is specified
|
||||
if model_name is not None:
|
||||
model = model_storage.get_model(model_name)
|
||||
|
||||
if locals().get('model') is None:
|
||||
model, accuracy = train_model(target, features, model_params, 0.2)
|
||||
|
||||
model, accuracy = train_model(target, features, model_params, 0.2)
|
||||
result = self.predict_segment(model, feature_columns, target_query,
|
||||
feature_means)
|
||||
accuracy_array = [accuracy] * result.shape[0]
|
||||
|
||||
rowid = self.data_provider.get_segmentation_data(params)
|
||||
|
||||
# store the model for later use
|
||||
if model_name:
|
||||
model_storage.set_model(model, model_name, feature_columns)
|
||||
'''
|
||||
rowid = [{'ids': [2.9, 4.9, 4, 5, 6]}]
|
||||
'''
|
||||
return zip(rowid[0]['ids'], result, accuracy_array)
|
||||
|
||||
def predict_segment(self, model, feature_columns, target_query,
|
||||
5
release/python/0.8.2/crankshaft/requirements.txt
Normal file
5
release/python/0.8.2/crankshaft/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
joblib==0.9.4
|
||||
numpy==1.11.0
|
||||
scipy==0.17.0
|
||||
pysal==1.14.3
|
||||
scikit-learn==0.17.0
|
||||
@@ -10,7 +10,7 @@ from setuptools import setup, find_packages
|
||||
setup(
|
||||
name='crankshaft',
|
||||
|
||||
version='0.0.0',
|
||||
version='0.8.2',
|
||||
|
||||
description='CartoDB Spatial Analysis Python Library',
|
||||
|
||||
@@ -41,7 +41,7 @@ setup(
|
||||
# The choice of component versions is dictated by what's
|
||||
# provisioned in the production servers.
|
||||
# IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
|
||||
install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.14.3', 'scikit-learn==0.14.1', 'petname==2.2'],
|
||||
install_requires=['joblib==0.9.4', 'numpy==1.11.0', 'scipy==0.17.0', 'pysal==1.14.3', 'scikit-learn==0.17.0'],
|
||||
|
||||
requires=['pysal', 'numpy', 'sklearn'],
|
||||
|
||||
@@ -4,4 +4,4 @@ import crankshaft.clustering
|
||||
import crankshaft.space_time_dynamics
|
||||
import crankshaft.segmentation
|
||||
import crankshaft.regression
|
||||
import analysis_data_provider
|
||||
from . import analysis_data_provider
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""class for fetching data"""
|
||||
import plpy
|
||||
import pysal_utils as pu
|
||||
from . import pysal_utils as pu
|
||||
|
||||
NULL_VALUE_ERROR = ('No usable data passed to analysis. Check your input rows '
|
||||
'for null values and fill in appropriately.')
|
||||
|
||||
@@ -1,76 +0,0 @@
|
||||
"""
|
||||
Based on the Weiszfeld algorithm:
|
||||
https://en.wikipedia.org/wiki/Geometric_median
|
||||
"""
|
||||
|
||||
|
||||
# import plpy
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
|
||||
|
||||
def median_center(tablename, geom_col, num_iters=50, tolerance=0.001):
    """Estimate the median center of the points stored in a table.

    Starts from the mean of the coordinates and repeatedly applies the
    update built from ``numerator`` and ``denominator``, printing a short
    progress report after every pass.

    Args:
        tablename: relation to read the points from. It is interpolated
            into the SQL text as-is, so it must come from a trusted caller.
        geom_col: name of the point geometry column. Also interpolated
            as-is into the SQL text.
        num_iters: maximum number of passes to run.
        tolerance: stop early once the center moves less than this
            distance between two consecutive passes.

    Returns:
        numpy array holding the x and y coordinate of the estimated center.
    """
    query = '''
        SELECT array_agg(ST_X({geom_col})) As x_coords,
               array_agg(ST_Y({geom_col})) As y_coords
          FROM {tablename}
    '''.format(geom_col=geom_col, tablename=tablename)

    try:
        # Imported locally: the module-level import is commented out so
        # that this module can also be loaded outside of PostgreSQL.
        import plpy
        resp = plpy.execute(query)
        # plpy results are indexed by row number first, then column name.
        data = np.vstack((resp[0]['x_coords'],
                          resp[0]['y_coords'])).T
        plpy.notice('coords: %s' % str(data))
    except Exception as err:
        # Development fallback kept from the original: when the query
        # cannot be run, iterate over a random sample of 99 points instead.
        print('No plpy (%s)' % err)
        data = np.array([[1.2 * np.random.random() + 10.,
                          1.1 * (np.random.random() - 1.) + 3.]
                         for _ in range(1, 100)])

    # initialize 'median center' to be the mean
    coords_center_temp = data.mean(axis=0)
    print('temp_center: %s' % str(coords_center_temp))

    for i in range(num_iters):
        old_coords_center = coords_center_temp.copy()
        denom = denominator(coords_center_temp, data)
        weighted_sum = np.sum([point * numerator(coords_center_temp, point)
                               for point in data], axis=0)
        coords_center_temp = weighted_sum / denom
        shift = np.linalg.norm(old_coords_center - coords_center_temp)

        print("Pass #%d" % i)
        print("max, min of data: %0.4f, %0.4f" % (data.max(), data.min()))
        print('temp_center: %s' % str(coords_center_temp))
        print("Change in center: %0.4f" % shift)
        print("Center coords: %s" % str(coords_center_temp))
        print("Objective Function: %0.4f" % obj_func(coords_center_temp, data))

        # Honor the tolerance argument: further passes would move the
        # center by less than the caller asked to resolve.
        if shift < tolerance:
            break

    return coords_center_temp
|
||||
|
||||
|
||||
def obj_func(center_coords, data):
    """Return the geometric-median objective for a candidate center.

    The objective is the sum of the Euclidean distances between
    ``center_coords`` and every row of ``data``.

    Args:
        center_coords: coordinates of the candidate center.
        data: one point per row (a single point is also accepted).

    Returns:
        The summed distance as a float.
    """
    offsets = np.atleast_2d(np.asarray(data, dtype=float)) - center_coords
    # Row-wise norm spelled out by hand so it does not rely on the ``axis``
    # argument of ``np.linalg.norm``, which older numpy releases lack.
    return np.sum(np.sqrt(np.sum(offsets ** 2, axis=1)))
|
||||
|
||||
|
||||
def numerator(center_coords, data_i):
    """Return the weight of a single point: 1 / (distance to the center).

    Args:
        center_coords: coordinates of the current center estimate.
        data_i: coordinates of one data point.

    Returns:
        The reciprocal of the Euclidean distance between the two. The
        distance is clamped to machine epsilon so that a point lying exactly
        on the center yields a large finite weight instead of ``inf``
        (which would turn the next center estimate into ``nan``).
    """
    offset = (np.asarray(center_coords, dtype=float) -
              np.asarray(data_i, dtype=float))
    distance = np.linalg.norm(offset)
    return np.reciprocal(max(distance, np.finfo(float).eps))
|
||||
|
||||
|
||||
def denominator(center_coords, data):
    """Return the Weiszfeld normalizer: the sum over all points of 1 / distance.

    Dividing the weighted coordinate sum (see ``numerator``) by this value
    gives the next center estimate of the Weiszfeld iteration.

    Args:
        center_coords: coordinates of the current center estimate.
        data: one point per row (a single point is also accepted).

    Returns:
        Sum of the reciprocal Euclidean distances from every point to the
        center. Distances are clamped to machine epsilon so a point lying
        exactly on the center contributes a large finite term, not ``inf``.
    """
    offsets = np.atleast_2d(np.asarray(data, dtype=float)) - center_coords
    # Per-point distances; written without ``np.linalg.norm(axis=...)`` so
    # it also works on older numpy releases.
    distances = np.sqrt(np.sum(offsets ** 2, axis=1))
    return np.sum(np.reciprocal(np.maximum(distances, np.finfo(float).eps)))
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Import all functions for clustering"""
|
||||
from moran import *
|
||||
from kmeans import *
|
||||
from getis import *
|
||||
from .moran import *
|
||||
from .kmeans import *
|
||||
from .getis import *
|
||||
|
||||
@@ -47,4 +47,4 @@ class Getis(object):
|
||||
getis = ps.esda.getisord.G_Local(attr_vals, weight,
|
||||
star=True, permutations=permutations)
|
||||
|
||||
return zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order)
|
||||
return list(zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order))
|
||||
|
||||
@@ -28,8 +28,8 @@ class Kmeans(object):
|
||||
ids = result[0]['ids']
|
||||
|
||||
km = KMeans(n_clusters=no_clusters, n_init=no_init)
|
||||
labels = km.fit_predict(zip(xs, ys))
|
||||
return zip(ids, labels)
|
||||
labels = km.fit_predict(list(zip(xs, ys)))
|
||||
return list(zip(ids, labels))
|
||||
|
||||
def nonspatial(self, subquery, colnames, no_clusters=5,
|
||||
standardize=True, id_col='cartodb_id'):
|
||||
@@ -75,18 +75,18 @@ class Kmeans(object):
|
||||
kmeans = KMeans(n_clusters=no_clusters,
|
||||
random_state=0).fit(cluster_columns)
|
||||
|
||||
centers = [json.dumps(dict(zip(colnames, c)))
|
||||
centers = [json.dumps(dict(list(zip(colnames, c))))
|
||||
for c in kmeans.cluster_centers_[kmeans.labels_]]
|
||||
|
||||
silhouettes = metrics.silhouette_samples(cluster_columns,
|
||||
kmeans.labels_,
|
||||
metric='sqeuclidean')
|
||||
|
||||
return zip(kmeans.labels_,
|
||||
return list(zip(kmeans.labels_,
|
||||
centers,
|
||||
silhouettes,
|
||||
[kmeans.inertia_] * kmeans.labels_.shape[0],
|
||||
data[0]['rowid'])
|
||||
data[0]['rowid']))
|
||||
|
||||
|
||||
# -- Preprocessing steps
|
||||
@@ -99,7 +99,7 @@ def _extract_columns(data):
|
||||
# number of columns minus rowid column
|
||||
n_cols = len(data[0]) - 1
|
||||
return np.array([data[0]['arr_col{0}'.format(i+1)]
|
||||
for i in xrange(n_cols)],
|
||||
for i in range(n_cols)],
|
||||
dtype=float).T
|
||||
|
||||
|
||||
|
||||
@@ -75,7 +75,7 @@ class Moran(object):
|
||||
moran_global = ps.esda.moran.Moran(attr_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
return zip([moran_global.I], [moran_global.EI])
|
||||
return list(zip([moran_global.I], [moran_global.EI]))
|
||||
|
||||
def local_stat(self, subquery, attr,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
@@ -139,7 +139,7 @@ class Moran(object):
|
||||
lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
|
||||
lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)
|
||||
|
||||
return zip(
|
||||
return list(zip(
|
||||
quads,
|
||||
lisa.p_sim,
|
||||
lag,
|
||||
@@ -148,7 +148,7 @@ class Moran(object):
|
||||
lisa.z,
|
||||
lisa.Is,
|
||||
weight.id_order
|
||||
)
|
||||
))
|
||||
|
||||
def global_rate_stat(self, subquery, numerator, denominator,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
@@ -194,7 +194,7 @@ class Moran(object):
|
||||
lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
|
||||
return zip([lisa_rate.I], [lisa_rate.EI])
|
||||
return list(zip([lisa_rate.I], [lisa_rate.EI]))
|
||||
|
||||
def local_rate_stat(self, subquery, numerator, denominator,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
@@ -262,7 +262,7 @@ class Moran(object):
|
||||
lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
|
||||
lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)
|
||||
|
||||
return zip(
|
||||
return list(zip(
|
||||
quads,
|
||||
lisa.p_sim,
|
||||
lag,
|
||||
@@ -271,7 +271,7 @@ class Moran(object):
|
||||
lisa.z,
|
||||
lisa.Is,
|
||||
weight.id_order
|
||||
)
|
||||
))
|
||||
|
||||
def local_bivariate_stat(self, subquery, attr1, attr2,
|
||||
permutations, geom_col, id_col,
|
||||
@@ -303,7 +303,7 @@ class Moran(object):
|
||||
# find clustering of significance
|
||||
lisa_sig = quad_position(lisa.q)
|
||||
|
||||
return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
|
||||
return list(zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order))
|
||||
|
||||
# Low level functions ----------------------------------------
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
from core import set_model, get_model, create_model_table
|
||||
@@ -1,86 +0,0 @@
|
||||
import time
|
||||
import plpy
|
||||
import pickle
|
||||
from petname import generate
|
||||
|
||||
def create_model_table():
    """Create the ``model_storage`` table unless it already exists.

    The DDL text, a confirmation message and the stringified execution
    result are all emitted as notices.
    """
    ddl = '''
create table if not exists model_storage(
description text,
name text unique,
model bytea,
feature_names text[],
date_created timestamptz,
id serial primary key);
'''
    plpy.notice(ddl)
    outcome = plpy.execute(plpy.prepare(ddl))
    plpy.notice('Model table successfully created')
    plpy.notice(str(outcome))
|
||||
|
||||
def get_model(model_name):
    """Load the model stored under ``model_name``, if exactly one exists.

    Returns the unpickled model, or ``None`` (after a notice) when the
    lookup does not yield exactly one row. A ``plpy.SPIError`` raised by the
    lookup is reported through ``plpy.error``.
    """
    lookup_sql = '''
SELECT model FROM model_storage
WHERE name = $1;
'''
    try:
        lookup_plan = plpy.prepare(lookup_sql, ['text', ])
        rows = plpy.execute(lookup_plan, [model_name, ])
        n_found = len(rows)
        if n_found != 1:
            plpy.notice('Model not found, or too many models '
                        '({})'.format(n_found))
            model = None
        else:
            # NOTE(review): unpickling executes arbitrary code, so the
            # contents of model_storage must be trusted.
            model = pickle.loads(rows[0]['model'])
            plpy.notice('Model successfully loaded')
    except plpy.SPIError as err:
        plpy.error('ERROR: {}'.format(err))

    return model
|
||||
|
||||
def set_model(model, model_name, feature_names):
    """Store a pickled model in the ``model_storage`` table.

    Args:
        model: the fitted model; stored pickled, with its whitespace-
            normalized ``repr`` as the description.
        model_name: name to store the model under. When ``None`` a random
            two-word name is generated and regenerated until it does not
            clash with a name already in the table.
        feature_names: list of feature name strings stored alongside the
            model as a ``text[]`` value.

    A ``plpy.SPIError`` raised while storing is reported as a notice and not
    re-raised, so a failed store does not abort the caller.

    NOTE(review): the nesting of the name-collision check under
    ``model_name is None`` was reconstructed from a listing without
    indentation -- confirm against the real source file.
    """
    if model_name is None:
        model_name = generate(words=2, separator='_', letters=8)
        existing_names = plpy.execute('''
            SELECT array_agg(name) as name
            FROM model_storage
        ''')
        plpy.notice('nrows: {}'.format(existing_names.nrows()))
        plpy.notice('MODEL NAME: {}'.format(model_name))
        plpy.notice('LEN of ms: {}'.format(len(existing_names)))
        plpy.notice('existing_names: {}'.format(str(existing_names)))
        plpy.notice('existing_names: {}'.format(str(existing_names[0]['name'])))
        plpy.notice('type existing_names: {}'.format(type(existing_names[0]['name'])))
        taken = existing_names[0]['name']
        if taken is not None:
            while model_name in taken:
                model_name = generate(words=2, separator='_', letters=10)
                plpy.notice(model_name)

    # Quote every element so names containing commas, braces, quotes,
    # backslashes or nothing at all still form a valid text[] literal.
    feature_array = _pg_text_array_literal(feature_names)
    description = ' '.join(m.strip() for m in repr(model).split('\n'))

    # store model
    try:
        plan = plpy.prepare('''
            INSERT INTO model_storage(description, name, model, feature_names, date_created)
            VALUES (
              $1,
              $2,
              $3,
              $4::text[],
              to_timestamp($5));
        ''', ['text', 'text', 'bytea', 'text', 'numeric'])
        plpy.notice(feature_array)
        plpy.notice(feature_names)
        plpy.execute(
            plan,
            [description,
             model_name,
             pickle.dumps(model),
             feature_array,
             time.time()]
        )
        plpy.notice('model successfully stored as {}'.format(model_name))
    except plpy.SPIError as err:
        plpy.notice('ERROR: {}\nt: {}'.format(err, time.time()))


def _pg_text_array_literal(values):
    """Render strings as a PostgreSQL array literal with each element quoted."""
    quoted = ['"%s"' % v.replace('\\', '\\\\').replace('"', '\\"')
              for v in values]
    return '{%s}' % ','.join(quoted)
|
||||
@@ -27,7 +27,7 @@ def get_weight(query_res, w_type='knn', num_ngbrs=5):
|
||||
"""
|
||||
|
||||
neighbors = {x['id']: x['neighbors'] for x in query_res}
|
||||
print 'len of neighbors: %d' % len(neighbors)
|
||||
print('len of neighbors: %d' % len(neighbors))
|
||||
|
||||
built_weight = ps.W(neighbors)
|
||||
built_weight.transform = 'r'
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import glm
|
||||
import family
|
||||
import utils
|
||||
import iwls
|
||||
from . import glm
|
||||
from . import family
|
||||
from . import utils
|
||||
from . import iwls
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
from utils import cache_readonly
|
||||
from .utils import cache_readonly
|
||||
from functools import reduce
|
||||
|
||||
class Results(object):
|
||||
"""
|
||||
|
||||
@@ -7,8 +7,8 @@ The one parameter exponential family distributions used by GLM.
|
||||
|
||||
import numpy as np
|
||||
from scipy import special
|
||||
import links as L
|
||||
import varfuncs as V
|
||||
from . import links as L
|
||||
from . import varfuncs as V
|
||||
FLOAT_EPS = np.finfo(float).eps
|
||||
|
||||
|
||||
|
||||
@@ -3,10 +3,10 @@ import numpy as np
|
||||
import numpy.linalg as la
|
||||
from pysal.spreg.utils import RegressionPropsY, spdot
|
||||
import pysal.spreg.user_output as USER
|
||||
from utils import cache_readonly
|
||||
from base import LikelihoodModelResults
|
||||
import family
|
||||
from iwls import iwls
|
||||
from .utils import cache_readonly
|
||||
from .base import LikelihoodModelResults
|
||||
from . import family
|
||||
from .iwls import iwls
|
||||
|
||||
__all__ = ['GLM']
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user