Compare commits
15 commits: develop...model-stor
| Author | SHA1 | Date |
|---|---|---|
|  | daba2f9597 |  |
|  | 8f28f41060 |  |
|  | 7509afa5a6 |  |
|  | a28c68502c |  |
|  | 5b4443ca88 |  |
|  | 2048db33fc |  |
|  | 99e78800b3 |  |
|  | 800648a710 |  |
|  | 91ee6ecc48 |  |
|  | 9a5ab17240 |  |
|  | 65be9befb1 |  |
|  | 37e6b4a228 |  |
|  | 766bfed9be |  |
|  | e8a601e945 |  |
|  | c2be340c07 |  |
.brackets.json (new file, 3 lines)
@@ -0,0 +1,3 @@
{
    "sbruchmann.staticpreview.basepath": "/home/carto/Projects/crankshaft/"
}
.gitignore (vendored, 1 line changed)
@@ -2,3 +2,4 @@ envs/
*.pyc
.DS_Store
.idea/
.*.sw[nop]
.travis.yml (74 lines changed)
@@ -1,48 +1,60 @@
language: c
dist: precise
sudo: required

env:
  global:
    - PAGER=cat
    - PGUSER=postgres
    - PGDATABASE=postgres
    - PGOPTIONS='-c client_min_messages=NOTICE'

jobs:
  include:
    - env: POSTGRESQL_VERSION="9.6" POSTGIS_VERSION="2.5"
      dist: xenial
    - env: POSTGRESQL_VERSION="10" POSTGIS_VERSION="2.5"
      dist: xenial
    - env: POSTGRESQL_VERSION="11" POSTGIS_VERSION="2.5"
      dist: xenial
    - env: POSTGRESQL_VERSION="12" POSTGIS_VERSION="3"
      dist: bionic

before_install:
  - ./check-up-to-date-with-master.sh
  - sudo apt-get -y install python-pip

  - sudo apt-get install -y --allow-unauthenticated --no-install-recommends --no-install-suggests postgresql-$POSTGRESQL_VERSION postgresql-client-$POSTGRESQL_VERSION postgresql-server-dev-$POSTGRESQL_VERSION postgresql-common
  - if [[ $POSTGRESQL_VERSION == '9.6' ]]; then sudo apt-get install -y postgresql-contrib-9.6; fi;
  - sudo apt-get install -y --allow-unauthenticated postgresql-$POSTGRESQL_VERSION-postgis-$POSTGIS_VERSION postgresql-$POSTGRESQL_VERSION-postgis-$POSTGIS_VERSION-scripts postgis
  - sudo apt-get -y install python-software-properties
  - sudo add-apt-repository -y ppa:cartodb/sci
  - sudo add-apt-repository -y ppa:cartodb/postgresql-9.5
  - sudo add-apt-repository -y ppa:cartodb/gis
  - sudo add-apt-repository -y ppa:cartodb/gis-testing
  - sudo apt-get update

  # For pre-12, install plpython2. For PG12 install plpython3
  - if [[ $POSTGRESQL_VERSION != '12' ]]; then sudo apt-get install -y postgresql-plpython-$POSTGRESQL_VERSION python python-pip python-software-properties python-joblib python-nose python-setuptools; else sudo apt-get install -y postgresql-plpython3-12 python3 python3-pip python3-software-properties python3-joblib python3-nose python3-setuptools; fi;
  - if [[ $POSTGRESQL_VERSION == '12' ]]; then echo -e "joblib==0.11\nnumpy==1.13.3\nscipy==0.19.1\npysal==1.14.3\nscikit-learn==0.19.1" > ./src/py/crankshaft/requirements.txt && sed -i -e "s/.*install_requires.*$/    install_requires=['joblib==0.11.0', 'numpy==1.13.3', 'scipy==0.19.1', 'pysal==1.14.3', 'scikit-learn==0.19.1'],/g" ./src/py/crankshaft/setup.py; fi;
  - sudo apt-get -y install python-joblib=0.8.3-1-cdb1
  - sudo apt-get -y install python-numpy=1:1.6.1-6ubuntu1

  - sudo pg_dropcluster --stop $POSTGRESQL_VERSION main
  - sudo rm -rf /etc/postgresql/$POSTGRESQL_VERSION /var/lib/postgresql/$POSTGRESQL_VERSION
  - sudo pg_createcluster -u postgres $POSTGRESQL_VERSION main --start -- -A trust
  - export PGPORT=$(pg_lsclusters | grep $POSTGRESQL_VERSION | awk '{print $3}')
  - sudo apt-get -y install python-scipy=0.14.0-2-cdb6
  - sudo apt-get -y --no-install-recommends install python-sklearn-lib=0.14.1-3-cdb2
  - sudo apt-get -y --no-install-recommends install python-sklearn=0.14.1-3-cdb2
  - sudo apt-get -y --no-install-recommends install python-scikits-learn=0.14.1-3-cdb2

  # Force installation of libgeos-3.5.0 (presumably needed because of existing version of postgis)
  - sudo apt-get -y install libgeos-3.5.0=3.5.0-1cdb2

  # Install postgres db and build deps
  - sudo /etc/init.d/postgresql stop # stop travis default instance
  - sudo apt-get -y remove --purge postgresql-9.1
  - sudo apt-get -y remove --purge postgresql-9.2
  - sudo apt-get -y remove --purge postgresql-9.3
  - sudo apt-get -y remove --purge postgresql-9.4
  - sudo apt-get -y remove --purge postgresql-9.5
  - sudo rm -rf /var/lib/postgresql/
  - sudo rm -rf /var/log/postgresql/
  - sudo rm -rf /etc/postgresql/
  - sudo apt-get -y remove --purge postgis-2.2
  - sudo apt-get -y autoremove

  - sudo apt-get -y install postgresql-9.5=9.5.2-3cdb3
  - sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb3
  - sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb3
  - sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2
  - sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2

  # configure it to accept local connections from postgres
  - echo -e "# TYPE  DATABASE  USER  ADDRESS  METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \
    | sudo tee /etc/postgresql/9.5/main/pg_hba.conf
  - sudo /etc/init.d/postgresql restart 9.5

install:
  - sudo make install

script:
  - make test
  - make test || { cat src/pg/test/regression.diffs; false; }
  - ./check-compatibility.sh

after_failure:
  - pg_lsclusters
  - cat src/pg/test/regression.diffs
  - echo $PGPORT
  - cat /var/log/postgresql/postgresql-$POSTGRESQL_VERSION-main.log
@@ -39,7 +39,9 @@ ALTER EXTENSION crankshaft UPDATE TO 'dev';
If the extension has not previously been installed in a database,
it can be installed directly with:
```sql
CREATE EXTENSION crankshaft WITH VERSION 'dev' CASCADE;
CREATE EXTENSION IF NOT EXISTS plpythonu;
CREATE EXTENSION IF NOT EXISTS postgis;
CREATE EXTENSION crankshaft WITH VERSION 'dev';
```

Once the feature or bugfix is completed and all the tests are passing
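A quick sanity check after either install path (a minimal sketch; the function ships with the extension, in the `cdb_crankshaft` schema set by its control file):

```sql
-- Returns the version string baked into the installed code.
SELECT cdb_crankshaft.cdb_crankshaft_version();
```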
Makefile (4 lines changed)
@@ -23,7 +23,7 @@ test: ## Run the tests for the development version of the extension
    $(MAKE) -C $(EXT_DIR) test

# Generate a new release into release
release: ## Generate a new release of the extension.
release: ## Generate a new release of the extension. Only for release manager
    $(MAKE) -C $(EXT_DIR) release
    $(MAKE) -C $(PYP_DIR) release

@@ -31,7 +31,7 @@ release: ## Generate a new release of the extension.
# Requires sudo.
# Use the RELEASE_VERSION environment variable to deploy a specific version:
#   sudo make deploy RELEASE_VERSION=1.0.0
deploy:
deploy: ## Deploy a released extension. Only for release manager. Requires sudo.
    $(MAKE) -C $(EXT_DIR) deploy
    $(MAKE) -C $(PYP_DIR) deploy
@@ -3,21 +3,9 @@ EXTENSION = crankshaft
PACKAGE = crankshaft
EXTVERSION = $(shell grep default_version $(SELF_DIR)/src/pg/$(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
RELEASE_VERSION ?= $(EXTVERSION)

SED = sed
AWK = awk

PG_CONFIG = pg_config
PG_VERSION_1000 := $(shell $(PG_CONFIG) --version | $(AWK) '{$$2*=1000; print $$2}')
PG_PARALLEL := $(shell [ $(PG_VERSION_1000) -ge 9600 ] && echo true)

PG_12plus := $(shell [ $(PG_VERSION_1000) -ge 12000 ] && echo true)
PYTHON3 ?= $(PG_12plus)

ifeq ($(PYTHON3), true)
PIP := python3 -m pip
NOSETESTS = nosetests3
else
PIP := python2 -m pip
PIP = pip
NOSETESTS = nosetests
endif
AWK = awk
PG_CONFIG = pg_config
PG_PARALLEL := $(shell $(PG_CONFIG) --version | ($(AWK) '{$$2*=1000; if ($$2 >= 9600) print 1; else print 0;}' 2> /dev/null || echo 0))
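Both variants scale the `pg_config` version onto a times-1000 scheme (9.6 becomes 9600, 12 becomes 12000) before comparing. Note this is not the same numbering as the server's own `server_version_num` (9.6 is 90600, 12 is 120000), which check-compatibility.sh queries further down; a quick probe of the latter (a sketch):

```sql
-- e.g. 90600 on PostgreSQL 9.6, 110000 on 11, 120000 on 12
SELECT current_setting('server_version_num')::int AS server_version_num;
```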
NEWS.md (11 lines changed)
@@ -1,14 +1,3 @@
0.9.0 (2019-12-23)
------------------
* Compatibility with PG12.
* Compatibility with python3 (enable with PYTHON3=true env variable, default in PG12+).

0.8.2 (2019-02-07)
------------------
* Update dependencies to match what is being used in production.
* Update travis to xenial, PG10 and 11, and postgis 2.5
* Compatibility with PG11

0.8.1 (2018-03-12)
------------------
* Adds improperly added version files
README.md (14 lines changed)
@@ -8,21 +8,28 @@ CARTO Spatial Analysis extension for PostgreSQL.
* `src/` source code
  - `pg/` contains the PostgreSQL extension source code
  - `py/` Python module source code
* `release` released versions

## Requirements

* PostgreSQL
* plpythonu (for PG12+, plpython3u) and postgis extensions
* plpythonu and postgis extensions
* python-scipy system package (see [src/py/README.md](https://github.com/CartoDB/crankshaft/blob/develop/src/py/README.md))

# Development Process

We distinguish two roles:

* *developers* will implement new functionality and bugfixes into
  the codebase.
* A *release manager* will handle the release process.

We use the branch `develop` as the main integration branch for development. The `master` branch is reserved to handle releases.

The process is as follows:

1. Create a new **topic branch** from `develop` for any new feature or bugfix and commit their changes to it:
1. Create a new **topic branch** from `develop` for any new feature
   or bugfix and commit their changes to it:

   ```shell
   git fetch && git checkout -b my-cool-feature origin/develop
   ```

@@ -32,6 +39,7 @@ The process is as follows:
1. Update the [NEWS.md](https://github.com/CartoDB/crankshaft/blob/develop/NEWS.md) doc.
1. Create a pull request and mention relevant people for a **peer review**.
1. Address the comments and improvements you get from the peer review.
1. Mention `@CartoDB/dataservices` in the PR to get it merged into `develop`.

In order for a pull request to be accepted, the following criteria should be met:
* The peer review should pass and no major issue should be left unaddressed.
@@ -1,6 +1,7 @@
# Release & Deployment Process

:warning: Do not forget about updating dependencies in `cartodb-platform` and `carto-postgres-artifacts` :warning:

The release process of a new version of the extension
shall be performed by the designated *Release Manager*.

## Release steps
* Make sure `develop` branch passes all the tests.
@@ -1,20 +0,0 @@
{
    "name": "crankshaft",
    "current_version": {
        "requires": {
            "postgres": ">=9.5.0",
            "postgis": ">=2.2.0.0",
            "python": ">=2.7.0",
            "joblib": "0.8.3",
            "numpy": "1.6.1",
            "scipy": "0.14.0",
            "pysal": "1.14.3",
            "scikit-learn": "0.14.1"
        },
        "works_with": {
        }
    },

    "exceptional_versions": {
    }
}
@@ -25,6 +25,10 @@ psql -c "SELECT * FROM pg_available_extension_versions WHERE name LIKE 'cranksha

# Install in the fresh DB
psql $DBNAME <<'EOF'
-- Install dependencies
CREATE EXTENSION plpythonu;
CREATE EXTENSION postgis VERSION '2.2.2';

-- Create role publicuser if it does not exist
DO
$$
@@ -40,53 +44,30 @@ END
$$ LANGUAGE plpgsql;

-- Install the default version
CREATE EXTENSION crankshaft CASCADE;
CREATE EXTENSION crankshaft;
\dx
EOF


# Check PG version
PG_VERSION=`psql -q -t -c "SELECT current_setting('server_version_num')"`

# Save public function signatures
if [[ "$PG_VERSION" -lt 110000 ]]; then
    psql $DBNAME -c "
    CREATE TABLE release_function_signatures AS
    SELECT
        p.proname as name,
        pg_catalog.pg_get_function_result(p.oid) as result_type,
        pg_catalog.pg_get_function_arguments(p.oid) as arguments,
        CASE
            WHEN p.proisagg THEN 'agg'
            WHEN p.proiswindow THEN 'window'
            WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
            ELSE 'normal'
        END as type
    FROM pg_catalog.pg_proc p
    LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
    WHERE
        n.nspname = 'cdb_crankshaft'
        AND p.proname LIKE 'cdb_%'
    ORDER BY 1, 2, 4;"
else
    psql $DBNAME -c "
    CREATE TABLE release_function_signatures AS
    SELECT
        p.proname as name,
        pg_catalog.pg_get_function_result(p.oid) as result_type,
        pg_catalog.pg_get_function_arguments(p.oid) as arguments,
        CASE WHEN p.prokind = 'a' THEN 'agg'
             WHEN p.prokind = 'w' THEN 'window'
             WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
             ELSE 'normal'
        END as type
    FROM pg_catalog.pg_proc p
    LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
    WHERE
        n.nspname = 'cdb_crankshaft'
        AND p.proname LIKE 'cdb_%'
    ORDER BY 1, 2, 4;"
fi
psql $DBNAME <<'EOF'
CREATE TABLE release_function_signatures AS
SELECT
    p.proname as name,
    pg_catalog.pg_get_function_result(p.oid) as result_type,
    pg_catalog.pg_get_function_arguments(p.oid) as arguments,
    CASE
        WHEN p.proisagg THEN 'agg'
        WHEN p.proiswindow THEN 'window'
        WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
        ELSE 'normal'
    END as type
FROM pg_catalog.pg_proc p
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
WHERE
    n.nspname = 'cdb_crankshaft'
    AND p.proname LIKE 'cdb_%'
ORDER BY 1, 2, 4;
EOF

# Deploy current dev branch
make clean-dev || die "Could not clean dev files"
@@ -95,42 +76,26 @@ sudo make install || die "Could not deploy current dev branch"
# Check it can be upgraded
psql $DBNAME -c "ALTER EXTENSION crankshaft update to 'dev';" || die "Cannot upgrade to dev version"

if [[ $PG_VERSION -lt 110000 ]]; then
    psql $DBNAME -c "
    CREATE TABLE dev_function_signatures AS
    SELECT p.proname as name,
           pg_catalog.pg_get_function_result(p.oid) as result_type,
           pg_catalog.pg_get_function_arguments(p.oid) as arguments,
           CASE WHEN p.proisagg THEN 'agg'
                WHEN p.proiswindow THEN 'window'
                WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
                ELSE 'normal'
           END as type
    FROM pg_catalog.pg_proc p
    LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
    WHERE
        n.nspname = 'cdb_crankshaft'
        AND p.proname LIKE 'cdb_%'
    ORDER BY 1, 2, 4;"
else
    psql $DBNAME -c "
    CREATE TABLE dev_function_signatures AS
    SELECT p.proname as name,
           pg_catalog.pg_get_function_result(p.oid) as result_type,
           pg_catalog.pg_get_function_arguments(p.oid) as arguments,
           CASE WHEN p.prokind = 'a' THEN 'agg'
                WHEN p.prokind = 'w' THEN 'window'
                WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
                ELSE 'normal'
           END as type
    FROM pg_catalog.pg_proc p
    LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
    WHERE
        n.nspname = 'cdb_crankshaft'
        AND p.proname LIKE 'cdb_%'
    ORDER BY 1, 2, 4;"
fi

# Check against saved public function signatures
psql $DBNAME <<'EOF'
CREATE TABLE dev_function_signatures AS
SELECT
    p.proname as name,
    pg_catalog.pg_get_function_result(p.oid) as result_type,
    pg_catalog.pg_get_function_arguments(p.oid) as arguments,
    CASE
        WHEN p.proisagg THEN 'agg'
        WHEN p.proiswindow THEN 'window'
        WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
        ELSE 'normal'
    END as type
FROM pg_catalog.pg_proc p
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
WHERE
    n.nspname = 'cdb_crankshaft'
    AND p.proname LIKE 'cdb_%'
ORDER BY 1, 2, 4;
EOF

echo "Functions in development not in latest release (ok):"
psql $DBNAME -c "SELECT * FROM dev_function_signatures EXCEPT SELECT * FROM release_function_signatures;"
@@ -4,7 +4,7 @@
-- Version number of the extension release
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text AS $$
    SELECT '0.8.2'::text;
    SELECT '0.9.0'::text;
$$ language 'sql' IMMUTABLE STRICT PARALLEL SAFE;

-- Internal identifier of the installed extension instance
@@ -35,16 +35,25 @@ CREATE OR REPLACE FUNCTION
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;

-- Create aggregate if it did not exist
DO $$ BEGIN
    CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
        SFUNC = CDB_PyAggS,
        STYPE = Numeric[],
        PARALLEL = SAFE,
        INITCOND = "{}"
    );
EXCEPTION
    WHEN duplicate_function THEN NULL;
END $$;
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT *
        FROM pg_catalog.pg_proc p
        LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
        WHERE n.nspname = 'cdb_crankshaft'
            AND p.proname = 'cdb_pyagg'
            AND p.proisagg)
    THEN
        CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
            SFUNC = CDB_PyAggS,
            STYPE = Numeric[],
            PARALLEL = SAFE,
            INITCOND = "{}"
        );
    END IF;
END
$$ LANGUAGE plpgsql;
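Both forms guard the CREATE AGGREGATE because CREATE OR REPLACE AGGREGATE only arrived in PostgreSQL 12; on older servers an unconditional CREATE AGGREGATE in an upgrade script fails once the aggregate exists. A minimal standalone sketch of the exception-based guard (the aggregate here is hypothetical):

```sql
DO $$
BEGIN
    -- numeric_add is the built-in numeric + numeric function.
    CREATE AGGREGATE my_numeric_sum(numeric) (
        SFUNC = numeric_add,
        STYPE = numeric,
        INITCOND = '0'
    );
EXCEPTION
    WHEN duplicate_function THEN NULL;  -- already exists: keep the old definition
END $$;
```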

CREATE OR REPLACE FUNCTION
    CDB_CreateAndPredictSegment(
@@ -89,6 +98,7 @@ CREATE OR REPLACE FUNCTION
        query TEXT,
        variable_name TEXT,
        target_table TEXT,
        model_name text DEFAULT NULL,
        n_estimators INTEGER DEFAULT 1200,
        max_depth INTEGER DEFAULT 3,
        subsample DOUBLE PRECISION DEFAULT 0.5,
@@ -105,24 +115,59 @@ AS $$
        'learning_rate': learning_rate,
        'min_samples_leaf': min_samples_leaf
    }
    feature_cols = set(plpy.execute('''
    all_cols = list(plpy.execute('''
        select * from ({query}) as _w limit 0
    '''.format(query=query)).colnames()) - set([variable_name, 'cartodb_id', ])
    '''.format(query=query)).colnames())
    feature_cols = [a for a in all_cols
                    if a not in [variable_name, 'cartodb_id', ]]
    return seg.create_and_predict_segment(
        query,
        variable_name,
        feature_cols,
        target_table,
        model_params
        model_params,
        model_name=model_name
    )
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

CREATE OR REPLACE FUNCTION
    CDB_RetrieveModelParams(
        model_name text,
        param_name text
    )
RETURNS TABLE(param numeric, feature_name text) AS $$

import pickle
from collections import Iterable

plan = plpy.prepare('''
    SELECT model, feature_names FROM model_storage
    WHERE name = $1;
''', ['text', ])

try:
    model_encoded = plpy.execute(plan, [model_name, ])
except plpy.SPIError as err:
    plpy.error('ERROR: {}'.format(err))
plpy.notice(model_encoded[0]['feature_names'])
model = pickle.loads(
    model_encoded[0]['model']
)

res = getattr(model, param_name)
if not isinstance(res, Iterable):
    raise Exception('Cannot return `{}` as a table'.format(param_name))
return zip(res, model_encoded[0]['feature_names'])

$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
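Taken together, the new `model_name` parameter and `CDB_RetrieveModelParams` allow a train-once, inspect-later workflow. A hedged usage sketch (table and column names are hypothetical, and `feature_importances_` assumes the stored estimator is a scikit-learn gradient boosting model exposing that attribute):

```sql
-- Train and score, persisting the fitted model under a name.
SELECT *
FROM cdb_crankshaft.CDB_CreateAndPredictSegment(
    'SELECT * FROM training_data',  -- query: rows with features and the target
    'price',                        -- variable_name: column to predict
    'target_data',                  -- target_table: rows to score
    model_name := 'price_model'     -- stored in model_storage under this name
);

-- Later: read a per-feature attribute of the pickled model back as a table.
SELECT *
FROM cdb_crankshaft.CDB_RetrieveModelParams('price_model', 'feature_importances_');
```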

CREATE OR REPLACE FUNCTION
    CDB_CreateAndPredictSegment(
        query TEXT,
        variable TEXT,
        feature_columns TEXT[],
        target_query TEXT,
        model_name TEXT DEFAULT NULL,
        n_estimators INTEGER DEFAULT 1200,
        max_depth INTEGER DEFAULT 3,
        subsample DOUBLE PRECISION DEFAULT 0.5,
@@ -144,7 +189,8 @@ AS $$
        variable,
        feature_columns,
        target_query,
        model_params
        model_params,
        model_name=model_name
    )
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
CREATE OR REPLACE FUNCTION CDB_Gravity(
@@ -1104,19 +1150,27 @@ BEGIN
END
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;


-- Create aggregate if it did not exist
DO $$ BEGIN
    CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
        SFUNC = CDB_WeightedMeanS,
        FINALFUNC = CDB_WeightedMeanF,
        STYPE = Numeric[],
        PARALLEL = SAFE,
        INITCOND = "{0.0,0.0,0.0}"
    );
EXCEPTION
    WHEN duplicate_function THEN NULL;
END $$;
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT *
        FROM pg_catalog.pg_proc p
        LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
        WHERE n.nspname = 'cdb_crankshaft'
            AND p.proname = 'cdb_weightedmean'
            AND p.proisagg)
    THEN
        CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
            SFUNC = CDB_WeightedMeanS,
            FINALFUNC = CDB_WeightedMeanF,
            STYPE = Numeric[],
            PARALLEL = SAFE,
            INITCOND = "{0.0,0.0,0.0}"
        );
    END IF;
END
$$ LANGUAGE plpgsql;
-- Spatial Markov

-- input table format:

@@ -4,7 +4,7 @@
-- Version number of the extension release
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text AS $$
    SELECT '0.8.2'::text;
    SELECT '0.9.1'::text;
$$ language 'sql' IMMUTABLE STRICT PARALLEL SAFE;

-- Internal identifier of the installed extension instance
@@ -35,16 +35,25 @@ CREATE OR REPLACE FUNCTION
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;

-- Create aggregate if it did not exist
DO $$ BEGIN
    CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
        SFUNC = CDB_PyAggS,
        STYPE = Numeric[],
        PARALLEL = SAFE,
        INITCOND = "{}"
    );
EXCEPTION
    WHEN duplicate_function THEN NULL;
END $$;
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT *
        FROM pg_catalog.pg_proc p
        LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
        WHERE n.nspname = 'cdb_crankshaft'
            AND p.proname = 'cdb_pyagg'
            AND p.proisagg)
    THEN
        CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
            SFUNC = CDB_PyAggS,
            STYPE = Numeric[],
            PARALLEL = SAFE,
            INITCOND = "{}"
        );
    END IF;
END
$$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION
    CDB_CreateAndPredictSegment(
@@ -89,6 +98,7 @@ CREATE OR REPLACE FUNCTION
        query TEXT,
        variable_name TEXT,
        target_table TEXT,
        model_name text DEFAULT NULL,
        n_estimators INTEGER DEFAULT 1200,
        max_depth INTEGER DEFAULT 3,
        subsample DOUBLE PRECISION DEFAULT 0.5,
@@ -105,24 +115,59 @@ AS $$
        'learning_rate': learning_rate,
        'min_samples_leaf': min_samples_leaf
    }
    feature_cols = set(plpy.execute('''
    all_cols = list(plpy.execute('''
        select * from ({query}) as _w limit 0
    '''.format(query=query)).colnames()) - set([variable_name, 'cartodb_id', ])
    '''.format(query=query)).colnames())
    feature_cols = [a for a in all_cols
                    if a not in [variable_name, 'cartodb_id', ]]
    return seg.create_and_predict_segment(
        query,
        variable_name,
        feature_cols,
        target_table,
        model_params
        model_params,
        model_name=model_name
    )
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

CREATE OR REPLACE FUNCTION
    CDB_RetrieveModelParams(
        model_name text,
        param_name text
    )
RETURNS TABLE(param numeric, feature_name text) AS $$

import pickle
from collections import Iterable

plan = plpy.prepare('''
    SELECT model, feature_names FROM model_storage
    WHERE name = $1;
''', ['text', ])

try:
    model_encoded = plpy.execute(plan, [model_name, ])
except plpy.SPIError as err:
    plpy.error('ERROR: {}'.format(err))
plpy.notice(model_encoded[0]['feature_names'])
model = pickle.loads(
    model_encoded[0]['model']
)

res = getattr(model, param_name)
if not isinstance(res, Iterable):
    raise Exception('Cannot return `{}` as a table'.format(param_name))
return zip(res, model_encoded[0]['feature_names'])

$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

CREATE OR REPLACE FUNCTION
    CDB_CreateAndPredictSegment(
        query TEXT,
        variable TEXT,
        feature_columns TEXT[],
        target_query TEXT,
        model_name TEXT DEFAULT NULL,
        n_estimators INTEGER DEFAULT 1200,
        max_depth INTEGER DEFAULT 3,
        subsample DOUBLE PRECISION DEFAULT 0.5,
@@ -144,7 +189,8 @@ AS $$
        variable,
        feature_columns,
        target_query,
        model_params
        model_params,
        model_name=model_name
    )
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
CREATE OR REPLACE FUNCTION CDB_Gravity(
@@ -1104,19 +1150,27 @@ BEGIN
END
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;


-- Create aggregate if it did not exist
DO $$ BEGIN
    CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
        SFUNC = CDB_WeightedMeanS,
        FINALFUNC = CDB_WeightedMeanF,
        STYPE = Numeric[],
        PARALLEL = SAFE,
        INITCOND = "{0.0,0.0,0.0}"
    );
EXCEPTION
    WHEN duplicate_function THEN NULL;
END $$;
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT *
        FROM pg_catalog.pg_proc p
        LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
        WHERE n.nspname = 'cdb_crankshaft'
            AND p.proname = 'cdb_weightedmean'
            AND p.proisagg)
    THEN
        CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
            SFUNC = CDB_WeightedMeanS,
            FINALFUNC = CDB_WeightedMeanF,
            STYPE = Numeric[],
            PARALLEL = SAFE,
            INITCOND = "{0.0,0.0,0.0}"
        );
    END IF;
END
$$ LANGUAGE plpgsql;
-- Spatial Markov

-- input table format:

@@ -21,7 +21,7 @@ _cdb_random_seeds (seed_value INTEGER) RETURNS VOID
AS $$
    from crankshaft import random_seeds
    random_seeds.set_random_seeds(seed_value)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
CREATE OR REPLACE FUNCTION
    CDB_PyAggS(current_state Numeric[], current_row Numeric[])
returns NUMERIC[] as $$
@@ -35,16 +35,25 @@ CREATE OR REPLACE FUNCTION
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;

-- Create aggregate if it did not exist
DO $$ BEGIN
    CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
        SFUNC = CDB_PyAggS,
        STYPE = Numeric[],
        PARALLEL = SAFE,
        INITCOND = "{}"
    );
EXCEPTION
    WHEN duplicate_function THEN NULL;
END $$;
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT *
        FROM pg_catalog.pg_proc p
        LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
        WHERE n.nspname = 'cdb_crankshaft'
            AND p.proname = 'cdb_pyagg'
            AND p.proisagg)
    THEN
        CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
            SFUNC = CDB_PyAggS,
            STYPE = Numeric[],
            PARALLEL = SAFE,
            INITCOND = "{}"
        );
    END IF;
END
$$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION
    CDB_CreateAndPredictSegment(
@@ -82,13 +91,14 @@ AS $$
        target_ids,
        model_params)

$$ LANGUAGE plpython3u VOLATILE PARALLEL RESTRICTED;
$$ LANGUAGE plpythonu VOLATILE PARALLEL RESTRICTED;

CREATE OR REPLACE FUNCTION
    CDB_CreateAndPredictSegment(
        query TEXT,
        variable_name TEXT,
        target_table TEXT,
        model_name text DEFAULT NULL,
        n_estimators INTEGER DEFAULT 1200,
        max_depth INTEGER DEFAULT 3,
        subsample DOUBLE PRECISION DEFAULT 0.5,
@@ -105,17 +115,51 @@ AS $$
        'learning_rate': learning_rate,
        'min_samples_leaf': min_samples_leaf
    }
    feature_cols = set(plpy.execute('''
    all_cols = list(plpy.execute('''
        select * from ({query}) as _w limit 0
    '''.format(query=query)).colnames()) - set([variable_name, 'cartodb_id', ])
    '''.format(query=query)).colnames())
    feature_cols = [a for a in all_cols
                    if a not in [variable_name, 'cartodb_id', ]]
    return seg.create_and_predict_segment(
        query,
        variable_name,
        feature_cols,
        target_table,
        model_params
        model_params,
        model_name=model_name
    )
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

CREATE OR REPLACE FUNCTION
    CDB_RetrieveModelParams(
        model_name text,
        param_name text
    )
RETURNS TABLE(param numeric, feature_name text) AS $$

import pickle
from collections import Iterable

plan = plpy.prepare('''
    SELECT model, feature_names FROM model_storage
    WHERE name = $1;
''', ['text', ])

try:
    model_encoded = plpy.execute(plan, [model_name, ])
except plpy.SPIError as err:
    plpy.error('ERROR: {}'.format(err))
plpy.notice(model_encoded[0]['feature_names'])
model = pickle.loads(
    model_encoded[0]['model']
)

res = getattr(model, param_name)
if not isinstance(res, Iterable):
    raise Exception('Cannot return `{}` as a table'.format(param_name))
return zip(res, model_encoded[0]['feature_names'])

$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

CREATE OR REPLACE FUNCTION
    CDB_CreateAndPredictSegment(
@@ -123,6 +167,7 @@ CREATE OR REPLACE FUNCTION
        variable TEXT,
        feature_columns TEXT[],
        target_query TEXT,
        model_name TEXT DEFAULT NULL,
        n_estimators INTEGER DEFAULT 1200,
        max_depth INTEGER DEFAULT 3,
        subsample DOUBLE PRECISION DEFAULT 0.5,
@@ -144,9 +189,10 @@ AS $$
        variable,
        feature_columns,
        target_query,
        model_params
        model_params,
        model_name=model_name
    )
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
CREATE OR REPLACE FUNCTION CDB_Gravity(
    IN target_query text,
    IN weight_column text,
@@ -656,7 +702,7 @@ AS $$
moran = Moran()
return moran.global_stat(subquery, column_name, w_type,
                         num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- Moran's I Local (internal function) - DEPRECATED
CREATE OR REPLACE FUNCTION
@@ -681,7 +727,7 @@ AS $$
    num_ngbrs, permutations, geom_col, id_col)
# remove spatial lag
return [(r[6], r[0], r[1], r[7], r[5]) for r in result]
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- Moran's I Local (internal function)
CREATE OR REPLACE FUNCTION
@@ -709,7 +755,7 @@ moran = Moran()
return moran.local_stat(subquery, column_name, w_type,
                        num_ngbrs, permutations, geom_col, id_col)

$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;


-- Moran's I Local (public-facing function)
@@ -836,7 +882,7 @@ AS $$
# TODO: use named parameters or a dictionary
return moran.global_rate_stat(subquery, numerator, denominator, w_type,
                              num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;


-- Moran's I Local Rate (internal function) - DEPRECATED
@@ -864,7 +910,7 @@ AS $$
result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
# remove spatial lag
return [(r[6], r[0], r[1], r[7], r[4]) for r in result]
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- Moran's I Local Rate (public-facing function) - DEPRECATED
CREATE OR REPLACE FUNCTION
@@ -920,7 +966,7 @@ return moran.local_rate_stat(
    geom_col,
    id_col
)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- Moran's I Rate
-- Replaces CDB_AreasOfInterestLocalRate
@@ -1033,7 +1079,7 @@ from crankshaft.clustering import Kmeans
kmeans = Kmeans()
return kmeans.spatial(query, no_clusters, no_init)

$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- Non-spatial k-means clustering
-- query: sql query to retrieve all the needed data
@@ -1063,7 +1109,7 @@ kmeans = Kmeans()
return kmeans.nonspatial(query, colnames, no_clusters,
                         standardize=standardize,
                         id_col=id_col)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;


CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(
@@ -1104,19 +1150,27 @@ BEGIN
END
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;


-- Create aggregate if it did not exist
DO $$ BEGIN
    CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
        SFUNC = CDB_WeightedMeanS,
        FINALFUNC = CDB_WeightedMeanF,
        STYPE = Numeric[],
        PARALLEL = SAFE,
        INITCOND = "{0.0,0.0,0.0}"
    );
EXCEPTION
    WHEN duplicate_function THEN NULL;
END $$;
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT *
        FROM pg_catalog.pg_proc p
        LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
        WHERE n.nspname = 'cdb_crankshaft'
            AND p.proname = 'cdb_weightedmean'
            AND p.proisagg)
    THEN
        CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
            SFUNC = CDB_WeightedMeanS,
            FINALFUNC = CDB_WeightedMeanF,
            STYPE = Numeric[],
            PARALLEL = SAFE,
            INITCOND = "{0.0,0.0,0.0}"
        );
    END IF;
END
$$ LANGUAGE plpgsql;
-- Spatial Markov

-- input table format:
@@ -1146,7 +1200,7 @@ AS $$

## TODO: use named parameters or a dictionary
return markov.spatial_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- input table format: identical to above but in a predictable format
-- Sample function call:
@@ -1172,7 +1226,7 @@ $$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
-- from crankshaft.clustering import moran_local
-- # TODO: use named parameters or a dictionary
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
-- $$ LANGUAGE plpython3u;
-- $$ LANGUAGE plpythonu;
--
-- -- input table format:
-- -- id | geom | date | measurement
@@ -1198,7 +1252,7 @@ $$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
-- from crankshaft.clustering import moran_local
-- # TODO: use named parameters or a dictionary
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
-- $$ LANGUAGE plpython3u;
-- $$ LANGUAGE plpythonu;
-- Based on:
-- https://github.com/mapbox/polylabel/blob/master/index.js
-- https://sites.google.com/site/polesofinaccessibility/
@@ -1468,7 +1522,7 @@ AS $$
from crankshaft.clustering import Getis
getis = Getis()
return getis.getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- TODO: make a version that accepts the values as arrays

@@ -1808,7 +1862,7 @@ gwr = GWR()

return gwr.gwr(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)

$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;


CREATE OR REPLACE FUNCTION
@@ -1826,7 +1880,7 @@ gwr = GWR()

return gwr.gwr_predict(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)

$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
--
-- Creates N points randomly distributed around the polygon
--

@@ -4,7 +4,7 @@
-- Version number of the extension release
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text AS $$
    SELECT '0.9.0'::text;
    SELECT '0.9.2'::text;
$$ language 'sql' IMMUTABLE STRICT PARALLEL SAFE;

-- Internal identifier of the installed extension instance
@@ -21,7 +21,7 @@ _cdb_random_seeds (seed_value INTEGER) RETURNS VOID
AS $$
    from crankshaft import random_seeds
    random_seeds.set_random_seeds(seed_value)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
CREATE OR REPLACE FUNCTION
    CDB_PyAggS(current_state Numeric[], current_row Numeric[])
returns NUMERIC[] as $$
@@ -35,16 +35,25 @@ CREATE OR REPLACE FUNCTION
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;

-- Create aggregate if it did not exist
DO $$ BEGIN
    CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
        SFUNC = CDB_PyAggS,
        STYPE = Numeric[],
        PARALLEL = SAFE,
        INITCOND = "{}"
    );
EXCEPTION
    WHEN duplicate_function THEN NULL;
END $$;
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT *
        FROM pg_catalog.pg_proc p
        LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
        WHERE n.nspname = 'cdb_crankshaft'
            AND p.proname = 'cdb_pyagg'
            AND p.proisagg)
    THEN
        CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
            SFUNC = CDB_PyAggS,
            STYPE = Numeric[],
            PARALLEL = SAFE,
            INITCOND = "{}"
        );
    END IF;
END
$$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION
    CDB_CreateAndPredictSegment(
@@ -82,13 +91,14 @@ AS $$
        target_ids,
        model_params)

$$ LANGUAGE plpython3u VOLATILE PARALLEL RESTRICTED;
$$ LANGUAGE plpythonu VOLATILE PARALLEL RESTRICTED;

CREATE OR REPLACE FUNCTION
    CDB_CreateAndPredictSegment(
        query TEXT,
        variable_name TEXT,
        target_table TEXT,
        model_name text DEFAULT NULL,
        n_estimators INTEGER DEFAULT 1200,
        max_depth INTEGER DEFAULT 3,
        subsample DOUBLE PRECISION DEFAULT 0.5,
@@ -105,17 +115,51 @@ AS $$
        'learning_rate': learning_rate,
        'min_samples_leaf': min_samples_leaf
    }
    feature_cols = set(plpy.execute('''
    all_cols = list(plpy.execute('''
        select * from ({query}) as _w limit 0
    '''.format(query=query)).colnames()) - set([variable_name, 'cartodb_id', ])
    '''.format(query=query)).colnames())
    feature_cols = [a for a in all_cols
                    if a not in [variable_name, 'cartodb_id', ]]
    return seg.create_and_predict_segment(
        query,
        variable_name,
        feature_cols,
        target_table,
        model_params
        model_params,
        model_name=model_name
    )
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

CREATE OR REPLACE FUNCTION
    CDB_RetrieveModelParams(
        model_name text,
        param_name text
    )
RETURNS TABLE(param numeric, feature_name text) AS $$

import pickle
from collections import Iterable

plan = plpy.prepare('''
    SELECT model, feature_names FROM model_storage
    WHERE name = $1;
''', ['text', ])

try:
    model_encoded = plpy.execute(plan, [model_name, ])
except plpy.SPIError as err:
    plpy.error('ERROR: {}'.format(err))
plpy.notice(model_encoded[0]['feature_names'])
model = pickle.loads(
    model_encoded[0]['model']
)

res = getattr(model, param_name)
if not isinstance(res, Iterable):
    raise Exception('Cannot return `{}` as a table'.format(param_name))
return zip(res, model_encoded[0]['feature_names'])

$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

CREATE OR REPLACE FUNCTION
    CDB_CreateAndPredictSegment(
@@ -123,6 +167,7 @@ CREATE OR REPLACE FUNCTION
        variable TEXT,
        feature_columns TEXT[],
        target_query TEXT,
        model_name TEXT DEFAULT NULL,
        n_estimators INTEGER DEFAULT 1200,
        max_depth INTEGER DEFAULT 3,
        subsample DOUBLE PRECISION DEFAULT 0.5,
@@ -144,9 +189,10 @@ AS $$
        variable,
        feature_columns,
        target_query,
        model_params
        model_params,
        model_name=model_name
    )
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
CREATE OR REPLACE FUNCTION CDB_Gravity(
    IN target_query text,
    IN weight_column text,
@@ -656,7 +702,7 @@ AS $$
moran = Moran()
return moran.global_stat(subquery, column_name, w_type,
                         num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- Moran's I Local (internal function) - DEPRECATED
CREATE OR REPLACE FUNCTION
@@ -681,7 +727,7 @@ AS $$
    num_ngbrs, permutations, geom_col, id_col)
# remove spatial lag
return [(r[6], r[0], r[1], r[7], r[5]) for r in result]
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- Moran's I Local (internal function)
CREATE OR REPLACE FUNCTION
@@ -709,7 +755,7 @@ moran = Moran()
return moran.local_stat(subquery, column_name, w_type,
                        num_ngbrs, permutations, geom_col, id_col)

$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;


-- Moran's I Local (public-facing function)
@@ -836,7 +882,7 @@ AS $$
# TODO: use named parameters or a dictionary
return moran.global_rate_stat(subquery, numerator, denominator, w_type,
                              num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;


-- Moran's I Local Rate (internal function) - DEPRECATED
@@ -864,7 +910,7 @@ AS $$
result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
# remove spatial lag
return [(r[6], r[0], r[1], r[7], r[4]) for r in result]
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- Moran's I Local Rate (public-facing function) - DEPRECATED
CREATE OR REPLACE FUNCTION
@@ -920,7 +966,7 @@ return moran.local_rate_stat(
    geom_col,
    id_col
)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- Moran's I Rate
-- Replaces CDB_AreasOfInterestLocalRate
@@ -1033,7 +1079,7 @@ from crankshaft.clustering import Kmeans
kmeans = Kmeans()
return kmeans.spatial(query, no_clusters, no_init)

$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- Non-spatial k-means clustering
-- query: sql query to retrieve all the needed data
@@ -1063,7 +1109,7 @@ kmeans = Kmeans()
return kmeans.nonspatial(query, colnames, no_clusters,
                         standardize=standardize,
                         id_col=id_col)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;


CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(
@@ -1104,19 +1150,27 @@ BEGIN
END
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;


-- Create aggregate if it did not exist
DO $$ BEGIN
    CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
        SFUNC = CDB_WeightedMeanS,
        FINALFUNC = CDB_WeightedMeanF,
        STYPE = Numeric[],
        PARALLEL = SAFE,
        INITCOND = "{0.0,0.0,0.0}"
    );
EXCEPTION
    WHEN duplicate_function THEN NULL;
END $$;
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT *
        FROM pg_catalog.pg_proc p
        LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
        WHERE n.nspname = 'cdb_crankshaft'
            AND p.proname = 'cdb_weightedmean'
            AND p.proisagg)
    THEN
        CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
            SFUNC = CDB_WeightedMeanS,
            FINALFUNC = CDB_WeightedMeanF,
            STYPE = Numeric[],
            PARALLEL = SAFE,
            INITCOND = "{0.0,0.0,0.0}"
        );
    END IF;
END
$$ LANGUAGE plpgsql;
-- Spatial Markov

-- input table format:
@@ -1146,7 +1200,7 @@ AS $$

## TODO: use named parameters or a dictionary
return markov.spatial_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- input table format: identical to above but in a predictable format
-- Sample function call:
@@ -1172,7 +1226,7 @@ $$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
-- from crankshaft.clustering import moran_local
-- # TODO: use named parameters or a dictionary
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
-- $$ LANGUAGE plpython3u;
-- $$ LANGUAGE plpythonu;
--
-- -- input table format:
-- -- id | geom | date | measurement
@@ -1198,7 +1252,7 @@ $$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
-- from crankshaft.clustering import moran_local
-- # TODO: use named parameters or a dictionary
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
-- $$ LANGUAGE plpython3u;
-- $$ LANGUAGE plpythonu;
-- Based on:
-- https://github.com/mapbox/polylabel/blob/master/index.js
-- https://sites.google.com/site/polesofinaccessibility/
@@ -1468,7 +1522,7 @@ AS $$
from crankshaft.clustering import Getis
getis = Getis()
return getis.getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;

-- TODO: make a version that accepts the values as arrays

@@ -1808,7 +1862,7 @@ gwr = GWR()

return gwr.gwr(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)

$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;


CREATE OR REPLACE FUNCTION
@@ -1826,7 +1880,7 @@ gwr = GWR()

return gwr.gwr_predict(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)

$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
--
-- Creates N points randomly distributed around the polygon
--
release/crankshaft--0.9.1.sql (new file, 2393 lines): file diff suppressed because it is too large
release/crankshaft--0.9.2--0.9.3.sql (new file, 2393 lines): file diff suppressed because it is too large
release/crankshaft--0.9.2.sql (new file, 2393 lines): file diff suppressed because it is too large
release/crankshaft--0.9.3--0.9.4.sql (new file, 2421 lines): file diff suppressed because it is too large
release/crankshaft--0.9.3.sql (new file, 2393 lines): file diff suppressed because it is too large
release/crankshaft--0.9.4--0.9.5.sql (new file, 2421 lines): file diff suppressed because it is too large
release/crankshaft--0.9.4.sql (new file, 2421 lines): file diff suppressed because it is too large
release/crankshaft--0.9.5.sql (new file, 2421 lines): file diff suppressed because it is too large
@@ -1,5 +1,5 @@
comment = 'CartoDB Spatial Analysis extension'
default_version = '0.9.0'
requires = 'plpython3u, postgis'
default_version = '0.9.5'
requires = 'plpythonu, postgis'
superuser = true
schema = cdb_crankshaft
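After editing the control file, the advertised default version can be verified from SQL with the standard catalog view (a sketch):

```sql
SELECT name, default_version, installed_version
FROM pg_available_extensions
WHERE name = 'crankshaft';
```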
Binary file not shown.
Binary file not shown.
@@ -1,5 +0,0 @@
joblib==0.9.4
numpy==1.11.0
scipy==0.17.0
pysal==1.14.3
scikit-learn==0.17.0
@@ -4,4 +4,4 @@ import crankshaft.clustering
import crankshaft.space_time_dynamics
import crankshaft.segmentation
import crankshaft.regression
from . import analysis_data_provider
import analysis_data_provider
@@ -1,6 +1,6 @@
"""class for fetching data"""
import plpy
from . import pysal_utils as pu
import pysal_utils as pu

NULL_VALUE_ERROR = ('No usable data passed to analysis. Check your input rows '
                    'for null values and fill in appropriately.')

@@ -0,0 +1,76 @@
"""
Based on the Weiszfeld algorithm:
https://en.wikipedia.org/wiki/Geometric_median
"""


# import plpy
import numpy as np
from numpy.linalg import norm


def median_center(tablename, geom_col, num_iters=50, tolerance=0.001):

    query = '''
        SELECT array_agg(ST_X({geom_col})) As x_coords,
               array_agg(ST_Y({geom_col})) As y_coords
        FROM {tablename}
    '''.format(geom_col=geom_col, tablename=tablename)

    try:
        resp = plpy.execute(query)
        data = np.vstack((resp['x_coords'][0],
                          resp['y_coords'][0])).T

        plpy.notice('coords: %s' % str(data))  # was str(coords): undefined name
    except Exception, err:
        # plpy.error('Analysis failed: %s' % err)
        print('No plpy')
        data = np.array([[1.2 * np.random.random() + 10.,
                          1.1 * (np.random.random() - 1.) + 3.]
                         for i in range(1, 100)])

    # initialize 'median center' to be the mean
    coords_center_temp = data.mean(axis=0)

    # plpy.notice('temp_center: %s' % str(coords_center_temp))
    print('temp_center: %s' % str(coords_center_temp))

    for i in range(0, num_iters):
        old_coords_center = coords_center_temp.copy()
        denom = denominator(coords_center_temp, data)
        coords_center_temp = np.sum([data[j] * numerator(coords_center_temp,
                                                         data[j])
                                     for j in range(len(data))], axis=0)
        coords_center_temp = coords_center_temp / denom

        print("Pass #%d" % i)
        print("max, min of data: %0.4f, %0.4f" % (data.max(), data.min()))
        print('temp_center: %s' % str(coords_center_temp))
        print("Change in center: %0.4f" % np.linalg.norm(old_coords_center -
                                                         coords_center_temp))
        print("Center coords: %s" % str(coords_center_temp))
        print("Objective Function: %0.4f" % obj_func(coords_center_temp, data))

    return coords_center_temp


def obj_func(center_coords, data):
    """

    """
    return np.linalg.norm(center_coords - data)


def numerator(center_coords, data_i):
    """

    """
    return np.reciprocal(np.linalg.norm(center_coords - data_i))


def denominator(center_coords, data):
    """

    """
    return np.reciprocal(np.linalg.norm(data - center_coords))
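For reference, the Weiszfeld update this file is working toward, with $x_j$ the input points and $y_k$ the current center estimate, is

$$y_{k+1} = \frac{\sum_{j} x_j / \lVert x_j - y_k \rVert}{\sum_{j} 1 / \lVert x_j - y_k \rVert}$$

Note that `denominator` and `obj_func` as committed take a single matrix norm over all points rather than summing per-point distances, and the `tolerance` argument is never consulted, so this module reads as work in progress.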

@@ -1,4 +1,4 @@
"""Import all functions from for clustering"""
from .moran import *
from .kmeans import *
from .getis import *
from moran import *
from kmeans import *
from getis import *

@@ -47,4 +47,4 @@ class Getis(object):
        getis = ps.esda.getisord.G_Local(attr_vals, weight,
                                         star=True, permutations=permutations)

        return list(zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order))
        return zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order)

@@ -28,8 +28,8 @@ class Kmeans(object):
        ids = result[0]['ids']

        km = KMeans(n_clusters=no_clusters, n_init=no_init)
        labels = km.fit_predict(list(zip(xs, ys)))
        return list(zip(ids, labels))
        labels = km.fit_predict(zip(xs, ys))
        return zip(ids, labels)

    def nonspatial(self, subquery, colnames, no_clusters=5,
                   standardize=True, id_col='cartodb_id'):
@@ -75,18 +75,18 @@ class Kmeans(object):
        kmeans = KMeans(n_clusters=no_clusters,
                        random_state=0).fit(cluster_columns)

        centers = [json.dumps(dict(list(zip(colnames, c))))
        centers = [json.dumps(dict(zip(colnames, c)))
                   for c in kmeans.cluster_centers_[kmeans.labels_]]

        silhouettes = metrics.silhouette_samples(cluster_columns,
                                                 kmeans.labels_,
                                                 metric='sqeuclidean')

        return list(zip(kmeans.labels_,
        return zip(kmeans.labels_,
                   centers,
                   silhouettes,
                   [kmeans.inertia_] * kmeans.labels_.shape[0],
                   data[0]['rowid']))
                   data[0]['rowid'])


# -- Preprocessing steps
@@ -99,7 +99,7 @@ def _extract_columns(data):
    # number of columns minus rowid column
    n_cols = len(data[0]) - 1
    return np.array([data[0]['arr_col{0}'.format(i+1)]
                     for i in range(n_cols)],
                     for i in xrange(n_cols)],
                    dtype=float).T

@@ -75,7 +75,7 @@ class Moran(object):
        moran_global = ps.esda.moran.Moran(attr_vals, weight,
                                           permutations=permutations)

        return list(zip([moran_global.I], [moran_global.EI]))
        return zip([moran_global.I], [moran_global.EI])

    def local_stat(self, subquery, attr,
                   w_type, num_ngbrs, permutations, geom_col, id_col):
@@ -139,7 +139,7 @@ class Moran(object):
        lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
        lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)

        return list(zip(
        return zip(
            quads,
            lisa.p_sim,
            lag,
@@ -148,7 +148,7 @@ class Moran(object):
            lisa.z,
            lisa.Is,
            weight.id_order
        ))
        )

    def global_rate_stat(self, subquery, numerator, denominator,
                         w_type, num_ngbrs, permutations, geom_col, id_col):
@@ -194,7 +194,7 @@ class Moran(object):
        lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
                                             permutations=permutations)

        return list(zip([lisa_rate.I], [lisa_rate.EI]))
        return zip([lisa_rate.I], [lisa_rate.EI])

    def local_rate_stat(self, subquery, numerator, denominator,
                        w_type, num_ngbrs, permutations, geom_col, id_col):
@@ -262,7 +262,7 @@ class Moran(object):
        lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
        lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)

        return list(zip(
        return zip(
            quads,
            lisa.p_sim,
            lag,
@@ -271,7 +271,7 @@ class Moran(object):
            lisa.z,
            lisa.Is,
            weight.id_order
        ))
        )

    def local_bivariate_stat(self, subquery, attr1, attr2,
                             permutations, geom_col, id_col,
@@ -303,7 +303,7 @@ class Moran(object):
        # find clustering of significance
        lisa_sig = quad_position(lisa.q)

        return list(zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order))
        return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)

# Low level functions ----------------------------------------
|
||||
|
||||
|
||||
@@ -0,0 +1 @@
from core import set_model, get_model, create_model_table
@@ -0,0 +1,86 @@
import time
import plpy
import pickle
from petname import generate


def create_model_table():
    q = '''
        create table if not exists model_storage(
            description text,
            name text unique,
            model bytea,
            feature_names text[],
            date_created timestamptz,
            id serial primary key);
    '''
    plpy.notice(q)
    plan = plpy.prepare(q)
    resp = plpy.execute(plan)
    plpy.notice('Model table successfully created')
    plpy.notice(str(resp))


def get_model(model_name):
    """retrieve model if it exists"""
    try:
        plan = plpy.prepare('''
            SELECT model FROM model_storage
            WHERE name = $1;
        ''', ['text', ])
        model_encoded = plpy.execute(plan, [model_name, ])
        if len(model_encoded) == 1:
            model = pickle.loads(
                model_encoded[0]['model']
            )
            plpy.notice('Model successfully loaded')
        else:
            plpy.notice('Model not found, or too many models '
                        '({})'.format(len(model_encoded)))
            model = None
    except plpy.SPIError as err:
        plpy.error('ERROR: {}'.format(err))

    return model


def set_model(model, model_name, feature_names):
    """stores the model in the table model_storage"""
    if model_name is None:
        model_name = generate(words=2, separator='_', letters=8)
        existing_names = plpy.execute('''
            SELECT array_agg(name) as name
            FROM model_storage
        ''')
        plpy.notice('nrows: {}'.format(existing_names.nrows()))
        plpy.notice('MODEL NAME: {}'.format(model_name))
        plpy.notice('LEN of ms: {}'.format(len(existing_names)))
        plpy.notice('existing_names: {}'.format(str(existing_names)))
        plpy.notice('existing_names: {}'.format(str(existing_names[0]['name'])))
        plpy.notice('type existing_names: {}'.format(type(existing_names[0]['name'])))
        if existing_names[0]['name'] is not None:
            while model_name in existing_names[0]['name']:
                model_name = generate(words=2, separator='_', letters=10)
                plpy.notice(model_name)

    # store model
    try:
        plan = plpy.prepare('''
            INSERT INTO model_storage(description, name, model, feature_names, date_created)
            VALUES (
                $1,
                $2,
                $3,
                $4::text[],
                to_timestamp($5));
        ''', ['text', 'text', 'bytea', 'text', 'numeric'])
        plpy.notice('{%s}' % ','.join(feature_names))
        plpy.notice(feature_names)
        plpy.execute(
            plan,
            [' '.join(m.strip() for m in model.__repr__().split('\n')),
             model_name,
             pickle.dumps(model),
             '{%s}' % ','.join(feature_names),
             time.time()]
        )
        plpy.notice('model successfully stored as {}'.format(model_name))
    except plpy.SPIError as err:
        plpy.notice('ERROR: {}\nt: {}'.format(err, time.time()))
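The storage scheme above round-trips a fitted model through pickle and a bytea column, with petname.generate supplying a readable unique name when none is given; on a collision the while loop simply draws another, longer petname and tries again. A minimal sketch of the pickle round trip outside the database (plain scikit-learn, no plpy; the toy data is invented for illustration):

    import pickle

    import numpy as np
    from sklearn.ensemble import GradientBoostingRegressor

    X = np.random.rand(50, 2)
    y = X.sum(axis=1)
    model = GradientBoostingRegressor().fit(X, y)

    blob = pickle.dumps(model)     # the bytes set_model() binds to the bytea parameter
    restored = pickle.loads(blob)  # what get_model() does with the fetched row

    assert np.allclose(model.predict(X), restored.predict(X))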
@@ -27,7 +27,7 @@ def get_weight(query_res, w_type='knn', num_ngbrs=5):
     """

     neighbors = {x['id']: x['neighbors'] for x in query_res}
-    print('len of neighbors: %d' % len(neighbors))
+    print 'len of neighbors: %d' % len(neighbors)

     built_weight = ps.W(neighbors)
     built_weight.transform = 'r'
@@ -1,4 +1,4 @@
-from . import glm
-from . import family
-from . import utils
-from . import iwls
+import glm
+import family
+import utils
+import iwls

@@ -1,9 +1,8 @@


-from __future__ import print_function
 import numpy as np
 from scipy import stats
-from .utils import cache_readonly
-from functools import reduce
+from utils import cache_readonly

 class Results(object):
     """
@@ -7,8 +7,8 @@ The one parameter exponential family distributions used by GLM.

 import numpy as np
 from scipy import special
-from . import links as L
-from . import varfuncs as V
+import links as L
+import varfuncs as V
 FLOAT_EPS = np.finfo(float).eps

@@ -3,10 +3,10 @@ import numpy as np
 import numpy.linalg as la
 from pysal.spreg.utils import RegressionPropsY, spdot
 import pysal.spreg.user_output as USER
-from .utils import cache_readonly
-from .base import LikelihoodModelResults
-from . import family
-from .iwls import iwls
+from utils import cache_readonly
+from base import LikelihoodModelResults
+import family
+from iwls import iwls

 __all__ = ['GLM']
@@ -3,7 +3,7 @@ import numpy.linalg as la
 from scipy import sparse as sp
 from scipy.sparse import linalg as spla
 from pysal.spreg.utils import spdot, spmultiply
-from .family import Binomial, Poisson
+from family import Binomial, Poisson

 def _compute_betas(y, x):
     """
@@ -49,7 +49,7 @@ def iwls(y, x, family, offset=1.0, ini_betas=None, tol=1.0e-8, max_iter=200, wi=
     if isinstance(family, Binomial):
         y = family.link._clean(y)
     if isinstance(family, Poisson):
-        y_off = y/offset
+        y_off = y/offset
         y_off = family.starting_mu(y_off)
         v = family.predict(y_off)
     mu = family.starting_mu(y)
@@ -58,13 +58,13 @@ def iwls(y, x, family, offset=1.0, ini_betas=None, tol=1.0e-8, max_iter=200, wi=
     v = family.predict(mu)

     while diff > tol and n_iter < max_iter:
-        n_iter += 1
+        n_iter += 1
         w = family.weights(mu)
         z = v + (family.link.deriv(mu)*(y-mu))
         w = np.sqrt(w)
         if type(x) != np.ndarray:
-            w = sp.csr_matrix(w)
-            z = sp.csr_matrix(z)
+            w = sp.csr_matrix(w)
+            z = sp.csr_matrix(z)
         wx = spmultiply(x, w, array_out=False)
         wz = spmultiply(z, w, array_out=False)
         if wi is None:
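The loop touched here is the core of iteratively (re)weighted least squares: update the mean mu, form the working response z and weights w, then solve a weighted least-squares problem. A self-contained sketch of the same idea for a Poisson/log-link GLM, in plain NumPy and independent of this module's family objects (data and names here are illustrative):

    import numpy as np

    def iwls_poisson(X, y, max_iter=25, tol=1.0e-8):
        beta = np.zeros(X.shape[1])
        for _ in range(max_iter):
            eta = X.dot(beta)            # linear predictor
            mu = np.exp(eta)             # inverse of the log link
            z = eta + (y - mu) / mu      # working response, as z is built above
            w = mu                       # IRLS weights for the Poisson family
            XtWX = X.T.dot(X * w[:, None])
            XtWz = X.T.dot(w * z)
            beta_new = np.linalg.solve(XtWX, XtWz)
            if np.max(np.abs(beta_new - beta)) < tol:
                return beta_new
            beta = beta_new
        return beta

For the log link, family.link.deriv(mu) is 1/mu, so z = v + deriv(mu)*(y - mu) in the hunk reduces to the working response used in this sketch.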
@@ -1,5 +1,5 @@


-from __future__ import absolute_import, print_function
 import numpy as np
 import warnings

@@ -17,7 +17,7 @@ try:
     from scipy.lib._version import NumpyVersion
 except ImportError:
     import re
-    string_types = str
+    string_types = basestring

 class NumpyVersion():
     """Parse and compare numpy version strings.
@@ -1 +1 @@
-from .base import *
+from base import *

@@ -1,4 +1,4 @@
-from . import gwr
-from . import sel_bw
-from . import diagnostics
-from . import kernels
+import gwr
+import sel_bw
+import diagnostics
+import kernels
@@ -7,8 +7,8 @@ __author__ = "Taylor Oshan Tayoshan@gmail.com"
 import numpy as np
 import numpy.linalg as la
 from scipy.stats import t
-from .kernels import *
-from .diagnostics import get_AIC, get_AICc, get_BIC
+from kernels import *
+from diagnostics import get_AIC, get_AICc, get_BIC
 import pysal.spreg.user_output as USER
 from crankshaft.regression.glm.family import Gaussian, Binomial, Poisson
 from crankshaft.regression.glm.glm import GLM, GLMResults
@@ -156,7 +156,7 @@ class GWR(GLM):
         self.kernel = kernel
         self.fixed = fixed
         if offset is None:
-            self.offset = np.ones((self.n, 1))
+            self.offset = np.ones((self.n, 1))
         else:
             self.offset = offset * 1.0
         self.fit_params = {}
@@ -169,7 +169,7 @@ class GWR(GLM):
     def _build_W(self, fixed, kernel, coords, bw, points=None):
         if fixed:
             try:
-                W = fk[kernel](coords, bw, points)
+                W = fk[kernel](coords, bw, points)
             except:
                 raise TypeError('Unsupported kernel function ', kernel)
         else:
@@ -177,6 +177,7 @@ class GWR(GLM):
                 W = ak[kernel](coords, bw, points)
             except:
                 raise TypeError('Unsupported kernel function ', kernel)
+
         return W

     def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls'):
@@ -217,7 +218,8 @@ class GWR(GLM):
         p = np.zeros((m, 1))
         for i in range(m):
             wi = self.W[i].reshape((-1,1))
-            rslt = iwls(self.y, self.X, self.family, self.offset, ini_params, tol, max_iter, wi=wi)
+            rslt = iwls(self.y, self.X, self.family, self.offset,
+                        ini_params, tol, max_iter, wi=wi)
             params[i,:] = rslt[0].T
             predy[i] = rslt[1][i]
             v[i] = rslt[2][i]
@@ -257,7 +259,7 @@ class GWRResults(GLMResults):
         fit_params : dict
             key-value pairs of parameters that will be passed into fit method to define estimation
             routine; see fit method for more details


         """
         if (exog_scale is None) & (exog_resid is None):
             train_gwr = self.fit(**fit_params)
@@ -496,7 +498,7 @@ class GWRResults(GLMResults):

         """
         if exog_scale is not None:
-            return cov*exog_scale
+            return cov*exog_scale
         else:
             return cov*self.scale
@@ -520,7 +522,7 @@ class GWRResults(GLMResults):
         weighted mean of y
         """
         if self.model.points is not None:
-            n = len(self.model.points)
+            n = len(self.model.points)
         else:
             n = self.n
         off = self.offset.reshape((-1,1))
@@ -543,13 +545,13 @@ class GWRResults(GLMResults):

         """
         if self.model.points is not None:
-            n = len(self.model.points)
+            n = len(self.model.points)
         else:
             n = self.n
         TSS = np.zeros(shape=(n,1))
         for i in range(n):
-            TSS[i] = np.sum(np.reshape(np.array(self.W[i]), (-1,1)) *
-                            (self.y.reshape((-1,1)) - self.y_bar[i])**2)
+            TSS[i] = np.sum(np.reshape(np.array(self.W[i]), (-1,1)) *
+                            (self.y.reshape((-1,1)) - self.y_bar[i])**2)
         return TSS

     @cache_readonly
@@ -563,15 +565,15 @@ class GWRResults(GLMResults):
         relationships.
         """
         if self.model.points is not None:
-            n = len(self.model.points)
-            resid = self.model.exog_resid.reshape((-1,1))
+            n = len(self.model.points)
+            resid = self.model.exog_resid.reshape((-1,1))
         else:
             n = self.n
             resid = self.resid_response.reshape((-1,1))
-        RSS = np.zeros(shape=(n,1))
+        RSS = np.zeros(shape=(n,1))
         for i in range(n):
             RSS[i] = np.sum(np.reshape(np.array(self.W[i]), (-1,1))
-                            * resid**2)
+                            * resid**2)
         return RSS

     @cache_readonly
@@ -617,10 +619,10 @@ class GWRResults(GLMResults):
         """
         if isinstance(self.family, (Poisson, Binomial)):
             return self.resid_ss/(self.n - 2.0*self.tr_S +
-                                  self.tr_STS) #could be changed to SWSTW - nothing to test against
+                                  self.tr_STS) #could be changed to SWSTW - nothing to test against
         else:
             return self.resid_ss/(self.n - 2.0*self.tr_S +
-                                  self.tr_STS) #could be changed to SWSTW - nothing to test against
+                                  self.tr_STS) #could be changed to SWSTW - nothing to test against
     @cache_readonly
     def sigma2_ML(self):
         """
@@ -673,14 +675,14 @@ class GWRResults(GLMResults):
         Note: in (9.11), p should be tr(S), that is, the effective number of parameters
         """
         return self.std_res**2 * self.influ / (self.tr_S * (1.0-self.influ))


     @cache_readonly
     def deviance(self):
         off = self.offset.reshape((-1,1)).T
         y = self.y
         ybar = self.y_bar
         if isinstance(self.family, Gaussian):
-            raise NotImplementedError('deviance not currently used for Gaussian')
+            raise NotImplementedError('deviance not currently used for Gaussian')
         elif isinstance(self.family, Poisson):
             dev = np.sum(2.0*self.W*(y*np.log(y/(ybar*off))-(y-ybar*off)),axis=1)
         elif isinstance(self.family, Binomial):
@@ -690,7 +692,7 @@ class GWRResults(GLMResults):
     @cache_readonly
     def resid_deviance(self):
         if isinstance(self.family, Gaussian):
-            raise NotImplementedError('deviance not currently used for Gaussian')
+            raise NotImplementedError('deviance not currently used for Gaussian')
         else:
             off = self.offset.reshape((-1,1)).T
             y = self.y
@@ -708,7 +710,7 @@ class GWRResults(GLMResults):
         manual. Equivalent to 1 - (deviance/null deviance)
         """
         if isinstance(self.family, Gaussian):
-            raise NotImplementedError('Not implemented for Gaussian')
+            raise NotImplementedError('Not implemented for Gaussian')
         else:
             return 1.0 - (self.resid_deviance/self.deviance)
@@ -831,8 +833,8 @@ class GWRResults(GLMResults):
     def predictions(self):
         P = self.model.P
         if P is None:
-            raise NotImplementedError('predictions only avaialble if predict'
-                                      'method called on GWR model')
+            raise NotImplementedError('predictions only avaialble if predict'
+                                      'method called on GWR model')
         else:
             predictions = np.sum(P*self.params, axis=1).reshape((-1,1))
         return predictions
@@ -985,7 +987,7 @@ class FBGWR(GWR):
         self.fixed = fixed
         self.constant = constant
         if constant:
-            self.X = USER.check_constant(self.X)
+            self.X = USER.check_constant(self.X)

     def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls'):
         """
@@ -47,14 +47,14 @@ def golden_section(a, c, delta, function, tol, max_iter, int_score=False):
     while np.abs(diff) > tol and iters < max_iter:
         iters += 1
         if int_score:
-            b = np.round(b)
-            d = np.round(d)
+            b = np.round(b)
+            d = np.round(d)

         score_a = function(a)
         score_b = function(b)
         score_c = function(c)
         score_d = function(d)


         if score_b <= score_d:
             opt_val = b
             opt_score = score_b
@@ -73,7 +73,7 @@ def golden_section(a, c, delta, function, tol, max_iter, int_score=False):
         #d = np.round(b)

         #if int_score:
-        #    opt_val = np.round(opt_val)
+        #    opt_val = np.round(opt_val)
         output.append((opt_val, opt_score))
         diff = score_b - score_d
         score = opt_score
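golden_section above keeps a bracket around the optimal bandwidth and shrinks it by the golden ratio each pass, optionally rounding the interior points when only integer bandwidths make sense. A minimal standalone version of the same search, without the int_score rounding or the score bookkeeping (illustrative, not this module's API):

    import math

    def golden_section_min(f, a, c, tol=1.0e-5, max_iter=100):
        phi = (math.sqrt(5) - 1) / 2      # ~0.618, the inverse golden ratio
        for _ in range(max_iter):
            b = c - phi * (c - a)         # interior points of the bracket
            d = a + phi * (c - a)
            if f(b) <= f(d):
                c = d                     # minimum lies in [a, d]
            else:
                a = b                     # minimum lies in [b, c]
            if abs(c - a) < tol:
                break
        return (a + c) / 2

For example, golden_section_min(lambda x: (x - 2.0)**2, 0.0, 5.0) converges to roughly 2.0.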
@@ -146,7 +146,7 @@ def flexible_bw(init, y, X, n, k, family, tol, max_iter, rss_score,
                 gwr_func, bw_func, sel_func):
     if init:
         bw = sel_func(bw_func(y, X))
-        print(bw)
+        print bw
         optim_model = gwr_func(y, X, bw)
         err = optim_model.resid_response.reshape((-1,1))
         est = optim_model.params
@@ -198,7 +198,7 @@ def flexible_bw(init, y, X, n, k, family, tol, max_iter, rss_score,
             new_rss = np.sum((y - predy)**2)
             score = np.abs((new_rss - rss)/new_rss)
             rss = new_rss
-            print(score)
+            print score
             scores.append(score)
             delta = score
         BWs.append(bws)
@@ -8,12 +8,12 @@

 __author__ = "Taylor Oshan Tayoshan@gmail.com"

-from .kernels import *
-from .search import golden_section, equal_interval, flexible_bw
-from .gwr import GWR
+from kernels import *
+from search import golden_section, equal_interval, flexible_bw
+from gwr import GWR
 from crankshaft.regression.glm.family import Gaussian, Poisson, Binomial
 import pysal.spreg.user_output as USER
-from .diagnostics import get_AICc, get_AIC, get_BIC, get_CV
+from diagnostics import get_AICc, get_AIC, get_BIC, get_CV
 from scipy.spatial.distance import pdist, squareform
 from pysal.common import KDTree
 import numpy as np
@@ -197,7 +197,7 @@ class Sel_BW(object):

         if self.fb:
             self._fbw()
-            print(self.bw[1])
+            print self.bw[1]
             self.XB = self.bw[4]
             self.err = self.bw[5]
         else:
@@ -14,7 +14,7 @@ import pysal
 class TestGWRGaussian(unittest.TestCase):
     def setUp(self):
         data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
-        self.coords = list(zip(data.by_col('X'), data.by_col('Y')))
+        self.coords = zip(data.by_col('X'), data.by_col('Y'))
         self.y = np.array(data.by_col('PctBach')).reshape((-1,1))
         rural = np.array(data.by_col('PctRural')).reshape((-1,1))
         pov = np.array(data.by_col('PctPov')).reshape((-1,1))
@@ -56,10 +56,10 @@ class TestGWRGaussian(unittest.TestCase):
         BIC = get_BIC(rslt)
         CV = get_CV(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 894.0)
-        self.assertAlmostEqual(np.floor(AIC), 890.0)
-        self.assertAlmostEqual(np.floor(BIC), 944.0)
-        self.assertAlmostEqual(np.round(CV,2), 18.25)
+        self.assertAlmostEquals(np.floor(AICc), 894.0)
+        self.assertAlmostEquals(np.floor(AIC), 890.0)
+        self.assertAlmostEquals(np.floor(BIC), 944.0)
+        self.assertAlmostEquals(np.round(CV,2), 18.25)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
         np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
@@ -107,10 +107,10 @@ class TestGWRGaussian(unittest.TestCase):
         BIC = get_BIC(rslt)
         CV = get_CV(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 896.0)
-        self.assertAlmostEqual(np.floor(AIC), 892.0)
-        self.assertAlmostEqual(np.floor(BIC), 941.0)
-        self.assertAlmostEqual(np.around(CV, 2), 19.19)
+        self.assertAlmostEquals(np.floor(AICc), 896.0)
+        self.assertAlmostEquals(np.floor(AIC), 892.0)
+        self.assertAlmostEquals(np.floor(BIC), 941.0)
+        self.assertAlmostEquals(np.around(CV, 2), 19.19)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
         np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
@@ -159,10 +159,10 @@ class TestGWRGaussian(unittest.TestCase):
         BIC = get_BIC(rslt)
         CV = get_CV(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 895.0)
-        self.assertAlmostEqual(np.floor(AIC), 890.0)
-        self.assertAlmostEqual(np.floor(BIC), 943.0)
-        self.assertAlmostEqual(np.around(CV, 2), 18.21)
+        self.assertAlmostEquals(np.floor(AICc), 895.0)
+        self.assertAlmostEquals(np.floor(AIC), 890.0)
+        self.assertAlmostEquals(np.floor(BIC), 943.0)
+        self.assertAlmostEquals(np.around(CV, 2), 18.21)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
         np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
@@ -211,10 +211,10 @@ class TestGWRGaussian(unittest.TestCase):
         BIC = get_BIC(rslt)
         CV = get_CV(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 896)
-        self.assertAlmostEqual(np.floor(AIC), 894.0)
-        self.assertAlmostEqual(np.floor(BIC), 922.0)
-        self.assertAlmostEqual(np.around(CV, 2), 17.91)
+        self.assertAlmostEquals(np.floor(AICc), 896)
+        self.assertAlmostEquals(np.floor(AIC), 894.0)
+        self.assertAlmostEquals(np.floor(BIC), 922.0)
+        self.assertAlmostEquals(np.around(CV, 2), 17.91)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
         np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
@@ -314,7 +314,7 @@ class TestGWRGaussian(unittest.TestCase):
 class TestGWRPoisson(unittest.TestCase):
     def setUp(self):
         data = pysal.open(pysal.examples.get_path('Tokyomortality.csv'), mode='Ur')
-        self.coords = list(zip(data.by_col('X_CENTROID'), data.by_col('Y_CENTROID')))
+        self.coords = zip(data.by_col('X_CENTROID'), data.by_col('Y_CENTROID'))
         self.y = np.array(data.by_col('db2564')).reshape((-1,1))
         self.off = np.array(data.by_col('eb2564')).reshape((-1,1))
         OCC = np.array(data.by_col('OCC_TEC')).reshape((-1,1))
@@ -355,9 +355,9 @@ class TestGWRPoisson(unittest.TestCase):
         AIC = get_AIC(rslt)
         BIC = get_BIC(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 13294.0)
-        self.assertAlmostEqual(np.floor(AIC), 13247.0)
-        self.assertAlmostEqual(np.floor(BIC), 13485.0)
+        self.assertAlmostEquals(np.floor(AICc), 13294.0)
+        self.assertAlmostEquals(np.floor(AIC), 13247.0)
+        self.assertAlmostEquals(np.floor(BIC), 13485.0)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-05)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-03)
         np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-03)
@@ -404,9 +404,9 @@ class TestGWRPoisson(unittest.TestCase):
         AIC = get_AIC(rslt)
         BIC = get_BIC(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 13285)
-        self.assertAlmostEqual(np.floor(AIC), 13259.0)
-        self.assertAlmostEqual(np.floor(BIC), 13442.0)
+        self.assertAlmostEquals(np.floor(AICc), 13285)
+        self.assertAlmostEquals(np.floor(AIC), 13259.0)
+        self.assertAlmostEquals(np.floor(BIC), 13442.0)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
         np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
@@ -452,9 +452,9 @@ class TestGWRPoisson(unittest.TestCase):
         AIC = get_AIC(rslt)
         BIC = get_BIC(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 367.0)
-        self.assertAlmostEqual(np.floor(AIC), 361.0)
-        self.assertAlmostEqual(np.floor(BIC), 451.0)
+        self.assertAlmostEquals(np.floor(AICc), 367.0)
+        self.assertAlmostEquals(np.floor(AIC), 361.0)
+        self.assertAlmostEquals(np.floor(BIC), 451.0)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-02,
                                    atol=1e-02)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02, atol=1e-02)
@@ -511,9 +511,9 @@ class TestGWRPoisson(unittest.TestCase):
         AIC = get_AIC(rslt)
         BIC = get_BIC(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 11283.0)
-        self.assertAlmostEqual(np.floor(AIC), 11211.0)
-        self.assertAlmostEqual(np.floor(BIC), 11497.0)
+        self.assertAlmostEquals(np.floor(AICc), 11283.0)
+        self.assertAlmostEquals(np.floor(AIC), 11211.0)
+        self.assertAlmostEquals(np.floor(BIC), 11497.0)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-03)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
         np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
@@ -559,9 +559,9 @@ class TestGWRPoisson(unittest.TestCase):
         AIC = get_AIC(rslt)
         BIC = get_BIC(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 21070.0)
-        self.assertAlmostEqual(np.floor(AIC), 21069.0)
-        self.assertAlmostEqual(np.floor(BIC), 21111.0)
+        self.assertAlmostEquals(np.floor(AICc), 21070.0)
+        self.assertAlmostEquals(np.floor(AIC), 21069.0)
+        self.assertAlmostEquals(np.floor(BIC), 21111.0)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
         np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
@@ -583,7 +583,7 @@ class TestGWRPoisson(unittest.TestCase):
 class TestGWRBinomial(unittest.TestCase):
     def setUp(self):
         data = pysal.open(pysal.examples.get_path('landslides.csv'))
-        self.coords = list(zip(data.by_col('X'), data.by_col('Y')))
+        self.coords = zip(data.by_col('X'), data.by_col('Y'))
         self.y = np.array(data.by_col('Landslid')).reshape((-1,1))
         ELEV = np.array(data.by_col('Elev')).reshape((-1,1))
         SLOPE = np.array(data.by_col('Slope')).reshape((-1,1))
@@ -630,9 +630,9 @@ class TestGWRBinomial(unittest.TestCase):
         AIC = get_AIC(rslt)
         BIC = get_BIC(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 275.0)
-        self.assertAlmostEqual(np.floor(AIC), 271.0)
-        self.assertAlmostEqual(np.floor(BIC), 349.0)
+        self.assertAlmostEquals(np.floor(AICc), 275.0)
+        self.assertAlmostEquals(np.floor(AIC), 271.0)
+        self.assertAlmostEquals(np.floor(BIC), 349.0)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
         np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
@@ -693,9 +693,9 @@ class TestGWRBinomial(unittest.TestCase):
         AIC = get_AIC(rslt)
         BIC = get_BIC(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 277.0)
-        self.assertAlmostEqual(np.floor(AIC), 271.0)
-        self.assertAlmostEqual(np.floor(BIC), 358.0)
+        self.assertAlmostEquals(np.floor(AICc), 277.0)
+        self.assertAlmostEquals(np.floor(AIC), 271.0)
+        self.assertAlmostEquals(np.floor(BIC), 358.0)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
         np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
@@ -756,9 +756,9 @@ class TestGWRBinomial(unittest.TestCase):
         AIC = get_AIC(rslt)
         BIC = get_BIC(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 276.0)
-        self.assertAlmostEqual(np.floor(AIC), 272.0)
-        self.assertAlmostEqual(np.floor(BIC), 341.0)
+        self.assertAlmostEquals(np.floor(AICc), 276.0)
+        self.assertAlmostEquals(np.floor(AIC), 272.0)
+        self.assertAlmostEquals(np.floor(BIC), 341.0)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
         np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
@@ -819,9 +819,9 @@ class TestGWRBinomial(unittest.TestCase):
         AIC = get_AIC(rslt)
         BIC = get_BIC(rslt)

-        self.assertAlmostEqual(np.floor(AICc), 276.0)
-        self.assertAlmostEqual(np.floor(AIC), 273.0)
-        self.assertAlmostEqual(np.floor(BIC), 331.0)
+        self.assertAlmostEquals(np.floor(AICc), 276.0)
+        self.assertAlmostEquals(np.floor(AIC), 273.0)
+        self.assertAlmostEquals(np.floor(BIC), 331.0)
         np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
         np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
         np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
@@ -12,7 +12,7 @@ class TestKernels(unittest.TestCase):
         y = np.arange(5,0, -1)
         np.random.shuffle(x)
         np.random.shuffle(y)
-        self.coords = np.array(list(zip(x, y)))
+        self.coords = np.array(zip(x, y))
         self.fix_gauss_kern = np.array([
             [ 1.        ,  0.38889556,  0.48567179,  0.48567179,  0.89483932],
             [ 0.38889556,  1.        ,  0.89483932,  0.64118039,  0.48567179],
@@ -13,7 +13,7 @@ import pysal
 class TestSelBW(unittest.TestCase):
     def setUp(self):
         data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
-        self.coords = list(zip(data.by_col('X'), data.by_col('Y')))
+        self.coords = zip(data.by_col('X'), data.by_col('Y'))
         self.y = np.array(data.by_col('PctBach')).reshape((-1,1))
         rural = np.array(data.by_col('PctRural')).reshape((-1,1))
         pov = np.array(data.by_col('PctPov')).reshape((-1,1))
@@ -2,8 +2,8 @@
 Geographically weighted regression
 """
 import numpy as np
-from .gwr.base.gwr import GWR as PySAL_GWR
-from .gwr.base.sel_bw import Sel_BW
+from gwr.base.gwr import GWR as PySAL_GWR
+from gwr.base.sel_bw import Sel_BW
 import json
 from crankshaft.analysis_data_provider import AnalysisDataProvider
 import plpy
@@ -48,7 +48,7 @@ class GWR:
         # x, y are centroids of input geometries
         x = np.array(query_result[0]['x'], dtype=np.float)
         y = np.array(query_result[0]['y'], dtype=np.float)
-        coords = list(zip(x, y))
+        coords = zip(x, y)

         # extract dependent variable
         Y = np.array(query_result[0]['dep_var'], dtype=np.float).reshape((-1, 1))
@@ -88,7 +88,7 @@ class GWR:
             bw = np.repeat(float(bw), n)

         # create lists of json objs for model outputs
-        for idx in range(n):
+        for idx in xrange(n):
             coeffs.append(json.dumps({var: model.params[idx, k]
                                       for k, var in enumerate(ind_vars)}))
             stand_errs.append(json.dumps({var: model.bse[idx, k]
@@ -99,8 +99,8 @@ class GWR:
                 json.dumps({var: filtered_t[idx, k]
                             for k, var in enumerate(ind_vars)}))

-        return list(zip(coeffs, stand_errs, t_vals, filtered_t_vals,
-                        predicted, residuals, r_squared, bw, rowid))
+        return zip(coeffs, stand_errs, t_vals, filtered_t_vals,
+                   predicted, residuals, r_squared, bw, rowid)

     def gwr_predict(self, subquery, dep_var, ind_vars,
                     bw=None, fixed=False, kernel='bisquare',
@@ -133,7 +133,7 @@ class GWR:

         x = np.array(query_result[0]['x'], dtype=np.float)
         y = np.array(query_result[0]['y'], dtype=np.float)
-        coords = np.array(list(zip(x, y)), dtype=np.float)
+        coords = np.array(zip(x, y), dtype=np.float)

         # extract dependent variable
         Y = np.array(query_result[0]['dep_var']).reshape((-1, 1))
@@ -190,7 +190,7 @@ class GWR:
         predicted = model.predy.flatten()

         m = len(model.predy)
-        for idx in range(m):
+        for idx in xrange(m):
             coeffs.append(json.dumps({var: model.params[idx, k]
                                       for k, var in enumerate(ind_vars)}))
             stand_errs.append(json.dumps({var: model.bse[idx, k]
@@ -198,5 +198,5 @@ class GWR:
             t_vals.append(json.dumps({var: model.tvalues[idx, k]
                                       for k, var in enumerate(ind_vars)}))

-        return list(zip(coeffs, stand_errs, t_vals,
-                        r_squared, predicted, rowid[test]))
+        return zip(coeffs, stand_errs, t_vals,
+                   r_squared, predicted, rowid[test])
@@ -1,2 +1,2 @@
 """Import all functions from for segmentation"""
-from .segmentation import *
+from segmentation import *
@@ -2,11 +2,14 @@
 Segmentation creation and prediction
 """

+import pickle
 import plpy
 import numpy as np
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn import metrics
 from sklearn.cross_validation import train_test_split
 from crankshaft.analysis_data_provider import AnalysisDataProvider
+from crankshaft import model_storage

+# NOTE: added optional param here

@@ -47,10 +50,11 @@ class Segmentation(object):
                                       model_parameters, 0.2)
         prediction = model.predict(target_features)
         accuracy_array = [accuracy] * prediction.shape[0]
-        return list(zip(target_ids, prediction, accuracy_array))
+        return zip(target_ids, prediction, accuracy_array)

     def create_and_predict_segment(self, query, variable, feature_columns,
                                    target_query, model_params,
+                                   model_name=None,
                                    id_col='cartodb_id'):
         """
         generate a segment with machine learning
@@ -70,16 +74,24 @@ class Segmentation(object):
         (target, features, target_mean,
          feature_means) = self.clean_data(query, variable, feature_columns)

-        model, accuracy = train_model(target, features, model_params, 0.2)
+        model_storage.create_model_table()
+
+        # find model if it exists and is specified
+        if model_name is not None:
+            model = model_storage.get_model(model_name)
+
+        if locals().get('model') is None:
+            model, accuracy = train_model(target, features, model_params, 0.2)
+
         result = self.predict_segment(model, feature_columns, target_query,
                                       feature_means)
         accuracy_array = [accuracy] * result.shape[0]

         rowid = self.data_provider.get_segmentation_data(params)
         '''
         rowid = [{'ids': [2.9, 4.9, 4, 5, 6]}]
         '''
-        return list(zip(rowid[0]['ids'], result, accuracy_array))
+
+        # store the model for later use
+        model_storage.set_model(model, model_name, feature_columns)
+        return zip(rowid[0]['ids'], result, accuracy_array)

     def predict_segment(self, model, feature_columns, target_query,
                         feature_means):
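The new flow is a cache-or-train pattern: ensure the table exists, look the model up by name, train only on a miss, and persist the result. A condensed sketch of that control flow (the helper name is illustrative; the diff above additionally calls set_model unconditionally, and detects the miss via locals().get('model'), which also covers the case where no model_name was supplied):

    def cached_or_trained(model_name, storage, train):
        # storage stands in for the model_storage module above
        model = storage.get_model(model_name) if model_name is not None else None
        if model is None:
            model = train()
            storage.set_model(model, model_name, feature_names=[])
        return model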
@@ -1,2 +1,2 @@
 """Import all functions from clustering libraries."""
-from .markov import *
+from markov import *

@@ -91,7 +91,7 @@ class Markov(object):
         trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, sp_markov_result.classes[:, -1])

         # output the results
-        return list(zip(trend, trend_up, trend_down, volatility, weights.id_order))
+        return zip(trend, trend_up, trend_down, volatility, weights.id_order)

@@ -140,7 +140,7 @@ def rebin_data(time_data, num_time_per_bin):

     return np.array(
         [time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1)
-         for i in range(int(n_max))]).T
+         for i in range(n_max)]).T


 def get_prob_dist(transition_matrix, lag_indices, unit_indices):
@@ -1,5 +1,5 @@
-joblib==0.9.4
-numpy==1.11.0
-scipy==0.17.0
+joblib==0.8.3
+numpy==1.6.1
+scipy==0.14.0
 pysal==1.14.3
-scikit-learn==0.17.0
+scikit-learn==0.14.1

@@ -10,7 +10,7 @@ from setuptools import setup, find_packages
 setup(
     name='crankshaft',

-    version='0.9.0',
+    version='0.0.0',

     description='CartoDB Spatial Analysis Python Library',

@@ -26,7 +26,7 @@ setup(
         'Intended Audience :: Mapping comunity',
         'Topic :: Maps :: Mapping Tools',
         'License :: OSI Approved :: MIT License',
-        'Programming Language :: Python',
+        'Programming Language :: Python :: 2.7',
     ],

     keywords='maps mapping tools spatial analysis geostatistics',

@@ -41,7 +41,7 @@ setup(
     # The choice of component versions is dictated by what's
     # provisioned in the production servers.
     # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
-    install_requires=['joblib==0.9.4', 'numpy==1.11.0', 'scipy==0.17.0', 'pysal==1.14.3', 'scikit-learn==0.17.0'],
+    install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.14.3', 'scikit-learn==0.14.1', 'petname==2.2'],

     requires=['pysal', 'numpy', 'sklearn'],
49 release/python/0.9.0/crankshaft/setup.py-r Normal file
@@ -0,0 +1,49 @@

"""
CartoDB Spatial Analysis Python Library
See:
https://github.com/CartoDB/crankshaft
"""

from setuptools import setup, find_packages

setup(
    name='crankshaft',

    version='0.0.0',

    description='CartoDB Spatial Analysis Python Library',

    url='https://github.com/CartoDB/crankshaft',

    author='Data Services Team - CartoDB',
    author_email='dataservices@cartodb.com',

    license='MIT',

    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Mapping comunity',
        'Topic :: Maps :: Mapping Tools',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 2.7',
    ],

    keywords='maps mapping tools spatial analysis geostatistics',

    packages=find_packages(exclude=['contrib', 'docs', 'tests']),

    extras_require={
        'dev': ['unittest'],
        'test': ['unittest', 'nose', 'mock'],
    },

    # The choice of component versions is dictated by what's
    # provisioned in the production servers.
    # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
    install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.14.3', 'scikit-learn==0.14.1', 'petname==2.2'],

    requires=['pysal', 'numpy', 'sklearn'],

    test_suite='test'
)
6 release/python/0.9.0/crankshaft/test/fixtures/optim.json vendored Normal file
@@ -0,0 +1,6 @@
{
    "production_col": [10, 10, 10],
    "capacity_col": [0.09, 0.31],
    "marginal_col": [5, 5],
    "pairwise": [[1, 2, 3], [3, 2, 1]]
}
@@ -72,7 +72,7 @@ class MoranTest(unittest.TestCase):
         result = moran.local_stat('subquery', 'value',
                                   'knn', 5, 99, 'the_geom', 'cartodb_id')
         result = [(row[0], row[6]) for row in result]
-        zipped_values = list(zip(result, self.moran_data))
+        zipped_values = zip(result, self.moran_data)

         for ([res_quad, res_val], [exp_val, exp_quad]) in zipped_values:
             self.assertAlmostEqual(res_val, exp_val)
@@ -91,7 +91,7 @@ class MoranTest(unittest.TestCase):
                                   'knn', 5, 99, 'the_geom', 'cartodb_id')
         result = [(row[0], row[6]) for row in result]

-        zipped_values = list(zip(result, self.moran_data))
+        zipped_values = zip(result, self.moran_data)

         for ([res_quad, res_val], [exp_val, exp_quad]) in zipped_values:
             self.assertAlmostEqual(res_val, exp_val)
@@ -86,7 +86,7 @@ class GWRTest(unittest.TestCase):

         # unpack response
         coeffs, stand_errs, t_vals, t_vals_filtered, predicteds, \
-            residuals, r_squareds, bws, rowids = list(zip(*gwr_resp))
+            residuals, r_squareds, bws, rowids = zip(*gwr_resp)

         # prepare for comparision
         coeff_known_pctpov = self.knowns['est_pctpov']
@@ -98,13 +98,13 @@ class GWRTest(unittest.TestCase):
         # test pctpov coefficient estimates
         for idx, val in enumerate(coeff_known_pctpov):
             resp_idx = rowids.index(ids[idx])
-            self.assertAlmostEqual(val,
+            self.assertAlmostEquals(val,
                                    json.loads(coeffs[resp_idx])['pctpov'],
                                    places=4)
         # test pctrural tvals
         for idx, val in enumerate(tval_known_pctblack):
             resp_idx = rowids.index(ids[idx])
-            self.assertAlmostEqual(val,
+            self.assertAlmostEquals(val,
                                    json.loads(t_vals[resp_idx])['pctrural'],
                                    places=4)

@@ -119,7 +119,7 @@ class GWRTest(unittest.TestCase):

         # unpack response
         coeffs, stand_errs, t_vals, \
-            r_squareds, predicteds, rowid = list(zip(*gwr_resp))
+            r_squareds, predicteds, rowid = zip(*gwr_resp)
         threshold = 0.01

         for i, idx in enumerate(self.idx_ids_of_unknowns):
@@ -66,7 +66,7 @@ class SegmentationTest(unittest.TestCase):
         test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])
         result = replace_nan_with_mean(test_array, means=None)[0]
         expectation = np.array([1.2, 2.2, 3.2, 2.2, 2.2], dtype=float)
-        self.assertEqual(sorted(result), sorted(expectation))
+        self.assertItemsEqual(result, expectation)

     def test_create_and_predict_segment(self):
         """test segmentation.test_create_and_predict"""
@@ -118,7 +118,7 @@ class SegmentationTest(unittest.TestCase):
                                                  model_parameters,
                                                  id_col='cartodb_id')
         results = [(row[1], row[2]) for row in result]
-        zipped_values = list(zip(results, self.result_seg))
+        zipped_values = zip(results, self.result_seg)
         pre_res = [r[0] for r in self.true_result]
         acc_res = [r[1] for r in self.result_seg]
@@ -98,7 +98,7 @@ class SpaceTimeTests(unittest.TestCase):

         self.assertTrue(result is not None)
         result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
-        print(result[0])
+        print result[0]
         expected = self.markov_data
         for ([res_trend, res_up, res_down, res_vol, res_id],
              [exp_trend, exp_up, exp_down, exp_vol, exp_id]
15 release/python/0.9.0/crankshaft/tools/setup.py Normal file
@@ -0,0 +1,15 @@
from test.helper import plpy, fixture_file
from crankshaft.analysis_data_provider import AnalysisDataProvider
import json
import crankshaft

class RawDataProvider(AnalysisDataProvider):
    def __init__(self, fixturedata):
        self.your_algo_data = fixturedata
    def get_moran(self, params):
        """
        Replace this function name with the one used in your algorithm,
        and make sure to use the same function signature that is written
        for this algo in analysis_data_provider.py
        """
        return self.your_algo_data
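A hypothetical use of this helper in a test, with invented fixture contents, showing that it hands the data back without any database round trip:

    fixture = [{'id': 1, 'value': 0.5, 'neighbors': [2, 3]}]
    provider = RawDataProvider(fixture)
    assert provider.get_moran(params=None) is fixture  # no SQL executed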
@@ -0,0 +1,76 @@
"""
Based on the Weiszfeld algorithm:
https://en.wikipedia.org/wiki/Geometric_median
"""


# import plpy
import numpy as np
from numpy.linalg import norm


def median_center(tablename, geom_col, num_iters=50, tolerance=0.001):

    query = '''
        SELECT array_agg(ST_X({geom_col})) As x_coords,
               array_agg(ST_Y({geom_col})) As y_coords
          FROM {tablename}
    '''.format(geom_col=geom_col, tablename=tablename)

    try:
        resp = plpy.execute(query)
        data = np.vstack((resp['x_coords'][0],
                          resp['y_coords'][0])).T

        plpy.notice('coords: %s' % str(coords))
    except Exception, err:
        # plpy.error('Analysis failed: %s' % err)
        print('No plpy')
        data = np.array([[1.2 * np.random.random() + 10.,
                          1.1 * (np.random.random() - 1.) + 3.]
                         for i in range(1, 100)])

    # initialize 'median center' to be the mean
    coords_center_temp = data.mean(axis=0)

    # plpy.notice('temp_center: %s' % str(coords_center_temp))
    print('temp_center: %s' % str(coords_center_temp))

    for i in range(0, num_iters):
        old_coords_center = coords_center_temp.copy()
        denom = denominator(coords_center_temp, data)
        coords_center_temp = np.sum([data[j] * numerator(coords_center_temp,
                                                         data[j])
                                     for j in range(len(data))], axis=0)
        coords_center_temp = coords_center_temp / denom

        print("Pass #%d" % i)
        print("max, min of data: %0.4f, %0.4f" % (data.max(), data.min()))
        print('temp_center: %s' % str(coords_center_temp))
        print("Change in center: %0.4f" % np.linalg.norm(old_coords_center -
                                                         coords_center_temp))
        print("Center coords: %s" % str(coords_center_temp))
        print("Objective Function: %0.4f" % obj_func(coords_center_temp, data))

    return coords_center_temp


def obj_func(center_coords, data):
    """Frobenius norm of the spread between the candidate center and the data"""
    return np.linalg.norm(center_coords - data)


def numerator(center_coords, data_i):
    """Reciprocal of the distance from the candidate center to one data point"""
    return np.reciprocal(np.linalg.norm(center_coords - data_i))


def denominator(center_coords, data):
    """Reciprocal of the norm of the offsets from the center to all data points"""
    return np.reciprocal(np.linalg.norm(data - center_coords))
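For reference, the textbook form of the Weiszfeld update in a compact, self-contained sketch (plain NumPy, no plpy; the zero-distance guard is an addition not present above). Note one difference: the module's denominator() takes the reciprocal of a single aggregate norm over all points, whereas the classical iteration divides by the sum of per-point reciprocal distances, as below:

    import numpy as np

    def weiszfeld(points, max_iter=100, tol=1.0e-6):
        center = points.mean(axis=0)  # start from the centroid, like median_center()
        for _ in range(max_iter):
            dists = np.linalg.norm(points - center, axis=1)
            weights = 1.0 / np.maximum(dists, 1e-12)  # guard against a coincident point
            new_center = (points * weights[:, None]).sum(axis=0) / weights.sum()
            if np.linalg.norm(new_center - center) < tol:
                return new_center
            center = new_center
        return center

    print(weiszfeld(np.random.rand(100, 2)))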
@@ -0,0 +1 @@
from core import set_model, get_model, create_model_table
@@ -0,0 +1,86 @@
import time
import plpy
import pickle
from petname import generate


def create_model_table():
    q = '''
        create table if not exists model_storage(
            description text,
            name text unique,
            model bytea,
            feature_names text[],
            date_created timestamptz,
            id serial primary key);
    '''
    plpy.notice(q)
    plan = plpy.prepare(q)
    resp = plpy.execute(plan)
    plpy.notice('Model table successfully created')
    plpy.notice(str(resp))


def get_model(model_name):
    """retrieve model if it exists"""
    try:
        plan = plpy.prepare('''
            SELECT model FROM model_storage
            WHERE name = $1;
        ''', ['text', ])
        model_encoded = plpy.execute(plan, [model_name, ])
        if len(model_encoded) == 1:
            model = pickle.loads(
                model_encoded[0]['model']
            )
            plpy.notice('Model successfully loaded')
        else:
            plpy.notice('Model not found, or too many models '
                        '({})'.format(len(model_encoded)))
            model = None
    except plpy.SPIError as err:
        plpy.error('ERROR: {}'.format(err))

    return model


def set_model(model, model_name, feature_names):
    """stores the model in the table model_storage"""
    if model_name is None:
        model_name = generate(words=2, separator='_', letters=8)
        existing_names = plpy.execute('''
            SELECT array_agg(name) as name
            FROM model_storage
        ''')
        plpy.notice('nrows: {}'.format(existing_names.nrows()))
        plpy.notice('MODEL NAME: {}'.format(model_name))
        plpy.notice('LEN of ms: {}'.format(len(existing_names)))
        plpy.notice('existing_names: {}'.format(str(existing_names)))
        plpy.notice('existing_names: {}'.format(str(existing_names[0]['name'])))
        plpy.notice('type existing_names: {}'.format(type(existing_names[0]['name'])))
        if existing_names[0]['name'] is not None:
            while model_name in existing_names[0]['name']:
                model_name = generate(words=2, separator='_', letters=10)
                plpy.notice(model_name)

    # store model
    try:
        plan = plpy.prepare('''
            INSERT INTO model_storage(description, name, model, feature_names, date_created)
            VALUES (
                $1,
                $2,
                $3,
                $4::text[],
                to_timestamp($5));
        ''', ['text', 'text', 'bytea', 'text', 'numeric'])
        plpy.notice('{%s}' % ','.join(feature_names))
        plpy.notice(feature_names)
        plpy.execute(
            plan,
            [' '.join(m.strip() for m in model.__repr__().split('\n')),
             model_name,
             pickle.dumps(model),
             '{%s}' % ','.join(feature_names),
             time.time()]
        )
        plpy.notice('model successfully stored as {}'.format(model_name))
    except plpy.SPIError as err:
        plpy.notice('ERROR: {}\nt: {}'.format(err, time.time()))
Some files were not shown because too many files have changed in this diff.