Compare commits
15 Commits
develop
...
model-stor
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
daba2f9597 | ||
|
|
8f28f41060 | ||
|
|
7509afa5a6 | ||
|
|
a28c68502c | ||
|
|
5b4443ca88 | ||
|
|
2048db33fc | ||
|
|
99e78800b3 | ||
|
|
800648a710 | ||
|
|
91ee6ecc48 | ||
|
|
9a5ab17240 | ||
|
|
65be9befb1 | ||
|
|
37e6b4a228 | ||
|
|
766bfed9be | ||
|
|
e8a601e945 | ||
|
|
c2be340c07 |
3
.brackets.json
Normal file
3
.brackets.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"sbruchmann.staticpreview.basepath": "/home/carto/Projects/crankshaft/"
|
||||||
|
}
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -2,3 +2,4 @@ envs/
|
|||||||
*.pyc
|
*.pyc
|
||||||
.DS_Store
|
.DS_Store
|
||||||
.idea/
|
.idea/
|
||||||
|
.*.sw[nop]
|
||||||
|
|||||||
74
.travis.yml
74
.travis.yml
@@ -1,48 +1,60 @@
|
|||||||
language: c
|
language: c
|
||||||
|
dist: precise
|
||||||
sudo: required
|
sudo: required
|
||||||
|
|
||||||
env:
|
env:
|
||||||
global:
|
global:
|
||||||
- PAGER=cat
|
- PAGER=cat
|
||||||
- PGUSER=postgres
|
|
||||||
- PGDATABASE=postgres
|
|
||||||
- PGOPTIONS='-c client_min_messages=NOTICE'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
include:
|
|
||||||
- env: POSTGRESQL_VERSION="9.6" POSTGIS_VERSION="2.5"
|
|
||||||
dist: xenial
|
|
||||||
- env: POSTGRESQL_VERSION="10" POSTGIS_VERSION="2.5"
|
|
||||||
dist: xenial
|
|
||||||
- env: POSTGRESQL_VERSION="11" POSTGIS_VERSION="2.5"
|
|
||||||
dist: xenial
|
|
||||||
- env: POSTGRESQL_VERSION="12" POSTGIS_VERSION="3"
|
|
||||||
dist: bionic
|
|
||||||
|
|
||||||
before_install:
|
before_install:
|
||||||
|
- ./check-up-to-date-with-master.sh
|
||||||
|
- sudo apt-get -y install python-pip
|
||||||
|
|
||||||
- sudo apt-get install -y --allow-unauthenticated --no-install-recommends --no-install-suggests postgresql-$POSTGRESQL_VERSION postgresql-client-$POSTGRESQL_VERSION postgresql-server-dev-$POSTGRESQL_VERSION postgresql-common
|
- sudo apt-get -y install python-software-properties
|
||||||
- if [[ $POSTGRESQL_VERSION == '9.6' ]]; then sudo apt-get install -y postgresql-contrib-9.6; fi;
|
- sudo add-apt-repository -y ppa:cartodb/sci
|
||||||
- sudo apt-get install -y --allow-unauthenticated postgresql-$POSTGRESQL_VERSION-postgis-$POSTGIS_VERSION postgresql-$POSTGRESQL_VERSION-postgis-$POSTGIS_VERSION-scripts postgis
|
- sudo add-apt-repository -y ppa:cartodb/postgresql-9.5
|
||||||
|
- sudo add-apt-repository -y ppa:cartodb/gis
|
||||||
|
- sudo add-apt-repository -y ppa:cartodb/gis-testing
|
||||||
|
- sudo apt-get update
|
||||||
|
|
||||||
# For pre12, install plpython2. For PG12 install plpython3
|
- sudo apt-get -y install python-joblib=0.8.3-1-cdb1
|
||||||
- if [[ $POSTGRESQL_VERSION != '12' ]]; then sudo apt-get install -y postgresql-plpython-$POSTGRESQL_VERSION python python-pip python-software-properties python-joblib python-nose python-setuptools; else sudo apt-get install -y postgresql-plpython3-12 python3 python3-pip python3-software-properties python3-joblib python3-nose python3-setuptools; fi;
|
- sudo apt-get -y install python-numpy=1:1.6.1-6ubuntu1
|
||||||
- if [[ $POSTGRESQL_VERSION == '12' ]]; then echo -e "joblib==0.11\nnumpy==1.13.3\nscipy==0.19.1\npysal==1.14.3\nscikit-learn==0.19.1" > ./src/py/crankshaft/requirements.txt && sed -i -e "s/.*install_requires.*$/ install_requires=['joblib==0.11.0', 'numpy==1.13.3', 'scipy==0.19.1', 'pysal==1.14.3', 'scikit-learn==0.19.1'],/g" ./src/py/crankshaft/setup.py; fi;
|
|
||||||
|
|
||||||
- sudo pg_dropcluster --stop $POSTGRESQL_VERSION main
|
- sudo apt-get -y install python-scipy=0.14.0-2-cdb6
|
||||||
- sudo rm -rf /etc/postgresql/$POSTGRESQL_VERSION /var/lib/postgresql/$POSTGRESQL_VERSION
|
- sudo apt-get -y --no-install-recommends install python-sklearn-lib=0.14.1-3-cdb2
|
||||||
- sudo pg_createcluster -u postgres $POSTGRESQL_VERSION main --start -- -A trust
|
- sudo apt-get -y --no-install-recommends install python-sklearn=0.14.1-3-cdb2
|
||||||
- export PGPORT=$(pg_lsclusters | grep $POSTGRESQL_VERSION | awk '{print $3}')
|
- sudo apt-get -y --no-install-recommends install python-scikits-learn=0.14.1-3-cdb2
|
||||||
|
|
||||||
|
# Force installation of libgeos-3.5.0 (presumably needed because of existing version of postgis)
|
||||||
|
- sudo apt-get -y install libgeos-3.5.0=3.5.0-1cdb2
|
||||||
|
|
||||||
|
# Install postgres db and build deps
|
||||||
|
- sudo /etc/init.d/postgresql stop # stop travis default instance
|
||||||
|
- sudo apt-get -y remove --purge postgresql-9.1
|
||||||
|
- sudo apt-get -y remove --purge postgresql-9.2
|
||||||
|
- sudo apt-get -y remove --purge postgresql-9.3
|
||||||
|
- sudo apt-get -y remove --purge postgresql-9.4
|
||||||
|
- sudo apt-get -y remove --purge postgresql-9.5
|
||||||
|
- sudo rm -rf /var/lib/postgresql/
|
||||||
|
- sudo rm -rf /var/log/postgresql/
|
||||||
|
- sudo rm -rf /etc/postgresql/
|
||||||
|
- sudo apt-get -y remove --purge postgis-2.2
|
||||||
|
- sudo apt-get -y autoremove
|
||||||
|
|
||||||
|
- sudo apt-get -y install postgresql-9.5=9.5.2-3cdb3
|
||||||
|
- sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb3
|
||||||
|
- sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb3
|
||||||
|
- sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2
|
||||||
|
- sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2
|
||||||
|
|
||||||
|
# configure it to accept local connections from postgres
|
||||||
|
- echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \
|
||||||
|
| sudo tee /etc/postgresql/9.5/main/pg_hba.conf
|
||||||
|
- sudo /etc/init.d/postgresql restart 9.5
|
||||||
|
|
||||||
install:
|
install:
|
||||||
- sudo make install
|
- sudo make install
|
||||||
|
|
||||||
script:
|
script:
|
||||||
- make test
|
- make test || { cat src/pg/test/regression.diffs; false; }
|
||||||
- ./check-compatibility.sh
|
- ./check-compatibility.sh
|
||||||
|
|
||||||
after_failure:
|
|
||||||
- pg_lsclusters
|
|
||||||
- cat src/pg/test/regression.diffs
|
|
||||||
- echo $PGPORT
|
|
||||||
- cat /var/log/postgresql/postgresql-$POSTGRESQL_VERSION-main.log
|
|
||||||
|
|||||||
@@ -39,7 +39,9 @@ ALTER EXTENSION crankshaft UPDATE TO 'dev';
|
|||||||
If the extension has not previously been installed in a database,
|
If the extension has not previously been installed in a database,
|
||||||
it can be installed directly with:
|
it can be installed directly with:
|
||||||
```sql
|
```sql
|
||||||
CREATE EXTENSION crankshaft WITH VERSION 'dev' CASCADE;
|
CREATE EXTENSION IF NOT EXISTS plpythonu;
|
||||||
|
CREATE EXTENSION IF NOT EXISTS postgis;
|
||||||
|
CREATE EXTENSION crankshaft WITH VERSION 'dev';
|
||||||
```
|
```
|
||||||
|
|
||||||
Once the feature or bugfix is completed and all the tests are passing
|
Once the feature or bugfix is completed and all the tests are passing
|
||||||
|
|||||||
4
Makefile
4
Makefile
@@ -23,7 +23,7 @@ test: ## Run the tests for the development version of the extension
|
|||||||
$(MAKE) -C $(EXT_DIR) test
|
$(MAKE) -C $(EXT_DIR) test
|
||||||
|
|
||||||
# Generate a new release into release
|
# Generate a new release into release
|
||||||
release: ## Generate a new release of the extension.
|
release: ## Generate a new release of the extension. Only for release manager
|
||||||
$(MAKE) -C $(EXT_DIR) release
|
$(MAKE) -C $(EXT_DIR) release
|
||||||
$(MAKE) -C $(PYP_DIR) release
|
$(MAKE) -C $(PYP_DIR) release
|
||||||
|
|
||||||
@@ -31,7 +31,7 @@ release: ## Generate a new release of the extension.
|
|||||||
# Requires sudo.
|
# Requires sudo.
|
||||||
# Use the RELEASE_VERSION environment variable to deploy a specific version:
|
# Use the RELEASE_VERSION environment variable to deploy a specific version:
|
||||||
# sudo make deploy RELEASE_VERSION=1.0.0
|
# sudo make deploy RELEASE_VERSION=1.0.0
|
||||||
deploy:
|
deploy: ## Deploy a released extension. Only for release manager. Requires sudo.
|
||||||
$(MAKE) -C $(EXT_DIR) deploy
|
$(MAKE) -C $(EXT_DIR) deploy
|
||||||
$(MAKE) -C $(PYP_DIR) deploy
|
$(MAKE) -C $(PYP_DIR) deploy
|
||||||
|
|
||||||
|
|||||||
@@ -3,21 +3,9 @@ EXTENSION = crankshaft
|
|||||||
PACKAGE = crankshaft
|
PACKAGE = crankshaft
|
||||||
EXTVERSION = $(shell grep default_version $(SELF_DIR)/src/pg/$(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
|
EXTVERSION = $(shell grep default_version $(SELF_DIR)/src/pg/$(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
|
||||||
RELEASE_VERSION ?= $(EXTVERSION)
|
RELEASE_VERSION ?= $(EXTVERSION)
|
||||||
|
|
||||||
SED = sed
|
SED = sed
|
||||||
AWK = awk
|
PIP = pip
|
||||||
|
|
||||||
PG_CONFIG = pg_config
|
|
||||||
PG_VERSION_1000 := $(shell $(PG_CONFIG) --version | $(AWK) '{$$2*=1000; print $$2}')
|
|
||||||
PG_PARALLEL := $(shell [ $(PG_VERSION_1000) -ge 9600 ] && echo true)
|
|
||||||
|
|
||||||
PG_12plus := $(shell [ $(PG_VERSION_1000) -ge 12000 ] && echo true)
|
|
||||||
PYTHON3 ?= $(PG_12plus)
|
|
||||||
|
|
||||||
ifeq ($(PYTHON3), true)
|
|
||||||
PIP := python3 -m pip
|
|
||||||
NOSETESTS = nosetests3
|
|
||||||
else
|
|
||||||
PIP := python2 -m pip
|
|
||||||
NOSETESTS = nosetests
|
NOSETESTS = nosetests
|
||||||
endif
|
AWK = awk
|
||||||
|
PG_CONFIG = pg_config
|
||||||
|
PG_PARALLEL := $(shell $(PG_CONFIG) --version | ($(AWK) '{$$2*=1000; if ($$2 >= 9600) print 1; else print 0;}' 2> /dev/null || echo 0))
|
||||||
|
|||||||
11
NEWS.md
11
NEWS.md
@@ -1,14 +1,3 @@
|
|||||||
0.9.0 (2019-12-23)
|
|
||||||
------------------
|
|
||||||
* Compatibility with PG12.
|
|
||||||
* Compatibility with python3 (enable with PYTHON3=true env variable, default in PG12+).
|
|
||||||
|
|
||||||
0.8.2 (2019-02-07)
|
|
||||||
------------------
|
|
||||||
* Update dependencies to match what is being used in production.
|
|
||||||
* Update travis to xenial, PG10 and 11, and postgis 2.5
|
|
||||||
* Compatibility with PG11
|
|
||||||
|
|
||||||
0.8.1 (2018-03-12)
|
0.8.1 (2018-03-12)
|
||||||
------------------
|
------------------
|
||||||
* Adds improperly added version files
|
* Adds improperly added version files
|
||||||
|
|||||||
14
README.md
14
README.md
@@ -8,21 +8,28 @@ CARTO Spatial Analysis extension for PostgreSQL.
|
|||||||
* `src/` source code
|
* `src/` source code
|
||||||
- `pg/` contains the PostgreSQL extension source code
|
- `pg/` contains the PostgreSQL extension source code
|
||||||
- `py/` Python module source code
|
- `py/` Python module source code
|
||||||
* `release` released versions
|
* `release` released versions
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
* PostgreSQL
|
* PostgreSQL
|
||||||
* plpythonu (for PG12+, plpython3u) and postgis extensions
|
* plpythonu and postgis extensions
|
||||||
* python-scipy system package (see [src/py/README.md](https://github.com/CartoDB/crankshaft/blob/develop/src/py/README.md))
|
* python-scipy system package (see [src/py/README.md](https://github.com/CartoDB/crankshaft/blob/develop/src/py/README.md))
|
||||||
|
|
||||||
# Development Process
|
# Development Process
|
||||||
|
|
||||||
|
We distinguish two roles:
|
||||||
|
|
||||||
|
* *developers* will implement new functionality and bugfixes into
|
||||||
|
the codebase.
|
||||||
|
* A *release manager* will handle the release process.
|
||||||
|
|
||||||
We use the branch `develop` as the main integration branch for development. The `master` is reserved to handle releases.
|
We use the branch `develop` as the main integration branch for development. The `master` is reserved to handle releases.
|
||||||
|
|
||||||
The process is as follows:
|
The process is as follows:
|
||||||
|
|
||||||
1. Create a new **topic branch** from `develop` for any new feature or bugfix and commit their changes to it:
|
1. Create a new **topic branch** from `develop` for any new feature
|
||||||
|
or bugfix and commit their changes to it:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
git fetch && git checkout -b my-cool-feature origin/develop
|
git fetch && git checkout -b my-cool-feature origin/develop
|
||||||
@@ -32,6 +39,7 @@ The process is as follows:
|
|||||||
1. Update the [NEWS.md](https://github.com/CartoDB/crankshaft/blob/develop/NEWS.md) doc.
|
1. Update the [NEWS.md](https://github.com/CartoDB/crankshaft/blob/develop/NEWS.md) doc.
|
||||||
1. Create a pull request and mention relevant people for a **peer review**.
|
1. Create a pull request and mention relevant people for a **peer review**.
|
||||||
1. Address the comments and improvements you get from the peer review.
|
1. Address the comments and improvements you get from the peer review.
|
||||||
|
1. Mention `@CartoDB/dataservices` in the PR to get it merged into `develop`.
|
||||||
|
|
||||||
In order for a pull request to be accepted, the following criteria should be met:
|
In order for a pull request to be accepted, the following criteria should be met:
|
||||||
* The peer review should pass and no major issue should be left unaddressed.
|
* The peer review should pass and no major issue should be left unaddressed.
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
# Release & Deployment Process
|
# Release & Deployment Process
|
||||||
|
|
||||||
:warning: Do not forget about updating dependencies in `cartodb-platform` and `carto-postgres-artifacts` :warning:
|
The release process of a new version of the extension
|
||||||
|
shall be performed by the designated *Release Manager*.
|
||||||
|
|
||||||
## Release steps
|
## Release steps
|
||||||
* Make sure `develop` branch passes all the tests.
|
* Make sure `develop` branch passes all the tests.
|
||||||
|
|||||||
@@ -1,20 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "crankshaft",
|
|
||||||
"current_version": {
|
|
||||||
"requires": {
|
|
||||||
"postgres": ">=9.5.0",
|
|
||||||
"postgis": ">=2.2.0.0",
|
|
||||||
"python": ">=2.7.0",
|
|
||||||
"joblib": "0.8.3",
|
|
||||||
"numpy": "1.6.1",
|
|
||||||
"scipy": "0.14.0",
|
|
||||||
"pysal": "1.14.3",
|
|
||||||
"scikit-learn": "0.14.1"
|
|
||||||
},
|
|
||||||
"works_with": {
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
"exceptional_versions": {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -25,6 +25,10 @@ psql -c "SELECT * FROM pg_available_extension_versions WHERE name LIKE 'cranksha
|
|||||||
|
|
||||||
# Install in the fresh DB
|
# Install in the fresh DB
|
||||||
psql $DBNAME <<'EOF'
|
psql $DBNAME <<'EOF'
|
||||||
|
-- Install dependencies
|
||||||
|
CREATE EXTENSION plpythonu;
|
||||||
|
CREATE EXTENSION postgis VERSION '2.2.2';
|
||||||
|
|
||||||
-- Create role publicuser if it does not exist
|
-- Create role publicuser if it does not exist
|
||||||
DO
|
DO
|
||||||
$$
|
$$
|
||||||
@@ -40,53 +44,30 @@ END
|
|||||||
$$ LANGUAGE plpgsql;
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
-- Install the default version
|
-- Install the default version
|
||||||
CREATE EXTENSION crankshaft CASCADE;
|
CREATE EXTENSION crankshaft;
|
||||||
\dx
|
\dx
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
|
|
||||||
# Check PG version
|
|
||||||
PG_VERSION=`psql -q -t -c "SELECT current_setting('server_version_num')"`
|
|
||||||
|
|
||||||
# Save public function signatures
|
# Save public function signatures
|
||||||
if [[ "$PG_VERSION" -lt 110000 ]]; then
|
psql $DBNAME <<'EOF'
|
||||||
psql $DBNAME -c "
|
CREATE TABLE release_function_signatures AS
|
||||||
CREATE TABLE release_function_signatures AS
|
SELECT
|
||||||
SELECT
|
p.proname as name,
|
||||||
p.proname as name,
|
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
||||||
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
||||||
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
CASE
|
||||||
CASE
|
WHEN p.proisagg THEN 'agg'
|
||||||
WHEN p.proisagg THEN 'agg'
|
WHEN p.proiswindow THEN 'window'
|
||||||
WHEN p.proiswindow THEN 'window'
|
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
||||||
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
ELSE 'normal'
|
||||||
ELSE 'normal'
|
END as type
|
||||||
END as type
|
FROM pg_catalog.pg_proc p
|
||||||
FROM pg_catalog.pg_proc p
|
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
WHERE
|
||||||
WHERE
|
n.nspname = 'cdb_crankshaft'
|
||||||
n.nspname = 'cdb_crankshaft'
|
AND p.proname LIKE 'cdb_%'
|
||||||
AND p.proname LIKE 'cdb_%'
|
ORDER BY 1, 2, 4;
|
||||||
ORDER BY 1, 2, 4;"
|
EOF
|
||||||
else
|
|
||||||
psql $DBNAME -c "
|
|
||||||
CREATE TABLE release_function_signatures AS
|
|
||||||
SELECT
|
|
||||||
p.proname as name,
|
|
||||||
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
|
||||||
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
|
||||||
CASE WHEN p.prokind = 'a' THEN 'agg'
|
|
||||||
WHEN p.prokind = 'w' THEN 'window'
|
|
||||||
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
|
||||||
ELSE 'normal'
|
|
||||||
END as type
|
|
||||||
FROM pg_catalog.pg_proc p
|
|
||||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
|
||||||
WHERE
|
|
||||||
n.nspname = 'cdb_crankshaft'
|
|
||||||
AND p.proname LIKE 'cdb_%'
|
|
||||||
ORDER BY 1, 2, 4;"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Deploy current dev branch
|
# Deploy current dev branch
|
||||||
make clean-dev || die "Could not clean dev files"
|
make clean-dev || die "Could not clean dev files"
|
||||||
@@ -95,42 +76,26 @@ sudo make install || die "Could not deploy current dev branch"
|
|||||||
# Check it can be upgraded
|
# Check it can be upgraded
|
||||||
psql $DBNAME -c "ALTER EXTENSION crankshaft update to 'dev';" || die "Cannot upgrade to dev version"
|
psql $DBNAME -c "ALTER EXTENSION crankshaft update to 'dev';" || die "Cannot upgrade to dev version"
|
||||||
|
|
||||||
if [[ $PG_VERSION -lt 110000 ]]; then
|
# Check against saved public function signatures
|
||||||
psql $DBNAME -c "
|
psql $DBNAME <<'EOF'
|
||||||
CREATE TABLE dev_function_signatures AS
|
CREATE TABLE dev_function_signatures AS
|
||||||
SELECT p.proname as name,
|
SELECT
|
||||||
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
p.proname as name,
|
||||||
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
||||||
CASE WHEN p.proisagg THEN 'agg'
|
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
||||||
WHEN p.proiswindow THEN 'window'
|
CASE
|
||||||
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
WHEN p.proisagg THEN 'agg'
|
||||||
ELSE 'normal'
|
WHEN p.proiswindow THEN 'window'
|
||||||
END as type
|
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
||||||
FROM pg_catalog.pg_proc p
|
ELSE 'normal'
|
||||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
END as type
|
||||||
WHERE
|
FROM pg_catalog.pg_proc p
|
||||||
n.nspname = 'cdb_crankshaft'
|
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||||
AND p.proname LIKE 'cdb_%'
|
WHERE
|
||||||
ORDER BY 1, 2, 4;"
|
n.nspname = 'cdb_crankshaft'
|
||||||
else
|
AND p.proname LIKE 'cdb_%'
|
||||||
psql $DBNAME -c "
|
ORDER BY 1, 2, 4;
|
||||||
CREATE TABLE dev_function_signatures AS
|
EOF
|
||||||
SELECT p.proname as name,
|
|
||||||
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
|
||||||
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
|
||||||
CASE WHEN p.prokind = 'a' THEN 'agg'
|
|
||||||
WHEN p.prokind = 'w' THEN 'window'
|
|
||||||
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
|
||||||
ELSE 'normal'
|
|
||||||
END as type
|
|
||||||
FROM pg_catalog.pg_proc p
|
|
||||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
|
||||||
WHERE
|
|
||||||
n.nspname = 'cdb_crankshaft'
|
|
||||||
AND p.proname LIKE 'cdb_%'
|
|
||||||
ORDER BY 1, 2, 4;"
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
echo "Functions in development not in latest release (ok):"
|
echo "Functions in development not in latest release (ok):"
|
||||||
psql $DBNAME -c "SELECT * FROM dev_function_signatures EXCEPT SELECT * FROM release_function_signatures;"
|
psql $DBNAME -c "SELECT * FROM dev_function_signatures EXCEPT SELECT * FROM release_function_signatures;"
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
-- Version number of the extension release
|
-- Version number of the extension release
|
||||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||||
RETURNS text AS $$
|
RETURNS text AS $$
|
||||||
SELECT '0.8.2'::text;
|
SELECT '0.9.0'::text;
|
||||||
$$ language 'sql' IMMUTABLE STRICT PARALLEL SAFE;
|
$$ language 'sql' IMMUTABLE STRICT PARALLEL SAFE;
|
||||||
|
|
||||||
-- Internal identifier of the installed extension instance
|
-- Internal identifier of the installed extension instance
|
||||||
@@ -35,16 +35,25 @@ CREATE OR REPLACE FUNCTION
|
|||||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
-- Create aggregate if it did not exist
|
-- Create aggregate if it did not exist
|
||||||
DO $$ BEGIN
|
DO $$
|
||||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
BEGIN
|
||||||
SFUNC = CDB_PyAggS,
|
IF NOT EXISTS (
|
||||||
STYPE = Numeric[],
|
SELECT *
|
||||||
PARALLEL = SAFE,
|
FROM pg_catalog.pg_proc p
|
||||||
INITCOND = "{}"
|
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||||
);
|
WHERE n.nspname = 'cdb_crankshaft'
|
||||||
EXCEPTION
|
AND p.proname = 'cdb_pyagg'
|
||||||
WHEN duplicate_function THEN NULL;
|
AND p.proisagg)
|
||||||
END $$;
|
THEN
|
||||||
|
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||||
|
SFUNC = CDB_PyAggS,
|
||||||
|
STYPE = Numeric[],
|
||||||
|
PARALLEL = SAFE,
|
||||||
|
INITCOND = "{}"
|
||||||
|
);
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
CDB_CreateAndPredictSegment(
|
CDB_CreateAndPredictSegment(
|
||||||
@@ -89,6 +98,7 @@ CREATE OR REPLACE FUNCTION
|
|||||||
query TEXT,
|
query TEXT,
|
||||||
variable_name TEXT,
|
variable_name TEXT,
|
||||||
target_table TEXT,
|
target_table TEXT,
|
||||||
|
model_name text DEFAULT NULL,
|
||||||
n_estimators INTEGER DEFAULT 1200,
|
n_estimators INTEGER DEFAULT 1200,
|
||||||
max_depth INTEGER DEFAULT 3,
|
max_depth INTEGER DEFAULT 3,
|
||||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||||
@@ -105,24 +115,59 @@ AS $$
|
|||||||
'learning_rate': learning_rate,
|
'learning_rate': learning_rate,
|
||||||
'min_samples_leaf': min_samples_leaf
|
'min_samples_leaf': min_samples_leaf
|
||||||
}
|
}
|
||||||
feature_cols = set(plpy.execute('''
|
all_cols = list(plpy.execute('''
|
||||||
select * from ({query}) as _w limit 0
|
select * from ({query}) as _w limit 0
|
||||||
'''.format(query=query)).colnames()) - set([variable_name, 'cartodb_id', ])
|
'''.format(query=query)).colnames())
|
||||||
|
feature_cols = [a for a in all_cols
|
||||||
|
if a not in [variable_name, 'cartodb_id', ]]
|
||||||
return seg.create_and_predict_segment(
|
return seg.create_and_predict_segment(
|
||||||
query,
|
query,
|
||||||
variable_name,
|
variable_name,
|
||||||
feature_cols,
|
feature_cols,
|
||||||
target_table,
|
target_table,
|
||||||
model_params
|
model_params,
|
||||||
|
model_name=model_name
|
||||||
)
|
)
|
||||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION
|
||||||
|
CDB_RetrieveModelParams(
|
||||||
|
model_name text,
|
||||||
|
param_name text
|
||||||
|
)
|
||||||
|
RETURNS TABLE(param numeric, feature_name text) AS $$
|
||||||
|
|
||||||
|
import pickle
|
||||||
|
from collections import Iterable
|
||||||
|
|
||||||
|
plan = plpy.prepare('''
|
||||||
|
SELECT model, feature_names FROM model_storage
|
||||||
|
WHERE name = $1;
|
||||||
|
''', ['text', ])
|
||||||
|
|
||||||
|
try:
|
||||||
|
model_encoded = plpy.execute(plan, [model_name, ])
|
||||||
|
except plpy.SPIError as err:
|
||||||
|
plpy.error('ERROR: {}'.format(err))
|
||||||
|
plpy.notice(model_encoded[0]['feature_names'])
|
||||||
|
model = pickle.loads(
|
||||||
|
model_encoded[0]['model']
|
||||||
|
)
|
||||||
|
|
||||||
|
res = getattr(model, param_name)
|
||||||
|
if not isinstance(res, Iterable):
|
||||||
|
raise Exception('Cannot return `{}` as a table'.format(param_name))
|
||||||
|
return zip(res, model_encoded[0]['feature_names'])
|
||||||
|
|
||||||
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
CDB_CreateAndPredictSegment(
|
CDB_CreateAndPredictSegment(
|
||||||
query TEXT,
|
query TEXT,
|
||||||
variable TEXT,
|
variable TEXT,
|
||||||
feature_columns TEXT[],
|
feature_columns TEXT[],
|
||||||
target_query TEXT,
|
target_query TEXT,
|
||||||
|
model_name TEXT DEFAULT NULL,
|
||||||
n_estimators INTEGER DEFAULT 1200,
|
n_estimators INTEGER DEFAULT 1200,
|
||||||
max_depth INTEGER DEFAULT 3,
|
max_depth INTEGER DEFAULT 3,
|
||||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||||
@@ -144,7 +189,8 @@ AS $$
|
|||||||
variable,
|
variable,
|
||||||
feature_columns,
|
feature_columns,
|
||||||
target_query,
|
target_query,
|
||||||
model_params
|
model_params,
|
||||||
|
model_name=model_name
|
||||||
)
|
)
|
||||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
||||||
@@ -1104,19 +1150,27 @@ BEGIN
|
|||||||
END
|
END
|
||||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
|
|
||||||
-- Create aggregate if it did not exist
|
-- Create aggregate if it did not exist
|
||||||
DO $$ BEGIN
|
DO $$
|
||||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
BEGIN
|
||||||
SFUNC = CDB_WeightedMeanS,
|
IF NOT EXISTS (
|
||||||
FINALFUNC = CDB_WeightedMeanF,
|
SELECT *
|
||||||
STYPE = Numeric[],
|
FROM pg_catalog.pg_proc p
|
||||||
PARALLEL = SAFE,
|
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||||
INITCOND = "{0.0,0.0,0.0}"
|
WHERE n.nspname = 'cdb_crankshaft'
|
||||||
);
|
AND p.proname = 'cdb_weightedmean'
|
||||||
EXCEPTION
|
AND p.proisagg)
|
||||||
WHEN duplicate_function THEN NULL;
|
THEN
|
||||||
END $$;
|
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||||
|
SFUNC = CDB_WeightedMeanS,
|
||||||
|
FINALFUNC = CDB_WeightedMeanF,
|
||||||
|
STYPE = Numeric[],
|
||||||
|
PARALLEL = SAFE,
|
||||||
|
INITCOND = "{0.0,0.0,0.0}"
|
||||||
|
);
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
-- Spatial Markov
|
-- Spatial Markov
|
||||||
|
|
||||||
-- input table format:
|
-- input table format:
|
||||||
@@ -4,7 +4,7 @@
|
|||||||
-- Version number of the extension release
|
-- Version number of the extension release
|
||||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||||
RETURNS text AS $$
|
RETURNS text AS $$
|
||||||
SELECT '0.8.2'::text;
|
SELECT '0.9.1'::text;
|
||||||
$$ language 'sql' IMMUTABLE STRICT PARALLEL SAFE;
|
$$ language 'sql' IMMUTABLE STRICT PARALLEL SAFE;
|
||||||
|
|
||||||
-- Internal identifier of the installed extension instance
|
-- Internal identifier of the installed extension instance
|
||||||
@@ -35,16 +35,25 @@ CREATE OR REPLACE FUNCTION
|
|||||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
-- Create aggregate if it did not exist
|
-- Create aggregate if it did not exist
|
||||||
DO $$ BEGIN
|
DO $$
|
||||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
BEGIN
|
||||||
SFUNC = CDB_PyAggS,
|
IF NOT EXISTS (
|
||||||
STYPE = Numeric[],
|
SELECT *
|
||||||
PARALLEL = SAFE,
|
FROM pg_catalog.pg_proc p
|
||||||
INITCOND = "{}"
|
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||||
);
|
WHERE n.nspname = 'cdb_crankshaft'
|
||||||
EXCEPTION
|
AND p.proname = 'cdb_pyagg'
|
||||||
WHEN duplicate_function THEN NULL;
|
AND p.proisagg)
|
||||||
END $$;
|
THEN
|
||||||
|
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||||
|
SFUNC = CDB_PyAggS,
|
||||||
|
STYPE = Numeric[],
|
||||||
|
PARALLEL = SAFE,
|
||||||
|
INITCOND = "{}"
|
||||||
|
);
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
CDB_CreateAndPredictSegment(
|
CDB_CreateAndPredictSegment(
|
||||||
@@ -89,6 +98,7 @@ CREATE OR REPLACE FUNCTION
|
|||||||
query TEXT,
|
query TEXT,
|
||||||
variable_name TEXT,
|
variable_name TEXT,
|
||||||
target_table TEXT,
|
target_table TEXT,
|
||||||
|
model_name text DEFAULT NULL,
|
||||||
n_estimators INTEGER DEFAULT 1200,
|
n_estimators INTEGER DEFAULT 1200,
|
||||||
max_depth INTEGER DEFAULT 3,
|
max_depth INTEGER DEFAULT 3,
|
||||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||||
@@ -105,24 +115,59 @@ AS $$
|
|||||||
'learning_rate': learning_rate,
|
'learning_rate': learning_rate,
|
||||||
'min_samples_leaf': min_samples_leaf
|
'min_samples_leaf': min_samples_leaf
|
||||||
}
|
}
|
||||||
feature_cols = set(plpy.execute('''
|
all_cols = list(plpy.execute('''
|
||||||
select * from ({query}) as _w limit 0
|
select * from ({query}) as _w limit 0
|
||||||
'''.format(query=query)).colnames()) - set([variable_name, 'cartodb_id', ])
|
'''.format(query=query)).colnames())
|
||||||
|
feature_cols = [a for a in all_cols
|
||||||
|
if a not in [variable_name, 'cartodb_id', ]]
|
||||||
return seg.create_and_predict_segment(
|
return seg.create_and_predict_segment(
|
||||||
query,
|
query,
|
||||||
variable_name,
|
variable_name,
|
||||||
feature_cols,
|
feature_cols,
|
||||||
target_table,
|
target_table,
|
||||||
model_params
|
model_params,
|
||||||
|
model_name=model_name
|
||||||
)
|
)
|
||||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION
|
||||||
|
CDB_RetrieveModelParams(
|
||||||
|
model_name text,
|
||||||
|
param_name text
|
||||||
|
)
|
||||||
|
RETURNS TABLE(param numeric, feature_name text) AS $$
|
||||||
|
|
||||||
|
import pickle
|
||||||
|
from collections import Iterable
|
||||||
|
|
||||||
|
plan = plpy.prepare('''
|
||||||
|
SELECT model, feature_names FROM model_storage
|
||||||
|
WHERE name = $1;
|
||||||
|
''', ['text', ])
|
||||||
|
|
||||||
|
try:
|
||||||
|
model_encoded = plpy.execute(plan, [model_name, ])
|
||||||
|
except plpy.SPIError as err:
|
||||||
|
plpy.error('ERROR: {}'.format(err))
|
||||||
|
plpy.notice(model_encoded[0]['feature_names'])
|
||||||
|
model = pickle.loads(
|
||||||
|
model_encoded[0]['model']
|
||||||
|
)
|
||||||
|
|
||||||
|
res = getattr(model, param_name)
|
||||||
|
if not isinstance(res, Iterable):
|
||||||
|
raise Exception('Cannot return `{}` as a table'.format(param_name))
|
||||||
|
return zip(res, model_encoded[0]['feature_names'])
|
||||||
|
|
||||||
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
CDB_CreateAndPredictSegment(
|
CDB_CreateAndPredictSegment(
|
||||||
query TEXT,
|
query TEXT,
|
||||||
variable TEXT,
|
variable TEXT,
|
||||||
feature_columns TEXT[],
|
feature_columns TEXT[],
|
||||||
target_query TEXT,
|
target_query TEXT,
|
||||||
|
model_name TEXT DEFAULT NULL,
|
||||||
n_estimators INTEGER DEFAULT 1200,
|
n_estimators INTEGER DEFAULT 1200,
|
||||||
max_depth INTEGER DEFAULT 3,
|
max_depth INTEGER DEFAULT 3,
|
||||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||||
@@ -144,7 +189,8 @@ AS $$
|
|||||||
variable,
|
variable,
|
||||||
feature_columns,
|
feature_columns,
|
||||||
target_query,
|
target_query,
|
||||||
model_params
|
model_params,
|
||||||
|
model_name=model_name
|
||||||
)
|
)
|
||||||
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
||||||
@@ -1104,19 +1150,27 @@ BEGIN
|
|||||||
END
|
END
|
||||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
|
|
||||||
-- Create aggregate if it did not exist
|
-- Create aggregate if it did not exist
|
||||||
DO $$ BEGIN
|
DO $$
|
||||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
BEGIN
|
||||||
SFUNC = CDB_WeightedMeanS,
|
IF NOT EXISTS (
|
||||||
FINALFUNC = CDB_WeightedMeanF,
|
SELECT *
|
||||||
STYPE = Numeric[],
|
FROM pg_catalog.pg_proc p
|
||||||
PARALLEL = SAFE,
|
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||||
INITCOND = "{0.0,0.0,0.0}"
|
WHERE n.nspname = 'cdb_crankshaft'
|
||||||
);
|
AND p.proname = 'cdb_weightedmean'
|
||||||
EXCEPTION
|
AND p.proisagg)
|
||||||
WHEN duplicate_function THEN NULL;
|
THEN
|
||||||
END $$;
|
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||||
|
SFUNC = CDB_WeightedMeanS,
|
||||||
|
FINALFUNC = CDB_WeightedMeanF,
|
||||||
|
STYPE = Numeric[],
|
||||||
|
PARALLEL = SAFE,
|
||||||
|
INITCOND = "{0.0,0.0,0.0}"
|
||||||
|
);
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
-- Spatial Markov
|
-- Spatial Markov
|
||||||
|
|
||||||
-- input table format:
|
-- input table format:
|
||||||
@@ -21,7 +21,7 @@ _cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
|||||||
AS $$
|
AS $$
|
||||||
from crankshaft import random_seeds
|
from crankshaft import random_seeds
|
||||||
random_seeds.set_random_seeds(seed_value)
|
random_seeds.set_random_seeds(seed_value)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
|
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
|
||||||
returns NUMERIC[] as $$
|
returns NUMERIC[] as $$
|
||||||
@@ -35,16 +35,25 @@ CREATE OR REPLACE FUNCTION
|
|||||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
-- Create aggregate if it did not exist
|
-- Create aggregate if it did not exist
|
||||||
DO $$ BEGIN
|
DO $$
|
||||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
BEGIN
|
||||||
SFUNC = CDB_PyAggS,
|
IF NOT EXISTS (
|
||||||
STYPE = Numeric[],
|
SELECT *
|
||||||
PARALLEL = SAFE,
|
FROM pg_catalog.pg_proc p
|
||||||
INITCOND = "{}"
|
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||||
);
|
WHERE n.nspname = 'cdb_crankshaft'
|
||||||
EXCEPTION
|
AND p.proname = 'cdb_pyagg'
|
||||||
WHEN duplicate_function THEN NULL;
|
AND p.proisagg)
|
||||||
END $$;
|
THEN
|
||||||
|
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||||
|
SFUNC = CDB_PyAggS,
|
||||||
|
STYPE = Numeric[],
|
||||||
|
PARALLEL = SAFE,
|
||||||
|
INITCOND = "{}"
|
||||||
|
);
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
CDB_CreateAndPredictSegment(
|
CDB_CreateAndPredictSegment(
|
||||||
@@ -82,13 +91,14 @@ AS $$
|
|||||||
target_ids,
|
target_ids,
|
||||||
model_params)
|
model_params)
|
||||||
|
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL RESTRICTED;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL RESTRICTED;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
CDB_CreateAndPredictSegment(
|
CDB_CreateAndPredictSegment(
|
||||||
query TEXT,
|
query TEXT,
|
||||||
variable_name TEXT,
|
variable_name TEXT,
|
||||||
target_table TEXT,
|
target_table TEXT,
|
||||||
|
model_name text DEFAULT NULL,
|
||||||
n_estimators INTEGER DEFAULT 1200,
|
n_estimators INTEGER DEFAULT 1200,
|
||||||
max_depth INTEGER DEFAULT 3,
|
max_depth INTEGER DEFAULT 3,
|
||||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||||
@@ -105,17 +115,51 @@ AS $$
|
|||||||
'learning_rate': learning_rate,
|
'learning_rate': learning_rate,
|
||||||
'min_samples_leaf': min_samples_leaf
|
'min_samples_leaf': min_samples_leaf
|
||||||
}
|
}
|
||||||
feature_cols = set(plpy.execute('''
|
all_cols = list(plpy.execute('''
|
||||||
select * from ({query}) as _w limit 0
|
select * from ({query}) as _w limit 0
|
||||||
'''.format(query=query)).colnames()) - set([variable_name, 'cartodb_id', ])
|
'''.format(query=query)).colnames())
|
||||||
|
feature_cols = [a for a in all_cols
|
||||||
|
if a not in [variable_name, 'cartodb_id', ]]
|
||||||
return seg.create_and_predict_segment(
|
return seg.create_and_predict_segment(
|
||||||
query,
|
query,
|
||||||
variable_name,
|
variable_name,
|
||||||
feature_cols,
|
feature_cols,
|
||||||
target_table,
|
target_table,
|
||||||
model_params
|
model_params,
|
||||||
|
model_name=model_name
|
||||||
)
|
)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION
|
||||||
|
CDB_RetrieveModelParams(
|
||||||
|
model_name text,
|
||||||
|
param_name text
|
||||||
|
)
|
||||||
|
RETURNS TABLE(param numeric, feature_name text) AS $$
|
||||||
|
|
||||||
|
import pickle
|
||||||
|
from collections import Iterable
|
||||||
|
|
||||||
|
plan = plpy.prepare('''
|
||||||
|
SELECT model, feature_names FROM model_storage
|
||||||
|
WHERE name = $1;
|
||||||
|
''', ['text', ])
|
||||||
|
|
||||||
|
try:
|
||||||
|
model_encoded = plpy.execute(plan, [model_name, ])
|
||||||
|
except plpy.SPIError as err:
|
||||||
|
plpy.error('ERROR: {}'.format(err))
|
||||||
|
plpy.notice(model_encoded[0]['feature_names'])
|
||||||
|
model = pickle.loads(
|
||||||
|
model_encoded[0]['model']
|
||||||
|
)
|
||||||
|
|
||||||
|
res = getattr(model, param_name)
|
||||||
|
if not isinstance(res, Iterable):
|
||||||
|
raise Exception('Cannot return `{}` as a table'.format(param_name))
|
||||||
|
return zip(res, model_encoded[0]['feature_names'])
|
||||||
|
|
||||||
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
CDB_CreateAndPredictSegment(
|
CDB_CreateAndPredictSegment(
|
||||||
@@ -123,6 +167,7 @@ CREATE OR REPLACE FUNCTION
|
|||||||
variable TEXT,
|
variable TEXT,
|
||||||
feature_columns TEXT[],
|
feature_columns TEXT[],
|
||||||
target_query TEXT,
|
target_query TEXT,
|
||||||
|
model_name TEXT DEFAULT NULL,
|
||||||
n_estimators INTEGER DEFAULT 1200,
|
n_estimators INTEGER DEFAULT 1200,
|
||||||
max_depth INTEGER DEFAULT 3,
|
max_depth INTEGER DEFAULT 3,
|
||||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||||
@@ -144,9 +189,10 @@ AS $$
|
|||||||
variable,
|
variable,
|
||||||
feature_columns,
|
feature_columns,
|
||||||
target_query,
|
target_query,
|
||||||
model_params
|
model_params,
|
||||||
|
model_name=model_name
|
||||||
)
|
)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
||||||
IN target_query text,
|
IN target_query text,
|
||||||
IN weight_column text,
|
IN weight_column text,
|
||||||
@@ -656,7 +702,7 @@ AS $$
|
|||||||
moran = Moran()
|
moran = Moran()
|
||||||
return moran.global_stat(subquery, column_name, w_type,
|
return moran.global_stat(subquery, column_name, w_type,
|
||||||
num_ngbrs, permutations, geom_col, id_col)
|
num_ngbrs, permutations, geom_col, id_col)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- Moran's I Local (internal function) - DEPRECATED
|
-- Moran's I Local (internal function) - DEPRECATED
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
@@ -681,7 +727,7 @@ AS $$
|
|||||||
num_ngbrs, permutations, geom_col, id_col)
|
num_ngbrs, permutations, geom_col, id_col)
|
||||||
# remove spatial lag
|
# remove spatial lag
|
||||||
return [(r[6], r[0], r[1], r[7], r[5]) for r in result]
|
return [(r[6], r[0], r[1], r[7], r[5]) for r in result]
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- Moran's I Local (internal function)
|
-- Moran's I Local (internal function)
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
@@ -709,7 +755,7 @@ moran = Moran()
|
|||||||
return moran.local_stat(subquery, column_name, w_type,
|
return moran.local_stat(subquery, column_name, w_type,
|
||||||
num_ngbrs, permutations, geom_col, id_col)
|
num_ngbrs, permutations, geom_col, id_col)
|
||||||
|
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
|
|
||||||
-- Moran's I Local (public-facing function)
|
-- Moran's I Local (public-facing function)
|
||||||
@@ -836,7 +882,7 @@ AS $$
|
|||||||
# TODO: use named parameters or a dictionary
|
# TODO: use named parameters or a dictionary
|
||||||
return moran.global_rate_stat(subquery, numerator, denominator, w_type,
|
return moran.global_rate_stat(subquery, numerator, denominator, w_type,
|
||||||
num_ngbrs, permutations, geom_col, id_col)
|
num_ngbrs, permutations, geom_col, id_col)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
|
|
||||||
-- Moran's I Local Rate (internal function) - DEPRECATED
|
-- Moran's I Local Rate (internal function) - DEPRECATED
|
||||||
@@ -864,7 +910,7 @@ AS $$
|
|||||||
result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||||
# remove spatial lag
|
# remove spatial lag
|
||||||
return [(r[6], r[0], r[1], r[7], r[4]) for r in result]
|
return [(r[6], r[0], r[1], r[7], r[4]) for r in result]
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- Moran's I Local Rate (public-facing function) - DEPRECATED
|
-- Moran's I Local Rate (public-facing function) - DEPRECATED
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
@@ -920,7 +966,7 @@ return moran.local_rate_stat(
|
|||||||
geom_col,
|
geom_col,
|
||||||
id_col
|
id_col
|
||||||
)
|
)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- Moran's I Rate
|
-- Moran's I Rate
|
||||||
-- Replaces CDB_AreasOfInterestLocalRate
|
-- Replaces CDB_AreasOfInterestLocalRate
|
||||||
@@ -1033,7 +1079,7 @@ from crankshaft.clustering import Kmeans
|
|||||||
kmeans = Kmeans()
|
kmeans = Kmeans()
|
||||||
return kmeans.spatial(query, no_clusters, no_init)
|
return kmeans.spatial(query, no_clusters, no_init)
|
||||||
|
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- Non-spatial k-means clustering
|
-- Non-spatial k-means clustering
|
||||||
-- query: sql query to retrieve all the needed data
|
-- query: sql query to retrieve all the needed data
|
||||||
@@ -1063,7 +1109,7 @@ kmeans = Kmeans()
|
|||||||
return kmeans.nonspatial(query, colnames, no_clusters,
|
return kmeans.nonspatial(query, colnames, no_clusters,
|
||||||
standardize=standardize,
|
standardize=standardize,
|
||||||
id_col=id_col)
|
id_col=id_col)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(
|
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(
|
||||||
@@ -1104,19 +1150,27 @@ BEGIN
|
|||||||
END
|
END
|
||||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
|
|
||||||
-- Create aggregate if it did not exist
|
-- Create aggregate if it did not exist
|
||||||
DO $$ BEGIN
|
DO $$
|
||||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
BEGIN
|
||||||
SFUNC = CDB_WeightedMeanS,
|
IF NOT EXISTS (
|
||||||
FINALFUNC = CDB_WeightedMeanF,
|
SELECT *
|
||||||
STYPE = Numeric[],
|
FROM pg_catalog.pg_proc p
|
||||||
PARALLEL = SAFE,
|
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||||
INITCOND = "{0.0,0.0,0.0}"
|
WHERE n.nspname = 'cdb_crankshaft'
|
||||||
);
|
AND p.proname = 'cdb_weightedmean'
|
||||||
EXCEPTION
|
AND p.proisagg)
|
||||||
WHEN duplicate_function THEN NULL;
|
THEN
|
||||||
END $$;
|
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||||
|
SFUNC = CDB_WeightedMeanS,
|
||||||
|
FINALFUNC = CDB_WeightedMeanF,
|
||||||
|
STYPE = Numeric[],
|
||||||
|
PARALLEL = SAFE,
|
||||||
|
INITCOND = "{0.0,0.0,0.0}"
|
||||||
|
);
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
-- Spatial Markov
|
-- Spatial Markov
|
||||||
|
|
||||||
-- input table format:
|
-- input table format:
|
||||||
@@ -1146,7 +1200,7 @@ AS $$
|
|||||||
|
|
||||||
## TODO: use named parameters or a dictionary
|
## TODO: use named parameters or a dictionary
|
||||||
return markov.spatial_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
return markov.spatial_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- input table format: identical to above but in a predictable format
|
-- input table format: identical to above but in a predictable format
|
||||||
-- Sample function call:
|
-- Sample function call:
|
||||||
@@ -1172,7 +1226,7 @@ $$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
|||||||
-- from crankshaft.clustering import moran_local
|
-- from crankshaft.clustering import moran_local
|
||||||
-- # TODO: use named parameters or a dictionary
|
-- # TODO: use named parameters or a dictionary
|
||||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||||
-- $$ LANGUAGE plpython3u;
|
-- $$ LANGUAGE plpythonu;
|
||||||
--
|
--
|
||||||
-- -- input table format:
|
-- -- input table format:
|
||||||
-- -- id | geom | date | measurement
|
-- -- id | geom | date | measurement
|
||||||
@@ -1198,7 +1252,7 @@ $$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
|||||||
-- from crankshaft.clustering import moran_local
|
-- from crankshaft.clustering import moran_local
|
||||||
-- # TODO: use named parameters or a dictionary
|
-- # TODO: use named parameters or a dictionary
|
||||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||||
-- $$ LANGUAGE plpython3u;
|
-- $$ LANGUAGE plpythonu;
|
||||||
-- Based on:
|
-- Based on:
|
||||||
-- https://github.com/mapbox/polylabel/blob/master/index.js
|
-- https://github.com/mapbox/polylabel/blob/master/index.js
|
||||||
-- https://sites.google.com/site/polesofinaccessibility/
|
-- https://sites.google.com/site/polesofinaccessibility/
|
||||||
@@ -1468,7 +1522,7 @@ AS $$
|
|||||||
from crankshaft.clustering import Getis
|
from crankshaft.clustering import Getis
|
||||||
getis = Getis()
|
getis = Getis()
|
||||||
return getis.getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
return getis.getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- TODO: make a version that accepts the values as arrays
|
-- TODO: make a version that accepts the values as arrays
|
||||||
|
|
||||||
@@ -1808,7 +1862,7 @@ gwr = GWR()
|
|||||||
|
|
||||||
return gwr.gwr(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)
|
return gwr.gwr(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)
|
||||||
|
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
@@ -1826,7 +1880,7 @@ gwr = GWR()
|
|||||||
|
|
||||||
return gwr.gwr_predict(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)
|
return gwr.gwr_predict(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)
|
||||||
|
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
--
|
--
|
||||||
-- Creates N points randomly distributed arround the polygon
|
-- Creates N points randomly distributed arround the polygon
|
||||||
--
|
--
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
-- Version number of the extension release
|
-- Version number of the extension release
|
||||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||||
RETURNS text AS $$
|
RETURNS text AS $$
|
||||||
SELECT '0.9.0'::text;
|
SELECT '0.9.2'::text;
|
||||||
$$ language 'sql' IMMUTABLE STRICT PARALLEL SAFE;
|
$$ language 'sql' IMMUTABLE STRICT PARALLEL SAFE;
|
||||||
|
|
||||||
-- Internal identifier of the installed extension instence
|
-- Internal identifier of the installed extension instence
|
||||||
@@ -21,7 +21,7 @@ _cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
|||||||
AS $$
|
AS $$
|
||||||
from crankshaft import random_seeds
|
from crankshaft import random_seeds
|
||||||
random_seeds.set_random_seeds(seed_value)
|
random_seeds.set_random_seeds(seed_value)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
|
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
|
||||||
returns NUMERIC[] as $$
|
returns NUMERIC[] as $$
|
||||||
@@ -35,16 +35,25 @@ CREATE OR REPLACE FUNCTION
|
|||||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
-- Create aggregate if it did not exist
|
-- Create aggregate if it did not exist
|
||||||
DO $$ BEGIN
|
DO $$
|
||||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
BEGIN
|
||||||
SFUNC = CDB_PyAggS,
|
IF NOT EXISTS (
|
||||||
STYPE = Numeric[],
|
SELECT *
|
||||||
PARALLEL = SAFE,
|
FROM pg_catalog.pg_proc p
|
||||||
INITCOND = "{}"
|
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||||
);
|
WHERE n.nspname = 'cdb_crankshaft'
|
||||||
EXCEPTION
|
AND p.proname = 'cdb_pyagg'
|
||||||
WHEN duplicate_function THEN NULL;
|
AND p.proisagg)
|
||||||
END $$;
|
THEN
|
||||||
|
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||||
|
SFUNC = CDB_PyAggS,
|
||||||
|
STYPE = Numeric[],
|
||||||
|
PARALLEL = SAFE,
|
||||||
|
INITCOND = "{}"
|
||||||
|
);
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
CDB_CreateAndPredictSegment(
|
CDB_CreateAndPredictSegment(
|
||||||
@@ -82,13 +91,14 @@ AS $$
|
|||||||
target_ids,
|
target_ids,
|
||||||
model_params)
|
model_params)
|
||||||
|
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL RESTRICTED;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL RESTRICTED;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
CDB_CreateAndPredictSegment(
|
CDB_CreateAndPredictSegment(
|
||||||
query TEXT,
|
query TEXT,
|
||||||
variable_name TEXT,
|
variable_name TEXT,
|
||||||
target_table TEXT,
|
target_table TEXT,
|
||||||
|
model_name text DEFAULT NULL,
|
||||||
n_estimators INTEGER DEFAULT 1200,
|
n_estimators INTEGER DEFAULT 1200,
|
||||||
max_depth INTEGER DEFAULT 3,
|
max_depth INTEGER DEFAULT 3,
|
||||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||||
@@ -105,17 +115,51 @@ AS $$
|
|||||||
'learning_rate': learning_rate,
|
'learning_rate': learning_rate,
|
||||||
'min_samples_leaf': min_samples_leaf
|
'min_samples_leaf': min_samples_leaf
|
||||||
}
|
}
|
||||||
feature_cols = set(plpy.execute('''
|
all_cols = list(plpy.execute('''
|
||||||
select * from ({query}) as _w limit 0
|
select * from ({query}) as _w limit 0
|
||||||
'''.format(query=query)).colnames()) - set([variable_name, 'cartodb_id', ])
|
'''.format(query=query)).colnames())
|
||||||
|
feature_cols = [a for a in all_cols
|
||||||
|
if a not in [variable_name, 'cartodb_id', ]]
|
||||||
return seg.create_and_predict_segment(
|
return seg.create_and_predict_segment(
|
||||||
query,
|
query,
|
||||||
variable_name,
|
variable_name,
|
||||||
feature_cols,
|
feature_cols,
|
||||||
target_table,
|
target_table,
|
||||||
model_params
|
model_params,
|
||||||
|
model_name=model_name
|
||||||
)
|
)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION
|
||||||
|
CDB_RetrieveModelParams(
|
||||||
|
model_name text,
|
||||||
|
param_name text
|
||||||
|
)
|
||||||
|
RETURNS TABLE(param numeric, feature_name text) AS $$
|
||||||
|
|
||||||
|
import pickle
|
||||||
|
from collections import Iterable
|
||||||
|
|
||||||
|
plan = plpy.prepare('''
|
||||||
|
SELECT model, feature_names FROM model_storage
|
||||||
|
WHERE name = $1;
|
||||||
|
''', ['text', ])
|
||||||
|
|
||||||
|
try:
|
||||||
|
model_encoded = plpy.execute(plan, [model_name, ])
|
||||||
|
except plpy.SPIError as err:
|
||||||
|
plpy.error('ERROR: {}'.format(err))
|
||||||
|
plpy.notice(model_encoded[0]['feature_names'])
|
||||||
|
model = pickle.loads(
|
||||||
|
model_encoded[0]['model']
|
||||||
|
)
|
||||||
|
|
||||||
|
res = getattr(model, param_name)
|
||||||
|
if not isinstance(res, Iterable):
|
||||||
|
raise Exception('Cannot return `{}` as a table'.format(param_name))
|
||||||
|
return zip(res, model_encoded[0]['feature_names'])
|
||||||
|
|
||||||
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
CDB_CreateAndPredictSegment(
|
CDB_CreateAndPredictSegment(
|
||||||
@@ -123,6 +167,7 @@ CREATE OR REPLACE FUNCTION
|
|||||||
variable TEXT,
|
variable TEXT,
|
||||||
feature_columns TEXT[],
|
feature_columns TEXT[],
|
||||||
target_query TEXT,
|
target_query TEXT,
|
||||||
|
model_name TEXT DEFAULT NULL,
|
||||||
n_estimators INTEGER DEFAULT 1200,
|
n_estimators INTEGER DEFAULT 1200,
|
||||||
max_depth INTEGER DEFAULT 3,
|
max_depth INTEGER DEFAULT 3,
|
||||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||||
@@ -144,9 +189,10 @@ AS $$
|
|||||||
variable,
|
variable,
|
||||||
feature_columns,
|
feature_columns,
|
||||||
target_query,
|
target_query,
|
||||||
model_params
|
model_params,
|
||||||
|
model_name=model_name
|
||||||
)
|
)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
||||||
IN target_query text,
|
IN target_query text,
|
||||||
IN weight_column text,
|
IN weight_column text,
|
||||||
@@ -656,7 +702,7 @@ AS $$
|
|||||||
moran = Moran()
|
moran = Moran()
|
||||||
return moran.global_stat(subquery, column_name, w_type,
|
return moran.global_stat(subquery, column_name, w_type,
|
||||||
num_ngbrs, permutations, geom_col, id_col)
|
num_ngbrs, permutations, geom_col, id_col)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- Moran's I Local (internal function) - DEPRECATED
|
-- Moran's I Local (internal function) - DEPRECATED
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
@@ -681,7 +727,7 @@ AS $$
|
|||||||
num_ngbrs, permutations, geom_col, id_col)
|
num_ngbrs, permutations, geom_col, id_col)
|
||||||
# remove spatial lag
|
# remove spatial lag
|
||||||
return [(r[6], r[0], r[1], r[7], r[5]) for r in result]
|
return [(r[6], r[0], r[1], r[7], r[5]) for r in result]
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- Moran's I Local (internal function)
|
-- Moran's I Local (internal function)
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
@@ -709,7 +755,7 @@ moran = Moran()
|
|||||||
return moran.local_stat(subquery, column_name, w_type,
|
return moran.local_stat(subquery, column_name, w_type,
|
||||||
num_ngbrs, permutations, geom_col, id_col)
|
num_ngbrs, permutations, geom_col, id_col)
|
||||||
|
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
|
|
||||||
-- Moran's I Local (public-facing function)
|
-- Moran's I Local (public-facing function)
|
||||||
@@ -836,7 +882,7 @@ AS $$
|
|||||||
# TODO: use named parameters or a dictionary
|
# TODO: use named parameters or a dictionary
|
||||||
return moran.global_rate_stat(subquery, numerator, denominator, w_type,
|
return moran.global_rate_stat(subquery, numerator, denominator, w_type,
|
||||||
num_ngbrs, permutations, geom_col, id_col)
|
num_ngbrs, permutations, geom_col, id_col)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
|
|
||||||
-- Moran's I Local Rate (internal function) - DEPRECATED
|
-- Moran's I Local Rate (internal function) - DEPRECATED
|
||||||
@@ -864,7 +910,7 @@ AS $$
|
|||||||
result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||||
# remove spatial lag
|
# remove spatial lag
|
||||||
return [(r[6], r[0], r[1], r[7], r[4]) for r in result]
|
return [(r[6], r[0], r[1], r[7], r[4]) for r in result]
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- Moran's I Local Rate (public-facing function) - DEPRECATED
|
-- Moran's I Local Rate (public-facing function) - DEPRECATED
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
@@ -920,7 +966,7 @@ return moran.local_rate_stat(
|
|||||||
geom_col,
|
geom_col,
|
||||||
id_col
|
id_col
|
||||||
)
|
)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- Moran's I Rate
|
-- Moran's I Rate
|
||||||
-- Replaces CDB_AreasOfInterestLocalRate
|
-- Replaces CDB_AreasOfInterestLocalRate
|
||||||
@@ -1033,7 +1079,7 @@ from crankshaft.clustering import Kmeans
|
|||||||
kmeans = Kmeans()
|
kmeans = Kmeans()
|
||||||
return kmeans.spatial(query, no_clusters, no_init)
|
return kmeans.spatial(query, no_clusters, no_init)
|
||||||
|
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- Non-spatial k-means clustering
|
-- Non-spatial k-means clustering
|
||||||
-- query: sql query to retrieve all the needed data
|
-- query: sql query to retrieve all the needed data
|
||||||
@@ -1063,7 +1109,7 @@ kmeans = Kmeans()
|
|||||||
return kmeans.nonspatial(query, colnames, no_clusters,
|
return kmeans.nonspatial(query, colnames, no_clusters,
|
||||||
standardize=standardize,
|
standardize=standardize,
|
||||||
id_col=id_col)
|
id_col=id_col)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(
|
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(
|
||||||
@@ -1104,19 +1150,27 @@ BEGIN
|
|||||||
END
|
END
|
||||||
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
|
|
||||||
-- Create aggregate if it did not exist
|
-- Create aggregate if it did not exist
|
||||||
DO $$ BEGIN
|
DO $$
|
||||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
BEGIN
|
||||||
SFUNC = CDB_WeightedMeanS,
|
IF NOT EXISTS (
|
||||||
FINALFUNC = CDB_WeightedMeanF,
|
SELECT *
|
||||||
STYPE = Numeric[],
|
FROM pg_catalog.pg_proc p
|
||||||
PARALLEL = SAFE,
|
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||||
INITCOND = "{0.0,0.0,0.0}"
|
WHERE n.nspname = 'cdb_crankshaft'
|
||||||
);
|
AND p.proname = 'cdb_weightedmean'
|
||||||
EXCEPTION
|
AND p.proisagg)
|
||||||
WHEN duplicate_function THEN NULL;
|
THEN
|
||||||
END $$;
|
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||||
|
SFUNC = CDB_WeightedMeanS,
|
||||||
|
FINALFUNC = CDB_WeightedMeanF,
|
||||||
|
STYPE = Numeric[],
|
||||||
|
PARALLEL = SAFE,
|
||||||
|
INITCOND = "{0.0,0.0,0.0}"
|
||||||
|
);
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
-- Spatial Markov
|
-- Spatial Markov
|
||||||
|
|
||||||
-- input table format:
|
-- input table format:
|
||||||
@@ -1146,7 +1200,7 @@ AS $$
|
|||||||
|
|
||||||
## TODO: use named parameters or a dictionary
|
## TODO: use named parameters or a dictionary
|
||||||
return markov.spatial_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
return markov.spatial_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- input table format: identical to above but in a predictable format
|
-- input table format: identical to above but in a predictable format
|
||||||
-- Sample function call:
|
-- Sample function call:
|
||||||
@@ -1172,7 +1226,7 @@ $$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
|||||||
-- from crankshaft.clustering import moran_local
|
-- from crankshaft.clustering import moran_local
|
||||||
-- # TODO: use named parameters or a dictionary
|
-- # TODO: use named parameters or a dictionary
|
||||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||||
-- $$ LANGUAGE plpython3u;
|
-- $$ LANGUAGE plpythonu;
|
||||||
--
|
--
|
||||||
-- -- input table format:
|
-- -- input table format:
|
||||||
-- -- id | geom | date | measurement
|
-- -- id | geom | date | measurement
|
||||||
@@ -1198,7 +1252,7 @@ $$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
|||||||
-- from crankshaft.clustering import moran_local
|
-- from crankshaft.clustering import moran_local
|
||||||
-- # TODO: use named parameters or a dictionary
|
-- # TODO: use named parameters or a dictionary
|
||||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||||
-- $$ LANGUAGE plpython3u;
|
-- $$ LANGUAGE plpythonu;
|
||||||
-- Based on:
|
-- Based on:
|
||||||
-- https://github.com/mapbox/polylabel/blob/master/index.js
|
-- https://github.com/mapbox/polylabel/blob/master/index.js
|
||||||
-- https://sites.google.com/site/polesofinaccessibility/
|
-- https://sites.google.com/site/polesofinaccessibility/
|
||||||
@@ -1468,7 +1522,7 @@ AS $$
|
|||||||
from crankshaft.clustering import Getis
|
from crankshaft.clustering import Getis
|
||||||
getis = Getis()
|
getis = Getis()
|
||||||
return getis.getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
return getis.getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
-- TODO: make a version that accepts the values as arrays
|
-- TODO: make a version that accepts the values as arrays
|
||||||
|
|
||||||
@@ -1808,7 +1862,7 @@ gwr = GWR()
|
|||||||
|
|
||||||
return gwr.gwr(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)
|
return gwr.gwr(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)
|
||||||
|
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION
|
CREATE OR REPLACE FUNCTION
|
||||||
@@ -1826,7 +1880,7 @@ gwr = GWR()
|
|||||||
|
|
||||||
return gwr.gwr_predict(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)
|
return gwr.gwr_predict(subquery, dep_var, ind_vars, bw, fixed, kernel, geom_col, id_col)
|
||||||
|
|
||||||
$$ LANGUAGE plpython3u VOLATILE PARALLEL UNSAFE;
|
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
|
||||||
--
|
--
|
||||||
-- Creates N points randomly distributed around the polygon
|
-- Creates N points randomly distributed around the polygon
|
||||||
--
|
--
|
||||||
2393
release/crankshaft--0.9.1.sql
Normal file
2393
release/crankshaft--0.9.1.sql
Normal file
File diff suppressed because it is too large
Load Diff
2393
release/crankshaft--0.9.2--0.9.3.sql
Normal file
2393
release/crankshaft--0.9.2--0.9.3.sql
Normal file
File diff suppressed because it is too large
Load Diff
2393
release/crankshaft--0.9.2.sql
Normal file
2393
release/crankshaft--0.9.2.sql
Normal file
File diff suppressed because it is too large
Load Diff
2421
release/crankshaft--0.9.3--0.9.4.sql
Normal file
2421
release/crankshaft--0.9.3--0.9.4.sql
Normal file
File diff suppressed because it is too large
Load Diff
2393
release/crankshaft--0.9.3.sql
Normal file
2393
release/crankshaft--0.9.3.sql
Normal file
File diff suppressed because it is too large
Load Diff
2421
release/crankshaft--0.9.4--0.9.5.sql
Normal file
2421
release/crankshaft--0.9.4--0.9.5.sql
Normal file
File diff suppressed because it is too large
Load Diff
2421
release/crankshaft--0.9.4.sql
Normal file
2421
release/crankshaft--0.9.4.sql
Normal file
File diff suppressed because it is too large
Load Diff
2421
release/crankshaft--0.9.5.sql
Normal file
2421
release/crankshaft--0.9.5.sql
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
|||||||
comment = 'CartoDB Spatial Analysis extension'
|
comment = 'CartoDB Spatial Analysis extension'
|
||||||
default_version = '0.9.0'
|
default_version = '0.9.5'
|
||||||
requires = 'plpython3u, postgis'
|
requires = 'plpythonu, postgis'
|
||||||
superuser = true
|
superuser = true
|
||||||
schema = cdb_crankshaft
|
schema = cdb_crankshaft
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
@@ -1,5 +0,0 @@
|
|||||||
joblib==0.9.4
|
|
||||||
numpy==1.11.0
|
|
||||||
scipy==0.17.0
|
|
||||||
pysal==1.14.3
|
|
||||||
scikit-learn==0.17.0
|
|
||||||
@@ -4,4 +4,4 @@ import crankshaft.clustering
|
|||||||
import crankshaft.space_time_dynamics
|
import crankshaft.space_time_dynamics
|
||||||
import crankshaft.segmentation
|
import crankshaft.segmentation
|
||||||
import crankshaft.regression
|
import crankshaft.regression
|
||||||
from . import analysis_data_provider
|
import analysis_data_provider
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"""class for fetching data"""
|
"""class for fetching data"""
|
||||||
import plpy
|
import plpy
|
||||||
from . import pysal_utils as pu
|
import pysal_utils as pu
|
||||||
|
|
||||||
NULL_VALUE_ERROR = ('No usable data passed to analysis. Check your input rows '
|
NULL_VALUE_ERROR = ('No usable data passed to analysis. Check your input rows '
|
||||||
'for null values and fill in appropriately.')
|
'for null values and fill in appropriately.')
|
||||||
|
|||||||
@@ -0,0 +1,76 @@
|
|||||||
|
"""
|
||||||
|
Based on the Weiszfeld algorithm:
|
||||||
|
https://en.wikipedia.org/wiki/Geometric_median
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# import plpy
|
||||||
|
import numpy as np
|
||||||
|
from numpy.linalg import norm
|
||||||
|
|
||||||
|
|
||||||
|
def median_center(tablename, geom_col, num_iters=50, tolerance=0.001):
    """
    Estimate the median center (geometric median) of a set of points
    with the iterative Weiszfeld scheme.

    Parameters
    ----------
    tablename : str
        Relation placed in the FROM clause of the coordinate query.
    geom_col : str
        Name of the point geometry column; ST_X / ST_Y are applied to it.
    num_iters : int
        Maximum number of update steps.
    tolerance : float
        Iteration stops early once the center moves by less than this
        distance between two consecutive steps. Pass None to always run
        ``num_iters`` steps.

    Returns
    -------
    numpy.ndarray
        The estimated center as ``[x, y]``.

    Notes
    -----
    When the ``plpy`` module cannot be imported (i.e. the code is run
    outside PL/Python) the function prints 'No plpy' and works on a
    small random demo point set, as the original prototype did.
    """
    # NOTE(review): tablename and geom_col are interpolated verbatim into
    # the SQL text; callers must not pass untrusted input here.
    query = '''
        SELECT array_agg(ST_X({geom_col})) As x_coords,
               array_agg(ST_Y({geom_col})) As y_coords
          FROM {tablename}
    '''.format(geom_col=geom_col, tablename=tablename)

    # plpy only exists inside the database; import it lazily so the
    # module stays usable from a plain interpreter.
    try:
        import plpy
    except ImportError:
        plpy = None

    if plpy is None:
        print('No plpy')
        data = np.array([[1.2 * np.random.random() + 10.,
                          1.1 * (np.random.random() - 1.) + 3.]
                         for i in range(1, 100)])
    else:
        try:
            resp = plpy.execute(query)
        except plpy.SPIError as err:
            # plpy.error raises, so a failed query is never silently
            # replaced by made-up data.
            plpy.error('Analysis failed: %s' % err)

        # plpy results are indexed by row first, then by column name.
        x_coords = resp[0]['x_coords']
        y_coords = resp[0]['y_coords']
        if x_coords is None or y_coords is None:
            # array_agg over zero rows yields NULL
            plpy.error('Analysis failed: no coordinates returned')

        data = np.vstack((x_coords, y_coords)).T
        plpy.notice('coords: %s' % str(data))

    # initialize 'median center' to be the mean
    coords_center_temp = data.mean(axis=0)
    print('temp_center: %s' % str(coords_center_temp))

    for i in range(0, num_iters):
        old_coords_center = coords_center_temp.copy()

        # Both the weighted sum and the normaliser use the previous center.
        denom = denominator(old_coords_center, data)
        weighted_sum = np.sum([point * numerator(old_coords_center, point)
                               for point in data], axis=0)
        coords_center_temp = weighted_sum / denom

        shift = np.linalg.norm(old_coords_center - coords_center_temp)

        print("Pass #%d" % i)
        print("max, min of data: %0.4f, %0.4f" % (data.max(), data.min()))
        print('temp_center: %s' % str(coords_center_temp))
        print("Change in center: %0.4f" % shift)
        print("Center coords: %s" % str(coords_center_temp))
        print("Objective Function: %0.4f" % obj_func(coords_center_temp,
                                                     data))

        # honour the tolerance argument: stop once the estimate settles
        if tolerance is not None and shift < tolerance:
            break

    return coords_center_temp
|
||||||
|
|
||||||
|
|
||||||
|
def obj_func(center_coords, data):
    """
    Objective minimised by the geometric median: the sum of Euclidean
    distances from ``center_coords`` to every point in ``data``.

    Parameters
    ----------
    center_coords : array-like
        Candidate center, one value per coordinate.
    data : array-like
        One point per row (a single 1-D point is also accepted).

    Returns
    -------
    float
        Sum of the point-to-center distances.
    """
    offsets = np.atleast_2d(np.asarray(data, dtype=float)) - center_coords
    # per-point distance (row-wise norm), then summed; the norm of the
    # whole matrix would instead give the root of the summed squares
    return np.sum(np.linalg.norm(offsets, axis=1))
|
||||||
|
|
||||||
|
|
||||||
|
def numerator(center_coords, data_i):
    """
    Weiszfeld weight of a single point: the reciprocal of its Euclidean
    distance to the current center estimate.

    Parameters
    ----------
    center_coords : array-like
        Current center estimate.
    data_i : array-like
        Coordinates of one data point.

    Returns
    -------
    float
        ``1 / ||center_coords - data_i||``. The distance is clamped to
        machine epsilon so a point that coincides with the center yields
        a very large finite weight instead of ``inf``.
    """
    offset = np.asarray(center_coords, dtype=float) - data_i
    distance = max(np.linalg.norm(offset), np.finfo(float).eps)
    return np.reciprocal(distance)
|
||||||
|
|
||||||
|
|
||||||
|
def denominator(center_coords, data):
    """
    Normalising term of one Weiszfeld update: the sum over all points of
    the reciprocal distance to the current center estimate.

    Dividing the distance-weighted coordinate sum by this value gives
    the next center estimate.

    Parameters
    ----------
    center_coords : array-like
        Current center estimate.
    data : array-like
        One point per row (a single 1-D point is also accepted).

    Returns
    -------
    float
        ``sum_j 1 / ||data[j] - center_coords||``. Distances are clamped
        to machine epsilon so a point coinciding with the center does
        not produce ``inf``.
    """
    offsets = np.atleast_2d(np.asarray(data, dtype=float)) - center_coords
    # row-wise distances; the norm of the whole matrix is not the
    # quantity the update rule needs
    distances = np.linalg.norm(offsets, axis=1)
    distances = np.maximum(distances, np.finfo(float).eps)
    return np.sum(np.reciprocal(distances))
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
"""Import all functions for clustering"""
|
"""Import all functions for clustering"""
|
||||||
from .moran import *
|
from moran import *
|
||||||
from .kmeans import *
|
from kmeans import *
|
||||||
from .getis import *
|
from getis import *
|
||||||
|
|||||||
@@ -47,4 +47,4 @@ class Getis(object):
|
|||||||
getis = ps.esda.getisord.G_Local(attr_vals, weight,
|
getis = ps.esda.getisord.G_Local(attr_vals, weight,
|
||||||
star=True, permutations=permutations)
|
star=True, permutations=permutations)
|
||||||
|
|
||||||
return list(zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order))
|
return zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order)
|
||||||
|
|||||||
@@ -28,8 +28,8 @@ class Kmeans(object):
|
|||||||
ids = result[0]['ids']
|
ids = result[0]['ids']
|
||||||
|
|
||||||
km = KMeans(n_clusters=no_clusters, n_init=no_init)
|
km = KMeans(n_clusters=no_clusters, n_init=no_init)
|
||||||
labels = km.fit_predict(list(zip(xs, ys)))
|
labels = km.fit_predict(zip(xs, ys))
|
||||||
return list(zip(ids, labels))
|
return zip(ids, labels)
|
||||||
|
|
||||||
def nonspatial(self, subquery, colnames, no_clusters=5,
|
def nonspatial(self, subquery, colnames, no_clusters=5,
|
||||||
standardize=True, id_col='cartodb_id'):
|
standardize=True, id_col='cartodb_id'):
|
||||||
@@ -75,18 +75,18 @@ class Kmeans(object):
|
|||||||
kmeans = KMeans(n_clusters=no_clusters,
|
kmeans = KMeans(n_clusters=no_clusters,
|
||||||
random_state=0).fit(cluster_columns)
|
random_state=0).fit(cluster_columns)
|
||||||
|
|
||||||
centers = [json.dumps(dict(list(zip(colnames, c))))
|
centers = [json.dumps(dict(zip(colnames, c)))
|
||||||
for c in kmeans.cluster_centers_[kmeans.labels_]]
|
for c in kmeans.cluster_centers_[kmeans.labels_]]
|
||||||
|
|
||||||
silhouettes = metrics.silhouette_samples(cluster_columns,
|
silhouettes = metrics.silhouette_samples(cluster_columns,
|
||||||
kmeans.labels_,
|
kmeans.labels_,
|
||||||
metric='sqeuclidean')
|
metric='sqeuclidean')
|
||||||
|
|
||||||
return list(zip(kmeans.labels_,
|
return zip(kmeans.labels_,
|
||||||
centers,
|
centers,
|
||||||
silhouettes,
|
silhouettes,
|
||||||
[kmeans.inertia_] * kmeans.labels_.shape[0],
|
[kmeans.inertia_] * kmeans.labels_.shape[0],
|
||||||
data[0]['rowid']))
|
data[0]['rowid'])
|
||||||
|
|
||||||
|
|
||||||
# -- Preprocessing steps
|
# -- Preprocessing steps
|
||||||
@@ -99,7 +99,7 @@ def _extract_columns(data):
|
|||||||
# number of columns minus rowid column
|
# number of columns minus rowid column
|
||||||
n_cols = len(data[0]) - 1
|
n_cols = len(data[0]) - 1
|
||||||
return np.array([data[0]['arr_col{0}'.format(i+1)]
|
return np.array([data[0]['arr_col{0}'.format(i+1)]
|
||||||
for i in range(n_cols)],
|
for i in xrange(n_cols)],
|
||||||
dtype=float).T
|
dtype=float).T
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ class Moran(object):
|
|||||||
moran_global = ps.esda.moran.Moran(attr_vals, weight,
|
moran_global = ps.esda.moran.Moran(attr_vals, weight,
|
||||||
permutations=permutations)
|
permutations=permutations)
|
||||||
|
|
||||||
return list(zip([moran_global.I], [moran_global.EI]))
|
return zip([moran_global.I], [moran_global.EI])
|
||||||
|
|
||||||
def local_stat(self, subquery, attr,
|
def local_stat(self, subquery, attr,
|
||||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||||
@@ -139,7 +139,7 @@ class Moran(object):
|
|||||||
lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
|
lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
|
||||||
lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)
|
lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)
|
||||||
|
|
||||||
return list(zip(
|
return zip(
|
||||||
quads,
|
quads,
|
||||||
lisa.p_sim,
|
lisa.p_sim,
|
||||||
lag,
|
lag,
|
||||||
@@ -148,7 +148,7 @@ class Moran(object):
|
|||||||
lisa.z,
|
lisa.z,
|
||||||
lisa.Is,
|
lisa.Is,
|
||||||
weight.id_order
|
weight.id_order
|
||||||
))
|
)
|
||||||
|
|
||||||
def global_rate_stat(self, subquery, numerator, denominator,
|
def global_rate_stat(self, subquery, numerator, denominator,
|
||||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||||
@@ -194,7 +194,7 @@ class Moran(object):
|
|||||||
lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
|
lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
|
||||||
permutations=permutations)
|
permutations=permutations)
|
||||||
|
|
||||||
return list(zip([lisa_rate.I], [lisa_rate.EI]))
|
return zip([lisa_rate.I], [lisa_rate.EI])
|
||||||
|
|
||||||
def local_rate_stat(self, subquery, numerator, denominator,
|
def local_rate_stat(self, subquery, numerator, denominator,
|
||||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||||
@@ -262,7 +262,7 @@ class Moran(object):
|
|||||||
lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
|
lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
|
||||||
lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)
|
lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)
|
||||||
|
|
||||||
return list(zip(
|
return zip(
|
||||||
quads,
|
quads,
|
||||||
lisa.p_sim,
|
lisa.p_sim,
|
||||||
lag,
|
lag,
|
||||||
@@ -271,7 +271,7 @@ class Moran(object):
|
|||||||
lisa.z,
|
lisa.z,
|
||||||
lisa.Is,
|
lisa.Is,
|
||||||
weight.id_order
|
weight.id_order
|
||||||
))
|
)
|
||||||
|
|
||||||
def local_bivariate_stat(self, subquery, attr1, attr2,
|
def local_bivariate_stat(self, subquery, attr1, attr2,
|
||||||
permutations, geom_col, id_col,
|
permutations, geom_col, id_col,
|
||||||
@@ -303,7 +303,7 @@ class Moran(object):
|
|||||||
# find clustering of significance
|
# find clustering of significance
|
||||||
lisa_sig = quad_position(lisa.q)
|
lisa_sig = quad_position(lisa.q)
|
||||||
|
|
||||||
return list(zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order))
|
return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
|
||||||
|
|
||||||
# Low level functions ----------------------------------------
|
# Low level functions ----------------------------------------
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
from core import set_model, get_model, create_model_table
|
||||||
@@ -0,0 +1,86 @@
|
|||||||
|
import time
|
||||||
|
import plpy
|
||||||
|
import pickle
|
||||||
|
from petname import generate
|
||||||
|
|
||||||
|
def create_model_table():
    """
    Create the ``model_storage`` table if it does not exist yet.

    The DDL text, a confirmation message and the string form of the
    execution result are each emitted as a notice.
    """
    ddl = '''
        create table if not exists model_storage(
            description text,
            name text unique,
            model bytea,
            feature_names text[],
            date_created timestamptz,
            id serial primary key);
        '''
    plpy.notice(ddl)
    outcome = plpy.execute(plpy.prepare(ddl))
    plpy.notice('Model table successfully created')
    plpy.notice(str(outcome))
|
||||||
|
|
||||||
|
def get_model(model_name):
    """
    Load a stored model from ``model_storage`` by name.

    Returns the unpickled model when exactly one row matches
    ``model_name``; otherwise emits a notice with the number of matches
    and returns None. A plpy.SPIError raised by the lookup is reported
    through plpy.error.
    """
    lookup_sql = '''
        SELECT model FROM model_storage
        WHERE name = $1;
        '''
    try:
        lookup = plpy.prepare(lookup_sql, ['text', ])
        rows = plpy.execute(lookup, [model_name, ])
        if len(rows) != 1:
            plpy.notice('Model not found, or too many models '
                        '({})'.format(len(rows)))
            model = None
        else:
            # NOTE(review): pickle.loads executes whatever the stored
            # bytes describe; this is only safe while write access to
            # model_storage is restricted to trusted roles.
            model = pickle.loads(rows[0]['model'])
            plpy.notice('Model successfully loaded')
    except plpy.SPIError as err:
        plpy.error('ERROR: {}'.format(err))

    return model
|
||||||
|
|
||||||
|
def set_model(model, model_name, feature_names):
    """
    Store a pickled model in the table ``model_storage``.

    Parameters
    ----------
    model : object
        Any picklable model; its repr (collapsed to one line) is stored
        as the description.
    model_name : str or None
        Name to store the model under. When None a name is generated;
        when the name is already present in ``model_storage`` a fresh
        generated name is used instead.
    feature_names : iterable of str
        Feature names, stored in the ``feature_names`` text[] column.

    Returns
    -------
    str or None
        The name the model was stored under, or None when the insert
        failed (the failure is reported as a notice, as before).
    """
    if model_name is None:
        model_name = generate(words=2, separator='_', letters=8)

    existing_names = plpy.execute('''
        SELECT array_agg(name) as name
          FROM model_storage
    ''')
    # array_agg over an empty table yields NULL -> None
    taken = set(existing_names[0]['name'] or [])
    while model_name in taken:
        model_name = generate(words=2, separator='_', letters=10)

    description = ' '.join(part.strip() for part in repr(model).split('\n'))

    # store model
    try:
        # feature_names is bound as a real text[] parameter, so names
        # containing commas, braces or quotes are stored intact instead
        # of being mangled by a hand-built '{a,b}' array literal.
        plan = plpy.prepare('''
            INSERT INTO model_storage(description, name, model, feature_names, date_created)
            VALUES (
              $1,
              $2,
              $3,
              $4,
              to_timestamp($5));
            ''', ['text', 'text', 'bytea', 'text[]', 'numeric'])
        plpy.execute(
            plan,
            [description,
             model_name,
             pickle.dumps(model),
             list(feature_names),
             time.time()]
        )
        plpy.notice('model successfully stored as {}'.format(model_name))
    except plpy.SPIError as err:
        plpy.notice('ERROR: {}\nt: {}'.format(err, time.time()))
        return None

    return model_name
|
||||||
@@ -27,7 +27,7 @@ def get_weight(query_res, w_type='knn', num_ngbrs=5):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
neighbors = {x['id']: x['neighbors'] for x in query_res}
|
neighbors = {x['id']: x['neighbors'] for x in query_res}
|
||||||
print('len of neighbors: %d' % len(neighbors))
|
print 'len of neighbors: %d' % len(neighbors)
|
||||||
|
|
||||||
built_weight = ps.W(neighbors)
|
built_weight = ps.W(neighbors)
|
||||||
built_weight.transform = 'r'
|
built_weight.transform = 'r'
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from . import glm
|
import glm
|
||||||
from . import family
|
import family
|
||||||
from . import utils
|
import utils
|
||||||
from . import iwls
|
import iwls
|
||||||
|
|||||||
@@ -1,9 +1,8 @@
|
|||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import stats
|
from scipy import stats
|
||||||
from .utils import cache_readonly
|
from utils import cache_readonly
|
||||||
from functools import reduce
|
|
||||||
|
|
||||||
class Results(object):
|
class Results(object):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ The one parameter exponential family distributions used by GLM.
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import special
|
from scipy import special
|
||||||
from . import links as L
|
import links as L
|
||||||
from . import varfuncs as V
|
import varfuncs as V
|
||||||
FLOAT_EPS = np.finfo(float).eps
|
FLOAT_EPS = np.finfo(float).eps
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -3,10 +3,10 @@ import numpy as np
|
|||||||
import numpy.linalg as la
|
import numpy.linalg as la
|
||||||
from pysal.spreg.utils import RegressionPropsY, spdot
|
from pysal.spreg.utils import RegressionPropsY, spdot
|
||||||
import pysal.spreg.user_output as USER
|
import pysal.spreg.user_output as USER
|
||||||
from .utils import cache_readonly
|
from utils import cache_readonly
|
||||||
from .base import LikelihoodModelResults
|
from base import LikelihoodModelResults
|
||||||
from . import family
|
import family
|
||||||
from .iwls import iwls
|
from iwls import iwls
|
||||||
|
|
||||||
__all__ = ['GLM']
|
__all__ = ['GLM']
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import numpy.linalg as la
|
|||||||
from scipy import sparse as sp
|
from scipy import sparse as sp
|
||||||
from scipy.sparse import linalg as spla
|
from scipy.sparse import linalg as spla
|
||||||
from pysal.spreg.utils import spdot, spmultiply
|
from pysal.spreg.utils import spdot, spmultiply
|
||||||
from .family import Binomial, Poisson
|
from family import Binomial, Poisson
|
||||||
|
|
||||||
def _compute_betas(y, x):
|
def _compute_betas(y, x):
|
||||||
"""
|
"""
|
||||||
@@ -49,7 +49,7 @@ def iwls(y, x, family, offset=1.0, ini_betas=None, tol=1.0e-8, max_iter=200, wi=
|
|||||||
if isinstance(family, Binomial):
|
if isinstance(family, Binomial):
|
||||||
y = family.link._clean(y)
|
y = family.link._clean(y)
|
||||||
if isinstance(family, Poisson):
|
if isinstance(family, Poisson):
|
||||||
y_off = y/offset
|
y_off = y/offset
|
||||||
y_off = family.starting_mu(y_off)
|
y_off = family.starting_mu(y_off)
|
||||||
v = family.predict(y_off)
|
v = family.predict(y_off)
|
||||||
mu = family.starting_mu(y)
|
mu = family.starting_mu(y)
|
||||||
@@ -58,13 +58,13 @@ def iwls(y, x, family, offset=1.0, ini_betas=None, tol=1.0e-8, max_iter=200, wi=
|
|||||||
v = family.predict(mu)
|
v = family.predict(mu)
|
||||||
|
|
||||||
while diff > tol and n_iter < max_iter:
|
while diff > tol and n_iter < max_iter:
|
||||||
n_iter += 1
|
n_iter += 1
|
||||||
w = family.weights(mu)
|
w = family.weights(mu)
|
||||||
z = v + (family.link.deriv(mu)*(y-mu))
|
z = v + (family.link.deriv(mu)*(y-mu))
|
||||||
w = np.sqrt(w)
|
w = np.sqrt(w)
|
||||||
if type(x) != np.ndarray:
|
if type(x) != np.ndarray:
|
||||||
w = sp.csr_matrix(w)
|
w = sp.csr_matrix(w)
|
||||||
z = sp.csr_matrix(z)
|
z = sp.csr_matrix(z)
|
||||||
wx = spmultiply(x, w, array_out=False)
|
wx = spmultiply(x, w, array_out=False)
|
||||||
wz = spmultiply(z, w, array_out=False)
|
wz = spmultiply(z, w, array_out=False)
|
||||||
if wi is None:
|
if wi is None:
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
|
from __future__ import absolute_import, print_function
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
@@ -17,7 +17,7 @@ try:
|
|||||||
from scipy.lib._version import NumpyVersion
|
from scipy.lib._version import NumpyVersion
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import re
|
import re
|
||||||
string_types = str
|
string_types = basestring
|
||||||
|
|
||||||
class NumpyVersion():
|
class NumpyVersion():
|
||||||
"""Parse and compare numpy version strings.
|
"""Parse and compare numpy version strings.
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
from .base import *
|
from base import *
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from . import gwr
|
import gwr
|
||||||
from . import sel_bw
|
import sel_bw
|
||||||
from . import diagnostics
|
import diagnostics
|
||||||
from . import kernels
|
import kernels
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ __author__ = "Taylor Oshan Tayoshan@gmail.com"
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import numpy.linalg as la
|
import numpy.linalg as la
|
||||||
from scipy.stats import t
|
from scipy.stats import t
|
||||||
from .kernels import *
|
from kernels import *
|
||||||
from .diagnostics import get_AIC, get_AICc, get_BIC
|
from diagnostics import get_AIC, get_AICc, get_BIC
|
||||||
import pysal.spreg.user_output as USER
|
import pysal.spreg.user_output as USER
|
||||||
from crankshaft.regression.glm.family import Gaussian, Binomial, Poisson
|
from crankshaft.regression.glm.family import Gaussian, Binomial, Poisson
|
||||||
from crankshaft.regression.glm.glm import GLM, GLMResults
|
from crankshaft.regression.glm.glm import GLM, GLMResults
|
||||||
@@ -156,7 +156,7 @@ class GWR(GLM):
|
|||||||
self.kernel = kernel
|
self.kernel = kernel
|
||||||
self.fixed = fixed
|
self.fixed = fixed
|
||||||
if offset is None:
|
if offset is None:
|
||||||
self.offset = np.ones((self.n, 1))
|
self.offset = np.ones((self.n, 1))
|
||||||
else:
|
else:
|
||||||
self.offset = offset * 1.0
|
self.offset = offset * 1.0
|
||||||
self.fit_params = {}
|
self.fit_params = {}
|
||||||
@@ -169,7 +169,7 @@ class GWR(GLM):
|
|||||||
def _build_W(self, fixed, kernel, coords, bw, points=None):
|
def _build_W(self, fixed, kernel, coords, bw, points=None):
|
||||||
if fixed:
|
if fixed:
|
||||||
try:
|
try:
|
||||||
W = fk[kernel](coords, bw, points)
|
W = fk[kernel](coords, bw, points)
|
||||||
except:
|
except:
|
||||||
raise TypeError('Unsupported kernel function ', kernel)
|
raise TypeError('Unsupported kernel function ', kernel)
|
||||||
else:
|
else:
|
||||||
@@ -177,6 +177,7 @@ class GWR(GLM):
|
|||||||
W = ak[kernel](coords, bw, points)
|
W = ak[kernel](coords, bw, points)
|
||||||
except:
|
except:
|
||||||
raise TypeError('Unsupported kernel function ', kernel)
|
raise TypeError('Unsupported kernel function ', kernel)
|
||||||
|
|
||||||
return W
|
return W
|
||||||
|
|
||||||
def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls'):
|
def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls'):
|
||||||
@@ -217,7 +218,8 @@ class GWR(GLM):
|
|||||||
p = np.zeros((m, 1))
|
p = np.zeros((m, 1))
|
||||||
for i in range(m):
|
for i in range(m):
|
||||||
wi = self.W[i].reshape((-1,1))
|
wi = self.W[i].reshape((-1,1))
|
||||||
rslt = iwls(self.y, self.X, self.family, self.offset, ini_params, tol, max_iter, wi=wi)
|
rslt = iwls(self.y, self.X, self.family, self.offset,
|
||||||
|
ini_params, tol, max_iter, wi=wi)
|
||||||
params[i,:] = rslt[0].T
|
params[i,:] = rslt[0].T
|
||||||
predy[i] = rslt[1][i]
|
predy[i] = rslt[1][i]
|
||||||
v[i] = rslt[2][i]
|
v[i] = rslt[2][i]
|
||||||
@@ -257,7 +259,7 @@ class GWR(GLM):
|
|||||||
fit_params : dict
|
fit_params : dict
|
||||||
key-value pairs of parameters that will be passed into fit method to define estimation
|
key-value pairs of parameters that will be passed into fit method to define estimation
|
||||||
routine; see fit method for more details
|
routine; see fit method for more details
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if (exog_scale is None) & (exog_resid is None):
|
if (exog_scale is None) & (exog_resid is None):
|
||||||
train_gwr = self.fit(**fit_params)
|
train_gwr = self.fit(**fit_params)
|
||||||
@@ -496,7 +498,7 @@ class GWRResults(GLMResults):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
if exog_scale is not None:
|
if exog_scale is not None:
|
||||||
return cov*exog_scale
|
return cov*exog_scale
|
||||||
else:
|
else:
|
||||||
return cov*self.scale
|
return cov*self.scale
|
||||||
|
|
||||||
@@ -520,7 +522,7 @@ class GWRResults(GLMResults):
|
|||||||
weighted mean of y
|
weighted mean of y
|
||||||
"""
|
"""
|
||||||
if self.model.points is not None:
|
if self.model.points is not None:
|
||||||
n = len(self.model.points)
|
n = len(self.model.points)
|
||||||
else:
|
else:
|
||||||
n = self.n
|
n = self.n
|
||||||
off = self.offset.reshape((-1,1))
|
off = self.offset.reshape((-1,1))
|
||||||
@@ -543,13 +545,13 @@ class GWRResults(GLMResults):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
if self.model.points is not None:
|
if self.model.points is not None:
|
||||||
n = len(self.model.points)
|
n = len(self.model.points)
|
||||||
else:
|
else:
|
||||||
n = self.n
|
n = self.n
|
||||||
TSS = np.zeros(shape=(n,1))
|
TSS = np.zeros(shape=(n,1))
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
TSS[i] = np.sum(np.reshape(np.array(self.W[i]), (-1,1)) *
|
TSS[i] = np.sum(np.reshape(np.array(self.W[i]), (-1,1)) *
|
||||||
(self.y.reshape((-1,1)) - self.y_bar[i])**2)
|
(self.y.reshape((-1,1)) - self.y_bar[i])**2)
|
||||||
return TSS
|
return TSS
|
||||||
|
|
||||||
@cache_readonly
|
@cache_readonly
|
||||||
@@ -563,15 +565,15 @@ class GWRResults(GLMResults):
|
|||||||
relationships.
|
relationships.
|
||||||
"""
|
"""
|
||||||
if self.model.points is not None:
|
if self.model.points is not None:
|
||||||
n = len(self.model.points)
|
n = len(self.model.points)
|
||||||
resid = self.model.exog_resid.reshape((-1,1))
|
resid = self.model.exog_resid.reshape((-1,1))
|
||||||
else:
|
else:
|
||||||
n = self.n
|
n = self.n
|
||||||
resid = self.resid_response.reshape((-1,1))
|
resid = self.resid_response.reshape((-1,1))
|
||||||
RSS = np.zeros(shape=(n,1))
|
RSS = np.zeros(shape=(n,1))
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
RSS[i] = np.sum(np.reshape(np.array(self.W[i]), (-1,1))
|
RSS[i] = np.sum(np.reshape(np.array(self.W[i]), (-1,1))
|
||||||
* resid**2)
|
* resid**2)
|
||||||
return RSS
|
return RSS
|
||||||
|
|
||||||
@cache_readonly
|
@cache_readonly
|
||||||
@@ -617,10 +619,10 @@ class GWRResults(GLMResults):
|
|||||||
"""
|
"""
|
||||||
if isinstance(self.family, (Poisson, Binomial)):
|
if isinstance(self.family, (Poisson, Binomial)):
|
||||||
return self.resid_ss/(self.n - 2.0*self.tr_S +
|
return self.resid_ss/(self.n - 2.0*self.tr_S +
|
||||||
self.tr_STS) #could be changed to SWSTW - nothing to test against
|
self.tr_STS) #could be changed to SWSTW - nothing to test against
|
||||||
else:
|
else:
|
||||||
return self.resid_ss/(self.n - 2.0*self.tr_S +
|
return self.resid_ss/(self.n - 2.0*self.tr_S +
|
||||||
self.tr_STS) #could be changed to SWSTW - nothing to test against
|
self.tr_STS) #could be changed to SWSTW - nothing to test against
|
||||||
@cache_readonly
|
@cache_readonly
|
||||||
def sigma2_ML(self):
|
def sigma2_ML(self):
|
||||||
"""
|
"""
|
||||||
@@ -673,14 +675,14 @@ class GWRResults(GLMResults):
|
|||||||
Note: in (9.11), p should be tr(S), that is, the effective number of parameters
|
Note: in (9.11), p should be tr(S), that is, the effective number of parameters
|
||||||
"""
|
"""
|
||||||
return self.std_res**2 * self.influ / (self.tr_S * (1.0-self.influ))
|
return self.std_res**2 * self.influ / (self.tr_S * (1.0-self.influ))
|
||||||
|
|
||||||
@cache_readonly
|
@cache_readonly
|
||||||
def deviance(self):
|
def deviance(self):
|
||||||
off = self.offset.reshape((-1,1)).T
|
off = self.offset.reshape((-1,1)).T
|
||||||
y = self.y
|
y = self.y
|
||||||
ybar = self.y_bar
|
ybar = self.y_bar
|
||||||
if isinstance(self.family, Gaussian):
|
if isinstance(self.family, Gaussian):
|
||||||
raise NotImplementedError('deviance not currently used for Gaussian')
|
raise NotImplementedError('deviance not currently used for Gaussian')
|
||||||
elif isinstance(self.family, Poisson):
|
elif isinstance(self.family, Poisson):
|
||||||
dev = np.sum(2.0*self.W*(y*np.log(y/(ybar*off))-(y-ybar*off)),axis=1)
|
dev = np.sum(2.0*self.W*(y*np.log(y/(ybar*off))-(y-ybar*off)),axis=1)
|
||||||
elif isinstance(self.family, Binomial):
|
elif isinstance(self.family, Binomial):
|
||||||
@@ -690,7 +692,7 @@ class GWRResults(GLMResults):
|
|||||||
@cache_readonly
|
@cache_readonly
|
||||||
def resid_deviance(self):
|
def resid_deviance(self):
|
||||||
if isinstance(self.family, Gaussian):
|
if isinstance(self.family, Gaussian):
|
||||||
raise NotImplementedError('deviance not currently used for Gaussian')
|
raise NotImplementedError('deviance not currently used for Gaussian')
|
||||||
else:
|
else:
|
||||||
off = self.offset.reshape((-1,1)).T
|
off = self.offset.reshape((-1,1)).T
|
||||||
y = self.y
|
y = self.y
|
||||||
@@ -708,7 +710,7 @@ class GWRResults(GLMResults):
|
|||||||
manual. Equivalent to 1 - (deviance/null deviance)
|
manual. Equivalent to 1 - (deviance/null deviance)
|
||||||
"""
|
"""
|
||||||
if isinstance(self.family, Gaussian):
|
if isinstance(self.family, Gaussian):
|
||||||
raise NotImplementedError('Not implemented for Gaussian')
|
raise NotImplementedError('Not implemented for Gaussian')
|
||||||
else:
|
else:
|
||||||
return 1.0 - (self.resid_deviance/self.deviance)
|
return 1.0 - (self.resid_deviance/self.deviance)
|
||||||
|
|
||||||
@@ -831,8 +833,8 @@ class GWRResults(GLMResults):
|
|||||||
def predictions(self):
|
def predictions(self):
|
||||||
P = self.model.P
|
P = self.model.P
|
||||||
if P is None:
|
if P is None:
|
||||||
raise NotImplementedError('predictions only avaialble if predict'
|
raise NotImplementedError('predictions only avaialble if predict'
|
||||||
'method called on GWR model')
|
'method called on GWR model')
|
||||||
else:
|
else:
|
||||||
predictions = np.sum(P*self.params, axis=1).reshape((-1,1))
|
predictions = np.sum(P*self.params, axis=1).reshape((-1,1))
|
||||||
return predictions
|
return predictions
|
||||||
@@ -985,7 +987,7 @@ class FBGWR(GWR):
|
|||||||
self.fixed = fixed
|
self.fixed = fixed
|
||||||
self.constant = constant
|
self.constant = constant
|
||||||
if constant:
|
if constant:
|
||||||
self.X = USER.check_constant(self.X)
|
self.X = USER.check_constant(self.X)
|
||||||
|
|
||||||
def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls'):
|
def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls'):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -47,14 +47,14 @@ def golden_section(a, c, delta, function, tol, max_iter, int_score=False):
|
|||||||
while np.abs(diff) > tol and iters < max_iter:
|
while np.abs(diff) > tol and iters < max_iter:
|
||||||
iters += 1
|
iters += 1
|
||||||
if int_score:
|
if int_score:
|
||||||
b = np.round(b)
|
b = np.round(b)
|
||||||
d = np.round(d)
|
d = np.round(d)
|
||||||
|
|
||||||
score_a = function(a)
|
score_a = function(a)
|
||||||
score_b = function(b)
|
score_b = function(b)
|
||||||
score_c = function(c)
|
score_c = function(c)
|
||||||
score_d = function(d)
|
score_d = function(d)
|
||||||
|
|
||||||
if score_b <= score_d:
|
if score_b <= score_d:
|
||||||
opt_val = b
|
opt_val = b
|
||||||
opt_score = score_b
|
opt_score = score_b
|
||||||
@@ -73,7 +73,7 @@ def golden_section(a, c, delta, function, tol, max_iter, int_score=False):
|
|||||||
#d = np.round(b)
|
#d = np.round(b)
|
||||||
|
|
||||||
#if int_score:
|
#if int_score:
|
||||||
# opt_val = np.round(opt_val)
|
# opt_val = np.round(opt_val)
|
||||||
output.append((opt_val, opt_score))
|
output.append((opt_val, opt_score))
|
||||||
diff = score_b - score_d
|
diff = score_b - score_d
|
||||||
score = opt_score
|
score = opt_score
|
||||||
@@ -146,7 +146,7 @@ def flexible_bw(init, y, X, n, k, family, tol, max_iter, rss_score,
|
|||||||
gwr_func, bw_func, sel_func):
|
gwr_func, bw_func, sel_func):
|
||||||
if init:
|
if init:
|
||||||
bw = sel_func(bw_func(y, X))
|
bw = sel_func(bw_func(y, X))
|
||||||
print(bw)
|
print bw
|
||||||
optim_model = gwr_func(y, X, bw)
|
optim_model = gwr_func(y, X, bw)
|
||||||
err = optim_model.resid_response.reshape((-1,1))
|
err = optim_model.resid_response.reshape((-1,1))
|
||||||
est = optim_model.params
|
est = optim_model.params
|
||||||
@@ -198,7 +198,7 @@ def flexible_bw(init, y, X, n, k, family, tol, max_iter, rss_score,
|
|||||||
new_rss = np.sum((y - predy)**2)
|
new_rss = np.sum((y - predy)**2)
|
||||||
score = np.abs((new_rss - rss)/new_rss)
|
score = np.abs((new_rss - rss)/new_rss)
|
||||||
rss = new_rss
|
rss = new_rss
|
||||||
print(score)
|
print score
|
||||||
scores.append(score)
|
scores.append(score)
|
||||||
delta = score
|
delta = score
|
||||||
BWs.append(bws)
|
BWs.append(bws)
|
||||||
|
|||||||
@@ -8,12 +8,12 @@
|
|||||||
|
|
||||||
__author__ = "Taylor Oshan Tayoshan@gmail.com"
|
__author__ = "Taylor Oshan Tayoshan@gmail.com"
|
||||||
|
|
||||||
from .kernels import *
|
from kernels import *
|
||||||
from .search import golden_section, equal_interval, flexible_bw
|
from search import golden_section, equal_interval, flexible_bw
|
||||||
from .gwr import GWR
|
from gwr import GWR
|
||||||
from crankshaft.regression.glm.family import Gaussian, Poisson, Binomial
|
from crankshaft.regression.glm.family import Gaussian, Poisson, Binomial
|
||||||
import pysal.spreg.user_output as USER
|
import pysal.spreg.user_output as USER
|
||||||
from .diagnostics import get_AICc, get_AIC, get_BIC, get_CV
|
from diagnostics import get_AICc, get_AIC, get_BIC, get_CV
|
||||||
from scipy.spatial.distance import pdist, squareform
|
from scipy.spatial.distance import pdist, squareform
|
||||||
from pysal.common import KDTree
|
from pysal.common import KDTree
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -197,7 +197,7 @@ class Sel_BW(object):
|
|||||||
|
|
||||||
if self.fb:
|
if self.fb:
|
||||||
self._fbw()
|
self._fbw()
|
||||||
print(self.bw[1])
|
print self.bw[1]
|
||||||
self.XB = self.bw[4]
|
self.XB = self.bw[4]
|
||||||
self.err = self.bw[5]
|
self.err = self.bw[5]
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import pysal
|
|||||||
class TestGWRGaussian(unittest.TestCase):
|
class TestGWRGaussian(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
|
data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
|
||||||
self.coords = list(zip(data.by_col('X'), data.by_col('Y')))
|
self.coords = zip(data.by_col('X'), data.by_col('Y'))
|
||||||
self.y = np.array(data.by_col('PctBach')).reshape((-1,1))
|
self.y = np.array(data.by_col('PctBach')).reshape((-1,1))
|
||||||
rural = np.array(data.by_col('PctRural')).reshape((-1,1))
|
rural = np.array(data.by_col('PctRural')).reshape((-1,1))
|
||||||
pov = np.array(data.by_col('PctPov')).reshape((-1,1))
|
pov = np.array(data.by_col('PctPov')).reshape((-1,1))
|
||||||
@@ -56,10 +56,10 @@ class TestGWRGaussian(unittest.TestCase):
|
|||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
CV = get_CV(rslt)
|
CV = get_CV(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 894.0)
|
self.assertAlmostEquals(np.floor(AICc), 894.0)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 890.0)
|
self.assertAlmostEquals(np.floor(AIC), 890.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 944.0)
|
self.assertAlmostEquals(np.floor(BIC), 944.0)
|
||||||
self.assertAlmostEqual(np.round(CV,2), 18.25)
|
self.assertAlmostEquals(np.round(CV,2), 18.25)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
|
||||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
|
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
|
||||||
@@ -107,10 +107,10 @@ class TestGWRGaussian(unittest.TestCase):
|
|||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
CV = get_CV(rslt)
|
CV = get_CV(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 896.0)
|
self.assertAlmostEquals(np.floor(AICc), 896.0)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 892.0)
|
self.assertAlmostEquals(np.floor(AIC), 892.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 941.0)
|
self.assertAlmostEquals(np.floor(BIC), 941.0)
|
||||||
self.assertAlmostEqual(np.around(CV, 2), 19.19)
|
self.assertAlmostEquals(np.around(CV, 2), 19.19)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
|
||||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
|
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
|
||||||
@@ -159,10 +159,10 @@ class TestGWRGaussian(unittest.TestCase):
|
|||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
CV = get_CV(rslt)
|
CV = get_CV(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 895.0)
|
self.assertAlmostEquals(np.floor(AICc), 895.0)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 890.0)
|
self.assertAlmostEquals(np.floor(AIC), 890.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 943.0)
|
self.assertAlmostEquals(np.floor(BIC), 943.0)
|
||||||
self.assertAlmostEqual(np.around(CV, 2), 18.21)
|
self.assertAlmostEquals(np.around(CV, 2), 18.21)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
|
||||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
|
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
|
||||||
@@ -211,10 +211,10 @@ class TestGWRGaussian(unittest.TestCase):
|
|||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
CV = get_CV(rslt)
|
CV = get_CV(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 896)
|
self.assertAlmostEquals(np.floor(AICc), 896)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 894.0)
|
self.assertAlmostEquals(np.floor(AIC), 894.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 922.0)
|
self.assertAlmostEquals(np.floor(BIC), 922.0)
|
||||||
self.assertAlmostEqual(np.around(CV, 2), 17.91)
|
self.assertAlmostEquals(np.around(CV, 2), 17.91)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
|
||||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
|
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
|
||||||
@@ -314,7 +314,7 @@ class TestGWRGaussian(unittest.TestCase):
|
|||||||
class TestGWRPoisson(unittest.TestCase):
|
class TestGWRPoisson(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
data = pysal.open(pysal.examples.get_path('Tokyomortality.csv'), mode='Ur')
|
data = pysal.open(pysal.examples.get_path('Tokyomortality.csv'), mode='Ur')
|
||||||
self.coords = list(zip(data.by_col('X_CENTROID'), data.by_col('Y_CENTROID')))
|
self.coords = zip(data.by_col('X_CENTROID'), data.by_col('Y_CENTROID'))
|
||||||
self.y = np.array(data.by_col('db2564')).reshape((-1,1))
|
self.y = np.array(data.by_col('db2564')).reshape((-1,1))
|
||||||
self.off = np.array(data.by_col('eb2564')).reshape((-1,1))
|
self.off = np.array(data.by_col('eb2564')).reshape((-1,1))
|
||||||
OCC = np.array(data.by_col('OCC_TEC')).reshape((-1,1))
|
OCC = np.array(data.by_col('OCC_TEC')).reshape((-1,1))
|
||||||
@@ -355,9 +355,9 @@ class TestGWRPoisson(unittest.TestCase):
|
|||||||
AIC = get_AIC(rslt)
|
AIC = get_AIC(rslt)
|
||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 13294.0)
|
self.assertAlmostEquals(np.floor(AICc), 13294.0)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 13247.0)
|
self.assertAlmostEquals(np.floor(AIC), 13247.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 13485.0)
|
self.assertAlmostEquals(np.floor(BIC), 13485.0)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-05)
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-05)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-03)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-03)
|
||||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-03)
|
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-03)
|
||||||
@@ -404,9 +404,9 @@ class TestGWRPoisson(unittest.TestCase):
|
|||||||
AIC = get_AIC(rslt)
|
AIC = get_AIC(rslt)
|
||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 13285)
|
self.assertAlmostEquals(np.floor(AICc), 13285)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 13259.0)
|
self.assertAlmostEquals(np.floor(AIC), 13259.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 13442.0)
|
self.assertAlmostEquals(np.floor(BIC), 13442.0)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
|
||||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
|
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
|
||||||
@@ -452,9 +452,9 @@ class TestGWRPoisson(unittest.TestCase):
|
|||||||
AIC = get_AIC(rslt)
|
AIC = get_AIC(rslt)
|
||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 367.0)
|
self.assertAlmostEquals(np.floor(AICc), 367.0)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 361.0)
|
self.assertAlmostEquals(np.floor(AIC), 361.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 451.0)
|
self.assertAlmostEquals(np.floor(BIC), 451.0)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-02,
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-02,
|
||||||
atol=1e-02)
|
atol=1e-02)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02, atol=1e-02)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02, atol=1e-02)
|
||||||
@@ -511,9 +511,9 @@ class TestGWRPoisson(unittest.TestCase):
|
|||||||
AIC = get_AIC(rslt)
|
AIC = get_AIC(rslt)
|
||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 11283.0)
|
self.assertAlmostEquals(np.floor(AICc), 11283.0)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 11211.0)
|
self.assertAlmostEquals(np.floor(AIC), 11211.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 11497.0)
|
self.assertAlmostEquals(np.floor(BIC), 11497.0)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-03)
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-03)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
|
||||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
|
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
|
||||||
@@ -559,9 +559,9 @@ class TestGWRPoisson(unittest.TestCase):
|
|||||||
AIC = get_AIC(rslt)
|
AIC = get_AIC(rslt)
|
||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 21070.0)
|
self.assertAlmostEquals(np.floor(AICc), 21070.0)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 21069.0)
|
self.assertAlmostEquals(np.floor(AIC), 21069.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 21111.0)
|
self.assertAlmostEquals(np.floor(BIC), 21111.0)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
|
||||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
|
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
|
||||||
@@ -583,7 +583,7 @@ class TestGWRPoisson(unittest.TestCase):
|
|||||||
class TestGWRBinomial(unittest.TestCase):
|
class TestGWRBinomial(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
data = pysal.open(pysal.examples.get_path('landslides.csv'))
|
data = pysal.open(pysal.examples.get_path('landslides.csv'))
|
||||||
self.coords = list(zip(data.by_col('X'), data.by_col('Y')))
|
self.coords = zip(data.by_col('X'), data.by_col('Y'))
|
||||||
self.y = np.array(data.by_col('Landslid')).reshape((-1,1))
|
self.y = np.array(data.by_col('Landslid')).reshape((-1,1))
|
||||||
ELEV = np.array(data.by_col('Elev')).reshape((-1,1))
|
ELEV = np.array(data.by_col('Elev')).reshape((-1,1))
|
||||||
SLOPE = np.array(data.by_col('Slope')).reshape((-1,1))
|
SLOPE = np.array(data.by_col('Slope')).reshape((-1,1))
|
||||||
@@ -630,9 +630,9 @@ class TestGWRBinomial(unittest.TestCase):
|
|||||||
AIC = get_AIC(rslt)
|
AIC = get_AIC(rslt)
|
||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 275.0)
|
self.assertAlmostEquals(np.floor(AICc), 275.0)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 271.0)
|
self.assertAlmostEquals(np.floor(AIC), 271.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 349.0)
|
self.assertAlmostEquals(np.floor(BIC), 349.0)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
|
||||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
|
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
|
||||||
@@ -693,9 +693,9 @@ class TestGWRBinomial(unittest.TestCase):
|
|||||||
AIC = get_AIC(rslt)
|
AIC = get_AIC(rslt)
|
||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 277.0)
|
self.assertAlmostEquals(np.floor(AICc), 277.0)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 271.0)
|
self.assertAlmostEquals(np.floor(AIC), 271.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 358.0)
|
self.assertAlmostEquals(np.floor(BIC), 358.0)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
|
||||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
|
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
|
||||||
@@ -756,9 +756,9 @@ class TestGWRBinomial(unittest.TestCase):
|
|||||||
AIC = get_AIC(rslt)
|
AIC = get_AIC(rslt)
|
||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 276.0)
|
self.assertAlmostEquals(np.floor(AICc), 276.0)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 272.0)
|
self.assertAlmostEquals(np.floor(AIC), 272.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 341.0)
|
self.assertAlmostEquals(np.floor(BIC), 341.0)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
|
||||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
|
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
|
||||||
@@ -819,9 +819,9 @@ class TestGWRBinomial(unittest.TestCase):
|
|||||||
AIC = get_AIC(rslt)
|
AIC = get_AIC(rslt)
|
||||||
BIC = get_BIC(rslt)
|
BIC = get_BIC(rslt)
|
||||||
|
|
||||||
self.assertAlmostEqual(np.floor(AICc), 276.0)
|
self.assertAlmostEquals(np.floor(AICc), 276.0)
|
||||||
self.assertAlmostEqual(np.floor(AIC), 273.0)
|
self.assertAlmostEquals(np.floor(AIC), 273.0)
|
||||||
self.assertAlmostEqual(np.floor(BIC), 331.0)
|
self.assertAlmostEquals(np.floor(BIC), 331.0)
|
||||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
|
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
|
||||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
|
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
|
||||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
|
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ class TestKernels(unittest.TestCase):
|
|||||||
y = np.arange(5,0, -1)
|
y = np.arange(5,0, -1)
|
||||||
np.random.shuffle(x)
|
np.random.shuffle(x)
|
||||||
np.random.shuffle(y)
|
np.random.shuffle(y)
|
||||||
self.coords = np.array(list(zip(x, y)))
|
self.coords = np.array(zip(x, y))
|
||||||
self.fix_gauss_kern = np.array([
|
self.fix_gauss_kern = np.array([
|
||||||
[ 1. , 0.38889556, 0.48567179, 0.48567179, 0.89483932],
|
[ 1. , 0.38889556, 0.48567179, 0.48567179, 0.89483932],
|
||||||
[ 0.38889556, 1. , 0.89483932, 0.64118039, 0.48567179],
|
[ 0.38889556, 1. , 0.89483932, 0.64118039, 0.48567179],
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import pysal
|
|||||||
class TestSelBW(unittest.TestCase):
|
class TestSelBW(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
|
data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
|
||||||
self.coords = list(zip(data.by_col('X'), data.by_col('Y')))
|
self.coords = zip(data.by_col('X'), data.by_col('Y'))
|
||||||
self.y = np.array(data.by_col('PctBach')).reshape((-1,1))
|
self.y = np.array(data.by_col('PctBach')).reshape((-1,1))
|
||||||
rural = np.array(data.by_col('PctRural')).reshape((-1,1))
|
rural = np.array(data.by_col('PctRural')).reshape((-1,1))
|
||||||
pov = np.array(data.by_col('PctPov')).reshape((-1,1))
|
pov = np.array(data.by_col('PctPov')).reshape((-1,1))
|
||||||
|
|||||||
@@ -2,8 +2,8 @@
|
|||||||
Geographically weighted regression
|
Geographically weighted regression
|
||||||
"""
|
"""
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .gwr.base.gwr import GWR as PySAL_GWR
|
from gwr.base.gwr import GWR as PySAL_GWR
|
||||||
from .gwr.base.sel_bw import Sel_BW
|
from gwr.base.sel_bw import Sel_BW
|
||||||
import json
|
import json
|
||||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||||
import plpy
|
import plpy
|
||||||
@@ -48,7 +48,7 @@ class GWR:
|
|||||||
# x, y are centroids of input geometries
|
# x, y are centroids of input geometries
|
||||||
x = np.array(query_result[0]['x'], dtype=np.float)
|
x = np.array(query_result[0]['x'], dtype=np.float)
|
||||||
y = np.array(query_result[0]['y'], dtype=np.float)
|
y = np.array(query_result[0]['y'], dtype=np.float)
|
||||||
coords = list(zip(x, y))
|
coords = zip(x, y)
|
||||||
|
|
||||||
# extract dependent variable
|
# extract dependent variable
|
||||||
Y = np.array(query_result[0]['dep_var'], dtype=np.float).reshape((-1, 1))
|
Y = np.array(query_result[0]['dep_var'], dtype=np.float).reshape((-1, 1))
|
||||||
@@ -88,7 +88,7 @@ class GWR:
|
|||||||
bw = np.repeat(float(bw), n)
|
bw = np.repeat(float(bw), n)
|
||||||
|
|
||||||
# create lists of json objs for model outputs
|
# create lists of json objs for model outputs
|
||||||
for idx in range(n):
|
for idx in xrange(n):
|
||||||
coeffs.append(json.dumps({var: model.params[idx, k]
|
coeffs.append(json.dumps({var: model.params[idx, k]
|
||||||
for k, var in enumerate(ind_vars)}))
|
for k, var in enumerate(ind_vars)}))
|
||||||
stand_errs.append(json.dumps({var: model.bse[idx, k]
|
stand_errs.append(json.dumps({var: model.bse[idx, k]
|
||||||
@@ -99,8 +99,8 @@ class GWR:
|
|||||||
json.dumps({var: filtered_t[idx, k]
|
json.dumps({var: filtered_t[idx, k]
|
||||||
for k, var in enumerate(ind_vars)}))
|
for k, var in enumerate(ind_vars)}))
|
||||||
|
|
||||||
return list(zip(coeffs, stand_errs, t_vals, filtered_t_vals,
|
return zip(coeffs, stand_errs, t_vals, filtered_t_vals,
|
||||||
predicted, residuals, r_squared, bw, rowid))
|
predicted, residuals, r_squared, bw, rowid)
|
||||||
|
|
||||||
def gwr_predict(self, subquery, dep_var, ind_vars,
|
def gwr_predict(self, subquery, dep_var, ind_vars,
|
||||||
bw=None, fixed=False, kernel='bisquare',
|
bw=None, fixed=False, kernel='bisquare',
|
||||||
@@ -133,7 +133,7 @@ class GWR:
|
|||||||
|
|
||||||
x = np.array(query_result[0]['x'], dtype=np.float)
|
x = np.array(query_result[0]['x'], dtype=np.float)
|
||||||
y = np.array(query_result[0]['y'], dtype=np.float)
|
y = np.array(query_result[0]['y'], dtype=np.float)
|
||||||
coords = np.array(list(zip(x, y)), dtype=np.float)
|
coords = np.array(zip(x, y), dtype=np.float)
|
||||||
|
|
||||||
# extract dependent variable
|
# extract dependent variable
|
||||||
Y = np.array(query_result[0]['dep_var']).reshape((-1, 1))
|
Y = np.array(query_result[0]['dep_var']).reshape((-1, 1))
|
||||||
@@ -190,7 +190,7 @@ class GWR:
|
|||||||
predicted = model.predy.flatten()
|
predicted = model.predy.flatten()
|
||||||
|
|
||||||
m = len(model.predy)
|
m = len(model.predy)
|
||||||
for idx in range(m):
|
for idx in xrange(m):
|
||||||
coeffs.append(json.dumps({var: model.params[idx, k]
|
coeffs.append(json.dumps({var: model.params[idx, k]
|
||||||
for k, var in enumerate(ind_vars)}))
|
for k, var in enumerate(ind_vars)}))
|
||||||
stand_errs.append(json.dumps({var: model.bse[idx, k]
|
stand_errs.append(json.dumps({var: model.bse[idx, k]
|
||||||
@@ -198,5 +198,5 @@ class GWR:
|
|||||||
t_vals.append(json.dumps({var: model.tvalues[idx, k]
|
t_vals.append(json.dumps({var: model.tvalues[idx, k]
|
||||||
for k, var in enumerate(ind_vars)}))
|
for k, var in enumerate(ind_vars)}))
|
||||||
|
|
||||||
return list(zip(coeffs, stand_errs, t_vals,
|
return zip(coeffs, stand_errs, t_vals,
|
||||||
r_squared, predicted, rowid[test]))
|
r_squared, predicted, rowid[test])
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
"""Import all functions from for segmentation"""
|
"""Import all functions from for segmentation"""
|
||||||
from .segmentation import *
|
from segmentation import *
|
||||||
|
|||||||
@@ -2,11 +2,14 @@
|
|||||||
Segmentation creation and prediction
|
Segmentation creation and prediction
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import pickle
|
||||||
|
import plpy
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.ensemble import GradientBoostingRegressor
|
from sklearn.ensemble import GradientBoostingRegressor
|
||||||
from sklearn import metrics
|
from sklearn import metrics
|
||||||
from sklearn.cross_validation import train_test_split
|
from sklearn.cross_validation import train_test_split
|
||||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||||
|
from crankshaft import model_storage
|
||||||
|
|
||||||
# NOTE: added optional param here
|
# NOTE: added optional param here
|
||||||
|
|
||||||
@@ -47,10 +50,11 @@ class Segmentation(object):
|
|||||||
model_parameters, 0.2)
|
model_parameters, 0.2)
|
||||||
prediction = model.predict(target_features)
|
prediction = model.predict(target_features)
|
||||||
accuracy_array = [accuracy] * prediction.shape[0]
|
accuracy_array = [accuracy] * prediction.shape[0]
|
||||||
return list(zip(target_ids, prediction, accuracy_array))
|
return zip(target_ids, prediction, accuracy_array)
|
||||||
|
|
||||||
def create_and_predict_segment(self, query, variable, feature_columns,
|
def create_and_predict_segment(self, query, variable, feature_columns,
|
||||||
target_query, model_params,
|
target_query, model_params,
|
||||||
|
model_name=None,
|
||||||
id_col='cartodb_id'):
|
id_col='cartodb_id'):
|
||||||
"""
|
"""
|
||||||
generate a segment with machine learning
|
generate a segment with machine learning
|
||||||
@@ -70,16 +74,24 @@ class Segmentation(object):
|
|||||||
(target, features, target_mean,
|
(target, features, target_mean,
|
||||||
feature_means) = self.clean_data(query, variable, feature_columns)
|
feature_means) = self.clean_data(query, variable, feature_columns)
|
||||||
|
|
||||||
model, accuracy = train_model(target, features, model_params, 0.2)
|
model_storage.create_model_table()
|
||||||
|
|
||||||
|
# find model if it exists and is specified
|
||||||
|
if model_name is not None:
|
||||||
|
model = model_storage.get_model(model_name)
|
||||||
|
|
||||||
|
if locals().get('model') is None:
|
||||||
|
model, accuracy = train_model(target, features, model_params, 0.2)
|
||||||
|
|
||||||
result = self.predict_segment(model, feature_columns, target_query,
|
result = self.predict_segment(model, feature_columns, target_query,
|
||||||
feature_means)
|
feature_means)
|
||||||
accuracy_array = [accuracy] * result.shape[0]
|
accuracy_array = [accuracy] * result.shape[0]
|
||||||
|
|
||||||
rowid = self.data_provider.get_segmentation_data(params)
|
rowid = self.data_provider.get_segmentation_data(params)
|
||||||
'''
|
|
||||||
rowid = [{'ids': [2.9, 4.9, 4, 5, 6]}]
|
# store the model for later use
|
||||||
'''
|
model_storage.set_model(model, model_name, feature_columns)
|
||||||
return list(zip(rowid[0]['ids'], result, accuracy_array))
|
return zip(rowid[0]['ids'], result, accuracy_array)
|
||||||
|
|
||||||
def predict_segment(self, model, feature_columns, target_query,
|
def predict_segment(self, model, feature_columns, target_query,
|
||||||
feature_means):
|
feature_means):
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
"""Import all functions from clustering libraries."""
|
"""Import all functions from clustering libraries."""
|
||||||
from .markov import *
|
from markov import *
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ class Markov(object):
|
|||||||
trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, sp_markov_result.classes[:, -1])
|
trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, sp_markov_result.classes[:, -1])
|
||||||
|
|
||||||
# output the results
|
# output the results
|
||||||
return list(zip(trend, trend_up, trend_down, volatility, weights.id_order))
|
return zip(trend, trend_up, trend_down, volatility, weights.id_order)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -140,7 +140,7 @@ def rebin_data(time_data, num_time_per_bin):
|
|||||||
|
|
||||||
return np.array(
|
return np.array(
|
||||||
[time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1)
|
[time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1)
|
||||||
for i in range(int(n_max))]).T
|
for i in range(n_max)]).T
|
||||||
|
|
||||||
|
|
||||||
def get_prob_dist(transition_matrix, lag_indices, unit_indices):
|
def get_prob_dist(transition_matrix, lag_indices, unit_indices):
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
joblib==0.9.4
|
joblib==0.8.3
|
||||||
numpy==1.11.0
|
numpy==1.6.1
|
||||||
scipy==0.17.0
|
scipy==0.14.0
|
||||||
pysal==1.14.3
|
pysal==1.14.3
|
||||||
scikit-learn==0.17.0
|
scikit-learn==0.14.1
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from setuptools import setup, find_packages
|
|||||||
setup(
|
setup(
|
||||||
name='crankshaft',
|
name='crankshaft',
|
||||||
|
|
||||||
version='0.9.0',
|
version='0.0.0',
|
||||||
|
|
||||||
description='CartoDB Spatial Analysis Python Library',
|
description='CartoDB Spatial Analysis Python Library',
|
||||||
|
|
||||||
@@ -26,7 +26,7 @@ setup(
|
|||||||
'Intended Audience :: Mapping comunity',
|
'Intended Audience :: Mapping comunity',
|
||||||
'Topic :: Maps :: Mapping Tools',
|
'Topic :: Maps :: Mapping Tools',
|
||||||
'License :: OSI Approved :: MIT License',
|
'License :: OSI Approved :: MIT License',
|
||||||
'Programming Language :: Python',
|
'Programming Language :: Python :: 2.7',
|
||||||
],
|
],
|
||||||
|
|
||||||
keywords='maps mapping tools spatial analysis geostatistics',
|
keywords='maps mapping tools spatial analysis geostatistics',
|
||||||
@@ -41,7 +41,7 @@ setup(
|
|||||||
# The choice of component versions is dictated by what's
|
# The choice of component versions is dictated by what's
|
||||||
# provisioned in the production servers.
|
# provisioned in the production servers.
|
||||||
# IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
|
# IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
|
||||||
install_requires=['joblib==0.9.4', 'numpy==1.11.0', 'scipy==0.17.0', 'pysal==1.14.3', 'scikit-learn==0.17.0'],
|
install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.14.3', 'scikit-learn==0.14.1', 'petname==2.2'],
|
||||||
|
|
||||||
requires=['pysal', 'numpy', 'sklearn'],
|
requires=['pysal', 'numpy', 'sklearn'],
|
||||||
|
|
||||||
|
|||||||
49
release/python/0.9.0/crankshaft/setup.py-r
Normal file
49
release/python/0.9.0/crankshaft/setup.py-r
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
|
||||||
|
"""
|
||||||
|
CartoDB Spatial Analysis Python Library
|
||||||
|
See:
|
||||||
|
https://github.com/CartoDB/crankshaft
|
||||||
|
"""
|
||||||
|
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name='crankshaft',
|
||||||
|
|
||||||
|
version='0.0.0',
|
||||||
|
|
||||||
|
description='CartoDB Spatial Analysis Python Library',
|
||||||
|
|
||||||
|
url='https://github.com/CartoDB/crankshaft',
|
||||||
|
|
||||||
|
author='Data Services Team - CartoDB',
|
||||||
|
author_email='dataservices@cartodb.com',
|
||||||
|
|
||||||
|
license='MIT',
|
||||||
|
|
||||||
|
classifiers=[
|
||||||
|
'Development Status :: 3 - Alpha',
|
||||||
|
'Intended Audience :: Mapping comunity',
|
||||||
|
'Topic :: Maps :: Mapping Tools',
|
||||||
|
'License :: OSI Approved :: MIT License',
|
||||||
|
'Programming Language :: Python :: 2.7',
|
||||||
|
],
|
||||||
|
|
||||||
|
keywords='maps mapping tools spatial analysis geostatistics',
|
||||||
|
|
||||||
|
packages=find_packages(exclude=['contrib', 'docs', 'tests']),
|
||||||
|
|
||||||
|
extras_require={
|
||||||
|
'dev': ['unittest'],
|
||||||
|
'test': ['unittest', 'nose', 'mock'],
|
||||||
|
},
|
||||||
|
|
||||||
|
# The choice of component versions is dictated by what's
|
||||||
|
# provisioned in the production servers.
|
||||||
|
# IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
|
||||||
|
install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.14.3', 'scikit-learn==0.14.1', 'petname==2.2'],
|
||||||
|
|
||||||
|
requires=['pysal', 'numpy', 'sklearn'],
|
||||||
|
|
||||||
|
test_suite='test'
|
||||||
|
)
|
||||||
6
release/python/0.9.0/crankshaft/test/fixtures/optim.json
vendored
Normal file
6
release/python/0.9.0/crankshaft/test/fixtures/optim.json
vendored
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"production_col": [10, 10, 10],
|
||||||
|
"capacity_col": [0.09, 0.31],
|
||||||
|
"marginal_col": [5, 5],
|
||||||
|
"pairwise": [[1, 2, 3], [3, 2, 1]]
|
||||||
|
}
|
||||||
@@ -72,7 +72,7 @@ class MoranTest(unittest.TestCase):
|
|||||||
result = moran.local_stat('subquery', 'value',
|
result = moran.local_stat('subquery', 'value',
|
||||||
'knn', 5, 99, 'the_geom', 'cartodb_id')
|
'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||||
result = [(row[0], row[6]) for row in result]
|
result = [(row[0], row[6]) for row in result]
|
||||||
zipped_values = list(zip(result, self.moran_data))
|
zipped_values = zip(result, self.moran_data)
|
||||||
|
|
||||||
for ([res_quad, res_val], [exp_val, exp_quad]) in zipped_values:
|
for ([res_quad, res_val], [exp_val, exp_quad]) in zipped_values:
|
||||||
self.assertAlmostEqual(res_val, exp_val)
|
self.assertAlmostEqual(res_val, exp_val)
|
||||||
@@ -91,7 +91,7 @@ class MoranTest(unittest.TestCase):
|
|||||||
'knn', 5, 99, 'the_geom', 'cartodb_id')
|
'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||||
result = [(row[0], row[6]) for row in result]
|
result = [(row[0], row[6]) for row in result]
|
||||||
|
|
||||||
zipped_values = list(zip(result, self.moran_data))
|
zipped_values = zip(result, self.moran_data)
|
||||||
|
|
||||||
for ([res_quad, res_val], [exp_val, exp_quad]) in zipped_values:
|
for ([res_quad, res_val], [exp_val, exp_quad]) in zipped_values:
|
||||||
self.assertAlmostEqual(res_val, exp_val)
|
self.assertAlmostEqual(res_val, exp_val)
|
||||||
|
|||||||
@@ -86,7 +86,7 @@ class GWRTest(unittest.TestCase):
|
|||||||
|
|
||||||
# unpack response
|
# unpack response
|
||||||
coeffs, stand_errs, t_vals, t_vals_filtered, predicteds, \
|
coeffs, stand_errs, t_vals, t_vals_filtered, predicteds, \
|
||||||
residuals, r_squareds, bws, rowids = list(zip(*gwr_resp))
|
residuals, r_squareds, bws, rowids = zip(*gwr_resp)
|
||||||
|
|
||||||
# prepare for comparison
|
# prepare for comparison
|
||||||
coeff_known_pctpov = self.knowns['est_pctpov']
|
coeff_known_pctpov = self.knowns['est_pctpov']
|
||||||
@@ -98,13 +98,13 @@ class GWRTest(unittest.TestCase):
|
|||||||
# test pctpov coefficient estimates
|
# test pctpov coefficient estimates
|
||||||
for idx, val in enumerate(coeff_known_pctpov):
|
for idx, val in enumerate(coeff_known_pctpov):
|
||||||
resp_idx = rowids.index(ids[idx])
|
resp_idx = rowids.index(ids[idx])
|
||||||
self.assertAlmostEqual(val,
|
self.assertAlmostEquals(val,
|
||||||
json.loads(coeffs[resp_idx])['pctpov'],
|
json.loads(coeffs[resp_idx])['pctpov'],
|
||||||
places=4)
|
places=4)
|
||||||
# test pctrural tvals
|
# test pctrural tvals
|
||||||
for idx, val in enumerate(tval_known_pctblack):
|
for idx, val in enumerate(tval_known_pctblack):
|
||||||
resp_idx = rowids.index(ids[idx])
|
resp_idx = rowids.index(ids[idx])
|
||||||
self.assertAlmostEqual(val,
|
self.assertAlmostEquals(val,
|
||||||
json.loads(t_vals[resp_idx])['pctrural'],
|
json.loads(t_vals[resp_idx])['pctrural'],
|
||||||
places=4)
|
places=4)
|
||||||
|
|
||||||
@@ -119,7 +119,7 @@ class GWRTest(unittest.TestCase):
|
|||||||
|
|
||||||
# unpack response
|
# unpack response
|
||||||
coeffs, stand_errs, t_vals, \
|
coeffs, stand_errs, t_vals, \
|
||||||
r_squareds, predicteds, rowid = list(zip(*gwr_resp))
|
r_squareds, predicteds, rowid = zip(*gwr_resp)
|
||||||
threshold = 0.01
|
threshold = 0.01
|
||||||
|
|
||||||
for i, idx in enumerate(self.idx_ids_of_unknowns):
|
for i, idx in enumerate(self.idx_ids_of_unknowns):
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ class SegmentationTest(unittest.TestCase):
|
|||||||
test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])
|
test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])
|
||||||
result = replace_nan_with_mean(test_array, means=None)[0]
|
result = replace_nan_with_mean(test_array, means=None)[0]
|
||||||
expectation = np.array([1.2, 2.2, 3.2, 2.2, 2.2], dtype=float)
|
expectation = np.array([1.2, 2.2, 3.2, 2.2, 2.2], dtype=float)
|
||||||
self.assertEqual(sorted(result), sorted(expectation))
|
self.assertItemsEqual(result, expectation)
|
||||||
|
|
||||||
def test_create_and_predict_segment(self):
|
def test_create_and_predict_segment(self):
|
||||||
"""test segmentation.test_create_and_predict"""
|
"""test segmentation.test_create_and_predict"""
|
||||||
@@ -118,7 +118,7 @@ class SegmentationTest(unittest.TestCase):
|
|||||||
model_parameters,
|
model_parameters,
|
||||||
id_col='cartodb_id')
|
id_col='cartodb_id')
|
||||||
results = [(row[1], row[2]) for row in result]
|
results = [(row[1], row[2]) for row in result]
|
||||||
zipped_values = list(zip(results, self.result_seg))
|
zipped_values = zip(results, self.result_seg)
|
||||||
pre_res = [r[0] for r in self.true_result]
|
pre_res = [r[0] for r in self.true_result]
|
||||||
acc_res = [r[1] for r in self.result_seg]
|
acc_res = [r[1] for r in self.result_seg]
|
||||||
|
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ class SpaceTimeTests(unittest.TestCase):
|
|||||||
|
|
||||||
self.assertTrue(result is not None)
|
self.assertTrue(result is not None)
|
||||||
result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
|
result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
|
||||||
print(result[0])
|
print result[0]
|
||||||
expected = self.markov_data
|
expected = self.markov_data
|
||||||
for ([res_trend, res_up, res_down, res_vol, res_id],
|
for ([res_trend, res_up, res_down, res_vol, res_id],
|
||||||
[exp_trend, exp_up, exp_down, exp_vol, exp_id]
|
[exp_trend, exp_up, exp_down, exp_vol, exp_id]
|
||||||
|
|||||||
15
release/python/0.9.0/crankshaft/tools/setup.py
Normal file
15
release/python/0.9.0/crankshaft/tools/setup.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from test.helper import plpy, fixture_file
|
||||||
|
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||||
|
import json
|
||||||
|
import crankshaft
|
||||||
|
|
||||||
|
class RawDataProvider(AnalysisDataProvider):
    """Data provider that returns the fixture data it was constructed with.

    Template for feeding canned data to an analysis instead of querying the
    database: the payload passed to the constructor is stored and handed
    back unchanged by the data-access method.
    """

    def __init__(self, fixturedata):
        # Keep the fixture payload so the data-access method can return it.
        self.your_algo_data = fixturedata

    def get_moran(self, params):
        """
        Replace this function name with the one used in your algorithm,
        and make sure to use the same function signature that is written
        for this algo in analysis_data_provider.py

        Returns the stored fixture data as-is; `params` is not consulted.
        """
        return self.your_algo_data
|
||||||
@@ -0,0 +1,76 @@
|
|||||||
|
"""
|
||||||
|
Based on the Weiszfeld algorithm:
|
||||||
|
https://en.wikipedia.org/wiki/Geometric_median
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# import plpy
|
||||||
|
import numpy as np
|
||||||
|
from numpy.linalg import norm
|
||||||
|
|
||||||
|
|
||||||
|
def median_center(tablename, geom_col, num_iters=50, tolerance=0.001):
    """Estimate the median center (geometric median) of a table's points.

    Runs Weiszfeld iterations, starting from the mean of the coordinates.

    Args:
        tablename: table to read from; interpolated into the SQL text
            without escaping, so it must come from a trusted caller.
        geom_col: point geometry column; interpolated without escaping.
        num_iters: maximum number of iterations to run.
        tolerance: stop early once the center moves less than this amount
            between two consecutive iterations.

    Returns:
        numpy array with the x and y coordinates of the estimated center.
    """
    query = '''
        SELECT array_agg(ST_X({geom_col})) As x_coords,
               array_agg(ST_Y({geom_col})) As y_coords
          FROM {tablename}
    '''.format(geom_col=geom_col, tablename=tablename)

    try:
        # Imported locally because the module-level import is disabled;
        # outside PostgreSQL this raises and the fallback below is used.
        import plpy
        resp = plpy.execute(query)
        # Result sets are indexed by row number first, then column name.
        data = np.vstack((resp[0]['x_coords'],
                          resp[0]['y_coords'])).T

        plpy.notice('coords: %s' % str(data))
    except Exception:
        # Any failure (no plpy, failed query) falls back to synthetic points
        # so the routine can still be exercised outside the database.
        print('No plpy')
        data = np.array([[1.2 * np.random.random() + 10.,
                          1.1 * (np.random.random() - 1.) + 3.]
                         for _ in range(1, 100)])

    # initialize 'median center' to be the mean
    coords_center_temp = data.mean(axis=0)

    print('temp_center: %s' % str(coords_center_temp))

    for i in range(num_iters):
        old_coords_center = coords_center_temp.copy()
        denom = denominator(coords_center_temp, data)
        weighted_sum = np.sum([point * numerator(coords_center_temp, point)
                               for point in data], axis=0)
        coords_center_temp = weighted_sum / denom
        shift = np.linalg.norm(old_coords_center - coords_center_temp)

        print("Pass #%d" % i)
        print("max, min of data: %0.4f, %0.4f" % (data.max(), data.min()))
        print('temp_center: %s' % str(coords_center_temp))
        print("Change in center: %0.4f" % shift)
        print("Center coords: %s" % str(coords_center_temp))
        print("Objective Function: %0.4f" % obj_func(coords_center_temp, data))

        # Converged: the update no longer moves the center appreciably.
        if shift < tolerance:
            break

    return coords_center_temp
|
||||||
|
|
||||||
|
|
||||||
|
def obj_func(center_coords, data):
    """Return the geometric-median objective for a candidate center.

    The objective is the sum of Euclidean distances from `center_coords`
    to every row of `data`.

    Args:
        center_coords: 1-D array with the candidate center's coordinates.
        data: array with one point per row (a single 1-D point is accepted).

    Returns:
        Total distance from the center to all points, as a float.
    """
    offsets = np.atleast_2d(data) - center_coords
    # Row-wise norms spelled out by hand so this does not depend on the
    # `axis` argument of np.linalg.norm, which older NumPy releases lack.
    distances = np.sqrt((offsets ** 2).sum(axis=1))
    return distances.sum()
|
||||||
|
|
||||||
|
|
||||||
|
def numerator(center_coords, data_i):
    """Return the inverse of the distance between the center and one point.

    Args:
        center_coords: coordinates of the current center estimate.
        data_i: coordinates of a single data point.

    Returns:
        1 / ||center_coords - data_i|| (Euclidean norm).
    """
    distance = np.linalg.norm(center_coords - data_i)
    return np.reciprocal(distance)
|
||||||
|
|
||||||
|
|
||||||
|
def denominator(center_coords, data):
    """Return the Weiszfeld normalizer: the sum of inverse distances.

    Args:
        center_coords: 1-D array with the current center estimate.
        data: array with one point per row (a single 1-D point is accepted).

    Returns:
        Sum over all points of 1 / ||point - center_coords||.
    """
    offsets = np.atleast_2d(data) - center_coords
    # Row-wise norms spelled out by hand so this does not depend on the
    # `axis` argument of np.linalg.norm, which older NumPy releases lack.
    distances = np.sqrt((offsets ** 2).sum(axis=1))
    return np.reciprocal(distances).sum()
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
from core import set_model, get_model, create_model_table
|
||||||
@@ -0,0 +1,86 @@
|
|||||||
|
import time
|
||||||
|
import plpy
|
||||||
|
import pickle
|
||||||
|
from petname import generate
|
||||||
|
|
||||||
|
def create_model_table():
    """Create the model_storage table if it does not exist yet.

    Emits the DDL, a confirmation message and the execution result as
    notices through plpy.
    """
    ddl = '''
        create table if not exists model_storage(
            description text,
            name text unique,
            model bytea,
            feature_names text[],
            date_created timestamptz,
            id serial primary key);
        '''
    plpy.notice(ddl)
    result = plpy.execute(plpy.prepare(ddl))
    plpy.notice('Model table successfully created')
    plpy.notice(str(result))
|
||||||
|
|
||||||
|
def get_model(model_name):
    """Retrieve a stored model by name, if it exists.

    Args:
        model_name: value matched against the `name` column of
            model_storage.

    Returns:
        The unpickled model when exactly one row matches, otherwise None.

    Database errors (plpy.SPIError) are reported through plpy.error.
    """
    # Bound up front so the final return is valid on every path, including
    # an error path where plpy.error does not raise (e.g. a stubbed plpy).
    model = None
    try:
        plan = plpy.prepare('''
            SELECT model FROM model_storage
            WHERE name = $1;
        ''', ['text', ])
        model_encoded = plpy.execute(plan, [model_name, ])
        if len(model_encoded) == 1:
            # NOTE(security): unpickling runs arbitrary code embedded in the
            # payload, so model_storage must only be writable by trusted
            # roles.
            model = pickle.loads(
                model_encoded[0]['model']
            )
            plpy.notice('Model successfully loaded')
        else:
            plpy.notice('Model not found, or too many models '
                        '({})'.format(len(model_encoded)))
    except plpy.SPIError as err:
        plpy.error('ERROR: {}'.format(err))

    return model
|
||||||
|
|
||||||
|
def set_model(model, model_name, feature_names):
    """Store a model in the table model_storage.

    Args:
        model: object to persist; stored pickled, with its repr (collapsed
            onto one line) as the description.
        model_name: name to store the model under. When None, a random
            name is generated and regenerated until it does not clash with
            a name already in model_storage.
        feature_names: iterable of strings stored as a text[] column.

    Returns:
        The name the model was stored under, or None when the insert failed
        (the failure is reported as a notice, not raised).
    """
    if model_name is None:
        model_name = generate(words=2, separator='_', letters=8)
        existing_names = plpy.execute('''
            SELECT array_agg(name) as name
            FROM model_storage
        ''')
        # array_agg over an empty table yields NULL rather than an empty array.
        taken = set(existing_names[0]['name'] or [])
        while model_name in taken:
            model_name = generate(words=2, separator='_', letters=10)
        plpy.notice(model_name)

    # store model
    try:
        plan = plpy.prepare('''
            INSERT INTO model_storage(description, name, model, feature_names, date_created)
            VALUES (
              $1,
              $2,
              $3,
              $4::text[],
              to_timestamp($5));
        ''', ['text', 'text', 'bytea', 'text', 'numeric'])
        plpy.execute(
            plan,
            [' '.join(m.strip() for m in repr(model).split('\n')),
             model_name,
             pickle.dumps(model),
             _text_array_literal(feature_names),
             time.time()]
        )
        plpy.notice('model successfully stored as {}'.format(model_name))
    except plpy.SPIError as err:
        plpy.notice('ERROR: {}\nt: {}'.format(err, time.time()))
        return None

    return model_name


def _text_array_literal(values):
    """Render strings as a PostgreSQL text[] literal with quoted elements."""
    # Quoting each element (and escaping backslashes and double quotes) keeps
    # commas, braces, spaces and quotes inside a name from being read as
    # array syntax.
    quoted = ['"%s"' % value.replace('\\', '\\\\').replace('"', '\\"')
              for value in values]
    return '{%s}' % ','.join(quoted)
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user