Compare commits
241 Commits
contours
...
KDE_contou
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f72d2785d8 | ||
|
|
c21fcdf69a | ||
|
|
c8871a5547 | ||
|
|
3d99d1f9bf | ||
|
|
78ceb02c22 | ||
|
|
408dc6806e | ||
|
|
702a1fb1ed | ||
|
|
76ee4cacbc | ||
|
|
4192930260 | ||
|
|
2fe50e1f71 | ||
|
|
d97231f604 | ||
|
|
cb330ebe6b | ||
|
|
505ae0fb44 | ||
|
|
35b5612ded | ||
|
|
cfb58f7898 | ||
|
|
bef5fe2c80 | ||
|
|
ab349fbdc0 | ||
|
|
9e4d378a08 | ||
|
|
3a2e5ec7f9 | ||
|
|
1e42c23919 | ||
|
|
55d2860a84 | ||
|
|
b7860d5a24 | ||
|
|
476ec04386 | ||
|
|
75d97915d6 | ||
|
|
068c80f369 | ||
|
|
5f98735ce9 | ||
|
|
7d6148456e | ||
|
|
f6f9d6e9c8 | ||
|
|
32515f445e | ||
|
|
dd0ca9a24f | ||
|
|
045fd67f47 | ||
|
|
2bb2e60af8 | ||
|
|
99dc363c7d | ||
|
|
c799f4d73b | ||
|
|
3e01470aa0 | ||
|
|
a2b3733a1e | ||
|
|
a5e4ae99ce | ||
|
|
c80975fe46 | ||
|
|
dae406927f | ||
|
|
a177bf5620 | ||
|
|
de7d56dc29 | ||
|
|
642935e44a | ||
|
|
15fc0bb683 | ||
|
|
8cbc29a3e6 | ||
|
|
03aada8758 | ||
|
|
faa899cf87 | ||
|
|
911a0ccccc | ||
|
|
76b3a873b8 | ||
|
|
de274bf628 | ||
|
|
95803b4cd4 | ||
|
|
609e2aa015 | ||
|
|
44a6471c69 | ||
|
|
2fa087bb62 | ||
|
|
3f210c2a71 | ||
|
|
6a9045ba62 | ||
|
|
6f72075999 | ||
|
|
89c47dcef6 | ||
|
|
1d7f62fa85 | ||
|
|
279912c03f | ||
|
|
7c9628eef9 | ||
|
|
81d7af9e9a | ||
|
|
4df8257377 | ||
|
|
b62d7b32ef | ||
|
|
7c4314a411 | ||
|
|
1912d57891 | ||
|
|
1d13b98d68 | ||
|
|
3c066d65fc | ||
|
|
79699cd5cb | ||
|
|
59d50a1c48 | ||
|
|
01fc2c1dd1 | ||
|
|
f5fb4499db | ||
|
|
ad5cffbf0d | ||
|
|
1db938c450 | ||
|
|
3480a0d252 | ||
|
|
237aa1c581 | ||
|
|
1e19f468eb | ||
|
|
8b5e910234 | ||
|
|
d08a2b6d2d | ||
|
|
a222341863 | ||
|
|
4834ee2f42 | ||
|
|
bbe22d0b4d | ||
|
|
284d8ede44 | ||
|
|
a8943bae98 | ||
|
|
75531b671e | ||
|
|
0acae8240f | ||
|
|
7b98415da3 | ||
|
|
5a2319db72 | ||
|
|
b0c47663da | ||
|
|
9db4b7f519 | ||
|
|
fd1862167c | ||
|
|
c870f68c77 | ||
|
|
1e8bc12e0a | ||
|
|
b33ba2d294 | ||
|
|
889cd5c579 | ||
|
|
1a4944b960 | ||
|
|
9d3de5a8ef | ||
|
|
7f3b23f67a | ||
|
|
cc4579461d | ||
|
|
e95c40c2f9 | ||
|
|
69f08c4b78 | ||
|
|
4e86965f03 | ||
|
|
d41e28bc6f | ||
|
|
1f73be2752 | ||
|
|
5183f5ff92 | ||
|
|
73d38bbbaa | ||
|
|
e7de471ac8 | ||
|
|
14d50facda | ||
|
|
5e8d11fce2 | ||
|
|
183a0f9604 | ||
|
|
fc3a7e3d78 | ||
|
|
4782d39849 | ||
|
|
21dd956c15 | ||
|
|
c0dfaa8341 | ||
|
|
edf635886b | ||
|
|
a5cb857841 | ||
|
|
971c8f75d8 | ||
|
|
54d35c614b | ||
|
|
d1a267febb | ||
|
|
0eb3db3c1d | ||
|
|
7a7cbcf33f | ||
|
|
a0a026c2a1 | ||
|
|
c04e15ef81 | ||
|
|
ae1bb703a7 | ||
|
|
0e3970f52c | ||
|
|
59c520da16 | ||
|
|
90c3e21c0d | ||
|
|
3013998e1b | ||
|
|
2b8adb744d | ||
|
|
434cfcd1e9 | ||
|
|
b5c6b42081 | ||
|
|
408e34cd38 | ||
|
|
ca610af4d8 | ||
|
|
e80fdca7fc | ||
|
|
fd7aa4140a | ||
|
|
7a1eb6b9b6 | ||
|
|
1b0d1cc82c | ||
|
|
fe22464b75 | ||
|
|
ca5175f15b | ||
|
|
4e870e4393 | ||
|
|
b05ad98ed9 | ||
|
|
bc8055a12b | ||
|
|
c3913459d9 | ||
|
|
f571e59a95 | ||
|
|
0400b1a880 | ||
|
|
c7e4baa4aa | ||
|
|
cc4a35ebd9 | ||
|
|
c44434ef08 | ||
|
|
95247f66bb | ||
|
|
9ba9d07bb5 | ||
|
|
ef475adc26 | ||
|
|
3294eb35ab | ||
|
|
693f6a68db | ||
|
|
e73862a6e1 | ||
|
|
fd76a509ea | ||
|
|
c18baf26d8 | ||
|
|
314d1851db | ||
|
|
6165d5e61e | ||
|
|
7695102500 | ||
|
|
369d1d2f41 | ||
|
|
e32bab3f88 | ||
|
|
cd3790860a | ||
|
|
1de90a7d39 | ||
|
|
9535506b93 | ||
|
|
bae2f04955 | ||
|
|
dc0873cd2b | ||
|
|
d4621a6e9c | ||
|
|
9943d4de58 | ||
|
|
8cccb18eed | ||
|
|
a0cb699b1a | ||
|
|
633b63bccc | ||
|
|
ea02f36235 | ||
|
|
22b6aed7c1 | ||
|
|
f6e8524669 | ||
|
|
02b74813ac | ||
|
|
4c243bf1d3 | ||
|
|
b0150d4fec | ||
|
|
6bb4f36df5 | ||
|
|
5a46f65e59 | ||
|
|
e56519f599 | ||
|
|
8dd8ab37a5 | ||
|
|
06f5cf9951 | ||
|
|
bc67ae8f69 | ||
|
|
b8330dce07 | ||
|
|
2e1b598b4f | ||
|
|
d140b4249e | ||
|
|
d398494720 | ||
|
|
fbc30f1224 | ||
|
|
98c2b11935 | ||
|
|
491577ed62 | ||
|
|
68e5e0892c | ||
|
|
00579cd838 | ||
|
|
3f20275d3d | ||
|
|
c5a58f97ec | ||
|
|
42e760b5d1 | ||
|
|
cfb40ddecd | ||
|
|
f3673d6f89 | ||
|
|
c488900c8c | ||
|
|
b889416947 | ||
|
|
58cf210e96 | ||
|
|
1e16b7839b | ||
|
|
eecbe39547 | ||
|
|
1578b17eb8 | ||
|
|
3eda8ecd16 | ||
|
|
9b32918746 | ||
|
|
0aa4d0a50e | ||
|
|
3b31da783a | ||
|
|
fb071215dc | ||
|
|
d3e1fca2b3 | ||
|
|
8762f6ca1c | ||
|
|
58c141d217 | ||
|
|
5a7d3178dd | ||
|
|
4903af6cdc | ||
|
|
692014d694 | ||
|
|
47e0253652 | ||
|
|
9f03a9b075 | ||
|
|
b5281d0681 | ||
|
|
689ec8a925 | ||
|
|
a7e42e93cc | ||
|
|
bad09ffd7b | ||
|
|
4706442a1d | ||
|
|
935c7f9963 | ||
|
|
ef3bcaeee8 | ||
|
|
4ffb2c9664 | ||
|
|
dea6e2f1a7 | ||
|
|
d13f167d47 | ||
|
|
a518034e65 | ||
|
|
24e4037995 | ||
|
|
82a738fe40 | ||
|
|
e801c9cb60 | ||
|
|
f134a54c24 | ||
|
|
803781e08d | ||
|
|
0206cc6c44 | ||
|
|
fcf57289fc | ||
|
|
b754ffe42a | ||
|
|
f885cc9f7b | ||
|
|
0056f411b5 | ||
|
|
1810f02242 | ||
|
|
8e972128eb | ||
|
|
cdd2d9e722 | ||
|
|
d96d6b2c48 | ||
|
|
746dcc9723 |
7
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
7
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
- [ ] All declared geometries are `geometry(Geometry, 4326)` for general geoms, or `geometry(Point, 4326)`
|
||||
- [ ] Existing functions in crankshaft python library called from the extension are kept at least from version N to version N+1 (to avoid breakage during upgrades).
|
||||
- [ ] Docs for public-facing functions are written
|
||||
- [ ] New functions follow the naming convention `CDB_NameOfFunction`; internal functions begin with an underscore `_`.
|
||||
- [ ] If appropriate, new functions accept an arbitrary query as an input (see [Crankshaft Issue #6](https://github.com/CartoDB/crankshaft/issues/6) for more information)
|
||||
|
||||
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
envs/
|
||||
*.pyc
|
||||
.DS_Store
|
||||
.idea/
|
||||
141
CONTRIBUTING.md
141
CONTRIBUTING.md
@@ -1,84 +1,93 @@
|
||||
# Contributing guide
|
||||
# Development process
|
||||
|
||||
## How to add new functions
|
||||
Please read the Working Process/Quickstart Guide in [README.md](https://github.com/CartoDB/crankshaft/blob/master/README.md) first.
|
||||
|
||||
Try to put as little logic in the SQL extension as possible and
|
||||
just use it as a wrapper to the Python module functionality.
|
||||
For any modification of crankshaft, such as adding new features,
|
||||
refactoring or bug-fixing, topic branch must be created out of the `develop`
|
||||
branch and be used for the development process.
|
||||
|
||||
Once a function is defined it should never change its signature in subsequent
|
||||
versions. To change a function's signature a new function with a different
|
||||
name must be created.
|
||||
Modifications are done inside `src/pg/sql` and `src/py/crankshaft`.
|
||||
|
||||
### Version numbers
|
||||
Take into account:
|
||||
|
||||
The version of both the SQL extension and the Python package shall
|
||||
follow the [Semantic Versioning 2.0](http://semver.org/) guidelines:
|
||||
* Tests must be added for any new functionality
|
||||
(inside `src/pg/test`, `src/py/crankshaft/test`) as well as to
|
||||
detect any bugs that are being fixed.
|
||||
* Add or modify the corresponding documentation files in the `doc` folder.
|
||||
Since we expect to have highly technical functions here, an extensive
|
||||
background explanation would be of great help to users of this extension.
|
||||
* Convention: snake case(i.e. `snake_case` and not `CamelCase`)
|
||||
shall be used for all function names.
|
||||
Prefix function names intended for public use with `cdb_`
|
||||
and private functions (to be used only internally inside
|
||||
the extension) with `_cdb_`.
|
||||
|
||||
* When backwards incompatibility is introduced the major number is incremented
|
||||
* When functionality is added (in a backwards-compatible manner) the minor number
|
||||
is incremented
|
||||
* When only fixes are introduced (backwards-compatible) the patch number is
|
||||
incremented
|
||||
Once the code is ready to be tested, update the local development installation
|
||||
with `sudo make install`.
|
||||
This will update the 'dev' version of the extension in `src/pg/` and
|
||||
make it available to PostgreSQL.
|
||||
It will also install the python package (crankshaft) in a virtual
|
||||
environment `envs/dev`.
|
||||
|
||||
### Python Package
|
||||
The version number of the Python package, defined in
|
||||
`src/pg/crankshaft/setup.py` will be overridden when
|
||||
the package is released and always match the extension version number,
|
||||
but for development it shall be kept as '0.0.0'.
|
||||
|
||||
...
|
||||
Run the tests with `make test`.
|
||||
|
||||
### SQL Extension
|
||||
|
||||
* Generate a **new subfolder version** for `sql` and `test` folders to define
|
||||
the new functions and tests
|
||||
- Use symlinks to avoid file duplication between versions that don't update them
|
||||
- Add new files or modify copies of the old files to add new functions or
|
||||
modify existing functions (remember to rename a function if the signature
|
||||
changes)
|
||||
- Add or modify the corresponding documentation files in the `doc` folder.
|
||||
Since we expect to have highly technical functions here, an extensive
|
||||
background explanation would be of great help to users of this extension.
|
||||
- Create tests for the new functions/behaviour
|
||||
|
||||
* Generate the **upgrade and downgrade files** for the extension
|
||||
|
||||
* Update the control file and the Makefile to generate the complete SQL
|
||||
file for the new created version. After running `make` a new
|
||||
file `crankshaft--X.Y.Z.sql` will be created for the current version.
|
||||
Additional files for migrating to/from the previous version A.B.C should be
|
||||
created:
|
||||
- `crankshaft--X.Y.Z--A.B.C.sql`
|
||||
- `crankshaft--A.B.C--X.Y.Z.sql`
|
||||
All these new files must be added to git and pushed.
|
||||
|
||||
* Update the public docs! ;-)
|
||||
|
||||
## Conventions
|
||||
|
||||
# SQL
|
||||
|
||||
Use snake case (i.e. `snake_case` and not `CamelCase`) for all
|
||||
functions. Prefix functions intended for public use with `cdb_`
|
||||
and private functions (to be used only internally inside
|
||||
the extension) with `_cdb_`.
|
||||
|
||||
# Python
|
||||
|
||||
...
|
||||
|
||||
## Testing
|
||||
|
||||
Running just the Python tests:
|
||||
To use the python extension for custom tests, activate the virtual
|
||||
environment with:
|
||||
|
||||
```
|
||||
(cd python && make test)
|
||||
source envs/dev/bin/activate
|
||||
```
|
||||
|
||||
Installing the Extension and running just the PostgreSQL tests:
|
||||
Update extension in a working database with:
|
||||
|
||||
* `ALTER EXTENSION crankshaft UPDATE TO 'current';`
|
||||
`ALTER EXTENSION crankshaft UPDATE TO 'dev';`
|
||||
|
||||
Note: we keep the current development version installed as 'dev' always;
|
||||
we update through the 'current' alias to allow changing the extension
|
||||
contents but not the version identifier. This will fail if the
|
||||
changes involve incompatible function changes such as a different
|
||||
return type; in that case the offending function (or the whole extension)
|
||||
should be dropped manually before the update.
|
||||
|
||||
If the extension has not previously been installed in a database,
|
||||
it can be installed directly with:
|
||||
|
||||
* `CREATE EXTENSION IF NOT EXISTS plpythonu;`
|
||||
`CREATE EXTENSION IF NOT EXISTS postgis;`
|
||||
`CREATE EXTENSION crankshaft WITH VERSION 'dev';`
|
||||
|
||||
Note: the development extension uses the development python virtual
|
||||
environment automatically.
|
||||
|
||||
Before proceeding to the release process peer code reviewing of the code is
|
||||
a must.
|
||||
|
||||
Once the feature or bugfix is completed and all the tests are passing
|
||||
a Pull-Request shall be created on the topic branch, reviewed by a peer
|
||||
and then merged back into the `develop` branch when all CI tests pass.
|
||||
|
||||
When the changes in the `develop` branch are to be released in a new
|
||||
version of the extension, a PR must be created on the `develop` branch.
|
||||
|
||||
The release manager will take hold of the PR at this moment to proceed
|
||||
to the release process for a new revision of the extension.
|
||||
|
||||
## Relevant development tasks available in the Makefile
|
||||
|
||||
```
|
||||
(cd pg && sudo make install && PGUSER=postgres make installcheck)
|
||||
```
|
||||
* `make help` show a short description of the available targets
|
||||
|
||||
Installing and testing everything:
|
||||
* `sudo make install` will generate the extension scripts for the development
|
||||
version ('dev'/'current') and install the python package into the
|
||||
development virtual environment `envs/dev`.
|
||||
Intended for use by developers.
|
||||
|
||||
```
|
||||
sudo make install && PGUSER=postgres make testinstalled
|
||||
* `make test` will run the tests for the installed development extension.
|
||||
Intended for use by developers.
|
||||
```
|
||||
|
||||
43
DEPLOYING.md
43
DEPLOYING.md
@@ -1,43 +0,0 @@
|
||||
# Workflow
|
||||
|
||||
... (branching/merging flow)
|
||||
|
||||
# Deployment
|
||||
|
||||
...
|
||||
|
||||
Deployment to db servers: the next command will install both the Python
|
||||
package and the extension.
|
||||
|
||||
```
|
||||
sudo make install
|
||||
```
|
||||
|
||||
Installing only the Python package:
|
||||
|
||||
```
|
||||
sudo pip install python/crankshaft --upgrade
|
||||
```
|
||||
|
||||
Caveat: note that `pip install ./crankshaft` will install
|
||||
from local files, but `pip install crankshaft` will not.
|
||||
|
||||
CI: Install and run the tests on the installed extension and package:
|
||||
|
||||
```
|
||||
(sudo make install && PGUSER=postgres make testinstalled)
|
||||
```
|
||||
|
||||
Installing the extension in user databases:
|
||||
Once installed in a server, the extension can be added
|
||||
to a database with the next SQL command:
|
||||
|
||||
```
|
||||
CREATE EXTENSION crankshaft;
|
||||
```
|
||||
|
||||
To upgrade the extension to a specific version X.Y.Z:
|
||||
|
||||
```
|
||||
ALTER EXTENSION crankshaft UPDATE TO 'X.Y.Z';
|
||||
```
|
||||
63
Makefile
63
Makefile
@@ -1,13 +1,64 @@
|
||||
EXT_DIR = pg
|
||||
PYP_DIR = python
|
||||
include ./Makefile.global
|
||||
|
||||
EXT_DIR = src/pg
|
||||
PYP_DIR = src/py
|
||||
|
||||
.PHONY: install
|
||||
.PHONY: run_tests
|
||||
.PHONY: release
|
||||
.PHONY: deploy
|
||||
|
||||
install:
|
||||
# Generate and install development versions of the extension
|
||||
# and python package.
|
||||
# The extension is named 'dev' with a 'current' alias for easily upgrading.
|
||||
# Requires sudo.
|
||||
install: ## Generate and install development version of the extension; requires sudo.
|
||||
$(MAKE) -C $(PYP_DIR) install
|
||||
$(MAKE) -C $(EXT_DIR) install
|
||||
|
||||
testinstalled:
|
||||
$(MAKE) -C $(PYP_DIR) testinstalled
|
||||
$(MAKE) -C $(EXT_DIR) installcheck
|
||||
# Run the tests for the installed development extension and
|
||||
# python package
|
||||
test: ## Run the tests for the development version of the extension
|
||||
$(MAKE) -C $(PYP_DIR) test
|
||||
$(MAKE) -C $(EXT_DIR) test
|
||||
|
||||
# Generate a new release into release
|
||||
release: ## Generate a new release of the extension. Only for release manager
|
||||
$(MAKE) -C $(EXT_DIR) release
|
||||
$(MAKE) -C $(PYP_DIR) release
|
||||
|
||||
# Install the current release.
|
||||
# Requires sudo.
|
||||
# Use the RELEASE_VERSION environment variable to deploy a specific version:
|
||||
# sudo make deploy RELEASE_VERSION=1.0.0
|
||||
deploy: ## Deploy a released extension. Only for release manager. Requires sudo.
|
||||
$(MAKE) -C $(EXT_DIR) deploy
|
||||
$(MAKE) -C $(PYP_DIR) deploy
|
||||
|
||||
# Cleanup development extension script files
|
||||
clean-dev: ## clean up development extension script files
|
||||
rm -f src/pg/$(EXTENSION)--*.sql
|
||||
|
||||
# Cleanup all releases
|
||||
clean-releases: ## clean up all releases
|
||||
rm -rf release/python/*
|
||||
rm -f release/$(EXTENSION)--*.sql
|
||||
rm -f release/$(EXTENSION).control
|
||||
|
||||
# Cleanup current/specific version
|
||||
clean-release: ## clean up current release
|
||||
rm -rf release/python/$(RELEASE_VERSION)
|
||||
rm -f release/$(RELEASE_VERSION)--*.sql
|
||||
|
||||
clean-all: clean-dev clean-release
|
||||
|
||||
help:
|
||||
@IFS=$$'\n' ; \
|
||||
help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//'`); \
|
||||
for help_line in $${help_lines[@]}; do \
|
||||
IFS=$$'#' ; \
|
||||
help_split=($$help_line) ; \
|
||||
help_command=`echo $${help_split[0]} | sed -e 's/^ *//' -e 's/ *$$//'` ; \
|
||||
help_info=`echo $${help_split[2]} | sed -e 's/^ *//' -e 's/ *$$//'` ; \
|
||||
printf "%-30s %s\n" $$help_command $$help_info ; \
|
||||
done
|
||||
|
||||
6
Makefile.global
Normal file
6
Makefile.global
Normal file
@@ -0,0 +1,6 @@
|
||||
SELF_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
|
||||
EXTENSION = crankshaft
|
||||
PACKAGE = crankshaft
|
||||
EXTVERSION = $(shell grep default_version $(SELF_DIR)/src/pg/$(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
|
||||
RELEASE_VERSION ?= $(EXTVERSION)
|
||||
SED = sed
|
||||
24
NEWS.md
Normal file
24
NEWS.md
Normal file
@@ -0,0 +1,24 @@
|
||||
0.1.0 (2016-06-29)
|
||||
------------------
|
||||
* Adds Spatial Markov function
|
||||
* Adds Spatial interpolation function
|
||||
* Adds `CDB_pyAgg (columns Numeric[])` helper function
|
||||
* Adds Segmentation Functions
|
||||
|
||||
0.0.4 (2016-06-20)
|
||||
------------------
|
||||
* Remove cartodb extension dependency from tests
|
||||
* Declare all correct dependencies with correct versions in setup.py
|
||||
|
||||
0.0.3 (2016-06-16)
|
||||
------------------
|
||||
* Adds new functions: kmeans, weighted centroids.
|
||||
* Replaces moran functions with new areas of interest naming.
|
||||
|
||||
0.0.2 (2016-03-16)
|
||||
------------------
|
||||
* New versioning approach using per-version Python virtual environments
|
||||
|
||||
0.0.1 (2016-02-22)
|
||||
------------------
|
||||
* Preliminary release
|
||||
64
README.md
64
README.md
@@ -4,9 +4,67 @@ CartoDB Spatial Analysis extension for PostgreSQL.
|
||||
|
||||
## Code organization
|
||||
|
||||
* *pg* contains the PostgreSQL extension source code
|
||||
* *python* Python module
|
||||
* *doc* documentation
|
||||
* *src* source code
|
||||
* - *src/pg* contains the PostgreSQL extension source code
|
||||
* - *src/py* Python module source code
|
||||
* *release* released versions
|
||||
|
||||
## Requirements
|
||||
|
||||
* pip
|
||||
* pip, PostgreSQL
|
||||
* python-scipy system package (see [src/py/README.md](https://github.com/CartoDB/crankshaft/blob/master/src/py/README.md))
|
||||
|
||||
# Working Process -- Quickstart Guide
|
||||
|
||||
We distinguish two roles regarding the development cycle of crankshaft:
|
||||
|
||||
* *developers* will implement new functionality and bugfixes into
|
||||
the codebase and will request for new releases of the extension.
|
||||
* A *release manager* will attend these requests and will handle
|
||||
the release process. The release process is sequential:
|
||||
no concurrent releases will ever be in the works.
|
||||
|
||||
We use the default `develop` branch as the basis for development.
|
||||
The `master` branch is used to merge and tag releases to be
|
||||
deployed in production.
|
||||
|
||||
Developers shall create a new topic branch from `develop` for any new feature
|
||||
or bugfix and commit their changes to it and eventually merge back into
|
||||
the `develop` branch. When a new release is required a Pull Request
|
||||
will be opened against the `develop` branch.
|
||||
|
||||
The `develop` pull requests will be handled by the release manager,
|
||||
who will merge into master where new releases are prepared and tagged.
|
||||
The `master` branch is the sole responsibility of the release manager
|
||||
and developers must not commit or merge into it.
|
||||
|
||||
## Development Guidelines
|
||||
|
||||
For a detailed description of the development process please see
|
||||
the [CONTRIBUTING.md](https://github.com/CartoDB/crankshaft/blob/master/CONTRIBUTING.md) guide.
|
||||
|
||||
Any modification to the source code (`src/pg/sql` for the SQL extension,
|
||||
`src/py/crankshaft` for the Python package) shall always be done
|
||||
in a topic branch created from the `develop` branch.
|
||||
|
||||
Tests, documentation and peer code reviewing are required for all
|
||||
modifications.
|
||||
|
||||
The tests (both for SQL and Python) are executed by running,
|
||||
from the top directory:
|
||||
|
||||
```
|
||||
sudo make install
|
||||
make test
|
||||
```
|
||||
|
||||
To request a new release, which will be handled by the
|
||||
release manager, a Pull Request must be created in the `develop`
|
||||
branch.
|
||||
|
||||
## Release
|
||||
|
||||
The release and deployment process is described in the
|
||||
[RELEASE.md](https://github.com/CartoDB/crankshaft/blob/master/RELEASE.md) guide and it is the responsibility of the designated
|
||||
release manager.
|
||||
|
||||
93
RELEASE.md
Normal file
93
RELEASE.md
Normal file
@@ -0,0 +1,93 @@
|
||||
# Release & Deployment Process
|
||||
|
||||
Please read the Working Process/Quickstart Guide in README.md
|
||||
and the Development guidelines in CONTRIBUTING.md.
|
||||
|
||||
The release process of a new version of the extension
|
||||
shall be performed by the designated *Release Manager*.
|
||||
|
||||
Note that we expect to gradually automate more of this process.
|
||||
|
||||
Once the PR to be released has been checked, it shall be
|
||||
merged back into the `master` branch to prepare the new release.
|
||||
|
||||
The version number in `src/pg/crankshaft.control` must first be updated.
|
||||
To do so [Semantic Versioning 2.0](http://semver.org/) is in order.
|
||||
|
||||
The `NEWS.md` file will be updated.
|
||||
|
||||
We now will explain the process for the case of backwards-compatible
|
||||
releases (updating the minor or patch version numbers).
|
||||
|
||||
TODO: document the complex case of major releases.
|
||||
|
||||
The next command must be executed to produce the main installation
|
||||
script for the new release, `release/crankshaft--X.Y.Z.sql` and
|
||||
also to copy the python package to `release/python/X.Y.Z/crankshaft`.
|
||||
|
||||
```
|
||||
make release
|
||||
```
|
||||
|
||||
Then, the release manager shall produce upgrade and downgrade scripts
|
||||
to migrate to/from the previous release. In the case of minor/patch
|
||||
releases this simply consists of extracting the functions that have changed
|
||||
and placing them in the proper `release/crankshaft--X.Y.Z--A.B.C.sql`
|
||||
file.
|
||||
|
||||
The new release can be deployed for staging/smoke tests with this command:
|
||||
|
||||
```
|
||||
sudo make deploy
|
||||
```
|
||||
|
||||
This will copy the current 'X.Y.Z' released version of the extension to
|
||||
PostgreSQL. The corresponding Python extension will be installed in a
|
||||
virtual environment in `envs/X.Y.Z`.
|
||||
|
||||
It can be activated with:
|
||||
|
||||
```
|
||||
source envs/X.Y.Z/bin/activate
|
||||
```
|
||||
|
||||
But note that this is needed only for using the package directly;
|
||||
the 'X.Y.Z' version of the extension will automatically use the
|
||||
python package from this virtual environment.
|
||||
|
||||
The `sudo make deploy` operation can be also used for installing
|
||||
the new version after it has been released.
|
||||
|
||||
To install a specific version 'X.Y.Z' different from the current one
|
||||
(which must be present in `release/`) you can:
|
||||
|
||||
```
|
||||
sudo make deploy RELEASE_VERSION=X.Y.Z
|
||||
```
|
||||
|
||||
TODO: testing procedure for the new release.
|
||||
|
||||
TODO: procedure for staging deployment.
|
||||
|
||||
TODO: procedure for merging to master, tagging and deploying
|
||||
in production.
|
||||
|
||||
## Relevant release & deployment tasks available in the Makefile
|
||||
|
||||
```
|
||||
* `make help` show a short description of the available targets
|
||||
|
||||
* `make release` will generate a new release (version number defined in
|
||||
`src/pg/crankshaft.control`) into `release/`.
|
||||
Intended for use by the release manager.
|
||||
|
||||
* `sudo make deploy` will install the current release X.Y.Z from the
|
||||
`release/` files into PostgreSQL and a Python virtual environment
|
||||
`envs/X.Y.Z`.
|
||||
Intended for use by the release manager and deployment jobs.
|
||||
|
||||
* `sudo make deploy RELEASE_VERSION=X.Y.Z` will install specified version
|
||||
previously generated in `release/`
|
||||
into PostgreSQL and a Python virtual environment `envs/X.Y.Z`.
|
||||
Intended for use by the release manager and deployment jobs.
|
||||
```
|
||||
9
TODO.md
9
TODO.md
@@ -1,9 +0,0 @@
|
||||
* [x] Support versioning
|
||||
* [x] Test use of `plpy` from python Package
|
||||
* [x] Add `pysal` etc. dependencies
|
||||
* [x] Define documentation practices (general, per extension/package?)
|
||||
* [x] Add initial function set (WIP)
|
||||
* Unify style of function comments
|
||||
* [x] Add integration tests
|
||||
* Make target to open a new version development (create symlinks, etc.)
|
||||
* [x] Should add cartodb ext. as a dependency?
|
||||
185
doc/02_moran.md
Normal file
185
doc/02_moran.md
Normal file
@@ -0,0 +1,185 @@
|
||||
## Areas of Interest Functions
|
||||
|
||||
### CDB_AreasOfInterestLocal(subquery text, column_name text)
|
||||
|
||||
This function classifies your data as being part of a cluster, as an outlier, or not part of a pattern based on the significance of a classification. The classification happens through an autocorrelation statistic called Local Moran's I.
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
|
||||
| column_name | TEXT | Name of column (e.g., should be `'interesting_value'` instead of `interesting_value` without single quotes) used for the analysis. |
|
||||
| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). |
|
||||
| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. |
|
||||
| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. |
|
||||
| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` |
|
||||
| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. |
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the geometry with id of `rowid` |
|
||||
| quads | TEXT | Classification of geometry. Result is one of 'HH' (a high value with neighbors high on average), 'LL' (opposite of 'HH'), 'HL' (a high value surrounded by lows on average), and 'LH' (opposite of 'HL'). Null values are returned when nulls exist in the original data. |
|
||||
| significance | NUMERIC | The statistical significance (from 0 to 1) of a cluster or outlier classification. Lower numbers are more significant. |
|
||||
| rowid | INT | Row id of the values which correspond to the input rows. |
|
||||
| vals | NUMERIC | Values from `'column_name'`. |
|
||||
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
c.the_geom,
|
||||
aoi.quads,
|
||||
aoi.significance,
|
||||
c.num_cyclists_per_total_population
|
||||
FROM CDB_AreasOfInterestLocal('SELECT * FROM commute_data',
|
||||
'num_cyclists_per_total_population') As aoi
|
||||
JOIN commute_data As c
|
||||
ON c.cartodb_id = aoi.rowid;
|
||||
```
|
||||
|
||||
### CDB_AreasOfInterestGlobal(subquery text, column_name text)
|
||||
|
||||
This function identifies the extent to which geometries cluster (the groupings of geometries with similarly high or low values relative to the mean) or form outliers (areas where geometries have values opposite of their neighbors). The output of this function gives values between -1 and 1 as well as a significance of that classification. Values close to 0 mean that there is little to no distribution of values as compared to what one would see in a randomly distributed collection of geometries and values.
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
|
||||
| column_name | TEXT | Name of column (e.g., should be `'interesting_value'` instead of `interesting_value` without single quotes) used for the analysis. |
|
||||
| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). |
|
||||
| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. |
|
||||
| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. |
|
||||
| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` |
|
||||
| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. |
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the entire dataset. Values closer to one indicate cluster, closer to -1 mean more outliers, and near zero indicates a random distribution of data. |
|
||||
| significance | NUMERIC | The statistical significance of the `moran` measure. |
|
||||
|
||||
#### Examples
|
||||
|
||||
```sql
|
||||
SELECT *
|
||||
FROM CDB_AreasOfInterestGlobal('SELECT * FROM commute_data', 'num_cyclists_per_total_population')
|
||||
```
|
||||
|
||||
### CDB_AreasOfInterestLocalRate(subquery text, numerator_column text, denominator_column text)
|
||||
|
||||
Just like `CDB_AreasOfInterestLocal`, this function classifies your data as being part of a cluster, as an outlier, or not part of a pattern based on the significance of a classification. This function differs in that it calculates the classifications based on input `numerator` and `denominator` columns for finding the areas where there are clusters and outliers for the resulting rate of those two values.
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
|
||||
| numerator | TEXT | Name of the numerator for forming a rate to be used in analysis. |
|
||||
| denominator | TEXT | Name of the denominator for forming a rate to be used in analysis. |
|
||||
| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). |
|
||||
| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. |
|
||||
| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. |
|
||||
| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` |
|
||||
| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. |
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the geometry with id of `rowid` |
|
||||
| quads | TEXT | Classification of geometry. Result is one of 'HH' (a high value with neighbors high on average), 'LL' (opposite of 'HH'), 'HL' (a high value surrounded by lows on average), and 'LH' (opposite of 'HL'). Null values are returned when nulls exist in the original data. |
|
||||
| significance | NUMERIC | The statistical significance (from 0 to 1) of a cluster or outlier classification. Lower numbers are more significant. |
|
||||
| rowid | INT | Row id of the values which correspond to the input rows. |
|
||||
| vals | NUMERIC | Values from `'column_name'`. |
|
||||
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
c.the_geom,
|
||||
aoi.quads,
|
||||
aoi.significance,
|
||||
c.cyclists_per_total_population
|
||||
FROM CDB_AreasOfInterestLocalRate('SELECT * FROM commute_data',
|
||||
'num_cyclists',
|
||||
'total_population') As aoi
|
||||
JOIN commute_data As c
|
||||
ON c.cartodb_id = aoi.rowid;
|
||||
```
|
||||
|
||||
### CDB_AreasOfInterestGlobalRate(subquery text, numerator_column text, denominator_column text)
|
||||
|
||||
This function identifies the extent to which geometries cluster (the groupings of geometries with similarly high or low values relative to the mean) or form outliers (areas where geometries have values opposite of their neighbors). The output of this function gives values between -1 and 1 as well as a significance of that classification. Values close to 0 mean that there is little to no distribution of values as compared to what one would see in a randomly distributed collection of geometries and values.
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
|
||||
| numerator | TEXT | Name of the numerator for forming a rate to be used in analysis. |
|
||||
| denominator | TEXT | Name of the denominator for forming a rate to be used in analysis. |
|
||||
| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). |
|
||||
| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. |
|
||||
| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. |
|
||||
| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` |
|
||||
| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. |
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the entire dataset. Values closer to one indicate cluster, closer to -1 mean more outliers, and near zero indicates a random distribution of data. |
|
||||
| significance | NUMERIC | The statistical significance of the `moran` measure. |
|
||||
|
||||
#### Examples
|
||||
|
||||
```sql
|
||||
SELECT *
|
||||
FROM CDB_AreasOfInterestGlobalRate('SELECT * FROM commute_data',
|
||||
'num_cyclists',
|
||||
'total_population')
|
||||
```
|
||||
|
||||
## Hotspot, Coldspot, and Outlier Functions
|
||||
|
||||
These functions are convenience functions for extracting only information that you are interested in exposing based on the outputs of the `CDB_AreasOfInterest` functions. For instance, you can use `CDB_GetSpatialHotspots` to output only the classifications of `HH` and `HL`.
|
||||
|
||||
### Non-rate functions
|
||||
|
||||
#### CDB_GetSpatialHotspots
|
||||
This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocal` except that the outputs are filtered to be only 'HH' and 'HL' (areas of high values). For more information about this function's use, see `CDB_AreasOfInterestLocal`.
|
||||
|
||||
#### CDB_GetSpatialColdspots
|
||||
This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocal` except that the outputs are filtered to be only 'LL' and 'LH' (areas of low values). For more information about this function's use, see `CDB_AreasOfInterestLocal`.
|
||||
|
||||
#### CDB_GetSpatialOutliers
|
||||
This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocal` except that the outputs are filtered to be only 'HL' and 'LH' (areas where highs or lows are surrounded by opposite values on average). For more information about this function's use, see `CDB_AreasOfInterestLocal`.
|
||||
|
||||
### Rate functions
|
||||
|
||||
#### CDB_GetSpatialHotspotsRate
|
||||
|
||||
This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocalRate` except that the outputs are filtered to be only 'HH' and 'HL' (areas of high values). For more information about this function's use, see `CDB_AreasOfInterestLocalRate`.
|
||||
|
||||
#### CDB_GetSpatialColdspotsRate
|
||||
|
||||
This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocalRate` except that the outputs are filtered to be only 'LL' and 'LH' (areas of low values). For more information about this function's use, see `CDB_AreasOfInterestLocalRate`.
|
||||
|
||||
#### CDB_GetSpatialOutliersRate
|
||||
|
||||
This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocalRate` except that the outputs are filtered to be only 'HL' and 'LH' (areas where highs or lows are surrounded by opposite values on average). For more information about this function's use, see `CDB_AreasOfInterestLocalRate`.
|
||||
47
doc/04_markov.md
Normal file
47
doc/04_markov.md
Normal file
@@ -0,0 +1,47 @@
|
||||
## Spatial Markov
|
||||
|
||||
### CDB_SpatialMarkovTrend(subquery text, column_names text array)
|
||||
|
||||
This function takes time series data associated with geometries and outputs likelihoods that the next value of a geometry will move up, down, or stay static as compared to the most recent measurement. For more information, read about [Spatial Dynamics in PySAL](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/dynamics.html).
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM real_estate_history`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
|
||||
| column_names | TEXT Array | Names of columns that form the history of measurements for the geometries (e.g., `Array['y2011', 'y2012', 'y2013', 'y2014', 'y2015', 'y2016']`). |
|
||||
| num_classes (optional) | INT | Number of quantile classes to separate data into. |
|
||||
| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). |
|
||||
| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. |
|
||||
| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. |
|
||||
| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` |
|
||||
| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. |
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| trend | NUMERIC | The probability that the measure at this location will move up (a positive number) or down (a negative number) |
|
||||
| trend_up | NUMERIC | The probability that a measure will move up in subsequent steps of time |
|
||||
| trend_down | NUMERIC | The probability that a measure will move down in subsequent steps of time |
|
||||
| volatility | NUMERIC | A measure of the variance of the probabilities returned from the Spatial Markov predictions |
|
||||
| rowid | NUMERIC | id of the row that corresponds to the `id_col` (by default `cartodb_id` of the input rows) |
|
||||
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
c.cartodb_id,
|
||||
c.the_geom,
|
||||
m.trend,
|
||||
m.trend_up,
|
||||
m.trend_down,
|
||||
m.volatility
|
||||
FROM CDB_SpatialMarkovTrend('SELECT * FROM nyc_real_estate',
|
||||
Array['m03y2009','m03y2010','m03y2011','m03y2012','m03y2013','m03y2014','m03y2015','m03y2016']) As m
|
||||
JOIN nyc_real_estate As c
|
||||
ON c.cartodb_id = m.rowid;
|
||||
```
|
||||
23
doc/04_pyAgg.md
Normal file
23
doc/04_pyAgg.md
Normal file
@@ -0,0 +1,23 @@
|
||||
## PyAgg Helper Function
|
||||
|
||||
### CDB_pyAgg (columns Numeric[])
|
||||
|
||||
Currently it's not possible to pass a multidimensional array between plpgsql and plpythonu. This function aims to
|
||||
help fix that by aggregating the columns provided in the argument across rows into a rows * columns + 1 length 1D array. The first element of the array is the array\_length of the columns argument so that python can reconstruct
|
||||
the 2D array.
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| columns | NUMERIC[] | The columns to aggregate across rows|
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| result | NUMERIC[] | A columns * rows + 1 length array where the first entry is the number of columns|
|
||||
|
||||
|
||||
51
doc/08_interpolation.md
Normal file
51
doc/08_interpolation.md
Normal file
@@ -0,0 +1,51 @@
|
||||
## Spatial interpolation
|
||||
|
||||
Function to interpolate a numeric attribute of a point in a scatter dataset of points, using one of three methods:
|
||||
|
||||
* [Nearest neighbor](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
|
||||
* [Barycentric](https://en.wikipedia.org/wiki/Barycentric_coordinate_system)
|
||||
* [IDW](https://en.wikipedia.org/wiki/Inverse_distance_weighting)
|
||||
|
||||
### CDB_SpatialInterpolation (query text, point geometry, method integer DEFAULT 1, p1 integer DEFAULT 0, p2 integer DEFAULT 0)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| query | text | query that returns at least `the_geom` and a numeric value as `attrib` |
|
||||
| point | geometry | The target point to calc the value |
|
||||
| method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW|
|
||||
| p1 | integer | IDW: limit the number of neighbors, 0->no limit|
|
||||
| p2 | integer | IDW: order of distance decay, 0-> order 1|
|
||||
|
||||
### CDB_SpatialInterpolation (geom geometry[], values numeric[], point geometry, method integer DEFAULT 1, p1 integer DEFAULT 0, p2 integer DEFAULT 0)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| geom | geometry[] | Array of points' geometries |
|
||||
| values | numeric[] | Array of points' values for the param under study|
|
||||
| point | geometry | The target point to calc the value |
|
||||
| method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW|
|
||||
| p1 | integer | IDW: limit the number of neighbors, 0->no limit|
|
||||
| p2 | integer | IDW: order of distance decay, 0-> order 1|
|
||||
|
||||
### Returns
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| value | numeric | Interpolated value at the given point, `-888.888` if the given point is out of the boundaries of the source points set |
|
||||
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
with a as (
|
||||
select
|
||||
array_agg(the_geom) as geomin,
|
||||
array_agg(temp::numeric) as colin
|
||||
from table_4804232032
|
||||
)
|
||||
SELECT CDB_SpatialInterpolation(geomin, colin, CDB_latlng(41.38, 2.15),1) FROM a;
|
||||
```
|
||||
62
doc/11_kmeans.md
Normal file
62
doc/11_kmeans.md
Normal file
@@ -0,0 +1,62 @@
|
||||
## K-Means Functions
|
||||
|
||||
### CDB_KMeans(subquery text, no_clusters INTEGER)
|
||||
|
||||
This function attempts to find n clusters within the input data. It will return a table of CartoDB ids and
|
||||
the number of the cluster each point in the input was assigned to.
|
||||
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
|
||||
| no\_clusters | INTEGER | The number of clusters to try and find |
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| cartodb\_id | INTEGER | The CartoDB id of the row in the input table.|
|
||||
| cluster\_no | INTEGER | The cluster that this point belongs to. |
|
||||
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
customers.*,
|
||||
km.cluster_no
|
||||
FROM cdb_crankshaft.CDB_Kmeans('SELECT * from customers' , 6) km, customers
|
||||
WHERE customers.cartodb_id = km.cartodb_id
|
||||
```
|
||||
|
||||
### CDB_WeightedMean(subquery text, weight_column text, category_column text)
|
||||
|
||||
Function that computes the weighted centroid of a number of clusters by some weight column.
|
||||
|
||||
### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column and the columns specified as the weight and category columns|
|
||||
| weight\_column | TEXT | The name of the column to use as a weight |
|
||||
| category\_column | TEXT | The name of the column to use as a category |
|
||||
|
||||
### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| the\_geom | GEOMETRY | A point for the weighted cluster center |
|
||||
| class | INTEGER | The cluster class |
|
||||
|
||||
### Example Usage
|
||||
|
||||
```sql
|
||||
SELECT ST_TRANSFORM(the_geom, 3857) as the_geom_webmercator, class
|
||||
FROM cdb_crankshaft.CDB_WeightedMean('SELECT *, customer_value FROM customers','customer_value','cluster_no')
|
||||
```
|
||||
83
doc/12_segmentation.md
Normal file
83
doc/12_segmentation.md
Normal file
@@ -0,0 +1,83 @@
|
||||
|
||||
## Segmentation Functions
|
||||
|
||||
### CDB_CreateAndPredictSegment(query TEXT, variable_name TEXT, target_query TEXT)
|
||||
|
||||
This function trains a [Gradient Boosting](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html) model to attempt to predict the target data and then generates predictions for new data.
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| query | TEXT | The input query to train the algorithm, which should have both the variable of interest and the features that will be used to predict it |
|
||||
| variable\_name| TEXT | Specify the variable in the query to predict, all other columns are assumed to be features |
|
||||
| target\_query | TEXT | The query which returns the `cartodb_id` and features for the rows you would like to predict the target variable for |
|
||||
| n\_estimators (optional) | INTEGER DEFAULT 1200 | Number of estimators to be used. Values should be between 1 and x. |
|
||||
| max\_depth (optional) | INTEGER DEFAULT 3 | Max tree depth. Values should be between 1 and n. |
|
||||
| subsample (optional) | DOUBLE PRECISION DEFAULT 0.5 | Subsample parameter for GradientBooster. Values should be within the range 0 to 1. |
|
||||
| learning\_rate (optional) | DOUBLE PRECISION DEFAULT 0.01 | Learning rate for the GradientBooster. Values should be between 0 and 1 (??) |
|
||||
| min\_samples\_leaf (optional) | INTEGER DEFAULT 1 | Minimum samples to use per leaf. Values should range from x to y |
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| cartodb\_id | INTEGER | The CartoDB id of the row in the target\_query |
|
||||
| prediction | NUMERIC | The predicted value of the variable of interest |
|
||||
| accuracy | NUMERIC | The mean squared accuracy of the model. |
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
SELECT * from cdb_crankshaft.CDB_CreateAndPredictSegment(
|
||||
'SELECT agg, median_rent::numeric, male_pop::numeric, female_pop::numeric FROM late_night_agg',
|
||||
'agg',
|
||||
'SELECT row_number() OVER () As cartodb_id, median_rent, male_pop, female_pop FROM ml_learning_ny');
|
||||
```
|
||||
|
||||
### CDB_CreateAndPredictSegment(target numeric[], train_features numeric[], prediction_features numeric[], prediction_ids numeric[])
|
||||
|
||||
This function trains a [Gradient Boosting](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html) model to attempt to predict the target data and then generates predictions for new data.
|
||||
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| target | numeric[] | An array of target values of the variable you want to predict|
|
||||
| train\_features| numeric[] | 1D array of length n features \* n\_rows + 1 with the first entry in the array being the number of features in each row. These are the features the model will be trained on. CDB\_Crankshaft.CDB_pyAgg(Array[feature1, feature2, feature3]::numeric[]) can be used to construct this. |
|
||||
| prediction\_features | numeric[] | 1D array of length n features \* n\_rows + 1 with the first entry in the array being the number of features in each row. These are the features that will be used to predict the target variable. CDB\_Crankshaft.CDB\_pyAgg(Array[feature1, feature2, feature3]::numeric[]) can be used to construct this. |
|
||||
| prediction\_ids | numeric[] | 1D array of length n\_rows with the ids that can be used to re-join the data with inputs |
|
||||
| n\_estimators (optional) | INTEGER DEFAULT 1200 | Number of estimators to be used |
|
||||
| max\_depth (optional) | INTEGER DEFAULT 3 | Max tree depth |
|
||||
| subsample (optional) | DOUBLE PRECISION DEFAULT 0.5 | Subsample parameter for GradientBooster|
|
||||
| learning\_rate (optional) | DOUBLE PRECISION DEFAULT 0.01 | Learning rate for the GradientBooster |
|
||||
| min\_samples\_leaf (optional) | INTEGER DEFAULT 1 | Minimum samples to use per leaf |
|
||||
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| cartodb\_id | INTEGER | The CartoDB id of the row in the target\_query |
|
||||
| prediction | NUMERIC | The predicted value of the variable of interest |
|
||||
| accuracy | NUMERIC | The mean squared accuracy of the model. |
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
WITH training As (
|
||||
SELECT array_agg(agg) As target,
|
||||
cdb_crankshaft.CDB_PyAgg(Array[median_rent, male_pop, female_pop]::Numeric[]) As features
|
||||
FROM late_night_agg),
|
||||
target AS (
|
||||
SELECT cdb_crankshaft.CDB_PyAgg(Array[median_rent, male_pop, female_pop]::Numeric[]) As features,
|
||||
array_agg(cartodb_id) As cartodb_ids FROM late_night_agg)
|
||||
|
||||
SELECT cdb_crankshaft.CDB_CreateAndPredictSegment(training.target, training.features, target.features, target.cartodb_ids)
|
||||
FROM training, target;
|
||||
```
|
||||
24
doc/docs_template.md
Normal file
24
doc/docs_template.md
Normal file
@@ -0,0 +1,24 @@
|
||||
|
||||
## Name
|
||||
|
||||
## Synopsis
|
||||
|
||||
## Description
|
||||
|
||||
Availability: v...
|
||||
|
||||
## Examples
|
||||
|
||||
```SQL
|
||||
-- example of the function in use
|
||||
SELECT cdb_awesome_function(the_geom, 'total_pop')
|
||||
FROM table_name
|
||||
```
|
||||
|
||||
## API Usage
|
||||
|
||||
_asdf_
|
||||
|
||||
## See Also
|
||||
|
||||
_Other function pages_
|
||||
3
pg/.gitignore
vendored
3
pg/.gitignore
vendored
@@ -1,3 +0,0 @@
|
||||
regression.diffs
|
||||
regression.out
|
||||
results/
|
||||
33
pg/Makefile
33
pg/Makefile
@@ -1,33 +0,0 @@
|
||||
# Makefile to generate the extension out of separate sql source files.
|
||||
# Once a version is released, it is not meant to be changed. E.g: once version 0.0.1 is out, it SHALL NOT be changed.
|
||||
|
||||
EXTENSION = crankshaft
|
||||
EXTVERSION = $(shell grep default_version $(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
|
||||
|
||||
# The new version to be generated from templates
|
||||
NEW_EXTENSION_ARTIFACT = $(EXTENSION)--$(EXTVERSION).sql
|
||||
|
||||
# DATA is a special variable used by postgres build infrastructure
|
||||
# These are the files to be installed in the server shared dir,
|
||||
# for installation from scratch, upgrades and downgrades.
|
||||
# @see http://www.postgresql.org/docs/current/static/extend-pgxs.html
|
||||
DATA = $(NEW_EXTENSION_ARTIFACT)
|
||||
|
||||
SOURCES_DATA_DIR = sql/$(EXTVERSION)
|
||||
SOURCES_DATA = $(wildcard sql/$(EXTVERSION)/*.sql)
|
||||
|
||||
# The extension installation artifacts are stored in the base subdirectory
|
||||
$(NEW_EXTENSION_ARTIFACT): $(SOURCES_DATA)
|
||||
rm -f $@
|
||||
cat $(SOURCES_DATA_DIR)/*.sql >> $@
|
||||
|
||||
REGRESS = $(notdir $(basename $(wildcard test/$(EXTVERSION)/sql/*test.sql)))
|
||||
TEST_DIR = test/$(EXTVERSION)
|
||||
REGRESS_OPTS = --inputdir='$(TEST_DIR)' --outputdir='$(TEST_DIR)'
|
||||
|
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS)
|
||||
|
||||
# This seems to be needed at least for PG 9.3.11
|
||||
all: $(DATA)
|
||||
@@ -1,7 +0,0 @@
|
||||
|
||||
# Running the tests:
|
||||
|
||||
```
|
||||
sudo make install
|
||||
PGUSER=postgres make installcheck
|
||||
```
|
||||
@@ -1,71 +0,0 @@
|
||||
### Moran's I
|
||||
|
||||
#### What is Moran's I and why is it significant for CartoDB?
|
||||
|
||||
Moran's I is a geostatistical calculation which gives a measure of the global
|
||||
clustering and presence of outliers within the geographies in a map. Here global
|
||||
means over all of the geographies in a dataset. Imagine mapping the incidence
|
||||
rates of cancer in neighborhoods of a city. If there were areas covering several
|
||||
neighborhoods with abnormally low rates of cancer, those areas are positively
|
||||
spatially correlated with one another and would be considered a cluster. If
|
||||
there was a single neighborhood with a high rate but with all neighbors on
|
||||
average having a low rate, it would be considered a spatial outlier.
|
||||
|
||||
While Moran's I gives a global snapshot, there are local indicators for
|
||||
clustering called Local Indicators of Spatial Autocorrelation. Clustering is a
|
||||
process related to autocorrelation -- i.e., a process that compares a
|
||||
geography's attribute to the attribute in neighbor geographies.
|
||||
|
||||
For the example of cancer rates in neighborhoods, since these neighborhoods have
|
||||
a high value for rate of cancer, and all of their neighbors do as well, they are
|
||||
designated as "High High" or simply **HH**. For areas with multiple neighborhoods
|
||||
with low rates of cancer, they are designated as "Low Low" or **LL**. HH and LL
|
||||
naturally fit into the concept of clustering and are in the correlated
|
||||
variables.
|
||||
|
||||
"Anticorrelated" geogs are in **LH** and **HL** regions -- that is, regions
|
||||
where a geog has a high value and its neighbors, on average, have a low value
|
||||
(or vice versa). An example of this is a "gated community" or placement of a
|
||||
city housing project in a rich region. These deliberate developments have
|
||||
opposite median income as compared to the neighbors around them. They have a
|
||||
high (or low) value while their neighbors have a low (or high) value. They exist
|
||||
typically as islands, and in rare circumstances can extend as chains dividing
|
||||
**LL** or **HH**.
|
||||
|
||||
Strong policies such as rent stabilization (probably) tend to prevent the
|
||||
clustering of high rent areas as they integrate middle class incomes. Luxury
|
||||
apartment buildings, which are a kind of gated community, probably tend to skew
|
||||
an area's median income upwards while housing projects have the opposite effect.
|
||||
What are the nuggets in the analysis?
|
||||
|
||||
Two functions are available to compute Moran I statistics:
|
||||
|
||||
* `cdb_moran_local` computes Moran I measures, quad classification and
|
||||
significance values from numerical values associated with geometry entities
|
||||
in an input table. The geometries should be contiguous polygons when
|
||||
the `queen` `w_type` is used.
|
||||
* `cdb_moran_local_rate` computes the same statistics using a ratio between
|
||||
numerator and denominator columns of a table.
|
||||
|
||||
The parameters for `cdb_moran_local` are:
|
||||
|
||||
* `table` name of the table that contains the data values
|
||||
* `attr` name of the column
|
||||
* `significance` significance threshold for the quads values
|
||||
* `num_ngbrs` number of neighbors to consider (default: 5)
|
||||
* `permutations` number of random permutations for calculation of
|
||||
pseudo-p values (default: 99)
|
||||
* `geom_column` name of the geometry column (default: "the_geom")
|
||||
* `id_col` PK column of the table (default: "cartodb_id")
|
||||
* `w_type` Weight types: can be "knn" for k-nearest neighbor weights
|
||||
or "queen" for contiguity based weights.
|
||||
|
||||
The function returns a table with the following columns:
|
||||
|
||||
* `moran` Moran's value
|
||||
* `quads` quad classification ('HH', 'LL', 'HL', 'LH' or 'Not significant')
|
||||
* `significance` significance value
|
||||
* `ids` id of the corresponding record in the input table
|
||||
|
||||
Function `cdb_moran_local_rate` only differs in that the `attr` input
|
||||
parameter is substituted by `numerator` and `denominator`.
|
||||
@@ -1,260 +0,0 @@
|
||||
\i test/fixtures/ppoints.sql
|
||||
-- test table (spanish province centroids with some invented values)
|
||||
CREATE TABLE ppoints (cartodb_id integer, the_geom geometry, the_geom_webmercator geometry, code text, region_code text, value float);
|
||||
INSERT INTO ppoints VALUES
|
||||
( 1,'0101000020E6100000A8306DC0CBC305C051D14B6CE56A4540'::geometry,ST_Transform('0101000020E6100000A8306DC0CBC305C051D14B6CE56A4540'::geometry, 3857),'01','16',0.5),
|
||||
( 4,'0101000020E6100000E220A4362DC202C0FD8AFA5119994240'::geometry,ST_Transform('0101000020E6100000E220A4362DC202C0FD8AFA5119994240'::geometry, 3857),'04','01',0.1),
|
||||
( 5,'0101000020E610000004377E573AC813C0CB5871BB17494440'::geometry,ST_Transform('0101000020E610000004377E573AC813C0CB5871BB17494440'::geometry, 3857),'05','07',0.3),
|
||||
( 2,'0101000020E610000000F49BE19BAFFFBF639958FDA6694340'::geometry,ST_Transform('0101000020E610000000F49BE19BAFFFBF639958FDA6694340'::geometry, 3857),'02','08',0.7),
|
||||
( 3,'0101000020E61000005D0B7E63C832E2BFDB63EB00443D4340'::geometry,ST_Transform('0101000020E61000005D0B7E63C832E2BFDB63EB00443D4340'::geometry, 3857),'03','10',0.2),
|
||||
( 6,'0101000020E61000006F3742B7FB9018C0DD967DC4D95A4340'::geometry,ST_Transform('0101000020E61000006F3742B7FB9018C0DD967DC4D95A4340'::geometry, 3857),'06','11',0.05),
|
||||
( 7,'0101000020E6100000E4BB36995F4C0740EAC0E5CA9FC94340'::geometry,ST_Transform('0101000020E6100000E4BB36995F4C0740EAC0E5CA9FC94340'::geometry, 3857),'07','04',0.4),
|
||||
( 8,'0101000020E61000003D43CC6CAFBEFF3F6B52E66F91DD4440'::geometry,ST_Transform('0101000020E61000003D43CC6CAFBEFF3F6B52E66F91DD4440'::geometry, 3857),'08','09',0.7),
|
||||
( 9,'0101000020E61000003CC797BD99AF0CC0495A87FA312F4540'::geometry,ST_Transform('0101000020E61000003CC797BD99AF0CC0495A87FA312F4540'::geometry, 3857),'09','07',0.5),
|
||||
(13,'0101000020E61000001CAA00A9F19F0EC05DF9267B7A764340'::geometry,ST_Transform('0101000020E61000001CAA00A9F19F0EC05DF9267B7A764340'::geometry, 3857),'13','08',0.4),
|
||||
(16,'0101000020E6100000D8208F3CBC9001C065638DC1B1F24340'::geometry,ST_Transform('0101000020E6100000D8208F3CBC9001C065638DC1B1F24340'::geometry, 3857),'16','08',0.4),
|
||||
(17,'0101000020E6100000E9E6A94A71630540AD7A0CB062104540'::geometry,ST_Transform('0101000020E6100000E9E6A94A71630540AD7A0CB062104540'::geometry, 3857),'17','09',0.6),
|
||||
(18,'0101000020E6100000719792D59E240AC098AC548E00A84240'::geometry,ST_Transform('0101000020E6100000719792D59E240AC098AC548E00A84240'::geometry, 3857),'18','01',0.3),
|
||||
(19,'0101000020E6100000972C878B50FD04C0123C881D1F684440'::geometry,ST_Transform('0101000020E6100000972C878B50FD04C0123C881D1F684440'::geometry, 3857),'19','08',0.7),
|
||||
(21,'0101000020E6100000F7893E9934511BC0EAA4BF03E1C94240'::geometry,ST_Transform('0101000020E6100000F7893E9934511BC0EAA4BF03E1C94240'::geometry, 3857),'21','01',0.1),
|
||||
(22,'0101000020E6100000572C2123B2A8B2BF7ED7FABAFD194540'::geometry,ST_Transform('0101000020E6100000572C2123B2A8B2BF7ED7FABAFD194540'::geometry, 3857),'22','02',0.4),
|
||||
(25,'0101000020E6100000461B67D688C4F03FD990EEC3A0054540'::geometry,ST_Transform('0101000020E6100000461B67D688C4F03FD990EEC3A0054540'::geometry, 3857),'25','09',0.4),
|
||||
(26,'0101000020E6100000A139FB06E82204C0539D84F62E234540'::geometry,ST_Transform('0101000020E6100000A139FB06E82204C0539D84F62E234540'::geometry, 3857),'26','17',0.6),
|
||||
(27,'0101000020E6100000A92E54E618C91DC00D3A947B81814540'::geometry,ST_Transform('0101000020E6100000A92E54E618C91DC00D3A947B81814540'::geometry, 3857),'27','12',0.3),
|
||||
(28,'0101000020E6100000971DC8B682BC0DC016D0E8055F3F4440'::geometry,ST_Transform('0101000020E6100000971DC8B682BC0DC016D0E8055F3F4440'::geometry, 3857),'28','13',0.8),
|
||||
(30,'0101000020E6100000A2DC1964A8C5F7BF19299C994D004340'::geometry,ST_Transform('0101000020E6100000A2DC1964A8C5F7BF19299C994D004340'::geometry, 3857),'30','14',0.1),
|
||||
(31,'0101000020E6100000DCA1FCC87B56FABF9B88E9D866554540'::geometry,ST_Transform('0101000020E6100000DCA1FCC87B56FABF9B88E9D866554540'::geometry, 3857),'31','15',0.9),
|
||||
(32,'0101000020E6100000E1517AFCD15E1EC0A18D8D4825194540'::geometry,ST_Transform('0101000020E6100000E1517AFCD15E1EC0A18D8D4825194540'::geometry, 3857),'32','12',0.3),
|
||||
(33,'0101000020E6100000A7FF33825AF917C0FABE7DFB6BA54540'::geometry,ST_Transform('0101000020E6100000A7FF33825AF917C0FABE7DFB6BA54540'::geometry, 3857),'33','03',0.4),
|
||||
(34,'0101000020E6100000FB4E4EBEB72412C0898E7240982F4540'::geometry,ST_Transform('0101000020E6100000FB4E4EBEB72412C0898E7240982F4540'::geometry, 3857),'34','07',0.3),
|
||||
(35,'0101000020E6100000224682B01B1A2DC011091656CC5C3C40'::geometry,ST_Transform('0101000020E6100000224682B01B1A2DC011091656CC5C3C40'::geometry, 3857),'35','05',0.3),
|
||||
(36,'0101000020E6100000F7C9447110EC20C04C5D4823C7374540'::geometry,ST_Transform('0101000020E6100000F7C9447110EC20C04C5D4823C7374540'::geometry, 3857),'36','12',0.2),
|
||||
(37,'0101000020E610000053D6A26DFB4218C09D58FAE209674440'::geometry,ST_Transform('0101000020E610000053D6A26DFB4218C09D58FAE209674440'::geometry, 3857),'37','07',0.5),
|
||||
(38,'0101000020E6100000B1D1B5FC910431C03C0C89BA03503C40'::geometry,ST_Transform('0101000020E6100000B1D1B5FC910431C03C0C89BA03503C40'::geometry, 3857),'38','05',0.4),
|
||||
(39,'0101000020E610000086E6FEE1BD1E10C00417096748994540'::geometry,ST_Transform('0101000020E610000086E6FEE1BD1E10C00417096748994540'::geometry, 3857),'39','06',0.6),
|
||||
(40,'0101000020E6100000FB51C33F733710C038D01729E4954440'::geometry,ST_Transform('0101000020E6100000FB51C33F733710C038D01729E4954440'::geometry, 3857),'40','07',0.5),
|
||||
(41,'0101000020E6100000912D6FDA28BB16C031321F08C4B74240'::geometry,ST_Transform('0101000020E6100000912D6FDA28BB16C031321F08C4B74240'::geometry, 3857),'41','01',0.4),
|
||||
(42,'0101000020E6100000554432EABEB504C069ECD78775CF4440'::geometry,ST_Transform('0101000020E6100000554432EABEB504C069ECD78775CF4440'::geometry, 3857),'42','07',0.2),
|
||||
(43,'0101000020E6100000157F117C1A2EEA3F027CD1F2368B4440'::geometry,ST_Transform('0101000020E6100000157F117C1A2EEA3F027CD1F2368B4440'::geometry, 3857),'43','09',0.3),
|
||||
(44,'0101000020E610000051AA5B1BD718EABFEE67613BA4544440'::geometry,ST_Transform('0101000020E610000051AA5B1BD718EABFEE67613BA4544440'::geometry, 3857),'44','02',0.2),
|
||||
(45,'0101000020E610000022C5C01BB69710C08563BC1499E54340'::geometry,ST_Transform('0101000020E610000022C5C01BB69710C08563BC1499E54340'::geometry, 3857),'45','08',0.3),
|
||||
(46,'0101000020E6100000D5FCF78A11A0E9BFDEA46F8E64AF4340'::geometry,ST_Transform('0101000020E6100000D5FCF78A11A0E9BFDEA46F8E64AF4340'::geometry, 3857),'46','10',0.2),
|
||||
(47,'0101000020E61000003AE63525866313C02100050B2BD14440'::geometry,ST_Transform('0101000020E61000003AE63525866313C02100050B2BD14440'::geometry, 3857),'47','07',0.3),
|
||||
(48,'0101000020E610000030F187FD1FD206C0C767E1496C9E4540'::geometry,ST_Transform('0101000020E610000030F187FD1FD206C0C767E1496C9E4540'::geometry, 3857),'48','16',0.5),
|
||||
(49,'0101000020E61000009C22867B12EC17C006C5F40C14DD4440'::geometry,ST_Transform('0101000020E61000009C22867B12EC17C006C5F40C14DD4440'::geometry, 3857),'49','07',0.2),
|
||||
(50,'0101000020E6100000F7D5EFC62D08F1BF69D1231D68CF4440'::geometry,ST_Transform('0101000020E6100000F7D5EFC62D08F1BF69D1231D68CF4440'::geometry, 3857),'50','02',0.6),
|
||||
(51,'0101000020E61000005B0E1F8DAA5F15C0530BFE285BF24140'::geometry,ST_Transform('0101000020E61000005B0E1F8DAA5F15C0530BFE285BF24140'::geometry, 3857),'51','18',0.01),
|
||||
(10,'0101000020E61000000FD65D82AEA418C06192D1351FDB4340'::geometry,ST_Transform('0101000020E61000000FD65D82AEA418C06192D1351FDB4340'::geometry, 3857),'10','11',0.04),
|
||||
(11,'0101000020E6100000B305531DAB0A17C0DEAFCD4EE5464240'::geometry,ST_Transform('0101000020E6100000B305531DAB0A17C0DEAFCD4EE5464240'::geometry, 3857),'11','01',0.08),
|
||||
(12,'0101000020E610000059721A7297C9C2BF9EBE383BE51E4440'::geometry,ST_Transform('0101000020E610000059721A7297C9C2BF9EBE383BE51E4440'::geometry, 3857),'12','10',0.2),
|
||||
(14,'0101000020E610000000C86313AF3C13C0E530879C10FF4240'::geometry,ST_Transform('0101000020E610000000C86313AF3C13C0E530879C10FF4240'::geometry, 3857),'14','01',0.2),
|
||||
(15,'0101000020E61000002A475497B6ED20C06643D4131A904540'::geometry,ST_Transform('0101000020E61000002A475497B6ED20C06643D4131A904540'::geometry, 3857),'15','12',0.3),
|
||||
(20,'0101000020E6100000F975566FAD8D01C0E840C33F67924540'::geometry,ST_Transform('0101000020E6100000F975566FAD8D01C0E840C33F67924540'::geometry, 3857),'20','16',0.8),
|
||||
(23,'0101000020E610000025FA13E595880BC022BB07131D024340'::geometry,ST_Transform('0101000020E610000025FA13E595880BC022BB07131D024340'::geometry, 3857),'23','01',0.1),
|
||||
(24,'0101000020E61000009C5F91C5095C17C0C78784B15A4F4540'::geometry,ST_Transform('0101000020E61000009C5F91C5095C17C0C78784B15A4F4540'::geometry, 3857),'24','07',0.3),
|
||||
(29,'0101000020E6100000C34D4A5B48E712C092E680892C684240'::geometry,ST_Transform('0101000020E6100000C34D4A5B48E712C092E680892C684240'::geometry, 3857),'29','01',0.3),
|
||||
(52,'0101000020E6100000406A545EB29A07C04E5F0BDA39A54140'::geometry,ST_Transform('0101000020E6100000406A545EB29A07C04E5F0BDA39A54140'::geometry, 3857),'52','19',0.01)
|
||||
\i test/fixtures/ppoints2.sql
|
||||
-- test table (spanish province centroids with some invented values)
|
||||
CREATE TABLE ppoints2 (cartodb_id integer, the_geom geometry, code text, region_code text, numerator float, denominator float);
|
||||
INSERT INTO ppoints2 VALUES
|
||||
( 1,'0101000020E6100000A8306DC0CBC305C051D14B6CE56A4540'::geometry,'01','16',0.5, 1.0),
|
||||
( 4,'0101000020E6100000E220A4362DC202C0FD8AFA5119994240'::geometry,'04','01',0.1, 1.0),
|
||||
( 5,'0101000020E610000004377E573AC813C0CB5871BB17494440'::geometry,'05','07',0.3, 1.0),
|
||||
( 2,'0101000020E610000000F49BE19BAFFFBF639958FDA6694340'::geometry,'02','08',0.7, 1.0),
|
||||
( 3,'0101000020E61000005D0B7E63C832E2BFDB63EB00443D4340'::geometry,'03','10',0.2, 1.0),
|
||||
( 6,'0101000020E61000006F3742B7FB9018C0DD967DC4D95A4340'::geometry,'06','11',0.05, 1.0),
|
||||
( 7,'0101000020E6100000E4BB36995F4C0740EAC0E5CA9FC94340'::geometry,'07','04',0.4, 1.0),
|
||||
( 8,'0101000020E61000003D43CC6CAFBEFF3F6B52E66F91DD4440'::geometry,'08','09',0.7, 1.0),
|
||||
( 9,'0101000020E61000003CC797BD99AF0CC0495A87FA312F4540'::geometry,'09','07',0.5, 1.0),
|
||||
(13,'0101000020E61000001CAA00A9F19F0EC05DF9267B7A764340'::geometry,'13','08',0.4, 1.0),
|
||||
(16,'0101000020E6100000D8208F3CBC9001C065638DC1B1F24340'::geometry,'16','08',0.4, 1.0),
|
||||
(17,'0101000020E6100000E9E6A94A71630540AD7A0CB062104540'::geometry,'17','09',0.6, 1.0),
|
||||
(18,'0101000020E6100000719792D59E240AC098AC548E00A84240'::geometry,'18','01',0.3, 1.0),
|
||||
(19,'0101000020E6100000972C878B50FD04C0123C881D1F684440'::geometry,'19','08',0.7, 1.0),
|
||||
(21,'0101000020E6100000F7893E9934511BC0EAA4BF03E1C94240'::geometry,'21','01',0.1, 1.0),
|
||||
(22,'0101000020E6100000572C2123B2A8B2BF7ED7FABAFD194540'::geometry,'22','02',0.4, 1.0),
|
||||
(25,'0101000020E6100000461B67D688C4F03FD990EEC3A0054540'::geometry,'25','09',0.4, 1.0),
|
||||
(26,'0101000020E6100000A139FB06E82204C0539D84F62E234540'::geometry,'26','17',0.6, 1.0),
|
||||
(27,'0101000020E6100000A92E54E618C91DC00D3A947B81814540'::geometry,'27','12',0.3, 1.0),
|
||||
(28,'0101000020E6100000971DC8B682BC0DC016D0E8055F3F4440'::geometry,'28','13',0.8, 1.0),
|
||||
(30,'0101000020E6100000A2DC1964A8C5F7BF19299C994D004340'::geometry,'30','14',0.1, 1.0),
|
||||
(31,'0101000020E6100000DCA1FCC87B56FABF9B88E9D866554540'::geometry,'31','15',0.9, 1.0),
|
||||
(32,'0101000020E6100000E1517AFCD15E1EC0A18D8D4825194540'::geometry,'32','12',0.3, 1.0),
|
||||
(33,'0101000020E6100000A7FF33825AF917C0FABE7DFB6BA54540'::geometry,'33','03',0.4, 1.0),
|
||||
(34,'0101000020E6100000FB4E4EBEB72412C0898E7240982F4540'::geometry,'34','07',0.3, 1.0),
|
||||
(35,'0101000020E6100000224682B01B1A2DC011091656CC5C3C40'::geometry,'35','05',0.3, 1.0),
|
||||
(36,'0101000020E6100000F7C9447110EC20C04C5D4823C7374540'::geometry,'36','12',0.2, 1.0),
|
||||
(37,'0101000020E610000053D6A26DFB4218C09D58FAE209674440'::geometry,'37','07',0.5, 1.0),
|
||||
(38,'0101000020E6100000B1D1B5FC910431C03C0C89BA03503C40'::geometry,'38','05',0.4, 1.0),
|
||||
(39,'0101000020E610000086E6FEE1BD1E10C00417096748994540'::geometry,'39','06',0.6, 1.0),
|
||||
(40,'0101000020E6100000FB51C33F733710C038D01729E4954440'::geometry,'40','07',0.5, 1.0),
|
||||
(41,'0101000020E6100000912D6FDA28BB16C031321F08C4B74240'::geometry,'41','01',0.4, 1.0),
|
||||
(42,'0101000020E6100000554432EABEB504C069ECD78775CF4440'::geometry,'42','07',0.2, 1.0),
|
||||
(43,'0101000020E6100000157F117C1A2EEA3F027CD1F2368B4440'::geometry,'43','09',0.3, 1.0),
|
||||
(44,'0101000020E610000051AA5B1BD718EABFEE67613BA4544440'::geometry,'44','02',0.2, 1.0),
|
||||
(45,'0101000020E610000022C5C01BB69710C08563BC1499E54340'::geometry,'45','08',0.3, 1.0),
|
||||
(46,'0101000020E6100000D5FCF78A11A0E9BFDEA46F8E64AF4340'::geometry,'46','10',0.2, 1.0),
|
||||
(47,'0101000020E61000003AE63525866313C02100050B2BD14440'::geometry,'47','07',0.3, 1.0),
|
||||
(48,'0101000020E610000030F187FD1FD206C0C767E1496C9E4540'::geometry,'48','16',0.5, 1.0),
|
||||
(49,'0101000020E61000009C22867B12EC17C006C5F40C14DD4440'::geometry,'49','07',0.2, 1.0),
|
||||
(50,'0101000020E6100000F7D5EFC62D08F1BF69D1231D68CF4440'::geometry,'50','02',0.6, 1.0),
|
||||
(51,'0101000020E61000005B0E1F8DAA5F15C0530BFE285BF24140'::geometry,'51','18',0.01, 1.0),
|
||||
(10,'0101000020E61000000FD65D82AEA418C06192D1351FDB4340'::geometry,'10','11',0.04, 1.0),
|
||||
(11,'0101000020E6100000B305531DAB0A17C0DEAFCD4EE5464240'::geometry,'11','01',0.08, 1.0),
|
||||
(12,'0101000020E610000059721A7297C9C2BF9EBE383BE51E4440'::geometry,'12','10',0.2, 1.0),
|
||||
(14,'0101000020E610000000C86313AF3C13C0E530879C10FF4240'::geometry,'14','01',0.2, 1.0),
|
||||
(15,'0101000020E61000002A475497B6ED20C06643D4131A904540'::geometry,'15','12',0.3, 1.0),
|
||||
(20,'0101000020E6100000F975566FAD8D01C0E840C33F67924540'::geometry,'20','16',0.8, 1.0),
|
||||
(23,'0101000020E610000025FA13E595880BC022BB07131D024340'::geometry,'23','01',0.1, 1.0),
|
||||
(24,'0101000020E61000009C5F91C5095C17C0C78784B15A4F4540'::geometry,'24','07',0.3, 1.0),
|
||||
(29,'0101000020E6100000C34D4A5B48E712C092E680892C684240'::geometry,'29','01',0.3, 1.0),
|
||||
(52,'0101000020E6100000406A545EB29A07C04E5F0BDA39A54140'::geometry,'52','19',0.0, 1.01)
|
||||
-- Moral functions perform some nondeterministic computations
|
||||
-- (to estimate the significance); we will set the seeds for the RNGs
|
||||
-- that affect those results to have repeateble results
|
||||
SELECT cdb_crankshaft._cdb_random_seeds(1234);
|
||||
_cdb_random_seeds
|
||||
-------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT ppoints.code, m.quads
|
||||
FROM ppoints
|
||||
JOIN cdb_crankshaft.cdb_moran_local('ppoints', 'value') m
|
||||
ON ppoints.cartodb_id = m.ids
|
||||
ORDER BY ppoints.code;
|
||||
NOTICE: ** Constructing query
|
||||
CONTEXT: PL/Python function "cdb_moran_local"
|
||||
NOTICE: ** Query returned with 52 rows
|
||||
CONTEXT: PL/Python function "cdb_moran_local"
|
||||
NOTICE: ** Finished calculations
|
||||
CONTEXT: PL/Python function "cdb_moran_local"
|
||||
code | quads
|
||||
------+-----------------
|
||||
01 | HH
|
||||
02 | HL
|
||||
03 | Not significant
|
||||
04 | Not significant
|
||||
05 | Not significant
|
||||
06 | Not significant
|
||||
07 | Not significant
|
||||
08 | Not significant
|
||||
09 | Not significant
|
||||
10 | Not significant
|
||||
11 | LL
|
||||
12 | Not significant
|
||||
13 | Not significant
|
||||
14 | Not significant
|
||||
15 | Not significant
|
||||
16 | HH
|
||||
17 | Not significant
|
||||
18 | Not significant
|
||||
19 | Not significant
|
||||
20 | HH
|
||||
21 | LL
|
||||
22 | Not significant
|
||||
23 | Not significant
|
||||
24 | Not significant
|
||||
25 | HH
|
||||
26 | HH
|
||||
27 | Not significant
|
||||
28 | Not significant
|
||||
29 | LL
|
||||
30 | Not significant
|
||||
31 | HH
|
||||
32 | Not significant
|
||||
33 | Not significant
|
||||
34 | Not significant
|
||||
35 | LL
|
||||
36 | Not significant
|
||||
37 | Not significant
|
||||
38 | HL
|
||||
39 | Not significant
|
||||
40 | Not significant
|
||||
41 | HL
|
||||
42 | LH
|
||||
43 | Not significant
|
||||
44 | Not significant
|
||||
45 | LH
|
||||
46 | Not significant
|
||||
47 | Not significant
|
||||
48 | HH
|
||||
49 | Not significant
|
||||
50 | Not significant
|
||||
51 | LL
|
||||
52 | LL
|
||||
(52 rows)
|
||||
|
||||
SELECT cdb_crankshaft._cdb_random_seeds(1234);
|
||||
_cdb_random_seeds
|
||||
-------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT ppoints2.code, m.quads
|
||||
FROM ppoints2
|
||||
JOIN cdb_crankshaft.cdb_moran_local_rate('ppoints2', 'numerator', 'denominator') m
|
||||
ON ppoints2.cartodb_id = m.ids
|
||||
ORDER BY ppoints2.code;
|
||||
NOTICE: ** Constructing query
|
||||
CONTEXT: PL/Python function "cdb_moran_local_rate"
|
||||
NOTICE: ** Query returned with 51 rows
|
||||
CONTEXT: PL/Python function "cdb_moran_local_rate"
|
||||
NOTICE: ** Finished calculations
|
||||
CONTEXT: PL/Python function "cdb_moran_local_rate"
|
||||
code | quads
|
||||
------+-----------------
|
||||
01 | LL
|
||||
02 | Not significant
|
||||
03 | Not significant
|
||||
04 | Not significant
|
||||
05 | Not significant
|
||||
06 | Not significant
|
||||
07 | Not significant
|
||||
08 | Not significant
|
||||
09 | LL
|
||||
10 | Not significant
|
||||
11 | HH
|
||||
12 | Not significant
|
||||
13 | Not significant
|
||||
14 | Not significant
|
||||
15 | Not significant
|
||||
16 | Not significant
|
||||
17 | LL
|
||||
18 | Not significant
|
||||
19 | Not significant
|
||||
20 | LL
|
||||
21 | Not significant
|
||||
22 | Not significant
|
||||
23 | Not significant
|
||||
24 | Not significant
|
||||
25 | LL
|
||||
26 | LL
|
||||
27 | Not significant
|
||||
28 | Not significant
|
||||
29 | LH
|
||||
30 | Not significant
|
||||
31 | LL
|
||||
32 | Not significant
|
||||
33 | Not significant
|
||||
34 | Not significant
|
||||
35 | LH
|
||||
36 | Not significant
|
||||
37 | Not significant
|
||||
38 | LH
|
||||
39 | Not significant
|
||||
40 | Not significant
|
||||
41 | LH
|
||||
42 | HL
|
||||
43 | Not significant
|
||||
44 | Not significant
|
||||
45 | LL
|
||||
46 | Not significant
|
||||
47 | Not significant
|
||||
48 | LL
|
||||
49 | Not significant
|
||||
50 | Not significant
|
||||
51 | Not significant
|
||||
(51 rows)
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
\i test/fixtures/polyg_values.sql
|
||||
CREATE TABLE values (cartodb_id integer, value float, the_geom geometry);
|
||||
INSERT INTO values(cartodb_id, value, the_geom) VALUES
|
||||
(1,10,'0106000020E61000000100000001030000000100000005000000E5AF3500C03608C08068629111374440C7BC0A00C00F02C0AC0551523B414440C7BC0A00C0A700C0CAF23B6E74FB4340A7267FFFFF5206C0FBB7E41B7EE74340E5AF3500C03608C08068629111374440'::geometry),
|
||||
(2,20,'0106000020E610000001000000010300000001000000050000002439EC00804AF7BF07D6CCB5C3064440C7BC0A00C0A700C0CAF23B6E74FB4340C7BC0A00C00F02C0AC0551523B414440E20CD5FFFF30FABFBE4F76AFEA4B44402439EC00804AF7BF07D6CCB5C3064440'::geometry)
|
||||
SELECT round(cdb_crankshaft.cdb_overlap_sum(
|
||||
'0106000020E61000000100000001030000000100000004000000FFFFFFFFFF3604C09A0B9ECEC42E444000000000C060FBBF30C7FD70E01D44400000000040AD02C06481F1C8CD034440FFFFFFFFFF3604C09A0B9ECEC42E4440'::geometry,
|
||||
'values', 'value'
|
||||
), 2);
|
||||
round
|
||||
-------
|
||||
4.42
|
||||
(1 row)
|
||||
|
||||
SELECT round(cdb_crankshaft.cdb_overlap_sum(
|
||||
'0106000020E61000000100000001030000000100000004000000FFFFFFFFFF3604C09A0B9ECEC42E444000000000C060FBBF30C7FD70E01D44400000000040AD02C06481F1C8CD034440FFFFFFFFFF3604C09A0B9ECEC42E4440'::geometry,
|
||||
'values', 'value', schema_name := 'public'
|
||||
), 2);
|
||||
round
|
||||
-------
|
||||
4.42
|
||||
(1 row)
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
-- Install dependencies
|
||||
CREATE EXTENSION plpythonu;
|
||||
CREATE EXTENSION postgis;
|
||||
CREATE EXTENSION cartodb;
|
||||
-- Install the extension
|
||||
CREATE EXTENSION crankshaft;
|
||||
@@ -1,21 +0,0 @@
|
||||
\i test/fixtures/ppoints.sql
|
||||
\i test/fixtures/ppoints2.sql
|
||||
|
||||
-- Moral functions perform some nondeterministic computations
|
||||
-- (to estimate the significance); we will set the seeds for the RNGs
|
||||
-- that affect those results to have repeateble results
|
||||
SELECT cdb_crankshaft._cdb_random_seeds(1234);
|
||||
|
||||
SELECT ppoints.code, m.quads
|
||||
FROM ppoints
|
||||
JOIN cdb_crankshaft.cdb_moran_local('ppoints', 'value') m
|
||||
ON ppoints.cartodb_id = m.ids
|
||||
ORDER BY ppoints.code;
|
||||
|
||||
SELECT cdb_crankshaft._cdb_random_seeds(1234);
|
||||
|
||||
SELECT ppoints2.code, m.quads
|
||||
FROM ppoints2
|
||||
JOIN cdb_crankshaft.cdb_moran_local_rate('ppoints2', 'numerator', 'denominator') m
|
||||
ON ppoints2.cartodb_id = m.ids
|
||||
ORDER BY ppoints2.code;
|
||||
1
python/.gitignore
vendored
1
python/.gitignore
vendored
@@ -1 +0,0 @@
|
||||
*.pyc
|
||||
@@ -1,11 +0,0 @@
|
||||
# Install the package (needs root privileges)
|
||||
install:
|
||||
pip install ./crankshaft --upgrade
|
||||
|
||||
# Test from source code
|
||||
test:
|
||||
(cd crankshaft && nosetests test/)
|
||||
|
||||
# Test currently installed package
|
||||
testinstalled:
|
||||
nosetests crankshaft/test/
|
||||
@@ -1,9 +0,0 @@
|
||||
# Crankshaft Python Package
|
||||
|
||||
...
|
||||
### Run the tests
|
||||
|
||||
```bash
|
||||
cd crankshaft
|
||||
nosetests test/
|
||||
```
|
||||
0
release/.gitignore
vendored
Normal file
0
release/.gitignore
vendored
Normal file
74
release/crankshaft--0.0.1--0.0.2.sql
Normal file
74
release/crankshaft--0.0.1--0.0.2.sql
Normal file
@@ -0,0 +1,74 @@
|
||||
-- Version number of the extension release.
|
||||
-- Returns the hard-coded release string '0.0.2' as text.
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft.cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.0.2'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
|
||||
-- Internal identifier of the installed extension instance.
|
||||
-- Reads installed_version from pg_available_extensions for the row whose
|
||||
-- name is 'crankshaft'.
|
||||
-- NOTE(review): `pg_available_extensions IS NOT NULL` tests the whole row; in
|
||||
-- PostgreSQL a row value is NOT NULL only when every field is non-null, so
|
||||
-- this presumably limits the result to an actually installed extension --
|
||||
-- confirm that this is the intent.
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft._cdb_crankshaft_internal_version()
|
||||
RETURNS text AS $$
|
||||
SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
-- Returns the base directory that holds the Python virtualenvs; the value is
|
||||
-- read by _cdb_crankshaft_activate_py to build the default virtualenv path.
|
||||
-- NOTE(review): the path is hard-coded to one specific host layout; other
|
||||
-- deployments presumably have to rely on the CRANKSHAFT_VENV override.
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft._cdb_crankshaft_virtualenvs_path()
|
||||
RETURNS text
|
||||
AS $$
|
||||
BEGIN
|
||||
RETURN '/home/ubuntu/crankshaft/envs';
|
||||
END;
|
||||
$$ language plpgsql IMMUTABLE STRICT;
|
||||
|
||||
-- Activates the Python virtualenv used by the extension's PL/Python code.
|
||||
-- The virtualenv directory defaults to <virtualenvs path>/<internal version>
|
||||
-- and can be overridden with the CRANKSHAFT_VENV environment variable.
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft._cdb_crankshaft_activate_py()
|
||||
RETURNS VOID
|
||||
AS $$
|
||||
import os
|
||||
# plpy.notice('%',str(os.environ))
|
||||
# activate virtualenv
|
||||
# Both lookups go through SQL so the values come from the helper functions
|
||||
# defined in this same schema; the result column is named after the function.
|
||||
crankshaft_version = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_internal_version()')[0]['_cdb_crankshaft_internal_version']
|
||||
base_path = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_virtualenvs_path()')[0]['_cdb_crankshaft_virtualenvs_path']
|
||||
default_venv_path = os.path.join(base_path, crankshaft_version)
|
||||
# The environment variable, when set, wins over the computed default.
|
||||
venv_path = os.environ.get('CRANKSHAFT_VENV', default_venv_path)
|
||||
activate_path = venv_path + '/bin/activate_this.py'
|
||||
# Execute the virtualenv's activate_this.py with __file__ set to its own path.
|
||||
# NOTE(review): the file object returned by open() is never closed explicitly.
|
||||
exec(open(activate_path).read(), dict(__file__=activate_path))
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Internal function: activates the extension's virtualenv, then passes
|
||||
-- seed_value to crankshaft.random_seeds.set_random_seeds. The regression
|
||||
-- tests call it with a fixed seed before the Moran queries.
|
||||
CREATE OR REPLACE FUNCTION
|
||||
cdb_crankshaft._cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||
AS $$
|
||||
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
from crankshaft import random_seeds
|
||||
random_seeds.set_random_seeds(seed_value)
|
||||
$$ LANGUAGE plpythonu;
|
||||
-- Moran's I
|
||||
-- Local Moran's I wrapper: activates the extension's virtualenv and forwards
|
||||
-- every argument, positionally and in declaration order, to
|
||||
-- crankshaft.clustering.moran_local.
|
||||
--   t             presumably the name of the input table -- confirm
|
||||
--   attr          name of the column holding the values
|
||||
--   significance  significance threshold (default 0.05)
|
||||
--   num_ngbrs     number of neighbors to consider (default 5)
|
||||
--   permutations  number of random permutations (default 99)
|
||||
--   geom_column   geometry column (default 'the_geom')
|
||||
--   id_col        id column (default 'cartodb_id')
|
||||
--   w_type        weight type (default 'knn')
|
||||
-- Returns rows of (moran, quads, significance, ids).
|
||||
CREATE OR REPLACE FUNCTION
|
||||
cdb_crankshaft.cdb_moran_local (
|
||||
t TEXT,
|
||||
attr TEXT,
|
||||
significance float DEFAULT 0.05,
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_column TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id',
|
||||
w_type TEXT DEFAULT 'knn')
|
||||
RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
|
||||
AS $$
|
||||
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Local Moran's I on a rate: same wrapper shape as cdb_moran_local, but the
|
||||
-- single attribute column is replaced by numerator and denominator columns.
|
||||
-- Activates the extension's virtualenv and forwards every argument,
|
||||
-- positionally and in declaration order, to
|
||||
-- crankshaft.clustering.moran_local_rate.
|
||||
-- Returns rows of (moran, quads, significance, ids, y); the extra column y is
|
||||
-- not present in cdb_moran_local's result.
|
||||
CREATE OR REPLACE FUNCTION
|
||||
cdb_crankshaft.cdb_moran_local_rate(t TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
significance FLOAT DEFAULT 0.05,
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_column TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id',
|
||||
w_type TEXT DEFAULT 'knn')
|
||||
RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric)
|
||||
AS $$
|
||||
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
from crankshaft.clustering import moran_local_rate
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
|
||||
$$ LANGUAGE plpythonu;
|
||||
@@ -1,6 +1,12 @@
|
||||
-- Moran's I
|
||||
CREATE OR REPLACE FUNCTION
|
||||
cdb_moran_local (
|
||||
cdb_crankshaft._cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||
AS $$
|
||||
from crankshaft import random_seeds
|
||||
random_seeds.set_random_seeds(seed_value)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
cdb_crankshaft.cdb_moran_local (
|
||||
t TEXT,
|
||||
attr TEXT,
|
||||
significance float DEFAULT 0.05,
|
||||
@@ -16,9 +22,8 @@ AS $$
|
||||
return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local Rate
|
||||
CREATE OR REPLACE FUNCTION
|
||||
cdb_moran_local_rate(t TEXT,
|
||||
cdb_crankshaft.cdb_moran_local_rate(t TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
significance FLOAT DEFAULT 0.05,
|
||||
@@ -33,3 +38,7 @@ AS $$
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
DROP FUNCTION IF EXISTS cdb_crankshaft.cdb_crankshaft_version();
|
||||
DROP FUNCTION IF EXISTS cdb_crankshaft._cdb_crankshaft_internal_version();
|
||||
DROP FUNCTION IF EXISTS cdb_crankshaft._cdb_crankshaft_activate_py();
|
||||
413
release/crankshaft--0.0.2--0.0.3.sql
Normal file
413
release/crankshaft--0.0.2--0.0.3.sql
Normal file
@@ -0,0 +1,413 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
|
||||
-- [MANUALLY] DROP FUNCTIONS REMOVED SINCE 0.0.2 version
|
||||
|
||||
DROP FUNCTION IF EXISTS cdb_moran_local(TEXT, TEXT, float, INT, INT, TEXT, TEXT, TEXT);
|
||||
DROP FUNCTION IF EXISTS cdb_moran_local_rate(TEXT, TEXT, TEXT, FLOAT, INT, INT, TEXT, TEXT, TEXT);
|
||||
DROP FUNCTION IF EXISTS _cdb_crankshaft_virtualenvs_path();
|
||||
DROP FUNCTION IF EXISTS _cdb_crankshaft_activate_py();
|
||||
|
||||
-- [END MANUALLY] DROP FUNCTIONS REMOVED SINCE 0.0.2 version
|
||||
|
||||
-- Version number of the extension release
|
||||
-- Returns the hard-coded release string '0.0.3' as text.
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.0.3'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
|
||||
-- Internal identifier of the installed extension instance
|
||||
-- e.g. 'dev' for current development version
|
||||
-- Reads installed_version from pg_available_extensions for the row whose
|
||||
-- name is 'crankshaft'.
|
||||
-- NOTE(review): `pg_available_extensions IS NOT NULL` tests the whole row; in
|
||||
-- PostgreSQL a row value is NOT NULL only when every field is non-null, so
|
||||
-- this presumably limits the result to an actually installed extension --
|
||||
-- confirm that this is the intent.
|
||||
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
|
||||
RETURNS text AS $$
|
||||
SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
-- Internal function.
|
||||
-- Set the seeds of the RNGs (Random Number Generators)
|
||||
-- used internally.
|
||||
-- Passes seed_value straight to crankshaft.random_seeds.set_random_seeds;
|
||||
-- nothing is returned.
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||
AS $$
|
||||
from crankshaft import random_seeds
|
||||
random_seeds.set_random_seeds(seed_value)
|
||||
$$ LANGUAGE plpythonu;
|
||||
-- Moran's I Global Measure (public-facing)
-- Forwards all arguments, positionally, to crankshaft.clustering.moran and
-- returns its result as (moran, significance) rows.
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestGlobal(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
AS $$
  # Import the name that is actually called below; importing moran_local
  # instead leaves `moran` undefined and the call fails with NameError.
  from crankshaft.clustering import moran
  # TODO: use named parameters or a dictionary
  return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (internal function)
-- Thin PL/Python shim: every argument is handed, in declaration order, to
-- crankshaft.clustering.moran_local and its result is returned unchanged.
CREATE OR REPLACE FUNCTION _CDB_AreasOfInterestLocal(
    subquery     TEXT,
    column_name  TEXT,
    w_type       TEXT,
    num_ngbrs    INT,
    permutations INT,
    geom_col     TEXT,
    id_col       TEXT)
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE plpythonu
AS $$
  from crankshaft.clustering import moran_local
  # TODO: use named parameters or a dictionary
  call_args = (subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
  return moran_local(*call_args)
$$;
|
||||
|
||||
-- Moran's I Local (public-facing function)
-- Supplies defaults for the optional arguments and returns every row
-- produced by the internal _CDB_AreasOfInterestLocal function.
CREATE OR REPLACE FUNCTION CDB_AreasOfInterestLocal(
    subquery     TEXT,
    column_name  TEXT,
    w_type       TEXT DEFAULT 'knn',
    num_ngbrs    INT  DEFAULT 5,
    permutations INT  DEFAULT 99,
    geom_col     TEXT DEFAULT 'the_geom',
    id_col       TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, column_name, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi;
$$;
|
||||
|
||||
-- Moran's I only for HH and HL (public-facing function)
-- Same rows as the internal local Moran's I function, keeping only those
-- whose quads value is 'HH' or 'HL'.
CREATE OR REPLACE FUNCTION CDB_GetSpatialHotspots(
    subquery     TEXT,
    column_name  TEXT,
    w_type       TEXT DEFAULT 'knn',
    num_ngbrs    INT  DEFAULT 5,
    permutations INT  DEFAULT 99,
    geom_col     TEXT DEFAULT 'the_geom',
    id_col       TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, column_name, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads IN ('HH', 'HL');
$$;
|
||||
|
||||
-- Moran's I only for LL and LH (public-facing function)
-- Same rows as the internal local Moran's I function, keeping only those
-- whose quads value is 'LL' or 'LH'.
CREATE OR REPLACE FUNCTION CDB_GetSpatialColdspots(
    subquery     TEXT,
    attr         TEXT,
    w_type       TEXT DEFAULT 'knn',
    num_ngbrs    INT  DEFAULT 5,
    permutations INT  DEFAULT 99,
    geom_col     TEXT DEFAULT 'the_geom',
    id_col       TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, attr, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads IN ('LL', 'LH');
$$;
|
||||
|
||||
-- Moran's I only for LH and HL (public-facing function)
-- Same rows as the internal local Moran's I function, keeping only those
-- whose quads value is 'HL' or 'LH'.
CREATE OR REPLACE FUNCTION CDB_GetSpatialOutliers(
    subquery     TEXT,
    attr         TEXT,
    w_type       TEXT DEFAULT 'knn',
    num_ngbrs    INT  DEFAULT 5,
    permutations INT  DEFAULT 99,
    geom_col     TEXT DEFAULT 'the_geom',
    id_col       TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, attr, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads IN ('HL', 'LH');
$$;
|
||||
|
||||
-- Moran's I Global Rate (public-facing function)
-- Forwards all arguments, positionally, to crankshaft.clustering.moran_rate
-- and returns its result as (moran, significance) rows.
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestGlobalRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran FLOAT, significance FLOAT)
AS $$
  # Import the name that is actually called below; importing moran_local
  # instead leaves `moran_rate` undefined and the call fails with NameError.
  from crankshaft.clustering import moran_rate
  # TODO: use named parameters or a dictionary
  return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
-- Moran's I Local Rate (internal function)
-- Thin PL/Python shim: every argument is handed, in declaration order, to
-- crankshaft.clustering.moran_local_rate and its result is returned unchanged.
CREATE OR REPLACE FUNCTION _CDB_AreasOfInterestLocalRate(
    subquery     TEXT,
    numerator    TEXT,
    denominator  TEXT,
    w_type       TEXT,
    num_ngbrs    INT,
    permutations INT,
    geom_col     TEXT,
    id_col       TEXT)
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE plpythonu
AS $$
  from crankshaft.clustering import moran_local_rate
  # TODO: use named parameters or a dictionary
  call_args = (subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
  return moran_local_rate(*call_args)
$$;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function)
-- Supplies defaults for the optional arguments and returns every row
-- produced by the internal _CDB_AreasOfInterestLocalRate function.
CREATE OR REPLACE FUNCTION CDB_AreasOfInterestLocalRate(
    subquery     TEXT,
    numerator    TEXT,
    denominator  TEXT,
    w_type       TEXT DEFAULT 'knn',
    num_ngbrs    INT  DEFAULT 5,
    permutations INT  DEFAULT 99,
    geom_col     TEXT DEFAULT 'the_geom',
    id_col       TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi;
$$;
|
||||
|
||||
-- Moran's I Local Rate only for HH and HL (public-facing function)
-- Same rows as the internal local-rate function, keeping only those whose
-- quads value is 'HH' or 'HL'.
CREATE OR REPLACE FUNCTION CDB_GetSpatialHotspotsRate(
    subquery     TEXT,
    numerator    TEXT,
    denominator  TEXT,
    w_type       TEXT DEFAULT 'knn',
    num_ngbrs    INT  DEFAULT 5,
    permutations INT  DEFAULT 99,
    geom_col     TEXT DEFAULT 'the_geom',
    id_col       TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads IN ('HH', 'HL');
$$;
|
||||
|
||||
-- Moran's I Local Rate only for LL and LH (public-facing function)
-- Same rows as the internal local-rate function, keeping only those whose
-- quads value is 'LL' or 'LH'.
CREATE OR REPLACE FUNCTION CDB_GetSpatialColdspotsRate(
    subquery     TEXT,
    numerator    TEXT,
    denominator  TEXT,
    w_type       TEXT DEFAULT 'knn',
    num_ngbrs    INT  DEFAULT 5,
    permutations INT  DEFAULT 99,
    geom_col     TEXT DEFAULT 'the_geom',
    id_col       TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads IN ('LL', 'LH');
$$;
|
||||
|
||||
-- Moran's I Local Rate only for LH and HL (public-facing function)
-- Same rows as the internal local-rate function, keeping only those whose
-- quads value is 'HL' or 'LH'.
CREATE OR REPLACE FUNCTION CDB_GetSpatialOutliersRate(
    subquery     TEXT,
    numerator    TEXT,
    denominator  TEXT,
    w_type       TEXT DEFAULT 'knn',
    num_ngbrs    INT  DEFAULT 5,
    permutations INT  DEFAULT 99,
    geom_col     TEXT DEFAULT 'the_geom',
    id_col       TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads IN ('HL', 'LH');
$$;
|
||||
-- Hands query, no_clusters and no_init to crankshaft.clustering.kmeans and
-- returns its result as (cartodb_id, cluster_no) rows.
-- NOTE(review): no_init is presumably the number of initialisations the
-- Python implementation runs; confirm against crankshaft.clustering.
CREATE OR REPLACE FUNCTION CDB_KMeans(
    query       text,
    no_clusters integer,
    no_init     integer DEFAULT 20)
RETURNS TABLE (cartodb_id integer, cluster_no integer)
LANGUAGE plpythonu
AS $$
  from crankshaft.clustering import kmeans
  call_args = (query, no_clusters, no_init)
  return kmeans(*call_args)
$$;
|
||||
|
||||
|
||||
-- State-transition function of the CDB_WeightedMean aggregate.
-- state holds {sum of x*weight, sum of y*weight, sum of weights}; each call
-- returns the state with one (point, weight) pair folded in.
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[], the_geom GEOMETRY(Point, 4326), weight NUMERIC)
RETURNS Numeric[]
LANGUAGE plpgsql
AS $$
BEGIN
    -- A row without a weight or without a geometry contributes nothing.
    IF weight IS NULL OR the_geom IS NULL THEN
        RETURN ARRAY[state[1], state[2], state[3]];
    END IF;

    RETURN ARRAY[
        (state[1] + ST_X(the_geom) * weight)::NUMERIC,
        (state[2] + ST_Y(the_geom) * weight)::NUMERIC,
        (state[3] + weight)::NUMERIC
    ];
END
$$;
|
||||
|
||||
-- Final function of the CDB_WeightedMean aggregate.
-- Builds an SRID 4326 point from state[1] and state[2], each divided by
-- state[3]; when state[3] is zero the two values are used undivided.
CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
RETURNS GEOMETRY
LANGUAGE plpgsql
AS $$
BEGIN
    -- Guard against division by zero.
    IF state[3] = 0 THEN
        RETURN ST_SetSRID(ST_MakePoint(state[1], state[2]), 4326);
    END IF;

    RETURN ST_SetSRID(
        ST_MakePoint(state[1] / state[3], state[2] / state[3]),
        4326);
END
$$;
|
||||
|
||||
-- Aggregate over (point, weight) pairs, built from CDB_WeightedMeanS as the
-- transition function and CDB_WeightedMeanF as the final function.
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)(
  SFUNC = CDB_WeightedMeanS,
  FINALFUNC = CDB_WeightedMeanF,
  STYPE = Numeric[],
  -- The initial state is a string constant (single quotes); double quotes
  -- would make it a quoted identifier rather than a literal.
  INITCOND = '{0.0,0.0,0.0}'
);
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
-- from a polygon table over an arbitrary polygon
-- (weighted by the area proportion overlapped)
-- Areal weighting is a very simple form of areal interpolation.
--
-- Parameters:
--   * geom a Polygon geometry which defines the area where a value will be
--     estimated as the area-weighted sum of a given table/column
--   * target_table_name table name of the table that provides the values
--   * target_column column name of the column that provides the values
--   * schema_name optional parameter to define the schema the target table
--     belongs to, which is necessary if it's not in the search_path.
--     Note that target_table_name should never include the schema in it.
-- Return value:
--   Areal-weighted interpolation of the column values over the geometry
CREATE OR REPLACE
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
  RETURNS numeric AS
$$
DECLARE
  result numeric;
  qualified_name text;
BEGIN
  -- Quote every identifier with %I, in both branches, so that mixed-case or
  -- otherwise unusual names work and cannot inject SQL into the query below.
  IF schema_name IS NULL THEN
    qualified_name := Format('%I', target_table_name);
  ELSE
    qualified_name := Format('%I.%I', schema_name, target_table_name);
  END IF;
  -- qualified_name is already quoted, hence %s; the geometry is passed as a
  -- bind parameter ($1) rather than being interpolated into the text.
  EXECUTE Format('
    SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
    FROM %s AS a
    WHERE $1 && a.the_geom
  ', target_column, qualified_name)
  INTO result
  USING geom;
  RETURN result;
END;
$$ LANGUAGE plpgsql;
|
||||
--
|
||||
-- Creates N points randomly distributed around the polygon
--
-- @param geom - the geometry to be turned into points
--
-- @param no_points - the number of points to generate; zero, negative or
--   NULL yields no points
--
-- @param max_iter_per_point - kept for interface compatibility; this
--   implementation draws each point along a random horizontal scanline and
--   does not read this argument.
--
-- Returns: Multipoint with the requested points
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
RETURNS GEOMETRY AS $$
DECLARE
  extent GEOMETRY;
  test_point Geometry;
  width NUMERIC;
  height NUMERIC;
  x0 NUMERIC;
  y0 NUMERIC;
  yp NUMERIC;
  srid INTEGER;
  no_left INTEGER;
  points GEOMETRY[];
  bbox_line GEOMETRY;
  intersection_line GEOMETRY;
BEGIN
  extent := ST_Envelope(geom);
  width := ST_XMax(extent) - ST_XMin(extent);
  height := ST_YMax(extent) - ST_YMin(extent);
  x0 := ST_XMin(extent);
  y0 := ST_YMin(extent);
  -- Build the scanline in the input's own SRID so the intersection below
  -- never mixes SRIDs.
  srid := ST_SRID(geom);
  no_left := no_points;

  -- "> 0" (rather than "= 0" as the exit test) also terminates for negative
  -- or NULL counts instead of looping forever.
  WHILE no_left > 0 LOOP
    -- Random horizontal scanline across the bounding box. ST_MakePoint takes
    -- (x, y), so the random ordinate yp is the second argument.
    yp := y0 + height*random();
    bbox_line := ST_MakeLine(
      ST_SetSRID(ST_MakePoint(x0, yp), srid),
      ST_SetSRID(ST_MakePoint(x0 + width, yp), srid)
    );
    -- Keep only the part of the scanline that lies inside the geometry and
    -- pick a random position along it.
    intersection_line := ST_Intersection(bbox_line, geom);
    test_point := ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)), random());
    points := points || test_point;
    no_left := no_left - 1;
  END LOOP;
  RETURN ST_Collect(points);
END;
$$
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
186
release/crankshaft--0.0.2.sql
Normal file
186
release/crankshaft--0.0.2.sql
Normal file
@@ -0,0 +1,186 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Reports the version string of this extension release.
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text
LANGUAGE sql STABLE STRICT
AS $$
  SELECT CAST('0.0.2' AS text);
$$;
|
||||
|
||||
-- Internal identifier of the installed extension instance,
-- e.g. 'dev' for the current development version.
-- Yields NULL when no installed 'crankshaft' row is found.
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
RETURNS text AS $$
  -- Test the column itself: a whole-row "IS NOT NULL" is true only when
  -- every column of the row is non-null, including the optional comment.
  SELECT installed_version
    FROM pg_available_extensions
   WHERE name = 'crankshaft'
     AND installed_version IS NOT NULL;
$$ LANGUAGE sql STABLE STRICT;
|
||||
-- Returns the directory under which the per-version crankshaft Python
-- virtualenvs are looked up.
-- NOTE(review): the path is hard-coded to a user's home directory;
-- '/opt/virtualenvs/crankshaft' was the alternative left commented out.
CREATE OR REPLACE FUNCTION _cdb_crankshaft_virtualenvs_path()
RETURNS text
LANGUAGE plpgsql IMMUTABLE STRICT
AS $$
DECLARE
  venvs_root CONSTANT text := '/home/ubuntu/crankshaft/envs';
BEGIN
  RETURN venvs_root;
END;
$$;
|
||||
|
||||
-- Use the crankshaft python module
-- Activates the virtualenv holding the crankshaft package by executing its
-- bin/activate_this.py script. The virtualenv is taken from the
-- CRANKSHAFT_VENV environment variable when set, otherwise from
-- <virtualenvs path>/<internal extension version>.
CREATE OR REPLACE FUNCTION _cdb_crankshaft_activate_py()
RETURNS VOID
AS $$
    import os
    # plpy.notice('%',str(os.environ))
    # activate virtualenv
    crankshaft_version = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_internal_version()')[0]['_cdb_crankshaft_internal_version']
    base_path = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_virtualenvs_path()')[0]['_cdb_crankshaft_virtualenvs_path']
    default_venv_path = os.path.join(base_path, crankshaft_version)
    venv_path = os.environ.get('CRANKSHAFT_VENV', default_venv_path)
    activate_path = venv_path + '/bin/activate_this.py'
    # Read the script inside a with-block so the file handle is closed
    # deterministically rather than left to the garbage collector.
    with open(activate_path) as activate_file:
        activate_source = activate_file.read()
    exec(activate_source, dict(__file__=activate_path))
$$ LANGUAGE plpythonu;
|
||||
-- Internal helper.
-- Activates the crankshaft virtualenv, then hands seed_value to
-- crankshaft.random_seeds.set_random_seeds, which sets the seeds of the
-- random number generators used internally.
CREATE OR REPLACE FUNCTION _cdb_random_seeds(seed_value INTEGER)
RETURNS VOID
LANGUAGE plpythonu
AS $$
  plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
  from crankshaft import random_seeds
  random_seeds.set_random_seeds(seed_value)
$$;
|
||||
-- Moran's I
-- Activates the crankshaft virtualenv, then forwards every argument, in
-- declaration order, to crankshaft.clustering.moran_local.
CREATE OR REPLACE FUNCTION cdb_moran_local(
    t            TEXT,
    attr         TEXT,
    significance float DEFAULT 0.05,
    num_ngbrs    INT   DEFAULT 5,
    permutations INT   DEFAULT 99,
    geom_column  TEXT  DEFAULT 'the_geom',
    id_col       TEXT  DEFAULT 'cartodb_id',
    w_type       TEXT  DEFAULT 'knn')
RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
LANGUAGE plpythonu
AS $$
  plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
  from crankshaft.clustering import moran_local
  # TODO: use named parameters or a dictionary
  call_args = (t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
  return moran_local(*call_args)
$$;
|
||||
|
||||
-- Moran's I Local Rate
-- Activates the crankshaft virtualenv, then forwards every argument, in
-- declaration order, to crankshaft.clustering.moran_local_rate.
CREATE OR REPLACE FUNCTION cdb_moran_local_rate(
    t            TEXT,
    numerator    TEXT,
    denominator  TEXT,
    significance FLOAT DEFAULT 0.05,
    num_ngbrs    INT   DEFAULT 5,
    permutations INT   DEFAULT 99,
    geom_column  TEXT  DEFAULT 'the_geom',
    id_col       TEXT  DEFAULT 'cartodb_id',
    w_type       TEXT  DEFAULT 'knn')
RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric)
LANGUAGE plpythonu
AS $$
  plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
  from crankshaft.clustering import moran_local_rate
  # TODO: use named parameters or a dictionary
  call_args = (t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
  return moran_local_rate(*call_args)
$$;
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
-- from a polygon table over an arbitrary polygon
-- (weighted by the area proportion overlapped)
-- Areal weighting is a very simple form of areal interpolation.
--
-- Parameters:
--   * geom a Polygon geometry which defines the area where a value will be
--     estimated as the area-weighted sum of a given table/column
--   * target_table_name table name of the table that provides the values
--   * target_column column name of the column that provides the values
--   * schema_name optional parameter to define the schema the target table
--     belongs to, which is necessary if it's not in the search_path.
--     Note that target_table_name should never include the schema in it.
-- Return value:
--   Areal-weighted interpolation of the column values over the geometry
CREATE OR REPLACE
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
  RETURNS numeric AS
$$
DECLARE
  result numeric;
  qualified_name text;
BEGIN
  -- Quote every identifier with %I, in both branches, so that mixed-case or
  -- otherwise unusual names work and cannot inject SQL into the query below.
  IF schema_name IS NULL THEN
    qualified_name := Format('%I', target_table_name);
  ELSE
    qualified_name := Format('%I.%I', schema_name, target_table_name);
  END IF;
  -- qualified_name is already quoted, hence %s; the geometry is passed as a
  -- bind parameter ($1) rather than being interpolated into the text.
  EXECUTE Format('
    SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
    FROM %s AS a
    WHERE $1 && a.the_geom
  ', target_column, qualified_name)
  INTO result
  USING geom;
  RETURN result;
END;
$$ LANGUAGE plpgsql;
|
||||
--
|
||||
-- Creates N points randomly distributed around the polygon
--
-- @param geom - the geometry to be turned into points
--
-- @param no_points - the number of points to generate; zero, negative or
--   NULL yields no points
--
-- @param max_iter_per_point - kept for interface compatibility; this
--   implementation draws each point along a random horizontal scanline and
--   does not read this argument.
--
-- Returns: Multipoint with the requested points
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
RETURNS GEOMETRY AS $$
DECLARE
  extent GEOMETRY;
  test_point Geometry;
  width NUMERIC;
  height NUMERIC;
  x0 NUMERIC;
  y0 NUMERIC;
  yp NUMERIC;
  srid INTEGER;
  no_left INTEGER;
  points GEOMETRY[];
  bbox_line GEOMETRY;
  intersection_line GEOMETRY;
BEGIN
  extent := ST_Envelope(geom);
  width := ST_XMax(extent) - ST_XMin(extent);
  height := ST_YMax(extent) - ST_YMin(extent);
  x0 := ST_XMin(extent);
  y0 := ST_YMin(extent);
  -- Build the scanline in the input's own SRID so the intersection below
  -- never mixes SRIDs.
  srid := ST_SRID(geom);
  no_left := no_points;

  -- "> 0" (rather than "= 0" as the exit test) also terminates for negative
  -- or NULL counts instead of looping forever.
  WHILE no_left > 0 LOOP
    -- Random horizontal scanline across the bounding box. ST_MakePoint takes
    -- (x, y), so the random ordinate yp is the second argument.
    yp := y0 + height*random();
    bbox_line := ST_MakeLine(
      ST_SetSRID(ST_MakePoint(x0, yp), srid),
      ST_SetSRID(ST_MakePoint(x0 + width, yp), srid)
    );
    -- Keep only the part of the scanline that lies inside the geometry and
    -- pick a random position along it.
    intersection_line := ST_Intersection(bbox_line, geom);
    test_point := ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)), random());
    points := points || test_point;
    no_left := no_left - 1;
  END LOOP;
  RETURN ST_Collect(points);
END;
$$
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
209
release/crankshaft--0.0.3--0.0.2.sql
Normal file
209
release/crankshaft--0.0.3--0.0.2.sql
Normal file
@@ -0,0 +1,209 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
|
||||
-- [MANUALLY] DROP FUNCTIONS INTRODUCED IN 0.0.3 version
|
||||
|
||||
DROP FUNCTION IF EXISTS CDB_AreasOfInterestGlobal(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS _CDB_AreasOfInterestLocal(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_AreasOfInterestLocal(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_GetSpatialHotspots(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_GetSpatialColdspots(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_GetSpatialOutliers(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_AreasOfInterestGlobalRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_AreasOfInterestLocalRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS _CDB_AreasOfInterestLocalRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_GetSpatialHotspotsRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_GetSpatialColdspotsRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_GetSpatialOutliersRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_KMeans(text,integer,integer);
|
||||
DROP AGGREGATE IF EXISTS CDB_WeightedMean(geometry(Point, 4326), NUMERIC);
|
||||
DROP FUNCTION IF EXISTS CDB_WeightedMeanS(Numeric[], GEOMETRY(Point, 4326), NUMERIC);
|
||||
DROP FUNCTION IF EXISTS CDB_WeightedMeanF(Numeric[]);
|
||||
|
||||
|
||||
-- [END MANUALLY] DROP FUNCTIONS INTRODUCED IN 0.0.3 version
|
||||
|
||||
-- Reports the version string of this extension release.
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text
LANGUAGE sql STABLE STRICT
AS $$
  SELECT CAST('0.0.2' AS text);
$$;
|
||||
|
||||
-- Internal identifier of the installed extension instance,
-- e.g. 'dev' for the current development version.
-- Yields NULL when no installed 'crankshaft' row is found.
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
RETURNS text AS $$
  -- Test the column itself: a whole-row "IS NOT NULL" is true only when
  -- every column of the row is non-null, including the optional comment.
  SELECT installed_version
    FROM pg_available_extensions
   WHERE name = 'crankshaft'
     AND installed_version IS NOT NULL;
$$ LANGUAGE sql STABLE STRICT;
|
||||
-- Returns the directory under which the per-version crankshaft Python
-- virtualenvs are looked up.
-- NOTE(review): the path is hard-coded to a user's home directory;
-- '/opt/virtualenvs/crankshaft' was the alternative left commented out.
CREATE OR REPLACE FUNCTION _cdb_crankshaft_virtualenvs_path()
RETURNS text
LANGUAGE plpgsql IMMUTABLE STRICT
AS $$
DECLARE
  venvs_root CONSTANT text := '/home/ubuntu/crankshaft/envs';
BEGIN
  RETURN venvs_root;
END;
$$;
|
||||
|
||||
-- Use the crankshaft python module
-- Activates the virtualenv holding the crankshaft package by executing its
-- bin/activate_this.py script. The virtualenv is taken from the
-- CRANKSHAFT_VENV environment variable when set, otherwise from
-- <virtualenvs path>/<internal extension version>.
CREATE OR REPLACE FUNCTION _cdb_crankshaft_activate_py()
RETURNS VOID
AS $$
    import os
    # plpy.notice('%',str(os.environ))
    # activate virtualenv
    crankshaft_version = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_internal_version()')[0]['_cdb_crankshaft_internal_version']
    base_path = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_virtualenvs_path()')[0]['_cdb_crankshaft_virtualenvs_path']
    default_venv_path = os.path.join(base_path, crankshaft_version)
    venv_path = os.environ.get('CRANKSHAFT_VENV', default_venv_path)
    activate_path = venv_path + '/bin/activate_this.py'
    # Read the script inside a with-block so the file handle is closed
    # deterministically rather than left to the garbage collector.
    with open(activate_path) as activate_file:
        activate_source = activate_file.read()
    exec(activate_source, dict(__file__=activate_path))
$$ LANGUAGE plpythonu;
|
||||
-- Internal helper.
-- Activates the crankshaft virtualenv, then hands seed_value to
-- crankshaft.random_seeds.set_random_seeds, which sets the seeds of the
-- random number generators used internally.
CREATE OR REPLACE FUNCTION _cdb_random_seeds(seed_value INTEGER)
RETURNS VOID
LANGUAGE plpythonu
AS $$
  plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
  from crankshaft import random_seeds
  random_seeds.set_random_seeds(seed_value)
$$;
|
||||
-- Moran's I
-- Activates the crankshaft virtualenv, then forwards every argument, in
-- declaration order, to crankshaft.clustering.moran_local.
CREATE OR REPLACE FUNCTION cdb_moran_local(
    t            TEXT,
    attr         TEXT,
    significance float DEFAULT 0.05,
    num_ngbrs    INT   DEFAULT 5,
    permutations INT   DEFAULT 99,
    geom_column  TEXT  DEFAULT 'the_geom',
    id_col       TEXT  DEFAULT 'cartodb_id',
    w_type       TEXT  DEFAULT 'knn')
RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
LANGUAGE plpythonu
AS $$
  plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
  from crankshaft.clustering import moran_local
  # TODO: use named parameters or a dictionary
  call_args = (t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
  return moran_local(*call_args)
$$;
|
||||
|
||||
-- Moran's I Local Rate
-- Activates the crankshaft virtualenv, then forwards every argument, in
-- declaration order, to crankshaft.clustering.moran_local_rate.
CREATE OR REPLACE FUNCTION cdb_moran_local_rate(
    t            TEXT,
    numerator    TEXT,
    denominator  TEXT,
    significance FLOAT DEFAULT 0.05,
    num_ngbrs    INT   DEFAULT 5,
    permutations INT   DEFAULT 99,
    geom_column  TEXT  DEFAULT 'the_geom',
    id_col       TEXT  DEFAULT 'cartodb_id',
    w_type       TEXT  DEFAULT 'knn')
RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric)
LANGUAGE plpythonu
AS $$
  plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
  from crankshaft.clustering import moran_local_rate
  # TODO: use named parameters or a dictionary
  call_args = (t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
  return moran_local_rate(*call_args)
$$;
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
-- from a polygon table over an arbitrary polygon
-- (weighted by the area proportion overlapped)
-- Areal weighting is a very simple form of areal interpolation.
--
-- Parameters:
--   * geom a Polygon geometry which defines the area where a value will be
--     estimated as the area-weighted sum of a given table/column
--   * target_table_name table name of the table that provides the values
--   * target_column column name of the column that provides the values
--   * schema_name optional parameter to define the schema the target table
--     belongs to, which is necessary if it's not in the search_path.
--     Note that target_table_name should never include the schema in it.
-- Return value:
--   Areal-weighted interpolation of the column values over the geometry
CREATE OR REPLACE
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
  RETURNS numeric AS
$$
DECLARE
  result numeric;
  qualified_name text;
BEGIN
  -- Quote every identifier with %I, in both branches, so that mixed-case or
  -- otherwise unusual names work and cannot inject SQL into the query below.
  IF schema_name IS NULL THEN
    qualified_name := Format('%I', target_table_name);
  ELSE
    qualified_name := Format('%I.%I', schema_name, target_table_name);
  END IF;
  -- qualified_name is already quoted, hence %s; the geometry is passed as a
  -- bind parameter ($1) rather than being interpolated into the text.
  EXECUTE Format('
    SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
    FROM %s AS a
    WHERE $1 && a.the_geom
  ', target_column, qualified_name)
  INTO result
  USING geom;
  RETURN result;
END;
$$ LANGUAGE plpgsql;
|
||||
--
|
||||
-- Creates N points randomly distributed around the polygon
--
-- @param geom - the geometry to be turned into points
--
-- @param no_points - the number of points to generate; zero, negative or
--   NULL yields no points
--
-- @param max_iter_per_point - kept for interface compatibility; this
--   implementation draws each point along a random horizontal scanline and
--   does not read this argument.
--
-- Returns: Multipoint with the requested points
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
RETURNS GEOMETRY AS $$
DECLARE
  extent GEOMETRY;
  test_point Geometry;
  width NUMERIC;
  height NUMERIC;
  x0 NUMERIC;
  y0 NUMERIC;
  yp NUMERIC;
  srid INTEGER;
  no_left INTEGER;
  points GEOMETRY[];
  bbox_line GEOMETRY;
  intersection_line GEOMETRY;
BEGIN
  extent := ST_Envelope(geom);
  width := ST_XMax(extent) - ST_XMin(extent);
  height := ST_YMax(extent) - ST_YMin(extent);
  x0 := ST_XMin(extent);
  y0 := ST_YMin(extent);
  -- Build the scanline in the input's own SRID so the intersection below
  -- never mixes SRIDs.
  srid := ST_SRID(geom);
  no_left := no_points;

  -- "> 0" (rather than "= 0" as the exit test) also terminates for negative
  -- or NULL counts instead of looping forever.
  WHILE no_left > 0 LOOP
    -- Random horizontal scanline across the bounding box. ST_MakePoint takes
    -- (x, y), so the random ordinate yp is the second argument.
    yp := y0 + height*random();
    bbox_line := ST_MakeLine(
      ST_SetSRID(ST_MakePoint(x0, yp), srid),
      ST_SetSRID(ST_MakePoint(x0 + width, yp), srid)
    );
    -- Keep only the part of the scanline that lies inside the geometry and
    -- pick a random position along it.
    intersection_line := ST_Intersection(bbox_line, geom);
    test_point := ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)), random());
    points := points || test_point;
    no_left := no_left - 1;
  END LOOP;
  RETURN ST_Collect(points);
END;
$$
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
8
release/crankshaft--0.0.3--0.0.4.sql
Normal file
8
release/crankshaft--0.0.3--0.0.4.sql
Normal file
@@ -0,0 +1,8 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Reports the release version of the extension as text.
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text
LANGUAGE sql STABLE STRICT
AS $$
  SELECT CAST('0.0.4' AS text);
$$;
|
||||
403
release/crankshaft--0.0.3.sql
Normal file
403
release/crankshaft--0.0.3.sql
Normal file
@@ -0,0 +1,403 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Reports the release version of the extension as text.
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text
LANGUAGE sql STABLE STRICT
AS $$
  SELECT CAST('0.0.3' AS text);
$$;
|
||||
|
||||
-- Internal identifier of the installed extension instance,
-- e.g. 'dev' for the current development version.
-- Reads installed_version from pg_available_extensions for 'crankshaft'.
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
RETURNS text
LANGUAGE sql STABLE STRICT
AS $$
  SELECT ext.installed_version
    FROM pg_available_extensions AS ext
   WHERE ext.name = 'crankshaft'
     -- whole-row test, kept as in the original query
     AND ext IS NOT NULL;
$$;
|
||||
-- Internal function.
-- Seeds the RNGs (Random Number Generators) used internally by handing
-- seed_value to crankshaft.random_seeds.set_random_seeds.
CREATE OR REPLACE FUNCTION
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
LANGUAGE plpythonu
AS $$
  from crankshaft.random_seeds import set_random_seeds
  set_random_seeds(seed_value)
$$;
|
||||
-- Moran's I Global Measure (public-facing)
--
-- Forwards all arguments, positionally and unchanged, to
-- crankshaft.clustering.moran and returns its result as
-- (moran, significance) rows.
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestGlobal(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
AS $$
  # Import the function that is actually called: importing moran_local here
  # left the name `moran` undefined and every call raised NameError.
  from crankshaft.clustering import moran
  # TODO: use named parameters or a dictionary
  return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (internal function)
-- Hands every argument, in declaration order, to
-- crankshaft.clustering.moran_local and returns whatever it yields.
CREATE OR REPLACE FUNCTION
  _CDB_AreasOfInterestLocal(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT,
      num_ngbrs INT,
      permutations INT,
      geom_col TEXT,
      id_col TEXT)
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE plpythonu
AS $$
  from crankshaft.clustering import moran_local as run_moran_local
  # TODO: use named parameters or a dictionary
  local_rows = run_moran_local(subquery, column_name, w_type,
                               num_ngbrs, permutations, geom_col, id_col)
  return local_rows
$$;
|
||||
|
||||
-- Moran's I Local (public-facing function)
-- SQL wrapper that supplies defaults and returns every row produced by
-- cdb_crankshaft._CDB_AreasOfInterestLocal, unfiltered.
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestLocal(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, column_name, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi;
$$;
|
||||
|
||||
-- Moran's I only for HH and HL (public-facing function)
-- Same as the local measure, keeping only rows whose quads is 'HH' or 'HL'.
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialHotspots(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, column_name, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = ANY (ARRAY['HH', 'HL']);
$$;
|
||||
|
||||
-- Moran's I only for LL and LH (public-facing function)
-- Same as the local measure, keeping only rows whose quads is 'LL' or 'LH'.
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialColdspots(
      subquery TEXT,
      attr TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, attr, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = ANY (ARRAY['LL', 'LH']);
$$;
|
||||
|
||||
-- Moran's I only for LH and HL (public-facing function)
-- Same as the local measure, keeping only rows whose quads is 'HL' or 'LH'.
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialOutliers(
      subquery TEXT,
      attr TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, attr, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = ANY (ARRAY['HL', 'LH']);
$$;
|
||||
|
||||
-- Moran's I Global Rate (public-facing function)
--
-- Forwards all arguments, positionally and unchanged, to
-- crankshaft.clustering.moran_rate and returns its result as
-- (moran, significance) rows.
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestGlobalRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran FLOAT, significance FLOAT)
AS $$
  # Import the function that is actually called: importing moran_local here
  # left the name `moran_rate` undefined and every call raised NameError.
  from crankshaft.clustering import moran_rate
  # TODO: use named parameters or a dictionary
  return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
-- Moran's I Local Rate (internal function)
-- Hands every argument, in declaration order, to
-- crankshaft.clustering.moran_local_rate and returns whatever it yields.
CREATE OR REPLACE FUNCTION
  _CDB_AreasOfInterestLocalRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT,
      num_ngbrs INT,
      permutations INT,
      geom_col TEXT,
      id_col TEXT)
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE plpythonu
AS $$
  from crankshaft.clustering import moran_local_rate as run_moran_local_rate
  # TODO: use named parameters or a dictionary
  rate_rows = run_moran_local_rate(subquery, numerator, denominator, w_type,
                                   num_ngbrs, permutations, geom_col, id_col)
  return rate_rows
$$;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function)
-- SQL wrapper that supplies defaults and returns every row produced by
-- cdb_crankshaft._CDB_AreasOfInterestLocalRate, unfiltered.
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestLocalRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi;
$$;
|
||||
|
||||
-- Moran's I Local Rate only for HH and HL (public-facing function)
-- Same as the local rate measure, keeping only rows whose quads is 'HH' or 'HL'.
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialHotspotsRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = ANY (ARRAY['HH', 'HL']);
$$;
|
||||
|
||||
-- Moran's I Local Rate only for LL and LH (public-facing function)
-- Same as the local rate measure, keeping only rows whose quads is 'LL' or 'LH'.
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialColdspotsRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = ANY (ARRAY['LL', 'LH']);
$$;
|
||||
|
||||
-- Moran's I Local Rate only for LH and HL (public-facing function)
-- Same as the local rate measure, keeping only rows whose quads is 'HL' or 'LH'.
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialOutliersRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = ANY (ARRAY['HL', 'LH']);
$$;
|
||||
-- Delegates to crankshaft.clustering.kmeans(query, no_clusters, no_init)
-- and returns its rows as (cartodb_id, cluster_no).
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
RETURNS table (cartodb_id integer, cluster_no integer)
LANGUAGE plpythonu
AS $$
  from crankshaft.clustering import kmeans as run_kmeans
  cluster_rows = run_kmeans(query, no_clusters, no_init)
  return cluster_rows
$$;
|
||||
|
||||
|
||||
-- State transition step of the weighted-mean aggregate.
-- state is {sum(x*w), sum(y*w), sum(w)}; a row whose weight or geometry is
-- NULL leaves the three totals untouched.
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
RETURNS Numeric[] AS
$$
DECLARE
  -- Running totals start from the incoming state.
  sum_x NUMERIC := state[1];
  sum_y NUMERIC := state[2];
  sum_w NUMERIC := state[3];
BEGIN
  IF weight IS NOT NULL AND the_geom IS NOT NULL THEN
    sum_x := sum_x + ST_X(the_geom)*weight;
    sum_y := sum_y + ST_Y(the_geom)*weight;
    sum_w := sum_w + weight;
  END IF;
  RETURN ARRAY[sum_x, sum_y, sum_w];
END
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Final step of the weighted-mean aggregate: turns the accumulated
-- {sum(x*w), sum(y*w), sum(w)} into an SRID 4326 point.
-- With a total weight of 0 the raw sums are used as coordinates.
CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
RETURNS GEOMETRY AS
$$
DECLARE
  -- Dividing by 1 when the total weight is 0 reproduces the undivided case.
  divisor NUMERIC := CASE WHEN state[3] = 0 THEN 1 ELSE state[3] END;
BEGIN
  RETURN ST_SetSRID(ST_MakePoint(state[1]/divisor, state[2]/divisor), 4326);
END
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Weighted mean of point geometries:
--   CDB_WeightedMeanS accumulates {sum(x*w), sum(y*w), sum(w)},
--   CDB_WeightedMeanF converts the totals into the resulting point.
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)(
  SFUNC = CDB_WeightedMeanS,
  FINALFUNC = CDB_WeightedMeanF,
  STYPE = Numeric[],
  -- A string literal (single quotes); double quotes denote an identifier.
  INITCOND = '{0.0,0.0,0.0}'
);
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
-- from a polygon table over an arbitrary polygon
-- (weighted by the area proportion overlapped).
-- Areal weighting is a very simple form of areal interpolation.
--
-- Parameters:
--   * geom a Polygon geometry which defines the area where a value will be
--     estimated as the area-weighted sum of a given table/column
--   * target_table_name table name of the table that provides the values
--   * target_column column name of the column that provides the values
--   * schema_name optional parameter to define the schema the target table
--     belongs to, which is necessary if it is not in the search_path.
--     Note that target_table_name should never include the schema in it.
-- Return value:
--   Areal-weighted interpolation of the column values over the geometry
--   (NULL when no row's the_geom bounding box overlaps geom).
CREATE OR REPLACE
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
  RETURNS numeric AS
$$
DECLARE
  result numeric;
  qualified_name text;
BEGIN
  IF schema_name IS NULL THEN
    qualified_name := Format('%I', target_table_name);
  ELSE
    -- Quote the table name as an identifier here too (%I, not %s) so both
    -- branches treat it the same way and it cannot inject SQL.
    qualified_name := Format('%I.%I', schema_name, target_table_name);
  END IF;

  -- qualified_name is already quoted above, hence %s; the geometry is bound
  -- as $1 rather than interpolated into the statement text.
  EXECUTE Format('
    SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
    FROM %s AS a
    WHERE $1 && a.the_geom
  ', target_column, qualified_name)
  INTO result
  USING geom;

  RETURN result;
END;
$$ LANGUAGE plpgsql;
|
||||
--
-- Creates N points randomly distributed inside the polygon
--
-- @param geom - the geometry to be turned into points
--
-- @param no_points - the number of points to generate
--                    (NULL or <= 0 yields NULL)
--
-- @param max_iter_per_point - each point is found by clipping a random
-- horizontal scanline of the bounding box to the geometry.
-- max_iter_per_point specifies how many scanlines may miss the geometry
-- for a single point before the function gives up and returns the points
-- gathered so far.
--
-- Returns: Multipoint with the requested points
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
RETURNS GEOMETRY AS $$
DECLARE
  extent GEOMETRY;
  test_point GEOMETRY;
  width NUMERIC;
  height NUMERIC;
  x0 NUMERIC;
  y0 NUMERIC;
  yp NUMERIC;
  no_left INTEGER;
  remaining_iterations INTEGER;
  points GEOMETRY[];
  bbox_line GEOMETRY;
  intersection_line GEOMETRY;
BEGIN
  extent := ST_Envelope(geom);
  x0 := ST_XMin(extent);
  y0 := ST_YMin(extent);
  width := ST_XMax(extent) - x0;
  height := ST_YMax(extent) - y0;
  no_left := no_points;

  -- "> 0" (instead of "= 0 then exit") also terminates for NULL/negative input.
  WHILE no_left > 0 LOOP
    remaining_iterations := max_iter_per_point;
    test_point := NULL;

    WHILE test_point IS NULL AND remaining_iterations > 0 LOOP
      yp := y0 + height*random();
      -- ST_MakePoint takes (x, y): the scanline runs from the left to the
      -- right edge of the bounding box at height yp. The SRID is taken from
      -- geom so ST_Intersection never sees mixed SRIDs.
      bbox_line := ST_SetSRID(
        ST_MakeLine(ST_MakePoint(x0, yp), ST_MakePoint(x0 + width, yp)),
        ST_SRID(geom)
      );
      intersection_line := ST_Intersection(bbox_line, geom);

      -- The clip may be a single line, several lines (concave shapes, holes)
      -- or nothing. Pick one linear piece with probability proportional to
      -- its length (key = random()^(1/length), largest key wins) and
      -- interpolate a random position on it. No piece => test_point is NULL.
      SELECT ST_LineInterpolatePoint(seg.geom, random())
        INTO test_point
        FROM ST_Dump(intersection_line) AS seg
       WHERE ST_GeometryType(seg.geom) = 'ST_LineString'
       ORDER BY power(random(), 1.0 / NULLIF(ST_Length(seg.geom), 0)) DESC NULLS LAST
       LIMIT 1;

      remaining_iterations := remaining_iterations - 1;
    END LOOP;

    -- Miss budget exhausted: give up and return what has been collected.
    EXIT WHEN test_point IS NULL;

    points := points || test_point;
    no_left := no_left - 1;
  END LOOP;

  RETURN ST_Collect(points);
END;
$$
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
8
release/crankshaft--0.0.4--0.0.3.sql
Normal file
8
release/crankshaft--0.0.4--0.0.3.sql
Normal file
@@ -0,0 +1,8 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Reports the release version of the extension as text.
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text
LANGUAGE sql STABLE STRICT
AS $$
  SELECT CAST('0.0.3' AS text);
$$;
|
||||
258
release/crankshaft--0.0.4--0.1.0.sql
Normal file
258
release/crankshaft--0.0.4--0.1.0.sql
Normal file
@@ -0,0 +1,258 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED FROM SOURCES
|
||||
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Reports the release version of the extension as text.
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text
LANGUAGE sql STABLE STRICT
AS $$
  SELECT CAST('0.1.0' AS text);
$$;
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- PyAgg stuff
|
||||
-- State transition step of CDB_PyAgg: concatenates every incoming row onto
-- one flat array. On the first row (state still without elements) the
-- row's upper bound is stored as element 1, ahead of the data.
CREATE OR REPLACE FUNCTION
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
returns NUMERIC[] as $$
DECLARE
  row_width integer := array_upper(current_row, 1);
BEGIN
  IF array_upper(current_state, 1) IS NULL THEN
    RAISE NOTICE 'setting state %', row_width;
    current_state[1] := row_width;
  END IF;
  RETURN array_cat(current_state, current_row);
END
$$ LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- Aggregate that flattens NUMERIC[] rows into a single NUMERIC[] using
-- CDB_PyAggS, starting from an empty array.
CREATE AGGREGATE CDB_PyAgg(NUMERIC[])(
  SFUNC = CDB_PyAggS,
  STYPE = Numeric[],
  -- A string literal (single quotes); double quotes denote an identifier.
  INITCOND = '{}'
);
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Segmentation stuff
|
||||
-- Array form of the segmentation entry point.
--
-- features and target_features are flat arrays whose first element is the
-- row width, followed by the row values (the layout produced by CDB_PyAggS).
-- They are reshaped to 2-D and passed, with target, target_ids and the
-- model parameters, to
-- crankshaft.segmentation.create_and_predict_segment_agg.
CREATE OR REPLACE FUNCTION
  CDB_CreateAndPredictSegment(
      target NUMERIC[],
      features NUMERIC[],
      target_features NUMERIC[],
      target_ids NUMERIC[],
      n_estimators INTEGER DEFAULT 1200,
      max_depth INTEGER DEFAULT 3,
      subsample DOUBLE PRECISION DEFAULT 0.5,
      learning_rate DOUBLE PRECISION DEFAULT 0.01,
      min_samples_leaf INTEGER DEFAULT 1)
RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC)
AS $$
    import numpy as np

    from crankshaft.segmentation import create_and_predict_segment_agg
    model_params = {'n_estimators': n_estimators,
                    'max_depth': max_depth,
                    'subsample': subsample,
                    'learning_rate': learning_rate,
                    'min_samples_leaf': min_samples_leaf}

    def unpack2D(data):
        # data[0] is the row width. NUMERIC values do not arrive as Python
        # ints, so convert explicitly: reshape needs integer dimensions.
        # Slicing (rather than pop) leaves the caller's list untouched.
        dimension = int(data[0])
        return np.array(data[1:], dtype=float).reshape(-1, dimension)

    return create_and_predict_segment_agg(np.array(target, dtype=float),
            unpack2D(features),
            unpack2D(target_features),
            target_ids,
            model_params)

$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Query form of the segmentation entry point: bundles the tuning arguments
-- into a dictionary and calls
-- crankshaft.segmentation.create_and_predict_segment with the query,
-- variable name, target table and that dictionary.
CREATE OR REPLACE FUNCTION
  CDB_CreateAndPredictSegment (
      query TEXT,
      variable_name TEXT,
      target_table TEXT,
      n_estimators INTEGER DEFAULT 1200,
      max_depth INTEGER DEFAULT 3,
      subsample DOUBLE PRECISION DEFAULT 0.5,
      learning_rate DOUBLE PRECISION DEFAULT 0.01,
      min_samples_leaf INTEGER DEFAULT 1)
RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC)
LANGUAGE plpythonu
AS $$
  from crankshaft.segmentation import create_and_predict_segment as run_segmentation
  model_params = dict(
      n_estimators=n_estimators,
      max_depth=max_depth,
      subsample=subsample,
      learning_rate=learning_rate,
      min_samples_leaf=min_samples_leaf,
  )
  return run_segmentation(query, variable_name, target_table, model_params)
$$;
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Spatial interpolation (query form)
--
-- method:
--   0: nearest neighbor
--   1: barycentric
--   2: IDW
--
-- `query` must return the columns the_geom and attrib. Its rows are
-- aggregated into arrays and handed to the array form of
-- CDB_SpatialInterpolation together with point, method, p1 and p2.
-- NOTE: `query` is concatenated into the executed statement as-is, so it
-- must come from a trusted source.
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
    IN query text,
    IN point geometry,
    IN method integer DEFAULT 1,
    IN p1 numeric DEFAULT 0,
    IN p2 numeric DEFAULT 0
    )
RETURNS numeric AS
$$
DECLARE
    gs geometry[];
    vs numeric[];
    output numeric;
BEGIN
    EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs;
    -- No FROM clause: the CTE "a" only exists inside the EXECUTE above, so
    -- selecting FROM a here failed with "relation a does not exist".
    SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output;

    RETURN output;
END;
$$
-- Runs an arbitrary query against tables, so the result is not a pure
-- function of the arguments and must not be declared IMMUTABLE.
language plpgsql VOLATILE;
|
||||
|
||||
-- Spatial interpolation (array form)
--
-- Estimates a value at `point` from sample geometries `geomin` and their
-- values `colin` (parallel arrays).
--
-- method:
--   0: nearest neighbor - value of the closest sample
--   1: barycentric      - area-weighted mix of the three vertices of the
--                         Delaunay triangle containing the point
--   2: IDW              - inverse distance weighting
--                         p1: limit the number of neighbors, 0 -> no limit
--                         p2: order of distance decay, 0 -> order 1
--
-- Sentinel return values:
--   -888.888  method 1 and the point lies outside every triangle
--   -777.777  unknown method
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
    IN geomin geometry[],
    IN colin numeric[],
    IN point geometry,
    IN method integer DEFAULT 1,
    IN p1 numeric DEFAULT 0,
    IN p2 numeric DEFAULT 0
    )
RETURNS numeric AS
$$
DECLARE
    gs geometry[];
    vs numeric[];
    gs2 geometry[];
    vs2 numeric[];
    g geometry;
    vertex geometry[];
    sg numeric;
    sa numeric;
    sb numeric;
    sc numeric;
    va numeric;
    vb numeric;
    vc numeric;
    output numeric;
BEGIN
    output := -999.999;

    -- nearest
    IF method = 0 THEN

        WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v)
        SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1;
        RETURN output;

    -- barycentric
    ELSIF method = 1 THEN
        -- triangle of the Delaunay triangulation that contains the point
        WITH a as (SELECT unnest(geomin) AS e),
        b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
        c as (SELECT (ST_Dump(t)).geom as v FROM b),
        d as (SELECT v FROM c WHERE ST_Within(point, v))
        SELECT v INTO g FROM d;
        IF g is null THEN
            -- out of the realm of the input data
            RETURN -888.888;
        END IF;

        -- vertex of the selected cell
        WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v)
        SELECT array_agg(v) INTO vertex FROM a;

        -- retrieve the value of each vertex: look the vertex up among the
        -- input samples (geomin/colin are the parallel arrays; pairing
        -- `vertex` itself with colin matched values by position only)
        WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
        SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
        WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
        SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
        WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
        SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);

        -- sg: area of the whole triangle; sa/sb/sc: area of the sub-triangle
        -- opposite to vertex 1/2/3
        SELECT ST_area(g),
               ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))),
               ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))),
               ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[2], point])))
          INTO sg, sa, sb, sc;

        output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / sg;
        RETURN output;

    -- IDW
    -- p1: limit the number of neighbors, 0->no limit
    -- p2: order of distance decay, 0-> order 1
    ELSIF method = 2 THEN

        IF p2 = 0 THEN
            p2 := 1;
        END IF;

        -- samples ordered by distance to the point
        WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v),
        b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g)
        SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b;

        IF p1::integer>0 THEN
            -- keep only the p1 nearest samples (a slice past the end of the
            -- array simply yields all of them)
            gs2 := gs[1:p1::integer];
            vs2 := vs[1:p1::integer];
        ELSE
            gs2:=gs;
            vs2:=vs;
        END IF;

        -- A sample exactly at the point would divide by zero below; its
        -- value is the answer.
        WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v)
        SELECT a.v INTO output FROM a WHERE ST_Distance(point, a.g) = 0 LIMIT 1;
        IF FOUND THEN
            RETURN output;
        END IF;

        WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v),
        b as (
            SELECT
            (1/ST_distance(point, a.g)^p2::integer) as k,
            (a.v/ST_distance(point, a.g)^p2::integer) as f
            FROM a
        )
        SELECT sum(b.f)/sum(b.k) INTO output FROM b;
        RETURN output;

    END IF;

    RETURN -777.777;

END;
$$
language plpgsql IMMUTABLE;
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Spatial Markov

-- input table format:
-- id | geom | date_1 | date_2 | date_3
--  1 | Pt1  | 12.3   | 13.1   | 14.2
--  2 | Pt2  | 11.0   | 13.2   | 12.5
-- ...
-- Sample Function call:
-- SELECT CDB_SpatialMarkovTrend('SELECT * FROM real_estate',
--                               Array['date_1', 'date_2', 'date_3'])
--
-- Every argument is passed, in declaration order, to
-- crankshaft.space_time_dynamics.spatial_markov_trend.
CREATE OR REPLACE FUNCTION
  CDB_SpatialMarkovTrend (
      subquery TEXT,
      time_cols TEXT[],
      num_classes INT DEFAULT 7,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT)
LANGUAGE plpythonu
AS $$
  from crankshaft.space_time_dynamics import spatial_markov_trend as run_markov_trend

  ## TODO: use named parameters or a dictionary
  trend_rows = run_markov_trend(subquery, time_cols, num_classes, w_type,
                                num_ngbrs, permutations, geom_col, id_col)
  return trend_rows
$$;
|
||||
403
release/crankshaft--0.0.4.sql
Normal file
403
release/crankshaft--0.0.4.sql
Normal file
@@ -0,0 +1,403 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Reports the release version of the extension as text.
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text
LANGUAGE sql STABLE STRICT
AS $$
  SELECT CAST('0.0.4' AS text);
$$;
|
||||
|
||||
-- Internal identifier of the installed extension instance,
-- e.g. 'dev' for the current development version.
-- Reads installed_version from pg_available_extensions for 'crankshaft'.
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
RETURNS text
LANGUAGE sql STABLE STRICT
AS $$
  SELECT ext.installed_version
    FROM pg_available_extensions AS ext
   WHERE ext.name = 'crankshaft'
     -- whole-row test, kept as in the original query
     AND ext IS NOT NULL;
$$;
|
||||
-- Internal function.
-- Seeds the RNGs (Random Number Generators) used internally by handing
-- seed_value to crankshaft.random_seeds.set_random_seeds.
CREATE OR REPLACE FUNCTION
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
LANGUAGE plpythonu
AS $$
  from crankshaft.random_seeds import set_random_seeds
  set_random_seeds(seed_value)
$$;
|
||||
-- Moran's I Global Measure (public-facing)
--
-- Forwards all arguments, positionally and unchanged, to
-- crankshaft.clustering.moran and returns its result as
-- (moran, significance) rows.
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestGlobal(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
AS $$
  # Import the function that is actually called: importing moran_local here
  # left the name `moran` undefined and every call raised NameError.
  from crankshaft.clustering import moran
  # TODO: use named parameters or a dictionary
  return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (internal function)
-- Hands every argument, in declaration order, to
-- crankshaft.clustering.moran_local and returns whatever it yields.
CREATE OR REPLACE FUNCTION
  _CDB_AreasOfInterestLocal(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT,
      num_ngbrs INT,
      permutations INT,
      geom_col TEXT,
      id_col TEXT)
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE plpythonu
AS $$
  from crankshaft.clustering import moran_local as run_moran_local
  # TODO: use named parameters or a dictionary
  local_rows = run_moran_local(subquery, column_name, w_type,
                               num_ngbrs, permutations, geom_col, id_col)
  return local_rows
$$;
|
||||
|
||||
-- Moran's I Local (public-facing function)
-- SQL wrapper that supplies defaults and returns every row produced by
-- cdb_crankshaft._CDB_AreasOfInterestLocal, unfiltered.
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestLocal(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, column_name, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi;
$$;
|
||||
|
||||
-- Moran's I only for HH and HL (public-facing function)
-- Same as the local measure, keeping only rows whose quads is 'HH' or 'HL'.
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialHotspots(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, column_name, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = ANY (ARRAY['HH', 'HL']);
$$;
|
||||
|
||||
-- Moran's I only for LL and LH (public-facing function)
-- Same as the local measure, keeping only rows whose quads is 'LL' or 'LH'.
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialColdspots(
      subquery TEXT,
      attr TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, attr, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = ANY (ARRAY['LL', 'LH']);
$$;
|
||||
|
||||
-- Moran's I only for LH and HL (public-facing function)
-- Same as the local measure, keeping only rows whose quads is 'HL' or 'LH'.
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialOutliers(
      subquery TEXT,
      attr TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE SQL
AS $$
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, attr, w_type, num_ngbrs,
           permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = ANY (ARRAY['HL', 'LH']);
$$;
|
||||
|
||||
-- Moran's I Global Rate (public-facing function)
--
-- Forwards all arguments, positionally and unchanged, to
-- crankshaft.clustering.moran_rate and returns its result as
-- (moran, significance) rows.
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestGlobalRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran FLOAT, significance FLOAT)
AS $$
  # Import the function that is actually called: importing moran_local here
  # left the name `moran_rate` undefined and every call raised NameError.
  from crankshaft.clustering import moran_rate
  # TODO: use named parameters or a dictionary
  return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
-- Moran's I Local Rate (internal function)
-- Hands every argument, in declaration order, to
-- crankshaft.clustering.moran_local_rate and returns whatever it yields.
CREATE OR REPLACE FUNCTION
  _CDB_AreasOfInterestLocalRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT,
      num_ngbrs INT,
      permutations INT,
      geom_col TEXT,
      id_col TEXT)
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
LANGUAGE plpythonu
AS $$
  from crankshaft.clustering import moran_local_rate as run_moran_local_rate
  # TODO: use named parameters or a dictionary
  rate_rows = run_moran_local_rate(subquery, numerator, denominator, w_type,
                                   num_ngbrs, permutations, geom_col, id_col)
  return rate_rows
$$;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestLocalRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  -- Public wrapper that supplies defaults and returns every row produced by
  -- the internal local-rate function.
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type,
           num_ngbrs, permutations, geom_col, id_col) AS aoi;
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialHotspotsRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  -- Runs the internal local-rate function and keeps only the rows whose
  -- quads value is 'HH' or 'HL'.
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type,
           num_ngbrs, permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = 'HH' OR aoi.quads = 'HL';
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialColdspotsRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  -- Runs the internal local-rate function and keeps only the rows whose
  -- quads value is 'LL' or 'LH'.
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type,
           num_ngbrs, permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = 'LL' OR aoi.quads = 'LH';
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialOutliersRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  -- Runs the internal local-rate function and keeps only the rows whose
  -- quads value is 'HL' or 'LH'.
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type,
           num_ngbrs, permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = 'HL' OR aoi.quads = 'LH';
$$ LANGUAGE SQL;
|
||||
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
RETURNS table (cartodb_id integer, cluster_no integer) as $$
  # Thin PL/Python shim: forwards the query, the requested number of clusters
  # and the number of initialisations to crankshaft's kmeans.
  from crankshaft.clustering import kmeans as run_kmeans
  kmeans_args = (query, no_clusters, no_init)
  return run_kmeans(*kmeans_args)
$$ language plpythonu;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
RETURNS Numeric[] AS
$$
-- State transition function of the CDB_WeightedMean aggregate.
-- state is {sum of x*weight, sum of y*weight, sum of weights}; a row whose
-- weight or geometry is NULL leaves the state unchanged.
DECLARE
  sum_x NUMERIC := state[1];
  sum_y NUMERIC := state[2];
  sum_w NUMERIC := state[3];
BEGIN
  IF weight IS NOT NULL AND the_geom IS NOT NULL THEN
    sum_x := sum_x + ST_X(the_geom)*weight;
    sum_y := sum_y + ST_Y(the_geom)*weight;
    sum_w := sum_w + weight;
  END IF;
  RETURN Array[sum_x, sum_y, sum_w];
END
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
RETURNS GEOMETRY AS
$$
-- Final function of the CDB_WeightedMean aggregate: turns the accumulated
-- {sum_x, sum_y, sum_w} state into a point with SRID 4326.
DECLARE
  acc_x NUMERIC := state[1];
  acc_y NUMERIC := state[2];
  total_weight NUMERIC := state[3];
BEGIN
  -- A zero total weight would divide by zero, so return the raw sums instead.
  IF total_weight = 0 THEN
    RETURN ST_SetSRID(ST_MakePoint(acc_x, acc_y), 4326);
  END IF;
  RETURN ST_SetSRID(ST_MakePoint(acc_x/total_weight, acc_y/total_weight), 4326);
END
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
  -- The state is a three-element numeric array that starts at zero; it is
  -- advanced by CDB_WeightedMeanS and turned into a point by CDB_WeightedMeanF.
  STYPE     = Numeric[],
  INITCOND  = "{0.0,0.0,0.0}",
  SFUNC     = CDB_WeightedMeanS,
  FINALFUNC = CDB_WeightedMeanF
);
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
|
||||
-- from a polygon table over an arbitrary polygon
|
||||
-- (weighted by the area proportion overlapped)
|
||||
-- Areal weighting is a very simple form of areal interpolation.
|
||||
--
|
||||
-- Parameters:
|
||||
-- * geom a Polygon geometry which defines the area where a value will be
|
||||
-- estimated as the area-weighted sum of a given table/column
|
||||
-- * target_table_name table name of the table that provides the values
|
||||
-- * target_column column name of the column that provides the values
|
||||
-- * schema_name optional parameter to define the schema the target table
|
||||
-- belongs to, which is necessary if it's not in the search_path.
|
||||
-- Note that target_table_name should never include the schema in it.
|
||||
-- Return value:
|
||||
-- Areal-weighted interpolation of the column values over the geometry
|
||||
CREATE OR REPLACE
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
  RETURNS numeric AS
$$
-- Sums target_column over the rows of the target table whose the_geom
-- bounding box overlaps geom, weighting each value by the fraction of the
-- row's area that intersects geom. Returns NULL when no row qualifies.
DECLARE
  result numeric;
  qualified_name text;
BEGIN
  -- Quote every identifier with %I. The table name used to be interpolated
  -- with %s when a schema was supplied, which left it unquoted (wrong for
  -- mixed-case or otherwise special names, and open to SQL injection).
  IF schema_name IS NULL THEN
    qualified_name := Format('%I', target_table_name);
  ELSE
    qualified_name := Format('%I.%I', schema_name, target_table_name);
  END IF;
  -- qualified_name is already quoted above, hence %s here; the geometry is
  -- passed as a bound parameter ($1) rather than spliced into the text.
  EXECUTE Format('
    SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
    FROM %s AS a
    WHERE $1 && a.the_geom
    ', target_column, qualified_name)
  USING geom
  INTO result;
  RETURN result;
END;
$$ LANGUAGE plpgsql;
|
||||
--
|
||||
-- Creates N points randomly distributed within the polygon
|
||||
--
|
||||
-- @param g - the geometry to be turned in to points
|
||||
--
|
||||
-- @param no_points - the number of points to generate
|
||||
--
|
||||
-- @params max_iter_per_point - the function generates points in the polygon's bounding box
|
||||
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
|
||||
-- misses per point the function accepts before giving up.
|
||||
--
|
||||
-- Returns: Multipoint with the requested points
|
||||
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
RETURNS GEOMETRY AS $$
-- Generates no_points random points by intersecting random horizontal
-- scanlines of geom's bounding box with geom and picking a random position
-- along each intersection. The points are returned collected into a single
-- geometry.
--
-- max_iter_per_point is kept for interface compatibility; this
-- implementation does not use it.
DECLARE
  extent GEOMETRY;
  test_point Geometry;
  width NUMERIC;
  height NUMERIC;
  x0 NUMERIC;
  y0 NUMERIC;
  yp NUMERIC;
  srid INTEGER;
  points GEOMETRY[];
  bbox_line GEOMETRY;
  intersection_line GEOMETRY;
BEGIN
  extent := ST_Envelope(geom);
  width := ST_XMax(extent) - ST_XMIN(extent);
  height := ST_YMax(extent) - ST_YMIN(extent);
  x0 := ST_XMin(extent);
  y0 := ST_YMin(extent);
  -- Build the scanlines in the SRID of the input instead of a hard-coded
  -- 4326, so inputs in other SRIDs do not hit a mixed-SRID intersection.
  srid := ST_SRID(geom);

  -- A bounded FOR loop terminates for zero, negative or NULL no_points; the
  -- previous "count down until exactly 0" loop never ended for those.
  FOR i IN 1..COALESCE(no_points, 0) LOOP
    yp := y0 + height*random();
    -- ST_MakePoint takes (x, y): the scanline runs from the left to the right
    -- edge of the bounding box at height yp. The arguments used to be
    -- swapped, which put the line in the wrong place.
    bbox_line := ST_MakeLine(
      ST_SetSRID(ST_MakePoint(x0, yp), srid),
      ST_SetSRID(ST_MakePoint(x0 + width, yp), srid)
    );
    intersection_line := ST_Intersection(bbox_line, geom);
    -- NOTE(review): when the scanline crosses geom in several disjoint
    -- pieces the intersection is multi-part; confirm how st_makeline treats
    -- that input.
    test_point := ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
    points := points || test_point;
  END LOOP;
  RETURN ST_Collect(points);
END;
$$
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;  -- schema usage only; no function privileges are granted here
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
81
release/crankshaft--0.1.0--0.0.4.sql
Normal file
81
release/crankshaft--0.1.0--0.0.4.sql
Normal file
@@ -0,0 +1,81 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED FROM SOURCES
|
||||
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text AS $$
  -- Version this downgrade script leaves installed.
  SELECT CAST('0.0.4' AS text);
$$ LANGUAGE sql STABLE STRICT;
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Spatial Markov
|
||||
|
||||
-- Argument names are not part of a function's identity, so only the
-- argument types are listed.
DROP FUNCTION CDB_SpatialMarkovTrend(TEXT, TEXT[], INT, TEXT, INT, INT, TEXT, TEXT);
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Spatial interpolation
|
||||
|
||||
-- Array-based overload (geometries and values passed as arrays).
DROP FUNCTION CDB_SpatialInterpolation(geometry[], numeric[], geometry, integer, numeric, numeric);
|
||||
|
||||
-- Query-based overload (samples supplied as a query string).
DROP FUNCTION CDB_SpatialInterpolation(text, geometry, integer, numeric, numeric);
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Segmentation stuff
|
||||
|
||||
-- Query-based overload.
DROP FUNCTION CDB_CreateAndPredictSegment(
    TEXT, TEXT, TEXT,
    INTEGER, INTEGER, DOUBLE PRECISION, DOUBLE PRECISION, INTEGER);
|
||||
|
||||
-- Array-based overload.
DROP FUNCTION CDB_CreateAndPredictSegment(
    NUMERIC[], NUMERIC[], NUMERIC[], NUMERIC[],
    INTEGER, INTEGER, DOUBLE PRECISION, DOUBLE PRECISION, INTEGER);
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- PyAgg stuff
|
||||
|
||||
-- The aggregate goes first: it uses CDB_PyAggS as its state function.
DROP AGGREGATE CDB_PyAgg(numeric[]);
DROP FUNCTION CDB_PyAggS(numeric[], numeric[]);
|
||||
686
release/crankshaft--0.1.0.sql
Normal file
686
release/crankshaft--0.1.0.sql
Normal file
@@ -0,0 +1,686 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text AS $$
  -- Release version of this extension script.
  SELECT CAST('0.1.0' AS text);
$$ LANGUAGE sql STABLE STRICT;
|
||||
|
||||
-- Internal identifier of the installed extension instance
|
||||
-- e.g. 'dev' for current development version
|
||||
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
RETURNS text AS $$
  -- Reads the installed version of the 'crankshaft' extension from the
  -- pg_available_extensions catalog view.
  -- NOTE(review): the second predicate tests the whole row for NOT NULL;
  -- confirm that this (rather than installed_version alone) is intended.
  SELECT pg_available_extensions.installed_version
    FROM pg_available_extensions
   WHERE pg_available_extensions.name = 'crankshaft'
     AND pg_available_extensions IS NOT NULL;
$$ LANGUAGE sql STABLE STRICT;
|
||||
-- Internal function.
|
||||
-- Set the seeds of the RNGs (Random Number Generators)
|
||||
-- used internally.
|
||||
CREATE OR REPLACE FUNCTION
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
AS $$
  # Passes seed_value to crankshaft's random_seeds.set_random_seeds.
  from crankshaft import random_seeds as seed_module
  seed_module.set_random_seeds(seed_value)
$$ LANGUAGE plpythonu;
|
||||
CREATE OR REPLACE FUNCTION
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
returns NUMERIC[] as $$
-- State transition function of CDB_PyAgg: appends current_row to the flat
-- state array. On the first row (empty state) the row's upper bound is stored
-- as element 1, so consumers can recover the row width.
DECLARE
  row_width INTEGER := array_upper(current_row, 1);
BEGIN
  IF array_upper(current_state, 1) IS NULL THEN
    RAISE NOTICE 'setting state %', row_width;
    current_state[1] := row_width;
  END IF;
  RETURN array_cat(current_state, current_row);
END
$$ LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
  -- Starts from an empty numeric array; CDB_PyAggS appends every input row.
  STYPE    = Numeric[],
  INITCOND = "{}",
  SFUNC    = CDB_PyAggS
);
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION
  CDB_CreateAndPredictSegment(
      target NUMERIC[],
      features NUMERIC[],
      target_features NUMERIC[],
      target_ids NUMERIC[],
      n_estimators INTEGER DEFAULT 1200,
      max_depth INTEGER DEFAULT 3,
      subsample DOUBLE PRECISION DEFAULT 0.5,
      learning_rate DOUBLE PRECISION DEFAULT 0.01,
      min_samples_leaf INTEGER DEFAULT 1)
RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC)
AS $$
  # Array-based entry point: unpacks the flattened feature arrays into 2-D
  # numpy arrays and delegates to crankshaft's create_and_predict_segment_agg.
  #
  # features / target_features are flat arrays whose first element is the
  # number of columns per row (the layout CDB_PyAggS builds), followed by the
  # row values.
  import numpy as np
  import plpy

  from crankshaft.segmentation import create_and_predict_segment_agg

  model_params = {'n_estimators': n_estimators,
                  'max_depth': max_depth,
                  'subsample': subsample,
                  'learning_rate': learning_rate,
                  'min_samples_leaf': min_samples_leaf}

  def unpack2D(data):
      # The row width arrives as a NUMERIC, so convert it to int before it is
      # used as an array dimension (the old len(a)/dimension handed a
      # non-integer to reshape). Slicing instead of pop(0) leaves the
      # caller's list untouched.
      dimension = int(data[0])
      a = np.array(data[1:], dtype=float)
      # -1 lets numpy derive the row count; it still raises if the data
      # length is not a multiple of dimension.
      return a.reshape(-1, dimension)

  return create_and_predict_segment_agg(np.array(target, dtype=float),
                                        unpack2D(features),
                                        unpack2D(target_features),
                                        target_ids,
                                        model_params)
$$ LANGUAGE plpythonu;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
  CDB_CreateAndPredictSegment (
      query TEXT,
      variable_name TEXT,
      target_table TEXT,
      n_estimators INTEGER DEFAULT 1200,
      max_depth INTEGER DEFAULT 3,
      subsample DOUBLE PRECISION DEFAULT 0.5,
      learning_rate DOUBLE PRECISION DEFAULT 0.01,
      min_samples_leaf INTEGER DEFAULT 1)
RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC)
AS $$
  # Query-based entry point: bundles the tuning arguments into a dictionary
  # and delegates to crankshaft's create_and_predict_segment.
  from crankshaft.segmentation import create_and_predict_segment
  model_params = dict(
      n_estimators=n_estimators,
      max_depth=max_depth,
      subsample=subsample,
      learning_rate=learning_rate,
      min_samples_leaf=min_samples_leaf)
  return create_and_predict_segment(query, variable_name, target_table, model_params)
$$ LANGUAGE plpythonu;
|
||||
-- 0: nearest neighbor
|
||||
-- 1: barycentric
|
||||
-- 2: IDW
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
    IN query text,
    IN point geometry,
    IN method integer DEFAULT 1,
    IN p1 numeric DEFAULT 0,
    IN p2 numeric DEFAULT 0
    )
RETURNS numeric AS
$$
-- Query-based front end of the interpolation: runs `query` (which must
-- expose the columns the_geom and attrib), aggregates both columns into
-- arrays and delegates to the array-based CDB_SpatialInterpolation overload.
-- method, p1 and p2 are passed through unchanged.
DECLARE
    gs geometry[];
    vs numeric[];
    output numeric;
BEGIN
    -- NOTE(review): `query` is spliced into the statement verbatim, so it
    -- must come from a trusted caller.
    EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs;
    -- The CTE "a" only exists inside the dynamic statement above; the former
    -- "... INTO output FROM a" referenced it here and failed to resolve it.
    output := CDB_SpatialInterpolation(gs, vs, point, method, p1, p2);

    RETURN output;
END;
$$
-- STABLE rather than IMMUTABLE: the result depends on table contents read by
-- `query`, so it must not be folded into a constant at plan time.
language plpgsql STABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
    IN geomin geometry[],
    IN colin numeric[],
    IN point geometry,
    IN method integer DEFAULT 1,
    IN p1 numeric DEFAULT 0,
    IN p2 numeric DEFAULT 0
    )
RETURNS numeric AS
$$
-- Interpolates a value at `point` from the samples (geomin[i], colin[i]).
--
-- method:
--   0  nearest neighbor
--   1  barycentric interpolation inside the Delaunay triangle containing point
--   2  IDW (inverse distance weighting)
--        p1: limit on the number of nearest neighbors used, 0 -> no limit
--        p2: order of the distance decay, 0 -> order 1
-- Sentinel return values:
--   -888.888  method 1 and point lies in no triangle of the triangulation
--   -777.777  method is none of 0, 1, 2
DECLARE
    gs geometry[];
    vs numeric[];
    gs2 geometry[];
    vs2 numeric[];
    g geometry;
    vertex geometry[];
    sg numeric;
    sa numeric;
    sb numeric;
    sc numeric;
    va numeric;
    vb numeric;
    vc numeric;
    output numeric;
BEGIN
    output := -999.999;
    -- nearest
    IF method = 0 THEN

        WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v)
        SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1;
        RETURN output;

    -- barycentric
    ELSIF method = 1 THEN
        WITH a as (SELECT unnest(geomin) AS e),
        b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
        c as (SELECT (ST_Dump(t)).geom as v FROM b),
        d as (SELECT v FROM c WHERE ST_Within(point, v))
        SELECT v INTO g FROM d;
        IF g is null THEN
            -- out of the realm of the input data
            RETURN -888.888;
        END IF;
        -- vertex of the selected cell
        WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v)
        SELECT array_agg(v) INTO vertex FROM a;

        -- retrieve the value of each vertex: look the vertex up among the
        -- INPUT geometries, which are index-aligned with colin. The lookup
        -- used to pair unnest(vertex) with unnest(colin), i.e. it read the
        -- first values of colin regardless of which samples formed the cell.
        WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
        SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
        WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
        SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
        WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
        SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);

        -- sg: area of the whole cell; sa/sb/sc: area of the sub-triangle
        -- opposite vertex 1/2/3, used as that vertex's weight.
        SELECT ST_area(g),
               ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))),
               ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))),
               ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[2], point])))
          INTO sg, sa, sb, sc;

        output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / sg;
        RETURN output;

    -- IDW
    -- p1: limit the number of neighbors, 0->no limit
    -- p2: order of distance decay, 0-> order 1
    ELSIF method = 2 THEN

        IF p2 = 0 THEN
            p2 := 1;
        END IF;

        -- samples sorted by distance to point
        WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v),
        b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g)
        SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b;
        IF p1::integer>0 THEN
            -- Keep only the p1 nearest samples. The previous code started
            -- from the full arrays and appended the first p1 entries again,
            -- so the limit never applied and the nearest samples were
            -- counted twice.
            gs2 := gs[1:p1::integer];
            vs2 := vs[1:p1::integer];
        ELSE
            gs2:=gs;
            vs2:=vs;
        END IF;

        -- A sample exactly at `point` has zero distance and would divide by
        -- zero below; the interpolated value there is the sample's own value.
        WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v)
        SELECT a.v INTO output FROM a WHERE ST_distance(point, a.g) = 0 LIMIT 1;
        IF FOUND THEN
            RETURN output;
        END IF;

        WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v),
        b as (
            SELECT
            (1/ST_distance(point, a.g)^p2::integer) as k,
            (a.v/ST_distance(point, a.g)^p2::integer) as f
            FROM a
        )
        SELECT sum(b.f)/sum(b.k) INTO output FROM b;
        RETURN output;

    END IF;

    RETURN -777.777;

END;
$$
language plpgsql IMMUTABLE;
|
||||
-- Moran's I Global Measure (public-facing)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestGlobal(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
AS $$
  # Global Moran's I for column_name over the rows of subquery.
  # All arguments are forwarded positionally to crankshaft's moran.
  #
  # Import the function that is actually called: the previous import
  # (moran_local) left moran undefined, so every call raised NameError.
  from crankshaft.clustering import moran
  # TODO: use named parameters or a dictionary
  return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (internal function)
|
||||
CREATE OR REPLACE FUNCTION
  _CDB_AreasOfInterestLocal(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT,
      num_ngbrs INT,
      permutations INT,
      geom_col TEXT,
      id_col TEXT)
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  # Thin PL/Python shim: hands every argument, in declaration order, to
  # crankshaft's moran_local and returns its result unchanged.
  from crankshaft.clustering import moran_local as local_fn
  # TODO: use named parameters or a dictionary
  call_args = (subquery, column_name, w_type,
               num_ngbrs, permutations, geom_col, id_col)
  return local_fn(*call_args)
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestLocal(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  -- Public wrapper that supplies defaults and returns every row produced by
  -- the internal local Moran's I function.
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, column_name, w_type,
           num_ngbrs, permutations, geom_col, id_col) AS aoi;
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialHotspots(
      subquery TEXT,
      column_name TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  -- Runs the internal local Moran's I function and keeps only the rows whose
  -- quads value is 'HH' or 'HL'.
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, column_name, w_type,
           num_ngbrs, permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = 'HH' OR aoi.quads = 'HL';
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialColdspots(
      subquery TEXT,
      attr TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  -- Runs the internal local Moran's I function and keeps only the rows whose
  -- quads value is 'LL' or 'LH'.
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, attr, w_type,
           num_ngbrs, permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = 'LL' OR aoi.quads = 'LH';
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialOutliers(
      subquery TEXT,
      attr TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  -- Runs the internal local Moran's I function and keeps only the rows whose
  -- quads value is 'HL' or 'LH'.
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocal(
           subquery, attr, w_type,
           num_ngbrs, permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = 'HL' OR aoi.quads = 'LH';
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Global Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestGlobalRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran FLOAT, significance FLOAT)
AS $$
  # Global Moran's I for a rate (numerator / denominator columns of subquery).
  # All arguments are forwarded positionally to crankshaft's moran_rate.
  #
  # Import the function that is actually called: the previous import
  # (moran_local) left moran_rate undefined, so every call raised NameError.
  from crankshaft.clustering import moran_rate
  # TODO: use named parameters or a dictionary
  return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
-- Moran's I Local Rate (internal function)
|
||||
CREATE OR REPLACE FUNCTION
  _CDB_AreasOfInterestLocalRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT,
      num_ngbrs INT,
      permutations INT,
      geom_col TEXT,
      id_col TEXT)
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  # Thin PL/Python shim: hands every argument, in declaration order, to
  # crankshaft's moran_local_rate and returns its result unchanged.
  from crankshaft.clustering import moran_local_rate as local_rate_fn
  # TODO: use named parameters or a dictionary
  call_args = (subquery, numerator, denominator, w_type,
               num_ngbrs, permutations, geom_col, id_col)
  return local_rate_fn(*call_args)
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_AreasOfInterestLocalRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  -- Public wrapper that supplies defaults and returns every row produced by
  -- the internal local-rate function.
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type,
           num_ngbrs, permutations, geom_col, id_col) AS aoi;
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialHotspotsRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  -- Runs the internal local-rate function and keeps only the rows whose
  -- quads value is 'HH' or 'HL'.
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type,
           num_ngbrs, permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = 'HH' OR aoi.quads = 'HL';
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialColdspotsRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  -- Runs the internal local-rate function and keeps only the rows whose
  -- quads value is 'LL' or 'LH'.
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type,
           num_ngbrs, permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = 'LL' OR aoi.quads = 'LH';
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
  CDB_GetSpatialOutliersRate(
      subquery TEXT,
      numerator TEXT,
      denominator TEXT,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
  -- Runs the internal local-rate function and keeps only the rows whose
  -- quads value is 'HL' or 'LH'.
  SELECT aoi.moran, aoi.quads, aoi.significance, aoi.rowid, aoi.vals
    FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(
           subquery, numerator, denominator, w_type,
           num_ngbrs, permutations, geom_col, id_col) AS aoi
   WHERE aoi.quads = 'HL' OR aoi.quads = 'LH';
$$ LANGUAGE SQL;
|
||||
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
RETURNS table (cartodb_id integer, cluster_no integer) as $$
  # Thin PL/Python shim: forwards the query, the requested number of clusters
  # and the number of initialisations to crankshaft's kmeans.
  from crankshaft.clustering import kmeans as run_kmeans
  kmeans_args = (query, no_clusters, no_init)
  return run_kmeans(*kmeans_args)
$$ language plpythonu;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
RETURNS Numeric[] AS
$$
-- State transition function of the CDB_WeightedMean aggregate.
-- state is {sum of x*weight, sum of y*weight, sum of weights}; a row whose
-- weight or geometry is NULL leaves the state unchanged.
DECLARE
  sum_x NUMERIC := state[1];
  sum_y NUMERIC := state[2];
  sum_w NUMERIC := state[3];
BEGIN
  IF weight IS NOT NULL AND the_geom IS NOT NULL THEN
    sum_x := sum_x + ST_X(the_geom)*weight;
    sum_y := sum_y + ST_Y(the_geom)*weight;
    sum_w := sum_w + weight;
  END IF;
  RETURN Array[sum_x, sum_y, sum_w];
END
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
RETURNS GEOMETRY AS
$$
-- Final function of the CDB_WeightedMean aggregate: turns the accumulated
-- {sum_x, sum_y, sum_w} state into a point with SRID 4326.
DECLARE
  acc_x NUMERIC := state[1];
  acc_y NUMERIC := state[2];
  total_weight NUMERIC := state[3];
BEGIN
  -- A zero total weight would divide by zero, so return the raw sums instead.
  IF total_weight = 0 THEN
    RETURN ST_SetSRID(ST_MakePoint(acc_x, acc_y), 4326);
  END IF;
  RETURN ST_SetSRID(ST_MakePoint(acc_x/total_weight, acc_y/total_weight), 4326);
END
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
  -- The state is a three-element numeric array that starts at zero; it is
  -- advanced by CDB_WeightedMeanS and turned into a point by CDB_WeightedMeanF.
  STYPE     = Numeric[],
  INITCOND  = "{0.0,0.0,0.0}",
  SFUNC     = CDB_WeightedMeanS,
  FINALFUNC = CDB_WeightedMeanF
);
|
||||
-- Spatial Markov
|
||||
|
||||
-- input table format:
|
||||
-- id | geom | date_1 | date_2 | date_3
|
||||
-- 1 | Pt1 | 12.3 | 13.1 | 14.2
|
||||
-- 2 | Pt2 | 11.0 | 13.2 | 12.5
|
||||
-- ...
|
||||
-- Sample Function call:
|
||||
-- SELECT CDB_SpatialMarkovTrend('SELECT * FROM real_estate',
|
||||
-- Array['date_1', 'date_2', 'date_3'])
|
||||
|
||||
CREATE OR REPLACE FUNCTION
  CDB_SpatialMarkovTrend (
      subquery TEXT,
      time_cols TEXT[],
      num_classes INT DEFAULT 7,
      w_type TEXT DEFAULT 'knn',
      num_ngbrs INT DEFAULT 5,
      permutations INT DEFAULT 99,
      geom_col TEXT DEFAULT 'the_geom',
      id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT)
AS $$
  # Thin PL/Python shim: hands every argument, in declaration order, to
  # crankshaft's spatial_markov_trend and returns its result unchanged.
  from crankshaft.space_time_dynamics import spatial_markov_trend as markov_trend_fn

  ## TODO: use named parameters or a dictionary
  call_args = (subquery, time_cols, num_classes, w_type,
               num_ngbrs, permutations, geom_col, id_col)
  return markov_trend_fn(*call_args)
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- input table format: identical to above but in a predictable format
|
||||
-- Sample function call:
|
||||
-- SELECT cdb_spatial_markov('SELECT * FROM real_estate',
|
||||
-- 'date_1')
|
||||
|
||||
|
||||
-- CREATE OR REPLACE FUNCTION
|
||||
-- cdb_spatial_markov (
|
||||
-- subquery TEXT,
|
||||
-- time_col_min text,
|
||||
-- time_col_max text,
|
||||
-- date_format text, -- '_YYYY_MM_DD'
|
||||
-- num_time_per_bin INT DEFAULT 1,
|
||||
-- permutations INT DEFAULT 99,
|
||||
-- geom_column TEXT DEFAULT 'the_geom',
|
||||
-- id_col TEXT DEFAULT 'cartodb_id',
|
||||
-- w_type TEXT DEFAULT 'knn',
|
||||
-- num_ngbrs int DEFAULT 5)
|
||||
-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
|
||||
-- AS $$
|
||||
-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
-- from crankshaft.clustering import moran_local
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
--
|
||||
-- -- input table format:
|
||||
-- -- id | geom | date | measurement
|
||||
-- -- 1 | Pt1 | 12/3 | 13.2
|
||||
-- -- 2 | Pt2 | 11/5 | 11.3
|
||||
-- -- 3 | Pt1 | 11/13 | 12.9
|
||||
-- -- 4 | Pt3 | 12/19 | 10.1
|
||||
-- -- ...
|
||||
--
|
||||
-- CREATE OR REPLACE FUNCTION
|
||||
-- cdb_spatial_markov (
|
||||
-- subquery TEXT,
|
||||
-- time_col text,
|
||||
-- num_time_per_bin INT DEFAULT 1,
|
||||
-- permutations INT DEFAULT 99,
|
||||
-- geom_column TEXT DEFAULT 'the_geom',
|
||||
-- id_col TEXT DEFAULT 'cartodb_id',
|
||||
-- w_type TEXT DEFAULT 'knn',
|
||||
-- num_ngbrs int DEFAULT 5)
|
||||
-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
|
||||
-- AS $$
|
||||
-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
-- from crankshaft.clustering import moran_local
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
|
||||
-- from a polygon table over an arbitrary polygon
|
||||
-- (weighted by the area proportion overlapped)
|
||||
-- Areal weighting is a very simple form of areal interpolation.
|
||||
--
|
||||
-- Parameters:
|
||||
-- * geom a Polygon geometry which defines the area where a value will be
|
||||
-- estimated as the area-weighted sum of a given table/column
|
||||
-- * target_table_name table name of the table that provides the values
|
||||
-- * target_column column name of the column that provides the values
|
||||
-- * schema_name optional parameter to define the schema the target table
|
||||
-- belongs to, which is necessary if it's not in the search_path.
|
||||
-- Note that target_table_name should never include the schema in it.
|
||||
-- Return value:
|
||||
-- Areal-weighted interpolation of the column values over the geometry
|
||||
-- Area-weighted sum of target_column over the rows of the target table whose
-- bounding box overlaps geom. Each row contributes
--   value * area(intersection(geom, row.the_geom)) / area(row.the_geom).
--
-- Returns NULL when no row matches (sum over an empty set).
CREATE OR REPLACE
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
  RETURNS numeric AS
$$
DECLARE
  result numeric;
  qualified_name text;
BEGIN
  -- Both the schema and the table name are quoted as identifiers (%I) so
  -- that mixed-case or unusual names work and neither value can inject SQL
  -- into the dynamic statement below.
  IF schema_name IS NULL THEN
    qualified_name := Format('%I', target_table_name);
  ELSE
    qualified_name := Format('%I.%I', schema_name, target_table_name);
  END IF;

  -- qualified_name is already quoted, hence %s; the geometry is bound as $1.
  EXECUTE Format('
    SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
    FROM %s AS a
    WHERE $1 && a.the_geom
  ', target_column, qualified_name)
  USING geom
  INTO result;

  RETURN result;
END;
$$ LANGUAGE plpgsql;
|
||||
--
|
||||
-- Creates N points randomly distributed within the polygon
|
||||
--
|
||||
-- @param g - the geometry to be turned in to points
|
||||
--
|
||||
-- @param no_points - the number of points to generate
|
||||
--
|
||||
-- @params max_iter_per_point - the function generates points in the polygon's bounding box
|
||||
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
|
||||
-- misses per point the function accepts before giving up.
|
||||
--
|
||||
-- Returns: Multipoint with the requested points
|
||||
-- Generates no_points random points inside geom.
--
-- For every point a horizontal scan line is drawn across the bounding box at
-- a random y, clipped to geom, and a random position along the clipped line
-- is taken.
--
-- geom               : geometry to fill with points
-- no_points          : number of points to generate; zero, negative or NULL
--                      produces no points
-- max_iter_per_point : accepted for interface compatibility; this
--                      implementation does not use it
--
-- Returns the collected points, or NULL when no point was generated.
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
RETURNS GEOMETRY AS $$
DECLARE
  extent            GEOMETRY;
  test_point        GEOMETRY;
  width             NUMERIC;
  height            NUMERIC;
  x0                NUMERIC;
  y0                NUMERIC;
  yp                NUMERIC;
  no_left           INTEGER;
  points            GEOMETRY[];
  bbox_line         GEOMETRY;
  intersection_line GEOMETRY;
BEGIN
  extent  := ST_Envelope(geom);
  width   := ST_XMax(extent) - ST_XMin(extent);
  height  := ST_YMax(extent) - ST_YMin(extent);
  x0      := ST_XMin(extent);
  y0      := ST_YMin(extent);
  no_left := no_points;

  LOOP
    -- "<= 0" (not "= 0") and the NULL check keep a negative or NULL
    -- no_points from looping forever.
    IF no_left IS NULL OR no_left <= 0 THEN
      EXIT;
    END IF;

    yp := y0 + height*random();

    -- Horizontal line at y = yp spanning the bounding box. ST_MakePoint takes
    -- (x, y), and the line carries the SRID of geom so the intersection does
    -- not fail on mixed SRIDs.
    bbox_line := ST_MakeLine(
      ST_SetSRID(ST_MakePoint(x0, yp), ST_SRID(geom)),
      ST_SetSRID(ST_MakePoint(x0 + width, yp), ST_SRID(geom))
    );

    intersection_line := ST_Intersection(bbox_line, geom);
    test_point := ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)), random());
    points := points || test_point;
    no_left := no_left - 1;
  END LOOP;

  RETURN ST_Collect(points);
END;
$$
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
@@ -1,5 +1,5 @@
|
||||
comment = 'CartoDB Spatial Analysis extension'
|
||||
default_version = '0.0.1'
|
||||
requires = 'plpythonu, postgis, cartodb'
|
||||
default_version = '0.1.0'
|
||||
requires = 'plpythonu, postgis'
|
||||
superuser = true
|
||||
schema = cdb_crankshaft
|
||||
0
release/python/.gitignore
vendored
Normal file
0
release/python/.gitignore
vendored
Normal file
@@ -10,7 +10,7 @@ from setuptools import setup, find_packages
|
||||
setup(
|
||||
name='crankshaft',
|
||||
|
||||
version='0.0.1',
|
||||
version='0.0.01',
|
||||
|
||||
description='CartoDB Spatial Analysis Python Library',
|
||||
|
||||
2
release/python/0.0.2/crankshaft/crankshaft/__init__.py
Normal file
2
release/python/0.0.2/crankshaft/crankshaft/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
import random_seeds
|
||||
import clustering
|
||||
@@ -0,0 +1 @@
|
||||
from moran import *
|
||||
321
release/python/0.0.2/crankshaft/crankshaft/clustering/moran.py
Normal file
321
release/python/0.0.2/crankshaft/crankshaft/clustering/moran.py
Normal file
@@ -0,0 +1,321 @@
|
||||
"""
|
||||
Moran's I geostatistics (global clustering & outliers presence)
|
||||
"""
|
||||
|
||||
# TODO: Fill in local neighbors which have null/NoneType values with the
|
||||
# average of their neighborhood
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
import plpy
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
def moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type):
    """
    Moran's I implementation for PL/Python
    Andy Eschbacher

    Runs the neighbors query for one attribute, builds the spatial weights
    and computes local Moran's I for every returned row.

    :param t: name of the table to query
    :param attr: name of the column holding the attribute
    :param significance: p-value threshold passed to lisa_sig_vals
    :param num_ngbrs: number of neighbors used for 'knn' weights
    :param permutations: accepted but currently not forwarded (see note below)
    :param geom_column: name of the geometry column
    :param id_col: name of the id column
    :param w_type: type of weights ('knn'; anything else uses queen)
    :returns: rows of (local I, significance label, pseudo p-value, id);
              a single row of four None values if the query fails
    """
    # TODO: ensure that the significance output can be smaller that 1e-3 (0.001)
    # TODO: make a wishlist of output features (zscores, pvalues, raw local lisa, what else?)

    plpy.notice('** Constructing query')

    # geometries with attributes that are null are ignored
    # resulting in a collection of not as near neighbors

    qvals = {"id_col": id_col,
             "attr1": attr,
             "geom_col": geom_column,
             "table": t,
             "num_ngbrs": num_ngbrs}

    q = get_query(w_type, qvals)

    try:
        r = plpy.execute(q)
        plpy.notice('** Query returned with %d rows' % len(r))
    except plpy.SPIError:
        plpy.notice('** Query failed: "%s"' % q)
        plpy.notice('** Exiting function')
        return zip([None], [None], [None], [None])

    y = get_attributes(r, 1)
    # Forward num_ngbrs so the kNN weight vectors have the same length as the
    # neighbor lists produced by the query (get_weight otherwise assumes 5).
    w = get_weight(r, w_type, num_ngbrs)

    # calculate LISA values
    # NOTE(review): `permutations` is not passed on here, so PySAL's default
    # is used. Forwarding it would change p-values and probably requires
    # regenerating the stored test fixture -- confirm before changing.
    lisa = ps.Moran_Local(y, w)

    # find units of significance
    lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance)

    plpy.notice('** Finished calculations')

    return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order)
|
||||
|
||||
|
||||
def moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type):
    """
    Moran's I Local Rate
    Andy Eschbacher

    Runs the neighbors query for a numerator/denominator pair, builds the
    spatial weights and computes local Moran's I on the rate.

    :param t: name of the table to query
    :param numerator: name of the numerator column
    :param denominator: name of the denominator column
    :param significance: p-value threshold passed to lisa_sig_vals
    :param num_ngbrs: number of neighbors used for 'knn' weights
    :param permutations: number of permutations passed to PySAL
    :param geom_column: name of the geometry column
    :param id_col: name of the id column
    :param w_type: type of weights ('knn'; anything else uses queen)
    :returns: rows of (local I, significance label, pseudo p-value, id, rate);
              a single row of five None values if the query fails
    """

    plpy.notice('** Constructing query')

    # geometries with attributes that are null are ignored
    # resulting in a collection of not as near neighbors

    # The query helpers number the attribute columns by the alphabetical order
    # of these keys and apply the "<> 0" filter to the second one. Using
    # attr1/attr2 keeps the numerator first and puts the zero check on the
    # denominator ("numerator"/"denominator" would sort the other way round).
    qvals = {"id_col": id_col,
             "attr1": numerator,
             "attr2": denominator,
             "geom_col": geom_column,
             "table": t,
             "num_ngbrs": num_ngbrs}

    q = get_query(w_type, qvals)

    try:
        r = plpy.execute(q)
        plpy.notice('** Query returned with %d rows' % len(r))
    except plpy.SPIError as err:
        plpy.notice('** Query failed: "%s"' % q)
        # report the actual error, not the exception class
        plpy.notice('** Error: %s' % err)
        plpy.notice('** Exiting function')
        # same number of columns as the success path below
        return zip([None], [None], [None], [None], [None])

    # len() works for any result sequence, not only PL/Python result objects
    plpy.notice('r.nrows() = %d' % len(r))

    ## collect attributes
    numer = get_attributes(r, 1)
    denom = get_attributes(r, 2)

    w = get_weight(r, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, w, permutations=permutations)

    # find units of significance
    lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance)

    plpy.notice('** Finished calculations')

    ## TODO: Decide on which return values here
    return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order, lisa.y)
|
||||
|
||||
def moran_local_bv(t, attr1, attr2, significance, num_ngbrs, permutations, geom_column, id_col, w_type):
    """
    Bivariate local Moran's I for PL/Python.

    Runs the neighbors query for two attributes, builds the spatial weights
    and computes the bivariate local Moran's I.

    :param t: name of the table to query
    :param attr1: name of the first attribute column
    :param attr2: name of the second attribute column
    :param significance: p-value threshold passed to lisa_sig_vals
    :param num_ngbrs: number of neighbors used for 'knn' weights
    :param permutations: number of permutations passed to PySAL
    :param geom_column: name of the geometry column
    :param id_col: name of the id column
    :param w_type: type of weights ('knn'; anything else uses queen)
    :returns: rows of (local I, significance label, pseudo p-value, id);
              a single row of four None values if the query fails
    """
    plpy.notice('** Constructing query')

    qvals = {"num_ngbrs": num_ngbrs,
             "attr1": attr1,
             "attr2": attr2,
             "table": t,
             "geom_col": geom_column,
             "id_col": id_col}

    q = get_query(w_type, qvals)

    try:
        r = plpy.execute(q)
        plpy.notice('** Query returned with %d rows' % len(r))
    except plpy.SPIError as err:
        plpy.notice('** Query failed: "%s"' % q)
        # report the actual error, not the exception class
        plpy.notice('** Error: %s' % err)
        plpy.notice('** Exiting function')
        return zip([None], [None], [None], [None])

    ## collect attributes
    attr1_vals = get_attributes(r, 1)
    attr2_vals = get_attributes(r, 2)

    # create weights
    w = get_weight(r, w_type, num_ngbrs)

    # calculate LISA values; honour the caller's permutation count instead of
    # silently falling back to PySAL's default
    lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, w,
                                        permutations=permutations)

    plpy.notice("len of Is: %d" % len(lisa.Is))

    # find clustering of significance
    lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance)

    plpy.notice('** Finished calculations')

    return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order)
|
||||
|
||||
|
||||
# Low level functions ----------------------------------------
|
||||
|
||||
def map_quads(coord):
    """
    Translate a quadrant number into its Moran's I label.

    1 -> 'HH', 2 -> 'LH', 3 -> 'LL', 4 -> 'HL'

    :param coord (int): quadrant of a specific measurement
    :returns: the two-letter label, or None for any other value
    """
    labels = ((1, 'HH'), (2, 'LH'), (3, 'LL'), (4, 'HL'))

    for quadrant, label in labels:
        if coord == quadrant:
            return label

    return None
|
||||
|
||||
def query_attr_select(params):
    """
    Build the attribute part of the SELECT list.

    Every key of params other than 'id_col', 'geom_col', 'table' and
    'num_ngbrs' is treated as an attribute. Keys are taken in sorted order and
    aliased attr1, attr2, ...; the key itself is left as a ``{key}``
    placeholder to be filled by a later ``str.format`` call.

    :param params: dict of information used in query (column names,
                   table name, etc.)
    :returns: the SELECT fragment, each entry followed by ", "
    """
    non_attr_keys = ('id_col', 'geom_col', 'table', 'num_ngbrs')
    attr_keys = sorted(key for key in params if key not in non_attr_keys)

    template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, "

    return "".join(template % {"col": key, "alias_num": position}
                   for position, key in enumerate(attr_keys, 1))
|
||||
|
||||
def query_attr_where(params):
    """
    Build the WHERE conditions that drop rows with NULL attribute values.

    Attribute keys are selected and ordered as in query_attr_select. When
    there are exactly two attributes the second one must also be non-zero.
    The table alias is left as the literal ``idx_replace`` for the caller to
    substitute.

    :param params: dict of information used in query
    :returns: the conditions joined with " AND "
    """
    non_attr_keys = ('id_col', 'geom_col', 'table', 'num_ngbrs')
    attr_keys = sorted(key for key in params if key not in non_attr_keys)

    conditions = ["idx_replace.\"{%s}\" IS NOT NULL" % key
                  for key in attr_keys]

    if len(attr_keys) == 2:
        conditions.append("idx_replace.\"{%s}\" <> 0" % attr_keys[1])

    return " AND ".join(conditions)
|
||||
|
||||
def knn(params):
    """
    SQL query for k-nearest neighbors.

    :param params: dict of values to fill the template (id_col, geom_col,
                   table, num_ngbrs and the attribute columns)
    :returns: the complete query string
    """
    select_part = query_attr_select(params)
    where_part = query_attr_where(params)

    # The same conditions are applied to the outer (i) and inner (j) alias.
    fill = {"attr_select": select_part,
            "attr_where_i": where_part.replace("idx_replace", "i"),
            "attr_where_j": where_part.replace("idx_replace", "j")}

    pieces = (
        'SELECT ',
        'i."{id_col}" As id, ',
        '%(attr_select)s',
        '(SELECT ARRAY(SELECT j."{id_col}" ',
        'FROM "{table}" As j ',
        'WHERE %(attr_where_j)s ',
        'ORDER BY j."{geom_col}" <-> i."{geom_col}" ASC ',
        'LIMIT {num_ngbrs} OFFSET 1 ) ',
        ') As neighbors ',
        'FROM "{table}" As i ',
        'WHERE ',
        '%(attr_where_i)s ',
        'ORDER BY i."{id_col}" ASC;',
    )

    # First pass (%) inserts the attribute fragments, second pass (format)
    # fills the {placeholders} with the column and table names.
    template = "".join(pieces) % fill

    return template.format(**params)
|
||||
|
||||
## SQL query for finding queens neighbors (all contiguous polygons)
|
||||
def queen(params):
    """
    SQL query for queen neighbors (geometries that touch).

    :param params: dict of information to fill query (id_col, geom_col,
                   table and the attribute columns)
    :returns: the complete query string
    """
    select_part = query_attr_select(params)
    where_part = query_attr_where(params)

    # The same conditions are applied to the outer (i) and inner (j) alias.
    fill = {"attr_select": select_part,
            "attr_where_i": where_part.replace("idx_replace", "i"),
            "attr_where_j": where_part.replace("idx_replace", "j")}

    pieces = (
        'SELECT ',
        'i."{id_col}" As id, ',
        '%(attr_select)s',
        '(SELECT ARRAY(SELECT j."{id_col}" ',
        'FROM "{table}" As j ',
        'WHERE ST_Touches(i."{geom_col}", j."{geom_col}") AND ',
        '%(attr_where_j)s)',
        ') As neighbors ',
        'FROM "{table}" As i ',
        'WHERE ',
        '%(attr_where_i)s ',
        'ORDER BY i."{id_col}" ASC;',
    )

    # First pass (%) inserts the attribute fragments, second pass (format)
    # fills the {placeholders} with the column and table names.
    template = "".join(pieces) % fill

    return template.format(**params)
|
||||
|
||||
## to add more weight methods open a ticket or pull request
|
||||
|
||||
def get_query(w_type, query_vals):
    """
    Pick the neighbors query for the requested weight type.

    :param w_type: type of neighbors to calculate; 'knn' selects the
                   k-nearest-neighbors query, any other value the queen query
    :param query_vals: values used to construct the query
    :returns: the query string
    """
    builder = knn if w_type == 'knn' else queen

    return builder(query_vals)
|
||||
|
||||
def get_attributes(query_res, attr_num):
    """
    Collect one attribute column from the query result as a float array.

    :param query_res: query results with attributes and neighbors
    :param attr_num: attribute number (1, 2, ...)
    :returns: numpy array with the values stored under 'attr<attr_num>',
              in the order of query_res
    """
    column = 'attr' + str(attr_num)

    # The builtin float is what np.float aliased; the alias is gone from
    # current NumPy releases, so spell it directly.
    return np.array([row[column] for row in query_res], dtype=float)
|
||||
|
||||
## Build weight object
|
||||
def get_weight(query_res, w_type='queen', num_ngbrs=5):
    """
    Construct PySAL weight from return value of query

    :param query_res: query results with attributes and neighbors; each row
                      provides 'id' and 'neighbors'
    :param w_type: 'knn' gives every row num_ngbrs equal weights; any other
                   value is handled as queen, matching get_query
    :param num_ngbrs: number of neighbors per row for 'knn'
    :returns: a ps.W object built from the neighbor and weight dicts
    """
    if w_type == 'knn':
        row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs
        weights = {x['id']: row_normed_weights for x in query_res}
    else:
        # Previously only the literal 'queen' was accepted here and any other
        # value failed with an unbound `weights`, although get_query already
        # falls back to the queen query for every non-knn type.
        weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors'])
                   if len(x['neighbors']) > 0
                   else [] for x in query_res}

    neighbors = {x['id']: x['neighbors'] for x in query_res}

    return ps.W(neighbors, weights)
|
||||
|
||||
def quad_position(quads):
    """
    Map every quadrant number in quads to its Moran's I label.

    :param quads: iterable of quadrant numbers
    :returns: numpy array holding the result of map_quads for each entry
    """
    labels = []

    for quadrant in quads:
        labels.append(map_quads(quadrant))

    return np.array(labels)
|
||||
|
||||
def lisa_sig_vals(pvals, quads, threshold):
    """
    Label every observation with its quadrant or as not significant.

    :param pvals: numpy array of p-values, one per observation
    :param quads: quadrant number of each observation
    :param threshold: observations with a p-value at or below this value are
                      labelled with their quadrant (see map_quads)
    :returns: numpy object array of labels; entries above the threshold are
              'Not significant'
    """
    sig = (pvals <= threshold)

    # The labels are Python strings (or None), so use an object array
    # explicitly rather than passing the np.chararray class as a dtype.
    lisa_sig = np.empty(len(sig), dtype=object)

    for idx, is_significant in enumerate(sig):
        if is_significant:
            lisa_sig[idx] = map_quads(quads[idx])
        else:
            lisa_sig[idx] = 'Not significant'

    return lisa_sig
|
||||
10
release/python/0.0.2/crankshaft/crankshaft/random_seeds.py
Normal file
10
release/python/0.0.2/crankshaft/crankshaft/random_seeds.py
Normal file
@@ -0,0 +1,10 @@
|
||||
import random
|
||||
import numpy
|
||||
|
||||
def set_random_seeds(value):
    """
    Seed both random number generators used internally (the standard
    library's ``random`` and ``numpy.random``) with the same value.
    """
    for seed in (random.seed, numpy.random.seed):
        seed(value)
|
||||
48
release/python/0.0.2/crankshaft/setup.py
Normal file
48
release/python/0.0.2/crankshaft/setup.py
Normal file
@@ -0,0 +1,48 @@
|
||||
|
||||
"""
|
||||
CartoDB Spatial Analysis Python Library
|
||||
See:
|
||||
https://github.com/CartoDB/crankshaft
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
# Package metadata for the crankshaft Python library, release 0.0.2.
setup(
    name='crankshaft',

    version='0.0.2',

    description='CartoDB Spatial Analysis Python Library',

    url='https://github.com/CartoDB/crankshaft',

    author='Data Services Team - CartoDB',
    author_email='dataservices@cartodb.com',

    license='MIT',

    # NOTE(review): 'Intended Audience :: Mapping comunity' does not look like
    # a registered Trove classifier (and "comunity" is misspelled) -- confirm
    # before publishing to an index that validates classifiers.
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Mapping comunity',
        'Topic :: Maps :: Mapping Tools',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 2.7',
    ],

    keywords='maps mapping tools spatial analysis geostatistics',

    packages=find_packages(exclude=['contrib', 'docs', 'tests']),

    # NOTE(review): unittest ships with the Python standard library, so
    # listing it as an installable extra is probably unintended -- confirm.
    extras_require={
        'dev': ['unittest'],
        'test': ['unittest', 'nose', 'mock'],
    },

    # The choice of component versions is dictated by what's
    # provisioned in the production servers.
    install_requires=['pysal==1.9.1'],

    # NOTE(review): numpy appears here but not in install_requires;
    # presumably it is expected to be present already -- verify.
    requires=['pysal', 'numpy' ],

    test_suite='test'
)
|
||||
52
release/python/0.0.2/crankshaft/test/fixtures/moran.json
vendored
Normal file
52
release/python/0.0.2/crankshaft/test/fixtures/moran.json
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
[[0.9319096128346788, "HH"],
|
||||
[-1.135787401862846, "HL"],
|
||||
[0.11732030672508517, "Not significant"],
|
||||
[0.6152779669180425, "Not significant"],
|
||||
[-0.14657336660125297, "Not significant"],
|
||||
[0.6967858120189607, "Not significant"],
|
||||
[0.07949310115714454, "Not significant"],
|
||||
[0.4703198759258987, "Not significant"],
|
||||
[0.4421125200498064, "Not significant"],
|
||||
[0.5724288737143592, "Not significant"],
|
||||
[0.8970743435692062, "LL"],
|
||||
[0.18327334401918674, "Not significant"],
|
||||
[-0.01466729201304962, "Not significant"],
|
||||
[0.3481559372544409, "Not significant"],
|
||||
[0.06547094736902978, "Not significant"],
|
||||
[0.15482141569329988, "HH"],
|
||||
[0.4373841193538136, "Not significant"],
|
||||
[0.15971286468915544, "Not significant"],
|
||||
[1.0543588860308968, "Not significant"],
|
||||
[1.7372866900020818, "HH"],
|
||||
[1.091998586053999, "LL"],
|
||||
[0.1171572584252222, "Not significant"],
|
||||
[0.08438455015300014, "Not significant"],
|
||||
[0.06547094736902978, "Not significant"],
|
||||
[0.15482141569329985, "HH"],
|
||||
[1.1627044812890683, "HH"],
|
||||
[0.06547094736902978, "Not significant"],
|
||||
[0.795275137550483, "Not significant"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.3010757406693439, "Not significant"],
|
||||
[2.8205795942839376, "HH"],
|
||||
[0.11259190602909264, "Not significant"],
|
||||
[-0.07116352791516614, "Not significant"],
|
||||
[-0.09945240794119009, "Not significant"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.1832733440191868, "Not significant"],
|
||||
[-0.39054253768447705, "Not significant"],
|
||||
[-0.1672071289487642, "HL"],
|
||||
[0.3337669247916343, "Not significant"],
|
||||
[0.2584386102554792, "Not significant"],
|
||||
[-0.19733845476322634, "HL"],
|
||||
[-0.9379282899805409, "LH"],
|
||||
[-0.028770969951095866, "Not significant"],
|
||||
[0.051367269430983485, "Not significant"],
|
||||
[-0.2172548045913472, "LH"],
|
||||
[0.05136726943098351, "Not significant"],
|
||||
[0.04191046803899837, "Not significant"],
|
||||
[0.7482357030403517, "HH"],
|
||||
[-0.014585767863118111, "Not significant"],
|
||||
[0.5410013139159929, "Not significant"],
|
||||
[1.0223932668429925, "LL"],
|
||||
[1.4179402898927476, "LL"]]
|
||||
54
release/python/0.0.2/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
54
release/python/0.0.2/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
[
|
||||
{"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
|
||||
{"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
|
||||
{"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
|
||||
{"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
|
||||
{"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
|
||||
{"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
|
||||
{"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
|
||||
{"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
|
||||
{"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
|
||||
{"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
|
||||
{"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
|
||||
{"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
|
||||
{"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
|
||||
{"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
|
||||
{"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
|
||||
{"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
|
||||
{"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
|
||||
{"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
|
||||
{"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
|
||||
{"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
|
||||
{"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
|
||||
{"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
|
||||
{"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
|
||||
{"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
|
||||
{"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
|
||||
{"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
|
||||
{"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
|
||||
{"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
|
||||
{"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
|
||||
{"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
|
||||
{"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
|
||||
{"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
|
||||
{"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
|
||||
{"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
|
||||
{"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
|
||||
{"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
|
||||
{"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
|
||||
{"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
|
||||
{"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
|
||||
{"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
|
||||
{"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
|
||||
{"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
|
||||
{"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
|
||||
{"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
|
||||
{"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
|
||||
{"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
|
||||
{"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
|
||||
{"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
|
||||
{"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
|
||||
{"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
|
||||
{"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
|
||||
{"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
|
||||
]
|
||||
13
release/python/0.0.2/crankshaft/test/helper.py
Normal file
13
release/python/0.0.2/crankshaft/test/helper.py
Normal file
@@ -0,0 +1,13 @@
|
||||
import unittest
|
||||
|
||||
from mock_plpy import MockPlPy
|
||||
plpy = MockPlPy()
|
||||
|
||||
import sys
|
||||
sys.modules['plpy'] = plpy
|
||||
|
||||
import os
|
||||
|
||||
def fixture_file(name):
    """Return the path of fixture `name` inside the 'fixtures' directory
    that sits next to this module."""
    here = os.path.realpath(__file__)
    return os.path.join(os.path.dirname(here), 'fixtures', name)
|
||||
34
release/python/0.0.2/crankshaft/test/mock_plpy.py
Normal file
34
release/python/0.0.2/crankshaft/test/mock_plpy.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import re
|
||||
|
||||
class MockPlPy:
    """
    Minimal stand-in for the `plpy` module used by the tests.

    Messages are recorded in per-level lists, and `execute` answers queries
    from canned results registered with `_define_result`.
    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Give every recording attribute a fresh, empty list."""
        for attribute in ('infos', 'notices', 'debugs', 'logs', 'warnings',
                          'errors', 'fatals', 'executes', 'results',
                          'prepares'):
            setattr(self, attribute, [])

    def _define_result(self, query, result):
        """Register `result` as the answer for queries matching the regular
        expression `query` (case-insensitive, multi-line)."""
        compiled = re.compile(query, re.IGNORECASE | re.MULTILINE)
        self.results.append([compiled, result])

    def notice(self, msg):
        """Record a NOTICE-level message."""
        self.notices.append(msg)

    def info(self, msg):
        """Record an INFO-level message."""
        self.infos.append(msg)

    def execute(self, query): # TODO: additional arguments
        """Return the first registered result whose pattern matches the start
        of `query`, or an empty list when none does."""
        matches = (canned for pattern, canned in self.results
                   if pattern.match(query))
        return next(matches, [])
|
||||
144
release/python/0.0.2/crankshaft/test/test_clustering_moran.py
Normal file
144
release/python/0.0.2/crankshaft/test/test_clustering_moran.py
Normal file
@@ -0,0 +1,144 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
import unittest
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
|
||||
import crankshaft.clustering as cc
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
|
||||
class MoranTest(unittest.TestCase):
|
||||
"""Testing class for Moran's I functions."""
|
||||
|
||||
def setUp(self):
|
||||
plpy._reset()
|
||||
self.params = {"id_col": "cartodb_id",
|
||||
"attr1": "andy",
|
||||
"attr2": "jay_z",
|
||||
"table": "a_list",
|
||||
"geom_col": "the_geom",
|
||||
"num_ngbrs": 321}
|
||||
self.neighbors_data = json.loads(open(fixture_file('neighbors.json')).read())
|
||||
self.moran_data = json.loads(open(fixture_file('moran.json')).read())
|
||||
|
||||
def test_map_quads(self):
|
||||
"""Test map_quads."""
|
||||
self.assertEqual(cc.map_quads(1), 'HH')
|
||||
self.assertEqual(cc.map_quads(2), 'LH')
|
||||
self.assertEqual(cc.map_quads(3), 'LL')
|
||||
self.assertEqual(cc.map_quads(4), 'HL')
|
||||
self.assertEqual(cc.map_quads(33), None)
|
||||
self.assertEqual(cc.map_quads('andy'), None)
|
||||
|
||||
def test_query_attr_select(self):
|
||||
"""Test query_attr_select."""
|
||||
|
||||
ans = "i.\"{attr1}\"::numeric As attr1, " \
|
||||
"i.\"{attr2}\"::numeric As attr2, "
|
||||
|
||||
self.assertEqual(cc.query_attr_select(self.params), ans)
|
||||
|
||||
def test_query_attr_where(self):
|
||||
"""Test query_attr_where."""
|
||||
|
||||
ans = "idx_replace.\"{attr1}\" IS NOT NULL AND "\
|
||||
"idx_replace.\"{attr2}\" IS NOT NULL AND "\
|
||||
"idx_replace.\"{attr2}\" <> 0"
|
||||
|
||||
self.assertEqual(cc.query_attr_where(self.params), ans)
|
||||
|
||||
def test_knn(self):
|
||||
"""Test knn function."""
|
||||
|
||||
ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \
|
||||
"i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT j.\"cartodb_id\" " \
|
||||
"FROM \"a_list\" As j WHERE j.\"andy\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 ORDER BY " \
|
||||
"j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 OFFSET 1 ) ) " \
|
||||
"As neighbors FROM \"a_list\" As i WHERE i.\"andy\" IS NOT " \
|
||||
"NULL AND i.\"jay_z\" IS NOT NULL AND i.\"jay_z\" <> 0 ORDER " \
|
||||
"BY i.\"cartodb_id\" ASC;"
|
||||
|
||||
self.assertEqual(cc.knn(self.params), ans)
|
||||
|
||||
def test_queen(self):
|
||||
"""Test queen neighbors function."""
|
||||
|
||||
ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \
|
||||
"i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \
|
||||
"j.\"cartodb_id\" FROM \"a_list\" As j WHERE ST_Touches(" \
|
||||
"i.\"the_geom\", j.\"the_geom\") AND j.\"andy\" IS NOT NULL " \
|
||||
"AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0)) As " \
|
||||
"neighbors FROM \"a_list\" As i WHERE i.\"andy\" IS NOT NULL " \
|
||||
"AND i.\"jay_z\" IS NOT NULL AND i.\"jay_z\" <> 0 ORDER BY " \
|
||||
"i.\"cartodb_id\" ASC;"
|
||||
|
||||
self.assertEqual(cc.queen(self.params), ans)
|
||||
|
||||
def test_get_query(self):
|
||||
"""Test get_query."""
|
||||
|
||||
ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \
|
||||
"i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \
|
||||
"j.\"cartodb_id\" FROM \"a_list\" As j WHERE j.\"andy\" IS " \
|
||||
"NOT NULL AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 " \
|
||||
"ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 " \
|
||||
"OFFSET 1 ) ) As neighbors FROM \"a_list\" As i WHERE " \
|
||||
"i.\"andy\" IS NOT NULL AND i.\"jay_z\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" <> 0 ORDER BY i.\"cartodb_id\" ASC;"
|
||||
|
||||
self.assertEqual(cc.get_query('knn', self.params), ans)
|
||||
|
||||
def test_get_attributes(self):
|
||||
"""Test get_attributes."""
|
||||
|
||||
## need to add tests
|
||||
|
||||
self.assertEqual(True, True)
|
||||
|
||||
def test_get_weight(self):
|
||||
"""Test get_weight."""
|
||||
|
||||
self.assertEqual(True, True)
|
||||
|
||||
|
||||
def test_quad_position(self):
|
||||
"""Test lisa_sig_vals."""
|
||||
|
||||
quads = np.array([1, 2, 3, 4], np.int)
|
||||
|
||||
ans = np.array(['HH', 'LH', 'LL', 'HL'])
|
||||
test_ans = cc.quad_position(quads)
|
||||
|
||||
self.assertTrue((test_ans == ans).all())
|
||||
|
||||
def test_moran_local(self):
    """Compare local Moran's I values and quadrants with the fixture."""
    # Rows handed back by the mocked plpy for any 'select' query.
    rows = []
    for entry in self.neighbors_data:
        rows.append({'id': entry['id'],
                     'attr1': entry['value'],
                     'neighbors': entry['neighbors']})
    plpy._define_result('select', rows)

    # Fixed seed so the permutation-based results are reproducible.
    random_seeds.set_random_seeds(1234)
    lisa_rows = cc.moran_local('table', 'value', 0.05, 5, 99,
                               'the_geom', 'cartodb_id', 'knn')
    observed = [(row[0], row[1]) for row in lisa_rows]

    for (res_val, res_quad), (exp_val, exp_quad) in zip(observed,
                                                        self.moran_data):
        self.assertAlmostEqual(res_val, exp_val)
        self.assertEqual(res_quad, exp_quad)
|
||||
|
||||
def test_moran_local_rate(self):
    """Compare local Moran's I rate values with the fixture values."""
    # Rows handed back by the mocked plpy; the second attribute is 1.
    rows = []
    for entry in self.neighbors_data:
        rows.append({'id': entry['id'],
                     'attr1': entry['value'],
                     'attr2': 1,
                     'neighbors': entry['neighbors']})
    plpy._define_result('select', rows)

    # Fixed seed so the permutation-based results are reproducible.
    random_seeds.set_random_seeds(1234)
    lisa_rows = cc.moran_local_rate('table', 'numerator', 'denominator',
                                    0.05, 5, 99, 'the_geom', 'cartodb_id',
                                    'knn')
    observed = [(row[0], row[1]) for row in lisa_rows]

    # Only the numeric value is checked; quadrants are unpacked but unused.
    for (res_val, _res_quad), (exp_val, _exp_quad) in zip(observed,
                                                          self.moran_data):
        self.assertAlmostEqual(res_val, exp_val)
|
||||
2
release/python/0.0.3/crankshaft/crankshaft/__init__.py
Normal file
2
release/python/0.0.3/crankshaft/crankshaft/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
import random_seeds
|
||||
import clustering
|
||||
@@ -0,0 +1,2 @@
|
||||
from moran import *
|
||||
from kmeans import *
|
||||
@@ -0,0 +1,18 @@
|
||||
from sklearn.cluster import KMeans
|
||||
import plpy
|
||||
|
||||
def kmeans(query, no_clusters, no_init=20):
    """
    Cluster the point geometries returned by a query with k-means.

    Input:
      @param query text: query text spliced into the FROM clause; its rows
        are read through the cartodb_id and the_geom columns
      @param no_clusters int: passed to KMeans as n_clusters
      @param no_init int: passed to KMeans as n_init (default 20)
    Output:
      zip(ids, labels) pairing each cartodb_id with its cluster label, or
      an empty list when the query yields no usable rows
    """
    data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids,
        array_agg(ST_X(the_geom) order by cartodb_id) xs,
        array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a
        where the_geom is not null
    '''.format(query=query))

    # Nothing came back at all: there is nothing to cluster.
    if len(data) == 0:
        return []

    ids = data[0]['ids']
    # The aggregates are NULL (None) when no row has a geometry; zipping
    # None would raise, so report "no clusters" instead.
    if not ids:
        return []

    xs = data[0]['xs']
    ys = data[0]['ys']

    km = KMeans(n_clusters=no_clusters, n_init=no_init)
    labels = km.fit_predict(zip(xs, ys))
    return zip(ids, labels)
|
||||
|
||||
260
release/python/0.0.3/crankshaft/crankshaft/clustering/moran.py
Normal file
260
release/python/0.0.3/crankshaft/crankshaft/clustering/moran.py
Normal file
@@ -0,0 +1,260 @@
|
||||
"""
|
||||
Moran's I geostatistics (global clustering & outliers presence)
|
||||
"""
|
||||
|
||||
# TODO: Fill in local neighbors which have null/NoneType values with the
|
||||
#   average of their neighborhood
|
||||
|
||||
import pysal as ps
|
||||
import plpy
|
||||
|
||||
# crankshaft module
|
||||
import crankshaft.pysal_utils as pu
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
def moran(subquery, attr_name,
          w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I (global)
    Implementation building neighbors with a PostGIS database and Moran's I
    core clusters with PySAL.
    Andy Eschbacher

    Input:
      @param subquery: query whose rows are analysed (stored as "subquery"
        in the values handed to pu.construct_neighbor_query)
      @param attr_name: name of the attribute column (stored as "attr1")
      @param w_type: neighbor type, forwarded to
        pu.construct_neighbor_query and pu.get_weight
      @param num_ngbrs: number of neighbors, forwarded the same way
      @param permutations: forwarded to ps.esda.moran.Moran
      @param geom_col: name of the geometry column
      @param id_col: name of the id column
    Output:
      zip([I], [EI]) from the PySAL Moran object, or
      pu.empty_zipped_array(2) when the query returns no rows
    """
    qvals = {"id_col": id_col,
             "attr1": attr_name,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    query = pu.construct_neighbor_query(w_type, qvals)

    plpy.notice('** Query: %s' % query)

    try:
        result = plpy.execute(query)
    except plpy.SPIError as err:
        # plpy.error raises inside PL/Python, so the diagnostics have to be
        # emitted before it; the caught instance is logged, not the class.
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error('Error: areas of interest query failed, check input parameters')
        # Only reached when plpy.error does not raise.
        return pu.empty_zipped_array(2)

    # if there are no neighbors, exit
    if len(result) == 0:
        return pu.empty_zipped_array(2)
    plpy.notice('** Query returned with %d rows' % len(result))

    ## collect attributes
    attr_vals = pu.get_attributes(result)

    ## calculate weights
    weight = pu.get_weight(result, w_type, num_ngbrs)

    ## calculate moran global
    moran_global = ps.esda.moran.Moran(attr_vals, weight,
                                       permutations=permutations)

    return zip([moran_global.I], [moran_global.EI])
|
||||
|
||||
def moran_local(subquery, attr,
                w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I implementation for PL/Python
    Andy Eschbacher

    Input:
      @param subquery: query whose rows are analysed
      @param attr: name of the attribute column (stored as "attr1")
      @param w_type: neighbor type, forwarded to
        pu.construct_neighbor_query and pu.get_weight
      @param num_ngbrs: number of neighbors, forwarded the same way
      @param permutations: forwarded to ps.esda.moran.Moran_Local
      @param geom_col: name of the geometry column
      @param id_col: name of the id column
    Output:
      zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y), or
      pu.empty_zipped_array(5) when the query returns no rows
    """

    # geometries with attributes that are null are ignored
    # resulting in a collection of not as near neighbors

    qvals = {"id_col": id_col,
             "attr1": attr,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
    except plpy.SPIError:
        # plpy.error raises inside PL/Python, so the failing query has to
        # be reported before it is called.
        plpy.notice('** Query failed: "%s"' % query)
        plpy.error('Error: areas of interest query failed, check input parameters')
        # Only reached when plpy.error does not raise.
        return pu.empty_zipped_array(5)

    # if there are no neighbors, exit
    if len(result) == 0:
        return pu.empty_zipped_array(5)

    attr_vals = pu.get_attributes(result)
    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
                                     permutations=permutations)

    # find quadrants for each geometry
    quads = quad_position(lisa.q)

    return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
def moran_rate(subquery, numerator, denominator,
               w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I Rate (global)
    Andy Eschbacher

    Input:
      @param subquery: query whose rows are analysed
      @param numerator: name of the numerator column (stored as "attr1")
      @param denominator: name of the denominator column (stored as "attr2")
      @param w_type: neighbor type, forwarded to
        pu.construct_neighbor_query and pu.get_weight
      @param num_ngbrs: number of neighbors, forwarded the same way
      @param permutations: forwarded to ps.esda.moran.Moran_Rate
      @param geom_col: name of the geometry column
      @param id_col: name of the id column
    Output:
      zip([I], [EI]) from the PySAL Moran_Rate object, or
      pu.empty_zipped_array(2) when the query returns no rows
    """
    qvals = {"id_col": id_col,
             "attr1": numerator,
             "attr2": denominator,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    query = pu.construct_neighbor_query(w_type, qvals)

    plpy.notice('** Query: %s' % query)

    try:
        result = plpy.execute(query)
    except plpy.SPIError as err:
        # plpy.error raises inside PL/Python, so the diagnostics have to be
        # emitted before it; the caught instance is logged, not the class.
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error('Error: areas of interest query failed, check input parameters')
        # Only reached when plpy.error does not raise.
        return pu.empty_zipped_array(2)

    # if there are no neighbors, exit
    if len(result) == 0:
        return pu.empty_zipped_array(2)
    plpy.notice('** Query returned with %d rows' % len(result))

    ## collect attributes
    numer = pu.get_attributes(result, 1)
    denom = pu.get_attributes(result, 2)

    weight = pu.get_weight(result, w_type, num_ngbrs)

    ## calculate moran global rate
    lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
                                         permutations=permutations)

    return zip([lisa_rate.I], [lisa_rate.EI])
|
||||
|
||||
def moran_local_rate(subquery, numerator, denominator,
                     w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I Local Rate
    Andy Eschbacher

    Input:
      @param subquery: query whose rows are analysed
      @param numerator: name of the numerator column (stored as "attr1")
      @param denominator: name of the denominator column (stored as "attr2")
      @param w_type: neighbor type, forwarded to
        pu.construct_neighbor_query and pu.get_weight
      @param num_ngbrs: number of neighbors, forwarded the same way
      @param permutations: forwarded to ps.esda.moran.Moran_Local_Rate
      @param geom_col: name of the geometry column
      @param id_col: name of the id column
    Output:
      zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y), or
      pu.empty_zipped_array(5) when the query returns no rows
    """
    # geometries with values that are null are ignored
    # resulting in a collection of not as near neighbors

    # The query builders number attribute columns by the sorted order of
    # these keys. "attr1"/"attr2" keep the numerator first; the keys
    # "numerator"/"denominator" would sort the denominator first and swap
    # the two columns (and move the "<> 0" filter onto the numerator).
    qvals = {"id_col": id_col,
             "attr1": numerator,
             "attr2": denominator,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
    except plpy.SPIError as err:
        # plpy.error raises inside PL/Python, so the diagnostics have to be
        # emitted before it; the caught instance is logged, not the class.
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error('Error: areas of interest query failed, check input parameters')
        # Only reached when plpy.error does not raise.
        return pu.empty_zipped_array(5)

    # if there are no neighbors, exit
    if len(result) == 0:
        return pu.empty_zipped_array(5)

    ## collect attributes
    numer = pu.get_attributes(result, 1)
    denom = pu.get_attributes(result, 2)

    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
                                          permutations=permutations)

    # find units of significance
    quads = quad_position(lisa.q)

    return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
def moran_local_bv(subquery, attr1, attr2,
                   permutations, geom_col, id_col, w_type, num_ngbrs):
    """
    Moran's I (local) Bivariate (untested)

    Input:
      @param subquery: query whose rows are analysed
      @param attr1: name of the first attribute column
      @param attr2: name of the second attribute column
      @param permutations: forwarded to ps.esda.moran.Moran_Local_BV
      @param geom_col: name of the geometry column
      @param id_col: name of the id column
      @param w_type: neighbor type, forwarded to
        pu.construct_neighbor_query and pu.get_weight
      @param num_ngbrs: number of neighbors, forwarded the same way
    Output:
      zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order), or
      pu.empty_zipped_array(4) when the query returns no rows
    """
    plpy.notice('** Constructing query')

    qvals = {"num_ngbrs": num_ngbrs,
             "attr1": attr1,
             "attr2": attr2,
             "subquery": subquery,
             "geom_col": geom_col,
             "id_col": id_col}

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
    except plpy.SPIError:
        # plpy.error raises inside PL/Python, so the failing query has to
        # be reported before it is called.
        plpy.notice('** Query failed: "%s"' % query)
        plpy.error("Error: areas of interest query failed, " \
                   "check input parameters")
        # Only reached when plpy.error does not raise.
        return pu.empty_zipped_array(4)

    # if there are no neighbors, exit
    if len(result) == 0:
        return pu.empty_zipped_array(4)

    ## collect attributes
    attr1_vals = pu.get_attributes(result, 1)
    attr2_vals = pu.get_attributes(result, 2)

    # create weights
    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
                                        permutations=permutations)

    plpy.notice("len of Is: %d" % len(lisa.Is))

    # find clustering of significance
    lisa_sig = quad_position(lisa.q)

    plpy.notice('** Finished calculations')

    return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
|
||||
|
||||
# Low level functions ----------------------------------------
|
||||
|
||||
def map_quads(coord):
    """
    Translate a quadrant number into its Moran's I label.
    HH=1, LH=2, LL=3, HL=4
    Input:
      @param coord (int): quadrant of a specific measurement
    Output:
      classification (one of 'HH', 'LH', 'LL', or 'HL'); None for any
      value that equals none of 1-4
    """
    # Checked in order with ==, so anything comparing equal to a quadrant
    # number receives that quadrant's label.
    labels = ((1, 'HH'), (2, 'LH'), (3, 'LL'), (4, 'HL'))
    for quadrant, label in labels:
        if coord == quadrant:
            return label
    return None
|
||||
|
||||
def quad_position(quads):
    """
    Label every quadrant number with its Moran's I classification.
    Input:
      @param quads ndarray: an array of quads classified by
        1-4 (PySAL default)
    Output:
      @param list: the quads classified as 'HH', 'LL', etc., in the
        same order as the input
    """
    labels = []
    for quad in quads:
        labels.append(map_quads(quad))
    return labels
|
||||
@@ -0,0 +1 @@
|
||||
from pysal_utils import *
|
||||
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Utilities module for generic PySAL functionality, mainly centered on translating queries into numpy arrays or PySAL weights objects
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
|
||||
def construct_neighbor_query(w_type, query_vals):
    """Return query (a string) used for finding neighbors
        @param w_type text: type of neighbors to calculate ('knn' or 'queen')
        @param query_vals dict: values used to construct the query
    """
    # 'knn' (any letter case) selects the k-nearest-neighbors query; every
    # other value is handled by the queen builder.
    builder = knn if w_type.lower() == 'knn' else queen
    return builder(query_vals)
|
||||
|
||||
## Build weight object
|
||||
def get_weight(query_res, w_type='knn', num_ngbrs=5):
    """
    Construct PySAL weight from return value of query
    @param query_res: query results with attributes and neighbors; each
      row is read through its 'id' and 'neighbors' entries
    @param w_type text: neighbor type; kept for interface compatibility,
      both types are weighted the same way
    @param num_ngbrs int: requested neighbor count; kept for interface
      compatibility, the weights follow the neighbors actually returned

    Returns a ps.W built from the per-row neighbor and weight lists.
    """
    neighbors = {}
    weights = {}

    for row in query_res:
        row_ngbrs = row['neighbors']
        count = len(row_ngbrs)
        neighbors[row['id']] = row_ngbrs
        # Row-normalised weights sized to the neighbors this row really
        # has. A 'knn' row with exactly num_ngbrs neighbors still gets
        # 1/num_ngbrs each, while a shorter row no longer receives a
        # weight list longer than its neighbor list.
        weights[row['id']] = [1.0 / count] * count if count > 0 else []

    return ps.W(neighbors, weights)
|
||||
|
||||
def query_attr_select(params):
    """
    Create portion of SELECT statement for attributes involved in query.
    @param params: dict of information used in query (column names,
      table name, etc.)

    Every key other than 'id_col', 'geom_col', 'subquery' and 'num_ngbrs'
    is treated as an attribute. Attributes are numbered attr1, attr2, ...
    in sorted key order, and the key name is left in braces as a
    placeholder for a later str.format call.
    """
    reserved = ('id_col', 'geom_col', 'subquery', 'num_ngbrs')
    names = sorted(key for key in params if key not in reserved)

    pieces = ['i."{%s}"::numeric As attr%s, ' % (name, position)
              for position, name in enumerate(names, 1)]

    return "".join(pieces)
|
||||
|
||||
def query_attr_where(params):
    """
    Create portion of WHERE clauses for weeding out NULL-valued geometries

    @param params: dict of information used in query; every key other than
      'id_col', 'geom_col', 'subquery' and 'num_ngbrs' is an attribute

    Returns the clauses joined by " AND ". Each clause is prefixed with the
    literal "idx_replace" and keeps the key name in braces. When there are
    exactly two attributes, a "<> 0" clause is added for the second one in
    sorted order.
    """
    reserved = ('id_col', 'geom_col', 'subquery', 'num_ngbrs')
    names = sorted(key for key in params if key not in reserved)

    clauses = ['idx_replace."{%s}" IS NOT NULL' % name for name in names]

    if len(names) == 2:
        clauses.append('idx_replace."{%s}" <> 0' % names[1])

    return " AND ".join(clauses)
|
||||
|
||||
def knn(params):
    """SQL query for k-nearest neighbors.
        @param params: dict of values to fill template

    The attribute fragments from query_attr_select and query_attr_where
    are spliced in first with %-formatting; the remaining {placeholders}
    are then filled from params with str.format.
    """
    where_clause = query_attr_where(params)

    fragments = {
        "attr_select": query_attr_select(params),
        "attr_where_i": where_clause.replace("idx_replace", "i"),
        "attr_where_j": where_clause.replace("idx_replace", "j"),
    }

    template = "".join((
        'SELECT ',
        'i."{id_col}" As id, ',
        '%(attr_select)s',
        '(SELECT ARRAY(SELECT j."{id_col}" ',
        'FROM ({subquery}) As j ',
        'WHERE ',
        'i."{id_col}" <> j."{id_col}" AND ',
        '%(attr_where_j)s ',
        'ORDER BY ',
        'j."{geom_col}" <-> i."{geom_col}" ASC ',
        'LIMIT {num_ngbrs})',
        ') As neighbors ',
        'FROM ({subquery}) As i ',
        'WHERE ',
        '%(attr_where_i)s ',
        'ORDER BY i."{id_col}" ASC;',
    ))

    with_attrs = template % fragments
    return with_attrs.format(**params)
|
||||
|
||||
## SQL query for finding queens neighbors (all contiguous polygons)
|
||||
def queen(params):
    """SQL query for queen neighbors.
        @param params dict: information to fill query

    The attribute fragments from query_attr_select and query_attr_where
    are spliced in first with %-formatting; the remaining {placeholders}
    are then filled from params with str.format.
    """
    where_clause = query_attr_where(params)

    fragments = {
        "attr_select": query_attr_select(params),
        "attr_where_i": where_clause.replace("idx_replace", "i"),
        "attr_where_j": where_clause.replace("idx_replace", "j"),
    }

    template = "".join((
        'SELECT ',
        'i."{id_col}" As id, ',
        '%(attr_select)s',
        '(SELECT ARRAY(SELECT j."{id_col}" ',
        'FROM ({subquery}) As j ',
        'WHERE i."{id_col}" <> j."{id_col}" AND ',
        'ST_Touches(i."{geom_col}", j."{geom_col}") AND ',
        '%(attr_where_j)s)',
        ') As neighbors ',
        'FROM ({subquery}) As i ',
        'WHERE ',
        '%(attr_where_i)s ',
        'ORDER BY i."{id_col}" ASC;',
    ))

    with_attrs = template % fragments
    return with_attrs.format(**params)
|
||||
|
||||
## to add more weight methods open a ticket or pull request
|
||||
|
||||
def get_attributes(query_res, attr_num=1):
    """
    Collect one numbered attribute from every row as a float array.
    @param query_res: query results with attributes and neighbors
    @param attr_num: attribute number (1, 2, ...)

    Returns a numpy array holding each row's 'attr<attr_num>' value.
    """
    column = 'attr' + str(attr_num)
    # The builtin float is the dtype the old np.float alias pointed to;
    # the alias itself no longer exists in current NumPy releases.
    return np.array([row[column] for row in query_res], dtype=float)
|
||||
|
||||
def empty_zipped_array(num_nones):
    """
    Build the placeholder result used when there are no neighbors
    (empty weights objects).
    Input:
      @param num_nones int: number of columns (e.g., 4)
    Output:
      a one-element list holding a tuple of num_nones None values,
      e.g. [(None, None, None, None)]
    """
    blank_row = (None,) * num_nones
    return [blank_row]
|
||||
10
release/python/0.0.3/crankshaft/crankshaft/random_seeds.py
Normal file
10
release/python/0.0.3/crankshaft/crankshaft/random_seeds.py
Normal file
@@ -0,0 +1,10 @@
|
||||
import random
|
||||
import numpy
|
||||
|
||||
def set_random_seeds(value):
    """
    Seed both RNGs (Random Number Generators) used internally with the
    same value: Python's random module first, then numpy.random.
    """
    for seed_rng in (random.seed, numpy.random.seed):
        seed_rng(value)
|
||||
48
release/python/0.0.3/crankshaft/setup.py
Normal file
48
release/python/0.0.3/crankshaft/setup.py
Normal file
@@ -0,0 +1,48 @@
|
||||
|
||||
"""
|
||||
CartoDB Spatial Analysis Python Library
|
||||
See:
|
||||
https://github.com/CartoDB/crankshaft
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
setup(
    name='crankshaft',

    version='0.0.3',

    description='CartoDB Spatial Analysis Python Library',

    url='https://github.com/CartoDB/crankshaft',

    author='Data Services Team - CartoDB',
    author_email='dataservices@cartodb.com',

    license='MIT',

    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Mapping comunity',
        'Topic :: Maps :: Mapping Tools',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 2.7',
    ],

    keywords='maps mapping tools spatial analysis geostatistics',

    packages=find_packages(exclude=['contrib', 'docs', 'tests']),

    # unittest ships with the Python standard library and is not an
    # installable distribution, so it is not listed as a requirement.
    # The 'dev' extra is kept (empty) so existing install commands that
    # name it keep working.
    extras_require={
        'dev': [],
        'test': ['nose', 'mock'],
    },

    # The choice of component versions is dictated by what's
    # provisioned in the production servers.
    install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'],

    requires=['pysal', 'numpy', 'sklearn'],

    test_suite='test'
)
|
||||
1
release/python/0.0.3/crankshaft/test/fixtures/kmeans.json
vendored
Normal file
1
release/python/0.0.3/crankshaft/test/fixtures/kmeans.json
vendored
Normal file
@@ -0,0 +1 @@
|
||||
[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
|
||||
52
release/python/0.0.3/crankshaft/test/fixtures/moran.json
vendored
Normal file
52
release/python/0.0.3/crankshaft/test/fixtures/moran.json
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
[[0.9319096128346788, "HH"],
|
||||
[-1.135787401862846, "HL"],
|
||||
[0.11732030672508517, "LL"],
|
||||
[0.6152779669180425, "LL"],
|
||||
[-0.14657336660125297, "LH"],
|
||||
[0.6967858120189607, "LL"],
|
||||
[0.07949310115714454, "HH"],
|
||||
[0.4703198759258987, "HH"],
|
||||
[0.4421125200498064, "HH"],
|
||||
[0.5724288737143592, "LL"],
|
||||
[0.8970743435692062, "LL"],
|
||||
[0.18327334401918674, "LL"],
|
||||
[-0.01466729201304962, "HL"],
|
||||
[0.3481559372544409, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329988, "HH"],
|
||||
[0.4373841193538136, "HH"],
|
||||
[0.15971286468915544, "LL"],
|
||||
[1.0543588860308968, "HH"],
|
||||
[1.7372866900020818, "HH"],
|
||||
[1.091998586053999, "LL"],
|
||||
[0.1171572584252222, "HH"],
|
||||
[0.08438455015300014, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329985, "HH"],
|
||||
[1.1627044812890683, "HH"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.795275137550483, "HH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.3010757406693439, "LL"],
|
||||
[2.8205795942839376, "HH"],
|
||||
[0.11259190602909264, "LL"],
|
||||
[-0.07116352791516614, "HL"],
|
||||
[-0.09945240794119009, "LH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.1832733440191868, "LL"],
|
||||
[-0.39054253768447705, "HL"],
|
||||
[-0.1672071289487642, "HL"],
|
||||
[0.3337669247916343, "HH"],
|
||||
[0.2584386102554792, "HH"],
|
||||
[-0.19733845476322634, "HL"],
|
||||
[-0.9379282899805409, "LH"],
|
||||
[-0.028770969951095866, "LH"],
|
||||
[0.051367269430983485, "LL"],
|
||||
[-0.2172548045913472, "LH"],
|
||||
[0.05136726943098351, "LL"],
|
||||
[0.04191046803899837, "LL"],
|
||||
[0.7482357030403517, "HH"],
|
||||
[-0.014585767863118111, "LH"],
|
||||
[0.5410013139159929, "HH"],
|
||||
[1.0223932668429925, "LL"],
|
||||
[1.4179402898927476, "LL"]]
|
||||
54
release/python/0.0.3/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
54
release/python/0.0.3/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
[
|
||||
{"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
|
||||
{"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
|
||||
{"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
|
||||
{"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
|
||||
{"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
|
||||
{"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
|
||||
{"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
|
||||
{"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
|
||||
{"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
|
||||
{"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
|
||||
{"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
|
||||
{"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
|
||||
{"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
|
||||
{"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
|
||||
{"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
|
||||
{"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
|
||||
{"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
|
||||
{"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
|
||||
{"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
|
||||
{"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
|
||||
{"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
|
||||
{"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
|
||||
{"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
|
||||
{"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
|
||||
{"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
|
||||
{"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
|
||||
{"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
|
||||
{"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
|
||||
{"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
|
||||
{"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
|
||||
{"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
|
||||
{"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
|
||||
{"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
|
||||
{"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
|
||||
{"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
|
||||
{"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
|
||||
{"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
|
||||
{"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
|
||||
{"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
|
||||
{"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
|
||||
{"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
|
||||
{"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
|
||||
{"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
|
||||
{"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
|
||||
{"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
|
||||
{"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
|
||||
{"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
|
||||
{"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
|
||||
{"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
|
||||
{"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
|
||||
{"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
|
||||
{"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
|
||||
]
|
||||
13
release/python/0.0.3/crankshaft/test/helper.py
Normal file
13
release/python/0.0.3/crankshaft/test/helper.py
Normal file
@@ -0,0 +1,13 @@
|
||||
import unittest
|
||||
|
||||
from mock_plpy import MockPlPy
|
||||
plpy = MockPlPy()
|
||||
|
||||
import sys
|
||||
sys.modules['plpy'] = plpy
|
||||
|
||||
import os
|
||||
|
||||
def fixture_file(name):
    """Return the path of `name` inside the 'fixtures' directory that sits
    next to this module (the module path is resolved with realpath)."""
    module_path = os.path.realpath(__file__)
    return os.path.join(os.path.dirname(module_path), 'fixtures', name)
|
||||
34
release/python/0.0.3/crankshaft/test/mock_plpy.py
Normal file
34
release/python/0.0.3/crankshaft/test/mock_plpy.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import re
|
||||
|
||||
class MockPlPy:
    """Test stand-in for the plpy module: records notice/info messages and
    answers execute() from canned results registered per query pattern."""

    def __init__(self):
        self._reset()

    def _reset(self):
        """Start over with empty message, query and result lists."""
        for bucket in ('infos', 'notices', 'debugs', 'logs', 'warnings',
                       'errors', 'fatals', 'executes', 'results',
                       'prepares'):
            setattr(self, bucket, [])

    def _define_result(self, query, result):
        """Register `result` as the answer for queries matching the regular
        expression `query` (case-insensitive, multi-line)."""
        compiled = re.compile(query, re.IGNORECASE | re.MULTILINE)
        self.results.append([compiled, result])

    def notice(self, msg):
        """Record a notice message."""
        self.notices.append(msg)

    def info(self, msg):
        """Record an info message."""
        self.infos.append(msg)

    def execute(self, query):  # TODO: additional arguments
        """Return the first registered result whose pattern matches the
        start of `query`; an empty list when nothing matches."""
        for pattern, canned in self.results:
            if pattern.match(query):
                return canned
        return []
|
||||
38
release/python/0.0.3/crankshaft/test/test_cluster_kmeans.py
Normal file
38
release/python/0.0.3/crankshaft/test/test_cluster_kmeans.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
import numpy as np
|
||||
import crankshaft.clustering as cc
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
|
||||
class KMeansTest(unittest.TestCase):
    """Testing class for the k-means clustering function"""

    def setUp(self):
        plpy._reset()
        # Load the fixture through a context manager so the file handle
        # is closed instead of being left to the garbage collector.
        with open(fixture_file('kmeans.json')) as fixture:
            self.cluster_data = json.load(fixture)
        self.params = {"subquery": "select * from table",
                       "no_clusters": "10"
                       }

    def test_kmeans(self):
        """Two clusters of 20 points each are expected for the fixture."""
        data = self.cluster_data
        plpy._define_result('select', data)
        # Materialise the pairs once: they are iterated three times below.
        clusters = list(cc.kmeans('subquery', 2))
        labels = [a[1] for a in clusters]
        c1 = [a for a in clusters if a[1] == 0]
        c2 = [a for a in clusters if a[1] == 1]

        self.assertEqual(len(np.unique(labels)), 2)
        self.assertEqual(len(c1), 20)
        self.assertEqual(len(c2), 20)
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
|
||||
import crankshaft.clustering as cc
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
|
||||
class MoranTest(unittest.TestCase):
    """Testing class for Moran's I functions"""

    def setUp(self):
        plpy._reset()
        self.params = {"id_col": "cartodb_id",
                       "attr1": "andy",
                       "attr2": "jay_z",
                       "subquery": "SELECT * FROM a_list",
                       "geom_col": "the_geom",
                       "num_ngbrs": 321}
        # Load fixtures through context managers so the file handles are
        # closed instead of being left to the garbage collector.
        with open(fixture_file('neighbors.json')) as fixture:
            self.neighbors_data = json.load(fixture)
        with open(fixture_file('moran.json')) as fixture:
            self.moran_data = json.load(fixture)

    def test_map_quads(self):
        """Test map_quads"""
        self.assertEqual(cc.map_quads(1), 'HH')
        self.assertEqual(cc.map_quads(2), 'LH')
        self.assertEqual(cc.map_quads(3), 'LL')
        self.assertEqual(cc.map_quads(4), 'HL')
        self.assertEqual(cc.map_quads(33), None)
        self.assertEqual(cc.map_quads('andy'), None)

    def test_quad_position(self):
        """Test quad_position"""

        # The builtin int is the dtype that the old np.int alias pointed
        # to; the alias no longer exists in current NumPy releases.
        quads = np.array([1, 2, 3, 4], int)

        ans = np.array(['HH', 'LH', 'LL', 'HL'])
        test_ans = cc.quad_position(quads)

        self.assertTrue((test_ans == ans).all())

    def test_moran_local(self):
        """Test Moran's I local"""
        data = [{'id': d['id'], 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        plpy._define_result('select', data)
        # fixed seed keeps the permutation-based results reproducible
        random_seeds.set_random_seeds(1234)
        result = cc.moran_local('subquery', 'value', 'knn', 5, 99,
                                'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        expected = self.moran_data
        for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result,
                                                              expected):
            self.assertAlmostEqual(res_val, exp_val)
            self.assertEqual(res_quad, exp_quad)

    def test_moran_local_rate(self):
        """Test Moran's I local rate"""
        data = [{'id': d['id'], 'attr1': d['value'], 'attr2': 1,
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        plpy._define_result('select', data)
        random_seeds.set_random_seeds(1234)
        result = cc.moran_local_rate('subquery', 'numerator', 'denominator',
                                     'knn', 5, 99, 'the_geom', 'cartodb_id')
        # an assertion replaces the former debugging print of
        # "result == None"
        self.assertIsNotNone(result)
        result = [(row[0], row[1]) for row in result]
        expected = self.moran_data
        # only the numeric value is compared here, not the quadrant
        for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result,
                                                              expected):
            self.assertAlmostEqual(res_val, exp_val)

    def test_moran(self):
        """Test Moran's I global"""
        data = [{'id': d['id'], 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        plpy._define_result('select', data)
        random_seeds.set_random_seeds(1235)
        result = cc.moran('table', 'value', 'knn', 5, 99,
                          'the_geom', 'cartodb_id')
        # an assertion replaces the former debugging print of
        # "result == None"
        self.assertIsNotNone(result)
        result_moran = result[0][0]
        # the global statistic is compared with the mean of the local
        # fixture values, within a loose tolerance
        expected_moran = np.array([row[0] for row in self.moran_data]).mean()
        self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)
|
||||
107
release/python/0.0.3/crankshaft/test/test_pysal_utils.py
Normal file
107
release/python/0.0.3/crankshaft/test/test_pysal_utils.py
Normal file
@@ -0,0 +1,107 @@
|
||||
import unittest
|
||||
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
|
||||
|
||||
class PysalUtilsTest(unittest.TestCase):
    """Unit tests for the query-building helpers in crankshaft.pysal_utils"""

    def setUp(self):
        """Prepare the parameter dict shared by the query tests."""
        self.params = dict(id_col="cartodb_id",
                           attr1="andy",
                           attr2="jay_z",
                           subquery="SELECT * FROM a_list",
                           geom_col="the_geom",
                           num_ngbrs=321)

    def test_query_attr_select(self):
        """query_attr_select emits one aliased column per attribute key"""
        expected = ('i."{attr1}"::numeric As attr1, '
                    'i."{attr2}"::numeric As attr2, ')

        self.assertEqual(pu.query_attr_select(self.params), expected)

    def test_query_attr_where(self):
        """query_attr_where emits the NOT NULL / non-zero filters"""
        expected = ('idx_replace."{attr1}" IS NOT NULL AND '
                    'idx_replace."{attr2}" IS NOT NULL AND '
                    'idx_replace."{attr2}" <> 0')

        self.assertEqual(pu.query_attr_where(self.params), expected)

    def test_knn(self):
        """knn builds the expected k-nearest-neighbors query"""
        expected = ('SELECT i."cartodb_id" As id, '
                    'i."andy"::numeric As attr1, '
                    'i."jay_z"::numeric As attr2, '
                    '(SELECT ARRAY(SELECT j."cartodb_id" '
                    'FROM (SELECT * FROM a_list) As j '
                    'WHERE '
                    'i."cartodb_id" <> j."cartodb_id" AND '
                    'j."andy" IS NOT NULL AND '
                    'j."jay_z" IS NOT NULL AND '
                    'j."jay_z" <> 0 '
                    'ORDER BY '
                    'j."the_geom" <-> i."the_geom" ASC '
                    'LIMIT 321)) As neighbors '
                    'FROM (SELECT * FROM a_list) As i '
                    'WHERE i."andy" IS NOT NULL AND '
                    'i."jay_z" IS NOT NULL AND '
                    'i."jay_z" <> 0 '
                    'ORDER BY i."cartodb_id" ASC;')

        self.assertEqual(pu.knn(self.params), expected)

    def test_queen(self):
        """queen builds the expected contiguity (ST_Touches) query"""
        expected = ('SELECT i."cartodb_id" As id, '
                    'i."andy"::numeric As attr1, '
                    'i."jay_z"::numeric As attr2, '
                    '(SELECT ARRAY(SELECT j."cartodb_id" '
                    'FROM (SELECT * FROM a_list) As j '
                    'WHERE '
                    'i."cartodb_id" <> j."cartodb_id" AND '
                    'ST_Touches(i."the_geom", '
                    'j."the_geom") AND '
                    'j."andy" IS NOT NULL AND '
                    'j."jay_z" IS NOT NULL AND '
                    'j."jay_z" <> 0)'
                    ') As neighbors '
                    'FROM (SELECT * FROM a_list) As i '
                    'WHERE i."andy" IS NOT NULL AND '
                    'i."jay_z" IS NOT NULL AND '
                    'i."jay_z" <> 0 '
                    'ORDER BY i."cartodb_id" ASC;')

        self.assertEqual(pu.queen(self.params), expected)

    def test_construct_neighbor_query(self):
        """construct_neighbor_query('knn', ...) delegates to knn"""
        via_dispatch = pu.construct_neighbor_query('knn', self.params)
        direct = pu.knn(self.params)

        self.assertEqual(via_dispatch, direct)

    def test_get_attributes(self):
        """Placeholder for get_attributes"""
        # TODO: need to add tests
        self.assertEqual(True, True)

    def test_get_weight(self):
        """Placeholder for get_weight"""
        self.assertEqual(True, True)

    def test_empty_zipped_array(self):
        """empty_zipped_array returns one all-None tuple of the given width"""
        for width, expected in ((2, [(None, None)]),
                                (4, [(None, None, None, None)])):
            self.assertEqual(pu.empty_zipped_array(width), expected)
|
||||
2
release/python/0.0.4/crankshaft/crankshaft/__init__.py
Normal file
2
release/python/0.0.4/crankshaft/crankshaft/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
import random_seeds
|
||||
import clustering
|
||||
@@ -0,0 +1,2 @@
|
||||
from moran import *
|
||||
from kmeans import *
|
||||
@@ -0,0 +1,18 @@
|
||||
from sklearn.cluster import KMeans
|
||||
import plpy
|
||||
|
||||
def kmeans(query, no_clusters, no_init=20):
    """
    Cluster the point geometries returned by `query` with k-means.

    @param query text: SQL subquery; the aggregate query below reads its
                       `cartodb_id` and `the_geom` columns
    @param no_clusters int: passed to KMeans as n_clusters
    @param no_init int: passed to KMeans as n_init
    Output:
        list of (id, label) pairs in cartodb_id order; an empty list when
        no row with a non-null geometry is available
    """
    # NOTE(review): `query` is interpolated verbatim into the SQL text, so it
    # must come from a trusted caller.
    data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids,
array_agg(ST_X(the_geom) order by cartodb_id) xs,
array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a
where the_geom is not null
'''.format(query=query))

    # array_agg over zero input rows yields NULL (None on the Python side);
    # without this guard zip(None, None) raises TypeError.
    if len(data) == 0 or not data[0]['ids']:
        return []

    xs = data[0]['xs']
    ys = data[0]['ys']
    ids = data[0]['ids']

    km = KMeans(n_clusters=no_clusters, n_init=no_init)
    # Materialise the pairs so the estimator receives a real sequence even
    # where zip() is lazy.
    labels = km.fit_predict(list(zip(xs, ys)))
    return list(zip(ids, labels))
|
||||
|
||||
260
release/python/0.0.4/crankshaft/crankshaft/clustering/moran.py
Normal file
260
release/python/0.0.4/crankshaft/crankshaft/clustering/moran.py
Normal file
@@ -0,0 +1,260 @@
|
||||
"""
|
||||
Moran's I geostatistics (global clustering & outliers presence)
|
||||
"""
|
||||
|
||||
# TODO: Fill in local neighbors which have null/NoneType values with the
|
||||
# average of their neighborhood
|
||||
|
||||
import pysal as ps
|
||||
import plpy
|
||||
|
||||
# crankshaft module
|
||||
import crankshaft.pysal_utils as pu
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
def moran(subquery, attr_name,
          w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I (global)
    Implementation building neighbors with a PostGIS database and Moran's I
    core clusters with PySAL.
    Andy Eschbacher

    @param subquery text: query exposing the id, attribute and geometry columns
    @param attr_name text: name of the attribute column to analyse
    @param w_type text: type of neighbors to calculate ('knn' or 'queen')
    @param num_ngbrs int: number of neighbors (used by the knn query)
    @param permutations int: number of permutations handed to PySAL
    @param geom_col text: name of the geometry column
    @param id_col text: name of the id column
    Output:
        zip of [I] and [EI], i.e. a single (I, EI) pair; a single
        (None, None) row when the query returns no rows
    """
    qvals = {"id_col": id_col,
             "attr1": attr_name,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    query = pu.construct_neighbor_query(w_type, qvals)

    plpy.notice('** Query: %s' % query)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(2)
        plpy.notice('** Query returned with %d rows' % len(result))
    except plpy.SPIError as err:
        # Emit the diagnostics first: in PL/Python plpy.error() raises, so
        # anything placed after it never runs. Report the caught instance,
        # not the exception class.
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error('Error: areas of interest query failed, check input parameters')
        # Only reached when plpy.error() does not raise (e.g. a test double).
        return pu.empty_zipped_array(2)

    ## collect attributes
    attr_vals = pu.get_attributes(result)

    ## calculate weights
    weight = pu.get_weight(result, w_type, num_ngbrs)

    ## calculate moran global
    moran_global = ps.esda.moran.Moran(attr_vals, weight,
                                       permutations=permutations)

    return zip([moran_global.I], [moran_global.EI])
|
||||
|
||||
def moran_local(subquery, attr,
                w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I implementation for PL/Python
    Andy Eschbacher

    @param subquery text: query exposing the id, attribute and geometry columns
    @param attr text: name of the attribute column to analyse
    @param w_type text: type of neighbors to calculate ('knn' or 'queen')
    @param num_ngbrs int: number of neighbors (used by the knn query)
    @param permutations int: number of permutations handed to PySAL
    @param geom_col text: name of the geometry column
    @param id_col text: name of the id column
    Output:
        zip of (lisa.Is, quadrant labels, lisa.p_sim, weight.id_order,
        lisa.y); a single row of five Nones when the query returns no rows
    """

    # geometries with attributes that are null are ignored
    # resulting in a collection of not as near neighbors

    qvals = {"id_col": id_col,
             "attr1": attr,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(5)
    except plpy.SPIError as err:
        # Emit the diagnostics first: in PL/Python plpy.error() raises, so
        # anything placed after it never runs.
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error('Error: areas of interest query failed, check input parameters')
        # Only reached when plpy.error() does not raise (e.g. a test double).
        return pu.empty_zipped_array(5)

    attr_vals = pu.get_attributes(result)
    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
                                     permutations=permutations)

    # find quadrants for each geometry
    quads = quad_position(lisa.q)

    return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
def moran_rate(subquery, numerator, denominator,
               w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I Rate (global)
    Andy Eschbacher

    @param subquery text: query exposing the id, attribute and geometry columns
    @param numerator text: name of the numerator column (aliased attr1)
    @param denominator text: name of the denominator column (aliased attr2)
    @param w_type text: type of neighbors to calculate ('knn' or 'queen')
    @param num_ngbrs int: number of neighbors (used by the knn query)
    @param permutations int: number of permutations handed to PySAL
    @param geom_col text: name of the geometry column
    @param id_col text: name of the id column
    Output:
        zip of [I] and [EI], i.e. a single (I, EI) pair; a single
        (None, None) row when the query returns no rows
    """
    qvals = {"id_col": id_col,
             "attr1": numerator,
             "attr2": denominator,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    query = pu.construct_neighbor_query(w_type, qvals)

    plpy.notice('** Query: %s' % query)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(2)
        plpy.notice('** Query returned with %d rows' % len(result))
    except plpy.SPIError as err:
        # Emit the diagnostics first: in PL/Python plpy.error() raises, so
        # anything placed after it never runs. Report the caught instance,
        # not the exception class.
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error('Error: areas of interest query failed, check input parameters')
        # Only reached when plpy.error() does not raise (e.g. a test double).
        return pu.empty_zipped_array(2)

    ## collect attributes
    numer = pu.get_attributes(result, 1)
    denom = pu.get_attributes(result, 2)

    weight = pu.get_weight(result, w_type, num_ngbrs)

    ## calculate moran global rate
    lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
                                         permutations=permutations)

    return zip([lisa_rate.I], [lisa_rate.EI])
|
||||
|
||||
def moran_local_rate(subquery, numerator, denominator,
                     w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I Local Rate
    Andy Eschbacher

    @param subquery text: query exposing the id, attribute and geometry columns
    @param numerator text: name of the numerator column (aliased attr1)
    @param denominator text: name of the denominator column (aliased attr2)
    @param w_type text: type of neighbors to calculate ('knn' or 'queen')
    @param num_ngbrs int: number of neighbors (used by the knn query)
    @param permutations int: number of permutations handed to PySAL
    @param geom_col text: name of the geometry column
    @param id_col text: name of the id column
    Output:
        zip of (lisa.Is, quadrant labels, lisa.p_sim, weight.id_order,
        lisa.y); a single row of five Nones when the query returns no rows
    """
    # geometries with values that are null are ignored
    # resulting in a collection of not as near neighbors

    # The query helpers alias attribute keys in alphabetical order
    # (attr1, attr2, ...) and apply the "<> 0" filter to the second one.
    # The keys must therefore be "attr1"/"attr2": with "numerator" and
    # "denominator" as keys the denominator sorts first and the two columns
    # end up swapped.
    qvals = {"id_col": id_col,
             "attr1": numerator,
             "attr2": denominator,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(5)
    except plpy.SPIError as err:
        # Emit the diagnostics first: in PL/Python plpy.error() raises, so
        # anything placed after it never runs. Report the caught instance,
        # not the exception class.
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error('Error: areas of interest query failed, check input parameters')
        # Only reached when plpy.error() does not raise (e.g. a test double).
        return pu.empty_zipped_array(5)

    ## collect attributes
    numer = pu.get_attributes(result, 1)
    denom = pu.get_attributes(result, 2)

    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
                                          permutations=permutations)

    # find units of significance
    quads = quad_position(lisa.q)

    return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
def moran_local_bv(subquery, attr1, attr2,
                   permutations, geom_col, id_col, w_type, num_ngbrs):
    """
    Moran's I (local) Bivariate (untested)

    Note that the parameter order differs from the other moran_* functions.

    @param subquery text: query exposing the id, attribute and geometry columns
    @param attr1 text: name of the first attribute column
    @param attr2 text: name of the second attribute column
    @param permutations int: number of permutations handed to PySAL
    @param geom_col text: name of the geometry column
    @param id_col text: name of the id column
    @param w_type text: type of neighbors to calculate ('knn' or 'queen')
    @param num_ngbrs int: number of neighbors (used by the knn query)
    Output:
        zip of (lisa.Is, quadrant labels, lisa.p_sim, weight.id_order);
        a single row of four Nones when the query returns no rows
    """
    plpy.notice('** Constructing query')

    qvals = {"num_ngbrs": num_ngbrs,
             "attr1": attr1,
             "attr2": attr2,
             "subquery": subquery,
             "geom_col": geom_col,
             "id_col": id_col}

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(4)
    except plpy.SPIError as err:
        # Emit the diagnostics first: in PL/Python plpy.error() raises, so
        # anything placed after it never runs.
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error("Error: areas of interest query failed, "
                   "check input parameters")
        # Only reached when plpy.error() does not raise (e.g. a test double).
        return pu.empty_zipped_array(4)

    ## collect attributes
    attr1_vals = pu.get_attributes(result, 1)
    attr2_vals = pu.get_attributes(result, 2)

    # create weights
    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
                                        permutations=permutations)

    plpy.notice("len of Is: %d" % len(lisa.Is))

    # find clustering of significance
    lisa_sig = quad_position(lisa.q)

    plpy.notice('** Finished calculations')

    return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
|
||||
|
||||
# Low level functions ----------------------------------------
|
||||
|
||||
def map_quads(coord):
    """
    Translate a quadrant number into its Moran's I label.

    Input:
    @param coord (int): quadrant number (1=HH, 2=LH, 3=LL, 4=HL)
    Output:
    'HH', 'LH', 'LL' or 'HL'; None for any other value
    """
    quadrant_labels = ((1, 'HH'), (2, 'LH'), (3, 'LL'), (4, 'HL'))
    # Plain equality comparison, tried in quadrant order.
    for number, label in quadrant_labels:
        if coord == number:
            return label
    return None
|
||||
|
||||
def quad_position(quads):
    """
    Label every quadrant number in `quads` via map_quads.

    Input:
    @param quads ndarray: quadrant numbers, 1-4 (PySAL default)
    Output:
    list with one label per input entry ('HH', 'LL', etc., or None)
    """
    labels = []
    for quad in quads:
        labels.append(map_quads(quad))
    return labels
|
||||
@@ -0,0 +1 @@
|
||||
from pysal_utils import *
|
||||
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Utilities module for generic PySAL functionality, mainly centered on translating queries into numpy arrays or PySAL weights objects
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
|
||||
def construct_neighbor_query(w_type, query_vals):
    """Build the SQL string used for finding neighbors.

    @param w_type text: 'knn' (case-insensitive) selects the k-nearest
                        neighbors query; any other value selects queen
    @param query_vals dict: values used to construct the query
    """
    builder = knn if w_type.lower() == 'knn' else queen
    return builder(query_vals)
|
||||
|
||||
## Build weight object
|
||||
def get_weight(query_res, w_type='knn', num_ngbrs=5):
    """
    Construct PySAL weight from return value of query

    Each row gets row-standardised weights: 1 / (number of neighbors) for
    every neighbor, or an empty list when it has none. The weights are
    derived from the neighbors actually returned for a row, so a knn row
    with fewer than `num_ngbrs` neighbors still gets a weight list of
    matching length.

    @param query_res: query results with attributes and neighbors; each row
                      is read through its 'id' and 'neighbors' keys
    @param w_type text: kept for interface compatibility; rows with exactly
                        `num_ngbrs` neighbors get the same weights as before
    @param num_ngbrs int: kept for interface compatibility
    """
    neighbors = {x['id']: x['neighbors'] for x in query_res}

    weights = {}
    for row_id, ngbrs in neighbors.items():
        count = len(ngbrs)
        weights[row_id] = [1.0 / count] * count if count > 0 else []

    return ps.W(neighbors, weights)
|
||||
|
||||
def query_attr_select(params):
    """
    Build the attribute part of the SELECT list.

    Every key of `params` other than the reserved ones is treated as an
    attribute; the keys are sorted and aliased attr1, attr2, ... in that
    order. The emitted text still contains `{key}` placeholders for the
    caller to fill in.

    @param params: dict of information used in query (column names,
                   table name, etc.)
    """
    reserved = ('id_col', 'geom_col', 'subquery', 'num_ngbrs')
    attr_keys = sorted(key for key in params if key not in reserved)

    template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, "

    return "".join(template % {"col": key, "alias_num": position}
                   for position, key in enumerate(attr_keys, 1))
|
||||
|
||||
def query_attr_where(params):
    """
    Build the WHERE conditions that drop rows with NULL attributes.

    One "IS NOT NULL" condition is produced per attribute key (sorted);
    when there are exactly two attributes, the second one must also be
    non-zero. The table alias is left as the literal `idx_replace` for the
    caller to substitute.

    @param params: dict of information used in query
    """
    reserved = ('id_col', 'geom_col', 'subquery', 'num_ngbrs')
    names = sorted(key for key in params if key not in reserved)

    conditions = ["idx_replace.\"{%s}\" IS NOT NULL" % name
                  for name in names]

    if len(names) == 2:
        conditions.append("idx_replace.\"{%s}\" <> 0" % names[1])

    return " AND ".join(conditions)
|
||||
|
||||
def knn(params):
    """Build the SQL query for k-nearest neighbors.

    @param params dict: values to fill the template (id_col, geom_col,
                        subquery, num_ngbrs and the attribute keys)
    """
    select_part = query_attr_select(params)
    where_part = query_attr_where(params)

    # First pass (%-formatting) drops in the attribute fragments; second
    # pass (str.format) fills the {name} placeholders from params.
    template = "".join([
        "SELECT ",
        "i.\"{id_col}\" As id, ",
        "%(attr_select)s",
        "(SELECT ARRAY(SELECT j.\"{id_col}\" ",
        "FROM ({subquery}) As j ",
        "WHERE ",
        "i.\"{id_col}\" <> j.\"{id_col}\" AND ",
        "%(attr_where_j)s ",
        "ORDER BY ",
        "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC ",
        "LIMIT {num_ngbrs})",
        ") As neighbors ",
        "FROM ({subquery}) As i ",
        "WHERE ",
        "%(attr_where_i)s ",
        "ORDER BY i.\"{id_col}\" ASC;",
    ])

    with_attrs = template % {
        "attr_select": select_part,
        "attr_where_i": where_part.replace("idx_replace", "i"),
        "attr_where_j": where_part.replace("idx_replace", "j"),
    }

    return with_attrs.format(**params)
|
||||
|
||||
## SQL query for finding queens neighbors (all contiguous polygons)
|
||||
def queen(params):
    """Build the SQL query for queen neighbors (geometries that touch).

    @param params dict: information to fill query (id_col, geom_col,
                        subquery and the attribute keys)
    """
    select_part = query_attr_select(params)
    where_part = query_attr_where(params)

    # First pass (%-formatting) drops in the attribute fragments; second
    # pass (str.format) fills the {name} placeholders from params.
    template = "".join([
        "SELECT ",
        "i.\"{id_col}\" As id, ",
        "%(attr_select)s",
        "(SELECT ARRAY(SELECT j.\"{id_col}\" ",
        "FROM ({subquery}) As j ",
        "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND ",
        "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND ",
        "%(attr_where_j)s)",
        ") As neighbors ",
        "FROM ({subquery}) As i ",
        "WHERE ",
        "%(attr_where_i)s ",
        "ORDER BY i.\"{id_col}\" ASC;",
    ])

    with_attrs = template % {
        "attr_select": select_part,
        "attr_where_i": where_part.replace("idx_replace", "i"),
        "attr_where_j": where_part.replace("idx_replace", "j"),
    }

    return with_attrs.format(**params)
|
||||
|
||||
## to add more weight methods open a ticket or pull request
|
||||
|
||||
def get_attributes(query_res, attr_num=1):
    """
    Collect one attribute column from the query results as a float array.

    @param query_res: query results with attributes and neighbors
    @param attr_num: attribute number (1, 2, ...); selects key 'attr<n>'
    Output:
        numpy array of floats, one entry per row of query_res
    """
    key = 'attr' + str(attr_num)
    # Builtin float is what the np.float alias pointed to; the alias is
    # deprecated and missing from newer NumPy releases.
    return np.array([x[key] for x in query_res], dtype=float)
|
||||
|
||||
def empty_zipped_array(num_nones):
    """
    Placeholder result for cases of empty weights objects (no neighbors).

    Input:
    @param num_nones int: number of columns (e.g., 4)
    Output:
    a one-element list holding a tuple of `num_nones` Nones,
    e.g. [(None, None, None, None)]
    """
    blank_row = (None,) * num_nones
    return [blank_row]
|
||||
10
release/python/0.0.4/crankshaft/crankshaft/random_seeds.py
Normal file
10
release/python/0.0.4/crankshaft/crankshaft/random_seeds.py
Normal file
@@ -0,0 +1,10 @@
|
||||
import random
|
||||
import numpy
|
||||
|
||||
def set_random_seeds(value):
    """
    Seed both RNGs (Random Number Generators) used internally:
    the standard-library `random` module and `numpy.random`.
    """
    for seed_rng in (random.seed, numpy.random.seed):
        seed_rng(value)
|
||||
48
release/python/0.0.4/crankshaft/setup.py
Normal file
48
release/python/0.0.4/crankshaft/setup.py
Normal file
@@ -0,0 +1,48 @@
|
||||
|
||||
"""
|
||||
CartoDB Spatial Analysis Python Library
|
||||
See:
|
||||
https://github.com/CartoDB/crankshaft
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
_CLASSIFIERS = [
    'Development Status :: 3 - Alpha',
    'Intended Audience :: Mapping comunity',
    'Topic :: Maps :: Mapping Tools',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 2.7',
]

_EXTRAS = {
    'dev': ['unittest'],
    'test': ['unittest', 'nose', 'mock'],
}

# The choice of component versions is dictated by what's
# provisioned in the production servers.
_PINNED_REQUIREMENTS = [
    'joblib==0.8.3',
    'numpy==1.6.1',
    'scipy==0.14.0',
    'pysal==1.11.2',
    'scikit-learn==0.14.1',
]

# Package metadata for the crankshaft 0.0.4 release.
setup(
    name='crankshaft',
    version='0.0.4',
    description='CartoDB Spatial Analysis Python Library',
    url='https://github.com/CartoDB/crankshaft',
    author='Data Services Team - CartoDB',
    author_email='dataservices@cartodb.com',
    license='MIT',
    classifiers=_CLASSIFIERS,
    keywords='maps mapping tools spatial analysis geostatistics',
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),
    extras_require=_EXTRAS,
    install_requires=_PINNED_REQUIREMENTS,
    requires=['pysal', 'numpy', 'sklearn'],
    test_suite='test'
)
|
||||
1
release/python/0.0.4/crankshaft/test/fixtures/kmeans.json
vendored
Normal file
1
release/python/0.0.4/crankshaft/test/fixtures/kmeans.json
vendored
Normal file
@@ -0,0 +1 @@
|
||||
[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
|
||||
52
release/python/0.0.4/crankshaft/test/fixtures/moran.json
vendored
Normal file
52
release/python/0.0.4/crankshaft/test/fixtures/moran.json
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
[[0.9319096128346788, "HH"],
|
||||
[-1.135787401862846, "HL"],
|
||||
[0.11732030672508517, "LL"],
|
||||
[0.6152779669180425, "LL"],
|
||||
[-0.14657336660125297, "LH"],
|
||||
[0.6967858120189607, "LL"],
|
||||
[0.07949310115714454, "HH"],
|
||||
[0.4703198759258987, "HH"],
|
||||
[0.4421125200498064, "HH"],
|
||||
[0.5724288737143592, "LL"],
|
||||
[0.8970743435692062, "LL"],
|
||||
[0.18327334401918674, "LL"],
|
||||
[-0.01466729201304962, "HL"],
|
||||
[0.3481559372544409, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329988, "HH"],
|
||||
[0.4373841193538136, "HH"],
|
||||
[0.15971286468915544, "LL"],
|
||||
[1.0543588860308968, "HH"],
|
||||
[1.7372866900020818, "HH"],
|
||||
[1.091998586053999, "LL"],
|
||||
[0.1171572584252222, "HH"],
|
||||
[0.08438455015300014, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329985, "HH"],
|
||||
[1.1627044812890683, "HH"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.795275137550483, "HH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.3010757406693439, "LL"],
|
||||
[2.8205795942839376, "HH"],
|
||||
[0.11259190602909264, "LL"],
|
||||
[-0.07116352791516614, "HL"],
|
||||
[-0.09945240794119009, "LH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.1832733440191868, "LL"],
|
||||
[-0.39054253768447705, "HL"],
|
||||
[-0.1672071289487642, "HL"],
|
||||
[0.3337669247916343, "HH"],
|
||||
[0.2584386102554792, "HH"],
|
||||
[-0.19733845476322634, "HL"],
|
||||
[-0.9379282899805409, "LH"],
|
||||
[-0.028770969951095866, "LH"],
|
||||
[0.051367269430983485, "LL"],
|
||||
[-0.2172548045913472, "LH"],
|
||||
[0.05136726943098351, "LL"],
|
||||
[0.04191046803899837, "LL"],
|
||||
[0.7482357030403517, "HH"],
|
||||
[-0.014585767863118111, "LH"],
|
||||
[0.5410013139159929, "HH"],
|
||||
[1.0223932668429925, "LL"],
|
||||
[1.4179402898927476, "LL"]]
|
||||
54
release/python/0.0.4/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
54
release/python/0.0.4/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
[
|
||||
{"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
|
||||
{"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
|
||||
{"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
|
||||
{"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
|
||||
{"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
|
||||
{"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
|
||||
{"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
|
||||
{"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
|
||||
{"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
|
||||
{"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
|
||||
{"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
|
||||
{"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
|
||||
{"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
|
||||
{"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
|
||||
{"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
|
||||
{"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
|
||||
{"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
|
||||
{"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
|
||||
{"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
|
||||
{"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
|
||||
{"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
|
||||
{"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
|
||||
{"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
|
||||
{"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
|
||||
{"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
|
||||
{"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
|
||||
{"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
|
||||
{"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
|
||||
{"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
|
||||
{"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
|
||||
{"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
|
||||
{"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
|
||||
{"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
|
||||
{"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
|
||||
{"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
|
||||
{"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
|
||||
{"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
|
||||
{"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
|
||||
{"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
|
||||
{"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
|
||||
{"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
|
||||
{"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
|
||||
{"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
|
||||
{"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
|
||||
{"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
|
||||
{"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
|
||||
{"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
|
||||
{"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
|
||||
{"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
|
||||
{"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
|
||||
{"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
|
||||
{"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
|
||||
]
|
||||
13
release/python/0.0.4/crankshaft/test/helper.py
Normal file
13
release/python/0.0.4/crankshaft/test/helper.py
Normal file
@@ -0,0 +1,13 @@
|
||||
import unittest
|
||||
|
||||
from mock_plpy import MockPlPy
|
||||
plpy = MockPlPy()
|
||||
|
||||
import sys
|
||||
sys.modules['plpy'] = plpy
|
||||
|
||||
import os
|
||||
|
||||
def fixture_file(name):
    """Return the path of `name` inside the `fixtures` directory that sits
    next to this module (resolved through os.path.realpath)."""
    this_module = os.path.realpath(__file__)
    return os.path.join(os.path.dirname(this_module), 'fixtures', name)
|
||||
34
release/python/0.0.4/crankshaft/test/mock_plpy.py
Normal file
34
release/python/0.0.4/crankshaft/test/mock_plpy.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import re
|
||||
|
||||
class MockPlPy:
    """Minimal stand-in for the plpy module, for use in unit tests.

    Messages sent through notice()/info() are recorded in lists, and
    execute() returns canned results registered with _define_result().
    """

    # Names of the list attributes (re)created by _reset().
    _LIST_ATTRS = ('infos', 'notices', 'debugs', 'logs', 'warnings',
                   'errors', 'fatals', 'executes', 'results', 'prepares')

    def __init__(self):
        self._reset()

    def _reset(self):
        """Replace every recorded-state attribute with a fresh empty list."""
        for attr in self._LIST_ATTRS:
            setattr(self, attr, [])

    def _define_result(self, query, result):
        """Register `result` as the answer for queries matching `query`.

        `query` is compiled as a case-insensitive, multi-line regular
        expression.
        """
        flags = re.IGNORECASE | re.MULTILINE
        self.results.append([re.compile(query, flags), result])

    def notice(self, msg):
        """Record a notice message."""
        self.notices.append(msg)

    def info(self, msg):
        """Record an info message."""
        self.infos.append(msg)

    def execute(self, query):  # TODO: additional arguments
        """Return the first registered result whose pattern matches the
        start of `query`; an empty list when none does."""
        for pattern, canned in self.results:
            if pattern.match(query):
                return canned
        return []
|
||||
38
release/python/0.0.4/crankshaft/test/test_cluster_kmeans.py
Normal file
38
release/python/0.0.4/crankshaft/test/test_cluster_kmeans.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
import numpy as np
|
||||
import crankshaft.clustering as cc
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
|
||||
class KMeansTest(unittest.TestCase):
    """Testing class for the k-means clustering function"""

    def setUp(self):
        """Reset the mocked plpy and load the k-means fixture."""
        plpy._reset()
        # Use a context manager so the fixture file handle is closed.
        with open(fixture_file('kmeans.json')) as fixture:
            self.cluster_data = json.load(fixture)
        self.params = {"subquery": "select * from table",
                       "no_clusters": "10"
                       }

    def test_kmeans(self):
        """The 40 fixture points (20 near (10, 10), 20 near the origin)
        split into two clusters of 20."""
        data = self.cluster_data
        plpy._define_result('select', data)
        clusters = cc.kmeans('subquery', 2)
        labels = [a[1] for a in clusters]
        c1 = [a for a in clusters if a[1] == 0]
        c2 = [a for a in clusters if a[1] == 1]

        self.assertEqual(len(np.unique(labels)), 2)
        self.assertEqual(len(c1), 20)
        self.assertEqual(len(c2), 20)
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
|
||||
import crankshaft.clustering as cc
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
|
||||
class MoranTest(unittest.TestCase):
|
||||
"""Testing class for Moran's I functions"""
|
||||
|
||||
def setUp(self):
    """Reset the mocked plpy and load the neighbors and Moran fixtures."""
    plpy._reset()
    self.params = {"id_col": "cartodb_id",
                   "attr1": "andy",
                   "attr2": "jay_z",
                   "subquery": "SELECT * FROM a_list",
                   "geom_col": "the_geom",
                   "num_ngbrs": 321}
    # Use context managers so the fixture file handles are closed.
    with open(fixture_file('neighbors.json')) as handle:
        self.neighbors_data = json.load(handle)
    with open(fixture_file('moran.json')) as handle:
        self.moran_data = json.load(handle)
|
||||
|
||||
def test_map_quads(self):
    """map_quads labels quadrants 1-4 and yields None for anything else"""
    cases = [(1, 'HH'),
             (2, 'LH'),
             (3, 'LL'),
             (4, 'HL'),
             (33, None),
             ('andy', None)]
    for quadrant, label in cases:
        self.assertEqual(cc.map_quads(quadrant), label)
|
||||
|
||||
def test_quad_position(self):
|
||||
"""Test lisa_sig_vals"""
|
||||
|
||||
quads = np.array([1, 2, 3, 4], np.int)
|
||||
|
||||
ans = np.array(['HH', 'LH', 'LL', 'HL'])
|
||||
test_ans = cc.quad_position(quads)
|
||||
|
||||
self.assertTrue((test_ans == ans).all())
|
||||
|
||||
def test_moran_local(self):
|
||||
"""Test Moran's I local"""
|
||||
data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1234)
|
||||
result = cc.moran_local('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
result = [(row[0], row[1]) for row in result]
|
||||
expected = self.moran_data
|
||||
for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected):
|
||||
self.assertAlmostEqual(res_val, exp_val)
|
||||
self.assertEqual(res_quad, exp_quad)
|
||||
|
||||
def test_moran_local_rate(self):
|
||||
"""Test Moran's I rate"""
|
||||
data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1234)
|
||||
result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
print 'result == None? ', result == None
|
||||
result = [(row[0], row[1]) for row in result]
|
||||
expected = self.moran_data
|
||||
for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected):
|
||||
self.assertAlmostEqual(res_val, exp_val)
|
||||
|
||||
def test_moran(self):
|
||||
"""Test Moran's I global"""
|
||||
data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1235)
|
||||
result = cc.moran('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
print 'result == None?', result == None
|
||||
result_moran = result[0][0]
|
||||
expected_moran = np.array([row[0] for row in self.moran_data]).mean()
|
||||
self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)
|
||||
107
release/python/0.0.4/crankshaft/test/test_pysal_utils.py
Normal file
107
release/python/0.0.4/crankshaft/test/test_pysal_utils.py
Normal file
@@ -0,0 +1,107 @@
|
||||
import unittest
|
||||
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
|
||||
|
||||
class PysalUtilsTest(unittest.TestCase):
    """Testing class for utility functions related to PySAL integrations"""

    def setUp(self):
        """Build the parameter dictionary shared by the query tests."""
        self.params = dict(id_col="cartodb_id",
                           attr1="andy",
                           attr2="jay_z",
                           subquery="SELECT * FROM a_list",
                           geom_col="the_geom",
                           num_ngbrs=321)

    def test_query_attr_select(self):
        """Test query_attr_select"""
        # The expected template keeps the {attrN} placeholders unformatted.
        expected = ('i."{attr1}"::numeric As attr1, '
                    'i."{attr2}"::numeric As attr2, ')

        self.assertEqual(pu.query_attr_select(self.params), expected)

    def test_query_attr_where(self):
        """Test pu.query_attr_where"""
        expected = ('idx_replace."{attr1}" IS NOT NULL AND '
                    'idx_replace."{attr2}" IS NOT NULL AND '
                    'idx_replace."{attr2}" <> 0')

        self.assertEqual(pu.query_attr_where(self.params), expected)

    def test_knn(self):
        """Test knn neighbors constructor"""
        expected = ('SELECT i."cartodb_id" As id, '
                    'i."andy"::numeric As attr1, '
                    'i."jay_z"::numeric As attr2, '
                    '(SELECT ARRAY(SELECT j."cartodb_id" '
                    'FROM (SELECT * FROM a_list) As j '
                    'WHERE '
                    'i."cartodb_id" <> j."cartodb_id" AND '
                    'j."andy" IS NOT NULL AND '
                    'j."jay_z" IS NOT NULL AND '
                    'j."jay_z" <> 0 '
                    'ORDER BY '
                    'j."the_geom" <-> i."the_geom" ASC '
                    'LIMIT 321)) As neighbors '
                    'FROM (SELECT * FROM a_list) As i '
                    'WHERE i."andy" IS NOT NULL AND '
                    'i."jay_z" IS NOT NULL AND '
                    'i."jay_z" <> 0 '
                    'ORDER BY i."cartodb_id" ASC;')

        self.assertEqual(pu.knn(self.params), expected)

    def test_queen(self):
        """Test queen neighbors constructor"""
        expected = ('SELECT i."cartodb_id" As id, '
                    'i."andy"::numeric As attr1, '
                    'i."jay_z"::numeric As attr2, '
                    '(SELECT ARRAY(SELECT j."cartodb_id" '
                    'FROM (SELECT * FROM a_list) As j '
                    'WHERE '
                    'i."cartodb_id" <> j."cartodb_id" AND '
                    'ST_Touches(i."the_geom", '
                    'j."the_geom") AND '
                    'j."andy" IS NOT NULL AND '
                    'j."jay_z" IS NOT NULL AND '
                    'j."jay_z" <> 0)'
                    ') As neighbors '
                    'FROM (SELECT * FROM a_list) As i '
                    'WHERE i."andy" IS NOT NULL AND '
                    'i."jay_z" IS NOT NULL AND '
                    'i."jay_z" <> 0 '
                    'ORDER BY i."cartodb_id" ASC;')

        self.assertEqual(pu.queen(self.params), expected)

    def test_construct_neighbor_query(self):
        """Test construct_neighbor_query"""
        # The 'knn' dispatch must produce exactly the raw knn query.
        raw_knn_query = pu.knn(self.params)
        dispatched_query = pu.construct_neighbor_query('knn', self.params)

        self.assertEqual(dispatched_query, raw_knn_query)

    def test_get_attributes(self):
        """Test get_attributes"""
        # TODO: placeholder only -- real tests still need to be added.
        self.assertTrue(True)

    def test_get_weight(self):
        """Test get_weight"""
        # TODO: placeholder only -- nothing is exercised here yet.
        self.assertTrue(True)

    def test_empty_zipped_array(self):
        """Test empty_zipped_array"""
        # For width n the expected value is one tuple holding n Nones.
        for width in (2, 4):
            self.assertEqual(pu.empty_zipped_array(width),
                             [(None,) * width])
|
||||
5
release/python/0.1.0/crankshaft/crankshaft/__init__.py
Normal file
5
release/python/0.1.0/crankshaft/crankshaft/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Import all modules"""
|
||||
import crankshaft.random_seeds
|
||||
import crankshaft.clustering
|
||||
import crankshaft.space_time_dynamics
|
||||
import crankshaft.segmentation
|
||||
@@ -0,0 +1,3 @@
|
||||
"""Import all functions for clustering"""
|
||||
from moran import *
|
||||
from kmeans import *
|
||||
@@ -0,0 +1,18 @@
|
||||
from sklearn.cluster import KMeans
|
||||
import plpy
|
||||
|
||||
def kmeans(query, no_clusters, no_init=20):
    """Cluster the point geometries returned by `query` with k-means.

    Args:
        query: SQL text used as a subquery. The statement built here reads
            its `cartodb_id` and `the_geom` columns and skips rows whose
            `the_geom` is null.
        no_clusters: forwarded to KMeans as `n_clusters`.
        no_init: forwarded to KMeans as `n_init`.

    Returns:
        A list of (id, label) pairs, one per row with a non-null geometry,
        in `cartodb_id` order; an empty list when no such rows exist.
    """
    # NOTE(review): `query` is interpolated verbatim into the statement, so it
    # must come from a trusted source -- confirm against the SQL-level callers.
    data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids,
        array_agg(ST_X(the_geom) order by cartodb_id) xs,
        array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a
        where the_geom is not null
    '''.format(query=query))

    row = data[0]
    xs = row['xs']
    ys = row['ys']
    ids = row['ids']

    # array_agg over zero rows yields NULL (None here); without this guard
    # zip(None, None) would raise a TypeError.
    if not ids:
        return []

    km = KMeans(n_clusters=no_clusters, n_init=no_init)
    # Materialize the coordinate pairs: on Python 3 zip() is a lazy iterator,
    # which the estimator cannot consume as an array.
    labels = km.fit_predict(list(zip(xs, ys)))
    # A list keeps the result re-iterable and sized on every Python version.
    return list(zip(ids, labels))
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user