Compare commits
504 Commits
similarity
...
beter
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
69713ecb0a | ||
|
|
d07822c7a0 | ||
|
|
154d1a674d | ||
|
|
34161fd8a4 | ||
|
|
850f3f6a31 | ||
|
|
021738d9f8 | ||
|
|
161bb14c08 | ||
|
|
f8739b6a68 | ||
|
|
5df846fe66 | ||
|
|
b9c4e6e8ef | ||
|
|
5c34e08c7d | ||
|
|
3c8ac7d45d | ||
|
|
59dc9434f7 | ||
|
|
2c6fcfc294 | ||
|
|
15b460eeb9 | ||
|
|
b0dcd7f572 | ||
|
|
2547318f59 | ||
|
|
25e453a882 | ||
|
|
62076bb48c | ||
|
|
6ab1c285d9 | ||
|
|
b22f79b0cc | ||
|
|
7c63b66fdd | ||
|
|
db501a2f02 | ||
|
|
6fe4fc9668 | ||
|
|
280a5193ef | ||
|
|
c27ec58948 | ||
|
|
bb3ff43f0f | ||
|
|
2f27622a6d | ||
|
|
c5a2746a53 | ||
|
|
538ab9a071 | ||
|
|
c8f5448b7c | ||
|
|
224fbc2fc5 | ||
|
|
2738c1f29c | ||
|
|
a8bd122762 | ||
|
|
a9add4b49c | ||
|
|
83f1900512 | ||
|
|
7eee4faac1 | ||
|
|
84d33d841f | ||
|
|
ded26dc46b | ||
|
|
0d40080f6c | ||
|
|
0867e69d1f | ||
|
|
cbe8571546 | ||
|
|
af536757fe | ||
|
|
b6dae5e380 | ||
|
|
64c4b6611c | ||
|
|
a188b2e104 | ||
|
|
4389c9538d | ||
|
|
2bc6b0782a | ||
|
|
3c6d73b7e2 | ||
|
|
3e0dba3522 | ||
|
|
5d8641732f | ||
|
|
f0c6cca766 | ||
|
|
f800a35fd1 | ||
|
|
54bbd18b02 | ||
|
|
da23b002cf | ||
|
|
a370a2da52 | ||
|
|
5404589058 | ||
|
|
b255fd3e06 | ||
|
|
0feaf36cf6 | ||
|
|
5d2a1881b1 | ||
|
|
a95423174c | ||
|
|
4314f0f066 | ||
|
|
c2e2359e65 | ||
|
|
361505fca9 | ||
|
|
c47116571f | ||
|
|
3e1cef9958 | ||
|
|
947d6ba798 | ||
|
|
ffd651b91a | ||
|
|
a271593fe9 | ||
|
|
83219270ae | ||
|
|
215e61396a | ||
|
|
c7e690980f | ||
|
|
da1449331c | ||
|
|
c7f5c24510 | ||
|
|
11c33ce3fa | ||
|
|
0a53a6e71d | ||
|
|
fa4e5ae686 | ||
|
|
ecb4bd9606 | ||
|
|
ecc9814a88 | ||
|
|
6846014a4f | ||
|
|
23b2ad57c5 | ||
|
|
99856ce956 | ||
|
|
f11982f531 | ||
|
|
bd05e7739d | ||
|
|
5754087140 | ||
|
|
8bc6f69a1b | ||
|
|
b54c62890f | ||
|
|
acde384157 | ||
|
|
b8accb48fc | ||
|
|
f2bb0b496b | ||
|
|
aaa36569de | ||
|
|
803816f5c9 | ||
|
|
1ef3f86474 | ||
|
|
f1d420a6f7 | ||
|
|
06452562b9 | ||
|
|
07e4062237 | ||
|
|
5443b67470 | ||
|
|
c7bb9698a9 | ||
|
|
b68f1c53b6 | ||
|
|
a665e41a83 | ||
|
|
d0e967d22c | ||
|
|
bcb73dee11 | ||
|
|
c52eb507ea | ||
|
|
375f765531 | ||
|
|
e924abbacc | ||
|
|
dc150e6936 | ||
|
|
24b9cda5aa | ||
|
|
2ed9479e8f | ||
|
|
795413e46d | ||
|
|
e5ea836493 | ||
|
|
84896e0634 | ||
|
|
258322fcca | ||
|
|
ff0363894f | ||
|
|
d216b02928 | ||
|
|
eee612b12d | ||
|
|
4a65c88300 | ||
|
|
754e66c02c | ||
|
|
166e9e223f | ||
|
|
29de72de33 | ||
|
|
37e4fc7cad | ||
|
|
4902f6a9d4 | ||
|
|
5f1cf951ea | ||
|
|
2ff20e596e | ||
|
|
eff548dec9 | ||
|
|
dcb364c3ee | ||
|
|
1d09eac3e7 | ||
|
|
5127845100 | ||
|
|
ee4eb795b7 | ||
|
|
2ede55d165 | ||
|
|
df5faa6745 | ||
|
|
06f0cb0dc4 | ||
|
|
11176b71b3 | ||
|
|
b5445da303 | ||
|
|
5d109acd8d | ||
|
|
2937c97fea | ||
|
|
c392aec98a | ||
|
|
4e42625d79 | ||
|
|
b71152a884 | ||
|
|
ce4cc637ae | ||
|
|
ccccf68066 | ||
|
|
60f52633fa | ||
|
|
1148aa417a | ||
|
|
e29f6f2861 | ||
|
|
c229a85491 | ||
|
|
6d6d7ef2ba | ||
|
|
feab6f177e | ||
|
|
a6440f2ef7 | ||
|
|
a530de80f1 | ||
|
|
139e86d414 | ||
|
|
b8ce37eb60 | ||
|
|
036a33aced | ||
|
|
90f36b4058 | ||
|
|
df68d2454e | ||
|
|
bbdd4de6ee | ||
|
|
44dc5811b5 | ||
|
|
2261d11de0 | ||
|
|
2a665a60db | ||
|
|
22cdc53c49 | ||
|
|
6ed9901103 | ||
|
|
02c98443be | ||
|
|
a42eb400d8 | ||
|
|
457f54007e | ||
|
|
e071d323d4 | ||
|
|
23a576939b | ||
|
|
2b75da1db2 | ||
|
|
37eb85350d | ||
|
|
72ee1a6d48 | ||
|
|
605e6e88ae | ||
|
|
7e88ca6c47 | ||
|
|
c3e12036e7 | ||
|
|
40481f1286 | ||
|
|
622235d787 | ||
|
|
623613aa5c | ||
|
|
a451fb5b6a | ||
|
|
ef436546b4 | ||
|
|
fa14b8d9bb | ||
|
|
43d00a9976 | ||
|
|
0f47659cf9 | ||
|
|
5423684e46 | ||
|
|
eeb3a01c62 | ||
|
|
f127d4a18e | ||
|
|
9016e5f35e | ||
|
|
66a0c80aee | ||
|
|
1061c85405 | ||
|
|
6fc460f6db | ||
|
|
ce86c4c5b4 | ||
|
|
4352d3ded7 | ||
|
|
cd0a802aff | ||
|
|
6fe467366a | ||
|
|
2963afd711 | ||
|
|
dd6e116da9 | ||
|
|
aa745f67db | ||
|
|
c668a82fcf | ||
|
|
cfacda5799 | ||
|
|
cbc505a62b | ||
|
|
e9ce87fb99 | ||
|
|
d67f3951a7 | ||
|
|
3ddcd4ed2d | ||
|
|
94497cd791 | ||
|
|
cf38cb43c0 | ||
|
|
c16a364254 | ||
|
|
990e714685 | ||
|
|
1889c52578 | ||
|
|
fba043e832 | ||
|
|
5dcbbe0429 | ||
|
|
c181bfa72e | ||
|
|
b7a412fa4d | ||
|
|
f6cedd7c86 | ||
|
|
0ea060a698 | ||
|
|
ed6ecd0fef | ||
|
|
0afb39fb1d | ||
|
|
bae3362b59 | ||
|
|
edeac18389 | ||
|
|
e2f8c67034 | ||
|
|
250f932915 | ||
|
|
2c373dd9c7 | ||
|
|
6b4b04d9a3 | ||
|
|
8a529fc956 | ||
|
|
756db707b5 | ||
|
|
5b8f7c4cd5 | ||
|
|
51b8da5bf8 | ||
|
|
630965605c | ||
|
|
14e5ca4b22 | ||
|
|
ebbf68af57 | ||
|
|
cb39d1830d | ||
|
|
5b6d2c568b | ||
|
|
7914fcdb87 | ||
|
|
317f7f2fa6 | ||
|
|
e95374646f | ||
|
|
5c207d9ec0 | ||
|
|
8430b98931 | ||
|
|
644542e5df | ||
|
|
7f6766b6c1 | ||
|
|
8953bf92ee | ||
|
|
065dc476b4 | ||
|
|
170260b7f5 | ||
|
|
a6b5f74d67 | ||
|
|
47f1f918d4 | ||
|
|
9ada8d12ca | ||
|
|
1bad568fbf | ||
|
|
75b34d29ce | ||
|
|
1e74be7cac | ||
|
|
66f4dfa4ea | ||
|
|
60eb316cbe | ||
|
|
592eb68d42 | ||
|
|
b80324c0d1 | ||
|
|
7c2d160da0 | ||
|
|
01b0efd82b | ||
|
|
749953931c | ||
|
|
4986743ce9 | ||
|
|
4886c187c4 | ||
|
|
8ea1f2edea | ||
|
|
3284ed19a5 | ||
|
|
a31921409c | ||
|
|
0b2caa48e9 | ||
|
|
4118b57f1f | ||
|
|
ac51256463 | ||
|
|
e74d80f3ea | ||
|
|
abe07166de | ||
|
|
cdc072b098 | ||
|
|
34c9a7d4cf | ||
|
|
75a0e54849 | ||
|
|
667bdb79a2 | ||
|
|
613cdb4c6f | ||
|
|
d88c967bb7 | ||
|
|
5f154d84d6 | ||
|
|
244cf65617 | ||
|
|
2d8d5e8c2e | ||
|
|
358202d324 | ||
|
|
3becd449c2 | ||
|
|
b5142002e9 | ||
|
|
8c273a53bc | ||
|
|
c4b2ae91bf | ||
|
|
b2eb5bd355 | ||
|
|
448562f53e | ||
|
|
5f339a01f7 | ||
|
|
132f4bbda9 | ||
|
|
4fdb28e2df | ||
|
|
c7788b7b2c | ||
|
|
e8e6f2b6b6 | ||
|
|
b5c3e73bc0 | ||
|
|
a148fc7d89 | ||
|
|
84d3238d59 | ||
|
|
58ef79fb6a | ||
|
|
33aff6a744 | ||
|
|
6a5f9155cc | ||
|
|
c3afdbff4b | ||
|
|
6de5fb10a0 | ||
|
|
a636d28457 | ||
|
|
927d66911e | ||
|
|
955f25cdae | ||
|
|
2362e51c10 | ||
|
|
14b44c358f | ||
|
|
02a20566dc | ||
|
|
491eeb34d8 | ||
|
|
3ef7c9f62e | ||
|
|
1abb7d669d | ||
|
|
60b9f9bd0e | ||
|
|
d078a87890 | ||
|
|
57aa28ee5c | ||
|
|
3181c51637 | ||
|
|
1234bfeb9d | ||
|
|
f2b5c788c0 | ||
|
|
45653a850e | ||
|
|
c48f9b67b7 | ||
|
|
966dd4268a | ||
|
|
15aad5a695 | ||
|
|
0eca26d515 | ||
|
|
27971711b5 | ||
|
|
cb3a49594d | ||
|
|
e84dc30b77 | ||
|
|
55f8626901 | ||
|
|
e82f5b0264 | ||
|
|
c21fcdf69a | ||
|
|
c8871a5547 | ||
|
|
3d99d1f9bf | ||
|
|
78ceb02c22 | ||
|
|
408dc6806e | ||
|
|
702a1fb1ed | ||
|
|
76ee4cacbc | ||
|
|
4192930260 | ||
|
|
2fe50e1f71 | ||
|
|
d97231f604 | ||
|
|
cb330ebe6b | ||
|
|
505ae0fb44 | ||
|
|
35b5612ded | ||
|
|
cfb58f7898 | ||
|
|
bef5fe2c80 | ||
|
|
ab349fbdc0 | ||
|
|
9e4d378a08 | ||
|
|
3a2e5ec7f9 | ||
|
|
1e42c23919 | ||
|
|
55d2860a84 | ||
|
|
b7860d5a24 | ||
|
|
476ec04386 | ||
|
|
75d97915d6 | ||
|
|
068c80f369 | ||
|
|
5f98735ce9 | ||
|
|
7d6148456e | ||
|
|
f6f9d6e9c8 | ||
|
|
32515f445e | ||
|
|
dd0ca9a24f | ||
|
|
045fd67f47 | ||
|
|
2bb2e60af8 | ||
|
|
99dc363c7d | ||
|
|
c799f4d73b | ||
|
|
3e01470aa0 | ||
|
|
a2b3733a1e | ||
|
|
a5e4ae99ce | ||
|
|
c80975fe46 | ||
|
|
dae406927f | ||
|
|
a177bf5620 | ||
|
|
de7d56dc29 | ||
|
|
642935e44a | ||
|
|
15fc0bb683 | ||
|
|
8cbc29a3e6 | ||
|
|
03aada8758 | ||
|
|
faa899cf87 | ||
|
|
911a0ccccc | ||
|
|
76b3a873b8 | ||
|
|
de274bf628 | ||
|
|
95803b4cd4 | ||
|
|
609e2aa015 | ||
|
|
44a6471c69 | ||
|
|
2fa087bb62 | ||
|
|
3f210c2a71 | ||
|
|
6a9045ba62 | ||
|
|
6f72075999 | ||
|
|
89c47dcef6 | ||
|
|
1d7f62fa85 | ||
|
|
279912c03f | ||
|
|
7c9628eef9 | ||
|
|
81d7af9e9a | ||
|
|
4df8257377 | ||
|
|
b62d7b32ef | ||
|
|
7c4314a411 | ||
|
|
1912d57891 | ||
|
|
1d13b98d68 | ||
|
|
3c066d65fc | ||
|
|
79699cd5cb | ||
|
|
59d50a1c48 | ||
|
|
01fc2c1dd1 | ||
|
|
f5fb4499db | ||
|
|
ad5cffbf0d | ||
|
|
1db938c450 | ||
|
|
3480a0d252 | ||
|
|
237aa1c581 | ||
|
|
1e19f468eb | ||
|
|
8b5e910234 | ||
|
|
d08a2b6d2d | ||
|
|
a222341863 | ||
|
|
4834ee2f42 | ||
|
|
bbe22d0b4d | ||
|
|
284d8ede44 | ||
|
|
a8943bae98 | ||
|
|
75531b671e | ||
|
|
0acae8240f | ||
|
|
7b98415da3 | ||
|
|
5a2319db72 | ||
|
|
b0c47663da | ||
|
|
9db4b7f519 | ||
|
|
fd1862167c | ||
|
|
c870f68c77 | ||
|
|
1e8bc12e0a | ||
|
|
b33ba2d294 | ||
|
|
889cd5c579 | ||
|
|
1a4944b960 | ||
|
|
9d3de5a8ef | ||
|
|
7f3b23f67a | ||
|
|
cc4579461d | ||
|
|
e95c40c2f9 | ||
|
|
69f08c4b78 | ||
|
|
4e86965f03 | ||
|
|
d41e28bc6f | ||
|
|
1f73be2752 | ||
|
|
5183f5ff92 | ||
|
|
73d38bbbaa | ||
|
|
e7de471ac8 | ||
|
|
14d50facda | ||
|
|
5e8d11fce2 | ||
|
|
183a0f9604 | ||
|
|
fc3a7e3d78 | ||
|
|
4782d39849 | ||
|
|
21dd956c15 | ||
|
|
c0dfaa8341 | ||
|
|
edf635886b | ||
|
|
a5cb857841 | ||
|
|
971c8f75d8 | ||
|
|
54d35c614b | ||
|
|
d1a267febb | ||
|
|
0eb3db3c1d | ||
|
|
7a7cbcf33f | ||
|
|
a0a026c2a1 | ||
|
|
c04e15ef81 | ||
|
|
ae1bb703a7 | ||
|
|
0e3970f52c | ||
|
|
59c520da16 | ||
|
|
90c3e21c0d | ||
|
|
3013998e1b | ||
|
|
2b8adb744d | ||
|
|
434cfcd1e9 | ||
|
|
b5c6b42081 | ||
|
|
408e34cd38 | ||
|
|
ca610af4d8 | ||
|
|
e80fdca7fc | ||
|
|
fd7aa4140a | ||
|
|
7a1eb6b9b6 | ||
|
|
1b0d1cc82c | ||
|
|
ca5175f15b | ||
|
|
4e870e4393 | ||
|
|
b05ad98ed9 | ||
|
|
bc8055a12b | ||
|
|
c3913459d9 | ||
|
|
f571e59a95 | ||
|
|
0400b1a880 | ||
|
|
c7e4baa4aa | ||
|
|
c44434ef08 | ||
|
|
95247f66bb | ||
|
|
9ba9d07bb5 | ||
|
|
ef475adc26 | ||
|
|
3294eb35ab | ||
|
|
693f6a68db | ||
|
|
e73862a6e1 | ||
|
|
fd76a509ea | ||
|
|
c18baf26d8 | ||
|
|
314d1851db | ||
|
|
6165d5e61e | ||
|
|
7695102500 | ||
|
|
369d1d2f41 | ||
|
|
e32bab3f88 | ||
|
|
cd3790860a | ||
|
|
1de90a7d39 | ||
|
|
9535506b93 | ||
|
|
bae2f04955 | ||
|
|
dc0873cd2b | ||
|
|
d4621a6e9c | ||
|
|
9943d4de58 | ||
|
|
8cccb18eed | ||
|
|
a0cb699b1a | ||
|
|
b8330dce07 | ||
|
|
2e1b598b4f | ||
|
|
d140b4249e | ||
|
|
d398494720 | ||
|
|
fbc30f1224 | ||
|
|
98c2b11935 | ||
|
|
491577ed62 | ||
|
|
68e5e0892c | ||
|
|
c5a58f97ec | ||
|
|
42e760b5d1 | ||
|
|
cfb40ddecd | ||
|
|
f3673d6f89 | ||
|
|
c488900c8c | ||
|
|
b889416947 | ||
|
|
58cf210e96 | ||
|
|
1e16b7839b | ||
|
|
9b32918746 | ||
|
|
fb071215dc | ||
|
|
d3e1fca2b3 | ||
|
|
f134a54c24 | ||
|
|
803781e08d | ||
|
|
fcf57289fc | ||
|
|
f885cc9f7b | ||
|
|
d96d6b2c48 | ||
|
|
746dcc9723 |
3
.brackets.json
Normal file
3
.brackets.json
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"sbruchmann.staticpreview.basepath": "/home/carto/Projects/crankshaft/"
|
||||
}
|
||||
10
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
10
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
|
||||
- [ ] All declared geometries are `geometry(Geometry, 4326)` for general geoms, or `geometry(Point, 4326)`
|
||||
- [ ] Existing functions in crankshaft python library called from the extension are kept at least from version N to version N+1 (to avoid breakage during upgrades).
|
||||
- [ ] Docs for public-facing functions are written
|
||||
- [ ] New functions follow the naming conventions: `CDB_NameOfFunction`. Where internal functions begin with an underscore
|
||||
- [ ] Video explaining the analysis and showing examples
|
||||
- [ ] Analysis Documentation written [template](https://docs.google.com/a/cartodb.com/document/d/1X2KOtaiEBKWNMp8UjwcLB-kE9aIOw09aOjX3oaCjeME/edit?usp=sharing)
|
||||
- [ ] Smoke test written
|
||||
- [ ] Hand-off document for camshaft node written
|
||||
- [ ] If function is in Python, code conforms to [PEP8 Style Guide](https://www.python.org/dev/peps/pep-0008/)
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
||||
envs/
|
||||
*.pyc
|
||||
.DS_Store
|
||||
.idea/
|
||||
|
||||
61
.travis.yml
Normal file
61
.travis.yml
Normal file
@@ -0,0 +1,61 @@
|
||||
language: c
|
||||
|
||||
env:
|
||||
global:
|
||||
- PAGER=cat
|
||||
|
||||
before_install:
|
||||
- ./check-up-to-date-with-master.sh
|
||||
- sudo apt-get -y install python-pip
|
||||
|
||||
- sudo apt-get -y install python-software-properties
|
||||
- sudo add-apt-repository -y ppa:cartodb/sci
|
||||
- sudo add-apt-repository -y ppa:cartodb/postgresql-9.5
|
||||
- sudo add-apt-repository -y ppa:cartodb/gis
|
||||
- sudo add-apt-repository -y ppa:cartodb/gis-testing
|
||||
- sudo apt-get update
|
||||
|
||||
- sudo apt-get -y install python-joblib=0.8.3-1-cdb1
|
||||
- sudo apt-get -y install python-numpy=1:1.6.1-6ubuntu1
|
||||
|
||||
# Install pysal
|
||||
- sudo pip install -I pysal==1.11.2
|
||||
|
||||
- sudo apt-get -y install python-scipy=0.14.0-2-cdb6
|
||||
- sudo apt-get -y --no-install-recommends install python-sklearn-lib=0.14.1-3-cdb2
|
||||
- sudo apt-get -y --no-install-recommends install python-sklearn=0.14.1-3-cdb2
|
||||
- sudo apt-get -y --no-install-recommends install python-scikits-learn=0.14.1-3-cdb2
|
||||
|
||||
# Force instalation of libgeos-3.5.0 (presumably needed because of existing version of postgis)
|
||||
- sudo apt-get -y install libgeos-3.5.0=3.5.0-1cdb2
|
||||
|
||||
# Install postgres db and build deps
|
||||
- sudo /etc/init.d/postgresql stop # stop travis default instance
|
||||
- sudo apt-get -y remove --purge postgresql-9.1
|
||||
- sudo apt-get -y remove --purge postgresql-9.2
|
||||
- sudo apt-get -y remove --purge postgresql-9.3
|
||||
- sudo apt-get -y remove --purge postgresql-9.4
|
||||
- sudo apt-get -y remove --purge postgresql-9.5
|
||||
- sudo rm -rf /var/lib/postgresql/
|
||||
- sudo rm -rf /var/log/postgresql/
|
||||
- sudo rm -rf /etc/postgresql/
|
||||
- sudo apt-get -y remove --purge postgis-2.2
|
||||
- sudo apt-get -y autoremove
|
||||
|
||||
- sudo apt-get -y install postgresql-9.5=9.5.2-3cdb2
|
||||
- sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb2
|
||||
- sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb2
|
||||
- sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2
|
||||
- sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2
|
||||
|
||||
# configure it to accept local connections from postgres
|
||||
- echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \
|
||||
| sudo tee /etc/postgresql/9.5/main/pg_hba.conf
|
||||
- sudo /etc/init.d/postgresql restart 9.5
|
||||
|
||||
install:
|
||||
- sudo make install
|
||||
|
||||
script:
|
||||
- make test || { cat src/pg/test/regression.diffs; false; }
|
||||
- ./check-compatibility.sh
|
||||
@@ -1,10 +1,7 @@
|
||||
# Development process
|
||||
|
||||
Please read the Working Process/Quickstart Guide in [README.md](https://github.com/CartoDB/crankshaft/blob/master/README.md) first.
|
||||
|
||||
For any modification of crankshaft, such as adding new features,
|
||||
refactoring or bug-fixing, topic branch must be created out of the `develop`
|
||||
branch and be used for the development process.
|
||||
refactoring or bugfixing, a topic branch must be created out of the `develop`.
|
||||
|
||||
Modifications are done inside `src/pg/sql` and `src/py/crankshaft`.
|
||||
|
||||
@@ -14,81 +11,51 @@ Take into account:
|
||||
(inside `src/pg/test`, `src/py/crankshaft/test`) as well as to
|
||||
detect any bugs that are being fixed.
|
||||
* Add or modify the corresponding documentation files in the `doc` folder.
|
||||
Since we expect to have highly technical functions here, an extense
|
||||
background explanation would be of great help to users of this extension.
|
||||
* Convention: snake case(i.e. `snake_case` and not `CamelCase`)
|
||||
shall be used for all function names.
|
||||
Prefix function names intended for public use with `cdb_`
|
||||
and private functions (to be used only internally inside
|
||||
the extension) with `_cdb_`.
|
||||
* Naming conventions for function names:
|
||||
- use `CamelCase`
|
||||
- prefix "public" functions with `CDB_`. E.g: `CDB_SpatialMarkovTrend`
|
||||
- prefix "private" functions with an underscore. E.g: `_CDB_MyObscureInternalImplementationDetail`
|
||||
|
||||
Once the code is ready to be tested, update the local development installation
|
||||
with `sudo make install`.
|
||||
This will update the 'dev' version of the extension in `src/pg/` and
|
||||
make it available to PostgreSQL.
|
||||
It will also install the python package (crankshaft) in a virtual
|
||||
environment `env/dev`.
|
||||
|
||||
The version number of the Python package, defined in
|
||||
`src/pg/crankshaft/setup.py` will be overridden when
|
||||
the package is released and always match the extension version number,
|
||||
but for development it shall be kept as '0.0.0'.
|
||||
|
||||
Run the tests with `make test`.
|
||||
|
||||
To use the python extension for custom tests, activate the virtual
|
||||
environment with:
|
||||
|
||||
```
|
||||
source envs/dev/bin/activate
|
||||
```
|
||||
|
||||
Update extension in a working database with:
|
||||
|
||||
* `ALTER EXTENSION crankshaft UPDATE TO 'current';`
|
||||
`ALTER EXTENSION crankshaft UPDATE TO 'dev';`
|
||||
|
||||
Note: we keep the current development version install as 'dev' always;
|
||||
we update through the 'current' alias to allow changing the extension
|
||||
contents but not the version identifier. This will fail if the
|
||||
changes involve incompatible function changes such as a different
|
||||
return type; in that case the offending function (or the whole extension)
|
||||
should be dropped manually before the update.
|
||||
```sql
|
||||
ALTER EXTENSION crankshaft UPDATE TO 'current';
|
||||
ALTER EXTENSION crankshaft UPDATE TO 'dev';
|
||||
```
|
||||
|
||||
If the extension has not previously been installed in a database,
|
||||
it can be installed directly with:
|
||||
|
||||
* `CREATE EXTENSION IF NOT EXISTS plpythonu;`
|
||||
`CREATE EXTENSION IF NOT EXISTS postgis;`
|
||||
`CREATE EXTENSION IF NOT EXISTS cartodb;`
|
||||
`CREATE EXTENSION crankshaft WITH VERSION 'dev';`
|
||||
|
||||
Note: the development extension uses the development python virtual
|
||||
environment automatically.
|
||||
|
||||
Before proceeding to the release process peer code reviewing of the code is
|
||||
a must.
|
||||
```sql
|
||||
CREATE EXTENSION IF NOT EXISTS plpythonu;
|
||||
CREATE EXTENSION IF NOT EXISTS postgis;
|
||||
CREATE EXTENSION crankshaft WITH VERSION 'dev';
|
||||
```
|
||||
|
||||
Once the feature or bugfix is completed and all the tests are passing
|
||||
a Pull-Request shall be created on the topic branch, reviewed by a peer
|
||||
and then merged back into the `develop` branch when all CI tests pass.
|
||||
a pull request shall be created, reviewed by a peer
|
||||
and then merged back into the `develop` branch once all the CI tests pass.
|
||||
|
||||
When the changes in the `develop` branch are to be released in a new
|
||||
version of the extension, a PR must be created on the `develop` branch.
|
||||
|
||||
The release manage will take hold of the PR at this moment to proceed
|
||||
to the release process for a new revision of the extension.
|
||||
## Relevant development targets in the Makefile
|
||||
|
||||
## Relevant development tasks available in the Makefile
|
||||
```shell
|
||||
# Show a short description of the available targets
|
||||
make help
|
||||
|
||||
# Generate the extension scripts and install the python package.
|
||||
sudo make install
|
||||
|
||||
# Run the tests against the installed extension.
|
||||
make test
|
||||
```
|
||||
* `make help` show a short description of the available targets
|
||||
|
||||
* `sudo make install` will generate the extension scripts for the development
|
||||
version ('dev'/'current') and install the python package into the
|
||||
development virtual environment `envs/dev`.
|
||||
Intended for use by developers.
|
||||
## Submitting contributions
|
||||
|
||||
* `make test` will run the tests for the installed development extension.
|
||||
Intended for use by developers.
|
||||
```
|
||||
Before opening a pull request (or submitting a contribution) you will need to sign a Contributor License Agreement (CLA) before making a submission, [learn more here](https://carto.com/contributions).
|
||||
|
||||
8
Makefile
8
Makefile
@@ -11,7 +11,6 @@ PYP_DIR = src/py
|
||||
# Generate and install developmet versions of the extension
|
||||
# and python package.
|
||||
# The extension is named 'dev' with a 'current' alias for easily upgrading.
|
||||
# The Python package is installed in a virtual environment envs/dev/
|
||||
# Requires sudo.
|
||||
install: ## Generate and install development version of the extension; requires sudo.
|
||||
$(MAKE) -C $(PYP_DIR) install
|
||||
@@ -29,7 +28,6 @@ release: ## Generate a new release of the extension. Only for telease manager
|
||||
$(MAKE) -C $(PYP_DIR) release
|
||||
|
||||
# Install the current release.
|
||||
# The Python package is installed in a virtual environment envs/X.Y.Z/
|
||||
# Requires sudo.
|
||||
# Use the RELEASE_VERSION environment variable to deploy a specific version:
|
||||
# sudo make deploy RELEASE_VERSION=1.0.0
|
||||
@@ -52,11 +50,7 @@ clean-release: ## clean up current release
|
||||
rm -rf release/python/$(RELEASE_VERSION)
|
||||
rm -f release/$(RELEASE_VERSION)--*.sql
|
||||
|
||||
# Cleanup all virtual environments
|
||||
clean-environments: ## clean up all virtual environments
|
||||
rm -rf envs/*
|
||||
|
||||
clean-all: clean-dev clean-release clean-environments
|
||||
clean-all: clean-dev clean-release
|
||||
|
||||
help:
|
||||
@IFS=$$'\n' ; \
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
SELF_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
|
||||
EXTENSION = crankshaft
|
||||
PACKAGE = crankshaft
|
||||
EXTVERSION = $(shell grep default_version $(SELF_DIR)/src/pg/$(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
|
||||
RELEASE_VERSION ?= $(EXTVERSION)
|
||||
SED = sed
|
||||
EXTENSION = crankshaft
|
||||
PACKAGE = crankshaft
|
||||
EXTVERSION = $(shell grep default_version $(SELF_DIR)/src/pg/$(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
|
||||
RELEASE_VERSION ?= $(EXTVERSION)
|
||||
SED = sed
|
||||
|
||||
57
NEWS.md
57
NEWS.md
@@ -1,3 +1,60 @@
|
||||
0.5.0 (2016-12-15)
|
||||
------------------
|
||||
* Updated PULL_REQUEST_TEMPLATE
|
||||
* Fixed a bug that flips the order of the numerator in denominator for calculating using Moran Local Rate because previously the code sorted the keys alphabetically.
|
||||
* Add new CDB_GetisOrdsG functions. Getis-Ord's G\* is a geo-statistical measurement of the intensity of clustering of high or low values
|
||||
* Add new outlier detection functions: CDB_StaticOutlier, CDB_PercentOutlier and CDB_StdDevOutlier
|
||||
* Updates in the framework for accessing the Python functions.
|
||||
|
||||
0.4.2 (2016-09-22)
|
||||
------------------
|
||||
* Bugfix for cdb_areasofinterestglobal: import correct modules
|
||||
|
||||
0.4.1 (2016-09-21)
|
||||
------------------
|
||||
* Let the user set the resolution in CDB_Contour function
|
||||
* Add Nearest Neighbors method to CDB_SpatialInterpolation
|
||||
* Improve error reporting for moran and markov functions
|
||||
|
||||
0.4.0 (2016-08-30)
|
||||
------------------
|
||||
* Add CDB_Contour
|
||||
* Add CDB_PIA
|
||||
* Add CDB_Densify
|
||||
* Add CDB_TINmap
|
||||
|
||||
0.3.1 (2016-08-18)
|
||||
------------------
|
||||
* Fix Voronoi projection issue
|
||||
* Fix Voronoi spurious segments issue
|
||||
* Add tests for interpolation
|
||||
|
||||
0.3.0 (2016-08-17)
|
||||
------------------
|
||||
* Adds Voronoi function
|
||||
* Fixes barycenter method in interpolation
|
||||
|
||||
0.2.0 (2016-08-11)
|
||||
------------------
|
||||
* Adds Gravity Model
|
||||
|
||||
0.1.0 (2016-06-29)
|
||||
------------------
|
||||
* Adds Spatial Markov function
|
||||
* Adds Spacial interpolation function
|
||||
* Adds `CDB_pyAgg (columns Numeric[])` helper function
|
||||
* Adds Segmentation Functions
|
||||
|
||||
0.0.4 (2016-06-20)
|
||||
------------------
|
||||
* Remove cartodb extension dependency from tests
|
||||
* Declare all correct dependencies with correct versions in setup.py
|
||||
|
||||
0.0.3 (2016-06-16)
|
||||
------------------
|
||||
* Adds new functions: kmeans, weighted centroids.
|
||||
* Replaces moran functions with new areas of interest naming.
|
||||
|
||||
0.0.2 (2016-03-16)
|
||||
------------------
|
||||
* New versioning approach using per-version Python virtual environments
|
||||
|
||||
82
README.md
82
README.md
@@ -1,71 +1,67 @@
|
||||
# crankshaft
|
||||
# Crankshaft [](https://travis-ci.org/CartoDB/crankshaft)
|
||||
|
||||
CartoDB Spatial Analysis extension for PostgreSQL.
|
||||
CARTO Spatial Analysis extension for PostgreSQL.
|
||||
|
||||
## Code organization
|
||||
|
||||
* *doc* documentation
|
||||
* *src* source code
|
||||
* - *src/pg* contains the PostgreSQL extension source code
|
||||
* - *src/py* Python module source code
|
||||
* *release* reseleased versions
|
||||
* *env* base directory for Python virtual environments
|
||||
* `doc/` documentation
|
||||
* `src/` source code
|
||||
- `pg/` contains the PostgreSQL extension source code
|
||||
- `py/` Python module source code
|
||||
* `release` reseleased versions
|
||||
|
||||
## Requirements
|
||||
|
||||
* pip, virtualenv, PostgreSQL
|
||||
* python-scipy system package (see [src/py/README.md](https://github.com/CartoDB/crankshaft/blob/master/src/py/README.md))
|
||||
* PostgreSQL
|
||||
* plpythonu and postgis extensions
|
||||
* python-scipy system package (see [src/py/README.md](https://github.com/CartoDB/crankshaft/blob/develop/src/py/README.md))
|
||||
|
||||
# Working Process -- Quickstart Guide
|
||||
# Development Process
|
||||
|
||||
We distinguish two roles regarding the development cycle of crankshaft:
|
||||
We distinguish two roles:
|
||||
|
||||
* *developers* will implement new functionality and bugfixes into
|
||||
the codebase and will request for new releases of the extension.
|
||||
* A *release manager* will attend these requests and will handle
|
||||
the release process. The release process is sequential:
|
||||
no concurrent releases will ever be in the works.
|
||||
the codebase.
|
||||
* A *release manager* will handle the release process.
|
||||
|
||||
We use the default `develop` branch as the basis for development.
|
||||
The `master` branch is used to merge and tag releases to be
|
||||
deployed in production.
|
||||
We use the branch `develop` as the main integration branch for development. The `master` is reserved to handle releases.
|
||||
|
||||
Developers shall create a new topic branch from `develop` for any new feature
|
||||
or bugfix and commit their changes to it and eventually merge back into
|
||||
the `develop` branch. When a new release is required a Pull Request
|
||||
will be open against the `develop` branch.
|
||||
The process is as follows:
|
||||
|
||||
1. Create a new **topic branch** from `develop` for any new feature
|
||||
or bugfix and commit their changes to it:
|
||||
|
||||
```shell
|
||||
git fetch && git checkout -b my-cool-feature origin/develop
|
||||
```
|
||||
1. Code, commit, push, repeat.
|
||||
1. Write some **tests** for your feature or bugfix.
|
||||
1. Update the [NEWS.md](https://github.com/CartoDB/crankshaft/blob/develop/NEWS.md) doc.
|
||||
1. Create a pull request and mention relevant people for a **peer review**.
|
||||
1. Address the comments and improvements you get from the peer review.
|
||||
1. Mention `@CartoDB/dataservices` in the PR to get it merged into `develop`.
|
||||
|
||||
In order for a pull request to be accepted, the following criteria should be met:
|
||||
* The peer review should pass and no major issue should be left unaddressed.
|
||||
* CI tests must pass (travis will take care of that).
|
||||
|
||||
The `develop` pull requests will be handled by the release manage,
|
||||
who will merge into master where new releases are prepared and tagged.
|
||||
The `master` branch is the sole responsibility of the release masters
|
||||
and developers must not commit or merge into it.
|
||||
|
||||
## Development Guidelines
|
||||
|
||||
For a detailed description of the development process please see
|
||||
the [CONTRIBUTING.md](https://github.com/CartoDB/crankshaft/blob/master/CONTRIBUTING.md) guide.
|
||||
the [CONTRIBUTING.md](https://github.com/CartoDB/crankshaft/blob/develop/CONTRIBUTING.md) guide.
|
||||
|
||||
Any modification to the source code (`src/pg/sql` for the SQL extension,
|
||||
`src/py/crankshaft` for the Python package) shall always be done
|
||||
in a topic branch created from the `develop` branch.
|
||||
|
||||
Tests, documentation and peer code reviewing are required for all
|
||||
modifications.
|
||||
## Testing
|
||||
|
||||
The tests (both for SQL and Python) are executed by running,
|
||||
from the top directory:
|
||||
The tests (both for SQL and Python) are executed by running, from the top directory:
|
||||
|
||||
```
|
||||
```shell
|
||||
sudo make install
|
||||
make test
|
||||
```
|
||||
|
||||
To request a new release, which will be handled by them
|
||||
release manager, a Pull Request must be created in the `develop`
|
||||
branch.
|
||||
|
||||
## Release
|
||||
|
||||
The release and deployment process is described in the
|
||||
[RELEASE.md](https://github.com/CartoDB/crankshaft/blob/master/RELEASE.md) guide and it is the responsibility of the designated
|
||||
release manager.
|
||||
The release process is described in the
|
||||
[RELEASE.md](https://github.com/CartoDB/crankshaft/blob/develop/RELEASE.md) guide and is the responsibility of the designated *release manager*.
|
||||
|
||||
115
RELEASE.md
115
RELEASE.md
@@ -1,93 +1,54 @@
|
||||
# Release & Deployment Process
|
||||
|
||||
Please read the Working Process/Quickstart Guide in README.md
|
||||
and the Development guidelines in CONTRIBUTING.md.
|
||||
|
||||
The release process of a new version of the extension
|
||||
shall be performed by the designated *Release Manager*.
|
||||
|
||||
Note that we expect to gradually automate more of this process.
|
||||
## Release steps
|
||||
1. Make sure `develop` branch passes all the tests.
|
||||
1. Merge `develop` into `master`
|
||||
1. Update the version number in `src/pg/crankshaft.control`.
|
||||
1. Generate the next release files with this command:
|
||||
|
||||
Having checked PR to be released it shall be
|
||||
merged back into the `master` branch to prepare the new release.
|
||||
```shell
|
||||
make release
|
||||
```
|
||||
1. Generate an upgrade path from the previous to the next release by copying the generated release file. E.g:
|
||||
|
||||
The version number in `pg/cranckshaft.control` must first be updated.
|
||||
To do so [Semantic Versioning 2.0](http://semver.org/) is in order.
|
||||
```shell
|
||||
cp release/crankshaft--X.Y.Z.sql release/crankshaft--A.B.C--X.Y.Z.sql
|
||||
```
|
||||
NOTE: you can rely on this thanks to the compatibility checks.
|
||||
|
||||
TODO: automate this step [#94](https://github.com/CartoDB/crankshaft/issues/94)
|
||||
2. Update the [NEWS.md](https://github.com/CartoDB/crankshaft/blob/master/NEWS.md) file
|
||||
1. Commit and push the generated files.
|
||||
1. Tag the release:
|
||||
|
||||
Thew `NEWS.md` will be updated.
|
||||
```
|
||||
git tag -a X.Y.Z -m "Release X.Y.Z"
|
||||
git push origin X.Y.Z
|
||||
```
|
||||
1. Deploy and test in staging
|
||||
1. Deploy and test in production
|
||||
1. Merge back into develop
|
||||
|
||||
We now will explain the process for the case of backwards-compatible
|
||||
releases (updating the minor or patch version numbers).
|
||||
|
||||
TODO: document the complex case of major releases.
|
||||
## Some remarks
|
||||
* Version numbers shall follow [Semantic Versioning 2.0](http://semver.org/).
|
||||
* CI tests will take care of **forward compatibility** of the extension at postgres level.
|
||||
* **Major version changes** (breaking forward compatibility) are a major event and are out of the scope of this doc. They **shall be avoided as much as we can**.
|
||||
* We will go forward, never backwards. **Generating upgrade paths automatically is easy** and we'll rely on the CI checks for that.
|
||||
|
||||
The next command must be executed to produce the main installation
|
||||
script for the new release, `release/cranckshaft--X.Y.Z.sql` and
|
||||
also to copy the python package to `release/python/X.Y.Z/crankshaft`.
|
||||
|
||||
```
|
||||
make release
|
||||
```
|
||||
|
||||
Then, the release manager shall produce upgrade and downgrade scripts
|
||||
to migrate to/from the previous release. In the case of minor/patch
|
||||
releases this simply consist in extracting the functions that have changed
|
||||
and placing them in the proper `release/cranckshaft--X.Y.Z--A.B.C.sql`
|
||||
file.
|
||||
## Deploy commands
|
||||
|
||||
The new release can be deployed for staging/smoke tests with this command:
|
||||
|
||||
```
|
||||
sudo make deploy
|
||||
```
|
||||
```shell
|
||||
sudo make deploy
|
||||
```
|
||||
|
||||
This will copy the current 'X.Y.Z' released version of the extension to
|
||||
PostgreSQL. The corresponding Python extension will be installed in a
|
||||
virtual environment in `envs/X.Y.Z`.
|
||||
To install a specific version 'X.Y.Z' different from the default one:
|
||||
|
||||
It can be activated with:
|
||||
|
||||
```
|
||||
source envs/X.Y.Z/bin/activate
|
||||
```
|
||||
|
||||
But note that this is needed only for using the package directly;
|
||||
the 'X.Y.Z' version of the extension will automatically use the
|
||||
python package from this virtual environment.
|
||||
|
||||
The `sudo make deploy` operation can be also used for installing
|
||||
the new version after it has been released.
|
||||
|
||||
To install a specific version 'X.Y.Z' different from the current one
|
||||
(which must be present in `releases/`) you can:
|
||||
|
||||
```
|
||||
sudo make deploy RELEASE_VERSION=X.Y.Z
|
||||
```
|
||||
|
||||
TODO: testing procedure for the new release.
|
||||
|
||||
TODO: procedure for staging deployment.
|
||||
|
||||
TODO: procedure for merging to master, tagging and deploying
|
||||
in production.
|
||||
|
||||
## Relevant release & deployment tasks available in the Makefile
|
||||
|
||||
```
|
||||
* `make help` show a short description of the available targets
|
||||
|
||||
* `make release` will generate a new release (version number defined in
|
||||
`src/pg/crankshaft.control`) into `release/`.
|
||||
Intended for use by the release manager.
|
||||
|
||||
* `sudo make deploy` will install the current release X.Y.Z from the
|
||||
`release/` files into PostgreSQL and a Python virtual environment
|
||||
`envs/X.Y.Z`.
|
||||
Intended for use by the release manager and deployment jobs.
|
||||
|
||||
* `sudo make deploy RELEASE_VERSION=X.Y.Z` will install specified version
|
||||
previously generated in `release/`
|
||||
into PostgreSQL and a Python virtual environment `envs/X.Y.Z`.
|
||||
Intended for use by the release manager and deployment jobs.
|
||||
```
|
||||
```shell
|
||||
sudo make deploy RELEASE_VERSION=X.Y.Z
|
||||
```
|
||||
|
||||
108
check-compatibility.sh
Executable file
108
check-compatibility.sh
Executable file
@@ -0,0 +1,108 @@
|
||||
#!/bin/bash
|
||||
|
||||
export PGUSER=postgres
|
||||
|
||||
DBNAME=crankshaft_compatcheck
|
||||
|
||||
function die {
|
||||
echo $1
|
||||
exit -1
|
||||
}
|
||||
|
||||
# Create fresh DB
|
||||
psql -c "CREATE DATABASE $DBNAME;" || die "Could not create DB"
|
||||
|
||||
# Hook for cleanup
|
||||
function cleanup {
|
||||
psql -c "DROP DATABASE IF EXISTS crankshaft_compatcheck;"
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Deploy previous release
|
||||
(cd src/py && sudo make deploy RUN_OPTIONS="--no-deps") || die "Could not deploy python extension"
|
||||
(cd src/pg && sudo make deploy) || die " Could not deploy last release"
|
||||
psql -c "SELECT * FROM pg_available_extension_versions WHERE name LIKE 'crankshaft';"
|
||||
|
||||
# Install in the fresh DB
|
||||
psql $DBNAME <<'EOF'
|
||||
-- Install dependencies
|
||||
CREATE EXTENSION plpythonu;
|
||||
CREATE EXTENSION postgis VERSION '2.2.2';
|
||||
|
||||
-- Create role publicuser if it does not exist
|
||||
DO
|
||||
$$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_user
|
||||
WHERE usename = 'publicuser') THEN
|
||||
|
||||
CREATE ROLE publicuser LOGIN;
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Install the default version
|
||||
CREATE EXTENSION crankshaft;
|
||||
\dx
|
||||
EOF
|
||||
|
||||
# Save public function signatures
|
||||
psql $DBNAME <<'EOF'
|
||||
CREATE TABLE release_function_signatures AS
|
||||
SELECT
|
||||
p.proname as name,
|
||||
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
||||
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
||||
CASE
|
||||
WHEN p.proisagg THEN 'agg'
|
||||
WHEN p.proiswindow THEN 'window'
|
||||
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
||||
ELSE 'normal'
|
||||
END as type
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE
|
||||
n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname LIKE 'cdb_%'
|
||||
ORDER BY 1, 2, 4;
|
||||
EOF
|
||||
|
||||
# Deploy current dev branch
|
||||
make clean-dev || die "Could not clean dev files"
|
||||
sudo make install || die "Could not deploy current dev branch"
|
||||
|
||||
# Check it can be upgraded
|
||||
psql $DBNAME -c "ALTER EXTENSION crankshaft update to 'dev';" || die "Cannot upgrade to dev version"
|
||||
|
||||
# Check against saved public function signatures
|
||||
psql $DBNAME <<'EOF'
|
||||
CREATE TABLE dev_function_signatures AS
|
||||
SELECT
|
||||
p.proname as name,
|
||||
pg_catalog.pg_get_function_result(p.oid) as result_type,
|
||||
pg_catalog.pg_get_function_arguments(p.oid) as arguments,
|
||||
CASE
|
||||
WHEN p.proisagg THEN 'agg'
|
||||
WHEN p.proiswindow THEN 'window'
|
||||
WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger'
|
||||
ELSE 'normal'
|
||||
END as type
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE
|
||||
n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname LIKE 'cdb_%'
|
||||
ORDER BY 1, 2, 4;
|
||||
EOF
|
||||
|
||||
echo "Functions in development not in latest release (ok):"
|
||||
psql $DBNAME -c "SELECT * FROM dev_function_signatures EXCEPT SELECT * FROM release_function_signatures;"
|
||||
|
||||
echo "Functions in latest release not in development (compat issue):"
|
||||
psql $DBNAME -c "SELECT * FROM release_function_signatures EXCEPT SELECT * FROM dev_function_signatures;"
|
||||
|
||||
# Fail if there's a signature mismatch / missing functions
|
||||
psql $DBNAME -c "SELECT * FROM release_function_signatures EXCEPT SELECT * FROM dev_function_signatures;" | fgrep '(0 rows)' \
|
||||
|| die "Function signatures changed"
|
||||
24
check-up-to-date-with-master.sh
Executable file
24
check-up-to-date-with-master.sh
Executable file
@@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
|
||||
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
|
||||
|
||||
if [[ "$CURRENT_BRANCH" == "master" || "$CURRENT_BRANCH" == "HEAD" ]]
|
||||
then
|
||||
echo "master branch or detached HEAD"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Add remote-master
|
||||
git remote add -t master remote-master https://github.com/CartoDB/crankshaft.git
|
||||
|
||||
# Fetch master reference
|
||||
git fetch --depth=1 remote-master master
|
||||
|
||||
# Compare HEAD with master
|
||||
# NOTE: travis by default uses --depth=50 so we are actually checking that the tip
|
||||
# of the branch is no more than 50 commits away from master as well.
|
||||
git rev-list HEAD | grep $(git rev-parse remote-master/master) ||
|
||||
{ echo "Your branch is not up to date with latest release";
|
||||
echo "Please update it by running the following:";
|
||||
echo " git fetch && git merge origin/develop";
|
||||
false; }
|
||||
272
doc/02_moran.md
272
doc/02_moran.md
@@ -1,169 +1,185 @@
|
||||
## Name
|
||||
## Areas of Interest Functions
|
||||
|
||||
CDB_AreasOfInterest -- returns a table with a cluster/outlier classification, the significance of a classification, an autocorrelation statistic (Local Moran's I), and the geometry id for each geometry in the original dataset.
|
||||
### CDB_AreasOfInterestLocal(subquery text, column_name text)
|
||||
|
||||
## Synopsis
|
||||
This function classifies your data as being part of a cluster, as an outlier, or not part of a pattern based the significance of a classification. The classification happens through an autocorrelation statistic called Local Moran's I.
|
||||
|
||||
```sql
|
||||
table(numeric moran_val, text quadrant, numeric significance, int ids, numeric column_values) CDB_AreasOfInterest(text query, text column_name)
|
||||
#### Arguments
|
||||
|
||||
table(numeric moran_val, text quadrant, numeric significance, int ids, numeric column_values) CDB_AreasOfInterest(text query, text column_name, int permutations, text geom_column, text id_column, text weight_type, int num_ngbrs)
|
||||
```
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
|
||||
| column_name | TEXT | Name of column (e.g., should be `'interesting_value'` instead of `interesting_value` without single quotes) used for the analysis. |
|
||||
| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). |
|
||||
| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. |
|
||||
| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. |
|
||||
| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` |
|
||||
| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. |
|
||||
|
||||
## Description
|
||||
#### Returns
|
||||
|
||||
CDB_AreasOfInterest is a table-returning function that classifies the geometries in a table by an attribute and gives a significance for that classification. This information can be used to find "Areas of Interest" by using the correlation of a geometry's attribute with that of its neighbors. Areas can be clusters, outliers, or neither (depending on which significance value is used).
|
||||
A table with the following columns.
|
||||
|
||||
Inputs:
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the geometry with id of `rowid` |
|
||||
| quads | TEXT | Classification of geometry. Result is one of 'HH' (a high value with neighbors high on average), 'LL' (opposite of 'HH'), 'HL' (a high value surrounded by lows on average), and 'LH' (opposite of 'HL'). Null values are returned when nulls exist in the original data. |
|
||||
| significance | NUMERIC | The statistical significance (from 0 to 1) of a cluster or outlier classification. Lower numbers are more significant. |
|
||||
| rowid | INT | Row id of the values which correspond to the input rows. |
|
||||
| vals | NUMERIC | Values from `'column_name'`. |
|
||||
|
||||
* `query` (required): an arbitrary query against tables you have access to (e.g., in your account, shared in your organization, or through the Data Observatory). This string must contain the following columns: an id `INT` (e.g., `cartodb_id`), geometry (e.g., `the_geom`), and the numeric attribute which is specified in `column_name`
|
||||
* `column_name` (required): column to perform the area of interest analysis tool on. The data must be numeric (e.g., `float`, `int`, etc.)
|
||||
* `permutations` (optional): used to calculate the significance of a classification. Defaults to 99, which is sufficient in most situations.
|
||||
* `geom_column` (optional): the name of the geometry column. Data must be of type `geometry`.
|
||||
* `id_column` (optional): the name of the id column (e.g., `cartodb_id`). Data must be of type `int` or `bigint` and have a unique condition on the data.
|
||||
* `weight_type` (optional): the type of weight used for determining what defines a neighborhood. Options are `knn` or `queen`.
|
||||
* `num_ngbrs` (optional): the number of neighbors in a neighborhood around a geometry. Only used if `knn` is chosen above.
|
||||
|
||||
Outputs:
|
||||
|
||||
* `moran_val`: underlying correlation statistic used in analysis
|
||||
* `quadrant`: human-readable interpretation of classification
|
||||
* `significance`: significance of classification (closer to 0 is more significant)
|
||||
* `ids`: id of original geometry (used for joining against original table if desired -- see examples)
|
||||
* `column_values`: original column values from `column_name`
|
||||
|
||||
Availability: crankshaft v0.0.1 and above
|
||||
|
||||
## Examples
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
t.the_geom_webmercator,
|
||||
t.cartodb_id,
|
||||
c.the_geom,
|
||||
aoi.quads,
|
||||
aoi.significance,
|
||||
aoi.quadrant As aoi_quadrant
|
||||
FROM
|
||||
observatory.acs2013 As t
|
||||
JOIN
|
||||
crankshaft.CDB_AreasOfInterest('SELECT * FROM observatory.acs2013',
|
||||
'gini_index')
|
||||
c.num_cyclists_per_total_population
|
||||
FROM CDB_AreasOfInterestLocal('SELECT * FROM commute_data'
|
||||
'num_cyclists_per_total_population') As aoi
|
||||
JOIN commute_data As c
|
||||
ON c.cartodb_id = aoi.rowid;
|
||||
```
|
||||
|
||||
## API Usage
|
||||
### CDB_AreasOfInterestGlobal(subquery text, column_name text)
|
||||
|
||||
Example
|
||||
This function identifies the extent to which geometries cluster (the groupings of geometries with similarly high or low values relative to the mean) or form outliers (areas where geometries have values opposite of their neighbors). The output of this function gives values between -1 and 1 as well as a significance of that classification. Values close to 0 mean that there is little to no distribution of values as compared to what one would see in a randomly distributed collection of geometries and values.
|
||||
|
||||
```text
|
||||
http://eschbacher.cartodb.com/api/v2/sql?q=SELECT * FROM crankshaft.CDB_AreasOfInterest('SELECT * FROM observatory.acs2013','gini_index')
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
|
||||
| column_name | TEXT | Name of column (e.g., should be `'interesting_value'` instead of `interesting_value` without single quotes) used for the analysis. |
|
||||
| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). |
|
||||
| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. |
|
||||
| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. |
|
||||
| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` |
|
||||
| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. |
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the entire dataset. Values closer to one indicate cluster, closer to -1 mean more outliers, and near zero indicates a random distribution of data. |
|
||||
| significance | NUMERIC | The statistical significance of the `moran` measure. |
|
||||
|
||||
#### Examples
|
||||
|
||||
```sql
|
||||
SELECT *
|
||||
FROM CDB_AreasOfInterestGlobal('SELECT * FROM commute_data', 'num_cyclists_per_total_population')
|
||||
```
|
||||
|
||||
Result
|
||||
```json
|
||||
{
|
||||
time: 0.120,
|
||||
total_rows: 100,
|
||||
rows: [{
|
||||
moran_vals: 0.7213,
|
||||
quadrant: 'High area',
|
||||
significance: 0.03,
|
||||
ids: 1,
|
||||
column_value: 0.22
|
||||
},
|
||||
{
|
||||
moran_vals: -0.7213,
|
||||
quadrant: 'Low outlier',
|
||||
significance: 0.13,
|
||||
ids: 2,
|
||||
column_value: 0.03
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
### CDB_AreasOfInterestLocalRate(subquery text, numerator_column text, denominator_column text)
|
||||
|
||||
Just like `CDB_AreasOfInterestLocal`, this function classifies your data as being part of a cluster, as an outlier, or not part of a pattern based the significance of a classification. This function differs in that it calculates the classifications based on input `numerator` and `denominator` columns for finding the areas where there are clusters and outliers for the resulting rate of those two values.
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
|
||||
| numerator | TEXT | Name of the numerator for forming a rate to be used in analysis. |
|
||||
| denominator | TEXT | Name of the denominator for forming a rate to be used in analysis. |
|
||||
| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). |
|
||||
| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. |
|
||||
| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. |
|
||||
| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` |
|
||||
| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. |
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the geometry with id of `rowid` |
|
||||
| quads | TEXT | Classification of geometry. Result is one of 'HH' (a high value with neighbors high on average), 'LL' (opposite of 'HH'), 'HL' (a high value surrounded by lows on average), and 'LH' (opposite of 'HL'). Null values are returned when nulls exist in the original data. |
|
||||
| significance | NUMERIC | The statistical significance (from 0 to 1) of a cluster or outlier classification. Lower numbers are more significant. |
|
||||
| rowid | INT | Row id of the values which correspond to the input rows. |
|
||||
| vals | NUMERIC | Values from `'column_name'`. |
|
||||
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
c.the_geom,
|
||||
aoi.quads,
|
||||
aoi.significance,
|
||||
c.cyclists_per_total_population
|
||||
FROM CDB_AreasOfInterestLocalRate('SELECT * FROM commute_data'
|
||||
'num_cyclists',
|
||||
'total_population') As aoi
|
||||
JOIN commute_data As c
|
||||
ON c.cartodb_id = aoi.rowid;
|
||||
```
|
||||
|
||||
## See Also
|
||||
### CDB_AreasOfInterestGlobalRate(subquery text, column_name text)
|
||||
|
||||
crankshaft's areas of interest functions:
|
||||
This function identifies the extent to which geometries cluster (the groupings of geometries with similarly high or low values relative to the mean) or form outliers (areas where geometries have values opposite of their neighbors). The output of this function gives values between -1 and 1 as well as a significance of that classification. Values close to 0 mean that there is little to no distribution of values as compared to what one would see in a randomly distributed collection of geometries and values.
|
||||
|
||||
* [CDB_AreasOfInterest_Global]()
|
||||
* [CDB_AreasOfInterest_Rate_Local]()
|
||||
* [CDB_AreasOfInterest_Rate_Global]()
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
|
||||
| numerator | TEXT | Name of the numerator for forming a rate to be used in analysis. |
|
||||
| denominator | TEXT | Name of the denominator for forming a rate to be used in analysis. |
|
||||
| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). |
|
||||
| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. |
|
||||
| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. |
|
||||
| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` |
|
||||
| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. |
|
||||
|
||||
PostGIS clustering functions:
|
||||
#### Returns
|
||||
|
||||
* [ST_ClusterIntersecting](http://postgis.net/docs/manual-2.2/ST_ClusterIntersecting.html)
|
||||
* [ST_ClusterWithin](http://postgis.net/docs/manual-2.2/ST_ClusterWithin.html)
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the entire dataset. Values closer to one indicate cluster, closer to -1 mean more outliers, and near zero indicates a random distribution of data. |
|
||||
| significance | NUMERIC | The statistical significance of the `moran` measure. |
|
||||
|
||||
-- removing below, working into above
|
||||
#### Examples
|
||||
|
||||
#### What is Moran's I and why is it significant for CartoDB?
|
||||
```sql
|
||||
SELECT *
|
||||
FROM CDB_AreasOfInterestGlobalRate('SELECT * FROM commute_data',
|
||||
'num_cyclists',
|
||||
'total_population')
|
||||
```
|
||||
|
||||
Moran's I is a geostatistical calculation which gives a measure of the global
|
||||
clustering and presence of outliers within the geographies in a map. Here global
|
||||
means over all of the geographies in a dataset. Imagine mapping the incidence
|
||||
rates of cancer in neighborhoods of a city. If there were areas covering several
|
||||
neighborhoods with abnormally low rates of cancer, those areas are positively
|
||||
spatially correlated with one another and would be considered a cluster. If
|
||||
there was a single neighborhood with a high rate but with all neighbors on
|
||||
average having a low rate, it would be considered a spatial outlier.
|
||||
## Hotspot, Coldspot, and Outlier Functions
|
||||
|
||||
While Moran's I gives a global snapshot, there are local indicators for
|
||||
clustering called Local Indicators of Spatial Autocorrelation. Clustering is a
|
||||
process related to autocorrelation -- i.e., a process that compares a
|
||||
geography's attribute to the attribute in neighbor geographies.
|
||||
These functions are convenience functions for extracting only information that you are interested in exposing based on the outputs of the `CDB_AreasOfInterest` functions. For instance, you can use `CDB_GetSpatialHotspots` to output only the classifications of `HH` and `HL`.
|
||||
|
||||
For the example of cancer rates in neighborhoods, since these neighborhoods have
|
||||
a high value for rate of cancer, and all of their neighbors do as well, they are
|
||||
designated as "High High" or simply **HH**. For areas with multiple neighborhoods
|
||||
with low rates of cancer, they are designated as "Low Low" or **LL**. HH and LL
|
||||
naturally fit into the concept of clustering and are in the correlated
|
||||
variables.
|
||||
### Non-rate functions
|
||||
|
||||
"Anticorrelated" geogs are in **LH** and **HL** regions -- that is, regions
|
||||
where a geog has a high value and it's neighbors, on average, have a low value
|
||||
(or vice versa). An example of this is a "gated community" or placement of a
|
||||
city housing project in a rich region. These deliberate developments have
|
||||
opposite median income as compared to the neighbors around them. They have a
|
||||
high (or low) value while their neighbors have a low (or high) value. They exist
|
||||
typically as islands, and in rare circumstances can extend as chains dividing
|
||||
**LL** or **HH**.
|
||||
#### CDB_GetSpatialHotspots
|
||||
This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocal` except that the outputs are filtered to be only 'HH' and 'HL' (areas of high values). For more information about this function's use, see `CDB_AreasOfInterestLocal`.
|
||||
|
||||
Strong policies such as rent stabilization (probably) tend to prevent the
|
||||
clustering of high rent areas as they integrate middle class incomes. Luxury
|
||||
apartment buildings, which are a kind of gated community, probably tend to skew
|
||||
an area's median income upwards while housing projects have the opposite effect.
|
||||
What are the nuggets in the analysis?
|
||||
#### CDB_GetSpatialColdspots
|
||||
This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocal` except that the outputs are filtered to be only 'LL' and 'LH' (areas of low values). For more information about this function's use, see `CDB_AreasOfInterestLocal`.
|
||||
|
||||
Two functions are available to compute Moran I statistics:
|
||||
#### CDB_GetSpatialOutliers
|
||||
This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocal` except that the outputs are filtered to be only 'HL' and 'LH' (areas where highs or lows are surrounded by opposite values on average). For more information about this function's use, see `CDB_AreasOfInterestLocal`.
|
||||
|
||||
* `cdb_moran_local` computes Moran I measures, quad classification and
|
||||
significance values from numerial values associated to geometry entities
|
||||
in an input table. The geometries should be contiguous polygons When
|
||||
then `queen` `w_type` is used.
|
||||
* `cdb_moran_local_rate` computes the same statistics using a ratio between
|
||||
numerator and denominator columns of a table.
|
||||
### Rate functions
|
||||
|
||||
The parameters for `cdb_moran_local` are:
|
||||
#### CDB_GetSpatialHotspotsRate
|
||||
|
||||
* `table` name of the table that contains the data values
|
||||
* `attr` name of the column
|
||||
* `signficance` significance threshold for the quads values
|
||||
* `num_ngbrs` number of neighbors to consider (default: 5)
|
||||
* `permutations` number of random permutations for calculation of
|
||||
pseudo-p values (default: 99)
|
||||
* `geom_column` number of the geometry column (default: "the_geom")
|
||||
* `id_col` PK column of the table (default: "cartodb_id")
|
||||
* `w_type` Weight types: can be "knn" for k-nearest neighbor weights
|
||||
or "queen" for contiguity based weights.
|
||||
This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocalRate` except that the outputs are filtered to be only 'HH' and 'HL' (areas of high values). For more information about this function's use, see `CDB_AreasOfInterestLocalRate`.
|
||||
|
||||
The function returns a table with the following columns:
|
||||
#### CDB_GetSpatialColdspotsRate
|
||||
|
||||
* `moran` Moran's value
|
||||
* `quads` quad classification ('HH', 'LL', 'HL', 'LH' or 'Not significant')
|
||||
* `significance` significance value
|
||||
* `ids` id of the corresponding record in the input table
|
||||
This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocalRate` except that the outputs are filtered to be only 'LL' and 'LH' (areas of low values). For more information about this function's use, see `CDB_AreasOfInterestLocalRate`.
|
||||
|
||||
Function `cdb_moran_local_rate` only differs in that the `attr` input
|
||||
parameter is substituted by `numerator` and `denominator`.
|
||||
#### CDB_GetSpatialOutliersRate
|
||||
|
||||
This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocalRate` except that the outputs are filtered to be only 'HL' and 'LH' (areas where highs or lows are surrounded by opposite values on average). For more information about this function's use, see `CDB_AreasOfInterestLocalRate`.
|
||||
|
||||
47
doc/04_markov.md
Normal file
47
doc/04_markov.md
Normal file
@@ -0,0 +1,47 @@
|
||||
## Spatial Markov
|
||||
|
||||
### CDB_SpatialMarkovTrend(subquery text, column_names text array)
|
||||
|
||||
This function takes time series data associated with geometries and outputs likelihoods that the next value of a geometry will move up, down, or stay static as compared to the most recent measurement. For more information, read about [Spatial Dynamics in PySAL](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/dynamics.html).
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM real_estate_history`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
|
||||
| column_names | TEXT Array | Names of column that form the history of measurements for the geometries (e.g., `Array['y2011', 'y2012', 'y2013', 'y2014', 'y2015', 'y2016']`). |
|
||||
| num_classes (optional) | INT | Number of quantile classes to separate data into. |
|
||||
| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). |
|
||||
| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. |
|
||||
| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. |
|
||||
| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` |
|
||||
| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. |
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| trend | NUMERIC | The probability that the measure at this location will move up (a positive number) or down (a negative number) |
|
||||
| trend_up | NUMERIC | The probability that a measure will move up in subsequent steps of time |
|
||||
| trend_down | NUMERIC | The probability that a measure will move down in subsequent steps of time |
|
||||
| volatility | NUMERIC | A measure of the variance of the probabilities returned from the Spatial Markov predictions |
|
||||
| rowid | NUMERIC | id of the row that corresponds to the `id_col` (by default `cartodb_id` of the input rows) |
|
||||
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
c.cartodb_id,
|
||||
c.the_geom,
|
||||
m.trend,
|
||||
m.trend_up,
|
||||
m.trend_down,
|
||||
m.volatility
|
||||
FROM CDB_SpatialMarkovTrend('SELECT * FROM nyc_real_estate'
|
||||
Array['m03y2009','m03y2010','m03y2011','m03y2012','m03y2013','m03y2014','m03y2015','m03y2016']) As m
|
||||
JOIN nyc_real_estate As c
|
||||
ON c.cartodb_id = m.rowid;
|
||||
```
|
||||
23
doc/04_pyAgg.md
Normal file
23
doc/04_pyAgg.md
Normal file
@@ -0,0 +1,23 @@
|
||||
## PyAgg Helper Function
|
||||
|
||||
### CDB_pyAgg (columns Numeric[])
|
||||
|
||||
Currently it's not possible to pass a multidiemensional array between plpsql and plpythonu. This function aims to
|
||||
help fix that by aggergating the columns provided in the argument across rows in to a rows * columns + 1 length 1D array. The first element of the array is the array\_length of the columns argument so that python can reconstruct
|
||||
the 2D array.
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| columns | NUMERIC[] | The columns to aggregate across rows|
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| result | NUMERIC[] | An columns * rows + 1 array where the first entry is the no of columns|
|
||||
|
||||
|
||||
78
doc/07_gravity.md
Normal file
78
doc/07_gravity.md
Normal file
@@ -0,0 +1,78 @@
|
||||
## Gravity Model
|
||||
|
||||
Gravity Models are derived from Newton's Law of Gravity and are used to predict the interaction between a group of populated areas (sources) and a specific target among a group of potential targets, in terms of an attraction factor (weight)
|
||||
|
||||
**CDB_Gravity** is based on the model defined in *Huff's Law of Shopper attraction (1963)*
|
||||
|
||||
### CDB_Gravity(t_id bigint[], t_geom geometry[], t_weight numeric[], s_id bigint[], s_geom geometry[], s_pop numeric[], target bigint, radius integer, minval numeric DEFAULT -10e307)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| t_id | bigint[] | Array of targets ID |
|
||||
| t_geom | geometry[] | Array of targets' geometries |
|
||||
| t_weight | numeric[] | Array of targets's weights |
|
||||
| s_id | bigint[] | Array of sources ID |
|
||||
| s_geom | geometry[] | Array of sources' geometries |
|
||||
| s_pop | numeric[] | Array of sources's population |
|
||||
| target | bigint | ID of the target under study |
|
||||
| radius | integer | Radius in meters around the target under study that will be taken into account|
|
||||
| minval (optional) | numeric | Lowest accepted value of weight, defaults to numeric min_value |
|
||||
|
||||
### CDB_Gravity( target_query text, weight_column text, source_query text, pop_column text, target bigint, radius integer, minval numeric DEFAULT -10e307)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| target_query | text | Query that defines targets |
|
||||
| weight_column | text | Column name of weights |
|
||||
| source_query | text | Query that defines sources |
|
||||
| pop_column | text | Column name of population |
|
||||
| target | bigint | cartodb_id of the target under study |
|
||||
| radius | integer | Radius in meters around the target under study that will be taken into account|
|
||||
| minval (optional) | numeric | Lowest accepted value of weight, defaults to numeric min_value |
|
||||
|
||||
|
||||
### Returns
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| the_geom | geometry | Geometries of the sources within the radius |
|
||||
| source_id | bigint | ID of the source |
|
||||
| target_id | bigint | Target ID from input |
|
||||
| dist | numeric | Distance in meters source to target (if not points, distance between centroids) |
|
||||
| h | numeric | Probability of patronage |
|
||||
| hpop | numeric | Patronaging population |
|
||||
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
with t as (
|
||||
SELECT
|
||||
array_agg(cartodb_id::bigint) as id,
|
||||
array_agg(the_geom) as g,
|
||||
array_agg(coalesce(gla,0)::numeric) as w
|
||||
FROM
|
||||
abel.centros_comerciales_de_madrid
|
||||
WHERE not no_cc
|
||||
),
|
||||
s as (
|
||||
SELECT
|
||||
array_agg(cartodb_id::bigint) as id,
|
||||
array_agg(center) as g,
|
||||
array_agg(coalesce(t1_1, 0)::numeric) as p
|
||||
FROM
|
||||
sscc_madrid
|
||||
)
|
||||
select
|
||||
g.the_geom,
|
||||
trunc(g.h,2) as h,
|
||||
round(g.hpop) as hpop,
|
||||
trunc(g.dist/1000,2) as dist_km
|
||||
FROM t, s, CDB_Gravity1(t.id, t.g, t.w, s.id, s.g, s.p, newmall_ID, 100000, 5000) g
|
||||
```
|
||||
|
||||
|
||||
54
doc/08_interpolation.md
Normal file
54
doc/08_interpolation.md
Normal file
@@ -0,0 +1,54 @@
|
||||
## Spacial interpolation
|
||||
|
||||
Function to interpolate a numeric attribute of a point in a scatter dataset of points, using one of three methos:
|
||||
|
||||
* [Nearest neighbor(s)](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
|
||||
* [Barycentric](https://en.wikipedia.org/wiki/Barycentric_coordinate_system)
|
||||
* [IDW](https://en.wikipedia.org/wiki/Inverse_distance_weighting)
|
||||
|
||||
### CDB_SpatialInterpolation (query text, point geometry, method integer DEFAULT 1, p1 integer DEFAULT 0, ps integer DEFAULT 0)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| query | text | query that returns at least `the_geom` and a numeric value as `attrib` |
|
||||
| point | geometry | The target point to calc the value |
|
||||
| method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW|
|
||||
| p1 | integer | limit the number of neighbors, IDW: 0->no limit, NN: 0-> closest one|
|
||||
| p2 | integer | IDW: order of distance decay, 0-> order 1|
|
||||
|
||||
### CDB_SpatialInterpolation (geom geometry[], values numeric[], point geometry, method integer DEFAULT 1, p1 integer DEFAULT 0, ps integer DEFAULT 0)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| geom | geometry[] | Array of points's geometries |
|
||||
| values | numeric[] | Array of points' values for the param under study|
|
||||
| point | geometry | The target point to calc the value |
|
||||
| method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW|
|
||||
| p1 | integer | limit the number of neighbors, IDW: 0->no limit, NN: 0-> closest one|
|
||||
| p2 | integer | IDW: order of distance decay, 0-> order 1|
|
||||
|
||||
### Returns
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| value | numeric | Interpolated value at the given point, `-888.888` if the given point is out of the boundaries of the source points set |
|
||||
|
||||
Default values:
|
||||
* -888.888: when using Barycentric, the target point is out of the realm of the input points
|
||||
* -777.777: asking for a method not available
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
with a as (
|
||||
select
|
||||
array_agg(the_geom) as geomin,
|
||||
array_agg(temp::numeric) as colin
|
||||
from table_4804232032
|
||||
)
|
||||
SELECT CDB_SpatialInterpolation(geomin, colin, CDB_latlng(41.38, 2.15),1) FROM a;
|
||||
```
|
||||
38
doc/09_voronoi.md
Normal file
38
doc/09_voronoi.md
Normal file
@@ -0,0 +1,38 @@
|
||||
## Voronoi
|
||||
|
||||
Function to construct the [Voronoi Diagram](https://en.wikipedia.org/wiki/Voronoi_diagram) from a dataset of scatter points, clipped to the significant area
|
||||
|
||||
PostGIS wil include this in future versions ([doc for dev branch](http://postgis.net/docs/manual-dev/ST_Voronoi.html)) and will perform faster for sure, but in the meantime...
|
||||
|
||||
|
||||
### CDB_Voronoi (geom geometry[], buffer numeric DEFAULT 0.5, tolerance numeric DEFAULT 1e-9)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| geom | geometry[] | Array of points's geometries |
|
||||
| buffer | numeric | enlargment ratio for the envelope area used for the restraints|
|
||||
| tolerance | numeric | Delaunay tolerance, optional |
|
||||
|
||||
### Returns
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| geom | geometry collection | Collection of polygons of the Voronoi cells|
|
||||
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
WITH a AS (
|
||||
SELECT
|
||||
ARRAY[ST_GeomFromText('POINT(2.1744 41.403)', 4326),ST_GeomFromText('POINT(2.1228 41.380)', 4326),ST_GeomFromText('POINT(2.1511 41.374)', 4326),ST_GeomFromText('POINT(2.1528 41.413)', 4326),ST_GeomFromText('POINT(2.165 41.391)', 4326),ST_GeomFromText('POINT(2.1498 41.371)', 4326),ST_GeomFromText('POINT(2.1533 41.368)', 4326),ST_GeomFromText('POINT(2.131386 41.41399)', 4326)] AS geomin
|
||||
)
|
||||
SELECT
|
||||
st_transform(
|
||||
(st_dump(CDB_voronoi(geomin, 0.2, 1e-9)
|
||||
)).geom
|
||||
, 3857) as the_geom_webmercator
|
||||
FROM a;
|
||||
```
|
||||
62
doc/11_kmeans.md
Normal file
62
doc/11_kmeans.md
Normal file
@@ -0,0 +1,62 @@
|
||||
## K-Means Functions
|
||||
|
||||
### CDB_KMeans(subquery text, no_clusters INTEGER)
|
||||
|
||||
This function attempts to find n clusters within the input data. It will return a table to CartoDB ids and
|
||||
the number of the cluster each point in the input was assigend to.
|
||||
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
|
||||
| no\_clusters | INTEGER | The number of clusters to try and find |
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| cartodb\_id | INTEGER | The CartoDB id of the row in the input table.|
|
||||
| cluster\_no | INTEGER | The cluster that this point belongs to. |
|
||||
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
customers.*,
|
||||
km.cluster_no
|
||||
FROM cdb_crankshaft.CDB_Kmeans('SELECT * from customers' , 6) km, customers_3
|
||||
WHERE customers.cartodb_id = km.cartodb_id
|
||||
```
|
||||
|
||||
### CDB_WeightedMean(subquery text, weight_column text, category_column text)
|
||||
|
||||
Function that computes the weighted centroid of a number of clusters by some weight column.
|
||||
|
||||
### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column and the columns specified as the weight and category columns|
|
||||
| weight\_column | TEXT | The name of the column to use as a weight |
|
||||
| category\_column | TEXT | The name of the column to use as a category |
|
||||
|
||||
### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| the\_geom | GEOMETRY | A point for the weighted cluster center |
|
||||
| class | INTEGER | The cluster class |
|
||||
|
||||
### Example Usage
|
||||
|
||||
```sql
|
||||
SELECT ST_TRANSFORM(the_geom, 3857) as the_geom_webmercator, class
|
||||
FROM cdb_weighted_mean('SELECT *, customer_value FROM customers','customer_value','cluster_no')
|
||||
```
|
||||
83
doc/12_segmentation.md
Normal file
83
doc/12_segmentation.md
Normal file
@@ -0,0 +1,83 @@
|
||||
|
||||
## Segmentation Functions
|
||||
|
||||
### CDB_CreateAndPredictSegment(query TEXT, variable_name TEXT, target_query TEXT)
|
||||
|
||||
This function trains a [Gradient Boosting](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html) model to attempt to predict the target data and then generates predictions for new data.
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| query | TEXT | The input query to train the algorithm, which should have both the variable of interest and the features that will be used to predict it |
|
||||
| variable\_name| TEXT | Specify the variable in the query to predict, all other columns are assumed to be features |
|
||||
| target\_table | TEXT | The query which returns the `cartodb_id` and features for the rows your would like to predict the target variable for |
|
||||
| n\_estimators (optional) | INTEGER DEFAULT 1200 | Number of estimators to be used. Values should be between 1 and x. |
|
||||
| max\_depth (optional) | INTEGER DEFAULT 3 | Max tree depth. Values should be between 1 and n. |
|
||||
| subsample (optional) | DOUBLE PRECISION DEFAULT 0.5 | Subsample parameter for GradientBooster. Values should be within the range 0 to 1. |
|
||||
| learning\_rate (optional) | DOUBLE PRECISION DEFAULT 0.01 | Learning rate for the GradientBooster. Values should be between 0 and 1 (??) |
|
||||
| min\_samples\_leaf (optional) | INTEGER DEFAULT 1 | Minimum samples to use per leaf. Values should range from x to y |
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| cartodb\_id | INTEGER | The CartoDB id of the row in the target\_query |
|
||||
| prediction | NUMERIC | The predicted value of the variable of interest |
|
||||
| accuracy | NUMERIC | The mean squared accuracy of the model. |
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
SELECT * from cdb_crankshaft.CDB_CreateAndPredictSegment(
|
||||
'SELECT agg, median_rent::numeric, male_pop::numeric, female_pop::numeric FROM late_night_agg',
|
||||
'agg',
|
||||
'SELECT row_number() OVER () As cartodb_id, median_rent, male_pop, female_pop FROM ml_learning_ny');
|
||||
```
|
||||
|
||||
### CDB_CreateAndPredictSegment(target numeric[], train_features numeric[], prediction_features numeric[], prediction_ids numeric[])
|
||||
|
||||
This function trains a [Gradient Boosting](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html) model to attempt to predict the target data and then generates predictions for new data.
|
||||
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| target | numeric[] | An array of target values of the variable you want to predict|
|
||||
| train\_features| numeric[] | 1D array of length n features \* n\_rows + 1 with the first entry in the array being the number of features in each row. These are the features the model will be trained on. CDB\_Crankshaft.CDB_pyAgg(Array[feature1, feature2, feature3]::numeric[]) can be used to construct this. |
|
||||
| prediction\_features | numeric[] | 1D array of length nfeatures\* n\_rows\_ + 1 with the first entry in the array being the number of features in each row. These are the features that will be used to predict the target variable CDB\_Crankshaft.CDB\_pyAgg(Array[feature1, feature2, feature3]::numeric[]) can be used to construct this. |
|
||||
| prediction\_ids | numeric[] | 1D array of length n\_rows with the ids that can use used to re-join the data with inputs |
|
||||
| n\_estimators (optional) | INTEGER DEFAULT 1200 | Number of estimators to be used |
|
||||
| max\_depth (optional) | INTEGER DEFAULT 3 | Max tree depth |
|
||||
| subsample (optional) | DOUBLE PRECISION DEFAULT 0.5 | Subsample parameter for GradientBooster|
|
||||
| learning\_rate (optional) | DOUBLE PRECISION DEFAULT 0.01 | Learning rate for the GradientBooster |
|
||||
| min\_samples\_leaf (optional) | INTEGER DEFAULT 1 | Minimum samples to use per leaf |
|
||||
|
||||
|
||||
#### Returns
|
||||
|
||||
A table with the following columns.
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| cartodb\_id | INTEGER | The CartoDB id of the row in the target\_query |
|
||||
| prediction | NUMERIC | The predicted value of the variable of interest |
|
||||
| accuracy | NUMERIC | The mean squared accuracy of the model. |
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
WITH training As (
|
||||
SELECT array_agg(agg) As target,
|
||||
cdb_crankshaft.CDB_PyAgg(Array[median_rent, male_pop, female_pop]::Numeric[]) As features
|
||||
FROM late_night_agg),
|
||||
target AS (
|
||||
SELECT cdb_crankshaft.CDB_PyAgg(Array[median_rent, male_pop, female_pop]::Numeric[]) As features,
|
||||
array_agg(cartodb_id) As cartodb_ids FROM late_night_agg)
|
||||
|
||||
SELECT cdb_crankshaft.CDB_CreateAndPredictSegment(training.target, training.features, target.features, target.cartodb_ids)
|
||||
FROM training, target;
|
||||
```
|
||||
33
doc/13_PIA.md
Normal file
33
doc/13_PIA.md
Normal file
@@ -0,0 +1,33 @@
|
||||
## Pole of inaccessibility (PIA)
|
||||
|
||||
Function to find the [PIA](https://en.wikipedia.org/wiki/Pole_of_inaccessibility) from a given polygon and tolerance, following the quadtree approach by [Vladimir Agafonkin](https://github.com/mourner) described [here](https://github.com/mapbox/polylabel)
|
||||
|
||||
|
||||
|
||||
### CDB_PIA (polygon geometry, tolerance numeric DEFAULT 1.0)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| polygon | geometry | Target polygon |
|
||||
| tolerance | numeric | Threshold to decide to take a cell into account |
|
||||
|
||||
### Returns
|
||||
|
||||
| Column Name | Type | Description |
|
||||
|-------------|------|-------------|
|
||||
| point | geometry| Pole of inaccessibility |
|
||||
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
with a as(
|
||||
select st_geomfromtext('POLYGON((-432540.453078056 4949775.20452642,-432329.947920966 4951361.232584,-431245.028163694 4952223.31516671,-429131.071033529 4951768.00415574,-424622.07505895 4952843.13503987,-423688.327170174 4953499.20752423,-424086.294349759 4954968.38274191,-423068.388925945 4954378.63345336,-423387.653225542 4953355.67417084,-420594.869840519 4953781.00230592,-416026.095299382 4951484.06849063,-412483.018546414 4951024.5410983,-410490.399661215 4954502.24032205,-408186.197521284 4956398.91417441,-407627.262358013 4959300.94633864,-406948.770061627 4959874.85407739,-404949.583326472 4959047.74518163,-402570.908447199 4953743.46829807,-400971.358683991 4952193.11680804,-403533.488084088 4949649.89857885,-406335.177028373 4950193.19571096,-407790.456731515 4952391.46015616,-412060.672398345 4950381.2389307,-410716.93482498 4949156.7509561,-408464.162289794 4943912.8940387,-409350.599394983 4942819.84896006,-408087.791091424 4942451.6711778,-407274.045613725 4940572.4807777,-404446.196589102 4939976.71501489,-402422.964843936 4940450.3670813,-401010.654464241 4939054.8061663,-397647.247369412 4940679.80737878,-395658.413346901 4940528.84765185,-395536.852462953 4938829.79565997,-394268.923462818 4938003.7277717,-393388.720249116 4934757.80596815,-392393.301362444 4934326.71675815,-392573.527618037 4932323.40974412,-393464.640141837 4931903.10653605,-393085.597275686 4931094.7353605,-398426.261165985 4929156.87541607,-398261.174361137 4926238.00816416,-394045.059966834 4925765.18668498,-392982.960705174 4926391.81893628,-393090.272694301 4927176.84692181,-391648.240010564 4924626.06386961,-391889.914625075 4923086.14787613,-394345.177314013 4923235.086036,-395550.878718795 4917812.79243978,-399009.463978251 4912927.7157945,-398948.794855767 4911941.91010796,-398092.636652078 4911806.57392519,-401991.601817112 4911722.9204501,-406225.972607907 4914505.47286319,-411104.994569885 4912569.26941163,-412925.513522316 4913030.3608866,-414630.148884835 4914436.69169949,-414207.691417276 4919205.78028405,-418306.141109809 4917994.9580478,-424184.700779621 4918938.12432889,-426816.961458921 4923664.37379373,-420956.324227126 4923381.98014807,-420186.661267781 4924286.48693378,-420943.411166194 4926812.76394433,-419779.45457046 4928527.43466337,-419768.767899344 4930681.94459216,-421911.668097113 4930432.40620397,-423482.386112205 4933451.28047252,-427272.814773717 4934151.56473242,-427144.908678797 4939731.77191996,-428982.125554848 4940522.84445172,-428986.133056516 4942437.17281266,-431237.792396792 4947309.68284815,-432476.889648814 4947791.74800037,-432540.453078056 4949775.20452642))', 3857) as g
|
||||
),
|
||||
b as (
|
||||
select ST_Transform(g, 4326) as g from a
|
||||
)
|
||||
SELECT st_astext(CDB_PIA(g)) from b;
|
||||
```
|
||||
35
doc/14_densify.md
Normal file
35
doc/14_densify.md
Normal file
@@ -0,0 +1,35 @@
|
||||
## Densify function
|
||||
|
||||
Iterative densification of a set of scattered points using Delaunay triangulation. The new points are located at the centroids of the grid cells and have as assigned value the barycentric average value of the cell's vertex.
|
||||
|
||||
### CDB_Densify(geomin geometry[], colin numeric[], iterations integer)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| geomin | geometry[] | Array of points geometries |
|
||||
| colin | numeric[] | Array of points' values |
|
||||
| iterations | integer | Number of iterations |
|
||||
|
||||
### Returns
|
||||
|
||||
Returns a table object
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| geomout | geometry | Geometries of new dataset of points|
|
||||
| colout | numeric | Values of points|
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
with data as (
|
||||
select
|
||||
ARRAY[7.0,8.0,1.0,2.0,3.0,5.0,6.0,4.0] as colin,
|
||||
ARRAY[ST_GeomFromText('POINT(2.1744 41.4036)'),ST_GeomFromText('POINT(2.1228 41.3809)'),ST_GeomFromText('POINT(2.1511 41.3742)'),ST_GeomFromText('POINT(2.1528 41.4136)'),ST_GeomFromText('POINT(2.165 41.3917)'),ST_GeomFromText('POINT(2.1498 41.3713)'),ST_GeomFromText('POINT(2.1533 41.3683)'),ST_GeomFromText('POINT(2.131386 41.413998)')] as geomin
|
||||
)
|
||||
select CDB_Densify(geomin, colin, 2) from data;
|
||||
```
|
||||
|
||||
|
||||
36
doc/15_tinmap.md
Normal file
36
doc/15_tinmap.md
Normal file
@@ -0,0 +1,36 @@
|
||||
## TINMAP function
|
||||
|
||||
Generates a fake contour map, in the form of a TIN map, from a set of scattered points.Depends on **CDB_Densify**.
|
||||
|
||||
Its iterative nature lets the user smooth the final result as much as desired, but with a exponential time cost increase.
|
||||
|
||||
### CDB_TINmap(geomin geometry[], colin numeric[], iterations integer)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| geomin | geometry[] | Array of points geometries |
|
||||
| colin | numeric[] | Array of points' values |
|
||||
| iterations | integer | Number of iterations |
|
||||
|
||||
### Returns
|
||||
|
||||
Returns a table object
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| geomout | geometry | Geometries of new dataset of polygons|
|
||||
| colout | numeric | Values of each cell|
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
with data as (
|
||||
select
|
||||
ARRAY[7.0,8.0,1.0,2.0,3.0,5.0,6.0,4.0] as colin,
|
||||
ARRAY[ST_GeomFromText('POINT(2.1744 41.4036)'),ST_GeomFromText('POINT(2.1228 41.3809)'),ST_GeomFromText('POINT(2.1511 41.3742)'),ST_GeomFromText('POINT(2.1528 41.4136)'),ST_GeomFromText('POINT(2.165 41.3917)'),ST_GeomFromText('POINT(2.1498 41.3713)'),ST_GeomFromText('POINT(2.1533 41.3683)'),ST_GeomFromText('POINT(2.131386 41.413998)')] as geomin
|
||||
)
|
||||
select CDB_TINmap(geomin, colin, 2) from data;
|
||||
```
|
||||
|
||||
40
doc/16_getis_ord_gstar.md
Normal file
40
doc/16_getis_ord_gstar.md
Normal file
@@ -0,0 +1,40 @@
|
||||
## Getis-Ord's G\*
|
||||
|
||||
Getis-Ord's G\* is a geo-statistical measurement of the intensity of clustering of high or low values. The clustering of high values can be referred to as "hotspots" because these are areas of high activity or large (relative to the global mean) measurement values. Coldspots are clustered areas with low activity or small measurement values.
|
||||
|
||||
### CDB_GetisOrdsG(subquery text, column_name text)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| subquery | text | A query of the data you want to pass to the function. It must include `column_name`, a geometry column (usually `the_geom`) and an id column (usually `cartodb_id`) |
|
||||
| column_name | text | This is the column of interest for performing this analysis on. This column should be a numeric type. |
|
||||
| w_type (optional) | text | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation.](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html) |
|
||||
| num_ngbrs (optional) | integer | Default: 5. If `knn` is chosen, this will set the number of neighbors. If `knn` is not chosen, any entered value will be ignored. Use `NULL` if not choosing `knn`. |
|
||||
| permutations (optional) | integer | The number of permutations for calculating p-values. Default: 999 |
|
||||
| geom_col (optional) | text | The column where the geometry information is stored. The format must be PostGIS Geometry type (SRID 4326). Default: `the_geom`. |
|
||||
| id_col (optional) | text | The column that has the unique row identifier. |
|
||||
|
||||
### Returns
|
||||
|
||||
Returns a table with the following columns.
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| z_score | numeric | z-score, a measure of the intensity of clustering of high values (hotspots) or low values (coldspots). Positive values represent 'hotspots', while negative values represent 'coldspots'. |
|
||||
| p_value | numeric | p-value, a measure of the significance of the intensity of clustering |
|
||||
| p_z_sim | numeric | p-value based on standard normal approximation from permutations |
|
||||
| rowid | integer | The original `id_col` that can be used to associate the outputs with the original geometry and inputs |
|
||||
|
||||
#### Example Usage
|
||||
|
||||
The following query returns the original table augmented with the values calculated from the Getis-Ord's G\* analysis.
|
||||
|
||||
```sql
|
||||
SELECT i.*, m.z_score, m.p_value
|
||||
FROM cdb_crankshaft.CDB_GetisOrdsG('SELECT * FROM incident_reports_clustered',
|
||||
'num_incidents') As m
|
||||
JOIN incident_reports_clustered As i
|
||||
ON i.cartodb_id = m.rowid;
|
||||
```
|
||||
163
doc/18_outliers.md
Normal file
163
doc/18_outliers.md
Normal file
@@ -0,0 +1,163 @@
|
||||
## Outlier Detection
|
||||
|
||||
This set of functions detects the presence of outliers. There are three functions for finding outliers from non-spatial data:
|
||||
|
||||
1. Static Outliers
|
||||
1. Percentage Outliers
|
||||
1. Standard Deviation Outliers
|
||||
|
||||
### CDB_StaticOutlier(column_value numeric, threshold numeric)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| column_value | numeric | The column of values on which to apply the threshold |
|
||||
| threshold | numeric | The static threshold which is used to indicate whether a `column_value` is an outlier or not |
|
||||
|
||||
### Returns
|
||||
|
||||
Returns a boolean (true/false) depending on whether a value is above or below (or equal to) the threshold
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| outlier | boolean | classification of whether a row is an outlier or not |
|
||||
|
||||
#### Example Usage
|
||||
|
||||
With a table `website_visits` and a column of the number of website visits in units of 10,000 visits:
|
||||
|
||||
```
|
||||
| id | visits_10k |
|
||||
|----|------------|
|
||||
| 1 | 1 |
|
||||
| 2 | 3 |
|
||||
| 3 | 5 |
|
||||
| 4 | 1 |
|
||||
| 5 | 32 |
|
||||
| 6 | 3 |
|
||||
| 7 | 57 |
|
||||
| 8 | 2 |
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
id,
|
||||
CDB_StaticOutlier(visits_10k, 11.0) As outlier,
|
||||
visits_10k
|
||||
FROM website_visits
|
||||
```
|
||||
|
||||
```
|
||||
| id | outlier | visits_10k |
|
||||
|----|---------|------------|
|
||||
| 1 | f | 1 |
|
||||
| 2 | f | 3 |
|
||||
| 3 | f | 5 |
|
||||
| 4 | f | 1 |
|
||||
| 5 | t | 32 |
|
||||
| 6 | f | 3 |
|
||||
| 7 | t | 57 |
|
||||
| 8 | f | 2 |
|
||||
```
|
||||
|
||||
### CDB_PercentOutlier(column_values numeric[], outlier_fraction numeric, ids int[])
|
||||
|
||||
`CDB_PercentOutlier` calculates whether or not a value falls above a given threshold based on a percentage above the mean value of the input values.
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| column_values | numeric[] | An array of the values to calculate the outlier classification on |
|
||||
| outlier_fraction | numeric | The threshold above which a column value divided by the mean of all values is considered an outlier |
|
||||
| ids | int[] | An array of the unique row ids of the input data (usually `cartodb_id`) |
|
||||
|
||||
### Returns
|
||||
|
||||
Returns a table of the outlier classification with the following columns
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| is_outlier | boolean | classification of whether a row is an outlier or not |
|
||||
| rowid | int | original row id (e.g., input `cartodb_id`) of the row which has the outlier classification |
|
||||
|
||||
#### Example Usage
|
||||
|
||||
This example find outliers which are more than 100% larger than the average (that is, more than 2.0 times larger).
|
||||
|
||||
```sql
|
||||
WITH cte As (
|
||||
SELECT
|
||||
unnest(Array[1,2,3,4,5,6,7,8]) As id,
|
||||
unnest(Array[1,3,5,1,32,3,57,2]) As visits_10k
|
||||
)
|
||||
SELECT
|
||||
(CDB_PercentOutlier(array_agg(visits_10k), 2.0, array_agg(id))).*
|
||||
FROM cte;
|
||||
```
|
||||
|
||||
Output
|
||||
```
|
||||
| outlier | rowid |
|
||||
|---------+-------|
|
||||
| f | 1 |
|
||||
| f | 2 |
|
||||
| f | 3 |
|
||||
| f | 4 |
|
||||
| t | 5 |
|
||||
| f | 6 |
|
||||
| t | 7 |
|
||||
| f | 8 |
|
||||
```
|
||||
|
||||
### CDB_StdDevOutlier(column_values numeric[], num_deviations numeric, ids int[], is_symmetric boolean DEFAULT true)
|
||||
|
||||
`CDB_StdDevOutlier` calculates whether or not a value falls above or below a given threshold based on the number of standard deviations from the mean.
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| column_values | numeric[] | An array of the values to calculate the outlier classification on |
|
||||
| num_deviations | numeric | The threshold in units of standard deviation |
|
||||
| ids | int[] | An array of the unique row ids of the input data (usually `cartodb_id`) |
|
||||
| is_symmetric (optional) | boolean | Consider outliers that are symmetric about the mean (default: true) |
|
||||
|
||||
### Returns
|
||||
|
||||
Returns a table of the outlier classification with the following columns
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| is_outlier | boolean | classification of whether a row is an outlier or not |
|
||||
| rowid | int | original row id (e.g., input `cartodb_id`) of the row which has the outlier classification |
|
||||
|
||||
#### Example Usage
|
||||
|
||||
This example find outliers which are more than 100% larger than the average (that is, more than 2.0 times larger).
|
||||
|
||||
```sql
|
||||
WITH cte As (
|
||||
SELECT
|
||||
unnest(Array[1,2,3,4,5,6,7,8]) As id,
|
||||
unnest(Array[1,3,5,1,32,3,57,2]) As visits_10k
|
||||
)
|
||||
SELECT
|
||||
(CDB_StdDevOutlier(array_agg(visits_10k), 2.0, array_agg(id))).*
|
||||
FROM cte;
|
||||
```
|
||||
|
||||
Output
|
||||
```
|
||||
| outlier | rowid |
|
||||
|---------+-------|
|
||||
| f | 1 |
|
||||
| f | 2 |
|
||||
| f | 3 |
|
||||
| f | 4 |
|
||||
| f | 5 |
|
||||
| f | 6 |
|
||||
| t | 7 |
|
||||
| f | 8 |
|
||||
```
|
||||
50
doc/19_contour.md
Normal file
50
doc/19_contour.md
Normal file
@@ -0,0 +1,50 @@
|
||||
## Contour maps
|
||||
|
||||
Function to generate a contour map from an scatter dataset of points, using one of these three methods:
|
||||
|
||||
* [Nearest neighbor](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
|
||||
* [Barycentric](https://en.wikipedia.org/wiki/Barycentric_coordinate_system)
|
||||
* [IDW](https://en.wikipedia.org/wiki/Inverse_distance_weighting)
|
||||
|
||||
### CDB_Contour (geom geometry[], values numeric[], resolution integer, buffer numeric, method, classmethod integer, steps integer)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| geom | geometry[] | Array of points's geometries |
|
||||
| values | numeric[] | Array of points' values for the param under study|
|
||||
| buffer | numeric | Value between 0 and 1 for spatial buffer of the set of points
|
||||
| method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW|
|
||||
| classmethod | integer | 0:equals, 1: heads&tails, 2:jenks, 3:quantiles |
|
||||
| steps | integer | Number of steps in the classification|
|
||||
| max_time | integer | if <= 0: max processing time in seconds (smart resolution) , if >0: resolution in meters
|
||||
|
||||
### Returns
|
||||
Returns a table object
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| the_geom | geometry | Geometries of the classified contour map|
|
||||
| avg_value | numeric | Avg value of the area|
|
||||
| min_value | numeric | Min value of the area|
|
||||
| max_value | numeric | Max value of the areal|
|
||||
| bin | integer | Index of the class of the area|
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
WITH a AS (
|
||||
SELECT
|
||||
ARRAY[800, 700, 600, 500, 400, 300, 200, 100]::numeric[] AS vals,
|
||||
ARRAY[ST_GeomFromText('POINT(2.1744 41.403)',4326),ST_GeomFromText('POINT(2.1228 41.380)',4326),ST_GeomFromText('POINT(2.1511 41.374)',4326),ST_GeomFromText('POINT(2.1528 41.413)',4326),ST_GeomFromText('POINT(2.165 41.391)',4326),ST_GeomFromText('POINT(2.1498 41.371)',4326),ST_GeomFromText('POINT(2.1533 41.368)',4326),ST_GeomFromText('POINT(2.131386 41.41399)',4326)] AS g
|
||||
),
|
||||
b as(
|
||||
SELECT
|
||||
foo.*
|
||||
FROM
|
||||
a,
|
||||
cdb_crankshaft.CDB_contour(a.g, a.vals, 0.0, 1, 3, 5, 60) foo
|
||||
)
|
||||
SELECT bin, avg_value from b order by bin;
|
||||
```
|
||||
413
release/crankshaft--0.0.2--0.0.3.sql
Normal file
413
release/crankshaft--0.0.2--0.0.3.sql
Normal file
@@ -0,0 +1,413 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
|
||||
-- [MANUALLY] DROP FUNCTIONS REMOVED SINCE 0.0.2 version
|
||||
|
||||
DROP FUNCTION IF EXISTS cdb_moran_local(TEXT, TEXT, float, INT, INT, TEXT, TEXT, TEXT);
|
||||
DROP FUNCTION IF EXISTS cdb_moran_local_rate(TEXT, TEXT, TEXT, FLOAT, INT, INT, TEXT, TEXT, TEXT);
|
||||
DROP FUNCTION IF EXISTS _cdb_crankshaft_virtualenvs_path();
|
||||
DROP FUNCTION IF EXISTS _cdb_crankshaft_activate_py();
|
||||
|
||||
-- [END MANUALLY] DROP FUNCTIONS REMOVED SINCE 0.0.2 version
|
||||
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.0.3'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
|
||||
-- Internal identifier of the installed extension instence
|
||||
-- e.g. 'dev' for current development version
|
||||
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
|
||||
RETURNS text AS $$
|
||||
SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
-- Internal function.
|
||||
-- Set the seeds of the RNGs (Random Number Generators)
|
||||
-- used internally.
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||
AS $$
|
||||
from crankshaft import random_seeds
|
||||
random_seeds.set_random_seeds(seed_value)
|
||||
$$ LANGUAGE plpythonu;
|
||||
-- Moran's I Global Measure (public-facing)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestGlobal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_AreasOfInterestLocal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT)
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestLocal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col);
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialHotspots(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HH', 'HL');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialColdspots(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('LL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialOutliers(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Global Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestGlobalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran FLOAT, significance FLOAT)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
-- Moran's I Local Rate (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_AreasOfInterestLocalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT)
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local_rate
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestLocalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col);
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialHotspotsRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HH', 'HL');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialColdspotsRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('LL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialOutliersRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
|
||||
RETURNS table (cartodb_id integer, cluster_no integer) as $$
|
||||
|
||||
from crankshaft.clustering import kmeans
|
||||
return kmeans(query,no_clusters,no_init)
|
||||
|
||||
$$ language plpythonu;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
|
||||
RETURNS Numeric[] AS
|
||||
$$
|
||||
DECLARE
|
||||
newX NUMERIC;
|
||||
newY NUMERIC;
|
||||
newW NUMERIC;
|
||||
BEGIN
|
||||
IF weight IS NULL OR the_geom IS NULL THEN
|
||||
newX = state[1];
|
||||
newY = state[2];
|
||||
newW = state[3];
|
||||
ELSE
|
||||
newX = state[1] + ST_X(the_geom)*weight;
|
||||
newY = state[2] + ST_Y(the_geom)*weight;
|
||||
newW = state[3] + weight;
|
||||
END IF;
|
||||
RETURN Array[newX,newY,newW];
|
||||
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
|
||||
RETURNS GEOMETRY AS
|
||||
$$
|
||||
BEGIN
|
||||
IF state[3] = 0 THEN
|
||||
RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326);
|
||||
ELSE
|
||||
RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)(
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
|
||||
-- from a polygon table over an arbitrary polygon
|
||||
-- (weighted by the area proportion overlapped)
|
||||
-- Aereal weighting is a very simple form of aereal interpolation.
|
||||
--
|
||||
-- Parameters:
|
||||
-- * geom a Polygon geometry which defines the area where a value will be
|
||||
-- estimated as the area-weighted sum of a given table/column
|
||||
-- * target_table_name table name of the table that provides the values
|
||||
-- * target_column column name of the column that provides the values
|
||||
-- * schema_name optional parameter to defina the schema the target table
|
||||
-- belongs to, which is necessary if its not in the search_path.
|
||||
-- Note that target_table_name should never include the schema in it.
|
||||
-- Return value:
|
||||
-- Aereal-weighted interpolation of the column values over the geometry
|
||||
CREATE OR REPLACE
|
||||
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
result numeric;
|
||||
qualified_name text;
|
||||
BEGIN
|
||||
IF schema_name IS NULL THEN
|
||||
qualified_name := Format('%I', target_table_name);
|
||||
ELSE
|
||||
qualified_name := Format('%I.%s', schema_name, target_table_name);
|
||||
END IF;
|
||||
EXECUTE Format('
|
||||
SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
|
||||
FROM %s AS a
|
||||
WHERE $1 && a.the_geom
|
||||
', target_column, qualified_name)
|
||||
USING geom
|
||||
INTO result;
|
||||
RETURN result;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
--
|
||||
-- Creates N points randomly distributed arround the polygon
|
||||
--
|
||||
-- @param g - the geometry to be turned in to points
|
||||
--
|
||||
-- @param no_points - the number of points to generate
|
||||
--
|
||||
-- @params max_iter_per_point - the function generates points in the polygon's bounding box
|
||||
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
|
||||
-- misses per point the funciton accepts before giving up.
|
||||
--
|
||||
-- Returns: Multipoint with the requested points
|
||||
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
|
||||
RETURNS GEOMETRY AS $$
|
||||
DECLARE
|
||||
extent GEOMETRY;
|
||||
test_point Geometry;
|
||||
width NUMERIC;
|
||||
height NUMERIC;
|
||||
x0 NUMERIC;
|
||||
y0 NUMERIC;
|
||||
xp NUMERIC;
|
||||
yp NUMERIC;
|
||||
no_left INTEGER;
|
||||
remaining_iterations INTEGER;
|
||||
points GEOMETRY[];
|
||||
bbox_line GEOMETRY;
|
||||
intersection_line GEOMETRY;
|
||||
BEGIN
|
||||
extent := ST_Envelope(geom);
|
||||
width := ST_XMax(extent) - ST_XMIN(extent);
|
||||
height := ST_YMax(extent) - ST_YMIN(extent);
|
||||
x0 := ST_XMin(extent);
|
||||
y0 := ST_YMin(extent);
|
||||
no_left := no_points;
|
||||
|
||||
LOOP
|
||||
if(no_left=0) THEN
|
||||
EXIT;
|
||||
END IF;
|
||||
yp = y0 + height*random();
|
||||
bbox_line = ST_MakeLine(
|
||||
ST_SetSRID(ST_MakePoint(yp, x0),4326),
|
||||
ST_SetSRID(ST_MakePoint(yp, x0+width),4326)
|
||||
);
|
||||
intersection_line = ST_Intersection(bbox_line,geom);
|
||||
test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
|
||||
points := points || test_point;
|
||||
no_left = no_left - 1 ;
|
||||
END LOOP;
|
||||
RETURN ST_Collect(points);
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
209
release/crankshaft--0.0.3--0.0.2.sql
Normal file
209
release/crankshaft--0.0.3--0.0.2.sql
Normal file
@@ -0,0 +1,209 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
|
||||
-- [MANUALLY] DROP FUNCTIONS INTRODUCED IN 0.0.3 version
|
||||
|
||||
DROP FUNCTION IF EXISTS CDB_AreasOfInterestGlobal(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS _CDB_AreasOfInterestLocal(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_AreasOfInterestLocal(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_GetSpatialHotspots(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_GetSpatialColdspots(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_GetSpatialOutliers(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_AreasOfInterestGlobalRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_AreasOfInterestLocalRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS _CDB_AreasOfInterestLocalRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_GetSpatialHotspotsRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_GetSpatialColdspotsRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_GetSpatialOutliersRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT);
|
||||
DROP FUNCTION IF EXISTS CDB_KMeans(text,integer,integer);
|
||||
DROP AGGREGATE IF EXISTS CDB_WeightedMean(geometry(Point, 4326), NUMERIC);
|
||||
DROP FUNCTION IF EXISTS CDB_WeightedMeanS(Numeric[], GEOMETRY(Point, 4326), NUMERIC);
|
||||
DROP FUNCTION IF EXISTS CDB_WeightedMeanF(Numeric[]);
|
||||
|
||||
|
||||
-- [END MANUALLY] DROP FUNCTIONS INTRODUCED IN 0.0.3 version
|
||||
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.0.2'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
|
||||
-- Internal identifier of the installed extension instence
|
||||
-- e.g. 'dev' for current development version
|
||||
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
|
||||
RETURNS text AS $$
|
||||
SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
CREATE OR REPLACE FUNCTION _cdb_crankshaft_virtualenvs_path()
|
||||
RETURNS text
|
||||
AS $$
|
||||
BEGIN
|
||||
-- RETURN '/opt/virtualenvs/crankshaft';
|
||||
RETURN '/home/ubuntu/crankshaft/envs';
|
||||
END;
|
||||
$$ language plpgsql IMMUTABLE STRICT;
|
||||
|
||||
-- Use the crankshaft python module
|
||||
CREATE OR REPLACE FUNCTION _cdb_crankshaft_activate_py()
|
||||
RETURNS VOID
|
||||
AS $$
|
||||
import os
|
||||
# plpy.notice('%',str(os.environ))
|
||||
# activate virtualenv
|
||||
crankshaft_version = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_internal_version()')[0]['_cdb_crankshaft_internal_version']
|
||||
base_path = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_virtualenvs_path()')[0]['_cdb_crankshaft_virtualenvs_path']
|
||||
default_venv_path = os.path.join(base_path, crankshaft_version)
|
||||
venv_path = os.environ.get('CRANKSHAFT_VENV', default_venv_path)
|
||||
activate_path = venv_path + '/bin/activate_this.py'
|
||||
exec(open(activate_path).read(), dict(__file__=activate_path))
|
||||
$$ LANGUAGE plpythonu;
|
||||
-- Internal function.
|
||||
-- Set the seeds of the RNGs (Random Number Generators)
|
||||
-- used internally.
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||
AS $$
|
||||
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
from crankshaft import random_seeds
|
||||
random_seeds.set_random_seeds(seed_value)
|
||||
$$ LANGUAGE plpythonu;
|
||||
-- Moran's I
|
||||
CREATE OR REPLACE FUNCTION
|
||||
cdb_moran_local (
|
||||
t TEXT,
|
||||
attr TEXT,
|
||||
significance float DEFAULT 0.05,
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_column TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id',
|
||||
w_type TEXT DEFAULT 'knn')
|
||||
RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
|
||||
AS $$
|
||||
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local Rate
|
||||
CREATE OR REPLACE FUNCTION
|
||||
cdb_moran_local_rate(t TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
significance FLOAT DEFAULT 0.05,
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_column TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id',
|
||||
w_type TEXT DEFAULT 'knn')
|
||||
RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric)
|
||||
AS $$
|
||||
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
from crankshaft.clustering import moran_local_rate
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
|
||||
$$ LANGUAGE plpythonu;
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
|
||||
-- from a polygon table over an arbitrary polygon
|
||||
-- (weighted by the area proportion overlapped)
|
||||
-- Aereal weighting is a very simple form of aereal interpolation.
|
||||
--
|
||||
-- Parameters:
|
||||
-- * geom a Polygon geometry which defines the area where a value will be
|
||||
-- estimated as the area-weighted sum of a given table/column
|
||||
-- * target_table_name table name of the table that provides the values
|
||||
-- * target_column column name of the column that provides the values
|
||||
-- * schema_name optional parameter to defina the schema the target table
|
||||
-- belongs to, which is necessary if its not in the search_path.
|
||||
-- Note that target_table_name should never include the schema in it.
|
||||
-- Return value:
|
||||
-- Aereal-weighted interpolation of the column values over the geometry
|
||||
CREATE OR REPLACE
|
||||
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
result numeric;
|
||||
qualified_name text;
|
||||
BEGIN
|
||||
IF schema_name IS NULL THEN
|
||||
qualified_name := Format('%I', target_table_name);
|
||||
ELSE
|
||||
qualified_name := Format('%I.%s', schema_name, target_table_name);
|
||||
END IF;
|
||||
EXECUTE Format('
|
||||
SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
|
||||
FROM %s AS a
|
||||
WHERE $1 && a.the_geom
|
||||
', target_column, qualified_name)
|
||||
USING geom
|
||||
INTO result;
|
||||
RETURN result;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
--
|
||||
-- Creates N points randomly distributed arround the polygon
|
||||
--
|
||||
-- @param g - the geometry to be turned in to points
|
||||
--
|
||||
-- @param no_points - the number of points to generate
|
||||
--
|
||||
-- @params max_iter_per_point - the function generates points in the polygon's bounding box
|
||||
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
|
||||
-- misses per point the funciton accepts before giving up.
|
||||
--
|
||||
-- Returns: Multipoint with the requested points
|
||||
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
|
||||
RETURNS GEOMETRY AS $$
|
||||
DECLARE
|
||||
extent GEOMETRY;
|
||||
test_point Geometry;
|
||||
width NUMERIC;
|
||||
height NUMERIC;
|
||||
x0 NUMERIC;
|
||||
y0 NUMERIC;
|
||||
xp NUMERIC;
|
||||
yp NUMERIC;
|
||||
no_left INTEGER;
|
||||
remaining_iterations INTEGER;
|
||||
points GEOMETRY[];
|
||||
bbox_line GEOMETRY;
|
||||
intersection_line GEOMETRY;
|
||||
BEGIN
|
||||
extent := ST_Envelope(geom);
|
||||
width := ST_XMax(extent) - ST_XMIN(extent);
|
||||
height := ST_YMax(extent) - ST_YMIN(extent);
|
||||
x0 := ST_XMin(extent);
|
||||
y0 := ST_YMin(extent);
|
||||
no_left := no_points;
|
||||
|
||||
LOOP
|
||||
if(no_left=0) THEN
|
||||
EXIT;
|
||||
END IF;
|
||||
yp = y0 + height*random();
|
||||
bbox_line = ST_MakeLine(
|
||||
ST_SetSRID(ST_MakePoint(yp, x0),4326),
|
||||
ST_SetSRID(ST_MakePoint(yp, x0+width),4326)
|
||||
);
|
||||
intersection_line = ST_Intersection(bbox_line,geom);
|
||||
test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
|
||||
points := points || test_point;
|
||||
no_left = no_left - 1 ;
|
||||
END LOOP;
|
||||
RETURN ST_Collect(points);
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
8
release/crankshaft--0.0.3--0.0.4.sql
Normal file
8
release/crankshaft--0.0.3--0.0.4.sql
Normal file
@@ -0,0 +1,8 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.0.4'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
403
release/crankshaft--0.0.3.sql
Normal file
403
release/crankshaft--0.0.3.sql
Normal file
@@ -0,0 +1,403 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.0.3'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
|
||||
-- Internal identifier of the installed extension instence
|
||||
-- e.g. 'dev' for current development version
|
||||
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
|
||||
RETURNS text AS $$
|
||||
SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
-- Internal function.
|
||||
-- Set the seeds of the RNGs (Random Number Generators)
|
||||
-- used internally.
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||
AS $$
|
||||
from crankshaft import random_seeds
|
||||
random_seeds.set_random_seeds(seed_value)
|
||||
$$ LANGUAGE plpythonu;
|
||||
-- Moran's I Global Measure (public-facing)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestGlobal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_AreasOfInterestLocal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT)
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestLocal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col);
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialHotspots(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HH', 'HL');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialColdspots(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('LL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialOutliers(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Global Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestGlobalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran FLOAT, significance FLOAT)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
-- Moran's I Local Rate (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_AreasOfInterestLocalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT)
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local_rate
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestLocalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col);
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialHotspotsRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HH', 'HL');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialColdspotsRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('LL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialOutliersRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
|
||||
RETURNS table (cartodb_id integer, cluster_no integer) as $$
|
||||
|
||||
from crankshaft.clustering import kmeans
|
||||
return kmeans(query,no_clusters,no_init)
|
||||
|
||||
$$ language plpythonu;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
|
||||
RETURNS Numeric[] AS
|
||||
$$
|
||||
DECLARE
|
||||
newX NUMERIC;
|
||||
newY NUMERIC;
|
||||
newW NUMERIC;
|
||||
BEGIN
|
||||
IF weight IS NULL OR the_geom IS NULL THEN
|
||||
newX = state[1];
|
||||
newY = state[2];
|
||||
newW = state[3];
|
||||
ELSE
|
||||
newX = state[1] + ST_X(the_geom)*weight;
|
||||
newY = state[2] + ST_Y(the_geom)*weight;
|
||||
newW = state[3] + weight;
|
||||
END IF;
|
||||
RETURN Array[newX,newY,newW];
|
||||
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
|
||||
RETURNS GEOMETRY AS
|
||||
$$
|
||||
BEGIN
|
||||
IF state[3] = 0 THEN
|
||||
RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326);
|
||||
ELSE
|
||||
RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)(
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
|
||||
-- from a polygon table over an arbitrary polygon
|
||||
-- (weighted by the area proportion overlapped)
|
||||
-- Aereal weighting is a very simple form of aereal interpolation.
|
||||
--
|
||||
-- Parameters:
|
||||
-- * geom a Polygon geometry which defines the area where a value will be
|
||||
-- estimated as the area-weighted sum of a given table/column
|
||||
-- * target_table_name table name of the table that provides the values
|
||||
-- * target_column column name of the column that provides the values
|
||||
-- * schema_name optional parameter to defina the schema the target table
|
||||
-- belongs to, which is necessary if its not in the search_path.
|
||||
-- Note that target_table_name should never include the schema in it.
|
||||
-- Return value:
|
||||
-- Aereal-weighted interpolation of the column values over the geometry
|
||||
CREATE OR REPLACE
|
||||
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
result numeric;
|
||||
qualified_name text;
|
||||
BEGIN
|
||||
IF schema_name IS NULL THEN
|
||||
qualified_name := Format('%I', target_table_name);
|
||||
ELSE
|
||||
qualified_name := Format('%I.%s', schema_name, target_table_name);
|
||||
END IF;
|
||||
EXECUTE Format('
|
||||
SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
|
||||
FROM %s AS a
|
||||
WHERE $1 && a.the_geom
|
||||
', target_column, qualified_name)
|
||||
USING geom
|
||||
INTO result;
|
||||
RETURN result;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
--
|
||||
-- Creates N points randomly distributed arround the polygon
|
||||
--
|
||||
-- @param g - the geometry to be turned in to points
|
||||
--
|
||||
-- @param no_points - the number of points to generate
|
||||
--
|
||||
-- @params max_iter_per_point - the function generates points in the polygon's bounding box
|
||||
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
|
||||
-- misses per point the funciton accepts before giving up.
|
||||
--
|
||||
-- Returns: Multipoint with the requested points
|
||||
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
|
||||
RETURNS GEOMETRY AS $$
|
||||
DECLARE
|
||||
extent GEOMETRY;
|
||||
test_point Geometry;
|
||||
width NUMERIC;
|
||||
height NUMERIC;
|
||||
x0 NUMERIC;
|
||||
y0 NUMERIC;
|
||||
xp NUMERIC;
|
||||
yp NUMERIC;
|
||||
no_left INTEGER;
|
||||
remaining_iterations INTEGER;
|
||||
points GEOMETRY[];
|
||||
bbox_line GEOMETRY;
|
||||
intersection_line GEOMETRY;
|
||||
BEGIN
|
||||
extent := ST_Envelope(geom);
|
||||
width := ST_XMax(extent) - ST_XMIN(extent);
|
||||
height := ST_YMax(extent) - ST_YMIN(extent);
|
||||
x0 := ST_XMin(extent);
|
||||
y0 := ST_YMin(extent);
|
||||
no_left := no_points;
|
||||
|
||||
LOOP
|
||||
if(no_left=0) THEN
|
||||
EXIT;
|
||||
END IF;
|
||||
yp = y0 + height*random();
|
||||
bbox_line = ST_MakeLine(
|
||||
ST_SetSRID(ST_MakePoint(yp, x0),4326),
|
||||
ST_SetSRID(ST_MakePoint(yp, x0+width),4326)
|
||||
);
|
||||
intersection_line = ST_Intersection(bbox_line,geom);
|
||||
test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
|
||||
points := points || test_point;
|
||||
no_left = no_left - 1 ;
|
||||
END LOOP;
|
||||
RETURN ST_Collect(points);
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
8
release/crankshaft--0.0.4--0.0.3.sql
Normal file
8
release/crankshaft--0.0.4--0.0.3.sql
Normal file
@@ -0,0 +1,8 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.0.3'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
258
release/crankshaft--0.0.4--0.1.0.sql
Normal file
258
release/crankshaft--0.0.4--0.1.0.sql
Normal file
@@ -0,0 +1,258 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED FROM SOURCES
|
||||
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.1.0'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- PyAgg stuff
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
|
||||
returns NUMERIC[] as $$
|
||||
BEGIN
|
||||
if array_upper(current_state,1) is null then
|
||||
RAISE NOTICE 'setting state %',array_upper(current_row,1);
|
||||
current_state[1] = array_upper(current_row,1);
|
||||
end if;
|
||||
return array_cat(current_state,current_row) ;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[])(
|
||||
SFUNC = CDB_PyAggS,
|
||||
STYPE = Numeric[],
|
||||
INITCOND = "{}"
|
||||
);
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Segmentation stuff
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
target NUMERIC[],
|
||||
features NUMERIC[],
|
||||
target_features NUMERIC[],
|
||||
target_ids NUMERIC[],
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
learning_rate DOUBLE PRECISION DEFAULT 0.01,
|
||||
min_samples_leaf INTEGER DEFAULT 1)
|
||||
RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC)
|
||||
AS $$
|
||||
import numpy as np
|
||||
import plpy
|
||||
|
||||
from crankshaft.segmentation import create_and_predict_segment_agg
|
||||
model_params = {'n_estimators': n_estimators,
|
||||
'max_depth': max_depth,
|
||||
'subsample': subsample,
|
||||
'learning_rate': learning_rate,
|
||||
'min_samples_leaf': min_samples_leaf}
|
||||
|
||||
def unpack2D(data):
|
||||
dimension = data.pop(0)
|
||||
a = np.array(data, dtype=float)
|
||||
return a.reshape(len(a)/dimension, dimension)
|
||||
|
||||
return create_and_predict_segment_agg(np.array(target, dtype=float),
|
||||
unpack2D(features),
|
||||
unpack2D(target_features),
|
||||
target_ids,
|
||||
model_params)
|
||||
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment (
|
||||
query TEXT,
|
||||
variable_name TEXT,
|
||||
target_table TEXT,
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
learning_rate DOUBLE PRECISION DEFAULT 0.01,
|
||||
min_samples_leaf INTEGER DEFAULT 1)
|
||||
RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.segmentation import create_and_predict_segment
|
||||
model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf}
|
||||
return create_and_predict_segment(query,variable_name,target_table, model_params)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Spatial interpolation
|
||||
|
||||
-- 0: nearest neighbor
|
||||
-- 1: barymetric
|
||||
-- 2: IDW
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
|
||||
IN query text,
|
||||
IN point geometry,
|
||||
IN method integer DEFAULT 1,
|
||||
IN p1 numeric DEFAULT 0,
|
||||
IN p2 numeric DEFAULT 0
|
||||
)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
gs geometry[];
|
||||
vs numeric[];
|
||||
output numeric;
|
||||
BEGIN
|
||||
EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs;
|
||||
SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a;
|
||||
|
||||
RETURN output;
|
||||
END;
|
||||
$$
|
||||
language plpgsql IMMUTABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
|
||||
IN geomin geometry[],
|
||||
IN colin numeric[],
|
||||
IN point geometry,
|
||||
IN method integer DEFAULT 1,
|
||||
IN p1 numeric DEFAULT 0,
|
||||
IN p2 numeric DEFAULT 0
|
||||
)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
gs geometry[];
|
||||
vs numeric[];
|
||||
gs2 geometry[];
|
||||
vs2 numeric[];
|
||||
g geometry;
|
||||
vertex geometry[];
|
||||
sg numeric;
|
||||
sa numeric;
|
||||
sb numeric;
|
||||
sc numeric;
|
||||
va numeric;
|
||||
vb numeric;
|
||||
vc numeric;
|
||||
output numeric;
|
||||
BEGIN
|
||||
output := -999.999;
|
||||
-- nearest
|
||||
IF method = 0 THEN
|
||||
|
||||
WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v)
|
||||
SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1;
|
||||
RETURN output;
|
||||
|
||||
-- barymetric
|
||||
ELSIF method = 1 THEN
|
||||
WITH a as (SELECT unnest(geomin) AS e),
|
||||
b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
|
||||
c as (SELECT (ST_Dump(t)).geom as v FROM b),
|
||||
d as (SELECT v FROM c WHERE ST_Within(point, v))
|
||||
SELECT v INTO g FROM d;
|
||||
IF g is null THEN
|
||||
-- out of the realm of the input data
|
||||
RETURN -888.888;
|
||||
END IF;
|
||||
-- vertex of the selected cell
|
||||
WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v)
|
||||
SELECT array_agg(v) INTO vertex FROM a;
|
||||
|
||||
-- retrieve the value of each vertex
|
||||
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
||||
SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
|
||||
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
||||
SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
|
||||
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
||||
SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
|
||||
|
||||
SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc;
|
||||
|
||||
output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg);
|
||||
RETURN output;
|
||||
|
||||
-- IDW
|
||||
-- p1: limit the number of neighbors, 0->no limit
|
||||
-- p2: order of distance decay, 0-> order 1
|
||||
ELSIF method = 2 THEN
|
||||
|
||||
IF p2 = 0 THEN
|
||||
p2 := 1;
|
||||
END IF;
|
||||
|
||||
WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v),
|
||||
b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g)
|
||||
SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b;
|
||||
IF p1::integer>0 THEN
|
||||
gs2:=gs;
|
||||
vs2:=vs;
|
||||
FOR i IN 1..p1
|
||||
LOOP
|
||||
gs2 := gs2 || gs[i];
|
||||
vs2 := vs2 || vs[i];
|
||||
END LOOP;
|
||||
ELSE
|
||||
gs2:=gs;
|
||||
vs2:=vs;
|
||||
END IF;
|
||||
|
||||
WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v),
|
||||
b as (
|
||||
SELECT
|
||||
(1/ST_distance(point, a.g)^p2::integer) as k,
|
||||
(a.v/ST_distance(point, a.g)^p2::integer) as f
|
||||
FROM a
|
||||
)
|
||||
SELECT sum(b.f)/sum(b.k) INTO output FROM b;
|
||||
RETURN output;
|
||||
|
||||
END IF;
|
||||
|
||||
RETURN -777.777;
|
||||
|
||||
END;
|
||||
$$
|
||||
language plpgsql IMMUTABLE;
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Spatial Markov
|
||||
|
||||
-- input table format:
|
||||
-- id | geom | date_1 | date_2 | date_3
|
||||
-- 1 | Pt1 | 12.3 | 13.1 | 14.2
|
||||
-- 2 | Pt2 | 11.0 | 13.2 | 12.5
|
||||
-- ...
|
||||
-- Sample Function call:
|
||||
-- SELECT CDB_SpatialMarkov('SELECT * FROM real_estate',
|
||||
-- Array['date_1', 'date_2', 'date_3'])
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_SpatialMarkovTrend (
|
||||
subquery TEXT,
|
||||
time_cols TEXT[],
|
||||
num_classes INT DEFAULT 7,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT)
|
||||
AS $$
|
||||
|
||||
from crankshaft.space_time_dynamics import spatial_markov_trend
|
||||
|
||||
## TODO: use named parameters or a dictionary
|
||||
return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
403
release/crankshaft--0.0.4.sql
Normal file
403
release/crankshaft--0.0.4.sql
Normal file
@@ -0,0 +1,403 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.0.4'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
|
||||
-- Internal identifier of the installed extension instence
|
||||
-- e.g. 'dev' for current development version
|
||||
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
|
||||
RETURNS text AS $$
|
||||
SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
-- Internal function.
|
||||
-- Set the seeds of the RNGs (Random Number Generators)
|
||||
-- used internally.
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||
AS $$
|
||||
from crankshaft import random_seeds
|
||||
random_seeds.set_random_seeds(seed_value)
|
||||
$$ LANGUAGE plpythonu;
|
||||
-- Moran's I Global Measure (public-facing)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestGlobal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_AreasOfInterestLocal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT)
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestLocal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col);
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialHotspots(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HH', 'HL');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialColdspots(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('LL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialOutliers(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Global Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestGlobalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran FLOAT, significance FLOAT)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
-- Moran's I Local Rate (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_AreasOfInterestLocalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT)
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local_rate
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestLocalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col);
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialHotspotsRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HH', 'HL');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialColdspotsRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('LL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialOutliersRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
|
||||
RETURNS table (cartodb_id integer, cluster_no integer) as $$
|
||||
|
||||
from crankshaft.clustering import kmeans
|
||||
return kmeans(query,no_clusters,no_init)
|
||||
|
||||
$$ language plpythonu;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
|
||||
RETURNS Numeric[] AS
|
||||
$$
|
||||
DECLARE
|
||||
newX NUMERIC;
|
||||
newY NUMERIC;
|
||||
newW NUMERIC;
|
||||
BEGIN
|
||||
IF weight IS NULL OR the_geom IS NULL THEN
|
||||
newX = state[1];
|
||||
newY = state[2];
|
||||
newW = state[3];
|
||||
ELSE
|
||||
newX = state[1] + ST_X(the_geom)*weight;
|
||||
newY = state[2] + ST_Y(the_geom)*weight;
|
||||
newW = state[3] + weight;
|
||||
END IF;
|
||||
RETURN Array[newX,newY,newW];
|
||||
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
|
||||
RETURNS GEOMETRY AS
|
||||
$$
|
||||
BEGIN
|
||||
IF state[3] = 0 THEN
|
||||
RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326);
|
||||
ELSE
|
||||
RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)(
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
|
||||
-- from a polygon table over an arbitrary polygon
|
||||
-- (weighted by the area proportion overlapped)
|
||||
-- Aereal weighting is a very simple form of aereal interpolation.
|
||||
--
|
||||
-- Parameters:
|
||||
-- * geom a Polygon geometry which defines the area where a value will be
|
||||
-- estimated as the area-weighted sum of a given table/column
|
||||
-- * target_table_name table name of the table that provides the values
|
||||
-- * target_column column name of the column that provides the values
|
||||
-- * schema_name optional parameter to defina the schema the target table
|
||||
-- belongs to, which is necessary if its not in the search_path.
|
||||
-- Note that target_table_name should never include the schema in it.
|
||||
-- Return value:
|
||||
-- Aereal-weighted interpolation of the column values over the geometry
|
||||
CREATE OR REPLACE
|
||||
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
result numeric;
|
||||
qualified_name text;
|
||||
BEGIN
|
||||
IF schema_name IS NULL THEN
|
||||
qualified_name := Format('%I', target_table_name);
|
||||
ELSE
|
||||
qualified_name := Format('%I.%s', schema_name, target_table_name);
|
||||
END IF;
|
||||
EXECUTE Format('
|
||||
SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
|
||||
FROM %s AS a
|
||||
WHERE $1 && a.the_geom
|
||||
', target_column, qualified_name)
|
||||
USING geom
|
||||
INTO result;
|
||||
RETURN result;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
--
|
||||
-- Creates N points randomly distributed arround the polygon
|
||||
--
|
||||
-- @param g - the geometry to be turned in to points
|
||||
--
|
||||
-- @param no_points - the number of points to generate
|
||||
--
|
||||
-- @params max_iter_per_point - the function generates points in the polygon's bounding box
|
||||
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
|
||||
-- misses per point the funciton accepts before giving up.
|
||||
--
|
||||
-- Returns: Multipoint with the requested points
|
||||
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
|
||||
RETURNS GEOMETRY AS $$
|
||||
DECLARE
|
||||
extent GEOMETRY;
|
||||
test_point Geometry;
|
||||
width NUMERIC;
|
||||
height NUMERIC;
|
||||
x0 NUMERIC;
|
||||
y0 NUMERIC;
|
||||
xp NUMERIC;
|
||||
yp NUMERIC;
|
||||
no_left INTEGER;
|
||||
remaining_iterations INTEGER;
|
||||
points GEOMETRY[];
|
||||
bbox_line GEOMETRY;
|
||||
intersection_line GEOMETRY;
|
||||
BEGIN
|
||||
extent := ST_Envelope(geom);
|
||||
width := ST_XMax(extent) - ST_XMIN(extent);
|
||||
height := ST_YMax(extent) - ST_YMIN(extent);
|
||||
x0 := ST_XMin(extent);
|
||||
y0 := ST_YMin(extent);
|
||||
no_left := no_points;
|
||||
|
||||
LOOP
|
||||
if(no_left=0) THEN
|
||||
EXIT;
|
||||
END IF;
|
||||
yp = y0 + height*random();
|
||||
bbox_line = ST_MakeLine(
|
||||
ST_SetSRID(ST_MakePoint(yp, x0),4326),
|
||||
ST_SetSRID(ST_MakePoint(yp, x0+width),4326)
|
||||
);
|
||||
intersection_line = ST_Intersection(bbox_line,geom);
|
||||
test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
|
||||
points := points || test_point;
|
||||
no_left = no_left - 1 ;
|
||||
END LOOP;
|
||||
RETURN ST_Collect(points);
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
81
release/crankshaft--0.1.0--0.0.4.sql
Normal file
81
release/crankshaft--0.1.0--0.0.4.sql
Normal file
@@ -0,0 +1,81 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED FROM SOURCES
|
||||
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.0.4'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Spatial Markov
|
||||
|
||||
DROP FUNCTION
|
||||
CDB_SpatialMarkovTrend (
|
||||
subquery TEXT,
|
||||
time_cols TEXT[],
|
||||
num_classes INT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT);
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Spatial interpolation
|
||||
|
||||
DROP FUNCTION CDB_SpatialInterpolation(
|
||||
IN geomin geometry[],
|
||||
IN colin numeric[],
|
||||
IN point geometry,
|
||||
IN method integer,
|
||||
IN p1 numeric,
|
||||
IN p2 numeric
|
||||
);
|
||||
|
||||
DROP FUNCTION CDB_SpatialInterpolation(
|
||||
IN query text,
|
||||
IN point geometry,
|
||||
IN method integer,
|
||||
IN p1 numeric,
|
||||
IN p2 numeric
|
||||
);
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Segmentation stuff
|
||||
|
||||
DROP FUNCTION
|
||||
CDB_CreateAndPredictSegment (
|
||||
query TEXT,
|
||||
variable_name TEXT,
|
||||
target_table TEXT,
|
||||
n_estimators INTEGER,
|
||||
max_depth INTEGER,
|
||||
subsample DOUBLE PRECISION,
|
||||
learning_rate DOUBLE PRECISION,
|
||||
min_samples_leaf INTEGER);
|
||||
|
||||
DROP FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
target NUMERIC[],
|
||||
features NUMERIC[],
|
||||
target_features NUMERIC[],
|
||||
target_ids NUMERIC[],
|
||||
n_estimators INTEGER,
|
||||
max_depth INTEGER,
|
||||
subsample DOUBLE PRECISION,
|
||||
learning_rate DOUBLE PRECISION,
|
||||
min_samples_leaf INTEGER);
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- PyAgg stuff
|
||||
|
||||
DROP AGGREGATE CDB_PyAgg(NUMERIC[]);
|
||||
DROP FUNCTION CDB_PyAggS(Numeric[], Numeric[]);
|
||||
827
release/crankshaft--0.1.0--0.2.0.sql
Normal file
827
release/crankshaft--0.1.0--0.2.0.sql
Normal file
@@ -0,0 +1,827 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.2.0'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
|
||||
-- Internal identifier of the installed extension instence
|
||||
-- e.g. 'dev' for current development version
|
||||
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
|
||||
RETURNS text AS $$
|
||||
SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
-- Internal function.
|
||||
-- Set the seeds of the RNGs (Random Number Generators)
|
||||
-- used internally.
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||
AS $$
|
||||
from crankshaft import random_seeds
|
||||
random_seeds.set_random_seeds(seed_value)
|
||||
$$ LANGUAGE plpythonu;
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
|
||||
returns NUMERIC[] as $$
|
||||
BEGIN
|
||||
if array_upper(current_state,1) is null then
|
||||
RAISE NOTICE 'setting state %',array_upper(current_row,1);
|
||||
current_state[1] = array_upper(current_row,1);
|
||||
end if;
|
||||
return array_cat(current_state,current_row) ;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Create aggregate if it did not exist
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname = 'cdb_pyagg'
|
||||
AND p.proisagg)
|
||||
THEN
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||
SFUNC = CDB_PyAggS,
|
||||
STYPE = Numeric[],
|
||||
INITCOND = "{}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
target NUMERIC[],
|
||||
features NUMERIC[],
|
||||
target_features NUMERIC[],
|
||||
target_ids NUMERIC[],
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
learning_rate DOUBLE PRECISION DEFAULT 0.01,
|
||||
min_samples_leaf INTEGER DEFAULT 1)
|
||||
RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC)
|
||||
AS $$
|
||||
import numpy as np
|
||||
import plpy
|
||||
|
||||
from crankshaft.segmentation import create_and_predict_segment_agg
|
||||
model_params = {'n_estimators': n_estimators,
|
||||
'max_depth': max_depth,
|
||||
'subsample': subsample,
|
||||
'learning_rate': learning_rate,
|
||||
'min_samples_leaf': min_samples_leaf}
|
||||
|
||||
def unpack2D(data):
|
||||
dimension = data.pop(0)
|
||||
a = np.array(data, dtype=float)
|
||||
return a.reshape(len(a)/dimension, dimension)
|
||||
|
||||
return create_and_predict_segment_agg(np.array(target, dtype=float),
|
||||
unpack2D(features),
|
||||
unpack2D(target_features),
|
||||
target_ids,
|
||||
model_params)
|
||||
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment (
|
||||
query TEXT,
|
||||
variable_name TEXT,
|
||||
target_table TEXT,
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
learning_rate DOUBLE PRECISION DEFAULT 0.01,
|
||||
min_samples_leaf INTEGER DEFAULT 1)
|
||||
RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.segmentation import create_and_predict_segment
|
||||
model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf}
|
||||
return create_and_predict_segment(query,variable_name,target_table, model_params)
|
||||
$$ LANGUAGE plpythonu;
|
||||
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
||||
IN target_query text,
|
||||
IN weight_column text,
|
||||
IN source_query text,
|
||||
IN pop_column text,
|
||||
IN target bigint,
|
||||
IN radius integer,
|
||||
IN minval numeric DEFAULT -10e307
|
||||
)
|
||||
RETURNS TABLE(
|
||||
the_geom geometry,
|
||||
source_id bigint,
|
||||
target_id bigint,
|
||||
dist numeric,
|
||||
h numeric,
|
||||
hpop numeric) AS $$
|
||||
DECLARE
|
||||
t_id bigint[];
|
||||
t_geom geometry[];
|
||||
t_weight numeric[];
|
||||
s_id bigint[];
|
||||
s_geom geometry[];
|
||||
s_pop numeric[];
|
||||
BEGIN
|
||||
EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight;
|
||||
EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop;
|
||||
RETURN QUERY
|
||||
SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g;
|
||||
END;
|
||||
$$ language plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
||||
IN t_id bigint[],
|
||||
IN t_geom geometry[],
|
||||
IN t_weight numeric[],
|
||||
IN s_id bigint[],
|
||||
IN s_geom geometry[],
|
||||
IN s_pop numeric[],
|
||||
IN target bigint,
|
||||
IN radius integer,
|
||||
IN minval numeric DEFAULT -10e307
|
||||
)
|
||||
RETURNS TABLE(
|
||||
the_geom geometry,
|
||||
source_id bigint,
|
||||
target_id bigint,
|
||||
dist numeric,
|
||||
h numeric,
|
||||
hpop numeric) AS $$
|
||||
DECLARE
|
||||
t_type text;
|
||||
s_type text;
|
||||
t_center geometry[];
|
||||
s_center geometry[];
|
||||
BEGIN
|
||||
t_type := GeometryType(t_geom[1]);
|
||||
s_type := GeometryType(s_geom[1]);
|
||||
IF t_type = 'POINT' THEN
|
||||
t_center := t_geom;
|
||||
ELSE
|
||||
WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp;
|
||||
END IF;
|
||||
IF s_type = 'POINT' THEN
|
||||
s_center := s_geom;
|
||||
ELSE
|
||||
WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp;
|
||||
END IF;
|
||||
RETURN QUERY
|
||||
with target0 as(
|
||||
SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td
|
||||
),
|
||||
source0 as(
|
||||
SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp
|
||||
),
|
||||
prev0 as(
|
||||
SELECT
|
||||
source0.sg,
|
||||
source0.sd as sourc_id,
|
||||
coalesce(source0.sp,0) as sp,
|
||||
target.td as targ_id,
|
||||
coalesce(target.tw,0) as tw,
|
||||
GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance
|
||||
FROM source0
|
||||
CROSS JOIN LATERAL
|
||||
(
|
||||
SELECT
|
||||
*
|
||||
FROM target0
|
||||
WHERE tw > minval
|
||||
AND ST_DWithin(geography(source0.sc), geography(tc), radius)
|
||||
) AS target
|
||||
),
|
||||
deno as(
|
||||
SELECT
|
||||
sourc_id,
|
||||
sum(tw/distance) as h_deno
|
||||
FROM
|
||||
prev0
|
||||
GROUP BY sourc_id
|
||||
)
|
||||
SELECT
|
||||
p.sg as the_geom,
|
||||
p.sourc_id as source_id,
|
||||
p.targ_id as target_id,
|
||||
case when p.distance > 1 then p.distance else 0.0 end as dist,
|
||||
100*(p.tw/p.distance)/d.h_deno as h,
|
||||
p.sp*(p.tw/p.distance)/d.h_deno as hpop
|
||||
FROM
|
||||
prev0 p,
|
||||
deno d
|
||||
WHERE
|
||||
p.targ_id = target AND
|
||||
p.sourc_id = d.sourc_id;
|
||||
END;
|
||||
$$ language plpgsql;
|
||||
-- 0: nearest neighbor
|
||||
-- 1: barymetric
|
||||
-- 2: IDW
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
|
||||
IN query text,
|
||||
IN point geometry,
|
||||
IN method integer DEFAULT 1,
|
||||
IN p1 numeric DEFAULT 0,
|
||||
IN p2 numeric DEFAULT 0
|
||||
)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
gs geometry[];
|
||||
vs numeric[];
|
||||
output numeric;
|
||||
BEGIN
|
||||
EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs;
|
||||
SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a;
|
||||
|
||||
RETURN output;
|
||||
END;
|
||||
$$
|
||||
language plpgsql IMMUTABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
|
||||
IN geomin geometry[],
|
||||
IN colin numeric[],
|
||||
IN point geometry,
|
||||
IN method integer DEFAULT 1,
|
||||
IN p1 numeric DEFAULT 0,
|
||||
IN p2 numeric DEFAULT 0
|
||||
)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
gs geometry[];
|
||||
vs numeric[];
|
||||
gs2 geometry[];
|
||||
vs2 numeric[];
|
||||
g geometry;
|
||||
vertex geometry[];
|
||||
sg numeric;
|
||||
sa numeric;
|
||||
sb numeric;
|
||||
sc numeric;
|
||||
va numeric;
|
||||
vb numeric;
|
||||
vc numeric;
|
||||
output numeric;
|
||||
BEGIN
|
||||
output := -999.999;
|
||||
-- nearest
|
||||
IF method = 0 THEN
|
||||
|
||||
WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v)
|
||||
SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1;
|
||||
RETURN output;
|
||||
|
||||
-- barymetric
|
||||
ELSIF method = 1 THEN
|
||||
WITH a as (SELECT unnest(geomin) AS e),
|
||||
b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
|
||||
c as (SELECT (ST_Dump(t)).geom as v FROM b),
|
||||
d as (SELECT v FROM c WHERE ST_Within(point, v))
|
||||
SELECT v INTO g FROM d;
|
||||
IF g is null THEN
|
||||
-- out of the realm of the input data
|
||||
RETURN -888.888;
|
||||
END IF;
|
||||
-- vertex of the selected cell
|
||||
WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v)
|
||||
SELECT array_agg(v) INTO vertex FROM a;
|
||||
|
||||
-- retrieve the value of each vertex
|
||||
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
||||
SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
|
||||
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
||||
SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
|
||||
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
||||
SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
|
||||
|
||||
SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc;
|
||||
|
||||
output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg);
|
||||
RETURN output;
|
||||
|
||||
-- IDW
|
||||
-- p1: limit the number of neighbors, 0->no limit
|
||||
-- p2: order of distance decay, 0-> order 1
|
||||
ELSIF method = 2 THEN
|
||||
|
||||
IF p2 = 0 THEN
|
||||
p2 := 1;
|
||||
END IF;
|
||||
|
||||
WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v),
|
||||
b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g)
|
||||
SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b;
|
||||
IF p1::integer>0 THEN
|
||||
gs2:=gs;
|
||||
vs2:=vs;
|
||||
FOR i IN 1..p1
|
||||
LOOP
|
||||
gs2 := gs2 || gs[i];
|
||||
vs2 := vs2 || vs[i];
|
||||
END LOOP;
|
||||
ELSE
|
||||
gs2:=gs;
|
||||
vs2:=vs;
|
||||
END IF;
|
||||
|
||||
WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v),
|
||||
b as (
|
||||
SELECT
|
||||
(1/ST_distance(point, a.g)^p2::integer) as k,
|
||||
(a.v/ST_distance(point, a.g)^p2::integer) as f
|
||||
FROM a
|
||||
)
|
||||
SELECT sum(b.f)/sum(b.k) INTO output FROM b;
|
||||
RETURN output;
|
||||
|
||||
END IF;
|
||||
|
||||
RETURN -777.777;
|
||||
|
||||
END;
|
||||
$$
|
||||
language plpgsql IMMUTABLE;
|
||||
-- Moran's I Global Measure (public-facing)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestGlobal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_AreasOfInterestLocal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT)
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestLocal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col);
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialHotspots(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HH', 'HL');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialColdspots(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('LL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialOutliers(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Global Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestGlobalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran FLOAT, significance FLOAT)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
-- Moran's I Local Rate (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_AreasOfInterestLocalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT)
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local_rate
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestLocalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col);
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialHotspotsRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HH', 'HL');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialColdspotsRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('LL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialOutliersRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
|
||||
RETURNS table (cartodb_id integer, cluster_no integer) as $$
|
||||
|
||||
from crankshaft.clustering import kmeans
|
||||
return kmeans(query,no_clusters,no_init)
|
||||
|
||||
$$ language plpythonu;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
|
||||
RETURNS Numeric[] AS
|
||||
$$
|
||||
DECLARE
|
||||
newX NUMERIC;
|
||||
newY NUMERIC;
|
||||
newW NUMERIC;
|
||||
BEGIN
|
||||
IF weight IS NULL OR the_geom IS NULL THEN
|
||||
newX = state[1];
|
||||
newY = state[2];
|
||||
newW = state[3];
|
||||
ELSE
|
||||
newX = state[1] + ST_X(the_geom)*weight;
|
||||
newY = state[2] + ST_Y(the_geom)*weight;
|
||||
newW = state[3] + weight;
|
||||
END IF;
|
||||
RETURN Array[newX,newY,newW];
|
||||
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
|
||||
RETURNS GEOMETRY AS
|
||||
$$
|
||||
BEGIN
|
||||
IF state[3] = 0 THEN
|
||||
RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326);
|
||||
ELSE
|
||||
RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Create aggregate if it did not exist
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname = 'cdb_weightedmean'
|
||||
AND p.proisagg)
|
||||
THEN
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
-- Spatial Markov
|
||||
|
||||
-- input table format:
|
||||
-- id | geom | date_1 | date_2 | date_3
|
||||
-- 1 | Pt1 | 12.3 | 13.1 | 14.2
|
||||
-- 2 | Pt2 | 11.0 | 13.2 | 12.5
|
||||
-- ...
|
||||
-- Sample Function call:
|
||||
-- SELECT CDB_SpatialMarkov('SELECT * FROM real_estate',
|
||||
-- Array['date_1', 'date_2', 'date_3'])
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_SpatialMarkovTrend (
|
||||
subquery TEXT,
|
||||
time_cols TEXT[],
|
||||
num_classes INT DEFAULT 7,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT)
|
||||
AS $$
|
||||
|
||||
from crankshaft.space_time_dynamics import spatial_markov_trend
|
||||
|
||||
## TODO: use named parameters or a dictionary
|
||||
return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- input table format: identical to above but in a predictable format
|
||||
-- Sample function call:
|
||||
-- SELECT cdb_spatial_markov('SELECT * FROM real_estate',
|
||||
-- 'date_1')
|
||||
|
||||
|
||||
-- CREATE OR REPLACE FUNCTION
|
||||
-- cdb_spatial_markov (
|
||||
-- subquery TEXT,
|
||||
-- time_col_min text,
|
||||
-- time_col_max text,
|
||||
-- date_format text, -- '_YYYY_MM_DD'
|
||||
-- num_time_per_bin INT DEFAULT 1,
|
||||
-- permutations INT DEFAULT 99,
|
||||
-- geom_column TEXT DEFAULT 'the_geom',
|
||||
-- id_col TEXT DEFAULT 'cartodb_id',
|
||||
-- w_type TEXT DEFAULT 'knn',
|
||||
-- num_ngbrs int DEFAULT 5)
|
||||
-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
|
||||
-- AS $$
|
||||
-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
-- from crankshaft.clustering import moran_local
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
--
|
||||
-- -- input table format:
|
||||
-- -- id | geom | date | measurement
|
||||
-- -- 1 | Pt1 | 12/3 | 13.2
|
||||
-- -- 2 | Pt2 | 11/5 | 11.3
|
||||
-- -- 3 | Pt1 | 11/13 | 12.9
|
||||
-- -- 4 | Pt3 | 12/19 | 10.1
|
||||
-- -- ...
|
||||
--
|
||||
-- CREATE OR REPLACE FUNCTION
|
||||
-- cdb_spatial_markov (
|
||||
-- subquery TEXT,
|
||||
-- time_col text,
|
||||
-- num_time_per_bin INT DEFAULT 1,
|
||||
-- permutations INT DEFAULT 99,
|
||||
-- geom_column TEXT DEFAULT 'the_geom',
|
||||
-- id_col TEXT DEFAULT 'cartodb_id',
|
||||
-- w_type TEXT DEFAULT 'knn',
|
||||
-- num_ngbrs int DEFAULT 5)
|
||||
-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
|
||||
-- AS $$
|
||||
-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
-- from crankshaft.clustering import moran_local
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
|
||||
-- from a polygon table over an arbitrary polygon
|
||||
-- (weighted by the area proportion overlapped)
|
||||
-- Aereal weighting is a very simple form of aereal interpolation.
|
||||
--
|
||||
-- Parameters:
|
||||
-- * geom a Polygon geometry which defines the area where a value will be
|
||||
-- estimated as the area-weighted sum of a given table/column
|
||||
-- * target_table_name table name of the table that provides the values
|
||||
-- * target_column column name of the column that provides the values
|
||||
-- * schema_name optional parameter to defina the schema the target table
|
||||
-- belongs to, which is necessary if its not in the search_path.
|
||||
-- Note that target_table_name should never include the schema in it.
|
||||
-- Return value:
|
||||
-- Aereal-weighted interpolation of the column values over the geometry
|
||||
CREATE OR REPLACE
|
||||
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
result numeric;
|
||||
qualified_name text;
|
||||
BEGIN
|
||||
IF schema_name IS NULL THEN
|
||||
qualified_name := Format('%I', target_table_name);
|
||||
ELSE
|
||||
qualified_name := Format('%I.%s', schema_name, target_table_name);
|
||||
END IF;
|
||||
EXECUTE Format('
|
||||
SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
|
||||
FROM %s AS a
|
||||
WHERE $1 && a.the_geom
|
||||
', target_column, qualified_name)
|
||||
USING geom
|
||||
INTO result;
|
||||
RETURN result;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
--
|
||||
-- Creates N points randomly distributed arround the polygon
|
||||
--
|
||||
-- @param g - the geometry to be turned in to points
|
||||
--
|
||||
-- @param no_points - the number of points to generate
|
||||
--
|
||||
-- @params max_iter_per_point - the function generates points in the polygon's bounding box
|
||||
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
|
||||
-- misses per point the funciton accepts before giving up.
|
||||
--
|
||||
-- Returns: Multipoint with the requested points
|
||||
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
|
||||
RETURNS GEOMETRY AS $$
|
||||
DECLARE
|
||||
extent GEOMETRY;
|
||||
test_point Geometry;
|
||||
width NUMERIC;
|
||||
height NUMERIC;
|
||||
x0 NUMERIC;
|
||||
y0 NUMERIC;
|
||||
xp NUMERIC;
|
||||
yp NUMERIC;
|
||||
no_left INTEGER;
|
||||
remaining_iterations INTEGER;
|
||||
points GEOMETRY[];
|
||||
bbox_line GEOMETRY;
|
||||
intersection_line GEOMETRY;
|
||||
BEGIN
|
||||
extent := ST_Envelope(geom);
|
||||
width := ST_XMax(extent) - ST_XMIN(extent);
|
||||
height := ST_YMax(extent) - ST_YMIN(extent);
|
||||
x0 := ST_XMin(extent);
|
||||
y0 := ST_YMin(extent);
|
||||
no_left := no_points;
|
||||
|
||||
LOOP
|
||||
if(no_left=0) THEN
|
||||
EXIT;
|
||||
END IF;
|
||||
yp = y0 + height*random();
|
||||
bbox_line = ST_MakeLine(
|
||||
ST_SetSRID(ST_MakePoint(yp, x0),4326),
|
||||
ST_SetSRID(ST_MakePoint(yp, x0+width),4326)
|
||||
);
|
||||
intersection_line = ST_Intersection(bbox_line,geom);
|
||||
test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
|
||||
points := points || test_point;
|
||||
no_left = no_left - 1 ;
|
||||
END LOOP;
|
||||
RETURN ST_Collect(points);
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
686
release/crankshaft--0.1.0.sql
Normal file
686
release/crankshaft--0.1.0.sql
Normal file
@@ -0,0 +1,686 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.1.0'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
|
||||
-- Internal identifier of the installed extension instence
|
||||
-- e.g. 'dev' for current development version
|
||||
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
|
||||
RETURNS text AS $$
|
||||
SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
-- Internal function.
|
||||
-- Set the seeds of the RNGs (Random Number Generators)
|
||||
-- used internally.
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||
AS $$
|
||||
from crankshaft import random_seeds
|
||||
random_seeds.set_random_seeds(seed_value)
|
||||
$$ LANGUAGE plpythonu;
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
|
||||
returns NUMERIC[] as $$
|
||||
BEGIN
|
||||
if array_upper(current_state,1) is null then
|
||||
RAISE NOTICE 'setting state %',array_upper(current_row,1);
|
||||
current_state[1] = array_upper(current_row,1);
|
||||
end if;
|
||||
return array_cat(current_state,current_row) ;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[])(
|
||||
SFUNC = CDB_PyAggS,
|
||||
STYPE = Numeric[],
|
||||
INITCOND = "{}"
|
||||
);
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
target NUMERIC[],
|
||||
features NUMERIC[],
|
||||
target_features NUMERIC[],
|
||||
target_ids NUMERIC[],
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
learning_rate DOUBLE PRECISION DEFAULT 0.01,
|
||||
min_samples_leaf INTEGER DEFAULT 1)
|
||||
RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC)
|
||||
AS $$
|
||||
import numpy as np
|
||||
import plpy
|
||||
|
||||
from crankshaft.segmentation import create_and_predict_segment_agg
|
||||
model_params = {'n_estimators': n_estimators,
|
||||
'max_depth': max_depth,
|
||||
'subsample': subsample,
|
||||
'learning_rate': learning_rate,
|
||||
'min_samples_leaf': min_samples_leaf}
|
||||
|
||||
def unpack2D(data):
|
||||
dimension = data.pop(0)
|
||||
a = np.array(data, dtype=float)
|
||||
return a.reshape(len(a)/dimension, dimension)
|
||||
|
||||
return create_and_predict_segment_agg(np.array(target, dtype=float),
|
||||
unpack2D(features),
|
||||
unpack2D(target_features),
|
||||
target_ids,
|
||||
model_params)
|
||||
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment (
|
||||
query TEXT,
|
||||
variable_name TEXT,
|
||||
target_table TEXT,
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
learning_rate DOUBLE PRECISION DEFAULT 0.01,
|
||||
min_samples_leaf INTEGER DEFAULT 1)
|
||||
RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.segmentation import create_and_predict_segment
|
||||
model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf}
|
||||
return create_and_predict_segment(query,variable_name,target_table, model_params)
|
||||
$$ LANGUAGE plpythonu;
|
||||
-- 0: nearest neighbor
|
||||
-- 1: barymetric
|
||||
-- 2: IDW
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
|
||||
IN query text,
|
||||
IN point geometry,
|
||||
IN method integer DEFAULT 1,
|
||||
IN p1 numeric DEFAULT 0,
|
||||
IN p2 numeric DEFAULT 0
|
||||
)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
gs geometry[];
|
||||
vs numeric[];
|
||||
output numeric;
|
||||
BEGIN
|
||||
EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs;
|
||||
SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a;
|
||||
|
||||
RETURN output;
|
||||
END;
|
||||
$$
|
||||
language plpgsql IMMUTABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
|
||||
IN geomin geometry[],
|
||||
IN colin numeric[],
|
||||
IN point geometry,
|
||||
IN method integer DEFAULT 1,
|
||||
IN p1 numeric DEFAULT 0,
|
||||
IN p2 numeric DEFAULT 0
|
||||
)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
gs geometry[];
|
||||
vs numeric[];
|
||||
gs2 geometry[];
|
||||
vs2 numeric[];
|
||||
g geometry;
|
||||
vertex geometry[];
|
||||
sg numeric;
|
||||
sa numeric;
|
||||
sb numeric;
|
||||
sc numeric;
|
||||
va numeric;
|
||||
vb numeric;
|
||||
vc numeric;
|
||||
output numeric;
|
||||
BEGIN
|
||||
output := -999.999;
|
||||
-- nearest
|
||||
IF method = 0 THEN
|
||||
|
||||
WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v)
|
||||
SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1;
|
||||
RETURN output;
|
||||
|
||||
-- barymetric
|
||||
ELSIF method = 1 THEN
|
||||
WITH a as (SELECT unnest(geomin) AS e),
|
||||
b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
|
||||
c as (SELECT (ST_Dump(t)).geom as v FROM b),
|
||||
d as (SELECT v FROM c WHERE ST_Within(point, v))
|
||||
SELECT v INTO g FROM d;
|
||||
IF g is null THEN
|
||||
-- out of the realm of the input data
|
||||
RETURN -888.888;
|
||||
END IF;
|
||||
-- vertex of the selected cell
|
||||
WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v)
|
||||
SELECT array_agg(v) INTO vertex FROM a;
|
||||
|
||||
-- retrieve the value of each vertex
|
||||
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
||||
SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
|
||||
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
||||
SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
|
||||
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
||||
SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
|
||||
|
||||
SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc;
|
||||
|
||||
output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg);
|
||||
RETURN output;
|
||||
|
||||
-- IDW
|
||||
-- p1: limit the number of neighbors, 0->no limit
|
||||
-- p2: order of distance decay, 0-> order 1
|
||||
ELSIF method = 2 THEN
|
||||
|
||||
IF p2 = 0 THEN
|
||||
p2 := 1;
|
||||
END IF;
|
||||
|
||||
WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v),
|
||||
b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g)
|
||||
SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b;
|
||||
IF p1::integer>0 THEN
|
||||
gs2:=gs;
|
||||
vs2:=vs;
|
||||
FOR i IN 1..p1
|
||||
LOOP
|
||||
gs2 := gs2 || gs[i];
|
||||
vs2 := vs2 || vs[i];
|
||||
END LOOP;
|
||||
ELSE
|
||||
gs2:=gs;
|
||||
vs2:=vs;
|
||||
END IF;
|
||||
|
||||
WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v),
|
||||
b as (
|
||||
SELECT
|
||||
(1/ST_distance(point, a.g)^p2::integer) as k,
|
||||
(a.v/ST_distance(point, a.g)^p2::integer) as f
|
||||
FROM a
|
||||
)
|
||||
SELECT sum(b.f)/sum(b.k) INTO output FROM b;
|
||||
RETURN output;
|
||||
|
||||
END IF;
|
||||
|
||||
RETURN -777.777;
|
||||
|
||||
END;
|
||||
$$
|
||||
language plpgsql IMMUTABLE;
|
||||
-- Moran's I Global Measure (public-facing)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestGlobal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_AreasOfInterestLocal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT)
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestLocal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col);
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialHotspots(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HH', 'HL');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialColdspots(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('LL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialOutliers(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Global Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestGlobalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran FLOAT, significance FLOAT)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
-- Moran's I Local Rate (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_AreasOfInterestLocalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT)
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local_rate
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestLocalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col);
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialHotspotsRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HH', 'HL');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialColdspotsRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('LL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialOutliersRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
|
||||
RETURNS table (cartodb_id integer, cluster_no integer) as $$
|
||||
|
||||
from crankshaft.clustering import kmeans
|
||||
return kmeans(query,no_clusters,no_init)
|
||||
|
||||
$$ language plpythonu;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
|
||||
RETURNS Numeric[] AS
|
||||
$$
|
||||
DECLARE
|
||||
newX NUMERIC;
|
||||
newY NUMERIC;
|
||||
newW NUMERIC;
|
||||
BEGIN
|
||||
IF weight IS NULL OR the_geom IS NULL THEN
|
||||
newX = state[1];
|
||||
newY = state[2];
|
||||
newW = state[3];
|
||||
ELSE
|
||||
newX = state[1] + ST_X(the_geom)*weight;
|
||||
newY = state[2] + ST_Y(the_geom)*weight;
|
||||
newW = state[3] + weight;
|
||||
END IF;
|
||||
RETURN Array[newX,newY,newW];
|
||||
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
|
||||
RETURNS GEOMETRY AS
|
||||
$$
|
||||
BEGIN
|
||||
IF state[3] = 0 THEN
|
||||
RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326);
|
||||
ELSE
|
||||
RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)(
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
-- Spatial Markov
|
||||
|
||||
-- input table format:
|
||||
-- id | geom | date_1 | date_2 | date_3
|
||||
-- 1 | Pt1 | 12.3 | 13.1 | 14.2
|
||||
-- 2 | Pt2 | 11.0 | 13.2 | 12.5
|
||||
-- ...
|
||||
-- Sample Function call:
|
||||
-- SELECT CDB_SpatialMarkov('SELECT * FROM real_estate',
|
||||
-- Array['date_1', 'date_2', 'date_3'])
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_SpatialMarkovTrend (
|
||||
subquery TEXT,
|
||||
time_cols TEXT[],
|
||||
num_classes INT DEFAULT 7,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT)
|
||||
AS $$
|
||||
|
||||
from crankshaft.space_time_dynamics import spatial_markov_trend
|
||||
|
||||
## TODO: use named parameters or a dictionary
|
||||
return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- input table format: identical to above but in a predictable format
|
||||
-- Sample function call:
|
||||
-- SELECT cdb_spatial_markov('SELECT * FROM real_estate',
|
||||
-- 'date_1')
|
||||
|
||||
|
||||
-- CREATE OR REPLACE FUNCTION
|
||||
-- cdb_spatial_markov (
|
||||
-- subquery TEXT,
|
||||
-- time_col_min text,
|
||||
-- time_col_max text,
|
||||
-- date_format text, -- '_YYYY_MM_DD'
|
||||
-- num_time_per_bin INT DEFAULT 1,
|
||||
-- permutations INT DEFAULT 99,
|
||||
-- geom_column TEXT DEFAULT 'the_geom',
|
||||
-- id_col TEXT DEFAULT 'cartodb_id',
|
||||
-- w_type TEXT DEFAULT 'knn',
|
||||
-- num_ngbrs int DEFAULT 5)
|
||||
-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
|
||||
-- AS $$
|
||||
-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
-- from crankshaft.clustering import moran_local
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
--
|
||||
-- -- input table format:
|
||||
-- -- id | geom | date | measurement
|
||||
-- -- 1 | Pt1 | 12/3 | 13.2
|
||||
-- -- 2 | Pt2 | 11/5 | 11.3
|
||||
-- -- 3 | Pt1 | 11/13 | 12.9
|
||||
-- -- 4 | Pt3 | 12/19 | 10.1
|
||||
-- -- ...
|
||||
--
|
||||
-- CREATE OR REPLACE FUNCTION
|
||||
-- cdb_spatial_markov (
|
||||
-- subquery TEXT,
|
||||
-- time_col text,
|
||||
-- num_time_per_bin INT DEFAULT 1,
|
||||
-- permutations INT DEFAULT 99,
|
||||
-- geom_column TEXT DEFAULT 'the_geom',
|
||||
-- id_col TEXT DEFAULT 'cartodb_id',
|
||||
-- w_type TEXT DEFAULT 'knn',
|
||||
-- num_ngbrs int DEFAULT 5)
|
||||
-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
|
||||
-- AS $$
|
||||
-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
-- from crankshaft.clustering import moran_local
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
|
||||
-- from a polygon table over an arbitrary polygon
|
||||
-- (weighted by the area proportion overlapped)
|
||||
-- Aereal weighting is a very simple form of aereal interpolation.
|
||||
--
|
||||
-- Parameters:
|
||||
-- * geom a Polygon geometry which defines the area where a value will be
|
||||
-- estimated as the area-weighted sum of a given table/column
|
||||
-- * target_table_name table name of the table that provides the values
|
||||
-- * target_column column name of the column that provides the values
|
||||
-- * schema_name optional parameter to defina the schema the target table
|
||||
-- belongs to, which is necessary if its not in the search_path.
|
||||
-- Note that target_table_name should never include the schema in it.
|
||||
-- Return value:
|
||||
-- Aereal-weighted interpolation of the column values over the geometry
|
||||
CREATE OR REPLACE
|
||||
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
result numeric;
|
||||
qualified_name text;
|
||||
BEGIN
|
||||
IF schema_name IS NULL THEN
|
||||
qualified_name := Format('%I', target_table_name);
|
||||
ELSE
|
||||
qualified_name := Format('%I.%s', schema_name, target_table_name);
|
||||
END IF;
|
||||
EXECUTE Format('
|
||||
SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
|
||||
FROM %s AS a
|
||||
WHERE $1 && a.the_geom
|
||||
', target_column, qualified_name)
|
||||
USING geom
|
||||
INTO result;
|
||||
RETURN result;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
--
|
||||
-- Creates N points randomly distributed arround the polygon
|
||||
--
|
||||
-- @param g - the geometry to be turned in to points
|
||||
--
|
||||
-- @param no_points - the number of points to generate
|
||||
--
|
||||
-- @params max_iter_per_point - the function generates points in the polygon's bounding box
|
||||
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
|
||||
-- misses per point the funciton accepts before giving up.
|
||||
--
|
||||
-- Returns: Multipoint with the requested points
|
||||
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
|
||||
RETURNS GEOMETRY AS $$
|
||||
DECLARE
|
||||
extent GEOMETRY;
|
||||
test_point Geometry;
|
||||
width NUMERIC;
|
||||
height NUMERIC;
|
||||
x0 NUMERIC;
|
||||
y0 NUMERIC;
|
||||
xp NUMERIC;
|
||||
yp NUMERIC;
|
||||
no_left INTEGER;
|
||||
remaining_iterations INTEGER;
|
||||
points GEOMETRY[];
|
||||
bbox_line GEOMETRY;
|
||||
intersection_line GEOMETRY;
|
||||
BEGIN
|
||||
extent := ST_Envelope(geom);
|
||||
width := ST_XMax(extent) - ST_XMIN(extent);
|
||||
height := ST_YMax(extent) - ST_YMIN(extent);
|
||||
x0 := ST_XMin(extent);
|
||||
y0 := ST_YMin(extent);
|
||||
no_left := no_points;
|
||||
|
||||
LOOP
|
||||
if(no_left=0) THEN
|
||||
EXIT;
|
||||
END IF;
|
||||
yp = y0 + height*random();
|
||||
bbox_line = ST_MakeLine(
|
||||
ST_SetSRID(ST_MakePoint(yp, x0),4326),
|
||||
ST_SetSRID(ST_MakePoint(yp, x0+width),4326)
|
||||
);
|
||||
intersection_line = ST_Intersection(bbox_line,geom);
|
||||
test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
|
||||
points := points || test_point;
|
||||
no_left = no_left - 1 ;
|
||||
END LOOP;
|
||||
RETURN ST_Collect(points);
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
1042
release/crankshaft--0.2.0--0.3.0.sql
Normal file
1042
release/crankshaft--0.2.0--0.3.0.sql
Normal file
File diff suppressed because it is too large
Load Diff
827
release/crankshaft--0.2.0.sql
Normal file
827
release/crankshaft--0.2.0.sql
Normal file
@@ -0,0 +1,827 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||
-- Version number of the extension release
|
||||
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
||||
RETURNS text AS $$
|
||||
SELECT '0.2.0'::text;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
|
||||
-- Internal identifier of the installed extension instence
|
||||
-- e.g. 'dev' for current development version
|
||||
CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version()
|
||||
RETURNS text AS $$
|
||||
SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL;
|
||||
$$ language 'sql' STABLE STRICT;
|
||||
-- Internal function.
|
||||
-- Set the seeds of the RNGs (Random Number Generators)
|
||||
-- used internally.
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||
AS $$
|
||||
from crankshaft import random_seeds
|
||||
random_seeds.set_random_seeds(seed_value)
|
||||
$$ LANGUAGE plpythonu;
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
|
||||
returns NUMERIC[] as $$
|
||||
BEGIN
|
||||
if array_upper(current_state,1) is null then
|
||||
RAISE NOTICE 'setting state %',array_upper(current_row,1);
|
||||
current_state[1] = array_upper(current_row,1);
|
||||
end if;
|
||||
return array_cat(current_state,current_row) ;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Create aggregate if it did not exist
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname = 'cdb_pyagg'
|
||||
AND p.proisagg)
|
||||
THEN
|
||||
CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) (
|
||||
SFUNC = CDB_PyAggS,
|
||||
STYPE = Numeric[],
|
||||
INITCOND = "{}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment(
|
||||
target NUMERIC[],
|
||||
features NUMERIC[],
|
||||
target_features NUMERIC[],
|
||||
target_ids NUMERIC[],
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
learning_rate DOUBLE PRECISION DEFAULT 0.01,
|
||||
min_samples_leaf INTEGER DEFAULT 1)
|
||||
RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC)
|
||||
AS $$
|
||||
import numpy as np
|
||||
import plpy
|
||||
|
||||
from crankshaft.segmentation import create_and_predict_segment_agg
|
||||
model_params = {'n_estimators': n_estimators,
|
||||
'max_depth': max_depth,
|
||||
'subsample': subsample,
|
||||
'learning_rate': learning_rate,
|
||||
'min_samples_leaf': min_samples_leaf}
|
||||
|
||||
def unpack2D(data):
|
||||
dimension = data.pop(0)
|
||||
a = np.array(data, dtype=float)
|
||||
return a.reshape(len(a)/dimension, dimension)
|
||||
|
||||
return create_and_predict_segment_agg(np.array(target, dtype=float),
|
||||
unpack2D(features),
|
||||
unpack2D(target_features),
|
||||
target_ids,
|
||||
model_params)
|
||||
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment (
|
||||
query TEXT,
|
||||
variable_name TEXT,
|
||||
target_table TEXT,
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
subsample DOUBLE PRECISION DEFAULT 0.5,
|
||||
learning_rate DOUBLE PRECISION DEFAULT 0.01,
|
||||
min_samples_leaf INTEGER DEFAULT 1)
|
||||
RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.segmentation import create_and_predict_segment
|
||||
model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf}
|
||||
return create_and_predict_segment(query,variable_name,target_table, model_params)
|
||||
$$ LANGUAGE plpythonu;
|
||||
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
||||
IN target_query text,
|
||||
IN weight_column text,
|
||||
IN source_query text,
|
||||
IN pop_column text,
|
||||
IN target bigint,
|
||||
IN radius integer,
|
||||
IN minval numeric DEFAULT -10e307
|
||||
)
|
||||
RETURNS TABLE(
|
||||
the_geom geometry,
|
||||
source_id bigint,
|
||||
target_id bigint,
|
||||
dist numeric,
|
||||
h numeric,
|
||||
hpop numeric) AS $$
|
||||
DECLARE
|
||||
t_id bigint[];
|
||||
t_geom geometry[];
|
||||
t_weight numeric[];
|
||||
s_id bigint[];
|
||||
s_geom geometry[];
|
||||
s_pop numeric[];
|
||||
BEGIN
|
||||
EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight;
|
||||
EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop;
|
||||
RETURN QUERY
|
||||
SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g;
|
||||
END;
|
||||
$$ language plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_Gravity(
|
||||
IN t_id bigint[],
|
||||
IN t_geom geometry[],
|
||||
IN t_weight numeric[],
|
||||
IN s_id bigint[],
|
||||
IN s_geom geometry[],
|
||||
IN s_pop numeric[],
|
||||
IN target bigint,
|
||||
IN radius integer,
|
||||
IN minval numeric DEFAULT -10e307
|
||||
)
|
||||
RETURNS TABLE(
|
||||
the_geom geometry,
|
||||
source_id bigint,
|
||||
target_id bigint,
|
||||
dist numeric,
|
||||
h numeric,
|
||||
hpop numeric) AS $$
|
||||
DECLARE
|
||||
t_type text;
|
||||
s_type text;
|
||||
t_center geometry[];
|
||||
s_center geometry[];
|
||||
BEGIN
|
||||
t_type := GeometryType(t_geom[1]);
|
||||
s_type := GeometryType(s_geom[1]);
|
||||
IF t_type = 'POINT' THEN
|
||||
t_center := t_geom;
|
||||
ELSE
|
||||
WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp;
|
||||
END IF;
|
||||
IF s_type = 'POINT' THEN
|
||||
s_center := s_geom;
|
||||
ELSE
|
||||
WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp;
|
||||
END IF;
|
||||
RETURN QUERY
|
||||
with target0 as(
|
||||
SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td
|
||||
),
|
||||
source0 as(
|
||||
SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp
|
||||
),
|
||||
prev0 as(
|
||||
SELECT
|
||||
source0.sg,
|
||||
source0.sd as sourc_id,
|
||||
coalesce(source0.sp,0) as sp,
|
||||
target.td as targ_id,
|
||||
coalesce(target.tw,0) as tw,
|
||||
GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance
|
||||
FROM source0
|
||||
CROSS JOIN LATERAL
|
||||
(
|
||||
SELECT
|
||||
*
|
||||
FROM target0
|
||||
WHERE tw > minval
|
||||
AND ST_DWithin(geography(source0.sc), geography(tc), radius)
|
||||
) AS target
|
||||
),
|
||||
deno as(
|
||||
SELECT
|
||||
sourc_id,
|
||||
sum(tw/distance) as h_deno
|
||||
FROM
|
||||
prev0
|
||||
GROUP BY sourc_id
|
||||
)
|
||||
SELECT
|
||||
p.sg as the_geom,
|
||||
p.sourc_id as source_id,
|
||||
p.targ_id as target_id,
|
||||
case when p.distance > 1 then p.distance else 0.0 end as dist,
|
||||
100*(p.tw/p.distance)/d.h_deno as h,
|
||||
p.sp*(p.tw/p.distance)/d.h_deno as hpop
|
||||
FROM
|
||||
prev0 p,
|
||||
deno d
|
||||
WHERE
|
||||
p.targ_id = target AND
|
||||
p.sourc_id = d.sourc_id;
|
||||
END;
|
||||
$$ language plpgsql;
|
||||
-- 0: nearest neighbor
|
||||
-- 1: barymetric
|
||||
-- 2: IDW
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
|
||||
IN query text,
|
||||
IN point geometry,
|
||||
IN method integer DEFAULT 1,
|
||||
IN p1 numeric DEFAULT 0,
|
||||
IN p2 numeric DEFAULT 0
|
||||
)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
gs geometry[];
|
||||
vs numeric[];
|
||||
output numeric;
|
||||
BEGIN
|
||||
EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs;
|
||||
SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a;
|
||||
|
||||
RETURN output;
|
||||
END;
|
||||
$$
|
||||
language plpgsql IMMUTABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
|
||||
IN geomin geometry[],
|
||||
IN colin numeric[],
|
||||
IN point geometry,
|
||||
IN method integer DEFAULT 1,
|
||||
IN p1 numeric DEFAULT 0,
|
||||
IN p2 numeric DEFAULT 0
|
||||
)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
gs geometry[];
|
||||
vs numeric[];
|
||||
gs2 geometry[];
|
||||
vs2 numeric[];
|
||||
g geometry;
|
||||
vertex geometry[];
|
||||
sg numeric;
|
||||
sa numeric;
|
||||
sb numeric;
|
||||
sc numeric;
|
||||
va numeric;
|
||||
vb numeric;
|
||||
vc numeric;
|
||||
output numeric;
|
||||
BEGIN
|
||||
output := -999.999;
|
||||
-- nearest
|
||||
IF method = 0 THEN
|
||||
|
||||
WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v)
|
||||
SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1;
|
||||
RETURN output;
|
||||
|
||||
-- barymetric
|
||||
ELSIF method = 1 THEN
|
||||
WITH a as (SELECT unnest(geomin) AS e),
|
||||
b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
|
||||
c as (SELECT (ST_Dump(t)).geom as v FROM b),
|
||||
d as (SELECT v FROM c WHERE ST_Within(point, v))
|
||||
SELECT v INTO g FROM d;
|
||||
IF g is null THEN
|
||||
-- out of the realm of the input data
|
||||
RETURN -888.888;
|
||||
END IF;
|
||||
-- vertex of the selected cell
|
||||
WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v)
|
||||
SELECT array_agg(v) INTO vertex FROM a;
|
||||
|
||||
-- retrieve the value of each vertex
|
||||
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
||||
SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
|
||||
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
||||
SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
|
||||
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
||||
SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
|
||||
|
||||
SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc;
|
||||
|
||||
output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg);
|
||||
RETURN output;
|
||||
|
||||
-- IDW
|
||||
-- p1: limit the number of neighbors, 0->no limit
|
||||
-- p2: order of distance decay, 0-> order 1
|
||||
ELSIF method = 2 THEN
|
||||
|
||||
IF p2 = 0 THEN
|
||||
p2 := 1;
|
||||
END IF;
|
||||
|
||||
WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v),
|
||||
b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g)
|
||||
SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b;
|
||||
IF p1::integer>0 THEN
|
||||
gs2:=gs;
|
||||
vs2:=vs;
|
||||
FOR i IN 1..p1
|
||||
LOOP
|
||||
gs2 := gs2 || gs[i];
|
||||
vs2 := vs2 || vs[i];
|
||||
END LOOP;
|
||||
ELSE
|
||||
gs2:=gs;
|
||||
vs2:=vs;
|
||||
END IF;
|
||||
|
||||
WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v),
|
||||
b as (
|
||||
SELECT
|
||||
(1/ST_distance(point, a.g)^p2::integer) as k,
|
||||
(a.v/ST_distance(point, a.g)^p2::integer) as f
|
||||
FROM a
|
||||
)
|
||||
SELECT sum(b.f)/sum(b.k) INTO output FROM b;
|
||||
RETURN output;
|
||||
|
||||
END IF;
|
||||
|
||||
RETURN -777.777;
|
||||
|
||||
END;
|
||||
$$
|
||||
language plpgsql IMMUTABLE;
|
||||
-- Moran's I Global Measure (public-facing)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestGlobal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_AreasOfInterestLocal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT)
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestLocal(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col);
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialHotspots(
|
||||
subquery TEXT,
|
||||
column_name TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HH', 'HL');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialColdspots(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('LL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialOutliers(
|
||||
subquery TEXT,
|
||||
attr TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Global Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestGlobalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran FLOAT, significance FLOAT)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
-- Moran's I Local Rate (internal function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
_CDB_AreasOfInterestLocalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT,
|
||||
num_ngbrs INT,
|
||||
permutations INT,
|
||||
geom_col TEXT,
|
||||
id_col TEXT)
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import moran_local_rate
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_AreasOfInterestLocalRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col);
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for HH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialHotspotsRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HH', 'HL');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LL and LH (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialColdspotsRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('LL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Moran's I Local Rate only for LH and HL (public-facing function)
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_GetSpatialOutliersRate(
|
||||
subquery TEXT,
|
||||
numerator TEXT,
|
||||
denominator TEXT,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
|
||||
SELECT moran, quads, significance, rowid, vals
|
||||
FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
WHERE quads IN ('HL', 'LH');
|
||||
|
||||
$$ LANGUAGE SQL;
|
||||
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
|
||||
RETURNS table (cartodb_id integer, cluster_no integer) as $$
|
||||
|
||||
from crankshaft.clustering import kmeans
|
||||
return kmeans(query,no_clusters,no_init)
|
||||
|
||||
$$ language plpythonu;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
|
||||
RETURNS Numeric[] AS
|
||||
$$
|
||||
DECLARE
|
||||
newX NUMERIC;
|
||||
newY NUMERIC;
|
||||
newW NUMERIC;
|
||||
BEGIN
|
||||
IF weight IS NULL OR the_geom IS NULL THEN
|
||||
newX = state[1];
|
||||
newY = state[2];
|
||||
newW = state[3];
|
||||
ELSE
|
||||
newX = state[1] + ST_X(the_geom)*weight;
|
||||
newY = state[2] + ST_Y(the_geom)*weight;
|
||||
newW = state[3] + weight;
|
||||
END IF;
|
||||
RETURN Array[newX,newY,newW];
|
||||
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
|
||||
RETURNS GEOMETRY AS
|
||||
$$
|
||||
BEGIN
|
||||
IF state[3] = 0 THEN
|
||||
RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326);
|
||||
ELSE
|
||||
RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Create aggregate if it did not exist
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT *
|
||||
FROM pg_catalog.pg_proc p
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
||||
WHERE n.nspname = 'cdb_crankshaft'
|
||||
AND p.proname = 'cdb_weightedmean'
|
||||
AND p.proisagg)
|
||||
THEN
|
||||
CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) (
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$ LANGUAGE plpgsql;
|
||||
-- Spatial Markov
|
||||
|
||||
-- input table format:
|
||||
-- id | geom | date_1 | date_2 | date_3
|
||||
-- 1 | Pt1 | 12.3 | 13.1 | 14.2
|
||||
-- 2 | Pt2 | 11.0 | 13.2 | 12.5
|
||||
-- ...
|
||||
-- Sample Function call:
|
||||
-- SELECT CDB_SpatialMarkov('SELECT * FROM real_estate',
|
||||
-- Array['date_1', 'date_2', 'date_3'])
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_SpatialMarkovTrend (
|
||||
subquery TEXT,
|
||||
time_cols TEXT[],
|
||||
num_classes INT DEFAULT 7,
|
||||
w_type TEXT DEFAULT 'knn',
|
||||
num_ngbrs INT DEFAULT 5,
|
||||
permutations INT DEFAULT 99,
|
||||
geom_col TEXT DEFAULT 'the_geom',
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT)
|
||||
AS $$
|
||||
|
||||
from crankshaft.space_time_dynamics import spatial_markov_trend
|
||||
|
||||
## TODO: use named parameters or a dictionary
|
||||
return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- input table format: identical to above but in a predictable format
|
||||
-- Sample function call:
|
||||
-- SELECT cdb_spatial_markov('SELECT * FROM real_estate',
|
||||
-- 'date_1')
|
||||
|
||||
|
||||
-- CREATE OR REPLACE FUNCTION
|
||||
-- cdb_spatial_markov (
|
||||
-- subquery TEXT,
|
||||
-- time_col_min text,
|
||||
-- time_col_max text,
|
||||
-- date_format text, -- '_YYYY_MM_DD'
|
||||
-- num_time_per_bin INT DEFAULT 1,
|
||||
-- permutations INT DEFAULT 99,
|
||||
-- geom_column TEXT DEFAULT 'the_geom',
|
||||
-- id_col TEXT DEFAULT 'cartodb_id',
|
||||
-- w_type TEXT DEFAULT 'knn',
|
||||
-- num_ngbrs int DEFAULT 5)
|
||||
-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
|
||||
-- AS $$
|
||||
-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
-- from crankshaft.clustering import moran_local
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
--
|
||||
-- -- input table format:
|
||||
-- -- id | geom | date | measurement
|
||||
-- -- 1 | Pt1 | 12/3 | 13.2
|
||||
-- -- 2 | Pt2 | 11/5 | 11.3
|
||||
-- -- 3 | Pt1 | 11/13 | 12.9
|
||||
-- -- 4 | Pt3 | 12/19 | 10.1
|
||||
-- -- ...
|
||||
--
|
||||
-- CREATE OR REPLACE FUNCTION
|
||||
-- cdb_spatial_markov (
|
||||
-- subquery TEXT,
|
||||
-- time_col text,
|
||||
-- num_time_per_bin INT DEFAULT 1,
|
||||
-- permutations INT DEFAULT 99,
|
||||
-- geom_column TEXT DEFAULT 'the_geom',
|
||||
-- id_col TEXT DEFAULT 'cartodb_id',
|
||||
-- w_type TEXT DEFAULT 'knn',
|
||||
-- num_ngbrs int DEFAULT 5)
|
||||
-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
|
||||
-- AS $$
|
||||
-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
|
||||
-- from crankshaft.clustering import moran_local
|
||||
-- # TODO: use named parameters or a dictionary
|
||||
-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs)
|
||||
-- $$ LANGUAGE plpythonu;
|
||||
-- Function by Stuart Lynn for a simple interpolation of a value
|
||||
-- from a polygon table over an arbitrary polygon
|
||||
-- (weighted by the area proportion overlapped)
|
||||
-- Aereal weighting is a very simple form of aereal interpolation.
|
||||
--
|
||||
-- Parameters:
|
||||
-- * geom a Polygon geometry which defines the area where a value will be
|
||||
-- estimated as the area-weighted sum of a given table/column
|
||||
-- * target_table_name table name of the table that provides the values
|
||||
-- * target_column column name of the column that provides the values
|
||||
-- * schema_name optional parameter to defina the schema the target table
|
||||
-- belongs to, which is necessary if its not in the search_path.
|
||||
-- Note that target_table_name should never include the schema in it.
|
||||
-- Return value:
|
||||
-- Aereal-weighted interpolation of the column values over the geometry
|
||||
CREATE OR REPLACE
|
||||
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
result numeric;
|
||||
qualified_name text;
|
||||
BEGIN
|
||||
IF schema_name IS NULL THEN
|
||||
qualified_name := Format('%I', target_table_name);
|
||||
ELSE
|
||||
qualified_name := Format('%I.%s', schema_name, target_table_name);
|
||||
END IF;
|
||||
EXECUTE Format('
|
||||
SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
|
||||
FROM %s AS a
|
||||
WHERE $1 && a.the_geom
|
||||
', target_column, qualified_name)
|
||||
USING geom
|
||||
INTO result;
|
||||
RETURN result;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
--
|
||||
-- Creates N points randomly distributed arround the polygon
|
||||
--
|
||||
-- @param g - the geometry to be turned in to points
|
||||
--
|
||||
-- @param no_points - the number of points to generate
|
||||
--
|
||||
-- @params max_iter_per_point - the function generates points in the polygon's bounding box
|
||||
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
|
||||
-- misses per point the funciton accepts before giving up.
|
||||
--
|
||||
-- Returns: Multipoint with the requested points
|
||||
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
|
||||
RETURNS GEOMETRY AS $$
|
||||
DECLARE
|
||||
extent GEOMETRY;
|
||||
test_point Geometry;
|
||||
width NUMERIC;
|
||||
height NUMERIC;
|
||||
x0 NUMERIC;
|
||||
y0 NUMERIC;
|
||||
xp NUMERIC;
|
||||
yp NUMERIC;
|
||||
no_left INTEGER;
|
||||
remaining_iterations INTEGER;
|
||||
points GEOMETRY[];
|
||||
bbox_line GEOMETRY;
|
||||
intersection_line GEOMETRY;
|
||||
BEGIN
|
||||
extent := ST_Envelope(geom);
|
||||
width := ST_XMax(extent) - ST_XMIN(extent);
|
||||
height := ST_YMax(extent) - ST_YMIN(extent);
|
||||
x0 := ST_XMin(extent);
|
||||
y0 := ST_YMin(extent);
|
||||
no_left := no_points;
|
||||
|
||||
LOOP
|
||||
if(no_left=0) THEN
|
||||
EXIT;
|
||||
END IF;
|
||||
yp = y0 + height*random();
|
||||
bbox_line = ST_MakeLine(
|
||||
ST_SetSRID(ST_MakePoint(yp, x0),4326),
|
||||
ST_SetSRID(ST_MakePoint(yp, x0+width),4326)
|
||||
);
|
||||
intersection_line = ST_Intersection(bbox_line,geom);
|
||||
test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
|
||||
points := points || test_point;
|
||||
no_left = no_left - 1 ;
|
||||
END LOOP;
|
||||
RETURN ST_Collect(points);
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql VOLATILE;
|
||||
-- Make sure by default there are no permissions for publicuser
|
||||
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||
|
||||
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||
|
||||
-- Revoke execute permissions on all functions in the schema by default
|
||||
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
||||
1063
release/crankshaft--0.3.0--0.3.1.sql
Normal file
1063
release/crankshaft--0.3.0--0.3.1.sql
Normal file
File diff suppressed because it is too large
Load Diff
1042
release/crankshaft--0.3.0.sql
Normal file
1042
release/crankshaft--0.3.0.sql
Normal file
File diff suppressed because it is too large
Load Diff
1948
release/crankshaft--0.3.1--0.4.0.sql
Normal file
1948
release/crankshaft--0.3.1--0.4.0.sql
Normal file
File diff suppressed because it is too large
Load Diff
1063
release/crankshaft--0.3.1.sql
Normal file
1063
release/crankshaft--0.3.1.sql
Normal file
File diff suppressed because it is too large
Load Diff
1965
release/crankshaft--0.4.0--0.4.1.sql
Normal file
1965
release/crankshaft--0.4.0--0.4.1.sql
Normal file
File diff suppressed because it is too large
Load Diff
1948
release/crankshaft--0.4.0.sql
Normal file
1948
release/crankshaft--0.4.0.sql
Normal file
File diff suppressed because it is too large
Load Diff
1965
release/crankshaft--0.4.1--0.4.2.sql
Normal file
1965
release/crankshaft--0.4.1--0.4.2.sql
Normal file
File diff suppressed because it is too large
Load Diff
1965
release/crankshaft--0.4.1.sql
Normal file
1965
release/crankshaft--0.4.1.sql
Normal file
File diff suppressed because it is too large
Load Diff
1965
release/crankshaft--0.4.2--0.5.0.sql
Normal file
1965
release/crankshaft--0.4.2--0.5.0.sql
Normal file
File diff suppressed because it is too large
Load Diff
1965
release/crankshaft--0.4.2.sql
Normal file
1965
release/crankshaft--0.4.2.sql
Normal file
File diff suppressed because it is too large
Load Diff
2070
release/crankshaft--0.5.0--0.5.1.sql
Normal file
2070
release/crankshaft--0.5.0--0.5.1.sql
Normal file
File diff suppressed because it is too large
Load Diff
2070
release/crankshaft--0.5.0.sql
Normal file
2070
release/crankshaft--0.5.0.sql
Normal file
File diff suppressed because it is too large
Load Diff
2070
release/crankshaft--0.5.1.sql
Normal file
2070
release/crankshaft--0.5.1.sql
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
comment = 'CartoDB Spatial Analysis extension'
|
||||
default_version = '0.0.2'
|
||||
requires = 'plpythonu, postgis, cartodb'
|
||||
default_version = '0.5.1'
|
||||
requires = 'plpythonu, postgis'
|
||||
superuser = true
|
||||
schema = cdb_crankshaft
|
||||
|
||||
2
release/python/0.0.3/crankshaft/crankshaft/__init__.py
Normal file
2
release/python/0.0.3/crankshaft/crankshaft/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
import random_seeds
|
||||
import clustering
|
||||
@@ -0,0 +1,2 @@
|
||||
from moran import *
|
||||
from kmeans import *
|
||||
@@ -0,0 +1,18 @@
|
||||
from sklearn.cluster import KMeans
|
||||
import plpy
|
||||
|
||||
def kmeans(query, no_clusters, no_init=20):
|
||||
data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids,
|
||||
array_agg(ST_X(the_geom) order by cartodb_id) xs,
|
||||
array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a
|
||||
where the_geom is not null
|
||||
'''.format(query=query))
|
||||
|
||||
xs = data[0]['xs']
|
||||
ys = data[0]['ys']
|
||||
ids = data[0]['ids']
|
||||
|
||||
km = KMeans(n_clusters= no_clusters, n_init=no_init)
|
||||
labels = km.fit_predict(zip(xs,ys))
|
||||
return zip(ids,labels)
|
||||
|
||||
260
release/python/0.0.3/crankshaft/crankshaft/clustering/moran.py
Normal file
260
release/python/0.0.3/crankshaft/crankshaft/clustering/moran.py
Normal file
@@ -0,0 +1,260 @@
|
||||
"""
|
||||
Moran's I geostatistics (global clustering & outliers presence)
|
||||
"""
|
||||
|
||||
# TODO: Fill in local neighbors which have null/NoneType values with the
|
||||
# average of the their neighborhood
|
||||
|
||||
import pysal as ps
|
||||
import plpy
|
||||
|
||||
# crankshaft module
|
||||
import crankshaft.pysal_utils as pu
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
def moran(subquery, attr_name,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I (global)
|
||||
Implementation building neighbors with a PostGIS database and Moran's I
|
||||
core clusters with PySAL.
|
||||
Andy Eschbacher
|
||||
"""
|
||||
qvals = {"id_col": id_col,
|
||||
"attr1": attr_name,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs}
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
plpy.notice('** Query: %s' % query)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(2)
|
||||
plpy.notice('** Query returned with %d rows' % len(result))
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
return pu.empty_zipped_array(2)
|
||||
|
||||
## collect attributes
|
||||
attr_vals = pu.get_attributes(result)
|
||||
|
||||
## calculate weights
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
## calculate moran global
|
||||
moran_global = ps.esda.moran.Moran(attr_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
return zip([moran_global.I], [moran_global.EI])
|
||||
|
||||
def moran_local(subquery, attr,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I implementation for PL/Python
|
||||
Andy Eschbacher
|
||||
"""
|
||||
|
||||
# geometries with attributes that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
|
||||
qvals = {"id_col": id_col,
|
||||
"attr1": attr,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs}
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(5)
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
return pu.empty_zipped_array(5)
|
||||
|
||||
attr_vals = pu.get_attributes(result)
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
# find quadrants for each geometry
|
||||
quads = quad_position(lisa.q)
|
||||
|
||||
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
def moran_rate(subquery, numerator, denominator,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I Rate (global)
|
||||
Andy Eschbacher
|
||||
"""
|
||||
qvals = {"id_col": id_col,
|
||||
"attr1": numerator,
|
||||
"attr2": denominator,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs}
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
plpy.notice('** Query: %s' % query)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(2)
|
||||
plpy.notice('** Query returned with %d rows' % len(result))
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
return pu.empty_zipped_array(2)
|
||||
|
||||
## collect attributes
|
||||
numer = pu.get_attributes(result, 1)
|
||||
denom = pu.get_attributes(result, 2)
|
||||
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
## calculate moran global rate
|
||||
lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
|
||||
return zip([lisa_rate.I], [lisa_rate.EI])
|
||||
|
||||
def moran_local_rate(subquery, numerator, denominator,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I Local Rate
|
||||
Andy Eschbacher
|
||||
"""
|
||||
# geometries with values that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
|
||||
query = pu.construct_neighbor_query(w_type,
|
||||
{"id_col": id_col,
|
||||
"numerator": numerator,
|
||||
"denominator": denominator,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs})
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(5)
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
return pu.empty_zipped_array(5)
|
||||
|
||||
## collect attributes
|
||||
numer = pu.get_attributes(result, 1)
|
||||
denom = pu.get_attributes(result, 2)
|
||||
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
|
||||
# find units of significance
|
||||
quads = quad_position(lisa.q)
|
||||
|
||||
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
def moran_local_bv(subquery, attr1, attr2,
|
||||
permutations, geom_col, id_col, w_type, num_ngbrs):
|
||||
"""
|
||||
Moran's I (local) Bivariate (untested)
|
||||
"""
|
||||
plpy.notice('** Constructing query')
|
||||
|
||||
qvals = {"num_ngbrs": num_ngbrs,
|
||||
"attr1": attr1,
|
||||
"attr2": attr2,
|
||||
"subquery": subquery,
|
||||
"geom_col": geom_col,
|
||||
"id_col": id_col}
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(4)
|
||||
except plpy.SPIError:
|
||||
plpy.error("Error: areas of interest query failed, " \
|
||||
"check input parameters")
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
return pu.empty_zipped_array(4)
|
||||
|
||||
## collect attributes
|
||||
attr1_vals = pu.get_attributes(result, 1)
|
||||
attr2_vals = pu.get_attributes(result, 2)
|
||||
|
||||
# create weights
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
plpy.notice("len of Is: %d" % len(lisa.Is))
|
||||
|
||||
# find clustering of significance
|
||||
lisa_sig = quad_position(lisa.q)
|
||||
|
||||
plpy.notice('** Finished calculations')
|
||||
|
||||
return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
|
||||
|
||||
# Low level functions ----------------------------------------
|
||||
|
||||
def map_quads(coord):
|
||||
"""
|
||||
Map a quadrant number to Moran's I designation
|
||||
HH=1, LH=2, LL=3, HL=4
|
||||
Input:
|
||||
@param coord (int): quadrant of a specific measurement
|
||||
Output:
|
||||
classification (one of 'HH', 'LH', 'LL', or 'HL')
|
||||
"""
|
||||
if coord == 1:
|
||||
return 'HH'
|
||||
elif coord == 2:
|
||||
return 'LH'
|
||||
elif coord == 3:
|
||||
return 'LL'
|
||||
elif coord == 4:
|
||||
return 'HL'
|
||||
else:
|
||||
return None
|
||||
|
||||
def quad_position(quads):
|
||||
"""
|
||||
Produce Moran's I classification based of n
|
||||
Input:
|
||||
@param quads ndarray: an array of quads classified by
|
||||
1-4 (PySAL default)
|
||||
Output:
|
||||
@param list: an array of quads classied by 'HH', 'LL', etc.
|
||||
"""
|
||||
return [map_quads(q) for q in quads]
|
||||
@@ -0,0 +1 @@
|
||||
from pysal_utils import *
|
||||
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Utilities module for generic PySAL functionality, mainly centered on translating queries into numpy arrays or PySAL weights objects
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
|
||||
def construct_neighbor_query(w_type, query_vals):
|
||||
"""Return query (a string) used for finding neighbors
|
||||
@param w_type text: type of neighbors to calculate ('knn' or 'queen')
|
||||
@param query_vals dict: values used to construct the query
|
||||
"""
|
||||
|
||||
if w_type.lower() == 'knn':
|
||||
return knn(query_vals)
|
||||
else:
|
||||
return queen(query_vals)
|
||||
|
||||
## Build weight object
|
||||
def get_weight(query_res, w_type='knn', num_ngbrs=5):
|
||||
"""
|
||||
Construct PySAL weight from return value of query
|
||||
@param query_res: query results with attributes and neighbors
|
||||
"""
|
||||
if w_type.lower() == 'knn':
|
||||
row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs
|
||||
weights = {x['id']: row_normed_weights for x in query_res}
|
||||
else:
|
||||
weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors'])
|
||||
if len(x['neighbors']) > 0
|
||||
else [] for x in query_res}
|
||||
|
||||
neighbors = {x['id']: x['neighbors'] for x in query_res}
|
||||
|
||||
return ps.W(neighbors, weights)
|
||||
|
||||
def query_attr_select(params):
|
||||
"""
|
||||
Create portion of SELECT statement for attributes inolved in query.
|
||||
@param params: dict of information used in query (column names,
|
||||
table name, etc.)
|
||||
"""
|
||||
|
||||
attrs = [k for k in params
|
||||
if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')]
|
||||
|
||||
template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, "
|
||||
|
||||
attr_string = ""
|
||||
|
||||
for idx, val in enumerate(sorted(attrs)):
|
||||
attr_string += template % {"col": val, "alias_num": idx + 1}
|
||||
|
||||
return attr_string
|
||||
|
||||
def query_attr_where(params):
|
||||
"""
|
||||
Create portion of WHERE clauses for weeding out NULL-valued geometries
|
||||
"""
|
||||
attrs = sorted([k for k in params
|
||||
if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')])
|
||||
|
||||
attr_string = []
|
||||
|
||||
for attr in attrs:
|
||||
attr_string.append("idx_replace.\"{%s}\" IS NOT NULL" % attr)
|
||||
|
||||
if len(attrs) == 2:
|
||||
attr_string.append("idx_replace.\"{%s}\" <> 0" % attrs[1])
|
||||
|
||||
out = " AND ".join(attr_string)
|
||||
|
||||
return out
|
||||
|
||||
def knn(params):
|
||||
"""SQL query for k-nearest neighbors.
|
||||
@param vars: dict of values to fill template
|
||||
"""
|
||||
|
||||
attr_select = query_attr_select(params)
|
||||
attr_where = query_attr_where(params)
|
||||
|
||||
replacements = {"attr_select": attr_select,
|
||||
"attr_where_i": attr_where.replace("idx_replace", "i"),
|
||||
"attr_where_j": attr_where.replace("idx_replace", "j")}
|
||||
|
||||
query = "SELECT " \
|
||||
"i.\"{id_col}\" As id, " \
|
||||
"%(attr_select)s" \
|
||||
"(SELECT ARRAY(SELECT j.\"{id_col}\" " \
|
||||
"FROM ({subquery}) As j " \
|
||||
"WHERE " \
|
||||
"i.\"{id_col}\" <> j.\"{id_col}\" AND " \
|
||||
"%(attr_where_j)s " \
|
||||
"ORDER BY " \
|
||||
"j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \
|
||||
"LIMIT {num_ngbrs})" \
|
||||
") As neighbors " \
|
||||
"FROM ({subquery}) As i " \
|
||||
"WHERE " \
|
||||
"%(attr_where_i)s " \
|
||||
"ORDER BY i.\"{id_col}\" ASC;" % replacements
|
||||
|
||||
return query.format(**params)
|
||||
|
||||
## SQL query for finding queens neighbors (all contiguous polygons)
|
||||
def queen(params):
|
||||
"""SQL query for queen neighbors.
|
||||
@param params dict: information to fill query
|
||||
"""
|
||||
attr_select = query_attr_select(params)
|
||||
attr_where = query_attr_where(params)
|
||||
|
||||
replacements = {"attr_select": attr_select,
|
||||
"attr_where_i": attr_where.replace("idx_replace", "i"),
|
||||
"attr_where_j": attr_where.replace("idx_replace", "j")}
|
||||
|
||||
query = "SELECT " \
|
||||
"i.\"{id_col}\" As id, " \
|
||||
"%(attr_select)s" \
|
||||
"(SELECT ARRAY(SELECT j.\"{id_col}\" " \
|
||||
"FROM ({subquery}) As j " \
|
||||
"WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \
|
||||
"ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \
|
||||
"%(attr_where_j)s)" \
|
||||
") As neighbors " \
|
||||
"FROM ({subquery}) As i " \
|
||||
"WHERE " \
|
||||
"%(attr_where_i)s " \
|
||||
"ORDER BY i.\"{id_col}\" ASC;" % replacements
|
||||
|
||||
return query.format(**params)
|
||||
|
||||
## to add more weight methods open a ticket or pull request
|
||||
|
||||
def get_attributes(query_res, attr_num=1):
|
||||
"""
|
||||
@param query_res: query results with attributes and neighbors
|
||||
@param attr_num: attribute number (1, 2, ...)
|
||||
"""
|
||||
return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float)
|
||||
|
||||
def empty_zipped_array(num_nones):
|
||||
"""
|
||||
prepare return values for cases of empty weights objects (no neighbors)
|
||||
Input:
|
||||
@param num_nones int: number of columns (e.g., 4)
|
||||
Output:
|
||||
[(None, None, None, None)]
|
||||
"""
|
||||
|
||||
return [tuple([None] * num_nones)]
|
||||
10
release/python/0.0.3/crankshaft/crankshaft/random_seeds.py
Normal file
10
release/python/0.0.3/crankshaft/crankshaft/random_seeds.py
Normal file
@@ -0,0 +1,10 @@
|
||||
import random
|
||||
import numpy
|
||||
|
||||
def set_random_seeds(value):
|
||||
"""
|
||||
Set the seeds of the RNGs (Random Number Generators)
|
||||
used internally.
|
||||
"""
|
||||
random.seed(value)
|
||||
numpy.random.seed(value)
|
||||
48
release/python/0.0.3/crankshaft/setup.py
Normal file
48
release/python/0.0.3/crankshaft/setup.py
Normal file
@@ -0,0 +1,48 @@
|
||||
|
||||
"""
|
||||
CartoDB Spatial Analysis Python Library
|
||||
See:
|
||||
https://github.com/CartoDB/crankshaft
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
setup(
|
||||
name='crankshaft',
|
||||
|
||||
version='0.0.3',
|
||||
|
||||
description='CartoDB Spatial Analysis Python Library',
|
||||
|
||||
url='https://github.com/CartoDB/crankshaft',
|
||||
|
||||
author='Data Services Team - CartoDB',
|
||||
author_email='dataservices@cartodb.com',
|
||||
|
||||
license='MIT',
|
||||
|
||||
classifiers=[
|
||||
'Development Status :: 3 - Alpha',
|
||||
'Intended Audience :: Mapping comunity',
|
||||
'Topic :: Maps :: Mapping Tools',
|
||||
'License :: OSI Approved :: MIT License',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
],
|
||||
|
||||
keywords='maps mapping tools spatial analysis geostatistics',
|
||||
|
||||
packages=find_packages(exclude=['contrib', 'docs', 'tests']),
|
||||
|
||||
extras_require={
|
||||
'dev': ['unittest'],
|
||||
'test': ['unittest', 'nose', 'mock'],
|
||||
},
|
||||
|
||||
# The choice of component versions is dictated by what's
|
||||
# provisioned in the production servers.
|
||||
install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'],
|
||||
|
||||
requires=['pysal', 'numpy', 'sklearn'],
|
||||
|
||||
test_suite='test'
|
||||
)
|
||||
1
release/python/0.0.3/crankshaft/test/fixtures/kmeans.json
vendored
Normal file
1
release/python/0.0.3/crankshaft/test/fixtures/kmeans.json
vendored
Normal file
@@ -0,0 +1 @@
|
||||
[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
|
||||
52
release/python/0.0.3/crankshaft/test/fixtures/moran.json
vendored
Normal file
52
release/python/0.0.3/crankshaft/test/fixtures/moran.json
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
[[0.9319096128346788, "HH"],
|
||||
[-1.135787401862846, "HL"],
|
||||
[0.11732030672508517, "LL"],
|
||||
[0.6152779669180425, "LL"],
|
||||
[-0.14657336660125297, "LH"],
|
||||
[0.6967858120189607, "LL"],
|
||||
[0.07949310115714454, "HH"],
|
||||
[0.4703198759258987, "HH"],
|
||||
[0.4421125200498064, "HH"],
|
||||
[0.5724288737143592, "LL"],
|
||||
[0.8970743435692062, "LL"],
|
||||
[0.18327334401918674, "LL"],
|
||||
[-0.01466729201304962, "HL"],
|
||||
[0.3481559372544409, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329988, "HH"],
|
||||
[0.4373841193538136, "HH"],
|
||||
[0.15971286468915544, "LL"],
|
||||
[1.0543588860308968, "HH"],
|
||||
[1.7372866900020818, "HH"],
|
||||
[1.091998586053999, "LL"],
|
||||
[0.1171572584252222, "HH"],
|
||||
[0.08438455015300014, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329985, "HH"],
|
||||
[1.1627044812890683, "HH"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.795275137550483, "HH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.3010757406693439, "LL"],
|
||||
[2.8205795942839376, "HH"],
|
||||
[0.11259190602909264, "LL"],
|
||||
[-0.07116352791516614, "HL"],
|
||||
[-0.09945240794119009, "LH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.1832733440191868, "LL"],
|
||||
[-0.39054253768447705, "HL"],
|
||||
[-0.1672071289487642, "HL"],
|
||||
[0.3337669247916343, "HH"],
|
||||
[0.2584386102554792, "HH"],
|
||||
[-0.19733845476322634, "HL"],
|
||||
[-0.9379282899805409, "LH"],
|
||||
[-0.028770969951095866, "LH"],
|
||||
[0.051367269430983485, "LL"],
|
||||
[-0.2172548045913472, "LH"],
|
||||
[0.05136726943098351, "LL"],
|
||||
[0.04191046803899837, "LL"],
|
||||
[0.7482357030403517, "HH"],
|
||||
[-0.014585767863118111, "LH"],
|
||||
[0.5410013139159929, "HH"],
|
||||
[1.0223932668429925, "LL"],
|
||||
[1.4179402898927476, "LL"]]
|
||||
54
release/python/0.0.3/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
54
release/python/0.0.3/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
[
|
||||
{"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
|
||||
{"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
|
||||
{"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
|
||||
{"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
|
||||
{"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
|
||||
{"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
|
||||
{"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
|
||||
{"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
|
||||
{"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
|
||||
{"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
|
||||
{"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
|
||||
{"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
|
||||
{"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
|
||||
{"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
|
||||
{"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
|
||||
{"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
|
||||
{"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
|
||||
{"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
|
||||
{"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
|
||||
{"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
|
||||
{"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
|
||||
{"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
|
||||
{"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
|
||||
{"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
|
||||
{"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
|
||||
{"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
|
||||
{"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
|
||||
{"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
|
||||
{"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
|
||||
{"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
|
||||
{"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
|
||||
{"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
|
||||
{"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
|
||||
{"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
|
||||
{"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
|
||||
{"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
|
||||
{"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
|
||||
{"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
|
||||
{"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
|
||||
{"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
|
||||
{"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
|
||||
{"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
|
||||
{"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
|
||||
{"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
|
||||
{"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
|
||||
{"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
|
||||
{"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
|
||||
{"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
|
||||
{"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
|
||||
{"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
|
||||
{"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
|
||||
{"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
|
||||
]
|
||||
13
release/python/0.0.3/crankshaft/test/helper.py
Normal file
13
release/python/0.0.3/crankshaft/test/helper.py
Normal file
@@ -0,0 +1,13 @@
|
||||
import unittest
|
||||
|
||||
from mock_plpy import MockPlPy
|
||||
plpy = MockPlPy()
|
||||
|
||||
import sys
|
||||
sys.modules['plpy'] = plpy
|
||||
|
||||
import os
|
||||
|
||||
def fixture_file(name):
|
||||
dir = os.path.dirname(os.path.realpath(__file__))
|
||||
return os.path.join(dir, 'fixtures', name)
|
||||
34
release/python/0.0.3/crankshaft/test/mock_plpy.py
Normal file
34
release/python/0.0.3/crankshaft/test/mock_plpy.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import re
|
||||
|
||||
class MockPlPy:
|
||||
def __init__(self):
|
||||
self._reset()
|
||||
|
||||
def _reset(self):
|
||||
self.infos = []
|
||||
self.notices = []
|
||||
self.debugs = []
|
||||
self.logs = []
|
||||
self.warnings = []
|
||||
self.errors = []
|
||||
self.fatals = []
|
||||
self.executes = []
|
||||
self.results = []
|
||||
self.prepares = []
|
||||
self.results = []
|
||||
|
||||
def _define_result(self, query, result):
|
||||
pattern = re.compile(query, re.IGNORECASE | re.MULTILINE)
|
||||
self.results.append([pattern, result])
|
||||
|
||||
def notice(self, msg):
|
||||
self.notices.append(msg)
|
||||
|
||||
def info(self, msg):
|
||||
self.infos.append(msg)
|
||||
|
||||
def execute(self, query): # TODO: additional arguments
|
||||
for result in self.results:
|
||||
if result[0].match(query):
|
||||
return result[1]
|
||||
return []
|
||||
38
release/python/0.0.3/crankshaft/test/test_cluster_kmeans.py
Normal file
38
release/python/0.0.3/crankshaft/test/test_cluster_kmeans.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
import numpy as np
|
||||
import crankshaft.clustering as cc
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
|
||||
class KMeansTest(unittest.TestCase):
|
||||
"""Testing class for Moran's I functions"""
|
||||
|
||||
def setUp(self):
|
||||
plpy._reset()
|
||||
self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read())
|
||||
self.params = {"subquery": "select * from table",
|
||||
"no_clusters": "10"
|
||||
}
|
||||
|
||||
def test_kmeans(self):
|
||||
data = self.cluster_data
|
||||
plpy._define_result('select' ,data)
|
||||
clusters = cc.kmeans('subquery', 2)
|
||||
labels = [a[1] for a in clusters]
|
||||
c1 = [a for a in clusters if a[1]==0]
|
||||
c2 = [a for a in clusters if a[1]==1]
|
||||
|
||||
self.assertEqual(len(np.unique(labels)),2)
|
||||
self.assertEqual(len(c1),20)
|
||||
self.assertEqual(len(c2),20)
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
|
||||
import crankshaft.clustering as cc
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
|
||||
class MoranTest(unittest.TestCase):
|
||||
"""Testing class for Moran's I functions"""
|
||||
|
||||
def setUp(self):
|
||||
plpy._reset()
|
||||
self.params = {"id_col": "cartodb_id",
|
||||
"attr1": "andy",
|
||||
"attr2": "jay_z",
|
||||
"subquery": "SELECT * FROM a_list",
|
||||
"geom_col": "the_geom",
|
||||
"num_ngbrs": 321}
|
||||
self.neighbors_data = json.loads(open(fixture_file('neighbors.json')).read())
|
||||
self.moran_data = json.loads(open(fixture_file('moran.json')).read())
|
||||
|
||||
def test_map_quads(self):
|
||||
"""Test map_quads"""
|
||||
self.assertEqual(cc.map_quads(1), 'HH')
|
||||
self.assertEqual(cc.map_quads(2), 'LH')
|
||||
self.assertEqual(cc.map_quads(3), 'LL')
|
||||
self.assertEqual(cc.map_quads(4), 'HL')
|
||||
self.assertEqual(cc.map_quads(33), None)
|
||||
self.assertEqual(cc.map_quads('andy'), None)
|
||||
|
||||
def test_quad_position(self):
|
||||
"""Test lisa_sig_vals"""
|
||||
|
||||
quads = np.array([1, 2, 3, 4], np.int)
|
||||
|
||||
ans = np.array(['HH', 'LH', 'LL', 'HL'])
|
||||
test_ans = cc.quad_position(quads)
|
||||
|
||||
self.assertTrue((test_ans == ans).all())
|
||||
|
||||
def test_moran_local(self):
|
||||
"""Test Moran's I local"""
|
||||
data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1234)
|
||||
result = cc.moran_local('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
result = [(row[0], row[1]) for row in result]
|
||||
expected = self.moran_data
|
||||
for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected):
|
||||
self.assertAlmostEqual(res_val, exp_val)
|
||||
self.assertEqual(res_quad, exp_quad)
|
||||
|
||||
def test_moran_local_rate(self):
|
||||
"""Test Moran's I rate"""
|
||||
data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1234)
|
||||
result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
print 'result == None? ', result == None
|
||||
result = [(row[0], row[1]) for row in result]
|
||||
expected = self.moran_data
|
||||
for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected):
|
||||
self.assertAlmostEqual(res_val, exp_val)
|
||||
|
||||
def test_moran(self):
|
||||
"""Test Moran's I global"""
|
||||
data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1235)
|
||||
result = cc.moran('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
print 'result == None?', result == None
|
||||
result_moran = result[0][0]
|
||||
expected_moran = np.array([row[0] for row in self.moran_data]).mean()
|
||||
self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)
|
||||
107
release/python/0.0.3/crankshaft/test/test_pysal_utils.py
Normal file
107
release/python/0.0.3/crankshaft/test/test_pysal_utils.py
Normal file
@@ -0,0 +1,107 @@
|
||||
import unittest
|
||||
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
|
||||
|
||||
class PysalUtilsTest(unittest.TestCase):
|
||||
"""Testing class for utility functions related to PySAL integrations"""
|
||||
|
||||
def setUp(self):
|
||||
self.params = {"id_col": "cartodb_id",
|
||||
"attr1": "andy",
|
||||
"attr2": "jay_z",
|
||||
"subquery": "SELECT * FROM a_list",
|
||||
"geom_col": "the_geom",
|
||||
"num_ngbrs": 321}
|
||||
|
||||
def test_query_attr_select(self):
|
||||
"""Test query_attr_select"""
|
||||
|
||||
ans = "i.\"{attr1}\"::numeric As attr1, " \
|
||||
"i.\"{attr2}\"::numeric As attr2, "
|
||||
|
||||
self.assertEqual(pu.query_attr_select(self.params), ans)
|
||||
|
||||
def test_query_attr_where(self):
|
||||
"""Test pu.query_attr_where"""
|
||||
|
||||
ans = "idx_replace.\"{attr1}\" IS NOT NULL AND " \
|
||||
"idx_replace.\"{attr2}\" IS NOT NULL AND " \
|
||||
"idx_replace.\"{attr2}\" <> 0"
|
||||
|
||||
self.assertEqual(pu.query_attr_where(self.params), ans)
|
||||
|
||||
def test_knn(self):
|
||||
"""Test knn neighbors constructor"""
|
||||
|
||||
ans = "SELECT i.\"cartodb_id\" As id, " \
|
||||
"i.\"andy\"::numeric As attr1, " \
|
||||
"i.\"jay_z\"::numeric As attr2, " \
|
||||
"(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
|
||||
"FROM (SELECT * FROM a_list) As j " \
|
||||
"WHERE " \
|
||||
"i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
|
||||
"j.\"andy\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" <> 0 " \
|
||||
"ORDER BY " \
|
||||
"j.\"the_geom\" <-> i.\"the_geom\" ASC " \
|
||||
"LIMIT 321)) As neighbors " \
|
||||
"FROM (SELECT * FROM a_list) As i " \
|
||||
"WHERE i.\"andy\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" <> 0 " \
|
||||
"ORDER BY i.\"cartodb_id\" ASC;"
|
||||
|
||||
self.assertEqual(pu.knn(self.params), ans)
|
||||
|
||||
def test_queen(self):
|
||||
"""Test queen neighbors constructor"""
|
||||
|
||||
ans = "SELECT i.\"cartodb_id\" As id, " \
|
||||
"i.\"andy\"::numeric As attr1, " \
|
||||
"i.\"jay_z\"::numeric As attr2, " \
|
||||
"(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
|
||||
"FROM (SELECT * FROM a_list) As j " \
|
||||
"WHERE " \
|
||||
"i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
|
||||
"ST_Touches(i.\"the_geom\", " \
|
||||
"j.\"the_geom\") AND " \
|
||||
"j.\"andy\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" <> 0)" \
|
||||
") As neighbors " \
|
||||
"FROM (SELECT * FROM a_list) As i " \
|
||||
"WHERE i.\"andy\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" <> 0 " \
|
||||
"ORDER BY i.\"cartodb_id\" ASC;"
|
||||
|
||||
self.assertEqual(pu.queen(self.params), ans)
|
||||
|
||||
def test_construct_neighbor_query(self):
|
||||
"""Test construct_neighbor_query"""
|
||||
|
||||
# Compare to raw knn query
|
||||
self.assertEqual(pu.construct_neighbor_query('knn', self.params),
|
||||
pu.knn(self.params))
|
||||
|
||||
def test_get_attributes(self):
|
||||
"""Test get_attributes"""
|
||||
|
||||
## need to add tests
|
||||
|
||||
self.assertEqual(True, True)
|
||||
|
||||
def test_get_weight(self):
|
||||
"""Test get_weight"""
|
||||
|
||||
self.assertEqual(True, True)
|
||||
|
||||
def test_empty_zipped_array(self):
|
||||
"""Test empty_zipped_array"""
|
||||
ans2 = [(None, None)]
|
||||
ans4 = [(None, None, None, None)]
|
||||
self.assertEqual(pu.empty_zipped_array(2), ans2)
|
||||
self.assertEqual(pu.empty_zipped_array(4), ans4)
|
||||
2
release/python/0.0.4/crankshaft/crankshaft/__init__.py
Normal file
2
release/python/0.0.4/crankshaft/crankshaft/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
import random_seeds
|
||||
import clustering
|
||||
@@ -0,0 +1,2 @@
|
||||
from moran import *
|
||||
from kmeans import *
|
||||
@@ -0,0 +1,18 @@
|
||||
from sklearn.cluster import KMeans
|
||||
import plpy
|
||||
|
||||
def kmeans(query, no_clusters, no_init=20):
|
||||
data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids,
|
||||
array_agg(ST_X(the_geom) order by cartodb_id) xs,
|
||||
array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a
|
||||
where the_geom is not null
|
||||
'''.format(query=query))
|
||||
|
||||
xs = data[0]['xs']
|
||||
ys = data[0]['ys']
|
||||
ids = data[0]['ids']
|
||||
|
||||
km = KMeans(n_clusters= no_clusters, n_init=no_init)
|
||||
labels = km.fit_predict(zip(xs,ys))
|
||||
return zip(ids,labels)
|
||||
|
||||
260
release/python/0.0.4/crankshaft/crankshaft/clustering/moran.py
Normal file
260
release/python/0.0.4/crankshaft/crankshaft/clustering/moran.py
Normal file
@@ -0,0 +1,260 @@
|
||||
"""
|
||||
Moran's I geostatistics (global clustering & outliers presence)
|
||||
"""
|
||||
|
||||
# TODO: Fill in local neighbors which have null/NoneType values with the
|
||||
# average of the their neighborhood
|
||||
|
||||
import pysal as ps
|
||||
import plpy
|
||||
|
||||
# crankshaft module
|
||||
import crankshaft.pysal_utils as pu
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
def moran(subquery, attr_name,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I (global)
|
||||
Implementation building neighbors with a PostGIS database and Moran's I
|
||||
core clusters with PySAL.
|
||||
Andy Eschbacher
|
||||
"""
|
||||
qvals = {"id_col": id_col,
|
||||
"attr1": attr_name,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs}
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
plpy.notice('** Query: %s' % query)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(2)
|
||||
plpy.notice('** Query returned with %d rows' % len(result))
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
return pu.empty_zipped_array(2)
|
||||
|
||||
## collect attributes
|
||||
attr_vals = pu.get_attributes(result)
|
||||
|
||||
## calculate weights
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
## calculate moran global
|
||||
moran_global = ps.esda.moran.Moran(attr_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
return zip([moran_global.I], [moran_global.EI])
|
||||
|
||||
def moran_local(subquery, attr,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I implementation for PL/Python
|
||||
Andy Eschbacher
|
||||
"""
|
||||
|
||||
# geometries with attributes that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
|
||||
qvals = {"id_col": id_col,
|
||||
"attr1": attr,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs}
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(5)
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
return pu.empty_zipped_array(5)
|
||||
|
||||
attr_vals = pu.get_attributes(result)
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
# find quadrants for each geometry
|
||||
quads = quad_position(lisa.q)
|
||||
|
||||
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
def moran_rate(subquery, numerator, denominator,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I Rate (global)
|
||||
Andy Eschbacher
|
||||
"""
|
||||
qvals = {"id_col": id_col,
|
||||
"attr1": numerator,
|
||||
"attr2": denominator,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs}
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
plpy.notice('** Query: %s' % query)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(2)
|
||||
plpy.notice('** Query returned with %d rows' % len(result))
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
return pu.empty_zipped_array(2)
|
||||
|
||||
## collect attributes
|
||||
numer = pu.get_attributes(result, 1)
|
||||
denom = pu.get_attributes(result, 2)
|
||||
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
## calculate moran global rate
|
||||
lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
|
||||
return zip([lisa_rate.I], [lisa_rate.EI])
|
||||
|
||||
def moran_local_rate(subquery, numerator, denominator,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I Local Rate
|
||||
Andy Eschbacher
|
||||
"""
|
||||
# geometries with values that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
|
||||
query = pu.construct_neighbor_query(w_type,
|
||||
{"id_col": id_col,
|
||||
"numerator": numerator,
|
||||
"denominator": denominator,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs})
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(5)
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
return pu.empty_zipped_array(5)
|
||||
|
||||
## collect attributes
|
||||
numer = pu.get_attributes(result, 1)
|
||||
denom = pu.get_attributes(result, 2)
|
||||
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
|
||||
# find units of significance
|
||||
quads = quad_position(lisa.q)
|
||||
|
||||
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
def moran_local_bv(subquery, attr1, attr2,
|
||||
permutations, geom_col, id_col, w_type, num_ngbrs):
|
||||
"""
|
||||
Moran's I (local) Bivariate (untested)
|
||||
"""
|
||||
plpy.notice('** Constructing query')
|
||||
|
||||
qvals = {"num_ngbrs": num_ngbrs,
|
||||
"attr1": attr1,
|
||||
"attr2": attr2,
|
||||
"subquery": subquery,
|
||||
"geom_col": geom_col,
|
||||
"id_col": id_col}
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(4)
|
||||
except plpy.SPIError:
|
||||
plpy.error("Error: areas of interest query failed, " \
|
||||
"check input parameters")
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
return pu.empty_zipped_array(4)
|
||||
|
||||
## collect attributes
|
||||
attr1_vals = pu.get_attributes(result, 1)
|
||||
attr2_vals = pu.get_attributes(result, 2)
|
||||
|
||||
# create weights
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
plpy.notice("len of Is: %d" % len(lisa.Is))
|
||||
|
||||
# find clustering of significance
|
||||
lisa_sig = quad_position(lisa.q)
|
||||
|
||||
plpy.notice('** Finished calculations')
|
||||
|
||||
return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
|
||||
|
||||
# Low level functions ----------------------------------------
|
||||
|
||||
def map_quads(coord):
|
||||
"""
|
||||
Map a quadrant number to Moran's I designation
|
||||
HH=1, LH=2, LL=3, HL=4
|
||||
Input:
|
||||
@param coord (int): quadrant of a specific measurement
|
||||
Output:
|
||||
classification (one of 'HH', 'LH', 'LL', or 'HL')
|
||||
"""
|
||||
if coord == 1:
|
||||
return 'HH'
|
||||
elif coord == 2:
|
||||
return 'LH'
|
||||
elif coord == 3:
|
||||
return 'LL'
|
||||
elif coord == 4:
|
||||
return 'HL'
|
||||
else:
|
||||
return None
|
||||
|
||||
def quad_position(quads):
|
||||
"""
|
||||
Produce Moran's I classification based of n
|
||||
Input:
|
||||
@param quads ndarray: an array of quads classified by
|
||||
1-4 (PySAL default)
|
||||
Output:
|
||||
@param list: an array of quads classied by 'HH', 'LL', etc.
|
||||
"""
|
||||
return [map_quads(q) for q in quads]
|
||||
@@ -0,0 +1 @@
|
||||
from pysal_utils import *
|
||||
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Utilities module for generic PySAL functionality, mainly centered on translating queries into numpy arrays or PySAL weights objects
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
|
||||
def construct_neighbor_query(w_type, query_vals):
|
||||
"""Return query (a string) used for finding neighbors
|
||||
@param w_type text: type of neighbors to calculate ('knn' or 'queen')
|
||||
@param query_vals dict: values used to construct the query
|
||||
"""
|
||||
|
||||
if w_type.lower() == 'knn':
|
||||
return knn(query_vals)
|
||||
else:
|
||||
return queen(query_vals)
|
||||
|
||||
## Build weight object
|
||||
def get_weight(query_res, w_type='knn', num_ngbrs=5):
|
||||
"""
|
||||
Construct PySAL weight from return value of query
|
||||
@param query_res: query results with attributes and neighbors
|
||||
"""
|
||||
if w_type.lower() == 'knn':
|
||||
row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs
|
||||
weights = {x['id']: row_normed_weights for x in query_res}
|
||||
else:
|
||||
weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors'])
|
||||
if len(x['neighbors']) > 0
|
||||
else [] for x in query_res}
|
||||
|
||||
neighbors = {x['id']: x['neighbors'] for x in query_res}
|
||||
|
||||
return ps.W(neighbors, weights)
|
||||
|
||||
def query_attr_select(params):
|
||||
"""
|
||||
Create portion of SELECT statement for attributes inolved in query.
|
||||
@param params: dict of information used in query (column names,
|
||||
table name, etc.)
|
||||
"""
|
||||
|
||||
attrs = [k for k in params
|
||||
if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')]
|
||||
|
||||
template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, "
|
||||
|
||||
attr_string = ""
|
||||
|
||||
for idx, val in enumerate(sorted(attrs)):
|
||||
attr_string += template % {"col": val, "alias_num": idx + 1}
|
||||
|
||||
return attr_string
|
||||
|
||||
def query_attr_where(params):
|
||||
"""
|
||||
Create portion of WHERE clauses for weeding out NULL-valued geometries
|
||||
"""
|
||||
attrs = sorted([k for k in params
|
||||
if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')])
|
||||
|
||||
attr_string = []
|
||||
|
||||
for attr in attrs:
|
||||
attr_string.append("idx_replace.\"{%s}\" IS NOT NULL" % attr)
|
||||
|
||||
if len(attrs) == 2:
|
||||
attr_string.append("idx_replace.\"{%s}\" <> 0" % attrs[1])
|
||||
|
||||
out = " AND ".join(attr_string)
|
||||
|
||||
return out
|
||||
|
||||
def knn(params):
|
||||
"""SQL query for k-nearest neighbors.
|
||||
@param vars: dict of values to fill template
|
||||
"""
|
||||
|
||||
attr_select = query_attr_select(params)
|
||||
attr_where = query_attr_where(params)
|
||||
|
||||
replacements = {"attr_select": attr_select,
|
||||
"attr_where_i": attr_where.replace("idx_replace", "i"),
|
||||
"attr_where_j": attr_where.replace("idx_replace", "j")}
|
||||
|
||||
query = "SELECT " \
|
||||
"i.\"{id_col}\" As id, " \
|
||||
"%(attr_select)s" \
|
||||
"(SELECT ARRAY(SELECT j.\"{id_col}\" " \
|
||||
"FROM ({subquery}) As j " \
|
||||
"WHERE " \
|
||||
"i.\"{id_col}\" <> j.\"{id_col}\" AND " \
|
||||
"%(attr_where_j)s " \
|
||||
"ORDER BY " \
|
||||
"j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \
|
||||
"LIMIT {num_ngbrs})" \
|
||||
") As neighbors " \
|
||||
"FROM ({subquery}) As i " \
|
||||
"WHERE " \
|
||||
"%(attr_where_i)s " \
|
||||
"ORDER BY i.\"{id_col}\" ASC;" % replacements
|
||||
|
||||
return query.format(**params)
|
||||
|
||||
## SQL query for finding queens neighbors (all contiguous polygons)
|
||||
def queen(params):
|
||||
"""SQL query for queen neighbors.
|
||||
@param params dict: information to fill query
|
||||
"""
|
||||
attr_select = query_attr_select(params)
|
||||
attr_where = query_attr_where(params)
|
||||
|
||||
replacements = {"attr_select": attr_select,
|
||||
"attr_where_i": attr_where.replace("idx_replace", "i"),
|
||||
"attr_where_j": attr_where.replace("idx_replace", "j")}
|
||||
|
||||
query = "SELECT " \
|
||||
"i.\"{id_col}\" As id, " \
|
||||
"%(attr_select)s" \
|
||||
"(SELECT ARRAY(SELECT j.\"{id_col}\" " \
|
||||
"FROM ({subquery}) As j " \
|
||||
"WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \
|
||||
"ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \
|
||||
"%(attr_where_j)s)" \
|
||||
") As neighbors " \
|
||||
"FROM ({subquery}) As i " \
|
||||
"WHERE " \
|
||||
"%(attr_where_i)s " \
|
||||
"ORDER BY i.\"{id_col}\" ASC;" % replacements
|
||||
|
||||
return query.format(**params)
|
||||
|
||||
## to add more weight methods open a ticket or pull request
|
||||
|
||||
def get_attributes(query_res, attr_num=1):
|
||||
"""
|
||||
@param query_res: query results with attributes and neighbors
|
||||
@param attr_num: attribute number (1, 2, ...)
|
||||
"""
|
||||
return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float)
|
||||
|
||||
def empty_zipped_array(num_nones):
|
||||
"""
|
||||
prepare return values for cases of empty weights objects (no neighbors)
|
||||
Input:
|
||||
@param num_nones int: number of columns (e.g., 4)
|
||||
Output:
|
||||
[(None, None, None, None)]
|
||||
"""
|
||||
|
||||
return [tuple([None] * num_nones)]
|
||||
10
release/python/0.0.4/crankshaft/crankshaft/random_seeds.py
Normal file
10
release/python/0.0.4/crankshaft/crankshaft/random_seeds.py
Normal file
@@ -0,0 +1,10 @@
|
||||
import random
|
||||
import numpy
|
||||
|
||||
def set_random_seeds(value):
|
||||
"""
|
||||
Set the seeds of the RNGs (Random Number Generators)
|
||||
used internally.
|
||||
"""
|
||||
random.seed(value)
|
||||
numpy.random.seed(value)
|
||||
48
release/python/0.0.4/crankshaft/setup.py
Normal file
48
release/python/0.0.4/crankshaft/setup.py
Normal file
@@ -0,0 +1,48 @@
|
||||
|
||||
"""
|
||||
CartoDB Spatial Analysis Python Library
|
||||
See:
|
||||
https://github.com/CartoDB/crankshaft
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
setup(
|
||||
name='crankshaft',
|
||||
|
||||
version='0.0.4',
|
||||
|
||||
description='CartoDB Spatial Analysis Python Library',
|
||||
|
||||
url='https://github.com/CartoDB/crankshaft',
|
||||
|
||||
author='Data Services Team - CartoDB',
|
||||
author_email='dataservices@cartodb.com',
|
||||
|
||||
license='MIT',
|
||||
|
||||
classifiers=[
|
||||
'Development Status :: 3 - Alpha',
|
||||
'Intended Audience :: Mapping comunity',
|
||||
'Topic :: Maps :: Mapping Tools',
|
||||
'License :: OSI Approved :: MIT License',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
],
|
||||
|
||||
keywords='maps mapping tools spatial analysis geostatistics',
|
||||
|
||||
packages=find_packages(exclude=['contrib', 'docs', 'tests']),
|
||||
|
||||
extras_require={
|
||||
'dev': ['unittest'],
|
||||
'test': ['unittest', 'nose', 'mock'],
|
||||
},
|
||||
|
||||
# The choice of component versions is dictated by what's
|
||||
# provisioned in the production servers.
|
||||
install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'],
|
||||
|
||||
requires=['pysal', 'numpy', 'sklearn'],
|
||||
|
||||
test_suite='test'
|
||||
)
|
||||
1
release/python/0.0.4/crankshaft/test/fixtures/kmeans.json
vendored
Normal file
1
release/python/0.0.4/crankshaft/test/fixtures/kmeans.json
vendored
Normal file
@@ -0,0 +1 @@
|
||||
[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
|
||||
52
release/python/0.0.4/crankshaft/test/fixtures/moran.json
vendored
Normal file
52
release/python/0.0.4/crankshaft/test/fixtures/moran.json
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
[[0.9319096128346788, "HH"],
|
||||
[-1.135787401862846, "HL"],
|
||||
[0.11732030672508517, "LL"],
|
||||
[0.6152779669180425, "LL"],
|
||||
[-0.14657336660125297, "LH"],
|
||||
[0.6967858120189607, "LL"],
|
||||
[0.07949310115714454, "HH"],
|
||||
[0.4703198759258987, "HH"],
|
||||
[0.4421125200498064, "HH"],
|
||||
[0.5724288737143592, "LL"],
|
||||
[0.8970743435692062, "LL"],
|
||||
[0.18327334401918674, "LL"],
|
||||
[-0.01466729201304962, "HL"],
|
||||
[0.3481559372544409, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329988, "HH"],
|
||||
[0.4373841193538136, "HH"],
|
||||
[0.15971286468915544, "LL"],
|
||||
[1.0543588860308968, "HH"],
|
||||
[1.7372866900020818, "HH"],
|
||||
[1.091998586053999, "LL"],
|
||||
[0.1171572584252222, "HH"],
|
||||
[0.08438455015300014, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329985, "HH"],
|
||||
[1.1627044812890683, "HH"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.795275137550483, "HH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.3010757406693439, "LL"],
|
||||
[2.8205795942839376, "HH"],
|
||||
[0.11259190602909264, "LL"],
|
||||
[-0.07116352791516614, "HL"],
|
||||
[-0.09945240794119009, "LH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.1832733440191868, "LL"],
|
||||
[-0.39054253768447705, "HL"],
|
||||
[-0.1672071289487642, "HL"],
|
||||
[0.3337669247916343, "HH"],
|
||||
[0.2584386102554792, "HH"],
|
||||
[-0.19733845476322634, "HL"],
|
||||
[-0.9379282899805409, "LH"],
|
||||
[-0.028770969951095866, "LH"],
|
||||
[0.051367269430983485, "LL"],
|
||||
[-0.2172548045913472, "LH"],
|
||||
[0.05136726943098351, "LL"],
|
||||
[0.04191046803899837, "LL"],
|
||||
[0.7482357030403517, "HH"],
|
||||
[-0.014585767863118111, "LH"],
|
||||
[0.5410013139159929, "HH"],
|
||||
[1.0223932668429925, "LL"],
|
||||
[1.4179402898927476, "LL"]]
|
||||
54
release/python/0.0.4/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
54
release/python/0.0.4/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
[
|
||||
{"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
|
||||
{"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
|
||||
{"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
|
||||
{"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
|
||||
{"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
|
||||
{"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
|
||||
{"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
|
||||
{"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
|
||||
{"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
|
||||
{"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
|
||||
{"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
|
||||
{"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
|
||||
{"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
|
||||
{"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
|
||||
{"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
|
||||
{"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
|
||||
{"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
|
||||
{"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
|
||||
{"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
|
||||
{"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
|
||||
{"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
|
||||
{"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
|
||||
{"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
|
||||
{"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
|
||||
{"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
|
||||
{"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
|
||||
{"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
|
||||
{"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
|
||||
{"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
|
||||
{"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
|
||||
{"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
|
||||
{"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
|
||||
{"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
|
||||
{"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
|
||||
{"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
|
||||
{"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
|
||||
{"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
|
||||
{"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
|
||||
{"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
|
||||
{"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
|
||||
{"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
|
||||
{"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
|
||||
{"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
|
||||
{"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
|
||||
{"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
|
||||
{"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
|
||||
{"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
|
||||
{"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
|
||||
{"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
|
||||
{"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
|
||||
{"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
|
||||
{"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
|
||||
]
|
||||
13
release/python/0.0.4/crankshaft/test/helper.py
Normal file
13
release/python/0.0.4/crankshaft/test/helper.py
Normal file
@@ -0,0 +1,13 @@
|
||||
import unittest
|
||||
|
||||
from mock_plpy import MockPlPy
|
||||
plpy = MockPlPy()
|
||||
|
||||
import sys
|
||||
sys.modules['plpy'] = plpy
|
||||
|
||||
import os
|
||||
|
||||
def fixture_file(name):
|
||||
dir = os.path.dirname(os.path.realpath(__file__))
|
||||
return os.path.join(dir, 'fixtures', name)
|
||||
34
release/python/0.0.4/crankshaft/test/mock_plpy.py
Normal file
34
release/python/0.0.4/crankshaft/test/mock_plpy.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import re
|
||||
|
||||
class MockPlPy:
|
||||
def __init__(self):
|
||||
self._reset()
|
||||
|
||||
def _reset(self):
|
||||
self.infos = []
|
||||
self.notices = []
|
||||
self.debugs = []
|
||||
self.logs = []
|
||||
self.warnings = []
|
||||
self.errors = []
|
||||
self.fatals = []
|
||||
self.executes = []
|
||||
self.results = []
|
||||
self.prepares = []
|
||||
self.results = []
|
||||
|
||||
def _define_result(self, query, result):
|
||||
pattern = re.compile(query, re.IGNORECASE | re.MULTILINE)
|
||||
self.results.append([pattern, result])
|
||||
|
||||
def notice(self, msg):
|
||||
self.notices.append(msg)
|
||||
|
||||
def info(self, msg):
|
||||
self.infos.append(msg)
|
||||
|
||||
def execute(self, query): # TODO: additional arguments
|
||||
for result in self.results:
|
||||
if result[0].match(query):
|
||||
return result[1]
|
||||
return []
|
||||
38
release/python/0.0.4/crankshaft/test/test_cluster_kmeans.py
Normal file
38
release/python/0.0.4/crankshaft/test/test_cluster_kmeans.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
import numpy as np
|
||||
import crankshaft.clustering as cc
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
|
||||
class KMeansTest(unittest.TestCase):
|
||||
"""Testing class for Moran's I functions"""
|
||||
|
||||
def setUp(self):
|
||||
plpy._reset()
|
||||
self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read())
|
||||
self.params = {"subquery": "select * from table",
|
||||
"no_clusters": "10"
|
||||
}
|
||||
|
||||
def test_kmeans(self):
|
||||
data = self.cluster_data
|
||||
plpy._define_result('select' ,data)
|
||||
clusters = cc.kmeans('subquery', 2)
|
||||
labels = [a[1] for a in clusters]
|
||||
c1 = [a for a in clusters if a[1]==0]
|
||||
c2 = [a for a in clusters if a[1]==1]
|
||||
|
||||
self.assertEqual(len(np.unique(labels)),2)
|
||||
self.assertEqual(len(c1),20)
|
||||
self.assertEqual(len(c2),20)
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
|
||||
import crankshaft.clustering as cc
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
|
||||
class MoranTest(unittest.TestCase):
|
||||
"""Testing class for Moran's I functions"""
|
||||
|
||||
def setUp(self):
|
||||
plpy._reset()
|
||||
self.params = {"id_col": "cartodb_id",
|
||||
"attr1": "andy",
|
||||
"attr2": "jay_z",
|
||||
"subquery": "SELECT * FROM a_list",
|
||||
"geom_col": "the_geom",
|
||||
"num_ngbrs": 321}
|
||||
self.neighbors_data = json.loads(open(fixture_file('neighbors.json')).read())
|
||||
self.moran_data = json.loads(open(fixture_file('moran.json')).read())
|
||||
|
||||
def test_map_quads(self):
|
||||
"""Test map_quads"""
|
||||
self.assertEqual(cc.map_quads(1), 'HH')
|
||||
self.assertEqual(cc.map_quads(2), 'LH')
|
||||
self.assertEqual(cc.map_quads(3), 'LL')
|
||||
self.assertEqual(cc.map_quads(4), 'HL')
|
||||
self.assertEqual(cc.map_quads(33), None)
|
||||
self.assertEqual(cc.map_quads('andy'), None)
|
||||
|
||||
def test_quad_position(self):
|
||||
"""Test lisa_sig_vals"""
|
||||
|
||||
quads = np.array([1, 2, 3, 4], np.int)
|
||||
|
||||
ans = np.array(['HH', 'LH', 'LL', 'HL'])
|
||||
test_ans = cc.quad_position(quads)
|
||||
|
||||
self.assertTrue((test_ans == ans).all())
|
||||
|
||||
def test_moran_local(self):
|
||||
"""Test Moran's I local"""
|
||||
data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1234)
|
||||
result = cc.moran_local('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
result = [(row[0], row[1]) for row in result]
|
||||
expected = self.moran_data
|
||||
for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected):
|
||||
self.assertAlmostEqual(res_val, exp_val)
|
||||
self.assertEqual(res_quad, exp_quad)
|
||||
|
||||
def test_moran_local_rate(self):
|
||||
"""Test Moran's I rate"""
|
||||
data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1234)
|
||||
result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
print 'result == None? ', result == None
|
||||
result = [(row[0], row[1]) for row in result]
|
||||
expected = self.moran_data
|
||||
for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected):
|
||||
self.assertAlmostEqual(res_val, exp_val)
|
||||
|
||||
def test_moran(self):
|
||||
"""Test Moran's I global"""
|
||||
data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1235)
|
||||
result = cc.moran('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
print 'result == None?', result == None
|
||||
result_moran = result[0][0]
|
||||
expected_moran = np.array([row[0] for row in self.moran_data]).mean()
|
||||
self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)
|
||||
107
release/python/0.0.4/crankshaft/test/test_pysal_utils.py
Normal file
107
release/python/0.0.4/crankshaft/test/test_pysal_utils.py
Normal file
@@ -0,0 +1,107 @@
|
||||
import unittest
|
||||
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
|
||||
|
||||
class PysalUtilsTest(unittest.TestCase):
|
||||
"""Testing class for utility functions related to PySAL integrations"""
|
||||
|
||||
def setUp(self):
|
||||
self.params = {"id_col": "cartodb_id",
|
||||
"attr1": "andy",
|
||||
"attr2": "jay_z",
|
||||
"subquery": "SELECT * FROM a_list",
|
||||
"geom_col": "the_geom",
|
||||
"num_ngbrs": 321}
|
||||
|
||||
def test_query_attr_select(self):
|
||||
"""Test query_attr_select"""
|
||||
|
||||
ans = "i.\"{attr1}\"::numeric As attr1, " \
|
||||
"i.\"{attr2}\"::numeric As attr2, "
|
||||
|
||||
self.assertEqual(pu.query_attr_select(self.params), ans)
|
||||
|
||||
def test_query_attr_where(self):
|
||||
"""Test pu.query_attr_where"""
|
||||
|
||||
ans = "idx_replace.\"{attr1}\" IS NOT NULL AND " \
|
||||
"idx_replace.\"{attr2}\" IS NOT NULL AND " \
|
||||
"idx_replace.\"{attr2}\" <> 0"
|
||||
|
||||
self.assertEqual(pu.query_attr_where(self.params), ans)
|
||||
|
||||
def test_knn(self):
|
||||
"""Test knn neighbors constructor"""
|
||||
|
||||
ans = "SELECT i.\"cartodb_id\" As id, " \
|
||||
"i.\"andy\"::numeric As attr1, " \
|
||||
"i.\"jay_z\"::numeric As attr2, " \
|
||||
"(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
|
||||
"FROM (SELECT * FROM a_list) As j " \
|
||||
"WHERE " \
|
||||
"i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
|
||||
"j.\"andy\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" <> 0 " \
|
||||
"ORDER BY " \
|
||||
"j.\"the_geom\" <-> i.\"the_geom\" ASC " \
|
||||
"LIMIT 321)) As neighbors " \
|
||||
"FROM (SELECT * FROM a_list) As i " \
|
||||
"WHERE i.\"andy\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" <> 0 " \
|
||||
"ORDER BY i.\"cartodb_id\" ASC;"
|
||||
|
||||
self.assertEqual(pu.knn(self.params), ans)
|
||||
|
||||
def test_queen(self):
|
||||
"""Test queen neighbors constructor"""
|
||||
|
||||
ans = "SELECT i.\"cartodb_id\" As id, " \
|
||||
"i.\"andy\"::numeric As attr1, " \
|
||||
"i.\"jay_z\"::numeric As attr2, " \
|
||||
"(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
|
||||
"FROM (SELECT * FROM a_list) As j " \
|
||||
"WHERE " \
|
||||
"i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
|
||||
"ST_Touches(i.\"the_geom\", " \
|
||||
"j.\"the_geom\") AND " \
|
||||
"j.\"andy\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" IS NOT NULL AND " \
|
||||
"j.\"jay_z\" <> 0)" \
|
||||
") As neighbors " \
|
||||
"FROM (SELECT * FROM a_list) As i " \
|
||||
"WHERE i.\"andy\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" IS NOT NULL AND " \
|
||||
"i.\"jay_z\" <> 0 " \
|
||||
"ORDER BY i.\"cartodb_id\" ASC;"
|
||||
|
||||
self.assertEqual(pu.queen(self.params), ans)
|
||||
|
||||
def test_construct_neighbor_query(self):
|
||||
"""Test construct_neighbor_query"""
|
||||
|
||||
# Compare to raw knn query
|
||||
self.assertEqual(pu.construct_neighbor_query('knn', self.params),
|
||||
pu.knn(self.params))
|
||||
|
||||
def test_get_attributes(self):
|
||||
"""Test get_attributes"""
|
||||
|
||||
## need to add tests
|
||||
|
||||
self.assertEqual(True, True)
|
||||
|
||||
def test_get_weight(self):
|
||||
"""Test get_weight"""
|
||||
|
||||
self.assertEqual(True, True)
|
||||
|
||||
def test_empty_zipped_array(self):
|
||||
"""Test empty_zipped_array"""
|
||||
ans2 = [(None, None)]
|
||||
ans4 = [(None, None, None, None)]
|
||||
self.assertEqual(pu.empty_zipped_array(2), ans2)
|
||||
self.assertEqual(pu.empty_zipped_array(4), ans4)
|
||||
5
release/python/0.1.0/crankshaft/crankshaft/__init__.py
Normal file
5
release/python/0.1.0/crankshaft/crankshaft/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Import all modules"""
|
||||
import crankshaft.random_seeds
|
||||
import crankshaft.clustering
|
||||
import crankshaft.space_time_dynamics
|
||||
import crankshaft.segmentation
|
||||
@@ -0,0 +1,3 @@
|
||||
"""Import all functions from for clustering"""
|
||||
from moran import *
|
||||
from kmeans import *
|
||||
@@ -0,0 +1,18 @@
|
||||
from sklearn.cluster import KMeans
|
||||
import plpy
|
||||
|
||||
def kmeans(query, no_clusters, no_init=20):
|
||||
data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids,
|
||||
array_agg(ST_X(the_geom) order by cartodb_id) xs,
|
||||
array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a
|
||||
where the_geom is not null
|
||||
'''.format(query=query))
|
||||
|
||||
xs = data[0]['xs']
|
||||
ys = data[0]['ys']
|
||||
ids = data[0]['ids']
|
||||
|
||||
km = KMeans(n_clusters= no_clusters, n_init=no_init)
|
||||
labels = km.fit_predict(zip(xs,ys))
|
||||
return zip(ids,labels)
|
||||
|
||||
262
release/python/0.1.0/crankshaft/crankshaft/clustering/moran.py
Normal file
262
release/python/0.1.0/crankshaft/crankshaft/clustering/moran.py
Normal file
@@ -0,0 +1,262 @@
|
||||
"""
|
||||
Moran's I geostatistics (global clustering & outliers presence)
|
||||
"""
|
||||
|
||||
# TODO: Fill in local neighbors which have null/NoneType values with the
|
||||
# average of the their neighborhood
|
||||
|
||||
import pysal as ps
|
||||
import plpy
|
||||
from collections import OrderedDict
|
||||
|
||||
# crankshaft module
|
||||
import crankshaft.pysal_utils as pu
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
def moran(subquery, attr_name,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I (global)
|
||||
Implementation building neighbors with a PostGIS database and Moran's I
|
||||
core clusters with PySAL.
|
||||
Andy Eschbacher
|
||||
"""
|
||||
qvals = OrderedDict([("id_col", id_col),
|
||||
("attr1", attr_name),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
plpy.notice('** Query: %s' % query)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(2)
|
||||
plpy.notice('** Query returned with %d rows' % len(result))
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
return pu.empty_zipped_array(2)
|
||||
|
||||
## collect attributes
|
||||
attr_vals = pu.get_attributes(result)
|
||||
|
||||
## calculate weights
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
## calculate moran global
|
||||
moran_global = ps.esda.moran.Moran(attr_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
return zip([moran_global.I], [moran_global.EI])
|
||||
|
||||
def moran_local(subquery, attr,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I implementation for PL/Python
|
||||
Andy Eschbacher
|
||||
"""
|
||||
|
||||
# geometries with attributes that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
|
||||
qvals = OrderedDict([("id_col", id_col),
|
||||
("attr1", attr),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(5)
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
return pu.empty_zipped_array(5)
|
||||
|
||||
attr_vals = pu.get_attributes(result)
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
# find quadrants for each geometry
|
||||
quads = quad_position(lisa.q)
|
||||
|
||||
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
def moran_rate(subquery, numerator, denominator,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I Rate (global)
|
||||
Andy Eschbacher
|
||||
"""
|
||||
qvals = OrderedDict([("id_col", id_col),
|
||||
("attr1", numerator),
|
||||
("attr2", denominator)
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
plpy.notice('** Query: %s' % query)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(2)
|
||||
plpy.notice('** Query returned with %d rows' % len(result))
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
return pu.empty_zipped_array(2)
|
||||
|
||||
## collect attributes
|
||||
numer = pu.get_attributes(result, 1)
|
||||
denom = pu.get_attributes(result, 2)
|
||||
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
## calculate moran global rate
|
||||
lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
|
||||
return zip([lisa_rate.I], [lisa_rate.EI])
|
||||
|
||||
def moran_local_rate(subquery, numerator, denominator,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I Local Rate
|
||||
Andy Eschbacher
|
||||
"""
|
||||
# geometries with values that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
|
||||
qvals = OrderedDict([("id_col", id_col),
|
||||
("numerator", numerator),
|
||||
("denominator", denominator),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(5)
|
||||
except plpy.SPIError:
|
||||
plpy.error('Error: areas of interest query failed, check input parameters')
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||
return pu.empty_zipped_array(5)
|
||||
|
||||
## collect attributes
|
||||
numer = pu.get_attributes(result, 1)
|
||||
denom = pu.get_attributes(result, 2)
|
||||
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
|
||||
# find quadrants for each geometry
|
||||
quads = quad_position(lisa.q)
|
||||
|
||||
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
def moran_local_bv(subquery, attr1, attr2,
|
||||
permutations, geom_col, id_col, w_type, num_ngbrs):
|
||||
"""
|
||||
Moran's I (local) Bivariate (untested)
|
||||
"""
|
||||
plpy.notice('** Constructing query')
|
||||
|
||||
qvals = OrderedDict([("id_col", id_col),
|
||||
("attr1", attr1),
|
||||
("attr2", attr2),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(4)
|
||||
except plpy.SPIError:
|
||||
plpy.error("Error: areas of interest query failed, " \
|
||||
"check input parameters")
|
||||
plpy.notice('** Query failed: "%s"' % query)
|
||||
return pu.empty_zipped_array(4)
|
||||
|
||||
## collect attributes
|
||||
attr1_vals = pu.get_attributes(result, 1)
|
||||
attr2_vals = pu.get_attributes(result, 2)
|
||||
|
||||
# create weights
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
plpy.notice("len of Is: %d" % len(lisa.Is))
|
||||
|
||||
# find clustering of significance
|
||||
lisa_sig = quad_position(lisa.q)
|
||||
|
||||
plpy.notice('** Finished calculations')
|
||||
|
||||
return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
|
||||
|
||||
# Low level functions ----------------------------------------
|
||||
|
||||
def map_quads(coord):
|
||||
"""
|
||||
Map a quadrant number to Moran's I designation
|
||||
HH=1, LH=2, LL=3, HL=4
|
||||
Input:
|
||||
@param coord (int): quadrant of a specific measurement
|
||||
Output:
|
||||
classification (one of 'HH', 'LH', 'LL', or 'HL')
|
||||
"""
|
||||
if coord == 1:
|
||||
return 'HH'
|
||||
elif coord == 2:
|
||||
return 'LH'
|
||||
elif coord == 3:
|
||||
return 'LL'
|
||||
elif coord == 4:
|
||||
return 'HL'
|
||||
else:
|
||||
return None
|
||||
|
||||
def quad_position(quads):
|
||||
"""
|
||||
Produce Moran's I classification based of n
|
||||
Input:
|
||||
@param quads ndarray: an array of quads classified by
|
||||
1-4 (PySAL default)
|
||||
Output:
|
||||
@param list: an array of quads classied by 'HH', 'LL', etc.
|
||||
"""
|
||||
return [map_quads(q) for q in quads]
|
||||
@@ -0,0 +1,2 @@
|
||||
"""Import all functions for pysal_utils"""
|
||||
from crankshaft.pysal_utils.pysal_utils import *
|
||||
@@ -0,0 +1,188 @@
|
||||
"""
|
||||
Utilities module for generic PySAL functionality, mainly centered on
|
||||
translating queries into numpy arrays or PySAL weights objects
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
|
||||
def construct_neighbor_query(w_type, query_vals):
|
||||
"""Return query (a string) used for finding neighbors
|
||||
@param w_type text: type of neighbors to calculate ('knn' or 'queen')
|
||||
@param query_vals dict: values used to construct the query
|
||||
"""
|
||||
|
||||
if w_type.lower() == 'knn':
|
||||
return knn(query_vals)
|
||||
else:
|
||||
return queen(query_vals)
|
||||
|
||||
## Build weight object
|
||||
def get_weight(query_res, w_type='knn', num_ngbrs=5):
|
||||
"""
|
||||
Construct PySAL weight from return value of query
|
||||
@param query_res dict-like: query results with attributes and neighbors
|
||||
"""
|
||||
# if w_type.lower() == 'knn':
|
||||
# row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs
|
||||
# weights = {x['id']: row_normed_weights for x in query_res}
|
||||
# else:
|
||||
# weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors'])
|
||||
# if len(x['neighbors']) > 0
|
||||
# else [] for x in query_res}
|
||||
|
||||
neighbors = {x['id']: x['neighbors'] for x in query_res}
|
||||
print 'len of neighbors: %d' % len(neighbors)
|
||||
|
||||
built_weight = ps.W(neighbors)
|
||||
built_weight.transform = 'r'
|
||||
|
||||
return built_weight
|
||||
|
||||
def query_attr_select(params):
|
||||
"""
|
||||
Create portion of SELECT statement for attributes inolved in query.
|
||||
@param params: dict of information used in query (column names,
|
||||
table name, etc.)
|
||||
"""
|
||||
|
||||
attr_string = ""
|
||||
template = "i.\"%(col)s\"::numeric As attr%(alias_num)s, "
|
||||
|
||||
if 'time_cols' in params:
|
||||
## if markov analysis
|
||||
attrs = params['time_cols']
|
||||
|
||||
for idx, val in enumerate(attrs):
|
||||
attr_string += template % {"col": val, "alias_num": idx + 1}
|
||||
else:
|
||||
## if moran's analysis
|
||||
attrs = [k for k in params
|
||||
if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs', 'subquery')]
|
||||
|
||||
for idx, val in enumerate(sorted(attrs)):
|
||||
attr_string += template % {"col": params[val], "alias_num": idx + 1}
|
||||
|
||||
return attr_string
|
||||
|
||||
def query_attr_where(params):
|
||||
"""
|
||||
Construct where conditions when building neighbors query
|
||||
Create portion of WHERE clauses for weeding out NULL-valued geometries
|
||||
Input: dict of params:
|
||||
{'subquery': ...,
|
||||
'numerator': 'data1',
|
||||
'denominator': 'data2',
|
||||
'': ...}
|
||||
Output: 'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL'
|
||||
Input:
|
||||
{'subquery': ...,
|
||||
'time_cols': ['time1', 'time2', 'time3'],
|
||||
'etc': ...}
|
||||
Output: 'idx_replace."time1" IS NOT NULL AND idx_replace."time2" IS NOT
|
||||
NULL AND idx_replace."time3" IS NOT NULL'
|
||||
"""
|
||||
attr_string = []
|
||||
template = "idx_replace.\"%s\" IS NOT NULL"
|
||||
|
||||
if 'time_cols' in params:
|
||||
## markov where clauses
|
||||
attrs = params['time_cols']
|
||||
# add values to template
|
||||
for attr in attrs:
|
||||
attr_string.append(template % attr)
|
||||
else:
|
||||
## moran where clauses
|
||||
|
||||
# get keys
|
||||
attrs = sorted([k for k in params
|
||||
if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs', 'subquery')])
|
||||
# add values to template
|
||||
for attr in attrs:
|
||||
attr_string.append(template % params[attr])
|
||||
|
||||
if len(attrs) == 2:
|
||||
attr_string.append("idx_replace.\"%s\" <> 0" % params[attrs[1]])
|
||||
|
||||
out = " AND ".join(attr_string)
|
||||
|
||||
return out
|
||||
|
||||
def knn(params):
|
||||
"""SQL query for k-nearest neighbors.
|
||||
@param vars: dict of values to fill template
|
||||
"""
|
||||
|
||||
attr_select = query_attr_select(params)
|
||||
attr_where = query_attr_where(params)
|
||||
|
||||
replacements = {"attr_select": attr_select,
|
||||
"attr_where_i": attr_where.replace("idx_replace", "i"),
|
||||
"attr_where_j": attr_where.replace("idx_replace", "j")}
|
||||
|
||||
query = "SELECT " \
|
||||
"i.\"{id_col}\" As id, " \
|
||||
"%(attr_select)s" \
|
||||
"(SELECT ARRAY(SELECT j.\"{id_col}\" " \
|
||||
"FROM ({subquery}) As j " \
|
||||
"WHERE " \
|
||||
"i.\"{id_col}\" <> j.\"{id_col}\" AND " \
|
||||
"%(attr_where_j)s " \
|
||||
"ORDER BY " \
|
||||
"j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \
|
||||
"LIMIT {num_ngbrs})" \
|
||||
") As neighbors " \
|
||||
"FROM ({subquery}) As i " \
|
||||
"WHERE " \
|
||||
"%(attr_where_i)s " \
|
||||
"ORDER BY i.\"{id_col}\" ASC;" % replacements
|
||||
|
||||
return query.format(**params)
|
||||
|
||||
## SQL query for finding queens neighbors (all contiguous polygons)
|
||||
def queen(params):
|
||||
"""SQL query for queen neighbors.
|
||||
@param params dict: information to fill query
|
||||
"""
|
||||
attr_select = query_attr_select(params)
|
||||
attr_where = query_attr_where(params)
|
||||
|
||||
replacements = {"attr_select": attr_select,
|
||||
"attr_where_i": attr_where.replace("idx_replace", "i"),
|
||||
"attr_where_j": attr_where.replace("idx_replace", "j")}
|
||||
|
||||
query = "SELECT " \
|
||||
"i.\"{id_col}\" As id, " \
|
||||
"%(attr_select)s" \
|
||||
"(SELECT ARRAY(SELECT j.\"{id_col}\" " \
|
||||
"FROM ({subquery}) As j " \
|
||||
"WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \
|
||||
"ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \
|
||||
"%(attr_where_j)s)" \
|
||||
") As neighbors " \
|
||||
"FROM ({subquery}) As i " \
|
||||
"WHERE " \
|
||||
"%(attr_where_i)s " \
|
||||
"ORDER BY i.\"{id_col}\" ASC;" % replacements
|
||||
|
||||
return query.format(**params)
|
||||
|
||||
## to add more weight methods open a ticket or pull request
|
||||
|
||||
def get_attributes(query_res, attr_num=1):
|
||||
"""
|
||||
@param query_res: query results with attributes and neighbors
|
||||
@param attr_num: attribute number (1, 2, ...)
|
||||
"""
|
||||
return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float)
|
||||
|
||||
def empty_zipped_array(num_nones):
|
||||
"""
|
||||
prepare return values for cases of empty weights objects (no neighbors)
|
||||
Input:
|
||||
@param num_nones int: number of columns (e.g., 4)
|
||||
Output:
|
||||
[(None, None, None, None)]
|
||||
"""
|
||||
|
||||
return [tuple([None] * num_nones)]
|
||||
11
release/python/0.1.0/crankshaft/crankshaft/random_seeds.py
Normal file
11
release/python/0.1.0/crankshaft/crankshaft/random_seeds.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Random seed generator used for non-deterministic functions in crankshaft"""
|
||||
import random
|
||||
import numpy
|
||||
|
||||
def set_random_seeds(value):
|
||||
"""
|
||||
Set the seeds of the RNGs (Random Number Generators)
|
||||
used internally.
|
||||
"""
|
||||
random.seed(value)
|
||||
numpy.random.seed(value)
|
||||
@@ -0,0 +1 @@
|
||||
from segmentation import *
|
||||
@@ -0,0 +1,176 @@
|
||||
"""
|
||||
Segmentation creation and prediction
|
||||
"""
|
||||
|
||||
import sklearn
|
||||
import numpy as np
|
||||
import plpy
|
||||
from sklearn.ensemble import GradientBoostingRegressor
|
||||
from sklearn import metrics
|
||||
from sklearn.cross_validation import train_test_split
|
||||
|
||||
# Lower level functions
|
||||
#----------------------
|
||||
|
||||
def replace_nan_with_mean(array):
|
||||
"""
|
||||
Input:
|
||||
@param array: an array of floats which may have null-valued entries
|
||||
Output:
|
||||
array with nans filled in with the mean of the dataset
|
||||
"""
|
||||
# returns an array of rows and column indices
|
||||
indices = np.where(np.isnan(array))
|
||||
|
||||
# iterate through entries which have nan values
|
||||
for row, col in zip(*indices):
|
||||
array[row, col] = np.mean(array[~np.isnan(array[:, col]), col])
|
||||
|
||||
return array
|
||||
|
||||
def get_data(variable, feature_columns, query):
|
||||
"""
|
||||
Fetch data from the database, clean, and package into
|
||||
numpy arrays
|
||||
Input:
|
||||
@param variable: name of the target variable
|
||||
@param feature_columns: list of column names
|
||||
@param query: subquery that data is pulled from for the packaging
|
||||
Output:
|
||||
prepared data, packaged into NumPy arrays
|
||||
"""
|
||||
|
||||
columns = ','.join(['array_agg("{col}") As "{col}"'.format(col=col) for col in feature_columns])
|
||||
|
||||
try:
|
||||
data = plpy.execute('''SELECT array_agg("{variable}") As target, {columns} FROM ({query}) As a'''.format(
|
||||
variable=variable,
|
||||
columns=columns,
|
||||
query=query))
|
||||
except Exception, e:
|
||||
plpy.error('Failed to access data to build segmentation model: %s' % e)
|
||||
|
||||
# extract target data from plpy object
|
||||
target = np.array(data[0]['target'])
|
||||
|
||||
# put n feature data arrays into an n x m array of arrays
|
||||
features = np.column_stack([np.array(data[0][col], dtype=float) for col in feature_columns])
|
||||
|
||||
return replace_nan_with_mean(target), replace_nan_with_mean(features)
|
||||
|
||||
# High level interface
|
||||
# --------------------
|
||||
|
||||
def create_and_predict_segment_agg(target, features, target_features, target_ids, model_parameters):
|
||||
"""
|
||||
Version of create_and_predict_segment that works on arrays that come stright form the SQL calling
|
||||
the function.
|
||||
|
||||
Input:
|
||||
@param target: The 1D array of lenth NSamples containing the target variable we want the model to predict
|
||||
@param features: Thw 2D array of size NSamples * NFeatures that form the imput to the model
|
||||
@param target_ids: A 1D array of target_ids that will be used to associate the results of the prediction with the rows which they come from
|
||||
@param model_parameters: A dictionary containing parameters for the model.
|
||||
"""
|
||||
|
||||
clean_target = replace_nan_with_mean(target)
|
||||
clean_features = replace_nan_with_mean(features)
|
||||
target_features = replace_nan_with_mean(target_features)
|
||||
|
||||
model, accuracy = train_model(clean_target, clean_features, model_parameters, 0.2)
|
||||
prediction = model.predict(target_features)
|
||||
accuracy_array = [accuracy]*prediction.shape[0]
|
||||
return zip(target_ids, prediction, np.full(prediction.shape, accuracy_array))
|
||||
|
||||
|
||||
|
||||
def create_and_predict_segment(query, variable, target_query, model_params):
|
||||
"""
|
||||
generate a segment with machine learning
|
||||
Stuart Lynn
|
||||
"""
|
||||
|
||||
## fetch column names
|
||||
try:
|
||||
columns = plpy.execute('SELECT * FROM ({query}) As a LIMIT 1 '.format(query=query))[0].keys()
|
||||
except Exception, e:
|
||||
plpy.error('Failed to build segmentation model: %s' % e)
|
||||
|
||||
## extract column names to be used in building the segmentation model
|
||||
feature_columns = set(columns) - set([variable, 'cartodb_id', 'the_geom', 'the_geom_webmercator'])
|
||||
## get data from database
|
||||
target, features = get_data(variable, feature_columns, query)
|
||||
|
||||
model, accuracy = train_model(target, features, model_params, 0.2)
|
||||
cartodb_ids, result = predict_segment(model, feature_columns, target_query)
|
||||
accuracy_array = [accuracy]*result.shape[0]
|
||||
return zip(cartodb_ids, result, accuracy_array)
|
||||
|
||||
|
||||
def train_model(target, features, model_params, test_split):
|
||||
"""
|
||||
Train the Gradient Boosting model on the provided data and calculate the accuracy of the model
|
||||
Input:
|
||||
@param target: 1D Array of the variable that the model is to be trianed to predict
|
||||
@param features: 2D Array NSamples * NFeatures to use in trining the model
|
||||
@param model_params: A dictionary of model parameters, the full specification can be found on the
|
||||
scikit learn page for [GradientBoostingRegressor](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html)
|
||||
@parma test_split: The fraction of the data to be withheld for testing the model / calculating the accuray
|
||||
"""
|
||||
features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=test_split)
|
||||
model = GradientBoostingRegressor(**model_params)
|
||||
model.fit(features_train, target_train)
|
||||
accuracy = calculate_model_accuracy(model, features, target)
|
||||
return model, accuracy
|
||||
|
||||
def calculate_model_accuracy(model, features, target):
|
||||
"""
|
||||
Calculate the mean squared error of the model prediction
|
||||
Input:
|
||||
@param model: model trained from input features
|
||||
@param features: features to make a prediction from
|
||||
@param target: target to compare prediction to
|
||||
Output:
|
||||
mean squared error of the model prection compared to the target
|
||||
"""
|
||||
prediction = model.predict(features)
|
||||
return metrics.mean_squared_error(prediction, target)
|
||||
|
||||
def predict_segment(model, features, target_query):
|
||||
"""
|
||||
Use the provided model to predict the values for the new feature set
|
||||
Input:
|
||||
@param model: The pretrained model
|
||||
@features: A list of features to use in the model prediction (list of column names)
|
||||
@target_query: The query to run to obtain the data to predict on and the cartdb_ids associated with it.
|
||||
"""
|
||||
|
||||
batch_size = 1000
|
||||
joined_features = ','.join(['"{0}"::numeric'.format(a) for a in features])
|
||||
|
||||
try:
|
||||
cursor = plpy.cursor('SELECT Array[{joined_features}] As features FROM ({target_query}) As a'.format(
|
||||
joined_features=joined_features,
|
||||
target_query=target_query))
|
||||
except Exception, e:
|
||||
plpy.error('Failed to build segmentation model: %s' % e)
|
||||
|
||||
results = []
|
||||
|
||||
while True:
|
||||
rows = cursor.fetch(batch_size)
|
||||
if not rows:
|
||||
break
|
||||
batch = np.row_stack([np.array(row['features'], dtype=float) for row in rows])
|
||||
|
||||
#Need to fix this. Should be global mean. This will cause weird effects
|
||||
batch = replace_nan_with_mean(batch)
|
||||
prediction = model.predict(batch)
|
||||
results.append(prediction)
|
||||
|
||||
try:
|
||||
cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids']
|
||||
except Exception, e:
|
||||
plpy.error('Failed to build segmentation model: %s' % e)
|
||||
|
||||
return cartodb_ids, np.concatenate(results)
|
||||
@@ -0,0 +1,2 @@
|
||||
"""Import all functions from clustering libraries."""
|
||||
from markov import *
|
||||
@@ -0,0 +1,189 @@
|
||||
"""
|
||||
Spatial dynamics measurements using Spatial Markov
|
||||
"""
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
import plpy
|
||||
import crankshaft.pysal_utils as pu
|
||||
|
||||
def spatial_markov_trend(subquery, time_cols, num_classes=7,
|
||||
w_type='knn', num_ngbrs=5, permutations=0,
|
||||
geom_col='the_geom', id_col='cartodb_id'):
|
||||
"""
|
||||
Predict the trends of a unit based on:
|
||||
1. history of its transitions to different classes (e.g., 1st quantile -> 2nd quantile)
|
||||
2. average class of its neighbors
|
||||
|
||||
Inputs:
|
||||
@param subquery string: e.g., SELECT the_geom, cartodb_id,
|
||||
interesting_time_column FROM table_name
|
||||
@param time_cols list of strings: list of strings of column names
|
||||
@param num_classes (optional): number of classes to break distribution
|
||||
of values into. Currently uses quantile bins.
|
||||
@param w_type string (optional): weight type ('knn' or 'queen')
|
||||
@param num_ngbrs int (optional): number of neighbors (if knn type)
|
||||
@param permutations int (optional): number of permutations for test
|
||||
stats
|
||||
@param geom_col string (optional): name of column which contains the
|
||||
geometries
|
||||
@param id_col string (optional): name of column which has the ids of
|
||||
the table
|
||||
|
||||
Outputs:
|
||||
@param trend_up float: probablity that a geom will move to a higher
|
||||
class
|
||||
@param trend_down float: probablity that a geom will move to a lower
|
||||
class
|
||||
@param trend float: (trend_up - trend_down) / trend_static
|
||||
@param volatility float: a measure of the volatility based on
|
||||
probability stddev(prob array)
|
||||
"""
|
||||
|
||||
if len(time_cols) < 2:
|
||||
plpy.error('More than one time column needs to be passed')
|
||||
|
||||
qvals = {"id_col": id_col,
|
||||
"time_cols": time_cols,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs}
|
||||
|
||||
try:
|
||||
query_result = plpy.execute(
|
||||
pu.construct_neighbor_query(w_type, qvals)
|
||||
)
|
||||
if len(query_result) == 0:
|
||||
return zip([None], [None], [None], [None], [None])
|
||||
except plpy.SPIError, err:
|
||||
plpy.debug('Query failed with exception %s: %s' % (err, pu.construct_neighbor_query(w_type, qvals)))
|
||||
plpy.error('Query failed, check the input parameters')
|
||||
return zip([None], [None], [None], [None], [None])
|
||||
|
||||
## build weight
|
||||
weights = pu.get_weight(query_result, w_type)
|
||||
weights.transform = 'r'
|
||||
|
||||
## prep time data
|
||||
t_data = get_time_data(query_result, time_cols)
|
||||
|
||||
plpy.debug('shape of t_data %d, %d' % t_data.shape)
|
||||
plpy.debug('number of weight objects: %d, %d' % (weights.sparse).shape)
|
||||
plpy.debug('first num elements: %f' % t_data[0, 0])
|
||||
|
||||
sp_markov_result = ps.Spatial_Markov(t_data,
|
||||
weights,
|
||||
k=num_classes,
|
||||
fixed=False,
|
||||
permutations=permutations)
|
||||
|
||||
## get lag classes
|
||||
lag_classes = ps.Quantiles(
|
||||
ps.lag_spatial(weights, t_data[:, -1]),
|
||||
k=num_classes).yb
|
||||
|
||||
## look up probablity distribution for each unit according to class and lag class
|
||||
prob_dist = get_prob_dist(sp_markov_result.P,
|
||||
lag_classes,
|
||||
sp_markov_result.classes[:, -1])
|
||||
|
||||
## find the ups and down and overall distribution of each cell
|
||||
trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist,
|
||||
sp_markov_result.classes[:, -1])
|
||||
|
||||
## output the results
|
||||
return zip(trend, trend_up, trend_down, volatility, weights.id_order)
|
||||
|
||||
def get_time_data(markov_data, time_cols):
|
||||
"""
|
||||
Extract the time columns and bin appropriately
|
||||
"""
|
||||
num_attrs = len(time_cols)
|
||||
return np.array([[x['attr' + str(i)] for x in markov_data]
|
||||
for i in range(1, num_attrs+1)], dtype=float).transpose()
|
||||
|
||||
## not currently used
|
||||
def rebin_data(time_data, num_time_per_bin):
|
||||
"""
|
||||
Convert an n x l matrix into an (n/m) x l matrix where the values are
|
||||
reduced (averaged) for the intervening states:
|
||||
1 2 3 4 1.5 3.5
|
||||
5 6 7 8 -> 5.5 7.5
|
||||
9 8 7 6 8.5 6.5
|
||||
5 4 3 2 4.5 2.5
|
||||
|
||||
if m = 2, the 4 x 4 matrix is transformed to a 2 x 4 matrix.
|
||||
|
||||
This process effectively resamples the data at a longer time span n
|
||||
units longer than the input data.
|
||||
For cases when there is a remainder (remainder(5/3) = 2), the remaining
|
||||
two columns are binned together as the last time period, while the
|
||||
first three are binned together for the first period.
|
||||
|
||||
Input:
|
||||
@param time_data n x l ndarray: measurements of an attribute at
|
||||
different time intervals
|
||||
@param num_time_per_bin int: number of columns to average into a new
|
||||
column
|
||||
Output:
|
||||
ceil(n / m) x l ndarray of resampled time series
|
||||
"""
|
||||
|
||||
if time_data.shape[1] % num_time_per_bin == 0:
|
||||
## if fit is perfect, then use it
|
||||
n_max = time_data.shape[1] / num_time_per_bin
|
||||
else:
|
||||
## fit remainders into an additional column
|
||||
n_max = time_data.shape[1] / num_time_per_bin + 1
|
||||
|
||||
return np.array([time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1)
|
||||
for i in range(n_max)]).T
|
||||
|
||||
def get_prob_dist(transition_matrix, lag_indices, unit_indices):
|
||||
"""
|
||||
Given an array of transition matrices, look up the probability
|
||||
associated with the arrangements passed
|
||||
|
||||
Input:
|
||||
@param transition_matrix ndarray[k,k,k]:
|
||||
@param lag_indices ndarray:
|
||||
@param unit_indices ndarray:
|
||||
|
||||
Output:
|
||||
Array of probability distributions
|
||||
"""
|
||||
|
||||
return np.array([transition_matrix[(lag_indices[i], unit_indices[i])]
|
||||
for i in range(len(lag_indices))])
|
||||
|
||||
def get_prob_stats(prob_dist, unit_indices):
|
||||
"""
|
||||
get the statistics of the probability distributions
|
||||
|
||||
Outputs:
|
||||
@param trend_up ndarray(float): sum of probabilities for upward
|
||||
movement (relative to the unit index of that prob)
|
||||
@param trend_down ndarray(float): sum of probabilities for downward
|
||||
movement (relative to the unit index of that prob)
|
||||
@param trend ndarray(float): difference of upward and downward
|
||||
movements
|
||||
"""
|
||||
|
||||
num_elements = len(unit_indices)
|
||||
trend_up = np.empty(num_elements, dtype=float)
|
||||
trend_down = np.empty(num_elements, dtype=float)
|
||||
trend = np.empty(num_elements, dtype=float)
|
||||
|
||||
for i in range(num_elements):
|
||||
trend_up[i] = prob_dist[i, (unit_indices[i]+1):].sum()
|
||||
trend_down[i] = prob_dist[i, :unit_indices[i]].sum()
|
||||
if prob_dist[i, unit_indices[i]] > 0.0:
|
||||
trend[i] = (trend_up[i] - trend_down[i]) / prob_dist[i, unit_indices[i]]
|
||||
else:
|
||||
trend[i] = None
|
||||
|
||||
## calculate volatility of distribution
|
||||
volatility = prob_dist.std(axis=1)
|
||||
|
||||
return trend_up, trend_down, trend, volatility
|
||||
49
release/python/0.1.0/crankshaft/setup.py
Normal file
49
release/python/0.1.0/crankshaft/setup.py
Normal file
@@ -0,0 +1,49 @@
|
||||
|
||||
"""
|
||||
CartoDB Spatial Analysis Python Library
|
||||
See:
|
||||
https://github.com/CartoDB/crankshaft
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
setup(
|
||||
name='crankshaft',
|
||||
|
||||
version='0.1.0',
|
||||
|
||||
description='CartoDB Spatial Analysis Python Library',
|
||||
|
||||
url='https://github.com/CartoDB/crankshaft',
|
||||
|
||||
author='Data Services Team - CartoDB',
|
||||
author_email='dataservices@cartodb.com',
|
||||
|
||||
license='MIT',
|
||||
|
||||
classifiers=[
|
||||
'Development Status :: 3 - Alpha',
|
||||
'Intended Audience :: Mapping comunity',
|
||||
'Topic :: Maps :: Mapping Tools',
|
||||
'License :: OSI Approved :: MIT License',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
],
|
||||
|
||||
keywords='maps mapping tools spatial analysis geostatistics',
|
||||
|
||||
packages=find_packages(exclude=['contrib', 'docs', 'tests']),
|
||||
|
||||
extras_require={
|
||||
'dev': ['unittest'],
|
||||
'test': ['unittest', 'nose', 'mock'],
|
||||
},
|
||||
|
||||
# The choice of component versions is dictated by what's
|
||||
# provisioned in the production servers.
|
||||
# IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
|
||||
install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'],
|
||||
|
||||
requires=['pysal', 'numpy', 'sklearn'],
|
||||
|
||||
test_suite='test'
|
||||
)
|
||||
1
release/python/0.1.0/crankshaft/test/fixtures/kmeans.json
vendored
Normal file
1
release/python/0.1.0/crankshaft/test/fixtures/kmeans.json
vendored
Normal file
@@ -0,0 +1 @@
|
||||
[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
|
||||
1
release/python/0.1.0/crankshaft/test/fixtures/markov.json
vendored
Normal file
1
release/python/0.1.0/crankshaft/test/fixtures/markov.json
vendored
Normal file
@@ -0,0 +1 @@
|
||||
[[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]]
|
||||
52
release/python/0.1.0/crankshaft/test/fixtures/moran.json
vendored
Normal file
52
release/python/0.1.0/crankshaft/test/fixtures/moran.json
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
[[0.9319096128346788, "HH"],
|
||||
[-1.135787401862846, "HL"],
|
||||
[0.11732030672508517, "LL"],
|
||||
[0.6152779669180425, "LL"],
|
||||
[-0.14657336660125297, "LH"],
|
||||
[0.6967858120189607, "LL"],
|
||||
[0.07949310115714454, "HH"],
|
||||
[0.4703198759258987, "HH"],
|
||||
[0.4421125200498064, "HH"],
|
||||
[0.5724288737143592, "LL"],
|
||||
[0.8970743435692062, "LL"],
|
||||
[0.18327334401918674, "LL"],
|
||||
[-0.01466729201304962, "HL"],
|
||||
[0.3481559372544409, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329988, "HH"],
|
||||
[0.4373841193538136, "HH"],
|
||||
[0.15971286468915544, "LL"],
|
||||
[1.0543588860308968, "HH"],
|
||||
[1.7372866900020818, "HH"],
|
||||
[1.091998586053999, "LL"],
|
||||
[0.1171572584252222, "HH"],
|
||||
[0.08438455015300014, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329985, "HH"],
|
||||
[1.1627044812890683, "HH"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.795275137550483, "HH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.3010757406693439, "LL"],
|
||||
[2.8205795942839376, "HH"],
|
||||
[0.11259190602909264, "LL"],
|
||||
[-0.07116352791516614, "HL"],
|
||||
[-0.09945240794119009, "LH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.1832733440191868, "LL"],
|
||||
[-0.39054253768447705, "HL"],
|
||||
[-0.1672071289487642, "HL"],
|
||||
[0.3337669247916343, "HH"],
|
||||
[0.2584386102554792, "HH"],
|
||||
[-0.19733845476322634, "HL"],
|
||||
[-0.9379282899805409, "LH"],
|
||||
[-0.028770969951095866, "LH"],
|
||||
[0.051367269430983485, "LL"],
|
||||
[-0.2172548045913472, "LH"],
|
||||
[0.05136726943098351, "LL"],
|
||||
[0.04191046803899837, "LL"],
|
||||
[0.7482357030403517, "HH"],
|
||||
[-0.014585767863118111, "LH"],
|
||||
[0.5410013139159929, "HH"],
|
||||
[1.0223932668429925, "LL"],
|
||||
[1.4179402898927476, "LL"]]
|
||||
54
release/python/0.1.0/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
54
release/python/0.1.0/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
[
|
||||
{"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
|
||||
{"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
|
||||
{"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
|
||||
{"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
|
||||
{"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
|
||||
{"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
|
||||
{"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
|
||||
{"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
|
||||
{"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
|
||||
{"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
|
||||
{"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
|
||||
{"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
|
||||
{"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
|
||||
{"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
|
||||
{"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
|
||||
{"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
|
||||
{"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
|
||||
{"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
|
||||
{"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
|
||||
{"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
|
||||
{"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
|
||||
{"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
|
||||
{"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
|
||||
{"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
|
||||
{"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
|
||||
{"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
|
||||
{"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
|
||||
{"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
|
||||
{"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
|
||||
{"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
|
||||
{"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
|
||||
{"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
|
||||
{"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
|
||||
{"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
|
||||
{"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
|
||||
{"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
|
||||
{"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
|
||||
{"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
|
||||
{"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
|
||||
{"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
|
||||
{"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
|
||||
{"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
|
||||
{"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
|
||||
{"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
|
||||
{"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
|
||||
{"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
|
||||
{"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
|
||||
{"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
|
||||
{"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
|
||||
{"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
|
||||
{"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
|
||||
{"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
|
||||
]
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user