Compare commits

...

151 Commits

Author SHA1 Message Date
Andy Eschbacher
e52dd64dad moves gwr to data analysis provider framework 2017-01-04 11:52:44 -05:00
Andy Eschbacher
d404a66d99 Merge branch 'develop' into pysal_gwr 2017-01-04 11:23:02 -05:00
Mario de Frutos
34161fd8a4 Merge pull request #152 from CartoDB/develop
Version 0.5.1
2016-12-12 14:18:05 +01:00
Mario de Frutos
850f3f6a31 Merge pull request #151 from CartoDB/fixes_050_deploy
Correct upgrade for 0.5.1 version
2016-12-12 13:59:05 +01:00
Mario de Frutos
021738d9f8 Correct upgrade for 0.5.1 version 2016-12-12 13:51:38 +01:00
Mario de Frutos
161bb14c08 Merge pull request #149 from CartoDB/develop
Release 0.5.0
2016-12-12 11:25:01 +01:00
Mario de Frutos
f8739b6a68 Version 0.5.0 release artifacts 2016-12-02 13:35:43 +01:00
Mario de Frutos
5df846fe66 Merge pull request #145 from CartoDB/adds-nonspatial-kmeans
updates internal framework for python functions
2016-12-02 13:23:18 +01:00
Mario de Frutos
b9c4e6e8ef Merge branch 'develop' into adds-nonspatial-kmeans 2016-12-02 13:09:59 +01:00
Mario de Frutos
5c34e08c7d Remove old configuration for postgresql 9.5 in travis 2016-12-02 13:09:09 +01:00
Mario de Frutos
3c8ac7d45d Remove default postgres-9.5 from travis 2016-12-02 12:36:11 +01:00
Andy Eschbacher
59dc9434f7 moves getis to class-based framework 2016-12-01 17:06:21 -05:00
Mario de Frutos
2c6fcfc294 Merge branch 'develop' into adds-nonspatial-kmeans 2016-12-01 16:26:52 +01:00
Mario de Frutos
15b460eeb9 Merge pull request #142 from CartoDB/markov-error-fix
fix error variable name bug, pep8 updates
2016-12-01 10:44:20 +01:00
Mario de Frutos
b0dcd7f572 Merge pull request #137 from CartoDB/adds-outlier-functions
Adds (nonspatial) outlier functions
2016-12-01 10:43:51 +01:00
Mario de Frutos
2547318f59 Merge pull request #127 from CartoDB/adds-getis-analysis
Adds getis analysis
2016-12-01 10:42:13 +01:00
Mario de Frutos
25e453a882 Merge pull request #122 from CartoDB/moran-query-ordering-fix
Moran query ordering fix
2016-12-01 10:41:19 +01:00
Mario de Frutos
62076bb48c Merge pull request #126 from CartoDB/stuartlynn-patch-1
Update PULL_REQUEST_TEMPLATE.md
2016-12-01 10:39:29 +01:00
Andy Eschbacher
6ab1c285d9 places query gen in kmeans data provider 2016-11-30 10:08:36 -05:00
Andy Eschbacher
7efb064fd9 Merge branch 'pysal_gwr' of github.com:TaylorOshan/crankshaft into pysal_gwr 2016-11-29 15:52:11 -05:00
Andy Eschbacher
77b7217368 force array data types 2016-11-29 15:51:59 -05:00
Taylor Oshan
413e6aa5c7 add bw flag that accepts user input 2016-11-29 13:44:36 -07:00
Taylor Oshan
22ce970062 add bandwidth column 2016-11-29 13:22:42 -07:00
Taylor Oshan
f4ccfe712b flatten results 2016-11-29 11:49:19 -07:00
Andy Eschbacher
e6a9397373 pep8 updates 2016-11-29 13:30:07 -05:00
Andy Eschbacher
a9b7d2a9cc Merge branch 'pysal_gwr' of github.com:TaylorOshan/crankshaft into pysal_gwr 2016-11-29 11:58:12 -05:00
Andy Eschbacher
a84806e820 update signature 2016-11-29 11:57:11 -05:00
Taylor Oshan
4a1efc4e3c simplify loops to idx; add json.dumps 2016-11-29 09:16:01 -07:00
Andy Eschbacher
b22f79b0cc Merge branch 'develop' into adds-nonspatial-kmeans 2016-11-29 10:17:18 -05:00
Andy Eschbacher
8aca98433b Merge branch 'pysal_gwr' of github.com:TaylorOshan/crankshaft into pysal_gwr 2016-11-29 10:14:21 -05:00
Andy Eschbacher
b8d08d5a96 Merge branch 'develop' into pysal_gwr 2016-11-29 10:14:07 -05:00
Mario de Frutos
7c63b66fdd Update travis yml to the new postgres-9.5 package 2016-11-29 15:52:29 +01:00
Taylor Oshan
8beb7220b2 gwr output 2016-11-28 15:53:59 -07:00
Andy Eschbacher
b39d0150c7 adds array agg to query 2016-11-28 15:49:10 -05:00
Andy Eschbacher
b399e883ad fix paths; fix query 2016-11-28 20:10:51 +00:00
Andy Eschbacher
6f400ee2b7 Merge branch 'pysal_gwr' of github.com:TaylorOshan/crankshaft into pysal_gwr 2016-11-28 14:54:47 -05:00
Taylor Oshan
c3e99cda30 pysal.contrib -> crankshaft.regression 2016-11-28 12:49:38 -07:00
Andy Eschbacher
5be5a48894 adds psql connector 2016-11-28 19:49:21 +00:00
Taylor Oshan
1c0e4fae47 merge in pg work 2016-11-28 12:36:37 -07:00
Andy Eschbacher
8b061bac72 adds basic pgsql file 2016-11-28 14:33:23 -05:00
Andy Eschbacher
76bd7ff783 update dependency paths 2016-11-28 14:33:02 -05:00
Andy Eschbacher
d7b4eaf110 remove ipython notebook checkpoints 2016-11-28 11:42:40 -05:00
Andy Eschbacher
1b969f6735 more robust table_refs defaults 2016-11-28 11:31:56 -05:00
Andy Eschbacher
6e50e43e1c add query for gwr 2016-11-28 11:07:23 -05:00
Taylor Oshan
cfa9111052 reformat output 2016-11-28 08:24:43 -07:00
Taylor Oshan
01c9195ea5 add glm/gwr base code; start gwr crankshaft func 2016-11-22 16:10:13 -07:00
Andy Eschbacher
db501a2f02 move query generation to inside analysis data provider class 2016-11-22 15:20:14 +00:00
Andy Eschbacher
6fe4fc9668 rename queryrunner in tests 2016-11-22 09:58:06 -05:00
Andy Eschbacher
280a5193ef rename queryrunner to analysisdataprovider 2016-11-22 09:32:39 -05:00
Andy Eschbacher
c27ec58948 Merge branch 'adds-nonspatial-kmeans' of https://github.com/CartoDB/crankshaft into adds-nonspatial-kmeans 2016-11-21 16:26:37 +00:00
Mario de Frutos
bb3ff43f0f Update .travis.yml 2016-11-21 17:25:08 +01:00
Andy Eschbacher
2f27622a6d strips out kmeans non spatial 2016-11-21 16:19:54 +00:00
Andy Eschbacher
c5a2746a53 Merge branch 'develop' into adds-nonspatial-kmeans 2016-11-21 15:46:44 +00:00
Mario de Frutos
538ab9a071 Changed to the last postgresql-9.5 package 2016-11-21 16:14:48 +01:00
Andy Eschbacher
c8f5448b7c seprates out query runner 2016-11-19 14:20:06 +00:00
Andy Eschbacher
224fbc2fc5 move to class based markov 2016-11-19 09:05:35 +00:00
Andy Eschbacher
2738c1f29c move to class-based module 2016-11-18 17:46:55 +00:00
Andy Eschbacher
a8bd122762 remove mock plpy dependencies 2016-11-18 17:46:29 +00:00
Andy Eschbacher
a9add4b49c rename results file 2016-11-18 17:40:57 +00:00
Andy Eschbacher
83f1900512 creates class-based approach to analysis methods 2016-11-18 17:26:24 +00:00
Andy Eschbacher
7eee4faac1 rename to match numbering elsewhere 2016-11-18 17:22:02 +00:00
Andy Eschbacher
84d33d841f tests for new class 2016-11-15 12:03:54 +01:00
Andy Eschbacher
ded26dc46b adding class for database response 2016-11-15 12:03:24 +01:00
Andy Eschbacher
0d40080f6c move back to colnames 2016-11-15 12:02:42 +01:00
Andy Eschbacher
0867e69d1f replace plpy method colnames 2016-11-15 11:19:15 +01:00
Andy Eschbacher
cbe8571546 fixes argument in not-standardize 2016-11-15 10:10:07 +01:00
Andy Eschbacher
af536757fe adds silhouettes to output 2016-11-14 23:29:38 +00:00
Andy Eschbacher
b6dae5e380 adding silhouette 2016-11-15 00:15:23 +01:00
Andy Eschbacher
64c4b6611c changes cluster centers to json 2016-11-10 16:56:04 +00:00
Andy Eschbacher
a188b2e104 adds missing arguments 2016-10-21 15:51:54 -06:00
Andy Eschbacher
4389c9538d small updates for readability 2016-10-21 10:13:21 -06:00
Javier Villar
2bc6b0782a Adding requirements.txt to master branch 2016-10-21 16:28:17 +02:00
Andy Eschbacher
3c6d73b7e2 Merge branch 'adds-nonspatial-kmeans' of https://github.com/CartoDB/crankshaft into adds-nonspatial-kmeans 2016-10-18 21:14:09 -06:00
Andy Eschbacher
3e0dba3522 update comments 2016-10-18 21:13:34 -06:00
Andy Eschbacher
5d8641732f change string formatting 2016-10-18 19:30:09 +00:00
Andy Eschbacher
f0c6cca766 fix key name 2016-10-18 13:05:56 -06:00
Andy Eschbacher
f800a35fd1 new format for input data 2016-10-18 13:01:31 -06:00
Andy Eschbacher
54bbd18b02 remove unneeded modules from test script 2016-10-18 12:12:38 -06:00
Andy Eschbacher
da23b002cf rename to match submodule name 2016-10-18 11:51:53 -06:00
Andy Eschbacher
a370a2da52 pep8 updates of test file 2016-10-18 11:50:59 -06:00
Andy Eschbacher
5404589058 Merge branch 'adds-nonspatial-kmeans' of https://github.com/CartoDB/crankshaft into adds-nonspatial-kmeans 2016-10-13 12:52:07 -04:00
Andy Eschbacher
b255fd3e06 make private functions more explictly private 2016-10-13 12:50:46 -04:00
Andy Eschbacher
0feaf36cf6 outputting consistent labels and centers 2016-10-13 15:52:00 +00:00
Andy Eschbacher
5d2a1881b1 make numpy with global scope in module 2016-10-13 15:00:28 +00:00
Andy Eschbacher
a95423174c adds back alias for kmeans removed by accident 2016-10-13 10:50:48 -04:00
Andy Eschbacher
4314f0f066 adds more robust data processing 2016-10-13 10:28:29 -04:00
Andy Eschbacher
c2e2359e65 addes minmax scaling for variables 2016-10-12 17:16:52 -04:00
Andy Eschbacher
361505fca9 fixes syntax errors 2016-10-12 21:13:51 +00:00
Andy Eschbacher
c47116571f properly close plpgsql function 2016-10-12 14:19:19 -04:00
Andy Eschbacher
3e1cef9958 fix output signature 2016-10-11 16:48:22 -04:00
Andy Eschbacher
947d6ba798 first add 2016-10-11 16:38:18 -04:00
Rafa de la Torre
ffd651b91a Merge remote-tracking branch 'origin/develop' into stuartlynn-patch-1 2016-10-11 15:26:03 +02:00
jvillarf
a271593fe9 Merge pull request #144 from CartoDB/2547_python_requirements_txt
Creating requirements.txt file for python
2016-10-07 16:52:59 +02:00
Javier Villar
83219270ae Copying requirements.txt to python 0.4.2 folder 2016-10-07 16:47:28 +02:00
Javier Villar
215e61396a Creating requirements.txt file for python 2016-10-07 13:45:09 +02:00
Andy Eschbacher
c7e690980f update column names in tests 2016-10-06 10:29:52 -04:00
Andy Eschbacher
da1449331c update signature variable names 2016-10-06 09:53:38 -04:00
Andy Eschbacher
c7f5c24510 update signature names 2016-10-06 09:53:22 -04:00
Andy Eschbacher
11c33ce3fa adds pep8 check item 2016-10-06 08:56:14 -04:00
Andy Eschbacher
0a53a6e71d fix error variable name bug, pep8 updates 2016-10-06 08:19:57 -04:00
Andy Eschbacher
fa4e5ae686 Merge branch 'develop' into adds-outlier-functions 2016-09-30 09:48:02 -04:00
Javier Goizueta
ecb4bd9606 Merge pull request #140 from CartoDB/138-fix-travis-tests
Reorder package installation
2016-09-30 11:40:44 +02:00
Javier Goizueta
ecc9814a88 Reorder package installation
Fixes #138
It seems that package postgresql-9.5-postgis-2.2 is now
indirectly depending on postgresql-9.5-postgis-2.3-scripts which
is not compatible with the packages in cartodb launchpad repos
2016-09-30 11:31:57 +02:00
Andy Eschbacher
6846014a4f adding docs 2016-09-29 11:42:11 -04:00
Andy Eschbacher
23b2ad57c5 test updates 2016-09-29 11:37:42 -04:00
Andy Eschbacher
99856ce956 flip inequality 2016-09-29 11:37:22 -04:00
Andy Eschbacher
f11982f531 Merge branch 'adds-outlier-functions' of https://github.com/CartoDB/crankshaft into adds-outlier-functions 2016-09-29 11:11:36 -04:00
Andy Eschbacher
bd05e7739d add test to produce error 2016-09-29 11:10:54 -04:00
Andy Eschbacher
5754087140 adds symmetric option for stddev outlier 2016-09-29 11:09:10 -04:00
Andy Eschbacher
8bc6f69a1b adding exceptions to improve robustness 2016-09-29 10:12:32 -04:00
Andy Eschbacher
b54c62890f adds hand-off doc line 2016-09-29 08:48:22 -04:00
Andy Eschbacher
acde384157 update tests 2016-09-28 16:27:41 -04:00
Andy Eschbacher
b8accb48fc adds tests 2016-09-28 15:55:56 -04:00
Andy Eschbacher
f2bb0b496b small fixes 2016-09-26 16:51:22 -04:00
Andy Eschbacher
aaa36569de first add 2016-09-26 16:26:34 -04:00
Andy Eschbacher
803816f5c9 Merge branch 'moran-query-ordering-fix' of https://github.com/CartoDB/crankshaft into moran-query-ordering-fix 2016-09-26 10:15:49 -04:00
Andy Eschbacher
1ef3f86474 small updates after ordering fix 2016-09-26 10:13:27 -04:00
Andy Eschbacher
f1d420a6f7 ordering fixes 2016-09-26 10:11:16 -04:00
Andy Eschbacher
06452562b9 fix ordering problems in input columns 2016-09-26 10:10:52 -04:00
Andy Eschbacher
07e4062237 Merge branch 'develop' into moran-query-ordering-fix 2016-09-23 13:25:36 -04:00
Andy Eschbacher
5443b67470 adding docs for getis ord's g 2016-09-22 08:58:22 -04:00
Andy Eschbacher
795413e46d cleaning test files 2016-09-21 12:01:42 -04:00
Andy Eschbacher
e5ea836493 fix json format 2016-09-21 11:53:17 -04:00
Andy Eschbacher
258322fcca update tests to queen weights from knn 2016-09-21 11:46:07 -04:00
Andy Eschbacher
166e9e223f minor formatting changes 2016-09-20 09:55:33 -04:00
Andy Eschbacher
29de72de33 output column renaming 2016-09-20 09:55:13 -04:00
Andy Eschbacher
eff548dec9 aligning parameters for fistures and tests 2016-09-19 17:17:51 -04:00
Andy Eschbacher
dcb364c3ee up default number of permutations 2016-09-19 17:16:53 -04:00
Andy Eschbacher
1d09eac3e7 adding pgsql tests 2016-09-19 16:10:29 -04:00
Andy Eschbacher
5127845100 Merge branch 'adds-getis-analysis' of https://github.com/cartodb/crankshaft into adds-getis-analysis 2016-09-19 19:24:43 +00:00
Andy Eschbacher
ee4eb795b7 adding getis fixture file 2016-09-19 19:24:23 +00:00
Andy Eschbacher
2ede55d165 pep8 updates 2016-09-19 12:17:01 -04:00
Andy Eschbacher
df5faa6745 Merge branch 'adds-getis-analysis' of https://github.com/cartodb/crankshaft into adds-getis-analysis
Conflicts:
	src/py/crankshaft/test/test_clustering_getis.py
2016-09-19 15:54:46 +00:00
Andy Eschbacher
06f0cb0dc4 updating how p values are tested 2016-09-19 15:45:10 +00:00
Stuart Lynn
11176b71b3 Update PULL_REQUEST_TEMPLATE.md 2016-09-19 10:47:59 -04:00
Andy Eschbacher
b5445da303 remove kinks in test 2016-09-14 12:45:43 +00:00
Andy Eschbacher
5d109acd8d remove debug messages 2016-09-13 17:59:15 -04:00
Andy Eschbacher
2937c97fea including correct fixtures 2016-09-13 17:47:30 -04:00
Andy Eschbacher
c392aec98a re-ordered columns 2016-09-13 15:32:32 -04:00
Andy Eschbacher
4e42625d79 fix indexing of fixture 2016-09-13 14:29:06 -04:00
Andy Eschbacher
b71152a884 adds fixtures and tests 2016-09-13 09:06:09 -04:00
Andy Eschbacher
ce4cc637ae adding permutations to interface 2016-09-13 09:05:24 -04:00
Andy Eschbacher
ccccf68066 fix module call 2016-09-12 11:39:21 -04:00
Andy Eschbacher
60f52633fa adds hotspot/coldspot function 2016-09-09 11:11:32 -04:00
Andy Eschbacher
1148aa417a additional test on alphabetical ordering 2016-09-06 09:23:59 -04:00
Andy Eschbacher
e29f6f2861 add more comments 2016-09-06 09:23:39 -04:00
Andy Eschbacher
44dc5811b5 updating tests for query ordering error 2016-09-01 16:47:57 -04:00
Andy Eschbacher
40481f1286 adding more tests 2016-08-29 17:10:58 -04:00
Andy Eschbacher
622235d787 :P adding commas 2016-08-29 16:52:40 -04:00
Andy Eschbacher
623613aa5c adding ordered dict to tests 2016-08-29 16:46:49 -04:00
Andy Eschbacher
a451fb5b6a minor ordering changes 2016-08-29 15:50:19 -04:00
126 changed files with 22926 additions and 768 deletions

View File

@@ -2,6 +2,9 @@
- [ ] All declared geometries are `geometry(Geometry, 4326)` for general geoms, or `geometry(Point, 4326)`
- [ ] Existing functions in crankshaft python library called from the extension are kept at least from version N to version N+1 (to avoid breakage during upgrades).
- [ ] Docs for public-facing functions are written
- [ ] New functions follow the naming conventions: `CDB_NameOfFunction`. Where internal functions begin with an underscore `_`.
- [ ] If appropriate, new functions accepts an arbitrary query as an input (see [Crankshaft Issue #6](https://github.com/CartoDB/crankshaft/issues/6) for more information)
- [ ] New functions follow the naming conventions: `CDB_NameOfFunction`. Where internal functions begin with an underscore
- [ ] Video explaining the analysis and showing examples
- [ ] Analysis Documentation written [template](https://docs.google.com/a/cartodb.com/document/d/1X2KOtaiEBKWNMp8UjwcLB-kE9aIOw09aOjX3oaCjeME/edit?usp=sharing)
- [ ] Smoke test written
- [ ] Hand-off document for camshaft node written
- [ ] If function is in Python, code conforms to [PEP8 Style Guide](https://www.python.org/dev/peps/pep-0008/)

View File

@@ -35,14 +35,18 @@ before_install:
- sudo apt-get -y remove --purge postgresql-9.2
- sudo apt-get -y remove --purge postgresql-9.3
- sudo apt-get -y remove --purge postgresql-9.4
- sudo apt-get -y remove --purge postgis
- sudo apt-get -y remove --purge postgresql-9.5
- sudo rm -rf /var/lib/postgresql/
- sudo rm -rf /var/log/postgresql/
- sudo rm -rf /etc/postgresql/
- sudo apt-get -y remove --purge postgis-2.2
- sudo apt-get -y autoremove
- sudo apt-get -y install postgresql-9.5=9.5.2-2ubuntu1
- sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-2ubuntu1
- sudo apt-get -y install postgresql-plpython-9.5=9.5.2-2ubuntu1
- sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2
- sudo apt-get -y install postgresql-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2
- sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2
# configure it to accept local connections from postgres
- echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \

View File

@@ -1,3 +1,11 @@
0.5.0 (2016-12-15)
------------------
* Updated PULL_REQUEST_TEMPLATE
* Fixed a bug that flipped the order of the numerator and denominator when calculating Moran Local Rate, caused by the code previously sorting the keys alphabetically.
* Add new CDB_GetisOrdsG functions. Getis-Ord's G\* is a geo-statistical measurement of the intensity of clustering of high or low values
* Add new outlier detection functions: CDB_StaticOutlier, CDB_PercentOutlier and CDB_StdDevOutlier
* Updates in the framework for accessing the Python functions.
0.4.2 (2016-09-22)
------------------
* Bugfix for cdb_areasofinterestglobal: import correct modules

40
doc/16_getis_ord_gstar.md Normal file
View File

@@ -0,0 +1,40 @@
## Getis-Ord's G\*
Getis-Ord's G\* is a geo-statistical measurement of the intensity of clustering of high or low values. The clustering of high values can be referred to as "hotspots" because these are areas of high activity or large (relative to the global mean) measurement values. Coldspots are clustered areas with low activity or small measurement values.
### CDB_GetisOrdsG(subquery text, column_name text)
#### Arguments
| Name | Type | Description |
|------|------|-------------|
| subquery | text | A query of the data you want to pass to the function. It must include `column_name`, a geometry column (usually `the_geom`) and an id column (usually `cartodb_id`) |
| column_name | text | The column on which to perform the analysis. This column should be a numeric type. |
| w_type (optional) | text | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation.](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html) |
| num_ngbrs (optional) | integer | Default: 5. If `knn` is chosen, this will set the number of neighbors. If `knn` is not chosen, any entered value will be ignored. Use `NULL` if not choosing `knn`. |
| permutations (optional) | integer | The number of permutations for calculating p-values. Default: 999 |
| geom_col (optional) | text | The column where the geometry information is stored. The format must be PostGIS Geometry type (SRID 4326). Default: `the_geom`. |
| id_col (optional) | text | The column that has the unique row identifier. |
### Returns
Returns a table with the following columns.
| Name | Type | Description |
|------|------|-------------|
| z_score | numeric | z-score, a measure of the intensity of clustering of high values (hotspots) or low values (coldspots). Positive values represent 'hotspots', while negative values represent 'coldspots'. |
| p_value | numeric | p-value, a measure of the significance of the intensity of clustering |
| p_z_sim | numeric | p-value based on standard normal approximation from permutations |
| rowid | integer | The original `id_col` that can be used to associate the outputs with the original geometry and inputs |
#### Example Usage
The following query returns the original table augmented with the values calculated from the Getis-Ord's G\* analysis.
```sql
SELECT i.*, m.z_score, m.p_value
FROM cdb_crankshaft.CDB_GetisOrdsG('SELECT * FROM incident_reports_clustered',
'num_incidents') As m
JOIN incident_reports_clustered As i
ON i.cartodb_id = m.rowid;
```

163
doc/18_outliers.md Normal file
View File

@@ -0,0 +1,163 @@
## Outlier Detection
This set of functions detects the presence of outliers. There are three functions for finding outliers from non-spatial data:
1. Static Outliers
1. Percentage Outliers
1. Standard Deviation Outliers
### CDB_StaticOutlier(column_value numeric, threshold numeric)
#### Arguments
| Name | Type | Description |
|------|------|-------------|
| column_value | numeric | The column of values on which to apply the threshold |
| threshold | numeric | The static threshold which is used to indicate whether a `column_value` is an outlier or not |
### Returns
Returns a boolean (true/false) depending on whether a value is above or below (or equal to) the threshold
| Name | Type | Description |
|------|------|-------------|
| outlier | boolean | classification of whether a row is an outlier or not |
#### Example Usage
With a table `website_visits` and a column of the number of website visits in units of 10,000 visits:
```
| id | visits_10k |
|----|------------|
| 1 | 1 |
| 2 | 3 |
| 3 | 5 |
| 4 | 1 |
| 5 | 32 |
| 6 | 3 |
| 7 | 57 |
| 8 | 2 |
```
```sql
SELECT
id,
CDB_StaticOutlier(visits_10k, 11.0) As outlier,
visits_10k
FROM website_visits
```
```
| id | outlier | visits_10k |
|----|---------|------------|
| 1 | f | 1 |
| 2 | f | 3 |
| 3 | f | 5 |
| 4 | f | 1 |
| 5 | t | 32 |
| 6 | f | 3 |
| 7 | t | 57 |
| 8 | f | 2 |
```
### CDB_PercentOutlier(column_values numeric[], outlier_fraction numeric, ids int[])
`CDB_PercentOutlier` calculates whether or not a value falls above a given threshold based on a percentage above the mean value of the input values.
#### Arguments
| Name | Type | Description |
|------|------|-------------|
| column_values | numeric[] | An array of the values to calculate the outlier classification on |
| outlier_fraction | numeric | The threshold above which a column value divided by the mean of all values is considered an outlier |
| ids | int[] | An array of the unique row ids of the input data (usually `cartodb_id`) |
### Returns
Returns a table of the outlier classification with the following columns
| Name | Type | Description |
|------|------|-------------|
| is_outlier | boolean | classification of whether a row is an outlier or not |
| rowid | int | original row id (e.g., input `cartodb_id`) of the row which has the outlier classification |
#### Example Usage
This example finds outliers which are more than 100% larger than the average (that is, more than 2.0 times larger).
```sql
WITH cte As (
SELECT
unnest(Array[1,2,3,4,5,6,7,8]) As id,
unnest(Array[1,3,5,1,32,3,57,2]) As visits_10k
)
SELECT
(CDB_PercentOutlier(array_agg(visits_10k), 2.0, array_agg(id))).*
FROM cte;
```
Output
```
| outlier | rowid |
|---------+-------|
| f | 1 |
| f | 2 |
| f | 3 |
| f | 4 |
| t | 5 |
| f | 6 |
| t | 7 |
| f | 8 |
```
### CDB_StdDevOutlier(column_values numeric[], num_deviations numeric, ids int[], is_symmetric boolean DEFAULT true)
`CDB_StdDevOutlier` calculates whether or not a value falls above or below a given threshold based on the number of standard deviations from the mean.
#### Arguments
| Name | Type | Description |
|------|------|-------------|
| column_values | numeric[] | An array of the values to calculate the outlier classification on |
| num_deviations | numeric | The threshold in units of standard deviation |
| ids | int[] | An array of the unique row ids of the input data (usually `cartodb_id`) |
| is_symmetric (optional) | boolean | Consider outliers that are symmetric about the mean (default: true) |
### Returns
Returns a table of the outlier classification with the following columns
| Name | Type | Description |
|------|------|-------------|
| is_outlier | boolean | classification of whether a row is an outlier or not |
| rowid | int | original row id (e.g., input `cartodb_id`) of the row which has the outlier classification |
#### Example Usage
This example finds outliers which are more than 2.0 standard deviations from the mean.
```sql
WITH cte As (
SELECT
unnest(Array[1,2,3,4,5,6,7,8]) As id,
unnest(Array[1,3,5,1,32,3,57,2]) As visits_10k
)
SELECT
(CDB_StdDevOutlier(array_agg(visits_10k), 2.0, array_agg(id))).*
FROM cte;
```
Output
```
| outlier | rowid |
|---------+-------|
| f | 1 |
| f | 2 |
| f | 3 |
| f | 4 |
| f | 5 |
| f | 6 |
| t | 7 |
| f | 8 |
```

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
comment = 'CartoDB Spatial Analysis extension'
default_version = '0.4.2'
default_version = '0.5.1'
requires = 'plpythonu, postgis'
superuser = true
schema = cdb_crankshaft

View File

@@ -0,0 +1,5 @@
joblib==0.8.3
numpy==1.6.1
scipy==0.14.0
pysal==1.11.2
scikit-learn==0.14.1

View File

@@ -0,0 +1,6 @@
"""Import all modules"""
import crankshaft.random_seeds
import crankshaft.clustering
import crankshaft.space_time_dynamics
import crankshaft.segmentation
import analysis_data_provider

View File

@@ -0,0 +1,67 @@
"""class for fetching data"""
import plpy
import pysal_utils as pu
class AnalysisDataProvider:
    """Data-access layer for the analysis functions.

    Each method builds (or receives) a SQL query, executes it through
    PL/Python's `plpy`, and returns the result rows. On database errors
    the analysis is aborted via `plpy.error`, which reports the failure
    back to the SQL caller.
    """

    def get_getis(self, w_type, params):
        """Fetch data for Getis-Ord's G*.

        :param w_type: weight type ('knn' or 'queen') used to build the
                       neighbor query
        :param params: dict-like of query template values (id_col, attr1,
                       geom_col, subquery, num_ngbrs)
        """
        try:
            query = pu.construct_neighbor_query(w_type, params)
            result = plpy.execute(query)
            # if there are no neighbors, exit
            if len(result) == 0:
                return pu.empty_zipped_array(4)
            else:
                return result
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_markov(self, w_type, params):
        """Fetch data for spatial Markov analysis."""
        try:
            query = pu.construct_neighbor_query(w_type, params)
            data = plpy.execute(query)
            if len(data) == 0:
                return pu.empty_zipped_array(4)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_moran(self, w_type, params):
        """Fetch data for Moran's I analyses."""
        try:
            query = pu.construct_neighbor_query(w_type, params)
            data = plpy.execute(query)
            # if there are no neighbors, exit
            if len(data) == 0:
                return pu.empty_zipped_array(2)
            return data
        except plpy.SPIError as err:
            # BUG FIX: previously formatted with undefined name `e`,
            # which raised a NameError inside the error handler
            plpy.error('Analysis failed: %s' % err)
            # defensive fallback in case plpy.error returns instead of
            # raising -- presumably unreachable; TODO confirm
            return pu.empty_zipped_array(2)

    def get_nonspatial_kmeans(self, query):
        """Fetch data for non-spatial k-means (caller supplies full query)."""
        try:
            data = plpy.execute(query)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_spatial_kmeans(self, params):
        """Fetch id/x/y arrays for spatial k-means, skipping null geometries."""
        query = ("SELECT "
                 "array_agg({id_col} ORDER BY {id_col}) as ids,"
                 "array_agg(ST_X({geom_col}) ORDER BY {id_col}) As xs,"
                 "array_agg(ST_Y({geom_col}) ORDER BY {id_col}) As ys "
                 "FROM ({subquery}) As a "
                 "WHERE {geom_col} IS NOT NULL").format(**params)
        try:
            data = plpy.execute(query)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

View File

@@ -0,0 +1,4 @@
"""Import all functions from for clustering"""
from moran import *
from kmeans import *
from getis import *

View File

@@ -0,0 +1,50 @@
"""
Getis-Ord's G geostatistics (hotspot/coldspot analysis)
"""
import pysal as ps
from collections import OrderedDict
# crankshaft modules
import crankshaft.pysal_utils as pu
from crankshaft.analysis_data_provider import AnalysisDataProvider
# High level interface ---------------------------------------
class Getis:
    """High-level interface to Getis-Ord's G* hotspot/coldspot analysis."""

    def __init__(self, data_provider=None):
        # use the default database-backed provider unless one is injected
        # (e.g. a mock provider in tests)
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None
                              else data_provider)

    def getis_ord(self, subquery, attr,
                  w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Getis-Ord's G*

        Implementation building neighbors with a PostGIS database and PySAL's
        Getis-Ord's G* hotspot/coldspot module.
        Andy Eschbacher
        """
        # geometries with attributes that are null are ignored
        # resulting in a collection of not as near neighbors if kNN is chosen
        query_params = OrderedDict([("id_col", id_col),
                                    ("attr1", attr),
                                    ("geom_col", geom_col),
                                    ("subquery", subquery),
                                    ("num_ngbrs", num_ngbrs)])
        rows = self.data_provider.get_getis(w_type, query_params)

        attr_vals = pu.get_attributes(rows)

        # build PySAL weight object from the neighbor lists
        weight = pu.get_weight(rows, w_type, num_ngbrs)

        # calculate Getis-Ord's G* z- and p-values
        stat = ps.esda.getisord.G_Local(attr_vals, weight, star=True,
                                        permutations=permutations)

        return zip(stat.z_sim, stat.p_sim, stat.p_z_sim, weight.id_order)

View File

@@ -0,0 +1,32 @@
from sklearn.cluster import KMeans
import numpy as np
from crankshaft.analysis_data_provider import AnalysisDataProvider
class Kmeans:
    """K-means clustering over data fetched through a data provider."""

    def __init__(self, data_provider=None):
        # fall back to the default database-backed provider when none is
        # injected (tests pass a mock provider instead)
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None
                              else data_provider)

    def spatial(self, query, no_clusters, no_init=20):
        """
        find centers based on clusters of latitude/longitude pairs
        query: SQL query that has a WGS84 geometry (the_geom)
        """
        query_params = {
            "subquery": query,
            "geom_col": "the_geom",
            "id_col": "cartodb_id",
        }
        resp = self.data_provider.get_spatial_kmeans(query_params)

        # Unpack query response: a single row of aggregated arrays
        row = resp[0]
        xs = row['xs']
        ys = row['ys']
        ids = row['ids']

        model = KMeans(n_clusters=no_clusters, n_init=no_init)
        cluster_labels = model.fit_predict(zip(xs, ys))
        return zip(ids, cluster_labels)

View File

@@ -0,0 +1,208 @@
"""
Moran's I geostatistics (global clustering & outliers presence)
"""
# TODO: Fill in local neighbors which have null/NoneType values with the
# average of the their neighborhood
import pysal as ps
from collections import OrderedDict
from crankshaft.analysis_data_provider import AnalysisDataProvider
# crankshaft module
import crankshaft.pysal_utils as pu
# High level interface ---------------------------------------
class Moran:
def __init__(self, data_provider=None):
if data_provider is None:
self.data_provider = AnalysisDataProvider()
else:
self.data_provider = data_provider
def global_stat(self, subquery, attr_name,
w_type, num_ngbrs, permutations, geom_col, id_col):
"""
Moran's I (global)
Implementation building neighbors with a PostGIS database and Moran's I
core clusters with PySAL.
Andy Eschbacher
"""
params = OrderedDict([("id_col", id_col),
("attr1", attr_name),
("geom_col", geom_col),
("subquery", subquery),
("num_ngbrs", num_ngbrs)])
result = self.data_provider.get_moran(w_type, params)
# collect attributes
attr_vals = pu.get_attributes(result)
# calculate weights
weight = pu.get_weight(result, w_type, num_ngbrs)
# calculate moran global
moran_global = ps.esda.moran.Moran(attr_vals, weight,
permutations=permutations)
return zip([moran_global.I], [moran_global.EI])
def local_stat(self, subquery, attr,
w_type, num_ngbrs, permutations, geom_col, id_col):
"""
Moran's I implementation for PL/Python
Andy Eschbacher
"""
# geometries with attributes that are null are ignored
# resulting in a collection of not as near neighbors
params = OrderedDict([("id_col", id_col),
("attr1", attr),
("geom_col", geom_col),
("subquery", subquery),
("num_ngbrs", num_ngbrs)])
result = self.data_provider.get_moran(w_type, params)
attr_vals = pu.get_attributes(result)
weight = pu.get_weight(result, w_type, num_ngbrs)
# calculate LISA values
lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
permutations=permutations)
# find quadrants for each geometry
quads = quad_position(lisa.q)
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
def global_rate_stat(self, subquery, numerator, denominator,
w_type, num_ngbrs, permutations, geom_col, id_col):
"""
Moran's I Rate (global)
Andy Eschbacher
"""
params = OrderedDict([("id_col", id_col),
("attr1", numerator),
("attr2", denominator)
("geom_col", geom_col),
("subquery", subquery),
("num_ngbrs", num_ngbrs)])
result = self.data_provider.get_moran(w_type, params)
# collect attributes
numer = pu.get_attributes(result, 1)
denom = pu.get_attributes(result, 2)
weight = pu.get_weight(result, w_type, num_ngbrs)
# calculate moran global rate
lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
permutations=permutations)
return zip([lisa_rate.I], [lisa_rate.EI])
def local_rate_stat(self, subquery, numerator, denominator,
                    w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Local Moran's I Rate.

    Geometries with NULL values are ignored, resulting in a collection
    of not-as-near neighbors.
    """
    query_params = OrderedDict([("id_col", id_col),
                                ("numerator", numerator),
                                ("denominator", denominator),
                                ("geom_col", geom_col),
                                ("subquery", subquery),
                                ("num_ngbrs", num_ngbrs)])
    rows = self.data_provider.get_moran(w_type, query_params)
    # numerator is attribute 1, denominator is attribute 2
    numer_vals = pu.get_attributes(rows, 1)
    denom_vals = pu.get_attributes(rows, 2)
    w = pu.get_weight(rows, w_type, num_ngbrs)
    # local indicators of spatial association for rates
    rate_lisa = ps.esda.moran.Moran_Local_Rate(numer_vals, denom_vals, w,
                                               permutations=permutations)
    # quadrant labels ('HH', 'LH', ...) for each geometry
    quadrants = quad_position(rate_lisa.q)
    return zip(rate_lisa.Is, quadrants, rate_lisa.p_sim,
               w.id_order, rate_lisa.y)
def local_bivariate_stat(self, subquery, attr1, attr2,
                         permutations, geom_col, id_col,
                         w_type, num_ngbrs):
    """
    Moran's I (local) Bivariate (untested)
    """
    query_params = OrderedDict([("id_col", id_col),
                                ("attr1", attr1),
                                ("attr2", attr2),
                                ("geom_col", geom_col),
                                ("subquery", subquery),
                                ("num_ngbrs", num_ngbrs)])
    rows = self.data_provider.get_moran(w_type, query_params)
    # pull both attribute columns from the query result
    first_vals = pu.get_attributes(rows, 1)
    second_vals = pu.get_attributes(rows, 2)
    # build spatial weights
    w = pu.get_weight(rows, w_type, num_ngbrs)
    # bivariate local indicators of spatial association
    bv_lisa = ps.esda.moran.Moran_Local_BV(first_vals, second_vals, w,
                                           permutations=permutations)
    # significance clustering by quadrant label
    cluster_labels = quad_position(bv_lisa.q)
    return zip(bv_lisa.Is, cluster_labels, bv_lisa.p_sim, w.id_order)
# Low level functions ----------------------------------------
def map_quads(coord):
    """
    Map a quadrant number to Moran's I designation
    HH=1, LH=2, LL=3, HL=4
    Input:
    @param coord (int): quadrant of a specific measurement
    Output:
    classification (one of 'HH', 'LH', 'LL', or 'HL'), or None for
    any value outside 1-4
    """
    # table-driven lookup; .get returns None for unknown quadrants
    labels = {1: 'HH', 2: 'LH', 3: 'LL', 4: 'HL'}
    return labels.get(coord)
def quad_position(quads):
    """
    Produce Moran's I classification labels for an array of quadrants.

    Input:
    @param quads ndarray: an array of quads classified by
    1-4 (PySAL default)
    Output:
    list of quads classified by 'HH', 'LL', etc.
    """
    # map() over the quadrant codes; materialize as a list
    return list(map(map_quads, quads))

View File

@@ -0,0 +1,2 @@
"""Import all functions for pysal_utils"""
from crankshaft.pysal_utils.pysal_utils import *

View File

@@ -0,0 +1,211 @@
"""
Utilities module for generic PySAL functionality, mainly centered on
translating queries into numpy arrays or PySAL weights objects
"""
import numpy as np
import pysal as ps
def construct_neighbor_query(w_type, query_vals):
    """Return query (a string) used for finding neighbors
    @param w_type text: type of neighbors to calculate ('knn' or 'queen')
    @param query_vals dict: values used to construct the query
    """
    # dispatch to the matching query builder; anything that is not
    # 'knn' falls back to queen contiguity
    builder = knn if w_type.lower() == 'knn' else queen
    return builder(query_vals)
# Build weight object
def get_weight(query_res, w_type='knn', num_ngbrs=5):
    """
    Construct PySAL weight from return value of query.

    @param query_res dict-like: query results with attributes and
                                neighbors ('id' and 'neighbors' keys)
    @param w_type str: weight type; kept for interface compatibility
                       (not used in the current implementation)
    @param num_ngbrs int: number of neighbors; kept for interface
                          compatibility (not used currently)
    Returns a row-standardized ps.W weights object.
    """
    # removed: leftover Python-2 debug `print` statement (invalid syntax
    # under Python 3 and noisy in production) and a stale commented-out
    # weighting scheme
    neighbors = {x['id']: x['neighbors'] for x in query_res}
    built_weight = ps.W(neighbors)
    # row-standardize so each unit's weights sum to 1
    built_weight.transform = 'r'
    return built_weight
def query_attr_select(params):
    """
    Create portion of SELECT statement for attributes involved in query.
    Defaults to order in the params.

    @param params: dict of information used in query (column names,
                   table name, etc.)
    Example:
    OrderedDict([('numerator', 'price'),
                 ('denominator', 'sq_meters'),
                 ('subquery', 'SELECT * FROM interesting_data')])
    Output:
    "i.\"price\"::numeric As attr1, " \
    "i.\"sq_meters\"::numeric As attr2, "
    """
    template = "i.\"%(col)s\"::numeric As attr%(alias_num)s, "
    if 'time_cols' in params:
        # markov analysis: columns are listed directly
        columns = params['time_cols']
    else:
        # moran analysis: resolve column names from the non-reserved keys
        reserved = ('id_col', 'geom_col', 'subquery', 'num_ngbrs')
        columns = [params[key] for key in params if key not in reserved]
    # alias columns as attr1, attr2, ... in order
    return "".join(template % {"col": col, "alias_num": position}
                   for position, col in enumerate(columns, start=1))
def query_attr_where(params):
    """
    Construct WHERE conditions when building neighbors query.
    Creates the clauses that weed out NULL-valued geometries.

    Input: dict of params:
    {'subquery': ...,
     'numerator': 'data1',
     'denominator': 'data2',
     '': ...}
    Output:
    'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL'

    Input:
    {'subquery': ...,
     'time_cols': ['time1', 'time2', 'time3'],
     'etc': ...}
    Output: 'idx_replace."time1" IS NOT NULL AND idx_replace."time2" IS NOT
    NULL AND idx_replace."time3" IS NOT NULL'
    """
    null_check = "idx_replace.\"%s\" IS NOT NULL"
    if 'time_cols' in params:
        # markov: one NULL check per time column
        conditions = [null_check % col for col in params['time_cols']]
    else:
        # moran: NULL checks for each non-reserved attribute key
        reserved = ('id_col', 'geom_col', 'subquery', 'num_ngbrs')
        attr_keys = [key for key in params if key not in reserved]
        conditions = [null_check % params[key] for key in attr_keys]
        if 'denominator' in attr_keys:
            # guard against division by zero in rate calculations
            conditions.append(
                "idx_replace.\"%s\" <> 0" % params['denominator'])
    return " AND ".join(conditions)
def knn(params):
    """SQL query for k-nearest neighbors.
    @param params: dict of values to fill template
    """
    select_part = query_attr_select(params)
    where_part = query_attr_where(params)
    # the same WHERE conditions are applied to both table aliases
    clauses = {"attr_select": select_part,
               "attr_where_i": where_part.replace("idx_replace", "i"),
               "attr_where_j": where_part.replace("idx_replace", "j")}
    template = ("SELECT "
                "i.\"{id_col}\" As id, "
                "%(attr_select)s"
                "(SELECT ARRAY(SELECT j.\"{id_col}\" "
                "FROM ({subquery}) As j "
                "WHERE "
                "i.\"{id_col}\" <> j.\"{id_col}\" AND "
                "%(attr_where_j)s "
                "ORDER BY "
                "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC "
                "LIMIT {num_ngbrs})"
                ") As neighbors "
                "FROM ({subquery}) As i "
                "WHERE "
                "%(attr_where_i)s "
                "ORDER BY i.\"{id_col}\" ASC;") % clauses
    return template.format(**params)
# SQL query for finding queens neighbors (all contiguous polygons)
# SQL query for finding queens neighbors (all contiguous polygons)
def queen(params):
    """SQL query for queen neighbors.
    @param params dict: information to fill query
    """
    select_part = query_attr_select(params)
    where_part = query_attr_where(params)
    # the same WHERE conditions are applied to both table aliases
    clauses = {"attr_select": select_part,
               "attr_where_i": where_part.replace("idx_replace", "i"),
               "attr_where_j": where_part.replace("idx_replace", "j")}
    template = ("SELECT "
                "i.\"{id_col}\" As id, "
                "%(attr_select)s"
                "(SELECT ARRAY(SELECT j.\"{id_col}\" "
                "FROM ({subquery}) As j "
                "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND "
                "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND "
                "%(attr_where_j)s)"
                ") As neighbors "
                "FROM ({subquery}) As i "
                "WHERE "
                "%(attr_where_i)s "
                "ORDER BY i.\"{id_col}\" ASC;") % clauses
    return template.format(**params)
# to add more weight methods open a ticket or pull request
# to add more weight methods open a ticket or pull request
def get_attributes(query_res, attr_num=1):
    """
    Extract one attribute column from query results as a float array.

    @param query_res: query results with attributes and neighbors
    @param attr_num: attribute number (1, 2, ...)
    """
    key = 'attr' + str(attr_num)
    # np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the documented replacement
    return np.array([x[key] for x in query_res], dtype=float)
def empty_zipped_array(num_nones):
    """
    Prepare return values for cases of empty weights objects (no
    neighbors).

    Input:
    @param num_nones int: number of columns (e.g., 4)
    Output:
    [(None, None, None, None)]
    """
    # a single row: one tuple of `num_nones` Nones
    return [(None,) * num_nones]

View File

@@ -0,0 +1,11 @@
"""Random seed generator used for non-deterministic functions in crankshaft"""
import random
import numpy
def set_random_seeds(value):
    """
    Seed every RNG (Random Number Generator) used internally: both the
    standard-library generator and NumPy's global generator.
    """
    for seeder in (random.seed, numpy.random.seed):
        seeder(value)

View File

@@ -0,0 +1 @@
from segmentation import *

View File

@@ -0,0 +1,176 @@
"""
Segmentation creation and prediction
"""
import sklearn
import numpy as np
import plpy
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import metrics
from sklearn.cross_validation import train_test_split
# Lower level functions
#----------------------
def replace_nan_with_mean(array):
    """
    Fill NaN entries with the mean of the non-NaN values.

    Input:
    @param array: 1D or 2D numpy float array which may have
                  null-valued (NaN) entries
    Output:
    the array with NaNs filled in-place: column means for 2D input,
    the overall mean for 1D input

    Note: the previous implementation only handled 2D arrays and
    crashed unpacking indices when a 1D array (e.g., the regression
    target from get_data) contained NaNs.
    """
    if array.ndim == 1:
        # 1D: fill with the mean of the remaining values
        nan_mask = np.isnan(array)
        if nan_mask.any():
            array[nan_mask] = np.mean(array[~nan_mask])
        return array
    # 2D: iterate (row, col) positions of NaNs, fill with column mean
    for row, col in zip(*np.where(np.isnan(array))):
        array[row, col] = np.mean(array[~np.isnan(array[:, col]), col])
    return array
def get_data(variable, feature_columns, query):
    """
    Fetch data from the database, clean, and package into numpy arrays.

    Input:
    @param variable: name of the target variable
    @param feature_columns: list of column names
    @param query: subquery that data is pulled from for the packaging
    Output:
    (target, features) NumPy arrays with NaNs replaced by means
    """
    columns = ','.join(['array_agg("{col}") As "{col}"'.format(col=col)
                        for col in feature_columns])
    try:
        data = plpy.execute('''SELECT array_agg("{variable}") As target, {columns} FROM ({query}) As a'''.format(
            variable=variable,
            columns=columns,
            query=query))
    except Exception as e:
        # `except Exception, e` was Python-2-only syntax; `as` works on 2.6+/3.x
        plpy.error('Failed to access data to build segmentation model: %s' % e)
    # extract target data from plpy object
    target = np.array(data[0]['target'])
    # put n feature data arrays into an n x m array of arrays
    features = np.column_stack([np.array(data[0][col], dtype=float)
                                for col in feature_columns])
    return replace_nan_with_mean(target), replace_nan_with_mean(features)
# High level interface
# --------------------
def create_and_predict_segment_agg(target, features, target_features,
                                   target_ids, model_parameters):
    """
    Version of create_and_predict_segment that works on arrays that come
    straight from the SQL calling the function.

    Input:
    @param target: 1D array of length NSamples with the target variable
                   the model should predict
    @param features: 2D array (NSamples x NFeatures) forming the model
                     input
    @param target_features: 2D array of features to predict on
    @param target_ids: 1D array of ids used to associate predictions
                       with the rows they came from
    @param model_parameters: dictionary of model parameters
    """
    clean_target = replace_nan_with_mean(target)
    clean_features = replace_nan_with_mean(features)
    target_features = replace_nan_with_mean(target_features)
    model, accuracy = train_model(clean_target, clean_features,
                                  model_parameters, 0.2)
    prediction = model.predict(target_features)
    # one accuracy value per predicted row; the previous np.full(...)
    # wrapper re-broadcast this same list to the same shape — redundant
    # and consistent now with create_and_predict_segment
    accuracy_array = [accuracy] * prediction.shape[0]
    return zip(target_ids, prediction, accuracy_array)
def create_and_predict_segment(query, variable, target_query, model_params):
    """
    Generate a segment with machine learning.
    Stuart Lynn

    @param query: subquery supplying the training data
    @param variable: name of the column to predict
    @param target_query: subquery supplying rows to predict on
    @param model_params: dict of GradientBoostingRegressor parameters
    """
    # fetch column names
    try:
        columns = plpy.execute('SELECT * FROM ({query}) As a LIMIT 1 '.format(query=query))[0].keys()
    except Exception as e:
        # `except Exception, e` was Python-2-only syntax; `as` works on 2.6+/3.x
        plpy.error('Failed to build segmentation model: %s' % e)
    # extract column names to be used in building the segmentation model,
    # dropping the target and CARTO bookkeeping columns
    feature_columns = set(columns) - set([variable, 'cartodb_id',
                                          'the_geom', 'the_geom_webmercator'])
    # get data from database
    target, features = get_data(variable, feature_columns, query)
    model, accuracy = train_model(target, features, model_params, 0.2)
    cartodb_ids, result = predict_segment(model, feature_columns, target_query)
    accuracy_array = [accuracy] * result.shape[0]
    return zip(cartodb_ids, result, accuracy_array)
def train_model(target, features, model_params, test_split):
    """
    Train a Gradient Boosting model on the provided data and report its
    accuracy.

    Input:
    @param target: 1D array of the variable the model is trained to
                   predict
    @param features: 2D array (NSamples x NFeatures) used to train the
                     model
    @param model_params: dict of model parameters; the full
        specification is on the scikit-learn page for
        [GradientBoostingRegressor](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html)
    @param test_split: fraction of data withheld for testing /
                       calculating the accuracy
    """
    # hold out `test_split` of the rows for evaluation
    splits = train_test_split(features, target, test_size=test_split)
    features_train, features_test, target_train, target_test = splits
    model = GradientBoostingRegressor(**model_params)
    model.fit(features_train, target_train)
    # accuracy is measured over the full dataset, as before
    accuracy = calculate_model_accuracy(model, features, target)
    return model, accuracy
def calculate_model_accuracy(model, features, target):
    """
    Calculate the mean squared error of the model prediction.

    Input:
    @param model: model trained from input features
    @param features: features to make a prediction from
    @param target: target to compare prediction to
    Output:
    mean squared error of the model prediction compared to the target
    """
    predicted = model.predict(features)
    return metrics.mean_squared_error(predicted, target)
def predict_segment(model, features, target_query):
    """
    Use the provided model to predict the values for the new feature set.

    Input:
    @param model: the pretrained model
    @param features: list of features (column names) to use in the
                     model prediction
    @param target_query: query to run to obtain the data to predict on
                         and the cartodb_ids associated with it
    Output:
    (cartodb_ids, predictions) tuple
    """
    batch_size = 1000
    joined_features = ','.join(['"{0}"::numeric'.format(a) for a in features])
    try:
        cursor = plpy.cursor('SELECT Array[{joined_features}] As features FROM ({target_query}) As a'.format(
            joined_features=joined_features,
            target_query=target_query))
    except Exception as e:
        # `except Exception, e` was Python-2-only syntax; `as` works on 2.6+/3.x
        plpy.error('Failed to build segmentation model: %s' % e)
    results = []
    while True:
        rows = cursor.fetch(batch_size)
        if not rows:
            break
        batch = np.row_stack([np.array(row['features'], dtype=float)
                              for row in rows])
        # Need to fix this. Should be global mean. This will cause weird
        # effects (per-batch mean imputation)
        batch = replace_nan_with_mean(batch)
        prediction = model.predict(batch)
        results.append(prediction)
    try:
        cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids']
    except Exception as e:
        plpy.error('Failed to build segmentation model: %s' % e)
    return cartodb_ids, np.concatenate(results)

View File

@@ -0,0 +1,2 @@
"""Import all functions from clustering libraries."""
from markov import *

View File

@@ -0,0 +1,194 @@
"""
Spatial dynamics measurements using Spatial Markov
"""
# TODO: remove all plpy dependencies
import numpy as np
import pysal as ps
import plpy
import crankshaft.pysal_utils as pu
from crankshaft.analysis_data_provider import AnalysisDataProvider
class Markov:
    """Spatial Markov trend analysis, backed by a pluggable data provider."""

    def __init__(self, data_provider=None):
        # use the default provider unless one is injected (e.g., for tests)
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None
                              else data_provider)

    def spatial_trend(self, subquery, time_cols, num_classes=7,
                      w_type='knn', num_ngbrs=5, permutations=0,
                      geom_col='the_geom', id_col='cartodb_id'):
        """
        Predict the trends of a unit based on:
        1. history of its transitions to different classes (e.g., 1st
           quantile -> 2nd quantile)
        2. average class of its neighbors

        Inputs:
        @param subquery string: e.g., SELECT the_geom, cartodb_id,
          interesting_time_column FROM table_name
        @param time_cols list of strings: list of strings of column names
        @param num_classes (optional): number of classes to break
          distribution of values into. Currently uses quantile bins.
        @param w_type string (optional): weight type ('knn' or 'queen')
        @param num_ngbrs int (optional): number of neighbors (if knn type)
        @param permutations int (optional): number of permutations for
          test stats
        @param geom_col string (optional): name of column which contains
          the geometries
        @param id_col string (optional): name of column which has the ids
          of the table

        Outputs:
        @param trend_up float: probability that a geom will move to a
          higher class
        @param trend_down float: probability that a geom will move to a
          lower class
        @param trend float: (trend_up - trend_down) / trend_static
        @param volatility float: a measure of the volatility based on
          probability stddev(prob array)
        """
        if len(time_cols) < 2:
            plpy.error('More than one time column needs to be passed')
        query_params = {"id_col": id_col,
                        "time_cols": time_cols,
                        "geom_col": geom_col,
                        "subquery": subquery,
                        "num_ngbrs": num_ngbrs}
        rows = self.data_provider.get_markov(w_type, query_params)
        # spatial weights, row-standardized
        weights = pu.get_weight(rows, w_type)
        weights.transform = 'r'
        # time series matrix: one row per unit, one column per time step
        time_series = get_time_data(rows, time_cols)
        sp_markov = ps.Spatial_Markov(time_series,
                                      weights,
                                      k=num_classes,
                                      fixed=False,
                                      permutations=permutations)
        # quantile class of each unit's spatial lag at the last period
        lag_classes = ps.Quantiles(
            ps.lag_spatial(weights, time_series[:, -1]),
            k=num_classes).yb
        last_classes = sp_markov.classes[:, -1]
        # probability distribution for each unit from its (lag, class) pair
        prob_dist = get_prob_dist(sp_markov.P, lag_classes, last_classes)
        # per-unit upward/downward movement probabilities and volatility
        trend_up, trend_down, trend, volatility = get_prob_stats(
            prob_dist, last_classes)
        return zip(trend, trend_up, trend_down, volatility, weights.id_order)
def get_time_data(markov_data, time_cols):
    """
    Extract the time columns and bin appropriately.

    Columns arrive in the query result as 'attr1', 'attr2', ...;
    returns an (n_units x n_times) float matrix.
    """
    columns = []
    for position in range(1, len(time_cols) + 1):
        key = 'attr' + str(position)
        columns.append([row[key] for row in markov_data])
    # collected column-major; transpose to one row per unit
    return np.array(columns, dtype=float).transpose()
# not currently used
# not currently used
def rebin_data(time_data, num_time_per_bin):
    """
    Convert an n x l matrix into an (n/m) x l matrix where the values
    are reduced (averaged) for the intervening states:
    1 2 3 4    1.5 3.5
    5 6 7 8 -> 5.5 7.5
    9 8 7 6    8.5 6.5
    5 4 3 2    4.5 2.5
    if m = 2, the 4 x 4 matrix is transformed to a 2 x 4 matrix.
    This process effectively resamples the data at a longer time span n
    units longer than the input data.
    For cases when there is a remainder (remainder(5/3) = 2), the
    remaining two columns are binned together as the last time period,
    while the first three are binned together for the first period.

    Input:
    @param time_data n x l ndarray: measurements of an attribute at
      different time intervals
    @param num_time_per_bin int: number of columns to average into a new
      column
    Output:
    ceil(n / m) x l ndarray of resampled time series
    """
    # divmod/floor division: the old `/` produced a float under
    # Python 3, which crashed range()
    n_max, remainder = divmod(time_data.shape[1], num_time_per_bin)
    if remainder:
        # leftover columns form one extra (shorter) bin
        n_max += 1
    return np.array(
        [time_data[:, num_time_per_bin * i:num_time_per_bin * (i + 1)].mean(axis=1)
         for i in range(n_max)]).T
def get_prob_dist(transition_matrix, lag_indices, unit_indices):
    """
    Given an array of transition matrices, look up the probability
    associated with the arrangements passed.

    Input:
    @param transition_matrix ndarray[k,k,k]:
    @param lag_indices ndarray:
    @param unit_indices ndarray:
    Output:
    Array of probability distributions
    """
    # one (lag, unit) row lookup per observation
    rows = [transition_matrix[lag, unit]
            for lag, unit in zip(lag_indices, unit_indices)]
    return np.array(rows)
def get_prob_stats(prob_dist, unit_indices):
    """
    Get the statistics of the probability distributions.

    Outputs:
    @param trend_up ndarray(float): sum of probabilities for upward
      movement (relative to the unit index of that prob)
    @param trend_down ndarray(float): sum of probabilities for downward
      movement (relative to the unit index of that prob)
    @param trend ndarray(float): difference of upward and downward
      movements, normalized by the probability of staying; NaN when the
      stay probability is zero
    @param volatility ndarray(float): stddev of each probability row
    """
    num_elements = len(unit_indices)
    trend_up = np.empty(num_elements, dtype=float)
    trend_down = np.empty(num_elements, dtype=float)
    trend = np.empty(num_elements, dtype=float)
    for i in range(num_elements):
        trend_up[i] = prob_dist[i, (unit_indices[i] + 1):].sum()
        trend_down[i] = prob_dist[i, :unit_indices[i]].sum()
        stay_prob = prob_dist[i, unit_indices[i]]
        if stay_prob > 0.0:
            trend[i] = (trend_up[i] - trend_down[i]) / stay_prob
        else:
            # assigning None into a float array raises TypeError on
            # modern NumPy; NaN is the float encoding of "undefined"
            trend[i] = np.nan
    # calculate volatility of distribution
    volatility = prob_dist.std(axis=1)
    return trend_up, trend_down, trend, volatility

View File

@@ -0,0 +1,5 @@
joblib==0.8.3
numpy==1.6.1
scipy==0.14.0
pysal==1.11.2
scikit-learn==0.14.1

View File

@@ -0,0 +1,49 @@
"""
CartoDB Spatial Analysis Python Library
See:
https://github.com/CartoDB/crankshaft
"""
from setuptools import setup, find_packages
# Package metadata for the crankshaft PL/Python library.
setup(
    name='crankshaft',
    version='0.5.0',
    description='CartoDB Spatial Analysis Python Library',
    url='https://github.com/CartoDB/crankshaft',
    author='Data Services Team - CartoDB',
    author_email='dataservices@cartodb.com',
    license='MIT',
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Mapping comunity',
        'Topic :: Maps :: Mapping Tools',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 2.7',
    ],
    keywords='maps mapping tools spatial analysis geostatistics',
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),
    extras_require={
        'dev': ['unittest'],
        'test': ['unittest', 'nose', 'mock'],
    },
    # The choice of component versions is dictated by what's
    # provisioned in the production servers.
    # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
    install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'],
    requires=['pysal', 'numpy', 'sklearn'],
    test_suite='test'
)

View File

@@ -0,0 +1 @@
[[0.004793783909323601, 0.17999999999999999, 0.49808756424021061], [-1.0701189472090842, 0.079000000000000001, 0.14228288580832316], [-0.67867750971877305, 0.42099999999999999, 0.24867110969448558], [-0.67407386707620487, 0.246, 0.25013217644612995], [-0.79495689068870035, 0.33200000000000002, 0.21331928959090596], [-0.49279481022182703, 0.058999999999999997, 0.31107878905057329], [-0.38075627530057132, 0.28399999999999997, 0.35169205342069643], [-0.86710921611314895, 0.23699999999999999, 0.19294108571294855], [-0.78618647240956485, 0.050000000000000003, 0.2158791250244505], [-0.76108527223116984, 0.064000000000000001, 0.22330306830813684], [-0.13340753531942209, 0.247, 0.44693554317763651], [-0.57584545722033043, 0.48999999999999999, 0.28235982246156488], [-0.78882694661192831, 0.433, 0.2151065788731219], [-0.38769767950046219, 0.375, 0.34911988661484239], [-0.56057819488052207, 0.41399999999999998, 0.28754255985169652], [-0.41354017495644935, 0.45500000000000002, 0.339605447117173], [-0.23993577722243081, 0.49099999999999999, 0.40519002230969337], [-0.1389080156677496, 0.40400000000000003, 0.44476141839645233], [-0.25485737510500855, 0.376, 0.39941662953554224], [-0.71218610582902353, 0.17399999999999999, 0.23817476979886087], [-0.54533105995872144, 0.13700000000000001, 0.2927629228714812], [-0.39547917847510977, 0.033000000000000002, 0.34624464252424236], [-0.43052658996257548, 0.35399999999999998, 0.33340631435564982], [-0.37296719193774736, 0.40300000000000002, 0.35458643102865428], [-0.66482612169465694, 0.31900000000000001, 0.25308085650392698], [-0.13772133540823422, 0.34699999999999998, 0.44523032843016275], [-0.6765304487868502, 0.20999999999999999, 0.24935196033890672], [-0.64518763494323472, 0.32200000000000001, 0.25940279912025543], [-0.5078622084312413, 0.41099999999999998, 0.30577498972600159], [-0.12652006733772059, 0.42899999999999999, 0.44966013262301163], [-0.32691133022814595, 0.498, 0.37186747562269029], [0.25533848511500978, 
0.42399999999999999, 0.39923083899077472], [2.7045138116476508, 0.0050000000000000001, 0.0034202212972238577], [-0.1551614486076057, 0.44400000000000001, 0.43834701985429037], [1.9524487722567723, 0.012999999999999999, 0.025442473674991528], [-1.2055816465306763, 0.017000000000000001, 0.11398941970467646], [3.478472976017831, 0.002, 0.00025213964072468009], [-1.4621715757903719, 0.002, 0.071847099325659136], [-0.84010307600180256, 0.085000000000000006, 0.20042529779230778], [5.7097646237318243, 0.0030000000000000001, 5.6566262784940591e-09], [1.5082367956567375, 0.065000000000000002, 0.065746966514827365], [-0.58337270103430816, 0.44, 0.27982121546450034], [-0.083271860457022437, 0.45100000000000001, 0.46681768733385554], [-0.46872337815000953, 0.34599999999999997, 0.31963368715684204], [0.18490279849545319, 0.23799999999999999, 0.42665263797981101], [3.470424529947997, 0.012, 0.00025981817437825683], [-0.99942612137154796, 0.032000000000000001, 0.15879415560388499], [-1.3650387953594485, 0.034000000000000002, 0.08612042845912049], [1.8617160516432014, 0.081000000000000003, 0.03132156240215267], [1.1321188945775384, 0.11600000000000001, 0.12879222611766061], [0.064116686050580601, 0.27300000000000002, 0.4744386578180424], [-0.42032194540259099, 0.29999999999999999, 0.33712514016213468], [-0.79581215423980922, 0.123, 0.21307061309098785], [-0.42792753720906046, 0.45600000000000002, 0.33435193892883741], [-1.0629378527428395, 0.051999999999999998, 0.14390506780140866], [-0.54164761752225477, 0.33700000000000002, 0.29403064095211839], [1.0934778886820793, 0.13700000000000001, 0.13709201601893539], [-0.094068785378413719, 0.38200000000000001, 0.46252725802998929], [0.13482026574801856, 0.36799999999999999, 0.44637699118865737], [-0.13976995315653129, 0.34699999999999998, 0.44442087706276601], [-0.051047663924746682, 0.32000000000000001, 0.47964376985626245], [-0.21468297736730158, 0.41699999999999998, 0.41500724761906527], [-0.20873154637330626, 0.38800000000000001, 
0.41732890604390893], [-0.32427876152583485, 0.49199999999999999, 0.37286349875557478], [-0.65254842943280977, 0.374, 0.25702372075306734], [-0.48611858196118796, 0.23300000000000001, 0.31344154643990074], [-0.14482354344529477, 0.32600000000000001, 0.44242509660469886], [-0.51052030974200002, 0.439, 0.30484349480873729], [0.56814382285283538, 0.14999999999999999, 0.28496865660103166], [0.58680919931668207, 0.161, 0.27866592887231878], [0.013390357044409013, 0.25800000000000001, 0.49465818005865647], [-0.19050728887961568, 0.41399999999999998, 0.4244558160399462], [-0.60531777422216049, 0.35199999999999998, 0.2724839368239631], [1.0899331115425805, 0.127, 0.13787130480311838], [0.17015055382651084, 0.36899999999999999, 0.43244586845546418], [-0.21738337124409801, 0.40600000000000003, 0.41395479459421991], [1.0329303331079593, 0.079000000000000001, 0.15081825117169467], [1.0218317101096221, 0.104, 0.15343027913308094]]

View File

@@ -0,0 +1 @@
[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]

View File

@@ -0,0 +1 @@
[[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 
0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]]

View File

@@ -0,0 +1,52 @@
[[0.9319096128346788, "HH"],
[-1.135787401862846, "HL"],
[0.11732030672508517, "LL"],
[0.6152779669180425, "LL"],
[-0.14657336660125297, "LH"],
[0.6967858120189607, "LL"],
[0.07949310115714454, "HH"],
[0.4703198759258987, "HH"],
[0.4421125200498064, "HH"],
[0.5724288737143592, "LL"],
[0.8970743435692062, "LL"],
[0.18327334401918674, "LL"],
[-0.01466729201304962, "HL"],
[0.3481559372544409, "LL"],
[0.06547094736902978, "LL"],
[0.15482141569329988, "HH"],
[0.4373841193538136, "HH"],
[0.15971286468915544, "LL"],
[1.0543588860308968, "HH"],
[1.7372866900020818, "HH"],
[1.091998586053999, "LL"],
[0.1171572584252222, "HH"],
[0.08438455015300014, "LL"],
[0.06547094736902978, "LL"],
[0.15482141569329985, "HH"],
[1.1627044812890683, "HH"],
[0.06547094736902978, "LL"],
[0.795275137550483, "HH"],
[0.18562939195219, "LL"],
[0.3010757406693439, "LL"],
[2.8205795942839376, "HH"],
[0.11259190602909264, "LL"],
[-0.07116352791516614, "HL"],
[-0.09945240794119009, "LH"],
[0.18562939195219, "LL"],
[0.1832733440191868, "LL"],
[-0.39054253768447705, "HL"],
[-0.1672071289487642, "HL"],
[0.3337669247916343, "HH"],
[0.2584386102554792, "HH"],
[-0.19733845476322634, "HL"],
[-0.9379282899805409, "LH"],
[-0.028770969951095866, "LH"],
[0.051367269430983485, "LL"],
[-0.2172548045913472, "LH"],
[0.05136726943098351, "LL"],
[0.04191046803899837, "LL"],
[0.7482357030403517, "HH"],
[-0.014585767863118111, "LH"],
[0.5410013139159929, "HH"],
[1.0223932668429925, "LL"],
[1.4179402898927476, "LL"]]

View File

@@ -0,0 +1,54 @@
[
{"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
{"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
{"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
{"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
{"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
{"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
{"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
{"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
{"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
{"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
{"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
{"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
{"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
{"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
{"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
{"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
{"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
{"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
{"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
{"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
{"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
{"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
{"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
{"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
{"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
{"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
{"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
{"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
{"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
{"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
{"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
{"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
{"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
{"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
{"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
{"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
{"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
{"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
{"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
{"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
{"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
{"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
{"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
{"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
{"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
{"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
{"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
{"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
{"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
{"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
{"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
{"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,13 @@
# Test helper: installs a mock 'plpy' module so crankshaft code that does
# `import plpy` (normally provided by PL/Python inside PostgreSQL) can be
# imported and exercised outside the database.
import unittest
from mock_plpy import MockPlPy
plpy = MockPlPy()
import sys
# Register the stub under the module name 'plpy' BEFORE any crankshaft module
# is imported, so their `import plpy` resolves to this mock.
sys.modules['plpy'] = plpy
import os
def fixture_file(name):
    """Return the absolute path of fixture *name* inside this test
    directory's fixtures/ folder, independent of the current working dir."""
    # Renamed from `dir` to avoid shadowing the builtin of the same name.
    base_dir = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(base_dir, 'fixtures', name)

View File

@@ -0,0 +1,54 @@
import re
class MockCursor:
    """Minimal stand-in for a plpy cursor, backed by an in-memory list."""

    def __init__(self, data):
        self.cursor_pos = 0
        self.data = data

    def fetch(self, batch_size):
        """Return the next batch of up to *batch_size* rows.

        Returns an empty list once the backing data is exhausted.
        """
        start = self.cursor_pos
        self.cursor_pos = start + batch_size
        return self.data[start:self.cursor_pos]
class MockPlPy:
    """In-memory mock of PostgreSQL's plpy module for unit tests.

    Log-style messages are recorded in per-level lists; query results are
    registered up-front with _define_result and matched by regex when
    execute() or cursor() is called.
    """
    def __init__(self):
        self._reset()

    def _reset(self):
        """Clear all recorded messages and registered query results."""
        self.infos = []
        self.notices = []
        self.debugs = []
        self.logs = []
        self.warnings = []
        self.errors = []
        self.fatals = []
        self.executes = []
        self.prepares = []
        # list of [compiled_pattern, result] pairs consulted by execute();
        # previously this attribute was (redundantly) initialized twice.
        self.results = []

    def _define_result(self, query, result):
        """Register *result* to be returned for queries matching *query*.

        *query* is a regular expression, matched case-insensitively against
        the start of the executed query string.
        """
        pattern = re.compile(query, re.IGNORECASE | re.MULTILINE)
        self.results.append([pattern, result])

    def notice(self, msg):
        self.notices.append(msg)

    def debug(self, msg):
        # Bug fix: debug messages were previously appended to self.notices,
        # so self.debugs always stayed empty.
        self.debugs.append(msg)

    def info(self, msg):
        self.infos.append(msg)

    def cursor(self, query):
        """Return a MockCursor over the result registered for *query*."""
        data = self.execute(query)
        return MockCursor(data)

    # TODO: additional arguments
    def execute(self, query):
        """Return the first registered result whose pattern matches *query*.

        Falls back to an empty list when no registered pattern matches.
        """
        for result in self.results:
            if result[0].match(query):
                return result[1]
        return []

View File

@@ -0,0 +1,78 @@
import unittest
import numpy as np
from helper import fixture_file
from crankshaft.clustering import Getis
import crankshaft.pysal_utils as pu
from crankshaft import random_seeds
import json
from crankshaft.analysis_data_provider import AnalysisDataProvider
# Fixture files produced as follows
#
# import pysal as ps
# import numpy as np
# import random
#
# # setup variables
# f = ps.open(ps.examples.get_path("stl_hom.dbf"))
# y = np.array(f.by_col['HR8893'])
# w_queen = ps.queen_from_shapefile(ps.examples.get_path("stl_hom.shp"))
#
# out_queen = [{"id": index + 1,
# "neighbors": [x+1 for x in w_queen.neighbors[index]],
# "value": val} for index, val in enumerate(y)]
#
# with open('neighbors_queen_getis.json', 'w') as f:
# f.write(str(out_queen))
#
# random.seed(1234)
# np.random.seed(1234)
# lgstar_queen = ps.esda.getisord.G_Local(y, w_queen, star=True,
# permutations=999)
#
# with open('getis_queen.json', 'w') as f:
# f.write(str(zip(lgstar_queen.z_sim,
# lgstar_queen.p_sim, lgstar_queen.p_z_sim)))
class FakeDataProvider(AnalysisDataProvider):
    """Data-provider stub that returns canned rows instead of querying."""

    def __init__(self, mock_data):
        self.mock_result = mock_data

    def get_getis(self, w_type, param):
        """Ignore the weight type and params; hand back the canned result."""
        return self.mock_result
class GetisTest(unittest.TestCase):
    """Tests for the Getis-Ord G* statistic.

    Replicates the PySAL tutorial example:
    https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/autocorrelation.html#local-g-and-g
    """
    def setUp(self):
        # raw neighbor/value rows used as analysis input
        with open(fixture_file('neighbors_getis.json')) as fh:
            self.neighbors_data = json.loads(fh.read())
        # known (z, p, p_z) triples pre-computed with PySAL
        with open(fixture_file('getis.json')) as fh:
            self.getis_data = json.loads(fh.read())

    def test_getis_ord(self):
        """G* z-scores should match the known PySAL values."""
        rows = [{'id': record['id'],
                 'attr1': record['value'],
                 'neighbors': record['neighbors']}
                for record in self.neighbors_data]
        random_seeds.set_random_seeds(1234)
        getis = Getis(FakeDataProvider(rows))
        raw = getis.getis_ord('subquery', 'value',
                              'queen', None, 999, 'the_geom',
                              'cartodb_id')
        computed = [(row[0], row[1]) for row in raw]
        known = np.array(self.getis_data)[:, 0:2]
        # only the z-scores are compared; p-values vary with permutations
        for (res_z, _res_p), (exp_z, _exp_p) in zip(computed, known):
            self.assertAlmostEqual(res_z, exp_z, delta=1e-2)

View File

@@ -0,0 +1,56 @@
import unittest
import numpy as np
# from mock_plpy import MockPlPy
# plpy = MockPlPy()
#
# import sys
# sys.modules['plpy'] = plpy
from helper import fixture_file
from crankshaft.clustering import Kmeans
from crankshaft.analysis_data_provider import AnalysisDataProvider
import crankshaft.clustering as cc
from crankshaft import random_seeds
import json
from collections import OrderedDict
class FakeDataProvider(AnalysisDataProvider):
    """Data-provider stub returning a fixed result for kmeans queries."""

    def __init__(self, mocked_result):
        self.mocked_result = mocked_result

    def get_spatial_kmeans(self, query):
        """Return the canned rows regardless of *query*."""
        return self.mocked_result

    def get_nonspatial_kmeans(self, query, standarize):
        """Return the canned rows ('standarize' spelling matches the caller)."""
        return self.mocked_result
class KMeansTest(unittest.TestCase):
    """Tests for spatial k-means clustering."""

    def setUp(self):
        with open(fixture_file('kmeans.json')) as fh:
            self.cluster_data = json.loads(fh.read())
        self.params = {"subquery": "select * from table",
                       "no_clusters": "10"}

    def test_kmeans(self):
        """Two well-separated point clouds split into two equal clusters."""
        rows = [{'xs': rec['xs'], 'ys': rec['ys'], 'ids': rec['ids']}
                for rec in self.cluster_data]
        random_seeds.set_random_seeds(1234)
        clusters = Kmeans(FakeDataProvider(rows)).spatial('subquery', 2)
        labels = [pair[1] for pair in clusters]
        cluster_zero = [pair for pair in clusters if pair[1] == 0]
        cluster_one = [pair for pair in clusters if pair[1] == 1]
        self.assertEqual(len(np.unique(labels)), 2)
        self.assertEqual(len(cluster_zero), 20)
        self.assertEqual(len(cluster_one), 20)

View File

@@ -0,0 +1,112 @@
import unittest
import numpy as np
from helper import fixture_file
from crankshaft.clustering import Moran
from crankshaft.analysis_data_provider import AnalysisDataProvider
import crankshaft.pysal_utils as pu
from crankshaft import random_seeds
import json
from collections import OrderedDict
class FakeDataProvider(AnalysisDataProvider):
    """Data-provider stub: returns canned rows instead of hitting the DB."""

    def __init__(self, mock_data):
        self.mock_result = mock_data

    def get_moran(self, w_type, params):
        """Return the canned rows, ignoring weight type and params."""
        return self.mock_result
class MoranTest(unittest.TestCase):
    """Testing class for Moran's I functions"""
    def setUp(self):
        # NOTE(review): self.params and self.params_markov are not referenced
        # by the tests below -- presumably retained for parity with other
        # suites; confirm before removing.
        self.params = {"id_col": "cartodb_id",
                       "attr1": "andy",
                       "attr2": "jay_z",
                       "subquery": "SELECT * FROM a_list",
                       "geom_col": "the_geom",
                       "num_ngbrs": 321}
        self.params_markov = {"id_col": "cartodb_id",
                              "time_cols": ["_2013_dec", "_2014_jan",
                                            "_2014_feb"],
                              "subquery": "SELECT * FROM a_list",
                              "geom_col": "the_geom",
                              "num_ngbrs": 321}
        # raw neighbor/value rows fed to the Moran computations
        self.neighbors_data = json.loads(
            open(fixture_file('neighbors.json')).read())
        # pre-computed (value, quad) pairs used as expected output
        self.moran_data = json.loads(
            open(fixture_file('moran.json')).read())
    def test_map_quads(self):
        """Test map_quads"""
        from crankshaft.clustering import map_quads
        self.assertEqual(map_quads(1), 'HH')
        self.assertEqual(map_quads(2), 'LH')
        self.assertEqual(map_quads(3), 'LL')
        self.assertEqual(map_quads(4), 'HL')
        # out-of-range and non-numeric inputs map to None
        self.assertEqual(map_quads(33), None)
        self.assertEqual(map_quads('andy'), None)
    def test_quad_position(self):
        """Test lisa_sig_vals"""
        from crankshaft.clustering import quad_position
        # NOTE(review): np.int was removed in NumPy >= 1.24; use plain int
        # (or np.int64) if the dependency is ever upgraded.
        quads = np.array([1, 2, 3, 4], np.int)
        ans = np.array(['HH', 'LH', 'LL', 'HL'])
        test_ans = quad_position(quads)
        self.assertTrue((test_ans == ans).all())
    def test_local_stat(self):
        """Test Moran's I local"""
        data = [OrderedDict([('id', d['id']),
                             ('attr1', d['value']),
                             ('neighbors', d['neighbors'])])
                for d in self.neighbors_data]
        moran = Moran(FakeDataProvider(data))
        # fixed seed so the permutation-based results are reproducible
        random_seeds.set_random_seeds(1234)
        result = moran.local_stat('subquery', 'value',
                                  'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        zipped_values = zip(result, self.moran_data)
        for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)
            self.assertEqual(res_quad, exp_quad)
    def test_moran_local_rate(self):
        """Test Moran's I rate"""
        # denominator fixed at 1 so the rate reduces to the raw value
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'attr2': 1,
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        random_seeds.set_random_seeds(1234)
        moran = Moran(FakeDataProvider(data))
        result = moran.local_rate_stat('subquery', 'numerator', 'denominator',
                                       'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        zipped_values = zip(result, self.moran_data)
        # NOTE(review): unlike test_local_stat, res_quad/exp_quad are unpacked
        # but never compared here -- confirm whether that is intentional.
        for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)
    def test_moran(self):
        """Test Moran's I global"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        random_seeds.set_random_seeds(1235)
        moran = Moran(FakeDataProvider(data))
        result = moran.global_stat('table', 'value',
                                   'knn', 5, 99, 'the_geom',
                                   'cartodb_id')
        result_moran = result[0][0]
        # loose check: the global statistic is compared against the mean of
        # the per-feature local values with a wide tolerance (delta=0.1)
        expected_moran = np.array([row[0] for row in self.moran_data]).mean()
        self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)

View File

@@ -0,0 +1,160 @@
import unittest
import crankshaft.pysal_utils as pu
from crankshaft import random_seeds
from collections import OrderedDict
class PysalUtilsTest(unittest.TestCase):
    """Testing class for utility functions related to PySAL integrations"""
    def setUp(self):
        # params1: two named attributes (Moran-style inputs)
        self.params1 = OrderedDict([("id_col", "cartodb_id"),
                                    ("attr1", "andy"),
                                    ("attr2", "jay_z"),
                                    ("subquery", "SELECT * FROM a_list"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])
        # params2/params3: numerator/denominator (rate-style), swapped order
        self.params2 = OrderedDict([("id_col", "cartodb_id"),
                                    ("numerator", "price"),
                                    ("denominator", "sq_meters"),
                                    ("subquery", "SELECT * FROM pecan"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])
        self.params3 = OrderedDict([("id_col", "cartodb_id"),
                                    ("numerator", "sq_meters"),
                                    ("denominator", "price"),
                                    ("subquery", "SELECT * FROM pecan"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])
        # params_array: time-series columns (Markov-style inputs)
        self.params_array = {"id_col": "cartodb_id",
                             "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"],
                             "subquery": "SELECT * FROM a_list",
                             "geom_col": "the_geom",
                             "num_ngbrs": 321}
    def test_query_attr_select(self):
        """Test query_attr_select"""
        ans1 = ("i.\"andy\"::numeric As attr1, "
                "i.\"jay_z\"::numeric As attr2, ")
        ans2 = ("i.\"price\"::numeric As attr1, "
                "i.\"sq_meters\"::numeric As attr2, ")
        ans3 = ("i.\"sq_meters\"::numeric As attr1, "
                "i.\"price\"::numeric As attr2, ")
        ans_array = ("i.\"_2013_dec\"::numeric As attr1, "
                     "i.\"_2014_jan\"::numeric As attr2, "
                     "i.\"_2014_feb\"::numeric As attr3, ")
        self.assertEqual(pu.query_attr_select(self.params1), ans1)
        self.assertEqual(pu.query_attr_select(self.params2), ans2)
        self.assertEqual(pu.query_attr_select(self.params3), ans3)
        self.assertEqual(pu.query_attr_select(self.params_array), ans_array)
    def test_query_attr_where(self):
        """Test pu.query_attr_where"""
        ans1 = ("idx_replace.\"andy\" IS NOT NULL AND "
                "idx_replace.\"jay_z\" IS NOT NULL")
        ans_array = ("idx_replace.\"_2013_dec\" IS NOT NULL AND "
                     "idx_replace.\"_2014_jan\" IS NOT NULL AND "
                     "idx_replace.\"_2014_feb\" IS NOT NULL")
        self.assertEqual(pu.query_attr_where(self.params1), ans1)
        self.assertEqual(pu.query_attr_where(self.params_array), ans_array)
    def test_knn(self):
        """Test knn neighbors constructor"""
        ans1 = "SELECT i.\"cartodb_id\" As id, " \
               "i.\"andy\"::numeric As attr1, " \
               "i.\"jay_z\"::numeric As attr2, " \
               "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
               "FROM (SELECT * FROM a_list) As j " \
               "WHERE " \
               "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
               "j.\"andy\" IS NOT NULL AND " \
               "j.\"jay_z\" IS NOT NULL " \
               "ORDER BY " \
               "j.\"the_geom\" <-> i.\"the_geom\" ASC " \
               "LIMIT 321)) As neighbors " \
               "FROM (SELECT * FROM a_list) As i " \
               "WHERE i.\"andy\" IS NOT NULL AND " \
               "i.\"jay_z\" IS NOT NULL " \
               "ORDER BY i.\"cartodb_id\" ASC;"
        ans_array = "SELECT i.\"cartodb_id\" As id, " \
                    "i.\"_2013_dec\"::numeric As attr1, " \
                    "i.\"_2014_jan\"::numeric As attr2, " \
                    "i.\"_2014_feb\"::numeric As attr3, " \
                    "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
                    "FROM (SELECT * FROM a_list) As j " \
                    "WHERE i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
                    "j.\"_2013_dec\" IS NOT NULL AND " \
                    "j.\"_2014_jan\" IS NOT NULL AND " \
                    "j.\"_2014_feb\" IS NOT NULL " \
                    "ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC " \
                    "LIMIT 321)) As neighbors " \
                    "FROM (SELECT * FROM a_list) As i " \
                    "WHERE i.\"_2013_dec\" IS NOT NULL AND " \
                    "i.\"_2014_jan\" IS NOT NULL AND " \
                    "i.\"_2014_feb\" IS NOT NULL "\
                    "ORDER BY i.\"cartodb_id\" ASC;"
        self.assertEqual(pu.knn(self.params1), ans1)
        self.assertEqual(pu.knn(self.params_array), ans_array)
    def test_queen(self):
        """Test queen neighbors constructor"""
        ans1 = "SELECT i.\"cartodb_id\" As id, " \
               "i.\"andy\"::numeric As attr1, " \
               "i.\"jay_z\"::numeric As attr2, " \
               "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
               "FROM (SELECT * FROM a_list) As j " \
               "WHERE " \
               "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
               "ST_Touches(i.\"the_geom\", " \
               "j.\"the_geom\") AND " \
               "j.\"andy\" IS NOT NULL AND " \
               "j.\"jay_z\" IS NOT NULL)" \
               ") As neighbors " \
               "FROM (SELECT * FROM a_list) As i " \
               "WHERE i.\"andy\" IS NOT NULL AND " \
               "i.\"jay_z\" IS NOT NULL " \
               "ORDER BY i.\"cartodb_id\" ASC;"
        self.assertEqual(pu.queen(self.params1), ans1)
    def test_construct_neighbor_query(self):
        """Test construct_neighbor_query"""
        # Compare to raw knn query
        self.assertEqual(pu.construct_neighbor_query('knn', self.params1),
                         pu.knn(self.params1))
    def test_get_attributes(self):
        """Test get_attributes"""
        # TODO: placeholder -- add real assertions for get_attributes
        self.assertEqual(True, True)
    def test_get_weight(self):
        """Test get_weight"""
        # TODO: placeholder -- add real assertions for get_weight
        self.assertEqual(True, True)
    def test_empty_zipped_array(self):
        """Test empty_zipped_array"""
        ans2 = [(None, None)]
        ans4 = [(None, None, None, None)]
        self.assertEqual(pu.empty_zipped_array(2), ans2)
        self.assertEqual(pu.empty_zipped_array(4), ans4)

View File

@@ -0,0 +1,64 @@
import unittest
import numpy as np
from helper import plpy, fixture_file
import crankshaft.segmentation as segmentation
import json
class SegmentationTest(unittest.TestCase):
    """Testing class for segmentation functions"""
    def setUp(self):
        # clear recorded messages and registered query results on the plpy mock
        plpy._reset()
    def generate_random_data(self,n_samples,random_state, row_type=False):
        """Build a synthetic dataset whose target is a deterministic
        nonlinear function of three features (y = x1 + x2^2 + x3).

        With row_type=True returns (per-row feature dicts, target array);
        otherwise returns a single dict of column arrays.
        """
        x1 = random_state.uniform(size=n_samples)
        x2 = random_state.uniform(size=n_samples)
        x3 = random_state.randint(0, 4, size=n_samples)
        y = x1+x2*x2+x3
        cartodb_id = range(len(x1))
        if row_type:
            # per-row dicts: the shape expected by the prediction path
            return [ {'features': vals} for vals in zip(x1,x2,x3)], y
        else:
            # single dict of column arrays: the shape expected by training
            return [dict( zip(['x1','x2','x3','target', 'cartodb_id'],[x1,x2,x3,y,cartodb_id]))]
    def test_replace_nan_with_mean(self):
        # NOTE(review): this test asserts nothing -- it builds the input
        # array but never calls the replace-nan helper nor checks a result.
        test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])
    def test_create_and_predict_segment(self):
        n_samples = 1000
        # independent RNG streams for training and test data
        random_state_train = np.random.RandomState(13)
        random_state_test = np.random.RandomState(134)
        training_data = self.generate_random_data(n_samples, random_state_train)
        test_data, test_y = self.generate_random_data(n_samples, random_state_test, row_type=True)
        ids = [{'cartodb_ids': range(len(test_data))}]
        rows = [{'x1': 0,'x2':0,'x3':0,'y':0,'cartodb_id':0}]
        # register mock query results, matched by regex (see MockPlPy)
        plpy._define_result('select \* from \(select \* from training\) a limit 1',rows)
        plpy._define_result('.*from \(select \* from training\) as a' ,training_data)
        plpy._define_result('select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',ids)
        plpy._define_result('.*select \* from test.*' ,test_data)
        model_parameters = {'n_estimators': 1200,
                            'max_depth': 3,
                            'subsample' : 0.5,
                            'learning_rate': 0.01,
                            'min_samples_leaf': 1}
        result = segmentation.create_and_predict_segment(
            'select * from training',
            'target',
            'select * from test',
            model_parameters)
        prediction = [r[1] for r in result]
        # RMSE between predictions and the known synthetic targets
        accuracy =np.sqrt(np.mean( np.square( np.array(prediction) - np.array(test_y))))
        self.assertEqual(len(result),len(test_data))
        # NOTE(review): result[0][2] is presumed to be a per-row error
        # measure -- confirm against create_and_predict_segment's contract
        self.assertTrue( result[0][2] < 0.01)
        self.assertTrue( accuracy < 0.5*np.mean(test_y) )

View File

@@ -0,0 +1,349 @@
import unittest
import numpy as np
import unittest
from helper import fixture_file
from crankshaft.space_time_dynamics import Markov
import crankshaft.space_time_dynamics as std
from crankshaft import random_seeds
from crankshaft.analysis_data_provider import AnalysisDataProvider
import json
class FakeDataProvider(AnalysisDataProvider):
    """Data-provider stub that serves canned rows to the Markov analysis."""

    def __init__(self, data):
        self.mock_result = data

    def get_markov(self, w_type, params):
        """Return the canned rows, ignoring weight type and params."""
        return self.mock_result
class SpaceTimeTests(unittest.TestCase):
"""Testing class for Markov Functions."""
def setUp(self):
self.params = {"id_col": "cartodb_id",
"time_cols": ['dec_2013', 'jan_2014', 'feb_2014'],
"subquery": "SELECT * FROM a_list",
"geom_col": "the_geom",
"num_ngbrs": 321}
self.neighbors_data = json.loads(
open(fixture_file('neighbors_markov.json')).read())
self.markov_data = json.loads(open(fixture_file('markov.json')).read())
self.time_data = np.array([i * np.ones(10, dtype=float)
for i in range(10)]).T
self.transition_matrix = np.array([
[[0.96341463, 0.0304878, 0.00609756, 0., 0.],
[0.06040268, 0.83221477, 0.10738255, 0., 0.],
[0., 0.14, 0.74, 0.12, 0.],
[0., 0.03571429, 0.32142857, 0.57142857, 0.07142857],
[0., 0., 0., 0.16666667, 0.83333333]],
[[0.79831933, 0.16806723, 0.03361345, 0., 0.],
[0.0754717, 0.88207547, 0.04245283, 0., 0.],
[0.00537634, 0.06989247, 0.8655914, 0.05913978, 0.],
[0., 0., 0.06372549, 0.90196078, 0.03431373],
[0., 0., 0., 0.19444444, 0.80555556]],
[[0.84693878, 0.15306122, 0., 0., 0.],
[0.08133971, 0.78947368, 0.1291866, 0., 0.],
[0.00518135, 0.0984456, 0.79274611, 0.0984456, 0.00518135],
[0., 0., 0.09411765, 0.87058824, 0.03529412],
[0., 0., 0., 0.10204082, 0.89795918]],
[[0.8852459, 0.09836066, 0., 0.01639344, 0.],
[0.03875969, 0.81395349, 0.13953488, 0., 0.00775194],
[0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
[0., 0.02339181, 0.12865497, 0.75438596, 0.09356725],
[0., 0., 0., 0.09661836, 0.90338164]],
[[0.33333333, 0.66666667, 0., 0., 0.],
[0.0483871, 0.77419355, 0.16129032, 0.01612903, 0.],
[0.01149425, 0.16091954, 0.74712644, 0.08045977, 0.],
[0., 0.01036269, 0.06217617, 0.89637306, 0.03108808],
[0., 0., 0., 0.02352941, 0.97647059]]]
)
def test_spatial_markov(self):
"""Test Spatial Markov."""
data = [{'id': d['id'],
'attr1': d['y1995'],
'attr2': d['y1996'],
'attr3': d['y1997'],
'attr4': d['y1998'],
'attr5': d['y1999'],
'attr6': d['y2000'],
'attr7': d['y2001'],
'attr8': d['y2002'],
'attr9': d['y2003'],
'attr10': d['y2004'],
'attr11': d['y2005'],
'attr12': d['y2006'],
'attr13': d['y2007'],
'attr14': d['y2008'],
'attr15': d['y2009'],
'neighbors': d['neighbors']} for d in self.neighbors_data]
# print(str(data[0]))
markov = Markov(FakeDataProvider(data))
random_seeds.set_random_seeds(1234)
result = markov.spatial_trend('subquery',
['y1995', 'y1996', 'y1997', 'y1998',
'y1999', 'y2000', 'y2001', 'y2002',
'y2003', 'y2004', 'y2005', 'y2006',
'y2007', 'y2008', 'y2009'],
5, 'knn', 5, 0, 'the_geom',
'cartodb_id')
self.assertTrue(result is not None)
result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
print result[0]
expected = self.markov_data
for ([res_trend, res_up, res_down, res_vol, res_id],
[exp_trend, exp_up, exp_down, exp_vol, exp_id]
) in zip(result, expected):
self.assertAlmostEqual(res_trend, exp_trend)
def test_get_time_data(self):
"""Test get_time_data"""
data = [{'attr1': d['y1995'],
'attr2': d['y1996'],
'attr3': d['y1997'],
'attr4': d['y1998'],
'attr5': d['y1999'],
'attr6': d['y2000'],
'attr7': d['y2001'],
'attr8': d['y2002'],
'attr9': d['y2003'],
'attr10': d['y2004'],
'attr11': d['y2005'],
'attr12': d['y2006'],
'attr13': d['y2007'],
'attr14': d['y2008'],
'attr15': d['y2009']} for d in self.neighbors_data]
result = std.get_time_data(data, ['y1995', 'y1996', 'y1997', 'y1998',
'y1999', 'y2000', 'y2001', 'y2002',
'y2003', 'y2004', 'y2005', 'y2006',
'y2007', 'y2008', 'y2009'])
# expected was prepared from PySAL example:
# f = ps.open(ps.examples.get_path("usjoin.csv"))
# pci = np.array([f.by_col[str(y)]
# for y in range(1995, 2010)]).transpose()
# rpci = pci / (pci.mean(axis = 0))
expected = np.array(
[[0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154,
0.83271652, 0.83786314, 0.85012593, 0.85509656, 0.86416612,
0.87119375, 0.86302631, 0.86148267, 0.86252252, 0.86746356],
[0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388,
0.90746978, 0.89830489, 0.89431991, 0.88924794, 0.89815176,
0.91832091, 0.91706054, 0.90139505, 0.87897455, 0.86216858],
[0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522,
0.78964559, 0.80584442, 0.8084998, 0.82258551, 0.82668196,
0.82373724, 0.81814804, 0.83675961, 0.83574199, 0.84647177],
[1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841,
1.14506948, 1.12151133, 1.11160697, 1.10888621, 1.11399806,
1.12168029, 1.13164797, 1.12958508, 1.11371818, 1.09936775],
[1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025,
1.16898201, 1.17212488, 1.14752303, 1.11843284, 1.11024964,
1.11943471, 1.11736468, 1.10863242, 1.09642516, 1.07762337],
[1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684,
1.44184737, 1.44782832, 1.41978227, 1.39092208, 1.4059372,
1.40788646, 1.44052766, 1.45241216, 1.43306098, 1.4174431],
[1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149,
1.10888138, 1.11856629, 1.13062931, 1.11944984, 1.12446239,
1.11671008, 1.10880034, 1.08401709, 1.06959206, 1.07875225],
[1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545,
0.99854316, 0.9880258, 0.99669587, 0.99327676, 1.01400905,
1.03176742, 1.040511, 1.01749645, 0.9936394, 0.98279746],
[0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845,
0.99127006, 0.97925917, 0.9683482, 0.95335147, 0.93694787,
0.94308213, 0.92232874, 0.91284091, 0.89689833, 0.88928858],
[0.87418391, 0.86416601, 0.84425695, 0.8404494, 0.83903044,
0.8578708, 0.86036185, 0.86107306, 0.8500772, 0.86981998,
0.86837929, 0.87204141, 0.86633032, 0.84946077, 0.83287146],
[1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624,
1.14450183, 1.12349752, 1.12596664, 1.12213996, 1.1119989,
1.10257792, 1.10491258, 1.11059842, 1.10509795, 1.10020097],
[0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687,
0.95831051, 0.94480909, 0.94804195, 0.95430286, 0.94103989,
0.92122519, 0.91010201, 0.89280392, 0.89298243, 0.89165385],
[0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647,
0.9480927, 0.93539182, 0.95388718, 0.94597005, 0.96918424,
0.94781281, 0.93466815, 0.94281559, 0.96520315, 0.96715441],
[0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897,
0.98687073, 0.99237486, 0.98209969, 0.9877653, 0.97399471,
0.96910087, 0.98416665, 0.98423613, 0.99823861, 0.99545704],
[0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012,
0.86191535, 0.84981451, 0.85472102, 0.84564835, 0.83998883,
0.83478547, 0.82803648, 0.8198736, 0.82265395, 0.8399404],
[0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136,
0.82785597, 0.86008789, 0.86776298, 0.86720209, 0.8676334,
0.89179317, 0.94202108, 0.9422231, 0.93902708, 0.94479184],
[0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238,
0.90906632, 0.92693339, 0.93695966, 0.94242697, 0.94338265,
0.91981796, 0.91108804, 0.90543476, 0.91737138, 0.94793657],
[1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723,
1.20172869, 1.21328691, 1.22624778, 1.22397075, 1.23857042,
1.24419893, 1.23929384, 1.23418676, 1.23626739, 1.26754398],
[1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667,
1.34790023, 1.34399863, 1.32575181, 1.30795492, 1.30544841,
1.30303302, 1.32107766, 1.32936244, 1.33001241, 1.33288462],
[1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093,
1.05059016, 1.03405057, 1.02747623, 1.03162734, 0.9961416,
0.97356208, 0.94241549, 0.92754547, 0.92549227, 0.92138102],
[1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264,
1.13889622, 1.12442212, 1.13367018, 1.13982256, 1.14029944,
1.11979401, 1.10905389, 1.10577769, 1.11166825, 1.09985155],
[0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284,
0.74480073, 0.76098396, 0.76156903, 0.76651952, 0.76533288,
0.78205934, 0.76842416, 0.77487118, 0.77768683, 0.78801192],
[0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803,
0.97370819, 0.96419154, 0.97209861, 0.97441313, 0.96356162,
0.94745352, 0.93965462, 0.93069645, 0.94020973, 0.94358232],
[0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801,
0.80071489, 0.83358256, 0.83451613, 0.85175032, 0.85954307,
0.86790024, 0.87170334, 0.87863799, 0.87497981, 0.87888675],
[0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619,
0.98733195, 0.99644997, 0.99669587, 1.02559097, 1.01116651,
0.99988024, 0.97906749, 0.99323123, 1.00204939, 0.99602148],
[1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683,
1.08312397, 1.05192626, 1.04230892, 1.05577278, 1.08569751,
1.12443486, 1.08891079, 1.08603695, 1.05997314, 1.02160943],
[1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272,
1.18257029, 1.16226243, 1.16009196, 1.14467789, 1.14820235,
1.12386598, 1.12680236, 1.12357937, 1.1159258, 1.12570828],
[1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667,
1.31210239, 1.29989156, 1.29203193, 1.27183516, 1.26830786,
1.2617743, 1.28656675, 1.29734097, 1.29390205, 1.29345446],
[0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864,
0.78772975, 0.82848011, 0.8259679, 0.82435705, 0.83108634,
0.84373784, 0.83891093, 0.84349247, 0.85637272, 0.86539395],
[1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626,
1.2256767, 1.21126648, 1.19377804, 1.18355337, 1.19674434,
1.21536573, 1.23653297, 1.27962009, 1.27968392, 1.25907738],
[0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282,
0.96480308, 0.94686376, 0.93679073, 0.92540049, 0.92988835,
0.93442917, 0.92100464, 0.91475304, 0.90249622, 0.9021363],
[0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368,
0.88937573, 0.894401, 0.90448993, 0.95495898, 0.92698333,
0.94745352, 0.92562488, 0.96635366, 1.02520312, 1.0394296],
[1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073,
1.00759019, 0.99192968, 0.99747298, 0.99550759, 0.97583768,
0.9610168, 0.94779638, 0.93759089, 0.93353431, 0.94121705],
[0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613,
0.83434854, 0.85813595, 0.84667961, 0.84374558, 0.85951183,
0.87194227, 0.89455097, 0.88283929, 0.90349491, 0.90600675],
[1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086,
1.00581626, 0.98850522, 0.99291168, 0.98983209, 0.97511924,
0.96134615, 0.96382634, 0.95011401, 0.9434686, 0.94637765],
[1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857,
1.04800023, 1.03024941, 1.04200483, 1.0402554, 1.03296979,
1.02191682, 1.02476275, 1.02347523, 1.02517684, 1.04359571],
[1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043,
1.0531801, 1.07452771, 1.09383478, 1.1052447, 1.10322136,
1.09167939, 1.08772756, 1.08859544, 1.09177338, 1.1096083],
[0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809,
0.86287327, 0.85169796, 0.85411285, 0.84886336, 0.84517414,
0.84843858, 0.84488343, 0.83374329, 0.82812044, 0.82878599],
[0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286,
0.92652175, 0.94278865, 0.93682452, 0.98655146, 0.992237,
0.9798497, 0.93869677, 0.96947771, 1.00362626, 0.98102351],
[0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967,
0.93092109, 0.92662519, 0.93412152, 0.93501274, 0.92879506,
0.92110542, 0.91035556, 0.90430364, 0.89994694, 0.90073864],
[0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824,
0.98882205, 0.97662234, 0.95601578, 0.94905385, 0.94934888,
0.97152609, 0.97163004, 0.9700702, 0.97158948, 0.95884908],
[0.83980439, 0.84726737, 0.85747, 0.85467221, 0.8556751,
0.84818516, 0.85265681, 0.84502402, 0.82645665, 0.81743586,
0.83550406, 0.83338919, 0.83511679, 0.82136617, 0.80921874],
[0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441,
0.95440787, 0.96364363, 0.96804412, 0.97136214, 0.97583768,
0.95571724, 0.96895368, 0.97001634, 0.97082733, 0.98782366],
[1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249,
1.10558188, 1.1214086, 1.12292577, 1.13021031, 1.13342735,
1.14686068, 1.14502975, 1.14474747, 1.14084037, 1.16142926],
[1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863,
1.11856702, 1.09764283, 1.08815849, 1.08044313, 1.09278827,
1.07003204, 1.08398066, 1.09831768, 1.09298232, 1.09176125],
[0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744,
0.77751194, 0.79902974, 0.81437881, 0.80788828, 0.79603865,
0.78966436, 0.79949807, 0.80172182, 0.82168155, 0.85587911],
[1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561,
1.00162979, 0.99860739, 1.00814981, 1.00574316, 0.99030032,
0.97682565, 0.97292596, 0.96519561, 0.96173403, 0.95890284],
[0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289,
0.96608031, 0.99727185, 1.00781194, 1.03484236, 1.05333619,
1.0983263, 1.1704974, 1.17025154, 1.18730553, 1.14242645]])
self.assertTrue(np.allclose(result, expected))
self.assertTrue(type(result) == type(expected))
self.assertTrue(result.shape == expected.shape)
def test_rebin_data(self):
    """rebin_data should average adjacent time columns together."""
    # Even split (10 % 2 == 0): pairs average to 0.5, 2.5, 4.5, 6.5, 8.5.
    expected_even = np.array([mean * np.ones(10, dtype=float)
                              for mean in (0.5, 2.5, 4.5, 6.5, 8.5)]).T
    observed_even = std.rebin_data(self.time_data, 2)
    self.assertTrue(np.array_equal(observed_even, expected_even))
    # Uneven split (10 % 3 == 1): the leftover column forms its own bin,
    # giving means 1, 4, 7 and then 9 on its own.
    expected_odd = np.array([mean * np.ones(10, dtype=float)
                             for mean in (1, 4, 7, 9)]).T
    observed_odd = std.rebin_data(self.time_data, 3)
    self.assertTrue(np.array_equal(observed_odd, expected_odd))
def test_get_prob_dist(self):
    """get_prob_dist should return each unit's transition-probability row."""
    lags = np.array([1, 2, 3, 4])
    units = np.array([1, 3, 2, 4])
    expected = np.array([
        [0.0754717, 0.88207547, 0.04245283, 0., 0.],
        [0., 0., 0.09411765, 0.87058824, 0.03529412],
        [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
        [0., 0., 0., 0.02352941, 0.97647059]
    ])
    observed = std.get_prob_dist(self.transition_matrix, lags, units)
    self.assertTrue(np.array_equal(observed, expected))
def test_get_prob_stats(self):
    """get_prob_stats should report up/down/trend/volatility per unit."""
    probs = np.array([
        [0.0754717, 0.88207547, 0.04245283, 0., 0.],
        [0., 0., 0.09411765, 0.87058824, 0.03529412],
        [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
        [0., 0., 0., 0.02352941, 0.97647059]
    ])
    unit_indices = np.array([1, 3, 2, 4])
    # Expected values in output order: up, down, trend, volatility.
    expectations = (
        np.array([0.04245283, 0.03529412, 0.12376238, 0.]),
        np.array([0.0754717, 0.09411765, 0.0990099, 0.02352941]),
        np.array([-0.03301887 / 0.88207547,
                  -0.05882353 / 0.87058824,
                  0.02475248 / 0.77722772,
                  -0.02352941 / 0.97647059]),
        np.array([0.34221495, 0.33705421, 0.29226542, 0.38834223]),
    )
    observed = std.get_prob_stats(probs, unit_indices)
    for got, want in zip(observed, expectations):
        self.assertTrue(np.allclose(got, want))

View File

@@ -0,0 +1,6 @@
"""Import all modules"""
import crankshaft.random_seeds
import crankshaft.clustering
import crankshaft.space_time_dynamics
import crankshaft.segmentation
import analysis_data_provider

View File

@@ -0,0 +1,67 @@
"""class for fetching data"""
import plpy
import pysal_utils as pu
class AnalysisDataProvider:
    """Thin wrapper around plpy queries used by the analysis classes.

    Each getter builds (or receives) a SQL query, executes it through
    plpy, and returns the raw result rows; on database failure it calls
    plpy.error, which raises and aborts the calling function.
    """

    def get_getis(self, w_type, params):
        """Fetch data for Getis-Ord's G*.

        @param w_type: weight type ('knn' or 'queen')
        @param params: dict of values used to build the neighbor query
        """
        try:
            query = pu.construct_neighbor_query(w_type, params)
            result = plpy.execute(query)
            # if there are no neighbors, return a null-filled row
            if len(result) == 0:
                return pu.empty_zipped_array(4)
            else:
                return result
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_markov(self, w_type, params):
        """Fetch data for spatial Markov analysis."""
        try:
            query = pu.construct_neighbor_query(w_type, params)
            data = plpy.execute(query)
            # if there are no neighbors, return a null-filled row
            if len(data) == 0:
                return pu.empty_zipped_array(4)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_moran(self, w_type, params):
        """Fetch data for Moran's I analyses."""
        try:
            query = pu.construct_neighbor_query(w_type, params)
            data = plpy.execute(query)
            # if there are no neighbors, return a null-filled row
            if len(data) == 0:
                return pu.empty_zipped_array(2)
            return data
        except plpy.SPIError as err:
            # BUG FIX: this formerly formatted the undefined name `e`,
            # raising a NameError that masked the real database error.
            # The unreachable `return` after plpy.error (which raises)
            # was also dropped.
            plpy.error('Analysis failed: %s' % err)

    def get_nonspatial_kmeans(self, query):
        """Fetch data for non-spatial k-means."""
        try:
            data = plpy.execute(query)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_spatial_kmeans(self, params):
        """Fetch id/x/y arrays for spatial k-means.

        @param params: dict with 'id_col', 'geom_col', 'subquery' keys
        """
        query = ("SELECT "
                 "array_agg({id_col} ORDER BY {id_col}) as ids,"
                 "array_agg(ST_X({geom_col}) ORDER BY {id_col}) As xs,"
                 "array_agg(ST_Y({geom_col}) ORDER BY {id_col}) As ys "
                 "FROM ({subquery}) As a "
                 "WHERE {geom_col} IS NOT NULL").format(**params)
        try:
            data = plpy.execute(query)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

View File

@@ -0,0 +1,4 @@
"""Import all functions from for clustering"""
from moran import *
from kmeans import *
from getis import *

View File

@@ -0,0 +1,50 @@
"""
Getis-Ord's G geostatistics (hotspot/coldspot analysis)
"""
import pysal as ps
from collections import OrderedDict
# crankshaft modules
import crankshaft.pysal_utils as pu
from crankshaft.analysis_data_provider import AnalysisDataProvider
# High level interface ---------------------------------------
class Getis:
    """Getis-Ord's G* hotspot/coldspot analysis."""

    def __init__(self, data_provider=None):
        # fall back to the database-backed provider unless one is injected
        if data_provider is None:
            data_provider = AnalysisDataProvider()
        self.data_provider = data_provider

    def getis_ord(self, subquery, attr,
                  w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Getis-Ord's G*
        Implementation building neighbors with a PostGIS database and PySAL's
        Getis-Ord's G* hotspot/coldspot module.
        Andy Eschbacher
        """
        # Rows with null attributes are excluded by the neighbor query,
        # so kNN neighborhoods may be slightly farther than expected.
        query_params = OrderedDict([("id_col", id_col),
                                    ("attr1", attr),
                                    ("geom_col", geom_col),
                                    ("subquery", subquery),
                                    ("num_ngbrs", num_ngbrs)])

        rows = self.data_provider.get_getis(w_type, query_params)
        attr_vals = pu.get_attributes(rows)

        # build PySAL weight object from the neighbor lists
        weight = pu.get_weight(rows, w_type, num_ngbrs)

        # Getis-Ord's G* z- and p-values (star=True includes self)
        getis_stats = ps.esda.getisord.G_Local(attr_vals, weight, star=True,
                                               permutations=permutations)

        return zip(getis_stats.z_sim, getis_stats.p_sim,
                   getis_stats.p_z_sim, weight.id_order)

View File

@@ -0,0 +1,32 @@
from sklearn.cluster import KMeans
import numpy as np
from crankshaft.analysis_data_provider import AnalysisDataProvider
class Kmeans:
    """K-means clustering over data fetched through a data provider."""

    def __init__(self, data_provider=None):
        # fall back to the database-backed provider unless one is injected
        if data_provider is None:
            data_provider = AnalysisDataProvider()
        self.data_provider = data_provider

    def spatial(self, query, no_clusters, no_init=20):
        """
        find centers based on clusters of latitude/longitude pairs
        query: SQL query that has a WGS84 geometry (the_geom)
        """
        query_params = {"subquery": query,
                        "geom_col": "the_geom",
                        "id_col": "cartodb_id"}

        response = self.data_provider.get_spatial_kmeans(query_params)

        # the query aggregates everything into a single row of arrays
        row = response[0]

        model = KMeans(n_clusters=no_clusters, n_init=no_init)
        cluster_labels = model.fit_predict(zip(row['xs'], row['ys']))
        return zip(row['ids'], cluster_labels)

View File

@@ -0,0 +1,208 @@
"""
Moran's I geostatistics (global clustering & outliers presence)
"""
# TODO: Fill in local neighbors which have null/NoneType values with the
# average of the their neighborhood
import pysal as ps
from collections import OrderedDict
from crankshaft.analysis_data_provider import AnalysisDataProvider
# crankshaft module
import crankshaft.pysal_utils as pu
# High level interface ---------------------------------------
class Moran:
    """Moran's I geostatistics (global clustering & outlier detection)."""

    def __init__(self, data_provider=None):
        # fall back to the database-backed provider unless one is injected
        if data_provider is None:
            self.data_provider = AnalysisDataProvider()
        else:
            self.data_provider = data_provider

    def global_stat(self, subquery, attr_name,
                    w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I (global)
        Implementation building neighbors with a PostGIS database and Moran's I
        core clusters with PySAL.
        Andy Eschbacher
        """
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr_name),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        attr_vals = pu.get_attributes(result)

        # calculate weights
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate moran global
        moran_global = ps.esda.moran.Moran(attr_vals, weight,
                                           permutations=permutations)

        return zip([moran_global.I], [moran_global.EI])

    def local_stat(self, subquery, attr,
                   w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I implementation for PL/Python
        Andy Eschbacher
        """
        # geometries with attributes that are null are ignored
        # resulting in a collection of not as near neighbors
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)
        attr_vals = pu.get_attributes(result)
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
                                         permutations=permutations)

        # find quadrants for each geometry
        quads = quad_position(lisa.q)

        return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)

    def global_rate_stat(self, subquery, numerator, denominator,
                         w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Rate (global)
        Andy Eschbacher
        """
        # BUG FIX: the comma after the ("attr2", denominator) pair was
        # missing, so Python treated the tuple as a callable applied to
        # ("geom_col", geom_col) and raised TypeError whenever this ran.
        params = OrderedDict([("id_col", id_col),
                              ("attr1", numerator),
                              ("attr2", denominator),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        numer = pu.get_attributes(result, 1)
        denom = pu.get_attributes(result, 2)

        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate moran global rate
        lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
                                             permutations=permutations)

        return zip([lisa_rate.I], [lisa_rate.EI])

    def local_rate_stat(self, subquery, numerator, denominator,
                        w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Local Rate
        Andy Eschbacher
        """
        # geometries with values that are null are ignored
        # resulting in a collection of not as near neighbors
        params = OrderedDict([("id_col", id_col),
                              ("numerator", numerator),
                              ("denominator", denominator),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        numer = pu.get_attributes(result, 1)
        denom = pu.get_attributes(result, 2)

        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
                                              permutations=permutations)

        # find quadrants for each geometry
        quads = quad_position(lisa.q)

        return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)

    def local_bivariate_stat(self, subquery, attr1, attr2,
                             permutations, geom_col, id_col,
                             w_type, num_ngbrs):
        """
        Moran's I (local) Bivariate (untested)
        """
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr1),
                              ("attr2", attr2),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        attr1_vals = pu.get_attributes(result, 1)
        attr2_vals = pu.get_attributes(result, 2)

        # create weights
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
                                            permutations=permutations)

        # find clustering of significance
        lisa_sig = quad_position(lisa.q)

        return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
# Low level functions ----------------------------------------
def map_quads(coord):
    """
    Map a quadrant number to Moran's I designation
    HH=1, LH=2, LL=3, HL=4
    Input:
    @param coord (int): quadrant of a specific measurement
    Output:
    classification (one of 'HH', 'LH', 'LL', or 'HL'), or None for any
    unrecognized quadrant number
    """
    # dict lookup replaces the if/elif chain; .get returns None for
    # out-of-range values, matching the original fall-through branch
    return {1: 'HH', 2: 'LH', 3: 'LL', 4: 'HL'}.get(coord)
def quad_position(quads):
    """
    Produce Moran's I classifications from quadrant numbers
    Input:
    @param quads ndarray: an array of quads classified by
    1-4 (PySAL default)
    Output:
    @param list: an array of quads classified by 'HH', 'LL', etc.
    (None for unrecognized quadrant numbers)
    """
    # inlined quadrant->label mapping (HH=1, LH=2, LL=3, HL=4)
    designations = {1: 'HH', 2: 'LH', 3: 'LL', 4: 'HL'}
    return [designations.get(quad) for quad in quads]

View File

@@ -0,0 +1,2 @@
"""Import all functions for pysal_utils"""
from crankshaft.pysal_utils.pysal_utils import *

View File

@@ -0,0 +1,211 @@
"""
Utilities module for generic PySAL functionality, mainly centered on
translating queries into numpy arrays or PySAL weights objects
"""
import numpy as np
import pysal as ps
def construct_neighbor_query(w_type, query_vals):
    """Return query (a string) used for finding neighbors
    @param w_type text: type of neighbors to calculate ('knn' or 'queen')
    @param query_vals dict: values used to construct the query
    """
    # anything other than 'knn' falls back to queen contiguity,
    # matching the original if/else behavior
    builder = knn if w_type.lower() == 'knn' else queen
    return builder(query_vals)
# Build weight object
def get_weight(query_res, w_type='knn', num_ngbrs=5):
    """
    Construct a row-standardized PySAL weights object from neighbor
    query results.
    @param query_res dict-like: query results with attributes and neighbors
    @param w_type str: weight type; currently unused here, kept for API
    compatibility with callers
    @param num_ngbrs int: number of neighbors; currently unused here, kept
    for API compatibility with callers
    """
    # CLEANUP: removed a leftover debug `print` statement and a block of
    # commented-out dead code from an earlier weighting scheme.
    neighbors = {x['id']: x['neighbors'] for x in query_res}
    built_weight = ps.W(neighbors)
    # row-standardize so each unit's weights sum to one
    built_weight.transform = 'r'

    return built_weight
def query_attr_select(params):
    """
    Create portion of SELECT statement for attributes involved in query.
    Defaults to order in the params
    @param params: dict of information used in query (column names,
    table name, etc.)
    Example:
    OrderedDict([('numerator', 'price'),
    ('denominator', 'sq_meters'),
    ('subquery', 'SELECT * FROM interesting_data')])
    Output:
    "i.\"price\"::numeric As attr1, " \
    "i.\"sq_meters\"::numeric As attr2, "
    """
    attr_string = ""
    template = "i.\"%(col)s\"::numeric As attr%(alias_num)s, "

    if 'time_cols' in params:
        # markov analysis: one aliased attribute per time column
        attrs = params['time_cols']

        for idx, val in enumerate(attrs):
            attr_string += template % {"col": val, "alias_num": idx + 1}
    else:
        # moran's analysis: every non-structural key is an attribute
        # (CLEANUP: removed duplicate 'subquery' from the exclusion tuple)
        attrs = [k for k in params
                 if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')]

        for idx, val in enumerate(attrs):
            attr_string += template % {"col": params[val],
                                       "alias_num": idx + 1}

    return attr_string
def query_attr_where(params):
    """
    Construct where conditions when building neighbors query
    Create portion of WHERE clauses for weeding out NULL-valued geometries
    Input: dict of params:
    {'subquery': ...,
    'numerator': 'data1',
    'denominator': 'data2',
    '': ...}
    Output:
    'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL'
    Input:
    {'subquery': ...,
    'time_cols': ['time1', 'time2', 'time3'],
    'etc': ...}
    Output: 'idx_replace."time1" IS NOT NULL AND idx_replace."time2" IS NOT
    NULL AND idx_replace."time3" IS NOT NULL'
    """
    attr_string = []
    template = "idx_replace.\"%s\" IS NOT NULL"

    if 'time_cols' in params:
        # markov where clauses: every time column must be non-null
        attrs = params['time_cols']
        # add values to template
        for attr in attrs:
            attr_string.append(template % attr)
    else:
        # moran where clauses: every non-structural key is an attribute
        # (CLEANUP: removed duplicate 'subquery' from the exclusion tuple)
        attrs = [k for k in params
                 if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')]

        # add values to template
        for attr in attrs:
            attr_string.append(template % params[attr])

        # guard against division by zero in rate analyses
        if 'denominator' in attrs:
            attr_string.append(
                "idx_replace.\"%s\" <> 0" % params['denominator'])

    out = " AND ".join(attr_string)

    return out
def knn(params):
    """SQL query for k-nearest neighbors.
    @param params: dict of values to fill template (id_col, geom_col,
    subquery, num_ngbrs, plus attribute columns)
    """
    # WHERE clauses are built once and aliased for the outer (i) and
    # inner (j) scans of the subquery
    attr_where = query_attr_where(params)
    replacements = {"attr_select": query_attr_select(params),
                    "attr_where_i": attr_where.replace("idx_replace", "i"),
                    "attr_where_j": attr_where.replace("idx_replace", "j")}

    query = ("SELECT "
             "i.\"{id_col}\" As id, "
             "%(attr_select)s"
             "(SELECT ARRAY(SELECT j.\"{id_col}\" "
             "FROM ({subquery}) As j "
             "WHERE "
             "i.\"{id_col}\" <> j.\"{id_col}\" AND "
             "%(attr_where_j)s "
             "ORDER BY "
             "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC "
             "LIMIT {num_ngbrs})"
             ") As neighbors "
             "FROM ({subquery}) As i "
             "WHERE "
             "%(attr_where_i)s "
             "ORDER BY i.\"{id_col}\" ASC;") % replacements

    return query.format(**params)
# SQL query for finding queens neighbors (all contiguous polygons)
def queen(params):
    """SQL query for queen neighbors (all contiguous polygons).
    @param params dict: information to fill query (id_col, geom_col,
    subquery, plus attribute columns)
    """
    # WHERE clauses are built once and aliased for the outer (i) and
    # inner (j) scans of the subquery
    attr_where = query_attr_where(params)
    replacements = {"attr_select": query_attr_select(params),
                    "attr_where_i": attr_where.replace("idx_replace", "i"),
                    "attr_where_j": attr_where.replace("idx_replace", "j")}

    query = ("SELECT "
             "i.\"{id_col}\" As id, "
             "%(attr_select)s"
             "(SELECT ARRAY(SELECT j.\"{id_col}\" "
             "FROM ({subquery}) As j "
             "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND "
             "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND "
             "%(attr_where_j)s)"
             ") As neighbors "
             "FROM ({subquery}) As i "
             "WHERE "
             "%(attr_where_i)s "
             "ORDER BY i.\"{id_col}\" ASC;") % replacements

    return query.format(**params)
# to add more weight methods open a ticket or pull request
def get_attributes(query_res, attr_num=1):
    """
    Extract one attribute column from query results as a float array.
    @param query_res: query results with attributes and neighbors
    @param attr_num: attribute number (1, 2, ...)
    """
    # COMPAT FIX: the builtin `float` replaces the deprecated `np.float`
    # alias, which was removed in NumPy 1.24; the resulting dtype is
    # identical (float64).
    return np.array([x['attr' + str(attr_num)] for x in query_res],
                    dtype=float)
def empty_zipped_array(num_nones):
    """
    prepare return values for cases of empty weights objects (no neighbors)
    Input:
    @param num_nones int: number of columns (e.g., 4)
    Output:
    [(None, None, None, None)]
    """
    # tuple repetition replaces the list-to-tuple conversion
    return [(None,) * num_nones]

View File

@@ -0,0 +1,11 @@
"""Random seed generator used for non-deterministic functions in crankshaft"""
import random
import numpy
def set_random_seeds(value):
    """
    Set the seeds of the RNGs (Random Number Generators)
    used internally: Python's `random` and NumPy's global RNG.
    """
    for seeder in (random.seed, numpy.random.seed):
        seeder(value)

View File

@@ -0,0 +1 @@
from segmentation import *

View File

@@ -0,0 +1,176 @@
"""
Segmentation creation and prediction
"""
import sklearn
import numpy as np
import plpy
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import metrics
from sklearn.cross_validation import train_test_split
# Lower level functions
#----------------------
def replace_nan_with_mean(array):
    """
    Input:
    @param array: a 2D array of floats which may have null-valued entries
    Output:
    the same array, mutated in place, with NaNs replaced by the mean of
    the non-NaN values in their column
    """
    # locate every NaN cell as (row, column) coordinate pairs
    nan_rows, nan_cols = np.where(np.isnan(array))

    # fill each NaN with its column's current non-NaN mean (recomputed
    # per cell, matching the original update-as-you-go behavior)
    for row, col in zip(nan_rows, nan_cols):
        column = array[:, col]
        array[row, col] = column[~np.isnan(column)].mean()

    return array
def get_data(variable, feature_columns, query):
    """
    Fetch data from the database, clean, and package into
    numpy arrays
    Input:
    @param variable: name of the target variable
    @param feature_columns: list of column names
    @param query: subquery that data is pulled from for the packaging
    Output:
    prepared data, packaged into NumPy arrays
    """
    # build one array_agg(...) select item per feature column
    columns = ','.join(['array_agg("{col}") As "{col}"'.format(col=col) for col in feature_columns])

    try:
        data = plpy.execute('''SELECT array_agg("{variable}") As target, {columns} FROM ({query}) As a'''.format(
            variable=variable,
            columns=columns,
            query=query))
    except Exception, e:
        plpy.error('Failed to access data to build segmentation model: %s' % e)

    # extract target data from plpy object
    target = np.array(data[0]['target'])
    # put n feature data arrays into an n x m array of arrays
    features = np.column_stack([np.array(data[0][col], dtype=float) for col in feature_columns])

    # NOTE(review): replace_nan_with_mean unpacks (row, col) index pairs,
    # which assumes a 2D input -- the 1D `target` would make it raise if
    # the target column ever contains NULLs; confirm targets are non-NULL.
    return replace_nan_with_mean(target), replace_nan_with_mean(features)
# High level interface
# --------------------
def create_and_predict_segment_agg(target, features, target_features, target_ids, model_parameters):
    """
    Version of create_and_predict_segment that works on arrays that come
    straight from the SQL calling the function.
    Input:
    @param target: 1D array of length NSamples containing the target
    variable we want the model to predict
    @param features: 2D array of size NSamples x NFeatures that form the
    input to the model
    @param target_features: 2D array of features to predict on
    @param target_ids: 1D array of ids used to associate the results of
    the prediction with the rows which they come from
    @param model_parameters: a dictionary containing parameters for the
    model
    Output:
    sequence of (id, prediction, accuracy) triples
    """
    clean_target = replace_nan_with_mean(target)
    clean_features = replace_nan_with_mean(features)
    target_features = replace_nan_with_mean(target_features)

    model, accuracy = train_model(clean_target, clean_features, model_parameters, 0.2)
    prediction = model.predict(target_features)
    # CLEANUP: fill the per-row accuracy column directly from the scalar;
    # the original built a Python list of repeated values and then fed it
    # back through np.full, producing the same array redundantly.
    accuracy_array = np.full(prediction.shape, accuracy)
    return zip(target_ids, prediction, accuracy_array)
def create_and_predict_segment(query, variable, target_query, model_params):
    """
    generate a segment with machine learning
    Stuart Lynn
    Input:
    @param query: subquery containing the training data (must include
    the target variable column)
    @param variable: name of the target variable column
    @param target_query: query returning the rows to predict on
    @param model_params: dict of GradientBoostingRegressor parameters
    Output:
    sequence of (cartodb_id, prediction, accuracy) triples
    """
    # fetch column names from a single sample row
    try:
        columns = plpy.execute('SELECT * FROM ({query}) As a LIMIT 1 '.format(query=query))[0].keys()
    except Exception, e:
        plpy.error('Failed to build segmentation model: %s' % e)

    # extract column names to be used in building the segmentation model;
    # id/geometry columns and the target itself are excluded
    feature_columns = set(columns) - set([variable, 'cartodb_id', 'the_geom', 'the_geom_webmercator'])
    # get data from database
    target, features = get_data(variable, feature_columns, query)

    # train on an 80/20 split, then predict over the target query
    model, accuracy = train_model(target, features, model_params, 0.2)
    cartodb_ids, result = predict_segment(model, feature_columns, target_query)
    # one copy of the model accuracy per predicted row
    accuracy_array = [accuracy]*result.shape[0]
    return zip(cartodb_ids, result, accuracy_array)
def train_model(target, features, model_params, test_split):
    """
    Train the Gradient Boosting model on the provided data and calculate
    the accuracy of the model
    Input:
    @param target: 1D array of the variable that the model is to be
    trained to predict
    @param features: 2D array (NSamples x NFeatures) to use in training
    the model
    @param model_params: a dictionary of model parameters; see the
    scikit-learn GradientBoostingRegressor documentation for the full
    specification
    @param test_split: the fraction of the data to be withheld for
    testing the model / calculating the accuracy
    """
    split = train_test_split(features, target, test_size=test_split)
    train_features, _test_features, train_target, _test_target = split

    model = GradientBoostingRegressor(**model_params)
    model.fit(train_features, train_target)

    # accuracy is measured over the full dataset, as in the original
    accuracy = calculate_model_accuracy(model, features, target)
    return model, accuracy
def calculate_model_accuracy(model, features, target):
    """
    Calculate the mean squared error of the model prediction
    Input:
    @param model: model trained from input features
    @param features: features to make a prediction from
    @param target: target to compare prediction to
    Output:
    mean squared error of the model prediction compared to the target
    """
    predicted = model.predict(features)
    return metrics.mean_squared_error(predicted, target)
def predict_segment(model, features, target_query):
    """
    Use the provided model to predict the values for the new feature set
    Input:
    @param model: The pretrained model
    @features: A list of features to use in the model prediction (list of column names)
    @target_query: The query to run to obtain the data to predict on and the cartdb_ids associated with it.
    """
    # stream prediction rows through a database cursor in fixed-size batches
    batch_size = 1000
    joined_features = ','.join(['"{0}"::numeric'.format(a) for a in features])

    try:
        cursor = plpy.cursor('SELECT Array[{joined_features}] As features FROM ({target_query}) As a'.format(
            joined_features=joined_features,
            target_query=target_query))
    except Exception, e:
        plpy.error('Failed to build segmentation model: %s' % e)

    results = []
    while True:
        rows = cursor.fetch(batch_size)
        if not rows:
            break
        batch = np.row_stack([np.array(row['features'], dtype=float) for row in rows])

        # Need to fix this. Should be global mean. This will cause weird effects
        # (NaNs are filled with the per-batch column mean, not the dataset mean)
        batch = replace_nan_with_mean(batch)
        prediction = model.predict(batch)
        results.append(prediction)

    # fetch ids ordered by cartodb_id for pairing with the predictions
    # NOTE(review): the cursor query above has no ORDER BY -- row order is
    # assumed to match this ordered id query; confirm that holds.
    try:
        cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids']
    except Exception, e:
        plpy.error('Failed to build segmentation model: %s' % e)

    return cartodb_ids, np.concatenate(results)

View File

@@ -0,0 +1,2 @@
"""Import all functions from clustering libraries."""
from markov import *

View File

@@ -0,0 +1,194 @@
"""
Spatial dynamics measurements using Spatial Markov
"""
# TODO: remove all plpy dependencies
import numpy as np
import pysal as ps
import plpy
import crankshaft.pysal_utils as pu
from crankshaft.analysis_data_provider import AnalysisDataProvider
class Markov:
    """Spatial Markov trend analysis backed by a pluggable data provider."""

    def __init__(self, data_provider=None):
        # fall back to the database-backed provider unless one is injected
        if data_provider is None:
            self.data_provider = AnalysisDataProvider()
        else:
            self.data_provider = data_provider

    def spatial_trend(self, subquery, time_cols, num_classes=7,
                      w_type='knn', num_ngbrs=5, permutations=0,
                      geom_col='the_geom', id_col='cartodb_id'):
        """
        Predict the trends of a unit based on:
        1. history of its transitions to different classes (e.g., 1st
        quantile -> 2nd quantile)
        2. average class of its neighbors
        Inputs:
        @param subquery string: e.g., SELECT the_geom, cartodb_id,
        interesting_time_column FROM table_name
        @param time_cols list of strings: list of strings of column names
        @param num_classes (optional): number of classes to break
        distribution of values into. Currently uses quantile bins.
        @param w_type string (optional): weight type ('knn' or 'queen')
        @param num_ngbrs int (optional): number of neighbors (if knn type)
        @param permutations int (optional): number of permutations for test
        stats
        @param geom_col string (optional): name of column which contains
        the geometries
        @param id_col string (optional): name of column which has the ids
        of the table
        Outputs:
        @param trend_up float: probablity that a geom will move to a higher
        class
        @param trend_down float: probablity that a geom will move to a
        lower class
        @param trend float: (trend_up - trend_down) / trend_static
        @param volatility float: a measure of the volatility based on
        probability stddev(prob array)
        """
        if len(time_cols) < 2:
            plpy.error('More than one time column needs to be passed')

        params = {"id_col": id_col,
                  "time_cols": time_cols,
                  "geom_col": geom_col,
                  "subquery": subquery,
                  "num_ngbrs": num_ngbrs}

        query_result = self.data_provider.get_markov(w_type, params)

        # build weight
        # NOTE(review): num_ngbrs is not forwarded here (get_weight's
        # default is used) and get_weight already row-standardizes, making
        # the transform below redundant; confirm both are intentional.
        weights = pu.get_weight(query_result, w_type)
        weights.transform = 'r'

        # prep time data: one row per unit, one column per time period
        t_data = get_time_data(query_result, time_cols)

        sp_markov_result = ps.Spatial_Markov(t_data,
                                             weights,
                                             k=num_classes,
                                             fixed=False,
                                             permutations=permutations)

        # quantile class of each unit's spatial lag at the latest period
        lag_classes = ps.Quantiles(
            ps.lag_spatial(weights, t_data[:, -1]),
            k=num_classes).yb

        # look up probablity distribution for each unit according to class and
        # lag class
        prob_dist = get_prob_dist(sp_markov_result.P,
                                  lag_classes,
                                  sp_markov_result.classes[:, -1])

        # find the ups and down and overall distribution of each cell
        trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, sp_markov_result.classes[:, -1])

        # output the results
        return zip(trend, trend_up, trend_down, volatility, weights.id_order)
def get_time_data(markov_data, time_cols):
    """
    Extract the time columns ('attr1', 'attr2', ...) from the query rows
    and return them as a float array with one row per unit and one
    column per time period.
    """
    attr_count = len(time_cols)
    columns = [[row['attr' + str(idx)] for row in markov_data]
               for idx in range(1, attr_count + 1)]
    return np.array(columns, dtype=float).transpose()
# not currently used
# not currently used
def rebin_data(time_data, num_time_per_bin):
    """
    Convert an n x l matrix into an (n/m) x l matrix where the values are
    reduced (averaged) for the intervening states:
    1 2 3 4 1.5 3.5
    5 6 7 8 -> 5.5 7.5
    9 8 7 6 8.5 6.5
    5 4 3 2 4.5 2.5
    if m = 2, the 4 x 4 matrix is transformed to a 2 x 4 matrix.
    This process effectively resamples the data at a longer time span n
    units longer than the input data.
    For cases when there is a remainder (remainder(5/3) = 2), the remaining
    two columns are binned together as the last time period, while the
    first three are binned together for the first period.
    Input:
    @param time_data n x l ndarray: measurements of an attribute at
    different time intervals
    @param num_time_per_bin int: number of columns to average into a new
    column
    Output:
    ceil(n / m) x l ndarray of resampled time series
    """
    num_cols = time_data.shape[1]

    # COMPAT FIX: floor division (//) keeps n_max an int under Python 3,
    # where `/` on ints yields a float and would break range(); under
    # Python 2 `//` is identical to the original `/` on ints.
    if num_cols % num_time_per_bin == 0:
        # if fit is perfect, then use it
        n_max = num_cols // num_time_per_bin
    else:
        # fit remainders into an additional column
        n_max = num_cols // num_time_per_bin + 1

    return np.array(
        [time_data[:, num_time_per_bin * i:num_time_per_bin * (i + 1)].mean(axis=1)
         for i in range(n_max)]).T
def get_prob_dist(transition_matrix, lag_indices, unit_indices):
    """
    Given an array of transition matrices, look up the probability
    associated with the arrangements passed

    Input:
    @param transition_matrix ndarray[k,k,k]:
    @param lag_indices ndarray:
    @param unit_indices ndarray:

    Output:
    Array of probability distributions
    """
    # pick, for every unit, the distribution row selected by its
    # (lag class, unit class) pair
    rows = []
    for idx in range(len(lag_indices)):
        rows.append(transition_matrix[(lag_indices[idx], unit_indices[idx])])
    return np.array(rows)
def get_prob_stats(prob_dist, unit_indices):
    """
    Get the statistics of the probability distributions

    Input:
    @param prob_dist ndarray[n, k]: one probability distribution (row) per
           unit over the k classes
    @param unit_indices ndarray[n]: current class index of each unit

    Outputs:
    @param trend_up ndarray(float): sum of probabilities for upward
           movement (relative to the unit index of that prob)
    @param trend_down ndarray(float): sum of probabilities for downward
           movement (relative to the unit index of that prob)
    @param trend ndarray(float): difference of upward and downward
           movements, normalized by the probability of staying in the
           current class; NaN when that probability is zero
    @param volatility ndarray(float): standard deviation of each
           distribution
    """
    num_elements = len(unit_indices)
    trend_up = np.empty(num_elements, dtype=float)
    trend_down = np.empty(num_elements, dtype=float)
    trend = np.empty(num_elements, dtype=float)
    for i in range(num_elements):
        trend_up[i] = prob_dist[i, (unit_indices[i]+1):].sum()
        trend_down[i] = prob_dist[i, :unit_indices[i]].sum()
        if prob_dist[i, unit_indices[i]] > 0.0:
            trend[i] = (trend_up[i] - trend_down[i]) / (
                prob_dist[i, unit_indices[i]])
        else:
            # explicit NaN: the original assigned None and relied on
            # numpy's implicit None -> nan coercion into a float array
            trend[i] = np.nan
    # calculate volatility of distribution
    volatility = prob_dist.std(axis=1)
    return trend_up, trend_down, trend, volatility

View File

@@ -0,0 +1,5 @@
joblib==0.8.3
numpy==1.6.1
scipy==0.14.0
pysal==1.11.2
scikit-learn==0.14.1

View File

@@ -0,0 +1,49 @@
"""
CartoDB Spatial Analysis Python Library
See:
https://github.com/CartoDB/crankshaft
"""
from setuptools import setup, find_packages
setup(
name='crankshaft',
version='0.5.1',
description='CartoDB Spatial Analysis Python Library',
url='https://github.com/CartoDB/crankshaft',
author='Data Services Team - CartoDB',
author_email='dataservices@cartodb.com',
license='MIT',
classifiers=[
'Development Status :: 3 - Alpha',
'Intended Audience :: Mapping comunity',
'Topic :: Maps :: Mapping Tools',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 2.7',
],
keywords='maps mapping tools spatial analysis geostatistics',
packages=find_packages(exclude=['contrib', 'docs', 'tests']),
extras_require={
'dev': ['unittest'],
'test': ['unittest', 'nose', 'mock'],
},
# The choice of component versions is dictated by what's
# provisioned in the production servers.
# IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'],
requires=['pysal', 'numpy', 'sklearn'],
test_suite='test'
)

View File

@@ -0,0 +1 @@
[[0.004793783909323601, 0.17999999999999999, 0.49808756424021061], [-1.0701189472090842, 0.079000000000000001, 0.14228288580832316], [-0.67867750971877305, 0.42099999999999999, 0.24867110969448558], [-0.67407386707620487, 0.246, 0.25013217644612995], [-0.79495689068870035, 0.33200000000000002, 0.21331928959090596], [-0.49279481022182703, 0.058999999999999997, 0.31107878905057329], [-0.38075627530057132, 0.28399999999999997, 0.35169205342069643], [-0.86710921611314895, 0.23699999999999999, 0.19294108571294855], [-0.78618647240956485, 0.050000000000000003, 0.2158791250244505], [-0.76108527223116984, 0.064000000000000001, 0.22330306830813684], [-0.13340753531942209, 0.247, 0.44693554317763651], [-0.57584545722033043, 0.48999999999999999, 0.28235982246156488], [-0.78882694661192831, 0.433, 0.2151065788731219], [-0.38769767950046219, 0.375, 0.34911988661484239], [-0.56057819488052207, 0.41399999999999998, 0.28754255985169652], [-0.41354017495644935, 0.45500000000000002, 0.339605447117173], [-0.23993577722243081, 0.49099999999999999, 0.40519002230969337], [-0.1389080156677496, 0.40400000000000003, 0.44476141839645233], [-0.25485737510500855, 0.376, 0.39941662953554224], [-0.71218610582902353, 0.17399999999999999, 0.23817476979886087], [-0.54533105995872144, 0.13700000000000001, 0.2927629228714812], [-0.39547917847510977, 0.033000000000000002, 0.34624464252424236], [-0.43052658996257548, 0.35399999999999998, 0.33340631435564982], [-0.37296719193774736, 0.40300000000000002, 0.35458643102865428], [-0.66482612169465694, 0.31900000000000001, 0.25308085650392698], [-0.13772133540823422, 0.34699999999999998, 0.44523032843016275], [-0.6765304487868502, 0.20999999999999999, 0.24935196033890672], [-0.64518763494323472, 0.32200000000000001, 0.25940279912025543], [-0.5078622084312413, 0.41099999999999998, 0.30577498972600159], [-0.12652006733772059, 0.42899999999999999, 0.44966013262301163], [-0.32691133022814595, 0.498, 0.37186747562269029], [0.25533848511500978, 
0.42399999999999999, 0.39923083899077472], [2.7045138116476508, 0.0050000000000000001, 0.0034202212972238577], [-0.1551614486076057, 0.44400000000000001, 0.43834701985429037], [1.9524487722567723, 0.012999999999999999, 0.025442473674991528], [-1.2055816465306763, 0.017000000000000001, 0.11398941970467646], [3.478472976017831, 0.002, 0.00025213964072468009], [-1.4621715757903719, 0.002, 0.071847099325659136], [-0.84010307600180256, 0.085000000000000006, 0.20042529779230778], [5.7097646237318243, 0.0030000000000000001, 5.6566262784940591e-09], [1.5082367956567375, 0.065000000000000002, 0.065746966514827365], [-0.58337270103430816, 0.44, 0.27982121546450034], [-0.083271860457022437, 0.45100000000000001, 0.46681768733385554], [-0.46872337815000953, 0.34599999999999997, 0.31963368715684204], [0.18490279849545319, 0.23799999999999999, 0.42665263797981101], [3.470424529947997, 0.012, 0.00025981817437825683], [-0.99942612137154796, 0.032000000000000001, 0.15879415560388499], [-1.3650387953594485, 0.034000000000000002, 0.08612042845912049], [1.8617160516432014, 0.081000000000000003, 0.03132156240215267], [1.1321188945775384, 0.11600000000000001, 0.12879222611766061], [0.064116686050580601, 0.27300000000000002, 0.4744386578180424], [-0.42032194540259099, 0.29999999999999999, 0.33712514016213468], [-0.79581215423980922, 0.123, 0.21307061309098785], [-0.42792753720906046, 0.45600000000000002, 0.33435193892883741], [-1.0629378527428395, 0.051999999999999998, 0.14390506780140866], [-0.54164761752225477, 0.33700000000000002, 0.29403064095211839], [1.0934778886820793, 0.13700000000000001, 0.13709201601893539], [-0.094068785378413719, 0.38200000000000001, 0.46252725802998929], [0.13482026574801856, 0.36799999999999999, 0.44637699118865737], [-0.13976995315653129, 0.34699999999999998, 0.44442087706276601], [-0.051047663924746682, 0.32000000000000001, 0.47964376985626245], [-0.21468297736730158, 0.41699999999999998, 0.41500724761906527], [-0.20873154637330626, 0.38800000000000001, 
0.41732890604390893], [-0.32427876152583485, 0.49199999999999999, 0.37286349875557478], [-0.65254842943280977, 0.374, 0.25702372075306734], [-0.48611858196118796, 0.23300000000000001, 0.31344154643990074], [-0.14482354344529477, 0.32600000000000001, 0.44242509660469886], [-0.51052030974200002, 0.439, 0.30484349480873729], [0.56814382285283538, 0.14999999999999999, 0.28496865660103166], [0.58680919931668207, 0.161, 0.27866592887231878], [0.013390357044409013, 0.25800000000000001, 0.49465818005865647], [-0.19050728887961568, 0.41399999999999998, 0.4244558160399462], [-0.60531777422216049, 0.35199999999999998, 0.2724839368239631], [1.0899331115425805, 0.127, 0.13787130480311838], [0.17015055382651084, 0.36899999999999999, 0.43244586845546418], [-0.21738337124409801, 0.40600000000000003, 0.41395479459421991], [1.0329303331079593, 0.079000000000000001, 0.15081825117169467], [1.0218317101096221, 0.104, 0.15343027913308094]]

View File

@@ -0,0 +1 @@
[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]

View File

@@ -0,0 +1 @@
[[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 
0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]]

View File

@@ -0,0 +1,52 @@
[[0.9319096128346788, "HH"],
[-1.135787401862846, "HL"],
[0.11732030672508517, "LL"],
[0.6152779669180425, "LL"],
[-0.14657336660125297, "LH"],
[0.6967858120189607, "LL"],
[0.07949310115714454, "HH"],
[0.4703198759258987, "HH"],
[0.4421125200498064, "HH"],
[0.5724288737143592, "LL"],
[0.8970743435692062, "LL"],
[0.18327334401918674, "LL"],
[-0.01466729201304962, "HL"],
[0.3481559372544409, "LL"],
[0.06547094736902978, "LL"],
[0.15482141569329988, "HH"],
[0.4373841193538136, "HH"],
[0.15971286468915544, "LL"],
[1.0543588860308968, "HH"],
[1.7372866900020818, "HH"],
[1.091998586053999, "LL"],
[0.1171572584252222, "HH"],
[0.08438455015300014, "LL"],
[0.06547094736902978, "LL"],
[0.15482141569329985, "HH"],
[1.1627044812890683, "HH"],
[0.06547094736902978, "LL"],
[0.795275137550483, "HH"],
[0.18562939195219, "LL"],
[0.3010757406693439, "LL"],
[2.8205795942839376, "HH"],
[0.11259190602909264, "LL"],
[-0.07116352791516614, "HL"],
[-0.09945240794119009, "LH"],
[0.18562939195219, "LL"],
[0.1832733440191868, "LL"],
[-0.39054253768447705, "HL"],
[-0.1672071289487642, "HL"],
[0.3337669247916343, "HH"],
[0.2584386102554792, "HH"],
[-0.19733845476322634, "HL"],
[-0.9379282899805409, "LH"],
[-0.028770969951095866, "LH"],
[0.051367269430983485, "LL"],
[-0.2172548045913472, "LH"],
[0.05136726943098351, "LL"],
[0.04191046803899837, "LL"],
[0.7482357030403517, "HH"],
[-0.014585767863118111, "LH"],
[0.5410013139159929, "HH"],
[1.0223932668429925, "LL"],
[1.4179402898927476, "LL"]]

View File

@@ -0,0 +1,54 @@
[
{"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
{"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
{"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
{"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
{"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
{"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
{"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
{"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
{"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
{"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
{"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
{"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
{"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
{"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
{"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
{"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
{"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
{"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
{"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
{"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
{"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
{"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
{"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
{"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
{"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
{"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
{"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
{"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
{"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
{"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
{"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
{"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
{"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
{"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
{"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
{"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
{"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
{"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
{"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
{"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
{"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
{"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
{"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
{"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
{"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
{"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
{"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
{"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
{"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
{"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
{"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
{"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,13 @@
import unittest
from mock_plpy import MockPlPy
# Install a mock `plpy` module BEFORE any crankshaft code is imported, so
# that `import plpy` inside the package resolves to this test double
# instead of the PL/Python runtime module (only available inside Postgres).
plpy = MockPlPy()
import sys
sys.modules['plpy'] = plpy
import os
def fixture_file(name):
    """Return the absolute path of fixture `name` in the tests' fixtures dir.

    @param name str: bare file name of the fixture (e.g. 'moran.json')
    """
    # renamed from `dir`, which shadowed the builtin of the same name
    here = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(here, 'fixtures', name)

View File

@@ -0,0 +1,54 @@
import re
class MockCursor:
    """Minimal stand-in for a plpy cursor: serves pre-loaded rows in batches."""
    def __init__(self, data):
        # position of the next unread row
        self.cursor_pos = 0
        self.data = data
    def fetch(self, batch_size):
        """Return the next `batch_size` rows (fewer, possibly [], at the end)."""
        start = self.cursor_pos
        end = start + batch_size
        self.cursor_pos = end
        return self.data[start:end]
class MockPlPy:
    """In-memory stand-in for the PL/Python `plpy` module.

    Records messages per severity level and returns canned query results
    registered via `_define_result`.
    """
    def __init__(self):
        self._reset()
    def _reset(self):
        # one sink per message level / recorded call type
        self.infos = []
        self.notices = []
        self.debugs = []
        self.logs = []
        self.warnings = []
        self.errors = []
        self.fatals = []
        self.executes = []
        self.prepares = []
        # (pattern, rows) pairs consulted by execute();
        # the duplicate initialization of `results` was removed
        self.results = []
    def _define_result(self, query, result):
        """Register `result` rows for any query matching regex `query`."""
        pattern = re.compile(query, re.IGNORECASE | re.MULTILINE)
        self.results.append([pattern, result])
    def notice(self, msg):
        self.notices.append(msg)
    def debug(self, msg):
        # BUG FIX: previously appended to self.notices, leaving
        # self.debugs permanently empty
        self.debugs.append(msg)
    def info(self, msg):
        self.infos.append(msg)
    def cursor(self, query):
        data = self.execute(query)
        return MockCursor(data)
    # TODO: additional arguments
    def execute(self, query):
        """Return the rows of the first registered pattern matching `query`."""
        for result in self.results:
            if result[0].match(query):
                return result[1]
        return []

View File

@@ -0,0 +1,78 @@
import unittest
import numpy as np
from helper import fixture_file
from crankshaft.clustering import Getis
import crankshaft.pysal_utils as pu
from crankshaft import random_seeds
import json
from crankshaft.analysis_data_provider import AnalysisDataProvider
# Fixture files produced as follows
#
# import pysal as ps
# import numpy as np
# import random
#
# # setup variables
# f = ps.open(ps.examples.get_path("stl_hom.dbf"))
# y = np.array(f.by_col['HR8893'])
# w_queen = ps.queen_from_shapefile(ps.examples.get_path("stl_hom.shp"))
#
# out_queen = [{"id": index + 1,
# "neighbors": [x+1 for x in w_queen.neighbors[index]],
# "value": val} for index, val in enumerate(y)]
#
# with open('neighbors_queen_getis.json', 'w') as f:
# f.write(str(out_queen))
#
# random.seed(1234)
# np.random.seed(1234)
# lgstar_queen = ps.esda.getisord.G_Local(y, w_queen, star=True,
# permutations=999)
#
# with open('getis_queen.json', 'w') as f:
# f.write(str(zip(lgstar_queen.z_sim,
# lgstar_queen.p_sim, lgstar_queen.p_z_sim)))
class FakeDataProvider(AnalysisDataProvider):
    """Test double: returns canned rows instead of querying the database."""
    def __init__(self, mock_data):
        # rows handed back verbatim by get_getis
        self.mock_result = mock_data
    def get_getis(self, w_type, param):
        """Ignore the weight type and params; return the canned rows."""
        return self.mock_result
class GetisTest(unittest.TestCase):
    """Testing class for Getis-Ord's G* function

    This test replicates the work done in PySAL documentation:
    https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/autocorrelation.html#local-g-and-g
    """
    def setUp(self):
        # load raw data for analysis; `with` closes the fixture files
        # (the previous open(...).read() leaked the file handles)
        with open(fixture_file('neighbors_getis.json')) as fh:
            self.neighbors_data = json.load(fh)
        # load pre-computed/known values
        with open(fixture_file('getis.json')) as fh:
            self.getis_data = json.load(fh)
    def test_getis_ord(self):
        """Test Getis-Ord's G*"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        random_seeds.set_random_seeds(1234)
        getis = Getis(FakeDataProvider(data))
        result = getis.getis_ord('subquery', 'value',
                                 'queen', None, 999, 'the_geom',
                                 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        expected = np.array(self.getis_data)[:, 0:2]
        # NOTE(review): only the z-values are asserted; res_p/exp_p are
        # unpacked but never compared -- confirm whether that is intended
        for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected):
            self.assertAlmostEqual(res_z, exp_z, delta=1e-2)

View File

@@ -0,0 +1,56 @@
import unittest
import numpy as np
# from mock_plpy import MockPlPy
# plpy = MockPlPy()
#
# import sys
# sys.modules['plpy'] = plpy
from helper import fixture_file
from crankshaft.clustering import Kmeans
from crankshaft.analysis_data_provider import AnalysisDataProvider
import crankshaft.clustering as cc
from crankshaft import random_seeds
import json
from collections import OrderedDict
class FakeDataProvider(AnalysisDataProvider):
    """Test double: returns canned rows instead of querying the database."""
    def __init__(self, mocked_result):
        # rows handed back verbatim by both kmeans getters
        self.mocked_result = mocked_result
    def get_spatial_kmeans(self, query):
        """Ignore the query; return the canned rows."""
        return self.mocked_result
    # NOTE(review): 'standarize' mirrors the (misspelled) parameter name of
    # the provider method being overridden -- kept for interface fidelity
    def get_nonspatial_kmeans(self, query, standarize):
        """Ignore the query and flag; return the canned rows."""
        return self.mocked_result
class KMeansTest(unittest.TestCase):
    """Testing class for k-means spatial"""
    def setUp(self):
        # `with` closes the fixture file (previous open(...).read()
        # leaked the handle)
        with open(fixture_file('kmeans.json')) as fh:
            self.cluster_data = json.load(fh)
        self.params = {"subquery": "select * from table",
                       "no_clusters": "10"}
    def test_kmeans(self):
        """Two well-separated groups of 20 points each should be recovered."""
        data = [{'xs': d['xs'],
                 'ys': d['ys'],
                 'ids': d['ids']} for d in self.cluster_data]
        random_seeds.set_random_seeds(1234)
        kmeans = Kmeans(FakeDataProvider(data))
        clusters = kmeans.spatial('subquery', 2)
        # each row is (id, label); labels come second
        labels = [a[1] for a in clusters]
        c1 = [a for a in clusters if a[1] == 0]
        c2 = [a for a in clusters if a[1] == 1]
        self.assertEqual(len(np.unique(labels)), 2)
        self.assertEqual(len(c1), 20)
        self.assertEqual(len(c2), 20)

View File

@@ -0,0 +1,112 @@
import unittest
import numpy as np
from helper import fixture_file
from crankshaft.clustering import Moran
from crankshaft.analysis_data_provider import AnalysisDataProvider
import crankshaft.pysal_utils as pu
from crankshaft import random_seeds
import json
from collections import OrderedDict
class FakeDataProvider(AnalysisDataProvider):
    """Test double: returns canned rows instead of querying the database."""
    def __init__(self, mock_data):
        # rows handed back verbatim by get_moran
        self.mock_result = mock_data
    def get_moran(self, w_type, params):
        """Ignore the weight type and params; return the canned rows."""
        return self.mock_result
class MoranTest(unittest.TestCase):
    """Testing class for Moran's I functions"""
    def setUp(self):
        self.params = {"id_col": "cartodb_id",
                       "attr1": "andy",
                       "attr2": "jay_z",
                       "subquery": "SELECT * FROM a_list",
                       "geom_col": "the_geom",
                       "num_ngbrs": 321}
        self.params_markov = {"id_col": "cartodb_id",
                              "time_cols": ["_2013_dec", "_2014_jan",
                                            "_2014_feb"],
                              "subquery": "SELECT * FROM a_list",
                              "geom_col": "the_geom",
                              "num_ngbrs": 321}
        # `with` closes the fixture files (previous open(...).read()
        # leaked the handles)
        with open(fixture_file('neighbors.json')) as fh:
            self.neighbors_data = json.load(fh)
        with open(fixture_file('moran.json')) as fh:
            self.moran_data = json.load(fh)
    def test_map_quads(self):
        """Test map_quads"""
        from crankshaft.clustering import map_quads
        self.assertEqual(map_quads(1), 'HH')
        self.assertEqual(map_quads(2), 'LH')
        self.assertEqual(map_quads(3), 'LL')
        self.assertEqual(map_quads(4), 'HL')
        self.assertEqual(map_quads(33), None)
        self.assertEqual(map_quads('andy'), None)
    def test_quad_position(self):
        """Test lisa_sig_vals"""
        from crankshaft.clustering import quad_position
        # builtin int: the np.int alias was deprecated in NumPy 1.20
        # and removed in 1.24
        quads = np.array([1, 2, 3, 4], int)
        ans = np.array(['HH', 'LH', 'LL', 'HL'])
        test_ans = quad_position(quads)
        self.assertTrue((test_ans == ans).all())
    def test_local_stat(self):
        """Test Moran's I local"""
        data = [OrderedDict([('id', d['id']),
                             ('attr1', d['value']),
                             ('neighbors', d['neighbors'])])
                for d in self.neighbors_data]
        moran = Moran(FakeDataProvider(data))
        random_seeds.set_random_seeds(1234)
        result = moran.local_stat('subquery', 'value',
                                  'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        zipped_values = zip(result, self.moran_data)
        for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)
            self.assertEqual(res_quad, exp_quad)
    def test_moran_local_rate(self):
        """Test Moran's I rate"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'attr2': 1,
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        random_seeds.set_random_seeds(1234)
        moran = Moran(FakeDataProvider(data))
        result = moran.local_rate_stat('subquery', 'numerator', 'denominator',
                                       'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        zipped_values = zip(result, self.moran_data)
        # NOTE(review): only the values are asserted here; the quads are
        # unpacked but not compared -- confirm whether that is intended
        for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)
    def test_moran(self):
        """Test Moran's I global"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        random_seeds.set_random_seeds(1235)
        moran = Moran(FakeDataProvider(data))
        result = moran.global_stat('table', 'value',
                                   'knn', 5, 99, 'the_geom',
                                   'cartodb_id')
        result_moran = result[0][0]
        expected_moran = np.array([row[0] for row in self.moran_data]).mean()
        self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)

View File

@@ -0,0 +1,160 @@
import unittest
import crankshaft.pysal_utils as pu
from crankshaft import random_seeds
from collections import OrderedDict
class PysalUtilsTest(unittest.TestCase):
    """Testing class for utility functions related to PySAL integrations"""
    def setUp(self):
        # params1: two-attribute (attr1/attr2) parameter set
        self.params1 = OrderedDict([("id_col", "cartodb_id"),
                                    ("attr1", "andy"),
                                    ("attr2", "jay_z"),
                                    ("subquery", "SELECT * FROM a_list"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])
        # params2/params3: rate-style numerator/denominator sets (swapped)
        self.params2 = OrderedDict([("id_col", "cartodb_id"),
                                    ("numerator", "price"),
                                    ("denominator", "sq_meters"),
                                    ("subquery", "SELECT * FROM pecan"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])
        self.params3 = OrderedDict([("id_col", "cartodb_id"),
                                    ("numerator", "sq_meters"),
                                    ("denominator", "price"),
                                    ("subquery", "SELECT * FROM pecan"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])
        # params_array: markov-style multi-time-column set
        self.params_array = {"id_col": "cartodb_id",
                             "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"],
                             "subquery": "SELECT * FROM a_list",
                             "geom_col": "the_geom",
                             "num_ngbrs": 321}
    def test_query_attr_select(self):
        """Test query_attr_select"""
        # expected strings must match pu output byte-for-byte
        ans1 = ("i.\"andy\"::numeric As attr1, "
                "i.\"jay_z\"::numeric As attr2, ")
        ans2 = ("i.\"price\"::numeric As attr1, "
                "i.\"sq_meters\"::numeric As attr2, ")
        ans3 = ("i.\"sq_meters\"::numeric As attr1, "
                "i.\"price\"::numeric As attr2, ")
        ans_array = ("i.\"_2013_dec\"::numeric As attr1, "
                     "i.\"_2014_jan\"::numeric As attr2, "
                     "i.\"_2014_feb\"::numeric As attr3, ")
        self.assertEqual(pu.query_attr_select(self.params1), ans1)
        self.assertEqual(pu.query_attr_select(self.params2), ans2)
        self.assertEqual(pu.query_attr_select(self.params3), ans3)
        self.assertEqual(pu.query_attr_select(self.params_array), ans_array)
    def test_query_attr_where(self):
        """Test pu.query_attr_where"""
        ans1 = ("idx_replace.\"andy\" IS NOT NULL AND "
                "idx_replace.\"jay_z\" IS NOT NULL")
        ans_array = ("idx_replace.\"_2013_dec\" IS NOT NULL AND "
                     "idx_replace.\"_2014_jan\" IS NOT NULL AND "
                     "idx_replace.\"_2014_feb\" IS NOT NULL")
        self.assertEqual(pu.query_attr_where(self.params1), ans1)
        self.assertEqual(pu.query_attr_where(self.params_array), ans_array)
    def test_knn(self):
        """Test knn neighbors constructor"""
        ans1 = "SELECT i.\"cartodb_id\" As id, " \
               "i.\"andy\"::numeric As attr1, " \
               "i.\"jay_z\"::numeric As attr2, " \
               "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
               "FROM (SELECT * FROM a_list) As j " \
               "WHERE " \
               "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
               "j.\"andy\" IS NOT NULL AND " \
               "j.\"jay_z\" IS NOT NULL " \
               "ORDER BY " \
               "j.\"the_geom\" <-> i.\"the_geom\" ASC " \
               "LIMIT 321)) As neighbors " \
               "FROM (SELECT * FROM a_list) As i " \
               "WHERE i.\"andy\" IS NOT NULL AND " \
               "i.\"jay_z\" IS NOT NULL " \
               "ORDER BY i.\"cartodb_id\" ASC;"
        ans_array = "SELECT i.\"cartodb_id\" As id, " \
                    "i.\"_2013_dec\"::numeric As attr1, " \
                    "i.\"_2014_jan\"::numeric As attr2, " \
                    "i.\"_2014_feb\"::numeric As attr3, " \
                    "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
                    "FROM (SELECT * FROM a_list) As j " \
                    "WHERE i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
                    "j.\"_2013_dec\" IS NOT NULL AND " \
                    "j.\"_2014_jan\" IS NOT NULL AND " \
                    "j.\"_2014_feb\" IS NOT NULL " \
                    "ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC " \
                    "LIMIT 321)) As neighbors " \
                    "FROM (SELECT * FROM a_list) As i " \
                    "WHERE i.\"_2013_dec\" IS NOT NULL AND " \
                    "i.\"_2014_jan\" IS NOT NULL AND " \
                    "i.\"_2014_feb\" IS NOT NULL "\
                    "ORDER BY i.\"cartodb_id\" ASC;"
        self.assertEqual(pu.knn(self.params1), ans1)
        self.assertEqual(pu.knn(self.params_array), ans_array)
    def test_queen(self):
        """Test queen neighbors constructor"""
        ans1 = "SELECT i.\"cartodb_id\" As id, " \
               "i.\"andy\"::numeric As attr1, " \
               "i.\"jay_z\"::numeric As attr2, " \
               "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
               "FROM (SELECT * FROM a_list) As j " \
               "WHERE " \
               "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
               "ST_Touches(i.\"the_geom\", " \
               "j.\"the_geom\") AND " \
               "j.\"andy\" IS NOT NULL AND " \
               "j.\"jay_z\" IS NOT NULL)" \
               ") As neighbors " \
               "FROM (SELECT * FROM a_list) As i " \
               "WHERE i.\"andy\" IS NOT NULL AND " \
               "i.\"jay_z\" IS NOT NULL " \
               "ORDER BY i.\"cartodb_id\" ASC;"
        self.assertEqual(pu.queen(self.params1), ans1)
    def test_construct_neighbor_query(self):
        """Test construct_neighbor_query"""
        # Compare to raw knn query
        self.assertEqual(pu.construct_neighbor_query('knn', self.params1),
                         pu.knn(self.params1))
    def test_get_attributes(self):
        """Test get_attributes"""
        # TODO: add real assertions -- this is a placeholder test
        self.assertEqual(True, True)
    def test_get_weight(self):
        """Test get_weight"""
        # TODO: add real assertions -- this is a placeholder test
        self.assertEqual(True, True)
    def test_empty_zipped_array(self):
        """Test empty_zipped_array"""
        ans2 = [(None, None)]
        ans4 = [(None, None, None, None)]
        self.assertEqual(pu.empty_zipped_array(2), ans2)
        self.assertEqual(pu.empty_zipped_array(4), ans4)

View File

@@ -0,0 +1,64 @@
import unittest
import numpy as np
from helper import plpy, fixture_file
import crankshaft.segmentation as segmentation
import json
class SegmentationTest(unittest.TestCase):
    """Testing class for segmentation functions (create_and_predict_segment)."""
    def setUp(self):
        # clear any canned results/messages left by previous tests
        plpy._reset()
    def generate_random_data(self,n_samples,random_state, row_type=False):
        # synthetic regression data: target is x1 + x2^2 + x3
        x1 = random_state.uniform(size=n_samples)
        x2 = random_state.uniform(size=n_samples)
        x3 = random_state.randint(0, 4, size=n_samples)
        y = x1+x2*x2+x3
        cartodb_id = range(len(x1))
        if row_type:
            # prediction-style rows: one 'features' tuple per sample
            return [ {'features': vals} for vals in zip(x1,x2,x3)], y
        else:
            # training-style: a single row of column arrays
            return [dict( zip(['x1','x2','x3','target', 'cartodb_id'],[x1,x2,x3,y,cartodb_id]))]
    def test_replace_nan_with_mean(self):
        # NOTE(review): this test builds the input but never calls
        # replace_nan_with_mean nor asserts anything -- it always passes
        test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])
    def test_create_and_predict_segment(self):
        n_samples = 1000
        random_state_train = np.random.RandomState(13)
        random_state_test = np.random.RandomState(134)
        training_data = self.generate_random_data(n_samples, random_state_train)
        test_data, test_y = self.generate_random_data(n_samples, random_state_test, row_type=True)
        ids = [{'cartodb_ids': range(len(test_data))}]
        rows = [{'x1': 0,'x2':0,'x3':0,'y':0,'cartodb_id':0}]
        # canned results keyed by regex over the SQL the implementation issues
        plpy._define_result('select \* from \(select \* from training\) a limit 1',rows)
        plpy._define_result('.*from \(select \* from training\) as a' ,training_data)
        plpy._define_result('select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',ids)
        plpy._define_result('.*select \* from test.*' ,test_data)
        model_parameters = {'n_estimators': 1200,
                            'max_depth': 3,
                            'subsample' : 0.5,
                            'learning_rate': 0.01,
                            'min_samples_leaf': 1}
        result = segmentation.create_and_predict_segment(
            'select * from training',
            'target',
            'select * from test',
            model_parameters)
        prediction = [r[1] for r in result]
        # RMSE of the predictions against the held-out targets
        accuracy =np.sqrt(np.mean( np.square( np.array(prediction) - np.array(test_y))))
        self.assertEqual(len(result),len(test_data))
        self.assertTrue( result[0][2] < 0.01)
        self.assertTrue( accuracy < 0.5*np.mean(test_y) )

View File

@@ -0,0 +1,349 @@
import unittest
import numpy as np
import unittest
from helper import fixture_file
from crankshaft.space_time_dynamics import Markov
import crankshaft.space_time_dynamics as std
from crankshaft import random_seeds
from crankshaft.analysis_data_provider import AnalysisDataProvider
import json
class FakeDataProvider(AnalysisDataProvider):
    """Test double for the data provider: serves canned rows, never a DB."""

    def __init__(self, data):
        # Rows that get_markov will hand back verbatim.
        self.mock_result = data

    def get_markov(self, w_type, params):
        """Ignore the query spec and return the fixture rows."""
        return self.mock_result
class SpaceTimeTests(unittest.TestCase):
"""Testing class for Markov Functions."""
def setUp(self):
# Canonical keyword set a caller would pass to the Markov analysis.
self.params = {"id_col": "cartodb_id",
"time_cols": ['dec_2013', 'jan_2014', 'feb_2014'],
"subquery": "SELECT * FROM a_list",
"geom_col": "the_geom",
"num_ngbrs": 321}
# Input rows and expected outputs are stored as JSON fixture files.
self.neighbors_data = json.loads(
open(fixture_file('neighbors_markov.json')).read())
self.markov_data = json.loads(open(fixture_file('markov.json')).read())
# 10 units x 10 time steps; every unit's value equals the time index.
self.time_data = np.array([i * np.ones(10, dtype=float)
for i in range(10)]).T
# Five 5x5 conditional transition matrices (one per spatial-lag class);
# each row is a probability distribution over the next state.
# NOTE(review): presumably derived from the PySAL usjoin example used
# elsewhere in this file -- TODO confirm.
self.transition_matrix = np.array([
[[0.96341463, 0.0304878, 0.00609756, 0., 0.],
[0.06040268, 0.83221477, 0.10738255, 0., 0.],
[0., 0.14, 0.74, 0.12, 0.],
[0., 0.03571429, 0.32142857, 0.57142857, 0.07142857],
[0., 0., 0., 0.16666667, 0.83333333]],
[[0.79831933, 0.16806723, 0.03361345, 0., 0.],
[0.0754717, 0.88207547, 0.04245283, 0., 0.],
[0.00537634, 0.06989247, 0.8655914, 0.05913978, 0.],
[0., 0., 0.06372549, 0.90196078, 0.03431373],
[0., 0., 0., 0.19444444, 0.80555556]],
[[0.84693878, 0.15306122, 0., 0., 0.],
[0.08133971, 0.78947368, 0.1291866, 0., 0.],
[0.00518135, 0.0984456, 0.79274611, 0.0984456, 0.00518135],
[0., 0., 0.09411765, 0.87058824, 0.03529412],
[0., 0., 0., 0.10204082, 0.89795918]],
[[0.8852459, 0.09836066, 0., 0.01639344, 0.],
[0.03875969, 0.81395349, 0.13953488, 0., 0.00775194],
[0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
[0., 0.02339181, 0.12865497, 0.75438596, 0.09356725],
[0., 0., 0., 0.09661836, 0.90338164]],
[[0.33333333, 0.66666667, 0., 0., 0.],
[0.0483871, 0.77419355, 0.16129032, 0.01612903, 0.],
[0.01149425, 0.16091954, 0.74712644, 0.08045977, 0.],
[0., 0.01036269, 0.06217617, 0.89637306, 0.03108808],
[0., 0., 0., 0.02352941, 0.97647059]]]
)
def test_spatial_markov(self):
    """Spatial Markov trend per unit should match the fixture values.

    Fix: removed a leftover Python-2-only debug statement
    (`print result[0]`), which is a syntax error under Python 3 and
    noise under Python 2, plus a commented-out debug print.
    """
    years = ['y1995', 'y1996', 'y1997', 'y1998', 'y1999', 'y2000',
             'y2001', 'y2002', 'y2003', 'y2004', 'y2005', 'y2006',
             'y2007', 'y2008', 'y2009']
    # Reshape fixture rows into the attr1..attr15 layout the data
    # provider contract expects, keeping id and neighbor list.
    data = []
    for d in self.neighbors_data:
        row = {'id': d['id'], 'neighbors': d['neighbors']}
        row.update(('attr%d' % (i + 1), d[col])
                   for i, col in enumerate(years))
        data.append(row)

    markov = Markov(FakeDataProvider(data))
    random_seeds.set_random_seeds(1234)
    result = markov.spatial_trend('subquery', years,
                                  5, 'knn', 5, 0, 'the_geom',
                                  'cartodb_id')
    self.assertTrue(result is not None)
    result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
    expected = self.markov_data
    # Only the trend component is asserted; the remaining columns are
    # unpacked to document the row layout.
    for ([res_trend, res_up, res_down, res_vol, res_id],
         [exp_trend, exp_up, exp_down, exp_vol, exp_id]
         ) in zip(result, expected):
        self.assertAlmostEqual(res_trend, exp_trend)
def test_get_time_data(self):
"""Test get_time_data"""
# Rename fixture year columns to the attr1..attr15 provider layout.
data = [{'attr1': d['y1995'],
'attr2': d['y1996'],
'attr3': d['y1997'],
'attr4': d['y1998'],
'attr5': d['y1999'],
'attr6': d['y2000'],
'attr7': d['y2001'],
'attr8': d['y2002'],
'attr9': d['y2003'],
'attr10': d['y2004'],
'attr11': d['y2005'],
'attr12': d['y2006'],
'attr13': d['y2007'],
'attr14': d['y2008'],
'attr15': d['y2009']} for d in self.neighbors_data]
result = std.get_time_data(data, ['y1995', 'y1996', 'y1997', 'y1998',
'y1999', 'y2000', 'y2001', 'y2002',
'y2003', 'y2004', 'y2005', 'y2006',
'y2007', 'y2008', 'y2009'])
# expected was prepared from PySAL example:
# f = ps.open(ps.examples.get_path("usjoin.csv"))
# pci = np.array([f.by_col[str(y)]
# for y in range(1995, 2010)]).transpose()
# rpci = pci / (pci.mean(axis = 0))
# i.e. per-year incomes normalized by the cross-state yearly mean.
expected = np.array(
[[0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154,
0.83271652, 0.83786314, 0.85012593, 0.85509656, 0.86416612,
0.87119375, 0.86302631, 0.86148267, 0.86252252, 0.86746356],
[0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388,
0.90746978, 0.89830489, 0.89431991, 0.88924794, 0.89815176,
0.91832091, 0.91706054, 0.90139505, 0.87897455, 0.86216858],
[0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522,
0.78964559, 0.80584442, 0.8084998, 0.82258551, 0.82668196,
0.82373724, 0.81814804, 0.83675961, 0.83574199, 0.84647177],
[1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841,
1.14506948, 1.12151133, 1.11160697, 1.10888621, 1.11399806,
1.12168029, 1.13164797, 1.12958508, 1.11371818, 1.09936775],
[1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025,
1.16898201, 1.17212488, 1.14752303, 1.11843284, 1.11024964,
1.11943471, 1.11736468, 1.10863242, 1.09642516, 1.07762337],
[1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684,
1.44184737, 1.44782832, 1.41978227, 1.39092208, 1.4059372,
1.40788646, 1.44052766, 1.45241216, 1.43306098, 1.4174431],
[1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149,
1.10888138, 1.11856629, 1.13062931, 1.11944984, 1.12446239,
1.11671008, 1.10880034, 1.08401709, 1.06959206, 1.07875225],
[1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545,
0.99854316, 0.9880258, 0.99669587, 0.99327676, 1.01400905,
1.03176742, 1.040511, 1.01749645, 0.9936394, 0.98279746],
[0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845,
0.99127006, 0.97925917, 0.9683482, 0.95335147, 0.93694787,
0.94308213, 0.92232874, 0.91284091, 0.89689833, 0.88928858],
[0.87418391, 0.86416601, 0.84425695, 0.8404494, 0.83903044,
0.8578708, 0.86036185, 0.86107306, 0.8500772, 0.86981998,
0.86837929, 0.87204141, 0.86633032, 0.84946077, 0.83287146],
[1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624,
1.14450183, 1.12349752, 1.12596664, 1.12213996, 1.1119989,
1.10257792, 1.10491258, 1.11059842, 1.10509795, 1.10020097],
[0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687,
0.95831051, 0.94480909, 0.94804195, 0.95430286, 0.94103989,
0.92122519, 0.91010201, 0.89280392, 0.89298243, 0.89165385],
[0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647,
0.9480927, 0.93539182, 0.95388718, 0.94597005, 0.96918424,
0.94781281, 0.93466815, 0.94281559, 0.96520315, 0.96715441],
[0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897,
0.98687073, 0.99237486, 0.98209969, 0.9877653, 0.97399471,
0.96910087, 0.98416665, 0.98423613, 0.99823861, 0.99545704],
[0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012,
0.86191535, 0.84981451, 0.85472102, 0.84564835, 0.83998883,
0.83478547, 0.82803648, 0.8198736, 0.82265395, 0.8399404],
[0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136,
0.82785597, 0.86008789, 0.86776298, 0.86720209, 0.8676334,
0.89179317, 0.94202108, 0.9422231, 0.93902708, 0.94479184],
[0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238,
0.90906632, 0.92693339, 0.93695966, 0.94242697, 0.94338265,
0.91981796, 0.91108804, 0.90543476, 0.91737138, 0.94793657],
[1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723,
1.20172869, 1.21328691, 1.22624778, 1.22397075, 1.23857042,
1.24419893, 1.23929384, 1.23418676, 1.23626739, 1.26754398],
[1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667,
1.34790023, 1.34399863, 1.32575181, 1.30795492, 1.30544841,
1.30303302, 1.32107766, 1.32936244, 1.33001241, 1.33288462],
[1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093,
1.05059016, 1.03405057, 1.02747623, 1.03162734, 0.9961416,
0.97356208, 0.94241549, 0.92754547, 0.92549227, 0.92138102],
[1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264,
1.13889622, 1.12442212, 1.13367018, 1.13982256, 1.14029944,
1.11979401, 1.10905389, 1.10577769, 1.11166825, 1.09985155],
[0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284,
0.74480073, 0.76098396, 0.76156903, 0.76651952, 0.76533288,
0.78205934, 0.76842416, 0.77487118, 0.77768683, 0.78801192],
[0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803,
0.97370819, 0.96419154, 0.97209861, 0.97441313, 0.96356162,
0.94745352, 0.93965462, 0.93069645, 0.94020973, 0.94358232],
[0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801,
0.80071489, 0.83358256, 0.83451613, 0.85175032, 0.85954307,
0.86790024, 0.87170334, 0.87863799, 0.87497981, 0.87888675],
[0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619,
0.98733195, 0.99644997, 0.99669587, 1.02559097, 1.01116651,
0.99988024, 0.97906749, 0.99323123, 1.00204939, 0.99602148],
[1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683,
1.08312397, 1.05192626, 1.04230892, 1.05577278, 1.08569751,
1.12443486, 1.08891079, 1.08603695, 1.05997314, 1.02160943],
[1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272,
1.18257029, 1.16226243, 1.16009196, 1.14467789, 1.14820235,
1.12386598, 1.12680236, 1.12357937, 1.1159258, 1.12570828],
[1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667,
1.31210239, 1.29989156, 1.29203193, 1.27183516, 1.26830786,
1.2617743, 1.28656675, 1.29734097, 1.29390205, 1.29345446],
[0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864,
0.78772975, 0.82848011, 0.8259679, 0.82435705, 0.83108634,
0.84373784, 0.83891093, 0.84349247, 0.85637272, 0.86539395],
[1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626,
1.2256767, 1.21126648, 1.19377804, 1.18355337, 1.19674434,
1.21536573, 1.23653297, 1.27962009, 1.27968392, 1.25907738],
[0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282,
0.96480308, 0.94686376, 0.93679073, 0.92540049, 0.92988835,
0.93442917, 0.92100464, 0.91475304, 0.90249622, 0.9021363],
[0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368,
0.88937573, 0.894401, 0.90448993, 0.95495898, 0.92698333,
0.94745352, 0.92562488, 0.96635366, 1.02520312, 1.0394296],
[1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073,
1.00759019, 0.99192968, 0.99747298, 0.99550759, 0.97583768,
0.9610168, 0.94779638, 0.93759089, 0.93353431, 0.94121705],
[0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613,
0.83434854, 0.85813595, 0.84667961, 0.84374558, 0.85951183,
0.87194227, 0.89455097, 0.88283929, 0.90349491, 0.90600675],
[1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086,
1.00581626, 0.98850522, 0.99291168, 0.98983209, 0.97511924,
0.96134615, 0.96382634, 0.95011401, 0.9434686, 0.94637765],
[1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857,
1.04800023, 1.03024941, 1.04200483, 1.0402554, 1.03296979,
1.02191682, 1.02476275, 1.02347523, 1.02517684, 1.04359571],
[1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043,
1.0531801, 1.07452771, 1.09383478, 1.1052447, 1.10322136,
1.09167939, 1.08772756, 1.08859544, 1.09177338, 1.1096083],
[0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809,
0.86287327, 0.85169796, 0.85411285, 0.84886336, 0.84517414,
0.84843858, 0.84488343, 0.83374329, 0.82812044, 0.82878599],
[0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286,
0.92652175, 0.94278865, 0.93682452, 0.98655146, 0.992237,
0.9798497, 0.93869677, 0.96947771, 1.00362626, 0.98102351],
[0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967,
0.93092109, 0.92662519, 0.93412152, 0.93501274, 0.92879506,
0.92110542, 0.91035556, 0.90430364, 0.89994694, 0.90073864],
[0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824,
0.98882205, 0.97662234, 0.95601578, 0.94905385, 0.94934888,
0.97152609, 0.97163004, 0.9700702, 0.97158948, 0.95884908],
[0.83980439, 0.84726737, 0.85747, 0.85467221, 0.8556751,
0.84818516, 0.85265681, 0.84502402, 0.82645665, 0.81743586,
0.83550406, 0.83338919, 0.83511679, 0.82136617, 0.80921874],
[0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441,
0.95440787, 0.96364363, 0.96804412, 0.97136214, 0.97583768,
0.95571724, 0.96895368, 0.97001634, 0.97082733, 0.98782366],
[1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249,
1.10558188, 1.1214086, 1.12292577, 1.13021031, 1.13342735,
1.14686068, 1.14502975, 1.14474747, 1.14084037, 1.16142926],
[1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863,
1.11856702, 1.09764283, 1.08815849, 1.08044313, 1.09278827,
1.07003204, 1.08398066, 1.09831768, 1.09298232, 1.09176125],
[0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744,
0.77751194, 0.79902974, 0.81437881, 0.80788828, 0.79603865,
0.78966436, 0.79949807, 0.80172182, 0.82168155, 0.85587911],
[1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561,
1.00162979, 0.99860739, 1.00814981, 1.00574316, 0.99030032,
0.97682565, 0.97292596, 0.96519561, 0.96173403, 0.95890284],
[0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289,
0.96608031, 0.99727185, 1.00781194, 1.03484236, 1.05333619,
1.0983263, 1.1704974, 1.17025154, 1.18730553, 1.14242645]])
# Values, container type (ndarray) and shape must all match.
self.assertTrue(np.allclose(result, expected))
self.assertTrue(type(result) == type(expected))
self.assertTrue(result.shape == expected.shape)
def test_rebin_data(self):
    """rebin_data should average runs of consecutive time steps."""
    # Even division (10 % 2 == 0): pairs average to 0.5, 2.5, 4.5, 6.5, 8.5.
    expected_pairs = np.array([(step + 0.5) * np.ones(10, dtype=float)
                               for step in range(0, 10, 2)]).T
    self.assertTrue(
        np.array_equal(std.rebin_data(self.time_data, 2), expected_pairs))
    # Uneven division (10 % 3 == 1): the leftover step averages alone,
    # giving 1, 4, 7, 9.
    expected_triples = np.array([step * np.ones(10, dtype=float)
                                 for step in (1, 4, 7, 9)]).T
    self.assertTrue(
        np.array_equal(std.rebin_data(self.time_data, 3), expected_triples))
def test_get_prob_dist(self):
    """get_prob_dist should select row unit_idx from lag-matrix lag_idx."""
    lag_indices = np.array([1, 2, 3, 4])
    unit_indices = np.array([1, 3, 2, 4])
    # Rows copied from self.transition_matrix at the (lag, unit) pairs.
    expected = np.array([
        [0.0754717, 0.88207547, 0.04245283, 0., 0.],
        [0., 0., 0.09411765, 0.87058824, 0.03529412],
        [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
        [0., 0., 0., 0.02352941, 0.97647059]
    ])
    observed = std.get_prob_dist(self.transition_matrix,
                                 lag_indices, unit_indices)
    self.assertTrue(np.array_equal(observed, expected))
def test_get_prob_stats(self):
    """get_prob_stats should return (up, down, trend, volatility) arrays."""
    probs = np.array([
        [0.0754717, 0.88207547, 0.04245283, 0., 0.],
        [0., 0., 0.09411765, 0.87058824, 0.03529412],
        [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
        [0., 0., 0., 0.02352941, 0.97647059]
    ])
    unit_indices = np.array([1, 3, 2, 4])
    expected = {
        'up': np.array([0.04245283, 0.03529412, 0.12376238, 0.]),
        'down': np.array([0.0754717, 0.09411765,
                          0.0990099, 0.02352941]),
        # trend = (p_up - p_down) / p_stay for each unit
        'trend': np.array([-0.03301887 / 0.88207547,
                           -0.05882353 / 0.87058824,
                           0.02475248 / 0.77722772,
                           -0.02352941 / 0.97647059]),
        'volatility': np.array([0.34221495, 0.33705421,
                                0.29226542, 0.38834223]),
    }
    stats = std.get_prob_stats(probs, unit_indices)
    for pos, key in enumerate(('up', 'down', 'trend', 'volatility')):
        self.assertTrue(np.allclose(stats[pos], expected[key]))

View File

@@ -1,5 +1,5 @@
comment = 'CartoDB Spatial Analysis extension'
default_version = '0.4.2'
default_version = '0.5.1'
requires = 'plpythonu, postgis'
superuser = true
schema = cdb_crankshaft

View File

@@ -10,9 +10,11 @@ CREATE OR REPLACE FUNCTION
id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
AS $$
from crankshaft.clustering import moran
from crankshaft.clustering import Moran
# TODO: use named parameters or a dictionary
return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
moran = Moran()
return moran.global_stat(subquery, column_name, w_type,
num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
-- Moran's I Local (internal function)
@@ -27,9 +29,11 @@ CREATE OR REPLACE FUNCTION
id_col TEXT)
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
from crankshaft.clustering import moran_local
from crankshaft.clustering import Moran
moran = Moran()
# TODO: use named parameters or a dictionary
return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
return moran.local_stat(subquery, column_name, w_type,
num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
-- Moran's I Local (public-facing function)
@@ -120,9 +124,11 @@ CREATE OR REPLACE FUNCTION
id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (moran FLOAT, significance FLOAT)
AS $$
from crankshaft.clustering import moran_local
from crankshaft.clustering import Moran
moran = Moran()
# TODO: use named parameters or a dictionary
return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
return moran.global_rate_stat(subquery, numerator, denominator, w_type,
num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
@@ -140,9 +146,10 @@ CREATE OR REPLACE FUNCTION
RETURNS
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
AS $$
from crankshaft.clustering import moran_local_rate
from crankshaft.clustering import Moran
moran = Moran()
# TODO: use named parameters or a dictionary
return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
return moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
-- Moran's I Local Rate (public-facing function)

View File

@@ -1,21 +1,24 @@
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
RETURNS table (cartodb_id integer, cluster_no integer) as $$
from crankshaft.clustering import kmeans
return kmeans(query,no_clusters,no_init)
-- Spatial k-means clustering
$$ language plpythonu;
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer, no_init integer default 20)
RETURNS table (cartodb_id integer, cluster_no integer) as $$
from crankshaft.clustering import Kmeans
kmeans = Kmeans()
return kmeans.spatial(query, no_clusters, no_init)
$$ LANGUAGE plpythonu;
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
RETURNS Numeric[] AS
RETURNS Numeric[] AS
$$
DECLARE
DECLARE
newX NUMERIC;
newY NUMERIC;
newW NUMERIC;
BEGIN
IF weight IS NULL OR the_geom IS NULL THEN
IF weight IS NULL OR the_geom IS NULL THEN
newX = state[1];
newY = state[2];
newW = state[3];
@@ -30,12 +33,12 @@ END
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
RETURNS GEOMETRY AS
RETURNS GEOMETRY AS
$$
BEGIN
IF state[3] = 0 THEN
IF state[3] = 0 THEN
RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326);
ELSE
ELSE
RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326);
END IF;
END
@@ -56,7 +59,7 @@ BEGIN
SFUNC = CDB_WeightedMeanS,
FINALFUNC = CDB_WeightedMeanF,
STYPE = Numeric[],
INITCOND = "{0.0,0.0,0.0}"
INITCOND = "{0.0,0.0,0.0}"
);
END IF;
END

View File

@@ -22,10 +22,11 @@ CREATE OR REPLACE FUNCTION
RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT)
AS $$
from crankshaft.space_time_dynamics import spatial_markov_trend
from crankshaft.space_time_dynamics import Markov
markov = Markov()
## TODO: use named parameters or a dictionary
return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
return markov.spatial_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
-- input table format: identical to above but in a predictable format

19
src/pg/sql/16_getis.sql Normal file
View File

@@ -0,0 +1,19 @@
-- Getis-Ord's G
-- Hotspot/Coldspot Analysis tool
--
-- subquery:     SQL producing the rows to analyze (must expose id_col,
--               geom_col and column_name)
-- column_name:  numeric column whose clustering is measured
-- w_type:       spatial weights type (default k-nearest neighbors)
-- num_ngbrs:    neighbors per observation when w_type = 'knn'
-- permutations: randomizations used to derive the simulated p-values
--               (presumably via PySAL's G_Local -- TODO confirm)
-- Returns one (z_score, p_value, p_z_sim, rowid) row per input row.
CREATE OR REPLACE FUNCTION
CDB_GetisOrdsG(
subquery TEXT,
column_name TEXT,
w_type TEXT DEFAULT 'knn',
num_ngbrs INT DEFAULT 5,
permutations INT DEFAULT 999,
geom_col TEXT DEFAULT 'the_geom',
id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (z_score NUMERIC, p_value NUMERIC, p_z_sim NUMERIC, rowid BIGINT)
AS $$
# Delegate to the class-based implementation in the crankshaft package.
from crankshaft.clustering import Getis
getis = Getis()
return getis.getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;
-- TODO: make a version that accepts the values as arrays

View File

@@ -0,0 +1,75 @@
-- Find outliers using a static threshold
--
-- Returns true when column_value exceeds threshold.  A NULL input
-- yields NULL (SQL comparison semantics), not false.
CREATE OR REPLACE FUNCTION CDB_StaticOutlier(column_value numeric, threshold numeric)
RETURNS boolean
AS $$
BEGIN
RETURN column_value > threshold;
END;
$$ LANGUAGE plpgsql;
-- Find outliers by a percentage above the threshold
-- TODO: add symmetric option? `is_symmetric boolean DEFAULT false`
--
-- column_values:    sample values
-- outlier_fraction: a row is an outlier when value / mean(values)
--                   exceeds this ratio (one-sided, high side only)
-- ids:              row ids parallel to column_values, echoed as rowid
-- Raises an exception when the mean is zero (ratio undefined).
CREATE OR REPLACE FUNCTION CDB_PercentOutlier(column_values numeric[], outlier_fraction numeric, ids int[])
RETURNS TABLE(is_outlier boolean, rowid int)
AS $$
DECLARE
avg_val numeric;
out_vals boolean[];
BEGIN
-- mean of the input values
SELECT avg(i) INTO avg_val
FROM unnest(column_values) As x(i);
IF avg_val = 0 THEN
RAISE EXCEPTION 'Mean value is zero. Try another outlier method.';
END IF;
-- flag values whose ratio to the mean exceeds outlier_fraction
SELECT array_agg(
outlier_fraction < i / avg_val) INTO out_vals
FROM unnest(column_values) As x(i);
-- pair each flag with its id (arrays unnest in parallel)
RETURN QUERY
SELECT unnest(out_vals) As is_outlier,
unnest(ids) As rowid;
END;
$$ LANGUAGE plpgsql;
-- Find outliers above a given number of standard deviations from the mean
--
-- column_values:  sample values
-- num_deviations: z-score threshold for flagging a row
-- ids:            row ids parallel to column_values, echoed as rowid
-- is_symmetric:   when true (default) flag on |z| > threshold (both
--                 tails); when false flag only the high tail
-- Raises an exception when the standard deviation is zero.
CREATE OR REPLACE FUNCTION CDB_StdDevOutlier(column_values numeric[], num_deviations numeric, ids int[], is_symmetric boolean DEFAULT true)
RETURNS TABLE(is_outlier boolean, rowid int)
AS $$
DECLARE
stddev_val numeric;
avg_val numeric;
out_vals boolean[];
BEGIN
-- sample standard deviation and mean in a single pass
SELECT stddev(i), avg(i) INTO stddev_val, avg_val
FROM unnest(column_values) As x(i);
IF stddev_val = 0 THEN
RAISE EXCEPTION 'Standard deviation of input data is zero';
END IF;
IF is_symmetric THEN
-- two-sided: |value - mean| / stddev
SELECT array_agg(
abs(i - avg_val) / stddev_val > num_deviations) INTO out_vals
FROM unnest(column_values) As x(i);
ELSE
-- one-sided: only values far above the mean
SELECT array_agg(
(i - avg_val) / stddev_val > num_deviations) INTO out_vals
FROM unnest(column_values) As x(i);
END IF;
-- pair each flag with its id (arrays unnest in parallel)
RETURN QUERY
SELECT unnest(out_vals) As is_outlier,
unnest(ids) As rowid;
END;
$$ LANGUAGE plpgsql;

11
src/pg/sql/21_gwr.sql Normal file
View File

@@ -0,0 +1,11 @@
-- Geographically Weighted Regression
--
-- subquery: SQL producing the rows to analyze
-- dep_var:  dependent-variable column; ind_vars: independent columns
-- bw:       bandwidth (NULL presumably triggers automatic bandwidth
--           selection in the Python layer -- TODO confirm)
-- fixed:    fixed vs. adaptive bandwidth -- TODO confirm semantics
-- kernel:   spatial kernel name (default 'bisquare')
CREATE OR REPLACE FUNCTION
CDB_GWR(subquery text, dep_var text, ind_vars text[],
bw numeric default null, fixed boolean default False, kernel text default 'bisquare')
RETURNS table(coeffs JSON, stand_errs JSON, t_vals JSON, predicted numeric, residuals numeric, r_squared numeric, rowid bigint, bandwidth numeric)
AS $$
# Delegate to the GWR implementation in the crankshaft package.
from crankshaft.regression import gwr_cs
return gwr_cs.gwr(subquery, dep_var, ind_vars, bw, fixed, kernel)
$$ LANGUAGE plpythonu;

View File

@@ -149,135 +149,135 @@ _cdb_random_seeds
(1 row)
code|quads
01|LL
02|LH
03|HH
04|HH
05|LL
06|HH
07|LL
08|LL
09|LL
10|HH
11|HH
12|HL
13|LL
14|HH
01|HH
02|HL
03|LL
04|LL
05|LH
06|LL
07|HH
08|HH
09|HH
10|LL
11|LL
12|LL
13|HL
14|LL
15|LL
16|LL
17|LL
18|LH
19|LL
20|LL
21|HH
22|LL
23|HL
16|HH
17|HH
18|LL
19|HH
20|HH
21|LL
22|HH
23|LL
24|LL
25|LL
26|LL
25|HH
26|HH
27|LL
28|LL
29|LH
30|HH
31|LL
28|HH
29|LL
30|LL
31|HH
32|LL
33|LL
34|LL
35|LH
36|HL
37|LH
38|LH
39|LL
40|LL
41|LH
42|HL
43|LL
44|HL
45|LL
46|HL
33|HL
34|LH
35|LL
36|LL
37|HL
38|HL
39|HH
40|HH
41|HL
42|LH
43|LH
44|LL
45|LH
46|LL
47|LL
48|LL
49|HL
50|LL
51|HH
(51 rows)
48|HH
49|LH
50|HH
51|LL
52|LL
(52 rows)
_cdb_random_seeds
(1 row)
code|quads
03|HH
04|HH
06|HH
10|HH
11|HH
12|HL
14|HH
21|HH
23|HL
30|HH
36|HL
42|HL
44|HL
46|HL
49|HL
51|HH
(16 rows)
01|HH
02|HL
07|HH
08|HH
09|HH
13|HL
16|HH
17|HH
19|HH
20|HH
22|HH
25|HH
26|HH
28|HH
31|HH
33|HL
37|HL
38|HL
39|HH
40|HH
41|HL
48|HH
50|HH
(23 rows)
_cdb_random_seeds
(1 row)
code|quads
01|LL
02|LH
05|LL
07|LL
08|LL
09|LL
13|LL
03|LL
04|LL
05|LH
06|LL
10|LL
11|LL
12|LL
14|LL
15|LL
16|LL
17|LL
18|LH
19|LL
20|LL
22|LL
18|LL
21|LL
23|LL
24|LL
25|LL
26|LL
27|LL
28|LL
29|LH
31|LL
29|LL
30|LL
32|LL
33|LL
34|LL
35|LH
37|LH
38|LH
39|LL
40|LL
41|LH
43|LL
45|LL
34|LH
35|LL
36|LL
42|LH
43|LH
44|LL
45|LH
46|LL
47|LL
48|LL
50|LL
(35 rows)
49|LH
51|LL
52|LL
(29 rows)
_cdb_random_seeds
(1 row)
code|quads
02|LH
12|HL
18|LH
23|HL
29|LH
35|LH
36|HL
37|LH
38|LH
41|LH
42|HL
44|HL
46|HL
49|HL
(14 rows)
02|HL
05|LH
13|HL
33|HL
34|LH
37|HL
38|HL
41|HL
42|LH
43|LH
45|LH
49|LH
(12 rows)

View File

@@ -0,0 +1,21 @@
\pset format unaligned
\set ECHO all
\i test/fixtures/getis_data.sql
SET client_min_messages TO WARNING;
\set ECHO none
_cdb_random_seeds
(1 row)
rowid|z_score|p_value
9|-0.7862|0.0500
22|-0.3955|0.0330
33|2.7045|0.0050
35|1.9524|0.0130
36|-1.2056|0.0170
37|3.4785|0.0020
38|-1.4622|0.0020
40|5.7098|0.0030
46|3.4704|0.0120
47|-0.9994|0.0320
48|-1.3650|0.0340
(11 rows)

View File

@@ -0,0 +1,23 @@
SET client_min_messages TO WARNING;
\set ECHO none
is_outlier|rowid
t|11
t|16
t|17
(3 rows)
is_outlier|rowid
t|16
t|17
(2 rows)
ERROR: Standard deviation of input data is zero
is_outlier|rowid
t|8
t|11
t|16
(3 rows)
is_outlier|rowid
t|8
t|9
t|11
t|16
(4 rows)

98
src/pg/test/fixtures/getis_data.sql vendored Normal file
View File

@@ -0,0 +1,98 @@
SET client_min_messages TO WARNING;
\set ECHO none
--
-- Getis-Ord's G* test dataset, subsetted from PySAL examples:
-- https://github.com/pysal/pysal/tree/952ea04029165048a774d9a1846cf86ad000c096/pysal/examples/stl
--
CREATE TABLE getis_data (
cartodb_id integer,
the_geom geometry(Geometry,4326),
hr8893 numeric
);
COPY getis_data (cartodb_id, the_geom, hr8893) FROM stdin;
22 0106000020E61000000100000001030000000100000007000000000000E0B10056C0000000C0B8964340FFFFFFFF4C1756C00000002054964340000000A00F1E56C00000004072964340000000C02D1E56C0000000A0439B434000000060381E56C00000000036B04340000000E0E20056C0000000608CB04340000000E0B10056C0000000C0B8964340 10.8557430000000004
32 0106000020E6100000010000000103000000010000000B000000FFFFFF1FC26656C0FFFFFFBFE25E4340000000A0D86656C0000000E0976F4340000000A03A6956C0000000C0966F434000000020526956C0000000E08A7F4340000000E0F26556C000000000C87F4340000000E0066656C0000000209C834340000000407F5056C0000000803C83434000000020635056C0000000E016814340000000A0F45056C0000000A0F980434000000060D25056C000000060FA5E4340FFFFFF1FC26656C0FFFFFFBFE25E4340 9.92424500000000087
10 0106000020E610000001000000010300000001000000170000000000000002CD56C000000080CDCC4340000000A054D456C000000020CCD74340000000607ED756C000000000C1DC434000000020E6D756C00000006071E143400000004007BB56C00000000007E2434000000080FABA56C0000000A079EC43400000000040B856C0000000E0D6EB4340FFFFFF3FEEA456C0000000A037EC4340000000C0A9A556C0000000A0ADE7434000000040F3A656C0000000E09FE543400000004063A956C0000000E034DA4340FFFFFF9F04A956C00000008005D74340000000402FA756C00000008069D243400000004046A556C00000002068C84340000000009EA556C0FFFFFF7F3CC34340000000C0C3A756C000000080BCB543400000006082A756C00000004051B2434000000040AABC56C00000006046B343400000006053C256C0FFFFFF7FE2B84340000000E01EC456C000000080ABBC4340000000C0FDC556C0000000E0B3C3434000000000FFC956C000000060BBC643400000000002CD56C000000080CDCC4340 3.79060700000000006
43 0106000020E6100000010000000103000000010000000F0000000000004025D856C000000020FA1A43400000008092E256C000000060481B434000000060BCE256C0000000C023144340000000A0D7E856C0FFFFFF1F1B14434000000020BEE956C0000000C030144340FFFFFF9FB0E956C0FFFFFF1F1425434000000000D4E956C0000000C00D5A4340000000A0D3E956C0000000202C5A43400000000004E656C00000004066574340000000E0EEE356C0000000A0E35643400000008099DF56C0000000601B5A43400000000033DB56C0000000804A5B43400000004001D856C00000006079594340000000E0A7D756C0000000E0553543400000004025D856C000000020FA1A4340 5.93309800000000021
6 0106000020E6100000010000000103000000010000000F000000000000A00F4256C0000000E008D4434000000000674956C00000004015D44340000000608C4956C00000004098E64340FFFFFFBF434C56C0FFFFFF3F77E84340000000004C4E56C000000020E5E74340000000C0624E56C0FFFFFF3F97F5434000000020B44956C000000000AFF54340000000E0C64956C00000004009074440FFFFFF1F523056C0FFFFFFDF91074440000000C0EB2F56C000000040BBE54340000000E0B93056C0000000E09FE54340000000E0D63056C0000000007DDE4340000000E0213456C0000000005ADE4340000000802E3456C000000020F7D34340000000A00F4256C0000000E008D44340 9.04867300000000085
16 0106000020E6100000010000000103000000010000001500000000000020D73356C000000060729B4340000000201F4956C000000000BE9B4340000000A0E34856C000000060CCAC434000000040094256C0FFFFFFDFB1AC4340000000A00F4256C0000000E008D44340000000802E3456C000000020F7D34340000000E0083456C0000000E0ADCA4340000000801D2E56C0000000A06FCA4340FFFFFF7F132E56C00000000079C34340000000607F2956C0000000402EC3434000000080652956C0000000A0EAC04340FFFFFF5FF22756C000000060E5C0434000000080F52756C000000080DDBE434000000020B32656C0000000E0E7BE434000000000AC2656C0FFFFFFFF38BD4340FFFFFFDFC12556C0000000C026BD434000000060C72556C000000060BAB9434000000040441E56C000000020C9B9434000000060381E56C00000000036B04340000000C02D1E56C0000000A0439B434000000020D73356C000000060729B4340 0.74509000000000003
29 0106000020E610000001000000010300000001000000080000000000002025FD55C0FFFFFF1F7F6D434000000080C61056C0000000A04C6D4340000000A0631756C000000000D56D4340000000C05E1756C0FFFFFF5F24754340FFFFFFFF4C1756C00000002054964340000000E0B10056C0000000C0B89643400000006029FD55C000000080C09643400000002025FD55C0FFFFFF1F7F6D4340 3.12759000000000009
54 0106000020E6100000010000000103000000010000000F000000FFFFFF1F090C57C0000000202E024340000000E0AA0B57C00000004000154340000000A0C10C57C0000000802015434000000060850C57C000000080EC244340FFFFFF9FB0E956C0FFFFFF1F1425434000000020BEE956C0000000C030144340000000A0D7E856C0FFFFFF1F1B144340FFFFFF3FF2E856C0000000A0A3064340FFFFFFDFD2F956C000000040E2064340000000C01EFB56C0000000201406434000000060BBFB56C0000000809A044340000000605AFD56C0000000E059054340000000405FFE56C0000000E06B014340000000A0F60157C0000000C081014340FFFFFF1F090C57C0000000202E024340 2.06432400000000005
13 0106000020E6100000010000000103000000010000000F000000000000009EA556C0FFFFFF7F3CC343400000004046A556C00000002068C84340000000402FA756C00000008069D24340FFFFFF9F04A956C00000008005D743400000004063A956C0000000E034DA434000000040F3A656C0000000E09FE54340000000E0F09E56C0000000005AE54340FFFFFF3F069F56C000000040B5E04340FFFFFF1F0E9856C000000020A3E0434000000060109856C0000000E07ED5434000000040E29556C0000000A077D54340FFFFFFDFDC9556C0000000A0EDD1434000000080399356C0000000A0D8D1434000000020699356C0000000E029C34340000000009EA556C0FFFFFF7F3CC34340 0
28 0106000020E61000000100000001030000000100000008000000000000C05E1756C0FFFFFF5F24754340000000C0DD2C56C0000000407A75434000000080DE3356C0000000406375434000000020D73356C000000060729B4340000000C02D1E56C0000000A0439B4340000000A00F1E56C00000004072964340FFFFFFFF4C1756C00000002054964340000000C05E1756C0FFFFFF5F24754340 1.57115800000000005
36 0106000020E6100000010000000103000000010000000D00000000000000EE2C56C000000060424E434000000040F72C56C000000000486A4340000000C0DD2C56C0000000407A754340000000C05E1756C0FFFFFF5F24754340000000A0631756C000000000D56D434000000080C61056C0000000A04C6D4340000000C0BE1056C0000000A0065F4340000000407C1256C0FFFFFF7FA75E434000000000BD1156C000000020A954434000000040D01256C0000000605C524340000000404F1256C000000040734F434000000040011156C000000000AF4D434000000000EE2C56C000000060424E4340 0
68 0106000020E61000000100000001030000000100000006000000000000809F2D56C00000002078CD4240000000E0F64256C0FFFFFFDF38CD424000000000CE4956C00000004053CD4240000000C0E94956C000000080CDEE424000000020682D56C0FFFFFF7F00EF4240000000809F2D56C00000002078CD4240 3.1461920000000001
27 0106000020E6100000010000000103000000010000000D000000000000407F5056C0000000803C83434000000060615056C000000040A99B4340000000201F4956C000000000BE9B434000000020D73356C000000060729B434000000080DE3356C00000004063754340000000C0DD2C56C0000000407A75434000000040F72C56C000000000486A4340000000202B4956C000000080AC69434000000040454956C000000040E25E434000000060D25056C000000060FA5E4340000000A0F45056C0000000A0F980434000000020635056C0000000E016814340000000407F5056C0000000803C834340 1.5920399999999999
40 0106000020E6100000010000000103000000010000000F00000000000000B89056C0000000C03144434000000000419456C0000000A03D4A434000000020609356C000000020CC55434000000020578F56C0000000A0725D4340000000004D8C56C0000000C05E614340000000E0F48A56C000000020B164434000000060CA8756C00000002077664340000000C0A68856C0000000C08A64434000000020758A56C0000000E0F4624340000000A0948C56C0000000E0AA5C4340FFFFFF7FF18C56C000000080A5594340000000C0BF8B56C0000000A052544340000000E0C18B56C0000000601D4E4340000000A06B8F56C0000000000A48434000000000B89056C0000000C031444340 45.9054059999999993
52 0106000020E61000000100000001030000000100000008000000000000E0562D56C0000000208910434000000020864856C00000000054104340FFFFFF9FB84956C0000000409110434000000080A84956C0000000A05B1B4340000000803D4956C0FFFFFF9FB63C434000000080062D56C0FFFFFF7FAE3C434000000060492D56C0000000603E214340000000E0562D56C00000002089104340 4.94153299999999973
59 0106000020E6100000010000000103000000010000000F0000000000006010C756C000000020A7DE4240FFFFFF9FF7C956C0FFFFFF7F75DE424000000060FBC956C0000000A01ED94240000000C080CA56C0000000001BD94240000000C086D456C000000040CED942400000000065D456C00000004041E442400000008044E256C0000000E0BEE44240FFFFFFFF04E256C0FFFFFFBFD013434000000060BCE256C0000000C0231443400000008092E256C000000060481B43400000004025D856C000000020FA1A4340FFFFFF5F2CD856C0000000E0381A43400000000009D656C0FFFFFFDF1C1A4340000000A08DC656C000000000241A43400000006010C756C000000020A7DE4240 6.82879400000000025
1 0106000020E61000000100000001030000000100000010000000FFFFFF3F746556C00000002049FD434000000080316556C0000000A0240C4440000000C0A76656C000000000330C444000000000C76656C0000000606C11444000000000106756C000000000A0294440000000C0375156C0000000402F2A444000000020305156C0000000E08D244440000000E0E54956C00000004094244440000000E0C64956C0000000400907444000000020B44956C000000000AFF54340000000C0624E56C0FFFFFF3F97F54340000000E05B5A56C00000006082F5434000000060655A56C00000002040F7434000000060B15F56C0000000807CF74340000000008A5F56C0000000C076FD4340FFFFFF3F746556C00000002049FD4340 1.62445799999999996
2 0106000020E6100000010000000103000000010000001000000000000080FABA56C0000000A079EC43400000004007BB56C00000000007E2434000000020E6D756C00000006071E14340000000006ED856C000000000E2E64340FFFFFF7FBFDC56C00000006078EE434000000000DDDC56C0000000A04FF1434000000080C7DB56C0000000206FF34340000000808BDB56C0000000C0FEF54340000000A09FDC56C0000000A018F94340000000C02FDF56C000000080BC004440000000A041E056C0000000008A084440FFFFFF3F08E156C0000000C038114440000000406BE056C0000000A0A8194440FFFFFFBFF5BA56C0000000E02A1944400000004014BB56C000000060E70D444000000080FABA56C0000000A079EC4340 2.25549199999999983
3 0106000020E61000000100000001030000000100000018000000000000A0D87F56C000000080FEF3434000000020048056C0FFFFFF7FB00E444000000080CB7D56C0000000E0F411444000000080237B56C0000000800812444000000060227956C0000000400F114440000000A04A7856C00000002084124440000000C0667756C0000000E0B810444000000080187656C0000000A001124440000000A0037556C0FFFFFF5F36104440000000005C7356C0000000206510444000000080D27156C00000004081114440000000A0657056C000000040EE104440000000A0EF6D56C000000060AE12444000000000BE6C56C0000000E073124440000000802A6A56C0000000402115444000000000C76656C0000000606C114440000000C0A76656C000000000330C444000000080316556C0000000A0240C4440FFFFFF3F746556C00000002049FD4340000000602C6D56C00000004009FD4340FFFFFF5F786D56C0FFFFFFDFC7F54340000000E0447156C000000080A6F54340000000A09C7156C0FFFFFF9F41F44340000000A0D87F56C000000080FEF34340 1.46788999999999992
4 0106000020E610000001000000010300000001000000160000000000008076A556C000000020BCF0434000000060BCA256C0FFFFFF5F7BF5434000000060F5A056C000000040AEFE4340000000204C9C56C00000008095024440000000A0609B56C000000060EC084440FFFFFFFFC49956C0000000C00C0A444000000000E19856C000000040490F444000000040A79656C0000000805C10444000000080199456C0000000E0120E4440FFFFFFBF879256C00000006094084440000000A0699156C00000006012084440000000807E9056C000000000F408444000000060378F56C00000000049074440000000A0E98C56C0FFFFFF5FE4074440000000E0F48B56C0000000201009444000000020618856C000000040ED08444000000020A88556C000000060CF0A4440FFFFFFDF0C8456C000000080250D444000000020048056C0FFFFFF7FB00E4440000000A0D87F56C000000080FEF34340FFFFFF3FDF7F56C0000000C047F043400000008076A556C000000020BCF04340 2.4842559999999998
5 0106000020E61000000100000001030000000100000012000000FFFFFF3FEEA456C0000000A037EC43400000000040B856C0000000E0D6EB434000000080FABA56C0000000A079EC43400000004014BB56C000000060E70D444000000020B9AC56C0000000409F0D4440000000A094AD56C0000000005B0A4440000000A07FAB56C0000000A0C1094440FFFFFFBFA8AB56C000000060DD054440000000E04BA956C0FFFFFFFFB30344400000002042A756C0FFFFFFBF900344400000002005A756C0000000409502444000000060D2A756C000000040EE014440000000C0D9A656C0000000008E004440000000E0C0A656C0000000E0F1FD434000000060F5A056C000000040AEFE434000000060BCA256C0FFFFFF5F7BF543400000008076A556C000000020BCF04340FFFFFF3FEEA456C0000000A037EC4340 0
15 0106000020E6100000010000000103000000010000000B000000000000806CDC56C00000006020A9434000000080A4ED56C000000020E3A94340000000A050EE56C0FFFFFFDF8BAB434000000000C4ED56C00000000058CD4340000000A039EE56C0000000C062CD4340000000C041EE56C00000006068D44340000000E031EE56C000000080D5D74340000000A054D456C000000020CCD743400000000002CD56C000000080CDCC434000000040CDDD56C0000000407EBA4340000000806CDC56C00000006020A94340 1.9498120000000001
7 0106000020E6100000010000000103000000010000001C000000000000C00D6D56C0000000A096C34340000000E0847B56C00000000075C3434000000060747B56C00000002076C74340000000601B7F56C0000000203CDA4340FFFFFF3FDF7F56C0000000C047F04340000000A0D87F56C000000080FEF34340000000A09C7156C0FFFFFF9F41F44340000000E0447156C000000080A6F54340FFFFFF5F786D56C0FFFFFFDFC7F54340000000602C6D56C00000004009FD4340FFFFFF3F746556C00000002049FD4340000000008A5F56C0000000C076FD434000000060B15F56C0000000807CF7434000000060655A56C00000002040F74340000000E05B5A56C00000006082F54340000000C0624E56C0FFFFFF3F97F54340000000004C4E56C000000020E5E7434000000080B55056C0000000C010E94340FFFFFFDF215256C0000000C096E5434000000020475556C0000000A0D3E14340000000A0245A56C0000000200DDF434000000060E75B56C000000000C0DF434000000000F25B56C0000000C0DBD74340FFFFFF5F635F56C0FFFFFFFF91D74340000000E04F5F56C000000020C1D24340FFFFFF7F996256C00000004094D24340000000A08E6256C0000000C0AAC34340000000C00D6D56C0000000A096C34340 6.02948899999999988
8 0106000020E6100000010000000103000000010000000E000000FFFFFF1F72F656C000000080A1D443400000004048F656C0000000008EF94340000000A09FDC56C0000000A018F94340000000808BDB56C0000000C0FEF5434000000080C7DB56C0000000206FF3434000000000DDDC56C0000000A04FF14340FFFFFF7FBFDC56C00000006078EE4340000000006ED856C000000000E2E6434000000020E6D756C00000006071E14340000000607ED756C000000000C1DC4340000000A054D456C000000020CCD74340000000E031EE56C000000080D5D74340000000C041EE56C00000006068D44340FFFFFF1F72F656C000000080A1D44340 1.8003849999999999
9 0106000020E6100000010000000103000000010000001200000000000040E88956C00000004046C3434000000020699356C0000000E029C3434000000080399356C0000000A0D8D14340FFFFFFDFDC9556C0000000A0EDD1434000000040E29556C0000000A077D5434000000060109856C0000000E07ED54340FFFFFF1F0E9856C000000020A3E04340FFFFFF3F069F56C000000040B5E04340000000E0F09E56C0000000005AE5434000000040F3A656C0000000E09FE54340000000C0A9A556C0000000A0ADE74340FFFFFF3FEEA456C0000000A037EC43400000008076A556C000000020BCF04340FFFFFF3FDF7F56C0000000C047F04340000000601B7F56C0000000203CDA434000000060747B56C00000002076C74340000000E0847B56C00000000075C3434000000040E88956C00000004046C34340 4.58125099999999996
30 0106000020E6100000010000000103000000010000000D0000000000000033DB56C0000000804A5B43400000008099DF56C0000000601B5A4340000000E0EEE356C0000000A0E35643400000000004E656C00000004066574340000000A0D3E956C0000000202C5A4340000000A02EE956C0FFFFFF1F09884340000000A0D9E856C0FFFFFFDF03934340000000A0B8DA56C000000060E592434000000080F4D056C0000000605F9243400000000038D156C0000000A0817F434000000000A0D156C0000000A0FF6B4340000000E0C8DA56C000000080986C43400000000033DB56C0000000804A5B4340 4.41689600000000038
11 0106000020E61000000100000001030000000100000013000000000000A08E6256C0000000C0AAC34340FFFFFF7F996256C00000004094D24340000000E04F5F56C000000020C1D24340FFFFFF5F635F56C0FFFFFFFF91D7434000000000F25B56C0000000C0DBD7434000000060E75B56C000000000C0DF4340000000A0245A56C0000000200DDF434000000020475556C0000000A0D3E14340FFFFFFDF215256C0000000C096E5434000000080B55056C0000000C010E94340000000004C4E56C000000020E5E74340FFFFFFBF434C56C0FFFFFF3F77E84340000000608C4956C00000004098E6434000000000674956C00000004015D44340000000A00F4256C0000000E008D4434000000040094256C0FFFFFFDFB1AC4340000000A0E34856C000000060CCAC434000000020666256C0FFFFFF9FBFAC4340000000A08E6256C0000000C0AAC34340 1.44743599999999994
12 0106000020E6100000010000000103000000010000001400000000000040441E56C000000020C9B9434000000060C72556C000000060BAB94340FFFFFFDFC12556C0000000C026BD434000000000AC2656C0FFFFFFFF38BD434000000020B32656C0000000E0E7BE434000000080F52756C000000080DDBE4340FFFFFF5FF22756C000000060E5C0434000000080652956C0000000A0EAC04340000000607F2956C0000000402EC34340FFFFFF7F132E56C00000000079C34340000000801D2E56C0000000A06FCA4340000000E0083456C0000000E0ADCA4340000000802E3456C000000020F7D34340000000E0213456C0000000005ADE4340000000E0D63056C0000000007DDE4340000000E0B93056C0000000E09FE54340000000C0EB2F56C000000040BBE54340FFFFFF5F741E56C00000008040E54340000000605A1E56C0000000E042D3434000000040441E56C000000020C9B94340 1.19196600000000008
14 0106000020E6100000010000000103000000010000000A000000000000E0E20056C0000000608CB0434000000060381E56C00000000036B0434000000040441E56C000000020C9B94340000000605A1E56C0000000E042D3434000000040BB0356C00000000030D4434000000060CF0356C000000080C9D74340FFFFFFBFFDFD55C0000000E02BD84340000000A0C5FD55C0000000400DBE434000000040E60056C0FFFFFF9F20BE4340000000E0E20056C0000000608CB04340 1.60801700000000003
17 0106000020E61000000100000001030000000100000011000000000000A0B8DA56C000000060E5924340000000806CDC56C00000006020A9434000000040CDDD56C0000000407EBA43400000000002CD56C000000080CDCC434000000000FFC956C000000060BBC64340000000C0FDC556C0000000E0B3C34340000000E01EC456C000000080ABBC43400000006053C256C0FFFFFF7FE2B8434000000040AABC56C00000006046B34340000000806EB656C0000000A0DBAC4340000000C0E0B156C0000000A0FDA54340000000C03CAF56C000000040B89F4340000000A0DEAE56C0FFFFFF7FC49C4340000000E0F3CB56C0FFFFFF9F039D4340000000205CCC56C0000000805392434000000080F4D056C0000000605F924340000000A0B8DA56C000000060E5924340 4.17331800000000008
18 0106000020E6100000010000000103000000010000000E000000FFFFFF3FF56C56C000000020977F4340000000A07A6D56C0000000605DAD4340000000800F6D56C0000000A06CAD4340000000C00D6D56C0000000A096C34340000000A08E6256C0000000C0AAC3434000000020666256C0FFFFFF9FBFAC4340000000A0E34856C000000060CCAC4340000000201F4956C000000000BE9B434000000060615056C000000040A99B4340000000407F5056C0000000803C834340000000E0066656C0000000209C834340000000E0F26556C000000000C87F434000000020526956C0000000E08A7F4340FFFFFF3FF56C56C000000020977F4340 3.78325200000000006
19 0106000020E61000000100000001030000000100000009000000000000A0B78956C0000000A0BD7F4340000000C0C08956C0000000000BA1434000000040E88956C00000004046C34340000000E0847B56C00000000075C34340000000C00D6D56C0000000A096C34340000000800F6D56C0000000A06CAD4340000000A07A6D56C0000000605DAD4340FFFFFF3FF56C56C000000020977F4340000000A0B78956C0000000A0BD7F4340 2.08513599999999988
20 0106000020E61000000100000001030000000100000015000000000000C0C08956C0000000000BA1434000000060188D56C00000008040A04340000000C0278D56C0000000E0E29C4340000000E04C9456C000000080CC9C4340000000A0579456C0000000E0B4964340FFFFFFFF819F56C0000000806D9643400000006068A056C0FFFFFF1FBB944340000000203AA156C0000000E0CA9743400000002061A456C000000080AE974340000000C08CA556C0000000A0BB964340FFFFFF9F44A556C0000000609794434000000040EFA656C0000000C00C8F43400000004050A756C0FFFFFFBFEA9343400000000062A656C0FFFFFFFF6A9B434000000080DCA756C0000000208AAE43400000006082A756C00000004051B24340000000C0C3A756C000000080BCB54340000000009EA556C0FFFFFF7F3CC3434000000020699356C0000000E029C3434000000040E88956C00000004046C34340000000C0C08956C0000000000BA14340 2.17630200000000018
21 0106000020E6100000010000000103000000010000001B00000000000040EFA656C0000000C00C8F4340FFFFFF5F1DA756C000000020C58D434000000020E1A456C0000000E00F84434000000020D1A456C0000000A0B2804340FFFFFFDF3AA356C000000040247C4340000000800BA256C0FFFFFF5F987A434000000020409F56C000000000CD7B4340000000E0119E56C000000060C67A434000000080F2A156C000000040207243400000004080A456C0000000A0876F43400000004024A856C0000000E0BD704340FFFFFFDFCEAA56C000000060B67743400000004030AD56C000000060D6844340FFFFFF1F49AD56C00000006072874340000000802FAC56C000000060FE8B434000000000DFAD56C00000008075924340FFFFFFDFF6AD56C00000006012994340000000A0DEAE56C0FFFFFF7FC49C4340000000C03CAF56C000000040B89F4340000000C0E0B156C0000000A0FDA54340000000806EB656C0000000A0DBAC434000000040AABC56C00000006046B343400000006082A756C00000004051B2434000000080DCA756C0000000208AAE43400000000062A656C0FFFFFFFF6A9B43400000004050A756C0FFFFFFBFEA93434000000040EFA656C0000000C00C8F4340 6.30934699999999982
23 0106000020E6100000010000000103000000010000000B000000FFFFFF1F491457C000000060E99F4340FFFFFF5F1C1457C00000000075AC4340000000A050EE56C0FFFFFFDF8BAB434000000080A4ED56C000000020E3A94340000000806CDC56C00000006020A94340000000A0B8DA56C000000060E5924340000000A0D9E856C0FFFFFFDF03934340000000A02EE956C0FFFFFF1F0988434000000020290757C0FFFFFF3F92884340000000A0B00657C000000080FE9E4340FFFFFF1F491457C000000060E99F4340 4.21135400000000004
24 0106000020E61000000100000001030000000100000019000000000000A0B78956C0000000A0BD7F4340FFFFFF1FDD9156C000000060B47F434000000000DA9156C0000000205D764340000000A0769456C00000006063764340000000C06F9A56C0000000A02D7B4340000000E0119E56C000000060C67A434000000020409F56C000000000CD7B4340000000800BA256C0FFFFFF5F987A4340FFFFFFDF3AA356C000000040247C434000000020D1A456C0000000A0B280434000000020E1A456C0000000E00F844340FFFFFF5F1DA756C000000020C58D434000000040EFA656C0000000C00C8F4340FFFFFF9F44A556C00000006097944340000000C08CA556C0000000A0BB9643400000002061A456C000000080AE974340000000203AA156C0000000E0CA9743400000006068A056C0FFFFFF1FBB944340FFFFFFFF819F56C0000000806D964340000000A0579456C0000000E0B4964340000000E04C9456C000000080CC9C4340000000C0278D56C0000000E0E29C434000000060188D56C00000008040A04340000000C0C08956C0000000000BA14340000000A0B78956C0000000A0BD7F4340 0.804810000000000025
47 0106000020E61000000100000001030000000100000009000000FFFFFF9FD60956C0000000003521434000000020F21756C000000040DA20434000000060492D56C0000000603E21434000000080062D56C0FFFFFF7FAE3C434000000000EE2C56C000000060424E434000000040011156C000000000AF4D434000000040780956C0000000005A4D4340000000808A0956C000000080C1494340FFFFFF9FD60956C00000000035214340 0.969790999999999959
25 0106000020E6100000010000000103000000010000001E000000000000A0491957C0FFFFFF9F355E434000000020381A57C0000000A05B61434000000000201957C0000000803D654340000000802D1957C000000040E867434000000060B41B57C0000000C078694340000000A0611E57C000000040A06E4340000000C0FF1F57C00000006083754340000000803C2457C0000000E0ED7B4340000000A0332457C00000004047804340FFFFFF9FB41B57C0FFFFFF5F08A04340FFFFFF1F491457C000000060E99F4340000000A0B00657C000000080FE9E434000000020290757C0FFFFFF3F92884340FFFFFFBFC70857C000000020EC874340000000C0F20957C000000020DE764340000000C0E50A57C000000040E172434000000000260A57C0000000A03A71434000000000750A57C000000080796F4340FFFFFF7F310957C00000000064684340FFFFFF3FFF0B57C000000020765E434000000000060E57C000000000B45B434000000000640E57C00000000022594340000000A0C40C57C0000000801D57434000000040600C57C0000000C049544340FFFFFF5F100E57C000000000B851434000000060091157C0000000E05353434000000020B21257C0FFFFFFDF48554340FFFFFF3FC11657C00000004060564340000000005F1657C0FFFFFF5FD95B4340000000A0491957C0FFFFFF9F355E4340 3.21533099999999994
26 0106000020E6100000010000000103000000010000001D000000000000008BBD56C0000000608A6F43400000006075C756C0000000A0EE6F43400000000093C756C0FFFFFF3FF3764340FFFFFFBFD7CC56C0FFFFFF7F7277434000000020BECC56C0FFFFFFFF1B7F43400000000038D156C0000000A0817F434000000080F4D056C0000000605F924340000000205CCC56C00000008053924340000000E0F3CB56C0FFFFFF9F039D4340000000A0DEAE56C0FFFFFF7FC49C4340FFFFFFDFF6AD56C0000000601299434000000000DFAD56C00000008075924340000000802FAC56C000000060FE8B4340FFFFFF1F49AD56C000000060728743400000004030AD56C000000060D6844340FFFFFFDFCEAA56C000000060B6774340FFFFFFDF6EAC56C000000060557743400000002088AC56C000000060887543400000000071AD56C000000080B07543400000004047AD56C0000000C04C744340000000C0FDAE56C0000000406D754340000000C0DAAE56C0000000802B774340000000C070B256C0000000A0FE754340FFFFFFBF86B356C000000040A3744340000000A00EB456C0000000A0907043400000008058B856C000000000FB7143400000004020BC56C0000000C09A714340000000C058BD56C0000000C09E724340000000008BBD56C0000000608A6F4340 2.83366400000000018
38 0106000020E6100000010000000103000000010000000E000000000000605FFD55C000000020A5494340000000808A0956C000000080C149434000000040780956C0000000005A4D434000000040011156C000000000AF4D4340000000404F1256C000000040734F434000000040D01256C0000000605C52434000000000BD1156C000000020A9544340000000407C1256C0FFFFFF7FA75E4340000000C0BE1056C0000000A0065F434000000080C61056C0000000A04C6D43400000002025FD55C0FFFFFF1F7F6D434000000080B9FA55C0000000607E6D434000000020A9FA55C00000008093494340000000605FFD55C000000020A5494340 1.00387500000000007
31 0106000020E6100000010000000103000000010000001B000000FFFFFF5F100E57C000000000B851434000000040600C57C0000000C049544340000000A0C40C57C0000000801D57434000000000640E57C0000000002259434000000000060E57C000000000B45B4340FFFFFF3FFF0B57C000000020765E4340FFFFFF7F310957C0000000006468434000000000750A57C000000080796F434000000000260A57C0000000A03A714340000000C0E50A57C000000040E1724340000000C0F20957C000000020DE764340FFFFFFBFC70857C000000020EC87434000000020290757C0FFFFFF3F92884340000000A02EE956C0FFFFFF1F09884340000000A0D3E956C0000000202C5A434000000000D4E956C0000000C00D5A4340FFFFFF7F6BEF56C0000000A0605A4340FFFFFFFFACF056C000000040975843400000008054F356C0FFFFFFDFFB56434000000060C1F656C0000000E0725643400000000014FD56C000000000744C4340000000E005FF56C000000020904B434000000040220257C0000000405C484340000000E0880657C0FFFFFFDFF147434000000000EA0A57C0000000407C4A4340000000E0B10C57C000000020054D4340FFFFFF5F100E57C000000000B8514340 3.01748599999999989
33 0106000020E6100000010000000103000000010000001400000000000060CA8756C00000002077664340000000603D8756C0000000C04C6A434000000000808856C0FFFFFF1F306D4340000000809C8F56C0000000A00E75434000000000DA9156C0000000205D764340FFFFFF1FDD9156C000000060B47F4340000000A0B78956C0000000A0BD7F4340FFFFFF3FF56C56C000000020977F434000000020526956C0000000E08A7F4340000000A03A6956C0000000C0966F4340000000A0D86656C0000000E0976F4340FFFFFF1FC26656C0FFFFFFBFE25E434000000000AD6656C0000000408054434000000060BA6D56C00000006031544340000000C0BF8B56C0000000A052544340FFFFFF7FF18C56C000000080A5594340000000A0948C56C0000000E0AA5C434000000020758A56C0000000E0F4624340000000C0A68856C0000000C08A64434000000060CA8756C00000002077664340 7.97395700000000041
34 0106000020E61000000100000001030000000100000015000000FFFFFF5FFEBD56C0000000C0ED454340000000C0E0C056C0000000401048434000000040E0C356C0000000C0AC4D434000000060B0C556C000000000094E4340000000201BC956C0000000E0D74C4340FFFFFF3F1ECD56C0000000204D4E4340000000806BCE56C0000000600150434000000020DDCF56C0000000A015544340000000E0F7D256C0000000801D584340000000E067D556C000000080E65943400000004001D856C000000060795943400000000033DB56C0000000804A5B4340000000E0C8DA56C000000080986C434000000000A0D156C0000000A0FF6B43400000000038D156C0000000A0817F434000000020BECC56C0FFFFFFFF1B7F4340FFFFFFBFD7CC56C0FFFFFF7F727743400000000093C756C0FFFFFF3FF37643400000006075C756C0000000A0EE6F4340000000008BBD56C0000000608A6F4340FFFFFF5FFEBD56C0000000C0ED454340 5.00546399999999991
35 0106000020E61000000100000001030000000100000031000000FFFFFF9F36AF56C0000000E037514340000000602AB256C0FFFFFFDFE74943400000008057B356C000000020CF4A434000000060A8B456C0000000C07D4A4340000000005FBA56C00000006033454340FFFFFF5FFEBD56C0000000C0ED454340000000008BBD56C0000000608A6F4340000000C058BD56C0000000C09E7243400000004020BC56C0000000C09A7143400000008058B856C000000000FB714340000000A00EB456C0000000A090704340FFFFFFBF86B356C000000040A3744340000000C070B256C0000000A0FE754340000000C0DAAE56C0000000802B774340000000C0FDAE56C0000000406D7543400000004047AD56C0000000C04C7443400000000071AD56C000000080B07543400000002088AC56C00000006088754340FFFFFFDF6EAC56C00000006055774340FFFFFFDFCEAA56C000000060B67743400000004024A856C0000000E0BD7043400000004080A456C0000000A0876F434000000080F2A156C00000004020724340000000E0119E56C000000060C67A4340000000C06F9A56C0000000A02D7B4340000000A0769456C0000000606376434000000000DA9156C0000000205D764340000000809C8F56C0000000A00E75434000000000808856C0FFFFFF1F306D4340000000603D8756C0000000C04C6A434000000060CA8756C0000000207766434000000060A28856C0000000A04C69434000000020CB8C56C0000000409A69434000000080AB9056C0000000608F6D4340FFFFFFFF6D9256C0000000204F714340000000C05C9456C000000040DD714340000000E0A39556C0000000A065704340000000400B9756C000000040A26A434000000040CC9956C0000000A0AA694340FFFFFFFFBA9B56C000000060646A4340000000A0EF9C56C0FFFFFF1FCB694340000000605F9F56C0000000202B614340000000402CA256C000000000995C4340000000C00CA356C00000006094584340000000A08CA656C0000000E04C57434000000000F0A856C000000020B5584340000000A086AB56C0FFFFFF5FCD5643400000004012AC56C0000000004C544340FFFFFF9F36AF56C0000000E037514340 2.46389099999999983
37 0106000020E6100000010000000103000000010000002A0000000000006054AF56C0000000C0573B4340FFFFFF9F36AF56C0000000E0375143400000004012AC56C0000000004C544340000000A086AB56C0FFFFFF5FCD56434000000000F0A856C000000020B5584340000000A08CA656C0000000E04C574340000000C00CA356C00000006094584340000000402CA256C000000000995C4340000000605F9F56C0000000202B614340000000A0EF9C56C0FFFFFF1FCB694340FFFFFFFFBA9B56C000000060646A434000000040CC9956C0000000A0AA694340000000400B9756C000000040A26A4340000000E0A39556C0000000A065704340000000C05C9456C000000040DD714340FFFFFFFF6D9256C0000000204F71434000000080AB9056C0000000608F6D434000000020CB8C56C0000000409A69434000000060A28856C0000000A04C69434000000060CA8756C00000002077664340000000E0F48A56C000000020B1644340000000004D8C56C0000000C05E61434000000020578F56C0000000A0725D434000000020609356C000000020CC55434000000000419456C0000000A03D4A434000000000B89056C0000000C031444340000000A0029156C00000006064424340FFFFFF5F519356C0000000A0B336434000000020BC9556C00000004007324340000000406F9656C00000008005364340FFFFFF1FAA9556C0000000A07439434000000040129A56C0000000C07E3A4340FFFFFF1FE19A56C000000040623D4340FFFFFF3F2B9A56C000000000263E4340000000E0379A56C00000002001404340FFFFFFFFF5A556C0000000801A404340000000C049A756C0000000A07B3C4340000000801EAA56C000000020A63D4340FFFFFFDFD4AA56C000000060A538434000000060CFAB56C000000040BE384340000000202CAC56C000000040A33B43400000006054AF56C0000000C0573B4340 7.37797400000000003
39 0106000020E61000000100000001030000000100000008000000000000A04E4956C0000000E03140434000000040454956C000000040E25E4340000000202B4956C000000080AC69434000000040F72C56C000000000486A434000000000EE2C56C000000060424E434000000080062D56C0FFFFFF7FAE3C4340000000803D4956C0FFFFFF9FB63C4340000000A04E4956C0000000E031404340 3.19004699999999985
60 0106000020E6100000010000000103000000010000000A0000000000006098A956C0000000C019DE42400000006010C756C000000020A7DE4240000000A08DC656C000000000241A43400000002017B256C0000000209F1A434000000080FEAB56C0000000C0710E434000000080D3AB56C0FFFFFF5F2F0C434000000080FFA956C000000080EA0C43400000006018AA56C0000000E0FF0A434000000060E4A856C000000080440A43400000006098A956C0000000C019DE4240 3.26394699999999993
41 0106000020E6100000010000000103000000010000001700000000000040454956C000000040E25E4340000000A04E4956C0000000E031404340000000A0FB5056C0000000001A414340000000E00D5356C0000000004C404340000000A0DA5656C000000040A4414340000000A07D5956C0000000A0833E4340000000608F5B56C0000000C0273F4340000000E0405D56C0000000803A3E4340000000E0CB5E56C000000040AD3B4340000000E08A6156C0000000A0793D434000000060B56256C0000000A09A3C4340000000A0A06456C0000000A0963D434000000020996756C000000060AB3B4340000000E0156856C0000000A085394340FFFFFF3F536956C0000000C06A384340FFFFFF5F876A56C0000000809638434000000080DD6A56C000000020BB364340000000806B6D56C0000000E09E35434000000060BA6D56C0000000603154434000000000AD6656C00000004080544340FFFFFF1FC26656C0FFFFFFBFE25E434000000060D25056C000000060FA5E434000000040454956C000000040E25E4340 2.44759700000000002
42 0106000020E610000001000000010300000001000000210000000000000014FD56C000000000744C4340000000C0D70157C000000020CE464340000000E0CC0057C0000000A0ED404340FFFFFF1F3C0257C0000000A0CE3C434000000060C80457C0000000E02D3C4340000000A0020757C000000000763A4340000000602E0957C000000060EF3B434000000020B90A57C000000060DE3B434000000060340A57C0000000402D38434000000060310857C00000004012354340000000E0F50757C0000000E095324340000000E0D40857C000000080DD304340000000E0690B57C0FFFFFF3F3B30434000000000520C57C000000060732E4340000000208D0C57C000000000AB2A434000000060D80E57C000000080CC2A4340000000C05C1057C0000000209229434000000040131257C000000040C52A4340000000C0281A57C000000080262B4340000000E0D81957C000000000F6354340FFFFFF9FB91F57C0FFFFFFDF7D364340000000A0491957C0FFFFFF9F355E4340000000005F1657C0FFFFFF5FD95B4340FFFFFF3FC11657C0000000406056434000000020B21257C0FFFFFFDF4855434000000060091157C0000000E053534340FFFFFF5F100E57C000000000B8514340000000E0B10C57C000000020054D434000000000EA0A57C0000000407C4A4340000000E0880657C0FFFFFFDFF147434000000040220257C0000000405C484340000000E005FF56C000000020904B43400000000014FD56C000000000744C4340 1.29495800000000005
44 0106000020E6100000010000000103000000010000001700000000000060850C57C000000080EC244340000000208D0C57C000000000AB2A434000000000520C57C000000060732E4340000000E0690B57C0FFFFFF3F3B304340000000E0D40857C000000080DD304340000000E0F50757C0000000E09532434000000060310857C0000000401235434000000060340A57C0000000402D38434000000020B90A57C000000060DE3B4340000000602E0957C000000060EF3B4340000000A0020757C000000000763A434000000060C80457C0000000E02D3C4340FFFFFF1F3C0257C0000000A0CE3C4340000000E0CC0057C0000000A0ED404340000000C0D70157C000000020CE4643400000000014FD56C000000000744C434000000060C1F656C0000000E0725643400000008054F356C0FFFFFFDFFB564340FFFFFFFFACF056C00000004097584340FFFFFF7F6BEF56C0000000A0605A434000000000D4E956C0000000C00D5A4340FFFFFF9FB0E956C0FFFFFF1F1425434000000060850C57C000000080EC244340 4.13399699999999992
45 0106000020E610000001000000010300000001000000190000000000002017B256C0000000209F1A4340000000A08DC656C000000000241A43400000000009D656C0FFFFFFDF1C1A4340FFFFFF5F2CD856C0000000E0381A43400000004025D856C000000020FA1A4340000000E0A7D756C0000000E0553543400000004001D856C00000006079594340000000E067D556C000000080E6594340000000E0F7D256C0000000801D58434000000020DDCF56C0000000A015544340000000806BCE56C00000006001504340FFFFFF3F1ECD56C0000000204D4E4340000000201BC956C0000000E0D74C434000000060B0C556C000000000094E434000000040E0C356C0000000C0AC4D4340000000C0E0C056C00000004010484340FFFFFF5FFEBD56C0000000C0ED454340000000005FBA56C0000000603345434000000060A8B456C0000000C07D4A43400000008057B356C000000020CF4A4340000000602AB256C0FFFFFFDFE7494340FFFFFF9F36AF56C0000000E0375143400000006054AF56C0000000C0573B43400000006067AF56C000000040593243400000002017B256C0000000209F1A4340 4.29831099999999999
46 0106000020E6100000010000000103000000010000001200000000000060DE7956C0000000E08D1C4340000000C0897B56C0000000806323434000000060177B56C0000000807E244340000000204D7A56C0000000A0CD234340FFFFFF7FB97A56C0000000409227434000000000028256C0000000E0EB27434000000020048256C0000000002F2A4340000000A04E8956C00000002053344340000000C05A8956C000000020A5364340000000A0029156C0000000606442434000000000B89056C0000000C031444340000000A06B8F56C0000000000A484340000000E0C18B56C0000000601D4E4340000000C0BF8B56C0000000A05254434000000060BA6D56C00000006031544340000000806B6D56C0000000E09E35434000000080B86D56C0FFFFFFFF081C434000000060DE7956C0000000E08D1C4340 27.4838270000000016
48 0106000020E6100000010000000103000000010000000E000000FFFFFF9FD60956C00000000035214340000000808A0956C000000080C1494340000000605FFD55C000000020A549434000000060C8FC55C0FFFFFF5FDF444340000000C0AAFD55C0000000808E3F434000000080F9FC55C000000060DA394340000000E068FD55C000000060D537434000000000E6FC55C0000000C05E364340000000E0A8FE55C0000000C056334340FFFFFFFFA6FE55C00000004061304340000000E0ACFD55C0000000E0D02C4340000000407CFD55C000000040D92543400000006042FF55C00000004040214340FFFFFF9FD60956C00000000035214340 0
49 0106000020E6100000010000000103000000010000001600000000000060DE7956C0000000E08D1C434000000020918256C0000000809A1C4340000000207D8256C0000000008611434000000020488D56C000000040610B434000000080429056C000000040A30F434000000060899256C0000000405A154340000000C08C9556C0000000C02718434000000060589756C000000080FD1D434000000060A39756C0000000606A294340FFFFFFBFF49656C0FFFFFF1FC32E434000000020BC9556C00000004007324340FFFFFF5F519356C0000000A0B3364340000000A0029156C00000006064424340000000C05A8956C000000020A5364340000000A04E8956C0000000205334434000000020048256C0000000002F2A434000000000028256C0000000E0EB274340FFFFFF7FB97A56C00000004092274340000000204D7A56C0000000A0CD23434000000060177B56C0000000807E244340000000C0897B56C0000000806323434000000060DE7956C0000000E08D1C4340 2.93446600000000002
50 0106000020E6100000010000000103000000010000001600000000000020396656C0000000A0C31B434000000080B86D56C0FFFFFFFF081C4340000000806B6D56C0000000E09E35434000000080DD6A56C000000020BB364340FFFFFF5F876A56C00000008096384340FFFFFF3F536956C0000000C06A384340000000E0156856C0000000A08539434000000020996756C000000060AB3B4340000000A0A06456C0000000A0963D434000000060B56256C0000000A09A3C4340000000E08A6156C0000000A0793D4340000000E0CB5E56C000000040AD3B4340000000E0405D56C0000000803A3E4340000000608F5B56C0000000C0273F4340000000A07D5956C0000000A0833E4340000000A0DA5656C000000040A4414340000000E00D5356C0000000004C404340000000A0FB5056C0000000001A414340000000A04E4956C0000000E031404340000000803D4956C0FFFFFF9FB63C434000000080A84956C0000000A05B1B434000000020396656C0000000A0C31B4340 4.45642699999999969
51 0106000020E6100000010000000103000000010000002400000000000060E4A856C000000080440A43400000006018AA56C0000000E0FF0A434000000080FFA956C000000080EA0C434000000080D3AB56C0FFFFFF5F2F0C434000000080FEAB56C0000000C0710E43400000002017B256C0000000209F1A43400000006067AF56C000000040593243400000006054AF56C0000000C0573B4340000000202CAC56C000000040A33B434000000060CFAB56C000000040BE384340FFFFFFDFD4AA56C000000060A5384340000000801EAA56C000000020A63D4340000000C049A756C0000000A07B3C4340FFFFFFFFF5A556C0000000801A404340000000E0379A56C00000002001404340FFFFFF3F2B9A56C000000000263E4340FFFFFF1FE19A56C000000040623D434000000040129A56C0000000C07E3A4340FFFFFF1FAA9556C0000000A074394340000000406F9656C0000000800536434000000020BC9556C00000004007324340FFFFFFBFF49656C0FFFFFF1FC32E434000000060A39756C0000000606A29434000000060589756C000000080FD1D4340000000C08C9556C0000000C02718434000000060899256C0000000405A15434000000080429056C000000040A30F434000000060029356C000000020C60B4340FFFFFFDF129556C000000060CA0C4340000000E0919A56C0000000E0CE054340FFFFFF7F92A656C0FFFFFFBF5400434000000060F6A756C0000000E03B014340FFFFFF9F39A756C0000000E0A3024340000000E08CA756C0000000A00E064340000000A003A756C0000000C05F09434000000060E4A856C000000080440A4340 4.62926400000000005
53 0106000020E6100000010000000103000000010000001F00000000000040E21957C0000000A0A6024340000000E0092157C0000000E02C03434000000000242157C0000000608104434000000060892357C0000000405C064340FFFFFF7FC12357C0000000A0EA074340000000A0D82457C00000000027084340FFFFFF7FDB2457C0000000E0450C434000000080C22557C0000000A0750C434000000020B82557C000000040160E434000000020652657C0000000402B0E434000000020632657C0000000A05C114340FFFFFF9F102757C00000006080114340000000A0F62657C0FFFFFFDF91154340000000C0FE2857C0FFFFFF3FEE154340000000A0342957C000000080811A4340FFFFFFDFD72C57C0000000A03B1C4340000000C0662C57C000000040292C434000000060CC2857C0FFFFFFBF582C4340000000800C2857C0FFFFFFBF01374340FFFFFF9FB91F57C0FFFFFFDF7D364340000000E0D81957C000000000F6354340000000C0281A57C000000080262B434000000040131257C000000040C52A4340000000C05C1057C0000000209229434000000060D80E57C000000080CC2A4340000000208D0C57C000000000AB2A434000000060850C57C000000080EC244340000000A0C10C57C00000008020154340000000E0AA0B57C00000004000154340FFFFFF1F090C57C0000000202E02434000000040E21957C0000000A0A6024340 3.99004100000000017
55 0106000020E6100000010000000103000000010000002200000000000000181856C000000020FEF4424000000020F21756C000000040DA204340FFFFFF9FD60956C000000000352143400000006042FF55C00000004040214340FFFFFF9FB8FE55C0000000E0DB1E4340000000C01AFF55C0000000600E1E43400000006096FE55C0FFFFFFFFB0194340000000A0AAFB55C0000000A0E7154340FFFFFFBFA5FB55C0000000E02914434000000020D6FC55C00000006086114340000000E04DFE55C000000080DD104340000000E02F0156C000000000390D434000000000CA0056C000000000D20B434000000060C0FD55C000000040620C4340000000406BFE55C0000000206209434000000000390256C000000040EC06434000000000C20256C000000080C605434000000080A70256C000000020E704434000000080630156C0000000C04A044340000000A0DE0156C0000000E00D014340000000A0630156C0FFFFFF9FCEFC424000000080B80256C0000000E066FA4240FFFFFF5FAC0256C0000000A09DF74240FFFFFFBF220456C00000002003F74240000000600D0556C000000000D5F8424000000040600556C0000000803AF64240000000C0F20156C0000000A073F54240000000A0B30156C0000000E0EFF34240FFFFFF1FDF0256C000000040B0F24240FFFFFFBF670656C0FFFFFF5FFDF34240000000407E0656C00000006099F24240000000A0390956C0000000E0E8F54240000000E0CA0956C0000000201CF5424000000000181856C000000020FEF44240 3.04025299999999987
56 0106000020E6100000010000000103000000010000000600000000000000181856C000000020FEF4424000000020572D56C0FFFFFF7F74F44240000000E0562D56C0000000208910434000000060492D56C0000000603E21434000000020F21756C000000040DA20434000000000181856C000000020FEF44240 3.90541099999999997
57 0106000020E6100000010000000103000000010000001700000000000020488D56C000000040610B4340000000207D8256C0000000008611434000000020918256C0000000809A1C434000000060DE7956C0000000E08D1C434000000080B86D56C0FFFFFFFF081C434000000020396656C0000000A0C31B434000000000936656C0000000601DFA424000000040B46A56C0000000607CEB424000000060E56B56C0FFFFFFFF16EA4240000000403A6C56C00000008003E74240000000E09E6E56C0000000A0A5EB424000000080827656C000000020D9F34240FFFFFF5F1B7756C000000000E7F34240FFFFFFDF797756C00000000029F24240FFFFFF9FA27956C0000000A01DF0424000000020067C56C0000000C063F0424000000080A67E56C0000000A0B8F44240000000A0537D56C00000006058FB424000000020B18056C0000000A012FC4240000000E0AE8256C0FFFFFF5F21FF424000000040A38756C00000008021044340000000209F8856C0000000E0E706434000000020488D56C000000040610B4340 4.33283899999999988
58 0106000020E6100000010000000103000000010000000B00000000000020864856C00000000054104340000000C0FA4856C0FFFFFF3FC70D434000000000E64756C000000060EA0B4340FFFFFFFFF54856C0000000E00F06434000000080504956C0000000C0DEFE4240000000E0744B56C0000000A07CF9424000000000936656C0000000601DFA424000000020396656C0000000A0C31B434000000080A84956C0000000A05B1B4340FFFFFF9FB84956C0000000409110434000000020864856C00000000054104340 3.8941110000000001
61 0106000020E61000000100000001030000000100000013000000000000A0F60157C0000000C081014340000000405FFE56C0000000E06B014340000000605AFD56C0000000E05905434000000060BBFB56C0000000809A044340000000C01EFB56C00000002014064340FFFFFFDFD2F956C000000040E2064340FFFFFF3FF2E856C0000000A0A3064340000000A0D7E856C0FFFFFF1F1B14434000000060BCE256C0000000C023144340FFFFFFFF04E256C0FFFFFFBFD01343400000008044E256C0000000E0BEE44240000000803CF456C000000040BEE4424000000080DBF356C0FFFFFF9F97DF42400000004063F456C0FFFFFFBF64DB4240000000007EF456C000000000A6CC4240000000A0000257C00000002054CD424000000080730157C0000000C094E3424000000080EA0157C0000000409BE44240000000A0F60157C0000000C081014340 3.28216300000000016
62 0106000020E6100000010000000103000000010000000C00000000000020572D56C0FFFFFF7F74F4424000000020682D56C0FFFFFF7F00EF4240000000C0E94956C000000080CDEE4240000000E0EC4956C0000000607AF94240000000E0744B56C0000000A07CF9424000000080504956C0000000C0DEFE4240FFFFFFFFF54856C0000000E00F06434000000000E64756C000000060EA0B4340000000C0FA4856C0FFFFFF3FC70D434000000020864856C00000000054104340000000E0562D56C0000000208910434000000020572D56C0FFFFFF7F74F44240 3.29576199999999986
63 0106000020E61000000100000001030000000100000013000000000000E0718756C00000000011D64240FFFFFF5F418A56C0FFFFFF3F72DA4240FFFFFFDFFD8C56C00000004087D64240000000E0A59D56C0FFFFFF5FA4F0424000000000CE9456C0000000603BFE4240000000E0919A56C0000000E0CE054340FFFFFFDF129556C000000060CA0C434000000060029356C000000020C60B434000000080429056C000000040A30F434000000020488D56C000000040610B4340000000209F8856C0000000E0E706434000000040A38756C00000008021044340000000E0AE8256C0FFFFFF5F21FF424000000020B18056C0000000A012FC4240000000A0537D56C00000006058FB424000000080A67E56C0000000A0B8F4424000000020067C56C0000000C063F04240000000E0798056C0000000E0DEE84240000000E0718756C00000000011D64240 7.24967900000000043
64 0106000020E6100000010000000103000000010000001100000000000060BC8956C00000004054D24240FFFFFF7F82A256C00000008046D2424000000040D6A956C0000000C024D242400000006098A956C0000000C019DE424000000060E4A856C000000080440A4340000000A003A756C0000000C05F094340000000E08CA756C0000000A00E064340FFFFFF9F39A756C0000000E0A302434000000060F6A756C0000000E03B014340FFFFFF7F92A656C0FFFFFFBF54004340000000E0919A56C0000000E0CE05434000000000CE9456C0000000603BFE4240000000E0A59D56C0FFFFFF5FA4F04240FFFFFFDFFD8C56C00000004087D64240FFFFFF5F418A56C0FFFFFF3F72DA4240000000E0718756C00000000011D6424000000060BC8956C00000004054D24240 3.04184600000000005
65 0106000020E6100000010000000103000000010000000B000000000000E0F40F57C0000000C0B5CD4240FFFFFF9FF10F57C0000000000DD34240000000403A1A57C0000000E038DB4240000000001E1A57C00000002044EE424000000040E21957C0000000A0A6024340FFFFFF1F090C57C0000000202E024340000000A0F60157C0000000C08101434000000080EA0157C0000000409BE4424000000080730157C0000000C094E34240000000A0000257C00000002054CD4240000000E0F40F57C0000000C0B5CD4240 1.61801799999999996
66 0106000020E6100000010000000103000000010000001400000000000000CE4956C00000004053CD4240000000C0655D56C0000000A09ECD4240FFFFFF7F825D56C0FFFFFF1FA9CA4240FFFFFF7F835E56C000000080FAC8424000000020996156C0000000E035C9424000000000DB6056C0FFFFFFBFD6CE4240000000403A6156C0000000803FD3424000000020DB6056C00000000005D74240000000A0606156C000000020EFD84240FFFFFF3F366556C0000000A061DA424000000040A76A56C0000000006BDF424000000040416B56C00000002059E44240000000403A6C56C00000008003E7424000000060E56B56C0FFFFFFFF16EA424000000040B46A56C0000000607CEB424000000000936656C0000000601DFA4240000000E0744B56C0000000A07CF94240000000E0EC4956C0000000607AF94240000000C0E94956C000000080CDEE424000000000CE4956C00000004053CD4240 4.91080100000000019
67 0106000020E6100000010000000103000000010000002000000000000060BC8956C00000004054D24240000000E0718756C00000000011D64240000000E0798056C0000000E0DEE8424000000020067C56C0000000C063F04240FFFFFF9FA27956C0000000A01DF04240FFFFFFDF797756C00000000029F24240FFFFFF5F1B7756C000000000E7F3424000000080827656C000000020D9F34240000000E09E6E56C0000000A0A5EB4240000000403A6C56C00000008003E7424000000040416B56C00000002059E4424000000040A76A56C0000000006BDF4240FFFFFF3F366556C0000000A061DA4240000000A0606156C000000020EFD8424000000020DB6056C00000000005D74240000000403A6156C0000000803FD3424000000000DB6056C0FFFFFFBFD6CE424000000020996156C0000000E035C94240000000A0D56556C0FFFFFF3F7FC9424000000020FD6656C00000006065CC4240000000A08C6856C000000080A3CB424000000020126956C000000000DBCC424000000060DB6B56C0000000801BCB4240000000E03A6C56C00000008035CC424000000020756D56C000000020BACC4240000000A0296E56C000000080E9CB4240000000C0F16E56C0000000E0A9CC4240000000E0817156C0FFFFFF3F6FCB4240000000A0D07356C000000080EECC424000000060627756C000000060F8CC424000000000908956C0000000E0B3CC424000000060BC8956C00000004054D24240 1.99145700000000003
69 0106000020E6100000010000000103000000010000001100000000000060E6E956C000000040FFB54240000000C0B1F056C00000006063B6424000000060EAF056C0000000E033CC4240000000007EF456C000000000A6CC42400000004063F456C0FFFFFFBF64DB424000000080DBF356C0FFFFFF9F97DF4240000000803CF456C000000040BEE442400000008044E256C0000000E0BEE442400000000065D456C00000004041E44240000000C086D456C000000040CED94240000000C080CA56C0000000001BD942400000008089CA56C00000008095CB4240000000408AD456C000000000C3CB4240000000A067D456C000000060BDC042400000008032CE56C00000006039C04240000000C049CE56C0000000E0D8B4424000000060E6E956C000000040FFB54240 7.26665000000000028
70 0106000020E61000000100000001030000000100000014000000FFFFFF7F82A256C00000008046D24240FFFFFF1FC4A256C00000000069CC4240000000E078A356C00000002057CC4240000000408CA356C0000000C0F4A74240FFFFFF7FD8A356C0000000A004A34240000000808AAF56C0000000E0CBA24240FFFFFF3F5BB056C000000020F4A24240000000800DB056C00000002043AF424000000040D5B156C00000004066AF424000000080CAB156C0000000600DCD4240FFFFFF7F5EC056C0000000805BCD4240000000C00AC756C00000000081CB42400000008089CA56C00000008095CB4240000000C080CA56C0000000001BD9424000000060FBC956C0000000A01ED94240FFFFFF9FF7C956C0FFFFFF7F75DE42400000006010C756C000000020A7DE42400000006098A956C0000000C019DE424000000040D6A956C0000000C024D24240FFFFFF7F82A256C00000008046D24240 3.11090400000000011
71 0106000020E6100000010000000103000000010000000900000000000060378E56C0000000A0EBA74240000000408CA356C0000000C0F4A74240000000E078A356C00000002057CC4240FFFFFF1FC4A256C00000000069CC4240FFFFFF7F82A256C00000008046D2424000000060BC8956C00000004054D2424000000000908956C0000000E0B3CC424000000040968956C000000040EAA7424000000060378E56C0000000A0EBA74240 2.98027100000000011
72 0106000020E6100000010000000103000000010000000D00000000000080CD0557C0000000407487424000000000A11057C000000060E7874240000000A0F00F57C000000080AAA04240FFFFFF7F761057C0000000A0EEA0424000000000221057C0000000C028BD4240000000E0F40F57C0000000C0B5CD4240000000A0000257C00000002054CD4240000000007EF456C000000000A6CC424000000060EAF056C0000000E033CC4240000000C0B1F056C00000006063B6424000000060E6E956C000000040FFB5424000000000B0EA56C0000000004286424000000080CD0557C00000004074874240 3.86676699999999984
73 0106000020E6100000010000000103000000010000000E000000000000E0FF5D56C0000000C071AB424000000020E35B56C00000002088AD4240000000605D5B56C0000000409CB4424000000020085D56C00000000002BA424000000080AA5F56C0000000E0F0BE424000000020996156C0000000E035C94240FFFFFF7F835E56C000000080FAC84240FFFFFF7F825D56C0FFFFFF1FA9CA4240000000C0655D56C0000000A09ECD424000000000CE4956C00000004053CD4240000000E0F64256C0FFFFFFDF38CD4240000000002E4356C0FFFFFFFF29AB4240000000C0BC4F56C0000000C03CAB4240000000E0FF5D56C0000000C071AB4240 1.86840800000000007
74 0106000020E61000000100000001030000000100000024000000000000808AAF56C0000000E0CBA24240000000A093AF56C0000000E0549542400000006089B056C00000002032954240000000E0B1B056C0FFFFFFFF0E924240000000202CB256C00000002007924240000000C071B256C000000060B4864240FFFFFFBF2FBE56C0000000A04E874240000000802ABE56C000000080B48C4240FFFFFF7F90C156C0FFFFFF9FCB8C424000000080BAC156C0000000C003924240000000E09DC256C0000000A01B92424000000020C3C256C0000000807895424000000020D4C456C00000008020954240000000C0EAC456C0FFFFFF7F84964240000000E019C656C0000000409A964240000000E035C656C000000080E899424000000000C3C856C000000000D7994240000000A0D9C856C0000000C0899E42400000006055C856C0FFFFFF1FAB9E42400000000047C856C0000000804BA04240000000005BCA56C0000000A0D1A04240000000805DCA56C0000000204EA84240000000E096CB56C0000000E080A8424000000080B0CB56C000000040A1B44240000000C049CE56C0000000E0D8B442400000008032CE56C00000006039C04240000000A067D456C000000060BDC04240000000408AD456C000000000C3CB42400000008089CA56C00000008095CB4240000000C00AC756C00000000081CB4240FFFFFF7F5EC056C0000000805BCD424000000080CAB156C0000000600DCD424000000040D5B156C00000004066AF4240000000800DB056C00000002043AF4240FFFFFF3F5BB056C000000020F4A24240000000808AAF56C0000000E0CBA24240 12.5770339999999994
75 0106000020E6100000010000000103000000010000000D000000000000A0A47756C0000000C0DA904240000000E07A7D56C000000060D090424000000060B67D56C00000000057884240000000E0BD7F56C00000008017884240000000C0D87F56C0000000805986424000000040568756C00000000039864240000000605B8756C0000000E0188B4240000000E02F8E56C0000000A0058B424000000060378E56C0000000A0EBA7424000000040968956C000000040EAA7424000000000908956C0000000E0B3CC424000000060627756C000000060F8CC4240000000A0A47756C0000000C0DA904240 7.80359900000000017
76 0106000020E6100000010000000103000000010000002800000000000060627756C000000060F8CC4240000000A0D07356C000000080EECC4240000000E0817156C0FFFFFF3F6FCB4240000000C0F16E56C0000000E0A9CC4240000000A0296E56C000000080E9CB424000000020756D56C000000020BACC4240000000E03A6C56C00000008035CC424000000060DB6B56C0000000801BCB424000000020126956C000000000DBCC4240000000A08C6856C000000080A3CB424000000020FD6656C00000006065CC4240000000A0D56556C0FFFFFF3F7FC9424000000020996156C0000000E035C9424000000080AA5F56C0000000E0F0BE424000000020085D56C00000000002BA4240000000605D5B56C0000000409CB4424000000020E35B56C00000002088AD4240000000E0FF5D56C0000000C071AB424000000080096056C0FFFFFF1F2BAA424000000080E36056C00000000009A7424000000080E36056C0FFFFFF1F61A3424000000080555F56C0000000A0C4A0424000000060D06056C000000000079F4240000000C0F46556C000000000279E424000000000FE6556C0000000A01B9D424000000040236856C000000020EE9C4240FFFFFF7F196856C000000000C59B424000000000CD6956C0000000E06A9B424000000060CC6956C00000008027994240000000A0336C56C00000000008994240FFFFFFFF326C56C0FFFFFF9F2C974240FFFFFF7F876D56C0FFFFFF5FD296424000000020906D56C0000000208094424000000020866E56C0000000806194424000000080986E56C0000000E0FC924240000000A0A07056C000000080BF924240000000A0B27056C000000060B790424000000060BB7156C000000060A7904240000000A0A47756C0000000C0DA90424000000060627756C000000060F8CC4240 3.47149000000000019
77 0106000020E6100000010000000103000000010000001800000000000000B0EA56C0000000004286424000000060E6E956C000000040FFB54240000000C049CE56C0000000E0D8B4424000000080B0CB56C000000040A1B44240000000E096CB56C0000000E080A84240000000805DCA56C0000000204EA84240000000005BCA56C0000000A0D1A042400000000047C856C0000000804BA042400000006055C856C0FFFFFF1FAB9E4240000000A0D9C856C0000000C0899E424000000000C3C856C000000000D7994240000000E035C656C000000080E8994240000000E019C656C0000000409A964240000000C0EAC456C0FFFFFF7F8496424000000020D4C456C0000000802095424000000020C3C256C00000008078954240000000E09DC256C0000000A01B92424000000080BAC156C0000000C003924240FFFFFF7F90C156C0FFFFFF9FCB8C4240000000E031C756C000000080E88A4240000000806ECE56C0000000E0E18A424000000040AFCE56C000000020DF704240000000C0B7EA56C0000000C06A71424000000000B0EA56C00000000042864240 4.33482199999999995
78 0106000020E6100000010000000103000000010000001F000000000000E08FAB56C0000000807A76424000000040ABAC56C000000080A176424000000020BFAC56C0000000A0BC7B4240FFFFFFDFF6AD56C000000000E37B424000000000FEAD56C0000000604F7F42400000004052AF56C000000000847F42400000000061AF56C0FFFFFFBF6B864240000000C071B256C000000060B4864240000000202CB256C00000002007924240000000E0B1B056C0FFFFFFFF0E9242400000006089B056C00000002032954240000000A093AF56C0000000E054954240000000808AAF56C0000000E0CBA24240FFFFFF7FD8A356C0000000A004A34240000000408CA356C0000000C0F4A7424000000060378E56C0000000A0EBA74240000000E02F8E56C0000000A0058B4240000000605B8756C0000000E0188B424000000040568756C0000000003986424000000060BC8956C0000000604186424000000060E68956C0000000809C81424000000060D28A56C0000000207C81424000000000F58A56C000000060D07E424000000000108C56C0000000A0A07E424000000060338C56C000000040987C4240FFFFFFFF318D56C000000080777C424000000040438D56C0000000A0217B4240000000607A8E56C000000080E27A424000000000938E56C000000080EC77424000000040D09056C00000004054764240000000E08FAB56C0000000807A764240 8.45153700000000008
\.
CREATE INDEX getis_data_gix ON getis_data USING GIST(the_geom);

View File

@@ -0,0 +1,15 @@
-- Regression test for cdb_crankshaft.CDB_GetisOrdsG
-- (Getis-Ord's G* hotspot/coldspot analysis).
-- NOTE: `\set ECHO all` below makes psql echo every subsequent line into
-- the test output, which is diffed against an expected file — keep any
-- additional comments ABOVE that line so the expected output is unchanged.
\pset format unaligned
\set ECHO all
\i test/fixtures/getis_data.sql
-- set random seed
SELECT cdb_crankshaft._cdb_random_seeds(1234);
-- test against PySAL example dataset 'stl_hom'
SELECT rowid, round(z_score, 4) As z_score, round(p_value, 4) As p_value
FROM cdb_crankshaft.CDB_GetisOrdsG(
'select * from getis_data',
'hr8893', 'queen', NULL, 999,
'the_geom', 'cartodb_id') As t(z_score, p_value, p_z_sim, rowid)
WHERE round(p_value, 4) <= 0.05
ORDER BY rowid ASC;

View File

@@ -0,0 +1,85 @@
-- Regression tests for the outlier-detection functions:
--   CDB_StdDevOutlier, CDB_PercentOutlier, CDB_StaticOutlier
-- (ECHO is 'none', so comments here are never part of the compared output.)
SET client_min_messages TO WARNING;
\set ECHO none
\pset format unaligned
--
-- Reference statistics for the test array, computed in vanilla postgres:
-- postgres=# select round(avg(i), 3) as avg,
-- round(stddev(i), 3) as stddev,
-- round(avg(i) + stddev(i), 3) as one_stddev,
-- round(avg(i) + 2 * stddev(i), 3) As two_stddev
-- from unnest(ARRAY[1,3,2,3,5,1,2,32,12,3,57,2,1,4,2,100]) As x(i);
-- avg | stddev | one_stddev | two_stddev
-- --------+--------+------------+------------
-- 14.375 | 27.322 | 41.697 | 69.020
-- With a threshold of 1.0 standard deviation, ids 11, 16, and 17 are outliers
WITH a AS (
SELECT
ARRAY[1,3,2,3,5,1,2,32,12, 3,57, 2, 1, 4, 2,100,-100]::numeric[] As vals, ARRAY[1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15, 16, 17]::int[] As ids
), b As (
SELECT
(cdb_crankshaft.cdb_StdDevOutlier(vals, 1.0, ids)).*
FROM a
ORDER BY ids)
SELECT *
FROM b
WHERE is_outlier IS TRUE;
-- With a threshold of 2.0 standard deviations, id 16 is the only outlier
WITH a AS (
SELECT
ARRAY[1,3,2,3,5,1,2,32,12, 3,57, 2, 1, 4, 2,100,-100]::numeric[] As vals,
ARRAY[1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15, 16, 17]::int[] As ids
), b As (
SELECT
(cdb_crankshaft.CDB_StdDevOutlier(vals, 2.0, ids)).*
FROM a
ORDER BY ids)
SELECT *
FROM b
WHERE is_outlier IS TRUE;
-- All values equal: the standard deviation is zero, so the function
-- should throw an error instead of returning outliers
WITH a AS (
SELECT
ARRAY[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]::numeric[] As vals,
ARRAY[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]::int[] As ids
), b As (
SELECT
(cdb_crankshaft.CDB_StdDevOutlier(vals, 1.0, ids)).*
FROM a
ORDER BY ids)
SELECT *
FROM b
WHERE is_outlier IS TRUE;
-- With a percent threshold of 2.0 (values more than 100% above or below
-- the mean are flagged), which values are outliers?
WITH a AS (
SELECT
ARRAY[1,3,2,3,5,1,2,32,12, 3,57, 2, 1, 4, 2,100,-100]::numeric[] As vals,
ARRAY[1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15, 16, 17]::int[] As ids
), b As (
SELECT
(cdb_crankshaft.CDB_PercentOutlier(vals, 2.0, ids)).*
FROM a
ORDER BY ids)
SELECT *
FROM b
WHERE is_outlier IS TRUE;
-- With a static threshold of 11, what are the outliers
WITH a AS (
SELECT
ARRAY[1,3,2,3,5,1,2,32,12, 3,57, 2, 1, 4, 2,100,-100]::numeric[] As vals,
ARRAY[1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15, 16, 17]::int[] As ids
), b As (
SELECT unnest(vals) As v, unnest(ids) as i
FROM a
)
SELECT cdb_crankshaft.CDB_StaticOutlier(v, 11.0) As is_outlier, i As rowid
FROM b
WHERE cdb_crankshaft.CDB_StaticOutlier(v, 11.0) is True
ORDER BY i;

View File

@@ -3,3 +3,5 @@ import crankshaft.random_seeds
import crankshaft.clustering
import crankshaft.space_time_dynamics
import crankshaft.segmentation
import crankshaft.regression
import analysis_data_provider

View File

@@ -0,0 +1,76 @@
"""class for fetching data"""
import plpy
import pysal_utils as pu
class AnalysisDataProvider:
    """Database access layer for the analysis functions.

    Each method builds (or receives) a SQL query, runs it through plpy,
    and returns the raw result rows. Failures are reported with
    plpy.error, which raises inside PL/Python and aborts the calling
    function, so the methods never return a partial result on error.
    """

    def get_getis(self, w_type, params):
        """Fetch data for Getis-Ord's G*.

        w_type: spatial weight type (e.g. 'knn' or 'queen')
        params: mapping consumed by pu.construct_neighbor_query
        Returns the query rows, or an empty zipped array of width 4
        when no neighbors are found.
        """
        try:
            query = pu.construct_neighbor_query(w_type, params)
            result = plpy.execute(query)
            # if there are no neighbors, exit
            if len(result) == 0:
                return pu.empty_zipped_array(4)
            else:
                return result
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_markov(self, w_type, params):
        """Fetch data for spatial Markov analysis."""
        try:
            query = pu.construct_neighbor_query(w_type, params)
            data = plpy.execute(query)
            if len(data) == 0:
                return pu.empty_zipped_array(4)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_moran(self, w_type, params):
        """Fetch data for Moran's I analyses."""
        try:
            query = pu.construct_neighbor_query(w_type, params)
            data = plpy.execute(query)
            # if there are no neighbors, exit
            if len(data) == 0:
                return pu.empty_zipped_array(2)
            return data
        except plpy.SPIError as err:
            # Fix: previously formatted the message with the undefined
            # name `e`, raising a NameError that masked the real
            # database error. The unreachable fallback return after
            # plpy.error was also dropped (plpy.error raises), matching
            # the sibling methods.
            plpy.error('Analysis failed: %s' % err)

    def get_nonspatial_kmeans(self, query):
        """Fetch data for non-spatial k-means (caller supplies the query)."""
        try:
            data = plpy.execute(query)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_spatial_kmeans(self, params):
        """Fetch id/x/y arrays for spatial k-means.

        params must provide 'id_col', 'geom_col' and 'subquery'; rows
        with NULL geometries are excluded.
        """
        query = ("SELECT "
                 "array_agg({id_col} ORDER BY {id_col}) as ids,"
                 "array_agg(ST_X({geom_col}) ORDER BY {id_col}) As xs,"
                 "array_agg(ST_Y({geom_col}) ORDER BY {id_col}) As ys "
                 "FROM ({subquery}) As a "
                 "WHERE {geom_col} IS NOT NULL").format(**params)
        try:
            data = plpy.execute(query)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_gwr(self, params):
        """Fetch data for geographically weighted regression (GWR)."""
        query = pu.gwr_query(params)
        try:
            query_result = plpy.execute(query)
            return query_result
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

View File

@@ -1,3 +1,4 @@
"""Import all functions from for clustering"""
from moran import *
from kmeans import *
from getis import *

View File

@@ -0,0 +1,50 @@
"""
Getis-Ord's G geostatistics (hotspot/coldspot analysis)
"""
import pysal as ps
from collections import OrderedDict
# crankshaft modules
import crankshaft.pysal_utils as pu
from crankshaft.analysis_data_provider import AnalysisDataProvider
# High level interface ---------------------------------------
class Getis:
    """Entry point for Getis-Ord's G* hotspot/coldspot analysis."""

    def __init__(self, data_provider=None):
        # Use the default database-backed provider unless the caller
        # injects one (e.g., a mock in tests).
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None
                              else data_provider)

    def getis_ord(self, subquery, attr,
                  w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Getis-Ord's G*
        Implementation building neighbors with a PostGIS database and PySAL's
        Getis-Ord's G* hotspot/coldspot module.
        Andy Eschbacher
        """
        # Geometries whose attribute is null are dropped by the query;
        # with kNN weighting this can pull in farther neighbors.
        query_params = OrderedDict([("id_col", id_col),
                                    ("attr1", attr),
                                    ("geom_col", geom_col),
                                    ("subquery", subquery),
                                    ("num_ngbrs", num_ngbrs)])

        rows = self.data_provider.get_getis(w_type, query_params)
        attr_values = pu.get_attributes(rows)

        # spatial weights object for PySAL
        w = pu.get_weight(rows, w_type, num_ngbrs)

        # Getis-Ord's G* z- and p-values per geometry
        getis_stats = ps.esda.getisord.G_Local(attr_values, w, star=True,
                                               permutations=permutations)

        return zip(getis_stats.z_sim, getis_stats.p_sim,
                   getis_stats.p_z_sim, w.id_order)

View File

@@ -1,18 +1,32 @@
from sklearn.cluster import KMeans
import plpy
import numpy as np
def kmeans(query, no_clusters, no_init=20):
data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids,
array_agg(ST_X(the_geom) order by cartodb_id) xs,
array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a
where the_geom is not null
'''.format(query=query))
from crankshaft.analysis_data_provider import AnalysisDataProvider
xs = data[0]['xs']
ys = data[0]['ys']
ids = data[0]['ids']
km = KMeans(n_clusters= no_clusters, n_init=no_init)
labels = km.fit_predict(zip(xs,ys))
return zip(ids,labels)
class Kmeans:
    """K-means clustering backed by a pluggable data provider."""

    def __init__(self, data_provider=None):
        # Default to the database-backed provider when none is injected.
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None
                              else data_provider)

    def spatial(self, query, no_clusters, no_init=20):
        """
        find centers based on clusters of latitude/longitude pairs
        query: SQL query that has a WGS84 geometry (the_geom)
        """
        provider_params = {
            "subquery": query,
            "geom_col": "the_geom",
            "id_col": "cartodb_id",
        }
        resp = self.data_provider.get_spatial_kmeans(provider_params)

        # Unpack query response
        x_coords = resp[0]['xs']
        y_coords = resp[0]['ys']
        row_ids = resp[0]['ids']

        clusterer = KMeans(n_clusters=no_clusters, n_init=no_init)
        cluster_labels = clusterer.fit_predict(zip(x_coords, y_coords))
        return zip(row_ids, cluster_labels)

View File

@@ -6,8 +6,8 @@ Moran's I geostatistics (global clustering & outliers presence)
# average of the their neighborhood
import pysal as ps
import plpy
from collections import OrderedDict
from crankshaft.analysis_data_provider import AnalysisDataProvider
# crankshaft module
import crankshaft.pysal_utils as pu
@@ -15,204 +15,162 @@ import crankshaft.pysal_utils as pu
# High level interface ---------------------------------------
class Moran:
    """
    Moran's I spatial autocorrelation statistics — global, local, rate,
    and bivariate variants — computed with PySAL on neighbor data
    fetched through an analysis data provider.

    Fix over the previous revision: the parameter OrderedDict in
    ``global_rate_stat`` was missing a comma after
    ``("attr2", denominator)``, so the tuple was being *called* with the
    next tuple as its argument (a runtime TypeError).
    """

    def __init__(self, data_provider=None):
        """
        @param data_provider: object exposing ``get_moran(w_type, params)``.
               Defaults to :class:`AnalysisDataProvider`, which runs the
               PostGIS neighbor query through PL/Python.
        """
        if data_provider is None:
            self.data_provider = AnalysisDataProvider()
        else:
            self.data_provider = data_provider

    def global_stat(self, subquery, attr_name,
                    w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I (global)
        Implementation building neighbors with a PostGIS database and
        Moran's I core clusters with PySAL.
        Andy Eschbacher
        """
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr_name),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        attr_vals = pu.get_attributes(result)

        # calculate weights
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate moran global
        moran_global = ps.esda.moran.Moran(attr_vals, weight,
                                           permutations=permutations)

        return zip([moran_global.I], [moran_global.EI])

    def local_stat(self, subquery, attr,
                   w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I implementation for PL/Python
        Andy Eschbacher
        """
        # geometries with attributes that are null are ignored
        # resulting in a collection of not as near neighbors
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        attr_vals = pu.get_attributes(result)
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
                                         permutations=permutations)

        # find quadrants for each geometry
        quads = quad_position(lisa.q)

        return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)

    def global_rate_stat(self, subquery, numerator, denominator,
                         w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Rate (global)
        Andy Eschbacher
        """
        params = OrderedDict([("id_col", id_col),
                              ("attr1", numerator),
                              # NOTE: comma restored here — previously the
                              # missing comma made this tuple a call target
                              ("attr2", denominator),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        numer = pu.get_attributes(result, 1)
        denom = pu.get_attributes(result, 2)

        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate moran global rate
        lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
                                             permutations=permutations)

        return zip([lisa_rate.I], [lisa_rate.EI])

    def local_rate_stat(self, subquery, numerator, denominator,
                        w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Local Rate
        Andy Eschbacher
        """
        # geometries with values that are null are ignored
        # resulting in a collection of not as near neighbors
        params = OrderedDict([("id_col", id_col),
                              ("numerator", numerator),
                              ("denominator", denominator),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        numer = pu.get_attributes(result, 1)
        denom = pu.get_attributes(result, 2)

        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
                                              permutations=permutations)

        # find quadrants for each geometry
        quads = quad_position(lisa.q)

        return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)

    def local_bivariate_stat(self, subquery, attr1, attr2,
                             permutations, geom_col, id_col,
                             w_type, num_ngbrs):
        """
        Moran's I (local) Bivariate (untested)
        """
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr1),
                              ("attr2", attr2),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        attr1_vals = pu.get_attributes(result, 1)
        attr2_vals = pu.get_attributes(result, 2)

        # create weights
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
                                            permutations=permutations)

        # find clustering of significance
        lisa_sig = quad_position(lisa.q)

        return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
# Low level functions ----------------------------------------

View File

@@ -42,19 +42,33 @@ def get_weight(query_res, w_type='knn', num_ngbrs=5):
return built_weight
def query_attr_select(params):
def query_attr_select(params, table_ref=True):
"""
Create portion of SELECT statement for attributes inolved in query.
Defaults to order in the params
@param params: dict of information used in query (column names,
table name, etc.)
Example:
OrderedDict([('numerator', 'price'),
('denominator', 'sq_meters'),
('subquery', 'SELECT * FROM interesting_data')])
Output:
"i.\"price\"::numeric As attr1, " \
"i.\"sq_meters\"::numeric As attr2, "
"""
attr_string = ""
template = "i.\"%(col)s\"::numeric As attr%(alias_num)s, "
template = "\"%(col)s\"::numeric As attr%(alias_num)s, "
if 'time_cols' in params:
# if markov analysis
attrs = params['time_cols']
if table_ref:
template = "i." + template
if ('time_cols' in params) or ('ind_vars' in params):
# if markov or gwr analysis
attrs = (params['time_cols'] if 'time_cols' in params
else params['ind_vars'])
if 'ind_vars' in params:
template = "array_agg(\"%(col)s\"::numeric) As attr%(alias_num)s, "
for idx, val in enumerate(attrs):
attr_string += template % {"col": val, "alias_num": idx + 1}
@@ -64,14 +78,14 @@ def query_attr_select(params):
if k not in ('id_col', 'geom_col', 'subquery',
'num_ngbrs', 'subquery')]
for idx, val in enumerate(sorted(attrs)):
for idx, val in enumerate(attrs):
attr_string += template % {"col": params[val],
"alias_num": idx + 1}
return attr_string
def query_attr_where(params):
def query_attr_where(params, table_ref=True):
"""
Construct where conditions when building neighbors query
Create portion of WHERE clauses for weeding out NULL-valued geometries
@@ -80,8 +94,8 @@ def query_attr_where(params):
'numerator': 'data1',
'denominator': 'data2',
'': ...}
Output: 'idx_replace."data1" IS NOT NULL AND idx_replace."data2"
IS NOT NULL'
Output:
'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL'
Input:
{'subquery': ...,
'time_cols': ['time1', 'time2', 'time3'],
@@ -90,11 +104,14 @@ def query_attr_where(params):
NULL AND idx_replace."time3" IS NOT NULL'
"""
attr_string = []
template = "idx_replace.\"%s\" IS NOT NULL"
template = "\"%s\" IS NOT NULL"
if table_ref:
template = "idx_replace." + template
if 'time_cols' in params:
# markov where clauses
attrs = params['time_cols']
if ('time_cols' in params) or ('ind_vars' in params):
# markov or gwr where clauses
attrs = (params['time_cols'] if 'time_cols' in params
else params['ind_vars'])
# add values to template
for attr in attrs:
attr_string.append(template % attr)
@@ -102,15 +119,17 @@ def query_attr_where(params):
# moran where clauses
# get keys
attrs = sorted([k for k in params
if k not in ('id_col', 'geom_col', 'subquery',
'num_ngbrs', 'subquery')])
attrs = [k for k in params
if k not in ('id_col', 'geom_col', 'subquery',
'num_ngbrs', 'subquery')]
# add values to template
for attr in attrs:
attr_string.append(template % params[attr])
if len(attrs) == 2:
attr_string.append("idx_replace.\"%s\" <> 0" % params[attrs[1]])
if 'denominator' in attrs:
attr_string.append(
"idx_replace.\"%s\" <> 0" % params['denominator'])
out = " AND ".join(attr_string)
@@ -122,8 +141,8 @@ def knn(params):
@param vars: dict of values to fill template
"""
attr_select = query_attr_select(params)
attr_where = query_attr_where(params)
attr_select = query_attr_select(params, table_ref=True)
attr_where = query_attr_where(params, table_ref=True)
replacements = {"attr_select": attr_select,
"attr_where_i": attr_where.replace("idx_replace", "i"),
@@ -177,6 +196,32 @@ def queen(params):
return query.format(**params)
def gwr_query(params):
    """
    GWR query

    Builds the row-aggregating input for geographically weighted
    regression: centroid x/y coordinates, the dependent variable, one
    ``array_agg`` per independent variable (via ``query_attr_select``),
    and the row ids, filtering out rows where the dependent variable or
    any independent variable is NULL (via ``query_attr_where``).

    @param params: dict of values used to fill the template; expected
           keys include 'geom_col', 'dep_var', 'ind_vars', 'id_col'
           and 'subquery'
    @return: the formatted SQL string, stripped of surrounding whitespace
    """
    # table_ref=None -> columns are emitted without a table-alias prefix,
    # since this query reads from a single aliased subquery (As q)
    replacements = {"ind_vars_select": query_attr_select(params,
                                                         table_ref=None),
                    "ind_vars_where": query_attr_where(params,
                                                       table_ref=None)}
    query = '''
    SELECT
      array_agg(ST_X(ST_Centroid({geom_col}))) As x,
      array_agg(ST_Y(ST_Centroid({geom_col}))) As y,
      array_agg({dep_var}) As dep_var,
      %(ind_vars_select)s
      array_agg({id_col}) As rowid
    FROM ({subquery}) As q
    WHERE
      {dep_var} IS NOT NULL AND
      %(ind_vars_where)s
    ''' % replacements
    return query.format(**params).strip()
# to add more weight methods open a ticket or pull request

View File

@@ -0,0 +1,2 @@
from crankshaft.regression.gwr import *
from crankshaft.regression.glm import *

View File

@@ -0,0 +1,444 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Import GLM and pysal\n",
"import os\n",
"import numpy as np\n",
"os.chdir('/Users/toshan/dev/pysal/pysal/contrib/glm')\n",
"from glm import GLM\n",
"import pysal\n",
"import pandas as pd\n",
"import statsmodels.formula.api as smf\n",
"import statsmodels.api as sm\n",
"from family import Gaussian, Binomial, Poisson, QuasiPoisson\n",
"\n",
"from statsmodels.api import families"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Prepare some test data - columbus example\n",
"db = pysal.open(pysal.examples.get_path('columbus.dbf'),'r')\n",
"y = np.array(db.by_col(\"HOVAL\"))\n",
"y = np.reshape(y, (49,1))\n",
"X = []\n",
"#X.append(np.ones(len(y)))\n",
"X.append(db.by_col(\"INC\"))\n",
"X.append(db.by_col(\"CRIME\"))\n",
"X = np.array(X).T"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 46.42818268]\n",
" [ 0.62898397]\n",
" [ -0.48488854]]\n"
]
}
],
"source": [
"#First fit pysal OLS model\n",
"from pysal.spreg import ols\n",
"OLS = ols.OLS(y, X)\n",
"print OLS.betas"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'family.Gaussian'>\n",
"<class 'family.Gaussian'>\n",
"<class 'family.Gaussian'>\n",
"[ 46.42818268 0.62898397 -0.48488854]\n",
"[ 46.42818268 0.62898397 -0.48488854]\n"
]
}
],
"source": [
"#Then fit Gaussian GLM\n",
"\n",
"#create Gaussian GLM model object\n",
"model = GLM(y, X, Gaussian())\n",
"model\n",
"\n",
"#Fit model to estimate coefficients and return GLMResults object\n",
"results = model.fit()\n",
"\n",
"#Check coefficients - R betas [46.4282, 0.6290, -0.4849]\n",
"print results.params\n",
"\n",
"# Gaussian GLM results from statsmodels\n",
"sm_model = smf.GLM(y, sm.add_constant(X), family=families.Gaussian())\n",
"sm_results = sm_model.fit()\n",
"print sm_results.params"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2 2\n",
"<class 'family.Gaussian'>\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"<class 'family.Gaussian'>\n",
"<class 'family.Gaussian'>\n",
"<class 'family.Gaussian'>\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n"
]
}
],
"source": [
"print results.df_model, sm_results.df_model\n",
"print np.allclose(results.aic, sm_results.aic)\n",
"print np.allclose(results.bic, sm_results.bic)\n",
"print np.allclose(results.deviance, sm_results.deviance)\n",
"print np.allclose(results.df_model, sm_results.df_model)\n",
"print np.allclose(results.df_resid, sm_results.df_resid)\n",
"print np.allclose(results.llf, sm_results.llf)\n",
"print np.allclose(results.mu, sm_results.mu)\n",
"print np.allclose(results.n, sm_results.nobs)\n",
"print np.allclose(results.null, sm_results.null)\n",
"print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
"print np.allclose(results.params, sm_results.params)\n",
"print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
"print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
"print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
"print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
"print np.allclose(results.resid_response, sm_results.resid_response)\n",
"print np.allclose(results.resid_working, sm_results.resid_working)\n",
"print np.allclose(results.scale, sm_results.scale)\n",
"print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
"print np.allclose(results.cov_params(), sm_results.cov_params())\n",
"print np.allclose(results.bse, sm_results.bse)\n",
"print np.allclose(results.conf_int(), sm_results.conf_int())\n",
"print np.allclose(results.pvalues, sm_results.pvalues)\n",
"print np.allclose(results.tvalues, sm_results.tvalues)\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'family.Poisson'>\n",
"<class 'family.Poisson'>\n",
"<class 'family.Poisson'>\n",
"[ 3.92159085 0.01183491 -0.01371397]\n",
"[ 3.92159085 0.01183491 -0.01371397]\n"
]
}
],
"source": [
"#Now fit a Poisson GLM \n",
"\n",
"poisson_y = np.round(y).astype(int)\n",
"\n",
"#create Poisson GLM model object\n",
"model = GLM(poisson_y, X, Poisson())\n",
"model\n",
"\n",
"#Fit model to estimate coefficients and return GLMResults object\n",
"results = model.fit()\n",
"\n",
"#Check coefficients - R betas [3.91926, 0.01198, -0.01371]\n",
"print results.params.T\n",
"\n",
"# Poisson GLM results from statsmodels\n",
"sm_results = smf.GLM(poisson_y, sm.add_constant(X), family=families.Poisson()).fit()\n",
"print sm_results.params"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'family.Poisson'>\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"<class 'family.Poisson'>\n",
"<class 'family.Poisson'>\n",
"<class 'family.Poisson'>\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"[ 0.13049161 0.00511599 0.00193769] [ 0.13049161 0.00511599 0.00193769]\n"
]
}
],
"source": [
"print np.allclose(results.aic, sm_results.aic)\n",
"print np.allclose(results.bic, sm_results.bic)\n",
"print np.allclose(results.deviance, sm_results.deviance)\n",
"print np.allclose(results.df_model, sm_results.df_model)\n",
"print np.allclose(results.df_resid, sm_results.df_resid)\n",
"print np.allclose(results.llf, sm_results.llf)\n",
"print np.allclose(results.mu, sm_results.mu)\n",
"print np.allclose(results.n, sm_results.nobs)\n",
"print np.allclose(results.null, sm_results.null)\n",
"print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
"print np.allclose(results.params, sm_results.params)\n",
"print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
"print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
"print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
"print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
"print np.allclose(results.resid_response, sm_results.resid_response)\n",
"print np.allclose(results.resid_working, sm_results.resid_working)\n",
"print np.allclose(results.scale, sm_results.scale)\n",
"print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
"print np.allclose(results.cov_params(), sm_results.cov_params())\n",
"print np.allclose(results.bse, sm_results.bse)\n",
"print np.allclose(results.conf_int(), sm_results.conf_int())\n",
"print np.allclose(results.pvalues, sm_results.pvalues)\n",
"print np.allclose(results.tvalues, sm_results.tvalues)\n",
"print results.bse, sm_results.bse"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-5.33638276 0.0287754 ]\n",
"[-5.33638276 0.0287754 ]\n"
]
}
],
"source": [
"#Now fit a binomial GLM\n",
"londonhp = pd.read_csv('/Users/toshan/projects/londonhp.csv')\n",
"#londonhp = pd.read_csv('/Users/qszhao/Dropbox/pysal/pysal/contrib/gwr/londonhp.csv')\n",
"y = londonhp['BATH2'].values\n",
"y = np.reshape(y, (316,1))\n",
"X = londonhp['FLOORSZ'].values\n",
"X = np.reshape(X, (316,1))\n",
"\n",
"#create logistic GLM model object\n",
"model = GLM(y, X, Binomial())\n",
"model\n",
"\n",
"#Fit model to estimate coefficients and return GLMResults object\n",
"results = model.fit()\n",
"\n",
"#Check coefficients - R betas [-5.33638, 0.02878]\n",
"print results.params.T\n",
"\n",
"# Logistic GLM results from statsmodels\n",
"sm_results = smf.GLM(y, sm.add_constant(X), family=families.Binomial()).fit()\n",
"print sm_results.params"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 1\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n"
]
}
],
"source": [
"print results.df_model, sm_results.df_model\n",
"print np.allclose(results.aic, sm_results.aic)\n",
"print np.allclose(results.bic, sm_results.bic)\n",
"print np.allclose(results.deviance, sm_results.deviance)\n",
"print np.allclose(results.df_model, sm_results.df_model)\n",
"print np.allclose(results.df_resid, sm_results.df_resid)\n",
"print np.allclose(results.llf, sm_results.llf)\n",
"print np.allclose(results.mu, sm_results.mu)\n",
"print np.allclose(results.n, sm_results.nobs)\n",
"print np.allclose(results.null, sm_results.null)\n",
"print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
"print np.allclose(results.params, sm_results.params)\n",
"print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
"print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
"print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
"print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
"print np.allclose(results.resid_response, sm_results.resid_response)\n",
"print np.allclose(results.resid_working, sm_results.resid_working)\n",
"print np.allclose(results.scale, sm_results.scale)\n",
"print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
"print np.allclose(results.cov_params(), sm_results.cov_params())\n",
"print np.allclose(results.bse, sm_results.bse)\n",
"print np.allclose(results.conf_int(), sm_results.conf_int())\n",
"print np.allclose(results.pvalues, sm_results.pvalues)\n",
"print np.allclose(results.tvalues, sm_results.tvalues)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'family.QuasiPoisson'>\n",
"<class 'family.QuasiPoisson'>\n",
"<class 'family.QuasiPoisson'>\n"
]
}
],
"source": [
"#create QuasiPoisson GLM model object\n",
"model = GLM(poisson_y, X, QuasiPoisson())\n",
"model\n",
"\n",
"#Fit model to estimate coefficients and return GLMResults object\n",
"results = model.fit()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -0,0 +1,4 @@
import glm
import family
import utils
import iwls

View File

@@ -0,0 +1,959 @@
from __future__ import print_function
import numpy as np
from scipy import stats
from utils import cache_readonly
class Results(object):
    """
    Generic container for model estimation results.

    Parameters
    ----------
    model : class instance
        the previously specified model instance
    params : array
        parameter estimates from the fit model
    """

    def __init__(self, model, params, **kwargs):
        # keep any extra keyword results directly on the instance
        self.__dict__.update(kwargs)
        self.initialize(model, params, **kwargs)
        self._data_attr = []

    def initialize(self, model, params, **kwargs):
        # record the fitted parameters and the model they came from
        self.params = params
        self.model = model
        if hasattr(model, 'k_constant'):
            self.k_constant = model.k_constant

    def predict(self, exog=None, transform=True, *args, **kwargs):
        """
        Call self.model.predict with self.params as the first argument.

        Parameters
        ----------
        exog : array-like, optional
            The values for which you want to predict.
        transform : bool, optional
            If the model was fit via a formula, whether to pass *exog*
            through that formula (via patsy) before predicting.
            Default is True.
        args, kwargs :
            Forwarded to the model's ``predict`` method.

        Returns
        -------
        prediction : ndarray or pandas.Series
            See self.model.predict
        """
        mdl = self.model
        # formula models: run the raw inputs through the design matrix
        if exog is not None and transform and hasattr(mdl, 'formula'):
            from patsy import dmatrix
            exog = dmatrix(mdl.data.design_info.builder, exog)

        if exog is not None:
            arr = np.asarray(exog)
            # promote a 1d input to a column when the model has a single
            # exogenous column (short-circuit keeps mdl.exog untouched
            # for 2d inputs, as before)
            if arr.ndim == 1 and (mdl.exog.ndim == 1 or
                                  mdl.exog.shape[1] == 1):
                arr = arr[:, None]
            exog = np.atleast_2d(arr)  # needed in count model shape[1]

        return mdl.predict(self.params, exog, *args, **kwargs)
#TODO: public method?
class LikelihoodModelResults(Results):
"""
Class to contain results from likelihood models
Parameters
-----------
model : LikelihoodModel instance or subclass instance
LikelihoodModelResults holds a reference to the model that is fit.
params : 1d array_like
parameter estimates from estimated model
normalized_cov_params : 2d array
Normalized (before scaling) covariance of params. (dot(X.T,X))**-1
scale : float
For (some subset of models) scale will typically be the
mean square error from the estimated model (sigma^2)
Returns
-------
**Attributes**
mle_retvals : dict
Contains the values returned from the chosen optimization method if
full_output is True during the fit. Available only if the model
is fit by maximum likelihood. See notes below for the output from
the different methods.
mle_settings : dict
Contains the arguments passed to the chosen optimization method.
Available if the model is fit by maximum likelihood. See
LikelihoodModel.fit for more information.
model : model instance
LikelihoodResults contains a reference to the model that is fit.
params : ndarray
The parameters estimated for the model.
scale : float
The scaling factor of the model given during instantiation.
tvalues : array
The t-values of the standard errors.
Notes
-----
The covariance of params is given by scale times normalized_cov_params.
Return values by solver if full_output is True during fit:
'newton'
fopt : float
The value of the (negative) loglikelihood at its
minimum.
iterations : int
Number of iterations performed.
score : ndarray
The score vector at the optimum.
Hessian : ndarray
The Hessian at the optimum.
warnflag : int
1 if maxiter is exceeded. 0 if successful convergence.
converged : bool
True: converged. False: did not converge.
allvecs : list
List of solutions at each iteration.
'nm'
fopt : float
The value of the (negative) loglikelihood at its
minimum.
iterations : int
Number of iterations performed.
warnflag : int
1: Maximum number of function evaluations made.
2: Maximum number of iterations reached.
converged : bool
True: converged. False: did not converge.
allvecs : list
List of solutions at each iteration.
'bfgs'
fopt : float
Value of the (negative) loglikelihood at its minimum.
gopt : float
Value of gradient at minimum, which should be near 0.
Hinv : ndarray
value of the inverse Hessian matrix at minimum. Note
that this is just an approximation and will often be
different from the value of the analytic Hessian.
fcalls : int
Number of calls to loglike.
gcalls : int
Number of calls to gradient/score.
warnflag : int
1: Maximum number of iterations exceeded. 2: Gradient
and/or function calls are not changing.
converged : bool
True: converged. False: did not converge.
allvecs : list
Results at each iteration.
'lbfgs'
fopt : float
Value of the (negative) loglikelihood at its minimum.
gopt : float
Value of gradient at minimum, which should be near 0.
fcalls : int
Number of calls to loglike.
warnflag : int
Warning flag:
- 0 if converged
- 1 if too many function evaluations or too many iterations
- 2 if stopped for another reason
converged : bool
True: converged. False: did not converge.
'powell'
fopt : float
Value of the (negative) loglikelihood at its minimum.
direc : ndarray
Current direction set.
iterations : int
Number of iterations performed.
fcalls : int
Number of calls to loglike.
warnflag : int
1: Maximum number of function evaluations. 2: Maximum number
of iterations.
converged : bool
True : converged. False: did not converge.
allvecs : list
Results at each iteration.
'cg'
fopt : float
Value of the (negative) loglikelihood at its minimum.
fcalls : int
Number of calls to loglike.
gcalls : int
Number of calls to gradient/score.
warnflag : int
1: Maximum number of iterations exceeded. 2: Gradient and/
or function calls not changing.
converged : bool
True: converged. False: did not converge.
allvecs : list
Results at each iteration.
'ncg'
fopt : float
Value of the (negative) loglikelihood at its minimum.
fcalls : int
Number of calls to loglike.
gcalls : int
Number of calls to gradient/score.
hcalls : int
Number of calls to hessian.
warnflag : int
1: Maximum number of iterations exceeded.
converged : bool
True: converged. False: did not converge.
allvecs : list
Results at each iteration.
"""
# by default we use normal distribution
# can be overwritten by instances or subclasses
use_t = False
def __init__(self, model, params, normalized_cov_params=None, scale=1.,
**kwargs):
super(LikelihoodModelResults, self).__init__(model, params)
self.normalized_cov_params = normalized_cov_params
self.scale = scale
# robust covariance
# We put cov_type in kwargs so subclasses can decide in fit whether to
# use this generic implementation
if 'use_t' in kwargs:
use_t = kwargs['use_t']
if use_t is not None:
self.use_t = use_t
if 'cov_type' in kwargs:
cov_type = kwargs.get('cov_type', 'nonrobust')
cov_kwds = kwargs.get('cov_kwds', {})
if cov_type == 'nonrobust':
self.cov_type = 'nonrobust'
self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
'covariance matrix of the errors is correctly ' +
'specified.'}
else:
from statsmodels.base.covtype import get_robustcov_results
if cov_kwds is None:
cov_kwds = {}
use_t = self.use_t
# TODO: we shouldn't need use_t in get_robustcov_results
get_robustcov_results(self, cov_type=cov_type, use_self=True,
use_t=use_t, **cov_kwds)
def normalized_cov_params(self):
raise NotImplementedError
def _get_robustcov_results(self, cov_type='nonrobust', use_self=True,
use_t=None, **cov_kwds):
from statsmodels.base.covtype import get_robustcov_results
if cov_kwds is None:
cov_kwds = {}
if cov_type == 'nonrobust':
self.cov_type = 'nonrobust'
self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
'covariance matrix of the errors is correctly ' +
'specified.'}
else:
# TODO: we shouldn't need use_t in get_robustcov_results
get_robustcov_results(self, cov_type=cov_type, use_self=True,
use_t=use_t, **cov_kwds)
@cache_readonly
def llf(self):
return self.model.loglike(self.params)
@cache_readonly
def bse(self):
return np.sqrt(np.diag(self.cov_params()))
@cache_readonly
def tvalues(self):
"""
Return the t-statistic for a given parameter estimate.
"""
return self.params / self.bse
@cache_readonly
def pvalues(self):
if self.use_t:
df_resid = getattr(self, 'df_resid_inference', self.df_resid)
return stats.t.sf(np.abs(self.tvalues), df_resid)*2
else:
return stats.norm.sf(np.abs(self.tvalues))*2
    def cov_params(self, r_matrix=None, column=None, scale=None, cov_p=None,
                   other=None):
        """
        Returns the variance/covariance matrix.

        The variance/covariance matrix can be of a linear contrast
        of the estimates of params or all params multiplied by scale which
        will usually be an estimate of sigma^2.  Scale is assumed to be
        a scalar.

        Parameters
        ----------
        r_matrix : array-like
            Can be 1d, or 2d.  Can be used alone or with other.
        column :  array-like, optional
            Must be used on its own.  Can be 0d or 1d see below.
        scale : float, optional
            Can be specified or not.  Default is None, which means that
            the scale argument is taken from the model.
        cov_p : array-like, optional
            An alternative covariance matrix of the parameter estimates;
            used instead of the model-based covariance when given.
        other : array-like, optional
            Can be used when r_matrix is specified.

        Returns
        -------
        cov : ndarray
            covariance matrix of the parameter estimates or of linear
            combination of parameter estimates. See Notes.

        Notes
        -----
        (The below are assumed to be in matrix notation.)

        If no argument is specified returns the covariance matrix of a model
        ``(scale)*(X.T X)^(-1)``

        If contrast is specified it pre and post-multiplies as follows
        ``(scale) * r_matrix (X.T X)^(-1) r_matrix.T``

        If contrast and other are specified returns
        ``(scale) * r_matrix (X.T X)^(-1) other.T``

        If column is specified returns
        ``(scale) * (X.T X)^(-1)[column,column]`` if column is 0d

        OR

        ``(scale) * (X.T X)^(-1)[column][:,column]`` if column is 1d
        """
        # l1-regularized fits may carry nans in the covariance; nan_dot
        # multiplies with the convention nan * 0 = 0 so zero rows stay zero
        if (hasattr(self, 'mle_settings') and
                self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
            dot_fun = nan_dot
        else:
            dot_fun = np.dot

        if (cov_p is None and self.normalized_cov_params is None and
                not hasattr(self, 'cov_params_default')):
            raise ValueError('need covariance of parameters for computing '
                             '(unnormalized) covariances')
        # `column` is mutually exclusive with `r_matrix`/`other`
        if column is not None and (r_matrix is not None or other is not None):
            raise ValueError('Column should be specified without other '
                             'arguments.')
        if other is not None and r_matrix is None:
            raise ValueError('other can only be specified with r_matrix')

        if cov_p is None:
            # precedence: a precomputed (e.g. robust) covariance wins over
            # the scaled normalized covariance
            if hasattr(self, 'cov_params_default'):
                cov_p = self.cov_params_default
            else:
                if scale is None:
                    scale = self.scale
                cov_p = self.normalized_cov_params * scale

        if column is not None:
            column = np.asarray(column)
            if column.shape == ():
                # scalar index: single variance entry
                return cov_p[column, column]
            else:
                #return cov_p[column][:, column]
                # fancy-index both axes to extract the submatrix
                return cov_p[column[:, None], column]
        elif r_matrix is not None:
            r_matrix = np.asarray(r_matrix)
            if r_matrix.shape == ():
                raise ValueError("r_matrix should be 1d or 2d")
            if other is None:
                other = r_matrix
            else:
                other = np.asarray(other)
            # R (X'X)^-1 other'
            tmp = dot_fun(r_matrix, dot_fun(cov_p, np.transpose(other)))
            return tmp
        else:  # if r_matrix is None and column is None:
            return cov_p
#TODO: make sure this works as needed for GLMs
    def t_test(self, r_matrix, cov_p=None, scale=None,
               use_t=None):
        """
        Compute a t-test for a each linear hypothesis of the form Rb = q

        Parameters
        ----------
        r_matrix : array-like, str, tuple
            - array : If an array is given, a p x k 2d array or length k 1d
              array specifying the linear restrictions. It is assumed
              that the linear combination is equal to zero.
            - str : The full hypotheses to test can be given as a string.
              See the examples.
            - tuple : A tuple of arrays in the form (R, q). If q is given,
              can be either a scalar or a length p row vector.
        cov_p : array-like, optional
            An alternative estimate for the parameter covariance matrix.
            If None is given, self.normalized_cov_params is used.
        scale : float, optional
            An optional `scale` to use.  Default is the scale specified
            by the model fit.
        use_t : bool, optional
            If use_t is None, then the default of the model is used.
            If use_t is True, then the p-values are based on the t
            distribution.
            If use_t is False, then the p-values are based on the normal
            distribution.

        Returns
        -------
        res : ContrastResults instance
            The results for the test are attributes of this results instance.
            The available results have the same elements as the parameter table
            in `summary()`.

        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> results = sm.OLS(data.endog, data.exog).fit()
        >>> r = np.zeros_like(results.params)
        >>> r[5:] = [1,-1]
        >>> print(r)
        [ 0.  0.  0.  0.  0.  1. -1.]

        r tests that the coefficients on the 5th and 6th independent
        variable are the same.

        >>> T_test = results.t_test(r)
        >>> print(T_test)
        <T contrast: effect=-1829.2025687192481, sd=455.39079425193762,
        t=-4.0167754636411717, p=0.0015163772380899498, df_denom=9>
        >>> T_test.effect
        -1829.2025687192481
        >>> T_test.sd
        455.39079425193762
        >>> T_test.tvalue
        -4.0167754636411717
        >>> T_test.pvalue
        0.0015163772380899498

        Alternatively, you can specify the hypothesis tests using a string

        >>> from statsmodels.formula.api import ols
        >>> dta = sm.datasets.longley.load_pandas().data
        >>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
        >>> results = ols(formula, dta).fit()
        >>> hypotheses = 'GNPDEFL = GNP, UNEMP = 2, YEAR/1829 = 1'
        >>> t_test = results.t_test(hypotheses)
        >>> print(t_test)

        See Also
        ---------
        tvalues : individual t statistics
        f_test : for F tests
        patsy.DesignInfo.linear_constraint
        """
        # NOTE(review): `scale` is accepted but never used in this body —
        # confirm whether it is deprecated or should feed cov_params
        from patsy import DesignInfo
        # build the restriction matrix R and constants q from an array,
        # tuple or patsy-style string hypothesis
        names = self.model.data.param_names
        LC = DesignInfo(names).linear_constraint(r_matrix)
        r_matrix, q_matrix = LC.coefs, LC.constants
        num_ttests = r_matrix.shape[0]
        num_params = r_matrix.shape[1]

        if (cov_p is None and self.normalized_cov_params is None and
                not hasattr(self, 'cov_params_default')):
            raise ValueError('Need covariance of parameters for computing '
                             'T statistics')
        if num_params != self.params.shape[0]:
            raise ValueError('r_matrix and params are not aligned')
        if q_matrix is None:
            # default hypothesis: Rb = 0
            q_matrix = np.zeros(num_ttests)
        else:
            q_matrix = np.asarray(q_matrix)
            q_matrix = q_matrix.squeeze()
        if q_matrix.size > 1:
            if q_matrix.shape[0] != num_ttests:
                raise ValueError("r_matrix and q_matrix must have the same "
                                 "number of rows")

        if use_t is None:
            #switch to use_t false if undefined
            use_t = (hasattr(self, 'use_t') and self.use_t)

        _t = _sd = None

        _effect = np.dot(r_matrix, self.params)
        # nan_dot multiplies with the convention nan * 0 = 0

        # Perform the test
        if num_ttests > 1:
            _sd = np.sqrt(np.diag(self.cov_params(
                r_matrix=r_matrix, cov_p=cov_p)))
        else:
            _sd = np.sqrt(self.cov_params(r_matrix=r_matrix, cov_p=cov_p))
        # recipr gives a pseudo-reciprocal (0 where _sd is 0)
        _t = (_effect - q_matrix) * recipr(_sd)

        df_resid = getattr(self, 'df_resid_inference', self.df_resid)

        if use_t:
            return ContrastResults(effect=_effect, t=_t, sd=_sd,
                                   df_denom=df_resid)
        else:
            return ContrastResults(effect=_effect, statistic=_t, sd=_sd,
                                   df_denom=df_resid,
                                   distribution='norm')
    def f_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None):
        """
        Compute the F-test for a joint linear hypothesis.

        This is a special case of `wald_test` that always uses the F
        distribution.

        Parameters
        ----------
        r_matrix : array-like, str, or tuple
            - array : An r x k array where r is the number of restrictions to
              test and k is the number of regressors. It is assumed
              that the linear combination is equal to zero.
            - str : The full hypotheses to test can be given as a string.
              See the examples.
            - tuple : A tuple of arrays in the form (R, q), ``q`` can be
              either a scalar or a length k row vector.
        cov_p : array-like, optional
            An alternative estimate for the parameter covariance matrix.
            If None is given, self.normalized_cov_params is used.
        scale : float, optional
            Default is 1.0 for no scaling.
        invcov : array-like, optional
            A q x q array to specify an inverse covariance matrix based on a
            restrictions matrix.

        Returns
        -------
        res : ContrastResults instance
            The results for the test are attributes of this results instance.

        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> results = sm.OLS(data.endog, data.exog).fit()
        >>> A = np.identity(len(results.params))
        >>> A = A[1:,:]

        This tests that each coefficient is jointly statistically
        significantly different from zero.

        >>> print(results.f_test(A))
        <F contrast: F=330.28533923463488, p=4.98403052872e-10,
        df_denom=9, df_num=6>

        Compare this to

        >>> results.fvalue
        330.2853392346658
        >>> results.f_pvalue
        4.98403096572e-10

        >>> B = np.array(([0,0,1,-1,0,0,0],[0,0,0,0,0,1,-1]))

        This tests that the coefficient on the 2nd and 3rd regressors are
        equal and jointly that the coefficient on the 5th and 6th regressors
        are equal.

        >>> print(results.f_test(B))
        <F contrast: F=9.740461873303655, p=0.00560528853174, df_denom=9,
        df_num=2>

        Alternatively, you can specify the hypothesis tests using a string

        >>> from statsmodels.datasets import longley
        >>> from statsmodels.formula.api import ols
        >>> dta = longley.load_pandas().data
        >>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
        >>> results = ols(formula, dta).fit()
        >>> hypotheses = '(GNPDEFL = GNP), (UNEMP = 2), (YEAR/1829 = 1)'
        >>> f_test = results.f_test(hypotheses)
        >>> print(f_test)

        See Also
        --------
        statsmodels.stats.contrast.ContrastResults
        wald_test
        t_test
        patsy.DesignInfo.linear_constraint

        Notes
        -----
        The matrix `r_matrix` is assumed to be non-singular. More precisely,

        r_matrix (pX pX.T) r_matrix.T

        is assumed invertible. Here, pX is the generalized inverse of the
        design matrix of the model. There can be problems in non-OLS models
        where the rank of the covariance of the noise is not full.
        """
        # thin delegate: force the F distribution in the generic Wald test
        res = self.wald_test(r_matrix, cov_p=cov_p, scale=scale,
                             invcov=invcov, use_f=True)
        return res
#TODO: untested for GLMs?
    def wald_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None,
                  use_f=None):
        """
        Compute a Wald-test for a joint linear hypothesis.

        Parameters
        ----------
        r_matrix : array-like, str, or tuple
            - array : An r x k array where r is the number of restrictions to
              test and k is the number of regressors. It is assumed that the
              linear combination is equal to zero.
            - str : The full hypotheses to test can be given as a string.
              See the examples.
            - tuple : A tuple of arrays in the form (R, q), ``q`` can be
              either a scalar or a length p row vector.
        cov_p : array-like, optional
            An alternative estimate for the parameter covariance matrix.
            If None is given, self.normalized_cov_params is used.
        scale : float, optional
            Default is 1.0 for no scaling.
        invcov : array-like, optional
            A q x q array to specify an inverse covariance matrix based on a
            restrictions matrix.
        use_f : bool
            If True, then the F-distribution is used. If False, then the
            asymptotic distribution, chisquare is used. If use_f is None, then
            the F distribution is used if the model specifies that use_t is True.
            The test statistic is proportionally adjusted for the distribution
            by the number of constraints in the hypothesis.

        Returns
        -------
        res : ContrastResults instance
            The results for the test are attributes of this results instance.

        See also
        --------
        statsmodels.stats.contrast.ContrastResults
        f_test
        t_test
        patsy.DesignInfo.linear_constraint

        Notes
        -----
        The matrix `r_matrix` is assumed to be non-singular. More precisely,

        r_matrix (pX pX.T) r_matrix.T

        is assumed invertible. Here, pX is the generalized inverse of the
        design matrix of the model. There can be problems in non-OLS models
        where the rank of the covariance of the noise is not full.
        """
        if use_f is None:
            #switch to use_t false if undefined
            use_f = (hasattr(self, 'use_t') and self.use_t)

        from patsy import DesignInfo
        # build R and q from an array, tuple or patsy-style string
        names = self.model.data.param_names
        LC = DesignInfo(names).linear_constraint(r_matrix)
        r_matrix, q_matrix = LC.coefs, LC.constants

        if (self.normalized_cov_params is None and cov_p is None and
                invcov is None and not hasattr(self, 'cov_params_default')):
            raise ValueError('need covariance of parameters for computing '
                             'F statistics')

        cparams = np.dot(r_matrix, self.params[:, None])
        J = float(r_matrix.shape[0])  # number of restrictions

        if q_matrix is None:
            q_matrix = np.zeros(J)
        else:
            q_matrix = np.asarray(q_matrix)
        if q_matrix.ndim == 1:
            q_matrix = q_matrix[:, None]
            if q_matrix.shape[0] != J:
                raise ValueError("r_matrix and q_matrix must have the same "
                                 "number of rows")
        # deviation of the restricted combination from its hypothesized value
        Rbq = cparams - q_matrix
        if invcov is None:
            cov_p = self.cov_params(r_matrix=r_matrix, cov_p=cov_p)
            if np.isnan(cov_p).max():
                raise ValueError("r_matrix performs f_test for using "
                                 "dimensions that are asymptotically "
                                 "non-normal")
            invcov = np.linalg.inv(cov_p)

        # l1-regularized fits: nan_dot keeps nan * 0 = 0 semantics
        if (hasattr(self, 'mle_settings') and
                self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
            F = nan_dot(nan_dot(Rbq.T, invcov), Rbq)
        else:
            F = np.dot(np.dot(Rbq.T, invcov), Rbq)

        df_resid = getattr(self, 'df_resid_inference', self.df_resid)
        if use_f:
            # F statistic is the chi2 statistic divided by the number of
            # restrictions
            F /= J
            return ContrastResults(F=F, df_denom=df_resid,
                                   df_num=invcov.shape[0])
        else:
            return ContrastResults(chi2=F, df_denom=J, statistic=F,
                                   distribution='chi2', distargs=(J,))
    def wald_test_terms(self, skip_single=False, extra_constraints=None,
                        combine_terms=None):
        """
        Compute a sequence of Wald tests for terms over multiple columns

        This computes joined Wald tests for the hypothesis that all
        coefficients corresponding to a `term` are zero.
        `Terms` are defined by the underlying formula or by string matching.

        Parameters
        ----------
        skip_single : boolean
            If true, then terms that consist only of a single column and,
            therefore, refers only to a single parameter is skipped.
            If false, then all terms are included.
        extra_constraints : ndarray
            not tested yet
        combine_terms : None or list of strings
            Each string in this list is matched to the name of the terms or
            the name of the exogenous variables. All columns whose name
            includes that string are combined in one joint test.

        Returns
        -------
        test_result : result instance
            The result instance contains `table` which is a pandas DataFrame
            with the test results: test statistic, degrees of freedom and
            pvalues.

        Examples
        --------
        >>> res_ols = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                          data).fit()
        >>> res_ols.wald_test_terms()
        <class 'statsmodels.stats.contrast.WaldTestResults'>
                                                  F                P>F  df constraint  df denom
        Intercept                        279.754525  2.37985521351e-22              1        51
        C(Duration, Sum)                   5.367071    0.0245738436636              1        51
        C(Weight, Sum)                    12.432445  3.99943118767e-05              2        51
        C(Duration, Sum):C(Weight, Sum)    0.176002      0.83912310946              2        51

        >>> res_poi = Poisson.from_formula("Days ~ C(Weight) * C(Duration)",
                                           data).fit(cov_type='HC0')
        >>> wt = res_poi.wald_test_terms(skip_single=False,
                                         combine_terms=['Duration', 'Weight'])
        >>> print(wt)
                                    chi2             P>chi2  df constraint
        Intercept              15.695625  7.43960374424e-05              1
        C(Weight)              16.132616  0.000313940174705              2
        C(Duration)             1.009147     0.315107378931              1
        C(Weight):C(Duration)   0.216694     0.897315972824              2
        Duration               11.187849     0.010752286833              3
        Weight                 30.263368  4.32586407145e-06              4
        """
        # lazy import
        from collections import defaultdict

        result = self
        if extra_constraints is None:
            extra_constraints = []
        if combine_terms is None:
            combine_terms = []
        # formula information, if the model was built from a formula
        design_info = getattr(result.model.data.orig_exog, 'design_info', None)

        if design_info is None and extra_constraints is None:
            raise ValueError('no constraints, nothing to do')

        identity = np.eye(len(result.params))
        constraints = []
        combined = defaultdict(list)
        if design_info is not None:
            # one joint constraint per formula term
            for term in design_info.terms:
                cols = design_info.slice(term)
                name = term.name()
                constraint_matrix = identity[cols]

                # check if in combined
                for cname in combine_terms:
                    if cname in name:
                        combined[cname].append(constraint_matrix)

                k_constraint = constraint_matrix.shape[0]
                if skip_single:
                    if k_constraint == 1:
                        continue

                constraints.append((name, constraint_matrix))

            combined_constraints = []
            for cname in combine_terms:
                combined_constraints.append((cname, np.vstack(combined[cname])))
        else:
            # check by exog/params names if there is no formula info
            for col, name in enumerate(result.model.exog_names):
                constraint_matrix = identity[col]

                # check if in combined
                for cname in combine_terms:
                    if cname in name:
                        combined[cname].append(constraint_matrix)

                if skip_single:
                    continue

                constraints.append((name, constraint_matrix))

            combined_constraints = []
            for cname in combine_terms:
                combined_constraints.append((cname, np.vstack(combined[cname])))

        use_t = result.use_t
        distribution = ['chi2', 'F'][use_t]

        res_wald = []
        index = []
        for name, constraint in constraints + combined_constraints + extra_constraints:
            wt = result.wald_test(constraint)
            row = [wt.statistic.item(), wt.pvalue, constraint.shape[0]]
            if use_t:
                row.append(wt.df_denom)
            res_wald.append(row)
            index.append(name)

        # distribution-neutral names
        col_names = ['statistic', 'pvalue', 'df_constraint']
        if use_t:
            col_names.append('df_denom')
        # TODO: maybe move DataFrame creation to results class
        from pandas import DataFrame
        table = DataFrame(res_wald, index=index, columns=col_names)
        res = WaldTestResults(None, distribution, None, table=table)
        # TODO: remove temp again, added for testing
        res.temp = constraints + combined_constraints + extra_constraints
        return res
    def conf_int(self, alpha=.05, cols=None, method='default'):
        """
        Returns the confidence interval of the fitted parameters.

        Parameters
        ----------
        alpha : float, optional
            The significance level for the confidence interval.
            ie., The default `alpha` = .05 returns a 95% confidence interval.
        cols : array-like, optional
            `cols` specifies which confidence intervals to return
        method : string
            Not Implemented Yet
            Method to estimate the confidence_interval.
            "Default" : uses self.bse which is based on inverse Hessian for MLE
            "hjjh" :
            "jac" :
            "boot-bse"
            "boot_quant"
            "profile"

        Returns
        --------
        conf_int : array
            Each row contains [lower, upper] limits of the confidence interval
            for the corresponding parameter. The first column contains all
            lower, the second column contains all upper limits.

        Examples
        --------
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> results = sm.OLS(data.endog, data.exog).fit()
        >>> results.conf_int()
        array([[-5496529.48322745, -1467987.78596704],
               [    -177.02903529,      207.15277984],
               [      -0.1115811 ,        0.03994274],
               [      -3.12506664,       -0.91539297],
               [      -1.5179487 ,       -0.54850503],
               [      -0.56251721,        0.460309  ],
               [     798.7875153 ,     2859.51541392]])

        >>> results.conf_int(cols=(2,3))
        array([[-0.1115811 ,  0.03994274],
               [-3.12506664, -0.91539297]])

        Notes
        -----
        The confidence interval is based on the standard normal distribution.
        Models wish to use a different distribution should overwrite this
        method.
        """
        bse = self.bse

        # choose the reference distribution for the critical value
        if self.use_t:
            dist = stats.t
            df_resid = getattr(self, 'df_resid_inference', self.df_resid)
            q = dist.ppf(1 - alpha / 2, df_resid)
        else:
            dist = stats.norm
            q = dist.ppf(1 - alpha / 2)

        if cols is None:
            lower = self.params - q * bse
            upper = self.params + q * bse
        else:
            # restrict the interval to the requested parameter columns
            cols = np.asarray(cols)
            lower = self.params[cols] - q * bse[cols]
            upper = self.params[cols] + q * bse[cols]
        return np.asarray(lzip(lower, upper))
def save(self, fname, remove_data=False):
'''
save a pickle of this instance
Parameters
----------
fname : string or filehandle
fname can be a string to a file path or filename, or a filehandle.
remove_data : bool
If False (default), then the instance is pickled without changes.
If True, then all arrays with length nobs are set to None before
pickling. See the remove_data method.
In some cases not all arrays will be set to None.
Notes
-----
If remove_data is true and the model result does not implement a
remove_data method then this will raise an exception.
'''
from statsmodels.iolib.smpickle import save_pickle
if remove_data:
self.remove_data()
save_pickle(self, fname)
@classmethod
def load(cls, fname):
'''
load a pickle, (class method)
Parameters
----------
fname : string or filehandle
fname can be a string to a file path or filename, or a filehandle.
Returns
-------
unpickled instance
'''
from statsmodels.iolib.smpickle import load_pickle
return load_pickle(fname)
    def remove_data(self):
        '''remove data arrays, all nobs arrays from result and model

        This reduces the size of the instance, so it can be pickled with less
        memory. Currently tested for use with predict from an unpickled
        results and model instance.

        .. warning:: Since data and some intermediate results have been removed
           calculating new statistics that require them will raise exceptions.
           The exception will occur the first time an attribute is accessed
           that has been set to None.

        Not fully tested for time series models, tsa, and might delete too much
        for prediction or not all that would be possible.

        The list of arrays to delete is maintained as an attribute of the
        result and model instance, except for cached values. These lists could
        be changed before calling remove_data.
        '''
        def wipe(obj, att):
            # get to last element in a dotted attribute path, e.g.
            # 'model.exog' -> walk to model, then null its 'exog'
            p = att.split('.')
            att_ = p.pop(-1)
            try:
                obj_ = reduce(getattr, [obj] + p)
                if hasattr(obj_, att_):
                    setattr(obj_, att_, None)
            except AttributeError:
                # a missing intermediate attribute means nothing to wipe
                pass

        # combine result-level and model-level data attribute lists
        model_attr = ['model.' + i for i in self.model._data_attr]
        for att in self._data_attr + model_attr:
            wipe(self, att)

        # also null the cached nobs-length arrays
        data_in_cache = getattr(self, 'data_in_cache', [])
        data_in_cache += ['fittedvalues', 'resid', 'wresid']
        for key in data_in_cache:
            try:
                self._cache[key] = None
            except (AttributeError, KeyError):
                pass
def lzip(*args, **kwargs):
    """Return ``zip(*args, **kwargs)`` materialized as a list.

    Compatibility helper: on Python 3 ``zip`` is lazy, so this forces it
    to a concrete list of tuples.
    """
    zipped = zip(*args, **kwargs)
    return [pair for pair in zipped]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,326 @@
import numpy as np
import numpy.linalg as la
from pysal.spreg.utils import RegressionPropsY, spdot
import pysal.spreg.user_output as USER
from utils import cache_readonly
from base import LikelihoodModelResults
import family
from iwls import iwls
__all__ = ['GLM']
class GLM(RegressionPropsY):
    """
    Generalised linear models. Can currently estimate Gaussian, Poisson and
    Logistic regression coefficients. GLM object prepares model input and fit
    method performs estimation which then returns a GLMResults object.

    Parameters
    ----------
    y : array
        n*1, dependent variable.
    X : array
        n*k, independent variable, excluding the constant.
    family : family object
        underlying probability model: Gaussian (default), Poisson or
        Binomial; supplies link and variance functions.

    Attributes
    ----------
    y : array
        n*1, dependent variable.
    X : array
        n*k, independent variable, including constant.
    family : family object
        underlying probability model: Gaussian, Poisson or Binomial.
    n : integer
        Number of observations
    k : integer
        Number of independent variables
    df_model : float
        k-1, where k is the number of variables (including
        intercept)
    df_residual : float
        observations minus variables (n-k)
    mean_y : float
        Mean of y
    std_y : float
        Standard deviation of y
    fit_params : dict
        Parameters passed into fit method to define estimation
        routine.
    normalized_cov_params : array
        k*k, approximates [X.T*X]-1
    """
    # NOTE(review): the default ``family.Gaussian()`` instance is created
    # once at definition time and shared across GLM instances — confirm
    # the family objects are stateless
    def __init__(self, y, X, family=family.Gaussian(), constant=True):
        """
        Initialize class
        """
        # check_arrays validates the inputs and returns the row count
        self.n = USER.check_arrays(y, X)
        USER.check_y(y, self.n)
        self.y = y
        if constant:
            # prepend a column of ones for the intercept
            self.X = USER.check_constant(X)
        else:
            self.X = X
        self.family = family
        self.k = self.X.shape[1]
        self.fit_params = {}

    def fit(self, ini_betas=None, tol=1.0e-6, max_iter=200, solve='iwls'):
        """
        Method that fits a model with a particular estimation routine.

        Parameters
        ----------
        ini_betas : array
            k*1, initial coefficient values, including constant.
            Default is None, which calculates initial values during
            estimation.
        tol : float
            Tolerance for estimation convergence.
        max_iter : integer
            Maximum number of iterations if convergence not
            achieved.
        solve : string
            Technique to solve MLE equations.
            'iwls' = iteratively (re)weighted least squares (default)

        Returns
        -------
        GLMResults instance with the estimated coefficients.
        """
        # record the settings so results objects can report them
        self.fit_params['ini_betas'] = ini_betas
        self.fit_params['tol'] = tol
        self.fit_params['max_iter'] = max_iter
        self.fit_params['solve'] = solve
        if solve.lower() == 'iwls':
            params, predy, w, n_iter = iwls(self.y, self.X, self.family,
                                            ini_betas=ini_betas, tol=tol,
                                            max_iter=max_iter)
            self.fit_params['n_iter'] = n_iter
        # NOTE(review): `params` is unbound if solve != 'iwls' — confirm
        # 'iwls' is the only supported solver
        return GLMResults(self, params.flatten(), predy, w)

    @cache_readonly
    def df_model(self):
        # number of regressors excluding the intercept
        return self.X.shape[1] - 1

    @cache_readonly
    def df_resid(self):
        # residual degrees of freedom: n - k
        return self.n - self.df_model - 1
class GLMResults(LikelihoodModelResults):
    """
    Results of estimated GLM and diagnostics.

    Parameters
    ----------
    model : GLM object
        Pointer to GLM object with estimation parameters.
    params : array
        k*1, estimated coefficients
    mu : array
        n*1, predicted y values.
    w : array
        n*1, final weight used for iwls

    Attributes
    ----------
    model : GLM Object
        Points to GLM object for which parameters have been
        estimated.
    y : array
        n*1, dependent variable.
    x : array
        n*k, independent variable, including constant.
    family : string
        Model type: 'Gaussian', 'Poisson', 'Logistic'
    n : integer
        Number of observations
    k : integer
        Number of independent variables
    df_model : float
        k-1, where k is the number of variables (including
        intercept)
    df_residual : float
        observations minus variables (n-k)
    fit_params : dict
        parameters passed into fit method to define estimation
        routine.
    scale : float
        sigma squared used for subsequent computations.
    params : array
        n*k, estimated beta coefficients
    w : array
        n*1, final weight values of x
    mu : array
        n*1, predicted value of y (i.e., fittedvalues)
    cov_params : array
        Variance covariance matrix (kxk) of betas which has been
        appropriately scaled by sigma-squared
    bse : array
        k*1, standard errors of betas
    pvalues : array
        k*1, two-tailed pvalues of parameters
    tvalues : array
        k*1, the tvalues of the standard errors
    null : array
        n*1, predicted values of y for null model
    deviance : float
        value of the deviance function evaluated at params;
        see family.py for distribution-specific deviance
    null_deviance : float
        value of the deviance function for the model fit with
        a constant as the only regressor
    llf : float
        value of the loglikelihood function evaluated at params;
        see family.py for distribution-specific loglikelihoods
    llnull : float
        value of log-likelihood function evaluated at null
    aic : float
        AIC
    bic : float
        BIC
    D2 : float
        percent deviance explained
    adj_D2 : float
        adjusted percent deviance explained
    pseudo_R2 : float
        McFadden's pseudo R2  (coefficient of determination)
    adj_pseudoR2 : float
        adjusted McFadden's pseudo R2
    resid_response : array
        response residuals; defined as y-mu
    resid_pearson : array
        Pearson residuals; defined as (y-mu)/sqrt(VAR(mu))
        where VAR is the distribution specific variance
        function; see family.py and varfuncs.py for more information.
    resid_working : array
        Working residuals; the working residuals are defined as
        resid_response/link'(mu); see links.py for the
        derivatives of the link functions.
    resid_anscombe : array
        Anscombe residuals; see family.py for
        distribution-specific Anscombe residuals.
    resid_deviance : array
        deviance residuals; see family.py for
        distribution-specific deviance residuals.
    pearson_chi2 : float
        chi-Squared statistic is defined as the sum
        of the squares of the Pearson residuals
    normalized_cov_params : array
        k*k, approximates [X.T*X]-1
    """
    def __init__(self, model, params, mu, w):
        self.model = model
        self.n = model.n
        # flatten to 1d so residual arithmetic broadcasts correctly
        self.y = model.y.T.flatten()
        self.X = model.X
        self.k = model.k
        self.family = model.family
        self.fit_params = model.fit_params
        self.params = params
        self.w = w
        self.mu = mu.flatten()
        # cache dict used by the cache_readonly properties below
        self._cache = {}

    @cache_readonly
    def df_model(self):
        return self.model.df_model

    @cache_readonly
    def df_resid(self):
        return self.model.df_resid

    @cache_readonly
    def normalized_cov_params(self):
        # approximates (X'X)^-1 using the final IWLS weights
        return la.inv(spdot(self.w.T, self.w))

    @cache_readonly
    def resid_response(self):
        # raw residuals on the response scale
        return (self.y - self.mu)

    @cache_readonly
    def resid_pearson(self):
        # residuals standardized by the distribution-specific variance
        return ((self.y - self.mu) /
                np.sqrt(self.family.variance(self.mu)))

    @cache_readonly
    def resid_working(self):
        # residuals on the linear-predictor (link) scale
        return (self.resid_response / self.family.link.deriv(self.mu))

    @cache_readonly
    def resid_anscombe(self):
        return (self.family.resid_anscombe(self.y, self.mu))

    @cache_readonly
    def resid_deviance(self):
        return (self.family.resid_dev(self.y, self.mu))

    @cache_readonly
    def pearson_chi2(self):
        # sum of squared Pearson residuals
        chisq = (self.y - self.mu)**2 / self.family.variance(self.mu)
        chisqsum = np.sum(chisq)
        return chisqsum

    @cache_readonly
    def null(self):
        # fitted values of an intercept-only model with the same family
        y = np.reshape(self.y, (-1, 1))
        # NOTE(review): `model` local is unused — candidate for removal
        model = self.model
        X = np.ones((len(y), 1))
        null_mod = GLM(y, X, family=self.family, constant=False)
        return null_mod.fit().mu

    @cache_readonly
    def scale(self):
        # Binomial/Poisson have fixed unit scale; otherwise use the
        # Pearson-based dispersion estimate
        if isinstance(self.family, (family.Binomial, family.Poisson)):
            return 1.
        else:
            return (((np.power(self.resid_response, 2) /
                      self.family.variance(self.mu))).sum() /
                    (self.df_resid))

    @cache_readonly
    def deviance(self):
        return self.family.deviance(self.y, self.mu)

    @cache_readonly
    def null_deviance(self):
        return self.family.deviance(self.y, self.null)

    @cache_readonly
    def llnull(self):
        return self.family.loglike(self.y, self.null, scale=self.scale)

    @cache_readonly
    def llf(self):
        return self.family.loglike(self.y, self.mu, scale=self.scale)

    @cache_readonly
    def aic(self):
        # AIC is undefined for quasi-likelihood families
        if isinstance(self.family, family.QuasiPoisson):
            return np.nan
        else:
            return -2 * self.llf + 2 * (self.df_model + 1)

    @cache_readonly
    def bic(self):
        # deviance-based BIC
        return (self.deviance -
                (self.model.n - self.df_model - 1) *
                np.log(self.model.n))

    @cache_readonly
    def D2(self):
        # percent deviance explained
        return 1 - (self.deviance / self.null_deviance)

    @cache_readonly
    def adj_D2(self):
        # adjusted for the number of predictors
        return 1.0 - (float(self.n) - 1.0)/(float(self.n) - float(self.k)) * (1.0 - self.D2)

    @cache_readonly
    def pseudoR2(self):
        # McFadden's pseudo R2
        return 1 - (self.llf / self.llnull)

    @cache_readonly
    def adj_pseudoR2(self):
        # McFadden's pseudo R2 penalized by the parameter count
        return 1 - ((self.llf - self.k) / self.llnull)

View File

@@ -0,0 +1,84 @@
import numpy as np
import numpy.linalg as la
from scipy import sparse as sp
from scipy.sparse import linalg as spla
from pysal.spreg.utils import spdot, spmultiply
from family import Binomial, Poisson
def _compute_betas(y, x):
    """
    compute MLE coefficients using iwls routine

    Solves the normal equations (x.T x) betas = x.T y with an explicit
    inverse, keeping intermediates sparse so downstream spdot calls stay
    sparse-aware.

    Methods: p189, Iteratively (Re)weighted Least Squares (IWLS),
    Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
    Geographically weighted regression: the analysis of spatially varying relationships.
    """
    xT = x.T
    xtx = spdot(xT, x)
    xtx_inv = la.inv(xtx)
    # re-wrap the dense inverse as CSR so spdot keeps sparse semantics
    xtx_inv = sp.csr_matrix(xtx_inv)
    xTy = spdot(xT, y, array_out=False)
    betas = spdot(xtx_inv, xTy)
    return betas
def _compute_betas_gwr(y, x, wi):
"""
compute MLE coefficients using iwls routine
Methods: p189, Iteratively (Re)weighted Least Squares (IWLS),
Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
Geographically weighted regression: the analysis of spatially varying relationships.
"""
xT = (x * wi).T
xtx = np.dot(xT, x)
xtx_inv = la.inv(xtx)
xtx_inv_xt = np.dot(xtx_inv, xT)
betas = np.dot(xtx_inv_xt, y)
return betas, xtx_inv_xt
def iwls(y, x, family, offset=1.0, ini_betas=None, tol=1.0e-8, max_iter=200, wi=None):
    """
    Iteratively re-weighted least squares estimation routine

    Parameters
    ----------
    y : array
        n*1, dependent variable
    x : array or sparse matrix
        n*k, design matrix
    family : family object
        model family (Gaussian, Poisson, Binomial, ...); supplies the
        link, weight and variance functions used each iteration
    offset : float or array
        offset (exposure) applied for Poisson models; default 1.0 (none)
    ini_betas : array
        k*1, initial coefficient values; default None starts at zeros
    tol : float
        convergence tolerance on the coefficient update
    max_iter : integer
        maximum number of IWLS iterations
    wi : array
        n*1, optional local (geographic) weights; when given, the GWR
        variant is used and extra diagnostics are returned

    Returns
    -------
    (betas, mu, wx, n_iter) when ``wi`` is None, otherwise
    (betas, mu, v, w, z, xtx_inv_xt, n_iter)
    """
    n_iter = 0
    diff = 1.0e6

    if ini_betas is None:
        # FIX: use the builtin ``float`` — ``np.float`` was a deprecated
        # alias of it and was removed in NumPy 1.24
        betas = np.zeros((x.shape[1], 1), float)
    else:
        betas = ini_betas

    if isinstance(family, Binomial):
        # clip y away from exactly 0/1 so the logit link is finite
        y = family.link._clean(y)
    if isinstance(family, Poisson):
        # start from the offset-adjusted response on the link scale
        y_off = y / offset
        y_off = family.starting_mu(y_off)
        v = family.predict(y_off)
        mu = family.starting_mu(y)
    else:
        mu = family.starting_mu(y)
        v = family.predict(mu)

    while diff > tol and n_iter < max_iter:
        n_iter += 1
        w = family.weights(mu)
        # working response on the link scale
        z = v + (family.link.deriv(mu) * (y - mu))
        w = np.sqrt(w)
        if type(x) != np.ndarray:
            # sparse design matrix: keep everything sparse
            # (deliberate exact type check kept from the original —
            # ndarray subclasses also take the sparse path)
            w = sp.csr_matrix(w)
            z = sp.csr_matrix(z)
        wx = spmultiply(x, w, array_out=False)
        wz = spmultiply(z, w, array_out=False)
        if wi is None:
            n_betas = _compute_betas(wz, wx)
        else:
            n_betas, xtx_inv_xt = _compute_betas_gwr(wz, wx, wi)
        v = spdot(x, n_betas)
        mu = family.fitted(v)

        if isinstance(family, Poisson):
            mu = mu * offset

        # NOTE(review): convergence uses the *smallest* coefficient change;
        # max(...) would be the stricter criterion — confirm intended
        diff = min(abs(n_betas - betas))
        betas = n_betas

    if wi is None:
        return betas, mu, wx, n_iter
    else:
        return betas, mu, v, w, z, xtx_inv_xt, n_iter

Some files were not shown because too many files have changed in this diff Show More