62 Commits
1.6.0 ... 1.9.0

Author SHA1 Message Date
Mario de Frutos
a0535a6d02 Merge pull request #332 from CartoDB/develop
Release 1.9.0
2018-04-20 10:57:20 +02:00
Mario de Frutos
198ac90a0e Update NEWS.md 2018-04-20 10:34:26 +02:00
Mario de Frutos
19c5b09f2b Merge pull request #331 from CartoDB/remove_new_fields
Remove new fields not used to simplify deploy process
2018-04-20 10:13:07 +02:00
Mario de Frutos
eb31a8f40a Remove new fields not used to simplify deploy process 2018-04-19 19:52:19 +02:00
Mario de Frutos
0a9fb4d51d Release 1.9.0 artifacts 2018-04-17 17:53:17 +02:00
Mario de Frutos
b5afd0734b Include instructions to load fixtures 2018-04-17 17:50:27 +02:00
Mario de Frutos
3e9e2f69c4 Merge pull request #325 from CartoDB/421-geom_numer_timespan_refactor
Refactor geom_numer_timespan
2018-04-17 17:47:39 +02:00
Mario de Frutos
0241f03329 All tests fixed 2018-04-17 17:41:04 +02:00
Mario de Frutos
3816c2af8b Include tiger non-clipped county for the fixtures 2018-04-16 12:56:32 +02:00
Mario de Frutos
ed3b7de9e0 Fix not valid column in exploration functions 2018-04-16 11:38:28 +02:00
Mario de Frutos
350b1716e0 New fixtures after fix in bigmetadata 2018-04-16 11:14:26 +02:00
Mario de Frutos
4fc22dc188 Fixes for generate fixtures 2018-04-13 10:31:27 +02:00
Mario de Frutos
6ce5e278e9 New fixtures including table_to_table 2018-04-13 10:11:15 +02:00
Mario de Frutos
c0126bf5c7 Remove sed command from generate_fixtures 2018-04-13 09:54:57 +02:00
Mario de Frutos
eb761cc6e7 Add table_to_table to the fixtures too 2018-04-12 18:55:31 +02:00
Mario de Frutos
0db98f4020 New fixtures with last changes 2018-04-12 18:52:33 +02:00
Mario de Frutos
e9dbc97772 Improve fixtures generator 2018-04-12 18:35:45 +02:00
Mario de Frutos
8f9c8cf164 Updated NEWS.md 2018-04-12 17:19:59 +02:00
Antonio
b891034146 Changed name of numer timespans to ensure backwards compatibility 2018-04-12 17:09:19 +02:00
Antonio
89f76e2a1a Refactor geom_numer_timespan 2018-04-12 17:09:19 +02:00
Mario de Frutos
850b3c2524 Merge pull request #327 from CartoDB/Mitigate_collisions_in_suggested_name
Modified the denominated suggested_name to mitigate collisions
2018-04-12 17:07:01 +02:00
Mario de Frutos
5ee349f4e4 Merge pull request #324 from CartoDB/422-Refactor_GetAvailableTimespans
Refactor OBS_GetAvailableTimespans
2018-04-11 10:48:54 +02:00
Antonio
b4ba9b5d1d Fixed tests 2018-04-11 10:36:58 +02:00
Antonio
be82c87bb1 Fixed CR suggestion 2018-04-11 10:36:57 +02:00
Antonio
7aac256892 Refactor OBS_GetAvailableTimespans 2018-04-11 10:36:57 +02:00
Mario de Frutos
17345f4fca Merge pull request #329 from CartoDB/postgresql_10_support
Postgresql 10 support
2018-04-09 10:43:37 +02:00
Juan Ignacio Sánchez Lara
db585177ab Explicit pre-2.4 PostGIS equal operator
Before PostGIS 2.4, `=` meant equality of bounding boxes, but now it's
strict equality.
2018-04-06 13:59:31 +02:00
Mario de Frutos
fb17d05714 Improve Travis testing
- Added multiple versions of PostgreSQL to test (9.5, 9.6 and 10)
- Added multiple versions of Postgis to test
- Cleaned up the Travis YAML in favor of a script from another project, thanks
to Paul Ramsey
2018-04-04 15:42:25 +02:00
Mario de Frutos
e469fb7920 Fix travis problem showing regression diffs 2018-04-04 15:42:04 +02:00
Antonio
6d23509557 Modified the denominated suggested_name to mitigate collisions 2018-02-23 10:46:01 +01:00
Antonio Carlón
2183b7fc26 Merge pull request #319 from CartoDB/383-More_null_columns_in_EU
Python 3
2017-11-28 16:58:13 +01:00
Antonio
1d5f8a6452 Py3 2017-11-23 12:36:59 +01:00
Javier Torres
f583cca67a Merge pull request #317 from CartoDB/316-docs_schema
Remove cdb_observatory references
2017-11-07 15:11:40 +01:00
Javier Torres
b4507b42b1 Remove cdb_observatory references 2017-11-07 09:31:19 +01:00
csobier
3c475d72df Merge pull request #315 from CartoDB/docs-edit-line245
edited description in line 245
2017-10-18 09:54:07 -04:00
csobier
8c72122fb8 modified line245 based on @ethervoid's comment 2017-10-18 09:01:52 -04:00
csobier
b93e7e8843 edited description in line 245
@ethervoid , looks good! i just rewrote the English a bit. Let me know if this is okay then we can update all the live docs.
2017-10-18 08:19:34 -04:00
Mario de Frutos
ff0989f8fc Merge pull request #314 from CartoDB/develop
Release 1.8.0
2017-10-18 10:16:55 +02:00
Mario de Frutos
0a753e95c0 Release 1.8.0 artifacts 2017-10-18 10:09:25 +02:00
Mario de Frutos
b62e3ea963 Merge pull request #313 from CartoDB/add_numgeoms_getavailablegeometries
OBS_GetAvailableGeometries now receives number of geometries from input
2017-10-18 10:05:46 +02:00
Mario de Frutos
1da0b8cb6b Update doc with new field 2017-10-18 10:00:15 +02:00
csobier
a39de46531 docs fixed links
@inigomedina , just a docs url fix. Need to merge to fix live docs. Thanks!
2017-10-13 08:14:06 -04:00
Mario de Frutos
94b8e7492d OBS_GetAvailableGeometries now receives number of geometries from input
We need the number of geometries to pass them to the get score function
in order to get an accurate score for the input in order to suggest
what is the geometry that fits better for the input we have
2017-10-10 18:09:26 +02:00
Antonio Carlón
91ece26c06 Merge pull request #311 from CartoDB/remove_wof_tests
Remove WOF perftests.
2017-09-25 17:00:58 +02:00
Javier Torres
74b9d209c0 Use precise for travis tests, cartodb ppas don't have trusty anymore 2017-09-21 16:16:13 +02:00
Javier Torres
4ae889dfdc Remove WOF perftests. This is needed for tests to pass since we don't have WOF in our current dump 2017-09-21 10:36:05 +02:00
Mario de Frutos
3353ad0a32 Update NEWS.md 2017-08-18 16:43:39 +02:00
Mario de Frutos
b4ef3c77a9 Merge pull request #306 from CartoDB/develop
Release 1.7.0
2017-08-18 16:41:21 +02:00
Mario de Frutos
90a2421b6e Merge pull request #305 from CartoDB/obs_metadatavalidation_doc
OBS_MetadataValidation doc
2017-08-18 16:35:26 +02:00
Mario de Frutos
fd21709ca1 Fix missing schema for FIRST function 2017-08-18 16:20:13 +02:00
Javier Torres
3791511d7d Merge pull request #308 from CartoDB/307-TestsForDifferentPointsFixed
307 tests for different points fixed
2017-08-18 15:13:06 +02:00
Antonio
fad541c3fc Fixed broken tests and refactor 2017-08-18 11:19:15 +02:00
Antonio
48ed086fec Fixed tests for different test points per numerator 2017-08-17 16:31:40 +02:00
csobier
7e550cf909 applied quick copyedit to new docs code added 2017-08-11 08:00:00 -04:00
Mario de Frutos
6ab17bf8be New version 1.7.0 artifacts 2017-08-10 14:19:37 +02:00
Mario de Frutos
1f7f8015ad OBS_MetadataValidation doc 2017-08-10 13:29:42 +02:00
Mario de Frutos
6066ef028d Merge pull request #303 from CartoDB/precheck_metadata
OBS_MetadataValidation
2017-08-09 17:45:24 +02:00
Mario de Frutos
3c2e997a85 Add travis support to execute the tests 2017-08-09 17:16:47 +02:00
Mario de Frutos
cef99c6343 OBS_MetadataValidation
New function that checks the metadata for known errors. For example,
metadata for a median aggregation whose normalization is by area
will fail.
2017-08-09 16:11:10 +02:00
Mario de Frutos
50d975ce9b Generate new fixtures to include new meta table
- Include the obs_meta_geom_numer_timespan table
2017-08-09 16:10:39 +02:00
Javier Torres
c56633dd2a Format NEWS.md 2017-07-31 10:15:00 +02:00
Michelle Ho
2b26c5ad64 fixing parentheses for obs_getdata with ids 2017-07-24 13:13:22 -04:00
31 changed files with 194507 additions and 15760 deletions

View File

@@ -2,6 +2,11 @@
I'd like to request a new data observatory extension deploy: dump + extension
**VERY IMPORTANT!!!**
PLEASE USE `python scripts/generate_fixtures.py` TO GENERATE NEW FIXTURES FOR
THE NEW DUMP AND OVERWRITE THE EXISTING ONES IN THIS PROJECT BEFORE RUNNING THE TESTS
## Performance comparison to last deployment
Please include link here to comparison perftests:
@@ -18,6 +23,6 @@ Please put here the dump id to be deployed: <dump_id>
Add down here the PR links to be added and deployed:
-
-
// @CartoDB/dataservices
// @CartoDB/datateam

23
.travis.yml Normal file
View File

@@ -0,0 +1,23 @@
# Travis CI configuration: build the extension and run its regression tests
# against several PostgreSQL/PostGIS combinations.
language: c
group: travis_stable
sudo: required

env:
  global:
    # NOTE(review): presumably set so command-line tools never page output
    # in the non-interactive CI log — confirm.
    - PAGER=cat

# One job per PostgreSQL/PostGIS pair; both variables are consumed by
# scripts/ci/install_postgres.sh.
matrix:
  include:
    - env: PGSQL_VERSION=9.5 POSTGIS_VERSION=2.2
    - env: PGSQL_VERSION=9.6 POSTGIS_VERSION=2.3
    - env: PGSQL_VERSION=10 POSTGIS_VERSION=2.4

before_install:
  - sudo bash $TRAVIS_BUILD_DIR/scripts/ci/install_postgres.sh

install:
  - sudo make install

script:
  - cd src/pg
  # On failure, dump the regression diffs into the build log, then still
  # fail the job (`false`) so the build is marked as broken.
  - make test || { cat $TRAVIS_BUILD_DIR/src/pg/test/regression.diffs; false; }

View File

@@ -28,8 +28,8 @@ Run the tests with `make test`.
Update extension in a working database with:
```
ALTER EXTENSION observatory VERSION TO 'current';
ALTER EXTENSION observatory VERSION TO 'dev';
ALTER EXTENSION observatory UPDATE TO 'current';
ALTER EXTENSION observatory UPDATE TO 'dev';
```
Note: we keep the current development version install as 'dev' always;

62
NEWS.md
View File

@@ -1,4 +1,40 @@
1.9.0 (2018-04-20)
------------------
__Improvements__
* Improved `OBS_GetAvailableGeometries` for the DO Timespans project ([#325](https://github.com/CartoDB/observatory-extension/pull/325))
* Improved `OBS_GetAvailableTimespans` for the DO Timespans project ([#324](https://github.com/CartoDB/observatory-extension/pull/324))
* Modified the denominated suggested_name to mitigate collisions ([#327](https://github.com/CartoDB/observatory-extension/pull/327))
* Fixed some errors so now the extension supports PostgreSQL 10 ([#329](https://github.com/CartoDB/observatory-extension/pull/329))
* Fixed documentation
* Add support for multiple PostgreSQL and Postgis versions in our travis script for test purposes
1.8.0 (2017-10-18)
------------------
__Improvements__
* Add `number_geometries` field to `OBS_GetAvailableGeometries` in order to provide the number of geometries from the source data to be used in the score calculation ([#313](https://github.com/CartoDB/observatory-extension/issues/313))
1.7.0 (2017-08-18)
------------------
__Improvements__
* Add Travis support to execute the extension tests ([#183](https://github.com/CartoDB/observatory-extension/issues/183))
__API Changes__
* Add new function `OBS_MetadataValidation` ([#303](https://github.com/CartoDB/observatory-extension/pull/303))
__Bugfixes__
* Fixed parentheses for obs_getdata with ids
* Fixed failing tests due to changes in the data dump for some TIGER geometries
1.6.0 (2017-07-20)
------------------
__Improvements__
@@ -17,6 +53,7 @@ __Bugfixes__
1.5.1 (2017-05-16)
------------------
__Improvements__
@@ -26,6 +63,7 @@ __Improvements__
([#285](https://github.com/CartoDB/observatory-extension/pull/285))
1.5.0 (2017-04-24)
------------------
__API Changes__
@@ -39,6 +77,7 @@ __API Changes__
([#282](https://github.com/CartoDB/observatory-extension/pull/282))
1.4.0 (2017-03-21)
------------------
__API Changes__
@@ -59,16 +98,19 @@ __Improvements__
boundary selection
1.3.5 (2017-03-15)
------------------
No changes. Artifact to allow for data update.
1.3.4 (2017-03-10)
------------------
__Bugfixes__
* Remove erroneously committed `RAISE NOTICE` in `OBS_GetData`
1.3.3 (2017-03-10)
------------------
__Bugfixes__
@@ -91,6 +133,7 @@ __Improvements__
([#267](https://github.com/CartoDB/observatory-extension/pull/267))
1.3.2 (2017-03-02)
------------------
__Bugfixes__
@@ -98,6 +141,7 @@ __Bugfixes__
This fixes issues with Camshaft.
1.3.1 (2017-02-16)
------------------
__Improvements__
@@ -108,6 +152,7 @@ __Improvements__
called for measures for polygons
1.3.0 (2017-01-17)
------------------
__API Changes__
@@ -132,9 +177,8 @@ __Bugfixes__
* Remove unnecessary dependency on `postgres_fdw`
* `OBS_GetData()` now aggregates measures with mixed geoms correctly
__API Changes__
1.2.1 (2017-01-17)
------------------
__Improvements__
@@ -142,6 +186,7 @@ __Improvements__
([#243](https://github.com/CartoDB/observatory-extension/pull/233))
1.2.0 (2016-12-28)
------------------
__API Changes__
@@ -162,6 +207,7 @@ __Improvements__
* Return both `table_id` and `column_id` from `_OBS_GetGeometryScores`
1.1.7 (2016-12-15)
------------------
__Improvements__
@@ -174,6 +220,7 @@ __Improvements__
* Yields a ~50% improvement in performance for `_OBS_GetGeometryScores`.
1.1.6 (2016-12-08)
------------------
__Bugfixes__
@@ -200,6 +247,7 @@ __Improvements__
- Add ability to persist results to JSON for graph visualization later
1.1.5 (2016-11-29)
------------------
__Bugfixes__
@@ -207,6 +255,7 @@ __Bugfixes__
a geometry where it does not exist ([#220](https://github.com/CartoDB/observatory-extension/issues/220)).
1.1.4 (2016-11-21)
------------------
__Bugfixes__
@@ -214,10 +263,12 @@ __Bugfixes__
`OBS_GetLegacyMetadata` ([#216](https://github.com/CartoDB/observatory-extension/issues/216)).
1.1.3 (2016-11-15)
------------------
* Temporarily ignore EU data for the sake of testing
1.1.2 (2016-11-09)
------------------
__Improvements__
@@ -233,12 +284,14 @@ __API Changes (Internal)__
* Add internal `_OBS_GetGeometryScores`
1.1.1 (2016-10-14)
------------------
__Improvements__
* Test points for Canada and France ([#204](https://github.com/CartoDB/observatory-extension/issues/120))
1.1.0 (2016-10-04)
------------------
__Bugfixes__
@@ -261,6 +314,7 @@ __API Changes__
is also referred to here ([CartoDB/design#68](https://github.com/CartoDB/design/issues/68)).
1.0.7 (2016-09-20)
------------------
__Bugfixes__
@@ -272,6 +326,7 @@ __Improvements__
* Automatic tests work for Canada and Thailand
1.0.6 (2016-09-08)
------------------
__Improvements__
@@ -279,6 +334,7 @@ __Improvements__
framework logic from the observatory measure functions.
1.0.5 (2016-08-12)
------------------
__Improvements__
@@ -286,6 +342,7 @@ __Improvements__
any HTTP SQL API.
1.0.4 (2016-07-26)
------------------
__Bugfixes__
@@ -294,6 +351,7 @@ __Bugfixes__
([#173](https://github.com/CartoDB/observatory-extension/issues/173))
1.0.3 (2016-07-25)
------------------
__Bugfixes__

View File

@@ -2,7 +2,7 @@
Use the following functions to retrieve [Boundary](https://carto.com/docs/carto-engine/data/overview/#boundary-data) data. Data ranges from small areas (e.g. US Census Block Groups) to large areas (e.g. Countries). You can access boundaries by point location lookup, bounding box lookup, direct ID access and several other methods described below.
You can [access](https://carto.com/docs/carto-engine/data/accessing) boundaries through CARTO Builder. The same methods will work if you are using the CARTO Engine to develop your application. We [encourage you](http://docs/carto-engine/data/accessing/#best-practices) to use table modifying methods (UPDATE and INSERT) over dynamic methods (SELECT).
You can [access](https://carto.com/docs/carto-engine/data/accessing) boundaries through CARTO Builder. The same methods will work if you are using the CARTO Engine to develop your application. We [encourage you](https://carto.com/docs/carto-engine/data/accessing/#best-practices) to use table modifying methods (UPDATE and INSERT) over dynamic methods (SELECT).
## OBS_GetBoundariesByGeometry(geom geometry, geometry_id text)
@@ -123,7 +123,7 @@ SET the_geom = OBS_GetBoundary(the_geom, 'us.census.tiger.block_group')
## OBS_GetBoundaryId(point_geometry, boundary_id)
The ```OBS_GetBoundaryId(point_geometry, boundary_id)``` returns a unique geometry_id for the boundary geometry that contains a given point geometry. See the [Boundary ID Glossary](http://docs/carto-engine/data/glossary/#boundary-ids). The method can be combined with ```OBS_GetBoundaryById(geometry_id)``` to create a point aggregation workflow.
The ```OBS_GetBoundaryId(point_geometry, boundary_id)``` returns a unique geometry_id for the boundary geometry that contains a given point geometry. See the [Boundary ID Glossary](https://carto.com/docs/carto-engine/data/glossary/#boundary-ids). The method can be combined with ```OBS_GetBoundaryById(geometry_id)``` to create a point aggregation workflow.
#### Arguments

View File

@@ -97,7 +97,7 @@ valid_timespan | Boolean | True if the `timespan` argument is a valid timespan f
Obtain all numerators that are available within a small rectangle.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326))
```
@@ -105,7 +105,7 @@ Obtain all numerators that are available within a small rectangle and are for
the United States only.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}');
```
@@ -113,7 +113,7 @@ Obtain all numerators that are available within a small rectangle and are
employment related for the United States only.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states, subsection/tags.employment}');
```
@@ -121,7 +121,7 @@ Obtain all numerators that are available within a small rectangle and are
related to both employment and age & gender for the United States only.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states, subsection/tags.employment, subsection/tags.age_gender}');
```
@@ -129,7 +129,7 @@ Obtain all numerators that work with US population (`us.census.acs.B01003001`)
as a denominator.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001')
WHERE valid_denom IS True;
```
@@ -138,7 +138,7 @@ Obtain all numerators that work with US states (`us.census.tiger.state`)
as a geometry.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, 'us.census.tiger.state')
WHERE valid_geom IS True;
```
@@ -146,7 +146,7 @@ WHERE valid_geom IS True;
Obtain all numerators available in the timespan `2011 - 2015`.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2011 - 2015')
WHERE valid_timespan IS True;
```
@@ -191,7 +191,7 @@ valid_timespan | Boolean | True if the `timespan` argument is a valid timespan f
Obtain all denominators that are available within a small rectangle.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableDenominators(
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326));
```
@@ -199,14 +199,14 @@ Obtain all denominators that are available within a small rectangle and are for
the United States only.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableDenominators(
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}');
```
Obtain all denominators for male population (`us.census.acs.B01001002`).
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableDenominators(
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01001002')
WHERE valid_numer IS True;
```
@@ -215,7 +215,7 @@ Obtain all denominators that work with US states (`us.census.tiger.state`)
as a geometry.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableDenominators(
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, 'us.census.tiger.state')
WHERE valid_geom IS True;
```
@@ -223,12 +223,12 @@ WHERE valid_geom IS True;
Obtain all denominators available in the timespan `2011 - 2015`.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableDenominators(
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2011 - 2015')
WHERE valid_timespan IS True;
```
## OBS_GetAvailableGeometries(bounds, filter_tags, numer_id, denom_id, timespan)
## OBS_GetAvailableGeometries(bounds, filter_tags, numer_id, denom_id, timespan, number_geometries)
Return available geometries within a boundary and with the specified
`filter_tags`.
@@ -242,6 +242,7 @@ filter_tags | Text[] | a list of filters. Only geometries for which all of thes
numer_id | Text | the ID of a numerator to check whether the geometry is valid against. Will not reduce length of returned table, but will change values for `valid_numer` (optional)
denom_id | Text | the ID of a denominator to check whether the geometry is valid against. Will not reduce length of returned table, but will change values for `valid_denom` (optional)
timespan | Text | the ID of a timespan to check whether the geometry is valid against. Will not reduce length of returned table, but will change values for `valid_timespan` (optional)
number_geometries | Integer | an additional variable that is used to adjust the calculation of the [score](https://carto.com/docs/carto-engine/data/discovery-functions/#returns-4) (optional)
#### Returns
@@ -274,7 +275,7 @@ meanmediansize | Numeric | Ignored
Obtain all geometries that are available within a small rectangle.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableGeometries(
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326));
```
@@ -282,14 +283,14 @@ Obtain all geometries that are available within a small rectangle and are for
the United States only.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableGeometries(
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}');
```
Obtain all geometries that work with total population (`us.census.acs.B01003001`).
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableGeometries(
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001')
WHERE valid_numer IS True;
```
@@ -297,7 +298,7 @@ WHERE valid_numer IS True;
Obtain all geometries with timespan `2015`.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableGeometries(
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2015')
WHERE valid_timespan IS True;
```
@@ -342,14 +343,14 @@ valid_geom | Boolean | True if the `geom_id` argument is a valid geometry for th
Obtain all timespans that are available within a small rectangle.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableTimespans(
SELECT * FROM OBS_GetAvailableTimespans(
ST_MakeEnvelope(-74, 41, -73, 40, 4326));
```
Obtain all timespans for total population (`us.census.acs.B01003001`).
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableTimespans(
SELECT * FROM OBS_GetAvailableTimespans(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001')
WHERE valid_numer IS True;
```
@@ -358,7 +359,7 @@ Obtain all timespans that work with US states (`us.census.tiger.state`)
as a geometry.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableTimespans(
SELECT * FROM OBS_GetAvailableTimespans(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, 'us.census.tiger.state')
WHERE valid_geom IS True;
```

View File

@@ -108,7 +108,7 @@ The ```OBS_GetMeasure(polygon, measure_id)``` function returns any Data Observat
Name |Description
--- | ---
polygon_geometry | a WGS84 polygon geometry (the_geom)
measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf))
measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf))
normalize | for measures that are **sums** (e.g. population) the default normalization is 'none' and response comes back as a raw value. Other options are 'denominator', which will use the denominator specified in the [Data Catalog](https://cartodb.github.io/bigmetadata/index.html) (optional)
boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract')
time_span | time span of interest (e.g., 2010 - 2014)
@@ -143,7 +143,7 @@ The ```OBS_GetMeasureById(geom_ref, measure_id, boundary_id)``` function returns
Name |Description
--- | ---
geom_ref | a geometry reference (e.g., a US Census geoid)
measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf))
measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf))
boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract')
time_span (optional) | time span of interest (e.g., 2010 - 2014). If `NULL` is passed, the measure from the most recent data will be used.
@@ -215,7 +215,7 @@ extent | A geometry of the extent of the input geometries
metadata | A JSON array composed of metadata input objects. Each indicates one desired measure for an output column, and optionally additional parameters about that column
num_timespan_options | How many historical time periods to include. Defaults to 1
num_score_options | How many alternative boundary levels to include. Defaults to 1
target_geoms | Target number of geometries. Boundaries with close to this many objects within `extent` will be ranked highest.
target_geoms | Target number of geometries. Boundaries with close to this many objects within `extent` will be ranked highest.
The schema of the metadata input objects are as follows:
@@ -321,6 +321,55 @@ SELECT OBS_GetMeta(
) FROM tablename
```
## OBS_MetadataValidation(extent geometry, geometry_type text, metadata json, target_geoms)
The ```OBS_MetadataValidation``` function performs a validation check over the known issues using the extent, type of geometry, and metadata that is being used in the ```OBS_GetMeta``` function.
#### Arguments
Name | Description
---- | -----------
extent | A geometry of the extent of the input geometries
geometry_type | The geometry type of the source data
metadata | A JSON array composed of metadata input objects. Each indicates one desired measure for an output column, and optional additional parameters about that column
target_geoms | Target number of geometries. Boundaries with close to this many objects within `extent` will be ranked highest
The schema of the metadata input objects are as follows:
Metadata Input Key | Description
--- | -----------
numer_id | The identifier for the desired measurement. If left blank, a `geom_id` is specified and the column returns a geometry, instead of a measurement
geom_id | Identifier for a desired geographic boundary level used to calculate measures. If undefined, this is automatically assigned. If defined, `numer_id` is blank and the column returns a geometry, instead of a measurement
normalization | The desired normalization. One of 'area', 'prenormalized', or 'denominated'. 'Area' will normalize the measure per square kilometer, 'prenormalized' will return the original value, and 'denominated' will normalize by a denominator. If the metadata object specifies a geometry, this is ignored
denom_id | When `normalization` is 'denominated', this is the identifier for a desired normalization column. This is automatically assigned. If the metadata object specifies a geometry, this is ignored
numer_timespan | The desired timespan for the measurement. If left unspecified, it defaults to the most recent timespan available
geom_timespan | The desired timespan for the geometry. If left unspecified, it defaults to the timespan matching `numer_timespan`
target_area | Instead of aiming to have `target_geoms` in the area of the geometry passed as `extent`, fill this area. Unit is square degrees WGS84. Set this to `0` if you want to use the smallest source geometry for this element of metadata. For example, if you are passing in points
target_geoms | Override global `target_geoms` for this element of metadata
max_timespan_rank | Only include timespans of this recency (For example, `1` is only the most recent timespan). There is no limit by default
max_score_rank | Only include boundaries of this relevance (for example, `1` is the most relevant boundary). The default is `1`
#### Returns
Key | Description
--- | -----------
valid | A boolean field that represents if the validation was successful or not
errors | A text array with all possible errors
#### Examples
Validate metadata with two additional columns of US census data; using a boundary relevant for the geometry provided and the latest timespan. Limited to the most recent column, and the most relevant, based on the extent and density of input geometries in `tablename`.
```SQL
SELECT OBS_MetadataValidation(
ST_SetSRID(ST_Extent(the_geom), 4326),
ST_GeometryType(the_geom),
'[{"numer_id": "us.census.acs.B01003001"}, {"numer_id": "us.census.acs.B01001002"}]',
COUNT(*)::INTEGER
) FROM tablename
GROUP BY ST_GeometryType(the_geom)
```
## OBS_GetData(geomvals array[geomval], metadata json)
The ```OBS_GetData(geomvals, metadata)``` function returns a measure and/or
@@ -465,7 +514,7 @@ WITH meta AS (
'[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.county"}]'
) meta FROM tablename)
SELECT id AS fips, (data->0->>'value')::Numeric AS pop_density
FROM OBS_GetData((SELECT ARRAY_AGG((fips) FROM tablename),
FROM OBS_GetData((SELECT ARRAY_AGG(fips) FROM tablename),
(SELECT meta FROM meta))
```
@@ -481,7 +530,7 @@ WITH meta AS (
) meta FROM tablename),
data as (
SELECT id AS fips, (data->0->>'value') AS pop_density
FROM OBS_GetData((SELECT ARRAY_AGG((fips) FROM tablename),
FROM OBS_GetData((SELECT ARRAY_AGG(fips) FROM tablename),
(SELECT meta FROM meta)))
UPDATE tablename
SET pop_density = data.pop_density

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,5 @@
comment = 'CartoDB Observatory backend extension'
default_version = '1.6.0'
default_version = '1.9.0'
requires = 'postgis'
superuser = true
schema = cdb_observatory

View File

@@ -0,0 +1,38 @@
#!/bin/bash
#
# Install the PostgreSQL/PostGIS versions under test on a CI worker.
#
# Required environment variables:
#   PGSQL_VERSION    PostgreSQL version to keep/install (e.g. 9.5, 9.6, 10)
#   POSTGIS_VERSION  PostGIS version to install (e.g. 2.2, 2.3, 2.4)
#
# Needs root privileges: it manages apt packages and writes under /etc.

# echo commands
set -x

# exit on error
set -e

# Fail early with a clear message instead of building malformed package
# names (e.g. "postgresql-") further down.
: "${PGSQL_VERSION:?PGSQL_VERSION must be set}"
: "${POSTGIS_VERSION:?POSTGIS_VERSION must be set}"

# Diagnostic listing only. grep exits non-zero when nothing matches, which
# would abort the script under `set -e`, so tolerate an empty result here.
dpkg -l | grep postgresql || true

# Add the PGDG repository
apt-key adv --keyserver keys.gnupg.net --recv-keys 7FCC7D46ACCC4CF8
add-apt-repository "deb http://apt.postgresql.org/pub/repos/apt/ trusty-pgdg main"
apt-get update

# Remove all PgSQL versions except the one we're testing
PGSQL_VERSIONS=(9.2 9.3 9.4 9.5 9.6 10)
/etc/init.d/postgresql stop # stop travis default instance
for V in "${PGSQL_VERSIONS[@]}"; do
    if [ "$V" != "$PGSQL_VERSION" ]; then
        apt-get -y remove --purge "postgresql-${V}" "postgresql-client-${V}" "postgresql-contrib-${V}" "postgresql-${V}-postgis-2.3-scripts"
    else
        # Keep the server under test but drop any PostGIS 2.3 scripts package;
        # the requested PostGIS version is installed below.
        apt-get -y remove --purge "postgresql-${V}-postgis-2.3-scripts"
    fi
done
apt-get -y autoremove

# Install PostgreSQL
apt-get -y install "postgresql-${PGSQL_VERSION}" "postgresql-${PGSQL_VERSION}-postgis-${POSTGIS_VERSION}" "postgresql-server-dev-${PGSQL_VERSION}"

# Configure it to accept local connections from postgres
echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" > "/etc/postgresql/${PGSQL_VERSION}/main/pg_hba.conf"

# Restart PostgreSQL
/etc/init.d/postgresql restart "${PGSQL_VERSION}"

# Final listing doubles as a sanity check: left strict on purpose, so the
# script fails if no postgresql package ended up installed.
dpkg -l | grep postgresql

View File

@@ -2,12 +2,18 @@ import os
import psycopg2
import subprocess
PGUSER = os.environ.get('PGUSER', 'postgres')
PGPASSWORD = os.environ.get('PGPASSWORD', '')
PGHOST=os.environ.get('PGHOST', 'localhost')
PGPORT=os.environ.get('PGPORT', '5432')
PGDATABASE=os.environ.get('PGDATABASE', 'postgres')
DB_CONN = psycopg2.connect('postgres://{user}:{password}@{host}:{port}/{database}'.format(
user=os.environ.get('PGUSER', 'postgres'),
password=os.environ.get('PGPASSWORD', ''),
host=os.environ.get('PGHOST', 'localhost'),
port=os.environ.get('PGPORT', '5432'),
database=os.environ.get('PGDATABASE', 'postgres'),
user=PGUSER,
password=PGPASSWORD,
host=PGHOST,
port=PGPORT,
database=PGDATABASE
))
CURSOR = DB_CONN.cursor()
@@ -51,9 +57,9 @@ def get_tablename_query(column_id, boundary_id, timespan):
METADATA_TABLES = ['obs_table', 'obs_column_table', 'obs_column', 'obs_column_tag',
'obs_tag', 'obs_column_to_column', 'obs_dump_version', 'obs_meta',
'obs_meta_numer', 'obs_meta_denom', 'obs_meta_geom',
'obs_meta_timespan', 'obs_column_table_tile',
'obs_column_table_tile_simple']
'obs_table_to_table', 'obs_meta_numer', 'obs_meta_denom',
'obs_meta_geom', 'obs_meta_timespan', 'obs_meta_geom_numer_timespan',
'obs_column_table_tile', 'obs_column_table_tile_simple']
FIXTURES = [
('us.census.acs.B01003001_quantile', 'us.census.tiger.census_tract', '2010 - 2014'),
@@ -207,15 +213,13 @@ FIXTURES = [
('us.census.spielman_singleton_segments.X55', 'us.census.tiger.census_tract', '2010 - 2014'),
('us.zillow.AllHomes_Zhvi', 'us.census.tiger.zcta5', '2014-01'),
('us.zillow.AllHomes_Zhvi', 'us.census.tiger.zcta5', '2016-06'),
('whosonfirst.wof_country_name', 'whosonfirst.wof_country_geom', '2016'),
('us.census.acs.B01003001', 'us.census.tiger.zcta5_clipped', '2010 - 2014'),
('us.census.acs.B01003001', 'us.census.tiger.block_group_clipped', '2010 - 2014'),
('us.census.acs.B01003001', 'us.census.tiger.census_tract_clipped', '2010 - 2014'),
('us.census.tiger.fullname', 'us.census.tiger.pointlm_geom', '2016'),
('us.census.tiger.fullname', 'us.census.tiger.prisecroads_geom', '2016'),
('us.census.tiger.name', 'us.census.tiger.county', '2015'),
('us.census.tiger.name', 'us.census.tiger.county_clipped', '2015'),
('us.census.tiger.name', 'us.census.tiger.block_group', '2015'),
('us.census.acs.B01003001', 'us.census.tiger.zcta5', '2010 - 2014'),
('us.census.acs.B01003001', 'us.census.tiger.block_group', '2010 - 2014'),
('us.census.acs.B01003001', 'us.census.tiger.census_tract', '2010 - 2014'),
('us.census.tiger.place_geoname', 'us.census.tiger.place_clipped', '2015'),
('us.census.tiger.county_geoname', 'us.census.tiger.county_clipped', '2015'),
('us.census.tiger.county_geoname', 'us.census.tiger.county', '2015'),
('us.census.tiger.block_group_geoname', 'us.census.tiger.block_group', '2015'),
]
OUTFILE_PATH = os.path.join(os.path.dirname(__file__), '..',
@@ -230,27 +234,35 @@ def dump(cols, tablename, where=''):
tablename=tablename,
))
subprocess.check_call('pg_dump -x --section=pre-data -t observatory.{tablename} '
subprocess.check_call('PGPASSWORD={pgpassword} PGUSER={pguser} PGHOST={pghost} PGDATABASE={pgdb} '
'pg_dump -x --section=pre-data -t observatory.{tablename} '
' | sed "s:SET search_path.*::" '
' | sed "s:CREATE TABLE :CREATE TABLE observatory.:" '
' | sed "s:ALTER TABLE.*OWNER.*::" '
' | sed "s:SET idle_in_transaction_session_timeout.*::" '
' >> {outfile}'.format(
tablename=tablename,
outfile=OUTFILE_PATH,
pgpassword=PGPASSWORD,
pghost=PGHOST,
pgdb=PGDATABASE,
pguser=PGUSER
), shell=True)
with open(OUTFILE_PATH, 'a') as outfile:
outfile.write('COPY observatory."{}" FROM stdin WITH CSV HEADER;\n'.format(tablename))
subprocess.check_call('''
psql -c "COPY (SELECT {cols} \
PGPASSWORD={pgpassword} psql -U {pguser} -d {pgdb} -h {pghost} -c "COPY (SELECT {cols} \
FROM observatory.{tablename} {where}) \
TO STDOUT WITH CSV HEADER" >> {outfile}'''.format(
cols=cols,
tablename=tablename,
where=where,
outfile=OUTFILE_PATH,
pgpassword=PGPASSWORD,
pghost=PGHOST,
pgdb=PGDATABASE,
pguser=PGUSER
), shell=True)
with open(OUTFILE_PATH, 'a') as outfile:
@@ -347,6 +359,10 @@ def main():
timespans=','.join(["'{}'".format(x) for _, _, x in FIXTURES]),
table_ids=','.join(["'{}'".format(x) for _, _, x in unique_tables])
)
elif tablename in ('obs_table_to_table'):
where = '''WHERE source_id IN ({table_ids})'''.format(
table_ids=','.join(["'{}'".format(x) for _, _, x in unique_tables])
)
else:
where = ''
dump('*', tablename, where)
@@ -355,12 +371,6 @@ def main():
if 'zcta5' in table_id or 'zillow_zip' in table_id:
where = '\'11%\''
compare = 'LIKE'
elif 'pri_sec_roads' in table_id or 'point_landmark' in table_id:
dump('*', tablename, 'WHERE geom && ST_MakeEnvelope(-74,40.69,-73.9,40.72, 4326)')
continue
elif 'whosonfirst' in table_id:
where = "('85632785','85633051','85633111','85633147','85633253','85633267')"
compare = 'IN'
elif 'county' in table_id and 'tiger' in table_id:
where = "('48061', '36047')"
compare = 'IN'

View File

@@ -1,3 +1,4 @@
requests
nose
nose_parameterized
psycopg2

View File

@@ -1,5 +1,5 @@
comment = 'CartoDB Observatory backend extension'
default_version = '1.6.0'
default_version = '1.9.0'
requires = 'postgis'
superuser = true
schema = cdb_observatory

View File

@@ -249,10 +249,10 @@ BEGIN
'suggested_name', cdb_observatory.FIRST(
LOWER(TRIM(BOTH '_' FROM regexp_replace(CASE WHEN numer_id IS NOT NULL
THEN CASE
WHEN normalization ILIKE 'area%%' THEN numer_colname || ' per sq km'
WHEN normalization ILIKE 'denom%%' THEN numer_colname || ' rate'
ELSE numer_colname
END || ' ' || numer_timespan
WHEN normalization ILIKE 'area%%' THEN numer_colname || ' per sq km' || ' ' || numer_timespan
WHEN normalization ILIKE 'denom%%' THEN numer_colname || ' ' || numer_timespan || ' by ' || denom_colname
ELSE numer_colname || ' ' || numer_timespan
END
ELSE geom_name || ' ' || geom_timespan
END, '[^a-zA-Z0-9]+', '_', 'g')))
),
@@ -1076,3 +1076,46 @@ BEGIN
RETURN result;
END;
$$ LANGUAGE plpgsql STABLE;
-- MetadataValidation checks the metadata parameters and the geometry type
-- of the data in order to find possible wrong cases.
--
-- Parameters:
--   geometry_extent  geometry passed as the first argument of OBS_GetMeta
--   geometry_type    geometry type name; only 'ST_Polygon' and
--                    'ST_MultiPolygon' trigger the checks below
--   params           JSON metadata request passed as the second argument of
--                    OBS_GetMeta
--   target_geoms     passed as the fifth argument of OBS_GetMeta (default NULL)
--
-- Returns a single row (valid, errors): valid is FALSE and errors holds the
-- collected messages when at least one check failed; otherwise valid is TRUE
-- and errors is an empty array. Any other geometry_type is always reported
-- as valid without calling OBS_GetMeta.
CREATE OR REPLACE FUNCTION cdb_observatory.obs_metadatavalidation(
geometry_extent geometry(Geometry, 4326),
geometry_type text,
params JSON,
target_geoms INTEGER DEFAULT NULL
)
RETURNS TABLE(valid boolean, errors text[]) AS $$
DECLARE
meta json;
-- NOTE(review): this local shares its name with the `errors` output column;
-- presumably that is why the failure row is produced through
-- EXECUTE ... USING below -- confirm before simplifying.
errors text[];
BEGIN
-- Start from an empty list so CARDINALITY() below is 0 when nothing fails
errors := (ARRAY[])::TEXT[];
IF geometry_type IN ('ST_Polygon', 'ST_MultiPolygon') THEN
-- Iterate over every element of the JSON array returned by OBS_GetMeta
FOR meta IN EXECUTE 'SELECT json_array_elements(cdb_observatory.OBS_GetMeta($1, $2, 1, 1, $3))' USING geometry_extent, params, target_geoms
LOOP
-- 'denominated' normalization without a denominator
IF (meta->>'normalization' = 'denominated' AND meta->>'denom_id' is NULL) THEN
errors := array_append(errors, 'Normalizated measure should have a numerator and a denominator. Please review the provided options.');
END IF;
-- Polygons need an aggregation defined on the numerator
IF (meta->>'numer_aggregate' IS NULL) THEN
errors := array_append(errors, 'For polygon geometries, aggregation is mandatory. Please review the provided options');
END IF;
-- Median/average aggregation without a denominator
IF (meta->>'numer_aggregate' IN ('median', 'average') AND meta->>'denom_id' IS NULL) THEN
errors := array_append(errors, 'Median or average aggregation for polygons requires a denominator to provide weights. Please review the provided options');
END IF;
-- Median/average aggregation with a normalization that does not start with 'pre'
IF (meta->>'numer_aggregate' IN ('median', 'average') AND meta->>'normalization' NOT LIKE 'pre%') THEN
errors := array_append(errors, format('Median or average aggregation only supports prenormalized normalization, %s passed. Please review the provided options', meta->>'normalization'));
END IF;
END LOOP;
IF CARDINALITY(errors) > 0 THEN
-- At least one check failed: report invalid together with the messages
RETURN QUERY EXECUTE 'SELECT FALSE, $1' USING errors;
ELSE
RETURN QUERY SELECT TRUE, ARRAY[]::TEXT[];
END IF;
ELSE
-- Geometry types other than polygons are not validated
RETURN QUERY SELECT TRUE, ARRAY[]::TEXT[];
END IF;
RETURN;
END;
$$ LANGUAGE plpgsql STABLE;

View File

@@ -323,7 +323,8 @@ CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetAvailableGeometries(
filter_tags TEXT[] DEFAULT NULL,
numer_id TEXT DEFAULT NULL,
denom_id TEXT DEFAULT NULL,
timespan TEXT DEFAULT NULL
timespan TEXT DEFAULT NULL,
number_geoms INTEGER DEFAULT NULL
) RETURNS TABLE (
geom_id TEXT,
geom_name TEXT,
@@ -382,7 +383,7 @@ BEGIN
CASE WHEN $1 IS NOT NULL AND $1 != '' THEN
EXISTS (SELECT 1 FROM observatory.obs_meta_geom_numer_timespan onu WHERE o.geom_id = onu.geom_id AND onu.numer_id = $1 AND ($3 = ANY(onu.timespans) OR $3 IN (select(unnest(o.timespans)))))
ELSE
EXISTS (SELECT 1 FROM observatory.obs_meta_geom_numer_timespan onu WHERE o.geom_id = onu.geom_id AND ($3 = ANY(onu.timespans) OR $3 IN (select(unnest(o.timespans)))))
EXISTS (SELECT 1 FROM observatory.obs_meta_geom_numer_timespan onu WHERE o.geom_id = onu.geom_id AND ($3 = ANY(onu.geom_timespans) OR $3 IN (select(unnest(o.timespans)))))
END
ELSE
false
@@ -390,15 +391,16 @@ BEGIN
FROM observatory.obs_meta_geom o
WHERE %s (geom_tags ?& $4 OR CARDINALITY($4) = 0)
), scores AS (
SELECT * FROM cdb_observatory._OBS_GetGeometryScores($5,
(SELECT ARRAY_AGG(geom_id) FROM available_geoms)
SELECT * FROM cdb_observatory._OBS_GetGeometryScores(bounds => $5,
filter_geom_ids => (SELECT ARRAY_AGG(geom_id) FROM available_geoms),
desired_num_geoms => $6::integer
)
) SELECT DISTINCT ON (geom_id) available_geoms.*, score, numtiles, notnull_percent, numgeoms,
percentfill, estnumgeoms, meanmediansize
FROM available_geoms, scores
WHERE available_geoms.geom_id = scores.column_id
$string$, geom_clause)
USING numer_id, denom_id, timespan, filter_tags, bounds;
USING numer_id, denom_id, timespan, filter_tags, bounds, number_geoms;
RETURN;
END
$$ LANGUAGE plpgsql;
@@ -446,12 +448,12 @@ BEGIN
NULL::TEXT timespan_aggregate,
NULL::TEXT timespan_license,
NULL::TEXT timespan_source,
NULL::TEXT timespan_type,
timespan_type::TEXT,
NULL::JSONB timespan_extra,
NULL::JSONB timespan_tags,
$1 = ANY(numers) valid_numer,
$2 = ANY(denoms) valid_denom,
$3 = ANY(geoms) valid_geom_id
COALESCE($1 = ANY(numers), false) valid_numer,
COALESCE($2 = ANY(denoms), false) valid_denom,
COALESCE($3 = ANY(geoms), false) valid_geom_id
FROM observatory.obs_meta_timespan
WHERE %s (timespan_tags ?& $4 OR CARDINALITY($4) = 0)
$string$, geom_clause)

View File

@@ -2,3 +2,171 @@
CREATE EXTENSION postgis;
-- Install the extension
CREATE EXTENSION observatory VERSION 'dev';
\i test/fixtures/load_fixtures.sql
SET client_min_messages TO WARNING;
\set ECHO none
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)

View File

@@ -1,6 +1,5 @@
\pset format unaligned
\set ECHO all
\i test/fixtures/load_fixtures.sql
SET client_min_messages TO WARNING;
\set ECHO none
_obs_geomtable_with_returned_table
@@ -21,7 +20,6 @@ t
obs_dumpversion_notnull
t
(1 row)
ERROR: Error performing intersection: TopologyException: found non-noded intersection between LINESTRING (-97.1968 25.9574, -97.1971 25.9576) and LINESTRING (-97.197 25.9575, -97.1972 25.9576) at -97.19699802694231 25.957551976080605
complex_safe_intersection_works
t
(1 row)

View File

@@ -240,12 +240,6 @@ t|t|t
id|correct_num_geoms|correct_pop|correct_bg_names
t|t|t|t
(1 row)
id|correct_num_points
t|t
(1 row)
id|correct_num_points|pointgeom_names
t|t|t
(1 row)
id|obs_getdata_by_id_one_measure_null
t|t
(1 row)
@@ -299,8 +293,17 @@ tract_sample|tract_max_error|tract_avg_error|tract_min_error
25|t|t|t
50|t|t|t
100|t|t|t
761|t|t|t
741|t|t|t
(9 rows)
no_bg_point_error
t
(1 row)
valid|errors
t|{}
(1 row)
valid|errors
f|{"Median or average aggregation only supports prenormalized normalization, denominated passed. Please review the provided options"}
(1 row)
valid|errors
f|{"Normalizated measure should have a numerator and a denominator. Please review the provided options."}
(1 row)

View File

@@ -232,15 +232,15 @@ us.census.tiger.zcta5|9
us.census.tiger.county|0
(4 rows)
column_id|_obs_geometryscores_numgeoms_50km_buffer
us.census.tiger.block_group|10817
us.census.tiger.block_group|10818
us.census.tiger.census_tract|3396
us.census.tiger.zcta5|484
us.census.tiger.zcta5|483
us.census.tiger.county|11
(4 rows)
column_id|_obs_geometryscores_numgeoms_500km_buffer
us.census.tiger.block_group|48567
us.census.tiger.census_tract|15823
us.census.tiger.zcta5|6466
us.census.tiger.block_group|48569
us.census.tiger.census_tract|15825
us.census.tiger.zcta5|6465
us.census.tiger.county|295
(4 rows)
column_id|_obs_geometryscores_numgeoms_2500km_buffer

View File

@@ -42,12 +42,6 @@ t
obs_getboundarybyid_boundary_id_mismatch_geom_id
t
(1 row)
_obs_getboundariesbygeometry_roads_around_cartodb
t
(1 row)
_obs_getboundariesbygeometry_points_around_cartodb
t
(1 row)
_obs_getboundariesbygeometry_tracts_around_cartodb
t
(1 row)
@@ -60,9 +54,6 @@ t
obs_getboundariesbygeometry_tracts_around_null_island
t
(1 row)
obs_getboundariesbygeometry_wof
t
(1 row)
obs_getboundariesbypointandradius_around_cartodb
t
(1 row)

View File

@@ -8,27 +8,28 @@ DROP TABLE IF EXISTS observatory.obs_tag;
DROP TABLE IF EXISTS observatory.obs_column_to_column;
DROP TABLE IF EXISTS observatory.obs_dump_version;
DROP TABLE IF EXISTS observatory.obs_meta;
DROP TABLE IF EXISTS observatory.obs_table_to_table;
DROP TABLE IF EXISTS observatory.obs_meta_numer;
DROP TABLE IF EXISTS observatory.obs_meta_denom;
DROP TABLE IF EXISTS observatory.obs_meta_geom;
DROP TABLE IF EXISTS observatory.obs_meta_timespan;
DROP TABLE IF EXISTS observatory.obs_meta_geom_numer_timespan;
DROP TABLE IF EXISTS observatory.obs_column_table_tile;
DROP TABLE IF EXISTS observatory.obs_column_table_tile_simple;
DROP TABLE IF EXISTS observatory.obs_78fb6c1d6ff6505225175922c2c389ce48d7632c;
DROP TABLE IF EXISTS observatory.obs_65f29658e096ca1485bf683f65fdbc9f05ec3c5d;
DROP TABLE IF EXISTS observatory.obs_1746e37b7cd28cb131971ea4187d42d71f09c5f3;
DROP TABLE IF EXISTS observatory.obs_fcd4e4f5610f6764973ef8c0c215b2e80bec8963;
DROP TABLE IF EXISTS observatory.obs_c4411eba732408d47d73281772dbf03d60645dec;
DROP TABLE IF EXISTS observatory.obs_1a098da56badf5f32e336002b0a81708c40d29cd;
DROP TABLE IF EXISTS observatory.obs_7615e8622a68bfc5fe37c69c9880edfb40250103;
DROP TABLE IF EXISTS observatory.obs_a01cd5d8ccaa6531cef715071e9307e6b1987ec3;
DROP TABLE IF EXISTS observatory.obs_6c1309a64d8f3e6986061f4d1ca7b57743e75e74;
DROP TABLE IF EXISTS observatory.obs_0310c639744a2014bb1af82709228f05b59e7d3d;
DROP TABLE IF EXISTS observatory.obs_87a814e485deabe3b12545a537f693d16ca702c2;
DROP TABLE IF EXISTS observatory.obs_e32f8e59c7c8861ee5ee4029b3ace2af9a5c9caf;
DROP TABLE IF EXISTS observatory.obs_23cb5063486bd7cf36f17e89e5e65cd31b331f6e;
DROP TABLE IF EXISTS observatory.obs_1ea93bbc109c87c676b3270789dacf7a1430db6c;
DROP TABLE IF EXISTS observatory.obs_b393b5b88c6adda634b2071a8005b03c551b609a;
DROP TABLE IF EXISTS observatory.obs_8e30e6b3792430b410ba5b9e49cdc6a0d404d48f;
DROP TABLE IF EXISTS observatory.obs_08025e1287e3af2b5de571d06562ba8d3bdb48e9;
DROP TABLE IF EXISTS observatory.obs_fae094ddb7157380e2495b9867e1f067fdbdf288;
DROP TABLE IF EXISTS observatory.obs_d03c931c9b7f9df54c3fae95bb7f958fe3187c71;
DROP TABLE IF EXISTS observatory.obs_a6811c89ed79ab4339d89a86907b586439cc74df;
DROP TABLE IF EXISTS observatory.obs_d39f7fe5959891c8296490d83c22ded31c54af13;
DROP TABLE IF EXISTS observatory.obs_3b537fe9a1dcdd3be4a53f64429e30a836ecb6ee;
DROP TABLE IF EXISTS observatory.obs_c4411eba732408d47d73281772dbf03d60645dec;
DROP TABLE IF EXISTS observatory.obs_a01cd5d8ccaa6531cef715071e9307e6b1987ec3;
DROP TABLE IF EXISTS observatory.obs_1746e37b7cd28cb131971ea4187d42d71f09c5f3;
DROP TABLE IF EXISTS observatory.obs_8e30e6b3792430b410ba5b9e49cdc6a0d404d48f;
DROP TABLE IF EXISTS observatory.obs_1a098da56badf5f32e336002b0a81708c40d29cd;
DROP TABLE IF EXISTS observatory.obs_87a814e485deabe3b12545a537f693d16ca702c2;
DROP TABLE IF EXISTS observatory.obs_65f29658e096ca1485bf683f65fdbc9f05ec3c5d;
DROP TABLE IF EXISTS observatory.obs_9b319c207dfa600c2296a6d46055e54a4c00f646;
DROP TABLE IF EXISTS observatory.obs_9b319c207dfa600c2296a6d46055e54a4c00f646;
DROP TABLE IF EXISTS observatory.obs_0310c639744a2014bb1af82709228f05b59e7d3d;
DROP TABLE IF EXISTS observatory.obs_b393b5b88c6adda634b2071a8005b03c551b609a;

File diff suppressed because one or more lines are too long

View File

@@ -3,3 +3,5 @@ CREATE EXTENSION postgis;
-- Install the extension
CREATE EXTENSION observatory VERSION 'dev';
\i test/fixtures/load_fixtures.sql

View File

@@ -1,6 +1,5 @@
\pset format unaligned
\set ECHO all
\i test/fixtures/load_fixtures.sql
SET client_min_messages TO WARNING;
\set ECHO none
@@ -48,12 +47,6 @@ SELECT cdb_observatory._OBS_StandardizeMeasureName('test 343 %% 2 qqq }}{{}}') =
SELECT cdb_observatory.OBS_DumpVersion()
IS NOT NULL AS OBS_DumpVersion_notnull;
-- Should fail to perform intersection
SELECT ST_IsValid(ST_Intersection(
cdb_observatory.OBS_GetBoundaryByID('48061', 'us.census.tiger.county'),
cdb_observatory.OBS_GetBoundaryByID('48061', 'us.census.tiger.county_clipped')
)) AS complex_intersection_fails;
-- Should succeed in intersecting
SELECT ST_IsValid(cdb_observatory.safe_intersection(
cdb_observatory.OBS_GetBoundaryByID('48061', 'us.census.tiger.county'),

View File

@@ -22,14 +22,14 @@ SELECT cdb_observatory.OBS_GetSegmentSnapshot(
)::text is null as null_island_segmentation;
-- Point-based OBS_GetMeasure with zillow
SELECT abs(OBS_GetMeasure_zhvi_point - 597900) / 597900 < 5.0 AS OBS_GetMeasure_zhvi_point_test FROM cdb_observatory.OBS_GetMeasure(
ST_SetSRID(ST_Point(-73.94602417945862, 40.6768220087458), 4326),
SELECT abs(OBS_GetMeasure_zhvi_point - 446000) / 446000 < 5.0 AS OBS_GetMeasure_zhvi_point_test FROM cdb_observatory.OBS_GetMeasure(
ST_SetSRID(ST_Point(-73.90820503234865, 40.69469600456701), 4326),
'us.zillow.AllHomes_Zhvi', null, 'us.census.tiger.zcta5', '2014-01'
) As t(OBS_GetMeasure_zhvi_point);
-- Point-based OBS_GetMeasure with later measure
SELECT abs(OBS_GetMeasure_zhvi_point_default_latest - 995400) / 995400 < 5.0 AS OBS_GetMeasure_zhvi_point_default_latest_test FROM cdb_observatory.OBS_GetMeasure(
ST_SetSRID(ST_Point(-73.94602417945862, 40.6768220087458), 4326),
SELECT abs(OBS_GetMeasure_zhvi_point_default_latest - 701400) / 701400 < 5.0 AS OBS_GetMeasure_zhvi_point_default_latest_test FROM cdb_observatory.OBS_GetMeasure(
ST_SetSRID(ST_Point(-73.90820503234865, 40.69469600456701), 4326),
'us.zillow.AllHomes_Zhvi', null, 'us.census.tiger.zcta5', '2016-06'
) As t(OBS_GetMeasure_zhvi_point_default_latest);
@@ -416,7 +416,7 @@ SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestPoint(),
-- OBS_GetMeta provides suggested name for simple meta request with denom
SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestPoint(),
'[{"numer_id": "us.census.acs.B01001002", "normalization": "denom"}]'
)->0->>'suggested_name' = 'male_pop_rate_2010_2014' obs_getmeta_suggested_name_denom;
)->0->>'suggested_name' = 'male_pop_2010_2014_by_total_pop' obs_getmeta_suggested_name_denom;
-- OBS_GetData/OBS_GetMeta by id with empty list/null
WITH data AS (SELECT * FROM cdb_observatory.OBS_GetData(ARRAY[]::TEXT[], null))
@@ -677,7 +677,7 @@ data AS (SELECT * FROM cdb_observatory.OBS_GetData(
(SELECT meta FROM meta)))
SELECT id = 1 id,
data->0->>'value' = 'Hispanic Black mix multilingual, high poverty, renters, uses public transport' data_poly_categorical,
abs((data->1->>'value')::Numeric - 15787) / 15787 < 0.0001 valcol
abs((data->1->>'value')::Numeric - 15790) / 15790 < 0.0001 valcol
FROM data;
-- OBS_GetData/OBS_GetMeta by geom with polygons inside a polygon
@@ -700,46 +700,22 @@ data AS (SELECT * FROM cdb_observatory.OBS_GetData(
(SELECT meta FROM meta), false))
SELECT every(id = 1) is TRUE id,
count(distinct (data->0->>'value')::geometry) = 16 correct_num_geoms,
abs(sum((data->1->>'value')::numeric) - 12327) / 12327 < 0.001 correct_pop
abs(sum((data->1->>'value')::numeric) - 12329) / 12329 < 0.001 correct_pop
FROM data;
-- OBS_GetData/OBS_GetMeta by geom with polygons inside a polygon + one measure + one text
WITH
meta AS (SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestArea(),
'[{"geom_id": "us.census.tiger.block_group"}, {"numer_id": "us.census.acs.B01003001", "normalization": "predenom", "geom_id": "us.census.tiger.block_group"}, {"numer_id": "us.census.tiger.name", "geom_id": "us.census.tiger.block_group"}]') meta),
'[{"geom_id": "us.census.tiger.block_group"}, {"numer_id": "us.census.acs.B01003001", "normalization": "predenom", "geom_id": "us.census.tiger.block_group"}, {"numer_id": "us.census.tiger.block_group_geoname", "geom_id": "us.census.tiger.block_group"}]') meta),
data AS (SELECT * FROM cdb_observatory.OBS_GetData(
ARRAY[(cdb_observatory._TestArea(), 1)::geomval],
(SELECT meta FROM meta), false))
SELECT every(id = 1) is TRUE id,
count(distinct (data->0->>'value')::geometry) = 16 correct_num_geoms,
abs(sum((data->1->>'value')::numeric) - 12327) / 12327 < 0.001 correct_pop,
abs(sum((data->1->>'value')::numeric) - 12329) / 12329 < 0.001 correct_pop,
array_agg(distinct data->2->>'value') = '{"Block Group 1","Block Group 2","Block Group 3","Block Group 4","Block Group 5"}' correct_bg_names
FROM data;
-- OBS_GetData/OBS_GetMeta by geom with points inside a polygon
WITH
meta AS (SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestArea(),
'[{"geom_id": "us.census.tiger.pointlm_geom"}]') meta),
data AS (SELECT * FROM cdb_observatory.OBS_GetData(
ARRAY[(cdb_observatory._TestArea(), 1)::geomval],
(SELECT meta FROM meta), false))
SELECT every(id = 1) AS id,
count(distinct (data->0->>'value')::geometry(point, 4326)) = 3 correct_num_points
FROM data;
-- OBS_GetData/OBS_GetMeta by geom with points inside a polygon + one text
WITH
meta AS (SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestArea(),
'[{"geom_id": "us.census.tiger.pointlm_geom"}, {"geom_id": "us.census.tiger.pointlm_geom", "numer_id": "us.census.tiger.fullname"}]') meta),
data AS (SELECT * FROM cdb_observatory.OBS_GetData(
ARRAY[(cdb_observatory._TestArea(), 1)::geomval],
(SELECT meta FROM meta), false))
SELECT every(id = 1) AS id,
count(distinct (data->0->>'value')::geometry(point, 4326)) = 3 correct_num_points,
array_agg(data->1->>'value') = '{"Bushwick Yards","Edward Block Square","Bushwick Houses"}' pointgeom_names
FROM data;
-- OBS_GetData by id with one standard measure
WITH
meta AS (SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestArea(),
@@ -896,7 +872,8 @@ WITH _geoms AS (
FALSE
)
WHERE data->0->>'geomref' LIKE '36047%'
ORDER BY RANDOM()
and (data->1->>'value')::numeric > 1000
ORDER BY geom_ref
), geoms AS (
SELECT *, row_number() OVER () cartodb_id FROM _geoms
), samples AS (
@@ -973,3 +950,9 @@ WITH _geoms AS (
FROM geoms, results
WHERE cartodb_id = id
;
-- OBS_MetadataValidation
SELECT * FROM cdb_observatory.OBS_MetadataValidation(NULL, 'ST_Polygon', '[{"numer_id": "us.census.acs.B01003001","denom_id": null,"normalization": "prenormalized","geom_id": null,"numer_timespan": "2010 - 2014"}]'::json, 500);
SELECT * FROM cdb_observatory.OBS_MetadataValidation(NULL, 'ST_Polygon', '[{"numer_id": "us.census.acs.B25058001","denom_id": null,"normalization": "denominated","geom_id": null,"numer_timespan": "2010 - 2014"}]'::json, 500);
SELECT * FROM cdb_observatory.OBS_MetadataValidation(NULL, 'ST_Polygon', '[{"numer_id": "us.census.acs.B15003001","denom_id": null,"normalization": "denominated","geom_id": null,"numer_timespan": "2010 - 2014"}]'::json, 500);

View File

@@ -503,7 +503,7 @@ SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC)
= ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.zcta5']
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_5km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 5000)::Geometry(Geometry, 4326),
@@ -532,8 +532,8 @@ SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
WHERE table_id LIKE '%2015%';
SELECT ARRAY_AGG(column_id ORDER BY score DESC)
= ARRAY['us.census.tiger.county', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.block_group']
= ARRAY['us.census.tiger.county', 'us.census.tiger.zcta5',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
AS _obs_geometryscores_2500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 2500000)::Geometry(Geometry, 4326),
@@ -593,7 +593,7 @@ SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC)
= ARRAY['us.census.tiger.zcta5', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.block_group']
'us.census.tiger.block_group', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_500_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
@@ -613,7 +613,7 @@ SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC)
= ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.zcta5']
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_25000_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,4 @@
from nose.tools import assert_equal, assert_is_not_none
from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
from itertools import izip_longest
@@ -55,84 +54,50 @@ SKIP_COLUMNS = set([
u'us.census.tiger.mtfcc',
u'whosonfirst.wof_county_name',
u'whosonfirst.wof_region_name',
'fr.insee.P12_RP_CHOS', 'fr.insee.P12_RP_HABFOR'
, 'fr.insee.P12_RP_EAUCH', 'fr.insee.P12_RP_BDWC'
, 'fr.insee.P12_RP_MIDUR', 'fr.insee.P12_RP_CLIM'
, 'fr.insee.P12_RP_MIBOIS', 'fr.insee.P12_RP_CASE'
, 'fr.insee.P12_RP_TTEGOU', 'fr.insee.P12_RP_ELEC'
, 'fr.insee.P12_ACTOCC15P_ILT45D'
, 'fr.insee.P12_RP_CHOS', 'fr.insee.P12_RP_HABFOR'
, 'fr.insee.P12_RP_EAUCH', 'fr.insee.P12_RP_BDWC'
, 'fr.insee.P12_RP_MIDUR', 'fr.insee.P12_RP_CLIM'
, 'fr.insee.P12_RP_MIBOIS', 'fr.insee.P12_RP_CASE'
, 'fr.insee.P12_RP_TTEGOU', 'fr.insee.P12_RP_ELEC'
, 'fr.insee.P12_ACTOCC15P_ILT45D'
, 'uk.ons.LC3202WA0007'
, 'uk.ons.LC3202WA0010'
, 'uk.ons.LC3202WA0004'
, 'uk.ons.LC3204WA0004'
, 'uk.ons.LC3204WA0007'
, 'uk.ons.LC3204WA0010'
, 'br.geo.subdistritos_name'
u'fr.insee.P12_RP_CHOS',
u'fr.insee.P12_RP_HABFOR',
u'fr.insee.P12_RP_EAUCH',
u'fr.insee.P12_RP_BDWC',
u'fr.insee.P12_RP_MIDUR',
u'fr.insee.P12_RP_CLIM',
u'fr.insee.P12_RP_MIBOIS',
u'fr.insee.P12_RP_CASE',
u'fr.insee.P12_RP_TTEGOU',
u'fr.insee.P12_RP_ELEC',
u'fr.insee.P12_ACTOCC15P_ILT45D',
u'fr.insee.P12_RP_CHOS',
u'fr.insee.P12_RP_HABFOR',
u'fr.insee.P12_RP_EAUCH',
u'fr.insee.P12_RP_BDWC',
u'fr.insee.P12_RP_MIDUR',
u'fr.insee.P12_RP_CLIM',
u'fr.insee.P12_RP_MIBOIS',
u'fr.insee.P12_RP_CASE',
u'fr.insee.P12_RP_TTEGOU',
u'fr.insee.P12_RP_ELEC',
u'fr.insee.P12_ACTOCC15P_ILT45D',
u'uk.ons.LC3202WA0007',
u'uk.ons.LC3202WA0010',
u'uk.ons.LC3202WA0004',
u'uk.ons.LC3204WA0004',
u'uk.ons.LC3204WA0007',
u'uk.ons.LC3204WA0010',
u'br.geo.subdistritos_name'
])
MEASURE_COLUMNS = query('''
SELECT ARRAY_AGG(DISTINCT numer_id) numer_ids,
SELECT cdb_observatory.FIRST(distinct numer_id) numer_ids,
numer_aggregate,
denom_reltype,
section_tags
denom_reltype
FROM observatory.obs_meta
WHERE numer_weight > 0
AND numer_id NOT IN ('{skip}')
AND numer_id NOT LIKE 'eu.%' --Skipping Eurostat
AND section_tags IS NOT NULL
AND subsection_tags IS NOT NULL
GROUP BY numer_aggregate, section_tags, denom_reltype
GROUP BY numer_id, numer_aggregate, denom_reltype
'''.format(skip="', '".join(SKIP_COLUMNS))).fetchall()
#CATEGORY_COLUMNS = query('''
#SELECT distinct numer_id
#FROM observatory.obs_meta
#WHERE numer_type ILIKE 'text'
#AND numer_weight > 0
#''').fetchall()
#
#BOUNDARY_COLUMNS = query('''
#SELECT id FROM observatory.obs_column
#WHERE type ILIKE 'geometry'
#AND weight > 0
#''').fetchall()
#
#US_CENSUS_MEASURE_COLUMNS = query('''
#SELECT distinct numer_name
#FROM observatory.obs_meta
#WHERE numer_type ILIKE 'numeric'
#AND 'us.census.acs' = ANY (subsection_tags)
#AND numer_weight > 0
#''').fetchall()
#def default_geometry_id(column_id):
# '''
# Returns default test point for the column_id.
# '''
# if column_id == 'whosonfirst.wof_disputed_geom':
# return 'ST_SetSRID(ST_MakePoint(76.57, 33.78), 4326)'
# elif column_id == 'whosonfirst.wof_marinearea_geom':
# return 'ST_SetSRID(ST_MakePoint(-68.47, 43.33), 4326)'
# elif column_id in ('us.census.tiger.school_district_elementary',
# 'us.census.tiger.school_district_secondary',
# 'us.census.tiger.school_district_elementary_clipped',
# 'us.census.tiger.school_district_secondary_clipped'):
# return 'ST_SetSRID(ST_MakePoint(-73.7067, 40.7025), 4326)'
# elif column_id.startswith('es.ine'):
# return 'ST_SetSRID(ST_MakePoint(-2.51141249535454, 42.8226119029222), 4326)'
# elif column_id.startswith('us.zillow'):
# return 'ST_SetSRID(ST_MakePoint(-81.3544048197256, 28.3305906291771), 4326)'
# elif column_id.startswith('ca.'):
# return ''
# else:
# return 'ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)'
def default_lonlat(column_id):
'''
@@ -142,11 +107,6 @@ def default_lonlat(column_id):
return (76.57, 33.78)
elif column_id == 'whosonfirst.wof_marinearea_geom':
return (-68.47, 43.33)
elif column_id in ('us.census.tiger.school_district_elementary',
'us.census.tiger.school_district_secondary',
'us.census.tiger.school_district_elementary_clipped',
'us.census.tiger.school_district_secondary_clipped'):
return (40.7025, -73.7067)
elif column_id.startswith('uk'):
if 'WA' in column_id:
return (51.46844551219723, -3.184833526611328)
@@ -158,30 +118,19 @@ def default_lonlat(column_id):
return (28.3305906291771, -81.3544048197256)
elif column_id.startswith('mx.'):
return (19.41347699386547, -99.17019367218018)
elif column_id.startswith('th.'):
return (13.725377712079784, 100.49263000488281)
# cols for French Guyana only
#elif column_id in ('fr.insee.P12_RP_CHOS', 'fr.insee.P12_RP_HABFOR'
# , 'fr.insee.P12_RP_EAUCH', 'fr.insee.P12_RP_BDWC'
# , 'fr.insee.P12_RP_MIDUR', 'fr.insee.P12_RP_CLIM'
# , 'fr.insee.P12_RP_MIBOIS', 'fr.insee.P12_RP_CASE'
# , 'fr.insee.P12_RP_TTEGOU', 'fr.insee.P12_RP_ELEC'
# , 'fr.insee.P12_ACTOCC15P_ILT45D'
# , 'fr.insee.P12_RP_CHOS', 'fr.insee.P12_RP_HABFOR'
# , 'fr.insee.P12_RP_EAUCH', 'fr.insee.P12_RP_BDWC'
# , 'fr.insee.P12_RP_MIDUR', 'fr.insee.P12_RP_CLIM'
# , 'fr.insee.P12_RP_MIBOIS', 'fr.insee.P12_RP_CASE'
# , 'fr.insee.P12_RP_TTEGOU', 'fr.insee.P12_RP_ELEC'
# , 'fr.insee.P12_ACTOCC15P_ILT45D'):
# return (4.938408371206558, -52.32908248901367)
elif column_id.startswith('fr.'):
return (48.860875144709475, 2.3613739013671875)
elif column_id.startswith('ca.'):
return (43.65594991256823, -79.37965393066406)
elif column_id in ('us.census.tiger.school_district_elementary',
'us.census.tiger.school_district_secondary',
'us.census.tiger.school_district_elementary_clipped',
'us.census.tiger.school_district_secondary_clipped',
'us.census.tiger.school_district_elementary_geoname',
'us.census.tiger.school_district_secondary_geoname'):
return (40.7025, -73.7067)
elif column_id.startswith('us.census.'):
return (28.3305906291771, -81.3544048197256)
elif column_id.startswith('us.dma.'):
return (28.3305906291771, -81.3544048197256)
elif column_id.startswith('us.ihme.'):
return (28.3305906291771, -81.3544048197256)
elif column_id.startswith('us.bls.'):
@@ -192,8 +141,6 @@ def default_lonlat(column_id):
return (28.3305906291771, -81.3544048197256)
elif column_id.startswith('us.epa.'):
return (28.3305906291771, -81.3544048197256)
elif column_id.startswith('eu.'):
raise SkipTest('No tests for Eurostat!')
elif column_id.startswith('br.'):
return (-23.53, -46.63)
elif column_id.startswith('au.'):
@@ -202,56 +149,65 @@ def default_lonlat(column_id):
raise Exception('No catalog point set for {}'.format(column_id))
def default_point(column_id):
lat, lng = default_lonlat(column_id)
def default_point(test_point):
    '''
    Returns the SQL expression for a point geometry (SRID 4326).

    test_point is a (lat, lng) pair; note that ST_MakePoint takes the
    longitude first, so the two values are swapped in the output.
    '''
    lat, lng = test_point
    return 'ST_SetSRID(ST_MakePoint({lng}, {lat}), 4326)'.format(
        lat=lat, lng=lng)
def default_area(column_id):
def default_area(test_point):
    '''
    Returns default test area for the test point

    The result is a SQL expression: the point from default_point() is
    transformed to SRID 3857, buffered by 250 and transformed back to 4326.
    '''
    point = default_point(test_point)
    area = 'ST_Transform(ST_Buffer(ST_Transform({point}, 3857), 250), 4326)'.format(
        point=point)
    return area
#@parameterized(US_CENSUS_MEASURE_COLUMNS)
#def test_get_us_census_measure_points(name):
# resp = query('''
#SELECT * FROM {schema}OBS_GetUSCensusMeasure({point}, '{name}')
# '''.format(name=name.replace("'", "''"),
# schema='cdb_observatory.' if USE_SCHEMA else '',
# point=default_point('')))
# rows = resp.fetchall()
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
def filter_points():
    '''
    Returns the rows used by the point tests: MEASURE_COLUMNS, unfiltered.
    '''
    return MEASURE_COLUMNS
def grouped_measure_columns():
for numer_ids, numer_aggregate, denom_reltype, section_tags in MEASURE_COLUMNS:
def filter_areas():
    '''
    Returns the MEASURE_COLUMNS rows used by the area tests.

    A row is kept when its aggregate is 'sum', or when it is 'median' or
    'average' and its denominator relation type is 'universe'. Comparisons
    are case-insensitive; rows without an aggregate are dropped.
    '''
    filtered = []
    for numer_ids, numer_aggregate, denom_reltype in MEASURE_COLUMNS:
        # Normalise once; a missing value can never match an allowed name.
        aggregate = (numer_aggregate or '').lower()
        if aggregate not in ('sum', 'median', 'average'):
            continue
        # Only 'median'/'average' reach the reltype check, and they need
        # a 'universe' denominator.
        if aggregate != 'sum' and (denom_reltype or '').lower() != 'universe':
            continue
        filtered.append((numer_ids, numer_aggregate, denom_reltype))
    return filtered
def grouped_measure_columns(filtered_columns):
    '''
    Yields (point, column ids) pairs for the parameterized measure tests.

    The first element of every row in filtered_columns is grouped by the
    point default_lonlat() returns for it. Each group is then split with
    grouper(..., 50) and one pair is yielded per chunk.
    '''
    groupbypoint = dict()
    for row in filtered_columns:
        numer_ids = row[0]
        point = default_lonlat(numer_ids)
        groupbypoint.setdefault(point, []).append(numer_ids)

    # items() is available on both Python 2 and 3, unlike iteritems().
    for point, numer_ids in groupbypoint.items():
        for colgroup in grouper(numer_ids, 50):
            # Falsy entries are dropped -- presumably grouper's padding of
            # the last chunk; confirm against grouper's definition.
            yield point, [c for c in colgroup if c]
@parameterized(grouped_measure_columns())
def test_get_measure_points(numer_ids, numer_aggregate, denom_reltype, section_tags):
_test_measures(numer_ids, numer_aggregate, section_tags, denom_reltype, default_point(numer_ids[0]))
# Runs once per (point, column ids) pair produced from the unfiltered
# measure columns, querying the measures at the point geometry.
@parameterized(grouped_measure_columns(filter_points()))
def test_get_measure_points(point, numer_ids):
    geom = default_point(point)
    _test_measures(numer_ids, geom)
@parameterized(grouped_measure_columns())
def test_get_measure_areas(numer_ids, numer_aggregate, denom_reltype, section_tags):
if numer_aggregate is None or numer_aggregate.lower() not in ('sum', 'median', 'average'):
return
if numer_aggregate.lower() in ('median', 'average') \
and (denom_reltype is None \
or denom_reltype.lower() != 'universe'):
return
_test_measures(numer_ids, numer_aggregate, section_tags, denom_reltype, default_area(numer_ids[0]))
# Runs once per (point, column ids) pair produced from the area-compatible
# measure columns, querying the measures over the buffered area geometry.
@parameterized(grouped_measure_columns(filter_areas()))
def test_get_measure_areas(point, numer_ids):
    geom = default_area(point)
    _test_measures(numer_ids, geom)
def _test_measures(numer_ids, numer_aggregate, section_tags, denom_reltype, geom):
def _test_measures(numer_ids, geom):
in_params = []
for numer_id in numer_ids:
in_params.append({
@@ -284,90 +240,3 @@ def _test_measures(numer_ids, numer_aggregate, section_tags, denom_reltype, geom
assert_equal(len(vals), len(in_params))
for i, val in enumerate(vals):
assert_is_not_none(val, 'NULL for {}'.format(in_params[i]['numer_id']))
#@parameterized(CATEGORY_COLUMNS)
#def test_get_category_areas(column_id):
# resp = query('''
#SELECT * FROM {schema}OBS_GetCategory({area}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# area=default_area(column_id)))
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
#@parameterized(CATEGORY_COLUMNS)
#def test_get_category_points(column_id):
# if column_id in SKIP_COLUMNS:
# raise SkipTest('Column {} should be skipped'.format(column_id))
# resp = query('''
#SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# point=default_point(column_id)))
# rows = resp.fetchall()
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundaries_by_geometry(column_id):
# resp = query('''
#SELECT * FROM {schema}OBS_GetBoundariesByGeometry({area}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# area=default_area(column_id)))
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_points_by_geometry(column_id):
# resp = query('''
#SELECT * FROM {schema}OBS_GetPointsByGeometry({area}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# area=default_area(column_id)))
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_points(column_id):
# resp = query('''
#SELECT * FROM {schema}OBS_GetBoundary({point}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# point=default_point(column_id)))
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_id(column_id):
# resp = query('''
#SELECT * FROM {schema}OBS_GetBoundaryId({point}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# point=default_point(column_id)))
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_by_id(column_id):
# resp = query('''
#SELECT * FROM {schema}OBS_GetBoundaryById({geometry_id}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# geometry_id=default_geometry_id(column_id)))
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])

View File

@@ -44,33 +44,7 @@ for q in (
-73.81885528564453,41.745696344339564, 4326),
'us.census.tiger.county_clipped')) foo
ORDER BY ST_NPoints(the_geom) DESC
LIMIT 50;''',
'DROP TABLE IF EXISTS obs_perftest_country_simple',
'''CREATE TABLE obs_perftest_country_simple (cartodb_id SERIAL PRIMARY KEY,
geom GEOMETRY,
name TEXT) ''',
'''INSERT INTO obs_perftest_country_simple (geom, name)
SELECT the_geom geom,
geom_refs AS name
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-179,-89, 179,89, 4326),
'whosonfirst.wof_country_geom')) foo
ORDER BY ST_NPoints(the_geom) ASC
LIMIT 50;''',
'DROP TABLE IF EXISTS obs_perftest_country_complex',
'''CREATE TABLE obs_perftest_country_complex (cartodb_id SERIAL PRIMARY KEY,
geom GEOMETRY,
name TEXT) ''',
'''INSERT INTO obs_perftest_country_complex (geom, name)
SELECT the_geom geom,
geom_refs AS name
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-179,-89, 179,89, 4326),
'whosonfirst.wof_country_geom')) foo
ORDER BY ST_NPoints(the_geom) DESC
LIMIT 50;''',
#'''SET statement_timeout = 5000;'''
):
LIMIT 50;'''):
q_formatted = q.format(
schema='cdb_observatory.' if USE_SCHEMA else '',
)
@@ -118,18 +92,10 @@ def record(params, results):
('complex', '_OBS_GetGeometryScores', 'NULL', 1),
('complex', '_OBS_GetGeometryScores', 'NULL', 500),
('complex', '_OBS_GetGeometryScores', 'NULL', 3000),
('country_simple', '_OBS_GetGeometryScores', 'NULL', 1),
('country_simple', '_OBS_GetGeometryScores', 'NULL', 500),
('country_simple', '_OBS_GetGeometryScores', 'NULL', 5000),
('country_complex', '_OBS_GetGeometryScores', 'NULL', 1),
('country_complex', '_OBS_GetGeometryScores', 'NULL', 500),
('country_complex', '_OBS_GetGeometryScores', 'NULL', 5000),
('complex', '_OBS_GetGeometryScores', 'NULL', 3000)
])
def test_getgeometryscores_performance(geom_complexity, api_method, filters, target_geoms):
print api_method, geom_complexity, filters, target_geoms
print(api_method, geom_complexity, filters, target_geoms)
rownums = (1, 5, 10, ) if 'complex' in geom_complexity else (5, 25, 50,)
results = []
@@ -152,7 +118,7 @@ def test_getgeometryscores_performance(geom_complexity, api_method, filters, tar
'qps': qps,
'stmt': stmt
})
print rows, ': ', qps, ' QPS'
print(rows, ': ', qps, ' QPS')
if 'OBS_RECORD_TEST' in os.environ:
record({
@@ -211,7 +177,7 @@ def test_getgeometryscores_performance(geom_complexity, api_method, filters, tar
('complex', 'OBS_GetCategory', None, 'offset_geom', "'us.census.tiger.census_tract'"),
])
def test_getmeasure_performance(geom_complexity, api_method, normalization, geom, boundary):
print api_method, geom_complexity, normalization, geom, boundary
print(api_method, geom_complexity, normalization, geom, boundary)
col = 'measure' if 'measure' in api_method.lower() else 'category'
results = []
@@ -235,7 +201,7 @@ def test_getmeasure_performance(geom_complexity, api_method, normalization, geom
'qps': qps,
'stmt': stmt
})
print rows, ': ', qps, ' QPS'
print(rows, ': ', qps, ' QPS')
if 'OBS_RECORD_TEST' in os.environ:
record({
@@ -283,7 +249,7 @@ def test_getmeasure_performance(geom_complexity, api_method, normalization, geom
('complex', 'denominator', 'offset_geom', "'us.census.tiger.county'"),
])
def test_getdata_performance(geom_complexity, normalization, geom, boundary):
print geom_complexity, normalization, geom, boundary
print(geom_complexity, normalization, geom, boundary)
cols = ['us.census.acs.B01001002',
'us.census.acs.B01001003',
@@ -339,7 +305,7 @@ def test_getdata_performance(geom_complexity, normalization, geom, boundary):
'qps': qps,
'stmt': stmt
})
print rows, ': ', qps, ' QPS'
print(rows, ': ', qps, ' QPS')
if 'OBS_RECORD_TEST' in os.environ:
record({