173 Commits

Author SHA1 Message Date
Jesús Arroyo Torrens
1a22b907b6 Merge pull request #361 from CartoDB/chore/ch111328/remove-do-v1-api
Mark as deprecated
2021-03-25 16:56:52 +01:00
Jesús Arroyo Torrens
4aed149bd6 Mark repo as deprecated 2021-03-09 09:19:31 +01:00
Jesús Arroyo Torrens
aea815237a Merge pull request #360 from CartoDB/doc-multi-version
Doc multi version
2020-07-07 12:07:30 +02:00
Jesús Arroyo Torrens
f3dbccdbf3 Move v2 doc to root 2020-06-30 05:29:13 +02:00
Jesús Arroyo Torrens
f23df546b8 Merge branch 'master' into doc-multi-version 2020-06-30 04:32:40 +02:00
Jesús Arroyo Torrens
7dd14ba29b Add v1/v2 docs 2020-06-29 20:34:40 +02:00
Raúl Marín
7f969ae064 Merge pull request #359 from CartoDB/update_CI
Update ci
2020-04-02 15:08:49 +02:00
Raúl Marín
3295dd069f Update NEWS 2020-04-02 13:18:32 +02:00
Raúl Marín
66031a9167 Adapt test for PG12 and Postgis 3.0 2020-04-02 13:13:59 +02:00
Raúl Marín
ebf2c92f08 Adapt src for Postgis 3.0 2020-04-02 13:13:38 +02:00
Raúl Marín
d6990134b6 Disable mvt functions
They are unreleased and use python without declaring it.
To be re-enabled with a refactor if necessary
2020-04-02 13:12:52 +02:00
Raúl Marín
9caeacf912 Travis: Move back to barebone travis and test with PG12 2020-04-02 13:10:44 +02:00
ibrahim menem
947566ee19 Merge pull request #353 from CartoDB/rjimenezda-patch-1
Fix broken link in docs
2020-02-13 17:32:26 +01:00
Mario de Frutos Dieguez
6a063ca0eb Merge pull request #357 from CartoDB/pg11_support
Support for PG11
2019-04-12 11:53:47 +02:00
Mario de Frutos Dieguez
a0b2c86645 Support for PG11
- Start using Docker to test in TravisCI
- Support for PG10 and PG11 and remove support for old versions
2019-04-12 10:59:58 +02:00
Javier Torres
ccbc73cd6c Merge pull request #356 from CartoDB/update_tests
Update perftests with new geometries
2019-01-09 15:10:48 +01:00
Javier Torres
7932726a0e Canada columns with 0 denominator 2019-01-09 10:46:49 +01:00
Javier Torres
63ad09cd87 New school districts column name in tests 2019-01-08 23:50:33 +01:00
Javier Torres
d8e99d75aa Update perftests with new geometries 2019-01-08 20:17:46 +01:00
Juan Ignacio Sánchez Lara
bac5237d84 Merge branch 'master' into develop 2018-12-27 15:52:20 +01:00
Román Jiménez
70e027bfd0 Fix broken link in docs
I've been looking into the docs & noticed a broken link.

BTW what's the deal with having this stuff on master and not on develop? It looks like the branches have diverged
2018-09-11 18:46:02 +02:00
Juan Ignacio Sánchez Lara
50bddfd08d Merge pull request #352 from CartoDB/version-fix
Changes in PostGIS version to use 4 numbers
2018-08-20 10:20:17 +02:00
Alejandro Guirao Rodríguez
9ed3c45547 Changes in PostGIS version to use 4 numbers 2018-08-17 11:07:25 +02:00
Juan Ignacio Sánchez Lara
0451478099 Merge pull request #351 from CartoDB/carto_package
carto-package.json
2018-08-13 17:13:53 +02:00
Juan Ignacio Sánchez Lara
78799a59a4 carto-package.json 2018-08-13 16:45:19 +02:00
Antonio Carlón
8acd5d39b6 Merge pull request #349 from CartoDB/Fix_whole_file_date_query
Fix whole file date query
2018-07-23 13:38:41 +02:00
antoniocarlon
509d1d727a Fix whole file date query 2018-07-23 13:38:01 +02:00
Antonio Carlón
2f76772b4a Merge pull request #348 from CartoDB/Fixed_typo
Fixed typo
2018-07-23 12:26:31 +02:00
antoniocarlon
9ca4d755c9 Fixed typo 2018-07-23 12:25:51 +02:00
Antonio Carlón
9b62a0980f Merge pull request #347 from CartoDB/Fix_table_naming
Fixed table naming
2018-07-23 12:22:39 +02:00
antoniocarlon
01aea9e698 Fixed table naming 2018-07-23 12:21:48 +02:00
Antonio Carlón
42226aa263 Merge pull request #346 from CartoDB/Change_tiler_schema_name
Changed tiler schema name
2018-07-23 12:02:59 +02:00
antoniocarlon
d21b34bb32 Changed tiler schema name 2018-07-23 12:02:11 +02:00
Antonio Carlón
d3db90a823 Merge pull request #345 from CartoDB/536-Change_mc_schema
Change MC schema
2018-07-23 11:19:06 +02:00
antoniocarlon
5e829bc402 Change MC schema 2018-07-23 09:36:16 +02:00
Mario de Frutos
e89582eea6 Merge pull request #334 from CartoDB/333-MVT_returning_function
Created MVT returning function for DO
2018-07-20 12:37:00 +02:00
Mario de Frutos
efec69c957 Merge pull request #343 from CartoDB/342-Return_whole_xyz_from_zoom
New function to return the whole table from a zoom level/geography
2018-07-20 12:09:31 +02:00
Mario de Frutos
1af8ab01f6 Merge pull request #344 from CartoDB/531-Improved_query_performance
Improved query performance removing outer joins
2018-07-20 11:55:28 +02:00
antoniocarlon
44727ac44c Improved query performance removing outer joins 2018-07-16 16:50:41 +02:00
antoniocarlon
f4b8c39870 New function to return the whole table from a zoom level/geography 2018-07-12 15:19:21 +02:00
antoniocarlon
c354d48f28 Updated NEWS.md 2018-07-03 15:53:19 +02:00
antoniocarlon
96b5d84394 CR suggestion 2018-07-03 15:01:28 +02:00
Mario de Frutos
a32d0e0141 Merge pull request #326 from CartoDB/developer-center
New docs folder for developer center
2018-07-02 15:36:05 +02:00
Mario de Frutos
52fd8de04e Merge pull request #338 from CartoDB/516-Multicategory_for_MC
OBS_GetMCDOMVT can receive several categories
2018-06-27 17:32:40 +02:00
antoniocarlon
0a029404c8 Added CR suggestion 2018-06-27 17:10:45 +02:00
antoniocarlon
d932be1a9f MC measurements are not required + areas 2018-06-27 15:52:25 +02:00
antoniocarlon
b979167156 Added function to get dates from a given month 2018-06-26 14:39:04 +02:00
antoniocarlon
c6ef9b7f3a Added filter by months 2018-06-15 11:19:07 +02:00
antoniocarlon
4e57558da6 Fixed categories 2018-06-14 16:29:36 +02:00
antoniocarlon
3172a11d9d Fixed categories 2018-06-14 16:05:20 +02:00
antoniocarlon
56a83f3eb5 Added multiple categories option 2018-06-14 14:51:13 +02:00
Antonio Carlón
f2710e9505 Merge pull request #337 from CartoDB/336-Cache_for_GetMeta
Added cache for OBS_GetMeta function results
2018-06-14 09:59:37 +02:00
antoniocarlon
f5d6716acc Added plpythonu 2018-06-13 14:24:04 +02:00
antoniocarlon
e72b4ea7b6 Added plpythonu 2018-06-13 14:03:42 +02:00
antoniocarlon
0f9c53ddc2 Added plpythonu 2018-06-13 13:43:23 +02:00
antoniocarlon
f148057e45 Added plpythonu 2018-06-13 13:08:50 +02:00
antoniocarlon
865c1a38c3 Added plpythonu 2018-06-13 12:53:44 +02:00
antoniocarlon
6924cc4512 Added cache for OBS_GetMeta function results 2018-06-13 11:00:21 +02:00
antoniocarlon
c62d1cd4b8 Fixed MVT projection 2018-06-11 17:53:25 +02:00
Mario de Frutos
0d172353fb Revert "Added transform to 3857"
This reverts commit d1c302a9df.
2018-05-30 17:20:06 +02:00
Mario de Frutos
23088efc91 Function OBS_GetMCDOMVT must return jsonb 2018-05-30 16:49:36 +02:00
Mario de Frutos
e2017d41cf Add exception for the mc geography levels in the new file 2018-05-30 16:38:26 +02:00
antoniocarlon
d1c302a9df Added transform to 3857 2018-05-30 09:11:27 +02:00
csubira
ef51209ee1 Remove broken links issue 348 devcenter 2018-05-28 17:42:39 +02:00
antoniocarlon
7c559fe7d5 If getmeta returns null, retry using only the geography level 2018-05-22 13:39:41 +02:00
antoniocarlon
0ae15880e0 Function returning MVT ready geoms and MVT data in JSON 2018-05-18 12:49:58 +02:00
antoniocarlon
811009a8ad Using clipped geometries parametrized 2018-05-17 17:03:27 +02:00
antoniocarlon
f1668777d5 Using shoreline clipped geoms 2018-05-17 16:57:04 +02:00
antoniocarlon
f050c05a55 Added outer joins on geometries 2018-05-17 15:11:14 +02:00
antoniocarlon
467883726b Added area normalization option 2018-05-17 14:13:06 +02:00
antoniocarlon
915e67c061 Added DO+MasterCard MVT returning function 2018-05-16 13:34:32 +02:00
Mario de Frutos
2e365b1eee Py3 for zip_longest 2018-05-15 12:45:59 +02:00
antoniocarlon
9f62a8c127 Fixing errors. Request multiple measurements is now allowed 2018-05-11 15:30:37 +02:00
antoniocarlon
d050b968a4 Fixed field naming 2018-05-09 14:47:00 +02:00
antoniocarlon
78a7ccbe4b Fix y coordinate 2018-05-09 13:40:26 +02:00
antoniocarlon
92155b6b33 Fixed area > 0 2018-05-09 13:27:03 +02:00
antoniocarlon
5c4bb80f57 Fixed area/perimeter relation 2018-05-09 13:18:19 +02:00
antoniocarlon
b8d00c49cb Fixed areas 2018-05-09 12:46:00 +02:00
antoniocarlon
dcf10edd9e Added tolerance for the areas 2018-05-09 12:29:42 +02:00
antoniocarlon
db637f6e52 Returning simple JSON 2018-05-09 11:36:24 +02:00
antoniocarlon
4b128ea3e8 Fixing bounds 2018-05-09 11:20:17 +02:00
antoniocarlon
93790e9f0d Fixed MVT data 2018-05-09 10:16:03 +02:00
antoniocarlon
d309a25379 Fixed MVT data 2018-05-09 10:14:59 +02:00
Mario de Frutos
50008ea149 WIP 2018-05-09 09:39:20 +02:00
antoniocarlon
5f9f77ded1 Fixed errors 2018-05-08 17:50:01 +02:00
antoniocarlon
3fba8a3091 Inverting y coordinate 2018-05-08 16:54:53 +02:00
antoniocarlon
fafd4a364e Fixed naming issue 2018-05-08 15:52:30 +02:00
antoniocarlon
46d7027cdb Created MVT returning function for DO 2018-05-04 15:18:55 +02:00
csubira
2cc14a9892 Rename avatar img 2018-04-25 18:18:26 +02:00
Mario de Frutos
a0535a6d02 Merge pull request #332 from CartoDB/develop
Release 1.9.0
2018-04-20 10:57:20 +02:00
Mario de Frutos
198ac90a0e Update NEWS.md 2018-04-20 10:34:26 +02:00
Mario de Frutos
19c5b09f2b Merge pull request #331 from CartoDB/remove_new_fields
Remove new fields not used to simplify deploy process
2018-04-20 10:13:07 +02:00
Mario de Frutos
eb31a8f40a Remove new fields not used to simplify deploy process 2018-04-19 19:52:19 +02:00
Iñigo Medina
43408f8de0 Update 02-accessing-the-data-observatory.md 2018-04-18 12:39:44 +02:00
Mario de Frutos
0a9fb4d51d Release 1.9.0 artifacts 2018-04-17 17:53:17 +02:00
Mario de Frutos
b5afd0734b Include instructions to load fixtures 2018-04-17 17:50:27 +02:00
Mario de Frutos
3e9e2f69c4 Merge pull request #325 from CartoDB/421-geom_numer_timespan_refactor
Refactor geom_numer_timespan
2018-04-17 17:47:39 +02:00
Mario de Frutos
0241f03329 All tests fixed 2018-04-17 17:41:04 +02:00
Iñigo Medina
a3fc313ffb Update 02-contribute.md 2018-04-17 14:42:39 +02:00
Mario de Frutos
3816c2af8b Include tiger non-clipped county for the fixtures 2018-04-16 12:56:32 +02:00
Mario de Frutos
ed3b7de9e0 Fix not valid column in exploration functions 2018-04-16 11:38:28 +02:00
Mario de Frutos
350b1716e0 New fixtures after fix in bigmetadata 2018-04-16 11:14:26 +02:00
Iñigo Medina
b3f2809851 Update 02-accessing-the-data-observatory.md 2018-04-13 17:56:20 +02:00
Iñigo Medina
a5a2d59e0b Update 01-overview.md 2018-04-13 17:55:26 +02:00
Iñigo Medina
5a19090b41 Update 06-discovery-functions.md 2018-04-13 12:06:47 +02:00
Iñigo Medina
cff0576368 Update 05-boundary-functions.md 2018-04-13 12:05:22 +02:00
Iñigo Medina
2778986e15 Update 04-measures-functions.md 2018-04-13 12:04:50 +02:00
Iñigo Medina
5aba51d86d Update 04-measures-functions.md 2018-04-13 11:56:21 +02:00
Iñigo Medina
7340003c91 Add files via upload 2018-04-13 11:51:58 +02:00
Iñigo Medina
10b9fd3e72 Update 06-discovery-functions.md 2018-04-13 11:48:39 +02:00
Iñigo Medina
ab0fb6abf9 Update 05-boundary-functions.md 2018-04-13 11:47:31 +02:00
Iñigo Medina
ddb69e61a3 Rename 03-discovery-functions.md to 06-discovery-functions.md 2018-04-13 11:46:39 +02:00
Iñigo Medina
85883a1686 Rename 02-boundary-functions.md to 05-boundary-functions.md 2018-04-13 11:46:19 +02:00
Iñigo Medina
120d4e7c71 Rename 01-measures-functions.md to 04-measures-functions.md 2018-04-13 11:45:52 +02:00
Iñigo Medina
b9f29821f2 Update 01-measures-functions.md 2018-04-13 11:42:37 +02:00
Iñigo Medina
a766233af0 Create 03-versioning.md 2018-04-13 11:32:38 +02:00
Iñigo Medina
0e7898fdab Create 02-authentication.md 2018-04-13 11:31:33 +02:00
Iñigo Medina
c10c5c5021 Create 01-introduction.md 2018-04-13 11:30:46 +02:00
Mario de Frutos
4fc22dc188 Fixes for generate fixtures 2018-04-13 10:31:27 +02:00
Mario de Frutos
6ce5e278e9 New fixtures including table_to_table 2018-04-13 10:11:15 +02:00
Mario de Frutos
c0126bf5c7 Remove sed command from generate_fixtures 2018-04-13 09:54:57 +02:00
Iñigo Medina
7f6a72a65f Create examples.json 2018-04-13 01:03:19 +02:00
Iñigo Medina
8e714ddf2f Rename 02-license.md to 03-license.md 2018-04-13 01:00:10 +02:00
Iñigo Medina
a3f817b2b8 Create 02-contribute.md 2018-04-13 00:59:56 +02:00
Mario de Frutos
eb761cc6e7 Add table_to_table to the fixtures too 2018-04-12 18:55:31 +02:00
Mario de Frutos
0db98f4020 New fixtures with last changes 2018-04-12 18:52:33 +02:00
Mario de Frutos
e9dbc97772 Improve fixtures generator 2018-04-12 18:35:45 +02:00
Mario de Frutos
8f9c8cf164 Updated NEWS.md 2018-04-12 17:19:59 +02:00
Antonio
b891034146 Changed name of numer timespans to ensure backwards compatibility 2018-04-12 17:09:19 +02:00
Antonio
89f76e2a1a Refactor geom_numer_timespan 2018-04-12 17:09:19 +02:00
Mario de Frutos
850b3c2524 Merge pull request #327 from CartoDB/Mitigate_collisions_in_suggested_name
Modified the denominated suggested_name to mitigate collisions
2018-04-12 17:07:01 +02:00
Mario de Frutos
5ee349f4e4 Merge pull request #324 from CartoDB/422-Refactor_GetAvailableTimespans
Refactor OBS_GetAvailableTimespans
2018-04-11 10:48:54 +02:00
Antonio
b4ba9b5d1d Fixed tests 2018-04-11 10:36:58 +02:00
Antonio
be82c87bb1 Fixed CR suggestion 2018-04-11 10:36:57 +02:00
Antonio
7aac256892 Refactor OBS_GetAvailableTimespans 2018-04-11 10:36:57 +02:00
Mario de Frutos
17345f4fca Merge pull request #329 from CartoDB/postgresql_10_support
Postgresql 10 support
2018-04-09 10:43:37 +02:00
Juan Ignacio Sánchez Lara
db585177ab Explicit pre-2.4 PostGIS equal operator
Before PostGIS 2.4, `=` meant equality of bounding boxes, but now it's
strict equalty.
2018-04-06 13:59:31 +02:00
Mario de Frutos
fb17d05714 Improve Travis testing
- Added multiple versions of PostgreSQL to test (9.5, 9.6 and 10)
- Added multiple versions of Postgis to test
- Cleand travis yaml in favor of a script done in other project thanks
to Paul Ramsey
2018-04-04 15:42:25 +02:00
Mario de Frutos
e469fb7920 Fix travis problem showing regression diffs 2018-04-04 15:42:04 +02:00
Antonio
6d23509557 Modified the denominated suggested_name to mitigate collisions 2018-02-23 10:46:01 +01:00
csubira
e07ee0b882 Add new docs folder structure 2018-02-21 16:48:37 +01:00
Antonio Carlón
2183b7fc26 Merge pull request #319 from CartoDB/383-More_null_columns_in_EU
Python 3
2017-11-28 16:58:13 +01:00
Antonio
1d5f8a6452 Py3 2017-11-23 12:36:59 +01:00
Javier Torres
f583cca67a Merge pull request #317 from CartoDB/316-docs_schema
Remove cdb_observatory references
2017-11-07 15:11:40 +01:00
Javier Torres
b4507b42b1 Remove cdb_observatory references 2017-11-07 09:31:19 +01:00
csobier
3c475d72df Merge pull request #315 from CartoDB/docs-edit-line245
edited description in line 245
2017-10-18 09:54:07 -04:00
csobier
8c72122fb8 modified line245 based on @ethervoid's comment 2017-10-18 09:01:52 -04:00
csobier
b93e7e8843 edited description in line 245
@ethervoid , looks good! i just rewrote the English a bit. Let me know if this is okay then we can update all the live docs.
2017-10-18 08:19:34 -04:00
Mario de Frutos
ff0989f8fc Merge pull request #314 from CartoDB/develop
Release 1.8.0
2017-10-18 10:16:55 +02:00
Mario de Frutos
0a753e95c0 Release 1.8.0 artifacts 2017-10-18 10:09:25 +02:00
Mario de Frutos
b62e3ea963 Merge pull request #313 from CartoDB/add_numgeoms_getavailablegeometries
OBS_GetAvailableGeometries now receives number of geometries from input
2017-10-18 10:05:46 +02:00
Mario de Frutos
1da0b8cb6b Update doc with new field 2017-10-18 10:00:15 +02:00
csobier
a39de46531 docs fixed links
@inigomedina , just a docs url fix. Need to merge to fix live docs. Thanks!
2017-10-13 08:14:06 -04:00
Mario de Frutos
94b8e7492d OBS_GetAvailableGeometries now receives number of geometries from input
We need the number of geometries to pass them to the get score function
in order to get an accurate score for the input in order to suggest
what is the geometry that fits better for the input we have
2017-10-10 18:09:26 +02:00
Antonio Carlón
91ece26c06 Merge pull request #311 from CartoDB/remove_wof_tests
Remove WOF perftests.
2017-09-25 17:00:58 +02:00
Javier Torres
74b9d209c0 Use precise for travis tests, cartodb ppas don't have trusty anymore 2017-09-21 16:16:13 +02:00
Javier Torres
4ae889dfdc Remove WOF perftests. This is needed for tests to pass since we don't have WOF in our current dump 2017-09-21 10:36:05 +02:00
Mario de Frutos
3353ad0a32 Update NEWS.md 2017-08-18 16:43:39 +02:00
Mario de Frutos
b4ef3c77a9 Merge pull request #306 from CartoDB/develop
Release 1.7.0
2017-08-18 16:41:21 +02:00
Mario de Frutos
90a2421b6e Merge pull request #305 from CartoDB/obs_metadatavalidation_doc
OBS_MetadataValidation doc
2017-08-18 16:35:26 +02:00
Mario de Frutos
fd21709ca1 Fix missing schema for FIRST function 2017-08-18 16:20:13 +02:00
Javier Torres
3791511d7d Merge pull request #308 from CartoDB/307-TestsForDifferentPointsFixed
307 tests for different points fixed
2017-08-18 15:13:06 +02:00
Antonio
fad541c3fc Fixed broken tests and refactor 2017-08-18 11:19:15 +02:00
Antonio
48ed086fec Fixed tests for different test points per numerator 2017-08-17 16:31:40 +02:00
csobier
7e550cf909 applied quick copyedit to new docs code added 2017-08-11 08:00:00 -04:00
Mario de Frutos
6ab17bf8be New version 1.7.0 artifacts 2017-08-10 14:19:37 +02:00
Mario de Frutos
1f7f8015ad OBS_MetadataValidation doc 2017-08-10 13:29:42 +02:00
Mario de Frutos
6066ef028d Merge pull request #303 from CartoDB/precheck_metadata
OBS_MetadataValidation
2017-08-09 17:45:24 +02:00
Mario de Frutos
3c2e997a85 Add travis support to execute the tests 2017-08-09 17:16:47 +02:00
Mario de Frutos
cef99c6343 OBS_MetadataValidation
New function to check the metadata in order to search for errors like
for example if we have the metadata for a median aggregation and the
normalization is by are it'll fail.
2017-08-09 16:11:10 +02:00
Mario de Frutos
50d975ce9b Generate new fixtures to include new meta table
- Include the obs_meta_geom_numer_timespan table
2017-08-09 16:10:39 +02:00
Javier Torres
c56633dd2a Format NEWS.md 2017-07-31 10:15:00 +02:00
Michelle Ho
2b26c5ad64 fixing parentheses for obs_getdata with ids 2017-07-24 13:13:22 -04:00
59 changed files with 197678 additions and 15880 deletions

View File

@@ -2,6 +2,11 @@
I'd like to request a new data observatory extension deploy: dump + extension
**VERY IMPORTANT!!!**
PLEASE USE `python scripts/generate_fixtures.py` TO GENERATE NEW FIXTURES FOR
THE NEW DUMP AND OVERRIDE IT IN THIS PROJECT BEFORE PASS THE TESTS
## Performance comparison to last deployment
Please include link here to comparison perftests:
@@ -18,6 +23,6 @@ Please put here the dump id to be deployed: <dump_id>
Add down here the PR links to be added and deployed:
-
-
// @CartoDB/dataservices
// @CartoDB/datateam

43
.travis.yml Normal file
View File

@@ -0,0 +1,43 @@
language: c
sudo: required
env:
global:
- PGUSER=postgres
- PGDATABASE=postgres
- PGOPTIONS='-c client_min_messages=NOTICE'
jobs:
include:
- env: POSTGRESQL_VERSION="9.6" POSTGIS_VERSION="2.5"
dist: xenial
- env: POSTGRESQL_VERSION="10" POSTGIS_VERSION="2.5"
dist: xenial
- env: POSTGRESQL_VERSION="11" POSTGIS_VERSION="2.5"
dist: xenial
- env: POSTGRESQL_VERSION="12" POSTGIS_VERSION="2.5"
dist: bionic
- env: POSTGRESQL_VERSION="12" POSTGIS_VERSION="3"
dist: bionic
script:
- sudo apt-get install -y --allow-unauthenticated --no-install-recommends --no-install-suggests postgresql-$POSTGRESQL_VERSION postgresql-client-$POSTGRESQL_VERSION postgresql-server-dev-$POSTGRESQL_VERSION postgresql-common
- if [[ $POSTGRESQL_VERSION == '9.6' ]]; then sudo apt-get install -y postgresql-contrib-9.6; fi;
- sudo apt-get install -y --allow-unauthenticated postgresql-$POSTGRESQL_VERSION-postgis-$POSTGIS_VERSION postgresql-$POSTGRESQL_VERSION-postgis-$POSTGIS_VERSION-scripts postgis
# For pre12, install plpython2. For PG12 install plpython3
- if [[ $POSTGRESQL_VERSION != '12' ]]; then sudo apt-get install -y postgresql-plpython-$POSTGRESQL_VERSION python python-redis; else sudo apt-get install -y postgresql-plpython3-12 python3 python3-redis; fi;
- sudo pg_dropcluster --stop $POSTGRESQL_VERSION main
- sudo rm -rf /etc/postgresql/$POSTGRESQL_VERSION /var/lib/postgresql/$POSTGRESQL_VERSION /var/ramfs/postgresql/$POSTGRESQL_VERSION
- sudo pg_createcluster -u postgres $POSTGRESQL_VERSION main --start -- --auth-local trust --auth-host password
- export PGPORT=$(pg_lsclusters | grep $POSTGRESQL_VERSION | awk '{print $3}')
- cd src/pg/
- make
- sudo make install
- make installcheck
after_failure:
- pg_lsclusters
- cat test/regression.out
- cat test/regression.diffs
- echo $PGPORT
- sudo cat /var/log/postgresql/postgresql-$POSTGRESQL_VERSION-main.log

View File

@@ -28,8 +28,8 @@ Run the tests with `make test`.
Update extension in a working database with:
```
ALTER EXTENSION observatory VERSION TO 'current';
ALTER EXTENSION observatory VERSION TO 'dev';
ALTER EXTENSION observatory UPDATE TO 'current';
ALTER EXTENSION observatory UPDATE TO 'dev';
```
Note: we keep the current development version install as 'dev' always;

69
NEWS.md
View File

@@ -1,4 +1,47 @@
1.10.0 (2018-07-??)
-------------------
__Improvements__
* Updated for PostgreSQL 12 and PostGIS 3.0 compatibility.
1.9.0 (2018-04-20)
------------------
__Improvements__
* Improved `OBS_GetAvailableGeometries` for the DO Timespans project ([#325](https://github.com/CartoDB/observatory-extension/pull/325))
* Improved `OBS_GetAvailableTimespans` for the DO Timespans project ([#324](https://github.com/CartoDB/bigmetadata/issues/324))
* Modified the denominated suggested_name to mitigate collisions ([#327](https://github.com/CartoDB/observatory-extension/pull/327))
* Fixed some errors so now the extension supports PostgreSQL 10 ([#329](https://github.com/CartoDB/observatory-extension/pull/329))
* Fixed documentation
* Add support for multiple PostgreSQL and Postgis versions in our travis script for test purposes
1.8.0 (2017-10-18)
------------------
__Improvements__
* Add `number_geometries` field to `OBS_GetAvailableGeometries` in order to provide the number of geometries from the source data to be used in the score calculation ([#313](https://github.com/CartoDB/observatory-extension/issues/313))
1.7.0 (2017-08-18)
------------------
__Improvements__
* Add Travis support to execute the extension tests ([#183](https://github.com/CartoDB/observatory-extension/issues/183))
__API Changes__
* Add new function `OBS_MetadataValidation` ([#303](https://github.com/CartoDB/observatory-extension/pull/303))
__Bugfixes__
* Fixed parentheses for obs_getdata with ids
* Fixed failing tests due changes in the data dump for some TIGER geometries
1.6.0 (2017-07-20)
------------------
__Improvements__
@@ -17,6 +60,7 @@ __Bugfixes__
1.5.1 (2017-05-16)
------------------
__Improvements__
@@ -26,6 +70,7 @@ __Improvements__
([#285](https://github.com/CartoDB/observatory-extension/pull/285))
1.5.0 (2017-04-24)
------------------
__API Changes__
@@ -39,6 +84,7 @@ __API Changes__
([#282](https://github.com/CartoDB/observatory-extension/pull/282))
1.4.0 (2017-03-21)
------------------
__API Changes__
@@ -59,16 +105,19 @@ __Improvements__
boundary selection
1.3.5 (2017-03-15)
------------------
No changes. Artifact to allow for data update.
1.3.4 (2017-03-10)
------------------
__Bugfixes__
* Remove erroneously committed `RAISE NOTICE` in `OBS_GetData`
1.3.3 (2017-03-10)
------------------
__Bugfixes__
@@ -91,6 +140,7 @@ __Improvements__
([#267](https://github.com/CartoDB/observatory-extension/pull/267))
1.3.2 (2017-03-02)
------------------
__Bugfixes__
@@ -98,6 +148,7 @@ __Bugfixes__
This fixes issues with Camshaft.
1.3.1 (2017-02-16)
------------------
__Improvements__
@@ -108,6 +159,7 @@ __Improvements__
called for measures for polygons
1.3.0 (2017-01-17)
------------------
__API Changes__
@@ -132,9 +184,8 @@ __Bugfixes__
* Remove unnecessary dependency on `postgres_fdw`
* `OBS_GetData()` now aggregates measures with mixed geoms correctly
__API Changes__
1.2.1 (2017-01-17)
------------------
__Improvements__
@@ -142,6 +193,7 @@ __Improvements__
([#243](https://github.com/CartoDB/observatory-extension/pull/233))
1.2.0 (2016-12-28)
------------------
__API Changes__
@@ -162,6 +214,7 @@ __Improvements__
* Return both `table_id` and `column_id` from `_OBS_GetGeometryScores`
1.1.7 (2016-12-15)
------------------
__Improvements__
@@ -174,6 +227,7 @@ __Improvements__
* Yields a ~50% improvement in performance for `_OBSGetGeomeryScores`.
1.1.6 (2016-12-08)
------------------
__Bugfixes__
@@ -200,6 +254,7 @@ __Improvements__
- Add ability to persist results to JSON for graph visualization later
1.1.5 (2016-11-29)
------------------
__Bugfixes__
@@ -207,6 +262,7 @@ __Bugfixes__
a geometry where it does not exist ([#220](https://github.com/CartoDB/observatory-extension/issues/220)).
1.1.4 (2016-11-21)
------------------
__Bugfixes__
@@ -214,10 +270,12 @@ __Bugfixes__
`OBS_GetLegacyMetadata` ([#216](https://github.com/CartoDB/observatory-extension/issues/216)).
1.1.3 (2016-11-15)
------------------
* Temporarily ignore EU data for the sake of testing
1.1.2 (2016-11-09)
------------------
__Improvements__
@@ -233,12 +291,14 @@ __API Changes (Internal)__
* Add internal `_OBS_GetGeometryScores`
1.1.1 (2016-10-14)
------------------
__Improvements__
* Test points for Canada and France ([#204](https://github.com/CartoDB/observatory-extension/issues/120))
1.1.0 (2016-10-04)
------------------
__Bugfixes__
@@ -261,6 +321,7 @@ __API Changes__
is also referred to here ([CartoDB/design#68](https://github.com/CartoDB/design/issues/68)).
1.0.7 (2016-09-20)
------------------
__Bugfixes__
@@ -272,6 +333,7 @@ __Improvements__
* Automatic tests work for Canada and Thailand
1.0.6 (2016-09-08)
------------------
__Improvements__
@@ -279,6 +341,7 @@ __Improvements__
framework logic from the observatory measure functions.
1.0.5 (2016-08-12)
------------------
__Improvements__
@@ -286,6 +349,7 @@ __Improvements__
any HTTP SQL API.
1.0.4 (2016-07-26)
------------------
__Bugfixes__
@@ -294,6 +358,7 @@ __Bugfixes__
([#173](https://github.com/CartoDB/observatory-extension/issues/173))
1.0.3 (2016-07-25)
------------------
__Bugfixes__

View File

@@ -1,64 +1,5 @@
# Observatory extension
CartoDB extension that implements the row-level functions needed by the Observatory Service.
## :warning: Deprecated :warning:
## Code organization
```
.
├── doc # documentation
├── release # released versions
└── src # source code
└── pg
├── sql
└── test
├── expected
├── fixtures
└── sql
```
# Development workflow
We distinguish two roles regarding the development cycle:
* *developers* will implement new functionality and bugfixes into
the codebase and will request for new releases of the extension.
* A *release manager* will attend these requests and will handle
the release process. The release process is sequential:
no concurrent releases will ever be in the works.
We use the default `develop` branch as the basis for development.
The `master` branch is used to merge and tag releases to be
deployed in production.
Developers shall create a new topic branch from `develop` for any new feature
or bugfix and commit their changes to it and eventually merge back into
the `develop` branch. When a new release is required a Pull Request
will be open against the `develop` branch.
The `develop` pull requests will be handled by the release manage,
who will merge into master where new releases are prepared and tagged.
The `master` branch is the sole responsibility of the release masters
and developers must not commit or merge into it.
## Development Guidelines
For a detailed description of the development process please see
the [CONTRIBUTING.md](CONTRIBUTING.md) guide.
Any modification to the source code
shall always be done in a topic branch created from the `develop` branch.
Tests, documentation and peer code reviews are required for all
modifications.
The tests are executed by running this from the top directory:
```
sudo make install
make test
```
## Release
The release and deployment process is described in the
[RELEASE.md](RELEASE.md) guide and it is the responsibility of the designated
release manager.
This repository has been deprecated! No further maintenance or development will be done.

9
carto-package.json Normal file
View File

@@ -0,0 +1,9 @@
{
"name": "observatory-server-extension",
"current_version": {
"requires": {
"postgresql": "^10.0.0",
"postgis": "^2.4.0.0"
}
}
}

View File

@@ -2,7 +2,7 @@
Use the following functions to retrieve [Boundary](https://carto.com/docs/carto-engine/data/overview/#boundary-data) data. Data ranges from small areas (e.g. US Census Block Groups) to large areas (e.g. Countries). You can access boundaries by point location lookup, bounding box lookup, direct ID access and several other methods described below.
You can [access](https://carto.com/docs/carto-engine/data/accessing) boundaries through CARTO Builder. The same methods will work if you are using the CARTO Engine to develop your application. We [encourage you](http://docs/carto-engine/data/accessing/#best-practices) to use table modifying methods (UPDATE and INSERT) over dynamic methods (SELECT).
You can [access](https://carto.com/docs/carto-engine/data/accessing) boundaries through CARTO Builder. The same methods will work if you are using the CARTO Engine to develop your application. We [encourage you](https://carto.com/docs/carto-engine/data/accessing/#best-practices) to use table modifying methods (UPDATE and INSERT) over dynamic methods (SELECT).
## OBS_GetBoundariesByGeometry(geom geometry, geometry_id text)
@@ -123,7 +123,7 @@ SET the_geom = OBS_GetBoundary(the_geom, 'us.census.tiger.block_group')
## OBS_GetBoundaryId(point_geometry, boundary_id)
The ```OBS_GetBoundaryId(point_geometry, boundary_id)``` returns a unique geometry_id for the boundary geometry that contains a given point geometry. See the [Boundary ID Glossary](http://docs/carto-engine/data/glossary/#boundary-ids). The method can be combined with ```OBS_GetBoundaryById(geometry_id)``` to create a point aggregation workflow.
The ```OBS_GetBoundaryId(point_geometry, boundary_id)``` returns a unique geometry_id for the boundary geometry that contains a given point geometry. See the [Boundary ID Glossary](https://carto.com/docs/carto-engine/data/glossary/#boundary-ids). The method can be combined with ```OBS_GetBoundaryById(geometry_id)``` to create a point aggregation workflow.
#### Arguments

View File

@@ -97,7 +97,7 @@ valid_timespan | Boolean | True if the `timespan` argument is a valid timespan f
Obtain all numerators that are available within a small rectangle.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326))
```
@@ -105,7 +105,7 @@ Obtain all numerators that are available within a small rectangle and are for
the United States only.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}');
```
@@ -113,7 +113,7 @@ Obtain all numerators that are available within a small rectangle and are
employment related for the United States only.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states, subsection/tags.employment}');
```
@@ -121,7 +121,7 @@ Obtain all numerators that are available within a small rectangle and are
related to both employment and age & gender for the United States only.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states, subsection/tags.employment, subsection/tags.age_gender}');
```
@@ -129,7 +129,7 @@ Obtain all numerators that work with US population (`us.census.acs.B01003001`)
as a denominator.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001')
WHERE valid_denom IS True;
```
@@ -138,7 +138,7 @@ Obtain all numerators that work with US states (`us.census.tiger.state`)
as a geometry.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, 'us.census.tiger.state')
WHERE valid_geom IS True;
```
@@ -146,7 +146,7 @@ WHERE valid_geom IS True;
Obtain all numerators available in the timespan `2011 - 2015`.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableNumerators(
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2011 - 2015')
WHERE valid_timespan IS True;
```
@@ -191,7 +191,7 @@ valid_timespan | Boolean | True if the `timespan` argument is a valid timespan f
Obtain all denominators that are available within a small rectangle.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableDenominators(
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326));
```
@@ -199,14 +199,14 @@ Obtain all denominators that are available within a small rectangle and are for
the United States only.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableDenominators(
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}');
```
Obtain all denominators for male population (`us.census.acs.B01001002`).
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableDenominators(
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01001002')
WHERE valid_numer IS True;
```
@@ -215,7 +215,7 @@ Obtain all denominators that work with US states (`us.census.tiger.state`)
as a geometry.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableDenominators(
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, 'us.census.tiger.state')
WHERE valid_geom IS True;
```
@@ -223,12 +223,12 @@ WHERE valid_geom IS True;
Obtain all denominators available in the timespan `2011 - 2015`.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableDenominators(
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2011 - 2015')
WHERE valid_timespan IS True;
```
## OBS_GetAvailableGeometries(bounds, filter_tags, numer_id, denom_id, timespan)
## OBS_GetAvailableGeometries(bounds, filter_tags, numer_id, denom_id, timespan, number_geometries)
Return available geometries within a boundary and with the specified
`filter_tags`.
@@ -242,6 +242,7 @@ filter_tags | Text[] | a list of filters. Only geometries for which all of thes
numer_id | Text | the ID of a numerator to check whether the geometry is valid against. Will not reduce length of returned table, but will change values for `valid_numer` (optional)
denom_id | Text | the ID of a denominator to check whether the geometry is valid against. Will not reduce length of returned table, but will change values for `valid_denom` (optional)
timespan | Text | the ID of a timespan to check whether the geometry is valid against. Will not reduce length of returned table, but will change values for `valid_timespan` (optional)
number_geometries | Integer | an additional variable that is used to adjust the calculation of the [score](https://carto.com/docs/carto-engine/data/discovery-functions/#returns-4) (optional)
#### Returns
@@ -274,7 +275,7 @@ meanmediansize | Numeric | Ignored
Obtain all geometries that are available within a small rectangle.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableGeometries(
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326));
```
@@ -282,14 +283,14 @@ Obtain all geometries that are available within a small rectangle and are for
the United States only.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableGeometries(
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}');
```
Obtain all geometries that work with total population (`us.census.acs.B01003001`).
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableGeometries(
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001')
WHERE valid_numer IS True;
```
@@ -297,7 +298,7 @@ WHERE valid_numer IS True;
Obtain all geometries with timespan `2015`.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableGeometries(
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2015')
WHERE valid_timespan IS True;
```
@@ -342,14 +343,14 @@ valid_geom | Boolean | True if the `geom_id` argument is a valid geometry for th
Obtain all timespans that are available within a small rectangle.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableTimespans(
SELECT * FROM OBS_GetAvailableTimespans(
ST_MakeEnvelope(-74, 41, -73, 40, 4326));
```
Obtain all timespans for total population (`us.census.acs.B01003001`).
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableTimespans(
SELECT * FROM OBS_GetAvailableTimespans(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001')
WHERE valid_numer IS True;
```
@@ -358,7 +359,7 @@ Obtain all timespans that work with US states (`us.census.tiger.state`)
as a geometry.
```SQL
SELECT * FROM cdb_observatory.OBS_GetAvailableTimespans(
SELECT * FROM OBS_GetAvailableTimespans(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, 'us.census.tiger.state')
WHERE valid_geom IS True;
```

View File

@@ -108,7 +108,7 @@ The ```OBS_GetMeasure(polygon, measure_id)``` function returns any Data Observat
Name |Description
--- | ---
polygon_geometry | a WGS84 polygon geometry (the_geom)
measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf))
measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf))
normalize | for measures that are **sums** (e.g. population) the default normalization is 'none' and response comes back as a raw value. Other options are 'denominator', which will use the denominator specified in the [Data Catalog](https://cartodb.github.io/bigmetadata/index.html) (optional)
boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract')
time_span | time span of interest (e.g., 2010 - 2014)
@@ -143,7 +143,7 @@ The ```OBS_GetMeasureById(geom_ref, measure_id, boundary_id)``` function returns
Name |Description
--- | ---
geom_ref | a geometry reference (e.g., a US Census geoid)
measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf))
measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf))
boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract')
time_span (optional) | time span of interest (e.g., 2010 - 2014). If `NULL` is passed, the measure from the most recent data will be used.
@@ -215,7 +215,7 @@ extent | A geometry of the extent of the input geometries
metadata | A JSON array composed of metadata input objects. Each indicates one desired measure for an output column, and optionally additional parameters about that column
num_timespan_options | How many historical time periods to include. Defaults to 1
num_score_options | How many alternative boundary levels to include. Defaults to 1
target_geoms | Target number of geometries. Boundaries with close to this many objects within `extent` will be ranked highest.
target_geoms | Target number of geometries. Boundaries with close to this many objects within `extent` will be ranked highest.
The schema of the metadata input objects are as follows:
@@ -321,6 +321,55 @@ SELECT OBS_GetMeta(
) FROM tablename
```
## OBS_MetadataValidation(extent geometry, geometry_type text, metadata json, target_geoms)
The ```OBS_MetadataValidation``` function performs a validation check over the known issues using the extent, type of geometry, and metadata that is being used in the ```OBS_GetMeta``` function.
#### Arguments
Name | Description
---- | -----------
extent | A geometry of the extent of the input geometries
geometry_type | The geometry type of the source data
metadata | A JSON array composed of metadata input objects. Each indicates one desired measure for an output column, and optional additional parameters about that column
target_geoms | Target number of geometries. Boundaries with close to this many objects within `extent` will be ranked highest
The schema of the metadata input objects are as follows:
Metadata Input Key | Description
--- | -----------
numer_id | The identifier for the desired measurement. If left blank, a `geom_id` is specified and the column returns a geometry, instead of a measurement
geom_id | Identifier for a desired geographic boundary level used to calculate measures. If undefined, this is automatically assigned. If defined, `numer_id` is blank and the column returns a geometry, instead of a measurement
normalization | The desired normalization. One of 'area', 'prenormalized', or 'denominated'. 'Area' will normalize the measure per square kilometer, 'prenormalized' will return the original value, and 'denominated' will normalize by a denominator. If the metadata object specifies a geometry, this is ignored
denom_id | When `normalization` is 'denominated', this is the identifier for a desired normalization column. This is automatically assigned. If the metadata object specifies a geometry, this is ignored
numer_timespan | The desired timespan for the measurement. If left unspecified, it defaults to the most recent timespan available
geom_timespan | The desired timespan for the geometry. If left unspecified, it defaults to the timespan matching `numer_timespan`
target_area | Instead of aiming to have `target_geoms` in the area of the geometry passed as `extent`, fill this area. Unit is square degrees WGS84. Set this to `0` if you want to use the smallest source geometry for this element of metadata. For example, if you are passing in points
target_geoms | Override global `target_geoms` for this element of metadata
max_timespan_rank | Only include timespans of this recency (For example, `1` is only the most recent timespan). There is no limit by default
max_score_rank | Only include boundaries of this relevance (for example, `1` is the most relevant boundary). The default is `1`
#### Returns
Key | Description
--- | -----------
valid | A boolean field that represents if the validation was successful or not
errors | A text array with all possible errors
#### Examples
Validate metadata with two additional columns of US census data; using a boundary relevant for the geometry provided and the latest timespan. Limited to the most recent column, and the most relevant, based on the extent and density of input geometries in `tablename`.
```SQL
SELECT OBS_MetadataValidation(
ST_SetSRID(ST_Extent(the_geom), 4326),
ST_GeometryType(the_geom),
'[{"numer_id": "us.census.acs.B01003001"}, {"numer_id": "us.census.acs.B01001002"}]',
COUNT(*)::INTEGER
) FROM tablename
GROUP BY ST_GeometryType(the_geom)
```
## OBS_GetData(geomvals array[geomval], metadata json)
The ```OBS_GetData(geomvals, metadata)``` function returns a measure and/or
@@ -465,7 +514,7 @@ WITH meta AS (
'[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.county"}]'
) meta FROM tablename)
SELECT id AS fips, (data->0->>'value')::Numeric AS pop_density
FROM OBS_GetData((SELECT ARRAY_AGG((fips) FROM tablename),
FROM OBS_GetData((SELECT ARRAY_AGG(fips) FROM tablename),
(SELECT meta FROM meta))
```
@@ -481,7 +530,7 @@ WITH meta AS (
) meta FROM tablename),
data as (
SELECT id AS fips, (data->0->>'value') AS pop_density
FROM OBS_GetData((SELECT ARRAY_AGG((fips) FROM tablename),
FROM OBS_GetData((SELECT ARRAY_AGG(fips) FROM tablename),
(SELECT meta FROM meta)))
UPDATE tablename
SET pop_density = data.pop_density

View File

@@ -0,0 +1,8 @@
## Overview
Quick reference guides for learning how to use the Data Observatory features.
- [Data discovery guide](https://carto.com/developers/cartoframes/guides/Data-discovery/)
- [Data enrichment guide](https://carto.com/developers/cartoframes/guides/Data-enrichment/)
Play with [real examples](https://carto.com/developers/cartoframes/examples/#example-data-observatory).

View File

@@ -0,0 +1,5 @@
## Introduction
Browse the interactive API documentation to search for specific Data Observatory methods, arguments, and sample code that can be used to build your applications.
[Check the reference](https://carto.com/developers/cartoframes/reference/#heading-Data-Observatory).

View File

@@ -0,0 +1,185 @@
## Measures functions examples
- Add a measure to an empty numeric column based on point locations in your table.
```SQL
UPDATE tablename
SET total_population = OBS_GetUSCensusMeasure(the_geom, 'Total Population')
- Add a measure to an empty numeric column based on polygons in your table
```SQL
UPDATE tablename
SET local_male_population = OBS_GetUSCensusMeasure(the_geom, 'Male Population')
```
- Add a measure to an empty numeric column based on point locations in your table
```SQL
UPDATE tablename
SET median_home_value_sqft = OBS_GetMeasure(the_geom, 'us.zillow.AllHomes_MedianValuePerSqft')
```
- Add a measure to an empty column based on polygons in your table
```SQL
UPDATE tablename
SET household_count = OBS_GetMeasure(the_geom, 'us.census.acs.B11001001')
```
- Add the Category to an empty column text column based on point locations in your table
```SQL
UPDATE tablename
SET segmentation = OBS_GetCategory(the_geom, 'us.census.spielman_singleton_segments.X55')
```
- Obtain metadata that can augment with one additional column of US population
data, using a boundary relevant for the geometry provided and latest timespan.
Limit to only the most recent column most relevant to the extent & density of
input geometries in `tablename`.
```SQL
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001"}]',
1, 1,
COUNT(*)
) FROM tablename
```
- Obtain metadata that can augment with one additional column of US population
data, using census tract boundaries.
```SQL
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.census_tract"}]',
1, 1,
COUNT(*)
) FROM tablename
```
- Obtain metadata that can augment with two additional columns, one for total
population and one for male population.
```SQL
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001"}, {"numer_id": "us.census.acs.B01001002"}]',
1, 1,
COUNT(*)
) FROM tablename
```
- Validate metadata with two additional columns of US census data; using a boundary relevant for the geometry provided and the latest timespan. Limited to the most recent column, and the most relevant, based on the extent and density of input geometries in `tablename`.
```SQL
SELECT OBS_MetadataValidation(
ST_SetSRID(ST_Extent(the_geom), 4326),
ST_GeometryType(the_geom),
'[{"numer_id": "us.census.acs.B01003001"}, {"numer_id": "us.census.acs.B01001002"}]',
COUNT(*)::INTEGER
) FROM tablename
GROUP BY ST_GeometryType(the_geom)
```
- Obtain population densities for every geometry in a table, keyed by cartodb_id:
```SQL
WITH meta AS (
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001"}]',
1, 1, COUNT(*)
) meta FROM tablename)
SELECT id AS cartodb_id, (data->0->>'value')::Numeric AS pop_density
FROM OBS_GetData((SELECT ARRAY_AGG((the_geom, cartodb_id)::geomval) FROM tablename),
(SELECT meta FROM meta))
```
- Update a table with a blank numeric column called `pop_density` with population
densities:
```SQL
WITH meta AS (
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001"}]',
1, 1, COUNT(*)
) meta FROM tablename),
data AS (
SELECT id AS cartodb_id, (data->0->>'value')::Numeric AS pop_density
FROM OBS_GetData((SELECT ARRAY_AGG((the_geom, cartodb_id)::geomval) FROM tablename),
(SELECT meta FROM meta)))
UPDATE tablename
SET pop_density = data.pop_density
FROM data
WHERE cartodb_id = data.id
```
- Update a table with two measurements at once, population density and household
density. The table should already have a Numeric column `pop_density` and
`household_density`.
```SQL
WITH meta AS (
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom),4326),
'[{"numer_id": "us.census.acs.B01003001"},{"numer_id": "us.census.acs.B11001001"}]',
1, 1, COUNT(*)
) meta from tablename),
data AS (
SELECT id,
data->0->>'value' AS pop_density,
data->1->>'value' AS household_density
FROM OBS_GetData((SELECT ARRAY_AGG((the_geom, cartodb_id)::geomval) FROM tablename),
(SELECT meta FROM meta)))
UPDATE tablename
SET pop_density = data.pop_density,
household_density = data.household_density
FROM data
WHERE cartodb_id = data.id
```
- Obtain population densities for every row of a table with FIPS code county IDs
(USA).
```SQL
WITH meta AS (
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.county"}]'
) meta FROM tablename)
SELECT id AS fips, (data->0->>'value')::Numeric AS pop_density
FROM OBS_GetData((SELECT ARRAY_AGG(fips) FROM tablename),
(SELECT meta FROM meta))
```
- Update a table with population densities for every FIPS code county ID (USA).
This table has a blank column called `pop_density` and fips codes stored in a
column `fips`.
```SQL
WITH meta AS (
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.county"}]'
) meta FROM tablename),
data as (
SELECT id AS fips, (data->0->>'value') AS pop_density
FROM OBS_GetData((SELECT ARRAY_AGG(fips) FROM tablename),
(SELECT meta FROM meta)))
UPDATE tablename
SET pop_density = data.pop_density
FROM data
WHERE fips = data.id
```

View File

@@ -0,0 +1,76 @@
- Insert all Census Tracts from Lower Manhattan and nearby areas within the supplied bounding box to a table named `manhattan_census_tracts` which has columns `the_geom` (geometry) and `geom_refs` (text).
```sql
INSERT INTO manhattan_census_tracts(the_geom, geom_refs)
SELECT *
FROM OBS_GetBoundariesByGeometry(
ST_MakeEnvelope(-74.0251922607,40.6945658517,
-73.9651107788,40.7377626342,
4326),
'us.census.tiger.census_tract')
```
- Insert points that lie on Census Tracts from Lower Manhattan and nearby areas within the supplied bounding box to a table named `manhattan_tract_points` which has columns `the_geom` (geometry) and `geom_refs` (text).
```sql
INSERT INTO manhattan_tract_points (the_geom, geom_refs)
SELECT *
FROM OBS_GetPointsByGeometry(
ST_MakeEnvelope(-74.0251922607,40.6945658517,
-73.9651107788,40.7377626342,
4326),
'us.census.tiger.census_tract')
```
- Overwrite a point geometry with a boundary geometry that contains it in your table
```SQL
UPDATE tablename
SET the_geom = OBS_GetBoundary(the_geom, 'us.census.tiger.block_group')
```
- Write the US Census block group geoid that contains the point geometry for every row as a new column in your table.
```SQL
UPDATE tablename
SET geometry_id = OBS_GetBoundaryId(the_geom, 'us.census.tiger.block_group')
```
- Use a table of `geometry_id`s (e.g., geoid from the U.S. Census) to select the unique boundaries that they correspond to and insert into a table called, `overlapping_polygons`. This is a useful method for creating new choropleths of aggregate data.
```SQL
INSERT INTO overlapping_polygons (the_geom, geometry_id, point_count)
SELECT
OBS_GetBoundaryById(geometry_id, 'us.census.tiger.county') As the_geom,
geometry_id,
count(*)
FROM tablename
GROUP BY geometry_id
```
- Insert into table `denver_census_tracts` the census tract boundaries and geom_refs of census tracts which intersect within 10 miles of downtown Denver, Colorado.
```sql
INSERT INTO denver_census_tracts(the_geom, geom_refs)
SELECT *
FROM OBS_GetBoundariesByPointAndRadius(
CDB_LatLng(39.7392, -104.9903), -- Denver, Colorado
10000 * 1.609, -- 10 miles (10km * conversion to miles)
'us.census.tiger.census_tract')
```
- Insert into table `denver_tract_points` points on US census tracts and their corresponding geoids for census tracts which intersect within 10 miles of downtown Denver, Colorado, USA.
```sql
INSERT INTO denver_tract_points(the_geom, geom_refs)
SELECT *
FROM OBS_GetPointsByPointAndRadius(
CDB_LatLng(39.7392, -104.9903), -- Denver, Colorado
10000 * 1.609, -- 10 miles (10km * conversion to miles)
'us.census.tiger.census_tract')
```

View File

@@ -0,0 +1,160 @@
```SQL
SELECT * FROM OBS_Search('home value')
```
```SQL
SELECT * FROM OBS_GetAvailableBoundaries(CDB_LatLng(40.7, -73.9))
```
- Obtain all numerators that are available within a small rectangle.
```SQL
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326))
```
- Obtain all numerators that are available within a small rectangle and are for
the United States only.
```SQL
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}');
```
- Obtain all numerators that are available within a small rectangle and are
employment related for the United States only.
```SQL
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states, subsection/tags.employment}');
```
- Obtain all numerators that are available within a small rectangle and are
related to both employment and age & gender for the United States only.
```SQL
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states, subsection/tags.employment, subsection/tags.age_gender}');
```
- Obtain all numerators that work with US population (`us.census.acs.B01003001`)
as a denominator.
```SQL
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001')
WHERE valid_denom IS True;
```
- Obtain all numerators that work with US states (`us.census.tiger.state`)
as a geometry.
```SQL
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, 'us.census.tiger.state')
WHERE valid_geom IS True;
```
- Obtain all numerators available in the timespan `2011 - 2015`.
```SQL
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2011 - 2015')
WHERE valid_timespan IS True;
```
- Obtain all denominators that are available within a small rectangle.
```SQL
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326));
```
- Obtain all denominators that are available within a small rectangle and are for
the United States only.
```SQL
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}');
```
- Obtain all denominators for male population (`us.census.acs.B01001002`).
```SQL
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01001002')
WHERE valid_numer IS True;
```
- Obtain all denominators that work with US states (`us.census.tiger.state`)
as a geometry.
```SQL
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, 'us.census.tiger.state')
WHERE valid_geom IS True;
```
- Obtain all denominators available in the timespan `2011 - 2015`.
```SQL
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2011 - 2015')
WHERE valid_timespan IS True;
```
- Obtain all geometries that are available within a small rectangle.
```SQL
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326));
```
- Obtain all geometries that are available within a small rectangle and are for
the United States only.
```SQL
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}');
```
- Obtain all geometries that work with total population (`us.census.acs.B01003001`).
```SQL
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001')
WHERE valid_numer IS True;
```
- Obtain all geometries with timespan `2015`.
```SQL
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2015')
WHERE valid_timespan IS True;
```
- Obtain all timespans that are available within a small rectangle.
```SQL
SELECT * FROM OBS_GetAvailableTimespans(
ST_MakeEnvelope(-74, 41, -73, 40, 4326));
```
- Obtain all timespans for total population (`us.census.acs.B01003001`).
```SQL
SELECT * FROM OBS_GetAvailableTimespans(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001')
WHERE valid_numer IS True;
```
- Obtain all timespans that work with US states (`us.census.tiger.state`)
as a geometry.
```SQL
SELECT * FROM OBS_GetAvailableTimespans(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, 'us.census.tiger.state')
WHERE valid_geom IS True;
```

View File

@@ -0,0 +1,107 @@
{
"main": {
"file": "import/import-from-database.md"
},
"categories": [
{
"title": "Import",
"samples": [
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
},
{
"title": "Import standard table",
"desc": "Import standard table into your CARTO account.",
"file": "import/import-standard-table.md"
},
{
"title": "Import sync table",
"desc": "Import sync table into your CARTO account from database.",
"file": "import/import-sync-table.md"
},
{
"title": "Import sync table as dataset",
"desc": "Import sync table as dataset into your CARTO account.",
"file": "import/import-from-database.md"
}
]
},
{
"title": "Export",
"samples": [
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
},
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
},
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
},
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
}
]
},
{
"title": "Tables",
"samples": [
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
},
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
},
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
},
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
}
]
},
{
"title": "Misc",
"samples": [
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
},
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
},
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
},
{
"title": "Import from database",
"desc": "Import data into your CARTO account from database.",
"file": "import/import-from-database.md"
}
]
}
]
}

View File

@@ -0,0 +1,88 @@
## Overview
For Enterprise account plans, the [Data Observatory](https://carto.com/data) provides access to a searchable catalog of advanced location data, such as census block, population segments, boundaries and so on. A set of SQL functions allow you to augment your own data and broaden your analysis by discovering boundaries and measures of data from this catalog.
This section describes the Data Observatory functions and the type of data that it returns.
### Functions Overview
There are several functions for accessing different categories of data into your visualizations. You can discover and retrieve data by requesting OBS functions from the Data Observatory. These Data Observatory functions are designed for specific, targeted methods of data analysis. The response for these functions are classified into two primary types of data results; measures and boundaries.
- Boundaries are the geospatial boundaries you need to map or aggregate your data. Examples include Country Borders, Zip Code Tabulation Areas, and Counties
- Measures are the various dimensions of information that CARTO can tell you about a place. Examples include, Population, Household Income, and Median Age
Depending on the OBS function, you will get one, or both, types of data in your result. See [Measures and Boundary Data](#measures-and-boundary-results) for details about available data.
#### Measures Functions
Use location-based measures to analyze your data by accessing population and industry measurements at point locations, or within a region or polygon. These include variables for demographic, economic, and other types of information.
- See [Measures Functions]({{ site.dataobservatory_docs }}/reference/#measures-functions) for specific OBS functions
- Returns Measures data results
#### Boundary Functions
Use global boundaries to analyze your data by accessing multi-scaled geometries for visualizations. Examples include US Block Groups and Census Tracts. These enable you to aggregate your data into geometric polygons. You can also use your own data to query specific boundaries.
- See [Boundary Functions]({{ site.dataobservatory_docs }}/reference/#boundary-functions) for specific OBS functions
- Returns Boundary data results
#### Discovery Functions
Discovery Functions provide easier ways for you to find Measures and Boundaries of interest in the Data Observatory. The Discovery functions allow you to perform targeted searches for Measures, or use your own data to discover what is available at a given location. As this is a **retrieval tool** of the Data Observatory, the query results do not change your table. The response back displays one or more identifiers as matches to your search criteria. Each unique identifier can _then_ be used as part of other OBS functions to access any of the other Data Observatory functions.
- See [Discovery Functions]({{ site.dataobservatory_docs }}/reference/#discovery-functions) for specific OBS functions
- Returns Boundary or Measures matches for your data
### Measures and Boundary Results
The response from the Data Observatory functions are classified as either Measures or Boundary. Depending on your OBS function, you will get one, or both, types of data in your result.
#### Measures Data
Measures provide details about local populations, markets, industries and other dimensions. You can search for available Measures using the Discovery functions, or by viewing the Data Catalog. Measures can be requested for Point locations, or can be summarized for Polygons (regions). In general, Point location requests will return raw aggregate values (e.g. Median Rent), or will provide amounts per square kilometer (e.g. Population). The total square kilometers of the area searched will be returned, allowing you to get raw counts, if needed. Alternatively, if you search over a polygon, raw counts will be returned.
The following table indicates where Measures data results are available. Measures can include raw measures and when indicated, can provide geometries.
Data Category | Examples | Type of Data Response | Availability
--- | ---
Housing | Vacant Housing Units, Median Rent, Units for Sale, Mortgage Count | Point measurement, Area measurement, With Geo Border | United States
Income | Median Household Income, Gini Index | Point measurement, Area measurement, With Geo Border | United States
Education | Students Enrolled in School, Population Completed H.S | Point measurement, Area measurement, With Geo Border | United States
Languages | Speaks Spanish at Home, Speaks only English at Home | Point measurement, Area measurement, With Geo Border | United States
Employment | Workers over the Age of 16 | Point measurement, Area measurement, With Geo Border | United States
Jobs and Workforce | Origin-Destination of Workforce, Job Wages by job type | Point measurement, Area measurement, With Geo Border | United States
Transportation | Commuters by Public Transportation, Work at Home | Point measurement, Area measurement, With Geo Border | United States
Race, Age and Gender | Asian Population, Median Age, Job wages by race | Point measurement, Area measurement, With Geo Border | United States, Spain
Population | Population per Square Kilometer | Point measurement, Area measurement | United States, Spain
#### Boundary Data
The following table indicates where Boundary data results are available.
Boundary Name | Availability
--- | ---
Countries | Global
First-level administrative subdivisions | Global
Second-level administrative subdivisions | United States
Zip Code Tabulation Areas (ZCTA) | United States
Congressional Districts | United States
Digital Marketing Areas | United States
Census Public Use Microdata Areas | United States
Census Tracts |United States
Census Block Groups | United States
US Census Blocks | United States
Disputed Areas | Global
Marine Area | Global
Oceans | Global
Continents | Global
Timezones | Global
##### Water Clipping Levels
Many geometries come with various degrees of water accuracy (how closely they follow features such as coastlines). Water clipping refers to how the level of accuracy is returned by the Data Observatory. Data results can either include no clip (no water areas are clipped in the geometry), or high clip (coastlines and inland waterways are clipped out of the final geometry). For example, US Census data might only show coastlines as a straight border line, and not as an inland water area. To find out which levels of water clipping are available for Boundary layers, refer to the [Data Catalog](https://cartodb.github.io/bigmetadata/index.html).
**Note:** While high clip water levels may be better for some kinds of maps and analysis, this type of data consumes more account storage space and may be subject to quota limitations.
For details about how to access any of this data, see [Accessing the Data Observatory]({{ site.dataobservatory_docs }}/guides/accessing-the-data-observatory/).

View File

@@ -0,0 +1,121 @@
## Accessing the Data Observatory
The workflow for accessing the Data Observatory includes using a SQL query to apply a specific method of data enrichment or analysis to your data. You can access the Data Observatory by applying a custom query in CARTO Builder, or directly through the SQL API.
#### Prerequisites
You must have an Enterprise account and be familiar with using SQL requests.
- The Data Observatory catalog includes data that is managed by CARTO, on a SaaS cloud platform. For Enterprise users, the Data Observatory can be enabled by contacting CARTO.
- A set of Data Observatory functions (prefaced with "OBS" for Observatory), allow you to retrieve boundaries and measures data through a SQL request. These functions should be used with UPDATE and INSERT statements, not SELECT statements, as we are currently not supporting dynamic use of the Data Observatory
**Tip:** See the recommended [Best Practices](#best-practices) for using the Data Observatory.
### Enrich from Data Observatory
As an alternative to using SQL queries, you can apply the _Enrich from Data Observatory_ ANALYSIS to a selected map layer in CARTO Builder. This enables you add a new column with contextual demographic and economic measures, without having to apply the code yourself. For details, see the [Enrich from Data Observatory Guide](https://carto.com/learn/guides/analysis/enrich-from-data-observatory) in our Learn hub.
### Apply OBS Functions to a Dataset
This procedure describes how to access the Data Observatory functions by applying SQL queries in a selected dataset.
1) Review the [prerequisites](#prerequisites) section before attempting to access any of the Data Observatory functions
2) [View the Data Observatory Catalog](https://cartodb.github.io/bigmetadata/index.html)
An overview for each of the analyzed functions of data appears, and indicates the unique function signature needed to access the catalog item. You can copy the OBS function from the Data Observatory catalog and modify the placeholder parameters shown in curly brackets (e.g. "{table_name}").
3) From _Your datasets_ dashboard in CARTO, click _NEW DATASET_ and _CREATE EMPTY DATASET_.
This creates an untitled table. You can get population measurements from the Data Observatory to build your dataset and create a map.
4) The SQL view is available when you are viewing your dataset in table view (Data View). Click the slider to switch between viewing your data by METADATA (table) to _SQL_ (opens the SQL view).
5) Apply the OBS function to modify your table.
For example, the following image displays a SQL query using the Boundary function, [`OBS_GetBoundariesByGeometry(geom geometry, geometry_id text)`](https://carto.com/docs/carto-engine/data/boundary-functions/#obsgetboundariesbygeometrygeom-geometry-geometryid-text) function. The SQL query inserts the boundary data as a single polygon geometry for each row of data.
![Query OBS Function in empty dataset](../img/obs_getboundary.jpg)
**Tip:** Want to insert population data to create a dataset? Replace `{my table name}` with your dataset name, and apply the SQL query:
```sql
INSERT INTO {my table name} (the_geom, name)
SELECT *
FROM OBS_GetBoundariesByGeometry(
st_makeenvelope(-73.97257804870605,40.671134192879286,-73.89052391052246,40.722868115036974, 4326),
'us.census.tiger.census_tract'
) As m(the_geom, geoid);
```
Another example shows how to get the local male population into your dataset. Before applying the SQL query, click _ADD COLUMN_ to create and name a column to store the [`OBS_GetMeasure`]({{ site.dataobservatory_docs}}/reference/#obsgetmeasurepolygon-geometry-measureid-text) data.
![Query local male population and apply to data](../img/local_male_pop.jpg)
**Tip:** Want to update your dataset to include the local male population from the Data Observatory? Replace `{my table name}` with your dataset name, and apply the SQL query:
```sql
UPDATE {my table name}
SET local_male_population = OBS_GetMeasure(the_geom, 'us.census.acs.B01001002')
```
6) Click _CREATE MAP_ from your dataset, to visualize the Data Observatory results. You can add custom styling, and add widgets to better visualize your data
![Visualize Data Observatory results](../img/visualize_obs_data.jpg)
### SQL API and OBS Functions
This procedure describes how to access the Data Observatory functions directly through the SQL API.
1. In order to use the SQL API, you must be [authenticated]({{ site.bdataobservatory_docs }}/guides/authentication/#authentication) using API keys
**Note:** Review the [prerequisites](#prerequisites) section before attempting to access any of the Data Observatory functions and [view the Data Observatory Catalog](https://cartodb.github.io/bigmetadata/index.html) to identify the OBS function you are looking for.
2. Query the Data Observatory directly with a specified `OBS` function to apply the results (Measures/Boundaries data) to your table, with the INSERT or UPDATE function
```sql
https://{username}.carto.com/api/v2/sql?q=UPDATE {tablename}
SET local_male_population = OBS_GetMeasure(the_geom, 'us.census.acs.B01001002')&api_key={api_key}
```
### Tips
Other useful tips about OBS functions:
- Some Data Observatory functions return geometries, enabling you to apply an UPDATE statement with an OBS function, to update `the_geom` column
- To include [water clipping levels]({{ site.dataobservatory_docs }}/guides/overview/#water-clipping-levels) as part of your results, append `_clipped` as part of the OBS function. For example:
```sql
UPDATE {tablename}
SET local_male_population = OBS_GetMeasure(the_geom, 'us.census.acs.B01001002','area','us.census.tiger.census_tract_clipped')
```
### Best Practices
The following usage notes are recommended when using the Data Observatory functions in SQL queries:
- It is discouraged to use the SELECT operation with the Data Observatory functions in your map layers. The results may be visible, but CARTO may not support dynamic rendering of the Data Observatory in the future, so your visualizations may break
The Data Observatory is **recommended** to be used with INSERT or UPDATE operations, for applying analyzed measures and boundaries data to your tables. While SELECT (retrieve) is standard for SQL API requests, be mindful of quota consumption and use INSERT (to insert a new record) or UPDATE (to update an existing record), for best practices.
**Exception:** [Discovery Functions]({{ site.dataobservatory_docs }}/guides/overview/#discovery-functions) are the exception. You can use SELECT as these functions are not actually retrieving data, they are retrieving ids that you can use for other functions.
- You can reduce storage space for unneeded geometries and optimize query optimizations by applying the PostGIS [`ST_Simplify`](http://www.postgis.org/docs/ST_Simplify.html) function. For example, you can simplify the `the_geom` for a large table of polygons and reduce the size of them for quicker rendering. For other tips, see the [most commonly used PostGIS functions](https://carto.com/docs/faqs/postgresql-and-postgis/#what-are-the-most-common-postgis-functions) that you can apply with CARTO
- Only point or polygon geometries are supported for OBS functions. If you attempt to apply Measures or Boundary results to line geometries, an error appears
- The Data Observatory is optimal for modifying existing tables with analytical results, not for building new tables of data
**Exception:** Exceptions apply for the following boundary functions, since they were designed to return multiple responses of geographical identifiers, as opposed to a single geometry. Create an empty dataset and build a new dataset from a SQL query, using any one of these boundary functions.
- [`OBS_GetBoundariesByGeometry(geom geometry, geometry_id text)`]({{ site.dataobservatory_docs }}/reference/#boundary-functions#obsgetboundariesbygeometrygeom-geometry-geometryid-text)
- [`OBS_GetPointsByGeometry(polygon geometry, geometry_id text)`]({{ site.dataobservatory_docs }}/reference/#boundary-functions#obsgetpointsbygeometrypolygon-geometry-geometryid-text)
- [`OBS_GetBoundariesByPointAndRadius(point geometry, radius numeric, boundary_id text`]({{ site.dataobservatory_docs }}/reference/#boundary-functions#obsgetboundariesbypointandradiuspoint-geometry-radius-numeric-boundaryid-text)
- [`OBS_GetPointsByPointAndRadius(point geometry, radius numeric, boundary_id text`]({{ site.dataobservatory_docs }}/reference/#boundary-functions#obsgetpointsbypointandradiuspoint-geometry-radius-numeric-boundaryid-text)
- For optimal performance, each SQL request should not exceed 100 rows. As an alternative, you can use a [SQL Batch Query](/docs/carto-engine/sql-api/batch-queries) for queries with long-running CPU processing times
### Examples
View our [CARTO Blogs](https://carto.com/blog/categories/product/) for examples that highlight the benefits of using the Data Observatory.

View File

@@ -0,0 +1,126 @@
## Glossary
A list of boundary ids and measure_names for Data Observatory functions. For US based boundaries, the Shoreline Clipped version provides a high-quality shoreline clipping for mapping uses.
### Boundary IDs
Boundary Name | Boundary ID | Shoreline Clipped Boundary ID
--------------------- | --------------------- | ---
US States | us.census.tiger.state | us.census.tiger.state_clipped
US County | us.census.tiger.county | us.census.tiger.county_clipped
US Census Zip Code Tabulation Areas | us.census.tiger.zcta5 | us.census.tiger.zcta5_clipped
US Census Tracts | us.census.tiger.census_tract | us.census.tiger.census_tract_clipped
US Elementary School District | us.census.tiger.school_district_elementary | us.census.tiger.school_district_elementary_clipped
US Secondary School District | us.census.tiger.school_district_secondary | us.census.tiger.school_district_secondary_clipped
US Unified School District | us.census.tiger.school_district_unified | us.census.tiger.school_district_unified_clipped
US Congressional Districts | us.census.tiger.congressional_district | us.census.tiger.congressional_district_clipped
US Census Blocks | us.census.tiger.block | us.census.tiger.block_clipped
US Census Block Groups | us.census.tiger.block_group | us.census.tiger.block_group_clipped
US Census PUMAs | us.census.tiger.puma | us.census.tiger.puma_clipped
US Incorporated Places | us.census.tiger.place | us.census.tiger.place_clipped
ES Sección Censal | es.ine.geom | none
Regions (First-level Administrative) | whosonfirst.wof_region_geom | none
Continents | whosonfirst.wof_continent_geom | none
Countries | whosonfirst.wof_country_geom | none
Marine Areas | whosonfirst.wof_marinearea_geom | none
Disputed Areas | whosonfirst.wof_disputed_geom | none
### OBS_GetUSCensusMeasure Names Table
This list contains human readable names accepted in the ```OBS_GetUSCensusMeasure``` function. For the more comprehensive list of columns available to the ```OBS_GetMeasure``` function, see the [Data Observatory Catalog](https://cartodb.github.io/bigmetadata/index.html).
Measure ID | Measure Name | Measure Description
--------------------- | --------------------- | ---
us.census.acs.B01002001 | Median Age | The median age of all people in a given geographic area.
us.census.acs.B15003021 | Population Completed Associates Degree | The number of people in a geographic area over the age of 25 who obtained a associates degree, and did not complete a more advanced degree.
us.census.acs.B15003022 | Population Completed Bachelors Degree | The number of people in a geographic area over the age of 25 who obtained a bachelors degree, and did not complete a more advanced degree.
us.census.acs.B15003023 | Population Completed Masters Degree | The number of people in a geographic area over the age of 25 who obtained a masters degree, but did not complete a more advanced degree.
us.census.acs.B14001007 | Students Enrolled in Grades 9 to 12 | The total number of people in each geography currently enrolled in grades 9 through 12 inclusive. This corresponds roughly to high school.
us.census.acs.B05001006 | Not a U.S. Citizen Population | The number of people within each geography who indicated that they are not U.S. citizens.
us.census.acs.B19001012 | Households with income of $60,000 To $74,999 | The number of households in a geographic area whose annual income was between $60,000 and $74,999.
us.census.acs.B01003001 | Total Population | The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates.
us.census.acs.B01001002 | Male Population | The number of people within each geography who are male.
us.census.acs.B01001026 | Female Population | The number of people within each geography who are female.
us.census.acs.B03002003 | White Population | The number of people identifying as white, non-Hispanic in each geography.
us.census.acs.B03002004 | Black or African American Population | The number of people identifying as black or African American, non-Hispanic in each geography.
us.census.acs.B03002006 | Asian Population | The number of people identifying as Asian, non-Hispanic in each geography.
us.census.acs.B03002012 | Hispanic Population | The number of people identifying as Hispanic or Latino in each geography.
us.census.acs.B03002005 | American Indian and Alaska Native Population | The number of people identifying as American Indian or Alaska native in each geography.
us.census.acs.B03002008 | Other Race population | The number of people identifying as another race in each geography.
us.census.acs.B03002009 | Two or more races population | The number of people identifying as two or more races in each geography.
us.census.acs.B03002002 | Population not Hispanic | The number of people not identifying as Hispanic or Latino in each geography.
us.census.acs.B23025001 | Population age 16 and over | The number of people in each geography who are age 16 or over.
us.census.acs.B08006001 | Workers over the Age of 16 | The number of people in each geography who work. Workers include those employed at private for-profit companies, the self-employed, government workers and non-profit employees.
us.census.acs.B08006002 | Commuters by Car, Truck, or Van | The number of workers age 16 years and over within a geographic area who primarily traveled to work by car, truck or van. This is the principal mode of travel or type of conveyance, by distance rather than time, that the worker usually used to get from home to work.
us.census.acs.B08006003 | Commuters who drove alone | The number of workers age 16 years and over within a geographic area who primarily traveled by car driving alone. This is the principal mode of travel or type of conveyance, by distance rather than time, that the worker usually used to get from home to work.
us.census.acs.B11001001 | Households | A count of the number of households in each geography. A household consists of one or more people who live in the same dwelling and also share at meals or living accommodation, and may consist of a single family or some other grouping of people.
us.census.acs.B08006004 | Commuters by Carpool | The number of workers age 16 years and over within a geographic area who primarily traveled to work by carpool. This is the principal mode of travel or type of conveyance, by distance rather than time, that the worker usually used to get from home to work.
us.census.acs.B08301010 | Commuters by Public Transportation | The number of workers age 16 years and over within a geographic area who primarily traveled to work by public transportation. This is the principal mode of travel or type of conveyance, by distance rather than time, that the worker usually used to get from home to work.
us.census.acs.B08006009 | Commuters by Bus | The number of workers age 16 years and over within a geographic area who primarily traveled to work by bus. This is the principal mode of travel or type of conveyance, by distance rather than time, that the worker usually used to get from home to work. This is a subset of workers who commuted by public transport.
us.census.acs.B08006011 | Commuters by Subway or Elevated | The number of workers age 16 years and over within a geographic area who primarily traveled to work by subway or elevated train. This is the principal mode of travel or type of conveyance, by distance rather than time, that the worker usually used to get from home to work. This is a subset of workers who commuted by public transport.
us.census.acs.B08006015 | Walked to Work | The number of workers age 16 years and over within a geographic area who primarily walked to work. This would mean that of any way of getting to work, they travelled the most distance walking.
us.census.acs.B08006017 | Worked at Home | The count within a geographical area of workers over the age of 16 who worked at home.
us.census.acs.B09001001 | Children under 18 Years of Age | The number of people within each geography who are under 18 years of age.
us.census.acs.B14001001 | Population 3 Years and Over | The total number of people in each geography age 3 years and over. This denominator is mostly used to calculate rates of school enrollment.
us.census.acs.B14001002 | Students Enrolled in School | The total number of people in each geography currently enrolled at any level of school, from nursery or pre-school to advanced post-graduate education. Only includes those over the age of 3.
us.census.acs.B14001005 | Students Enrolled in Grades 1 to 4 | The total number of people in each geography currently enrolled in grades 1 through 4 inclusive. This corresponds roughly to elementary school.
us.census.acs.B14001006 | Students Enrolled in Grades 5 to 8 | The total number of people in each geography currently enrolled in grades 5 through 8 inclusive. This corresponds roughly to middle school.
us.census.acs.B14001008 | Students Enrolled as Undergraduate in College | The number of people in a geographic area who are enrolled in college at the undergraduate level. Enrollment refers to being registered or listed as a student in an educational program leading to a college degree. This may be a public school or college, a private school or college.
us.census.acs.B15003001 | Population 25 Years and Over | The number of people in a geographic area who are over the age of 25. This is used mostly as a denominator of educational attainment.
us.census.acs.B15003017 | Population Completed High School | The number of people in a geographic area over the age of 25 who completed high school, and did not complete a more advanced degree.
us.census.acs.B15003019 | Population completed less than one year of college, no degree | The number of people in a geographic area over the age of 25 who attended college for less than one year and no further.
us.census.acs.B15003020 | Population completed more than one year of college, no degree | The number of people in a geographic area over the age of 25 who attended college for more than one year but did not obtain a degree.
us.census.acs.B16001001 | Population 5 Years and Over | The number of people in a geographic area who are over the age of 5. This is primarily used as a denominator of measures of language spoken at home.
us.census.acs.B16001002 | Speaks only English at Home | The number of people in a geographic area over age 5 who speak only English at home.
us.census.acs.B16001003 | Speaks Spanish at Home | The number of people in a geographic area over age 5 who speak Spanish at home, possibly in addition to other languages.
us.census.acs.B17001001 | Population for Whom Poverty Status Determined | The number of people in each geography who could be identified as either living in poverty or not. This should be used as the denominator when calculating poverty rates, as it excludes people for whom it was not possible to determine poverty.
us.census.acs.B17001002 | Income In The Past 12 Months Below Poverty Level | The number of people in a geographic area who are part of a family (which could be just them as an individual) determined to be in poverty following the Office of Management and Budgets Directive 14. (https://www.census.gov/hhes/povmeas/methodology/ombdir14.html)
us.census.acs.B08134010 | Number of workers with a commute of over 60 minutes | The number of workers over the age of 16 who do not work from home and commute in over 60 minutes in a geographic area.
us.census.acs.B12005002 | Never Married | The number of people in a geographic area who have never been married.
us.census.acs.B12005005 | Currently married | The number of people in a geographic area who are currently married.
us.census.acs.B12005008 | Married but separated | The number of people in a geographic area who are married but separated.
us.census.acs.B12005012 | Widowed | The number of people in a geographic area who are widowed.
us.census.acs.B12005015 | Divorced | The number of people in a geographic area who are divorced.
us.census.acs.B19013001 | Median Household Income in the past 12 Months | Within a geographic area, the median income received by every household on a regular basis before payments for personal income taxes, social security, union dues, medicare deductions, etc. It includes income received from wages, salary, commissions, bonuses, and tips; self-employment income from own nonfarm or farm businesses, including proprietorships and partnerships; interest, dividends, net rental income, royalty income, or income from estates and trusts; Social Security or Railroad Retirement income; Supplemental Security Income (SSI); any cash public assistance or welfare payments from the state or local welfare office; retirement, survivor, or disability benefits; and any other sources of income received regularly such as Veterans (VA) payments, unemployment and/or workers compensation, child support, and alimony.
us.census.acs.B25001001 | Housing Units | A count of housing units in each geography. A housing unit is a house, an apartment, a mobile home or trailer, a group of rooms, or a single room occupied as separate living quarters, or if vacant, intended for occupancy as separate living quarters.
us.census.acs.B25002003 | Vacant Housing Units | The count of vacant housing units in a geographic area. A housing unit is vacant if no one is living in it at the time of enumeration, unless its occupants are only temporarily absent. Units temporarily occupied at the time of enumeration entirely by people who have a usual residence elsewhere are also classified as vacant.
us.census.acs.B25004002 | Vacant Housing Units for Rent | The count of vacant housing units in a geographic area that are for rent. A housing unit is vacant if no one is living in it at the time of enumeration, unless its occupants are only temporarily absent. Units temporarily occupied at the time of enumeration entirely by people who have a usual residence elsewhere are also classified as vacant.
us.census.acs.B19001013 | Households with income of $75,000 To $99,999 | The number of households in a geographic area whose annual income was between $75,000 and $99,999.
us.census.acs.B19001014 | Households with income of $100,000 To $124,999 | The number of households in a geographic area whose annual income was between $100,000 and $124,999.
us.census.acs.B25004004 | Vacant Housing Units for Sale | The count of vacant housing units in a geographic area that are for sale. A housing unit is vacant if no one is living in it at the time of enumeration, unless its occupants are only temporarily absent. Units temporarily occupied at the time of enumeration entirely by people who have a usual residence elsewhere are also classified as vacant.
us.census.acs.B25058001 | Median Rent | The median contract rent within a geographic area. The contract rent is the monthly rent agreed to or contracted for, regardless of any furnishings, utilities, fees, meals, or services that may be included. For vacant units, it is the monthly rent asked for the rental unit at the time of interview.
us.census.acs.B25071001 | Percent of Household Income Spent on Rent | Within a geographic area, the median percentage of household income which was spent on gross rent. Gross rent is the amount of the contract rent plus the estimated average monthly cost of utilities (electricity, gas, water, sewer etc.) and fuels (oil, coal, wood, etc.) if these are paid by the renter. Household income is the sum of the income of all people 15 years and older living in the household.
us.census.acs.B25075025 | Owner-occupied Housing Units valued at $1,000,000 or more. | The count of owner occupied housing units in a geographic area that are valued at $1,000,000 or more. Value is the respondents estimate of how much the property (house and lot, mobile home and lot, or condominium unit) would sell for if it were for sale.
us.census.acs.B25081002 | Owner-occupied Housing Units with a Mortgage | The count of housing units within a geographic area that are mortagaged. Mortgage refers to all forms of debt where the property is pledged as security for repayment of the debt, including deeds of trust, trust deed, contracts to purchase, land contracts, junior mortgages, and home equity loans.
us.census.acs.B23025002 | Population in Labor Force | The number of people in each geography who are either in the civilian labor force or are members of the U.S. Armed Forces (people on active duty with the United States Army, Air Force, Navy, Marine Corps, or Coast Guard).
us.census.acs.B23025003 | Population in Civilian Labor Force | The number of civilians 16 years and over in each geography who can be classified as either employed or unemployed below.
us.census.acs.B08135001 | Aggregate travel time to work | The total number of minutes every worker over the age of 16 who did not work from home spent spent commuting to work in one day in a geographic area.
us.census.acs.B19001002 | Households with income less than $10,000 | The number of households in a geographic area whose annual income was less than $10,000.
us.census.acs.B19001003 | Households with income of $10,000 to $14,999 | The number of households in a geographic area whose annual income was between $10,000 and $14,999.
us.census.acs.B19001004 | Households with income of $15,000 to $19,999 | The number of households in a geographic area whose annual income was between $15,000 and $19,999.
us.census.acs.B23025004 | Employed Population | The number of civilians 16 years old and over in each geography who either (1) were at work, that is, those who did any work at all during the reference week as paid employees, worked in their own business or profession, worked on their own farm, or worked 15 hours or more as unpaid workers on a family farm or in a family business; or (2) were with a job but not at work, that is, those who did not work during the reference week but had jobs or businesses from which they were temporarily absent due to illness, bad weather, industrial dispute, vacation, or other personal reasons. Excluded from the employed are people whose only activity consisted of work around the house or unpaid volunteer work for religious, charitable, and similar organizations; also excluded are all institutionalized people and people on active duty in the United States Armed Forces.
us.census.acs.B23025005 | Unemployed Population | The number of civilians in each geography who are 16 years old and over and are classified as unemployed.
us.census.acs.B23025006 | Population in Armed Forces | The number of people in each geography who are members of the U.S. Armed Forces (people on active duty with the United States Army, Air Force, Navy, Marine Corps, or Coast Guard).
us.census.acs.B23025007 | Population Not in Labor Force | The number of people in each geography who are 16 years old and over who are not classified as members of the labor force. This category consists mainly of students, homemakers, retired workers, seasonal workers interviewed in an off season who were not looking for work, institutionalized people, and people doing only incidental unpaid family work.
us.census.acs.B12005001 | Population 15 Years and Over | The number of people in a geographic area who are over the age of 15. This is used mostly as a denominator of marital status.
us.census.acs.B08134001 | Workers age 16 and over who do not work from home | The number of workers over the age of 16 who do not work from home in a geographic area.
us.census.acs.B08134002 | Number of workers with less than 10 minute commute | The number of workers over the age of 16 who do not work from home and commute in less than 10 minutes in a geographic area.
us.census.acs.B08303004 | Number of workers with a commute between 10 and 14 minutes | The number of workers over the age of 16 who do not work from home and commute in between 10 and 14 minutes in a geographic area.
us.census.acs.B08303005 | Number of workers with a commute between 15 and 19 minutes | The number of workers over the age of 16 who do not work from home and commute in between 15 and 19 minutes in a geographic area.
us.census.acs.B08303006 | Number of workers with a commute between 20 and 24 minutes | The number of workers over the age of 16 who do not work from home and commute in between 20 and 24 minutes in a geographic area.
us.census.acs.B08303007 | Number of workers with a commute between 25 and 29 minutes | The number of workers over the age of 16 who do not work from home and commute in between 25 and 29 minutes in a geographic area.
us.census.acs.B08303008 | Number of workers with a commute between 30 and 34 minutes | The number of workers over the age of 16 who do not work from home and commute in between 30 and 34 minutes in a geographic area.
us.census.acs.B08134008 | Number of workers with a commute between 35 and 44 minutes | The number of workers over the age of 16 who do not work from home and commute in between 35 and 44 minutes in a geographic area.
us.census.acs.B08303011 | Number of workers with a commute between 45 and 59 minutes | The number of workers over the age of 16 who do not work from home and commute in between 45 and 59 minutes in a geographic area.
us.census.acs.B19001005 | Households with income of $20,000 To $24,999 | The number of households in a geographic area whose annual income was between $20,000 and $24,999.
us.census.acs.B19001006 | Households with income of $25,000 To $29,999 | The number of households in a geographic area whose annual income was between $20,000 and $24,999.
us.census.acs.B19001007 | Households with income of $30,000 To $34,999 | The number of households in a geographic area whose annual income was between $30,000 and $34,999.
us.census.acs.B19001008 | Households with income of $35,000 To $39,999 | The number of households in a geographic area whose annual income was between $35,000 and $39,999.
us.census.acs.B19001009 | Households with income of $40,000 To $44,999 | The number of households in a geographic area whose annual income was between $40,000 and $44,999.
us.census.acs.B19001010 | Households with income of $45,000 To $49,999 | The number of households in a geographic area whose annual income was between $45,000 and $49,999.
us.census.acs.B19001011 | Households with income of $50,000 To $59,999 | The number of households in a geographic area whose annual income was between $50,000 and $59,999.
us.census.acs.B19001015 | Households with income of $125,000 To $149,999 | The number of households in a geographic area whose annual income was between $125,000 and $149,999.
us.census.acs.B19001016 | Households with income of $150,000 To $199,999 | The number of households in a geographic area whose annual income was between $150,000 and $1999,999.
us.census.acs.B19001017 | Households with income of $200,000 Or More | The number of households in a geographic area whose annual income was more than $200,000.

BIN
docs/v1/img/avatar_do.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

View File

@@ -0,0 +1,5 @@
## Introduction
The Data Observatory, available for Enterprise accounts, provides access to a catalog of analyzed data methods, and enables you to apply the results to your own datasets.
The contents described in this document are subject to CARTO's [Terms of Service](https://carto.com/legal/)

View File

@@ -0,0 +1,9 @@
## Authentication
Data Observatory, like any other [CARTO platform's component]({{site.fundamental_docs}}/components/), requires using an API Key. From your CARTO dashboard, click _[Your API keys](https://carto.com/login)_ from the avatar drop-down menu to view your uniquely generated API Key for managing data with CARTO Engine.
![Your API Keys](../img/avatar_do.gif)
Learn more about the [basics of authorization]({{site.fundamental_docs}}/authorization/), or dig into the details of [Auth API]({{site.authapi_docs}}/), if you want to know more about this part of CARTO platform.
The examples in this documentation may include a placeholder for the API Key. Ensure that you modify any placeholder parameters with your own credentials.

View File

@@ -0,0 +1,3 @@
## Versioning
Data Observartory uses [Semantic Versioning](http://semver.org/). View our Github repository to find tags for each [release](https://github.com/CartoDB/observatory-extension/releases).

View File

@@ -0,0 +1,539 @@
## Measures Functions
[Data Observatory Measures]({{site.dataobservatory_docs}}/guides/overview/#methods-overview) are the numerical location data you can access. The measure functions allow you to access individual measures to augment your own data or integrate in your analysis workflows. Measures are used by sending an identifier or a geometry (point or polygon) and receiving back a measure (an absolute value) for that location.
There are hundreds of measures and the list is growing with each release. You can currently discover and learn about measures contained in the Data Observatory by downloading our [Data Catalog](https://cartodb.github.io/bigmetadata/index.html).
You can [access]({{site.dataobservatory_docs}}/guides/overview/accessing-the-data-observatory/) measures through CARTO Builder. The same methods will work if you are using the CARTO Engine to develop your application. We [encourage you]({{site.dataobservatory_docs}}/guides/overview/accessing-the-data-observatory/) to use table modifying methods (UPDATE and INSERT) over dynamic methods (SELECT).
### OBS_GetUSCensusMeasure(point geometry, measure_name text)
The ```OBS_GetUSCensusMeasure(point, measure_name)``` function returns a measure based on a subset of the US Census variables at a point location. The ```OBS_GetUSCensusMeasure``` function is limited to only a subset of all measures that are available in the Data Observatory. To access the full list, use measure IDs with the ```OBS_GetMeasure``` function below.
#### Arguments
Name |Description
--- | ---
point | a WGS84 point geometry (the_geom)
measure_name | a human-readable name of a US Census variable. The list of measure_names is [available in the Glossary](https://carto.com/docs/carto-engine/data/glossary/#obsgetuscensusmeasure-names-table).
normalize | for measures that are **sums** (e.g. population) the default normalization is 'area' and response comes back as a rate per square kilometer. Other options are 'denominator', which will use the denominator specified in the [Data Catalog](https://cartodb.github.io/bigmetadata/index.html) (optional)
boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract')
time_span | time span of interest (e.g., 2010 - 2014)
#### Returns
A NUMERIC value
Key | Description
--- | ---
value | the raw or normalized measure
#### Example
Add a measure to an empty numeric column based on point locations in your table.
```sql
UPDATE tablename
SET total_population = OBS_GetUSCensusMeasure(the_geom, 'Total Population')
```
### OBS_GetUSCensusMeasure(polygon geometry, measure_name text)
The ```OBS_GetUSCensusMeasure(polygon, measure_name)``` function returns a measure based on a subset of the US Census variables within a given polygon. The ```OBS_GetUSCensusMeasure``` function is limited to only a subset of all measures that are available in the Data Observatory. To access the full list, use the ```OBS_GetMeasure``` function below.
#### Arguments
Name |Description
--- | ---
polygon | a WGS84 polygon geometry (the_geom)
measure_name | a human readable string name of a US Census variable. The list of measure_names is [available in the Glossary](https://carto.com/docs/carto-engine/data/glossary/#obsgetuscensusmeasure-names-table).
normalize | for measures that are **sums** (e.g. population) the default normalization is 'none' and response comes back as a raw value. Other options are 'denominator', which will use the denominator specified in the [Data Catalog](https://cartodb.github.io/bigmetadata/index.html) (optional)
boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract')
time_span | time span of interest (e.g., 2010 - 2014)
#### Returns
A NUMERIC value
Key | Description
--- | ---
value | the raw or normalized measure
#### Example
Add a measure to an empty numeric column based on polygons in your table
```sql
UPDATE tablename
SET local_male_population = OBS_GetUSCensusMeasure(the_geom, 'Male Population')
```
### OBS_GetMeasure(point geometry, measure_id text)
The ```OBS_GetMeasure(point, measure_id)``` function returns any Data Observatory measure at a point location. You can browse all available Measures in the [Catalog](https://cartodb.github.io/bigmetadata/index.html).
#### Arguments
Name |Description
--- | ---
point | a WGS84 point geometry (the_geom)
measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf)). It is important to note that these are different than 'measure_name' used in the Census based functions above.
normalize | for measures that are **sums** (e.g. population) the default normalization is 'area' and response comes back as a rate per square kilometer. The other option is 'denominator', which will use the denominator specified in the [Data Catalog](https://cartodb.github.io/bigmetadata/index.html). (optional)
boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract')
time_span | time span of interest (e.g., 2010 - 2014)
#### Returns
A NUMERIC value
Key | Description
--- | ---
value | the raw or normalized measure
#### Example
Add a measure to an empty numeric column based on point locations in your table
```sql
UPDATE tablename
SET median_home_value_sqft = OBS_GetMeasure(the_geom, 'us.zillow.AllHomes_MedianValuePerSqft')
```
### OBS_GetMeasure(polygon geometry, measure_id text)
The ```OBS_GetMeasure(polygon, measure_id)``` function returns any Data Observatory measure calculated within a polygon.
#### Arguments
Name |Description
--- | ---
polygon_geometry | a WGS84 polygon geometry (the_geom)
measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf))
normalize | for measures that are **sums** (e.g. population) the default normalization is 'none' and response comes back as a raw value. Other options are 'denominator', which will use the denominator specified in the [Data Catalog](https://cartodb.github.io/bigmetadata/index.html) (optional)
boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract')
time_span | time span of interest (e.g., 2010 - 2014)
#### Returns
A NUMERIC value
Key | Description
--- | ---
value | the raw or normalized measure
#### Example
Add a measure to an empty column based on polygons in your table
```sql
UPDATE tablename
SET household_count = OBS_GetMeasure(the_geom, 'us.census.acs.B11001001')
```
#### Errors
* If an unrecognized normalization type is input, raises error: `'Only valid inputs for "normalize" are "area" (default) and "denominator".`
### OBS_GetMeasureById(geom_ref text, measure_id text, boundary_id text)
The ```OBS_GetMeasureById(geom_ref, measure_id, boundary_id)``` function returns any Data Observatory measure that corresponds to the boundary in ```boundary_id``` that has a geometry reference of ```geom_ref```.
#### Arguments
Name |Description
--- | ---
geom_ref | a geometry reference (e.g., a US Census geoid)
measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf))
boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract')
time_span (optional) | time span of interest (e.g., 2010 - 2014). If `NULL` is passed, the measure from the most recent data will be used.
#### Returns
A NUMERIC value
Key | Description
--- | ---
value | the raw measure associated with `geom_ref`
#### Example
Add a measure to an empty column based on county geoids in your table
```sql
UPDATE tablename
SET household_count = OBS_GetMeasureById(geoid_column, 'us.census.acs.B11001001', 'us.census.tiger.county')
```
#### Errors
* Returns `NULL` if there is a mismatch between the geometry reference and the boundary id such as using the geoid of a county with the boundary of block groups
## OBS_GetCategory(point geometry, category_id text)
The ```OBS_GetCategory(point, category_id)``` function returns any Data Observatory Category value at a point location. The Categories available are currently limited to Segmentation categories. See the Segmentation section of the [Catalog](https://cartodb.github.io/bigmetadata/index.html) for more detail.
#### Arguments
Name |Description
--- | ---
point | a WGS84 point geometry (the_geom)
category_id | a category identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf)).
#### Returns
A TEXT value
Key | Description
--- | ---
value | a text based category found at the supplied point
#### Example
Add the Category to an empty column text column based on point locations in your table
```sql
UPDATE tablename
SET segmentation = OBS_GetCategory(the_geom, 'us.census.spielman_singleton_segments.X55')
```
### OBS_GetMeta(extent geometry, metadata json, max_timespan_rank, max_score_rank, target_geoms)
The ```OBS_GetMeta(extent, metadata)``` function returns a completed Data
Observatory metadata JSON Object for use in ```OBS_GetData(geomvals,
metadata)``` or ```OBS_GetData(ids, metadata)```. It is not possible to pass
metadata to those functions if it is not processed by ```OBS_GetMeta(extent,
metadata)``` first.
`OBS_GetMeta` makes it possible to automatically select appropriate timespans
and boundaries for the measurement you want.
#### Arguments
Name | Description
---- | -----------
extent | A geometry of the extent of the input geometries
metadata | A JSON array composed of metadata input objects. Each indicates one desired measure for an output column, and optionally additional parameters about that column
num_timespan_options | How many historical time periods to include. Defaults to 1
num_score_options | How many alternative boundary levels to include. Defaults to 1
target_geoms | Target number of geometries. Boundaries with close to this many objects within `extent` will be ranked highest.
The schema of the metadata input objects are as follows:
Metadata Input Key | Description
--- | -----------
numer_id | The identifier for the desired measurement. If left blank, but a `geom_id` is specified, the column will return a geometry instead of a measurement.
geom_id | Identifier for a desired geographic boundary level to use when calculating measures. Will be automatically assigned if undefined. If defined but `numer_id` is blank, then the column will return a geometry instead of a measurement.
normalization | The desired normalization. One of 'area', 'prenormalized', or 'denominated'. 'Area' will normalize the measure per square kilometer, 'prenormalized' will return the original value, and 'denominated' will normalize by a denominator. Ignored if this metadata object specifies a geometry.
denom_id | Identifier for a desired normalization column in case `normalization` is 'denominated'. Will be automatically assigned if necessary. Ignored if this metadata object specifies a geometry.
numer_timespan | The desired timespan for the measurement. Defaults to most recent timespan available if left unspecified.
geom_timespan | The desired timespan for the geometry. Defaults to timespan matching numer_timespan if left unspecified.
target_area | Instead of aiming to have `target_geoms` in the area of the geometry passed as `extent`, fill this area. Unit is square degrees WGS84. Set this to `0` if you want to use the smallest source geometry for this element of metadata, for example if you're passing in points.
target_geoms | Override global `target_geoms` for this element of metadata
max_timespan_rank | Only include timespans of this recency (for example, `1` is only the most recent timespan). No limit by default
max_score_rank | Only include boundaries of this relevance (for example, `1` is the most relevant boundary). Is `1` by default
#### Returns
A JSON array composed of metadata output objects.
Key | Description
--- | -----------
meta | A JSON array with completed metadata for the requested data, including all keys below
The schema of the metadata output objects are as follows. You should pass this
array as-is to ```OBS_GetData```. If you modify any values the function will
fail.
Metadata Output Key | Description
--- | -----------
suggested_name | A suggested column name for adding this to an existing table
numer_id | Identifier for desired measurement
numer_timespan | Timespan that will be used of the desired measurement
numer_name | Human-readable name of desired measure
numer_description | Long human-readable description of the desired measure
numer_t_description | Further information about the source table
numer_type | PostgreSQL/PostGIS type of desired measure
numer_colname | Internal identifier for column name
numer_tablename | Internal identifier for table
numer_geomref_colname | Internal identifier for geomref column name
denom_id | Identifier for desired normalization
denom_timespan | Timespan that will be used of the desired normalization
denom_name | Human-readable name of desired measure's normalization
denom_description | Long human-readable description of the desired measure's normalization
denom_t_description | Further information about the source table
denom_type | PostgreSQL/PostGIS type of desired measure's normalization
denom_colname | Internal identifier for normalization column name
denom_tablename | Internal identifier for normalization table
denom_geomref_colname | Internal identifier for normalization geomref column name
geom_id | Identifier for desired boundary geometry
geom_timespan | Timespan that will be used of the desired boundary geometry
geom_name | Human-readable name of desired boundary geometry
geom_description | Long human-readable description of the desired boundary geometry
geom_t_description | Further information about the source table
geom_type | PostgreSQL/PostGIS type of desired boundary geometry
geom_colname | Internal identifier for boundary geometry column name
geom_tablename | Internal identifier for boundary geometry table
geom_geomref_colname | Internal identifier for boundary geometry ref column name
timespan_rank | Ranking of this measurement by time, most recent is 1, second most recent 2, etc.
score | The score of this measurement's boundary compared to the `extent` and `target_geoms` passed in. Between 0 and 100.
score_rank | The ranking of this measurement's boundary, highest ranked is 1, second is 2, etc.
numer_aggregate | The aggregate type of the numerator, either `sum`, `average`, `median`, or blank
denom_aggregate | The aggregate type of the denominator, either `sum`, `average`, `median`, or blank
normalization | The sort of normalization that will be used for this measure, either `area`, `predenominated`, or `denominated`
#### Examples
Obtain metadata that can augment with one additional column of US population
data, using a boundary relevant for the geometry provided and latest timespan.
Limit to only the most recent column most relevant to the extent & density of
input geometries in `tablename`.
```sql
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001"}]',
1, 1,
COUNT(*)
) FROM tablename
```
Obtain metadata that can augment with one additional column of US population
data, using census tract boundaries.
```sql
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.census_tract"}]',
1, 1,
COUNT(*)
) FROM tablename
```
Obtain metadata that can augment with two additional columns, one for total
population and one for male population.
```sql
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001"}, {"numer_id": "us.census.acs.B01001002"}]',
1, 1,
COUNT(*)
) FROM tablename
```
### OBS_MetadataValidation(extent geometry, geometry_type text, metadata json, target_geoms)
The ```OBS_MetadataValidation``` function performs a validation check over the known issues using the extent, type of geometry, and metadata that is being used in the ```OBS_GetMeta``` function.
#### Arguments
Name | Description
---- | -----------
extent | A geometry of the extent of the input geometries
geometry_type | The geometry type of the source data
metadata | A JSON array composed of metadata input objects. Each indicates one desired measure for an output column, and optional additional parameters about that column
target_geoms | Target number of geometries. Boundaries with close to this many objects within `extent` will be ranked highest
The schema of the metadata input objects are as follows:
Metadata Input Key | Description
--- | -----------
numer_id | The identifier for the desired measurement. If left blank, a `geom_id` is specified and the column returns a geometry, instead of a measurement
geom_id | Identifier for a desired geographic boundary level used to calculate measures. If undefined, this is automatically assigned. If defined, `numer_id` is blank and the column returns a geometry, instead of a measurement
normalization | The desired normalization. One of 'area', 'prenormalized', or 'denominated'. 'Area' will normalize the measure per square kilometer, 'prenormalized' will return the original value, and 'denominated' will normalize by a denominator. If the metadata object specifies a geometry, this is ignored
denom_id | When `normalization` is 'denominated', this is the identifier for a desired normalization column. This is automatically assigned. If the metadata object specifies a geometry, this is ignored
numer_timespan | The desired timespan for the measurement. If left unspecified, it defaults to the most recent timespan available
geom_timespan | The desired timespan for the geometry. If left unspecified, it defaults to the timespan matching `numer_timespan`
target_area | Instead of aiming to have `target_geoms` in the area of the geometry passed as `extent`, fill this area. Unit is square degrees WGS84. Set this to `0` if you want to use the smallest source geometry for this element of metadata. For example, if you are passing in points
target_geoms | Override global `target_geoms` for this element of metadata
max_timespan_rank | Only include timespans of this recency (For example, `1` is only the most recent timespan). There is no limit by default
max_score_rank | Only include boundaries of this relevance (for example, `1` is the most relevant boundary). The default is `1`
#### Returns
Key | Description
--- | -----------
valid | A boolean field that represents if the validation was successful or not
errors | A text array with all possible errors
#### Examples
Validate metadata with two additional columns of US census data; using a boundary relevant for the geometry provided and the latest timespan. Limited to the most recent column, and the most relevant, based on the extent and density of input geometries in `tablename`.
```sql
SELECT OBS_MetadataValidation(
ST_SetSRID(ST_Extent(the_geom), 4326),
ST_GeometryType(the_geom),
'[{"numer_id": "us.census.acs.B01003001"}, {"numer_id": "us.census.acs.B01001002"}]',
COUNT(*)::INTEGER
) FROM tablename
GROUP BY ST_GeometryType(the_geom)
```
### OBS_GetData(geomvals array[geomval], metadata json)
The ```OBS_GetData(geomvals, metadata)``` function returns a measure and/or
geometry corresponding to the `metadata` JSON array for each every Geometry of
the `geomval` element in the `geomvals` array. The metadata argument must be
obtained from ```OBS_GetMeta(extent, metadata)```.
#### Arguments
Name | Description
---- | -----------
geomvals | An array of `geomval` elements, which are obtained by casting together a `Geometry` and a `Numeric`. This should be obtained by using `ARRAY_AGG((the_geom, cartodb_id)::geomval)` from the CARTO table one wishes to obtain data for.
metadata | A JSON array composed of metadata output objects from ```OBS_GetMeta(extent, metadata)```. The schema of the elements of the `metadata` JSON array corresponds to that of the output of ```OBS_GetMeta(extent, metadata)```, and this argument must be obtained from that function in order for the call to be valid.
#### Returns
A TABLE with the following schema, where each element of the input `geomvals`
array corresponds to one row:
Column | Type | Description
------ | ---- | -----------
id | Numeric | ID corresponding to the `val` component of an element of the input `geomvals` array
data | JSON | A JSON array with elements corresponding to the input `metadata` JSON array
Each `data` object has the following keys:
Key | Description
--- | -----------
value | The value of the measurement or geometry for the geometry corresponding to this row and measurement corresponding to this position in the `metadata` JSON array
To determine the appropriate cast for `value`, one can use the `numer_type`
or `geom_type` key corresponding to that value in the input `metadata` JSON
array.
#### Examples
Obtain population densities for every geometry in a table, keyed by cartodb_id:
```sql
WITH meta AS (
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001"}]',
1, 1, COUNT(*)
) meta FROM tablename)
SELECT id AS cartodb_id, (data->0->>'value')::Numeric AS pop_density
FROM OBS_GetData((SELECT ARRAY_AGG((the_geom, cartodb_id)::geomval) FROM tablename),
(SELECT meta FROM meta))
```
Update a table with a blank numeric column called `pop_density` with population
densities:
```sql
WITH meta AS (
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001"}]',
1, 1, COUNT(*)
) meta FROM tablename),
data AS (
SELECT id AS cartodb_id, (data->0->>'value')::Numeric AS pop_density
FROM OBS_GetData((SELECT ARRAY_AGG((the_geom, cartodb_id)::geomval) FROM tablename),
(SELECT meta FROM meta)))
UPDATE tablename
SET pop_density = data.pop_density
FROM data
WHERE cartodb_id = data.id
```
Update a table with two measurements at once, population density and household
density. The table should already have a Numeric column `pop_density` and
`household_density`.
```sql
WITH meta AS (
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom),4326),
'[{"numer_id": "us.census.acs.B01003001"},{"numer_id": "us.census.acs.B11001001"}]',
1, 1, COUNT(*)
) meta from tablename),
data AS (
SELECT id,
data->0->>'value' AS pop_density,
data->1->>'value' AS household_density
FROM OBS_GetData((SELECT ARRAY_AGG((the_geom, cartodb_id)::geomval) FROM tablename),
(SELECT meta FROM meta)))
UPDATE tablename
SET pop_density = data.pop_density,
household_density = data.household_density
FROM data
WHERE cartodb_id = data.id
```
## OBS_GetData(ids array[text], metadata json)
The ```OBS_GetData(ids, metadata)``` function returns a measure and/or
geometry corresponding to the `metadata` JSON array for each every id of
the `ids` array. The metadata argument must be obtained from
`OBS_GetMeta(extent, metadata)`. When obtaining metadata, one must include
the `geom_id` corresponding to the boundary that the `ids` refer to.
#### Arguments
Name | Description
---- | -----------
ids | An array of `TEXT` elements. This should be obtained by using `ARRAY_AGG(col_of_geom_refs)` from the CARTO table one wishes to obtain data for.
metadata | A JSON array composed of metadata output objects from ```OBS_GetMeta(extent, metadata)```. The schema of the elements of the `metadata` JSON array corresponds to that of the output of ```OBS_GetMeta(extent, metadata)```, and this argument must be obtained from that function in order for the call to be valid.
For this function to work, the `metadata` argument must include a `geom_id`
that corresponds to the ids found in `col_of_geom_refs`.
#### Returns
A TABLE with the following schema, where each element of the input `ids` array
corresponds to one row:
Column | Type | Description
------ | ---- | -----------
id | Text | ID corresponding to an element of the input `ids` array
data | JSON | A JSON array with elements corresponding to the input `metadata` JSON array
Each `data` object has the following keys:
Key | Description
--- | -----------
value | The value of the measurement or geometry for the geometry corresponding to this row and measurement corresponding to this position in the `metadata` JSON array
To determine the appropriate cast for `value`, one can use the `numer_type`
or `geom_type` key corresponding to that value in the input `metadata` JSON
array.
#### Examples
Obtain population densities for every row of a table with FIPS code county IDs
(USA).
```sql
WITH meta AS (
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.county"}]'
) meta FROM tablename)
SELECT id AS fips, (data->0->>'value')::Numeric AS pop_density
FROM OBS_GetData((SELECT ARRAY_AGG(fips) FROM tablename),
(SELECT meta FROM meta))
```
Update a table with population densities for every FIPS code county ID (USA).
This table has a blank column called `pop_density` and fips codes stored in a
column `fips`.
```sql
WITH meta AS (
SELECT OBS_GetMeta(
ST_SetSRID(ST_Extent(the_geom), 4326),
'[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.county"}]'
) meta FROM tablename),
data as (
SELECT id AS fips, (data->0->>'value') AS pop_density
FROM OBS_GetData((SELECT ARRAY_AGG(fips) FROM tablename),
(SELECT meta FROM meta)))
UPDATE tablename
SET pop_density = data.pop_density
FROM data
WHERE fips = data.id
```

View File

@@ -0,0 +1,273 @@
## Boundary Functions
Use the following functions to retrieve [Boundary](https://carto.com/docs/carto-engine/data/overview/#boundary-data) data. Data ranges from small areas (e.g. US Census Block Groups) to large areas (e.g. Countries). You can access boundaries by point location lookup, bounding box lookup, direct ID access and several other methods described below.
You can [access](https://carto.com/docs/carto-engine/data/accessing) boundaries through CARTO Builder. The same methods will work if you are using the CARTO Engine to develop your application. We [encourage you](https://carto.com/docs/carto-engine/data/accessing/#best-practices) to use table modifying methods (UPDATE and INSERT) over dynamic methods (SELECT).
### OBS_GetBoundariesByGeometry(geom geometry, geometry_id text)
The ```OBS_GetBoundariesByGeometry(geometry, geometry_id)``` method returns a set of boundary geometries that intersect a supplied geometry. This can be used to find all boundaries that are within or overlap a bounding box. You have the ability to choose whether to retrieve all boundaries that intersect your supplied bounding box or only those that fall entirely inside of your bounding box.
#### Arguments
Name |Description
--- | ---
geom | a WGS84 geometry
geometry_id | a string identifier for a boundary geometry
timespan (optional) | year(s) to request from ('NULL' (default) gives most recent)
overlap_type (optional) | one of '[intersects](http://postgis.net/docs/manual-2.2/ST_Intersects.html)' (default), '[contains](http://postgis.net/docs/manual-2.2/ST_Contains.html)', or '[within](http://postgis.net/docs/manual-2.2/ST_Within.html)'.
#### Returns
A table with the following columns:
Column Name | Description
--- | ---
the_geom | a boundary geometry (e.g., US Census tract boundaries)
geom_refs | a string identifier for the geometry (e.g., geoids of US Census tracts)
If geometries are not found for the requested `geom`, `geometry_id`, `timespan`, or `overlap_type`, then null values are returned.
#### Example
Insert all Census Tracts from Lower Manhattan and nearby areas within the supplied bounding box to a table named `manhattan_census_tracts` which has columns `the_geom` (geometry) and `geom_refs` (text).
```sql
INSERT INTO manhattan_census_tracts(the_geom, geom_refs)
SELECT *
FROM OBS_GetBoundariesByGeometry(
ST_MakeEnvelope(-74.0251922607,40.6945658517,
-73.9651107788,40.7377626342,
4326),
'us.census.tiger.census_tract')
```
#### Errors
* If an `overlap_type` other than the valid ones listed above is entered, then an error is thrown
## OBS_GetPointsByGeometry(polygon geometry, geometry_id text)
The ```OBS_GetPointsByGeometry(polygon, geometry_id)``` method returns point geometries and their geographical identifiers that intersect (or are contained by) a bounding box polygon and lie on the surface of a boundary corresponding to the boundary with same geographical identifiers (e.g., a point that is on a census tract with the same geoid). This is a useful alternative to ```OBS_GetBoundariesByGeometry``` listed above because it returns much less data for each location.
#### Arguments
Name |Description
--- | ---
polygon | a bounding box or other geometry
geometry_id | a string identifier for a boundary geometry
timespan (optional) | year(s) to request from (`NULL` (default) gives most recent)
overlap_type (optional) | one of '[intersects](http://postgis.net/docs/manual-2.2/ST_Intersects.html)' (default), '[contains](http://postgis.net/docs/manual-2.2/ST_Contains.html)', or '[within](http://postgis.net/docs/manual-2.2/ST_Within.html)'.
#### Returns
A table with the following columns:
Column Name | Description
--- | ---
the_geom | a point geometry on a boundary (e.g., a point that lies on a US Census tract)
geom_refs| a string identifier for the geometry (e.g., the geoid of a US Census tract)
If geometries are not found for the requested geometry, `geometry_id`, `timespan`, or `overlap_type`, then NULL values are returned.
#### Example
Insert points that lie on Census Tracts from Lower Manhattan and nearby areas within the supplied bounding box to a table named `manhattan_tract_points` which has columns `the_geom` (geometry) and `geom_refs` (text).
```sql
INSERT INTO manhattan_tract_points (the_geom, geom_refs)
SELECT *
FROM OBS_GetPointsByGeometry(
ST_MakeEnvelope(-74.0251922607,40.6945658517,
-73.9651107788,40.7377626342,
4326),
'us.census.tiger.census_tract')
```
#### Errors
* If a geometry other than a point is passed as the first argument, an error is thrown: `Invalid geometry type (ST_Point), expecting 'ST_MultiPolygon' or 'ST_Polygon'`
### OBS_GetBoundary(point_geometry, boundary_id)
The ```OBS_GetBoundary(point_geometry, boundary_id)``` method returns a boundary geometry defined as overlapping the point geometry and from the desired boundary set (e.g. Census Tracts). See the [Boundary ID Glossary](https://carto.com/docs/carto-engine/data/glossary/#boundary-ids). This is a useful method for performing aggregations of points.
#### Arguments
Name | Description
--- | ---
point_geometry | a WGS84 polygon geometry (the_geom)
boundary_id | a boundary identifier from the [Boundary ID Glossary](https://carto.com/docs/carto-engine/data/glossary/#boundary-ids)
timespan (optional) | year(s) to request from (`NULL` (default) gives most recent)
#### Returns
A boundary geometry. If no value is found at the requested `boundary_id` or `timespan`, a null value is returned.
Value | Description
--- | ---
geom | WKB geometry
#### Example
Overwrite a point geometry with a boundary geometry that contains it in your table
```sql
UPDATE tablename
SET the_geom = OBS_GetBoundary(the_geom, 'us.census.tiger.block_group')
```
#### Errors
* If a geometry other than a point is passed, an error is thrown: `Invalid geometry type (ST_Line), expecting 'ST_Point'`
### OBS_GetBoundaryId(point_geometry, boundary_id)
The ```OBS_GetBoundaryId(point_geometry, boundary_id)``` returns a unique geometry_id for the boundary geometry that contains a given point geometry. See the [Boundary ID Glossary](https://carto.com/docs/carto-engine/data/glossary/#boundary-ids). The method can be combined with ```OBS_GetBoundaryById(geometry_id)``` to create a point aggregation workflow.
#### Arguments
Name |Description
--- | ---
point_geometry | a WGS84 point geometry (the_geom)
boundary_id | a boundary identifier from the [Boundary ID Glossary](https://carto.com/docs/carto-engine/data/glossary/#boundary-ids)
timespan (optional) | year(s) to request from (`NULL` (default) gives most recent)
#### Returns
A TEXT boundary geometry id. If no value is found at the requested `boundary_id` or `timespan`, a null value is returned.
Value | Description
--- | ---
geometry_id | a string identifier of a geometry in the Boundaries
#### Example
Write the US Census block group geoid that contains the point geometry for every row as a new column in your table.
```sql
UPDATE tablename
SET geometry_id = OBS_GetBoundaryId(the_geom, 'us.census.tiger.block_group')
```
#### Errors
* If a geometry other than a point is passed, an error is thrown: `Invalid geometry type (ST_Line), expecting 'ST_Point'`
### OBS_GetBoundaryById(geometry_id, boundary_id)
The ```OBS_GetBoundaryById(geometry_id, boundary_id)``` returns the boundary geometry for a unique geometry_id. A geometry_id can be found using the ```OBS_GetBoundaryId(point_geometry, boundary_id)``` method described above.
#### Arguments
Name | Description
--- | ---
geometry_id | a string identifier for a Boundary geometry
boundary_id | a boundary identifier from the [Boundary ID Glossary](https://carto.com/docs/carto-engine/data/glossary/#boundary-ids)
timespan (optional) | year(s) to request from (`NULL` (default) gives most recent)
#### Returns
A boundary geometry. If a geometry is not found for the requested `geometry_id`, `boundary_id`, or `timespan`, then a null value is returned.
Key | Description
--- | ---
geom | a WGS84 polygon geometry
#### Example
Use a table of `geometry_id`s (e.g., geoid from the U.S. Census) to select the unique boundaries that they correspond to and insert into a table called, `overlapping_polygons`. This is a useful method for creating new choropleths of aggregate data.
```sql
INSERT INTO overlapping_polygons (the_geom, geometry_id, point_count)
SELECT
OBS_GetBoundaryById(geometry_id, 'us.census.tiger.county') As the_geom,
geometry_id,
count(*)
FROM tablename
GROUP BY geometry_id
```
### OBS_GetBoundariesByPointAndRadius(point geometry, radius numeric, boundary_id text)
The ```OBS_GetBoundariesByPointAndRadius(point, radius, boundary_id)``` method returns boundary geometries and their geographical identifiers that intersect (or are contained by) a circle centered on a point with a radius.
#### Arguments
Name |Description
--- | ---
point | a WGS84 point geometry
radius | a radius (in meters) from the center point
geometry_id | a string identifier for a boundary geometry
timespan (optional) | year(s) to request from (`NULL` (default) gives most recent)
overlap_type (optional) | one of '[intersects](http://postgis.net/docs/manual-2.2/ST_Intersects.html)' (default), '[contains](http://postgis.net/docs/manual-2.2/ST_Contains.html)', or '[within](http://postgis.net/docs/manual-2.2/ST_Within.html)'.
#### Returns
A table with the following columns:
Column Name | Description
--- | ---
the_geom | a boundary geometry (e.g., a US Census tract)
geom_refs| a string identifier for the geometry (e.g., the geoid of a US Census tract)
If geometries are not found for the requested point and radius, `geometry_id`, `timespan`, or `overlap_type`, then null values are returned.
#### Example
Insert into table `denver_census_tracts` the census tract boundaries and geom_refs of census tracts which intersect within 10 miles of downtown Denver, Colorado.
```sql
INSERT INTO denver_census_tracts(the_geom, geom_refs)
SELECT *
FROM OBS_GetBoundariesByPointAndRadius(
CDB_LatLng(39.7392, -104.9903), -- Denver, Colorado
10000 * 1.609, -- 10 miles (10km * conversion to miles)
'us.census.tiger.census_tract')
```
#### Errors
* If a geometry other than a point is passed, an error is thrown. E.g., `Invalid geometry type (ST_Line), expecting 'ST_Point'`
### OBS_GetPointsByPointAndRadius(point geometry, radius numeric, boundary_id text)
The ```OBS_GetPointsByPointAndRadius(point, radius, boundary_id)``` method returns point geometries on boundaries (e.g., a point that lies on a Census tract) and their geographical identifiers that intersect (or are contained by) a circle centered on a point with a radius.
#### Arguments
Name |Description
--- | ---
point | a WGS84 point geometry
radius | radius (in meters)
geometry_id | a string identifier for a boundary geometry
timespan (optional) | year(s) to request from (`NULL` (default) gives most recent)
overlap_type (optional) | one of '[intersects](http://postgis.net/docs/manual-2.2/ST_Intersects.html)' (default), '[contains](http://postgis.net/docs/manual-2.2/ST_Contains.html)', or '[within](http://postgis.net/docs/manual-2.2/ST_Within.html)'.
#### Returns
A table with the following columns:
Column Name | Description
--- | ---
the_geom | a point geometry (e.g., a point on a US Census tract)
geom_refs | a string identifier for the geometry (e.g., the geoid of a US Census tract)
If geometries are not found for the requested point and radius, `geometry_id`, `timespan`, or `overlap_type`, then null values are returned.
#### Example
Insert into table `denver_tract_points` points on US census tracts and their corresponding geoids for census tracts which intersect within 10 miles of downtown Denver, Colorado, USA.
```sql
INSERT INTO denver_tract_points(the_geom, geom_refs)
SELECT *
FROM OBS_GetPointsByPointAndRadius(
CDB_LatLng(39.7392, -104.9903), -- Denver, Colorado
10000 * 1.609, -- 10 miles (10km * conversion to miles)
'us.census.tiger.census_tract')
```
#### Errors
* If a geometry other than a point is passed, an error is thrown. E.g., `Invalid geometry type (ST_Line), expecting 'ST_Point'`

View File

@@ -0,0 +1,365 @@
## Discovery Functions
If you are using the [discovery methods]({{ site.dataobservatory_docs}}/guides/overview/#discovery-methods) from the Data Observatory, use the following functions to retrieve [boundary]({{ site.dataobservatory_docs}}/guides/overview/#boundary-data) and [measures]({{ site.dataobservatory_docs}}/guides/overview/#measures-data) data.
### OBS_Search(search_term)
Use arbitrary text to search all available measures
#### Arguments
Name | Description
--- | ---
search_term | a string to search for available measures
boundary_id | a string identifier for a boundary geometry (optional)
#### Returns
A TABLE containing the following properties
Key | Description
--- | ---
id | the unique id of the measure for use with the ```OBS_GetMeasure``` function
name | the human readable name of the measure
description | a brief description of the measure
aggregate | **sum** are raw count values, **median** are statistical medians, **average** are statistical averages, **undefined** other (e.g. an index value)
source | where the data came from (e.g. US Census Bureau)
#### Example
```sql
SELECT * FROM OBS_Search('home value')
```
### OBS_GetAvailableBoundaries(point_geometry)
Returns available `boundary_id`s at a given point geometry.
#### Arguments
Name | Description
--- | ---
point_geometry | a WGS84 point geometry (e.g. the_geom)
#### Returns
A TABLE containing the following properties
Key | Description
--- | ---
boundary_id | a boundary identifier from the [Boundary ID Glossary]({{ site.dataobservatory_docs}}/guides/glossary/#boundary-ids)
description | a brief description of the boundary dataset
time_span | the timespan attached the boundary. this does not mean that the boundary is invalid outside of the timespan, but is the explicit timespan published with the geometry.
#### Example
```sql
SELECT * FROM OBS_GetAvailableBoundaries(CDB_LatLng(40.7, -73.9))
```
### OBS_GetAvailableNumerators(bounds, filter_tags, denom_id, geom_id, timespan)
Return available numerators within a boundary and with the specified
`filter_tags`.
#### Arguments
Name | Type | Description
--- | --- | ---
bounds | Geometry(Geometry, 4326) | a geometry which some of the numerator's data must intersect with
filter_tags | Text[] | a list of filters. Only numerators for which all of these apply are returned `NULL` to ignore (optional)
denom_id | Text | the ID of a denominator to check whether the numerator is valid against. Will not reduce length of returned table, but will change values for `valid_denom` (optional)
geom_id | Text | the ID of a geometry to check whether the numerator is valid against. Will not reduce length of returned table, but will change values for `valid_geom` (optional)
timespan | Text | the ID of a timespan to check whether the numerator is valid against. Will not reduce length of returned table, but will change values for `valid_timespan` (optional)
#### Returns
A TABLE containing the following properties
Key | Type | Description
--- | ---- | -----------
numer_id | Text | The ID of the numerator
numer_name | Text | A human readable name for the numerator
numer_description | Text | Description of the numerator. Is sometimes NULL
numer_weight | Numeric | Numeric "weight" of the numerator. Ignored.
numer_license | Text | ID of the license for the numerator
numer_source | Text | ID of the source for the numerator
numer_type | Text | Postgres type of the numerator
numer_aggregate | Text | Aggregate type of the numerator. If `'SUM'`, this can be normalized by area
numer_extra | JSONB | Extra information about the numerator column. Ignored.
numer_tags | Text[] | Array of all tags applying to this numerator
valid_denom | Boolean | True if the `denom_id` argument is a valid denominator for this numerator, False otherwise
valid_geom | Boolean | True if the `geom_id` argument is a valid geometry for this numerator, False otherwise
valid_timespan | Boolean | True if the `timespan` argument is a valid timespan for this numerator, False otherwise
#### Examples
Obtain all numerators that are available within a small rectangle.
```sql
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326))
```
Obtain all numerators that are available within a small rectangle and are for
the United States only.
```sql
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}');
```
Obtain all numerators that are available within a small rectangle and are
employment related for the United States only.
```sql
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states, subsection/tags.employment}');
```
Obtain all numerators that are available within a small rectangle and are
related to both employment and age & gender for the United States only.
```sql
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states, subsection/tags.employment, subsection/tags.age_gender}');
```
Obtain all numerators that work with US population (`us.census.acs.B01003001`)
as a denominator.
```sql
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001')
WHERE valid_denom IS True;
```
Obtain all numerators that work with US states (`us.census.tiger.state`)
as a geometry.
```sql
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, 'us.census.tiger.state')
WHERE valid_geom IS True;
```
Obtain all numerators available in the timespan `2011 - 2015`.
```sql
SELECT * FROM OBS_GetAvailableNumerators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2011 - 2015')
WHERE valid_timespan IS True;
```
### OBS_GetAvailableDenominators(bounds, filter_tags, numer_id, geom_id, timespan)
Return available denominators within a boundary and with the specified
`filter_tags`.
#### Arguments
Name | Type | Description
--- | --- | ---
bounds | Geometry(Geometry, 4326) | a geometry which some of the denominator's data must intersect with
filter_tags | Text[] | a list of filters. Only denominators for which all of these apply are returned `NULL` to ignore (optional)
numer_id | Text | the ID of a numerator to check whether the denominator is valid against. Will not reduce length of returned table, but will change values for `valid_numer` (optional)
geom_id | Text | the ID of a geometry to check whether the denominator is valid against. Will not reduce length of returned table, but will change values for `valid_geom` (optional)
timespan | Text | the ID of a timespan to check whether the denominator is valid against. Will not reduce length of returned table, but will change values for `valid_timespan` (optional)
#### Returns
A TABLE containing the following properties
Key | Type | Description
--- | ---- | -----------
denom_id | Text | The ID of the denominator
denom_name | Text | A human readable name for the denominator
denom_description | Text | Description of the denominator. Is sometimes NULL
denom_weight | Numeric | Numeric "weight" of the denominator. Ignored.
denom_license | Text | ID of the license for the denominator
denom_source | Text | ID of the source for the denominator
denom_type | Text | Postgres type of the denominator
denom_aggregate | Text | Aggregate type of the denominator. If `'SUM'`, this can be normalized by area
denom_extra | JSONB | Extra information about the denominator column. Ignored.
denom_tags | Text[] | Array of all tags applying to this denominator
valid_numer | Boolean | True if the `numer_id` argument is a valid numerator for this denominator, False otherwise
valid_geom | Boolean | True if the `geom_id` argument is a valid geometry for this denominator, False otherwise
valid_timespan | Boolean | True if the `timespan` argument is a valid timespan for this denominator, False otherwise
#### Examples
Obtain all denominators that are available within a small rectangle.
```sql
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326));
```
Obtain all denominators that are available within a small rectangle and are for
the United States only.
```sql
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}');
```
Obtain all denominators for male population (`us.census.acs.B01001002`).
```sql
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01001002')
WHERE valid_numer IS True;
```
Obtain all denominators that work with US states (`us.census.tiger.state`)
as a geometry.
```sql
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, 'us.census.tiger.state')
WHERE valid_geom IS True;
```
Obtain all denominators available in the timespan `2011 - 2015`.
```sql
SELECT * FROM OBS_GetAvailableDenominators(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2011 - 2015')
WHERE valid_timespan IS True;
```
### OBS_GetAvailableGeometries(bounds, filter_tags, numer_id, denom_id, timespan, number_geometries)
Return available geometries within a boundary and with the specified
`filter_tags`.
#### Arguments
Name | Type | Description
--- | --- | ---
bounds | Geometry(Geometry, 4326) | a geometry which must intersect the geometry
filter_tags | Text[] | a list of filters. Only geometries for which all of these apply are returned `NULL` to ignore (optional)
numer_id | Text | the ID of a numerator to check whether the geometry is valid against. Will not reduce length of returned table, but will change values for `valid_numer` (optional)
denom_id | Text | the ID of a denominator to check whether the geometry is valid against. Will not reduce length of returned table, but will change values for `valid_denom` (optional)
timespan | Text | the ID of a timespan to check whether the geometry is valid against. Will not reduce length of returned table, but will change values for `valid_timespan` (optional)
number_geometries | Integer | an additional variable that is used to adjust the calculation of the [score]({{ site.dataobservatory_docs}}/guides/discovery-functions/#returns-4) (optional)
#### Returns
A TABLE containing the following properties
Key | Type | Description
--- | ---- | -----------
geom_id | Text | The ID of the geometry
geom_name | Text | A human readable name for the geometry
geom_description | Text | Description of the geometry. Is sometimes NULL
geom_weight | Numeric | Numeric "weight" of the geometry. Ignored.
geom_aggregate | Text | Aggregate type of the geometry. Ignored.
geom_license | Text | ID of the license for the geometry
geom_source | Text | ID of the source for the geometry
geom_type | Text | Postgres type of the geometry
geom_extra | JSONB | Extra information about the geometry column. Ignored.
geom_tags | Text[] | Array of all tags applying to this geometry
valid_numer | Boolean | True if the `numer_id` argument is a valid numerator for this geometry, False otherwise
valid_denom | Boolean | True if the `geom_id` argument is a valid geometry for this geometry, False otherwise
valid_timespan | Boolean | True if the `timespan` argument is a valid timespan for this geometry, False otherwise
score | Numeric | Score between 0 and 100 for this geometry, higher numbers mean that this geometry is a better choice for the passed extent
numtiles | Numeric | How many raster tiles were read for score, numgeoms, and percentfill estimates
numgeoms | Numeric | About how many of these geometries fit inside the passed extent
percentfill | Numeric | About what percentage of the passed extent is filled with these geometries
estnumgeoms | Numeric | Ignored
meanmediansize | Numeric | Ignored
#### Examples
Obtain all geometries that are available within a small rectangle.
```sql
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326));
```
Obtain all geometries that are available within a small rectangle and are for
the United States only.
```sql
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}');
```
Obtain all geometries that work with total population (`us.census.acs.B01003001`).
```sql
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001')
WHERE valid_numer IS True;
```
Obtain all geometries with timespan `2015`.
```sql
SELECT * FROM OBS_GetAvailableGeometries(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2015')
WHERE valid_timespan IS True;
```
## OBS_GetAvailableTimespans(bounds, filter_tags, numer_id, denom_id, geom_id)
Return available timespans within a boundary and with the specified
`filter_tags`.
#### Arguments
Name | Type | Description
--- | --- | ---
bounds | Geometry(Geometry, 4326) | a geometry which some of the timespan's data must intersect with
filter_tags | Text[] | a list of filters. Ignore
numer_id | Text | the ID of a numerator to check whether the timespans is valid against. Will not reduce length of returned table, but will change values for `valid_numer` (optional)
denom_id | Text | the ID of a denominator to check whether the timespans is valid against. Will not reduce length of returned table, but will change values for `valid_denom` (optional)
geom_id | Text | the ID of a geometry to check whether the timespans is valid against. Will not reduce length of returned table, but will change values for `valid_geom` (optional)
#### Returns
A TABLE containing the following properties
Key | Type | Description
--- | ---- | -----------
timespan_id | Text | The ID of the timespan
timespan_name | Text | A human readable name for the timespan
timespan_description | Text | Ignored
timespan_weight | Numeric | Ignored
timespan_aggregate | Text | Ignored
timespan_license | Text | Ignored
timespan_source | Text | Ignored
timespan_type | Text | Ignored
timespan_extra | JSONB | Ignored
timespan_tags | JSONB | Ignored
valid_numer | Boolean | True if the `numer_id` argument is a valid numerator for this timespan, False otherwise
valid_denom | Boolean | True if the `timespan` argument is a valid timespan for this timespan, False otherwise
valid_geom | Boolean | True if the `geom_id` argument is a valid geometry for this timespan, False otherwise
#### Examples
Obtain all timespans that are available within a small rectangle.
```sql
SELECT * FROM OBS_GetAvailableTimespans(
ST_MakeEnvelope(-74, 41, -73, 40, 4326));
```
Obtain all timespans for total population (`us.census.acs.B01003001`).
```sql
SELECT * FROM OBS_GetAvailableTimespans(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001')
WHERE valid_numer IS True;
```
Obtain all timespans that work with US states (`us.census.tiger.state`)
as a geometry.
```sql
SELECT * FROM OBS_GetAvailableTimespans(
ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, 'us.census.tiger.state')
WHERE valid_geom IS True;
```

View File

@@ -0,0 +1,35 @@
## Support Options
Feeling stuck? There are many ways to find help.
* Ask a question on [GIS StackExchange](https://gis.stackexchange.com/questions/tagged/carto) using the `CARTO` tag.
* [Report an issue](https://github.com/CartoDB/cartodb.js/issues) in Github.
* Engine Plan customers have additional access to enterprise-level support through CARTO's support representatives.
If you just want to describe an issue or share an idea, just <a class="typeform-share" href="https://cartohq.typeform.com/to/mH6RRl" data-mode="popup" target="_blank"> send your feedback</a>.
### Issues on Github
If you think you may have found a bug, or if you have a feature request that you would like to share with the CARTO.js team, please [open an issue](https://github.com/cartodb/cartodb.js/issues/new).
Before opening an issue, review the [contributing guidelines](https://github.com/CartoDB/cartodb.js/blob/develop/CONTRIBUTING.md#filling-a-ticket).
### Community support on GIS Stack Exchange
GIS Stack Exchange is the most popular community in the geospatial industry. This is a collaboratively-edited question and answer site for geospatial programmers and technicians. It is a fantastic resource for asking technical questions about developing and maintaining your application.
When posting a new question, please consider the following:
* Read the GIS Stack Exchange [help](https://gis.stackexchange.com/help) and [how to ask](https://gis.stackexchange.com/help/how-to-ask) pages for guidelines and tips about posting questions.
* Be very clear about your question in the subject. A clear explanation helps those trying to answer your question, as well as those who may be looking for information in the future.
* Be informative in your post. Details, code snippets, logs, screenshots, etc. help others to understand your problem.
* Use code that demonstrates the problem. It is very hard to debug errors without sample code to reproduce the problem.
### Engine Plan Customers
Engine Plan customers have additional support options beyond general community support. As per your account Terms of Service, you have access to enterprise-level support through CARTO's support representatives available at [enterprise-support@carto.com](mailto:enterprise-support@carto.com)
In order to speed up the resolution of your issue, provide as much information as possible (even if it is a link from community support). This allows our engineers to investigate your problem as soon as possible.
If you are not yet CARTO customer, browse our [plans & pricing](https://carto.com/pricing/) and find the right plan for you.

View File

@@ -0,0 +1,36 @@
## Contribute
CARTO platform is an open-source ecosystem. You can read about the [fundamentals]({{site.fundamental_docs}}/components/) of CARTO architecture and its components.
We are more than happy to receive your contributions to the code and the documentation as well.
## Filling a ticket
If you want to open a new issue in our repository, please follow these instructions:
1. Descriptive title.
2. Write a good description, it always helps.
3. Specify the steps to reproduce the problem.
4. Try to add an example showing the problem.
## Contributing code
Best part of open source, collaborate in Data Observatory code!. We like hearing from you, so if you have any bug fixed, or a new feature ready to be merged, those are the steps you should follow:
1. Fork the repository.
2. Create a new branch in your forked repository.
3. Commit your changes. Add new tests if it is necessary.
4. Open a pull request.
5. Any of the maintainers will take a look.
6. If everything works, it will merged and released \o/.
If you want more detailed information, this [GitHub guide](https://guides.github.com/activities/contributing-to-open-source/) is a must.
## Completing documentation
Data Observatory documentation is located in ```docs/```. That folder is the content that appears in the [Developer Center](http://carto.com/developers/data-observatory/). Just follow the instructions described in [contributing code](#contributing-code) and after accepting your pull request, we will make it appear online :).
**Tip:** A convenient, easy way of proposing changes in documentation is by using the GitHub editor directly on the web. You can easily create a branch with your changes and make a PR from there.
## Submitting contributions
You will need to sign a Contributor License Agreement (CLA) before making a submission. [Learn more here](https://carto.com/contributions).

View File

@@ -0,0 +1,32 @@
## License
The Data Observatory is a collection of data sources with varying licenses and terms of use. We have endeavored to find you data that will work for the broadest set of use-cases. The following third-party data sources are used in the Data Observatory, and we have included the links to the terms governing their use.
_**Legal Note**: The Data Observatory makes use of a variety of third party data and databases (collectively, the “Data”). You acknowledge that the included Data, and the licenses and terms of use, may be amended from time to time. Whenever you use the Data, you agree to the current relevant terms or license. Some Data will require that you provide attribution to the data source. Other Data may be protected by US or international copyright laws, treaties, or conventions. The Data and associated metadata are provided 'as-is', without express or implied warranty of any kind, including, but not limited to, infringement, merchantability and fitness for a particular purpose. CartoDB is not responsible for the accuracy, completeness, timeliness or quality of the Data._
Name | Terms link
-------|---------
ACS | [https://www.usa.gov/government-works](https://www.usa.gov/government-works)
Australian Bureau of Statistics DataPacks | [https://creativecommons.org/licenses/by/2.5/au/](https://creativecommons.org/licenses/by/2.5/au/)
Bureau of Labor Statistics Quarterly Census of Employment and Wages (QCEW) | [https://www.usa.gov/government-works](https://www.usa.gov/government-works)
Censo Demográfico of the Instituto Brasileiro de Geografia e Estatística (IBGE) | Statistics are provided by the federal Institute of Applied Economic Research (IPEA), many of which are reproduced from another source. Some series are regularly updated, others are not. Licensing information is similar to CC-BY, allowing copying and reuse, but requiring attribution.<br /><br />[http://www.ipeadata.gov.br/iframe_direitouso.aspx](http://www.ipeadata.gov.br/iframe_direitouso.aspx?width=1009&height=767)
Consumer Data Research Centre | [http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/](http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/)
El Instituto Nacional de Estadística (INE) | The National Statistics Institute (INE) of Spain includes data from multiple sources. If you are re-using their data, they explicitly require that you reference them accordingly<br /><br />[http://www.ine.es/ss/Satellite?L=0&c=Page&cid=1254735849170&p=1254735849170&pagename=Ayuda%2FINELayout](http://www.ine.es/ss/Satellite?L=0&c=Page&cid=1254735849170&p=1254735849170&pagename=Ayuda%2FINELayout)
EuroGraphics EuroGlobalMap | [http://www.eurogeographics.org/content/eurogeographics-euroglobalmap-opendata](http://www.eurogeographics.org/content/eurogeographics-euroglobalmap-opendata)<br /><br />This product includes Intellectual Property from European National Mapping and Cadastral Authorities and is licensed on behalf of these by EuroGeographics. Original product is available for free at [www.eurogeographics.org](http://www.eurogeographics.org/). Terms of the license available at [http://www.eurogeographics.org/form/topographic-data-eurogeographics](http://www.eurogeographics.org/form/topographic-data-eurogeographics)
GeoNames | [http://www.geonames.org/](http://www.geonames.org/)
GeoPlanet | [https://developer.yahoo.com/geo/geoplanet/](https://developer.yahoo.com/geo/geoplanet/)
Instituto Nacional de Estadística y Geografía | The National Statistics and Geography Institute (INEGI) of Mexico requires credit be given to INEGI as an author<br /><br />[http://www.inegi.org.mx/terminos/terminos_info.aspx](http://www.inegi.org.mx/terminos/terminos_info.aspx)
National Center for Geographic Information (CNIG) | [https://www.cnig.es/propiedadIntelectual.do](https://www.cnig.es/propiedadIntelectual.do)
National Institute of Statistics and Economic Studies (INSEE) | [http://www.insee.fr/en/service/default.asp?page=rediffusion/copyright.htm](http://www.insee.fr/en/service/default.asp?page=rediffusion/copyright.htm)
Natural Earth | [http://www.naturalearthdata.com/about/terms-of-use/](http://www.naturalearthdata.com/about/terms-of-use/)
Northern Ireland Statistics and Research Agency | [https://www.nisra.gov.uk/statistics/terms-and-conditions](https://www.nisra.gov.uk/statistics/terms-and-conditions)
Office for National Statistics (ONS) | [http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/](http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/)
Quattroshapes | [https://github.com/foursquare/quattroshapes/blob/master/LICENSE.md](https://github.com/foursquare/quattroshapes/blob/master/LICENSE.md)
Scotland's Census Data Warehouse by National Records of Scotland | [https://www.nrscotland.gov.uk/copyright-and-disclaimer](https://www.nrscotland.gov.uk/copyright-and-disclaimer)
Spielman & Singleton | [https://www.openicpsr.org/openicpsr/project/100235/version/V5/view](https://www.openicpsr.org/openicpsr/project/100235/version/V5/view)
Statistics Canada Census of Population 2011 | [http://www.statcan.gc.ca/eng/reference/licence](http://www.statcan.gc.ca/eng/reference/licence)
Statistics Canada National Household Survey 2011 | [http://www.statcan.gc.ca/eng/reference/licence](http://www.statcan.gc.ca/eng/reference/licence)
TIGER | [https://www.usa.gov/government-works](https://www.usa.gov/government-works)
Who's on First | [http://whosonfirst.mapzen.com#License](http://whosonfirst.mapzen.com#License)
Zetashapes | [http://zetashapes.com/license](http://zetashapes.com/license)
Zillow Home Value Index | This data is "Aggregate Data", per the Zillow Terms of Use<br /><br />[http://www.zillow.com/corp/Terms.htm](http://www.zillow.com/corp/Terms.htm)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,5 @@
comment = 'CartoDB Observatory backend extension'
default_version = '1.6.0'
default_version = '1.9.0'
requires = 'postgis'
superuser = true
schema = cdb_observatory

4
scripts/ci/docker-test.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/bin/bash
docker run -e PGHOST=localhost -e PGPORT=5432 -v `pwd`:/srv --entrypoint="/bin/bash" ${1} /srv/scripts/ci/run_tests_docker.sh && \
docker ps --filter status=dead --filter status=exited -aq | xargs docker rm -v

View File

@@ -0,0 +1,38 @@
#!/bin/bash
# echo commands
set -x
# exit on error
set -e
dpkg -l | grep postgresql
# Add the PDGD repository
apt-key adv --keyserver keys.gnupg.net --recv-keys 7FCC7D46ACCC4CF8
add-apt-repository "deb http://apt.postgresql.org/pub/repos/apt/ trusty-pgdg main"
apt-get update
# Remove those all PgSQL versions except the one we're testing
PGSQL_VERSIONS=(9.2 9.3 9.4 9.5 9.6 10)
/etc/init.d/postgresql stop # stop travis default instance
for V in "${PGSQL_VERSIONS[@]}"; do
if [ "$V" != "$PGSQL_VERSION" ]; then
apt-get -y remove --purge postgresql-${V} postgresql-client-${V} postgresql-contrib-${V} postgresql-${V}-postgis-2.3-scripts
else
apt-get -y remove --purge postgresql-${V}-postgis-2.3-scripts
fi
done
apt-get -y autoremove
# Install PostgreSQL
apt-get -y install postgresql-${PGSQL_VERSION} postgresql-${PGSQL_VERSION}-postgis-${POSTGIS_VERSION} postgresql-server-dev-${PGSQL_VERSION} postgresql-plpython-${PGSQL_VERSION}
# Configure it to accept local connections from postgres
echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" > /etc/postgresql/${PGSQL_VERSION}/main/pg_hba.conf
# Restart PostgreSQL
/etc/init.d/postgresql restart ${PGSQL_VERSION}
dpkg -l | grep postgresql

View File

@@ -0,0 +1,12 @@
#!/bin/bash
/etc/init.d/postgresql start
cd /srv
make clean-all
make install
cd /srv/src/pg
make test || { cat /srv/src/pg/test/regression.diffs; false; }

View File

@@ -2,12 +2,18 @@ import os
import psycopg2
import subprocess
PGUSER = os.environ.get('PGUSER', 'postgres')
PGPASSWORD = os.environ.get('PGPASSWORD', '')
PGHOST=os.environ.get('PGHOST', 'localhost')
PGPORT=os.environ.get('PGPORT', '5432')
PGDATABASE=os.environ.get('PGDATABASE', 'postgres')
DB_CONN = psycopg2.connect('postgres://{user}:{password}@{host}:{port}/{database}'.format(
user=os.environ.get('PGUSER', 'postgres'),
password=os.environ.get('PGPASSWORD', ''),
host=os.environ.get('PGHOST', 'localhost'),
port=os.environ.get('PGPORT', '5432'),
database=os.environ.get('PGDATABASE', 'postgres'),
user=PGUSER,
password=PGPASSWORD,
host=PGHOST,
port=PGPORT,
database=PGDATABASE
))
CURSOR = DB_CONN.cursor()
@@ -51,9 +57,9 @@ def get_tablename_query(column_id, boundary_id, timespan):
METADATA_TABLES = ['obs_table', 'obs_column_table', 'obs_column', 'obs_column_tag',
'obs_tag', 'obs_column_to_column', 'obs_dump_version', 'obs_meta',
'obs_meta_numer', 'obs_meta_denom', 'obs_meta_geom',
'obs_meta_timespan', 'obs_column_table_tile',
'obs_column_table_tile_simple']
'obs_table_to_table', 'obs_meta_numer', 'obs_meta_denom',
'obs_meta_geom', 'obs_meta_timespan', 'obs_meta_geom_numer_timespan',
'obs_column_table_tile', 'obs_column_table_tile_simple']
FIXTURES = [
('us.census.acs.B01003001_quantile', 'us.census.tiger.census_tract', '2010 - 2014'),
@@ -207,15 +213,13 @@ FIXTURES = [
('us.census.spielman_singleton_segments.X55', 'us.census.tiger.census_tract', '2010 - 2014'),
('us.zillow.AllHomes_Zhvi', 'us.census.tiger.zcta5', '2014-01'),
('us.zillow.AllHomes_Zhvi', 'us.census.tiger.zcta5', '2016-06'),
('whosonfirst.wof_country_name', 'whosonfirst.wof_country_geom', '2016'),
('us.census.acs.B01003001', 'us.census.tiger.zcta5_clipped', '2010 - 2014'),
('us.census.acs.B01003001', 'us.census.tiger.block_group_clipped', '2010 - 2014'),
('us.census.acs.B01003001', 'us.census.tiger.census_tract_clipped', '2010 - 2014'),
('us.census.tiger.fullname', 'us.census.tiger.pointlm_geom', '2016'),
('us.census.tiger.fullname', 'us.census.tiger.prisecroads_geom', '2016'),
('us.census.tiger.name', 'us.census.tiger.county', '2015'),
('us.census.tiger.name', 'us.census.tiger.county_clipped', '2015'),
('us.census.tiger.name', 'us.census.tiger.block_group', '2015'),
('us.census.acs.B01003001', 'us.census.tiger.zcta5', '2010 - 2014'),
('us.census.acs.B01003001', 'us.census.tiger.block_group', '2010 - 2014'),
('us.census.acs.B01003001', 'us.census.tiger.census_tract', '2010 - 2014'),
('us.census.tiger.place_geoname', 'us.census.tiger.place_clipped', '2015'),
('us.census.tiger.county_geoname', 'us.census.tiger.county_clipped', '2015'),
('us.census.tiger.county_geoname', 'us.census.tiger.county', '2015'),
('us.census.tiger.block_group_geoname', 'us.census.tiger.block_group', '2015'),
]
OUTFILE_PATH = os.path.join(os.path.dirname(__file__), '..',
@@ -230,27 +234,35 @@ def dump(cols, tablename, where=''):
tablename=tablename,
))
subprocess.check_call('pg_dump -x --section=pre-data -t observatory.{tablename} '
subprocess.check_call('PGPASSWORD={pgpassword} PGUSER={pguser} PGHOST={pghost} PGDATABASE={pgdb} '
'pg_dump -x --section=pre-data -t observatory.{tablename} '
' | sed "s:SET search_path.*::" '
' | sed "s:CREATE TABLE :CREATE TABLE observatory.:" '
' | sed "s:ALTER TABLE.*OWNER.*::" '
' | sed "s:SET idle_in_transaction_session_timeout.*::" '
' >> {outfile}'.format(
tablename=tablename,
outfile=OUTFILE_PATH,
pgpassword=PGPASSWORD,
pghost=PGHOST,
pgdb=PGDATABASE,
pguser=PGUSER
), shell=True)
with open(OUTFILE_PATH, 'a') as outfile:
outfile.write('COPY observatory."{}" FROM stdin WITH CSV HEADER;\n'.format(tablename))
subprocess.check_call('''
psql -c "COPY (SELECT {cols} \
PGPASSWORD={pgpassword} psql -U {pguser} -d {pgdb} -h {pghost} -c "COPY (SELECT {cols} \
FROM observatory.{tablename} {where}) \
TO STDOUT WITH CSV HEADER" >> {outfile}'''.format(
cols=cols,
tablename=tablename,
where=where,
outfile=OUTFILE_PATH,
pgpassword=PGPASSWORD,
pghost=PGHOST,
pgdb=PGDATABASE,
pguser=PGUSER
), shell=True)
with open(OUTFILE_PATH, 'a') as outfile:
@@ -347,6 +359,10 @@ def main():
timespans=','.join(["'{}'".format(x) for _, _, x in FIXTURES]),
table_ids=','.join(["'{}'".format(x) for _, _, x in unique_tables])
)
elif tablename in ('obs_table_to_table'):
where = '''WHERE source_id IN ({table_ids})'''.format(
table_ids=','.join(["'{}'".format(x) for _, _, x in unique_tables])
)
else:
where = ''
dump('*', tablename, where)
@@ -355,12 +371,6 @@ def main():
if 'zcta5' in table_id or 'zillow_zip' in table_id:
where = '\'11%\''
compare = 'LIKE'
elif 'pri_sec_roads' in table_id or 'point_landmark' in table_id:
dump('*', tablename, 'WHERE geom && ST_MakeEnvelope(-74,40.69,-73.9,40.72, 4326)')
continue
elif 'whosonfirst' in table_id:
where = "('85632785','85633051','85633111','85633147','85633253','85633267')"
compare = 'IN'
elif 'county' in table_id and 'tiger' in table_id:
where = "('48061', '36047')"
compare = 'IN'

View File

@@ -1,3 +1,4 @@
requests
nose
nose_parameterized
psycopg2

View File

@@ -1,5 +1,5 @@
comment = 'CartoDB Observatory backend extension'
default_version = '1.6.0'
default_version = '1.9.0'
requires = 'postgis'
superuser = true
schema = cdb_observatory

View File

@@ -1,3 +1,16 @@
-- In Postgis 3+, geomval is part of postgis_raster
-- Trying to workaround it by creating the type ourselves leads to other issues:
-- - If we create it under public, then if we try to create postgis_raster afterwards it will fail (ERROR: type "geomval" already exists).
-- - If we create it under cdb_observatory, then things work until we install postgis_raster. At that moment depending on how
-- the search_path is set (per call, function, user...) we start getting random errors since it's mixing public.geomval and
-- cdb_observatory.geomval (function cdb_observatory.obs_getdata(geomval[], json) does not exist)
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'geomval') THEN
RAISE EXCEPTION 'Missing `geomval` type. Use `CREATE EXTENSION postgis_raster` to enable it.';
END IF;
END$$;
-- Returns the table name with geoms for the given geometry_id
-- TODO probably needs to take in the column_id array to get the relevant

View File

@@ -249,10 +249,10 @@ BEGIN
'suggested_name', cdb_observatory.FIRST(
LOWER(TRIM(BOTH '_' FROM regexp_replace(CASE WHEN numer_id IS NOT NULL
THEN CASE
WHEN normalization ILIKE 'area%%' THEN numer_colname || ' per sq km'
WHEN normalization ILIKE 'denom%%' THEN numer_colname || ' rate'
ELSE numer_colname
END || ' ' || numer_timespan
WHEN normalization ILIKE 'area%%' THEN numer_colname || ' per sq km' || ' ' || numer_timespan
WHEN normalization ILIKE 'denom%%' THEN numer_colname || ' ' || numer_timespan || ' by ' || denom_colname
ELSE numer_colname || ' ' || numer_timespan
END
ELSE geom_name || ' ' || geom_timespan
END, '[^a-zA-Z0-9]+', '_', 'g')))
),
@@ -516,8 +516,8 @@ BEGIN
WHEN numer_id IS NULL THEN
'''geomref'', ' || 'cdb_observatory.FIRST(' || geom_tablename ||
'.' || geom_geomref_colname || '), ' ||
'''value'', ' || 'cdb_observatory.FIRST(' || geom_tablename ||
'.' || geom_colname || ')'
'''value'', ' || '(cdb_observatory.FIRST(' || geom_tablename ||
'.' || geom_colname || '))::TEXT' -- Needed to force text output in Postgis 3+, later parsed automagically by ::Geometry. Otherwise we'd get geojson output
ELSE ''
END || ')', ', ')
AS colspecs,
@@ -1076,3 +1076,46 @@ BEGIN
RETURN result;
END;
$$ LANGUAGE plpgsql STABLE;
-- MetadataValidation checks the metadata parameters and the geometry type
-- of the data in order to find possible wrong cases
CREATE OR REPLACE FUNCTION cdb_observatory.obs_metadatavalidation(
geometry_extent geometry(Geometry, 4326),
geometry_type text,
params JSON,
target_geoms INTEGER DEFAULT NULL
)
RETURNS TABLE(valid boolean, errors text[]) AS $$
DECLARE
meta json;
errors text[];
BEGIN
errors := (ARRAY[])::TEXT[];
IF geometry_type IN ('ST_Polygon', 'ST_MultiPolygon') THEN
FOR meta IN EXECUTE 'SELECT json_array_elements(cdb_observatory.OBS_GetMeta($1, $2, 1, 1, $3))' USING geometry_extent, params, target_geoms
LOOP
IF (meta->>'normalization' = 'denominated' AND meta->>'denom_id' is NULL) THEN
errors := array_append(errors, 'Normalizated measure should have a numerator and a denominator. Please review the provided options.');
END IF;
IF (meta->>'numer_aggregate' IS NULL) THEN
errors := array_append(errors, 'For polygon geometries, aggregation is mandatory. Please review the provided options');
END IF;
IF (meta->>'numer_aggregate' IN ('median', 'average') AND meta->>'denom_id' IS NULL) THEN
errors := array_append(errors, 'Median or average aggregation for polygons requires a denominator to provide weights. Please review the provided options');
END IF;
IF (meta->>'numer_aggregate' IN ('median', 'average') AND meta->>'normalization' NOT LIKE 'pre%') THEN
errors := array_append(errors, format('Median or average aggregation only supports prenormalized normalization, %s passed. Please review the provided options', meta->>'normalization'));
END IF;
END LOOP;
IF CARDINALITY(errors) > 0 THEN
RETURN QUERY EXECUTE 'SELECT FALSE, $1' USING errors;
ELSE
RETURN QUERY SELECT TRUE, ARRAY[]::TEXT[];
END IF;
ELSE
RETURN QUERY SELECT TRUE, ARRAY[]::TEXT[];
END IF;
RETURN;
END;
$$ LANGUAGE plpgsql STABLE;

View File

@@ -323,7 +323,8 @@ CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetAvailableGeometries(
filter_tags TEXT[] DEFAULT NULL,
numer_id TEXT DEFAULT NULL,
denom_id TEXT DEFAULT NULL,
timespan TEXT DEFAULT NULL
timespan TEXT DEFAULT NULL,
number_geoms INTEGER DEFAULT NULL
) RETURNS TABLE (
geom_id TEXT,
geom_name TEXT,
@@ -382,7 +383,7 @@ BEGIN
CASE WHEN $1 IS NOT NULL AND $1 != '' THEN
EXISTS (SELECT 1 FROM observatory.obs_meta_geom_numer_timespan onu WHERE o.geom_id = onu.geom_id AND onu.numer_id = $1 AND ($3 = ANY(onu.timespans) OR $3 IN (select(unnest(o.timespans)))))
ELSE
EXISTS (SELECT 1 FROM observatory.obs_meta_geom_numer_timespan onu WHERE o.geom_id = onu.geom_id AND ($3 = ANY(onu.timespans) OR $3 IN (select(unnest(o.timespans)))))
EXISTS (SELECT 1 FROM observatory.obs_meta_geom_numer_timespan onu WHERE o.geom_id = onu.geom_id AND ($3 = ANY(onu.geom_timespans) OR $3 IN (select(unnest(o.timespans)))))
END
ELSE
false
@@ -390,15 +391,16 @@ BEGIN
FROM observatory.obs_meta_geom o
WHERE %s (geom_tags ?& $4 OR CARDINALITY($4) = 0)
), scores AS (
SELECT * FROM cdb_observatory._OBS_GetGeometryScores($5,
(SELECT ARRAY_AGG(geom_id) FROM available_geoms)
SELECT * FROM cdb_observatory._OBS_GetGeometryScores(bounds => $5,
filter_geom_ids => (SELECT ARRAY_AGG(geom_id) FROM available_geoms),
desired_num_geoms => $6::integer
)
) SELECT DISTINCT ON (geom_id) available_geoms.*, score, numtiles, notnull_percent, numgeoms,
percentfill, estnumgeoms, meanmediansize
FROM available_geoms, scores
WHERE available_geoms.geom_id = scores.column_id
$string$, geom_clause)
USING numer_id, denom_id, timespan, filter_tags, bounds;
USING numer_id, denom_id, timespan, filter_tags, bounds, number_geoms;
RETURN;
END
$$ LANGUAGE plpgsql;
@@ -446,12 +448,12 @@ BEGIN
NULL::TEXT timespan_aggregate,
NULL::TEXT timespan_license,
NULL::TEXT timespan_source,
NULL::TEXT timespan_type,
timespan_type::TEXT,
NULL::JSONB timespan_extra,
NULL::JSONB timespan_tags,
$1 = ANY(numers) valid_numer,
$2 = ANY(denoms) valid_denom,
$3 = ANY(geoms) valid_geom_id
COALESCE($1 = ANY(numers), false) valid_numer,
COALESCE($2 = ANY(denoms), false) valid_denom,
COALESCE($3 = ANY(geoms), false) valid_geom_id
FROM observatory.obs_meta_timespan
WHERE %s (timespan_tags ?& $4 OR CARDINALITY($4) = 0)
$string$, geom_clause)

View File

@@ -0,0 +1,839 @@
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetTileBounds(z INTEGER, x INTEGER, y INTEGER)
RETURNS NUMERIC[] AS $$
import math
def tile2lnglat(z, x, y):
n = 2.0 ** z
y = (1 << z) - y - 1
lon = x / n * 360.0 - 180.0
lat_rad = math.atan(math.sinh(math.pi * (1 - 2 * y / n)))
lat = - math.degrees(lat_rad)
return lon, lat
lon0, lat0 = tile2lnglat(z, x, y)
lon1, lat1 = tile2lnglat(z, x+1, y-1)
return [lon0, lat0, lon1, lat1]
$$ LANGUAGE plpythonu;
DROP FUNCTION IF EXISTS cdb_observatory.OBS_GetMVT(z INTEGER, x INTEGER, y INTEGER, params JSONB);
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMVT(z INTEGER, x INTEGER, y INTEGER,
params JSON DEFAULT NULL,
extent INTEGER DEFAULT 4096, buf INTEGER DEFAULT 256, clip_geom BOOLEAN DEFAULT True)
RETURNS TABLE (mvt BYTEA)
AS $$
DECLARE
bounds NUMERIC[];
geom GEOMETRY;
ext BOX2D;
meta JSON;
procgeom_clauses TEXT;
val_clauses TEXT;
json_clause TEXT;
BEGIN
bounds := cdb_observatory.OBS_GetTileBounds(z, x, y);
geom := ST_MakeEnvelope(bounds[1], bounds[2], bounds[3], bounds[4], 4326);
ext := ST_MakeBox2D(ST_Point(bounds[1], bounds[2]), ST_Point(bounds[3], bounds[4]));
meta := cdb_observatory.obs_getmeta(geom, params::json, 1::integer, 1::integer, 1::integer);
/* Read metadata to generate clauses for query */
EXECUTE $query$
WITH _meta AS (SELECT
row_number() over () colid, *
FROM json_to_recordset($1)
AS x(id TEXT, numer_id TEXT, numer_aggregate TEXT, numer_colname TEXT,
numer_geomref_colname TEXT, numer_tablename TEXT, numer_type TEXT,
denom_id TEXT, denom_aggregate TEXT, denom_colname TEXT,
denom_geomref_colname TEXT, denom_tablename TEXT, denom_type TEXT,
denom_reltype TEXT, geom_id TEXT, geom_colname TEXT,
geom_geomref_colname TEXT, geom_tablename TEXT, geom_type TEXT,
numer_timespan TEXT, geom_timespan TEXT, normalization TEXT,
api_method TEXT, api_args JSON)
),
-- Generate procgeom clauses.
-- These join the users' geoms to the relevant geometries for the
-- asked-for measures in the Observatory.
_procgeom_clauses AS (
SELECT
'_procgeoms_' || Coalesce(left(geom_tablename,40) || '_' || geom_geomref_colname, api_method) || ' AS (' ||
'SELECT ' ||
'st_intersection(' || geom_tablename || '.' || geom_colname || ', _geoms.geom) AS geom, ' ||
'ST_AsMVTGeom(st_intersection(' || geom_tablename || '.' || geom_colname || ', _geoms.geom), $2, $3, $4, $5) AS mvtgeom, ' ||
geom_tablename || '.' || geom_geomref_colname || ' AS geomref, ' ||
'CASE WHEN ST_Within(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ')
THEN ST_Area(_geoms.geom) / Nullif(ST_Area(' || geom_tablename || '.' || geom_colname || '), 0)
WHEN ST_Within(' || geom_tablename || '.' || geom_colname || ', _geoms.geom)
THEN 1
ELSE ST_Area(cdb_observatory.safe_intersection(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ')) /
Nullif(ST_Area(' || geom_tablename || '.' || geom_colname || '), 0)
END pct_obs' || '
FROM _geoms, observatory.' || geom_tablename || '
WHERE ST_Intersects(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ')'
|| ')'
AS procgeom_clause
FROM _meta
GROUP BY api_method, geom_tablename, geom_geomref_colname, geom_colname
),
-- Generate val clauses.
-- These perform interpolations or other necessary calculations to
-- provide values according to users geometries.
_val_clauses AS (
SELECT
'_vals_' || Coalesce(left(geom_tablename,40) || '_' || geom_geomref_colname, api_method) || ' AS (
SELECT _procgeoms.geomref, _procgeoms.mvtgeom, ' ||
String_Agg('json_build_object(' || CASE
-- api-delivered values
WHEN api_method IS NOT NULL THEN
'''' || numer_colname || ''', ' ||
'ARRAY_AGG( ' ||
api_method || '.' || numer_colname || ')::' || numer_type || '[]'
-- numeric internal values
WHEN cdb_observatory.isnumeric(numer_type) THEN
'''' || numer_colname || ''', ' || CASE
-- denominated
WHEN LOWER(normalization) LIKE 'denom%'
THEN CASE
WHEN denom_tablename IS NULL THEN ' NULL '
-- denominated polygon interpolation
ELSE
' ROUND(CAST(SUM(' || numer_tablename || '.' || numer_colname || ' ' ||
' * _procgeoms.pct_obs ' ||
' ) / NULLIF(SUM(' || denom_tablename || '.' || denom_colname || ' ' ||
' * _procgeoms.pct_obs), 0) AS NUMERIC), 4) '
END
-- areaNormalized
WHEN LOWER(normalization) LIKE 'area%'
THEN
-- areaNormalized polygon interpolation
' ROUND(CAST(SUM(' || numer_tablename || '.' || numer_colname || ' ' ||
' * _procgeoms.pct_obs' ||
' ) / (Nullif(ST_Area(cdb_observatory.FIRST(_procgeoms.geom)::Geography), 0) / 1000000) AS NUMERIC), 4) '
-- median/average measures with universe
WHEN LOWER(numer_aggregate) IN ('median', 'average') AND
denom_reltype ILIKE 'universe' AND LOWER(normalization) LIKE 'pre%'
THEN
-- predenominated polygon interpolation weighted by universe
' ROUND(CAST(SUM(' || numer_tablename || '.' || numer_colname ||
' * ' || denom_tablename || '.' || denom_colname ||
' * _procgeoms.pct_obs ' ||
' ) / Nullif(SUM(' || denom_tablename || '.' || denom_colname ||
' * _procgeoms.pct_obs ' || '), 0)AS NUMERIC), 4) '
-- prenormalized for summable measures. point or summable only!
WHEN numer_aggregate ILIKE 'sum' AND LOWER(normalization) LIKE 'pre%'
THEN
-- predenominated polygon interpolation
' ROUND(CAST(SUM(' || numer_tablename || '.' || numer_colname || ' ' ||
' * _procgeoms.pct_obs) AS NUMERIC), 4) '
-- Everything else. Point only!
ELSE
' cdb_observatory._OBS_RaiseNotice(''Cannot perform calculation over polygon for ' ||
numer_id || '/' || coalesce(denom_id, '') || '/' || geom_id || '/' || numer_timespan || ''')::Numeric '
END || '::' || numer_type
-- categorical/text
WHEN LOWER(numer_type) LIKE 'text' THEN
'''' || numer_colname || ''', ' || 'MODE() WITHIN GROUP (ORDER BY ' || numer_tablename || '.' || numer_colname || ') '
-- geometry
WHEN numer_id IS NULL THEN
'''geomref'', _procgeoms.geomref, ' ||
'''' || numer_colname || ''', ' || 'cdb_observatory.FIRST(_procgeoms.mvtgeom)::TEXT'
ELSE ''
END
|| ') val_' || colid, ', ')
|| '
FROM _procgeoms_' || Coalesce(left(geom_tablename,40) || '_' || geom_geomref_colname, api_method) || ' _procgeoms ' ||
Coalesce(String_Agg(DISTINCT
Coalesce('LEFT JOIN observatory.' || numer_tablename || ' ON _procgeoms.geomref = observatory.' || numer_tablename || '.' || numer_geomref_colname,
', LATERAL (SELECT * FROM cdb_observatory.' || api_method || '(_procgeoms.mvtgeom' || Coalesce(', ' ||
(SELECT STRING_AGG(REPLACE(val::text, '"', ''''), ', ')
FROM (SELECT JSON_Array_Elements(api_args) as val) as vals),
'') || ')) AS ' || api_method)
, ' '), '') ||
E'\n GROUP BY _procgeoms.geomref, _procgeoms.mvtgeom
ORDER BY _procgeoms.geomref'
|| ')'
AS val_clause,
'_vals_' || Coalesce(left(geom_tablename, 40) || '_' || geom_geomref_colname, api_method) AS cte_name
FROM _meta
GROUP BY geom_tablename, geom_geomref_colname, geom_colname, api_method
),
-- Generate clauses necessary to join together val_clauses
_val_joins AS (
SELECT String_Agg(a.cte_name || '.geomref = ' || b.cte_name || '.geomref ', ' AND ') val_joins
FROM _val_clauses a, _val_clauses b
WHERE a.cte_name != b.cte_name
AND a.cte_name < b.cte_name
),
-- Generate JSON clause. This puts together vals from val_clauses
_json_clause AS (SELECT
'SELECT ST_AsMVT(q, ''data'', $3) FROM (' ||
'SELECT ' || cdb_observatory.FIRST(cte_name) || '.mvtgeom geom,
replace(' || (SELECT String_Agg('val_' || colid, '::TEXT || ') FROM _meta) || ', ''}{'', '', '')::jsonb
FROM ' || String_Agg(cte_name, ', ') ||
' WHERE ST_Area(' || cdb_observatory.FIRST(cte_name) || '.mvtgeom) > 0' ||
Coalesce(' AND ' || val_joins, ') q')
AS json_clause
FROM _val_clauses, _val_joins
GROUP BY val_joins
)
SELECT (SELECT String_Agg(procgeom_clause, E',\n ') FROM _procgeom_clauses),
(SELECT String_Agg(val_clause, E',\n ') FROM _val_clauses),
json_clause
FROM _json_clause
$query$ INTO
procgeom_clauses,
val_clauses,
json_clause
USING meta;
IF procgeom_clauses IS NULL OR val_clauses IS NULL OR json_clause IS NULL THEN
RETURN;
END IF;
/* Execute query */
RETURN QUERY EXECUTE format($query$
WITH _geoms AS (%s),
-- procgeom_clauses
%s,
-- val_clauses
%s
-- json_clause
%s
$query$, 'SELECT $1::geometry as geom',
String_Agg(procgeom_clauses, E',\n '),
String_Agg(val_clauses, E',\n '),
json_clause)
USING geom, ext, extent, buf, clip_geom;
RETURN;
END
$$ LANGUAGE plpgsql;
DROP TABLE IF EXISTS cdb_observatory.OBS_CachedMeta;
CREATE TABLE cdb_observatory.OBS_CachedMeta(
z INTEGER,
parameters TEXT,
num_timespans INTEGER,
num_scores INTEGER,
num_target_geoms INTEGER,
result JSON,
PRIMARY KEY (z, parameters, num_timespans, num_scores, num_target_geoms)
);
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_RetrieveMeta(
zoom INTEGER,
geom geometry(Geometry, 4326),
getmeta_parameters JSON,
num_timespan_options INTEGER DEFAULT NULL,
num_score_options INTEGER DEFAULT NULL,
target_geoms INTEGER DEFAULT NULL)
RETURNS JSON
AS $$
DECLARE
result JSON;
BEGIN
SELECT c.result
INTO result
FROM cdb_observatory.OBS_CachedMeta c
WHERE c.z = zoom
AND c.parameters = getmeta_parameters::TEXT
AND c.num_timespans = num_timespan_options
AND c.num_scores = num_score_options
AND c.num_target_geoms = target_geoms;
IF result IS NULL THEN
result := cdb_observatory.obs_getmeta(geom, getmeta_parameters, num_timespan_options, num_score_options, target_geoms);
INSERT INTO cdb_observatory.OBS_CachedMeta(z, parameters, num_timespans, num_scores, num_target_geoms, result)
SELECT zoom, getmeta_parameters::TEXT, num_timespan_options, num_score_options, target_geoms, result
ON CONFLICT (z, parameters, num_timespans, num_scores, num_target_geoms)
DO UPDATE SET result = EXCLUDED.result;
END IF;
return result;
END
$$ LANGUAGE plpgsql PARALLEL RESTRICTED;
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMCDates(
mc_schema TEXT,
geo_level TEXT,
month_no TEXT DEFAULT NULL)
RETURNS TEXT[]
AS $$
DECLARE
mc_table TEXT;
where_clause TEXT DEFAULT '';
dates TEXT[];
BEGIN
mc_table := cdb_observatory.OBS_GetMCTable(mc_schema, geo_level);
IF month_no IS NOT NULL THEN
where_clause := format(
$query$
WHERE month LIKE '%1$s/__/____'
$query$, LPAD(month_no, 2, '0'));
END IF;
EXECUTE
format(
$query$
SELECT ARRAY_AGG(DISTINCT month) dates
FROM "%1$s".%2$s
%3$s
$query$, mc_schema, mc_table, where_clause)
INTO dates;
RETURN dates;
END
$$ LANGUAGE plpgsql PARALLEL RESTRICTED;
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMCTable(mc_schema TEXT, geo_level TEXT)
RETURNS TEXT
AS $$
DECLARE
mc_table TEXT;
BEGIN
-- SELECT tablename from pg_tables
-- INTO mc_table
-- WHERE schemaname = mc_schema
-- AND tablename LIKE '%'||geo_level||'%';
mc_table := 'mc_' || geo_level;
RETURN mc_table;
END
$$ LANGUAGE plpgsql PARALLEL RESTRICTED;
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMCDOMVT(
z INTEGER, x INTEGER, y INTEGER,
geography_level TEXT,
do_measurements TEXT[],
mc_measurements TEXT[],
mc_categories TEXT[] DEFAULT ARRAY['TR']::TEXT[],
mc_months TEXT[] DEFAULT ARRAY['2018-02-01']::TEXT[],
use_meta_cache BOOLEAN DEFAULT True,
shoreline_clipped BOOLEAN DEFAULT True,
optimize_clipping BOOLEAN DEFAULT False,
simplify_geometries BOOLEAN DEFAULT False,
area_normalized BOOLEAN DEFAULT False,
extent INTEGER DEFAULT 4096,
buf INTEGER DEFAULT 256,
clip_geom BOOLEAN DEFAULT True)
RETURNS TABLE (
mvtgeom GEOMETRY,
mvtdata JSONB
)
AS $$
DECLARE
state_geoname CONSTANT TEXT DEFAULT 'us.census.tiger.state';
county_geoname CONSTANT TEXT DEFAULT 'us.census.tiger.county';
tract_geoname CONSTANT TEXT DEFAULT 'us.census.tiger.census_tract';
blockgroup_geoname CONSTANT TEXT DEFAULT 'us.census.tiger.block_group';
block_geoname CONSTANT TEXT DEFAULT 'us.census.tiger.block';
mc_schema CONSTANT TEXT DEFAULT 'us.mastercard';
mc_geoid CONSTANT TEXT DEFAULT 'region_id';
mc_category_column CONSTANT TEXT DEFAULT 'category';
mc_month_column CONSTANT TEXT DEFAULT 'month';
mc_table TEXT;
mc_category TEXT;
mc_table_categories TEXT DEFAULT '';
mc_month TEXT;
mc_month_slug TEXT;
mc_measurements_categories TEXT[];
mc_measurement TEXT;
bounds NUMERIC[];
geom GEOMETRY;
ext BOX2D;
measurement TEXT;
getmeta_parameters TEXT;
meta JSON;
mc_geography_level TEXT;
numer_tablename_do TEXT DEFAULT '';
numer_tablenames_do TEXT[] DEFAULT ARRAY['']::TEXT[];
numer_tablenames_do_outer TEXT DEFAULT '';
numer_tablenames_mc TEXT DEFAULT '';
numer_colnames_do TEXT DEFAULT '';
numer_colnames_do_qualified TEXT DEFAULT '';
numer_colnames_do_normalized TEXT DEFAULT '';
numer_colnames_mc TEXT DEFAULT '';
numer_colnames_mc_current TEXT DEFAULT '';
numer_colnames_mc_qualified TEXT DEFAULT '';
numer_colnames_mc_qualified_current TEXT DEFAULT '';
numer_colnames_mc_normalized TEXT DEFAULT '';
numer_colnames_mc_normalized_current TEXT DEFAULT '';
geom_tablenames TEXT;
geom_colnames TEXT;
geom_geomref_colnames TEXT;
geom_geomref_colnames_qualified TEXT;
geom_relations_do TEXT[] DEFAULT ARRAY['']::TEXT[];
geom_relations_mc TEXT DEFAULT '';
geom_mc_outerjoins TEXT DEFAULT '';
simplification_tolerance NUMERIC DEFAULT 0;
area_normalization TEXT DEFAULT '';
i INTEGER DEFAULT 0;
clipped TEXT default '';
BEGIN
IF area_normalized THEN
area_normalization := '/area_ratio';
END IF;
IF shoreline_clipped THEN
clipped := '_clipped';
END IF;
CASE
WHEN geography_level = state_geoname THEN
simplification_tolerance := 0.1;
IF optimize_clipping THEN
clipped := '';
END IF;
WHEN geography_level = county_geoname THEN
simplification_tolerance := 0.01;
WHEN geography_level = tract_geoname THEN
simplification_tolerance := 0.001;
WHEN geography_level = blockgroup_geoname THEN
simplification_tolerance := 0.0001;
WHEN geography_level = block_geoname THEN
simplification_tolerance := 0.0001;
ELSE
simplification_tolerance := 0;
END CASE;
IF NOT simplify_geometries THEN
simplification_tolerance := 0;
END IF;
bounds := cdb_observatory.OBS_GetTileBounds(z, x, y);
geom := ST_MakeEnvelope(bounds[1], bounds[2], bounds[3], bounds[4], 4326);
ext := ST_MakeBox2D(ST_Transform(ST_SetSRID(ST_Point(bounds[1], bounds[2]), 4326), 3857),
ST_Transform(ST_SetSRID(ST_Point(bounds[3], bounds[4]), 4326), 3857));
---------DO---------
getmeta_parameters := '[ ';
FOREACH measurement IN ARRAY do_measurements LOOP
getmeta_parameters := getmeta_parameters || '{"numer_id":"' || measurement || '","geom_id":"' || geography_level || clipped ||'"},';
END LOOP;
getmeta_parameters := substring(getmeta_parameters from 1 for length(getmeta_parameters) - 1) || ' ]';
IF use_meta_cache THEN
meta := cdb_observatory.OBS_RetrieveMeta(z, geom, getmeta_parameters::json, 1::integer, 1::integer, 1::integer);
ELSE
meta := cdb_observatory.obs_getmeta(geom, getmeta_parameters::json, 1::integer, 1::integer, 1::integer);
END IF;
IF meta IS NOT NULL THEN
SELECT array_agg(distinct 'observatory.'||numer_tablename) numer_tablenames,
string_agg(distinct numer_colname, ',')||',' numer_colnames,
string_agg(distinct numer_tablename||'.'||numer_colname, ',')||',' numer_colnames_qualified,
string_agg(distinct numer_colname||area_normalization||' '||numer_colname, ',')||',' numer_colnames_normalized,
(array_agg(distinct 'observatory.'||geom_tablename))[1] geom_tablenames,
(array_agg(distinct geom_colname))[1] geom_colnames,
(array_agg(distinct geom_geomref_colname))[1] geom_geomref_colnames,
(array_agg(distinct geom_tablename||'.'||geom_geomref_colname))[1] geom_geomref_colnames_qualified,
array_agg(distinct numer_tablename||'.'||numer_geomref_colname||'='||geom_tablename||'.'||geom_geomref_colname) geom_relations
INTO numer_tablenames_do, numer_colnames_do, numer_colnames_do_qualified, numer_colnames_do_normalized, geom_tablenames, geom_colnames,
geom_geomref_colnames, geom_geomref_colnames_qualified, geom_relations_do
FROM json_to_recordset(meta)
AS x(id TEXT, numer_id TEXT, numer_aggregate TEXT, numer_colname TEXT, numer_geomref_colname TEXT, numer_tablename TEXT,
numer_type TEXT, denom_id TEXT, denom_aggregate TEXT, denom_colname TEXT, denom_geomref_colname TEXT, denom_tablename TEXT,
denom_type TEXT, denom_reltype TEXT, geom_id TEXT, geom_colname TEXT, geom_geomref_colname TEXT, geom_tablename TEXT,
geom_type TEXT, numer_timespan TEXT, geom_timespan TEXT, normalization TEXT, api_method TEXT, api_args JSON);
IF numer_tablenames_do IS NULL OR numer_colnames_do IS NULL OR numer_colnames_do_qualified IS NULL OR numer_colnames_do_normalized IS NULL
OR geom_tablenames IS NULL OR geom_colnames IS NULL OR geom_geomref_colnames IS NULL OR geom_geomref_colnames_qualified IS NULL
OR geom_relations_do IS NULL THEN
RETURN;
END IF;
i := 0;
FOREACH numer_tablename_do IN ARRAY numer_tablenames_do LOOP
i := i + 1;
numer_tablenames_do_outer := numer_tablenames_do_outer || 'LEFT OUTER JOIN ' || numer_tablename_do || ' ON ' || geom_relations_do[i] || ' ';
END LOOP;
ELSE
getmeta_parameters := '[{"geom_id":"' || geography_level || clipped ||'"}]';
meta := cdb_observatory.obs_getmeta(geom, getmeta_parameters::json, 1::integer, 1::integer, 1::integer);
IF meta IS NULL THEN
RETURN;
END IF;
SELECT (array_agg(distinct 'observatory.'||geom_tablename))[1] geom_tablenames,
(array_agg(distinct geom_colname))[1] geom_colnames,
(array_agg(distinct geom_geomref_colname))[1] geom_geomref_colnames,
(array_agg(distinct geom_tablename||'.'||geom_geomref_colname))[1] geom_geomref_colnames_qualified
FROM json_to_recordset(meta)
INTO geom_tablenames, geom_colnames, geom_geomref_colnames, geom_geomref_colnames_qualified
AS x(id TEXT, numer_id TEXT, numer_aggregate TEXT, numer_colname TEXT, numer_geomref_colname TEXT, numer_tablename TEXT,
numer_type TEXT, denom_id TEXT, denom_aggregate TEXT, denom_colname TEXT, denom_geomref_colname TEXT, denom_tablename TEXT,
denom_type TEXT, denom_reltype TEXT, geom_id TEXT, geom_colname TEXT, geom_geomref_colname TEXT, geom_tablename TEXT,
geom_type TEXT, numer_timespan TEXT, geom_timespan TEXT, normalization TEXT, api_method TEXT, api_args JSON);
END IF;
---------MC---------
IF geography_level = 'us.census.tiger.census_tract' THEN
mc_geography_level := 'tract';
ELSE
mc_geography_level := (string_to_array(geography_level, '.'))[array_length(string_to_array(geography_level, '.'), 1)];
END IF;
mc_table := cdb_observatory.OBS_GetMCTable(mc_schema, mc_geography_level);
FOREACH mc_month IN ARRAY mc_months LOOP
mc_month_slug := replace(mc_month, '/', '');
FOREACH mc_category IN ARRAY mc_categories LOOP
mc_category := lower(mc_category);
mc_measurements_categories := ARRAY['']::TEXT[];
FOREACH mc_measurement IN ARRAY mc_measurements LOOP
mc_measurements_categories := array_append(mc_measurements_categories, mc_measurement||'_'||mc_category);
END LOOP;
SELECT string_agg(column_name||'_'||mc_month_slug, ','),
string_agg(mc_table||'_'||mc_month_slug||'.'||column_name||' '||column_name||'_'||mc_month_slug, ','),
string_agg(distinct column_name||'_'||mc_month_slug||area_normalization||' '||column_name||'_'||mc_month_slug, ',')
INTO numer_colnames_mc_current, numer_colnames_mc_qualified_current, numer_colnames_mc_normalized_current
FROM information_schema.columns
WHERE table_schema = mc_schema
AND table_name = mc_table
AND column_name = ANY(mc_measurements_categories);
IF numer_colnames_mc_current IS NOT NULL THEN
numer_colnames_mc := coalesce(numer_colnames_mc, '')||numer_colnames_mc_current||',';
END IF;
IF numer_colnames_mc_qualified_current IS NOT NULL THEN
numer_colnames_mc_qualified := coalesce(numer_colnames_mc_qualified, '')||numer_colnames_mc_qualified_current||',';
END IF;
IF numer_colnames_mc_normalized_current IS NOT NULL THEN
numer_colnames_mc_normalized := coalesce(numer_colnames_mc_normalized, '')||numer_colnames_mc_normalized_current||',';
END IF;
END LOOP;
IF mc_table IS NOT NULL THEN
numer_tablenames_mc := '"'||mc_schema||'".'||mc_table||' '||mc_table||'_'||mc_month_slug;
geom_relations_mc := mc_table||'_'||mc_month_slug||'.'||mc_geoid||'='||geom_geomref_colnames_qualified;
mc_table_categories := mc_table||'_'||mc_month_slug||'.'||mc_month_column||'='''||mc_month||'''';
geom_mc_outerjoins := coalesce(geom_mc_outerjoins, '')||' LEFT OUTER JOIN '||numer_tablenames_mc||' ON '||geom_relations_mc||' AND '||mc_table_categories;
END IF;
END LOOP;
---------Query build and execution---------
RETURN QUERY EXECUTE format(
$query$
SELECT mvtgeom,
(select row_to_json(_)::jsonb from (select id, %9$s %3$s area_ratio, area) as _) as mvtdata
FROM (
SELECT ST_AsMVTGeom(ST_Transform(the_geom, 3857), $1, $2, $3, $4) AS mvtgeom, %8$s as id, %6$s %7$s area_ratio, area FROM (
SELECT %1$s the_geom, %8$s, %2$s %10$s
CASE WHEN ST_Within($5, %1$s)
THEN ST_Area($5) / Nullif(ST_Area(%1$s), 0)
WHEN ST_Within(%1$s, $5)
THEN 1
ELSE ST_Area(ST_Intersection(st_simplifyvw(%1$s, $6), $5)) / Nullif(ST_Area(%1$s), 0)
END area_ratio,
ROUND(ST_Area(ST_Transform(the_geom,3857))::NUMERIC, 2) area
FROM %5$s
%4$s
%11$s
WHERE st_intersects(%1$s, $5)
) p
) q
$query$,
geom_colnames, numer_colnames_do_qualified, numer_colnames_mc, numer_tablenames_do_outer, geom_tablenames, numer_colnames_do_normalized,
numer_colnames_mc_normalized, geom_geomref_colnames, numer_colnames_do, numer_colnames_mc_qualified, geom_mc_outerjoins)
USING ext, extent, buf, clip_geom, geom, simplification_tolerance
RETURN;
END
$$ LANGUAGE plpgsql PARALLEL RESTRICTED;
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMCDOMVT(
z INTEGER,
geography_level TEXT,
do_measurements TEXT[],
mc_measurements TEXT[],
mc_categories TEXT[] DEFAULT ARRAY['TR']::TEXT[],
mc_months TEXT[] DEFAULT ARRAY['2018-02-01']::TEXT[],
use_meta_cache BOOLEAN DEFAULT True,
shoreline_clipped BOOLEAN DEFAULT True,
optimize_clipping BOOLEAN DEFAULT False,
simplify_geometries BOOLEAN DEFAULT False,
area_normalized BOOLEAN DEFAULT False,
extent INTEGER DEFAULT 4096,
buf INTEGER DEFAULT 256,
clip_geom BOOLEAN DEFAULT True)
RETURNS TABLE (
x INTEGER,
y INTEGER,
zoom INTEGER,
mvtgeom GEOMETRY,
mvtdata JSONB
)
AS $$
DECLARE
state_geoname CONSTANT TEXT DEFAULT 'us.census.tiger.state';
county_geoname CONSTANT TEXT DEFAULT 'us.census.tiger.county';
tract_geoname CONSTANT TEXT DEFAULT 'us.census.tiger.census_tract';
blockgroup_geoname CONSTANT TEXT DEFAULT 'us.census.tiger.block_group';
block_geoname CONSTANT TEXT DEFAULT 'us.census.tiger.block';
tiler_table_prefix CONSTANT TEXT DEFAULT 'tiler.xyz_us_do_geoms_tiles_temp_';
avg_x INTEGER;
avg_y INTEGER;
mc_schema CONSTANT TEXT DEFAULT 'us.mastercard';
mc_geoid CONSTANT TEXT DEFAULT 'region_id';
mc_category_column CONSTANT TEXT DEFAULT 'category';
mc_month_column CONSTANT TEXT DEFAULT 'month';
mc_table TEXT;
mc_category TEXT;
mc_category_name TEXT;
mc_table_categories TEXT DEFAULT '';
mc_month TEXT;
mc_month_slug TEXT;
mc_measurements_categories TEXT[];
mc_measurement TEXT;
measurement TEXT;
getmeta_parameters TEXT;
meta JSON;
mc_geography_level TEXT;
simplification_tolerance NUMERIC DEFAULT 0;
area_normalization TEXT DEFAULT '';
clipped TEXT default '';
i INTEGER DEFAULT 0;
bounds NUMERIC[];
geom GEOMETRY;
ext BOX2D;
numer_tablename_do TEXT DEFAULT '';
numer_tablenames_do TEXT[] DEFAULT ARRAY['']::TEXT[];
numer_tablenames_do_outer TEXT DEFAULT '';
numer_tablenames_mc TEXT DEFAULT '';
numer_colnames_do TEXT DEFAULT '';
numer_colnames_do_qualified TEXT DEFAULT '';
numer_colnames_do_normalized TEXT DEFAULT '';
numer_colnames_mc TEXT DEFAULT '';
numer_colnames_mc_current TEXT DEFAULT '';
numer_colnames_mc_qualified TEXT DEFAULT '';
numer_colnames_mc_qualified_current TEXT DEFAULT '';
numer_colnames_mc_normalized TEXT DEFAULT '';
numer_colnames_mc_normalized_current TEXT DEFAULT '';
geom_tablenames TEXT;
geom_colnames TEXT;
geom_geomref_colnames TEXT;
geom_geomref_colnames_qualified TEXT;
geom_relations_do TEXT[] DEFAULT ARRAY['']::TEXT[];
geom_relations_mc TEXT DEFAULT '';
geom_mc_outerjoins TEXT DEFAULT '';
BEGIN
IF geography_level = 'us.census.tiger.census_tract' THEN
mc_geography_level := 'tract';
ELSE
mc_geography_level := (string_to_array(geography_level, '.'))[array_length(string_to_array(geography_level, '.'), 1)];
END IF;
-- Get the average x and y (in the middle of the BBox)
EXECUTE
format(
$query$
SELECT ROUND(AVG(x)) AS x, ROUND(AVG(y)) as y
FROM %3$s%1$s_%2$s
$query$, mc_geography_level, z, tiler_table_prefix)
INTO avg_x, avg_y;
IF area_normalized THEN
area_normalization := '/area_ratio';
END IF;
IF shoreline_clipped THEN
clipped := '_clipped';
END IF;
CASE
WHEN geography_level = state_geoname THEN
simplification_tolerance := 0.1;
IF optimize_clipping THEN
clipped := '';
END IF;
WHEN geography_level = county_geoname THEN
simplification_tolerance := 0.01;
WHEN geography_level = tract_geoname THEN
simplification_tolerance := 0.001;
WHEN geography_level = blockgroup_geoname THEN
simplification_tolerance := 0.0001;
WHEN geography_level = block_geoname THEN
simplification_tolerance := 0.0001;
ELSE
simplification_tolerance := 0;
END CASE;
IF NOT simplify_geometries THEN
simplification_tolerance := 0;
END IF;
bounds := cdb_observatory.OBS_GetTileBounds(z, avg_x, avg_y);
geom := ST_MakeEnvelope(bounds[1], bounds[2], bounds[3], bounds[4], 4326);
ext := ST_MakeBox2D(ST_Transform(ST_SetSRID(ST_Point(bounds[1], bounds[2]), 4326), 3857),
ST_Transform(ST_SetSRID(ST_Point(bounds[3], bounds[4]), 4326), 3857));
---------DO---------
getmeta_parameters := '[ ';
FOREACH measurement IN ARRAY do_measurements LOOP
getmeta_parameters := getmeta_parameters || '{"numer_id":"' || measurement || '","geom_id":"' || geography_level || clipped ||'"},';
END LOOP;
getmeta_parameters := substring(getmeta_parameters from 1 for length(getmeta_parameters) - 1) || ' ]';
IF use_meta_cache THEN
meta := cdb_observatory.OBS_RetrieveMeta(z, geom, getmeta_parameters::json, 1::integer, 1::integer, 1::integer);
ELSE
meta := cdb_observatory.obs_getmeta(geom, getmeta_parameters::json, 1::integer, 1::integer, 1::integer);
END IF;
IF meta IS NOT NULL THEN
SELECT array_agg(distinct 'observatory.'||numer_tablename) numer_tablenames,
string_agg(distinct numer_colname, ',')||',' numer_colnames,
string_agg(distinct numer_tablename||'.'||numer_colname, ',')||',' numer_colnames_qualified,
string_agg(distinct numer_colname||area_normalization||' '||numer_colname, ',')||',' numer_colnames_normalized,
(array_agg(distinct 'observatory.'||geom_tablename))[1] geom_tablenames,
(array_agg(distinct geom_colname))[1] geom_colnames,
(array_agg(distinct geom_geomref_colname))[1] geom_geomref_colnames,
(array_agg(distinct geom_tablename||'.'||geom_geomref_colname))[1] geom_geomref_colnames_qualified,
array_agg(distinct numer_tablename||'.'||numer_geomref_colname||'='||geom_tablename||'.'||geom_geomref_colname) geom_relations
INTO numer_tablenames_do, numer_colnames_do, numer_colnames_do_qualified, numer_colnames_do_normalized, geom_tablenames, geom_colnames,
geom_geomref_colnames, geom_geomref_colnames_qualified, geom_relations_do
FROM json_to_recordset(meta)
AS x(id TEXT, numer_id TEXT, numer_aggregate TEXT, numer_colname TEXT, numer_geomref_colname TEXT, numer_tablename TEXT,
numer_type TEXT, denom_id TEXT, denom_aggregate TEXT, denom_colname TEXT, denom_geomref_colname TEXT, denom_tablename TEXT,
denom_type TEXT, denom_reltype TEXT, geom_id TEXT, geom_colname TEXT, geom_geomref_colname TEXT, geom_tablename TEXT,
geom_type TEXT, numer_timespan TEXT, geom_timespan TEXT, normalization TEXT, api_method TEXT, api_args JSON);
IF numer_tablenames_do IS NULL OR numer_colnames_do IS NULL OR numer_colnames_do_qualified IS NULL OR numer_colnames_do_normalized IS NULL
OR geom_tablenames IS NULL OR geom_colnames IS NULL OR geom_geomref_colnames IS NULL OR geom_geomref_colnames_qualified IS NULL
OR geom_relations_do IS NULL THEN
RETURN;
END IF;
i := 0;
FOREACH numer_tablename_do IN ARRAY numer_tablenames_do LOOP
i := i + 1;
numer_tablenames_do_outer := numer_tablenames_do_outer || 'LEFT OUTER JOIN ' || numer_tablename_do || ' ON ' || geom_relations_do[i] || ' ';
END LOOP;
ELSE
getmeta_parameters := '[{"geom_id":"' || geography_level || clipped ||'"}]';
meta := cdb_observatory.obs_getmeta(geom, getmeta_parameters::json, 1::integer, 1::integer, 1::integer);
IF meta IS NULL THEN
RETURN;
END IF;
SELECT (array_agg(distinct 'observatory.'||geom_tablename))[1] geom_tablenames,
(array_agg(distinct geom_colname))[1] geom_colnames,
(array_agg(distinct geom_geomref_colname))[1] geom_geomref_colnames,
(array_agg(distinct geom_tablename||'.'||geom_geomref_colname))[1] geom_geomref_colnames_qualified
FROM json_to_recordset(meta)
INTO geom_tablenames, geom_colnames, geom_geomref_colnames, geom_geomref_colnames_qualified
AS x(id TEXT, numer_id TEXT, numer_aggregate TEXT, numer_colname TEXT, numer_geomref_colname TEXT, numer_tablename TEXT,
numer_type TEXT, denom_id TEXT, denom_aggregate TEXT, denom_colname TEXT, denom_geomref_colname TEXT, denom_tablename TEXT,
denom_type TEXT, denom_reltype TEXT, geom_id TEXT, geom_colname TEXT, geom_geomref_colname TEXT, geom_tablename TEXT,
geom_type TEXT, numer_timespan TEXT, geom_timespan TEXT, normalization TEXT, api_method TEXT, api_args JSON);
END IF;
---------MC---------
IF geography_level = 'us.census.tiger.census_tract' THEN
mc_geography_level := 'tract';
ELSE
mc_geography_level := (string_to_array(geography_level, '.'))[array_length(string_to_array(geography_level, '.'), 1)];
END IF;
mc_table := cdb_observatory.OBS_GetMCTable(mc_schema, mc_geography_level);
FOREACH mc_month IN ARRAY mc_months LOOP
mc_month_slug := replace(mc_month, '/', '');
FOREACH mc_category IN ARRAY mc_categories LOOP
mc_category := lower(mc_category);
mc_measurements_categories := ARRAY['']::TEXT[];
FOREACH mc_measurement IN ARRAY mc_measurements LOOP
mc_measurements_categories := array_append(mc_measurements_categories, mc_measurement||'_'||mc_category);
END LOOP;
SELECT string_agg(column_name||'_'||mc_month_slug, ','),
string_agg(mc_table||'_'||mc_month_slug||'.'||column_name||' '||column_name||'_'||mc_month_slug, ','),
string_agg(distinct column_name||'_'||mc_month_slug||area_normalization||' '||column_name||'_'||mc_month_slug, ',')
INTO numer_colnames_mc_current, numer_colnames_mc_qualified_current, numer_colnames_mc_normalized_current
FROM information_schema.columns
WHERE table_schema = mc_schema
AND table_name = mc_table
AND column_name = ANY(mc_measurements_categories);
IF numer_colnames_mc_current IS NOT NULL THEN
numer_colnames_mc := coalesce(numer_colnames_mc, '')||numer_colnames_mc_current||',';
END IF;
IF numer_colnames_mc_qualified_current IS NOT NULL THEN
numer_colnames_mc_qualified := coalesce(numer_colnames_mc_qualified, '')||numer_colnames_mc_qualified_current||',';
END IF;
IF numer_colnames_mc_normalized_current IS NOT NULL THEN
numer_colnames_mc_normalized := coalesce(numer_colnames_mc_normalized, '')||numer_colnames_mc_normalized_current||',';
END IF;
END LOOP;
IF mc_table IS NOT NULL THEN
numer_tablenames_mc := '"'||mc_schema||'".'||mc_table||' '||mc_table||'_'||mc_month_slug;
geom_relations_mc := mc_table||'_'||mc_month_slug||'.'||mc_geoid||'='||geom_geomref_colnames_qualified;
mc_table_categories := mc_table||'_'||mc_month_slug||'.'||mc_month_column||'='''||mc_month||'''';
geom_mc_outerjoins := coalesce(geom_mc_outerjoins, '')||' LEFT OUTER JOIN '||numer_tablenames_mc||' ON '||geom_relations_mc||' AND '||mc_table_categories;
END IF;
END LOOP;
---------Query build and execution---------
RETURN QUERY EXECUTE format(
$query$
SELECT x, y, z,
mvtgeom,
(select row_to_json(_)::jsonb from (select id, %9$s %3$s area_ratio, area) as _) as mvtdata
FROM (
SELECT x, y, z,
ST_AsMVTGeom(ST_Transform(the_geom, 3857),
bbox2d, $1, $2, $3) AS mvtgeom, %8$s as id, %6$s %7$s area_ratio, area FROM (
SELECT tx.x, tx.y, tx.z,
%1$s the_geom, %8$s, %2$s %10$s
CASE WHEN ST_Within(tx.envelope, %1$s)
THEN ST_Area(tx.envelope) / Nullif(ST_Area(%1$s), 0)
WHEN ST_Within(%1$s, tx.envelope)
THEN 1
ELSE ST_Area(ST_Intersection(st_simplifyvw(%1$s, $4), tx.envelope)) / Nullif(ST_Area(%1$s), 0)
END area_ratio,
ROUND(ST_Area(ST_Transform(the_geom,3857))::NUMERIC, 2) area,
ST_MakeBox2D(ST_Transform(ST_SetSRID(ST_Point(tx.bounds[1], tx.bounds[2]), 4326), 3857),
ST_Transform(ST_SetSRID(ST_Point(tx.bounds[3], tx.bounds[4]), 4326), 3857)) bbox2d
FROM tiler.xyz_us_mc_tiles_temp_%12$s_%13$s tx,
%5$s
%4$s
%11$s
WHERE st_intersects(%1$s, tx.envelope)
) p
) q
$query$,
geom_colnames, numer_colnames_do_qualified, numer_colnames_mc, numer_tablenames_do_outer, geom_tablenames, numer_colnames_do_normalized,
numer_colnames_mc_normalized, geom_geomref_colnames, numer_colnames_do, numer_colnames_mc_qualified, geom_mc_outerjoins,
mc_geography_level, z)
USING extent, buf, clip_geom, simplification_tolerance
RETURN;
END
$$ LANGUAGE plpgsql PARALLEL RESTRICTED;

View File

@@ -1,4 +1,167 @@
-- Install dependencies
CREATE EXTENSION postgis;
-- Install the extension
CREATE EXTENSION observatory VERSION 'dev';
\set ECHO none
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)
set_config
------------
(1 row)

View File

@@ -1,6 +1,5 @@
\pset format unaligned
\set ECHO all
\i test/fixtures/load_fixtures.sql
SET client_min_messages TO WARNING;
\set ECHO none
_obs_geomtable_with_returned_table
@@ -21,7 +20,6 @@ t
obs_dumpversion_notnull
t
(1 row)
ERROR: Error performing intersection: TopologyException: found non-noded intersection between LINESTRING (-97.1968 25.9574, -97.1971 25.9576) and LINESTRING (-97.197 25.9575, -97.1972 25.9576) at -97.19699802694231 25.957551976080605
complex_safe_intersection_works
t
(1 row)

View File

@@ -240,12 +240,6 @@ t|t|t
id|correct_num_geoms|correct_pop|correct_bg_names
t|t|t|t
(1 row)
id|correct_num_points
t|t
(1 row)
id|correct_num_points|pointgeom_names
t|t|t
(1 row)
id|obs_getdata_by_id_one_measure_null
t|t
(1 row)
@@ -299,8 +293,17 @@ tract_sample|tract_max_error|tract_avg_error|tract_min_error
25|t|t|t
50|t|t|t
100|t|t|t
761|t|t|t
741|t|t|t
(9 rows)
no_bg_point_error
t
(1 row)
valid|errors
t|{}
(1 row)
valid|errors
f|{"Median or average aggregation only supports prenormalized normalization, denominated passed. Please review the provided options"}
(1 row)
valid|errors
f|{"Normalizated measure should have a numerator and a denominator. Please review the provided options."}
(1 row)

View File

@@ -232,21 +232,21 @@ us.census.tiger.zcta5|9
us.census.tiger.county|0
(4 rows)
column_id|_obs_geometryscores_numgeoms_50km_buffer
us.census.tiger.block_group|10817
us.census.tiger.block_group|10818
us.census.tiger.census_tract|3396
us.census.tiger.zcta5|484
us.census.tiger.zcta5|483
us.census.tiger.county|11
(4 rows)
column_id|_obs_geometryscores_numgeoms_500km_buffer
us.census.tiger.block_group|48567
us.census.tiger.census_tract|15823
us.census.tiger.zcta5|6466
us.census.tiger.block_group|48569
us.census.tiger.census_tract|15825
us.census.tiger.zcta5|6465
us.census.tiger.county|295
(4 rows)
column_id|_obs_geometryscores_numgeoms_2500km_buffer
us.census.tiger.block_group|165852
us.census.tiger.census_tract|55283
us.census.tiger.zcta5|27046
us.census.tiger.zcta5|26529
us.census.tiger.county|2551
(4 rows)
_obs_geometryscores_500km_buffer_50_geoms

View File

@@ -42,12 +42,6 @@ t
obs_getboundarybyid_boundary_id_mismatch_geom_id
t
(1 row)
_obs_getboundariesbygeometry_roads_around_cartodb
t
(1 row)
_obs_getboundariesbygeometry_points_around_cartodb
t
(1 row)
_obs_getboundariesbygeometry_tracts_around_cartodb
t
(1 row)
@@ -60,9 +54,6 @@ t
obs_getboundariesbygeometry_tracts_around_null_island
t
(1 row)
obs_getboundariesbygeometry_wof
t
(1 row)
obs_getboundariesbypointandradius_around_cartodb
t
(1 row)

View File

@@ -8,27 +8,28 @@ DROP TABLE IF EXISTS observatory.obs_tag;
DROP TABLE IF EXISTS observatory.obs_column_to_column;
DROP TABLE IF EXISTS observatory.obs_dump_version;
DROP TABLE IF EXISTS observatory.obs_meta;
DROP TABLE IF EXISTS observatory.obs_table_to_table;
DROP TABLE IF EXISTS observatory.obs_meta_numer;
DROP TABLE IF EXISTS observatory.obs_meta_denom;
DROP TABLE IF EXISTS observatory.obs_meta_geom;
DROP TABLE IF EXISTS observatory.obs_meta_timespan;
DROP TABLE IF EXISTS observatory.obs_meta_geom_numer_timespan;
DROP TABLE IF EXISTS observatory.obs_column_table_tile;
DROP TABLE IF EXISTS observatory.obs_column_table_tile_simple;
DROP TABLE IF EXISTS observatory.obs_78fb6c1d6ff6505225175922c2c389ce48d7632c;
DROP TABLE IF EXISTS observatory.obs_65f29658e096ca1485bf683f65fdbc9f05ec3c5d;
DROP TABLE IF EXISTS observatory.obs_1746e37b7cd28cb131971ea4187d42d71f09c5f3;
DROP TABLE IF EXISTS observatory.obs_fcd4e4f5610f6764973ef8c0c215b2e80bec8963;
DROP TABLE IF EXISTS observatory.obs_c4411eba732408d47d73281772dbf03d60645dec;
DROP TABLE IF EXISTS observatory.obs_1a098da56badf5f32e336002b0a81708c40d29cd;
DROP TABLE IF EXISTS observatory.obs_7615e8622a68bfc5fe37c69c9880edfb40250103;
DROP TABLE IF EXISTS observatory.obs_a01cd5d8ccaa6531cef715071e9307e6b1987ec3;
DROP TABLE IF EXISTS observatory.obs_6c1309a64d8f3e6986061f4d1ca7b57743e75e74;
DROP TABLE IF EXISTS observatory.obs_0310c639744a2014bb1af82709228f05b59e7d3d;
DROP TABLE IF EXISTS observatory.obs_87a814e485deabe3b12545a537f693d16ca702c2;
DROP TABLE IF EXISTS observatory.obs_e32f8e59c7c8861ee5ee4029b3ace2af9a5c9caf;
DROP TABLE IF EXISTS observatory.obs_23cb5063486bd7cf36f17e89e5e65cd31b331f6e;
DROP TABLE IF EXISTS observatory.obs_1ea93bbc109c87c676b3270789dacf7a1430db6c;
DROP TABLE IF EXISTS observatory.obs_b393b5b88c6adda634b2071a8005b03c551b609a;
DROP TABLE IF EXISTS observatory.obs_8e30e6b3792430b410ba5b9e49cdc6a0d404d48f;
DROP TABLE IF EXISTS observatory.obs_08025e1287e3af2b5de571d06562ba8d3bdb48e9;
DROP TABLE IF EXISTS observatory.obs_fae094ddb7157380e2495b9867e1f067fdbdf288;
DROP TABLE IF EXISTS observatory.obs_d03c931c9b7f9df54c3fae95bb7f958fe3187c71;
DROP TABLE IF EXISTS observatory.obs_a6811c89ed79ab4339d89a86907b586439cc74df;
DROP TABLE IF EXISTS observatory.obs_d39f7fe5959891c8296490d83c22ded31c54af13;
DROP TABLE IF EXISTS observatory.obs_3b537fe9a1dcdd3be4a53f64429e30a836ecb6ee;
DROP TABLE IF EXISTS observatory.obs_c4411eba732408d47d73281772dbf03d60645dec;
DROP TABLE IF EXISTS observatory.obs_a01cd5d8ccaa6531cef715071e9307e6b1987ec3;
DROP TABLE IF EXISTS observatory.obs_1746e37b7cd28cb131971ea4187d42d71f09c5f3;
DROP TABLE IF EXISTS observatory.obs_8e30e6b3792430b410ba5b9e49cdc6a0d404d48f;
DROP TABLE IF EXISTS observatory.obs_1a098da56badf5f32e336002b0a81708c40d29cd;
DROP TABLE IF EXISTS observatory.obs_87a814e485deabe3b12545a537f693d16ca702c2;
DROP TABLE IF EXISTS observatory.obs_65f29658e096ca1485bf683f65fdbc9f05ec3c5d;
DROP TABLE IF EXISTS observatory.obs_9b319c207dfa600c2296a6d46055e54a4c00f646;
DROP TABLE IF EXISTS observatory.obs_9b319c207dfa600c2296a6d46055e54a4c00f646;
DROP TABLE IF EXISTS observatory.obs_0310c639744a2014bb1af82709228f05b59e7d3d;
DROP TABLE IF EXISTS observatory.obs_b393b5b88c6adda634b2071a8005b03c551b609a;

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,16 @@
-- Install dependencies
CREATE EXTENSION postgis;
-- Install the extension
CREATE EXTENSION observatory VERSION 'dev';
\set ECHO none
\set QUIET on
SET client_min_messages TO ERROR;
-- For Postgis 3+ install postgis_raster. Otherwise observatory will fail to install
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM pg_available_extensions WHERE name = 'postgis_raster') THEN
CREATE EXTENSION postgis_raster WITH SCHEMA public CASCADE;
END IF;
END$$;
CREATE EXTENSION observatory VERSION 'dev' CASCADE;
\i test/fixtures/load_fixtures.sql

View File

@@ -1,6 +1,5 @@
\pset format unaligned
\set ECHO all
\i test/fixtures/load_fixtures.sql
SET client_min_messages TO WARNING;
\set ECHO none
@@ -48,12 +47,6 @@ SELECT cdb_observatory._OBS_StandardizeMeasureName('test 343 %% 2 qqq }}{{}}') =
SELECT cdb_observatory.OBS_DumpVersion()
IS NOT NULL AS OBS_DumpVersion_notnull;
-- Should fail to perform intersection
SELECT ST_IsValid(ST_Intersection(
cdb_observatory.OBS_GetBoundaryByID('48061', 'us.census.tiger.county'),
cdb_observatory.OBS_GetBoundaryByID('48061', 'us.census.tiger.county_clipped')
)) AS complex_intersection_fails;
-- Should succeed in intersecting
SELECT ST_IsValid(cdb_observatory.safe_intersection(
cdb_observatory.OBS_GetBoundaryByID('48061', 'us.census.tiger.county'),

View File

@@ -22,14 +22,14 @@ SELECT cdb_observatory.OBS_GetSegmentSnapshot(
)::text is null as null_island_segmentation;
-- Point-based OBS_GetMeasure with zillow
SELECT abs(OBS_GetMeasure_zhvi_point - 597900) / 597900 < 5.0 AS OBS_GetMeasure_zhvi_point_test FROM cdb_observatory.OBS_GetMeasure(
ST_SetSRID(ST_Point(-73.94602417945862, 40.6768220087458), 4326),
SELECT abs(OBS_GetMeasure_zhvi_point - 446000) / 446000 < 5.0 AS OBS_GetMeasure_zhvi_point_test FROM cdb_observatory.OBS_GetMeasure(
ST_SetSRID(ST_Point(-73.90820503234865, 40.69469600456701), 4326),
'us.zillow.AllHomes_Zhvi', null, 'us.census.tiger.zcta5', '2014-01'
) As t(OBS_GetMeasure_zhvi_point);
-- Point-based OBS_GetMeasure with later measure
SELECT abs(OBS_GetMeasure_zhvi_point_default_latest - 995400) / 995400 < 5.0 AS OBS_GetMeasure_zhvi_point_default_latest_test FROM cdb_observatory.OBS_GetMeasure(
ST_SetSRID(ST_Point(-73.94602417945862, 40.6768220087458), 4326),
SELECT abs(OBS_GetMeasure_zhvi_point_default_latest - 701400) / 701400 < 5.0 AS OBS_GetMeasure_zhvi_point_default_latest_test FROM cdb_observatory.OBS_GetMeasure(
ST_SetSRID(ST_Point(-73.90820503234865, 40.69469600456701), 4326),
'us.zillow.AllHomes_Zhvi', null, 'us.census.tiger.zcta5', '2016-06'
) As t(OBS_GetMeasure_zhvi_point_default_latest);
@@ -341,7 +341,7 @@ SELECT
(meta->0->>'id')::integer = 1 id,
(meta->0->>'numer_id') = 'us.census.acs.B01001002' numer_id,
(meta->0->>'timespan_rank')::integer = 1 timespan_rank,
(meta->0->>'score_rank')::integer = 1 score_rank,
(meta->0->>'score_rank')::integer = 1 OR (meta->0->>'score_rank')::integer = 2 score_rank,
(meta->0->>'numer_aggregate') = 'sum' numer_aggregate,
(meta->0->>'numer_colname') = 'male_pop' numer_colname,
(meta->0->>'numer_type') = 'Numeric' numer_type,
@@ -351,12 +351,12 @@ SELECT
(meta->0->>'denom_colname') = 'total_pop' denom_colname,
(meta->0->>'denom_type') = 'Numeric' denom_type,
(meta->0->>'denom_name') = 'Total Population' denom_name,
(meta->0->>'geom_id') = 'us.census.tiger.block_group' geom_id,
(meta->0->>'geom_id') = 'us.census.tiger.block_group' OR (meta->0->>'geom_id') = 'us.census.tiger.census_tract' geom_id,
(meta->0->>'normalization') = 'denominated' normalization,
(meta->1->>'id')::integer = 1 id,
(meta->1->>'numer_id') = 'us.census.acs.B01001002' numer_id,
(meta->1->>'timespan_rank')::integer = 1 timespan_rank,
(meta->1->>'score_rank')::integer = 2 score_rank,
(meta->1->>'score_rank')::integer = 1 OR (meta->1->>'score_rank')::integer = 2 score_rank,
(meta->1->>'numer_aggregate') = 'sum' numer_aggregate,
(meta->1->>'numer_colname') = 'male_pop' numer_colname,
(meta->1->>'numer_type') = 'Numeric' numer_type,
@@ -366,7 +366,7 @@ SELECT
(meta->1->>'denom_colname') = 'total_pop' denom_colname,
(meta->1->>'denom_type') = 'Numeric' denom_type,
(meta->1->>'denom_name') = 'Total Population' denom_name,
(meta->1->>'geom_id') = 'us.census.tiger.census_tract' geom_id,
(meta->1->>'geom_id') = 'us.census.tiger.block_group' OR (meta->1->>'geom_id') = 'us.census.tiger.census_tract' geom_id,
(meta->1->>'normalization') = 'denominated' normalization
FROM meta;
@@ -416,7 +416,7 @@ SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestPoint(),
-- OBS_GetMeta provides suggested name for simple meta request with denom
SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestPoint(),
'[{"numer_id": "us.census.acs.B01001002", "normalization": "denom"}]'
)->0->>'suggested_name' = 'male_pop_rate_2010_2014' obs_getmeta_suggested_name_denom;
)->0->>'suggested_name' = 'male_pop_2010_2014_by_total_pop' obs_getmeta_suggested_name_denom;
-- OBS_GetData/OBS_GetMeta by id with empty list/null
WITH data AS (SELECT * FROM cdb_observatory.OBS_GetData(ARRAY[]::TEXT[], null))
@@ -677,7 +677,7 @@ data AS (SELECT * FROM cdb_observatory.OBS_GetData(
(SELECT meta FROM meta)))
SELECT id = 1 id,
data->0->>'value' = 'Hispanic Black mix multilingual, high poverty, renters, uses public transport' data_poly_categorical,
abs((data->1->>'value')::Numeric - 15787) / 15787 < 0.0001 valcol
abs((data->1->>'value')::Numeric - 15790) / 15790 < 0.0001 valcol
FROM data;
-- OBS_GetData/OBS_GetMeta by geom with polygons inside a polygon
@@ -700,46 +700,22 @@ data AS (SELECT * FROM cdb_observatory.OBS_GetData(
(SELECT meta FROM meta), false))
SELECT every(id = 1) is TRUE id,
count(distinct (data->0->>'value')::geometry) = 16 correct_num_geoms,
abs(sum((data->1->>'value')::numeric) - 12327) / 12327 < 0.001 correct_pop
abs(sum((data->1->>'value')::numeric) - 12329) / 12329 < 0.001 correct_pop
FROM data;
-- OBS_GetData/OBS_GetMeta by geom with polygons inside a polygon + one measure + one text
WITH
meta AS (SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestArea(),
'[{"geom_id": "us.census.tiger.block_group"}, {"numer_id": "us.census.acs.B01003001", "normalization": "predenom", "geom_id": "us.census.tiger.block_group"}, {"numer_id": "us.census.tiger.name", "geom_id": "us.census.tiger.block_group"}]') meta),
'[{"geom_id": "us.census.tiger.block_group"}, {"numer_id": "us.census.acs.B01003001", "normalization": "predenom", "geom_id": "us.census.tiger.block_group"}, {"numer_id": "us.census.tiger.block_group_geoname", "geom_id": "us.census.tiger.block_group"}]') meta),
data AS (SELECT * FROM cdb_observatory.OBS_GetData(
ARRAY[(cdb_observatory._TestArea(), 1)::geomval],
(SELECT meta FROM meta), false))
SELECT every(id = 1) is TRUE id,
count(distinct (data->0->>'value')::geometry) = 16 correct_num_geoms,
abs(sum((data->1->>'value')::numeric) - 12327) / 12327 < 0.001 correct_pop,
abs(sum((data->1->>'value')::numeric) - 12329) / 12329 < 0.001 correct_pop,
array_agg(distinct data->2->>'value') = '{"Block Group 1","Block Group 2","Block Group 3","Block Group 4","Block Group 5"}' correct_bg_names
FROM data;
-- OBS_GetData/OBS_GetMeta by geom with points inside a polygon
WITH
meta AS (SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestArea(),
'[{"geom_id": "us.census.tiger.pointlm_geom"}]') meta),
data AS (SELECT * FROM cdb_observatory.OBS_GetData(
ARRAY[(cdb_observatory._TestArea(), 1)::geomval],
(SELECT meta FROM meta), false))
SELECT every(id = 1) AS id,
count(distinct (data->0->>'value')::geometry(point, 4326)) = 3 correct_num_points
FROM data;
-- OBS_GetData/OBS_GetMeta by geom with points inside a polygon + one text
WITH
meta AS (SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestArea(),
'[{"geom_id": "us.census.tiger.pointlm_geom"}, {"geom_id": "us.census.tiger.pointlm_geom", "numer_id": "us.census.tiger.fullname"}]') meta),
data AS (SELECT * FROM cdb_observatory.OBS_GetData(
ARRAY[(cdb_observatory._TestArea(), 1)::geomval],
(SELECT meta FROM meta), false))
SELECT every(id = 1) AS id,
count(distinct (data->0->>'value')::geometry(point, 4326)) = 3 correct_num_points,
array_agg(data->1->>'value') = '{"Bushwick Yards","Edward Block Square","Bushwick Houses"}' pointgeom_names
FROM data;
-- OBS_GetData by id with one standard measure
WITH
meta AS (SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestArea(),
@@ -896,7 +872,8 @@ WITH _geoms AS (
FALSE
)
WHERE data->0->>'geomref' LIKE '36047%'
ORDER BY RANDOM()
and (data->1->>'value')::numeric > 1000
ORDER BY geom_ref
), geoms AS (
SELECT *, row_number() OVER () cartodb_id FROM _geoms
), samples AS (
@@ -973,3 +950,9 @@ WITH _geoms AS (
FROM geoms, results
WHERE cartodb_id = id
;
-- OBS_MetadataValidation
SELECT * FROM cdb_observatory.OBS_MetadataValidation(NULL, 'ST_Polygon', '[{"numer_id": "us.census.acs.B01003001","denom_id": null,"normalization": "prenormalized","geom_id": null,"numer_timespan": "2010 - 2014"}]'::json, 500);
SELECT * FROM cdb_observatory.OBS_MetadataValidation(NULL, 'ST_Polygon', '[{"numer_id": "us.census.acs.B25058001","denom_id": null,"normalization": "denominated","geom_id": null,"numer_timespan": "2010 - 2014"}]'::json, 500);
SELECT * FROM cdb_observatory.OBS_MetadataValidation(NULL, 'ST_Polygon', '[{"numer_id": "us.census.acs.B15003001","denom_id": null,"normalization": "denominated","geom_id": null,"numer_timespan": "2010 - 2014"}]'::json, 500);

View File

@@ -503,7 +503,7 @@ SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC)
= ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.zcta5']
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_5km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 5000)::Geometry(Geometry, 4326),
@@ -532,8 +532,8 @@ SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
WHERE table_id LIKE '%2015%';
SELECT ARRAY_AGG(column_id ORDER BY score DESC)
= ARRAY['us.census.tiger.county', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.block_group']
= ARRAY['us.census.tiger.county', 'us.census.tiger.zcta5',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
AS _obs_geometryscores_2500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 2500000)::Geometry(Geometry, 4326),
@@ -593,7 +593,7 @@ SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC)
= ARRAY['us.census.tiger.zcta5', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.block_group']
'us.census.tiger.block_group', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_500_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
@@ -613,7 +613,7 @@ SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC)
= ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.zcta5']
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_25000_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),

File diff suppressed because one or more lines are too long

View File

@@ -1,8 +1,7 @@
from nose.tools import assert_equal, assert_is_not_none
from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
from itertools import izip_longest
from itertools import zip_longest
from util import query
from collections import OrderedDict
import json
@@ -12,127 +11,126 @@ def grouper(iterable, n, fillvalue=None):
"Collect data into fixed-length chunks or blocks"
# grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
args = [iter(iterable)] * n
return izip_longest(fillvalue=fillvalue, *args)
return zip_longest(fillvalue=fillvalue, *args)
USE_SCHEMA = True
SKIP_COLUMNS = set([
u'mx.inegi_columns.INDI18',
u'mx.inegi_columns.ECO40',
u'mx.inegi_columns.POB34',
u'mx.inegi_columns.POB63',
u'mx.inegi_columns.INDI7',
u'mx.inegi_columns.EDU28',
u'mx.inegi_columns.SCONY10',
u'mx.inegi_columns.EDU31',
u'mx.inegi_columns.POB7',
u'mx.inegi_columns.VIV30',
u'mx.inegi_columns.INDI12',
u'mx.inegi_columns.EDU13',
u'mx.inegi_columns.ECO43',
u'mx.inegi_columns.VIV9',
u'mx.inegi_columns.HOGAR25',
u'mx.inegi_columns.POB32',
u'mx.inegi_columns.ECO7',
u'mx.inegi_columns.INDI19',
u'mx.inegi_columns.INDI16',
u'mx.inegi_columns.POB65',
u'mx.inegi_columns.INDI3',
u'mx.inegi_columns.INDI9',
u'mx.inegi_columns.POB36',
u'mx.inegi_columns.POB33',
u'mx.inegi_columns.POB58',
u'mx.inegi_columns.DISC4',
u'mx.inegi_columns.VIV41',
u'mx.inegi_columns.VIV40',
u'mx.inegi_columns.VIV17',
u'mx.inegi_columns.VIV25',
u'mx.inegi_columns.EDU10',
u'whosonfirst.wof_disputed_name',
u'us.census.tiger.fullname',
u'whosonfirst.wof_marinearea_name',
u'us.census.tiger.mtfcc',
u'whosonfirst.wof_county_name',
u'whosonfirst.wof_region_name',
'fr.insee.P12_RP_CHOS', 'fr.insee.P12_RP_HABFOR'
, 'fr.insee.P12_RP_EAUCH', 'fr.insee.P12_RP_BDWC'
, 'fr.insee.P12_RP_MIDUR', 'fr.insee.P12_RP_CLIM'
, 'fr.insee.P12_RP_MIBOIS', 'fr.insee.P12_RP_CASE'
, 'fr.insee.P12_RP_TTEGOU', 'fr.insee.P12_RP_ELEC'
, 'fr.insee.P12_ACTOCC15P_ILT45D'
, 'fr.insee.P12_RP_CHOS', 'fr.insee.P12_RP_HABFOR'
, 'fr.insee.P12_RP_EAUCH', 'fr.insee.P12_RP_BDWC'
, 'fr.insee.P12_RP_MIDUR', 'fr.insee.P12_RP_CLIM'
, 'fr.insee.P12_RP_MIBOIS', 'fr.insee.P12_RP_CASE'
, 'fr.insee.P12_RP_TTEGOU', 'fr.insee.P12_RP_ELEC'
, 'fr.insee.P12_ACTOCC15P_ILT45D'
, 'uk.ons.LC3202WA0007'
, 'uk.ons.LC3202WA0010'
, 'uk.ons.LC3202WA0004'
, 'uk.ons.LC3204WA0004'
, 'uk.ons.LC3204WA0007'
, 'uk.ons.LC3204WA0010'
, 'br.geo.subdistritos_name'
'mx.inegi_columns.INDI18',
'mx.inegi_columns.ECO40',
'mx.inegi_columns.POB34',
'mx.inegi_columns.POB63',
'mx.inegi_columns.INDI7',
'mx.inegi_columns.EDU28',
'mx.inegi_columns.SCONY10',
'mx.inegi_columns.EDU31',
'mx.inegi_columns.POB7',
'mx.inegi_columns.VIV30',
'mx.inegi_columns.INDI12',
'mx.inegi_columns.EDU13',
'mx.inegi_columns.ECO43',
'mx.inegi_columns.VIV9',
'mx.inegi_columns.HOGAR25',
'mx.inegi_columns.POB32',
'mx.inegi_columns.ECO7',
'mx.inegi_columns.INDI19',
'mx.inegi_columns.INDI16',
'mx.inegi_columns.POB65',
'mx.inegi_columns.INDI3',
'mx.inegi_columns.INDI9',
'mx.inegi_columns.POB36',
'mx.inegi_columns.POB33',
'mx.inegi_columns.POB58',
'mx.inegi_columns.DISC4',
'mx.inegi_columns.VIV41',
'mx.inegi_columns.VIV40',
'mx.inegi_columns.VIV17',
'mx.inegi_columns.VIV25',
'mx.inegi_columns.EDU10',
'whosonfirst.wof_disputed_name',
'us.census.tiger.fullname',
'whosonfirst.wof_marinearea_name',
'us.census.tiger.mtfcc',
'whosonfirst.wof_county_name',
'whosonfirst.wof_region_name',
'fr.insee.P12_RP_CHOS',
'fr.insee.P12_RP_HABFOR',
'fr.insee.P12_RP_EAUCH',
'fr.insee.P12_RP_BDWC',
'fr.insee.P12_RP_MIDUR',
'fr.insee.P12_RP_CLIM',
'fr.insee.P12_RP_MIBOIS',
'fr.insee.P12_RP_CASE',
'fr.insee.P12_RP_TTEGOU',
'fr.insee.P12_RP_ELEC',
'fr.insee.P12_ACTOCC15P_ILT45D',
'fr.insee.P12_RP_CHOS',
'fr.insee.P12_RP_HABFOR',
'fr.insee.P12_RP_EAUCH',
'fr.insee.P12_RP_BDWC',
'fr.insee.P12_RP_MIDUR',
'fr.insee.P12_RP_CLIM',
'fr.insee.P12_RP_MIBOIS',
'fr.insee.P12_RP_CASE',
'fr.insee.P12_RP_TTEGOU',
'fr.insee.P12_RP_ELEC',
'fr.insee.P12_ACTOCC15P_ILT45D',
'uk.ons.LC3202WA0007',
'uk.ons.LC3202WA0010',
'uk.ons.LC3202WA0004',
'uk.ons.LC3204WA0004',
'uk.ons.LC3204WA0007',
'uk.ons.LC3204WA0010',
'br.geo.subdistritos_name',
# Problems with universe (denominator is zero in test area)
'ca.statcan.census2016.cols_census.c0058_t',
'ca.statcan.census2016.cols_census.c1878_f',
'ca.statcan.census2016.cols_census.c1878_m',
'ca.statcan.census2016.cols_census.c1674_t',
'ca.statcan.census2016.cols_census.c1675_t',
'ca.statcan.census2016.cols_census.c1676_t',
'ca.statcan.census2016.cols_census.c1677_t',
'ca.statcan.census2016.cols_census.c0801_t',
'ca.statcan.census2016.cols_census.c0802_t',
'ca.statcan.census2016.cols_census.c0803_t',
'ca.statcan.census2016.cols_census.c0805_t',
'ca.statcan.census2016.cols_census.c0806_t',
'ca.statcan.census2016.cols_census.c0807_t',
'ca.statcan.census2016.cols_census.c0809_t',
'ca.statcan.census2016.cols_census.c0810_t',
'ca.statcan.census2016.cols_census.c0811_t',
'ca.statcan.census2016.cols_census.c0813_t',
'ca.statcan.census2016.cols_census.c0814_t',
'ca.statcan.census2016.cols_census.c0815_t',
'ca.statcan.census2016.cols_census.c0817_t',
'ca.statcan.census2016.cols_census.c0818_t',
'ca.statcan.census2016.cols_census.c0820_t',
'ca.statcan.census2016.cols_census.c0821_t',
'ca.statcan.census2016.cols_census.c0821_t',
'ca.statcan.census2016.cols_census.c0823_t',
'ca.statcan.census2016.cols_census.c0824_t',
'ca.statcan.census2016.cols_census.c0826_t',
'ca.statcan.census2016.cols_census.c0827_t',
'ca.statcan.census2016.cols_census.c0829_t',
'ca.statcan.census2016.cols_census.c0830_t',
'ca.statcan.census2016.cols_census.c0832_t',
'ca.statcan.census2016.cols_census.c0833_t'
])
MEASURE_COLUMNS = query('''
SELECT ARRAY_AGG(DISTINCT numer_id) numer_ids,
SELECT cdb_observatory.FIRST(distinct numer_id) numer_ids,
numer_aggregate,
denom_reltype,
section_tags
denom_reltype
FROM observatory.obs_meta
WHERE numer_weight > 0
AND numer_id NOT IN ('{skip}')
AND numer_id NOT LIKE 'eu.%' --Skipping Eurostat
AND section_tags IS NOT NULL
AND subsection_tags IS NOT NULL
GROUP BY numer_aggregate, section_tags, denom_reltype
GROUP BY numer_id, numer_aggregate, denom_reltype
'''.format(skip="', '".join(SKIP_COLUMNS))).fetchall()
#CATEGORY_COLUMNS = query('''
#SELECT distinct numer_id
#FROM observatory.obs_meta
#WHERE numer_type ILIKE 'text'
#AND numer_weight > 0
#''').fetchall()
#
#BOUNDARY_COLUMNS = query('''
#SELECT id FROM observatory.obs_column
#WHERE type ILIKE 'geometry'
#AND weight > 0
#''').fetchall()
#
#US_CENSUS_MEASURE_COLUMNS = query('''
#SELECT distinct numer_name
#FROM observatory.obs_meta
#WHERE numer_type ILIKE 'numeric'
#AND 'us.census.acs' = ANY (subsection_tags)
#AND numer_weight > 0
#''').fetchall()
#def default_geometry_id(column_id):
# '''
# Returns default test point for the column_id.
# '''
# if column_id == 'whosonfirst.wof_disputed_geom':
# return 'ST_SetSRID(ST_MakePoint(76.57, 33.78), 4326)'
# elif column_id == 'whosonfirst.wof_marinearea_geom':
# return 'ST_SetSRID(ST_MakePoint(-68.47, 43.33), 4326)'
# elif column_id in ('us.census.tiger.school_district_elementary',
# 'us.census.tiger.school_district_secondary',
# 'us.census.tiger.school_district_elementary_clipped',
# 'us.census.tiger.school_district_secondary_clipped'):
# return 'ST_SetSRID(ST_MakePoint(-73.7067, 40.7025), 4326)'
# elif column_id.startswith('es.ine'):
# return 'ST_SetSRID(ST_MakePoint(-2.51141249535454, 42.8226119029222), 4326)'
# elif column_id.startswith('us.zillow'):
# return 'ST_SetSRID(ST_MakePoint(-81.3544048197256, 28.3305906291771), 4326)'
# elif column_id.startswith('ca.'):
# return ''
# else:
# return 'ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)'
def default_lonlat(column_id):
'''
@@ -142,11 +140,6 @@ def default_lonlat(column_id):
return (76.57, 33.78)
elif column_id == 'whosonfirst.wof_marinearea_geom':
return (-68.47, 43.33)
elif column_id in ('us.census.tiger.school_district_elementary',
'us.census.tiger.school_district_secondary',
'us.census.tiger.school_district_elementary_clipped',
'us.census.tiger.school_district_secondary_clipped'):
return (40.7025, -73.7067)
elif column_id.startswith('uk'):
if 'WA' in column_id:
return (51.46844551219723, -3.184833526611328)
@@ -158,30 +151,15 @@ def default_lonlat(column_id):
return (28.3305906291771, -81.3544048197256)
elif column_id.startswith('mx.'):
return (19.41347699386547, -99.17019367218018)
elif column_id.startswith('th.'):
return (13.725377712079784, 100.49263000488281)
# cols for French Guyana only
#elif column_id in ('fr.insee.P12_RP_CHOS', 'fr.insee.P12_RP_HABFOR'
# , 'fr.insee.P12_RP_EAUCH', 'fr.insee.P12_RP_BDWC'
# , 'fr.insee.P12_RP_MIDUR', 'fr.insee.P12_RP_CLIM'
# , 'fr.insee.P12_RP_MIBOIS', 'fr.insee.P12_RP_CASE'
# , 'fr.insee.P12_RP_TTEGOU', 'fr.insee.P12_RP_ELEC'
# , 'fr.insee.P12_ACTOCC15P_ILT45D'
# , 'fr.insee.P12_RP_CHOS', 'fr.insee.P12_RP_HABFOR'
# , 'fr.insee.P12_RP_EAUCH', 'fr.insee.P12_RP_BDWC'
# , 'fr.insee.P12_RP_MIDUR', 'fr.insee.P12_RP_CLIM'
# , 'fr.insee.P12_RP_MIBOIS', 'fr.insee.P12_RP_CASE'
# , 'fr.insee.P12_RP_TTEGOU', 'fr.insee.P12_RP_ELEC'
# , 'fr.insee.P12_ACTOCC15P_ILT45D'):
# return (4.938408371206558, -52.32908248901367)
elif column_id.startswith('fr.'):
return (48.860875144709475, 2.3613739013671875)
elif column_id.startswith('ca.'):
return (43.65594991256823, -79.37965393066406)
elif (column_id.startswith('us.census.tiger.school_district_elementary') or
column_id.startswith('us.census.tiger.school_district_secondary')):
return (40.7025, -73.7067)
elif column_id.startswith('us.census.'):
return (28.3305906291771, -81.3544048197256)
elif column_id.startswith('us.dma.'):
return (28.3305906291771, -81.3544048197256)
elif column_id.startswith('us.ihme.'):
return (28.3305906291771, -81.3544048197256)
elif column_id.startswith('us.bls.'):
@@ -192,8 +170,6 @@ def default_lonlat(column_id):
return (28.3305906291771, -81.3544048197256)
elif column_id.startswith('us.epa.'):
return (28.3305906291771, -81.3544048197256)
elif column_id.startswith('eu.'):
raise SkipTest('No tests for Eurostat!')
elif column_id.startswith('br.'):
return (-23.53, -46.63)
elif column_id.startswith('au.'):
@@ -202,56 +178,65 @@ def default_lonlat(column_id):
raise Exception('No catalog point set for {}'.format(column_id))
def default_point(column_id):
lat, lng = default_lonlat(column_id)
def default_point(test_point):
lat, lng = test_point
return 'ST_SetSRID(ST_MakePoint({lng}, {lat}), 4326)'.format(
lat=lat, lng=lng)
def default_area(column_id):
def default_area(test_point):
'''
Returns default test area for the column_id
'''
point = default_point(column_id)
point = default_point(test_point)
area = 'ST_Transform(ST_Buffer(ST_Transform({point}, 3857), 250), 4326)'.format(
point=point)
return area
#@parameterized(US_CENSUS_MEASURE_COLUMNS)
#def test_get_us_census_measure_points(name):
# resp = query('''
#SELECT * FROM {schema}OBS_GetUSCensusMeasure({point}, '{name}')
# '''.format(name=name.replace("'", "''"),
# schema='cdb_observatory.' if USE_SCHEMA else '',
# point=default_point('')))
# rows = resp.fetchall()
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
def filter_points():
return MEASURE_COLUMNS
def grouped_measure_columns():
for numer_ids, numer_aggregate, denom_reltype, section_tags in MEASURE_COLUMNS:
def filter_areas():
filtered = []
for numer_ids, numer_aggregate, denom_reltype in MEASURE_COLUMNS:
if numer_aggregate is None or numer_aggregate.lower() not in ('sum', 'median', 'average'):
continue
if numer_aggregate.lower() in ('median', 'average') \
and (denom_reltype is None or denom_reltype.lower() != 'universe'):
continue
filtered.append((numer_ids, numer_aggregate, denom_reltype))
return filtered
def grouped_measure_columns(filtered_columns):
groupbypoint = dict()
for row in filtered_columns:
numer_ids = row[0]
point = default_lonlat(numer_ids)
if point in groupbypoint:
groupbypoint[point].append(numer_ids)
else:
groupbypoint[point] = [numer_ids]
for point, numer_ids in groupbypoint.items():
for colgroup in grouper(numer_ids, 50):
yield [c for c in colgroup if c], numer_aggregate, denom_reltype, section_tags
yield point, [c for c in colgroup if c]
@parameterized(grouped_measure_columns())
def test_get_measure_points(numer_ids, numer_aggregate, denom_reltype, section_tags):
_test_measures(numer_ids, numer_aggregate, section_tags, denom_reltype, default_point(numer_ids[0]))
@parameterized(grouped_measure_columns(filter_points()))
def test_get_measure_points(point, numer_ids):
_test_measures(numer_ids, default_point(point))
@parameterized(grouped_measure_columns())
def test_get_measure_areas(numer_ids, numer_aggregate, denom_reltype, section_tags):
if numer_aggregate is None or numer_aggregate.lower() not in ('sum', 'median', 'average'):
return
if numer_aggregate.lower() in ('median', 'average') \
and (denom_reltype is None \
or denom_reltype.lower() != 'universe'):
return
_test_measures(numer_ids, numer_aggregate, section_tags, denom_reltype, default_area(numer_ids[0]))
@parameterized(grouped_measure_columns(filter_areas()))
def test_get_measure_areas(point, numer_ids):
_test_measures(numer_ids, default_area(point))
def _test_measures(numer_ids, numer_aggregate, section_tags, denom_reltype, geom):
def _test_measures(numer_ids, geom):
in_params = []
for numer_id in numer_ids:
in_params.append({
@@ -259,7 +244,7 @@ def _test_measures(numer_ids, numer_aggregate, section_tags, denom_reltype, geom
'normalization': 'predenominated'
})
params = query(u'''
params = query('''
SELECT {schema}OBS_GetMeta({geom}, '{in_params}')
'''.format(schema='cdb_observatory.' if USE_SCHEMA else '',
geom=geom,
@@ -267,15 +252,15 @@ def _test_measures(numer_ids, numer_aggregate, section_tags, denom_reltype, geom
# We can get duplicate IDs from multi-denominators, so for now we
# compress those measures into a single
params = OrderedDict([(p['id'], p) for p in params]).values()
params = list(OrderedDict([(p['id'], p) for p in params]).values())
assert_equal(len(params), len(in_params),
'Inconsistent out and in params for {}'.format(in_params))
q = u'''
q = '''
SELECT * FROM {schema}OBS_GetData(ARRAY[({geom}, 1)::geomval], '{params}')
'''.format(schema='cdb_observatory.' if USE_SCHEMA else '',
geom=geom,
params=json.dumps(params).replace(u"'", "''"))
params=json.dumps(params).replace("'", "''"))
resp = query(q).fetchone()
assert_is_not_none(resp, 'NULL returned for {}'.format(in_params))
rawvals = resp[1]
@@ -284,90 +269,3 @@ def _test_measures(numer_ids, numer_aggregate, section_tags, denom_reltype, geom
assert_equal(len(vals), len(in_params))
for i, val in enumerate(vals):
assert_is_not_none(val, 'NULL for {}'.format(in_params[i]['numer_id']))
#@parameterized(CATEGORY_COLUMNS)
#def test_get_category_areas(column_id):
# resp = query('''
#SELECT * FROM {schema}OBS_GetCategory({area}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# area=default_area(column_id)))
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
#@parameterized(CATEGORY_COLUMNS)
#def test_get_category_points(column_id):
# if column_id in SKIP_COLUMNS:
# raise SkipTest('Column {} should be skipped'.format(column_id))
# resp = query('''
#SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# point=default_point(column_id)))
# rows = resp.fetchall()
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundaries_by_geometry(column_id):
# resp = query('''
#SELECT * FROM {schema}OBS_GetBoundariesByGeometry({area}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# area=default_area(column_id)))
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_points_by_geometry(column_id):
# resp = query('''
#SELECT * FROM {schema}OBS_GetPointsByGeometry({area}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# area=default_area(column_id)))
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_points(column_id):
# resp = query('''
#SELECT * FROM {schema}OBS_GetBoundary({point}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# point=default_point(column_id)))
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_id(column_id):
# resp = query('''
#SELECT * FROM {schema}OBS_GetBoundaryId({point}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# point=default_point(column_id)))
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_by_id(column_id):
# resp = query('''
#SELECT * FROM {schema}OBS_GetBoundaryById({geometry_id}, '{column_id}')
# '''.format(column_id=column_id,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# geometry_id=default_geometry_id(column_id)))
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0][0])

View File

@@ -25,7 +25,7 @@ for q in (
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-74.1, 40.5,
-73.8, 40.9, 4326),
'us.census.tiger.census_tract_clipped')) foo
'us.census.tiger.census_tract_2015_clipped')) foo
ORDER BY ST_NPoints(the_geom) ASC
LIMIT 1000''',
'DROP TABLE IF EXISTS obs_perftest_complex',
@@ -42,35 +42,9 @@ for q in (
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-75.05437469482422,40.66319159533881,
-73.81885528564453,41.745696344339564, 4326),
'us.census.tiger.county_clipped')) foo
'us.census.tiger.county_2015_clipped')) foo
ORDER BY ST_NPoints(the_geom) DESC
LIMIT 50;''',
'DROP TABLE IF EXISTS obs_perftest_country_simple',
'''CREATE TABLE obs_perftest_country_simple (cartodb_id SERIAL PRIMARY KEY,
geom GEOMETRY,
name TEXT) ''',
'''INSERT INTO obs_perftest_country_simple (geom, name)
SELECT the_geom geom,
geom_refs AS name
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-179,-89, 179,89, 4326),
'whosonfirst.wof_country_geom')) foo
ORDER BY ST_NPoints(the_geom) ASC
LIMIT 50;''',
'DROP TABLE IF EXISTS obs_perftest_country_complex',
'''CREATE TABLE obs_perftest_country_complex (cartodb_id SERIAL PRIMARY KEY,
geom GEOMETRY,
name TEXT) ''',
'''INSERT INTO obs_perftest_country_complex (geom, name)
SELECT the_geom geom,
geom_refs AS name
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-179,-89, 179,89, 4326),
'whosonfirst.wof_country_geom')) foo
ORDER BY ST_NPoints(the_geom) DESC
LIMIT 50;''',
#'''SET statement_timeout = 5000;'''
):
LIMIT 50;'''):
q_formatted = q.format(
schema='cdb_observatory.' if USE_SCHEMA else '',
)
@@ -118,18 +92,10 @@ def record(params, results):
('complex', '_OBS_GetGeometryScores', 'NULL', 1),
('complex', '_OBS_GetGeometryScores', 'NULL', 500),
('complex', '_OBS_GetGeometryScores', 'NULL', 3000),
('country_simple', '_OBS_GetGeometryScores', 'NULL', 1),
('country_simple', '_OBS_GetGeometryScores', 'NULL', 500),
('country_simple', '_OBS_GetGeometryScores', 'NULL', 5000),
('country_complex', '_OBS_GetGeometryScores', 'NULL', 1),
('country_complex', '_OBS_GetGeometryScores', 'NULL', 500),
('country_complex', '_OBS_GetGeometryScores', 'NULL', 5000),
('complex', '_OBS_GetGeometryScores', 'NULL', 3000)
])
def test_getgeometryscores_performance(geom_complexity, api_method, filters, target_geoms):
print api_method, geom_complexity, filters, target_geoms
print(api_method, geom_complexity, filters, target_geoms)
rownums = (1, 5, 10, ) if 'complex' in geom_complexity else (5, 25, 50,)
results = []
@@ -152,7 +118,7 @@ def test_getgeometryscores_performance(geom_complexity, api_method, filters, tar
'qps': qps,
'stmt': stmt
})
print rows, ': ', qps, ' QPS'
print(rows, ': ', qps, ' QPS')
if 'OBS_RECORD_TEST' in os.environ:
record({
@@ -211,7 +177,7 @@ def test_getgeometryscores_performance(geom_complexity, api_method, filters, tar
('complex', 'OBS_GetCategory', None, 'offset_geom', "'us.census.tiger.census_tract'"),
])
def test_getmeasure_performance(geom_complexity, api_method, normalization, geom, boundary):
print api_method, geom_complexity, normalization, geom, boundary
print(api_method, geom_complexity, normalization, geom, boundary)
col = 'measure' if 'measure' in api_method.lower() else 'category'
results = []
@@ -235,7 +201,7 @@ def test_getmeasure_performance(geom_complexity, api_method, normalization, geom
'qps': qps,
'stmt': stmt
})
print rows, ': ', qps, ' QPS'
print(rows, ': ', qps, ' QPS')
if 'OBS_RECORD_TEST' in os.environ:
record({
@@ -283,7 +249,7 @@ def test_getmeasure_performance(geom_complexity, api_method, normalization, geom
('complex', 'denominator', 'offset_geom', "'us.census.tiger.county'"),
])
def test_getdata_performance(geom_complexity, normalization, geom, boundary):
print geom_complexity, normalization, geom, boundary
print(geom_complexity, normalization, geom, boundary)
cols = ['us.census.acs.B01001002',
'us.census.acs.B01001003',
@@ -339,7 +305,7 @@ def test_getdata_performance(geom_complexity, normalization, geom, boundary):
'qps': qps,
'stmt': stmt
})
print rows, ': ', qps, ' QPS'
print(rows, ': ', qps, ' QPS')
if 'OBS_RECORD_TEST' in os.environ:
record({