27 Commits
1.0.4 ... 1.0.7

Author | SHA1 | Message | Date
Carla Iriberri | f4113eaea3 | Release 1.0.7 | 2016-09-21 11:24:29 +02:00
Carla | 86fac2a600 | Merge pull request #196 from CartoDB/release-v-1.0.7 (Release v 1.0.7) | 2016-09-21 11:12:22 +02:00
John Krauss | 2d753cd758 | Skip bad MX measure, smaller buffer for faster tests, updated NEWS.md | 2016-09-20 17:56:23 +00:00
john krauss | 96a98c3bce | Merge pull request #194 from CartoDB/null-resilience (Resolve #178) | 2016-09-20 13:38:11 -04:00
john krauss | d58263935d | Merge pull request #195 from CartoDB/ca-testing (Add point to make sure CA data is present) | 2016-09-20 12:27:02 -04:00
John Krauss | 104608c6d3 | Add point to make sure CA data is present | 2016-09-20 16:31:15 +00:00
John Krauss | c67fe12111 | return NULL in cases when NULL is passed as input geometry or geometry ID. resolves #178 | 2016-09-20 16:26:13 +00:00
Carla Iriberri | 860290595c | Release 1.0.6 | 2016-09-08 10:37:37 +02:00
Carla | bf4ade2fa0 | Merge pull request #186 from CartoDB/measure_release (Use explicit functions for query construction and metadata) | 2016-09-08 09:58:25 +02:00
Carla | 32d37a74b3 | Remove cascades and quote conveniently | 2016-09-02 12:04:03 +02:00
Mario de Frutos | da877e4ef0 | Modify PR template to include the update of NEWS.md | 2016-08-25 14:36:07 +02:00
Mario de Frutos | 15de07ca33 | Modify PR template | 2016-08-25 14:30:42 +02:00
Mario de Frutos | 8af3e22661 | Merge pull request #188 from CartoDB/pr_template (Added PR template) | 2016-08-25 14:27:14 +02:00
Mario de Frutos | fdd591b159 | Added PR template | 2016-08-25 11:28:01 +02:00
Carla Iriberri | 5eb4ede219 | Fix | 2016-08-23 17:20:48 +02:00
Carla Iriberri | dd5f560359 | Separate functions between files | 2016-08-19 16:39:30 +02:00
Carla Iriberri | 62c2693553 | Avoid function check to dispatch | 2016-08-19 13:04:54 +02:00
Carla Iriberri | 48d1bfdb13 | Remove JSON manipulation to use json functions | 2016-08-19 12:45:38 +02:00
Carla Iriberri | 30f27e5b58 | Check function name and use param names instead of | 2016-08-18 15:43:03 +02:00
Carla Iriberri | 26b22a9bf4 | Use explicit functions for query construction and metadata | 2016-08-18 15:36:32 +02:00
Mario de Frutos | c9e809c061 | Merge pull request #185 from CartoDB/develop (Release 1.0.5) | 2016-08-18 15:06:50 +02:00
Mario de Frutos | 43e83751ae | Release 1.0.5 artifact | 2016-08-18 15:05:38 +02:00
Mario de Frutos | 4c13434b9a | Merge pull request #182 from CartoDB/sql-tests (SQL Integration and Performance Tests) | 2016-08-18 14:54:53 +02:00
Mario de Frutos | 8785639ece | Merge pull request #154 from CartoDB/iriberri-patch-1 (Use 6432 for connections from server) | 2016-08-18 11:10:02 +02:00
John Krauss | f991f5a1e6 | docs and NEWS for the new tests | 2016-08-12 18:56:06 +00:00
John Krauss | e4b4ebf72d | Adapted autotest to to work with SQL directly instead of over HTTP SQL API | 2016-08-12 18:48:31 +00:00
Carla | bfa57f4971 | Use 6432 for connections from server | 2016-07-19 17:54:08 +02:00
19 changed files with 6971 additions and 141 deletions

.github/PULL_REQUEST_TEMPLATE.md (new file, 17 lines)

@@ -0,0 +1,17 @@
## Request for a new Data observatory extension deploy
I'd like to request a new data observatory extension deploy: dump + extension
## Dump database id to be deployed
Please put here the dump id to be deployed: <dump_id>
## Data Observatory extension PRs included.
*Please update the NEWS.md*
Add down here the PR links to be added and deployed:
-
// @CartoDB/dataservices


@@ -18,7 +18,7 @@ test: ## Run the tests for the development version of the extension
$(MAKE) -C $(EXT_DIR) test
# Generate a new release into release
release: ## Generate a new release of the extension. Only for telease manager
release: ## Generate a new release of the extension. Only for release manager
$(MAKE) -C $(EXT_DIR) release
# Install the current release.

NEWS.md (25 lines added)

@@ -1,3 +1,28 @@
1.0.7 (2016-09-20)
__Bugfixes__
* `NULL` geometries or geometry IDs no longer result in an exception from any
  augmentation functions ([#178](https://github.com/CartoDB/observatory-extension/issues/178))
__Improvements__
* Automatic tests work for Canada and Thailand

1.0.6 (2016-09-08)
__Improvements__
* New function structure for Table-level functions which allows separating the
  framework logic from the observatory measure functions.

1.0.5 (2016-08-12)
__Improvements__
* Integration tests moved to `src/python/test/`, and can be run without hitting
  any HTTP SQL API.

1.0.4 (2016-07-26)
__Bugfixes__

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1,5 +1,5 @@
comment = 'CartoDB Observatory backend extension'
default_version = '1.0.4'
default_version = '1.0.7'
requires = 'postgis, postgres_fdw'
superuser = true
schema = cdb_observatory


@@ -1,5 +1,5 @@
comment = 'CartoDB Observatory backend extension'
default_version = '1.0.4'
default_version = '1.0.7'
requires = 'postgis, postgres_fdw'
superuser = true
schema = cdb_observatory


@@ -8,7 +8,7 @@ DECLARE
BEGIN
-- Build connection string
connection_str := '{"server":{"extensions":"postgis", "dbname":"'
|| user_dbname ||'", "host":"' || user_hostname ||'", "port":"5432"}, "users":{"public"'
|| user_dbname ||'", "host":"' || user_hostname ||'", "port":"6432"}, "users":{"public"'
|| ':{"user":"' || username ||'", "password":""} } }';
-- This function tries to be as idempotent as possible, by not creating anything more than once


@@ -179,7 +179,10 @@ BEGIN
--raise notice 'Cannot find data table for boundary ID %, column_ids %, and time_span %', geometry_level, column_ids, time_span;
END IF;
IF ST_GeometryType(geom) = 'ST_Point'
IF geom IS NULL
THEN
results := NULL;
ELSIF ST_GeometryType(geom) = 'ST_Point'
THEN
--raise notice 'geom_table_name %, data_table_info %', geom_table_name, data_table_info::json[];
results := cdb_observatory._OBS_GetPoints(geom,
@@ -361,6 +364,10 @@ DECLARE
sql TEXT;
numer_name TEXT;
BEGIN
IF geom IS NULL THEN
RETURN NULL;
END IF;
geom := ST_SnapToGrid(geom, 0.000001);
EXECUTE
@@ -525,6 +532,9 @@ DECLARE
measure_val NUMERIC;
data_geoid_colname TEXT;
BEGIN
IF geom_ref IS NULL THEN
RETURN NULL;
END IF;
EXECUTE
$query$
@@ -573,6 +583,9 @@ DECLARE
category_val TEXT;
category_share NUMERIC;
BEGIN
IF geom IS NULL THEN
RETURN NULL;
END IF;
EXECUTE
$query$
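
With these guards in place, the augmentation functions degrade gracefully on missing input instead of raising. A minimal sketch of the new behaviour (mirroring the acceptance tests later in this changeset; the measure, boundary, and time-span IDs are the ones those tests use):

```sql
-- NULL input geometry: previously an exception, now simply NULL
SELECT cdb_observatory.OBS_GetMeasure(NULL, 'us.census.acs.B01003001');

-- NULL geometry ID for the ID-based variant behaves the same way
SELECT cdb_observatory.OBS_GetMeasureById(
  NULL,
  'us.census.acs.B01003001',
  'us.census.tiger.block_group',
  '2010 - 2014');
```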


@@ -24,9 +24,12 @@ BEGIN
EXCEPTION
WHEN others THEN
-- Disconnect user imported table. Delete schema and FDW server.
EXECUTE 'DROP FOREIGN TABLE IF EXISTS ' || fdw_import_schema || '.' || table_name;
EXECUTE 'DROP SCHEMA IF EXISTS ' || fdw_import_schema || ' CASCADE';
EXECUTE 'DROP SERVER IF EXISTS ' || fdw_server || ' CASCADE;';
EXECUTE 'DROP FOREIGN TABLE IF EXISTS "' || fdw_import_schema || '".' || table_name;
EXECUTE 'DROP FOREIGN TABLE IF EXISTS "' || fdw_import_schema || '".cdb_tablemetadata';
EXECUTE 'DROP SCHEMA IF EXISTS "' || fdw_import_schema || '"';
EXECUTE 'DROP USER MAPPING IF EXISTS FOR public SERVER "' || fdw_server || '"';
EXECUTE 'DROP SERVER IF EXISTS "' || fdw_server || '"';
RETURN (null, null, null);
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
@@ -37,27 +40,9 @@ AS $$
DECLARE
colnames text[];
coltypes text[];
requested_measures text[];
measure text;
BEGIN
-- Simple mock, there should be real logic in here.
IF $3 NOT ILIKE 'GetMeasure' OR $3 IS NULL THEN
RAISE 'This function is not supported yet: %', $3;
END IF;
SELECT translate($4::json->>'tag_name','[]', '{}')::text[] INTO requested_measures;
FOREACH measure IN ARRAY requested_measures
LOOP
IF NOT measure ILIKE ANY (Array['total_pop', 'pop_16_over']::text[]) THEN
RAISE 'This measure is not supported yet: %', measure;
END IF;
SELECT array_append(colnames, measure) INTO colnames;
SELECT array_append(coltypes, 'double precision'::text) INTO coltypes;
END LOOP;
EXECUTE FORMAT('SELECT r.colnames::text[], r.coltypes::text[] FROM cdb_observatory._%sResultMetadata(%L::json) r', function_name, params::text)
INTO colnames, coltypes;
RETURN (colnames::text[], coltypes::text[]);
END;
@@ -68,41 +53,17 @@ RETURNS SETOF record
AS $$
DECLARE
data_query text;
tag_name text[];
tag text;
tags_list text;
tags_query text;
rec RECORD;
BEGIN
SELECT translate($6::json->>'tag_name','[]', '{}')::text[] INTO tag_name;
SELECT array_to_string(tag_name, ',') INTO tags_list;
tags_query := '';
FOREACH tag IN ARRAY tag_name
LOOP
SELECT tags_query || ' sum(' || tag || '/fraction)::double precision as ' || tag || ', ' INTO tags_query;
END LOOP;
-- Simple mock, there should be real logic in here.
data_query := '(WITH _areas AS(SELECT ST_Area(a.the_geom::geography)'
|| '/ (1000 * 1000) as fraction, a.geoid, b.cartodb_id FROM '
|| 'observatory.obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308 as a, '
|| table_schema || '.' || table_name || ' AS b '
|| 'WHERE b.the_geom && a.the_geom ), values AS (SELECT geoid, '
|| tags_list
|| ' FROM observatory.obs_1a098da56badf5f32e336002b0a81708c40d29cd ) '
|| 'SELECT '
|| tags_query
|| ' cartodb_id::int FROM _areas, values '
|| 'WHERE values.geoid = _areas.geoid GROUP BY cartodb_id);';
EXECUTE FORMAT('SELECT cdb_observatory._%sQuery(%L, %L, %L::json)', function_name, table_schema, table_name, params::text)
INTO data_query;
FOR rec IN EXECUTE data_query
LOOP
RETURN NEXT rec;
END LOOP;
RETURN;
RETURN;
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
@@ -112,8 +73,10 @@ RETURNS boolean
AS $$
BEGIN
EXECUTE 'DROP FOREIGN TABLE IF EXISTS "' || table_schema || '".' || table_name;
EXECUTE 'DROP SCHEMA IF EXISTS ' || table_schema || ' CASCADE';
EXECUTE 'DROP SERVER IF EXISTS ' || servername || ' CASCADE;';
EXECUTE 'DROP FOREIGN TABLE IF EXISTS "' || table_schema || '".cdb_tablemetadata';
EXECUTE 'DROP SCHEMA IF EXISTS "' || table_schema || '"';
EXECUTE 'DROP USER MAPPING IF EXISTS FOR public SERVER "' || servername || '"';
EXECUTE 'DROP SERVER IF EXISTS "' || servername || '"';
RETURN true;
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
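
With the hard-coded mock logic removed, the table-level entry points now dispatch dynamically on `function_name`. As a rough illustration (the measure IDs below are only example values, taken from the old mock), for `function_name = 'OBS_GetMeasure'` the metadata `EXECUTE FORMAT(...)` above expands to something like:

```sql
SELECT r.colnames::text[], r.coltypes::text[]
FROM cdb_observatory._OBS_GetMeasureResultMetadata(
       '{"measure_id": ["total_pop", "pop_16_over"]}'::json) r;
```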


@@ -0,0 +1,79 @@
--
--
-- OBS_GetMeasure
--
--
CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetMeasureResultMetadata(params json)
RETURNS cdb_observatory.ds_return_metadata
AS $$
DECLARE
colnames text[]; -- Array to store the name of the measures to be returned
coltypes text[]; -- Array to store the type of the measures to be returned
requested_measures text[];
measure_id text;
BEGIN
-- By definition, all the measure results for the OBS_GetMeasure API are numeric values
SELECT ARRAY(SELECT json_array_elements_text(params->'measure_id'))::text[] INTO requested_measures;
FOREACH measure_id IN ARRAY requested_measures
LOOP
SELECT array_append(colnames, measure_id) INTO colnames;
SELECT array_append(coltypes, 'numeric'::text) INTO coltypes;
END LOOP;
RETURN (colnames::text[], coltypes::text[]);
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetMeasureQuery(table_schema text, table_name text, params json)
RETURNS text
AS $$
DECLARE
data_query text;
measure_ids_arr text[];
measure_id text;
measures_list text;
measures_query text;
normalize text;
boundary_id text;
time_span text;
geom_table_name text;
data_table_name text;
BEGIN
measures_query := '';
-- SELECT table_name from obs_meta WHERE boundary_id = {bound} AND [...] INTO geom_table_name
geom_table_name := 'observatory.obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308';
-- SELECT table_name from obs_meta WHERE time_span = {time} AND [...] INTO data_table_name
data_table_name := 'observatory.obs_1a098da56badf5f32e336002b0a81708c40d29cd';
-- Get measure_ids array from JSON
SELECT ARRAY(SELECT json_array_elements_text(params->'measure_id'))::text[] INTO measure_ids_arr;
-- Get a comma-separated list of measures ("total_pop, over_16_pop") to be used in SELECTs
SELECT array_to_string(measure_ids_arr, ',') INTO measures_list;
FOREACH measure_id IN ARRAY measure_ids_arr
LOOP
-- Build query to compute each value and normalize
-- Assumes the default normalization method, the normalize parameter given in the JSON
-- should be checked in order to build the final query
SELECT measures_query || ' sum(' || measure_id || '/fraction)::numeric as ' || measure_id || ', ' INTO measures_query;
END LOOP;
-- Data query should select the measures and the cartodb_id of the user table, in that order.
data_query := '(WITH _areas AS(SELECT ST_Area(a.the_geom::geography)'
|| '/ (1000 * 1000) as fraction, a.geoid, b.cartodb_id FROM '
|| geom_table_name || ' as a, '
|| table_schema || '.' || table_name || ' AS b '
|| 'WHERE b.the_geom && a.the_geom ), values AS (SELECT geoid, '
|| measures_list
|| ' FROM ' || data_table_name || ' ) '
|| 'SELECT '
|| measures_query
|| ' cartodb_id::int FROM _areas, values '
|| 'WHERE values.geoid = _areas.geoid GROUP BY cartodb_id);';
RETURN data_query;
END;
$$ LANGUAGE plpgsql;
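
These two functions are meant to be called through the table-level dispatcher shown earlier, but they can also be exercised directly. A hedged sketch (`public.my_user_table` is a placeholder schema/table name; the measure ID list follows the JSON shape the function reads):

```sql
-- Returns the text of the aggregation query for a user table, without running it
SELECT cdb_observatory._OBS_GetMeasureQuery(
  'public',
  'my_user_table',
  '{"measure_id": ["total_pop", "pop_16_over"]}'::json);
```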


@@ -66,12 +66,18 @@ t
obs_getmeasure_bad_geometry
t
(1 row)
obs_getmeasure_null
t
(1 row)
obs_getcategory_point
t
(1 row)
obs_getcategory_polygon
t
(1 row)
obs_getcategory_null
t
(1 row)
obs_getpopulation
t
(1 row)
@@ -81,6 +87,9 @@ t
obs_getpopulation_polygon_null_test
t
(1 row)
obs_getpopulation_polygon_null_geom_test
t
(1 row)
obs_getuscensusmeasure_point_male_pop
t
(1 row)
@@ -90,12 +99,18 @@ t
obs_getuscensusmeasure_null
t
(1 row)
obs_getuscensusmeasure_null_geom
t
(1 row)
obs_getuscensuscategory_point
t
(1 row)
obs_getuscensuscategory_polygon
t
(1 row)
obs_getuscensuscategory_null
t
(1 row)
obs_getmeasurebyid_cartodb_census_tract
t
(1 row)
@@ -108,3 +123,6 @@ t
obs_getmeasurebyid_nulls
t
(1 row)
obs_getmeasurebyid_null_id
t
(1 row)


@@ -203,6 +203,11 @@ SELECT abs(cdb_observatory.OBS_GetMeasure(
cdb_observatory._ProblemTestArea(),
'us.census.acs.B01003001') - 96230.2929825897) / 96230.2929825897 < 0.001 As OBS_GetMeasure_bad_geometry;
-- OBS_GetMeasure with NULL Input
SELECT cdb_observatory.OBS_GetMeasure(
NULL,
'us.census.acs.B01003001') IS NULL As OBS_GetMeasure_null;
-- Point-based OBS_GetCategory
SELECT cdb_observatory.OBS_GetCategory(
cdb_observatory._TestPoint(), 'us.census.spielman_singleton_segments.X10') = 'Wealthy, urban without Kids' As OBS_GetCategory_point;
@@ -211,6 +216,10 @@ SELECT cdb_observatory.OBS_GetCategory(
SELECT cdb_observatory.OBS_GetCategory(
cdb_observatory._TestArea(), 'us.census.spielman_singleton_segments.X10') = 'Wealthy, urban without Kids' As obs_getcategory_polygon;
-- NULL Input OBS_GetCategory
SELECT cdb_observatory.OBS_GetCategory(
NULL, 'us.census.spielman_singleton_segments.X10') IS NULL As obs_getcategory_null;
-- Point-based OBS_GetPopulation, default normalization (area)
SELECT (abs(OBS_GetPopulation - 10923.093200390833950) / 10923.093200390833950) < 0.001 As OBS_GetPopulation FROM
cdb_observatory.OBS_GetPopulation(
@@ -231,6 +240,13 @@ FROM
cdb_observatory._TestArea(), NULL
) As m(obs_getpopulation_polygon_null);
-- Null input OBS_GetPopulation
SELECT obs_getpopulation_polygon_null_geom IS NULL As obs_getpopulation_polygon_null_geom_test
FROM
cdb_observatory.OBS_GetPopulation(
NULL, NULL
) As m(obs_getpopulation_polygon_null_geom);
-- Point-based OBS_GetUSCensusMeasure, default normalization (area)
SELECT (abs(cdb_observatory.obs_getuscensusmeasure(
cdb_observatory._testpoint(), 'male population') - 6789.5647735060920500) / 6789.5647735060920500) < 0.001 As obs_getuscensusmeasure_point_male_pop;
@@ -244,6 +260,11 @@ SELECT (abs(cdb_observatory.obs_getuscensusmeasure(
SELECT (abs(cdb_observatory.obs_getuscensusmeasure(
cdb_observatory._testarea(), 'male population', NULL) - 6043.63061042765) / 6043.63061042765) < 0.001 As obs_getuscensusmeasure_null;
-- Poly-based OBS_GetUSCensusMeasure, Null input geom
SELECT cdb_observatory.obs_getuscensusmeasure(
NULL, 'male population', NULL) IS NULL As obs_getuscensusmeasure_null_geom;
-- Point-based OBS_GetUSCensusCategory
SELECT cdb_observatory.OBS_GetUSCensusCategory(
cdb_observatory._testpoint(), 'Spielman-Singleton Segments: 10 Clusters') = 'Wealthy, urban without Kids' As OBS_GetUSCensusCategory_point;
@@ -252,6 +273,10 @@ SELECT cdb_observatory.OBS_GetUSCensusCategory(
SELECT cdb_observatory.OBS_GetUSCensusCategory(
cdb_observatory._testarea(), 'Spielman-Singleton Segments: 10 Clusters') = 'Wealthy, urban without Kids' As OBS_GetUSCensusCategory_polygon;
-- Null-input OBS_GetUSCensusCategory
SELECT cdb_observatory.OBS_GetUSCensusCategory(
NULL, 'Spielman-Singleton Segments: 10 Clusters') IS NULL As OBS_GetUSCensusCategory_null;
-- OBS_GetMeasureById tests
-- typical query
@@ -285,3 +310,11 @@ SELECT cdb_observatory.OBS_GetMeasureById(
'us.census.tiger.block_group',
'2010 - 2014'
) IS NULL As OBS_GetMeasureById_nulls;
-- NULL input id
SELECT cdb_observatory.OBS_GetMeasureById(
NULL,
'us.census.acs.B01003001',
'us.census.tiger.block_group',
'2010 - 2014'
) IS NULL As OBS_GetMeasureById_null_id;


@@ -0,0 +1,3 @@
nose
nose_parameterized
psycopg2

src/python/test/README.md (new file, 14 lines)

@@ -0,0 +1,14 @@
### Integration/performance tests
Tests here are meant to be run on a box with an Observatory meta/data dump
loaded and ready to be tested against the API.
The local Python needs the requirements in `src/python/requirements.txt`.
In order to find and access the correct database, the `PGUSER`, `PGPASSWORD`,
`PGHOST`, `PGPORT` and `PGDATABASE` env variables should be set.
Tests should be executed as follows:
nosetests test/autotest.py
nosetests -s test/perftest.py


@@ -1,77 +1,89 @@
from nose.tools import assert_equal, assert_is_not_none
from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
import os
import re
import requests
from util import query
HOSTNAME = os.environ['OBS_HOSTNAME']
API_KEY = os.environ['OBS_API_KEY']
META_HOSTNAME = os.environ.get('OBS_META_HOSTNAME', HOSTNAME)
META_API_KEY = os.environ.get('OBS_META_API_KEY', API_KEY)
USE_SCHEMA = 'OBS_USE_SCHEMA' in os.environ
USE_SCHEMA = True
def query(q, is_meta=False, **options):
'''
Query the account. Returned is the response, wrapped by the requests
library.
'''
url = 'https://{hostname}/api/v2/sql'.format(
hostname=META_HOSTNAME if is_meta else HOSTNAME)
params = options.copy()
params['q'] = re.sub(r'\s+', ' ', q)
params['api_key'] = META_API_KEY if is_meta else API_KEY
return requests.get(url, params=params)
MEASURE_COLUMNS = [(r['numer_id'], r['point_only'], ) for r in query('''
MEASURE_COLUMNS = query('''
SELECT distinct numer_id, numer_aggregate NOT ILIKE 'sum' as point_only
FROM obs_meta
FROM observatory.obs_meta
WHERE numer_type ILIKE 'numeric'
AND numer_weight > 0
''', is_meta=True).json()['rows']]
''').fetchall()
CATEGORY_COLUMNS = [(r['numer_id'], ) for r in query('''
CATEGORY_COLUMNS = query('''
SELECT distinct numer_id
FROM obs_meta
FROM observatory.obs_meta
WHERE numer_type ILIKE 'text'
AND numer_weight > 0
''', is_meta=True).json()['rows']]
''').fetchall()
BOUNDARY_COLUMNS = [(r['id'], ) for r in query('''
SELECT id FROM obs_column
BOUNDARY_COLUMNS = query('''
SELECT id FROM observatory.obs_column
WHERE type ILIKE 'geometry'
AND weight > 0
''', is_meta=True).json()['rows']]
''').fetchall()
US_CENSUS_MEASURE_COLUMNS = [(r['numer_name'], ) for r in query('''
US_CENSUS_MEASURE_COLUMNS = query('''
SELECT distinct numer_name
FROM obs_meta
FROM observatory.obs_meta
WHERE numer_type ILIKE 'numeric'
AND 'us.census.acs.acs' = ANY (subsection_tags)
AND numer_weight > 0
''', is_meta=True).json()['rows']]
''').fetchall()
SKIP_COLUMNS = set([
u'mx.inegi_columns.INDI18',
u'mx.inegi_columns.ECO40',
u'mx.inegi_columns.POB34',
u'mx.inegi_columns.POB63',
u'mx.inegi_columns.INDI7',
u'mx.inegi_columns.EDU28',
u'mx.inegi_columns.SCONY10',
u'mx.inegi_columns.EDU31',
u'mx.inegi_columns.POB7',
u'mx.inegi_columns.VIV30',
u'mx.inegi_columns.INDI12',
u'mx.inegi_columns.EDU13',
u'mx.inegi_columns.ECO43',
u'mx.inegi_columns.VIV9',
u'mx.inegi_columns.HOGAR25',
u'mx.inegi_columns.POB32',
u'mx.inegi_columns.ECO7',
u'mx.inegi_columns.INDI19',
u'mx.inegi_columns.INDI16',
u'mx.inegi_columns.POB65',
u'mx.inegi_columns.INDI3',
u'mx.inegi_columns.INDI9',
u'mx.inegi_columns.POB36',
u'mx.inegi_columns.POB33',
u'mx.inegi_columns.POB58',
u'mx.inegi_columns.DISC4',
])
def default_geometry_id(column_id):
'''
Returns default test point for the column_id.
'''
if column_id == 'whosonfirst.wof_disputed_geom':
return 'CDB_LatLng(33.78, 76.57)'
elif column_id == 'whosonfirst.wof_marinearea_geom':
return 'CDB_LatLng(43.33, -68.47)'
elif column_id in ('us.census.tiger.school_district_elementary',
'us.census.tiger.school_district_secondary',
'us.census.tiger.school_district_elementary_clipped',
'us.census.tiger.school_district_secondary_clipped'):
return 'CDB_LatLng(40.7025, -73.7067)'
elif column_id.startswith('es.ine'):
return 'CDB_LatLng(42.8226119029222, -2.51141249535454)'
elif column_id.startswith('us.zillow'):
return 'CDB_LatLng(28.3305906291771, -81.3544048197256)'
else:
return 'CDB_LatLng(40.7, -73.9)'
#def default_geometry_id(column_id):
# '''
# Returns default test point for the column_id.
# '''
# if column_id == 'whosonfirst.wof_disputed_geom':
# return 'ST_SetSRID(ST_MakePoint(76.57, 33.78), 4326)'
# elif column_id == 'whosonfirst.wof_marinearea_geom':
# return 'ST_SetSRID(ST_MakePoint(-68.47, 43.33), 4326)'
# elif column_id in ('us.census.tiger.school_district_elementary',
# 'us.census.tiger.school_district_secondary',
# 'us.census.tiger.school_district_elementary_clipped',
# 'us.census.tiger.school_district_secondary_clipped'):
# return 'ST_SetSRID(ST_MakePoint(-73.7067, 40.7025), 4326)'
# elif column_id.startswith('es.ine'):
# return 'ST_SetSRID(ST_MakePoint(-2.51141249535454, 42.8226119029222), 4326)'
# elif column_id.startswith('us.zillow'):
# return 'ST_SetSRID(ST_MakePoint(-81.3544048197256, 28.3305906291771), 4326)'
# elif column_id.startswith('ca.'):
# return ''
# else:
# return 'ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)'
def default_point(column_id):
@@ -79,27 +91,32 @@ def default_point(column_id):
Returns default test point for the column_id.
'''
if column_id == 'whosonfirst.wof_disputed_geom':
return 'CDB_LatLng(33.78, 76.57)'
return 'ST_SetSRID(ST_MakePoint(76.57, 33.78), 4326)'
elif column_id == 'whosonfirst.wof_marinearea_geom':
return 'CDB_LatLng(43.33, -68.47)'
return 'ST_SetSRID(ST_MakePoint(-68.47, 43.33), 4326)'
elif column_id in ('us.census.tiger.school_district_elementary',
'us.census.tiger.school_district_secondary',
'us.census.tiger.school_district_elementary_clipped',
'us.census.tiger.school_district_secondary_clipped'):
return 'CDB_LatLng(40.7025, -73.7067)'
return 'ST_SetSRID(ST_MakePoint(-73.7067, 40.7025), 4326)'
elif column_id.startswith('uk'):
if 'WA' in column_id:
return 'CDB_LatLng(51.46844551219723, -3.184833526611328)'
return 'ST_SetSRID(ST_MakePoint(-3.184833526611328, 51.46844551219723), 4326)'
else:
return 'CDB_LatLng(51.51461834694225, -0.08883476257324219)'
return 'ST_SetSRID(ST_MakePoint(-0.08883476257324219, 51.51461834694225), 4326)'
elif column_id.startswith('es'):
return 'CDB_LatLng(42.8226119029222, -2.51141249535454)'
return 'ST_SetSRID(ST_MakePoint(-2.51141249535454, 42.8226119029222), 4326)'
elif column_id.startswith('us.zillow'):
return 'CDB_LatLng(28.3305906291771, -81.3544048197256)'
return 'ST_SetSRID(ST_MakePoint(-81.3544048197256, 28.3305906291771), 4326)'
elif column_id.startswith('mx.'):
return 'CDB_LatLng(19.41347699386547, -99.17019367218018)'
return 'ST_SetSRID(ST_MakePoint(-99.17019367218018, 19.41347699386547), 4326)'
elif column_id.startswith('ca.'):
raise SkipTest('Skipping Canada until validation of data complete')
return 'ST_SetSRID(ST_MakePoint(-79.39716339111328, 43.65694347778308), 4326)'
elif column_id.startswith('th.'):
return 'ST_SetSRID(ST_MakePoint(100.49263000488281, 13.725377712079784), 4326)'
else:
return 'CDB_LatLng(40.7, -73.9)'
return 'ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)'
def default_area(column_id):
@@ -107,27 +124,26 @@ def default_area(column_id):
Returns default test area for the column_id
'''
point = default_point(column_id)
area = 'ST_Transform(ST_Buffer(ST_Transform({point}, 3857), 1000), 4326)'.format(
area = 'ST_Transform(ST_Buffer(ST_Transform({point}, 3857), 250), 4326)'.format(
point=point)
return area
@parameterized(US_CENSUS_MEASURE_COLUMNS)
def test_get_us_census_measure_points(name):
print 'test_get_us_census_measure_points, ', name
resp = query('''
SELECT * FROM {schema}OBS_GetUSCensusMeasure({point}, '{name}')
'''.format(name=name.replace("'", "''"),
schema='cdb_observatory.' if USE_SCHEMA else '',
point=default_point('')))
assert_equal(resp.status_code, 200)
rows = resp.json()['rows']
rows = resp.fetchall()
assert_equal(1, len(rows))
assert_is_not_none(rows[0].values()[0])
assert_is_not_none(rows[0][0])
@parameterized(MEASURE_COLUMNS)
def test_get_measure_areas(column_id, point_only):
print 'test_get_measure_areas, ', column_id, point_only
if column_id in SKIP_COLUMNS:
raise SkipTest('Column {} should be skipped'.format(column_id))
if point_only:
return
resp = query('''
@@ -135,24 +151,23 @@ SELECT * FROM {schema}OBS_GetMeasure({area}, '{column_id}')
'''.format(column_id=column_id,
schema='cdb_observatory.' if USE_SCHEMA else '',
area=default_area(column_id)))
assert_equal(resp.status_code, 200)
rows = resp.json()['rows']
rows = resp.fetchall()
assert_equal(1, len(rows))
assert_is_not_none(rows[0].values()[0])
assert_is_not_none(rows[0][0])
@parameterized(MEASURE_COLUMNS)
def test_get_measure_points(column_id, point_only):
print 'test_get_measure_points, ', column_id, point_only
if column_id in SKIP_COLUMNS:
raise SkipTest('Column {} should be skipped'.format(column_id))
resp = query('''
SELECT * FROM {schema}OBS_GetMeasure({point}, '{column_id}')
'''.format(column_id=column_id,
schema='cdb_observatory.' if USE_SCHEMA else '',
point=default_point(column_id)))
assert_equal(resp.status_code, 200)
rows = resp.json()['rows']
rows = resp.fetchall()
assert_equal(1, len(rows))
assert_is_not_none(rows[0].values()[0])
assert_is_not_none(rows[0][0])
#@parameterized(CATEGORY_COLUMNS)
#def test_get_category_areas(column_id):
@@ -164,20 +179,20 @@ SELECT * FROM {schema}OBS_GetMeasure({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
@parameterized(CATEGORY_COLUMNS)
def test_get_category_points(column_id):
print 'test_get_category_points, ', column_id
if column_id in SKIP_COLUMNS:
raise SkipTest('Column {} should be skipped'.format(column_id))
resp = query('''
SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
'''.format(column_id=column_id,
schema='cdb_observatory.' if USE_SCHEMA else '',
point=default_point(column_id)))
assert_equal(resp.status_code, 200)
rows = resp.json()['rows']
rows = resp.fetchall()
assert_equal(1, len(rows))
assert_is_not_none(rows[0].values()[0])
assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundaries_by_geometry(column_id):
@@ -189,7 +204,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_points_by_geometry(column_id):
@@ -201,7 +216,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_points(column_id):
@@ -213,7 +228,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_id(column_id):
@@ -225,7 +240,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_by_id(column_id):
@@ -237,4 +252,5 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
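
After this change the tests talk to PostgreSQL directly through `util.query` (psycopg2) rather than the HTTP SQL API, so each parameterized case boils down to a plain SQL statement. One concrete instance, using the default point above and a measure ID that appears in the extension's own SQL tests (illustrative only, column IDs normally come from `observatory.obs_meta`):

```sql
SELECT * FROM cdb_observatory.OBS_GetMeasure(
  ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326),
  'us.census.acs.B01003001');
```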


@@ -0,0 +1,62 @@
from nose.tools import assert_equal, assert_is_not_none
from nose_parameterized import parameterized
from util import query, commit
from time import time
USE_SCHEMA = True
for q in (
'DROP TABLE IF EXISTS obs_censustest',
'''CREATE TABLE obs_censustest (cartodb_id SERIAL PRIMARY KEY,
the_geom GEOMETRY, name TEXT, measure NUMERIC, category TEXT)''',
'''INSERT INTO obs_censustest (the_geom, name)
SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-74.05437469482422,40.66319159533881,
-73.81885528564453,40.745696344339564, 4326),
'us.census.tiger.block_group_clipped') As m(the_geom, geoid)'''
):
query(q.format(
schema='cdb_observatory.' if USE_SCHEMA else '',
))
commit()
ARGS = {
'OBS_GetMeasureByID': "name, 'us.census.acs.B01001002', '{}'",
'OBS_GetMeasure': "{}, 'us.census.acs.B01001002'",
'OBS_GetCategory': "{}, 'us.census.spielman_singleton_segments.X10'",
}
GEOMS = {
'point': 'ST_PointOnSurface(the_geom)',
'polygon_match': 'the_geom',
'polygon_buffered': 'ST_Buffer(the_geom::GEOGRAPHY, 1000)::GEOMETRY(GEOMETRY, 4326)',
}
@parameterized([
('OBS_GetMeasureByID', 'us.census.tiger.block_group_clipped'),
('OBS_GetMeasureByID', 'us.census.tiger.county'),
('OBS_GetMeasure', GEOMS['point']),
('OBS_GetMeasure', GEOMS['polygon_match']),
('OBS_GetMeasure', GEOMS['polygon_buffered']),
('OBS_GetCategory', GEOMS['point']),
('OBS_GetCategory', GEOMS['polygon_match']),
('OBS_GetCategory', GEOMS['polygon_buffered']),
])
def test_performance(api_method, arg):
print api_method, arg
col = 'measure' if 'measure' in api_method.lower() else 'category'
for rows in (1, 10, 50, 100):
q = 'UPDATE obs_censustest SET {col} = {schema}{api_method}({args}) WHERE cartodb_id < {n}'.format(
col=col,
schema='cdb_observatory.' if USE_SCHEMA else '',
api_method=api_method,
args=ARGS[api_method].format(arg),
n=rows+1)
start = time()
query(q)
end = time()
print rows, ': ', (rows / (end - start)), ' QPS'
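
For reference, this is how one parameterization of the timed `UPDATE` expands: the `OBS_GetMeasure` case over point geometries with `rows = 10` (so `cartodb_id < 11`), assuming the schema-qualified call:

```sql
UPDATE obs_censustest
SET measure = cdb_observatory.OBS_GetMeasure(
      ST_PointOnSurface(the_geom),
      'us.census.acs.B01001002')
WHERE cartodb_id < 11;
```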

src/python/test/util.py (new file, 31 lines)

@@ -0,0 +1,31 @@
import os
import psycopg2
DB_CONN = psycopg2.connect('postgres://{user}:{password}@{host}:{port}/{database}'.format(
user=os.environ.get('PGUSER', 'postgres'),
password=os.environ.get('PGPASSWORD', ''),
host=os.environ.get('PGHOST', 'localhost'),
port=os.environ.get('PGPORT', '5432'),
database=os.environ.get('PGDATABASE', 'postgres'),
))
CURSOR = DB_CONN.cursor()
def query(q):
'''
Query the database.
'''
try:
CURSOR.execute(q)
return CURSOR
except:
DB_CONN.rollback()
raise
def commit():
try:
DB_CONN.commit()
except:
DB_CONN.rollback()
raise