27 Commits
1.0.4 ... 1.0.7

Author | SHA1 | Message | Date
Carla Iriberri | f4113eaea3 | Release 1.0.7 | 2016-09-21 11:24:29 +02:00
Carla | 86fac2a600 | Merge pull request #196 from CartoDB/release-v-1.0.7 (Release v 1.0.7) | 2016-09-21 11:12:22 +02:00
John Krauss | 2d753cd758 | Skip bad MX measure, smaller buffer for faster tests, updated NEWS.md | 2016-09-20 17:56:23 +00:00
john krauss | 96a98c3bce | Merge pull request #194 from CartoDB/null-resilience (Resolve #178) | 2016-09-20 13:38:11 -04:00
john krauss | d58263935d | Merge pull request #195 from CartoDB/ca-testing (Add point to make sure CA data is present) | 2016-09-20 12:27:02 -04:00
John Krauss | 104608c6d3 | Add point to make sure CA data is present | 2016-09-20 16:31:15 +00:00
John Krauss | c67fe12111 | return NULL in cases when NULL is passed as input geometry or geometry ID. resolves #178 | 2016-09-20 16:26:13 +00:00
Carla Iriberri | 860290595c | Release 1.0.6 | 2016-09-08 10:37:37 +02:00
Carla | bf4ade2fa0 | Merge pull request #186 from CartoDB/measure_release (Use explicit functions for query construction and metadata) | 2016-09-08 09:58:25 +02:00
Carla | 32d37a74b3 | Remove cascades and quote conveniently | 2016-09-02 12:04:03 +02:00
Mario de Frutos | da877e4ef0 | Modify PR template to include the update of NEWS.md | 2016-08-25 14:36:07 +02:00
Mario de Frutos | 15de07ca33 | Modify PR template | 2016-08-25 14:30:42 +02:00
Mario de Frutos | 8af3e22661 | Merge pull request #188 from CartoDB/pr_template (Added PR template) | 2016-08-25 14:27:14 +02:00
Mario de Frutos | fdd591b159 | Added PR template | 2016-08-25 11:28:01 +02:00
Carla Iriberri | 5eb4ede219 | Fix | 2016-08-23 17:20:48 +02:00
Carla Iriberri | dd5f560359 | Separate functions between files | 2016-08-19 16:39:30 +02:00
Carla Iriberri | 62c2693553 | Avoid function check to dispatch | 2016-08-19 13:04:54 +02:00
Carla Iriberri | 48d1bfdb13 | Remove JSON manipulation to use json functions | 2016-08-19 12:45:38 +02:00
Carla Iriberri | 30f27e5b58 | Check function name and use param names instead of | 2016-08-18 15:43:03 +02:00
Carla Iriberri | 26b22a9bf4 | Use explicit functions for query construction and metadata | 2016-08-18 15:36:32 +02:00
Mario de Frutos | c9e809c061 | Merge pull request #185 from CartoDB/develop (Release 1.0.5) | 2016-08-18 15:06:50 +02:00
Mario de Frutos | 43e83751ae | Release 1.0.5 artifact | 2016-08-18 15:05:38 +02:00
Mario de Frutos | 4c13434b9a | Merge pull request #182 from CartoDB/sql-tests (SQL Integration and Performance Tests) | 2016-08-18 14:54:53 +02:00
Mario de Frutos | 8785639ece | Merge pull request #154 from CartoDB/iriberri-patch-1 (Use 6432 for connections from server) | 2016-08-18 11:10:02 +02:00
John Krauss | f991f5a1e6 | docs and NEWS for the new tests | 2016-08-12 18:56:06 +00:00
John Krauss | e4b4ebf72d | Adapted autotest to to work with SQL directly instead of over HTTP SQL API | 2016-08-12 18:48:31 +00:00
Carla | bfa57f4971 | Use 6432 for connections from server | 2016-07-19 17:54:08 +02:00
19 changed files with 6971 additions and 141 deletions

.github/PULL_REQUEST_TEMPLATE.md (new file, 17 lines)

@@ -0,0 +1,17 @@
## Request for a new Data observatory extension deploy
I'd like to request a new data observatory extension deploy: dump + extension
## Dump database id to be deployed
Please put here the dump id to be deployed: <dump_id>
## Data Observatory extension PRs included.
*Please update the NEWS.md*
Add down here the PR links to be added and deployed:
-
// @CartoDB/dataservices


@@ -18,7 +18,7 @@ test: ## Run the tests for the development version of the extension
$(MAKE) -C $(EXT_DIR) test
# Generate a new release into release
release: ## Generate a new release of the extension. Only for telease manager
release: ## Generate a new release of the extension. Only for release manager
$(MAKE) -C $(EXT_DIR) release
# Install the current release.

NEWS.md (25 lines added)

@@ -1,3 +1,28 @@
1.0.7 (2016-09-20)
__Bugfixes__
* `NULL` geometries or geometry IDs no longer result in an exception from any
  augmentation functions ([#178](https://github.com/CartoDB/observatory-extension/issues/178))
__Improvements__
* Automatic tests work for Canada and Thailand

1.0.6 (2016-09-08)
__Improvements__
* New function structure for Table-level functions which allows separating the
  framework logic from the observatory measure functions.

1.0.5 (2016-08-12)
__Improvements__
* Integration tests moved to `src/python/test/`, and can be run without hitting
  any HTTP SQL API.

1.0.4 (2016-07-26)
__Bugfixes__

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1,5 +1,5 @@
comment = 'CartoDB Observatory backend extension'
default_version = '1.0.4'
default_version = '1.0.7'
requires = 'postgis, postgres_fdw'
superuser = true
schema = cdb_observatory


@@ -1,5 +1,5 @@
comment = 'CartoDB Observatory backend extension'
default_version = '1.0.4'
default_version = '1.0.7'
requires = 'postgis, postgres_fdw'
superuser = true
schema = cdb_observatory


@@ -8,7 +8,7 @@ DECLARE
BEGIN
-- Build connection string
connection_str := '{"server":{"extensions":"postgis", "dbname":"'
|| user_dbname ||'", "host":"' || user_hostname ||'", "port":"5432"}, "users":{"public"'
|| user_dbname ||'", "host":"' || user_hostname ||'", "port":"6432"}, "users":{"public"'
|| ':{"user":"' || username ||'", "password":""} } }';
-- This function tries to be as idempotent as possible, by not creating anything more than once


@@ -179,7 +179,10 @@ BEGIN
--raise notice 'Cannot find data table for boundary ID %, column_ids %, and time_span %', geometry_level, column_ids, time_span;
END IF;
IF ST_GeometryType(geom) = 'ST_Point'
IF geom IS NULL
THEN
results := NULL;
ELSIF ST_GeometryType(geom) = 'ST_Point'
THEN
--raise notice 'geom_table_name %, data_table_info %', geom_table_name, data_table_info::json[];
results := cdb_observatory._OBS_GetPoints(geom,
@@ -361,6 +364,10 @@ DECLARE
sql TEXT;
numer_name TEXT;
BEGIN
IF geom IS NULL THEN
RETURN NULL;
END IF;
geom := ST_SnapToGrid(geom, 0.000001);
EXECUTE
@@ -525,6 +532,9 @@ DECLARE
measure_val NUMERIC;
data_geoid_colname TEXT;
BEGIN
IF geom_ref IS NULL THEN
RETURN NULL;
END IF;
EXECUTE
$query$
@@ -573,6 +583,9 @@ DECLARE
category_val TEXT;
category_share NUMERIC;
BEGIN
IF geom IS NULL THEN
RETURN NULL;
END IF;
EXECUTE
$query$
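
With these guards in place, the augmentation functions degrade gracefully on missing input instead of raising. A minimal sketch of the new behaviour (mirroring the acceptance tests later in this changeset; the measure, boundary, and time-span IDs are the ones those tests use):

```sql
-- NULL input geometry: previously an exception, now simply NULL
SELECT cdb_observatory.OBS_GetMeasure(NULL, 'us.census.acs.B01003001');

-- NULL geometry ID for the ID-based variant behaves the same way
SELECT cdb_observatory.OBS_GetMeasureById(
  NULL,
  'us.census.acs.B01003001',
  'us.census.tiger.block_group',
  '2010 - 2014');
```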


@@ -24,9 +24,12 @@ BEGIN
EXCEPTION
WHEN others THEN
-- Disconnect user imported table. Delete schema and FDW server.
EXECUTE 'DROP FOREIGN TABLE IF EXISTS ' || fdw_import_schema || '.' || table_name;
EXECUTE 'DROP SCHEMA IF EXISTS ' || fdw_import_schema || ' CASCADE';
EXECUTE 'DROP SERVER IF EXISTS ' || fdw_server || ' CASCADE;';
EXECUTE 'DROP FOREIGN TABLE IF EXISTS "' || fdw_import_schema || '".' || table_name;
EXECUTE 'DROP FOREIGN TABLE IF EXISTS "' || fdw_import_schema || '".cdb_tablemetadata';
EXECUTE 'DROP SCHEMA IF EXISTS "' || fdw_import_schema || '"';
EXECUTE 'DROP USER MAPPING IF EXISTS FOR public SERVER "' || fdw_server || '"';
EXECUTE 'DROP SERVER IF EXISTS "' || fdw_server || '"';
RETURN (null, null, null);
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
@@ -37,27 +40,9 @@ AS $$
DECLARE
colnames text[];
coltypes text[];
requested_measures text[];
measure text;
BEGIN
-- Simple mock, there should be real logic in here.
IF $3 NOT ILIKE 'GetMeasure' OR $3 IS NULL THEN
RAISE 'This function is not supported yet: %', $3;
END IF;
SELECT translate($4::json->>'tag_name','[]', '{}')::text[] INTO requested_measures;
FOREACH measure IN ARRAY requested_measures
LOOP
IF NOT measure ILIKE ANY (Array['total_pop', 'pop_16_over']::text[]) THEN
RAISE 'This measure is not supported yet: %', measure;
END IF;
SELECT array_append(colnames, measure) INTO colnames;
SELECT array_append(coltypes, 'double precision'::text) INTO coltypes;
END LOOP;
EXECUTE FORMAT('SELECT r.colnames::text[], r.coltypes::text[] FROM cdb_observatory._%sResultMetadata(%L::json) r', function_name, params::text)
INTO colnames, coltypes;
RETURN (colnames::text[], coltypes::text[]);
END;
@@ -68,41 +53,17 @@ RETURNS SETOF record
AS $$
DECLARE
data_query text;
tag_name text[];
tag text;
tags_list text;
tags_query text;
rec RECORD;
BEGIN
SELECT translate($6::json->>'tag_name','[]', '{}')::text[] INTO tag_name;
SELECT array_to_string(tag_name, ',') INTO tags_list;
tags_query := '';
FOREACH tag IN ARRAY tag_name
LOOP
SELECT tags_query || ' sum(' || tag || '/fraction)::double precision as ' || tag || ', ' INTO tags_query;
END LOOP;
-- Simple mock, there should be real logic in here.
data_query := '(WITH _areas AS(SELECT ST_Area(a.the_geom::geography)'
|| '/ (1000 * 1000) as fraction, a.geoid, b.cartodb_id FROM '
|| 'observatory.obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308 as a, '
|| table_schema || '.' || table_name || ' AS b '
|| 'WHERE b.the_geom && a.the_geom ), values AS (SELECT geoid, '
|| tags_list
|| ' FROM observatory.obs_1a098da56badf5f32e336002b0a81708c40d29cd ) '
|| 'SELECT '
|| tags_query
|| ' cartodb_id::int FROM _areas, values '
|| 'WHERE values.geoid = _areas.geoid GROUP BY cartodb_id);';
EXECUTE FORMAT('SELECT cdb_observatory._%sQuery(%L, %L, %L::json)', function_name, table_schema, table_name, params::text)
INTO data_query;
FOR rec IN EXECUTE data_query
LOOP
RETURN NEXT rec;
END LOOP;
RETURN;
RETURN;
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
@@ -112,8 +73,10 @@ RETURNS boolean
AS $$
BEGIN
EXECUTE 'DROP FOREIGN TABLE IF EXISTS "' || table_schema || '".' || table_name;
EXECUTE 'DROP SCHEMA IF EXISTS ' || table_schema || ' CASCADE';
EXECUTE 'DROP SERVER IF EXISTS ' || servername || ' CASCADE;';
EXECUTE 'DROP FOREIGN TABLE IF EXISTS "' || table_schema || '".cdb_tablemetadata';
EXECUTE 'DROP SCHEMA IF EXISTS "' || table_schema || '"';
EXECUTE 'DROP USER MAPPING IF EXISTS FOR public SERVER "' || servername || '"';
EXECUTE 'DROP SERVER IF EXISTS "' || servername || '"';
RETURN true;
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
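
With the hard-coded mock logic removed, the table-level entry points now dispatch dynamically on `function_name`. As a rough illustration (the measure IDs below are only example values, taken from the old mock), for `function_name = 'OBS_GetMeasure'` the metadata `EXECUTE FORMAT(...)` above expands to something like:

```sql
SELECT r.colnames::text[], r.coltypes::text[]
FROM cdb_observatory._OBS_GetMeasureResultMetadata(
       '{"measure_id": ["total_pop", "pop_16_over"]}'::json) r;
```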


@@ -0,0 +1,79 @@
--
--
-- OBS_GetMeasure
--
--
CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetMeasureResultMetadata(params json)
RETURNS cdb_observatory.ds_return_metadata
AS $$
DECLARE
colnames text[]; -- Array to store the name of the measures to be returned
coltypes text[]; -- Array to store the type of the measures to be returned
requested_measures text[];
measure_id text;
BEGIN
-- By definition, all the measure results for the OBS_GetMeasure API are numeric values
SELECT ARRAY(SELECT json_array_elements_text(params->'measure_id'))::text[] INTO requested_measures;
FOREACH measure_id IN ARRAY requested_measures
LOOP
SELECT array_append(colnames, measure_id) INTO colnames;
SELECT array_append(coltypes, 'numeric'::text) INTO coltypes;
END LOOP;
RETURN (colnames::text[], coltypes::text[]);
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetMeasureQuery(table_schema text, table_name text, params json)
RETURNS text
AS $$
DECLARE
data_query text;
measure_ids_arr text[];
measure_id text;
measures_list text;
measures_query text;
normalize text;
boundary_id text;
time_span text;
geom_table_name text;
data_table_name text;
BEGIN
measures_query := '';
-- SELECT table_name from obs_meta WHERE boundary_id = {bound} AND [...] INTO geom_table_name
geom_table_name := 'observatory.obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308';
-- SELECT table_name from obs_meta WHERE time_span = {time} AND [...] INTO data_table_name
data_table_name := 'observatory.obs_1a098da56badf5f32e336002b0a81708c40d29cd';
-- Get measure_ids array from JSON
SELECT ARRAY(SELECT json_array_elements_text(params->'measure_id'))::text[] INTO measure_ids_arr;
-- Get a comma-separated list of measures ("total_pop, over_16_pop") to be used in SELECTs
SELECT array_to_string(measure_ids_arr, ',') INTO measures_list;
FOREACH measure_id IN ARRAY measure_ids_arr
LOOP
-- Build query to compute each value and normalize
-- Assumes the default normalization method, the normalize parameter given in the JSON
-- should be checked in order to build the final query
SELECT measures_query || ' sum(' || measure_id || '/fraction)::numeric as ' || measure_id || ', ' INTO measures_query;
END LOOP;
-- Data query should select the measures and the cartodb_id of the user table, in that order.
data_query := '(WITH _areas AS(SELECT ST_Area(a.the_geom::geography)'
|| '/ (1000 * 1000) as fraction, a.geoid, b.cartodb_id FROM '
|| geom_table_name || ' as a, '
|| table_schema || '.' || table_name || ' AS b '
|| 'WHERE b.the_geom && a.the_geom ), values AS (SELECT geoid, '
|| measures_list
|| ' FROM ' || data_table_name || ' ) '
|| 'SELECT '
|| measures_query
|| ' cartodb_id::int FROM _areas, values '
|| 'WHERE values.geoid = _areas.geoid GROUP BY cartodb_id);';
RETURN data_query;
END;
$$ LANGUAGE plpgsql;
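
These two functions are meant to be called through the table-level dispatcher shown earlier, but they can also be exercised directly. A hedged sketch (`public.my_user_table` is a placeholder schema/table name; the measure ID list follows the JSON shape the function reads):

```sql
-- Returns the text of the aggregation query for a user table, without running it
SELECT cdb_observatory._OBS_GetMeasureQuery(
  'public',
  'my_user_table',
  '{"measure_id": ["total_pop", "pop_16_over"]}'::json);
```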


@@ -66,12 +66,18 @@ t
obs_getmeasure_bad_geometry
t
(1 row)
obs_getmeasure_null
t
(1 row)
obs_getcategory_point
t
(1 row)
obs_getcategory_polygon
t
(1 row)
obs_getcategory_null
t
(1 row)
obs_getpopulation
t
(1 row)
@@ -81,6 +87,9 @@ t
obs_getpopulation_polygon_null_test
t
(1 row)
obs_getpopulation_polygon_null_geom_test
t
(1 row)
obs_getuscensusmeasure_point_male_pop
t
(1 row)
@@ -90,12 +99,18 @@ t
obs_getuscensusmeasure_null
t
(1 row)
obs_getuscensusmeasure_null_geom
t
(1 row)
obs_getuscensuscategory_point
t
(1 row)
obs_getuscensuscategory_polygon
t
(1 row)
obs_getuscensuscategory_null
t
(1 row)
obs_getmeasurebyid_cartodb_census_tract
t
(1 row)
@@ -108,3 +123,6 @@ t
obs_getmeasurebyid_nulls
t
(1 row)
obs_getmeasurebyid_null_id
t
(1 row)


@@ -203,6 +203,11 @@ SELECT abs(cdb_observatory.OBS_GetMeasure(
cdb_observatory._ProblemTestArea(),
'us.census.acs.B01003001') - 96230.2929825897) / 96230.2929825897 < 0.001 As OBS_GetMeasure_bad_geometry;
-- OBS_GetMeasure with NULL Input
SELECT cdb_observatory.OBS_GetMeasure(
NULL,
'us.census.acs.B01003001') IS NULL As OBS_GetMeasure_null;
-- Point-based OBS_GetCategory
SELECT cdb_observatory.OBS_GetCategory(
cdb_observatory._TestPoint(), 'us.census.spielman_singleton_segments.X10') = 'Wealthy, urban without Kids' As OBS_GetCategory_point;
@@ -211,6 +216,10 @@ SELECT cdb_observatory.OBS_GetCategory(
SELECT cdb_observatory.OBS_GetCategory(
cdb_observatory._TestArea(), 'us.census.spielman_singleton_segments.X10') = 'Wealthy, urban without Kids' As obs_getcategory_polygon;
-- NULL Input OBS_GetCategory
SELECT cdb_observatory.OBS_GetCategory(
NULL, 'us.census.spielman_singleton_segments.X10') IS NULL As obs_getcategory_null;
-- Point-based OBS_GetPopulation, default normalization (area)
SELECT (abs(OBS_GetPopulation - 10923.093200390833950) / 10923.093200390833950) < 0.001 As OBS_GetPopulation FROM
cdb_observatory.OBS_GetPopulation(
@@ -231,6 +240,13 @@ FROM
cdb_observatory._TestArea(), NULL
) As m(obs_getpopulation_polygon_null);
-- Null input OBS_GetPopulation
SELECT obs_getpopulation_polygon_null_geom IS NULL As obs_getpopulation_polygon_null_geom_test
FROM
cdb_observatory.OBS_GetPopulation(
NULL, NULL
) As m(obs_getpopulation_polygon_null_geom);
-- Point-based OBS_GetUSCensusMeasure, default normalization (area)
SELECT (abs(cdb_observatory.obs_getuscensusmeasure(
cdb_observatory._testpoint(), 'male population') - 6789.5647735060920500) / 6789.5647735060920500) < 0.001 As obs_getuscensusmeasure_point_male_pop;
@@ -244,6 +260,11 @@ SELECT (abs(cdb_observatory.obs_getuscensusmeasure(
SELECT (abs(cdb_observatory.obs_getuscensusmeasure(
cdb_observatory._testarea(), 'male population', NULL) - 6043.63061042765) / 6043.63061042765) < 0.001 As obs_getuscensusmeasure_null;
-- Poly-based OBS_GetUSCensusMeasure, Null input geom
SELECT cdb_observatory.obs_getuscensusmeasure(
NULL, 'male population', NULL) IS NULL As obs_getuscensusmeasure_null_geom;
-- Point-based OBS_GetUSCensusCategory
SELECT cdb_observatory.OBS_GetUSCensusCategory(
cdb_observatory._testpoint(), 'Spielman-Singleton Segments: 10 Clusters') = 'Wealthy, urban without Kids' As OBS_GetUSCensusCategory_point;
@@ -252,6 +273,10 @@ SELECT cdb_observatory.OBS_GetUSCensusCategory(
SELECT cdb_observatory.OBS_GetUSCensusCategory(
cdb_observatory._testarea(), 'Spielman-Singleton Segments: 10 Clusters') = 'Wealthy, urban without Kids' As OBS_GetUSCensusCategory_polygon;
-- Null-input OBS_GetUSCensusCategory
SELECT cdb_observatory.OBS_GetUSCensusCategory(
NULL, 'Spielman-Singleton Segments: 10 Clusters') IS NULL As OBS_GetUSCensusCategory_null;
-- OBS_GetMeasureById tests
-- typical query
@@ -285,3 +310,11 @@ SELECT cdb_observatory.OBS_GetMeasureById(
'us.census.tiger.block_group',
'2010 - 2014'
) IS NULL As OBS_GetMeasureById_nulls;
-- NULL input id
SELECT cdb_observatory.OBS_GetMeasureById(
NULL,
'us.census.acs.B01003001',
'us.census.tiger.block_group',
'2010 - 2014'
) IS NULL As OBS_GetMeasureById_null_id;


@@ -0,0 +1,3 @@
nose
nose_parameterized
psycopg2

src/python/test/README.md (new file, 14 lines)

@@ -0,0 +1,14 @@
### Integration/performance tests
Tests here are meant to be run on a box with an Observatory meta/data dump
loaded and ready to be tested against the API.
The local Python needs the requirements in `src/python/requirements.txt`.
In order to find and access the correct database, the `PGUSER`, `PGPASSWORD`,
`PGHOST`, `PGPORT` and `PGDATABASE` env variables should be set.
Tests should be executed as follows:
nosetests test/autotest.py
nosetests -s test/perftest.py


@@ -1,77 +1,89 @@
from nose.tools import assert_equal, assert_is_not_none
from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
import os
import re
import requests
from util import query
HOSTNAME = os.environ['OBS_HOSTNAME']
API_KEY = os.environ['OBS_API_KEY']
META_HOSTNAME = os.environ.get('OBS_META_HOSTNAME', HOSTNAME)
META_API_KEY = os.environ.get('OBS_META_API_KEY', API_KEY)
USE_SCHEMA = 'OBS_USE_SCHEMA' in os.environ
USE_SCHEMA = True
def query(q, is_meta=False, **options):
'''
Query the account. Returned is the response, wrapped by the requests
library.
'''
url = 'https://{hostname}/api/v2/sql'.format(
hostname=META_HOSTNAME if is_meta else HOSTNAME)
params = options.copy()
params['q'] = re.sub(r'\s+', ' ', q)
params['api_key'] = META_API_KEY if is_meta else API_KEY
return requests.get(url, params=params)
MEASURE_COLUMNS = [(r['numer_id'], r['point_only'], ) for r in query('''
MEASURE_COLUMNS = query('''
SELECT distinct numer_id, numer_aggregate NOT ILIKE 'sum' as point_only
FROM obs_meta
FROM observatory.obs_meta
WHERE numer_type ILIKE 'numeric'
AND numer_weight > 0
''', is_meta=True).json()['rows']]
''').fetchall()
CATEGORY_COLUMNS = [(r['numer_id'], ) for r in query('''
CATEGORY_COLUMNS = query('''
SELECT distinct numer_id
FROM obs_meta
FROM observatory.obs_meta
WHERE numer_type ILIKE 'text'
AND numer_weight > 0
''', is_meta=True).json()['rows']]
''').fetchall()
BOUNDARY_COLUMNS = [(r['id'], ) for r in query('''
SELECT id FROM obs_column
BOUNDARY_COLUMNS = query('''
SELECT id FROM observatory.obs_column
WHERE type ILIKE 'geometry'
AND weight > 0
''', is_meta=True).json()['rows']]
''').fetchall()
US_CENSUS_MEASURE_COLUMNS = [(r['numer_name'], ) for r in query('''
US_CENSUS_MEASURE_COLUMNS = query('''
SELECT distinct numer_name
FROM obs_meta
FROM observatory.obs_meta
WHERE numer_type ILIKE 'numeric'
AND 'us.census.acs.acs' = ANY (subsection_tags)
AND numer_weight > 0
''', is_meta=True).json()['rows']]
''').fetchall()
SKIP_COLUMNS = set([
u'mx.inegi_columns.INDI18',
u'mx.inegi_columns.ECO40',
u'mx.inegi_columns.POB34',
u'mx.inegi_columns.POB63',
u'mx.inegi_columns.INDI7',
u'mx.inegi_columns.EDU28',
u'mx.inegi_columns.SCONY10',
u'mx.inegi_columns.EDU31',
u'mx.inegi_columns.POB7',
u'mx.inegi_columns.VIV30',
u'mx.inegi_columns.INDI12',
u'mx.inegi_columns.EDU13',
u'mx.inegi_columns.ECO43',
u'mx.inegi_columns.VIV9',
u'mx.inegi_columns.HOGAR25',
u'mx.inegi_columns.POB32',
u'mx.inegi_columns.ECO7',
u'mx.inegi_columns.INDI19',
u'mx.inegi_columns.INDI16',
u'mx.inegi_columns.POB65',
u'mx.inegi_columns.INDI3',
u'mx.inegi_columns.INDI9',
u'mx.inegi_columns.POB36',
u'mx.inegi_columns.POB33',
u'mx.inegi_columns.POB58',
u'mx.inegi_columns.DISC4',
])
def default_geometry_id(column_id):
'''
Returns default test point for the column_id.
'''
if column_id == 'whosonfirst.wof_disputed_geom':
return 'CDB_LatLng(33.78, 76.57)'
elif column_id == 'whosonfirst.wof_marinearea_geom':
return 'CDB_LatLng(43.33, -68.47)'
elif column_id in ('us.census.tiger.school_district_elementary',
'us.census.tiger.school_district_secondary',
'us.census.tiger.school_district_elementary_clipped',
'us.census.tiger.school_district_secondary_clipped'):
return 'CDB_LatLng(40.7025, -73.7067)'
elif column_id.startswith('es.ine'):
return 'CDB_LatLng(42.8226119029222, -2.51141249535454)'
elif column_id.startswith('us.zillow'):
return 'CDB_LatLng(28.3305906291771, -81.3544048197256)'
else:
return 'CDB_LatLng(40.7, -73.9)'
#def default_geometry_id(column_id):
# '''
# Returns default test point for the column_id.
# '''
# if column_id == 'whosonfirst.wof_disputed_geom':
# return 'ST_SetSRID(ST_MakePoint(76.57, 33.78), 4326)'
# elif column_id == 'whosonfirst.wof_marinearea_geom':
# return 'ST_SetSRID(ST_MakePoint(-68.47, 43.33), 4326)'
# elif column_id in ('us.census.tiger.school_district_elementary',
# 'us.census.tiger.school_district_secondary',
# 'us.census.tiger.school_district_elementary_clipped',
# 'us.census.tiger.school_district_secondary_clipped'):
# return 'ST_SetSRID(ST_MakePoint(-73.7067, 40.7025), 4326)'
# elif column_id.startswith('es.ine'):
# return 'ST_SetSRID(ST_MakePoint(-2.51141249535454, 42.8226119029222), 4326)'
# elif column_id.startswith('us.zillow'):
# return 'ST_SetSRID(ST_MakePoint(-81.3544048197256, 28.3305906291771), 4326)'
# elif column_id.startswith('ca.'):
# return ''
# else:
# return 'ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)'
def default_point(column_id):
@@ -79,27 +91,32 @@ def default_point(column_id):
Returns default test point for the column_id.
'''
if column_id == 'whosonfirst.wof_disputed_geom':
return 'CDB_LatLng(33.78, 76.57)'
return 'ST_SetSRID(ST_MakePoint(76.57, 33.78), 4326)'
elif column_id == 'whosonfirst.wof_marinearea_geom':
return 'CDB_LatLng(43.33, -68.47)'
return 'ST_SetSRID(ST_MakePoint(-68.47, 43.33), 4326)'
elif column_id in ('us.census.tiger.school_district_elementary',
'us.census.tiger.school_district_secondary',
'us.census.tiger.school_district_elementary_clipped',
'us.census.tiger.school_district_secondary_clipped'):
return 'CDB_LatLng(40.7025, -73.7067)'
return 'ST_SetSRID(ST_MakePoint(-73.7067, 40.7025), 4326)'
elif column_id.startswith('uk'):
if 'WA' in column_id:
return 'CDB_LatLng(51.46844551219723, -3.184833526611328)'
return 'ST_SetSRID(ST_MakePoint(-3.184833526611328, 51.46844551219723), 4326)'
else:
return 'CDB_LatLng(51.51461834694225, -0.08883476257324219)'
return 'ST_SetSRID(ST_MakePoint(-0.08883476257324219, 51.51461834694225), 4326)'
elif column_id.startswith('es'):
return 'CDB_LatLng(42.8226119029222, -2.51141249535454)'
return 'ST_SetSRID(ST_MakePoint(-2.51141249535454, 42.8226119029222), 4326)'
elif column_id.startswith('us.zillow'):
return 'CDB_LatLng(28.3305906291771, -81.3544048197256)'
return 'ST_SetSRID(ST_MakePoint(-81.3544048197256, 28.3305906291771), 4326)'
elif column_id.startswith('mx.'):
return 'CDB_LatLng(19.41347699386547, -99.17019367218018)'
return 'ST_SetSRID(ST_MakePoint(-99.17019367218018, 19.41347699386547), 4326)'
elif column_id.startswith('ca.'):
raise SkipTest('Skipping Canada until validation of data complete')
return 'ST_SetSRID(ST_MakePoint(-79.39716339111328, 43.65694347778308), 4326)'
elif column_id.startswith('th.'):
return 'ST_SetSRID(ST_MakePoint(100.49263000488281, 13.725377712079784), 4326)'
else:
return 'CDB_LatLng(40.7, -73.9)'
return 'ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)'
def default_area(column_id):
@@ -107,27 +124,26 @@ def default_area(column_id):
Returns default test area for the column_id
'''
point = default_point(column_id)
area = 'ST_Transform(ST_Buffer(ST_Transform({point}, 3857), 1000), 4326)'.format(
area = 'ST_Transform(ST_Buffer(ST_Transform({point}, 3857), 250), 4326)'.format(
point=point)
return area
@parameterized(US_CENSUS_MEASURE_COLUMNS)
def test_get_us_census_measure_points(name):
print 'test_get_us_census_measure_points, ', name
resp = query('''
SELECT * FROM {schema}OBS_GetUSCensusMeasure({point}, '{name}')
'''.format(name=name.replace("'", "''"),
schema='cdb_observatory.' if USE_SCHEMA else '',
point=default_point('')))
assert_equal(resp.status_code, 200)
rows = resp.json()['rows']
rows = resp.fetchall()
assert_equal(1, len(rows))
assert_is_not_none(rows[0].values()[0])
assert_is_not_none(rows[0][0])
@parameterized(MEASURE_COLUMNS)
def test_get_measure_areas(column_id, point_only):
print 'test_get_measure_areas, ', column_id, point_only
if column_id in SKIP_COLUMNS:
raise SkipTest('Column {} should be skipped'.format(column_id))
if point_only:
return
resp = query('''
@@ -135,24 +151,23 @@ SELECT * FROM {schema}OBS_GetMeasure({area}, '{column_id}')
'''.format(column_id=column_id,
schema='cdb_observatory.' if USE_SCHEMA else '',
area=default_area(column_id)))
assert_equal(resp.status_code, 200)
rows = resp.json()['rows']
rows = resp.fetchall()
assert_equal(1, len(rows))
assert_is_not_none(rows[0].values()[0])
assert_is_not_none(rows[0][0])
@parameterized(MEASURE_COLUMNS)
def test_get_measure_points(column_id, point_only):
print 'test_get_measure_points, ', column_id, point_only
if column_id in SKIP_COLUMNS:
raise SkipTest('Column {} should be skipped'.format(column_id))
resp = query('''
SELECT * FROM {schema}OBS_GetMeasure({point}, '{column_id}')
'''.format(column_id=column_id,
schema='cdb_observatory.' if USE_SCHEMA else '',
point=default_point(column_id)))
assert_equal(resp.status_code, 200)
rows = resp.json()['rows']
rows = resp.fetchall()
assert_equal(1, len(rows))
assert_is_not_none(rows[0].values()[0])
assert_is_not_none(rows[0][0])
#@parameterized(CATEGORY_COLUMNS)
#def test_get_category_areas(column_id):
@@ -164,20 +179,20 @@ SELECT * FROM {schema}OBS_GetMeasure({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
@parameterized(CATEGORY_COLUMNS)
def test_get_category_points(column_id):
print 'test_get_category_points, ', column_id
if column_id in SKIP_COLUMNS:
raise SkipTest('Column {} should be skipped'.format(column_id))
resp = query('''
SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
'''.format(column_id=column_id,
schema='cdb_observatory.' if USE_SCHEMA else '',
point=default_point(column_id)))
assert_equal(resp.status_code, 200)
rows = resp.json()['rows']
rows = resp.fetchall()
assert_equal(1, len(rows))
assert_is_not_none(rows[0].values()[0])
assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundaries_by_geometry(column_id):
@@ -189,7 +204,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_points_by_geometry(column_id):
@@ -201,7 +216,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_points(column_id):
@@ -213,7 +228,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_id(column_id):
@@ -225,7 +240,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_by_id(column_id):
@@ -237,4 +252,5 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
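
After this change the tests talk to PostgreSQL directly through `util.query` (psycopg2) rather than the HTTP SQL API, so each parameterized case boils down to a plain SQL statement. One concrete instance, using the default point above and a measure ID that appears in the extension's own SQL tests (illustrative only, column IDs normally come from `observatory.obs_meta`):

```sql
SELECT * FROM cdb_observatory.OBS_GetMeasure(
  ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326),
  'us.census.acs.B01003001');
```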


@@ -0,0 +1,62 @@
from nose.tools import assert_equal, assert_is_not_none
from nose_parameterized import parameterized
from util import query, commit
from time import time
USE_SCHEMA = True
for q in (
'DROP TABLE IF EXISTS obs_censustest',
'''CREATE TABLE obs_censustest (cartodb_id SERIAL PRIMARY KEY,
the_geom GEOMETRY, name TEXT, measure NUMERIC, category TEXT)''',
'''INSERT INTO obs_censustest (the_geom, name)
SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-74.05437469482422,40.66319159533881,
-73.81885528564453,40.745696344339564, 4326),
'us.census.tiger.block_group_clipped') As m(the_geom, geoid)'''
):
query(q.format(
schema='cdb_observatory.' if USE_SCHEMA else '',
))
commit()
ARGS = {
'OBS_GetMeasureByID': "name, 'us.census.acs.B01001002', '{}'",
'OBS_GetMeasure': "{}, 'us.census.acs.B01001002'",
'OBS_GetCategory': "{}, 'us.census.spielman_singleton_segments.X10'",
}
GEOMS = {
'point': 'ST_PointOnSurface(the_geom)',
'polygon_match': 'the_geom',
'polygon_buffered': 'ST_Buffer(the_geom::GEOGRAPHY, 1000)::GEOMETRY(GEOMETRY, 4326)',
}
@parameterized([
('OBS_GetMeasureByID', 'us.census.tiger.block_group_clipped'),
('OBS_GetMeasureByID', 'us.census.tiger.county'),
('OBS_GetMeasure', GEOMS['point']),
('OBS_GetMeasure', GEOMS['polygon_match']),
('OBS_GetMeasure', GEOMS['polygon_buffered']),
('OBS_GetCategory', GEOMS['point']),
('OBS_GetCategory', GEOMS['polygon_match']),
('OBS_GetCategory', GEOMS['polygon_buffered']),
])
def test_performance(api_method, arg):
print api_method, arg
col = 'measure' if 'measure' in api_method.lower() else 'category'
for rows in (1, 10, 50, 100):
q = 'UPDATE obs_censustest SET {col} = {schema}{api_method}({args}) WHERE cartodb_id < {n}'.format(
col=col,
schema='cdb_observatory.' if USE_SCHEMA else '',
api_method=api_method,
args=ARGS[api_method].format(arg),
n=rows+1)
start = time()
query(q)
end = time()
print rows, ': ', (rows / (end - start)), ' QPS'
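
For reference, this is how one parameterization of the timed `UPDATE` expands: the `OBS_GetMeasure` case over point geometries with `rows = 10` (so `cartodb_id < 11`), assuming the schema-qualified call:

```sql
UPDATE obs_censustest
SET measure = cdb_observatory.OBS_GetMeasure(
      ST_PointOnSurface(the_geom),
      'us.census.acs.B01001002')
WHERE cartodb_id < 11;
```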

src/python/test/util.py (new file, 31 lines)

@@ -0,0 +1,31 @@
import os
import psycopg2
DB_CONN = psycopg2.connect('postgres://{user}:{password}@{host}:{port}/{database}'.format(
user=os.environ.get('PGUSER', 'postgres'),
password=os.environ.get('PGPASSWORD', ''),
host=os.environ.get('PGHOST', 'localhost'),
port=os.environ.get('PGPORT', '5432'),
database=os.environ.get('PGDATABASE', 'postgres'),
))
CURSOR = DB_CONN.cursor()
def query(q):
'''
Query the database.
'''
try:
CURSOR.execute(q)
return CURSOR
except:
DB_CONN.rollback()
raise
def commit():
try:
DB_CONN.commit()
except:
DB_CONN.rollback()
raise