11 Commits
1.3.0 ... 1.3.2

Author SHA1 Message Date
John Krauss
50ec6dddf6 release-v1.3.2 artifact 2017-03-02 21:16:22 +00:00
John Krauss
0ebe9babeb update tests 2017-03-02 21:09:23 +00:00
John Krauss
4fad32d5f2 fix and NEWS.md 2017-03-02 21:07:29 +00:00
John Krauss
f0efa1e2eb release v1.3.1 2017-03-01 16:33:35 +00:00
John Krauss
bcbd8a2be4 change OBS_GetLegacyMetadata to return median/average measures too when called for polygons 2017-03-01 16:03:14 +00:00
John Krauss
71d891c067 handle blank aggregates 2017-02-16 17:53:38 +00:00
John Krauss
01b56fbfcc update fixtures 2017-02-16 17:38:18 +00:00
John Krauss
6215f6585c update NEWS.md 2017-02-16 17:23:47 +00:00
John Krauss
9bda063148 Merge branch 'nonsum-interpolation' into release-v-1.3.1 2017-02-16 17:20:38 +00:00
John Krauss
4a97689705 add point for au 2017-02-16 16:50:40 +00:00
John Krauss
2edb850a45 estimate average and median across arbitrary areas if universe is provided, otherwise return null and raise a notice. fixes #252 2017-02-10 01:08:10 +00:00
14 changed files with 5255 additions and 631 deletions

17
NEWS.md
View File

@@ -1,3 +1,20 @@
1.3.2 (2017-03-02)
__Bugfixes__
* Accept "prenormalized" as well as "predenominated" to bypass normalization.
This fixes issues with Camshaft.
1.3.1 (2017-02-16)
__Improvements__
* It is now possible to obtain measures that are averages or medians over
arbitrary polygons ([#254](https://github.com/CartoDB/observatory-extension/pull/254).
* Added test point for Australian data
* `OBS_GetLegacyMetadata` now returns median and averages in cases where it is
called for measures for polygons
1.3.0 (2017-01-17)
__API Changes__

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,5 @@
comment = 'CartoDB Observatory backend extension'
default_version = '1.3.0'
default_version = '1.3.2'
requires = 'postgis'
superuser = true
schema = cdb_observatory

View File

@@ -1,5 +1,5 @@
comment = 'CartoDB Observatory backend extension'
default_version = '1.3.0'
default_version = '1.3.2'
requires = 'postgis'
superuser = true
schema = cdb_observatory

View File

@@ -205,6 +205,18 @@ END;
$$ LANGUAGE plpgsql;
-- Function we can call to raise an exception in the midst of a SQL statement
CREATE OR REPLACE FUNCTION cdb_observatory._OBS_RaiseNotice(
message TEXT
) RETURNS TEXT
AS $$
BEGIN
RAISE NOTICE '%', message;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-- Create a function that always returns the first non-NULL item
CREATE OR REPLACE FUNCTION cdb_observatory.first_agg ( anyelement, anyelement )
RETURNS anyelement LANGUAGE SQL IMMUTABLE STRICT AS $$

View File

@@ -174,6 +174,7 @@ BEGIN
CASE WHEN f.numer_id IS NULL THEN NULL ELSE denom_tablename END denom_tablename,
CASE WHEN f.numer_id IS NULL THEN NULL ELSE denom_name END denom_name,
CASE WHEN f.numer_id IS NULL THEN NULL ELSE denom_type END denom_type,
CASE WHEN f.numer_id IS NULL THEN NULL ELSE denom_reltype END denom_reltype,
m.geom_id,
m.geom_timespan,
geom_colname,
@@ -212,16 +213,23 @@ BEGIN
'numer_geomref_colname', cdb_observatory.FIRST(meta.numer_geomref_colname),
'numer_tablename', cdb_observatory.FIRST(meta.numer_tablename),
'numer_type', cdb_observatory.FIRST(meta.numer_type),
--'numer_description', cdb_observatory.FIRST(meta.numer_description),
--'numer_t_description', cdb_observatory.FIRST(meta.numer_t_description),
'denom_aggregate', cdb_observatory.FIRST(meta.denom_aggregate),
'denom_colname', cdb_observatory.FIRST(denom_colname),
'denom_geomref_colname', cdb_observatory.FIRST(denom_geomref_colname),
'denom_tablename', cdb_observatory.FIRST(denom_tablename),
'denom_type', cdb_observatory.FIRST(meta.denom_type),
'denom_reltype', cdb_observatory.FIRST(meta.denom_reltype),
--'denom_description', cdb_observatory.FIRST(meta.denom_description),
--'denom_t_description', cdb_observatory.FIRST(meta.denom_t_description),
'geom_colname', cdb_observatory.FIRST(geom_colname),
'geom_geomref_colname', cdb_observatory.FIRST(geom_geomref_colname),
'geom_tablename', cdb_observatory.FIRST(geom_tablename),
'geom_type', cdb_observatory.FIRST(meta.geom_type),
'geom_timespan', cdb_observatory.FIRST(meta.geom_timespan),
--'geom_description', cdb_observatory.FIRST(meta.geom_description),
--'geom_t_description', cdb_observatory.FIRST(meta.geom_t_description),
'numer_timespan', cdb_observatory.FIRST(numer_timespan),
'numer_name', cdb_observatory.FIRST(numer_name),
'denom_name', cdb_observatory.FIRST(denom_name),
@@ -301,7 +309,7 @@ BEGIN
map_type := 'areaNormalized';
ELSIF normalize ILIKE 'denom%' THEN
map_type := 'denominated';
ELSIF normalize ILIKE 'predenom%' THEN
ELSIF normalize ILIKE 'pre%' THEN
map_type := 'predenominated';
ELSE
-- defaults: area normalization for point if it's possible and none for
@@ -409,6 +417,7 @@ BEGIN
(unnest($1))->>'denom_geomref_colname' denom_geomref_colname,
(unnest($1))->>'denom_tablename' denom_tablename,
(unnest($1))->>'denom_type' denom_type,
(unnest($1))->>'denom_reltype' denom_reltype,
(unnest($1))->>'geom_id' geom_id,
(unnest($1))->>'geom_colname' geom_colname,
(unnest($1))->>'geom_geomref_colname' geom_geomref_colname,
@@ -546,6 +555,7 @@ BEGIN
(unnest($1))->>'denom_geomref_colname' denom_geomref_colname,
(unnest($1))->>'denom_tablename' denom_tablename,
(unnest($1))->>'denom_type' denom_type,
(unnest($1))->>'denom_reltype' denom_reltype,
(unnest($1))->>'geom_id' geom_id,
(unnest($1))->>'geom_colname' geom_colname,
(unnest($1))->>'geom_geomref_colname' geom_geomref_colname,
@@ -568,7 +578,8 @@ BEGIN
WHEN LOWER(numer_type) LIKE 'numeric' THEN
'''value'', ' || CASE
-- denominated
WHEN LOWER(normalization) LIKE 'denom%' OR (normalization IS NULL AND denom_id IS NOT NULL)
WHEN LOWER(normalization) LIKE 'denom%' OR
(normalization IS NULL AND LOWER(denom_reltype) LIKE 'denominator')
THEN ' CASE ' ||
-- denominated point-in-poly or user polygon is same as OBS polygon
' WHEN ST_GeometryType(cdb_observatory.FIRST(_geoms.geom)) = ''ST_Point'' ' ||
@@ -597,7 +608,8 @@ BEGIN
' / (COUNT(*) / COUNT(distinct ' || geom_tablename || '.' || geom_geomref_colname || ')) ' ||
' END '
-- areaNormalized
WHEN LOWER(normalization) LIKE 'area%' OR (normalization IS NULL AND numer_aggregate ILIKE 'sum')
WHEN LOWER(normalization) LIKE 'area%' OR
(normalization IS NULL AND numer_aggregate ILIKE 'sum')
THEN ' CASE ' ||
-- areaNormalized point-in-poly or user polygon is the same as OBS polygon
' WHEN ST_GeometryType(cdb_observatory.FIRST(_geoms.geom)) = ''ST_Point'' ' ||
@@ -618,15 +630,49 @@ BEGIN
' END / (ST_Area(' || geom_tablename || '.' || geom_colname || '::Geography) / 1000000)) ' ||
' / (COUNT(*) / COUNT(distinct ' || geom_tablename || '.' || geom_geomref_colname || ')) ' ||
' END '
-- prenormalized
ELSE ' CASE ' ||
-- median/average measures with universe
WHEN LOWER(numer_aggregate) IN ('median', 'average') AND
denom_reltype ILIKE 'universe' AND
(normalization IS NULL OR LOWER(normalization) LIKE 'pre%')
THEN ' CASE ' ||
-- predenominated point-in-poly or user polygon is the same as OBS- polygon
' WHEN ST_GeometryType(cdb_observatory.FIRST(_geoms.geom)) = ''ST_Point'' ' ||
' OR cdb_observatory.FIRST(_geoms.geom = ' || geom_tablename || '.' || geom_colname || ')' ||
' THEN cdb_observatory.FIRST(' || numer_tablename || '.' || numer_colname || ') ' ||
' ELSE ' ||
-- predenominated polygon interpolation weighted by universe
-- SUM (numer * denom * (% user geom in OBS geom)) / SUM (denom * (% user geom in OBS geom))
-- (10 * 1000 * 1) / (1000 * 1) = 10
-- (10 * 1000 * 1 + 50 * 10 * 1) / (1000 + 10) = 10500 / 10000 = 10.5
' SUM(' || numer_tablename || '.' || numer_colname ||
' * ' || denom_tablename || '.' || denom_colname ||
' * CASE WHEN ST_Within(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ') ' ||
' THEN ST_Area(_geoms.geom) / ST_Area(' || geom_tablename || '.' || geom_colname || ') ' ||
' WHEN ST_Within(' || geom_tablename || '.' || geom_colname || ', _geoms.geom) ' ||
' THEN 1 ' ||
' ELSE (ST_Area(ST_Intersection(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ')) ' ||
' / ST_Area(' || geom_tablename || '.' || geom_colname || '))' ||
' END) ' ||
' / SUM(' || denom_tablename || '.' || denom_colname ||
' * CASE WHEN ST_Within(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ') ' ||
' THEN ST_Area(_geoms.geom) / ST_Area(' || geom_tablename || '.' || geom_colname || ') ' ||
' WHEN ST_Within(' || geom_tablename || '.' || geom_colname || ', _geoms.geom) ' ||
' THEN 1 ' ||
' ELSE (ST_Area(ST_Intersection(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ')) ' ||
' / ST_Area(' || geom_tablename || '.' || geom_colname || '))' ||
' END) ' ||
' / (COUNT(*) / COUNT(distinct ' || geom_tablename || '.' || geom_geomref_colname || ')) ' ||
'END '
-- prenormalized for summable measures. point or summable only!
WHEN numer_aggregate ILIKE 'sum' AND
(normalization IS NULL OR LOWER(normalization) LIKE 'pre%')
THEN ' CASE ' ||
-- predenominated point-in-poly or user polygon is the same as OBS- polygon
' WHEN ST_GeometryType(cdb_observatory.FIRST(_geoms.geom)) = ''ST_Point'' ' ||
' OR cdb_observatory.FIRST(_geoms.geom = ' || geom_tablename || '.' || geom_colname || ')' ||
' THEN cdb_observatory.FIRST(' || numer_tablename || '.' || numer_colname || ') ' ||
' ELSE ' ||
-- predenominated polygon interpolation
-- TODO should weight by universe instead of area
-- SUM (numer * (% user geom in OBS geom))
' SUM(' || numer_tablename || '.' || numer_colname || ' ' ||
' * CASE WHEN ST_Within(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ') ' ||
@@ -638,6 +684,14 @@ BEGIN
' END) ' ||
' / (COUNT(*) / COUNT(distinct ' || geom_tablename || '.' || geom_geomref_colname || ')) ' ||
'END '
-- Everything else. Point only!
ELSE ' CASE ' ||
' WHEN ST_GeometryType(cdb_observatory.FIRST(_geoms.geom)) = ''ST_Point'' ' ||
' OR cdb_observatory.FIRST(_geoms.geom = ' || geom_tablename || '.' || geom_colname || ')' ||
' THEN cdb_observatory.FIRST(' || numer_tablename || '.' || numer_colname || ') ' ||
' ELSE cdb_observatory._OBS_RaiseNotice(''Cannot perform calculation over polygon for ' ||
numer_id || '/' || coalesce(denom_id, '') || '/' || geom_id || '/' || numer_timespan || ''')::Numeric ' ||
' END '
END || ':: ' || numer_type
-- categorical/text

View File

@@ -369,8 +369,10 @@ RETURNS TABLE (
DECLARE
aggregate_condition TEXT DEFAULT '';
BEGIN
IF aggregate_type IS NOT NULL THEN
aggregate_condition := format(' AND numer_aggregate = %L ', aggregate_type);
IF LOWER(aggregate_type) ILIKE 'sum' THEN
aggregate_condition := ' AND numer_aggregate IN (''sum'', ''median'', ''average'') ';
ELSIF aggregate_type IS NOT NULL THEN
aggregate_condition := format(' AND numer_aggregate ILIKE %L ', aggregate_type);
END IF;
RETURN QUERY
EXECUTE format($string$

View File

@@ -48,6 +48,15 @@ t
obs_getmeasure_out_of_bounds_geometry
t
(1 row)
obs_getmeasure_estimate_for_blank_aggregate
t
(1 row)
obs_getmeasure_per_capita_income_average
t
(1 row)
obs_getmeasure_median_capita_income_average
t
(1 row)
obs_getcategory_point
t
(1 row)
@@ -162,9 +171,15 @@ t|t|t
id|data_polygon_measure_area|nullcol
t|t|t
(1 row)
id|data_point_measure_prenormalized|nullcol
t|t|t
(1 row)
id|data_point_measure_predenominated|nullcol
t|t|t
(1 row)
id|data_polygon_measure_prenormalized|nullcol
t|t|t
(1 row)
id|data_polygon_measure_predenominated|nullcol
t|t|t
(1 row)

View File

@@ -192,10 +192,16 @@ t
_median_income_in_legacy_builder_metadata
t
(1 row)
_gini_in_legacy_builder_metadata
t
(1 row)
_total_pop_in_legacy_builder_metadata_sums
t
(1 row)
_median_income_not_in_legacy_builder_metadata_sums
_median_income_in_legacy_builder_metadata_sums
t
(1 row)
_gini_not_in_legacy_builder_metadata_sums
t
(1 row)
_no_dupe_subsections_in_legacy_builder_metadata

File diff suppressed because one or more lines are too long

View File

@@ -106,6 +106,21 @@ SELECT cdb_observatory.OBS_GetMeasure(
ST_SetSRID(st_point(0, 0), 4326),
'us.census.acs.B01003001') IS NULL As OBS_GetMeasure_out_of_bounds_geometry;
-- OBS_GetMeasure over arbitrary area for a measure we cannot estimate
SELECT cdb_observatory.OBS_GetMeasure(
ST_Buffer(cdb_observatory._testpoint(), 0.1),
'us.census.acs.B19083001') IS NULL As OBS_GetMeasure_estimate_for_blank_aggregate;
-- OBS_GetMeasure over arbitrary area for an average measure we can estimate
SELECT abs(cdb_observatory.OBS_GetMeasure(
ST_Buffer(cdb_observatory._testpoint(), 0.01),
'us.census.acs.B19301001') - 20025) / 20025 < 0.001 As OBS_GetMeasure_per_capita_income_average;
-- OBS_GetMeasure over arbitrary area for a median measure we can estimate
SELECT abs(cdb_observatory.OBS_GetMeasure(
ST_Buffer(cdb_observatory._testpoint(), 0.01),
'us.census.acs.B19013001') - 39266) / 39266 < 0.001 As OBS_GetMeasure_median_capita_income_average;
-- Point-based OBS_GetCategory
SELECT cdb_observatory.OBS_GetCategory(
cdb_observatory._TestPoint(), 'us.census.spielman_singleton_segments.X10') = 'Wealthy, urban without Kids' As OBS_GetCategory_point;
@@ -451,6 +466,19 @@ SELECT id = 1 id,
data->1 IS NULL nullcol
FROM data;
-- OBS_GetData/OBS_GetMeta by point geom with one standard measure predenom
-- called "prednormalized"
WITH
meta AS (SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestPoint(),
'[{"numer_id": "us.census.acs.B01003001", "normalization": "prenormalized"}]') meta),
data AS (SELECT * FROM cdb_observatory.OBS_GetData(
ARRAY[(cdb_observatory._TestPoint(), 1)::geomval],
(SELECT meta FROM meta)))
SELECT id = 1 id,
abs((data->0->>'value')::Numeric - 1900) / 1900 < 0.001 data_point_measure_prenormalized,
data->1 IS NULL nullcol
FROM data;
-- OBS_GetData/OBS_GetMeta by point geom with one standard measure predenom
WITH
meta AS (SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestPoint(),
@@ -463,6 +491,19 @@ SELECT id = 1 id,
data->1 IS NULL nullcol
FROM data;
-- OBS_GetData/OBS_GetMeta by polygon geom with one standard measure predenom
-- called "prenormalized"
WITH
meta AS (SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestArea(),
'[{"numer_id": "us.census.acs.B01003001", "normalization": "prenormalized"}]') meta),
data AS (SELECT * FROM cdb_observatory.OBS_GetData(
ARRAY[(cdb_observatory._TestArea(), 1)::geomval],
(SELECT meta FROM meta)))
SELECT id = 1 id,
abs((data->0->>'value')::Numeric - 12327) / 12327 < 0.001 data_polygon_measure_prenormalized,
data->1 IS NULL nullcol
FROM data;
-- OBS_GetData/OBS_GetMeta by polygon geom with one standard measure predenom
WITH
meta AS (SELECT cdb_observatory.OBS_GetMeta(cdb_observatory._TestArea(),

View File

@@ -499,15 +499,25 @@ SELECT 'us.census.acs.B19013001' IN (SELECT
FROM cdb_observatory.OBS_LegacyBuilderMetadata()
) AS _median_income_in_legacy_builder_metadata;
SELECT 'us.census.acs.B19083001' IN (SELECT
(jsonb_array_elements(((jsonb_array_elements(subsection))->'f1')->'columns')->'f1')->>'id' AS id
FROM cdb_observatory.OBS_LegacyBuilderMetadata()
) AS _gini_in_legacy_builder_metadata;
SELECT 'us.census.acs.B01003001' IN (SELECT
(jsonb_array_elements(((jsonb_array_elements(subsection))->'f1')->'columns')->'f1')->>'id' AS id
FROM cdb_observatory.OBS_LegacyBuilderMetadata('sum')
) AS _total_pop_in_legacy_builder_metadata_sums;
SELECT 'us.census.acs.B19013001' NOT IN (SELECT
SELECT 'us.census.acs.B19013001' IN (SELECT
(jsonb_array_elements(((jsonb_array_elements(subsection))->'f1')->'columns')->'f1')->>'id' AS id
FROM cdb_observatory.OBS_LegacyBuilderMetadata('sum')
) AS _median_income_not_in_legacy_builder_metadata_sums;
) AS _median_income_in_legacy_builder_metadata_sums;
SELECT 'us.census.acs.B19083001' NOT IN (SELECT
(jsonb_array_elements(((jsonb_array_elements(subsection))->'f1')->'columns')->'f1')->>'id' AS id
FROM cdb_observatory.OBS_LegacyBuilderMetadata('sum')
) AS _gini_not_in_legacy_builder_metadata_sums;
SELECT COUNT(*) = 0 _no_dupe_subsections_in_legacy_builder_metadata FROM (
SELECT name, subsection, count(*) FROM

View File

@@ -7,7 +7,7 @@ from util import query
USE_SCHEMA = True
MEASURE_COLUMNS = query('''
SELECT distinct numer_id, numer_aggregate NOT ILIKE 'sum' as point_only
SELECT distinct numer_id, Coalesce(numer_aggregate, '') NOT ILIKE 'sum' as point_only
FROM observatory.obs_meta
WHERE numer_type ILIKE 'numeric'
AND numer_weight > 0
@@ -160,6 +160,8 @@ def default_lonlat(column_id):
raise SkipTest('No tests for Eurostat!')
elif column_id.startswith('br.'):
return (-23.53, -46.63)
elif column_id.startswith('au.'):
return (-33.8806, 151.2131)
else:
raise Exception('No catalog point set for {}'.format(column_id))