Merge pull request #144 from CartoDB/getmeasure-using-obsmeta
Getmeasure using obsmeta
This commit is contained in:
@@ -158,28 +158,6 @@ BEGIN
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetRelatedColumn(columns_ids text[], reltype text )
|
||||
RETURNS TEXT[]
|
||||
AS $$
|
||||
DECLARE
|
||||
result TEXT[];
|
||||
BEGIN
|
||||
EXECUTE '
|
||||
With ids as (
|
||||
select row_number() over() as no, id from (select unnest($1) as id) t
|
||||
)
|
||||
select array_agg(target_id order by no)
|
||||
FROM ids
|
||||
LEFT JOIN observatory.obs_column_to_column
|
||||
on source_id = id
|
||||
where reltype = $2 or reltype is null
|
||||
'
|
||||
INTO result
|
||||
using columns_ids, reltype;
|
||||
return result;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Function that replaces all non digits or letters with _ trims and lowercases the
|
||||
-- passed measure name
|
||||
|
||||
|
||||
@@ -338,44 +338,173 @@ $$ LANGUAGE plpgsql;
|
||||
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasure(
|
||||
geom geometry(Geometry, 4326),
|
||||
measure_id TEXT,
|
||||
normalize TEXT DEFAULT 'area', -- TODO none/null
|
||||
normalize TEXT DEFAULT NULL,
|
||||
boundary_id TEXT DEFAULT NULL,
|
||||
time_span TEXT DEFAULT NULL
|
||||
)
|
||||
RETURNS NUMERIC
|
||||
AS $$
|
||||
DECLARE
|
||||
geom_type TEXT;
|
||||
map_type TEXT;
|
||||
numer_aggregate TEXT;
|
||||
numer_colname TEXT;
|
||||
numer_geomref_colname TEXT;
|
||||
numer_tablename TEXT;
|
||||
denom_colname TEXT;
|
||||
denom_geomref_colname TEXT;
|
||||
denom_tablename TEXT;
|
||||
geom_colname TEXT;
|
||||
geom_geomref_colname TEXT;
|
||||
geom_tablename TEXT;
|
||||
result NUMERIC;
|
||||
measure_ids TEXT[];
|
||||
denominator_id TEXT;
|
||||
vals NUMERIC[];
|
||||
sql TEXT;
|
||||
numer_name TEXT;
|
||||
BEGIN
|
||||
|
||||
IF normalize ILIKE 'area' THEN
|
||||
measure_ids := ARRAY[measure_id];
|
||||
EXECUTE
|
||||
$query$
|
||||
SELECT numer_aggregate, numer_colname, numer_geomref_colname, numer_tablename,
|
||||
denom_colname, denom_geomref_colname, denom_tablename,
|
||||
geom_colname, geom_geomref_colname, geom_tablename, numer_name
|
||||
FROM observatory.obs_meta
|
||||
WHERE (geom_id = $1 OR ($1 = ''))
|
||||
AND numer_id = $2
|
||||
AND (numer_timespan = $3 OR ($3 = ''))
|
||||
ORDER BY geom_weight DESC, numer_timespan DESC
|
||||
LIMIT 1
|
||||
$query$
|
||||
INTO numer_aggregate, numer_colname, numer_geomref_colname, numer_tablename,
|
||||
denom_colname, denom_geomref_colname, denom_tablename,
|
||||
geom_colname, geom_geomref_colname, geom_tablename, numer_name
|
||||
USING COALESCE(boundary_id, ''), measure_id, COALESCE(time_span, '');
|
||||
|
||||
IF ST_GeometryType(geom) = 'ST_Point' THEN
|
||||
geom_type := 'point';
|
||||
ELSIF ST_GeometryType(geom) IN ('ST_Polygon', 'ST_MultiPolygon') THEN
|
||||
geom_type := 'polygon';
|
||||
ELSE
|
||||
RAISE EXCEPTION 'Invalid geometry type (%), can only handle ''ST_Point'', ''ST_Polygon'', and ''ST_MultiPolygon''',
|
||||
ST_GeometryType(geom);
|
||||
END IF;
|
||||
|
||||
IF normalize ILIKE 'area' AND numer_aggregate ILIKE 'sum' THEN
|
||||
map_type := 'areaNormalized';
|
||||
ELSIF normalize ILIKE 'denominator' THEN
|
||||
EXECUTE 'SELECT (cdb_observatory._OBS_GetRelatedColumn(ARRAY[$1], ''denominator''))[1]
|
||||
' INTO denominator_id
|
||||
USING measure_id;
|
||||
measure_ids := ARRAY[measure_id, denominator_id];
|
||||
ELSIF normalize ILIKE 'none' THEN
|
||||
-- TODO we need a switch on obs_get to disable area normalization
|
||||
RAISE EXCEPTION 'No normalization not yet supported.';
|
||||
map_type := 'denominated';
|
||||
ELSE
|
||||
RAISE EXCEPTION 'Only valid inputs for "normalize" are "area" (default) and "denominator".';
|
||||
-- defaults: area normalization for point if it's possible and none for
|
||||
-- polygon or non-summable point
|
||||
IF geom_type = 'point' AND numer_aggregate ILIKE 'sum' THEN
|
||||
map_type := 'areaNormalized';
|
||||
ELSE
|
||||
map_type := 'predenominated';
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
EXECUTE '
|
||||
SELECT ARRAY_AGG(val) FROM (SELECT (cdb_observatory._OBS_Get($1, $2, $3, $4)->>''value'')::NUMERIC val) b
|
||||
'
|
||||
INTO vals
|
||||
USING geom, measure_ids, time_span, boundary_id;
|
||||
|
||||
IF normalize ILIKE 'denominator' THEN
|
||||
RETURN (vals)[1]/(vals)[2];
|
||||
ELSE
|
||||
RETURN (vals)[1];
|
||||
IF geom_type = 'point' THEN
|
||||
IF map_type = 'areaNormalized' THEN
|
||||
sql = format('WITH _geom AS (SELECT ST_Area(geom.%I::Geography) / 1000000 area, geom.%I geom_ref
|
||||
FROM observatory.%I geom
|
||||
WHERE ST_Within(%L, geom.%I)
|
||||
LIMIT 1)
|
||||
SELECT numer.%I / (SELECT area FROM _geom)
|
||||
FROM observatory.%I numer
|
||||
WHERE numer.%I = (SELECT geom_ref FROM _geom)',
|
||||
geom_colname, geom_geomref_colname, geom_tablename,
|
||||
geom, geom_colname, numer_colname, numer_tablename,
|
||||
numer_geomref_colname);
|
||||
ELSIF map_type = 'denominated' THEN
|
||||
sql = format('SELECT numer.%I / NULLIF((SELECT denom.%I FROM observatory.%I denom WHERE denom.%I = numer.%I LIMIT 1), 0)
|
||||
FROM observatory.%I numer
|
||||
WHERE numer.%I = (SELECT geom.%I FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1)',
|
||||
numer_colname, denom_colname, denom_tablename,
|
||||
denom_geomref_colname, numer_geomref_colname,
|
||||
numer_tablename,
|
||||
numer_geomref_colname, geom_geomref_colname,
|
||||
geom_tablename, geom, geom_colname);
|
||||
ELSIF map_type = 'predenominated' THEN
|
||||
sql = format('SELECT numer.%I
|
||||
FROM observatory.%I numer
|
||||
WHERE numer.%I = (SELECT geom.%I FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1)',
|
||||
numer_colname, numer_tablename,
|
||||
numer_geomref_colname, geom_geomref_colname, geom_tablename,
|
||||
geom, geom_colname);
|
||||
END IF;
|
||||
ELSIF geom_type = 'polygon' THEN
|
||||
IF map_type = 'areaNormalized' THEN
|
||||
sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I))
|
||||
/ ST_Area(geom.%I) overlap, geom.%I geom_ref
|
||||
FROM observatory.%I geom
|
||||
WHERE ST_Intersects(%L, geom.%I)
|
||||
AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0)
|
||||
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) /
|
||||
ST_Area(%L::Geography)
|
||||
FROM observatory.%I numer
|
||||
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
|
||||
geom, geom_colname, geom_colname,
|
||||
geom_geomref_colname, geom_tablename,
|
||||
geom, geom_colname,
|
||||
geom, geom_colname, geom_colname,
|
||||
numer_colname, numer_geomref_colname,
|
||||
geom, numer_tablename,
|
||||
numer_geomref_colname);
|
||||
ELSIF map_type = 'denominated' THEN
|
||||
sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I))
|
||||
/ ST_Area(geom.%I) overlap, geom.%I geom_ref
|
||||
FROM observatory.%I geom
|
||||
WHERE ST_Intersects(%L, geom.%I)
|
||||
AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0),
|
||||
_denom AS (SELECT denom.%I, denom.%I geom_ref
|
||||
FROM observatory.%I denom
|
||||
WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[]))
|
||||
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) /
|
||||
SUM((SELECT _denom.%I * (SELECT _geom.overlap
|
||||
FROM _geom
|
||||
WHERE _geom.geom_ref = _denom.geom_ref)
|
||||
FROM _denom WHERE _denom.geom_ref = numer.%I))
|
||||
FROM observatory.%I numer
|
||||
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
|
||||
geom, geom_colname,
|
||||
geom_colname, geom_geomref_colname,
|
||||
geom_tablename,
|
||||
geom, geom_colname,
|
||||
geom, geom_colname, geom_colname,
|
||||
denom_colname, denom_geomref_colname,
|
||||
denom_tablename,
|
||||
denom_geomref_colname,
|
||||
numer_colname, numer_geomref_colname,
|
||||
denom_colname,
|
||||
numer_geomref_colname,
|
||||
numer_tablename,
|
||||
numer_geomref_colname);
|
||||
ELSIF map_type = 'predenominated' THEN
|
||||
IF numer_aggregate NOT ILIKE 'sum' THEN
|
||||
RAISE EXCEPTION 'Cannot calculate "%" (%) for custom area as it cannot be summed, use ST_PointOnSurface instead',
|
||||
numer_name, numer_id;
|
||||
ELSE
|
||||
sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I))
|
||||
/ ST_Area(geom.%I) overlap, geom.%I geom_ref
|
||||
FROM observatory.%I geom
|
||||
WHERE ST_Intersects(%L, geom.%I)
|
||||
AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0)
|
||||
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I))
|
||||
FROM observatory.%I numer
|
||||
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
|
||||
geom, geom_colname, geom_colname,
|
||||
geom_geomref_colname, geom_tablename,
|
||||
geom, geom_colname,
|
||||
geom, geom_colname, geom_colname,
|
||||
numer_colname, numer_geomref_colname,
|
||||
numer_tablename,
|
||||
numer_geomref_colname);
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
EXECUTE sql INTO result;
|
||||
RETURN result;
|
||||
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
@@ -495,7 +624,7 @@ $$ LANGUAGE plpgsql;
|
||||
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetUSCensusMeasure(
|
||||
geom geometry(Geometry, 4326),
|
||||
name TEXT,
|
||||
normalize TEXT DEFAULT 'area',
|
||||
normalize TEXT DEFAULT NULL,
|
||||
boundary_id TEXT DEFAULT NULL,
|
||||
time_span TEXT DEFAULT NULL
|
||||
)
|
||||
@@ -561,7 +690,7 @@ $$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetPopulation(
|
||||
geom geometry(Geometry, 4326),
|
||||
normalize TEXT DEFAULT 'area',
|
||||
normalize TEXT DEFAULT NULL,
|
||||
boundary_id TEXT DEFAULT NULL,
|
||||
time_span TEXT DEFAULT NULL
|
||||
)
|
||||
|
||||
@@ -9,21 +9,12 @@ t
|
||||
_obs_geomtable_with_null_response
|
||||
t
|
||||
(1 row)
|
||||
test_get_obs_column_with_geoid_and_census_1|test_get_obs_column_with_geoid_and_census_2
|
||||
t|t
|
||||
(1 row)
|
||||
obs_getcolumndata_missing_measure
|
||||
t
|
||||
(1 row)
|
||||
_obs_buildsnapshotquery_test_1
|
||||
t
|
||||
(1 row)
|
||||
_obs_buildsnapshotquery_test_2
|
||||
t
|
||||
(1 row)
|
||||
_obs_getrelatedcolumn_test
|
||||
t
|
||||
(1 row)
|
||||
_obs_standardizemeasurename_test
|
||||
t
|
||||
(1 row)
|
||||
|
||||
@@ -29,29 +29,6 @@ SELECT
|
||||
-- 'us.census.tiger.census_tract'
|
||||
-- );
|
||||
|
||||
WITH result as (
|
||||
SELECT
|
||||
array_agg(a) expected from cdb_observatory._OBS_GetColumnData(
|
||||
'us.census.tiger.census_tract',
|
||||
Array['us.census.spielman_singleton_segments.X55', 'us.census.acs.B01003001'],
|
||||
'2010 - 2014') a
|
||||
)
|
||||
select
|
||||
(expected)[1]::text = '{"colname":"x55","tablename":"obs_65f29658e096ca1485bf683f65fdbc9f05ec3c5d","aggregate":null,"name":"Spielman-Singleton Segments: 55 Clusters","type":"Text","description":"Sociodemographic classes from Spielman and Singleton 2015, 55 clusters","boundary_id":"us.census.tiger.census_tract"}' as test_get_obs_column_with_geoid_and_census_1,
|
||||
(expected)[2]::text = '{"colname":"total_pop","tablename":"obs_b393b5b88c6adda634b2071a8005b03c551b609a","aggregate":"sum","name":"Total Population","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates.","boundary_id":"us.census.tiger.census_tract"}' as test_get_obs_column_with_geoid_and_census_2
|
||||
from result;
|
||||
|
||||
-- should be null-valued
|
||||
WITH result as (
|
||||
SELECT
|
||||
array_agg(a) expected from cdb_observatory._OBS_GetColumnData(
|
||||
'us.census.tiger.census_tract',
|
||||
Array['us.census.tiger.baloney'],
|
||||
'2010 - 2014') a
|
||||
)
|
||||
select expected is null as OBS_GetColumnData_missing_measure
|
||||
from result;
|
||||
|
||||
-- OBS_BuildSnapshotQuery
|
||||
-- Should give back: SELECT vals[1] As total_pop, vals[2] As male_pop, vals[3] As female_pop, vals[4] As median_age
|
||||
SELECT
|
||||
@@ -65,15 +42,6 @@ SELECT
|
||||
Array['mandarin_orange']
|
||||
) = 'SELECT vals[1] As mandarin_orange' As _OBS_BuildSnapshotQuery_test_2;
|
||||
|
||||
SELECT cdb_observatory._OBS_GetRelatedColumn(
|
||||
Array[
|
||||
'es.ine.t3_1',
|
||||
'us.census.acs.B01003001',
|
||||
'us.census.acs.B01001002'
|
||||
],
|
||||
'denominator'
|
||||
) = '{es.ine.t1_1,NULL,us.census.acs.B01003001}' As _OBS_GetRelatedColumn_test;
|
||||
|
||||
-- should give back a standardized measure name
|
||||
SELECT cdb_observatory._OBS_StandardizeMeasureName('test 343 %% 2 qqq }}{{}}') = 'test_343_2_qqq' As _OBS_StandardizeMeasureName_test;
|
||||
|
||||
|
||||
@@ -131,7 +131,7 @@ WITH result as (
|
||||
-- Point-based OBS_GetMeasure with zillow
|
||||
SELECT abs(OBS_GetMeasure_zhvi_point - 583600) / 583600 < 0.001 AS OBS_GetMeasure_zhvi_point_test FROM cdb_observatory.OBS_GetMeasure(
|
||||
ST_SetSRID(ST_Point(-73.94602417945862, 40.6768220087458), 4326),
|
||||
'us.zillow.AllHomes_Zhvi', 'area', 'us.census.tiger.zcta5', '2014-01'
|
||||
'us.zillow.AllHomes_Zhvi', null, 'us.census.tiger.zcta5', '2014-01'
|
||||
) As t(OBS_GetMeasure_zhvi_point);
|
||||
|
||||
-- Point-based OBS_GetMeasure with zillow default to latest
|
||||
|
||||
Reference in New Issue
Block a user