improvements to scoring, fixing oversimplification and removing some premature optimization

This commit is contained in:
John Krauss
2016-12-01 21:50:39 +00:00
parent 4ce1648550
commit 44932be1f5
3 changed files with 18 additions and 21 deletions

View File

@@ -341,7 +341,7 @@ CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasure(
normalize TEXT DEFAULT NULL,
boundary_id TEXT DEFAULT NULL,
time_span TEXT DEFAULT NULL,
simplification NUMERIC DEFAULT 0.0001
simplification NUMERIC DEFAULT 0.00001
)
RETURNS NUMERIC
AS $$
@@ -408,7 +408,7 @@ BEGIN
USING COALESCE(boundary_id, ''), measure_id, COALESCE(time_span, ''),
CASE WHEN ST_GeometryType(geom) = 'ST_Point' THEN
st_buffer(geom::geography, 10)::geometry(geometry, 4326)
ELSE ST_Envelope(geom)
ELSE geom
END;
IF geom_id IS NULL THEN
@@ -485,7 +485,7 @@ BEGIN
sql = format('WITH _subdivided AS (
SELECT ST_Subdivide($1) AS geom
), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
/ ST_Area(FIRST(geom.%I)) overlap, geom.%I geom_ref
/ ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref
FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I),

View File

@@ -431,10 +431,7 @@ BEGIN
RETURN QUERY
EXECUTE format($string$
SELECT
(1 / (abs(numgeoms - $3)
--* (1 / Coalesce(NullIf(notnull_percent, 0), 1))
--* (1 / Coalesce(NullIf(percentfill, 0), 0.0001))
))::Numeric * percentfill
((100.0 / (1+abs(log(1 + $3) - log(1 + numgeoms)))) * percentfill)::Numeric
AS score, *
FROM (
WITH clipped_geom AS (

View File

@@ -352,25 +352,25 @@ AS _obs_getavailablegeometries_foobarbaz_denom_not_in_2010_2014;
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']
'us.census.tiger.county', 'us.census.tiger.zcta5']
AS _obs_geometryscores_500m_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
'us.census.tiger.county', 'us.census.tiger.zcta5']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']
'us.census.tiger.county', 'us.census.tiger.zcta5']
AS _obs_geometryscores_5km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 5000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
'us.census.tiger.county', 'us.census.tiger.zcta5']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.zcta5',
'us.census.tiger.county', 'us.census.tiger.block_group']
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group',
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_50km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
@@ -378,8 +378,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY[ 'us.census.tiger.county', 'us.census.tiger.zcta5',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
ARRAY[ 'us.census.tiger.zcta5', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.block_group']
AS _obs_geometryscores_500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500000)::Geometry(Geometry, 4326),
@@ -436,8 +436,8 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.county', 'us.census.tiger.zcta5',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
ARRAY['us.census.tiger.county', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.block_group']
AS _obs_geometryscores_500km_buffer_50_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
@@ -445,8 +445,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
'us.census.tiger.zcta5', 'us.census.tiger.county'], 50);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC)
= ARRAY['us.census.tiger.zcta5', 'us.census.tiger.county',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
= ARRAY['us.census.tiger.zcta5', 'us.census.tiger.census_tract',
'us.census.tiger.block_group', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_500_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
@@ -454,8 +454,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC)
'us.census.tiger.zcta5', 'us.census.tiger.county'], 500);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.zcta5',
'us.census.tiger.county', 'us.census.tiger.block_group']
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group',
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_2500_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),