Instead of arbitrary ids, the id of one of the aggregated records is used, so that if it is used in the UI to query point the information of one of the records grouped at the point will be retrieved.
456 lines
17 KiB
PL/PgSQL
456 lines
17 KiB
PL/PgSQL
-- Calculate the estimated extent of a cartodbfy'ed table.
|
|
-- Scope: private.
|
|
-- Parameters
|
|
-- reloid: oid of the input table.
|
|
-- Return value A box2d extent in 3857.
|
|
CREATE OR REPLACE FUNCTION _cdb_estimated_extent(reloid REGCLASS)
|
|
RETURNS box2d
|
|
AS $$
|
|
DECLARE
|
|
ext box2d;
|
|
ext_query text;
|
|
table_id record;
|
|
BEGIN
|
|
|
|
SELECT n.nspname AS schema_name, c.relname table_name INTO STRICT table_id
|
|
FROM pg_class c JOIN pg_namespace n on n.oid = c.relnamespace WHERE c.oid = reloid::oid;
|
|
|
|
ext_query = format(
|
|
'SELECT ST_EstimatedExtent(''%1$I'', ''%2$I'', ''%3$I'');',
|
|
table_id.schema_name, table_id.table_name, 'the_geom_webmercator'
|
|
);
|
|
|
|
BEGIN
|
|
EXECUTE ext_query INTO ext;
|
|
EXCEPTION
|
|
-- This is the typical ERROR: stats for "mytable" do not exist
|
|
WHEN internal_error THEN
|
|
-- Get stats and execute again
|
|
EXECUTE format('ANALYZE %1$I', reloid);
|
|
EXECUTE ext_query INTO ext;
|
|
END;
|
|
|
|
RETURN ext;
|
|
END;
|
|
$$ LANGUAGE PLPGSQL VOLATILE;
|
|
|
|
-- Determine the max feature density of a given dataset.
|
|
-- Scope: private.
|
|
-- Parameters
|
|
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
|
|
-- nz: number of zoom levels to consider from z0 upward.
|
|
-- Return value: feature density (num_features / webmercator_squared_meters).
|
|
CREATE OR REPLACE FUNCTION _CDB_Feature_Density(reloid REGCLASS, nz integer)
|
|
RETURNS FLOAT8
|
|
AS $$
|
|
DECLARE
|
|
fd FLOAT8;
|
|
min_features TEXT;
|
|
n integer = 4;
|
|
c FLOAT8;
|
|
BEGIN
|
|
-- TODO: for small total count or extents we could just:
|
|
-- EXECUTE 'SELECT Count(*)/ST_Area(ST_Extent(the_geom_webmercator)) FROM ' || reloid::text || ';' INTO fd;
|
|
|
|
-- min_features is a SQL subexpression which can depend on z and represents
|
|
-- the minimum number of features to recursively consider a tile.
|
|
-- We can either use a fixed minimum number of features per tile
|
|
-- or a minimum feature density by dividing the number of features by
|
|
-- the area of tiles at level Z: c*c*power(2, -2*z)
|
|
-- with c = CDB_XYZ_Resolution(-8) (earth circumference)
|
|
min_features = '500';
|
|
SELECT CDB_XYZ_Resolution(-8) INTO c;
|
|
|
|
-- We first compute a set of *seed* tiles, of the minimum Z level, z0, such that
|
|
-- they cover the extent of the table and we have at least n of them in each
|
|
-- linear dimension (i.e. at least n*n tiles cover the extent).
|
|
-- We compute the number of features in these tiles, and recursively in
|
|
-- subtiles up to level z0 + nz. Then we compute the maximum of the feature
|
|
-- density (per tile area in webmercator squared meters) for all the
|
|
-- considered tiles.
|
|
EXECUTE Format('
|
|
WITH RECURSIVE t(x, y, z, e) AS (
|
|
WITH ext AS (SELECT _cdb_estimated_extent(%6$s) as g),
|
|
base AS (
|
|
SELECT (-floor(log(2, (greatest(ST_XMax(ext.g)-ST_XMin(ext.g), ST_YMax(ext.g)-ST_YMin(ext.g))/(%4$s*%5$s))::numeric)))::integer z
|
|
FROM ext
|
|
),
|
|
lim AS (
|
|
SELECT
|
|
FLOOR((ST_XMin(ext.g)+CDB_XYZ_Resolution(0)*128)/(CDB_XYZ_Resolution(base.z)*256))::integer x0,
|
|
FLOOR((ST_XMax(ext.g)+CDB_XYZ_Resolution(0)*128)/(CDB_XYZ_Resolution(base.z)*256))::integer x1,
|
|
FLOOR((CDB_XYZ_Resolution(0)*128-ST_YMin(ext.g))/(CDB_XYZ_Resolution(base.z)*256))::integer y1,
|
|
FLOOR((CDB_XYZ_Resolution(0)*128-ST_YMax(ext.g))/(CDB_XYZ_Resolution(base.z)*256))::integer y0
|
|
FROM ext, base
|
|
),
|
|
seed AS (
|
|
SELECT xt, yt, base.z, (
|
|
SELECT count(*) FROM %1$s
|
|
WHERE the_geom_webmercator && CDB_XYZ_Extent(xt, yt, base.z)
|
|
) e
|
|
FROM base, lim, generate_series(lim.x0, lim.x1) xt, generate_series(lim.y0, lim.y1) yt
|
|
)
|
|
SELECT * from seed
|
|
UNION ALL
|
|
SELECT x*2 + xx, y*2 + yy, t.z+1, (
|
|
SELECT count(*) FROM %1$s
|
|
WHERE the_geom_webmercator && CDB_XYZ_Extent(x*2 + xx, y*2 + yy, t.z+1)
|
|
)
|
|
FROM t, base, (VALUES (0, 0), (0, 1), (1, 1), (1, 0)) AS c(xx, yy)
|
|
WHERE t.e > %2$s AND t.z < (base.z + %3$s)
|
|
)
|
|
SELECT MAX(e/ST_Area(CDB_XYZ_Extent(x,y,z))) FROM t where e > 0;
|
|
', reloid::text, min_features, nz, n, c, reloid::oid)
|
|
INTO fd;
|
|
RETURN fd;
|
|
END
|
|
$$ LANGUAGE PLPGSQL STABLE;
|
|
|
|
-- Experimental default strategy to assign a reference base Z level
|
|
-- to a cartodbfied table. The resulting Z level represents the
|
|
-- minimum scale level at which the table data can be rendered
|
|
-- without overcrowded results or loss of detail.
|
|
-- Parameters:
|
|
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
|
|
-- Return value: Z level as an integer
|
|
CREATE OR REPLACE FUNCTION _CDB_Feature_Density_Ref_Z_Strategy(reloid REGCLASS)
|
|
RETURNS INTEGER
|
|
AS $$
|
|
DECLARE
|
|
lim FLOAT8 := 500; -- TODO: determine/parameterize this
|
|
nz integer := 4;
|
|
fd FLOAT8;
|
|
c FLOAT8;
|
|
BEGIN
|
|
-- Compute fd as an estimation of the (maximum) number
|
|
-- of features per unit of tile area (in webmercator squared meters)
|
|
SELECT _CDB_Feature_Density(reloid, nz) INTO fd;
|
|
-- lim maximum number of (desiderable) features per tile
|
|
-- we have c = 2*Pi*R = CDB_XYZ_Resolution(-8) (earth circumference)
|
|
-- ta(z): tile area = power(c*power(2,z), 2) = c*c*power(2,2*z)
|
|
-- => fd*ta(z) if the average number of features per tile at level z
|
|
-- find minimum z so that fd*ta(z) <= lim
|
|
-- compute a rough 'feature density' value
|
|
SELECT CDB_XYZ_Resolution(-8) INTO c;
|
|
RETURN ceil(log(2.0, (c*c*fd/lim)::numeric)/2);
|
|
END;
|
|
$$ LANGUAGE PLPGSQL STABLE;
|
|
|
|
-- Overview table name for a given Z level and base dataset or overview table
|
|
-- Scope: private.
|
|
-- Parameters:
|
|
-- ref reference table (can be the base table of the dataset or an existing
|
|
-- overview) from which the overview is being generated.
|
|
-- ref_z Z level of the reference table
|
|
-- overview_z Z level of the overview to be named, must be smaller than ref_z
|
|
-- Return value: the name to be used for the overview
|
|
CREATE OR REPLACE FUNCTION _CDB_Overview_Name(ref REGCLASS, ref_z INTEGER, overview_z INTEGER)
|
|
RETURNS TEXT
|
|
AS $$
|
|
DECLARE
|
|
base TEXT;
|
|
suffix TEXT;
|
|
is_overview BOOLEAN;
|
|
BEGIN
|
|
suffix := Format('_ov%s', ref_z);
|
|
SELECT ref::text LIKE Format('%%%s', suffix) INTO is_overview;
|
|
IF is_overview THEN
|
|
SELECT substring(ref::text FROM 1 FOR length(ref::text)-length(suffix)) INTO base;
|
|
ELSE
|
|
base := ref;
|
|
END IF;
|
|
RETURN Format('%s_ov%s', base::text, overview_z);
|
|
END
|
|
$$ LANGUAGE PLPGSQL IMMUTABLE;
|
|
|
|
-- Sampling reduction method.
|
|
-- Valid for any kind of geometry.
|
|
-- Scope: private.
|
|
-- reloid original table (can be the base table of the dataset or an existing
|
|
-- overview) from which the overview is being generated.
|
|
-- ref_z Z level assigned to the original table
|
|
-- overview_z Z level of the overview to be generated, must be smaller than ref_z
|
|
-- Return value: Name of the generated overview table
|
|
CREATE OR REPLACE FUNCTION _CDB_Sampling_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER)
|
|
RETURNS REGCLASS
|
|
AS $$
|
|
DECLARE
|
|
overview_rel TEXT;
|
|
fraction FLOAT8;
|
|
base_name TEXT;
|
|
class_info RECORD;
|
|
num_samples INTEGER;
|
|
BEGIN
|
|
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
|
|
fraction := power(2, 2*(overview_z - ref_z));
|
|
|
|
EXECUTE Format('DROP TABLE IF EXISTS %s CASCADE;', overview_rel);
|
|
|
|
-- Estimate number of rows
|
|
SELECT reltuples, relpages FROM pg_class INTO STRICT class_info
|
|
WHERE oid = reloid::oid;
|
|
|
|
IF class_info.relpages < 2 OR fraction > 0.5 THEN
|
|
-- We'll avoid possible CDB_RandomTids problems
|
|
EXECUTE Format('
|
|
CREATE TABLE %s AS SELECT * FROM %s WHERE random() < %s;
|
|
', overview_rel, reloid, fraction);
|
|
ELSE
|
|
num_samples := ceil(class_info.reltuples*fraction);
|
|
EXECUTE Format('
|
|
CREATE TABLE %1$s AS SELECT * FROM %2$s
|
|
WHERE ctid = ANY (
|
|
ARRAY[
|
|
(SELECT CDB_RandomTids(''%2$s'', %3$s))
|
|
]
|
|
);
|
|
', overview_rel, reloid, num_samples);
|
|
END IF;
|
|
RETURN overview_rel;
|
|
END;
|
|
$$ LANGUAGE PLPGSQL;
|
|
|
|
-- Register new overview table (post-creation chores)
|
|
-- Scope: private
|
|
-- Parameters:
|
|
-- dataset: oid of the input dataset table, It must be a cartodbfy'ed table.
|
|
-- overview_table: oid of the overview table to be registered.
|
|
-- overview_z: intended Z level for the overview table
|
|
CREATE OR REPLACE FUNCTION _CDB_Register_Overview(dataset REGCLASS, overview_table REGCLASS, overview_z INTEGER)
|
|
RETURNS VOID
|
|
AS $$
|
|
DECLARE
|
|
sql TEXT;
|
|
table_owner TEXT;
|
|
BEGIN
|
|
-- preserve the owner of the base table
|
|
SELECT u.usename
|
|
FROM pg_catalog.pg_class c JOIN pg_catalog.pg_user u ON (c.relowner=u.usesysid)
|
|
WHERE c.relname = dataset::text
|
|
INTO table_owner;
|
|
EXECUTE Format('ALTER TABLE IF EXISTS %s OWNER TO %I', overview_table::text, table_owner);
|
|
PERFORM _CDB_Add_Indexes(overview_table);
|
|
|
|
-- TODO: we'll need to store metadata somewhere to define
|
|
-- which overlay levels are available. Here we should add this metadata
|
|
-- (or replace existing metadata)
|
|
END
|
|
$$ LANGUAGE PLPGSQL;
|
|
|
|
-- Dataset attributes (column names other than the
|
|
-- CartoDB primary key and geometry columns) which should be aggregated
|
|
-- in aggregated overviews.
|
|
-- Scope: private.
|
|
-- Parameters
|
|
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
|
|
-- Return value: set of attribute names
|
|
CREATE OR REPLACE FUNCTION _CDB_Aggregable_Attributes(reloid REGCLASS)
|
|
RETURNS SETOF information_schema.sql_identifier
|
|
AS $$
|
|
SELECT c FROM cartodb.CDB_ColumnNames(reloid) c, _CDB_Columns() cdb
|
|
WHERE c NOT IN (
|
|
cdb.pkey, cdb.geomcol, cdb.mercgeomcol
|
|
)
|
|
$$ LANGUAGE SQL STABLE;
|
|
|
|
-- List of dataset attributes to be aggregated in aggregated overview
|
|
-- as a comma-separated SQL expression.
|
|
-- Scope: private.
|
|
-- Parameters
|
|
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
|
|
-- Return value: SQL subexpression as text
|
|
CREATE OR REPLACE FUNCTION _CDB_Aggregable_Attributes_Expression(reloid REGCLASS)
|
|
RETURNS TEXT
|
|
AS $$
|
|
DECLARE
|
|
attr_list TEXT;
|
|
BEGIN
|
|
SELECT string_agg(s.c, ',') FROM (
|
|
SELECT * FROM _CDB_Aggregable_Attributes(reloid) c
|
|
) AS s INTO attr_list;
|
|
|
|
RETURN attr_list;
|
|
END
|
|
$$ LANGUAGE PLPGSQL STABLE;
|
|
|
|
-- SQL Aggregation expression for a datase attribute
|
|
-- Scope: private.
|
|
-- Parameters
|
|
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
|
|
-- column_name: column to be aggregated
|
|
-- table_alias: (optional) table qualifier for the column to be aggregated
|
|
-- Return SQL subexpression as text with aggregated attribute aliased
|
|
-- with its original name.
|
|
CREATE OR REPLACE FUNCTION _CDB_Attribute_Aggregation_Expression(reloid REGCLASS, column_name TEXT, table_alias TEXT DEFAULT '')
|
|
RETURNS TEXT
|
|
AS $$
|
|
DECLARE
|
|
column_type TEXT;
|
|
qualified_column TEXT;
|
|
BEGIN
|
|
IF table_alias <> '' THEN
|
|
qualified_column := Format('%I.%I', table_alias, column_name);
|
|
ELSE
|
|
qualified_column := Format('%I', column_name);
|
|
END IF;
|
|
|
|
column_type := cartodb.CDB_ColumnType(reloid, column_name);
|
|
|
|
CASE column_type
|
|
WHEN 'double precision', 'real', 'integer', 'bigint' THEN
|
|
RETURN Format('AVG(%s)::' || column_type, qualified_column);
|
|
WHEN 'text' THEN
|
|
-- TODO: we could define a new aggregate function that returns distinct
|
|
-- separated values with a limit, adding ellipsis if more values existed
|
|
-- e.g. with '/' as separator and a limit of three:
|
|
-- 'A', 'B', 'A', 'C', 'D' => 'A/B/C/...'
|
|
-- Other ideas: if value is unique then use it, otherwise use something
|
|
-- like '*' or '(varies)' or '(multiple values)', or NULL
|
|
RETURN 'CASE count(*) WHEN 1 THEN string_agg(' || qualified_column || ',''/'') ELSE ''*''' || ' END::' || column_type;
|
|
ELSE
|
|
RETURN 'CASE count(*) WHEN 1 THEN MIN(' || qualified_column || ') ELSE NULL END::' || column_type;
|
|
END CASE;
|
|
END
|
|
$$ LANGUAGE PLPGSQL IMMUTABLE;
|
|
|
|
-- List of dataset aggregated attributes as a comma-separated SQL expression.
|
|
-- Scope: private.
|
|
-- Parameters
|
|
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
|
|
-- table_alias: (optional) table qualifier for the columns to be aggregated
|
|
-- Return value: SQL subexpression as text
|
|
CREATE OR REPLACE FUNCTION _CDB_Aggregated_Attributes_Expression(reloid REGCLASS, table_alias TEXT DEFAULT '')
|
|
RETURNS TEXT
|
|
AS $$
|
|
DECLARE
|
|
attr_list TEXT;
|
|
BEGIN
|
|
SELECT string_agg(_CDB_Attribute_Aggregation_Expression(reloid, s.c, table_alias) || Format(' AS %s', s.c), ',')
|
|
FROM (
|
|
SELECT * FROM _CDB_Aggregable_Attributes(reloid) c
|
|
) AS s INTO attr_list;
|
|
|
|
RETURN attr_list;
|
|
END
|
|
$$ LANGUAGE PLPGSQL STABLE;
|
|
|
|
-- Experimental Overview reduction method for point datasets.
|
|
-- It clusters the points using a grid, then aggregates the point in each
|
|
-- cluster into a point at the centroid of the clustered records.
|
|
-- Scope: public.
|
|
-- Parameters:
|
|
-- reloid original table (can be the base table of the dataset or an existing
|
|
-- overview) from which the overview is being generated.
|
|
-- ref_z Z level assigned to the original table
|
|
-- overview_z Z level of the overview to be generated, must be smaller than ref_z
|
|
-- Return value: Name of the generated overview table
|
|
CREATE OR REPLACE FUNCTION CDB_GridCluster_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER)
|
|
RETURNS REGCLASS
|
|
AS $$
|
|
DECLARE
|
|
overview_rel TEXT;
|
|
reduction FLOAT8;
|
|
base_name TEXT;
|
|
grid_px FLOAT8 = 7.5; -- Grid size in pixels at Z level overview_z
|
|
grid_m FLOAT8;
|
|
aggr_attributes TEXT;
|
|
attributes TEXT;
|
|
BEGIN
|
|
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
|
|
|
|
-- compute grid cell size using the overview_z dimension...
|
|
SELECT CDB_XYZ_Resolution(overview_z)*grid_px INTO grid_m;
|
|
|
|
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
|
|
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
|
|
IF attributes <> '' THEN
|
|
attributes := ', ' || attributes;
|
|
END IF;
|
|
IF aggr_attributes <> '' THEN
|
|
aggr_attributes := aggr_attributes || ', ';
|
|
END IF;
|
|
|
|
EXECUTE Format('DROP TABLE IF EXISTS %s CASCADE;', overview_rel);
|
|
|
|
-- Now we cluster the data using a grid of size grid_m
|
|
-- and selecte the centroid (average coordinates) of each cluster.
|
|
-- If we had a selected numeric attribute of interest we could use it
|
|
-- as a weight for the average coordinates.
|
|
EXECUTE Format('
|
|
CREATE TABLE %3$s AS
|
|
WITH clusters AS (
|
|
SELECT
|
|
%5$s
|
|
count(*) AS n,
|
|
SUM(ST_X(f.the_geom_webmercator)) AS sx,
|
|
SUM(ST_Y(f.the_geom_webmercator)) AS sy,
|
|
Floor(ST_X(f.the_geom_webmercator)/%2$s)::int AS gx,
|
|
Floor(ST_Y(f.the_geom_webmercator)/%2$s)::int AS gy,
|
|
MIN(cartodb_id) AS cartodb_id
|
|
FROM %1$s f
|
|
GROUP BY gx, gy
|
|
)
|
|
SELECT
|
|
cartodb_id,
|
|
ST_Transform(ST_SetSRID(ST_MakePoint(sx/n, sy/n), 3857), 4326) AS the_geom,
|
|
ST_SetSRID(ST_MakePoint(sx/n, sy/n), 3857) AS the_geom_webmercator
|
|
%4$s
|
|
FROM clusters
|
|
', reloid::text, grid_m, overview_rel, attributes, aggr_attributes);
|
|
|
|
RETURN overview_rel;
|
|
END;
|
|
$$ LANGUAGE PLPGSQL;
|
|
|
|
-- Create overview tables for a dataset.
|
|
-- Scope: public
|
|
-- Parameters:
|
|
-- reloid: oid of the input table. It must be a cartodbfy'ed table with
|
|
-- vector features.
|
|
-- refscale_strategy: function that computes the reference Z of the dataset
|
|
-- reduce_strategy: function that generates overviews from a base table
|
|
-- or higher level overview
|
|
-- Return value: Array with the names of the generated overview tables
|
|
CREATE OR REPLACE FUNCTION CDB_CreateOverviews(
|
|
reloid REGCLASS,
|
|
refscale_strategy regproc DEFAULT '_CDB_Feature_Density_Ref_Z_Strategy'::regproc,
|
|
reduce_strategy regproc DEFAULT '_CDB_Sampling_Reduce_Strategy'::regproc
|
|
)
|
|
RETURNS text[]
|
|
AS $$
|
|
DECLARE
|
|
ref_z integer;
|
|
overviews_z integer[];
|
|
base_z integer;
|
|
base_rel REGCLASS;
|
|
overview_z integer;
|
|
overview_tables REGCLASS[];
|
|
overviews_step integer := 1;
|
|
BEGIN
|
|
-- TODO: adjust statement_timeout here based on input table size?
|
|
|
|
-- Determine the referece zoom level
|
|
EXECUTE 'SELECT ' || quote_ident(refscale_strategy::text) || Format('(''%s'');', reloid) INTO ref_z;
|
|
|
|
-- Determine overlay zoom levels
|
|
-- TODO: should be handled by the refscale_stragegy?
|
|
overview_z := ref_z - 1;
|
|
WHILE overview_z >= 0 LOOP
|
|
SELECT array_append(overviews_z, overview_z) INTO overviews_z;
|
|
overview_z := overview_z - overviews_step;
|
|
END LOOP;
|
|
|
|
-- Create overlay tables
|
|
base_z := ref_z;
|
|
base_rel := reloid;
|
|
FOREACH overview_z IN ARRAY overviews_z LOOP
|
|
EXECUTE 'SELECT ' || quote_ident(reduce_strategy::text) || Format('(''%s'', %s, %s);', base_rel, base_z, overview_z) INTO base_rel;
|
|
base_z := overview_z;
|
|
PERFORM _CDB_Register_Overview(reloid, base_rel, base_z);
|
|
SELECT array_append(overview_tables, base_rel) INTO overview_tables;
|
|
END LOOP;
|
|
|
|
RETURN overview_tables;
|
|
END;
|
|
$$ LANGUAGE PLPGSQL;
|