Compare commits

..

9 Commits

Author SHA1 Message Date
Javier Goizueta
8f1435c049 Release 0.16.2 2016-04-27 18:30:26 +02:00
Javier Goizueta
8302f89413 Merge pull request #246 from CartoDB/245-categories-mode
Use the mode to aggregate category columns in overviews
2016-04-27 18:16:05 +02:00
Javier Goizueta
e9050178a8 Merge branch 'master' of github.com:CartoDB/cartodb-postgresql 2016-04-27 16:23:46 +02:00
Javier Goizueta
3e34ca4654 Overviews documentation fixes 2016-04-27 16:23:25 +02:00
Javier Goizueta
a067cc7da1 Generate stats used to identify category columns in overviews if needed
This only generates the stats if no stats are available for a table.
This doesn't warrant that the stats are up to date or accurate.
2016-04-27 15:06:09 +02:00
Javier Goizueta
2c43943df6 Fix syntax 2016-04-26 18:27:52 +02:00
Javier Goizueta
417cbe7902 Fix category columns aggregation in overviews
Overviews are created in cascade, each one from the inmediate
lower level, but the stats to decide if a column is a category
should be taken always from the base table.
2016-04-26 18:02:25 +02:00
Javier Goizueta
9a73703954 Use mode to aggregate categorical columns in overviews
Fixes #245
2016-04-26 15:15:24 +02:00
Rafa de la Torre
36ac831bd1 Update cartodbfy-requirements.rst
Fix broken link to doc
2016-04-26 14:43:24 +02:00
5 changed files with 111 additions and 13 deletions

View File

@@ -1,7 +1,7 @@
# cartodb/Makefile
EXTENSION = cartodb
EXTVERSION = 0.16.1
EXTVERSION = 0.16.2
SED = sed
@@ -68,6 +68,7 @@ UPGRADABLE = \
0.15.1 \
0.16.0 \
0.16.1 \
0.16.2 \
$(EXTVERSION)dev \
$(EXTVERSION)next \
$(END)

View File

@@ -1,3 +1,9 @@
0.16.2 (2016-04-27)
-------------------
* Use the mode to aggregate category columns in overviews
[#246](https://github.com/CartoDB/cartodb-postgresql/pull/246)
0.16.1 (2016-04-25)
-------------------
@@ -10,6 +16,7 @@
* Compute webmercator resolution using full numeric precision
[#243](https://github.com/CartoDB/cartodb-postgresql/pull/243)
0.16.0 (2016-04-15)
-------------------
* Adds table for storing camshaft analysis nodes

View File

@@ -2,18 +2,25 @@ Overviews are tables that represent a *reduced* version of a dataset intended
for efficient rendering at certain zoom levels while preserving the
general visual appearance of the complete dataset.
The *reduction* consists in a fewer number of records
The *reduction* consists in havig a fewer number of records
(while each overview record may represent an aggregation of multiple records)
and/or simplified record geometries.
Overviews are created through the `CDB_CreateOverviews`.
Overviews are created through the `CDB_CreateOverviews` function.
The statement timeout may need to be adjusted before using this function,
as overview creation for large tables is a time-consuming operation.
The `CDB_Overviews` function can be used determine what overview tables
exist for a given dataset table and which zoom levels correspond to it.
The `CDB_DropOverviews` remove a dataset's existing overviews.
The `CDB_DropOverviews` function removes a dataset's existing overviews.
To know if overview tables exist for some base table, and to obtain
a list of which overview tables are approrpiate for which zoom levels,
the `CDB_Overviews` functions can be used.
The zoom level we're referring here to are those used
by the tiler: http://wiki.openstreetmap.org/wiki/Zoom_levels
### CDB_CreateOverviews
@@ -51,10 +58,14 @@ CDB_CreateOverviews(table_name, ref_z_strategy, reduction_strategy)
#### Tolerance / level of detail
The level of detail to be representable by each overview layer can
be specified as a tolerance in pixels (if different from the default of 2 pixels)
be specified as a tolerance in pixels (if different from the default of 1 pixel)
with the function `CDB_CreateOverviewsWithToleranceInPixels`
which has as a second additional argument the desired tolerance.
This tolerance defines the maximum deviation in pixels of the overviews
geometries with respect to the original geometries when overview tables
are used for their intendend zoom level.
### CDB_Overviews
Obtain overview metadata for a given table (existing overviews).
@@ -79,7 +90,7 @@ SELECT CDB_Overviews(CDB_QueryTablesText('SELECT * FROM table1, table2'));
The result of `CDB_Overviews` has three columns:
| base_table | z | overview_table |
|------------+---+----------------|
| ---------- | - | -------------- |
| table1 | 1 | table1_ov1 |
| table1 | 2 | table1_ov2 |
| table1 | 4 | table1_ov4 |

View File

@@ -33,7 +33,7 @@ Additionally, a CartoDB table can contain other columns.
See the `CartoDB User Table documentation`_
.. _CartoDB User Table documentation: https://github.com/CartoDB/cartodb-postgresql/blob/master/doc/CartoDB-user-table.md
.. _CartoDB User Table documentation: https://github.com/CartoDB/cartodb-postgresql/blob/master/doc/CartoDB-user-table.rst
for further information.
High level requirements

View File

@@ -88,6 +88,26 @@ AS $$
END;
$$ LANGUAGE PLPGSQL IMMUTABLE;
CREATE OR REPLACE FUNCTION _CDB_OverviewBaseTable(overview_table REGCLASS)
RETURNS REGCLASS
AS $$
DECLARE
table_name TEXT;
schema_name TEXT;
base_name TEXT;
base_table REGCLASS;
BEGIN
SELECT * FROM _cdb_split_table_name(overview_table) INTO schema_name, table_name;
base_name := _CDB_OverviewBaseTableName(table_name);
IF base_name != table_name THEN
base_table := Format('%I.%I', schema_name, base_name)::regclass;
ELSE
base_table := overview_table;
END IF;
RETURN base_table;
END;
$$ LANGUAGE PLPGSQL IMMUTABLE;
-- Schema and relation names of a table given its reloid
-- Scope: private.
-- Parameters
@@ -531,6 +551,54 @@ AS $$
);
$$ LANGUAGE SQL STABLE;
CREATE OR REPLACE FUNCTION _cdb_categorical_column(reloid REGCLASS, col_name TEXT)
RETURNS BOOLEAN
AS $$
DECLARE
schema_name TEXT;
table_name TEXT;
available BOOLEAN;
categorical BOOLEAN;
BEGIN
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
SELECT n_distinct IS NOT NULL
FROM pg_stats
WHERE pg_stats.schemaname = schema_name
AND pg_stats.tablename = table_name
AND pg_stats.attname = col_name
INTO available;
IF available IS NULL OR NOT available THEN
EXECUTE Format('ANALYZE %s;', reloid);
END IF;
SELECT n_distinct > 0 AND n_distinct <= 20
FROM pg_stats
WHERE pg_stats.schemaname = schema_name
AND pg_stats.tablename = table_name
AND pg_stats.attname = col_name
INTO categorical;
RETURN categorical;
END;
$$ LANGUAGE PLPGSQL VOLATILE;
CREATE OR REPLACE FUNCTION _cdb_mode_of_array(anyarray)
RETURNS anyelement AS
$$
SELECT a
FROM unnest($1) a
GROUP BY 1
ORDER BY COUNT(1) DESC, 1
LIMIT 1;
$$
LANGUAGE SQL IMMUTABLE;
-- Tell Postgres how to use our aggregate
CREATE AGGREGATE _cdb_mode(anyelement) (
SFUNC=array_append,
STYPE=anyarray,
FINALFUNC=_cdb_mode_of_array,
INITCOND='{}'
);
-- SQL Aggregation expression for a datase attribute
-- Scope: private.
-- Parameters
@@ -548,6 +616,7 @@ DECLARE
has_counter_column BOOLEAN;
feature_count TEXT;
total_feature_count TEXT;
base_table REGCLASS;
BEGIN
IF table_alias <> '' THEN
qualified_column := Format('%I.%I', table_alias, column_name);
@@ -568,20 +637,30 @@ BEGIN
total_feature_count := 'count(*)';
END IF;
base_table := _CDB_OverviewBaseTable(reloid);
CASE column_type
WHEN 'double precision', 'real', 'integer', 'bigint', 'numeric' THEN
IF column_name = '_feature_count' THEN
RETURN 'SUM(_feature_count)';
ELSE
RETURN Format('SUM(%s*%s)/%s::' || column_type, qualified_column, feature_count, total_feature_count);
IF column_type = 'integer' AND _cdb_categorical_column(base_table, column_name) THEN
RETURN Format('CDB_Math_Mode(%s)::', qualified_column) || column_type;
ELSE
RETURN Format('SUM(%s*%s)/%s::' || column_type, qualified_column, feature_count, total_feature_count);
END IF;
END IF;
WHEN 'text', 'character varying', 'character' THEN
IF _cdb_unlimited_text_column(reloid, column_name) THEN
-- TODO: this should not be applied to columns containing largish text;
-- it is intended only to short names/identifiers
RETURN 'CASE WHEN count(distinct ' || qualified_column || ') = 1 THEN MIN(' || qualified_column || ') WHEN ' || total_feature_count || ' < 5 THEN string_agg(distinct ' || qualified_column || ','' / '') ELSE ''*'' END::' || column_type;
IF _cdb_categorical_column(base_table, column_name) THEN
RETURN Format('_cdb_mode(%s)::', qualified_column) || column_type;
ELSE
RETURN 'CASE count(*) WHEN 1 THEN MIN(' || qualified_column || ') ELSE NULL END::' || column_type;
IF _cdb_unlimited_text_column(base_table, column_name) THEN
-- TODO: this should not be applied to columns containing largish text;
-- it is intended only to short names/identifiers
RETURN 'CASE WHEN count(distinct ' || qualified_column || ') = 1 THEN MIN(' || qualified_column || ') WHEN ' || total_feature_count || ' < 5 THEN string_agg(distinct ' || qualified_column || ','' / '') ELSE ''*'' END::' || column_type;
ELSE
RETURN 'CASE count(*) WHEN 1 THEN MIN(' || qualified_column || ') ELSE NULL END::' || column_type;
END IF;
END IF;
WHEN 'boolean' THEN
RETURN 'CASE count(*) WHEN 1 THEN BOOL_AND(' || qualified_column || ') ELSE NULL END::' || column_type;