Compare commits

..

21 Commits

Author SHA1 Message Date
Javier Goizueta
8f1435c049 Release 0.16.2 2016-04-27 18:30:26 +02:00
Javier Goizueta
8302f89413 Merge pull request #246 from CartoDB/245-categories-mode
Use the mode to aggregate category columns in overviews
2016-04-27 18:16:05 +02:00
Javier Goizueta
e9050178a8 Merge branch 'master' of github.com:CartoDB/cartodb-postgresql 2016-04-27 16:23:46 +02:00
Javier Goizueta
3e34ca4654 Overviews documentation fixes 2016-04-27 16:23:25 +02:00
Javier Goizueta
a067cc7da1 Generate stats used to identify category columns in overviews if needed
This only generates the stats if no stats are available for a table.
This doesn't warrant that the stats are up to date or accurate.
2016-04-27 15:06:09 +02:00
Javier Goizueta
2c43943df6 Fix syntax 2016-04-26 18:27:52 +02:00
Javier Goizueta
417cbe7902 Fix category columns aggregation in overviews
Overviews are created in cascade, each one from the immediate
lower level, but the stats to decide if a column is a category
should be taken always from the base table.
2016-04-26 18:02:25 +02:00
Javier Goizueta
9a73703954 Use mode to aggregate categorical columns in overviews
Fixes #245
2016-04-26 15:15:24 +02:00
Rafa de la Torre
36ac831bd1 Update cartodbfy-requirements.rst
Fix broken link to doc
2016-04-26 14:43:24 +02:00
Javier Goizueta
1358964628 Release 0.16.1 2016-04-25 18:47:42 +02:00
Javier Goizueta
efe381ad94 Merge pull request #243 from CartoDB/241-webmercator
Compute webmercator resolution with full accuracy
2016-04-25 17:30:40 +02:00
Javier Goizueta
f7cce21eb7 Merge pull request #242 from CartoDB/240-overviews-pixels
Adjust overview points to pixel centers
2016-04-25 17:30:25 +02:00
Javier Goizueta
18267477da Merge pull request #238 from CartoDB/235-column-names
Optimize column information functions
2016-04-25 17:30:07 +02:00
Javier Goizueta
11ad45306f Remove unneeded pg_catalog schema name 2016-04-25 16:30:58 +02:00
Javier Goizueta
75c7ae98e4 Compute webmercator resolution with full accuracy
Fixes #241
2016-04-25 14:02:26 +02:00
Javier Goizueta
3c12cf629f Optimize overview pixel adjustment for integer-pixel cells 2016-04-25 13:53:59 +02:00
Javier Goizueta
7b2100b51e Adjust overview coordinates to pixel centers
This makes the adjustment for all grid sizes, not only
for integral number of pixels.
2016-04-25 13:33:43 +02:00
Javier Goizueta
580ec38ab8 Adjust overview clustered point to pixel centers
Fixes #240
2016-04-23 15:07:06 +02:00
Javier Goizueta
65415bb335 Optimize function CDB_ColumnType 2016-04-18 19:07:33 +02:00
Javier Goizueta
06ebb27160 Optimize internal function _cdb_unlimited_text_column 2016-04-18 18:50:37 +02:00
Javier Goizueta
bd5ae84e90 Optimize CDB_ColumnNames
This implementation is about 1000 times faster
2016-04-18 18:49:58 +02:00
8 changed files with 172 additions and 44 deletions

View File

@@ -1,7 +1,7 @@
# cartodb/Makefile
EXTENSION = cartodb
EXTVERSION = 0.16.0
EXTVERSION = 0.16.2
SED = sed
@@ -67,6 +67,8 @@ UPGRADABLE = \
0.15.0 \
0.15.1 \
0.16.0 \
0.16.1 \
0.16.2 \
$(EXTVERSION)dev \
$(EXTVERSION)next \
$(END)

19
NEWS.md
View File

@@ -1,3 +1,22 @@
0.16.2 (2016-04-27)
-------------------
* Use the mode to aggregate category columns in overviews
[#246](https://github.com/CartoDB/cartodb-postgresql/pull/246)
0.16.1 (2016-04-25)
-------------------
* Optimize column information functions performance
[#238](https://github.com/CartoDB/cartodb-postgresql/pull/238)
* Adjust overview points to pixel centers
[#242](https://github.com/CartoDB/cartodb-postgresql/pull/242)
* Compute webmercator resolution using full numeric precision
[#243](https://github.com/CartoDB/cartodb-postgresql/pull/243)
0.16.0 (2016-04-15)
-------------------
* Adds table for storing camshaft analysis nodes

View File

@@ -2,18 +2,25 @@ Overviews are tables that represent a *reduced* version of a dataset intended
for efficient rendering at certain zoom levels while preserving the
general visual appearance of the complete dataset.
The *reduction* consists in a fewer number of records
The *reduction* consists in having a fewer number of records
(while each overview record may represent an aggregation of multiple records)
and/or simplified record geometries.
Overviews are created through the `CDB_CreateOverviews`.
Overviews are created through the `CDB_CreateOverviews` function.
The statement timeout may need to be adjusted before using this function,
as overview creation for large tables is a time-consuming operation.
The `CDB_Overviews` function can be used to determine what overview tables
exist for a given dataset table and which zoom levels correspond to each of them.
The `CDB_DropOverviews` remove a dataset's existing overviews.
The `CDB_DropOverviews` function removes a dataset's existing overviews.
To know if overview tables exist for some base table, and to obtain
a list of which overview tables are appropriate for which zoom levels,
the `CDB_Overviews` function can be used.
The zoom levels we are referring to here are those used
by the tiler: http://wiki.openstreetmap.org/wiki/Zoom_levels
### CDB_CreateOverviews
@@ -51,10 +58,14 @@ CDB_CreateOverviews(table_name, ref_z_strategy, reduction_strategy)
#### Tolerance / level of detail
The level of detail to be representable by each overview layer can
be specified as a tolerance in pixels (if different from the default of 2 pixels)
be specified as a tolerance in pixels (if different from the default of 1 pixel)
with the function `CDB_CreateOverviewsWithToleranceInPixels`
which has as a second additional argument the desired tolerance.
This tolerance defines the maximum deviation in pixels of the overview
geometries with respect to the original geometries when overview tables
are used for their intended zoom level.
### CDB_Overviews
Obtain overview metadata for a given table (existing overviews).
@@ -79,7 +90,7 @@ SELECT CDB_Overviews(CDB_QueryTablesText('SELECT * FROM table1, table2'));
The result of `CDB_Overviews` has three columns:
| base_table | z | overview_table |
|------------+---+----------------|
| ---------- | - | -------------- |
| table1 | 1 | table1_ov1 |
| table1 | 2 | table1_ov2 |
| table1 | 4 | table1_ov4 |

View File

@@ -33,7 +33,7 @@ Additionally, a CartoDB table can contain other columns.
See the `CartoDB User Table documentation`_
.. _CartoDB User Table documentation: https://github.com/CartoDB/cartodb-postgresql/blob/master/doc/CartoDB-user-table.md
.. _CartoDB User Table documentation: https://github.com/CartoDB/cartodb-postgresql/blob/master/doc/CartoDB-user-table.rst
for further information.
High level requirements

View File

@@ -2,15 +2,13 @@
-- Names of the user columns of a table, in column (attribute number) order.
-- Parameters
--   $1: the table, as a REGCLASS.
-- Return: one sql_identifier per column. System columns and dropped
--   columns are not reported.
CREATE OR REPLACE FUNCTION CDB_ColumnNames(REGCLASS)
RETURNS SETOF information_schema.sql_identifier
AS $$
  -- Only the catalog query is kept: a SQL function returns the result of its
  -- last statement, so an earlier information_schema query would be executed
  -- and then thrown away.
  SELECT
    a.attname::information_schema.sql_identifier AS column_name
  FROM pg_class AS c
  INNER JOIN pg_attribute AS a ON a.attrelid = c.oid
  WHERE c.oid = $1::oid
    -- attstattarget is a per-column statistics setting that users can change
    -- (ALTER TABLE ... SET STATISTICS), so it is not a reliable way to tell
    -- user columns apart; attnum/attisdropped are.
    AND a.attnum > 0         -- system columns have non-positive attribute numbers
    AND NOT a.attisdropped   -- dropped columns keep their pg_attribute row
  ORDER BY a.attnum;
$$ LANGUAGE SQL;
-- This is to migrate from pre-0.2.0 version

View File

@@ -2,15 +2,13 @@
-- Type name of a table column.
-- Parameters
--   $1: the table, as a REGCLASS.
--   $2: the column name.
-- Return: the type name produced by format_type() without type modifier
--   (e.g. 'character varying', not 'character varying(10)'); no row (NULL)
--   when the table has no such user column.
CREATE OR REPLACE FUNCTION CDB_ColumnType(REGCLASS, TEXT)
RETURNS information_schema.character_data
AS $$
  -- Only the catalog query is kept: a SQL function returns the result of its
  -- last statement, so an earlier information_schema query would be executed
  -- and then thrown away.
  SELECT
    format_type(a.atttypid, NULL)::information_schema.character_data AS data_type
  FROM pg_class AS c
  INNER JOIN pg_attribute AS a ON a.attrelid = c.oid
  WHERE c.oid = $1::oid
    AND a.attname = $2
    -- attstattarget is a per-column statistics setting that users can change
    -- (ALTER TABLE ... SET STATISTICS), so it is not a reliable way to tell
    -- user columns apart; attnum/attisdropped are.
    AND a.attnum > 0         -- system columns have non-positive attribute numbers
    AND NOT a.attisdropped;  -- dropped columns keep their pg_attribute row
$$ LANGUAGE SQL;
-- This is to migrate from pre-0.2.0 version

View File

@@ -88,6 +88,26 @@ AS $$
END;
$$ LANGUAGE PLPGSQL IMMUTABLE;
-- Base table that corresponds to an overview table.
-- Scope: private.
-- Parameters
--   overview_table: the table to resolve, as a REGCLASS.
-- Return: when _CDB_OverviewBaseTableName maps the table's name to a
--   different name, the table with that name in the same schema;
--   otherwise the input table itself.
CREATE OR REPLACE FUNCTION _CDB_OverviewBaseTable(overview_table REGCLASS)
RETURNS REGCLASS
AS $$
DECLARE
  table_name TEXT;
  schema_name TEXT;
  base_name TEXT;
  base_table REGCLASS;
BEGIN
  SELECT * FROM _cdb_split_table_name(overview_table) INTO schema_name, table_name;
  base_name := _CDB_OverviewBaseTableName(table_name);
  IF base_name != table_name THEN
    -- The text -> regclass cast looks the name up in the system catalogs
    -- (and raises an error if no such relation exists).
    base_table := Format('%I.%I', schema_name, base_name)::regclass;
  ELSE
    base_table := overview_table;
  END IF;
  RETURN base_table;
END;
-- STABLE rather than IMMUTABLE: the result depends on which relations exist
-- in the database, so it must not be pre-evaluated or cached across statements.
$$ LANGUAGE PLPGSQL STABLE;
-- Schema and relation names of a table given its reloid
-- Scope: private.
-- Parameters
@@ -520,18 +540,65 @@ CREATE OR REPLACE FUNCTION _cdb_unlimited_text_column(reloid REGCLASS, col_name
RETURNS BOOLEAN
AS $$
SELECT EXISTS (
SELECT *
FROM information_schema.columns c, pg_class _tn, pg_namespace _sn
WHERE table_name = _tn.relname
AND table_schema = _sn.nspname
AND c.column_name = col_name
AND _tn.oid = reloid
AND _sn.oid = _tn.relnamespace
AND character_maximum_length IS NULL
AND c.data_type IN ('text', 'character varying', 'character')
SELECT a.attname
FROM pg_class c
LEFT JOIN pg_attribute a ON a.attrelid = c.oid
LEFT JOIN pg_type t ON t.oid = a.atttypid
WHERE c.oid = reloid
AND a.attname = col_name
AND format_type(a.atttypid, NULL) IN ('text', 'character varying', 'character')
AND format_type(a.atttypid, NULL) = format_type(a.atttypid, a.atttypmod)
);
$$ LANGUAGE SQL STABLE;
-- Decide, from planner statistics, whether a column looks categorical.
-- Scope: private.
-- Parameters
--   reloid: the table, as a REGCLASS.
--   col_name: the column name.
-- Return: TRUE when pg_stats reports between 1 and 20 distinct values
--   (n_distinct > 0 and <= 20) for the column, FALSE for other non-null
--   n_distinct values, NULL when no statistics row is found.
-- Side effect: runs ANALYZE on the table when pg_stats has no usable entry
--   for the column. Existing statistics are used as they are, whether or
--   not they are up to date.
CREATE OR REPLACE FUNCTION _cdb_categorical_column(reloid REGCLASS, col_name TEXT)
RETURNS BOOLEAN
AS $$
DECLARE
  rel_schema TEXT;
  rel_name TEXT;
  has_stats BOOLEAN;
  is_categorical BOOLEAN;
BEGIN
  SELECT * FROM _cdb_split_table_name(reloid) INTO rel_schema, rel_name;

  -- Is there already a statistics entry for this column?
  SELECT s.n_distinct IS NOT NULL
    INTO has_stats
    FROM pg_stats AS s
    WHERE s.schemaname = rel_schema
      AND s.tablename = rel_name
      AND s.attname = col_name;

  -- has_stats is NULL when no row was found and FALSE when n_distinct is NULL;
  -- both cases require gathering statistics first.
  IF has_stats IS NOT TRUE THEN
    EXECUTE Format('ANALYZE %s;', reloid);
  END IF;

  SELECT s.n_distinct > 0 AND s.n_distinct <= 20
    INTO is_categorical
    FROM pg_stats AS s
    WHERE s.schemaname = rel_schema
      AND s.tablename = rel_name
      AND s.attname = col_name;

  RETURN is_categorical;
END;
$$ LANGUAGE PLPGSQL VOLATILE;
-- Mode (most frequent element) of an array.
-- Scope: private.
-- Parameters
--   $1: the array of values.
-- Return: the element that occurs most often; when several elements are
--   tied, the one that sorts first. NULL for an empty array.
CREATE OR REPLACE FUNCTION _cdb_mode_of_array(anyarray)
RETURNS anyelement AS
$$
  SELECT elem
  FROM unnest($1) AS elem
  GROUP BY elem
  ORDER BY count(*) DESC, elem
  LIMIT 1;
$$
LANGUAGE SQL IMMUTABLE;
-- Aggregate that returns the mode of its input values.
-- Scope: private.
-- Input values are accumulated into an array with array_append (starting
-- from the empty array) and the result is computed from that array by
-- _cdb_mode_of_array.
--
-- CREATE AGGREGATE fails if the aggregate already exists, so any previous
-- definition is dropped first; this keeps the script re-runnable, like the
-- CREATE OR REPLACE FUNCTION statements around it.
DROP AGGREGATE IF EXISTS _cdb_mode(anyelement);
CREATE AGGREGATE _cdb_mode(anyelement) (
  SFUNC=array_append,
  STYPE=anyarray,
  FINALFUNC=_cdb_mode_of_array,
  INITCOND='{}'
);
-- SQL Aggregation expression for a dataset attribute
-- Scope: private.
-- Parameters
@@ -549,6 +616,7 @@ DECLARE
has_counter_column BOOLEAN;
feature_count TEXT;
total_feature_count TEXT;
base_table REGCLASS;
BEGIN
IF table_alias <> '' THEN
qualified_column := Format('%I.%I', table_alias, column_name);
@@ -569,20 +637,30 @@ BEGIN
total_feature_count := 'count(*)';
END IF;
base_table := _CDB_OverviewBaseTable(reloid);
CASE column_type
WHEN 'double precision', 'real', 'integer', 'bigint', 'numeric' THEN
IF column_name = '_feature_count' THEN
RETURN 'SUM(_feature_count)';
ELSE
RETURN Format('SUM(%s*%s)/%s::' || column_type, qualified_column, feature_count, total_feature_count);
IF column_type = 'integer' AND _cdb_categorical_column(base_table, column_name) THEN
RETURN Format('CDB_Math_Mode(%s)::', qualified_column) || column_type;
ELSE
RETURN Format('SUM(%s*%s)/%s::' || column_type, qualified_column, feature_count, total_feature_count);
END IF;
END IF;
WHEN 'text', 'character varying', 'character' THEN
IF _cdb_unlimited_text_column(reloid, column_name) THEN
-- TODO: this should not be applied to columns containing largish text;
-- it is intended only to short names/identifiers
RETURN 'CASE WHEN count(distinct ' || qualified_column || ') = 1 THEN MIN(' || qualified_column || ') WHEN ' || total_feature_count || ' < 5 THEN string_agg(distinct ' || qualified_column || ','' / '') ELSE ''*'' END::' || column_type;
IF _cdb_categorical_column(base_table, column_name) THEN
RETURN Format('_cdb_mode(%s)::', qualified_column) || column_type;
ELSE
RETURN 'CASE count(*) WHEN 1 THEN MIN(' || qualified_column || ') ELSE NULL END::' || column_type;
IF _cdb_unlimited_text_column(base_table, column_name) THEN
-- TODO: this should not be applied to columns containing largish text;
-- it is intended only to short names/identifiers
RETURN 'CASE WHEN count(distinct ' || qualified_column || ') = 1 THEN MIN(' || qualified_column || ') WHEN ' || total_feature_count || ' < 5 THEN string_agg(distinct ' || qualified_column || ','' / '') ELSE ''*'' END::' || column_type;
ELSE
RETURN 'CASE count(*) WHEN 1 THEN MIN(' || qualified_column || ') ELSE NULL END::' || column_type;
END IF;
END IF;
WHEN 'boolean' THEN
RETURN 'CASE count(*) WHEN 1 THEN BOOL_AND(' || qualified_column || ') ELSE NULL END::' || column_type;
@@ -652,7 +730,13 @@ AS $$
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
@@ -678,8 +762,10 @@ AS $$
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- compute grid cell size using the overview_z dimension...
SELECT CDB_XYZ_Resolution(overview_z)*grid_px INTO grid_m;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
@@ -690,7 +776,21 @@ AS $$
aggr_attributes := aggr_attributes || ', ';
END IF;
point_geom = Format('ST_SetSRID(ST_MakePoint(gx*%1$s + %2$s, gy*%1$s + %2$s), 3857)', grid_m, grid_m/2);
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
WITH cols AS (

View File

@@ -6,7 +6,7 @@ CREATE OR REPLACE FUNCTION CDB_XYZ_Resolution(z INTEGER)
RETURNS FLOAT8
AS $$
-- circumference divided by 256 is z0 resolution, then divide by 2^z
SELECT 40075017.0 / 256 / power(2, z);
SELECT 6378137.0*2.0*pi() / 256.0 / power(2.0, z);
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- }