Compare commits

...

31 Commits

Author SHA1 Message Date
Mario de Frutos
208ebb2724 Update NEWS.md for version 0.19.1 2017-06-05 11:20:38 +02:00
Mario de Frutos
951ec51968 Merge pull request #302 from CartoDB/regenerate_overviews_deadlock
Regenerate overviews deadlock
2017-06-05 11:17:14 +02:00
Mario de Frutos
8a3d506a53 Add some debug logs to be used in the future 2017-06-02 15:41:18 +02:00
Mario de Frutos
c839f74c63 Fixed travis shield 2017-06-02 15:30:19 +02:00
Mario de Frutos
8389b39c00 Bump new version 2017-06-02 15:23:47 +02:00
Mario de Frutos
4908bacc4b If existing overviews we regenerate them
Instead of make a DROP/CREATE always, in case we have existing
overviews we have to make DELETE/INSERT because DDL operations require
AccessExclusiveLock which is not compatible with AccessShareLock that
is used by the queries used to gather data for the tiler.

This incompatibility above mentioned leads to have deadlocks
2017-06-02 15:23:47 +02:00
Mario de Frutos
38fa3b485c Remove unused and old _CDB_GridCluster_Reduce_Strategy function for overviews 2017-06-02 13:05:05 +02:00
Javier Goizueta
2d473cf693 New version 0.19.0 2017-04-11 11:22:20 +02:00
Javier Goizueta
4193ff3874 Merge pull request #298 from CartoDB/295-estimate-row-count
Add CDB_EstimateRowCount function
2017-04-11 11:01:31 +02:00
Javier Goizueta
68a0752849 Use PG 9.5 for travis tests; fix tests 2017-04-10 15:58:49 +02:00
Javier Goizueta
815b5b429d Fix tests 2017-04-10 13:50:37 +02:00
Javier Goizueta
76bdb3657a Fix tests 2017-04-10 12:17:47 +02:00
Javier Goizueta
234373df11 Replace unnecessary count 2017-04-10 08:08:59 +02:00
Javier Goizueta
a486eed2e3 Add CDB_EstimateRowCount function
See #295
2017-04-07 15:35:48 +02:00
Mario de Frutos
795d92da8d Added CLA paragraph 2017-01-25 10:54:12 +01:00
Javier Goizueta
58e2e7e238 Release 0.18.5 2016-11-30 17:17:45 +01:00
Javier Goizueta
25d27263cb Merge pull request #249 from CartoDB/nullbins
Test behavior of binning fuctions with nulls
2016-11-30 16:09:23 +01:00
Javier Goizueta
bbadcc838e Merge pull request #244 from CartoDB/equalbins
Convert CDB_EqualIntervalBins to a single SQL statement and add float version
2016-11-30 16:09:05 +01:00
Javier Goizueta
b1a0904c07 Merge pull request #181 from CartoDB/update_to_cdb_stats
Fix for division by zero error on empty or homogenous array
2016-11-30 16:08:40 +01:00
Javier Goizueta
399b680b41 Merge pull request #283 from CartoDB/157-test-fixes
Fix tests: race condition with publicuser #157
2016-11-30 16:08:21 +01:00
Javier Goizueta
7c0636c5f9 Merge pull request #290 from CartoDB/286-overview-strategies
Add point overview strategies
2016-11-30 11:46:29 +01:00
Javier Goizueta
f58f870457 Remove use of first aggregator in sample-cluster overviews strategy
This is not more efficient but the geometry now corresponds to the cartodb_id
and the dependency with custom aggregators (firt) is removed.
2016-11-29 14:08:08 +01:00
Javier Goizueta
a7c8dc04e3 Release 0.18.4
This just fixes the lack of migration path from 0.18.2
2016-11-04 16:25:03 +01:00
Javier Goizueta
90ee56eb35 Merge pull request #288 from CartoDB/fix-migration
Fix migration script generation
2016-11-04 16:22:56 +01:00
Javier Goizueta
1032737600 Fix migration script generation
In the 0.18.3 release the script to migrate from 0.18.2 was missing.
This will generate a new version 0.18.4 that when install will generate
scripts to migrate from all old versions to it, so it will be possible
to migrate existing users to 0.18.4 (but not to 0.18.3)
2016-11-04 16:19:06 +01:00
Javier Goizueta
5992304b47 Add a couple of overview clustering strategies 2016-11-03 13:31:04 +01:00
Rafa de la Torre
30cd4cf1f9 Fix tests: race condition with publicuser #157 2016-10-17 16:31:10 +02:00
Paul Norman
3122a0479d Test behavior of binning fuctions with nulls
All test results are based off of existing behavior, which doesn't
always make sense (ref #247)
2016-04-28 09:59:33 -07:00
Paul Norman
956e56cd37 Use anyarray for equalintervalbins 2016-04-27 16:10:01 -07:00
Paul Norman
b19a5fc3dc Convert CDB_EqualIntervalBins to a single SQL statement and add float version 2016-04-25 14:35:26 -07:00
Stuart Lynn
0ecbbd8e71 Make sure that empty arrays or arrays with all the same entry return 0 for Skewness and Kurtosis rather than throwing a division by zero error 2015-12-04 14:54:15 -05:00
27 changed files with 567 additions and 88 deletions

View File

@@ -1,14 +1,41 @@
language: c
addons:
postgresql: 9.3
postgresql: 9.5
before_install:
# Add custom PPAs from cartodb
- sudo add-apt-repository -y ppa:cartodb/postgresql-9.5
- sudo add-apt-repository -y ppa:cartodb/gis
- sudo add-apt-repository -y ppa:cartodb/gis-testing
- sudo apt-get update
#- sudo apt-get install -q postgresql-9.3-postgis-2.1
- sudo apt-get update
- sudo apt-get install -q postgresql-server-dev-9.3
- sudo apt-get install -q postgresql-plpython-9.3
# Force instalation of libgeos-3.5.0 (presumably needed because of existing version of postgis)
- sudo apt-get -y install libgeos-3.5.0=3.5.0-1cdb2
# Install postgres db and build deps
- sudo /etc/init.d/postgresql stop # stop travis default instance
- sudo apt-get -y remove --purge postgresql-9.1
- sudo apt-get -y remove --purge postgresql-9.2
- sudo apt-get -y remove --purge postgresql-9.3
- sudo apt-get -y remove --purge postgresql-9.4
- sudo apt-get -y remove --purge postgresql-9.5
- sudo rm -rf /var/lib/postgresql/
- sudo rm -rf /var/log/postgresql/
- sudo rm -rf /etc/postgresql/
- sudo apt-get -y remove --purge postgis-2.2
- sudo apt-get -y autoremove
- sudo apt-get -y install postgresql-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2
- sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2
# configure it to accept local connections from postgres
- echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \
| sudo tee /etc/postgresql/9.5/main/pg_hba.conf
- sudo /etc/init.d/postgresql restart 9.5
script:
- make

View File

@@ -62,3 +62,7 @@ A useful query:
```sql
SELECT * FROM pg_extension_update_paths('cartodb') WHERE path IS NOT NULL AND source = cdb_version();
```
## Submitting Contributions
* You will need to sign a Contributor License Agreement (CLA) before making a submission. [Learn more here](https://carto.com/contributions).

View File

@@ -1,7 +1,7 @@
# cartodb/Makefile
EXTENSION = cartodb
EXTVERSION = 0.18.3
EXTVERSION = 0.19.1
SED = sed
@@ -75,7 +75,12 @@ UPGRADABLE = \
0.17.1 \
0.18.0 \
0.18.1 \
0.18.2 \
0.18.3 \
0.18.4 \
0.18.5 \
0.19.0 \
0.19.1 \
$(EXTVERSION)dev \
$(EXTVERSION)next \
$(END)
@@ -105,7 +110,7 @@ PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
$(EXTENSION)--$(EXTVERSION).sql: $(CDBSCRIPTS) cartodb_version.sql Makefile
$(EXTENSION)--$(EXTVERSION).sql: $(CDBSCRIPTS) cartodb_version.sql Makefile
echo '\echo Use "CREATE EXTENSION $(EXTENSION)" to load this file. \quit' > $@
cat $(CDBSCRIPTS) | \
$(SED) -e 's/public\./cartodb./g' \
@@ -156,7 +161,7 @@ test_organization:
test_extension_new:
bash test/extension/test.sh
legacy_tests: legacy_regress
legacy_tests: legacy_regress
installcheck: legacy_tests test_extension_new test_organization

20
NEWS.md
View File

@@ -1,3 +1,23 @@
0.19.1 (2017-06-05)
* Fixed a deadlock problem when trying to regenarate overviews #302
0.19.0 (2017-04-11)
* Add new function `CDB_EstimateRowCount` #295
0.18.5 (2016-11-30)
* Add to new overview creation strategies #290
* Fix tests: race condition with publicuser #157
* Fix: CDB_Stats divisions by zero #181
* Better implementation of `CDB_EqualIntervalBins` #244
* New tests for binning functions #249
0.18.4 (2016-11-04)
* No functional changes; fixes the migration from previous versions #288
0.18.3 (2016-11-03)
* Exclude analysis cache tables from the quota #281

View File

@@ -1,8 +1,7 @@
cartodb-postgresql
==================
[![Build Status](http://api.travis-ci.org/CartoDB/cartodb-postgresql.svg?branch=master)]
(http://travis-ci.org/CartoDB/cartodb-postgresql)
[![Build Status](http://api.travis-ci.org/CartoDB/cartodb-postgresql.svg?branch=master)](http://travis-ci.org/CartoDB/cartodb-postgresql)
PostgreSQL extension for CartoDB

View File

@@ -0,0 +1,25 @@
Estimate the number of rows of a query.
#### Using the function
```sql
SELECT CDB_EstimateRowCount($$
UPDATE addresses SET the_geom = cdb_geocode_street_point(addr, city, state, 'US');
$$) AS row_count;
```
Result:
```
row_count
-----------
5
(1 row)
```
#### Arguments
CDB_EstimateRowCount(query)
* **query** text: the SQL query to estimate the row count for.

View File

@@ -1,8 +1,8 @@
--
-- Calculate the equal interval bins for a given column
--
-- @param in_array A numeric array of numbers to determine the best
-- to determine the bin boundary
-- @param in_array An array of numbers to determine the best
-- bin boundary
--
-- @param breaks The number of bins you want to find.
--
@@ -11,27 +11,14 @@
--
--
CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$
DECLARE
diff numeric;
min_val numeric;
max_val numeric;
tmp_val numeric;
i INT := 1;
reply numeric[];
BEGIN
SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL;
diff = (max_val - min_val) / breaks::numeric;
LOOP
IF i < breaks THEN
tmp_val = min_val + i::numeric * diff;
reply = array_append(reply, tmp_val);
i := i+1;
ELSE
reply = array_append(reply, max_val);
EXIT;
END IF;
END LOOP;
RETURN reply;
END;
$$ language plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array anyarray, breaks INT ) RETURNS anyarray as $$
WITH stats AS (
SELECT min(e), (max(e)-min(e))/breaks AS del
FROM (SELECT unnest(in_array) e) AS p)
SELECT array_agg(bins)
FROM (
SELECT min + generate_series(1,breaks)*del AS bins
FROM stats) q;
$$ LANGUAGE SQL IMMUTABLE;
DROP FUNCTION IF EXISTS CDB_EqualIntervalBins( numeric[], integer);

View File

@@ -0,0 +1,31 @@
-- Internal function to generate stats for a table if they don't exist
CREATE OR REPLACE FUNCTION _CDB_GenerateStats(reloid REGCLASS)
RETURNS VOID
AS $$
DECLARE
has_stats BOOLEAN;
BEGIN
SELECT EXISTS (
SELECT * FROM pg_catalog.pg_statistic WHERE starelid = reloid
) INTO has_stats;
IF NOT has_stats THEN
EXECUTE Format('ANALYZE %s;', reloid);
END IF;
END
$$ LANGUAGE 'plpgsql' VOLATILE STRICT SECURITY DEFINER;
-- Return a row count estimate of the result of a query using statistics
CREATE OR REPLACE FUNCTION CDB_EstimateRowCount(query text)
RETURNS Numeric
AS $$
DECLARE
plan JSON;
BEGIN
-- Make sure statistics exist for all the tables of the query
PERFORM _CDB_GenerateStats(tabname) FROM unnest(CDB_QueryTablesText(query)) AS tabname;
-- Use the query planner to obtain an estimate of the number of result rows
EXECUTE 'EXPLAIN (FORMAT JSON) ' || query INTO STRICT plan;
RETURN plan->0->'Plan'->'Plan Rows';
END
$$ LANGUAGE 'plpgsql' VOLATILE STRICT;

View File

@@ -257,7 +257,7 @@ $$ LANGUAGE PLPGSQL IMMUTABLE;
-- ref_z Z level assigned to the original table
-- overview_z Z level of the overview to be generated, must be smaller than ref_z
-- Return value: Name of the generated overview table
CREATE OR REPLACE FUNCTION _CDB_Sampling_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, tolerance_px FLOAT8 DEFAULT NULL)
CREATE OR REPLACE FUNCTION _CDB_Sampling_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, tolerance_px FLOAT8 DEFAULT NULL, has_overview_created BOOLEAN DEFAULT FALSE)
RETURNS REGCLASS
AS $$
DECLARE
@@ -268,6 +268,8 @@ AS $$
num_samples INTEGER;
schema_name TEXT;
table_name TEXT;
overview_table_name TEXT;
creation_clause TEXT;
BEGIN
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- TODO: compute fraction from tolerance_px if not NULL
@@ -275,7 +277,15 @@ AS $$
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);
overview_table_name := Format('%I.%I', schema_name, overview_rel);
IF has_overview_created THEN
RAISE NOTICE 'Sampling reduce stategy deleting and inserting because % has overviews', overview_table_name;
EXECUTE Format('DELETE FROM %s;', overview_table_name);
creation_clause := Format('INSERT INTO %s', overview_table_name);
ELSE
RAISE NOTICE 'Sampling reduce stategy creating a new table overview %', overview_table_name;
creation_clause := Format('CREATE TABLE %s AS', overview_table_name);
END IF;
-- Estimate number of rows
SELECT reltuples, relpages FROM pg_class INTO STRICT class_info
@@ -284,21 +294,21 @@ AS $$
IF class_info.relpages < 2 OR fraction > 0.5 THEN
-- We'll avoid possible CDB_RandomTids problems
EXECUTE Format('
CREATE TABLE %I AS SELECT * FROM %s WHERE random() < %s;
', overview_rel, reloid, fraction);
%s SELECT * FROM %s WHERE random() < %s;
', creation_clause, reloid, fraction);
ELSE
num_samples := ceil(class_info.reltuples*fraction);
EXECUTE Format('
CREATE TABLE %4$I.%1$I AS SELECT * FROM %2$s
%1$s SELECT * FROM %2$s
WHERE ctid = ANY (
ARRAY[
(SELECT CDB_RandomTids(''%2$s'', %3$s))
]
);
', overview_rel, reloid, num_samples, schema_name);
', creation_clause, reloid, num_samples);
END IF;
RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
RETURN Format('%s', overview_table_name)::regclass;
END;
$$ LANGUAGE PLPGSQL;
@@ -581,7 +591,7 @@ $$ LANGUAGE PLPGSQL STABLE;
-- ref_z Z level assigned to the original table
-- overview_z Z level of the overview to be generated, must be smaller than ref_z
-- Return value: Name of the generated overview table
CREATE OR REPLACE FUNCTION _CDB_GridCluster_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL)
CREATE OR REPLACE FUNCTION _CDB_GridCluster_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL, has_overview_created BOOLEAN DEFAULT FALSE)
RETURNS REGCLASS
AS $$
DECLARE
@@ -602,6 +612,8 @@ AS $$
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
overview_table_name TEXT;
creation_clause TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
@@ -671,14 +683,22 @@ AS $$
columns := columns || ', n AS _feature_count';
END IF;
EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);
overview_table_name := Format('%I.%I', schema_name, overview_rel);
IF has_overview_created THEN
RAISE NOTICE 'Grid cluster strategy deleting and inserting because % has overviews', overview_table_name;
EXECUTE Format('DELETE FROM %s;', overview_table_name);
creation_clause := Format('INSERT INTO %s', overview_table_name);
ELSE
RAISE NOTICE 'Grid cluster strategy creating a new table overview %', overview_table_name;
creation_clause := Format('CREATE TABLE %s AS', overview_table_name);
END IF;
-- Now we cluster the data using a grid of size grid_m
-- and selecte the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
CREATE TABLE %7$I.%3$I AS
%3$s
WITH clusters AS (
SELECT
%5$s
@@ -691,9 +711,263 @@ AS $$
GROUP BY gx, gy
)
SELECT %6$s FROM clusters
', reloid::text, grid_m, overview_rel, attributes, aggr_attributes, columns, schema_name);
', reloid::text, grid_m, creation_clause, attributes, aggr_attributes, columns);
RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
RETURN Format('%s', overview_table_name)::regclass;
END;
$$ LANGUAGE PLPGSQL;
-- This strategy places the aggregation of each cluster at the centroid of the cluster members.
CREATE OR REPLACE FUNCTION _CDB_GridClusterCentroid_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL, has_overview_created BOOLEAN DEFAULT FALSE)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
gtypes TEXT[];
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
overview_table_name TEXT;
creation_clause TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL;
END IF;
--TODO: check applicability: geometry type, minimum number of points...
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px := 1.0;
END IF;
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
IF attributes <> '' THEN
attributes := ', ' || attributes;
END IF;
IF aggr_attributes <> '' THEN
aggr_attributes := aggr_attributes || ', ';
END IF;
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN 'cartodb_id' THEN 'cartodb_id'
WHEN 'the_geom' THEN
'ST_Transform(ST_SetSRID(ST_MakePoint(_sum_of_x/n, _sum_of_y/n), 3857), 4326) AS the_geom'
WHEN 'the_geom_webmercator' THEN
'ST_SetSRID(ST_MakePoint(_sum_of_x/n, _sum_of_y/n), 3857) AS the_geom_webmercator'
ELSE c
END AS column
FROM CDB_ColumnNames(reloid) c
)
SELECT string_agg(s.column, ',') FROM (
SELECT * FROM cols
) AS s INTO columns;
IF NOT columns LIKE '%_feature_count%' THEN
columns := columns || ', n AS _feature_count';
END IF;
overview_table_name := Format('%I.%I', schema_name, overview_rel);
IF has_overview_created THEN
RAISE NOTICE 'Grid cluster centroid strategy deleting and inserting because % has overviews', overview_table_name;
EXECUTE Format('DELETE FROM %s;', overview_table_name);
creation_clause := Format('INSERT INTO %s', overview_table_name);
ELSE
RAISE NOTICE 'Grid cluster centroid strategy creating a new table overview %', overview_table_name;
creation_clause := Format('CREATE TABLE %s AS', overview_table_name);
END IF;
-- Now we cluster the data using a grid of size grid_m
-- and selecte the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
%3$s
WITH clusters AS (
SELECT
%5$s
count(*) AS n,
SUM(ST_X(f.the_geom_webmercator)) AS _sum_of_x,
SUM(ST_Y(f.the_geom_webmercator)) AS _sum_of_y,
Floor(ST_Y(f.the_geom_webmercator)/%2$s)::int AS gy,
Floor(ST_X(f.the_geom_webmercator)/%2$s)::int AS gx,
MIN(cartodb_id) AS cartodb_id
FROM %1$s f
GROUP BY gx, gy
)
SELECT %6$s FROM clusters
', reloid::text, grid_m, creation_clause, attributes, aggr_attributes, columns);
RETURN Format('%s', overview_table_name)::regclass;
END;
$$ LANGUAGE PLPGSQL;
-- This strategy places the aggregation of each cluster at the position of one of the cluster members.
CREATE OR REPLACE FUNCTION _CDB_GridClusterSample_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL, has_overview_created BOOLEAN DEFAULT FALSE)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
gtypes TEXT[];
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
overview_table_name TEXT;
creation_clause TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL;
END IF;
--TODO: check applicability: geometry type, minimum number of points...
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px := 1.0;
END IF;
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
IF attributes <> '' THEN
attributes := ', ' || attributes;
END IF;
IF aggr_attributes <> '' THEN
aggr_attributes := aggr_attributes || ', ';
END IF;
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN 'cartodb_id' THEN 'cartodb_id'
ELSE c
END AS column
FROM CDB_ColumnNames(reloid) c
)
SELECT string_agg(s.column, ',') FROM (
SELECT * FROM cols
) AS s INTO columns;
IF NOT columns LIKE '%_feature_count%' THEN
columns := columns || ', n AS _feature_count';
END IF;
overview_table_name := Format('%I.%I', schema_name, overview_rel);
IF has_overview_created THEN
RAISE NOTICE 'Grid cluster sampling strategy deleting and inserting because % has overviews', overview_table_name;
EXECUTE Format('DELETE FROM %s;', overview_table_name);
creation_clause := Format('INSERT INTO %s', overview_table_name);
ELSE
RAISE NOTICE 'Grid cluster sampling strategy creating a new table overview %', overview_table_name;
creation_clause := Format('CREATE TABLE %s AS', overview_table_name);
END IF;
-- Now we cluster the data using a grid of size grid_m
-- and select the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
%3$s
WITH clusters AS (
SELECT
%5$s
count(*) AS n,
Floor(ST_X(_f.the_geom_webmercator)/%2$s)::int AS gx,
Floor(ST_Y(_f.the_geom_webmercator)/%2$s)::int AS gy,
MIN(cartodb_id) AS cartodb_id
FROM %1$s _f
GROUP BY gx, gy
),
cluster_geom AS (
SELECT the_geom, the_geom_webmercator, clusters.*
FROM clusters INNER JOIN %1$s _g ON (clusters.cartodb_id = _g.cartodb_id)
)
SELECT %6$s FROM cluster_geom
', reloid::text, grid_m, creation_clause, attributes, aggr_attributes, columns);
RETURN Format('%s', overview_table_name)::regclass;
END;
$$ LANGUAGE PLPGSQL;
@@ -708,7 +982,7 @@ $$ LANGUAGE PLPGSQL;
-- created by the strategy must have the same columns
-- as the base table and in the same order.
-- Return value: Array with the names of the generated overview tables
CREATE OR REPLACE FUNCTION CDB_CreateOverviews(reloid REGCLASS, refscale_strategy regproc DEFAULT '_CDB_Feature_Density_Ref_Z_Strategy(REGCLASS,FLOAT8)'::regprocedure, reduce_strategy regproc DEFAULT '_CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8)'::regprocedure)
CREATE OR REPLACE FUNCTION CDB_CreateOverviews(reloid REGCLASS, refscale_strategy regproc DEFAULT '_CDB_Feature_Density_Ref_Z_Strategy(REGCLASS,FLOAT8)'::regprocedure, reduce_strategy regproc DEFAULT '_CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8,BOOLEAN)'::regprocedure)
RETURNS text[]
AS $$
DECLARE
@@ -721,7 +995,7 @@ END;
$$ LANGUAGE PLPGSQL;
-- Create overviews with additional parameter to define the desired detail/tolerance in pixels
CREATE OR REPLACE FUNCTION CDB_CreateOverviewsWithToleranceInPixels(reloid REGCLASS, tolerance_px FLOAT8, refscale_strategy regproc DEFAULT '_CDB_Feature_Density_Ref_Z_Strategy(REGCLASS,FLOAT8)'::regprocedure, reduce_strategy regproc DEFAULT '_CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8)'::regprocedure)
CREATE OR REPLACE FUNCTION CDB_CreateOverviewsWithToleranceInPixels(reloid REGCLASS, tolerance_px FLOAT8, refscale_strategy regproc DEFAULT '_CDB_Feature_Density_Ref_Z_Strategy(REGCLASS,FLOAT8)'::regprocedure, reduce_strategy regproc DEFAULT '_CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8,BOOLEAN)'::regprocedure)
RETURNS text[]
AS $$
DECLARE
@@ -733,6 +1007,7 @@ DECLARE
overview_tables REGCLASS[];
overviews_step integer := 1;
has_counter_column boolean;
has_overviews_for_z boolean;
BEGIN
-- Determine the referece zoom level
EXECUTE 'SELECT ' || quote_ident(refscale_strategy::text) || Format('(''%s'', %s);', reloid, tolerance_px) INTO ref_z;
@@ -749,16 +1024,24 @@ BEGIN
overview_z := overview_z - overviews_step;
END LOOP;
-- Create overlay tables
-- TODO Check for non-used overview to delete them but we have to be aware that the
-- current queries, for example from a tiler, are been used with the old overviews
-- so if we remove the overviews in the process this could lead to errors
-- Create or reganerate overlay tables
base_z := ref_z;
base_rel := reloid;
FOREACH overview_z IN ARRAY overviews_z LOOP
EXECUTE 'SELECT ' || quote_ident(reduce_strategy::text) || Format('(''%s'', %s, %s, %s);', base_rel, base_z, overview_z, tolerance_px) INTO base_rel;
SELECT CASE WHEN count(*) > 0 THEN TRUE ELSE FALSE END from CDB_Overviews(reloid) WHERE z = overview_z INTO has_overviews_for_z;
EXECUTE 'SELECT ' || quote_ident(reduce_strategy::text) || Format('(''%s'', %s, %s, %s, ''%s'');', base_rel, base_z, overview_z, tolerance_px, has_overviews_for_z) INTO base_rel;
IF base_rel IS NULL THEN
EXIT;
END IF;
base_z := overview_z;
PERFORM _CDB_Register_Overview(reloid, base_rel, base_z);
IF NOT has_overviews_for_z THEN
RAISE NOTICE 'Registering overview: %', base_rel;
PERFORM _CDB_Register_Overview(reloid, base_rel, base_z);
END IF;
SELECT array_append(overview_tables, base_rel) INTO overview_tables;
END LOOP;
@@ -777,9 +1060,13 @@ BEGIN
END;
$$ LANGUAGE PLPGSQL;
-- Here are some older signatures of these functions, no longar in use.
-- Here are some older signatures of these functions, no longer in use.
-- They must be droped here, after the (new) definition of the function `CDB_CreateOverviews`
-- because that function used to contain references to them in the default argument values.
DROP FUNCTION IF EXISTS _CDB_Feature_Density_Ref_Z_Strategy(REGCLASS);
DROP FUNCTION IF EXISTS _CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER);
DROP FUNCTION IF EXISTS _CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8);
DROP FUNCTION IF EXISTS _CDB_GridClusterCentroid_Reduce_Strategy(REGCLASS, INTEGER, INTEGER, FLOAT8);
DROP FUNCTION IF EXISTS _CDB_GridClusterSample_Reduce_Strategy(REGCLASS, INTEGER, INTEGER, FLOAT8);
DROP FUNCTION IF EXISTS _CDB_Sampling_Reduce_Strategy(REGCLASS,INTEGER,INTEGER);
DROP FUNCTION IF EXISTS _CDB_Sampling_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8);

View File

@@ -4,7 +4,7 @@
-- @param in_array A numeric array of numbers
--
-- Returns: statistical quantity chosen
--
--
-- References: http://www.itl.nist.gov/div898/handbook/eda/section3/eda35b.htm
--
@@ -13,17 +13,21 @@ CREATE OR REPLACE FUNCTION CDB_Kurtosis ( in_array NUMERIC[] ) RETURNS NUMERIC a
DECLARE
a numeric;
c numeric;
s numeric;
k numeric;
BEGIN
SELECT AVG(e), COUNT(e)::numeric, stddev(e) INTO a, c, s FROM ( SELECT unnest(in_array) e ) x;
SELECT AVG(e), COUNT(e)::numeric * power(stddev(e),4) INTO a, c FROM ( SELECT unnest(in_array) e ) x;
EXECUTE 'SELECT sum(power($1 - e, 4)) / ( $2 * power($3, 4)) - 3
FROM (SELECT unnest($4) e ) x'
INTO k
USING a, c, s, in_array;
IF c=0 THEN
RETURN 0;
ELSE
RETURN k;
EXECUTE 'SELECT sum(power($1 - e, 4)) / ($2 ) - 3
FROM (SELECT unnest($3) e ) x'
INTO k
USING a, c, in_array;
RETURN k;
END IF;
END;
$$ language plpgsql IMMUTABLE;
@@ -32,16 +36,18 @@ CREATE OR REPLACE FUNCTION CDB_Skewness ( in_array NUMERIC[] ) RETURNS NUMERIC a
DECLARE
a numeric;
c numeric;
s numeric;
sk numeric;
BEGIN
SELECT AVG(e), COUNT(e)::numeric, stddev(e) INTO a, c, s FROM ( SELECT unnest(in_array) e ) x;
SELECT AVG(e), COUNT(e)::numeric * power(stddev(e),3) INTO a, c FROM ( SELECT unnest(in_array) e ) x;
IF c=0 THEN
RETURN 0;
ELSE
EXECUTE 'SELECT sum(power($1 - e, 3)) / ( $2 )
FROM (SELECT unnest($3) e ) x'
INTO sk
USING a, c, in_array;
EXECUTE 'SELECT sum(power($1 - e, 3)) / ( $2 * power($3, 3))
FROM (SELECT unnest($4) e ) x'
INTO sk
USING a, c, s, in_array;
RETURN sk;
RETURN sk;
END IF;
END;
$$ language plpgsql IMMUTABLE;

View File

@@ -0,0 +1 @@
../scripts-available/CDB_EstimateRowCount.sql

View File

@@ -1,5 +1,6 @@
BEGIN
CREATE TABLE
COPY 3
none||
only_com_dec|.|,
only_dot_dec|,|.

View File

@@ -2,4 +2,10 @@ WITH data AS (
SELECT array_agg(x::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_EqualIntervalBins(s, 7)),7) FROM data
SELECT round(unnest(CDB_EqualIntervalBins(s, 7)),7) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_EqualIntervalBins(s, 7)),7) FROM data_nulls;

View File

@@ -5,3 +5,10 @@
213.8571429
256.4285714
299.0000000
15.0000000
29.0000000
43.0000000
57.0000000
71.0000000
85.0000000
99.0000000

View File

@@ -0,0 +1,10 @@
SET client_min_messages TO error;
\set VERBOSITY terse
CREATE TABLE tmptab1(id INT);
INSERT INTO tmptab1(id) VALUES (1), (2), (3);
CREATE TABLE tmptab2(id INT, value NUMERIC);
INSERT INTO tmptab2(id, value) VALUES (1, 10.0), (2, 20.0);
SELECT CDB_EstimateRowCount('SELECT SUM(value) FROM tmptab1 INNER JOIN tmptab2 ON (tmptab1.id = tmptab2.id);') AS row_count;
SELECT CDB_EstimateRowCount('UPDATE tmptab2 SET value = 30 WHERE id=2;') AS row_count;
DROP TABLE tmptab2;
DROP TABLE tmptab1;

View File

@@ -0,0 +1,9 @@
SET
CREATE TABLE
INSERT 0 3
CREATE TABLE
INSERT 0 2
1
1
DROP TABLE
DROP TABLE

View File

@@ -1,5 +1,11 @@
WITH data AS (
SELECT array_agg(x) x FROM generate_series(1,100) x
SELECT array_agg(x::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_HeadsTailsBins(x, 7)),2) FROM data
)
SELECT round(unnest(CDB_HeadsTailsBins(s, 7)),2) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_HeadsTailsBins(s, 7)),2) FROM data_nulls;

View File

@@ -5,3 +5,9 @@
96.50
98.00
99.00
49.76
74.65
88.50
94.50
98.00
99.00

View File

@@ -1,5 +1,11 @@
WITH data AS (
SELECT array_agg(x) x FROM generate_series(1,100) x
SELECT array_agg(x::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_JenksBins(x, 7)) FROM data
SELECT unnest(CDB_JenksBins(s, 7)) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_JenksBins(s, 7)) FROM data_nulls;

View File

@@ -1,7 +1,14 @@
13
29
43
57
71
83
99
86
129
172
213
257
299
37
51
97
157
213
241

View File

@@ -1,5 +1,11 @@
WITH data AS (
SELECT array_agg(x) x FROM generate_series(1,100) x
SELECT array_agg(x::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_QuantileBins(x, 7)) FROM data
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data_nulls;

View File

@@ -4,4 +4,11 @@
57
71
86
99
29
57
87
99

View File

@@ -1,3 +1,6 @@
SET client_min_messages TO error;
\set VERBOSITY terse
WITH q AS ( SELECT CDB_QueryStatements('
SELECT * FROM geometry_columns;
') as statement )

View File

@@ -1,3 +1,4 @@
SET
1|1|SELECT * FROM geometry_columns
2|1|SELECT * FROM geometry_columns
3|1|SELECT * FROM geometry_columns

View File

@@ -1,3 +1,5 @@
SET client_min_messages TO warning;
\set VERBOSITY terse
WITH inp AS ( select 'SELECT * FROM geometry_columns'::text as q )
SELECT q, CDB_QueryTables(q) from inp;

View File

@@ -1,17 +1,14 @@
SET
SELECT * FROM geometry_columns|{pg_catalog.pg_attribute,pg_catalog.pg_class,pg_catalog.pg_constraint,pg_catalog.pg_namespace,pg_catalog.pg_type}
SELECT a.attname FROM pg_class c JOIN pg_attribute a on (a.attrelid = c.oid)|{pg_catalog.pg_attribute,pg_catalog.pg_class}
CREATE table "my'tab;le" as select 1|{}
SELECT a.oid, b.oid FROM pg_class a, pg_class b|{pg_catalog.pg_class}
SELECT 1 as col1; select 2 as col2|{}
WARNING: CDB_QueryTables cannot explain query: select 1 from nonexistant (42P01: relation "nonexistant" does not exist)
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
ERROR: relation "nonexistant" does not exist
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
begin; select * from pg_class; commit;|{pg_catalog.pg_class}
WARNING: CDB_QueryTables cannot explain query: select * from test (42P01: relation "test" does not exist)
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
ERROR: relation "test" does not exist
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
WITH a AS (select * from pg_class) select * from a|{pg_catalog.pg_class}
CREATE SCHEMA
CREATE TABLE

View File

@@ -228,6 +228,7 @@ function tear_down() {
sql 'DROP ROLE cdb_testmember_2;'
tear_down_database
DATABASE=postgres sql postgres 'DROP ROLE IF EXISTS publicuser';
}
@@ -486,6 +487,18 @@ function test_foreign_tables() {
${CMD} -d fdw_target -f scripts-available/CDB_QueryTables.sql
${CMD} -d fdw_target -f scripts-available/CDB_TableMetadata.sql
DATABASE=fdw_target sql postgres "DO
\$\$
BEGIN
IF NOT EXISTS (
SELECT *
FROM pg_catalog.pg_user
WHERE usename = 'publicuser') THEN
CREATE ROLE publicuser LOGIN;
END IF;
END
\$\$;"
DATABASE=fdw_target sql postgres 'CREATE SCHEMA test_fdw;'
DATABASE=fdw_target sql postgres 'CREATE TABLE test_fdw.foo (a int);'
DATABASE=fdw_target sql postgres 'INSERT INTO test_fdw.foo (a) values (42);'