Compare commits

...

31 Commits

Author SHA1 Message Date
Javier Goizueta
2d473cf693 New version 0.19.0 2017-04-11 11:22:20 +02:00
Javier Goizueta
4193ff3874 Merge pull request #298 from CartoDB/295-estimate-row-count
Add CDB_EstimateRowCount function
2017-04-11 11:01:31 +02:00
Javier Goizueta
68a0752849 Use PG 9.5 for travis tests; fix tests 2017-04-10 15:58:49 +02:00
Javier Goizueta
815b5b429d Fix tests 2017-04-10 13:50:37 +02:00
Javier Goizueta
76bdb3657a Fix tests 2017-04-10 12:17:47 +02:00
Javier Goizueta
234373df11 Replace unnecessary count 2017-04-10 08:08:59 +02:00
Javier Goizueta
a486eed2e3 Add CDB_EstimateRowCount function
See #295
2017-04-07 15:35:48 +02:00
Mario de Frutos
795d92da8d Added CLA paragraph 2017-01-25 10:54:12 +01:00
Javier Goizueta
58e2e7e238 Release 0.18.5 2016-11-30 17:17:45 +01:00
Javier Goizueta
25d27263cb Merge pull request #249 from CartoDB/nullbins
Test behavior of binning fuctions with nulls
2016-11-30 16:09:23 +01:00
Javier Goizueta
bbadcc838e Merge pull request #244 from CartoDB/equalbins
Convert CDB_EqualIntervalBins to a single SQL statement and add float version
2016-11-30 16:09:05 +01:00
Javier Goizueta
b1a0904c07 Merge pull request #181 from CartoDB/update_to_cdb_stats
Fix for division by zero error on empty or homogenous array
2016-11-30 16:08:40 +01:00
Javier Goizueta
399b680b41 Merge pull request #283 from CartoDB/157-test-fixes
Fix tests: race condition with publicuser #157
2016-11-30 16:08:21 +01:00
Javier Goizueta
7c0636c5f9 Merge pull request #290 from CartoDB/286-overview-strategies
Add point overview strategies
2016-11-30 11:46:29 +01:00
Javier Goizueta
f58f870457 Remove use of first aggregator in sample-cluster overviews strategy
This is not more efficient but the geometry now corresponds to the cartodb_id
and the dependency with custom aggregators (firt) is removed.
2016-11-29 14:08:08 +01:00
Javier Goizueta
a7c8dc04e3 Release 0.18.4
This just fixes the lack of migration path from 0.18.2
2016-11-04 16:25:03 +01:00
Javier Goizueta
90ee56eb35 Merge pull request #288 from CartoDB/fix-migration
Fix migration script generation
2016-11-04 16:22:56 +01:00
Javier Goizueta
1032737600 Fix migration script generation
In the 0.18.3 release the script to migrate from 0.18.2 was missing.
This will generate a new version 0.18.4 that when install will generate
scripts to migrate from all old versions to it, so it will be possible
to migrate existing users to 0.18.4 (but not to 0.18.3)
2016-11-04 16:19:06 +01:00
Javier Goizueta
24639713f1 Release 0.18.3
The exclusion of analysis cache tables size from the quota (#281)
was meant to be included in 0.18.0 but missed the release, so
it's being released in this version.
2016-11-03 16:13:57 +01:00
Javier Goizueta
fff7e926c9 Merge pull request #287 from CartoDB/fix-281-merge
Fix 281 merge
2016-11-03 16:09:02 +01:00
Javier Goizueta
7d7ecc06f5 Merge branch 'master' into fix-281-merge 2016-11-03 15:57:24 +01:00
Javier Goizueta
5992304b47 Add a couple of overview clustering strategies 2016-11-03 13:31:04 +01:00
Rafa de la Torre
aa9286eaba Merge pull request #281 from CartoDB/277-exclude-analysis-quota
Exclude analysis cache tables from the quota
2016-10-17 17:56:21 +02:00
Rafa de la Torre
30cd4cf1f9 Fix tests: race condition with publicuser #157 2016-10-17 16:31:10 +02:00
Javier Goizueta
1356131ec1 Fix tests 2016-10-14 11:22:06 +02:00
Javier Goizueta
9731ce38ec Exclude configuration tables from the quota
cdb_tablemetadata was already being excluded, but not cdb_conf and cdb_analysis_catalog
2016-10-14 10:58:16 +02:00
Javier Goizueta
07892271e5 Exclude analysis cache tables from the quota
See #277
2016-10-14 10:56:14 +02:00
Paul Norman
3122a0479d Test behavior of binning fuctions with nulls
All test results are based off of existing behavior, which doesn't
always make sense (ref #247)
2016-04-28 09:59:33 -07:00
Paul Norman
956e56cd37 Use anyarray for equalintervalbins 2016-04-27 16:10:01 -07:00
Paul Norman
b19a5fc3dc Convert CDB_EqualIntervalBins to a single SQL statement and add float version 2016-04-25 14:35:26 -07:00
Stuart Lynn
0ecbbd8e71 Make sure that empty arrays or arrays with all the same entry return 0 for Skewness and Kurtosis rather than throwing a division by zero error 2015-12-04 14:54:15 -05:00
31 changed files with 639 additions and 77 deletions

View File

@@ -1,14 +1,41 @@
language: c
addons:
postgresql: 9.3
postgresql: 9.5
before_install:
# Add custom PPAs from cartodb
- sudo add-apt-repository -y ppa:cartodb/postgresql-9.5
- sudo add-apt-repository -y ppa:cartodb/gis
- sudo add-apt-repository -y ppa:cartodb/gis-testing
- sudo apt-get update
#- sudo apt-get install -q postgresql-9.3-postgis-2.1
- sudo apt-get update
- sudo apt-get install -q postgresql-server-dev-9.3
- sudo apt-get install -q postgresql-plpython-9.3
# Force instalation of libgeos-3.5.0 (presumably needed because of existing version of postgis)
- sudo apt-get -y install libgeos-3.5.0=3.5.0-1cdb2
# Install postgres db and build deps
- sudo /etc/init.d/postgresql stop # stop travis default instance
- sudo apt-get -y remove --purge postgresql-9.1
- sudo apt-get -y remove --purge postgresql-9.2
- sudo apt-get -y remove --purge postgresql-9.3
- sudo apt-get -y remove --purge postgresql-9.4
- sudo apt-get -y remove --purge postgresql-9.5
- sudo rm -rf /var/lib/postgresql/
- sudo rm -rf /var/log/postgresql/
- sudo rm -rf /etc/postgresql/
- sudo apt-get -y remove --purge postgis-2.2
- sudo apt-get -y autoremove
- sudo apt-get -y install postgresql-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2
- sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2
# configure it to accept local connections from postgres
- echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \
| sudo tee /etc/postgresql/9.5/main/pg_hba.conf
- sudo /etc/init.d/postgresql restart 9.5
script:
- make

View File

@@ -62,3 +62,7 @@ A useful query:
```sql
SELECT * FROM pg_extension_update_paths('cartodb') WHERE path IS NOT NULL AND source = cdb_version();
```
## Submitting Contributions
* You will need to sign a Contributor License Agreement (CLA) before making a submission. [Learn more here](https://carto.com/contributions).

View File

@@ -1,7 +1,7 @@
# cartodb/Makefile
EXTENSION = cartodb
EXTVERSION = 0.18.2
EXTVERSION = 0.19.0
SED = sed
@@ -76,6 +76,10 @@ UPGRADABLE = \
0.18.0 \
0.18.1 \
0.18.2 \
0.18.3 \
0.18.4 \
0.18.5 \
0.19.0 \
$(EXTVERSION)dev \
$(EXTVERSION)next \
$(END)

22
NEWS.md
View File

@@ -1,3 +1,23 @@
0.19.0 (2017-04-11)
* Add new function `CDB_EstimateRowCount` #295
0.18.5 (2016-11-30)
* Add to new overview creation strategies #290
* Fix tests: race condition with publicuser #157
* Fix: CDB_Stats divisions by zero #181
* Better implementation of `CDB_EqualIntervalBins` #244
* New tests for binning functions #249
0.18.4 (2016-11-04)
* No functional changes; fixes the migration from previous versions #288
0.18.3 (2016-11-03)
* Exclude analysis cache tables from the quota #281
0.18.2 (2016-10-20)
-------------------
@@ -14,8 +34,6 @@
* Fix: exclude NULL geometries when creating Overviews #269
* Function to check analysis tables limits #279
* Exclude analysis cache tables from the quota #281
0.17.1 (2016-08-16)
-------------------

View File

@@ -0,0 +1,25 @@
Estimate the number of rows of a query.
#### Using the function
```sql
SELECT CDB_EstimateRowCount($$
UPDATE addresses SET the_geom = cdb_geocode_street_point(addr, city, state, 'US');
$$) AS row_count;
```
Result:
```
row_count
-----------
5
(1 row)
```
#### Arguments
CDB_EstimateRowCount(query)
* **query** text: the SQL query to estimate the row count for.

View File

@@ -1,8 +1,8 @@
--
-- Calculate the equal interval bins for a given column
--
-- @param in_array A numeric array of numbers to determine the best
-- to determine the bin boundary
-- @param in_array An array of numbers to determine the best
-- bin boundary
--
-- @param breaks The number of bins you want to find.
--
@@ -11,27 +11,14 @@
--
--
CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$
DECLARE
diff numeric;
min_val numeric;
max_val numeric;
tmp_val numeric;
i INT := 1;
reply numeric[];
BEGIN
SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL;
diff = (max_val - min_val) / breaks::numeric;
LOOP
IF i < breaks THEN
tmp_val = min_val + i::numeric * diff;
reply = array_append(reply, tmp_val);
i := i+1;
ELSE
reply = array_append(reply, max_val);
EXIT;
END IF;
END LOOP;
RETURN reply;
END;
$$ language plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array anyarray, breaks INT ) RETURNS anyarray as $$
WITH stats AS (
SELECT min(e), (max(e)-min(e))/breaks AS del
FROM (SELECT unnest(in_array) e) AS p)
SELECT array_agg(bins)
FROM (
SELECT min + generate_series(1,breaks)*del AS bins
FROM stats) q;
$$ LANGUAGE SQL IMMUTABLE;
DROP FUNCTION IF EXISTS CDB_EqualIntervalBins( numeric[], integer);

View File

@@ -0,0 +1,31 @@
-- Internal function to generate stats for a table if they don't exist
CREATE OR REPLACE FUNCTION _CDB_GenerateStats(reloid REGCLASS)
RETURNS VOID
AS $$
DECLARE
has_stats BOOLEAN;
BEGIN
SELECT EXISTS (
SELECT * FROM pg_catalog.pg_statistic WHERE starelid = reloid
) INTO has_stats;
IF NOT has_stats THEN
EXECUTE Format('ANALYZE %s;', reloid);
END IF;
END
$$ LANGUAGE 'plpgsql' VOLATILE STRICT SECURITY DEFINER;
-- Return a row count estimate of the result of a query using statistics
CREATE OR REPLACE FUNCTION CDB_EstimateRowCount(query text)
RETURNS Numeric
AS $$
DECLARE
plan JSON;
BEGIN
-- Make sure statistics exist for all the tables of the query
PERFORM _CDB_GenerateStats(tabname) FROM unnest(CDB_QueryTablesText(query)) AS tabname;
-- Use the query planner to obtain an estimate of the number of result rows
EXECUTE 'EXPLAIN (FORMAT JSON) ' || query INTO STRICT plan;
RETURN plan->0->'Plan'->'Plan Rows';
END
$$ LANGUAGE 'plpgsql' VOLATILE STRICT;

View File

@@ -697,6 +697,356 @@ AS $$
END;
$$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION _CDB_GridCluster_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
gtypes TEXT[];
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL;
END IF;
--TODO: check applicability: geometry type, minimum number of points...
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px := 1.0;
END IF;
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
IF attributes <> '' THEN
attributes := ', ' || attributes;
END IF;
IF aggr_attributes <> '' THEN
aggr_attributes := aggr_attributes || ', ';
END IF;
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN 'cartodb_id' THEN 'cartodb_id'
WHEN 'the_geom' THEN
Format('ST_Transform(%s, 4326) AS the_geom', point_geom)
WHEN 'the_geom_webmercator' THEN
Format('%s AS the_geom_webmercator', point_geom)
ELSE c
END AS column
FROM CDB_ColumnNames(reloid) c
)
SELECT string_agg(s.column, ',') FROM (
SELECT * FROM cols
) AS s INTO columns;
IF NOT columns LIKE '%_feature_count%' THEN
columns := columns || ', n AS _feature_count';
END IF;
EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);
-- Now we cluster the data using a grid of size grid_m
-- and selecte the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
CREATE TABLE %7$I.%3$I AS
WITH clusters AS (
SELECT
%5$s
count(*) AS n,
Floor(ST_X(f.the_geom_webmercator)/%2$s)::int AS gx,
Floor(ST_Y(f.the_geom_webmercator)/%2$s)::int AS gy,
MIN(cartodb_id) AS cartodb_id
FROM %1$s f
GROUP BY gx, gy
)
SELECT %6$s FROM clusters
', reloid::text, grid_m, overview_rel, attributes, aggr_attributes, columns, schema_name);
RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
END;
$$ LANGUAGE PLPGSQL;
-- This strategy places the aggregation of each cluster at the centroid of the cluster members.
CREATE OR REPLACE FUNCTION _CDB_GridClusterCentroid_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
gtypes TEXT[];
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL;
END IF;
--TODO: check applicability: geometry type, minimum number of points...
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px := 1.0;
END IF;
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
IF attributes <> '' THEN
attributes := ', ' || attributes;
END IF;
IF aggr_attributes <> '' THEN
aggr_attributes := aggr_attributes || ', ';
END IF;
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN 'cartodb_id' THEN 'cartodb_id'
WHEN 'the_geom' THEN
'ST_Transform(ST_SetSRID(ST_MakePoint(_sum_of_x/n, _sum_of_y/n), 3857), 4326) AS the_geom'
WHEN 'the_geom_webmercator' THEN
'ST_SetSRID(ST_MakePoint(_sum_of_x/n, _sum_of_y/n), 3857) AS the_geom_webmercator'
ELSE c
END AS column
FROM CDB_ColumnNames(reloid) c
)
SELECT string_agg(s.column, ',') FROM (
SELECT * FROM cols
) AS s INTO columns;
IF NOT columns LIKE '%_feature_count%' THEN
columns := columns || ', n AS _feature_count';
END IF;
EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);
-- Now we cluster the data using a grid of size grid_m
-- and selecte the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
CREATE TABLE %7$I.%3$I AS
WITH clusters AS (
SELECT
%5$s
count(*) AS n,
SUM(ST_X(f.the_geom_webmercator)) AS _sum_of_x,
SUM(ST_Y(f.the_geom_webmercator)) AS _sum_of_y,
Floor(ST_Y(f.the_geom_webmercator)/%2$s)::int AS gy,
Floor(ST_X(f.the_geom_webmercator)/%2$s)::int AS gx,
MIN(cartodb_id) AS cartodb_id
FROM %1$s f
GROUP BY gx, gy
)
SELECT %6$s FROM clusters
', reloid::text, grid_m, overview_rel, attributes, aggr_attributes, columns, schema_name);
RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
END;
$$ LANGUAGE PLPGSQL;
-- This strategy places the aggregation of each cluster at the position of one of the cluster members.
CREATE OR REPLACE FUNCTION _CDB_GridClusterSample_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
gtypes TEXT[];
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL;
END IF;
--TODO: check applicability: geometry type, minimum number of points...
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px := 1.0;
END IF;
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
IF attributes <> '' THEN
attributes := ', ' || attributes;
END IF;
IF aggr_attributes <> '' THEN
aggr_attributes := aggr_attributes || ', ';
END IF;
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN 'cartodb_id' THEN 'cartodb_id'
ELSE c
END AS column
FROM CDB_ColumnNames(reloid) c
)
SELECT string_agg(s.column, ',') FROM (
SELECT * FROM cols
) AS s INTO columns;
IF NOT columns LIKE '%_feature_count%' THEN
columns := columns || ', n AS _feature_count';
END IF;
EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);
-- Now we cluster the data using a grid of size grid_m
-- and select the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
CREATE TABLE %7$I.%3$I AS
WITH clusters AS (
SELECT
%5$s
count(*) AS n,
Floor(ST_X(_f.the_geom_webmercator)/%2$s)::int AS gx,
Floor(ST_Y(_f.the_geom_webmercator)/%2$s)::int AS gy,
MIN(cartodb_id) AS cartodb_id
FROM %1$s _f
GROUP BY gx, gy
),
cluster_geom AS (
SELECT the_geom, the_geom_webmercator, clusters.*
FROM clusters INNER JOIN %1$s _g ON (clusters.cartodb_id = _g.cartodb_id)
)
SELECT %6$s FROM cluster_geom
', reloid::text, grid_m, overview_rel, attributes, aggr_attributes, columns, schema_name);
RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
END;
$$ LANGUAGE PLPGSQL;
-- Create overview tables for a dataset.
-- Scope: public
-- Parameters:

View File

@@ -41,7 +41,7 @@ AS $$
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind = 'r'
AND c.relname NOT IN ('cdb_tablemetadata', 'spatial_ref_sys')
AND c.relname NOT IN ('cdb_tablemetadata', 'cdb_analysis_catalog', 'cdb_conf', 'spatial_ref_sys')
AND CASE WHEN schema_name IS NULL
THEN n.nspname NOT IN ('pg_catalog', 'information_schema', 'topology', 'cartodb')
ELSE n.nspname = schema_name

View File

@@ -26,19 +26,14 @@ BEGIN
WHERE o_table_schema = schema_name AND o_table_catalog = current_database()
),
user_tables AS (
SELECT table_name FROM information_schema.tables
WHERE table_catalog = current_database() AND table_schema = schema_name
AND table_name != 'spatial_ref_sys'
AND table_name != 'cdb_tablemetadata'
AND table_type = 'BASE TABLE'
SELECT table_name FROM _CDB_NonAnalysisTablesInSchema(schema_name)
),
table_cat AS (
SELECT
table_name,
(
EXISTS(select * from raster_tables where o_table_name = table_name)
OR
table_name SIMILAR TO _CDB_OverviewTableDiscriminator() || '[\w\d]*'
OR table_name SIMILAR TO _CDB_OverviewTableDiscriminator() || '[\w\d]*'
) AS is_overview,
EXISTS(SELECT * FROM raster_tables WHERE r_table_name = table_name) AS is_raster
FROM user_tables

View File

@@ -4,7 +4,7 @@
-- @param in_array A numeric array of numbers
--
-- Returns: statistical quantity chosen
--
--
-- References: http://www.itl.nist.gov/div898/handbook/eda/section3/eda35b.htm
--
@@ -13,17 +13,21 @@ CREATE OR REPLACE FUNCTION CDB_Kurtosis ( in_array NUMERIC[] ) RETURNS NUMERIC a
DECLARE
a numeric;
c numeric;
s numeric;
k numeric;
BEGIN
SELECT AVG(e), COUNT(e)::numeric, stddev(e) INTO a, c, s FROM ( SELECT unnest(in_array) e ) x;
SELECT AVG(e), COUNT(e)::numeric * power(stddev(e),4) INTO a, c FROM ( SELECT unnest(in_array) e ) x;
EXECUTE 'SELECT sum(power($1 - e, 4)) / ( $2 * power($3, 4)) - 3
FROM (SELECT unnest($4) e ) x'
INTO k
USING a, c, s, in_array;
IF c=0 THEN
RETURN 0;
ELSE
RETURN k;
EXECUTE 'SELECT sum(power($1 - e, 4)) / ($2 ) - 3
FROM (SELECT unnest($3) e ) x'
INTO k
USING a, c, in_array;
RETURN k;
END IF;
END;
$$ language plpgsql IMMUTABLE;
@@ -32,16 +36,18 @@ CREATE OR REPLACE FUNCTION CDB_Skewness ( in_array NUMERIC[] ) RETURNS NUMERIC a
DECLARE
a numeric;
c numeric;
s numeric;
sk numeric;
BEGIN
SELECT AVG(e), COUNT(e)::numeric, stddev(e) INTO a, c, s FROM ( SELECT unnest(in_array) e ) x;
SELECT AVG(e), COUNT(e)::numeric * power(stddev(e),3) INTO a, c FROM ( SELECT unnest(in_array) e ) x;
IF c=0 THEN
RETURN 0;
ELSE
EXECUTE 'SELECT sum(power($1 - e, 3)) / ( $2 )
FROM (SELECT unnest($3) e ) x'
INTO sk
USING a, c, in_array;
EXECUTE 'SELECT sum(power($1 - e, 3)) / ( $2 * power($3, 3))
FROM (SELECT unnest($4) e ) x'
INTO sk
USING a, c, s, in_array;
RETURN sk;
RETURN sk;
END IF;
END;
$$ language plpgsql IMMUTABLE;

View File

@@ -14,7 +14,7 @@ SELECT c.relname
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind = 'r'
AND c.relname NOT IN ('cdb_tablemetadata', 'spatial_ref_sys')
AND c.relname NOT IN ('cdb_tablemetadata', 'cdb_analysis_catalog', 'cdb_conf', 'spatial_ref_sys')
AND n.nspname NOT IN ('pg_catalog', 'information_schema', 'topology', 'cartodb')
AND CASE WHEN perm = 'public' THEN has_table_privilege('publicuser', c.oid, 'SELECT')
WHEN perm = 'private' THEN has_table_privilege(current_user, c.oid, 'SELECT') AND NOT has_table_privilege('publicuser', c.oid, 'SELECT')

View File

@@ -0,0 +1 @@
../scripts-available/CDB_EstimateRowCount.sql

View File

@@ -1,5 +1,6 @@
BEGIN
CREATE TABLE
COPY 3
none||
only_com_dec|.|,
only_dot_dec|,|.

View File

@@ -2,4 +2,10 @@ WITH data AS (
SELECT array_agg(x::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_EqualIntervalBins(s, 7)),7) FROM data
SELECT round(unnest(CDB_EqualIntervalBins(s, 7)),7) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_EqualIntervalBins(s, 7)),7) FROM data_nulls;

View File

@@ -5,3 +5,10 @@
213.8571429
256.4285714
299.0000000
15.0000000
29.0000000
43.0000000
57.0000000
71.0000000
85.0000000
99.0000000

View File

@@ -0,0 +1,10 @@
SET client_min_messages TO error;
\set VERBOSITY terse
CREATE TABLE tmptab1(id INT);
INSERT INTO tmptab1(id) VALUES (1), (2), (3);
CREATE TABLE tmptab2(id INT, value NUMERIC);
INSERT INTO tmptab2(id, value) VALUES (1, 10.0), (2, 20.0);
SELECT CDB_EstimateRowCount('SELECT SUM(value) FROM tmptab1 INNER JOIN tmptab2 ON (tmptab1.id = tmptab2.id);') AS row_count;
SELECT CDB_EstimateRowCount('UPDATE tmptab2 SET value = 30 WHERE id=2;') AS row_count;
DROP TABLE tmptab2;
DROP TABLE tmptab1;

View File

@@ -0,0 +1,9 @@
SET
CREATE TABLE
INSERT 0 3
CREATE TABLE
INSERT 0 2
1
1
DROP TABLE
DROP TABLE

View File

@@ -1,5 +1,11 @@
WITH data AS (
SELECT array_agg(x) x FROM generate_series(1,100) x
SELECT array_agg(x::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_HeadsTailsBins(x, 7)),2) FROM data
)
SELECT round(unnest(CDB_HeadsTailsBins(s, 7)),2) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_HeadsTailsBins(s, 7)),2) FROM data_nulls;

View File

@@ -5,3 +5,9 @@
96.50
98.00
99.00
49.76
74.65
88.50
94.50
98.00
99.00

View File

@@ -1,5 +1,11 @@
WITH data AS (
SELECT array_agg(x) x FROM generate_series(1,100) x
SELECT array_agg(x::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_JenksBins(x, 7)) FROM data
SELECT unnest(CDB_JenksBins(s, 7)) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_JenksBins(s, 7)) FROM data_nulls;

View File

@@ -1,7 +1,14 @@
13
29
43
57
71
83
99
86
129
172
213
257
299
37
51
97
157
213
241

View File

@@ -1,5 +1,11 @@
WITH data AS (
SELECT array_agg(x) x FROM generate_series(1,100) x
SELECT array_agg(x::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_QuantileBins(x, 7)) FROM data
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data_nulls;

View File

@@ -4,4 +4,11 @@
57
71
86
99
29
57
87
99

View File

@@ -1,3 +1,6 @@
SET client_min_messages TO error;
\set VERBOSITY terse
WITH q AS ( SELECT CDB_QueryStatements('
SELECT * FROM geometry_columns;
') as statement )

View File

@@ -1,3 +1,4 @@
SET
1|1|SELECT * FROM geometry_columns
2|1|SELECT * FROM geometry_columns
3|1|SELECT * FROM geometry_columns

View File

@@ -1,3 +1,5 @@
SET client_min_messages TO warning;
\set VERBOSITY terse
WITH inp AS ( select 'SELECT * FROM geometry_columns'::text as q )
SELECT q, CDB_QueryTables(q) from inp;

View File

@@ -1,17 +1,14 @@
SET
SELECT * FROM geometry_columns|{pg_catalog.pg_attribute,pg_catalog.pg_class,pg_catalog.pg_constraint,pg_catalog.pg_namespace,pg_catalog.pg_type}
SELECT a.attname FROM pg_class c JOIN pg_attribute a on (a.attrelid = c.oid)|{pg_catalog.pg_attribute,pg_catalog.pg_class}
CREATE table "my'tab;le" as select 1|{}
SELECT a.oid, b.oid FROM pg_class a, pg_class b|{pg_catalog.pg_class}
SELECT 1 as col1; select 2 as col2|{}
WARNING: CDB_QueryTables cannot explain query: select 1 from nonexistant (42P01: relation "nonexistant" does not exist)
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
ERROR: relation "nonexistant" does not exist
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
begin; select * from pg_class; commit;|{pg_catalog.pg_class}
WARNING: CDB_QueryTables cannot explain query: select * from test (42P01: relation "test" does not exist)
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
ERROR: relation "test" does not exist
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
WITH a AS (select * from pg_class) select * from a|{pg_catalog.pg_class}
CREATE SCHEMA
CREATE TABLE

View File

@@ -27,5 +27,21 @@ INSERT INTO big VALUES (8193);
SELECT CDB_SetUserQuotaInBytes(0);
INSERT INTO big VALUES (8194);
DROP TABLE big;
--analysis tables should be excluded from quota:
CREATE TABLE big(a int);
CREATE TRIGGER test_quota BEFORE UPDATE OR INSERT ON big
EXECUTE PROCEDURE CDB_CheckQuota(1, 1, 'public');
SELECT CDB_SetUserQuotaInBytes(1);
CREATE TABLE analysis_2f13a3dbd7_41bd92976fc6dd97072afe4ee450054f4c0715d4(id int);
INSERT INTO analysis_2f13a3dbd7_41bd92976fc6dd97072afe4ee450054f4c0715d4(id) VALUES (1),(2),(3),(4),(5);
INSERT INTO big VALUES (1); -- allowed, check runs before
DROP TABLE analysis_2f13a3dbd7_41bd92976fc6dd97072afe4ee450054f4c0715d4;
INSERT INTO big VALUES (2); -- disallowed, quota exceeds before
DROP TABLE big;
SELECT CDB_SetUserQuotaInBytes(0);
set client_min_messages to NOTICE;
DROP FUNCTION _CDB_UserQuotaInBytes();

View File

@@ -18,5 +18,15 @@ ERROR: Quota exceeded by 443.998046875KB
0
INSERT 0 1
DROP TABLE
CREATE TABLE
CREATE TRIGGER
1
CREATE TABLE
INSERT 0 5
INSERT 0 1
DROP TABLE
ERROR: Quota exceeded by 3.9990234375KB
DROP TABLE
0
SET
DROP FUNCTION

View File

@@ -182,6 +182,7 @@ function setup_database() {
log_info "########################### BOOTSTRAP ###########################"
${CMD} -d ${DATABASE} -f scripts-available/CDB_Organizations.sql
${CMD} -d ${DATABASE} -f scripts-available/CDB_OverviewsSupport.sql
${CMD} -d ${DATABASE} -f scripts-available/CDB_AnalysisSupport.sql
# trick to allow forcing a schema when loading SQL files (see: http://bit.ly/1HeLnhL)
${CMD} -d ${DATABASE} -f test/extension/run_at_cartodb_schema.sql
}
@@ -227,6 +228,7 @@ function tear_down() {
sql 'DROP ROLE cdb_testmember_2;'
tear_down_database
DATABASE=postgres sql postgres 'DROP ROLE IF EXISTS publicuser';
}
@@ -485,6 +487,18 @@ function test_foreign_tables() {
${CMD} -d fdw_target -f scripts-available/CDB_QueryTables.sql
${CMD} -d fdw_target -f scripts-available/CDB_TableMetadata.sql
DATABASE=fdw_target sql postgres "DO
\$\$
BEGIN
IF NOT EXISTS (
SELECT *
FROM pg_catalog.pg_user
WHERE usename = 'publicuser') THEN
CREATE ROLE publicuser LOGIN;
END IF;
END
\$\$;"
DATABASE=fdw_target sql postgres 'CREATE SCHEMA test_fdw;'
DATABASE=fdw_target sql postgres 'CREATE TABLE test_fdw.foo (a int);'
DATABASE=fdw_target sql postgres 'INSERT INTO test_fdw.foo (a) values (42);'