Compare commits

...

24 Commits

Author SHA1 Message Date
Javier Goizueta
2d473cf693 New version 0.19.0 2017-04-11 11:22:20 +02:00
Javier Goizueta
4193ff3874 Merge pull request #298 from CartoDB/295-estimate-row-count
Add CDB_EstimateRowCount function
2017-04-11 11:01:31 +02:00
Javier Goizueta
68a0752849 Use PG 9.5 for travis tests; fix tests 2017-04-10 15:58:49 +02:00
Javier Goizueta
815b5b429d Fix tests 2017-04-10 13:50:37 +02:00
Javier Goizueta
76bdb3657a Fix tests 2017-04-10 12:17:47 +02:00
Javier Goizueta
234373df11 Replace unnecessary count 2017-04-10 08:08:59 +02:00
Javier Goizueta
a486eed2e3 Add CDB_EstimateRowCount function
See #295
2017-04-07 15:35:48 +02:00
Mario de Frutos
795d92da8d Added CLA paragraph 2017-01-25 10:54:12 +01:00
Javier Goizueta
58e2e7e238 Release 0.18.5 2016-11-30 17:17:45 +01:00
Javier Goizueta
25d27263cb Merge pull request #249 from CartoDB/nullbins
Test behavior of binning fuctions with nulls
2016-11-30 16:09:23 +01:00
Javier Goizueta
bbadcc838e Merge pull request #244 from CartoDB/equalbins
Convert CDB_EqualIntervalBins to a single SQL statement and add float version
2016-11-30 16:09:05 +01:00
Javier Goizueta
b1a0904c07 Merge pull request #181 from CartoDB/update_to_cdb_stats
Fix for division by zero error on empty or homogenous array
2016-11-30 16:08:40 +01:00
Javier Goizueta
399b680b41 Merge pull request #283 from CartoDB/157-test-fixes
Fix tests: race condition with publicuser #157
2016-11-30 16:08:21 +01:00
Javier Goizueta
7c0636c5f9 Merge pull request #290 from CartoDB/286-overview-strategies
Add point overview strategies
2016-11-30 11:46:29 +01:00
Javier Goizueta
f58f870457 Remove use of first aggregator in sample-cluster overviews strategy
This is not more efficient but the geometry now corresponds to the cartodb_id
and the dependency with custom aggregators (firt) is removed.
2016-11-29 14:08:08 +01:00
Javier Goizueta
a7c8dc04e3 Release 0.18.4
This just fixes the lack of migration path from 0.18.2
2016-11-04 16:25:03 +01:00
Javier Goizueta
90ee56eb35 Merge pull request #288 from CartoDB/fix-migration
Fix migration script generation
2016-11-04 16:22:56 +01:00
Javier Goizueta
1032737600 Fix migration script generation
In the 0.18.3 release the script to migrate from 0.18.2 was missing.
This will generate a new version 0.18.4 that when install will generate
scripts to migrate from all old versions to it, so it will be possible
to migrate existing users to 0.18.4 (but not to 0.18.3)
2016-11-04 16:19:06 +01:00
Javier Goizueta
5992304b47 Add a couple of overview clustering strategies 2016-11-03 13:31:04 +01:00
Rafa de la Torre
30cd4cf1f9 Fix tests: race condition with publicuser #157 2016-10-17 16:31:10 +02:00
Paul Norman
3122a0479d Test behavior of binning fuctions with nulls
All test results are based off of existing behavior, which doesn't
always make sense (ref #247)
2016-04-28 09:59:33 -07:00
Paul Norman
956e56cd37 Use anyarray for equalintervalbins 2016-04-27 16:10:01 -07:00
Paul Norman
b19a5fc3dc Convert CDB_EqualIntervalBins to a single SQL statement and add float version 2016-04-25 14:35:26 -07:00
Stuart Lynn
0ecbbd8e71 Make sure that empty arrays or arrays with all the same entry return 0 for Skewness and Kurtosis rather than throwing a division by zero error 2015-12-04 14:54:15 -05:00
26 changed files with 604 additions and 66 deletions

View File

@@ -1,14 +1,41 @@
language: c
addons:
postgresql: 9.3
postgresql: 9.5
before_install:
# Add custom PPAs from cartodb
- sudo add-apt-repository -y ppa:cartodb/postgresql-9.5
- sudo add-apt-repository -y ppa:cartodb/gis
- sudo add-apt-repository -y ppa:cartodb/gis-testing
- sudo apt-get update
#- sudo apt-get install -q postgresql-9.3-postgis-2.1
- sudo apt-get update
- sudo apt-get install -q postgresql-server-dev-9.3
- sudo apt-get install -q postgresql-plpython-9.3
# Force instalation of libgeos-3.5.0 (presumably needed because of existing version of postgis)
- sudo apt-get -y install libgeos-3.5.0=3.5.0-1cdb2
# Install postgres db and build deps
- sudo /etc/init.d/postgresql stop # stop travis default instance
- sudo apt-get -y remove --purge postgresql-9.1
- sudo apt-get -y remove --purge postgresql-9.2
- sudo apt-get -y remove --purge postgresql-9.3
- sudo apt-get -y remove --purge postgresql-9.4
- sudo apt-get -y remove --purge postgresql-9.5
- sudo rm -rf /var/lib/postgresql/
- sudo rm -rf /var/log/postgresql/
- sudo rm -rf /etc/postgresql/
- sudo apt-get -y remove --purge postgis-2.2
- sudo apt-get -y autoremove
- sudo apt-get -y install postgresql-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2
- sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2
# configure it to accept local connections from postgres
- echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \
| sudo tee /etc/postgresql/9.5/main/pg_hba.conf
- sudo /etc/init.d/postgresql restart 9.5
script:
- make

View File

@@ -62,3 +62,7 @@ A useful query:
```sql
SELECT * FROM pg_extension_update_paths('cartodb') WHERE path IS NOT NULL AND source = cdb_version();
```
## Submitting Contributions
* You will need to sign a Contributor License Agreement (CLA) before making a submission. [Learn more here](https://carto.com/contributions).

View File

@@ -1,7 +1,7 @@
# cartodb/Makefile
EXTENSION = cartodb
EXTVERSION = 0.18.3
EXTVERSION = 0.19.0
SED = sed
@@ -75,7 +75,11 @@ UPGRADABLE = \
0.17.1 \
0.18.0 \
0.18.1 \
0.18.2 \
0.18.3 \
0.18.4 \
0.18.5 \
0.19.0 \
$(EXTVERSION)dev \
$(EXTVERSION)next \
$(END)

16
NEWS.md
View File

@@ -1,3 +1,19 @@
0.19.0 (2017-04-11)
* Add new function `CDB_EstimateRowCount` #295
0.18.5 (2016-11-30)
* Add to new overview creation strategies #290
* Fix tests: race condition with publicuser #157
* Fix: CDB_Stats divisions by zero #181
* Better implementation of `CDB_EqualIntervalBins` #244
* New tests for binning functions #249
0.18.4 (2016-11-04)
* No functional changes; fixes the migration from previous versions #288
0.18.3 (2016-11-03)
* Exclude analysis cache tables from the quota #281

View File

@@ -0,0 +1,25 @@
Estimate the number of rows of a query.
#### Using the function
```sql
SELECT CDB_EstimateRowCount($$
UPDATE addresses SET the_geom = cdb_geocode_street_point(addr, city, state, 'US');
$$) AS row_count;
```
Result:
```
row_count
-----------
5
(1 row)
```
#### Arguments
CDB_EstimateRowCount(query)
* **query** text: the SQL query to estimate the row count for.

View File

@@ -1,8 +1,8 @@
--
-- Calculate the equal interval bins for a given column
--
-- @param in_array A numeric array of numbers to determine the best
-- to determine the bin boundary
-- @param in_array An array of numbers to determine the best
-- bin boundary
--
-- @param breaks The number of bins you want to find.
--
@@ -11,27 +11,14 @@
--
--
CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$
DECLARE
diff numeric;
min_val numeric;
max_val numeric;
tmp_val numeric;
i INT := 1;
reply numeric[];
BEGIN
SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL;
diff = (max_val - min_val) / breaks::numeric;
LOOP
IF i < breaks THEN
tmp_val = min_val + i::numeric * diff;
reply = array_append(reply, tmp_val);
i := i+1;
ELSE
reply = array_append(reply, max_val);
EXIT;
END IF;
END LOOP;
RETURN reply;
END;
$$ language plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array anyarray, breaks INT ) RETURNS anyarray as $$
WITH stats AS (
SELECT min(e), (max(e)-min(e))/breaks AS del
FROM (SELECT unnest(in_array) e) AS p)
SELECT array_agg(bins)
FROM (
SELECT min + generate_series(1,breaks)*del AS bins
FROM stats) q;
$$ LANGUAGE SQL IMMUTABLE;
DROP FUNCTION IF EXISTS CDB_EqualIntervalBins( numeric[], integer);

View File

@@ -0,0 +1,31 @@
-- Internal function to generate stats for a table if they don't exist
CREATE OR REPLACE FUNCTION _CDB_GenerateStats(reloid REGCLASS)
RETURNS VOID
AS $$
DECLARE
has_stats BOOLEAN;
BEGIN
SELECT EXISTS (
SELECT * FROM pg_catalog.pg_statistic WHERE starelid = reloid
) INTO has_stats;
IF NOT has_stats THEN
EXECUTE Format('ANALYZE %s;', reloid);
END IF;
END
$$ LANGUAGE 'plpgsql' VOLATILE STRICT SECURITY DEFINER;
-- Return a row count estimate of the result of a query using statistics
CREATE OR REPLACE FUNCTION CDB_EstimateRowCount(query text)
RETURNS Numeric
AS $$
DECLARE
plan JSON;
BEGIN
-- Make sure statistics exist for all the tables of the query
PERFORM _CDB_GenerateStats(tabname) FROM unnest(CDB_QueryTablesText(query)) AS tabname;
-- Use the query planner to obtain an estimate of the number of result rows
EXECUTE 'EXPLAIN (FORMAT JSON) ' || query INTO STRICT plan;
RETURN plan->0->'Plan'->'Plan Rows';
END
$$ LANGUAGE 'plpgsql' VOLATILE STRICT;

View File

@@ -697,6 +697,356 @@ AS $$
END;
$$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION _CDB_GridCluster_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
gtypes TEXT[];
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL;
END IF;
--TODO: check applicability: geometry type, minimum number of points...
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px := 1.0;
END IF;
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
IF attributes <> '' THEN
attributes := ', ' || attributes;
END IF;
IF aggr_attributes <> '' THEN
aggr_attributes := aggr_attributes || ', ';
END IF;
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN 'cartodb_id' THEN 'cartodb_id'
WHEN 'the_geom' THEN
Format('ST_Transform(%s, 4326) AS the_geom', point_geom)
WHEN 'the_geom_webmercator' THEN
Format('%s AS the_geom_webmercator', point_geom)
ELSE c
END AS column
FROM CDB_ColumnNames(reloid) c
)
SELECT string_agg(s.column, ',') FROM (
SELECT * FROM cols
) AS s INTO columns;
IF NOT columns LIKE '%_feature_count%' THEN
columns := columns || ', n AS _feature_count';
END IF;
EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);
-- Now we cluster the data using a grid of size grid_m
-- and selecte the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
CREATE TABLE %7$I.%3$I AS
WITH clusters AS (
SELECT
%5$s
count(*) AS n,
Floor(ST_X(f.the_geom_webmercator)/%2$s)::int AS gx,
Floor(ST_Y(f.the_geom_webmercator)/%2$s)::int AS gy,
MIN(cartodb_id) AS cartodb_id
FROM %1$s f
GROUP BY gx, gy
)
SELECT %6$s FROM clusters
', reloid::text, grid_m, overview_rel, attributes, aggr_attributes, columns, schema_name);
RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
END;
$$ LANGUAGE PLPGSQL;
-- This strategy places the aggregation of each cluster at the centroid of the cluster members.
CREATE OR REPLACE FUNCTION _CDB_GridClusterCentroid_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
gtypes TEXT[];
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL;
END IF;
--TODO: check applicability: geometry type, minimum number of points...
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px := 1.0;
END IF;
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
IF attributes <> '' THEN
attributes := ', ' || attributes;
END IF;
IF aggr_attributes <> '' THEN
aggr_attributes := aggr_attributes || ', ';
END IF;
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN 'cartodb_id' THEN 'cartodb_id'
WHEN 'the_geom' THEN
'ST_Transform(ST_SetSRID(ST_MakePoint(_sum_of_x/n, _sum_of_y/n), 3857), 4326) AS the_geom'
WHEN 'the_geom_webmercator' THEN
'ST_SetSRID(ST_MakePoint(_sum_of_x/n, _sum_of_y/n), 3857) AS the_geom_webmercator'
ELSE c
END AS column
FROM CDB_ColumnNames(reloid) c
)
SELECT string_agg(s.column, ',') FROM (
SELECT * FROM cols
) AS s INTO columns;
IF NOT columns LIKE '%_feature_count%' THEN
columns := columns || ', n AS _feature_count';
END IF;
EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);
-- Now we cluster the data using a grid of size grid_m
-- and selecte the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
CREATE TABLE %7$I.%3$I AS
WITH clusters AS (
SELECT
%5$s
count(*) AS n,
SUM(ST_X(f.the_geom_webmercator)) AS _sum_of_x,
SUM(ST_Y(f.the_geom_webmercator)) AS _sum_of_y,
Floor(ST_Y(f.the_geom_webmercator)/%2$s)::int AS gy,
Floor(ST_X(f.the_geom_webmercator)/%2$s)::int AS gx,
MIN(cartodb_id) AS cartodb_id
FROM %1$s f
GROUP BY gx, gy
)
SELECT %6$s FROM clusters
', reloid::text, grid_m, overview_rel, attributes, aggr_attributes, columns, schema_name);
RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
END;
$$ LANGUAGE PLPGSQL;
-- This strategy places the aggregation of each cluster at the position of one of the cluster members.
CREATE OR REPLACE FUNCTION _CDB_GridClusterSample_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
gtypes TEXT[];
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL;
END IF;
--TODO: check applicability: geometry type, minimum number of points...
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px := 1.0;
END IF;
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
IF attributes <> '' THEN
attributes := ', ' || attributes;
END IF;
IF aggr_attributes <> '' THEN
aggr_attributes := aggr_attributes || ', ';
END IF;
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN 'cartodb_id' THEN 'cartodb_id'
ELSE c
END AS column
FROM CDB_ColumnNames(reloid) c
)
SELECT string_agg(s.column, ',') FROM (
SELECT * FROM cols
) AS s INTO columns;
IF NOT columns LIKE '%_feature_count%' THEN
columns := columns || ', n AS _feature_count';
END IF;
EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);
-- Now we cluster the data using a grid of size grid_m
-- and select the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
CREATE TABLE %7$I.%3$I AS
WITH clusters AS (
SELECT
%5$s
count(*) AS n,
Floor(ST_X(_f.the_geom_webmercator)/%2$s)::int AS gx,
Floor(ST_Y(_f.the_geom_webmercator)/%2$s)::int AS gy,
MIN(cartodb_id) AS cartodb_id
FROM %1$s _f
GROUP BY gx, gy
),
cluster_geom AS (
SELECT the_geom, the_geom_webmercator, clusters.*
FROM clusters INNER JOIN %1$s _g ON (clusters.cartodb_id = _g.cartodb_id)
)
SELECT %6$s FROM cluster_geom
', reloid::text, grid_m, overview_rel, attributes, aggr_attributes, columns, schema_name);
RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
END;
$$ LANGUAGE PLPGSQL;
-- Create overview tables for a dataset.
-- Scope: public
-- Parameters:

View File

@@ -4,7 +4,7 @@
-- @param in_array A numeric array of numbers
--
-- Returns: statistical quantity chosen
--
--
-- References: http://www.itl.nist.gov/div898/handbook/eda/section3/eda35b.htm
--
@@ -13,17 +13,21 @@ CREATE OR REPLACE FUNCTION CDB_Kurtosis ( in_array NUMERIC[] ) RETURNS NUMERIC a
DECLARE
a numeric;
c numeric;
s numeric;
k numeric;
BEGIN
SELECT AVG(e), COUNT(e)::numeric, stddev(e) INTO a, c, s FROM ( SELECT unnest(in_array) e ) x;
SELECT AVG(e), COUNT(e)::numeric * power(stddev(e),4) INTO a, c FROM ( SELECT unnest(in_array) e ) x;
EXECUTE 'SELECT sum(power($1 - e, 4)) / ( $2 * power($3, 4)) - 3
FROM (SELECT unnest($4) e ) x'
INTO k
USING a, c, s, in_array;
IF c=0 THEN
RETURN 0;
ELSE
RETURN k;
EXECUTE 'SELECT sum(power($1 - e, 4)) / ($2 ) - 3
FROM (SELECT unnest($3) e ) x'
INTO k
USING a, c, in_array;
RETURN k;
END IF;
END;
$$ language plpgsql IMMUTABLE;
@@ -32,16 +36,18 @@ CREATE OR REPLACE FUNCTION CDB_Skewness ( in_array NUMERIC[] ) RETURNS NUMERIC a
DECLARE
a numeric;
c numeric;
s numeric;
sk numeric;
BEGIN
SELECT AVG(e), COUNT(e)::numeric, stddev(e) INTO a, c, s FROM ( SELECT unnest(in_array) e ) x;
SELECT AVG(e), COUNT(e)::numeric * power(stddev(e),3) INTO a, c FROM ( SELECT unnest(in_array) e ) x;
IF c=0 THEN
RETURN 0;
ELSE
EXECUTE 'SELECT sum(power($1 - e, 3)) / ( $2 )
FROM (SELECT unnest($3) e ) x'
INTO sk
USING a, c, in_array;
EXECUTE 'SELECT sum(power($1 - e, 3)) / ( $2 * power($3, 3))
FROM (SELECT unnest($4) e ) x'
INTO sk
USING a, c, s, in_array;
RETURN sk;
RETURN sk;
END IF;
END;
$$ language plpgsql IMMUTABLE;

View File

@@ -0,0 +1 @@
../scripts-available/CDB_EstimateRowCount.sql

View File

@@ -1,5 +1,6 @@
BEGIN
CREATE TABLE
COPY 3
none||
only_com_dec|.|,
only_dot_dec|,|.

View File

@@ -2,4 +2,10 @@ WITH data AS (
SELECT array_agg(x::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_EqualIntervalBins(s, 7)),7) FROM data
SELECT round(unnest(CDB_EqualIntervalBins(s, 7)),7) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_EqualIntervalBins(s, 7)),7) FROM data_nulls;

View File

@@ -5,3 +5,10 @@
213.8571429
256.4285714
299.0000000
15.0000000
29.0000000
43.0000000
57.0000000
71.0000000
85.0000000
99.0000000

View File

@@ -0,0 +1,10 @@
SET client_min_messages TO error;
\set VERBOSITY terse
CREATE TABLE tmptab1(id INT);
INSERT INTO tmptab1(id) VALUES (1), (2), (3);
CREATE TABLE tmptab2(id INT, value NUMERIC);
INSERT INTO tmptab2(id, value) VALUES (1, 10.0), (2, 20.0);
SELECT CDB_EstimateRowCount('SELECT SUM(value) FROM tmptab1 INNER JOIN tmptab2 ON (tmptab1.id = tmptab2.id);') AS row_count;
SELECT CDB_EstimateRowCount('UPDATE tmptab2 SET value = 30 WHERE id=2;') AS row_count;
DROP TABLE tmptab2;
DROP TABLE tmptab1;

View File

@@ -0,0 +1,9 @@
SET
CREATE TABLE
INSERT 0 3
CREATE TABLE
INSERT 0 2
1
1
DROP TABLE
DROP TABLE

View File

@@ -1,5 +1,11 @@
WITH data AS (
SELECT array_agg(x) x FROM generate_series(1,100) x
SELECT array_agg(x::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_HeadsTailsBins(x, 7)),2) FROM data
)
SELECT round(unnest(CDB_HeadsTailsBins(s, 7)),2) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_HeadsTailsBins(s, 7)),2) FROM data_nulls;

View File

@@ -5,3 +5,9 @@
96.50
98.00
99.00
49.76
74.65
88.50
94.50
98.00
99.00

View File

@@ -1,5 +1,11 @@
WITH data AS (
SELECT array_agg(x) x FROM generate_series(1,100) x
SELECT array_agg(x::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_JenksBins(x, 7)) FROM data
SELECT unnest(CDB_JenksBins(s, 7)) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_JenksBins(s, 7)) FROM data_nulls;

View File

@@ -1,7 +1,14 @@
13
29
43
57
71
83
99
86
129
172
213
257
299
37
51
97
157
213
241

View File

@@ -1,5 +1,11 @@
WITH data AS (
SELECT array_agg(x) x FROM generate_series(1,100) x
SELECT array_agg(x::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_QuantileBins(x, 7)) FROM data
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data_nulls;

View File

@@ -4,4 +4,11 @@
57
71
86
99
29
57
87
99

View File

@@ -1,3 +1,6 @@
SET client_min_messages TO error;
\set VERBOSITY terse
WITH q AS ( SELECT CDB_QueryStatements('
SELECT * FROM geometry_columns;
') as statement )

View File

@@ -1,3 +1,4 @@
SET
1|1|SELECT * FROM geometry_columns
2|1|SELECT * FROM geometry_columns
3|1|SELECT * FROM geometry_columns

View File

@@ -1,3 +1,5 @@
SET client_min_messages TO warning;
\set VERBOSITY terse
WITH inp AS ( select 'SELECT * FROM geometry_columns'::text as q )
SELECT q, CDB_QueryTables(q) from inp;

View File

@@ -1,17 +1,14 @@
SET
SELECT * FROM geometry_columns|{pg_catalog.pg_attribute,pg_catalog.pg_class,pg_catalog.pg_constraint,pg_catalog.pg_namespace,pg_catalog.pg_type}
SELECT a.attname FROM pg_class c JOIN pg_attribute a on (a.attrelid = c.oid)|{pg_catalog.pg_attribute,pg_catalog.pg_class}
CREATE table "my'tab;le" as select 1|{}
SELECT a.oid, b.oid FROM pg_class a, pg_class b|{pg_catalog.pg_class}
SELECT 1 as col1; select 2 as col2|{}
WARNING: CDB_QueryTables cannot explain query: select 1 from nonexistant (42P01: relation "nonexistant" does not exist)
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
ERROR: relation "nonexistant" does not exist
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
begin; select * from pg_class; commit;|{pg_catalog.pg_class}
WARNING: CDB_QueryTables cannot explain query: select * from test (42P01: relation "test" does not exist)
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
ERROR: relation "test" does not exist
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
WITH a AS (select * from pg_class) select * from a|{pg_catalog.pg_class}
CREATE SCHEMA
CREATE TABLE

View File

@@ -228,6 +228,7 @@ function tear_down() {
sql 'DROP ROLE cdb_testmember_2;'
tear_down_database
DATABASE=postgres sql postgres 'DROP ROLE IF EXISTS publicuser';
}
@@ -486,6 +487,18 @@ function test_foreign_tables() {
${CMD} -d fdw_target -f scripts-available/CDB_QueryTables.sql
${CMD} -d fdw_target -f scripts-available/CDB_TableMetadata.sql
DATABASE=fdw_target sql postgres "DO
\$\$
BEGIN
IF NOT EXISTS (
SELECT *
FROM pg_catalog.pg_user
WHERE usename = 'publicuser') THEN
CREATE ROLE publicuser LOGIN;
END IF;
END
\$\$;"
DATABASE=fdw_target sql postgres 'CREATE SCHEMA test_fdw;'
DATABASE=fdw_target sql postgres 'CREATE TABLE test_fdw.foo (a int);'
DATABASE=fdw_target sql postgres 'INSERT INTO test_fdw.foo (a) values (42);'