Compare commits

..

21 Commits

Author SHA1 Message Date
Javier Goizueta
2d473cf693 New version 0.19.0 2017-04-11 11:22:20 +02:00
Javier Goizueta
4193ff3874 Merge pull request #298 from CartoDB/295-estimate-row-count
Add CDB_EstimateRowCount function
2017-04-11 11:01:31 +02:00
Javier Goizueta
68a0752849 Use PG 9.5 for travis tests; fix tests 2017-04-10 15:58:49 +02:00
Javier Goizueta
815b5b429d Fix tests 2017-04-10 13:50:37 +02:00
Javier Goizueta
76bdb3657a Fix tests 2017-04-10 12:17:47 +02:00
Javier Goizueta
234373df11 Replace unnecessary count 2017-04-10 08:08:59 +02:00
Javier Goizueta
a486eed2e3 Add CDB_EstimateRowCount function
See #295
2017-04-07 15:35:48 +02:00
Mario de Frutos
795d92da8d Added CLA paragraph 2017-01-25 10:54:12 +01:00
Javier Goizueta
58e2e7e238 Release 0.18.5 2016-11-30 17:17:45 +01:00
Javier Goizueta
25d27263cb Merge pull request #249 from CartoDB/nullbins
Test behavior of binning fuctions with nulls
2016-11-30 16:09:23 +01:00
Javier Goizueta
bbadcc838e Merge pull request #244 from CartoDB/equalbins
Convert CDB_EqualIntervalBins to a single SQL statement and add float version
2016-11-30 16:09:05 +01:00
Javier Goizueta
b1a0904c07 Merge pull request #181 from CartoDB/update_to_cdb_stats
Fix for division by zero error on empty or homogenous array
2016-11-30 16:08:40 +01:00
Javier Goizueta
399b680b41 Merge pull request #283 from CartoDB/157-test-fixes
Fix tests: race condition with publicuser #157
2016-11-30 16:08:21 +01:00
Javier Goizueta
7c0636c5f9 Merge pull request #290 from CartoDB/286-overview-strategies
Add point overview strategies
2016-11-30 11:46:29 +01:00
Javier Goizueta
f58f870457 Remove use of first aggregator in sample-cluster overviews strategy
This is not more efficient but the geometry now corresponds to the cartodb_id
and the dependency with custom aggregators (first) is removed.
2016-11-29 14:08:08 +01:00
Javier Goizueta
5992304b47 Add a couple of overview clustering strategies 2016-11-03 13:31:04 +01:00
Rafa de la Torre
30cd4cf1f9 Fix tests: race condition with publicuser #157 2016-10-17 16:31:10 +02:00
Paul Norman
3122a0479d Test behavior of binning fuctions with nulls
All test results are based off of existing behavior, which doesn't
always make sense (ref #247)
2016-04-28 09:59:33 -07:00
Paul Norman
956e56cd37 Use anyarray for equalintervalbins 2016-04-27 16:10:01 -07:00
Paul Norman
b19a5fc3dc Convert CDB_EqualIntervalBins to a single SQL statement and add float version 2016-04-25 14:35:26 -07:00
Stuart Lynn
0ecbbd8e71 Make sure that empty arrays or arrays with all the same entry return 0 for Skewness and Kurtosis rather than throwing a division by zero error 2015-12-04 14:54:15 -05:00
26 changed files with 598 additions and 66 deletions

View File

@@ -1,14 +1,41 @@
language: c
addons:
postgresql: 9.3
postgresql: 9.5
before_install:
# Add custom PPAs from cartodb
- sudo add-apt-repository -y ppa:cartodb/postgresql-9.5
- sudo add-apt-repository -y ppa:cartodb/gis
- sudo add-apt-repository -y ppa:cartodb/gis-testing
- sudo apt-get update
#- sudo apt-get install -q postgresql-9.3-postgis-2.1
- sudo apt-get update
- sudo apt-get install -q postgresql-server-dev-9.3
- sudo apt-get install -q postgresql-plpython-9.3
# Force installation of libgeos-3.5.0 (presumably needed because of existing version of postgis)
- sudo apt-get -y install libgeos-3.5.0=3.5.0-1cdb2
# Install postgres db and build deps
- sudo /etc/init.d/postgresql stop # stop travis default instance
- sudo apt-get -y remove --purge postgresql-9.1
- sudo apt-get -y remove --purge postgresql-9.2
- sudo apt-get -y remove --purge postgresql-9.3
- sudo apt-get -y remove --purge postgresql-9.4
- sudo apt-get -y remove --purge postgresql-9.5
- sudo rm -rf /var/lib/postgresql/
- sudo rm -rf /var/log/postgresql/
- sudo rm -rf /etc/postgresql/
- sudo apt-get -y remove --purge postgis-2.2
- sudo apt-get -y autoremove
- sudo apt-get -y install postgresql-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2
- sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2
# configure it to accept local connections from postgres
- echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \
| sudo tee /etc/postgresql/9.5/main/pg_hba.conf
- sudo /etc/init.d/postgresql restart 9.5
script:
- make

View File

@@ -62,3 +62,7 @@ A useful query:
```sql
SELECT * FROM pg_extension_update_paths('cartodb') WHERE path IS NOT NULL AND source = cdb_version();
```
## Submitting Contributions
* You will need to sign a Contributor License Agreement (CLA) before making a submission. [Learn more here](https://carto.com/contributions).

View File

@@ -1,7 +1,7 @@
# cartodb/Makefile
EXTENSION = cartodb
EXTVERSION = 0.18.4
EXTVERSION = 0.19.0
SED = sed
@@ -78,6 +78,8 @@ UPGRADABLE = \
0.18.2 \
0.18.3 \
0.18.4 \
0.18.5 \
0.19.0 \
$(EXTVERSION)dev \
$(EXTVERSION)next \
$(END)

12
NEWS.md
View File

@@ -1,3 +1,15 @@
0.19.0 (2017-04-11)
* Add new function `CDB_EstimateRowCount` #295
0.18.5 (2016-11-30)
* Add two new overview creation strategies #290
* Fix tests: race condition with publicuser #157
* Fix: CDB_Stats divisions by zero #181
* Better implementation of `CDB_EqualIntervalBins` #244
* New tests for binning functions #249
0.18.4 (2016-11-04)
* No functional changes; fixes the migration from previous versions #288

View File

@@ -0,0 +1,25 @@
Estimate the number of rows of a query. The estimate is taken from the query planner (`EXPLAIN`) and table statistics.
#### Using the function
```sql
SELECT CDB_EstimateRowCount($$
UPDATE addresses SET the_geom = cdb_geocode_street_point(addr, city, state, 'US');
$$) AS row_count;
```
Result:
```
row_count
-----------
5
(1 row)
```
#### Arguments
CDB_EstimateRowCount(query)
* **query** text: the SQL query to estimate the row count for.

View File

@@ -1,8 +1,8 @@
--
-- Calculate the equal interval bins for a given column
--
-- @param in_array A numeric array of numbers to determine the best
-- to determine the bin boundary
-- @param in_array An array of numbers to determine the best
-- bin boundary
--
-- @param breaks The number of bins you want to find.
--
@@ -11,27 +11,14 @@
--
--
-- PL/pgSQL implementation for NUMERIC[] input.
-- Returns `breaks` upper bin boundaries: min + i*(max-min)/breaks for
-- i = 1 .. breaks-1, followed by the maximum itself as the last boundary.
-- NULL elements of in_array are ignored when computing min and max.
CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$
DECLARE
diff numeric;
min_val numeric;
max_val numeric;
tmp_val numeric;
i INT := 1;
reply numeric[];
BEGIN
-- Range of the non-NULL input values
SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL;
-- Width of each bin
diff = (max_val - min_val) / breaks::numeric;
LOOP
IF i < breaks THEN
-- Intermediate boundary number i
tmp_val = min_val + i::numeric * diff;
reply = array_append(reply, tmp_val);
i := i+1;
ELSE
-- The last boundary is the maximum itself, appended once before leaving the loop
reply = array_append(reply, max_val);
EXIT;
END IF;
END LOOP;
RETURN reply;
END;
$$ language plpgsql IMMUTABLE;
-- Equal-interval bin boundaries as a single SQL statement.
-- Works for any array type whose elements support min/max and arithmetic;
-- the result has the same array type as in_array.
-- Boundary i (i = 1 .. breaks) is min + i * (max - min) / breaks.
-- NULL elements are skipped by the min/max aggregates.
CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array anyarray, breaks INT ) RETURNS anyarray as $$
  WITH bounds AS (
    SELECT
      min(val) AS lowest,
      (max(val) - min(val)) / breaks AS step
    FROM unnest(in_array) AS val
  )
  SELECT array_agg(boundary)
  FROM (
    SELECT lowest + generate_series(1, breaks) * step AS boundary
    FROM bounds
  ) AS edges;
$$ LANGUAGE SQL IMMUTABLE;
DROP FUNCTION IF EXISTS CDB_EqualIntervalBins( numeric[], integer);

View File

@@ -0,0 +1,31 @@
-- Internal function to generate stats for a table if they don't exist.
--
-- @param reloid The table to check.
--
-- ANALYZE is run on the table only when pg_statistic holds no row for it,
-- so calling this repeatedly does not re-analyze a table that already has
-- statistics.
--
-- The function is SECURITY DEFINER (it runs with the privileges of its
-- owner), so search_path is pinned: name lookups inside the body must not
-- depend on the caller's session search_path.
CREATE OR REPLACE FUNCTION _CDB_GenerateStats(reloid REGCLASS)
RETURNS VOID
AS $$
DECLARE
  has_stats BOOLEAN;
BEGIN
  SELECT EXISTS (
    SELECT * FROM pg_catalog.pg_statistic WHERE starelid = reloid
  ) INTO has_stats;
  IF NOT has_stats THEN
    -- A regclass formatted with %s is rendered as a quoted identifier,
    -- schema-qualified when the relation is not visible in search_path.
    EXECUTE pg_catalog.format('ANALYZE %s;', reloid);
  END IF;
END
$$ LANGUAGE plpgsql VOLATILE STRICT SECURITY DEFINER SET search_path = pg_catalog, pg_temp;
-- Return a row count estimate of the result of a query using statistics
--
-- @param query Text of the SQL query to estimate.
--
-- Returns: the "Plan Rows" value of the top node of the plan produced by
-- EXPLAIN (FORMAT JSON) for the query.
--
-- NOTE(review): the query text is concatenated into the EXPLAIN statement;
-- confirm that callers only pass SQL they would be allowed to run themselves.
CREATE OR REPLACE FUNCTION CDB_EstimateRowCount(query text)
RETURNS Numeric
AS $$
DECLARE
  plan JSON;
BEGIN
  -- Make sure statistics exist for all the tables of the query
  PERFORM _CDB_GenerateStats(tabname) FROM unnest(CDB_QueryTablesText(query)) AS tabname;

  -- Use the query planner to obtain an estimate of the number of result rows
  EXECUTE 'EXPLAIN (FORMAT JSON) ' || query INTO STRICT plan;

  -- Extract the value as text (->>) and cast it explicitly instead of
  -- relying on an implicit JSON-to-numeric conversion in RETURN.
  RETURN (plan->0->'Plan'->>'Plan Rows')::numeric;
END
$$ LANGUAGE plpgsql VOLATILE STRICT;

View File

@@ -697,6 +697,356 @@ AS $$
END;
$$ LANGUAGE PLPGSQL;
-- Overview reduction strategy: grid clustering.
-- Points are grouped by the cells of a square grid and every cluster is
-- represented by one point placed at the cell center, displaced to the
-- nearest pixel center.
--
-- Parameters:
--   reloid: base table to be reduced.
--   ref_z, overview_z: passed to _CDB_Overview_Name to name the overview
--     table; overview_z also determines the pixel size used for the grid.
--   grid_px: grid cell size in pixels at level overview_z (1.0 when NULL).
-- Returns the created overview table, or NULL when the geometry types
-- reported for the table are not exactly one 'ST_Point' entry.
CREATE OR REPLACE FUNCTION _CDB_GridCluster_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL)
RETURNS REGCLASS
AS $$
  DECLARE
    overview_rel TEXT;
    pixel_m FLOAT8;
    grid_m FLOAT8;
    offset_m FLOAT8;
    offset_x TEXT;
    offset_y TEXT;
    cell_x TEXT;
    cell_y TEXT;
    aggr_attributes TEXT;
    attributes TEXT;
    columns TEXT;
    gtypes TEXT[];
    schema_name TEXT;
    table_name TEXT;
    point_geom TEXT;
  BEGIN
    SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
    IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
      -- This strategy only supports datasets with point geometry
      RETURN NULL;
    END IF;

    --TODO: check applicability: geometry type, minimum number of points...

    overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);

    -- Grid size in pixels at Z level overview_z
    IF grid_px IS NULL THEN
      grid_px := 1.0;
    END IF;

    SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;

    -- pixel_m: size of a pixel in webmercator units (meters)
    SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
    -- grid size in meters
    grid_m = grid_px * pixel_m;

    -- NOTE(review): `attributes` is passed to Format below as argument 4 but
    -- the template never references %4$s; it is kept so the positional
    -- arguments stay aligned.
    attributes := _CDB_Aggregable_Attributes_Expression(reloid);
    aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
    IF attributes <> '' THEN
      attributes := ', ' || attributes;
    END IF;
    IF aggr_attributes <> '' THEN
      aggr_attributes := aggr_attributes || ', ';
    END IF;

    -- Center of each cell:
    cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
    cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);

    -- Displacement to the nearest pixel center:
    IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
      -- Whole number of pixels per cell: the offset is the same constant for every cell
      offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
      offset_x := Format('%s', offset_m);
      offset_y := Format('%s', offset_m);
    ELSE
      -- Fractional cell size: the offset is an expression evaluated per cell
      offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
      offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
    END IF;

    point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);

    -- compute the resulting columns in the same order as in the base table
    WITH cols AS (
      SELECT
        CASE c
          WHEN 'cartodb_id' THEN 'cartodb_id'
          WHEN 'the_geom' THEN
            Format('ST_Transform(%s, 4326) AS the_geom', point_geom)
          WHEN 'the_geom_webmercator' THEN
            Format('%s AS the_geom_webmercator', point_geom)
          ELSE c
        END AS column
      FROM CDB_ColumnNames(reloid) c
    )
    SELECT string_agg(s.column, ',') FROM (
      SELECT * FROM cols
    ) AS s INTO columns;

    -- Expose the cluster size as _feature_count unless the column list already mentions it
    IF NOT columns LIKE '%_feature_count%' THEN
      columns := columns || ', n AS _feature_count';
    END IF;

    EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);

    -- Now we cluster the data using a grid of size grid_m; each cluster is
    -- identified by its cell indices (gx, gy) and keeps the minimum
    -- cartodb_id of its members.
    EXECUTE Format('
      CREATE TABLE %7$I.%3$I AS
         WITH clusters AS (
           SELECT
             %5$s
             count(*) AS n,
             Floor(ST_X(f.the_geom_webmercator)/%2$s)::int AS gx,
             Floor(ST_Y(f.the_geom_webmercator)/%2$s)::int AS gy,
             MIN(cartodb_id) AS cartodb_id
           FROM %1$s f
           GROUP BY gx, gy
         )
         SELECT %6$s FROM clusters
    ', reloid::text, grid_m, overview_rel, attributes, aggr_attributes, columns, schema_name);

    RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
  END;
$$ LANGUAGE PLPGSQL;
-- This strategy places the aggregation of each cluster at the centroid of the cluster members.
--
-- Parameters:
--   reloid: base table to be reduced.
--   ref_z, overview_z: passed to _CDB_Overview_Name to name the overview
--     table; overview_z also determines the pixel size used for the grid.
--   grid_px: grid cell size in pixels at level overview_z (1.0 when NULL).
-- Returns the created overview table, or NULL when the geometry types
-- reported for the table are not exactly one 'ST_Point' entry.
CREATE OR REPLACE FUNCTION _CDB_GridClusterCentroid_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL)
RETURNS REGCLASS
AS $$
  DECLARE
    overview_rel TEXT;
    pixel_m FLOAT8;
    grid_m FLOAT8;
    aggr_attributes TEXT;
    attributes TEXT;
    columns TEXT;
    gtypes TEXT[];
    schema_name TEXT;
    table_name TEXT;
  BEGIN
    SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
    IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
      -- This strategy only supports datasets with point geometry
      RETURN NULL;
    END IF;

    --TODO: check applicability: geometry type, minimum number of points...

    overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);

    -- Grid size in pixels at Z level overview_z
    IF grid_px IS NULL THEN
      grid_px := 1.0;
    END IF;

    SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;

    -- pixel_m: size of a pixel in webmercator units (meters)
    SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
    -- grid size in meters
    grid_m = grid_px * pixel_m;

    -- NOTE(review): `attributes` is passed to Format below as argument 4 but
    -- the template never references %4$s; it is kept so the positional
    -- arguments stay aligned.
    attributes := _CDB_Aggregable_Attributes_Expression(reloid);
    aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
    IF attributes <> '' THEN
      attributes := ', ' || attributes;
    END IF;
    IF aggr_attributes <> '' THEN
      aggr_attributes := aggr_attributes || ', ';
    END IF;

    -- compute the resulting columns in the same order as in the base table;
    -- the geometry is the average of the member coordinates (sum / count)
    WITH cols AS (
      SELECT
        CASE c
          WHEN 'cartodb_id' THEN 'cartodb_id'
          WHEN 'the_geom' THEN
            'ST_Transform(ST_SetSRID(ST_MakePoint(_sum_of_x/n, _sum_of_y/n), 3857), 4326) AS the_geom'
          WHEN 'the_geom_webmercator' THEN
            'ST_SetSRID(ST_MakePoint(_sum_of_x/n, _sum_of_y/n), 3857) AS the_geom_webmercator'
          ELSE c
        END AS column
      FROM CDB_ColumnNames(reloid) c
    )
    SELECT string_agg(s.column, ',') FROM (
      SELECT * FROM cols
    ) AS s INTO columns;

    -- Expose the cluster size as _feature_count unless the column list already mentions it
    IF NOT columns LIKE '%_feature_count%' THEN
      columns := columns || ', n AS _feature_count';
    END IF;

    EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);

    -- Now we cluster the data using a grid of size grid_m
    -- and select the centroid (average coordinates) of each cluster.
    -- If we had a selected numeric attribute of interest we could use it
    -- as a weight for the average coordinates.
    EXECUTE Format('
      CREATE TABLE %7$I.%3$I AS
         WITH clusters AS (
           SELECT
             %5$s
             count(*) AS n,
             SUM(ST_X(f.the_geom_webmercator)) AS _sum_of_x,
             SUM(ST_Y(f.the_geom_webmercator)) AS _sum_of_y,
             Floor(ST_Y(f.the_geom_webmercator)/%2$s)::int AS gy,
             Floor(ST_X(f.the_geom_webmercator)/%2$s)::int AS gx,
             MIN(cartodb_id) AS cartodb_id
           FROM %1$s f
           GROUP BY gx, gy
         )
         SELECT %6$s FROM clusters
    ', reloid::text, grid_m, overview_rel, attributes, aggr_attributes, columns, schema_name);

    RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
  END;
$$ LANGUAGE PLPGSQL;
-- This strategy places the aggregation of each cluster at the position of one of the cluster members.
-- The member used is the one with the minimum cartodb_id in the cluster.
--
-- Parameters:
--   reloid: base table to be reduced.
--   ref_z, overview_z: passed to _CDB_Overview_Name to name the overview
--     table; overview_z also determines the pixel size used for the grid.
--   grid_px: grid cell size in pixels at level overview_z (1.0 when NULL).
-- Returns the created overview table, or NULL when the geometry types
-- reported for the table are not exactly one 'ST_Point' entry.
CREATE OR REPLACE FUNCTION _CDB_GridClusterSample_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL)
RETURNS REGCLASS
AS $$
  DECLARE
    overview_rel TEXT;
    pixel_m FLOAT8;
    grid_m FLOAT8;
    aggr_attributes TEXT;
    attributes TEXT;
    columns TEXT;
    gtypes TEXT[];
    schema_name TEXT;
    table_name TEXT;
  BEGIN
    SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
    IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
      -- This strategy only supports datasets with point geometry
      RETURN NULL;
    END IF;

    --TODO: check applicability: geometry type, minimum number of points...

    overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);

    -- Grid size in pixels at Z level overview_z
    IF grid_px IS NULL THEN
      grid_px := 1.0;
    END IF;

    SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;

    -- pixel_m: size of a pixel in webmercator units (meters)
    SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
    -- grid size in meters
    grid_m = grid_px * pixel_m;

    -- NOTE(review): `attributes` is passed to Format below as argument 4 but
    -- the template never references %4$s; it is kept so the positional
    -- arguments stay aligned.
    attributes := _CDB_Aggregable_Attributes_Expression(reloid);
    aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
    IF attributes <> '' THEN
      attributes := ', ' || attributes;
    END IF;
    IF aggr_attributes <> '' THEN
      aggr_attributes := aggr_attributes || ', ';
    END IF;

    -- The resulting columns are the base table columns, in the same order;
    -- the geometry columns are taken unchanged from the sampled member.
    SELECT string_agg(c, ',') FROM CDB_ColumnNames(reloid) c INTO columns;

    -- Expose the cluster size as _feature_count unless the column list already mentions it
    IF NOT columns LIKE '%_feature_count%' THEN
      columns := columns || ', n AS _feature_count';
    END IF;

    EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);

    -- Now we cluster the data using a grid of size grid_m, keeping the
    -- minimum cartodb_id of each cluster, and then join back to the base
    -- table to fetch the geometry of that member.
    EXECUTE Format('
      CREATE TABLE %7$I.%3$I AS
         WITH clusters AS (
           SELECT
             %5$s
             count(*) AS n,
             Floor(ST_X(_f.the_geom_webmercator)/%2$s)::int AS gx,
             Floor(ST_Y(_f.the_geom_webmercator)/%2$s)::int AS gy,
             MIN(cartodb_id) AS cartodb_id
           FROM %1$s _f
           GROUP BY gx, gy
         ),
         cluster_geom AS (
           SELECT the_geom, the_geom_webmercator, clusters.*
           FROM clusters INNER JOIN %1$s _g ON (clusters.cartodb_id = _g.cartodb_id)
         )
         SELECT %6$s FROM cluster_geom
    ', reloid::text, grid_m, overview_rel, attributes, aggr_attributes, columns, schema_name);

    RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
  END;
$$ LANGUAGE PLPGSQL;
-- Create overview tables for a dataset.
-- Scope: public
-- Parameters:

View File

@@ -4,7 +4,7 @@
-- @param in_array A numeric array of numbers
--
-- Returns: statistical quantity chosen
--
--
-- References: http://www.itl.nist.gov/div898/handbook/eda/section3/eda35b.htm
--
@@ -13,17 +13,21 @@ CREATE OR REPLACE FUNCTION CDB_Kurtosis ( in_array NUMERIC[] ) RETURNS NUMERIC a
DECLARE
a numeric;
c numeric;
s numeric;
k numeric;
BEGIN
SELECT AVG(e), COUNT(e)::numeric, stddev(e) INTO a, c, s FROM ( SELECT unnest(in_array) e ) x;
SELECT AVG(e), COUNT(e)::numeric * power(stddev(e),4) INTO a, c FROM ( SELECT unnest(in_array) e ) x;
EXECUTE 'SELECT sum(power($1 - e, 4)) / ( $2 * power($3, 4)) - 3
FROM (SELECT unnest($4) e ) x'
INTO k
USING a, c, s, in_array;
IF c=0 THEN
RETURN 0;
ELSE
RETURN k;
EXECUTE 'SELECT sum(power($1 - e, 4)) / ($2 ) - 3
FROM (SELECT unnest($3) e ) x'
INTO k
USING a, c, in_array;
RETURN k;
END IF;
END;
$$ language plpgsql IMMUTABLE;
@@ -32,16 +36,18 @@ CREATE OR REPLACE FUNCTION CDB_Skewness ( in_array NUMERIC[] ) RETURNS NUMERIC a
DECLARE
a numeric;
c numeric;
s numeric;
sk numeric;
BEGIN
SELECT AVG(e), COUNT(e)::numeric, stddev(e) INTO a, c, s FROM ( SELECT unnest(in_array) e ) x;
SELECT AVG(e), COUNT(e)::numeric * power(stddev(e),3) INTO a, c FROM ( SELECT unnest(in_array) e ) x;
IF c=0 THEN
RETURN 0;
ELSE
EXECUTE 'SELECT sum(power($1 - e, 3)) / ( $2 )
FROM (SELECT unnest($3) e ) x'
INTO sk
USING a, c, in_array;
EXECUTE 'SELECT sum(power($1 - e, 3)) / ( $2 * power($3, 3))
FROM (SELECT unnest($4) e ) x'
INTO sk
USING a, c, s, in_array;
RETURN sk;
RETURN sk;
END IF;
END;
$$ language plpgsql IMMUTABLE;

View File

@@ -0,0 +1 @@
../scripts-available/CDB_EstimateRowCount.sql

View File

@@ -1,5 +1,6 @@
BEGIN
CREATE TABLE
COPY 3
none||
only_com_dec|.|,
only_dot_dec|,|.

View File

@@ -2,4 +2,10 @@ WITH data AS (
SELECT array_agg(x::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_EqualIntervalBins(s, 7)),7) FROM data
SELECT round(unnest(CDB_EqualIntervalBins(s, 7)),7) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_EqualIntervalBins(s, 7)),7) FROM data_nulls;

View File

@@ -5,3 +5,10 @@
213.8571429
256.4285714
299.0000000
15.0000000
29.0000000
43.0000000
57.0000000
71.0000000
85.0000000
99.0000000

View File

@@ -0,0 +1,10 @@
-- Test script for CDB_EstimateRowCount.
-- Only errors are reported, in terse form.
SET client_min_messages TO error;
\set VERBOSITY terse
-- Fixture: two small tables sharing the id column
CREATE TABLE tmptab1(id INT);
INSERT INTO tmptab1(id) VALUES (1), (2), (3);
CREATE TABLE tmptab2(id INT, value NUMERIC);
INSERT INTO tmptab2(id, value) VALUES (1, 10.0), (2, 20.0);
-- Estimate for an aggregate over a join of both tables
SELECT CDB_EstimateRowCount('SELECT SUM(value) FROM tmptab1 INNER JOIN tmptab2 ON (tmptab1.id = tmptab2.id);') AS row_count;
-- Estimate for a data-modifying statement with a filter on id
SELECT CDB_EstimateRowCount('UPDATE tmptab2 SET value = 30 WHERE id=2;') AS row_count;
-- Clean up the fixture
DROP TABLE tmptab2;
DROP TABLE tmptab1;

View File

@@ -0,0 +1,9 @@
SET
CREATE TABLE
INSERT 0 3
CREATE TABLE
INSERT 0 2
1
1
DROP TABLE
DROP TABLE

View File

@@ -1,5 +1,11 @@
WITH data AS (
SELECT array_agg(x) x FROM generate_series(1,100) x
SELECT array_agg(x::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_HeadsTailsBins(x, 7)),2) FROM data
)
SELECT round(unnest(CDB_HeadsTailsBins(s, 7)),2) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT round(unnest(CDB_HeadsTailsBins(s, 7)),2) FROM data_nulls;

View File

@@ -5,3 +5,9 @@
96.50
98.00
99.00
49.76
74.65
88.50
94.50
98.00
99.00

View File

@@ -1,5 +1,11 @@
WITH data AS (
SELECT array_agg(x) x FROM generate_series(1,100) x
SELECT array_agg(x::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_JenksBins(x, 7)) FROM data
SELECT unnest(CDB_JenksBins(s, 7)) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_JenksBins(s, 7)) FROM data_nulls;

View File

@@ -1,7 +1,14 @@
13
29
43
57
71
83
99
86
129
172
213
257
299
37
51
97
157
213
241

View File

@@ -1,5 +1,11 @@
WITH data AS (
SELECT array_agg(x) x FROM generate_series(1,100) x
SELECT array_agg(x::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_QuantileBins(x, 7)) FROM data
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
)
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data_nulls;

View File

@@ -4,4 +4,11 @@
57
71
86
99
29
57
87
99

View File

@@ -1,3 +1,6 @@
SET client_min_messages TO error;
\set VERBOSITY terse
WITH q AS ( SELECT CDB_QueryStatements('
SELECT * FROM geometry_columns;
') as statement )

View File

@@ -1,3 +1,4 @@
SET
1|1|SELECT * FROM geometry_columns
2|1|SELECT * FROM geometry_columns
3|1|SELECT * FROM geometry_columns

View File

@@ -1,3 +1,5 @@
SET client_min_messages TO warning;
\set VERBOSITY terse
WITH inp AS ( select 'SELECT * FROM geometry_columns'::text as q )
SELECT q, CDB_QueryTables(q) from inp;

View File

@@ -1,17 +1,14 @@
SET
SELECT * FROM geometry_columns|{pg_catalog.pg_attribute,pg_catalog.pg_class,pg_catalog.pg_constraint,pg_catalog.pg_namespace,pg_catalog.pg_type}
SELECT a.attname FROM pg_class c JOIN pg_attribute a on (a.attrelid = c.oid)|{pg_catalog.pg_attribute,pg_catalog.pg_class}
CREATE table "my'tab;le" as select 1|{}
SELECT a.oid, b.oid FROM pg_class a, pg_class b|{pg_catalog.pg_class}
SELECT 1 as col1; select 2 as col2|{}
WARNING: CDB_QueryTables cannot explain query: select 1 from nonexistant (42P01: relation "nonexistant" does not exist)
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
ERROR: relation "nonexistant" does not exist
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
begin; select * from pg_class; commit;|{pg_catalog.pg_class}
WARNING: CDB_QueryTables cannot explain query: select * from test (42P01: relation "test" does not exist)
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
ERROR: relation "test" does not exist
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
WITH a AS (select * from pg_class) select * from a|{pg_catalog.pg_class}
CREATE SCHEMA
CREATE TABLE

View File

@@ -228,6 +228,7 @@ function tear_down() {
sql 'DROP ROLE cdb_testmember_2;'
tear_down_database
DATABASE=postgres sql postgres 'DROP ROLE IF EXISTS publicuser';
}
@@ -486,6 +487,18 @@ function test_foreign_tables() {
${CMD} -d fdw_target -f scripts-available/CDB_QueryTables.sql
${CMD} -d fdw_target -f scripts-available/CDB_TableMetadata.sql
DATABASE=fdw_target sql postgres "DO
\$\$
BEGIN
IF NOT EXISTS (
SELECT *
FROM pg_catalog.pg_user
WHERE usename = 'publicuser') THEN
CREATE ROLE publicuser LOGIN;
END IF;
END
\$\$;"
DATABASE=fdw_target sql postgres 'CREATE SCHEMA test_fdw;'
DATABASE=fdw_target sql postgres 'CREATE TABLE test_fdw.foo (a int);'
DATABASE=fdw_target sql postgres 'INSERT INTO test_fdw.foo (a) values (42);'