From a486eed2e365a4136aead5e8e5289e73cafa35b4 Mon Sep 17 00:00:00 2001 From: Javier Goizueta Date: Fri, 7 Apr 2017 15:35:48 +0200 Subject: [PATCH] Add CDB_EstimateRowCount function See #295 --- NEWS.md | 4 +++ doc/CDB_EstimateRowCount.md | 25 +++++++++++++++++ scripts-available/CDB_EstimateRowCount.sql | 29 ++++++++++++++++++++ scripts-enabled/280-CDB_EstimateRowCount.sql | 1 + test/CDB_EstimateRowCountTest.sql | 10 +++++++ test/CDB_EstimateRowCountTest_expect | 9 ++++++ 6 files changed, 78 insertions(+) create mode 100644 doc/CDB_EstimateRowCount.md create mode 100644 scripts-available/CDB_EstimateRowCount.sql create mode 120000 scripts-enabled/280-CDB_EstimateRowCount.sql create mode 100644 test/CDB_EstimateRowCountTest.sql create mode 100644 test/CDB_EstimateRowCountTest_expect diff --git a/NEWS.md b/NEWS.md index 1d44f94..d02b26e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +0.19.0 (2017-04-XX) + +* Add new function `CDB_EstimateRowCount` #295 + 0.18.5 (2016-11-30) * Add to new overview creation strategies #290 diff --git a/doc/CDB_EstimateRowCount.md b/doc/CDB_EstimateRowCount.md new file mode 100644 index 0000000..e1e9287 --- /dev/null +++ b/doc/CDB_EstimateRowCount.md @@ -0,0 +1,25 @@ +Estimate the number of rows a query would return or affect, using the query planner's statistics. + + +#### Using the function + +```sql +SELECT CDB_EstimateRowCount($$ + UPDATE addresses SET the_geom = cdb_geocode_street_point(addr, city, state, 'US'); +$$) AS row_count; +``` + +Result: + +``` + row_count +----------- + 5 +(1 row) +``` + +#### Arguments + +CDB_EstimateRowCount(query) + +* **query** text: the SQL query to estimate the row count for. 
diff --git a/scripts-available/CDB_EstimateRowCount.sql b/scripts-available/CDB_EstimateRowCount.sql
new file mode 100644
index 0000000..353b963
--- /dev/null
+++ b/scripts-available/CDB_EstimateRowCount.sql
@@ -0,0 +1,42 @@
+-- Internal function to generate stats for a table if they don't exist.
+-- Runs ANALYZE on the relation only when pg_statistic has no rows for it.
+-- The function is SECURITY DEFINER, so its search_path is pinned to keep
+-- name lookups in the body independent of the caller's session settings.
+CREATE OR REPLACE FUNCTION _CDB_GenerateStats(reloid REGCLASS)
+RETURNS VOID
+AS $$
+DECLARE
+  has_stats BOOLEAN;
+BEGIN
+  -- EXISTS can stop at the first matching row; no full count is needed
+  SELECT EXISTS (
+    SELECT 1 FROM pg_catalog.pg_statistic WHERE starelid = reloid
+  ) INTO has_stats;
+  IF NOT has_stats THEN
+    -- %s is intended: the REGCLASS value is rendered as a ready-to-use name
+    EXECUTE Format('ANALYZE %s;', reloid);
+  END IF;
+END
+$$ LANGUAGE 'plpgsql' VOLATILE STRICT SECURITY DEFINER
+   SET search_path = pg_catalog, pg_temp;
+
+-- Return a row count estimate of the result of a query using statistics.
+-- query: text of the SQL statement to estimate. It is appended verbatim to
+-- an EXPLAIN command, so it must be a single statement from a trusted caller.
+-- Returns the planner's "Plan Rows" value of the top-level plan node.
+CREATE OR REPLACE FUNCTION CDB_EstimateRowCount(query text)
+RETURNS Numeric
+AS $$
+DECLARE
+  plan JSON;
+BEGIN
+  -- Make sure statistics exist for all the tables of the query
+  PERFORM _CDB_GenerateStats(tabname) FROM unnest(CDB_QueryTablesText(query)) AS tabname;
+
+  -- Use the query planner to obtain an estimate of the number of result rows;
+  -- EXPLAIN without ANALYZE plans the statement but does not run it
+  EXECUTE 'EXPLAIN (FORMAT JSON) ' || query INTO STRICT plan;
+  -- ->> yields the value as text, which is then cast explicitly
+  RETURN (plan->0->'Plan'->>'Plan Rows')::numeric;
+END
+$$ LANGUAGE 'plpgsql' VOLATILE STRICT;
diff --git a/scripts-enabled/280-CDB_EstimateRowCount.sql b/scripts-enabled/280-CDB_EstimateRowCount.sql
new file mode 120000
index 0000000..9d6120a
--- /dev/null
+++ b/scripts-enabled/280-CDB_EstimateRowCount.sql
@@ -0,0 +1 @@
+../scripts-available/CDB_EstimateRowCount.sql
\ No newline at end of file
diff --git a/test/CDB_EstimateRowCountTest.sql b/test/CDB_EstimateRowCountTest.sql
new file mode 100644
index 0000000..4659bfa
--- /dev/null
+++ b/test/CDB_EstimateRowCountTest.sql
@@ -0,0 +1,10 @@
+SET client_min_messages TO error;
+\set VERBOSITY terse
+CREATE TABLE tmptab1(id INT);
+INSERT INTO tmptab1(id) VALUES (1), (2), (3);
+CREATE TABLE tmptab2(id INT, value NUMERIC);
+INSERT INTO tmptab2(id, value) VALUES (1, 10.0), (2, 20.0);
+SELECT CDB_EstimateRowCount('SELECT SUM(value) FROM tmptab1 INNER JOIN tmptab2 ON (tmptab1.id = tmptab2.id);') AS row_count;
+SELECT CDB_EstimateRowCount('UPDATE tmptab2 SET value = 30 WHERE id=2;') AS row_count;
+DROP TABLE tmptab2;
+DROP TABLE tmptab1;
diff --git a/test/CDB_EstimateRowCountTest_expect b/test/CDB_EstimateRowCountTest_expect
new file mode 100644
index 0000000..b5f3893
--- /dev/null
+++ b/test/CDB_EstimateRowCountTest_expect
@@ -0,0 +1,9 @@
+SET
+CREATE TABLE
+INSERT 0 3
+CREATE TABLE
+INSERT 0 2
+1
+1
+DROP TABLE
+DROP TABLE