From a486eed2e365a4136aead5e8e5289e73cafa35b4 Mon Sep 17 00:00:00 2001 From: Javier Goizueta Date: Fri, 7 Apr 2017 15:35:48 +0200 Subject: [PATCH 1/5] Add CDB_EstimateRowCount function See #295 --- NEWS.md | 4 +++ doc/CDB_EstimateRowCount.md | 25 +++++++++++++++++ scripts-available/CDB_EstimateRowCount.sql | 29 ++++++++++++++++++++ scripts-enabled/280-CDB_EstimateRowCount.sql | 1 + test/CDB_EstimateRowCountTest.sql | 10 +++++++ test/CDB_EstimateRowCountTest_expect | 9 ++++++ 6 files changed, 78 insertions(+) create mode 100644 doc/CDB_EstimateRowCount.md create mode 100644 scripts-available/CDB_EstimateRowCount.sql create mode 120000 scripts-enabled/280-CDB_EstimateRowCount.sql create mode 100644 test/CDB_EstimateRowCountTest.sql create mode 100644 test/CDB_EstimateRowCountTest_expect diff --git a/NEWS.md b/NEWS.md index 1d44f94..d02b26e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +0.19.0 (2017-04-XX) + +* Add new function `CDB_EstimateRowCount` #295 + 0.18.5 (2016-11-30) * Add to new overview creation strategies #290 diff --git a/doc/CDB_EstimateRowCount.md b/doc/CDB_EstimateRowCount.md new file mode 100644 index 0000000..e1e9287 --- /dev/null +++ b/doc/CDB_EstimateRowCount.md @@ -0,0 +1,25 @@ +Estimate the number of rows of a query. + + +#### Using the function + +```sql +SELECT CDB_EstimateRowCount($$ + UPDATE addresses SET the_geom = cdb_geocode_street_point(addr, city, state, 'US'); +$$) AS row_count; +``` + +Result: + +``` + row_count +----------- + 5 +(1 row) +``` + +#### Arguments + +CDB_EstimateRowCount(query) + +* **query** text: the SQL query to estimate the row count for. 
diff --git a/scripts-available/CDB_EstimateRowCount.sql b/scripts-available/CDB_EstimateRowCount.sql new file mode 100644 index 0000000..353b963 --- /dev/null +++ b/scripts-available/CDB_EstimateRowCount.sql @@ -0,0 +1,29 @@ +-- Internal function: run ANALYZE on a table only when pg_statistic has no entries for it. Runs as SECURITY DEFINER, so search_path is pinned to keep name resolution safe. +CREATE OR REPLACE FUNCTION _CDB_GenerateStats(reloid REGCLASS) +RETURNS VOID +AS $$ +DECLARE + num_cols INTEGER; +BEGIN + SELECT COUNT(*) FROM pg_catalog.pg_statistic WHERE starelid = reloid INTO num_cols; + IF num_cols = 0 THEN + EXECUTE Format('ANALYZE %s;', reloid); + END IF; +END +$$ LANGUAGE 'plpgsql' VOLATILE STRICT SECURITY DEFINER SET search_path = pg_catalog, pg_temp; + +-- Return the planner's row count estimate for a query (uses EXPLAIN, so the query itself is not executed); returns NULL for a NULL query (STRICT) +CREATE OR REPLACE FUNCTION CDB_EstimateRowCount(query text) +RETURNS Numeric +AS $$ +DECLARE + plan JSON; +BEGIN + -- Make sure statistics exist for all the tables of the query + PERFORM _CDB_GenerateStats(tabname) FROM unnest(CDB_QueryTablesText(query)) AS tabname; + + -- Use the query planner to obtain an estimate of the number of result rows (read from the top-level plan node, cast explicitly to numeric) + EXECUTE 'EXPLAIN (FORMAT JSON) ' || query INTO STRICT plan; + RETURN (plan->0->'Plan'->>'Plan Rows')::numeric; +END +$$ LANGUAGE 'plpgsql' VOLATILE STRICT; diff --git a/scripts-enabled/280-CDB_EstimateRowCount.sql b/scripts-enabled/280-CDB_EstimateRowCount.sql new file mode 120000 index 0000000..9d6120a --- /dev/null +++ b/scripts-enabled/280-CDB_EstimateRowCount.sql @@ -0,0 +1 @@ +../scripts-available/CDB_EstimateRowCount.sql \ No newline at end of file diff --git a/test/CDB_EstimateRowCountTest.sql b/test/CDB_EstimateRowCountTest.sql new file mode 100644 index 0000000..4659bfa --- /dev/null +++ b/test/CDB_EstimateRowCountTest.sql @@ -0,0 +1,10 @@ +SET client_min_messages TO error; +\set VERBOSITY terse +CREATE TABLE tmptab1(id INT); +INSERT INTO tmptab1(id) VALUES (1), (2), (3); +CREATE TABLE tmptab2(id INT, value NUMERIC); +INSERT INTO tmptab2(id, value) VALUES (1, 10.0), (2, 20.0); +SELECT CDB_EstimateRowCount('SELECT 
SUM(value) FROM tmptab1 INNER JOIN tmptab2 ON (tmptab1.id = tmptab2.id);') AS row_count; +SELECT CDB_EstimateRowCount('UPDATE tmptab2 SET value = 30 WHERE id=2;') AS row_count; +DROP TABLE tmptab2; +DROP TABLE tmptab1; diff --git a/test/CDB_EstimateRowCountTest_expect b/test/CDB_EstimateRowCountTest_expect new file mode 100644 index 0000000..b5f3893 --- /dev/null +++ b/test/CDB_EstimateRowCountTest_expect @@ -0,0 +1,9 @@ +SET +CREATE TABLE +INSERT 0 3 +CREATE TABLE +INSERT 0 2 +1 +1 +DROP TABLE +DROP TABLE From 234373df1143d53296ccc9ad92997f22c3ef28ca Mon Sep 17 00:00:00 2001 From: Javier Goizueta Date: Mon, 10 Apr 2017 08:08:59 +0200 Subject: [PATCH 2/5] Replace unnecessary count --- scripts-available/CDB_EstimateRowCount.sql | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts-available/CDB_EstimateRowCount.sql b/scripts-available/CDB_EstimateRowCount.sql index 353b963..6be76b0 100644 --- a/scripts-available/CDB_EstimateRowCount.sql +++ b/scripts-available/CDB_EstimateRowCount.sql @@ -3,10 +3,12 @@ CREATE OR REPLACE FUNCTION _CDB_GenerateStats(reloid REGCLASS) RETURNS VOID AS $$ DECLARE - num_cols INTEGER; + has_stats BOOLEAN; BEGIN - SELECT COUNT(*) FROM pg_catalog.pg_statistic WHERE starelid = reloid INTO num_cols; - IF num_cols = 0 THEN + SELECT EXISTS ( + SELECT * FROM pg_catalog.pg_statistic WHERE starelid = reloid + ) INTO has_stats; + IF NOT has_stats THEN EXECUTE Format('ANALYZE %s;', reloid); END IF; END From 76bdb3657ac2a38ca969422b449fb9314e1bcd99 Mon Sep 17 00:00:00 2001 From: Javier Goizueta Date: Mon, 10 Apr 2017 12:17:47 +0200 Subject: [PATCH 3/5] Fix tests --- test/CDB_QueryStatementsTest.sql | 3 +++ test/CDB_QueryStatementsTest_expect | 1 + 2 files changed, 4 insertions(+) diff --git a/test/CDB_QueryStatementsTest.sql b/test/CDB_QueryStatementsTest.sql index 7e8873c..e85bc2a 100644 --- a/test/CDB_QueryStatementsTest.sql +++ b/test/CDB_QueryStatementsTest.sql @@ -1,3 +1,6 @@ +SET client_min_messages TO error; +\set 
VERBOSITY terse + WITH q AS ( SELECT CDB_QueryStatements(' SELECT * FROM geometry_columns; ') as statement ) diff --git a/test/CDB_QueryStatementsTest_expect b/test/CDB_QueryStatementsTest_expect index 7334d4b..085e3eb 100644 --- a/test/CDB_QueryStatementsTest_expect +++ b/test/CDB_QueryStatementsTest_expect @@ -1,3 +1,4 @@ +SET 1|1|SELECT * FROM geometry_columns 2|1|SELECT * FROM geometry_columns 3|1|SELECT * FROM geometry_columns From 815b5b429dcf61869e1df91b5db834eb7300d6bc Mon Sep 17 00:00:00 2001 From: Javier Goizueta Date: Mon, 10 Apr 2017 13:50:37 +0200 Subject: [PATCH 4/5] Fix tests --- test/CDB_QueryTablesTest.sql | 2 ++ test/CDB_QueryTablesTest_expect | 1 + 2 files changed, 3 insertions(+) diff --git a/test/CDB_QueryTablesTest.sql b/test/CDB_QueryTablesTest.sql index 695bf70..04633d5 100644 --- a/test/CDB_QueryTablesTest.sql +++ b/test/CDB_QueryTablesTest.sql @@ -1,3 +1,5 @@ +SET client_min_messages TO warning; +\set VERBOSITY default WITH inp AS ( select 'SELECT * FROM geometry_columns'::text as q ) SELECT q, CDB_QueryTables(q) from inp; diff --git a/test/CDB_QueryTablesTest_expect b/test/CDB_QueryTablesTest_expect index 47ab857..c6ff979 100644 --- a/test/CDB_QueryTablesTest_expect +++ b/test/CDB_QueryTablesTest_expect @@ -1,3 +1,4 @@ +SET SELECT * FROM geometry_columns|{pg_catalog.pg_attribute,pg_catalog.pg_class,pg_catalog.pg_constraint,pg_catalog.pg_namespace,pg_catalog.pg_type} SELECT a.attname FROM pg_class c JOIN pg_attribute a on (a.attrelid = c.oid)|{pg_catalog.pg_attribute,pg_catalog.pg_class} CREATE table "my'tab;le" as select 1|{} From 68a0752849eb195a84ea5a0c5aa73fa2158d40b3 Mon Sep 17 00:00:00 2001 From: Javier Goizueta Date: Mon, 10 Apr 2017 15:58:49 +0200 Subject: [PATCH 5/5] Use PG 9.5 for travis tests; fix tests --- .travis.yml | 37 ++++++++++++++++++++++++++---- test/CDB_DigitSeparatorTest_expect | 1 + test/CDB_QueryTablesTest.sql | 2 +- test/CDB_QueryTablesTest_expect | 4 ---- 4 files changed, 34 insertions(+), 10 deletions(-) diff 
--git a/.travis.yml b/.travis.yml index 875c2d8..f608433 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,41 @@ language: c addons: - postgresql: 9.3 + postgresql: 9.5 before_install: + # Add custom PPAs from cartodb + - sudo add-apt-repository -y ppa:cartodb/postgresql-9.5 + - sudo add-apt-repository -y ppa:cartodb/gis + - sudo add-apt-repository -y ppa:cartodb/gis-testing - sudo apt-get update - #- sudo apt-get install -q postgresql-9.3-postgis-2.1 - - sudo apt-get update - - sudo apt-get install -q postgresql-server-dev-9.3 - - sudo apt-get install -q postgresql-plpython-9.3 + + # Force installation of libgeos-3.5.0 (presumably needed because of existing version of postgis) + - sudo apt-get -y install libgeos-3.5.0=3.5.0-1cdb2 + + # Install postgres db and build deps + - sudo /etc/init.d/postgresql stop # stop travis default instance + - sudo apt-get -y remove --purge postgresql-9.1 + - sudo apt-get -y remove --purge postgresql-9.2 + - sudo apt-get -y remove --purge postgresql-9.3 + - sudo apt-get -y remove --purge postgresql-9.4 + - sudo apt-get -y remove --purge postgresql-9.5 + - sudo rm -rf /var/lib/postgresql/ + - sudo rm -rf /var/log/postgresql/ + - sudo rm -rf /etc/postgresql/ + - sudo apt-get -y remove --purge postgis-2.2 + - sudo apt-get -y autoremove + + - sudo apt-get -y install postgresql-9.5=9.5.2-3cdb2 + - sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb2 + - sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb2 + - sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2 + - sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2 + + # configure it to accept local connections from postgres + - echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \ + | sudo tee /etc/postgresql/9.5/main/pg_hba.conf + - sudo /etc/init.d/postgresql restart 9.5 script: - make diff --git a/test/CDB_DigitSeparatorTest_expect 
b/test/CDB_DigitSeparatorTest_expect index c9a4c6c..e7b3aea 100644 --- a/test/CDB_DigitSeparatorTest_expect +++ b/test/CDB_DigitSeparatorTest_expect @@ -1,5 +1,6 @@ BEGIN CREATE TABLE +COPY 3 none|| only_com_dec|.|, only_dot_dec|,|. diff --git a/test/CDB_QueryTablesTest.sql b/test/CDB_QueryTablesTest.sql index 04633d5..101bf57 100644 --- a/test/CDB_QueryTablesTest.sql +++ b/test/CDB_QueryTablesTest.sql @@ -1,5 +1,5 @@ SET client_min_messages TO warning; -\set VERBOSITY default +\set VERBOSITY terse WITH inp AS ( select 'SELECT * FROM geometry_columns'::text as q ) SELECT q, CDB_QueryTables(q) from inp; diff --git a/test/CDB_QueryTablesTest_expect b/test/CDB_QueryTablesTest_expect index c6ff979..44ec6ed 100644 --- a/test/CDB_QueryTablesTest_expect +++ b/test/CDB_QueryTablesTest_expect @@ -5,14 +5,10 @@ CREATE table "my'tab;le" as select 1|{} SELECT a.oid, b.oid FROM pg_class a, pg_class b|{pg_catalog.pg_class} SELECT 1 as col1; select 2 as col2|{} WARNING: CDB_QueryTables cannot explain query: select 1 from nonexistant (42P01: relation "nonexistant" does not exist) -CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN ERROR: relation "nonexistant" does not exist -CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN begin; select * from pg_class; commit;|{pg_catalog.pg_class} WARNING: CDB_QueryTables cannot explain query: select * from test (42P01: relation "test" does not exist) -CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN ERROR: relation "test" does not exist -CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN WITH a AS (select * from pg_class) select * from a|{pg_catalog.pg_class} CREATE SCHEMA CREATE TABLE