diff --git a/.travis.yml b/.travis.yml index 875c2d8..f608433 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,41 @@ language: c addons: - postgresql: 9.3 + postgresql: 9.5 before_install: + # Add custom PPAs from cartodb + - sudo add-apt-repository -y ppa:cartodb/postgresql-9.5 + - sudo add-apt-repository -y ppa:cartodb/gis + - sudo add-apt-repository -y ppa:cartodb/gis-testing - sudo apt-get update - #- sudo apt-get install -q postgresql-9.3-postgis-2.1 - - sudo apt-get update - - sudo apt-get install -q postgresql-server-dev-9.3 - - sudo apt-get install -q postgresql-plpython-9.3 + + # Force instalation of libgeos-3.5.0 (presumably needed because of existing version of postgis) + - sudo apt-get -y install libgeos-3.5.0=3.5.0-1cdb2 + + # Install postgres db and build deps + - sudo /etc/init.d/postgresql stop # stop travis default instance + - sudo apt-get -y remove --purge postgresql-9.1 + - sudo apt-get -y remove --purge postgresql-9.2 + - sudo apt-get -y remove --purge postgresql-9.3 + - sudo apt-get -y remove --purge postgresql-9.4 + - sudo apt-get -y remove --purge postgresql-9.5 + - sudo rm -rf /var/lib/postgresql/ + - sudo rm -rf /var/log/postgresql/ + - sudo rm -rf /etc/postgresql/ + - sudo apt-get -y remove --purge postgis-2.2 + - sudo apt-get -y autoremove + + - sudo apt-get -y install postgresql-9.5=9.5.2-3cdb2 + - sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb2 + - sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb2 + - sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2 + - sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2 + + # configure it to accept local connections from postgres + - echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \ + | sudo tee /etc/postgresql/9.5/main/pg_hba.conf + - sudo /etc/init.d/postgresql restart 9.5 script: - make diff --git a/NEWS.md b/NEWS.md index 1d44f94..d02b26e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +0.19.0 (2017-04-XX) + +* Add new function `CDB_EstimateRowCount` #295 + 0.18.5 (2016-11-30) * Add to new overview creation strategies #290 diff --git a/doc/CDB_EstimateRowCount.md b/doc/CDB_EstimateRowCount.md new file mode 100644 index 0000000..e1e9287 --- /dev/null +++ b/doc/CDB_EstimateRowCount.md @@ -0,0 +1,25 @@ +Estimate the number of rows of a query. + + +#### Using the function + +```sql +SELECT CDB_EstimateRowCount($$ + UPDATE addresses SET the_geom = cdb_geocode_street_point(addr, city, state, 'US'); +$$) AS row_count; +``` + +Result: + +``` + row_count +----------- + 5 +(1 row) +``` + +#### Arguments + +CDB_EstimateRowCount(query) + +* **query** text: the SQL query to estimate the row count for. diff --git a/scripts-available/CDB_EstimateRowCount.sql b/scripts-available/CDB_EstimateRowCount.sql new file mode 100644 index 0000000..6be76b0 --- /dev/null +++ b/scripts-available/CDB_EstimateRowCount.sql @@ -0,0 +1,31 @@ +-- Internal function to generate stats for a table if they don't exist +CREATE OR REPLACE FUNCTION _CDB_GenerateStats(reloid REGCLASS) +RETURNS VOID +AS $$ +DECLARE + has_stats BOOLEAN; +BEGIN + SELECT EXISTS ( + SELECT * FROM pg_catalog.pg_statistic WHERE starelid = reloid + ) INTO has_stats; + IF NOT has_stats THEN + EXECUTE Format('ANALYZE %s;', reloid); + END IF; +END +$$ LANGUAGE 'plpgsql' VOLATILE STRICT SECURITY DEFINER; + +-- Return a row count estimate of the result of a query using statistics +CREATE OR REPLACE FUNCTION CDB_EstimateRowCount(query text) +RETURNS Numeric +AS $$ +DECLARE + plan JSON; +BEGIN + -- Make sure statistics exist for all the tables of the query + PERFORM _CDB_GenerateStats(tabname) FROM unnest(CDB_QueryTablesText(query)) AS tabname; + + -- Use the query planner to obtain an estimate of the number of result rows + EXECUTE 'EXPLAIN (FORMAT JSON) ' || query INTO STRICT plan; + RETURN plan->0->'Plan'->'Plan Rows'; +END +$$ LANGUAGE 'plpgsql' VOLATILE STRICT; diff --git a/scripts-enabled/280-CDB_EstimateRowCount.sql b/scripts-enabled/280-CDB_EstimateRowCount.sql new file mode 120000 index 0000000..9d6120a --- /dev/null +++ b/scripts-enabled/280-CDB_EstimateRowCount.sql @@ -0,0 +1 @@ +../scripts-available/CDB_EstimateRowCount.sql \ No newline at end of file diff --git a/test/CDB_DigitSeparatorTest_expect b/test/CDB_DigitSeparatorTest_expect index c9a4c6c..e7b3aea 100644 --- a/test/CDB_DigitSeparatorTest_expect +++ b/test/CDB_DigitSeparatorTest_expect @@ -1,5 +1,6 @@ BEGIN CREATE TABLE +COPY 3 none|| only_com_dec|.|, only_dot_dec|,|. diff --git a/test/CDB_EstimateRowCountTest.sql b/test/CDB_EstimateRowCountTest.sql new file mode 100644 index 0000000..4659bfa --- /dev/null +++ b/test/CDB_EstimateRowCountTest.sql @@ -0,0 +1,10 @@ +SET client_min_messages TO error; +\set VERBOSITY terse +CREATE TABLE tmptab1(id INT); +INSERT INTO tmptab1(id) VALUES (1), (2), (3); +CREATE TABLE tmptab2(id INT, value NUMERIC); +INSERT INTO tmptab2(id, value) VALUES (1, 10.0), (2, 20.0); +SELECT CDB_EstimateRowCount('SELECT SUM(value) FROM tmptab1 INNER JOIN tmptab2 ON (tmptab1.id = tmptab2.id);') AS row_count; +SELECT CDB_EstimateRowCount('UPDATE tmptab2 SET value = 30 WHERE id=2;') AS row_count; +DROP TABLE tmptab2; +DROP TABLE tmptab1; diff --git a/test/CDB_EstimateRowCountTest_expect b/test/CDB_EstimateRowCountTest_expect new file mode 100644 index 0000000..b5f3893 --- /dev/null +++ b/test/CDB_EstimateRowCountTest_expect @@ -0,0 +1,9 @@ +SET +CREATE TABLE +INSERT 0 3 +CREATE TABLE +INSERT 0 2 +1 +1 +DROP TABLE +DROP TABLE diff --git a/test/CDB_QueryStatementsTest.sql b/test/CDB_QueryStatementsTest.sql index 7e8873c..e85bc2a 100644 --- a/test/CDB_QueryStatementsTest.sql +++ b/test/CDB_QueryStatementsTest.sql @@ -1,3 +1,6 @@ +SET client_min_messages TO error; +\set VERBOSITY terse + WITH q AS ( SELECT CDB_QueryStatements(' SELECT * FROM geometry_columns; ') as statement ) diff --git a/test/CDB_QueryStatementsTest_expect b/test/CDB_QueryStatementsTest_expect index 7334d4b..085e3eb 100644 --- a/test/CDB_QueryStatementsTest_expect +++ b/test/CDB_QueryStatementsTest_expect @@ -1,3 +1,4 @@ +SET 1|1|SELECT * FROM geometry_columns 2|1|SELECT * FROM geometry_columns 3|1|SELECT * FROM geometry_columns diff --git a/test/CDB_QueryTablesTest.sql b/test/CDB_QueryTablesTest.sql index 695bf70..101bf57 100644 --- a/test/CDB_QueryTablesTest.sql +++ b/test/CDB_QueryTablesTest.sql @@ -1,3 +1,5 @@ +SET client_min_messages TO warning; +\set VERBOSITY terse WITH inp AS ( select 'SELECT * FROM geometry_columns'::text as q ) SELECT q, CDB_QueryTables(q) from inp; diff --git a/test/CDB_QueryTablesTest_expect b/test/CDB_QueryTablesTest_expect index 47ab857..44ec6ed 100644 --- a/test/CDB_QueryTablesTest_expect +++ b/test/CDB_QueryTablesTest_expect @@ -1,17 +1,14 @@ +SET SELECT * FROM geometry_columns|{pg_catalog.pg_attribute,pg_catalog.pg_class,pg_catalog.pg_constraint,pg_catalog.pg_namespace,pg_catalog.pg_type} SELECT a.attname FROM pg_class c JOIN pg_attribute a on (a.attrelid = c.oid)|{pg_catalog.pg_attribute,pg_catalog.pg_class} CREATE table "my'tab;le" as select 1|{} SELECT a.oid, b.oid FROM pg_class a, pg_class b|{pg_catalog.pg_class} SELECT 1 as col1; select 2 as col2|{} WARNING: CDB_QueryTables cannot explain query: select 1 from nonexistant (42P01: relation "nonexistant" does not exist) -CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN ERROR: relation "nonexistant" does not exist -CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN begin; select * from pg_class; commit;|{pg_catalog.pg_class} WARNING: CDB_QueryTables cannot explain query: select * from test (42P01: relation "test" does not exist) -CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN ERROR: relation "test" does not exist -CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN WITH a AS (select * from pg_class) select * from a|{pg_catalog.pg_class} CREATE SCHEMA CREATE TABLE