From da4331ac78cf76844b95274a5d6479d3d81dddb3 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Fri, 24 May 2019 18:38:28 +0200 Subject: [PATCH 01/15] WIP: A function to sync tables It assumes there's a cartodb_id column in both source and target. It does not perform unnecessary actions. It respects augmented columns in target table, if they exist. It is meant to be efficient. --- scripts-available/CDB_SyncTable.sql | 137 ++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 scripts-available/CDB_SyncTable.sql diff --git a/scripts-available/CDB_SyncTable.sql b/scripts-available/CDB_SyncTable.sql new file mode 100644 index 0000000..b883649 --- /dev/null +++ b/scripts-available/CDB_SyncTable.sql @@ -0,0 +1,137 @@ +/* + Sample usage: + + SELECT _CDB_GetColumns('public.films'); +*/ +CREATE OR REPLACE FUNCTION _CDB_GetColumns(src_table REGCLASS) +RETURNS SETOF NAME +AS $$ + SELECT + a.attname as "colname" + FROM + pg_catalog.pg_attribute a + WHERE + a.attnum > 0 + AND NOT a.attisdropped + AND a.attrelid = ( + SELECT c.oid + FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace + WHERE c.oid = src_table::oid + AND pg_catalog.pg_table_is_visible(c.oid) + ); +$$ LANGUAGE sql STABLE PARALLEL UNSAFE; + + +/* + A Table Syncer + + Assumptions: + - Both tables contain a consistent cartodb_id column + - Destination table has all columns of the source + + Sample usage: + + SELECT CDB_SyncTable('radar_stations', 'public', 'syncdest'); + +*/ +CREATE OR REPLACE FUNCTION CDB_SyncTable(src_table REGCLASS, dst_schema REGNAMESPACE, dst_table NAME) +RETURNS void +AS $$ +DECLARE + fq_dest_table TEXT; + + colnames TEXT[]; + quoted_colnames TEXT; + + src_hash_table_name NAME; + dst_hash_table_name NAME; + + num_rows BIGINT; + err_context text; +BEGIN + -- If the destination table does not exist, just copy the source table + fq_dest_table := format('%I.%I', dst_schema, dst_table); + EXECUTE format('CREATE TABLE IF NOT EXISTS %s as TABLE %I', fq_dest_table, src_table); + GET DIAGNOSTICS num_rows = ROW_COUNT; + IF num_rows > 0 THEN + RAISE NOTICE 'INSERTED % row(s)', num_rows; + RETURN; + END IF; + + -- Get the list of columns from the source table, excluding cartodb_id + SELECT ARRAY(SELECT quote_ident(c) FROM _CDB_GetColumns(src_table) as c WHERE c <> 'cartodb_id') INTO colnames; + quoted_colnames := array_to_string(colnames, ','); + RAISE NOTICE 'quoted_colnames = %', quoted_colnames; + + src_hash_table_name := format('src_sync_%s', txid_current()); + dst_hash_table_name := format('dst_sync_%s', txid_current()); + + BEGIN + -- TODO: use ON COMMIT DROP instead of Cleanup + EXECUTE format('CREATE TEMP TABLE %I(cartodb_id BIGINT, hash TEXT)', src_hash_table_name); + EXECUTE format('CREATE TEMP TABLE %I(cartodb_id BIGINT, hash TEXT)', dst_hash_table_name); + + -- Compute hash for src_table h[cartodb_id] = hash(row) + -- It'll take the form of a temp table with an index (easy to run set operations) + EXECUTE format('INSERT INTO %I SELECT cartodb_id, md5(ROW(%s)::text) hash FROM %I', src_hash_table_name, quoted_colnames, src_table); + + -- Compute hash for dst_table, only for columns present in src_table + EXECUTE format('INSERT INTO %I SELECT cartodb_id, md5(ROW(%s)::text) hash FROM %s', dst_hash_table_name, quoted_colnames, fq_dest_table); + + -- Deal with deleted rows: ids in dest but not in source + EXECUTE format('DELETE FROM %s WHERE cartodb_id in (SELECT cartodb_id FROM %I WHERE cartodb_id NOT IN (SELECT cartodb_id FROM %I))', fq_dest_table, dst_hash_table_name, src_hash_table_name); + GET DIAGNOSTICS num_rows = ROW_COUNT; + RAISE NOTICE 'DELETED % row(s)', num_rows; + + -- Deal with inserted rows: ids in source but not in dest + EXECUTE format(' + INSERT INTO %s (cartodb_id,%s) + SELECT h.cartodb_id,%s FROM %I h + LEFT JOIN %I s ON s.cartodb_id = h.cartodb_id + WHERE h.cartodb_id NOT IN (SELECT cartodb_id FROM %I); + ', fq_dest_table, quoted_colnames, quoted_colnames, src_hash_table_name, src_table, dst_hash_table_name); + GET DIAGNOSTICS num_rows = ROW_COUNT; + RAISE NOTICE 'INSERTED % row(s)', num_rows; + + -- Deal with modified rows: ids in source and dest but different hashes + /* + UPDATE syncdes dst SET + the_geom = changed.the_geom, + the_geom_webmercator = changed.the_geom_webmercator, + id = changed.id, + elevation = changed.elevation, + latitude = changed.latitude, + longitude = changed.longitude, + name = changed.name + FROM ( + SELECT cartodb_id,the_geom,the_geom_webmercator,id,elevation,latitude,longitude,name + FROM radar_stations src + WHERE cartodb_id IN + (SELECT sh.cartodb_id FROM src_sync_615543 sh + LEFT JOIN dst_sync_615543 dh ON sh.cartodb_id = dh.cartodb_id + WHERE sh.hash <> dh.hash) + ) changed + WHERE dst.cartodb_id = changed.cartodb_id; + */ + --GET DIAGNOSTICS num_rows = ROW_COUNT; + --RAISE NOTICE 'MODIFIED % row(s)', num_rows; + + -- Cleanup + --EXECUTE format('DROP TABLE IF EXISTS %I', src_hash_table_name); + --EXECUTE format('DROP TABLE IF EXISTS %I', dst_hash_table_name); + EXCEPTION + WHEN others THEN + -- Cleanup + EXECUTE format('DROP TABLE IF EXISTS %I', src_hash_table_name); + EXECUTE format('DROP TABLE IF EXISTS %I', dst_hash_table_name); + + -- Exception reporting + GET STACKED DIAGNOSTICS err_context = PG_EXCEPTION_CONTEXT; + RAISE INFO 'Error Name:%',SQLERRM; + RAISE INFO 'Error State:%', SQLSTATE; + RAISE INFO 'Error Context:%', err_context; + END; + +END; +$$ LANGUAGE plpgsql VOLATILE PARALLEL UNSAFE; From 982ddfdeffc9a1082a45d8772e04d071ab46f021 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Mon, 27 May 2019 11:31:07 +0200 Subject: [PATCH 02/15] Helper function to generate UPDATE SET clause --- scripts-available/CDB_SyncTable.sql | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/scripts-available/CDB_SyncTable.sql b/scripts-available/CDB_SyncTable.sql index b883649..c97dbdc 100644 --- a/scripts-available/CDB_SyncTable.sql +++ b/scripts-available/CDB_SyncTable.sql @@ -1,4 +1,6 @@ /* + Gets the column names of a given table. + Sample usage: SELECT _CDB_GetColumns('public.films'); @@ -23,6 +25,33 @@ AS $$ $$ LANGUAGE sql STABLE PARALLEL UNSAFE; +/* + Given an array of quoted column names, it generates an UPDATE SET + clause with the following form: + + the_geom = changed.the_geom, + id = changed.id, + elevation = changed.elevation + + Example of usage: + + SELECT __CDB_GetUpdateSetClause('{the_geom, id, elevation}', 'changed'); +*/ +CREATE OR REPLACE FUNCTION __CDB_GetUpdateSetClause(colnames TEXT[], update_source TEXT) +RETURNS TEXT +AS $$ +DECLARE + set_clause_list TEXT[]; + col TEXT; +BEGIN + FOREACH col IN ARRAY colnames + LOOP + set_clause_list := array_append(set_clause_list, format('%1$s = %2$s.%1$s', col, update_source)); + END lOOP; + RETURN array_to_string(set_clause_list, ', '); +END; +$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE; + /* A Table Syncer @@ -79,6 +108,8 @@ BEGIN -- Compute hash for dst_table, only for columns present in src_table EXECUTE format('INSERT INTO %I SELECT cartodb_id, md5(ROW(%s)::text) hash FROM %s', dst_hash_table_name, quoted_colnames, fq_dest_table); + -- TODO create indexes + -- Deal with deleted rows: ids in dest but not in source EXECUTE format('DELETE FROM %s WHERE cartodb_id in (SELECT cartodb_id FROM %I WHERE cartodb_id NOT IN (SELECT cartodb_id FROM %I))', fq_dest_table, dst_hash_table_name, src_hash_table_name); GET DIAGNOSTICS num_rows = ROW_COUNT; From a8d57abda62d0ead25cb841c2a292de0060b4b30 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Mon, 27 May 2019 11:54:38 +0200 Subject: [PATCH 03/15] Update changed rows --- scripts-available/CDB_SyncTable.sql | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/scripts-available/CDB_SyncTable.sql b/scripts-available/CDB_SyncTable.sql index c97dbdc..8dd08de 100644 --- a/scripts-available/CDB_SyncTable.sql +++ b/scripts-available/CDB_SyncTable.sql @@ -76,6 +76,8 @@ DECLARE src_hash_table_name NAME; dst_hash_table_name NAME; + update_set_clause TEXT; + num_rows BIGINT; err_context text; BEGIN @@ -126,27 +128,21 @@ BEGIN RAISE NOTICE 'INSERTED % row(s)', num_rows; -- Deal with modified rows: ids in source and dest but different hashes - /* - UPDATE syncdes dst SET - the_geom = changed.the_geom, - the_geom_webmercator = changed.the_geom_webmercator, - id = changed.id, - elevation = changed.elevation, - latitude = changed.latitude, - longitude = changed.longitude, - name = changed.name + update_set_clause := __CDB_GetUpdateSetClause(colnames, 'changed'); + EXECUTE format(' + UPDATE %1$s dst SET %2$s FROM ( - SELECT cartodb_id,the_geom,the_geom_webmercator,id,elevation,latitude,longitude,name - FROM radar_stations src + SELECT cartodb_id,%3$s + FROM %4$s src WHERE cartodb_id IN - (SELECT sh.cartodb_id FROM src_sync_615543 sh - LEFT JOIN dst_sync_615543 dh ON sh.cartodb_id = dh.cartodb_id + (SELECT sh.cartodb_id FROM %5$I sh + LEFT JOIN %6$I dh ON sh.cartodb_id = dh.cartodb_id WHERE sh.hash <> dh.hash) ) changed WHERE dst.cartodb_id = changed.cartodb_id; - */ - --GET DIAGNOSTICS num_rows = ROW_COUNT; - --RAISE NOTICE 'MODIFIED % row(s)', num_rows; + ', fq_dest_table, update_set_clause, quoted_colnames, src_table, src_hash_table_name, dst_hash_table_name); + GET DIAGNOSTICS num_rows = ROW_COUNT; + RAISE NOTICE 'MODIFIED % row(s)', num_rows; -- Cleanup --EXECUTE format('DROP TABLE IF EXISTS %I', src_hash_table_name); From f461faf0b6763062ebb379e47b182927056b3c7f Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Mon, 27 May 2019 11:56:41 +0200 Subject: [PATCH 04/15] Simplify udpate: use * instead of column list --- scripts-available/CDB_SyncTable.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts-available/CDB_SyncTable.sql b/scripts-available/CDB_SyncTable.sql index 8dd08de..6bdb684 100644 --- a/scripts-available/CDB_SyncTable.sql +++ b/scripts-available/CDB_SyncTable.sql @@ -132,15 +132,15 @@ BEGIN EXECUTE format(' UPDATE %1$s dst SET %2$s FROM ( - SELECT cartodb_id,%3$s - FROM %4$s src + SELECT * + FROM %3$s src WHERE cartodb_id IN - (SELECT sh.cartodb_id FROM %5$I sh - LEFT JOIN %6$I dh ON sh.cartodb_id = dh.cartodb_id + (SELECT sh.cartodb_id FROM %4$I sh + LEFT JOIN %5$I dh ON sh.cartodb_id = dh.cartodb_id WHERE sh.hash <> dh.hash) ) changed WHERE dst.cartodb_id = changed.cartodb_id; - ', fq_dest_table, update_set_clause, quoted_colnames, src_table, src_hash_table_name, dst_hash_table_name); + ', fq_dest_table, update_set_clause, src_table, src_hash_table_name, dst_hash_table_name); GET DIAGNOSTICS num_rows = ROW_COUNT; RAISE NOTICE 'MODIFIED % row(s)', num_rows; From 951f25765450eb1236fc830012cf7c02058d0e4b Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Mon, 27 May 2019 12:02:12 +0200 Subject: [PATCH 05/15] Simplify code by using ON COMMIT DROP Simplify code by relying on automatic removal of temp tables with ON COMMIT DROP, so that we avoid the messy EXCEPTION management. --- scripts-available/CDB_SyncTable.sql | 93 ++++++++++++----------------- 1 file changed, 37 insertions(+), 56 deletions(-) diff --git a/scripts-available/CDB_SyncTable.sql b/scripts-available/CDB_SyncTable.sql index 6bdb684..108642d 100644 --- a/scripts-available/CDB_SyncTable.sql +++ b/scripts-available/CDB_SyncTable.sql @@ -98,67 +98,48 @@ BEGIN src_hash_table_name := format('src_sync_%s', txid_current()); dst_hash_table_name := format('dst_sync_%s', txid_current()); - BEGIN - -- TODO: use ON COMMIT DROP instead of Cleanup - EXECUTE format('CREATE TEMP TABLE %I(cartodb_id BIGINT, hash TEXT)', src_hash_table_name); - EXECUTE format('CREATE TEMP TABLE %I(cartodb_id BIGINT, hash TEXT)', dst_hash_table_name); + EXECUTE format('CREATE TEMP TABLE %I(cartodb_id BIGINT, hash TEXT) ON COMMIT DROP', src_hash_table_name); + EXECUTE format('CREATE TEMP TABLE %I(cartodb_id BIGINT, hash TEXT) ON COMMIT DROP', dst_hash_table_name); - -- Compute hash for src_table h[cartodb_id] = hash(row) - -- It'll take the form of a temp table with an index (easy to run set operations) - EXECUTE format('INSERT INTO %I SELECT cartodb_id, md5(ROW(%s)::text) hash FROM %I', src_hash_table_name, quoted_colnames, src_table); + -- Compute hash for src_table h[cartodb_id] = hash(row) + -- It'll take the form of a temp table with an index (easy to run set operations) + EXECUTE format('INSERT INTO %I SELECT cartodb_id, md5(ROW(%s)::text) hash FROM %I', src_hash_table_name, quoted_colnames, src_table); - -- Compute hash for dst_table, only for columns present in src_table - EXECUTE format('INSERT INTO %I SELECT cartodb_id, md5(ROW(%s)::text) hash FROM %s', dst_hash_table_name, quoted_colnames, fq_dest_table); + -- Compute hash for dst_table, only for columns present in src_table + EXECUTE format('INSERT INTO %I SELECT cartodb_id, md5(ROW(%s)::text) hash FROM %s', dst_hash_table_name, quoted_colnames, fq_dest_table); - -- TODO create indexes + -- TODO create indexes - -- Deal with deleted rows: ids in dest but not in source - EXECUTE format('DELETE FROM %s WHERE cartodb_id in (SELECT cartodb_id FROM %I WHERE cartodb_id NOT IN (SELECT cartodb_id FROM %I))', fq_dest_table, dst_hash_table_name, src_hash_table_name); - GET DIAGNOSTICS num_rows = ROW_COUNT; - RAISE NOTICE 'DELETED % row(s)', num_rows; + -- Deal with deleted rows: ids in dest but not in source + EXECUTE format('DELETE FROM %s WHERE cartodb_id in (SELECT cartodb_id FROM %I WHERE cartodb_id NOT IN (SELECT cartodb_id FROM %I))', fq_dest_table, dst_hash_table_name, src_hash_table_name); + GET DIAGNOSTICS num_rows = ROW_COUNT; + RAISE NOTICE 'DELETED % row(s)', num_rows; - -- Deal with inserted rows: ids in source but not in dest - EXECUTE format(' - INSERT INTO %s (cartodb_id,%s) - SELECT h.cartodb_id,%s FROM %I h - LEFT JOIN %I s ON s.cartodb_id = h.cartodb_id - WHERE h.cartodb_id NOT IN (SELECT cartodb_id FROM %I); - ', fq_dest_table, quoted_colnames, quoted_colnames, src_hash_table_name, src_table, dst_hash_table_name); - GET DIAGNOSTICS num_rows = ROW_COUNT; - RAISE NOTICE 'INSERTED % row(s)', num_rows; - - -- Deal with modified rows: ids in source and dest but different hashes - update_set_clause := __CDB_GetUpdateSetClause(colnames, 'changed'); - EXECUTE format(' - UPDATE %1$s dst SET %2$s - FROM ( - SELECT * - FROM %3$s src - WHERE cartodb_id IN - (SELECT sh.cartodb_id FROM %4$I sh - LEFT JOIN %5$I dh ON sh.cartodb_id = dh.cartodb_id - WHERE sh.hash <> dh.hash) - ) changed - WHERE dst.cartodb_id = changed.cartodb_id; - ', fq_dest_table, update_set_clause, src_table, src_hash_table_name, dst_hash_table_name); - GET DIAGNOSTICS num_rows = ROW_COUNT; - RAISE NOTICE 'MODIFIED % row(s)', num_rows; - - -- Cleanup - --EXECUTE format('DROP TABLE IF EXISTS %I', src_hash_table_name); - --EXECUTE format('DROP TABLE IF EXISTS %I', dst_hash_table_name); - EXCEPTION - WHEN others THEN - -- Cleanup - EXECUTE format('DROP TABLE IF EXISTS %I', src_hash_table_name); - EXECUTE format('DROP TABLE IF EXISTS %I', dst_hash_table_name); - - -- Exception reporting - GET STACKED DIAGNOSTICS err_context = PG_EXCEPTION_CONTEXT; - RAISE INFO 'Error Name:%',SQLERRM; - RAISE INFO 'Error State:%', SQLSTATE; - RAISE INFO 'Error Context:%', err_context; - END; + -- Deal with inserted rows: ids in source but not in dest + EXECUTE format(' + INSERT INTO %s (cartodb_id,%s) + SELECT h.cartodb_id,%s FROM %I h + LEFT JOIN %I s ON s.cartodb_id = h.cartodb_id + WHERE h.cartodb_id NOT IN (SELECT cartodb_id FROM %I); + ', fq_dest_table, quoted_colnames, quoted_colnames, src_hash_table_name, src_table, dst_hash_table_name); + GET DIAGNOSTICS num_rows = ROW_COUNT; + RAISE NOTICE 'INSERTED % row(s)', num_rows; + -- Deal with modified rows: ids in source and dest but different hashes + update_set_clause := __CDB_GetUpdateSetClause(colnames, 'changed'); + EXECUTE format(' + UPDATE %1$s dst SET %2$s + FROM ( + SELECT * + FROM %3$s src + WHERE cartodb_id IN + (SELECT sh.cartodb_id FROM %4$I sh + LEFT JOIN %5$I dh ON sh.cartodb_id = dh.cartodb_id + WHERE sh.hash <> dh.hash) + ) changed + WHERE dst.cartodb_id = changed.cartodb_id; + ', fq_dest_table, update_set_clause, src_table, src_hash_table_name, dst_hash_table_name); + GET DIAGNOSTICS num_rows = ROW_COUNT; + RAISE NOTICE 'MODIFIED % row(s)', num_rows; END; $$ LANGUAGE plpgsql VOLATILE PARALLEL UNSAFE; From 81d0f338cfff4f6aca83e14e979092649bee34bd Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Mon, 27 May 2019 15:18:13 +0200 Subject: [PATCH 06/15] Create HASH indices on the temp tables --- scripts-available/CDB_SyncTable.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts-available/CDB_SyncTable.sql b/scripts-available/CDB_SyncTable.sql index 108642d..7084cb3 100644 --- a/scripts-available/CDB_SyncTable.sql +++ b/scripts-available/CDB_SyncTable.sql @@ -93,7 +93,6 @@ BEGIN -- Get the list of columns from the source table, excluding cartodb_id SELECT ARRAY(SELECT quote_ident(c) FROM _CDB_GetColumns(src_table) as c WHERE c <> 'cartodb_id') INTO colnames; quoted_colnames := array_to_string(colnames, ','); - RAISE NOTICE 'quoted_colnames = %', quoted_colnames; src_hash_table_name := format('src_sync_%s', txid_current()); dst_hash_table_name := format('dst_sync_%s', txid_current()); @@ -108,7 +107,10 @@ BEGIN -- Compute hash for dst_table, only for columns present in src_table EXECUTE format('INSERT INTO %I SELECT cartodb_id, md5(ROW(%s)::text) hash FROM %s', dst_hash_table_name, quoted_colnames, fq_dest_table); - -- TODO create indexes + -- Create indexes + -- We use hash indexes as they are fit for id comparison. + EXECUTE format('CREATE INDEX ON %I USING HASH (cartodb_id)', src_hash_table_name); + EXECUTE format('CREATE INDEX ON %I USING HASH (cartodb_id)', dst_hash_table_name); -- Deal with deleted rows: ids in dest but not in source EXECUTE format('DELETE FROM %s WHERE cartodb_id in (SELECT cartodb_id FROM %I WHERE cartodb_id NOT IN (SELECT cartodb_id FROM %I))', fq_dest_table, dst_hash_table_name, src_hash_table_name); From 7606585672091b6457d1189298d6f87bb7ca7acc Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Mon, 27 May 2019 15:20:06 +0200 Subject: [PATCH 07/15] Perf optimization: use EXCEPT instead of NOT IN With javitonino's help, greatly reduce the processing time by using EXCEPT instead of NOT IN, which causes it to use a `HashSetOp Except` plan on the subqueries rather than a `Seq Scan` on `Materialize`'d subtables. --- scripts-available/CDB_SyncTable.sql | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scripts-available/CDB_SyncTable.sql b/scripts-available/CDB_SyncTable.sql index 7084cb3..9c407a2 100644 --- a/scripts-available/CDB_SyncTable.sql +++ b/scripts-available/CDB_SyncTable.sql @@ -113,17 +113,20 @@ BEGIN EXECUTE format('CREATE INDEX ON %I USING HASH (cartodb_id)', dst_hash_table_name); -- Deal with deleted rows: ids in dest but not in source - EXECUTE format('DELETE FROM %s WHERE cartodb_id in (SELECT cartodb_id FROM %I WHERE cartodb_id NOT IN (SELECT cartodb_id FROM %I))', fq_dest_table, dst_hash_table_name, src_hash_table_name); + EXECUTE format( + 'DELETE FROM %s WHERE cartodb_id IN (SELECT cartodb_id FROM %I EXCEPT SELECT cartodb_id FROM %I)', + fq_dest_table, + dst_hash_table_name, + src_hash_table_name); GET DIAGNOSTICS num_rows = ROW_COUNT; RAISE NOTICE 'DELETED % row(s)', num_rows; -- Deal with inserted rows: ids in source but not in dest EXECUTE format(' INSERT INTO %s (cartodb_id,%s) - SELECT h.cartodb_id,%s FROM %I h - LEFT JOIN %I s ON s.cartodb_id = h.cartodb_id - WHERE h.cartodb_id NOT IN (SELECT cartodb_id FROM %I); - ', fq_dest_table, quoted_colnames, quoted_colnames, src_hash_table_name, src_table, dst_hash_table_name); + SELECT h.cartodb_id,%s FROM (SELECT cartodb_id FROM %I EXCEPT SELECT cartodb_id FROM %I) h + LEFT JOIN %I s ON s.cartodb_id = h.cartodb_id; + ', fq_dest_table, quoted_colnames, quoted_colnames, src_hash_table_name, dst_hash_table_name, src_table); GET DIAGNOSTICS num_rows = ROW_COUNT; RAISE NOTICE 'INSERTED % row(s)', num_rows; From ee9d08a2bec95fa4e1b162a2871c7c2ac17661d6 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Mon, 27 May 2019 17:56:36 +0200 Subject: [PATCH 08/15] Some functional E2E tests for the CDB_SyncTable() function --- scripts-enabled/CDB_SyncTable.sql | 1 + test/CDB_SyncTableTest.sql | 44 +++++++++++++++++++++++++++++++ test/CDB_SyncTableTest_expect | 39 +++++++++++++++++++++++++++ 3 files changed, 84 insertions(+) create mode 120000 scripts-enabled/CDB_SyncTable.sql create mode 100644 test/CDB_SyncTableTest.sql create mode 100644 test/CDB_SyncTableTest_expect diff --git a/scripts-enabled/CDB_SyncTable.sql b/scripts-enabled/CDB_SyncTable.sql new file mode 120000 index 0000000..c258915 --- /dev/null +++ b/scripts-enabled/CDB_SyncTable.sql @@ -0,0 +1 @@ +../scripts-available/CDB_SyncTable.sql \ No newline at end of file diff --git a/test/CDB_SyncTableTest.sql b/test/CDB_SyncTableTest.sql new file mode 100644 index 0000000..694daee --- /dev/null +++ b/test/CDB_SyncTableTest.sql @@ -0,0 +1,44 @@ +-- Setup: create and populate a table to test the syncs +\set QUIET on +SET client_min_messages TO error; +CREATE TABLE test_sync_source ( + cartodb_id bigint, + lat double precision, + lon double precision, + name text +); +INSERT INTO test_sync_source VALUES + (1, 1.0, 1.0, 'foo'), + (2, 2.0, 2.0, 'bar'), + (3, 3.0, 3.0, 'patata'), + (4, 4.0, 4.0, 'melon'); +SET client_min_messages TO notice; +\set QUIET off + +\echo 'First table sync: it should be simply just copied to the destination' +SELECT cartodb.CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest'); + +\echo 'Next table sync: there shall be no changes' +SELECT cartodb.CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest'); + +\echo 'Remove a row from the source and check it is deleted from the dest table' +DELETE FROM test_sync_source WHERE cartodb_id = 3; +SELECT cartodb.CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest'); + +\echo 'Insert a new row and check that it is inserted in the dest table' +INSERT INTO test_sync_source VALUES (5, 5.0, 5.0, 'sandia'); +SELECT cartodb.CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest'); + +\echo 'Modify row and check that it is modified in the dest table' +UPDATE test_sync_source SET name = 'cantaloupe' WHERE cartodb_id = 4; +SELECT cartodb.CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest'); + +\echo 'Sanity check: the end result is the same source table' +SELECT * FROM test_sync_source ORDER BY cartodb_id; +SELECT * FROM test_sync_dest ORDER BY cartodb_id; + +-- Cleanup +\set QUIET on +DROP TABLE IF EXISTS test_sync_source; +DROP TABLE IF EXISTS test_sync_dest; +\set QUIET off diff --git a/test/CDB_SyncTableTest_expect b/test/CDB_SyncTableTest_expect new file mode 100644 index 0000000..c4dc3dd --- /dev/null +++ b/test/CDB_SyncTableTest_expect @@ -0,0 +1,39 @@ +First table sync: it should be simply just copied to the destination +NOTICE: INSERTED 4 row(s) + +Next table sync: there shall be no changes +NOTICE: relation "test_sync_dest" already exists, skipping +NOTICE: DELETED 0 row(s) +NOTICE: INSERTED 0 row(s) +NOTICE: MODIFIED 0 row(s) + +Remove a row from the source and check it is deleted from the dest table +DELETE 1 +NOTICE: relation "test_sync_dest" already exists, skipping +NOTICE: DELETED 1 row(s) +NOTICE: INSERTED 0 row(s) +NOTICE: MODIFIED 0 row(s) + +Insert a new row and check that it is inserted in the dest table +INSERT 0 1 +NOTICE: relation "test_sync_dest" already exists, skipping +NOTICE: DELETED 0 row(s) +NOTICE: INSERTED 1 row(s) +NOTICE: MODIFIED 0 row(s) + +Modify row and check that it is modified in the dest table +UPDATE 1 +NOTICE: relation "test_sync_dest" already exists, skipping +NOTICE: DELETED 0 row(s) +NOTICE: INSERTED 0 row(s) +NOTICE: MODIFIED 1 row(s) + +Sanity check: the end result is the same source table +1|1|1|foo +2|2|2|bar +4|4|4|cantaloupe +5|5|5|sandia +1|1|1|foo +2|2|2|bar +4|4|4|cantaloupe +5|5|5|sandia From f442c21fa4f35f60bfcc402f679e0a2f7ed2a48f Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 28 May 2019 09:34:10 +0200 Subject: [PATCH 09/15] Fix the test/CDB_AnalysisCheckTest.sql These tests are failing in PG11: ``` *** /home/travis/build/CartoDB/cartodb-postgresql/expected/test/CDB_AnalysisCheckTest.out 2019-05-27 16:09:45.063543994 +0000 --- /home/travis/build/CartoDB/cartodb-postgresql/results/test/CDB_AnalysisCheckTest.out 2019-05-27 16:12:39.770847666 +0000 *************** *** 5,12 **** CREATE TABLE CREATE TABLE CREATE TABLE - (analysis_2f13a3dbd7_41bd92976fc6dd97072afe4ee450054f4c0715d5,public,analysis_2f13a3dbd7_41bd92976fc6dd97072afe4ee450054f4c0715d5) (analysis_2f13a3dbd7_f00cee44e9e6152b450bde3a92eb9ae0d099da94,public,analysis_2f13a3dbd7_f00cee44e9e6152b450bde3a92eb9ae0d099da94) 0 1 --- 5,12 ---- CREATE TABLE CREATE TABLE CREATE TABLE (analysis_2f13a3dbd7_f00cee44e9e6152b450bde3a92eb9ae0d099da94,public,analysis_2f13a3dbd7_f00cee44e9e6152b450bde3a92eb9ae0d099da94) + (analysis_2f13a3dbd7_41bd92976fc6dd97072afe4ee450054f4c0715d5,public,analysis_2f13a3dbd7_41bd92976fc6dd97072afe4ee450054f4c0715d5) 0 1 ``` The reason for that is that they rely on row ordering that cannot be guaranteed as per SQL Standard. Forcing that assumed ordering is enough to get it working again. --- test/CDB_AnalysisCheckTest.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CDB_AnalysisCheckTest.sql b/test/CDB_AnalysisCheckTest.sql index a1aa527..1f85e7a 100644 --- a/test/CDB_AnalysisCheckTest.sql +++ b/test/CDB_AnalysisCheckTest.sql @@ -7,7 +7,7 @@ SELECT _CDB_AnalysisDataSize('public'); CREATE TABLE analysis_2f13a3dbd7_41bd92976fc6dd97072afe4ee450054f4c0715d5(id int); CREATE TABLE analysis_2f13a3dbd7_f00cee44e9e6152b450bde3a92eb9ae0d099da94(id int); CREATE TABLE analysis_2f13a3dbd7_f00cee44e9e6152b450bde3a92eb9ae0d099da9(id int); -SELECT _CDB_AnalysisTablesInSchema('public'); +SELECT _CDB_AnalysisTablesInSchema('public') t ORDER BY t; SELECT _CDB_AnalysisDataSize('public'); SELECT CDB_CheckAnalysisQuota('analysis_2f13a3dbd7_f00cee44e9e6152b450bde3a92eb9ae0d099da94'); SELECT CDB_SetUserQuotaInBytes(1); From 45c21d7f4249312c927d406dd9c5fb8afe2c5662 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 28 May 2019 15:15:57 +0200 Subject: [PATCH 10/15] Make other tests resilient to CDB_SyncTable test failures Use a transaction to avoid leaving stuff after our tests, that affect Quota and User Tables tests. --- test/CDB_SyncTableTest.sql | 8 ++++---- test/CDB_SyncTableTest_expect | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/test/CDB_SyncTableTest.sql b/test/CDB_SyncTableTest.sql index 694daee..b8790b2 100644 --- a/test/CDB_SyncTableTest.sql +++ b/test/CDB_SyncTableTest.sql @@ -1,5 +1,6 @@ -- Setup: create and populate a table to test the syncs \set QUIET on +BEGIN; SET client_min_messages TO error; CREATE TABLE test_sync_source ( cartodb_id bigint, @@ -15,6 +16,7 @@ INSERT INTO test_sync_source VALUES SET client_min_messages TO notice; \set QUIET off + \echo 'First table sync: it should be simply just copied to the destination' SELECT cartodb.CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest'); @@ -37,8 +39,6 @@ SELECT cartodb.CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest'); SELECT * FROM test_sync_source ORDER BY cartodb_id; SELECT * FROM test_sync_dest ORDER BY cartodb_id; + -- Cleanup -\set QUIET on -DROP TABLE IF EXISTS test_sync_source; -DROP TABLE IF EXISTS test_sync_dest; -\set QUIET off +ROLLBACK; diff --git a/test/CDB_SyncTableTest_expect b/test/CDB_SyncTableTest_expect index c4dc3dd..5973f7d 100644 --- a/test/CDB_SyncTableTest_expect +++ b/test/CDB_SyncTableTest_expect @@ -37,3 +37,4 @@ Sanity check: the end result is the same source table 2|2|2|bar 4|4|4|cantaloupe 5|5|5|sandia +ROLLBACK From 2f8ea7e4ea675702d017c081f5131bb16b2c03b8 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 28 May 2019 15:39:02 +0200 Subject: [PATCH 11/15] Avoid tables name clashing when executing within same transaction Generate more unique temp table names when the CDB_SyncTable function is executed multiple times within the same transaction. When executed in isolation, there will be always an implicit surrounding transaction. But when executed several times within the same transaction it can give an `ERROR: relation "src_sync_718794" already exists`. E.g: ``` BEGIN; SELECT cartodb.CDB_SyncTable('source1', 'public', 'dest1'); SELECT cartodb.CDB_SyncTable('source12, 'public', 'dest2'); COMMIT; ``` --- scripts-available/CDB_SyncTable.sql | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/scripts-available/CDB_SyncTable.sql b/scripts-available/CDB_SyncTable.sql index 9c407a2..f56e71d 100644 --- a/scripts-available/CDB_SyncTable.sql +++ b/scripts-available/CDB_SyncTable.sql @@ -52,6 +52,22 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE; + +/* + Given a prefix, generate a safe unique NAME for a temp table. + + Example of usage: + + SELECT __CDB_GenerateUniqueName('src_sync'); --> src_sync_718794_120106 + +*/ +CREATE OR REPLACE FUNCTION __CDB_GenerateUniqueName(prefix TEXT) +RETURNS NAME +AS $$ + SELECT format('%s_%s_%s', prefix, txid_current(), (random()*1000000)::int)::NAME; +$$ LANGUAGE sql VOLATILE PARALLEL UNSAFE; + + /* A Table Syncer @@ -94,8 +110,8 @@ BEGIN SELECT ARRAY(SELECT quote_ident(c) FROM _CDB_GetColumns(src_table) as c WHERE c <> 'cartodb_id') INTO colnames; quoted_colnames := array_to_string(colnames, ','); - src_hash_table_name := format('src_sync_%s', txid_current()); - dst_hash_table_name := format('dst_sync_%s', txid_current()); + src_hash_table_name := __CDB_GenerateUniqueName('src_sync'); + dst_hash_table_name := __CDB_GenerateUniqueName('dst_sync'); EXECUTE format('CREATE TEMP TABLE %I(cartodb_id BIGINT, hash TEXT) ON COMMIT DROP', src_hash_table_name); EXECUTE format('CREATE TEMP TABLE %I(cartodb_id BIGINT, hash TEXT) ON COMMIT DROP', dst_hash_table_name); From a2723a3c900d2e74e1f1d8faaf5633b5fe22fa6b Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 28 May 2019 16:11:56 +0200 Subject: [PATCH 12/15] Exclude certain columns from sync if instructed to do so For the Geocoding (and in general for LDS use cases) it may come in handy to exclude geometry columns from the list of stuff to syncrhonize. Otherwise they may be lost, overwritten with NULL values. --- scripts-available/CDB_SyncTable.sql | 8 +++++--- test/CDB_SyncTableTest.sql | 14 ++++++++++++++ test/CDB_SyncTableTest_expect | 21 +++++++++++++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/scripts-available/CDB_SyncTable.sql b/scripts-available/CDB_SyncTable.sql index f56e71d..f15610b 100644 --- a/scripts-available/CDB_SyncTable.sql +++ b/scripts-available/CDB_SyncTable.sql @@ -80,7 +80,7 @@ $$ LANGUAGE sql VOLATILE PARALLEL UNSAFE; SELECT CDB_SyncTable('radar_stations', 'public', 'syncdest'); */ -CREATE OR REPLACE FUNCTION CDB_SyncTable(src_table REGCLASS, dst_schema REGNAMESPACE, dst_table NAME) +CREATE OR REPLACE FUNCTION CDB_SyncTable(src_table REGCLASS, dst_schema REGNAMESPACE, dst_table NAME, skip_cols NAME[] = '{}') RETURNS void AS $$ DECLARE @@ -106,8 +106,10 @@ BEGIN RETURN; END IF; - -- Get the list of columns from the source table, excluding cartodb_id - SELECT ARRAY(SELECT quote_ident(c) FROM _CDB_GetColumns(src_table) as c WHERE c <> 'cartodb_id') INTO colnames; + skip_cols := skip_cols || '{cartodb_id}'; + + -- Get the list of columns from the source table, excluding skip_cols + SELECT ARRAY(SELECT quote_ident(c) FROM _CDB_GetColumns(src_table) as c EXCEPT SELECT unnest(skip_cols)) INTO colnames; quoted_colnames := array_to_string(colnames, ','); src_hash_table_name := __CDB_GenerateUniqueName('src_sync'); diff --git a/test/CDB_SyncTableTest.sql b/test/CDB_SyncTableTest.sql index b8790b2..a945640 100644 --- a/test/CDB_SyncTableTest.sql +++ b/test/CDB_SyncTableTest.sql @@ -40,5 +40,19 @@ SELECT * FROM test_sync_source ORDER BY cartodb_id; SELECT * FROM test_sync_dest ORDER BY cartodb_id; +\echo 'It shall exclude geom columns if instructed to do so' +\set QUIET on +SET client_min_messages TO error; +SELECT cartodb.CDB_SetUserQuotaInBytes(0); -- Set user quota to infinite +SELECT cartodb.CDB_CartodbfyTable('test_sync_source'); +SELECT cartodb.CDB_CartodbfyTable('test_sync_dest'); +UPDATE test_sync_dest SET the_geom = cartodb.CDB_LatLng(lat, lon); -- A "gecoding" +\set QUIET off +SET client_min_messages TO notice; +SELECT cartodb.CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest', '{the_geom, the_geom_webmercator}'); +SELECT * FROM test_sync_source; +SELECT * FROM test_sync_dest; + + -- Cleanup ROLLBACK; diff --git a/test/CDB_SyncTableTest_expect b/test/CDB_SyncTableTest_expect index 5973f7d..ffe8ee2 100644 --- a/test/CDB_SyncTableTest_expect +++ b/test/CDB_SyncTableTest_expect @@ -37,4 +37,25 @@ Sanity check: the end result is the same source table 2|2|2|bar 4|4|4|cantaloupe 5|5|5|sandia +It shall exclude geom columns if instructed to do so +0 +test_sync_source +test_sync_dest +SET +NOTICE: relation "test_sync_dest" already exists, skipping +NOTICE: cdb_invalidate_varnish(public.test_sync_dest) called +NOTICE: DELETED 0 row(s) +NOTICE: cdb_invalidate_varnish(public.test_sync_dest) called +NOTICE: INSERTED 0 row(s) +NOTICE: cdb_invalidate_varnish(public.test_sync_dest) called +NOTICE: MODIFIED 0 row(s) + +1|||1|1|foo +2|||2|2|bar +5|||5|5|sandia +4|||4|4|cantaloupe +1|0101000020E6100000000000000000F03F000000000000F03F|0101000020110F0000DB0B4ADA772DFB402B432E49D22DFB40|1|1|foo +2|0101000020E610000000000000000000400000000000000040|0101000020110F00003C0C4ADA772D0B4177F404ABE12E0B41|2|2|bar +5|0101000020E610000000000000000014400000000000001440|0101000020110F000099476EE86AFC20413E7EB983F2012141|5|5|sandia +4|0101000020E610000000000000000010400000000000001040|0101000020110F00003C0C4ADA772D1B4160AB497020331B41|4|4|cantaloupe ROLLBACK From 26ad966ab6444f87d592cd20b20377e4b5193096 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 28 May 2019 16:40:01 +0200 Subject: [PATCH 13/15] Add some timing info --- scripts-available/CDB_SyncTable.sql | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/scripts-available/CDB_SyncTable.sql b/scripts-available/CDB_SyncTable.sql index f15610b..cda57a2 100644 --- a/scripts-available/CDB_SyncTable.sql +++ b/scripts-available/CDB_SyncTable.sql @@ -73,11 +73,12 @@ $$ LANGUAGE sql VOLATILE PARALLEL UNSAFE; Assumptions: - Both tables contain a consistent cartodb_id column - - Destination table has all columns of the source + - Destination table has all columns of the source or does not exist Sample usage: SELECT CDB_SyncTable('radar_stations', 'public', 'syncdest'); + SELECT CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest', '{the_geom, the_geom_webmercator}'); */ CREATE OR REPLACE FUNCTION CDB_SyncTable(src_table REGCLASS, dst_schema REGNAMESPACE, dst_table NAME, skip_cols NAME[] = '{}') @@ -96,6 +97,8 @@ DECLARE num_rows BIGINT; err_context text; + + t timestamptz; BEGIN -- If the destination table does not exist, just copy the source table fq_dest_table := format('%I.%I', dst_schema, dst_table); @@ -118,19 +121,22 @@ BEGIN EXECUTE format('CREATE TEMP TABLE %I(cartodb_id BIGINT, hash TEXT) ON COMMIT DROP', src_hash_table_name); EXECUTE format('CREATE TEMP TABLE %I(cartodb_id BIGINT, hash TEXT) ON COMMIT DROP', dst_hash_table_name); - -- Compute hash for src_table h[cartodb_id] = hash(row) + -- Compute hash tables for src_table and dst_table h[cartodb_id] = hash(row) -- It'll take the form of a temp table with an index (easy to run set operations) + t := clock_timestamp(); EXECUTE format('INSERT INTO %I SELECT cartodb_id, md5(ROW(%s)::text) hash FROM %I', src_hash_table_name, quoted_colnames, src_table); - - -- Compute hash for dst_table, only for columns present in src_table EXECUTE format('INSERT INTO %I SELECT cartodb_id, md5(ROW(%s)::text) hash FROM %s', dst_hash_table_name, quoted_colnames, fq_dest_table); + RAISE DEBUG 'Populate hash tables time (s): %', clock_timestamp() - t; -- Create indexes -- We use hash indexes as they are fit for id comparison. + t := clock_timestamp(); EXECUTE format('CREATE INDEX ON %I USING HASH (cartodb_id)', src_hash_table_name); EXECUTE format('CREATE INDEX ON %I USING HASH (cartodb_id)', dst_hash_table_name); + RAISE DEBUG 'Index creation on hash tables time (s): %', clock_timestamp() - t; -- Deal with deleted rows: ids in dest but not in source + t := clock_timestamp(); EXECUTE format( 'DELETE FROM %s WHERE cartodb_id IN (SELECT cartodb_id FROM %I EXCEPT SELECT cartodb_id FROM %I)', fq_dest_table, @@ -138,8 +144,10 @@ BEGIN src_hash_table_name); GET DIAGNOSTICS num_rows = ROW_COUNT; RAISE NOTICE 'DELETED % row(s)', num_rows; + RAISE DEBUG 'DELETE time (s): %', clock_timestamp() - t; -- Deal with inserted rows: ids in source but not in dest + t := clock_timestamp(); EXECUTE format(' INSERT INTO %s (cartodb_id,%s) SELECT h.cartodb_id,%s FROM (SELECT cartodb_id FROM %I EXCEPT SELECT cartodb_id FROM %I) h @@ -147,8 +155,10 @@ BEGIN ', fq_dest_table, quoted_colnames, quoted_colnames, src_hash_table_name, dst_hash_table_name, src_table); GET DIAGNOSTICS num_rows = ROW_COUNT; RAISE NOTICE 'INSERTED % row(s)', num_rows; + RAISE DEBUG 'INSERT time (s): %', clock_timestamp() - t; -- Deal with modified rows: ids in source and dest but different hashes + t := clock_timestamp(); update_set_clause := __CDB_GetUpdateSetClause(colnames, 'changed'); EXECUTE format(' UPDATE %1$s dst SET %2$s @@ -164,5 +174,6 @@ BEGIN ', fq_dest_table, update_set_clause, src_table, src_hash_table_name, dst_hash_table_name); GET DIAGNOSTICS num_rows = ROW_COUNT; RAISE NOTICE 'MODIFIED % row(s)', num_rows; + RAISE DEBUG 'UPDATE time (s): %', clock_timestamp() - t; END; $$ LANGUAGE plpgsql VOLATILE PARALLEL UNSAFE; From 9254723719764515600837b5780054fe58a49655 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Wed, 29 May 2019 13:13:30 +0200 Subject: [PATCH 14/15] Make format expression more readable Make an EXECUTE format('', param1, ..., paramN) more readable by adding the index of the argument to print (`position`) in the format specifier. --- scripts-available/CDB_SyncTable.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts-available/CDB_SyncTable.sql b/scripts-available/CDB_SyncTable.sql index cda57a2..fdf02c2 100644 --- a/scripts-available/CDB_SyncTable.sql +++ b/scripts-available/CDB_SyncTable.sql @@ -149,10 +149,10 @@ BEGIN -- Deal with inserted rows: ids in source but not in dest t := clock_timestamp(); EXECUTE format(' - INSERT INTO %s (cartodb_id,%s) - SELECT h.cartodb_id,%s FROM (SELECT cartodb_id FROM %I EXCEPT SELECT cartodb_id FROM %I) h - LEFT JOIN %I s ON s.cartodb_id = h.cartodb_id; - ', fq_dest_table, quoted_colnames, quoted_colnames, src_hash_table_name, dst_hash_table_name, src_table); + INSERT INTO %1$s (cartodb_id,%2$s) + SELECT h.cartodb_id,%2$s FROM (SELECT cartodb_id FROM %3$I EXCEPT SELECT cartodb_id FROM %4$I) h + LEFT JOIN %5$I s ON s.cartodb_id = h.cartodb_id; + ', fq_dest_table, quoted_colnames, src_hash_table_name, dst_hash_table_name, src_table); GET DIAGNOSTICS num_rows = ROW_COUNT; RAISE NOTICE 'INSERTED % row(s)', num_rows; RAISE DEBUG 'INSERT time (s): %', clock_timestamp() - t; From 42dc03d77b1218dbfeced79bd8b4539b0353fa83 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Thu, 30 May 2019 16:56:12 +0200 Subject: [PATCH 15/15] Qualify functions with extension schema This avoids some issues with search_path and scripting black magic. --- scripts-available/CDB_SyncTable.sql | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/scripts-available/CDB_SyncTable.sql b/scripts-available/CDB_SyncTable.sql index fdf02c2..521495a 100644 --- a/scripts-available/CDB_SyncTable.sql +++ b/scripts-available/CDB_SyncTable.sql @@ -3,9 +3,9 @@ Sample usage: - SELECT _CDB_GetColumns('public.films'); + SELECT cartodb._CDB_GetColumns('public.films'); */ -CREATE OR REPLACE FUNCTION _CDB_GetColumns(src_table REGCLASS) +CREATE OR REPLACE FUNCTION cartodb._CDB_GetColumns(src_table REGCLASS) RETURNS SETOF NAME AS $$ SELECT @@ -35,9 +35,9 @@ $$ LANGUAGE sql STABLE PARALLEL UNSAFE; Example of usage: - SELECT __CDB_GetUpdateSetClause('{the_geom, id, elevation}', 'changed'); + SELECT cartodb.__CDB_GetUpdateSetClause('{the_geom, id, elevation}', 'changed'); */ -CREATE OR REPLACE FUNCTION __CDB_GetUpdateSetClause(colnames TEXT[], update_source TEXT) +CREATE OR REPLACE FUNCTION cartodb.__CDB_GetUpdateSetClause(colnames TEXT[], update_source TEXT) RETURNS TEXT AS $$ DECLARE @@ -58,10 +58,10 @@ $$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE; Example of usage: - SELECT __CDB_GenerateUniqueName('src_sync'); --> src_sync_718794_120106 + SELECT cartodb.__CDB_GenerateUniqueName('src_sync'); --> src_sync_718794_120106 */ -CREATE OR REPLACE FUNCTION __CDB_GenerateUniqueName(prefix TEXT) +CREATE OR REPLACE FUNCTION cartodb.__CDB_GenerateUniqueName(prefix TEXT) RETURNS NAME AS $$ SELECT format('%s_%s_%s', prefix, txid_current(), (random()*1000000)::int)::NAME; @@ -77,11 +77,11 @@ $$ LANGUAGE sql VOLATILE PARALLEL UNSAFE; Sample usage: - SELECT CDB_SyncTable('radar_stations', 'public', 'syncdest'); - SELECT CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest', '{the_geom, the_geom_webmercator}'); + SELECT cartodb.CDB_SyncTable('radar_stations', 'public', 'syncdest'); + SELECT cartodb.CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest', '{the_geom, the_geom_webmercator}'); */ -CREATE OR REPLACE FUNCTION CDB_SyncTable(src_table REGCLASS, dst_schema REGNAMESPACE, dst_table NAME, skip_cols NAME[] = '{}') +CREATE OR REPLACE FUNCTION cartodb.CDB_SyncTable(src_table REGCLASS, dst_schema REGNAMESPACE, dst_table NAME, skip_cols NAME[] = '{}') RETURNS void AS $$ DECLARE @@ -112,11 +112,11 @@ BEGIN skip_cols := skip_cols || '{cartodb_id}'; -- Get the list of columns from the source table, excluding skip_cols - SELECT ARRAY(SELECT quote_ident(c) FROM _CDB_GetColumns(src_table) as c EXCEPT SELECT unnest(skip_cols)) INTO colnames; + SELECT ARRAY(SELECT quote_ident(c) FROM cartodb._CDB_GetColumns(src_table) as c EXCEPT SELECT unnest(skip_cols)) INTO colnames; quoted_colnames := array_to_string(colnames, ','); - src_hash_table_name := __CDB_GenerateUniqueName('src_sync'); - dst_hash_table_name := __CDB_GenerateUniqueName('dst_sync'); + src_hash_table_name := cartodb.__CDB_GenerateUniqueName('src_sync'); + dst_hash_table_name := cartodb.__CDB_GenerateUniqueName('dst_sync'); EXECUTE format('CREATE TEMP TABLE %I(cartodb_id BIGINT, hash TEXT) ON COMMIT DROP', src_hash_table_name); EXECUTE format('CREATE TEMP TABLE %I(cartodb_id BIGINT, hash TEXT) ON COMMIT DROP', dst_hash_table_name); @@ -159,7 +159,7 @@ BEGIN -- Deal with modified rows: ids in source and dest but different hashes t := clock_timestamp(); - update_set_clause := __CDB_GetUpdateSetClause(colnames, 'changed'); + update_set_clause := cartodb.__CDB_GetUpdateSetClause(colnames, 'changed'); EXECUTE format(' UPDATE %1$s dst SET %2$s FROM (