Merge branch 'master' into table-syncer-notmps

# Conflicts:
#	scripts-available/CDB_SyncTable.sql
This commit is contained in:
Javier Goizueta
2019-07-02 12:21:49 +02:00
4 changed files with 72 additions and 12 deletions

View File

@@ -1,7 +1,7 @@
# cartodb/Makefile
EXTENSION = cartodb
EXTVERSION = 0.27.2
EXTVERSION = 0.28.0
SED = sed
AWK = awk
@@ -99,6 +99,7 @@ UPGRADABLE = \
0.27.0 \
0.27.1 \
0.27.2 \
0.28.0 \
$(EXTVERSION)dev \
$(EXTVERSION)next \
$(END)

View File

@@ -1,3 +1,6 @@
0.28.0 (2019-07-01)
* New function CDB_SyncTable (#355)
0.27.2 (2019-06-21)
* Improvements and fixes in Ghost tables functions (#360)

56
doc/CDB_SyncTable.md Normal file
View File

@@ -0,0 +1,56 @@
Synchronize two tables. This function will synchronize a *destination* table with a *source* table.
The idea is that the *destination* is a replica of *source* and *source* has been subject to
modifications that are to be applied to *destination*.
This will be achieved by deleting the rows in the destination not present
in the source, inserting rows of the source not in the destination and updating modified rows.
If the destination table does not exist it will be created and all the rows of the source inserted into it.
Both tables must have a consistent `cartodb_id` primary key column which will be used to match
the source and destination rows.
Note that both tables do not necessarily become identical after the synchronization, since additional columns
may have been added to the destination; those columns will not be altered by the synchronization.
In addition some source columns may be skipped by listing them in the optional last argument; such columns
will not be updated in the destination, so if they are present in it their values won't be altered.
#### Using the function
Import some data using COPY FROM into a temporary table, then synchronize a table with the data and
finally delete the temporary table. This could be used import and update some data periodically while
allowing to add columns to the data that will be preserved across updates.
```sql
CREATE tmp_pois(cartodb_id int, name text, type text, longitude double precision, latitude double precision, rank int);
COPY tmp_pois FROM '/tmp/pois.csv';
SELECT CDB_SyncTable('tmp_pois', 'public', 'pois');
DROP TABLE tmp_pois;
```
Now we could perform some changes to the `pois` to maintain our own ranking:
```sql
UPDATE pois SET rank = random()*4 + 1;
```
Then, if the source were updated at `/tmp/pois.csv` we could synchronize with it while preserving our `rank` values with:
```sql
CREATE tmp_pois(cartodb_id int, name text, type text, longitude double precision, latitude double precision, rank int);
COPY tmp_pois FROM '/tmp/pois.csv';
SELECT CDB_SyncTable('tmp_pois', 'public', 'pois', '{rank}');
DROP TABLE tmp_pois;
```
#### Arguments
```
CDB_SyncTable(src_table, dst_schema, dst_table, skip_cols)
```
* **src_table** REGCLASS the source data for the synchronization
* **dst_scgena** REGNAMESPACE the destination schema
* **dst_table** NAME the destination table to be updated
* **skip_cols** NAME[] an array of column names, empty by default, which will be skipped

View File

@@ -3,9 +3,9 @@
Sample usage:
SELECT cartodb._CDB_GetColumns('public.films');
SELECT @extschema@._CDB_GetColumns('public.films');
*/
CREATE OR REPLACE FUNCTION cartodb._CDB_GetColumns(src_table REGCLASS)
CREATE OR REPLACE FUNCTION @extschema@._CDB_GetColumns(src_table REGCLASS)
RETURNS SETOF NAME
AS $$
SELECT
@@ -35,9 +35,9 @@ $$ LANGUAGE sql STABLE PARALLEL UNSAFE;
Example of usage:
SELECT cartodb.__CDB_GetUpdateSetClause('{the_geom, id, elevation}', 'changed');
SELECT @extschema@.__CDB_GetUpdateSetClause('{the_geom, id, elevation}', 'changed');
*/
CREATE OR REPLACE FUNCTION cartodb.__CDB_GetUpdateSetClause(colnames TEXT[], update_source TEXT)
CREATE OR REPLACE FUNCTION @extschema@.__CDB_GetUpdateSetClause(colnames TEXT[], update_source TEXT)
RETURNS TEXT
AS $$
DECLARE
@@ -58,10 +58,10 @@ $$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
Example of usage:
SELECT cartodb.__CDB_GenerateUniqueName('src_sync'); --> src_sync_718794_120106
SELECT @extschema@.__CDB_GenerateUniqueName('src_sync'); --> src_sync_718794_120106
*/
CREATE OR REPLACE FUNCTION cartodb.__CDB_GenerateUniqueName(prefix TEXT)
CREATE OR REPLACE FUNCTION @extschema@.__CDB_GenerateUniqueName(prefix TEXT)
RETURNS NAME
AS $$
SELECT format('%s_%s_%s', prefix, txid_current(), (random()*1000000)::int)::NAME;
@@ -90,11 +90,11 @@ $$ LANGUAGE sql IMMUTABLE PARALLEL SAFE;
Sample usage:
SELECT cartodb.CDB_SyncTable('radar_stations', 'public', 'syncdest');
SELECT cartodb.CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest', '{the_geom, the_geom_webmercator}');
SELECT CDB_SyncTable('radar_stations', 'public', 'syncdest');
SELECT CDB_SyncTable('test_sync_source', 'public', 'test_sync_dest', '{the_geom, the_geom_webmercator}');
*/
CREATE OR REPLACE FUNCTION cartodb.CDB_SyncTable(src_table REGCLASS, dst_schema REGNAMESPACE, dst_table NAME, skip_cols NAME[] = '{}')
CREATE OR REPLACE FUNCTION @extschema@.CDB_SyncTable(src_table REGCLASS, dst_schema REGNAMESPACE, dst_table NAME, skip_cols NAME[] = '{}')
RETURNS void
AS $$
DECLARE
@@ -126,7 +126,7 @@ BEGIN
skip_cols := skip_cols || '{cartodb_id}';
-- Get the list of columns from the source table, excluding skip_cols
SELECT ARRAY(SELECT quote_ident(c) FROM cartodb._CDB_GetColumns(src_table) as c EXCEPT SELECT unnest(skip_cols)) INTO colnames;
SELECT ARRAY(SELECT quote_ident(c) FROM @extschema@._CDB_GetColumns(src_table) as c EXCEPT SELECT unnest(skip_cols)) INTO colnames;
-- Deal with deleted rows: ids in dest but not in source
t := clock_timestamp();
@@ -149,7 +149,7 @@ BEGIN
-- Deal with modified rows: ids in source and dest but different hashes
t := clock_timestamp();
update_set_clause := cartodb.__CDB_GetUpdateSetClause(colnames, '_changed');
update_set_clause := @extschema@.__CDB_GetUpdateSetClause(colnames, '_changed');
dst_colnames := array_to_string(cartodb.__CDB_QualifyColumns('_dst', colnames), ',');
src_colnames := array_to_string(cartodb.__CDB_QualifyColumns('_src', colnames), ',');
EXECUTE format('