Batching, better support for null columns, and bulk geocoding integration tests
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text,
|
||||
country_column text, state_column text, city_column text, street_column text)
|
||||
country_column text, state_column text, city_column text, street_column text, batch_size integer DEFAULT 100)
|
||||
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
|
||||
DECLARE
|
||||
query_row_count integer;
|
||||
@@ -7,11 +7,18 @@ DECLARE
|
||||
|
||||
cartodb_id_batch integer;
|
||||
batches_n integer;
|
||||
BATCHES_SIZE CONSTANT numeric := 100;
|
||||
DEFAULT_BATCH_SIZE CONSTANT numeric := 100;
|
||||
MAX_BATCH_SIZE CONSTANT numeric := 1000;
|
||||
current_row_count integer ;
|
||||
|
||||
temp_table_name text;
|
||||
BEGIN
|
||||
IF batch_size IS NULL THEN
|
||||
batch_size := DEFAULT_BATCH_SIZE;
|
||||
ELSIF batch_size > MAX_BATCH_SIZE THEN
|
||||
RAISE EXCEPTION 'batch_size must be lower than %', MAX_BATCH_SIZE + 1;
|
||||
END IF;
|
||||
|
||||
EXECUTE format('SELECT COUNT(1) from (%s) _x', query) INTO query_row_count;
|
||||
|
||||
RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
|
||||
@@ -21,7 +28,7 @@ BEGIN
|
||||
RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
|
||||
END IF;
|
||||
|
||||
EXECUTE format('SELECT ceil(max(cartodb_id)::float/%s) FROM (%s) _x', BATCHES_SIZE, query) INTO batches_n;
|
||||
EXECUTE format('SELECT ceil(max(cartodb_id)::float/%s) FROM (%s) _x', batch_size, query) INTO batches_n;
|
||||
|
||||
RAISE DEBUG 'batches_n: %', batches_n;
|
||||
|
||||
@@ -31,6 +38,11 @@ BEGIN
|
||||
'(cartodb_id integer, the_geom geometry(Multipolygon,4326), metadata jsonb)',
|
||||
temp_table_name);
|
||||
|
||||
select
|
||||
coalesce(street_column, ''''''), coalesce(city_column, ''''''),
|
||||
coalesce(state_column, ''''''), coalesce(country_column, '''''')
|
||||
into street_column, city_column, state_column, country_column;
|
||||
|
||||
FOR cartodb_id_batch in 0..(batches_n - 1)
|
||||
LOOP
|
||||
|
||||
@@ -44,7 +56,7 @@ BEGIN
|
||||
'INSERT INTO %s SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' ||
|
||||
'FROM geocoding_data ' ||
|
||||
'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name)
|
||||
USING BATCHES_SIZE, cartodb_id_batch;
|
||||
USING batch_size, cartodb_id_batch;
|
||||
|
||||
GET DIAGNOSTICS current_row_count = ROW_COUNT;
|
||||
RAISE DEBUG 'Batch % --> %', cartodb_id_batch, current_row_count;
|
||||
|
||||
Reference in New Issue
Block a user