Compare commits

..

12 Commits

Author SHA1 Message Date
Javier Goizueta
885c7c50fb Merge pull request #547 from CartoDB/development
Release python-0.21.2
2019-03-04 18:48:55 +01:00
Javier Goizueta
a38c5b275d Merge pull request #546 from CartoDB/545-tomtom-qps
Detect alternative TomTom rate limit header
2019-03-04 18:26:43 +01:00
Javier Goizueta
45542b2f28 Release new python lib version 2019-03-04 18:25:25 +01:00
Javier Goizueta
f0a9779a8d Detect alternative TomTom rate limit header
Also flexibilizes detection by making it case-insensitive and allowing for text around the message
Fixes 545
2019-03-04 15:43:22 +01:00
Javier Goizueta
dd1df8695f Merge pull request #544 from CartoDB/development
Release client extension 0.26.2
2019-02-26 10:18:48 +01:00
Javier Goizueta
8507067901 Release 0.26.2 client extension 2019-02-25 16:48:36 +01:00
Javier Goizueta
00f77cca8a Merge pull request #543 from CartoDB/fix-bulk-geocoding-soft-limit
Fix bulk geocoding soft limit
2019-02-25 16:39:29 +01:00
Javier Goizueta
dfaf0d5245 Prevent use of quota for bogus configurations
This is a stop-gap measure for the hypothetical case of a service configuration that yields NULL quota values.
2019-02-25 16:32:01 +01:00
Rafa de la Torre
f63d73b9d7 Fix the bulk geocoding when soft_limit is enabled
Fix the case where there's not enough quota to geocode a table but the
soft_limit is set to true.

The function `cdb_enough_quota` accounts for the `soft_limit` flag, as
well as for the remaining quota and the rows to be geocoded.
2019-02-19 12:27:29 +01:00
Rafa de la Torre
29e636f115 Test for bulk geocoding w/ soft_limit enabled 2019-02-19 12:24:06 +01:00
Javier Torres
f082c918f0 Merge pull request #542 from CartoDB/development
Client 0.26.1, Server 0.35.1 -> Fix batch geocoder geometry type
2018-12-31 11:04:45 +01:00
Javier Torres
f996cb35db Merge pull request #541 from CartoDB/development
Python 0.21.1: Fix batch geocoder precision
2018-12-26 12:18:29 +01:00
13 changed files with 5860 additions and 8 deletions

10
NEWS.md
View File

@@ -1,3 +1,13 @@
Mar 4th, 2019
==============
* Version `0.21.2` of the python library
* Fixed TomTom QPS responses (#546)
Feb 25th, 2019
==============
* Version `0.26.2` of the client extension
* Fixed bulk geocoding soft limit (#543)
Dec 31st, 2018
==============
* Version `0.35.1` of the server extension and `0.26.1` of the client

View File

@@ -0,0 +1,96 @@
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION cdb_dataservices_client UPDATE TO '0.26.2'" to load this file. \quit
-- Make sure we have a sane search path to create/update the extension
SET search_path = "$user",cartodb,public,cdb_dataservices_client;
-- cdb_bulk_geocode_street_point: geocodes the rows produced by `query` in batches.
--
-- Each batch is aggregated into a jsonb array of {id, address, city, state, country}
-- objects and handed to cdb_dataservices_client._cdb_bulk_geocode_street_point();
-- the rows it returns are collected in a temporary table whose full contents are
-- returned to the caller.
--
-- Parameters:
--   query          SQL text, spliced as a subquery; it must expose a `cartodb_id` column.
--   street_column, city_column, state_column, country_column
--                  SQL expressions spliced verbatim into the generated SELECT (a column
--                  name or a quoted literal). NULL is replaced by the literal ''.
--   batch_size     rows per batch; NULL means "use the service's max_batch_size".
--
-- Raises:
--   'Geocoding permission denied'     when the API key permissions are NULL or do not
--                                     contain 'geocoding'.
--   'Username is a mandatory argument' when the configured username is NULL/empty/'""'.
--   'batch_size must be lower than %' when batch_size exceeds max_batch_size.
--   'Remaining quota: %. ...'         when cdb_enough_quota() returns NULL or false.
--
-- NOTE(review): `query` and the *_column arguments are interpolated with %s (unquoted)
-- into SQL executed by a SECURITY DEFINER function -- confirm callers are trusted.
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text,
street_column text, city_column text default null, state_column text default null, country_column text default null, batch_size integer DEFAULT NULL)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
DECLARE
query_row_count integer;
enough_quota boolean;
-- remaining_quota is only used for the debug trace and the quota error message here
remaining_quota integer;
max_batch_size integer;
-- zero-based index of the batch being processed in the loop below
cartodb_id_batch integer;
batches_n integer;
-- DEFAULT_BATCH_SIZE is declared but not referenced anywhere in this function body
DEFAULT_BATCH_SIZE CONSTANT numeric := 100;
-- hard upper bound applied to batch_size regardless of the service's max_batch_size
MAX_SAFE_BATCH_SIZE CONSTANT numeric := 5000;
temp_table_name text;
username text;
orgname text;
apikey_permissions json;
BEGIN
-- Resolve the calling entity and its API key permissions
SELECT u, o, p INTO username, orgname, apikey_permissions FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text, p json);
IF apikey_permissions IS NULL OR NOT apikey_permissions::jsonb ? 'geocoding' THEN
RAISE EXCEPTION 'Geocoding permission denied' USING ERRCODE = '01007';
END IF;
-- JSON value stored "" is taken as literal
IF username IS NULL OR username = '' OR username = '""' THEN
RAISE EXCEPTION 'Username is a mandatory argument, check it out';
END IF;
-- Fetch remaining quota and the maximum batch size for the 'hires_geocoder' service
SELECT csqi.monthly_quota - csqi.used_quota AS remaining_quota, csqi.max_batch_size
INTO remaining_quota, max_batch_size
FROM cdb_dataservices_client.cdb_service_quota_info_batch() csqi
WHERE service = 'hires_geocoder';
RAISE DEBUG 'remaining_quota: %; max_batch_size: %', remaining_quota, max_batch_size;
-- Default batch_size to the service maximum; reject explicit values above it
IF batch_size IS NULL THEN
batch_size := max_batch_size;
ELSIF batch_size > max_batch_size THEN
RAISE EXCEPTION 'batch_size must be lower than %', max_batch_size + 1;
END IF;
-- Cap (silently) at MAX_SAFE_BATCH_SIZE
IF batch_size > MAX_SAFE_BATCH_SIZE THEN
batch_size := MAX_SAFE_BATCH_SIZE;
END IF;
-- Count the rows to geocode and derive the number of batches (rounded up)
EXECUTE format('SELECT count(1), ceil(count(1)::float/%s) FROM (%s) _x', batch_size, query)
INTO query_row_count, batches_n;
RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
query_row_count, query, country_column, state_column, city_column, street_column;
-- The quota decision is delegated to cdb_enough_quota(); a NULL answer is treated
-- the same as "not enough" so the function never proceeds on an undefined result
SELECT cdb_dataservices_client.cdb_enough_quota('hires_geocoder', query_row_count) INTO enough_quota;
IF enough_quota IS NULL OR NOT enough_quota THEN
RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
END IF;
RAISE DEBUG 'batches_n: %', batches_n;
-- Results are accumulated in a temporary table with a randomised name
temp_table_name := 'bulk_geocode_street_' || md5(random()::text);
EXECUTE format('CREATE TEMPORARY TABLE %s ' ||
'(cartodb_id integer, the_geom geometry(Point,4326), metadata jsonb)',
temp_table_name);
-- Replace NULL column expressions by the two-character text '' so that the
-- generated SQL below receives an empty-string literal in their place
select
coalesce(street_column, ''''''), coalesce(city_column, ''''''),
coalesce(state_column, ''''''), coalesce(country_column, '''''')
into street_column, city_column, state_column, country_column;
-- One dynamic statement per batch: every iteration re-runs `query`, numbers its rows,
-- keeps those whose floor((row_number - 1) / batch_size) equals the batch index ($2),
-- and inserts the geocoder's results for them into the temporary table.
-- NOTE(review): row_number() has no ORDER BY, so batch membership presumably relies
-- on `query` returning rows in the same order on every execution -- confirm.
IF batches_n > 0 THEN
FOR cartodb_id_batch in 0..(batches_n - 1)
LOOP
EXECUTE format(
'WITH geocoding_data as (' ||
' SELECT ' ||
' json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' ||
' floor((row_number() over () - 1)::float/$1) as batch' ||
' FROM (%s) _x' ||
') ' ||
'INSERT INTO %s SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' ||
'FROM geocoding_data ' ||
'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name)
USING batch_size, cartodb_id_batch;
END LOOP;
END IF;
-- Return everything collected across all batches
RETURN QUERY EXECUTE 'SELECT * FROM ' || quote_ident(temp_table_name);
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER VOLATILE PARALLEL UNSAFE;

View File

@@ -0,0 +1,96 @@
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION cdb_dataservices_client UPDATE TO '0.26.1'" to load this file. \quit
-- Make sure we have a sane search path to create/update the extension
SET search_path = "$user",cartodb,public,cdb_dataservices_client;
-- cdb_bulk_geocode_street_point (definition installed by the script targeting 0.26.1):
-- geocodes the rows produced by `query` in batches.
--
-- Each batch is aggregated into a jsonb array of {id, address, city, state, country}
-- objects and handed to cdb_dataservices_client._cdb_bulk_geocode_street_point();
-- the rows it returns are collected in a temporary table whose full contents are
-- returned to the caller.
--
-- In this version the quota gate compares remaining_quota with the row count directly;
-- the result of cdb_enough_quota() is fetched but not consulted.
--
-- Parameters:
--   query          SQL text, spliced as a subquery; it must expose a `cartodb_id` column.
--   street_column, city_column, state_column, country_column
--                  SQL expressions spliced verbatim into the generated SELECT (a column
--                  name or a quoted literal). NULL is replaced by the literal ''.
--   batch_size     rows per batch; NULL means "use the service's max_batch_size".
--
-- Raises:
--   'Geocoding permission denied'     when the API key permissions are NULL or do not
--                                     contain 'geocoding'.
--   'Username is a mandatory argument' when the configured username is NULL/empty/'""'.
--   'batch_size must be lower than %' when batch_size exceeds max_batch_size.
--   'Remaining quota: %. ...'         when remaining_quota < number of rows in `query`.
--
-- NOTE(review): `query` and the *_column arguments are interpolated with %s (unquoted)
-- into SQL executed by a SECURITY DEFINER function -- confirm callers are trusted.
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text,
street_column text, city_column text default null, state_column text default null, country_column text default null, batch_size integer DEFAULT NULL)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
DECLARE
query_row_count integer;
-- assigned from cdb_enough_quota() below but never read afterwards in this version
enough_quota boolean;
remaining_quota integer;
max_batch_size integer;
-- zero-based index of the batch being processed in the loop below
cartodb_id_batch integer;
batches_n integer;
-- DEFAULT_BATCH_SIZE is declared but not referenced anywhere in this function body
DEFAULT_BATCH_SIZE CONSTANT numeric := 100;
-- hard upper bound applied to batch_size regardless of the service's max_batch_size
MAX_SAFE_BATCH_SIZE CONSTANT numeric := 5000;
temp_table_name text;
username text;
orgname text;
apikey_permissions json;
BEGIN
-- Resolve the calling entity and its API key permissions
SELECT u, o, p INTO username, orgname, apikey_permissions FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text, p json);
IF apikey_permissions IS NULL OR NOT apikey_permissions::jsonb ? 'geocoding' THEN
RAISE EXCEPTION 'Geocoding permission denied' USING ERRCODE = '01007';
END IF;
-- JSON value stored "" is taken as literal
IF username IS NULL OR username = '' OR username = '""' THEN
RAISE EXCEPTION 'Username is a mandatory argument, check it out';
END IF;
-- Fetch remaining quota and the maximum batch size for the 'hires_geocoder' service
SELECT csqi.monthly_quota - csqi.used_quota AS remaining_quota, csqi.max_batch_size
INTO remaining_quota, max_batch_size
FROM cdb_dataservices_client.cdb_service_quota_info_batch() csqi
WHERE service = 'hires_geocoder';
RAISE DEBUG 'remaining_quota: %; max_batch_size: %', remaining_quota, max_batch_size;
-- Default batch_size to the service maximum; reject explicit values above it
IF batch_size IS NULL THEN
batch_size := max_batch_size;
ELSIF batch_size > max_batch_size THEN
RAISE EXCEPTION 'batch_size must be lower than %', max_batch_size + 1;
END IF;
-- Cap (silently) at MAX_SAFE_BATCH_SIZE
IF batch_size > MAX_SAFE_BATCH_SIZE THEN
batch_size := MAX_SAFE_BATCH_SIZE;
END IF;
-- Count the rows to geocode and derive the number of batches (rounded up)
EXECUTE format('SELECT count(1), ceil(count(1)::float/%s) FROM (%s) _x', batch_size, query)
INTO query_row_count, batches_n;
RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
query_row_count, query, country_column, state_column, city_column, street_column;
SELECT cdb_dataservices_client.cdb_enough_quota('hires_geocoder', query_row_count) INTO enough_quota;
-- Quota gate of this version: a plain comparison against the remaining quota.
-- When remaining_quota is NULL the comparison yields NULL and the IF branch is skipped.
IF remaining_quota < query_row_count THEN
RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
END IF;
RAISE DEBUG 'batches_n: %', batches_n;
-- Results are accumulated in a temporary table with a randomised name
temp_table_name := 'bulk_geocode_street_' || md5(random()::text);
EXECUTE format('CREATE TEMPORARY TABLE %s ' ||
'(cartodb_id integer, the_geom geometry(Point,4326), metadata jsonb)',
temp_table_name);
-- Replace NULL column expressions by the two-character text '' so that the
-- generated SQL below receives an empty-string literal in their place
select
coalesce(street_column, ''''''), coalesce(city_column, ''''''),
coalesce(state_column, ''''''), coalesce(country_column, '''''')
into street_column, city_column, state_column, country_column;
-- One dynamic statement per batch: every iteration re-runs `query`, numbers its rows,
-- keeps those whose floor((row_number - 1) / batch_size) equals the batch index ($2),
-- and inserts the geocoder's results for them into the temporary table.
-- NOTE(review): row_number() has no ORDER BY, so batch membership presumably relies
-- on `query` returning rows in the same order on every execution -- confirm.
IF batches_n > 0 THEN
FOR cartodb_id_batch in 0..(batches_n - 1)
LOOP
EXECUTE format(
'WITH geocoding_data as (' ||
' SELECT ' ||
' json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' ||
' floor((row_number() over () - 1)::float/$1) as batch' ||
' FROM (%s) _x' ||
') ' ||
'INSERT INTO %s SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' ||
'FROM geocoding_data ' ||
'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name)
USING batch_size, cartodb_id_batch;
END LOOP;
END IF;
-- Return everything collected across all batches
RETURN QUERY EXECUTE 'SELECT * FROM ' || quote_ident(temp_table_name);
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER VOLATILE PARALLEL UNSAFE;

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
comment = 'CartoDB dataservices client API extension'
default_version = '0.26.1'
default_version = '0.26.2'
requires = 'plproxy, cartodb'
superuser = true
schema = cdb_dataservices_client

View File

@@ -49,7 +49,7 @@ BEGIN
RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
query_row_count, query, country_column, state_column, city_column, street_column;
SELECT cdb_dataservices_client.cdb_enough_quota('hires_geocoder', query_row_count) INTO enough_quota;
IF remaining_quota < query_row_count THEN
IF enough_quota IS NULL OR NOT enough_quota THEN
RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
END IF;

View File

@@ -9,6 +9,13 @@ CREATE FUNCTION cdb_dataservices_client.cdb_enough_quota (service TEXT ,input_si
RETURNS BOOLEAN as $$
SELECT FALSE;
$$ LANGUAGE SQL;
ALTER FUNCTION cdb_dataservices_client._cdb_bulk_geocode_street_point(searches jsonb) RENAME TO _cdb_bulk_geocode_street_point_mocked;
CREATE FUNCTION cdb_dataservices_client._cdb_bulk_geocode_street_point(searches jsonb)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
BEGIN
RAISE NOTICE 'called with this searches: %', searches;
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER STABLE PARALLEL UNSAFE;
-- No permissions granted
-- Test bulk size not mandatory (it will get the optimal)
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''', null, null, null, null);
@@ -42,6 +49,18 @@ ERROR: Remaining quota: 0. Estimated cost: 1
-- Test quota check by mocking quota 0
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''');
ERROR: Remaining quota: 0. Estimated cost: 1
-- Check that when cdb_enough_quota returns true (ie. when soft_limit is set to true, even if not enough quota)
-- it is able to proceed with the bulk geocode
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_enough_quota (service TEXT ,input_size NUMERIC)
RETURNS BOOLEAN as $$
SELECT TRUE;
$$ LANGUAGE SQL;
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''');
NOTICE: called with this searches: [{"id": 1, "city": "", "state": "", "address": "Valladolid, Spain", "country": ""}]
cdb_bulk_geocode_street_point
-------------------------------
(0 rows)
-- Remove permissions
SELECT CDB_Conf_RemoveConf('api_keys_postgres');
cdb_conf_removeconf
@@ -51,5 +70,7 @@ SELECT CDB_Conf_RemoveConf('api_keys_postgres');
DROP FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch;
DROP FUNCTION cdb_dataservices_client.cdb_enough_quota;
DROP FUNCTION cdb_dataservices_client._cdb_bulk_geocode_street_point;
ALTER FUNCTION cdb_dataservices_client.cdb_enough_quota_mocked (service TEXT ,input_size NUMERIC) RENAME TO cdb_enough_quota;
ALTER FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch_mocked() RENAME TO cdb_service_quota_info_batch;
ALTER FUNCTION cdb_dataservices_client._cdb_bulk_geocode_street_point_mocked(searches jsonb) RENAME TO _cdb_bulk_geocode_street_point;

View File

@@ -12,6 +12,14 @@ RETURNS BOOLEAN as $$
SELECT FALSE;
$$ LANGUAGE SQL;
ALTER FUNCTION cdb_dataservices_client._cdb_bulk_geocode_street_point(searches jsonb) RENAME TO _cdb_bulk_geocode_street_point_mocked;
CREATE FUNCTION cdb_dataservices_client._cdb_bulk_geocode_street_point(searches jsonb)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
BEGIN
RAISE NOTICE 'called with this searches: %', searches;
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER STABLE PARALLEL UNSAFE;
-- No permissions granted
-- Test bulk size not mandatory (it will get the optimal)
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''', null, null, null, null);
@@ -32,12 +40,21 @@ SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartod
-- Test quota check by mocking quota 0
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''');
-- Check that when cdb_enough_quota returns true (ie. when soft_limit is set to true, even if not enough quota)
-- it is able to proceed with the bulk geocode
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_enough_quota (service TEXT ,input_size NUMERIC)
RETURNS BOOLEAN as $$
SELECT TRUE;
$$ LANGUAGE SQL;
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''');
-- Remove permissions
SELECT CDB_Conf_RemoveConf('api_keys_postgres');
DROP FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch;
DROP FUNCTION cdb_dataservices_client.cdb_enough_quota;
DROP FUNCTION cdb_dataservices_client._cdb_bulk_geocode_street_point;
ALTER FUNCTION cdb_dataservices_client.cdb_enough_quota_mocked (service TEXT ,input_size NUMERIC) RENAME TO cdb_enough_quota;
ALTER FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch_mocked() RENAME TO cdb_service_quota_info_batch;
ALTER FUNCTION cdb_dataservices_client._cdb_bulk_geocode_street_point_mocked(searches jsonb) RENAME TO _cdb_bulk_geocode_street_point;

View File

@@ -2,13 +2,17 @@ import time
import random
from datetime import datetime
from exceptions import TimeoutException
import re
DEFAULT_RETRY_TIMEOUT = 60
DEFAULT_QUERIES_PER_SECOND = 10
TOMTOM_403_RATE_LIMIT_HEADER = 'Account Over Queries Per Second Limit'
TOMTOM_403_RATE_LIMIT_HEADERS = [
'Account Over Queries Per Second Limit',
'Developer Over Qps'
]
TOMTOM_DETAIL_HEADER = 'X-Error-Detail-Header'
TOMTOM_403_RATE_LIMIT_HEADER_PATTERN = re.compile('|'.join(TOMTOM_403_RATE_LIMIT_HEADERS), re.IGNORECASE)
def qps_retry(original_function=None, **options):
""" Query Per Second retry decorator
@@ -49,9 +53,11 @@ class QPSService:
response = getattr(e, 'response', None)
if response is not None:
if self._provider is not None and self._provider == 'tomtom' and (response.status_code == 403):
if response.headers.get(TOMTOM_DETAIL_HEADER) != TOMTOM_403_RATE_LIMIT_HEADER:
detail_header = response.headers.get(TOMTOM_DETAIL_HEADER)
if detail_header and TOMTOM_403_RATE_LIMIT_HEADER_PATTERN.search(detail_header):
self.retry(start_time, attempt_number)
else:
raise e
self.retry(start_time, attempt_number)
elif response.status_code == 429:
self.retry(start_time, attempt_number)
else:

View File

@@ -10,7 +10,7 @@ from setuptools import setup, find_packages
setup(
name='cartodb_services',
version='0.21.1',
version='0.21.2',
description='CartoDB Services API Python Library',