Compare commits
118 Commits
python-0.1
...
python-0.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a6bff9b8d2 | ||
|
|
075f602a7f | ||
|
|
e69849fb86 | ||
|
|
11ec6075c3 | ||
|
|
c6720bf689 | ||
|
|
3524ee1e24 | ||
|
|
80dcde2db0 | ||
|
|
fa3d7db5f8 | ||
|
|
d060ab3d41 | ||
|
|
3a5360c96c | ||
|
|
fc75f1afc8 | ||
|
|
4be3aa88fd | ||
|
|
8162bff204 | ||
|
|
1b31c089ce | ||
|
|
faf9b7237b | ||
|
|
5d2303e1de | ||
|
|
07f5be9207 | ||
|
|
bcb34d1cea | ||
|
|
c5d9db61e6 | ||
|
|
1ff512839d | ||
|
|
9a1b1e2832 | ||
|
|
1cebbe7af0 | ||
|
|
2862c80025 | ||
|
|
abbaf83e97 | ||
|
|
cd5e6510a6 | ||
|
|
fd097724f1 | ||
|
|
96fbf3080a | ||
|
|
0d490bbb19 | ||
|
|
f86558c30b | ||
|
|
0bd2fbf80a | ||
|
|
887fc15915 | ||
|
|
5c09a2eb29 | ||
|
|
b0c1948c14 | ||
|
|
0c5e9da028 | ||
|
|
f534da906c | ||
|
|
5e34faefe5 | ||
|
|
5e8dbaf239 | ||
|
|
b90d402fa9 | ||
|
|
d060bd8229 | ||
|
|
c104f6f34b | ||
|
|
e9ed3bca18 | ||
|
|
e2762a6e03 | ||
|
|
8cb9e123b1 | ||
|
|
e82346e7f6 | ||
|
|
080de34163 | ||
|
|
0a92ae1445 | ||
|
|
0b635377ef | ||
|
|
f2197d4b2a | ||
|
|
6e78da55b2 | ||
|
|
4123a4c442 | ||
|
|
dbb4f9204a | ||
|
|
67fee1cce8 | ||
|
|
b779742585 | ||
|
|
da78b0bc65 | ||
|
|
d46d51c3bb | ||
|
|
0b2ee85c11 | ||
|
|
825e3b7ee8 | ||
|
|
2af9204542 | ||
|
|
34e622b809 | ||
|
|
531ad28158 | ||
|
|
286a75fa8e | ||
|
|
a6c5c21131 | ||
|
|
f6b7c13dde | ||
|
|
1ffe3658fe | ||
|
|
8e430ce1c1 | ||
|
|
8ebd22bc26 | ||
|
|
0ff950d01e | ||
|
|
fed8894c33 | ||
|
|
cce5f92312 | ||
|
|
40ace9cfaa | ||
|
|
f618e4aec3 | ||
|
|
ae84122c3d | ||
|
|
b8475bac30 | ||
|
|
bf8b76b5fe | ||
|
|
31afc82b56 | ||
|
|
5be43e15c0 | ||
|
|
6da70fd8ea | ||
|
|
d00a48f16e | ||
|
|
91012ea62d | ||
|
|
23e3de9da5 | ||
|
|
6c89ca8d70 | ||
|
|
3c07133912 | ||
|
|
5b46c1527e | ||
|
|
89e9bf1ed6 | ||
|
|
ff6cbd1d5b | ||
|
|
8968f0e6ec | ||
|
|
44744de73d | ||
|
|
754c364d22 | ||
|
|
9856adb7ce | ||
|
|
e416a8a641 | ||
|
|
fc610313bf | ||
|
|
18e2349713 | ||
|
|
e884b1d1f4 | ||
|
|
45b8fc4ecf | ||
|
|
379257b4b4 | ||
|
|
8fe9903e7a | ||
|
|
d0b04a97b8 | ||
|
|
a931086e29 | ||
|
|
8f4249ee24 | ||
|
|
71b87834b3 | ||
|
|
9c90c539f8 | ||
|
|
ed828c3b89 | ||
|
|
675ef72e30 | ||
|
|
c13d29e4c2 | ||
|
|
d5e47e39ab | ||
|
|
c2a207b1cd | ||
|
|
e280444479 | ||
|
|
9b64d91998 | ||
|
|
91d93bef79 | ||
|
|
bbbf70f3ac | ||
|
|
4d2abc7667 | ||
|
|
58d70e252f | ||
|
|
e85f43f1d1 | ||
|
|
f3f2b213e7 | ||
|
|
34fc6439d2 | ||
|
|
3f08d37ef7 | ||
|
|
be446c1bf2 | ||
|
|
5251534283 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -5,3 +5,5 @@ cartodb_services.egg-info/
|
||||
build/
|
||||
dist/
|
||||
.vscode/
|
||||
.idea/
|
||||
venv/
|
||||
|
||||
5
NEWS.md
5
NEWS.md
@@ -1,3 +1,8 @@
|
||||
Jul 19th, 2018
|
||||
==============
|
||||
* Version `0.25.0` of the client, `0.32.0` of the server, and `0.19.1` of the Python library.
|
||||
* Support for batch street-level geocoding.
|
||||
|
||||
May 7th, 2018
|
||||
=============
|
||||
* Version `0.24.0` of the client, `0.31.0` of the server, and `0.18.0` of the Python library.
|
||||
|
||||
276
client/cdb_dataservices_client--0.24.0--0.25.0.sql
Normal file
276
client/cdb_dataservices_client--0.24.0--0.25.0.sql
Normal file
@@ -0,0 +1,276 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "ALTER EXTENSION cdb_dataservices_client UPDATE TO '<%= version %>'" to load this file. \quit
|
||||
|
||||
-- Make sure we have a sane search path to create/update the extension
|
||||
SET search_path = "$user",cartodb,public,cdb_dataservices_client;
|
||||
|
||||
-- HERE goes your code to upgrade/downgrade
|
||||
-- Taken from https://wiki.postgresql.org/wiki/Count_estimate
|
||||
-- Returns the planner's row estimate for `query` by running EXPLAIN on it and
-- reading the first " rows=<n>" figure found in the plan output.
-- Returns NULL when no plan line carries such a figure.
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_count_estimate(query text) RETURNS INTEGER AS
$func$
DECLARE
    plan_line record;
    estimated_rows INTEGER;
BEGIN
    -- EXPLAIN yields one row per plan line; stop at the first line with a match.
    FOR plan_line IN EXECUTE 'EXPLAIN ' || query LOOP
        estimated_rows := SUBSTRING(plan_line."QUERY PLAN" FROM ' rows=([[:digit:]]+)');
        IF estimated_rows IS NOT NULL THEN
            EXIT;
        END IF;
    END LOOP;

    RETURN estimated_rows;
END
$func$ LANGUAGE plpgsql;
|
||||
|
||||
-- Taken from https://stackoverflow.com/a/48013356/351721
|
||||
-- Converts a jsonb array into text[]; an input that yields no elements
-- produces an empty array rather than NULL.
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_jsonb_array_casttext(jsonb) RETURNS text[] AS $f$
    SELECT COALESCE(array_agg(elem), ARRAY[]::text[])
      FROM jsonb_array_elements_text($1) AS t(elem);
$f$ LANGUAGE sql IMMUTABLE;
|
||||
|
||||
-- Row type returned by the bulk street geocoding functions.
-- NOTE(review): the_geom is declared as Multipolygon although the functions
-- using this type are named *_geocode_street_point; confirm whether Point was
-- intended before changing it (the temp table in cdb_bulk_geocode_street_point
-- and the server-side type use the same declaration).
CREATE TYPE cdb_dataservices_client.geocoding AS (
    cartodb_id  integer,
    the_geom    geometry(Multipolygon,4326),
    metadata    jsonb
);
|
||||
|
||||
-- Per-service quota row that also carries max_batch_size, which
-- cdb_bulk_geocode_street_point reads to size its batches.
CREATE TYPE cdb_dataservices_client.service_quota_info_batch AS (
    service         cdb_dataservices_client.service_type,
    monthly_quota   NUMERIC,
    used_quota      NUMERIC,
    soft_limit      BOOLEAN,
    provider        TEXT,
    max_batch_size  NUMERIC
);
|
||||
|
||||
--
|
||||
-- Public dataservices API function
|
||||
--
|
||||
-- These are the only ones with permissions to publicuser role
|
||||
-- and should also be the only ones with SECURITY DEFINER
|
||||
|
||||
-- Resolves the calling entity (username/orgname) through _cdb_entity_config()
-- and forwards `searches` to the private __cdb_bulk_geocode_street_point.
-- Raises when the session belongs to a public user or no username is configured.
CREATE OR REPLACE FUNCTION cdb_dataservices_client._cdb_bulk_geocode_street_point (searches jsonb)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
DECLARE
  entity_user text;
  entity_org text;
BEGIN
  -- NOTE(review): the pattern is unanchored and `_*` means "zero or more
  -- underscores", so this matches any session_user containing
  -- 'cartodb_publicuser'; confirm whether '^cartodb_publicuser_' was intended.
  IF session_user = 'publicuser' OR session_user ~ 'cartodb_publicuser_*' THEN
    RAISE EXCEPTION 'The api_key must be provided';
  END IF;

  SELECT u, o
    INTO entity_user, entity_org
    FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text);

  -- A JSON value stored as "" arrives as the two-character literal '""'.
  IF coalesce(entity_user, '') IN ('', '""') THEN
    RAISE EXCEPTION 'Username is a mandatory argument, check it out';
  END IF;

  RETURN QUERY
    SELECT * FROM cdb_dataservices_client.__cdb_bulk_geocode_street_point(entity_user, entity_org, searches);
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER STABLE PARALLEL UNSAFE;
|
||||
|
||||
--
|
||||
-- Public dataservices API function
|
||||
--
|
||||
-- These are the only ones with permissions to publicuser role
|
||||
-- and should also be the only ones with SECURITY DEFINER
|
||||
|
||||
-- Resolves the calling entity (username/orgname) through _cdb_entity_config()
-- and returns the rows of the private _cdb_service_quota_info_batch for it.
-- Raises when the session belongs to a public user or no username is configured.
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch ()
RETURNS SETOF service_quota_info_batch AS $$
DECLARE
  entity_user text;
  entity_org text;
BEGIN
  -- NOTE(review): the pattern is unanchored and `_*` means "zero or more
  -- underscores", so this matches any session_user containing
  -- 'cartodb_publicuser'; confirm whether '^cartodb_publicuser_' was intended.
  IF session_user = 'publicuser' OR session_user ~ 'cartodb_publicuser_*' THEN
    RAISE EXCEPTION 'The api_key must be provided';
  END IF;

  SELECT u, o
    INTO entity_user, entity_org
    FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text);

  -- A JSON value stored as "" arrives as the two-character literal '""'.
  IF coalesce(entity_user, '') IN ('', '""') THEN
    RAISE EXCEPTION 'Username is a mandatory argument, check it out';
  END IF;

  RETURN QUERY
    SELECT * FROM cdb_dataservices_client._cdb_service_quota_info_batch(entity_user, entity_org);
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER STABLE PARALLEL UNSAFE;
|
||||
|
||||
-- Geocodes every row produced by `query` in batches.
--
-- Parameters:
--   query           SQL text whose rows are geocoded; it must expose a cartodb_id column.
--   street_column,
--   city_column,
--   state_column,
--   country_column  SQL expressions (column names or quoted literals) spliced into
--                   the per-batch statement; NULL is replaced by an empty-string literal.
--   batch_size      rows per request; NULL means "use the provider's max_batch_size".
--
-- Returns one cdb_dataservices_client.geocoding row per result returned by
-- _cdb_bulk_geocode_street_point.
--
-- Raises when batch_size exceeds the provider maximum, when it is not a
-- positive number, or when the remaining quota is lower than the row count.
--
-- NOTE(review): `query` and the *_column arguments are interpolated verbatim
-- into dynamic SQL inside a SECURITY DEFINER function; confirm this is intended.
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text,
    street_column text, city_column text default null, state_column text default null, country_column text default null, batch_size integer DEFAULT NULL)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
DECLARE
  query_row_count integer;
  remaining_quota integer;
  max_batch_size integer;

  cartodb_id_batch integer;
  batches_n integer;
  MAX_SAFE_BATCH_SIZE CONSTANT numeric := 5000;

  temp_table_name text;
BEGIN
  SELECT csqi.monthly_quota - csqi.used_quota AS remaining_quota, csqi.max_batch_size
    INTO remaining_quota, max_batch_size
    FROM cdb_dataservices_client.cdb_service_quota_info_batch() csqi
    WHERE service = 'hires_geocoder';
  RAISE DEBUG 'remaining_quota: %; max_batch_size: %', remaining_quota, max_batch_size;

  IF batch_size IS NULL THEN
    batch_size := max_batch_size;
  ELSIF batch_size > max_batch_size THEN
    RAISE EXCEPTION 'batch_size must be lower than %', max_batch_size + 1;
  END IF;

  -- A NULL batch size would produce malformed dynamic SQL below, zero would
  -- divide by zero, and a negative one would silently geocode nothing.
  IF batch_size IS NULL OR batch_size < 1 THEN
    RAISE EXCEPTION 'batch_size must be a positive integer, got %', batch_size;
  END IF;

  IF batch_size > MAX_SAFE_BATCH_SIZE THEN
    batch_size := MAX_SAFE_BATCH_SIZE;
  END IF;

  EXECUTE format('SELECT count(1), ceil(count(1)::float/%s) FROM (%s) _x', batch_size, query)
    INTO query_row_count, batches_n;

  RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
    query_row_count, query, country_column, state_column, city_column, street_column;

  -- The result was never consulted by the original code; the call is kept so
  -- that anything cdb_enough_quota itself raises or records still happens.
  PERFORM cdb_dataservices_client.cdb_enough_quota('hires_geocoder', query_row_count);
  IF remaining_quota < query_row_count THEN
    RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
  END IF;

  RAISE DEBUG 'batches_n: %', batches_n;

  temp_table_name := 'bulk_geocode_street_' || md5(random()::text);

  -- ON COMMIT DROP: the table only buffers results for this call. Without it a
  -- new randomly named table would linger for every call until the session ends.
  EXECUTE format('CREATE TEMPORARY TABLE %I ' ||
    '(cartodb_id integer, the_geom geometry(Multipolygon,4326), metadata jsonb) ' ||
    'ON COMMIT DROP',
    temp_table_name);

  -- Missing column expressions become the SQL literal '' in the generated statement.
  select
    coalesce(street_column, ''''''), coalesce(city_column, ''''''),
    coalesce(state_column, ''''''), coalesce(country_column, '''''')
    into street_column, city_column, state_column, country_column;

  -- NOTE(review): every iteration re-runs `query` and numbers its rows with
  -- row_number() over () without an ORDER BY; if the query's row order is not
  -- stable between runs, rows could land in different batches. Confirm.
  IF batches_n > 0 THEN
    FOR cartodb_id_batch in 0..(batches_n - 1)
    LOOP
      EXECUTE format(
        'WITH geocoding_data as (' ||
        ' SELECT ' ||
        ' json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' ||
        ' floor((row_number() over () - 1)::float/$1) as batch' ||
        ' FROM (%s) _x' ||
        ') ' ||
        'INSERT INTO %I SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' ||
        'FROM geocoding_data ' ||
        'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name)
      USING batch_size, cartodb_id_batch;
    END LOOP;
  END IF;

  RETURN QUERY EXECUTE 'SELECT * FROM ' || quote_ident(temp_table_name);
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
--
|
||||
-- Exception-safe private DataServices API function
|
||||
--
|
||||
|
||||
-- Same contract as _cdb_bulk_geocode_street_point, except that any error raised
-- by the private __cdb_bulk_geocode_street_point call is caught and re-emitted
-- as a WARNING carrying the original SQLSTATE, message and context.
-- The api_key and username checks still raise exceptions.
CREATE OR REPLACE FUNCTION cdb_dataservices_client.__cdb_bulk_geocode_street_point_exception_safe (searches jsonb)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
DECLARE
  entity_user text;
  entity_org text;
  err_sqlstate TEXT;
  err_message TEXT;
  err_context TEXT;
BEGIN
  -- NOTE(review): the pattern is unanchored and `_*` means "zero or more
  -- underscores", so this matches any session_user containing
  -- 'cartodb_publicuser'; confirm whether '^cartodb_publicuser_' was intended.
  IF session_user = 'publicuser' OR session_user ~ 'cartodb_publicuser_*' THEN
    RAISE EXCEPTION 'The api_key must be provided';
  END IF;

  SELECT u, o
    INTO entity_user, entity_org
    FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text);

  -- A JSON value stored as "" arrives as the two-character literal '""'.
  IF coalesce(entity_user, '') IN ('', '""') THEN
    RAISE EXCEPTION 'Username is a mandatory argument, check it out';
  END IF;

  -- Only the remote call is guarded; the checks above are outside the handler.
  BEGIN
    RETURN QUERY
      SELECT * FROM cdb_dataservices_client.__cdb_bulk_geocode_street_point(entity_user, entity_org, searches);
  EXCEPTION
    WHEN OTHERS THEN
      GET STACKED DIAGNOSTICS err_sqlstate = RETURNED_SQLSTATE,
                              err_message = MESSAGE_TEXT,
                              err_context = PG_EXCEPTION_CONTEXT;
      RAISE WARNING USING ERRCODE = err_sqlstate, MESSAGE = err_message, DETAIL = err_context;
  END;
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER STABLE PARALLEL UNSAFE;
|
||||
|
||||
--
|
||||
-- Exception-safe private DataServices API function
|
||||
--
|
||||
|
||||
-- Same contract as cdb_service_quota_info_batch, except that any error raised
-- by the private _cdb_service_quota_info_batch call is caught and re-emitted
-- as a WARNING carrying the original SQLSTATE, message and context.
-- The api_key and username checks still raise exceptions.
CREATE OR REPLACE FUNCTION cdb_dataservices_client._cdb_service_quota_info_batch_exception_safe ()
RETURNS SETOF service_quota_info_batch AS $$
DECLARE
  entity_user text;
  entity_org text;
  err_sqlstate TEXT;
  err_message TEXT;
  err_context TEXT;
BEGIN
  -- NOTE(review): the pattern is unanchored and `_*` means "zero or more
  -- underscores", so this matches any session_user containing
  -- 'cartodb_publicuser'; confirm whether '^cartodb_publicuser_' was intended.
  IF session_user = 'publicuser' OR session_user ~ 'cartodb_publicuser_*' THEN
    RAISE EXCEPTION 'The api_key must be provided';
  END IF;

  SELECT u, o
    INTO entity_user, entity_org
    FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text);

  -- A JSON value stored as "" arrives as the two-character literal '""'.
  IF coalesce(entity_user, '') IN ('', '""') THEN
    RAISE EXCEPTION 'Username is a mandatory argument, check it out';
  END IF;

  -- Only the remote call is guarded; the checks above are outside the handler.
  BEGIN
    RETURN QUERY
      SELECT * FROM cdb_dataservices_client._cdb_service_quota_info_batch(entity_user, entity_org);
  EXCEPTION
    WHEN OTHERS THEN
      GET STACKED DIAGNOSTICS err_sqlstate = RETURNED_SQLSTATE,
                              err_message = MESSAGE_TEXT,
                              err_context = PG_EXCEPTION_CONTEXT;
      RAISE WARNING USING ERRCODE = err_sqlstate, MESSAGE = err_message, DETAIL = err_context;
  END;
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER STABLE PARALLEL UNSAFE;
|
||||
|
||||
-- PL/Proxy stub: connects using _server_conn_str() and runs the server-side
-- _cdb_bulk_geocode_street_point with the same three arguments.
-- The function is dropped first and then recreated.
DROP FUNCTION IF EXISTS cdb_dataservices_client.__cdb_bulk_geocode_street_point (username text, orgname text, searches jsonb);

CREATE OR REPLACE FUNCTION cdb_dataservices_client.__cdb_bulk_geocode_street_point (username text, orgname text, searches jsonb)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
  CONNECT cdb_dataservices_client._server_conn_str();

  SELECT * FROM cdb_dataservices_server._cdb_bulk_geocode_street_point (username, orgname, searches);
$$ LANGUAGE plproxy VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- PL/Proxy stub: connects using _server_conn_str() and runs the server-side
-- cdb_service_quota_info_batch for the given username/orgname.
-- The function is dropped first and then recreated.
DROP FUNCTION IF EXISTS cdb_dataservices_client._cdb_service_quota_info_batch (username text, orgname text);

CREATE OR REPLACE FUNCTION cdb_dataservices_client._cdb_service_quota_info_batch (username text, orgname text)
RETURNS SETOF service_quota_info_batch AS $$
  CONNECT cdb_dataservices_client._server_conn_str();

  SELECT * FROM cdb_dataservices_server.cdb_service_quota_info_batch (username, orgname);
$$ LANGUAGE plproxy VOLATILE PARALLEL UNSAFE;
|
||||
|
||||
-- Entity-resolving wrappers and their exception-safe variants.
GRANT EXECUTE ON FUNCTION
    cdb_dataservices_client._cdb_bulk_geocode_street_point(searches jsonb),
    cdb_dataservices_client.__cdb_bulk_geocode_street_point_exception_safe(searches jsonb),
    cdb_dataservices_client.cdb_service_quota_info_batch(),
    cdb_dataservices_client._cdb_service_quota_info_batch_exception_safe()
TO publicuser;

-- Batch geocoding entry point and the utility helpers.
GRANT EXECUTE ON FUNCTION
    cdb_dataservices_client.cdb_bulk_geocode_street_point(query text, street_column text, city_column text, state_column text, country_column text, batch_size integer),
    cdb_dataservices_client.cdb_count_estimate(query text),
    cdb_dataservices_client.cdb_jsonb_array_casttext(jsonb)
TO publicuser;

-- NOTE(review): this exposes the private PL/Proxy function, which takes
-- username/orgname as plain arguments, directly to publicuser. The comments on
-- the public wrappers say those should be the only functions granted to that
-- role; confirm this grant is intentional.
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.__cdb_bulk_geocode_street_point (username text, orgname text, searches jsonb) TO publicuser;
|
||||
29
client/cdb_dataservices_client--0.25.0--0.24.0.sql
Normal file
29
client/cdb_dataservices_client--0.25.0--0.24.0.sql
Normal file
@@ -0,0 +1,29 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "ALTER EXTENSION cdb_dataservices_client UPDATE TO '<%= version %>'" to load this file. \quit
|
||||
|
||||
-- Make sure we have a sane search path to create/update the extension
|
||||
SET search_path = "$user",cartodb,public,cdb_dataservices_client;
|
||||
|
||||
-- HERE goes your code to upgrade/downgrade
|
||||
-- Utility helpers.
DROP FUNCTION IF EXISTS cdb_dataservices_client.cdb_count_estimate(query text);
DROP FUNCTION IF EXISTS cdb_dataservices_client.cdb_jsonb_array_casttext(jsonb);

-- Wrappers, the batch entry point and the exception-safe variants.
DROP FUNCTION IF EXISTS cdb_dataservices_client._cdb_bulk_geocode_street_point (jsonb);
DROP FUNCTION IF EXISTS cdb_dataservices_client.cdb_service_quota_info_batch();
DROP FUNCTION IF EXISTS cdb_dataservices_client.cdb_bulk_geocode_street_point (text, text, text, text, text, integer);
DROP FUNCTION IF EXISTS cdb_dataservices_client.__cdb_bulk_geocode_street_point_exception_safe (jsonb);
DROP FUNCTION IF EXISTS cdb_dataservices_client._cdb_service_quota_info_batch_exception_safe ();

-- PL/Proxy stubs.
DROP FUNCTION IF EXISTS cdb_dataservices_client.__cdb_bulk_geocode_street_point (text, text, jsonb);
DROP FUNCTION IF EXISTS cdb_dataservices_client._cdb_service_quota_info_batch (text, text);

-- Types go last: the functions dropped above use them as return types.
DROP TYPE IF EXISTS cdb_dataservices_client.service_quota_info_batch;
DROP TYPE IF EXISTS cdb_dataservices_client.geocoding;
|
||||
5218
client/cdb_dataservices_client--0.25.0.sql
Normal file
5218
client/cdb_dataservices_client--0.25.0.sql
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
comment = 'CartoDB dataservices client API extension'
|
||||
default_version = '0.24.0'
|
||||
default_version = '0.25.0'
|
||||
requires = 'plproxy, cartodb'
|
||||
superuser = true
|
||||
schema = cdb_dataservices_client
|
||||
|
||||
@@ -70,6 +70,13 @@
|
||||
- { name: state_province, type: text, default: 'NULL'}
|
||||
- { name: country, type: text, default: 'NULL'}
|
||||
|
||||
- name: _cdb_bulk_geocode_street_point
|
||||
return_type: SETOF cdb_dataservices_client.geocoding
|
||||
multi_row: true
|
||||
multi_field: true
|
||||
params:
|
||||
- { name: searches, type: jsonb } # Array of JSON objects with id, address, city, state and country fields
|
||||
|
||||
- name: cdb_here_geocode_street_point
|
||||
return_type: Geometry
|
||||
params:
|
||||
@@ -510,6 +517,13 @@
|
||||
params:
|
||||
- {}
|
||||
|
||||
- name: cdb_service_quota_info_batch
|
||||
return_type: SETOF service_quota_info_batch
|
||||
multi_row: true
|
||||
multi_field: true
|
||||
params:
|
||||
- {}
|
||||
|
||||
- name: cdb_enough_quota
|
||||
return_type: BOOLEAN
|
||||
params:
|
||||
|
||||
20
client/sql/05_utils.sql
Normal file
20
client/sql/05_utils.sql
Normal file
@@ -0,0 +1,20 @@
|
||||
-- Taken from https://wiki.postgresql.org/wiki/Count_estimate
|
||||
-- Returns the planner's row estimate for `query` by running EXPLAIN on it and
-- reading the first " rows=<n>" figure found in the plan output.
-- Returns NULL when no plan line carries such a figure.
CREATE FUNCTION cdb_dataservices_client.cdb_count_estimate(query text) RETURNS INTEGER AS
$func$
DECLARE
    plan_line record;
    estimated_rows INTEGER;
BEGIN
    -- EXPLAIN yields one row per plan line; stop at the first line with a match.
    FOR plan_line IN EXECUTE 'EXPLAIN ' || query LOOP
        estimated_rows := SUBSTRING(plan_line."QUERY PLAN" FROM ' rows=([[:digit:]]+)');
        IF estimated_rows IS NOT NULL THEN
            EXIT;
        END IF;
    END LOOP;

    RETURN estimated_rows;
END
$func$ LANGUAGE plpgsql;
|
||||
|
||||
-- Taken from https://stackoverflow.com/a/48013356/351721
|
||||
-- Converts a jsonb array into text[]; an input that yields no elements
-- produces an empty array rather than NULL.
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_jsonb_array_casttext(jsonb) RETURNS text[] AS $f$
    SELECT COALESCE(array_agg(elem), ARRAY[]::text[])
      FROM jsonb_array_elements_text($1) AS t(elem);
$f$ LANGUAGE sql IMMUTABLE;
|
||||
@@ -4,6 +4,12 @@ CREATE TYPE cdb_dataservices_client.isoline AS (
|
||||
the_geom geometry(Multipolygon,4326)
|
||||
);
|
||||
|
||||
-- Row type for geocoding results: an integer cartodb_id, a geometry and a
-- jsonb metadata document.
-- NOTE(review): the_geom is declared as Multipolygon, identical to the isoline
-- type declared just above in this file; confirm whether Point was intended
-- for geocoding results before changing it.
CREATE TYPE cdb_dataservices_client.geocoding AS (
    cartodb_id  integer,
    the_geom    geometry(Multipolygon,4326),
    metadata    jsonb
);
|
||||
|
||||
CREATE TYPE cdb_dataservices_client.simple_route AS (
|
||||
shape geometry(LineString,4326),
|
||||
length real,
|
||||
@@ -35,3 +41,12 @@ CREATE TYPE cdb_dataservices_client.service_quota_info AS (
|
||||
soft_limit BOOLEAN,
|
||||
provider TEXT
|
||||
);
|
||||
|
||||
-- Same fields as service_quota_info plus max_batch_size.
CREATE TYPE cdb_dataservices_client.service_quota_info_batch AS (
    service         cdb_dataservices_client.service_type,
    monthly_quota   NUMERIC,
    used_quota      NUMERIC,
    soft_limit      BOOLEAN,
    provider        TEXT,
    max_batch_size  NUMERIC
);
|
||||
|
||||
76
client/sql/21_bulk_geocoding_functions.sql
Normal file
76
client/sql/21_bulk_geocoding_functions.sql
Normal file
@@ -0,0 +1,76 @@
|
||||
-- Geocodes every row produced by `query` in batches.
--
-- Parameters:
--   query           SQL text whose rows are geocoded; it must expose a cartodb_id column.
--   street_column,
--   city_column,
--   state_column,
--   country_column  SQL expressions (column names or quoted literals) spliced into
--                   the per-batch statement; NULL is replaced by an empty-string literal.
--   batch_size      rows per request; NULL means "use the provider's max_batch_size".
--
-- Returns one cdb_dataservices_client.geocoding row per result returned by
-- _cdb_bulk_geocode_street_point.
--
-- Raises when batch_size exceeds the provider maximum, when it is not a
-- positive number, or when the remaining quota is lower than the row count.
--
-- NOTE(review): `query` and the *_column arguments are interpolated verbatim
-- into dynamic SQL inside a SECURITY DEFINER function; confirm this is intended.
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text,
    street_column text, city_column text default null, state_column text default null, country_column text default null, batch_size integer DEFAULT NULL)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
DECLARE
  query_row_count integer;
  remaining_quota integer;
  max_batch_size integer;

  cartodb_id_batch integer;
  batches_n integer;
  MAX_SAFE_BATCH_SIZE CONSTANT numeric := 5000;

  temp_table_name text;
BEGIN
  SELECT csqi.monthly_quota - csqi.used_quota AS remaining_quota, csqi.max_batch_size
    INTO remaining_quota, max_batch_size
    FROM cdb_dataservices_client.cdb_service_quota_info_batch() csqi
    WHERE service = 'hires_geocoder';
  RAISE DEBUG 'remaining_quota: %; max_batch_size: %', remaining_quota, max_batch_size;

  IF batch_size IS NULL THEN
    batch_size := max_batch_size;
  ELSIF batch_size > max_batch_size THEN
    RAISE EXCEPTION 'batch_size must be lower than %', max_batch_size + 1;
  END IF;

  -- A NULL batch size would produce malformed dynamic SQL below, zero would
  -- divide by zero, and a negative one would silently geocode nothing.
  IF batch_size IS NULL OR batch_size < 1 THEN
    RAISE EXCEPTION 'batch_size must be a positive integer, got %', batch_size;
  END IF;

  IF batch_size > MAX_SAFE_BATCH_SIZE THEN
    batch_size := MAX_SAFE_BATCH_SIZE;
  END IF;

  EXECUTE format('SELECT count(1), ceil(count(1)::float/%s) FROM (%s) _x', batch_size, query)
    INTO query_row_count, batches_n;

  RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
    query_row_count, query, country_column, state_column, city_column, street_column;

  -- The result was never consulted by the original code; the call is kept so
  -- that anything cdb_enough_quota itself raises or records still happens.
  PERFORM cdb_dataservices_client.cdb_enough_quota('hires_geocoder', query_row_count);
  IF remaining_quota < query_row_count THEN
    RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
  END IF;

  RAISE DEBUG 'batches_n: %', batches_n;

  temp_table_name := 'bulk_geocode_street_' || md5(random()::text);

  -- ON COMMIT DROP: the table only buffers results for this call. Without it a
  -- new randomly named table would linger for every call until the session ends.
  EXECUTE format('CREATE TEMPORARY TABLE %I ' ||
    '(cartodb_id integer, the_geom geometry(Multipolygon,4326), metadata jsonb) ' ||
    'ON COMMIT DROP',
    temp_table_name);

  -- Missing column expressions become the SQL literal '' in the generated statement.
  select
    coalesce(street_column, ''''''), coalesce(city_column, ''''''),
    coalesce(state_column, ''''''), coalesce(country_column, '''''')
    into street_column, city_column, state_column, country_column;

  -- NOTE(review): every iteration re-runs `query` and numbers its rows with
  -- row_number() over () without an ORDER BY; if the query's row order is not
  -- stable between runs, rows could land in different batches. Confirm.
  IF batches_n > 0 THEN
    FOR cartodb_id_batch in 0..(batches_n - 1)
    LOOP
      EXECUTE format(
        'WITH geocoding_data as (' ||
        ' SELECT ' ||
        ' json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' ||
        ' floor((row_number() over () - 1)::float/$1) as batch' ||
        ' FROM (%s) _x' ||
        ') ' ||
        'INSERT INTO %I SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' ||
        'FROM geocoding_data ' ||
        'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name)
      USING batch_size, cartodb_id_batch;
    END LOOP;
  END IF;

  RETURN QUERY EXECUTE 'SELECT * FROM ' || quote_ident(temp_table_name);
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER VOLATILE PARALLEL UNSAFE;
|
||||
@@ -1,3 +1,7 @@
|
||||
-- Observatory table helpers.
GRANT EXECUTE ON FUNCTION
    cdb_dataservices_client._DST_PrepareTableOBS_GetMeasure(output_table_name text, params json),
    cdb_dataservices_client._DST_PopulateTableOBS_GetMeasure(table_name text, output_table_name text, params json),
    cdb_dataservices_client._OBS_PreCheck(source_query text, params JSON)
TO publicuser;

-- Batch street geocoding entry point.
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point(query text, street_column text, city_column text, state_column text, country_column text, batch_size integer) TO publicuser;

-- Utility helpers.
GRANT EXECUTE ON FUNCTION
    cdb_dataservices_client.cdb_count_estimate(query text),
    cdb_dataservices_client.cdb_jsonb_array_casttext(jsonb)
TO publicuser;
|
||||
|
||||
21
client/test/expected/21_bulk_geocoding_functions_test.out
Normal file
21
client/test/expected/21_bulk_geocoding_functions_test.out
Normal file
@@ -0,0 +1,21 @@
|
||||
\set VERBOSITY terse
|
||||
ALTER FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch() RENAME TO cdb_service_quota_info_batch_mocked;
|
||||
CREATE FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch ()
|
||||
RETURNS SETOF cdb_dataservices_client.service_quota_info_batch AS $$
|
||||
SELECT 'hires_geocoder'::cdb_dataservices_client.service_type AS service, 0::NUMERIC AS monthly_quota, 0::NUMERIC AS used_quota, FALSE AS soft_limit, 'google' AS provider, 1::NUMERIC AS max_batch_size;
|
||||
$$ LANGUAGE SQL;
|
||||
ALTER FUNCTION cdb_dataservices_client.cdb_enough_quota (service TEXT ,input_size NUMERIC) RENAME TO cdb_enough_quota_mocked;
|
||||
CREATE FUNCTION cdb_dataservices_client.cdb_enough_quota (service TEXT ,input_size NUMERIC)
|
||||
RETURNS BOOLEAN as $$
|
||||
SELECT FALSE;
|
||||
$$ LANGUAGE SQL;
|
||||
-- Test bulk size not mandatory (it will get the optimal)
|
||||
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''', null, null, null, null);
|
||||
ERROR: Remaining quota: 0. Estimated cost: 1
|
||||
-- Test quota check by mocking quota 0
|
||||
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''');
|
||||
ERROR: Remaining quota: 0. Estimated cost: 1
|
||||
DROP FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch;
|
||||
DROP FUNCTION cdb_dataservices_client.cdb_enough_quota;
|
||||
ALTER FUNCTION cdb_dataservices_client.cdb_enough_quota_mocked (service TEXT ,input_size NUMERIC) RENAME TO cdb_enough_quota;
|
||||
ALTER FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch_mocked() RENAME TO cdb_service_quota_info_batch;
|
||||
26
client/test/sql/21_bulk_geocoding_functions_test.sql
Normal file
26
client/test/sql/21_bulk_geocoding_functions_test.sql
Normal file
@@ -0,0 +1,26 @@
|
||||
\set VERBOSITY terse
|
||||
|
||||
ALTER FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch() RENAME TO cdb_service_quota_info_batch_mocked;
|
||||
CREATE FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch ()
|
||||
RETURNS SETOF cdb_dataservices_client.service_quota_info_batch AS $$
|
||||
SELECT 'hires_geocoder'::cdb_dataservices_client.service_type AS service, 0::NUMERIC AS monthly_quota, 0::NUMERIC AS used_quota, FALSE AS soft_limit, 'google' AS provider, 1::NUMERIC AS max_batch_size;
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
ALTER FUNCTION cdb_dataservices_client.cdb_enough_quota (service TEXT ,input_size NUMERIC) RENAME TO cdb_enough_quota_mocked;
|
||||
CREATE FUNCTION cdb_dataservices_client.cdb_enough_quota (service TEXT ,input_size NUMERIC)
|
||||
RETURNS BOOLEAN as $$
|
||||
SELECT FALSE;
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
-- Test bulk size not mandatory (it will get the optimal)
|
||||
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''', null, null, null, null);
|
||||
|
||||
-- Test quota check by mocking quota 0
|
||||
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''');
|
||||
|
||||
DROP FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch;
|
||||
DROP FUNCTION cdb_dataservices_client.cdb_enough_quota;
|
||||
|
||||
ALTER FUNCTION cdb_dataservices_client.cdb_enough_quota_mocked (service TEXT ,input_size NUMERIC) RENAME TO cdb_enough_quota;
|
||||
ALTER FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch_mocked() RENAME TO cdb_service_quota_info_batch;
|
||||
|
||||
0
server/__init__.py
Normal file
0
server/__init__.py
Normal file
@@ -83,3 +83,6 @@ deploy: release_remove_parallel_deploy
|
||||
$(INSTALL_DATA) old_versions/*.sql *.sql '$(DESTDIR)$(datadir)/extension/'
|
||||
|
||||
install: deploy
|
||||
|
||||
reinstall: install
|
||||
psql -U postgres -d dataservices_db -c "drop extension if exists cdb_dataservices_server; create extension cdb_dataservices_server;"
|
||||
|
||||
148
server/extension/cdb_dataservices_server--0.31.0--0.32.0.sql
Normal file
148
server/extension/cdb_dataservices_server--0.31.0--0.32.0.sql
Normal file
@@ -0,0 +1,148 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "ALTER EXTENSION cdb_dataservices_server UPDATE TO '<%= version %>'" to load this file. \quit
|
||||
|
||||
-- HERE goes your code to upgrade/downgrade
|
||||
|
||||
|
||||
-- Creates cdb_dataservices_server.service_quota_info_batch only when the
-- catalog has no type of that name in that schema.
DO $$
BEGIN
    PERFORM 1
       FROM pg_type
      INNER JOIN pg_namespace ON (pg_type.typnamespace = pg_namespace.oid)
      WHERE pg_type.typname = 'service_quota_info_batch'
        AND pg_namespace.nspname = 'cdb_dataservices_server';

    IF NOT FOUND THEN
        CREATE TYPE cdb_dataservices_server.service_quota_info_batch AS (
            service         cdb_dataservices_server.service_type,
            monthly_quota   NUMERIC,
            used_quota      NUMERIC,
            soft_limit      BOOLEAN,
            provider        TEXT,
            max_batch_size  NUMERIC
        );
    END IF;
END $$;
|
||||
|
||||
-- Returns every row of cdb_service_quota_info(username, orgname) extended with
-- a max_batch_size column. For the 'hires_geocoder' service the value is the
-- MAX_BATCH_SIZE attribute of the provider's batch geocoder class when one is
-- registered and defines it; in every other case it is 1.
CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_service_quota_info_batch(
  username TEXT,
  orgname TEXT)
RETURNS SETOF cdb_dataservices_server.service_quota_info_batch AS $$
  from cartodb_services.bulk_geocoders import BATCH_GEOCODER_CLASS_BY_PROVIDER
  from cartodb_services.tools import Logger,LoggerConfig

  plpy.execute("SELECT cdb_dataservices_server._get_logger_config()")
  quota_rows = plpy.execute("SELECT * from cdb_dataservices_server.cdb_service_quota_info({0},{1})".format(plpy.quote_nullable(username), plpy.quote_nullable(orgname)))

  def batch_limit(row):
      # Only the street-level geocoder has batch-capable providers here.
      if row['service'] != 'hires_geocoder':
          return 1
      geocoder_class = BATCH_GEOCODER_CLASS_BY_PROVIDER.get(row['provider'], None)
      if geocoder_class and hasattr(geocoder_class, 'MAX_BATCH_SIZE'):
          return geocoder_class.MAX_BATCH_SIZE
      return 1

  return [
      [row['service'], row['monthly_quota'], row['used_quota'],
       row['soft_limit'], row['provider'], batch_limit(row)]
      for row in quota_rows
  ]
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
-- TODO: could cartodb_id be replaced by rowid, maybe needing extra care for offset?
|
||||
-- Row type returned by the server-side bulk street geocoding functions.
-- NOTE(review): the_geom is declared as Multipolygon although the functions
-- using this type are named *_geocode_street_point; confirm whether Point was
-- intended before changing it.
CREATE TYPE cdb_dataservices_server.geocoding AS (
    cartodb_id  integer,
    the_geom    geometry(Multipolygon,4326),
    metadata    jsonb
);
|
||||
|
||||
-- Dispatches a bulk street geocoding request to the provider-specific function
-- selected by the user's geocoder configuration (google, heremaps, tomtom or
-- mapbox, checked in that order) and returns that function's rows.
--
-- Parameters:
--   username, orgname  entity whose redis connection and geocoder config are loaded.
--   searches           jsonb payload handed unchanged to the provider function.
--
-- Raises Exception('Requested geocoder is not available') when none of the
-- four provider flags is set.
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  from cartodb_services.metrics import metrics
  from cartodb_services.tools import Logger

  # username is quoted with plpy.quote_nullable, like the _get_geocoder_config
  # call below, instead of being spliced between hand-written quotes.
  plpy.execute("SELECT cdb_dataservices_server._connect_to_redis({0})".format(plpy.quote_nullable(username)))
  redis_conn = GD["redis_connection_{0}".format(username)]['redis_metrics_connection']

  plpy.execute("SELECT cdb_dataservices_server._get_geocoder_config({0}, {1})".format(plpy.quote_nullable(username), plpy.quote_nullable(orgname)))
  user_geocoder_config = GD["user_geocoder_config_{0}".format(username)]

  plpy.execute("SELECT cdb_dataservices_server._get_logger_config()")
  logger_config = GD["logger_config"]
  logger = Logger(logger_config)

  params = {'searches': searches}

  # NOTE(review): the original indentation was lost; the prepare/execute pair
  # is assumed to run inside the metrics context. Confirm against the source file.
  with metrics('cdb_bulk_geocode_street_point', user_geocoder_config, logger, params):
      if user_geocoder_config.google_geocoder:
          provider_function = "_cdb_bulk_google_geocode_street_point"
      elif user_geocoder_config.heremaps_geocoder:
          provider_function = "_cdb_bulk_heremaps_geocode_street_point"
      elif user_geocoder_config.tomtom_geocoder:
          provider_function = "_cdb_bulk_tomtom_geocode_street_point"
      elif user_geocoder_config.mapbox_geocoder:
          provider_function = "_cdb_bulk_mapbox_geocode_street_point"
      else:
          raise Exception('Requested geocoder is not available')

      # provider_function is one of the fixed names above, so formatting it
      # into the statement is safe; the user-supplied values go through the plan.
      plan = plpy.prepare("SELECT * FROM cdb_dataservices_server.{}($1, $2, $3); ".format(provider_function), ["text", "text", "jsonb"])
      return plpy.execute(plan, [username, orgname, searches])
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_google_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  # Bulk street geocoding through Google Maps: builds the provider geocoder
  # from the user's configuration and delegates to run_street_point_geocoder.
  from cartodb_services import run_street_point_geocoder
  from cartodb_services.tools import LegacyServiceManager
  from cartodb_services.google import GoogleMapsBulkGeocoder

  service_manager = LegacyServiceManager('geocoder', username, orgname, GD)
  google_config = service_manager.config
  geocoder = GoogleMapsBulkGeocoder(
      google_config.google_client_id,
      google_config.google_api_key,
      service_manager.logger)
  return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_heremaps_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  # Bulk street geocoding through HERE Maps: builds the provider geocoder
  # from the user's configuration and delegates to run_street_point_geocoder.
  from cartodb_services import run_street_point_geocoder
  from cartodb_services.tools import LegacyServiceManager
  from cartodb_services.here import HereMapsBulkGeocoder

  service_manager = LegacyServiceManager('geocoder', username, orgname, GD)
  here_config = service_manager.config
  geocoder = HereMapsBulkGeocoder(
      here_config.heremaps_app_id,
      here_config.heremaps_app_code,
      service_manager.logger,
      here_config.heremaps_service_params)
  return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_tomtom_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  # Bulk street geocoding through TomTom: initialises cartodb_services,
  # builds the provider geocoder and delegates to run_street_point_geocoder.
  import cartodb_services
  from cartodb_services import run_street_point_geocoder
  from cartodb_services.tools import ServiceManager
  from cartodb_services.refactor.service.tomtom_geocoder_config import TomTomGeocoderConfigBuilder
  from cartodb_services.tomtom import TomTomBulkGeocoder
  from cartodb_services.tools import Logger

  cartodb_services.init(plpy, GD)

  # logger is not used further down; building it still requires
  # GD["logger_config"] to be present at this point.
  logger = Logger(GD["logger_config"])

  service_manager = ServiceManager('geocoder', TomTomGeocoderConfigBuilder, username, orgname, GD)
  tomtom_config = service_manager.config
  geocoder = TomTomBulkGeocoder(
      tomtom_config.tomtom_api_key,
      service_manager.logger,
      tomtom_config.service_params)
  return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_mapbox_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  # Bulk street geocoding through Mapbox: initialises cartodb_services,
  # builds the provider geocoder and delegates to run_street_point_geocoder.
  import cartodb_services
  from cartodb_services import run_street_point_geocoder
  from cartodb_services.tools import ServiceManager
  from cartodb_services.refactor.service.mapbox_geocoder_config import MapboxGeocoderConfigBuilder
  from cartodb_services.mapbox import MapboxBulkGeocoder
  from cartodb_services.tools import Logger

  cartodb_services.init(plpy, GD)

  # logger is not used further down; building it still requires
  # GD["logger_config"] to be present at this point.
  logger = Logger(GD["logger_config"])

  service_manager = ServiceManager('geocoder', MapboxGeocoderConfigBuilder, username, orgname, GD)
  mapbox_config = service_manager.config
  geocoder = MapboxBulkGeocoder(
      mapbox_config.mapbox_api_key,
      service_manager.logger,
      mapbox_config.service_params)
  return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
15
server/extension/cdb_dataservices_server--0.32.0--0.31.0.sql
Normal file
15
server/extension/cdb_dataservices_server--0.32.0--0.31.0.sql
Normal file
@@ -0,0 +1,15 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "ALTER EXTENSION cdb_dataservices_server UPDATE TO '<%= version %>'" to load this file. \quit
|
||||
|
||||
-- HERE goes your code to upgrade/downgrade
|
||||
|
||||
|
||||
-- Quota info and bulk street geocoding functions
DROP FUNCTION IF EXISTS cdb_dataservices_server.cdb_service_quota_info_batch(TEXT, TEXT);
DROP FUNCTION IF EXISTS cdb_dataservices_server._cdb_bulk_geocode_street_point(TEXT, TEXT, JSONB);
DROP FUNCTION IF EXISTS cdb_dataservices_server._cdb_bulk_google_geocode_street_point(TEXT, TEXT, JSONB);
DROP FUNCTION IF EXISTS cdb_dataservices_server._cdb_bulk_heremaps_geocode_street_point(TEXT, TEXT, JSONB);
DROP FUNCTION IF EXISTS cdb_dataservices_server._cdb_bulk_tomtom_geocode_street_point(TEXT, TEXT, JSONB);
DROP FUNCTION IF EXISTS cdb_dataservices_server._cdb_bulk_mapbox_geocode_street_point(TEXT, TEXT, JSONB);

-- Types are dropped last: the functions above return them.
DROP TYPE IF EXISTS cdb_dataservices_server.geocoding;
DROP TYPE IF EXISTS cdb_dataservices_server.service_quota_info_batch;
|
||||
3801
server/extension/cdb_dataservices_server--0.32.0.sql
Normal file
3801
server/extension/cdb_dataservices_server--0.32.0.sql
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
comment = 'CartoDB dataservices server extension'
|
||||
default_version = '0.31.0'
|
||||
default_version = '0.32.0'
|
||||
requires = 'plpythonu, plproxy, postgis, cdb_geocoder'
|
||||
superuser = true
|
||||
schema = cdb_dataservices_server
|
||||
|
||||
@@ -27,6 +27,23 @@ BEGIN
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
DO $$
BEGIN
    -- CREATE TYPE has no IF NOT EXISTS clause, so the catalog is checked
    -- first to make this script safe to run when the type already exists.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_type t
        INNER JOIN pg_namespace n ON (t.typnamespace = n.oid)
        WHERE t.typname = 'service_quota_info_batch'
          AND n.nspname = 'cdb_dataservices_server'
    ) THEN
        CREATE TYPE cdb_dataservices_server.service_quota_info_batch AS (
            service        cdb_dataservices_server.service_type,
            monthly_quota  NUMERIC,
            used_quota     NUMERIC,
            soft_limit     BOOLEAN,
            provider       TEXT,
            max_batch_size NUMERIC
        );
    END IF;
END $$;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_service_quota_info(
|
||||
username TEXT,
|
||||
orgname TEXT)
|
||||
@@ -92,6 +109,35 @@ RETURNS SETOF cdb_dataservices_server.service_quota_info AS $$
|
||||
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_service_quota_info_batch(
  username TEXT,
  orgname TEXT)
RETURNS SETOF cdb_dataservices_server.service_quota_info_batch AS $$
  # Returns every row of cdb_service_quota_info(username, orgname) with an
  # extra trailing max_batch_size value.
  from cartodb_services.bulk_geocoders import BATCH_GEOCODER_CLASS_BY_PROVIDER
  from cartodb_services.tools import Logger,LoggerConfig

  plpy.execute("SELECT cdb_dataservices_server._get_logger_config()")
  quota_query = "SELECT * from cdb_dataservices_server.cdb_service_quota_info({0},{1})".format(
      plpy.quote_nullable(username), plpy.quote_nullable(orgname))

  rows = []
  for info in plpy.execute(quota_query):
    # A batch class is only looked up for the hires geocoder; every other
    # service, and any provider without MAX_BATCH_SIZE, reports 1.
    batch_size = 1
    if info['service'] == 'hires_geocoder':
      geocoder_class = BATCH_GEOCODER_CLASS_BY_PROVIDER.get(info['provider'], None)
      if geocoder_class and hasattr(geocoder_class, 'MAX_BATCH_SIZE'):
        batch_size = geocoder_class.MAX_BATCH_SIZE

    rows.append([info['service'], info['monthly_quota'], info['used_quota'],
                 info['soft_limit'], info['provider'], batch_size])

  return rows
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_enough_quota(
|
||||
username TEXT,
|
||||
orgname TEXT,
|
||||
|
||||
97
server/extension/sql/21_bulk_geocode_street.sql
Normal file
97
server/extension/sql/21_bulk_geocode_street.sql
Normal file
@@ -0,0 +1,97 @@
|
||||
-- Row type returned by the bulk street geocoding functions.
-- TODO: could cartodb_id be replaced by rowid, maybe needing extra care for offset?
CREATE TYPE cdb_dataservices_server.geocoding AS (
    cartodb_id INTEGER,
    the_geom   GEOMETRY(Multipolygon, 4326),
    metadata   JSONB
);
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  # Dispatches a bulk street geocoding request to the provider-specific
  # function selected by the user's geocoder configuration.
  #   username, orgname: identify whose configuration is loaded
  #   searches: jsonb handed over untouched to the provider function
  # Raises an Exception when none of the known providers is enabled.
  from cartodb_services.metrics import metrics
  from cartodb_services.tools import Logger

  # quote_nullable, like the calls below, instead of hand-written quotes:
  # a quote character inside username can no longer escape the SQL literal.
  plpy.execute("SELECT cdb_dataservices_server._connect_to_redis({0})".format(plpy.quote_nullable(username)))
  # Not used below; kept so that a missing connection entry still fails here.
  redis_conn = GD["redis_connection_{0}".format(username)]['redis_metrics_connection']

  plpy.execute("SELECT cdb_dataservices_server._get_geocoder_config({0}, {1})".format(plpy.quote_nullable(username), plpy.quote_nullable(orgname)))
  user_geocoder_config = GD["user_geocoder_config_{0}".format(username)]

  plpy.execute("SELECT cdb_dataservices_server._get_logger_config()")
  logger_config = GD["logger_config"]
  logger = Logger(logger_config)

  params = {'searches': searches}

  with metrics('cdb_bulk_geocode_street_point', user_geocoder_config, logger, params):
    if user_geocoder_config.google_geocoder:
      provider_function = "_cdb_bulk_google_geocode_street_point"
    elif user_geocoder_config.heremaps_geocoder:
      provider_function = "_cdb_bulk_heremaps_geocode_street_point"
    elif user_geocoder_config.tomtom_geocoder:
      provider_function = "_cdb_bulk_tomtom_geocode_street_point"
    elif user_geocoder_config.mapbox_geocoder:
      provider_function = "_cdb_bulk_mapbox_geocode_street_point"
    else:
      raise Exception('Requested geocoder is not available')

    # provider_function comes from the fixed list above, never from user
    # input, so formatting it into the statement is safe; the user-supplied
    # values travel as bound parameters.
    plan = plpy.prepare("SELECT * FROM cdb_dataservices_server.{}($1, $2, $3); ".format(provider_function), ["text", "text", "jsonb"])
    return plpy.execute(plan, [username, orgname, searches])

$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_google_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  # Bulk street geocoding through Google Maps: builds the provider geocoder
  # from the user's configuration and delegates to run_street_point_geocoder.
  from cartodb_services import run_street_point_geocoder
  from cartodb_services.tools import LegacyServiceManager
  from cartodb_services.google import GoogleMapsBulkGeocoder

  service_manager = LegacyServiceManager('geocoder', username, orgname, GD)
  google_config = service_manager.config
  geocoder = GoogleMapsBulkGeocoder(
      google_config.google_client_id,
      google_config.google_api_key,
      service_manager.logger)
  return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_heremaps_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  # Bulk street geocoding through HERE Maps: builds the provider geocoder
  # from the user's configuration and delegates to run_street_point_geocoder.
  from cartodb_services import run_street_point_geocoder
  from cartodb_services.tools import LegacyServiceManager
  from cartodb_services.here import HereMapsBulkGeocoder

  service_manager = LegacyServiceManager('geocoder', username, orgname, GD)
  here_config = service_manager.config
  geocoder = HereMapsBulkGeocoder(
      here_config.heremaps_app_id,
      here_config.heremaps_app_code,
      service_manager.logger,
      here_config.heremaps_service_params)
  return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_tomtom_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  # Bulk street geocoding through TomTom: initialises cartodb_services,
  # builds the provider geocoder and delegates to run_street_point_geocoder.
  import cartodb_services
  from cartodb_services import run_street_point_geocoder
  from cartodb_services.tools import ServiceManager
  from cartodb_services.refactor.service.tomtom_geocoder_config import TomTomGeocoderConfigBuilder
  from cartodb_services.tomtom import TomTomBulkGeocoder
  from cartodb_services.tools import Logger

  cartodb_services.init(plpy, GD)

  # logger is not used further down; building it still requires
  # GD["logger_config"] to be present at this point.
  logger = Logger(GD["logger_config"])

  service_manager = ServiceManager('geocoder', TomTomGeocoderConfigBuilder, username, orgname, GD)
  tomtom_config = service_manager.config
  geocoder = TomTomBulkGeocoder(
      tomtom_config.tomtom_api_key,
      service_manager.logger,
      tomtom_config.service_params)
  return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_mapbox_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  # Bulk street geocoding through Mapbox: initialises cartodb_services,
  # builds the provider geocoder and delegates to run_street_point_geocoder.
  import cartodb_services
  from cartodb_services import run_street_point_geocoder
  from cartodb_services.tools import ServiceManager
  from cartodb_services.refactor.service.mapbox_geocoder_config import MapboxGeocoderConfigBuilder
  from cartodb_services.mapbox import MapboxBulkGeocoder
  from cartodb_services.tools import Logger

  cartodb_services.init(plpy, GD)

  # logger is not used further down; building it still requires
  # GD["logger_config"] to be present at this point.
  logger = Logger(GD["logger_config"])

  service_manager = ServiceManager('geocoder', MapboxGeocoderConfigBuilder, username, orgname, GD)
  mapbox_config = service_manager.config
  geocoder = MapboxBulkGeocoder(
      mapbox_config.mapbox_api_key,
      service_manager.logger,
      mapbox_config.service_params)
  return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
-- Check that the public function is callable, even with no data
|
||||
-- It should return NULL
|
||||
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elx');
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
|
||||
cdb_geocode_namedplace_point
|
||||
------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elx', 'Spain');
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
|
||||
cdb_geocode_namedplace_point
|
||||
------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elx', 'Valencia', 'Spain');
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
|
||||
cdb_geocode_namedplace_point
|
||||
------------------------------
|
||||
|
||||
@@ -35,42 +35,42 @@ INSERT INTO country_decoder (synonyms, iso2) VALUES (Array['spain', 'Spain'], 'E
|
||||
INSERT INTO admin1_decoder (admin1, synonyms, iso2) VALUES ('Valencia', Array['valencia', 'Valencia'], 'ES');
|
||||
-- This should return the point inserted above
|
||||
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elx');
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
|
||||
cdb_geocode_namedplace_point
|
||||
----------------------------------------------------
|
||||
0101000020E6100000637FD93D7958E63F2ECA6C9049A24340
|
||||
(1 row)
|
||||
|
||||
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elche');
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
|
||||
cdb_geocode_namedplace_point
|
||||
----------------------------------------------------
|
||||
0101000020E6100000637FD93D7958E63F2ECA6C9049A24340
|
||||
(1 row)
|
||||
|
||||
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elx', 'Spain');
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
|
||||
cdb_geocode_namedplace_point
|
||||
----------------------------------------------------
|
||||
0101000020E6100000637FD93D7958E63F2ECA6C9049A24340
|
||||
(1 row)
|
||||
|
||||
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elche', 'Spain');
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
|
||||
cdb_geocode_namedplace_point
|
||||
----------------------------------------------------
|
||||
0101000020E6100000637FD93D7958E63F2ECA6C9049A24340
|
||||
(1 row)
|
||||
|
||||
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elx', 'Valencia', 'Spain');
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
|
||||
cdb_geocode_namedplace_point
|
||||
----------------------------------------------------
|
||||
0101000020E6100000637FD93D7958E63F2ECA6C9049A24340
|
||||
(1 row)
|
||||
|
||||
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elche', 'valencia', 'Spain');
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
|
||||
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
|
||||
cdb_geocode_namedplace_point
|
||||
----------------------------------------------------
|
||||
0101000020E6100000637FD93D7958E63F2ECA6C9049A24340
|
||||
|
||||
0
server/lib/__init__.py
Normal file
0
server/lib/__init__.py
Normal file
0
server/lib/python/__init__.py
Normal file
0
server/lib/python/__init__.py
Normal file
0
server/lib/python/cartodb_services/__init__.py
Normal file
0
server/lib/python/cartodb_services/__init__.py
Normal file
@@ -33,3 +33,5 @@ def _reset():
|
||||
|
||||
plpy = None
|
||||
GD = None
|
||||
|
||||
from geocoder import run_street_point_geocoder, StreetPointBulkGeocoder
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
from google import GoogleMapsBulkGeocoder
|
||||
from here import HereMapsBulkGeocoder
|
||||
from tomtom import TomTomBulkGeocoder
|
||||
from mapbox import MapboxBulkGeocoder
|
||||
|
||||
# Bulk geocoder class for each provider name; looked up by
# cdb_service_quota_info_batch to report the provider's MAX_BATCH_SIZE.
BATCH_GEOCODER_CLASS_BY_PROVIDER = dict(
    google=GoogleMapsBulkGeocoder,
    heremaps=HereMapsBulkGeocoder,
    tomtom=TomTomBulkGeocoder,
    mapbox=MapboxBulkGeocoder
)
|
||||
176
server/lib/python/cartodb_services/cartodb_services/geocoder.py
Normal file
176
server/lib/python/cartodb_services/cartodb_services/geocoder.py
Normal file
@@ -0,0 +1,176 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from tools import QuotaExceededException, Logger
|
||||
from collections import namedtuple
|
||||
import json
|
||||
|
||||
|
||||
PRECISION_PRECISE = 'precise'
|
||||
PRECISION_INTERPOLATED = 'interpolated'
|
||||
|
||||
def geocoder_metadata(relevance, precision, match_types):
    """
    Build the metadata dictionary attached to a geocoding result.

    :param relevance: numeric score, stored rounded to two decimals
    :param precision: stored as given (see the PRECISION_* constants)
    :param match_types: stored as given
    :return: dict with the keys 'relevance', 'precision' and 'match_types'
    """
    metadata = dict(precision=precision, match_types=match_types)
    metadata['relevance'] = round(relevance, 2)
    return metadata
|
||||
|
||||
|
||||
def geocoder_error_response(message):
    """
    Build a single geocoding response that only carries an error.

    :param message: text stored under the 'error' metadata key
    :return: two-element list: an empty list in the coordinates slot and
        a metadata dict holding the error message
    """
    no_coordinates = []
    error_metadata = {'error': message}
    return [no_coordinates, error_metadata]


# Response for one search that produced no result
EMPTY_RESPONSE = [[], {}]
# Response for HTTP 429 and related
TOO_MANY_REQUESTS_ERROR_RESPONSE = geocoder_error_response('Rate limit exceeded')
# Complete _batch_geocode response when it is empty
EMPTY_BATCH_RESPONSE = []
|
||||
|
||||
|
||||
def compose_address(street, city=None, state=None, country=None):
    """
    Join the address parts that have a value into one comma-separated string.

    Parts that are None or empty are left out; the result is an empty
    string when every part is missing.
    """
    present_parts = [part for part in (street, city, state, country) if part]
    return ', '.join(present_parts)
|
||||
|
||||
|
||||
def run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches_string):
    """
    Geocode a JSON batch of street searches and account for the outcome.

    :param plpy: PL/Python database access module
    :param GD: PL/Python global dictionary; "logger_config" is read from it
        right after calling _get_logger_config()
    :param geocoder: object exposing bulk_geocode(searches)
    :param service_manager: provides assert_within_limits(), quota_service
        and logger
    :param username: only used as log context
    :param orgname: only used as log context
    :param searches_string: JSON-encoded array of searches
    :return: list of [id, the_geom or None, metadata as a JSON string];
        an empty list when QuotaExceededException is raised
    :raises: the JSON parsing error if searches_string cannot be decoded;
        a generic Exception for any other failure
    """
    import sys

    plpy.execute("SELECT cdb_dataservices_server._get_logger_config()")
    logger = Logger(GD["logger_config"])

    success_count, failed_count, empty_count = 0, 0, 0

    try:
        searches = json.loads(searches_string)
    except Exception as e:
        logger.error('Parsing searches', exception=e, data={'searches': searches_string})
        # Bare raise keeps the original traceback.
        raise

    try:
        service_manager.assert_within_limits(quota=False)
        geocode_results = geocoder.bulk_geocode(searches)
        results = []
        a_failed_one = None
        # The statement is the same for every result: prepare it at most
        # once per call, and only when a result with coordinates shows up.
        point_plan = None
        if geocode_results != EMPTY_BATCH_RESPONSE:
            for result in geocode_results:
                metadata = result[2] if len(result) > 2 else {}
                try:
                    if metadata.get('error', None):
                        results.append([result[0], None, json.dumps(metadata)])
                        a_failed_one = result
                        failed_count += 1
                    elif result[1] and len(result[1]) == 2:
                        if point_plan is None:
                            point_plan = plpy.prepare("SELECT ST_SetSRID(ST_MakePoint($1, $2), 4326) as the_geom; ", ["double precision", "double precision"])
                        point = plpy.execute(point_plan, result[1], 1)[0]
                        results.append([result[0], point['the_geom'], json.dumps(metadata)])
                        success_count += 1
                    else:
                        results.append([result[0], None, json.dumps(metadata)])
                        empty_count += 1
                except Exception as e:
                    # A failure on one result must not abort the batch: the
                    # row is reported as failed with the error in metadata.
                    logger.error("Error processing geocode", sys.exc_info(), data={"username": username, "orgname": orgname})
                    metadata['processing_error'] = 'Error: {}'.format(e.message)
                    results.append([result[0], None, json.dumps(metadata)])
                    failed_count += 1

        # Searches the geocoder returned no entry for at all.
        missing_count = len(searches) - success_count - failed_count - empty_count

        summary = {
            "username": username,
            "orgname": orgname,
            "success_count": success_count,
            "empty_count": empty_count,
            "missing_count": missing_count,
            "failed_count": failed_count
        }
        if a_failed_one:
            summary["failed"] = str(a_failed_one)
            logger.warning("failed geocoding", data=summary)
        else:
            logger.debug("finished geocoding", data=summary)

        quota_service = service_manager.quota_service
        quota_service.increment_success_service_use(success_count)
        quota_service.increment_empty_service_use(empty_count + missing_count)
        quota_service.increment_failed_service_use(failed_count)

        return results
    except QuotaExceededException as qe:
        logger.debug('QuotaExceededException at run_street_point_geocoder', qe,
                     data={"username": username, "orgname": orgname})
        service_manager.quota_service.increment_failed_service_use(len(searches))
        return []
    except BaseException as e:
        service_manager.quota_service.increment_failed_service_use(len(searches))
        service_manager.logger.error('Error trying to bulk geocode street point', sys.exc_info(), data={"username": username, "orgname": orgname})
        raise Exception('Error trying to bulk geocode street')
    finally:
        # Runs on every path above, including the early returns.
        service_manager.quota_service.increment_total_service_use(len(searches))
|
||||
|
||||
|
||||
StreetGeocoderSearch = namedtuple('StreetGeocoderSearch', 'id address city state country')


class StreetPointBulkGeocoder:
    """
    Classes extending StreetPointBulkGeocoder should implement:
        * _batch_geocode(street_geocoder_searches)
        * MAX_BATCH_SIZE

    If they want to provide an alternative serial (for small batches):
        * _should_use_batch(street_geocoder_searches)
        * _serial_geocode(street_geocoder_searches)

    bulk_geocode also logs through self._logger, which this class does not
    set itself.
    """

    SEARCH_KEYS = ['id', 'address', 'city', 'state', 'country']

    def bulk_geocode(self, decoded_searches):
        """
        :param decoded_searches: JSON array (already decoded) of objects
            holding the SEARCH_KEYS; missing keys are read as None
        :return: array of tuples with three elements:
            * id
            * coordinate pair (array of two elements, passed in that order
              to ST_MakePoint by run_street_point_geocoder), or an empty
              array when there is none
            * metadata dict; holds an 'error' entry when geocoding failed
        :raises Exception: if there are more searches than MAX_BATCH_SIZE
        """
        street_geocoder_searches = [
            StreetGeocoderSearch(*[search.get(k, None) for k in self.SEARCH_KEYS])
            for search in decoded_searches]

        if len(street_geocoder_searches) > self.MAX_BATCH_SIZE:
            raise Exception("Batch size can't be larger than {}".format(self.MAX_BATCH_SIZE))
        try:
            if self._should_use_batch(street_geocoder_searches):
                return self._batch_geocode(street_geocoder_searches)
            else:
                return self._serial_geocode(street_geocoder_searches)
        except Exception as e:
            msg = "Error running geocode: {}".format(e)
            self._logger.error(msg, e)
            results = []
            for search in street_geocoder_searches:
                # One fresh response per search, so callers that modify a
                # result's metadata do not affect the other results.
                # search.id comes from .get() above, so a search without an
                # id gives None here instead of a KeyError.
                lng_lat, metadata = geocoder_error_response(msg)
                results.append((search.id, lng_lat, metadata))
            return results

    def _batch_geocode(self, street_geocoder_searches):
        raise NotImplementedError('Subclasses must implement _batch_geocode')

    def _serial_geocode(self, street_geocoder_searches):
        raise NotImplementedError('Subclasses must implement _serial_geocode')

    def _should_use_batch(self, street_geocoder_searches):
        # Batch mode unless a subclass decides otherwise.
        return True
|
||||
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
from geocoder import GoogleMapsGeocoder
|
||||
from bulk_geocoder import GoogleMapsBulkGeocoder
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
from multiprocessing import Pool
|
||||
from exceptions import MalformedResult
|
||||
from cartodb_services import StreetPointBulkGeocoder
|
||||
from cartodb_services.geocoder import compose_address, geocoder_error_response
|
||||
from cartodb_services.google import GoogleMapsGeocoder
|
||||
|
||||
|
||||
def async_geocoder(geocoder, address, components):
    """
    Run one geocoding request; this is the callable GoogleMapsBulkGeocoder
    submits to its worker pool through apply_async.

    :return: whatever geocoder.geocode(address=..., components=...) returns
    """
    response = geocoder.geocode(address=address, components=components)
    return response
|
||||
|
||||
|
||||
class GoogleMapsBulkGeocoder(GoogleMapsGeocoder, StreetPointBulkGeocoder):
    """A Google Maps Geocoder wrapper for python"""
    MAX_BATCH_SIZE = 1000
    MIN_BATCHED_SEARCH = 2  # Batched is a parallelization
    PARALLEL_PROCESSES = 13

    def __init__(self, client_id, client_secret, logger):
        GoogleMapsGeocoder.__init__(self, client_id, client_secret, logger)

    def _should_use_batch(self, searches):
        """Use the worker pool from MIN_BATCHED_SEARCH searches upwards."""
        return len(searches) >= self.MIN_BATCHED_SEARCH

    def _serial_geocode(self, searches):
        """
        Geocode the searches one after another in this process.

        :return: list of (cartodb_id, lng_lat, metadata); a search that
            raises gets an error response instead of aborting the rest
        """
        results = []
        for search in searches:
            (cartodb_id, street, city, state, country) = search
            try:
                lng_lat, metadata = self.geocode_meta(street, city, state, country)
            except Exception as e:
                self._logger.error("Error geocoding", e)
                lng_lat, metadata = geocoder_error_response("Error geocoding")
            results.append((cartodb_id, lng_lat, metadata))
        return results

    def _batch_geocode(self, searches):
        """
        Geocode the searches in parallel on a pool of PARALLEL_PROCESSES
        workers.

        Searches whose composed address is empty are not submitted and get
        no entry in the result.

        :return: list of (cartodb_id, lng_lat, metadata); a request that
            fails gets an error response instead of aborting the rest
        """
        bulk_results = {}
        pool = Pool(processes=self.PARALLEL_PROCESSES)
        try:
            for search in searches:
                (cartodb_id, street, city, state, country) = search
                address = compose_address(street, city, state, country)
                if address:
                    components = self._build_optional_parameters(city, state, country)
                    result = pool.apply_async(async_geocoder,
                                              (self.geocoder, address, components))
                    bulk_results[cartodb_id] = result
            pool.close()
        except Exception:
            # Do not leave worker processes behind when scheduling fails;
            # without this the pool would never be closed nor joined.
            pool.terminate()
            raise
        finally:
            # close() or terminate() has been called on every path.
            pool.join()

        try:
            results = []
            for cartodb_id, bulk_result in bulk_results.items():
                try:
                    lng_lat, metadata = self._process_results(bulk_result.get())
                except Exception as e:
                    msg = 'Error at Google async_geocoder'
                    self._logger.error(msg, e)
                    lng_lat, metadata = geocoder_error_response(msg)

                results.append((cartodb_id, lng_lat, metadata))
            return results
        except Exception as e:
            self._logger.error('General error', exception=e)
            # Bare raise keeps the original traceback.
            raise
|
||||
@@ -5,6 +5,7 @@ import googlemaps
|
||||
import base64
|
||||
from exceptions import InvalidGoogleCredentials
|
||||
|
||||
|
||||
class GoogleMapsClientFactory():
|
||||
clients = {}
|
||||
|
||||
@@ -13,11 +14,14 @@ class GoogleMapsClientFactory():
|
||||
cache_key = "{}:{}:{}".format(client_id, client_secret, channel)
|
||||
client = cls.clients.get(cache_key)
|
||||
if not client:
|
||||
cls.assert_valid_crendentials(client_secret)
|
||||
client = googlemaps.Client(
|
||||
client_id=client_id,
|
||||
client_secret=client_secret,
|
||||
channel=channel)
|
||||
if client_id:
|
||||
cls.assert_valid_crendentials(client_secret)
|
||||
client = googlemaps.Client(
|
||||
client_id=client_id,
|
||||
client_secret=client_secret,
|
||||
channel=channel)
|
||||
else:
|
||||
client = googlemaps.Client(key=client_secret)
|
||||
cls.clients[cache_key] = client
|
||||
return client
|
||||
|
||||
|
||||
@@ -1,16 +1,41 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import googlemaps
|
||||
from urlparse import parse_qs
|
||||
|
||||
from exceptions import MalformedResult
|
||||
from cartodb_services.geocoder import compose_address, geocoder_metadata, PRECISION_PRECISE, PRECISION_INTERPOLATED, EMPTY_RESPONSE
|
||||
from cartodb_services.google.exceptions import InvalidGoogleCredentials
|
||||
from client_factory import GoogleMapsClientFactory
|
||||
|
||||
PARTIAL_FACTOR = 0.8
|
||||
RELEVANCE_BY_LOCATION_TYPE = {
|
||||
'ROOFTOP': 1,
|
||||
'GEOMETRIC_CENTER': 0.9,
|
||||
'RANGE_INTERPOLATED': 0.8,
|
||||
'APPROXIMATE': 0.7
|
||||
}
|
||||
PRECISION_BY_LOCATION_TYPE = {
|
||||
'ROOFTOP': PRECISION_PRECISE,
|
||||
'GEOMETRIC_CENTER': PRECISION_PRECISE,
|
||||
'RANGE_INTERPOLATED': PRECISION_INTERPOLATED,
|
||||
'APPROXIMATE': PRECISION_INTERPOLATED
|
||||
}
|
||||
MATCH_TYPE_BY_MATCH_LEVEL = {
|
||||
'point_of_interest': 'point_of_interest',
|
||||
'country': 'country',
|
||||
'administrative_area_level_1': 'state',
|
||||
'administrative_area_level_2': 'county',
|
||||
'locality': 'locality',
|
||||
'sublocality': 'district',
|
||||
'street_address': 'street',
|
||||
'intersection': 'intersection',
|
||||
'street_number': 'street_number',
|
||||
'postal_code': 'postal_code'
|
||||
}
|
||||
|
||||
class GoogleMapsGeocoder:
|
||||
"""A Google Maps Geocoder wrapper for python"""
|
||||
|
||||
class GoogleMapsGeocoder():
|
||||
|
||||
def __init__(self, client_id, client_secret, logger):
|
||||
if client_id is None:
|
||||
@@ -20,25 +45,49 @@ class GoogleMapsGeocoder:
|
||||
self.geocoder = GoogleMapsClientFactory.get(self.client_id, self.client_secret, self.channel)
|
||||
self._logger = logger
|
||||
|
||||
def geocode(self, searchtext, city=None, state=None,
|
||||
country=None):
|
||||
def geocode(self, searchtext, city=None, state=None, country=None):
|
||||
return self.geocode_meta(searchtext, city, state, country)[0]
|
||||
|
||||
def geocode_meta(self, searchtext, city=None, state=None, country=None):
|
||||
address = compose_address(searchtext, city, state, country)
|
||||
try:
|
||||
opt_params = self._build_optional_parameters(city, state, country)
|
||||
results = self.geocoder.geocode(address=searchtext,
|
||||
results = self.geocoder.geocode(address=address,
|
||||
components=opt_params)
|
||||
if results:
|
||||
return self._extract_lng_lat_from_result(results[0])
|
||||
else:
|
||||
return []
|
||||
except KeyError:
|
||||
return self._process_results(results)
|
||||
except KeyError as e:
|
||||
self._logger.error('address: {}'.format(address), e)
|
||||
raise MalformedResult()
|
||||
|
||||
def _process_results(self, results):
    """Turn a raw result list into a ``[lng_lat, metadata]`` pair.

    Only the first entry of ``results`` is used. A falsy ``results``
    yields ``EMPTY_RESPONSE``.
    """
    if not results:
        return EMPTY_RESPONSE
    lng_lat = self._extract_lng_lat_from_result(results[0])
    metadata = self._extract_metadata_from_result(results[0])
    return [lng_lat, metadata]
|
||||
|
||||
def _extract_lng_lat_from_result(self, result):
|
||||
location = result['geometry']['location']
|
||||
longitude = location['lng']
|
||||
latitude = location['lat']
|
||||
return [longitude, latitude]
|
||||
|
||||
def _extract_metadata_from_result(self, result):
    """Build the geocoder metadata for a single result.

    Relevance is the base value for the result's ``location_type``,
    multiplied by ``PARTIAL_FACTOR`` when the result carries a truthy
    ``partial_match``. Entries of ``result['types']`` with no known
    mapping are dropped from the match types.
    """
    location_type = result['geometry']['location_type']
    base_relevance = RELEVANCE_BY_LOCATION_TYPE[location_type]
    if result.get('partial_match', False):
        factor = PARTIAL_FACTOR
    else:
        factor = 1
    mapped_types = []
    for level in result['types']:
        mapped_types.append(MATCH_TYPE_BY_MATCH_LEVEL.get(level, None))
    relevance = base_relevance * factor
    precision = PRECISION_BY_LOCATION_TYPE[location_type]
    return geocoder_metadata(relevance, precision, filter(None, mapped_types))
|
||||
|
||||
|
||||
def _build_optional_parameters(self, city=None, state=None,
|
||||
country=None):
|
||||
optional_params = {}
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
from geocoder import HereMapsGeocoder
|
||||
from bulk_geocoder import HereMapsBulkGeocoder
|
||||
from routing import HereMapsRoutingIsoline
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import requests, time, zipfile, io, csv, cStringIO
|
||||
import xml.etree.ElementTree as ET
|
||||
from collections import namedtuple
|
||||
from requests.adapters import HTTPAdapter
|
||||
from cartodb_services import StreetPointBulkGeocoder
|
||||
from cartodb_services.here import HereMapsGeocoder
|
||||
from cartodb_services.geocoder import geocoder_metadata, geocoder_error_response
|
||||
from cartodb_services.metrics import Traceable
|
||||
from cartodb_services.tools.exceptions import ServiceException
|
||||
|
||||
|
||||
HereJobStatus = namedtuple('HereJobStatus', 'total_count processed_count status')
|
||||
|
||||
class HereMapsBulkGeocoder(HereMapsGeocoder, StreetPointBulkGeocoder):
    """Bulk street geocoder that submits searches as HERE batch geocoder jobs."""

    MAX_BATCH_SIZE = 1000000  # From the docs
    MIN_BATCHED_SEARCH = 100  # Under this, serial will be used
    BATCH_URL = 'https://batch.geocoder.cit.api.here.com/6.2/jobs'
    # https://developer.here.com/documentation/batch-geocoder/topics/read-batch-request-output.html
    META_COLS = ['relevance', 'matchType', 'matchCode', 'matchLevel', 'matchQualityStreet']
    MAX_STALLED_RETRIES = 100
    BATCH_RETRY_SLEEP_S = 5
    JOB_FINAL_STATES = ['completed', 'cancelled', 'deleted', 'failed']

    def __init__(self, app_id, app_code, logger, service_params=None, maxresults=HereMapsGeocoder.DEFAULT_MAXRESULTS):
        HereMapsGeocoder.__init__(self, app_id, app_code, logger, service_params, maxresults)
        self.session = requests.Session()
        self.session.mount(self.BATCH_URL,
                           HTTPAdapter(max_retries=self.max_retries))
        # Sent as query parameters with every batch request.
        self.credentials_params = {
            'app_id': self.app_id,
            'app_code': self.app_code,
        }

    def _should_use_batch(self, searches):
        """Return True when there are at least MIN_BATCHED_SEARCH searches."""
        return len(searches) >= self.MIN_BATCHED_SEARCH

    def _serial_geocode(self, searches):
        """Geocode the searches one at a time with ``geocode_meta``.

        :param searches: iterable of
            ``(search_id, address, city, state, country)`` tuples
        :return: list of ``(search_id, lng_lat, metadata)`` tuples; a search
            that raises is reported through ``geocoder_error_response``
        """
        results = []
        for search in searches:
            (search_id, address, city, state, country) = search
            try:
                result = self.geocode_meta(searchtext=address, city=city, state=state, country=country)
            except Exception as e:
                self._logger.error("Error geocoding", e)
                result = geocoder_error_response("Error geocoding")
            results.append((search_id, result[0], result[1]))
        return results

    def _batch_geocode(self, searches):
        """Submit the searches as one job, poll until it ends, download results.

        :raises Exception: when the processed count has not changed for more
            than MAX_STALLED_RETRIES consecutive polls
        """
        request_id = self._send_batch(self._searches_to_csv(searches))

        last_processed = 0
        stalled_retries = 0
        # https://developer.here.com/documentation/batch-geocoder/topics/job-status.html
        while True:
            job_info = self._job_status(request_id)
            if job_info.processed_count == last_processed:
                stalled_retries += 1
                if stalled_retries > self.MAX_STALLED_RETRIES:
                    raise Exception('Too many retries for job {}'.format(request_id))
            else:
                stalled_retries = 0
                last_processed = job_info.processed_count

            # NOTE(review): results are downloaded for every final state,
            # including 'failed' and 'cancelled' -- confirm this is intended.
            if job_info.status in self.JOB_FINAL_STATES:
                break
            else:
                time.sleep(self.BATCH_RETRY_SLEEP_S)

        results = self._download_results(request_id)

        return results

    def _searches_to_csv(self, searches):
        """Serialize the searches as the pipe-delimited request body.

        Each search is read through its ``id``, ``address``, ``city``,
        ``state`` and ``country`` attributes.
        """
        queue = cStringIO.StringIO()
        writer = csv.writer(queue, delimiter='|')
        writer.writerow(['recId', 'searchText', 'country'])

        for search in searches:
            fields = [search.address, search.city, search.state]
            search_text = ', '.join(filter(None, fields))
            row = [s.encode("utf-8") if s else ''
                   for s in [str(search.id), search_text, search.country]]
            writer.writerow(row)

        return queue.getvalue()

    def _send_batch(self, data):
        """POST the batch body and return the request id of the created job.

        :raises ServiceException: when the response status is not 200
        """
        cols = 'displayLatitude,displayLongitude,' + ','.join(self.META_COLS)
        request_params = self.credentials_params.copy()
        request_params.update({
            'gen': 8,
            'action': 'run',
            'header': 'true',
            'inDelim': '|',
            'outDelim': '|',
            'outCols': cols,
            'outputcombined': 'true'
        })

        response = self.session.post(self.BATCH_URL, data=data,
                                     params=request_params,
                                     timeout=(self.connect_timeout, self.read_timeout))

        if response.status_code == 200:
            root = ET.fromstring(response.text)
            return root.find('./Response/MetaInfo/RequestId').text
        else:
            raise ServiceException("Error sending HERE batch", response)

    def _job_status(self, request_id):
        """Fetch the job status document and return it as a HereJobStatus."""
        polling_params = self.credentials_params.copy()
        polling_params.update({'action': 'status'})
        polling_r = self.session.get("{}/{}".format(self.BATCH_URL, request_id),
                                     params=polling_params,
                                     timeout=(self.connect_timeout, self.read_timeout))
        polling_root = ET.fromstring(polling_r.text)
        return HereJobStatus(
            total_count=int(polling_root.find('./Response/TotalCount').text),
            processed_count=int(polling_root.find('./Response/ProcessedCount').text),
            status=polling_root.find('./Response/Status').text)

    def _download_results(self, job_id):
        """Download the job's result archive and parse its ``*_out.txt`` files.

        :return: list of ``(recId, [longitude, latitude], metadata)`` tuples,
            one per row whose ``SeqNumber`` is ``'1'``; coordinates are kept
            as the strings read from the file
        """
        result_r = self.session.get("{}/{}/result".format(self.BATCH_URL, job_id),
                                    params=self.credentials_params,
                                    timeout=(self.connect_timeout, self.read_timeout))

        results = []
        with zipfile.ZipFile(io.BytesIO(result_r.content)) as root_zip:
            for name in root_zip.namelist():
                if not name.endswith('_out.txt'):
                    continue
                reader = csv.DictReader(root_zip.open(name), delimiter='|')
                for row in reader:
                    if row['SeqNumber'] != '1':  # First per requested data
                        continue
                    # DictReader yields '' for a blank cell and None for a
                    # short row, so a dict default alone would never apply
                    # while the column is in the header.
                    match_type_key = row.get('matchType') or 'pointAddress'
                    precision = self.PRECISION_BY_MATCH_TYPE[match_type_key]
                    match_type = self.MATCH_TYPE_BY_MATCH_LEVEL.get(row['matchLevel'], None)
                    results.append((row['recId'],
                                    [row['displayLongitude'], row['displayLatitude']],
                                    geocoder_metadata(
                                        float(row['relevance']),
                                        precision,
                                        [match_type] if match_type else []
                                    )))

        return results
|
||||
|
||||
@@ -6,9 +6,9 @@ import requests
|
||||
|
||||
from requests.adapters import HTTPAdapter
|
||||
from exceptions import *
|
||||
from cartodb_services.geocoder import PRECISION_PRECISE, PRECISION_INTERPOLATED, geocoder_metadata, EMPTY_RESPONSE
|
||||
from cartodb_services.metrics import Traceable
|
||||
|
||||
|
||||
class HereMapsGeocoder(Traceable):
|
||||
'A Here Maps Geocoder wrapper for python'
|
||||
|
||||
@@ -52,6 +52,23 @@ class HereMapsGeocoder(Traceable):
|
||||
'strictlanguagemode'
|
||||
] + ADDRESS_PARAMS
|
||||
|
||||
PRECISION_BY_MATCH_TYPE = {
|
||||
'pointAddress': PRECISION_PRECISE,
|
||||
'interpolated': PRECISION_INTERPOLATED
|
||||
}
|
||||
MATCH_TYPE_BY_MATCH_LEVEL = {
|
||||
'landmark': 'point_of_interest',
|
||||
'country': 'country',
|
||||
'state': 'state',
|
||||
'county': 'county',
|
||||
'city': 'locality',
|
||||
'district': 'district',
|
||||
'street': 'street',
|
||||
'intersection': 'intersection',
|
||||
'houseNumber': 'street_number',
|
||||
'postalCode': 'postal_code'
|
||||
}
|
||||
|
||||
def __init__(self, app_id, app_code, logger, service_params=None, maxresults=DEFAULT_MAXRESULTS):
|
||||
service_params = service_params or {}
|
||||
self.app_id = app_id
|
||||
@@ -65,12 +82,15 @@ class HereMapsGeocoder(Traceable):
|
||||
self.max_retries = service_params.get('max_retries', self.MAX_RETRIES)
|
||||
|
||||
def geocode(self, **kwargs):
    """Return only the first element of what ``geocode_meta`` returns."""
    response = self.geocode_meta(**kwargs)
    return response[0]
|
||||
|
||||
def geocode_meta(self, **kwargs):
|
||||
params = {}
|
||||
for key, value in kwargs.iteritems():
|
||||
if value and value.strip():
|
||||
params[key] = value
|
||||
if not params:
|
||||
return []
|
||||
return EMPTY_RESPONSE
|
||||
return self._execute_geocode(params)
|
||||
|
||||
def _execute_geocode(self, params):
|
||||
@@ -78,11 +98,13 @@ class HereMapsGeocoder(Traceable):
|
||||
raise BadGeocodingParams(params)
|
||||
try:
|
||||
response = self._perform_request(params)
|
||||
results = response['Response']['View'][0]['Result'][0]
|
||||
return self._extract_lng_lat_from_result(results)
|
||||
result = response['Response']['View'][0]['Result'][0]
|
||||
return [self._extract_lng_lat_from_result(result),
|
||||
self._extract_metadata_from_result(result)]
|
||||
except IndexError:
|
||||
return []
|
||||
except KeyError:
|
||||
return EMPTY_RESPONSE
|
||||
except KeyError as e:
|
||||
self._logger.error('params: {}'.format(params), e)
|
||||
raise MalformedResult()
|
||||
|
||||
def _perform_request(self, params):
|
||||
@@ -105,7 +127,7 @@ class HereMapsGeocoder(Traceable):
|
||||
self._logger.warning('Error 4xx trying to geocode street using HERE',
|
||||
data={"response": response.json(), "params":
|
||||
params})
|
||||
return []
|
||||
return EMPTY_RESPONSE
|
||||
else:
|
||||
self._logger.error('Error trying to geocode street using HERE',
|
||||
data={"response": response.json(), "params":
|
||||
@@ -118,3 +140,14 @@ class HereMapsGeocoder(Traceable):
|
||||
latitude = location['DisplayPosition']['Latitude']
|
||||
|
||||
return [longitude, latitude]
|
||||
|
||||
def _extract_metadata_from_result(self, result):
    """Build the geocoder metadata for a single HERE result.

    A result without ``MatchType`` is treated as ``'pointAddress'``, see
    https://stackoverflow.com/questions/51285622/missing-matchtype-at-here-geocoding-responses
    """
    match_type_key = result.get('MatchType', 'pointAddress')
    precision = self.PRECISION_BY_MATCH_TYPE[match_type_key]
    mapped_level = self.MATCH_TYPE_BY_MATCH_LEVEL.get(result['MatchLevel'], None)
    relevance = result['Relevance']
    if mapped_level:
        match_types = [mapped_level]
    else:
        match_types = []
    return geocoder_metadata(relevance, precision, match_types)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from routing import MapboxRouting, MapboxRoutingResponse
|
||||
from geocoder import MapboxGeocoder
|
||||
from bulk_geocoder import MapboxBulkGeocoder
|
||||
from isolines import MapboxIsolines, MapboxIsochronesResponse
|
||||
from matrix_client import MapboxMatrixClient
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
import requests
|
||||
from cartodb_services import StreetPointBulkGeocoder
|
||||
from cartodb_services.mapbox import MapboxGeocoder
|
||||
from iso3166 import countries
|
||||
from cartodb_services.tools.country import country_to_iso3
|
||||
|
||||
|
||||
class MapboxBulkGeocoder(MapboxGeocoder, StreetPointBulkGeocoder):
    """Bulk street geocoder that sends several free-text searches per request."""

    MAX_BATCH_SIZE = 50  # From the docs
    MIN_BATCHED_SEARCH = 0
    READ_TIMEOUT = 60
    CONNECT_TIMEOUT = 10
    MAX_RETRIES = 1

    def __init__(self, token, logger, service_params=None):
        MapboxGeocoder.__init__(self, token, logger, service_params)
        # service_params defaults to None; normalise before reading from it.
        service_params = service_params or {}
        self.connect_timeout = service_params.get('connect_timeout', self.CONNECT_TIMEOUT)
        self.read_timeout = service_params.get('read_timeout', self.READ_TIMEOUT)
        self.max_retries = service_params.get('max_retries', self.MAX_RETRIES)
        self.session = requests.Session()

    def _should_use_batch(self, searches):
        """Return True when there are at least MIN_BATCHED_SEARCH searches."""
        return len(searches) >= self.MIN_BATCHED_SEARCH

    def _serial_geocode(self, searches):
        """Geocode the searches one at a time with ``geocode_meta``.

        :return: list of ``(search_id, lng_lat, metadata)`` tuples
        """
        results = []
        for search in searches:
            elements = self._encoded_elements(search)
            result = self.geocode_meta(*elements)

            results.append((search[0], result[0], result[1]))
        return results

    def _encoded_elements(self, search):
        """Return ``(address, city, state, country)`` ready for a request.

        Text fields are UTF-8 encoded and the country is converted with
        ``_country_code``; falsy fields become None.
        """
        (search_id, address, city, state, country) = search
        address = address.encode('utf-8') if address else None
        city = city.encode('utf-8') if city else None
        state = state.encode('utf-8') if state else None
        country = self._country_code(country) if country else None
        return address, city, state, country

    def _batch_geocode(self, searches):
        """Geocode all searches in one batched free-text request.

        A single search goes through the serial path. Results are paired
        with searches by position.

        :return: list of ``(search_id, lng_lat, metadata)`` tuples
        """
        if not searches:
            # Nothing to geocode: skip the remote call altogether.
            return []
        if len(searches) == 1:
            return self._serial_geocode(searches)
        else:
            frees = []
            for search in searches:
                elements = self._encoded_elements(search)
                free = ', '.join([elem for elem in elements if elem])
                frees.append(free)

            full_results = self.geocode_free_text_meta(frees)
            results = []
            for s, r in zip(searches, full_results):
                results.append((s[0], r[0], r[1]))
            return results

    def _country_code(self, country):
        """Return the lowercase alpha-2 code for ``country``.

        Returns None when ``country_to_iso3`` gives no ISO3 code for it.
        """
        country_iso3166 = None
        country_iso3 = country_to_iso3(country)
        if country_iso3:
            country_iso3166 = countries.get(country_iso3).alpha2.lower()

        return country_iso3166
|
||||
|
||||
@@ -5,6 +5,7 @@ Python client for the Mapbox Geocoder service.
|
||||
import json
|
||||
import requests
|
||||
from mapbox import Geocoder
|
||||
from cartodb_services.geocoder import PRECISION_PRECISE, PRECISION_INTERPOLATED, geocoder_metadata, EMPTY_RESPONSE, EMPTY_BATCH_RESPONSE, TOO_MANY_REQUESTS_ERROR_RESPONSE, geocoder_error_response
|
||||
from cartodb_services.metrics import Traceable
|
||||
from cartodb_services.tools.exceptions import ServiceException
|
||||
from cartodb_services.tools.qps import qps_retry
|
||||
@@ -22,6 +23,17 @@ ENTRY_COORDINATES = 'coordinates'
|
||||
ENTRY_TYPE = 'type'
|
||||
TYPE_POINT = 'Point'
|
||||
|
||||
MATCH_TYPE_BY_MATCH_LEVEL = {
|
||||
'poi': 'point_of_interest',
|
||||
'poi.landmark': 'point_of_interest',
|
||||
'place': 'point_of_interest',
|
||||
'country': 'country',
|
||||
'region': 'state',
|
||||
'locality': 'locality',
|
||||
'district': 'district',
|
||||
'address': 'street'
|
||||
}
|
||||
|
||||
|
||||
class MapboxGeocoder(Traceable):
|
||||
'''
|
||||
@@ -40,18 +52,24 @@ class MapboxGeocoder(Traceable):
|
||||
def _parse_geocoder_response(self, response):
|
||||
json_response = json.loads(response)
|
||||
|
||||
# If Mapbox returns more that one result, take the first one
|
||||
if json_response:
|
||||
if type(json_response) == list:
|
||||
json_response = json_response[0]
|
||||
if type(json_response) != list:
|
||||
json_response = [json_response]
|
||||
|
||||
if json_response[ENTRY_FEATURES]:
|
||||
feature = json_response[ENTRY_FEATURES][0]
|
||||
return self._extract_lng_lat_from_feature(feature)
|
||||
else:
|
||||
return []
|
||||
result = []
|
||||
for a_json_response in json_response:
|
||||
if a_json_response[ENTRY_FEATURES]:
|
||||
feature = a_json_response[ENTRY_FEATURES][0]
|
||||
result.append([
|
||||
self._extract_lng_lat_from_feature(feature),
|
||||
self._extract_metadata_from_result(feature)
|
||||
]
|
||||
)
|
||||
else:
|
||||
result.append(EMPTY_RESPONSE)
|
||||
return result
|
||||
else:
|
||||
return []
|
||||
return EMPTY_BATCH_RESPONSE
|
||||
|
||||
def _extract_lng_lat_from_feature(self, feature):
|
||||
geometry = feature[ENTRY_GEOMETRY]
|
||||
@@ -64,6 +82,23 @@ class MapboxGeocoder(Traceable):
|
||||
latitude = location[1]
|
||||
return [longitude, latitude]
|
||||
|
||||
def _extract_metadata_from_result(self, result):
    """Build the geocoder metadata for a single Mapbox feature.

    Precision is interpolated when the feature geometry carries a truthy
    ``interpolated`` flag and precise otherwise. Entries of
    ``result['place_type']`` with no known mapping are dropped.
    """
    is_interpolated = result[ENTRY_GEOMETRY].get('interpolated', False)
    precision = PRECISION_INTERPOLATED if is_interpolated else PRECISION_PRECISE

    mapped_types = []
    for level in result['place_type']:
        mapped_types.append(MATCH_TYPE_BY_MATCH_LEVEL.get(level, None))

    relevance = self._normalize_relevance(float(result['relevance']))
    return geocoder_metadata(relevance, precision, filter(None, mapped_types))
|
||||
|
||||
def _normalize_relevance(self, relevance):
|
||||
return 1 if relevance >= 0.99 else relevance
|
||||
|
||||
def _validate_input(self, searchtext, city=None, state_province=None,
|
||||
country=None):
|
||||
if searchtext and searchtext.strip():
|
||||
@@ -78,8 +113,28 @@ class MapboxGeocoder(Traceable):
|
||||
@qps_retry(qps=10)
def geocode(self, searchtext, city=None, state_province=None,
            country=None):
    """
    Geocode through ``geocode_meta`` and return only the coordinates.

    :param searchtext: forwarded to ``geocode_meta``
    :param city: forwarded to ``geocode_meta``
    :param state_province: forwarded to ``geocode_meta``
    :param country: Country ISO 3166 code
    :return: [x, y] on success, raises ServiceException on error
    """
    response = self.geocode_meta(searchtext, city, state_province, country)
    if not response:
        return EMPTY_RESPONSE

    error_message = response[1].get('error', None)
    if error_message:
        raise ServiceException(error_message, None)
    return response[0]
|
||||
|
||||
@qps_retry(qps=10)
|
||||
def geocode_meta(self, searchtext, city=None, state_province=None,
|
||||
country=None):
|
||||
if not self._validate_input(searchtext, city, state_province, country):
|
||||
return []
|
||||
return EMPTY_RESPONSE
|
||||
|
||||
address = []
|
||||
if searchtext and searchtext.strip():
|
||||
@@ -89,30 +144,52 @@ class MapboxGeocoder(Traceable):
|
||||
if state_province:
|
||||
address.append(normalize(state_province))
|
||||
|
||||
free_search = ', '.join(address)
|
||||
|
||||
response = self.geocode_free_text_meta([free_search], country)
|
||||
return response[0] if response else EMPTY_RESPONSE
|
||||
|
||||
@qps_retry(qps=10)
|
||||
def geocode_free_text_meta(self, free_searches, country=None):
|
||||
"""
|
||||
:param free_searches: Free text searches
|
||||
:param country: Country ISO 3166 code
|
||||
:return: list of [x, y] on success, [] on error
|
||||
"""
|
||||
country = [country] if country else None
|
||||
|
||||
try:
|
||||
response = self._geocoder.forward(address=', '.join(address).decode('utf-8'),
|
||||
country=country,
|
||||
limit=1)
|
||||
|
||||
free_search = ';'.join([self._escape(fs) for fs in free_searches])
|
||||
response = self._geocoder.forward(address=free_search.decode('utf-8'),
|
||||
limit=1,
|
||||
country=country)
|
||||
if response.status_code == requests.codes.ok:
|
||||
return self._parse_geocoder_response(response.text)
|
||||
elif response.status_code == requests.codes.too_many_requests:
|
||||
return [TOO_MANY_REQUESTS_ERROR_RESPONSE] * len(free_searches)
|
||||
elif response.status_code == requests.codes.bad_request:
|
||||
return []
|
||||
return EMPTY_BATCH_RESPONSE
|
||||
elif response.status_code == requests.codes.unprocessable_entity:
|
||||
return []
|
||||
return EMPTY_BATCH_RESPONSE
|
||||
else:
|
||||
raise ServiceException(response.status_code, response)
|
||||
msg = "Unkown status: {}".format(response.status_code)
|
||||
self._logger.warning(msg, data={"searches": free_searches})
|
||||
return [geocoder_error_response(msg)] * len(free_searches)
|
||||
except requests.Timeout as te:
|
||||
# In case of timeout we want to stop the job because the server
|
||||
# could be down
|
||||
self._logger.error('Timeout connecting to Mapbox geocoding server',
|
||||
te)
|
||||
raise ServiceException('Error geocoding {0} using Mapbox'.format(
|
||||
searchtext), None)
|
||||
msg = 'Timeout connecting to Mapbox geocoding server'
|
||||
self._logger.error(msg, te)
|
||||
return [geocoder_error_response(msg)] * len(free_searches)
|
||||
except requests.ConnectionError as ce:
|
||||
# Don't raise the exception to continue with the geocoding job
|
||||
self._logger.error('Error connecting to Mapbox geocoding server',
|
||||
exception=ce)
|
||||
return []
|
||||
return EMPTY_BATCH_RESPONSE
|
||||
|
||||
def _escape(self, free_search):
|
||||
# Semicolon is used to separate batch geocoding; there's no documented
|
||||
# way to pass actual semicolons, and %3B or ; won't work (check
|
||||
# TestBulkStreetFunctions.test_semicolon and the docs,
|
||||
# https://www.mapbox.com/api-documentation/#batch-requests)
|
||||
return free_search.replace(';', ',')
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from geocoder import TomTomGeocoder
|
||||
from bulk_geocoder import TomTomBulkGeocoder
|
||||
from routing import TomTomRouting, TomTomRoutingResponse
|
||||
from isolines import TomTomIsolines, TomTomIsochronesResponse
|
||||
|
||||
@@ -0,0 +1,105 @@
|
||||
import json, requests, time
|
||||
from requests.adapters import HTTPAdapter
|
||||
from cartodb_services import StreetPointBulkGeocoder
|
||||
from cartodb_services.geocoder import geocoder_error_response
|
||||
from cartodb_services.tomtom import TomTomGeocoder
|
||||
from cartodb_services.tools.exceptions import ServiceException
|
||||
|
||||
|
||||
class TomTomBulkGeocoder(TomTomGeocoder, StreetPointBulkGeocoder):
    """Bulk street geocoder on top of the TomTom batch search endpoint."""

    MAX_BATCH_SIZE = 1000000  # From the docs
    MIN_BATCHED_SEARCH = 10  # Batch API is really fast
    BASE_URL = 'https://api.tomtom.com'
    BATCH_URL = BASE_URL + '/search/2/batch.json'
    MAX_STALLED_RETRIES = 100
    BATCH_RETRY_SLEEP_S = 5
    READ_TIMEOUT = 60
    CONNECT_TIMEOUT = 10
    MAX_RETRIES = 1

    def __init__(self, apikey, logger, service_params=None):
        TomTomGeocoder.__init__(self, apikey, logger, service_params)
        # service_params defaults to None; normalise before reading from it.
        service_params = service_params or {}
        self.connect_timeout = service_params.get('connect_timeout', self.CONNECT_TIMEOUT)
        self.read_timeout = service_params.get('read_timeout', self.READ_TIMEOUT)
        self.max_retries = service_params.get('max_retries', self.MAX_RETRIES)
        self.session = requests.Session()
        self.session.headers.update({'Content-Type': 'application/json'})
        self.session.mount(self.BATCH_URL,
                           HTTPAdapter(max_retries=self.max_retries))

    def _should_use_batch(self, searches):
        """Return True when there are at least MIN_BATCHED_SEARCH searches."""
        return len(searches) >= self.MIN_BATCHED_SEARCH

    def _serial_geocode(self, searches):
        """Geocode the searches one at a time with ``geocode_meta``.

        :param searches: iterable of
            ``(search_id, address, city, state, country)`` tuples
        :return: list of ``(search_id, lng_lat, metadata)`` tuples
        """
        results = []
        for search in searches:
            (search_id, address, city, state, country) = search
            address = address.encode('utf-8') if address else None
            city = city.encode('utf-8') if city else None
            state = state.encode('utf-8') if state else None
            country = country.encode('utf-8') if country else None
            result = self.geocode_meta(searchtext=address, city=city,
                                       state_province=state, country=country)
            results.append((search_id, result[0], result[1]))
        return results

    def _batch_geocode(self, searches):
        """Geocode all searches in one batch; results are paired by position.

        :return: list of ``(search_id, lng_lat, metadata)`` tuples
        """
        full_results = self._geocode_searches(searches)
        results = []
        for s, r in zip(searches, full_results):
            results.append((s[0], r[0], r[1]))
        return results

    def _geocode_searches(self, searches):
        """Run the batch and return one response per search.

        Any exception is logged and turned into one error response per
        search instead of propagating.
        """
        try:
            location = self._send_batch(searches)
            return self._download_results(location)
        except Exception as e:
            msg = "Error running TomTom batch geocode: {}".format(e)
            self._logger.error(msg, e)
            return [geocoder_error_response(msg)] * len(searches)

    def _send_batch(self, searches):
        """POST the batch and return the ``Location`` header of the 303 reply.

        :raises ServiceException: when the response status is not 303
        """
        body = {'batchItems': [{'query': self._query(s)} for s in searches]}
        request_params = {
            'key': self._apikey
        }
        response = self.session.post(self.BATCH_URL, data=json.dumps(body),
                                     allow_redirects=False,
                                     params=request_params,
                                     timeout=(self.connect_timeout, self.read_timeout))
        if response.status_code == 303:
            return response.headers['Location']
        else:
            msg = "Error sending batch: {}; Headers: {}".format(
                response.text.encode('utf-8'), response.headers)
            self._logger.error(msg)
            raise ServiceException(msg, response)

    def _download_results(self, location):
        """Poll ``location`` until the batch answers 200, then parse it.

        A 202 reply is retried at the ``Location`` it returns, after sleeping
        BATCH_RETRY_SLEEP_S seconds.

        :raises Exception: after more than MAX_STALLED_RETRIES 202 replies
        :raises ServiceException: on any status other than 200 or 202
        """
        stalled_retries = 0
        while True:
            # Same timeouts as the submit request, so one poll cannot block
            # forever on an unresponsive server.
            response = self.session.get(self.BASE_URL + location,
                                        timeout=(self.connect_timeout, self.read_timeout))
            if response.status_code == 200:
                return self._parse_results(response.json())
            elif response.status_code == 202:
                stalled_retries += 1
                if stalled_retries > self.MAX_STALLED_RETRIES:
                    raise Exception('Too many retries for job {}'.format(location))
                location = response.headers['Location']
                time.sleep(self.BATCH_RETRY_SLEEP_S)
            else:
                msg = "Error downloading batch: {}; Headers: {}".format(
                    response.text.encode('utf-8'), response.headers)
                self._logger.error(msg)
                raise ServiceException(msg, response)

    def _query(self, search):
        """Build the per-item query URI for one search."""
        (search_id, address, city, state, country) = search
        searchtext = ', '.join(filter(None, [address, city, state]))
        return self._request_uri(searchtext=searchtext, country=country)

    def _parse_results(self, json_body):
        """Parse every entry of ``json_body['batchItems']`` with ``_parse_response``."""
        return [self._parse_response(item['statusCode'], item['response'])
                for item in json_body['batchItems']]
|
||||
|
||||
@@ -4,20 +4,32 @@
|
||||
import json
|
||||
import requests
|
||||
from uritemplate import URITemplate
|
||||
from math import tanh
|
||||
from cartodb_services.geocoder import PRECISION_PRECISE, PRECISION_INTERPOLATED, geocoder_metadata, EMPTY_RESPONSE, geocoder_error_response
|
||||
from cartodb_services.metrics import Traceable
|
||||
from cartodb_services.tools.exceptions import ServiceException
|
||||
from cartodb_services.tools.qps import qps_retry
|
||||
from cartodb_services.tools.normalize import normalize
|
||||
|
||||
BASEURI = ('https://api.tomtom.com/search/2/geocode/'
|
||||
'{searchtext}.JSON'
|
||||
'?key={apiKey}'
|
||||
'&limit=1')
|
||||
HOST = 'https://api.tomtom.com'
|
||||
API_BASEURI = '/search/2'
|
||||
REQUEST_BASEURI = ('/geocode/'
|
||||
'{searchtext}.json'
|
||||
'?limit=1')
|
||||
ENTRY_RESULTS = 'results'
|
||||
ENTRY_POSITION = 'position'
|
||||
ENTRY_LON = 'lon'
|
||||
ENTRY_LAT = 'lat'
|
||||
|
||||
SCORE_NORMALIZATION_FACTOR = 0.15
|
||||
PRECISION_SCORE_THRESHOLD = 0.5
|
||||
MATCH_TYPE_BY_MATCH_LEVEL = {
|
||||
'POI': 'point_of_interest',
|
||||
'Street': 'street',
|
||||
'Address Range': 'street',
|
||||
'Cross Street': 'intersection',
|
||||
'Point Address': 'street_number'
|
||||
}
|
||||
|
||||
class TomTomGeocoder(Traceable):
|
||||
'''
|
||||
@@ -29,21 +41,17 @@ class TomTomGeocoder(Traceable):
|
||||
self._apikey = apikey
|
||||
self._logger = logger
|
||||
|
||||
def _uri(self, searchtext, countries=None):
|
||||
baseuri = BASEURI + '&countrySet={}'.format(countries) \
|
||||
if countries else BASEURI
|
||||
uri = URITemplate(baseuri).expand(apiKey=self._apikey,
|
||||
searchtext=searchtext.encode('utf-8'))
|
||||
return uri
|
||||
def _uri(self, searchtext, country=None):
|
||||
return HOST + API_BASEURI + \
|
||||
self._request_uri(searchtext, country, self._apikey)
|
||||
|
||||
def _parse_geocoder_response(self, response):
|
||||
json_response = json.loads(response)
|
||||
|
||||
if json_response and json_response[ENTRY_RESULTS]:
|
||||
result = json_response[ENTRY_RESULTS][0]
|
||||
return self._extract_lng_lat_from_feature(result)
|
||||
else:
|
||||
return []
|
||||
def _request_uri(self, searchtext, country=None, apiKey=None):
    """Expand the geocode request URI for ``searchtext``.

    ``countrySet`` is appended only when ``country`` is truthy, and the
    ``key`` parameter only when ``apiKey`` is truthy.
    """
    template = REQUEST_BASEURI
    if country:
        template = template + '&countrySet={}'.format(country)
    if apiKey:
        template = template + '&key={apiKey}'
    return URITemplate(template).expand(apiKey=apiKey,
                                        searchtext=searchtext.encode('utf-8'))
|
||||
|
||||
def _extract_lng_lat_from_feature(self, result):
|
||||
position = result[ENTRY_POSITION]
|
||||
@@ -65,6 +73,16 @@ class TomTomGeocoder(Traceable):
|
||||
@qps_retry(qps=5)
def geocode(self, searchtext, city=None, state_province=None,
            country=None):
    """Geocode through ``geocode_meta`` and return only the coordinates.

    :raises ServiceException: when the metadata carries a truthy ``error``
    """
    outcome = self.geocode_meta(searchtext, city, state_province, country)
    failure = outcome[1].get('error', None)
    if not failure:
        return outcome[0]
    raise ServiceException(failure, None)
|
||||
|
||||
@qps_retry(qps=5)
|
||||
def geocode_meta(self, searchtext, city=None, state_province=None,
|
||||
country=None):
|
||||
if searchtext:
|
||||
searchtext = searchtext.decode('utf-8')
|
||||
if city:
|
||||
@@ -75,7 +93,7 @@ class TomTomGeocoder(Traceable):
|
||||
country = country.decode('utf-8')
|
||||
|
||||
if not self._validate_input(searchtext, city, state_province, country):
|
||||
return []
|
||||
return EMPTY_RESPONSE
|
||||
|
||||
address = []
|
||||
if searchtext and searchtext.strip():
|
||||
@@ -85,28 +103,61 @@ class TomTomGeocoder(Traceable):
|
||||
if state_province:
|
||||
address.append(normalize(state_province))
|
||||
|
||||
uri = self._uri(searchtext=', '.join(address), countries=country)
|
||||
uri = self._uri(searchtext=', '.join(address), country=country)
|
||||
|
||||
try:
|
||||
response = requests.get(uri)
|
||||
|
||||
if response.status_code == requests.codes.ok:
|
||||
return self._parse_geocoder_response(response.text)
|
||||
elif response.status_code == requests.codes.bad_request:
|
||||
return []
|
||||
elif response.status_code == requests.codes.unprocessable_entity:
|
||||
return []
|
||||
else:
|
||||
raise ServiceException(response.status_code, response)
|
||||
return self._parse_response(response.status_code, response.text)
|
||||
except requests.Timeout as te:
|
||||
# In case of timeout we want to stop the job because the server
|
||||
# could be down
|
||||
self._logger.error('Timeout connecting to TomTom geocoding server',
|
||||
te)
|
||||
raise ServiceException('Error geocoding {0} using TomTom'.format(
|
||||
searchtext), None)
|
||||
msg = 'Timeout connecting to TomTom geocoding server'
|
||||
self._logger.error(msg, te)
|
||||
return geocoder_error_response(msg)
|
||||
except requests.ConnectionError as ce:
|
||||
# Don't raise the exception to continue with the geocoding job
|
||||
self._logger.error('Error connecting to TomTom geocoding server',
|
||||
exception=ce)
|
||||
return []
|
||||
return EMPTY_RESPONSE
|
||||
|
||||
def _parse_response(self, status_code, text):
    """Translate an HTTP status code and body into a geocoder response.

    :param status_code: HTTP status code of the geocoding request.
    :param text: Response body.
    :return: The parsed result for 200; ``EMPTY_RESPONSE`` for 400 and 422;
        otherwise an error response built by ``geocoder_error_response``
        (a warning is logged first).
    """
    codes = requests.codes
    if status_code == codes.ok:
        return self._parse_geocoder_response(text)
    if status_code in (codes.bad_request, codes.unprocessable_entity):
        return EMPTY_RESPONSE

    msg = 'Unknown response {}: {}'.format(str(status_code), text)
    self._logger.warning('Error parsing TomTom geocoding response',
                         data={'msg': msg})
    return geocoder_error_response(msg)
|
||||
|
||||
def _parse_geocoder_response(self, response):
|
||||
json_response = json.loads(response) \
|
||||
if type(response) != dict else response
|
||||
|
||||
if json_response and json_response[ENTRY_RESULTS]:
|
||||
result = json_response[ENTRY_RESULTS][0]
|
||||
return [
|
||||
self._extract_lng_lat_from_feature(result),
|
||||
self._extract_metadata_from_result(result)
|
||||
]
|
||||
else:
|
||||
return EMPTY_RESPONSE
|
||||
|
||||
def _extract_metadata_from_result(self, result):
    """Build the geocoder metadata for a single result entry.

    Reads ``result['score']`` (normalized) and ``result['type']`` (mapped
    through ``MATCH_TYPE_BY_MATCH_LEVEL``; unknown types yield no match
    type) and passes them to ``geocoder_metadata``.
    """
    relevance = self._normalize_score(result['score'])
    match_type = MATCH_TYPE_BY_MATCH_LEVEL.get(result['type'])
    if match_type:
        match_types = [match_type]
    else:
        match_types = []
    precision = self._precision_from_score(relevance)
    return geocoder_metadata(relevance, precision, match_types)
|
||||
|
||||
def _normalize_score(self, score):
|
||||
return tanh(score * SCORE_NORMALIZATION_FACTOR)
|
||||
|
||||
def _precision_from_score(self, score):
|
||||
return PRECISION_PRECISE \
|
||||
if score > PRECISION_SCORE_THRESHOLD else PRECISION_INTERPOLATED
|
||||
|
||||
@@ -35,28 +35,28 @@ class Logger:
|
||||
return
|
||||
self._send_to_rollbar('debug', text, exception, data)
|
||||
self._send_to_log_file('debug', text, exception, data)
|
||||
self._send_to_plpy('debug', text)
|
||||
self._send_to_plpy('debug', text, exception)
|
||||
|
||||
def info(self, text, exception=None, data=None):
    """Log ``text`` at ``info`` level.

    Does nothing when ``_check_min_level('info')`` is false. Otherwise the
    message is forwarded once each to ``_send_to_rollbar``,
    ``_send_to_log_file`` and ``_send_to_plpy``.

    :param text: Message to log.
    :param exception: Optional exception forwarded to every sink.
    :param data: Optional extra data for rollbar and the log file; a fresh
        empty dict is used when omitted, so no dict is shared across calls.
    """
    if not self._check_min_level('info'):
        return
    if data is None:
        data = {}
    self._send_to_rollbar('info', text, exception, data)
    self._send_to_log_file('info', text, exception, data)
    # Single plpy emission, including the exception (the stale call that
    # dropped the exception would have logged the message twice).
    self._send_to_plpy('info', text, exception)
|
||||
|
||||
def warning(self, text, exception=None, data=None):
    """Log ``text`` at ``warning`` level.

    Does nothing when ``_check_min_level('warning')`` is false. Otherwise
    the message is forwarded once each to ``_send_to_rollbar``,
    ``_send_to_log_file`` and ``_send_to_plpy``.

    :param text: Message to log.
    :param exception: Optional exception forwarded to every sink.
    :param data: Optional extra data for rollbar and the log file; a fresh
        empty dict is used when omitted, so no dict is shared across calls.
    """
    if not self._check_min_level('warning'):
        return
    if data is None:
        data = {}
    self._send_to_rollbar('warning', text, exception, data)
    self._send_to_log_file('warning', text, exception, data)
    # Single plpy emission, including the exception (the stale call that
    # dropped the exception would have logged the message twice).
    self._send_to_plpy('warning', text, exception)
|
||||
|
||||
def error(self, text, exception=None, data=None):
    """Log ``text`` at ``error`` level.

    Does nothing when ``_check_min_level('error')`` is false. Otherwise the
    message is forwarded once each to ``_send_to_rollbar``,
    ``_send_to_log_file`` and ``_send_to_plpy``.

    :param text: Message to log.
    :param exception: Optional exception forwarded to every sink.
    :param data: Optional extra data for rollbar and the log file; a fresh
        empty dict is used when omitted, so no dict is shared across calls.
    """
    if not self._check_min_level('error'):
        return
    if data is None:
        data = {}
    self._send_to_rollbar('error', text, exception, data)
    self._send_to_log_file('error', text, exception, data)
    # Single plpy emission, including the exception (the stale call that
    # dropped the exception would have logged the message twice).
    self._send_to_plpy('error', text, exception)
|
||||
|
||||
def _check_min_level(self, level):
|
||||
return True if self.LEVELS[level] >= self._min_level else False
|
||||
@@ -85,18 +85,31 @@ class Logger:
|
||||
elif level == 'error':
|
||||
self._file_logger.error(text, extra=extra_data)
|
||||
|
||||
def _send_to_plpy(self, level, text):
|
||||
def _send_to_plpy(self, level, text, exception=None):
|
||||
# exception might also be a tuple generated by sys.exc_info
|
||||
if exception:
|
||||
if isinstance(exception, tuple) and len(exception) > 1:
|
||||
exception = exception[1]
|
||||
exception_message = '. Exception: {}'.format(exception)
|
||||
else:
|
||||
exception_message = ''
|
||||
|
||||
# Adding trace breaks tests
|
||||
# trace = traceback.format_exc(15)
|
||||
# message = '{}{}. Trace: {}'.format(text, exception_message, trace)
|
||||
message = '{}{}'.format(text, exception_message)
|
||||
|
||||
if self._check_plpy():
|
||||
if level == 'debug':
|
||||
plpy.debug(text)
|
||||
plpy.debug(message)
|
||||
elif level == 'info':
|
||||
plpy.info(text)
|
||||
plpy.info(message)
|
||||
elif level == 'warning':
|
||||
plpy.warning(text)
|
||||
plpy.warning(message)
|
||||
elif level == 'error':
|
||||
# Plpy.error and fatal raises exceptions and we only want to
|
||||
# log an error, exceptions should be raise explicitly
|
||||
plpy.warning(text)
|
||||
plpy.warning(message)
|
||||
|
||||
def _parse_log_extra_data(self, exception, data):
|
||||
extra_data = {}
|
||||
|
||||
@@ -10,7 +10,7 @@ from setuptools import setup, find_packages
|
||||
setup(
|
||||
name='cartodb_services',
|
||||
|
||||
version='0.18.0',
|
||||
version='0.19.1',
|
||||
|
||||
description='CartoDB Services API Python Library',
|
||||
|
||||
|
||||
@@ -52,11 +52,14 @@ class MockPlPy:
|
||||
self._logged_queries = []
|
||||
self._log_executed_queries = True
|
||||
|
||||
def warning(self, msg):
    """Record ``msg`` in ``self.warnings`` instead of emitting it."""
    self.warnings.append(msg)
|
||||
|
||||
def notice(self, msg):
    """Record ``msg`` in ``self.notices`` instead of emitting it."""
    self.notices.append(msg)
|
||||
|
||||
def debug(self, msg):
    """Record ``msg`` in ``self.debugs`` instead of emitting it.

    Debug messages are kept apart from notices; the stale line that also
    appended them to ``self.notices`` is gone.
    """
    self.debugs.append(msg)
|
||||
|
||||
def info(self, msg):
    """Record ``msg`` in ``self.infos`` instead of emitting it."""
    self.infos.append(msg)
|
||||
|
||||
@@ -0,0 +1,211 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import json
|
||||
from unittest import TestCase
|
||||
from mock import Mock, MagicMock
|
||||
from nose.tools import assert_not_equal, assert_equal, assert_true
|
||||
from cartodb_services.tools import QuotaExceededException
|
||||
from cartodb_services.geocoder import run_street_point_geocoder, StreetGeocoderSearch
|
||||
|
||||
|
||||
def _address_only_search(search_id, address):
    """Build a StreetGeocoderSearch with city, state and country unset."""
    return StreetGeocoderSearch(id=search_id, address=address,
                                city=None, state=None, country=None)


# Search inputs keyed by scenario name; the same keys are used by the bulk
# result and expected result fixtures.
SEARCH_FIXTURES = {
    'two': [
        _address_only_search(1, 'Paseo Zorrilla 1, Valladolid'),
        _address_only_search(2, 'Paseo Zorrilla 2, Valladolid'),
    ],
    'wrong': [
        _address_only_search(100, 'deowpfjoepwjfopejwpofjewpojgf'),
    ],
    'error': [
        _address_only_search(200, None),
    ],
    'broken_middle': [
        _address_only_search(301, 'Paseo Zorrilla 1, Valladolid'),
        _address_only_search(302, 'Marsopolis'),
        _address_only_search(303, 'Paseo Zorrilla 2, Valladolid'),
    ],
}
|
||||
|
||||
# Values returned by the mocked `bulk_geocode`, keyed by scenario name:
# tuples of (search id, coordinates, metadata).
BULK_RESULTS_FIXTURES = {
    'two': [
        (1, [0, 0], {}),
        (2, [0, 0], {}),
    ],
    'wrong': [
        (100, [], {}),
    ],
    'error': [
        (200, [], {'error': 'Something wrong happened'}),
    ],
    'broken_middle': [
        (301, [0, 0], {}),
        (302, ['a', 'b'], {}),
        (303, [0, 0], {}),
    ],
}

# Rows the tests expect back for each scenario:
# [search id, geometry or None, metadata serialized as JSON text].
EXPECTED_RESULTS_FIXTURES = {
    'two': [
        [1, [0, 0], '{}'],
        [2, [0, 0], '{}'],
    ],
    'wrong': [
        [100, None, '{}'],
    ],
    'error': [
        [200, None, '{"error": "Something wrong happened"}'],
    ],
    'broken_middle': [
        [301, [0, 0], '{}'],
        [302, None, '{"processing_error": "Error: NO!"}'],
        [303, [0, 0], '{}'],
    ],
}
|
||||
|
||||
|
||||
class TestRunStreetPointGeocoder(TestCase):
    """Tests for ``run_street_point_geocoder`` with mocked collaborators."""

    def _run_geocoder(self, plpy=None, gd=None, geocoder=None,
                      service_manager=None, username=None, orgname=None,
                      searches=None):
        """Call ``run_street_point_geocoder``; falsy arguments fall back to
        the mocks created in ``setUp`` (searches fall back to ``'[]'``)."""
        serialized_searches = json.dumps(searches) if searches else '[]'
        return run_street_point_geocoder(
            plpy or self.plpy_mock,
            gd or self.gd_mock,
            geocoder or self.geocoder_mock,
            service_manager or self.service_manager_mock,
            username or 'any_username',
            orgname or None,
            serialized_searches)

    def setUp(self):
        """Create the plpy, GD, geocoder, quota and service manager mocks."""
        self.plpy_mock = Mock()
        self.plpy_mock.execute = MagicMock(
            return_value=[{'the_geom': [0, 0]}])

        self.logger_config_mock = MagicMock(min_log_level='debug',
                                            log_file_path='/tmp/ptest.log',
                                            rollbar_api_key=None)
        self.gd_mock = {'logger_config': self.logger_config_mock}

        self.geocoder_mock = Mock()
        self.quota_service_mock = Mock()

        manager = Mock()
        manager.quota_service = self.quota_service_mock
        manager.assert_within_limits = MagicMock()
        self.service_manager_mock = manager

    def test_count_increment_total_and_failed_service_use_on_error(self):
        """A failing limits check raises and counts total and failed use."""
        self.service_manager_mock.assert_within_limits = Mock(
            side_effect=Exception('Fail!'))
        searches = []

        with self.assertRaises(BaseException):
            self._run_geocoder(service_manager=self.service_manager_mock,
                               searches=searches)

        quota = self.quota_service_mock
        quota.increment_total_service_use.assert_called_once_with(
            len(searches))
        quota.increment_failed_service_use.assert_called_once_with(
            len(searches))

    def test_count_increment_failed_service_use_on_quota_error(self):
        """A quota error yields no rows and counts every search as failed."""
        self.service_manager_mock.assert_within_limits = Mock(
            side_effect=QuotaExceededException())
        searches = SEARCH_FIXTURES['two']

        result = self._run_geocoder(
            service_manager=self.service_manager_mock, searches=searches)

        assert_equal(result, [])
        failed_counter = self.quota_service_mock.increment_failed_service_use
        failed_counter.assert_called_once_with(len(searches))

    def test_increment_success_service_use_on_complete_response(self):
        """Two geocoded rows are returned and counted as successes."""
        searches = SEARCH_FIXTURES['two']
        results = [(1, [0, 0], {}), (2, [0, 0], {})]
        expected_results = [[1, [0, 0], '{}'], [2, [0, 0], '{}']]
        self.geocoder_mock.bulk_geocode = MagicMock(return_value=results)

        result = self._run_geocoder(geocoder=self.geocoder_mock,
                                    searches=searches)

        assert_equal(result, expected_results)
        success_counter = \
            self.quota_service_mock.increment_success_service_use
        success_counter.assert_called_once_with(len(results))

    def test_increment_empty_service_use_on_complete_response(self):
        """An empty bulk result counts every search as empty."""
        searches = SEARCH_FIXTURES['two']
        results = []
        self.geocoder_mock.bulk_geocode = MagicMock(return_value=results)

        result = self._run_geocoder(geocoder=self.geocoder_mock,
                                    searches=searches)

        assert_equal(result, results)
        empty_counter = self.quota_service_mock.increment_empty_service_use
        empty_counter.assert_called_once_with(len(searches))

    def test_increment_mixed_empty_service_use_on_complete_response(self):
        """Geocoded and empty rows are counted separately."""
        searches = SEARCH_FIXTURES['two'] + SEARCH_FIXTURES['wrong']
        bulk_results = (BULK_RESULTS_FIXTURES['two'] +
                        BULK_RESULTS_FIXTURES['wrong'])
        self.geocoder_mock.bulk_geocode = MagicMock(
            return_value=bulk_results)

        result = self._run_geocoder(geocoder=self.geocoder_mock,
                                    searches=searches)

        expected = (EXPECTED_RESULTS_FIXTURES['two'] +
                    EXPECTED_RESULTS_FIXTURES['wrong'])
        assert_equal(result, expected)
        quota = self.quota_service_mock
        quota.increment_success_service_use.assert_called_once_with(
            len(SEARCH_FIXTURES['two']))
        quota.increment_empty_service_use.assert_called_once_with(
            len(SEARCH_FIXTURES['wrong']))

    def test_increment_mixed_error_service_use_on_complete_response(self):
        """Geocoded and errored rows are counted separately."""
        searches = SEARCH_FIXTURES['two'] + SEARCH_FIXTURES['error']
        bulk_results = (BULK_RESULTS_FIXTURES['two'] +
                        BULK_RESULTS_FIXTURES['error'])
        self.geocoder_mock.bulk_geocode = MagicMock(
            return_value=bulk_results)

        result = self._run_geocoder(geocoder=self.geocoder_mock,
                                    searches=searches)

        expected = (EXPECTED_RESULTS_FIXTURES['two'] +
                    EXPECTED_RESULTS_FIXTURES['error'])
        assert_equal(result, expected)
        quota = self.quota_service_mock
        quota.increment_success_service_use.assert_called_once_with(
            len(SEARCH_FIXTURES['two']))
        quota.increment_failed_service_use.assert_called_once_with(
            len(SEARCH_FIXTURES['error']))

    def test_controlled_failure_on_query_break(self):
        """A row whose query raises is reported as a processing error while
        the remaining rows are still returned."""
        searches = SEARCH_FIXTURES['broken_middle']
        bulk_results = BULK_RESULTS_FIXTURES['broken_middle']
        self.geocoder_mock.bulk_geocode = MagicMock(
            return_value=bulk_results)

        def break_on_302(*args):
            # Three-argument calls are (plan, values, limit); the row for
            # id 302 carries 'a' as its first value.
            if len(args) == 3 and args[1][0] == 'a':
                raise Exception('NO!')
            return [{'the_geom': [0, 0]}]

        self.plpy_mock.execute = break_on_302

        result = self._run_geocoder(geocoder=self.geocoder_mock,
                                    searches=searches)

        assert_equal(result, EXPECTED_RESULTS_FIXTURES['broken_middle'])
        quota = self.quota_service_mock
        quota.increment_success_service_use.assert_called_once_with(2)
        quota.increment_failed_service_use.assert_called_once_with(1)
|
||||
|
||||
|
||||
|
||||
@@ -17,16 +17,16 @@ VALID_PROFILE = DEFAULT_PROFILE
|
||||
INVALID_PROFILE = 'invalid_profile'
|
||||
|
||||
WELL_KNOWN_SHAPE = [(40.73312, -73.98891), (40.73353, -73.98987),
|
||||
(40.73398, -73.99095), (40.73453, -73.99227),
|
||||
(40.73531, -73.99412), (40.73467, -73.99459),
|
||||
(40.73442, -73.99477), (40.73435, -73.99482),
|
||||
(40.73403, -73.99505), (40.73344, -73.99549),
|
||||
(40.73286, -73.9959), (40.73226, -73.99635),
|
||||
(40.73186, -73.99664), (40.73147, -73.99693),
|
||||
(40.73141, -73.99698), (40.73147, -73.99707),
|
||||
(40.73219, -73.99856), (40.73222, -73.99861),
|
||||
(40.73398, -73.99095), (40.73321, -73.99111),
|
||||
(40.73245, -73.99129), (40.7333, -73.99332),
|
||||
(40.7338, -73.99449), (40.73403, -73.99505),
|
||||
(40.73344, -73.99549), (40.73286, -73.9959),
|
||||
(40.73226, -73.99635), (40.73186, -73.99664),
|
||||
(40.73147, -73.99693), (40.73141, -73.99698),
|
||||
(40.73147, -73.99707), (40.73219, -73.99856),
|
||||
(40.73222, -73.99861), (40.73225, -73.99868),
|
||||
(40.73293, -74.00007), (40.733, -74.00001)]
|
||||
WELL_KNOWN_LENGTH = 1317.9
|
||||
WELL_KNOWN_LENGTH = 1384.8
|
||||
|
||||
|
||||
class MapboxRoutingTestCase(unittest.TestCase):
|
||||
|
||||
@@ -2,6 +2,29 @@ import os
|
||||
import requests
|
||||
import json
|
||||
|
||||
from nose.tools import assert_true
|
||||
|
||||
|
||||
# From https://www.python.org/dev/peps/pep-0485/#proposed-implementation
|
||||
def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
    """Return True when ``a`` and ``b`` differ by no more than the larger of
    the relative bound (``rel_tol`` times the larger magnitude) and
    ``abs_tol``."""
    difference = abs(a-b)
    relative_bound = rel_tol * max(abs(a), abs(b))
    return difference <= max(relative_bound, abs_tol)
|
||||
|
||||
|
||||
def assert_close_enough(xy_a, xy_b, rel_tol=0.0001, abs_tol=0.0005):
    """
    Asserts that the given points are "close enough", in a square.
    :param xy_a: Array of 2 elements, X and Y.
    :param xy_b: Array of 2 elements, X and Y.
    :param rel_tol: Relative tolerance. Default: 0.0001 (0.01%).
    :param abs_tol: Absolute tolerance. Default: 0.0005.
    """

    # Each coordinate (index 0 and 1) is checked independently.
    for i in [0, 1]:
        assert_true(isclose(xy_a[i], xy_b[i], rel_tol, abs_tol),
                    "Coord {} error: {} and {} are not closer than {}, {}".format(
                        i, xy_a[i], xy_b[i], rel_tol, abs_tol
                    ))
|
||||
|
||||
|
||||
class IntegrationTestHelper:
|
||||
|
||||
@@ -22,13 +45,17 @@ class IntegrationTestHelper:
|
||||
}
|
||||
|
||||
@classmethod
def execute_query_raw(cls, sql_api_url, query):
    """Run ``query`` against the SQL API and return the decoded JSON body.

    :param sql_api_url: Endpoint URL; the query is appended as ``?q=``.
    :param query: Query text, inserted into the URL as given. Callers
        append extra URL parameters (e.g. ``&api_key=...``) to it.
    :return: The full decoded JSON response.
    :raises Exception: With the response's ``error`` entry when the HTTP
        status is not 200.
    """
    requests.packages.urllib3.disable_warnings()
    query_url = "{0}?q={1}".format(sql_api_url, query)
    # Parenthesized single argument prints the same on Python 2 and 3.
    print("Executing query: {0}".format(query_url))
    query_response = requests.get(query_url)
    # Decode once and reuse for both the error and the success path.
    response_data = json.loads(query_response.text)
    if query_response.status_code != 200:
        raise Exception(response_data['error'])
    return response_data

@classmethod
def execute_query(cls, sql_api_url, query):
    """Run ``query`` and return only the first row of the response."""
    return cls.execute_query_raw(sql_api_url, query)['rows'][0]
|
||||
|
||||
@@ -1,10 +1,108 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from unittest import TestCase
|
||||
from nose.tools import assert_raises
|
||||
from nose.tools import assert_not_equal, assert_equal
|
||||
from nose.tools import assert_not_equal, assert_equal, assert_true
|
||||
from ..helpers.integration_test_helper import IntegrationTestHelper
|
||||
from ..helpers.integration_test_helper import assert_close_enough, isclose
|
||||
|
||||
class TestStreetFunctionsSetUp(TestCase):
|
||||
provider = None
|
||||
fixture_points = None
|
||||
|
||||
class TestStreetFunctions(TestCase):
|
||||
GOOGLE_POINTS = {
|
||||
'Plaza Mayor 1, Valladolid': [-4.728252, 41.6517025],
|
||||
'Paseo Zorrilla, Valladolid': [-4.7404453, 41.6314339],
|
||||
'1900 amphitheatre parkway': [-122.0875324, 37.4227968],
|
||||
'1901 amphitheatre parkway': [-122.0885504, 37.4238657],
|
||||
'1902 amphitheatre parkway': [-122.0876674, 37.4235729],
|
||||
'Valladolid': [-4.7245321, 41.652251],
|
||||
'Valladolid, Spain': [-4.7245321, 41.652251],
|
||||
'Valladolid, Mexico': [-88.2022488, 20.68964],
|
||||
'Madrid': [-3.7037902, 40.4167754],
|
||||
'Logroño, Spain': [-2.4449852, 42.4627195],
|
||||
'Logroño, Argentina': [-61.6961807, -29.5031057],
|
||||
'Plaza España, Barcelona': [2.1482563, 41.375485]
|
||||
}
|
||||
|
||||
HERE_POINTS = {
|
||||
'Plaza Mayor 1, Valladolid': [-4.729, 41.65258],
|
||||
'Paseo Zorrilla, Valladolid': [-4.73869, 41.63817],
|
||||
'1900 amphitheatre parkway': [-122.0879468, 37.4234763],
|
||||
'1901 amphitheatre parkway': [-122.0879253, 37.4238725],
|
||||
'1902 amphitheatre parkway': [-122.0879531, 37.4234775],
|
||||
'Valladolid': [-4.73214, 41.6542],
|
||||
'Valladolid, Spain': [-4.73214, 41.6542],
|
||||
'Valladolid, Mexico': [-88.20117, 20.69021],
|
||||
'Madrid': [-3.70578, 40.42028],
|
||||
'Logroño, Spain': [-2.45194, 42.46592],
|
||||
'Logroño, Argentina': [-61.69604, -29.50425],
|
||||
'Plaza España, Barcelona': [2.14834, 41.37494]
|
||||
}
|
||||
|
||||
TOMTOM_POINTS = HERE_POINTS.copy()
|
||||
TOMTOM_POINTS.update({
|
||||
'Plaza Mayor 1, Valladolid': [-4.7286, 41.6523],
|
||||
'Paseo Zorrilla, Valladolid': [-4.74031, 41.63181],
|
||||
'Valladolid': [-4.72838, 41.6542],
|
||||
'Valladolid, Spain': [-4.72838, 41.6542],
|
||||
'Madrid': [-3.70035, 40.42028],
|
||||
'Logroño, Spain': [-2.44998, 42.46592],
|
||||
'Plaza España, Barcelona': [2.14856, 41.37516]
|
||||
})
|
||||
|
||||
MAPBOX_POINTS = GOOGLE_POINTS.copy()
|
||||
MAPBOX_POINTS.update({
|
||||
'Logroño, Spain': [-2.44556, 42.47],
|
||||
'Logroño, Argentina': [-70.687195, -33.470901], # TODO: huge mismatch
|
||||
'Valladolid': [-4.72856, 41.652251],
|
||||
'Valladolid, Spain': [-4.72856, 41.652251],
|
||||
'1902 amphitheatre parkway': [-118.03, 34.06], # TODO: huge mismatch
|
||||
'Madrid': [-3.69194, 40.4167754],
|
||||
'Plaza España, Barcelona': [2.342231, 41.50677] # TODO: not ideal
|
||||
})
|
||||
|
||||
FIXTURE_POINTS = {
|
||||
'google': GOOGLE_POINTS,
|
||||
'heremaps': HERE_POINTS,
|
||||
'tomtom': TOMTOM_POINTS,
|
||||
'mapbox': MAPBOX_POINTS
|
||||
}
|
||||
|
||||
GOOGLE_METADATAS = {
|
||||
'Plaza España, Barcelona':
|
||||
{'relevance': 0.9, 'precision': 'precise', 'match_types': ['point_of_interest']},
|
||||
'Santiago Rusiñol 123, Valladolid':
|
||||
{'relevance': 0.56, 'precision': 'interpolated', 'match_types': ['locality']}
|
||||
}
|
||||
|
||||
HERE_METADATAS = {
|
||||
'Plaza España, Barcelona':
|
||||
{'relevance': 1, 'precision': 'precise', 'match_types': ['street']},
|
||||
'Santiago Rusiñol 123, Valladolid':
|
||||
{'relevance': 0.89, 'precision': 'precise', 'match_types': ['street']} # Wrong. See https://stackoverflow.com/questions/51285622/missing-matchtype-at-here-geocoding-responses
|
||||
}
|
||||
|
||||
TOMTOM_METADATAS = {
|
||||
'Plaza España, Barcelona':
|
||||
{'relevance': 0.85, 'precision': 'precise', 'match_types': ['street']},
|
||||
'Santiago Rusiñol 123, Valladolid':
|
||||
{'relevance': 0.45, 'precision': 'interpolated', 'match_types': ['street']}
|
||||
}
|
||||
|
||||
MAPBOX_METADATAS = {
|
||||
'Plaza España, Barcelona':
|
||||
{'relevance': 0.67, 'precision': 'precise', 'match_types': ['point_of_interest']},
|
||||
'Santiago Rusiñol 123, Valladolid':
|
||||
{'relevance': 0.67, 'precision': 'precise', 'match_types': ['point_of_interest']} # TODO: wrong
|
||||
}
|
||||
|
||||
METADATAS = {
|
||||
'google': GOOGLE_METADATAS,
|
||||
'heremaps': HERE_METADATAS,
|
||||
'tomtom': TOMTOM_METADATAS,
|
||||
'mapbox': MAPBOX_METADATAS
|
||||
}
|
||||
|
||||
def setUp(self):
|
||||
self.env_variables = IntegrationTestHelper.get_environment_variables()
|
||||
@@ -15,19 +113,332 @@ class TestStreetFunctions(TestCase):
|
||||
self.env_variables['api_key']
|
||||
)
|
||||
|
||||
if not self.fixture_points:
|
||||
query = "select provider from " \
|
||||
"cdb_dataservices_client.cdb_service_quota_info() " \
|
||||
"where service = 'hires_geocoder'"
|
||||
response = self._run_authenticated(query)
|
||||
provider = response['rows'][0]['provider']
|
||||
self.fixture_points = self.FIXTURE_POINTS[provider]
|
||||
|
||||
self.metadata = self.METADATAS[provider]
|
||||
|
||||
|
||||
def _run_authenticated(self, query):
    """Run ``query`` with this test's API key appended and return the raw
    response from ``IntegrationTestHelper.execute_query_raw``."""
    api_key = self.env_variables['api_key']
    query_with_key = "{}&api_key={}".format(query, api_key)
    return IntegrationTestHelper.execute_query_raw(self.sql_api_url,
                                                   query_with_key)
|
||||
|
||||
def _used_quota(self):
|
||||
query = "select used_quota " \
|
||||
"from cdb_dataservices_client.cdb_service_quota_info() " \
|
||||
"where service = 'hires_geocoder'"
|
||||
return self._run_authenticated(query)['rows'][0]['used_quota']
|
||||
|
||||
class TestStreetFunctions(TestStreetFunctionsSetUp):
    """Integration tests for ``cdb_geocode_street_point``."""

    def test_if_select_with_street_point_is_ok(self):
        """An authenticated geocoding query returns a non-null geometry."""
        query = "SELECT cdb_dataservices_client.cdb_geocode_street_point(street) " \
                "as geometry FROM {0} LIMIT 1&api_key={1}".format(
                    self.env_variables['table_name'],
                    self.env_variables['api_key'])
        geometry = IntegrationTestHelper.execute_query(self.sql_api_url, query)
        assert_not_equal(geometry['geometry'], None)

    def test_if_select_with_street_without_api_key_raise_error(self):
        """Without an API key the query is rejected with a permission
        error on the table."""
        table = self.env_variables['table_name']
        query = "SELECT cdb_dataservices_client.cdb_geocode_street_point(street) " \
                "as geometry FROM {0} LIMIT 1".format(table)
        try:
            IntegrationTestHelper.execute_query(self.sql_api_url, query)
        except Exception as e:
            assert_equal(e.message[0],
                         "permission denied for relation {}".format(table))
        else:
            # Without this the test passed silently when no error was
            # raised at all.
            self.fail("Query without api_key did not raise an error")

    def test_component_aggregation(self):
        """Street, city and country components are combined when geocoding."""
        query = "select st_x(the_geom), st_y(the_geom) from (" \
                "select cdb_dataservices_client.cdb_geocode_street_point( " \
                "'Plaza España', 'Barcelona', null, 'Spain') as the_geom) _x"
        response = self._run_authenticated(query)
        row = response['rows'][0]
        x_y = [row['st_x'], row['st_y']]
        # Wrong coordinates (Plaza España, Madrid): [-3.7138975, 40.4256762]
        assert_close_enough(x_y, self.fixture_points['Plaza España, Barcelona'])
|
||||
|
||||
class TestBulkStreetFunctions(TestStreetFunctionsSetUp):
|
||||
|
||||
def test_full_spec(self):
|
||||
query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \
|
||||
"FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
|
||||
"'select 1 as cartodb_id, ''Spain'' as country, " \
|
||||
"''Castilla y León'' as state, ''Valladolid'' as city, " \
|
||||
"''Plaza Mayor 1'' as street " \
|
||||
"UNION " \
|
||||
"select 2 as cartodb_id, ''Spain'' as country, " \
|
||||
"''Castilla y León'' as state, ''Valladolid'' as city, " \
|
||||
"''Paseo Zorrilla'' as street' " \
|
||||
", 'street', 'city', 'state', 'country')"
|
||||
response = self._run_authenticated(query)
|
||||
|
||||
points_by_cartodb_id = {
|
||||
1: self.fixture_points['Plaza Mayor 1, Valladolid'],
|
||||
2: self.fixture_points['Paseo Zorrilla, Valladolid']
|
||||
}
|
||||
self.assert_close_points(self._x_y_by_cartodb_id(response), points_by_cartodb_id)
|
||||
|
||||
def test_empty_columns(self):
|
||||
query = "select *, st_x(the_geom), st_y(the_geom) " \
|
||||
"FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
|
||||
"'select * from jsonb_to_recordset(''[" \
|
||||
"{\"cartodb_id\": 1, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}" \
|
||||
"]''::jsonb) as (cartodb_id integer, address text)', " \
|
||||
"'address', '''''', '''''', '''''')"
|
||||
response = self._run_authenticated(query)
|
||||
|
||||
assert_close_enough(self._x_y_by_cartodb_id(response)[1],
|
||||
self.fixture_points['1901 amphitheatre parkway'])
|
||||
|
||||
def test_null_columns(self):
|
||||
query = "select *, st_x(the_geom), st_y(the_geom) " \
|
||||
"FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
|
||||
"'select * from jsonb_to_recordset(''[" \
|
||||
"{\"cartodb_id\": 1, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}" \
|
||||
"]''::jsonb) as (cartodb_id integer, address text)', " \
|
||||
"'address')"
|
||||
response = self._run_authenticated(query)
|
||||
|
||||
assert_close_enough(self._x_y_by_cartodb_id(response)[1],
|
||||
self.fixture_points['1901 amphitheatre parkway'])
|
||||
|
||||
def test_batching(self):
|
||||
query = "select *, st_x(the_geom), st_y(the_geom) " \
|
||||
"FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
|
||||
"'select * from jsonb_to_recordset(''[" \
|
||||
"{\"cartodb_id\": 1, \"address\": \"1900 amphitheatre parkway, mountain view, ca, us\"}," \
|
||||
"{\"cartodb_id\": 2, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}," \
|
||||
"{\"cartodb_id\": 3, \"address\": \"1902 amphitheatre parkway, mountain view, ca, us\"}" \
|
||||
"]''::jsonb) as (cartodb_id integer, address text)', " \
|
||||
"'address', null, null, null, 2)"
|
||||
response = self._run_authenticated(query)
|
||||
|
||||
points_by_cartodb_id = {
|
||||
1: self.fixture_points['1900 amphitheatre parkway'],
|
||||
2: self.fixture_points['1901 amphitheatre parkway'],
|
||||
3: self.fixture_points['1902 amphitheatre parkway'],
|
||||
}
|
||||
self.assert_close_points(self._x_y_by_cartodb_id(response), points_by_cartodb_id)
|
||||
|
||||
def test_batch_size_1(self):
|
||||
query = "select *, st_x(the_geom), st_y(the_geom) " \
|
||||
"FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
|
||||
"'select * from jsonb_to_recordset(''[" \
|
||||
"{\"cartodb_id\": 1, \"address\": \"1900 amphitheatre parkway, mountain view, ca, us\"}," \
|
||||
"{\"cartodb_id\": 2, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}," \
|
||||
"{\"cartodb_id\": 3, \"address\": \"1902 amphitheatre parkway, mountain view, ca, us\"}" \
|
||||
"]''::jsonb) as (cartodb_id integer, address text)', " \
|
||||
"'address', null, null, null, 1)"
|
||||
response = self._run_authenticated(query)
|
||||
|
||||
points_by_cartodb_id = {
|
||||
1: self.fixture_points['1900 amphitheatre parkway'],
|
||||
2: self.fixture_points['1901 amphitheatre parkway'],
|
||||
3: self.fixture_points['1902 amphitheatre parkway'],
|
||||
}
|
||||
self.assert_close_points(self._x_y_by_cartodb_id(response), points_by_cartodb_id)
|
||||
|
||||
def test_city_column_geocoding(self):
|
||||
query = "select *, st_x(the_geom), st_y(the_geom) " \
|
||||
"FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
|
||||
"'select * from jsonb_to_recordset(''[" \
|
||||
"{\"cartodb_id\": 1, \"city\": \"Valladolid\"}," \
|
||||
"{\"cartodb_id\": 2, \"city\": \"Madrid\"}" \
|
||||
"]''::jsonb) as (cartodb_id integer, city text)', " \
|
||||
"'city')"
|
||||
response = self._run_authenticated(query)
|
||||
|
||||
assert_equal(response['total_rows'], 2)
|
||||
|
||||
points_by_cartodb_id = {
|
||||
1: self.fixture_points['Valladolid'],
|
||||
2: self.fixture_points['Madrid']
|
||||
}
|
||||
self.assert_close_points(self._x_y_by_cartodb_id(response), points_by_cartodb_id)
|
||||
|
||||
def test_free_text_geocoding(self):
|
||||
query = "select *, st_x(the_geom), st_y(the_geom) " \
|
||||
"FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
|
||||
"'select * from (" \
|
||||
"select 1 as cartodb_id, ''W 26th Street'' as address, " \
|
||||
"null as city , null as state , null as country" \
|
||||
")_x', " \
|
||||
"'''Logroño, La Rioja, Spain''')"
|
||||
response = self._run_authenticated(query)
|
||||
|
||||
assert_close_enough(self._x_y_by_cartodb_id(response)[1],
|
||||
self.fixture_points['Logroño, Spain'])
|
||||
|
||||
def test_templating_geocoding(self):
|
||||
query = "SELECT cartodb_id, st_x(the_geom), st_y(the_geom) from " \
|
||||
"cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
|
||||
"'select 1 as cartodb_id, ''Logroño'' as city', " \
|
||||
"'city || '', '' || ''Spain''') " \
|
||||
"UNION " \
|
||||
"SELECT cartodb_id, st_x(the_geom), st_y(the_geom) from " \
|
||||
"cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
|
||||
"'select 2 as cartodb_id, ''Logroño'' as city', " \
|
||||
"'city || '', '' || ''Argentina''')"
|
||||
response = self._run_authenticated(query)
|
||||
|
||||
points_by_cartodb_id = {
|
||||
1: self.fixture_points['Logroño, Spain'],
|
||||
2: self.fixture_points['Logroño, Argentina']
|
||||
}
|
||||
self.assert_close_points(self._x_y_by_cartodb_id(response), points_by_cartodb_id)
|
||||
|
||||
def test_template_with_two_columns_geocoding(self):
    """Geocode using a template that concatenates two columns.

    Both rows share the city name but differ in country; the template
    joins `city` and `country`, so each row must land on its own
    country-specific fixture point.
    """
    query = (
        "SELECT cartodb_id, st_x(the_geom), st_y(the_geom) from "
        "cdb_dataservices_client.cdb_bulk_geocode_street_point("
        " 'select * from (' ||"
        " ' select 1 as cartodb_id, ''Valladolid'' as city, ''Mexico'' as country ' ||"
        " ' union all ' ||"
        " ' select 2, ''Valladolid'', ''Spain''' ||"
        " ') _x',"
        "'city || '', '' || country')"
    )
    result = self._run_authenticated(query)

    expected_points = {
        1: self.fixture_points['Valladolid, Mexico'],
        2: self.fixture_points['Valladolid, Spain']
    }
    geocoded_points = self._x_y_by_cartodb_id(result)
    self.assert_close_points(geocoded_points, expected_points)
|
||||
|
||||
def test_large_batches(self):
    """
    Geocode a batch of generated addresses; handy for trying batch sizes.

    Builds 110 consecutive street numbers on the same street, geocodes
    them in one call and checks that every row has coordinates and
    metadata, and that the used quota grew by exactly the batch length.
    """
    total = 110
    first_cartodb_id = -1
    first_street_number = 1
    batch_size = 'NULL'  # NULL for optimal

    street_template = ('{{"cartodb_id": {}, "address": "{} Yonge Street, '
                       'Toronto, Canada"}}')
    streets = [
        street_template.format(first_cartodb_id + offset,
                               first_street_number + offset)
        for offset in range(0, total)
    ]

    # Read the quota before the call so the increment can be checked after.
    quota_before = self._used_quota()

    query_template = (
        "select *, st_x(the_geom), st_y(the_geom) "
        "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( "
        "'select * from jsonb_to_recordset(''["
        "{}"
        "]''::jsonb) as (cartodb_id integer, address text)', "
        "'address', null, null, null, {})"
    )
    query = query_template.format(','.join(streets), batch_size)
    result = self._run_authenticated(query)

    assert_equal(total, len(result['rows']))
    for row in result['rows']:
        assert_not_equal(row['st_x'], None)
        assert_not_equal(row['metadata'], {})
        row_metadata = row['metadata']
        for field in ('relevance', 'precision', 'match_types'):
            assert_not_equal(row_metadata[field], None)

    assert_equal(self._used_quota(), quota_before + total)
|
||||
|
||||
def test_missing_components_on_private_function(self):
    """Call the private bulk function with a search lacking components.

    The single search entry only carries `id` and `address`; the call
    must still return exactly one row.
    """
    query = (
        "SELECT _cdb_bulk_geocode_street_point("
        " '[{\"id\": \"1\", \"address\": \"Amphitheatre Parkway 22\"}]' "
        ")"
    )
    result = self._run_authenticated(query)
    returned_rows = result['rows']
    assert_equal(1, len(returned_rows))
|
||||
|
||||
def test_semicolon(self):
    """Semicolon-separated addresses geocode like comma-separated ones.

    The same address is sent twice, once with `;` and once with `,` as
    the separator; both rows must yield identical coordinates.
    """
    query = (
        "select *, st_x(the_geom), st_y(the_geom) "
        "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( "
        "'select * from jsonb_to_recordset(''["
        "{\"cartodb_id\": 1, \"address\": \"1900 amphitheatre parkway; mountain view; ca; us\"},"
        "{\"cartodb_id\": 2, \"address\": \"1900 amphitheatre parkway, mountain view, ca, us\"}"
        "]''::jsonb) as (cartodb_id integer, address text)', "
        "'address', null, null, null)"
    )
    result = self._run_authenticated(query)

    points = self._x_y_by_cartodb_id(result)
    with_semicolons, with_commas = points[1], points[2]
    assert_equal(with_semicolons, with_commas)
|
||||
|
||||
def test_component_aggregation(self):
    """Geocode from separate street, city and country columns.

    The third component argument is passed as NULL (presumably the state
    column -- confirm against the SQL function signature). The resulting
    point must be close to the Barcelona fixture.
    """
    query = (
        "select cartodb_id, st_x(the_geom), st_y(the_geom) "
        "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point("
        "'select 1 as cartodb_id, ''Spain'' as country, "
        "''Barcelona'' as city, "
        "''Plaza España'' as street' "
        ", 'street', 'city', NULL, 'country')"
    )
    result = self._run_authenticated(query)

    geocoded_by_id = self._x_y_by_cartodb_id(result)
    assert_close_enough(geocoded_by_id[1],
                        self.fixture_points['Plaza España, Barcelona'])
|
||||
|
||||
def _test_known_table(self):
    """Geocode the rows of an existing table and check the row count.

    First counts the rows selected by the subquery, then geocodes that
    same subquery and expects one result per counted row, with the first
    row carrying a non-null x coordinate.
    """
    subquery = 'select * from unknown_table where cartodb_id < 1100'

    count_query = 'select count(1) from ({}) _x'.format(subquery)
    count_result = self._run_authenticated(count_query)
    expected_count = count_result['rows'][0]['count']

    query_template = (
        "select cartodb_id, st_x(the_geom), st_y(the_geom) "
        "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point("
        "'{}' "
        ", 'street', 'city', NULL, 'country')"
    )
    result = self._run_authenticated(query_template.format(subquery))

    assert_equal(len(result['rows']), expected_count)
    assert_not_equal(result['rows'][0]['st_x'], None)
|
||||
|
||||
def test_metadata(self):
    """Check the metadata returned for two geocoded addresses.

    Rows are compared positionally against the expected metadata
    fixtures, after verifying that both sequences have the same length.
    """
    query = (
        "select metadata "
        "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point("
        "'select 1 as cartodb_id, ''Spain'' as country, "
        "''Barcelona'' as city, "
        "''Plaza España'' as street "
        "UNION "
        "select 2 as cartodb_id, ''Spain'' as country, "
        "''Valladolid'' as city, "
        "''Santiago Rusiñol 123'' as street' "
        ", 'street', 'city', NULL, 'country')"
    )
    result = self._run_authenticated(query)

    expected_metadata = [
        self.metadata['Plaza España, Barcelona'],
        self.metadata['Santiago Rusiñol 123, Valladolid']
    ]
    assert_equal(len(result['rows']), len(expected_metadata))
    for row, wanted in zip(result['rows'], expected_metadata):
        self.assert_metadata(row['metadata'], wanted)
|
||||
|
||||
def _run_authenticated(self, query):
    """Run `query` against the SQL API with the configured API key.

    The key from `self.env_variables['api_key']` is appended to the query
    as an `api_key` parameter, and the result of
    `IntegrationTestHelper.execute_query_raw` is returned unchanged.
    """
    api_key = self.env_variables['api_key']
    authenticated_query = "{}&api_key={}".format(query, api_key)
    return IntegrationTestHelper.execute_query_raw(
        self.sql_api_url, authenticated_query)
|
||||
|
||||
@staticmethod
|
||||
def _x_y_by_cartodb_id(response):
|
||||
return {r['cartodb_id']: [r['st_x'], r['st_y']]
|
||||
for r in response['rows']}
|
||||
|
||||
@staticmethod
def assert_close_points(points_a_by_cartodb_id, points_b_by_cartodb_id):
    """Assert that two id -> point mappings hold pairwise close points.

    Both mappings must have the same number of entries. Every point in
    `points_a_by_cartodb_id` is then compared with `assert_close_enough`
    against the point stored under the same id in
    `points_b_by_cartodb_id`; an id missing from the second mapping
    raises KeyError.
    """
    assert_equal(len(points_a_by_cartodb_id), len(points_b_by_cartodb_id))
    # items() is available on Python 2 and 3 alike; iteritems() only
    # exists on Python 2 and raises AttributeError on Python 3.
    for cartodb_id, point in points_a_by_cartodb_id.items():
        assert_close_enough(point, points_b_by_cartodb_id[cartodb_id])
|
||||
|
||||
@staticmethod
def assert_metadata(metadata, expected):
    """Assert that geocoding `metadata` matches the `expected` fixture.

    `relevance` is compared through `isclose` with a third argument of
    0.02 (presumably a tolerance -- confirm against the helper), while
    `precision` and `match_types` must be equal.
    """
    actual_relevance = metadata['relevance']
    wanted_relevance = expected['relevance']
    relevance_matches = isclose(actual_relevance, wanted_relevance, 0.02)
    failure_message = '{} not close to {}'.format(actual_relevance,
                                                  wanted_relevance)
    assert_true(relevance_matches, failure_message)

    for field in ('precision', 'match_types'):
        assert_equal(metadata[field], expected[field])
|
||||
|
||||
3
test/requirements.txt
Normal file
3
test/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
# Integration tests dependencies
|
||||
requests==2.9.1
|
||||
nose==1.3.7
|
||||
@@ -74,11 +74,11 @@ def set_environment_variables(username, api_key, table_name, host, schema):
|
||||
|
||||
|
||||
def clean_environment_variables():
    """Remove the GEOCODER_API_TEST_* variables from the process environment.

    Uses `os.environ.pop(name, None)` so that a variable which is already
    unset is silently skipped; a plain `del os.environ[name]` would raise
    KeyError in that case and abort the cleanup half-way.
    """
    for name in ("GEOCODER_API_TEST_USERNAME",
                 "GEOCODER_API_TEST_API_KEY",
                 "GEOCODER_API_TEST_TABLE_NAME",
                 "GEOCODER_API_TEST_HOST",
                 "GEOCODER_API_TEST_SCHEMA"):
        os.environ.pop(name, None)
|
||||
|
||||
# Script entry point: run main() only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user