Compare commits

...

90 Commits

Author SHA1 Message Date
Juan Ignacio Sánchez Lara
f86558c30b Merge pull request #507 from CartoDB/development
Version `0.25.0` of the client, `0.32.0` of the server, and `0.19.0` of the Python library
2018-07-19 11:36:44 +02:00
Juan Ignacio Sánchez Lara
0bd2fbf80a Merge pull request #499 from CartoDB/geocoder_boost
Geocoder boost
2018-07-19 11:32:23 +02:00
Juan Ignacio Sánchez Lara
887fc15915 NEWS versions update 2018-07-19 11:31:35 +02:00
Juan Ignacio Sánchez Lara
5c09a2eb29 Missing grant 2018-07-19 10:36:49 +02:00
Juan Ignacio Sánchez Lara
b0c1948c14 TL;DR: safer deployment and minor fixes
- Instead of modifying cdb_service_quota_info to return max_batch_size,
a new type (service_quota_info_batch) and a new function
(cdb_service_quota_info_batch) are created. That makes deployment safe.
- Fixes geocoding with forced batch size 1.
- Improves namespacing for count_estimate (->
cdb_dataservices_client.cdb_count_estimate).
- Improves namespacing for jsonb_array_casttext (->
cdb_dataservices_client.cdb_jsonb_array_casttext).
2018-07-18 14:57:40 +02:00
Juan Ignacio Sánchez Lara
0c5e9da028 Upgrade/downgrade scripts for server 0.32.0 and client 0.25.0 2018-07-17 19:48:13 +02:00
Juan Ignacio Sánchez Lara
f534da906c Merge pull request #506 from CartoDB/geocoder_boost_refactor
Geocoder boost refactor
2018-07-17 17:59:30 +02:00
Juan Ignacio Sánchez Lara
5e34faefe5 Quota test 2018-07-17 14:39:56 +02:00
Juan Ignacio Sánchez Lara
5e8dbaf239 Improvements in fixtures accuracy 2018-07-17 13:56:44 +02:00
Juan Ignacio Sánchez Lara
b90d402fa9 Round relevance (plus refactor) 2018-07-17 13:56:01 +02:00
Juan Ignacio Sánchez Lara
d060bd8229 Simplification of batching 2018-07-17 13:24:39 +02:00
Juan Ignacio Sánchez Lara
c104f6f34b Metadata attributes constant extraction 2018-07-17 12:46:16 +02:00
Juan Ignacio Sánchez Lara
e9ed3bca18 Safer comparison 2018-07-17 12:27:42 +02:00
Juan Ignacio Sánchez Lara
e2762a6e03 Removed debug traces 2018-07-17 10:06:43 +02:00
Juan Ignacio Sánchez Lara
8cb9e123b1 Helper function to convert json arrays to PG arrays 2018-07-16 19:55:04 +02:00
Juan Ignacio Sánchez Lara
e82346e7f6 match_types for batched HERE metadata 2018-07-16 12:43:40 +02:00
Juan Ignacio Sánchez Lara
080de34163 match_types for Mapbox metadata 2018-07-16 12:11:40 +02:00
Juan Ignacio Sánchez Lara
0a92ae1445 match_types for TomTom metadata 2018-07-16 12:01:55 +02:00
Juan Ignacio Sánchez Lara
0b635377ef match_types for HERE metadata 2018-07-16 11:59:43 +02:00
Juan Ignacio Sánchez Lara
f2197d4b2a match_types for Google metadata 2018-07-16 11:34:33 +02:00
Juan Ignacio Sánchez Lara
6e78da55b2 Precision metadata for Mapbox 2018-07-11 19:28:16 +02:00
Juan Ignacio Sánchez Lara
4123a4c442 Precision metadata for TomTom 2018-07-11 19:09:02 +02:00
Juan Ignacio Sánchez Lara
dbb4f9204a Precision metadata for HERE 2018-07-11 15:30:51 +02:00
Juan Ignacio Sánchez Lara
67fee1cce8 Precision metadata for Google 2018-07-11 14:06:50 +02:00
Juan Ignacio Sánchez Lara
b779742585 Fix logging on error 2018-07-11 12:51:56 +02:00
Juan Ignacio Sánchez Lara
da78b0bc65 Fix batching with negatives cartodb_id 2018-07-11 12:28:39 +02:00
Juan Ignacio Sánchez Lara
d46d51c3bb Relevance metadata for Google 2018-07-11 11:43:54 +02:00
Juan Ignacio Sánchez Lara
0b2ee85c11 TomTom normalization 2018-07-11 09:30:28 +02:00
Juan Ignacio Sánchez Lara
825e3b7ee8 Relevance metadata for Mapbox 2018-07-11 09:30:08 +02:00
Juan Ignacio Sánchez Lara
2af9204542 Relevance metadata for TomTom 2018-07-10 21:21:42 +02:00
Juan Ignacio Sánchez Lara
34e622b809 Relevance metadata for HERE 2018-07-10 20:30:01 +02:00
Juan Ignacio Sánchez Lara
531ad28158 Send optimal batch size 2018-07-10 19:31:58 +02:00
Juan Ignacio Sánchez Lara
286a75fa8e _bulk_geocode logic extraction 2018-07-10 15:17:14 +02:00
Juan Ignacio Sánchez Lara
a6c5c21131 Serial geocode for Google bulk 2018-07-10 13:45:01 +02:00
Juan Ignacio Sánchez Lara
f6b7c13dde GoogleMapsBulkGeocoder extraction 2018-07-10 13:38:51 +02:00
Juan Ignacio Sánchez Lara
1ffe3658fe Revert "maxresults depends on batch"
This reverts commit bf8b76b5fe.
2018-07-10 12:28:16 +02:00
Juan Ignacio Sánchez Lara
8e430ce1c1 Google geocoder works better concatenating all components 2018-07-10 11:17:21 +02:00
Juan Ignacio Sánchez Lara
8ebd22bc26 Fixes error message check 2018-07-10 11:17:21 +02:00
Juan Ignacio Sánchez Lara
0ff950d01e Merge pull request #505 from CartoDB/geocoder_boost_mapbox
Mapbox bulk geocoding
2018-07-10 11:16:52 +02:00
Juan Ignacio Sánchez Lara
fed8894c33 Merge pull request #504 from CartoDB/geocoder_boost_tomtom
Geocoder boost tomtom
2018-07-10 09:50:44 +02:00
Juan Ignacio Sánchez Lara
cce5f92312 Escape semicolons at Mapbox batch 2018-07-09 18:17:45 +02:00
Juan Ignacio Sánchez Lara
40ace9cfaa Send _serial_geocode for single results 2018-07-09 17:42:08 +02:00
Juan Ignacio Sánchez Lara
f618e4aec3 Mapbox bulk geocoding 2018-07-09 17:35:56 +02:00
Juan Ignacio Sánchez Lara
ae84122c3d countries --> country 2018-07-09 15:35:37 +02:00
Juan Ignacio Sánchez Lara
b8475bac30 TomTom batch geocoding 2018-07-06 20:11:48 +02:00
Juan Ignacio Sánchez Lara
bf8b76b5fe maxresults depends on batch 2018-07-06 20:04:28 +02:00
Juan Ignacio Sánchez Lara
31afc82b56 TomTom bulk geocoding by bypassing to serial 2018-07-06 15:52:45 +02:00
Juan Ignacio Sánchez Lara
5be43e15c0 Fix error message on TomTom error 2018-07-06 15:52:45 +02:00
Juan Ignacio Sánchez Lara
6da70fd8ea Fix encoding of missing fields 2018-07-06 15:52:10 +02:00
Juan Ignacio Sánchez Lara
d00a48f16e Check array length 2018-07-05 17:57:17 +02:00
Juan Ignacio Sánchez Lara
91012ea62d Updated version script 2018-07-05 16:26:28 +02:00
Juan Ignacio Sánchez Lara
23e3de9da5 Add missing permission for cdb_bulk_geocode_street_point 2018-07-05 15:10:05 +02:00
Juan Ignacio Sánchez Lara
6c89ca8d70 Fix exception raising 2018-07-05 12:42:18 +02:00
Juan Ignacio Sánchez Lara
3c07133912 Improve logging of exceptions from sys.exc_info 2018-07-05 08:50:26 +02:00
Juan Ignacio Sánchez Lara
5b46c1527e Revert "Revert expected warnings"
This reverts commit 89e9bf1ed6.
2018-07-05 08:23:33 +02:00
Juan Ignacio Sánchez Lara
89e9bf1ed6 Revert expected warnings 2018-07-04 15:07:54 +02:00
Antonio Carlón
ff6cbd1d5b Merge pull request #503 from CartoDB/geocoder_boost_here
Geocoder boost HERE
2018-07-04 14:26:20 +02:00
Juan Ignacio Sánchez Lara
8968f0e6ec Fix message 2018-07-04 13:36:18 +02:00
Juan Ignacio Sánchez Lara
44744de73d Explicit check for result length 2018-07-04 13:29:37 +02:00
Juan Ignacio Sánchez Lara
754c364d22 Reduce precision on fixture points 2018-07-04 13:24:11 +02:00
Juan Ignacio Sánchez Lara
9856adb7ce Explicit NotImplementedError 2018-07-04 12:33:29 +02:00
Juan Ignacio Sánchez Lara
e416a8a641 HERE batch support 2018-07-02 18:35:36 +02:00
Juan Ignacio Sánchez Lara
fc610313bf Test refactor and Here serial batch 2018-06-29 19:18:53 +02:00
Juan Ignacio Sánchez Lara
18e2349713 Bulk geocoding refactor 2018-06-29 14:59:07 +02:00
Juan Ignacio Sánchez Lara
e884b1d1f4 Fixture fix 2018-06-28 13:11:28 +02:00
Juan Ignacio Sánchez Lara
45b8fc4ecf Quota and batch size checks fixes and tests 2018-06-28 13:06:52 +02:00
Juan Ignacio Sánchez Lara
379257b4b4 Fix quota check 2018-06-27 19:07:19 +02:00
Juan Ignacio Sánchez Lara
8fe9903e7a searchtext -> searches for bulk geocoding 2018-06-27 15:14:11 +02:00
Juan Ignacio Sánchez Lara
d0b04a97b8 Better Google geocoding through concatenation 2018-06-27 13:47:57 +02:00
Juan Ignacio Sánchez Lara
a931086e29 Log now also logs the exception, fixing fixtures 2018-06-27 13:13:08 +02:00
Juan Ignacio Sánchez Lara
8f4249ee24 Merge branch 'development' into geocoder_boost 2018-06-27 11:17:56 +02:00
Mario de Frutos
71b87834b3 Merge pull request #501 from CartoDB/fix_mapbox_routing_fixture
Fix mapbox routing fixture
2018-06-27 11:13:22 +02:00
Juan Ignacio Sánchez Lara
9c90c539f8 Fix Mapbox routing fixture shape 2018-06-27 11:09:32 +02:00
Juan Ignacio Sánchez Lara
ed828c3b89 test_templating_with_two_columns_geocoding 2018-06-27 10:13:59 +02:00
Juan Ignacio Sánchez Lara
675ef72e30 test_templating_geocoding 2018-06-27 08:57:04 +02:00
Juan Ignacio Sánchez Lara
c13d29e4c2 test_free_text_geocoding 2018-06-26 16:52:41 +02:00
Juan Ignacio Sánchez Lara
d5e47e39ab Revert bulk geocoding column parameters order and test_city_column_geocoding 2018-06-26 13:59:44 +02:00
Juan Ignacio Sánchez Lara
c2a207b1cd Batching, better support for null columns, and bulk geocoding integration tests 2018-06-26 13:35:24 +02:00
Juan Ignacio Sánchez Lara
e280444479 Integration tests dependencies 2018-06-26 11:07:38 +02:00
Juan Ignacio Sánchez Lara
9b64d91998 Fixed env variables cleaning 2018-06-26 10:56:14 +02:00
Juan Ignacio Sánchez Lara
91d93bef79 Random temporary table name 2018-06-22 14:34:42 +02:00
Juan Ignacio Sánchez Lara
bbbf70f3ac Street level columns for country, city and state 2018-06-22 12:19:51 +02:00
Juan Ignacio Sánchez Lara
4d2abc7667 Simpler and more precise quota check 2018-06-22 09:41:29 +02:00
Juan Ignacio Sánchez Lara
58d70e252f checked, bulked cdb_bulk_geocode_street_point 2018-06-21 10:23:39 +02:00
Juan Ignacio Sánchez Lara
e85f43f1d1 cdb_bulk_geocode_street_point skeleton 2018-06-15 09:55:52 +02:00
Juan Ignacio Sánchez Lara
f3f2b213e7 Bump versions 2018-06-11 16:26:57 +02:00
Juan Ignacio Sánchez Lara
34fc6439d2 cdb_bulk_geocode_street_point functions 2018-06-11 16:12:41 +02:00
Juan Ignacio Sánchez Lara
3f08d37ef7 Google bulk_geocoder 2018-06-11 12:56:07 +02:00
Juan Ignacio Sánchez Lara
be446c1bf2 exception logging at _send_to_plpy 2018-06-07 10:29:57 +02:00
Juan Ignacio Sánchez Lara
5251534283 Allow using non-Premium keys for Google Maps client 2018-06-04 18:07:16 +02:00
54 changed files with 11089 additions and 109 deletions

2
.gitignore vendored
View File

@@ -5,3 +5,5 @@ cartodb_services.egg-info/
build/
dist/
.vscode/
.idea/
venv/

View File

@@ -1,3 +1,8 @@
Jul 19th, 2018
==============
* Version `0.25.0` of the client, `0.32.0` of the server, and `0.19.0` of the Python library.
* Support for batch street-level geocoding.
May 7th, 2018
=============
* Version `0.24.0` of the client, `0.31.0` of the server, and `0.18.0` of the python library.

View File

@@ -0,0 +1,276 @@
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION cdb_dataservices_client UPDATE TO '<%= version %>'" to load this file. \quit
-- Make sure we have a sane search path to create/update the extension
SET search_path = "$user",cartodb,public,cdb_dataservices_client;
-- HERE goes your code to upgrade/downgrade
-- cdb_count_estimate(query): returns a row-count estimate for `query`.
-- It runs EXPLAIN on the query text and returns the first " rows=N" figure
-- found in the plan output, or NULL when no plan line contains one.
-- Taken from https://wiki.postgresql.org/wiki/Count_estimate
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_count_estimate(query text) RETURNS INTEGER AS
$func$
DECLARE
rec record;
ROWS INTEGER;
BEGIN
-- Each record is one line of EXPLAIN output; stop at the first line that yields a number.
FOR rec IN EXECUTE 'EXPLAIN ' || query LOOP
ROWS := SUBSTRING(rec."QUERY PLAN" FROM ' rows=([[:digit:]]+)');
EXIT WHEN ROWS IS NOT NULL;
END LOOP;
RETURN ROWS;
END
$func$ LANGUAGE plpgsql;
-- cdb_jsonb_array_casttext(jsonb): converts a JSON array into a PostgreSQL text[].
-- Concatenating with an empty text[] turns the NULL that array_agg yields for
-- zero elements into an empty array.
-- Taken from https://stackoverflow.com/a/48013356/351721
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_jsonb_array_casttext(jsonb) RETURNS text[] AS $f$
SELECT array_agg(x) || ARRAY[]::text[] FROM jsonb_array_elements_text($1) t(x);
$f$ LANGUAGE sql IMMUTABLE;--
-- Row type returned by the bulk geocoding functions: the id of the input row,
-- the resulting geometry and a jsonb document with per-result metadata.
-- NOTE(review): the_geom is declared as Multipolygon although the functions
-- returning this type are named *_street_point -- confirm the intended geometry type.
CREATE TYPE cdb_dataservices_client.geocoding AS (
cartodb_id integer,
the_geom geometry(Multipolygon,4326),
metadata jsonb
);
-- Quota information for one service plus max_batch_size, the largest batch the
-- service accepts; cdb_bulk_geocode_street_point reads monthly_quota, used_quota
-- and max_batch_size from rows of this type.
CREATE TYPE cdb_dataservices_client.service_quota_info_batch AS (
service cdb_dataservices_client.service_type,
monthly_quota NUMERIC,
used_quota NUMERIC,
soft_limit BOOLEAN,
provider TEXT,
max_batch_size NUMERIC
);
--
-- Public dataservices API function
--
-- These are the only ones with permissions to publicuser role
-- and should also be the only ones with SECURITY DEFINER
--
-- _cdb_bulk_geocode_street_point(searches): geocodes one batch of searches.
-- Rejects anonymous sessions, reads username/orgname from _cdb_entity_config()
-- and forwards the batch to __cdb_bulk_geocode_street_point.
-- Raises an exception when the session is anonymous or no username is configured.
-- NOTE(review): declared STABLE although it forwards to a VOLATILE function -- confirm.
CREATE OR REPLACE FUNCTION cdb_dataservices_client._cdb_bulk_geocode_street_point (searches jsonb)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
DECLARE
username text;
orgname text;
BEGIN
-- '~' is a regex match: '_*' means "zero or more underscores", and the pattern is unanchored.
IF session_user = 'publicuser' OR session_user ~ 'cartodb_publicuser_*' THEN
RAISE EXCEPTION 'The api_key must be provided';
END IF;
SELECT u, o INTO username, orgname FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text);
-- JSON value stored "" is taken as literal
IF username IS NULL OR username = '' OR username = '""' THEN
RAISE EXCEPTION 'Username is a mandatory argument, check it out';
END IF;
RETURN QUERY SELECT * FROM cdb_dataservices_client.__cdb_bulk_geocode_street_point(username, orgname, searches);
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER STABLE PARALLEL UNSAFE;
--
-- Public dataservices API function
--
-- These are the only ones with permissions to publicuser role
-- and should also be the only ones with SECURITY DEFINER
--
-- cdb_service_quota_info_batch(): returns the quota rows (including
-- max_batch_size) for the current user by forwarding username/orgname from
-- _cdb_entity_config() to _cdb_service_quota_info_batch.
-- Raises an exception when the session is anonymous or no username is configured.
-- The unqualified return type is resolved through the search_path set at the top of this script.
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch ()
RETURNS SETOF service_quota_info_batch AS $$
DECLARE
username text;
orgname text;
BEGIN
-- '~' is a regex match: '_*' means "zero or more underscores", and the pattern is unanchored.
IF session_user = 'publicuser' OR session_user ~ 'cartodb_publicuser_*' THEN
RAISE EXCEPTION 'The api_key must be provided';
END IF;
SELECT u, o INTO username, orgname FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text);
-- JSON value stored "" is taken as literal
IF username IS NULL OR username = '' OR username = '""' THEN
RAISE EXCEPTION 'Username is a mandatory argument, check it out';
END IF;
RETURN QUERY SELECT * FROM cdb_dataservices_client._cdb_service_quota_info_batch(username, orgname);
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER STABLE PARALLEL UNSAFE;
--
-- cdb_bulk_geocode_street_point: bulk street-level geocoding of the rows
-- returned by an arbitrary query.
--
-- Parameters:
--   query           SQL text to geocode; it must expose a cartodb_id column.
--   street_column   SQL expression (column name or quoted literal) with the address.
--   city_column, state_column, country_column
--                   optional SQL expressions; NULL is sent as an empty string.
--   batch_size      rows per request. Defaults to the provider's max_batch_size
--                   and is capped at MAX_SAFE_BATCH_SIZE.
-- Returns: the cdb_dataservices_client.geocoding rows produced by
--          _cdb_bulk_geocode_street_point for every batch.
-- Raises:  when no quota row exists for hires_geocoder, when batch_size is not
--          positive or exceeds max_batch_size, or when the remaining quota is
--          lower than the number of rows to geocode.
--
-- This function runs caller-supplied SQL through EXECUTE, so it deliberately
-- runs with the caller's privileges (no SECURITY DEFINER). The functions it
-- calls carry their own SECURITY DEFINER and grants.
--
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text,
    street_column text, city_column text default null, state_column text default null, country_column text default null, batch_size integer DEFAULT NULL)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
DECLARE
  query_row_count integer;
  enough_quota boolean;
  remaining_quota integer;
  max_batch_size integer;
  cartodb_id_batch integer;
  batches_n integer;
  MAX_SAFE_BATCH_SIZE CONSTANT numeric := 5000;
  temp_table_name text;
BEGIN
  SELECT csqi.monthly_quota - csqi.used_quota AS remaining_quota, csqi.max_batch_size
  INTO remaining_quota, max_batch_size
  FROM cdb_dataservices_client.cdb_service_quota_info_batch() csqi
  WHERE service = 'hires_geocoder';

  -- Without a quota row both values stay NULL, which would silently skip the
  -- quota comparison below and produce malformed dynamic SQL.
  IF NOT FOUND THEN
    RAISE EXCEPTION 'No quota information found for the hires_geocoder service';
  END IF;

  RAISE DEBUG 'remaining_quota: %; max_batch_size: %', remaining_quota, max_batch_size;

  IF batch_size IS NULL THEN
    batch_size := max_batch_size;
  ELSIF batch_size > max_batch_size THEN
    RAISE EXCEPTION 'batch_size must be lower than %', max_batch_size + 1;
  END IF;

  -- batch_size is used as a divisor below: reject NULL, zero and negative values.
  IF batch_size IS NULL OR batch_size < 1 THEN
    RAISE EXCEPTION 'batch_size must be a positive integer';
  END IF;

  IF batch_size > MAX_SAFE_BATCH_SIZE THEN
    batch_size := MAX_SAFE_BATCH_SIZE;
  END IF;

  EXECUTE format('SELECT count(1), ceil(count(1)::float/%s) FROM (%s) _x', batch_size, query)
  INTO query_row_count, batches_n;

  RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
      query_row_count, query, country_column, state_column, city_column, street_column;

  -- NOTE(review): the result of cdb_enough_quota is not consulted; the check
  -- that matters is the remaining_quota comparison right below.
  SELECT cdb_dataservices_client.cdb_enough_quota('hires_geocoder', query_row_count) INTO enough_quota;
  IF remaining_quota < query_row_count THEN
    RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
  END IF;

  RAISE DEBUG 'batches_n: %', batches_n;

  -- Results are accumulated in a uniquely named temporary table. ON COMMIT DROP
  -- keeps it from piling up in long-lived sessions; RETURN QUERY below
  -- copies the rows into the function result before the transaction ends.
  temp_table_name := 'bulk_geocode_street_' || md5(random()::text);

  EXECUTE format('CREATE TEMPORARY TABLE %I ' ||
   '(cartodb_id integer, the_geom geometry(Multipolygon,4326), metadata jsonb) ON COMMIT DROP',
   temp_table_name);

  -- A NULL column expression becomes the SQL literal '' (empty string).
  select
    coalesce(street_column, ''''''), coalesce(city_column, ''''''),
    coalesce(state_column, ''''''), coalesce(country_column, '''''')
  into street_column, city_column, state_column, country_column;

  IF batches_n > 0 THEN
    FOR cartodb_id_batch in 0..(batches_n - 1)
    LOOP
      -- The query is re-evaluated for every batch, so rows are numbered in
      -- cartodb_id order to make each row fall into the same batch every time.
      EXECUTE format(
        'WITH geocoding_data as (' ||
        '    SELECT ' ||
        '        json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' ||
        '        floor((row_number() over (order by cartodb_id) - 1)::float/$1) as batch' ||
        '    FROM (%s) _x' ||
        ') ' ||
        'INSERT INTO %I SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' ||
        'FROM geocoding_data ' ||
        'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name)
      USING batch_size, cartodb_id_batch;
    END LOOP;
  END IF;

  RETURN QUERY EXECUTE 'SELECT * FROM ' || quote_ident(temp_table_name);
END;
$$ LANGUAGE 'plpgsql' VOLATILE PARALLEL UNSAFE;
--
-- Exception-safe private DataServices API function
--
-- Same checks and forwarding as _cdb_bulk_geocode_street_point, but an error
-- raised by the forwarded call is caught and re-emitted as a WARNING carrying
-- the original SQLSTATE, message and context; in that case no rows are returned.
-- The anonymous-session and missing-username checks still raise exceptions.
CREATE OR REPLACE FUNCTION cdb_dataservices_client.__cdb_bulk_geocode_street_point_exception_safe (searches jsonb)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
DECLARE
username text;
orgname text;
_returned_sqlstate TEXT;
_message_text TEXT;
_pg_exception_context TEXT;
BEGIN
IF session_user = 'publicuser' OR session_user ~ 'cartodb_publicuser_*' THEN
RAISE EXCEPTION 'The api_key must be provided';
END IF;
SELECT u, o INTO username, orgname FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text);
-- JSON value stored "" is taken as literal
IF username IS NULL OR username = '' OR username = '""' THEN
RAISE EXCEPTION 'Username is a mandatory argument, check it out';
END IF;
-- Only the forwarded call is wrapped in the exception handler.
BEGIN
RETURN QUERY SELECT * FROM cdb_dataservices_client.__cdb_bulk_geocode_street_point(username, orgname, searches);
EXCEPTION
WHEN OTHERS THEN
GET STACKED DIAGNOSTICS _returned_sqlstate = RETURNED_SQLSTATE,
_message_text = MESSAGE_TEXT,
_pg_exception_context = PG_EXCEPTION_CONTEXT;
RAISE WARNING USING ERRCODE = _returned_sqlstate, MESSAGE = _message_text, DETAIL = _pg_exception_context;
END;
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER STABLE PARALLEL UNSAFE;
--
-- Exception-safe private DataServices API function
--
-- Same checks and forwarding as cdb_service_quota_info_batch, but an error
-- raised by the forwarded call is caught and re-emitted as a WARNING carrying
-- the original SQLSTATE, message and context; in that case no rows are returned.
-- The anonymous-session and missing-username checks still raise exceptions.
CREATE OR REPLACE FUNCTION cdb_dataservices_client._cdb_service_quota_info_batch_exception_safe ()
RETURNS SETOF service_quota_info_batch AS $$
DECLARE
username text;
orgname text;
_returned_sqlstate TEXT;
_message_text TEXT;
_pg_exception_context TEXT;
BEGIN
IF session_user = 'publicuser' OR session_user ~ 'cartodb_publicuser_*' THEN
RAISE EXCEPTION 'The api_key must be provided';
END IF;
SELECT u, o INTO username, orgname FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text);
-- JSON value stored "" is taken as literal
IF username IS NULL OR username = '' OR username = '""' THEN
RAISE EXCEPTION 'Username is a mandatory argument, check it out';
END IF;
-- Only the forwarded call is wrapped in the exception handler.
BEGIN
RETURN QUERY SELECT * FROM cdb_dataservices_client._cdb_service_quota_info_batch(username, orgname);
EXCEPTION
WHEN OTHERS THEN
GET STACKED DIAGNOSTICS _returned_sqlstate = RETURNED_SQLSTATE,
_message_text = MESSAGE_TEXT,
_pg_exception_context = PG_EXCEPTION_CONTEXT;
RAISE WARNING USING ERRCODE = _returned_sqlstate, MESSAGE = _message_text, DETAIL = _pg_exception_context;
END;
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER STABLE PARALLEL UNSAFE;
-- plproxy stub: connects with the string returned by _server_conn_str() and
-- runs cdb_dataservices_server._cdb_bulk_geocode_street_point there with the
-- given username, orgname and searches.
DROP FUNCTION IF EXISTS cdb_dataservices_client.__cdb_bulk_geocode_street_point (username text, orgname text, searches jsonb);
CREATE OR REPLACE FUNCTION cdb_dataservices_client.__cdb_bulk_geocode_street_point (username text, orgname text, searches jsonb)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
CONNECT cdb_dataservices_client._server_conn_str();
SELECT * FROM cdb_dataservices_server._cdb_bulk_geocode_street_point (username, orgname, searches);
$$ LANGUAGE plproxy VOLATILE PARALLEL UNSAFE;
-- plproxy stub: connects with the string returned by _server_conn_str() and
-- runs cdb_dataservices_server.cdb_service_quota_info_batch there with the
-- given username and orgname.
DROP FUNCTION IF EXISTS cdb_dataservices_client._cdb_service_quota_info_batch (username text, orgname text);
CREATE OR REPLACE FUNCTION cdb_dataservices_client._cdb_service_quota_info_batch (username text, orgname text)
RETURNS SETOF service_quota_info_batch AS $$
CONNECT cdb_dataservices_client._server_conn_str();
SELECT * FROM cdb_dataservices_server.cdb_service_quota_info_batch (username, orgname);
$$ LANGUAGE plproxy VOLATILE PARALLEL UNSAFE;
-- Execution grants for the functions created by this script.
-- NOTE(review): the last grant exposes the plproxy function that takes an
-- explicit username/orgname to publicuser, while no such grant exists here for
-- _cdb_service_quota_info_batch(text, text) -- confirm this is intended.
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._cdb_bulk_geocode_street_point(searches jsonb) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.__cdb_bulk_geocode_street_point_exception_safe(searches jsonb ) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch() TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._cdb_service_quota_info_batch_exception_safe( ) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point(query text, street_column text, city_column text, state_column text, country_column text, batch_size integer) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.cdb_count_estimate(query text) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.cdb_jsonb_array_casttext(jsonb) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.__cdb_bulk_geocode_street_point (username text, orgname text, searches jsonb) TO publicuser;

View File

@@ -0,0 +1,29 @@
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION cdb_dataservices_client UPDATE TO '<%= version %>'" to load this file. \quit
-- Make sure we have a sane search path to create/update the extension
SET search_path = "$user",cartodb,public,cdb_dataservices_client;
-- HERE goes your code to upgrade/downgrade
-- Downgrade: remove the bulk geocoding and batch quota functions, then the two
-- composite types. The types are dropped last because the functions above
-- return them.
DROP FUNCTION IF EXISTS cdb_dataservices_client.cdb_count_estimate(query text);
DROP FUNCTION IF EXISTS cdb_dataservices_client.cdb_jsonb_array_casttext(jsonb);
DROP FUNCTION IF EXISTS cdb_dataservices_client._cdb_bulk_geocode_street_point (jsonb);
DROP FUNCTION IF EXISTS cdb_dataservices_client.cdb_service_quota_info_batch();
DROP FUNCTION IF EXISTS cdb_dataservices_client.cdb_bulk_geocode_street_point (text, text, text, text, text, integer);
DROP FUNCTION IF EXISTS cdb_dataservices_client.__cdb_bulk_geocode_street_point_exception_safe (jsonb);
DROP FUNCTION IF EXISTS cdb_dataservices_client._cdb_service_quota_info_batch_exception_safe ();
DROP FUNCTION IF EXISTS cdb_dataservices_client.__cdb_bulk_geocode_street_point (text, text, jsonb);
DROP FUNCTION IF EXISTS cdb_dataservices_client._cdb_service_quota_info_batch (text, text);
DROP TYPE IF EXISTS cdb_dataservices_client.service_quota_info_batch;
DROP TYPE IF EXISTS cdb_dataservices_client.geocoding;

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
comment = 'CartoDB dataservices client API extension'
default_version = '0.24.0'
default_version = '0.25.0'
requires = 'plproxy, cartodb'
superuser = true
schema = cdb_dataservices_client

View File

@@ -70,6 +70,13 @@
- { name: state_province, type: text, default: 'NULL'}
- { name: country, type: text, default: 'NULL'}
- name: _cdb_bulk_geocode_street_point
return_type: SETOF cdb_dataservices_client.geocoding
multi_row: true
multi_field: true
params:
- { name: searches, type: jsonb } # Array of JSON objects with id, address, city, state and country fields
- name: cdb_here_geocode_street_point
return_type: Geometry
params:
@@ -510,6 +517,13 @@
params:
- {}
- name: cdb_service_quota_info_batch
return_type: SETOF service_quota_info_batch
multi_row: true
multi_field: true
params:
- {}
- name: cdb_enough_quota
return_type: BOOLEAN
params:

20
client/sql/05_utils.sql Normal file
View File

@@ -0,0 +1,20 @@
-- cdb_count_estimate(query): row-count estimate for `query`, read from the
-- output of EXPLAIN. Returns NULL when no plan line carries a " rows=N" figure.
-- Taken from https://wiki.postgresql.org/wiki/Count_estimate
CREATE FUNCTION cdb_dataservices_client.cdb_count_estimate(query text) RETURNS INTEGER AS
$func$
DECLARE
  plan_line record;
  estimated_rows INTEGER;
BEGIN
  -- Walk the plan text line by line and keep the first row estimate found.
  FOR plan_line IN EXECUTE 'EXPLAIN ' || query LOOP
    estimated_rows := SUBSTRING(plan_line."QUERY PLAN" FROM ' rows=([[:digit:]]+)');
    IF estimated_rows IS NOT NULL THEN
      EXIT;
    END IF;
  END LOOP;
  RETURN estimated_rows;
END
$func$ LANGUAGE plpgsql;
-- cdb_jsonb_array_casttext(jsonb): converts a JSON array into a PostgreSQL text[].
-- Concatenating with an empty text[] turns the NULL that array_agg yields for
-- zero elements into an empty array.
-- Taken from https://stackoverflow.com/a/48013356/351721
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_jsonb_array_casttext(jsonb) RETURNS text[] AS $f$
SELECT array_agg(x) || ARRAY[]::text[] FROM jsonb_array_elements_text($1) t(x);
$f$ LANGUAGE sql IMMUTABLE;

View File

@@ -4,6 +4,12 @@ CREATE TYPE cdb_dataservices_client.isoline AS (
the_geom geometry(Multipolygon,4326)
);
-- Row type for bulk geocoding results: the id of the input row, the resulting
-- geometry and a jsonb document with per-result metadata.
-- NOTE(review): the_geom is declared as Multipolygon although this type is
-- meant for street *point* geocoding -- confirm the intended geometry type.
CREATE TYPE cdb_dataservices_client.geocoding AS (
cartodb_id integer,
the_geom geometry(Multipolygon,4326),
metadata jsonb
);
CREATE TYPE cdb_dataservices_client.simple_route AS (
shape geometry(LineString,4326),
length real,
@@ -35,3 +41,12 @@ CREATE TYPE cdb_dataservices_client.service_quota_info AS (
soft_limit BOOLEAN,
provider TEXT
);
-- Same fields as service_quota_info plus max_batch_size, the largest batch the
-- service accepts in a single request.
CREATE TYPE cdb_dataservices_client.service_quota_info_batch AS (
service cdb_dataservices_client.service_type,
monthly_quota NUMERIC,
used_quota NUMERIC,
soft_limit BOOLEAN,
provider TEXT,
max_batch_size NUMERIC
);

View File

@@ -0,0 +1,76 @@
--
-- cdb_bulk_geocode_street_point: bulk street-level geocoding of the rows
-- returned by an arbitrary query.
--
-- Parameters:
--   query           SQL text to geocode; it must expose a cartodb_id column.
--   street_column   SQL expression (column name or quoted literal) with the address.
--   city_column, state_column, country_column
--                   optional SQL expressions; NULL is sent as an empty string.
--   batch_size      rows per request. Defaults to the provider's max_batch_size
--                   and is capped at MAX_SAFE_BATCH_SIZE.
-- Returns: the cdb_dataservices_client.geocoding rows produced by
--          _cdb_bulk_geocode_street_point for every batch.
-- Raises:  when no quota row exists for hires_geocoder, when batch_size is not
--          positive or exceeds max_batch_size, or when the remaining quota is
--          lower than the number of rows to geocode.
--
-- This function runs caller-supplied SQL through EXECUTE, so it deliberately
-- runs with the caller's privileges (no SECURITY DEFINER). The functions it
-- calls carry their own SECURITY DEFINER and grants.
--
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text,
    street_column text, city_column text default null, state_column text default null, country_column text default null, batch_size integer DEFAULT NULL)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
DECLARE
  query_row_count integer;
  enough_quota boolean;
  remaining_quota integer;
  max_batch_size integer;
  cartodb_id_batch integer;
  batches_n integer;
  MAX_SAFE_BATCH_SIZE CONSTANT numeric := 5000;
  temp_table_name text;
BEGIN
  SELECT csqi.monthly_quota - csqi.used_quota AS remaining_quota, csqi.max_batch_size
  INTO remaining_quota, max_batch_size
  FROM cdb_dataservices_client.cdb_service_quota_info_batch() csqi
  WHERE service = 'hires_geocoder';

  -- Without a quota row both values stay NULL, which would silently skip the
  -- quota comparison below and produce malformed dynamic SQL.
  IF NOT FOUND THEN
    RAISE EXCEPTION 'No quota information found for the hires_geocoder service';
  END IF;

  RAISE DEBUG 'remaining_quota: %; max_batch_size: %', remaining_quota, max_batch_size;

  IF batch_size IS NULL THEN
    batch_size := max_batch_size;
  ELSIF batch_size > max_batch_size THEN
    RAISE EXCEPTION 'batch_size must be lower than %', max_batch_size + 1;
  END IF;

  -- batch_size is used as a divisor below: reject NULL, zero and negative values.
  IF batch_size IS NULL OR batch_size < 1 THEN
    RAISE EXCEPTION 'batch_size must be a positive integer';
  END IF;

  IF batch_size > MAX_SAFE_BATCH_SIZE THEN
    batch_size := MAX_SAFE_BATCH_SIZE;
  END IF;

  EXECUTE format('SELECT count(1), ceil(count(1)::float/%s) FROM (%s) _x', batch_size, query)
  INTO query_row_count, batches_n;

  RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
      query_row_count, query, country_column, state_column, city_column, street_column;

  -- NOTE(review): the result of cdb_enough_quota is not consulted; the check
  -- that matters is the remaining_quota comparison right below.
  SELECT cdb_dataservices_client.cdb_enough_quota('hires_geocoder', query_row_count) INTO enough_quota;
  IF remaining_quota < query_row_count THEN
    RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
  END IF;

  RAISE DEBUG 'batches_n: %', batches_n;

  -- Results are accumulated in a uniquely named temporary table. ON COMMIT DROP
  -- keeps it from piling up in long-lived sessions; RETURN QUERY below
  -- copies the rows into the function result before the transaction ends.
  temp_table_name := 'bulk_geocode_street_' || md5(random()::text);

  EXECUTE format('CREATE TEMPORARY TABLE %I ' ||
   '(cartodb_id integer, the_geom geometry(Multipolygon,4326), metadata jsonb) ON COMMIT DROP',
   temp_table_name);

  -- A NULL column expression becomes the SQL literal '' (empty string).
  select
    coalesce(street_column, ''''''), coalesce(city_column, ''''''),
    coalesce(state_column, ''''''), coalesce(country_column, '''''')
  into street_column, city_column, state_column, country_column;

  IF batches_n > 0 THEN
    FOR cartodb_id_batch in 0..(batches_n - 1)
    LOOP
      -- The query is re-evaluated for every batch, so rows are numbered in
      -- cartodb_id order to make each row fall into the same batch every time.
      EXECUTE format(
        'WITH geocoding_data as (' ||
        '    SELECT ' ||
        '        json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' ||
        '        floor((row_number() over (order by cartodb_id) - 1)::float/$1) as batch' ||
        '    FROM (%s) _x' ||
        ') ' ||
        'INSERT INTO %I SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' ||
        'FROM geocoding_data ' ||
        'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name)
      USING batch_size, cartodb_id_batch;
    END LOOP;
  END IF;

  RETURN QUERY EXECUTE 'SELECT * FROM ' || quote_ident(temp_table_name);
END;
$$ LANGUAGE 'plpgsql' VOLATILE PARALLEL UNSAFE;

View File

@@ -1,3 +1,7 @@
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._DST_PrepareTableOBS_GetMeasure(output_table_name text, params json) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._DST_PopulateTableOBS_GetMeasure(table_name text, output_table_name text, params json) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._OBS_PreCheck(source_query text, params JSON) TO publicuser;
-- Bulk geocoding entry point and the two utility functions are callable by publicuser.
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point(query text, street_column text, city_column text, state_column text, country_column text, batch_size integer) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.cdb_count_estimate(query text) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.cdb_jsonb_array_casttext(jsonb) TO publicuser;

View File

@@ -0,0 +1,21 @@
\set VERBOSITY terse
ALTER FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch() RENAME TO cdb_service_quota_info_batch_mocked;
CREATE FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch ()
RETURNS SETOF cdb_dataservices_client.service_quota_info_batch AS $$
SELECT 'hires_geocoder'::cdb_dataservices_client.service_type AS service, 0::NUMERIC AS monthly_quota, 0::NUMERIC AS used_quota, FALSE AS soft_limit, 'google' AS provider, 1::NUMERIC AS max_batch_size;
$$ LANGUAGE SQL;
ALTER FUNCTION cdb_dataservices_client.cdb_enough_quota (service TEXT ,input_size NUMERIC) RENAME TO cdb_enough_quota_mocked;
CREATE FUNCTION cdb_dataservices_client.cdb_enough_quota (service TEXT ,input_size NUMERIC)
RETURNS BOOLEAN as $$
SELECT FALSE;
$$ LANGUAGE SQL;
-- Test bulk size not mandatory (it will get the optimal)
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''', null, null, null, null);
ERROR: Remaining quota: 0. Estimated cost: 1
-- Test quota check by mocking quota 0
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''');
ERROR: Remaining quota: 0. Estimated cost: 1
DROP FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch;
DROP FUNCTION cdb_dataservices_client.cdb_enough_quota;
ALTER FUNCTION cdb_dataservices_client.cdb_enough_quota_mocked (service TEXT ,input_size NUMERIC) RENAME TO cdb_enough_quota;
ALTER FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch_mocked() RENAME TO cdb_service_quota_info_batch;

View File

@@ -0,0 +1,26 @@
\set VERBOSITY terse
ALTER FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch() RENAME TO cdb_service_quota_info_batch_mocked;
CREATE FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch ()
RETURNS SETOF cdb_dataservices_client.service_quota_info_batch AS $$
SELECT 'hires_geocoder'::cdb_dataservices_client.service_type AS service, 0::NUMERIC AS monthly_quota, 0::NUMERIC AS used_quota, FALSE AS soft_limit, 'google' AS provider, 1::NUMERIC AS max_batch_size;
$$ LANGUAGE SQL;
ALTER FUNCTION cdb_dataservices_client.cdb_enough_quota (service TEXT ,input_size NUMERIC) RENAME TO cdb_enough_quota_mocked;
CREATE FUNCTION cdb_dataservices_client.cdb_enough_quota (service TEXT ,input_size NUMERIC)
RETURNS BOOLEAN as $$
SELECT FALSE;
$$ LANGUAGE SQL;
-- Test bulk size not mandatory (it will get the optimal)
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''', null, null, null, null);
-- Test quota check by mocking quota 0
SELECT cdb_dataservices_client.cdb_bulk_geocode_street_point('select 1 as cartodb_id', '''Valladolid, Spain''');
DROP FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch;
DROP FUNCTION cdb_dataservices_client.cdb_enough_quota;
ALTER FUNCTION cdb_dataservices_client.cdb_enough_quota_mocked (service TEXT ,input_size NUMERIC) RENAME TO cdb_enough_quota;
ALTER FUNCTION cdb_dataservices_client.cdb_service_quota_info_batch_mocked() RENAME TO cdb_service_quota_info_batch;

0
server/__init__.py Normal file
View File

View File

@@ -83,3 +83,6 @@ deploy: release_remove_parallel_deploy
$(INSTALL_DATA) old_versions/*.sql *.sql '$(DESTDIR)$(datadir)/extension/'
install: deploy
reinstall: install
psql -U postgres -d dataservices_db -c "drop extension if exists cdb_dataservices_server; create extension cdb_dataservices_server;"

View File

@@ -0,0 +1,148 @@
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION cdb_dataservices_server UPDATE TO '<%= version %>'" to load this file. \quit
-- HERE goes your code to upgrade/downgrade
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_type inner join pg_namespace ON (pg_type.typnamespace = pg_namespace.oid)
WHERE pg_type.typname = 'service_quota_info_batch'
AND pg_namespace.nspname = 'cdb_dataservices_server') THEN
CREATE TYPE cdb_dataservices_server.service_quota_info_batch AS (
service cdb_dataservices_server.service_type,
monthly_quota NUMERIC,
used_quota NUMERIC,
soft_limit BOOLEAN,
provider TEXT,
max_batch_size NUMERIC
);
END IF;
END $$;
CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_service_quota_info_batch(
username TEXT,
orgname TEXT)
RETURNS SETOF cdb_dataservices_server.service_quota_info_batch AS $$
from cartodb_services.bulk_geocoders import BATCH_GEOCODER_CLASS_BY_PROVIDER
from cartodb_services.tools import Logger,LoggerConfig
plpy.execute("SELECT cdb_dataservices_server._get_logger_config()")
sqi = plpy.execute("SELECT * from cdb_dataservices_server.cdb_service_quota_info({0},{1})".format(plpy.quote_nullable(username), plpy.quote_nullable(orgname)))
ret = []
for info in sqi:
if info['service'] == 'hires_geocoder':
provider = info['provider']
batch_geocoder_class = BATCH_GEOCODER_CLASS_BY_PROVIDER.get(provider, None)
if batch_geocoder_class and hasattr(batch_geocoder_class, 'MAX_BATCH_SIZE'):
max_batch_size = batch_geocoder_class.MAX_BATCH_SIZE
else:
max_batch_size = 1
info['max_batch_size'] = max_batch_size
else:
info['max_batch_size'] = 1
ret += [[info['service'], info['monthly_quota'], info['used_quota'], info['soft_limit'], info['provider'], info['max_batch_size']]]
return ret
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
-- TODO: could cartodb_id be replaced by rowid, maybe needing extra care for offset?
CREATE TYPE cdb_dataservices_server.geocoding AS (
cartodb_id integer,
the_geom geometry(Multipolygon,4326),
metadata jsonb
);
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
from cartodb_services.metrics import metrics
from cartodb_services.tools import Logger
plpy.execute("SELECT cdb_dataservices_server._connect_to_redis('{0}')".format(username))
redis_conn = GD["redis_connection_{0}".format(username)]['redis_metrics_connection']
plpy.execute("SELECT cdb_dataservices_server._get_geocoder_config({0}, {1})".format(plpy.quote_nullable(username), plpy.quote_nullable(orgname)))
user_geocoder_config = GD["user_geocoder_config_{0}".format(username)]
plpy.execute("SELECT cdb_dataservices_server._get_logger_config()")
logger_config = GD["logger_config"]
logger = Logger(logger_config)
params = {'searches': searches}
with metrics('cdb_bulk_geocode_street_point', user_geocoder_config, logger, params):
if user_geocoder_config.google_geocoder:
provider_function = "_cdb_bulk_google_geocode_street_point";
elif user_geocoder_config.heremaps_geocoder:
provider_function = "_cdb_bulk_heremaps_geocode_street_point";
elif user_geocoder_config.tomtom_geocoder:
provider_function = "_cdb_bulk_tomtom_geocode_street_point";
elif user_geocoder_config.mapbox_geocoder:
provider_function = "_cdb_bulk_mapbox_geocode_street_point";
else:
raise Exception('Requested geocoder is not available')
plan = plpy.prepare("SELECT * FROM cdb_dataservices_server.{}($1, $2, $3); ".format(provider_function), ["text", "text", "jsonb"])
return plpy.execute(plan, [username, orgname, searches])
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_google_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
from cartodb_services import run_street_point_geocoder
from cartodb_services.tools import LegacyServiceManager
from cartodb_services.google import GoogleMapsBulkGeocoder
service_manager = LegacyServiceManager('geocoder', username, orgname, GD)
geocoder = GoogleMapsBulkGeocoder(service_manager.config.google_client_id, service_manager.config.google_api_key, service_manager.logger)
return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_heremaps_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
from cartodb_services import run_street_point_geocoder
from cartodb_services.tools import LegacyServiceManager
from cartodb_services.here import HereMapsBulkGeocoder
service_manager = LegacyServiceManager('geocoder', username, orgname, GD)
geocoder = HereMapsBulkGeocoder(service_manager.config.heremaps_app_id, service_manager.config.heremaps_app_code, service_manager.logger, service_manager.config.heremaps_service_params)
return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_tomtom_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
from cartodb_services import run_street_point_geocoder
from cartodb_services.tools import ServiceManager
from cartodb_services.refactor.service.tomtom_geocoder_config import TomTomGeocoderConfigBuilder
from cartodb_services.tomtom import TomTomBulkGeocoder
from cartodb_services.tools import Logger
import cartodb_services
cartodb_services.init(plpy, GD)
logger_config = GD["logger_config"]
logger = Logger(logger_config)
service_manager = ServiceManager('geocoder', TomTomGeocoderConfigBuilder, username, orgname, GD)
geocoder = TomTomBulkGeocoder(service_manager.config.tomtom_api_key, service_manager.logger, service_manager.config.service_params)
return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_mapbox_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
from cartodb_services import run_street_point_geocoder
from cartodb_services.tools import ServiceManager
from cartodb_services.refactor.service.mapbox_geocoder_config import MapboxGeocoderConfigBuilder
from cartodb_services.mapbox import MapboxBulkGeocoder
from cartodb_services.tools import Logger
import cartodb_services
cartodb_services.init(plpy, GD)
logger_config = GD["logger_config"]
logger = Logger(logger_config)
service_manager = ServiceManager('geocoder', MapboxGeocoderConfigBuilder, username, orgname, GD)
geocoder = MapboxBulkGeocoder(service_manager.config.mapbox_api_key, service_manager.logger, service_manager.config.service_params)
return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;

View File

@@ -0,0 +1,15 @@
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION cdb_dataservices_server UPDATE TO '<%= version %>'" to load this file. \quit
-- HERE goes your code to upgrade/downgrade
DROP FUNCTION IF EXISTS cdb_dataservices_server.cdb_service_quota_info_batch(TEXT, TEXT);
DROP FUNCTION IF EXISTS cdb_dataservices_server._cdb_bulk_geocode_street_point(TEXT, TEXT, jsonb);
DROP FUNCTION IF EXISTS cdb_dataservices_server._cdb_bulk_google_geocode_street_point(TEXT, TEXT, jsonb);
DROP FUNCTION IF EXISTS cdb_dataservices_server._cdb_bulk_heremaps_geocode_street_point(TEXT, TEXT, jsonb);
DROP FUNCTION IF EXISTS cdb_dataservices_server._cdb_bulk_tomtom_geocode_street_point(TEXT, TEXT, jsonb);
DROP FUNCTION IF EXISTS cdb_dataservices_server._cdb_bulk_mapbox_geocode_street_point(TEXT, TEXT, jsonb);
DROP TYPE IF EXISTS cdb_dataservices_server.geocoding;
DROP TYPE IF EXISTS cdb_dataservices_server.service_quota_info_batch;

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
comment = 'CartoDB dataservices server extension'
default_version = '0.31.0'
default_version = '0.32.0'
requires = 'plpythonu, plproxy, postgis, cdb_geocoder'
superuser = true
schema = cdb_dataservices_server

View File

@@ -27,6 +27,23 @@ BEGIN
END IF;
END $$;
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_type inner join pg_namespace ON (pg_type.typnamespace = pg_namespace.oid)
WHERE pg_type.typname = 'service_quota_info_batch'
AND pg_namespace.nspname = 'cdb_dataservices_server') THEN
CREATE TYPE cdb_dataservices_server.service_quota_info_batch AS (
service cdb_dataservices_server.service_type,
monthly_quota NUMERIC,
used_quota NUMERIC,
soft_limit BOOLEAN,
provider TEXT,
max_batch_size NUMERIC
);
END IF;
END $$;
CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_service_quota_info(
username TEXT,
orgname TEXT)
@@ -92,6 +109,35 @@ RETURNS SETOF cdb_dataservices_server.service_quota_info AS $$
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
-- Returns the rows of cdb_service_quota_info(username, orgname) extended with a
-- max_batch_size column. For 'hires_geocoder' rows the value is the
-- MAX_BATCH_SIZE of the bulk geocoder class registered for the row's provider;
-- every other row (and any provider without a registered class) gets 1.
CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_service_quota_info_batch(
  username TEXT,
  orgname TEXT)
RETURNS SETOF cdb_dataservices_server.service_quota_info_batch AS $$
  from cartodb_services.bulk_geocoders import BATCH_GEOCODER_CLASS_BY_PROVIDER

  plpy.execute("SELECT cdb_dataservices_server._get_logger_config()")

  sqi = plpy.execute("SELECT * from cdb_dataservices_server.cdb_service_quota_info({0},{1})".format(plpy.quote_nullable(username), plpy.quote_nullable(orgname)))

  ret = []
  for info in sqi:
    # Default for non-batch services and for providers without a batch class.
    max_batch_size = 1
    if info['service'] == 'hires_geocoder':
      batch_geocoder_class = BATCH_GEOCODER_CLASS_BY_PROVIDER.get(info['provider'])
      # getattr on None (unknown provider) also falls back to 1.
      max_batch_size = getattr(batch_geocoder_class, 'MAX_BATCH_SIZE', 1)

    # Column order must match the service_quota_info_batch composite type.
    ret.append([info['service'], info['monthly_quota'], info['used_quota'], info['soft_limit'], info['provider'], max_batch_size])

  return ret
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_enough_quota(
username TEXT,
orgname TEXT,

View File

@@ -0,0 +1,97 @@
-- TODO: could cartodb_id be replaced by rowid, maybe needing extra care for offset?
-- Row type returned by the _cdb_bulk_*_geocode_street_point functions below:
-- the id of the geocoded search, its geometry and a jsonb metadata document.
-- NOTE(review): the_geom is declared as Multipolygon, but the Python helper
-- run_street_point_geocoder builds its geometries with ST_MakePoint; confirm
-- whether this should be geometry(Point,4326).
CREATE TYPE cdb_dataservices_server.geocoding AS (
cartodb_id integer,
the_geom geometry(Multipolygon,4326),
metadata jsonb
);
-- Entry point for bulk street geocoding. Loads the user's geocoder
-- configuration, picks the provider-specific function (google, heremaps,
-- tomtom or mapbox, checked in that order) and returns its rows.
-- Raises 'Requested geocoder is not available' when no provider is enabled.
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  from cartodb_services.metrics import metrics
  from cartodb_services.tools import Logger

  # The username is escaped with quote_nullable, like in the
  # _get_geocoder_config call below, instead of being spliced between quotes.
  plpy.execute("SELECT cdb_dataservices_server._connect_to_redis({0})".format(plpy.quote_nullable(username)))
  # Not used below; kept so that a missing connection entry in GD still
  # raises here, exactly as before.
  redis_conn = GD["redis_connection_{0}".format(username)]['redis_metrics_connection']
  plpy.execute("SELECT cdb_dataservices_server._get_geocoder_config({0}, {1})".format(plpy.quote_nullable(username), plpy.quote_nullable(orgname)))
  user_geocoder_config = GD["user_geocoder_config_{0}".format(username)]

  plpy.execute("SELECT cdb_dataservices_server._get_logger_config()")
  logger_config = GD["logger_config"]
  logger = Logger(logger_config)

  params = {'searches': searches}

  with metrics('cdb_bulk_geocode_street_point', user_geocoder_config, logger, params):
    if user_geocoder_config.google_geocoder:
      provider_function = "_cdb_bulk_google_geocode_street_point"
    elif user_geocoder_config.heremaps_geocoder:
      provider_function = "_cdb_bulk_heremaps_geocode_street_point"
    elif user_geocoder_config.tomtom_geocoder:
      provider_function = "_cdb_bulk_tomtom_geocode_street_point"
    elif user_geocoder_config.mapbox_geocoder:
      provider_function = "_cdb_bulk_mapbox_geocode_street_point"
    else:
      raise Exception('Requested geocoder is not available')

    # provider_function only ever takes one of the literals above, so
    # formatting it into the statement is safe; the values are bound as
    # plan parameters.
    plan = plpy.prepare("SELECT * FROM cdb_dataservices_server.{}($1, $2, $3); ".format(provider_function), ["text", "text", "jsonb"])
    return plpy.execute(plan, [username, orgname, searches])
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
-- Bulk street geocoding through Google Maps: builds a GoogleMapsBulkGeocoder
-- from the client id / API key held by the LegacyServiceManager configuration
-- and delegates to run_street_point_geocoder.
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_google_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
from cartodb_services import run_street_point_geocoder
from cartodb_services.tools import LegacyServiceManager
from cartodb_services.google import GoogleMapsBulkGeocoder
service_manager = LegacyServiceManager('geocoder', username, orgname, GD)
geocoder = GoogleMapsBulkGeocoder(service_manager.config.google_client_id, service_manager.config.google_api_key, service_manager.logger)
return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
-- Bulk street geocoding through HERE Maps: builds a HereMapsBulkGeocoder from
-- the app id / app code / service params held by the LegacyServiceManager
-- configuration and delegates to run_street_point_geocoder.
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_heremaps_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
from cartodb_services import run_street_point_geocoder
from cartodb_services.tools import LegacyServiceManager
from cartodb_services.here import HereMapsBulkGeocoder
service_manager = LegacyServiceManager('geocoder', username, orgname, GD)
geocoder = HereMapsBulkGeocoder(service_manager.config.heremaps_app_id, service_manager.config.heremaps_app_code, service_manager.logger, service_manager.config.heremaps_service_params)
return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
-- Bulk street geocoding through TomTom: initialises cartodb_services, builds a
-- ServiceManager with TomTomGeocoderConfigBuilder, creates a TomTomBulkGeocoder
-- from its API key / service params and delegates to run_street_point_geocoder.
-- NOTE(review): the local `logger` built from GD["logger_config"] is not passed
-- to anything below (service_manager.logger is used instead); the lookup still
-- requires the "logger_config" entry to be present in GD.
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_tomtom_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
from cartodb_services import run_street_point_geocoder
from cartodb_services.tools import ServiceManager
from cartodb_services.refactor.service.tomtom_geocoder_config import TomTomGeocoderConfigBuilder
from cartodb_services.tomtom import TomTomBulkGeocoder
from cartodb_services.tools import Logger
import cartodb_services
cartodb_services.init(plpy, GD)
logger_config = GD["logger_config"]
logger = Logger(logger_config)
service_manager = ServiceManager('geocoder', TomTomGeocoderConfigBuilder, username, orgname, GD)
geocoder = TomTomBulkGeocoder(service_manager.config.tomtom_api_key, service_manager.logger, service_manager.config.service_params)
return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
-- Bulk street geocoding through Mapbox: initialises cartodb_services, builds a
-- ServiceManager with MapboxGeocoderConfigBuilder, creates a MapboxBulkGeocoder
-- from its API key / service params and delegates to run_street_point_geocoder.
-- NOTE(review): the local `logger` built from GD["logger_config"] is not passed
-- to anything below (service_manager.logger is used instead); the lookup still
-- requires the "logger_config" entry to be present in GD.
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_mapbox_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
from cartodb_services import run_street_point_geocoder
from cartodb_services.tools import ServiceManager
from cartodb_services.refactor.service.mapbox_geocoder_config import MapboxGeocoderConfigBuilder
from cartodb_services.mapbox import MapboxBulkGeocoder
from cartodb_services.tools import Logger
import cartodb_services
cartodb_services.init(plpy, GD)
logger_config = GD["logger_config"]
logger = Logger(logger_config)
service_manager = ServiceManager('geocoder', MapboxGeocoderConfigBuilder, username, orgname, GD)
geocoder = MapboxBulkGeocoder(service_manager.config.mapbox_api_key, service_manager.logger, service_manager.config.service_params)
return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;

View File

@@ -1,21 +1,21 @@
-- Check that the public function is callable, even with no data
-- It should return NULL
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elx');
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
cdb_geocode_namedplace_point
------------------------------
(1 row)
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elx', 'Spain');
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
cdb_geocode_namedplace_point
------------------------------
(1 row)
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elx', 'Valencia', 'Spain');
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
cdb_geocode_namedplace_point
------------------------------
@@ -35,42 +35,42 @@ INSERT INTO country_decoder (synonyms, iso2) VALUES (Array['spain', 'Spain'], 'E
INSERT INTO admin1_decoder (admin1, synonyms, iso2) VALUES ('Valencia', Array['valencia', 'Valencia'], 'ES');
-- This should return the point inserted above
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elx');
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
cdb_geocode_namedplace_point
----------------------------------------------------
0101000020E6100000637FD93D7958E63F2ECA6C9049A24340
(1 row)
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elche');
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
cdb_geocode_namedplace_point
----------------------------------------------------
0101000020E6100000637FD93D7958E63F2ECA6C9049A24340
(1 row)
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elx', 'Spain');
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
cdb_geocode_namedplace_point
----------------------------------------------------
0101000020E6100000637FD93D7958E63F2ECA6C9049A24340
(1 row)
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elche', 'Spain');
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
cdb_geocode_namedplace_point
----------------------------------------------------
0101000020E6100000637FD93D7958E63F2ECA6C9049A24340
(1 row)
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elx', 'Valencia', 'Spain');
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
cdb_geocode_namedplace_point
----------------------------------------------------
0101000020E6100000637FD93D7958E63F2ECA6C9049A24340
(1 row)
SELECT cdb_dataservices_server.cdb_geocode_namedplace_point('test_user', 'test_orgname', 'Elche', 'valencia', 'Spain');
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder
WARNING: Error geocoding namedplace using geocode street point, falling back to internal geocoder. Exception: spiexceptions.ExternalRoutineException: cartodb_services.metrics.config.ConfigException: There is no user config available. Please check your configuration.'
cdb_geocode_namedplace_point
----------------------------------------------------
0101000020E6100000637FD93D7958E63F2ECA6C9049A24340

0
server/lib/__init__.py Normal file
View File

View File

View File

@@ -33,3 +33,5 @@ def _reset():
plpy = None
GD = None
from geocoder import run_street_point_geocoder, StreetPointBulkGeocoder

View File

@@ -0,0 +1,11 @@
from google import GoogleMapsBulkGeocoder
from here import HereMapsBulkGeocoder
from tomtom import TomTomBulkGeocoder
from mapbox import MapboxBulkGeocoder
# Maps a geocoding provider name to its bulk geocoder class.
# cdb_service_quota_info_batch looks a provider up here and reads the class's
# MAX_BATCH_SIZE attribute to report the provider's maximum batch size.
BATCH_GEOCODER_CLASS_BY_PROVIDER = {
'google': GoogleMapsBulkGeocoder,
'heremaps': HereMapsBulkGeocoder,
'tomtom': TomTomBulkGeocoder,
'mapbox': MapboxBulkGeocoder
}

View File

@@ -0,0 +1,119 @@
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
from tools import QuotaExceededException, Logger
from collections import namedtuple
import json
PRECISION_PRECISE = 'precise'
PRECISION_INTERPOLATED = 'interpolated'
def geocoder_metadata(relevance, precision, match_types):
    """Build the metadata dict attached to a geocoding result.

    :param relevance: numeric relevance score; stored rounded to 2 decimals
    :param precision: precision label, stored as given
    :param match_types: match types, stored as given
    :return: dict with the keys 'relevance', 'precision' and 'match_types'
    """
    rounded_relevance = round(relevance, 2)
    metadata = dict(
        relevance=rounded_relevance,
        precision=precision,
        match_types=match_types,
    )
    return metadata
def compose_address(street, city=None, state=None, country=None):
    """Join the non-empty address parts into one comma-separated string.

    Parts that are falsy (None or empty) are skipped; the order is always
    street, city, state, country.
    """
    present_parts = [part for part in (street, city, state, country) if part]
    return ', '.join(present_parts)
def _count_searches(searches):
    """Return how many searches the ``searches`` payload holds.

    ``searches`` is handed to ``geocoder.bulk_geocode``, which decodes it with
    ``json.loads``, so ``len(searches)`` would count characters rather than
    searches. Payloads that are not JSON text are measured as they are, and
    anything without a length counts as 0.
    """
    try:
        decoded = json.loads(searches)
    except (TypeError, ValueError):
        decoded = searches
    try:
        return len(decoded)
    except TypeError:
        return 0


def run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches):
    """Run a bulk street geocoding and account for it in the quota service.

    :param plpy: PL/Python database access module
    :param GD: PL/Python global dictionary; must provide "logger_config" once
        _get_logger_config() has been executed
    :param geocoder: object exposing ``bulk_geocode(searches=...)``
    :param service_manager: object exposing ``assert_within_limits``,
        ``quota_service`` and ``logger``
    :param username: user the geocoding is run for (used for logging)
    :param orgname: organization of the user (used for logging)
    :param searches: JSON-encoded searches, passed untouched to the geocoder
    :return: list of ``[id, the_geom, metadata_json]`` rows; ``the_geom`` is
        None when the geocoder returned no coordinate pair. An empty list is
        returned when there are no results or the quota is exceeded.
    :raises Exception: 'Error trying to bulk geocode street' on any other error
    """
    plpy.execute("SELECT cdb_dataservices_server._get_logger_config()")
    logger = Logger(GD["logger_config"])

    try:
        service_manager.assert_within_limits(quota=False)
        geocode_results = geocoder.bulk_geocode(searches=searches)
        if not geocode_results:
            service_manager.quota_service.increment_empty_service_use(_count_searches(searches))
            return []

        point_plan = None  # prepared lazily, once, on the first located result
        results = []
        for result in geocode_results:
            if len(result) > 2:
                metadata = json.dumps(result[2])
            else:
                logger.warning('Geocoding for {} without metadata'.format(username))
                metadata = '{}'

            if result[1] and len(result[1]) == 2:
                if point_plan is None:
                    point_plan = plpy.prepare("SELECT ST_SetSRID(ST_MakePoint($1, $2), 4326) as the_geom; ", ["double precision", "double precision"])
                point = plpy.execute(point_plan, result[1], 1)[0]
                results.append([result[0], point['the_geom'], metadata])
            else:
                results.append([result[0], None, metadata])
        service_manager.quota_service.increment_success_service_use(len(results))
        return results
    except QuotaExceededException:
        service_manager.quota_service.increment_failed_service_use(_count_searches(searches))
        return []
    except BaseException:
        import sys
        service_manager.quota_service.increment_failed_service_use()
        service_manager.logger.error('Error trying to bulk geocode street point', sys.exc_info(), data={"username": username, "orgname": orgname})
        raise Exception('Error trying to bulk geocode street')
    finally:
        service_manager.quota_service.increment_total_service_use()
StreetGeocoderSearch = namedtuple('StreetGeocoderSearch', 'id address city state country')


class StreetPointBulkGeocoder:
    """
    Base class for bulk street geocoders.

    Classes extending StreetPointBulkGeocoder should implement:
        * _batch_geocode(street_geocoder_searches)
        * MAX_BATCH_SIZE

    If they want to provide an alternative serial (for small batches):
        * _should_use_batch(street_geocoder_searches)
        * _serial_geocode(street_geocoder_searches)

    bulk_geocode reports JSON decoding errors through ``self._logger``, which
    this class does not define; it has to be set by the extending class.
    """

    SEARCH_KEYS = ['id', 'address', 'city', 'state', 'country']

    def bulk_geocode(self, searches):
        """
        :param searches: JSON string with an array of objects holding the
            SEARCH_KEYS (missing keys default to None)
        :return: array of tuples with three elements:
            * id
            * longitude and latitude (array of two elements)
            * metadata
        :raises Exception: if there are more searches than MAX_BATCH_SIZE
        """
        try:
            decoded_searches = json.loads(searches)
        except Exception as e:
            self._logger.error('General error', exception=e)
            # Bare raise keeps the original traceback.
            raise

        street_geocoder_searches = [
            StreetGeocoderSearch(*[search.get(k, None) for k in self.SEARCH_KEYS])
            for search in decoded_searches
        ]

        if len(street_geocoder_searches) > self.MAX_BATCH_SIZE:
            raise Exception("Batch size can't be larger than {}".format(self.MAX_BATCH_SIZE))
        if self._should_use_batch(street_geocoder_searches):
            return self._batch_geocode(street_geocoder_searches)
        else:
            return self._serial_geocode(street_geocoder_searches)

    def _batch_geocode(self, street_geocoder_searches):
        raise NotImplementedError('Subclasses must implement _batch_geocode')

    def _serial_geocode(self, street_geocoder_searches):
        raise NotImplementedError('Subclasses must implement _serial_geocode')

    def _should_use_batch(self, street_geocoder_searches):
        # Batch by default; subclasses override this to fall back to serial.
        return True

View File

@@ -1 +1,2 @@
from geocoder import GoogleMapsGeocoder
from bulk_geocoder import GoogleMapsBulkGeocoder

View File

@@ -0,0 +1,62 @@
from multiprocessing import Pool
from exceptions import MalformedResult
from cartodb_services import StreetPointBulkGeocoder
from cartodb_services.geocoder import compose_address
from cartodb_services.google import GoogleMapsGeocoder
def async_geocoder(geocoder, address, components):
    """Call ``geocoder.geocode`` with the given address and components.

    Kept as a module-level function; GoogleMapsBulkGeocoder submits it to its
    worker pool through ``apply_async``.
    """
    geocode_kwargs = {'address': address, 'components': components}
    return geocoder.geocode(**geocode_kwargs)
class GoogleMapsBulkGeocoder(GoogleMapsGeocoder, StreetPointBulkGeocoder):
    """A Google Maps bulk geocoder.

    Batches are geocoded by running the single-address geocoder in a pool of
    PARALLEL_PROCESSES worker processes; a batch with fewer than
    MIN_BATCHED_SEARCH searches is geocoded serially instead.
    """
    MAX_BATCH_SIZE = 1000
    MIN_BATCHED_SEARCH = 2  # Batched is a parallelization
    PARALLEL_PROCESSES = 13

    def __init__(self, client_id, client_secret, logger):
        GoogleMapsGeocoder.__init__(self, client_id, client_secret, logger)

    def _should_use_batch(self, searches):
        return len(searches) >= self.MIN_BATCHED_SEARCH

    def _serial_geocode(self, searches):
        """Geocode the searches one by one; returns (id, lng_lat, metadata) tuples."""
        results = []
        for search in searches:
            (cartodb_id, street, city, state, country) = search
            lng_lat, metadata = self.geocode_meta(street, city, state, country)
            results.append((cartodb_id, lng_lat, metadata))
        return results

    def _batch_geocode(self, searches):
        """Geocode the searches in parallel; returns (id, lng_lat, metadata) tuples.

        Searches whose composed address is empty are not submitted and get no
        entry in the result. A search whose geocoding fails is logged and
        reported with empty coordinates and metadata.
        """
        bulk_results = {}
        pool = Pool(processes=self.PARALLEL_PROCESSES)
        try:
            for search in searches:
                (cartodb_id, street, city, state, country) = search
                address = compose_address(street, city, state, country)
                if address:
                    components = self._build_optional_parameters(city, state, country)
                    result = pool.apply_async(async_geocoder,
                                              (self.geocoder, address, components))
                    bulk_results[cartodb_id] = result
        finally:
            # Always release the workers, even if submitting a task failed.
            pool.close()
            pool.join()

        try:
            results = []
            for cartodb_id, bulk_result in bulk_results.items():
                try:
                    lng_lat, metadata = self._process_results(bulk_result.get())
                except Exception as e:
                    self._logger.error('Error at Google async_geocoder', e)
                    lng_lat, metadata = [[], {}]

                results.append((cartodb_id, lng_lat, metadata))
            return results
        except KeyError as e:
            self._logger.error('KeyError error', exception=e)
            raise MalformedResult()
        except Exception as e:
            self._logger.error('General error', exception=e)
            # Bare raise keeps the original traceback.
            raise

View File

@@ -5,6 +5,7 @@ import googlemaps
import base64
from exceptions import InvalidGoogleCredentials
class GoogleMapsClientFactory():
clients = {}
@@ -13,11 +14,14 @@ class GoogleMapsClientFactory():
cache_key = "{}:{}:{}".format(client_id, client_secret, channel)
client = cls.clients.get(cache_key)
if not client:
cls.assert_valid_crendentials(client_secret)
client = googlemaps.Client(
client_id=client_id,
client_secret=client_secret,
channel=channel)
if client_id:
cls.assert_valid_crendentials(client_secret)
client = googlemaps.Client(
client_id=client_id,
client_secret=client_secret,
channel=channel)
else:
client = googlemaps.Client(key=client_secret)
cls.clients[cache_key] = client
return client

View File

@@ -1,16 +1,42 @@
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import googlemaps
from urlparse import parse_qs
from exceptions import MalformedResult
from cartodb_services.geocoder import compose_address, geocoder_metadata, PRECISION_PRECISE, PRECISION_INTERPOLATED
from cartodb_services.google.exceptions import InvalidGoogleCredentials
from client_factory import GoogleMapsClientFactory
EMPTY_RESPONSE = [[], {}]
PARTIAL_FACTOR = 0.8
RELEVANCE_BY_LOCATION_TYPE = {
'ROOFTOP': 1,
'GEOMETRIC_CENTER': 0.9,
'RANGE_INTERPOLATED': 0.8,
'APPROXIMATE': 0.7
}
PRECISION_BY_LOCATION_TYPE = {
'ROOFTOP': PRECISION_PRECISE,
'GEOMETRIC_CENTER': PRECISION_PRECISE,
'RANGE_INTERPOLATED': PRECISION_INTERPOLATED,
'APPROXIMATE': PRECISION_INTERPOLATED
}
MATCH_TYPE_BY_MATCH_LEVEL = {
'point_of_interest': 'point_of_interest',
'country': 'country',
'administrative_area_level_1': 'state',
'administrative_area_level_2': 'county',
'locality': 'locality',
'sublocality': 'district',
'street_address': 'street',
'intersection': 'intersection',
'street_number': 'street_number',
'postal_code': 'postal_code'
}
class GoogleMapsGeocoder:
"""A Google Maps Geocoder wrapper for python"""
class GoogleMapsGeocoder():
def __init__(self, client_id, client_secret, logger):
if client_id is None:
@@ -20,25 +46,49 @@ class GoogleMapsGeocoder:
self.geocoder = GoogleMapsClientFactory.get(self.client_id, self.client_secret, self.channel)
self._logger = logger
def geocode(self, searchtext, city=None, state=None,
country=None):
def geocode(self, searchtext, city=None, state=None, country=None):
return self.geocode_meta(searchtext, city, state, country)[0]
def geocode_meta(self, searchtext, city=None, state=None, country=None):
address = compose_address(searchtext, city, state, country)
try:
opt_params = self._build_optional_parameters(city, state, country)
results = self.geocoder.geocode(address=searchtext,
results = self.geocoder.geocode(address=address,
components=opt_params)
if results:
return self._extract_lng_lat_from_result(results[0])
else:
return []
except KeyError:
return self._process_results(results)
except KeyError as e:
self._logger.error('address: {}'.format(address), e)
raise MalformedResult()
def _process_results(self, results):
if results:
return [
self._extract_lng_lat_from_result(results[0]),
self._extract_metadata_from_result(results[0])
]
else:
return EMPTY_RESPONSE
def _extract_lng_lat_from_result(self, result):
location = result['geometry']['location']
longitude = location['lng']
latitude = location['lat']
return [longitude, latitude]
def _extract_metadata_from_result(self, result):
location_type = result['geometry']['location_type']
base_relevance = RELEVANCE_BY_LOCATION_TYPE[location_type]
partial_match = result.get('partial_match', False)
partial_factor = PARTIAL_FACTOR if partial_match else 1
match_types = [MATCH_TYPE_BY_MATCH_LEVEL.get(match_level, None)
for match_level in result['types']]
return geocoder_metadata(
base_relevance * partial_factor,
PRECISION_BY_LOCATION_TYPE[location_type],
filter(None, match_types)
)
def _build_optional_parameters(self, city=None, state=None,
country=None):
optional_params = {}

View File

@@ -1,2 +1,3 @@
from geocoder import HereMapsGeocoder
from bulk_geocoder import HereMapsBulkGeocoder
from routing import HereMapsRoutingIsoline

View File

@@ -0,0 +1,148 @@
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import requests, time, zipfile, io, csv, cStringIO
import xml.etree.ElementTree as ET
from collections import namedtuple
from requests.adapters import HTTPAdapter
from cartodb_services import StreetPointBulkGeocoder
from cartodb_services.here import HereMapsGeocoder
from cartodb_services.geocoder import geocoder_metadata
from cartodb_services.metrics import Traceable
from cartodb_services.tools.exceptions import ServiceException
HereJobStatus = namedtuple('HereJobStatus', 'total_count processed_count status')
class HereMapsBulkGeocoder(HereMapsGeocoder, StreetPointBulkGeocoder):
    """Bulk street geocoder built on the HERE batch geocoding API.

    A batch job is created by uploading the searches as a pipe-delimited
    CSV, polled until it reaches one of ``JOB_FINAL_STATES`` and then its
    zipped output is downloaded and parsed. ``_serial_geocode`` geocodes the
    searches one by one through ``geocode_meta`` instead.
    """

    MAX_BATCH_SIZE = 1000000  # From the docs
    MIN_BATCHED_SEARCH = 100  # Under this, serial will be used
    BATCH_URL = 'https://batch.geocoder.cit.api.here.com/6.2/jobs'
    # https://developer.here.com/documentation/batch-geocoder/topics/read-batch-request-output.html
    META_COLS = ['relevance', 'matchType', 'matchCode', 'matchLevel', 'matchQualityStreet']
    MAX_STALLED_RETRIES = 100
    BATCH_RETRY_SLEEP_S = 5
    JOB_FINAL_STATES = ['completed', 'cancelled', 'deleted', 'failed']

    def __init__(self, app_id, app_code, logger, service_params=None, maxresults=HereMapsGeocoder.DEFAULT_MAXRESULTS):
        """Initialise the base geocoder and an HTTP session for batch calls."""
        HereMapsGeocoder.__init__(self, app_id, app_code, logger, service_params, maxresults)
        self.session = requests.Session()
        self.session.mount(self.BATCH_URL,
                           HTTPAdapter(max_retries=self.max_retries))
        self.credentials_params = {
            'app_id': self.app_id,
            'app_code': self.app_code,
        }

    def _should_use_batch(self, searches):
        """Return True when there are at least ``MIN_BATCHED_SEARCH`` searches."""
        return len(searches) >= self.MIN_BATCHED_SEARCH

    def _serial_geocode(self, searches):
        """Geocode each search individually.

        :param searches: iterable of (id, address, city, state, country).
        :return: list of (search_id, lng_lat, metadata) tuples.
        """
        results = []
        for search in searches:
            (search_id, address, city, state, country) = search
            result = self.geocode_meta(searchtext=address, city=city, state=state, country=country)
            results.append((search_id, result[0], result[1]))
        return results

    def _batch_geocode(self, searches):
        """Run the searches as one batch job and return its parsed results.

        :raises Exception: when the processed count does not change for
            more than ``MAX_STALLED_RETRIES`` consecutive polls.
        """
        request_id = self._send_batch(self._searches_to_csv(searches))

        last_processed = 0
        stalled_retries = 0
        # https://developer.here.com/documentation/batch-geocoder/topics/job-status.html
        while True:
            job_info = self._job_status(request_id)
            if job_info.processed_count == last_processed:
                # No progress since the previous poll
                stalled_retries += 1
                if stalled_retries > self.MAX_STALLED_RETRIES:
                    raise Exception('Too many retries for job {}'.format(request_id))
            else:
                stalled_retries = 0
                last_processed = job_info.processed_count

            if job_info.status in self.JOB_FINAL_STATES:
                break
            time.sleep(self.BATCH_RETRY_SLEEP_S)

        return self._download_results(request_id)

    def _searches_to_csv(self, searches):
        """Serialise the searches as the pipe-delimited CSV sent to HERE.

        Address, city and state are merged into a single ``searchText``
        column; empty components are skipped.
        """
        queue = cStringIO.StringIO()
        writer = csv.writer(queue, delimiter='|')
        writer.writerow(['recId', 'searchText', 'country'])

        for search in searches:
            fields = [search.address, search.city, search.state]
            search_text = ', '.join(filter(None, fields))
            row = [s.encode("utf-8") if s else ''
                   for s in [str(search.id), search_text, search.country]]
            writer.writerow(row)

        return queue.getvalue()

    def _send_batch(self, data):
        """Create the batch job and return its request id.

        :raises ServiceException: when the response status is not 200.
        """
        cols = 'displayLatitude,displayLongitude,' + ','.join(self.META_COLS)
        request_params = self.credentials_params.copy()
        request_params.update({
            'gen': 8,
            'action': 'run',
            'header': 'true',
            'inDelim': '|',
            'outDelim': '|',
            'outCols': cols,
            'outputcombined': 'true'
        })

        response = self.session.post(self.BATCH_URL, data=data,
                                     params=request_params,
                                     timeout=(self.connect_timeout, self.read_timeout))

        if response.status_code == 200:
            root = ET.fromstring(response.text)
            return root.find('./Response/MetaInfo/RequestId').text
        else:
            raise ServiceException("Error sending HERE batch", response)

    def _job_status(self, request_id):
        """Poll the job and return a ``HereJobStatus``.

        :raises ServiceException: when the response status is not 200, so
            an error payload is not parsed as if it were a status document.
        """
        polling_params = self.credentials_params.copy()
        polling_params.update({'action': 'status'})
        polling_r = self.session.get("{}/{}".format(self.BATCH_URL, request_id),
                                     params=polling_params,
                                     timeout=(self.connect_timeout, self.read_timeout))
        if polling_r.status_code != 200:
            raise ServiceException("Error polling HERE batch", polling_r)

        polling_root = ET.fromstring(polling_r.text)
        return HereJobStatus(
            total_count=int(polling_root.find('./Response/TotalCount').text),
            processed_count=int(polling_root.find('./Response/ProcessedCount').text),
            status=polling_root.find('./Response/Status').text)

    def _download_results(self, job_id):
        """Download the zipped job output and parse every ``*_out.txt`` file.

        :return: list of (recId, [longitude, latitude], metadata) tuples,
            one per row whose ``SeqNumber`` is ``'1'``. Coordinates are
            returned exactly as read from the CSV.
        :raises ServiceException: when the response status is not 200.
        """
        result_r = self.session.get("{}/{}/result".format(self.BATCH_URL, job_id),
                                    params=self.credentials_params,
                                    timeout=(self.connect_timeout, self.read_timeout))
        if result_r.status_code != 200:
            raise ServiceException("Error downloading HERE batch results", result_r)

        results = []
        # The context manager makes sure the archive is closed
        with zipfile.ZipFile(io.BytesIO(result_r.content)) as root_zip:
            for name in root_zip.namelist():
                if not name.endswith('_out.txt'):
                    continue
                reader = csv.DictReader(root_zip.open(name), delimiter='|')
                for row in reader:
                    if row['SeqNumber'] == '1':  # First per requested data
                        results.append(self._row_to_result(row))

        return results

    def _row_to_result(self, row):
        """Convert one output CSV row into (recId, lng_lat, metadata)."""
        # csv.DictReader yields '' (or None for short rows) for an empty
        # cell, so a plain ``.get`` default would never be applied.
        match_type_key = row.get('matchType') or 'pointAddress'
        precision = self.PRECISION_BY_MATCH_TYPE[match_type_key]
        match_type = self.MATCH_TYPE_BY_MATCH_LEVEL.get(row['matchLevel'], None)
        return (row['recId'],
                [row['displayLongitude'], row['displayLatitude']],
                geocoder_metadata(
                    float(row['relevance']),
                    precision,
                    [match_type] if match_type else []
                ))

View File

@@ -6,9 +6,9 @@ import requests
from requests.adapters import HTTPAdapter
from exceptions import *
from cartodb_services.geocoder import PRECISION_PRECISE, PRECISION_INTERPOLATED, geocoder_metadata
from cartodb_services.metrics import Traceable
class HereMapsGeocoder(Traceable):
'A Here Maps Geocoder wrapper for python'
@@ -52,6 +52,23 @@ class HereMapsGeocoder(Traceable):
'strictlanguagemode'
] + ADDRESS_PARAMS
PRECISION_BY_MATCH_TYPE = {
'pointAddress': PRECISION_PRECISE,
'interpolated': PRECISION_INTERPOLATED
}
MATCH_TYPE_BY_MATCH_LEVEL = {
'landmark': 'point_of_interest',
'country': 'country',
'state': 'state',
'county': 'county',
'city': 'locality',
'district': 'district',
'street': 'street',
'intersection': 'intersection',
'houseNumber': 'street_number',
'postalCode': 'postal_code'
}
def __init__(self, app_id, app_code, logger, service_params=None, maxresults=DEFAULT_MAXRESULTS):
service_params = service_params or {}
self.app_id = app_id
@@ -65,12 +82,15 @@ class HereMapsGeocoder(Traceable):
self.max_retries = service_params.get('max_retries', self.MAX_RETRIES)
def geocode(self, **kwargs):
return self.geocode_meta(**kwargs)[0]
def geocode_meta(self, **kwargs):
params = {}
for key, value in kwargs.iteritems():
if value and value.strip():
params[key] = value
if not params:
return []
return [[], {}]
return self._execute_geocode(params)
def _execute_geocode(self, params):
@@ -78,11 +98,13 @@ class HereMapsGeocoder(Traceable):
raise BadGeocodingParams(params)
try:
response = self._perform_request(params)
results = response['Response']['View'][0]['Result'][0]
return self._extract_lng_lat_from_result(results)
result = response['Response']['View'][0]['Result'][0]
return [self._extract_lng_lat_from_result(result),
self._extract_metadata_from_result(result)]
except IndexError:
return []
except KeyError:
return [[], {}]
except KeyError as e:
self._logger.error('params: {}'.format(params), e)
raise MalformedResult()
def _perform_request(self, params):
@@ -118,3 +140,14 @@ class HereMapsGeocoder(Traceable):
latitude = location['DisplayPosition']['Latitude']
return [longitude, latitude]
def _extract_metadata_from_result(self, result):
    """Build the geocoder metadata for a single HERE result.

    Precision is looked up by ``MatchType`` and match types by
    ``MatchLevel``; an unmapped match level yields an empty list.
    """
    # MatchType may be missing from the response; see
    # https://stackoverflow.com/questions/51285622/missing-matchtype-at-here-geocoding-responses
    match_type_key = result.get('MatchType', 'pointAddress')
    precision = self.PRECISION_BY_MATCH_TYPE[match_type_key]
    mapped = self.MATCH_TYPE_BY_MATCH_LEVEL.get(result['MatchLevel'], None)
    match_types = []
    if mapped:
        match_types = [mapped]
    return geocoder_metadata(result['Relevance'], precision, match_types)

View File

@@ -1,4 +1,5 @@
from routing import MapboxRouting, MapboxRoutingResponse
from geocoder import MapboxGeocoder
from bulk_geocoder import MapboxBulkGeocoder
from isolines import MapboxIsolines, MapboxIsochronesResponse
from matrix_client import MapboxMatrixClient

View File

@@ -0,0 +1,67 @@
import json, requests, time
from requests.adapters import HTTPAdapter
from cartodb_services import StreetPointBulkGeocoder
from cartodb_services.mapbox import MapboxGeocoder
from cartodb_services.tools.exceptions import ServiceException
from iso3166 import countries
from cartodb_services.tools.country import country_to_iso3
class MapboxBulkGeocoder(MapboxGeocoder, StreetPointBulkGeocoder):
    """Bulk street geocoder built on Mapbox batch (multi-query) geocoding."""

    MAX_BATCH_SIZE = 50  # From the docs
    MIN_BATCHED_SEARCH = 0
    READ_TIMEOUT = 60
    CONNECT_TIMEOUT = 10
    MAX_RETRIES = 1

    def __init__(self, token, logger, service_params=None):
        """Initialise the base geocoder, timeouts and HTTP session.

        ``service_params`` may be omitted; class defaults are used then.
        """
        # The default is None, which has no ``.get``
        service_params = service_params or {}
        MapboxGeocoder.__init__(self, token, logger, service_params)
        self.connect_timeout = service_params.get('connect_timeout', self.CONNECT_TIMEOUT)
        self.read_timeout = service_params.get('read_timeout', self.READ_TIMEOUT)
        self.max_retries = service_params.get('max_retries', self.MAX_RETRIES)
        self.session = requests.Session()

    def _should_use_batch(self, searches):
        """Return True when there are at least ``MIN_BATCHED_SEARCH`` searches."""
        return len(searches) >= self.MIN_BATCHED_SEARCH

    def _serial_geocode(self, searches):
        """Geocode each search individually.

        :return: list of (search_id, lng_lat, metadata) tuples.
        """
        results = []
        for search in searches:
            elements = self._encoded_elements(search)
            result = self.geocode_meta(*elements)
            results.append((search[0], result[0], result[1]))
        return results

    def _encoded_elements(self, search):
        """Return (address, city, state, country) ready for the geocoder.

        Text components are UTF-8 encoded and the country is converted
        with ``_country_code``; empty components become None.
        """
        (search_id, address, city, state, country) = search
        address = address.encode('utf-8') if address else None
        city = city.encode('utf-8') if city else None
        state = state.encode('utf-8') if state else None
        country = self._country_code(country) if country else None
        return address, city, state, country

    def _batch_geocode(self, searches):
        """Geocode all searches in a single multi-query request.

        :return: one (search_id, lng_lat, metadata) tuple per search. A
            search whose entry in the response is missing or is not a
            ``[lng_lat, metadata]`` pair gets ``[]`` and ``{}``.
        """
        if len(searches) == 1:
            return self._serial_geocode(searches)

        frees = []
        for search in searches:
            elements = self._encoded_elements(search)
            frees.append(', '.join([elem for elem in elements if elem]))

        full_results = self.geocode_free_text_meta(frees)
        results = []
        for index, search in enumerate(searches):
            result = full_results[index] if index < len(full_results) else None
            # geocode_free_text_meta returns a flat EMPTY_RESPONSE on some
            # errors instead of a list with one pair per search.
            if self._is_result_pair(result):
                results.append((search[0], result[0], result[1]))
            else:
                results.append((search[0], [], {}))
        return results

    @staticmethod
    def _is_result_pair(result):
        """Tell whether ``result`` looks like ``[lng_lat, metadata]``."""
        return isinstance(result, (list, tuple)) and len(result) == 2

    def _country_code(self, country):
        """Return the lowercase ISO 3166 alpha-2 code, or None if unknown."""
        country_iso3166 = None
        country_iso3 = country_to_iso3(country)
        if country_iso3:
            country_iso3166 = countries.get(country_iso3).alpha2.lower()

        return country_iso3166

View File

@@ -5,6 +5,7 @@ Python client for the Mapbox Geocoder service.
import json
import requests
from mapbox import Geocoder
from cartodb_services.geocoder import PRECISION_PRECISE, PRECISION_INTERPOLATED, geocoder_metadata
from cartodb_services.metrics import Traceable
from cartodb_services.tools.exceptions import ServiceException
from cartodb_services.tools.qps import qps_retry
@@ -22,6 +23,19 @@ ENTRY_COORDINATES = 'coordinates'
ENTRY_TYPE = 'type'
TYPE_POINT = 'Point'
EMPTY_RESPONSE = [[], {}]
MATCH_TYPE_BY_MATCH_LEVEL = {
'poi': 'point_of_interest',
'poi.landmark': 'point_of_interest',
'place': 'point_of_interest',
'country': 'country',
'region': 'state',
'locality': 'locality',
'district': 'district',
'address': 'street'
}
class MapboxGeocoder(Traceable):
'''
@@ -40,18 +54,24 @@ class MapboxGeocoder(Traceable):
def _parse_geocoder_response(self, response):
json_response = json.loads(response)
# If Mapbox returns more that one result, take the first one
if json_response:
if type(json_response) == list:
json_response = json_response[0]
if type(json_response) != list:
json_response = [json_response]
if json_response[ENTRY_FEATURES]:
feature = json_response[ENTRY_FEATURES][0]
return self._extract_lng_lat_from_feature(feature)
else:
return []
result = []
for a_json_response in json_response:
if a_json_response[ENTRY_FEATURES]:
feature = a_json_response[ENTRY_FEATURES][0]
result.append([
self._extract_lng_lat_from_feature(feature),
self._extract_metadata_from_result(feature)
]
)
else:
result.append(EMPTY_RESPONSE)
return result
else:
return []
return EMPTY_RESPONSE
def _extract_lng_lat_from_feature(self, feature):
geometry = feature[ENTRY_GEOMETRY]
@@ -64,6 +84,23 @@ class MapboxGeocoder(Traceable):
latitude = location[1]
return [longitude, latitude]
def _extract_metadata_from_result(self, result):
    """Build the geocoder metadata for a single Mapbox feature.

    Precision is interpolated when the geometry carries a truthy
    ``interpolated`` flag and precise otherwise. Match types are the mapped
    values of ``place_type`` with unmapped entries dropped.
    """
    interpolated = result[ENTRY_GEOMETRY].get('interpolated', False)
    precision = PRECISION_INTERPOLATED if interpolated else PRECISION_PRECISE
    mapped_types = []
    for level in result['place_type']:
        mapped_types.append(MATCH_TYPE_BY_MATCH_LEVEL.get(level, None))
    relevance = self._normalize_relevance(float(result['relevance']))
    return geocoder_metadata(relevance, precision, filter(None, mapped_types))
def _normalize_relevance(self, relevance):
return 1 if relevance >= 0.99 else relevance
def _validate_input(self, searchtext, city=None, state_province=None,
country=None):
if searchtext and searchtext.strip():
@@ -78,8 +115,20 @@ class MapboxGeocoder(Traceable):
@qps_retry(qps=10)
def geocode(self, searchtext, city=None, state_province=None,
country=None):
"""
:param searchtext:
:param city:
:param state_province:
:param country: Country ISO 3166 code
:return: [x, y] on success, [] on error
"""
return self.geocode_meta(searchtext, city, state_province, country)[0]
@qps_retry(qps=10)
def geocode_meta(self, searchtext, city=None, state_province=None,
country=None):
if not self._validate_input(searchtext, city, state_province, country):
return []
return EMPTY_RESPONSE
address = []
if searchtext and searchtext.strip():
@@ -89,19 +138,30 @@ class MapboxGeocoder(Traceable):
if state_province:
address.append(normalize(state_province))
free_search = ', '.join(address)
return self.geocode_free_text_meta([free_search], country)[0]
@qps_retry(qps=10)
def geocode_free_text_meta(self, free_searches, country=None):
"""
:param free_searches: Free text searches
:param country: Country ISO 3166 code
:return: list of [x, y] on success, [] on error
"""
country = [country] if country else None
try:
response = self._geocoder.forward(address=', '.join(address).decode('utf-8'),
country=country,
limit=1)
free_search = ';'.join([self._escape(fs) for fs in free_searches])
response = self._geocoder.forward(address=free_search.decode('utf-8'),
country=country)
if response.status_code == requests.codes.ok:
return self._parse_geocoder_response(response.text)
elif response.status_code == requests.codes.bad_request:
return []
return EMPTY_RESPONSE
elif response.status_code == requests.codes.unprocessable_entity:
return []
return EMPTY_RESPONSE
else:
raise ServiceException(response.status_code, response)
except requests.Timeout as te:
@@ -110,9 +170,16 @@ class MapboxGeocoder(Traceable):
self._logger.error('Timeout connecting to Mapbox geocoding server',
te)
raise ServiceException('Error geocoding {0} using Mapbox'.format(
searchtext), None)
free_search), None)
except requests.ConnectionError as ce:
# Don't raise the exception to continue with the geocoding job
self._logger.error('Error connecting to Mapbox geocoding server',
exception=ce)
return []
return EMPTY_RESPONSE
def _escape(self, free_search):
# Semicolon is used to separate batch geocoding; there's no documented
# way to pass actual semicolons, and %3B or &#59; won't work (check
# TestBulkStreetFunctions.test_semicolon and the docs,
# https://www.mapbox.com/api-documentation/#batch-requests)
return free_search.replace(';', ',')

View File

@@ -1,3 +1,4 @@
from geocoder import TomTomGeocoder
from bulk_geocoder import TomTomBulkGeocoder
from routing import TomTomRouting, TomTomRoutingResponse
from isolines import TomTomIsolines, TomTomIsochronesResponse

View File

@@ -0,0 +1,96 @@
import json, requests, time
from requests.adapters import HTTPAdapter
from cartodb_services import StreetPointBulkGeocoder
from cartodb_services.tomtom import TomTomGeocoder
from cartodb_services.tools.exceptions import ServiceException
class TomTomBulkGeocoder(TomTomGeocoder, StreetPointBulkGeocoder):
    """Bulk street geocoder built on the TomTom batch search API.

    A batch is submitted with a POST that answers with a redirect
    location; that location is then polled until the results are ready.
    """

    MAX_BATCH_SIZE = 1000000  # From the docs
    MIN_BATCHED_SEARCH = 10  # Batch API is really fast
    BASE_URL = 'https://api.tomtom.com'
    BATCH_URL = BASE_URL + '/search/2/batch.json'
    MAX_STALLED_RETRIES = 100
    BATCH_RETRY_SLEEP_S = 5
    READ_TIMEOUT = 60
    CONNECT_TIMEOUT = 10
    MAX_RETRIES = 1

    def __init__(self, apikey, logger, service_params=None):
        """Initialise the base geocoder, timeouts and HTTP session.

        ``service_params`` may be omitted; class defaults are used then.
        """
        # The default is None, which has no ``.get``
        service_params = service_params or {}
        TomTomGeocoder.__init__(self, apikey, logger, service_params)
        self.connect_timeout = service_params.get('connect_timeout', self.CONNECT_TIMEOUT)
        self.read_timeout = service_params.get('read_timeout', self.READ_TIMEOUT)
        self.max_retries = service_params.get('max_retries', self.MAX_RETRIES)
        self.session = requests.Session()
        self.session.headers.update({'Content-Type': 'application/json'})
        self.session.mount(self.BATCH_URL,
                           HTTPAdapter(max_retries=self.max_retries))

    def _should_use_batch(self, searches):
        """Return True when there are at least ``MIN_BATCHED_SEARCH`` searches."""
        return len(searches) >= self.MIN_BATCHED_SEARCH

    def _serial_geocode(self, searches):
        """Geocode each search individually.

        Text components are UTF-8 encoded before being passed on.

        :return: list of (search_id, lng_lat, metadata) tuples.
        """
        results = []
        for search in searches:
            (search_id, address, city, state, country) = search
            address = address.encode('utf-8') if address else None
            city = city.encode('utf-8') if city else None
            state = state.encode('utf-8') if state else None
            country = country.encode('utf-8') if country else None
            result = self.geocode_meta(searchtext=address, city=city,
                                       state_province=state, country=country)
            results.append((search_id, result[0], result[1]))
        return results

    def _batch_geocode(self, searches):
        """Submit all searches as one batch and pair results with ids.

        :return: list of (search_id, lng_lat, metadata) tuples.
        """
        location = self._send_batch(searches)
        full_results = self._download_results(location)
        results = []
        for s, r in zip(searches, full_results):
            results.append((s[0], r[0], r[1]))
        return results

    def _send_batch(self, searches):
        """POST the batch and return the ``Location`` of its results.

        :raises ServiceException: when the response status is not 303.
        """
        body = {'batchItems': [{'query': self._query(s)} for s in searches]}
        request_params = {
            'key': self._apikey
        }
        # Redirects are not followed: the Location header is polled by
        # _download_results instead.
        response = self.session.post(self.BATCH_URL, data=json.dumps(body),
                                     allow_redirects=False,
                                     params=request_params,
                                     timeout=(self.connect_timeout, self.read_timeout))
        if response.status_code == 303:
            return response.headers['Location']
        else:
            msg = "Error sending batch: {}; Headers: {}".format(
                response.text.encode('utf-8'), response.headers)
            self._logger.error(msg)
            raise ServiceException(msg, response)

    def _download_results(self, location):
        """Poll ``location`` until the batch results can be parsed.

        A 202 response means the results are not available yet; the new
        ``Location`` header is followed after a pause.

        :raises Exception: after more than ``MAX_STALLED_RETRIES`` 202s.
        :raises ServiceException: on any status other than 200 or 202.
        """
        stalled_retries = 0
        while True:
            # Same timeouts as the POST, so a stuck connection cannot
            # block the job forever.
            response = self.session.get(self.BASE_URL + location,
                                        timeout=(self.connect_timeout, self.read_timeout))
            if response.status_code == 200:
                return self._parse_results(response.json())
            elif response.status_code == 202:
                stalled_retries += 1
                if stalled_retries > self.MAX_STALLED_RETRIES:
                    raise Exception('Too many retries for job {}'.format(location))
                location = response.headers['Location']
                time.sleep(self.BATCH_RETRY_SLEEP_S)
            else:
                msg = "Error downloading batch: {}; Headers: {}".format(
                    response.text.encode('utf-8'), response.headers)
                self._logger.error(msg)
                raise ServiceException(msg, response)

    def _query(self, search):
        """Return the request URI used as a batch item for ``search``."""
        (search_id, address, city, state, country) = search
        searchtext = ', '.join(filter(None, [address, city, state]))
        return self._request_uri(searchtext=searchtext, country=country)

    def _parse_results(self, json_body):
        """Parse every batch item into a ``[lng_lat, metadata]`` pair."""
        return [self._parse_response(item['statusCode'], item['response'])
                for item in json_body['batchItems']]

View File

@@ -4,20 +4,33 @@
import json
import requests
from uritemplate import URITemplate
from math import tanh
from cartodb_services.geocoder import PRECISION_PRECISE, PRECISION_INTERPOLATED, geocoder_metadata
from cartodb_services.metrics import Traceable
from cartodb_services.tools.exceptions import ServiceException
from cartodb_services.tools.qps import qps_retry
from cartodb_services.tools.normalize import normalize
BASEURI = ('https://api.tomtom.com/search/2/geocode/'
'{searchtext}.JSON'
'?key={apiKey}'
'&limit=1')
HOST = 'https://api.tomtom.com'
API_BASEURI = '/search/2'
REQUEST_BASEURI = ('/geocode/'
'{searchtext}.json'
'?limit=1')
ENTRY_RESULTS = 'results'
ENTRY_POSITION = 'position'
ENTRY_LON = 'lon'
ENTRY_LAT = 'lat'
EMPTY_RESPONSE = [[], {}]
SCORE_NORMALIZATION_FACTOR = 0.15
PRECISION_SCORE_THRESHOLD = 0.5
MATCH_TYPE_BY_MATCH_LEVEL = {
'POI': 'point_of_interest',
'Street': 'street',
'Address Range': 'street',
'Cross Street': 'intersection',
'Point Address': 'street_number'
}
class TomTomGeocoder(Traceable):
'''
@@ -29,21 +42,17 @@ class TomTomGeocoder(Traceable):
self._apikey = apikey
self._logger = logger
def _uri(self, searchtext, countries=None):
baseuri = BASEURI + '&countrySet={}'.format(countries) \
if countries else BASEURI
uri = URITemplate(baseuri).expand(apiKey=self._apikey,
searchtext=searchtext.encode('utf-8'))
return uri
def _uri(self, searchtext, country=None):
return HOST + API_BASEURI + \
self._request_uri(searchtext, country, self._apikey)
def _parse_geocoder_response(self, response):
json_response = json.loads(response)
if json_response and json_response[ENTRY_RESULTS]:
result = json_response[ENTRY_RESULTS][0]
return self._extract_lng_lat_from_feature(result)
else:
return []
def _request_uri(self, searchtext, country=None, apiKey=None):
    """Expand the geocoding request URI (path and query, without host).

    ``countrySet`` is appended only when ``country`` is given, and the
    ``key`` parameter only when ``apiKey`` is given.
    """
    parts = [REQUEST_BASEURI]
    if country:
        parts.append('&countrySet={}'.format(country))
    if apiKey:
        parts.append('&key={apiKey}')
    template = URITemplate(''.join(parts))
    return template.expand(apiKey=apiKey,
                           searchtext=searchtext.encode('utf-8'))
def _extract_lng_lat_from_feature(self, result):
position = result[ENTRY_POSITION]
@@ -65,6 +74,11 @@ class TomTomGeocoder(Traceable):
@qps_retry(qps=5)
def geocode(self, searchtext, city=None, state_province=None,
country=None):
return self.geocode_meta(searchtext, city, state_province, country)[0]
@qps_retry(qps=5)
def geocode_meta(self, searchtext, city=None, state_province=None,
country=None):
if searchtext:
searchtext = searchtext.decode('utf-8')
if city:
@@ -75,7 +89,7 @@ class TomTomGeocoder(Traceable):
country = country.decode('utf-8')
if not self._validate_input(searchtext, city, state_province, country):
return []
return EMPTY_RESPONSE
address = []
if searchtext and searchtext.strip():
@@ -85,19 +99,11 @@ class TomTomGeocoder(Traceable):
if state_province:
address.append(normalize(state_province))
uri = self._uri(searchtext=', '.join(address), countries=country)
uri = self._uri(searchtext=', '.join(address), country=country)
try:
response = requests.get(uri)
if response.status_code == requests.codes.ok:
return self._parse_geocoder_response(response.text)
elif response.status_code == requests.codes.bad_request:
return []
elif response.status_code == requests.codes.unprocessable_entity:
return []
else:
raise ServiceException(response.status_code, response)
return self._parse_response(response.status_code, response.text)
except requests.Timeout as te:
# In case of timeout we want to stop the job because the server
# could be down
@@ -109,4 +115,44 @@ class TomTomGeocoder(Traceable):
# Don't raise the exception to continue with the geocoding job
self._logger.error('Error connecting to TomTom geocoding server',
exception=ce)
return []
return EMPTY_RESPONSE
def _parse_response(self, status_code, text):
    """Translate a status code and body into a geocoding result.

    A 200 is parsed, a 400 or 422 yields ``EMPTY_RESPONSE`` and any other
    status raises ``ServiceException``.
    """
    if status_code == requests.codes.ok:
        return self._parse_geocoder_response(text)
    if status_code == requests.codes.bad_request:
        return EMPTY_RESPONSE
    if status_code == requests.codes.unprocessable_entity:
        return EMPTY_RESPONSE
    msg = 'Unknown response {}: {}'.format(str(status_code), text)
    raise ServiceException(msg, None)
def _parse_geocoder_response(self, response):
    """Return ``[lng_lat, metadata]`` for the first result of a response.

    ``response`` is either an already decoded dict or a JSON string. When
    there are no results ``EMPTY_RESPONSE`` is returned.
    """
    if type(response) != dict:
        payload = json.loads(response)
    else:
        payload = response
    if not (payload and payload[ENTRY_RESULTS]):
        return EMPTY_RESPONSE
    first = payload[ENTRY_RESULTS][0]
    lng_lat = self._extract_lng_lat_from_feature(first)
    metadata = self._extract_metadata_from_result(first)
    return [lng_lat, metadata]
def _extract_metadata_from_result(self, result):
    """Build the geocoder metadata for a single TomTom result.

    The normalized score is used both as relevance and to derive the
    precision; an unmapped result type yields an empty match type list.
    """
    relevance = self._normalize_score(result['score'])
    mapped = MATCH_TYPE_BY_MATCH_LEVEL.get(result['type'], None)
    match_types = []
    if mapped:
        match_types = [mapped]
    precision = self._precision_from_score(relevance)
    return geocoder_metadata(relevance, precision, match_types)
def _normalize_score(self, score):
    """Return ``tanh`` of the score scaled by ``SCORE_NORMALIZATION_FACTOR``."""
    scaled = score * SCORE_NORMALIZATION_FACTOR
    return tanh(scaled)
def _precision_from_score(self, score):
    """Return precise above ``PRECISION_SCORE_THRESHOLD``, else interpolated."""
    if score > PRECISION_SCORE_THRESHOLD:
        return PRECISION_PRECISE
    return PRECISION_INTERPOLATED

View File

@@ -35,28 +35,28 @@ class Logger:
return
self._send_to_rollbar('debug', text, exception, data)
self._send_to_log_file('debug', text, exception, data)
self._send_to_plpy('debug', text)
self._send_to_plpy('debug', text, exception)
def info(self, text, exception=None, data={}):
if not self._check_min_level('info'):
return
self._send_to_rollbar('info', text, exception, data)
self._send_to_log_file('info', text, exception, data)
self._send_to_plpy('info', text)
self._send_to_plpy('info', text, exception)
def warning(self, text, exception=None, data={}):
if not self._check_min_level('warning'):
return
self._send_to_rollbar('warning', text, exception, data)
self._send_to_log_file('warning', text, exception, data)
self._send_to_plpy('warning', text)
self._send_to_plpy('warning', text, exception)
def error(self, text, exception=None, data={}):
if not self._check_min_level('error'):
return
self._send_to_rollbar('error', text, exception, data)
self._send_to_log_file('error', text, exception, data)
self._send_to_plpy('error', text)
self._send_to_plpy('error', text, exception)
def _check_min_level(self, level):
return True if self.LEVELS[level] >= self._min_level else False
@@ -85,18 +85,31 @@ class Logger:
elif level == 'error':
self._file_logger.error(text, extra=extra_data)
def _send_to_plpy(self, level, text):
def _send_to_plpy(self, level, text, exception=None):
# exception might also be a tuple generated by sys.exc_info
if exception:
if isinstance(exception, tuple) and len(exception) > 1:
exception = exception[1]
exception_message = '. Exception: {}'.format(exception)
else:
exception_message = ''
# Adding trace breaks tests
# trace = traceback.format_exc(15)
# message = '{}{}. Trace: {}'.format(text, exception_message, trace)
message = '{}{}'.format(text, exception_message)
if self._check_plpy():
if level == 'debug':
plpy.debug(text)
plpy.debug(message)
elif level == 'info':
plpy.info(text)
plpy.info(message)
elif level == 'warning':
plpy.warning(text)
plpy.warning(message)
elif level == 'error':
# Plpy.error and fatal raises exceptions and we only want to
# log an error, exceptions should be raise explicitly
plpy.warning(text)
plpy.warning(message)
def _parse_log_extra_data(self, exception, data):
extra_data = {}

View File

@@ -10,7 +10,7 @@ from setuptools import setup, find_packages
setup(
name='cartodb_services',
version='0.18.0',
version='0.19.0',
description='CartoDB Services API Python Library',

View File

@@ -25,7 +25,8 @@ WELL_KNOWN_SHAPE = [(40.73312, -73.98891), (40.73353, -73.98987),
(40.73186, -73.99664), (40.73147, -73.99693),
(40.73141, -73.99698), (40.73147, -73.99707),
(40.73219, -73.99856), (40.73222, -73.99861),
(40.73293, -74.00007), (40.733, -74.00001)]
(40.73225, -73.99868), (40.73293, -74.00007),
(40.733, -74.00001)]
WELL_KNOWN_LENGTH = 1317.9

View File

@@ -2,6 +2,29 @@ import os
import requests
import json
from nose.tools import assert_true
# From https://www.python.org/dev/peps/pep-0485/#proposed-implementation
def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
    """Tell whether ``a`` and ``b`` are within the given tolerances.

    The allowed difference is the larger of ``abs_tol`` and ``rel_tol``
    times the larger absolute value of the two numbers.
    """
    difference = abs(a-b)
    largest = max(abs(a), abs(b))
    tolerance = max(rel_tol * largest, abs_tol)
    return difference <= tolerance
def assert_close_enough(xy_a, xy_b, rel_tol=0.0001, abs_tol=0.0005):
    """
    Asserts that the given points are "close enough", in a square.
    Each coordinate is compared independently with ``isclose``.
    :param xy_a: Array of 2 elements, X and Y.
    :param xy_b: Array of 2 elements, X and Y.
    :param rel_tol: Relative tolerance. Default: 0.0001 (0.01%).
    :param abs_tol: Absolute tolerance. Default: 0.0005.
    """
    for i in [0, 1]:
        assert_true(isclose(xy_a[i], xy_b[i], rel_tol, abs_tol),
                    "Coord {} error: {} and {} are not closer than {}, {}".format(
                        i, xy_a[i], xy_b[i], rel_tol, abs_tol
                    ))
class IntegrationTestHelper:
@@ -22,13 +45,17 @@ class IntegrationTestHelper:
}
@classmethod
def execute_query(cls, sql_api_url, query):
def execute_query_raw(cls, sql_api_url, query):
requests.packages.urllib3.disable_warnings()
query_url = "{0}?q={1}".format(sql_api_url, query)
print "Executing query: {0}".format(query_url)
query_response = requests.get(query_url)
if query_response.status_code != 200:
raise Exception(json.loads(query_response.text)['error'])
query_response_data = json.loads(query_response.text)
return json.loads(query_response.text)
@classmethod
def execute_query(cls, sql_api_url, query):
return cls.execute_query_raw(sql_api_url, query)['rows'][0]
return query_response_data['rows'][0]

View File

@@ -1,10 +1,108 @@
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
from unittest import TestCase
from nose.tools import assert_raises
from nose.tools import assert_not_equal, assert_equal
from nose.tools import assert_not_equal, assert_equal, assert_true
from ..helpers.integration_test_helper import IntegrationTestHelper
from ..helpers.integration_test_helper import assert_close_enough, isclose
class TestStreetFunctionsSetUp(TestCase):
provider = None
fixture_points = None
class TestStreetFunctions(TestCase):
GOOGLE_POINTS = {
'Plaza Mayor, Valladolid': [-4.728252, 41.6517025],
'Paseo Zorrilla, Valladolid': [-4.7404453, 41.6314339],
'1900 amphitheatre parkway': [-122.0875324, 37.4227968],
'1901 amphitheatre parkway': [-122.0885504, 37.4238657],
'1902 amphitheatre parkway': [-122.0876674, 37.4235729],
'Valladolid': [-4.7245321, 41.652251],
'Valladolid, Spain': [-4.7245321, 41.652251],
'Valladolid, Mexico': [-88.2022488, 20.68964],
'Madrid': [-3.7037902, 40.4167754],
'Logroño, Spain': [-2.4449852, 42.4627195],
'Logroño, Argentina': [-61.6961807, -29.5031057],
'Plaza España, Barcelona': [2.1482563, 41.375485]
}
HERE_POINTS = {
'Plaza Mayor, Valladolid': [-4.72979, 41.65258],
'Paseo Zorrilla, Valladolid': [-4.73869, 41.63817],
'1900 amphitheatre parkway': [-122.0879468, 37.4234763],
'1901 amphitheatre parkway': [-122.0879253, 37.4238725],
'1902 amphitheatre parkway': [-122.0879531, 37.4234775],
'Valladolid': [-4.73214, 41.6542],
'Valladolid, Spain': [-4.73214, 41.6542],
'Valladolid, Mexico': [-88.20117, 20.69021],
'Madrid': [-3.70578, 40.42028],
'Logroño, Spain': [-2.45194, 42.46592],
'Logroño, Argentina': [-61.69604, -29.50425],
'Plaza España, Barcelona': [2.14834, 41.37494]
}
TOMTOM_POINTS = HERE_POINTS.copy()
TOMTOM_POINTS.update({
'Plaza Mayor, Valladolid': [-4.72183, 41.5826],
'Paseo Zorrilla, Valladolid': [-4.74031, 41.63181],
'Valladolid': [-4.72838, 41.6542],
'Valladolid, Spain': [-4.72838, 41.6542],
'Madrid': [-3.70035, 40.42028],
'Logroño, Spain': [-2.44998, 42.46592],
'Plaza España, Barcelona': [2.1497, 41.37516]
})
MAPBOX_POINTS = GOOGLE_POINTS.copy()
MAPBOX_POINTS.update({
'Logroño, Spain': [-2.44556, 42.47],
'Logroño, Argentina': [-70.687195, -33.470901], # TODO: huge mismatch
'Valladolid': [-4.72856, 41.652251],
'Valladolid, Spain': [-4.72856, 41.652251],
'1902 amphitheatre parkway': [-118.03, 34.06], # TODO: huge mismatch
'Madrid': [-3.69194, 40.4167754],
'Plaza España, Barcelona': [2.342231, 41.50677] # TODO: not ideal
})
FIXTURE_POINTS = {
'google': GOOGLE_POINTS,
'heremaps': HERE_POINTS,
'tomtom': TOMTOM_POINTS,
'mapbox': MAPBOX_POINTS
}
GOOGLE_METADATAS = {
'Plaza España, Barcelona':
{'relevance': 0.9, 'precision': 'precise', 'match_types': ['point_of_interest']},
'Santiago Rusiñol 123, Valladolid':
{'relevance': 0.56, 'precision': 'interpolated', 'match_types': ['locality']}
}
HERE_METADATAS = {
'Plaza España, Barcelona':
{'relevance': 1, 'precision': 'precise', 'match_types': ['street']},
'Santiago Rusiñol 123, Valladolid':
{'relevance': 0.89, 'precision': 'precise', 'match_types': ['street']} # Wrong. See https://stackoverflow.com/questions/51285622/missing-matchtype-at-here-geocoding-responses
}
TOMTOM_METADATAS = {
'Plaza España, Barcelona':
{'relevance': 0.85, 'precision': 'precise', 'match_types': ['street']},
'Santiago Rusiñol 123, Valladolid':
{'relevance': 0.45, 'precision': 'interpolated', 'match_types': ['street']}
}
MAPBOX_METADATAS = {
'Plaza España, Barcelona':
{'relevance': 0.67, 'precision': 'precise', 'match_types': ['point_of_interest']},
'Santiago Rusiñol 123, Valladolid':
{'relevance': 0.67, 'precision': 'precise', 'match_types': ['point_of_interest']} # TODO: wrong
}
METADATAS = {
'google': GOOGLE_METADATAS,
'heremaps': HERE_METADATAS,
'tomtom': TOMTOM_METADATAS,
'mapbox': MAPBOX_METADATAS
}
def setUp(self):
self.env_variables = IntegrationTestHelper.get_environment_variables()
@@ -15,19 +113,332 @@ class TestStreetFunctions(TestCase):
self.env_variables['api_key']
)
if not self.fixture_points:
query = "select provider from " \
"cdb_dataservices_client.cdb_service_quota_info() " \
"where service = 'hires_geocoder'"
response = self._run_authenticated(query)
provider = response['rows'][0]['provider']
self.fixture_points = self.FIXTURE_POINTS[provider]
self.metadata = self.METADATAS[provider]
def _run_authenticated(self, query):
    """Run ``query`` through the SQL API with the test API key appended.

    Returns whatever ``IntegrationTestHelper.execute_query_raw`` returns
    for the authenticated query.
    """
    api_key = self.env_variables['api_key']
    with_key = "{}&api_key={}".format(query, api_key)
    return IntegrationTestHelper.execute_query_raw(self.sql_api_url, with_key)
def _used_quota(self):
    """Return the ``used_quota`` reported for the ``hires_geocoder`` service."""
    quota_query = "select used_quota " \
                  "from cdb_dataservices_client.cdb_service_quota_info() " \
                  "where service = 'hires_geocoder'"
    first_row = self._run_authenticated(quota_query)['rows'][0]
    return first_row['used_quota']
class TestStreetFunctions(TestStreetFunctionsSetUp):
    """Integration tests for the single-address street geocoding function.

    Every query calls the schema-qualified
    ``cdb_dataservices_client.cdb_geocode_street_point``.
    """

    def test_if_select_with_street_point_is_ok(self):
        """An authenticated call over the test table returns a geometry."""
        query = "SELECT cdb_dataservices_client.cdb_geocode_street_point(street) " \
                "as geometry FROM {0} LIMIT 1&api_key={1}".format(
                    self.env_variables['table_name'],
                    self.env_variables['api_key'])
        geometry = IntegrationTestHelper.execute_query(self.sql_api_url, query)
        assert_not_equal(geometry['geometry'], None)

    def test_if_select_with_street_without_api_key_raise_error(self):
        """A call without an API key must be rejected with a permission error."""
        table = self.env_variables['table_name']
        query = "SELECT cdb_dataservices_client.cdb_geocode_street_point(street) " \
                "as geometry FROM {0} LIMIT 1".format(table)
        try:
            IntegrationTestHelper.execute_query(self.sql_api_url, query)
        except Exception as e:
            assert_equal(e.message[0],
                         "permission denied for relation {}".format(table))
        else:
            # Without this branch the test would pass silently when the
            # unauthenticated query unexpectedly succeeds.
            raise AssertionError(
                "Query without api_key was expected to raise an error")

    def test_component_aggregation(self):
        """Street, city and country components geocode to the expected point."""
        query = "select st_x(the_geom), st_y(the_geom) from (" \
                "select cdb_dataservices_client.cdb_geocode_street_point( " \
                "'Plaza España', 'Barcelona', null, 'Spain') as the_geom) _x"
        response = self._run_authenticated(query)
        row = response['rows'][0]
        x_y = [row['st_x'], row['st_y']]
        # Wrong coordinates (Plaza España, Madrid): [-3.7138975, 40.4256762]
        assert_close_enough(x_y, self.fixture_points['Plaza España, Barcelona'])
class TestBulkStreetFunctions(TestStreetFunctionsSetUp):
    """Integration tests for ``cdb_bulk_geocode_street_point``.

    Each test sends one SQL statement through ``self._run_authenticated``
    (inherited from the set-up base class) and compares the returned
    coordinates or metadata with the fixtures chosen for the configured
    provider.
    """

    def test_full_spec(self):
        """Street, city, state and country columns are all provided."""
        query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 1 as cartodb_id, ''Spain'' as country, " \
                "''Castilla y León'' as state, ''Valladolid'' as city, " \
                "''Plaza Mayor'' as street " \
                "UNION " \
                "select 2 as cartodb_id, ''Spain'' as country, " \
                "''Castilla y León'' as state, ''Valladolid'' as city, " \
                "''Paseo Zorrilla'' as street' " \
                ", 'street', 'city', 'state', 'country')"
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['Plaza Mayor, Valladolid'],
            2: self.fixture_points['Paseo Zorrilla, Valladolid']
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response),
                                 points_by_cartodb_id)

    def test_empty_columns(self):
        """The three optional component arguments are empty-string literals."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', '''''', '''''', '''''')"
        response = self._run_authenticated(query)

        assert_close_enough(self._x_y_by_cartodb_id(response)[1],
                            self.fixture_points['1901 amphitheatre parkway'])

    def test_null_columns(self):
        """Only the street expression is passed; the other arguments are omitted."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address')"
        response = self._run_authenticated(query)

        assert_close_enough(self._x_y_by_cartodb_id(response)[1],
                            self.fixture_points['1901 amphitheatre parkway'])

    def test_batching(self):
        """Three addresses are geocoded with a final argument of 2."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1900 amphitheatre parkway, mountain view, ca, us\"}," \
                "{\"cartodb_id\": 2, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}," \
                "{\"cartodb_id\": 3, \"address\": \"1902 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', null, null, null, 2)"
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['1900 amphitheatre parkway'],
            2: self.fixture_points['1901 amphitheatre parkway'],
            3: self.fixture_points['1902 amphitheatre parkway'],
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response),
                                 points_by_cartodb_id)

    def test_batch_size_1(self):
        """Same addresses as ``test_batching`` with a final argument of 1."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1900 amphitheatre parkway, mountain view, ca, us\"}," \
                "{\"cartodb_id\": 2, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}," \
                "{\"cartodb_id\": 3, \"address\": \"1902 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', null, null, null, 1)"
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['1900 amphitheatre parkway'],
            2: self.fixture_points['1901 amphitheatre parkway'],
            3: self.fixture_points['1902 amphitheatre parkway'],
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response),
                                 points_by_cartodb_id)

    def test_city_column_geocoding(self):
        """A city column is used as the only search expression."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"city\": \"Valladolid\"}," \
                "{\"cartodb_id\": 2, \"city\": \"Madrid\"}" \
                "]''::jsonb) as (cartodb_id integer, city text)', " \
                "'city')"
        response = self._run_authenticated(query)

        assert_equal(response['total_rows'], 2)

        points_by_cartodb_id = {
            1: self.fixture_points['Valladolid'],
            2: self.fixture_points['Madrid']
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response),
                                 points_by_cartodb_id)

    def test_free_text_geocoding(self):
        """A quoted literal, not a column, is passed as the search expression."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from (" \
                "select 1 as cartodb_id, ''W 26th Street'' as address, " \
                "null as city , null as state , null as country" \
                ")_x', " \
                "'''Logroño, La Rioja, Spain''')"
        response = self._run_authenticated(query)

        assert_close_enough(self._x_y_by_cartodb_id(response)[1],
                            self.fixture_points['Logroño, Spain'])

    def test_templating_geocoding(self):
        """The search expression concatenates a column with a literal country."""
        query = "SELECT cartodb_id, st_x(the_geom), st_y(the_geom) from " \
                "cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 1 as cartodb_id, ''Logroño'' as city', " \
                "'city || '', '' || ''Spain''') " \
                "UNION " \
                "SELECT cartodb_id, st_x(the_geom), st_y(the_geom) from " \
                "cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 2 as cartodb_id, ''Logroño'' as city', " \
                "'city || '', '' || ''Argentina''')"
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['Logroño, Spain'],
            2: self.fixture_points['Logroño, Argentina']
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response),
                                 points_by_cartodb_id)

    def test_template_with_two_columns_geocoding(self):
        """The search expression concatenates two columns of the input query."""
        query = "SELECT cartodb_id, st_x(the_geom), st_y(the_geom) from " \
                "cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                " 'select * from (' ||" \
                " ' select 1 as cartodb_id, ''Valladolid'' as city, ''Mexico'' as country ' ||" \
                " ' union all ' ||" \
                " ' select 2, ''Valladolid'', ''Spain''' ||" \
                " ') _x'," \
                "'city || '', '' || country')"
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['Valladolid, Mexico'],
            2: self.fixture_points['Valladolid, Spain']
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response),
                                 points_by_cartodb_id)

    def test_large_batches(self):
        """
        Useful just to test a good batch size.

        Geocodes ``n`` generated addresses, checks that every row carries a
        point and populated metadata, and that the used quota grew by ``n``.
        """
        n = 110
        first_cartodb_id = -1
        first_street_number = 1
        batch_size = 'NULL'  # NULL for optimal
        # Doubled braces are literal JSON braces once format() has run.
        streets = [
            '{{"cartodb_id": {}, "address": "{} Yonge Street, '
            'Toronto, Canada"}}'.format(first_cartodb_id + i,
                                        first_street_number + i)
            for i in range(n)
        ]

        used_quota = self._used_quota()

        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', null, null, null, {})".format(','.join(streets),
                                                          batch_size)
        response = self._run_authenticated(query)
        assert_equal(n, len(response['rows']))
        for row in response['rows']:
            assert_not_equal(row['st_x'], None)
            assert_not_equal(row['metadata'], {})
            metadata = row['metadata']
            assert_not_equal(metadata['relevance'], None)
            assert_not_equal(metadata['precision'], None)
            assert_not_equal(metadata['match_types'], None)

        assert_equal(self._used_quota(), used_quota + n)

    def test_missing_components_on_private_function(self):
        """The private function accepts searches that only carry an address."""
        query = "SELECT _cdb_bulk_geocode_street_point(" \
                " '[{\"id\": \"1\", \"address\": \"Amphitheatre Parkway 22\"}]' " \
                ")"
        response = self._run_authenticated(query)
        assert_equal(1, len(response['rows']))

    def test_semicolon(self):
        """Semicolon- and comma-separated addresses resolve to the same point."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1900 amphitheatre parkway; mountain view; ca; us\"}," \
                "{\"cartodb_id\": 2, \"address\": \"1900 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', null, null, null)"
        response = self._run_authenticated(query)

        x_y_by_cartodb_id = self._x_y_by_cartodb_id(response)
        assert_equal(x_y_by_cartodb_id[1], x_y_by_cartodb_id[2])

    def test_component_aggregation(self):
        """Street, city and country columns are combined; state is NULL."""
        query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 1 as cartodb_id, ''Spain'' as country, " \
                "''Barcelona'' as city, " \
                "''Plaza España'' as street' " \
                ", 'street', 'city', NULL, 'country')"
        response = self._run_authenticated(query)

        assert_close_enough(self._x_y_by_cartodb_id(response)[1],
                            self.fixture_points['Plaza España, Barcelona'])

    def _test_known_table(self):
        """Geocode the rows of an existing table and check the row count.

        NOTE(review): the leading underscore presumably keeps test runners
        from collecting this method; rename it to enable the check.
        """
        subquery = 'select * from unknown_table where cartodb_id < 1100'
        subquery_count = 'select count(1) from ({}) _x'.format(subquery)
        count = self._run_authenticated(subquery_count)['rows'][0]['count']

        query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'{}' " \
                ", 'street', 'city', NULL, 'country')".format(subquery)
        response = self._run_authenticated(query)
        assert_equal(len(response['rows']), count)
        assert_not_equal(response['rows'][0]['st_x'], None)

    def test_metadata(self):
        """Returned metadata matches the provider fixtures for each address.

        Rows are matched with their expectation through ``cartodb_id``: the
        query is a UNION without ORDER BY, so the order of the returned rows
        cannot be relied upon.
        """
        query = "select cartodb_id, metadata " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 1 as cartodb_id, ''Spain'' as country, " \
                "''Barcelona'' as city, " \
                "''Plaza España'' as street " \
                "UNION " \
                "select 2 as cartodb_id, ''Spain'' as country, " \
                "''Valladolid'' as city, " \
                "''Santiago Rusiñol 123'' as street' " \
                ", 'street', 'city', NULL, 'country')"
        response = self._run_authenticated(query)

        expected_by_cartodb_id = {
            1: self.metadata['Plaza España, Barcelona'],
            2: self.metadata['Santiago Rusiñol 123, Valladolid'],
        }
        rows = response['rows']
        assert_equal(len(rows), len(expected_by_cartodb_id))
        for row in rows:
            self.assert_metadata(row['metadata'],
                                 expected_by_cartodb_id[row['cartodb_id']])

    @staticmethod
    def _x_y_by_cartodb_id(response):
        """Map each row's ``cartodb_id`` to its ``[st_x, st_y]`` pair."""
        return {r['cartodb_id']: [r['st_x'], r['st_y']]
                for r in response['rows']}

    @staticmethod
    def assert_close_points(points_a_by_cartodb_id, points_b_by_cartodb_id):
        """Assert both mappings have the same size and pairwise close points."""
        assert_equal(len(points_a_by_cartodb_id), len(points_b_by_cartodb_id))
        # items() instead of iteritems(): works on Python 2 and Python 3.
        for cartodb_id, point in points_a_by_cartodb_id.items():
            assert_close_enough(point, points_b_by_cartodb_id[cartodb_id])

    @staticmethod
    def assert_metadata(metadata, expected):
        """Compare a returned metadata dict against its expected fixture.

        ``relevance`` is compared through ``isclose`` with 0.02 as the third
        argument; ``precision`` and ``match_types`` must match exactly.
        """
        relevance = metadata['relevance']
        expected_relevance = expected['relevance']
        assert_true(isclose(relevance, expected_relevance, 0.02),
                    '{} not close to {}'.format(relevance, expected_relevance))

        assert_equal(metadata['precision'], expected['precision'])
        assert_equal(metadata['match_types'], expected['match_types'])

3
test/requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
# Integration tests dependencies
requests==2.9.1
nose==1.3.7

View File

@@ -74,11 +74,11 @@ def set_environment_variables(username, api_key, table_name, host, schema):
def clean_environment_variables():
    """Remove the GEOCODER_API_TEST_* variables from the process environment.

    Variables that are not set are skipped, so the function can be called
    repeatedly, or after a partial set-up, without raising ``KeyError``.
    """
    for name in ("GEOCODER_API_TEST_USERNAME",
                 "GEOCODER_API_TEST_API_KEY",
                 "GEOCODER_API_TEST_TABLE_NAME",
                 "GEOCODER_API_TEST_HOST",
                 "GEOCODER_API_TEST_SCHEMA"):
        # pop() with a default tolerates a missing variable; `del` would not.
        os.environ.pop(name, None)
# Call main() only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()