diff --git a/server/lib/python/cartodb_services/cartodb_services/google/bulk_geocoder.py b/server/lib/python/cartodb_services/cartodb_services/google/bulk_geocoder.py index e59e408..42a60d9 100644 --- a/server/lib/python/cartodb_services/cartodb_services/google/bulk_geocoder.py +++ b/server/lib/python/cartodb_services/cartodb_services/google/bulk_geocoder.py @@ -6,8 +6,7 @@ from cartodb_services.google import GoogleMapsGeocoder def async_geocoder(geocoder, address, components): - results = geocoder.geocode(address=address, components=components) - return results if results else [] + return geocoder.geocode(address=address, components=components) class GoogleMapsBulkGeocoder(GoogleMapsGeocoder, StreetPointBulkGeocoder): @@ -26,12 +25,9 @@ class GoogleMapsBulkGeocoder(GoogleMapsGeocoder, StreetPointBulkGeocoder): results = [] for search in searches: (cartodb_id, street, city, state, country) = search - address = compose_address(street, city, state, country) - components = self._build_optional_parameters(city, state, country) - result = self.geocoder.geocode(address=address, components=components) - lng_lat = self._extract_lng_lat_from_result(result[0]) if result else [] - self._logger.debug('--> lng_lat: {}'.format(lng_lat)) - results.append((cartodb_id, lng_lat, [])) + lng_lat, metadata = self.geocode_meta(street, city, state, country) + self._logger.debug('--> lng_lat: {}. metadata: {}'.format(lng_lat, metadata)) + results.append((cartodb_id, lng_lat, metadata)) return results def _batch_geocode(self, searches): @@ -39,16 +35,13 @@ class GoogleMapsBulkGeocoder(GoogleMapsGeocoder, StreetPointBulkGeocoder): pool = Pool(processes=self.PARALLEL_PROCESSES) for search in searches: (cartodb_id, street, city, state, country) = search - components = self._build_optional_parameters(city, state, country) - # Geocoding works better if components are also inside the address + self._logger.debug('async geocoding --> {}'.format(search)) address = compose_address(street, city, state, country) if address: - self._logger.debug('async geocoding --> {} {}'.format(address.encode('utf-8'), components)) + components = self._build_optional_parameters(city, state, country) result = pool.apply_async(async_geocoder, (self.geocoder, address, components)) - else: - result = [] - bulk_results[cartodb_id] = result + bulk_results[cartodb_id] = result pool.close() pool.join() @@ -56,13 +49,12 @@ class GoogleMapsBulkGeocoder(GoogleMapsGeocoder, StreetPointBulkGeocoder): results = [] for cartodb_id, bulk_result in bulk_results.items(): try: - result = bulk_result.get() + lng_lat, metadata = self._process_results(bulk_result.get()) except Exception as e: self._logger.error('Error at Google async_geocoder', e) - result = [] + lng_lat, metadata = [[], {}] - lng_lat = self._extract_lng_lat_from_result(result[0]) if result else [] - results.append((cartodb_id, lng_lat, [])) + results.append((cartodb_id, lng_lat, metadata)) return results except KeyError as e: self._logger.error('KeyError error', exception=e) @@ -70,4 +62,3 @@ class GoogleMapsBulkGeocoder(GoogleMapsGeocoder, StreetPointBulkGeocoder): except Exception as e: self._logger.error('General error', exception=e) raise e - diff --git a/server/lib/python/cartodb_services/cartodb_services/google/geocoder.py b/server/lib/python/cartodb_services/cartodb_services/google/geocoder.py index aa8877c..b07dc6b 100644 --- a/server/lib/python/cartodb_services/cartodb_services/google/geocoder.py +++ b/server/lib/python/cartodb_services/cartodb_services/google/geocoder.py @@ -8,6 +8,15 @@ from cartodb_services.geocoder import compose_address from cartodb_services.google.exceptions import InvalidGoogleCredentials from client_factory import GoogleMapsClientFactory +EMPTY_RESPONSE = [[], {}] +PARTIAL_FACTOR = 0.8 +RELEVANCE_BY_LOCATION_TYPE = { + 'ROOFTOP': 1, + 'GEOMETRIC_CENTER': 0.9, + 'RANGE_INTERPOLATED': 0.8, + 'APPROXIMATE': 0.7 +} + class GoogleMapsGeocoder(): @@ -19,26 +28,49 @@ class GoogleMapsGeocoder(): self.geocoder = GoogleMapsClientFactory.get(self.client_id, self.client_secret, self.channel) self._logger = logger - def geocode(self, searchtext, city=None, state=None, - country=None): + def geocode(self, searchtext, city=None, state=None, country=None): + return self.geocode_meta(searchtext, city, state, country)[0] + + def geocode_meta(self, searchtext, city=None, state=None, country=None): try: address = compose_address(searchtext, city, state, country) opt_params = self._build_optional_parameters(city, state, country) results = self.geocoder.geocode(address=address, components=opt_params) - if results: - return self._extract_lng_lat_from_result(results[0]) - else: - return [] - except KeyError: + return self._process_results(results) + except KeyError as e: + self._logger.error('params: {}, {}, {}, {}'.format( + searchtext.encode('utf-8'), city.encode('utf-8'), + state.encode('utf-8'), country.encode('utf-8') + ), e) raise MalformedResult() + def _process_results(self, results): + if results: + self._logger.debug('--> results: {}'.format(results[0])) + return [ + self._extract_lng_lat_from_result(results[0]), + self._extract_metadata_from_result(results[0]) + ] + else: + return EMPTY_RESPONSE + def _extract_lng_lat_from_result(self, result): location = result['geometry']['location'] longitude = location['lng'] latitude = location['lat'] return [longitude, latitude] + def _extract_metadata_from_result(self, result): + location_type = result['geometry']['location_type'] + base_relevance = RELEVANCE_BY_LOCATION_TYPE[location_type] + partial_match = result.get('partial_match', False) + partial_factor = PARTIAL_FACTOR if partial_match else 1 + return { + 'relevance': base_relevance * partial_factor + } + + def _build_optional_parameters(self, city=None, state=None, country=None): optional_params = {} diff --git a/test/integration/test_street_functions.py b/test/integration/test_street_functions.py index 79af788..cca42ca 100644 --- a/test/integration/test_street_functions.py +++ b/test/integration/test_street_functions.py @@ -83,8 +83,13 @@ class TestStreetFunctionsSetUp(TestCase): 'Plaza España, Barcelona': 0.85 }) + GOOGLE_RELEVANCES = HERE_RELEVANCES.copy() + GOOGLE_RELEVANCES.update({ + 'Plaza España, Barcelona': 0.9 + }) + RELEVANCES = { - 'google': HERE_RELEVANCES, + 'google': GOOGLE_RELEVANCES, 'here': HERE_RELEVANCES, 'tomtom': TOMTOM_RELEVANCES, 'mapbox': MAPBOX_RELEVANCES @@ -331,7 +336,7 @@ class TestBulkStreetFunctions(TestStreetFunctionsSetUp): self.fixture_points['Plaza España, Barcelona']) def _test_known_table(self): - subquery = 'select * from known_table where cartodb_id < 1100' + subquery = 'select * from unknown_table where cartodb_id < 1100' subquery_count = 'select count(1) from ({}) _x'.format(subquery) count = self._run_authenticated(subquery_count)['rows'][0]['count']