From fc35911b91ff2ece8471227668b3e15d620201d9 Mon Sep 17 00:00:00 2001 From: Mario de Frutos Date: Wed, 20 Jan 2016 20:57:24 +0100 Subject: [PATCH] Geocoder street function with quota checking --- server/extension/.gitignore | 2 + .../extension/sql/0.1.0/15_config_helper.sql | 16 +++ .../extension/sql/0.1.0/20_geocode_street.sql | 30 ++++ .../cartodb_geocoder/config_helper.py | 41 +----- .../cartodb_geocoder/quota_service.py | 54 +++++--- .../cartodb_geocoder/user_service.py | 128 +++++++++++------- .../python/cartodb_geocoder/requirements.txt | 5 +- 7 files changed, 175 insertions(+), 101 deletions(-) create mode 100644 server/extension/sql/0.1.0/15_config_helper.sql create mode 100644 server/extension/sql/0.1.0/20_geocode_street.sql diff --git a/server/extension/.gitignore b/server/extension/.gitignore index 7b6ce39..340ecff 100644 --- a/server/extension/.gitignore +++ b/server/extension/.gitignore @@ -2,4 +2,6 @@ results/ regression.diffs regression.out cdb_geocoder_server--0.0.1.sql +cdb_geocoder_server--0.1.0.sql +cdb_geocoder_server--0.0.1--0.1.0.sql diff --git a/server/extension/sql/0.1.0/15_config_helper.sql b/server/extension/sql/0.1.0/15_config_helper.sql new file mode 100644 index 0000000..3154318 --- /dev/null +++ b/server/extension/sql/0.1.0/15_config_helper.sql @@ -0,0 +1,16 @@ +-- Get the Redis configuration from the _conf table -- +CREATE OR REPLACE FUNCTION cdb_geocoder_server._get_geocoder_config(username text, orgname text) +RETURNS boolean AS $$ + cache_key = "user_geocoder_config_{0}".format(username) + if cache_key in GD: + return False + else: + from cartodb_geocoder import config_helper + plpy.execute("SELECT cdb_geocoder_server._connect_to_redis('{0}')".format(username)) + redis_conn = GD["redis_connection_{0}".format(username)]['redis_metadata_connection'] + geocoder_config = config_helper.GeocoderConfig(redis_conn, username, orgname) + # --Think about the security concerns with this kind of global cache, it should be only available + # 
--for this user session but... + GD[cache_key] = geocoder_config + return True +$$ LANGUAGE plpythonu; diff --git a/server/extension/sql/0.1.0/20_geocode_street.sql b/server/extension/sql/0.1.0/20_geocode_street.sql new file mode 100644 index 0000000..32fcdbb --- /dev/null +++ b/server/extension/sql/0.1.0/20_geocode_street.sql @@ -0,0 +1,30 @@ +-- Geocodes a street address given a searchtext and a state and/or country +DROP FUNCTION IF EXISTS cdb_geocoder_server.cdb_geocode_street_point(TEXT, TEXT, TEXT, TEXT); +CREATE OR REPLACE FUNCTION cdb_geocoder_server.cdb_geocode_street_point(username TEXT, orgname TEXT, searchtext TEXT, city TEXT DEFAULT NULL, state_province TEXT DEFAULT NULL, country TEXT DEFAULT NULL) +RETURNS Geometry AS $$ + import json + from heremaps import heremapsgeocoder + from cartodb_geocoder import quota_service + + plpy.execute("SELECT cdb_geocoder_server._connect_to_redis('{0}')".format(username)) + redis_conn = GD["redis_connection_{0}".format(username)]['redis_metrics_connection'] + plpy.execute("SELECT cdb_geocoder_server._get_geocoder_config('{0}', '{1}')".format(username, orgname)) + user_geocoder_config = GD["user_geocoder_config_{0}".format(username)] + + # -- Check the quota + quota_service = quota_service.QuotaService(user_geocoder_config, redis_conn, username, orgname) + if not quota_service.check_user_quota(): + plpy.error('You have reach limit of your quota') + + heremaps_conf = json.loads(plpy.execute("SELECT cdb_geocoder_server._get_conf('heremaps')", 1)[0]['get_conf']) + app_id = heremaps_conf['geocoder']['app_id'] + app_code = heremaps_conf['geocoder']['app_code'] + + geocoder = heremapsgeocoder.Geocoder(app_id, app_code) + results = geocoder.geocode_address(searchtext=searchtext, city=city, state=state_province, country=country) + coordinates = geocoder.extract_lng_lat_from_result(results[0]) + plan = plpy.prepare("SELECT ST_SetSRID(ST_MakePoint($1, $2), 4326); ", ["double precision", "double precision"]) + point = 
plpy.execute(plan, [coordinates[0], coordinates[1]], 1)[0] + + return point['st_setsrid'] +$$ LANGUAGE plpythonu; diff --git a/server/lib/python/cartodb_geocoder/cartodb_geocoder/config_helper.py b/server/lib/python/cartodb_geocoder/cartodb_geocoder/config_helper.py index 5914fe9..1eef3ba 100644 --- a/server/lib/python/cartodb_geocoder/cartodb_geocoder/config_helper.py +++ b/server/lib/python/cartodb_geocoder/cartodb_geocoder/config_helper.py @@ -6,36 +6,6 @@ class ConfigException(Exception): pass -class UserConfig: - - USER_CONFIG_KEYS = ['is_organization', 'entity_name'] - - def __init__(self, user_config_json, db_user_id=None): - config = json.loads(user_config_json) - filtered_config = {key: config[key] for key in self.USER_CONFIG_KEYS if key in config.keys()} - self.__check_config(filtered_config) - self.__parse_config(filtered_config) - - def __check_config(self, filtered_config): - if len(filtered_config.keys()) != len(self.USER_CONFIG_KEYS): - raise ConfigException( - "Passed user configuration is not correct, check it please") - - return True - - @property - def is_organization(self): - return self._is_organization - - @property - def entity_name(self): - return self._entity_name - - def __parse_config(self, filtered_config): - self._is_organization = filtered_config['is_organization'] - self._entity_name = filtered_config['entity_name'] - - class GeocoderConfig: GEOCODER_CONFIG_KEYS = ['google_maps_client_id', 'google_maps_api_key', @@ -85,14 +55,17 @@ class GeocoderConfig: self._geocoder_type = filtered_config[self.GEOCODER_TYPE].lower() self._period_end_date = date_parse(filtered_config[self.PERIOD_END_DATE]) self._google_maps_private_key = None - self._nokia_monthly_quota = 0 - self._nokia_soft_geocoder_limit = False + self._geocoding_quota = 0 + self._soft_geocoding_limit = False if self.GOOGLE_GEOCODER == self._geocoder_type: self._google_maps_private_key = filtered_config[self.GOOGLE_GEOCODER_API_KEY] self._google_maps_client_id = 
filtered_config[self.GOOGLE_GEOCODER_CLIENT_ID] elif self.NOKIA_GEOCODER == self._geocoder_type: - self._geocoding_quota = filtered_config[self.QUOTA_KEY] - self._soft_geocoding_limit = filtered_config[self.SOFT_LIMIT_KEY] + self._geocoding_quota = float(filtered_config[self.QUOTA_KEY]) + if filtered_config[self.SOFT_LIMIT_KEY] == 'true': + self._soft_geocoding_limit = True + else: + self._soft_geocoding_limit = False @property def service_type(self): diff --git a/server/lib/python/cartodb_geocoder/cartodb_geocoder/quota_service.py b/server/lib/python/cartodb_geocoder/cartodb_geocoder/quota_service.py index 001eb34..061ffb3 100644 --- a/server/lib/python/cartodb_geocoder/cartodb_geocoder/quota_service.py +++ b/server/lib/python/cartodb_geocoder/cartodb_geocoder/quota_service.py @@ -1,31 +1,51 @@ import user_service -import config_helper from datetime import date -class QuotaService: - """ Class to manage all the quota operation for the Geocoder SQL API Extension """ - def __init__(self, user_config, geocoder_config, redis_connection): - self._user_config = user_config - self._geocoder_config = geocoder_config - self._user_service = user_service.UserService(self._user_config, - self._geocoder_config.service_type, redis_connection) +class QuotaService: + """ Class to manage all the quota operation for + the Geocoder SQL API Extension """ + + def __init__(self, user_geocoder_config, redis_connection, username, orgname=None): + self._user_geocoder_config = user_geocoder_config + self._user_service = user_service.UserService( + self._user_geocoder_config, + redis_connection, + username, + orgname + ) def check_user_quota(self): """ Check if the current user quota surpasses the current quota """ # We don't have quota check for google geocoder - if self._geocoder_config.google_geocoder: + if self._user_geocoder_config.google_geocoder: return True - user_quota = self._geocoder_config.nokia_monthly_quota + user_quota = self._user_geocoder_config.geocoding_quota today = 
date.today() - service_type = self._geocoder_config.service_type - current_used = self._user_service.used_quota(service_type, today.year, today.month) - soft_geocoder_limit = self._geocoder_config.nokia_soft_limit + service_type = self._user_geocoder_config.service_type + current_used = self._user_service.used_quota(service_type, today) + soft_geocoding_limit = self._user_geocoder_config.soft_geocoding_limit - print "User quota: {0} --- current_used: {1} --- limit: {2}".format(user_quota, current_used, soft_geocoder_limit) + print "User quota: {0} --- current_used: {1} --- limit: {2}".format( + user_quota, current_used, soft_geocoding_limit) - return True if soft_geocoder_limit or current_used <= user_quota else False + if soft_geocoding_limit or current_used <= user_quota: + return True + else: + return False - def increment_geocoder_use(self, amount=1): - self._user_service.increment_service_use(self._geocoder_config.service_type) \ No newline at end of file + def increment_successful_geocoder_use(self, amount=1): + self._user_service.increment_service_use( + self._user_geocoder_config.service_type, "success_responses" + ) + + def increment_empty_geocoder_use(self, amount=1): + self._user_service.increment_service_use( + self._user_geocoder_config.service_type, "empty_responses" + ) + + def increment_failed_geocoder_use(self, amount=1): + self._user_service.increment_service_use( + self._user_geocoder_config.service_type, "failed_responses" + ) diff --git a/server/lib/python/cartodb_geocoder/cartodb_geocoder/user_service.py b/server/lib/python/cartodb_geocoder/cartodb_geocoder/user_service.py index 142056e..cdb72dd 100644 --- a/server/lib/python/cartodb_geocoder/cartodb_geocoder/user_service.py +++ b/server/lib/python/cartodb_geocoder/cartodb_geocoder/user_service.py @@ -1,64 +1,96 @@ -import redis_helper -from datetime import date +from datetime import date, timedelta +from dateutil.relativedelta import relativedelta + class UserService: - """ Class to manage 
all the user info """ + """ Class to manage all the user info """ - GEOCODING_QUOTA_KEY = "geocoding_quota" - GEOCODING_SOFT_LIMIT_KEY = "soft_geocoder_limit" + SERVICE_GEOCODER_NOKIA = 'geocoder_here' + SERVICE_GEOCODER_GOOGLE = 'geocoder_google' + SERVICE_GEOCODER_CACHE = 'geocoder_cache' - REDIS_CONNECTION_KEY = "redis_connection" - REDIS_CONNECTION_HOST = "redis_host" - REDIS_CONNECTION_PORT = "redis_port" - REDIS_CONNECTION_DB = "redis_db" + GEOCODING_QUOTA_KEY = "geocoding_quota" + GEOCODING_SOFT_LIMIT_KEY = "soft_geocoder_limit" - def __init__(self, user_config, service_type, redis_connection): - self.user_config = user_config - self.service_type = service_type - self._redis_connection = redis_connection + REDIS_CONNECTION_KEY = "redis_connection" + REDIS_CONNECTION_HOST = "redis_host" + REDIS_CONNECTION_PORT = "redis_port" + REDIS_CONNECTION_DB = "redis_db" - def used_quota(self, service_type, year, month, day=None): - """ Recover the used quota for the user in the current month """ - redis_key_data = self.__get_redis_key(service_type, year, month, day) - current_use = self._redis_connection.hget(redis_key_data['redis_name'], redis_key_data['redis_key']) - return int(current_use) if current_use else 0 + def __init__(self, user_geocoder_config, redis_connection, username, orgname=None): + self._user_geocoder_config = user_geocoder_config + self._redis_connection = redis_connection + self._username = username + self._orgname = orgname - def increment_service_use(self, service_type, date=date.today(), amount=1): - """ Increment the services uses in monthly and daily basis""" - self.__increment_monthly_uses(date, service_type, amount) - self.__increment_daily_uses(date, service_type, amount) + def used_quota(self, service_type, date): + """ Recover the used quota for the user in the current month """ + date_from, date_to = self.__current_billing_cycle() + current_use = 0 + success_responses = self.__get_metrics(service_type, + 'success_responses', date_from, + 
date_to) + empty_responses = self.__get_metrics(service_type, + 'empty_responses', date_from, + date_to) + current_use += (success_responses + empty_responses) + if service_type == self.SERVICE_GEOCODER_NOKIA: + cache_hits = self.__get_metrics(self.SERVICE_GEOCODER_CACHE, + 'success_responses', date_from, + date_to) + current_use += cache_hits - # Private functions + return current_use - def __increment_monthly_uses(self, date, service_type, amount): - redis_key_data = self.__get_redis_key(service_type, date.year, date.month) - self._redis_connection.hincrby(redis_key_data['redis_name'],redis_key_data['redis_key'],amount) + def increment_service_use(self, service_type, metric, date=date.today(), amount=1): + """ Increment the services uses in monthly and daily basis""" + self.__increment_user_uses(service_type, metric, date, amount) + if self._orgname: + self.__increment_organization_uses(service_type, metric, date, amount) - def __increment_daily_uses(self, date, service_type, amount): - redis_key_data = self.__get_redis_key(service_type, date.year, date.month, date.day) - self._redis_connection.hincrby(redis_key_data['redis_name'],redis_key_data['redis_key'],amount) + # Private functions - def __get_redis_key(self, service_type, year, month, day=None): - redis_name = self.__parse_redis_name(service_type,day) - redis_key = self.__parse_redis_key(year,month,day) + def __increment_user_uses(self, service_type, metric, date, amount): + redis_prefix = self.__parse_redis_prefix("user", self._username, + service_type, metric, date) + self._redis_connection.hincrby(redis_prefix, date.day, amount) - return {'redis_name': redis_name, 'redis_key': redis_key} + def __increment_organization_uses(self, service_type, metric, date, amount): + redis_prefix = self.__parse_redis_prefix("org", self._orgname, + service_type, metric, date) + self._redis_connection.hincrby(redis_prefix, date.day, amount) - def __parse_redis_name(self,service_type, day=None): - prefix = "org" if 
self.user_config.is_organization else "user" - dated_key = "used_quota_day" if day else "used_quota_month" - redis_name = "{0}:{1}:{2}:{3}".format( - prefix, self.user_config.entity_name, service_type, dated_key - ) - if self.user_config.is_organization and day: - redis_name = "{0}:{1}".format(redis_name, self.user_config.user_id) + def __parse_redis_prefix(self, prefix, entity_name, service_type, metric, date): + yearmonth_key = date.strftime('%Y%m') + redis_name = "{0}:{1}:{2}:{3}:{4}".format(prefix, entity_name, + service_type, metric, + yearmonth_key) - return redis_name + return redis_name - def __parse_redis_key(self,year,month,day=None): - if day: - redis_key = "{0}_{1}_{2}".format(year,month,day) - else: - redis_key = "{0}_{1}".format(year,month) + def __get_metrics(self, service, metric, date_from, date_to): + aggregated_metric = 0 + key_prefix = "org" if self._orgname else "user" + entity_name = self._orgname if self._orgname else self._username + for date in self.__generate_date_range(date_from, date_to): + redis_prefix = self.__parse_redis_prefix(key_prefix, entity_name, + service, metric, date) + score = self._redis_connection.zscore(redis_prefix, date.day) + aggregated_metric += score if score else 0 + return aggregated_metric - return redis_key + def __current_billing_cycle(self): + """ Return the beginning and end date for the current billing cycle """ + end_period_day = self._user_geocoder_config.period_end_date.day + today = date.today() + if end_period_day > today.day: + temp_date = today + relativedelta(months=-1) + date_from = date(temp_date.year, temp_date.month, end_period_day) + else: + date_from = date(today.year, today.month, end_period_day) + + return date_from, today + + def __generate_date_range(self, date_from, date_to): + for n in range(int((date_to - date_from).days)): + yield date_from + timedelta(n) diff --git a/server/lib/python/cartodb_geocoder/requirements.txt b/server/lib/python/cartodb_geocoder/requirements.txt index 
784f064..3b94e05 100644 --- a/server/lib/python/cartodb_geocoder/requirements.txt +++ b/server/lib/python/cartodb_geocoder/requirements.txt @@ -1,6 +1,7 @@ -redis-py==2.10.5 +redis==2.10.5 +python-dateutil==2.4.2 # Test mock==1.3.0 mockredispy==2.9.0.11 -nose==1.3.7 \ No newline at end of file +nose==1.3.7