Remove DO Python code and SQL tests

This commit is contained in:
Jesús Arroyo Torrens
2021-03-05 10:44:09 +01:00
parent 610398f531
commit edbefad6b0
29 changed files with 11 additions and 2252 deletions

View File

@@ -16,22 +16,11 @@ CREATE TYPE cdb_dataservices_client.simple_route AS (
duration integer
);
-- For the OBS_Meta functions
CREATE TYPE cdb_dataservices_client.obs_meta_numerator AS (numer_id text, numer_name text, numer_description text, numer_weight text, numer_license text, numer_source text, numer_type text, numer_aggregate text, numer_extra jsonb, numer_tags jsonb, valid_denom boolean, valid_geom boolean, valid_timespan boolean);
CREATE TYPE cdb_dataservices_client.obs_meta_denominator AS (denom_id text, denom_name text, denom_description text, denom_weight text, denom_license text, denom_source text, denom_type text, denom_aggregate text, denom_extra jsonb, denom_tags jsonb, valid_numer boolean, valid_geom boolean, valid_timespan boolean);
CREATE TYPE cdb_dataservices_client.obs_meta_geometry AS (geom_id text, geom_name text, geom_description text, geom_weight text, geom_aggregate text, geom_license text, geom_source text, valid_numer boolean, valid_denom boolean, valid_timespan boolean, score numeric, numtiles bigint, notnull_percent numeric, numgeoms numeric, percentfill numeric, estnumgeoms numeric, meanmediansize numeric, geom_type text, geom_extra jsonb, geom_tags jsonb);
CREATE TYPE cdb_dataservices_client.obs_meta_timespan AS (timespan_id text, timespan_name text, timespan_description text, timespan_weight text, timespan_aggregate text, timespan_license text, timespan_source text, valid_numer boolean, valid_denom boolean, valid_geom boolean, timespan_type text, timespan_extra jsonb, timespan_tags jsonb);
-- For quotas and services configuration
CREATE TYPE cdb_dataservices_client.service_type AS ENUM (
'isolines',
'hires_geocoder',
'routing',
'observatory'
'routing'
);
CREATE TYPE cdb_dataservices_client.service_quota_info AS (

View File

@@ -1,279 +0,0 @@
CREATE TYPE cdb_dataservices_client.ds_fdw_metadata as (schemaname text, tabname text, servername text);
CREATE TYPE cdb_dataservices_client.ds_return_metadata as (colnames text[], coltypes text[]);
CREATE OR REPLACE FUNCTION cdb_dataservices_client._DST_PrepareTableOBS_GetMeasure(
output_table_name text,
params json
) RETURNS boolean AS $$
DECLARE
username text;
user_db_role text;
orgname text;
user_schema text;
result boolean;
BEGIN
IF session_user = 'publicuser' OR session_user ~ 'cartodb_publicuser_*' THEN
RAISE EXCEPTION 'The api_key must be provided';
END IF;
SELECT session_user INTO user_db_role;
SELECT u, o INTO username, orgname FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text, p json);
-- JSON value stored "" is taken as literal
IF username IS NULL OR username = '' OR username = '""' THEN
RAISE EXCEPTION 'Username is a mandatory argument';
END IF;
IF orgname IS NULL OR orgname = '' OR orgname = '""' THEN
user_schema := 'public';
ELSE
user_schema := username;
END IF;
SELECT cdb_dataservices_client.__DST_PrepareTableOBS_GetMeasure(
username,
orgname,
user_db_role,
user_schema,
output_table_name,
params
) INTO result;
RETURN result;
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER VOLATILE PARALLEL UNSAFE
SET search_path = pg_temp;
CREATE OR REPLACE FUNCTION cdb_dataservices_client._DST_PopulateTableOBS_GetMeasure(
table_name text,
output_table_name text,
params json
) RETURNS boolean AS $$
DECLARE
username text;
user_db_role text;
orgname text;
dbname text;
user_schema text;
result boolean;
BEGIN
IF session_user = 'publicuser' OR session_user ~ 'cartodb_publicuser_*' THEN
RAISE EXCEPTION 'The api_key must be provided';
END IF;
SELECT session_user INTO user_db_role;
SELECT u, o INTO username, orgname FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text, p json);
-- JSON value stored "" is taken as literal
IF username IS NULL OR username = '' OR username = '""' THEN
RAISE EXCEPTION 'Username is a mandatory argument';
END IF;
IF orgname IS NULL OR orgname = '' OR orgname = '""' THEN
user_schema := 'public';
ELSE
user_schema := username;
END IF;
SELECT current_database() INTO dbname;
SELECT cdb_dataservices_client.__DST_PopulateTableOBS_GetMeasure(
username,
orgname,
user_db_role,
user_schema,
dbname,
table_name,
output_table_name,
params
) INTO result;
RETURN result;
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER VOLATILE PARALLEL UNSAFE
SET search_path = pg_temp;
CREATE OR REPLACE FUNCTION cdb_dataservices_client.__DST_PrepareTableOBS_GetMeasure(
username text,
orgname text,
user_db_role text,
user_schema text,
output_table_name text,
params json
) RETURNS boolean AS $$
function_name = 'OBS_GetMeasure'
# Obtain return types for augmentation procedure
ds_return_metadata = plpy.execute("SELECT colnames, coltypes "
"FROM cdb_dataservices_client._DST_GetReturnMetadata({username}::text, {orgname}::text, {function_name}::text, {params}::json);"
.format(
username=plpy.quote_nullable(username),
orgname=plpy.quote_nullable(orgname),
function_name=plpy.quote_literal(function_name),
params=plpy.quote_literal(params)
)
)
if ds_return_metadata[0]["colnames"]:
colnames_arr = ds_return_metadata[0]["colnames"]
coltypes_arr = ds_return_metadata[0]["coltypes"]
else:
raise Exception('Error retrieving OBS_GetMeasure metadata')
# Prepare column and type strings required in the SQL queries
columns_with_types_arr = [colnames_arr[i] + ' ' + coltypes_arr[i] for i in range(0,len(colnames_arr))]
columns_with_types = ','.join(columns_with_types_arr)
# Create a new table with the required columns
plpy.execute('CREATE TABLE "{schema}".{table_name} ( '
'cartodb_id int, the_geom public.geometry, {columns_with_types} '
');'
.format(schema=user_schema, table_name=output_table_name, columns_with_types=columns_with_types)
)
plpy.execute('ALTER TABLE "{schema}".{table_name} OWNER TO "{user}";'
.format(schema=user_schema, table_name=output_table_name, user=user_db_role)
)
return True
$$ LANGUAGE @@plpythonu@@ VOLATILE PARALLEL UNSAFE;
CREATE OR REPLACE FUNCTION cdb_dataservices_client.__DST_PopulateTableOBS_GetMeasure(
username text,
orgname text,
user_db_role text,
user_schema text,
dbname text,
table_name text,
output_table_name text,
params json
) RETURNS boolean AS $$
function_name = 'OBS_GetMeasure'
# Obtain return types for augmentation procedure
ds_return_metadata = plpy.execute(
"SELECT colnames, coltypes "
"FROM cdb_dataservices_client._DST_GetReturnMetadata({username}::text, {orgname}::text, {function_name}::text, {params}::json);" .format(
username=plpy.quote_nullable(username),
orgname=plpy.quote_nullable(orgname),
function_name=plpy.quote_literal(function_name),
params=plpy.quote_literal(params)))
if ds_return_metadata[0]["colnames"]:
colnames_arr = ds_return_metadata[0]["colnames"]
coltypes_arr = ds_return_metadata[0]["coltypes"]
else:
raise Exception('Error retrieving OBS_GetMeasure metadata')
# Prepare column and type strings required in the SQL queries
columns_with_types_arr = [
colnames_arr[i] +
' ' +
coltypes_arr[i] for i in range(
0,
len(colnames_arr))]
columns_with_types = ','.join(columns_with_types_arr)
aliased_colname_list = ','.join(
['result.' + name for name in colnames_arr])
# Instruct the OBS server side to establish a FDW
# The metadata is obtained as well in order to:
# - (a) be able to write the query to grab the actual data to be executed in the remote server via pl/proxy,
# - (b) be able to tell OBS to free resources when done.
ds_fdw_metadata = plpy.execute(
"SELECT schemaname, tabname, servername "
"FROM cdb_dataservices_client._DST_ConnectUserTable({username}::text, {orgname}::text, {user_db_role}::text, "
"{schema}::text, {dbname}::text, {table_name}::text);" .format(
username=plpy.quote_nullable(username),
orgname=plpy.quote_nullable(orgname),
user_db_role=plpy.quote_literal(user_db_role),
schema=plpy.quote_literal(user_schema),
dbname=plpy.quote_literal(dbname),
table_name=plpy.quote_literal(table_name)))
if ds_fdw_metadata[0]["schemaname"]:
server_schema = ds_fdw_metadata[0]["schemaname"]
server_table_name = ds_fdw_metadata[0]["tabname"]
server_name = ds_fdw_metadata[0]["servername"]
else:
raise Exception('Error connecting dataset via FDW')
# Create a new table with the required columns
plpy.execute(
'INSERT INTO "{schema}".{analysis_table_name} '
'SELECT ut.cartodb_id, ut.the_geom, {colname_list} '
'FROM "{schema}".{table_name} ut '
'LEFT JOIN cdb_dataservices_client._DST_FetchJoinFdwTableData({username}::text, {orgname}::text, {server_schema}::text, {server_table_name}::text, '
'{function_name}::text, {params}::json) '
'AS result ({columns_with_types}, cartodb_id int) '
'ON result.cartodb_id = ut.cartodb_id;' .format(
schema=user_schema,
analysis_table_name=output_table_name,
colname_list=aliased_colname_list,
table_name=table_name,
username=plpy.quote_nullable(username),
orgname=plpy.quote_nullable(orgname),
server_schema=plpy.quote_literal(server_schema),
server_table_name=plpy.quote_literal(server_table_name),
function_name=plpy.quote_literal(function_name),
params=plpy.quote_literal(params),
columns_with_types=columns_with_types))
# Wipe user FDW data from the server
wiped = plpy.execute(
"SELECT cdb_dataservices_client._DST_DisconnectUserTable({username}::text, {orgname}::text, {server_schema}::text, "
"{server_table_name}::text, {fdw_server}::text)" .format(
username=plpy.quote_nullable(username),
orgname=plpy.quote_nullable(orgname),
server_schema=plpy.quote_literal(server_schema),
server_table_name=plpy.quote_literal(server_table_name),
fdw_server=plpy.quote_literal(server_name)))
return True
$$ LANGUAGE @@plpythonu@@ VOLATILE PARALLEL UNSAFE;
CREATE OR REPLACE FUNCTION cdb_dataservices_client._DST_ConnectUserTable(
username text,
orgname text,
user_db_role text,
user_schema text,
dbname text,
table_name text
)RETURNS cdb_dataservices_client.ds_fdw_metadata AS $$
CONNECT cdb_dataservices_client._server_conn_str();
TARGET cdb_dataservices_server._DST_ConnectUserTable;
$$ LANGUAGE plproxy VOLATILE PARALLEL UNSAFE;
CREATE OR REPLACE FUNCTION cdb_dataservices_client._DST_GetReturnMetadata(
username text,
orgname text,
function_name text,
params json
) RETURNS cdb_dataservices_client.ds_return_metadata AS $$
CONNECT cdb_dataservices_client._server_conn_str();
TARGET cdb_dataservices_server._DST_GetReturnMetadata;
$$ LANGUAGE plproxy VOLATILE PARALLEL UNSAFE;
CREATE OR REPLACE FUNCTION cdb_dataservices_client._DST_FetchJoinFdwTableData(
username text,
orgname text,
table_schema text,
table_name text,
function_name text,
params json
) RETURNS SETOF record AS $$
CONNECT cdb_dataservices_client._server_conn_str();
TARGET cdb_dataservices_server._DST_FetchJoinFdwTableData;
$$ LANGUAGE plproxy VOLATILE PARALLEL UNSAFE;
CREATE OR REPLACE FUNCTION cdb_dataservices_client._DST_DisconnectUserTable(
username text,
orgname text,
table_schema text,
table_name text,
server_name text
) RETURNS boolean AS $$
CONNECT cdb_dataservices_client._server_conn_str();
TARGET cdb_dataservices_server._DST_DisconnectUserTable;
$$ LANGUAGE plproxy VOLATILE PARALLEL UNSAFE;

View File

@@ -1,54 +0,0 @@
CREATE OR REPLACE FUNCTION cdb_dataservices_client._OBS_PreCheck(
source_query text,
parameters json
) RETURNS boolean AS $$
DECLARE
errors text[];
validator_errors text[];
validator_error text;
valid boolean;
geoms record;
BEGIN
errors := (ARRAY[])::TEXT[];
FOR geoms IN EXECUTE format('SELECT ST_GeometryType(the_geom) as geom_type,
bool_and(st_isvalid(the_geom)) as valid,
avg(st_npoints(the_geom)) as avg_vertex,
ST_SetSRID(ST_Extent(the_geom), 4326) as extent,
count(*)::INT as numgeoms
FROM (%s) as _source
GROUP BY ST_GeometryType(the_geom)', source_query)
LOOP
IF geoms.geom_type NOT IN ('ST_Polygon', 'ST_MultiPolygon', 'ST_Point') THEN
errors := array_append(errors, format($data$'Geometry type %s not supported'$data$, geoms.geom_type));
END IF;
IF geoms.valid IS FALSE THEN
errors := array_append(errors, 'There are invalid geometries in the input data, please try to fix them');
END IF;
-- 1000 vertex for a geometry is a limit we have in the obs_getdata function. You can check here
-- https://github.com/CartoDB/observatory-extension/blob/1.6.0/src/pg/sql/41_observatory_augmentation.sql#L813
IF geoms.avg_vertex > 1000 THEN
errors := array_append(errors, 'The average number of vertices per geometry is greater than 1000, please try to simplify them');
END IF;
-- OBS specific part
EXECUTE 'SELECT valid, errors
FROM cdb_dataservices_client.OBS_MetadataValidation($1, $2, $3, $4)'
INTO valid, validator_errors
USING geoms.extent, geoms.geom_type, parameters, geoms.numgeoms;
IF valid is FALSE THEN
FOR validator_error IN EXECUTE 'SELECT unnest($1)' USING validator_errors
LOOP
errors := array_append(errors, validator_error);
END LOOP;
END IF;
END LOOP;
IF CARDINALITY(errors) > 0 THEN
RAISE EXCEPTION '%', errors;
END IF;
RETURN TRUE;
END;
$$ LANGUAGE 'plpgsql' VOLATILE PARALLEL UNSAFE;

View File

@@ -1,6 +1,3 @@
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._DST_PrepareTableOBS_GetMeasure(output_table_name text, params json) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._DST_PopulateTableOBS_GetMeasure(table_name text, output_table_name text, params json) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._OBS_PreCheck(source_query text, params JSON) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point(query text, street_column text, city_column text, state_column text, country_column text, batch_size integer) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.cdb_count_estimate(query text) TO publicuser;