-- Named-place geocoder functions.
--
-- Reconstructed from a patch that removed these two files:
--   geocoder/namedplace/geocode_namedplace.sql
--   geocoder/namedplace/namedplace_country_guessing.sql
--
-- Common lookup strategy used by every geocode_namedplace overload:
--   best: exact match of lower(place) against
--         global_cities_points_limited.lowername, highest population first.
--   next: for inputs that `best` did not resolve, match lower(place) against
--         global_cities_alternates_limited.lowername, ordered by
--         `preferred` DESC.
-- Resolved rows are returned with success = TRUE; unresolved rows are
-- returned with a NULL geometry and success = FALSE.
--
-- All functions are declared STABLE (not IMMUTABLE) because they read tables.
--
-- NOTE(review): every function is SECURITY DEFINER and references tables and
-- types without a schema qualifier, and no search_path is pinned on the
-- function. Confirm which schema these objects live in and consider adding
-- `SET search_path = ...` to each definition.


-- ---------------------------------------------------------------------------
-- geocode_namedplace(places text[])
--
-- Geocodes each element of `places` without any country or admin1 hint.
-- Returns one row (q, geom, success) per input place, where q is the input
-- string.
-- ---------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[])
 RETURNS SETOF geocode_namedplace_v1
 LANGUAGE plpgsql
 STABLE SECURITY DEFINER
AS $function$
DECLARE
  ret geocode_namedplace_v1%rowtype;
BEGIN
  FOR ret IN
    WITH best AS (
      SELECT
        p.s AS q,
        (
          SELECT gp.the_geom
          FROM global_cities_points_limited gp
          WHERE gp.lowername = lower(p.s)
          ORDER BY population DESC
          LIMIT 1
        ) AS geom
      FROM (SELECT unnest(places) AS s) p
    ),
    next AS (
      SELECT
        p.s AS q,
        (
          SELECT gp.the_geom
          FROM global_cities_points_limited gp
          INNER JOIN global_cities_alternates_limited ga
            ON ga.geoname_id = gp.geoname_id
          WHERE ga.lowername = lower(p.s)
          ORDER BY preferred DESC
          LIMIT 1
        ) AS geom
      FROM (SELECT unnest(places) AS s) p
      -- NOT EXISTS rather than NOT IN: a NULL place is always reported as a
      -- failure instead of vanishing whenever some other place matched.
      WHERE NOT EXISTS (
        SELECT 1 FROM best b WHERE b.q = p.s AND b.geom IS NOT NULL
      )
    )
    SELECT q, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
    UNION ALL
    SELECT q, geom, (geom IS NOT NULL) AS success FROM next
  LOOP
    RETURN NEXT ret;
  END LOOP;
  RETURN;
END
$function$;


-- ---------------------------------------------------------------------------
-- geocode_namedplace(places text[], admin1s text, inputcountry text)
--
-- Geocodes `places` with a single admin1 value and a single country value
-- applied to every place. A NULL or empty admin1s / inputcountry means "not
-- supplied"; the call is then routed to the overload that does not take that
-- argument. Returns (q, a1, c, geom, success) where a1 and c echo the
-- admin1s and inputcountry arguments exactly as passed in.
-- ---------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], admin1s text, inputcountry text)
 RETURNS SETOF geocode_admin1_country_v1
 LANGUAGE plpgsql
 STABLE SECURITY DEFINER
AS $function$
DECLARE
  ret geocode_admin1_country_v1%rowtype;
  has_country BOOLEAN;
  has_admin1s BOOLEAN;
  admin1s_a TEXT[];
BEGIN
  -- NULL and '' both count as "not supplied".
  has_country := coalesce(inputcountry, '') <> '';
  has_admin1s := coalesce(admin1s, '') <> '';

  IF has_admin1s THEN
    -- Repeat the single admin1 value once per place so the array overload
    -- can be used. Yields NULL when `places` is NULL or empty.
    SELECT array_agg(admin1s) INTO admin1s_a
    FROM generate_series(1, array_length(places, 1));
  END IF;

  IF NOT has_country THEN
    IF NOT has_admin1s THEN
      -- no country, no admin1 value
      FOR ret IN
        SELECT g.q, admin1s AS a1, inputcountry AS c, g.geom, g.success
        FROM geocode_namedplace(places) AS g
      LOOP
        RETURN NEXT ret;
      END LOOP;
    ELSE
      -- no country, has admin1 value
      FOR ret IN
        SELECT g.q, admin1s AS a1, inputcountry AS c, g.geom, g.success
        FROM geocode_namedplace(places, admin1s_a, NULL::text) AS g
      LOOP
        RETURN NEXT ret;
      END LOOP;
    END IF;
  ELSE
    IF NOT has_admin1s THEN
      -- has country, no admin1 value
      FOR ret IN
        SELECT g.q, admin1s AS a1, inputcountry AS c, g.geom, g.success
        FROM geocode_namedplace(places, inputcountry) AS g
      LOOP
        RETURN NEXT ret;
      END LOOP;
    ELSE
      -- has country, has admin1 value
      FOR ret IN
        SELECT g.q, admin1s AS a1, inputcountry AS c, g.geom, g.success
        FROM geocode_namedplace(places, admin1s_a, inputcountry) AS g
      LOOP
        RETURN NEXT ret;
      END LOOP;
    END IF;
  END IF;
  RETURN;
END
$function$;


-- ---------------------------------------------------------------------------
-- geocode_namedplace(places text[], admin1s text[], inputcountry text)
--
-- Geocodes `places` with a per-place admin1 value and one shared country.
-- Places whose admin1 is NULL or '' are delegated to the overloads without
-- admin1. The remaining places are matched only within the admin1 code
-- resolved through admin1_decoder (restricted to the country's iso2 when a
-- country is supplied). Returns (q, a1, c, geom, success); c echoes
-- inputcountry.
-- ---------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], admin1s text[], inputcountry text)
 RETURNS SETOF geocode_admin1_country_v1
 LANGUAGE plpgsql
 STABLE SECURITY DEFINER
AS $function$
DECLARE
  ret geocode_admin1_country_v1%rowtype;
  nans TEXT[];
  isoTwo TEXT := NULL;
  has_country BOOLEAN;
BEGIN
  -- NULL and '' both count as "no country supplied".
  has_country := coalesce(inputcountry, '') <> '';

  -- find the iso2 code for the input country string, else leave it NULL
  IF has_country THEN
    SELECT iso2 INTO isoTwo
    FROM country_decoder
    WHERE lower(inputcountry) = ANY (synonyms)
    LIMIT 1;
  END IF;

  -- find all cases where admin1 is NULL
  SELECT array_agg(p) INTO nans
  FROM (SELECT unnest(places) p, unnest(admin1s) c) g
  WHERE c IS NULL;

  IF 0 < array_length(nans, 1) THEN
    -- keep only the pairs that still have an admin1 for the later steps
    SELECT array_agg(p), array_agg(c) INTO places, admin1s
    FROM (SELECT unnest(places) p, unnest(admin1s) c) g
    WHERE c IS NOT NULL;

    IF has_country THEN
      -- geocode these places without admin1 but with the country
      FOR ret IN
        SELECT g.q, NULL::text AS a1, inputcountry AS c, g.geom, g.success
        FROM geocode_namedplace(nans, inputcountry) AS g
      LOOP
        RETURN NEXT ret;
      END LOOP;
    ELSE
      -- geocode these places without admin1 and without country
      FOR ret IN
        SELECT g.q, NULL::text AS a1, inputcountry AS c, g.geom, g.success
        FROM geocode_namedplace(nans) AS g
      LOOP
        RETURN NEXT ret;
      END LOOP;
    END IF;
  END IF;

  -- find all cases where admin1 is an empty string
  SELECT array_agg(p) INTO nans
  FROM (SELECT unnest(places) p, unnest(admin1s) c) g
  WHERE c = '';

  IF 0 < array_length(nans, 1) THEN
    SELECT array_agg(p), array_agg(c) INTO places, admin1s
    FROM (SELECT unnest(places) p, unnest(admin1s) c) g
    WHERE c != '';

    IF has_country THEN
      -- geocode these places without admin1 but with the country
      FOR ret IN
        SELECT g.q, ''::text AS a1, inputcountry AS c, g.geom, g.success
        FROM geocode_namedplace(nans, inputcountry) AS g
      LOOP
        RETURN NEXT ret;
      END LOOP;
    ELSE
      -- geocode these places without admin1 and without country
      FOR ret IN
        SELECT g.q, ''::text AS a1, inputcountry AS c, g.geom, g.success
        FROM geocode_namedplace(nans) AS g
      LOOP
        RETURN NEXT ret;
      END LOOP;
    END IF;
  END IF;

  -- geocode all the cases where admin1 is available
  IF has_country THEN
    -- return c = inputcountry; admin1 synonyms are resolved only within the
    -- country's iso2 (no admin1 resolves when the country was not recognised)
    FOR ret IN
      WITH p AS (
        SELECT
          r.s,
          r.a1,
          (
            SELECT admin1
            FROM admin1_decoder
            WHERE lower(r.a1) = ANY (synonyms)
              AND admin1_decoder.iso2 = isoTwo
            LIMIT 1
          ) AS i
        FROM (SELECT unnest(places) AS s, unnest(admin1s)::text AS a1) r
      ),
      best AS (
        SELECT
          p.s AS q,
          p.a1 AS a1,
          (
            SELECT gp.the_geom
            FROM global_cities_points_limited gp
            WHERE gp.lowername = lower(p.s)
              AND gp.admin1 = p.i
            ORDER BY population DESC
            LIMIT 1
          ) AS geom
        FROM p
      ),
      next AS (
        SELECT
          p.s AS q,
          p.a1 AS a1,
          (
            SELECT gp.the_geom
            FROM global_cities_points_limited gp
            INNER JOIN global_cities_alternates_limited ga
              ON ga.geoname_id = gp.geoname_id
            WHERE ga.lowername = lower(p.s)
              AND ga.admin1 = p.i
            ORDER BY preferred DESC
            LIMIT 1
          ) AS geom
        FROM p
        -- Correlate on admin1 as well as the name: the same place name may
        -- be requested under several admin1 values, and a pair that failed
        -- must not be hidden by another pair that succeeded.
        WHERE NOT EXISTS (
          SELECT 1
          FROM best b
          WHERE b.q = p.s
            AND b.a1 = p.a1
            AND b.geom IS NOT NULL
        )
      )
      SELECT q, a1, inputcountry AS c, geom, TRUE AS success
      FROM best
      WHERE geom IS NOT NULL
      UNION ALL
      SELECT q, a1, inputcountry AS c, geom, (geom IS NOT NULL) AS success
      FROM next
    LOOP
      RETURN NEXT ret;
    END LOOP;
  ELSE
    -- return c = inputcountry (NULL or '') and resolve admin1 synonyms
    -- without restricting them to a country
    FOR ret IN
      WITH p AS (
        SELECT
          r.s,
          r.a1,
          (
            SELECT admin1
            FROM admin1_decoder
            WHERE lower(r.a1) = ANY (synonyms)
            LIMIT 1
          ) AS i
        FROM (SELECT unnest(places) AS s, unnest(admin1s)::text AS a1) r
        WHERE a1 IS NOT NULL AND a1 != ''
      ),
      best AS (
        SELECT
          p.s AS q,
          p.a1 AS a1,
          (
            SELECT gp.the_geom
            FROM global_cities_points_limited gp
            WHERE gp.lowername = lower(p.s)
              AND gp.admin1 = p.i
            ORDER BY population DESC
            LIMIT 1
          ) AS geom
        FROM p
      ),
      next AS (
        SELECT
          p.s AS q,
          p.a1 AS a1,
          (
            SELECT gp.the_geom
            FROM global_cities_points_limited gp
            INNER JOIN global_cities_alternates_limited ga
              ON ga.geoname_id = gp.geoname_id
            WHERE ga.lowername = lower(p.s)
              AND ga.admin1 = p.i
            ORDER BY preferred DESC
            LIMIT 1
          ) AS geom
        FROM p
        -- Same (name, admin1) correlation as in the with-country branch.
        WHERE NOT EXISTS (
          SELECT 1
          FROM best b
          WHERE b.q = p.s
            AND b.a1 = p.a1
            AND b.geom IS NOT NULL
        )
      )
      SELECT q, a1, inputcountry AS c, geom, TRUE AS success
      FROM best
      WHERE geom IS NOT NULL
      UNION ALL
      SELECT q, a1, inputcountry AS c, geom, (geom IS NOT NULL) AS success
      FROM next
    LOOP
      RETURN NEXT ret;
    END LOOP;
  END IF;
  RETURN;
END
$function$;


-- ---------------------------------------------------------------------------
-- geocode_namedplace(places text[], admin1s text[], inputcountry text[])
--
-- Geocodes `places` with a per-place admin1 and a per-place country.
-- When admin1s is NULL the call is delegated to the (places, country[])
-- overload and a1 is returned as NULL. Otherwise the distinct
-- (place, admin1, country) triples are grouped by country and each group is
-- passed to the (text[], text[], text) overload.
-- ---------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], admin1s text[], inputcountry text[])
 RETURNS SETOF geocode_admin1_country_v1
 LANGUAGE plpgsql
 STABLE SECURITY DEFINER
AS $function$
DECLARE
  ret geocode_admin1_country_v1%rowtype;
BEGIN
  IF admin1s IS NULL THEN
    FOR ret IN
      SELECT g.q AS q, NULL::text AS a1, g.c AS c, g.geom AS geom, g.success AS success
      FROM geocode_namedplace(places, inputcountry) AS g
    LOOP
      RETURN NEXT ret;
    END LOOP;
  ELSE
    FOR ret IN
      WITH clean AS (
        -- de-duplicate the triples, then collect one pair of arrays per country
        SELECT array_agg(p) AS p, array_agg(a) AS a, c
        FROM (
          SELECT p, a, c
          FROM (
            SELECT unnest(places) p, unnest(admin1s) a, unnest(inputcountry) c
          ) z
          GROUP BY p, a, c
        ) y
        GROUP BY c
      )
      -- The call is made in a subquery and expanded afterwards so the
      -- function runs once per country group; writing (f(...)).* directly
      -- would evaluate f once per output column. OFFSET 0 keeps the
      -- subquery from being flattened back into that form.
      SELECT (r.g).*
      FROM (
        SELECT geocode_namedplace(p, a, c) AS g
        FROM clean
        OFFSET 0
      ) r
    LOOP
      RETURN NEXT ret;
    END LOOP;
  END IF;
  RETURN;
END
$function$;


-- ---------------------------------------------------------------------------
-- geocode_namedplace(places text[], country text[])
--
-- Geocodes `places` with a per-place country. Places whose country is NULL
-- or '' are delegated to geocode_namedplace(places). The rest are matched
-- only against cities whose iso2 equals the code resolved through
-- country_decoder. Returns (q, c, geom, success); c echoes the input
-- country.
-- ---------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], country text[])
 RETURNS SETOF geocode_namedplace_country_v1
 LANGUAGE plpgsql
 STABLE SECURITY DEFINER
AS $function$
DECLARE
  ret geocode_namedplace_country_v1%rowtype;
  nans TEXT[];
BEGIN
  -- places whose country is NULL
  SELECT array_agg(p) INTO nans
  FROM (SELECT unnest(places) p, unnest(country) c) g
  WHERE c IS NULL;

  IF 0 < array_length(nans, 1) THEN
    SELECT array_agg(p), array_agg(c) INTO places, country
    FROM (SELECT unnest(places) p, unnest(country) c) g
    WHERE c IS NOT NULL;

    FOR ret IN
      SELECT g.q, NULL::text AS c, g.geom, g.success
      FROM geocode_namedplace(nans) AS g
    LOOP
      RETURN NEXT ret;
    END LOOP;
  END IF;

  -- places whose country is an empty string
  SELECT array_agg(p) INTO nans
  FROM (SELECT unnest(places) p, unnest(country) c) g
  WHERE c = '';

  IF 0 < array_length(nans, 1) THEN
    SELECT array_agg(p), array_agg(c) INTO places, country
    FROM (SELECT unnest(places) p, unnest(country) c) g
    WHERE c != '';

    FOR ret IN
      SELECT g.q, ''::text AS c, g.geom, g.success
      FROM geocode_namedplace(nans) AS g
    LOOP
      RETURN NEXT ret;
    END LOOP;
  END IF;

  -- places that come with a usable country string
  FOR ret IN
    WITH p AS (
      SELECT
        r.s,
        r.c,
        (
          SELECT iso2
          FROM country_decoder
          WHERE lower(r.c) = ANY (synonyms)
          -- LIMIT 1 as in the other overloads: without it, two decoder
          -- rows sharing a synonym abort the whole call with a
          -- "more than one row returned by a subquery" error.
          LIMIT 1
        ) AS i
      FROM (SELECT unnest(places) AS s, unnest(country)::text AS c) r
    ),
    best AS (
      SELECT
        p.s AS q,
        p.c AS c,
        (
          SELECT gp.the_geom
          FROM global_cities_points_limited gp
          WHERE gp.lowername = lower(p.s)
            AND gp.iso2 = p.i
          ORDER BY population DESC
          LIMIT 1
        ) AS geom
      FROM p
    ),
    next AS (
      SELECT
        p.s AS q,
        p.c AS c,
        (
          SELECT gp.the_geom
          FROM global_cities_points_limited gp
          INNER JOIN global_cities_alternates_limited ga
            ON ga.geoname_id = gp.geoname_id
          WHERE ga.lowername = lower(p.s)
            AND gp.iso2 = p.i
          ORDER BY preferred DESC
          LIMIT 1
        ) AS geom
      FROM p
      WHERE NOT EXISTS (
        SELECT 1
        FROM best b
        WHERE b.q = p.s
          AND b.c = p.c
          AND b.geom IS NOT NULL
      )
    )
    SELECT q, c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
    UNION ALL
    SELECT q, c, geom, (geom IS NOT NULL) AS success FROM next
  LOOP
    RETURN NEXT ret;
  END LOOP;
  RETURN;
END
$function$;


-- ---------------------------------------------------------------------------
-- geocode_namedplace(places text[], inputcountry text)
--
-- Geocodes `places` with one shared country. With a non-empty inputcountry
-- only cities whose iso2 equals the code resolved through country_decoder
-- are considered (nothing matches when the country string is not
-- recognised). With a NULL or empty inputcountry the call falls back to
-- geocode_namedplace(places). Returns (q, c, geom, success); c echoes
-- inputcountry.
-- ---------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], inputcountry text)
 RETURNS SETOF geocode_admin_country_v1
 LANGUAGE plpgsql
 STABLE SECURITY DEFINER
AS $function$
DECLARE
  ret geocode_admin_country_v1%rowtype;
  isoTwo TEXT := NULL;
  has_country BOOLEAN;
BEGIN
  -- NULL and '' both count as "no country supplied".
  has_country := coalesce(inputcountry, '') <> '';

  IF has_country THEN
    -- find the iso2 code for the input country string, else leave it NULL
    SELECT iso2 INTO isoTwo
    FROM country_decoder
    WHERE lower(inputcountry) = ANY (synonyms)
    LIMIT 1;

    FOR ret IN
      WITH best AS (
        SELECT
          p.s AS q,
          (
            SELECT gp.the_geom
            FROM global_cities_points_limited gp
            WHERE gp.lowername = lower(p.s)
              AND gp.iso2 = isoTwo
            ORDER BY population DESC
            LIMIT 1
          ) AS geom
        FROM (SELECT unnest(places) AS s) p
      ),
      next AS (
        SELECT
          p.s AS q,
          (
            SELECT gp.the_geom
            FROM global_cities_points_limited gp
            INNER JOIN global_cities_alternates_limited ga
              ON ga.geoname_id = gp.geoname_id
            WHERE ga.lowername = lower(p.s)
              AND gp.iso2 = isoTwo
            ORDER BY preferred DESC
            LIMIT 1
          ) AS geom
        FROM (SELECT unnest(places) AS s) p
        WHERE NOT EXISTS (
          SELECT 1 FROM best b WHERE b.q = p.s AND b.geom IS NOT NULL
        )
      )
      SELECT q, inputcountry AS c, geom, TRUE AS success
      FROM best
      WHERE geom IS NOT NULL
      UNION ALL
      SELECT q, inputcountry AS c, geom, (geom IS NOT NULL) AS success
      FROM next
    LOOP
      RETURN NEXT ret;
    END LOOP;
  ELSE
    -- no country included: geocode by name only
    FOR ret IN
      SELECT g.q AS q, inputcountry AS c, g.geom AS geom, g.success AS success
      FROM geocode_namedplace(places) AS g
    LOOP
      RETURN NEXT ret;
    END LOOP;
  END IF;
  RETURN;
END
$function$;


-- ---------------------------------------------------------------------------
-- namedplace_guess_country(places text[])
--
-- Return a guess about the country where the places are located, if possible.
-- A country is returned only when strictly more than `threshold` (80%) of the
-- input array length is matched by distinct place names in that country and
-- no other country ties for the highest count; otherwise NULL is returned.
--
-- E.g: SELECT namedplace_guess_country(Array['granada', 'jaen', 'cordoba', 'madrid', 'valladolid']); => NULL
-- E.g: SELECT namedplace_guess_country(Array['granada', 'jaén', 'córdoba', 'madrid', 'valladolid', 'peligros']); => 'ES'
-- ---------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION namedplace_guess_country(places text[])
RETURNS text AS $$
DECLARE
  country_code text;
  threshold CONSTANT float := 0.8;
  input_length integer := array_length(places, 1);
BEGIN
  BEGIN
    WITH hist AS (
      -- number of distinct input names found per country
      SELECT count(DISTINCT(lower(p.s), gp.iso2)) AS c, iso2
      FROM global_cities_points_limited gp
      INNER JOIN (SELECT unnest(places) AS s) p
        ON (gp.lowername = lower(s))
      GROUP BY iso2
    ),
    best_two AS (
      -- two candidates are enough to detect a tie for first place
      SELECT iso2, c
      FROM hist
      WHERE c > input_length * threshold
      ORDER BY c DESC
      LIMIT 2
    )
    -- STRICT raises NO_DATA_FOUND when no country passes the threshold and
    -- TOO_MANY_ROWS when two countries tie; both are mapped to NULL below.
    SELECT iso2 INTO STRICT country_code
    FROM (SELECT iso2, c, max(c) OVER () AS maxcount FROM best_two) bt
    WHERE bt.c = bt.maxcount;
  EXCEPTION
    WHEN NO_DATA_FOUND OR TOO_MANY_ROWS THEN
      RETURN NULL;
  END;
  RETURN country_code;
END;
$$ LANGUAGE plpgsql SECURITY DEFINER STABLE;