From b7562ff6f220dd9b8e5e7ca2f6e8a35c7f8b1d83 Mon Sep 17 00:00:00 2001 From: Carla Iriberri Date: Mon, 30 Nov 2015 15:53:17 +0100 Subject: [PATCH] Cleans countries --- .../namedplace/sql/geocode_namedplace.sql | 76 +++++++++---------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/geocoder/namedplace/sql/geocode_namedplace.sql b/geocoder/namedplace/sql/geocode_namedplace.sql index 80ba76d..e2f81eb 100644 --- a/geocoder/namedplace/sql/geocode_namedplace.sql +++ b/geocoder/namedplace/sql/geocode_namedplace.sql @@ -3,14 +3,14 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[]) LANGUAGE plpgsql IMMUTABLE SECURITY DEFINER AS $function$ - DECLARE + DECLARE ret geocode_namedplace_v1%rowtype; BEGIN FOR ret IN WITH best AS (SELECT s AS q, (SELECT the_geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) ORDER BY population DESC LIMIT 1) AS geom FROM (SELECT unnest(places) as s) p), next AS (SELECT p.s AS q, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM (SELECT unnest(places) as s) p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL)) SELECT q, geom, TRUE AS success FROM best WHERE geom IS NOT NULL UNION ALL - SELECT q, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next + SELECT q, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next LOOP RETURN NEXT ret; END LOOP; @@ -24,7 +24,7 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], admin1s text LANGUAGE plpgsql IMMUTABLE SECURITY DEFINER AS $function$ - DECLARE + DECLARE ret geocode_admin1_country_v1%rowtype; has_country BOOLEAN; has_admin1s BOOLEAN; @@ -54,12 +54,12 @@ AS $function$ RETURN NEXT ret; END LOOP; -- no country, has admin1 value - ELSE - FOR ret IN + ELSE + FOR ret IN SELECT g.q, admin1s AS a1, inputcountry as c, g.geom, g.success FROM ( SELECT ( geocode_namedplace( - places, + places, (SELECT array_agg(a) FROM (SELECT admin1s a FROM GENERATE_SERIES(1, Array_Length(places, 1)) s) r), NULL ) @@ -75,12 +75,12 @@ AS $function$ RETURN NEXT ret; END LOOP; -- has country, has admin1 value - ELSE - FOR ret IN + ELSE + FOR ret IN SELECT g.q, admin1s AS a1, inputcountry as c, g.geom, g.success FROM ( SELECT ( geocode_namedplace( - places, + places, (SELECT array_agg(a) FROM (SELECT admin1s a FROM GENERATE_SERIES(1, Array_Length(places, 1)) s) r), inputcountry ) @@ -99,7 +99,7 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], admin1s text LANGUAGE plpgsql IMMUTABLE SECURITY DEFINER AS $function$ - DECLARE + DECLARE ret geocode_admin1_country_v1%rowtype; nans TEXT[]; isoTwo TEXT := NULL; @@ -113,7 +113,7 @@ AS $function$ has_country := FALSE; END IF; IF has_country THEN - SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(inputcountry) = ANY (synonyms) LIMIT 1; + SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1; END IF; -- find all cases where admin1 is NULL @@ -141,15 +141,15 @@ AS $function$ SELECT array_agg(p), array_agg(c) INTO places, admin1s FROM (SELECT unnest(places) p, unnest(admin1s) c) g WHERE c!=''; IF has_country THEN -- geocode our named place without admin1 but with our iso2 - FOR ret IN - SELECT g.q, '' AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans, inputcountry)).*) g + FOR ret IN + SELECT g.q, '' AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans, inputcountry)).*) g LOOP RETURN NEXT ret; END LOOP; ELSE -- geocode our named place without admin1 and without iso2 - FOR ret IN - SELECT g.q, '' AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans)).*) g + FOR ret IN + SELECT g.q, '' AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans)).*) g LOOP RETURN NEXT ret; END LOOP; @@ -158,28 +158,28 @@ AS $function$ -- geocode all the cases where admin1 is available IF has_country THEN - FOR ret IN WITH - -- return c=iso2 and search without country + FOR ret IN WITH + -- return c=iso2 and search without country p AS ( SELECT r.s, r.a1, (SELECT admin1 FROM admin1_decoder WHERE lower(r.a1) = ANY (synonyms) AND admin1_decoder.iso2 = isoTwo LIMIT 1) i FROM (SELECT unnest(places) AS s, unnest(admin1s)::text AS a1) r), best AS (SELECT p.s AS q, p.a1 as a1, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.admin1 = p.i ORDER BY population DESC LIMIT 1) AS geom FROM p), next AS (SELECT p.s AS q, p.a1 AS a1, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND ga.admin1 = p.i AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL)) SELECT q, a1, inputcountry as c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL UNION ALL - SELECT q, a1, inputcountry as c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next + SELECT q, a1, inputcountry as c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next LOOP RETURN NEXT ret; END LOOP; ELSE - -- return c=NULL and search without country - FOR ret IN WITH + -- return c=NULL and search without country + FOR ret IN WITH p AS ( SELECT r.s, r.a1, (SELECT admin1 FROM admin1_decoder WHERE lower(r.a1) = ANY (synonyms) LIMIT 1) i FROM (SELECT unnest(places) AS s, unnest(admin1s)::text AS a1) r WHERE a1 IS NOT NULL and a1 != ''), best AS (SELECT p.s AS q, p.a1 as a1, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.admin1 = p.i ORDER BY population DESC LIMIT 1) AS geom FROM p), next AS (SELECT p.s AS q, p.a1 AS a1, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND ga.admin1 = p.i AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL)) SELECT q, a1, inputcountry as c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL UNION ALL - SELECT q, a1, inputcountry as c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next + SELECT q, a1, inputcountry as c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next LOOP RETURN NEXT ret; END LOOP; @@ -194,14 +194,14 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], admin1s text LANGUAGE plpgsql IMMUTABLE SECURITY DEFINER AS $function$ - DECLARE + DECLARE ret geocode_admin1_country_v1%rowtype; BEGIN IF admin1s IS NULL THEN FOR ret IN SELECT g.q as q, NULL as a1, g.c as c, g.geom as geom, g.success as success FROM (SELECT (geocode_namedplace(places, inputcountry)).*) g LOOP RETURN NEXT ret; END LOOP; - ELSE + ELSE FOR ret IN WITH clean AS (SELECT array_agg(p) p, array_agg(a) a, c FROM (SELECT p, a, c FROM (SELECT unnest(places) p, unnest(admin1s) a, unnest(inputcountry) c) z GROUP BY p, a, c) y GROUP BY c) SELECT (geocode_namedplace(p, a, c)).* FROM clean LOOP RETURN NEXT ret; @@ -217,7 +217,7 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], country text LANGUAGE plpgsql IMMUTABLE SECURITY DEFINER AS $function$ - DECLARE + DECLARE ret geocode_namedplace_country_v1%rowtype; nans TEXT[]; BEGIN @@ -239,13 +239,13 @@ AS $function$ END LOOP; END IF; - FOR ret IN WITH - p AS (SELECT r.s, r.c, (SELECT iso2 FROM country_decoder WHERE lower(r.c) = ANY (synonyms)) i FROM (SELECT unnest(places) AS s, unnest(country)::text AS c) r), + FOR ret IN WITH + p AS (SELECT r.s, r.c, (SELECT iso2 FROM country_decoder WHERE lower(regexp_replace(r.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms)) i FROM (SELECT unnest(places) AS s, unnest(country)::text AS c) r), best AS (SELECT p.s AS q, p.c AS c, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.iso2 = p.i ORDER BY population DESC LIMIT 1) AS geom FROM p), next AS (SELECT p.s AS q, p.c AS c, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND gp.iso2 = p.i AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM p WHERE p.s NOT IN (SELECT q FROM best WHERE c = p.c AND geom IS NOT NULL)) SELECT q, c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL UNION ALL - SELECT q, c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next + SELECT q, c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next LOOP RETURN NEXT ret; END LOOP; @@ -259,7 +259,7 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], inputcountry LANGUAGE plpgsql IMMUTABLE SECURITY DEFINER AS $function$ - DECLARE + DECLARE ret geocode_admin_country_v1%rowtype; isoTwo TEXT := NULL; has_country BOOLEAN; @@ -273,22 +273,22 @@ AS $function$ END IF; IF has_country THEN - SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(inputcountry) = ANY (synonyms) LIMIT 1; - FOR ret IN WITH + SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1; + FOR ret IN WITH best AS (SELECT p.s AS q, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.iso2 = isoTwo ORDER BY population DESC LIMIT 1) AS geom FROM (SELECT unnest(places) AS s) p), next AS (SELECT p.s AS q, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND gp.iso2 = isoTwo AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM (SELECT unnest(places) AS s) p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL)) SELECT q, inputcountry c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL UNION ALL - SELECT q, inputcountry c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next + SELECT q, inputcountry c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next LOOP RETURN NEXT ret; END LOOP; -- no country included, or iso interpretation found - ELSE - FOR ret IN - SELECT g.q as q, inputcountry as c, g.geom as geom, g.success as success FROM (SELECT (geocode_namedplace(places)).*) g + ELSE + FOR ret IN + SELECT g.q as q, inputcountry as c, g.geom as geom, g.success as success FROM (SELECT (geocode_namedplace(places)).*) g LOOP - RETURN NEXT ret; + RETURN NEXT ret; END LOOP; END IF; RETURN; @@ -301,14 +301,14 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace_country(places text[], coun LANGUAGE plpgsql IMMUTABLE SECURITY DEFINER AS $function$ -DECLARE +DECLARE ret geocode_namedplace_country_v1%rowtype; iso TEXT[]; - i INT; + i INT; fails INT[]; BEGIN - SELECT array_agg((SELECT iso2 FROM country_decoder WHERE lower(r.c) = ANY (synonyms))) i FROM (SELECT unnest(country)::text AS c) r INTO iso; + SELECT array_agg((SELECT iso2 FROM country_decoder WHERE lower(regexp_replace(r.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms))) i FROM (SELECT unnest(country)::text AS c) r INTO iso; FOR i IN 1 .. array_upper(places, 1) LOOP