Cleans countries

This commit is contained in:
Carla Iriberri
2015-11-30 15:53:17 +01:00
parent 7661b28e52
commit b7562ff6f2

View File

@@ -3,14 +3,14 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[])
LANGUAGE plpgsql
IMMUTABLE SECURITY DEFINER
AS $function$
DECLARE
DECLARE
ret geocode_namedplace_v1%rowtype;
BEGIN
FOR ret IN WITH best AS (SELECT s AS q, (SELECT the_geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) ORDER BY population DESC LIMIT 1) AS geom FROM (SELECT unnest(places) as s) p),
next AS (SELECT p.s AS q, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM (SELECT unnest(places) as s) p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL))
SELECT q, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
UNION ALL
SELECT q, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
SELECT q, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
LOOP
RETURN NEXT ret;
END LOOP;
@@ -24,7 +24,7 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], admin1s text
LANGUAGE plpgsql
IMMUTABLE SECURITY DEFINER
AS $function$
DECLARE
DECLARE
ret geocode_admin1_country_v1%rowtype;
has_country BOOLEAN;
has_admin1s BOOLEAN;
@@ -54,12 +54,12 @@ AS $function$
RETURN NEXT ret;
END LOOP;
-- no country, has admin1 value
ELSE
FOR ret IN
ELSE
FOR ret IN
SELECT g.q, admin1s AS a1, inputcountry as c, g.geom, g.success FROM (
SELECT (
geocode_namedplace(
places,
places,
(SELECT array_agg(a) FROM (SELECT admin1s a FROM GENERATE_SERIES(1, Array_Length(places, 1)) s) r),
NULL
)
@@ -75,12 +75,12 @@ AS $function$
RETURN NEXT ret;
END LOOP;
-- has country, has admin1 value
ELSE
FOR ret IN
ELSE
FOR ret IN
SELECT g.q, admin1s AS a1, inputcountry as c, g.geom, g.success FROM (
SELECT (
geocode_namedplace(
places,
places,
(SELECT array_agg(a) FROM (SELECT admin1s a FROM GENERATE_SERIES(1, Array_Length(places, 1)) s) r),
inputcountry
)
@@ -99,7 +99,7 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], admin1s text
LANGUAGE plpgsql
IMMUTABLE SECURITY DEFINER
AS $function$
DECLARE
DECLARE
ret geocode_admin1_country_v1%rowtype;
nans TEXT[];
isoTwo TEXT := NULL;
@@ -113,7 +113,7 @@ AS $function$
has_country := FALSE;
END IF;
IF has_country THEN
SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(inputcountry) = ANY (synonyms) LIMIT 1;
SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1;
END IF;
-- find all cases where admin1 is NULL
@@ -141,15 +141,15 @@ AS $function$
SELECT array_agg(p), array_agg(c) INTO places, admin1s FROM (SELECT unnest(places) p, unnest(admin1s) c) g WHERE c!='';
IF has_country THEN
-- geocode our named place without admin1 but with our iso2
FOR ret IN
SELECT g.q, '' AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans, inputcountry)).*) g
FOR ret IN
SELECT g.q, '' AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans, inputcountry)).*) g
LOOP
RETURN NEXT ret;
END LOOP;
ELSE
-- geocode our named place without admin1 and without iso2
FOR ret IN
SELECT g.q, '' AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans)).*) g
FOR ret IN
SELECT g.q, '' AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans)).*) g
LOOP
RETURN NEXT ret;
END LOOP;
@@ -158,28 +158,28 @@ AS $function$
-- geocode all the cases where admin1 is available
IF has_country THEN
FOR ret IN WITH
-- return c=iso2 and search without country
FOR ret IN WITH
-- return c=iso2 and search without country
p AS (
SELECT r.s, r.a1, (SELECT admin1 FROM admin1_decoder WHERE lower(r.a1) = ANY (synonyms) AND admin1_decoder.iso2 = isoTwo LIMIT 1) i FROM (SELECT unnest(places) AS s, unnest(admin1s)::text AS a1) r),
best AS (SELECT p.s AS q, p.a1 as a1, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.admin1 = p.i ORDER BY population DESC LIMIT 1) AS geom FROM p),
next AS (SELECT p.s AS q, p.a1 AS a1, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND ga.admin1 = p.i AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL))
SELECT q, a1, inputcountry as c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
UNION ALL
SELECT q, a1, inputcountry as c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
SELECT q, a1, inputcountry as c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
LOOP
RETURN NEXT ret;
END LOOP;
ELSE
-- return c=NULL and search without country
FOR ret IN WITH
-- return c=NULL and search without country
FOR ret IN WITH
p AS (
SELECT r.s, r.a1, (SELECT admin1 FROM admin1_decoder WHERE lower(r.a1) = ANY (synonyms) LIMIT 1) i FROM (SELECT unnest(places) AS s, unnest(admin1s)::text AS a1) r WHERE a1 IS NOT NULL and a1 != ''),
best AS (SELECT p.s AS q, p.a1 as a1, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.admin1 = p.i ORDER BY population DESC LIMIT 1) AS geom FROM p),
next AS (SELECT p.s AS q, p.a1 AS a1, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND ga.admin1 = p.i AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL))
SELECT q, a1, inputcountry as c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
UNION ALL
SELECT q, a1, inputcountry as c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
SELECT q, a1, inputcountry as c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
LOOP
RETURN NEXT ret;
END LOOP;
@@ -194,14 +194,14 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], admin1s text
LANGUAGE plpgsql
IMMUTABLE SECURITY DEFINER
AS $function$
DECLARE
DECLARE
ret geocode_admin1_country_v1%rowtype;
BEGIN
IF admin1s IS NULL THEN
FOR ret IN SELECT g.q as q, NULL as a1, g.c as c, g.geom as geom, g.success as success FROM (SELECT (geocode_namedplace(places, inputcountry)).*) g LOOP
RETURN NEXT ret;
END LOOP;
ELSE
ELSE
FOR ret IN WITH clean AS (SELECT array_agg(p) p, array_agg(a) a, c FROM (SELECT p, a, c FROM (SELECT unnest(places) p, unnest(admin1s) a, unnest(inputcountry) c) z GROUP BY p, a, c) y GROUP BY c)
SELECT (geocode_namedplace(p, a, c)).* FROM clean LOOP
RETURN NEXT ret;
@@ -217,7 +217,7 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], country text
LANGUAGE plpgsql
IMMUTABLE SECURITY DEFINER
AS $function$
DECLARE
DECLARE
ret geocode_namedplace_country_v1%rowtype;
nans TEXT[];
BEGIN
@@ -239,13 +239,13 @@ AS $function$
END LOOP;
END IF;
FOR ret IN WITH
p AS (SELECT r.s, r.c, (SELECT iso2 FROM country_decoder WHERE lower(r.c) = ANY (synonyms)) i FROM (SELECT unnest(places) AS s, unnest(country)::text AS c) r),
FOR ret IN WITH
p AS (SELECT r.s, r.c, (SELECT iso2 FROM country_decoder WHERE lower(regexp_replace(r.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms)) i FROM (SELECT unnest(places) AS s, unnest(country)::text AS c) r),
best AS (SELECT p.s AS q, p.c AS c, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.iso2 = p.i ORDER BY population DESC LIMIT 1) AS geom FROM p),
next AS (SELECT p.s AS q, p.c AS c, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND gp.iso2 = p.i AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM p WHERE p.s NOT IN (SELECT q FROM best WHERE c = p.c AND geom IS NOT NULL))
SELECT q, c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
UNION ALL
SELECT q, c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
SELECT q, c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
LOOP
RETURN NEXT ret;
END LOOP;
@@ -259,7 +259,7 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace(places text[], inputcountry
LANGUAGE plpgsql
IMMUTABLE SECURITY DEFINER
AS $function$
DECLARE
DECLARE
ret geocode_admin_country_v1%rowtype;
isoTwo TEXT := NULL;
has_country BOOLEAN;
@@ -273,22 +273,22 @@ AS $function$
END IF;
IF has_country THEN
SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(inputcountry) = ANY (synonyms) LIMIT 1;
FOR ret IN WITH
SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1;
FOR ret IN WITH
best AS (SELECT p.s AS q, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.iso2 = isoTwo ORDER BY population DESC LIMIT 1) AS geom FROM (SELECT unnest(places) AS s) p),
next AS (SELECT p.s AS q, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND gp.iso2 = isoTwo AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM (SELECT unnest(places) AS s) p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL))
SELECT q, inputcountry c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
UNION ALL
SELECT q, inputcountry c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
SELECT q, inputcountry c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
LOOP
RETURN NEXT ret;
END LOOP;
-- no country included, or iso interpretation found
ELSE
FOR ret IN
SELECT g.q as q, inputcountry as c, g.geom as geom, g.success as success FROM (SELECT (geocode_namedplace(places)).*) g
ELSE
FOR ret IN
SELECT g.q as q, inputcountry as c, g.geom as geom, g.success as success FROM (SELECT (geocode_namedplace(places)).*) g
LOOP
RETURN NEXT ret;
RETURN NEXT ret;
END LOOP;
END IF;
RETURN;
@@ -301,14 +301,14 @@ CREATE OR REPLACE FUNCTION public.geocode_namedplace_country(places text[], coun
LANGUAGE plpgsql
IMMUTABLE SECURITY DEFINER
AS $function$
DECLARE
DECLARE
ret geocode_namedplace_country_v1%rowtype;
iso TEXT[];
i INT;
i INT;
fails INT[];
BEGIN
SELECT array_agg((SELECT iso2 FROM country_decoder WHERE lower(r.c) = ANY (synonyms))) i FROM (SELECT unnest(country)::text AS c) r INTO iso;
SELECT array_agg((SELECT iso2 FROM country_decoder WHERE lower(regexp_replace(r.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms))) i FROM (SELECT unnest(country)::text AS c) r INTO iso;
FOR i IN 1 .. array_upper(places, 1) LOOP