Encapsulate cleaning function and restructure

This commit is contained in:
Carla Iriberri
2015-12-01 14:11:44 +01:00
parent 49eb8213eb
commit 3e8f635404
24 changed files with 25 additions and 768 deletions

View File

@@ -0,0 +1,8 @@
-- Cleaning function
CREATE OR REPLACE FUNCTION geocode_clean_name(name text) RETURNS text
LANGUAGE plpgsql
AS $$
BEGIN
RETURN regexp_replace(name, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g');
END
$$;

View File

@@ -9,7 +9,7 @@ CREATE OR REPLACE FUNCTION geocode_admin0_polygons(name text[])
-- FOR ret IN
RETURN QUERY
SELECT d.q, n.the_geom as geom, CASE WHEN s.adm0_a3 IS NULL then FALSE ELSE TRUE END AS success
FROM (SELECT q, lower(regexp_replace(q, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text x
FROM (SELECT q, lower(geocode_clean_name(q))::text x
FROM (SELECT unnest(name) q) g) d
LEFT OUTER JOIN admin0_synonyms s ON name_ = d.x
LEFT OUTER JOIN ne_admin0_v3 n ON s.adm0_a3 = n.adm0_a3 GROUP BY d.q, n.the_geom, s.adm0_a3;
@@ -24,7 +24,7 @@ CREATE OR REPLACE FUNCTION admin0_synonym_lookup(name text[])
ret synonym_lookup_v1%rowtype;
BEGIN RETURN QUERY
SELECT d.q, s.adm0_a3
FROM (SELECT q, lower(regexp_replace(q, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text x
FROM (SELECT q, lower(geocode_clean_name(q))::text x
FROM (SELECT unnest(name) q) g) d
LEFT OUTER JOIN admin0_synonyms s ON name_ = d.x GROUP BY d.q, s.adm0_a3;
END
@@ -74,7 +74,7 @@ CREATE INDEX idx_admin0_synonyms_rank ON admin0_synonyms USING btree (rank);
-- create trigger function. used in both admin0 and admin1 synonym tables
CREATE OR REPLACE FUNCTION alpha_numeric_identifiers() RETURNS trigger AS $alpha_numeric_identifiers$
BEGIN
NEW.name_ := lower(regexp_replace(NEW.name, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'));
NEW.name_ := lower(geocode_clean_name(NEW.name));
RETURN NEW;
END;
$alpha_numeric_identifiers$ LANGUAGE plpgsql;

View File

@@ -36,7 +36,7 @@ CREATE OR REPLACE FUNCTION geocode_admin1_polygons(name text[], inputcountry tex
BEGIN
FOR ret IN WITH
p AS (SELECT r.c, r.q, (SELECT iso3 FROM country_decoder WHERE lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms)) i FROM (SELECT trim(replace(lower(unnest(name)),'.',' ')) c, unnest(name) q) r)
p AS (SELECT r.c, r.q, (SELECT iso3 FROM country_decoder WHERE lower(geocode_clean_name(inputcountry))::text = ANY (synonyms)) i FROM (SELECT trim(replace(lower(unnest(name)),'.',' ')) c, unnest(name) q) r)
SELECT
q, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success
FROM (
@@ -80,7 +80,7 @@ CREATE OR REPLACE FUNCTION geocode_admin1_polygons(names text[], country text[])
FOR ret IN WITH
p AS (SELECT r.p, r.q, c, (SELECT iso3 FROM country_decoder WHERE lower(regexp_replace(r.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms)) i FROM (SELECT trim(replace(lower(unnest(names)),'.',' ')) p, unnest(names) q, unnest(country) c) r)
p AS (SELECT r.p, r.q, c, (SELECT iso3 FROM country_decoder WHERE lower(geocode_clean_name(r.c))::text = ANY (synonyms)) i FROM (SELECT trim(replace(lower(unnest(names)),'.',' ')) p, unnest(names) q, unnest(country) c) r)
SELECT
q, c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success
FROM (

View File

@@ -28,7 +28,7 @@ CREATE OR REPLACE FUNCTION geocode_namedplace(places text[], country text[]) RET
END IF;
FOR ret IN WITH
p AS (SELECT r.s, r.c, (SELECT iso2 FROM country_decoder WHERE lower(regexp_replace(r.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms)) i FROM (SELECT unnest(places) AS s, unnest(country)::text AS c) r),
p AS (SELECT r.s, r.c, (SELECT iso2 FROM country_decoder WHERE lower(geocode_clean_name(r.c))::text = ANY (synonyms)) i FROM (SELECT unnest(places) AS s, unnest(country)::text AS c) r),
best AS (SELECT p.s AS q, p.c AS c, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.iso2 = p.i ORDER BY population DESC LIMIT 1) AS geom FROM p),
next AS (SELECT p.s AS q, p.c AS c, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND gp.iso2 = p.i AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM p WHERE p.s NOT IN (SELECT q FROM best WHERE c = p.c AND geom IS NOT NULL))
SELECT q, c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
@@ -59,20 +59,20 @@ CREATE OR REPLACE FUNCTION geocode_namedplace(places text[], inputcountry text)
END IF;
IF has_country THEN
SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1;
SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(geocode_clean_name(inputcountry))::text = ANY (synonyms) LIMIT 1;
FOR ret IN WITH
best AS (SELECT p.s AS q, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.iso2 = isoTwo ORDER BY population DESC LIMIT 1) AS geom FROM (SELECT unnest(places) AS s) p),
next AS (SELECT p.s AS q, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND gp.iso2 = isoTwo AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM (SELECT unnest(places) AS s) p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL))
SELECT q, inputcountry c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
UNION ALL
SELECT q, inputcountry c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
SELECT q, inputcountry c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
LOOP
RETURN NEXT ret;
END LOOP;
-- no country included, or iso interpretation found
ELSE
FOR ret IN
SELECT g.q as q, inputcountry as c, g.geom as geom, g.success as success FROM (SELECT (geocode_namedplace(places)).*) g
SELECT g.q as q, inputcountry as c, g.geom as geom, g.success as success FROM (SELECT (geocode_namedplace(places)).*) g
LOOP
RETURN NEXT ret;
END LOOP;
@@ -172,7 +172,7 @@ CREATE OR REPLACE FUNCTION geocode_namedplace(places text[], admin1s text[], inp
has_country := FALSE;
END IF;
IF has_country THEN
SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1;
SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(geocode_clean_name(inputcountry))::text = ANY (synonyms) LIMIT 1;
END IF;
-- find all cases where admin1 is NULL

View File

@@ -10,7 +10,7 @@ CREATE FUNCTION geocode_postalcode_polygons(code text[], inputcountries text[])
adm text[];
BEGIN
SELECT INTO adm array_agg((SELECT adm0_a3 FROM admin0_synonyms WHERE name_ = lower(regexp_replace(b.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text LIMIT 1)) FROM (SELECT UNNEST(inputcountries) c) b;
SELECT INTO adm array_agg((SELECT adm0_a3 FROM admin0_synonyms WHERE name_ = lower(geocode_clean_name(b.c))::text LIMIT 1)) FROM (SELECT UNNEST(inputcountries) c) b;
FOR ret IN
SELECT
@@ -52,7 +52,7 @@ CREATE FUNCTION geocode_postalcode_polygons(code text[], inputcountry text) RETU
WHERE postal_code = upper(d.q)
AND iso3 = (
SELECT iso3 FROM country_decoder WHERE
lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1
lower(geocode_clean_name(inputcountry))::text = ANY (synonyms) LIMIT 1
)
) geom
FROM (SELECT unnest(code) q) d
@@ -112,7 +112,7 @@ CREATE FUNCTION geocode_postalcode_points(code text[], inputcountry text) RETURN
WHERE postal_code = upper(d.q)
AND iso3 = (
SELECT iso3 FROM country_decoder WHERE
lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1
lower(geocode_clean_name(inputcountry))::text = ANY (synonyms) LIMIT 1
)
LIMIT 1
) geom
@@ -144,7 +144,7 @@ CREATE FUNCTION geocode_postalcode_points(code integer[], inputcountries text[])
WHERE postal_code_num = d.q
AND iso3 = (
SELECT iso3 FROM country_decoder WHERE
lower(regexp_replace(d.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1
lower(geocode_clean_name(d.c))::text = ANY (synonyms) LIMIT 1
)
LIMIT 1
) geom
@@ -201,13 +201,13 @@ CREATE FUNCTION geocode_postalcode_points(code text[], inputcountries text[]) RE
FROM (
SELECT
q, c, (SELECT iso3 FROM country_decoder WHERE
lower(regexp_replace(d.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1) iso3, (
lower(geocode_clean_name(d.c))::text = ANY (synonyms) LIMIT 1) iso3, (
SELECT the_geom
FROM global_postal_code_points
WHERE postal_code = upper(d.q)
AND iso3 = (
SELECT iso3 FROM country_decoder WHERE
lower(regexp_replace(d.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1
lower(geocode_clean_name(d.c))::text = ANY (synonyms) LIMIT 1
)
LIMIT 1
) geom
@@ -257,7 +257,7 @@ CREATE FUNCTION admin0_available_services(name text[]) RETURNS SETOF available_s
BEGIN RETURN QUERY
SELECT d.q, n.adm0_a3, n.postal_code_points, n.postal_code_polygons FROM
(
SELECT q, lower(regexp_replace(q, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text x FROM
SELECT q, lower(geocode_clean_name(q))::text x FROM
(
SELECT unnest(name) q
)