Encapsulate cleaning function and restructure
This commit is contained in:
8
geocoder/extension/sql/0.0.1/10_aux_functions.sql
Normal file
8
geocoder/extension/sql/0.0.1/10_aux_functions.sql
Normal file
@@ -0,0 +1,8 @@
|
||||
-- Cleaning function.
-- Removes from `name` every run of characters that does not match the regex
-- class [a-zA-Z\u00C0-\u00ff] and returns what is left. Case is preserved;
-- callers that need a case-insensitive key wrap the result in lower().
-- NOTE(review): the \uXXXX escapes are interpreted by the regex engine, which
-- presumes standard_conforming_strings = on -- confirm for the target server.
--
-- Declared IMMUTABLE because the result depends only on the argument
-- (regexp_replace itself is immutable). Without it the function defaults to
-- VOLATILE, which stops the planner from pre-evaluating calls with constant
-- arguments and forbids using the function in index expressions.
CREATE OR REPLACE FUNCTION geocode_clean_name(name text) RETURNS text
  LANGUAGE plpgsql
  IMMUTABLE
AS $$
  BEGIN
    RETURN regexp_replace(name, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g');
  END;
$$;
|
||||
@@ -9,7 +9,7 @@ CREATE OR REPLACE FUNCTION geocode_admin0_polygons(name text[])
|
||||
-- FOR ret IN
|
||||
RETURN QUERY
|
||||
SELECT d.q, n.the_geom as geom, CASE WHEN s.adm0_a3 IS NULL then FALSE ELSE TRUE END AS success
|
||||
FROM (SELECT q, lower(regexp_replace(q, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text x
|
||||
FROM (SELECT q, lower(geocode_clean_name(q))::text x
|
||||
FROM (SELECT unnest(name) q) g) d
|
||||
LEFT OUTER JOIN admin0_synonyms s ON name_ = d.x
|
||||
LEFT OUTER JOIN ne_admin0_v3 n ON s.adm0_a3 = n.adm0_a3 GROUP BY d.q, n.the_geom, s.adm0_a3;
|
||||
@@ -24,7 +24,7 @@ CREATE OR REPLACE FUNCTION admin0_synonym_lookup(name text[])
|
||||
ret synonym_lookup_v1%rowtype;
|
||||
BEGIN RETURN QUERY
|
||||
SELECT d.q, s.adm0_a3
|
||||
FROM (SELECT q, lower(regexp_replace(q, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text x
|
||||
FROM (SELECT q, lower(geocode_clean_name(q))::text x
|
||||
FROM (SELECT unnest(name) q) g) d
|
||||
LEFT OUTER JOIN admin0_synonyms s ON name_ = d.x GROUP BY d.q, s.adm0_a3;
|
||||
END
|
||||
@@ -74,7 +74,7 @@ CREATE INDEX idx_admin0_synonyms_rank ON admin0_synonyms USING btree (rank);
|
||||
-- create trigger function. used in both admin0 and admin1 synonym tables
-- Sets NEW.name_ to the lower-cased result of geocode_clean_name(NEW.name)
-- and returns the modified row, so the stored lookup key is produced by the
-- same cleaning routine the geocoding queries apply to their input.
CREATE OR REPLACE FUNCTION alpha_numeric_identifiers() RETURNS trigger AS $alpha_numeric_identifiers$
    BEGIN
        -- Single assignment: the earlier inline regexp_replace version was
        -- overwritten immediately by this one and has been removed as dead code.
        NEW.name_ := lower(geocode_clean_name(NEW.name));
        RETURN NEW;
    END;
$alpha_numeric_identifiers$ LANGUAGE plpgsql;
|
||||
@@ -36,7 +36,7 @@ CREATE OR REPLACE FUNCTION geocode_admin1_polygons(name text[], inputcountry tex
|
||||
BEGIN
|
||||
|
||||
FOR ret IN WITH
|
||||
p AS (SELECT r.c, r.q, (SELECT iso3 FROM country_decoder WHERE lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms)) i FROM (SELECT trim(replace(lower(unnest(name)),'.',' ')) c, unnest(name) q) r)
|
||||
p AS (SELECT r.c, r.q, (SELECT iso3 FROM country_decoder WHERE lower(geocode_clean_name(inputcountry))::text = ANY (synonyms)) i FROM (SELECT trim(replace(lower(unnest(name)),'.',' ')) c, unnest(name) q) r)
|
||||
SELECT
|
||||
q, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success
|
||||
FROM (
|
||||
@@ -80,7 +80,7 @@ CREATE OR REPLACE FUNCTION geocode_admin1_polygons(names text[], country text[])
|
||||
|
||||
|
||||
FOR ret IN WITH
|
||||
p AS (SELECT r.p, r.q, c, (SELECT iso3 FROM country_decoder WHERE lower(regexp_replace(r.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms)) i FROM (SELECT trim(replace(lower(unnest(names)),'.',' ')) p, unnest(names) q, unnest(country) c) r)
|
||||
p AS (SELECT r.p, r.q, c, (SELECT iso3 FROM country_decoder WHERE lower(geocode_clean_name(r.c))::text = ANY (synonyms)) i FROM (SELECT trim(replace(lower(unnest(names)),'.',' ')) p, unnest(names) q, unnest(country) c) r)
|
||||
SELECT
|
||||
q, c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success
|
||||
FROM (
|
||||
@@ -28,7 +28,7 @@ CREATE OR REPLACE FUNCTION geocode_namedplace(places text[], country text[]) RET
|
||||
END IF;
|
||||
|
||||
FOR ret IN WITH
|
||||
p AS (SELECT r.s, r.c, (SELECT iso2 FROM country_decoder WHERE lower(regexp_replace(r.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms)) i FROM (SELECT unnest(places) AS s, unnest(country)::text AS c) r),
|
||||
p AS (SELECT r.s, r.c, (SELECT iso2 FROM country_decoder WHERE lower(geocode_clean_name(r.c))::text = ANY (synonyms)) i FROM (SELECT unnest(places) AS s, unnest(country)::text AS c) r),
|
||||
best AS (SELECT p.s AS q, p.c AS c, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.iso2 = p.i ORDER BY population DESC LIMIT 1) AS geom FROM p),
|
||||
next AS (SELECT p.s AS q, p.c AS c, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND gp.iso2 = p.i AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM p WHERE p.s NOT IN (SELECT q FROM best WHERE c = p.c AND geom IS NOT NULL))
|
||||
SELECT q, c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
|
||||
@@ -59,20 +59,20 @@ CREATE OR REPLACE FUNCTION geocode_namedplace(places text[], inputcountry text)
|
||||
END IF;
|
||||
|
||||
IF has_country THEN
|
||||
SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1;
|
||||
SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(geocode_clean_name(inputcountry))::text = ANY (synonyms) LIMIT 1;
|
||||
FOR ret IN WITH
|
||||
best AS (SELECT p.s AS q, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.iso2 = isoTwo ORDER BY population DESC LIMIT 1) AS geom FROM (SELECT unnest(places) AS s) p),
|
||||
next AS (SELECT p.s AS q, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND gp.iso2 = isoTwo AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM (SELECT unnest(places) AS s) p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL))
|
||||
SELECT q, inputcountry c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
|
||||
UNION ALL
|
||||
SELECT q, inputcountry c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
|
||||
SELECT q, inputcountry c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success FROM next
|
||||
LOOP
|
||||
RETURN NEXT ret;
|
||||
END LOOP;
|
||||
-- no country included, or iso interpretation found
|
||||
ELSE
|
||||
FOR ret IN
|
||||
SELECT g.q as q, inputcountry as c, g.geom as geom, g.success as success FROM (SELECT (geocode_namedplace(places)).*) g
|
||||
SELECT g.q as q, inputcountry as c, g.geom as geom, g.success as success FROM (SELECT (geocode_namedplace(places)).*) g
|
||||
LOOP
|
||||
RETURN NEXT ret;
|
||||
END LOOP;
|
||||
@@ -172,7 +172,7 @@ CREATE OR REPLACE FUNCTION geocode_namedplace(places text[], admin1s text[], inp
|
||||
has_country := FALSE;
|
||||
END IF;
|
||||
IF has_country THEN
|
||||
SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1;
|
||||
SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(geocode_clean_name(inputcountry))::text = ANY (synonyms) LIMIT 1;
|
||||
END IF;
|
||||
|
||||
-- find all cases where admin1 is NULL
|
||||
@@ -10,7 +10,7 @@ CREATE FUNCTION geocode_postalcode_polygons(code text[], inputcountries text[])
|
||||
adm text[];
|
||||
BEGIN
|
||||
|
||||
SELECT INTO adm array_agg((SELECT adm0_a3 FROM admin0_synonyms WHERE name_ = lower(regexp_replace(b.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text LIMIT 1)) FROM (SELECT UNNEST(inputcountries) c) b;
|
||||
SELECT INTO adm array_agg((SELECT adm0_a3 FROM admin0_synonyms WHERE name_ = lower(geocode_clean_name(b.c))::text LIMIT 1)) FROM (SELECT UNNEST(inputcountries) c) b;
|
||||
|
||||
FOR ret IN
|
||||
SELECT
|
||||
@@ -52,7 +52,7 @@ CREATE FUNCTION geocode_postalcode_polygons(code text[], inputcountry text) RETU
|
||||
WHERE postal_code = upper(d.q)
|
||||
AND iso3 = (
|
||||
SELECT iso3 FROM country_decoder WHERE
|
||||
lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1
|
||||
lower(geocode_clean_name(inputcountry))::text = ANY (synonyms) LIMIT 1
|
||||
)
|
||||
) geom
|
||||
FROM (SELECT unnest(code) q) d
|
||||
@@ -112,7 +112,7 @@ CREATE FUNCTION geocode_postalcode_points(code text[], inputcountry text) RETURN
|
||||
WHERE postal_code = upper(d.q)
|
||||
AND iso3 = (
|
||||
SELECT iso3 FROM country_decoder WHERE
|
||||
lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1
|
||||
lower(geocode_clean_name(inputcountry))::text = ANY (synonyms) LIMIT 1
|
||||
)
|
||||
LIMIT 1
|
||||
) geom
|
||||
@@ -144,7 +144,7 @@ CREATE FUNCTION geocode_postalcode_points(code integer[], inputcountries text[])
|
||||
WHERE postal_code_num = d.q
|
||||
AND iso3 = (
|
||||
SELECT iso3 FROM country_decoder WHERE
|
||||
lower(regexp_replace(d.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1
|
||||
lower(geocode_clean_name(d.c))::text = ANY (synonyms) LIMIT 1
|
||||
)
|
||||
LIMIT 1
|
||||
) geom
|
||||
@@ -201,13 +201,13 @@ CREATE FUNCTION geocode_postalcode_points(code text[], inputcountries text[]) RE
|
||||
FROM (
|
||||
SELECT
|
||||
q, c, (SELECT iso3 FROM country_decoder WHERE
|
||||
lower(regexp_replace(d.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1) iso3, (
|
||||
lower(geocode_clean_name(d.c))::text = ANY (synonyms) LIMIT 1) iso3, (
|
||||
SELECT the_geom
|
||||
FROM global_postal_code_points
|
||||
WHERE postal_code = upper(d.q)
|
||||
AND iso3 = (
|
||||
SELECT iso3 FROM country_decoder WHERE
|
||||
lower(regexp_replace(d.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = ANY (synonyms) LIMIT 1
|
||||
lower(geocode_clean_name(d.c))::text = ANY (synonyms) LIMIT 1
|
||||
)
|
||||
LIMIT 1
|
||||
) geom
|
||||
@@ -257,7 +257,7 @@ CREATE FUNCTION admin0_available_services(name text[]) RETURNS SETOF available_s
|
||||
BEGIN RETURN QUERY
|
||||
SELECT d.q, n.adm0_a3, n.postal_code_points, n.postal_code_polygons FROM
|
||||
(
|
||||
SELECT q, lower(regexp_replace(q, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text x FROM
|
||||
SELECT q, lower(geocode_clean_name(q))::text x FROM
|
||||
(
|
||||
SELECT unnest(name) q
|
||||
)
|
||||
Reference in New Issue
Block a user