From 24d53e5f6fa54d06c275cfe0943f6c0821d1cb55 Mon Sep 17 00:00:00 2001
From: andrewxhill
Date: Wed, 27 Aug 2014 12:46:30 -0400
Subject: [PATCH] created test_ version of new admin1 geocoder function

---
Review notes (text between '---' and the diffstat is ignored by git am):
- The CREATE INDEX statements added to indexes.sql are now terminated with ';'.
- Comments were added on '+' lines only; hunk line counts are unchanged.

 geocoder/admin1/sql/geocoder.sql | 118 +++++++++++++++++--------------
 geocoder/setup/indexes.sql       |   6 +-
 2 files changed, 68 insertions(+), 56 deletions(-)

diff --git a/geocoder/admin1/sql/geocoder.sql b/geocoder/admin1/sql/geocoder.sql
index 8d0d012..cc19133 100644
--- a/geocoder/admin1/sql/geocoder.sql
+++ b/geocoder/admin1/sql/geocoder.sql
@@ -2,37 +2,73 @@
 --- SELECT (geocode_admin1_polygons(Array['az', 'Texas'], 'Ecuador')).*
 
 
+--- Emits one row per element of name (input name, inputcountry, geometry, success); geometry is NULL and success FALSE when no admin1 synonym matches
 --- Function
 
 CREATE OR REPLACE FUNCTION test_geocode_admin1_polygons(name text[], inputcountry text)
-  RETURNS SETOF geocode_admin_v1 AS $$
+  RETURNS SETOF geocode_admin_country_v1 AS $$
   DECLARE
-    ret geocode_admin_v1%rowtype;
+    ret geocode_admin_country_v1%rowtype;
+    adm0 TEXT; -- adm0_a3 code looked up from inputcountry; stays NULL when no synonym matches
+    adm0_check BOOLEAN := TRUE; -- set to FALSE when inputcountry is NULL or blank
   BEGIN
-  FOR ret IN WITH
-    p AS (SELECT r.c, r.q, (SELECT iso3 FROM country_decoder WHERE lower(inputcountry) = ANY (synonyms)) i FROM (SELECT trim(replace(lower(unnest(name)),'.',' ')) c, unnest(name) q) r)
-  SELECT
-    q, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success
-  FROM (
-    SELECT
-      q, (
-        SELECT the_geom
-        FROM global_province_polygons
-        WHERE p.c = ANY (synonyms)
-        AND iso3 = p.i
-        -- To calculate frequency, I simply counted the number of users
-        -- we had signed up in each country. Countries with more users,
-        -- we favor higher in the geocoder :)
-        ORDER BY frequency DESC LIMIT 1
-      ) geom
-    FROM p) n
-  LOOP
-    RETURN NEXT ret;
-  END LOOP;
+    IF inputcountry IS NULL THEN
+      adm0_check = FALSE;
+    END IF;
+    IF trim(inputcountry)='' THEN
+      adm0_check = FALSE;
+    END IF;
+
+    IF adm0_check IS TRUE THEN
+      SELECT INTO adm0 adm0_a3 FROM admin0_synonyms WHERE name_ = lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text LIMIT 1;
+      -- If no country synonym matched, adm0 is NULL and every name below comes back with success = FALSE
+      FOR ret IN
+        SELECT
+          q, inputcountry, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success
+        FROM (
+          SELECT
+            q, (
+              SELECT the_geom FROM qs_adm1 WHERE global_id = (
+                SELECT global_id
+                FROM admin1_synonyms
+                WHERE name_ = lower(regexp_replace(d.q, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text
+                AND adm0_a3 = adm0
+                LIMIT 1
+              )
+            ) geom
+          FROM (SELECT unnest(name) q) d
+        ) v
+      LOOP
+        RETURN NEXT ret;
+      END LOOP;
+
+    -- No country supplied (NULL or blank): match admin1 synonyms without a country filter
+    ELSE
+      FOR ret IN
+        SELECT
+          q, inputcountry, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success
+        FROM (
+          SELECT
+            q, (
+              SELECT the_geom FROM qs_adm1 WHERE global_id = (
+                SELECT global_id
+                FROM admin1_synonyms
+                WHERE name_ = lower(regexp_replace(d.q, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text
+                LIMIT 1
+              )
+            ) geom
+          FROM (SELECT unnest(name) q) d
+        ) v
+      LOOP
+        RETURN NEXT ret;
+      END LOOP;
+    END IF;
   RETURN;
 END
-$$ LANGUAGE 'plpgsql' SECURITY DEFINER;
-Text array, country array
+$$ LANGUAGE 'plpgsql';
+
+
+--Text array, country array
 
 --- Usage
 
@@ -44,40 +80,12 @@ CREATE OR REPLACE FUNCTION test_geocode_admin1_polygons(names text[], country te
   RETURNS SETOF geocode_admin_country_v1 AS $$
   DECLARE
     ret geocode_admin_country_v1%rowtype;
-    nans TEXT[];
   BEGIN
-
-  SELECT array_agg(p) INTO nans FROM (SELECT unnest(names) p, unnest(country) c) g WHERE c IS NULL;
-
-  IF 0 < array_length(nans, 1) THEN
-    SELECT array_agg(p), array_agg(c) INTO names, country FROM (SELECT unnest(names) p, unnest(country) c) g WHERE c IS NOT NULL;
-    FOR ret IN SELECT g.q, NULL as c, g.geom, g.success FROM (SELECT (geocode_admin1_polygons(nans)).*) g LOOP
-      RETURN NEXT ret;
-    END LOOP;
-  END IF;
-
-
-  FOR ret IN WITH
-    p AS (SELECT r.p, r.q, c, (SELECT iso3 FROM country_decoder WHERE lower(r.c) = ANY (synonyms)) i FROM (SELECT trim(replace(lower(unnest(names)),'.',' ')) p, unnest(names) q, unnest(country) c) r)
-  SELECT
-    q, c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success
-  FROM (
-    SELECT
-      q, c, (
-        SELECT the_geom
-        FROM global_province_polygons
-        WHERE p.p = ANY (synonyms)
-        AND iso3 = p.i
-        -- To calculate frequency, I simply counted the number of users
-        -- we had signed up in each country. Countries with more users,
-        -- we favor higher in the geocoder :)
-        ORDER BY frequency DESC LIMIT 1
-      ) geom
-    FROM p) n
-  LOOP
+  FOR ret IN SELECT (test_geocode_admin1_polygons(array_agg(n), c)).* FROM (SELECT unnest(names) n, unnest(country) c) a GROUP BY c LOOP -- one call per distinct country value, passing that country's names
     RETURN NEXT ret;
   END LOOP;
 
   RETURN;
 END
-$$ LANGUAGE 'plpgsql' SECURITY DEFINER;
\ No newline at end of file
+$$ LANGUAGE 'plpgsql';
+
diff --git a/geocoder/setup/indexes.sql b/geocoder/setup/indexes.sql
index 4def421..9b2bbb0 100644
--- a/geocoder/setup/indexes.sql
+++ b/geocoder/setup/indexes.sql
@@ -4,8 +4,12 @@ CREATE INDEX idx_admin0_synonyms_name_ ON admin0_synonyms (name_);
 CREATE INDEX idx_admin0_synonyms_rank ON admin0_synonyms (rank);
 -- CREATE INDEX idx_admin0_synonyms_name_rank ON admin0_synonyms (name_, rank);
 
+-- Index on admin1 id
+CREATE UNIQUE INDEX idx_qs_adm1_global_id ON qs_adm1 (global_id);
+CREATE INDEX idx_admin1_synonyms_name_adm0 ON admin1_synonyms (name_, adm0_a3);
+
 -- create indexes on polygon table
 CREATE UNIQUE INDEX idx_ne_admin0_v3_adm0_a3 ON ne_admin0_v3 (adm0_a3);
 
 -- create indexes on postal code polygon table
-CREATE UNIQUE INDEX idx_postal_code_polygons_a3_code ON postal_code_polygons (adm0_a3, postal_code)
\ No newline at end of file
+CREATE UNIQUE INDEX idx_postal_code_polygons_a3_code ON postal_code_polygons (adm0_a3, postal_code);