Adds admin1 files

This commit is contained in:
Carla Iriberri
2015-10-26 18:38:06 +01:00
parent 7eb57495ec
commit bfd2ed3f49
7 changed files with 392 additions and 0 deletions

3
geocoder/admin1/extension/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
results/
regression.diffs
regression.out

View File

@@ -0,0 +1,8 @@
# PGXS build file for the cdb_geocoder_admin1 extension.
# Name of the extension (matches the name used in CREATE EXTENSION).
EXTENSION = cdb_geocoder_admin1
# SQL script(s) installed alongside the control file.
DATA = cdb_geocoder_admin1--0.0.1.sql
# Regression test case(s) executed by `make installcheck`.
REGRESS = cdb_geocoder_admin1_test
# postgres build stuff
# Ask pg_config where the PGXS makefile lives and include it; it provides
# the all / install / installcheck targets.
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)

View File

@@ -0,0 +1,36 @@
# CartoDB admin1 geocoder extension
Postgres extension for the CartoDB admin1 geocoder. It is meant to contain the functions and related objects needed to geocode by admin1 regions. It is not meant to contain the actual data used to geocode them.
## Dependencies
This extension is designed to be used on top of the CartoDB platform. Therefore, a CartoDB user is required in order to install the extension.
The following is a non-comprehensive list of dependencies:
- Postgres 9.3+
- PostGIS extension
- Schema triggers extension
- CartoDB extension
## Installation into the db cluster
This requires root privileges:
```
sudo make all install
```
## Execute tests
```
PGUSER=postgres make installcheck
```
## Install onto a user's database
```
psql -U development_cartodb_user_fe3b850a-01c0-48f9-8a26-a82f09e9b53f cartodb_dev_user_fe3b850a-01c0-48f9-8a26-a82f09e9b53f_db
```
and then:
```sql
CREATE EXTENSION cdb_geocoder_admin1;
```
Creating the extension in the user's database does not require special privileges. It can even be created from the SQL API.

View File

@@ -0,0 +1,265 @@
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION cdb_geocoder_admin1" to load this file. \quit
-- Composite row types returned by the admin1 geocoder functions
-- TODO: check if the types exist already in the db

-- Result row for lookups without a per-row country:
--   q       : the input string exactly as supplied by the caller
--   geom    : geometry of the matched region (NULL when nothing matched)
--   success : whether a geometry was found
CREATE TYPE geocode_admin_v1 AS (
    q       text,
    geom    geometry,
    success boolean
);

-- Same as geocode_admin_v1, plus the country string (c) paired with q
CREATE TYPE geocode_admin_country_v1 AS (
    q       text,
    c       text,
    geom    geometry,
    success boolean
);
-- Public API functions --
--- Geocoding function ---
-- TODO: deal with permissions

-- geocode_admin1_polygons(name text[])
--
-- Geocodes a list of admin1 region names without any country hint.
--
-- Parameters:
--   name : array of region names to look up.
-- Returns one geocode_admin_v1 row per array element:
--   q       : the element as given,
--   geom    : the_geom of the best row in global_province_polygons whose
--             synonyms array contains the normalised element (lower-cased,
--             dots replaced by spaces, trimmed); NULL when no row matches,
--   success : TRUE when geom is not NULL.
-- When several rows match, the one with the highest frequency is chosen.
CREATE OR REPLACE FUNCTION geocode_admin1_polygons(name text[]) RETURNS SETOF geocode_admin_v1
LANGUAGE plpgsql SECURITY DEFINER
AS $$
BEGIN
  RETURN QUERY
    SELECT
      matched.q,
      matched.geom,
      matched.geom IS NOT NULL AS success
    FROM (
      SELECT
        term.q,
        (
          SELECT gpp.the_geom
          FROM global_province_polygons gpp
          WHERE term.c = ANY (gpp.synonyms)
          ORDER BY gpp.frequency DESC
          LIMIT 1
        ) AS geom
      FROM (
        -- c is the normalised search key, q keeps the caller's original text
        SELECT
          trim(replace(lower(unnest(name)), '.', ' ')) AS c,
          unnest(name) AS q
      ) term
    ) matched;
  RETURN;
END
$$;
-- geocode_admin1_polygons(name text[], inputcountry text)
--
-- Geocodes a list of admin1 region names, all within a single country.
--
-- Parameters:
--   name         : array of region names to look up.
--   inputcountry : country name; its lower-cased form is matched against
--                  country_decoder.synonyms to obtain an iso3 code.
-- Returns one geocode_admin_v1 row per element of name. A region only
-- matches when its synonyms contain the normalised name AND its iso3 equals
-- the decoded country; if the country cannot be decoded no region matches.
-- NOTE(review): the country lookup is a scalar subquery, so it raises an
-- error if more than one country_decoder row lists the same synonym.
CREATE OR REPLACE FUNCTION geocode_admin1_polygons(name text[], inputcountry text) RETURNS SETOF geocode_admin_v1
LANGUAGE plpgsql SECURITY DEFINER
AS $$
BEGIN
  RETURN QUERY
    WITH p AS (
      -- One row per input name: normalised key (c), original text (q) and
      -- the iso3 code decoded from inputcountry (i)
      SELECT
        r.c,
        r.q,
        (
          SELECT cd.iso3
          FROM country_decoder cd
          WHERE lower(inputcountry) = ANY (cd.synonyms)
        ) AS i
      FROM (
        SELECT
          trim(replace(lower(unnest(name)), '.', ' ')) AS c,
          unnest(name) AS q
      ) r
    )
    SELECT
      n.q,
      n.geom,
      n.geom IS NOT NULL AS success
    FROM (
      SELECT
        p.q,
        (
          SELECT gpp.the_geom
          FROM global_province_polygons gpp
          WHERE p.c = ANY (gpp.synonyms)
            AND gpp.iso3 = p.i
          -- To calculate frequency, I simply counted the number of users
          -- we had signed up in each country. Countries with more users,
          -- we favor higher in the geocoder :)
          ORDER BY gpp.frequency DESC
          LIMIT 1
        ) AS geom
      FROM p
    ) n;
  RETURN;
END
$$;
-- geocode_admin1_polygons(names text[], country text[])
--
-- Geocodes admin1 region names where each name carries its own country.
--
-- Parameters:
--   names   : array of region names to look up.
--   country : array of country names, paired positionally with names.
--             NOTE(review): both arrays are unnested side by side, which
--             only pairs elements one-to-one when the arrays have the same
--             length -- confirm callers guarantee this.
-- Returns geocode_admin_country_v1 rows (q, c, geom, success):
--   * names whose country is NULL are geocoded with the country-less
--     geocode_admin1_polygons(text[]) and returned first, with c = NULL;
--   * the remaining names are matched against global_province_polygons by
--     synonym and by the iso3 code decoded from their country.
CREATE OR REPLACE FUNCTION geocode_admin1_polygons(names text[], country text[]) RETURNS SETOF geocode_admin_country_v1
LANGUAGE plpgsql SECURITY DEFINER
AS $$
DECLARE
  ret geocode_admin_country_v1%rowtype;
  nans TEXT[];
BEGIN
  -- Collect the names that come without a country
  SELECT array_agg(p) INTO nans
  FROM (SELECT unnest(names) p, unnest(country) c) g
  WHERE c IS NULL;

  IF 0 < array_length(nans, 1) THEN
    -- Keep only the (name, country) pairs that do have a country
    SELECT array_agg(p), array_agg(c) INTO names, country
    FROM (SELECT unnest(names) p, unnest(country) c) g
    WHERE c IS NOT NULL;

    -- Call the country-less geocoder as a table function. Writing
    -- "(geocode_admin1_polygons(nans)).*" in the select list would be
    -- expanded into one call per output column, running the lookup
    -- three times for the same result.
    FOR ret IN
      SELECT g.q, NULL::text AS c, g.geom, g.success
      FROM geocode_admin1_polygons(nans) g
    LOOP
      RETURN NEXT ret;
    END LOOP;
  END IF;

  FOR ret IN
    WITH p AS (
      -- One row per pair: normalised name (p), original name (q), country
      -- as given (c) and the iso3 code decoded from it (i)
      SELECT
        r.p,
        r.q,
        c,
        (SELECT iso3 FROM country_decoder WHERE lower(r.c) = ANY (synonyms)) i
      FROM (
        SELECT
          trim(replace(lower(unnest(names)), '.', ' ')) p,
          unnest(names) q,
          unnest(country) c
      ) r
    )
    SELECT
      q, c, geom, geom IS NOT NULL AS success
    FROM (
      SELECT
        q, c, (
          SELECT the_geom
          FROM global_province_polygons
          WHERE p.p = ANY (synonyms)
          AND iso3 = p.i
          -- To calculate frequency, I simply counted the number of users
          -- we had signed up in each country. Countries with more users,
          -- we favor higher in the geocoder :)
          ORDER BY frequency DESC LIMIT 1
        ) geom
      FROM p
    ) n
  LOOP
    RETURN NEXT ret;
  END LOOP;
  RETURN;
END
$$;
--------------------------------------------------------------------------------
-- Support tables

-- country_decoder: one row per country. The geocoder functions in this file
-- use it to translate a country name into an iso3 code by matching the
-- lower-cased input against the "synonyms" array.
CREATE TABLE country_decoder (
name text,
nativename text,
tld text,
iso2 text,
ccn3 text,
-- iso3 is the value the geocoder functions compare with global_province_polygons.iso3
iso3 text,
currency text,
callingcode text,
capital text,
altspellings text,
relevance text,
region text,
subregion text,
language text,
languagescodes text,
translations text,
population text,
latlng text,
demonym text,
borders text,
the_geom geometry(Geometry,4326),
cartodb_id integer NOT NULL,
created_at timestamp with time zone DEFAULT now() NOT NULL,
updated_at timestamp with time zone DEFAULT now() NOT NULL,
the_geom_webmercator geometry(Geometry,3857),
synbu text[],
-- synonyms holds the strings a country can be looked up by (matched with = ANY)
synonyms text[],
users double precision
);
-- Sequence feeding cartodb_id; owned by the column so it is dropped with it
CREATE SEQUENCE country_decoder_cartodb_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE country_decoder_cartodb_id_seq OWNED BY country_decoder.cartodb_id;
ALTER TABLE ONLY country_decoder ALTER COLUMN cartodb_id SET DEFAULT nextval('country_decoder_cartodb_id_seq'::regclass);
-- NOTE(review): cartodb_id gets both a UNIQUE constraint and the PRIMARY KEY,
-- i.e. two unique indexes on the same column -- confirm both are wanted.
ALTER TABLE ONLY country_decoder
ADD CONSTRAINT country_decoder_cartodb_id_key UNIQUE (cartodb_id);
ALTER TABLE ONLY country_decoder
ADD CONSTRAINT country_decoder_pkey PRIMARY KEY (cartodb_id);
-- Marks the primary key index as the one future CLUSTER runs will use
ALTER TABLE country_decoder CLUSTER ON country_decoder_pkey;
-- Spatial indexes on both geometry columns
CREATE INDEX country_decoder_the_geom_idx ON country_decoder USING gist (the_geom);
CREATE INDEX country_decoder_the_geom_webmercator_idx ON country_decoder USING gist (the_geom_webmercator);
-- Triggers provided by the cartodb extension (see "requires" in the control
-- file). Judging by their names they presumably record table changes, keep
-- the_geom_webmercator in sync with the_geom and refresh updated_at --
-- confirm against the cartodb extension.
CREATE TRIGGER track_updates AFTER INSERT OR DELETE OR UPDATE OR TRUNCATE ON country_decoder FOR EACH STATEMENT EXECUTE PROCEDURE cartodb.cdb_tablemetadata_trigger();
CREATE TRIGGER update_the_geom_webmercator_trigger BEFORE INSERT OR UPDATE OF the_geom ON country_decoder FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_the_geom_webmercator();
CREATE TRIGGER update_updated_at_trigger BEFORE UPDATE ON country_decoder FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_updated_at();
-- global_province_polygons: one row per admin1 region (province/state).
-- The geocoder functions in this file read four of its columns:
--   the_geom  : geometry returned to the caller
--   synonyms  : strings the region can be looked up by (matched with = ANY)
--   iso3      : country filter used by the country-aware variants
--   frequency : tie-breaker, highest value wins
-- The remaining columns are not referenced by any function in this file.
CREATE TABLE global_province_polygons (
the_geom geometry(Geometry,4326),
adm1_code text,
objectid_1 integer,
diss_me integer,
adm1_cod_1 text,
iso_3166_2 text,
wikipedia text,
iso_a2 text,
adm0_sr integer,
name text,
name_alt text,
name_local text,
type text,
type_en text,
code_local text,
code_hasc text,
note text,
hasc_maybe text,
region text,
region_cod text,
provnum_ne integer,
gadm_level integer,
check_me integer,
scalerank integer,
datarank integer,
abbrev text,
postal text,
area_sqkm double precision,
sameascity integer,
labelrank integer,
featurecla text,
name_len integer,
mapcolor9 integer,
mapcolor13 integer,
fips text,
fips_alt text,
woe_id integer,
woe_label text,
woe_name text,
latitude double precision,
longitude double precision,
sov_a3 text,
iso3 text,
adm0_label integer,
admin text,
geonunit text,
gu_a3 text,
gn_id integer,
gn_name text,
gns_id integer,
gns_name text,
gn_level integer,
gn_region text,
gn_a1_code text,
region_sub text,
sub_code text,
gns_level integer,
gns_lang text,
gns_adm1 text,
gns_region text,
cartodb_id integer NOT NULL,
created_at timestamp with time zone DEFAULT now() NOT NULL,
updated_at timestamp with time zone DEFAULT now() NOT NULL,
the_geom_webmercator geometry(Geometry,3857),
synonyms text[],
frequency double precision
);
-- Sequence feeding cartodb_id; owned by the column so it is dropped with it
CREATE SEQUENCE global_province_polygons_cartodb_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE global_province_polygons_cartodb_id_seq OWNED BY global_province_polygons.cartodb_id;
ALTER TABLE ONLY global_province_polygons ALTER COLUMN cartodb_id SET DEFAULT nextval('global_province_polygons_cartodb_id_seq'::regclass);
-- NOTE(review): cartodb_id gets both a UNIQUE constraint and the PRIMARY KEY,
-- i.e. two unique indexes on the same column -- confirm both are wanted.
ALTER TABLE ONLY global_province_polygons
ADD CONSTRAINT global_province_polygons_cartodb_id_key UNIQUE (cartodb_id);
ALTER TABLE ONLY global_province_polygons
ADD CONSTRAINT global_province_polygons_pkey PRIMARY KEY (cartodb_id);
-- Spatial indexes on both geometry columns
CREATE INDEX global_province_polygons_the_geom_idx ON global_province_polygons USING gist (the_geom);
CREATE INDEX global_province_polygons_the_geom_webmercator_idx ON global_province_polygons USING gist (the_geom_webmercator);
-- Triggers provided by the cartodb extension (see "requires" in the control
-- file). Judging by their names they presumably record table changes, keep
-- the_geom_webmercator in sync with the_geom and refresh updated_at --
-- confirm against the cartodb extension.
CREATE TRIGGER track_updates AFTER INSERT OR DELETE OR UPDATE OR TRUNCATE ON global_province_polygons FOR EACH STATEMENT EXECUTE PROCEDURE cartodb.cdb_tablemetadata_trigger();
CREATE TRIGGER update_the_geom_webmercator_trigger BEFORE INSERT OR UPDATE OF the_geom ON global_province_polygons FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_the_geom_webmercator();
CREATE TRIGGER update_updated_at_trigger BEFORE UPDATE ON global_province_polygons FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_updated_at();

View File

@@ -0,0 +1,6 @@
# cdb geocoder admin1 extension
# Human-readable description of the extension
comment = 'CartoDB admin1 internal geocoder'
# Version installed when CREATE EXTENSION is given no explicit version
default_version = '0.0.1'
# The extension's objects may be moved to another schema after creation.
# NOTE(review): the geocoder functions are SECURITY DEFINER and reference
# their tables without a schema -- confirm relocation is really supported.
relocatable = true
# The cartodb extension must already be installed (its trigger functions are used)
requires = cartodb
# Creating the extension does not require superuser rights
superuser = false

View File

@@ -0,0 +1,45 @@
CREATE EXTENSION postgis;
CREATE EXTENSION schema_triggers;
CREATE EXTENSION plpythonu;
CREATE EXTENSION cartodb;
CREATE EXTENSION cdb_geocoder_admin1;
-- Check that the geocoding functions are callable, should return NULL
SELECT (geocode_admin1_polygons(Array['TX','Cuidad Real', 'sevilla'])).*;
q | geom | success
-------------+------+---------
TX | | f
Cuidad Real | | f
sevilla | | f
(3 rows)
SELECT (geocode_admin1_polygons(Array['NH', 'Vermont'], 'United States')).*;
q | geom | success
---------+------+---------
NH | | f
Vermont | | f
(2 rows)
SELECT (geocode_admin1_polygons(Array['az', 'az'], Array['Ecuador', 'USA'])).*;
q | c | geom | success
----+---------+------+---------
az | Ecuador | | f
az | USA | | f
(2 rows)
-- Mock the varnish invalidation function
CREATE OR REPLACE FUNCTION public.cdb_invalidate_varnish(table_name text) RETURNS void AS $$
BEGIN
RETURN;
END
$$
LANGUAGE plpgsql;
-- Add a few data to the sources
COPY global_province_polygons (the_geom,adm1_code, objectid_1, diss_me, adm1_cod_1, iso_3166_2, wikipedia, iso_a2, adm0_sr, name, name_alt, name_local, type, type_en, code_local, code_hasc, note, hasc_maybe, region, region_cod, provnum_ne, gadm_level, check_me, scalerank, datarank, abbrev, postal, area_sqkm, sameascity, labelrank, featurecla, name_len, mapcolor9, mapcolor13, fips, fips_alt, woe_id, woe_label, woe_name, latitude, longitude, sov_a3, iso3, adm0_label, admin, geonunit, gu_a3, gn_id, gn_name, gns_id, gns_name, gn_level, gn_region, gn_a1_code, region_sub, sub_code, gns_level, gns_lang, gns_adm1, gns_region, cartodb_id, created_at, updated_at, synonyms, frequency) FROM stdin;
0106000020E6100000010000000103000000010000000400000000000000005009C07FB86AC523AF4340FFFFFFFFFFEF0EC0C3CF8DC4FADB42400000000000E0F0BFC3CF8DC4FADB424000000000005009C07FB86AC523AF4340 SVN-1035 1473 1035 SVN-1035 SI- \N SI 1 Vipava \N \N Opcine Commune|Municipality \N SI.SP.VI \N \N Goriška \N 162 2 0 10 8 \N VI 0 -99 10 Admin-1 scale rank 6 2 12 SIE1 \N -55848385 \N \N 45.8271000000000015 13.9723000000000006 SVN SVN 2 Slovenia Slovenia SVN 3239075 Obcina Vipava 243467 Vipava, Obcina 1 \N SI.E1 \N \N 1 div SI05 \N 3700 2014-02-18 19:53:50.080158+00 2014-04-01 15:18:54.094644+00 {vipava,"obcina vipava","vipava,obcina",si.e1,si.sp.vi,svn-1035,svn-1035} 5
\.
-- Check that the synonym function is callable, should return true
SELECT (geocode_admin1_polygons(Array['obcina vipava'])).success;
success
---------
t
(1 row)

View File

@@ -0,0 +1,29 @@
CREATE EXTENSION postgis;
CREATE EXTENSION schema_triggers;
CREATE EXTENSION plpythonu;
CREATE EXTENSION cartodb;
CREATE EXTENSION cdb_geocoder_admin1;
-- Check that the geocoding functions are callable, should return NULL
SELECT (geocode_admin1_polygons(Array['TX','Cuidad Real', 'sevilla'])).*;
SELECT (geocode_admin1_polygons(Array['NH', 'Vermont'], 'United States')).*;
SELECT (geocode_admin1_polygons(Array['az', 'az'], Array['Ecuador', 'USA'])).*;
-- Mock the varnish invalidation function
CREATE OR REPLACE FUNCTION public.cdb_invalidate_varnish(table_name text) RETURNS void AS $$
BEGIN
RETURN;
END
$$
LANGUAGE plpgsql;
-- Add a few data to the sources
COPY global_province_polygons (the_geom, adm1_code, objectid_1, diss_me, adm1_cod_1, iso_3166_2, wikipedia, iso_a2, adm0_sr, name, name_alt, name_local, type, type_en, code_local, code_hasc, note, hasc_maybe, region, region_cod, provnum_ne, gadm_level, check_me, scalerank, datarank, abbrev, postal, area_sqkm, sameascity, labelrank, featurecla, name_len, mapcolor9, mapcolor13, fips, fips_alt, woe_id, woe_label, woe_name, latitude, longitude, sov_a3, iso3, adm0_label, admin, geonunit, gu_a3, gn_id, gn_name, gns_id, gns_name, gn_level, gn_region, gn_a1_code, region_sub, sub_code, gns_level, gns_lang, gns_adm1, gns_region, cartodb_id, created_at, updated_at, the_geom_webmercator, synonyms, frequency) FROM stdin;
0106000020E6100000010000000103000000010000001C000000701C468693172C405BCFC9FB20EB464010E0C037081E2C40F7FF710981E74640606B2301AC1A2C40B407F55702E64640200BD34801192C40A44EE24698E34640503B8E674E162C4084868DF1EEE246400025D4480D132C40B43156A168E14640701EE41F19082C400CFADB0CA1E24640E0F6B6C345062C401815CAFBB1E24640A00B4B5DB3022C40EF46C124BCE2464010E49A3E6E012C40CB84EFBA8CE24640105943E9D0FA2B40ECD210DDEBE14640008981CA8FF72B40071D4DCAEFE24640B05E6F7FDDDC2B40F3246E6C88E5464060FB4FFA19CE2B40A46CBD87B5E746404058ECF694C52B408CF11C3411EA4640FFF7D7E59FC82B40BCC34EE79AEB46400077C737A4D02B403C3FEF3AB8EC4640E04B53FA05D82B4090EF60F885ED464090EBDCBC6BDD2B40E048DFA985EE464000A5771CC0E92B40BC5B8EF1D4EF4640005BBBFD66F22B40208A181748F04640805CE1823AF92B402CA5060659F0464040FC6A45A0FE2B404B4FC4C11DF046408061640B9F022C4060B922EEBEEF4640FFD76EE28A072C405421278BBDEE4640E09BADEC49092C40FB7EEC1D7AED4640AF09D3481D0B2C40D86DDB0C8DEC4640701C468693172C405BCFC9FB20EB4640 SVN-1035 1473 1035 SVN-1035 SI- \N SI 1 Vipava \N \N Opcine Commune|Municipality \N SI.SP.VI \N
\N Goriška \N 162 2 0 10 8 \N VI 0 -99 10 Admin-1 scale rank 6 2 12 SIE1 \N -55848385
\N \N 45.8271000000000015 13.9723000000000006 SVN SVN 2 Slovenia Slovenia SVN 3239075 Obcina Vipava 243467 Vipava, Obcina 1
\N SI.E1 \N \N 1 div SI05 \N 3700 2014-02-18 19:53:50.080158+00 2014-04-01 15:18:54.094644+00 0106000020110F0000010000000103000000010000001C00000023340ED8CEDB3741C0404B7264F35541C85B157A4AE13741B323798AF9EE5541CEDCBDE56FDE37414004235027ED5541489EE87B05DD3741D970658A36EA55418D272EB8BADA374116C6965868E95541A8917D26F7D73741F9BBEB238DE75541A6C99293A9CE37417F31C28009E95541DA9DBCA51CCD3741A5FAE31E1EE9554121BD100C14CA37414E69CD7D2AE95541124037ECFFC8374162390EC3F0E85541332B2FC661C337417AB6C1E72CE855418A0578349EC037417308826069E95541BCFC69D6F1A93741B086FDCC92EC55419F7FD8E3679D3741462E0A8039EF554160E7068E2B963741A150E52919F25541861B631AC198374148B84A13F9F3554123289F23909F3741CCF276F254F55541F49EE41CD5A53741895CFAD14FF655415E8C59A46AAA3741EE95F4A487F755414B69D859E3B43741E59BB99720F95541A6ECFD723CBC3741762F910EADF9554144635A9E08C23741DD3BD2B6C1F95541C650CF259EC637415377246A79F9554105D616CA02CA374149377DBD05F9554170AA9CC530CE3741A52C14CCCBF755419A5F7271ACCF37414FD3A85D41F65541A2FB415F39D13741C55D614C20F5554123340ED8CEDB3741C0404B7264F35541 {vipava,"obcina vipava","vipava, obcina",si.e1,si.sp.vi,svn-1035,svn-1035} 5
\.
-- Check that the synonym function is callable, should return true
SELECT (geocode_admin1_polygons(Array['obcina vipava'])).success;