diff --git a/geocoder/postal-codes/extension/.gitignore b/geocoder/postal-codes/extension/.gitignore new file mode 100644 index 0000000..e710f0e --- /dev/null +++ b/geocoder/postal-codes/extension/.gitignore @@ -0,0 +1,3 @@ +results/ +regression.diffs +regression.out diff --git a/geocoder/postal-codes/extension/Makefile b/geocoder/postal-codes/extension/Makefile new file mode 100644 index 0000000..dcff338 --- /dev/null +++ b/geocoder/postal-codes/extension/Makefile @@ -0,0 +1,8 @@ +EXTENSION = cdb_geocoder_postalcode +DATA = cdb_geocoder_postalcode--0.0.1.sql +REGRESS = cdb_geocoder_postalcode_test + +# postgres build stuff +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) diff --git a/geocoder/postal-codes/extension/README.md b/geocoder/postal-codes/extension/README.md new file mode 100644 index 0000000..b2afd35 --- /dev/null +++ b/geocoder/postal-codes/extension/README.md @@ -0,0 +1,36 @@ +# CartoDB postal code geocoder extension +Postgres extension for the CartoDB postal code geocoder. It is meant to contain the functions and related objects needed to geocode by postal codes. It is not meant to contain the actual data used to geocode them. + +## Dependencies +This extension is thought to be used on top of CartoDB platform. Therefore a cartodb user is required to install the extension onto it. + +The following is a non-comprehensive list of dependencies: + +- Postgres 9.3+ +- Postgis extension +- Schema triggers extension +- CartoDB extension + +## Installation into the db cluster +This requires root privileges +``` +sudo make all install +``` + +## Execute tests +``` +PGUSER=postgres make installcheck +``` + +## Install onto a user's database +``` +psql -U development_cartodb_user_fe3b850a-01c0-48f9-8a26-a82f09e9b53f cartodb_dev_user_fe3b850a-01c0-48f9-8a26-a82f09e9b53f_db +``` + +and then: + +```sql +CREATE EXTENSION cdb_geocoder_postalcode; +``` + +The extension creation in the user's db does not require special privileges. It can be even created from the sql api. diff --git a/geocoder/postal-codes/extension/cdb_geocoder_postalcode--0.0.1.sql b/geocoder/postal-codes/extension/cdb_geocoder_postalcode--0.0.1.sql new file mode 100644 index 0000000..b409817 --- /dev/null +++ b/geocoder/postal-codes/extension/cdb_geocoder_postalcode--0.0.1.sql @@ -0,0 +1,437 @@ +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION cdb_geocoder_postalcode" to load this file. \quit + +-- Response types for admin0 geocoder +CREATE TYPE geocode_namedplace_v1 AS (q TEXT, geom GEOMETRY, success BOOLEAN); +CREATE TYPE geocode_postalint_country_v1 AS (q TEXT, c TEXT, geom GEOMETRY, success BOOLEAN); +CREATE TYPE geocode_namedplace_country_v1 AS (q TEXT, c TEXT, geom GEOMETRY, success BOOLEAN); + + +-- Public API functions -- +--- Geocoding function --- +-- TODO: deal with permissions +-- TODO: check functions + + +CREATE FUNCTION geocode_postalcode_polygons(code text[], inputcountries text[]) RETURNS SETOF geocode_namedplace_country_v1 + LANGUAGE plpgsql SECURITY DEFINER + AS $$ + DECLARE + ret geocode_namedplace_country_v1%rowtype; + adm text[]; + BEGIN + + SELECT INTO adm array_agg((SELECT adm0_a3 FROM admin0_synonyms WHERE name_ = lower(regexp_replace(b.c, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text LIMIT 1)) FROM (SELECT UNNEST(inputcountries) c) b; + + FOR ret IN + SELECT + q, c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success + FROM ( + SELECT + q, c, ( + SELECT the_geom + FROM postal_code_polygons + WHERE postal_code = CASE WHEN a = 'CAN' THEN substring(upper(d.q) from 1 for 3) ELSE upper(d.q) END + AND adm0_a3 = a + ) geom + FROM (SELECT unnest(code) q, unnest(inputcountries) c, unnest(adm) a) d + ) v + LOOP + RETURN NEXT ret; + END LOOP; + RETURN; +END +$$; + + +CREATE FUNCTION geocode_postalcode_polygons(code text[], inputcountry text) RETURNS SETOF geocode_namedplace_v1 + LANGUAGE plpgsql SECURITY DEFINER + AS $$ + DECLARE + ret geocode_namedplace_v1%rowtype; + BEGIN + FOR ret IN + SELECT + q, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success + FROM ( + SELECT + q, ( + SELECT the_geom + FROM global_postal_code_polygons + WHERE postal_code = upper(d.q) + AND iso3 = ( + SELECT iso3 FROM country_decoder WHERE + lower(inputcountry) = ANY (synonyms) LIMIT 1 + ) + ) geom + FROM (SELECT unnest(code) q) d + ) v + LOOP + RETURN NEXT ret; + END LOOP; + RETURN; +END +$$; + + +CREATE FUNCTION geocode_postalcode_polygons(code integer[], inputcountries text[]) RETURNS SETOF geocode_postalint_country_v1 + LANGUAGE plpgsql SECURITY DEFINER + AS $$ +DECLARE + ret geocode_postalint_country_v1%rowtype; +BEGIN + FOR ret IN + SELECT + q, c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success + FROM ( + SELECT + q, c, ( + SELECT the_geom + FROM global_postal_code_polygons + WHERE postal_code_num = d.q + AND iso3 = ( + SELECT iso3 FROM country_decoder WHERE + lower(d.c) = ANY (synonyms) LIMIT 1 + ) + ) geom + FROM (SELECT unnest(code) q, unnest(inputcountries) c) d + ) v + LOOP + RETURN NEXT ret; + END LOOP; + RETURN; +END +$$; + + +CREATE FUNCTION geocode_postalcode_polygons(code text[]) RETURNS SETOF geocode_namedplace_v1 + LANGUAGE plpgsql SECURITY DEFINER + AS $$ + DECLARE + ret geocode_namedplace_v1%rowtype; + BEGIN + FOR ret IN + SELECT + q, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success + FROM ( + SELECT + q, ( + SELECT the_geom + FROM global_postal_code_polygons + WHERE postal_code = upper(d.q) + LIMIT 1 + ) geom + FROM (SELECT unnest(code) q) d + ) v + LOOP + RETURN NEXT ret; + END LOOP; + RETURN; +END +$$; + + +CREATE FUNCTION geocode_postalcode_points(code text[], inputcountry text) RETURNS SETOF geocode_namedplace_v1 + LANGUAGE plpgsql SECURITY DEFINER + AS $$ + DECLARE + ret geocode_namedplace_v1%rowtype; + BEGIN + FOR ret IN + SELECT + q, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success + FROM ( + SELECT + q, ( + SELECT the_geom + FROM global_postal_code_points + WHERE postal_code = upper(d.q) + AND iso3 = ( + SELECT iso3 FROM country_decoder WHERE + lower(inputcountry) = ANY (synonyms) LIMIT 1 + ) + LIMIT 1 + ) geom + FROM (SELECT unnest(code) q) d + ) v + LOOP + RETURN NEXT ret; + END LOOP; + RETURN; +END +$$; + + + +CREATE FUNCTION geocode_postalcode_points(code text[], inputcountries text[]) RETURNS SETOF geocode_place_country_iso_v1 + LANGUAGE plpgsql SECURITY DEFINER + AS $$ + DECLARE + ret geocode_place_country_iso_v1%rowtype; + geo GEOMETRY; + BEGIN + + FOR ret IN + SELECT + q, c, iso3, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success + FROM ( + SELECT + q, c, (SELECT iso3 FROM country_decoder WHERE + lower(d.c) = ANY (synonyms) LIMIT 1) iso3, ( + SELECT the_geom + FROM global_postal_code_points + WHERE postal_code = upper(d.q) + AND iso3 = ( + SELECT iso3 FROM country_decoder WHERE + lower(d.c) = ANY (synonyms) LIMIT 1 + ) + LIMIT 1 + ) geom + FROM (SELECT unnest(code) q, unnest(inputcountries) c) d + ) v + LOOP + IF ret.geom IS NULL AND ret.iso3 = 'GBR' THEN + geo := geocode_greatbritain_outward(ret.q); + IF geo IS NOT NULL THEN + ret.geom := geo; + ret.success := TRUE; + END IF; + END IF; + RETURN NEXT ret; + END LOOP; + RETURN; +END +$$; + + + +CREATE FUNCTION geocode_postalcode_points(code integer[], inputcountries text[]) RETURNS SETOF geocode_postalint_country_v1 + LANGUAGE plpgsql SECURITY DEFINER + AS $$ + DECLARE + ret geocode_postalint_country_v1%rowtype; + BEGIN + FOR ret IN + SELECT + q, c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success + FROM ( + SELECT + q, c, ( + SELECT the_geom + FROM global_postal_code_points + WHERE postal_code_num = d.q + AND iso3 = ( + SELECT iso3 FROM country_decoder WHERE + lower(d.c) = ANY (synonyms) LIMIT 1 + ) + LIMIT 1 + ) geom + FROM (SELECT unnest(code) q, unnest(inputcountries) c) d + ) v + LOOP + RETURN NEXT ret; + END LOOP; + RETURN; +END +$$; + + + +CREATE FUNCTION geocode_postalcode_points(code text[]) RETURNS SETOF geocode_namedplace_v1 + LANGUAGE plpgsql SECURITY DEFINER + AS $$ + DECLARE + ret geocode_namedplace_v1%rowtype; + BEGIN + FOR ret IN + SELECT + q, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success + FROM ( + SELECT + q, ( + SELECT the_geom + FROM global_postal_code_points + WHERE postal_code = upper(d.q) + LIMIT 1 + ) geom + FROM (SELECT unnest(code) q) d + ) v + LOOP + RETURN NEXT ret; + END LOOP; + RETURN; +END +$$; + +-------------------------------------------------------------------------------- + +-- Support tables + + +CREATE TABLE postal_code_points ( + cartodb_id integer NOT NULL, + adm0_a3 text, + postal_code text, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + the_geom geometry(Geometry,4326), + the_geom_webmercator geometry(Geometry,3857) +); + + +CREATE SEQUENCE postal_code_points_cartodb_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +ALTER SEQUENCE postal_code_points_cartodb_id_seq OWNED BY postal_code_points.cartodb_id; +ALTER TABLE ONLY postal_code_points ALTER COLUMN cartodb_id SET DEFAULT nextval('postal_code_points_cartodb_id_seq'::regclass); + + +ALTER TABLE ONLY postal_code_points + ADD CONSTRAINT postal_code_points_cartodb_id_key UNIQUE (cartodb_id); +ALTER TABLE ONLY postal_code_points + ADD CONSTRAINT postal_code_points_pkey PRIMARY KEY (cartodb_id); + + +CREATE INDEX postal_code_points_the_geom_idx ON postal_code_points USING gist (the_geom); +CREATE INDEX postal_code_points_the_geom_webmercator_idx ON postal_code_points USING gist (the_geom_webmercator); + + +CREATE TRIGGER track_updates AFTER INSERT OR DELETE OR UPDATE OR TRUNCATE ON postal_code_points FOR EACH STATEMENT EXECUTE PROCEDURE cartodb.cdb_tablemetadata_trigger(); +CREATE TRIGGER update_the_geom_webmercator_trigger BEFORE INSERT OR UPDATE OF the_geom ON postal_code_points FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_the_geom_webmercator(); +CREATE TRIGGER update_updated_at_trigger BEFORE UPDATE ON postal_code_points FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_updated_at(); + + + +CREATE TABLE postal_code_polygons ( + cartodb_id integer NOT NULL, + postal_code text, + adm0_a3 text, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + the_geom geometry(Geometry,4326), + the_geom_webmercator geometry(Geometry,3857) +); + + +CREATE SEQUENCE postal_code_polygons_cartodb_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +ALTER SEQUENCE postal_code_polygons_cartodb_id_seq OWNED BY postal_code_polygons.cartodb_id; +ALTER TABLE ONLY postal_code_polygons ALTER COLUMN cartodb_id SET DEFAULT nextval('postal_code_polygons_cartodb_id_seq'::regclass); + + +ALTER TABLE ONLY postal_code_polygons + ADD CONSTRAINT postal_code_polygons_cartodb_id_key UNIQUE (cartodb_id); +ALTER TABLE ONLY postal_code_polygons + ADD CONSTRAINT postal_code_polygons_pkey PRIMARY KEY (cartodb_id); + + +CREATE INDEX postal_code_polygons_the_geom_idx ON postal_code_polygons USING gist (the_geom); +CREATE INDEX postal_code_polygons_the_geom_webmercator_idx ON postal_code_polygons USING gist (the_geom_webmercator); + + +CREATE TRIGGER track_updates AFTER INSERT OR DELETE OR UPDATE OR TRUNCATE ON postal_code_polygons FOR EACH STATEMENT EXECUTE PROCEDURE cartodb.cdb_tablemetadata_trigger(); +CREATE TRIGGER update_the_geom_webmercator_trigger BEFORE INSERT OR UPDATE OF the_geom ON postal_code_polygons FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_the_geom_webmercator(); +CREATE TRIGGER update_updated_at_trigger BEFORE UPDATE ON postal_code_polygons FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_updated_at(); + + + +CREATE TABLE global_postal_code_polygons ( + the_geom geometry(Geometry,4326), + zcta5ce10 text, + geoid10 text, + mtfcc10 text, + cartodb_id integer NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + the_geom_webmercator geometry(Geometry,3857), + iso3 text, + postal_code text, + postal_code_num integer +); + + +CREATE SEQUENCE global_postal_code_polygons_cartodb_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +ALTER SEQUENCE global_postal_code_polygons_cartodb_id_seq OWNED BY global_postal_code_polygons.cartodb_id; +ALTER TABLE ONLY global_postal_code_polygons ALTER COLUMN cartodb_id SET DEFAULT nextval('global_postal_code_polygons_cartodb_id_seq'::regclass); + + +ALTER TABLE ONLY global_postal_code_polygons + ADD CONSTRAINT global_postal_code_polygons_cartodb_id_key UNIQUE (cartodb_id); +ALTER TABLE ONLY global_postal_code_polygons + ADD CONSTRAINT global_postal_code_polygons_pkey PRIMARY KEY (cartodb_id); + + +CREATE INDEX global_postal_code_polygons_the_geom_idx ON global_postal_code_polygons USING gist (the_geom); +CREATE INDEX global_postal_code_polygons_the_geom_webmercator_idx ON global_postal_code_polygons USING gist (the_geom_webmercator); +CREATE INDEX global_postal_code_polygons_postal_code_idx ON global_postal_code_polygons USING btree (postal_code); +CREATE INDEX global_postal_code_polygons_iso3_idx ON global_postal_code_polygons USING btree (iso3); +CREATE INDEX global_global_postal_code_polygons_postal_code_num_idx ON global_postal_code_polygons USING btree (postal_code_num); + + +CREATE TRIGGER track_updates AFTER INSERT OR DELETE OR UPDATE OR TRUNCATE ON global_postal_code_polygons FOR EACH STATEMENT EXECUTE PROCEDURE cartodb.cdb_tablemetadata_trigger(); +CREATE TRIGGER update_the_geom_webmercator_trigger BEFORE INSERT OR UPDATE OF the_geom ON global_postal_code_polygons FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_the_geom_webmercator(); +CREATE TRIGGER update_updated_at_trigger BEFORE UPDATE ON global_postal_code_polygons FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_updated_at(); + + +CREATE TABLE global_postal_code_points ( + iso2 text, + postal_code text, + place_name text, + admin_name1 text, + admin_code1 text, + admin_name2 text, + admin_code2 text, + admin_name3 text, + admin_code3 text, + accuracy text, + the_geom geometry(Geometry,4326), + cartodb_id integer NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + the_geom_webmercator geometry(Geometry,3857), + iso3 text, + frompoly boolean, + postal_code_num integer, + datasource text +); + + +CREATE SEQUENCE global_postal_code_points_cartodb_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +ALTER SEQUENCE global_postal_code_points_cartodb_id_seq OWNED BY global_postal_code_points.cartodb_id; +ALTER TABLE ONLY global_postal_code_points ALTER COLUMN cartodb_id SET DEFAULT nextval('global_postal_code_points_cartodb_id_seq'::regclass); + + +ALTER TABLE ONLY global_postal_code_points + ADD CONSTRAINT global_postal_code_points_cartodb_id_key UNIQUE (cartodb_id); +ALTER TABLE ONLY global_postal_code_points + ADD CONSTRAINT global_postal_code_points_pkey PRIMARY KEY (cartodb_id); + + +CREATE INDEX global_postal_code_points_the_geom_idx ON global_postal_code_points USING gist (the_geom); +CREATE INDEX global_postal_code_points_the_geom_webmercator_idx ON global_postal_code_points USING gist (the_geom_webmercator); +CREATE INDEX global_postal_code_points_postal_code_idx ON global_postal_code_points USING btree (postal_code); +CREATE INDEX global_postal_code_points_iso3_idx ON global_postal_code_points USING btree (iso3); +CREATE INDEX global_postal_code_points_postal_code_num_idx ON global_postal_code_points USING btree (postal_code_num); + + +CREATE TRIGGER track_updates AFTER INSERT OR DELETE OR UPDATE OR TRUNCATE ON global_postal_code_points FOR EACH STATEMENT EXECUTE PROCEDURE cartodb.cdb_tablemetadata_trigger(); +CREATE TRIGGER update_the_geom_webmercator_trigger BEFORE INSERT OR UPDATE OF the_geom ON global_postal_code_points FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_the_geom_webmercator(); +CREATE TRIGGER update_updated_at_trigger BEFORE UPDATE ON global_postal_code_points FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_updated_at(); + diff --git a/geocoder/postal-codes/extension/cdb_geocoder_postalcode.control b/geocoder/postal-codes/extension/cdb_geocoder_postalcode.control new file mode 100644 index 0000000..70f249c --- /dev/null +++ b/geocoder/postal-codes/extension/cdb_geocoder_postalcode.control @@ -0,0 +1,6 @@ +# cdb geocoder postalcode extension +comment = 'CartoDB postalcode internal geocoder' +default_version = '0.0.1' +relocatable = true +requires = cartodb +superuser = false diff --git a/geocoder/postal-codes/extension/expected/cdb_geocoder_postalcode_test.out b/geocoder/postal-codes/extension/expected/cdb_geocoder_postalcode_test.out new file mode 100644 index 0000000..6824cae --- /dev/null +++ b/geocoder/postal-codes/extension/expected/cdb_geocoder_postalcode_test.out @@ -0,0 +1,6 @@ +CREATE EXTENSION postgis; +CREATE EXTENSION schema_triggers; +CREATE EXTENSION plpythonu; +CREATE EXTENSION cartodb; +CREATE EXTENSION cdb_geocoder_postalcode; +-- PENDING diff --git a/geocoder/postal-codes/extension/sql/cdb_geocoder_postalcode_test.sql b/geocoder/postal-codes/extension/sql/cdb_geocoder_postalcode_test.sql new file mode 100644 index 0000000..314eb72 --- /dev/null +++ b/geocoder/postal-codes/extension/sql/cdb_geocoder_postalcode_test.sql @@ -0,0 +1,7 @@ +CREATE EXTENSION postgis; +CREATE EXTENSION schema_triggers; +CREATE EXTENSION plpythonu; +CREATE EXTENSION cartodb; +CREATE EXTENSION cdb_geocoder_postalcode; + +-- PENDING