From 585cee81d2d7ed1fa67052a19e40124d7949baa0 Mon Sep 17 00:00:00 2001 From: Carla Iriberri Date: Mon, 26 Oct 2015 16:01:49 +0100 Subject: [PATCH] Adding file for namedplaces extension --- geocoder/namedplace/extension/.gitignore | 3 + geocoder/namedplace/extension/Makefile | 8 + geocoder/namedplace/extension/README.md | 36 ++ .../cdb_geocoder_namedplaces--0.0.1.sql | 497 ++++++++++++++++++ .../cdb_geocoder_namedplaces.control | 6 + .../cdb_geocoder_namedplaces_test.out | 64 +++ .../sql/cdb_geocoder_namedplaces_test.sql | 35 ++ 7 files changed, 649 insertions(+) create mode 100644 geocoder/namedplace/extension/.gitignore create mode 100644 geocoder/namedplace/extension/Makefile create mode 100644 geocoder/namedplace/extension/README.md create mode 100644 geocoder/namedplace/extension/cdb_geocoder_namedplaces--0.0.1.sql create mode 100644 geocoder/namedplace/extension/cdb_geocoder_namedplaces.control create mode 100644 geocoder/namedplace/extension/expected/cdb_geocoder_namedplaces_test.out create mode 100644 geocoder/namedplace/extension/sql/cdb_geocoder_namedplaces_test.sql diff --git a/geocoder/namedplace/extension/.gitignore b/geocoder/namedplace/extension/.gitignore new file mode 100644 index 0000000..e710f0e --- /dev/null +++ b/geocoder/namedplace/extension/.gitignore @@ -0,0 +1,3 @@ +results/ +regression.diffs +regression.out diff --git a/geocoder/namedplace/extension/Makefile b/geocoder/namedplace/extension/Makefile new file mode 100644 index 0000000..ab1f0dc --- /dev/null +++ b/geocoder/namedplace/extension/Makefile @@ -0,0 +1,8 @@ +EXTENSION = cdb_geocoder_namedplaces +DATA = cdb_geocoder_namedplaces--0.0.1.sql +REGRESS = cdb_geocoder_namedplaces_test + +# postgres build stuff +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) diff --git a/geocoder/namedplace/extension/README.md b/geocoder/namedplace/extension/README.md new file mode 100644 index 0000000..478c892 --- /dev/null +++ b/geocoder/namedplace/extension/README.md @@ -0,0 
+1,36 @@
+# CartoDB named places geocoder extension
+Postgres extension for the CartoDB named places geocoder. It is meant to contain the functions and related objects needed to geocode by city names. It is not meant to contain the actual data used to geocode them.
+
+## Dependencies
+This extension is meant to be used on top of the CartoDB platform. Therefore a CartoDB user is required to install the extension.
+
+The following is a non-comprehensive list of dependencies:
+
+- Postgres 9.3+
+- PostGIS extension
+- Schema triggers extension
+- CartoDB extension
+
+## Installation into the db cluster
+This requires root privileges:
+```
+sudo make all install
+```
+
+## Execute tests
+```
+PGUSER=postgres make installcheck
+```
+
+## Install onto a user's database
+```
+psql -U cartodb_dev_user_367c0edc-b2ad-4bab-ad43-3d58a6179a93_db cartodb_dev_user_367c0edc-b2ad-4bab-ad43-3d58a6179a93_db
+```
+
+and then:
+
+```sql
+CREATE EXTENSION cdb_geocoder_namedplaces;
+```
+
+Creating the extension in the user's db does not require special privileges. It can even be created from the SQL API.
diff --git a/geocoder/namedplace/extension/cdb_geocoder_namedplaces--0.0.1.sql b/geocoder/namedplace/extension/cdb_geocoder_namedplaces--0.0.1.sql
new file mode 100644
index 0000000..6dc537e
--- /dev/null
+++ b/geocoder/namedplace/extension/cdb_geocoder_namedplaces--0.0.1.sql
@@ -0,0 +1,497 @@
+-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION cdb_geocoder_namedplaces" to load this file.
\quit
+
+-- Response types for namedplaces geocoder
+CREATE TYPE geocode_namedplace_country_v1 AS (q text, c text, geom geometry, success boolean);
+CREATE TYPE geocode_namedplace_v1 AS (q text, geom geometry, success boolean);
+CREATE TYPE geocode_admin1_country_v1 AS (q text, a1 text, c text, geom geometry, success boolean);
+CREATE TYPE geocode_admin_country_v1 AS (q text, c text, geom geometry, success boolean);
+
+-- Public API functions --
+--- Geocoding function ---
+-- TODO: deal with permissions (the SECURITY DEFINER functions below do not pin search_path)
+-- geocode_namedplace(places[], country[]): geocodes each place within its paired country; rows whose country is NULL or '' are delegated to geocode_namedplace(places[]).
+CREATE OR REPLACE FUNCTION geocode_namedplace(places text[], country text[]) RETURNS SETOF geocode_namedplace_country_v1
+  LANGUAGE plpgsql STABLE SECURITY DEFINER
+  AS $$
+  DECLARE
+    ret geocode_namedplace_country_v1%rowtype;
+    nans TEXT[];
+  BEGIN
+    -- places whose country is NULL: geocode them without a country, then drop them from the working arrays
+    SELECT array_agg(p) INTO nans FROM (SELECT unnest(places) p, unnest(country) c) g WHERE c IS NULL;
+
+    IF 0 < array_length(nans, 1) THEN
+      SELECT array_agg(p), array_agg(c) INTO places, country FROM (SELECT unnest(places) p, unnest(country) c) g WHERE c IS NOT NULL;
+      FOR ret IN SELECT g.q, NULL as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans)).*) g LOOP
+        RETURN NEXT ret;
+      END LOOP;
+    END IF;
+    -- same treatment for places whose country is the empty string
+    SELECT array_agg(p) INTO nans FROM (SELECT unnest(places) p, unnest(country) c) g WHERE c='';
+    IF 0 < array_length(nans, 1) THEN
+      SELECT array_agg(p), array_agg(c) INTO places, country FROM (SELECT unnest(places) p, unnest(country) c) g WHERE c!='';
+      FOR ret IN SELECT g.q, '' as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans)).*) g LOOP
+        RETURN NEXT ret;
+      END LOOP;
+    END IF;
+    -- remaining places: decode the country to iso2 (LIMIT 1 guards against synonyms shared by several countries), try the primary name, then the alternate names
+    FOR ret IN WITH
+      p AS (SELECT r.s, r.c, (SELECT iso2 FROM country_decoder WHERE lower(r.c) = ANY (synonyms) LIMIT 1) i FROM (SELECT unnest(places) AS s, unnest(country)::text AS c) r),
+      best AS (SELECT p.s AS q, p.c AS c, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.iso2 = p.i ORDER BY population DESC LIMIT 1) AS geom FROM p),
+      next AS (SELECT p.s AS q, p.c AS c, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND gp.iso2 = p.i AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM p WHERE p.s NOT IN (SELECT q FROM best WHERE c = p.c AND geom IS NOT NULL))
+      SELECT q, c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
+      UNION ALL
+      SELECT q, c, geom, (geom IS NOT NULL) AS success FROM next
+    LOOP
+      RETURN NEXT ret;
+    END LOOP;
+    RETURN;
+END
+$$;
+
+-- geocode_namedplace(places[], inputcountry): geocodes every place within one country; a NULL or '' country falls back to geocode_namedplace(places[]).
+CREATE OR REPLACE FUNCTION geocode_namedplace(places text[], inputcountry text) RETURNS SETOF geocode_admin_country_v1
+  LANGUAGE plpgsql STABLE SECURITY DEFINER
+  AS $$
+  DECLARE
+    ret geocode_admin_country_v1%rowtype;
+    isoTwo TEXT := NULL;
+    has_country BOOLEAN;
+  BEGIN
+    has_country := TRUE;
+    -- NULL or '' means no country was supplied
+    IF inputcountry IS NULL THEN
+      has_country := FALSE;
+    ELSIF inputcountry = '' THEN
+      has_country := FALSE;
+    END IF;
+    -- find the iso2 code for the input country string, else NULL
+    IF has_country THEN
+      SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(inputcountry) = ANY (synonyms) LIMIT 1;
+      FOR ret IN WITH
+        best AS (SELECT p.s AS q, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.iso2 = isoTwo ORDER BY population DESC LIMIT 1) AS geom FROM (SELECT unnest(places) AS s) p),
+        next AS (SELECT p.s AS q, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND gp.iso2 = isoTwo AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM (SELECT unnest(places) AS s) p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL))
+        SELECT q, inputcountry c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
+        UNION ALL
+        SELECT q, inputcountry c, geom, (geom IS NOT NULL) AS success FROM next
+      LOOP
+        RETURN NEXT ret;
+      END LOOP;
+    -- no country included: fall back to the country-agnostic lookup
+    ELSE
+      FOR ret IN
+        SELECT g.q as q, inputcountry as c, g.geom as geom, g.success as success FROM (SELECT (geocode_namedplace(places)).*) g
+      LOOP
+        RETURN NEXT ret;
+      END LOOP;
+    END IF;
+    RETURN;
+END
+$$;
+
+-- geocode_namedplace(places[], admin1s, inputcountry): scalar admin1/country variant; dispatches to the overload matching which of the two values is present (non-NULL, non-empty).
+CREATE OR REPLACE FUNCTION geocode_namedplace(places text[], admin1s text, inputcountry text) RETURNS SETOF geocode_admin1_country_v1
+  LANGUAGE plpgsql STABLE SECURITY DEFINER
+  AS $$
+  DECLARE
+    ret geocode_admin1_country_v1%rowtype;
+    has_country BOOLEAN;
+    has_admin1s BOOLEAN;
+    admin1s_a TEXT[];
+  BEGIN
+    -- a NULL or empty string counts as "value not provided"
+    has_country := TRUE;
+    has_admin1s := TRUE;
+
+    IF inputcountry IS NULL THEN
+      has_country := FALSE;
+    ELSIF inputcountry = '' THEN
+      has_country := FALSE;
+    END IF;
+
+    IF admin1s IS NULL THEN
+      has_admin1s := FALSE;
+    ELSIF admin1s = '' THEN
+      has_admin1s := FALSE;
+    END IF;
+
+    -- no country value
+    IF has_country IS FALSE THEN
+      -- no country no admin1 value
+      IF has_admin1s IS FALSE THEN
+        FOR ret IN SELECT g.q, admin1s AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(places)).*) g LOOP
+          RETURN NEXT ret;
+        END LOOP;
+      -- no country, has admin1 value
+      ELSE
+        FOR ret IN
+          SELECT g.q, admin1s AS a1, inputcountry as c, g.geom, g.success FROM (
+            SELECT (
+              geocode_namedplace(
+                places,
+                (SELECT array_agg(a) FROM (SELECT admin1s a FROM GENERATE_SERIES(1, Array_Length(places, 1)) s) r),
+                NULL
+              )
+            ).*) g LOOP
+          RETURN NEXT ret;
+        END LOOP;
+      END IF;
+    -- has country value
+    ELSE
+      -- has country, no admin1 value
+      IF has_admin1s IS FALSE THEN
+        FOR ret IN SELECT g.q, admin1s AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(places, inputcountry)).*) g LOOP
+          RETURN NEXT ret;
+        END LOOP;
+      -- has country, has admin1 value
+      ELSE
+        FOR ret IN
+          SELECT g.q, admin1s AS a1, inputcountry as c, g.geom, g.success FROM (
+            SELECT (
+              geocode_namedplace(
+                places,
+                (SELECT array_agg(a) FROM (SELECT admin1s a FROM GENERATE_SERIES(1, Array_Length(places, 1)) s) r),
+                inputcountry
+              )
+            ).*) g LOOP
+          RETURN NEXT ret;
+        END LOOP;
+      END IF;
+    END IF;
+    RETURN;
+END
+$$;
+
+-- geocode_namedplace(places[], admin1s[], inputcountry): per-place admin1 with one shared country; places whose admin1 is NULL or '' are geocoded without admin1.
+CREATE OR REPLACE FUNCTION geocode_namedplace(places text[], admin1s text[], inputcountry text) RETURNS SETOF geocode_admin1_country_v1
+  LANGUAGE plpgsql STABLE SECURITY DEFINER
+  AS $$
+  DECLARE
+    ret geocode_admin1_country_v1%rowtype;
+    nans TEXT[];
+    isoTwo TEXT := NULL;
+    has_country BOOLEAN;
+  BEGIN
+    has_country := TRUE;
+    -- find the iso2 code for the input country string, else NULL
+    IF inputcountry IS NULL THEN
+      has_country := FALSE;
+    ELSIF inputcountry = '' THEN
+      has_country := FALSE;
+    END IF;
+    IF has_country THEN
+      SELECT iso2 INTO isoTwo FROM country_decoder WHERE lower(inputcountry) = ANY (synonyms) LIMIT 1;
+    END IF;
+
+    -- find all cases where admin1 is NULL
+    SELECT array_agg(p) INTO nans FROM (SELECT unnest(places) p, unnest(admin1s) c) g WHERE c IS NULL;
+
+    IF 0 < array_length(nans, 1) THEN
+      SELECT array_agg(p), array_agg(c) INTO places, admin1s FROM (SELECT unnest(places) p, unnest(admin1s) c) g WHERE c IS NOT NULL;
+      IF has_country THEN
+        -- geocode our named place without admin1 but with our country
+        FOR ret IN SELECT g.q, null AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans, inputcountry)).*) g LOOP
+          RETURN NEXT ret;
+        END LOOP;
+      ELSE
+        -- geocode our named place without admin1 and without country
+        FOR ret IN SELECT g.q, NULL as a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans)).*) g LOOP
+          RETURN NEXT ret;
+        END LOOP;
+      END IF;
+    END IF;
+
+    -- find all cases where admin1 is an empty string
+    SELECT array_agg(p) INTO nans FROM (SELECT unnest(places) p, unnest(admin1s) c) g WHERE c='';
+
+    IF 0 < array_length(nans, 1) THEN
+      SELECT array_agg(p), array_agg(c) INTO places, admin1s FROM (SELECT unnest(places) p, unnest(admin1s) c) g WHERE c!='';
+      IF has_country THEN
+        -- geocode our named place without admin1 but with our country
+        FOR ret IN
+          SELECT g.q, '' AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans, inputcountry)).*) g
+        LOOP
+          RETURN NEXT ret;
+        END LOOP;
+      ELSE
+        -- geocode our named place without admin1 and without country
+        FOR ret IN
+          SELECT g.q, '' AS a1, inputcountry as c, g.geom, g.success FROM (SELECT (geocode_namedplace(nans)).*) g
+        LOOP
+          RETURN NEXT ret;
+        END LOOP;
+      END IF;
+    END IF;
+
+    -- geocode all the cases where admin1 is available
+    IF has_country THEN
+      FOR ret IN WITH
+        -- return c=inputcountry; admin1 is decoded within isoTwo (NOTE(review): the city lookups below filter on admin1 only, not iso2 - confirm)
+        p AS (
+          SELECT r.s, r.a1, (SELECT admin1 FROM admin1_decoder WHERE lower(r.a1) = ANY (synonyms) AND admin1_decoder.iso2 = isoTwo LIMIT 1) i FROM (SELECT unnest(places) AS s, unnest(admin1s)::text AS a1) r),
+        best AS (SELECT p.s AS q, p.a1 as a1, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.admin1 = p.i ORDER BY population DESC LIMIT 1) AS geom FROM p),
+        next AS (SELECT p.s AS q, p.a1 AS a1, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND ga.admin1 = p.i AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL))
+        SELECT q, a1, inputcountry as c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
+        UNION ALL
+        SELECT q, a1, inputcountry as c, geom, (geom IS NOT NULL) AS success FROM next
+      LOOP
+        RETURN NEXT ret;
+      END LOOP;
+    ELSE
+      -- no country: return c=inputcountry (NULL or '') and decode admin1 without a country filter
+      FOR ret IN WITH
+        p AS (
+          SELECT r.s, r.a1, (SELECT admin1 FROM admin1_decoder WHERE lower(r.a1) = ANY (synonyms) LIMIT 1) i FROM (SELECT unnest(places) AS s, unnest(admin1s)::text AS a1) r WHERE a1 IS NOT NULL and a1 != ''),
+        best AS (SELECT p.s AS q, p.a1 as a1, (SELECT gp.the_geom AS geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) AND gp.admin1 = p.i ORDER BY population DESC LIMIT 1) AS geom FROM p),
+        next AS (SELECT p.s AS q, p.a1 AS a1, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND ga.admin1 = p.i AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL))
+        SELECT q, a1, inputcountry as c, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
+        UNION ALL
+        SELECT q, a1, inputcountry as c, geom, (geom IS NOT NULL) AS success FROM next
+      LOOP
+        RETURN NEXT ret;
+      END LOOP;
+    END IF;
+    RETURN;
+END
+$$;
+
+
+-- geocode_namedplace(places[], admin1s[], inputcountry[]): groups the unnested (place, admin1, country) rows by country and delegates each group to the single-country overload.
+CREATE OR REPLACE FUNCTION geocode_namedplace(places text[], admin1s text[], inputcountry text[]) RETURNS SETOF geocode_admin1_country_v1
+  LANGUAGE plpgsql STABLE SECURITY DEFINER
+  AS $$
+  DECLARE
+    ret geocode_admin1_country_v1%rowtype;
+  BEGIN
+    IF admin1s IS NULL THEN
+      FOR ret IN SELECT g.q as q, NULL as a1, g.c as c, g.geom as geom, g.success as success FROM (SELECT (geocode_namedplace(places, inputcountry)).*) g LOOP
+        RETURN NEXT ret;
+      END LOOP;
+    ELSE
+      FOR ret IN WITH clean AS (SELECT array_agg(p) p, array_agg(a) a, c FROM (SELECT p, a, c FROM (SELECT unnest(places) p, unnest(admin1s) a, unnest(inputcountry) c) z GROUP BY p, a, c) y GROUP BY c)
+        SELECT (geocode_namedplace(p, a, c)).* FROM clean LOOP
+        RETURN NEXT ret;
+      END LOOP;
+    END IF;
+    RETURN;
+END
+$$;
+
+-- geocode_namedplace(places[]): country-agnostic lookup; most populous exact lowername match first, then alternate names.
+CREATE OR REPLACE FUNCTION geocode_namedplace(places text[]) RETURNS SETOF geocode_namedplace_v1
+  LANGUAGE plpgsql STABLE SECURITY DEFINER
+  AS $$
+  DECLARE
+    ret geocode_namedplace_v1%rowtype;
+  BEGIN
+    FOR ret IN WITH best AS (SELECT s AS q, (SELECT the_geom FROM global_cities_points_limited gp WHERE gp.lowername = lower(p.s) ORDER BY population DESC LIMIT 1) AS geom FROM (SELECT unnest(places) as s) p),
+      next AS (SELECT p.s AS q, (SELECT gp.the_geom FROM global_cities_points_limited gp, global_cities_alternates_limited ga WHERE lower(p.s) = ga.lowername AND ga.geoname_id = gp.geoname_id ORDER BY preferred DESC LIMIT 1) geom FROM (SELECT unnest(places) as s) p WHERE p.s NOT IN (SELECT q FROM best WHERE geom IS NOT NULL))
+      SELECT q, geom, TRUE AS success FROM best WHERE geom IS NOT NULL
+      UNION ALL
+      SELECT q, geom, (geom IS NOT NULL) AS success FROM next
+    LOOP
+      RETURN NEXT ret;
+    END LOOP;
+    RETURN;
+END
+$$;
+
+--------------------------------------------------------------------------------
+
+-- Support tables
+
+CREATE TABLE admin1_decoder (
+    name text,
+    admin1 text,
+    iso2 text,
+    geoname_id integer,
+    cartodb_id integer NOT NULL,
+    created_at timestamp with time zone DEFAULT now() NOT NULL,
+    updated_at timestamp with time zone DEFAULT now() NOT NULL,
+    the_geom geometry(Geometry,4326),
+    the_geom_webmercator geometry(Geometry,3857),
+    synonyms text[],
+    iso3 text,
+    users double precision
+);
+
+
+CREATE SEQUENCE admin1_decoder_cartodb_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+ALTER SEQUENCE admin1_decoder_cartodb_id_seq OWNED BY admin1_decoder.cartodb_id;
+ALTER TABLE ONLY admin1_decoder ALTER COLUMN cartodb_id SET DEFAULT nextval('admin1_decoder_cartodb_id_seq'::regclass);
+
+
+ALTER TABLE ONLY admin1_decoder
+    ADD CONSTRAINT admin1_decoder_cartodb_id_key UNIQUE (cartodb_id);
+ALTER TABLE ONLY admin1_decoder
+    ADD CONSTRAINT admin1_decoder_pkey PRIMARY KEY (cartodb_id);
+
+
+CREATE INDEX admin1_decoder_the_geom_idx ON admin1_decoder USING gist (the_geom);
+CREATE INDEX admin1_decoder_admin1_idx ON admin1_decoder USING btree (admin1);
+CREATE INDEX admin1_decoder_geoname_id_idx ON admin1_decoder USING btree (geoname_id);
+CREATE INDEX admin1_decoder_iso2_idx ON admin1_decoder USING btree (iso2);
+CREATE INDEX admin1_decoder_iso3_idx ON admin1_decoder USING btree (iso3);
+CREATE INDEX admin1_decoder_name_idx ON admin1_decoder USING btree (name);
+
+
+CREATE TRIGGER track_updates AFTER INSERT OR DELETE OR UPDATE OR TRUNCATE ON admin1_decoder FOR EACH
STATEMENT EXECUTE PROCEDURE cartodb.cdb_tablemetadata_trigger();
+CREATE TRIGGER update_the_geom_webmercator_trigger BEFORE INSERT OR UPDATE OF the_geom ON admin1_decoder FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_the_geom_webmercator();
+CREATE TRIGGER update_updated_at_trigger BEFORE UPDATE ON admin1_decoder FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_updated_at();
+
+-- country_decoder: country lookup table; the geocode_namedplace functions match lower(<country>) against `synonyms` to obtain `iso2`.
+CREATE TABLE country_decoder (
+    name text,
+    nativename text,
+    tld text,
+    iso2 text,
+    ccn3 text,
+    iso3 text,
+    currency text,
+    callingcode text,
+    capital text,
+    altspellings text,
+    relevance text,
+    region text,
+    subregion text,
+    language text,
+    languagescodes text,
+    translations text,
+    population text,
+    latlng text,
+    demonym text,
+    borders text,
+    the_geom geometry(Geometry,4326),
+    cartodb_id integer NOT NULL,
+    created_at timestamp with time zone DEFAULT now() NOT NULL,
+    updated_at timestamp with time zone DEFAULT now() NOT NULL,
+    the_geom_webmercator geometry(Geometry,3857),
+    synbu text[],
+    synonyms text[],
+    users double precision
+);
+
+-- cartodb_id is filled from a dedicated sequence owned by the column
+CREATE SEQUENCE country_decoder_cartodb_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+ALTER SEQUENCE country_decoder_cartodb_id_seq OWNED BY country_decoder.cartodb_id;
+ALTER TABLE ONLY country_decoder ALTER COLUMN cartodb_id SET DEFAULT nextval('country_decoder_cartodb_id_seq'::regclass);
+
+ALTER TABLE ONLY country_decoder
+    ADD CONSTRAINT country_decoder_cartodb_id_key UNIQUE (cartodb_id);
+ALTER TABLE ONLY country_decoder
+    ADD CONSTRAINT country_decoder_pkey PRIMARY KEY (cartodb_id);
+ALTER TABLE country_decoder CLUSTER ON country_decoder_pkey;
+
+
+CREATE INDEX country_decoder_the_geom_idx ON country_decoder USING gist (the_geom);
+CREATE INDEX country_decoder_the_geom_webmercator_idx ON country_decoder USING gist (the_geom_webmercator);
+
+CREATE TRIGGER track_updates AFTER INSERT OR DELETE OR UPDATE OR TRUNCATE ON country_decoder FOR EACH STATEMENT EXECUTE PROCEDURE cartodb.cdb_tablemetadata_trigger();
+CREATE TRIGGER update_the_geom_webmercator_trigger BEFORE INSERT OR UPDATE OF the_geom ON country_decoder FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_the_geom_webmercator();
+CREATE TRIGGER update_updated_at_trigger BEFORE UPDATE ON country_decoder FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_updated_at();
+
+-- global_cities_points_limited: primary city points; the geocoder matches on `lowername` (plus `iso2` or `admin1`) and picks the row with the highest `population`.
+CREATE TABLE global_cities_points_limited (
+    geoname_id integer,
+    name text,
+    asciiname text,
+    altnames text,
+    featclass text,
+    featcode text,
+    iso2 text,
+    cc2 text,
+    admin1 text,
+    admin2 text,
+    admin3 text,
+    admin4 text,
+    population double precision,
+    gtopo30 integer,
+    the_geom geometry(Point,4326),
+    created_at timestamp with time zone DEFAULT now() NOT NULL,
+    updated_at timestamp with time zone DEFAULT now() NOT NULL,
+    the_geom_webmercator geometry(Geometry,3857),
+    cartodb_id integer NOT NULL,
+    lowername text
+);
+
+-- cartodb_id is filled from a dedicated sequence owned by the column
+CREATE SEQUENCE global_cities_points_limited_cartodb_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+ALTER SEQUENCE global_cities_points_limited_cartodb_id_seq OWNED BY global_cities_points_limited.cartodb_id;
+ALTER TABLE ONLY global_cities_points_limited ALTER COLUMN cartodb_id SET DEFAULT nextval('global_cities_points_limited_cartodb_id_seq'::regclass);
+
+ALTER TABLE ONLY global_cities_points_limited
+    ADD CONSTRAINT global_cities_points_limited_cartodb_id_key UNIQUE (cartodb_id);
+ALTER TABLE ONLY global_cities_points_limited
+    ADD CONSTRAINT global_cities_points_limited_pkey PRIMARY KEY (cartodb_id);
+
+-- the (lowername, iso2) btree index serves the `gp.lowername = lower(...) AND gp.iso2 = ...` lookups in geocode_namedplace
+CREATE INDEX global_cities_points_limited_the_geom_idx ON global_cities_points_limited USING gist (the_geom);
+CREATE INDEX global_cities_points_limited_the_geom_webmercator_idx ON global_cities_points_limited USING gist (the_geom_webmercator);
+CREATE INDEX global_cities_points_limited_lower_iso2_idx ON global_cities_points_limited USING btree (lowername, iso2);
+CREATE INDEX global_cities_points_limited_admin1_idx ON global_cities_points_limited USING btree (admin1);
+CREATE INDEX global_cities_points_limited_geoname_id_idx ON global_cities_points_limited USING btree (geoname_id);
+
+
+CREATE TRIGGER track_updates AFTER INSERT OR DELETE OR UPDATE OR TRUNCATE ON global_cities_points_limited FOR EACH STATEMENT EXECUTE PROCEDURE cartodb.cdb_tablemetadata_trigger();
+CREATE TRIGGER update_the_geom_webmercator_trigger BEFORE INSERT OR UPDATE OF the_geom ON global_cities_points_limited FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_the_geom_webmercator();
+CREATE TRIGGER update_updated_at_trigger BEFORE UPDATE ON global_cities_points_limited FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_updated_at();
+
+-- global_cities_alternates_limited: alternate city names; matched on `lowername`, joined to the points table through `geoname_id`, ordered by `preferred DESC`.
+CREATE TABLE global_cities_alternates_limited (
+    geoname_id integer,
+    name text,
+    the_geom geometry(Geometry,4326),
+    created_at timestamp with time zone DEFAULT now() NOT NULL,
+    updated_at timestamp with time zone DEFAULT now() NOT NULL,
+    the_geom_webmercator geometry(Geometry,3857),
+    preferred boolean,
+    lowername text,
+    cartodb_id integer NOT NULL,
+    admin1_geonameid integer,
+    iso2 text,
+    admin1 text
+);
+
+-- cartodb_id is filled from a dedicated sequence owned by the column
+CREATE SEQUENCE global_cities_alternates_limited_cartodb_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+ALTER SEQUENCE global_cities_alternates_limited_cartodb_id_seq OWNED BY global_cities_alternates_limited.cartodb_id;
+ALTER TABLE ONLY global_cities_alternates_limited ALTER COLUMN cartodb_id SET DEFAULT nextval('global_cities_alternates_limited_cartodb_id_seq'::regclass);
+
+ALTER TABLE ONLY global_cities_alternates_limited
+    ADD CONSTRAINT points_cities_alternates_limited_cartodb_id_key UNIQUE (cartodb_id);
+ALTER TABLE ONLY global_cities_alternates_limited
+    ADD CONSTRAINT global_cities_alternates_limited_pkey PRIMARY KEY (cartodb_id);
+
+-- NOTE(review): the UNIQUE constraint above is named points_cities_* while every other object here uses the global_cities_* prefix - confirm whether that is intentional
+CREATE INDEX global_cities_alternates_limited_the_geom_idx ON global_cities_alternates_limited USING gist (the_geom);
+CREATE INDEX global_cities_alternates_limited_the_geom_webmercator_idx ON global_cities_alternates_limited USING gist (the_geom_webmercator);
+CREATE INDEX global_cities_alternates_limited_admin1_idx ON global_cities_alternates_limited USING btree (admin1);
+CREATE INDEX global_cities_alternates_limited_admin1_geonameid_idx ON global_cities_alternates_limited USING btree (admin1_geonameid);
+CREATE INDEX global_cities_alternates_limited_lowername_idx ON global_cities_alternates_limited USING btree (lowername);
+
+
+CREATE TRIGGER track_updates AFTER INSERT OR DELETE OR UPDATE OR TRUNCATE ON global_cities_alternates_limited FOR EACH STATEMENT EXECUTE PROCEDURE cartodb.cdb_tablemetadata_trigger();
+CREATE TRIGGER update_the_geom_webmercator_trigger BEFORE INSERT OR UPDATE OF the_geom ON global_cities_alternates_limited FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_the_geom_webmercator();
+CREATE TRIGGER update_updated_at_trigger BEFORE UPDATE ON global_cities_alternates_limited FOR EACH ROW EXECUTE PROCEDURE cartodb._cdb_update_updated_at();
+
diff --git a/geocoder/namedplace/extension/cdb_geocoder_namedplaces.control b/geocoder/namedplace/extension/cdb_geocoder_namedplaces.control
new file mode 100644
index 0000000..7da3164
--- /dev/null
+++ b/geocoder/namedplace/extension/cdb_geocoder_namedplaces.control
@@ -0,0 +1,6 @@
+# cdb geocoder namedplaces extension
+comment = 'CartoDB named places internal geocoder'
+default_version = '0.0.1'
+relocatable = true
+requires = cartodb
+superuser = false
diff --git a/geocoder/namedplace/extension/expected/cdb_geocoder_namedplaces_test.out b/geocoder/namedplace/extension/expected/cdb_geocoder_namedplaces_test.out
new file mode 100644
index 0000000..d09e4ce
--- /dev/null
+++ b/geocoder/namedplace/extension/expected/cdb_geocoder_namedplaces_test.out
@@ -0,0 +1,64 @@
+CREATE EXTENSION postgis;
+CREATE EXTENSION schema_triggers;
+CREATE EXTENSION plpythonu;
+CREATE EXTENSION cartodb;
+CREATE EXTENSION cdb_geocoder_namedplaces;
+-- Check
that the different geocoding functions are callable, should return success = false +SELECT (geocode_namedplace(Array['Madrid', 'New York City', 'sunapee'])).*; + q | geom | success +---------------+------+--------- + Madrid | | f + New York City | | f + sunapee | | f +(3 rows) +SELECT (geocode_namedplace(Array['Elche', 'Granada', 'Madrid'], 'Spain')).*; + q | c | geom | success +---------+-------+------+--------- + Elche | Spain | | f + Granada | Spain | | f + Madrid | Spain | | f + (3 rows) +SELECT (geocode_namedplace(Array['sunapee', 'sunapeeee', 'New York City', 'Madrid'], Array['', 'US', 'United States', NULL])).*; + q | c | geom | success +---------------+---------------+------+--------- + Madrid | | | f + sunapee | | | f + sunapeeee | US | | f + New York City | United States | | f +(4 rows) +SELECT (geocode_namedplace(Array['Portland', 'Portland', 'New York City'], Array['Maine', 'Oregon', NULL], 'USA')).*; + q | a1 | c | geom | success +---------------+--------+-----+------+--------- + New York City | | USA | | f + Portland | Maine | USA | | f + Portland | Oregon | USA | | f +(3 rows) +SELECT (geocode_namedplace(Array['Portland'], 'Oregon', 'USA')).*; + q | a1 | c | geom | success +----------+--------+-----+------+--------- + Portland | Oregon | USA | | f +(1 row) +SELECT (geocode_namedplace(Array['Portland', 'Portland', 'New York City'], Array['Maine', 'Oregon', NULL], Array['USA'])).*; + q | a1 | c | geom | success +---------------+--------+-----+------+--------- + New York City | | USA | | f + Portland | Oregon | USA | | f + Portland | Maine | USA | | f +(3 rows) + +-- Mock the varnish invalidation function +CREATE OR REPLACE FUNCTION public.cdb_invalidate_varnish(table_name text) RETURNS void AS $$ +BEGIN + RETURN; +END +$$ +LANGUAGE plpgsql; +-- Add a named place source +COPY global_cities_alternates_limited (geoname_id, name, the_geom, created_at, updated_at, the_geom_webmercator, preferred, lowername, cartodb_id, admin1_geonameid, iso2, admin1) FROM 
stdin;
+COPY global_cities_points_limited (geoname_id, name, asciiname, altnames, featclass, featcode, iso2, admin1, admin2, population, the_geom, created_at, updated_at, the_geom_webmercator, cartodb_id, lowername) FROM stdin;
+-- Check that the geocoding function is callable, should return success = true
+SELECT (geocode_namedplace(Array['Barcelona'])).*
+     q     |                        geom                        | success
+-----------+----------------------------------------------------+---------
+ Barcelona | 0101000020E6100000CA15DEE522E653C0A4C2D842902B4540 | t
+(1 row)
\ No newline at end of file
diff --git a/geocoder/namedplace/extension/sql/cdb_geocoder_namedplaces_test.sql b/geocoder/namedplace/extension/sql/cdb_geocoder_namedplaces_test.sql
new file mode 100644
index 0000000..6f5c422
--- /dev/null
+++ b/geocoder/namedplace/extension/sql/cdb_geocoder_namedplaces_test.sql
@@ -0,0 +1,35 @@
+CREATE EXTENSION postgis;
+CREATE EXTENSION schema_triggers;
+CREATE EXTENSION plpythonu;
+CREATE EXTENSION cartodb;
+CREATE EXTENSION cdb_geocoder_namedplaces;
+
+-- Check that the different geocoding functions are callable, should return success = false
+SELECT (geocode_namedplace(Array['Madrid', 'New York City', 'sunapee'])).*;
+SELECT (geocode_namedplace(Array['Elche', 'Granada', 'Madrid'], 'Spain')).*;
+SELECT (geocode_namedplace(Array['sunapee', 'sunapeeee', 'New York City', 'Madrid'], Array['', 'US', 'United States', NULL])).*;
+SELECT (geocode_namedplace(Array['Portland', 'Portland', 'New York City'], Array['Maine', 'Oregon', NULL], 'USA')).*;
+SELECT (geocode_namedplace(Array['Portland'], 'Oregon', 'USA')).*;
+SELECT (geocode_namedplace(Array['Portland', 'Portland', 'New York City'], Array['Maine', 'Oregon', NULL], Array['USA'])).*;
+
+
+-- Mock the varnish invalidation function
+CREATE OR REPLACE FUNCTION public.cdb_invalidate_varnish(table_name text) RETURNS void AS $$
+BEGIN
+  RETURN;
+END
+$$
+LANGUAGE plpgsql;
+
+-- Add a named place source
+COPY global_cities_alternates_limited (geoname_id, name,
the_geom, created_at, updated_at, the_geom_webmercator, preferred, lowername, cartodb_id, admin1_geonameid, iso2, admin1) FROM stdin; +3128760 barcelona \N 2014-02-11 18:23:18.115612+00 2014-02-25 16:41:15.278786+00 \N t barcelona 7530944 409419 \N 56 +\. + +COPY global_cities_points_limited (geoname_id, name, asciiname, altnames, featclass, featcode, iso2, admin1, admin2, population, the_geom, created_at, updated_at, the_geom_webmercator, cartodb_id, lowername) FROM stdin; +2421056 Barcelona Barcelona P PPLA ES B 185 0101000020E6100000CA15DEE522E653C0A4C2D842902B4540 2015-06-13 14:48:34.341372+00 2015-06-15 16:53:41.067784+00 0101000020110F00000643969A73E660C10FF27276F0E15341 8653176 barcelona +\. + +-- Check that the geocoding function is callable, should return success = true +SELECT (geocode_namedplace(Array['Barcelona'])).* +