Merge branch 'master' of https://github.com/CartoDB/data-services
merge
This commit is contained in:
@@ -1,97 +1,74 @@
|
||||
--- Usage
|
||||
|
||||
--SELECT (geocode_admin1_polygons(Array['TX','Ciudad Real', 'sevilla'])).*
|
||||
|
||||
--- Function
|
||||
|
||||
-- Geocodes an array of admin-1 (province/state) names to polygons.
--
-- Parameters:
--   name  text[]  names to look up; each element yields exactly one output row.
--
-- Returns: SETOF geocode_admin_v1, filled positionally with
--   (original input string, matched geometry or NULL, success flag).
--
-- Matching: every input is lower-cased, has '.' replaced by a space and is
-- trimmed; the result must be an element of global_province_polygons.synonyms.
CREATE OR REPLACE FUNCTION test_geocode_admin1_polygons(name text[])
RETURNS SETOF geocode_admin_v1 AS $$
DECLARE
    matched geocode_admin_v1%rowtype;
BEGIN
    FOR matched IN
        WITH
        -- One row per input: the untouched string (q) and its normalised key (c).
        inputs AS (
            SELECT
                unnest(name) AS q,
                trim(replace(lower(unnest(name)), '.', ' ')) AS c
        ),
        -- Attach at most one polygon to every input.
        located AS (
            SELECT
                inputs.q,
                (
                    SELECT gpp.the_geom
                    FROM global_province_polygons AS gpp
                    WHERE inputs.c = ANY (gpp.synonyms)
                    -- When several polygons share a synonym, the one with the
                    -- highest frequency wins. Per the original author's note,
                    -- frequency was derived from the number of signed-up users
                    -- per country, so countries with more users are favoured.
                    ORDER BY gpp.frequency DESC
                    LIMIT 1
                ) AS geom
            FROM inputs
        )
        SELECT
            located.q,
            located.geom,
            -- success is simply "a geometry was found"
            NOT (located.geom IS NULL) AS success
        FROM located
    LOOP
        RETURN NEXT matched;
    END LOOP;

    RETURN;
END
$$ LANGUAGE 'plpgsql' SECURITY DEFINER;
|
||||
Text array, country name
|
||||
|
||||
|
||||
-- CREATE OR REPLACE FUNCTION test_geocode_admin1_polygons(name text[])
|
||||
-- RETURNS SETOF geocode_admin_v1 AS $$
|
||||
-- DECLARE
|
||||
-- ret geocode_admin_v1%rowtype;
|
||||
-- BEGIN
|
||||
-- -- FOR ret IN
|
||||
-- RETURN QUERY
|
||||
-- SELECT
|
||||
-- d.q, n.the_geom as geom,
|
||||
-- CASE WHEN s.adm1_code IS NULL then FALSE ELSE TRUE END AS success
|
||||
-- FROM (
|
||||
-- SELECT
|
||||
-- q, lower(regexp_replace(q, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text x
|
||||
-- FROM (SELECT unnest(name) q) g
|
||||
-- ) d
|
||||
-- LEFT OUTER JOIN
|
||||
-- admin1_synonyms s ON name_ = d.x
|
||||
-- LEFT OUTER JOIN
|
||||
-- ne_admin1_v3 n ON s.adm1_code = n.adm1_code;
|
||||
-- END
|
||||
-- $$ LANGUAGE 'plpgsql' SECURITY DEFINER;
|
||||
|
||||
|
||||
--- Usage
|
||||
|
||||
--- SELECT (geocode_admin1_polygons(Array['az', 'Texas'], 'Ecuador')).*
|
||||
|
||||
|
||||
--- Function
|
||||
-- Geocodes an array of admin-1 (province/state) names, optionally restricted
-- to a single country given by name (inputcountry).
--
-- NOTE(review): this span appears to interleave two revisions of the same
-- function (it looks like a rendered diff): there are two RETURNS clauses,
-- two declarations of "ret" and two closing "$$ LANGUAGE" lines. It is not
-- executable as-is; confirm which revision is intended before running it.
CREATE OR REPLACE FUNCTION test_geocode_admin1_polygons(name text[], inputcountry text)
|
||||
RETURNS SETOF geocode_admin_v1 AS $$
|
||||
RETURNS SETOF geocode_admin_country_v1 AS $$
|
||||
DECLARE
|
||||
ret geocode_admin_v1%rowtype;
|
||||
ret geocode_admin_country_v1%rowtype;
|
||||
adm0 TEXT;
|
||||
adm0_check BOOLEAN := TRUE;
|
||||
BEGIN
|
||||
|
||||
-- Variant based on global_province_polygons: each name is lower-cased, has
-- '.' replaced by a space and is trimmed (c); inputcountry is resolved to an
-- iso3 code (i) through country_decoder.synonyms; the polygon must match both.
-- This query yields three columns (q, geom, success).
FOR ret IN WITH
|
||||
p AS (SELECT r.c, r.q, (SELECT iso3 FROM country_decoder WHERE lower(inputcountry) = ANY (synonyms)) i FROM (SELECT trim(replace(lower(unnest(name)),'.',' ')) c, unnest(name) q) r)
|
||||
SELECT
|
||||
q, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success
|
||||
FROM (
|
||||
SELECT
|
||||
q, (
|
||||
SELECT the_geom
|
||||
FROM global_province_polygons
|
||||
WHERE p.c = ANY (synonyms)
|
||||
AND iso3 = p.i
|
||||
-- To calculate frequency, I simply counted the number of users
|
||||
-- we had signed up in each country. Countries with more users,
|
||||
-- we favor higher in the geocoder :)
|
||||
ORDER BY frequency DESC LIMIT 1
|
||||
) geom
|
||||
FROM p) n
|
||||
LOOP
|
||||
RETURN NEXT ret;
|
||||
END LOOP;
|
||||
-- Variant based on admin0_synonyms / admin1_synonyms / qs_adm1.
-- A NULL or blank inputcountry switches the country filter off.
IF inputcountry IS NULL THEN
|
||||
adm0_check = FALSE;
|
||||
END IF;
|
||||
IF trim(inputcountry)='' THEN
|
||||
adm0_check = FALSE;
|
||||
END IF;
|
||||
|
||||
IF adm0_check IS TRUE THEN
|
||||
-- Resolve the country to an adm0_a3 code. The lookup key is inputcountry with
-- every run of characters outside a-z, A-Z and \u00C0-\u00ff removed, then
-- lower-cased. If no synonym matches, adm0 stays NULL and the
-- "adm0_a3 = adm0" predicate below matches no row.
SELECT INTO adm0 adm0_a3 FROM admin0_synonyms WHERE name_ = lower(regexp_replace(inputcountry, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text LIMIT 1;
|
||||
|
||||
-- Country-restricted lookup: one output row per input name, four columns
-- (q, inputcountry, geom, success). The name is normalised the same way as
-- the country above and must match an admin1_synonyms row of that country.
FOR ret IN
|
||||
SELECT
|
||||
q, inputcountry, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success
|
||||
FROM (
|
||||
SELECT
|
||||
q, (
|
||||
SELECT the_geom FROM qs_adm1 WHERE global_id = (
|
||||
SELECT global_id
|
||||
FROM admin1_synonyms
|
||||
WHERE name_ = lower(regexp_replace(d.q, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text
|
||||
AND adm0_a3 = adm0
|
||||
LIMIT 1
|
||||
)
|
||||
) geom
|
||||
FROM (SELECT unnest(name) q) d
|
||||
) v
|
||||
LOOP
|
||||
RETURN NEXT ret;
|
||||
END LOOP;
|
||||
|
||||
--Handle cases where country couldn't be found
|
||||
-- NOTE(review): as written, this branch runs when inputcountry is NULL or
-- blank (adm0_check is FALSE); it matches on the normalised name only, and
-- LIMIT 1 has no ORDER BY, so which synonym wins is unspecified.
ELSE
|
||||
FOR ret IN
|
||||
SELECT
|
||||
q, inputcountry, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success
|
||||
FROM (
|
||||
SELECT
|
||||
q, (
|
||||
SELECT the_geom FROM qs_adm1 WHERE global_id = (
|
||||
SELECT global_id
|
||||
FROM admin1_synonyms
|
||||
WHERE name_ = lower(regexp_replace(d.q, '[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text
|
||||
LIMIT 1
|
||||
)
|
||||
) geom
|
||||
FROM (SELECT unnest(name) q) d
|
||||
) v
|
||||
LOOP
|
||||
RETURN NEXT ret;
|
||||
END LOOP;
|
||||
END IF;
|
||||
RETURN;
|
||||
END
|
||||
$$ LANGUAGE 'plpgsql' SECURITY DEFINER;
|
||||
Text array, country array
|
||||
$$ LANGUAGE 'plpgsql';
|
||||
|
||||
|
||||
--Text array, country array
|
||||
|
||||
--- Usage
|
||||
|
||||
@@ -103,40 +80,12 @@ CREATE OR REPLACE FUNCTION test_geocode_admin1_polygons(names text[], country te
|
||||
RETURNS SETOF geocode_admin_country_v1 AS $$
|
||||
DECLARE
|
||||
ret geocode_admin_country_v1%rowtype;
|
||||
nans TEXT[];
|
||||
BEGIN
|
||||
|
||||
|
||||
SELECT array_agg(p) INTO nans FROM (SELECT unnest(names) p, unnest(country) c) g WHERE c IS NULL;
|
||||
|
||||
IF 0 < array_length(nans, 1) THEN
|
||||
SELECT array_agg(p), array_agg(c) INTO names, country FROM (SELECT unnest(names) p, unnest(country) c) g WHERE c IS NOT NULL;
|
||||
FOR ret IN SELECT g.q, NULL as c, g.geom, g.success FROM (SELECT (geocode_admin1_polygons(nans)).*) g LOOP
|
||||
RETURN NEXT ret;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
|
||||
FOR ret IN WITH
|
||||
p AS (SELECT r.p, r.q, c, (SELECT iso3 FROM country_decoder WHERE lower(r.c) = ANY (synonyms)) i FROM (SELECT trim(replace(lower(unnest(names)),'.',' ')) p, unnest(names) q, unnest(country) c) r)
|
||||
SELECT
|
||||
q, c, geom, CASE WHEN geom IS NULL THEN FALSE ELSE TRUE END AS success
|
||||
FROM (
|
||||
SELECT
|
||||
q, c, (
|
||||
SELECT the_geom
|
||||
FROM global_province_polygons
|
||||
WHERE p.p = ANY (synonyms)
|
||||
AND iso3 = p.i
|
||||
-- To calculate frequency, I simply counted the number of users
|
||||
-- we had signed up in each country. Countries with more users,
|
||||
-- we favor higher in the geocoder :)
|
||||
ORDER BY frequency DESC LIMIT 1
|
||||
) geom
|
||||
FROM p) n
|
||||
LOOP
|
||||
FOR ret IN SELECT (test_geocode_admin1_polygons(array_agg(n), c)).* FROM (SELECT unnest(names) n, unnest(country) c) a GROUP BY c LOOP
|
||||
RETURN NEXT ret;
|
||||
END LOOP;
|
||||
RETURN;
|
||||
END
|
||||
$$ LANGUAGE 'plpgsql' SECURITY DEFINER;
|
||||
$$ LANGUAGE 'plpgsql';
|
||||
|
||||
|
||||
@@ -5,11 +5,14 @@ IP address geocoder
|
||||
|
||||
### Creation steps
|
||||
|
||||
1. upload a new dataset to the geocoder table, call it latest_ip_address_locations
|
||||
1. Upload a new dataset to the geocoder table, call it latest_ip_address_locations
|
||||
2. Run the sql/build_data_table script to update the table
|
||||
|
||||
### Data Sources
|
||||
|
||||
GeoLite2 open source database [Created by MaxMind](http://www.maxmind.com) -
|
||||
http://dev.maxmind.com/geoip/geoip2/geolite2/ Download the CSV [Geolite2 City](http://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip)
|
||||
|
||||
### Preparation details
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
---- Postal Code Polygon table ---
|
||||
---- IP addresses table ---
|
||||
--- ---
|
||||
|
||||
-- Clear table
|
||||
|
||||
@@ -3,8 +3,14 @@ Postal code geocoder (polygons)
|
||||
|
||||
### Function
|
||||
|
||||
Following the steps below populates a table with zip codes from Australia, Canada, the USA and France (identified by iso3), each associated with its spatial location as a polygon.
|
||||
|
||||
### Creation steps
|
||||
|
||||
1. Import the four files attached in the section "Datasources".
|
||||
|
||||
2. Run sql/build_data_table.sql. Note that the table "postal_code_polygons" must already exist with the columns _the_geom_, _adm0_a3_ and _postal_code_.
|
||||
|
||||
### Data Sources
|
||||
|
||||
Australian polygons - http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/2033.0.55.0012011?OpenDocument
|
||||
@@ -20,6 +26,58 @@ French polygons - http://www.data.gouv.fr/dataset/fond-de-carte-des-codes-postau
|
||||
|
||||
### Preparation details
|
||||
|
||||
The names of the imported files are:
|
||||
|
||||
- doc for Australia table
|
||||
- gfsa000a11a_e for Canada table
|
||||
- tl_2013_us_zcta510 for USA table
|
||||
- codes_postaux for France table
|
||||
|
||||
# Postal code geocoder (points)
|
||||
|
||||
todo
|
||||
### Function
|
||||
|
||||
Following the steps below populates a table with zip codes of different countries (identified by iso3), each associated with its spatial location as a point.
|
||||
|
||||
This dataset includes data for the following countries:
|
||||
|
||||
````
|
||||
CH, ES, GU, ZA, MX, SJ, NL, RU, AX, TH, AR, MY, RE, LK, GB, IS, GL, JE, DK, IN,
|
||||
SI, GP, MQ, BR, SM, BG, NZ, MP, CZ, DO, MD, PK, TR, VI, BD, GG, LT, PM, MC, US,
|
||||
IT, LU, SK, LI, PR, IM, NO, PT, PL, FI, JP, CA, DE, HU, PH, SE, VA, YT, MK, FR,
|
||||
MH, RO, FO, GF, AD, HR, DZ, GT, AU, AS, BE, AT
|
||||
````
|
||||
|
||||
### Creation steps
|
||||
|
||||
1. Download the allCountries.zip file from [GeoNames](http://www.geonames.org). Import it and rename the table to tmp_zipcode_points. Alternatively, follow the manual process explained below.
|
||||
|
||||
|
||||
The columns that are loaded are the following ones:
|
||||
field_1: corresponds to ISO2
|
||||
field_10: corresponds to latitude
|
||||
field_11: corresponds to longitude
|
||||
field_2: corresponds to ZIP code
|
||||
|
||||
2. Georeference the table using field_11 as longitude and field_10 as latitude in order to construct the_geom.
|
||||
|
||||
3. Add column iso3 (text) and run sql/build_zipcode_points_table.sql.
|
||||
|
||||
|
||||
**Alternative manual process**
|
||||
|
||||
Open the allCountries.txt file with Excel and add a new row on top. Delete columns C-I and L.
|
||||
|
||||
In the first row, add the following columns: iso2, zipcode, lat, long.
|
||||
|
||||
Import the file ignoring step 2.
|
||||
|
||||
### Data Sources
|
||||
|
||||
All countries points [GeoNames](http://www.geonames.org) - http://download.geonames.org/export/zip/allCountries.zip
|
||||
|
||||
### Preparation details
|
||||
|
||||
_The large size of the dataset may cause interruptions while the coordinates are processed after uploading the file; manipulating the file before importing it is a faster workaround._
|
||||
|
||||
|
||||
|
||||
26
geocoder/postal-codes/sql/build_zipcode_points_table.sql
Normal file
26
geocoder/postal-codes/sql/build_zipcode_points_table.sql
Normal file
@@ -0,0 +1,26 @@
|
||||
|
||||
---- Postal Code Points table ---

-- Rebuilds zipcode_points from the freshly imported tmp_zipcode_points table.
-- Reads:  tmp_zipcode_points (the_geom, zipcode, iso2), country_decoder (iso2, iso3)
-- Writes: zipcode_points (the_geom, zipcode, iso3)

-- Clear table (intentionally unfiltered: the whole table is rebuilt below)

DELETE FROM zipcode_points;

-- Insert points
-- NOTE(review): the target is assumed to be zipcode_points, the same table
-- that is cleared above — confirm the actual table name.

INSERT INTO zipcode_points (the_geom, zipcode, iso3)
SELECT
    the_geom,
    zipcode,
    (
        -- Translate the two-letter country code to iso3. This scalar subquery
        -- yields NULL when the code is unknown and raises an error if
        -- country_decoder holds more than one row for the same iso2.
        SELECT country_decoder.iso3
        FROM country_decoder
        WHERE tmp_zipcode_points.iso2 = country_decoder.iso2
    )
FROM tmp_zipcode_points;

-- Drops temporary table

DROP TABLE tmp_zipcode_points;
|
||||
|
||||
|
||||
@@ -4,6 +4,12 @@ CREATE INDEX idx_admin0_synonyms_name_ ON admin0_synonyms (name_);
|
||||
CREATE INDEX idx_admin0_synonyms_rank ON admin0_synonyms (rank);
-- CREATE INDEX idx_admin0_synonyms_name_rank ON admin0_synonyms (name_, rank);

-- Index on admin1 id
-- Each statement is terminated with a semicolon so that consecutive
-- CREATE INDEX statements are not parsed as a single (invalid) statement.
CREATE UNIQUE INDEX idx_qs_adm1_global_id ON qs_adm1 (global_id);
-- Supports lookups of admin1 synonyms by (name_, adm0_a3)
CREATE INDEX idx_admin1_synonyms_name_adm0 ON admin1_synonyms (name_, adm0_a3);

-- create indexes on polygon table
CREATE UNIQUE INDEX idx_ne_admin0_v3_adm0_a3 ON ne_admin0_v3 (adm0_a3);

-- create indexes on postal code polygon table
-- Also enforces one polygon per (country, postal code) pair
CREATE UNIQUE INDEX idx_postal_code_polygons_a3_code ON postal_code_polygons (adm0_a3, postal_code);
|
||||
|
||||
Reference in New Issue
Block a user