Add guessing function and tests to extension

This commit is contained in:
Carla Iriberri
2015-11-20 15:50:46 +01:00
parent 905d1c9348
commit df1d6c0d8d
3 changed files with 50 additions and 2 deletions

View File

@@ -46,13 +46,25 @@ SELECT (geocode_namedplace(Array['Portland', 'Portland', 'New York City'], Array
Portland | Maine | USA | | f
(3 rows)
SELECT namedplace_guess_country(Array['granada', 'jaen', 'cordoba', 'madrid', 'valladolid']);
namedplace_guess_country
--------------------------
(1 row)
-- Add a named place source
COPY global_cities_alternates_limited (geoname_id, name, the_geom, created_at, updated_at, the_geom_webmercator, preferred, lowername, cartodb_id, admin1_geonameid, iso2, admin1) FROM stdin;
COPY global_cities_points_limited (geoname_id, name, asciiname, altnames, featclass, featcode, iso2, admin1, admin2, population, the_geom, created_at, updated_at, the_geom_webmercator, cartodb_id, lowername) FROM stdin;
-- Check that the geocoding function is callable, should return success = true
SELECT (geocode_namedplace(Array['Barcelona'])).*
SELECT (geocode_namedplace(Array['Barcelona'])).*;
q | geom | success
-----------+----------------------------------------------------+---------
Barcelona | 0101000020E6100000CA15DEE522E653C0A4C2D842902B4540 | t
(1 row)
SELECT namedplace_guess_country(Array['Barcelona']);
namedplace_guess_country
--------------------------
ES
(1 row)

View File

@@ -288,6 +288,40 @@ CREATE OR REPLACE FUNCTION geocode_namedplace(places text[]) RETURNS SETOF geoco
END
$$;
-- Guess the country shared by a list of place names.
--
-- Each input name is lowercased and matched against
-- global_cities_points_limited.lowername. For every country (iso2) the number
-- of distinct input names found there is counted, and a country is a candidate
-- only if that count is strictly greater than 80% of the input array length.
--
-- Parameters:
--   places  names of the places to inspect; matching is case-insensitive.
-- Returns:
--   the iso2 value of the single best candidate, or NULL when no country
--   passes the threshold, when the two best candidates tie, or when the input
--   array is NULL or empty.
--
-- Declared STABLE rather than IMMUTABLE because the result depends on table
-- contents: an IMMUTABLE function may be folded to a constant at plan time
-- and keep returning a stale guess after the table changes.
--
-- NOTE(review): SECURITY DEFINER is used without a pinned search_path;
-- consider adding "SET search_path = ..." once the install schema is confirmed.
CREATE OR REPLACE FUNCTION namedplace_guess_country(places text[])
RETURNS text AS $$
DECLARE
    country_code text;
    -- Fraction of the input entries a country must exceed to be considered.
    threshold CONSTANT float := 0.8;
    -- Counts every array entry, duplicates included, whereas matches below are
    -- counted per distinct name. NULL for a NULL or empty array, in which case
    -- nothing is unnested and the function returns NULL.
    input_length integer := array_length(places, 1);
BEGIN
    BEGIN
        WITH hist AS (
            -- Distinct input names found in each country. iso2 is constant
            -- within a group, so counting distinct names is sufficient.
            SELECT count(DISTINCT lower(p.s)) AS c, gp.iso2
            FROM global_cities_points_limited gp
            INNER JOIN (SELECT unnest(places) AS s) p
                ON (gp.lowername = lower(p.s))
            GROUP BY gp.iso2
        ),
        best_two AS (
            -- Two rows are enough to detect a tie for first place.
            SELECT h.iso2, h.c
            FROM hist h
            WHERE h.c > input_length * threshold
            ORDER BY h.c DESC
            LIMIT 2
        )
        -- STRICT raises NO_DATA_FOUND when there is no candidate and
        -- TOO_MANY_ROWS when the two best candidates share the top count.
        SELECT bt.iso2 INTO STRICT country_code
        FROM (
            SELECT b.iso2, b.c, max(b.c) OVER () AS maxcount
            FROM best_two b
        ) bt
        WHERE bt.c = bt.maxcount;
    EXCEPTION
        WHEN NO_DATA_FOUND OR TOO_MANY_ROWS THEN
            -- No unambiguous winner: report "unknown" instead of failing.
            RETURN NULL;
    END;
    RETURN country_code;
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER STABLE;
--------------------------------------------------------------------------------
-- Support tables

View File

@@ -5,6 +5,7 @@ SELECT (geocode_namedplace(Array['sunapee', 'sunapeeee', 'New York City', 'Madri
SELECT (geocode_namedplace(Array['Portland', 'Portland', 'New York City'], Array['Maine', 'Oregon', NULL], 'USA')).*;
SELECT (geocode_namedplace(Array['Portland'], 'Oregon', 'USA')).*;
SELECT (geocode_namedplace(Array['Portland', 'Portland', 'New York City'], Array['Maine', 'Oregon', NULL], Array['USA'])).*;
SELECT namedplace_guess_country(Array['granada', 'jaen', 'cordoba', 'madrid', 'valladolid']);
-- Add a named place source
COPY global_cities_alternates_limited (geoname_id, name, the_geom, created_at, updated_at, the_geom_webmercator, preferred, lowername, cartodb_id, admin1_geonameid, iso2, admin1) FROM stdin;
@@ -16,5 +17,6 @@ COPY global_cities_points_limited (geoname_id, name, asciiname, altnames, featcl
\.
-- Check that the geocoding function is callable, should return success = true
SELECT (geocode_namedplace(Array['Barcelona'])).*
SELECT (geocode_namedplace(Array['Barcelona'])).*;
SELECT namedplace_guess_country(Array['Barcelona']);