-- TODO: implement search for timespan CREATE OR REPLACE FUNCTION cdb_observatory._OBS_SearchTables( search_term text, time_span text DEFAULT NULL ) RETURNS table(tablename text, timespan text) As $$ DECLARE out_var text[]; BEGIN IF time_span IS NULL THEN RETURN QUERY EXECUTE 'SELECT tablename::text, timespan::text FROM observatory.obs_table t JOIN observatory.obs_column_table ct ON ct.table_id = t.id JOIN observatory.obs_column c ON ct.column_id = c.id WHERE c.type ILIKE ''geometry'' AND c.id = $1' USING search_term; RETURN; ELSE RETURN QUERY EXECUTE 'SELECT tablename::text, timespan::text FROM observatory.obs_table t JOIN observatory.obs_column_table ct ON ct.table_id = t.id JOIN observatory.obs_column c ON ct.column_id = c.id WHERE c.type ILIKE ''geometry'' AND c.id = $1 AND t.timespan = $2' USING search_term, time_span; RETURN; END IF; END; $$ LANGUAGE plpgsql IMMUTABLE; -- Functions used to search the observatory for measures -------------------------------------------------------------------------------- -- TODO allow the user to specify the boundary to search for measures -- CREATE OR REPLACE FUNCTION cdb_observatory.OBS_Search( search_term text, relevant_boundary text DEFAULT null ) RETURNS TABLE(id text, description text, name text, aggregate text, source text) as $$ DECLARE boundary_term text; BEGIN IF relevant_boundary then boundary_term = ''; else boundary_term = ''; END IF; RETURN QUERY EXECUTE format($string$ SELECT id::text, description::text, name::text, aggregate::text, NULL::TEXT source -- TODO use tags FROM observatory.OBS_column where name ilike '%%' || %L || '%%' or description ilike '%%' || %L || '%%' %s $string$, search_term, search_term,boundary_term); RETURN; END $$ LANGUAGE plpgsql; -- Functions to return the geometry levels that a point is part of -------------------------------------------------------------------------------- -- TODO add test response CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetAvailableBoundaries( geom geometry(Geometry, 4326), timespan text DEFAULT null) RETURNS TABLE(boundary_id text, description text, time_span text, tablename text) as $$ DECLARE timespan_query TEXT DEFAULT ''; BEGIN IF timespan != NULL THEN timespan_query = format('AND timespan = %L', timespan); END IF; RETURN QUERY EXECUTE $string$ SELECT column_id::text As column_id, obs_column.description::text As description, timespan::text As timespan, tablename::text As tablename FROM observatory.OBS_table, observatory.OBS_column_table, observatory.OBS_column WHERE observatory.OBS_column_table.column_id = observatory.obs_column.id AND observatory.OBS_column_table.table_id = observatory.obs_table.id AND observatory.OBS_column.type = 'Geometry' AND ST_Intersects($1, st_setsrid(observatory.obs_table.the_geom, 4326)) $string$ || timespan_query USING geom; RETURN; END $$ LANGUAGE plpgsql; -- Functions the interface works from to identify available numerators, -- denominators, geometries, and timespans CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetAvailableNumerators( bounds GEOMETRY DEFAULT NULL, filter_tags TEXT[] DEFAULT NULL, denom_id TEXT DEFAULT NULL, geom_id TEXT DEFAULT NULL, timespan TEXT DEFAULT NULL ) RETURNS TABLE ( numer_id TEXT, numer_name TEXT, numer_description TEXT, numer_weight NUMERIC, numer_license TEXT, numer_source TEXT, numer_type TEXT, numer_aggregate TEXT, numer_extra JSONB, numer_tags JSONB, valid_denom BOOLEAN, valid_geom BOOLEAN, valid_timespan BOOLEAN ) AS $$ DECLARE geom_clause TEXT; BEGIN filter_tags := COALESCE(filter_tags, (ARRAY[])::TEXT[]); denom_id := COALESCE(denom_id, ''); geom_id := COALESCE(geom_id, ''); timespan := COALESCE(timespan, ''); IF bounds IS NULL THEN geom_clause := ''; ELSE geom_clause := 'ST_Intersects(the_geom, $5) AND'; END IF; RETURN QUERY EXECUTE format($string$ SELECT numer_id::TEXT, numer_name::TEXT, numer_description::TEXT, numer_weight::NUMERIC, NULL::TEXT license, NULL::TEXT source, numer_type numer_type, numer_aggregate numer_aggregate, numer_extra::JSONB numer_extra, numer_tags numer_tags, $1 = ANY(denoms) valid_denom, $2 = ANY(geoms) valid_geom, $3 = ANY(timespans) valid_timespan FROM observatory.obs_meta_numer WHERE %s (numer_tags ?& $4 OR CARDINALITY($4) = 0) $string$, geom_clause) USING denom_id, geom_id, timespan, filter_tags, bounds; RETURN; END $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetAvailableDenominators( bounds GEOMETRY DEFAULT NULL, filter_tags TEXT[] DEFAULT NULL, numer_id TEXT DEFAULT NULL, geom_id TEXT DEFAULT NULL, timespan TEXT DEFAULT NULL ) RETURNS TABLE ( denom_id TEXT, denom_name TEXT, denom_description TEXT, denom_weight NUMERIC, denom_license TEXT, denom_source TEXT, denom_type TEXT, denom_aggregate TEXT, denom_extra JSONB, denom_tags JSONB, valid_numer BOOLEAN, valid_geom BOOLEAN, valid_timespan BOOLEAN ) AS $$ DECLARE geom_clause TEXT; BEGIN filter_tags := COALESCE(filter_tags, (ARRAY[])::TEXT[]); numer_id := COALESCE(numer_id, ''); geom_id := COALESCE(geom_id, ''); timespan := COALESCE(timespan, ''); IF bounds IS NULL THEN geom_clause := ''; ELSE geom_clause := 'ST_Intersects(the_geom, $5) AND'; END IF; RETURN QUERY EXECUTE format($string$ SELECT denom_id::TEXT, denom_name::TEXT, denom_description::TEXT, denom_weight::NUMERIC, NULL::TEXT license, NULL::TEXT source, denom_type::TEXT, denom_aggregate::TEXT, denom_extra::JSONB, denom_tags::JSONB, $1 = ANY(numers) valid_numer, $2 = ANY(geoms) valid_geom, $3 = ANY(timespans) valid_timespan FROM observatory.obs_meta_denom WHERE %s (denom_tags ?& $4 OR CARDINALITY($4) = 0) $string$, geom_clause) USING numer_id, geom_id, timespan, filter_tags, bounds; RETURN; END $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetAvailableGeometries( bounds GEOMETRY DEFAULT NULL, filter_tags TEXT[] DEFAULT NULL, numer_id TEXT DEFAULT NULL, denom_id TEXT DEFAULT NULL, timespan TEXT DEFAULT NULL ) RETURNS TABLE ( geom_id TEXT, geom_name TEXT, geom_description TEXT, geom_weight NUMERIC, geom_aggregate TEXT, geom_license TEXT, geom_source TEXT, valid_numer BOOLEAN, valid_denom BOOLEAN, valid_timespan BOOLEAN, score NUMERIC, numtiles BIGINT, notnull_percent NUMERIC, numgeoms NUMERIC, percentfill NUMERIC, estnumgeoms NUMERIC, meanmediansize NUMERIC ) AS $$ DECLARE geom_clause TEXT; BEGIN filter_tags := COALESCE(filter_tags, (ARRAY[])::TEXT[]); numer_id := COALESCE(numer_id, ''); denom_id := COALESCE(denom_id, ''); timespan := COALESCE(timespan, ''); IF bounds IS NULL THEN geom_clause := ''; ELSE geom_clause := 'ST_Intersects(the_geom, $5) AND'; END IF; RETURN QUERY EXECUTE format($string$ WITH available_geoms AS ( SELECT geom_id::TEXT, geom_name::TEXT, geom_description::TEXT, geom_weight::NUMERIC, NULL::TEXT geom_aggregate, NULL::TEXT license, NULL::TEXT source, $1 = ANY(numers) valid_numer, $2 = ANY(denoms) valid_denom, $3 = ANY(timespans) valid_timespan FROM observatory.obs_meta_geom WHERE %s (geom_tags ?& $4 OR CARDINALITY($4) = 0) ), scores AS ( SELECT * FROM cdb_observatory._OBS_GetGeometryScores($5, (SELECT ARRAY_AGG(geom_id) FROM available_geoms) ) ) SELECT available_geoms.*, score, numtiles, notnull_percent, numgeoms, percentfill, estnumgeoms, meanmediansize FROM available_geoms, scores WHERE available_geoms.geom_id = scores.geom_id $string$, geom_clause) USING numer_id, denom_id, timespan, filter_tags, bounds; RETURN; END $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetAvailableTimespans( bounds GEOMETRY DEFAULT NULL, filter_tags TEXT[] DEFAULT NULL, numer_id TEXT DEFAULT NULL, denom_id TEXT DEFAULT NULL, geom_id TEXT DEFAULT NULL ) RETURNS TABLE ( timespan_id TEXT, timespan_name TEXT, timespan_description TEXT, timespan_weight NUMERIC, timespan_aggregate TEXT, timespan_license TEXT, timespan_source TEXT, valid_numer BOOLEAN, valid_denom BOOLEAN, valid_geom BOOLEAN ) AS $$ DECLARE geom_clause TEXT; BEGIN filter_tags := COALESCE(filter_tags, (ARRAY[])::TEXT[]); numer_id := COALESCE(numer_id, ''); denom_id := COALESCE(denom_id, ''); geom_id := COALESCE(geom_id, ''); IF bounds IS NULL THEN geom_clause := ''; ELSE geom_clause := 'ST_Intersects(the_geom, $5) AND'; END IF; RETURN QUERY EXECUTE format($string$ SELECT timespan_id::TEXT, timespan_name::TEXT, timespan_description::TEXT, timespan_weight::NUMERIC, NULL::TEXT timespan_aggregate, NULL::TEXT license, NULL::TEXT source, $1 = ANY(numers) valid_numer, $2 = ANY(denoms) valid_denom, $3 = ANY(geoms) valid_geom_id FROM observatory.obs_meta_timespan WHERE %s (timespan_tags ?& $4 OR CARDINALITY($4) = 0) $string$, geom_clause) USING numer_id, denom_id, geom_id, filter_tags, bounds; RETURN; END $$ LANGUAGE plpgsql; -- Function below should replace SQL in -- https://github.com/CartoDB/cartodb/blob/ab465cb2918c917940e955963b0cd8a050c06600/lib/assets/javascripts/cartodb3/editor/layers/layer-content-views/analyses/data-observatory-metadata.js CREATE OR REPLACE FUNCTION cdb_observatory.OBS_LegacyBuilderMetadata( aggregate_type TEXT DEFAULT NULL ) RETURNS TABLE ( name TEXT, subsection JSONB ) AS $$ DECLARE aggregate_condition TEXT DEFAULT ''; BEGIN IF aggregate_type IS NOT NULL THEN aggregate_condition := format(' AND numer_aggregate = %L ', aggregate_type); END IF; RETURN QUERY EXECUTE format($string$ WITH expanded AS ( SELECT JSONB_Build_Object('id', numer_id, 'name', numer_name) "column", SUBSTR((sections).key, 9) section_id, (sections).value section_name, SUBSTR((subsections).key, 12) subsection_id, (subsections).value subsection_name FROM ( SELECT numer_id, numer_name, jsonb_each_text(numer_tags) as sections, jsonb_each_text as subsections FROM (SELECT numer_id, numer_name, numer_tags, jsonb_each_text(numer_tags) FROM cdb_observatory.obs_getavailablenumerators() WHERE numer_weight > 0 %s ) foo ) bar WHERE (sections).key LIKE 'section/%%' AND (subsections).key LIKE 'subsection/%%' ), grouped_by_subsections AS ( SELECT JSONB_Agg(JSONB_Build_Object('f1', "column")) AS columns, section_id, section_name, subsection_id, subsection_name FROM expanded GROUP BY section_id, section_name, subsection_id, subsection_name ) SELECT section_name as name, JSONB_Agg( JSONB_Build_Object( 'f1', JSONB_Build_Object( 'name', subsection_name, 'id', subsection_id, 'columns', columns ) ) ) as subsection FROM grouped_by_subsections GROUP BY section_name $string$, aggregate_condition); RETURN; END $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetGeometryScores( bounds Geometry(Geometry, 4326) DEFAULT NULL, filter_geom_ids TEXT[] DEFAULT NULL, desired_num_geoms INTEGER DEFAULT 3000 ) RETURNS TABLE ( score NUMERIC, numtiles BIGINT, geom_id TEXT, notnull_percent NUMERIC, numgeoms NUMERIC, percentfill NUMERIC, estnumgeoms NUMERIC, meanmediansize NUMERIC ) AS $$ BEGIN filter_geom_ids := COALESCE(filter_geom_ids, (ARRAY[])::TEXT[]); RETURN QUERY EXECUTE format($string$ SELECT ((100.0 / (1+abs(log(1 + $3) - log(1 + numgeoms)))) * percentfill)::Numeric AS score, * FROM ( WITH clipped_geom AS ( SELECT column_id, table_id , CASE WHEN $1 IS NOT NULL THEN ST_Clip(tile, $1, True) ELSE tile END clipped_tile , tile FROM observatory.obs_column_table_tile WHERE ($1 IS NULL OR ST_Intersects($1, tile)) AND (column_id = ANY($2) OR cardinality($2) = 0) ), clipped_geom_countagg AS ( SELECT column_id, table_id , ST_CountAgg(clipped_tile, 2, True)::Numeric notnull_pixels , ST_CountAgg(clipped_tile, 2, False)::Numeric pixels FROM clipped_geom GROUP BY column_id, table_id ) SELECT count(*)::BIGINT, a.column_id , (CASE WHEN cdb_observatory.FIRST(notnull_pixels) > 0 THEN cdb_observatory.FIRST(notnull_pixels) / cdb_observatory.FIRST(pixels) ELSE 1 END)::Numeric AS notnull_percent , (CASE WHEN cdb_observatory.FIRST(notnull_pixels) > 0 THEN (ST_SummaryStatsAgg(clipped_tile, 2, True)).sum ELSE COALESCE(ST_Value(cdb_observatory.FIRST(tile), 2, ST_PointOnSurface($1)), 0) * (ST_Area($1) / ST_Area(ST_PixelAsPolygon(cdb_observatory.FIRST(tile), 0, 0)) * cdb_observatory.FIRST(pixels)) END)::Numeric AS numgeoms , (CASE WHEN cdb_observatory.FIRST(notnull_pixels) > 0 THEN (ST_SummaryStatsAgg(clipped_tile, 3, True)).mean ELSE COALESCE(ST_Value(cdb_observatory.FIRST(tile), 3, ST_PointOnSurface($1)), 0) END)::Numeric AS percentfill , ((ST_Area(ST_Transform($1, 3857)) / 1000000) / NullIf( CASE WHEN cdb_observatory.FIRST(notnull_pixels) > 0 THEN (ST_SummaryStatsAgg(clipped_tile, 1, True)).mean ELSE Coalesce(ST_Value(cdb_observatory.FIRST(tile), 1, ST_PointOnSurface($1)), 0) END, 0))::Numeric AS estnumgeoms , (CASE WHEN cdb_observatory.FIRST(notnull_pixels) > 0 THEN (ST_SummaryStatsAgg(clipped_tile, 1, True)).mean ELSE COALESCE(ST_Value(cdb_observatory.FIRST(tile), 1, ST_PointOnSurface($1)), 0) END)::Numeric AS meanmediansize FROM clipped_geom_countagg a, clipped_geom b WHERE a.table_id = b.table_id AND a.column_id = b.column_id GROUP BY a.column_id, a.table_id ORDER BY a.column_id, a.table_id ) foo $string$) USING bounds, filter_geom_ids, desired_num_geoms; RETURN; END $$ LANGUAGE plpgsql;