Compare commits

..

86 Commits

Author SHA1 Message Date
Javier Goizueta
8f1435c049 Release 0.16.2 2016-04-27 18:30:26 +02:00
Javier Goizueta
8302f89413 Merge pull request #246 from CartoDB/245-categories-mode
Use the mode to aggregate category columns in overviews
2016-04-27 18:16:05 +02:00
Javier Goizueta
e9050178a8 Merge branch 'master' of github.com:CartoDB/cartodb-postgresql 2016-04-27 16:23:46 +02:00
Javier Goizueta
3e34ca4654 Overviews documentation fixes 2016-04-27 16:23:25 +02:00
Javier Goizueta
a067cc7da1 Generate stats used to identify category columns in overviews if needed
This only generates the stats if no stats are available for a table.
This doesn't warrant that the stats are up to date or accurate.
2016-04-27 15:06:09 +02:00
Javier Goizueta
2c43943df6 Fix syntax 2016-04-26 18:27:52 +02:00
Javier Goizueta
417cbe7902 Fix category columns aggregation in overviews
Overviews are created in cascade, each one from the inmediate
lower level, but the stats to decide if a column is a category
should be taken always from the base table.
2016-04-26 18:02:25 +02:00
Javier Goizueta
9a73703954 Use mode to aggregate categorical columns in overviews
Fixes #245
2016-04-26 15:15:24 +02:00
Rafa de la Torre
36ac831bd1 Update cartodbfy-requirements.rst
Fix broken link to doc
2016-04-26 14:43:24 +02:00
Javier Goizueta
1358964628 Release 0.16.1 2016-04-25 18:47:42 +02:00
Javier Goizueta
efe381ad94 Merge pull request #243 from CartoDB/241-webmercator
Compute webmercator resolution with full accuracy
2016-04-25 17:30:40 +02:00
Javier Goizueta
f7cce21eb7 Merge pull request #242 from CartoDB/240-overviews-pixels
Adjust overview points to pixel centers
2016-04-25 17:30:25 +02:00
Javier Goizueta
18267477da Merge pull request #238 from CartoDB/235-column-names
Optimize column information functions
2016-04-25 17:30:07 +02:00
Javier Goizueta
11ad45306f Remove unneeded pg_catalog schema name 2016-04-25 16:30:58 +02:00
Javier Goizueta
75c7ae98e4 Compute webmercator resolution with full accuracy
Fixes #241
2016-04-25 14:02:26 +02:00
Javier Goizueta
3c12cf629f Optimize overview pixel adjustment for integer-pixel cells 2016-04-25 13:53:59 +02:00
Javier Goizueta
7b2100b51e Adjust overview coordinates to pixel centers
This makes the adjustment for all grid sizes, not only
for integral number of pixels.
2016-04-25 13:33:43 +02:00
Javier Goizueta
580ec38ab8 Adjust overview clustered point to pixel centers
Fixes #240
2016-04-23 15:07:06 +02:00
Raul Ochoa
897689dd43 Release 0.16.0 2016-04-19 15:44:37 +02:00
Raul Ochoa
808fc9fc25 Merge pull request #237 from CartoDB/analysis-catalog
Adds table for storing camshaft analysis nodes
2016-04-19 15:32:46 +02:00
Javier Goizueta
65415bb335 Optimize funcion CDB_COlumnType 2016-04-18 19:07:33 +02:00
Javier Goizueta
06ebb27160 Optimize internal funcion _cdb_unlimited_text_column 2016-04-18 18:50:37 +02:00
Javier Goizueta
bd5ae84e90 Optimize CDB_ColumnNames
This implementation is about 1000 times faster
2016-04-18 18:49:58 +02:00
Raul Ochoa
de5a702510 Adds table for storing camshaft analysis nodes 2016-04-18 17:41:39 +02:00
Javier Goizueta
6908fb4672 Release 0.15.1
Overviews bugfixes & enhancements
2016-04-15 18:15:35 +02:00
Javier Goizueta
a528a250d4 Merge pull request #234 from CartoDB/231-overviews-text-aggr
Aggregate small number of text items in overviews
2016-04-15 18:04:07 +02:00
Javier Goizueta
ef43623f77 Remove unneeded variable 2016-04-15 17:58:03 +02:00
Javier Goizueta
09ad550de3 Fix tests 2016-04-15 17:50:47 +02:00
Javier Goizueta
1b0f77aa96 Always retain single-valued aggregated texts
This makes columns which have the same value in a group to be aggregated
maintain that value (rather than replacing it by the multiple-value
indicator *) whatever the group value is. (Previously this happend
only for small groups)
2016-04-15 17:49:00 +02:00
Javier Goizueta
45f063d469 Aggregate small number of text items in overviews
Instead of nulling text fields for non-singleton aggregated records
concatenate distinct text values when they're few (5 or less).
Fixes #231
2016-04-15 12:37:16 +02:00
Carla
20989e2f28 Merge pull request #233 from CartoDB/232-overviews-avg
Fix AVG computation in overview tables
2016-04-15 11:10:26 +02:00
Javier Goizueta
176d69d09e Fix AVG computation in overview tables
Fixes #232
Averages of averages are not equal to overall averages.
2016-04-15 10:48:08 +02:00
Javier Goizueta
9fdbfda60a Merge pull request #228 from CartoDB/225-no-centroid-master
Use cell centers, not cluster centroids when grouping points
2016-04-15 10:06:44 +02:00
Javier Goizueta
9a3d93976c Merge pull request #227 from CartoDB/226-add_count_aggregated_features
Include and aggregate _vovw_count column to count aggregated features
2016-04-15 10:06:05 +02:00
Javier Goizueta
46b45f6dd4 Merge pull request #224 from CartoDB/223-fix-dropoverviews
Fix CDB_DropOverviews and CDB_Overviews problems
2016-04-15 10:05:28 +02:00
Carla Iriberri
fd14750ce5 Rename _vovw_count to _feature_count 2016-04-14 18:23:09 +02:00
Javier Goizueta
c595e45c11 Add _vovw_count columnt to tables for which overviews are created
Initially we planned to add this column to the queries sent to the
tiler only, but that makes the column hard to access from the editor.
2016-04-14 17:32:18 +02:00
Carla
1cf7074fb1 Merge pull request #230 from CartoDB/229-set_tolerance_px_to_1_overviews
Set tolerance to 1 pixel in overviews by default
2016-04-14 17:26:37 +02:00
Javier Goizueta
f785e71d3b Fix: numeric is a valid numeric column type
Actually this is the type of aggregated _vovw_count columns
2016-04-14 15:46:03 +02:00
Carla
14b8cd7d99 Set default value to 1 and remove typo line 2016-04-14 12:12:17 +02:00
Carla
213adcca16 Fixes tests for tolerance_px = 1.0, with no zoom 3 2016-04-14 11:24:10 +02:00
Carla
1a571c8a9c Set tolerance to 1 pixel 2016-04-14 11:13:59 +02:00
Carla Iriberri
8f44f5347a Fix indent for code clarity 2016-04-13 17:51:30 +02:00
Carla Iriberri
f96163265b Fix bug for tables without geom or with no potential overviews
If the table doesn't have geometries but the createoverviews function is
called, the current geometry type checks won't work because "null" will
not give a boolean value in the type comparisons.

Also, if the createoverviews function is called over a simple table with
would not require overviews according to the strategies it is handled
correctly.
2016-04-13 17:49:38 +02:00
Javier Goizueta
1c67214b09 Use cell centers, not cluster centroids when grouping points
Fixes #225
2016-04-13 11:14:09 +02:00
Carla Iriberri
16d08ef52b Include and aggregate _vovw_count column to count aggregated features 2016-04-12 11:10:55 +02:00
Javier Goizueta
15ac9a2cd9 Remove unneeded code 2016-04-07 10:30:10 +02:00
Javier Goizueta
ee61d46100 💄 rename variable for clarity 2016-04-07 10:24:02 +02:00
Javier Goizueta
49e7094c8a Make CDB_CreateOverview usable by superuser
CDB_CreateOverview had to be executed with the user role
corresponding to the owner of the table; now it can be executed
by the postgres user.
2016-04-07 07:52:58 +02:00
Javier Goizueta
fb910be12f Fix conversion of regclass to indentifier 2016-04-07 07:07:20 +02:00
Javier Goizueta
34c39662ec Replace use of CDB_UserTables in CDB_Overviews
Use a function that returns reclasses and schema names properly instead.
2016-04-07 00:07:45 +02:00
Javier Goizueta
84cac16d1c Temporary fix 2016-04-06 22:05:00 +02:00
Javier Goizueta
c1fc07d2ac Fix typo
This function isn't beint actively used; should consider removing it
or testing it properly
2016-04-06 18:58:37 +02:00
Javier Goizueta
5c3c0f5fc9 Fix bug in CDB_DropOverviews
Fixes #223
2016-04-06 18:57:52 +02:00
Javier Goizueta
e68d5eca45 Release 0.15.0
This release includes some overviews enhancements
2016-04-05 14:22:46 +02:00
Javier Goizueta
16a58c479d Merge pull request #221 from CartoDB/219-overviews-tolerance
New function CDB_CreateOverviewsWithToleranceInPixels
2016-04-05 14:01:25 +02:00
Javier Goizueta
06bb669d4c Add comment
Clarify the reason why some functions are dropped at the
end of the file and not where their new definitions are.
2016-04-05 13:22:07 +02:00
Javier Goizueta
00a3d6e650 Fix upgrades from prior versions 2016-04-05 13:10:00 +02:00
Javier Goizueta
f0ff197c56 Merge branch 'master' into 219-overviews-tolerance
# Conflicts:
#	scripts-available/CDB_Overviews.sql
#	test/CDB_OverviewsTest_expect
2016-04-05 13:08:22 +02:00
Javier Goizueta
c6885c2972 Merge pull request #220 from CartoDB/218-org-users-overviews
Add support for explicit schema names in overview functions
2016-04-05 13:01:16 +02:00
Javier Goizueta
57c32332e2 New function CDB_CreateOverviewsWithToleranceInPixels
This function allows defining the previously fixed parameter grid_px.
The lim parameter used to define the reference Z level is also
correlated to this value.

Fixes #219
2016-04-04 19:21:10 +02:00
Javier Goizueta
3c71eecbae Fix cases that require explicit schema name
This allows using overview functions in situations where
the base tables require being qualified with the schema name.
2016-04-04 17:45:20 +02:00
Javier Goizueta
6d9424746c Fix expected tests output 2016-04-04 17:42:10 +02:00
Javier Goizueta
c0262a05eb Fix CDB_Overviews for cases that require explicit schema name
Fixes #218
2016-04-04 17:02:59 +02:00
Javier Goizueta
aff7ae3e2e Release 0.14.4 2016-03-29 13:09:09 +02:00
Carla
01757a1b6d Merge pull request #207 from CartoDB/update_docs_user_table
Update CartoDB User table docs and minor edits on cartodbfy reqs
2016-03-29 12:58:20 +02:00
Carla
f946bfe4fe remove bigint mention 2016-03-29 12:57:58 +02:00
Javier Goizueta
a098713bfa Merge pull request #215 from CartoDB/213-makefile-echo
Fix backslashes in generated files
2016-03-29 12:51:02 +02:00
Javier Goizueta
217ff4ffb9 Merge pull request #214 from CartoDB/211-boolean-col-overviews
Support boolean columns when creating overviews
2016-03-29 12:50:57 +02:00
Javier Goizueta
a0bb7b1b03 Fix backslashes in generated files
Fixes #213
Some systems treat escaping of text passed to the echo command
from a Makefile differently.
This seems to work for our needs.
2016-03-21 14:59:05 +01:00
Javier Goizueta
4074173c05 Add support for boolean columns in overview creation 2016-03-21 12:58:29 +01:00
Javier Goizueta
603b1ceed8 Adjust Overviews test expectations 2016-03-21 12:58:00 +01:00
Javier Goizueta
0caf6c50dd Fix the tests for boolean columns and overview creation 2016-03-21 12:27:00 +01:00
Carla
224b2ce395 Update travis button to show only master status 2016-03-19 12:33:17 +01:00
Javier Goizueta
70eeab5748 Tests for revealing issue #211
Test creating overviews for tables with boolean columns
2016-03-17 19:27:51 +01:00
Carla
06c05a1d67 Merge pull request #212 from CartoDB/release_0_14_3
Release 0.14.3
2016-03-17 10:53:59 +01:00
Carla
6567d5441b Release 0.14.3 2016-03-17 10:45:54 +01:00
Carla
e10d7c3a27 Update NEWS.md 2016-03-17 10:44:12 +01:00
Carla
b89a752548 Merge pull request #210 from CartoDB/209-remove_bigint_casting_cartodb_id
Remove casting to bigint in cartodb_id column
2016-03-17 10:31:06 +01:00
Carla
90fa45b59d Remove casting to bigint 2016-03-16 11:03:46 +01:00
Carla
eb48e26eec Update CartoDB-user-table.rst 2016-03-15 16:39:56 +01:00
Carla
85e1b92199 Add non-zero requirement 2016-03-15 16:39:39 +01:00
Carla
6ea0013343 Update cartodbfy-requirements.rst 2016-03-09 15:49:18 +01:00
Carla
80bd7f8f10 Update cartodbfy-requirements.rst 2016-03-09 15:48:33 +01:00
Carla
bb061981ad style edits 2016-03-09 15:47:26 +01:00
Carla
e289b4725f Update information and convert to .rst 2016-03-09 15:26:45 +01:00
19 changed files with 607 additions and 185 deletions

View File

@@ -1,7 +1,7 @@
# cartodb/Makefile
EXTENSION = cartodb
EXTVERSION = 0.14.2
EXTVERSION = 0.16.2
SED = sed
@@ -62,6 +62,13 @@ UPGRADABLE = \
0.14.0 \
0.14.1 \
0.14.2 \
0.14.3 \
0.14.4 \
0.15.0 \
0.15.1 \
0.16.0 \
0.16.1 \
0.16.2 \
$(EXTVERSION)dev \
$(EXTVERSION)next \
$(END)
@@ -115,6 +122,9 @@ $(EXTENSION).control: $(EXTENSION).control.in Makefile
cartodb_version.sql: cartodb_version.sql.in Makefile $(GITDIR)/index
$(SED) -e 's/@@VERSION@@/$(EXTVERSION)/' $< > $@
# Needed for consistent `echo` results with backslashes
SHELL = bash
legacy_regress: $(REGRESS_OLD) Makefile
mkdir -p sql/test/
mkdir -p expected/test/
@@ -122,14 +132,14 @@ legacy_regress: $(REGRESS_OLD) Makefile
for f in $(REGRESS_OLD); do \
tn=`basename $${f} .sql`; \
of=sql/test/$${tn}.sql; \
echo '\\set ECHO none' > $${of}; \
echo '\\a' >> $${of}; \
echo '\\t' >> $${of}; \
echo '\\set QUIET off' >> $${of}; \
echo '\set ECHO none' > $${of}; \
echo '\a' >> $${of}; \
echo '\t' >> $${of}; \
echo '\set QUIET off' >> $${of}; \
cat $${f} | \
$(SED) -e 's/public\./cartodb./g' >> $${of}; \
exp=expected/test/$${tn}.out; \
echo '\\set ECHO none' > $${exp}; \
echo '\set ECHO none' > $${exp}; \
cat test/$${tn}_expect >> $${exp}; \
done

58
NEWS.md
View File

@@ -1,3 +1,61 @@
0.16.2 (2016-04-27)
-------------------
* Use the mode to aggregate category columns in overviews
[#246](https://github.com/CartoDB/cartodb-postgresql/pull/246)
0.16.1 (2016-04-25)
-------------------
* Optimize column information functions performance
[#238](https://github.com/CartoDB/cartodb-postgresql/pull/238)
* Adjust overview points to pixel centers
[#242](https://github.com/CartoDB/cartodb-postgresql/pull/242)
* Compute webmercator resolution using full numeric precision
[#243](https://github.com/CartoDB/cartodb-postgresql/pull/243)
0.16.0 (2016-04-15)
-------------------
* Adds table for storing camshaft analysis nodes
[#237](https://github.com/CartoDB/cartodb-postgresql/pull/237)
0.15.1 (2016-04-15)
-------------------
* Fix problems with org users in overviews functions
[#224](https://github.com/CartoDB/cartodb-postgresql/pull/224)
* Add `_feature_count` to overviews
[#227](https://github.com/CartoDB/cartodb-postgresql/pull/227)
* Change point clustering behaviour of overviews
[#228](https://github.com/CartoDB/cartodb-postgresql/pull/228)
* Change default tolerance of overviews
[#230](https://github.com/CartoDB/cartodb-postgresql/pull/230)
* Fix problem with aggregated numerical fields in overviews
[#233](https://github.com/CartoDB/cartodb-postgresql/pull/233)
* Enhance aggregation of text fields in overviews
[#234](https://github.com/CartoDB/cartodb-postgresql/pull/234)
0.15.0 (2016-04-05)
-------------------
* New function CDB_CreateOverviewsWithToleranceInPixels that adds tolerance parameter for overview creation
[#221](https://github.com/CartoDB/cartodb-postgresql/pull/221)
* New default value for the overviews tolerance in pixels is 2 (used to be 7.5) (also in #221)
* The feature density limit used to choose the reference Z level now depends on the tolerance in pixels (also in #221)
* Tables that require an explicit schema can now be passed to overview functions
[#220](https://github.com/CartoDB/cartodb-postgresql/pull/220)
0.14.4 (2016-03-29)
-------------------
* Fix creating overviews for tables with boolean columns
[#214](https://github.com/CartoDB/cartodb-postgresql/pull/214)
* Fix tests for some systems [#215](https://github.com/CartoDB/cartodb-postgresql/pull/215)
0.14.3 (2016-03-17)
-------------------
* Fix for `cartodb_id` bigint casting hardcoded in 0.14.2 to support `cartodb_id` text columns [#210](https://github.com/CartoDB/cartodb-postgresql/pull/210)
0.14.2 (2016-03-15)
-------------------
* Support text `cartodb_id` columns in `_CDB_Has_Usable_Primary_ID` [#202](https://github.com/CartoDB/cartodb-postgresql/pull/202)

View File

@@ -1,7 +1,7 @@
cartodb-postgresql
==================
[![Build Status](http://travis-ci.org/CartoDB/cartodb-postgresql.png)]
[![Build Status](http://api.travis-ci.org/CartoDB/cartodb-postgresql.svg?branch=master)]
(http://travis-ci.org/CartoDB/cartodb-postgresql)
PostgreSQL extension for CartoDB

View File

@@ -2,18 +2,25 @@ Overviews are tables that represent a *reduced* version of a dataset intended
for efficient rendering at certain zoom levels while preserving the
general visual appearance of the complete dataset.
The *reduction* consists in a fewer number of records
The *reduction* consists of having fewer records
(while each overview record may represent an aggregation of multiple records)
and/or simplified record geometries.
Overviews are created through the `CDB_CreateOverviews`.
Overviews are created through the `CDB_CreateOverviews` function.
The statement timeout may need to be adjusted before using this function,
as overview creation for large tables is a time-consuming operation.
The `CDB_Overviews` function can be used to determine what overview tables
exist for a given dataset table and which zoom levels correspond to it.
The `CDB_DropOverviews` remove a dataset's existing overviews.
The `CDB_DropOverviews` function removes a dataset's existing overviews.
To know if overview tables exist for some base table, and to obtain
a list of which overview tables are appropriate for which zoom levels,
the `CDB_Overviews` function can be used.
The zoom levels we're referring to here are those used
by the tiler: http://wiki.openstreetmap.org/wiki/Zoom_levels
### CDB_CreateOverviews
@@ -48,6 +55,17 @@ CDB_CreateOverviews(table_name, ref_z_strategy, reduction_strategy)
- **base_z** integer, base Z level assigned to the base table.
- **overview_z** integer, Z level for which to generate the overview.
#### Tolerance / level of detail
The level of detail to be representable by each overview layer can
be specified as a tolerance in pixels (if different from the default of 1 pixel)
with the function `CDB_CreateOverviewsWithToleranceInPixels`
which has as a second additional argument the desired tolerance.
This tolerance defines the maximum deviation in pixels of the overviews
geometries with respect to the original geometries when overview tables
are used for their intended zoom level.
### CDB_Overviews
Obtain overview metadata for a given table (existing overviews).
@@ -72,7 +90,7 @@ SELECT CDB_Overviews(CDB_QueryTablesText('SELECT * FROM table1, table2'));
The result of `CDB_Overviews` has three columns:
| base_table | z | overview_table |
|------------+---+----------------|
| ---------- | - | -------------- |
| table1 | 1 | table1_ov1 |
| table1 | 2 | table1_ov2 |
| table1 | 4 | table1_ov4 |

View File

@@ -1,29 +0,0 @@
A "cartodb" user table is a table with a well-known set of fields and a well-known set of triggers attached on.
The fields are:
- `cartodb_id`, a numerical primary key of serial type
- `created_at`, timestamp with timezone not null default now()
- `updated_at`, timestamp with timezone not null default now()
- `the_geom`, geometry, GiST indexed, constrained (see below)
- `the_geom_webmercator`, geometry, GiST indexed, constrained (see below)
The values of "the_geom" and "the_geom_webmercator" must match these constraints:
- Only POINT, MULTILINE, MULTIPOLYGON types ? Maybe UNCONSTRAINED
- Only 2 dimensions ? Maybe UNCONSTRAINED
- SRID=4326 for the_geom and SRID=3857 for the_geom_webmercator
The triggers are:
- `track_updates` after modifying statement updates cdb_tablemetadata
- `test_quota` before changing statement to forbid if overquota
- `test_quota_per_row` before changing row to forbod if overquota (checked on a probabilistic basis)
- `update_the_geom_webmercator` before insert or update row to maintain the_geom_webmercator
- `update_updated_at_trigger` before update row to maintain updated_at
Some conversions will be attempted to perform upon cartodbfication when certain fields appear:
- `cartodb_id`: If found type TEXT will be attempted to cast
- `created_at`: If found type TEXT will be attempted to cast
- `updated_at`: If found type TEXT will be attempted to cast

View File

@@ -0,0 +1,68 @@
CartoDB User Table
==================
Introduction
----------
A CartoDB user table is a table with a well-known set of columns and a well-known set of triggers attached on.
Columns
----------
The required columns of a CartoDB table are:
- ``cartodb_id``
- This column will be used as the primary key of the table and it has a sequence as default value
- Its values must be integer, non-zero, non-null and unique
- B-Tree indexed
- ``the_geom``
- This column stores the main geometric features of a table
- The type of the column in the Postgres database is ``geometry(Geometry,4326)``
- GiST indexed
- geometry, GiST indexed, constrained (see below)
- ``the_geom_webmercator``
- This column stores the geometries used for rendering purposes
- The type of the column in the Postgres database is ``geometry(Geometry,3857)``
- GiST indexed
- This column is automatically updated by the system when the ``the_geom`` column is updated or when there is an insertion of a new row into the table (See triggers below)
The values of ``the_geom`` and ``the_geom_webmercator`` must be two-dimensional Points, MultiLineStrings or MultiPolygons. Different geometric types in a CartoDB table are not supported.
Described table example
^^^^^^^^^^
::
Column | Type | Modifiers
----------------------+-------------------------+--------------------------------------------------------
cartodb_id | bigint | not null default nextval('t_cartodb_id_seq'::regclass)
the_geom | geometry(Geometry,4326) |
the_geom_webmercator | geometry(Geometry,3857) |
Indexes:
"table_name_pkey" PRIMARY KEY, btree (cartodb_id)
"table_name_the_geom_idx" gist (the_geom)
"table_name_the_geom_webmercator_idx" gist (the_geom_webmercator)
Triggers
----------
The triggers generated in each CartoDB table are:
- ``track_updates`` after modifying statement updates ``cdb_tablemetadata``
- ``test_quota`` before changing statement to forbid if overquota
- ``test_quota_per_row`` before insert or update row to forbid if overquota (checked on a probabilistic basis)
- ``update_the_geom_webmercator`` before insert or update row to maintain the ``the_geom_webmercator`` updated with the contents in ``the_geom``
Described triggers example
^^^^^^^^^^
::
test_quota BEFORE INSERT OR UPDATE ON t FOR EACH STATEMENT EXECUTE PROCEDURE cdb_checkquota('0.1', '-1', 'public')
test_quota_per_row BEFORE INSERT OR UPDATE ON t FOR EACH ROW EXECUTE PROCEDURE cdb_checkquota('0.001', '-1', 'public')
track_updates AFTER INSERT OR DELETE OR UPDATE OR TRUNCATE ON t FOR EACH STATEMENT EXECUTE PROCEDURE cdb_tablemetadata_trigger()
update_the_geom_webmercator_trigger BEFORE INSERT OR UPDATE OF the_geom ON t FOR EACH ROW EXECUTE PROCEDURE _cdb_update_the_geom_webmercator()
Further details
----------
Some conversions will be attempted upon cartodbfication when certain fields appear:
- ``cartodb_id``: If found with type TEXT, a cast to integer will be attempted. If it is not castable, an error will be raised.
- ``the_geom``: If found type TEXT will be attempted to cast to geometry(Geometry,4326).

View File

@@ -1,59 +1,63 @@
CartoDBfy Requirements
======================
Introduction
============
------------
This document aims at describing what cartodbfy is and what its formal requirements are, with the following goals in mind:
This document aims at describing what the CartoDBfication is and what its formal requirements are, with the following goals in mind:
- clarify what are the expectations of the "cartodbfycation process".
- define an important part of what should be a stable, public API
- allow for better testing, which should in turn...
- Clarify what are the expectations of the "cartodbfycation process".
- Define an important part of what should be a stable, public API
- Allow for better testing, which should in turn...
- ...ease modifications and increase quality of the code
What is the CartoDBfycation
---------------------------
What is the cartodbfycation
===========================
The cartodbfycation is the process of converting an arbitrary postgres table into a valid CartoDB table, and register it in the system so that it can be used in the CartoDB editor and platform to generate maps and analysis.
The CartoDBfycation is the process of converting an arbitrary postgres table into a valid CartoDB table, and register it in the system so that it can be used in the CartoDB editor and platform to generate maps and analysis.
It is performed by running the function ``CDB_CartodbfyTable(reloid REGCLASS)`` over a target table.
Valid CartoDB tables
====================
--------------------
A valid CartoDB table shall meet the following conditions:
- Have a ``cartodb_id`` integer column as primary key with a sequence as default value
- Have a ``cartodb_id`` column with integer, unique, non-zero and non-null values as primary key with a sequence as default value
- Have a ``the_geom`` column of type ``Geometry`` with SRID 4326
- Have a ``the_geom_webmercator`` column of type ``Geometry`` with SRID 3857
- The columns ``the_geom`` and ``the_geom_webmercator`` shall be in sync
- The columns ``the_geom`` and ``the_geom_webmercator`` shall be in sync (task of the ``update_the_geom_webmercator`` trigger)
Additionally, a CartoDB table can contain other columns.
See the `CartoDB User Table documentation`_
.. _CartoDB User Table documentation: https://github.com/CartoDB/cartodb-postgresql/blob/master/doc/CartoDB-user-table.rst
for further information.
High level requirements
=======================
-----------------------
Here is a list of high level requirements for the public function ``CDB_CartodbfyTable()``:
- A call to ``CDB_CartodbfyTable()`` shall modify/rewrite the table and produce a valid CartoDB table with the same name.
- A call to ``CDB_CartodbfyTable()`` shall cause the registration of the table into the platform
- It shall be idempotent, meaning that successive calls to ``CDB_CartodbfyTable()`` shall not produce any visible effect in the system.
- A call to the function shall modify/rewrite the table and produce a valid CartoDB table with the same name.
- A call to the function shall cause the registration of the table into the platform.
- It shall be idempotent, meaning that successive calls to the function shall not produce any visible effect in the system.
- If there's a column containing a geometry, it shall be used to generate ``the_geom`` and the ``the_geom_webmercator`` columns.
- Exporting and re-importing the same table in CartoDB shall produce equivalent tables, with the same features associated to the same ``cartodb_id``'s.
Note that there should be only one feature per row in the source table. If there's more than one, then which one is used for ``the_geom`` and ``the_geom_webmercator`` fields is not determined.
Note that there should be only one geometry per row in the source table. If there's more than one, then which one is used for ``the_geom`` and ``the_geom_webmercator`` fields is not determined.
Low-level requirements
======================
----------------------
- If the original table contains a valid (unique and not null) ``cartodb_id`` column, it shall be used
- If the original table contains a ``the_geom`` column or a ``the_geom_webmercator`` column in the expected projection (EPSG 4326 and EPSG 3857, respectively) they shall be used.
- If the original table contains a valid (integer, unique, non-zero and not null) ``cartodb_id`` column, it shall be used
- If the original table contains a ``the_geom`` column or a ``the_geom_webmercator`` geometric column in the expected projection (EPSG 4326 and EPSG 3857, respectively) they shall be used.
- A modification of a cartodbfy'ed table shall insert or update a row in ``CDB_TableMetadata``
- A cartodbfy'ed table shall have a ``btree`` index on ``cartodb_id``
- A cartodbfy'ed table shall have ``gist`` indices on ``the_geom`` and ``the_geom_webmercator``
- Cartodbfy shall deal with text columns for imports, regarding CartoDB columns
- Cartodbfy shall deal with text columns for imports, regarding CartoDB columns (``cartodb_id``, ``the_geom``, ``the_geom_webmercator``)

View File

@@ -0,0 +1,24 @@
-- Catalog of analysis nodes registered by camshaft
-- (https://github.com/cartodb/camshaft).
-- Created only if it does not exist yet, so re-running this script is harmless.
CREATE TABLE IF NOT EXISTS cartodb.cdb_analysis_catalog (
    -- Node identifier: a 40-character hex digest.
    -- NOTE(review): this was described as an "md5 hex hash", but an MD5 hex
    -- digest is 32 characters long; 40 characters matches SHA-1 -- confirm
    -- against what camshaft actually stores.
    node_id        CHAR(40) CONSTRAINT cdb_analysis_catalog_pkey PRIMARY KEY,

    -- Node definition; json so it can be queried, e.g.
    -- analysis_def->>'type' = 'buffer'
    analysis_def   JSON NOT NULL,

    -- Identifiers of other nodes of this same table used as inputs,
    -- which makes recursive queries over the node graph possible
    input_nodes    CHAR(40)[] NOT NULL DEFAULT '{}',

    -- Processing state; restricted to the values listed in valid_status
    status         TEXT NOT NULL DEFAULT 'pending',

    created_at     TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),

    -- To be set whenever an operation was performed on the node and
    -- anything associated with it might have changed
    updated_at     TIMESTAMP WITH TIME ZONE DEFAULT NULL,

    -- Last time the node was used
    used_at        TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),

    -- Number of times the node was used
    hits           NUMERIC DEFAULT 0,

    -- Last node that used this node
    last_used_from CHAR(40),

    CONSTRAINT valid_status CHECK (
        status IN ('pending', 'waiting', 'running', 'canceled', 'failed', 'ready')
    )
);

View File

@@ -919,7 +919,7 @@ BEGIN
-- Add cartodb ID!
IF has_usable_primary_key THEN
sql := sql || const.pkey || '::bigint ';
sql := sql || const.pkey || '::integer ';
ELSE
sql := sql || 'nextval(''' || destseq || ''') AS ' || const.pkey;
END IF;

View File

@@ -2,15 +2,13 @@
-- Names of the columns of a table, ordered by their position in the table.
--
-- Parameters
--   $1: the table (regclass) whose columns are listed.
-- Returns one row per live user column.
--
-- The catalogs are read directly (pg_attribute) instead of going through
-- information_schema.columns, which is far slower for this lookup.
-- User columns are selected with attnum > 0 (system columns have
-- non-positive attribute numbers) and dropped columns are skipped with
-- attisdropped. Filtering on attstattarget is not reliable: a column whose
-- statistics target was set with ALTER TABLE ... SET STATISTICS would be
-- silently left out.
CREATE OR REPLACE FUNCTION CDB_ColumnNames(REGCLASS)
RETURNS SETOF information_schema.sql_identifier
AS $$
  SELECT
    a.attname::information_schema.sql_identifier AS column_name
  FROM pg_attribute a
  WHERE a.attrelid = $1::oid
    AND a.attnum > 0           -- exclude system columns
    AND NOT a.attisdropped     -- exclude dropped columns
  ORDER BY a.attnum;
$$ LANGUAGE SQL;
-- This is to migrate from pre-0.2.0 version

View File

@@ -2,15 +2,13 @@
-- Type of a column of a table, as formatted by format_type().
--
-- Parameters
--   $1: the table (regclass) that owns the column.
--   $2: the column name.
-- Returns the type name, or NULL when the table has no such user column.
--
-- The catalogs are read directly (pg_attribute) instead of going through
-- information_schema.columns, which is far slower for this lookup.
-- User columns are selected with attnum > 0 (system columns have
-- non-positive attribute numbers) and dropped columns are skipped with
-- attisdropped. Filtering on attstattarget is not reliable: a column whose
-- statistics target was set with ALTER TABLE ... SET STATISTICS would be
-- silently reported as missing.
CREATE OR REPLACE FUNCTION CDB_ColumnType(REGCLASS, TEXT)
RETURNS information_schema.character_data
AS $$
  SELECT
    format_type(a.atttypid, NULL)::information_schema.character_data AS data_type
  FROM pg_attribute a
  WHERE a.attrelid = $1::oid
    AND a.attname = $2
    AND a.attnum > 0           -- exclude system columns
    AND NOT a.attisdropped;    -- exclude dropped columns
$$ LANGUAGE SQL;
-- This is to migrate from pre-0.2.0 version

View File

@@ -1,4 +1,24 @@
-- security definer
-- Lists the ordinary tables (relkind 'r') of a schema.
-- When schema_name is NULL (the default) the tables of every schema that
-- may contain user tables are listed instead, i.e. all schemas except
-- pg_catalog, information_schema, topology and cartodb.
-- Tables named cdb_tablemetadata or spatial_ref_sys are never listed.
-- Each result row holds the table's regclass, schema name and table name.
-- Scope: private.
CREATE OR REPLACE FUNCTION _CDB_UserTablesInSchema(schema_name text DEFAULT NULL)
RETURNS TABLE(table_regclass REGCLASS, schema_name TEXT, table_name TEXT)
AS $$
  SELECT
    rel.oid::regclass,
    ns.nspname::text,
    rel.relname::text
  FROM pg_namespace ns
  JOIN pg_class rel ON rel.relnamespace = ns.oid
  WHERE rel.relkind = 'r'
    AND rel.relname NOT IN ('cdb_tablemetadata', 'spatial_ref_sys')
    AND (
      -- no schema requested: every schema but the internal ones
      (schema_name IS NULL
        AND ns.nspname NOT IN ('pg_catalog', 'information_schema', 'topology', 'cartodb'))
      -- schema requested: only that one
      OR ns.nspname = schema_name
    );
$$ LANGUAGE SQL;
-- Pattern that can be used to detect overview tables and Extract
-- the intended zoom level from the table name.
@@ -68,6 +88,61 @@ AS $$
END;
$$ LANGUAGE PLPGSQL IMMUTABLE;
-- Base table of an overview table.
-- Scope: private.
-- Parameters
--   overview_table: regclass of a table (an overview or a base table).
-- Return: regclass of the table, in the same schema, whose name is given by
--   _CDB_OverviewBaseTableName; when that name equals the input table's own
--   name, the input is returned unchanged.
-- Declared STABLE (not IMMUTABLE) because the result depends on catalog
-- lookups (name resolution of the regclass), which can change between statements.
CREATE OR REPLACE FUNCTION _CDB_OverviewBaseTable(overview_table REGCLASS)
RETURNS REGCLASS
AS $$
DECLARE
  table_name TEXT;
  schema_name TEXT;
  base_name TEXT;
  base_table REGCLASS;
BEGIN
  SELECT * FROM _cdb_split_table_name(overview_table) INTO schema_name, table_name;
  base_name := _CDB_OverviewBaseTableName(table_name);
  IF base_name != table_name THEN
    -- %I quotes each identifier as needed before the regclass lookup
    base_table := Format('%I.%I', schema_name, base_name)::regclass;
  ELSE
    base_table := overview_table;
  END IF;
  RETURN base_table;
END;
$$ LANGUAGE PLPGSQL STABLE;
-- Schema and relation names of a table given its reloid
-- Scope: private.
-- Parameters
--   reloid: oid of the table.
-- Return (schema_name, table_name)
--   The names are returned exactly as stored in the catalogs (NOT quoted);
--   callers must quote them (e.g. Format('%I.%I', ...)) before using them in SQL.
-- Raises NO_DATA_FOUND (because of INTO STRICT) if no relation has that oid.
-- Declared STABLE (not IMMUTABLE) because it reads pg_class/pg_namespace:
-- renaming or moving the table changes the result for the same oid.
CREATE OR REPLACE FUNCTION _cdb_split_table_name(reloid REGCLASS, OUT schema_name TEXT, OUT table_name TEXT)
AS $$
BEGIN
  SELECT n.nspname, c.relname
  INTO STRICT schema_name, table_name
  FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid
  WHERE c.oid = reloid;
END
$$ LANGUAGE PLPGSQL STABLE;
-- Schema name of a table given its reloid
-- Scope: private.
-- Parameters
--   reloid: oid of the table.
-- Return: name of the schema containing the table, exactly as stored in
--   pg_namespace (NOT quoted).
-- Raises NO_DATA_FOUND (because of INTO STRICT) if no relation has that oid.
-- Declared STABLE (not IMMUTABLE) because it reads pg_class/pg_namespace:
-- moving the table to another schema changes the result for the same oid.
CREATE OR REPLACE FUNCTION _cdb_schema_name(reloid REGCLASS)
RETURNS TEXT
AS $$
DECLARE
  schema_name TEXT;
BEGIN
  SELECT n.nspname
  INTO STRICT schema_name
  FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid
  WHERE c.oid = reloid;
  RETURN schema_name;
END
$$ LANGUAGE PLPGSQL STABLE;
-- Remove a dataset's existing overview tables.
-- Scope: public
@@ -77,8 +152,11 @@ CREATE OR REPLACE FUNCTION CDB_DropOverviews(reloid REGCLASS)
RETURNS void
AS $$
DECLARE
row record;
row record;
schema_name TEXT;
table_name TEXT;
BEGIN
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
FOR row IN
SELECT * FROM CDB_Overviews(reloid)
LOOP
@@ -100,18 +178,20 @@ $$ LANGUAGE PLPGSQL VOLATILE;
CREATE OR REPLACE FUNCTION CDB_Overviews(reloid REGCLASS)
RETURNS TABLE(base_table REGCLASS, z integer, overview_table REGCLASS)
AS $$
-- FIXME: this will fail if the overview tables
-- require a explicit schema name
-- possible solutions: return table names as text instead of regclass
-- or add schema of reloid before casting to regclass
SELECT
reloid AS base_table,
_CDB_OverviewTableZ(cdb_usertables) AS z,
cdb_usertables::regclass AS overview_table
FROM CDB_UserTables()
WHERE _CDB_IsOverviewTableOf((SELECT relname FROM pg_class WHERE oid=reloid), cdb_usertables)
ORDER BY z;
$$ LANGUAGE SQL;
DECLARE
schema_name TEXT;
base_table_name TEXT;
BEGIN
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, base_table_name;
RETURN QUERY SELECT
reloid AS base_table,
_CDB_OverviewTableZ(table_name) AS z,
table_regclass AS overview_table
FROM _CDB_UserTablesInSchema(schema_name)
WHERE _CDB_IsOverviewTableOf((SELECT relname FROM pg_class WHERE oid=reloid), table_name)
ORDER BY z;
END
$$ LANGUAGE PLPGSQL;
-- Return existing overviews (if any) for multiple dataset tables.
-- Scope: public
@@ -127,30 +207,16 @@ RETURNS TABLE(base_table REGCLASS, z integer, overview_table REGCLASS)
AS $$
SELECT
base_table::regclass AS base_table,
_CDB_OverviewTableZ(cdb_usertables) AS z,
cdb_usertables::regclass AS overview_table
_CDB_OverviewTableZ(table_name) AS z,
table_regclass AS overview_table
FROM
CDB_UserTables(), unnest(tables) base_table
WHERE _CDB_IsOverviewTableOf((SELECT relname FROM pg_class WHERE oid=base_table), cdb_usertables)
_CDB_UserTablesInSchema(), unnest(tables) base_table
WHERE
schema_name = _cdb_schema_name(base_table)
AND _CDB_IsOverviewTableOf((SELECT relname FROM pg_class WHERE oid=base_table), table_name)
ORDER BY base_table, z;
$$ LANGUAGE SQL;
-- Schema and relation names of a table given its reloid
-- Scope: private.
-- Parameters
--   reloid: oid of the table.
-- Return (schema_name, table_name)
--   The names are returned exactly as stored in the catalogs (NOT quoted);
--   callers must quote them (e.g. Format('%I.%I', ...)) before using them in SQL.
-- Raises NO_DATA_FOUND (because of INTO STRICT) if no relation has that oid.
-- Declared STABLE (not IMMUTABLE) because it reads pg_class/pg_namespace:
-- renaming or moving the table changes the result for the same oid.
CREATE OR REPLACE FUNCTION _cdb_split_table_name(reloid REGCLASS, OUT schema_name TEXT, OUT table_name TEXT)
AS $$
BEGIN
  SELECT n.nspname, c.relname
  INTO STRICT schema_name, table_name
  FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid
  WHERE c.oid = reloid;
END
$$ LANGUAGE PLPGSQL STABLE;
-- Calculate the estimated extent of a cartodbfy'ed table.
-- Scope: private.
-- Parameters
@@ -175,11 +241,17 @@ AS $$
BEGIN
EXECUTE ext_query INTO ext;
EXCEPTION
EXCEPTION
-- This is the typical ERROR: stats for "mytable" do not exist
WHEN internal_error THEN
-- Get stats and execute again
EXECUTE format('ANALYZE %1$I', reloid);
EXECUTE format('ANALYZE %1$s', reloid);
-- We check the geometry type in case the error is due to empty geometries
IF _CDB_GeometryTypes(reloid) IS NULL THEN
RETURN NULL;
END IF;
EXECUTE ext_query INTO ext;
END;
@@ -266,22 +338,28 @@ $$ LANGUAGE PLPGSQL STABLE;
-- Parameters:
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- Return value: Z level as an integer
CREATE OR REPLACE FUNCTION _CDB_Feature_Density_Ref_Z_Strategy(reloid REGCLASS)
CREATE OR REPLACE FUNCTION _CDB_Feature_Density_Ref_Z_Strategy(reloid REGCLASS, tolerance_px FLOAT8 DEFAULT NULL)
RETURNS INTEGER
AS $$
DECLARE
lim FLOAT8 := 500; -- TODO: determine/parameterize this
lim FLOAT8;
nz integer := 4;
fd FLOAT8;
c FLOAT8;
BEGIN
IF (tolerance_px IS NULL) OR tolerance_px = 0 THEN
lim := 500;
ELSE
lim := floor(power(256/tolerance_px, 2))/2;
END IF;
-- Compute fd as an estimation of the (maximum) number
-- of features per unit of tile area (in webmercator squared meters)
SELECT _CDB_Feature_Density(reloid, nz) INTO fd;
-- lim: maximum (desirable) number of features per tile
-- we have c = 2*Pi*R = CDB_XYZ_Resolution(-8) (earth circumference)
-- ta(z): tile area = power(c*power(2,z), 2) = c*c*power(2,2*z)
-- => fd*ta(z) if the average number of features per tile at level z
-- ta(z): tile area = power(c*power(2,-z), 2) = c*c*power(2,-2*z)
-- => fd*ta(z) is the average number of features per tile at level z
-- find minimum z so that fd*ta(z) <= lim
-- compute a rough 'feature density' value
SELECT CDB_XYZ_Resolution(-8) INTO c;
@@ -321,7 +399,7 @@ $$ LANGUAGE PLPGSQL IMMUTABLE;
-- ref_z Z level assigned to the original table
-- overview_z Z level of the overview to be generated, must be smaller than ref_z
-- Return value: Name of the generated overview table
CREATE OR REPLACE FUNCTION _CDB_Sampling_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER)
CREATE OR REPLACE FUNCTION _CDB_Sampling_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, tolerance_px FLOAT8 DEFAULT NULL)
RETURNS REGCLASS
AS $$
DECLARE
@@ -330,12 +408,16 @@ AS $$
base_name TEXT;
class_info RECORD;
num_samples INTEGER;
schema_name TEXT;
table_name TEXT;
BEGIN
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- TODO: compute fraction from tolerance_px if not NULL
fraction := power(2, 2*(overview_z - ref_z));
-- FIXME: handle schema name for overview_rel if reloid requires it
EXECUTE Format('DROP TABLE IF EXISTS %I CASCADE;', overview_rel);
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);
-- Estimate number of rows
SELECT reltuples, relpages FROM pg_class INTO STRICT class_info
@@ -349,16 +431,16 @@ AS $$
ELSE
num_samples := ceil(class_info.reltuples*fraction);
EXECUTE Format('
CREATE TABLE %1$I AS SELECT * FROM %2$s
CREATE TABLE %4$I.%1$I AS SELECT * FROM %2$s
WHERE ctid = ANY (
ARRAY[
(SELECT CDB_RandomTids(''%2$s'', %3$s))
]
);
', overview_rel, reloid, num_samples);
', overview_rel, reloid, num_samples, schema_name);
END IF;
RETURN overview_rel;
RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
END;
$$ LANGUAGE PLPGSQL;
@@ -394,9 +476,12 @@ AS $$
-- preserve the owner of the base table
SELECT u.usename
FROM pg_catalog.pg_class c JOIN pg_catalog.pg_user u ON (c.relowner=u.usesysid)
WHERE c.relname = dataset::text
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_user u ON (c.relowner=u.usesysid)
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = dataset_name::text AND n.nspname = dataset_scheme
INTO table_owner;
EXECUTE Format('ALTER TABLE IF EXISTS %s OWNER TO %I;', overview_table::text, table_owner);
-- preserve the table privileges
@@ -450,6 +535,70 @@ BEGIN
END
$$ LANGUAGE PLPGSQL STABLE;
-- Check if a column of a table is of an unlimited-length text type
-- Check if a column of a table is of an unlimited-length text type
-- Scope: private.
-- Parameters
--   reloid: oid of the table.
--   col_name: name of the column.
-- Return: TRUE when the column exists, its base type is text,
--   character varying or character, and it carries no type modifier
--   (i.e. the type name formatted with and without the modifier is the same);
--   FALSE otherwise, including when the column does not exist.
CREATE OR REPLACE FUNCTION _cdb_unlimited_text_column(reloid REGCLASS, col_name TEXT)
RETURNS BOOLEAN
AS $$
  SELECT EXISTS (
    SELECT 1
    FROM pg_attribute a
    WHERE a.attrelid = reloid
      AND a.attname = col_name
      AND format_type(a.atttypid, NULL) IN ('text', 'character varying', 'character')
      -- a length limit shows up as a modifier, e.g. 'character varying(10)'
      AND format_type(a.atttypid, NULL) = format_type(a.atttypid, a.atttypmod)
  );
$$ LANGUAGE SQL STABLE;
-- Check if a column of a table looks like a category column
-- Scope: private.
-- Parameters
--   reloid: oid of the table.
--   col_name: name of the column.
-- Return: TRUE when the planner statistics (pg_stats.n_distinct) report a
--   positive number of distinct values not greater than 20; FALSE otherwise.
--   FALSE (instead of NULL) is also returned when no statistics are available
--   for the column even after running ANALYZE.
-- Side effects: runs ANALYZE on the table when the column has no statistics.
--   Existing statistics are used as they are; nothing here checks that they
--   are up to date.
CREATE OR REPLACE FUNCTION _cdb_categorical_column(reloid REGCLASS, col_name TEXT)
RETURNS BOOLEAN
AS $$
DECLARE
  schema_name TEXT;
  table_name TEXT;
  available BOOLEAN;
  categorical BOOLEAN;
BEGIN
  SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;

  -- available stays NULL when pg_stats has no row for the column
  SELECT n_distinct IS NOT NULL
    FROM pg_stats
    WHERE pg_stats.schemaname = schema_name
      AND pg_stats.tablename = table_name
      AND pg_stats.attname = col_name
    INTO available;

  IF available IS NOT TRUE THEN
    -- %s with a regclass yields the (quoted, qualified as needed) table name
    EXECUTE Format('ANALYZE %s;', reloid);
  END IF;

  -- n_distinct > 0 is an absolute count of distinct values; negative values
  -- express a ratio to the number of rows and are not treated as categories.
  SELECT n_distinct > 0 AND n_distinct <= 20
    FROM pg_stats
    WHERE pg_stats.schemaname = schema_name
      AND pg_stats.tablename = table_name
      AND pg_stats.attname = col_name
    INTO categorical;

  RETURN COALESCE(categorical, FALSE);
END;
$$ LANGUAGE PLPGSQL VOLATILE;
-- Most frequent element (mode) of an array.
-- Ties are broken by taking the smallest element in sort order.
-- Returns NULL for an empty array.
CREATE OR REPLACE FUNCTION _cdb_mode_of_array(anyarray)
RETURNS anyelement AS
$$
  SELECT v.elem
  FROM unnest($1) AS v(elem)
  GROUP BY v.elem
  ORDER BY count(*) DESC, v.elem
  LIMIT 1;
$$
LANGUAGE SQL IMMUTABLE;
-- Aggregate returning the mode (most frequent value) of its input.
-- The values are accumulated into an array (array_append) and the final
-- function _cdb_mode_of_array picks the most frequent one.
-- A plain CREATE AGGREGATE fails if the aggregate already exists, so any
-- previous definition is dropped first to let this script be re-run.
DROP AGGREGATE IF EXISTS _cdb_mode(anyelement);
CREATE AGGREGATE _cdb_mode(anyelement) (
  SFUNC=array_append,
  STYPE=anyarray,
  FINALFUNC=_cdb_mode_of_array,
  INITCOND='{}'
);
-- SQL Aggregation expression for a dataset attribute
-- Scope: private.
-- Parameters
@@ -464,6 +613,10 @@ AS $$
DECLARE
column_type TEXT;
qualified_column TEXT;
has_counter_column BOOLEAN;
feature_count TEXT;
total_feature_count TEXT;
base_table REGCLASS;
BEGIN
IF table_alias <> '' THEN
qualified_column := Format('%I.%I', table_alias, column_name);
@@ -473,19 +626,44 @@ BEGIN
column_type := CDB_ColumnType(reloid, column_name);
SELECT EXISTS (
SELECT * FROM CDB_ColumnNames(reloid) as colname WHERE colname = '_feature_count'
) INTO has_counter_column;
IF has_counter_column THEN
feature_count := '_feature_count';
total_feature_count := 'SUM(_feature_count)';
ELSE
feature_count := '1';
total_feature_count := 'count(*)';
END IF;
base_table := _CDB_OverviewBaseTable(reloid);
CASE column_type
WHEN 'double precision', 'real', 'integer', 'bigint' THEN
RETURN Format('AVG(%s)::' || column_type, qualified_column);
WHEN 'text' THEN
-- TODO: we could define a new aggregate function that returns distinct
-- separated values with a limit, adding ellipsis if more values existed
-- e.g. with '/' as separator and a limit of three:
-- 'A', 'B', 'A', 'C', 'D' => 'A/B/C/...'
-- Other ideas: if value is unique then use it, otherwise use something
-- like '*' or '(varies)' or '(multiple values)', or NULL
-- Using 'string_agg(' || qualified_column || ',''/'')'
-- here causes
RETURN 'CASE count(*) WHEN 1 THEN MIN(' || qualified_column || ') ELSE NULL END::' || column_type;
WHEN 'double precision', 'real', 'integer', 'bigint', 'numeric' THEN
IF column_name = '_feature_count' THEN
RETURN 'SUM(_feature_count)';
ELSE
IF column_type = 'integer' AND _cdb_categorical_column(base_table, column_name) THEN
RETURN Format('CDB_Math_Mode(%s)::', qualified_column) || column_type;
ELSE
RETURN Format('SUM(%s*%s)/%s::' || column_type, qualified_column, feature_count, total_feature_count);
END IF;
END IF;
WHEN 'text', 'character varying', 'character' THEN
IF _cdb_categorical_column(base_table, column_name) THEN
RETURN Format('_cdb_mode(%s)::', qualified_column) || column_type;
ELSE
IF _cdb_unlimited_text_column(base_table, column_name) THEN
-- TODO: this should not be applied to columns containing largish text;
-- it is intended only to short names/identifiers
RETURN 'CASE WHEN count(distinct ' || qualified_column || ') = 1 THEN MIN(' || qualified_column || ') WHEN ' || total_feature_count || ' < 5 THEN string_agg(distinct ' || qualified_column || ','' / '') ELSE ''*'' END::' || column_type;
ELSE
RETURN 'CASE count(*) WHEN 1 THEN MIN(' || qualified_column || ') ELSE NULL END::' || column_type;
END IF;
END IF;
WHEN 'boolean' THEN
RETURN 'CASE count(*) WHEN 1 THEN BOOL_AND(' || qualified_column || ') ELSE NULL END::' || column_type;
ELSE
RETURN 'CASE count(*) WHEN 1 THEN MIN(' || qualified_column || ') ELSE NULL END::' || column_type;
END CASE;
@@ -545,33 +723,49 @@ $$ LANGUAGE PLPGSQL STABLE;
-- ref_z Z level assigned to the original table
-- overview_z Z level of the overview to be generated, must be smaller than ref_z
-- Return value: Name of the generated overview table
CREATE OR REPLACE FUNCTION _CDB_GridCluster_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER)
CREATE OR REPLACE FUNCTION _CDB_GridCluster_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
grid_px FLOAT8 = 7.5; -- Grid size in pixels at Z level overview_z
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
gtypes TEXT[];
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
-- This strategy only supports datasets with point geometry
RETURN NULL;
RETURN 'x';
END IF;
--TODO: check applicability: geometry type, minimum number of points...
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- compute grid cell size using the overview_z dimension...
SELECT CDB_XYZ_Resolution(overview_z)*grid_px INTO grid_m;
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px := 1.0;
END IF;
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
@@ -582,19 +776,31 @@ AS $$
aggr_attributes := aggr_attributes || ', ';
END IF;
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
-- cartodb_id,
-- ST_Transform(ST_SetSRID(ST_MakePoint(sx/n, sy/n), 3857), 4326) AS the_geom,
-- ST_SetSRID(ST_MakePoint(sx/n, sy/n), 3857) AS the_geom_webmercator
-- %4$s
WITH cols AS (
SELECT
CASE c
WHEN 'cartodb_id' THEN 'cartodb_id'
WHEN 'the_geom' THEN
'ST_Transform(ST_SetSRID(ST_MakePoint(sx/n, sy/n), 3857), 4326) AS the_geom'
Format('ST_Transform(%s, 4326) AS the_geom', point_geom)
WHEN 'the_geom_webmercator' THEN
'ST_SetSRID(ST_MakePoint(sx/n, sy/n), 3857) AS the_geom_webmercator'
Format('%s AS the_geom_webmercator', point_geom)
ELSE c
END AS column
FROM CDB_ColumnNames(reloid) c
@@ -603,21 +809,22 @@ AS $$
SELECT * FROM cols
) AS s INTO columns;
-- FIXME: handle schema name for overview_rel if reloid requires it
EXECUTE Format('DROP TABLE IF EXISTS %I CASCADE;', overview_rel);
IF NOT columns LIKE '%_feature_count%' THEN
columns := columns || ', n AS _feature_count';
END IF;
EXECUTE Format('DROP TABLE IF EXISTS %I.%I CASCADE;', schema_name, overview_rel);
-- Now we cluster the data using a grid of size grid_m
-- and select the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
CREATE TABLE %3$I AS
CREATE TABLE %7$I.%3$I AS
WITH clusters AS (
SELECT
%5$s
count(*) AS n,
SUM(ST_X(f.the_geom_webmercator)) AS sx,
SUM(ST_Y(f.the_geom_webmercator)) AS sy,
Floor(ST_X(f.the_geom_webmercator)/%2$s)::int AS gx,
Floor(ST_Y(f.the_geom_webmercator)/%2$s)::int AS gy,
MIN(cartodb_id) AS cartodb_id
@@ -625,9 +832,9 @@ AS $$
GROUP BY gx, gy
)
SELECT %6$s FROM clusters
', reloid::text, grid_m, overview_rel, attributes, aggr_attributes, columns);
', reloid::text, grid_m, overview_rel, attributes, aggr_attributes, columns, schema_name);
RETURN overview_rel;
RETURN Format('%I.%I', schema_name, overview_rel)::regclass;
END;
$$ LANGUAGE PLPGSQL;
@@ -642,7 +849,20 @@ $$ LANGUAGE PLPGSQL;
-- created by the strategy must have the same columns
-- as the base table and in the same order.
-- Return value: Array with the names of the generated overview tables
CREATE OR REPLACE FUNCTION CDB_CreateOverviews(reloid REGCLASS, refscale_strategy regproc DEFAULT '_CDB_Feature_Density_Ref_Z_Strategy'::regproc, reduce_strategy regproc DEFAULT '_CDB_GridCluster_Reduce_Strategy'::regproc)
CREATE OR REPLACE FUNCTION CDB_CreateOverviews(reloid REGCLASS, refscale_strategy regproc DEFAULT '_CDB_Feature_Density_Ref_Z_Strategy(REGCLASS,FLOAT8)'::regprocedure, reduce_strategy regproc DEFAULT '_CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8)'::regprocedure)
RETURNS text[]
AS $$
BEGIN
  -- Thin wrapper: delegate to the tolerance-aware variant using the
  -- default tolerance of one pixel.
  RETURN CDB_CreateOverviewsWithToleranceInPixels(
    reloid,
    1.0::FLOAT8,
    refscale_strategy,
    reduce_strategy
  );
END;
$$ LANGUAGE PLPGSQL;
-- Create overviews with additional parameter to define the desired detail/tolerance in pixels
CREATE OR REPLACE FUNCTION CDB_CreateOverviewsWithToleranceInPixels(reloid REGCLASS, tolerance_px FLOAT8, refscale_strategy regproc DEFAULT '_CDB_Feature_Density_Ref_Z_Strategy(REGCLASS,FLOAT8)'::regprocedure, reduce_strategy regproc DEFAULT '_CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8)'::regprocedure)
RETURNS text[]
AS $$
DECLARE
@@ -653,9 +873,14 @@ DECLARE
overview_z integer;
overview_tables REGCLASS[];
overviews_step integer := 1;
has_counter_column boolean;
BEGIN
-- Determine the reference zoom level
EXECUTE 'SELECT ' || quote_ident(refscale_strategy::text) || Format('(''%s'');', reloid) INTO ref_z;
EXECUTE 'SELECT ' || quote_ident(refscale_strategy::text) || Format('(''%s'', %s);', reloid, tolerance_px) INTO ref_z;
IF ref_z < 0 OR ref_z IS NULL THEN
RETURN NULL;
END IF;
-- Determine overlay zoom levels
-- TODO: should be handled by the refscale_strategy?
@@ -669,7 +894,7 @@ BEGIN
base_z := ref_z;
base_rel := reloid;
FOREACH overview_z IN ARRAY overviews_z LOOP
EXECUTE 'SELECT ' || quote_ident(reduce_strategy::text) || Format('(''%s'', %s, %s);', base_rel, base_z, overview_z) INTO base_rel;
EXECUTE 'SELECT ' || quote_ident(reduce_strategy::text) || Format('(''%s'', %s, %s, %s);', base_rel, base_z, overview_z, tolerance_px) INTO base_rel;
IF base_rel IS NULL THEN
EXIT;
END IF;
@@ -678,6 +903,24 @@ BEGIN
SELECT array_append(overview_tables, base_rel) INTO overview_tables;
END LOOP;
IF overview_tables IS NOT NULL AND array_length(overview_tables, 1) > 0 THEN
SELECT EXISTS (
SELECT * FROM CDB_ColumnNames(reloid) as colname WHERE colname = '_feature_count'
) INTO has_counter_column;
IF NOT has_counter_column THEN
EXECUTE Format('
ALTER TABLE %s ADD COLUMN _feature_count integer DEFAULT 1;
', reloid);
END IF;
END IF;
RETURN overview_tables;
END;
$$ LANGUAGE PLPGSQL;
-- Here are some older signatures of these functions, no longer in use.
-- They must be dropped here, after the (new) definition of the function `CDB_CreateOverviews`,
-- because that function used to contain references to them in the default argument values.
DROP FUNCTION IF EXISTS _CDB_Feature_Density_Ref_Z_Strategy(REGCLASS);
DROP FUNCTION IF EXISTS _CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER);
DROP FUNCTION IF EXISTS _CDB_Sampling_Reduce_Strategy(REGCLASS,INTEGER,INTEGER);

View File

@@ -6,7 +6,7 @@ CREATE OR REPLACE FUNCTION CDB_XYZ_Resolution(z INTEGER)
RETURNS FLOAT8
AS $$
-- circumference divided by 256 is z0 resolution, then divide by 2^z
SELECT 40075017.0 / 256 / power(2, z);
SELECT 6378137.0*2.0*pi() / 256.0 / power(2.0, z);
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- }

View File

@@ -0,0 +1 @@
../scripts-available/CDB_AnalysisCatalog.sql

View File

@@ -8,7 +8,7 @@ SELECT _CDB_Aggregated_Attributes_Expression('base_bare_t'::regclass);
SELECT _CDB_Aggregated_Attributes_Expression('base_bare_t'::regclass, 'tab');
SELECT CDB_CreateOverviews('base_bare_t'::regclass);
SELECT count(*) FROM _vovw_5_base_bare_t;
SELECT count(*) FROM _vovw_2_base_bare_t;
SELECT _CDB_Aggregable_Attributes_Expression('base_t'::regclass);
@@ -16,18 +16,28 @@ SELECT _CDB_Aggregated_Attributes_Expression('base_t'::regclass);
SELECT _CDB_Aggregated_Attributes_Expression('base_t'::regclass, 'tab');
SELECT CDB_CreateOverviews('base_t'::regclass);
SELECT count(*) FROM _vovw_5_base_t;
SELECT count(*) FROM _vovw_2_base_t;
SELECT CDB_CreateOverviews('polyg_t'::regclass);
SELECT CDB_CreateOverviews('column_types_t'::regclass);
SELECT CDB_Overviews('base_t'::regclass);
SELECT CDB_Overviews('"public"."base_t"'::regclass);
SELECT CDB_Overviews(ARRAY['base_t'::regclass, 'base_bare_t'::regclass]);
SELECT CDB_Overviews('polyg_t'::regclass);
SELECT CDB_Overviews('column_types_t'::regclass);
SELECT CDB_DropOverviews('column_types_t'::regclass);
SELECT CDB_DropOverviews('base_bare_t'::regclass);
SELECT CDB_DropOverviews('base_t'::regclass);
SELECT count(*) FROM _vovw_5_base_t;
SELECT count(*) FROM _vovw_2_base_t;
SELECT CDB_CreateOverviewsWithToleranceInPixels('base_t'::regclass, 7.5);
SELECT count(*) FROM _vovw_2_base_t;
SELECT CDB_DropOverviews('base_t'::regclass);
DROP TABLE column_types_t;
DROP TABLE base_bare_t;
DROP TABLE base_t;
DROP TABLE polyg_t;

View File

@@ -5,40 +5,44 @@ CREATE TABLE
INSERT 0 1114
CREATE TABLE
INSERT 0 5
SELECT 1114
{_vovw_5_base_bare_t,_vovw_4_base_bare_t,_vovw_3_base_bare_t,_vovw_2_base_bare_t,_vovw_1_base_bare_t,_vovw_0_base_bare_t}
125
{_vovw_2_base_bare_t,_vovw_1_base_bare_t,_vovw_0_base_bare_t}
126
number,int_number,name,start
AVG(number)::double precision AS number,AVG(int_number)::integer AS int_number,CASE count(*) WHEN 1 THEN MIN(name) ELSE NULL END::text AS name,CASE count(*) WHEN 1 THEN MIN(start) ELSE NULL END::date AS start
AVG(tab.number)::double precision AS number,AVG(tab.int_number)::integer AS int_number,CASE count(*) WHEN 1 THEN MIN(tab.name) ELSE NULL END::text AS name,CASE count(*) WHEN 1 THEN MIN(tab.start) ELSE NULL END::date AS start
{_vovw_5_base_t,_vovw_4_base_t,_vovw_3_base_t,_vovw_2_base_t,_vovw_1_base_t,_vovw_0_base_t}
125
SUM(number*1)/count(*)::double precision AS number,SUM(int_number*1)/count(*)::integer AS int_number,CASE WHEN count(distinct name) = 1 THEN MIN(name) WHEN count(*) < 5 THEN string_agg(distinct name,' / ') ELSE '*' END::text AS name,CASE count(*) WHEN 1 THEN MIN(start) ELSE NULL END::date AS start
SUM(tab.number*1)/count(*)::double precision AS number,SUM(tab.int_number*1)/count(*)::integer AS int_number,CASE WHEN count(distinct tab.name) = 1 THEN MIN(tab.name) WHEN count(*) < 5 THEN string_agg(distinct tab.name,' / ') ELSE '*' END::text AS name,CASE count(*) WHEN 1 THEN MIN(tab.start) ELSE NULL END::date AS start
{_vovw_2_base_t,_vovw_1_base_t,_vovw_0_base_t}
126
{_vovw_2_column_types_t,_vovw_1_column_types_t,_vovw_0_column_types_t}
(base_t,0,_vovw_0_base_t)
(base_t,1,_vovw_1_base_t)
(base_t,2,_vovw_2_base_t)
(base_t,0,_vovw_0_base_t)
(base_t,1,_vovw_1_base_t)
(base_t,2,_vovw_2_base_t)
(base_t,3,_vovw_3_base_t)
(base_t,4,_vovw_4_base_t)
(base_t,5,_vovw_5_base_t)
(base_bare_t,0,_vovw_0_base_bare_t)
(base_bare_t,1,_vovw_1_base_bare_t)
(base_bare_t,2,_vovw_2_base_bare_t)
(base_bare_t,3,_vovw_3_base_bare_t)
(base_bare_t,4,_vovw_4_base_bare_t)
(base_bare_t,5,_vovw_5_base_bare_t)
(base_t,0,_vovw_0_base_t)
(base_t,1,_vovw_1_base_t)
(base_t,2,_vovw_2_base_t)
(base_t,3,_vovw_3_base_t)
(base_t,4,_vovw_4_base_t)
(base_t,5,_vovw_5_base_t)
(column_types_t,0,_vovw_0_column_types_t)
(column_types_t,1,_vovw_1_column_types_t)
(column_types_t,2,_vovw_2_column_types_t)
ERROR: relation "_vovw_5_base_t" does not exist
LINE 1: SELECT count(*) FROM _vovw_5_base_t;
ERROR: relation "_vovw_2_base_t" does not exist
LINE 1: SELECT count(*) FROM _vovw_2_base_t;
^
{_vovw_5_base_t,_vovw_4_base_t,_vovw_3_base_t,_vovw_2_base_t,_vovw_1_base_t,_vovw_0_base_t}
38
DROP TABLE
DROP TABLE
DROP TABLE
DROP TABLE

View File

@@ -3,4 +3,5 @@ SET SCHEMA 'cartodb';
\i scripts-available/CDB_TableMetadata.sql
\i scripts-available/CDB_ColumnNames.sql
\i scripts-available/CDB_ColumnType.sql
\i scripts-available/CDB_AnalysisCatalog.sql
SET SCHEMA 'public';

View File

@@ -563,6 +563,13 @@ test_extension|public|"local-table-with-dashes"'
DATABASE=fdw_target tear_down_database
}
# Basic check of the analysis catalog table: a row inserted with only
# node_id and analysis_def is expected to report status 'pending'.
# The table is emptied afterwards.
# NOTE(review): relies on the `sql` / `should` helpers of this test script,
# which are defined outside this view.
function test_cdb_catalog_basic_node() {
# JSON definition of the node, wrapped in single quotes so it can be
# embedded as a SQL string literal and cast to json
DEF="'{\"type\":\"buffer\",\"source\":\"b2db66bc7ac02e135fd20bbfef0fdd81b2d15fad\",\"radio\":10000}'"
sql postgres "INSERT INTO cartodb.cdb_analysis_catalog (node_id, analysis_def) VALUES ('1bbc4c41ea7c9d3a7dc1509727f698b7', ${DEF}::json)"
# status was not given in the INSERT; the value read back should be 'pending'
sql postgres "SELECT status from cartodb.cdb_analysis_catalog where node_id = '1bbc4c41ea7c9d3a7dc1509727f698b7'" should 'pending'
# clean up: remove every row of the catalog
sql postgres "DELETE FROM cartodb.cdb_analysis_catalog"
}
#################################################### TESTS END HERE ####################################################
run_tests $@

View File

@@ -2240,3 +2240,10 @@ INSERT INTO polyg_t VALUES
(3, 'C', 'SRID=4326;POLYGON((9 40,8 39,8.5 40,9 41,9 40))'::geometry, ST_Transform('SRID=4326;POLYGON((9 40,8 39,8.5 40,9 41,9 40))'::geometry, 3857)),
(4, 'D', 'SRID=4326;POLYGON((9 40,8 39,8.5 40,9 41,9 40))'::geometry, ST_Transform('SRID=4326;POLYGON((9 40,8 39,8.5 40,9 41,9 40))'::geometry, 3857)),
(5, 'E', 'SRID=4326;POLYGON((9 40,8 39,8.5 40,9 41,9 40))'::geometry, ST_Transform('SRID=4326;POLYGON((9 40,8 39,8.5 40,9 41,9 40))'::geometry, 3857));
-- Test fixture: id and geometry columns of base_bare_t plus a boolean
-- attribute, is_odd, which is FALSE when cartodb_id is even and TRUE otherwise.
CREATE TABLE column_types_t AS
SELECT
    cartodb_id,
    the_geom,
    the_geom_webmercator,
    CASE WHEN cartodb_id % 2 = 0 THEN FALSE ELSE TRUE END AS is_odd
FROM base_bare_t;