Fix typo

Add info about python dependencies
Constraint version numbers of reqs a little
2016-03-10 10:11:11 +01:00 · 2016-03-09 18:51:04 +01:00 · 2016-03-09 17:45:50 +01:00 · 2016-03-09 14:40:02 +01:00 · 2016-03-08 19:35:02 +01:00
45 changed files with 246 additions and 218 deletions
--- a/4
+++ b/4
@@ -1,5 +1,5 @@
-EXT_DIR = pg
-PYP_DIR = python
+EXT_DIR = src/pg
+PYP_DIR = src/py

 .PHONY: install
 .PHONY: run_tests
--- a/README.md
+++ b/README.md
@@ -4,9 +4,87 @@ CartoDB Spatial Analysis extension for PostgreSQL.

 ## Code organization

-* *pg* contains the PostgreSQL extension source code
-* *python* Python module
+* *doc* documentation
+* *src* source code
+* - *src/pg* contains the PostgreSQL extension source code
+* - *src/py* Python module source code
+* *release* released versions

 ## Requirements

-* pip
+* pip, virtualenv, PostgreSQL
+
+# Working Process
+
+## Development
+
+Work in `src/pg/sql`, `src/py/crankshaft`;
+use topic branch.
+
+Update local installation with `sudo make install`
+(this will update the 'dev' version of the extension in 'src/pg/')
+
+Run the tests with `PGUSER=postgres make test`
+
+Update extension in working database with
+
+* `ALTER EXTENSION crankshaft UPDATE TO 'current';`
+  `ALTER EXTENSION crankshaft UPDATE TO 'dev';`
+
+Note: we keep the current development version installed as 'dev' always;
+we update through the 'current' alias to allow changing the extension
+contents but not the version identifier. This will fail if the
+changes involve incompatible function changes such as a different
+return type; in that case the offending function (or the whole extension)
+should be dropped manually before the update.
+
+If the extension has not previously been installed in a database
+we can:
+
+Add tests...
+
+* `CREATE EXTENSION crankshaft WITH VERSION 'dev';`
+
+Test
+
+Commit, push, create PR, wait for CI tests, CR, ...
+
+## Release
+
+To release current development version
+(working directory should be clean in dev branch)
+
+(process to be gradually automated)
+
+For backwards-compatible changes (no changes to return values, number of arguments, etc.)
+choose a new version number, increasing either the patch level (no new functionality)
+or the minor level (new functionality) => 'X.Y.Z'.
+Update version in src/pg/crankshaft.control
+Copy release/crankshaft--current.sql to release/crankshaft--X.Y.Z.sql
+Prepare incremental downgrade, upgrade scripts....
+
+Python: ...
+
+Install the new release
+
+`make install-release`
+
+Test the new release
+
+`make test-release`
+
+Push the release
+
+Wait for CI tests
+
+Merge into master
+
+Deploy: install extension and python to production hosts,
+update extension in databases (limited to team users, data observatory, ...)
+
+Release manager role: ...
+
+.sql release scripts
+commit
+tests: staging....
+merge, tag, deploy...
--- a/pg/doc/02_moran.md
+++ b/pg/doc/02_moran.md
--- a/pg/doc/03_overlap_sum.md
+++ b/pg/doc/03_overlap_sum.md
--- a/pg/.gitignore
+++ b/pg/.gitignore
@@ -1,3 +0,0 @@
-regression.diffs
-regression.out
-results/
--- a/pg/Makefile
+++ b/pg/Makefile
@@ -1,33 +0,0 @@
-# Makefile to generate the extension out of separate sql source files.
-# Once a version is released, it is not meant to be changed. E.g: once version 0.0.1 is out, it SHALL NOT be changed.
-
-EXTENSION    = crankshaft
-EXTVERSION   = $(shell grep default_version $(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
-
-# The new version to be generated from templates
-NEW_EXTENSION_ARTIFACT = $(EXTENSION)--$(EXTVERSION).sql
-
-# DATA is a special variable used by postgres build infrastructure
-# These are the files to be installed in the server shared dir,
-# for installation from scratch, upgrades and downgrades.
-# @see http://www.postgresql.org/docs/current/static/extend-pgxs.html
-DATA =  $(NEW_EXTENSION_ARTIFACT)
-
-SOURCES_DATA_DIR = sql/$(EXTVERSION)
-SOURCES_DATA = $(wildcard sql/$(EXTVERSION)/*.sql)
-
-# The extension installation artifacts are stored in the base subdirectory
-$(NEW_EXTENSION_ARTIFACT): $(SOURCES_DATA)
-	rm -f $@
-	cat $(SOURCES_DATA_DIR)/*.sql >> $@
-
-REGRESS = $(notdir $(basename $(wildcard test/$(EXTVERSION)/sql/*test.sql)))
-TEST_DIR = test/$(EXTVERSION)
-REGRESS_OPTS = --inputdir='$(TEST_DIR)' --outputdir='$(TEST_DIR)'
-
-PG_CONFIG = pg_config
-PGXS := $(shell $(PG_CONFIG) --pgxs)
-include $(PGXS)
-
-# This seems to be needed at least for PG 9.3.11
-all: $(DATA)
--- a/pg/crankshaft--0.0.1.sql
+++ b/pg/crankshaft--0.0.1.sql
@@ -1,148 +0,0 @@
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
-\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
-- Internal function.
-- Set the seeds of the RNGs (Random Number Generators)
-- used internally.
-CREATE OR REPLACE FUNCTION
-_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
-AS $$
-  from crankshaft import random_seeds
-  random_seeds.set_random_seeds(seed_value)
-$$ LANGUAGE plpythonu;
-- Moran's I
-CREATE OR REPLACE FUNCTION
-  cdb_moran_local (
-      t TEXT,
-  	  attr TEXT,
-  	  significance float DEFAULT 0.05,
-  	  num_ngbrs INT DEFAULT 5,
-  	  permutations INT DEFAULT 99,
-  	  geom_column TEXT DEFAULT 'the_geom',
-  	  id_col TEXT DEFAULT 'cartodb_id',
-      w_type TEXT DEFAULT 'knn')
-RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
-AS $$
-  from crankshaft.clustering import moran_local
-  # TODO: use named parameters or a dictionary
-  return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
-$$ LANGUAGE plpythonu;
-
-- Moran's I Local Rate
-CREATE OR REPLACE FUNCTION
-  cdb_moran_local_rate(t TEXT,
-		 numerator TEXT,
-		 denominator TEXT,
-		 significance FLOAT DEFAULT 0.05,
-		 num_ngbrs INT DEFAULT 5,
-		 permutations INT DEFAULT 99,
-		 geom_column TEXT DEFAULT 'the_geom',
-		 id_col TEXT DEFAULT 'cartodb_id',
-		 w_type TEXT DEFAULT 'knn')
-RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric)
-AS $$
-  from crankshaft.clustering import moran_local_rate
-  # TODO: use named parameters or a dictionary
-  return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
-$$ LANGUAGE plpythonu;
-- Function by Stuart Lynn for a simple interpolation of a value
-- from a polygon table over an arbitrary polygon
-- (weighted by the area proportion overlapped)
-- Aereal weighting is a very simple form of aereal interpolation.
--
-- Parameters:
--   * geom a Polygon geometry which defines the area where a value will be
--     estimated as the area-weighted sum of a given table/column
--   * target_table_name table name of the table that provides the values
--   * target_column column name of the column that provides the values
--   * schema_name optional parameter to defina the schema the target table
--     belongs to, which is necessary if its not in the search_path.
--     Note that target_table_name should never include the schema in it.
-- Return value:
--   Aereal-weighted interpolation of the column values over the geometry
-CREATE OR REPLACE
-FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
-  RETURNS numeric AS
-$$
-DECLARE
-	result numeric;
-  qualified_name text;
-BEGIN
-  IF schema_name IS NULL THEN
-    qualified_name := Format('%I', target_table_name);
-  ELSE
-    qualified_name := Format('%I.%s', schema_name, target_table_name);
-  END IF;
-  EXECUTE Format('
-    SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
-    FROM %s AS a
-    WHERE $1 && a.the_geom
-  ', target_column, qualified_name)
-  USING geom
-  INTO result;
-  RETURN result;
-END;
-$$ LANGUAGE plpgsql;
--
-- Creates N points randomly distributed arround the polygon
--
-- @param g - the geometry to be turned in to points
--
-- @param no_points - the number of points to generate
--
-- @params max_iter_per_point - the function generates points in the polygon's bounding box
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
-- misses per point the funciton accepts before giving up.
--
-- Returns: Multipoint with the requested points
-CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
-RETURNS GEOMETRY AS $$
-DECLARE
-  extent GEOMETRY;
-  test_point Geometry;
-  width                NUMERIC;
-  height               NUMERIC;
-  x0                   NUMERIC;
-  y0                   NUMERIC;
-  xp                   NUMERIC;
-  yp                   NUMERIC;
-  no_left              INTEGER;
-  remaining_iterations INTEGER;
-  points               GEOMETRY[];
-  bbox_line            GEOMETRY;
-  intersection_line    GEOMETRY;
-BEGIN
-  extent  := ST_Envelope(geom);
-  width   := ST_XMax(extent) - ST_XMIN(extent);
-  height  := ST_YMax(extent) - ST_YMIN(extent);
-  x0 	  := ST_XMin(extent);
-  y0 	  := ST_YMin(extent);
-  no_left := no_points;
-
-  LOOP
-    if(no_left=0) THEN
-      EXIT;
-    END IF;
-    yp = y0 + height*random();
-    bbox_line  = ST_MakeLine(
-      ST_SetSRID(ST_MakePoint(yp, x0),4326),
-      ST_SetSRID(ST_MakePoint(yp, x0+width),4326)
-    );
-    intersection_line = ST_Intersection(bbox_line,geom);
-  	test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
-	  points := points || test_point;
-	  no_left = no_left - 1 ;
-  END LOOP;
-  RETURN ST_Collect(points);
-END;
-$$
-LANGUAGE plpgsql VOLATILE;
-- Make sure by default there are no permissions for publicuser
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
-
-- Grant permissions on the schema to publicuser (but just the schema)
-GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
-
-- Revoke execute permissions on all functions in the schema by default
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
--- a/pg/test/0.0.1/expected/01_install_test.out
+++ b/pg/test/0.0.1/expected/01_install_test.out
@@ -1,6 +0,0 @@
-- Install dependencies
-CREATE EXTENSION plpythonu;
-CREATE EXTENSION postgis;
-CREATE EXTENSION cartodb;
-- Install the extension
-CREATE EXTENSION crankshaft;
--- a/python/Makefile
+++ b/python/Makefile
@@ -1,11 +0,0 @@
-# Install the package (needs root privileges)
-install:
-	pip install ./crankshaft --upgrade
-
-# Test from source code
-test:
-	(cd crankshaft && nosetests test/)
-
-# Test currently installed package
-testinstalled:
-	nosetests crankshaft/test/
--- a/python/README.md
+++ b/python/README.md
@@ -1,9 +0,0 @@
-# Crankshaft Python Package
-
-...
-### Run the tests
-
-```bash
-cd crankshaft
-nosetests test/
-```
--- a/src/pg/.gitignore
+++ b/src/pg/.gitignore
@@ -0,0 +1,6 @@
+regression.diffs
+regression.out
+results/
+crankshaft--dev.sql
+crankshaft--dev--current.sql
+crankshaft--current--dev.sql
--- a/src/pg/Makefile
+++ b/src/pg/Makefile
@@ -0,0 +1,41 @@
+# Generation of a new development version 'dev' (with an alias 'current' for
+# updating easily by upgrading to 'current', then 'dev')
+
+# sudo make install -- generate the 'dev' version from current source
+#                      and make it available to PostgreSQL
+# PGUSER=postgres make installcheck -- test the 'dev' extension
+
+EXTENSION    = crankshaft
+
+DATA         = $(EXTENSION)--dev.sql \
+	             $(EXTENSION)--current--dev.sql \
+	             $(EXTENSION)--dev--current.sql
+
+SOURCES_DATA_DIR = sql
+SOURCES_DATA = $(wildcard $(SOURCES_DATA_DIR)/*.sql)
+
+$(DATA): $(SOURCES_DATA)
+	cat $(SOURCES_DATA_DIR)/*.sql > $@
+
+TEST_DIR = test
+REGRESS = $(notdir $(basename $(wildcard $(TEST_DIR)/sql/*test.sql)))
+REGRESS_OPTS = --inputdir='$(TEST_DIR)' --outputdir='$(TEST_DIR)'
+
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+
+# This seems to be needed at least for PG 9.3.11
+all: $(DATA)
+
+# WIP: goals for releasing the extension...
+
+EXTVERSION   = $(shell grep default_version $(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
+
+../release/$(EXTENSION).control: $(EXTENSION).control
+	cp $< $@
+
+release: ../release/$(EXTENSION).control
+	cp $(EXTENSION)--dev.sql $(EXTENSION)--$(EXTVERSION).sql
+	# pending: create upgrade/downgrade scripts,
+	#          commit, push, tag....
--- a/src/pg/README.md
+++ b/src/pg/README.md
--- a/src/pg/crankshaft.control
+++ b/src/pg/crankshaft.control
--- a/pg/sql/0.0.1/00_header.sql
+++ b/pg/sql/0.0.1/00_header.sql
--- a/pg/sql/0.0.1/01_random_seeds.sql
+++ b/pg/sql/0.0.1/01_random_seeds.sql
--- a/pg/sql/0.0.1/02_moran.sql
+++ b/pg/sql/0.0.1/02_moran.sql
--- a/pg/sql/0.0.1/03_overlap_sum.sql
+++ b/pg/sql/0.0.1/03_overlap_sum.sql
--- a/pg/sql/0.0.1/04_dot_density.sql
+++ b/pg/sql/0.0.1/04_dot_density.sql
--- a/pg/sql/0.0.1/90_permissions.sql
+++ b/pg/sql/0.0.1/90_permissions.sql
--- a/pg/test/0.0.1/results/01_install_test.out
+++ b/pg/test/0.0.1/results/01_install_test.out
@@ -3,4 +3,4 @@ CREATE EXTENSION plpythonu;
 CREATE EXTENSION postgis;
 CREATE EXTENSION cartodb;
 -- Install the extension
-CREATE EXTENSION crankshaft;
+CREATE EXTENSION crankshaft VERSION 'dev';
--- a/pg/test/0.0.1/expected/02_moran_test.out
+++ b/pg/test/0.0.1/expected/02_moran_test.out
--- a/pg/test/0.0.1/expected/03_overlap_sum_test.out
+++ b/pg/test/0.0.1/expected/03_overlap_sum_test.out
--- a/pg/test/0.0.1/expected/04_dot_density_test.out
+++ b/pg/test/0.0.1/expected/04_dot_density_test.out
--- a/src/pg/test/fixtures/polyg_values.sql
+++ b/src/pg/test/fixtures/polyg_values.sql
--- a/src/pg/test/fixtures/ppoints.sql
+++ b/src/pg/test/fixtures/ppoints.sql
--- a/src/pg/test/fixtures/ppoints2.sql
+++ b/src/pg/test/fixtures/ppoints2.sql
--- a/pg/test/0.0.1/sql/01_install_test.sql
+++ b/pg/test/0.0.1/sql/01_install_test.sql
@@ -4,4 +4,4 @@ CREATE EXTENSION postgis;
 CREATE EXTENSION cartodb;

 -- Install the extension
-CREATE EXTENSION crankshaft;
+CREATE EXTENSION crankshaft VERSION 'dev';
--- a/pg/test/0.0.1/sql/02_moran_test.sql
+++ b/pg/test/0.0.1/sql/02_moran_test.sql
--- a/pg/test/0.0.1/sql/03_overlap_sum_test.sql
+++ b/pg/test/0.0.1/sql/03_overlap_sum_test.sql
--- a/pg/test/0.0.1/sql/04_dot_density_test.sql
+++ b/pg/test/0.0.1/sql/04_dot_density_test.sql
--- a/pg/test/0.0.1/sql/90_permissions.sql
+++ b/pg/test/0.0.1/sql/90_permissions.sql
--- a/src/py/.gitignore
+++ b/src/py/.gitignore
@@ -1 +1,2 @@
 *.pyc
+dev/
--- a/src/py/Makefile
+++ b/src/py/Makefile
@@ -0,0 +1,9 @@
+# Install the package locally for development
+install:
+	virtualenv dev
+	./dev/bin/pip install ./crankshaft --upgrade
+	./dev/bin/pip install nose
+
+# Test development install
+testinstalled:
+	./dev/bin/nosetests crankshaft/test/
--- a/src/py/README.md
+++ b/src/py/README.md
@@ -0,0 +1,99 @@
+# Crankshaft Python Package
+
+...
+### Run the tests
+
+```bash
+cd crankshaft
+nosetests test/
+```
+
+## Notes about python dependencies
+* This extension is targeted at production databases. Therefore certain restrictions must be assumed about the production environment vs other experimental environments.
+* We're using `pip` and `virtualenv` to generate a suitable isolated environment for python code that has all the dependencies.
+* Every dependency should be:
+  - Added to the `setup.py` file
+  - Installed through it
+  - Tested, when they have a test suite.
+  - Fixed in the `requirements.txt`
+* At present we use Python version 2.7.3
+
+---
+
+### Sample session with virtualenv
+#### Create and use a virtual env
+
+    # Create the virtual environment for python
+    $ virtualenv myenv
+
+    # Activate the virtualenv
+    $ source myenv/bin/activate
+
+    # Install all the requirements
+    # expect this to take a while, as it will trigger a few compilations
+    (myenv) $ pip install -r requirements.txt
+
+    # Add a new pip to the party
+    (myenv) $ pip install pandas
+
+#### Test the libraries with that virtual env
+##### Test numpy library dependency:
+
+    import numpy
+    numpy.test('full')
+
+output:
+```
+======================================================================
+ERROR: test_multiarray.TestNewBufferProtocol.test_relaxed_strides
+----------------------------------------------------------------------
+Traceback (most recent call last):
+  File "/home/ubuntu/www/crankshaft/src/py/dev2/lib/python2.7/site-packages/nose/case.py", line 197, in runTest
+    self.test(*self.arg)
+  File "/home/ubuntu/www/crankshaft/src/py/dev2/lib/python2.7/site-packages/numpy/core/tests/test_multiarray.py", line 5366, in test_relaxed_strides
+    fd.write(c.data)
+TypeError: 'buffer' does not have the buffer interface
+
+----------------------------------------------------------------------
+Ran 6153 tests in 84.561s
+
+FAILED (KNOWNFAIL=3, SKIP=5, errors=1)
+Out[2]: <nose.result.TextTestResult run=6153 errors=1 failures=0>
+```
+
+NOTE: this is expected to fail with Python 2.7.3, which is the version embedded in our postgresql installation
+
+
+##### Run scipy tests
+
+    import scipy
+    scipy.test('full')
+
+Output:
+```
+Ran 21562 tests in 321.610s
+
+OK (KNOWNFAIL=130, SKIP=1840)
+Out[2]: <nose.result.TextTestResult run=21562 errors=0 failures=0>
+```
+Ok, this looks good...
+
+##### Testing pysal
+See <http://pysal.readthedocs.org/en/latest/developers/testing.html>
+
+    import pysal
+    import nose
+    nose.runmodule('pysal')
+
+```
+Ran 537 tests in 42.182s
+
+FAILED (errors=48, failures=17)
+An exception has occurred, use %tb to see the full traceback.
+```
+
+This doesn't look good... Taking a deeper look at the failures, many have the `IOError: [Errno 2] No such file or directory: 'streets.shp'`
+
+In the source code, there's the following [config](https://github.com/pysal/pysal/blob/master/setup.cfg) that seems to be missing in the pip package. By copying it to `lib/python2.7/site-packages` within the environment, it goes down to 17 failures.
+
+The remaining failures don't look good. I see two types: precision calculation errors and arrays/matrices missing 1 element when comparing... TODO: FIX this
--- a/src/py/crankshaft/crankshaft/init.py
+++ b/src/py/crankshaft/crankshaft/init.py
--- a/src/py/crankshaft/crankshaft/clustering/init.py
+++ b/src/py/crankshaft/crankshaft/clustering/init.py
--- a/src/py/crankshaft/crankshaft/clustering/moran.py
+++ b/src/py/crankshaft/crankshaft/clustering/moran.py
--- a/src/py/crankshaft/crankshaft/random_seeds.py
+++ b/src/py/crankshaft/crankshaft/random_seeds.py
--- a/src/py/crankshaft/setup.py
+++ b/src/py/crankshaft/setup.py
@@ -40,7 +40,11 @@ setup(

    # The choice of component versions is dictated by what's
    # provisioned in the production servers.
-    install_requires=['pysal==1.11.0','numpy==1.6.1','scipy==0.17.0'],
+    install_requires=[
+        'numpy>=1.10.4,<2',
+        'scipy>=0.11,<1', # see https://github.com/pysal/pysal/blob/master/requirements.txt
+        'pysal>=1.11.0,<2',
+    ],

    requires=['pysal', 'numpy'],

--- a/src/py/crankshaft/test/fixtures/moran.json
+++ b/src/py/crankshaft/test/fixtures/moran.json
--- a/src/py/crankshaft/test/fixtures/neighbors.json
+++ b/src/py/crankshaft/test/fixtures/neighbors.json
--- a/src/py/crankshaft/test/helper.py
+++ b/src/py/crankshaft/test/helper.py
--- a/src/py/crankshaft/test/mock_plpy.py
+++ b/src/py/crankshaft/test/mock_plpy.py
--- a/src/py/crankshaft/test/test_clustering_moran.py
+++ b/src/py/crankshaft/test/test_clustering_moran.py
Author	SHA1	Message	Date
Javier Goizueta	8dcf420437	Fix typo	2016-03-10 10:11:11 +01:00
Rafa de la Torre	7cd15885dc	Add info about python dependencies	2016-03-09 18:51:04 +01:00
Rafa de la Torre	d766001bf4	Constraint version numbers of reqs a little	2016-03-09 17:45:50 +01:00
Rafa de la Torre	e76eb0c56f	Define install dependencies in order I don't know if that actually affects the result, but just in case.	2016-03-09 14:40:02 +01:00
Javier Goizueta	cdd2d9e722	Directory reorganization and sketch of new versioning procedure	2016-03-08 19:35:02 +01:00