Modify SQL code to use the Python virtualenv

2016-03-09 15:00:50 +01:00
7 changed files with 22 additions and 95 deletions
--- a/src/pg/sql/01_py.sql
+++ b/src/pg/sql/01_py.sql
@@ -0,0 +1,18 @@
+-- Activate the crankshaft Python virtualenv by executing its activate_this.py
+CREATE OR REPLACE FUNCTION _cdb_crankshaft_activate_py()
+RETURNS VOID
+AS $$
+    # Read the virtualenv's activate_this.py and exec it in this interpreter.
+    # TODO: parameterize venv_path (environment variable or similar); hard-coded
+    venv_path = '/home/ubuntu/crankshaft/src/py/dev'
+    activate_path = venv_path + '/bin/activate_this.py'
+    exec(open(activate_path).read(),
+         dict(__file__=activate_path))
+
+    # Disabled example: import something from the virtualenv
+    # from crankshaft import random_seeds
+
+    # Disabled example: call into the imported module
+    # random_seeds.set_random_seeds(123)
+    # plpy.notice('here we are')
+$$ LANGUAGE plpythonu;
--- a/src/pg/sql/02_random_seeds.sql
+++ b/src/pg/sql/02_random_seeds.sql
@@ -4,6 +4,7 @@
 CREATE OR REPLACE FUNCTION
 _cdb_random_seeds (seed_value INTEGER) RETURNS VOID
 AS $$
+  plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
  from crankshaft import random_seeds
  random_seeds.set_random_seeds(seed_value)
 $$ LANGUAGE plpythonu;
--- a/src/pg/sql/10_moran.sql
+++ b/src/pg/sql/10_moran.sql
@@ -11,6 +11,7 @@ CREATE OR REPLACE FUNCTION
      w_type TEXT DEFAULT 'knn')
 RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
 AS $$
+  plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
  from crankshaft.clustering import moran_local
  # TODO: use named parameters or a dictionary
  return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
@@ -29,6 +30,7 @@ CREATE OR REPLACE FUNCTION
 		 w_type TEXT DEFAULT 'knn')
 RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric)
 AS $$
+  plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
  from crankshaft.clustering import moran_local_rate
  # TODO: use named parameters or a dictionary
  return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
--- a/src/pg/sql/20_overlap_sum.sql
+++ b/src/pg/sql/20_overlap_sum.sql
--- a/src/pg/sql/30_dot_density.sql
+++ b/src/pg/sql/30_dot_density.sql
--- a/src/py/README.md
+++ b/src/py/README.md
@@ -7,93 +7,3 @@
 cd crankshaft
 nosetests test/
 ```
-
-## Notes about python dependencies
-* This extension is targeted at production databases. Therefore certain restrictions must be assumed about the production environment vs other experimental environments.
-* We're using `pip` and `virtualenv` to generate a suitable isolated environment for python code that has  all the dependencies
-* Every dependency should be:
-  - Added to the `setup.py` file
-  - Installed through it
-  - Tested, when they have a test suite.
-  - Fixed in the `requirements.txt`
-* At present we use Python version 2.7.3
-
---
-
-### Sample session with virtualenv
-#### Create and use a virtual env
-
-    # Create the virtual environment for python
-    $ virtualenv myenv
-
-    # Activate the virtualenv
-    $ source myenv/bin/activate
-
-    # Install all the requirements
-    # expect this to take a while, as it will trigger a few compilations
-    (myenv) $ pip install -r requirements.txt
-
-    # Add a new pip to the party
-    (myenv) $ pip install pandas
-
-#### Test the libraries with that virtual env
-##### Test numpy library dependency:
-
-    import numpy
-    numpy.test('full')
-
-output:
-```
-======================================================================
-ERROR: test_multiarray.TestNewBufferProtocol.test_relaxed_strides
----------------------------------------------------------------------
-Traceback (most recent call last):
-  File "/home/ubuntu/www/crankshaft/src/py/dev2/lib/python2.7/site-packages/nose/case.py", line 197, in runTest
-    self.test(*self.arg)
-  File "/home/ubuntu/www/crankshaft/src/py/dev2/lib/python2.7/site-packages/numpy/core/tests/test_multiarray.py", line 5366, in test_relaxed_strides
-    fd.write(c.data)
-TypeError: 'buffer' does not have the buffer interface
-
----------------------------------------------------------------------
-Ran 6153 tests in 84.561s
-
-FAILED (KNOWNFAIL=3, SKIP=5, errors=1)
-Out[2]: <nose.result.TextTestResult run=6153 errors=1 failures=0>
-```
-
-NOTE: this is expected to fail with Python 2.7.3, which is the version embedded in our postgresql installation
-
-
-##### Run scipy tests
-
-    import scipy
-    scipy.test('full')
-
-Output:
-```
-Ran 21562 tests in 321.610s
-
-OK (KNOWNFAIL=130, SKIP=1840)
-Out[2]: <nose.result.TextTestResult run=21562 errors=0 failures=0>
-```
-Ok, this looks good...
-
-##### Testing pysal
-See [http://pysal.readthedocs.org/en/latest/developers/testing.html]
-
-    import pysal
-    import nose
-    nose.runmodule('pysal')
-
-```
-Ran 537 tests in 42.182s
-
-FAILED (errors=48, failures=17)
-An exception has occurred, use %tb to see the full traceback.
-```
-
-This doesn't look good... Taking a deeper look at the failures, many have the `IOError: [Errno 2] No such file or directory: 'streets.shp'`
-
-In the source code, there's the following [config](https://github.com/pysal/pysal/blob/master/setup.cfg) that seems to be missing in the pip package. By copying it to `lib/python2.7/site-packages` within the environment, it goes down to 17 failures.
-
-The remaining failures don't look good. I see two types: precision calculation errors and arrays/matrices missing 1 element when comparing... TODO: FIX this
--- a/src/py/crankshaft/setup.py
+++ b/src/py/crankshaft/setup.py
@@ -40,11 +40,7 @@ setup(

    # The choice of component versions is dictated by what's
    # provisioned in the production servers.
-    install_requires=[
-        'numpy>=1.10.4,<2',
-        'scipy>=0.11,<1', # see https://github.com/pysal/pysal/blob/master/requirements.txt
-        'pysal>=1.11.0,<2',
-    ],
+    install_requires=['pysal==1.11.0','numpy==1.6.1','scipy==0.17.0'],

    requires=['pysal', 'numpy'],