Compare commits

..

1 Commits

Author SHA1 Message Date
Javier Goizueta
8e972128eb Modify sql code to use the python virtualenv 2016-03-09 15:00:50 +01:00
7 changed files with 22 additions and 95 deletions

18
src/pg/sql/01_py.sql Normal file
View File

@@ -0,0 +1,18 @@
-- Activate the crankshaft Python virtualenv inside the PL/Python interpreter
-- of the current session, so that later `import crankshaft...` statements can
-- find the packages installed in that environment.
-- Returns nothing. Raises a Python IOError (surfaced as a PostgreSQL error)
-- if the activation script cannot be read.
CREATE OR REPLACE FUNCTION _cdb_crankshaft_activate_py()
RETURNS VOID
AS $$
# TODO: parameterize with environment variables or something
venv_path = '/home/ubuntu/crankshaft/src/py/dev'

# GD is PL/Python's session-wide global dictionary, shared by all PL/Python
# functions in the session. Every crankshaft wrapper calls this function on
# each invocation, so remember which virtualenv has already been activated
# and skip re-running the activation script for it.
if GD.get('_cdb_crankshaft_venv') == venv_path:
    return

activate_path = venv_path + '/bin/activate_this.py'

# Read the script inside a `with` block so the file handle is closed
# immediately instead of being left open until garbage collection.
with open(activate_path) as activate_file:
    activate_code = activate_file.read()

# The script is executed with __file__ set to its own path, which is the
# usage documented by virtualenv for activate_this.py.
exec(activate_code, dict(__file__=activate_path))

GD['_cdb_crankshaft_venv'] = venv_path
$$ LANGUAGE plpythonu;

View File

@@ -4,6 +4,7 @@
-- Forward seed_value to crankshaft's random_seeds.set_random_seeds.
-- _cdb_crankshaft_activate_py() is called first, before crankshaft is imported.
CREATE OR REPLACE FUNCTION _cdb_random_seeds(seed_value INTEGER)
RETURNS VOID
AS $$
# Run the virtualenv activation function before importing crankshaft.
activation_query = 'SELECT cdb_crankshaft._cdb_crankshaft_activate_py()'
plpy.execute(activation_query)

from crankshaft import random_seeds as seeds
seeds.set_random_seeds(seed_value)
$$ LANGUAGE plpythonu;

View File

@@ -11,6 +11,7 @@ CREATE OR REPLACE FUNCTION
w_type TEXT DEFAULT 'knn')
RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
AS $$
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
from crankshaft.clustering import moran_local
# TODO: use named parameters or a dictionary
return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
@@ -29,6 +30,7 @@ CREATE OR REPLACE FUNCTION
w_type TEXT DEFAULT 'knn')
RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric)
AS $$
plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
from crankshaft.clustering import moran_local_rate
# TODO: use named parameters or a dictionary
return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type)

View File

@@ -7,93 +7,3 @@
cd crankshaft
nosetests test/
```
## Notes about python dependencies
* This extension is targeted at production databases. Therefore certain restrictions must be assumed about the production environment vs other experimental environments.
* We're using `pip` and `virtualenv` to generate a suitable isolated environment that contains all the dependencies of the Python code.
* Every dependency should be:
- Added to the `setup.py` file
- Installed through it
- Tested, when it has a test suite.
- Pinned to an exact version in `requirements.txt`
* At present we use Python version 2.7.3
---
### Sample session with virtualenv
#### Create and use a virtual env
# Create the virtual environment for python
$ virtualenv myenv
# Activate the virtualenv
$ source myenv/bin/activate
# Install all the requirements
# expect this to take a while, as it will trigger a few compilations
(myenv) $ pip install -r requirements.txt
# Install an additional package into the environment
(myenv) $ pip install pandas
#### Test the libraries with that virtual env
##### Test numpy library dependency:
import numpy
numpy.test('full')
output:
```
======================================================================
ERROR: test_multiarray.TestNewBufferProtocol.test_relaxed_strides
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/ubuntu/www/crankshaft/src/py/dev2/lib/python2.7/site-packages/nose/case.py", line 197, in runTest
self.test(*self.arg)
File "/home/ubuntu/www/crankshaft/src/py/dev2/lib/python2.7/site-packages/numpy/core/tests/test_multiarray.py", line 5366, in test_relaxed_strides
fd.write(c.data)
TypeError: 'buffer' does not have the buffer interface
----------------------------------------------------------------------
Ran 6153 tests in 84.561s
FAILED (KNOWNFAIL=3, SKIP=5, errors=1)
Out[2]: <nose.result.TextTestResult run=6153 errors=1 failures=0>
```
NOTE: this is expected to fail with Python 2.7.3, which is the version embedded in our postgresql installation
##### Run scipy tests
import scipy
scipy.test('full')
Output:
```
Ran 21562 tests in 321.610s
OK (KNOWNFAIL=130, SKIP=1840)
Out[2]: <nose.result.TextTestResult run=21562 errors=0 failures=0>
```
Ok, this looks good...
##### Testing pysal
See <http://pysal.readthedocs.org/en/latest/developers/testing.html>
import pysal
import nose
nose.runmodule('pysal')
```
Ran 537 tests in 42.182s
FAILED (errors=48, failures=17)
An exception has occurred, use %tb to see the full traceback.
```
This doesn't look good... Taking a deeper look at the failures, many of them are `IOError: [Errno 2] No such file or directory: 'streets.shp'`.
In the source code, there's the following [config](https://github.com/pysal/pysal/blob/master/setup.cfg) that seems to be missing from the pip package. After copying it to `lib/python2.7/site-packages` within the environment, the result goes down to 17 failures.
The remaining failures don't look good. I see two types: numerical precision errors and arrays/matrices that are missing one element when compared... TODO: FIX this

View File

@@ -40,11 +40,7 @@ setup(
# The choice of component versions is dictated by what's
# provisioned in the production servers.
install_requires=[
'numpy>=1.10.4,<2',
'scipy>=0.11,<1', # see https://github.com/pysal/pysal/blob/master/requirements.txt
'pysal>=1.11.0,<2',
],
install_requires=['pysal==1.11.0','numpy==1.6.1','scipy==0.17.0'],
requires=['pysal', 'numpy'],