Compare commits
12 Commits
optimizati
...
better_dot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e479f84cf9 | ||
|
|
f9c1b57dfd | ||
|
|
4c175c9565 | ||
|
|
de6f228e72 | ||
|
|
87f9b2c787 | ||
|
|
fda5d93cf4 | ||
|
|
0748212610 | ||
|
|
27b18f5e1c | ||
|
|
3649f958c8 | ||
|
|
faaf5e419a | ||
|
|
1f3b74e54f | ||
|
|
b8fe05b388 |
8
NEWS.md
8
NEWS.md
@@ -1,11 +1,3 @@
|
||||
0.5.0 (2016-12-15)
|
||||
------------------
|
||||
* Updated PULL_REQUEST_TEMPLATE
|
||||
* Fixed a bug that flipped the order of the numerator and denominator when calculating Moran Local Rate, because the code previously sorted the keys alphabetically.
|
||||
* Add new CDB_GetisOrdsG functions. Getis-Ord's G\* is a geo-statistical measurement of the intensity of clustering of high or low values
|
||||
* Add new outlier detection functions: CDB_StaticOutlier, CDB_PercentOutlier and CDB_StdDevOutlier
|
||||
* Updates in the framework for accessing the Python functions.
|
||||
|
||||
0.4.2 (2016-09-22)
|
||||
------------------
|
||||
* Bugfix for cdb_areasofinterestglobal: import correct modules
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
comment = 'CartoDB Spatial Analysis extension'
|
||||
default_version = '0.5.1'
|
||||
default_version = '0.4.2'
|
||||
requires = 'plpythonu, postgis'
|
||||
superuser = true
|
||||
schema = cdb_crankshaft
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
"""Import all modules"""
|
||||
import crankshaft.random_seeds
|
||||
import crankshaft.clustering
|
||||
import crankshaft.space_time_dynamics
|
||||
import crankshaft.segmentation
|
||||
import analysis_data_provider
|
||||
@@ -1,67 +0,0 @@
|
||||
"""class for fetching data"""
|
||||
import plpy
|
||||
import pysal_utils as pu
|
||||
|
||||
|
||||
class AnalysisDataProvider:
    """Fetch the input data for crankshaft analyses through plpy.

    Every method runs a single query with `plpy.execute` and reports a
    `plpy.SPIError` through `plpy.error` with an 'Analysis failed' message.
    """

    def get_getis(self, w_type, params):
        """fetch data for getis ord's g

        @param w_type text: type of neighbors to calculate ('knn' or 'queen')
        @param params dict: values used to construct the neighbor query
        Output:
          the query result, or `pu.empty_zipped_array(4)` when the query
          returns no rows
        """
        try:
            query = pu.construct_neighbor_query(w_type, params)
            result = plpy.execute(query)
            # if there are no neighbors, exit
            if len(result) == 0:
                return pu.empty_zipped_array(4)
            return result
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_markov(self, w_type, params):
        """fetch data for spatial markov

        @param w_type text: type of neighbors to calculate ('knn' or 'queen')
        @param params dict: values used to construct the neighbor query
        Output:
          the query result, or `pu.empty_zipped_array(4)` when the query
          returns no rows
        """
        try:
            query = pu.construct_neighbor_query(w_type, params)
            data = plpy.execute(query)

            if len(data) == 0:
                return pu.empty_zipped_array(4)

            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_moran(self, w_type, params):
        """fetch data for moran's i analyses

        @param w_type text: type of neighbors to calculate ('knn' or 'queen')
        @param params dict: values used to construct the neighbor query
        Output:
          the query result, or `pu.empty_zipped_array(2)` when the query
          returns no rows
        """
        try:
            query = pu.construct_neighbor_query(w_type, params)
            data = plpy.execute(query)

            # if there are no neighbors, exit
            if len(data) == 0:
                return pu.empty_zipped_array(2)
            return data
        except plpy.SPIError as err:
            # The message used to be formatted with the undefined name `e`,
            # which raised NameError and hid the real database error.
            plpy.error('Analysis failed: %s' % err)
            # NOTE(review): only reached if plpy.error returns instead of
            # raising -- confirm whether this fallback is still wanted.
            return pu.empty_zipped_array(2)

    def get_nonspatial_kmeans(self, query):
        """fetch data for non-spatial kmeans

        @param query text: SQL query to execute as-is
        """
        try:
            data = plpy.execute(query)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_spatial_kmeans(self, params):
        """fetch data for spatial kmeans

        @param params dict: must provide 'id_col', 'geom_col' and 'subquery',
          which are substituted into the query text
        Output:
          query result whose row has the arrays `ids`, `xs` and `ys`, all
          ordered by the id column; rows with a NULL geometry are excluded
        """
        query = ("SELECT "
                 "array_agg({id_col} ORDER BY {id_col}) as ids,"
                 "array_agg(ST_X({geom_col}) ORDER BY {id_col}) As xs,"
                 "array_agg(ST_Y({geom_col}) ORDER BY {id_col}) As ys "
                 "FROM ({subquery}) As a "
                 "WHERE {geom_col} IS NOT NULL").format(**params)
        try:
            data = plpy.execute(query)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)
|
||||
@@ -1,4 +0,0 @@
|
||||
"""Import all functions for clustering"""
|
||||
from moran import *
|
||||
from kmeans import *
|
||||
from getis import *
|
||||
@@ -1,50 +0,0 @@
|
||||
"""
|
||||
Getis-Ord's G geostatistics (hotspot/coldspot analysis)
|
||||
"""
|
||||
|
||||
import pysal as ps
|
||||
from collections import OrderedDict
|
||||
|
||||
# crankshaft modules
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
|
||||
class Getis:
    """Getis-Ord's G* hotspot/coldspot analysis backed by a data provider."""

    def __init__(self, data_provider=None):
        """
        @param data_provider (optional): object exposing `get_getis`; an
          AnalysisDataProvider is created when None is passed
        """
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None else data_provider)

    def getis_ord(self, subquery, attr,
                  w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Getis-Ord's G*
        Neighbors are built by a PostGIS query and the statistic itself is
        computed by PySAL's `G_Local` with `star=True`.
        Andy Eschbacher

        @param subquery text: query the data is pulled from
        @param attr text: name of the attribute column to analyze
        @param w_type text: type of neighbors to calculate ('knn' or 'queen')
        @param num_ngbrs int: number of neighbors (used by the knn query)
        @param permutations int: number of permutations handed to PySAL
        @param geom_col text: name of the geometry column
        @param id_col text: name of the id column
        Output:
          zip of (z_sim, p_sim, p_z_sim, id) per geometry
        """
        # Rows whose attribute is null are left out of the query, so with
        # kNN the neighbors that remain may be farther away than expected.
        qvals = OrderedDict()
        qvals["id_col"] = id_col
        qvals["attr1"] = attr
        qvals["geom_col"] = geom_col
        qvals["subquery"] = subquery
        qvals["num_ngbrs"] = num_ngbrs

        rows = self.data_provider.get_getis(w_type, qvals)
        values = pu.get_attributes(rows)

        # PySAL weight object describing the neighborhood of each row
        w_obj = pu.get_weight(rows, w_type, num_ngbrs)

        # z- and p-values of Getis-Ord's G*
        g_star = ps.esda.getisord.G_Local(values, w_obj,
                                          star=True,
                                          permutations=permutations)

        return zip(g_star.z_sim, g_star.p_sim, g_star.p_z_sim,
                   w_obj.id_order)
|
||||
@@ -1,32 +0,0 @@
|
||||
from sklearn.cluster import KMeans
|
||||
import numpy as np
|
||||
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
|
||||
class Kmeans:
    """K-means clustering of point geometries fetched via a data provider."""

    def __init__(self, data_provider=None):
        """
        @param data_provider (optional): object exposing
          `get_spatial_kmeans`; an AnalysisDataProvider is created when None
          is passed
        """
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None else data_provider)

    def spatial(self, query, no_clusters, no_init=20):
        """
        find centers based on clusters of latitude/longitude pairs

        @param query text: SQL query that has a WGS84 geometry (the_geom)
        @param no_clusters int: passed to KMeans as `n_clusters`
        @param no_init int (optional): passed to KMeans as `n_init`
        Output:
          zip of (id, cluster label) pairs
        """
        response = self.data_provider.get_spatial_kmeans(
            {"subquery": query,
             "geom_col": "the_geom",
             "id_col": "cartodb_id"})

        # The provider returns a single row holding three parallel arrays.
        first_row = response[0]
        xs = first_row['xs']
        ys = first_row['ys']
        ids = first_row['ids']

        clusterer = KMeans(n_clusters=no_clusters, n_init=no_init)
        return zip(ids, clusterer.fit_predict(zip(xs, ys)))
|
||||
@@ -1,208 +0,0 @@
|
||||
"""
|
||||
Moran's I geostatistics (global clustering & outliers presence)
|
||||
"""
|
||||
|
||||
# TODO: Fill in local neighbors which have null/NoneType values with the
|
||||
# average of their neighborhood
|
||||
|
||||
import pysal as ps
|
||||
from collections import OrderedDict
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# crankshaft module
|
||||
import crankshaft.pysal_utils as pu
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
|
||||
class Moran:
    """Moran's I statistics computed by PySAL on PostGIS-built neighbors.

    Every method fetches its rows through the data provider's `get_moran`,
    builds a PySAL weight object from them and returns a zip of results.

    Parameters shared by the methods:
      @param subquery text: query the data is pulled from
      @param w_type text: type of neighbors to calculate ('knn' or 'queen')
      @param num_ngbrs int: number of neighbors (used by the knn query)
      @param permutations int: number of permutations handed to PySAL
      @param geom_col text: name of the geometry column
      @param id_col text: name of the id column
    """

    def __init__(self, data_provider=None):
        """
        @param data_provider (optional): object exposing `get_moran`; an
          AnalysisDataProvider is created when None is passed
        """
        if data_provider is None:
            self.data_provider = AnalysisDataProvider()
        else:
            self.data_provider = data_provider

    def global_stat(self, subquery, attr_name,
                    w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I (global)
        Implementation building neighbors with a PostGIS database and Moran's I
         core clusters with PySAL.
        Andy Eschbacher

        @param attr_name text: name of the attribute column to analyze
        Output:
          zip holding one (I, EI) pair
        """
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr_name),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        attr_vals = pu.get_attributes(result)

        # calculate weights
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate moran global
        moran_global = ps.esda.moran.Moran(attr_vals, weight,
                                           permutations=permutations)

        return zip([moran_global.I], [moran_global.EI])

    def local_stat(self, subquery, attr,
                   w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I implementation for PL/Python
        Andy Eschbacher

        @param attr text: name of the attribute column to analyze
        Output:
          zip of (I, quadrant label, p_sim, id, y) per geometry
        """

        # geometries with attributes that are null are ignored
        # resulting in a collection of not as near neighbors

        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        attr_vals = pu.get_attributes(result)
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
                                         permutations=permutations)

        # find quadrants for each geometry
        quads = quad_position(lisa.q)

        return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)

    def global_rate_stat(self, subquery, numerator, denominator,
                         w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Rate (global)
        Andy Eschbacher

        @param numerator text: name of the numerator column
        @param denominator text: name of the denominator column
        Output:
          zip holding one (I, EI) pair
        """
        # A comma was missing after the "attr2" pair, which made Python call
        # the tuple and raise TypeError before any query was issued.
        # NOTE(review): unlike local_rate_stat these keys are attr1/attr2, so
        # the neighbor query adds no `<> 0` denominator filter -- confirm
        # that is intended.
        params = OrderedDict([("id_col", id_col),
                              ("attr1", numerator),
                              ("attr2", denominator),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        numer = pu.get_attributes(result, 1)
        denom = pu.get_attributes(result, 2)

        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate moran global rate
        lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
                                             permutations=permutations)

        return zip([lisa_rate.I], [lisa_rate.EI])

    def local_rate_stat(self, subquery, numerator, denominator,
                        w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Local Rate
        Andy Eschbacher

        @param numerator text: name of the numerator column
        @param denominator text: name of the denominator column
        Output:
          zip of (I, quadrant label, p_sim, id, y) per geometry
        """
        # geometries with values that are null are ignored
        # resulting in a collection of not as near neighbors

        params = OrderedDict([("id_col", id_col),
                              ("numerator", numerator),
                              ("denominator", denominator),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        numer = pu.get_attributes(result, 1)
        denom = pu.get_attributes(result, 2)

        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
                                              permutations=permutations)

        # find quadrants for each geometry
        quads = quad_position(lisa.q)

        return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)

    def local_bivariate_stat(self, subquery, attr1, attr2,
                             permutations, geom_col, id_col,
                             w_type, num_ngbrs):
        """
        Moran's I (local) Bivariate (untested)

        @param attr1 text: name of the first attribute column
        @param attr2 text: name of the second attribute column
        Output:
          zip of (I, quadrant label, p_sim, id) per geometry
        """

        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr1),
                              ("attr2", attr2),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        attr1_vals = pu.get_attributes(result, 1)
        attr2_vals = pu.get_attributes(result, 2)

        # create weights
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
                                            permutations=permutations)

        # find clustering of significance
        lisa_sig = quad_position(lisa.q)

        return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
|
||||
|
||||
# Low level functions ----------------------------------------
|
||||
|
||||
|
||||
def map_quads(coord):
    """
    Translate a quadrant number into its Moran's I label
    (1 -> 'HH', 2 -> 'LH', 3 -> 'LL', 4 -> 'HL').
    Input:
      @param coord (int): quadrant of a specific measurement
    Output:
      classification (one of 'HH', 'LH', 'LL', or 'HL'); None for any
      other value
    """
    labels = ((1, 'HH'), (2, 'LH'), (3, 'LL'), (4, 'HL'))
    for quadrant, label in labels:
        if coord == quadrant:
            return label
    return None
|
||||
|
||||
|
||||
def quad_position(quads):
    """
    Label every quadrant number with its Moran's I classification
    Input:
      @param quads ndarray: an array of quads classified by
        1-4 (PySAL default)
    Output:
      @param list: one label per input entry ('HH', 'LL', etc.), as
        produced by `map_quads`
    """
    labels = []
    for quad in quads:
        labels.append(map_quads(quad))
    return labels
|
||||
@@ -1,2 +0,0 @@
|
||||
"""Import all functions for pysal_utils"""
|
||||
from crankshaft.pysal_utils.pysal_utils import *
|
||||
@@ -1,211 +0,0 @@
|
||||
"""
|
||||
Utilities module for generic PySAL functionality, mainly centered on
|
||||
translating queries into numpy arrays or PySAL weights objects
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
|
||||
|
||||
def construct_neighbor_query(w_type, query_vals):
    """Build the SQL text (a string) used for finding neighbors
        @param w_type text: type of neighbors to calculate ('knn' or 'queen');
          compared case-insensitively, anything other than 'knn' yields
          the queen query
        @param query_vals dict: values used to construct the query
    """
    if w_type.lower() != 'knn':
        return queen(query_vals)
    return knn(query_vals)
|
||||
|
||||
|
||||
# Build weight object
|
||||
def get_weight(query_res, w_type='knn', num_ngbrs=5):
    """
    Construct PySAL weight from return value of query
    @param query_res dict-like: query results with attributes and neighbors;
      each row is read through its 'id' and 'neighbors' keys
    @param w_type text (optional): not used by the construction; kept so
      existing callers keep working
    @param num_ngbrs int (optional): not used by the construction; kept so
      existing callers keep working
    Output:
      `ps.W` built from the id -> neighbors mapping, with its transform set
      to 'r'
    """
    # The leftover debug `print` statement (Python 2 only syntax) and the
    # commented-out manual weight calculation were removed.
    neighbors = {x['id']: x['neighbors'] for x in query_res}

    built_weight = ps.W(neighbors)
    built_weight.transform = 'r'

    return built_weight
|
||||
|
||||
|
||||
def query_attr_select(params):
    """
    Build the attribute part of the SELECT clause of a neighbors query.
    Attributes are aliased attr1, attr2, ... in the order they are found.
    @param params: dict of information used in query (column names,
                   table name, etc.). With a 'time_cols' entry (markov
                   analysis) its column names are used; otherwise every
                   entry except 'id_col', 'geom_col', 'subquery' and
                   'num_ngbrs' is treated as an attribute column.
    Example:
      OrderedDict([('numerator', 'price'),
                   ('denominator', 'sq_meters'),
                   ('subquery', 'SELECT * FROM interesting_data')])
    Output:
      "i.\"price\"::numeric As attr1, " \
      "i.\"sq_meters\"::numeric As attr2, "
    """
    select_item = 'i."%(col)s"::numeric As attr%(alias_num)s, '

    if 'time_cols' in params:
        # markov analysis: the column names are listed explicitly
        columns = list(params['time_cols'])
    else:
        # moran's analysis: every non-structural entry names a column
        structural = ('id_col', 'geom_col', 'subquery', 'num_ngbrs')
        columns = [params[key] for key in params if key not in structural]

    return ''.join(select_item % {"col": column, "alias_num": position}
                   for position, column in enumerate(columns, 1))
|
||||
|
||||
|
||||
def query_attr_where(params):
    """
    Build the WHERE conditions that weed out NULL-valued attributes when
    constructing a neighbors query. `idx_replace` is a placeholder for the
    table alias.
    Input: dict of params:
        {'subquery': ...,
         'numerator': 'data1',
         'denominator': 'data2',
         '': ...}
    Output:
      'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL
       AND idx_replace."data2" <> 0'
      (the `<> 0` condition is only added for a 'denominator' entry)
    Input:
    {'subquery': ...,
     'time_cols': ['time1', 'time2', 'time3'],
     'etc': ...}
    Output: 'idx_replace."time1" IS NOT NULL AND idx_replace."time2" IS NOT
      NULL AND idx_replace."time3" IS NOT NULL'
    """
    not_null = 'idx_replace."%s" IS NOT NULL'

    if 'time_cols' in params:
        # markov where clauses
        conditions = [not_null % column for column in params['time_cols']]
    else:
        # moran where clauses: every non-structural key names a column
        structural = ('id_col', 'geom_col', 'subquery', 'num_ngbrs')
        keys = [key for key in params if key not in structural]
        conditions = [not_null % params[key] for key in keys]

        if 'denominator' in keys:
            conditions.append(
                'idx_replace."%s" <> 0' % params['denominator'])

    return " AND ".join(conditions)
|
||||
|
||||
|
||||
def knn(params):
    """SQL query for k-nearest neighbors.
        @param params: dict of values to fill template; 'id_col',
          'geom_col', 'subquery' and 'num_ngbrs' are substituted directly,
          the remaining entries become the attribute select/where fragments
    """
    where_clause = query_attr_where(params)

    # First pass (%-formatting) injects the generated attribute fragments;
    # the second pass (str.format) fills the {placeholders} from params.
    template = ('SELECT '
                'i."{id_col}" As id, '
                '%(attr_select)s'
                '(SELECT ARRAY(SELECT j."{id_col}" '
                'FROM ({subquery}) As j '
                'WHERE '
                'i."{id_col}" <> j."{id_col}" AND '
                '%(attr_where_j)s '
                'ORDER BY '
                'j."{geom_col}" <-> i."{geom_col}" ASC '
                'LIMIT {num_ngbrs})'
                ') As neighbors '
                'FROM ({subquery}) As i '
                'WHERE '
                '%(attr_where_i)s '
                'ORDER BY i."{id_col}" ASC;')

    with_attrs = template % {
        "attr_select": query_attr_select(params),
        "attr_where_i": where_clause.replace("idx_replace", "i"),
        "attr_where_j": where_clause.replace("idx_replace", "j"),
    }

    return with_attrs.format(**params)
|
||||
|
||||
|
||||
# SQL query for finding queens neighbors (all contiguous polygons)
|
||||
def queen(params):
    """SQL query for queen neighbors.
        @param params dict: information to fill query; 'id_col', 'geom_col'
          and 'subquery' are substituted directly, the remaining entries
          become the attribute select/where fragments
    """
    where_clause = query_attr_where(params)

    # First pass (%-formatting) injects the generated attribute fragments;
    # the second pass (str.format) fills the {placeholders} from params.
    template = ('SELECT '
                'i."{id_col}" As id, '
                '%(attr_select)s'
                '(SELECT ARRAY(SELECT j."{id_col}" '
                'FROM ({subquery}) As j '
                'WHERE i."{id_col}" <> j."{id_col}" AND '
                'ST_Touches(i."{geom_col}", j."{geom_col}") AND '
                '%(attr_where_j)s)'
                ') As neighbors '
                'FROM ({subquery}) As i '
                'WHERE '
                '%(attr_where_i)s '
                'ORDER BY i."{id_col}" ASC;')

    with_attrs = template % {
        "attr_select": query_attr_select(params),
        "attr_where_i": where_clause.replace("idx_replace", "i"),
        "attr_where_j": where_clause.replace("idx_replace", "j"),
    }

    return with_attrs.format(**params)
|
||||
|
||||
# to add more weight methods open a ticket or pull request
|
||||
|
||||
|
||||
def get_attributes(query_res, attr_num=1):
    """
    Collect one attribute column of the query results as a float array
    @param query_res: query results with attributes and neighbors; each row
      is read through its 'attr<attr_num>' key
    @param attr_num: attribute number (1, 2, ...)
    Output:
      1-D numpy array of floats, one entry per row
    """
    # `np.float` was only an alias of the builtin `float` and has been
    # removed from NumPy; the builtin gives the same dtype.
    return np.array([x['attr' + str(attr_num)] for x in query_res],
                    dtype=float)
|
||||
|
||||
|
||||
def empty_zipped_array(num_nones):
    """
    Placeholder result for analyses that found no rows (no neighbors):
    a list holding one tuple filled with None
    Input:
      @param num_nones int: number of columns (e.g., 4)
    Output:
      [(None, None, None, None)]
    """
    blank_row = (None,) * num_nones
    return [blank_row]
|
||||
@@ -1,11 +0,0 @@
|
||||
"""Random seed generator used for non-deterministic functions in crankshaft"""
|
||||
import random
|
||||
import numpy
|
||||
|
||||
def set_random_seeds(value):
    """
    Seed both RNGs (Random Number Generators) used internally with the same
    value: Python's `random` module first, then `numpy.random`.
    """
    for seed in (random.seed, numpy.random.seed):
        seed(value)
|
||||
@@ -1 +0,0 @@
|
||||
from segmentation import *
|
||||
@@ -1,176 +0,0 @@
|
||||
"""
|
||||
Segmentation creation and prediction
|
||||
"""
|
||||
|
||||
import sklearn
|
||||
import numpy as np
|
||||
import plpy
|
||||
from sklearn.ensemble import GradientBoostingRegressor
|
||||
from sklearn import metrics
|
||||
from sklearn.cross_validation import train_test_split
|
||||
|
||||
# Lower level functions
|
||||
#----------------------
|
||||
|
||||
def replace_nan_with_mean(array):
    """
    Fill NaN entries with the mean of the non-NaN values, in place.
    Input:
      @param array: a 1-D or 2-D array of floats which may have null-valued
        entries
    Output:
      the same array object; a 1-D array is filled with the mean of the
      whole array, a 2-D array column by column with each column's mean
    """
    nan_mask = np.isnan(array)
    if not nan_mask.any():
        return array

    if array.ndim == 1:
        # The old (row, col) unpacking failed here: a 1-D array, such as the
        # target vector, has no column index.
        array[nan_mask] = np.mean(array[~nan_mask])
        return array

    # only visit the columns that actually contain a NaN
    for col in np.unique(np.where(nan_mask)[1]):
        missing = nan_mask[:, col]
        array[missing, col] = np.mean(array[~missing, col])

    return array
|
||||
|
||||
def get_data(variable, feature_columns, query):
    """
    Fetch data from the database, clean, and package into
    numpy arrays
    Input:
      @param variable: name of the target variable
      @param feature_columns: list of column names
      @param query: subquery that data is pulled from for the packaging
    Output:
      (target, features): target is a 1-D float array and features a 2-D
      float array with one column per feature column; NaN entries in both
      are replaced by `replace_nan_with_mean`
    """

    columns = ','.join(['array_agg("{col}") As "{col}"'.format(col=col) for col in feature_columns])

    try:
        data = plpy.execute('''SELECT array_agg("{variable}") As target, {columns} FROM ({query}) As a'''.format(
            variable=variable,
            columns=columns,
            query=query))
    except Exception as e:
        plpy.error('Failed to access data to build segmentation model: %s' % e)

    # extract target data from plpy object; cast to float so that NULLs
    # become NaN (an object array would make np.isnan raise TypeError)
    target = np.array(data[0]['target'], dtype=float)

    # put n feature data arrays into an n x m array of arrays
    features = np.column_stack([np.array(data[0][col], dtype=float) for col in feature_columns])

    return replace_nan_with_mean(target), replace_nan_with_mean(features)
|
||||
|
||||
# High level interface
|
||||
# --------------------
|
||||
|
||||
def create_and_predict_segment_agg(target, features, target_features, target_ids, model_parameters):
    """
    Version of create_and_predict_segment that works on arrays that come
    straight from the SQL calling the function.

    Input:
        @param target: The 1D array of length NSamples containing the target variable we want the model to predict
        @param features: The 2D array of size NSamples * NFeatures that forms the input to the model
        @param target_features: The array of features the prediction is made for
        @param target_ids: A 1D array of target_ids that will be used to associate the results of the prediction with the rows which they come from
        @param model_parameters: A dictionary containing parameters for the model.
    Output:
        zip of (target id, prediction, accuracy); the accuracy is the same
        for every row
    """
    # fill the gaps in the three inputs, in the same order as before
    train_target = replace_nan_with_mean(target)
    train_features = replace_nan_with_mean(features)
    target_features = replace_nan_with_mean(target_features)

    # 0.2 is the fraction of samples handed to train_model as test split
    model, accuracy = train_model(train_target, train_features, model_parameters, 0.2)
    prediction = model.predict(target_features)

    # one copy of the accuracy per predicted row
    accuracy_column = np.full(prediction.shape, accuracy)
    return zip(target_ids, prediction, accuracy_column)
|
||||
|
||||
|
||||
|
||||
def create_and_predict_segment(query, variable, target_query, model_params):
    """
    generate a segment with machine learning
    Stuart Lynn

    Input:
        @param query: subquery the training data is pulled from
        @param variable: name of the target variable
        @param target_query: query providing the rows to predict on
        @param model_params: dictionary of model parameters
    Output:
        zip of (cartodb_id, prediction, accuracy); the accuracy is the same
        for every row
    """

    # fetch column names from a single sample row
    try:
        sample = plpy.execute('SELECT * FROM ({query}) As a LIMIT 1 '.format(query=query))
        columns = sample[0].keys()
    except Exception as e:
        plpy.error('Failed to build segmentation model: %s' % e)

    # every column that is neither the target nor a CartoDB bookkeeping
    # column is used as a feature
    excluded = [variable, 'cartodb_id', 'the_geom', 'the_geom_webmercator']
    feature_columns = set(columns).difference(excluded)

    # get data from database
    target, features = get_data(variable, feature_columns, query)

    # 0.2 is the fraction of samples handed to train_model as test split
    model, accuracy = train_model(target, features, model_params, 0.2)
    cartodb_ids, result = predict_segment(model, feature_columns, target_query)

    return zip(cartodb_ids, result, [accuracy] * result.shape[0])
|
||||
|
||||
|
||||
def train_model(target, features, model_params, test_split):
    """
    Train the Gradient Boosting model on the provided data and calculate the accuracy of the model
    Input:
        @param target: 1D Array of the variable that the model is to be trained to predict
        @param features: 2D Array NSamples * NFeatures to use in training the model
        @param model_params: A dictionary of model parameters, the full specification can be found on the
            scikit learn page for [GradientBoostingRegressor](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html)
        @param test_split: The fraction of the data to be withheld for testing the model / calculating the accuracy
    Output:
        (model, accuracy): the fitted model and the value returned by
        calculate_model_accuracy for the withheld samples
    """
    features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=test_split)
    model = GradientBoostingRegressor(**model_params)
    model.fit(features_train, target_train)
    # Score on the withheld split only. Scoring on the full data set, as
    # before, left the split unused and included the training samples.
    accuracy = calculate_model_accuracy(model, features_test, target_test)
    return model, accuracy
|
||||
|
||||
def calculate_model_accuracy(model, features, target):
    """
    Score a model by the mean squared error of its prediction
    Input:
        @param model: model trained from input features
        @param features: features to make a prediction from
        @param target: target to compare prediction to
    Output:
        mean squared error of the model prediction compared to the target
    """
    return metrics.mean_squared_error(model.predict(features), target)
|
||||
|
||||
def predict_segment(model, features, target_query):
    """
    Use the provided model to predict the values for the new feature set
    Input:
        @param model: The pretrained model
        @param features: A list of features to use in the model prediction (list of column names)
        @param target_query: The query to run to obtain the data to predict on and the cartodb_ids associated with it.
    Output:
        (cartodb_ids, predictions): the ids ordered by cartodb_id and a
        numpy array of predictions in the same order
    """

    # number of rows fetched from the cursor per round trip
    batch_size = 1000
    joined_features = ','.join(['"{0}"::numeric'.format(a) for a in features])

    try:
        # ORDER BY keeps the rows in the same order as the ids aggregated
        # below; without it the row order is unspecified and predictions
        # could be paired with the wrong cartodb_id.
        cursor = plpy.cursor('SELECT Array[{joined_features}] As features FROM ({target_query}) As a ORDER BY cartodb_id'.format(
            joined_features=joined_features,
            target_query=target_query))
    except Exception as e:
        plpy.error('Failed to build segmentation model: %s' % e)

    results = []

    while True:
        rows = cursor.fetch(batch_size)
        if not rows:
            break
        batch = np.row_stack([np.array(row['features'], dtype=float) for row in rows])

        # Need to fix this. Should be global mean. This will cause weird effects
        batch = replace_nan_with_mean(batch)
        prediction = model.predict(batch)
        results.append(prediction)

    try:
        cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids']
    except Exception as e:
        plpy.error('Failed to build segmentation model: %s' % e)

    return cartodb_ids, np.concatenate(results)
|
||||
@@ -1,2 +0,0 @@
|
||||
"""Import all functions from clustering libraries."""
|
||||
from markov import *
|
||||
@@ -1,194 +0,0 @@
|
||||
"""
|
||||
Spatial dynamics measurements using Spatial Markov
|
||||
"""
|
||||
|
||||
# TODO: remove all plpy dependencies
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
import plpy
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
|
||||
class Markov:
    """Spatial Markov trend analysis backed by a data provider."""

    def __init__(self, data_provider=None):
        """
        @param data_provider (optional): object exposing `get_markov`; an
          AnalysisDataProvider is created when None is passed
        """
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None else data_provider)

    def spatial_trend(self, subquery, time_cols, num_classes=7,
                      w_type='knn', num_ngbrs=5, permutations=0,
                      geom_col='the_geom', id_col='cartodb_id'):
        """
        Predict the trends of a unit based on:
          1. history of its transitions to different classes (e.g., 1st
             quantile -> 2nd quantile)
          2. average class of its neighbors

        Inputs:
        @param subquery string: e.g., SELECT the_geom, cartodb_id,
          interesting_time_column FROM table_name
        @param time_cols list of strings: list of strings of column names;
          at least two are required
        @param num_classes (optional): number of classes to break
          distribution of values into. Currently uses quantile bins.
        @param w_type string (optional): weight type ('knn' or 'queen')
        @param num_ngbrs int (optional): number of neighbors (if knn type)
        @param permutations int (optional): number of permutations for test
          stats
        @param geom_col string (optional): name of column which contains
          the geometries
        @param id_col string (optional): name of column which has the ids
          of the table

        Outputs:
        zip of (trend, trend_up, trend_down, volatility, id) per unit, the
        first four as computed by `get_prob_stats`
        """
        if len(time_cols) < 2:
            plpy.error('More than one time column needs to be passed')

        rows = self.data_provider.get_markov(
            w_type,
            {"id_col": id_col,
             "time_cols": time_cols,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs})

        # build weight
        w_obj = pu.get_weight(rows, w_type)
        w_obj.transform = 'r'

        # prep time data
        series = get_time_data(rows, time_cols)

        markov = ps.Spatial_Markov(series,
                                   w_obj,
                                   k=num_classes,
                                   fixed=False,
                                   permutations=permutations)

        # quantile class of the spatial lag of the last time column
        lagged_last = ps.lag_spatial(w_obj, series[:, -1])
        lag_classes = ps.Quantiles(lagged_last, k=num_classes).yb

        # look up the probability distribution for each unit according to
        # its class and lag class
        prob_dist = get_prob_dist(markov.P,
                                  lag_classes,
                                  markov.classes[:, -1])

        # find the ups and down and overall distribution of each cell
        stats = get_prob_stats(prob_dist, markov.classes[:, -1])
        trend_up, trend_down, trend, volatility = stats

        # output the results
        return zip(trend, trend_up, trend_down, volatility, w_obj.id_order)
|
||||
|
||||
|
||||
|
||||
def get_time_data(markov_data, time_cols):
    """
        Assemble the time-series matrix from the query rows.

        Every row of `markov_data` is read through the keys 'attr1',
        'attr2', ... (one key per entry of `time_cols`). The returned float
        array has one row per record and one column per time column.
    """
    attr_keys = ['attr%d' % pos for pos in range(1, len(time_cols) + 1)]

    # gather the values column by column, then flip to records-as-rows
    columns = []
    for key in attr_keys:
        columns.append([record[key] for record in markov_data])

    return np.array(columns, dtype=float).T
|
||||
|
||||
|
||||
# not currently used
|
||||
def rebin_data(time_data, num_time_per_bin):
    """
        Convert an n x l matrix into an n x ceil(l / m) matrix, where
        m = num_time_per_bin and every output column is the mean of m
        consecutive input columns:
         1 2 3 4    1.5 3.5
         5 6 7 8 -> 5.5 7.5
         9 8 7 6    8.5 6.5
         5 4 3 2    4.5 2.5

        if m = 2, the 4 x 4 matrix is transformed to a 4 x 2 matrix.

        This process effectively resamples the data at a time span m
         units longer than the input data.
        For cases when there is a remainder (remainder(5/3) = 2), the
         remaining two columns are binned together as the last time period,
         while the first three are binned together for the first period.

        Input:
        @param time_data n x l ndarray: measurements of an attribute at
         different time intervals (one column per interval)
        @param num_time_per_bin int: number of columns to average into a new
         column
        Output:
        n x ceil(l / m) ndarray of resampled time series
    """
    num_cols = time_data.shape[1]

    # Ceiling division in pure integer arithmetic: a perfect fit gives
    # l / m bins, a remainder adds one extra bin. `//` keeps the count an
    # int on both Python 2 and Python 3 (range() rejects floats).
    n_bins = -(-num_cols // num_time_per_bin)

    return np.array(
        [time_data[:, num_time_per_bin * i:num_time_per_bin * (i + 1)]
         .mean(axis=1)
         for i in range(n_bins)]).T
|
||||
|
||||
|
||||
def get_prob_dist(transition_matrix, lag_indices, unit_indices):
    """
        Given an array of transition matrices, look up the probability
        associated with the arrangements passed

        Input:
        @param transition_matrix ndarray[k,k,k]: indexed first by lag class,
         then by unit class; the remaining axis is the distribution that is
         returned for that pair
        @param lag_indices ndarray: lag class of each unit
        @param unit_indices ndarray: own class of each unit, in the same
         order as `lag_indices`

        Output:
        Array of probability distributions, one entry per element of
        `lag_indices`
    """

    # pair the i-th lag class with the i-th unit class
    return np.array([transition_matrix[(lag, unit_indices[i])]
                     for i, lag in enumerate(lag_indices)])
|
||||
|
||||
|
||||
def get_prob_stats(prob_dist, unit_indices):
    """
        get the statistics of the probability distributions

        Inputs:
        @param prob_dist ndarray(float): 2-D array with one row of class
                             probabilities per unit
        @param unit_indices sequence(int): for each row of `prob_dist`, the
                             index of the unit's own class

        Outputs:
        @param trend_up ndarray(float): sum of probabilities for upward
                             movement (relative to the unit index of that prob)
        @param trend_down ndarray(float): sum of probabilities for downward
                             movement (relative to the unit index of that prob)
        @param trend ndarray(float): difference of upward and downward
                             movements divided by the probability at the unit
                             index; NaN where that probability is not positive
        @param volatility ndarray(float): standard deviation of each row of
                             `prob_dist`
    """

    num_elements = len(unit_indices)
    trend_up = np.empty(num_elements, dtype=float)
    trend_down = np.empty(num_elements, dtype=float)
    trend = np.empty(num_elements, dtype=float)

    for i, unit in enumerate(unit_indices):
        trend_up[i] = prob_dist[i, (unit + 1):].sum()
        trend_down[i] = prob_dist[i, :unit].sum()
        stay_prob = prob_dist[i, unit]
        if stay_prob > 0.0:
            trend[i] = (trend_up[i] - trend_down[i]) / stay_prob
        else:
            # ratio is undefined; store NaN explicitly rather than relying
            # on numpy coercing None into a float array
            trend[i] = np.nan

    # calculate volatility of distribution
    volatility = prob_dist.std(axis=1)

    return trend_up, trend_down, trend, volatility
|
||||
@@ -1,5 +0,0 @@
|
||||
joblib==0.8.3
|
||||
numpy==1.6.1
|
||||
scipy==0.14.0
|
||||
pysal==1.11.2
|
||||
scikit-learn==0.14.1
|
||||
@@ -1,49 +0,0 @@
|
||||
|
||||
"""
CartoDB Spatial Analysis Python Library
See:
https://github.com/CartoDB/crankshaft
"""

from setuptools import setup, find_packages

# Packaging metadata and dependency pins for the `crankshaft` distribution.
setup(
    name='crankshaft',

    version='0.5.0',

    description='CartoDB Spatial Analysis Python Library',

    url='https://github.com/CartoDB/crankshaft',

    author='Data Services Team - CartoDB',
    author_email='dataservices@cartodb.com',

    license='MIT',

    # NOTE(review): 'comunity' below looks like a typo for 'community'; it is
    # left untouched here because the string is emitted as package metadata.
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Mapping comunity',
        'Topic :: Maps :: Mapping Tools',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 2.7',
    ],

    keywords='maps mapping tools spatial analysis geostatistics',

    # auto-discover packages, leaving out the listed names
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),

    # optional dependency sets, selected as crankshaft[dev] / crankshaft[test]
    extras_require={
        'dev': ['unittest'],
        'test': ['unittest', 'nose', 'mock'],
    },

    # The choice of component versions is dictated by what's
    # provisioned in the production servers.
    # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
    install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'],

    requires=['pysal', 'numpy', 'sklearn'],

    test_suite='test'
)
|
||||
@@ -1 +0,0 @@
|
||||
[[0.004793783909323601, 0.17999999999999999, 0.49808756424021061], [-1.0701189472090842, 0.079000000000000001, 0.14228288580832316], [-0.67867750971877305, 0.42099999999999999, 0.24867110969448558], [-0.67407386707620487, 0.246, 0.25013217644612995], [-0.79495689068870035, 0.33200000000000002, 0.21331928959090596], [-0.49279481022182703, 0.058999999999999997, 0.31107878905057329], [-0.38075627530057132, 0.28399999999999997, 0.35169205342069643], [-0.86710921611314895, 0.23699999999999999, 0.19294108571294855], [-0.78618647240956485, 0.050000000000000003, 0.2158791250244505], [-0.76108527223116984, 0.064000000000000001, 0.22330306830813684], [-0.13340753531942209, 0.247, 0.44693554317763651], [-0.57584545722033043, 0.48999999999999999, 0.28235982246156488], [-0.78882694661192831, 0.433, 0.2151065788731219], [-0.38769767950046219, 0.375, 0.34911988661484239], [-0.56057819488052207, 0.41399999999999998, 0.28754255985169652], [-0.41354017495644935, 0.45500000000000002, 0.339605447117173], [-0.23993577722243081, 0.49099999999999999, 0.40519002230969337], [-0.1389080156677496, 0.40400000000000003, 0.44476141839645233], [-0.25485737510500855, 0.376, 0.39941662953554224], [-0.71218610582902353, 0.17399999999999999, 0.23817476979886087], [-0.54533105995872144, 0.13700000000000001, 0.2927629228714812], [-0.39547917847510977, 0.033000000000000002, 0.34624464252424236], [-0.43052658996257548, 0.35399999999999998, 0.33340631435564982], [-0.37296719193774736, 0.40300000000000002, 0.35458643102865428], [-0.66482612169465694, 0.31900000000000001, 0.25308085650392698], [-0.13772133540823422, 0.34699999999999998, 0.44523032843016275], [-0.6765304487868502, 0.20999999999999999, 0.24935196033890672], [-0.64518763494323472, 0.32200000000000001, 0.25940279912025543], [-0.5078622084312413, 0.41099999999999998, 0.30577498972600159], [-0.12652006733772059, 0.42899999999999999, 0.44966013262301163], [-0.32691133022814595, 0.498, 0.37186747562269029], [0.25533848511500978, 
0.42399999999999999, 0.39923083899077472], [2.7045138116476508, 0.0050000000000000001, 0.0034202212972238577], [-0.1551614486076057, 0.44400000000000001, 0.43834701985429037], [1.9524487722567723, 0.012999999999999999, 0.025442473674991528], [-1.2055816465306763, 0.017000000000000001, 0.11398941970467646], [3.478472976017831, 0.002, 0.00025213964072468009], [-1.4621715757903719, 0.002, 0.071847099325659136], [-0.84010307600180256, 0.085000000000000006, 0.20042529779230778], [5.7097646237318243, 0.0030000000000000001, 5.6566262784940591e-09], [1.5082367956567375, 0.065000000000000002, 0.065746966514827365], [-0.58337270103430816, 0.44, 0.27982121546450034], [-0.083271860457022437, 0.45100000000000001, 0.46681768733385554], [-0.46872337815000953, 0.34599999999999997, 0.31963368715684204], [0.18490279849545319, 0.23799999999999999, 0.42665263797981101], [3.470424529947997, 0.012, 0.00025981817437825683], [-0.99942612137154796, 0.032000000000000001, 0.15879415560388499], [-1.3650387953594485, 0.034000000000000002, 0.08612042845912049], [1.8617160516432014, 0.081000000000000003, 0.03132156240215267], [1.1321188945775384, 0.11600000000000001, 0.12879222611766061], [0.064116686050580601, 0.27300000000000002, 0.4744386578180424], [-0.42032194540259099, 0.29999999999999999, 0.33712514016213468], [-0.79581215423980922, 0.123, 0.21307061309098785], [-0.42792753720906046, 0.45600000000000002, 0.33435193892883741], [-1.0629378527428395, 0.051999999999999998, 0.14390506780140866], [-0.54164761752225477, 0.33700000000000002, 0.29403064095211839], [1.0934778886820793, 0.13700000000000001, 0.13709201601893539], [-0.094068785378413719, 0.38200000000000001, 0.46252725802998929], [0.13482026574801856, 0.36799999999999999, 0.44637699118865737], [-0.13976995315653129, 0.34699999999999998, 0.44442087706276601], [-0.051047663924746682, 0.32000000000000001, 0.47964376985626245], [-0.21468297736730158, 0.41699999999999998, 0.41500724761906527], [-0.20873154637330626, 0.38800000000000001, 
0.41732890604390893], [-0.32427876152583485, 0.49199999999999999, 0.37286349875557478], [-0.65254842943280977, 0.374, 0.25702372075306734], [-0.48611858196118796, 0.23300000000000001, 0.31344154643990074], [-0.14482354344529477, 0.32600000000000001, 0.44242509660469886], [-0.51052030974200002, 0.439, 0.30484349480873729], [0.56814382285283538, 0.14999999999999999, 0.28496865660103166], [0.58680919931668207, 0.161, 0.27866592887231878], [0.013390357044409013, 0.25800000000000001, 0.49465818005865647], [-0.19050728887961568, 0.41399999999999998, 0.4244558160399462], [-0.60531777422216049, 0.35199999999999998, 0.2724839368239631], [1.0899331115425805, 0.127, 0.13787130480311838], [0.17015055382651084, 0.36899999999999999, 0.43244586845546418], [-0.21738337124409801, 0.40600000000000003, 0.41395479459421991], [1.0329303331079593, 0.079000000000000001, 0.15081825117169467], [1.0218317101096221, 0.104, 0.15343027913308094]]
|
||||
@@ -1 +0,0 @@
|
||||
[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
|
||||
@@ -1 +0,0 @@
|
||||
[[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 
0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]]
|
||||
@@ -1,52 +0,0 @@
|
||||
[[0.9319096128346788, "HH"],
|
||||
[-1.135787401862846, "HL"],
|
||||
[0.11732030672508517, "LL"],
|
||||
[0.6152779669180425, "LL"],
|
||||
[-0.14657336660125297, "LH"],
|
||||
[0.6967858120189607, "LL"],
|
||||
[0.07949310115714454, "HH"],
|
||||
[0.4703198759258987, "HH"],
|
||||
[0.4421125200498064, "HH"],
|
||||
[0.5724288737143592, "LL"],
|
||||
[0.8970743435692062, "LL"],
|
||||
[0.18327334401918674, "LL"],
|
||||
[-0.01466729201304962, "HL"],
|
||||
[0.3481559372544409, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329988, "HH"],
|
||||
[0.4373841193538136, "HH"],
|
||||
[0.15971286468915544, "LL"],
|
||||
[1.0543588860308968, "HH"],
|
||||
[1.7372866900020818, "HH"],
|
||||
[1.091998586053999, "LL"],
|
||||
[0.1171572584252222, "HH"],
|
||||
[0.08438455015300014, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329985, "HH"],
|
||||
[1.1627044812890683, "HH"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.795275137550483, "HH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.3010757406693439, "LL"],
|
||||
[2.8205795942839376, "HH"],
|
||||
[0.11259190602909264, "LL"],
|
||||
[-0.07116352791516614, "HL"],
|
||||
[-0.09945240794119009, "LH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.1832733440191868, "LL"],
|
||||
[-0.39054253768447705, "HL"],
|
||||
[-0.1672071289487642, "HL"],
|
||||
[0.3337669247916343, "HH"],
|
||||
[0.2584386102554792, "HH"],
|
||||
[-0.19733845476322634, "HL"],
|
||||
[-0.9379282899805409, "LH"],
|
||||
[-0.028770969951095866, "LH"],
|
||||
[0.051367269430983485, "LL"],
|
||||
[-0.2172548045913472, "LH"],
|
||||
[0.05136726943098351, "LL"],
|
||||
[0.04191046803899837, "LL"],
|
||||
[0.7482357030403517, "HH"],
|
||||
[-0.014585767863118111, "LH"],
|
||||
[0.5410013139159929, "HH"],
|
||||
[1.0223932668429925, "LL"],
|
||||
[1.4179402898927476, "LL"]]
|
||||
@@ -1,54 +0,0 @@
|
||||
[
|
||||
{"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
|
||||
{"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
|
||||
{"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
|
||||
{"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
|
||||
{"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
|
||||
{"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
|
||||
{"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
|
||||
{"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
|
||||
{"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
|
||||
{"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
|
||||
{"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
|
||||
{"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
|
||||
{"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
|
||||
{"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
|
||||
{"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
|
||||
{"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
|
||||
{"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
|
||||
{"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
|
||||
{"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
|
||||
{"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
|
||||
{"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
|
||||
{"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
|
||||
{"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
|
||||
{"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
|
||||
{"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
|
||||
{"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
|
||||
{"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
|
||||
{"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
|
||||
{"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
|
||||
{"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
|
||||
{"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
|
||||
{"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
|
||||
{"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
|
||||
{"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
|
||||
{"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
|
||||
{"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
|
||||
{"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
|
||||
{"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
|
||||
{"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
|
||||
{"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
|
||||
{"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
|
||||
{"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
|
||||
{"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
|
||||
{"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
|
||||
{"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
|
||||
{"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
|
||||
{"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
|
||||
{"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
|
||||
{"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
|
||||
{"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
|
||||
{"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
|
||||
{"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
|
||||
]
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,13 +0,0 @@
|
||||
import unittest
|
||||
|
||||
from mock_plpy import MockPlPy
|
||||
plpy = MockPlPy()
|
||||
|
||||
import sys
|
||||
sys.modules['plpy'] = plpy
|
||||
|
||||
import os
|
||||
|
||||
def fixture_file(name):
    """Return the path of fixture `name` inside the `fixtures` directory
    located next to this module (symlinks in the module path are resolved).
    """
    # `base_dir` instead of `dir` so the builtin is not shadowed
    base_dir = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(base_dir, 'fixtures', name)
|
||||
@@ -1,54 +0,0 @@
|
||||
import re
|
||||
|
||||
|
||||
class MockCursor:
    """In-memory stand-in for a database cursor that serves `data` in
    consecutive batches."""

    def __init__(self, data):
        self.data = data
        self.cursor_pos = 0

    def fetch(self, batch_size):
        """Return the next `batch_size` items of the data and move the
        cursor forward by `batch_size`."""
        start = self.cursor_pos
        stop = start + batch_size
        chunk = self.data[start:stop]
        self.cursor_pos = stop
        return chunk
|
||||
|
||||
|
||||
class MockPlPy:
    """Minimal stand-in for the `plpy` module used by the tests.

    Messages passed to `notice`, `debug` and `info` are recorded in the
    matching list attribute, and `execute` answers queries from results
    registered beforehand with `_define_result`.
    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Clear every recorded message and every registered result."""
        self.infos = []
        self.notices = []
        self.debugs = []
        self.logs = []
        self.warnings = []
        self.errors = []
        self.fatals = []
        self.executes = []
        self.results = []
        self.prepares = []

    def _define_result(self, query, result):
        """Register `result` as the answer to any query matching the regular
        expression `query` (case-insensitive, multi-line)."""
        pattern = re.compile(query, re.IGNORECASE | re.MULTILINE)
        self.results.append([pattern, result])

    def notice(self, msg):
        self.notices.append(msg)

    def debug(self, msg):
        # debug messages belong in `debugs`, not in `notices`
        self.debugs.append(msg)

    def info(self, msg):
        self.infos.append(msg)

    def cursor(self, query):
        """Return a MockCursor over the result that `execute` gives for
        `query`."""
        data = self.execute(query)
        return MockCursor(data)

    # TODO: additional arguments
    def execute(self, query):
        """Return the result of the first registered pattern that matches
        the start of `query`, or an empty list when none does."""
        for pattern, result in self.results:
            if pattern.match(query):
                return result
        return []
|
||||
@@ -1,78 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
from helper import fixture_file
|
||||
|
||||
from crankshaft.clustering import Getis
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# Fixture files produced as follows
|
||||
#
|
||||
# import pysal as ps
|
||||
# import numpy as np
|
||||
# import random
|
||||
#
|
||||
# # setup variables
|
||||
# f = ps.open(ps.examples.get_path("stl_hom.dbf"))
|
||||
# y = np.array(f.by_col['HR8893'])
|
||||
# w_queen = ps.queen_from_shapefile(ps.examples.get_path("stl_hom.shp"))
|
||||
#
|
||||
# out_queen = [{"id": index + 1,
|
||||
# "neighbors": [x+1 for x in w_queen.neighbors[index]],
|
||||
# "value": val} for index, val in enumerate(y)]
|
||||
#
|
||||
# with open('neighbors_queen_getis.json', 'w') as f:
|
||||
# f.write(str(out_queen))
|
||||
#
|
||||
# random.seed(1234)
|
||||
# np.random.seed(1234)
|
||||
# lgstar_queen = ps.esda.getisord.G_Local(y, w_queen, star=True,
|
||||
# permutations=999)
|
||||
#
|
||||
# with open('getis_queen.json', 'w') as f:
|
||||
# f.write(str(zip(lgstar_queen.z_sim,
|
||||
# lgstar_queen.p_sim, lgstar_queen.p_z_sim)))
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Test double for AnalysisDataProvider that hands back the data it was
    constructed with."""

    def __init__(self, mock_data):
        # canned rows returned by get_getis
        self.mock_result = mock_data

    def get_getis(self, w_type, param):
        # both arguments are ignored; the canned data is returned as-is
        return self.mock_result
|
||||
|
||||
|
||||
class GetisTest(unittest.TestCase):
    """Testing class for Getis-Ord's G* function
    This test replicates the work done in PySAL documentation:
    https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/autocorrelation.html#local-g-and-g
    """

    def setUp(self):
        # load raw data for analysis
        # (context managers so the fixture files are closed after reading)
        with open(fixture_file('neighbors_getis.json')) as fixture:
            self.neighbors_data = json.load(fixture)

        # load pre-computed/known values
        with open(fixture_file('getis.json')) as fixture:
            self.getis_data = json.load(fixture)

    def test_getis_ord(self):
        """Test Getis-Ord's G*"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]

        random_seeds.set_random_seeds(1234)
        getis = Getis(FakeDataProvider(data))

        result = getis.getis_ord('subquery', 'value',
                                 'queen', None, 999, 'the_geom',
                                 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        expected = np.array(self.getis_data)[:, 0:2]
        # only the z-scores are compared; the p-values are unpacked but
        # not asserted
        for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected):
            self.assertAlmostEqual(res_z, exp_z, delta=1e-2)
|
||||
@@ -1,56 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import fixture_file
|
||||
from crankshaft.clustering import Kmeans
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import crankshaft.clustering as cc
|
||||
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Test double for AnalysisDataProvider that hands back the data it was
    constructed with."""

    def __init__(self, mocked_result):
        # canned rows returned by both k-means accessors
        self.mocked_result = mocked_result

    def get_spatial_kmeans(self, query):
        # `query` is ignored; the canned data is returned as-is
        return self.mocked_result

    def get_nonspatial_kmeans(self, query, standarize):
        # both arguments are ignored; the canned data is returned as-is
        return self.mocked_result
|
||||
|
||||
|
||||
class KMeansTest(unittest.TestCase):
    """Testing class for k-means spatial"""

    def setUp(self):
        # context manager so the fixture file is closed after reading
        with open(fixture_file('kmeans.json')) as fixture:
            self.cluster_data = json.load(fixture)
        self.params = {"subquery": "select * from table",
                       "no_clusters": "10"}

    def test_kmeans(self):
        """Test that spatial k-means with two clusters yields two labels
        with 20 members each"""
        data = [{'xs': d['xs'],
                 'ys': d['ys'],
                 'ids': d['ids']} for d in self.cluster_data]

        random_seeds.set_random_seeds(1234)
        kmeans = Kmeans(FakeDataProvider(data))
        # materialise the result: it is traversed three times below
        clusters = list(kmeans.spatial('subquery', 2))
        labels = [a[1] for a in clusters]
        c1 = [a for a in clusters if a[1] == 0]
        c2 = [a for a in clusters if a[1] == 1]

        self.assertEqual(len(np.unique(labels)), 2)
        self.assertEqual(len(c1), 20)
        self.assertEqual(len(c2), 20)
|
||||
@@ -1,112 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
from helper import fixture_file
|
||||
from crankshaft.clustering import Moran
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Test double for AnalysisDataProvider that hands back the data it was
    constructed with."""

    def __init__(self, mock_data):
        # canned rows returned by get_moran
        self.mock_result = mock_data

    def get_moran(self, w_type, params):
        # both arguments are ignored; the canned data is returned as-is
        return self.mock_result
|
||||
|
||||
|
||||
class MoranTest(unittest.TestCase):
    """Testing class for Moran's I functions"""

    def setUp(self):
        self.params = {"id_col": "cartodb_id",
                       "attr1": "andy",
                       "attr2": "jay_z",
                       "subquery": "SELECT * FROM a_list",
                       "geom_col": "the_geom",
                       "num_ngbrs": 321}
        self.params_markov = {"id_col": "cartodb_id",
                              "time_cols": ["_2013_dec", "_2014_jan",
                                            "_2014_feb"],
                              "subquery": "SELECT * FROM a_list",
                              "geom_col": "the_geom",
                              "num_ngbrs": 321}
        # context managers so the fixture files are closed after reading
        with open(fixture_file('neighbors.json')) as fixture:
            self.neighbors_data = json.load(fixture)
        with open(fixture_file('moran.json')) as fixture:
            self.moran_data = json.load(fixture)

    def test_map_quads(self):
        """Test map_quads"""
        from crankshaft.clustering import map_quads
        self.assertEqual(map_quads(1), 'HH')
        self.assertEqual(map_quads(2), 'LH')
        self.assertEqual(map_quads(3), 'LL')
        self.assertEqual(map_quads(4), 'HL')
        self.assertEqual(map_quads(33), None)
        self.assertEqual(map_quads('andy'), None)

    def test_quad_position(self):
        """Test quad_position"""
        from crankshaft.clustering import quad_position

        # builtin `int` instead of the `np.int` alias, which newer NumPy
        # releases no longer provide
        quads = np.array([1, 2, 3, 4], int)

        ans = np.array(['HH', 'LH', 'LL', 'HL'])
        test_ans = quad_position(quads)

        self.assertTrue((test_ans == ans).all())

    def test_local_stat(self):
        """Test Moran's I local"""
        data = [OrderedDict([('id', d['id']),
                             ('attr1', d['value']),
                             ('neighbors', d['neighbors'])])
                for d in self.neighbors_data]

        moran = Moran(FakeDataProvider(data))
        random_seeds.set_random_seeds(1234)
        result = moran.local_stat('subquery', 'value',
                                  'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        zipped_values = zip(result, self.moran_data)

        for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)
            self.assertEqual(res_quad, exp_quad)

    def test_moran_local_rate(self):
        """Test Moran's I rate"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'attr2': 1,
                 'neighbors': d['neighbors']} for d in self.neighbors_data]

        random_seeds.set_random_seeds(1234)
        moran = Moran(FakeDataProvider(data))
        result = moran.local_rate_stat('subquery', 'numerator', 'denominator',
                                       'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]

        zipped_values = zip(result, self.moran_data)

        # only the statistic is compared here; the quadrant labels are
        # unpacked but not asserted
        for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)

    def test_moran(self):
        """Test Moran's I global"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        random_seeds.set_random_seeds(1235)
        moran = Moran(FakeDataProvider(data))
        result = moran.global_stat('table', 'value',
                                   'knn', 5, 99, 'the_geom',
                                   'cartodb_id')

        result_moran = result[0][0]
        # expected value is the mean of the local statistics in the fixture
        expected_moran = np.array([row[0] for row in self.moran_data]).mean()
        self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)
|
||||
@@ -1,160 +0,0 @@
|
||||
import unittest
|
||||
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class PysalUtilsTest(unittest.TestCase):
    """Tests for the SQL-building helpers in ``crankshaft.pysal_utils``."""

    def setUp(self):
        """Build the parameter sets shared by the tests below."""
        # Two generic attribute columns.
        self.params1 = OrderedDict([
            ('id_col', 'cartodb_id'),
            ('attr1', 'andy'),
            ('attr2', 'jay_z'),
            ('subquery', 'SELECT * FROM a_list'),
            ('geom_col', 'the_geom'),
            ('num_ngbrs', 321),
        ])

        # Numerator/denominator pair ...
        self.params2 = OrderedDict([
            ('id_col', 'cartodb_id'),
            ('numerator', 'price'),
            ('denominator', 'sq_meters'),
            ('subquery', 'SELECT * FROM pecan'),
            ('geom_col', 'the_geom'),
            ('num_ngbrs', 321),
        ])

        # ... and the same pair with the two columns swapped.
        self.params3 = OrderedDict([
            ('id_col', 'cartodb_id'),
            ('numerator', 'sq_meters'),
            ('denominator', 'price'),
            ('subquery', 'SELECT * FROM pecan'),
            ('geom_col', 'the_geom'),
            ('num_ngbrs', 321),
        ])

        # Plain dict carrying a list of time columns.
        self.params_array = {
            'id_col': 'cartodb_id',
            'time_cols': ['_2013_dec', '_2014_jan', '_2014_feb'],
            'subquery': 'SELECT * FROM a_list',
            'geom_col': 'the_geom',
            'num_ngbrs': 321,
        }

    def test_query_attr_select(self):
        """Test query_attr_select"""
        expected1 = ('i."andy"::numeric As attr1, '
                     'i."jay_z"::numeric As attr2, ')

        expected2 = ('i."price"::numeric As attr1, '
                     'i."sq_meters"::numeric As attr2, ')

        expected3 = ('i."sq_meters"::numeric As attr1, '
                     'i."price"::numeric As attr2, ')

        expected_array = ('i."_2013_dec"::numeric As attr1, '
                          'i."_2014_jan"::numeric As attr2, '
                          'i."_2014_feb"::numeric As attr3, ')

        self.assertEqual(pu.query_attr_select(self.params1), expected1)
        self.assertEqual(pu.query_attr_select(self.params2), expected2)
        self.assertEqual(pu.query_attr_select(self.params3), expected3)
        self.assertEqual(pu.query_attr_select(self.params_array),
                         expected_array)

    def test_query_attr_where(self):
        """Test pu.query_attr_where"""
        expected1 = ('idx_replace."andy" IS NOT NULL AND '
                     'idx_replace."jay_z" IS NOT NULL')

        expected_array = ('idx_replace."_2013_dec" IS NOT NULL AND '
                          'idx_replace."_2014_jan" IS NOT NULL AND '
                          'idx_replace."_2014_feb" IS NOT NULL')

        self.assertEqual(pu.query_attr_where(self.params1), expected1)
        self.assertEqual(pu.query_attr_where(self.params_array),
                         expected_array)

    def test_knn(self):
        """Test knn neighbors constructor"""
        expected1 = ('SELECT i."cartodb_id" As id, '
                     'i."andy"::numeric As attr1, '
                     'i."jay_z"::numeric As attr2, '
                     '(SELECT ARRAY(SELECT j."cartodb_id" '
                     'FROM (SELECT * FROM a_list) As j '
                     'WHERE '
                     'i."cartodb_id" <> j."cartodb_id" AND '
                     'j."andy" IS NOT NULL AND '
                     'j."jay_z" IS NOT NULL '
                     'ORDER BY '
                     'j."the_geom" <-> i."the_geom" ASC '
                     'LIMIT 321)) As neighbors '
                     'FROM (SELECT * FROM a_list) As i '
                     'WHERE i."andy" IS NOT NULL AND '
                     'i."jay_z" IS NOT NULL '
                     'ORDER BY i."cartodb_id" ASC;')

        expected_array = ('SELECT i."cartodb_id" As id, '
                          'i."_2013_dec"::numeric As attr1, '
                          'i."_2014_jan"::numeric As attr2, '
                          'i."_2014_feb"::numeric As attr3, '
                          '(SELECT ARRAY(SELECT j."cartodb_id" '
                          'FROM (SELECT * FROM a_list) As j '
                          'WHERE i."cartodb_id" <> j."cartodb_id" AND '
                          'j."_2013_dec" IS NOT NULL AND '
                          'j."_2014_jan" IS NOT NULL AND '
                          'j."_2014_feb" IS NOT NULL '
                          'ORDER BY j."the_geom" <-> i."the_geom" ASC '
                          'LIMIT 321)) As neighbors '
                          'FROM (SELECT * FROM a_list) As i '
                          'WHERE i."_2013_dec" IS NOT NULL AND '
                          'i."_2014_jan" IS NOT NULL AND '
                          'i."_2014_feb" IS NOT NULL '
                          'ORDER BY i."cartodb_id" ASC;')

        self.assertEqual(pu.knn(self.params1), expected1)
        self.assertEqual(pu.knn(self.params_array), expected_array)

    def test_queen(self):
        """Test queen neighbors constructor"""
        expected1 = ('SELECT i."cartodb_id" As id, '
                     'i."andy"::numeric As attr1, '
                     'i."jay_z"::numeric As attr2, '
                     '(SELECT ARRAY(SELECT j."cartodb_id" '
                     'FROM (SELECT * FROM a_list) As j '
                     'WHERE '
                     'i."cartodb_id" <> j."cartodb_id" AND '
                     'ST_Touches(i."the_geom", '
                     'j."the_geom") AND '
                     'j."andy" IS NOT NULL AND '
                     'j."jay_z" IS NOT NULL)'
                     ') As neighbors '
                     'FROM (SELECT * FROM a_list) As i '
                     'WHERE i."andy" IS NOT NULL AND '
                     'i."jay_z" IS NOT NULL '
                     'ORDER BY i."cartodb_id" ASC;')

        self.assertEqual(pu.queen(self.params1), expected1)

    def test_construct_neighbor_query(self):
        """Test construct_neighbor_query"""
        # The 'knn' dispatch must produce exactly the raw knn query.
        via_dispatch = pu.construct_neighbor_query('knn', self.params1)
        direct = pu.knn(self.params1)
        self.assertEqual(via_dispatch, direct)

    def test_get_attributes(self):
        """Test get_attributes"""
        # Placeholder: real tests still need to be added.
        self.assertEqual(True, True)

    def test_get_weight(self):
        """Test get_weight"""
        # Placeholder: nothing is exercised yet.
        self.assertEqual(True, True)

    def test_empty_zipped_array(self):
        """Test empty_zipped_array"""
        expected_pair = [(None, None)]
        expected_quad = [(None, None, None, None)]
        self.assertEqual(pu.empty_zipped_array(2), expected_pair)
        self.assertEqual(pu.empty_zipped_array(4), expected_quad)
|
||||
@@ -1,64 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
from helper import plpy, fixture_file
|
||||
import crankshaft.segmentation as segmentation
|
||||
import json
|
||||
|
||||
class SegmentationTest(unittest.TestCase):
    """Testing class for the segmentation functions"""

    def setUp(self):
        """Clear any query results registered on the plpy test double."""
        plpy._reset()

    def generate_random_data(self, n_samples, random_state, row_type=False):
        """Draw a synthetic regression data set.

        Three features are sampled from ``random_state`` (two uniform, one
        integer in [0, 4)) and the target is ``x1 + x2 * x2 + x3``.

        Returns:
            If ``row_type`` is true, a tuple ``(rows, y)`` where ``rows`` is
            a list of ``{'features': (x1, x2, x3)}`` dicts, one per sample.
            Otherwise a one-element list holding a dict of whole columns
            keyed ``x1``, ``x2``, ``x3``, ``target`` and ``cartodb_id``.
        """
        x1 = random_state.uniform(size=n_samples)
        x2 = random_state.uniform(size=n_samples)
        x3 = random_state.randint(0, 4, size=n_samples)

        y = x1 + x2 * x2 + x3
        cartodb_id = range(len(x1))

        if row_type:
            return [{'features': vals} for vals in zip(x1, x2, x3)], y

        return [{'x1': x1,
                 'x2': x2,
                 'x3': x3,
                 'target': y,
                 'cartodb_id': cartodb_id}]

    def test_replace_nan_with_mean(self):
        """Placeholder for a replace_nan_with_mean test."""
        # NOTE(review): this test builds an input but asserts nothing, so it
        # always passes. A call to the function under test and an
        # expectation still need to be added.
        test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])

    def test_create_and_predict_segment(self):
        """Train on one random sample and predict on another.

        Checks the number of predictions, that the third field of the first
        result row is below 0.01, and that the root-mean-square error stays
        under half of the mean target value.
        """
        n_samples = 1000

        random_state_train = np.random.RandomState(13)
        random_state_test = np.random.RandomState(134)
        training_data = self.generate_random_data(n_samples,
                                                  random_state_train)
        test_data, test_y = self.generate_random_data(n_samples,
                                                      random_state_test,
                                                      row_type=True)

        ids = [{'cartodb_ids': range(len(test_data))}]
        rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]

        # Raw strings: these are regex patterns, and sequences such as
        # ``\*`` or ``\(`` are not valid escapes in a normal string literal.
        plpy._define_result(
            r'select \* from \(select \* from training\) a limit 1', rows)
        plpy._define_result(
            r'.*from \(select \* from training\) as a', training_data)
        plpy._define_result(
            r'select array_agg\(cartodb\_id order by cartodb\_id\) '
            r'as cartodb_ids from \(.*\) a', ids)
        plpy._define_result(r'.*select \* from test.*', test_data)

        model_parameters = {'n_estimators': 1200,
                            'max_depth': 3,
                            'subsample': 0.5,
                            'learning_rate': 0.01,
                            'min_samples_leaf': 1}

        result = segmentation.create_and_predict_segment(
            'select * from training',
            'target',
            'select * from test',
            model_parameters)

        prediction = [r[1] for r in result]

        # Root-mean-square error of the predictions (lower is better).
        rmse = np.sqrt(np.mean(np.square(np.array(prediction) -
                                         np.array(test_y))))

        self.assertEqual(len(result), len(test_data))
        self.assertTrue(result[0][2] < 0.01)
        self.assertTrue(rmse < 0.5 * np.mean(test_y))
|
||||
@@ -1,349 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
import unittest
|
||||
|
||||
|
||||
from helper import fixture_file
|
||||
|
||||
from crankshaft.space_time_dynamics import Markov
|
||||
import crankshaft.space_time_dynamics as std
|
||||
from crankshaft import random_seeds
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import json
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider stub that hands back canned rows instead of querying."""

    def __init__(self, data):
        """Remember ``data`` as the result every fetch should return."""
        self.mock_result = data

    def get_markov(self, w_type, params):
        """Return the canned rows; ``w_type`` and ``params`` are ignored."""
        return self.mock_result
|
||||
|
||||
|
||||
class SpaceTimeTests(unittest.TestCase):
    """Testing class for Markov Functions."""

    @staticmethod
    def _year_columns():
        """Return the fixture's time column names, 'y1995' .. 'y2009'."""
        return ['y%d' % year for year in range(1995, 2010)]

    def _attr_rows(self, with_neighbors):
        """Map fixture records to 'attr1'..'attr15' rows.

        When ``with_neighbors`` is true each row also carries the record's
        'id' and 'neighbors' entries.
        """
        columns = self._year_columns()
        rows = []
        for record in self.neighbors_data:
            row = {}
            if with_neighbors:
                row['id'] = record['id']
                row['neighbors'] = record['neighbors']
            for position, column in enumerate(columns):
                row['attr%d' % (position + 1)] = record[column]
            rows.append(row)
        return rows

    def setUp(self):
        """Load the JSON fixtures and build the shared numeric inputs."""
        self.params = {"id_col": "cartodb_id",
                       "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'],
                       "subquery": "SELECT * FROM a_list",
                       "geom_col": "the_geom",
                       "num_ngbrs": 321}

        # Context managers so the fixture files are closed again.
        with open(fixture_file('neighbors_markov.json')) as fixture:
            self.neighbors_data = json.loads(fixture.read())
        with open(fixture_file('markov.json')) as fixture:
            self.markov_data = json.loads(fixture.read())

        # 10 x 10 array whose column i is filled with the value i.
        self.time_data = np.array([i * np.ones(10, dtype=float)
                                   for i in range(10)]).T

        self.transition_matrix = np.array([
            [[0.96341463, 0.0304878, 0.00609756, 0., 0.],
             [0.06040268, 0.83221477, 0.10738255, 0., 0.],
             [0., 0.14, 0.74, 0.12, 0.],
             [0., 0.03571429, 0.32142857, 0.57142857, 0.07142857],
             [0., 0., 0., 0.16666667, 0.83333333]],
            [[0.79831933, 0.16806723, 0.03361345, 0., 0.],
             [0.0754717, 0.88207547, 0.04245283, 0., 0.],
             [0.00537634, 0.06989247, 0.8655914, 0.05913978, 0.],
             [0., 0., 0.06372549, 0.90196078, 0.03431373],
             [0., 0., 0., 0.19444444, 0.80555556]],
            [[0.84693878, 0.15306122, 0., 0., 0.],
             [0.08133971, 0.78947368, 0.1291866, 0., 0.],
             [0.00518135, 0.0984456, 0.79274611, 0.0984456, 0.00518135],
             [0., 0., 0.09411765, 0.87058824, 0.03529412],
             [0., 0., 0., 0.10204082, 0.89795918]],
            [[0.8852459, 0.09836066, 0., 0.01639344, 0.],
             [0.03875969, 0.81395349, 0.13953488, 0., 0.00775194],
             [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
             [0., 0.02339181, 0.12865497, 0.75438596, 0.09356725],
             [0., 0., 0., 0.09661836, 0.90338164]],
            [[0.33333333, 0.66666667, 0., 0., 0.],
             [0.0483871, 0.77419355, 0.16129032, 0.01612903, 0.],
             [0.01149425, 0.16091954, 0.74712644, 0.08045977, 0.],
             [0., 0.01036269, 0.06217617, 0.89637306, 0.03108808],
             [0., 0., 0., 0.02352941, 0.97647059]]]
        )

    def test_spatial_markov(self):
        """Test Spatial Markov."""
        data = self._attr_rows(with_neighbors=True)
        markov = Markov(FakeDataProvider(data))
        random_seeds.set_random_seeds(1234)

        result = markov.spatial_trend('subquery',
                                      self._year_columns(),
                                      5, 'knn', 5, 0, 'the_geom',
                                      'cartodb_id')

        self.assertTrue(result is not None)
        result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
        expected = self.markov_data
        # Only the trend (first field) is compared; the other four fields
        # are unpacked to pin the row width but are not asserted.
        for ([res_trend, res_up, res_down, res_vol, res_id],
             [exp_trend, exp_up, exp_down, exp_vol, exp_id]
             ) in zip(result, expected):
            self.assertAlmostEqual(res_trend, exp_trend)

    def test_get_time_data(self):
        """Test get_time_data"""
        data = self._attr_rows(with_neighbors=False)

        result = std.get_time_data(data, self._year_columns())

        # expected was prepared from PySAL example:
        # f = ps.open(ps.examples.get_path("usjoin.csv"))
        # pci = np.array([f.by_col[str(y)]
        #                 for y in range(1995, 2010)]).transpose()
        # rpci = pci / (pci.mean(axis = 0))

        expected = np.array(
            [[0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154,
              0.83271652, 0.83786314, 0.85012593, 0.85509656, 0.86416612,
              0.87119375, 0.86302631, 0.86148267, 0.86252252, 0.86746356],
             [0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388,
              0.90746978, 0.89830489, 0.89431991, 0.88924794, 0.89815176,
              0.91832091, 0.91706054, 0.90139505, 0.87897455, 0.86216858],
             [0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522,
              0.78964559, 0.80584442, 0.8084998, 0.82258551, 0.82668196,
              0.82373724, 0.81814804, 0.83675961, 0.83574199, 0.84647177],
             [1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841,
              1.14506948, 1.12151133, 1.11160697, 1.10888621, 1.11399806,
              1.12168029, 1.13164797, 1.12958508, 1.11371818, 1.09936775],
             [1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025,
              1.16898201, 1.17212488, 1.14752303, 1.11843284, 1.11024964,
              1.11943471, 1.11736468, 1.10863242, 1.09642516, 1.07762337],
             [1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684,
              1.44184737, 1.44782832, 1.41978227, 1.39092208, 1.4059372,
              1.40788646, 1.44052766, 1.45241216, 1.43306098, 1.4174431],
             [1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149,
              1.10888138, 1.11856629, 1.13062931, 1.11944984, 1.12446239,
              1.11671008, 1.10880034, 1.08401709, 1.06959206, 1.07875225],
             [1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545,
              0.99854316, 0.9880258, 0.99669587, 0.99327676, 1.01400905,
              1.03176742, 1.040511, 1.01749645, 0.9936394, 0.98279746],
             [0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845,
              0.99127006, 0.97925917, 0.9683482, 0.95335147, 0.93694787,
              0.94308213, 0.92232874, 0.91284091, 0.89689833, 0.88928858],
             [0.87418391, 0.86416601, 0.84425695, 0.8404494, 0.83903044,
              0.8578708, 0.86036185, 0.86107306, 0.8500772, 0.86981998,
              0.86837929, 0.87204141, 0.86633032, 0.84946077, 0.83287146],
             [1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624,
              1.14450183, 1.12349752, 1.12596664, 1.12213996, 1.1119989,
              1.10257792, 1.10491258, 1.11059842, 1.10509795, 1.10020097],
             [0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687,
              0.95831051, 0.94480909, 0.94804195, 0.95430286, 0.94103989,
              0.92122519, 0.91010201, 0.89280392, 0.89298243, 0.89165385],
             [0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647,
              0.9480927, 0.93539182, 0.95388718, 0.94597005, 0.96918424,
              0.94781281, 0.93466815, 0.94281559, 0.96520315, 0.96715441],
             [0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897,
              0.98687073, 0.99237486, 0.98209969, 0.9877653, 0.97399471,
              0.96910087, 0.98416665, 0.98423613, 0.99823861, 0.99545704],
             [0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012,
              0.86191535, 0.84981451, 0.85472102, 0.84564835, 0.83998883,
              0.83478547, 0.82803648, 0.8198736, 0.82265395, 0.8399404],
             [0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136,
              0.82785597, 0.86008789, 0.86776298, 0.86720209, 0.8676334,
              0.89179317, 0.94202108, 0.9422231, 0.93902708, 0.94479184],
             [0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238,
              0.90906632, 0.92693339, 0.93695966, 0.94242697, 0.94338265,
              0.91981796, 0.91108804, 0.90543476, 0.91737138, 0.94793657],
             [1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723,
              1.20172869, 1.21328691, 1.22624778, 1.22397075, 1.23857042,
              1.24419893, 1.23929384, 1.23418676, 1.23626739, 1.26754398],
             [1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667,
              1.34790023, 1.34399863, 1.32575181, 1.30795492, 1.30544841,
              1.30303302, 1.32107766, 1.32936244, 1.33001241, 1.33288462],
             [1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093,
              1.05059016, 1.03405057, 1.02747623, 1.03162734, 0.9961416,
              0.97356208, 0.94241549, 0.92754547, 0.92549227, 0.92138102],
             [1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264,
              1.13889622, 1.12442212, 1.13367018, 1.13982256, 1.14029944,
              1.11979401, 1.10905389, 1.10577769, 1.11166825, 1.09985155],
             [0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284,
              0.74480073, 0.76098396, 0.76156903, 0.76651952, 0.76533288,
              0.78205934, 0.76842416, 0.77487118, 0.77768683, 0.78801192],
             [0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803,
              0.97370819, 0.96419154, 0.97209861, 0.97441313, 0.96356162,
              0.94745352, 0.93965462, 0.93069645, 0.94020973, 0.94358232],
             [0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801,
              0.80071489, 0.83358256, 0.83451613, 0.85175032, 0.85954307,
              0.86790024, 0.87170334, 0.87863799, 0.87497981, 0.87888675],
             [0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619,
              0.98733195, 0.99644997, 0.99669587, 1.02559097, 1.01116651,
              0.99988024, 0.97906749, 0.99323123, 1.00204939, 0.99602148],
             [1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683,
              1.08312397, 1.05192626, 1.04230892, 1.05577278, 1.08569751,
              1.12443486, 1.08891079, 1.08603695, 1.05997314, 1.02160943],
             [1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272,
              1.18257029, 1.16226243, 1.16009196, 1.14467789, 1.14820235,
              1.12386598, 1.12680236, 1.12357937, 1.1159258, 1.12570828],
             [1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667,
              1.31210239, 1.29989156, 1.29203193, 1.27183516, 1.26830786,
              1.2617743, 1.28656675, 1.29734097, 1.29390205, 1.29345446],
             [0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864,
              0.78772975, 0.82848011, 0.8259679, 0.82435705, 0.83108634,
              0.84373784, 0.83891093, 0.84349247, 0.85637272, 0.86539395],
             [1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626,
              1.2256767, 1.21126648, 1.19377804, 1.18355337, 1.19674434,
              1.21536573, 1.23653297, 1.27962009, 1.27968392, 1.25907738],
             [0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282,
              0.96480308, 0.94686376, 0.93679073, 0.92540049, 0.92988835,
              0.93442917, 0.92100464, 0.91475304, 0.90249622, 0.9021363],
             [0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368,
              0.88937573, 0.894401, 0.90448993, 0.95495898, 0.92698333,
              0.94745352, 0.92562488, 0.96635366, 1.02520312, 1.0394296],
             [1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073,
              1.00759019, 0.99192968, 0.99747298, 0.99550759, 0.97583768,
              0.9610168, 0.94779638, 0.93759089, 0.93353431, 0.94121705],
             [0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613,
              0.83434854, 0.85813595, 0.84667961, 0.84374558, 0.85951183,
              0.87194227, 0.89455097, 0.88283929, 0.90349491, 0.90600675],
             [1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086,
              1.00581626, 0.98850522, 0.99291168, 0.98983209, 0.97511924,
              0.96134615, 0.96382634, 0.95011401, 0.9434686, 0.94637765],
             [1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857,
              1.04800023, 1.03024941, 1.04200483, 1.0402554, 1.03296979,
              1.02191682, 1.02476275, 1.02347523, 1.02517684, 1.04359571],
             [1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043,
              1.0531801, 1.07452771, 1.09383478, 1.1052447, 1.10322136,
              1.09167939, 1.08772756, 1.08859544, 1.09177338, 1.1096083],
             [0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809,
              0.86287327, 0.85169796, 0.85411285, 0.84886336, 0.84517414,
              0.84843858, 0.84488343, 0.83374329, 0.82812044, 0.82878599],
             [0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286,
              0.92652175, 0.94278865, 0.93682452, 0.98655146, 0.992237,
              0.9798497, 0.93869677, 0.96947771, 1.00362626, 0.98102351],
             [0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967,
              0.93092109, 0.92662519, 0.93412152, 0.93501274, 0.92879506,
              0.92110542, 0.91035556, 0.90430364, 0.89994694, 0.90073864],
             [0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824,
              0.98882205, 0.97662234, 0.95601578, 0.94905385, 0.94934888,
              0.97152609, 0.97163004, 0.9700702, 0.97158948, 0.95884908],
             [0.83980439, 0.84726737, 0.85747, 0.85467221, 0.8556751,
              0.84818516, 0.85265681, 0.84502402, 0.82645665, 0.81743586,
              0.83550406, 0.83338919, 0.83511679, 0.82136617, 0.80921874],
             [0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441,
              0.95440787, 0.96364363, 0.96804412, 0.97136214, 0.97583768,
              0.95571724, 0.96895368, 0.97001634, 0.97082733, 0.98782366],
             [1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249,
              1.10558188, 1.1214086, 1.12292577, 1.13021031, 1.13342735,
              1.14686068, 1.14502975, 1.14474747, 1.14084037, 1.16142926],
             [1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863,
              1.11856702, 1.09764283, 1.08815849, 1.08044313, 1.09278827,
              1.07003204, 1.08398066, 1.09831768, 1.09298232, 1.09176125],
             [0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744,
              0.77751194, 0.79902974, 0.81437881, 0.80788828, 0.79603865,
              0.78966436, 0.79949807, 0.80172182, 0.82168155, 0.85587911],
             [1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561,
              1.00162979, 0.99860739, 1.00814981, 1.00574316, 0.99030032,
              0.97682565, 0.97292596, 0.96519561, 0.96173403, 0.95890284],
             [0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289,
              0.96608031, 0.99727185, 1.00781194, 1.03484236, 1.05333619,
              1.0983263, 1.1704974, 1.17025154, 1.18730553, 1.14242645]])

        self.assertTrue(np.allclose(result, expected))
        self.assertIs(type(result), type(expected))
        self.assertTrue(result.shape == expected.shape)

    def test_rebin_data(self):
        """Test rebin_data"""
        # sample in double the time (even case since 10 % 2 = 0):
        #   (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2
        #   = 0.5, 2.5, 4.5, 6.5, 8.5
        ans_even = np.array([(i + 0.5) * np.ones(10, dtype=float)
                             for i in range(0, 10, 2)]).T

        self.assertTrue(
            np.array_equal(std.rebin_data(self.time_data, 2), ans_even))

        # sample in triple the time (uneven since 10 % 3 = 1):
        #   (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1
        #   = 1, 4, 7, 9
        ans_odd = np.array([i * np.ones(10, dtype=float)
                            for i in (1, 4, 7, 9)]).T
        self.assertTrue(
            np.array_equal(std.rebin_data(self.time_data, 3), ans_odd))

    def test_get_prob_dist(self):
        """Test get_prob_dist"""
        lag_indices = np.array([1, 2, 3, 4])
        unit_indices = np.array([1, 3, 2, 4])
        # Each expected row is transition_matrix[lag][unit] for the paired
        # indices above.
        answer = np.array([
            [0.0754717, 0.88207547, 0.04245283, 0., 0.],
            [0., 0., 0.09411765, 0.87058824, 0.03529412],
            [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
            [0., 0., 0., 0.02352941, 0.97647059]
        ])
        result = std.get_prob_dist(self.transition_matrix,
                                   lag_indices, unit_indices)

        self.assertTrue(np.array_equal(result, answer))

    def test_get_prob_stats(self):
        """Test get_prob_stats"""
        probs = np.array([
            [0.0754717, 0.88207547, 0.04245283, 0., 0.],
            [0., 0., 0.09411765, 0.87058824, 0.03529412],
            [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
            [0., 0., 0., 0.02352941, 0.97647059]
        ])
        unit_indices = np.array([1, 3, 2, 4])
        answer_up = np.array([0.04245283, 0.03529412, 0.12376238, 0.])
        answer_down = np.array([0.0754717, 0.09411765, 0.0990099, 0.02352941])
        answer_trend = np.array([-0.03301887 / 0.88207547,
                                 -0.05882353 / 0.87058824,
                                 0.02475248 / 0.77722772,
                                 -0.02352941 / 0.97647059])
        answer_volatility = np.array([0.34221495, 0.33705421,
                                      0.29226542, 0.38834223])

        result = std.get_prob_stats(probs, unit_indices)
        result_up = result[0]
        result_down = result[1]
        result_trend = result[2]
        result_volatility = result[3]

        self.assertTrue(np.allclose(result_up, answer_up))
        self.assertTrue(np.allclose(result_down, answer_down))
        self.assertTrue(np.allclose(result_trend, answer_trend))
        self.assertTrue(np.allclose(result_volatility, answer_volatility))
|
||||
@@ -1,6 +0,0 @@
|
||||
"""Import all modules"""
|
||||
import crankshaft.random_seeds
|
||||
import crankshaft.clustering
|
||||
import crankshaft.space_time_dynamics
|
||||
import crankshaft.segmentation
|
||||
import analysis_data_provider
|
||||
@@ -1,67 +0,0 @@
|
||||
"""class for fetching data"""
|
||||
import plpy
|
||||
import pysal_utils as pu
|
||||
|
||||
|
||||
class AnalysisDataProvider:
    """Fetch analysis input rows from the database through ``plpy``.

    Each getter runs one query with ``plpy.execute`` and reports a
    ``plpy.SPIError`` through ``plpy.error`` with an 'Analysis failed'
    message. Analysis classes take an instance of this class (or a stub with
    the same methods) as their data provider.
    """

    def get_getis(self, w_type, params):
        """fetch data for getis ord's g

        Builds the neighbor query for ``w_type``/``params`` and returns the
        query result, or ``pu.empty_zipped_array(4)`` when no rows come back.
        """
        try:
            query = pu.construct_neighbor_query(w_type, params)
            result = plpy.execute(query)
            # if there are no neighbors, exit
            if len(result) == 0:
                return pu.empty_zipped_array(4)
            else:
                return result
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_markov(self, w_type, params):
        """fetch data for spatial markov

        Builds the neighbor query for ``w_type``/``params`` and returns the
        query result, or ``pu.empty_zipped_array(4)`` when no rows come back.
        """
        try:
            query = pu.construct_neighbor_query(w_type, params)
            data = plpy.execute(query)

            if len(data) == 0:
                return pu.empty_zipped_array(4)

            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_moran(self, w_type, params):
        """fetch data for moran's i analyses

        Builds the neighbor query for ``w_type``/``params`` and returns the
        query result, or ``pu.empty_zipped_array(2)`` when no rows come back.
        """
        try:
            query = pu.construct_neighbor_query(w_type, params)
            data = plpy.execute(query)

            # if there are no neighbors, exit
            if len(data) == 0:
                return pu.empty_zipped_array(2)
            return data
        except plpy.SPIError as err:
            # The message previously interpolated an undefined name ``e``,
            # which raised NameError here and hid the real database error.
            plpy.error('Analysis failed: %s' % err)
            # NOTE(review): presumably only reached when plpy.error does not
            # raise (e.g. a stubbed plpy) -- confirm.
            return pu.empty_zipped_array(2)

    def get_nonspatial_kmeans(self, query):
        """fetch data for non-spatial kmeans

        Runs ``query`` unchanged and returns its result.
        """
        try:
            data = plpy.execute(query)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)

    def get_spatial_kmeans(self, params):
        """fetch data for spatial kmeans

        ``params`` supplies ``id_col``, ``geom_col`` and ``subquery`` for the
        query template. The query aggregates ids, X and Y coordinates into
        arrays ordered by id, skipping rows with a NULL geometry.
        """
        # NOTE(review): identifiers and the subquery are interpolated
        # straight into the SQL text; callers must pass trusted values.
        query = ("SELECT "
                 "array_agg({id_col} ORDER BY {id_col}) as ids,"
                 "array_agg(ST_X({geom_col}) ORDER BY {id_col}) As xs,"
                 "array_agg(ST_Y({geom_col}) ORDER BY {id_col}) As ys "
                 "FROM ({subquery}) As a "
                 "WHERE {geom_col} IS NOT NULL").format(**params)
        try:
            data = plpy.execute(query)
            return data
        except plpy.SPIError as err:
            plpy.error('Analysis failed: %s' % err)
|
||||
@@ -1,4 +0,0 @@
|
||||
"""Import all functions for clustering"""
|
||||
from moran import *
|
||||
from kmeans import *
|
||||
from getis import *
|
||||
@@ -1,50 +0,0 @@
|
||||
"""
|
||||
Getis-Ord's G geostatistics (hotspot/coldspot analysis)
|
||||
"""
|
||||
|
||||
import pysal as ps
|
||||
from collections import OrderedDict
|
||||
|
||||
# crankshaft modules
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
|
||||
class Getis:
    """Getis-Ord's G* analysis backed by a pluggable data provider."""

    def __init__(self, data_provider=None):
        """Use ``data_provider`` if given, else a new AnalysisDataProvider."""
        if data_provider is not None:
            self.data_provider = data_provider
        else:
            self.data_provider = AnalysisDataProvider()

    def getis_ord(self, subquery, attr,
                  w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Getis-Ord's G*
        Neighbors are built by the data provider (a PostGIS query) and the
        statistic itself comes from PySAL's ``G_Local`` with ``star=True``.

        Returns the zipped ``z_sim``, ``p_sim`` and ``p_z_sim`` values of
        the statistic together with the weight object's ``id_order``.
        Andy Eschbacher
        """
        # Rows whose attribute is null are left out of the query, so with
        # kNN the neighbors that remain may not be the nearest ones.
        query_params = OrderedDict()
        query_params["id_col"] = id_col
        query_params["attr1"] = attr
        query_params["geom_col"] = geom_col
        query_params["subquery"] = subquery
        query_params["num_ngbrs"] = num_ngbrs

        rows = self.data_provider.get_getis(w_type, query_params)
        values = pu.get_attributes(rows)

        # PySAL weight object describing the neighborhood structure
        weights = pu.get_weight(rows, w_type, num_ngbrs)

        # Getis-Ord's G* z- and p-values
        g_local = ps.esda.getisord.G_Local(values, weights,
                                           star=True,
                                           permutations=permutations)

        return zip(g_local.z_sim, g_local.p_sim, g_local.p_z_sim,
                   weights.id_order)
|
||||
@@ -1,32 +0,0 @@
|
||||
from sklearn.cluster import KMeans
|
||||
import numpy as np
|
||||
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
|
||||
class Kmeans:
    """K-means clustering backed by a pluggable data provider."""

    def __init__(self, data_provider=None):
        """Use ``data_provider`` if given, else a new AnalysisDataProvider."""
        if data_provider is not None:
            self.data_provider = data_provider
        else:
            self.data_provider = AnalysisDataProvider()

    def spatial(self, query, no_clusters, no_init=20):
        """
        Cluster the coordinate pairs returned for ``query``.

        query: SQL query that has a WGS84 geometry (the_geom)
        no_clusters: passed to KMeans as ``n_clusters``
        no_init: passed to KMeans as ``n_init``

        Returns the ids zipped with the cluster label of each point.
        """
        fetched = self.data_provider.get_spatial_kmeans({
            "subquery": query,
            "geom_col": "the_geom",
            "id_col": "cartodb_id",
        })

        # The provider answers with a single row of aggregated arrays.
        first_row = fetched[0]
        xs = first_row['xs']
        ys = first_row['ys']
        ids = first_row['ids']

        model = KMeans(n_clusters=no_clusters, n_init=no_init)
        cluster_labels = model.fit_predict(zip(xs, ys))
        return zip(ids, cluster_labels)
|
||||
@@ -1,208 +0,0 @@
|
||||
"""
|
||||
Moran's I geostatistics (global clustering & outliers presence)
|
||||
"""
|
||||
|
||||
# TODO: Fill in local neighbors which have null/NoneType values with the
|
||||
# average of their neighborhood
|
||||
|
||||
import pysal as ps
|
||||
from collections import OrderedDict
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# crankshaft module
|
||||
import crankshaft.pysal_utils as pu
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
|
||||
class Moran:
    """
    Moran's I statistics (global, local, rate and bivariate variants)
    computed with PySAL on rows fetched through a data provider
    """

    def __init__(self, data_provider=None):
        """
        @param data_provider: object exposing get_moran(w_type, params);
          an AnalysisDataProvider is created when none is passed
        """
        if data_provider is None:
            self.data_provider = AnalysisDataProvider()
        else:
            self.data_provider = data_provider

    def global_stat(self, subquery, attr_name,
                    w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I (global)
        Implementation building neighbors with a PostGIS database and Moran's I
         core clusters with PySAL.
        Andy Eschbacher
        Input:
          @param subquery: query the rows are pulled from
          @param attr_name: name of the column to analyse
          @param w_type: weight type forwarded to the data provider
          @param num_ngbrs: number of neighbors
          @param permutations: forwarded to ps.esda.moran.Moran
          @param geom_col: name of the geometry column
          @param id_col: name of the id column
        Output:
          zip([I], [EI]) taken from the PySAL Moran result
        """
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr_name),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        attr_vals = pu.get_attributes(result)

        # calculate weights
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate moran global
        moran_global = ps.esda.moran.Moran(attr_vals, weight,
                                           permutations=permutations)

        return zip([moran_global.I], [moran_global.EI])

    def local_stat(self, subquery, attr,
                   w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I implementation for PL/Python
        Andy Eschbacher
        Input: same arguments as global_stat, with attr naming the column
        Output:
          zip of (Is, quadrant label, p_sim, id, y) per geometry
        """

        # geometries with attributes that are null are ignored
        # resulting in a collection of not as near neighbors

        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        attr_vals = pu.get_attributes(result)
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
                                         permutations=permutations)

        # find quadrants for each geometry
        quads = quad_position(lisa.q)

        return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)

    def global_rate_stat(self, subquery, numerator, denominator,
                         w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Rate (global)
        Andy Eschbacher
        Input:
          @param numerator: name of the numerator column
          @param denominator: name of the denominator column
          (remaining arguments as in global_stat)
        Output:
          zip([I], [EI]) taken from the PySAL Moran_Rate result
        """
        # NOTE(review): local_rate_stat keys these columns as
        # 'numerator'/'denominator', which makes query_attr_where add a
        # "<> 0" filter on the denominator; the 'attr1'/'attr2' keys used
        # here do not trigger that filter -- confirm this is intended.
        params = OrderedDict([("id_col", id_col),
                              ("attr1", numerator),
                              # the comma after this pair was missing, which
                              # made Python call the tuple (TypeError)
                              ("attr2", denominator),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        numer = pu.get_attributes(result, 1)
        denom = pu.get_attributes(result, 2)

        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate moran global rate
        lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
                                             permutations=permutations)

        return zip([lisa_rate.I], [lisa_rate.EI])

    def local_rate_stat(self, subquery, numerator, denominator,
                        w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Local Rate
        Andy Eschbacher
        Input: same arguments as global_rate_stat
        Output:
          zip of (Is, quadrant label, p_sim, id, y) per geometry
        """
        # geometries with values that are null are ignored
        # resulting in a collection of not as near neighbors

        params = OrderedDict([("id_col", id_col),
                              ("numerator", numerator),
                              ("denominator", denominator),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        numer = pu.get_attributes(result, 1)
        denom = pu.get_attributes(result, 2)

        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
                                              permutations=permutations)

        # find quadrants for each geometry
        quads = quad_position(lisa.q)

        return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)

    def local_bivariate_stat(self, subquery, attr1, attr2,
                             permutations, geom_col, id_col,
                             w_type, num_ngbrs):
        """
        Moran's I (local) Bivariate (untested)
        Input:
          @param attr1: name of the first column
          @param attr2: name of the second column
          (note the argument order differs from the other methods)
        Output:
          zip of (Is, quadrant label, p_sim, id) per geometry
        """

        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr1),
                              ("attr2", attr2),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])

        result = self.data_provider.get_moran(w_type, params)

        # collect attributes
        attr1_vals = pu.get_attributes(result, 1)
        attr2_vals = pu.get_attributes(result, 2)

        # create weights
        weight = pu.get_weight(result, w_type, num_ngbrs)

        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
                                            permutations=permutations)

        # find clustering of significance
        lisa_sig = quad_position(lisa.q)

        return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
|
||||
|
||||
# Low level functions ----------------------------------------
|
||||
|
||||
|
||||
def map_quads(coord):
    """
        Translate a quadrant number into its Moran's I designation
        (1 -> 'HH', 2 -> 'LH', 3 -> 'LL', 4 -> 'HL')
        Input:
        @param coord (int): quadrant of a specific measurement
        Output:
        classification (one of 'HH', 'LH', 'LL', or 'HL'), or None when
        coord is not one of 1-4
    """
    for quadrant, label in enumerate(('HH', 'LH', 'LL', 'HL'), 1):
        if coord == quadrant:
            return label
    return None
|
||||
|
||||
|
||||
def quad_position(quads):
    """
        Label every quadrant number with its Moran's I classification
        Input:
        @param quads ndarray: an array of quads classified by
          1-4 (PySAL default)
        Output:
        @param list: the quads classified by 'HH', 'LL', etc. (None for
          values map_quads does not recognise)
    """
    labels = []
    for quad in quads:
        labels.append(map_quads(quad))
    return labels
|
||||
@@ -1,2 +0,0 @@
|
||||
"""Import all functions for pysal_utils"""
|
||||
from crankshaft.pysal_utils.pysal_utils import *
|
||||
@@ -1,211 +0,0 @@
|
||||
"""
|
||||
Utilities module for generic PySAL functionality, mainly centered on
|
||||
translating queries into numpy arrays or PySAL weights objects
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
|
||||
|
||||
def construct_neighbor_query(w_type, query_vals):
    """Return query (a string) used for finding neighbors
        @param w_type text: type of neighbors to calculate; 'knn'
          (case-insensitive) selects knn, anything else selects queen
        @param query_vals dict: values used to construct the query
    """
    build_query = knn if w_type.lower() == 'knn' else queen
    return build_query(query_vals)
|
||||
|
||||
|
||||
# Build weight object
|
||||
def get_weight(query_res, w_type='knn', num_ngbrs=5):
    """
        Construct PySAL weight from return value of query
        @param query_res dict-like: query results; each row is read for
          its 'id' and 'neighbors' entries
        @param w_type text: accepted but not read by this function
        @param num_ngbrs int: accepted but not read by this function
        Output:
        ps.W object with its transform set to 'r'
    """
    neighbors = dict((row['id'], row['neighbors']) for row in query_res)
    print('len of neighbors: %d' % len(neighbors))

    weight = ps.W(neighbors)
    weight.transform = 'r'
    return weight
|
||||
|
||||
|
||||
def query_attr_select(params):
    """
        Build the attribute portion of a SELECT statement.
        Aliases (attr1, attr2, ...) follow the iteration order of params,
        or the order of params['time_cols'] when that key is present.
        @param params: dict of information used in query (column names,
                       table name, etc.)
        Example:
            OrderedDict([('numerator', 'price'),
                         ('denominator', 'sq_meters'),
                         ('subquery', 'SELECT * FROM interesting_data')])
        Output:
          'i."price"::numeric As attr1, i."sq_meters"::numeric As attr2, '
    """
    template = "i.\"%(col)s\"::numeric As attr%(alias_num)s, "

    if 'time_cols' in params:
        # markov analysis: the column names are listed explicitly
        columns = params['time_cols']
    else:
        # moran's analysis: every key that is not bookkeeping names a column
        reserved = ('id_col', 'geom_col', 'subquery', 'num_ngbrs')
        columns = [params[key] for key in params if key not in reserved]

    return "".join(template % {"col": column, "alias_num": position}
                   for position, column in enumerate(columns, 1))
|
||||
|
||||
|
||||
def query_attr_where(params):
    """
      Build the WHERE conditions that weed out NULL-valued attributes
      when constructing the neighbors query. The table alias is left as
      the placeholder 'idx_replace'.
      Input: dict of params:
          {'subquery': ...,
           'numerator': 'data1',
           'denominator': 'data2',
           '': ...}
      Output:
          'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL
           AND idx_replace."data2" <> 0'
          (the "<> 0" condition is added only when a 'denominator' key is
          present)
      Input:
      {'subquery': ...,
       'time_cols': ['time1', 'time2', 'time3'],
       'etc': ...}
      Output: 'idx_replace."time1" IS NOT NULL AND idx_replace."time2" IS NOT
        NULL AND idx_replace."time3" IS NOT NULL'
    """
    not_null = "idx_replace.\"%s\" IS NOT NULL"

    if 'time_cols' in params:
        # markov where clauses
        clauses = [not_null % column for column in params['time_cols']]
    else:
        # moran where clauses: every non-bookkeeping key names a column
        reserved = ('id_col', 'geom_col', 'subquery', 'num_ngbrs')
        keys = [key for key in params if key not in reserved]
        clauses = [not_null % params[key] for key in keys]

        if 'denominator' in keys:
            clauses.append(
                "idx_replace.\"%s\" <> 0" % params['denominator'])

    return " AND ".join(clauses)
|
||||
|
||||
|
||||
def knn(params):
    """SQL query for k-nearest neighbors.
        @param params: dict of values to fill template (id_col, geom_col,
          subquery, num_ngbrs and the attribute column entries)
    """
    select_clause = query_attr_select(params)
    where_clause = query_attr_where(params)

    # the NULL filters are applied to both the outer (i) and inner (j) alias
    substitutions = {
        "attr_select": select_clause,
        "attr_where_i": where_clause.replace("idx_replace", "i"),
        "attr_where_j": where_clause.replace("idx_replace", "j"),
    }

    pieces = (
        "SELECT ",
        "i.\"{id_col}\" As id, ",
        "%(attr_select)s",
        "(SELECT ARRAY(SELECT j.\"{id_col}\" ",
        "FROM ({subquery}) As j ",
        "WHERE ",
        "i.\"{id_col}\" <> j.\"{id_col}\" AND ",
        "%(attr_where_j)s ",
        "ORDER BY ",
        "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC ",
        "LIMIT {num_ngbrs})",
        ") As neighbors ",
        "FROM ({subquery}) As i ",
        "WHERE ",
        "%(attr_where_i)s ",
        "ORDER BY i.\"{id_col}\" ASC;",
    )

    # %-substitution first (attribute clauses), then {}-substitution (params)
    template = "".join(pieces) % substitutions
    return template.format(**params)
|
||||
|
||||
|
||||
# SQL query for finding queens neighbors (all contiguous polygons)
|
||||
def queen(params):
    """SQL query for queen neighbors (polygons matched with ST_Touches).
        @param params dict: information to fill query (id_col, geom_col,
          subquery and the attribute column entries)
    """
    select_clause = query_attr_select(params)
    where_clause = query_attr_where(params)

    # the NULL filters are applied to both the outer (i) and inner (j) alias
    substitutions = {
        "attr_select": select_clause,
        "attr_where_i": where_clause.replace("idx_replace", "i"),
        "attr_where_j": where_clause.replace("idx_replace", "j"),
    }

    pieces = (
        "SELECT ",
        "i.\"{id_col}\" As id, ",
        "%(attr_select)s",
        "(SELECT ARRAY(SELECT j.\"{id_col}\" ",
        "FROM ({subquery}) As j ",
        "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND ",
        "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND ",
        "%(attr_where_j)s)",
        ") As neighbors ",
        "FROM ({subquery}) As i ",
        "WHERE ",
        "%(attr_where_i)s ",
        "ORDER BY i.\"{id_col}\" ASC;",
    )

    # %-substitution first (attribute clauses), then {}-substitution (params)
    template = "".join(pieces) % substitutions
    return template.format(**params)
|
||||
|
||||
# to add more weight methods open a ticket or pull request
|
||||
|
||||
|
||||
def get_attributes(query_res, attr_num=1):
    """
        Collect one attribute column from the query results as floats
        @param query_res: query results with attributes and neighbors
        @param attr_num: attribute number (1, 2, ...); selects the
          'attr<attr_num>' entry of every row
        Output:
        float ndarray with one value per row
    """
    column = 'attr' + str(attr_num)
    # builtin float is the dtype the deprecated np.float alias pointed to
    return np.array([row[column] for row in query_res], dtype=float)
|
||||
|
||||
|
||||
def empty_zipped_array(num_nones):
    """
        Placeholder return value for cases of empty weights objects
        (no neighbors): a single row made only of None
        Input:
        @param num_nones int: number of columns (e.g., 4)
        Output:
        [(None, None, None, None)]
    """
    return [(None,) * num_nones]
|
||||
@@ -1,11 +0,0 @@
|
||||
"""Random seed generator used for non-deterministic functions in crankshaft"""
|
||||
import random
|
||||
import numpy
|
||||
|
||||
def set_random_seeds(value):
    """
    Seed both RNGs (Random Number Generators) used internally:
    the stdlib random module first, then numpy.random.
    """
    for seed in (random.seed, numpy.random.seed):
        seed(value)
|
||||
@@ -1 +0,0 @@
|
||||
from segmentation import *
|
||||
@@ -1,176 +0,0 @@
|
||||
"""
|
||||
Segmentation creation and prediction
|
||||
"""
|
||||
|
||||
import sklearn
|
||||
import numpy as np
|
||||
import plpy
|
||||
from sklearn.ensemble import GradientBoostingRegressor
|
||||
from sklearn import metrics
|
||||
from sklearn.cross_validation import train_test_split
|
||||
|
||||
# Lower level functions
|
||||
#----------------------
|
||||
|
||||
def replace_nan_with_mean(array):
    """
        Fill NaN entries with the mean of the remaining values, in place.
        For a 2D array each column is filled with that column's mean; for
        a 1D array the mean of the whole array is used.
        Input:
        @param array: a 1D or 2D array of floats which may have
          null-valued (NaN) entries
        Output:
        the same array object, with nans filled in
        Raises:
        ValueError if NaNs are present in an array with more than two
          dimensions
    """
    nan_mask = np.isnan(array)
    if not nan_mask.any():
        return array

    if nan_mask.ndim == 1:
        # 1D input (e.g. a target vector) has no columns to split on
        array[nan_mask] = np.mean(array[~nan_mask])
    elif nan_mask.ndim == 2:
        # visit only the columns that contain a NaN, one mean per column
        for col in np.unique(np.where(nan_mask)[1]):
            missing = nan_mask[:, col]
            array[missing, col] = np.mean(array[~missing, col])
    else:
        raise ValueError('expected a 1D or 2D array, got %d dimensions'
                         % nan_mask.ndim)

    return array
|
||||
|
||||
def get_data(variable, feature_columns, query):
    """
        Fetch data from the database, clean, and package into
          numpy arrays
        Input:
        @param variable: name of the target variable
        @param feature_columns: list of column names
        @param query: subquery that data is pulled from for the packaging
        Output:
        (target, features) as NumPy float arrays with NaNs mean-filled
        Raises:
        whatever plpy.error raises when the query fails
    """

    columns = ','.join(['array_agg("{col}") As "{col}"'.format(col=col) for col in feature_columns])

    try:
        data = plpy.execute('''SELECT array_agg("{variable}") As target, {columns} FROM ({query}) As a'''.format(
            variable=variable,
            columns=columns,
            query=query))
    except Exception as e:
        plpy.error('Failed to access data to build segmentation model: %s' % e)

    # extract target data from plpy object; dtype=float turns NULLs into
    # NaN (as is done for the features) so they can be mean-filled below
    target = np.array(data[0]['target'], dtype=float)

    # put n feature data arrays into an n x m array of arrays
    features = np.column_stack([np.array(data[0][col], dtype=float) for col in feature_columns])

    return replace_nan_with_mean(target), replace_nan_with_mean(features)
|
||||
|
||||
# High level interface
|
||||
# --------------------
|
||||
|
||||
def create_and_predict_segment_agg(target, features, target_features, target_ids, model_parameters):
    """
    Version of create_and_predict_segment that works on arrays that come
    straight from the SQL calling the function.

    Input:
        @param target: The 1D array of length NSamples containing the target variable we want the model to predict
        @param features: The 2D array of size NSamples * NFeatures that forms the input to the model
        @param target_features: The 2D array of features the trained model makes its predictions for
        @param target_ids: A 1D array of target_ids that will be used to associate the results of the prediction with the rows which they come from
        @param model_parameters: A dictionary containing parameters for the model.
    Output:
        zip of (target_id, prediction, accuracy) with the same accuracy
        value repeated on every row
    """

    clean_target = replace_nan_with_mean(target)
    clean_features = replace_nan_with_mean(features)
    target_features = replace_nan_with_mean(target_features)

    model, accuracy = train_model(clean_target, clean_features, model_parameters, 0.2)
    prediction = model.predict(target_features)
    # a plain repeated list, as create_and_predict_segment does; np.full is
    # not available in the pinned numpy release
    accuracy_array = [accuracy] * prediction.shape[0]
    return zip(target_ids, prediction, accuracy_array)
|
||||
|
||||
|
||||
|
||||
def create_and_predict_segment(query, variable, target_query, model_params):
    """
    generate a segment with machine learning
    Stuart Lynn
    Input:
        @param query: subquery supplying the training rows
        @param variable: name of the target column
        @param target_query: query supplying the rows to predict on
        @param model_params: dictionary of model parameters
    Output:
        zip of (cartodb_id, prediction, accuracy)
    """

    # fetch column names from a single sample row
    try:
        sample_row = plpy.execute('SELECT * FROM ({query}) As a LIMIT 1 '.format(query=query))[0]
        columns = sample_row.keys()
    except Exception as e:
        plpy.error('Failed to build segmentation model: %s' % e)

    # every column except the target and the CartoDB bookkeeping columns
    # is used as a feature
    excluded = set([variable, 'cartodb_id', 'the_geom', 'the_geom_webmercator'])
    feature_columns = set(columns) - excluded

    # get data from database
    target, features = get_data(variable, feature_columns, query)

    model, accuracy = train_model(target, features, model_params, 0.2)
    cartodb_ids, result = predict_segment(model, feature_columns, target_query)
    repeated_accuracy = [accuracy] * result.shape[0]
    return zip(cartodb_ids, result, repeated_accuracy)
|
||||
|
||||
|
||||
def train_model(target, features, model_params, test_split):
    """
    Train the Gradient Boosting model on the provided data and calculate the accuracy of the model
    Input:
        @param target: 1D Array of the variable that the model is to be trained to predict
        @param features: 2D Array NSamples * NFeatures to use in training the model
        @param model_params: A dictionary of model parameters, the full specification can be found on the
            scikit learn page for [GradientBoostingRegressor](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html)
        @param test_split: The fraction of the data to be withheld for testing the model / calculating the accuracy
    Output:
        (model, accuracy) where accuracy is the mean squared error on the
        withheld rows
    """
    features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=test_split)
    model = GradientBoostingRegressor(**model_params)
    model.fit(features_train, target_train)
    # score on the withheld rows only; scoring on the full data set would
    # include the rows the model was fitted on
    accuracy = calculate_model_accuracy(model, features_test, target_test)
    return model, accuracy
|
||||
|
||||
def calculate_model_accuracy(model, features, target):
    """
    Calculate the mean squared error of the model prediction
    Input:
        @param model: model trained from input features
        @param features: features to make a prediction from
        @param target: target to compare prediction to
    Output:
        mean squared error of the model prediction compared to the target
    """
    return metrics.mean_squared_error(model.predict(features), target)
|
||||
|
||||
def predict_segment(model, features, target_query):
    """
    Use the provided model to predict the values for the new feature set
    Input:
        @param model: The pretrained model
        @param features: A list of features to use in the model prediction (list of column names)
        @param target_query: The query to run to obtain the data to predict on and the cartodb_ids associated with it.
    Output:
        (cartodb_ids, predictions), both ordered by cartodb_id; an empty
        list and an empty array when target_query yields no rows
    """

    batch_size = 1000
    joined_features = ','.join(['"{0}"::numeric'.format(a) for a in features])

    try:
        # ordered by cartodb_id so predictions line up with the ids
        # aggregated (ORDER BY cartodb_id) further down
        cursor = plpy.cursor('SELECT Array[{joined_features}] As features FROM ({target_query}) As a ORDER BY cartodb_id'.format(
            joined_features=joined_features,
            target_query=target_query))
    except Exception as e:
        plpy.error('Failed to build segmentation model: %s' % e)

    results = []

    while True:
        rows = cursor.fetch(batch_size)
        if not rows:
            break
        batch = np.row_stack([np.array(row['features'], dtype=float) for row in rows])

        # Need to fix this. Should be global mean. This will cause weird
        # effects: NaNs are filled with the mean of the current batch only
        batch = replace_nan_with_mean(batch)
        results.append(model.predict(batch))

    try:
        cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids']
    except Exception as e:
        plpy.error('Failed to build segmentation model: %s' % e)

    if not results:
        # nothing to predict on: np.concatenate rejects an empty list
        return cartodb_ids or [], np.array([])

    return cartodb_ids, np.concatenate(results)
|
||||
@@ -1,2 +0,0 @@
|
||||
"""Import all functions from clustering libraries."""
|
||||
from markov import *
|
||||
@@ -1,194 +0,0 @@
|
||||
"""
|
||||
Spatial dynamics measurements using Spatial Markov
|
||||
"""
|
||||
|
||||
# TODO: remove all plpy dependencies
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
import plpy
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
|
||||
class Markov:
    """Spatial Markov trend analysis on rows supplied by a data provider"""

    def __init__(self, data_provider=None):
        """
        @param data_provider: object exposing get_markov(w_type, params);
          an AnalysisDataProvider is created when none is passed
        """
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None
                              else data_provider)

    def spatial_trend(self, subquery, time_cols, num_classes=7,
                      w_type='knn', num_ngbrs=5, permutations=0,
                      geom_col='the_geom', id_col='cartodb_id'):
        """
            Predict the trends of a unit based on:
            1. history of its transitions to different classes (e.g., 1st
               quantile -> 2nd quantile)
            2. average class of its neighbors

            Inputs:
            @param subquery string: e.g., SELECT the_geom, cartodb_id,
              interesting_time_column FROM table_name
            @param time_cols list of strings: list of strings of column names
              (at least two are required)
            @param num_classes (optional): number of classes to break
              distribution of values into. Currently uses quantile bins.
            @param w_type string (optional): weight type ('knn' or 'queen')
            @param num_ngbrs int (optional): number of neighbors (if knn type)
            @param permutations int (optional): number of permutations for test
              stats
            @param geom_col string (optional): name of column which contains
              the geometries
            @param id_col string (optional): name of column which has the ids
              of the table

            Outputs (zipped per unit, in this order):
            @param trend float: (trend_up - trend_down) / trend_static
            @param trend_up float: probability that a geom will move to a
              higher class
            @param trend_down float: probability that a geom will move to a
              lower class
            @param volatility float: a measure of the volatility based on
              probability stddev(prob array)
            @param id: id of the unit, from the weights' id_order
        """

        if len(time_cols) < 2:
            plpy.error('More than one time column needs to be passed')

        rows = self.data_provider.get_markov(
            w_type,
            {"id_col": id_col,
             "time_cols": time_cols,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs})

        # build weight
        weights = pu.get_weight(rows, w_type)
        weights.transform = 'r'

        # prep time data
        t_data = get_time_data(rows, time_cols)

        markov = ps.Spatial_Markov(t_data,
                                   weights,
                                   k=num_classes,
                                   fixed=False,
                                   permutations=permutations)

        # quantile class of the spatial lag of the last time column
        last_period_lag = ps.lag_spatial(weights, t_data[:, -1])
        lag_classes = ps.Quantiles(last_period_lag, k=num_classes).yb

        # look up probability distribution for each unit according to class
        # and lag class
        prob_dist = get_prob_dist(markov.P,
                                  lag_classes,
                                  markov.classes[:, -1])

        # find the ups and down and overall distribution of each cell
        stats = get_prob_stats(prob_dist, markov.classes[:, -1])
        trend_up, trend_down, trend, volatility = stats

        # output the results
        return zip(trend, trend_up, trend_down, volatility, weights.id_order)
|
||||
|
||||
|
||||
|
||||
def get_time_data(markov_data, time_cols):
    """
        Extract the time columns ('attr1' ... 'attrN', with N the number
        of time_cols) into a float array with one row per unit and one
        column per time column
    """
    aliases = ['attr' + str(position)
               for position in range(1, len(time_cols) + 1)]
    per_column = [[row[alias] for row in markov_data] for alias in aliases]
    return np.array(per_column, dtype=float).T
|
||||
|
||||
|
||||
# not currently used
|
||||
def rebin_data(time_data, num_time_per_bin):
    """
        Convert an n x l matrix into an n x ceil(l / m) matrix by averaging
        every group of m adjacent columns (m = num_time_per_bin):
        1 2 3 4    1.5 3.5
        5 6 7 8 -> 5.5 7.5
        9 8 7 6    8.5 6.5
        5 4 3 2    4.5 2.5

        if m = 2, the 4 x 4 matrix is transformed to a 4 x 2 matrix.

        This process effectively resamples the data at a time span m
        units longer than the input data.
        For cases when there is a remainder (remainder(5/3) = 2), the remaining
        two columns are binned together as the last time period, while the
        first three are binned together for the first period.

        Input:
          @param time_data n x l ndarray: measurements of an attribute at
            different time intervals
          @param num_time_per_bin int: number of columns to average into a new
            column
        Output:
          n x ceil(l / m) ndarray of resampled time series
    """
    num_cols = time_data.shape[1]
    # ceiling division written with floor division so the bin count stays
    # an int whether or not `/` is true division
    num_bins = -(-num_cols // num_time_per_bin)

    return np.array(
        [time_data[:, num_time_per_bin * i:num_time_per_bin * (i + 1)].mean(axis=1)
         for i in range(num_bins)]).T
|
||||
|
||||
|
||||
def get_prob_dist(transition_matrix, lag_indices, unit_indices):
    """
        Given an array of transition matrices, look up the probability
        associated with the arrangements passed

        Input:
          @param transition_matrix ndarray[k,k,k]:
          @param lag_indices ndarray: first index of each lookup
          @param unit_indices ndarray: second index of each lookup

        Output:
          Array of probability distributions, one per entry of lag_indices
    """
    looked_up = []
    for position, lag in enumerate(lag_indices):
        looked_up.append(transition_matrix[(lag, unit_indices[position])])
    return np.array(looked_up)
|
||||
|
||||
|
||||
def get_prob_stats(prob_dist, unit_indices):
    """
        get the statistics of the probability distributions

        Outputs (in return order: trend_up, trend_down, trend, volatility):
            @param trend_up ndarray(float): sum of probabilities for upward
               movement (relative to the unit index of that prob)
            @param trend_down ndarray(float): sum of probabilities for downward
               movement (relative to the unit index of that prob)
            @param trend ndarray(float): difference of upward and downward
               movements divided by the probability of staying in the
               unit's own class; NaN when that probability is not positive
            @param volatility ndarray(float): standard deviation of each
               row of prob_dist
    """
    count = len(unit_indices)
    trend_up = np.empty(count, dtype=float)
    trend_down = np.empty(count, dtype=float)
    trend = np.empty(count, dtype=float)

    for pos in range(count):
        unit = unit_indices[pos]
        trend_up[pos] = prob_dist[pos, (unit + 1):].sum()
        trend_down[pos] = prob_dist[pos, :unit].sum()

        static = prob_dist[pos, unit]
        if static > 0.0:
            trend[pos] = (trend_up[pos] - trend_down[pos]) / static
        else:
            trend[pos] = np.nan

    # calculate volatility of distribution
    volatility = prob_dist.std(axis=1)

    return trend_up, trend_down, trend, volatility
|
||||
@@ -1,5 +0,0 @@
|
||||
joblib==0.8.3
|
||||
numpy==1.6.1
|
||||
scipy==0.14.0
|
||||
pysal==1.11.2
|
||||
scikit-learn==0.14.1
|
||||
@@ -1,49 +0,0 @@
|
||||
|
||||
"""
|
||||
CartoDB Spatial Analysis Python Library
|
||||
See:
|
||||
https://github.com/CartoDB/crankshaft
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
setup(
|
||||
name='crankshaft',
|
||||
|
||||
version='0.5.1',
|
||||
|
||||
description='CartoDB Spatial Analysis Python Library',
|
||||
|
||||
url='https://github.com/CartoDB/crankshaft',
|
||||
|
||||
author='Data Services Team - CartoDB',
|
||||
author_email='dataservices@cartodb.com',
|
||||
|
||||
license='MIT',
|
||||
|
||||
classifiers=[
|
||||
'Development Status :: 3 - Alpha',
|
||||
'Intended Audience :: Mapping comunity',
|
||||
'Topic :: Maps :: Mapping Tools',
|
||||
'License :: OSI Approved :: MIT License',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
],
|
||||
|
||||
keywords='maps mapping tools spatial analysis geostatistics',
|
||||
|
||||
packages=find_packages(exclude=['contrib', 'docs', 'tests']),
|
||||
|
||||
extras_require={
|
||||
'dev': ['unittest'],
|
||||
'test': ['unittest', 'nose', 'mock'],
|
||||
},
|
||||
|
||||
# The choice of component versions is dictated by what's
|
||||
# provisioned in the production servers.
|
||||
# IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
|
||||
install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'],
|
||||
|
||||
requires=['pysal', 'numpy', 'sklearn'],
|
||||
|
||||
test_suite='test'
|
||||
)
|
||||
@@ -1 +0,0 @@
|
||||
[[0.004793783909323601, 0.17999999999999999, 0.49808756424021061], [-1.0701189472090842, 0.079000000000000001, 0.14228288580832316], [-0.67867750971877305, 0.42099999999999999, 0.24867110969448558], [-0.67407386707620487, 0.246, 0.25013217644612995], [-0.79495689068870035, 0.33200000000000002, 0.21331928959090596], [-0.49279481022182703, 0.058999999999999997, 0.31107878905057329], [-0.38075627530057132, 0.28399999999999997, 0.35169205342069643], [-0.86710921611314895, 0.23699999999999999, 0.19294108571294855], [-0.78618647240956485, 0.050000000000000003, 0.2158791250244505], [-0.76108527223116984, 0.064000000000000001, 0.22330306830813684], [-0.13340753531942209, 0.247, 0.44693554317763651], [-0.57584545722033043, 0.48999999999999999, 0.28235982246156488], [-0.78882694661192831, 0.433, 0.2151065788731219], [-0.38769767950046219, 0.375, 0.34911988661484239], [-0.56057819488052207, 0.41399999999999998, 0.28754255985169652], [-0.41354017495644935, 0.45500000000000002, 0.339605447117173], [-0.23993577722243081, 0.49099999999999999, 0.40519002230969337], [-0.1389080156677496, 0.40400000000000003, 0.44476141839645233], [-0.25485737510500855, 0.376, 0.39941662953554224], [-0.71218610582902353, 0.17399999999999999, 0.23817476979886087], [-0.54533105995872144, 0.13700000000000001, 0.2927629228714812], [-0.39547917847510977, 0.033000000000000002, 0.34624464252424236], [-0.43052658996257548, 0.35399999999999998, 0.33340631435564982], [-0.37296719193774736, 0.40300000000000002, 0.35458643102865428], [-0.66482612169465694, 0.31900000000000001, 0.25308085650392698], [-0.13772133540823422, 0.34699999999999998, 0.44523032843016275], [-0.6765304487868502, 0.20999999999999999, 0.24935196033890672], [-0.64518763494323472, 0.32200000000000001, 0.25940279912025543], [-0.5078622084312413, 0.41099999999999998, 0.30577498972600159], [-0.12652006733772059, 0.42899999999999999, 0.44966013262301163], [-0.32691133022814595, 0.498, 0.37186747562269029], [0.25533848511500978, 
0.42399999999999999, 0.39923083899077472], [2.7045138116476508, 0.0050000000000000001, 0.0034202212972238577], [-0.1551614486076057, 0.44400000000000001, 0.43834701985429037], [1.9524487722567723, 0.012999999999999999, 0.025442473674991528], [-1.2055816465306763, 0.017000000000000001, 0.11398941970467646], [3.478472976017831, 0.002, 0.00025213964072468009], [-1.4621715757903719, 0.002, 0.071847099325659136], [-0.84010307600180256, 0.085000000000000006, 0.20042529779230778], [5.7097646237318243, 0.0030000000000000001, 5.6566262784940591e-09], [1.5082367956567375, 0.065000000000000002, 0.065746966514827365], [-0.58337270103430816, 0.44, 0.27982121546450034], [-0.083271860457022437, 0.45100000000000001, 0.46681768733385554], [-0.46872337815000953, 0.34599999999999997, 0.31963368715684204], [0.18490279849545319, 0.23799999999999999, 0.42665263797981101], [3.470424529947997, 0.012, 0.00025981817437825683], [-0.99942612137154796, 0.032000000000000001, 0.15879415560388499], [-1.3650387953594485, 0.034000000000000002, 0.08612042845912049], [1.8617160516432014, 0.081000000000000003, 0.03132156240215267], [1.1321188945775384, 0.11600000000000001, 0.12879222611766061], [0.064116686050580601, 0.27300000000000002, 0.4744386578180424], [-0.42032194540259099, 0.29999999999999999, 0.33712514016213468], [-0.79581215423980922, 0.123, 0.21307061309098785], [-0.42792753720906046, 0.45600000000000002, 0.33435193892883741], [-1.0629378527428395, 0.051999999999999998, 0.14390506780140866], [-0.54164761752225477, 0.33700000000000002, 0.29403064095211839], [1.0934778886820793, 0.13700000000000001, 0.13709201601893539], [-0.094068785378413719, 0.38200000000000001, 0.46252725802998929], [0.13482026574801856, 0.36799999999999999, 0.44637699118865737], [-0.13976995315653129, 0.34699999999999998, 0.44442087706276601], [-0.051047663924746682, 0.32000000000000001, 0.47964376985626245], [-0.21468297736730158, 0.41699999999999998, 0.41500724761906527], [-0.20873154637330626, 0.38800000000000001, 
0.41732890604390893], [-0.32427876152583485, 0.49199999999999999, 0.37286349875557478], [-0.65254842943280977, 0.374, 0.25702372075306734], [-0.48611858196118796, 0.23300000000000001, 0.31344154643990074], [-0.14482354344529477, 0.32600000000000001, 0.44242509660469886], [-0.51052030974200002, 0.439, 0.30484349480873729], [0.56814382285283538, 0.14999999999999999, 0.28496865660103166], [0.58680919931668207, 0.161, 0.27866592887231878], [0.013390357044409013, 0.25800000000000001, 0.49465818005865647], [-0.19050728887961568, 0.41399999999999998, 0.4244558160399462], [-0.60531777422216049, 0.35199999999999998, 0.2724839368239631], [1.0899331115425805, 0.127, 0.13787130480311838], [0.17015055382651084, 0.36899999999999999, 0.43244586845546418], [-0.21738337124409801, 0.40600000000000003, 0.41395479459421991], [1.0329303331079593, 0.079000000000000001, 0.15081825117169467], [1.0218317101096221, 0.104, 0.15343027913308094]]
|
||||
@@ -1 +0,0 @@
|
||||
[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
|
||||
@@ -1 +0,0 @@
|
||||
[[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 
0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]]
|
||||
@@ -1,52 +0,0 @@
|
||||
[[0.9319096128346788, "HH"],
|
||||
[-1.135787401862846, "HL"],
|
||||
[0.11732030672508517, "LL"],
|
||||
[0.6152779669180425, "LL"],
|
||||
[-0.14657336660125297, "LH"],
|
||||
[0.6967858120189607, "LL"],
|
||||
[0.07949310115714454, "HH"],
|
||||
[0.4703198759258987, "HH"],
|
||||
[0.4421125200498064, "HH"],
|
||||
[0.5724288737143592, "LL"],
|
||||
[0.8970743435692062, "LL"],
|
||||
[0.18327334401918674, "LL"],
|
||||
[-0.01466729201304962, "HL"],
|
||||
[0.3481559372544409, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329988, "HH"],
|
||||
[0.4373841193538136, "HH"],
|
||||
[0.15971286468915544, "LL"],
|
||||
[1.0543588860308968, "HH"],
|
||||
[1.7372866900020818, "HH"],
|
||||
[1.091998586053999, "LL"],
|
||||
[0.1171572584252222, "HH"],
|
||||
[0.08438455015300014, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329985, "HH"],
|
||||
[1.1627044812890683, "HH"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.795275137550483, "HH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.3010757406693439, "LL"],
|
||||
[2.8205795942839376, "HH"],
|
||||
[0.11259190602909264, "LL"],
|
||||
[-0.07116352791516614, "HL"],
|
||||
[-0.09945240794119009, "LH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.1832733440191868, "LL"],
|
||||
[-0.39054253768447705, "HL"],
|
||||
[-0.1672071289487642, "HL"],
|
||||
[0.3337669247916343, "HH"],
|
||||
[0.2584386102554792, "HH"],
|
||||
[-0.19733845476322634, "HL"],
|
||||
[-0.9379282899805409, "LH"],
|
||||
[-0.028770969951095866, "LH"],
|
||||
[0.051367269430983485, "LL"],
|
||||
[-0.2172548045913472, "LH"],
|
||||
[0.05136726943098351, "LL"],
|
||||
[0.04191046803899837, "LL"],
|
||||
[0.7482357030403517, "HH"],
|
||||
[-0.014585767863118111, "LH"],
|
||||
[0.5410013139159929, "HH"],
|
||||
[1.0223932668429925, "LL"],
|
||||
[1.4179402898927476, "LL"]]
|
||||
@@ -1,54 +0,0 @@
|
||||
[
|
||||
{"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
|
||||
{"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
|
||||
{"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
|
||||
{"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
|
||||
{"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
|
||||
{"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
|
||||
{"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
|
||||
{"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
|
||||
{"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
|
||||
{"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
|
||||
{"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
|
||||
{"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
|
||||
{"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
|
||||
{"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
|
||||
{"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
|
||||
{"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
|
||||
{"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
|
||||
{"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
|
||||
{"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
|
||||
{"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
|
||||
{"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
|
||||
{"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
|
||||
{"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
|
||||
{"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
|
||||
{"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
|
||||
{"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
|
||||
{"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
|
||||
{"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
|
||||
{"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
|
||||
{"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
|
||||
{"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
|
||||
{"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
|
||||
{"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
|
||||
{"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
|
||||
{"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
|
||||
{"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
|
||||
{"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
|
||||
{"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
|
||||
{"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
|
||||
{"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
|
||||
{"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
|
||||
{"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
|
||||
{"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
|
||||
{"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
|
||||
{"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
|
||||
{"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
|
||||
{"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
|
||||
{"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
|
||||
{"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
|
||||
{"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
|
||||
{"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
|
||||
{"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
|
||||
]
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,13 +0,0 @@
|
||||
import unittest
|
||||
|
||||
from mock_plpy import MockPlPy
|
||||
plpy = MockPlPy()
|
||||
|
||||
import sys
|
||||
sys.modules['plpy'] = plpy
|
||||
|
||||
import os
|
||||
|
||||
def fixture_file(name):
    """Return the path of the fixture file called *name*.

    The path is built from the ``fixtures`` directory located next to this
    helper module; the module path is passed through ``os.path.realpath``
    first.
    """
    # Named `here` rather than `dir` so the builtin dir() is not shadowed.
    here = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(here, 'fixtures', name)
|
||||
@@ -1,54 +0,0 @@
|
||||
import re
|
||||
|
||||
|
||||
class MockCursor:
    """In-memory cursor that serves a fixed sequence of rows in batches."""

    def __init__(self, data):
        # Offset of the next row that has not been handed out yet.
        self.cursor_pos = 0
        self.data = data

    def fetch(self, batch_size):
        """Return the next ``batch_size`` rows and advance the cursor.

        The batch is a slice of the data, so reading past the end gives a
        short or empty batch; the position still advances by ``batch_size``.
        """
        start = self.cursor_pos
        stop = start + batch_size
        rows = self.data[start:stop]
        self.cursor_pos = stop
        return rows
|
||||
|
||||
|
||||
class MockPlPy:
    """Minimal stand-in for the ``plpy`` module.

    Messages passed to the logging-style methods are recorded in per-level
    lists, and queries are answered from canned results registered through
    ``_define_result``.
    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Drop all recorded messages and all registered canned results."""
        self.infos = []
        self.notices = []
        self.debugs = []
        self.logs = []
        self.warnings = []
        self.errors = []
        self.fatals = []
        self.executes = []
        self.results = []
        self.prepares = []

    def _define_result(self, query, result):
        """Register ``result`` as the answer to queries matching ``query``.

        ``query`` is a regular expression; it is compiled case-insensitively
        and in multiline mode.
        """
        pattern = re.compile(query, re.IGNORECASE | re.MULTILINE)
        self.results.append([pattern, result])

    def notice(self, msg):
        """Record a NOTICE-level message."""
        self.notices.append(msg)

    def debug(self, msg):
        """Record a DEBUG-level message."""
        # Goes to `debugs`; it used to be appended to `notices`, which left
        # the `debugs` list permanently empty.
        self.debugs.append(msg)

    def info(self, msg):
        """Record an INFO-level message."""
        self.infos.append(msg)

    def cursor(self, query):
        """Return a ``MockCursor`` over the canned result for ``query``."""
        data = self.execute(query)
        return MockCursor(data)

    # TODO: additional arguments
    def execute(self, query):
        """Return the first registered result whose pattern matches ``query``.

        Patterns are tried in registration order with ``match`` (anchored at
        the start of the query); an empty list is returned when none match.
        """
        for pattern, result in self.results:
            if pattern.match(query):
                return result
        return []
|
||||
@@ -1,78 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
from helper import fixture_file
|
||||
|
||||
from crankshaft.clustering import Getis
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# Fixture files produced as follows
|
||||
#
|
||||
# import pysal as ps
|
||||
# import numpy as np
|
||||
# import random
|
||||
#
|
||||
# # setup variables
|
||||
# f = ps.open(ps.examples.get_path("stl_hom.dbf"))
|
||||
# y = np.array(f.by_col['HR8893'])
|
||||
# w_queen = ps.queen_from_shapefile(ps.examples.get_path("stl_hom.shp"))
|
||||
#
|
||||
# out_queen = [{"id": index + 1,
|
||||
# "neighbors": [x+1 for x in w_queen.neighbors[index]],
|
||||
# "value": val} for index, val in enumerate(y)]
|
||||
#
|
||||
# with open('neighbors_queen_getis.json', 'w') as f:
|
||||
# f.write(str(out_queen))
|
||||
#
|
||||
# random.seed(1234)
|
||||
# np.random.seed(1234)
|
||||
# lgstar_queen = ps.esda.getisord.G_Local(y, w_queen, star=True,
|
||||
# permutations=999)
|
||||
#
|
||||
# with open('getis_queen.json', 'w') as f:
|
||||
# f.write(str(zip(lgstar_queen.z_sim,
|
||||
# lgstar_queen.p_sim, lgstar_queen.p_z_sim)))
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider stub that returns preset rows instead of running a query."""

    def __init__(self, mock_data):
        # Rows handed back verbatim by get_getis.
        self.mock_result = mock_data

    def get_getis(self, w_type, param):
        """Return the preset rows; ``w_type`` and ``param`` are ignored."""
        return self.mock_result
|
||||
|
||||
|
||||
class GetisTest(unittest.TestCase):
    """Testing class for Getis-Ord's G* function

    This test replicates the work done in PySAL documentation:
    https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/autocorrelation.html#local-g-and-g
    """

    def setUp(self):
        # load raw data for analysis
        # (context managers so the fixture files are closed after reading)
        with open(fixture_file('neighbors_getis.json')) as fixture:
            self.neighbors_data = json.load(fixture)

        # load pre-computed/known values
        with open(fixture_file('getis.json')) as fixture:
            self.getis_data = json.load(fixture)

    def test_getis_ord(self):
        """Test Getis-Ord's G*"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]

        random_seeds.set_random_seeds(1234)
        getis = Getis(FakeDataProvider(data))

        result = getis.getis_ord('subquery', 'value',
                                 'queen', None, 999, 'the_geom',
                                 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        expected = np.array(self.getis_data)[:, 0:2]
        # Only the first column (z) is compared; the second column is
        # unpacked but not asserted.
        for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected):
            self.assertAlmostEqual(res_z, exp_z, delta=1e-2)
|
||||
@@ -1,56 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import fixture_file
|
||||
from crankshaft.clustering import Kmeans
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import crankshaft.clustering as cc
|
||||
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider stub that returns a preset result for k-means queries."""

    def __init__(self, mocked_result):
        # Rows handed back verbatim by both k-means getters.
        self.mocked_result = mocked_result

    def get_spatial_kmeans(self, query):
        """Return the preset result; ``query`` is ignored."""
        return self.mocked_result

    def get_nonspatial_kmeans(self, query, standarize):
        """Return the preset result; both arguments are ignored."""
        return self.mocked_result
|
||||
|
||||
|
||||
class KMeansTest(unittest.TestCase):
    """Testing class for k-means spatial"""

    def setUp(self):
        # Context manager so the fixture file is closed after reading.
        with open(fixture_file('kmeans.json')) as fixture:
            self.cluster_data = json.load(fixture)
        self.params = {"subquery": "select * from table",
                       "no_clusters": "10"}

    def test_kmeans(self):
        """Test spatial k-means with two clusters on the 40-point fixture"""
        data = [{'xs': d['xs'],
                 'ys': d['ys'],
                 'ids': d['ids']} for d in self.cluster_data]

        random_seeds.set_random_seeds(1234)
        kmeans = Kmeans(FakeDataProvider(data))
        clusters = kmeans.spatial('subquery', 2)
        labels = [a[1] for a in clusters]
        c1 = [a for a in clusters if a[1] == 0]
        c2 = [a for a in clusters if a[1] == 1]

        # Two distinct labels, each covering half of the rows.
        self.assertEqual(len(np.unique(labels)), 2)
        self.assertEqual(len(c1), 20)
        self.assertEqual(len(c2), 20)
|
||||
@@ -1,112 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
from helper import fixture_file
|
||||
from crankshaft.clustering import Moran
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider stub that returns preset rows for Moran queries."""

    def __init__(self, mock_data):
        # Rows handed back verbatim by get_moran.
        self.mock_result = mock_data

    def get_moran(self, w_type, params):
        """Return the preset rows; ``w_type`` and ``params`` are ignored."""
        return self.mock_result
|
||||
|
||||
|
||||
class MoranTest(unittest.TestCase):
    """Testing class for Moran's I functions"""

    def setUp(self):
        self.params = {"id_col": "cartodb_id",
                       "attr1": "andy",
                       "attr2": "jay_z",
                       "subquery": "SELECT * FROM a_list",
                       "geom_col": "the_geom",
                       "num_ngbrs": 321}
        self.params_markov = {"id_col": "cartodb_id",
                              "time_cols": ["_2013_dec", "_2014_jan",
                                            "_2014_feb"],
                              "subquery": "SELECT * FROM a_list",
                              "geom_col": "the_geom",
                              "num_ngbrs": 321}
        # Context managers so the fixture files are closed after reading.
        with open(fixture_file('neighbors.json')) as fixture:
            self.neighbors_data = json.load(fixture)
        with open(fixture_file('moran.json')) as fixture:
            self.moran_data = json.load(fixture)

    def test_map_quads(self):
        """Test map_quads"""
        from crankshaft.clustering import map_quads
        self.assertEqual(map_quads(1), 'HH')
        self.assertEqual(map_quads(2), 'LH')
        self.assertEqual(map_quads(3), 'LL')
        self.assertEqual(map_quads(4), 'HL')
        self.assertEqual(map_quads(33), None)
        self.assertEqual(map_quads('andy'), None)

    def test_quad_position(self):
        """Test quad_position"""
        from crankshaft.clustering import quad_position

        # Builtin `int` instead of the `np.int` alias, which newer NumPy
        # releases no longer provide; the resulting dtype is the same.
        quads = np.array([1, 2, 3, 4], int)

        ans = np.array(['HH', 'LH', 'LL', 'HL'])
        test_ans = quad_position(quads)

        self.assertTrue((test_ans == ans).all())

    def test_local_stat(self):
        """Test Moran's I local"""
        data = [OrderedDict([('id', d['id']),
                             ('attr1', d['value']),
                             ('neighbors', d['neighbors'])])
                for d in self.neighbors_data]

        moran = Moran(FakeDataProvider(data))
        random_seeds.set_random_seeds(1234)
        result = moran.local_stat('subquery', 'value',
                                  'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        zipped_values = zip(result, self.moran_data)

        for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)
            self.assertEqual(res_quad, exp_quad)

    def test_moran_local_rate(self):
        """Test Moran's I rate"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'attr2': 1,
                 'neighbors': d['neighbors']} for d in self.neighbors_data]

        random_seeds.set_random_seeds(1234)
        moran = Moran(FakeDataProvider(data))
        result = moran.local_rate_stat('subquery', 'numerator', 'denominator',
                                       'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]

        zipped_values = zip(result, self.moran_data)

        # Only the statistic is compared here; the quadrant labels are
        # unpacked but not asserted.
        for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)

    def test_moran(self):
        """Test Moran's I global"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        random_seeds.set_random_seeds(1235)
        moran = Moran(FakeDataProvider(data))
        result = moran.global_stat('table', 'value',
                                   'knn', 5, 99, 'the_geom',
                                   'cartodb_id')

        result_moran = result[0][0]
        # The expected global value is taken as the mean of the local
        # fixture values, compared with a loose tolerance of 0.1.
        expected_moran = np.array([row[0] for row in self.moran_data]).mean()
        self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)
|
||||
@@ -1,160 +0,0 @@
|
||||
import unittest
|
||||
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class PysalUtilsTest(unittest.TestCase):
    """Testing class for utility functions related to PySAL integrations"""

    def setUp(self):
        # Ordered mappings keep the attribute entries in the listed order.
        self.params1 = OrderedDict([("id_col", "cartodb_id"),
                                    ("attr1", "andy"),
                                    ("attr2", "jay_z"),
                                    ("subquery", "SELECT * FROM a_list"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])

        self.params2 = OrderedDict([("id_col", "cartodb_id"),
                                    ("numerator", "price"),
                                    ("denominator", "sq_meters"),
                                    ("subquery", "SELECT * FROM pecan"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])

        self.params3 = OrderedDict([("id_col", "cartodb_id"),
                                    ("numerator", "sq_meters"),
                                    ("denominator", "price"),
                                    ("subquery", "SELECT * FROM pecan"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])

        self.params_array = {"id_col": "cartodb_id",
                             "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"],
                             "subquery": "SELECT * FROM a_list",
                             "geom_col": "the_geom",
                             "num_ngbrs": 321}

    def test_query_attr_select(self):
        """Test query_attr_select"""

        expected1 = ("i.\"andy\"::numeric As attr1, "
                     "i.\"jay_z\"::numeric As attr2, ")

        expected2 = ("i.\"price\"::numeric As attr1, "
                     "i.\"sq_meters\"::numeric As attr2, ")

        expected3 = ("i.\"sq_meters\"::numeric As attr1, "
                     "i.\"price\"::numeric As attr2, ")

        expected_array = ("i.\"_2013_dec\"::numeric As attr1, "
                          "i.\"_2014_jan\"::numeric As attr2, "
                          "i.\"_2014_feb\"::numeric As attr3, ")

        self.assertEqual(pu.query_attr_select(self.params1), expected1)
        self.assertEqual(pu.query_attr_select(self.params2), expected2)
        self.assertEqual(pu.query_attr_select(self.params3), expected3)
        self.assertEqual(pu.query_attr_select(self.params_array),
                         expected_array)

    def test_query_attr_where(self):
        """Test pu.query_attr_where"""

        expected1 = ("idx_replace.\"andy\" IS NOT NULL AND "
                     "idx_replace.\"jay_z\" IS NOT NULL")

        expected_array = ("idx_replace.\"_2013_dec\" IS NOT NULL AND "
                          "idx_replace.\"_2014_jan\" IS NOT NULL AND "
                          "idx_replace.\"_2014_feb\" IS NOT NULL")

        self.assertEqual(pu.query_attr_where(self.params1), expected1)
        self.assertEqual(pu.query_attr_where(self.params_array),
                         expected_array)

    def test_knn(self):
        """Test knn neighbors constructor"""

        expected1 = ("SELECT i.\"cartodb_id\" As id, "
                     "i.\"andy\"::numeric As attr1, "
                     "i.\"jay_z\"::numeric As attr2, "
                     "(SELECT ARRAY(SELECT j.\"cartodb_id\" "
                     "FROM (SELECT * FROM a_list) As j "
                     "WHERE "
                     "i.\"cartodb_id\" <> j.\"cartodb_id\" AND "
                     "j.\"andy\" IS NOT NULL AND "
                     "j.\"jay_z\" IS NOT NULL "
                     "ORDER BY "
                     "j.\"the_geom\" <-> i.\"the_geom\" ASC "
                     "LIMIT 321)) As neighbors "
                     "FROM (SELECT * FROM a_list) As i "
                     "WHERE i.\"andy\" IS NOT NULL AND "
                     "i.\"jay_z\" IS NOT NULL "
                     "ORDER BY i.\"cartodb_id\" ASC;")

        expected_array = ("SELECT i.\"cartodb_id\" As id, "
                          "i.\"_2013_dec\"::numeric As attr1, "
                          "i.\"_2014_jan\"::numeric As attr2, "
                          "i.\"_2014_feb\"::numeric As attr3, "
                          "(SELECT ARRAY(SELECT j.\"cartodb_id\" "
                          "FROM (SELECT * FROM a_list) As j "
                          "WHERE i.\"cartodb_id\" <> j.\"cartodb_id\" AND "
                          "j.\"_2013_dec\" IS NOT NULL AND "
                          "j.\"_2014_jan\" IS NOT NULL AND "
                          "j.\"_2014_feb\" IS NOT NULL "
                          "ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC "
                          "LIMIT 321)) As neighbors "
                          "FROM (SELECT * FROM a_list) As i "
                          "WHERE i.\"_2013_dec\" IS NOT NULL AND "
                          "i.\"_2014_jan\" IS NOT NULL AND "
                          "i.\"_2014_feb\" IS NOT NULL "
                          "ORDER BY i.\"cartodb_id\" ASC;")

        self.assertEqual(pu.knn(self.params1), expected1)
        self.assertEqual(pu.knn(self.params_array), expected_array)

    def test_queen(self):
        """Test queen neighbors constructor"""

        expected1 = ("SELECT i.\"cartodb_id\" As id, "
                     "i.\"andy\"::numeric As attr1, "
                     "i.\"jay_z\"::numeric As attr2, "
                     "(SELECT ARRAY(SELECT j.\"cartodb_id\" "
                     "FROM (SELECT * FROM a_list) As j "
                     "WHERE "
                     "i.\"cartodb_id\" <> j.\"cartodb_id\" AND "
                     "ST_Touches(i.\"the_geom\", "
                     "j.\"the_geom\") AND "
                     "j.\"andy\" IS NOT NULL AND "
                     "j.\"jay_z\" IS NOT NULL)"
                     ") As neighbors "
                     "FROM (SELECT * FROM a_list) As i "
                     "WHERE i.\"andy\" IS NOT NULL AND "
                     "i.\"jay_z\" IS NOT NULL "
                     "ORDER BY i.\"cartodb_id\" ASC;")

        self.assertEqual(pu.queen(self.params1), expected1)

    def test_construct_neighbor_query(self):
        """Test construct_neighbor_query"""

        # Compare to raw knn query
        via_dispatch = pu.construct_neighbor_query('knn', self.params1)
        self.assertEqual(via_dispatch, pu.knn(self.params1))

    def test_get_attributes(self):
        """Test get_attributes"""

        # Placeholder: need to add tests
        self.assertEqual(True, True)

    def test_get_weight(self):
        """Test get_weight"""

        # Placeholder: no real assertions yet
        self.assertEqual(True, True)

    def test_empty_zipped_array(self):
        """Test empty_zipped_array"""
        pair_of_nones = [(None, None)]
        four_nones = [(None, None, None, None)]
        self.assertEqual(pu.empty_zipped_array(2), pair_of_nones)
        self.assertEqual(pu.empty_zipped_array(4), four_nones)
|
||||
@@ -1,64 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
from helper import plpy, fixture_file
|
||||
import crankshaft.segmentation as segmentation
|
||||
import json
|
||||
|
||||
class SegmentationTest(unittest.TestCase):
    """Testing class for the segmentation functions"""

    def setUp(self):
        # Start every test with a clean mock: no canned results or messages.
        plpy._reset()

    def generate_random_data(self, n_samples, random_state, row_type=False):
        """Build a synthetic regression data set with target x1 + x2**2 + x3.

        With ``row_type`` true, returns a ``(rows, y)`` tuple where each row
        is ``{'features': (x1, x2, x3)}``. Otherwise returns a one-element
        list holding a dict of whole columns keyed ``x1``, ``x2``, ``x3``,
        ``target`` and ``cartodb_id``.
        """
        x1 = random_state.uniform(size=n_samples)
        x2 = random_state.uniform(size=n_samples)
        x3 = random_state.randint(0, 4, size=n_samples)

        y = x1 + x2 * x2 + x3
        cartodb_id = range(len(x1))

        if row_type:
            return [{'features': vals} for vals in zip(x1, x2, x3)], y
        return [dict(zip(['x1', 'x2', 'x3', 'target', 'cartodb_id'],
                         [x1, x2, x3, y, cartodb_id]))]

    def test_replace_nan_with_mean(self):
        # TODO: this test only builds its input and asserts nothing yet.
        test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])

    def test_create_and_predict_segment(self):
        n_samples = 1000

        random_state_train = np.random.RandomState(13)
        random_state_test = np.random.RandomState(134)
        training_data = self.generate_random_data(n_samples,
                                                  random_state_train)
        test_data, test_y = self.generate_random_data(n_samples,
                                                      random_state_test,
                                                      row_type=True)

        ids = [{'cartodb_ids': range(len(test_data))}]
        rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]

        # Raw strings: the patterns contain regex backslash escapes, which
        # are invalid escape sequences in ordinary string literals. The
        # pattern text itself is unchanged.
        plpy._define_result(
            r'select \* from \(select \* from training\) a limit 1', rows)
        plpy._define_result(
            r'.*from \(select \* from training\) as a', training_data)
        plpy._define_result(
            r'select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',
            ids)
        plpy._define_result(r'.*select \* from test.*', test_data)

        model_parameters = {'n_estimators': 1200,
                            'max_depth': 3,
                            'subsample': 0.5,
                            'learning_rate': 0.01,
                            'min_samples_leaf': 1}

        result = segmentation.create_and_predict_segment(
            'select * from training',
            'target',
            'select * from test',
            model_parameters)

        prediction = [r[1] for r in result]

        # Root-mean-square error of the predictions (lower is better).
        rmse = np.sqrt(np.mean(np.square(np.array(prediction) -
                                         np.array(test_y))))

        self.assertEqual(len(result), len(test_data))
        self.assertTrue(result[0][2] < 0.01)
        self.assertTrue(rmse < 0.5 * np.mean(test_y))
|
||||
@@ -1,349 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
import unittest
|
||||
|
||||
|
||||
from helper import fixture_file
|
||||
|
||||
from crankshaft.space_time_dynamics import Markov
|
||||
import crankshaft.space_time_dynamics as std
|
||||
from crankshaft import random_seeds
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import json
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider stub that returns preset rows for Markov queries."""

    def __init__(self, data):
        # Rows handed back verbatim by get_markov.
        self.mock_result = data

    def get_markov(self, w_type, params):
        """Return the preset rows; ``w_type`` and ``params`` are ignored."""
        return self.mock_result
|
||||
|
||||
|
||||
class SpaceTimeTests(unittest.TestCase):
|
||||
"""Testing class for Markov Functions."""
|
||||
|
||||
def setUp(self):
    """Load the Markov fixtures and build the reference arrays."""
    self.params = {"id_col": "cartodb_id",
                   "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'],
                   "subquery": "SELECT * FROM a_list",
                   "geom_col": "the_geom",
                   "num_ngbrs": 321}
    # Context managers so the fixture files are closed after reading.
    with open(fixture_file('neighbors_markov.json')) as fixture:
        self.neighbors_data = json.load(fixture)
    with open(fixture_file('markov.json')) as fixture:
        self.markov_data = json.load(fixture)

    # 10x10 array in which every row is [0., 1., ..., 9.].
    self.time_data = np.array([i * np.ones(10, dtype=float)
                               for i in range(10)]).T

    # Five 5x5 reference transition matrices.
    self.transition_matrix = np.array([
        [[0.96341463, 0.0304878, 0.00609756, 0., 0.],
         [0.06040268, 0.83221477, 0.10738255, 0., 0.],
         [0., 0.14, 0.74, 0.12, 0.],
         [0., 0.03571429, 0.32142857, 0.57142857, 0.07142857],
         [0., 0., 0., 0.16666667, 0.83333333]],
        [[0.79831933, 0.16806723, 0.03361345, 0., 0.],
         [0.0754717, 0.88207547, 0.04245283, 0., 0.],
         [0.00537634, 0.06989247, 0.8655914, 0.05913978, 0.],
         [0., 0., 0.06372549, 0.90196078, 0.03431373],
         [0., 0., 0., 0.19444444, 0.80555556]],
        [[0.84693878, 0.15306122, 0., 0., 0.],
         [0.08133971, 0.78947368, 0.1291866, 0., 0.],
         [0.00518135, 0.0984456, 0.79274611, 0.0984456, 0.00518135],
         [0., 0., 0.09411765, 0.87058824, 0.03529412],
         [0., 0., 0., 0.10204082, 0.89795918]],
        [[0.8852459, 0.09836066, 0., 0.01639344, 0.],
         [0.03875969, 0.81395349, 0.13953488, 0., 0.00775194],
         [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
         [0., 0.02339181, 0.12865497, 0.75438596, 0.09356725],
         [0., 0., 0., 0.09661836, 0.90338164]],
        [[0.33333333, 0.66666667, 0., 0., 0.],
         [0.0483871, 0.77419355, 0.16129032, 0.01612903, 0.],
         [0.01149425, 0.16091954, 0.74712644, 0.08045977, 0.],
         [0., 0.01036269, 0.06217617, 0.89637306, 0.03108808],
         [0., 0., 0., 0.02352941, 0.97647059]]]
    )
|
||||
|
||||
def test_spatial_markov(self):
|
||||
"""Test Spatial Markov."""
|
||||
data = [{'id': d['id'],
|
||||
'attr1': d['y1995'],
|
||||
'attr2': d['y1996'],
|
||||
'attr3': d['y1997'],
|
||||
'attr4': d['y1998'],
|
||||
'attr5': d['y1999'],
|
||||
'attr6': d['y2000'],
|
||||
'attr7': d['y2001'],
|
||||
'attr8': d['y2002'],
|
||||
'attr9': d['y2003'],
|
||||
'attr10': d['y2004'],
|
||||
'attr11': d['y2005'],
|
||||
'attr12': d['y2006'],
|
||||
'attr13': d['y2007'],
|
||||
'attr14': d['y2008'],
|
||||
'attr15': d['y2009'],
|
||||
'neighbors': d['neighbors']} for d in self.neighbors_data]
|
||||
# print(str(data[0]))
|
||||
markov = Markov(FakeDataProvider(data))
|
||||
random_seeds.set_random_seeds(1234)
|
||||
|
||||
result = markov.spatial_trend('subquery',
|
||||
['y1995', 'y1996', 'y1997', 'y1998',
|
||||
'y1999', 'y2000', 'y2001', 'y2002',
|
||||
'y2003', 'y2004', 'y2005', 'y2006',
|
||||
'y2007', 'y2008', 'y2009'],
|
||||
5, 'knn', 5, 0, 'the_geom',
|
||||
'cartodb_id')
|
||||
|
||||
self.assertTrue(result is not None)
|
||||
result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
|
||||
print result[0]
|
||||
expected = self.markov_data
|
||||
for ([res_trend, res_up, res_down, res_vol, res_id],
|
||||
[exp_trend, exp_up, exp_down, exp_vol, exp_id]
|
||||
) in zip(result, expected):
|
||||
self.assertAlmostEqual(res_trend, exp_trend)
|
||||
|
||||
def test_get_time_data(self):
|
||||
"""Test get_time_data"""
|
||||
data = [{'attr1': d['y1995'],
|
||||
'attr2': d['y1996'],
|
||||
'attr3': d['y1997'],
|
||||
'attr4': d['y1998'],
|
||||
'attr5': d['y1999'],
|
||||
'attr6': d['y2000'],
|
||||
'attr7': d['y2001'],
|
||||
'attr8': d['y2002'],
|
||||
'attr9': d['y2003'],
|
||||
'attr10': d['y2004'],
|
||||
'attr11': d['y2005'],
|
||||
'attr12': d['y2006'],
|
||||
'attr13': d['y2007'],
|
||||
'attr14': d['y2008'],
|
||||
'attr15': d['y2009']} for d in self.neighbors_data]
|
||||
|
||||
result = std.get_time_data(data, ['y1995', 'y1996', 'y1997', 'y1998',
|
||||
'y1999', 'y2000', 'y2001', 'y2002',
|
||||
'y2003', 'y2004', 'y2005', 'y2006',
|
||||
'y2007', 'y2008', 'y2009'])
|
||||
|
||||
# expected was prepared from PySAL example:
|
||||
# f = ps.open(ps.examples.get_path("usjoin.csv"))
|
||||
# pci = np.array([f.by_col[str(y)]
|
||||
# for y in range(1995, 2010)]).transpose()
|
||||
# rpci = pci / (pci.mean(axis = 0))
|
||||
|
||||
expected = np.array(
|
||||
[[0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154,
|
||||
0.83271652, 0.83786314, 0.85012593, 0.85509656, 0.86416612,
|
||||
0.87119375, 0.86302631, 0.86148267, 0.86252252, 0.86746356],
|
||||
[0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388,
|
||||
0.90746978, 0.89830489, 0.89431991, 0.88924794, 0.89815176,
|
||||
0.91832091, 0.91706054, 0.90139505, 0.87897455, 0.86216858],
|
||||
[0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522,
|
||||
0.78964559, 0.80584442, 0.8084998, 0.82258551, 0.82668196,
|
||||
0.82373724, 0.81814804, 0.83675961, 0.83574199, 0.84647177],
|
||||
[1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841,
|
||||
1.14506948, 1.12151133, 1.11160697, 1.10888621, 1.11399806,
|
||||
1.12168029, 1.13164797, 1.12958508, 1.11371818, 1.09936775],
|
||||
[1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025,
|
||||
1.16898201, 1.17212488, 1.14752303, 1.11843284, 1.11024964,
|
||||
1.11943471, 1.11736468, 1.10863242, 1.09642516, 1.07762337],
|
||||
[1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684,
|
||||
1.44184737, 1.44782832, 1.41978227, 1.39092208, 1.4059372,
|
||||
1.40788646, 1.44052766, 1.45241216, 1.43306098, 1.4174431],
|
||||
[1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149,
|
||||
1.10888138, 1.11856629, 1.13062931, 1.11944984, 1.12446239,
|
||||
1.11671008, 1.10880034, 1.08401709, 1.06959206, 1.07875225],
|
||||
[1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545,
|
||||
0.99854316, 0.9880258, 0.99669587, 0.99327676, 1.01400905,
|
||||
1.03176742, 1.040511, 1.01749645, 0.9936394, 0.98279746],
|
||||
[0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845,
|
||||
0.99127006, 0.97925917, 0.9683482, 0.95335147, 0.93694787,
|
||||
0.94308213, 0.92232874, 0.91284091, 0.89689833, 0.88928858],
|
||||
[0.87418391, 0.86416601, 0.84425695, 0.8404494, 0.83903044,
|
||||
0.8578708, 0.86036185, 0.86107306, 0.8500772, 0.86981998,
|
||||
0.86837929, 0.87204141, 0.86633032, 0.84946077, 0.83287146],
|
||||
[1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624,
|
||||
1.14450183, 1.12349752, 1.12596664, 1.12213996, 1.1119989,
|
||||
1.10257792, 1.10491258, 1.11059842, 1.10509795, 1.10020097],
|
||||
[0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687,
|
||||
0.95831051, 0.94480909, 0.94804195, 0.95430286, 0.94103989,
|
||||
0.92122519, 0.91010201, 0.89280392, 0.89298243, 0.89165385],
|
||||
[0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647,
|
||||
0.9480927, 0.93539182, 0.95388718, 0.94597005, 0.96918424,
|
||||
0.94781281, 0.93466815, 0.94281559, 0.96520315, 0.96715441],
|
||||
[0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897,
|
||||
0.98687073, 0.99237486, 0.98209969, 0.9877653, 0.97399471,
|
||||
0.96910087, 0.98416665, 0.98423613, 0.99823861, 0.99545704],
|
||||
[0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012,
|
||||
0.86191535, 0.84981451, 0.85472102, 0.84564835, 0.83998883,
|
||||
0.83478547, 0.82803648, 0.8198736, 0.82265395, 0.8399404],
|
||||
[0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136,
|
||||
0.82785597, 0.86008789, 0.86776298, 0.86720209, 0.8676334,
|
||||
0.89179317, 0.94202108, 0.9422231, 0.93902708, 0.94479184],
|
||||
[0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238,
|
||||
0.90906632, 0.92693339, 0.93695966, 0.94242697, 0.94338265,
|
||||
0.91981796, 0.91108804, 0.90543476, 0.91737138, 0.94793657],
|
||||
[1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723,
|
||||
1.20172869, 1.21328691, 1.22624778, 1.22397075, 1.23857042,
|
||||
1.24419893, 1.23929384, 1.23418676, 1.23626739, 1.26754398],
|
||||
[1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667,
|
||||
1.34790023, 1.34399863, 1.32575181, 1.30795492, 1.30544841,
|
||||
1.30303302, 1.32107766, 1.32936244, 1.33001241, 1.33288462],
|
||||
[1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093,
|
||||
1.05059016, 1.03405057, 1.02747623, 1.03162734, 0.9961416,
|
||||
0.97356208, 0.94241549, 0.92754547, 0.92549227, 0.92138102],
|
||||
[1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264,
|
||||
1.13889622, 1.12442212, 1.13367018, 1.13982256, 1.14029944,
|
||||
1.11979401, 1.10905389, 1.10577769, 1.11166825, 1.09985155],
|
||||
[0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284,
|
||||
0.74480073, 0.76098396, 0.76156903, 0.76651952, 0.76533288,
|
||||
0.78205934, 0.76842416, 0.77487118, 0.77768683, 0.78801192],
|
||||
[0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803,
|
||||
0.97370819, 0.96419154, 0.97209861, 0.97441313, 0.96356162,
|
||||
0.94745352, 0.93965462, 0.93069645, 0.94020973, 0.94358232],
|
||||
[0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801,
|
||||
0.80071489, 0.83358256, 0.83451613, 0.85175032, 0.85954307,
|
||||
0.86790024, 0.87170334, 0.87863799, 0.87497981, 0.87888675],
|
||||
[0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619,
|
||||
0.98733195, 0.99644997, 0.99669587, 1.02559097, 1.01116651,
|
||||
0.99988024, 0.97906749, 0.99323123, 1.00204939, 0.99602148],
|
||||
[1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683,
|
||||
1.08312397, 1.05192626, 1.04230892, 1.05577278, 1.08569751,
|
||||
1.12443486, 1.08891079, 1.08603695, 1.05997314, 1.02160943],
|
||||
[1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272,
|
||||
1.18257029, 1.16226243, 1.16009196, 1.14467789, 1.14820235,
|
||||
1.12386598, 1.12680236, 1.12357937, 1.1159258, 1.12570828],
|
||||
[1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667,
|
||||
1.31210239, 1.29989156, 1.29203193, 1.27183516, 1.26830786,
|
||||
1.2617743, 1.28656675, 1.29734097, 1.29390205, 1.29345446],
|
||||
[0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864,
|
||||
0.78772975, 0.82848011, 0.8259679, 0.82435705, 0.83108634,
|
||||
0.84373784, 0.83891093, 0.84349247, 0.85637272, 0.86539395],
|
||||
[1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626,
|
||||
1.2256767, 1.21126648, 1.19377804, 1.18355337, 1.19674434,
|
||||
1.21536573, 1.23653297, 1.27962009, 1.27968392, 1.25907738],
|
||||
[0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282,
|
||||
0.96480308, 0.94686376, 0.93679073, 0.92540049, 0.92988835,
|
||||
0.93442917, 0.92100464, 0.91475304, 0.90249622, 0.9021363],
|
||||
[0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368,
|
||||
0.88937573, 0.894401, 0.90448993, 0.95495898, 0.92698333,
|
||||
0.94745352, 0.92562488, 0.96635366, 1.02520312, 1.0394296],
|
||||
[1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073,
|
||||
1.00759019, 0.99192968, 0.99747298, 0.99550759, 0.97583768,
|
||||
0.9610168, 0.94779638, 0.93759089, 0.93353431, 0.94121705],
|
||||
[0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613,
|
||||
0.83434854, 0.85813595, 0.84667961, 0.84374558, 0.85951183,
|
||||
0.87194227, 0.89455097, 0.88283929, 0.90349491, 0.90600675],
|
||||
[1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086,
|
||||
1.00581626, 0.98850522, 0.99291168, 0.98983209, 0.97511924,
|
||||
0.96134615, 0.96382634, 0.95011401, 0.9434686, 0.94637765],
|
||||
[1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857,
|
||||
1.04800023, 1.03024941, 1.04200483, 1.0402554, 1.03296979,
|
||||
1.02191682, 1.02476275, 1.02347523, 1.02517684, 1.04359571],
|
||||
[1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043,
|
||||
1.0531801, 1.07452771, 1.09383478, 1.1052447, 1.10322136,
|
||||
1.09167939, 1.08772756, 1.08859544, 1.09177338, 1.1096083],
|
||||
[0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809,
|
||||
0.86287327, 0.85169796, 0.85411285, 0.84886336, 0.84517414,
|
||||
0.84843858, 0.84488343, 0.83374329, 0.82812044, 0.82878599],
|
||||
[0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286,
|
||||
0.92652175, 0.94278865, 0.93682452, 0.98655146, 0.992237,
|
||||
0.9798497, 0.93869677, 0.96947771, 1.00362626, 0.98102351],
|
||||
[0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967,
|
||||
0.93092109, 0.92662519, 0.93412152, 0.93501274, 0.92879506,
|
||||
0.92110542, 0.91035556, 0.90430364, 0.89994694, 0.90073864],
|
||||
[0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824,
|
||||
0.98882205, 0.97662234, 0.95601578, 0.94905385, 0.94934888,
|
||||
0.97152609, 0.97163004, 0.9700702, 0.97158948, 0.95884908],
|
||||
[0.83980439, 0.84726737, 0.85747, 0.85467221, 0.8556751,
|
||||
0.84818516, 0.85265681, 0.84502402, 0.82645665, 0.81743586,
|
||||
0.83550406, 0.83338919, 0.83511679, 0.82136617, 0.80921874],
|
||||
[0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441,
|
||||
0.95440787, 0.96364363, 0.96804412, 0.97136214, 0.97583768,
|
||||
0.95571724, 0.96895368, 0.97001634, 0.97082733, 0.98782366],
|
||||
[1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249,
|
||||
1.10558188, 1.1214086, 1.12292577, 1.13021031, 1.13342735,
|
||||
1.14686068, 1.14502975, 1.14474747, 1.14084037, 1.16142926],
|
||||
[1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863,
|
||||
1.11856702, 1.09764283, 1.08815849, 1.08044313, 1.09278827,
|
||||
1.07003204, 1.08398066, 1.09831768, 1.09298232, 1.09176125],
|
||||
[0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744,
|
||||
0.77751194, 0.79902974, 0.81437881, 0.80788828, 0.79603865,
|
||||
0.78966436, 0.79949807, 0.80172182, 0.82168155, 0.85587911],
|
||||
[1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561,
|
||||
1.00162979, 0.99860739, 1.00814981, 1.00574316, 0.99030032,
|
||||
0.97682565, 0.97292596, 0.96519561, 0.96173403, 0.95890284],
|
||||
[0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289,
|
||||
0.96608031, 0.99727185, 1.00781194, 1.03484236, 1.05333619,
|
||||
1.0983263, 1.1704974, 1.17025154, 1.18730553, 1.14242645]])
|
||||
|
||||
self.assertTrue(np.allclose(result, expected))
|
||||
self.assertTrue(type(result) == type(expected))
|
||||
self.assertTrue(result.shape == expected.shape)
|
||||
|
||||
def test_rebin_data(self):
|
||||
"""Test rebin_data"""
|
||||
# sample in double the time (even case since 10 % 2 = 0):
|
||||
# (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2
|
||||
# = 0.5, 2.5, 4.5, 6.5, 8.5
|
||||
ans_even = np.array([(i + 0.5) * np.ones(10, dtype=float)
|
||||
for i in range(0, 10, 2)]).T
|
||||
|
||||
self.assertTrue(
|
||||
np.array_equal(std.rebin_data(self.time_data, 2), ans_even))
|
||||
|
||||
# sample in triple the time (uneven since 10 % 3 = 1):
|
||||
# (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1
|
||||
# = 1, 4, 7, 9
|
||||
ans_odd = np.array([i * np.ones(10, dtype=float)
|
||||
for i in (1, 4, 7, 9)]).T
|
||||
self.assertTrue(
|
||||
np.array_equal(std.rebin_data(self.time_data, 3), ans_odd))
|
||||
|
||||
def test_get_prob_dist(self):
|
||||
"""Test get_prob_dist"""
|
||||
lag_indices = np.array([1, 2, 3, 4])
|
||||
unit_indices = np.array([1, 3, 2, 4])
|
||||
answer = np.array([
|
||||
[0.0754717, 0.88207547, 0.04245283, 0., 0.],
|
||||
[0., 0., 0.09411765, 0.87058824, 0.03529412],
|
||||
[0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
|
||||
[0., 0., 0., 0.02352941, 0.97647059]
|
||||
])
|
||||
result = std.get_prob_dist(self.transition_matrix,
|
||||
lag_indices, unit_indices)
|
||||
|
||||
self.assertTrue(np.array_equal(result, answer))
|
||||
|
||||
def test_get_prob_stats(self):
|
||||
"""Test get_prob_stats"""
|
||||
|
||||
probs = np.array([
|
||||
[0.0754717, 0.88207547, 0.04245283, 0., 0.],
|
||||
[0., 0., 0.09411765, 0.87058824, 0.03529412],
|
||||
[0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
|
||||
[0., 0., 0., 0.02352941, 0.97647059]
|
||||
])
|
||||
unit_indices = np.array([1, 3, 2, 4])
|
||||
answer_up = np.array([0.04245283, 0.03529412, 0.12376238, 0.])
|
||||
answer_down = np.array([0.0754717, 0.09411765, 0.0990099, 0.02352941])
|
||||
answer_trend = np.array([-0.03301887 / 0.88207547,
|
||||
-0.05882353 / 0.87058824,
|
||||
0.02475248 / 0.77722772,
|
||||
-0.02352941 / 0.97647059])
|
||||
answer_volatility = np.array([0.34221495, 0.33705421,
|
||||
0.29226542, 0.38834223])
|
||||
|
||||
result = std.get_prob_stats(probs, unit_indices)
|
||||
result_up = result[0]
|
||||
result_down = result[1]
|
||||
result_trend = result[2]
|
||||
result_volatility = result[3]
|
||||
|
||||
self.assertTrue(np.allclose(result_up, answer_up))
|
||||
self.assertTrue(np.allclose(result_down, answer_down))
|
||||
self.assertTrue(np.allclose(result_trend, answer_trend))
|
||||
self.assertTrue(np.allclose(result_volatility, answer_volatility))
|
||||
@@ -1,5 +1,5 @@
|
||||
comment = 'CartoDB Spatial Analysis extension'
|
||||
default_version = '0.5.1'
|
||||
default_version = '0.4.2'
|
||||
requires = 'plpythonu, postgis'
|
||||
superuser = true
|
||||
schema = cdb_crankshaft
|
||||
|
||||
@@ -10,11 +10,9 @@ CREATE OR REPLACE FUNCTION
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran NUMERIC, significance NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import Moran
|
||||
from crankshaft.clustering import moran
|
||||
# TODO: use named parameters or a dictionary
|
||||
moran = Moran()
|
||||
return moran.global_stat(subquery, column_name, w_type,
|
||||
num_ngbrs, permutations, geom_col, id_col)
|
||||
return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (internal function)
|
||||
@@ -29,11 +27,9 @@ CREATE OR REPLACE FUNCTION
|
||||
id_col TEXT)
|
||||
RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import Moran
|
||||
moran = Moran()
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran.local_stat(subquery, column_name, w_type,
|
||||
num_ngbrs, permutations, geom_col, id_col)
|
||||
return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local (public-facing function)
|
||||
@@ -124,11 +120,9 @@ CREATE OR REPLACE FUNCTION
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (moran FLOAT, significance FLOAT)
|
||||
AS $$
|
||||
from crankshaft.clustering import Moran
|
||||
moran = Moran()
|
||||
from crankshaft.clustering import moran_local
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran.global_rate_stat(subquery, numerator, denominator, w_type,
|
||||
num_ngbrs, permutations, geom_col, id_col)
|
||||
return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
|
||||
@@ -146,10 +140,9 @@ CREATE OR REPLACE FUNCTION
|
||||
RETURNS
|
||||
TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.clustering import Moran
|
||||
moran = Moran()
|
||||
from crankshaft.clustering import moran_local_rate
|
||||
# TODO: use named parameters or a dictionary
|
||||
return moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- Moran's I Local Rate (public-facing function)
|
||||
|
||||
@@ -1,24 +1,21 @@
|
||||
-- Spatial k-means clustering
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer, no_init integer default 20)
|
||||
CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
|
||||
RETURNS table (cartodb_id integer, cluster_no integer) as $$
|
||||
|
||||
from crankshaft.clustering import kmeans
|
||||
return kmeans(query,no_clusters,no_init)
|
||||
|
||||
from crankshaft.clustering import Kmeans
|
||||
kmeans = Kmeans()
|
||||
return kmeans.spatial(query, no_clusters, no_init)
|
||||
|
||||
$$ LANGUAGE plpythonu;
|
||||
$$ language plpythonu;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
|
||||
RETURNS Numeric[] AS
|
||||
RETURNS Numeric[] AS
|
||||
$$
|
||||
DECLARE
|
||||
DECLARE
|
||||
newX NUMERIC;
|
||||
newY NUMERIC;
|
||||
newW NUMERIC;
|
||||
BEGIN
|
||||
IF weight IS NULL OR the_geom IS NULL THEN
|
||||
IF weight IS NULL OR the_geom IS NULL THEN
|
||||
newX = state[1];
|
||||
newY = state[2];
|
||||
newW = state[3];
|
||||
@@ -33,12 +30,12 @@ END
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
|
||||
RETURNS GEOMETRY AS
|
||||
RETURNS GEOMETRY AS
|
||||
$$
|
||||
BEGIN
|
||||
IF state[3] = 0 THEN
|
||||
IF state[3] = 0 THEN
|
||||
RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326);
|
||||
ELSE
|
||||
ELSE
|
||||
RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326);
|
||||
END IF;
|
||||
END
|
||||
@@ -59,7 +56,7 @@ BEGIN
|
||||
SFUNC = CDB_WeightedMeanS,
|
||||
FINALFUNC = CDB_WeightedMeanF,
|
||||
STYPE = Numeric[],
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
INITCOND = "{0.0,0.0,0.0}"
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
|
||||
@@ -22,11 +22,10 @@ CREATE OR REPLACE FUNCTION
|
||||
RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT)
|
||||
AS $$
|
||||
|
||||
from crankshaft.space_time_dynamics import Markov
|
||||
markov = Markov()
|
||||
from crankshaft.space_time_dynamics import spatial_markov_trend
|
||||
|
||||
## TODO: use named parameters or a dictionary
|
||||
return markov.spatial_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- input table format: identical to above but in a predictable format
|
||||
|
||||
@@ -11,9 +11,8 @@ CREATE OR REPLACE FUNCTION
|
||||
id_col TEXT DEFAULT 'cartodb_id')
|
||||
RETURNS TABLE (z_score NUMERIC, p_value NUMERIC, p_z_sim NUMERIC, rowid BIGINT)
|
||||
AS $$
|
||||
from crankshaft.clustering import Getis
|
||||
getis = Getis()
|
||||
return getis.getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
from crankshaft.clustering import getis_ord
|
||||
return getis_ord(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
-- TODO: make a version that accepts the values as arrays
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
-- Builds a crankshaft.optimization.Optim problem from the given arguments
-- and returns the rows produced by its output() method.
CREATE OR REPLACE FUNCTION
  CDB_OptimAssignments(source text,
                       drain text,
                       drain_capacity text,
                       source_production text,
                       marginal_cost text,
                       dist_matrix_query text,
                       dist_rate numeric DEFAULT 0.15,
                       dist_threshold numeric DEFAULT null)
RETURNS table(drain_id bigint, source_id int, cost numeric, amount numeric) AS $$
    from crankshaft.optimization import Optim

    # cast to float; a NULL argument (None) is passed through unchanged
    as_float = lambda value: None if value is None else float(value)

    solver = Optim(source, drain, dist_matrix_query, drain_capacity,
                   source_production, marginal_cost,
                   dist_rate=as_float(dist_rate),
                   dist_threshold=as_float(dist_threshold))
    return solver.output()
$$ LANGUAGE plpythonu;
|
||||
@@ -1,44 +0,0 @@
|
||||
-- Calculate the distance matrix using underlying road network
|
||||
-- Sample usage:
|
||||
-- select * from cdb_distancematrix('drain_table'::regclass,
|
||||
-- 'source_table'::regclass)
|
||||
-- Routes every origin row to every destination row (cross product) with
-- cdb_route_point_to_point and returns one row per pair.
--
-- @param origin_table      table providing "cartodb_id" and "the_geom"
-- @param destination_table table providing "cartodb_id" and "the_geom"
-- @param transit_mode      passed as third argument to cdb_route_point_to_point
CREATE OR REPLACE FUNCTION CDB_DistanceMatrix(
    origin_table regclass,
    destination_table regclass,
    transit_mode text DEFAULT 'car'
  )
RETURNS TABLE(origin_id bigint, destination_id bigint,
              the_geom geometry(geometry, 4326),
              length_km numeric, duration_sec numeric)
AS $$
BEGIN
  -- regclass values are interpolated with %s: their text form is already
  -- quoted and schema-qualified as needed, whereas %I would wrap the whole
  -- "schema.table" string in one pair of quotes and break the lookup.
  RETURN QUERY
  EXECUTE format('
    WITH pairs AS (
      SELECT
        o."cartodb_id" AS origin_id,
        d."cartodb_id" AS destination_id,
        o."the_geom" AS origin_point,
        d."the_geom" AS destination_point
      FROM
        (SELECT * FROM %s) AS o,
        (SELECT * FROM %s) AS d),
    results AS (
      SELECT
        origin_id,
        destination_id,
        (cdb_route_point_to_point(origin_point,
                                  destination_point,
                                  $1)).*
      FROM pairs)
    SELECT
      origin_id::bigint AS origin_id,
      destination_id::bigint AS destination_id,
      shape AS the_geom,
      length::numeric AS length_km,
      duration::numeric AS duration_sec
    FROM results;', origin_table, destination_table)
  USING transit_mode;
  RETURN;
END;
$$ LANGUAGE plpgsql;
|
||||
@@ -10,45 +10,192 @@
|
||||
-- misses per point the function accepts before giving up.
|
||||
--
|
||||
-- Returns: Multipoint with the requested points
|
||||
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
|
||||
RETURNS GEOMETRY AS $$
|
||||
DECLARE
|
||||
extent GEOMETRY;
|
||||
test_point Geometry;
|
||||
width NUMERIC;
|
||||
height NUMERIC;
|
||||
x0 NUMERIC;
|
||||
y0 NUMERIC;
|
||||
xp NUMERIC;
|
||||
yp NUMERIC;
|
||||
no_left INTEGER;
|
||||
remaining_iterations INTEGER;
|
||||
points GEOMETRY[];
|
||||
bbox_line GEOMETRY;
|
||||
intersection_line GEOMETRY;
|
||||
BEGIN
|
||||
extent := ST_Envelope(geom);
|
||||
width := ST_XMax(extent) - ST_XMIN(extent);
|
||||
height := ST_YMax(extent) - ST_YMIN(extent);
|
||||
x0 := ST_XMin(extent);
|
||||
y0 := ST_YMin(extent);
|
||||
no_left := no_points;
|
||||
|
||||
LOOP
|
||||
if(no_left=0) THEN
|
||||
EXIT;
|
||||
END IF;
|
||||
yp = y0 + height*random();
|
||||
bbox_line = ST_MakeLine(
|
||||
ST_SetSRID(ST_MakePoint(yp, x0),4326),
|
||||
ST_SetSRID(ST_MakePoint(yp, x0+width),4326)
|
||||
);
|
||||
intersection_line = ST_Intersection(bbox_line,geom);
|
||||
test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
|
||||
points := points || test_point;
|
||||
no_left = no_left - 1 ;
|
||||
END LOOP;
|
||||
RETURN ST_Collect(points);
|
||||
-- Returns up to no_points random points lying inside polygon g.
--
-- Candidates are drawn uniformly in the bounding box of g after transforming
-- it to SRID 2163, filtered with ST_Within, and transformed back to 4326.
-- Each pass draws as many candidates as points are still missing; the loop
-- stops once enough points were returned or after max_iter passes, so fewer
-- than no_points points may be returned.
CREATE OR REPLACE FUNCTION CDB_DotDensity(g geometry(Polygon, 4326), no_points integer, max_iter integer DEFAULT 1000)
RETURNS SETOF geometry(Point, 4326)
AS $$
DECLARE
  extent GEOMETRY;
  eq_area_geom GEOMETRY;
  iter INTEGER := 0;
  width NUMERIC;
  height NUMERIC;
  x0 NUMERIC;
  y0 NUMERIC;
  no_left INTEGER;
  sample_points GEOMETRY[];
BEGIN
  -- Nothing can be sampled from a missing/empty polygon or for an unknown
  -- point count; return at once instead of looping max_iter times for nothing.
  IF g IS NULL OR ST_IsEmpty(g) OR no_points IS NULL THEN
    RETURN;
  END IF;

  eq_area_geom := ST_Transform(g, 2163);
  extent := ST_Envelope(eq_area_geom);
  x0 := ST_XMin(extent);
  y0 := ST_YMin(extent);
  width := ST_XMax(extent) - x0;
  height := ST_YMax(extent) - y0;
  no_left := no_points;

  LOOP
    IF (no_left <= 0 OR iter >= max_iter) THEN
      RETURN;
    END IF;

    -- draw no_left candidates in the bounding box, keep those inside g
    WITH random_points AS (
      SELECT ST_SetSRID(ST_MakePoint(x0 + width*random(), y0 + height*random()), 2163) AS p
      FROM generate_series(1, no_left)
    )
    SELECT array_agg(p) FROM random_points
    WHERE ST_Within(p, eq_area_geom)
    INTO sample_points;

    -- unnest(NULL) yields no rows, so an all-miss pass emits nothing
    RETURN QUERY SELECT ST_Transform(a, 4326) FROM unnest(sample_points) AS a;

    IF sample_points IS NOT NULL THEN
      no_left := no_left - array_length(sample_points, 1);
    END IF;
    iter := iter + 1;
  END LOOP;

  RETURN;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- DEPRECATED
|
||||
|
||||
-- Deprecated wrapper around CDB_DotDensity: collects the generated points
-- into a single geometry with ST_Collect (NULL when no point was generated).
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry, no_points Integer, max_iter_per_point Integer DEFAULT 1000)
RETURNS GEOMETRY
AS $$
DECLARE
  final_points GEOMETRY;
BEGIN
  -- schema-qualified so the call resolves even when cdb_crankshaft is not
  -- on the caller's search_path
  SELECT ST_Collect(pt)
    FROM cdb_crankshaft.CDB_DotDensity(geom, no_points, max_iter_per_point) AS pt
    INTO final_points;

  RETURN final_points;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
--
|
||||
-- Creates N points randomly distributed in the specified secondary polygons
|
||||
--
|
||||
-- @param g - array of the geometries to be turned in to points
|
||||
--
|
||||
-- @param no_points - the number of points to generate
|
||||
--
|
||||
-- @param max_iter_per_point - the function generates points in the polygon's bounding box
|
||||
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
|
||||
-- misses per point the function accepts before giving up.
|
||||
--
|
||||
-- Returns: Multipoint with the requested points
|
||||
|
||||
|
||||
|
||||
--
|
||||
-- Generate a random response based on the weights given
|
||||
--
|
||||
-- @param array_ids an array of ids representing the category to return
|
||||
--
|
||||
-- @param weights an array of weights for each category
|
||||
--
|
||||
-- Returns : The randomly selected ID.
|
||||
|
||||
CREATE OR REPLACE function _cdb_SelectRandomWeights(array_ids numeric[], weights numeric[]) returns NUMERIC
as $$
DECLARE
  chosen NUMERIC;
BEGIN
  -- Draw r in [0, sum(weights)) and pick the smallest id whose running
  -- weight total (accumulated in id order) reaches r.
  WITH pairs AS (
    SELECT unnest(array_ids) AS id, unnest(weights) AS percent
  ),
  draw AS (
    SELECT random() * (SELECT SUM(percent) FROM pairs) AS r
  ),
  running AS (
    SELECT pairs.id,
           SUM(pairs.percent) OVER (ORDER BY pairs.id) AS s,
           draw.r
      FROM pairs CROSS JOIN draw
  )
  SELECT running.id
    FROM running
   WHERE running.s >= running.r
   ORDER BY running.id
   LIMIT 1
    INTO chosen;

  RETURN chosen;
END
$$ LANGUAGE plpgsql;
|
||||
|
||||
--
|
||||
-- Weighted Dot Density
|
||||
--
|
||||
-- @param no_points the number of points to generate
|
||||
--
|
||||
-- @param geoms the target geometries to place the points in
|
||||
--
|
||||
-- @param weights the weight for each of the target polygons
|
||||
--
|
||||
-- RETURNS set of points
|
||||
|
||||
-- Distributes no_points points over geoms: each point is assigned to one
-- geometry by a weighted random draw (_cdb_SelectRandomWeights), then
-- CDB_DotDensity generates the assigned number of points per geometry.
CREATE OR REPLACE FUNCTION _cdb_WeightedDD(no_points numeric, geoms geometry[], weights numeric[])
RETURNS SETOF geometry
AS $$
DECLARE
  ids NUMERIC[];
  perGeom NUMERIC[];
  selected_poly INTEGER;
BEGIN
  -- Without target geometries or a point count there is nothing to place;
  -- returning here also avoids NULL loop bounds / NULL array subscripts.
  IF no_points IS NULL OR array_length(geoms, 1) IS NULL THEN
    RETURN;
  END IF;

  -- ids = {1 .. number of geometries}
  SELECT array_agg(id)
    FROM generate_series(1, array_upper(geoms, 1)) AS id
    INTO ids;

  FOR i IN 1..no_points
  LOOP
    SELECT cdb_crankshaft._cdb_SelectRandomWeights(ids, weights) INTO selected_poly;
    -- start the tally at 0 and THEN add one, so the first hit counts as 1
    perGeom[selected_poly] = coalesce(perGeom[selected_poly], 0) + 1;
  END LOOP;

  raise notice 'pergeom %', perGeom;

  FOR i IN 1..array_length(ids, 1)
  LOOP
    -- geometries that were never selected have no tally entry -> 0 points
    RETURN QUERY
    SELECT cdb_crankshaft.CDB_DotDensity(geoms[i], coalesce(perGeom[i], 0)::INTEGER);
  END LOOP;
END
$$
LANGUAGE plpgsql VOLATILE;
|
||||
|
||||
|
||||
--
|
||||
-- Dasymetric Dot Density
|
||||
--
|
||||
-- @param geom: the source geometry; points are placed only where it intersects targetGeoms
|
||||
--
|
||||
-- @param no_points: the total number of points to create
|
||||
--
|
||||
-- @param targetGeoms: the geometries that geom is intersected with to obtain the areas receiving points
|
||||
--
|
||||
-- @param weights: targetGeom weights
|
||||
--
|
||||
-- RETURNS setof points
|
||||
|
||||
-- Clips geom against each target geometry whose bounding box overlaps it and
-- hands the clipped pieces to _CDB_WeightedDD, weighting every piece by
-- (area of the piece) * (weight of its target geometry).
CREATE OR REPLACE FUNCTION CDB_DasymetricDotDensity(geom GEOMETRY, no_points NUMERIC, targetGeoms GEOMETRY[], weights numeric [])
RETURNS setof GEOMETRY
AS $$
BEGIN
  RAISE NOTICE 'running Dasymetric';
  RETURN QUERY
    -- Walk both arrays by subscript so targetGeoms[i] is paired with
    -- weights[i]; two independent unnest() calls in FROM would cross-join
    -- every geometry with every weight.
    WITH clipped AS (
      SELECT i AS idx,
             ST_Intersection(geom, targetGeoms[i]) AS piece,
             weights[i] AS w
        FROM generate_subscripts(targetGeoms, 1) AS i
       WHERE geom && targetGeoms[i]
    )
    SELECT cdb_crankshaft._CDB_WeightedDD(
             no_points,
             array_agg(piece ORDER BY idx),
             array_agg(ST_Area(piece) * w ORDER BY idx)::NUMERIC[])
      FROM clipped;
END
$$
LANGUAGE plpgsql;
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
WITH g AS (
|
||||
SELECT ST_Buffer(ST_SetSRID(ST_MakePoint(0,0),4326)::geometry, 1000)::geometry AS g
|
||||
),
|
||||
points AS(
|
||||
SELECT (
|
||||
ST_Dump(
|
||||
cdb_crankshaft.cdb_dot_density(g.g, 100)
|
||||
)
|
||||
).geom AS p FROM g
|
||||
)
|
||||
SELECT count(*), sum(CASE WHEN ST_Contains(g,p) THEN 1 ELSE 0 END) FROM points, g
|
||||
count | sum
|
||||
-------+-----
|
||||
100 | 100
|
||||
(1 row)
|
||||
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
WITH g AS (
|
||||
SELECT ST_Buffer(ST_SetSRID(ST_MakePoint(0,0),4326)::geometry, 1000)::geometry AS g
|
||||
),
|
||||
points AS(
|
||||
SELECT (
|
||||
ST_Dump(
|
||||
cdb_crankshaft.cdb_dot_density(g.g, 100)
|
||||
)
|
||||
).geom AS p FROM g
|
||||
)
|
||||
|
||||
SELECT count(*), sum(CASE WHEN ST_Contains(g,p) THEN 1 ELSE 0 END) FROM points, g
|
||||
|
||||
@@ -3,5 +3,3 @@ import crankshaft.random_seeds
|
||||
import crankshaft.clustering
|
||||
import crankshaft.space_time_dynamics
|
||||
import crankshaft.segmentation
|
||||
import analysis_data_provider
|
||||
import crankshaft.optimization
|
||||
|
||||
@@ -1,218 +0,0 @@
|
||||
"""class for fetching data"""
|
||||
import plpy
|
||||
import pysal_utils as pu
|
||||
import numpy as np
|
||||
|
||||
class AnalysisDataProvider(object):
|
||||
"""Analysis providers for crankshaft functions. These rely on database
|
||||
access through `plpy`"""
|
||||
def get_getis(self, w_type, params):
    """Run the neighbor query for Getis-Ord's G and return its rows.

    Returns `pu.empty_zipped_array(4)` when the query yields no rows.
    A `plpy.SPIError` is reported through `plpy.error`.
    """
    try:
        rows = plpy.execute(pu.construct_neighbor_query(w_type, params))
        # no rows means no neighbors: hand back the empty placeholder
        return rows if len(rows) != 0 else pu.empty_zipped_array(4)
    except plpy.SPIError as err:
        plpy.error('Analysis failed: %s' % err)
|
||||
|
||||
def get_markov(self, w_type, params):
|
||||
"""fetch data for spatial markov"""
|
||||
try:
|
||||
query = pu.construct_neighbor_query(w_type, params)
|
||||
data = plpy.execute(query)
|
||||
|
||||
if len(data) == 0:
|
||||
return pu.empty_zipped_array(4)
|
||||
|
||||
return data
|
||||
except plpy.SPIError as err:
|
||||
plpy.error('Analysis failed: %s' % err)
|
||||
|
||||
def get_moran(self, w_type, params):
|
||||
"""fetch data for moran's i analyses"""
|
||||
try:
|
||||
query = pu.construct_neighbor_query(w_type, params)
|
||||
data = plpy.execute(query)
|
||||
|
||||
# if there are no neighbors, exit
|
||||
if len(data) == 0:
|
||||
return pu.empty_zipped_array(2)
|
||||
return data
|
||||
except plpy.SPIError as err:
|
||||
plpy.error('Analysis failed: %s' % err)
|
||||
return pu.empty_zipped_array(2)
|
||||
|
||||
def get_nonspatial_kmeans(self, query):
|
||||
"""fetch data for non-spatial kmeans"""
|
||||
try:
|
||||
data = plpy.execute(query)
|
||||
return data
|
||||
except plpy.SPIError as err:
|
||||
plpy.error('Analysis failed: %s' % err)
|
||||
|
||||
def get_spatial_kmeans(self, params):
|
||||
"""fetch data for spatial kmeans"""
|
||||
query = ("SELECT "
|
||||
"array_agg({id_col} ORDER BY {id_col}) as ids,"
|
||||
"array_agg(ST_X({geom_col}) ORDER BY {id_col}) As xs,"
|
||||
"array_agg(ST_Y({geom_col}) ORDER BY {id_col}) As ys "
|
||||
"FROM ({subquery}) As a "
|
||||
"WHERE {geom_col} IS NOT NULL").format(**params)
|
||||
try:
|
||||
data = plpy.execute(query)
|
||||
return data
|
||||
except plpy.SPIError as err:
|
||||
plpy.error('Analysis failed: %s' % err)
|
||||
|
||||
def get_column(self, subquery, column, dtype=float, id_col='cartodb_id',
|
||||
condition=None):
|
||||
"""
|
||||
Retrieve the column from the specified table from a connected
|
||||
PostgreSQL database.
|
||||
|
||||
Args:
|
||||
subquery (str): subquery to retrieve column from
|
||||
column (str): column to retrieve
|
||||
dtype (type): data type in column (e.g, float, int, str)
|
||||
id_col (str, optional): Column name for index. Defaults to
|
||||
`cartodb_id`.
|
||||
|
||||
Returns:
|
||||
numpy.array: column from table as a NumPy array
|
||||
"""
|
||||
query = '''
|
||||
SELECT array_agg("{column}" ORDER BY "{id_col}" ASC) as col
|
||||
FROM ({subquery}) As _wrap {filter}
|
||||
'''.format(subquery=subquery,
|
||||
column=column,
|
||||
id_col=id_col,
|
||||
filter='WHERE {}'.format(condition) if condition else '')
|
||||
|
||||
resp = plpy.execute(query)
|
||||
return np.array(resp[0]['col'], dtype=dtype)
|
||||
|
||||
def get_reduced_column(self, drain_query, capacity,
|
||||
source_query, amount,
|
||||
dtype=float, id_col='cartodb_id'):
|
||||
"""
|
||||
Retrieve the column from the specified table from a connected
|
||||
PostgreSQL database.
|
||||
|
||||
Args:
|
||||
source_query (str): source_query to retrieve column from
|
||||
column (str): column to retrieve
|
||||
dtype (type): data type in column (e.g, float, int, str)
|
||||
id_col (str, optional): Column name for index. Defaults to
|
||||
`cartodb_id`.
|
||||
|
||||
Returns:
|
||||
numpy.array: column from table as a NumPy array
|
||||
|
||||
"""
|
||||
query = '''
|
||||
WITH cte AS (
|
||||
SELECT
|
||||
d."{capacity}" - coalesce(s."source_claimed", 0) As
|
||||
reduced_capacity,
|
||||
d."{id_col}"
|
||||
FROM
|
||||
({drain_query}) As d
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
"drain_id",
|
||||
sum("{amount}") As source_claimed
|
||||
FROM ({source_query}) As _wrap
|
||||
GROUP BY "drain_id") As s
|
||||
ON
|
||||
d."{id_col}" = s."drain_id"
|
||||
)
|
||||
SELECT
|
||||
array_agg("reduced_capacity"
|
||||
ORDER BY "{id_col}" ASC) As col
|
||||
FROM cte
|
||||
'''.format(capacity=capacity,
|
||||
id_col=id_col,
|
||||
drain_query=drain_query,
|
||||
amount=amount,
|
||||
source_query=source_query)
|
||||
|
||||
resp = plpy.execute(query)
|
||||
return np.array(resp[0]['col'], dtype=dtype)
|
||||
|
||||
def get_distance_matrix(self, table, origin_ids, destination_ids):
|
||||
"""Transforms a SQL table origin-destination table into a distance
|
||||
matrix.
|
||||
|
||||
:param query: Table that has the data needed for building the
|
||||
distance matrix. Query should have the following columns:
|
||||
- origin_id (int)
|
||||
- destination_id (int)
|
||||
- length_km (numeric)
|
||||
:type query: str
|
||||
:param origin_ids: List of origin IDs
|
||||
:type origin_ids: list of ints
|
||||
:param destination_ids: List of origin IDs
|
||||
:type destination_ids: list of ints
|
||||
:returns: 2D array of distances from all origins to all destinations
|
||||
:rtype: numpy.array
|
||||
"""
|
||||
try:
|
||||
resp = plpy.execute('''
|
||||
SELECT "origin_id", "destination_id", "length_km"
|
||||
FROM (SELECT * FROM "{table}") as _wrap
|
||||
'''.format(table=table))
|
||||
except plpy.SPIError as err:
|
||||
plpy.error("Failed to build distance matrix: {}".format(err))
|
||||
|
||||
pairs = {(row['origin_id'], row['destination_id']): row['length_km']
|
||||
for row in resp}
|
||||
distance_matrix = np.array([
|
||||
pairs[(origin, destination)]
|
||||
for destination in destination_ids
|
||||
for origin in origin_ids
|
||||
])
|
||||
|
||||
return np.array(distance_matrix,
|
||||
dtype=float).reshape((len(destination_ids),
|
||||
len(origin_ids)))
|
||||
|
||||
|
||||
def get_pairwise_distances(self, drain_query, source_query,
|
||||
id_col='cartodb_id'):
|
||||
"""Retuns the pairwise distances between row i and j for all i in
|
||||
drain_query and j in source_query
|
||||
|
||||
Args:
|
||||
drain_query (str): Query that exposes the `the_geom` and
|
||||
`cartodb_id` (or what is specified in `id_col`) of the dataset
|
||||
for 'drain' locations
|
||||
source_query (str): Query that exposes the `the_geom` and
|
||||
`cartodb_id` (or what is specified in `id_col`) of the dataset
|
||||
for 'source' locations
|
||||
id_col (str, optional): Column name for table index. Defaults to
|
||||
`cartodb_id`.
|
||||
|
||||
Returns:
|
||||
numpy.array: A len(s) by len(d) array of distances from source i to
|
||||
drain j
|
||||
"""
|
||||
query = '''
|
||||
SELECT array_agg(ST_Distance(d."the_geom"::geography,
|
||||
s."the_geom"::geography) / 1000.0
|
||||
ORDER BY d."{id_col}" ASC) as dist
|
||||
FROM ({drain_query}) AS d, ({source_query}) AS s
|
||||
GROUP BY s."{id_col}"
|
||||
ORDER BY s."{id_col}" ASC
|
||||
'''.format(drain_query=drain_query,
|
||||
source_query=source_query,
|
||||
id_col=id_col)
|
||||
|
||||
resp = plpy.execute(query)
|
||||
|
||||
# len(s) x len(d) matrix
|
||||
return np.array([np.array(row['dist'], dtype=float)
|
||||
for row in resp], dtype=float)
|
||||
@@ -3,48 +3,50 @@ Getis-Ord's G geostatistics (hotspot/coldspot analysis)
|
||||
"""
|
||||
|
||||
import pysal as ps
|
||||
import plpy
|
||||
from collections import OrderedDict
|
||||
|
||||
# crankshaft modules
|
||||
# crankshaft module
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
|
||||
class Getis:
|
||||
def __init__(self, data_provider=None):
|
||||
if data_provider is None:
|
||||
self.data_provider = AnalysisDataProvider()
|
||||
else:
|
||||
self.data_provider = data_provider
|
||||
def getis_ord(subquery, attr,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Getis-Ord's G*
|
||||
Implementation building neighbors with a PostGIS database and PySAL's
|
||||
Getis-Ord's G* hotspot/coldspot module.
|
||||
Andy Eschbacher
|
||||
"""
|
||||
|
||||
def getis_ord(self, subquery, attr,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Getis-Ord's G*
|
||||
Implementation building neighbors with a PostGIS database and PySAL's
|
||||
Getis-Ord's G* hotspot/coldspot module.
|
||||
Andy Eschbacher
|
||||
"""
|
||||
# geometries with attributes that are null are ignored
|
||||
# resulting in a collection of not as near neighbors if kNN is chosen
|
||||
|
||||
# geometries with attributes that are null are ignored
|
||||
# resulting in a collection of not as near neighbors if kNN is chosen
|
||||
qvals = OrderedDict([("id_col", id_col),
|
||||
("attr1", attr),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
|
||||
qvals = OrderedDict([("id_col", id_col),
|
||||
("attr1", attr),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
result = self.data_provider.get_getis(w_type, qvals)
|
||||
attr_vals = pu.get_attributes(result)
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(4)
|
||||
except plpy.SPIError, err:
|
||||
plpy.error('Query failed: %s' % err)
|
||||
|
||||
# build PySAL weight object
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
attr_vals = pu.get_attributes(result)
|
||||
|
||||
# calculate Getis-Ord's G* z- and p-values
|
||||
getis = ps.esda.getisord.G_Local(attr_vals, weight,
|
||||
star=True, permutations=permutations)
|
||||
# build PySAL weight object
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
return zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order)
|
||||
# calculate Getis-Ord's G* z- and p-values
|
||||
getis = ps.esda.getisord.G_Local(attr_vals, weight,
|
||||
star=True, permutations=permutations)
|
||||
|
||||
return zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order)
|
||||
|
||||
@@ -1,32 +1,18 @@
|
||||
from sklearn.cluster import KMeans
|
||||
import numpy as np
|
||||
import plpy
|
||||
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
def kmeans(query, no_clusters, no_init=20):
|
||||
data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids,
|
||||
array_agg(ST_X(the_geom) order by cartodb_id) xs,
|
||||
array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a
|
||||
where the_geom is not null
|
||||
'''.format(query=query))
|
||||
|
||||
xs = data[0]['xs']
|
||||
ys = data[0]['ys']
|
||||
ids = data[0]['ids']
|
||||
|
||||
class Kmeans:
|
||||
def __init__(self, data_provider=None):
|
||||
if data_provider is None:
|
||||
self.data_provider = AnalysisDataProvider()
|
||||
else:
|
||||
self.data_provider = data_provider
|
||||
km = KMeans(n_clusters= no_clusters, n_init=no_init)
|
||||
labels = km.fit_predict(zip(xs,ys))
|
||||
return zip(ids,labels)
|
||||
|
||||
def spatial(self, query, no_clusters, no_init=20):
|
||||
"""
|
||||
find centers based on clusters of latitude/longitude pairs
|
||||
query: SQL query that has a WGS84 geometry (the_geom)
|
||||
"""
|
||||
params = {"subquery": query,
|
||||
"geom_col": "the_geom",
|
||||
"id_col": "cartodb_id"}
|
||||
|
||||
data = self.data_provider.get_spatial_kmeans(params)
|
||||
|
||||
# Unpack query response
|
||||
xs = data[0]['xs']
|
||||
ys = data[0]['ys']
|
||||
ids = data[0]['ids']
|
||||
|
||||
km = KMeans(n_clusters=no_clusters, n_init=no_init)
|
||||
labels = km.fit_predict(zip(xs, ys))
|
||||
return zip(ids, labels)
|
||||
|
||||
@@ -6,8 +6,8 @@ Moran's I geostatistics (global clustering & outliers presence)
|
||||
# average of their neighborhood
|
||||
|
||||
import pysal as ps
|
||||
import plpy
|
||||
from collections import OrderedDict
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# crankshaft module
|
||||
import crankshaft.pysal_utils as pu
|
||||
@@ -15,162 +15,204 @@ import crankshaft.pysal_utils as pu
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
|
||||
class Moran:
|
||||
def __init__(self, data_provider=None):
|
||||
if data_provider is None:
|
||||
self.data_provider = AnalysisDataProvider()
|
||||
else:
|
||||
self.data_provider = data_provider
|
||||
def moran(subquery, attr_name,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I (global)
|
||||
Implementation building neighbors with a PostGIS database and Moran's I
|
||||
core clusters with PySAL.
|
||||
Andy Eschbacher
|
||||
"""
|
||||
qvals = OrderedDict([("id_col", id_col),
|
||||
("attr1", attr_name),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
|
||||
def global_stat(self, subquery, attr_name,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I (global)
|
||||
Implementation building neighbors with a PostGIS database and Moran's I
|
||||
core clusters with PySAL.
|
||||
Andy Eschbacher
|
||||
"""
|
||||
params = OrderedDict([("id_col", id_col),
|
||||
("attr1", attr_name),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
result = self.data_provider.get_moran(w_type, params)
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(2)
|
||||
except plpy.SPIError, e:
|
||||
plpy.error('Analysis failed: %s' % e)
|
||||
return pu.empty_zipped_array(2)
|
||||
|
||||
# collect attributes
|
||||
attr_vals = pu.get_attributes(result)
|
||||
# collect attributes
|
||||
attr_vals = pu.get_attributes(result)
|
||||
|
||||
# calculate weights
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
# calculate weights
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate moran global
|
||||
moran_global = ps.esda.moran.Moran(attr_vals, weight,
|
||||
permutations=permutations)
|
||||
# calculate moran global
|
||||
moran_global = ps.esda.moran.Moran(attr_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
return zip([moran_global.I], [moran_global.EI])
|
||||
return zip([moran_global.I], [moran_global.EI])
|
||||
|
||||
def local_stat(self, subquery, attr,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I implementation for PL/Python
|
||||
Andy Eschbacher
|
||||
"""
|
||||
|
||||
# geometries with attributes that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
def moran_local(subquery, attr,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I implementation for PL/Python
|
||||
Andy Eschbacher
|
||||
"""
|
||||
|
||||
params = OrderedDict([("id_col", id_col),
|
||||
("attr1", attr),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
# geometries with attributes that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
|
||||
result = self.data_provider.get_moran(w_type, params)
|
||||
qvals = OrderedDict([("id_col", id_col),
|
||||
("attr1", attr),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
|
||||
attr_vals = pu.get_attributes(result)
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(5)
|
||||
except plpy.SPIError, e:
|
||||
plpy.error('Analysis failed: %s' % e)
|
||||
return pu.empty_zipped_array(5)
|
||||
|
||||
attr_vals = pu.get_attributes(result)
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
# find quadrants for each geometry
|
||||
quads = quad_position(lisa.q)
|
||||
|
||||
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
|
||||
def moran_rate(subquery, numerator, denominator,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I Rate (global)
|
||||
Andy Eschbacher
|
||||
"""
|
||||
qvals = OrderedDict([("id_col", id_col),
|
||||
("attr1", numerator),
|
||||
("attr2", denominator)
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(2)
|
||||
except plpy.SPIError, e:
|
||||
plpy.error('Analysis failed: %s' % e)
|
||||
return pu.empty_zipped_array(2)
|
||||
|
||||
# collect attributes
|
||||
numer = pu.get_attributes(result, 1)
|
||||
denom = pu.get_attributes(result, 2)
|
||||
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate moran global rate
|
||||
lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
|
||||
# find quadrants for each geometry
|
||||
quads = quad_position(lisa.q)
|
||||
return zip([lisa_rate.I], [lisa_rate.EI])
|
||||
|
||||
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
def global_rate_stat(self, subquery, numerator, denominator,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I Rate (global)
|
||||
def moran_local_rate(subquery, numerator, denominator,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I Local Rate
|
||||
Andy Eschbacher
|
||||
"""
|
||||
params = OrderedDict([("id_col", id_col),
|
||||
("attr1", numerator),
|
||||
("attr2", denominator)
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
"""
|
||||
# geometries with values that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
|
||||
result = self.data_provider.get_moran(w_type, params)
|
||||
qvals = OrderedDict([("id_col", id_col),
|
||||
("numerator", numerator),
|
||||
("denominator", denominator),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
|
||||
# collect attributes
|
||||
numer = pu.get_attributes(result, 1)
|
||||
denom = pu.get_attributes(result, 2)
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(5)
|
||||
except plpy.SPIError, e:
|
||||
plpy.error('Analysis failed: %s' % e)
|
||||
return pu.empty_zipped_array(5)
|
||||
|
||||
# calculate moran global rate
|
||||
lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
# collect attributes
|
||||
numer = pu.get_attributes(result, 1)
|
||||
denom = pu.get_attributes(result, 2)
|
||||
|
||||
return zip([lisa_rate.I], [lisa_rate.EI])
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
def local_rate_stat(self, subquery, numerator, denominator,
|
||||
w_type, num_ngbrs, permutations, geom_col, id_col):
|
||||
"""
|
||||
Moran's I Local Rate
|
||||
Andy Eschbacher
|
||||
"""
|
||||
# geometries with values that are null are ignored
|
||||
# resulting in a collection of not as near neighbors
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
|
||||
params = OrderedDict([("id_col", id_col),
|
||||
("numerator", numerator),
|
||||
("denominator", denominator),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
# find quadrants for each geometry
|
||||
quads = quad_position(lisa.q)
|
||||
|
||||
result = self.data_provider.get_moran(w_type, params)
|
||||
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
|
||||
# collect attributes
|
||||
numer = pu.get_attributes(result, 1)
|
||||
denom = pu.get_attributes(result, 2)
|
||||
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
def moran_local_bv(subquery, attr1, attr2,
|
||||
permutations, geom_col, id_col, w_type, num_ngbrs):
|
||||
"""
|
||||
Moran's I (local) Bivariate (untested)
|
||||
"""
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
|
||||
permutations=permutations)
|
||||
qvals = OrderedDict([("id_col", id_col),
|
||||
("attr1", attr1),
|
||||
("attr2", attr2),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
|
||||
# find quadrants for each geometry
|
||||
quads = quad_position(lisa.q)
|
||||
query = pu.construct_neighbor_query(w_type, qvals)
|
||||
|
||||
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
|
||||
try:
|
||||
result = plpy.execute(query)
|
||||
# if there are no neighbors, exit
|
||||
if len(result) == 0:
|
||||
return pu.empty_zipped_array(4)
|
||||
except plpy.SPIError:
|
||||
plpy.error("Error: areas of interest query failed, "
|
||||
"check input parameters")
|
||||
return pu.empty_zipped_array(4)
|
||||
|
||||
def local_bivariate_stat(self, subquery, attr1, attr2,
|
||||
permutations, geom_col, id_col,
|
||||
w_type, num_ngbrs):
|
||||
"""
|
||||
Moran's I (local) Bivariate (untested)
|
||||
"""
|
||||
# collect attributes
|
||||
attr1_vals = pu.get_attributes(result, 1)
|
||||
attr2_vals = pu.get_attributes(result, 2)
|
||||
|
||||
params = OrderedDict([("id_col", id_col),
|
||||
("attr1", attr1),
|
||||
("attr2", attr2),
|
||||
("geom_col", geom_col),
|
||||
("subquery", subquery),
|
||||
("num_ngbrs", num_ngbrs)])
|
||||
# create weights
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
result = self.data_provider.get_moran(w_type, params)
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
# collect attributes
|
||||
attr1_vals = pu.get_attributes(result, 1)
|
||||
attr2_vals = pu.get_attributes(result, 2)
|
||||
# find clustering of significance
|
||||
lisa_sig = quad_position(lisa.q)
|
||||
|
||||
# create weights
|
||||
weight = pu.get_weight(result, w_type, num_ngbrs)
|
||||
|
||||
# calculate LISA values
|
||||
lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
|
||||
permutations=permutations)
|
||||
|
||||
# find clustering of significance
|
||||
lisa_sig = quad_position(lisa.q)
|
||||
|
||||
return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
|
||||
return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
|
||||
|
||||
# Low level functions ----------------------------------------
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
from optim import Optim
|
||||
@@ -1,301 +0,0 @@
|
||||
"""optimization"""
|
||||
import plpy
|
||||
import numpy as np
|
||||
import cvxopt
|
||||
from cvxopt import solvers
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
class Optim(object):
|
||||
"""Linear optimization class for logistics cost minimization
|
||||
Optimization for logistics
|
||||
based on models:
|
||||
- source_amount * (marginal_cost + transport_cost * distance)
|
||||
"""
|
||||
|
||||
def __init__(self, source_query, drain_query, dist_matrix_table,
             capacity_column, production_column, marginal_column,
             **kwargs):
    """Load ids and model inputs through the data provider.

    Args:
        source_query (str): query for the sources; read for `cartodb_id`,
            `drain_id` and `production_column`
        drain_query (str): query for the drains; read for `cartodb_id`,
            `capacity_column` and `marginal_column`
        dist_matrix_table (str): table handed to the provider's
            `get_distance_matrix`
        capacity_column (str): drain capacity column
        production_column (str): source amount column
        marginal_column (str): drain marginal cost column
        **kwargs: optional `data_provider` (defaults to a new
            `AnalysisDataProvider`), `dist_cost` (default 0.15),
            `dist_threshold` (default None) and `solver` (default 'glpk')

    Raises:
        ValueError: if a model parameter is rejected by
            `_check_model_params`
    """
    # set data provider - defaults to SQL database access
    provider = kwargs.get('data_provider', AnalysisDataProvider())
    self.data_provider = provider

    # model parameters
    params = {}
    params['dist_cost'] = kwargs.get('dist_cost', 0.15)
    params['dist_threshold'] = kwargs.get('dist_threshold', None)
    params['solver'] = kwargs.get('solver', 'glpk')
    self.model_params = params
    self._check_model_params()

    # sources without a drain_id are free; the others are pre-assigned
    is_free = 'drain_id is null'
    is_fixed = 'drain_id is not null'

    # database ids
    ids = {}
    ids['drain_free'] = provider.get_column(
        drain_query, 'cartodb_id', id_col='cartodb_id', dtype=int)
    ids['source_free'] = provider.get_column(
        source_query, 'cartodb_id', dtype=int, condition=is_free)
    ids['source_fixed'] = provider.get_column(
        source_query, 'cartodb_id', dtype=int, condition=is_fixed)
    ids['drain_fixed'] = provider.get_column(
        source_query, 'drain_id', dtype=int, condition=is_fixed)
    self.ids = ids

    # model data
    data = {}
    data['drain_capacity'] = provider.get_reduced_column(
        drain_query, capacity_column, source_query, production_column,
        id_col='cartodb_id', dtype=int)
    data['source_amount'] = provider.get_column(
        source_query, production_column, condition=is_free)
    data['source_amount_fixed'] = provider.get_column(
        source_query, production_column, condition=is_fixed)
    data['marginal_cost'] = provider.get_column(
        drain_query, marginal_column)
    data['distance'] = provider.get_distance_matrix(
        dist_matrix_table, ids['source_free'], ids['drain_free'])
    data['distance_fixed'] = provider.get_distance_matrix(
        dist_matrix_table, ids['source_fixed'], ids['drain_fixed'])
    self.model_data = data

    # the cost matrix depends on the entries loaded above
    self.model_data['cost'] = self.calc_cost()
    self.n_sources = len(self.ids['source_free'])
    self.n_drains = len(self.ids['drain_free'])
|
||||
|
||||
def _check_constraints(self):
|
||||
"""Check if inputs are within constraints"""
|
||||
total_capacity = self.model_data['drain_capacity'].sum()
|
||||
total_amount = self.model_data['source_amount'].sum()
|
||||
if total_amount > total_capacity:
|
||||
raise ValueError("Solution not possible. Drain capacity is "
|
||||
"smaller than total source production.")
|
||||
elif total_capacity <= 0:
|
||||
raise ValueError("Capacity must be greater than zero")
|
||||
|
||||
plpy.notice('Capacity: {total_capacity}, '
|
||||
'Amount: {total_amount} '
|
||||
'({perc}%)'.format(total_capacity=total_capacity,
|
||||
total_amount=total_amount,
|
||||
perc=100.0 * total_amount / total_capacity))
|
||||
return None
|
||||
|
||||
def _check_model_params(self):
|
||||
"""Ensure model parameters are well formed"""
|
||||
|
||||
if (self.model_params['dist_threshold'] <= 0 and
|
||||
self.model_params['dist_threshold'] is not None):
|
||||
raise ValueError("`dist_threshold` must be greater than zero")
|
||||
|
||||
if (self.model_params['dist_cost'] is None or
|
||||
self.model_params['dist_cost'] < 0):
|
||||
raise ValueError("`dist_cost` must be greater than zero")
|
||||
|
||||
if self.model_params['solver'] not in (None, 'glpk'):
|
||||
raise ValueError("`solver` must be one of 'glpk' (default) "
|
||||
"or None.")
|
||||
|
||||
return None
|
||||
|
||||
def output(self):
|
||||
"""Output the calculated 'optimal' assignments if solution is not infeasible.
|
||||
|
||||
:returns: List of source id/drain id pairs and the associated cost of
|
||||
transport from source to drain
|
||||
:rtype: List of tuples
|
||||
"""
|
||||
# retrieve fractional assignments
|
||||
assignments = self.optim()
|
||||
|
||||
# crosswalks for matrix index -> cartodb_id
|
||||
drain_id_crosswalk = {}
|
||||
for idx, cid in enumerate(self.ids['drain_free']):
|
||||
# matrix index -> cartodb_id
|
||||
drain_id_crosswalk[idx] = cid
|
||||
|
||||
source_id_crosswalk = {}
|
||||
for idx, cid in enumerate(self.ids['source_free']):
|
||||
# matrix index -> cartodb_id
|
||||
source_id_crosswalk[idx] = cid
|
||||
|
||||
# find non-zero entries
|
||||
source_index, drain_index = np.nonzero(assignments)
|
||||
# returns:
|
||||
# - drain_id
|
||||
# - source_id
|
||||
# - cost of that pairing
|
||||
# - amount sent via that pairing
|
||||
assigned_costs = [(
|
||||
drain_id_crosswalk[drain_index[idx]],
|
||||
source_id_crosswalk[source_val],
|
||||
self.model_data['cost'][drain_index[idx], source_val],
|
||||
round(self.model_data['source_amount'][source_val] *
|
||||
assignments[source_val, drain_index[idx]], 6),
|
||||
False
|
||||
)
|
||||
for idx, source_val in enumerate(source_index)]
|
||||
# Fixed vals:
|
||||
# - self.ids['source_fixed']
|
||||
# - self.ids['drain_fixed']
|
||||
# -
|
||||
fixed_costs = self.fixed_values()
|
||||
# plpy.notice("FIXED COSTS: {}".format(fixed_costs))
|
||||
return assigned_costs + fixed_costs
|
||||
|
||||
def fixed_values(self):
|
||||
"""Return the fixed source IDs, drain IDs, costs for transport, and the
|
||||
amount that is transported.
|
||||
|
||||
"""
|
||||
margins = {k: val for k, val in zip(self.ids['drain_free'],
|
||||
self.model_data['marginal_cost'])}
|
||||
self.model_data['marginal_cost_fixed'] = [margins[d]
|
||||
for d in self.ids['drain_fixed']]
|
||||
fixed_costs = self.calc_cost(source='source_amount_fixed',
|
||||
distance='distance_fixed',
|
||||
margin='marginal_cost_fixed')
|
||||
# cost = [fixed_costs[self.ids['drain_fixed'][idx], source_val]
|
||||
# for idx, source_val in enumerate(self.ids['source_fixed'])]
|
||||
|
||||
return zip(self.ids['drain_fixed'],
|
||||
self.ids['source_fixed'],
|
||||
[1.] * len(self.ids['source_fixed']),
|
||||
self.model_data['source_amount_fixed'],
|
||||
[True] * len(self.ids['drain_fixed']))
|
||||
|
||||
def cost_func(self, distance, waste, marginal):
|
||||
"""
|
||||
cost equation
|
||||
|
||||
:param distance: distance (in km)
|
||||
:type distance: float
|
||||
:param waste: number of tons of waste. This was previously calculated
|
||||
as self.model_params['amount_per_unit'] * number of people minus the recycle_rate
|
||||
:type waste: numeric
|
||||
:param marginal: intrinsic cost per ton of a plant
|
||||
:type marginal: numeric
|
||||
:returns: cost
|
||||
:rtype: numeric
|
||||
|
||||
Note: dist_cost is the cost per ton (e.g., 0.15 GBP/ton)
|
||||
"""
|
||||
return waste * (marginal + self.model_params['dist_cost'] * distance)
|
||||
|
||||
def calc_cost(self, source='source_amount', distance='distance',
|
||||
margin='marginal_cost'):
|
||||
"""
|
||||
Populate an d x s matrix according to the cost equation
|
||||
|
||||
:returns: d x s matrix of costs from area i to plant j
|
||||
:rtype: numpy.array
|
||||
"""
|
||||
costs = np.array(
|
||||
[self.cost_func(dist,
|
||||
self.model_data[source][pair[1]],
|
||||
self.model_data[margin][pair[0]])
|
||||
for pair, dist in np.ndenumerate(self.model_data[distance])])
|
||||
return costs.reshape(self.model_data[distance].shape)
|
||||
|
||||
def optim(self):
    """Solve the linear optimization problem.

    Equations of the form:

        minimize    c'*x by assigning x values
        subject to  G*x <= h
                    A*x = b
                    0 <= x[k] <= 1

    The decision vector is laid out so that pairing
    ``k = source * n_drains + drain`` (column-major flattening of the
    d x s cost matrix).

    :returns: Fractional assignments array (values between 0 and 1) of
              shape c.T. Value at position (i, j) corresponds to the
              fraction of source `i`'s supply to drain `j`.
    :rtype: numpy.array
    :raises Exception: if the solver does not report an optimal solution
    """
    n_pairings = self.n_sources * self.n_drains
    distance = self.model_data['distance']

    # ---
    # costs
    # elements chosen to minimize sum
    cost = np.nan_to_num(self.model_data['cost'])
    cost = cvxopt.matrix(cost.ravel('F'))

    # ---
    # equality constraint variables
    # each area is serviced once
    A = cvxopt.spmatrix(1.,
                        [i // self.n_drains
                         for i in range(n_pairings)],
                        range(n_pairings), tc='d')
    b = cvxopt.matrix([1.] * self.n_sources, tc='d')

    # make nan's in cost impossible
    # distance is d x s, so np.where yields (drain, source) index pairs;
    # rows of A are sources, columns are pairings
    if np.isnan(distance).any():
        drain_idxs, source_idxs = np.where(np.isnan(distance))
        for drain_idx, source_idx in zip(drain_idxs, source_idxs):
            A[int(source_idx),
              int(source_idx * self.n_drains + drain_idx)] = 0

    # knock out values above distance threshold
    if self.model_params['dist_threshold']:
        drain_idxs, source_idxs = np.where(
            distance > self.model_params['dist_threshold'])
        for drain_idx, source_idx in zip(drain_idxs, source_idxs):
            A[int(source_idx),
              int(source_idx * self.n_drains + drain_idx)] = 0

    # ---
    # inequality constraint variables
    # each plant never goes over capacity
    # (stacked with the upper bound 1 and lower bound 0 on every x[k])
    drain_capacity = cvxopt.matrix([
        cvxopt.matrix(self.model_data['drain_capacity'], tc='d'),
        cvxopt.matrix([1.] * n_pairings, tc='d'),
        cvxopt.matrix([0.] * n_pairings, tc='d')
    ])

    # inequality maxima
    ineq_maxs = cvxopt.sparse([
        # amount shipped into each drain
        cvxopt.spmatrix(
            np.repeat(self.model_data['source_amount'], self.n_drains),
            [i % self.n_drains for i in range(n_pairings)],
            range(n_pairings), tc='d'),
        # x[k] <= 1
        cvxopt.spmatrix(1.,
                        range(n_pairings),
                        range(n_pairings)),
        # -x[k] <= 0
        cvxopt.spmatrix(-1.,
                        range(n_pairings),
                        range(n_pairings))
    ], tc='d')
    for var in (cost, ineq_maxs, drain_capacity, A, b):
        plpy.notice('size: {}'.format(var.size))
    plpy.notice('{}, {}, {}'.format(n_pairings, self.n_sources, self.n_drains))
    # solve
    sol = solvers.lp(c=cost, G=ineq_maxs, h=drain_capacity,
                     A=A, b=b, solver=self.model_params['solver'])
    if sol['status'] != 'optimal':
        raise Exception("No solution possible: {}".format(sol))

    # NOTE: assignments needs to be shaped like self.model_data['cost'].T
    cost_shape = self.model_data['cost'].shape
    return np.array(sol['x'],
                    dtype=float)\
        .flatten()\
        .reshape((cost_shape[1],
                  cost_shape[0]))
|
||||
@@ -2,97 +2,101 @@
|
||||
Spatial dynamics measurements using Spatial Markov
|
||||
"""
|
||||
|
||||
# TODO: remove all plpy dependencies
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
import plpy
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
|
||||
class Markov:
|
||||
def __init__(self, data_provider=None):
|
||||
if data_provider is None:
|
||||
self.data_provider = AnalysisDataProvider()
|
||||
else:
|
||||
self.data_provider = data_provider
|
||||
def spatial_markov_trend(subquery, time_cols, num_classes=7,
|
||||
w_type='knn', num_ngbrs=5, permutations=0,
|
||||
geom_col='the_geom', id_col='cartodb_id'):
|
||||
"""
|
||||
Predict the trends of a unit based on:
|
||||
1. history of its transitions to different classes (e.g., 1st quantile
|
||||
-> 2nd quantile)
|
||||
2. average class of its neighbors
|
||||
|
||||
def spatial_trend(self, subquery, time_cols, num_classes=7,
|
||||
w_type='knn', num_ngbrs=5, permutations=0,
|
||||
geom_col='the_geom', id_col='cartodb_id'):
|
||||
"""
|
||||
Predict the trends of a unit based on:
|
||||
1. history of its transitions to different classes (e.g., 1st
|
||||
quantile -> 2nd quantile)
|
||||
2. average class of its neighbors
|
||||
Inputs:
|
||||
@param subquery string: e.g., SELECT the_geom, cartodb_id,
|
||||
interesting_time_column FROM table_name
|
||||
@param time_cols list of strings: list of strings of column names
|
||||
@param num_classes (optional): number of classes to break distribution
|
||||
of values into. Currently uses quantile bins.
|
||||
@param w_type string (optional): weight type ('knn' or 'queen')
|
||||
@param num_ngbrs int (optional): number of neighbors (if knn type)
|
||||
@param permutations int (optional): number of permutations for test
|
||||
stats
|
||||
@param geom_col string (optional): name of column which contains the
|
||||
geometries
|
||||
@param id_col string (optional): name of column which has the ids of
|
||||
the table
|
||||
|
||||
Inputs:
|
||||
@param subquery string: e.g., SELECT the_geom, cartodb_id,
|
||||
interesting_time_column FROM table_name
|
||||
@param time_cols list of strings: list of strings of column names
|
||||
@param num_classes (optional): number of classes to break
|
||||
distribution of values into. Currently uses quantile bins.
|
||||
@param w_type string (optional): weight type ('knn' or 'queen')
|
||||
@param num_ngbrs int (optional): number of neighbors (if knn type)
|
||||
@param permutations int (optional): number of permutations for test
|
||||
stats
|
||||
@param geom_col string (optional): name of column which contains
|
||||
the geometries
|
||||
@param id_col string (optional): name of column which has the ids
|
||||
of the table
|
||||
Outputs:
|
||||
@param trend_up float: probability that a geom will move to a higher
|
||||
class
|
||||
@param trend_down float: probability that a geom will move to a lower
|
||||
class
|
||||
@param trend float: (trend_up - trend_down) / trend_static
|
||||
@param volatility float: a measure of the volatility based on
|
||||
probability stddev(prob array)
|
||||
"""
|
||||
|
||||
Outputs:
|
||||
@param trend_up float: probability that a geom will move to a higher
|
||||
class
|
||||
@param trend_down float: probability that a geom will move to a
|
||||
lower class
|
||||
@param trend float: (trend_up - trend_down) / trend_static
|
||||
@param volatility float: a measure of the volatility based on
|
||||
probability stddev(prob array)
|
||||
"""
|
||||
if len(time_cols) < 2:
|
||||
plpy.error('More than one time column needs to be passed')
|
||||
|
||||
if len(time_cols) < 2:
|
||||
plpy.error('More than one time column needs to be passed')
|
||||
qvals = {"id_col": id_col,
|
||||
"time_cols": time_cols,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs}
|
||||
|
||||
params = {"id_col": id_col,
|
||||
"time_cols": time_cols,
|
||||
"geom_col": geom_col,
|
||||
"subquery": subquery,
|
||||
"num_ngbrs": num_ngbrs}
|
||||
try:
|
||||
query_result = plpy.execute(
|
||||
pu.construct_neighbor_query(w_type, qvals)
|
||||
)
|
||||
if len(query_result) == 0:
|
||||
return zip([None], [None], [None], [None], [None])
|
||||
except plpy.SPIError, err:
|
||||
plpy.error('Analysis failed: %s' % err)
|
||||
return zip([None], [None], [None], [None], [None])
|
||||
|
||||
query_result = self.data_provider.get_markov(w_type, params)
|
||||
# build weight
|
||||
weights = pu.get_weight(query_result, w_type)
|
||||
weights.transform = 'r'
|
||||
|
||||
# build weight
|
||||
weights = pu.get_weight(query_result, w_type)
|
||||
weights.transform = 'r'
|
||||
# prep time data
|
||||
t_data = get_time_data(query_result, time_cols)
|
||||
|
||||
# prep time data
|
||||
t_data = get_time_data(query_result, time_cols)
|
||||
plpy.debug('shape of t_data %d, %d' % t_data.shape)
|
||||
plpy.debug('number of weight objects: %d, %d' % (weights.sparse).shape)
|
||||
plpy.debug('first num elements: %f' % t_data[0, 0])
|
||||
|
||||
sp_markov_result = ps.Spatial_Markov(t_data,
|
||||
weights,
|
||||
k=num_classes,
|
||||
fixed=False,
|
||||
permutations=permutations)
|
||||
sp_markov_result = ps.Spatial_Markov(t_data,
|
||||
weights,
|
||||
k=num_classes,
|
||||
fixed=False,
|
||||
permutations=permutations)
|
||||
|
||||
# get lag classes
|
||||
lag_classes = ps.Quantiles(
|
||||
ps.lag_spatial(weights, t_data[:, -1]),
|
||||
k=num_classes).yb
|
||||
# get lag classes
|
||||
lag_classes = ps.Quantiles(
|
||||
ps.lag_spatial(weights, t_data[:, -1]),
|
||||
k=num_classes).yb
|
||||
|
||||
# look up probability distribution for each unit according to class and
|
||||
# lag class
|
||||
prob_dist = get_prob_dist(sp_markov_result.P,
|
||||
lag_classes,
|
||||
sp_markov_result.classes[:, -1])
|
||||
# look up probability distribution for each unit according to class and lag
|
||||
# class
|
||||
prob_dist = get_prob_dist(sp_markov_result.P,
|
||||
lag_classes,
|
||||
sp_markov_result.classes[:, -1])
|
||||
|
||||
# find the ups and down and overall distribution of each cell
|
||||
trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, sp_markov_result.classes[:, -1])
|
||||
|
||||
# output the results
|
||||
return zip(trend, trend_up, trend_down, volatility, weights.id_order)
|
||||
# find the ups and down and overall distribution of each cell
|
||||
trend_up, trend_down, trend, volatility = get_prob_stats(
|
||||
prob_dist,
|
||||
sp_markov_result.classes[:, -1])
|
||||
|
||||
# output the results
|
||||
return zip(trend, trend_up, trend_down, volatility, weights.id_order)
|
||||
|
||||
|
||||
def get_time_data(markov_data, time_cols):
|
||||
@@ -183,8 +187,8 @@ def get_prob_stats(prob_dist, unit_indices):
|
||||
trend_up[i] = prob_dist[i, (unit_indices[i]+1):].sum()
|
||||
trend_down[i] = prob_dist[i, :unit_indices[i]].sum()
|
||||
if prob_dist[i, unit_indices[i]] > 0.0:
|
||||
trend[i] = (trend_up[i] - trend_down[i]) / (
|
||||
prob_dist[i, unit_indices[i]])
|
||||
trend[i] = ((trend_up[i] - trend_down[i]) /
|
||||
(prob_dist[i, unit_indices[i]]))
|
||||
else:
|
||||
trend[i] = None
|
||||
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
import re
|
||||
|
||||
|
||||
class MockCursor:
|
||||
def __init__(self, data):
|
||||
self.cursor_pos = 0
|
||||
self.data = data
|
||||
|
||||
def fetch(self, batch_size):
|
||||
batch = self.data[self.cursor_pos:self.cursor_pos + batch_size]
|
||||
batch = self.data[self.cursor_pos : self.cursor_pos + batch_size]
|
||||
self.cursor_pos += batch_size
|
||||
return batch
|
||||
|
||||
@@ -46,9 +45,8 @@ class MockPlPy:
|
||||
data = self.execute(query)
|
||||
return MockCursor(data)
|
||||
|
||||
# TODO: additional arguments
|
||||
def execute(self, query):
|
||||
for result in self.results:
|
||||
if result[0].match(query):
|
||||
return result[1]
|
||||
return []
|
||||
def execute(self, query): # TODO: additional arguments
|
||||
for result in self.results:
|
||||
if result[0].match(query):
|
||||
return result[1]
|
||||
return []
|
||||
|
||||
38
src/py/crankshaft/test/test_cluster_kmeans.py
Normal file
38
src/py/crankshaft/test/test_cluster_kmeans.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
import numpy as np
|
||||
import crankshaft.clustering as cc
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
|
||||
class KMeansTest(unittest.TestCase):
    """Testing class for k-means clustering"""

    def setUp(self):
        """Reset the mocked plpy and load the k-means fixture data."""
        plpy._reset()
        # close the fixture file once its contents are parsed
        with open(fixture_file('kmeans.json')) as fixture:
            self.cluster_data = json.loads(fixture.read())
        self.params = {"subquery": "select * from table",
                       "no_clusters": "10"}

    def test_kmeans(self):
        """Two clusters are requested; each should receive 20 rows."""
        data = self.cluster_data
        plpy._define_result('select', data)
        # materialize so the result can be traversed more than once
        clusters = list(cc.kmeans('subquery', 2))
        labels = [a[1] for a in clusters]
        c1 = [a for a in clusters if a[1] == 0]
        c2 = [a for a in clusters if a[1] == 1]

        self.assertEqual(len(np.unique(labels)), 2)
        self.assertEqual(len(c1), 20)
        self.assertEqual(len(c2), 20)
|
||||
|
||||
@@ -1,13 +1,18 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
from helper import fixture_file
|
||||
|
||||
from crankshaft.clustering import Getis
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
|
||||
import crankshaft.clustering as cc
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# Fixture files produced as follows
|
||||
#
|
||||
@@ -37,14 +42,6 @@ from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
# lgstar_queen.p_sim, lgstar_queen.p_z_sim)))
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider stub that hands back pre-built rows for Getis tests."""

    def __init__(self, mock_data):
        # rows returned verbatim by get_getis
        self.mock_result = mock_data

    def get_getis(self, w_type, param):
        """Return the stored rows; `w_type` and `param` are ignored."""
        return self.mock_result
|
||||
|
||||
|
||||
class GetisTest(unittest.TestCase):
|
||||
"""Testing class for Getis-Ord's G* function
|
||||
This test replicates the work done in PySAL documentation:
|
||||
@@ -52,6 +49,8 @@ class GetisTest(unittest.TestCase):
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
plpy._reset()
|
||||
|
||||
# load raw data for analysis
|
||||
self.neighbors_data = json.loads(
|
||||
open(fixture_file('neighbors_getis.json')).read())
|
||||
@@ -65,13 +64,10 @@ class GetisTest(unittest.TestCase):
|
||||
data = [{'id': d['id'],
|
||||
'attr1': d['value'],
|
||||
'neighbors': d['neighbors']} for d in self.neighbors_data]
|
||||
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1234)
|
||||
getis = Getis(FakeDataProvider(data))
|
||||
|
||||
result = getis.getis_ord('subquery', 'value',
|
||||
'queen', None, 999, 'the_geom',
|
||||
'cartodb_id')
|
||||
result = cc.getis_ord('subquery', 'value',
|
||||
'queen', None, 999, 'the_geom', 'cartodb_id')
|
||||
result = [(row[0], row[1]) for row in result]
|
||||
expected = np.array(self.getis_data)[:, 0:2]
|
||||
for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected):
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import fixture_file
|
||||
from crankshaft.clustering import Kmeans
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import crankshaft.clustering as cc
|
||||
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider stub that hands back pre-built rows for k-means tests."""

    def __init__(self, mocked_result):
        # rows returned verbatim by both k-means getters
        self.mocked_result = mocked_result

    def get_spatial_kmeans(self, query):
        """Return the stored rows; `query` is ignored."""
        return self.mocked_result

    def get_nonspatial_kmeans(self, query, standarize):
        """Return the stored rows; `query` and `standarize` are ignored."""
        return self.mocked_result
|
||||
|
||||
|
||||
class KMeansTest(unittest.TestCase):
    """Testing class for k-means spatial"""

    def setUp(self):
        """Load the k-means fixture data."""
        # close the fixture file once its contents are parsed
        with open(fixture_file('kmeans.json')) as fixture:
            self.cluster_data = json.loads(fixture.read())
        self.params = {"subquery": "select * from table",
                       "no_clusters": "10"}

    def test_kmeans(self):
        """Two clusters are requested; each should receive 20 rows."""
        data = [{'xs': d['xs'],
                 'ys': d['ys'],
                 'ids': d['ids']} for d in self.cluster_data]

        random_seeds.set_random_seeds(1234)
        kmeans = Kmeans(FakeDataProvider(data))
        # materialize so the result can be traversed more than once
        clusters = list(kmeans.spatial('subquery', 2))
        labels = [a[1] for a in clusters]
        c1 = [a for a in clusters if a[1] == 0]
        c2 = [a for a in clusters if a[1] == 1]

        self.assertEqual(len(np.unique(labels)), 2)
        self.assertEqual(len(c1), 20)
        self.assertEqual(len(c2), 20)
|
||||
@@ -1,27 +1,25 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
from helper import fixture_file
|
||||
from crankshaft.clustering import Moran
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
|
||||
import crankshaft.clustering as cc
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider stub that hands back pre-built rows for Moran tests."""

    def __init__(self, mock_data):
        # rows returned verbatim by get_moran
        self.mock_result = mock_data

    def get_moran(self, w_type, params):
        """Return the stored rows; `w_type` and `params` are ignored."""
        return self.mock_result
|
||||
|
||||
|
||||
class MoranTest(unittest.TestCase):
|
||||
"""Testing class for Moran's I functions"""
|
||||
|
||||
def setUp(self):
|
||||
plpy._reset()
|
||||
self.params = {"id_col": "cartodb_id",
|
||||
"attr1": "andy",
|
||||
"attr2": "jay_z",
|
||||
@@ -41,36 +39,33 @@ class MoranTest(unittest.TestCase):
|
||||
|
||||
def test_map_quads(self):
|
||||
"""Test map_quads"""
|
||||
from crankshaft.clustering import map_quads
|
||||
self.assertEqual(map_quads(1), 'HH')
|
||||
self.assertEqual(map_quads(2), 'LH')
|
||||
self.assertEqual(map_quads(3), 'LL')
|
||||
self.assertEqual(map_quads(4), 'HL')
|
||||
self.assertEqual(map_quads(33), None)
|
||||
self.assertEqual(map_quads('andy'), None)
|
||||
self.assertEqual(cc.map_quads(1), 'HH')
|
||||
self.assertEqual(cc.map_quads(2), 'LH')
|
||||
self.assertEqual(cc.map_quads(3), 'LL')
|
||||
self.assertEqual(cc.map_quads(4), 'HL')
|
||||
self.assertEqual(cc.map_quads(33), None)
|
||||
self.assertEqual(cc.map_quads('andy'), None)
|
||||
|
||||
def test_quad_position(self):
|
||||
"""Test lisa_sig_vals"""
|
||||
from crankshaft.clustering import quad_position
|
||||
|
||||
quads = np.array([1, 2, 3, 4], np.int)
|
||||
|
||||
ans = np.array(['HH', 'LH', 'LL', 'HL'])
|
||||
test_ans = quad_position(quads)
|
||||
test_ans = cc.quad_position(quads)
|
||||
|
||||
self.assertTrue((test_ans == ans).all())
|
||||
|
||||
def test_local_stat(self):
|
||||
def test_moran_local(self):
|
||||
"""Test Moran's I local"""
|
||||
data = [OrderedDict([('id', d['id']),
|
||||
('attr1', d['value']),
|
||||
('neighbors', d['neighbors'])])
|
||||
for d in self.neighbors_data]
|
||||
data = [{'id': d['id'],
|
||||
'attr1': d['value'],
|
||||
'neighbors': d['neighbors']} for d in self.neighbors_data]
|
||||
|
||||
moran = Moran(FakeDataProvider(data))
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1234)
|
||||
result = moran.local_stat('subquery', 'value',
|
||||
'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
result = cc.moran_local('subquery', 'value',
|
||||
'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
result = [(row[0], row[1]) for row in result]
|
||||
zipped_values = zip(result, self.moran_data)
|
||||
|
||||
@@ -85,10 +80,10 @@ class MoranTest(unittest.TestCase):
|
||||
'attr2': 1,
|
||||
'neighbors': d['neighbors']} for d in self.neighbors_data]
|
||||
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1234)
|
||||
moran = Moran(FakeDataProvider(data))
|
||||
result = moran.local_rate_stat('subquery', 'numerator', 'denominator',
|
||||
'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
result = cc.moran_local_rate('subquery', 'numerator', 'denominator',
|
||||
'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
result = [(row[0], row[1]) for row in result]
|
||||
|
||||
zipped_values = zip(result, self.moran_data)
|
||||
@@ -101,11 +96,10 @@ class MoranTest(unittest.TestCase):
|
||||
data = [{'id': d['id'],
|
||||
'attr1': d['value'],
|
||||
'neighbors': d['neighbors']} for d in self.neighbors_data]
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1235)
|
||||
moran = Moran(FakeDataProvider(data))
|
||||
result = moran.global_stat('table', 'value',
|
||||
'knn', 5, 99, 'the_geom',
|
||||
'cartodb_id')
|
||||
result = cc.moran('table', 'value',
|
||||
'knn', 5, 99, 'the_geom', 'cartodb_id')
|
||||
|
||||
result_moran = result[0][0]
|
||||
expected_moran = np.array([row[0] for row in self.moran_data]).mean()
|
||||
|
||||
@@ -1,121 +0,0 @@
|
||||
"""Unit tests for the optimization module"""
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
from crankshaft.optimization import Optim
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import cvxopt
|
||||
|
||||
# suppress cvxopt GLPK messages
|
||||
cvxopt.glpk.options['msg_lev'] = 'GLP_MSG_OFF'
|
||||
|
||||
|
||||
class RawDataProvider(AnalysisDataProvider):
    """Raw data provider for testing purposes"""

    def __init__(self, raw_data):
        # dict keyed by column name (plus 'pairwise' for the distances)
        self.raw_data = raw_data

    def get_column(self, table, column, dtype=float):
        """Returns requested 'column' of data

        For 'cartodb_id' a 1-based id range is generated whose length
        matches the capacity column (drain table) or the production
        column (source table); any other table yields None.
        """
        if column != 'cartodb_id':
            return np.array(self.raw_data[column], dtype=dtype)
        if table == 'drain_table':
            return np.arange(1, len(self.raw_data['capacity_col']) + 1)
        if table == 'source_table':
            return np.arange(1, len(self.raw_data['production_col']) + 1)
        return None

    def get_pairwise_distances(self, _source, _drain):
        """Returns pairwise distances"""
        return np.array(self.raw_data['pairwise'], dtype=float)
|
||||
|
||||
|
||||
class OptimTest(unittest.TestCase):
    """Testing class for Optimization module"""

    def setUp(self):
        """Build the named scenarios and the shared Optim arguments."""
        # capacity ~ 0.01 * production given waste_per_person of 0.01
        # so capacity_col = [9, 31] / 100
        self.data = {
            'all_right': {"production_col": [10, 10, 10],
                          "capacity_col": [0.09, 0.31],
                          "marginal_col": [5, 5],
                          "pairwise": [[1, 2, 3], [3, 2, 1]]},
            'all_left': {"production_col": [10, 10, 10],
                         "capacity_col": [0.31, 0.09],
                         "marginal_col": [5, 5],
                         "pairwise": [[1, 2, 3], [3, 2, 1]]},
            '2left': {"production_col": [10, 10, 10],
                      "capacity_col": [0.21, 0.11],
                      "marginal_col": [5, 5],
                      "pairwise": [[1, 2, 3], [3, 2, 1]]},
            'infeasible': {"production_col": [10, 10, 10],
                           "capacity_col": [0.19, 0.11],
                           "marginal_col": [5, 5],
                           "pairwise": [[1, 2, 3], [3, 2, 1]]}}

        self.params = {'waste_per_person': 0.01,
                       'recycle_rate': 0.0,
                       'dist_rate': 0.15,
                       'dist_threshold': None,
                       'data_provider': None}
        self.args = ('drain_table', 'source_table',
                     'capacity_col', 'production_col',
                     'marginal_col')

    def test_optim_output(self):
        """Test Optim().output"""
        outputs = {'all_right': [2, 2, 2],
                   'all_left': [1, 1, 1],
                   '2left': [1, 1, 2],
                   'infeasible': None}
        for k in self.data:
            # the infeasible scenario has no expected assignment
            if k == 'infeasible':
                continue
            self.params['data_provider'] = RawDataProvider(self.data[k])
            optim = Optim(*self.args, **self.params)
            out_vals = optim.output()
            drain_ids = [row[0] for row in out_vals]
            self.assertEqual(drain_ids, outputs[k])

    def test_check_constraints(self):
        """Test optim._check_constraints"""
        for k in self.data:
            self.params['data_provider'] = RawDataProvider(self.data[k])
            try:
                optim = Optim(*self.args, **self.params)
                # pylint: disable=protected-access
                constraint_check = optim._check_constraints() is None
            except ValueError:
                # if infeasible, catch and say it's acceptable;
                # otherwise re-raise with the original traceback
                if k != 'infeasible':
                    raise
                constraint_check = True
            self.assertTrue(constraint_check)

    # def test_check_model_params(self):
    #     """Test model param defaults are correctly formed"""
    #     for k in self.data:
    #         self.params['data_provider'] = RawDataProvider(self.data[k])
    #         optim = Optim(*self.args, **self.params)
    #         # pylint: disable=protected-access
    #         model_check = optim._check_model_params() is None
    #         self.assertTrue(model_check)

    def test_optim(self):
        """Test optim.optim method"""
        # TODO: not yet implemented
        pass
|
||||
@@ -4,99 +4,86 @@ import numpy as np
|
||||
import unittest
|
||||
|
||||
|
||||
from helper import fixture_file
|
||||
# from mock_plpy import MockPlPy
|
||||
# plpy = MockPlPy()
|
||||
#
|
||||
# import sys
|
||||
# sys.modules['plpy'] = plpy
|
||||
from helper import plpy, fixture_file
|
||||
|
||||
from crankshaft.space_time_dynamics import Markov
|
||||
import crankshaft.space_time_dynamics as std
|
||||
from crankshaft import random_seeds
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import json
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider stub that hands back pre-built rows for Markov tests."""

    def __init__(self, data):
        # rows returned verbatim by get_markov
        self.mock_result = data

    def get_markov(self, w_type, params):
        """Return the stored rows; `w_type` and `params` are ignored."""
        return self.mock_result
|
||||
|
||||
|
||||
class SpaceTimeTests(unittest.TestCase):
|
||||
"""Testing class for Markov Functions."""
|
||||
|
||||
def setUp(self):
|
||||
plpy._reset()
|
||||
self.params = {"id_col": "cartodb_id",
|
||||
"time_cols": ['dec_2013', 'jan_2014', 'feb_2014'],
|
||||
"subquery": "SELECT * FROM a_list",
|
||||
"geom_col": "the_geom",
|
||||
"num_ngbrs": 321}
|
||||
self.neighbors_data = json.loads(
|
||||
open(fixture_file('neighbors_markov.json')).read())
|
||||
self.neighbors_data = json.loads(open(fixture_file('neighbors_markov.json')).read())
|
||||
self.markov_data = json.loads(open(fixture_file('markov.json')).read())
|
||||
|
||||
self.time_data = np.array([i * np.ones(10, dtype=float)
|
||||
for i in range(10)]).T
|
||||
self.time_data = np.array([i * np.ones(10, dtype=float) for i in range(10)]).T
|
||||
|
||||
self.transition_matrix = np.array([
|
||||
[[0.96341463, 0.0304878, 0.00609756, 0., 0.],
|
||||
[0.06040268, 0.83221477, 0.10738255, 0., 0.],
|
||||
[0., 0.14, 0.74, 0.12, 0.],
|
||||
[0., 0.03571429, 0.32142857, 0.57142857, 0.07142857],
|
||||
[0., 0., 0., 0.16666667, 0.83333333]],
|
||||
[[0.79831933, 0.16806723, 0.03361345, 0., 0.],
|
||||
[0.0754717, 0.88207547, 0.04245283, 0., 0.],
|
||||
[0.00537634, 0.06989247, 0.8655914, 0.05913978, 0.],
|
||||
[0., 0., 0.06372549, 0.90196078, 0.03431373],
|
||||
[0., 0., 0., 0.19444444, 0.80555556]],
|
||||
[[0.84693878, 0.15306122, 0., 0., 0.],
|
||||
[0.08133971, 0.78947368, 0.1291866, 0., 0.],
|
||||
[0.00518135, 0.0984456, 0.79274611, 0.0984456, 0.00518135],
|
||||
[0., 0., 0.09411765, 0.87058824, 0.03529412],
|
||||
[0., 0., 0., 0.10204082, 0.89795918]],
|
||||
[[0.8852459, 0.09836066, 0., 0.01639344, 0.],
|
||||
[0.03875969, 0.81395349, 0.13953488, 0., 0.00775194],
|
||||
[0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
|
||||
[0., 0.02339181, 0.12865497, 0.75438596, 0.09356725],
|
||||
[0., 0., 0., 0.09661836, 0.90338164]],
|
||||
[[0.33333333, 0.66666667, 0., 0., 0.],
|
||||
[0.0483871, 0.77419355, 0.16129032, 0.01612903, 0.],
|
||||
[0.01149425, 0.16091954, 0.74712644, 0.08045977, 0.],
|
||||
[0., 0.01036269, 0.06217617, 0.89637306, 0.03108808],
|
||||
[0., 0., 0., 0.02352941, 0.97647059]]]
|
||||
[[ 0.96341463, 0.0304878 , 0.00609756, 0. , 0. ],
|
||||
[ 0.06040268, 0.83221477, 0.10738255, 0. , 0. ],
|
||||
[ 0. , 0.14 , 0.74 , 0.12 , 0. ],
|
||||
[ 0. , 0.03571429, 0.32142857, 0.57142857, 0.07142857],
|
||||
[ 0. , 0. , 0. , 0.16666667, 0.83333333]],
|
||||
[[ 0.79831933, 0.16806723, 0.03361345, 0. , 0. ],
|
||||
[ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ],
|
||||
[ 0.00537634, 0.06989247, 0.8655914 , 0.05913978, 0. ],
|
||||
[ 0. , 0. , 0.06372549, 0.90196078, 0.03431373],
|
||||
[ 0. , 0. , 0. , 0.19444444, 0.80555556]],
|
||||
[[ 0.84693878, 0.15306122, 0. , 0. , 0. ],
|
||||
[ 0.08133971, 0.78947368, 0.1291866 , 0. , 0. ],
|
||||
[ 0.00518135, 0.0984456 , 0.79274611, 0.0984456 , 0.00518135],
|
||||
[ 0. , 0. , 0.09411765, 0.87058824, 0.03529412],
|
||||
[ 0. , 0. , 0. , 0.10204082, 0.89795918]],
|
||||
[[ 0.8852459 , 0.09836066, 0. , 0.01639344, 0. ],
|
||||
[ 0.03875969, 0.81395349, 0.13953488, 0. , 0.00775194],
|
||||
[ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ],
|
||||
[ 0. , 0.02339181, 0.12865497, 0.75438596, 0.09356725],
|
||||
[ 0. , 0. , 0. , 0.09661836, 0.90338164]],
|
||||
[[ 0.33333333, 0.66666667, 0. , 0. , 0. ],
|
||||
[ 0.0483871 , 0.77419355, 0.16129032, 0.01612903, 0. ],
|
||||
[ 0.01149425, 0.16091954, 0.74712644, 0.08045977, 0. ],
|
||||
[ 0. , 0.01036269, 0.06217617, 0.89637306, 0.03108808],
|
||||
[ 0. , 0. , 0. , 0.02352941, 0.97647059]]]
|
||||
)
|
||||
|
||||
def test_spatial_markov(self):
|
||||
"""Test Spatial Markov."""
|
||||
data = [{'id': d['id'],
|
||||
'attr1': d['y1995'],
|
||||
'attr2': d['y1996'],
|
||||
'attr3': d['y1997'],
|
||||
'attr4': d['y1998'],
|
||||
'attr5': d['y1999'],
|
||||
'attr6': d['y2000'],
|
||||
'attr7': d['y2001'],
|
||||
'attr8': d['y2002'],
|
||||
'attr9': d['y2003'],
|
||||
'attr10': d['y2004'],
|
||||
'attr11': d['y2005'],
|
||||
'attr12': d['y2006'],
|
||||
'attr13': d['y2007'],
|
||||
'attr14': d['y2008'],
|
||||
'attr15': d['y2009'],
|
||||
'neighbors': d['neighbors']} for d in self.neighbors_data]
|
||||
# print(str(data[0]))
|
||||
markov = Markov(FakeDataProvider(data))
|
||||
data = [ { 'id': d['id'],
|
||||
'attr1': d['y1995'],
|
||||
'attr2': d['y1996'],
|
||||
'attr3': d['y1997'],
|
||||
'attr4': d['y1998'],
|
||||
'attr5': d['y1999'],
|
||||
'attr6': d['y2000'],
|
||||
'attr7': d['y2001'],
|
||||
'attr8': d['y2002'],
|
||||
'attr9': d['y2003'],
|
||||
'attr10': d['y2004'],
|
||||
'attr11': d['y2005'],
|
||||
'attr12': d['y2006'],
|
||||
'attr13': d['y2007'],
|
||||
'attr14': d['y2008'],
|
||||
'attr15': d['y2009'],
|
||||
'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||
print(str(data[0]))
|
||||
plpy._define_result('select', data)
|
||||
random_seeds.set_random_seeds(1234)
|
||||
|
||||
result = markov.spatial_trend('subquery',
|
||||
['y1995', 'y1996', 'y1997', 'y1998',
|
||||
'y1999', 'y2000', 'y2001', 'y2002',
|
||||
'y2003', 'y2004', 'y2005', 'y2006',
|
||||
'y2007', 'y2008', 'y2009'],
|
||||
5, 'knn', 5, 0, 'the_geom',
|
||||
'cartodb_id')
|
||||
result = std.spatial_markov_trend('subquery', ['y1995', 'y1996', 'y1997', 'y1998', 'y1999', 'y2000', 'y2001', 'y2002', 'y2003', 'y2004', 'y2005', 'y2006', 'y2007', 'y2008', 'y2009'], 5, 'knn', 5, 0, 'the_geom', 'cartodb_id')
|
||||
|
||||
self.assertTrue(result is not None)
|
||||
self.assertTrue(result != None)
|
||||
result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
|
||||
print result[0]
|
||||
expected = self.markov_data
|
||||
@@ -107,178 +94,173 @@ class SpaceTimeTests(unittest.TestCase):
|
||||
|
||||
def test_get_time_data(self):
|
||||
"""Test get_time_data"""
|
||||
data = [{'attr1': d['y1995'],
|
||||
'attr2': d['y1996'],
|
||||
'attr3': d['y1997'],
|
||||
'attr4': d['y1998'],
|
||||
'attr5': d['y1999'],
|
||||
'attr6': d['y2000'],
|
||||
'attr7': d['y2001'],
|
||||
'attr8': d['y2002'],
|
||||
'attr9': d['y2003'],
|
||||
'attr10': d['y2004'],
|
||||
'attr11': d['y2005'],
|
||||
'attr12': d['y2006'],
|
||||
'attr13': d['y2007'],
|
||||
'attr14': d['y2008'],
|
||||
'attr15': d['y2009']} for d in self.neighbors_data]
|
||||
data = [ { 'attr1': d['y1995'],
|
||||
'attr2': d['y1996'],
|
||||
'attr3': d['y1997'],
|
||||
'attr4': d['y1998'],
|
||||
'attr5': d['y1999'],
|
||||
'attr6': d['y2000'],
|
||||
'attr7': d['y2001'],
|
||||
'attr8': d['y2002'],
|
||||
'attr9': d['y2003'],
|
||||
'attr10': d['y2004'],
|
||||
'attr11': d['y2005'],
|
||||
'attr12': d['y2006'],
|
||||
'attr13': d['y2007'],
|
||||
'attr14': d['y2008'],
|
||||
'attr15': d['y2009'] } for d in self.neighbors_data]
|
||||
|
||||
result = std.get_time_data(data, ['y1995', 'y1996', 'y1997', 'y1998',
|
||||
'y1999', 'y2000', 'y2001', 'y2002',
|
||||
'y2003', 'y2004', 'y2005', 'y2006',
|
||||
'y2007', 'y2008', 'y2009'])
|
||||
result = std.get_time_data(data, ['y1995', 'y1996', 'y1997', 'y1998', 'y1999', 'y2000', 'y2001', 'y2002', 'y2003', 'y2004', 'y2005', 'y2006', 'y2007', 'y2008', 'y2009'])
|
||||
|
||||
# expected was prepared from PySAL example:
|
||||
# f = ps.open(ps.examples.get_path("usjoin.csv"))
|
||||
# pci = np.array([f.by_col[str(y)]
|
||||
# for y in range(1995, 2010)]).transpose()
|
||||
# rpci = pci / (pci.mean(axis = 0))
|
||||
## expected was prepared from PySAL example:
|
||||
### f = ps.open(ps.examples.get_path("usjoin.csv"))
|
||||
### pci = np.array([f.by_col[str(y)] for y in range(1995, 2010)]).transpose()
|
||||
### rpci = pci / (pci.mean(axis = 0))
|
||||
|
||||
expected = np.array(
|
||||
[[0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154,
|
||||
0.83271652, 0.83786314, 0.85012593, 0.85509656, 0.86416612,
|
||||
0.87119375, 0.86302631, 0.86148267, 0.86252252, 0.86746356],
|
||||
[0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388,
|
||||
0.90746978, 0.89830489, 0.89431991, 0.88924794, 0.89815176,
|
||||
0.91832091, 0.91706054, 0.90139505, 0.87897455, 0.86216858],
|
||||
[0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522,
|
||||
0.78964559, 0.80584442, 0.8084998, 0.82258551, 0.82668196,
|
||||
0.82373724, 0.81814804, 0.83675961, 0.83574199, 0.84647177],
|
||||
[1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841,
|
||||
1.14506948, 1.12151133, 1.11160697, 1.10888621, 1.11399806,
|
||||
1.12168029, 1.13164797, 1.12958508, 1.11371818, 1.09936775],
|
||||
[1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025,
|
||||
1.16898201, 1.17212488, 1.14752303, 1.11843284, 1.11024964,
|
||||
1.11943471, 1.11736468, 1.10863242, 1.09642516, 1.07762337],
|
||||
[1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684,
|
||||
1.44184737, 1.44782832, 1.41978227, 1.39092208, 1.4059372,
|
||||
1.40788646, 1.44052766, 1.45241216, 1.43306098, 1.4174431],
|
||||
[1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149,
|
||||
1.10888138, 1.11856629, 1.13062931, 1.11944984, 1.12446239,
|
||||
1.11671008, 1.10880034, 1.08401709, 1.06959206, 1.07875225],
|
||||
[1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545,
|
||||
0.99854316, 0.9880258, 0.99669587, 0.99327676, 1.01400905,
|
||||
1.03176742, 1.040511, 1.01749645, 0.9936394, 0.98279746],
|
||||
[0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845,
|
||||
0.99127006, 0.97925917, 0.9683482, 0.95335147, 0.93694787,
|
||||
0.94308213, 0.92232874, 0.91284091, 0.89689833, 0.88928858],
|
||||
[0.87418391, 0.86416601, 0.84425695, 0.8404494, 0.83903044,
|
||||
0.8578708, 0.86036185, 0.86107306, 0.8500772, 0.86981998,
|
||||
0.86837929, 0.87204141, 0.86633032, 0.84946077, 0.83287146],
|
||||
[1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624,
|
||||
1.14450183, 1.12349752, 1.12596664, 1.12213996, 1.1119989,
|
||||
1.10257792, 1.10491258, 1.11059842, 1.10509795, 1.10020097],
|
||||
[0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687,
|
||||
0.95831051, 0.94480909, 0.94804195, 0.95430286, 0.94103989,
|
||||
0.92122519, 0.91010201, 0.89280392, 0.89298243, 0.89165385],
|
||||
[0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647,
|
||||
0.9480927, 0.93539182, 0.95388718, 0.94597005, 0.96918424,
|
||||
0.94781281, 0.93466815, 0.94281559, 0.96520315, 0.96715441],
|
||||
[0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897,
|
||||
0.98687073, 0.99237486, 0.98209969, 0.9877653, 0.97399471,
|
||||
0.96910087, 0.98416665, 0.98423613, 0.99823861, 0.99545704],
|
||||
[0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012,
|
||||
0.86191535, 0.84981451, 0.85472102, 0.84564835, 0.83998883,
|
||||
0.83478547, 0.82803648, 0.8198736, 0.82265395, 0.8399404],
|
||||
[0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136,
|
||||
0.82785597, 0.86008789, 0.86776298, 0.86720209, 0.8676334,
|
||||
0.89179317, 0.94202108, 0.9422231, 0.93902708, 0.94479184],
|
||||
[0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238,
|
||||
0.90906632, 0.92693339, 0.93695966, 0.94242697, 0.94338265,
|
||||
0.91981796, 0.91108804, 0.90543476, 0.91737138, 0.94793657],
|
||||
[1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723,
|
||||
1.20172869, 1.21328691, 1.22624778, 1.22397075, 1.23857042,
|
||||
1.24419893, 1.23929384, 1.23418676, 1.23626739, 1.26754398],
|
||||
[1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667,
|
||||
1.34790023, 1.34399863, 1.32575181, 1.30795492, 1.30544841,
|
||||
1.30303302, 1.32107766, 1.32936244, 1.33001241, 1.33288462],
|
||||
[1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093,
|
||||
1.05059016, 1.03405057, 1.02747623, 1.03162734, 0.9961416,
|
||||
0.97356208, 0.94241549, 0.92754547, 0.92549227, 0.92138102],
|
||||
[1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264,
|
||||
1.13889622, 1.12442212, 1.13367018, 1.13982256, 1.14029944,
|
||||
1.11979401, 1.10905389, 1.10577769, 1.11166825, 1.09985155],
|
||||
[0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284,
|
||||
0.74480073, 0.76098396, 0.76156903, 0.76651952, 0.76533288,
|
||||
0.78205934, 0.76842416, 0.77487118, 0.77768683, 0.78801192],
|
||||
[0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803,
|
||||
0.97370819, 0.96419154, 0.97209861, 0.97441313, 0.96356162,
|
||||
0.94745352, 0.93965462, 0.93069645, 0.94020973, 0.94358232],
|
||||
[0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801,
|
||||
0.80071489, 0.83358256, 0.83451613, 0.85175032, 0.85954307,
|
||||
0.86790024, 0.87170334, 0.87863799, 0.87497981, 0.87888675],
|
||||
[0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619,
|
||||
0.98733195, 0.99644997, 0.99669587, 1.02559097, 1.01116651,
|
||||
0.99988024, 0.97906749, 0.99323123, 1.00204939, 0.99602148],
|
||||
[1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683,
|
||||
1.08312397, 1.05192626, 1.04230892, 1.05577278, 1.08569751,
|
||||
1.12443486, 1.08891079, 1.08603695, 1.05997314, 1.02160943],
|
||||
[1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272,
|
||||
1.18257029, 1.16226243, 1.16009196, 1.14467789, 1.14820235,
|
||||
1.12386598, 1.12680236, 1.12357937, 1.1159258, 1.12570828],
|
||||
[1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667,
|
||||
1.31210239, 1.29989156, 1.29203193, 1.27183516, 1.26830786,
|
||||
1.2617743, 1.28656675, 1.29734097, 1.29390205, 1.29345446],
|
||||
[0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864,
|
||||
0.78772975, 0.82848011, 0.8259679, 0.82435705, 0.83108634,
|
||||
0.84373784, 0.83891093, 0.84349247, 0.85637272, 0.86539395],
|
||||
[1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626,
|
||||
1.2256767, 1.21126648, 1.19377804, 1.18355337, 1.19674434,
|
||||
1.21536573, 1.23653297, 1.27962009, 1.27968392, 1.25907738],
|
||||
[0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282,
|
||||
0.96480308, 0.94686376, 0.93679073, 0.92540049, 0.92988835,
|
||||
0.93442917, 0.92100464, 0.91475304, 0.90249622, 0.9021363],
|
||||
[0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368,
|
||||
0.88937573, 0.894401, 0.90448993, 0.95495898, 0.92698333,
|
||||
0.94745352, 0.92562488, 0.96635366, 1.02520312, 1.0394296],
|
||||
[1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073,
|
||||
1.00759019, 0.99192968, 0.99747298, 0.99550759, 0.97583768,
|
||||
0.9610168, 0.94779638, 0.93759089, 0.93353431, 0.94121705],
|
||||
[0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613,
|
||||
0.83434854, 0.85813595, 0.84667961, 0.84374558, 0.85951183,
|
||||
0.87194227, 0.89455097, 0.88283929, 0.90349491, 0.90600675],
|
||||
[1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086,
|
||||
1.00581626, 0.98850522, 0.99291168, 0.98983209, 0.97511924,
|
||||
0.96134615, 0.96382634, 0.95011401, 0.9434686, 0.94637765],
|
||||
[1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857,
|
||||
1.04800023, 1.03024941, 1.04200483, 1.0402554, 1.03296979,
|
||||
1.02191682, 1.02476275, 1.02347523, 1.02517684, 1.04359571],
|
||||
[1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043,
|
||||
1.0531801, 1.07452771, 1.09383478, 1.1052447, 1.10322136,
|
||||
1.09167939, 1.08772756, 1.08859544, 1.09177338, 1.1096083],
|
||||
[0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809,
|
||||
0.86287327, 0.85169796, 0.85411285, 0.84886336, 0.84517414,
|
||||
0.84843858, 0.84488343, 0.83374329, 0.82812044, 0.82878599],
|
||||
[0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286,
|
||||
0.92652175, 0.94278865, 0.93682452, 0.98655146, 0.992237,
|
||||
0.9798497, 0.93869677, 0.96947771, 1.00362626, 0.98102351],
|
||||
[0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967,
|
||||
0.93092109, 0.92662519, 0.93412152, 0.93501274, 0.92879506,
|
||||
0.92110542, 0.91035556, 0.90430364, 0.89994694, 0.90073864],
|
||||
[0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824,
|
||||
0.98882205, 0.97662234, 0.95601578, 0.94905385, 0.94934888,
|
||||
0.97152609, 0.97163004, 0.9700702, 0.97158948, 0.95884908],
|
||||
[0.83980439, 0.84726737, 0.85747, 0.85467221, 0.8556751,
|
||||
0.84818516, 0.85265681, 0.84502402, 0.82645665, 0.81743586,
|
||||
0.83550406, 0.83338919, 0.83511679, 0.82136617, 0.80921874],
|
||||
[0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441,
|
||||
0.95440787, 0.96364363, 0.96804412, 0.97136214, 0.97583768,
|
||||
0.95571724, 0.96895368, 0.97001634, 0.97082733, 0.98782366],
|
||||
[1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249,
|
||||
1.10558188, 1.1214086, 1.12292577, 1.13021031, 1.13342735,
|
||||
1.14686068, 1.14502975, 1.14474747, 1.14084037, 1.16142926],
|
||||
[1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863,
|
||||
1.11856702, 1.09764283, 1.08815849, 1.08044313, 1.09278827,
|
||||
1.07003204, 1.08398066, 1.09831768, 1.09298232, 1.09176125],
|
||||
[0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744,
|
||||
0.77751194, 0.79902974, 0.81437881, 0.80788828, 0.79603865,
|
||||
0.78966436, 0.79949807, 0.80172182, 0.82168155, 0.85587911],
|
||||
[1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561,
|
||||
1.00162979, 0.99860739, 1.00814981, 1.00574316, 0.99030032,
|
||||
0.97682565, 0.97292596, 0.96519561, 0.96173403, 0.95890284],
|
||||
[0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289,
|
||||
0.96608031, 0.99727185, 1.00781194, 1.03484236, 1.05333619,
|
||||
1.0983263, 1.1704974, 1.17025154, 1.18730553, 1.14242645]])
|
||||
expected = np.array([[ 0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154, 0.83271652
|
||||
, 0.83786314, 0.85012593, 0.85509656, 0.86416612, 0.87119375, 0.86302631
|
||||
, 0.86148267, 0.86252252, 0.86746356],
|
||||
[ 0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388, 0.90746978
|
||||
, 0.89830489, 0.89431991, 0.88924794, 0.89815176, 0.91832091, 0.91706054
|
||||
, 0.90139505, 0.87897455, 0.86216858],
|
||||
[ 0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522, 0.78964559
|
||||
, 0.80584442, 0.8084998, 0.82258551, 0.82668196, 0.82373724, 0.81814804
|
||||
, 0.83675961, 0.83574199, 0.84647177],
|
||||
[ 1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841, 1.14506948
|
||||
, 1.12151133, 1.11160697, 1.10888621, 1.11399806, 1.12168029, 1.13164797
|
||||
, 1.12958508, 1.11371818, 1.09936775],
|
||||
[ 1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025, 1.16898201
|
||||
, 1.17212488, 1.14752303, 1.11843284, 1.11024964, 1.11943471, 1.11736468
|
||||
, 1.10863242, 1.09642516, 1.07762337],
|
||||
[ 1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684, 1.44184737
|
||||
, 1.44782832, 1.41978227, 1.39092208, 1.4059372, 1.40788646, 1.44052766
|
||||
, 1.45241216, 1.43306098, 1.4174431 ],
|
||||
[ 1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149, 1.10888138
|
||||
, 1.11856629, 1.13062931, 1.11944984, 1.12446239, 1.11671008, 1.10880034
|
||||
, 1.08401709, 1.06959206, 1.07875225],
|
||||
[ 1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545, 0.99854316
|
||||
, 0.9880258, 0.99669587, 0.99327676, 1.01400905, 1.03176742, 1.040511
|
||||
, 1.01749645, 0.9936394, 0.98279746],
|
||||
[ 0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845, 0.99127006
|
||||
, 0.97925917, 0.9683482, 0.95335147, 0.93694787, 0.94308213, 0.92232874
|
||||
, 0.91284091, 0.89689833, 0.88928858],
|
||||
[ 0.87418391, 0.86416601, 0.84425695, 0.8404494, 0.83903044, 0.8578708
|
||||
, 0.86036185, 0.86107306, 0.8500772, 0.86981998, 0.86837929, 0.87204141
|
||||
, 0.86633032, 0.84946077, 0.83287146],
|
||||
[ 1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624, 1.14450183
|
||||
, 1.12349752, 1.12596664, 1.12213996, 1.1119989, 1.10257792, 1.10491258
|
||||
, 1.11059842, 1.10509795, 1.10020097],
|
||||
[ 0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687, 0.95831051
|
||||
, 0.94480909, 0.94804195, 0.95430286, 0.94103989, 0.92122519, 0.91010201
|
||||
, 0.89280392, 0.89298243, 0.89165385],
|
||||
[ 0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647, 0.9480927
|
||||
, 0.93539182, 0.95388718, 0.94597005, 0.96918424, 0.94781281, 0.93466815
|
||||
, 0.94281559, 0.96520315, 0.96715441],
|
||||
[ 0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897, 0.98687073
|
||||
, 0.99237486, 0.98209969, 0.9877653, 0.97399471, 0.96910087, 0.98416665
|
||||
, 0.98423613, 0.99823861, 0.99545704],
|
||||
[ 0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012, 0.86191535
|
||||
, 0.84981451, 0.85472102, 0.84564835, 0.83998883, 0.83478547, 0.82803648
|
||||
, 0.8198736, 0.82265395, 0.8399404 ],
|
||||
[ 0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136, 0.82785597
|
||||
, 0.86008789, 0.86776298, 0.86720209, 0.8676334, 0.89179317, 0.94202108
|
||||
, 0.9422231, 0.93902708, 0.94479184],
|
||||
[ 0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238, 0.90906632
|
||||
, 0.92693339, 0.93695966, 0.94242697, 0.94338265, 0.91981796, 0.91108804
|
||||
, 0.90543476, 0.91737138, 0.94793657],
|
||||
[ 1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723, 1.20172869
|
||||
, 1.21328691, 1.22624778, 1.22397075, 1.23857042, 1.24419893, 1.23929384
|
||||
, 1.23418676, 1.23626739, 1.26754398],
|
||||
[ 1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667, 1.34790023
|
||||
, 1.34399863, 1.32575181, 1.30795492, 1.30544841, 1.30303302, 1.32107766
|
||||
, 1.32936244, 1.33001241, 1.33288462],
|
||||
[ 1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093, 1.05059016
|
||||
, 1.03405057, 1.02747623, 1.03162734, 0.9961416, 0.97356208, 0.94241549
|
||||
, 0.92754547, 0.92549227, 0.92138102],
|
||||
[ 1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264, 1.13889622
|
||||
, 1.12442212, 1.13367018, 1.13982256, 1.14029944, 1.11979401, 1.10905389
|
||||
, 1.10577769, 1.11166825, 1.09985155],
|
||||
[ 0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284, 0.74480073
|
||||
, 0.76098396, 0.76156903, 0.76651952, 0.76533288, 0.78205934, 0.76842416
|
||||
, 0.77487118, 0.77768683, 0.78801192],
|
||||
[ 0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803, 0.97370819
|
||||
, 0.96419154, 0.97209861, 0.97441313, 0.96356162, 0.94745352, 0.93965462
|
||||
, 0.93069645, 0.94020973, 0.94358232],
|
||||
[ 0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801, 0.80071489
|
||||
, 0.83358256, 0.83451613, 0.85175032, 0.85954307, 0.86790024, 0.87170334
|
||||
, 0.87863799, 0.87497981, 0.87888675],
|
||||
[ 0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619, 0.98733195
|
||||
, 0.99644997, 0.99669587, 1.02559097, 1.01116651, 0.99988024, 0.97906749
|
||||
, 0.99323123, 1.00204939, 0.99602148],
|
||||
[ 1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683, 1.08312397
|
||||
, 1.05192626, 1.04230892, 1.05577278, 1.08569751, 1.12443486, 1.08891079
|
||||
, 1.08603695, 1.05997314, 1.02160943],
|
||||
[ 1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272, 1.18257029
|
||||
, 1.16226243, 1.16009196, 1.14467789, 1.14820235, 1.12386598, 1.12680236
|
||||
, 1.12357937, 1.1159258, 1.12570828],
|
||||
[ 1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667, 1.31210239
|
||||
, 1.29989156, 1.29203193, 1.27183516, 1.26830786, 1.2617743, 1.28656675
|
||||
, 1.29734097, 1.29390205, 1.29345446],
|
||||
[ 0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864, 0.78772975
|
||||
, 0.82848011, 0.8259679, 0.82435705, 0.83108634, 0.84373784, 0.83891093
|
||||
, 0.84349247, 0.85637272, 0.86539395],
|
||||
[ 1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626, 1.2256767
|
||||
, 1.21126648, 1.19377804, 1.18355337, 1.19674434, 1.21536573, 1.23653297
|
||||
, 1.27962009, 1.27968392, 1.25907738],
|
||||
[ 0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282, 0.96480308
|
||||
, 0.94686376, 0.93679073, 0.92540049, 0.92988835, 0.93442917, 0.92100464
|
||||
, 0.91475304, 0.90249622, 0.9021363 ],
|
||||
[ 0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368, 0.88937573
|
||||
, 0.894401, 0.90448993, 0.95495898, 0.92698333, 0.94745352, 0.92562488
|
||||
, 0.96635366, 1.02520312, 1.0394296 ],
|
||||
[ 1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073, 1.00759019
|
||||
, 0.99192968, 0.99747298, 0.99550759, 0.97583768, 0.9610168, 0.94779638
|
||||
, 0.93759089, 0.93353431, 0.94121705],
|
||||
[ 0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613, 0.83434854
|
||||
, 0.85813595, 0.84667961, 0.84374558, 0.85951183, 0.87194227, 0.89455097
|
||||
, 0.88283929, 0.90349491, 0.90600675],
|
||||
[ 1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086, 1.00581626
|
||||
, 0.98850522, 0.99291168, 0.98983209, 0.97511924, 0.96134615, 0.96382634
|
||||
, 0.95011401, 0.9434686, 0.94637765],
|
||||
[ 1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857, 1.04800023
|
||||
, 1.03024941, 1.04200483, 1.0402554, 1.03296979, 1.02191682, 1.02476275
|
||||
, 1.02347523, 1.02517684, 1.04359571],
|
||||
[ 1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043, 1.0531801
|
||||
, 1.07452771, 1.09383478, 1.1052447, 1.10322136, 1.09167939, 1.08772756
|
||||
, 1.08859544, 1.09177338, 1.1096083 ],
|
||||
[ 0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809, 0.86287327
|
||||
, 0.85169796, 0.85411285, 0.84886336, 0.84517414, 0.84843858, 0.84488343
|
||||
, 0.83374329, 0.82812044, 0.82878599],
|
||||
[ 0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286, 0.92652175
|
||||
, 0.94278865, 0.93682452, 0.98655146, 0.992237, 0.9798497, 0.93869677
|
||||
, 0.96947771, 1.00362626, 0.98102351],
|
||||
[ 0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967, 0.93092109
|
||||
, 0.92662519, 0.93412152, 0.93501274, 0.92879506, 0.92110542, 0.91035556
|
||||
, 0.90430364, 0.89994694, 0.90073864],
|
||||
[ 0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824, 0.98882205
|
||||
, 0.97662234, 0.95601578, 0.94905385, 0.94934888, 0.97152609, 0.97163004
|
||||
, 0.9700702, 0.97158948, 0.95884908],
|
||||
[ 0.83980439, 0.84726737, 0.85747, 0.85467221, 0.8556751, 0.84818516
|
||||
, 0.85265681, 0.84502402, 0.82645665, 0.81743586, 0.83550406, 0.83338919
|
||||
, 0.83511679, 0.82136617, 0.80921874],
|
||||
[ 0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441, 0.95440787
|
||||
, 0.96364363, 0.96804412, 0.97136214, 0.97583768, 0.95571724, 0.96895368
|
||||
, 0.97001634, 0.97082733, 0.98782366],
|
||||
[ 1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249, 1.10558188
|
||||
, 1.1214086, 1.12292577, 1.13021031, 1.13342735, 1.14686068, 1.14502975
|
||||
, 1.14474747, 1.14084037, 1.16142926],
|
||||
[ 1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863, 1.11856702
|
||||
, 1.09764283, 1.08815849, 1.08044313, 1.09278827, 1.07003204, 1.08398066
|
||||
, 1.09831768, 1.09298232, 1.09176125],
|
||||
[ 0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744, 0.77751194
|
||||
, 0.79902974, 0.81437881, 0.80788828, 0.79603865, 0.78966436, 0.79949807
|
||||
, 0.80172182, 0.82168155, 0.85587911],
|
||||
[ 1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561, 1.00162979
|
||||
, 0.99860739, 1.00814981, 1.00574316, 0.99030032, 0.97682565, 0.97292596
|
||||
, 0.96519561, 0.96173403, 0.95890284],
|
||||
[ 0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289, 0.96608031
|
||||
, 0.99727185, 1.00781194, 1.03484236, 1.05333619, 1.0983263, 1.1704974
|
||||
, 1.17025154, 1.18730553, 1.14242645]])
|
||||
|
||||
self.assertTrue(np.allclose(result, expected))
|
||||
self.assertTrue(type(result) == type(expected))
|
||||
@@ -286,35 +268,32 @@ class SpaceTimeTests(unittest.TestCase):
|
||||
|
||||
def test_rebin_data(self):
|
||||
"""Test rebin_data"""
|
||||
# sample in double the time (even case since 10 % 2 = 0):
|
||||
# (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2
|
||||
# = 0.5, 2.5, 4.5, 6.5, 8.5
|
||||
## sample in double the time (even case since 10 % 2 = 0):
|
||||
## (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2
|
||||
## = 0.5, 2.5, 4.5, 6.5, 8.5
|
||||
ans_even = np.array([(i + 0.5) * np.ones(10, dtype=float)
|
||||
for i in range(0, 10, 2)]).T
|
||||
|
||||
self.assertTrue(
|
||||
np.array_equal(std.rebin_data(self.time_data, 2), ans_even))
|
||||
self.assertTrue(np.array_equal(std.rebin_data(self.time_data, 2), ans_even))
|
||||
|
||||
# sample in triple the time (uneven since 10 % 3 = 1):
|
||||
# (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1
|
||||
# = 1, 4, 7, 9
|
||||
ans_odd = np.array([i * np.ones(10, dtype=float)
|
||||
for i in (1, 4, 7, 9)]).T
|
||||
self.assertTrue(
|
||||
np.array_equal(std.rebin_data(self.time_data, 3), ans_odd))
|
||||
## sample in triple the time (uneven since 10 % 3 = 1):
|
||||
## (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1
|
||||
## = 1, 4, 7, 9
|
||||
ans_odd = np.array([i * np.ones(10, dtype=float)
|
||||
for i in (1, 4, 7, 9)]).T
|
||||
self.assertTrue(np.array_equal(std.rebin_data(self.time_data, 3), ans_odd))
|
||||
|
||||
def test_get_prob_dist(self):
|
||||
"""Test get_prob_dist"""
|
||||
lag_indices = np.array([1, 2, 3, 4])
|
||||
unit_indices = np.array([1, 3, 2, 4])
|
||||
answer = np.array([
|
||||
[0.0754717, 0.88207547, 0.04245283, 0., 0.],
|
||||
[0., 0., 0.09411765, 0.87058824, 0.03529412],
|
||||
[0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
|
||||
[0., 0., 0., 0.02352941, 0.97647059]
|
||||
[ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ],
|
||||
[ 0. , 0. , 0.09411765, 0.87058824, 0.03529412],
|
||||
[ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ],
|
||||
[ 0. , 0. , 0. , 0.02352941, 0.97647059]
|
||||
])
|
||||
result = std.get_prob_dist(self.transition_matrix,
|
||||
lag_indices, unit_indices)
|
||||
result = std.get_prob_dist(self.transition_matrix, lag_indices, unit_indices)
|
||||
|
||||
self.assertTrue(np.array_equal(result, answer))
|
||||
|
||||
@@ -322,20 +301,16 @@ class SpaceTimeTests(unittest.TestCase):
|
||||
"""Test get_prob_stats"""
|
||||
|
||||
probs = np.array([
|
||||
[0.0754717, 0.88207547, 0.04245283, 0., 0.],
|
||||
[0., 0., 0.09411765, 0.87058824, 0.03529412],
|
||||
[0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
|
||||
[0., 0., 0., 0.02352941, 0.97647059]
|
||||
[ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ],
|
||||
[ 0. , 0. , 0.09411765, 0.87058824, 0.03529412],
|
||||
[ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ],
|
||||
[ 0. , 0. , 0. , 0.02352941, 0.97647059]
|
||||
])
|
||||
unit_indices = np.array([1, 3, 2, 4])
|
||||
answer_up = np.array([0.04245283, 0.03529412, 0.12376238, 0.])
|
||||
answer_down = np.array([0.0754717, 0.09411765, 0.0990099, 0.02352941])
|
||||
answer_trend = np.array([-0.03301887 / 0.88207547,
|
||||
-0.05882353 / 0.87058824,
|
||||
0.02475248 / 0.77722772,
|
||||
-0.02352941 / 0.97647059])
|
||||
answer_volatility = np.array([0.34221495, 0.33705421,
|
||||
0.29226542, 0.38834223])
|
||||
answer_trend = np.array([-0.03301887 / 0.88207547, -0.05882353 / 0.87058824, 0.02475248 / 0.77722772, -0.02352941 / 0.97647059])
|
||||
answer_volatility = np.array([ 0.34221495, 0.33705421, 0.29226542, 0.38834223])
|
||||
|
||||
result = std.get_prob_stats(probs, unit_indices)
|
||||
result_up = result[0]
|
||||
|
||||
Reference in New Issue
Block a user