def moran_local(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I implementation for PL/Python
    Andy Eschbacher

    @param subquery: source query providing geometries/attributes
    @param attr: name of the attribute column to analyze
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column

    Returns an empty zipped array (5 columns) when there are no
    neighbors or the query fails.
    """
    # geometries with attributes that are null are ignored
    # resulting in a collection of not as near neighbors
    qvals = OrderedDict([("id_col", id_col),
                         ("attr1", attr),
                         ("geom_col", geom_col),
                         ("subquery", subquery),
                         ("num_ngbrs", num_ngbrs)])

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(5)
    except plpy.SPIError as e:
        # `except X as e` replaces the deprecated, Python-3-incompatible
        # `except X, e` form used elsewhere in this file
        plpy.error('Analysis failed: %s' % e)
        return pu.empty_zipped_array(5)
def raise_if_errors(self):
    """
    Abort if any errors have been recorded so far.

    To be used in places where, before continuing, it must be checked
    whether errors have been found.

    Raises the found errors, packing them into one error message as a
    urlencoded string (via plpy.error, which aborts execution).
    """
    # can_save is cleared whenever a validation error was recorded
    if not self.can_save:
        msgs = "Dbservice error(s): " + make_record_array(self.messages)
        plpy.error(msgs)
def get_nonspatial_kmeans(self, query):
    """
    Fetch data for non-spatial kmeans.

    @param query: SQL query string to execute
    @return: plpy result set; aborts via plpy.error on SPI failure
    """
    try:
        data = plpy.execute(query)
        return data
    except plpy.SPIError as err:
        # modernized from the deprecated `except X, err` form
        plpy.error('Analysis failed: %s' % err)
def raise_if_errors(self):
    """
    Abort with the accumulated Dbservice errors, if any were recorded.

    Call this before continuing past a point where earlier validation
    may have flagged problems; the messages are packed into a single
    urlencoded string and raised through plpy.error.
    """
    if self.can_save:
        return
    plpy.error("Dbservice error(s): " + make_record_array(self.messages))
def spatial_markov_trend(
    subquery,
    time_cols,
    num_classes=7,
    w_type="knn",
    num_ngbrs=5,
    permutations=0,
    geom_col="the_geom",
    id_col="cartodb_id",
):
    """
    Predict the trends of a unit based on:
    1. history of its transitions to different classes (e.g., 1st
       quantile -> 2nd quantile)
    2. average class of its neighbors

    Inputs:
    @param subquery string: e.g., SELECT the_geom, cartodb_id,
      interesting_time_column FROM table_name
    @param time_cols list of strings: list of strings of column names
    @param num_classes (optional): number of classes to break
      distribution of values into. Currently uses quantile bins.
    @param w_type string (optional): weight type ('knn' or 'queen')
    @param num_ngbrs int (optional): number of neighbors (if knn type)
    @param permutations int (optional): number of permutations for test
      stats
    @param geom_col string (optional): name of column which contains
      the geometries
    @param id_col string (optional): name of column which has the ids
      of the table

    Outputs:
    @param trend_up float: probability that a geom will move to a
      higher class
    @param trend_down float: probability that a geom will move to a
      lower class
    @param trend float: (trend_up - trend_down) / trend_static
    @param volatility float: a measure of the volatility based on
      probability stddev(prob array)
    """
    if len(time_cols) < 2:
        plpy.error("More than one time column needs to be passed")

    qvals = {
        "id_col": id_col,
        "time_cols": time_cols,
        "geom_col": geom_col,
        "subquery": subquery,
        "num_ngbrs": num_ngbrs,
    }

    try:
        query_result = plpy.execute(pu.construct_neighbor_query(w_type, qvals))
        if len(query_result) == 0:
            return zip([None], [None], [None], [None], [None])
    except plpy.SPIError as e:
        # BUGFIX: the original formatted the debug message with an
        # undefined name `err` (the exception is bound to `e`), which
        # raised a NameError inside the handler
        plpy.debug("Query failed with exception %s: %s" %
                   (e, pu.construct_neighbor_query(w_type, qvals)))
        plpy.error("Analysis failed: %s" % e)
        return zip([None], [None], [None], [None], [None])
def get_gwr_predict(self, params):
    """
    Fetch data for GWR (geographically weighted regression) predict.

    @param params: dict of query parameters consumed by
        pu.gwr_predict_query
    @return: plpy result set; aborts via plpy.error on SPI failure
    """
    query = pu.gwr_predict_query(params)
    try:
        query_result = plpy.execute(query)
        return query_result
    except plpy.SPIError as err:
        # modernized from the deprecated `except X, err` form
        plpy.error('Analysis failed: %s' % err)
def spatial_markov_trend(subquery, time_cols, num_classes=7,
                         w_type='knn', num_ngbrs=5, permutations=0,
                         geom_col='the_geom', id_col='cartodb_id'):
    """
    Predict the trends of a unit based on:
    1. history of its transitions to different classes (e.g., 1st
       quantile -> 2nd quantile)
    2. average class of its neighbors

    Inputs:
    @param subquery string: e.g., SELECT the_geom, cartodb_id,
      interesting_time_column FROM table_name
    @param time_cols list of strings: list of strings of column names
    @param num_classes (optional): number of classes to break
      distribution of values into. Currently uses quantile bins.
    @param w_type string (optional): weight type ('knn' or 'queen')
    @param num_ngbrs int (optional): number of neighbors (if knn type)
    @param permutations int (optional): number of permutations for test
      stats
    @param geom_col string (optional): name of column which contains
      the geometries
    @param id_col string (optional): name of column which has the ids
      of the table

    Outputs:
    @param trend_up float: probability that a geom will move to a
      higher class
    @param trend_down float: probability that a geom will move to a
      lower class
    @param trend float: (trend_up - trend_down) / trend_static
    @param volatility float: a measure of the volatility based on
      probability stddev(prob array)
    """
    if len(time_cols) < 2:
        plpy.error('More than one time column needs to be passed')

    qvals = {"id_col": id_col,
             "time_cols": time_cols,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    try:
        query_result = plpy.execute(pu.construct_neighbor_query(w_type, qvals))
        if len(query_result) == 0:
            return zip([None], [None], [None], [None], [None])
    except plpy.SPIError as e:
        # BUGFIX: the original formatted the debug message with an
        # undefined name `err` (the exception is bound to `e`), which
        # raised a NameError inside the handler
        plpy.debug('Query failed with exception %s: %s' %
                   (e, pu.construct_neighbor_query(w_type, qvals)))
        plpy.error('Analysis failed: %s' % e)
        return zip([None], [None], [None], [None], [None])
def execute(self, arg_dict, all_keys_required=True):
    """Execute the prepared plan with argument values from arg_dict.

    When all_keys_required is true, a key missing from arg_dict is a
    hard error; otherwise missing keys are passed through as None.
    """
    try:
        if all_keys_required:
            values = [arg_dict[key] for key in self.arg_map]
        else:
            values = [arg_dict.get(key) for key in self.arg_map]
        return plpy.execute(self.plan, values)
    except KeyError:
        plpy.error("Missing argument: QUERY: %s ARGS: %s VALUES: %s" % (
            repr(self.sql), repr(self.arg_map), repr(arg_dict)))
def execute(self, arg_dict, all_keys_required=True):
    """Run the stored plan, pulling argument values out of arg_dict.

    Missing keys abort with an error when all_keys_required is set;
    otherwise they are substituted with None via dict.get.
    """
    lookup = (lambda k: arg_dict[k]) if all_keys_required else arg_dict.get
    try:
        arg_list = [lookup(k) for k in self.arg_map]
        return plpy.execute(self.plan, arg_list)
    except KeyError:
        plpy.error("Missing argument: QUERY: %s ARGS: %s VALUES: %s"
                   % (repr(self.sql), repr(self.arg_map), repr(arg_dict)))
def create_and_predict_segment(query, variable, target_query, model_params):
    """
    generate a segment with machine learning
    Stuart Lynn

    @param query: source query for the training data
    @param variable: name of the target variable (unused in this
        visible portion — presumably consumed further on; confirm
        against the full function body)
    @param target_query: query producing the rows to predict on
    @param model_params: model hyper-parameters (same caveat as above)
    """
    # fetch column names; LIMIT 1 keeps the schema probe cheap
    try:
        columns = plpy.execute(
            'SELECT * FROM ({query}) As a LIMIT 1 '.format(query=query)
        )[0].keys()
    except Exception as e:
        # modernized from the deprecated `except Exception, e` form
        plpy.error('Failed to build segmentation model: %s' % e)
def wrapper(*args, **kwargs):
    """Error checking"""
    try:
        result = func(*args, **kwargs)
        if result:
            return result
        # empty/None result is treated as a data error
        plpy.error(NULL_VALUE_ERROR)
    except plpy.SPIError as err:
        plpy.error('Analysis failed: {}'.format(err))
    return []
def get_getis(self, w_type, params):
    """
    Fetch data for Getis-Ord's G.

    @param w_type: weight type ('knn' or 'queen')
    @param params: dict of query parameters for
        pu.construct_neighbor_query
    @return: plpy result set, or an empty zipped array (4 columns)
        when there are no neighbors
    """
    try:
        query = pu.construct_neighbor_query(w_type, params)
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(4)
        else:
            return result
    except plpy.SPIError as err:
        # modernized from the deprecated `except X, err` form
        plpy.error('Analysis failed: %s' % err)
def get_markov(self, w_type, params):
    """
    Fetch data for spatial Markov analysis.

    @param w_type: weight type ('knn' or 'queen')
    @param params: dict of query parameters for
        pu.construct_neighbor_query
    @return: plpy result set, or an empty zipped array (4 columns)
        when there are no neighbors
    """
    try:
        query = pu.construct_neighbor_query(w_type, params)
        data = plpy.execute(query)

        if len(data) == 0:
            return pu.empty_zipped_array(4)

        return data
    except plpy.SPIError as err:
        # modernized from the deprecated `except X, err` form
        plpy.error('Analysis failed: %s' % err)
def get_spatial_kmeans(self, params):
    """
    Fetch data for spatial kmeans.

    @param params: dict with keys 'id_col', 'geom_col', 'subquery'
        interpolated into the aggregation query below
    @return: plpy result set with id, x and y arrays
    """
    query = ("SELECT "
             "array_agg({id_col} ORDER BY {id_col}) as ids,"
             "array_agg(ST_X({geom_col}) ORDER BY {id_col}) As xs,"
             "array_agg(ST_Y({geom_col}) ORDER BY {id_col}) As ys "
             "FROM ({subquery}) As a "
             "WHERE {geom_col} IS NOT NULL").format(**params)

    try:
        data = plpy.execute(query)
        return data
    except plpy.SPIError as err:
        # modernized from the deprecated `except X, err` form
        plpy.error('Analysis failed: %s' % err)
def get_moran(self, w_type, params):
    """
    Fetch data for Moran's I analyses.

    @param w_type: weight type ('knn' or 'queen')
    @param params: dict of query parameters for
        pu.construct_neighbor_query
    @return: plpy result set, or an empty zipped array (2 columns)
        when there are no neighbors
    """
    try:
        query = pu.construct_neighbor_query(w_type, params)
        data = plpy.execute(query)
        # if there are no neighbors, exit
        if len(data) == 0:
            return pu.empty_zipped_array(2)
        return data
    except plpy.SPIError as err:
        # BUGFIX: the original formatted the message with undefined
        # name `e` while the exception was bound as `err`, raising a
        # NameError inside the handler
        plpy.error('Analysis failed: %s' % err)
        return pu.empty_zipped_array(2)
def execute(self, arg_dict, all_keys_required=True):
    """Execute the prepared plan; name the plan arguments absent from
    arg_dict in the error message when a required key is missing."""
    try:
        if all_keys_required:
            arg_list = [arg_dict[name] for name in self.arg_map]
        else:
            arg_list = [arg_dict.get(name) for name in self.arg_map]
        return plpy.execute(self.plan, arg_list)
    except KeyError:
        # report exactly which plan arguments were not supplied
        missing = list(set(self.arg_map) - set(arg_dict))
        plpy.error("Missing arguments: [%s] QUERY: %s"
                   % (','.join(missing), repr(self.sql)))
def execute(self, arg_dict, all_keys_required=True):
    """Run the stored plan with values taken from arg_dict.

    On a missing required key, aborts and lists the absent argument
    names alongside the query text.
    """
    try:
        if all_keys_required:
            values = [arg_dict[k] for k in self.arg_map]
        else:
            values = [arg_dict.get(k) for k in self.arg_map]
        return plpy.execute(self.plan, values)
    except KeyError:
        missing = list(set(self.arg_map).difference(set(arg_dict.keys())))
        plpy.error("Missing arguments: [%s] QUERY: %s" % (
            ','.join(missing), repr(self.sql)))
def __init__(self, context, global_dict = None):
    """
    This object must be initiated in the beginning of each db service.

    @param context: urlencoded service context string; must contain
        a 'username' key
    @param global_dict: passed through to DBService.__init__
    """
    DBService.__init__(self, context, global_dict)
    rec = skytools.db_urldecode(context)
    if "username" not in rec:
        plpy.error("Username must be provided in db service context parameter")
    self.username = rec['username'] # used for logging purposes
    # remember the current transaction id as a version marker
    res = plpy.execute("select txid_current() as txid;")
    row = res[0]
    self.version = row["txid"]
    self.rows_found = 0 # Flag set by run query to indicate number of rows got
def moran_local_bv(subquery, attr1, attr2, permutations, geom_col, id_col, w_type, num_ngbrs):
    """
    Moran's I (local) Bivariate (untested)

    @param subquery: source query providing geometries/attributes
    @param attr1: name of the first attribute column
    @param attr2: name of the second attribute column
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    """
    plpy.notice('** Constructing query')

    qvals = {"num_ngbrs": num_ngbrs,
             "attr1": attr1,
             "attr2": attr2,
             "subquery": subquery,
             "geom_col": geom_col,
             "id_col": id_col}

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(4)
    except plpy.SPIError:
        # BUGFIX: log the failing query BEFORE plpy.error — plpy.error
        # raises, so the notice that used to follow it never executed
        plpy.notice('** Query failed: "%s"' % query)
        plpy.error("Error: areas of interest query failed, "
                   "check input parameters")
        return pu.empty_zipped_array(4)

    # collect attributes
    attr1_vals = pu.get_attributes(result, 1)
    attr2_vals = pu.get_attributes(result, 2)

    # create weights
    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
                                        permutations=permutations)

    plpy.notice("len of Is: %d" % len(lisa.Is))

    # find clustering of significance
    lisa_sig = quad_position(lisa.q)

    plpy.notice('** Finished calculations')

    return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
def hba_astar(source, target, ol, cl, cl2, cat, d, p, tablename='routing',
              col_geom='geom', col_edge='id', col_cost='cost',
              col_revc='reverse_cost', col_source='source', col_target='target',
              vertex_tablename='vertex', col_cat='category',
              col_vertex_geom='geom', col_name='name', col_rule='rule'):
    """
    One expansion step of a (bidirectional) A* search over a routing
    graph stored in the database.

    @param source/target: vertex ids of the search endpoints
    @param ol: open list (dict of candidate nodes), cl: closed list
    @param cl2: closed list of the opposite search direction
    @param cat, d, p: category filter, cost map and predecessor map
    Remaining parameters name the routing tables/columns.

    Returns the meeting vertex id when the two searches touch,
    otherwise 0 ("keep looping"). Relies on module-level globals
    central_node_plan and distance_plan (prepared statements).
    """
    #If we don't have open candidates...
    if len(ol) == 0:
        return 0
    if len(ol) > 0:
        #x <- node with smallest f-value
        x = hba_bestNext(ol)
        #We move through the next best option:
        cl.append(x)
        del ol[x]
        #Have we just found the middle point?
        if (x == target or x in cl2):
            # NOTE(review): bare except silently maps any lookup/cast
            # failure to -1 — consider narrowing to (KeyError, IndexError,
            # TypeError, ValueError)
            try:
                last_id = int(p[x][1]['id'])
            except:
                last_id = -1
            global central_node_plan
            # NOTE(review): substring test on the column *name* — looks
            # like it switches argument order depending on direction;
            # confirm intended semantics
            if "source" in col_source:
                check_x = plpy.execute(central_node_plan, [x, last_id])
            else:
                check_x = plpy.execute(central_node_plan, [last_id, x])
            # any returned row confirms x as the meeting point
            for checking in check_x:
                return x
        #Next candidates
        # If we are in the initialization buffer, use hba_adj_initialization
        # NOTE(review): `global distance_plan` is declared after the name
        # is read in the condition above — a SyntaxWarning in Python 2 and
        # a SyntaxError in Python 3; move the declaration to the top
        if distance_plan == -1:
            global distance_plan
            # lazily prepare the min-distance-to-endpoints statement
            distance_plan = plpy.prepare('\n\
' + 'SELECT min(st_distance_sphere(v1.geom, v2.geom)) as dist from vertex v1, vertex v2 where v1.id = $1 and (v2.id = $2 or v2.id = $3)',['Integer', 'Integer', 'Integer'])
        distance =plpy.execute(distance_plan, [x, source, target], 1)[0]["dist"]
        adj = hba_adj(cat, x, target, p,tablename, col_geom, col_edge, col_cost, col_source, col_target, col_revc, col_cat, col_name, col_rule)
        #Forever alone
        if adj is None:
            plpy.error("This vertex is alone")
        #For each candidate
        hba_process_y(adj, p, cat, d, ol, cl, x, target, vertex_tablename, col_vertex_geom, col_edge, [], distance)
    #Return false, we still have to loop more
    return 0
def get_dyn_transfo_params_form_1(params_column, params, time):
    ''' Return the dynamic transfo parameters.

    Interpolates each referenced dimension of the point-cloud patch at
    the given time and substitutes the interpolated values back into
    the params structure (lists are filled element-wise).

    @param params_column: "schema.table.column" path to the pcpatch
    @param params: parameter template (first element of the stored list)
    @param time: interpolation time as a numeric value
    @return: params with values filled in, or None when no patch covers
        the requested time
    '''
    # NOTE(review): the message talks about strings but the guard fires
    # on datetime instances — the condition looks inverted relative to
    # its message; confirm the intended check
    if isinstance(time, datetime.datetime):
        plpy.error('times as strings unsupported for dynamic transforms of form 1')
    schema, table, column = tuple(map(plpy.quote_ident, params_column.split('.')))
    params = params[0]
    # collect every dimension name referenced by the parameter template
    select = []
    for param in params.values():
        if isinstance(param, list):
            for dim in param:
                append_dim_select(dim, select)
        else:
            dim = param
            append_dim_select(dim, select)
    select = ', '.join(select)
    # interpolate the patch at `time`; the where clause picks the patch
    # whose time range contains the requested instant
    q = ('''
        with patch as (
            select pc_interpolate({column}, 'time', {time:f}, true) point
            from {schema}.{table}
            where pc_patchmin({column}, 'time') <= {time:f} and
                  pc_patchmax({column}, 'time') > {time:f}
        ) select %s from patch
        ''' % select).format(schema=schema, table=table, column=column, time=time)
    plpy.debug(q)
    rv = plpy.execute(q)
    if len(rv) == 0:
        plpy.warning('no parameters for the provided time ({:f})'.format(time))
        return None
    if len(rv) != 1:
        plpy.error('multiple rows returned from time interpolation')
    values = rv[0]
    # write interpolated values back into the template, preserving shape
    for key, param in params.items():
        if isinstance(param, list):
            for i, dim in enumerate(param):
                val = values[dim]
                param[i] = val
        else:
            dim = param
            val = values[dim]
            params[key] = val
    return params
def moran_local_rate(subquery, numerator, denominator, w_type,
                     num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I Local Rate
    Andy Eschbacher

    @param subquery: source query providing geometries/attributes
    @param numerator: name of the rate numerator column
    @param denominator: name of the rate denominator column
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column
    """
    # geometries with values that are null are ignored
    # resulting in a collection of not as near neighbors
    query = pu.construct_neighbor_query(
        w_type,
        {"id_col": id_col,
         "numerator": numerator,
         "denominator": denominator,
         "geom_col": geom_col,
         "subquery": subquery,
         "num_ngbrs": num_ngbrs})

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(5)
    except plpy.SPIError as err:
        # BUGFIX: log diagnostics BEFORE plpy.error (plpy.error raises,
        # so the notices that used to follow it were unreachable), and
        # report the exception instance rather than the SPIError class
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error(
            'Error: areas of interest query failed, check input parameters')
        return pu.empty_zipped_array(5)

    # collect attributes
    numer = pu.get_attributes(result, 1)
    denom = pu.get_attributes(result, 2)

    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
                                          permutations=permutations)

    # find units of significance
    quads = quad_position(lisa.q)

    return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
def moran_local_bv(subquery, attr1, attr2, permutations, geom_col, id_col, w_type, num_ngbrs):
    """
    Moran's I (local) Bivariate (untested)

    @param subquery: source query providing geometries/attributes
    @param attr1: name of the first attribute column
    @param attr2: name of the second attribute column
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    """
    plpy.notice('** Constructing query')

    qvals = OrderedDict([("id_col", id_col),
                         ("attr1", attr1),
                         ("attr2", attr2),
                         ("geom_col", geom_col),
                         ("subquery", subquery),
                         ("num_ngbrs", num_ngbrs)])

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(4)
    except plpy.SPIError:
        # BUGFIX: log the failing query BEFORE plpy.error — plpy.error
        # raises, so the notice that used to follow it never executed
        plpy.notice('** Query failed: "%s"' % query)
        plpy.error("Error: areas of interest query failed, "
                   "check input parameters")
        return pu.empty_zipped_array(4)

    # collect attributes
    attr1_vals = pu.get_attributes(result, 1)
    attr2_vals = pu.get_attributes(result, 2)

    # create weights
    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
                                        permutations=permutations)

    plpy.notice("len of Is: %d" % len(lisa.Is))

    # find clustering of significance
    lisa_sig = quad_position(lisa.q)

    plpy.notice('** Finished calculations')

    return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
def moran(subquery, attr_name, w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I (global)
    Implementation building neighbors with a PostGIS database and
    Moran's I core clusters with PySAL.
    Andy Eschbacher

    @param subquery: source query providing geometries/attributes
    @param attr_name: name of the attribute column to analyze
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column
    """
    qvals = {"id_col": id_col,
             "attr1": attr_name,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    query = pu.construct_neighbor_query(w_type, qvals)

    plpy.notice('** Query: %s' % query)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(2)
        plpy.notice('** Query returned with %d rows' % len(result))
    except plpy.SPIError as err:
        # BUGFIX: log diagnostics BEFORE plpy.error (plpy.error raises,
        # so the notices that used to follow it were unreachable), and
        # report the exception instance rather than the SPIError class
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error(
            'Error: areas of interest query failed, check input parameters')
        return pu.empty_zipped_array(2)

    ## collect attributes
    attr_vals = pu.get_attributes(result)

    ## calculate weights
    weight = pu.get_weight(result, w_type, num_ngbrs)

    ## calculate moran global
    moran_global = ps.esda.moran.Moran(attr_vals, weight,
                                       permutations=permutations)

    return zip([moran_global.I], [moran_global.EI])
def moran_rate(subquery, numerator, denominator, w_type, num_ngbrs,
               permutations, geom_col, id_col):
    """
    Moran's I Rate (global)
    Andy Eschbacher

    @param subquery: source query providing geometries/attributes
    @param numerator: name of the rate numerator column
    @param denominator: name of the rate denominator column
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column
    """
    qvals = {"id_col": id_col,
             "attr1": numerator,
             "attr2": denominator,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    query = pu.construct_neighbor_query(w_type, qvals)

    plpy.notice('** Query: %s' % query)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(2)
        plpy.notice('** Query returned with %d rows' % len(result))
    except plpy.SPIError as err:
        # BUGFIX: log diagnostics BEFORE plpy.error (plpy.error raises,
        # so the notices that used to follow it were unreachable), and
        # report the exception instance rather than the SPIError class
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error(
            'Error: areas of interest query failed, check input parameters')
        return pu.empty_zipped_array(2)

    ## collect attributes
    numer = pu.get_attributes(result, 1)
    denom = pu.get_attributes(result, 2)

    weight = pu.get_weight(result, w_type, num_ngbrs)

    ## calculate moran global rate
    lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
                                         permutations=permutations)

    return zip([lisa_rate.I], [lisa_rate.EI])
def predict_segment(model, features, target_query):
    """
    Use the provided model to predict the values for the new feature set
        Input:
            @param model: The pretrained model
            @features: A list of features to use in the
                model prediction (list of column names)
            @target_query: The query to run to obtain the data to predict
                on and the cartdb_ids associated with it.
    """
    batch_size = 1000
    joined_features = ','.join(['"{0}"::numeric'.format(a)
                                for a in features])

    try:
        cursor = plpy.cursor(
            'SELECT Array[{joined_features}] As features '
            'FROM ({target_query}) As a'.format(
                joined_features=joined_features,
                target_query=target_query))
    except Exception as e:
        # modernized from the deprecated `except Exception, e` form
        plpy.error('Failed to build segmentation model: %s' % e)
def moran_local(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I implementation for PL/Python
    Andy Eschbacher

    @param subquery: source query providing geometries/attributes
    @param attr: name of the attribute column to analyze
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column
    """
    # geometries with attributes that are null are ignored
    # resulting in a collection of not as near neighbors
    qvals = {"id_col": id_col,
             "attr1": attr,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(5)
    except plpy.SPIError:
        # BUGFIX: log the failing query BEFORE plpy.error — plpy.error
        # raises, so the notice that used to follow it never executed
        plpy.notice('** Query failed: "%s"' % query)
        plpy.error(
            'Error: areas of interest query failed, check input parameters')
        return pu.empty_zipped_array(5)

    attr_vals = pu.get_attributes(result)
    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
                                     permutations=permutations)

    # find quadrants for each geometry
    quads = quad_position(lisa.q)

    return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
def moran_local_rate(subquery, numerator, denominator, w_type,
                     num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I Local Rate
    Andy Eschbacher

    @param subquery: source query providing geometries/attributes
    @param numerator: name of the rate numerator column
    @param denominator: name of the rate denominator column
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column
    """
    # geometries with values that are null are ignored
    # resulting in a collection of not as near neighbors
    qvals = OrderedDict([("id_col", id_col),
                         ("numerator", numerator),
                         ("denominator", denominator),
                         ("geom_col", geom_col),
                         ("subquery", subquery),
                         ("num_ngbrs", num_ngbrs)])

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(5)
    except plpy.SPIError as err:
        # BUGFIX: log diagnostics BEFORE plpy.error (plpy.error raises,
        # so the notices that used to follow it were unreachable), and
        # report the exception instance rather than the SPIError class
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error('Error: areas of interest query failed, check input parameters')
        return pu.empty_zipped_array(5)

    ## collect attributes
    numer = pu.get_attributes(result, 1)
    denom = pu.get_attributes(result, 2)

    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
                                          permutations=permutations)

    # find quadrants for each geometry
    quads = quad_position(lisa.q)

    return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
def hba_astar(source, target, ol, cl, cl2, cat, d, p, tablename='routing',
              col_geom='geom', col_edge='id', col_cost='cost',
              col_revc='reverse_cost', col_source='source', col_target='target',
              vertex_tablename='vertex', col_cat='category',
              col_vertex_geom='geom', col_name='name', col_rule='rule'):
    """
    One expansion step of a (bidirectional) A* search over a routing
    graph stored in the database, switching neighbor-expansion strategy
    by distance from the origin.

    @param source/target: vertex ids of the search endpoints
    @param ol: open list (dict of candidate nodes), cl: closed list
    @param cl2: closed list of the opposite search direction
    @param cat, d, p: category filter, cost map and predecessor map
    Remaining parameters name the routing tables/columns.

    Returns the meeting vertex id when the searches touch, otherwise 0
    ("keep looping"). Relies on module-level globals distance_plan and
    distance_buffer.
    """
    #If we don't have open candidates...
    if len(ol) == 0:
        return 0
    if len(ol) > 0:
        #x <- node with smallest f-value
        x = hba_bestNext(ol)
        #We move through the next best option:
        cl.append(x)
        del ol[x]
        #Have we just found the middle point?
        if (x == target or x in cl2):
            return x
        #Next candidates
        # If we are in the initialization buffer, use hba_adj_initialization
        # NOTE(review): `global distance_plan` is declared after the name
        # is read in the condition above — a SyntaxWarning in Python 2 and
        # a SyntaxError in Python 3; move the declaration to the top
        if distance_plan == -1:
            global distance_plan
            # lazily prepare the origin-distance statement
            distance_plan = plpy.prepare('\n\
' + 'SELECT st_distance_sphere(v1.geom, v2.geom) as dist from vertex v1, vertex v2 where v1.id = $1 and v2.id = $2', ['Integer', 'Integer'])
        distance =plpy.execute(distance_plan, [source, x])[0]["dist"]
        plpy.info("Distance from origin=" + str(distance))
        # within the buffer radius: expand with the buffered strategy
        if (distance <= distance_buffer):
            plpy.info("Using hba_adj_buffer")
            adj = hba_adj_buffer(x, target, p,tablename, col_geom, col_edge, col_cost, col_source, col_target, col_revc, col_cat, col_name, col_rule)
        else:
            plpy.info("Using hba_adj")
            adj = hba_adj(cat, x, target, p,tablename, col_geom, col_edge, col_cost, col_source, col_target, col_revc, col_cat, col_name, col_rule)
        #plpy.info("Obtained adjacents for node " + str(x) + " with category cat >= " + str(cat) + ". " + str(adj.nrows()) + " nodes.")
        #Forever alone
        if adj is None:
            plpy.error("This vertex is alone")
        #For each candidate
        hba_process_y(adj, p, cat, d, ol, cl, x, target, vertex_tablename, col_vertex_geom, col_edge, [], distance)
    #Return false, we still have to loop more
    return 0
def compute_logregr_coef(**kwargs):
    """
    Compute logistic regression coefficients

    This method serves as an interface to different optimization
    algorithms. By default, iteratively reweighted least squares is
    used, but for data with a lot of columns the conjugate-gradient
    method might perform better.

    @param source Name of relation containing the training data
    @param depColumn Name of dependent column in training data (of type BOOLEAN)
    @param indepColumn Name of independent column in training data (of type
           DOUBLE PRECISION[])

    Optionally also provide the following:
    @param optimizer Name of the optimizer. 'newton' or 'irls': Iteratively
           reweighted least squares, 'cg': conjugate gradient (default = 'irls')
    @param numIterations Maximum number of iterations (default = 20)
    @param precision Terminate if two consecutive iterations have a difference
           in the log-likelihood of less than <tt>precision</tt>. In other
           words, we terminate if the objective function value has converged.
           If this parameter is 0.0, then the algorithm will not check for
           convergence and only terminate after <tt>numIterations</tt>
           iterations.

    @return array with coefficients in case of convergence, otherwise None
    """
    # setdefault replaces the non-idiomatic `if not 'x' in kwargs` +
    # kwargs.update(...) pairs; behavior is identical
    kwargs.setdefault('optimizer', 'irls')
    kwargs.setdefault('numIterations', 20)
    kwargs.setdefault('precision', 0.0001)

    if kwargs['optimizer'] == 'cg':
        return __cg_logregr_coef(**kwargs)
    elif kwargs['optimizer'] in ['irls', 'newton']:
        return __irls__logregr_coef(**kwargs)
    else:
        plpy.error("Unknown optimizer requested. Must be 'newton'/'irls' or 'cg'")

    return None
def moran_rate(subquery, numerator, denominator, w_type, num_ngbrs,
               permutations, geom_col, id_col):
    """
    Moran's I Rate (global)
    Andy Eschbacher

    @param subquery: source query providing geometries/attributes
    @param numerator: name of the rate numerator column
    @param denominator: name of the rate denominator column
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column
    """
    # BUGFIX: the original was missing the comma after
    # ("attr2", denominator), so the next pair was *called* on the
    # tuple — a guaranteed TypeError at runtime
    qvals = OrderedDict([("id_col", id_col),
                         ("attr1", numerator),
                         ("attr2", denominator),
                         ("geom_col", geom_col),
                         ("subquery", subquery),
                         ("num_ngbrs", num_ngbrs)])

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(2)
    except plpy.SPIError as e:
        plpy.error('Analysis failed: %s' % e)
        return pu.empty_zipped_array(2)
def moran(subquery, attr_name, w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I (global)
    Implementation building neighbors with a PostGIS database and
    Moran's I core clusters with PySAL.
    Andy Eschbacher

    @param subquery: source query providing geometries/attributes
    @param attr_name: name of the attribute column to analyze
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column
    """
    qvals = OrderedDict([("id_col", id_col),
                         ("attr1", attr_name),
                         ("geom_col", geom_col),
                         ("subquery", subquery),
                         ("num_ngbrs", num_ngbrs)])

    query = pu.construct_neighbor_query(w_type, qvals)

    plpy.notice('** Query: %s' % query)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(2)
        plpy.notice('** Query returned with %d rows' % len(result))
    except plpy.SPIError as err:
        # BUGFIX: log diagnostics BEFORE plpy.error (plpy.error raises,
        # so the notices that used to follow it were unreachable), and
        # report the exception instance rather than the SPIError class
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error('Error: areas of interest query failed, check input parameters')
        return pu.empty_zipped_array(2)

    ## collect attributes
    attr_vals = pu.get_attributes(result)

    ## calculate weights
    weight = pu.get_weight(result, w_type, num_ngbrs)

    ## calculate moran global
    moran_global = ps.esda.moran.Moran(attr_vals, weight,
                                       permutations=permutations)

    return zip([moran_global.I], [moran_global.EI])
def get_data(variable, feature_columns, query):
    """
        Fetch data from the database, clean, and package into
          numpy arrays

        Input:
            @param variable: name of the target variable
            @param feature_columns: list of column names
            @param query: subquery that data is pulled from for the
              packaging

        Output:
            prepared data, packaged into NumPy arrays
    """
    # aggregate each feature column into one array per column
    columns = ','.join(['array_agg("{col}") As "{col}"'.format(col=col)
                        for col in feature_columns])

    try:
        data = plpy.execute('''
            SELECT
              array_agg("{variable}") As target,
              {columns}
            FROM ({query}) As a'''.format(
                variable=variable,
                columns=columns,
                query=query))
    except Exception as e:
        # modernized from the deprecated `except Exception, e` form
        plpy.error('Failed to access data to build segmentation model: %s' % e)
def moran_rate(subquery, numerator, denominator, w_type, num_ngbrs,
               permutations, geom_col, id_col):
    """
    Moran's I Rate (global)
    Andy Eschbacher

    @param subquery: source query providing geometries/attributes
    @param numerator: name of the rate numerator column
    @param denominator: name of the rate denominator column
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column
    """
    # BUGFIX: the original was missing the comma after
    # ("attr2", denominator), so the next pair was *called* on the
    # tuple — a guaranteed TypeError at runtime
    qvals = OrderedDict([("id_col", id_col),
                         ("attr1", numerator),
                         ("attr2", denominator),
                         ("geom_col", geom_col),
                         ("subquery", subquery),
                         ("num_ngbrs", num_ngbrs)])

    query = pu.construct_neighbor_query(w_type, qvals)

    plpy.notice('** Query: %s' % query)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(2)
        plpy.notice('** Query returned with %d rows' % len(result))
    except plpy.SPIError as err:
        # BUGFIX: log diagnostics BEFORE plpy.error (plpy.error raises,
        # so the notices that used to follow it were unreachable), and
        # report the exception instance rather than the SPIError class
        plpy.notice('** Query failed: "%s"' % query)
        plpy.notice('** Error: %s' % err)
        plpy.error('Error: areas of interest query failed, check input parameters')
        return pu.empty_zipped_array(2)

    ## collect attributes
    numer = pu.get_attributes(result, 1)
    denom = pu.get_attributes(result, 2)

    weight = pu.get_weight(result, w_type, num_ngbrs)

    ## calculate moran global rate
    lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
                                         permutations=permutations)

    return zip([lisa_rate.I], [lisa_rate.EI])
def moran_local_bv(subquery, attr1, attr2, permutations, geom_col, id_col, w_type, num_ngbrs):
    """
    Moran's I (local) Bivariate (untested)

    Builds the neighbor query, runs it, and computes local bivariate
    Moran statistics with PySAL; returns tuples of
    (I, significance quad, p-value, id).
    """
    qvals = OrderedDict([
        ("id_col", id_col),
        ("attr1", attr1),
        ("attr2", attr2),
        ("geom_col", geom_col),
        ("subquery", subquery),
        ("num_ngbrs", num_ngbrs),
    ])

    try:
        rows = plpy.execute(pu.construct_neighbor_query(w_type, qvals))
        if len(rows) == 0:
            # no neighbors means nothing to analyze
            return pu.empty_zipped_array(4)
    except plpy.SPIError:
        plpy.error("Error: areas of interest query failed, "
                   "check input parameters")
        return pu.empty_zipped_array(4)

    # pull both attribute vectors and the spatial weights
    first_attr = pu.get_attributes(rows, 1)
    second_attr = pu.get_attributes(rows, 2)
    weight = pu.get_weight(rows, w_type, num_ngbrs)

    # local bivariate Moran statistics
    lisa = ps.esda.moran.Moran_Local_BV(first_attr, second_attr, weight,
                                        permutations=permutations)

    # classify each unit's significance quadrant
    significance = quad_position(lisa.q)

    return zip(lisa.Is, significance, lisa.p_sim, weight.id_order)
def moran(subquery, attr_name, w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I (global)
    Implementation building neighbors with a PostGIS database and
    Moran's I core clusters with PySAL.
    Andy Eschbacher

    @param subquery: source query providing geometries/attributes
    @param attr_name: name of the attribute column to analyze
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column
    """
    qvals = OrderedDict([("id_col", id_col),
                         ("attr1", attr_name),
                         ("geom_col", geom_col),
                         ("subquery", subquery),
                         ("num_ngbrs", num_ngbrs)])

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(2)
    except plpy.SPIError as e:
        # modernized from the deprecated `except X, e` form
        plpy.error('Analysis failed: %s' % e)
        return pu.empty_zipped_array(2)
def moran_rate(subquery, numerator, denominator, w_type, num_ngbrs,
               permutations, geom_col, id_col):
    """
    Moran's I Rate (global)
    Andy Eschbacher

    @param subquery: source query providing geometries/attributes
    @param numerator: name of the rate numerator column
    @param denominator: name of the rate denominator column
    @param w_type: weight type ('knn' or 'queen')
    @param num_ngbrs: number of neighbors (for knn weights)
    @param permutations: number of permutations for significance testing
    @param geom_col: name of the geometry column
    @param id_col: name of the id column
    """
    # BUGFIX: the original was missing the comma after
    # ("attr2", denominator), so the next pair was *called* on the
    # tuple — a guaranteed TypeError at runtime
    qvals = OrderedDict([("id_col", id_col),
                         ("attr1", numerator),
                         ("attr2", denominator),
                         ("geom_col", geom_col),
                         ("subquery", subquery),
                         ("num_ngbrs", num_ngbrs)])

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(2)
    except plpy.SPIError as e:
        # modernized from the deprecated `except X, e` form
        plpy.error('Analysis failed: %s' % e)
        return pu.empty_zipped_array(2)
def get_transform(transfoid, time):
    ''' Return information about the transfo whose id is transfoid. A dict with
        keys "name", "params", "func_name", and "func_sign".

        @param transfoid: integer id of the transfo row in li3ds.transfo
        @param time: float, or a date/time string parseable by dateutil
        @return: (name, params, func_name, func_sign) tuple, or None if a
            dynamic transform has no parameters for the given time.
            (NOTE: despite the summary above, a tuple is returned, not a
            dict.)
    '''
    if not isinstance(time, (float, str)):
        plpy.error('unexpected type for "time" parameter ({})'.format(type(time)))
    if isinstance(time, str):
        # if time is a string parse it to a datetime object
        time = dateutil.parser.parse(time)
    q = '''
        select t.name as name, t.parameters_column as params_column,
        t.parameters as params, tt.name as func_name, tt.func_signature as func_sign
        from li3ds.transfo t
        join li3ds.transfo_type tt on t.transfo_type = tt.id
        where t.id = {:d}
        '''.format(transfoid)
    plpy.debug(q)
    rv = plpy.execute(q)
    if len(rv) < 1:
        plpy.error('no transfo with id {:d}'.format(transfoid))
    transfo = rv[0]
    params_column = transfo['params_column']
    params = json.loads(transfo['params'])
    if params_column:
        # dynamic transform form 1: parameters interpolated from a
        # point-cloud column
        if not time:
            plpy.error('no time value provided for dynamic transfo "{}"'
                       .format(transfo['name']))
        params = get_dyn_transfo_params_form_1(params_column, params, time)
    elif params:
        if len(params) > 1:
            # dynamic tranform form 2: parameters stored as a time series
            if not time:
                plpy.error('no time value provided for dynamic transfo "{}"'
                           .format(transfo['name']))
            params = get_dyn_transfo_params_form_2(params, time)
        else:
            # static transform: a single parameter set
            params = params[0]
    if params is None:
        # no parameters could be resolved for the requested time
        return None
    return transfo['name'], params, transfo['func_name'], transfo['func_sign']
def _transform(obj, type_, func_name, func_sign, params):
    ''' Transform obj, whose type is type_, using func_name, func_sign and
        params.

        @param obj: value to transform, interpolated into the query as a
            quoted literal of type type_
        @param func_name: key into the module-level func_names mapping
        @param func_sign: ordered parameter names; '_time' entries are
            skipped when building the argument list
        @param params: dict (or JSON string) of parameter values
        @return: the single transformed value
    '''
    if func_name not in func_names:
        plpy.error('function {} is unknown'.format(func_name))
    func_name = func_names[func_name]
    # params may arrive as a JSON string; basestring keeps Python 2 compat
    if isinstance(params, basestring):  # NOQA
        params = json.loads(params)
    args = [params[p] for p in func_sign if p != '_time']
    args_str, args_val = args_to_array_string(args)
    # NOTE(review): obj and type_ are spliced into the SQL text rather
    # than passed as plan parameters — safe only if both are trusted;
    # the numeric args do go through the prepared plan
    q = 'select {}(\'{}\'::{}{}) r'.format(func_name, obj, type_, args_str)
    plpy.debug(q, args_val)
    plan = plpy.prepare(q, ['numeric'] * len(args_val))
    rv = plpy.execute(plan, args_val)
    if len(rv) != 1:
        plpy.error('unexpected number of rows ({}) returned from {}'.format(len(rv), q))
    result = rv[0].get('r')
    if result is None:
        plpy.error('unexpected value None returned from {}'.format(q))
    return result
def spatial_trend(self, subquery, time_cols, num_classes=7,
                  w_type='knn', num_ngbrs=5, permutations=0,
                  geom_col='the_geom', id_col='cartodb_id'):
    """
    Predict the trends of a unit based on:
    1. history of its transitions to different classes (e.g., 1st
       quantile -> 2nd quantile)
    2. average class of its neighbors

    Inputs:
    @param subquery string: e.g., SELECT the_geom, cartodb_id,
      interesting_time_column FROM table_name
    @param time_cols list of strings: list of strings of column names
    @param num_classes (optional): number of classes to break
      distribution of values into. Currently uses quantile bins.
    @param w_type string (optional): weight type ('knn' or 'queen')
    @param num_ngbrs int (optional): number of neighbors (if knn type)
    @param permutations int (optional): number of permutations for test
      stats
    @param geom_col string (optional): name of column which contains the
      geometries
    @param id_col string (optional): name of column which has the ids of
      the table

    Outputs (zipped per unit, with the unit id last):
    @param trend_up float: probability that a geom will move to a
      higher class
    @param trend_down float: probability that a geom will move to a
      lower class
    @param trend float: (trend_up - trend_down) / trend_static
    @param volatility float: a measure of the volatility based on
      probability stddev(prob array)
    """
    # a transition needs at least two observed time periods
    if len(time_cols) < 2:
        plpy.error('More than one time column needs to be passed')

    params = {"id_col": id_col,
              "time_cols": time_cols,
              "geom_col": geom_col,
              "subquery": subquery,
              "num_ngbrs": num_ngbrs}

    result = self.data_provider.get_markov(w_type, params)

    # build weight; 'r' row-standardizes so each row of weights sums to 1
    weights = pu.get_weight(result, w_type)
    weights.transform = 'r'

    # prep time data (units x time periods)
    t_data = get_time_data(result, time_cols)

    sp_markov_result = ps.Spatial_Markov(t_data,
                                         weights,
                                         k=num_classes,
                                         fixed=False,
                                         permutations=permutations)

    # get lag classes: quantile bin of each unit's spatial lag in the
    # final time period
    lag_classes = ps.Quantiles(
        ps.lag_spatial(weights, t_data[:, -1]),
        k=num_classes).yb

    # look up probability distribution for each unit according to class
    # and lag class
    prob_dist = get_prob_dist(sp_markov_result.P,
                              lag_classes,
                              sp_markov_result.classes[:, -1])

    # find the ups and down and overall distribution of each cell
    trend_up, trend_down, trend, volatility = get_prob_stats(
        prob_dist, sp_markov_result.classes[:, -1])

    # output the results
    return zip(trend,
               trend_up,
               trend_down,
               volatility,
               weights.id_order)
def applyrow(tblname, ev_type, new_row,
             backup_row=None,
             alt_pkey_cols=None, fkey_cols=None,
             fkey_ref_table=None, fkey_ref_cols=None,
             fn_canapply=canapply_dummy,
             fn_colfilter=colfilter_full):
    """Core logic.  Actual decisions will be done in callback functions.

    - [IUD]: If row referenced by fkey does not exist, event is not applied
    - If pkey does not exist but alt_pkey does, row is not applied.

    @param tblname: table name, schema-qualified
    @param ev_type: [IUD]:pkey1,pkey2
    @param new_row: urlencoded row data to apply
    @param backup_row: accepted for API compatibility; currently unused here
    @param alt_pkey_cols: list of alternative columns to consider
    @param fkey_cols: columns in this table that refer to other table
    @param fkey_ref_table: other table referenced here
    @param fkey_ref_cols: column in other table that must match
    @param fn_canapply: callback function, gets new and old row, returns
        whether the row should be applied
    @param fn_colfilter: callback function, gets new and old row, returns
        dict of final columns to be applied
    @return: log string describing what was done, or an "IGN:" string when
        the event was ignored
    """
    # plan-cache dict for skytools.plpy_exec (None disables caching)
    gd = None

    # parse ev_type of the form "<I|U|D>:col1,col2,..."
    tmp = ev_type.split(':', 1)
    if len(tmp) != 2 or tmp[0] not in ('I', 'U', 'D'):
        raise DataError('Unsupported ev_type: ' + repr(ev_type))
    if not tmp[1]:
        raise DataError('No pkey in event')

    cmd = tmp[0]
    pkey_cols = tmp[1].split(',')
    qtblname = skytools.quote_fqident(tblname)

    # parse ev_data
    fields = skytools.db_urldecode(new_row)

    # a '}' could break out of the {name} substitution that
    # skytools.plpy_exec performs, so reject it outright
    if ev_type.find('}') >= 0:
        raise DataError('Really suspicious activity')
    if ",".join(fields.keys()).find('}') >= 0:
        raise DataError('Really suspicious activity 2')

    # generate pkey expressions
    tmp = ["%s = {%s}" % (skytools.quote_ident(k), k) for k in pkey_cols]
    pkey_expr = " and ".join(tmp)
    alt_pkey_expr = None
    if alt_pkey_cols:
        tmp = [
            "%s = {%s}" % (skytools.quote_ident(k), k)
            for k in alt_pkey_cols
        ]
        alt_pkey_expr = " and ".join(tmp)

    log = "data ok"

    #
    # Row data seems fine, now apply it
    #

    if fkey_ref_table:
        # event is dropped when the referenced parent row is missing
        tmp = []
        for k, rk in zip(fkey_cols, fkey_ref_cols):
            tmp.append("%s = {%s}" % (skytools.quote_ident(rk), k))
        fkey_expr = " and ".join(tmp)
        q = "select 1 from only %s where %s" % (
            skytools.quote_fqident(fkey_ref_table),
            fkey_expr)
        res = skytools.plpy_exec(gd, q, fields)
        if not res:
            return "IGN: parent row does not exist"
        log += ", fkey ok"

    # fetch old row
    if alt_pkey_expr:
        q = "select * from only %s where %s for update" % (qtblname, alt_pkey_expr)
        res = skytools.plpy_exec(gd, q, fields)
        if res:
            oldrow = res[0]
            # if altpk matches, but pk not, then delete
            need_del = 0
            for k in pkey_cols:
                # fixme: proper type cmp?
                if fields[k] != str(oldrow[k]):
                    need_del = 1
                    break
            if need_del:
                log += ", altpk del"
                q = "delete from only %s where %s" % (qtblname, alt_pkey_expr)
                skytools.plpy_exec(gd, q, fields)
                res = None
            else:
                log += ", altpk ok"
    else:
        # no altpk
        q = "select * from only %s where %s for update" % (qtblname, pkey_expr)
        # BUGFIX: previously passed None instead of gd, unlike every other
        # plpy_exec call here; behavior is identical while gd is None, but
        # this keeps plan caching consistent if it is ever enabled
        res = skytools.plpy_exec(gd, q, fields)

    # got old row, with same pk and altpk
    if res:
        oldrow = res[0]
        log += ", old row"
        ok = fn_canapply(fields, oldrow)
        if ok:
            log += ", new row better"
        if not ok:
            # ignore the update
            return "IGN:" + log + ", current row more up-to-date"
    else:
        log += ", no old row"
        oldrow = None

    # promote/demote command depending on whether the row already exists
    if res:
        if cmd == 'I':
            cmd = 'U'
    else:
        if cmd == 'U':
            cmd = 'I'

    # allow column changes, but never let the filter drop pkey columns
    if oldrow:
        fields2 = fn_colfilter(fields, oldrow)
        for k in pkey_cols:
            if k not in fields2:
                fields2[k] = fields[k]
        fields = fields2

    # apply change
    if cmd == 'I':
        q = skytools.mk_insert_sql(fields, tblname, pkey_cols)
    elif cmd == 'U':
        q = skytools.mk_update_sql(fields, tblname, pkey_cols)
    elif cmd == 'D':
        q = skytools.mk_delete_sql(fields, tblname, pkey_cols)
    else:
        # unreachable: cmd validated above; plpy.error raises
        plpy.error('Huh')
    plpy.execute(q)
    return log
def applyrow(tblname, ev_type, new_row,
             backup_row=None,
             alt_pkey_cols=None, fkey_cols=None,
             fkey_ref_table=None, fkey_ref_cols=None,
             fn_canapply=canapply_dummy,
             fn_colfilter=colfilter_full):
    """Core logic.  Actual decisions will be done in callback functions.

    NOTE(review): this is a near-verbatim duplicate of an earlier applyrow
    definition in this file; consider consolidating.

    - [IUD]: If row referenced by fkey does not exist, event is not applied
    - If pkey does not exist but alt_pkey does, row is not applied.

    @param tblname: table name, schema-qualified
    @param ev_type: [IUD]:pkey1,pkey2
    @param new_row: urlencoded row data to apply
    @param backup_row: accepted but unused in this function body
    @param alt_pkey_cols: list of alternative columns to consider
    @param fkey_cols: columns in this table that refer to other table
    @param fkey_ref_table: other table referenced here
    @param fkey_ref_cols: column in other table that must match
    @param fn_canapply: callback function, gets new and old row, returns
        whether the row should be applied
    @param fn_colfilter: callback function, gets new and old row, returns
        dict of final columns to be applied
    @return: log string describing what was done, or an "IGN:" string when
        the event was ignored
    """
    # plan-cache dict for skytools.plpy_exec (None disables caching)
    gd = None

    # parse ev_type of the form "<I|U|D>:col1,col2,..."
    tmp = ev_type.split(':', 1)
    if len(tmp) != 2 or tmp[0] not in ('I', 'U', 'D'):
        raise DataError('Unsupported ev_type: ' + repr(ev_type))
    if not tmp[1]:
        raise DataError('No pkey in event')

    cmd = tmp[0]
    pkey_cols = tmp[1].split(',')
    qtblname = skytools.quote_fqident(tblname)

    # parse ev_data
    fields = skytools.db_urldecode(new_row)

    # a '}' could break out of the {name} substitution that
    # skytools.plpy_exec performs, so reject it outright
    if ev_type.find('}') >= 0:
        raise DataError('Really suspicious activity')
    if ",".join(fields.keys()).find('}') >= 0:
        raise DataError('Really suspicious activity 2')

    # generate pkey expressions
    tmp = ["%s = {%s}" % (skytools.quote_ident(k), k) for k in pkey_cols]
    pkey_expr = " and ".join(tmp)
    alt_pkey_expr = None
    if alt_pkey_cols:
        tmp = ["%s = {%s}" % (skytools.quote_ident(k), k) for k in alt_pkey_cols]
        alt_pkey_expr = " and ".join(tmp)

    log = "data ok"

    #
    # Row data seems fine, now apply it
    #

    if fkey_ref_table:
        # event is dropped when the referenced parent row is missing
        tmp = []
        for k, rk in zip(fkey_cols, fkey_ref_cols):
            tmp.append("%s = {%s}" % (skytools.quote_ident(rk), k))
        fkey_expr = " and ".join(tmp)
        q = "select 1 from only %s where %s" % (
            skytools.quote_fqident(fkey_ref_table),
            fkey_expr)
        res = skytools.plpy_exec(gd, q, fields)
        if not res:
            return "IGN: parent row does not exist"
        log += ", fkey ok"

    # fetch old row
    if alt_pkey_expr:
        q = "select * from only %s where %s for update" % (qtblname, alt_pkey_expr)
        res = skytools.plpy_exec(gd, q, fields)
        if res:
            oldrow = res[0]
            # if altpk matches, but pk not, then delete
            need_del = 0
            for k in pkey_cols:
                # fixme: proper type cmp?
                if fields[k] != str(oldrow[k]):
                    need_del = 1
                    break
            if need_del:
                log += ", altpk del"
                q = "delete from only %s where %s" % (qtblname, alt_pkey_expr)
                skytools.plpy_exec(gd, q, fields)
                res = None
            else:
                log += ", altpk ok"
    else:
        # no altpk
        q = "select * from only %s where %s for update" % (qtblname, pkey_expr)
        # NOTE(review): passes None instead of gd, unlike every other
        # plpy_exec call in this function — harmless while gd is None,
        # but inconsistent; confirm intent
        res = skytools.plpy_exec(None, q, fields)

    # got old row, with same pk and altpk
    if res:
        oldrow = res[0]
        log += ", old row"
        ok = fn_canapply(fields, oldrow)
        if ok:
            log += ", new row better"
        if not ok:
            # ignore the update
            return "IGN:" + log + ", current row more up-to-date"
    else:
        log += ", no old row"
        oldrow = None

    # promote/demote command depending on whether the row already exists
    if res:
        if cmd == 'I':
            cmd = 'U'
    else:
        if cmd == 'U':
            cmd = 'I'

    # allow column changes, but never let the filter drop pkey columns
    if oldrow:
        fields2 = fn_colfilter(fields, oldrow)
        for k in pkey_cols:
            if k not in fields2:
                fields2[k] = fields[k]
        fields = fields2

    # apply change
    if cmd == 'I':
        q = skytools.mk_insert_sql(fields, tblname, pkey_cols)
    elif cmd == 'U':
        q = skytools.mk_update_sql(fields, tblname, pkey_cols)
    elif cmd == 'D':
        q = skytools.mk_delete_sql(fields, tblname, pkey_cols)
    else:
        # unreachable: cmd validated above; plpy.error raises
        plpy.error('Huh')
    plpy.execute(q)
    return log
# Interior of a prediction routine (enclosing ``def`` is outside this
# chunk — it ends in ``return``); ``features``, ``target_query`` and
# ``model`` come from the enclosing scope.

# rows fetched per cursor round-trip; bounds memory use per batch
batch_size = 1000

# cast every feature column to numeric and pack them into one SQL array
# per row; column names are double-quoted verbatim
joined_features = ','.join(['"{0}"::numeric'.format(a) for a in features])

try:
    cursor = plpy.cursor('SELECT Array[{joined_features}] As features FROM ({target_query}) As a'.format(
        joined_features=joined_features,
        target_query=target_query))
except Exception, e:
    plpy.error('Failed to build segmentation model: %s' % e)

results = []
while True:
    rows = cursor.fetch(batch_size)
    if not rows:
        # cursor exhausted
        break
    batch = np.row_stack([np.array(row['features'], dtype=float)
                          for row in rows])
    # TODO: imputation should use the global column means, not the
    # per-batch means — per-batch imputation causes inconsistent values
    # across batch boundaries
    batch = replace_nan_with_mean(batch)
    prediction = model.predict(batch)
    results.append(prediction)

try:
    # ids are aggregated with the same ORDER BY used to feed the cursor,
    # so they line up positionally with the concatenated predictions
    # NOTE(review): the cursor query above has no explicit ORDER BY —
    # confirm row order actually matches this aggregation
    cartodb_ids = plpy.execute('''
        SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids
        FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids']
except Exception, e:
    plpy.error('Failed to build segmentation model: %s' % e)

return cartodb_ids, np.concatenate(results)