def detail_query(args, aggregate=False):
    meta_params = ('dataset', 'shapeset', 'data_type', 'geom', 'offset',
                   'limit')
    meta_vals = (args.data.get(k) for k in meta_params)
    dataset, shapeset, data_type, geom, offset, limit = meta_vals

    # If no tree filters were provided, a little formatting is needed
    # to make the general filters into an 'and' tree.
    if not has_tree_filters(args.data):
        # Creates an AND condition tree and adds it to args.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            request_args=args.data,
            ignore=['shapeset']
        )

    # Sort out the filter conditions from the rest of the user arguments.
    filters = {k: v for k, v in args.data.items() if 'filter' in k}

    # Get upset if they specify more than a dataset and shapeset filter.
    if len(filters) > 2:
        return bad_request('Too many table filters provided.')

    # Query the point dataset.
    q = session.query(dataset)

    # If the user specified a geom, filter results to those within its shape.
    if geom:
        q = q.filter(dataset.c.geom.ST_Within(
            sqlalchemy.func.ST_GeomFromGeoJSON(geom)
        ))

    # Retrieve the filters and build conditions from them if they exist.
    point_ctree = filters.get(dataset.name + '__filter')

    # If the user specified point dataset filters, parse and apply them.
    if point_ctree:
        point_conditions = parse_tree(dataset, point_ctree)
        q = q.filter(point_conditions)

    # If a user specified a shape dataset, it was either through the /shapes
    # endpoint, which uses the aggregate result, or through the /detail
    # endpoint, which uses the joined result.
    if shapeset is not None:
        if aggregate:
            q = q.from_self(shapeset).filter(
                dataset.c.geom.ST_Intersects(shapeset.c.geom)
            ).group_by(shapeset)
        else:
            shape_columns = [
                '{}.{} as {}'.format(shapeset.name, col.name, col.name)
                for col in shapeset.c
            ]
            q = q.join(shapeset, dataset.c.geom.ST_Within(shapeset.c.geom))
            q = q.add_columns(*shape_columns)

        # If there's a filter specified for the shape dataset, apply it.
        shape_ctree = filters.get(shapeset.name + '__filter')
        if shape_ctree:
            shape_conditions = parse_tree(shapeset, shape_ctree)
            q = q.filter(shape_conditions)

    return q
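# A hedged illustration of the condition-tree format that
# request_args_to_condition_tree produces and parse_tree consumes: a boolean
# operator node whose 'val' holds column-comparison leaves. The key names
# ('op', 'col', 'val') and the dataset name 'crimes' are assumptions for
# illustration only, not confirmed against the parse_tree implementation.
example_ctree = {
    'op': 'and',
    'val': [
        {'op': 'eq', 'col': 'primary_type', 'val': 'THEFT'},
        {'op': 'ge', 'col': 'point_date', 'val': '2016-01-01'},
    ],
}
# detail_query would then find this under the '<dataset>__filter' key:
# args.data['crimes__filter'] = example_ctree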
def _export_shape(args):
    """Route logic for /shapes/<shapeset>/ endpoint. Builds a SQL query that
    returns records for a single specified shape dataset.

    :param args: ValidatorResult of user-provided arguments
    :returns: SQL query string, or a 404 response if the shape dataset
        could not be found
    """
    meta_params = ('shapeset', 'data_type', 'geom')
    meta_vals = (args.data.get(k) for k in meta_params)
    shapeset, data_type, geom = meta_vals

    if shapeset is None:
        error_message = 'Could not find shape dataset {}'
        error_message = error_message.format(request.args['shape'])
        return make_response(error_message, 404)

    query = 'SELECT * FROM {}'.format(shapeset.name)
    conditions = ''

    if has_tree_filters(args.data):
        # A string literal is required for ogr2ogr to function correctly.
        ctree = args.data[shapeset.name + '__filter']
        conditions = str(parse_tree(shapeset, ctree, literally=True))

    if geom:
        if conditions:
            # Note the surrounding spaces, so the clause doesn't fuse with
            # the preceding condition.
            conditions += ' AND '
        conditions += "ST_Intersects({}.geom, ST_GeomFromGeoJSON('{}'))".format(
            shapeset.name, geom)

    if conditions:
        query += ' WHERE ' + conditions

    return query
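# A minimal sketch of the string _export_shape builds, assuming a shape
# dataset named 'neighborhoods' (hypothetical) and a small GeoJSON polygon.
# Because the result is handed to ogr2ogr as raw SQL, conditions are rendered
# as literals rather than bound parameters.
shapeset_name = 'neighborhoods'
geom = ('{"type": "Polygon", "coordinates": '
        '[[[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]]}')
query = 'SELECT * FROM {}'.format(shapeset_name)
query += " WHERE ST_Intersects({}.geom, ST_GeomFromGeoJSON('{}'))".format(
    shapeset_name, geom)
# -> SELECT * FROM neighborhoods
#    WHERE ST_Intersects(neighborhoods.geom, ST_GeomFromGeoJSON('{...}'))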
def _detail_aggregate(args):
    """Aggregates the specified dataset into a timeseries of row counts,
    one record per time bucket, with brief temporal and spatial information.
    This can give a user of the platform a quick overview of what is
    available within their constraints.

    :param args: dictionary of request arguments
    :returns: csv or json response object
    """
    meta_params = ('obs_date__ge', 'obs_date__le', 'agg', 'geom', 'dataset')
    meta_vals = (args.data.get(k) for k in meta_params)
    start_date, end_date, agg, geom, dataset = meta_vals

    time_counts = []

    if not has_tree_filters(args.data):
        # The obs_date arguments set the bounds of all the aggregates.
        # We don't want to create a condition tree that has point_date filters.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            args.data, ignore=['obs_date__ge', 'obs_date__le']
        )

    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():

        # This pattern matches the last occurrence of the '__' pattern.
        # Prevents an error that is caused by dataset names with trailing
        # underscores.
        tablename = re.split(r'__(?!_)', tablename)[0]
        table = MetaTable.get_by_dataset_name(tablename).point_table

        try:
            conditions = parse_tree(table, condition_tree)
        except ValueError:
            # Catches an empty condition tree.
            conditions = None

        try:
            ts = MetaTable.get_by_dataset_name(table.name).timeseries_one(
                agg, start_date, end_date, geom, conditions
            )
        except Exception as e:
            return internal_error('Failed to construct timeseries', e)

        time_counts += [{'count': c, 'datetime': d} for c, d in ts[1:]]

    resp = None

    datatype = args.data['data_type']
    if datatype == 'json':
        resp = json_response_base(args, time_counts, request.args)
        resp['count'] = sum([c['count'] for c in time_counts])
        resp = make_response(
            json.dumps(resp, default=unknown_object_json_handler), 200)
        resp.headers['Content-Type'] = 'application/json'

    elif datatype == 'csv':
        resp = form_csv_detail_response(['point_date', 'hash'], time_counts)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

    return resp
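import re

# Why the loop above splits with re.split(r'__(?!_)', ...) instead of
# str.split('__'): a dataset name may itself end in underscores, and only
# the final plain '__' separates it from the 'filter' suffix. The table
# names below are hypothetical.
print(re.split(r'__(?!_)', 'crimes__filter')[0])    # 'crimes'
print(re.split(r'__(?!_)', 'crimes___filter')[0])   # 'crimes_'
print('crimes___filter'.split('__')[0])             # 'crimes' -- wrong table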
def _grid(args):
    meta_params = ('dataset', 'geom', 'resolution', 'buffer',
                   'obs_date__ge', 'obs_date__le')
    meta_vals = (args.data.get(k) for k in meta_params)
    point_table, geom, resolution, buffer_, obs_date__ge, obs_date__le = meta_vals

    result_rows = []

    if not has_tree_filters(args.data):
        tname = point_table.name
        args.data[tname + '__filter'] = request_args_to_condition_tree(
            request_args=args.data,
            ignore=['buffer', 'resolution']
        )

    # We only build conditions from values with a key containing 'filter'.
    # Therefore we only build dataset conditions from condition trees.
    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():

        tablename = tablename.split('__')[0]
        metatable = MetaTable.get_by_dataset_name(tablename)
        table = metatable.point_table
        conditions = parse_tree(table, condition_tree)

        try:
            registry_row = MetaTable.get_by_dataset_name(table.name)
            # make_grid expects conditions to be iterable.
            grid_rows, size_x, size_y = registry_row.make_grid(
                resolution,
                geom,
                [conditions],
                {'upper': obs_date__le, 'lower': obs_date__ge}
            )
            result_rows += grid_rows
        except Exception as e:
            return internal_error('Could not make grid aggregation.', e)

    resp = geojson_response_base()
    for value in result_rows:
        if value[1]:
            # Python 2 idiom; the revised version below decodes the hex WKB
            # with codecs.decode instead.
            pt = shapely.wkb.loads(value[1].decode('hex'))
            south, west = (pt.x - (size_x / 2)), (pt.y - (size_y / 2))
            north, east = (pt.x + (size_x / 2)), (pt.y + (size_y / 2))
            new_geom = shapely.geometry.box(south, west, north, east).__geo_interface__
        else:
            new_geom = None
        new_property = {'count': value[0]}
        add_geojson_feature(resp, new_geom, new_property)

    resp = make_response(json.dumps(resp, default=date_json_handler), 200)
    resp.headers['Content-Type'] = 'application/json'
    return resp
def _grid(args):
    meta_params = ('dataset', 'geom', 'resolution', 'buffer',
                   'obs_date__ge', 'obs_date__le')
    meta_vals = (args.data.get(k) for k in meta_params)
    point_table, geom, resolution, buffer_, obs_date__ge, obs_date__le = meta_vals

    result_rows = []

    if not has_tree_filters(args.data):
        tname = point_table.name
        args.data[tname + '__filter'] = request_args_to_condition_tree(
            request_args=args.data,
            ignore=['buffer', 'resolution'])

    # We only build conditions from values with a key containing 'filter'.
    # Therefore we only build dataset conditions from condition trees.
    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():

        tablename = tablename.rsplit('__')[0]
        metatable = MetaTable.get_by_dataset_name(tablename)
        table = metatable.point_table
        conditions = parse_tree(table, condition_tree)

        try:
            registry_row = MetaTable.get_by_dataset_name(table.name)
            # make_grid expects conditions to be iterable.
            grid_rows, size_x, size_y = registry_row.make_grid(
                resolution, geom, [conditions],
                {'upper': obs_date__le, 'lower': obs_date__ge})
            result_rows += grid_rows
        except Exception as e:
            msg = 'Could not make grid aggregation.'
            return api_response.make_raw_error('{}: {}'.format(msg, e))

    resp = api_response.geojson_response_base()
    for value in result_rows:
        if value[1]:
            pt = shapely.wkb.loads(codecs.decode(value[1], 'hex'))
            south, west = (pt.x - (size_x / 2)), (pt.y - (size_y / 2))
            north, east = (pt.x + (size_x / 2)), (pt.y + (size_y / 2))
            new_geom = shapely.geometry.box(south, west, north, east).__geo_interface__
        else:
            new_geom = None
        new_property = {'count': value[0]}
        api_response.add_geojson_feature(resp, new_geom, new_property)

    return resp
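import codecs

import shapely.geometry
import shapely.wkb

# A self-contained sketch of the cell-building step above: decode a
# hex-encoded WKB centroid, then expand it by half a cell in each direction
# to get the grid square. The centroid and cell size are fabricated;
# '0101...F03F' is the WKB for POINT (1 1).
hex_wkb = '0101000000000000000000F03F000000000000F03F'
pt = shapely.wkb.loads(codecs.decode(hex_wkb, 'hex'))
size_x = size_y = 0.5
cell = shapely.geometry.box(pt.x - size_x / 2, pt.y - size_y / 2,
                            pt.x + size_x / 2, pt.y + size_y / 2)
print(cell.__geo_interface__)  # GeoJSON-ready Polygon dict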
def _deserialize(self, value, attr, data):
    feature = request.args['feature']
    network = request.view_args['network']
    redshift_base.metadata.reflect()
    try:
        parsed_json = json.loads(value)
        table = redshift_base.metadata.tables[network + '__' + feature]
        valid_tree(table, parsed_json)
        return parse_tree(table, parsed_json)
    except KeyError as err:
        raise ValidationError(str(err))
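# A hedged sketch of what this deserializer receives, assuming a sensor
# network named 'array_of_things' and a feature named 'temperature' (both
# hypothetical, as is the URL shape). The raw query-string value is a JSON
# condition tree, validated against the reflected Redshift table
# 'array_of_things__temperature' before being parsed into a SQLAlchemy
# condition; a missing table or a bad tree key surfaces as a KeyError and
# is re-raised as a ValidationError. Tree keys follow the assumed schema
# from the earlier illustration.
#
# GET /sensor-networks/array_of_things/query
#     ?feature=temperature
#     &filter={"op": "ge", "col": "temperature", "val": 20}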
def _detail_aggregate(args):
    """Aggregates the specified dataset into a timeseries of row counts,
    one record per time bucket, with brief temporal and spatial information.
    This can give a user of the platform a quick overview of what is
    available within their constraints.

    :param args: dictionary of request arguments
    :returns: list of dicts, one per time bucket, with 'count' and
        'datetime' keys
    """
    meta_params = ('obs_date__ge', 'obs_date__le', 'agg', 'geom', 'dataset')
    meta_vals = (args.data.get(k) for k in meta_params)
    start_date, end_date, agg, geom, dataset = meta_vals

    time_counts = []

    if not has_tree_filters(args.data):
        # The obs_date arguments set the bounds of all the aggregates.
        # We don't want to create a condition tree that has point_date filters.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            args.data, ignore=['obs_date__ge', 'obs_date__le'])

    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():

        # This pattern matches the last occurrence of the '__' pattern.
        # Prevents an error that is caused by dataset names with trailing
        # underscores.
        tablename = re.split(r'__(?!_)', tablename)[0]
        table = MetaTable.get_by_dataset_name(tablename).point_table

        try:
            conditions = parse_tree(table, condition_tree)
        except ValueError:
            # Catches an empty condition tree.
            conditions = None

        try:
            ts = MetaTable.get_by_dataset_name(table.name).timeseries_one(
                agg, start_date, end_date, geom, conditions)
        except Exception as e:
            msg = 'Failed to construct timeseries'
            return api_response.make_raw_error('{}: {}'.format(msg, e))

        time_counts += [{'count': c, 'datetime': d} for c, d in ts[1:]]

    return time_counts
def detail_query(args, aggregate=False):
    meta_params = ('dataset', 'shapeset', 'data_type', 'geom',
                   'obs_date__ge', 'obs_date__le')
    meta_vals = (args.data.get(k) for k in meta_params)
    dataset, shapeset, data_type, geom, obs_date__ge, obs_date__le = meta_vals

    # If no tree filters were provided, a little formatting is needed
    # to make the general filters into an 'and' tree.
    if not has_tree_filters(args.data):
        # Creates an AND condition tree and adds it to args.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            request_args=args.data,
            ignore=['shapeset'])

    # Sort out the filter conditions from the rest of the user arguments.
    filters = {k: v for k, v in args.data.items() if 'filter' in k}

    # Get upset if they specify more than a dataset and shapeset filter.
    if len(filters) > 2:
        return api_response.bad_request('Too many table filters provided.')

    # Query the point dataset.
    q = postgres_session.query(dataset)

    # If the user specified a geom, filter results to those within its shape.
    if geom:
        q = q.filter(
            dataset.c.geom.ST_Within(sqlalchemy.func.ST_GeomFromGeoJSON(geom)))

    # Retrieve the filters and build conditions from them if they exist.
    point_ctree = filters.get(dataset.name + '__filter')

    # If the user specified point dataset filters, parse and apply them.
    if point_ctree:
        point_conditions = parse_tree(dataset, point_ctree)
        q = q.filter(point_conditions)

    # To allow both obs_date meta params and filter trees.
    q = q.filter(dataset.c.point_date >= obs_date__ge) if obs_date__ge else q
    q = q.filter(dataset.c.point_date <= obs_date__le) if obs_date__le else q

    # If a user specified a shape dataset, it was either through the /shapes
    # endpoint, which uses the aggregate result, or through the /detail
    # endpoint, which uses the joined result.
    if shapeset is not None:
        if aggregate:
            q = q.from_self(shapeset).filter(
                dataset.c.geom.ST_Intersects(shapeset.c.geom)
            ).group_by(shapeset)
        else:
            shape_columns = [
                '{}.{} as {}'.format(shapeset.name, col.name, col.name)
                for col in shapeset.c
            ]
            q = q.join(shapeset, dataset.c.geom.ST_Within(shapeset.c.geom))
            q = q.add_columns(*shape_columns)

        # If there's a filter specified for the shape dataset, apply it.
        shape_ctree = filters.get(shapeset.name + '__filter')
        if shape_ctree:
            shape_conditions = parse_tree(shapeset, shape_ctree)
            q = q.filter(shape_conditions)

    return q
def _timeseries(args):
    meta_params = ['geom', 'dataset', 'dataset_name__in', 'obs_date__ge',
                   'obs_date__le', 'agg']
    meta_vals = [args.data.get(k) for k in meta_params]
    geom, dataset, table_names, start_date, end_date, agg = meta_vals

    ctrees = {}

    if has_tree_filters(args.data):
        # Timeseries is a little tricky: building a condition tree for every
        # table would be wasteful, so we only parse the trees the user
        # actually provided.
        for field, value in args.data.items():
            if 'filter' in field:
                # This pattern matches the last occurrence of the '__'
                # pattern. Prevents an error that is caused by dataset names
                # with trailing underscores.
                tablename = re.split(r'__(?!_)', field)[0]
                metarecord = MetaTable.get_by_dataset_name(tablename)
                pt = metarecord.point_table
                ctrees[pt.name] = parse_tree(pt, value)

    # Just cleanliness, since we don't use this argument. It doesn't have
    # to show up in the JSON response.
    del args.data['dataset']

    # If no dataset_name__in list was provided, fill it in by invoking
    # MetaTable.index() here, not in the validator. This way the list stays
    # up to date.
    if table_names is None:
        table_names = MetaTable.index()
        args.data['dataset_name__in'] = table_names

    # If a single dataset was provided, it's the only one we need to consider.
    if dataset is not None:
        table_names = [dataset.name]
        del args.data['dataset_name__in']

    # Remove table names which wouldn't return anything for the query, given
    # the time and geom constraints.
    try:
        table_names = MetaTable.narrow_candidates(table_names, start_date,
                                                  end_date, geom)
    except Exception as e:
        msg = 'Failed to gather candidate tables.'
        return internal_error(msg, e)

    # If no table names remain, later code would error out. Better to return
    # now and inform the user that the request wouldn't have found anything.
    if not table_names:
        return bad_request("Your request doesn't return any results. Try "
                           "adjusting your time constraint or location "
                           "parameters.")

    try:
        panel = MetaTable.timeseries_all(
            table_names, agg, start_date, end_date, geom, ctrees
        )
    except Exception as e:
        msg = 'Failed to construct timeseries.'
        return internal_error(msg, e)

    panel = MetaTable.attach_metadata(panel)
    resp = json_response_base(args, panel, args.data)

    datatype = args.data['data_type']
    if datatype == 'json':
        resp = make_response(
            json.dumps(resp, default=unknown_object_json_handler), 200)
        resp.headers['Content-Type'] = 'application/json'

    elif datatype == 'csv':
        # Response format:
        # temporal_group,dataset_name_1,dataset_name_2
        # 2014-02-24 00:00:00,235,653
        # 2014-03-03 00:00:00,156,624
        fields = ['temporal_group']
        for o in resp['objects']:
            fields.append(o['dataset_name'])

        csv_resp = []
        i = 0
        for k, g in groupby(resp['objects'], key=itemgetter('dataset_name')):
            l_g = list(g)[0]

            j = 0
            for row in l_g['items']:
                # On the first iteration, populate the first column with
                # the temporal_groups.
                if i == 0:
                    csv_resp.append([row['datetime']])
                csv_resp[j].append(row['count'])
                j += 1
            i += 1

        csv_resp.insert(0, fields)
        csv_resp = make_csv(csv_resp)
        resp = make_response(csv_resp, 200)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

    return resp
def timeseries():
    validator = TimeseriesValidator()

    deserialized_arguments = validator.load(request.args)
    serialized_arguments = json.loads(
        validator.dumps(deserialized_arguments.data).data)

    if deserialized_arguments.errors:
        return make_error(deserialized_arguments.errors, 400,
                          serialized_arguments)

    qargs = deserialized_arguments.data

    agg = qargs['agg']
    data_type = qargs['data_type']
    geom = qargs['location_geom__within']
    pointset = qargs['dataset_name']
    pointsets = qargs['dataset_name__in']
    start_date = qargs['obs_date__ge']
    end_date = qargs['obs_date__le']

    ctrees = {}
    raw_ctrees = {}

    if has_tree_filters(request.args):
        # Timeseries is a little tricky: building a condition tree for every
        # table would be wasteful, so we only parse the trees the user
        # actually provided.
        for field, value in request.args.items():
            if 'filter' in field:
                # This pattern matches the last occurrence of the '__'
                # pattern. Prevents an error that is caused by dataset names
                # with trailing underscores.
                tablename = re.split(r'__(?!_)', field)[0]
                metarecord = MetaTable.get_by_dataset_name(tablename)
                pt = metarecord.point_table
                ctrees[pt.name] = parse_tree(pt, json.loads(value))
                raw_ctrees[pt.name] = json.loads(value)

    point_set_names = [p.name for p in pointsets + [pointset] if p is not None]
    if not point_set_names:
        point_set_names = MetaTable.index()

    results = MetaTable.timeseries_all(point_set_names, agg, start_date,
                                       end_date, geom, ctrees)

    payload = {
        'meta': {
            'message': [],
            'query': serialized_arguments,
            'status': 'ok',
            'total': len(results)
        },
        'objects': results
    }

    if ctrees:
        payload['meta']['query']['filters'] = raw_ctrees

    if data_type == 'json':
        return jsonify(payload)

    elif data_type == 'csv':
        # Response format:
        # temporal_group,dataset_name_1,dataset_name_2
        # 2014-02-24 00:00:00,235,653
        # 2014-03-03 00:00:00,156,624
        fields = ['temporal_group']
        for o in payload['objects']:
            fields.append(o['dataset_name'])

        csv_resp = []
        i = 0
        for k, g in groupby(payload['objects'],
                            key=itemgetter('dataset_name')):
            l_g = list(g)[0]

            j = 0
            for row in l_g['items']:
                # On the first iteration, populate the first column with
                # the temporal_groups.
                if i == 0:
                    csv_resp.append([row['datetime']])
                csv_resp[j].append(row['count'])
                j += 1
            i += 1

        csv_resp.insert(0, fields)
        csv_resp = make_csv(csv_resp)
        resp = make_response(csv_resp, 200)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate
        return resp
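from itertools import groupby
from operator import itemgetter

# A standalone sketch of the CSV pivot used above: one row per temporal
# group, one column per dataset. The input data is fabricated to match the
# format documented in the comments.
objects = [
    {'dataset_name': 'crimes', 'items': [
        {'datetime': '2014-02-24 00:00:00', 'count': 235},
        {'datetime': '2014-03-03 00:00:00', 'count': 156},
    ]},
    {'dataset_name': 'permits', 'items': [
        {'datetime': '2014-02-24 00:00:00', 'count': 653},
        {'datetime': '2014-03-03 00:00:00', 'count': 624},
    ]},
]

rows = []
for i, (_, group) in enumerate(groupby(objects, key=itemgetter('dataset_name'))):
    for j, item in enumerate(list(group)[0]['items']):
        if i == 0:
            rows.append([item['datetime']])  # first dataset seeds the rows
        rows[j].append(item['count'])
rows.insert(0, ['temporal_group'] + [o['dataset_name'] for o in objects])

# rows == [['temporal_group', 'crimes', 'permits'],
#          ['2014-02-24 00:00:00', 235, 653],
#          ['2014-03-03 00:00:00', 156, 624]]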