Example #1
def detail_query(args, aggregate=False):

    meta_params = ('dataset', 'shapeset', 'data_type', 'geom', 'offset', 'limit')
    meta_vals = (args.data.get(k) for k in meta_params)
    dataset, shapeset, data_type, geom, offset, limit = meta_vals

    # If no tree filters were provided, a little formatting is needed
    # to make the general filters into an 'and' tree.
    if not has_tree_filters(args.data):
        # Creates an AND condition tree and adds it to args.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            request_args=args.data,
            ignore=['shapeset']
        )

    # Sort out the filter conditions from the rest of the user arguments.
    filters = {k: v for k, v in args.data.items() if 'filter' in k}

    # Get upset if they specify more than a dataset and shapeset filter.
    if len(filters) > 2:
        return bad_request("Too many table filters provided.")

    # Query the point dataset.
    q = session.query(dataset)

    # If the user specified a geom, filter results to those within its shape.
    if geom:
        q = q.filter(dataset.c.geom.ST_Within(
            sqlalchemy.func.ST_GeomFromGeoJSON(geom)
        ))

    # Retrieve the filters and build conditions from them if they exist.
    point_ctree = filters.get(dataset.name + '__filter')

    # If the user specified point dataset filters, parse and apply them.
    if point_ctree:
        point_conditions = parse_tree(dataset, point_ctree)
        q = q.filter(point_conditions)

    # If a user specified a shape dataset, it was either through the /shapes
    # endpoint, which uses the aggregate result, or through the /detail
    # endpoint, which uses the joined result.
    if shapeset is not None:
        if aggregate:
            q = q.from_self(shapeset).filter(dataset.c.geom.ST_Intersects(shapeset.c.geom)).group_by(shapeset)
        else:
            shape_columns = ['{}.{} as {}'.format(shapeset.name, col.name, col.name) for col in shapeset.c]
            q = q.join(shapeset, dataset.c.geom.ST_Within(shapeset.c.geom))
            q = q.add_columns(*shape_columns)

        # If there's a filter specified for the shape dataset, apply those conditions.
        shape_ctree = filters.get(shapeset.name + '__filter')
        if shape_ctree:
            shape_conditions = parse_tree(shapeset, shape_ctree)
            q = q.filter(shape_conditions)

    return q
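
For context, the condition trees consumed by request_args_to_condition_tree and parse_tree are nested dictionaries of column comparisons joined by boolean operators. Below is a minimal sketch of what an 'and' tree and its application might look like; the key names 'op', 'col', and 'val' and the column values are assumptions for illustration, not taken from these examples:

# Hypothetical condition tree built from something like
# ?license_type=food&point_date__ge=2016-01-01
example_ctree = {
    'op': 'and',
    'val': [
        {'op': 'eq', 'col': 'license_type', 'val': 'food'},
        {'op': 'ge', 'col': 'point_date', 'val': '2016-01-01'},
    ],
}
# parse_tree would turn this into a SQLAlchemy condition that is applied as
# q = q.filter(parse_tree(dataset, example_ctree))
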
Example #2
def _export_shape(args):
    """Route logic for /shapes/<shapeset>/ endpoint. Returns records for a
    single specified shape dataset.

    :param args: ValidatorResult of user provided arguments
    :returns: response object
    """
    meta_params = ('shapeset', 'data_type', 'geom')
    meta_vals = (args.data.get(k) for k in meta_params)
    shapeset, data_type, geom = meta_vals

    if shapeset is None:
        error_message = 'Could not find shape dataset {}'
        error_message = error_message.format(request.args['shape'])
        return make_response(error_message, 404)

    query = 'SELECT * FROM {}'.format(shapeset.name)
    conditions = ''

    if has_tree_filters(args.data):
        # A string literal is required for ogr2ogr to function correctly.
        ctree = args.data[shapeset.name + '__filter']
        conditions = str(parse_tree(shapeset, ctree, literally=True))

    if geom:
        if conditions:
            # Pad with spaces so the two clauses don't run together in the SQL.
            conditions += ' AND '
        conditions += "ST_Intersects({}.geom, ST_GeomFromGeoJSON('{}'))".format(
            shapeset.name, geom)

    if conditions:
        query += ' WHERE ' + conditions

    return query
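
As a concrete illustration, with a hypothetical shapeset named neighborhoods, one tree filter, and a geom argument, the string assembled above would look roughly like:

    SELECT * FROM neighborhoods
    WHERE pri_neigh = 'Loop' AND ST_Intersects(neighborhoods.geom,
        ST_GeomFromGeoJSON('{"type": "Polygon", "coordinates": ...}'))

The table and column names are made up; the point is that parse_tree is asked for literal values (literally=True) so that the finished string can be handed straight to ogr2ogr, as the comment above notes.
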
Example #3
def _export_shape(args):
    """Route logic for /shapes/<shapeset>/ endpoint. Returns records for a
    single specified shape dataset.

    :param args: ValidatorResult of user provided arguments
    :returns: response object
    """
    meta_params = ('shapeset', 'data_type', 'geom')
    meta_vals = (args.data.get(k) for k in meta_params)
    shapeset, data_type, geom = meta_vals

    if shapeset is None:
        error_message = 'Could not find shape dataset {}'
        error_message = error_message.format(request.args['shape'])
        return make_response(error_message, 404)

    query = 'SELECT * FROM {}'.format(shapeset.name)
    conditions = ''

    if has_tree_filters(args.data):
        # A string literal is required for ogr2ogr to function correctly.
        ctree = args.data[shapeset.name + '__filter']
        conditions = str(parse_tree(shapeset, ctree, literally=True))

    if geom:
        if conditions:
            # Pad with spaces so the two clauses don't run together in the SQL.
            conditions += ' AND '
        conditions += "ST_Intersects({}.geom, ST_GeomFromGeoJSON('{}'))".format(shapeset.name, geom)

    if conditions:
        query += ' WHERE ' + conditions

    return query
Example #4
def _detail_aggregate(args):
    """Returns a record for every row in the specified dataset with brief
    temporal and spatial information about the row. This can give a user of the
    platform a quick overview of what is available within their constraints.

    :param args: dictionary of request arguments

    :returns: csv or json response object"""

    meta_params = ('obs_date__ge', 'obs_date__le', 'agg', 'geom', 'dataset')
    meta_vals = (args.data.get(k) for k in meta_params)
    start_date, end_date, agg, geom, dataset = meta_vals

    time_counts = []

    if not has_tree_filters(args.data):
        # The obs_date arguments set the bounds of all the aggregates.
        # We don't want to create a condition tree that has point_date filters.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            args.data, ignore=['obs_date__ge', 'obs_date__le']
        )

    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():
        # This pattern matches the last occurrence of the '__' pattern.
        # Prevents an error that is caused by dataset names with trailing
        # underscores.
        tablename = re.split(r'__(?!_)', tablename)[0]
        table = MetaTable.get_by_dataset_name(tablename).point_table
        try:
            conditions = parse_tree(table, condition_tree)
        except ValueError:  # Catches empty condition tree.
            conditions = None

        try:
            ts = MetaTable.get_by_dataset_name(table.name).timeseries_one(
                agg, start_date, end_date, geom, conditions
            )
        except Exception as e:
            return internal_error('Failed to construct timeseries', e)

        time_counts += [{'count': c, 'datetime': d} for c, d in ts[1:]]

    resp = None

    datatype = args.data['data_type']
    if datatype == 'json':
        resp = json_response_base(args, time_counts, request.args)
        resp['count'] = sum([c['count'] for c in time_counts])
        resp = make_response(json.dumps(resp, default=unknown_object_json_handler), 200)
        resp.headers['Content-Type'] = 'application/json'

    elif datatype == 'csv':
        resp = form_csv_detail_response(['point_date', 'hash'], time_counts)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

    return resp
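
For reference, each element of time_counts above is a plain dict built from one timeseries row, so after the loop the list might look like this (values are illustrative only):

# Illustrative contents; real rows come from timeseries_one and depend on agg.
from datetime import datetime

time_counts = [
    {'count': 235, 'datetime': datetime(2014, 2, 24)},
    {'count': 156, 'datetime': datetime(2014, 3, 3)},
]
# In the JSON branch, resp['count'] would then be 235 + 156 == 391.
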
Example #5
def _grid(args):

    meta_params = ('dataset', 'geom', 'resolution', 'buffer', 'obs_date__ge',
                   'obs_date__le')
    meta_vals = (args.data.get(k) for k in meta_params)
    point_table, geom, resolution, buffer_, obs_date__ge, obs_date__le = meta_vals

    result_rows = []

    if not has_tree_filters(args.data):
        tname = point_table.name
        args.data[tname + '__filter'] = request_args_to_condition_tree(
            request_args=args.data,
            ignore=['buffer', 'resolution']
        )

    # We only build conditions from values with a key containing 'filter'.
    # Therefore we only build dataset conditions from condition trees.
    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():

        tablename = tablename.split('__')[0]

        metatable = MetaTable.get_by_dataset_name(tablename)
        table = metatable.point_table
        conditions = parse_tree(table, condition_tree)

        try:
            registry_row = MetaTable.get_by_dataset_name(table.name)
            # make_grid expects conditions to be iterable.
            grid_rows, size_x, size_y = registry_row.make_grid(
                resolution,
                geom,
                [conditions],
                {'upper': obs_date__le, 'lower': obs_date__ge}
            )
            result_rows += grid_rows
        except Exception as e:
            return internal_error('Could not make grid aggregation.', e)

    resp = geojson_response_base()
    for value in result_rows:
        if value[1]:
            pt = shapely.wkb.loads(value[1].decode('hex'))
            # pt.x is longitude and pt.y is latitude; build the cell's bounding box.
            west, south = (pt.x - (size_x / 2)), (pt.y - (size_y / 2))
            east, north = (pt.x + (size_x / 2)), (pt.y + (size_y / 2))
            new_geom = shapely.geometry.box(west, south, east, north).__geo_interface__
        else:
            new_geom = None
        new_property = {'count': value[0], }
        add_geojson_feature(resp, new_geom, new_property)

    resp = make_response(json.dumps(resp, default=date_json_handler), 200)
    resp.headers['Content-Type'] = 'application/json'
    return resp
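
The bounding-box arithmetic in the loop above can be read in isolation; here is a minimal standalone sketch with made-up centroid coordinates and cell sizes:

import shapely.geometry

# Hypothetical grid-cell centroid (lon, lat) and cell size in degrees.
centroid_x, centroid_y = -87.63, 41.88
size_x, size_y = 0.01, 0.01

west, south = centroid_x - size_x / 2, centroid_y - size_y / 2
east, north = centroid_x + size_x / 2, centroid_y + size_y / 2

# box(minx, miny, maxx, maxy) builds a rectangular cell around the centroid.
cell = shapely.geometry.box(west, south, east, north)
feature_geom = cell.__geo_interface__  # GeoJSON-style mapping for the response
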
Example #6
def _grid(args):
    meta_params = ('dataset', 'geom', 'resolution', 'buffer', 'obs_date__ge',
                   'obs_date__le')
    meta_vals = (args.data.get(k) for k in meta_params)
    point_table, geom, resolution, buffer_, obs_date__ge, obs_date__le = meta_vals

    result_rows = []

    if not has_tree_filters(args.data):
        tname = point_table.name
        args.data[tname + '__filter'] = request_args_to_condition_tree(
            request_args=args.data, ignore=['buffer', 'resolution'])

    # We only build conditions from values with a key containing 'filter'.
    # Therefore we only build dataset conditions from condition trees.
    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():

        tablename = tablename.rsplit('__')[0]

        metatable = MetaTable.get_by_dataset_name(tablename)
        table = metatable.point_table
        conditions = parse_tree(table, condition_tree)

        try:
            registry_row = MetaTable.get_by_dataset_name(table.name)
            # make_grid expects conditions to be iterable.
            grid_rows, size_x, size_y = registry_row.make_grid(
                resolution, geom, [conditions], {
                    'upper': obs_date__le,
                    'lower': obs_date__ge
                })
            result_rows += grid_rows
        except Exception as e:
            msg = 'Could not make grid aggregation.'
            return api_response.make_raw_error('{}: {}'.format(msg, e))

    resp = api_response.geojson_response_base()
    for value in result_rows:
        if value[1]:
            pt = shapely.wkb.loads(codecs.decode(value[1], 'hex'))
            # pt.x is longitude and pt.y is latitude; build the cell's bounding box.
            west, south = (pt.x - (size_x / 2)), (pt.y - (size_y / 2))
            east, north = (pt.x + (size_x / 2)), (pt.y + (size_y / 2))
            new_geom = shapely.geometry.box(west, south, east,
                                            north).__geo_interface__
        else:
            new_geom = None
        new_property = {
            'count': value[0],
        }
        api_response.add_geojson_feature(resp, new_geom, new_property)

    return resp
Example #7
    def _deserialize(self, value, attr, data):
        feature = request.args['feature']
        network = request.view_args['network']
        redshift_base.metadata.reflect()

        try:
            parsed_json = json.loads(value)
            table = redshift_base.metadata.tables[network + '__' + feature]
            valid_tree(table, parsed_json)
            return parse_tree(table, parsed_json)
        except KeyError as err:
            raise ValidationError(str(err))
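
This _deserialize looks like a method on a custom marshmallow field. Here is a minimal sketch of how such a field might be declared and attached to a validator schema; the class and schema names are assumptions, not taken from these examples:

from marshmallow import Schema, fields

class ConditionTreeField(fields.Field):
    """Hypothetical custom field; its _deserialize would be the method above."""

class FeatureQueryValidator(Schema):
    """Hypothetical validator for ?filter=<JSON condition tree> arguments."""
    filter = ConditionTreeField()
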
Example #8
def _detail_aggregate(args):
    """Returns a record for every row in the specified dataset with brief
    temporal and spatial information about the row. This can give a user of the
    platform a quick overview of what is available within their constraints.

    :param args: dictionary of request arguments
    :returns: csv or json response object
    """
    meta_params = ('obs_date__ge', 'obs_date__le', 'agg', 'geom', 'dataset')
    meta_vals = (args.data.get(k) for k in meta_params)
    start_date, end_date, agg, geom, dataset = meta_vals

    time_counts = []

    if not has_tree_filters(args.data):
        # The obs_date arguments set the bounds of all the aggregates.
        # We don't want to create a condition tree that has point_date filters.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            args.data, ignore=['obs_date__ge', 'obs_date__le'])

    dataset_conditions = {
        k: v
        for k, v in list(args.data.items()) if 'filter' in k
    }
    for tablename, condition_tree in list(dataset_conditions.items()):
        # This pattern matches the last occurrence of the '__' pattern.
        # Prevents an error that is caused by dataset names with trailing
        # underscores.
        tablename = re.split(r'__(?!_)', tablename)[0]
        table = MetaTable.get_by_dataset_name(tablename).point_table
        try:
            conditions = parse_tree(table, condition_tree)
        except ValueError:  # Catches empty condition tree.
            conditions = None

        try:
            ts = MetaTable.get_by_dataset_name(table.name).timeseries_one(
                agg, start_date, end_date, geom, conditions)
        except Exception as e:
            msg = 'Failed to construct timeseries'
            return api_response.make_raw_error('{}: {}'.format(msg, e))

        time_counts += [{'count': c, 'datetime': d} for c, d in ts[1:]]

    return time_counts
Example #9
def _detail_aggregate(args):
    """Returns a record for every row in the specified dataset with brief
    temporal and spatial information about the row. This can give a user of the
    platform a quick overview of what is available within their constraints.

    :param args: dictionary of request arguments
    :returns: csv or json response object
    """
    meta_params = ('obs_date__ge', 'obs_date__le', 'agg', 'geom', 'dataset')
    meta_vals = (args.data.get(k) for k in meta_params)
    start_date, end_date, agg, geom, dataset = meta_vals

    time_counts = []

    if not has_tree_filters(args.data):
        # The obs_date arguments set the bounds of all the aggregates.
        # We don't want to create a condition tree that has point_date filters.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            args.data, ignore=['obs_date__ge', 'obs_date__le']
        )

    dataset_conditions = {k: v for k, v in list(args.data.items()) if 'filter' in k}
    for tablename, condition_tree in list(dataset_conditions.items()):
        # This pattern matches the last occurrence of the '__' pattern.
        # Prevents an error that is caused by dataset names with trailing
        # underscores.
        tablename = re.split(r'__(?!_)', tablename)[0]
        table = MetaTable.get_by_dataset_name(tablename).point_table
        try:
            conditions = parse_tree(table, condition_tree)
        except ValueError:  # Catches empty condition tree.
            conditions = None

        try:
            ts = MetaTable.get_by_dataset_name(table.name).timeseries_one(
                agg, start_date, end_date, geom, conditions
            )
        except Exception as e:
            msg = 'Failed to construct timeseries'
            return api_response.make_raw_error('{}: {}'.format(msg, e))

        time_counts += [{'count': c, 'datetime': d} for c, d in ts[1:]]

    return time_counts
Example #10
def detail_query(args, aggregate=False):
    meta_params = ('dataset', 'shapeset', 'data_type', 'geom', 'obs_date__ge',
                   'obs_date__le')
    meta_vals = (args.data.get(k) for k in meta_params)
    dataset, shapeset, data_type, geom, obs_date__ge, obs_date__le = meta_vals

    # If no tree filters were provided, a little formatting is needed
    # to make the general filters into an 'and' tree.
    if not has_tree_filters(args.data):
        # Creates an AND condition tree and adds it to args.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            request_args=args.data, ignore=['shapeset'])

    # Sort out the filter conditions from the rest of the user arguments.
    filters = {k: v for k, v in list(args.data.items()) if 'filter' in k}

    # Get upset if they specify more than a dataset and shapeset filter.
    if len(filters) > 2:
        return api_response.bad_request('Too many table filters provided.')

    # Query the point dataset.
    q = postgres_session.query(dataset)

    # If the user specified a geom, filter results to those within its shape.
    if geom:
        q = q.filter(
            dataset.c.geom.ST_Within(sqlalchemy.func.ST_GeomFromGeoJSON(geom)))

    # Retrieve the filters and build conditions from them if they exist.
    point_ctree = filters.get(dataset.name + '__filter')

    # If the user specified point dataset filters, parse and apply them.
    if point_ctree:
        point_conditions = parse_tree(dataset, point_ctree)
        q = q.filter(point_conditions)

        # To allow both obs_date meta params and filter trees.
        q = q.filter(
            dataset.c.point_date >= obs_date__ge) if obs_date__ge else q
        q = q.filter(
            dataset.c.point_date <= obs_date__le) if obs_date__le else q

    # If a user specified a shape dataset, it was either through the /shapes
    # endpoint, which uses the aggregate result, or through the /detail
    # endpoint, which uses the joined result.
    if shapeset is not None:
        if aggregate:
            q = q.from_self(shapeset).filter(
                dataset.c.geom.ST_Intersects(
                    shapeset.c.geom)).group_by(shapeset)
        else:
            shape_columns = [
                '{}.{} as {}'.format(shapeset.name, col.name, col.name)
                for col in shapeset.c
            ]
            q = q.join(shapeset, dataset.c.geom.ST_Within(shapeset.c.geom))
            q = q.add_columns(*shape_columns)

        # If there's a filter specified for the shape dataset, apply those conditions.
        shape_ctree = filters.get(shapeset.name + '__filter')
        if shape_ctree:
            shape_conditions = parse_tree(shapeset, shape_ctree)
            q = q.filter(shape_conditions)

    return q
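
A caller passes in a validator result whose .data mapping holds the deserialized request arguments and then executes the returned SQLAlchemy query. A rough usage sketch follows; RequestValidator is a hypothetical name, not taken from these examples:

# Sketch of typical usage inside a Flask view.
args = RequestValidator().load(request.args)   # args.data is a dict of parsed params
query = detail_query(args, aggregate=False)
rows = query.all()                             # or .count(), .offset(), .limit(), ...
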
Example #11
def _timeseries(args):

    meta_params = ['geom', 'dataset', 'dataset_name__in', 'obs_date__ge', 'obs_date__le', 'agg']
    meta_vals = [args.data.get(k) for k in meta_params]
    geom, dataset, table_names, start_date, end_date, agg = meta_vals

    ctrees = {}

    if has_tree_filters(args.data):
        # Timeseries is a little tricky. Only build condition trees when
        # filters were actually provided; building one for every dataset
        # would be wasteful.
        for field, value in args.data.items():
            if 'filter' in field:
                # This pattern matches the last occurrence of the '__' pattern.
                # Prevents an error that is caused by dataset names with trailing
                # underscores.
                tablename = re.split(r'__(?!_)', field)[0]
                metarecord = MetaTable.get_by_dataset_name(tablename)
                pt = metarecord.point_table
                ctrees[pt.name] = parse_tree(pt, value)
        # Just cleanliness, since we don't use this argument. Doesn't have
        # to show up in the JSON response.
        del args.data['dataset']

    # If no dataset_name__in list was provided, have to fill it in by invoking
    # MetaTable.index() here! Not in the validator. This way the list stays up
    # to date.
    if table_names is None:
        table_names = MetaTable.index()
        args.data['dataset_name__in'] = table_names

    # If a single dataset was provided, it's the only thing we need to consider.
    if dataset is not None:
        table_names = [dataset.name]
        del args.data['dataset_name__in']

    # Remove table names which wouldn't return anything for the query, given
    # the time and geom constraints.
    try:
        table_names = MetaTable.narrow_candidates(table_names, start_date, end_date, geom)
    except Exception as e:
        msg = 'Failed to gather candidate tables.'
        return internal_error(msg, e)

    # If there aren't any table names, it causes an error down the code. Better
    # to return and inform them that the request wouldn't have found anything.
    if not table_names:
        return bad_request("Your request doesn't return any results. Try "
                           "adjusting your time constraint or location "
                           "parameters.")

    try:
        panel = MetaTable.timeseries_all(
            table_names, agg, start_date, end_date, geom, ctrees
        )
    except Exception as e:
        msg = 'Failed to construct timeseries.'
        return internal_error(msg, e)

    panel = MetaTable.attach_metadata(panel)
    resp = json_response_base(args, panel, args.data)

    datatype = args.data['data_type']
    if datatype == 'json':
        resp = make_response(json.dumps(resp, default=unknown_object_json_handler), 200)
        resp.headers['Content-Type'] = 'application/json'
    elif datatype == 'csv':

        # response format
        # temporal_group,dataset_name_1,dataset_name_2
        # 2014-02-24 00:00:00,235,653
        # 2014-03-03 00:00:00,156,624

        fields = ['temporal_group']
        for o in resp['objects']:
            fields.append(o['dataset_name'])

        csv_resp = []
        i = 0
        for k, g in groupby(resp['objects'], key=itemgetter('dataset_name')):
            l_g = list(g)[0]

            j = 0
            for row in l_g['items']:
                # first iteration, populate the first column with temporal_groups
                if i == 0:
                    csv_resp.append([row['datetime']])
                csv_resp[j].append(row['count'])
                j += 1
            i += 1

        csv_resp.insert(0, fields)
        csv_resp = make_csv(csv_resp)
        resp = make_response(csv_resp, 200)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

    return resp
Example #12
def timeseries():
    validator = TimeseriesValidator()

    deserialized_arguments = validator.load(request.args)
    serialized_arguments = json.loads(validator.dumps(deserialized_arguments.data).data)

    if deserialized_arguments.errors:
        return make_error(deserialized_arguments.errors, 400, serialized_arguments)

    qargs = deserialized_arguments.data

    agg = qargs['agg']
    data_type = qargs['data_type']
    geom = qargs['location_geom__within']
    pointset = qargs['dataset_name']
    pointsets = qargs['dataset_name__in']
    start_date = qargs['obs_date__ge']
    end_date = qargs['obs_date__le']

    ctrees = {}
    raw_ctrees = {}

    if has_tree_filters(request.args):
        # Timeseries is a little tricky. Only build condition trees when
        # filters were actually provided; building one for every dataset
        # would be wasteful.
        for field, value in list(request.args.items()):
            if 'filter' in field:
                # This pattern matches the last occurrence of the '__' pattern.
                # Prevents an error that is caused by dataset names with trailing
                # underscores.
                tablename = re.split(r'__(?!_)', field)[0]
                metarecord = MetaTable.get_by_dataset_name(tablename)
                pt = metarecord.point_table
                ctrees[pt.name] = parse_tree(pt, json.loads(value))
                raw_ctrees[pt.name] = json.loads(value)

    point_set_names = [p.name for p in pointsets + [pointset] if p is not None]
    if not point_set_names:
        point_set_names = MetaTable.index()

    results = MetaTable.timeseries_all(point_set_names, agg, start_date, end_date, geom, ctrees)

    payload = {
        'meta': {
            'message': [],
            'query': serialized_arguments,
            'status': 'ok',
            'total': len(results)
        },
        'objects': results
    }

    if ctrees:
        payload['meta']['query']['filters'] = raw_ctrees

    if data_type == 'json':
        return jsonify(payload)

    elif data_type == 'csv':

        # response format
        # temporal_group,dataset_name_1,dataset_name_2
        # 2014-02-24 00:00:00,235,653
        # 2014-03-03 00:00:00,156,624

        fields = ['temporal_group']
        for o in payload['objects']:
            fields.append(o['dataset_name'])

        csv_resp = []
        i = 0
        for k, g in groupby(payload['objects'], key=itemgetter('dataset_name')):
            l_g = list(g)[0]

            j = 0
            for row in l_g['items']:
                # first iteration, populate the first column with temporal_groups
                if i == 0:
                    csv_resp.append([row['datetime']])
                csv_resp[j].append(row['count'])
                j += 1
            i += 1

        csv_resp.insert(0, fields)
        csv_resp = make_csv(csv_resp)
        resp = make_response(csv_resp, 200)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

        return resp
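
The i/j bookkeeping in the CSV branch is essentially a transpose: one csv_resp row per temporal group and one column per dataset. A simplified standalone sketch of the same idea with made-up data (the groupby step is omitted because the objects here are already one per dataset):

objects = [
    {'dataset_name': 'crimes', 'items': [
        {'datetime': '2014-02-24', 'count': 235},
        {'datetime': '2014-03-03', 'count': 156}]},
    {'dataset_name': 'permits', 'items': [
        {'datetime': '2014-02-24', 'count': 653},
        {'datetime': '2014-03-03', 'count': 624}]},
]

rows = [['temporal_group'] + [o['dataset_name'] for o in objects]]
for i, o in enumerate(objects):
    for j, item in enumerate(o['items']):
        if i == 0:
            rows.append([item['datetime']])
        rows[j + 1].append(item['count'])

# rows is now:
# [['temporal_group', 'crimes', 'permits'],
#  ['2014-02-24', 235, 653],
#  ['2014-03-03', 156, 624]]
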