Example #1
def detail_aggregate():
    raw_query_params = request.args.copy()
    agg, datatype, queries = parse_join_query(raw_query_params)
    if not agg:
        agg = 'day'

    # if no obs_date given, default to >= 90 days ago
    if not raw_query_params.get('obs_date__ge'):
        ninety_days_ago = datetime.now() - timedelta(days=90)
        raw_query_params['obs_date__ge'] = ninety_days_ago.strftime('%Y-%m-%d')

    if not raw_query_params.get('obs_date__le'):
        raw_query_params['obs_date__le'] = datetime.now().strftime('%Y-%m-%d')

    mt = MasterTable.__table__
    valid_query, base_clauses, resp, status_code = make_query(
        mt, queries['base'])

    # check for valid output format
    if datatype not in VALID_DATA_TYPE:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid output format" % datatype
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'

    # check for valid temporal aggregate
    if valid_query and agg not in VALID_AGG:  # skip if resp is already an error Response
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid temporal aggregation" % agg
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'

    if valid_query:
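        # date_trunc buckets each obs_date at the requested granularity (day, week, ...)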
        time_agg = func.date_trunc(agg, mt.c['obs_date'])
        base_query = session.query(time_agg, func.count(mt.c.dataset_row_id))
        dname = raw_query_params.get('dataset_name')

        try:
            dataset = Table('dat_%s' % dname,
                            Base.metadata,
                            autoload=True,
                            autoload_with=engine,
                            extend_existing=True)
            valid_query, detail_clauses, resp, status_code = make_query(
                dataset, queries['detail'])
        except Exception:  # reflection failed: missing or unknown dataset_name
            valid_query = False
            resp['meta']['status'] = 'error'
            if not dname:
                resp['meta']['message'] = "dataset_name' is required"
            else:
                resp['meta']['message'] = "unable to find dataset '%s'" % dname
            resp = make_response(json.dumps(resp, default=dthandler), 400)
            resp.headers['Content-Type'] = 'application/json'

        if valid_query:
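            # join master rows to the reflected detail table on its primary key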
            pk = [p.name for p in dataset.primary_key][0]
            base_query = base_query.join(dataset,
                                         mt.c.dataset_row_id == dataset.c[pk])
            for clause in base_clauses:
                base_query = base_query.filter(clause)
            for clause in detail_clauses:
                base_query = base_query.filter(clause)
            values = base_query.group_by(time_agg).order_by(time_agg).all()

            # initialize the from and to dates as Python datetimes
            from_date = truncate(parse(raw_query_params['obs_date__ge']), agg)
            if 'obs_date__le' in raw_query_params:
                to_date = parse(raw_query_params['obs_date__le'])
            else:
                to_date = datetime.now()

            items = []
            dense_matrix = []
            cursor = from_date
            v_index = 0
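            # walk the full date range, emitting a zero count for buckets with no rows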
            while cursor <= to_date:
                if (v_index < len(values) and
                        values[v_index][0].replace(tzinfo=None) == cursor):
                    dense_matrix.append((cursor, values[v_index][1]))
                    v_index += 1
                else:
                    dense_matrix.append((cursor, 0))

                cursor = increment_datetime_aggregate(cursor, agg)

            dense_matrix = OrderedDict(dense_matrix)
            for k in dense_matrix:
                i = {
                    'datetime': k,
                    'count': dense_matrix[k],
                }
                items.append(i)

            if datatype == 'json':
                resp['objects'] = items
                resp['meta']['status'] = 'ok'
                resp['meta']['query'] = raw_query_params
                loc = resp['meta']['query'].get('location_geom__within')
                if loc:
                    resp['meta']['query']['location_geom__within'] = json.loads(loc)
                resp['meta']['query']['agg'] = agg

                resp = make_response(json.dumps(resp, default=dthandler),
                                     status_code)
                resp.headers['Content-Type'] = 'application/json'
            elif datatype == 'csv':
                outp = StringIO()
                writer = csv.DictWriter(outp, fieldnames=['datetime', 'count'])  # explicit fieldnames avoid an IndexError on empty items
                writer.writeheader()
                writer.writerows(items)
                resp = make_response(outp.getvalue(), status_code)
                resp.headers['Content-Type'] = 'text/csv'
                filedate = datetime.now().strftime('%Y-%m-%d')
                resp.headers['Content-Disposition'] = \
                    'attachment; filename=%s.csv' % filedate
    return resp
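None of the handlers in this section show their imports. A plausible import block, assuming Python 3: the standard-library and third-party names follow directly from the code, but the app-local module paths (app.database, app.models, app.api.common) are guesses rather than part of the source.

# Hypothetical import block (Python 3); app-local paths are assumptions.
import csv
import json
from collections import OrderedDict
from datetime import datetime, timedelta
from io import StringIO
from itertools import groupby
from operator import itemgetter

from dateutil.parser import parse
from flask import request, make_response
from sqlalchemy import Table, func

from app.database import Base, engine, session   # assumed path
from app.models import MasterTable               # assumed path
from app.api.common import (VALID_AGG, VALID_DATA_TYPE, dthandler,   # assumed path
                            increment_datetime_aggregate, make_csv,
                            make_query, parse_join_query, truncate)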
Example #2
def dataset():
    raw_query_params = request.args.copy()

    # set default value for temporal aggregation
    agg = raw_query_params.get('agg')
    if not agg:
        agg = 'day'
    else:
        del raw_query_params['agg']

    # if no obs_date given, default to >= 90 days ago
    if not raw_query_params.get('obs_date__ge'):
        ninety_days_ago = datetime.now() - timedelta(days=90)
        raw_query_params['obs_date__ge'] = ninety_days_ago.strftime('%Y-%m-%d')

    if not raw_query_params.get('obs_date__le'):
        raw_query_params['obs_date__le'] = datetime.now().strftime('%Y-%m-%d')

    # set datatype
    datatype = 'json'
    if raw_query_params.get('data_type'):
        datatype = raw_query_params['data_type']
        del raw_query_params['data_type']
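    # fetch the names of approved datasets whose most recent Celery ETL task succeeded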
    q = '''
        SELECT m.dataset_name
        FROM meta_master AS m 
        LEFT JOIN celery_taskmeta AS c 
          ON c.id = (
            SELECT id FROM celery_taskmeta 
            WHERE task_id = ANY(m.result_ids) 
            ORDER BY date_done DESC 
            LIMIT 1
          )
        WHERE m.approved_status = 'true'
        AND c.status = 'SUCCESS'
    '''
    with engine.begin() as c:
        dataset_names = [d[0] for d in c.execute(q)]

    raw_query_params['dataset_name__in'] = ','.join(dataset_names)

    mt = MasterTable.__table__
    valid_query, query_clauses, resp, status_code = make_query(
        mt, raw_query_params)

    # check for valid output format
    if datatype not in VALID_DATA_TYPE:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid output format" % datatype
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'

    # check for valid temporal aggregate
    if valid_query and agg not in VALID_AGG:  # skip if resp is already an error Response
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid temporal aggregation" % agg
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'

    if valid_query:
        time_agg = func.date_trunc(agg, mt.c['obs_date'])
        base_query = session.query(time_agg, func.count(mt.c['obs_date']),
                                   mt.c['dataset_name'])
        base_query = base_query.filter(mt.c['current_flag'] == True)
        for clause in query_clauses:
            base_query = base_query.filter(clause)
        base_query = base_query.group_by(mt.c['dataset_name'])\
            .group_by(time_agg)\
            .order_by(time_agg)
        values = base_query.all()

        # initialize the from and to dates as Python datetimes
        from_date = truncate(parse(raw_query_params['obs_date__ge']), agg)
        if 'obs_date__le' in raw_query_params:
            to_date = parse(raw_query_params['obs_date__le'])
        else:
            to_date = datetime.now()

        # build the response
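        # groupby() only groups adjacent rows, so sort by the dataset_name column (index 2) first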
        results = sorted(values, key=itemgetter(2))
        for k, g in groupby(results, key=itemgetter(2)):
            d = {'dataset_name': k}

            items = []
            dense_matrix = []
            cursor = from_date
            v_index = 0
            dataset_values = list(g)
            while cursor <= to_date:
                if (v_index < len(dataset_values) and
                        dataset_values[v_index][0].replace(tzinfo=None) == cursor):
                    dense_matrix.append((cursor, dataset_values[v_index][1]))
                    v_index += 1
                else:
                    dense_matrix.append((cursor, 0))

                cursor = increment_datetime_aggregate(cursor, agg)

            dense_matrix = OrderedDict(dense_matrix)
            for ts in dense_matrix:
                i = {
                    'datetime': ts,
                    'count': dense_matrix[ts],
                }
                items.append(i)

            d['items'] = items
            resp['objects'].append(d)

        resp['meta']['query'] = raw_query_params
        loc = resp['meta']['query'].get('location_geom__within')
        if loc:
            resp['meta']['query']['location_geom__within'] = json.loads(loc)
        resp['meta']['query']['agg'] = agg
        resp['meta']['status'] = 'ok'

        if datatype == 'json':
            resp = make_response(json.dumps(resp, default=dthandler),
                                 status_code)
            resp.headers['Content-Type'] = 'application/json'
        elif datatype == 'csv':

            # response format
            # temporal_group,dataset_name_1,dataset_name_2
            # 2014-02-24 00:00:00,235,653
            # 2014-03-03 00:00:00,156,624

            fields = ['temporal_group']
            for o in resp['objects']:
                fields.append(o['dataset_name'])

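            # pivot the per-dataset item lists: one row per time bucket,
            # one column of counts per dataset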
            csv_resp = []
            i = 0
            for k, g in groupby(resp['objects'],
                                key=itemgetter('dataset_name')):
                l_g = list(g)[0]

                j = 0
                for row in l_g['items']:
                    # first iteration, populate the first column with temporal_groups
                    if i == 0:
                        csv_resp.append([row['datetime']])
                    csv_resp[j].append(row['count'])
                    j += 1
                i += 1

            csv_resp.insert(0, fields)
            csv_resp = make_csv(csv_resp)
            resp = make_response(csv_resp, 200)
            resp.headers['Content-Type'] = 'text/csv'
            filedate = datetime.now().strftime('%Y-%m-%d')
            resp.headers['Content-Disposition'] = \
                'attachment; filename=%s.csv' % filedate
    return resp
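Every handler here leans on two helpers that are never shown: truncate(), which floors a datetime to the start of its aggregation bucket, and increment_datetime_aggregate(), which steps a bucket-aligned datetime to the next bucket. A minimal sketch of plausible implementations, assuming the granularities are 'day', 'week', 'month', and 'year' (the real VALID_AGG set may differ, e.g. by including 'hour'):

from datetime import timedelta

def truncate(dt, agg):
    # Hypothetical sketch: floor dt to the start of its bucket, mirroring
    # what Postgres date_trunc() does on the SQL side.
    if agg == 'day':
        return dt.replace(hour=0, minute=0, second=0, microsecond=0)
    if agg == 'week':  # ISO weeks start on Monday, like date_trunc('week')
        monday = dt - timedelta(days=dt.weekday())
        return monday.replace(hour=0, minute=0, second=0, microsecond=0)
    if agg == 'month':
        return dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    if agg == 'year':
        return dt.replace(month=1, day=1, hour=0, minute=0,
                          second=0, microsecond=0)
    raise ValueError("unsupported aggregation '%s'" % agg)

def increment_datetime_aggregate(dt, agg):
    # Hypothetical sketch: advance a bucket-aligned dt by one bucket.
    if agg == 'day':
        return dt + timedelta(days=1)
    if agg == 'week':
        return dt + timedelta(weeks=1)
    if agg == 'month':
        years, month = divmod(dt.month, 12)  # month 12 rolls into the next year
        return dt.replace(year=dt.year + years, month=month + 1)
    if agg == 'year':
        return dt.replace(year=dt.year + 1)
    raise ValueError("unsupported aggregation '%s'" % agg)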
Example #3
def dataset():
    raw_query_params = request.args.copy()

    # set default value for temporal aggregation
    agg = raw_query_params.get('agg')
    if not agg:
        agg = 'day'
    else:
        del raw_query_params['agg']

    # if no obs_date given, default to >= 90 days ago
    if not raw_query_params.get('obs_date__ge'):
        ninety_days_ago = datetime.now() - timedelta(days=90)
        raw_query_params['obs_date__ge'] = ninety_days_ago.strftime('%Y-%m-%d')

    if not raw_query_params.get('obs_date__le'):
        raw_query_params['obs_date__le'] = datetime.now().strftime('%Y-%m-%d') 

    # set datatype
    datatype = 'json'
    if raw_query_params.get('data_type'):
        datatype = raw_query_params['data_type']
        del raw_query_params['data_type']

    mt = MasterTable.__table__
    valid_query, query_clauses, resp, status_code = make_query(mt, raw_query_params)
    
    # check for valid output format
    if datatype not in VALID_DATA_TYPE:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid output format" % datatype
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'

    # check for valid temporal aggregate
    if valid_query and agg not in VALID_AGG:  # skip if resp is already an error Response
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid temporal aggregation" % agg
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'

    if valid_query:
        time_agg = func.date_trunc(agg, mt.c['obs_date'])
        base_query = session.query(time_agg, 
            func.count(mt.c['obs_date']),
            mt.c['dataset_name'])
        base_query = base_query.filter(mt.c['current_flag'] == True)
        for clause in query_clauses:
            base_query = base_query.filter(clause)
        base_query = base_query.group_by(mt.c['dataset_name'])\
            .group_by(time_agg)\
            .order_by(time_agg)
        values = base_query.all()

        # initialize the from and to dates as Python datetimes
        from_date = truncate(parse(raw_query_params['obs_date__ge']), agg)
        if 'obs_date__le' in raw_query_params:
            to_date = parse(raw_query_params['obs_date__le'])
        else:
            to_date = datetime.now()

        # build the response
        results = sorted(values, key=itemgetter(2))
        for k, g in groupby(results, key=itemgetter(2)):
            d = {'dataset_name': k}

            items = []
            dense_matrix = []
            cursor = from_date
            v_index = 0
            dataset_values = list(g)
            while cursor <= to_date:
                if (v_index < len(dataset_values) and
                        dataset_values[v_index][0].replace(tzinfo=None) == cursor):
                    dense_matrix.append((cursor, dataset_values[v_index][1]))
                    v_index += 1
                else:
                    dense_matrix.append((cursor, 0))

                cursor = increment_datetime_aggregate(cursor, agg)

            dense_matrix = OrderedDict(dense_matrix)
            for ts in dense_matrix:
                i = {
                    'datetime': ts,
                    'count': dense_matrix[ts],
                }
                items.append(i)

            d['items'] = items
            resp['objects'].append(d)

        resp['meta']['query'] = raw_query_params
        loc = resp['meta']['query'].get('location_geom__within')
        if loc:
            resp['meta']['query']['location_geom__within'] = json.loads(loc)
        resp['meta']['query']['agg'] = agg
        resp['meta']['status'] = 'ok'
    
        if datatype == 'json':
            resp = make_response(json.dumps(resp, default=dthandler), status_code)
            resp.headers['Content-Type'] = 'application/json'
        elif datatype == 'csv':
 
            # response format
            # temporal_group,dataset_name_1,dataset_name_2
            # 2014-02-24 00:00:00,235,653
            # 2014-03-03 00:00:00,156,624
 
            fields = ['temporal_group']
            for o in resp['objects']:
                fields.append(o['dataset_name'])
 
            csv_resp = []
            i = 0
            for k, g in groupby(resp['objects'], key=itemgetter('dataset_name')):
                l_g = list(g)[0]
                
                j = 0
                for row in l_g['items']:
                    # first iteration, populate the first column with temporal_groups
                    if i == 0: 
                        csv_resp.append([row['datetime']])
                    csv_resp[j].append(row['count'])
                    j += 1
                i += 1
                    
            csv_resp.insert(0, fields)
            csv_resp = make_csv(csv_resp)
            resp = make_response(csv_resp, 200)
            resp.headers['Content-Type'] = 'text/csv'
            filedate = datetime.now().strftime('%Y-%m-%d')
            resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % (filedate)
    return resp
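Neither view shows how it is registered with the application. A plausible wiring, assuming a plain Flask app; the URL paths and the app object are assumptions, not taken from the examples.

# Hypothetical registration; endpoint paths are assumptions.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/api/detail-aggregate/', view_func=detail_aggregate)
app.add_url_rule('/api/master/', view_func=dataset)

# Example requests using the query-string operators the handlers parse:
#   GET /api/detail-aggregate/?dataset_name=crimes&agg=week&data_type=csv
#   GET /api/master/?obs_date__ge=2014-01-01&agg=month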