Пример #1
0
def get_timeline_from_dt(user_id,
                         place_key,
                         trip_key,
                         start_local_dt,
                         end_local_dt,
                         geojson=None,
                         extra_query_list=None):
    """
    Build a Timeline from the places and trips in a local-date range.

    Places are matched on "data.enter_local_dt" and trips on
    "data.start_local_dt", both against the same start/end components.

    :param user_id: user whose entries are fetched
    :param place_key: key used to fetch the place entries
    :param trip_key: key used to fetch the trip entries
    :param start_local_dt: start of the range, passed to TimeComponentQuery
    :param end_local_dt: end of the range, passed to TimeComponentQuery
    :param geojson: optional region; get_place_trip_geoquery turns it into
        the place and trip geo queries
    :param extra_query_list: optional extra queries, forwarded unchanged to
        both lookups
    :return: Timeline(place_key, trip_key, <places>, <trips>)
    """
    logging.info("About to query for date components %s -> %s" %
                 (start_local_dt, end_local_dt))
    place_geo_query, trip_geo_query = get_place_trip_geoquery(geojson)

    # Places are selected by when they were entered.
    place_time_query = esttc.TimeComponentQuery(
        "data.enter_local_dt", start_local_dt, end_local_dt)
    found_places = esda.get_entries(
        place_key, user_id, place_time_query,
        geo_query=place_geo_query,
        extra_query_list=extra_query_list)

    # Trips are selected by when they started.
    trip_time_query = esttc.TimeComponentQuery(
        "data.start_local_dt", start_local_dt, end_local_dt)
    found_trips = esda.get_entries(
        trip_key, user_id, trip_time_query,
        geo_query=trip_geo_query,
        extra_query_list=extra_query_list)

    for candidate in found_places:
        place_span = (candidate.get_id(),
                      candidate.data.enter_fmt_time,
                      candidate.data.exit_fmt_time)
        logging.debug("Considering place %s: %s -> %s " % place_span)

    for candidate in found_trips:
        trip_span = (candidate.get_id(),
                     candidate.data.start_fmt_time,
                     candidate.data.end_fmt_time)
        logging.debug("Considering trip %s: %s -> %s " % trip_span)

    return Timeline(place_key, trip_key, found_places, found_trips)
Пример #2
0
def getTimeseriesEntries(time_type):
    """
    Return the requesting user's entries for the keys and time range given
    in the JSON request body.

    The body must contain 'user' and 'key_list'. The range is either the
    pair 'from_local_date' / 'to_local_date' (component query) or the pair
    'start_time' / 'end_time' (plain time query); both are applied to
    "metadata.write_ts".

    :param time_type: accepted for the caller's sake; not read by this
        function
    :return: dict with the matching entries under 'phone_data'
    """
    payload = request.json
    if 'user' not in payload:
        abort(401, "only a user can read his/her data")

    user_uuid = getUUID(request)
    key_list = payload['key_list']

    uses_local_dates = ('from_local_date' in payload
                        and 'to_local_date' in payload)
    if uses_local_dates:
        time_query = esttc.TimeComponentQuery(
            "metadata.write_ts",
            payload['from_local_date'],
            payload['to_local_date'])
    else:
        time_query = estt.TimeQuery(
            "metadata.write_ts",
            payload['start_time'],
            payload['end_time'])

    # No extra filtering happens here: usercache queries are capped at
    # 100,000 entries and timeseries queries at 250,000, so a response holds
    # at most 350,000 entries. This limit should be documented in the API.
    matching_entries = esdc.find_entries(user_uuid, key_list, time_query)
    return {'phone_data': matching_entries}
Пример #3
0
def group_by_local_date(user_id, from_dt, to_dt, freq, summary_fn_list):
    """
    Summarise analysed sections in a local date range, grouped by frequency.

    :param user_id: id for the user. None for aggregate.
    :param from_dt: start local dt object. Only the year, month and date
        entries are assumed to be filled in; together with to_dt they
        represent a date range.
    :param to_dt: end local dt object, with the same assumption as from_dt.
    :param freq: grouping frequency. Only frequencies that correspond to
        expanded local_dt fields are supported; these are represented in the
        `LocalFreq` enum.
    :param summary_fn_list: summary functions; each is passed to
        grouped_to_summary together with the grouped data frame.
    :return: a dict with two keys. "last_ts_processed" is the start_ts of
        the last row of the section data frame, or None if there were no
        matching sections. "result" holds one entry per summary function:
        the grouped_to_summary output, or an empty list if there were no
        matching sections.
    """
    local_range_query = esttc.TimeComponentQuery(
        "data.start_local_dt", from_dt, to_dt)
    sections = esda.get_data_df(
        eac.get_section_key_for_analysis_results(),
        user_id=user_id,
        time_query=local_range_query,
        geo_query=None)

    if len(sections) == 0:
        logging.info("Found no entries for user %s, time_query %s" % (user_id, local_range_query))
        # One empty placeholder per requested summary function.
        placeholders = [[] for _ in range(len(summary_fn_list))]
        return {"last_ts_processed": None, "result": placeholders}

    grouped_sections = sections.groupby(_get_local_group_by(freq))
    fill_local_dt = _get_local_key_to_fill_fn(freq)

    last_start_ts = sections.iloc[-1].start_ts
    summaries = []
    for summarise in summary_fn_list:
        summaries.append(
            grouped_to_summary(grouped_sections, fill_local_dt, summarise))
    return {"last_ts_processed": last_start_ts, "result": summaries}
Пример #4
0
def range_mode_heatmap(mode, start_ts, end_ts):
    """
    Collect the coordinates of cleaned locations for one mode in a time range.

    Both timestamps are converted to local date objects in "UTC" and used as
    a component query on "data.ts". The lookup runs with user_id=None and no
    geo query.

    :param mode: mode passed to esdlq.get_mode_query
    :param start_ts: start timestamp of the range
    :param end_ts: end timestamp of the range
    :return: dict with the coordinates of each matching entry under "latlng"
    """
    utc_start = esdl.get_local_date(start_ts, "UTC")
    utc_end = esdl.get_local_date(end_ts, "UTC")
    ts_range_query = esttc.TimeComponentQuery("data.ts", utc_start, utc_end)
    mode_filters = [esdlq.get_mode_query(mode)]

    matching_locations = esda.get_entries(esda.CLEANED_LOCATION_KEY,
                                          user_id=None,
                                          time_query=ts_range_query,
                                          geo_query=None,
                                          extra_query_list=mode_filters)

    coordinates = []
    for location in matching_locations:
        coordinates.append(location.data.loc.coordinates)
    return {"latlng": coordinates}
Пример #5
0
def get_time_query(year, month):
    """
    Build a component query on "data.start_local_dt" for a year or a month.

    :param year: year to match, or None
    :param month: month to match, or None
    :return: None when both year and month are None; otherwise a
        TimeComponentQuery whose start and end are the same LocalDate,
        holding the year and, when given, the month
    :raises AssertionError: if a month is given without a year
    """
    if year is None and month is None:
        return None

    # A month on its own is not a valid selection.
    assert year is not None

    components = {"year": year}
    if month is not None:
        components["month"] = month

    # The same local date is both range start and range end.
    selected_ld = ecwl.LocalDate(components)
    return esttc.TimeComponentQuery("data.start_local_dt",
                                    selected_ld, selected_ld)
Пример #6
0
def getTimeseriesEntries(time_type):
    """
    Return the requesting user's entries for the keys and time range given
    in the JSON request body, optionally truncated to a maximum count.

    Fields read from the JSON body:

    - 'user': must be present, otherwise the request is aborted with 401.
    - 'key_list': keys passed to esdc.find_entries.
    - 'from_local_date' and 'to_local_date': when both are present, a
      component query is built on the field named by 'key_local_date'
      (default "metadata.write_ts").
    - 'start_time' and 'end_time': used otherwise, as a plain time query on
      the field named by 'key_time' (default "metadata.write_ts").
    - 'max_entries': optional int. When more entries than this are found,
      the list is reduced according to 'trunc_method'.
    - 'trunc_method': 'first' keeps the first max_entries entries, 'last'
      keeps the last max_entries entries, 'sample' keeps every k-th entry
      with k = len // max_entries + 1. Any other value, or a missing value,
      aborts the request with 500.

    :param time_type: accepted for the caller's sake; not read by this
        function
    :return: dict with the (possibly truncated) entries under 'phone_data'
    """
    if 'user' not in request.json:
        abort(401, "only a user can read his/her data")

    user_uuid = getUUID(request)

    key_list = request.json['key_list']
    if 'from_local_date' in request.json and 'to_local_date' in request.json:
        start_time = request.json['from_local_date']
        end_time = request.json['to_local_date']
        time_key = request.json.get('key_local_date', 'metadata.write_ts')
        time_query = esttc.TimeComponentQuery(time_key,
                                              start_time,
                                              end_time)
    else:
        start_time = request.json['start_time']
        end_time = request.json['end_time']
        time_key = request.json.get('key_time', 'metadata.write_ts')
        time_query = estt.TimeQuery(time_key,
                                    start_time,
                                    end_time)
    # Note that queries from usercache are limited to 100,000 entries
    # and entries from timeseries are limited to 250,000, so we will
    # return at most 350,000 entries. So this means that we don't need
    # additional filtering, but this should be documented in
    # the API
    data_list = esdc.find_entries(user_uuid, key_list, time_query)
    if 'max_entries' in request.json:
        me = request.json['max_entries']
        # Exact type match on purpose: a JSON boolean must not pass as a
        # count, and bool is a subclass of int.
        if type(me) is not int:
            logging.error("aborting: max entry count is %s, type %s, expected int" % (me, type(me)))
            abort(500, "Invalid max_entries %s" % me)

        if len(data_list) > me:
            # .get() so that a missing method reaches the explicit abort
            # below instead of raising KeyError.
            trunc_method = request.json.get('trunc_method')
            # A single if/elif chain: previously 'first' was a separate
            # `if`, so it truncated and then fell into the final `else`
            # and aborted.
            if trunc_method == 'first':
                logging.debug("first n entries is %s" % me)
                data_list = data_list[:me]
            elif trunc_method == 'last':
                logging.debug("last n entries is %s" % me)
                data_list = data_list[-me:]
            elif trunc_method == 'sample':
                # Stride chosen so that at most `me` entries survive.
                sample_rate = len(data_list)//me + 1
                logging.debug("sampling rate is %s" % sample_rate)
                data_list = data_list[::sample_rate]
            else:
                logging.error("aborting: unexpected sampling method %s" % trunc_method)
                abort(500, "sampling method not specified while retriving limited data")
        else:
            logging.debug("Found %d entries < %s, no truncation" % (len(data_list), me))
    logging.debug("successfully returning list of size %s" % len(data_list))
    return {'phone_data': data_list}
Пример #7
0
def range_mode_heatmap(modes, from_ld, to_ld, region):
    """
    Collect the coordinates of cleaned locations for a local-date range,
    optionally restricted by mode and region.

    The lookup runs with user_id=None.

    :param modes: names of ecwm.MotionTypes members to filter on, or None
        for no mode filter
    :param from_ld: start of the range, matched against "data.local_dt"
    :param to_ld: end of the range, matched against "data.local_dt"
    :param region: region for a geo query on "data.loc", or None for no
        geo filter
    :return: dict with the coordinates of each matching entry under "lnglat"
    """
    local_range_query = esttc.TimeComponentQuery(
        "data.local_dt", from_ld, to_ld)

    region_query = None
    if region is not None:
        region_query = estg.GeoQuery(["data.loc"], region)

    mode_filters = []
    if modes is not None:
        # Mode names are looked up in the MotionTypes enum by name.
        wanted_modes = []
        for mode_name in modes:
            wanted_modes.append(ecwm.MotionTypes[mode_name])
        mode_filters.append(esdlq.get_mode_query(wanted_modes))

    matching_locations = esda.get_entries(
        esda.CLEANED_LOCATION_KEY,
        user_id=None,
        time_query=local_range_query,
        geo_query=region_query,
        extra_query_list=mode_filters)

    coordinates = [location.data.loc.coordinates
                   for location in matching_locations]
    return {"lnglat": coordinates}
Пример #8
0
def Berkeley_pop_route(start_ts, end_ts):
    """
    Collect the coordinates of cleaned locations inside a fixed rectangle
    for a time range.

    The rectangle is hard-coded below as a closed polygon ring. Both
    timestamps are converted to local date objects in "UTC" and used as a
    component query on "data.ts". The lookup runs with user_id=None.

    :param start_ts: start timestamp of the range
    :param end_ts: end timestamp of the range
    :return: dict with the coordinates of each matching entry under "lnglat"
    """
    # Bounds of the rectangle; each vertex is written [first, second] in the
    # same order as the original literal.
    west, east = -122.267443, -122.250985
    south, north = 37.864693, 37.880687
    closed_ring = [
        [west, south],
        [west, north],
        [east, north],
        [east, south],
        [west, south],  # repeat the first vertex to close the ring
    ]
    berkeley_json = {
        "geometry": {
            "type": "Polygon",
            "coordinates": [closed_ring],
        }
    }

    utc_start = esdl.get_local_date(start_ts, "UTC")
    utc_end = esdl.get_local_date(end_ts, "UTC")
    ts_range_query = esttc.TimeComponentQuery("data.ts", utc_start, utc_end)
    region_query = estg.GeoQuery(["data.loc"], berkeley_json)

    matching_locations = esda.get_entries(esda.CLEANED_LOCATION_KEY,
                                          user_id=None,
                                          time_query=ts_range_query,
                                          geo_query=region_query)

    coordinates = []
    for location in matching_locations:
        coordinates.append(location.data.loc.coordinates)
    return {"lnglat": coordinates}
Пример #9
0
def query(spec):
    """
    Return the uuid list that matches a query specification.

    :param spec: dict that must contain 'time_type', 'modes' and
        'sel_region'. The time range comes from 'from_local_date' and
        'to_local_date' (component query on "data.local_dt"), or else from
        'start_time' and 'end_time' (time query on "data.ts"). If neither
        pair is complete, no time query is used.
    :return: the result of uuid_list_query(modes, time_query, region)
    :raises KeyError: if 'time_type', 'modes' or 'sel_region' is missing
    """
    # Not used below, but the lookup is kept so that a spec without
    # 'time_type' still fails with KeyError.
    _ = spec['time_type']

    time_query = None
    if 'from_local_date' in spec and 'to_local_date' in spec:
        time_query = esttc.TimeComponentQuery(
            "data.local_dt", spec['from_local_date'], spec['to_local_date'])
    elif 'start_time' in spec and 'end_time' in spec:
        time_query = estt.TimeQuery(
            "data.ts", spec['start_time'], spec['end_time'])

    modes = spec['modes']
    region = spec['sel_region']
    logging.debug("Filtering values for modes %s, range %s, region %s" %
                  (modes, time_query, region))

    matched_uuids = uuid_list_query(modes, time_query, region)
    logging.info("matched uuid_list of length = %s = %s" %
                 (len(matched_uuids), matched_uuids))
    return matched_uuids
Пример #10
0
def group_by_local_date(user_id, from_dt, to_dt, freq, summary_fn):
    """
    Summarise cleaned sections in a local date range, grouped by frequency.

    :param user_id: id for the user. None for aggregate.
    :param from_dt: start local dt object. Only the year, month and date
        entries are assumed to be filled in; together with to_dt they
        represent a date range.
    :param to_dt: end local dt object, with the same assumption as from_dt.
    :param freq: grouping frequency. Only frequencies that correspond to
        expanded local_dt fields are supported; these are represented in the
        `LocalFreq` enum.
    :param summary_fn: summary function passed to grouped_to_summary
    :return: an empty list when no sections match, otherwise the result of
        grouped_to_summary for the grouped data frame
    """
    local_range_query = esttc.TimeComponentQuery(
        "data.start_local_dt", from_dt, to_dt)
    sections = esda.get_data_df(esda.CLEANED_SECTION_KEY,
                                user_id=user_id,
                                time_query=local_range_query,
                                geo_query=None)

    if len(sections) == 0:
        logging.info("Found no entries for user %s, time_query %s" % (user_id, local_range_query))
        return []

    grouped_sections = sections.groupby(_get_local_group_by(freq))
    fill_local_dt = _get_local_key_to_fill_fn(freq)
    return grouped_to_summary(grouped_sections, fill_local_dt, summary_fn)
Пример #11
0
 def testComponentQuery(self):
     """
     Check how many entries a write-hour component query returns.

     The query is built on "metadata.write_local_dt" from
     LocalDate({"hour": 8}) to LocalDate({"hour": 9}) and is expected to
     match 490 entries of the test user's time series.
     """
     user_series = esta.TimeSeries.get_time_series(self.testUUID)
     first_hour = ecwl.LocalDate({"hour": 8})
     last_hour = ecwl.LocalDate({"hour": 9})
     hour_query = esttc.TimeComponentQuery("metadata.write_local_dt",
                                           first_hour, last_hour)
     match_count = sum(1 for _ in user_series.find_entries(time_query=hour_query))
     self.assertEqual(match_count, 490)
Пример #12
0
def incident_heatmap_local_date(user_uuid, modes, from_ld, to_ld, region):
    """
    Run incident_heatmap for a range given as local dates.

    :param user_uuid: forwarded unchanged to incident_heatmap
    :param modes: forwarded unchanged to incident_heatmap
    :param from_ld: start of the range, matched against "data.local_dt"
    :param to_ld: end of the range, matched against "data.local_dt"
    :param region: forwarded unchanged to incident_heatmap
    :return: whatever incident_heatmap returns
    """
    local_range_query = esttc.TimeComponentQuery(
        "data.local_dt", from_ld, to_ld)
    return incident_heatmap(user_uuid, modes, local_range_query, region)