def get_timeline_from_dt(user_id, place_key, trip_key, start_local_dt, end_local_dt, geojson=None, extra_query_list=None):
    """Build a Timeline of places and trips for a local-date range.

    Places are matched on ``data.enter_local_dt`` and trips on
    ``data.start_local_dt``; both queries honor the optional geojson
    region and any extra query clauses.

    :param user_id: user to query, or None for aggregate
    :param place_key: timeseries key for place entries
    :param trip_key: timeseries key for trip entries
    :param start_local_dt: start of the local date component range
    :param end_local_dt: end of the local date component range
    :param geojson: optional region, converted via get_place_trip_geoquery
    :param extra_query_list: optional additional query clauses
    :return: Timeline wrapping the matching place and trip entries
    """
    logging.info("About to query for date components %s -> %s" % (start_local_dt, end_local_dt))
    (place_gq, trip_gq) = get_place_trip_geoquery(geojson)
    place_tq = esttc.TimeComponentQuery("data.enter_local_dt",
                                        start_local_dt, end_local_dt)
    trip_tq = esttc.TimeComponentQuery("data.start_local_dt",
                                       start_local_dt, end_local_dt)
    places_entries = esda.get_entries(place_key, user_id, place_tq,
                                      geo_query=place_gq,
                                      extra_query_list=extra_query_list)
    trips_entries = esda.get_entries(trip_key, user_id, trip_tq,
                                     geo_query=trip_gq,
                                     extra_query_list=extra_query_list)
    for curr_place in places_entries:
        logging.debug("Considering place %s: %s -> %s " %
                      (curr_place.get_id(), curr_place.data.enter_fmt_time,
                       curr_place.data.exit_fmt_time))
    for curr_trip in trips_entries:
        logging.debug("Considering trip %s: %s -> %s " %
                      (curr_trip.get_id(), curr_trip.data.start_fmt_time,
                       curr_trip.data.end_fmt_time))
    return Timeline(place_key, trip_key, places_entries, trips_entries)
def getTimeseriesEntries(time_type):
    """Return raw timeseries entries for the authenticated user.

    Reads the query from ``request.json``: if both ``from_local_date`` and
    ``to_local_date`` are present, filters by local date components;
    otherwise expects ``start_time``/``end_time`` and filters by raw
    timestamp. Generalized (consistent with the extended variant of this
    endpoint) so the caller may override the matched field via
    ``key_local_date`` / ``key_time``; the default remains the original
    hard-coded ``metadata.write_ts``, so existing callers are unaffected.

    :param time_type: unused here; kept for route-signature compatibility
    :return: dict with the matching entries under 'phone_data'
    :raises: aborts with 401 if the request is not from a user
    """
    if 'user' not in request.json:
        abort(401, "only a user can read his/her data")
    user_uuid = getUUID(request)
    key_list = request.json['key_list']
    if 'from_local_date' in request.json and 'to_local_date' in request.json:
        start_time = request.json['from_local_date']
        end_time = request.json['to_local_date']
        # Optional override of the matched field; defaults to the original
        # hard-coded value for backward compatibility.
        time_key = request.json.get('key_local_date', 'metadata.write_ts')
        time_query = esttc.TimeComponentQuery(time_key, start_time, end_time)
    else:
        start_time = request.json['start_time']
        end_time = request.json['end_time']
        time_key = request.json.get('key_time', 'metadata.write_ts')
        time_query = estt.TimeQuery(time_key, start_time, end_time)
    # Note that queries from usercache are limited to 100,000 entries
    # and entries from timeseries are limited to 250,000, so we will
    # return at most 350,000 entries. So this means that we don't need
    # additional filtering, but this should be documented in
    # the API
    data_list = esdc.find_entries(user_uuid, key_list, time_query)
    return {'phone_data': data_list}
def group_by_local_date(user_id, from_dt, to_dt, freq, summary_fn_list):
    """
    Group analysed sections in a local date range and summarize each group.

    :param user_id: id for the user. None for aggregate.
    :param from_dt: start local dt object; only year/month/day are assumed
        to be filled in, representing a date range.
    :param to_dt: end local dt object; same assumption as from_dt.
    :param freq: a `LocalFreq` value; only frequencies matching the
        expanded local_dt fields are supported.
    :param summary_fn_list: list of summary functions, each applied to the
        grouped sections.
    :return: dict with the start_ts of the last section processed
        ("last_ts_processed") and a "result" list with one list of
        ModeStatTimeSummary objects per summary function. When nothing
        matches, last_ts_processed is None and each result list is empty.
    """
    tq = esttc.TimeComponentQuery("data.start_local_dt", from_dt, to_dt)
    section_df = esda.get_data_df(eac.get_section_key_for_analysis_results(),
                                  user_id=user_id, time_query=tq,
                                  geo_query=None)
    if not len(section_df):
        logging.info("Found no entries for user %s, time_query %s" %
                     (user_id, tq))
        return {
            "last_ts_processed": None,
            "result": [[] for _ in summary_fn_list]
        }
    grouped = section_df.groupby(_get_local_group_by(freq))
    fill_fn = _get_local_key_to_fill_fn(freq)
    summaries = [grouped_to_summary(grouped, fill_fn, summary_fn)
                 for summary_fn in summary_fn_list]
    return {
        "last_ts_processed": section_df.iloc[-1].start_ts,
        "result": summaries
    }
def range_mode_heatmap(mode, start_ts, end_ts):
    """Return coordinates of cleaned locations for `mode` between the two
    timestamps, which are converted to UTC local-date components before
    querying.

    NOTE(review): sibling heatmap endpoints return their coordinate list
    under the key "lnglat"; this one uses "latlng". Confirm which key the
    callers of this endpoint expect before unifying.
    """
    time_query = esttc.TimeComponentQuery(
        "data.ts",
        esdl.get_local_date(start_ts, "UTC"),
        esdl.get_local_date(end_ts, "UTC"))
    matching_entries = esda.get_entries(
        esda.CLEANED_LOCATION_KEY, user_id=None,
        time_query=time_query, geo_query=None,
        extra_query_list=[esdlq.get_mode_query(mode)])
    return {"latlng": [entry.data.loc.coordinates
                       for entry in matching_entries]}
def get_time_query(year, month):
    """Build a TimeComponentQuery for a whole year or a single year+month.

    :param year: calendar year, or None
    :param month: calendar month, or None; only valid together with year
    :return: esttc.TimeComponentQuery over data.start_local_dt, or None
        when neither year nor month is given (no time filtering)
    :raises ValueError: if month is specified without a year
    """
    if year is None and month is None:
        return None
    if year is None:
        # Previously an `assert`, which is silently stripped under
        # `python -O`; raise explicitly so invalid input always fails.
        raise ValueError("month %s specified without a year" % month)
    ld_spec = {"year": year}
    if month is not None:
        ld_spec["month"] = month
    query_ld = ecwl.LocalDate(ld_spec)
    tq = esttc.TimeComponentQuery("data.start_local_dt", query_ld, query_ld)
    return tq
def getTimeseriesEntries(time_type):
    """Return raw timeseries entries for the authenticated user, optionally
    truncated to at most ``max_entries``.

    Reads the query from ``request.json``: local date components
    (``from_local_date``/``to_local_date``) or raw timestamps
    (``start_time``/``end_time``); the matched field defaults to
    ``metadata.write_ts`` and can be overridden via ``key_local_date`` /
    ``key_time``. If ``max_entries`` is present, ``trunc_method`` selects
    how to shrink an over-long result: 'first', 'last', or 'sample'.

    :param time_type: unused here; kept for route-signature compatibility
    :return: dict with the (possibly truncated) entries under 'phone_data'
    :raises: aborts with 401 for non-user requests, 500 for a non-int
        max_entries or an unknown trunc_method
    """
    if 'user' not in request.json:
        abort(401, "only a user can read his/her data")
    user_uuid = getUUID(request)
    key_list = request.json['key_list']
    if 'from_local_date' in request.json and 'to_local_date' in request.json:
        start_time = request.json['from_local_date']
        end_time = request.json['to_local_date']
        time_key = request.json.get('key_local_date', 'metadata.write_ts')
        time_query = esttc.TimeComponentQuery(time_key, start_time, end_time)
    else:
        start_time = request.json['start_time']
        end_time = request.json['end_time']
        time_key = request.json.get('key_time', 'metadata.write_ts')
        time_query = estt.TimeQuery(time_key, start_time, end_time)
    # Note that queries from usercache are limited to 100,000 entries
    # and entries from timeseries are limited to 250,000, so we will
    # return at most 350,000 entries. So this means that we don't need
    # additional filtering, but this should be documented in
    # the API
    data_list = esdc.find_entries(user_uuid, key_list, time_query)
    if 'max_entries' in request.json:
        me = request.json['max_entries']
        if not isinstance(me, int):
            logging.error("aborting: max entry count is %s, type %s, expected int"
                          % (me, type(me)))
            abort(500, "Invalid max_entries %s" % me)
        if len(data_list) > me:
            trunc_method = request.json['trunc_method']
            # BUG FIX: these branches were two separate `if` statements, so
            # trunc_method == 'first' truncated the list and then fell
            # through the second chain's `else` and aborted with a 500.
            # One elif chain handles each method exactly once.
            if trunc_method == 'first':
                logging.debug("first n entries is %s" % me)
                data_list = data_list[:me]
            elif trunc_method == 'last':
                # (this branch previously logged "first n entries")
                logging.debug("last n entries is %s" % me)
                data_list = data_list[-me:]
            elif trunc_method == "sample":
                # Keep every k-th entry so the result has <= me entries.
                sample_rate = len(data_list)//me + 1
                logging.debug("sampling rate is %s" % sample_rate)
                data_list = data_list[::sample_rate]
            else:
                logging.error("aborting: unexpected sampling method %s"
                              % trunc_method)
                abort(500, "sampling method not specified while retrieving limited data")
        else:
            logging.debug("Found %d entries < %s, no truncation"
                          % (len(data_list), me))
    logging.debug("successfully returning list of size %s" % len(data_list))
    return {'phone_data': data_list}
def range_mode_heatmap(modes, from_ld, to_ld, region):
    """Return coordinates (under "lnglat") of cleaned locations matching the
    local-date component range, an optional set of motion modes, and an
    optional geographic region.

    :param modes: list of MotionTypes names, or None for all modes
    :param from_ld: start local date components
    :param to_ld: end local date components
    :param region: geojson region for the geo query, or None for everywhere
    """
    time_query = esttc.TimeComponentQuery("data.local_dt", from_ld, to_ld)
    geo_query = None if region is None else estg.GeoQuery(["data.loc"], region)
    extra_queries = []
    if modes is not None:
        motion_types = [ecwm.MotionTypes[mode_name] for mode_name in modes]
        extra_queries.append(esdlq.get_mode_query(motion_types))
    matching_entries = esda.get_entries(esda.CLEANED_LOCATION_KEY,
                                        user_id=None,
                                        time_query=time_query,
                                        geo_query=geo_query,
                                        extra_query_list=extra_queries)
    return {"lnglat": [entry.data.loc.coordinates
                       for entry in matching_entries]}
def Berkeley_pop_route(start_ts, end_ts):
    """Return coordinates (under "lnglat") of cleaned locations inside a
    fixed Berkeley bounding polygon, for timestamps interpreted as UTC
    date components."""
    # Axis-aligned box: SW corner then NE corner.
    sw = [-122.267443, 37.864693]
    ne = [-122.250985, 37.880687]
    berkeley_json = {
        "geometry": {
            "type": "Polygon",
            # Closed ring: SW -> NW -> NE -> SE -> SW
            "coordinates": [[sw,
                             [sw[0], ne[1]],
                             ne,
                             [ne[0], sw[1]],
                             sw]]
        }
    }
    time_query = esttc.TimeComponentQuery(
        "data.ts",
        esdl.get_local_date(start_ts, "UTC"),
        esdl.get_local_date(end_ts, "UTC"))
    geo_query = estg.GeoQuery(["data.loc"], berkeley_json)
    matching_entries = esda.get_entries(esda.CLEANED_LOCATION_KEY,
                                        user_id=None,
                                        time_query=time_query,
                                        geo_query=geo_query)
    return {"lnglat": [entry.data.loc.coordinates
                       for entry in matching_entries]}
def query(spec):
    """Resolve a query spec into the list of matching user UUIDs.

    The spec may carry either a local-date component range
    ('from_local_date'/'to_local_date') or a raw timestamp range
    ('start_time'/'end_time'); with neither, no time filtering is applied.
    'modes' and 'sel_region' are passed through to uuid_list_query.
    """
    time_type = spec['time_type']  # read but unused; kept as in the spec contract
    if 'from_local_date' in spec and 'to_local_date' in spec:
        time_query = esttc.TimeComponentQuery("data.local_dt",
                                              spec['from_local_date'],
                                              spec['to_local_date'])
    elif 'start_time' in spec and 'end_time' in spec:
        time_query = estt.TimeQuery("data.ts",
                                    spec['start_time'],
                                    spec['end_time'])
    else:
        time_query = None
    modes = spec['modes']
    region = spec['sel_region']
    logging.debug("Filtering values for modes %s, range %s, region %s" %
                  (modes, time_query, region))
    uuid_list = uuid_list_query(modes, time_query, region)
    logging.info("matched uuid_list of length = %s = %s" %
                 (len(uuid_list), uuid_list))
    return uuid_list
def group_by_local_date(user_id, from_dt, to_dt, freq, summary_fn):
    """
    Group cleaned sections in a local date range by frequency and summarize.

    :param user_id: id for the user. None for aggregate.
    :param from_dt: start local dt object; only year/month/day are assumed
        to be filled in, representing a date range.
    :param to_dt: end local dt object; same assumption as from_dt.
    :param freq: a `LocalFreq` value; only frequencies matching the
        expanded local_dt fields are supported.
    :param summary_fn: summary function applied to the grouped sections.
    :return: result of grouped_to_summary for the matched sections, or []
        when no sections match.
    """
    tq = esttc.TimeComponentQuery("data.start_local_dt", from_dt, to_dt)
    section_df = esda.get_data_df(esda.CLEANED_SECTION_KEY,
                                  user_id=user_id, time_query=tq,
                                  geo_query=None)
    if not len(section_df):
        logging.info("Found no entries for user %s, time_query %s" %
                     (user_id, tq))
        return []
    grouped = section_df.groupby(_get_local_group_by(freq))
    fill_fn = _get_local_key_to_fill_fn(freq)
    return grouped_to_summary(grouped, fill_fn, summary_fn)
def testComponentQuery(self):
    # Query by local time-of-day components only: entries whose
    # metadata.write_local_dt hour falls between 8 and 9, on any date.
    ts = esta.TimeSeries.get_time_series(self.testUUID)
    start_ld = ecwl.LocalDate({"hour": 8})
    end_ld = ecwl.LocalDate({"hour": 9})
    tq = esttc.TimeComponentQuery("metadata.write_local_dt",
                                  start_ld, end_ld)
    matching = list(ts.find_entries(time_query=tq))
    # 490 is the hard-coded expected count for the test dataset.
    self.assertEqual(len(matching), 490)
def incident_heatmap_local_date(user_uuid, modes, from_ld, to_ld, region):
    """Thin wrapper over incident_heatmap that filters by local date
    components (data.local_dt) instead of raw timestamps."""
    component_tq = esttc.TimeComponentQuery("data.local_dt", from_ld, to_ld)
    return incident_heatmap(user_uuid, modes, component_tq, region)