def group_by_local_date(user_id, from_dt, to_dt, freq, summary_fn_list):
    """
    Summarize cleaned sections grouped by local date components.

    Sections whose ``data.start_local_dt`` matches the requested local date
    range are loaded, grouped at the requested frequency, and every function
    in ``summary_fn_list`` is applied to that same grouping.

    :param user_id: id for the user. None for aggregate.
    :param from_dt: start local dt object. We assume that only the year,
        month and date entries are filled in and represent a date range.
    :param to_dt: end local dt object, with the same assumptions as
        ``from_dt``.
    :param freq: a value of the `LocalFreq` enum. Since only certain local_dt
        fields are expanded, only frequencies corresponding to them are
        supported.
    :param summary_fn_list: the summary functions to apply. Each one is handed
        to ``grouped_to_summary`` together with the grouped sections.
    :return: a dict with two entries. ``last_ts_processed`` is the
        ``start_ts`` of the last row of the retrieved sections, and ``result``
        holds one list of ModeStatTimeSummary objects per function in
        ``summary_fn_list``, in the same order. If there were no matching
        sections, ``last_ts_processed`` is None and every list is empty.
    """
    local_range_query = esttc.TimeComponentQuery("data.start_local_dt", from_dt, to_dt)
    sections = esda.get_data_df(
        esda.CLEANED_SECTION_KEY,
        user_id=user_id,
        time_query=local_range_query,
        geo_query=None,
    )

    if len(sections) == 0:
        logging.info("Found no entries for user %s, time_query %s" % (user_id, local_range_query))
        # Keep the shape of the result stable: one empty list per summary function.
        empty_results = [[] for _ in range(len(summary_fn_list))]
        return {"last_ts_processed": None, "result": empty_results}

    grouped_sections = sections.groupby(_get_local_group_by(freq))
    fill_fn = _get_local_key_to_fill_fn(freq)

    last_start_ts = sections.iloc[-1].start_ts
    summaries = [
        grouped_to_summary(grouped_sections, fill_fn, one_summary_fn)
        for one_summary_fn in summary_fn_list
    ]
    return {"last_ts_processed": last_start_ts, "result": summaries}
def group_by_local_date(user_id, from_dt, to_dt, freq, summary_fn_list):
    """
    Summarize analysis-result sections grouped by local date components.

    The section key is obtained from
    ``eac.get_section_key_for_analysis_results()``. Sections whose
    ``data.start_local_dt`` matches the requested local date range are loaded,
    grouped at the requested frequency, and every function in
    ``summary_fn_list`` is applied to that same grouping.

    :param user_id: id for the user. None for aggregate.
    :param from_dt: start local dt object. We assume that only the year,
        month and date entries are filled in and represent a date range.
    :param to_dt: end local dt object, with the same assumptions as
        ``from_dt``.
    :param freq: a value of the `LocalFreq` enum. Since only certain local_dt
        fields are expanded, only frequencies corresponding to them are
        supported.
    :param summary_fn_list: the summary functions to apply. Each one is handed
        to ``grouped_to_summary`` together with the grouped sections.
    :return: a dict with two entries. ``last_ts_processed`` is the
        ``start_ts`` of the last row of the retrieved sections, and ``result``
        holds one list of ModeStatTimeSummary objects per function in
        ``summary_fn_list``, in the same order. If there were no matching
        sections, ``last_ts_processed`` is None and every list is empty.
    """
    local_range_query = esttc.TimeComponentQuery("data.start_local_dt", from_dt, to_dt)
    section_key = eac.get_section_key_for_analysis_results()
    sections = esda.get_data_df(
        section_key,
        user_id=user_id,
        time_query=local_range_query,
        geo_query=None,
    )

    if len(sections) == 0:
        logging.info("Found no entries for user %s, time_query %s" % (user_id, local_range_query))
        # Keep the shape of the result stable: one empty list per summary function.
        empty_results = [[] for _ in range(len(summary_fn_list))]
        return {"last_ts_processed": None, "result": empty_results}

    grouped_sections = sections.groupby(_get_local_group_by(freq))
    fill_fn = _get_local_key_to_fill_fn(freq)

    last_start_ts = sections.iloc[-1].start_ts
    summaries = [
        grouped_to_summary(grouped_sections, fill_fn, one_summary_fn)
        for one_summary_fn in summary_fn_list
    ]
    return {"last_ts_processed": last_start_ts, "result": summaries}
def group_by_timestamp(user_id, start_ts, end_ts, freq, summary_fn):
    """
    Summarize cleaned sections grouped into time buckets by start timestamp.

    :param user_id: The user for whom we are computing this information.
        None for all users.
    :param start_ts: The start timestamp
    :param end_ts: The end timestamp
    :param freq: The frequency as specified in a pandas date_range frequency
        string. We only support frequencies of a day or longer in order to
        return the data in a format that makes sense
        http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
        The canonical list can be found at:
        > pandas.tseries.offsets.prefix_mapping
    :param summary_fn: the summary function handed to ``grouped_to_summary``
        together with the grouped sections.
    :return: a list of ModeStatTimeSummary objects; an empty list if no
        sections matched the query.
    """
    ts_range_query = estt.TimeQuery("data.start_ts", start_ts, end_ts)
    sections = esda.get_data_df(
        esda.CLEANED_SECTION_KEY,
        user_id=user_id,
        time_query=ts_range_query,
        geo_query=None,
    )

    if len(sections) == 0:
        logging.info("Found no entries for user %s, time_query %s" % (user_id, ts_range_query))
        return []

    logging.debug("first row is %s" % sections.iloc[0])

    # start_ts is scaled from seconds to nanoseconds before conversion;
    # nanoseconds is the default unit pandas assumes for numeric input.
    start_nanos = sections.start_ts * 10 ** 9
    sections['start_dt'] = pd.to_datetime(start_nanos)

    bucketer = pd.Grouper(freq=freq, key='start_dt')
    grouped_sections = sections.groupby(bucketer)
    return grouped_to_summary(grouped_sections, timestamp_fill_times, summary_fn)
def uuid_list_query(modes, time_query, region):
    """
    List the distinct user ids that have cleaned location points matching
    the given filters.

    The lookup is issued with ``user_id=None`` (presumably meaning "across
    all users" -- confirm against ``esda.get_data_df``).

    :param modes: names that are looked up in ``ecwm.MotionTypes`` and turned
        into a mode query; None to apply no mode filter.
    :param time_query: the time query passed through to the data lookup.
    :param region: the region used to build a geo query on ``data.loc``;
        None to apply no geo filter.
    :return: a list of the unique ``user_id`` values among the matching
        points; an empty list if no points matched.
    """
    geo_query = None if region is None else estg.GeoQuery(["data.loc"], region)

    extra_queries = []
    if modes is not None:
        motion_types = [ecwm.MotionTypes[mode_name] for mode_name in modes]
        extra_queries.append(esdlq.get_mode_query(motion_types))

    points_df = esda.get_data_df(
        esda.CLEANED_LOCATION_KEY,
        user_id=None,
        time_query=time_query,
        geo_query=geo_query,
        extra_query_list=extra_queries,
    )

    if len(points_df) == 0:
        logging.info("No points found matching query, returning empty list")
        return []

    distinct_uuids = points_df.user_id.unique().tolist()
    logging.info("Found %d points with %d unique uuids" % (len(points_df), len(distinct_uuids)))
    return distinct_uuids
def group_by_timestamp(user_id, start_ts, end_ts, freq, summary_fn_list):
    """
    Summarize cleaned sections grouped into time buckets by start timestamp,
    applying several summary functions to the same grouping.

    :param user_id: The user for whom we are computing this information.
        None for all users.
    :param start_ts: The start timestamp
    :param end_ts: The end timestamp
    :param freq: The frequency as specified in a pandas date_range frequency
        string. We only support frequencies of a day or longer in order to
        return the data in a format that makes sense
        http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
        The canonical list can be found at:
        > pandas.tseries.offsets.prefix_mapping
    :param summary_fn_list: the summary functions to apply. Each one is handed
        to ``grouped_to_summary`` together with the grouped sections.
    :return: a dict with two entries. ``last_ts_processed`` is the
        ``start_ts`` of the last row of the retrieved sections, and ``result``
        holds one list of ModeStatTimeSummary objects per function in
        ``summary_fn_list``, in the same order. If there were no matching
        sections, ``last_ts_processed`` is None and every list is empty.
    """
    ts_range_query = estt.TimeQuery("data.start_ts", start_ts, end_ts)
    sections = esda.get_data_df(
        esda.CLEANED_SECTION_KEY,
        user_id=user_id,
        time_query=ts_range_query,
        geo_query=None,
    )

    if len(sections) == 0:
        logging.info("Found no entries for user %s, time_query %s" % (user_id, ts_range_query))
        # Keep the shape of the result stable: one empty list per summary function.
        empty_results = [[] for _ in range(len(summary_fn_list))]
        return {"last_ts_processed": None, "result": empty_results}

    logging.debug("first row is %s" % sections.iloc[0])

    # start_ts is scaled from seconds to nanoseconds before conversion;
    # nanoseconds is the default unit pandas assumes for numeric input.
    start_nanos = sections.start_ts * 10 ** 9
    sections['start_dt'] = pd.to_datetime(start_nanos)

    bucketer = pd.Grouper(freq=freq, key='start_dt')
    grouped_sections = sections.groupby(bucketer)

    last_start_ts = sections.iloc[-1].start_ts
    summaries = [
        grouped_to_summary(grouped_sections, timestamp_fill_times, one_summary_fn)
        for one_summary_fn in summary_fn_list
    ]
    return {"last_ts_processed": last_start_ts, "result": summaries}
def group_by_timestamp(user_id, start_ts, end_ts, freq, summary_fn_list):
    """
    Summarize analysis-result sections grouped into time buckets by start
    timestamp, applying several summary functions to the same grouping.

    The section key is obtained from
    ``eac.get_section_key_for_analysis_results()``.

    :param user_id: The user for whom we are computing this information.
        None for all users.
    :param start_ts: The start timestamp
    :param end_ts: The end timestamp
    :param freq: The frequency as specified in a pandas date_range frequency
        string. We only support frequencies of a day or longer in order to
        return the data in a format that makes sense
        http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
        The canonical list can be found at:
        > pandas.tseries.offsets.prefix_mapping
    :param summary_fn_list: the summary functions to apply. Each one is handed
        to ``grouped_to_summary`` together with the grouped sections.
    :return: a dict with two entries. ``last_ts_processed`` is the
        ``start_ts`` of the last row of the retrieved sections, and ``result``
        holds one list of ModeStatTimeSummary objects per function in
        ``summary_fn_list``, in the same order. If there were no matching
        sections, ``last_ts_processed`` is None and every list is empty.
    """
    ts_range_query = estt.TimeQuery("data.start_ts", start_ts, end_ts)
    section_key = eac.get_section_key_for_analysis_results()
    sections = esda.get_data_df(
        section_key,
        user_id=user_id,
        time_query=ts_range_query,
        geo_query=None,
    )

    if len(sections) == 0:
        logging.info("Found no entries for user %s, time_query %s" % (user_id, ts_range_query))
        # Keep the shape of the result stable: one empty list per summary function.
        empty_results = [[] for _ in range(len(summary_fn_list))]
        return {"last_ts_processed": None, "result": empty_results}

    logging.debug("first row is %s" % sections.iloc[0])

    # start_ts is scaled from seconds to nanoseconds before conversion;
    # nanoseconds is the default unit pandas assumes for numeric input.
    start_nanos = sections.start_ts * 10 ** 9
    sections['start_dt'] = pd.to_datetime(start_nanos)

    bucketer = pd.Grouper(freq=freq, key='start_dt')
    grouped_sections = sections.groupby(bucketer)

    last_start_ts = sections.iloc[-1].start_ts
    summaries = [
        grouped_to_summary(grouped_sections, timestamp_fill_times, one_summary_fn)
        for one_summary_fn in summary_fn_list
    ]
    return {"last_ts_processed": last_start_ts, "result": summaries}
def group_by_local_date(user_id, from_dt, to_dt, freq, summary_fn):
    """
    Summarize cleaned sections grouped by local date components.

    Sections whose ``data.start_local_dt`` matches the requested local date
    range are loaded, grouped at the requested frequency, and summarized with
    ``summary_fn``.

    :param user_id: id for the user. None for aggregate.
    :param from_dt: start local dt object. We assume that only the year,
        month and date entries are filled in and represent a date range.
    :param to_dt: end local dt object, with the same assumptions as
        ``from_dt``.
    :param freq: a value of the `LocalFreq` enum. Since only certain local_dt
        fields are expanded, only frequencies corresponding to them are
        supported.
    :param summary_fn: the summary function handed to ``grouped_to_summary``
        together with the grouped sections.
    :return: whatever ``grouped_to_summary`` produces for the grouped
        sections (presumably a list of ModeStatTimeSummary objects -- confirm
        against that helper); an empty list if no sections matched the query.
    """
    local_range_query = esttc.TimeComponentQuery("data.start_local_dt", from_dt, to_dt)
    sections = esda.get_data_df(
        esda.CLEANED_SECTION_KEY,
        user_id=user_id,
        time_query=local_range_query,
        geo_query=None,
    )

    if len(sections) == 0:
        logging.info("Found no entries for user %s, time_query %s" % (user_id, local_range_query))
        return []

    grouped_sections = sections.groupby(_get_local_group_by(freq))
    fill_fn = _get_local_key_to_fill_fn(freq)
    return grouped_to_summary(grouped_sections, fill_fn, summary_fn)