Пример #1
0
def segment_current_trips(user_id):
    """Segment the user's pending filtered location points into trips.

    Reads the ``background/filtered_location`` entries for the time range
    returned by ``epq.get_time_range_for_segmentation``, selects the dwell
    segmentation method(s) matching the ``filter`` values present in that
    data, and stores places and trips for the resulting segmentation points.
    The outcome is recorded through ``epq``: the stage is marked done when
    there is nothing to process or when the trips were stored, and marked
    failed when segmentation returns ``None`` or storing the trips raises.

    :param user_id: identifier of the user whose points are segmented
    :return: None
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    time_query = epq.get_time_range_for_segmentation(user_id)

    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_time_filter as dstf
    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_dist_filter as dsdf
    dstfsm = dstf.DwellSegmentationTimeFilter(
        time_threshold=5 * 60,  # 5 mins
        point_threshold=9,
        distance_threshold=100)  # 100 m

    dsdfsm = dsdf.DwellSegmentationDistFilter(
        time_threshold=10 * 60,  # 10 mins
        point_threshold=9,
        distance_threshold=50)  # 50 m

    # Both tables are keyed by the values found in the "filter" column.
    filter_methods = {"time": dstfsm, "distance": dsdfsm}
    filter_method_names = {
        "time": "DwellSegmentationTimeFilter",
        "distance": "DwellSegmentationDistFilter"
    }
    # We need to use the appropriate filter based on the incoming data
    # So let's read in the location points for the specified query
    loc_df = ts.get_data_df("background/filtered_location", time_query)
    if len(loc_df) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(loc_df) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # Series.unique() reports NaN as a value of its own, so drop missing
    # entries first; otherwise a NaN would be used as a lookup key below or
    # would make single-filter data look like mixed-filter data.
    filters_in_df = loc_df["filter"].dropna().unique()
    logging.debug("Filters in the dataframe = %s", filters_in_df)
    if len(filters_in_df) == 1:
        # Common case - let's make it easy
        segmentation_points = filter_methods[
            filters_in_df[0]].segment_into_trips(ts, time_query)
    else:
        segmentation_points = get_combined_segmentation_points(
            ts, loc_df, time_query, filters_in_df, filter_methods)

    # Create and store trips and places based on the segmentation points
    if segmentation_points is None:
        epq.mark_segmentation_failed(user_id)
    elif len(segmentation_points) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(segmentation_points) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
    else:
        try:
            # The stored method name is always the one for the first filter
            # value, even when several filters were combined.
            create_places_and_trips(user_id, segmentation_points,
                                    filter_method_names[filters_in_df[0]])
            epq.mark_segmentation_done(user_id,
                                       get_last_ts_processed(filter_methods))
        except:
            # NOTE(review): the bare except is kept so that the stage is
            # marked failed for any error raised above. Confirm whether it
            # can be narrowed to Exception without leaving the stage
            # unmarked on interrupts.
            logging.exception("Trip generation failed for user %s", user_id)
            epq.mark_segmentation_failed(user_id)
 def testSegmentationPointsDwellSegmentationDistFilter(self):
     """Check the trips produced by DwellSegmentationDistFilter.

     Segments the time series of ``self.iosUUID`` using a ``TimeQuery`` on
     ``metadata.write_ts`` and expects exactly three (start, end) pairs
     whose ``ts`` values match the hard-coded timestamps below.
     """
     ts = esta.TimeSeries.get_time_series(self.iosUUID)
     tq = estt.TimeQuery("metadata.write_ts", 1446796800, 1446847600)
     dstdsm = dsdf.DwellSegmentationDistFilter(time_threshold = 10 * 60, # 10 mins
                                               point_threshold = 10,
                                               distance_threshold = 100) # 100 m
     segmentation_points = dstdsm.segment_into_trips(ts, tq)
     # Assert before iterating, so that a None result is reported as an
     # assertion failure instead of a TypeError raised by the loop.
     self.assertIsNotNone(segmentation_points)
     for (start, end) in segmentation_points:
         logging.debug("trip is from %s (%f) -> %s (%f)",
                       start.fmt_time, start.ts, end.fmt_time, end.ts)
     self.assertEqual(len(segmentation_points), 3)
     self.assertEqual([start.ts for (start, end) in segmentation_points],
                      [1446797042.282652, 1446821561.559255, 1446825828.465837])
     self.assertEqual([end.ts for (start, end) in segmentation_points],
                      [1446797923.682973, 1446825092.302420, 1446828217.125328])
Пример #3
0
def segment_current_trips(user_id):
    """Run trip segmentation over the user's pending filtered locations.

    Steps, in order:

    1. Load the ``background/filtered_location`` entries for the time range
       handed out by ``epq.get_time_range_for_segmentation``.
    2. Remove points whose ``ts`` is lower than that of the preceding row,
       both from the dataframe and from the timeseries database.
    3. Segment with the method matching the single ``filter`` value in the
       data, or with ``get_combined_segmentation_points`` otherwise.
    4. Store places and trips and record the stage outcome through ``epq``.

    :param user_id: identifier of the user whose points are segmented
    :return: None
    """
    timeseries = esta.TimeSeries.get_time_series(user_id)
    pending_range = epq.get_time_range_for_segmentation(user_id)

    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_time_filter as dstf
    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_dist_filter as dsdf

    # Segmentation methods, keyed by the value of the "filter" column.
    segmenters = {
        "time": dstf.DwellSegmentationTimeFilter(
            time_threshold=5 * 60,  # 5 mins
            point_threshold=9,
            distance_threshold=100),  # 100 m
        "distance": dsdf.DwellSegmentationDistFilter(
            time_threshold=10 * 60,  # 10 mins
            point_threshold=9,
            distance_threshold=50),  # 50 m
    }
    # Name passed to create_places_and_trips for each "filter" value.
    segmenter_names = {
        "time": "DwellSegmentationTimeFilter",
        "distance": "DwellSegmentationDistFilter",
    }

    # The incoming data decides which method applies, so read it first.
    locations = timeseries.get_data_df("background/filtered_location",
                                       pending_range)
    if len(locations) == 0:
        # Nothing new: close the stage so this range is not revisited.
        logging.debug("len(loc_df) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # Rows whose timestamp is lower than the row right before them.
    went_backwards = locations.ts.diff() < 0
    misplaced = locations[went_backwards]
    if len(misplaced) > 0:
        logging.info("Found out of order points!")
        logging.info("%s" % misplaced)
        # Take them out of the working dataframe ...
        locations = locations.drop(misplaced.index.tolist())
        # ... and out of the database as well. Deleting stored entries is
        # generally discouraged, which is why it is tucked away here.
        import emission.core.get_database as edb

        misplaced_ids = misplaced["_id"].tolist()
        logging.debug("out_of_order_id_list = %s" % misplaced_ids)
        # NOTE(review): if this is a pymongo collection, `remove` is
        # deprecated in favour of `delete_many` - confirm before upgrading.
        edb.get_timeseries_db().remove({"_id": {"$in": misplaced_ids}})

    seen_filters = locations["filter"].dropna().unique()
    logging.debug("Filters in the dataframe = %s" % seen_filters)
    if len(seen_filters) != 1:
        points = get_combined_segmentation_points(
            timeseries, locations, pending_range, seen_filters, segmenters)
    else:
        # Common case: a single filter value, use its method directly.
        only_segmenter = segmenters[seen_filters[0]]
        points = only_segmenter.segment_into_trips(timeseries, pending_range)

    if points is None:
        epq.mark_segmentation_failed(user_id)
        return

    if len(points) == 0:
        # Nothing new: close the stage so this range is not revisited.
        logging.debug("len(segmentation_points) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # Create and store the places and trips for the segmentation points.
    try:
        method_name = segmenter_names[seen_filters[0]]
        create_places_and_trips(user_id, points, method_name)
        last_ts = get_last_ts_processed(segmenters)
        epq.mark_segmentation_done(user_id, last_ts)
    except:
        logging.exception("Trip generation failed for user %s" % user_id)
        epq.mark_segmentation_failed(user_id)