def segment_current_trips(user_id):
    """Segment the user's unprocessed location points into trips and places.

    Reads the ``background/filtered_location`` entries for the time range
    returned by ``epq.get_time_range_for_segmentation``, picks the dwell
    segmentation method(s) matching the ``filter`` values present in that
    data, and hands the resulting segmentation points to
    ``create_places_and_trips``. Every normal exit marks the segmentation
    stage as done or failed through ``epq``.

    :param user_id: identifier forwarded to the time series, the ``epq``
        pipeline calls and ``create_places_and_trips``.
    :return: None
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    time_query = epq.get_time_range_for_segmentation(user_id)

    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_time_filter as dstf
    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_dist_filter as dsdf

    dstfsm = dstf.DwellSegmentationTimeFilter(
        time_threshold=5 * 60,  # 5 mins
        point_threshold=9,
        distance_threshold=100)  # 100 m

    dsdfsm = dsdf.DwellSegmentationDistFilter(
        time_threshold=10 * 60,  # 10 mins
        point_threshold=9,
        distance_threshold=50)  # 50 m

    # Both dicts are keyed by the values found in the "filter" column.
    filter_methods = {"time": dstfsm, "distance": dsdfsm}
    filter_method_names = {
        "time": "DwellSegmentationTimeFilter",
        "distance": "DwellSegmentationDistFilter",
    }

    # We need to use the appropriate filter based on the incoming data,
    # so read in the location points for the specified query.
    loc_df = ts.get_data_df("background/filtered_location", time_query)
    if len(loc_df) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(loc_df) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # Drop missing values before collecting the distinct filters: a NaN entry
    # would otherwise count as an extra "filter", pushing single-filter data
    # onto the combined path and feeding a NaN key to the filter lookup.
    filters_in_df = loc_df["filter"].dropna().unique()
    logging.debug("Filters in the dataframe = %s" % filters_in_df)
    if len(filters_in_df) == 1:
        # Common case - let's make it easy
        segmentation_points = filter_methods[
            filters_in_df[0]].segment_into_trips(ts, time_query)
    else:
        segmentation_points = get_combined_segmentation_points(
            ts, loc_df, time_query, filters_in_df, filter_methods)

    # Create and store trips and places based on the segmentation points
    if segmentation_points is None:
        epq.mark_segmentation_failed(user_id)
    elif len(segmentation_points) == 0:
        # no new segments, no need to keep looking at these again
        logging.debug("len(segmentation_points) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
    else:
        try:
            # The stored trips are labelled with the name of the first filter
            # found, even when several filters are present in the data.
            create_places_and_trips(user_id, segmentation_points,
                                    filter_method_names[filters_in_df[0]])
            epq.mark_segmentation_done(user_id,
                                       get_last_ts_processed(filter_methods))
        except:
            # NOTE(review): the catch-all is kept so that any failure marks
            # the stage as failed; confirm whether KeyboardInterrupt and
            # SystemExit should propagate instead.
            logging.exception("Trip generation failed for user %s" % user_id)
            epq.mark_segmentation_failed(user_id)
def testSegmentationPointsDwellSegmentationDistFilter(self):
    # Runs DwellSegmentationDistFilter over the time series of self.iosUUID,
    # restricted by a TimeQuery on "metadata.write_ts", and checks the start
    # and end timestamps of the three expected trips.
    ts = esta.TimeSeries.get_time_series(self.iosUUID)
    tq = estt.TimeQuery("metadata.write_ts", 1446796800, 1446847600)
    dstdsm = dsdf.DwellSegmentationDistFilter(
        time_threshold=10 * 60,  # 10 mins
        point_threshold=10,
        distance_threshold=100)  # 100 m
    segmentation_points = dstdsm.segment_into_trips(ts, tq)

    # Check for None before iterating, so that a failed segmentation shows up
    # as a clear assertion failure rather than a TypeError from the loop.
    self.assertIsNotNone(segmentation_points)
    for (start, end) in segmentation_points:
        logging.debug("trip is from %s (%f) -> %s (%f)" %
                      (start.fmt_time, start.ts, end.fmt_time, end.ts))

    self.assertEqual(len(segmentation_points), 3)
    self.assertEqual([start.ts for (start, end) in segmentation_points],
                     [1446797042.282652, 1446821561.559255,
                      1446825828.465837])
    self.assertEqual([end.ts for (start, end) in segmentation_points],
                     [1446797923.682973, 1446825092.302420,
                      1446828217.125328])
def segment_current_trips(user_id):
    """Turn the user's pending location points into stored trips and places.

    Loads the ``background/filtered_location`` entries for the range given by
    ``epq.get_time_range_for_segmentation``, discards points whose ``ts`` goes
    backwards (from the dataframe and from the timeseries database), runs the
    dwell segmentation method(s) matching the ``filter`` values in the data,
    and passes the segmentation points to ``create_places_and_trips``. Every
    normal exit marks the segmentation stage done or failed through ``epq``.

    :param user_id: identifier forwarded to the time series, the ``epq``
        pipeline calls and ``create_places_and_trips``.
    :return: None
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    time_query = epq.get_time_range_for_segmentation(user_id)

    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_time_filter as dstf
    import emission.analysis.intake.segmentation.trip_segmentation_methods.dwell_segmentation_dist_filter as dsdf

    # One segmentation method per value of the "filter" column.
    filter_methods = {
        "time": dstf.DwellSegmentationTimeFilter(
            time_threshold=5 * 60,  # 5 mins
            point_threshold=9,
            distance_threshold=100),  # 100 m
        "distance": dsdf.DwellSegmentationDistFilter(
            time_threshold=10 * 60,  # 10 mins
            point_threshold=9,
            distance_threshold=50),  # 50 m
    }
    filter_method_names = {
        "time": "DwellSegmentationTimeFilter",
        "distance": "DwellSegmentationDistFilter",
    }

    # The choice of method depends on the incoming data, so load the
    # location points for the query first.
    loc_df = ts.get_data_df("background/filtered_location", time_query)
    if len(loc_df) == 0:
        # Nothing new to segment; no point in revisiting this range.
        logging.debug("len(loc_df) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    # Rows whose timestamp is smaller than that of the row before them.
    went_backwards = loc_df.ts.diff() < 0
    out_of_order_points = loc_df[went_backwards]
    if len(out_of_order_points) > 0:
        logging.info("Found out of order points!")
        logging.info("%s" % out_of_order_points)
        # Remove them from the in-memory table ...
        loc_df = loc_df.drop(out_of_order_points.index.tolist())
        # ... and from the database as well. Deleting stored entries is
        # generally discouraged, hence the local import kept out of sight.
        import emission.core.get_database as edb
        stale_ids = out_of_order_points["_id"].tolist()
        logging.debug("out_of_order_id_list = %s" % stale_ids)
        edb.get_timeseries_db().remove({"_id": {"$in": stale_ids}})

    filters_in_df = loc_df["filter"].dropna().unique()
    logging.debug("Filters in the dataframe = %s" % filters_in_df)
    if len(filters_in_df) != 1:
        segmentation_points = get_combined_segmentation_points(
            ts, loc_df, time_query, filters_in_df, filter_methods)
    else:
        # Common case: a single filter, so a single method handles it all.
        only_method = filter_methods[filters_in_df[0]]
        segmentation_points = only_method.segment_into_trips(ts, time_query)

    # Store trips and places derived from the segmentation points.
    if segmentation_points is None:
        epq.mark_segmentation_failed(user_id)
        return

    if len(segmentation_points) == 0:
        # Nothing new to segment; no point in revisiting this range.
        logging.debug("len(segmentation_points) == 0, early return")
        epq.mark_segmentation_done(user_id, None)
        return

    try:
        create_places_and_trips(
            user_id,
            segmentation_points,
            filter_method_names[filters_in_df[0]])
        epq.mark_segmentation_done(
            user_id, get_last_ts_processed(filter_methods))
    except:
        logging.exception("Trip generation failed for user %s" % user_id)
        epq.mark_segmentation_failed(user_id)