def create_places_and_trips(user_id, segmentation_points, segmentation_method_name):
    """
    Create a raw trip, and the raw place it ends at, for every pair in
    `segmentation_points`, chaining each trip onto the previous raw place.

    :param user_id: user whose timeseries is read from and written to
    :param segmentation_points: sized iterable of
        (start_loc_doc, end_loc_doc) pairs; each element is converted with
        `ts.df_row_to_entry("background/filtered_location", ...)`
    :param segmentation_method_name: stored as `source` on every trip and
        place created here
    """
    # Only the "easy" case is handled here: the trip was continuous, it was
    # stopped when the trip end was detected, and there is no gap between the
    # start of the trip and the last place. Gaps in tracking can cause other
    # issues; a more detailed description of dealing with them can be found
    # in the wiki.
    ts = esta.TimeSeries.get_time_series(user_id)

    # Retrieve the last place so that the first new trip can be stitched to
    # it. If there is none, start a brand new chain.
    prev_place_entry = esdp.get_last_place_entry(esda.RAW_PLACE_KEY, user_id)
    if prev_place_entry is not None:
        prev_place = prev_place_entry.data
    else:
        prev_place = start_new_chain(user_id)
        prev_place.source = segmentation_method_name
        prev_place_entry = ecwe.Entry.create_entry(
            user_id, "segmentation/raw_place", prev_place, create_id=True)

    # Theoretically, some sanity checks could be made here to make sure that
    # we are fairly close to the last point, maybe recording a confidence
    # level based on that.
    logging.debug("segmentation_point_list has length %s" % len(segmentation_points))
    for start_row, end_row in segmentation_points:
        logging.debug("start_loc_doc = %s, end_loc_doc = %s" % (start_row, end_row))
        start_loc = ts.df_row_to_entry("background/filtered_location", start_row).data
        end_loc = ts.df_row_to_entry("background/filtered_location", end_row).data
        logging.debug("start_loc = %s, end_loc = %s" % (start_loc, end_loc))

        # The trip that connects the previous place to the new one
        trip = ecwrt.Rawtrip()
        trip.source = segmentation_method_name
        trip_entry = ecwe.Entry.create_entry(
            user_id, "segmentation/raw_trip", trip, create_id=True)

        # The place at which this trip ends
        end_place = ecwrp.Rawplace()
        end_place.source = segmentation_method_name
        end_place_entry = ecwe.Entry.create_entry(
            user_id, "segmentation/raw_place", end_place, create_id=True)

        stitch_together_start(prev_place_entry, trip_entry, start_loc)
        stitch_together_end(end_place_entry, trip_entry, end_loc)

        ts.insert(trip_entry)
        # The previous place entry was modified while stitching, so it has
        # to be written back, otherwise the change will be lost
        ts.update(prev_place_entry)
        prev_place_entry = end_place_entry

    # The final place has not been stitched to a following trip yet, but it
    # is saved so that it can be the last place for the next run
    ts.insert(prev_place_entry)
def create_and_link_timeline(tl, user_id, trip_map):
    """
    Create the cleaned places for the raw timeline `tl` and link them to the
    cleaned trips in `trip_map`.

    :param tl: raw timeline; `tl.trips`, `tl.first_place()` and
        `tl.get_object(...)` are used
    :param user_id: user whose last cleaned place is looked up
    :param trip_map: mapping from raw trip id to its cleaned trip. Raw trips
        whose id is not a key are treated as "squished"
    :return: `(last_cleaned_place, cleaned_timeline)`, where
        `last_cleaned_place` is the pre-existing last cleaned place entry
        (None if one had to be created here) and `cleaned_timeline` holds the
        places created here plus the cleaned trips that were linked.
        `(None, None)` if no starting cleaned place could be obtained.
    """
    last_cleaned_place = esdp.get_last_place_entry(esda.CLEANED_PLACE_KEY, user_id)
    cleaned_places = []
    curr_cleaned_start_place = last_cleaned_place
    if curr_cleaned_start_place is None:
        # If it is not present - maybe this user is getting started for the
        # first time - we create an entry based on the first place from the
        # timeline
        curr_cleaned_start_place = get_filtered_place(tl.first_place())
        if curr_cleaned_start_place is None:
            # If the timeline has no entries, we give up and return. This
            # check must come before the place is dereferenced for logging,
            # otherwise it can never be reached.
            return (None, None)
        logging.debug("no last cleaned place found, created place with id %s",
                      curr_cleaned_start_place.get_id())
        # We just created this place here, so lets add it to the created
        # places and insert rather than update it
        cleaned_places.append(curr_cleaned_start_place)
    else:
        logging.debug("Cleaned place %s found, using it",
                      curr_cleaned_start_place.get_id())

    unsquished_trips = []

    for raw_trip in tl.trips:
        raw_trip_id = raw_trip.get_id()
        if raw_trip_id in trip_map:
            # there is a clean representation for this trip, so we can link
            # its start to the curr_cleaned_start_place
            curr_cleaned_trip = trip_map[raw_trip_id]
            raw_start_place = tl.get_object(raw_trip.data.start_place)
            link_trip_start(curr_cleaned_trip, curr_cleaned_start_place, raw_start_place)

            raw_end_place = tl.get_object(raw_trip.data.end_place)
            curr_cleaned_end_place = get_filtered_place(raw_end_place)
            cleaned_places.append(curr_cleaned_end_place)
            link_trip_end(curr_cleaned_trip, curr_cleaned_end_place, raw_end_place)

            # The end of this trip is the start of the next one
            curr_cleaned_start_place = curr_cleaned_end_place
            logging.debug("Found mapping %s -> %s, added links",
                          raw_trip_id, curr_cleaned_trip.get_id())
            unsquished_trips.append(curr_cleaned_trip)
        else:
            # this is a squished trip, so we combine the start place with the
            # current start place. We do not need to combine both start and
            # end places, since the end place of one trip is the start place
            # of another. We combine start places instead of end places
            # because when the squishy part ends, we combine the start place
            # of the un-squished trip with the existing cleaned start and
            # create a new entry for the un-squished end
            logging.debug("Found squished trip, linking raw start place %s to new cleaned place %s",
                          raw_trip.data.start_place, curr_cleaned_start_place.get_id())
            link_squished_place(curr_cleaned_start_place,
                                tl.get_object(raw_trip.data.start_place))

    # Report the trips that are actually returned (the un-squished ones),
    # not the size of the input map
    logging.debug("Finished creating and linking timeline, returning %d places and %d trips",
                  len(cleaned_places), len(unsquished_trips))
    return (last_cleaned_place,
            esdtl.Timeline(esda.CLEANED_PLACE_KEY, esda.CLEANED_TRIP_KEY,
                           cleaned_places, unsquished_trips))
def testGetLastPlace(self):
    # Checks that a raw place is reported as the "last place" while it has
    # no exit_ts, and that no last place is reported once exit_ts is set
    # and saved.
    open_place = ecwrp.Rawplace()
    open_place.enter_ts = 5

    insert_ts = esta.TimeSeries.get_time_series(self.testUserId)
    open_place_id = insert_ts.insert_data(
        self.testUserId, "segmentation/raw_place", open_place)

    stored_entry = esda.get_entry(esda.RAW_PLACE_KEY, open_place_id)
    logging.debug("old place entry is %s " % stored_entry)
    first_update_ts = esta.TimeSeries.get_time_series(self.testUserId)
    first_update_ts.update(stored_entry)

    # The place saved above has no exit_ts set, so it is the last place
    found_entry = esdp.get_last_place_entry(esda.RAW_PLACE_KEY, self.testUserId)
    found_entry["data"]["exit_ts"] = 6
    logging.debug("About to update entry to %s" % found_entry)
    second_update_ts = esta.TimeSeries.get_time_series(self.testUserId)
    second_update_ts.update(found_entry)

    # Now that exit_ts has been set and saved, there is no last place
    self.assertIsNone(
        esdp.get_last_place_entry(esda.RAW_PLACE_KEY, self.testUserId))
def testGetLastPlace(self):
    # A raw place without exit_ts is returned as the last place; after
    # exit_ts is set and the entry is saved, no last place is returned.

    def user_timeseries():
        # Fetched anew for every write, as the original test did
        return esta.TimeSeries.get_time_series(self.testUserId)

    def current_last_place():
        return esdp.get_last_place_entry(esda.RAW_PLACE_KEY, self.testUserId)

    raw_place = ecwrp.Rawplace()
    raw_place.enter_ts = 5
    raw_place_id = user_timeseries().insert_data(
        self.testUserId, "segmentation/raw_place", raw_place)
    raw_place_entry = esda.get_entry(esda.RAW_PLACE_KEY, raw_place_id)
    logging.debug("old place entry is %s " % raw_place_entry)
    user_timeseries().update(raw_place_entry)

    # Nothing has set exit_ts so far, so the place just saved is the last
    # place
    open_entry = current_last_place()
    open_entry["data"]["exit_ts"] = 6
    logging.debug("About to update entry to %s" % open_entry)
    user_timeseries().update(open_entry)

    # With exit_ts set and saved, there must be no last place any more
    closed_lookup = current_last_place()
    self.assertIsNone(closed_lookup)
def create_and_link_timeline(tl, user_id, trip_map):
    """
    Create cleaned places for the raw timeline `tl` and link them to the
    cleaned trips found in `trip_map`.

    :param tl: raw timeline; `tl.trips`, `tl.first_place()` and
        `tl.get_object(...)` are used
    :param user_id: user whose last cleaned place is looked up
    :param trip_map: mapping from raw trip id to its cleaned trip; raw trips
        whose id is missing are treated as "squished"
    :return: `(last_cleaned_place, cleaned_timeline)`, or `(None, None)` when
        no starting cleaned place is available. The timeline is built from
        the places created here and `trip_map.values()`.
    """
    last_cleaned_place = esdp.get_last_place_entry(esda.CLEANED_PLACE_KEY, user_id)
    new_places = []
    chain_start = last_cleaned_place

    if chain_start is None:
        # No previous cleaned place - maybe this user is getting started for
        # the first time - so derive one from the first place of the timeline
        chain_start = get_filtered_place(tl.first_place())
        if chain_start is None:
            # If the timeline has no entries, we give up and return
            return (None, None)
        # This place was created here, so it goes into the list of created
        # places to be inserted rather than updated
        new_places.append(chain_start)

    for raw_trip in tl.trips:
        if raw_trip.get_id() not in trip_map:
            # Squished trip: fold its raw start place into the current
            # cleaned start place. Start and end places do not both need to
            # be combined, since the end place of one trip is the start place
            # of another. Start places are the ones combined because, when
            # the squishy part ends, the start place of the un-squished trip
            # is combined with the existing cleaned start and a new entry is
            # created for the un-squished end
            link_squished_place(chain_start,
                                tl.get_object(raw_trip.data.start_place))
            continue

        # There is a clean representation for this trip, so its start can be
        # linked to the current cleaned start place
        cleaned_trip = trip_map[raw_trip.get_id()]
        start_raw = tl.get_object(raw_trip.data.start_place)
        link_trip_start(cleaned_trip, chain_start, start_raw)

        end_raw = tl.get_object(raw_trip.data.end_place)
        end_cleaned = get_filtered_place(end_raw)
        new_places.append(end_cleaned)
        link_trip_end(cleaned_trip, end_cleaned, end_raw)

        # The end of this trip is where the next one starts
        chain_start = end_cleaned

    cleaned_timeline = esdtl.Timeline(esda.CLEANED_PLACE_KEY,
                                      esda.CLEANED_TRIP_KEY,
                                      new_places,
                                      trip_map.values())
    return (last_cleaned_place, cleaned_timeline)
def _fill_untracked_gap(ts, user_id, last_place_entry, start_loc, segmentation_method_name):
    """
    Create an untracked-time entry and a "restarted" raw place, and hand them
    to `_link_and_save` together with `last_place_entry`.

    :param ts: the user's timeseries
    :param user_id: user the new entries are created for
    :param last_place_entry: raw place entry that precedes the gap
    :param start_loc: location at which the next trip starts
    :param segmentation_method_name: stored as `source` on the new objects
    :return: the restarted raw place entry, to be used as the new last place
    """
    curr_untracked = ecwut.Untrackedtime()
    curr_untracked.source = segmentation_method_name
    curr_untracked_entry = ecwe.Entry.create_entry(
        user_id, "segmentation/raw_untracked", curr_untracked, create_id=True)

    restarted_place = ecwrp.Rawplace()
    restarted_place.source = segmentation_method_name
    restarted_place_entry = ecwe.Entry.create_entry(
        user_id, "segmentation/raw_place", restarted_place, create_id=True)

    # The untracked period starts from the filtered location looked up at the
    # last place's enter_ts, with its ts shifted by END_FUZZ_AVOID_LTE
    untracked_start_loc = ecwe.Entry(
        ts.get_entry_at_ts("background/filtered_location", "data.ts",
                           last_place_entry.data.enter_ts)).data
    untracked_start_loc["ts"] = untracked_start_loc.ts + epq.END_FUZZ_AVOID_LTE
    _link_and_save(ts, last_place_entry, curr_untracked_entry,
                   restarted_place_entry, untracked_start_loc, start_loc)
    logging.debug("Created untracked period %s from %s to %s",
                  curr_untracked_entry.get_id(),
                  curr_untracked_entry.data.start_ts,
                  curr_untracked_entry.data.end_ts)
    logging.debug("Resetting last_place_entry from %s to %s",
                  last_place_entry, restarted_place_entry)
    return restarted_place_entry


def create_places_and_trips(user_id, segmentation_points, segmentation_method_name):
    """
    Create a raw trip and its end place for every pair in
    `segmentation_points`, chaining them onto the user's last raw place and
    filling in an untracked period wherever `found_untracked_period` reports
    one before a trip.

    :param user_id: user whose timeseries is read from and written to
    :param segmentation_points: sized iterable of
        (start_loc_doc, end_loc_doc) pairs; each element is converted with
        `ts.df_row_to_entry("background/filtered_location", ...)`
    :param segmentation_method_name: stored as `source` on every object
        created here
    """
    # First, retrieve the last place so that we can stitch it to the newly
    # created trip. In the easy case, the trip was continuous, was stopped
    # when the trip end was detected, and there is no gap between the start
    # of the trip and the last place. But there can be other issues caused by
    # gaps in tracking. A more detailed description of dealing with gaps in
    # tracking can be found in the wiki.
    ts = esta.TimeSeries.get_time_series(user_id)
    last_place_entry = esdp.get_last_place_entry(esda.RAW_PLACE_KEY, user_id)
    if last_place_entry is None:
        last_place = start_new_chain(user_id)
        last_place.source = segmentation_method_name
        last_place_entry = ecwe.Entry.create_entry(
            user_id, "segmentation/raw_place", last_place, create_id=True)

    def get_loc_for_row(row):
        # Defined once, outside the loop, since it does not depend on the
        # segmentation point being processed
        return ts.df_row_to_entry("background/filtered_location", row).data

    # Theoretically, we can do some sanity checks here to make sure
    # that we are fairly close to the last point. Maybe mark some kind
    # of confidence level based on that?
    logging.debug("segmentation_point_list has length %s", len(segmentation_points))
    for start_loc_doc, end_loc_doc in segmentation_points:
        logging.debug("start_loc_doc = %s, end_loc_doc = %s",
                      start_loc_doc, end_loc_doc)
        start_loc = get_loc_for_row(start_loc_doc)
        end_loc = get_loc_for_row(end_loc_doc)
        logging.debug("start_loc = %s, end_loc = %s", start_loc, end_loc)

        # Stitch together the last place and the current trip
        curr_trip = ecwrt.Rawtrip()
        curr_trip.source = segmentation_method_name
        curr_trip_entry = ecwe.Entry.create_entry(
            user_id, "segmentation/raw_trip", curr_trip, create_id=True)

        new_place = ecwrp.Rawplace()
        new_place.source = segmentation_method_name
        new_place_entry = ecwe.Entry.create_entry(
            user_id, "segmentation/raw_place", new_place, create_id=True)

        if found_untracked_period(ts, last_place_entry.data, start_loc):
            # Fill in the gap in the chain with an untracked period; the
            # trip is then linked to the restarted place instead
            last_place_entry = _fill_untracked_gap(
                ts, user_id, last_place_entry, start_loc,
                segmentation_method_name)

        _link_and_save(ts, last_place_entry, curr_trip_entry, new_place_entry,
                       start_loc, end_loc)
        last_place_entry = new_place_entry

    # The last last_place hasn't been stitched together yet, but we
    # need to save it so that it can be the last_place for the next run
    ts.insert(last_place_entry)
def create_places_and_trips(user_id, segmentation_points, segmentation_method_name):
    """
    For each pair in `segmentation_points`, create a raw trip and the raw
    place it ends at, linking them onto the chain that ends at the user's
    last raw place. When `found_untracked_period` reports a gap before a
    trip, an untracked period and a restarted place are linked in first.

    :param user_id: user whose timeseries is read from and written to
    :param segmentation_points: sized iterable of
        (start_loc_doc, end_loc_doc) pairs; each element is converted with
        `ts.df_row_to_entry("background/filtered_location", ...)`
    :param segmentation_method_name: stored as `source` on every object
        created here
    """
    # In the easy case, the trip was continuous, was stopped when the trip
    # end was detected, and there is no gap between the start of the trip and
    # the last place. Gaps in tracking can cause other issues; a more
    # detailed description of dealing with them can be found in the wiki.
    ts = esta.TimeSeries.get_time_series(user_id)

    # The last place is needed so that the first new trip can be stitched to
    # it; without one, a new chain is started.
    prev_place_entry = esdp.get_last_place_entry(esda.RAW_PLACE_KEY, user_id)
    if prev_place_entry is not None:
        prev_place = prev_place_entry.data
    else:
        prev_place = start_new_chain(user_id)
        prev_place.source = segmentation_method_name
        prev_place_entry = ecwe.Entry.create_entry(
            user_id, "segmentation/raw_place", prev_place, create_id=True)

    # Theoretically, some sanity checks could be made here to make sure that
    # we are fairly close to the last point, maybe recording a confidence
    # level based on that.
    logging.debug("segmentation_point_list has length %s" % len(segmentation_points))
    for start_row, end_row in segmentation_points:
        logging.debug("start_loc_doc = %s, end_loc_doc = %s" % (start_row, end_row))
        start_loc = ts.df_row_to_entry("background/filtered_location", start_row).data
        end_loc = ts.df_row_to_entry("background/filtered_location", end_row).data
        logging.debug("start_loc = %s, end_loc = %s" % (start_loc, end_loc))

        # The trip connecting the previous place to the new one
        trip = ecwrt.Rawtrip()
        trip.source = segmentation_method_name
        trip_entry = ecwe.Entry.create_entry(
            user_id, "segmentation/raw_trip", trip, create_id=True)

        # The place at which this trip ends
        end_place = ecwrp.Rawplace()
        end_place.source = segmentation_method_name
        end_place_entry = ecwe.Entry.create_entry(
            user_id, "segmentation/raw_place", end_place, create_id=True)

        if found_untracked_period(ts, prev_place_entry.data, start_loc):
            # Fill in the gap in the chain with an untracked period
            untracked = ecwut.Untrackedtime()
            untracked.source = segmentation_method_name
            untracked_entry = ecwe.Entry.create_entry(
                user_id, "segmentation/raw_untracked", untracked, create_id=True)

            resumed_place = ecwrp.Rawplace()
            resumed_place.source = segmentation_method_name
            resumed_place_entry = ecwe.Entry.create_entry(
                user_id, "segmentation/raw_place", resumed_place, create_id=True)

            # Start of the gap: the filtered location looked up at the
            # previous place's enter_ts, with its ts shifted by
            # END_FUZZ_AVOID_LTE
            gap_start_raw = ts.get_entry_at_ts(
                "background/filtered_location", "data.ts",
                prev_place_entry.data.enter_ts)
            gap_start_loc = ecwe.Entry(gap_start_raw).data
            gap_start_loc["ts"] = gap_start_loc.ts + epq.END_FUZZ_AVOID_LTE

            _link_and_save(ts, prev_place_entry, untracked_entry,
                           resumed_place_entry, gap_start_loc, start_loc)
            logging.debug("Created untracked period %s from %s to %s" %
                          (untracked_entry.get_id(),
                           untracked_entry.data.start_ts,
                           untracked_entry.data.end_ts))
            logging.debug("Resetting last_place_entry from %s to %s" %
                          (prev_place_entry, resumed_place_entry))
            # The trip below now starts from the restarted place
            prev_place_entry = resumed_place_entry

        _link_and_save(ts, prev_place_entry, trip_entry, end_place_entry,
                       start_loc, end_loc)
        prev_place_entry = end_place_entry

    # The final place has not been stitched to a following trip yet, but it
    # is saved so that it can be the last place for the next run
    ts.insert(prev_place_entry)