def split_segment(self, i, curr_seg, direction):
    """
    Cut ``curr_seg`` in two and store both halves in ``self.segment_list``.

    The cut is placed at the first row whose recomputed ``distance`` is
    larger than ``Segment.CLUSTER_RADIUS``. For ``Direction.RIGHT`` the
    rows are examined in their stored order; for ``Direction.LEFT`` they
    are examined in reverse order.

    :param i: position of ``curr_seg`` inside ``self.segment_list``
    :param curr_seg: the segment to split; its ``segment_df``, ``start``
        and ``end`` attributes are read
    :param direction: a ``SmoothZigzag.Direction`` value
    :return: the segment that now occupies the slot originally assigned
        to ``curr_seg`` (the piece written to ``self.segment_list[i]``).
        Falls through and returns None when ``direction`` is neither
        RIGHT nor LEFT.

    NOTE(review): ``.index[0]`` raises IndexError when no row exceeds the
    cluster radius - presumably callers only split segments known to
    contain such a row; confirm.
    """
    import emission.analysis.intake.cleaning.location_smoothing as ls

    if direction == SmoothZigzag.Direction.RIGHT:
        forward_df = ls.recalc_speed(curr_seg.segment_df)
        # First row that lies outside the cluster radius
        outside_cluster = forward_df.distance > Segment.CLUSTER_RADIUS
        boundary = forward_df[outside_cluster].index[0]
        # Right-hand remainder is built first, then the piece kept at i
        remainder_seg = Segment(boundary, curr_seg.end, self)
        kept_seg = Segment(curr_seg.start, boundary, self)
        self.segment_list[i] = kept_seg
        self.segment_list.insert(i + 1, remainder_seg)
        return kept_seg

    if direction == SmoothZigzag.Direction.LEFT:
        # Reverse the rows so that speeds and distances are recomputed
        # walking the segment from its opposite end
        backward_df = ls.recalc_speed(curr_seg.segment_df.iloc[::-1])
        logging.debug("Recomputed_speed_df = %s", backward_df.speed)
        # First row (in reversed order) that lies outside the cluster radius
        outside_cluster = backward_df.distance > Segment.CLUSTER_RADIUS
        split_label = backward_df[outside_cluster].index[0]
        logging.debug("new split point = %s", split_label)
        # Both pieces share the boundary one past the located row
        boundary = split_label + 1
        remainder_seg = Segment(curr_seg.start, boundary, self)
        kept_seg = Segment(boundary, curr_seg.end, self)
        self.segment_list[i] = kept_seg
        # Inserting at i pushes kept_seg to i + 1, keeping left-to-right order
        self.segment_list.insert(i, remainder_seg)
        return kept_seg
def split_segment(self, i, curr_seg, direction):
    """
    Cut ``curr_seg`` in two and store both halves in ``self.segment_list``.

    The cut is placed at the first row whose recomputed ``distance`` is
    larger than ``Segment.CLUSTER_RADIUS``. For ``Direction.RIGHT`` the
    rows are examined in their stored order; for ``Direction.LEFT`` they
    are examined in reverse order.

    :param i: position of ``curr_seg`` inside ``self.segment_list``
    :param curr_seg: the segment to split; its ``segment_df``, ``start``
        and ``end`` attributes are read
    :param direction: a ``SmoothZigzag.Direction`` value
    :return: the segment that now occupies the slot originally assigned
        to ``curr_seg`` (the piece written to ``self.segment_list[i]``).
        Falls through and returns None when ``direction`` is neither
        RIGHT nor LEFT.

    NOTE(review): ``.index[0]`` raises IndexError when no row exceeds the
    cluster radius - presumably callers only split segments known to
    contain such a row; confirm.
    """
    import emission.analysis.intake.cleaning.location_smoothing as ls

    if direction == SmoothZigzag.Direction.RIGHT:
        forward_df = ls.recalc_speed(curr_seg.segment_df)
        # First row that lies outside the cluster radius
        outside_cluster = forward_df.distance > Segment.CLUSTER_RADIUS
        boundary = forward_df[outside_cluster].index[0]
        # Right-hand remainder is built first, then the piece kept at i
        remainder_seg = Segment(boundary, curr_seg.end, self)
        kept_seg = Segment(curr_seg.start, boundary, self)
        self.segment_list[i] = kept_seg
        self.segment_list.insert(i + 1, remainder_seg)
        return kept_seg

    if direction == SmoothZigzag.Direction.LEFT:
        # Reverse the rows so that speeds and distances are recomputed
        # walking the segment from its opposite end
        backward_df = ls.recalc_speed(curr_seg.segment_df.iloc[::-1])
        logging.debug("Recomputed_speed_df = %s", backward_df.speed)
        # First row (in reversed order) that lies outside the cluster radius
        outside_cluster = backward_df.distance > Segment.CLUSTER_RADIUS
        split_label = backward_df[outside_cluster].index[0]
        logging.debug("new split point = %s", split_label)
        # Both pieces share the boundary one past the located row
        boundary = split_label + 1
        remainder_seg = Segment(curr_seg.start, boundary, self)
        kept_seg = Segment(boundary, curr_seg.end, self)
        self.segment_list[i] = kept_seg
        # Inserting at i pushes kept_seg to i + 1, keeping left-to-right order
        self.segment_list.insert(i, remainder_seg)
        return kept_seg
def filter(self, with_speeds_df):
    """
    Flag zigzag outliers among the rows of ``with_speeds_df``.

    The result is left in ``self.inlier_mask_``: a boolean series with one
    entry per input row, where False marks a row covered by a segment
    whose state is ``Segment.State.BAD``. The method itself returns None.

    :param with_speeds_df: dataframe of points; it is also stored on
        ``self.with_speeds_df``. A ``speed`` column is read from the
        output of ``recalc_speed`` at the end of the method.
    :raises AssertionError: if any segment is still in the UNKNOWN state
        after states have been propagated in both directions
    """
    # Every row starts out as an inlier
    self.inlier_mask_ = pd.Series([True] * with_speeds_df.shape[0])
    self.with_speeds_df = with_speeds_df

    # presumably populates self.segment_list - defined outside this block
    self.find_segments()
    logging.debug("After splitting, segment list is %s with size %s" %
                  (self.segment_list, len(self.segment_list)))

    if len(self.segment_list) == 1:
        # A single segment means there were no jumps to split on
        logging.info("No jumps, nothing to filter")
        return

    # Seed one segment as GOOD, then propagate states outwards both ways
    seed_idx = self.find_start_segment(self.segment_list)
    self.segment_list[seed_idx].state = Segment.State.GOOD
    for sweep_direction in (SmoothZigzag.Direction.RIGHT,
                            SmoothZigzag.Direction.LEFT):
        self.mark_segment_states(seed_idx, sweep_direction)

    def segments_in(wanted_state):
        # Segments, in list order, whose state equals wanted_state
        return [seg for seg in self.segment_list
                if seg.state == wanted_state]

    unknown_segments = segments_in(Segment.State.UNKNOWN)
    logging.debug("unknown_segments = %s" % unknown_segments)
    unknown_count = len(unknown_segments)
    assert unknown_count == 0, \
        "Found %s unknown segments - early termination of loop?" % unknown_count

    bad_segments = segments_in(Segment.State.BAD)
    logging.debug("bad_segments = %s" % bad_segments)

    # Knock out the [start, end) range of every BAD segment
    for bad_seg in bad_segments:
        self.inlier_mask_[bad_seg.start:bad_seg.end] = False

    # Elementwise comparison on the series; kept as `== False` on purpose
    logging.debug("after setting values, outlier_mask = %s" %
                  np.nonzero(self.inlier_mask_ == False))

    # TODO: This is not the right place for this - adds too many dependencies
    # Should do this in the outer class in general so that we can do
    # multiple passes of any filtering algorithm
    import emission.analysis.intake.cleaning.cleaning_methods.speed_outlier_detection as cso
    import emission.analysis.intake.cleaning.location_smoothing as ls

    # Recompute speeds over the surviving rows and derive a fresh threshold
    recomputed_speeds_df = ls.recalc_speed(
        self.with_speeds_df[self.inlier_mask_])
    recomputed_threshold = cso.BoxplotOutlier(
        ignore_zeros=True).get_threshold(recomputed_speeds_df)

    # Remaining outliers are only reported, not asserted on
    remaining_outliers = recomputed_speeds_df[
        recomputed_speeds_df.speed > recomputed_threshold]
    if remaining_outliers.shape[0] != 0:
        logging.info("After first round, still have outliers %s" %
                     remaining_outliers)
def filter(self, with_speeds_df):
    """
    Flag zigzag outliers among the rows of ``with_speeds_df``.

    The result is left in ``self.inlier_mask_``: a boolean series with one
    entry per input row, where False marks a row covered by a segment
    whose state is ``Segment.State.BAD``. The method itself returns None.

    :param with_speeds_df: dataframe of points; it is also stored on
        ``self.with_speeds_df``. A ``speed`` column is read from the
        output of ``recalc_speed`` at the end of the method.
    :raises AssertionError: if any segment is still in the UNKNOWN state
        after states have been propagated in both directions
    """
    # Every row starts out as an inlier
    self.inlier_mask_ = pd.Series([True] * with_speeds_df.shape[0])
    self.with_speeds_df = with_speeds_df

    # presumably populates self.segment_list - defined outside this block
    self.find_segments()
    logging.debug("After splitting, segment list is %s with size %s" %
                  (self.segment_list, len(self.segment_list)))

    if len(self.segment_list) == 1:
        # A single segment means there were no jumps to split on
        logging.info("No jumps, nothing to filter")
        return

    # Seed one segment as GOOD, then propagate states outwards both ways
    seed_idx = self.find_start_segment(self.segment_list)
    self.segment_list[seed_idx].state = Segment.State.GOOD
    for sweep_direction in (SmoothZigzag.Direction.RIGHT,
                            SmoothZigzag.Direction.LEFT):
        self.mark_segment_states(seed_idx, sweep_direction)

    def segments_in(wanted_state):
        # Segments, in list order, whose state equals wanted_state
        return [seg for seg in self.segment_list
                if seg.state == wanted_state]

    unknown_segments = segments_in(Segment.State.UNKNOWN)
    logging.debug("unknown_segments = %s" % unknown_segments)
    unknown_count = len(unknown_segments)
    assert unknown_count == 0, \
        "Found %s unknown segments - early termination of loop?" % unknown_count

    bad_segments = segments_in(Segment.State.BAD)
    logging.debug("bad_segments = %s" % bad_segments)

    # Knock out the [start, end) range of every BAD segment
    for bad_seg in bad_segments:
        self.inlier_mask_[bad_seg.start:bad_seg.end] = False

    # Elementwise comparison on the series; kept as `== False` on purpose
    logging.debug("after setting values, outlier_mask = %s" %
                  np.nonzero(self.inlier_mask_ == False))

    # TODO: This is not the right place for this - adds too many dependencies
    # Should do this in the outer class in general so that we can do
    # multiple passes of any filtering algorithm
    import emission.analysis.intake.cleaning.cleaning_methods.speed_outlier_detection as cso
    import emission.analysis.intake.cleaning.location_smoothing as ls

    # Recompute speeds over the surviving rows and derive a fresh threshold
    recomputed_speeds_df = ls.recalc_speed(
        self.with_speeds_df[self.inlier_mask_])
    recomputed_threshold = cso.BoxplotOutlier(
        ignore_zeros=True).get_threshold(recomputed_speeds_df)

    # Remaining outliers are only reported, not asserted on
    remaining_outliers = recomputed_speeds_df[
        recomputed_speeds_df.speed > recomputed_threshold]
    if remaining_outliers.shape[0] != 0:
        logging.info("After first round, still have outliers %s" %
                     remaining_outliers)