Exemplo n.º 1
0
    def filter(self, with_speeds_df):
        """
        Flag points that are too fast relative to the last retained point.

        The rows of ``with_speeds_df`` are visited in order.  The first row is
        always retained and becomes the reference point.  For every later
        row, the speed from the reference point to that row is recomputed
        with ``pf.calSpeed``.  If it exceeds ``self.maxSpeed`` the row is
        marked as an outlier and the reference point stays where it is;
        otherwise the row is retained and becomes the new reference point.

        :param with_speeds_df: dataframe with the "mLatitude", "mLongitude",
            "mTime" and "speed" columns.
        :return: None. The result is stored in ``self.inlier_mask_``, a list
            with one boolean per row (True = retained, False = removed).
        """
        self.inlier_mask_ = [True] * with_speeds_df.shape[0]

        prev_pt = None
        for (i, pt) in enumerate(
                with_speeds_df[["mLatitude", "mLongitude", "mTime",
                                "speed"]].to_dict('records')):
            pt = ad.AttrDict(dict(pt))
            if prev_pt is None:
                # Don't have enough data yet, so don't make any decisions
                prev_pt = pt
                continue

            currSpeed = pf.calSpeed(prev_pt, pt)
            logging.debug(
                "while considering point %s(%s), prev_pt (%s) speed = %s",
                pt, i, prev_pt, currSpeed)
            if currSpeed > self.maxSpeed:
                logging.debug("currSpeed > %s, removing index %s ",
                              self.maxSpeed, i)
                # prev_pt is deliberately left unchanged so that the next
                # point is compared against the last *retained* point.
                self.inlier_mask_[i] = False
            else:
                logging.debug("currSpeed < %s, retaining index %s ",
                              self.maxSpeed, i)
                prev_pt = pt

        # The mask is True for retained rows, so the removed rows are the
        # False entries (the nonzero entries would be the retained ones).
        removed_indices = [
            idx for (idx, keep) in enumerate(self.inlier_mask_) if not keep
        ]
        logging.info("Filtering complete, removed indices = %s",
                     removed_indices)
Exemplo n.º 2
0
def add_dist_heading_speed(points_df):
    # type: (pandas.DataFrame) -> pandas.DataFrame
    """
    Returns a new dataframe with added "distance", "speed" and "heading"
    columns.

    Each value is computed between a point and its previous point using
    ``pf.calDistance``, ``pf.calSpeed`` and ``pf.calHeading`` respectively.
    The first row has a distance, speed and heading of zero.  An existing
    "heading" column is replaced by the recomputed one.  An empty input
    produces an empty output that still has the three added columns.

    The new columns are aligned on ``points_df.index``, so the input does not
    need to have a default 0..n-1 index.
    """
    point_list = [ad.AttrDict(row) for row in points_df.to_dict('records')]
    point_pairs = list(zip(point_list, point_list[1:]))

    # The first point has no predecessor, so it gets a placeholder of 0.
    # With no points at all there is no first row to pad.
    first_row = [0] if point_list else []

    distances = first_row + [
        pf.calDistance(p1, p2) for (p1, p2) in point_pairs
    ]
    speeds = first_row + [pf.calSpeed(p1, p2) for (p1, p2) in point_pairs]
    headings = first_row + [pf.calHeading(p1, p2) for (p1, p2) in point_pairs]

    if "heading" in points_df.columns:
        # drop() returns a copy, so the caller's dataframe is not modified
        base_df = points_df.drop("heading", axis=1)
    else:
        base_df = points_df

    # pd.concat(axis=1) aligns on the index, so the new columns must carry
    # the same index as the input; otherwise rows would be misaligned and
    # padded with NaN.
    return pd.concat([
        base_df,
        pd.Series(distances, index=points_df.index, name="distance"),
        pd.Series(speeds, index=points_df.index, name="speed"),
        pd.Series(headings, index=points_df.index, name="heading"),
    ],
                     axis=1)
Exemplo n.º 3
0
def recalc_speed(points_df):
    """
    The input dataframe already has "speed" and "distance" columns.
    Drop them and recalculate distances and speeds from the first point
    onwards, using ``pf.calDistance`` and ``pf.calSpeed``.
    Each value is computed between a point and its previous point.
    The first row has a distance and a speed of zero.
    An empty input returns an empty dataframe that still has both columns.

    Raises KeyError (from ``DataFrame.drop``) if either column is missing.
    """
    stripped_df = points_df.drop(["speed", "distance"], axis=1)
    logging.debug("columns in points_df = %s", points_df.columns)
    point_list = [ad.AttrDict(row) for row in points_df.to_dict('records')]
    point_pairs = list(zip(point_list, point_list[1:]))

    # The first point has no predecessor, so it gets a placeholder of 0.
    # With no points there is no first row, and padding anyway would make the
    # values longer than the index and raise ValueError.
    first_row = [0] if point_list else []
    distances = first_row + [
        pf.calDistance(p1, p2) for (p1, p2) in point_pairs
    ]
    speeds = first_row + [pf.calSpeed(p1, p2) for (p1, p2) in point_pairs]

    # Reuse the input index so that pd.concat(axis=1) lines the new columns
    # up with the existing rows.
    return pd.concat([
        stripped_df,
        pd.Series(distances, index=points_df.index, name="distance"),
        pd.Series(speeds, index=points_df.index, name="speed"),
    ],
                     axis=1)
def recalc_speed(points_df):
    """
    The input dataframe already has "speed" and "distance" columns.
    Drop them and recalculate distances and speeds from the first point
    onwards, using ``pf.calDistance`` and ``pf.calSpeed``.
    Each value is computed between a point and its previous point.
    The first row has a distance and a speed of zero.
    An empty input returns an empty dataframe that still has both columns.

    Raises KeyError (from ``DataFrame.drop``) if either column is missing.
    """
    stripped_df = points_df.drop("speed", axis=1).drop("distance", axis=1)
    point_list = [ad.AttrDict(row) for row in points_df.to_dict('records')]
    # Materialize the pairs: on Python 3 zip() is a one-shot iterator, and it
    # is traversed twice below (once for distances, once for speeds).
    zipped_points_list = list(zip(point_list, point_list[1:]))

    # The first point has no predecessor, so it gets a placeholder of 0.
    # With no points there is no first row to pad.
    leading = [0] if point_list else []
    distances = leading + [pf.calDistance(p1, p2) for (p1, p2) in zipped_points_list]
    speeds = leading + [pf.calSpeed(p1, p2) for (p1, p2) in zipped_points_list]

    # Reuse the input index so that pd.concat(axis=1) lines the new columns
    # up with the existing rows.
    with_distances_df = pd.concat([stripped_df, pd.Series(distances, index=points_df.index, name="distance")], axis=1)
    with_speeds_df = pd.concat([with_distances_df, pd.Series(speeds, index=points_df.index, name="speed")], axis=1)
    return with_speeds_df
Exemplo n.º 5
0
    def filter(self, with_speeds_df):
        """
        Flag points that are too fast relative to the last retained point.

        The rows of ``with_speeds_df`` are visited in order.  The first row is
        always retained and becomes the reference point.  For every later
        row, the speed from the reference point to that row is recomputed
        with ``pf.calSpeed``.  If it exceeds ``self.maxSpeed`` the row is
        marked as an outlier and the reference point stays where it is;
        otherwise the row is retained and becomes the new reference point.

        :param with_speeds_df: dataframe with the "mLatitude", "mLongitude",
            "mTime" and "speed" columns.
        :return: None. The result is stored in ``self.inlier_mask_``, a list
            with one boolean per row (True = retained, False = removed).
        """
        self.inlier_mask_ = [True] * with_speeds_df.shape[0]

        records = with_speeds_df[["mLatitude", "mLongitude", "mTime", "speed"]].to_dict('records')
        prev_pt = None
        for (i, pt) in enumerate(records):
            pt = ad.AttrDict(dict(pt))
            if prev_pt is None:
                # Don't have enough data yet, so don't make any decisions
                prev_pt = pt
                continue

            currSpeed = pf.calSpeed(prev_pt, pt)
            logging.debug("while considering point %s(%s), prev_pt (%s) speed = %s", pt, i, prev_pt, currSpeed)
            if currSpeed > self.maxSpeed:
                logging.debug("currSpeed > %s, removing index %s ", self.maxSpeed, i)
                # prev_pt is deliberately left unchanged so that the next
                # point is compared against the last *retained* point.
                self.inlier_mask_[i] = False
            else:
                logging.debug("currSpeed < %s, retaining index %s ", self.maxSpeed, i)
                prev_pt = pt

        # The mask is True for retained rows, so the removed rows are the
        # False entries (the nonzero entries would be the retained ones).
        removed_indices = [idx for (idx, keep) in enumerate(self.inlier_mask_) if not keep]
        logging.info("Filtering complete, removed indices = %s", removed_indices)
def add_dist_heading_speed(points_df):
    """
    Returns a new dataframe with added "distance", "speed" and "heading"
    columns.

    Each value is computed between a point and its previous point using
    ``pf.calDistance``, ``pf.calSpeed`` and ``pf.calHeading`` respectively.
    The first row has a distance, speed and heading of zero.  An existing
    "heading" column is replaced by the recomputed one.  An empty input
    produces an empty output that still has the three added columns.

    The new columns are aligned on ``points_df.index``, so the input does not
    need to have a default 0..n-1 index.
    """
    point_list = [ad.AttrDict(row) for row in points_df.to_dict('records')]
    # Materialize the pairs: on Python 3 zip() is a one-shot iterator, and it
    # is traversed three times below.
    zipped_points_list = list(zip(point_list, point_list[1:]))

    # The first point has no predecessor, so it gets a placeholder of 0.
    # With no points at all there is no first row to pad.
    leading = [0] if point_list else []
    distances = leading + [pf.calDistance(p1, p2) for (p1, p2) in zipped_points_list]
    speeds = leading + [pf.calSpeed(p1, p2) for (p1, p2) in zipped_points_list]
    headings = leading + [pf.calHeading(p1, p2) for (p1, p2) in zipped_points_list]

    # Avoid ending up with two "heading" columns when the input already has
    # one; drop() returns a copy, so the caller's dataframe is not modified.
    base_df = points_df.drop("heading", axis=1) if "heading" in points_df.columns else points_df

    # pd.concat(axis=1) aligns on the index, so the new columns must carry
    # the same index as the input.
    with_distances_df = pd.concat([base_df, pd.Series(distances, index=points_df.index, name="distance")], axis=1)
    with_speeds_df = pd.concat([with_distances_df, pd.Series(speeds, index=points_df.index, name="speed")], axis=1)
    with_headings_df = pd.concat([with_speeds_df, pd.Series(headings, index=points_df.index, name="heading")], axis=1)
    return with_headings_df
Exemplo n.º 7
0
    def filter(self, with_speeds_df):
        """
        Flag outliers by splitting the points into "quality segments".

        Pass 1 walks the rows in order and recomputes the speed between each
        row and the row before it with ``pf.calSpeed``.  Whenever that speed
        exceeds ``self.maxSpeed`` the current segment is closed and a new one
        is started at that row.  Row 0 only serves as the first reference
        point and is never added to a segment.

        Pass 2 compares each pair of consecutive segments.  The segment that
        spans the shorter "mTime" range (the current one on a tie) is the
        candidate for cutting: each of its points is flagged as an outlier if
        its ``ec.calDistance`` distance to the last point of the other segment
        is greater than ``self.maxSpeed / 1000`` times the "mTime" difference
        to that point.

        :param with_speeds_df: dataframe with at least the "mLatitude",
            "mLongitude" and "mTime" columns, plus whatever ``pf.calSpeed``
            reads from each row.
        :return: None. The result is stored in ``self.inlier_mask_``, a list
            with one boolean per row (True = retained, False = removed).
        """
        self.inlier_mask_ = [True] * with_speeds_df.shape[0]

        quality_segments = []
        curr_segment = []
        prev_pt = None

        for (i, pt) in enumerate(with_speeds_df.to_dict('records')):
            pt = ad.AttrDict(pt)
            if prev_pt is None:
                # Don't have enough data yet, so don't make any decisions
                prev_pt = pt
                continue

            currSpeed = pf.calSpeed(prev_pt, pt)
            print("while considering point %s, speed = %s" % (i, currSpeed))
            # Should make this configurable
            if currSpeed > self.maxSpeed:
                print("currSpeed > %d, starting new quality segment at index %s " % (self.maxSpeed, i))
                quality_segments.append(curr_segment)
                curr_segment = []
            else:
                print("currSpeed < %d, retaining index %s in existing quality segment " % (self.maxSpeed, i))
            prev_pt = pt
            curr_segment.append(i)
        # Append the last segment once we are at the end
        quality_segments.append(curr_segment)

        print("Number of quality segments is %d" % len(quality_segments))

        def get_coords(idx):
            # [longitude, latitude], the argument order passed to ec.calDistance
            row = with_speeds_df.iloc[idx]
            return [row["mLongitude"], row["mLatitude"]]

        def get_ts(idx):
            return with_speeds_df.iloc[idx]["mTime"]

        def get_duration(segment):
            return get_ts(segment[-1]) - get_ts(segment[0])

        def cut_unreachable(ref_idx, candidate_indices):
            # Flag every candidate that is further from ref_idx than
            # maxSpeed allows for the time elapsed between the two points.
            # NOTE(review): the / 1000 presumably converts a millisecond
            # mTime into seconds -- confirm against the data source.
            for curr_idx in candidate_indices:
                distance = math.fabs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx)))
                elapsed = abs(get_ts(ref_idx) - get_ts(curr_idx))
                max_distance = self.maxSpeed / 1000 * elapsed
                # Print the same values that the comparison below uses
                print("Comparing distance %s with speed %s * time %s = %s" %
                      (distance, old_div(self.maxSpeed, 1000), elapsed, max_distance))
                if distance > max_distance:
                    print("Distance is greater than max speed * time, deleting %s" % curr_idx)
                    self.inlier_mask_[curr_idx] = False

        last_segment = quality_segments[0]
        for curr_segment in quality_segments[1:]:
            print("Considering segments %s and %s" % (last_segment, curr_segment))

            if not last_segment:
                # If the last segment has no points, we can't compare last and
                # current, but should reset last, otherwise, we will be stuck
                # forever
                logging.info("len(last_segment) = %d, len(curr_segment) = %d, skipping",
                             len(last_segment), len(curr_segment))
                last_segment = curr_segment
                continue

            if not curr_segment:
                # If the current segment has no points, we can't compare last and
                # current, but can just continue since the for loop will reset current
                logging.info("len(last_segment) = %d, len(curr_segment) = %d, skipping",
                             len(last_segment), len(curr_segment))
                continue

            # I don't know why they would use time instead of distance, but
            # this is what the existing POSDAP code does.
            curr_duration = get_duration(curr_segment)
            last_duration = get_duration(last_segment)
            print("About to compare curr_segment duration %s with last segment duration %s" %
                  (curr_duration, last_duration))
            if curr_duration <= last_duration:
                print("curr segment %s is shorter, cut it" % curr_segment)
                cut_unreachable(last_segment[-1], curr_segment)
            else:
                print("prev segment %s is shorter, cut it" % last_segment)
                # NOTE(review): the reference is the *last* point of the
                # current segment, not the one adjacent to last_segment;
                # kept as in the original -- confirm that this is intended.
                cut_unreachable(curr_segment[-1], reversed(last_segment))
            last_segment = curr_segment

        # The mask is True for retained rows, so the removed rows are the
        # False entries (the nonzero entries would be the retained ones).
        removed_indices = [idx for (idx, keep) in enumerate(self.inlier_mask_) if not keep]
        logging.info("Filtering complete, removed indices = %s", removed_indices)
Exemplo n.º 8
0
    def filter(self, with_speeds_df):
        """
        Flag outliers by splitting the points into "quality segments".

        Pass 1 walks the rows in order and recomputes the speed between each
        row and the row before it with ``pf.calSpeed``.  Whenever that speed
        exceeds ``self.maxSpeed`` the current segment is closed and a new one
        is started at that row.  Row 0 only serves as the first reference
        point and is never added to a segment.

        Pass 2 compares each pair of consecutive segments.  The segment that
        spans the shorter "mTime" range (the current one on a tie) is the
        candidate for cutting: each of its points is flagged as an outlier if
        its ``ec.calDistance`` distance to the last point of the other segment
        is greater than ``self.maxSpeed / 1000`` times the "mTime" difference
        to that point.

        :param with_speeds_df: dataframe with at least the "mLatitude",
            "mLongitude" and "mTime" columns, plus whatever ``pf.calSpeed``
            reads from each row.
        :return: None. The result is stored in ``self.inlier_mask_``, a list
            with one boolean per row (True = retained, False = removed).
        """
        self.inlier_mask_ = [True] * with_speeds_df.shape[0]

        quality_segments = []
        curr_segment = []
        prev_pt = None

        for (i, pt) in enumerate(with_speeds_df.to_dict('records')):
            pt = ad.AttrDict(pt)
            if prev_pt is None:
                # Don't have enough data yet, so don't make any decisions
                prev_pt = pt
                continue

            currSpeed = pf.calSpeed(prev_pt, pt)
            print("while considering point %s, speed = %s" %
                  (i, currSpeed))
            # Should make this configurable
            if currSpeed > self.maxSpeed:
                print(
                    "currSpeed > %d, starting new quality segment at index %s "
                    % (self.maxSpeed, i))
                quality_segments.append(curr_segment)
                curr_segment = []
            else:
                print(
                    "currSpeed < %d, retaining index %s in existing quality segment "
                    % (self.maxSpeed, i))
            prev_pt = pt
            curr_segment.append(i)
        # Append the last segment once we are at the end
        quality_segments.append(curr_segment)

        print("Number of quality segments is %d" % len(quality_segments))

        def get_coords(idx):
            # [longitude, latitude], the argument order passed to
            # ec.calDistance
            row = with_speeds_df.iloc[idx]
            return [row["mLongitude"], row["mLatitude"]]

        def get_ts(idx):
            return with_speeds_df.iloc[idx]["mTime"]

        def get_duration(segment):
            return get_ts(segment[-1]) - get_ts(segment[0])

        def cut_unreachable(ref_idx, candidate_indices):
            # Flag every candidate that is further from ref_idx than
            # maxSpeed allows for the time elapsed between the two points.
            # NOTE(review): the / 1000 presumably converts a millisecond
            # mTime into seconds -- confirm against the data source.
            for curr_idx in candidate_indices:
                distance = math.fabs(
                    ec.calDistance(get_coords(ref_idx),
                                   get_coords(curr_idx)))
                elapsed = abs(get_ts(ref_idx) - get_ts(curr_idx))
                max_distance = self.maxSpeed / 1000 * elapsed
                # Print the same values that the comparison below uses
                print(
                    "Comparing distance %s with speed %s * time %s = %s" %
                    (distance, old_div(self.maxSpeed, 1000), elapsed,
                     max_distance))
                if distance > max_distance:
                    print(
                        "Distance is greater than max speed * time, deleting %s"
                        % curr_idx)
                    self.inlier_mask_[curr_idx] = False

        last_segment = quality_segments[0]
        for curr_segment in quality_segments[1:]:
            print("Considering segments %s and %s" %
                  (last_segment, curr_segment))

            if not last_segment:
                # If the last segment has no points, we can't compare last and
                # current, but should reset last, otherwise, we will be stuck
                # forever
                logging.info(
                    "len(last_segment) = %d, len(curr_segment) = %d, skipping",
                    len(last_segment), len(curr_segment))
                last_segment = curr_segment
                continue

            if not curr_segment:
                # If the current segment has no points, we can't compare last and
                # current, but can just continue since the for loop will reset current
                logging.info(
                    "len(last_segment) = %d, len(curr_segment) = %d, skipping",
                    len(last_segment), len(curr_segment))
                continue

            # I don't know why they would use time instead of distance, but
            # this is what the existing POSDAP code does.
            curr_duration = get_duration(curr_segment)
            last_duration = get_duration(last_segment)
            print(
                "About to compare curr_segment duration %s with last segment duration %s"
                % (curr_duration, last_duration))
            if curr_duration <= last_duration:
                print("curr segment %s is shorter, cut it" % curr_segment)
                cut_unreachable(last_segment[-1], curr_segment)
            else:
                print("prev segment %s is shorter, cut it" % last_segment)
                # NOTE(review): the reference is the *last* point of the
                # current segment, not the one adjacent to last_segment;
                # kept as in the original -- confirm that this is intended.
                cut_unreachable(curr_segment[-1], reversed(last_segment))
            last_segment = curr_segment

        # The mask is True for retained rows, so the removed rows are the
        # False entries (the nonzero entries would be the retained ones).
        removed_indices = [
            idx for (idx, keep) in enumerate(self.inlier_mask_) if not keep
        ]
        logging.info("Filtering complete, removed indices = %s",
                     removed_indices)