Exemplo n.º 1
0
    def _interpolation(self):
        """Convert raw tracker output into a linearly interpolated TrackSet.

        Almost identical to the original mcf-tracker implementation.
        ref:
          external/pymotutils/pytmotutils/application/application.py
          min_cost_flow_pymot.py
        """
        # Raw output: one list of (frame_idx, sensor_data, tsn_scores)
        # tuples per trajectory.
        raw_trajectories = self.tracker.compute_trajectories()

        hypotheses = pymotutils.TrackSet()
        for track_idx, raw_trajectory in enumerate(raw_trajectories):
            track = hypotheses.create_track(track_idx)
            for entry in raw_trajectory:
                track.add(pymotutils.Detection(
                    frame_idx=entry[0],
                    sensor_data=entry[1],
                    tsn_scores=entry[2]))

        # Fill temporal gaps by linear interpolation.
        return interpolate_track_set(hypotheses)
Exemplo n.º 2
0
 def compute_trajectories(self):
     """Return tracker trajectories as lists of Detection objects.

     Frame indices of the underlying tracker are shifted by the
     sequence start offset ``self._start_idx``.
     """
     result = []
     for trajectory in self.tracker.compute_trajectories():
         detections = [
             pymotutils.Detection(frame_idx=self._start_idx + entry[0],
                                  sensor_data=entry[1])
             for entry in trajectory
         ]
         result.append(detections)
     return result
Exemplo n.º 3
0
def read_ground_truth(filename,
                      object_classes=None,
                      min_height=MIN_OBJECT_HEIGHT_IN_PIXELS):
    """Read a KITTI-style tracking ground truth file into a TrackSet.

    Parameters
    ----------
    filename : str
        Path to the ground truth file.
    object_classes : Optional[Container[str]]
        Object classes to keep (e.g. {'Car', 'Pedestrian'}). If None,
        detections of all classes are kept.
    min_height : float
        Bounding boxes with a pixel height below this value are skipped.

    Returns
    -------
    TrackSet
        One track per track id; sensor data is the ROI (x, y, w, h).

    File format:

    #Values    Name      Description
    ----------------------------------------------------------------------------
       1    frame        Frame within the sequence where the object appearers
       1    track id     Unique tracking id of this object within this sequence
       1    type         Describes the type of object: 'Car', 'Van', 'Truck',
                         'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
                         'Misc' or 'DontCare'
       1    truncated    Float from 0 (non-truncated) to 1 (truncated), where
                         truncated refers to the object leaving image
                         boundaries.
                         Truncation 2 indicates an ignored object (in particular
                         in the beginning or end of a track) introduced by
                         manual labeling.
       1    occluded     Integer (0,1,2,3) indicating occlusion state:
                         0 = fully visible, 1 = partly occluded
                         2 = largely occluded, 3 = unknown
       1    alpha        Observation angle of object, ranging [-pi..pi]
       4    bbox         2D bounding box of object in the image (0-based index):
                         contains left, top, right, bottom pixel coordinates
       3    dimensions   3D object dimensions: height, width, length (in meters)
       3    location     3D object location x,y,z in camera coordinates
                         (in meters)
       1    rotation_y   Rotation ry around Y-axis in camera coordinates
                         [-pi..pi]

    """
    with open(filename, "r") as f:
        lines = f.read().splitlines()

    track_set = pymotutils.TrackSet()
    for line in lines:
        words = line.strip().split(' ')
        assert len(words) == 17, "Invalid number of elements in line."
        object_class = words[2]
        # BUGFIX: the original tested `object_class not in object_classes`
        # unconditionally, which raised TypeError for the default
        # object_classes=None. None now means "keep every class".
        if object_classes is not None and object_class not in object_classes:
            continue
        frame_idx, track_id = int(words[0]), int(words[1])
        roi = np.asarray([float(x) for x in words[6:10]])
        roi[2:] -= roi[:2] - 1  # Convert (left, top, right, bottom) to x, y, w, h
        if roi[3] < min_height:
            continue

        if track_id not in track_set.tracks:
            track_set.create_track(track_id)
        track_set.tracks[track_id].add(pymotutils.Detection(frame_idx, roi))

    return track_set
Exemplo n.º 4
0
def read_groundtruth(filename, sensor_data_is_3d=False):
    """Read ground truth file.

    Parameters
    ----------
    filename : str
        Path to the ground truth file.
    sensor_data_is_3d : bool
        If True, the ground truth's sensor data is set to the 3D position.
        If False, the ground truth's sensor data is set to the region of
        interest (ROI).

        Note that not all of the sequences provided by the MOT challenge
        contain valid 3D positions.

    Returns
    -------
    TrackSet
        The tracking ground truth; sensor data is either the 3D position
        or the ROI, depending on ``sensor_data_is_3d``.

    """
    # format: frame id, track id, bbox (x, y, w, h), care_flag, world (x, y, z)
    if not os.path.isfile(filename):
        return pymotutils.TrackSet()
    data = np.loadtxt(filename, delimiter=',')

    # The file stores -1 in the world columns when no 3D data exists.
    if sensor_data_is_3d and not np.any(data[:, 7:10] != -1):
        raise RuntimeError("File does not contain valid 3D coordinates")

    # Pick the columns once instead of branching inside the loop.
    sensor_columns = slice(7, 10) if sensor_data_is_3d else slice(2, 6)

    ground_truth = pymotutils.TrackSet()
    for row in data:
        frame_idx = int(row[0])
        track_id = int(row[1])
        if track_id not in ground_truth.tracks:
            ground_truth.create_track(track_id)
        ground_truth.tracks[track_id].add(pymotutils.Detection(
            frame_idx, row[sensor_columns], row[6] == 0))
    return ground_truth
Exemplo n.º 5
0
def interpolate_track_set(track_set):
    """Interpolate sensor data in given track set.

    Missing detections in every track are filled by linear interpolation;
    each dimension of the sensor data is interpolated independently of all
    others. For example, if the sensor data contains 3-D positions, the X,
    Y, and Z coordinates are interpolated linearly. The same method works
    fairly well for image regions.

    Parameters
    ----------
    track_set : TrackSet
        The track set to be interpolated. The sensor data must be an
        array_like (ndim=1).

    Returns
    -------
    TrackSet
        The interpolated track set, where each target is visible from the
        first frame of appearance until leaving the scene.

    """
    interp_set = pymotutils.TrackSet()
    for tag, track in track_set.tracks.items():
        first, last = track.first_frame_idx(), track.last_frame_idx()
        frame_range = np.arange(first, last + 1)
        observed_frames = sorted(track.detections.keys())

        if not observed_frames:
            continue  # This is an empty trajectory.

        # One row of sensor data per observed frame.
        samples = np.asarray(
            [track.detections[idx].sensor_data for idx in observed_frames])
        # Interpolate every coordinate column over the full frame range.
        columns = [np.interp(frame_range, observed_frames, samples[:, d])
                   for d in range(samples.shape[1])]

        new_track = interp_set.create_track(tag)
        for pos, frame_idx in enumerate(frame_range):
            coords = np.array([column[pos] for column in columns])
            new_track.add(pymotutils.Detection(
                frame_idx, coords, frame_idx not in track.detections))
    return interp_set
Exemplo n.º 6
0
def read_groundtruth(filename):
    """Read ground truth data.

    Parameters
    ----------
    filename : str
        Path to the ground truth (XML) file.

    Returns
    -------
    dataset.TrackSet
        Returns the tracking ground truth. Although the file contains
        other useful information, sensor_data holds only the ROI
        [left, top, width, height].

    """
    if not os.path.isfile(filename):
        return pymotutils.TrackSet()

    ground_truth = pymotutils.TrackSet()
    sequence = ElementTree.parse(filename).getroot()
    for frame in sequence.iter('frame'):
        frame_idx = int(frame.get('num'))
        for target in frame.iter('target'):
            track_id = int(target.get('id'))
            box = target.find('box')
            roi = np.asarray([
                float(box.get(attr))
                for attr in ('left', 'top', 'width', 'height')
            ])

            if track_id not in ground_truth.tracks:
                ground_truth.create_track(track_id)
            ground_truth.tracks[track_id].add(
                pymotutils.Detection(frame_idx, roi))
    return ground_truth
Exemplo n.º 7
0
def _read_groundtruth(label_file):
    """Read a VATIC-format label file into a TrackSet.

    Space-separated columns:
        local_id, xmin, ymin, xmax, ymax, frame, lost, occluded,
        generated, name, action
    """
    # see motchallnge_io.py read_groundtruth
    ground_truth = pymotutils.TrackSet()
    df = pd.read_csv(label_file, sep=' ', names=list(range(11)))
    # Drop annotations flagged as lost or occluded.
    df = df[(df[6] != 1) & (df[7] != 1)]
    # Convert bbox from (xmin, ymin, xmax, ymax)
    # to (xmin, ymin, width, height).
    df[3] = df[3] - df[1]
    df[4] = df[4] - df[2]

    for _, row in df.iterrows():
        frame_idx, track_id = int(row[5]), int(row[0])
        bbox = np.asarray(row[1:5])
        if track_id not in ground_truth.tracks:
            ground_truth.create_track(track_id)
        ground_truth.tracks[track_id].add(
            pymotutils.Detection(frame_idx, bbox, do_not_care=False))

    return ground_truth
Exemplo n.º 8
0
def interpolate_track_set(track_set):
    """Interpolate sensor data in given track set.

    This method uses linear interpolation to fill missing detections in
    each track of the given track set. Each dimension of the sensor data
    is interpolated independently of all others; for 3-D positions the X,
    Y, and Z coordinates are interpolated linearly, and the same method
    works fairly well for image regions. Each interpolated detection
    carries over the ``tsn_scores`` of the original detection when one
    exists for that frame, and ``None`` otherwise.

    Parameters
    ----------
    track_set : TrackSet
        The track set to be interpolated. The sensor data must be an
        array_like (ndim=1).

    Returns
    -------
    TrackSet
        The interpolated track set, where each target is visible from the
        first frame of appearance until leaving the scene.

    """
    interp_set = pymotutils.TrackSet()
    for tag, track in track_set.tracks.items():
        first, last = track.first_frame_idx(), track.last_frame_idx()
        frame_range = np.arange(first, last + 1)

        # Frame indices that actually carry a detection.
        observed_frames = sorted(track.detections.keys())

        if not observed_frames:
            continue  # This is an empty trajectory.

        # One bounding box (row) per observed frame.
        boxes = np.asarray(
            [track.detections[idx].sensor_data for idx in observed_frames])

        # One array per box dimension, e.g. all xmin values, all ymin
        # values, ... — interpolated over the full frame range.
        per_dim = [np.interp(frame_range, observed_frames, boxes[:, d])
                   for d in range(boxes.shape[1])]

        # Rebuild the track from the interpolated data.
        new_track = interp_set.create_track(tag)
        for pos, frame_idx in enumerate(frame_range):
            source = track.detections.get(frame_idx)
            coords = np.array([column[pos] for column in per_dim])
            new_track.add(pymotutils.Detection(
                frame_idx, coords,
                do_not_care=source is None,
                tsn_scores=None if source is None else source.tsn_scores))
    return interp_set