def _interpolation(self):
    """Run the tracker and return linearly interpolated hypothesis tracks.

    Almost identical to the original mcf-tracker implementation.
    ref:
        external/pymotutils/pytmotutils/application/application.py
        min_cost_flow_pymot.py
    """
    # Just fetch the raw result from the underlying tracker.
    raw_trajectories = self.tracker.compute_trajectories()
    detection_lists = [
        [
            pymotutils.Detection(
                frame_idx=entry[0], sensor_data=entry[1], tsn_scores=entry[2])
            for entry in trajectory
        ]
        for trajectory in raw_trajectories
    ]

    hypotheses = pymotutils.TrackSet()
    for track_id, detections in enumerate(detection_lists):
        track = hypotheses.create_track(track_id)
        for detection in detections:
            track.add(detection)

    # Fill gaps between detections by linear interpolation.
    return interpolate_track_set(hypotheses)
def compute_trajectories(self):
    """Return tracker trajectories as lists of pymotutils.Detection.

    Frame indices are shifted by ``self._start_idx`` so that they refer
    to the sequence's absolute frame numbers.
    """
    raw_trajectories = self.tracker.compute_trajectories()
    return [
        [
            pymotutils.Detection(
                frame_idx=self._start_idx + point[0], sensor_data=point[1])
            for point in trajectory
        ]
        for trajectory in raw_trajectories
    ]
def read_ground_truth(filename, object_classes=None, min_height=MIN_OBJECT_HEIGHT_IN_PIXELS):
    """Read a KITTI-style ground truth file into a TrackSet.

    File format:
    #Values    Name      Description
    ----------------------------------------------------------------------------
       1    frame        Frame within the sequence where the object appearers
       1    track id     Unique tracking id of this object within this sequence
       1    type         Describes the type of object: 'Car', 'Van', 'Truck',
                         'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
                         'Misc' or 'DontCare'
       1    truncated    Float from 0 (non-truncated) to 1 (truncated), where
                         truncated refers to the object leaving image boundaries.
                         Truncation 2 indicates an ignored object (in particular
                         in the beginning or end of a track) introduced by manual
                         labeling.
       1    occluded     Integer (0,1,2,3) indicating occlusion state:
                         0 = fully visible, 1 = partly occluded
                         2 = largely occluded, 3 = unknown
       1    alpha        Observation angle of object, ranging [-pi..pi]
       4    bbox         2D bounding box of object in the image (0-based index):
                         contains left, top, right, bottom pixel coordinates
       3    dimensions   3D object dimensions: height, width, length (in meters)
       3    location     3D object location x,y,z in camera coordinates (in meters)
       1    rotation_y   Rotation ry around Y-axis in camera coordinates [-pi..pi]

    Parameters
    ----------
    filename : str
        Path to the ground truth file.
    object_classes : Optional[Container[str]]
        If given, only objects whose class is contained in this set are kept.
        If None, all object classes are kept.
    min_height : float
        Boxes with a height (in pixels) below this threshold are discarded.

    Returns
    -------
    TrackSet
        Ground truth tracks; sensor_data is the ROI as (x, y, w, h).
    """
    with open(filename, "r") as f:
        lines = f.read().splitlines()

    track_set = pymotutils.TrackSet()
    for line in lines:
        words = line.strip().split(' ')
        assert len(words) == 17, "Invalid number of elements in line."

        object_class = words[2]
        # BUGFIX: object_classes defaults to None; the original
        # `object_class not in object_classes` raised TypeError in that
        # case. None now means "keep every class".
        if object_classes is not None and object_class not in object_classes:
            continue

        frame_idx, track_id = int(words[0]), int(words[1])
        roi = np.asarray([float(x) for x in words[6:10]])
        roi[2:] -= roi[:2] - 1  # Convert to x, y, w, h
        if roi[3] < min_height:
            continue
        if track_id not in track_set.tracks:
            track_set.create_track(track_id)
        track_set.tracks[track_id].add(pymotutils.Detection(frame_idx, roi))
    return track_set
def read_groundtruth(filename, sensor_data_is_3d=False):
    """Read ground truth file.

    Parameters
    ----------
    filename : str
        Path to the ground truth file.
    sensor_data_is_3d : bool
        If True, the ground truth's sensor data is set to the 3D position.
        If False, the ground truth's sensor data is set to the region of
        interest (ROI). Note that not all of the sequences provided by the
        MOT challenge contain valid 3D positions.

    Returns
    -------
    TrackSet
        Returns the tracking ground truth. If sensor_data_is_3d is True, the
        sensor data contains the 3D position. Otherwise, sensor_data is set
        to the region of interest (ROI).

    Raises
    ------
    RuntimeError
        If sensor_data_is_3d is True but the file contains no valid 3D
        coordinates.
    """
    # format: frame id, track id, bbox (x, y, w, h), care_flag, world (x, y, z)
    if not os.path.isfile(filename):
        return pymotutils.TrackSet()

    # ROBUSTNESS FIX: np.loadtxt returns a 1-D array for a single-row file,
    # which broke the 2-D indexing below; atleast_2d normalizes the shape.
    # An empty file yields an empty array and an empty TrackSet.
    data = np.atleast_2d(np.loadtxt(filename, delimiter=','))
    if data.size == 0:
        return pymotutils.TrackSet()

    has_threed = np.any(data[:, 7:10] != -1)
    if sensor_data_is_3d and not has_threed:
        raise RuntimeError("File does not contain valid 3D coordinates")

    ground_truth = pymotutils.TrackSet()
    for row in data:
        frame_idx, track_id = int(row[0]), int(row[1])
        do_not_care = row[6] == 0
        if sensor_data_is_3d:
            sensor_data = row[7:10]
        else:
            sensor_data = row[2:6]
        if track_id not in ground_truth.tracks:
            ground_truth.create_track(track_id)
        ground_truth.tracks[track_id].add(
            pymotutils.Detection(frame_idx, sensor_data, do_not_care))
    return ground_truth
def interpolate_track_set(track_set):
    """Interpolate sensor data in given track set.

    Missing detections in each track are filled via linear interpolation.
    Each dimension of the sensor data is interpolated independently of the
    others: e.g., for 3-D positions the X, Y, and Z coordinates are each
    interpolated linearly. The same scheme works fairly well for image
    regions, too.

    Parameters
    ----------
    track_set : TrackSet
        The track set to be interpolated. The sensor data must be an
        array_like (ndim=1).

    Returns
    -------
    TrackSet
        The interpolated track set, where each target is visible from the
        first frame of appearance until leaving the scene.
    """
    result = pymotutils.TrackSet()
    for track_id, track in track_set.tracks.items():
        start, stop = track.first_frame_idx(), track.last_frame_idx()
        all_frames = np.arange(start, stop + 1)
        observed_frames = sorted(track.detections.keys())
        if not observed_frames:
            # Empty trajectory; nothing to interpolate.
            continue

        observed = np.asarray(
            [track.detections[f].sensor_data for f in observed_frames])
        # One interpolated series per sensor-data dimension.
        series = [
            np.interp(all_frames, observed_frames, observed[:, dim])
            for dim in range(observed.shape[1])
        ]

        new_track = result.create_track(track_id)
        for offset, frame_idx in enumerate(all_frames):
            values = np.array([s[offset] for s in series])
            # Interpolated (i.e., originally missing) frames are flagged
            # as do-not-care.
            new_track.add(
                pymotutils.Detection(
                    frame_idx, values, frame_idx not in track.detections))
    return result
def read_groundtruth(filename):
    """Read ground truth data.

    Parameters
    ----------
    filename : str
        Path to the ground truth file (XML with <frame>/<target>/<box>
        elements).

    Returns
    -------
    dataset.TrackSet
        Returns the tracking ground truth. However the ground truth file
        contains other useful information, the sensor_data contains only
        the ROI [left, top, width, height].
    """
    if not os.path.isfile(filename):
        return pymotutils.TrackSet()

    tree = ElementTree.parse(filename)
    ground_truth = pymotutils.TrackSet()
    sequence = tree.getroot()
    for frame in sequence.iter('frame'):
        # PERF: the frame index is invariant per frame; the original parsed
        # it again for every target inside the frame.
        frame_idx = int(frame.get('num'))
        for target in frame.iter('target'):
            track_id = int(target.get('id'))
            box = target.find('box')
            sensor_data = np.asarray([
                float(box.get('left')),
                float(box.get('top')),
                float(box.get('width')),
                float(box.get('height'))
            ])
            if track_id not in ground_truth.tracks:
                ground_truth.create_track(track_id)
            ground_truth.tracks[track_id].add(
                pymotutils.Detection(frame_idx, sensor_data))
    return ground_truth
def _read_groundtruth(label_file):
    """Read VATIC-format ground truth into a TrackSet.

    label_file: VATIC format filepath
        local_id xmin ymin xmax ymax frame lost occuluded generated name action
    """
    # see motchallnge_io.py read_groundtruth
    ground_truth = pymotutils.TrackSet()
    df = pd.read_csv(
        label_file, sep=' ', names=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    # ignore when lost or occuluded
    df = df[(df[6] != 1) & (df[7] != 1)]

    # change bbox format from (xmin, ymin, xmax, ymax)
    # to (xmin, ymin, width, height)
    # NOTE(review): no +1 inclusive-pixel correction is applied here,
    # unlike the KITTI reader — confirm the VATIC boxes are exclusive.
    df[3] -= df[1]
    df[4] -= df[2]

    # PERF/IDIOM FIX: the original iterated with range(len(df)) + .iloc,
    # which is slow and produced an object-dtype sensor_data array from a
    # mixed-dtype Series slice. itertuples yields plain tuples, so the
    # bbox becomes a clean numeric array.
    for row in df.itertuples(index=False, name=None):
        frame_idx, track_id = int(row[5]), int(row[0])
        sensor_data = np.asarray(row[1:5])
        if track_id not in ground_truth.tracks:
            ground_truth.create_track(track_id)
        ground_truth.tracks[track_id].add(
            pymotutils.Detection(frame_idx, sensor_data, do_not_care=False))
    return ground_truth
def interpolate_track_set(track_set):
    """Interpolate sensor data in given track set.

    This method uses linear interpolation to fill missing detections in
    each track of the given track set. Each dimension of the sensor data
    is interpolated independently of all others. For example, if the
    sensor data contains 3-D positions, then the X, Y, and Z coordinates
    of the trajectory are interpolated linearly. The same method works
    fairly well for image regions as well. Detections that were present
    in the input keep their original ``tsn_scores``; interpolated ones
    get ``tsn_scores=None`` and are flagged do-not-care.

    Parameters
    ----------
    track_set : TrackSet
        The track set to be interpolated. The sensor data must be an
        array_like (ndim=1).

    Returns
    -------
    TrackSet
        The interpolated track set, where each target is visible from the
        first frame of appearance until leaving the scene.
    """
    interp_set = pymotutils.TrackSet()
    for tag, track in track_set.tracks.items():
        first = track.first_frame_idx()
        last = track.last_frame_idx()
        # All frame indices the track should span, gaps included.
        frame_range = np.arange(first, last + 1)
        observed_frames = sorted(track.detections.keys())
        if not observed_frames:
            continue  # This is an empty trajectory.

        # Stack the observed sensor data (e.g. bounding boxes), one row
        # per observed frame.
        observed = np.asarray(
            [track.detections[f].sensor_data for f in observed_frames])
        # Linear interpolation, one series per sensor-data dimension.
        series = [
            np.interp(frame_range, observed_frames, observed[:, dim])
            for dim in range(observed.shape[1])
        ]

        # Rebuild the track from the interpolated data.
        itrack = interp_set.create_track(tag)
        for i, frame_idx in enumerate(frame_range):
            values = np.array([s[i] for s in series])
            original = track.detections.get(frame_idx)
            itrack.add(
                pymotutils.Detection(
                    frame_idx,
                    values,
                    original is None,
                    tsn_scores=None if original is None else original.tsn_scores))
    return interp_set