def __init__(self, data_source):
    assert isinstance(data_source, DataSource), \
        "data_source is of wrong type"
    self.data_source = data_source
    self.hypotheses = pymotutils.TrackSet()
    self.ground_truth = pymotutils.TrackSet()
    self._visualization = None
    self._playback_trackset = pymotutils.TrackSet()
    self._tracker = None
    self._prev_timestep = None
def convert_track_set(track_set, detection_converter):
    """Create a track set copy with modified detections.

    Using this function, you can modify the detections contained in a track
    set using a user-specified converter function. For example, you may wish
    to set the sensor_data attribute to an application-specific field 'roi'
    in order to interpolate this data:

    >>> track_set = create_my_tracking_hypotheses()
    >>> detection_converter = lambda d: Detection(d.frame_idx, d.roi)
    >>> roi_data = convert_track_set(track_set, detection_converter)
    >>> interpolated = interpolate_track_set(roi_data)

    Parameters
    ----------
    track_set : TrackSet
        The input track set.
    detection_converter : Callable[Detection] -> Detection
        The converter function. This is called once for each detection
        contained in the track set.

    Returns
    -------
    TrackSet
        Returns the converted track set with the same structure as the
        input track set, but where each detection has been converted.

    """
    result = pymotutils.TrackSet()
    for tag, track in track_set.tracks.items():
        result_track = result.create_track(tag)
        for detection in track.detections.values():
            result_track.add(detection_converter(detection))
    return result
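# A runnable sketch of the pattern from the docstring above. The track and
# detection construction follows the pymotutils API as used elsewhere in
# this file; the ROI values and frame indices are made up for illustration.
track_set = pymotutils.TrackSet()
track = track_set.create_track(0)
for frame_idx in (0, 1, 3):  # frame 2 is deliberately missing
    track.add(pymotutils.RegionOfInterestDetection(
        frame_idx, (10. * frame_idx, 20., 30., 40.)))

# Move the ROI into sensor_data so interpolate_track_set can operate on it.
roi_data = convert_track_set(
    track_set, lambda d: pymotutils.Detection(d.frame_idx, d.roi))
interpolated = interpolate_track_set(roi_data)  # frame 2 is now filled in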
def _interpolation(self):
    # Almost identical to the original mcf-tracker implementation.
    # ref:
    #   external/pymotutils/pymotutils/application/application.py
    #   min_cost_flow_pymot.py
    # Just collect the tracker's result.
    trajectories = self.tracker.compute_trajectories()
    trajectories = [[
        pymotutils.Detection(
            frame_idx=x[0], sensor_data=x[1], tsn_scores=x[2])
        for x in trajectory
    ] for trajectory in trajectories]
    hypotheses = pymotutils.TrackSet()
    for i, trajectory in enumerate(trajectories):
        track = hypotheses.create_track(i)
        for detection in trajectory:
            track.add(detection)
    # Linear interpolation of missing detections.
    hypotheses = interpolate_track_set(hypotheses)
    return hypotheses
def read_cvml_groundtruth(filename, projection_matrix):
    def fattr(node, name):
        return float(node.attributes[name].value)

    wrapped_projection_matrix = np.eye(4)
    wrapped_projection_matrix[:3, :4] = projection_matrix
    inv_projection_matrix = np.linalg.inv(wrapped_projection_matrix)

    xmldoc = minidom.parse(filename)
    track_set = pymotutils.TrackSet()
    for frame in xmldoc.getElementsByTagName("frame"):
        frame_idx = int(frame.attributes["number"].value)
        for obj in frame.getElementsByTagName("object"):
            box = obj.getElementsByTagName("box")[0]
            xc, yc = fattr(box, "xc"), fattr(box, "yc")
            w, h = fattr(box, "w"), fattr(box, "h")
            roi = xc - w / 2., yc - h / 2., w, h

            xyz, isvalid = intersect_with_ground_plane(
                inv_projection_matrix, GROUND_PLANE_NORMAL,
                GROUND_PLANE_DISTANCE, np.array([[xc, yc + h / 2.]]))
            assert isvalid[0], "Failed to compute ground plane projection"

            track_id = int(obj.attributes["id"].value)
            if track_id not in track_set.tracks:
                track_set.create_track(track_id)
            track_set.tracks[track_id].add(
                pymotutils.RegionOfInterestDetection(
                    frame_idx, roi, xyz=xyz[0]))
    return track_set
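# The 4x4 wrapping above makes the 3x4 projection invertible, so image
# points can be back-projected onto the ground plane. A minimal numpy
# sketch of the same idea; the camera matrix and plane are invented, and a
# plane convention of n . x = d is assumed (intersect_with_ground_plane's
# actual convention is not shown here).
import numpy as np

projection_matrix = np.array([
    [1000., 0., 320., 0.],
    [0., 1000., 240., 0.],
    [0., 0., 1., 0.],
])
plane_normal = np.array([0., 1., 0.])  # y axis points down
plane_distance = 1.5                   # ground 1.5 m below the camera

wrapped = np.eye(4)
wrapped[:3, :4] = projection_matrix
inv = np.linalg.inv(wrapped)

# Camera center and viewing-ray direction for pixel (u, v).
u, v = 320., 480.
center = (inv @ np.array([0., 0., 0., 1.]))[:3]
direction = (inv @ np.array([u, v, 1., 0.]))[:3]

# Intersect the ray x(t) = center + t * direction with the plane.
denom = plane_normal @ direction
t = (plane_distance - plane_normal @ center) / denom
xyz = center + t * direction  # valid only if denom is nonzero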
def read_groundtruth(filename, sensor_data_is_3d=False):
    """Read ground truth file.

    Parameters
    ----------
    filename : str
        Path to the ground truth file.
    sensor_data_is_3d : bool
        If True, the ground truth's sensor data is set to the 3D position.
        If False, the ground truth's sensor data is set to the region of
        interest (ROI). Note that not all of the sequences provided by the
        MOT challenge contain valid 3D positions.

    Returns
    -------
    TrackSet
        Returns the tracking ground truth. If sensor_data_is_3d is True,
        the sensor data contains the 3D position. Otherwise, sensor_data
        is set to the region of interest (ROI).

    """
    # Format: frame id, track id, bbox (x, y, w, h), care flag, world (x, y, z)
    if not os.path.isfile(filename):
        return pymotutils.TrackSet()
    # atleast_2d keeps indexing valid for files with a single row.
    data = np.atleast_2d(np.loadtxt(filename, delimiter=','))

    has_threed = np.any(data[:, 7:10] != -1)
    if sensor_data_is_3d and not has_threed:
        raise RuntimeError("File does not contain valid 3D coordinates")

    ground_truth = pymotutils.TrackSet()
    for row in data:
        frame_idx, track_id = int(row[0]), int(row[1])
        do_not_care = row[6] == 0
        sensor_data = row[7:10] if sensor_data_is_3d else row[2:6]
        if track_id not in ground_truth.tracks:
            ground_truth.create_track(track_id)
        ground_truth.tracks[track_id].add(
            pymotutils.Detection(frame_idx, sensor_data, do_not_care))
    return ground_truth
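# Hypothetical usage; the path stands in for a 2D MOT 2015 sequence whose
# gt.txt rows carry the ten columns described above.
gt_file = "2DMOT2015/train/TUD-Stadtmitte/gt/gt.txt"  # hypothetical path
ground_truth = read_groundtruth(gt_file)
print(len(ground_truth.tracks), "ground-truth tracks")
try:
    world = read_groundtruth(gt_file, sensor_data_is_3d=True)
except RuntimeError:
    world = None  # this sequence carries no valid 3D coordinates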
def compute_trajectories(self, interpolation, detection_converter=None):
    """Compute trajectories on the sequence of data that has previously
    been processed.

    You must call process_data before computing trajectories; otherwise
    this method will fail.

    Optionally, you can pass in a function for interpolating tracking
    results. Prior to interpolation, you may convert tracking results
    using a user-specified function.

    See Also
    --------
    interpolate_track_set
    convert_track_set

    Parameters
    ----------
    interpolation : bool | Callable[TrackSet] -> TrackSet
        If True, track hypotheses and ground truth data will be
        interpolated (i.e., missed detections will be filled). If False,
        track hypotheses and ground truth will not be interpolated.
        Alternatively, you can pass in a function to use for track set
        interpolation.
    detection_converter : Optional[Callable[Detection] -> Detection]
        A converter function that is called once for each detection in
        order to convert tracking results to a format that is suitable
        for evaluation. This function is called prior to interpolation.

    Returns
    -------
    List[List[Detection]]
        Returns a list of trajectories, where each trajectory is a
        sequence of detections that belong to the same object.

    """
    trajectories = self._tracker.compute_trajectories()
    self.hypotheses = pymotutils.TrackSet()
    for i, trajectory in enumerate(trajectories):
        track = self.hypotheses.create_track(i)
        for detection in trajectory:
            track.add(detection)

    if detection_converter is not None:
        self.hypotheses = pymotutils.postprocessing.convert_track_set(
            self.hypotheses, detection_converter)

    if not isinstance(interpolation, bool):
        self.ground_truth = interpolation(self.ground_truth)
        self.hypotheses = interpolation(self.hypotheses)
    elif interpolation:
        interpolation = pymotutils.postprocessing.interpolate_track_set
        self.ground_truth = interpolation(self.ground_truth)
        self.hypotheses = interpolation(self.hypotheses)
    return trajectories
def process_data(self, tracker, visualization=None, start_idx=None,
                 end_idx=None):
    """Process a batch of frames.

    This method runs the given tracker on a sequence of data and collects
    the ground truth detections contained within.

    Parameters
    ----------
    tracker : Tracker
        A concrete tracking implementation.
    visualization : Visualization
        A concrete implementation of Visualization that draws detections
        and state estimates.
    start_idx : Optional[int]
        Index of the first frame. Defaults to the data source's first
        frame index.
    end_idx : Optional[int]
        One plus index of the last frame. Defaults to one plus the data
        source's last frame index.

    """
    if visualization is None:
        visualization = NoVisualization()
    assert isinstance(visualization, Visualization), \
        "visualization is of wrong type"
    assert isinstance(tracker, Tracker), "tracker is of wrong type"

    start_idx = (start_idx if start_idx is not None
                 else self.data_source.first_frame_idx())
    end_idx = (end_idx if end_idx is not None
               else self.data_source.last_frame_idx() + 1)

    self._visualization = visualization
    self.ground_truth = pymotutils.TrackSet()
    self.hypotheses = pymotutils.TrackSet()
    self._tracker = tracker
    self._tracker.reset(start_idx, end_idx)
    self._visualization.run(
        start_idx, end_idx, self._next_frame_process_data)
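# Hypothetical usage of the two methods above; Application, MyDataSource,
# and MyTracker are placeholder names for the surrounding class and for
# concrete DataSource and Tracker implementations.
app = Application(MyDataSource())
app.process_data(MyTracker())  # visualization defaults to NoVisualization
trajectories = app.compute_trajectories(interpolation=True)
print(len(app.hypotheses.tracks), "interpolated hypothesis tracks")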
def read_ground_truth(filename, object_classes=None,
                      min_height=MIN_OBJECT_HEIGHT_IN_PIXELS):
    """
    File format:

    #Values  Name        Description
    --------------------------------------------------------------------------
    1        frame       Frame within the sequence where the object appears
    1        track id    Unique tracking id of this object within this
                         sequence
    1        type        Describes the type of object: 'Car', 'Van',
                         'Truck', 'Pedestrian', 'Person_sitting',
                         'Cyclist', 'Tram', 'Misc' or 'DontCare'
    1        truncated   Float from 0 (non-truncated) to 1 (truncated),
                         where truncated refers to the object leaving image
                         boundaries. Truncation 2 indicates an ignored
                         object (in particular in the beginning or end of a
                         track) introduced by manual labeling.
    1        occluded    Integer (0,1,2,3) indicating occlusion state:
                         0 = fully visible, 1 = partly occluded,
                         2 = largely occluded, 3 = unknown
    1        alpha       Observation angle of object, ranging [-pi..pi]
    4        bbox        2D bounding box of object in the image (0-based
                         index): contains left, top, right, bottom pixel
                         coordinates
    3        dimensions  3D object dimensions: height, width, length (in
                         meters)
    3        location    3D object location x,y,z in camera coordinates (in
                         meters)
    1        rotation_y  Rotation ry around Y-axis in camera coordinates
                         [-pi..pi]
    """
    with open(filename, "r") as f:
        lines = f.read().splitlines()

    track_set = pymotutils.TrackSet()
    for line in lines:
        words = line.strip().split(' ')
        assert len(words) == 17, "Invalid number of elements in line."

        object_class = words[2]
        # Guard against the default object_classes=None (keep all classes).
        if object_classes is not None and object_class not in object_classes:
            continue

        frame_idx, track_id = int(words[0]), int(words[1])
        roi = np.asarray([float(x) for x in words[6:10]])
        roi[2:] -= roi[:2] - 1  # Convert (left, top, right, bottom) to (x, y, w, h).
        if roi[3] < min_height:
            continue

        if track_id not in track_set.tracks:
            track_set.create_track(track_id)
        track_set.tracks[track_id].add(pymotutils.Detection(frame_idx, roi))
    return track_set
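# The in-place conversion roi[2:] -= roi[:2] - 1 is compact; a worked
# example under the same 0-based, inclusive-corner convention (box values
# are made up):
import numpy as np

roi = np.asarray([100., 50., 149., 129.])  # left, top, right, bottom
roi[2:] -= roi[:2] - 1
print(roi)  # [100.  50.  50.  80.] -> x, y, width, height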
def read_groundtruth(filename):
    """Read ground truth data.

    Parameters
    ----------
    filename : str
        Path to the ground truth file.

    Returns
    -------
    dataset.TrackSet
        Returns the tracking ground truth. Although the ground truth file
        contains other useful information, the sensor_data contains only
        the ROI [left, top, width, height].

    """
    if not os.path.isfile(filename):
        return pymotutils.TrackSet()
    tree = ElementTree.parse(filename)

    ground_truth = pymotutils.TrackSet()
    sequence = tree.getroot()
    for frame in sequence.iter('frame'):
        frame_idx = int(frame.get('num'))
        for target in frame.iter('target'):
            track_id = int(target.get('id'))
            box = target.find('box')
            sensor_data = np.asarray([
                float(box.get('left')), float(box.get('top')),
                float(box.get('width')), float(box.get('height'))
            ])
            if track_id not in ground_truth.tracks:
                ground_truth.create_track(track_id)
            ground_truth.tracks[track_id].add(
                pymotutils.Detection(frame_idx, sensor_data))
    return ground_truth
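# A minimal document matching what the parser above reads, inferred from
# the element and attribute names in the code; the numbers are invented,
# and all other attributes in the real files are ignored anyway.
from xml.etree import ElementTree

xml = """
<sequence>
  <frame num="1">
    <target_list>
      <target id="7">
        <box left="592.75" top="378.8" width="160.05" height="162.2"/>
      </target>
    </target_list>
  </frame>
</sequence>
"""
root = ElementTree.fromstring(xml)
for frame in root.iter('frame'):
    for target in frame.iter('target'):
        print(frame.get('num'), target.get('id'),
              target.find('box').get('left'))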
def clip_track_set_at_tracking_area(track_set, xyz="sensor_data"):
    # Keep only detections whose position lies inside the cropped tracking
    # area; tracks that end up empty are dropped entirely.
    cropped_track_set = pymotutils.TrackSet()
    for tag, track in track_set.tracks.items():
        detections = {
            i: d for i, d in track.detections.items()
            if np.all(getattr(d, xyz) >= CROPPED_TRACKING_AREA_MIN)
            and np.all(getattr(d, xyz) <= CROPPED_TRACKING_AREA_MAX)
        }
        if len(detections) == 0:
            continue
        cropped_track = cropped_track_set.create_track(tag)
        cropped_track.detections = detections
    return cropped_track_set
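# A toy clipping sketch; the area bounds below are invented placeholders
# for the module-level CROPPED_TRACKING_AREA_MIN/MAX constants.
import numpy as np

CROPPED_TRACKING_AREA_MIN = np.array([0., 0., 0.])
CROPPED_TRACKING_AREA_MAX = np.array([10., 10., 3.])

track_set = pymotutils.TrackSet()
track = track_set.create_track(0)
track.add(pymotutils.Detection(0, np.array([5., 5., 1.])))   # inside
track.add(pymotutils.Detection(1, np.array([12., 5., 1.])))  # outside, dropped

clipped = clip_track_set_at_tracking_area(track_set)
print(sorted(clipped.tracks[0].detections))  # [0]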
def interpolate_track_set(track_set):
    """Interpolate sensor data in given track set.

    This method uses linear interpolation to fill missing detections in
    each track of the given track set. Each dimension of the sensor data
    is interpolated independently of all others. For example, if the
    sensor data contains 3-D positions, then the X, Y, and Z coordinates
    of the trajectory are interpolated linearly. The same approach works
    fairly well for image regions, too.

    Parameters
    ----------
    track_set : TrackSet
        The track set to be interpolated. The sensor data must be an
        array_like (ndim=1).

    Returns
    -------
    TrackSet
        The interpolated track set, where each target is visible from the
        first frame of appearance until leaving the scene.

    """
    interp_set = pymotutils.TrackSet()
    for tag, track in track_set.tracks.items():
        first, last = track.first_frame_idx(), track.last_frame_idx()
        frame_range = np.arange(first, last + 1)

        xp = sorted(track.detections.keys())
        if len(xp) == 0:
            continue  # This is an empty trajectory.
        sensor_data = np.asarray(
            [track.detections[i].sensor_data for i in xp])
        fps = [sensor_data[:, i] for i in range(sensor_data.shape[1])]
        interps = [np.interp(frame_range, xp, fp) for fp in fps]

        itrack = interp_set.create_track(tag)
        for i, frame_idx in enumerate(frame_range):
            sensor_data = np.array([interp[i] for interp in interps])
            do_not_care = frame_idx not in track.detections
            itrack.add(
                pymotutils.Detection(frame_idx, sensor_data, do_not_care))
    return interp_set
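# The per-dimension interpolation reduces to independent np.interp calls;
# a toy demonstration with two-dimensional sensor data and a gap at
# frame 2 (values are made up):
import numpy as np

xp = [0, 1, 3]                      # frames with detections
fp = np.array([[0., 10.],           # sensor data per detected frame
               [1., 11.],
               [3., 13.]])
frame_range = np.arange(0, 4)

columns = [np.interp(frame_range, xp, fp[:, i]) for i in range(fp.shape[1])]
print(np.stack(columns, axis=1))
# [[ 0. 10.]
#  [ 1. 11.]
#  [ 2. 12.]   <- frame 2 filled in; flagged do_not_care in the real code
#  [ 3. 13.]]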
def _read_groundtruth(label_file):
    """
    label_file: VATIC format filepath

    Columns (space separated):
        local_id xmin ymin xmax ymax frame lost occluded generated name
        action
    """
    # See motchallenge_io.py, read_groundtruth.
    ground_truth = pymotutils.TrackSet()
    df = pd.read_csv(
        label_file, sep=' ', names=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    # Ignore detections flagged as lost or occluded.
    df = df[(df[6] != 1) & (df[7] != 1)]
    # Change bbox format from (xmin, ymin, xmax, ymax)
    # to (xmin, ymin, width, height).
    df[3] -= df[1]
    df[4] -= df[2]

    for i in range(len(df)):
        row = df.iloc[i]
        frame_idx, track_id = int(row[5]), int(row[0])
        sensor_data = np.asarray(row[1:5])
        if track_id not in ground_truth.tracks:
            ground_truth.create_track(track_id)
        ground_truth.tracks[track_id].add(
            pymotutils.Detection(frame_idx, sensor_data, do_not_care=False))
    return ground_truth
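# A hypothetical two-line VATIC file illustrating the filtering and the
# corner-to-size conversion; the second row is flagged lost and dropped.
import io
import pandas as pd

text = ('3 100 50 150 130 0 0 0 0 "person" "walking"\n'
        '3 102 52 152 132 1 1 0 0 "person" "walking"\n')
df = pd.read_csv(io.StringIO(text), sep=' ',
                 names=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
df = df[(df[6] != 1) & (df[7] != 1)]
df[3] -= df[1]
df[4] -= df[2]
print(df[[1, 2, 3, 4]].values)  # [[100 50 50 80]]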
def interpolate_track_set(track_set):
    """Interpolate sensor data in given track set.

    This method uses linear interpolation to fill missing detections in
    each track of the given track set. Each dimension of the sensor data
    is interpolated independently of all others. For example, if the
    sensor data contains 3-D positions, then the X, Y, and Z coordinates
    of the trajectory are interpolated linearly. The same approach works
    fairly well for image regions, too.

    Parameters
    ----------
    track_set : TrackSet
        The track set to be interpolated. The sensor data must be an
        array_like (ndim=1).

    Returns
    -------
    TrackSet
        The interpolated track set, where each target is visible from the
        first frame of appearance until leaving the scene.

    """
    interp_set = pymotutils.TrackSet()
    for tag, track in track_set.tracks.items():
        first, last = track.first_frame_idx(), track.last_frame_idx()
        frame_range = np.arange(first, last + 1)  # frame indices

        xp = sorted(track.detections.keys())
        if len(xp) == 0:
            continue  # This is an empty trajectory.

        # List of bounding boxes, e.g.:
        # array([[1475.,  109.,   99.,   96.],
        #        [1473.,  115.,   95.,   95.],
        #        [1473.,  123.,   92.,   96.],
        #        ...,
        #        [1683., 2083.,   57.,   56.],
        #        [1681., 2091.,   59.,   51.],
        #        [1678., 2097.,   59.,   45.]])
        sensor_data = np.asarray(
            [track.detections[i].sensor_data for i in xp])

        # One array per sensor-data dimension:
        # fps = [[xmin1, xmin2, ...], [ymin1, ymin2, ...], ...], len(fps) = 4
        fps = [sensor_data[:, i] for i in range(sensor_data.shape[1])]

        # Linear interpolation per dimension.
        interps = [np.interp(frame_range, xp, fp) for fp in fps]

        # Rebuild the track from the interpolated data.
        itrack = interp_set.create_track(tag)
        for i, frame_idx in enumerate(frame_range):
            sensor_data = np.array([interp[i] for interp in interps])
            do_not_care = frame_idx not in track.detections
            tsn_scores = (None if do_not_care
                          else track.detections[frame_idx].tsn_scores)
            itrack.add(pymotutils.Detection(
                frame_idx, sensor_data, do_not_care, tsn_scores=tsn_scores))
    return interp_set