def simple_predictions():
    video = Video.from_filename("video.mp4")

    skeleton = Skeleton()
    skeleton.add_node("a")
    skeleton.add_node("b")

    track_a = Track(0, "a")
    track_b = Track(0, "b")

    labels = Labels()

    instances = []
    instances.append(
        PredictedInstance(
            skeleton=skeleton,
            score=2,
            track=track_a,
            points=dict(a=PredictedPoint(1, 1, score=0.5), b=PredictedPoint(1, 1, score=0.5)),
        )
    )
    instances.append(
        PredictedInstance(
            skeleton=skeleton,
            score=5,
            track=track_b,
            points=dict(a=PredictedPoint(1, 1, score=0.7), b=PredictedPoint(1, 1, score=0.7)),
        )
    )

    labeled_frame = LabeledFrame(video, frame_idx=0, instances=instances)
    labels.append(labeled_frame)

    instances = []
    instances.append(
        PredictedInstance(
            skeleton=skeleton,
            score=3,
            track=track_a,
            points=dict(a=PredictedPoint(4, 5, score=1.5), b=PredictedPoint(1, 1, score=1.0)),
        )
    )
    instances.append(
        PredictedInstance(
            skeleton=skeleton,
            score=6,
            track=track_b,
            points=dict(a=PredictedPoint(6, 13, score=1.7), b=PredictedPoint(1, 1, score=1.0)),
        )
    )

    labeled_frame = LabeledFrame(video, frame_idx=1, instances=instances)
    labels.append(labeled_frame)

    return labels

def multi_skel_vid_labels(hdf5_vid, small_robot_mp4_vid, skeleton, stickman):
    """Build a big list of LabeledFrame objects and wrap it in a Labels object.

    Args:
        hdf5_vid: An HDF5 video fixture.
        small_robot_mp4_vid: An MP4 video fixture.
        skeleton: A fly skeleton.
        stickman: A stickman skeleton.

    Returns:
        The Labels object containing all the labeled frames.
    """
    labels = []
    stick_tracks = [Track(spawned_on=0, name=f"Stickman {i}") for i in range(6)]
    fly_tracks = [Track(spawned_on=0, name=f"Fly {i}") for i in range(6)]

    # Set some tracks to None to test that untracked instances are handled.
    fly_tracks[3] = None
    stick_tracks[2] = None

    for f in range(500):
        vid = [hdf5_vid, small_robot_mp4_vid][f % 2]
        label = LabeledFrame(video=vid, frame_idx=f % vid.frames)

        fly_instances = []
        for i in range(6):
            fly_instances.append(Instance(skeleton=skeleton, track=fly_tracks[i]))
            for node in skeleton.nodes:
                fly_instances[i][node] = Point(x=i % vid.width, y=i % vid.height)

        stickman_instances = []
        for i in range(6):
            stickman_instances.append(Instance(skeleton=stickman, track=stick_tracks[i]))
            for node in stickman.nodes:
                stickman_instances[i][node] = Point(x=i % vid.width, y=i % vid.height)

        label.instances = stickman_instances + fly_instances
        labels.append(label)

    labels = Labels(labels)

    return labels

def test_many_tracks_hdf5(tmpdir):
    labels = Labels()
    filename = os.path.join(tmpdir, "test.h5")
    labels.tracks = [Track(spawned_on=i, name=f"track {i}") for i in range(4000)]

    Labels.save_hdf5(filename=filename, labels=labels)

def test_multivideo_tracks():
    vid_a = Video.from_filename("foo.mp4")
    vid_b = Video.from_filename("bar.mp4")

    skeleton = Skeleton.load_json("tests/data/skeleton/fly_skeleton_legs.json")

    track_a = Track(spawned_on=2, name="A")
    track_b = Track(spawned_on=3, name="B")

    inst_a = Instance(track=track_a, skeleton=skeleton)
    inst_b = Instance(track=track_b, skeleton=skeleton)

    lf_a = LabeledFrame(vid_a, frame_idx=2, instances=[inst_a])
    lf_b = LabeledFrame(vid_b, frame_idx=3, instances=[inst_b])

    labels = Labels(labeled_frames=[lf_a, lf_b])

    # Try setting the video B instance to a track used in video A.
    labels.track_swap(vid_b, new_track=track_a, old_track=track_b, frame_range=(3, 4))

    assert inst_b.track == track_a

def read(
    cls,
    file: FileHandle,
    video_path: str,
    skeleton_path: str,
    *args,
    **kwargs,
) -> Labels:
    f = file.file
    video = Video.from_filename(video_path)

    skeleton_data = pd.read_csv(skeleton_path, header=0)
    skeleton = Skeleton()
    skeleton.add_nodes(skeleton_data["name"])
    nodes = skeleton.nodes

    for name, parent, swap in skeleton_data.itertuples(index=False, name=None):
        if parent is not np.nan:
            skeleton.add_edge(parent, name)

    lfs = []

    pose_matrix = f["pose"][:]

    track_count, frame_count, node_count, _ = pose_matrix.shape

    tracks = [Track(0, f"Track {i}") for i in range(track_count)]

    for frame_idx in range(frame_count):
        lf_instances = []
        for track_idx in range(track_count):
            points_array = pose_matrix[track_idx, frame_idx, :, :]
            points = dict()
            for p in range(len(points_array)):
                x, y, score = points_array[p]
                points[nodes[p]] = Point(x, y)  # TODO: score

            inst = Instance(skeleton=skeleton, track=tracks[track_idx], points=points)
            lf_instances.append(inst)
        lfs.append(LabeledFrame(video, frame_idx=frame_idx, instances=lf_instances))

    return Labels(labeled_frames=lfs)

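# A minimal sketch of the inputs the reader above expects, assuming the layout
# implied by the code: a skeleton CSV with "name", "parent" and "swap" columns,
# and an HDF5 "pose" dataset shaped (tracks, frames, nodes, 3) holding x, y and
# score per node. The file names and node names here are hypothetical.
import h5py
import numpy as np
import pandas as pd


def make_example_pose_inputs(h5_path="example_pose.h5", csv_path="example_skeleton.csv"):
    # Skeleton table: the root node has no parent (NaN), children name theirs.
    pd.DataFrame(
        dict(
            name=["head", "thorax", "abdomen"],
            parent=[np.nan, "head", "thorax"],
            swap=[np.nan, np.nan, np.nan],
        )
    ).to_csv(csv_path, index=False)

    # 2 tracks, 10 frames, 3 nodes, (x, y, score) per node.
    pose = np.random.rand(2, 10, 3, 3)
    with h5py.File(h5_path, "w") as f:
        f.create_dataset("pose", data=pose)
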
def read(
    cls,
    file: FileHandle,
    video: Union[Video, str],
    *args,
    **kwargs,
) -> Labels:
    connect_adj_nodes = False

    if video is None:
        raise ValueError("Cannot read analysis hdf5 if no video specified.")

    if not isinstance(video, Video):
        video = Video.from_filename(video)

    f = file.file
    tracks_matrix = f["tracks"][:].T
    track_names_list = f["track_names"][:].T
    node_names_list = f["node_names"][:].T

    # shape: frames * nodes * 2 * tracks
    frame_count, node_count, _, track_count = tracks_matrix.shape

    tracks = [Track(0, track_name.decode()) for track_name in track_names_list]

    skeleton = Skeleton()
    last_node_name = None
    for node_name in node_names_list:
        node_name = node_name.decode()
        skeleton.add_node(node_name)
        if connect_adj_nodes and last_node_name:
            skeleton.add_edge(last_node_name, node_name)
        last_node_name = node_name

    frames = []
    for frame_idx in range(frame_count):
        instances = []
        for track_idx in range(track_count):
            points = tracks_matrix[frame_idx, ..., track_idx]
            if not np.all(np.isnan(points)):
                point_scores = np.ones(len(points))
                # Make everything a PredictedInstance since the usual use
                # case is to export predictions for analysis.
                instances.append(
                    PredictedInstance.from_arrays(
                        points=points,
                        point_confidences=point_scores,
                        skeleton=skeleton,
                        track=tracks[track_idx],
                        instance_score=1,
                    )
                )
        if instances:
            frames.append(
                LabeledFrame(video=video, frame_idx=frame_idx, instances=instances)
            )

    return Labels(labeled_frames=frames)

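# A minimal sketch of an analysis-style HDF5 file the reader above could
# consume, assuming the on-disk "tracks" dataset is the transpose of the
# frames x nodes x 2 x tracks array used in the code (i.e. stored as
# tracks x 2 x nodes x frames), with byte-string name datasets. The file name
# and contents are hypothetical.
import h5py
import numpy as np


def make_example_analysis_file(path="example_analysis.h5", n_frames=10, n_nodes=3, n_tracks=2):
    tracks = np.random.rand(n_tracks, 2, n_nodes, n_frames)
    with h5py.File(path, "w") as f:
        f.create_dataset("tracks", data=tracks)
        f.create_dataset(
            "track_names", data=[f"track_{i}".encode() for i in range(n_tracks)]
        )
        f.create_dataset(
            "node_names", data=[f"node_{i}".encode() for i in range(n_nodes)]
        )
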
def read(
    cls,
    file: FileHandle,
    img_dir: str,
    use_missing_gui: bool = False,
    *args,
    **kwargs,
) -> Labels:
    dicts = file.json

    # Make skeletons from "categories"
    skeleton_map = dict()
    for category in dicts["categories"]:
        skeleton = Skeleton(name=category["name"])
        skeleton_id = category["id"]
        node_names = category["keypoints"]
        skeleton.add_nodes(node_names)

        try:
            for src_idx, dst_idx in category["skeleton"]:
                skeleton.add_edge(node_names[src_idx], node_names[dst_idx])
        except IndexError as e:
            # According to the COCO data format specification[^1], edge indices
            # are supposed to be 1-based, but some datasets use 0-based
            # indices. We try the indices as given (0-based) first, and fall
            # back to treating them as 1-based if that raises an IndexError.
            # [1]: http://cocodataset.org/#format-data

            # Clear any edges we already created using 0-indexing
            skeleton.clear_edges()

            # Add edges
            for src_idx, dst_idx in category["skeleton"]:
                skeleton.add_edge(node_names[src_idx - 1], node_names[dst_idx - 1])

        skeleton_map[skeleton_id] = skeleton

    # Make videos from "images"

    # Remove images that aren't referenced in the annotations
    img_refs = [annotation["image_id"] for annotation in dicts["annotations"]]
    dicts["images"] = list(filter(lambda im: im["id"] in img_refs, dicts["images"]))

    # Key in JSON file should be "file_name", but sometimes it's "filename",
    # so we have to check both.
    img_filename_key = "file_name"
    if img_filename_key not in dicts["images"][0].keys():
        img_filename_key = "filename"

    # First add the img_dir to each image filename
    img_paths = [
        os.path.join(img_dir, image[img_filename_key]) for image in dicts["images"]
    ]

    # See if there are any missing files
    img_missing = [not os.path.exists(path) for path in img_paths]

    if sum(img_missing):
        if use_missing_gui:
            okay = MissingFilesDialog(img_paths, img_missing).exec_()
            if not okay:
                return None
        else:
            raise FileNotFoundError(
                f"Images for COCO dataset could not be found in {img_dir}."
            )

    # Update the image paths (with img_dir or user selected path)
    for image, path in zip(dicts["images"], img_paths):
        image[img_filename_key] = path

    # Create the video objects for the image files
    image_video_map = dict()

    vid_id_video_map = dict()
    for image in dicts["images"]:
        image_id = image["id"]
        image_filename = image[img_filename_key]

        # Sometimes images have a vid_id which links multiple images together
        # as one video. If so, we'll use that as the video key. But if there
        # isn't a vid_id, we'll treat each image as a distinct video and use
        # the image id as the video id.
        vid_id = image.get("vid_id", image_id)

        if vid_id not in vid_id_video_map:
            kwargs = dict(filenames=[image_filename])
            for key in ("width", "height"):
                if key in image:
                    kwargs[key] = image[key]

            video = Video.from_image_filenames(**kwargs)
            vid_id_video_map[vid_id] = video
            frame_idx = 0
        else:
            video = vid_id_video_map[vid_id]
            frame_idx = video.num_frames
            video.backend.filenames.append(image_filename)

        image_video_map[image_id] = (video, frame_idx)

    # Make instances from "annotations"
    lf_map = dict()
    track_map = dict()
    for annotation in dicts["annotations"]:
        skeleton = skeleton_map[annotation["category_id"]]
        image_id = annotation["image_id"]
        video, frame_idx = image_video_map[image_id]
        keypoints = np.array(annotation["keypoints"], dtype="int").reshape(-1, 3)

        track = None
        if "track_id" in annotation:
            track_id = annotation["track_id"]
            if track_id not in track_map:
                track_map[track_id] = Track(frame_idx, str(track_id))
            track = track_map[track_id]

        points = dict()
        any_visible = False
        for i in range(len(keypoints)):
            node = skeleton.nodes[i]
            x, y, flag = keypoints[i]

            if flag == 0:
                # node not labeled for this instance
                continue

            is_visible = flag == 2
            any_visible = any_visible or is_visible
            points[node] = Point(x, y, is_visible)

        if points:
            # If none of the points had 2 ("labeled and visible") as the
            # visibility flag, we'll assume the flags are incorrect and mark
            # all points as visible.
            if not any_visible:
                for point in points.values():
                    point.visible = True

            inst = Instance(skeleton=skeleton, points=points, track=track)

            if image_id not in lf_map:
                lf_map[image_id] = LabeledFrame(video, frame_idx)

            lf_map[image_id].insert(0, inst)

    return Labels(labeled_frames=list(lf_map.values()))

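# A minimal sketch of the COCO-style dictionary the reader above parses, based
# only on the keys it accesses; the concrete values are illustrative.
example_coco_dict = {
    "categories": [
        {
            "id": 1,
            "name": "animal",
            "keypoints": ["head", "thorax", "abdomen"],
            # Edge indices may be 0- or 1-based; both are handled above.
            "skeleton": [[1, 2], [2, 3]],
        }
    ],
    "images": [
        # "vid_id" is optional; without it, each image becomes its own video.
        {"id": 10, "file_name": "frame_0001.png", "width": 640, "height": 480}
    ],
    "annotations": [
        {
            "image_id": 10,
            "category_id": 1,
            # Flat [x, y, flag] triplets per keypoint; flag 0 = not labeled,
            # 1 = labeled but not visible, 2 = labeled and visible.
            "keypoints": [100, 200, 2, 110, 210, 2, 120, 220, 1],
            "track_id": 0,  # optional
        }
    ],
}
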
def track(
    self,
    untracked_instances: List[InstanceType],
    img: Optional[np.ndarray] = None,
    t: int = None,
) -> List[InstanceType]:
    """Performs a single step of tracking.

    Args:
        untracked_instances: List of instances to assign to tracks.
        img: Image data of the current frame for flow shifting.
        t: Current timestep. If not provided, increments from the internal queue.

    Returns:
        A list of the instances that were tracked.
    """
    if self.candidate_maker is None:
        return untracked_instances

    # Infer timestep if not provided.
    if t is None:
        if len(self.track_matching_queue) > 0:
            # Default to last timestep + 1 if available.
            t = self.track_matching_queue[-1].t + 1
        else:
            t = 0

    # Initialize containers for tracked instances at the current timestep.
    tracked_instances = []
    tracked_inds = []

    # Make cache so similarity function doesn't have to recompute everything.
    # similarity_cache = dict()

    # Process untracked instances.
    if len(untracked_instances) > 0:

        # Build a pool of matchable candidate instances.
        candidate_instances = self.candidate_maker.get_candidates(
            track_matching_queue=self.track_matching_queue,
            t=t,
            img=img,
        )

        if len(candidate_instances) > 0:

            # Group candidate instances by track.
            candidate_instances_by_track = defaultdict(list)
            for instance in candidate_instances:
                candidate_instances_by_track[instance.track].append(instance)

            # Compute similarity matrix between untracked instances and best
            # candidate for each track.
            candidate_tracks = list(candidate_instances_by_track.keys())
            matching_similarities = np.full(
                (len(untracked_instances), len(candidate_tracks)), np.nan
            )
            matching_candidates = []

            for i, untracked_instance in enumerate(untracked_instances):
                matching_candidates.append([])

                for j, candidate_track in enumerate(candidate_tracks):

                    # Compute similarity between untracked instance and all
                    # track candidates.
                    track_instances = candidate_instances_by_track[candidate_track]
                    track_matching_similarities = [
                        self.similarity_function(
                            untracked_instance,
                            candidate_instance,
                            # cache=similarity_cache
                        )
                        for candidate_instance in track_instances
                    ]

                    # Keep the best scoring instance for this track.
                    best_ind = np.argmax(track_matching_similarities)
                    matching_candidates[i].append(track_instances[best_ind])

                    # Use the best similarity score for matching.
                    best_similarity = track_matching_similarities[best_ind]
                    matching_similarities[i, j] = best_similarity

            # Perform matching between untracked instances and candidates.
            cost = -matching_similarities
            cost[np.isnan(cost)] = np.inf
            matches = self.matching_function(cost)

            # Assign each matched instance.
            for i, j in matches:

                # Pull out matched pair.
                matched_instance = untracked_instances[i]
                ref_instance = matching_candidates[i][j]

                # Save matching score.
                match_similarity = matching_similarities[i, j]

                # Assign to track and save.
                tracked_instances.append(
                    attr.evolve(
                        matched_instance,
                        track=ref_instance.track,
                        tracking_score=match_similarity,
                    )
                )

                # Keep track of the assigned instances.
                tracked_inds.append(i)

    # Spawn a new track for each remaining untracked instance.
    for i, inst in enumerate(untracked_instances):

        # Skip if this instance was tracked.
        if i in tracked_inds:
            continue

        # Skip if this instance is too small to spawn a new track with.
        if inst.n_visible_points < self.min_new_track_points:
            continue

        # Spawn new track.
        new_track = Track(spawned_on=t, name=f"track_{len(self.spawned_tracks)}")
        self.spawned_tracks.append(new_track)

        # Assign instance to the new track and save.
        tracked_instances.append(attr.evolve(inst, track=new_track))

    # Add the tracked instances to the matching buffer.
    self.track_matching_queue.append(MatchedInstance(t, tracked_instances, img))

    # Save tracked instances internally.
    if self.save_tracked_instances:
        self.tracked_instances[t] = tracked_instances

    return tracked_instances

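# A minimal sketch of what `self.matching_function` might look like, assuming
# it performs optimal (Hungarian) assignment over the cost matrix built above
# and returns (instance index, track index) pairs. This is an illustration of
# the matching step, not the tracker's actual implementation.
import numpy as np
from scipy.optimize import linear_sum_assignment


def hungarian_matching(cost: np.ndarray) -> list:
    # linear_sum_assignment cannot handle infinite costs directly, so cap them
    # at a large finite value and drop those pairings afterwards.
    finite_cost = np.where(np.isinf(cost), 1e9, cost)
    rows, cols = linear_sum_assignment(finite_cost)
    return [(i, j) for i, j in zip(rows, cols) if np.isfinite(cost[i, j])]
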
def load_predicted_labels_json_old(
    data_path: str,
    parsed_json: dict = None,
    adjust_matlab_indexing: bool = True,
    fix_rel_paths: bool = True,
) -> List[LabeledFrame]:
    """Load predicted instances from Talmo's old JSON format.

    Args:
        data_path: The path to the JSON file.
        parsed_json: The parsed json if already loaded, so we can save some
            time if already parsed.
        adjust_matlab_indexing: Whether to adjust indexing from MATLAB.
        fix_rel_paths: Whether to fix paths to videos to absolute paths.

    Returns:
        List of :class:`LabeledFrame` objects.
    """
    if parsed_json is None:
        data = json.loads(open(data_path).read())
    else:
        data = parsed_json

    videos = pd.DataFrame(data["videos"])
    predicted_instances = pd.DataFrame(data["predicted_instances"])
    predicted_points = pd.DataFrame(data["predicted_points"])

    if adjust_matlab_indexing:
        predicted_instances.frameIdx -= 1
        predicted_points.frameIdx -= 1
        predicted_points.node -= 1
        predicted_points.x -= 1
        predicted_points.y -= 1

    skeleton = Skeleton()
    skeleton.add_nodes(data["skeleton"]["nodeNames"])
    edges = data["skeleton"]["edges"]
    if adjust_matlab_indexing:
        edges = np.array(edges) - 1
    for (src_idx, dst_idx) in edges:
        skeleton.add_edge(
            data["skeleton"]["nodeNames"][src_idx],
            data["skeleton"]["nodeNames"][dst_idx],
        )

    if fix_rel_paths:
        for i, row in videos.iterrows():
            p = row.filepath
            if not os.path.exists(p):
                p = os.path.join(os.path.dirname(data_path), p)
                if os.path.exists(p):
                    videos.at[i, "filepath"] = p

    # Make the video objects
    video_objects = {}
    for i, row in videos.iterrows():
        if videos.at[i, "format"] == "media":
            vid = Video.from_media(videos.at[i, "filepath"])
        else:
            vid = Video.from_hdf5(
                filename=videos.at[i, "filepath"], dataset=videos.at[i, "dataset"]
            )

        video_objects[videos.at[i, "id"]] = vid

    track_ids = predicted_instances["trackId"].values
    unique_track_ids = np.unique(track_ids)

    spawned_on = {
        track_id: predicted_instances.loc[predicted_instances["trackId"] == track_id][
            "frameIdx"
        ].values[0]
        for track_id in unique_track_ids
    }
    tracks = {
        i: Track(name=str(i), spawned_on=spawned_on[i])
        for i in np.unique(predicted_instances["trackId"].values).tolist()
    }

    # A function to get all the instances for a particular video frame
    def get_frame_predicted_instances(video_id, frame_idx):
        points = predicted_points
        is_in_frame = (points["videoId"] == video_id) & (
            points["frameIdx"] == frame_idx
        )
        if not is_in_frame.any():
            return []

        instances = []
        frame_instance_ids = np.unique(points["instanceId"][is_in_frame])
        for i, instance_id in enumerate(frame_instance_ids):
            is_instance = is_in_frame & (points["instanceId"] == instance_id)
            track_id = predicted_instances.loc[
                predicted_instances["id"] == instance_id
            ]["trackId"].values[0]
            match_score = predicted_instances.loc[
                predicted_instances["id"] == instance_id
            ]["matching_score"].values[0]
            track_score = predicted_instances.loc[
                predicted_instances["id"] == instance_id
            ]["tracking_score"].values[0]

            instance_points = {
                data["skeleton"]["nodeNames"][n]: PredictedPoint(
                    x, y, visible=v, score=confidence
                )
                for x, y, n, v, confidence in zip(
                    *[
                        points[k][is_instance]
                        for k in ["x", "y", "node", "visible", "confidence"]
                    ]
                )
            }

            instance = PredictedInstance(
                skeleton=skeleton,
                points=instance_points,
                track=tracks[track_id],
                score=match_score,
            )

            instances.append(instance)

        return instances

    # Get the unique labeled frames and construct a list of LabeledFrame objects for them.
    frame_keys = list(
        {
            (videoId, frameIdx)
            for videoId, frameIdx in zip(
                predicted_points["videoId"], predicted_points["frameIdx"]
            )
        }
    )
    frame_keys.sort()

    labels = []
    for videoId, frameIdx in frame_keys:
        label = LabeledFrame(
            video=video_objects[videoId],
            frame_idx=frameIdx,
            instances=get_frame_predicted_instances(videoId, frameIdx),
        )
        labels.append(label)

    return labels

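# A short usage sketch, assuming "old_predictions.json" is a file in the old
# JSON format handled above (the path is hypothetical); the returned list of
# LabeledFrame objects can be wrapped in a Labels container for downstream use.
def load_old_predictions_as_labels(path: str = "old_predictions.json") -> Labels:
    labeled_frames = load_predicted_labels_json_old(path, fix_rel_paths=True)
    return Labels(labeled_frames=labeled_frames)
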