@pytest.fixture
def simple_predictions():
    video = Video.from_filename("video.mp4")

    skeleton = Skeleton()
    skeleton.add_node("a")
    skeleton.add_node("b")

    track_a = Track(0, "a")
    track_b = Track(0, "b")

    labels = Labels()

    instances = []
    instances.append(
        PredictedInstance(
            skeleton=skeleton,
            score=2,
            track=track_a,
            points=dict(a=PredictedPoint(1, 1, score=0.5), b=PredictedPoint(1, 1, score=0.5)),
        )
    )
    instances.append(
        PredictedInstance(
            skeleton=skeleton,
            score=5,
            track=track_b,
            points=dict(a=PredictedPoint(1, 1, score=0.7), b=PredictedPoint(1, 1, score=0.7)),
        )
    )

    labeled_frame = LabeledFrame(video, frame_idx=0, instances=instances)
    labels.append(labeled_frame)

    instances = []
    instances.append(
        PredictedInstance(
            skeleton=skeleton,
            score=3,
            track=track_a,
            points=dict(a=PredictedPoint(4, 5, score=1.5), b=PredictedPoint(1, 1, score=1.0)),
        )
    )
    instances.append(
        PredictedInstance(
            skeleton=skeleton,
            score=6,
            track=track_b,
            points=dict(a=PredictedPoint(6, 13, score=1.7), b=PredictedPoint(1, 1, score=1.0)),
        )
    )

    labeled_frame = LabeledFrame(video, frame_idx=1, instances=instances)
    labels.append(labeled_frame)

    return labels
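
# Hypothetical usage sketch (not part of the original tests): shows how the
# `simple_predictions` fixture above could be consumed. The test name and
# assertions are illustrative; everything asserted follows directly from the
# fixture, which builds two frames with two tracked predicted instances each.
def test_simple_predictions_structure(simple_predictions):
    labels = simple_predictions
    assert len(labels.labeled_frames) == 2
    for lf in labels.labeled_frames:
        assert len(lf.instances) == 2
        assert {inst.track.name for inst in lf.instances} == {"a", "b"}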
def test_labels_predicted_hdf5(multi_skel_vid_labels, tmpdir):
    labels = multi_skel_vid_labels
    filename = os.path.join(tmpdir, "test.h5")

    # Let's promote some of these Instances to predicted instances
    for label in labels:
        for i, instance in enumerate(label.instances):
            if i % 2 == 0:
                label.instances[i] = PredictedInstance.from_instance(instance, 0.3)

    # Let's also add some from_predicted values
    for label in labels:
        label.instances[1].from_predicted = label.instances[0]

    # Try adding a node to the skeleton
    labels.skeletons[0].add_node("new node")

    # Save and compare the results
    Labels.save_hdf5(filename=filename, labels=labels)
    loaded_labels = Labels.load_hdf5(filename=filename)
    _check_labels_match(labels, loaded_labels)

    # Try deleting nodes from the skeleton
    node = labels.skeletons[0].nodes[-1]
    labels.skeletons[0].delete_node(node)
    node = labels.skeletons[0].nodes[-1]
    labels.skeletons[0].delete_node(node)

    # Save and compare the results
    Labels.save_hdf5(filename=filename, labels=labels)
    loaded_labels = Labels.load_hdf5(filename=filename)
    _check_labels_match(labels, loaded_labels)
def test_frame_merge_between_predicted_and_user(skeleton, centered_pair_vid):
    user_inst = Instance(
        skeleton=skeleton,
        points={skeleton.nodes[0]: Point(1, 2)},
    )
    user_labels = Labels(
        [LabeledFrame(video=centered_pair_vid, frame_idx=0, instances=[user_inst])]
    )

    pred_inst = PredictedInstance(
        skeleton=skeleton,
        points={skeleton.nodes[0]: PredictedPoint(1, 2, score=1.0)},
        score=1.0,
    )
    pred_labels = Labels(
        [LabeledFrame(video=centered_pair_vid, frame_idx=0, instances=[pred_inst])]
    )

    # Merge predictions into current labels dataset
    _, _, new_conflicts = Labels.complex_merge_between(
        user_labels,
        new_labels=pred_labels,
        unify=False,  # since we used match_to when loading predictions file
    )

    # new predictions should replace old ones
    Labels.finish_complex_merge(user_labels, new_conflicts)

    # We should be able to cleanly merge the user and the predicted instance,
    # and we want to retain both even though they perfectly match.
    assert user_inst in user_labels[0].instances
    assert pred_inst in user_labels[0].instances
    assert len(user_labels[0].instances) == 2
def test_frame_merge_predicted_and_user(skeleton, centered_pair_vid):
    user_inst = Instance(
        skeleton=skeleton,
        points={skeleton.nodes[0]: Point(1, 2)},
    )
    user_frame = LabeledFrame(
        video=centered_pair_vid,
        frame_idx=0,
        instances=[user_inst],
    )

    pred_inst = PredictedInstance(
        skeleton=skeleton,
        points={skeleton.nodes[0]: PredictedPoint(1, 2, score=1.0)},
        score=1.0,
    )
    pred_frame = LabeledFrame(
        video=centered_pair_vid,
        frame_idx=0,
        instances=[pred_inst],
    )

    LabeledFrame.complex_frame_merge(user_frame, pred_frame)

    # We should be able to cleanly merge the user and the predicted instance,
    # and we want to retain both even though they perfectly match.
    assert user_inst in user_frame.instances
    assert pred_inst in user_frame.instances
    assert len(user_frame.instances) == 2
@pytest.fixture
def removal_test_labels():
    skeleton = Skeleton()
    video = Video(backend=MediaVideo)
    lf_user_only = LabeledFrame(
        video=video, frame_idx=0, instances=[Instance(skeleton=skeleton)]
    )
    lf_pred_only = LabeledFrame(
        video=video, frame_idx=1, instances=[PredictedInstance(skeleton=skeleton)]
    )
    lf_both = LabeledFrame(
        video=video,
        frame_idx=2,
        instances=[Instance(skeleton=skeleton), PredictedInstance(skeleton=skeleton)],
    )
    labels = Labels([lf_user_only, lf_pred_only, lf_both])
    return labels
def test_merge_predictions():
    dummy_video_a = Video.from_filename("foo.mp4")
    dummy_video_b = Video.from_filename("foo.mp4")

    dummy_skeleton_a = Skeleton()
    dummy_skeleton_a.add_node("node")

    dummy_skeleton_b = Skeleton()
    dummy_skeleton_b.add_node("node")

    dummy_instances_a = []
    dummy_instances_a.append(
        Instance(skeleton=dummy_skeleton_a, points=dict(node=Point(1, 1)))
    )
    dummy_instances_a.append(
        Instance(skeleton=dummy_skeleton_a, points=dict(node=Point(2, 2)))
    )

    labels_a = Labels()
    labels_a.append(
        LabeledFrame(dummy_video_a, frame_idx=0, instances=dummy_instances_a)
    )

    dummy_instances_b = []
    dummy_instances_b.append(
        Instance(skeleton=dummy_skeleton_b, points=dict(node=Point(1, 1)))
    )
    dummy_instances_b.append(
        PredictedInstance(
            skeleton=dummy_skeleton_b, points=dict(node=Point(3, 3)), score=1
        )
    )

    labels_b = Labels()
    labels_b.append(
        LabeledFrame(dummy_video_b, frame_idx=0, instances=dummy_instances_b)
    )

    # Frames have one redundant instance (perfect match) and all the
    # non-matching instances are different types (one predicted, one not).
    merged, extra_a, extra_b = Labels.complex_merge_between(labels_a, labels_b)
    assert len(merged[dummy_video_a]) == 1
    assert len(merged[dummy_video_a][0]) == 1  # the predicted instance was merged
    assert not extra_a
    assert not extra_b
def test_hdf5_from_predicted(multi_skel_vid_labels, tmpdir):
    labels = multi_skel_vid_labels
    filename = os.path.join(tmpdir, "test.h5")

    # Add some predicted instances to create from_predicted links
    for frame_num, frame in enumerate(labels):
        if frame_num % 20 == 0:
            frame.instances[0].from_predicted = PredictedInstance.from_instance(
                frame.instances[0], float(frame_num)
            )
            frame.instances.append(frame.instances[0].from_predicted)

    # Save and load, compare the results
    Labels.save_hdf5(filename=filename, labels=labels)
    loaded_labels = Labels.load_hdf5(filename=filename)

    for frame_num, frame in enumerate(loaded_labels):
        if frame_num % 20 == 0:
            assert frame.instances[0].from_predicted.score == float(frame_num)
def test_remove_predictions_with_new_labels(removal_test_labels):
    labels = removal_test_labels
    assert len(labels) == 3

    new_labels = Labels(
        [
            LabeledFrame(
                video=labels.video,
                frame_idx=1,
                instances=[PredictedInstance(skeleton=labels.skeleton)],
            )
        ]
    )

    labels.remove_predictions(new_labels=new_labels)

    assert len(labels) == 2
    assert labels[0].frame_idx == 0
    assert labels[0].has_user_instances
    assert not labels[0].has_predicted_instances
    assert labels[1].frame_idx == 2
    assert labels[1].has_user_instances
    assert labels[1].has_predicted_instances
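
# Hypothetical companion sketch (not part of the original tests), assuming
# Labels.remove_predictions() can also be called without a new_labels argument
# to drop every predicted instance. Only the weak invariant is asserted here,
# since the exact handling of emptied frames is not shown above.
def test_remove_all_predictions_sketch(removal_test_labels):
    labels = removal_test_labels
    labels.remove_predictions()
    assert all(not lf.has_predicted_instances for lf in labels)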
def test_predicted_points_array_with_score(skeleton):
    pred_inst = PredictedInstance(
        skeleton=skeleton,
        points={
            skeleton.nodes[0]: PredictedPoint(1, 2, score=0.3),
            skeleton.nodes[1]: PredictedPoint(4, 5, score=0.6, visible=False),
        },
        score=1.0,
    )

    pts = pred_inst.points_and_scores_array

    # Make sure we got (x, y, score) for first point
    assert pts[0, 0] == 1
    assert pts[0, 1] == 2
    assert pts[0, 2] == 0.3

    # Make sure invisible point has NaNs
    assert np.isnan(pts[1, 0])
def get_frame_predicted_instances(video_id, frame_idx):
    # Note: relies on `predicted_points`, `predicted_instances`, `data`,
    # `skeleton`, and `tracks` from the enclosing scope.
    points = predicted_points
    is_in_frame = (points["videoId"] == video_id) & (
        points["frameIdx"] == frame_idx
    )
    if not is_in_frame.any():
        return []

    instances = []
    frame_instance_ids = np.unique(points["instanceId"][is_in_frame])
    for i, instance_id in enumerate(frame_instance_ids):
        is_instance = is_in_frame & (points["instanceId"] == instance_id)
        track_id = predicted_instances.loc[
            predicted_instances["id"] == instance_id
        ]["trackId"].values[0]
        match_score = predicted_instances.loc[
            predicted_instances["id"] == instance_id
        ]["matching_score"].values[0]
        track_score = predicted_instances.loc[
            predicted_instances["id"] == instance_id
        ]["tracking_score"].values[0]

        instance_points = {
            data["skeleton"]["nodeNames"][n]: PredictedPoint(
                x, y, visible=v, score=confidence
            )
            for x, y, n, v, confidence in zip(
                *[
                    points[k][is_instance]
                    for k in ["x", "y", "node", "visible", "confidence"]
                ]
            )
        }

        instance = PredictedInstance(
            skeleton=skeleton,
            points=instance_points,
            track=tracks[track_id],
            score=match_score,
        )

        instances.append(instance)

    return instances
@classmethod
def read(
    cls,
    file: FileHandle,
    video: Union[Video, str],
    *args,
    **kwargs,
) -> Labels:
    connect_adj_nodes = False

    if video is None:
        raise ValueError("Cannot read analysis hdf5 if no video specified.")

    if not isinstance(video, Video):
        video = Video.from_filename(video)

    f = file.file
    tracks_matrix = f["tracks"][:].T
    track_names_list = f["track_names"][:].T
    node_names_list = f["node_names"][:].T

    # shape: frames * nodes * 2 * tracks
    frame_count, node_count, _, track_count = tracks_matrix.shape

    tracks = [Track(0, track_name.decode()) for track_name in track_names_list]

    skeleton = Skeleton()
    last_node_name = None
    for node_name in node_names_list:
        node_name = node_name.decode()
        skeleton.add_node(node_name)
        if connect_adj_nodes and last_node_name:
            skeleton.add_edge(last_node_name, node_name)
        last_node_name = node_name

    frames = []
    for frame_idx in range(frame_count):
        instances = []
        for track_idx in range(track_count):
            points = tracks_matrix[frame_idx, ..., track_idx]
            if not np.all(np.isnan(points)):
                point_scores = np.ones(len(points))
                # make everything a PredictedInstance since the usual use
                # case is to export predictions for analysis
                instances.append(
                    PredictedInstance.from_arrays(
                        points=points,
                        point_confidences=point_scores,
                        skeleton=skeleton,
                        track=tracks[track_idx],
                        instance_score=1,
                    )
                )
        if instances:
            frames.append(
                LabeledFrame(video=video, frame_idx=frame_idx, instances=instances)
            )

    return Labels(labeled_frames=frames)
@classmethod
def read(
    cls,
    file: format.filehandle.FileHandle,
    video_search: Union[Callable, List[Text], None] = None,
    match_to: Optional[Labels] = None,
    *args,
    **kwargs,
):
    f = file.file
    labels = cls.read_headers(file, video_search, match_to)

    frames_dset = f["frames"][:]
    instances_dset = f["instances"][:]
    points_dset = f["points"][:]
    pred_points_dset = f["pred_points"][:]

    # Shift the *non-predicted* points since these used to be saved with a
    # gridline coordinate system.
    if (file.format_id or 0) < 1.1:
        points_dset[:]["x"] -= 0.5
        points_dset[:]["y"] -= 0.5

    # Rather than instantiate a bunch of Point/PredictedPoint objects, we will
    # use inplace numpy recarrays. This will save a lot of time and memory
    # when reading things in.
    points = PointArray(buf=points_dset, shape=len(points_dset))
    pred_points = PredictedPointArray(
        buf=pred_points_dset, shape=len(pred_points_dset)
    )

    # Extend the tracks list with a None track. We will signify this with a -1
    # in the data, which will map to the last element of tracks.
    tracks = labels.tracks.copy()
    tracks.extend([None])

    # A dict to keep track of instances that have a from_predicted link. The
    # key is the instance and the value is the index of the instance.
    from_predicted_lookup = {}

    # Create the instances
    instances = []
    for i in instances_dset:
        track = tracks[i["track"]]
        skeleton = labels.skeletons[i["skeleton"]]

        if i["instance_type"] == 0:  # Instance
            instance = Instance(
                skeleton=skeleton,
                track=track,
                points=points[i["point_id_start"] : i["point_id_end"]],
            )
        else:  # PredictedInstance
            instance = PredictedInstance(
                skeleton=skeleton,
                track=track,
                points=pred_points[i["point_id_start"] : i["point_id_end"]],
                score=i["score"],
            )
        instances.append(instance)

        if i["from_predicted"] != -1:
            from_predicted_lookup[instance] = i["from_predicted"]

    # Make a second pass to add any from_predicted links
    for instance, from_predicted_idx in from_predicted_lookup.items():
        instance.from_predicted = instances[from_predicted_idx]

    # Create the labeled frames
    frames = [
        LabeledFrame(
            video=labels.videos[frame["video"]],
            frame_idx=frame["frame_idx"],
            instances=instances[
                frame["instance_id_start"] : frame["instance_id_end"]
            ],
        )
        for frame in frames_dset
    ]

    labels.labeled_frames = frames

    # Do the stuff that should happen after we have labeled frames
    labels.update_cache()

    return labels
@pytest.fixture
def predicted_instances(instances):
    return [PredictedInstance.from_instance(i, 1.0) for i in instances]
def test_nms_instances_to_remove():
    skeleton = Skeleton()
    skeleton.add_nodes(("a", "b"))

    instances = []

    inst = PredictedInstance(skeleton=skeleton)
    inst["a"].x = 10
    inst["a"].y = 10
    inst["b"].x = 20
    inst["b"].y = 20
    inst.score = 1
    instances.append(inst)

    inst = PredictedInstance(skeleton=skeleton)
    inst["a"].x = 10
    inst["a"].y = 10
    inst["b"].x = 15
    inst["b"].y = 15
    inst.score = 0.3
    instances.append(inst)

    inst = PredictedInstance(skeleton=skeleton)
    inst["a"].x = 30
    inst["a"].y = 30
    inst["b"].x = 40
    inst["b"].y = 40
    inst.score = 1
    instances.append(inst)

    inst = PredictedInstance(skeleton=skeleton)
    inst["a"].x = 32
    inst["a"].y = 32
    inst["b"].x = 42
    inst["b"].y = 42
    inst.score = 0.5
    instances.append(inst)

    to_keep, to_remove = nms_instances(instances, iou_threshold=0.5, target_count=3)

    assert len(to_remove) == 1
    assert to_remove[0].matches(instances[1])