def test_accumulate() -> None:
    """Verify that the accumulate function matches known output for a self-comparison."""
    cfg = DetectionCfg()
    # compare a set of labels to itself
    cls_to_accum, cls_to_ninst = accumulate(
        TEST_DATA_LOC / "detections",
        TEST_DATA_LOC / "detections/1/per_sweep_annotations_amodal/tracked_object_labels_0.json",
        cfg,
    )
    # ensure the detections match at all thresholds, have 0 TP errors, and have AP = 1
    expected_ATE = 0.0
    expected_ASE = 0.0
    expected_AOE = 0.0
    expected_AP = 1.0
    assert (
        cls_to_accum["VEHICLE"]
        == np.array(
            [
                [1.0, 1.0, 1.0, 1.0, expected_ATE, expected_ASE, expected_AOE, expected_AP],
                [1.0, 1.0, 1.0, 1.0, expected_ATE, expected_ASE, expected_AOE, expected_AP],
            ]
        )
    ).all()
    assert cls_to_ninst["VEHICLE"] == 2  # there are 2 vehicle labels in this file
    assert sum(cls_to_ninst.values()) == 2  # and no other labels
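# A minimal sketch (not part of the library) showing how the expected matrix
# asserted above could be built programmatically instead of hard-coded. The
# column layout is assumed from the assertions: one hit flag per affinity
# threshold, followed by the three TP errors (ATE, ASE, AOE) and AP.
import numpy as np

def make_expected_rows(
    num_thresholds: int, num_rows: int, ate: float, ase: float, aoe: float, ap: float
) -> np.ndarray:
    """Build `num_rows` identical rows of perfect-match statistics."""
    row = np.array([1.0] * num_thresholds + [ate, ase, aoe, ap])
    return np.tile(row, (num_rows, 1))

# make_expected_rows(4, 2, 0.0, 0.0, 0.0, 1.0) reproduces the matrix asserted
# in test_accumulate above, so the test could compare against it directly.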
def evaluate(self) -> pd.DataFrame:
    """Evaluate detection output and return metrics.

    The multiprocessing library is used to process sweeps in parallel: each sweep
    is handled independently, computing the assignment between detections and
    ground truth annotations.

    Returns:
        Evaluation metrics of shape (C + 1, K), where C + 1 is the number of
        classes plus a row for their means, and K is the number of evaluation
        metrics.
    """
    # gather the per-sweep annotation files for detections and ground truth
    dt_fpaths = list(self.dt_root_fpath.glob("*/per_sweep_annotations_amodal/*.json"))
    gt_fpaths = list(self.gt_root_fpath.glob("*/per_sweep_annotations_amodal/*.json"))
    assert len(dt_fpaths) == len(gt_fpaths)

    data: DefaultDict[str, List[np.ndarray]] = defaultdict(list)
    cls_to_ninst: DefaultDict[str, int] = defaultdict(int)

    # accumulate statistics for each sweep, optionally across multiple processes
    if self.num_procs == 1:
        accum = [
            accumulate(self.dt_root_fpath, gt_fpath, self.cfg, self.avm)
            for gt_fpath in gt_fpaths
        ]
    else:
        args = [
            (self.dt_root_fpath, gt_fpath, self.cfg, self.avm)
            for gt_fpath in gt_fpaths
        ]
        with Pool(self.num_procs) as p:
            accum = p.starmap(accumulate, args)

    # merge the per-sweep results by class
    for frame_stats, frame_cls_to_inst in accum:
        for cls_name, cls_stats in frame_stats.items():
            data[cls_name].append(cls_stats)
        for cls_name, num_inst in frame_cls_to_inst.items():
            cls_to_ninst[cls_name] += num_inst

    # stack each class's per-sweep statistics into a single (N, K) array
    data_stacked = {k: np.vstack(v) for k, v in data.items()}

    # start from default values so classes with no detections still appear in the summary
    init_data = {dt_cls: self.cfg.summary_default_vals for dt_cls in self.cfg.dt_classes}
    summary = pd.DataFrame.from_dict(init_data, orient="index", columns=STATISTIC_NAMES)
    summary_update = pd.DataFrame.from_dict(
        self.summarize(data_stacked, cls_to_ninst), orient="index", columns=STATISTIC_NAMES
    )
    summary.update(summary_update)

    summary = summary.round(SIGNIFICANT_DIGITS)
    summary.index = summary.index.str.title()
    summary.loc["Average Metrics"] = summary.mean().round(SIGNIFICANT_DIGITS)
    return summary
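# A hedged usage sketch for the `evaluate` method above. The enclosing class
# name (`DetectionEvaluator`) and its constructor signature are assumptions
# for illustration; only the attributes referenced in the method body
# (dt_root_fpath, gt_root_fpath, cfg, avm, num_procs) are taken from it.
from pathlib import Path

cfg = DetectionCfg()  # same config class used by the tests above
evaluator = DetectionEvaluator(  # hypothetical constructor mirroring the attributes used in evaluate()
    dt_root_fpath=Path("detections"),
    gt_root_fpath=Path("ground_truth"),
    cfg=cfg,
    avm=None,
    num_procs=4,
)
metrics = evaluator.evaluate()
# evaluate() appends a row of per-class means, rounded to SIGNIFICANT_DIGITS
print(metrics.loc["Average Metrics"])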
def test_accumulate() -> None:
    """Verify that the accumulate function matches known output for a self-comparison."""
    cfg = DetectionCfg(eval_only_roi_instances=False)
    job = AccumulateJob(
        TEST_DATA_LOC / "detections",
        TEST_DATA_LOC / "detections/1/per_sweep_annotations_amodal/tracked_object_labels_0.json",
        cfg,
        avm=None,  # ArgoverseMap instance is not required when ROI info is not used in evaluation
    )
    cls_to_accum, cls_to_ninst = accumulate(job)
    # ensure the detections match at all thresholds, have 0 TP errors, and have AP = 1
    expected_ATE = 0.0
    expected_ASE = 0.0
    expected_AOE = 0.0
    expected_AP = 1.0
    assert (
        cls_to_accum["VEHICLE"]
        == np.array(
            [
                [1.0, 1.0, 1.0, 1.0, expected_ATE, expected_ASE, expected_AOE, expected_AP],
                [1.0, 1.0, 1.0, 1.0, expected_ATE, expected_ASE, expected_AOE, expected_AP],
            ]
        )
    ).all()
    assert cls_to_ninst["VEHICLE"] == 2  # there are 2 vehicle labels in this file
    assert sum(cls_to_ninst.values()) == 2  # and no other labels
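# A hedged sketch of what the `AccumulateJob` container used above might look
# like: a dataclass bundling the four arguments that the earlier positional
# `accumulate(dt_root_fpath, gt_fpath, cfg, avm)` call passed separately.
# Field names are inferred from the positional order and the `avm=` keyword
# in the test; the real definition may differ.
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

@dataclass
class AccumulateJob:
    dt_root_fpath: Path  # root directory containing the detection logs
    gt_fpath: Path  # path to a single ground-truth annotation file
    cfg: "DetectionCfg"  # detection evaluation configuration
    avm: Optional["ArgoverseMap"] = None  # map handle, only needed for ROI filtering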