import itertools
import json
import os
import warnings
from collections import OrderedDict
from itertools import chain
from logging import getLogger  # in an OCR-D context this would be ocrd_utils.getLogger
from pathlib import Path

import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
# maskArea/mergeMasks are assumed to alias pycocotools' RLE helpers:
from pycocotools.mask import area as maskArea, merge as mergeMasks
from terminaltables import AsciiTable

# NOTE: `_to_coco_format`, `summarize`, and `_cocoeval_summarize` are
# module-level helpers assumed to be defined elsewhere in this module.


def coco_evaluation(gts, detections, height, width, labelmap=("car", "pedestrian")):
    """Simple helper function wrapping around COCO's Python API.

    Args:
        gts: iterable of numpy boxes for the ground truth
        detections: iterable of numpy boxes for the detections
        height (int): frame height
        width (int): frame width
        labelmap (iterable): iterable of class labels
    """
    categories = [{"id": id + 1, "name": class_name, "supercategory": "none"}
                  for id, class_name in enumerate(labelmap)]

    dataset, results = _to_coco_format(gts, detections, categories, height=height, width=width)

    coco_gt = COCO()
    coco_gt.dataset = dataset
    coco_gt.createIndex()
    coco_pred = coco_gt.loadRes(results) if len(results) else COCO()

    coco_eval = COCOeval(coco_gt, coco_pred, 'bbox')
    coco_eval.params.imgIds = np.arange(1, len(gts) + 1, dtype=int)
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.stats = summarize(coco_eval)
    stats = {
        "mean_ap": coco_eval.stats[0],
        "mean_ap50": coco_eval.stats[1],
        "mean_ap75": coco_eval.stats[2],
        "mean_ap_small": coco_eval.stats[3],
        "mean_ap_medium": coco_eval.stats[4],
        "mean_ap_big": coco_eval.stats[5],
        "mean_ar": coco_eval.stats[8],
        "mean_ar_small": coco_eval.stats[9],
        "mean_ar_medium": coco_eval.stats[10],
        "mean_ar_big": coco_eval.stats[11],
    }
    return stats
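# A minimal, hypothetical usage sketch for coco_evaluation. It assumes the
# box layout expected by _to_coco_format (not shown in this module): GT rows
# [x1, y1, x2, y2, class_id], detection rows [x1, y1, x2, y2, score, class_id].
# The frame size and values are made up for illustration.
def _demo_coco_evaluation():
    gts = [np.array([[10., 20., 50., 80., 0.]])]        # one frame, one "car" box
    dets = [np.array([[12., 22., 48., 78., 0.9, 0.]])]  # one confident detection
    stats = coco_evaluation(gts, dets, height=240, width=304)
    print(stats["mean_ap"], stats["mean_ap50"])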
def coco_eval(gts, proposals, labelmap, height, width, tmp_path, epoch, dump=False):
    """Run COCO bbox evaluation on in-memory ground truth and proposals.

    GT boxes are expected as [x1, y1, x2, y2, class_id] and proposals as
    [x1, y1, x2, y2, score, class_id]. `epoch` is kept for API compatibility
    but is not used here.
    """
    categories = [{"id": id + 1, "name": class_name, "supercategory": "none"}
                  for id, class_name in enumerate(labelmap)]

    annotations = []
    results = []
    image_ids = []
    images = []
    box_type = np.float32

    # to dictionary: convert the per-image numpy boxes to COCO-style records
    for image_id, (gt, pred) in enumerate(zip(gts, proposals)):
        im_id = image_id + 1
        images.append({
            "date_captured": "2019",
            "file_name": "n.a",
            "id": im_id,
            "license": 1,
            "url": "",
            "height": height,
            "width": width,
        })

        for i in range(len(gt)):
            bbox = gt[i]
            x1, y1, x2, y2 = bbox[:4].astype(box_type).tolist()
            w, h = (x2 - x1), (y2 - y1)
            area = w * h
            category_id = bbox[4]
            annotation = {
                "area": float(area),
                "iscrowd": False,
                "image_id": im_id,
                "bbox": [x1, y1, w, h],
                "category_id": int(category_id) + 1,
                "id": len(annotations) + 1,
            }
            annotations.append(annotation)

        for i in range(len(pred)):
            bbox = pred[i, :4]
            x1, y1, x2, y2 = bbox.astype(box_type).tolist()
            w, h = (x2 - x1), (y2 - y1)
            score = pred[i, 4]
            category_id = pred[i, 5]
            image_result = {
                "image_id": im_id,
                "category_id": int(category_id) + 1,
                "score": float(score),
                "bbox": [x1, y1, w, h],
            }
            results.append(image_result)

        image_ids.append(im_id)

    json_data = {
        "info": {},
        "licenses": [],
        "type": "instances",
        "images": images,
        "annotations": annotations,
        "categories": categories,
    }

    # loadRes() fails on an empty list, so feed it a single dummy detection.
    if len(results) == 0:
        results = [{"image_id": 1, "category_id": 1, "score": 0, "bbox": [0, 0, 0, 0]}]

    if dump:
        # Writing the files is time-consuming, so only do it on request.
        gt_filename = os.path.join(tmp_path, "gt.json")
        result_filename = os.path.join(tmp_path, "res.json")
        with open(gt_filename, "w") as f:
            json.dump(json_data, f, sort_keys=True, indent=4)
        with open(result_filename, "w") as f:
            json.dump(results, f, indent=4)
        coco_true = COCO(gt_filename)
        coco_pred = coco_true.loadRes(result_filename)
    else:
        coco_true = COCO()
        coco_true.dataset = json_data
        coco_true.createIndex()
        coco_pred = coco_true.loadRes(results)

    coco_eval = COCOeval(coco_true, coco_pred, "bbox")
    coco_eval.params.imgIds = image_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.stats = summarize(coco_eval)
    stats = {
        "mean_ap": coco_eval.stats[0],
        "mean_ap50": coco_eval.stats[1],
        "mean_ap75": coco_eval.stats[2],
        "mean_ap_small": coco_eval.stats[3],
        "mean_ap_medium": coco_eval.stats[4],
        "mean_ap_big": coco_eval.stats[5],
        "mean_ar": coco_eval.stats[8],
        "mean_ar_small": coco_eval.stats[9],
        "mean_ar_medium": coco_eval.stats[10],
        "mean_ar_big": coco_eval.stats[11],
    }
    return stats
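# A minimal, hypothetical sketch of the dump=True path of coco_eval, writing
# gt.json/res.json into a temporary directory so the round-trip can be
# inspected on disk. Box values are made up; `summarize` must be available.
def _demo_coco_eval_dump():
    import tempfile
    gts = [np.array([[10., 20., 50., 80., 0.]])]
    preds = [np.array([[12., 22., 48., 78., 0.9, 0.]])]
    with tempfile.TemporaryDirectory() as tmp:
        stats = coco_eval(gts, preds, ("car", "pedestrian"),
                          height=240, width=304, tmp_path=tmp, epoch=0, dump=True)
    print(stats["mean_ap50"])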
def evaluate_coco(coco_gt, coco_dt, parameters, catIds=None):
    LOG = getLogger('processor.EvaluateSegmentation')
    LOG.info("comparing segmentations")
    stats = dict(parameters)
    coco_eval = COCOeval(coco_gt, coco_dt, 'segm')  # bbox
    if catIds:
        coco_eval.params.catIds = catIds
    #coco_eval.params.iouThrs = [.5:.05:.95]
    #coco_eval.params.iouThrs = np.linspace(.3, .95, 14)
    coco_eval.params.maxDets = [None]  # unlimited nr of detections (requires pycocotools#559)
    #coco_eval.params.areaRng = [(0, np.inf)]  # unlimited region size
    #coco_eval.params.areaRngLbl = ['all']  # unlimited region size
    # Note: The IoU threshold criterion is inadequate for flat segmentation,
    # because over-/undersegmentation can quickly become false negative/positive.
    # The pycocotools implementation is especially inadequate, because
    # it only counts 1:1 matches (and not even the largest or best-scoring, #564).
    # On the other hand, purely pixel-wise measures do not distinguish instances,
    # i.e. neighbours can quickly become merged or instances torn apart.
    # Our approach therefore does not build on pycocotools for matching
    # and aggregation, only for fast IoU calculation. All non-zero pairs
    # are considered matches if their intersection over union > 0.5 _or_
    # their intersection over either side > 0.5. Matches can thus be n:m.
    # Non-matches are counted as well (false positives and false negatives).
    # Aggregation uses microaveraging over images. Besides counting segments,
    # the pixel areas are counted and averaged (as ratios).
    # FIXME: We must differentiate between allowable and non-allowable
    # over-/undersegmentation (splits/merges).
    # (A region's split is allowable if it flows in the textLineOrder of the respective GT,
    # i.e. lines are likely to be either on one side or the other, but not both.
    # For top-to-bottom/bottom-to-top regions, vertical splits are allowable.
    # For left-to-right/right-to-left regions, horizontal splits are allowable.
    # To be sure, we could also validate that explicitly – evaluating both levels at the same time.
    # Analogously, a number of regions' merge is allowable if it flows in the textLineOrder
    # of them all, and the GT global reading order has no other regions in between.
    # For top-to-bottom/bottom-to-top regions, vertical merges are allowable.
    # For left-to-right/right-to-left regions, horizontal merges are allowable.
    # Again, we could also validate that the overall textline flow is equivalent.)
    # This difference can in turn be used to weigh a match pair's score accordingly
    # when aggregating. For precision-like scores, we would rule out non-allowable merges
    # (by counting them as FP), and for recall-like scores, we would rule out non-allowable splits
    # (by counting them as FN).
    # We can also weigh these non-allowable cases by their share of height
    # (in vertical textLineOrder and horizontal writing) or width
    # (in horizontal textLineOrder and vertical writing) which is in disagreement,
    # or the share of their textlines that have been split or lost.
    # Furthermore, we can weigh matches by the share of non-text regions or fg pixels involved.
    coco_eval.evaluate()

    # get by-page alignment (ignoring inadequate 1:1 matching by pycocotools)
    def get(arg):
        return lambda x: x[arg]

    numImgs = len(coco_eval.params.imgIds)
    numAreas = len(coco_eval.params.areaRng)
    for imgind, imgId in enumerate(coco_eval.params.imgIds):
        img = coco_gt.imgs[imgId]
        pageId = img['file_name']
        for catind, catId in enumerate(coco_eval.params.catIds):
            cat = coco_gt.cats[catId]
            catName = cat['name']
            if not catId:
                continue
            # bypassing COCOeval.evaluateImg, hook onto its results
            # (again, we stay at areaRng[0]=all and maxDets[0]=all)
            start = catind * numImgs * numAreas
            evalimg = coco_eval.evalImgs[start + imgind]
            if evalimg is None:
                continue  # no DT and GT here
            # record as dict by pageId / by category
            imgstats = stats.setdefault('by-image', dict())
            pagestats = imgstats.setdefault(pageId, dict())
            # get matches and ious and scores
            ious = coco_eval.ious[imgId, catId]
            if len(ious):
                overlaps_dt, overlaps_gt = ious.nonzero()
            else:
                overlaps_dt = overlaps_gt = []
            # reconstruct score sorting in computeIoU
            gt = coco_eval._gts[imgId, catId]
            dt = coco_eval._dts[imgId, catId]
            dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
            dt = [dt[i] for i in dtind]
            matches = list()
            gtmatches = dict()
            dtmatches = dict()
            for dtind, gtind in zip(overlaps_dt, overlaps_gt):
                d = dt[dtind]
                g = gt[gtind]
                iou = ious[dtind, gtind]
                union = maskArea(mergeMasks([g['segmentation'], d['segmentation']]))
                intersection = int(iou * union)
                # cannot use g or d['area'] here, because mask might be
                # fractional (only-fg) instead of outline
                areag = int(maskArea(g['segmentation']))
                aread = int(maskArea(d['segmentation']))
                iogt = intersection / areag
                iodt = intersection / aread
                if iou < 0.5 and iogt < 0.5 and iodt < 0.5:
                    continue
                gtmatches.setdefault(gtind, list()).append(dtind)
                dtmatches.setdefault(dtind, list()).append(gtind)
                matches.append((g['id'], d['id'], iogt, iodt, iou, intersection))
                pagestats.setdefault('true_positives', dict()).setdefault(catName, list()).append({
                    'GT.ID': g['segment_id'],
                    'DT.ID': d['segment_id'],
                    'GT.area': areag,
                    'DT.area': aread,
                    'I.area': intersection,
                    'IoGT': iogt,
                    'IoDT': iodt,
                    'IoU': iou})
            dtmisses = []
            for dtind, d in enumerate(dt):
                if dtind in dtmatches:
                    continue
                dtmisses.append((d['id'], maskArea(d['segmentation'])))
                pagestats.setdefault('false_positives', dict()).setdefault(catName, list()).append({
                    'DT.ID': d['segment_id'],
                    'area': int(d['area'])})
            gtmisses = []
            for gtind, g in enumerate(gt):
                if gtind in gtmatches:
                    continue
                gtmisses.append((g['id'], maskArea(g['segmentation'])))
                pagestats.setdefault('false_negatives', dict()).setdefault(catName, list()).append({
                    'GT.ID': g['segment_id'],
                    'area': int(g['area'])})
            # measure under/oversegmentation for this image and category
            # (follows Zhang et al 2021: Rethinking Semantic Segmentation Evaluation [arXiv:2101.08418])
            over_gt = set(gtind for gtind in gtmatches if len(gtmatches[gtind]) > 1)
            over_dt = set(chain.from_iterable(
                gtmatches[gtind] for gtind in gtmatches if len(gtmatches[gtind]) > 1))
            under_dt = set(dtind for dtind in dtmatches if len(dtmatches[dtind]) > 1)
            under_gt = set(chain.from_iterable(
                dtmatches[dtind] for dtind in dtmatches if len(dtmatches[dtind]) > 1))
            over_degree = sum(len(gtmatches[gtind]) - 1 for gtind in gtmatches)
            under_degree = sum(len(dtmatches[dtind]) - 1 for dtind in dtmatches)
            if len(dt) and len(gt):
                oversegmentation = len(over_gt) * len(over_dt) / len(gt) / len(dt)
                undersegmentation = len(under_gt) * len(under_dt) / len(gt) / len(dt)
                # Zhang's idea of attenuating the under/oversegmentation ratio
                # with a "penalty" to account for the degree of further
                # sub-segmentation is misguided IMHO, because its degree term
                # depends on the total number of segments:
                # oversegmentation = np.tanh(oversegmentation * over_degree)
                # undersegmentation = np.tanh(undersegmentation * under_degree)
                pagestats.setdefault('oversegmentation', dict())[catName] = oversegmentation
                pagestats.setdefault('undersegmentation', dict())[catName] = undersegmentation
                pagestats.setdefault('precision', dict())[catName] = \
                    (len(dt) - len(dtmisses)) / len(dt)
                pagestats.setdefault('recall', dict())[catName] = \
                    (len(gt) - len(gtmisses)) / len(gt)
            tparea = sum(map(get(5), matches))   # sum(inter)
            fparea = sum(map(get(1), dtmisses))  # sum(area)
            fnarea = sum(map(get(1), gtmisses))  # sum(area)
            if tparea or (fparea and fnarea):
                pagestats.setdefault('pixel_precision', dict())[catName] = \
                    tparea / (tparea + fparea)
                pagestats.setdefault('pixel_recall', dict())[catName] = \
                    tparea / (tparea + fnarea)
                pagestats.setdefault('pixel_iou', dict())[catName] = \
                    tparea / (tparea + fparea + fnarea)
            # aggregate per-img/per-cat IoUs for microaveraging
            evalimg['matches'] = matches    # TP
            evalimg['dtMisses'] = dtmisses  # FP
            evalimg['gtMisses'] = gtmisses  # FN
            evalimg['dtIdsOver'] = [dt[dtind]['id'] for dtind in over_dt]
            evalimg['gtIdsOver'] = [gt[gtind]['id'] for gtind in over_gt]
            evalimg['dtIdsUnder'] = [dt[dtind]['id'] for dtind in under_dt]
            evalimg['gtIdsUnder'] = [gt[gtind]['id'] for gtind in under_gt]

    catstats = stats.setdefault('by-category', dict())
    # accumulate our over-/undersegmentation and IoU ratios
    numImgs = len(coco_eval.params.imgIds)
    numAreas = len(coco_eval.params.areaRng)
    for catind, catId in enumerate(coco_eval.params.catIds):
        cat = coco_gt.cats[catId]
        catstats.setdefault(cat['name'], dict())
        start = catind * numImgs * numAreas
        # again, we stay at areaRng[0]=all and maxDets[0]=all
        evalimgs = [coco_eval.evalImgs[start + imgind] for imgind in range(numImgs)]
        evalimgs = [img for img in evalimgs if img is not None]
        assert all(img['category_id'] == catId for img in evalimgs)
        assert all(img['maxDet'] is None for img in evalimgs)
        assert all(img['aRng'] == coco_eval.params.areaRng[0] for img in evalimgs)
        if not len(evalimgs):
            continue
        # again, we can ignore gtIgnore here, because we only look at areaRng[0]=all
        # again, we can ignore dtIgnore here, because we only look at maxDet=None
        numDTs = sum(len(img['dtIds']) for img in evalimgs)
        numGTs = sum(len(img['gtIds']) for img in evalimgs)
        overDTs = sum(len(img['dtIdsOver']) for img in evalimgs)
        overGTs = sum(len(img['gtIdsOver']) for img in evalimgs)
        underDTs = sum(len(img['dtIdsUnder']) for img in evalimgs)
        underGTs = sum(len(img['gtIdsUnder']) for img in evalimgs)
        numIoUs = sum(len(img['matches']) for img in evalimgs)
        numFPs = sum(len(img['dtMisses']) for img in evalimgs)
        numFNs = sum(len(img['gtMisses']) for img in evalimgs)
        sumIoUs = sum(sum(map(get(4), img['matches'])) for img in evalimgs)    # sum(iou)
        sumIoGTs = sum(sum(map(get(2), img['matches'])) for img in evalimgs)   # sum(iogt)
        sumIoDTs = sum(sum(map(get(3), img['matches'])) for img in evalimgs)   # sum(iodt)
        sumTParea = sum(sum(map(get(5), img['matches'])) for img in evalimgs)  # sum(inter)
        sumFParea = sum(sum(map(get(1), img['dtMisses'])) for img in evalimgs) # sum(area)
        sumFNarea = sum(sum(map(get(1), img['gtMisses'])) for img in evalimgs) # sum(area)
        if numDTs and numGTs:
            oversegmentation = overDTs * overGTs / numDTs / numGTs
            undersegmentation = underDTs * underGTs / numDTs / numGTs
            precision = (numDTs - numFPs) / numDTs
            recall = (numGTs - numFNs) / numGTs
        else:
            oversegmentation = undersegmentation = precision = recall = -1
        if numIoUs:
            iou = sumIoUs / numIoUs
            iogt = sumIoGTs / numIoUs
            iodt = sumIoDTs / numIoUs
        else:
            iou = iogt = iodt = -1
        if sumTParea or (sumFParea and sumFNarea):
            pixel_precision = sumTParea / (sumTParea + sumFParea)
            pixel_recall = sumTParea / (sumTParea + sumFNarea)
            pixel_iou = sumTParea / (sumTParea + sumFParea + sumFNarea)
        else:
            pixel_precision = pixel_recall = pixel_iou = -1
        catstats[cat['name']]['oversegmentation'] = oversegmentation
        catstats[cat['name']]['undersegmentation'] = undersegmentation
        catstats[cat['name']]['segment-precision'] = precision
        catstats[cat['name']]['segment-recall'] = recall
        catstats[cat['name']]['IoGT'] = iogt  # i.e. per-match pixel-recall
        catstats[cat['name']]['IoDT'] = iodt  # i.e. per-match pixel-precision
        catstats[cat['name']]['IoU'] = iou    # i.e. per-match pixel-jaccardindex
        catstats[cat['name']]['pixel-precision'] = pixel_precision
        catstats[cat['name']]['pixel-recall'] = pixel_recall
        catstats[cat['name']]['pixel-iou'] = pixel_iou

    coco_eval.accumulate()
    coco_eval.summarize()
    statInds = np.ones(12, bool)  # np.bool was removed in NumPy 1.24
    statInds[7] = False  # AR maxDet[1]
    statInds[8] = False  # AR maxDet[2]
    coco_eval.stats = coco_eval.stats[statInds]
    stats['scores'] = dict(zip([
        'Average Precision (AP) @[ IoU=0.50:0.95 | area=   all ]',
        'Average Precision (AP) @[ IoU=0.50      | area=   all ]',
        'Average Precision (AP) @[ IoU=0.75      | area=   all ]',
        'Average Precision (AP) @[ IoU=0.50:0.95 | area= small ]',
        'Average Precision (AP) @[ IoU=0.50:0.95 | area=medium ]',
        'Average Precision (AP) @[ IoU=0.50:0.95 | area= large ]',
        'Average Recall    (AR) @[ IoU=0.50:0.95 | area=   all ]',
        'Average Recall    (AR) @[ IoU=0.50:0.95 | area= small ]',
        'Average Recall    (AR) @[ IoU=0.50:0.95 | area=medium ]',
        'Average Recall    (AR) @[ IoU=0.50:0.95 | area= large ]',
    ], coco_eval.stats.tolist()))
    return stats
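# A toy, self-contained illustration (hypothetical numbers) of the n:m
# matching criterion and the over-/undersegmentation ratio used above.
def _demo_matching_and_oversegmentation():
    # A detection covering only part of a GT region: IoU fails the 0.5 test,
    # but intersection over the detection's own area passes, so it matches.
    areag, aread, inter = 100, 40, 40
    union = areag + aread - inter  # 100
    iou, iogt, iodt = inter / union, inter / areag, inter / aread  # 0.4, 0.4, 1.0
    assert not (iou < 0.5 and iogt < 0.5 and iodt < 0.5)  # same test as above

    # Oversegmentation ratio: 1 GT region matched by 2 DT regions, out of
    # 2 GT and 3 DT total, gives |over_gt| * |over_dt| / |gt| / |dt| = 1*2/2/3.
    gtmatches = {0: [0, 1], 1: [2]}
    over_gt = {g for g in gtmatches if len(gtmatches[g]) > 1}
    over_dt = set(chain.from_iterable(
        gtmatches[g] for g in gtmatches if len(gtmatches[g]) > 1))
    oversegmentation = len(over_gt) * len(over_dt) / 2 / 3
    print(oversegmentation)  # 0.333...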
def _summarize_coco(cocoeval: COCOeval):
    """Compute and display summary metrics for evaluation results.

    Note: this function can *only* be applied on the default parameter setting.
    """
    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        p = cocoeval.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)
        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = cocoeval.eval['precision']
            # IoU
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]
            s = s[:, :, :, aind, mind]
        else:
            # dimension of recall: [TxKxAxM]
            s = cocoeval.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]
            s = s[:, :, aind, mind]
        if len(s[s > -1]) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(s[s > -1])
        print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
        return mean_s

    def _summarizeDets():
        stats = np.zeros((12,))
        stats[0] = _summarize(1, maxDets=cocoeval.params.maxDets[1])
        stats[1] = _summarize(1, iouThr=.5, maxDets=cocoeval.params.maxDets[1])
        stats[2] = _summarize(1, iouThr=.75, maxDets=cocoeval.params.maxDets[1])
        stats[3] = _summarize(1, areaRng='small', maxDets=cocoeval.params.maxDets[1])
        stats[4] = _summarize(1, areaRng='medium', maxDets=cocoeval.params.maxDets[1])
        stats[5] = _summarize(1, areaRng='large', maxDets=cocoeval.params.maxDets[1])
        stats[6] = _summarize(0, maxDets=cocoeval.params.maxDets[0])
        # stats[7] and stats[8] (AR at the other maxDets) are left at zero here.
        stats[9] = _summarize(0, areaRng='small', maxDets=cocoeval.params.maxDets[0])
        stats[10] = _summarize(0, areaRng='medium', maxDets=cocoeval.params.maxDets[0])
        stats[11] = _summarize(0, areaRng='large', maxDets=cocoeval.params.maxDets[0])
        return stats

    def _summarizeKps():
        stats = np.zeros((10,))
        stats[0] = _summarize(1, maxDets=20)
        stats[1] = _summarize(1, maxDets=20, iouThr=.5)
        stats[2] = _summarize(1, maxDets=20, iouThr=.75)
        stats[3] = _summarize(1, maxDets=20, areaRng='medium')
        stats[4] = _summarize(1, maxDets=20, areaRng='large')
        stats[5] = _summarize(0, maxDets=20)
        stats[6] = _summarize(0, maxDets=20, iouThr=.5)
        stats[7] = _summarize(0, maxDets=20, iouThr=.75)
        stats[8] = _summarize(0, maxDets=20, areaRng='medium')
        stats[9] = _summarize(0, maxDets=20, areaRng='large')
        return stats

    if not cocoeval.eval:
        raise Exception('Please run accumulate() first')
    iouType = cocoeval.params.iouType
    if iouType == 'segm' or iouType == 'bbox':
        summarize = _summarizeDets
    elif iouType == 'keypoints':
        summarize = _summarizeKps
    cocoeval.stats = summarize()
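# A hypothetical sketch of what _summarize reads from the accumulated arrays:
# eval['precision'] has shape [T, R, K, A, M] (IoU thresholds, recall points,
# categories, area ranges, maxDets), so e.g. AP50 over all areas is just a
# masked mean over the slice at the 0.50 IoU threshold.
def _demo_precision_slice(cocoeval):
    p = cocoeval.params
    t = np.where(np.isclose(p.iouThrs, 0.5))[0]
    s = cocoeval.eval['precision'][t, :, :, 0, -1]  # areaRng 'all', last maxDets
    valid = s[s > -1]  # -1 marks empty category/area cells
    return np.mean(valid) if valid.size else -1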
# NOTE: if both live in one module, this file-based evaluate_coco shadows the
# segmentation-level evaluate_coco defined above.
def evaluate_coco(
    dataset_path,
    result_path,
    metric="bbox",
    classwise=False,
    proposal_nums=(10, 100, 500),
    iou_thrs=None,
    metric_items=None,
    out_dir=None,
):
    """Evaluation in COCO protocol.

    Args:
        dataset_path (str): COCO dataset json path.
        result_path (str): COCO result json path.
        metric (str | list[str]): Metrics to be evaluated. Options are
            'bbox', 'segm', 'proposal'.
        classwise (bool): Whether to evaluate the AP for each class.
        proposal_nums (Sequence[int]): Proposal numbers used for evaluating
            recalls, such as recall@100, recall@500. Default: (10, 100, 500).
        iou_thrs (Sequence[float], optional): IoU thresholds used for
            evaluating recalls/mAPs. If set to a list, the average of all
            IoUs will also be computed. If not specified,
            [0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95]
            will be used. Default: None.
        metric_items (list[str] | str, optional): Metric items that will be
            returned. If not specified, ``['AR@10', 'AR@100', 'AR@500',
            'AR_s@500', 'AR_m@500', 'AR_l@500']`` will be used when
            ``metric=='proposal'``, ``['mAP', 'mAP50', 'mAP75', 'mAP_s',
            'mAP_m', 'mAP_l', 'mAP50_s', 'mAP50_m', 'mAP50_l']`` will be
            used when ``metric=='bbox' or metric=='segm'``.
        out_dir (str): Directory to save evaluation result json.

    Returns:
        dict[str, float]: COCO style evaluation metric.
    """
    metrics = metric if isinstance(metric, list) else [metric]
    allowed_metrics = ["bbox", "segm", "proposal"]
    for metric in metrics:
        if metric not in allowed_metrics:
            raise KeyError(f"metric {metric} is not supported")
    if iou_thrs is None:
        iou_thrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
    if metric_items is not None:
        if not isinstance(metric_items, list):
            metric_items = [metric_items]

    eval_results = OrderedDict()
    cocoGt = COCO(dataset_path)
    cat_ids = list(cocoGt.cats.keys())
    for metric in metrics:
        print(f"\nEvaluating {metric}...")
        iou_type = metric
        with open(result_path) as json_file:
            results = json.load(json_file)
        try:
            if iou_type == "segm":
                # Refer to https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L331  # noqa
                # When evaluating mask AP, if the results contain bbox,
                # cocoapi will use the box area instead of the mask area
                # for calculating the instance area. Though the overall AP
                # is not affected, this leads to different
                # small/medium/large mask AP results.
                for x in results:
                    x.pop("bbox")
                warnings.simplefilter("once")
                warnings.warn(
                    'The key "bbox" is deleted for more accurate mask AP '
                    "of small/medium/large instances since v2.12.0. This "
                    "does not change the overall mAP calculation.",
                    UserWarning,
                )
            cocoDt = cocoGt.loadRes(results)
        except IndexError:
            print("The testing results of the whole dataset are empty.")
            break

        cocoEval = COCOeval(cocoGt, cocoDt, iou_type)
        cocoEval.params.catIds = cat_ids
        cocoEval.params.maxDets = list(proposal_nums)
        cocoEval.params.iouThrs = iou_thrs
        # mapping of cocoEval.stats
        coco_metric_names = {
            "mAP": 0,
            "mAP50": 1,
            "mAP75": 2,
            "mAP_s": 3,
            "mAP_m": 4,
            "mAP_l": 5,
            "AR@10": 6,
            "AR@100": 7,
            "AR@500": 8,
            "AR_s@500": 9,
            "AR_m@500": 10,
            "AR_l@500": 11,
            "mAP50_s": 12,
            "mAP50_m": 13,
            "mAP50_l": 14,
        }
        if metric_items is not None:
            for metric_item in metric_items:
                if metric_item not in coco_metric_names:
                    raise KeyError(f"metric item {metric_item} is not supported")

        if metric == "proposal":
            cocoEval.params.useCats = 0
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            if metric_items is None:
                metric_items = ["AR@10", "AR@100", "AR@500", "AR_s@500", "AR_m@500", "AR_l@500"]
            for item in metric_items:
                val = float(f"{cocoEval.stats[coco_metric_names[item]]:.3f}")
                eval_results[item] = val
        else:
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            # calculate mAP50_s/m/l
            mAP50_s = _cocoeval_summarize(
                cocoEval, ap=1, iouThr=0.5, areaRng="small", maxDets=cocoEval.params.maxDets[-1]
            )
            mAP50_m = _cocoeval_summarize(
                cocoEval, ap=1, iouThr=0.5, areaRng="medium", maxDets=cocoEval.params.maxDets[-1]
            )
            mAP50_l = _cocoeval_summarize(
                cocoEval, ap=1, iouThr=0.5, areaRng="large", maxDets=cocoEval.params.maxDets[-1]
            )
            cocoEval.stats = np.append(cocoEval.stats, [mAP50_s, mAP50_m, mAP50_l], 0)

            if classwise:
                # Compute per-category AP
                # from https://github.com/facebookresearch/detectron2/
                precisions = cocoEval.eval["precision"]
                # precision: (iou, recall, cls, area range, max dets)
                assert len(cat_ids) == precisions.shape[2]
                max_cat_name_len = 0
                for idx, catId in enumerate(cat_ids):
                    nm = cocoGt.loadCats(catId)[0]
                    max_cat_name_len = max(max_cat_name_len, len(nm["name"]))

                results_per_category = []
                for idx, catId in enumerate(cat_ids):
                    # area range index 0: all area ranges
                    # max dets index -1: typically 100 per image
                    nm = cocoGt.loadCats(catId)[0]
                    ap = _cocoeval_summarize(
                        cocoEval, ap=1, catIdx=idx, areaRng="all",
                        maxDets=cocoEval.params.maxDets[-1],
                        catName=nm["name"], nameStrLen=max_cat_name_len,
                    )
                    ap_s = _cocoeval_summarize(
                        cocoEval, ap=1, catIdx=idx, areaRng="small",
                        maxDets=cocoEval.params.maxDets[-1],
                        catName=nm["name"], nameStrLen=max_cat_name_len,
                    )
                    ap_m = _cocoeval_summarize(
                        cocoEval, ap=1, catIdx=idx, areaRng="medium",
                        maxDets=cocoEval.params.maxDets[-1],
                        catName=nm["name"], nameStrLen=max_cat_name_len,
                    )
                    ap_l = _cocoeval_summarize(
                        cocoEval, ap=1, catIdx=idx, areaRng="large",
                        maxDets=cocoEval.params.maxDets[-1],
                        catName=nm["name"], nameStrLen=max_cat_name_len,
                    )
                    ap50 = _cocoeval_summarize(
                        cocoEval, ap=1, iouThr=0.5, catIdx=idx, areaRng="all",
                        maxDets=cocoEval.params.maxDets[-1],
                        catName=nm["name"], nameStrLen=max_cat_name_len,
                    )
                    ap50_s = _cocoeval_summarize(
                        cocoEval, ap=1, iouThr=0.5, catIdx=idx, areaRng="small",
                        maxDets=cocoEval.params.maxDets[-1],
                        catName=nm["name"], nameStrLen=max_cat_name_len,
                    )
                    ap50_m = _cocoeval_summarize(
                        cocoEval, ap=1, iouThr=0.5, catIdx=idx, areaRng="medium",
                        maxDets=cocoEval.params.maxDets[-1],
                        catName=nm["name"], nameStrLen=max_cat_name_len,
                    )
                    ap50_l = _cocoeval_summarize(
                        cocoEval, ap=1, iouThr=0.5, catIdx=idx, areaRng="large",
                        maxDets=cocoEval.params.maxDets[-1],
                        catName=nm["name"], nameStrLen=max_cat_name_len,
                    )
                    results_per_category.append((f'{metric}_{nm["name"]}_mAP', f"{float(ap):0.3f}"))
                    results_per_category.append((f'{metric}_{nm["name"]}_mAP_s', f"{float(ap_s):0.3f}"))
                    results_per_category.append((f'{metric}_{nm["name"]}_mAP_m', f"{float(ap_m):0.3f}"))
                    results_per_category.append((f'{metric}_{nm["name"]}_mAP_l', f"{float(ap_l):0.3f}"))
                    results_per_category.append((f'{metric}_{nm["name"]}_mAP50', f"{float(ap50):0.3f}"))
                    results_per_category.append((f'{metric}_{nm["name"]}_mAP50_s', f"{float(ap50_s):0.3f}"))
                    results_per_category.append((f'{metric}_{nm["name"]}_mAP50_m', f"{float(ap50_m):0.3f}"))
                    results_per_category.append((f'{metric}_{nm["name"]}_mAP50_l', f"{float(ap50_l):0.3f}"))

                num_columns = min(6, len(results_per_category) * 2)
                results_flatten = list(itertools.chain(*results_per_category))
                headers = ["category", "AP"] * (num_columns // 2)
                results_2d = itertools.zip_longest(
                    *[results_flatten[i::num_columns] for i in range(num_columns)])
                table_data = [headers]
                table_data += [result for result in results_2d]
                table = AsciiTable(table_data)
                print("\n" + table.table)

            if metric_items is None:
                metric_items = ["mAP", "mAP50", "mAP75", "mAP_s", "mAP_m", "mAP_l",
                                "mAP50_s", "mAP50_m", "mAP50_l"]
            for metric_item in metric_items:
                key = f"{metric}_{metric_item}"
                val = float(f"{cocoEval.stats[coco_metric_names[metric_item]]:.3f}")
                eval_results[key] = val
            ap = cocoEval.stats[:6]
            eval_results[f"{metric}_mAP_copypaste"] = (
                f"{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} "
                f"{ap[4]:.3f} {ap[5]:.3f}"
            )
            if classwise:
                eval_results["results_per_category"] = dict(results_per_category)

    # set save path (coerce to Path so a str out_dir also works)
    if not out_dir:
        out_dir = Path(result_path).parent
    save_path = str(Path(out_dir) / "eval.json")

    # export as json
    with open(save_path, "w", encoding="utf-8") as outfile:
        json.dump(eval_results, outfile, indent=4, separators=(",", ":"))

    return eval_results