def evaluate(self, results, metric=['track'], logger=None, resfile_path=None): if isinstance(metric, list): metrics = metric elif isinstance(metric, str): metrics = [metric] else: raise TypeError('metric must be a list or a str.') allowed_metrics = ['bbox', 'track'] for metric in metrics: if metric not in allowed_metrics: raise KeyError(f'metric {metric} is not supported.') result_files, tmp_dir = self.format_results(results, resfile_path) eval_results = dict() if 'track' in metrics: from tao.toolkit.tao import TaoEval print_log('Evaluating TAO results...', logger) tao_eval = TaoEval(self.ann_file, result_files['track']) tao_eval.params.img_ids = self.img_ids tao_eval.params.cat_ids = self.cat_ids tao_eval.params.iou_thrs = np.array([0.5, 0.75]) tao_eval.run() tao_eval.print_results() tao_results = tao_eval.get_results() for k, v in tao_results.items(): if isinstance(k, str) and k.startswith('AP'): key = 'track_{}'.format(k) val = float('{:.3f}'.format(float(v))) eval_results[key] = val if 'bbox' in metrics: try: import lvis assert lvis.__version__ >= '10.5.3' from lvis import LVIS, LVISResults, LVISEval except AssertionError: raise AssertionError( 'Incompatible version of lvis is installed. ' 'Run pip uninstall lvis first. Then run pip ' 'install mmlvis to install open-mmlab forked ' 'lvis. ') except ImportError: raise ImportError( 'Package lvis is not installed. Please run pip ' 'install mmlvis to install open-mmlab forked ' 'lvis.') print_log('Evaluating detection results...', logger) lvis_gt = LVIS(self.ann_file) lvis_dt = LVISResults(lvis_gt, result_files['bbox']) lvis_eval = LVISEval(lvis_gt, lvis_dt, 'bbox') lvis_eval.params.imgIds = self.img_ids lvis_eval.params.catIds = self.cat_ids lvis_eval.evaluate() lvis_eval.accumulate() lvis_eval.summarize() lvis_eval.print_results() lvis_results = lvis_eval.get_results() for k, v in lvis_results.items(): if k.startswith('AP'): key = '{}_{}'.format('bbox', k) val = float('{:.3f}'.format(float(v))) eval_results[key] = val ap_summary = ' '.join([ '{}:{:.3f}'.format(k, float(v)) for k, v in lvis_results.items() if k.startswith('AP') ]) eval_results['bbox_mAP_copypaste'] = ap_summary if tmp_dir is not None: tmp_dir.cleanup() return eval_results
def init_weights(self, pretrained=None):
    if pretrained is not None:
        print_log(f'load model from: {pretrained}', logger='root')
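# The `init_weights` above only logs the checkpoint path and never actually
# loads it. Below is a hedged sketch of how such a hook usually continues in
# mmcv-based code; the helper name `load_pretrained_weights` is hypothetical,
# and `mmcv.runner.load_checkpoint` is assumed to be the loader used by the
# surrounding project.
def load_pretrained_weights(module, pretrained=None):
    from mmcv.runner import load_checkpoint
    if pretrained is not None:
        print_log(f'load model from: {pretrained}', logger='root')
        # strict=False tolerates missing/unexpected keys, the common choice
        # when only a backbone checkpoint is available.
        load_checkpoint(module, pretrained, map_location='cpu', strict=False)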
def evaluate(self,
             results,
             metric='bbox',
             logger=None,
             jsonfile_prefix=None,
             classwise=False,
             proposal_nums=(100, 300, 1000),
             iou_thrs=None,
             metric_items=None):
    """Evaluation in COCO protocol.

    Args:
        results (list[list | tuple]): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated. Options are
            'bbox', 'segm', 'proposal', 'proposal_fast'.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.
        jsonfile_prefix (str | None): The prefix of json files. It includes
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temp file will be created. Default: None.
        classwise (bool): Whether to evaluate the AP for each class.
        proposal_nums (Sequence[int]): Proposal number used for evaluating
            recalls, such as recall@100, recall@1000.
            Default: (100, 300, 1000).
        iou_thrs (Sequence[float], optional): IoU threshold used for
            evaluating recalls/mAPs. If set to a list, the average of all
            IoUs will also be computed. If not specified, [0.50, 0.55,
            0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95] will be used.
            Default: None.
        metric_items (list[str] | str, optional): Metric items that will
            be returned. If not specified, ``['AR@100', 'AR@300',
            'AR@1000', 'AR_s@1000', 'AR_m@1000', 'AR_l@1000']`` will be
            used when ``metric=='proposal'``, ``['mAP', 'mAP_50',
            'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l']`` will be used when
            ``metric=='bbox' or metric=='segm'``.

    Returns:
        dict[str, float]: COCO style evaluation metric.
    """

    metrics = metric if isinstance(metric, list) else [metric]
    allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast']
    for metric in metrics:
        if metric not in allowed_metrics:
            raise KeyError(f'metric {metric} is not supported')
    if iou_thrs is None:
        iou_thrs = np.linspace(
            .5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
    if metric_items is not None:
        if not isinstance(metric_items, list):
            metric_items = [metric_items]

    result_files, tmp_dir = self.format_results(results, jsonfile_prefix)

    eval_results = OrderedDict()
    cocoGt = self.coco
    for metric in metrics:
        msg = f'Evaluating {metric}...'
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)

        if metric == 'proposal_fast':
            ar = self.fast_eval_recall(
                results, proposal_nums, iou_thrs, logger='silent')
            log_msg = []
            for i, num in enumerate(proposal_nums):
                eval_results[f'AR@{num}'] = ar[i]
                log_msg.append(f'\nAR@{num}\t{ar[i]:.4f}')
            log_msg = ''.join(log_msg)
            print_log(log_msg, logger=logger)
            continue

        if metric not in result_files:
            raise KeyError(f'{metric} is not in results')
        try:
            cocoDt = cocoGt.loadRes(result_files[metric])
        except IndexError:
            print_log(
                'The testing results of the whole dataset is empty.',
                logger=logger,
                level=logging.ERROR)
            break

        iou_type = 'bbox' if metric == 'proposal' else metric
        cocoEval = COCOeval(cocoGt, cocoDt, iou_type)
        cocoEval.params.catIds = self.cat_ids
        cocoEval.params.imgIds = self.img_ids
        cocoEval.params.maxDets = list(proposal_nums)
        cocoEval.params.iouThrs = iou_thrs
        # mapping of cocoEval.stats
        coco_metric_names = {
            'mAP': 0,
            'mAP_50': 1,
            'mAP_75': 2,
            'mAP_s': 3,
            'mAP_m': 4,
            'mAP_l': 5,
            'AR@100': 6,
            'AR@300': 7,
            'AR@1000': 8,
            'AR_s@1000': 9,
            'AR_m@1000': 10,
            'AR_l@1000': 11
        }
        if metric_items is not None:
            for metric_item in metric_items:
                if metric_item not in coco_metric_names:
                    raise KeyError(
                        f'metric item {metric_item} is not supported')

        if metric == 'proposal':
            cocoEval.params.useCats = 0
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            if metric_items is None:
                metric_items = [
                    'AR@100', 'AR@300', 'AR@1000', 'AR_s@1000',
                    'AR_m@1000', 'AR_l@1000'
                ]

            for item in metric_items:
                val = float(
                    f'{cocoEval.stats[coco_metric_names[item]]:.3f}')
                eval_results[item] = val
        else:
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            if classwise:  # Compute per-category AP
                # Compute per-category AP
                # from https://github.com/facebookresearch/detectron2/
                precisions = cocoEval.eval['precision']
                # precision: (iou, recall, cls, area range, max dets)
                assert len(self.cat_ids) == precisions.shape[2]

                results_per_category = []
                for idx, catId in enumerate(self.cat_ids):
                    # area range index 0: all area ranges
                    # max dets index -1: typically 100 per image
                    nm = self.coco.loadCats(catId)[0]
                    precision = precisions[:, :, idx, 0, -1]
                    precision = precision[precision > -1]
                    if precision.size:
                        ap = np.mean(precision)
                    else:
                        ap = float('nan')
                    results_per_category.append(
                        (f'{nm["name"]}', f'{float(ap):0.3f}'))

                num_columns = min(6, len(results_per_category) * 2)
                results_flatten = list(
                    itertools.chain(*results_per_category))
                headers = ['category', 'AP'] * (num_columns // 2)
                results_2d = itertools.zip_longest(*[
                    results_flatten[i::num_columns]
                    for i in range(num_columns)
                ])
                table_data = [headers]
                table_data += [result for result in results_2d]
                table = AsciiTable(table_data)
                print_log('\n' + table.table, logger=logger)

            if metric_items is None:
                metric_items = [
                    'mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'
                ]

            for metric_item in metric_items:
                key = f'{metric}_{metric_item}'
                val = float(
                    f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}'
                )
                eval_results[key] = val
            ap = cocoEval.stats[:6]
            eval_results[f'{metric}_mAP_copypaste'] = (
                f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} '
                f'{ap[4]:.3f} {ap[5]:.3f}')
    if tmp_dir is not None:
        tmp_dir.cleanup()
    return eval_results
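# A small, self-contained illustration of the ``coco_metric_names`` indexing
# used above: ``COCOeval.summarize()`` fills a 12-element ``stats`` vector and
# ``evaluate()`` simply picks named entries out of it with the same rounding
# convention. The stats values below are made up for demonstration only.
def _demo_coco_stats_mapping():
    from collections import OrderedDict
    import numpy as np
    fake_stats = np.array([0.402, 0.615, 0.441, 0.238, 0.443, 0.522,
                           0.337, 0.532, 0.561, 0.382, 0.602, 0.721])
    coco_metric_names = {
        'mAP': 0, 'mAP_50': 1, 'mAP_75': 2, 'mAP_s': 3, 'mAP_m': 4,
        'mAP_l': 5, 'AR@100': 6, 'AR@300': 7, 'AR@1000': 8,
        'AR_s@1000': 9, 'AR_m@1000': 10, 'AR_l@1000': 11
    }
    demo_results = OrderedDict()
    for metric_item in ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l']:
        # same truncation to three decimals as the evaluate() method above
        demo_results[f'bbox_{metric_item}'] = float(
            f'{fake_stats[coco_metric_names[metric_item]]:.3f}')
    return demo_results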
def evaluate(self, results, metric='mIoU', logger=None, **kwargs): """Evaluate the dataset. Args: results (list): Testing results of the dataset. metric (str | list[str]): Metrics to be evaluated. logger (logging.Logger | None | str): Logger used for printing related information during evaluation. Default: None. Returns: dict[str, float]: Default metrics. """ if not isinstance(metric, str): assert len(metric) == 1 metric = metric[0] allowed_metrics = ['mIoU'] if metric not in allowed_metrics: raise KeyError('metric {} is not supported'.format(metric)) eval_results = {} gt_seg_maps = self.get_gt_seg_maps() if self.CLASSES is None: num_classes = len( reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps])) else: num_classes = len(self.CLASSES) all_acc, acc, iou = mean_iou(results, gt_seg_maps, num_classes, ignore_index=self.ignore_index) summary_str = '' summary_str += 'per class results:\n' line_format = '{:<15} {:>10} {:>10}\n' summary_str += line_format.format('Class', 'IoU', 'Acc') if self.CLASSES is None: class_names = tuple(range(num_classes)) else: class_names = self.CLASSES for i in range(num_classes): iou_str = '{:.2f}'.format(iou[i] * 100) acc_str = '{:.2f}'.format(acc[i] * 100) summary_str += line_format.format(class_names[i], iou_str, acc_str) summary_str += 'Summary:\n' line_format = '{:<15} {:>10} {:>10} {:>10}\n' summary_str += line_format.format('Scope', 'mIoU', 'mAcc', 'aAcc') iou_str = '{:.2f}'.format(np.nanmean(iou) * 100) acc_str = '{:.2f}'.format(np.nanmean(acc) * 100) all_acc_str = '{:.2f}'.format(all_acc * 100) summary_str += line_format.format('global', iou_str, acc_str, all_acc_str) print_log(summary_str, logger) eval_results['mIoU'] = np.nanmean(iou) eval_results['mAcc'] = np.nanmean(acc) eval_results['aAcc'] = all_acc return eval_results
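# A minimal, self-contained NumPy sketch of the quantities ``mean_iou``
# returns above: overall pixel accuracy (aAcc), per-class accuracy and
# per-class IoU derived from a confusion matrix. The tiny label maps are
# made up for demonstration; this is an illustration, not the library code.
def _demo_mean_iou(num_classes=3):
    import numpy as np
    pred = np.array([[0, 0, 1], [1, 2, 2], [0, 1, 2]])
    gt = np.array([[0, 1, 1], [1, 2, 2], [0, 0, 2]])
    # confusion[i, j] counts pixels whose ground truth is i and prediction is j
    confusion = np.zeros((num_classes, num_classes), dtype=np.int64)
    for g, p in zip(gt.ravel(), pred.ravel()):
        confusion[g, p] += 1
    tp = np.diag(confusion).astype(np.float64)
    all_acc = tp.sum() / confusion.sum()                    # aAcc
    acc = tp / confusion.sum(axis=1)                        # per-class accuracy
    iou = tp / (confusion.sum(axis=1) + confusion.sum(axis=0) - tp)
    return all_acc, acc, iou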
def evaluate_det_segm(self,
                      results,
                      result_files,
                      coco_gt,
                      metrics,
                      logger=None,
                      classwise=False,
                      proposal_nums=(100, 300, 1000),
                      iou_thrs=None,
                      metric_items=None):
    """Instance segmentation and object detection evaluation in COCO
    protocol.

    Args:
        results (list[list | tuple | dict]): Testing results of the
            dataset.
        result_files (dict[str, str]): a dict containing json file paths.
        coco_gt (COCO): COCO API object with ground truth annotation.
        metric (str | list[str]): Metrics to be evaluated. Options are
            'bbox', 'segm', 'proposal', 'proposal_fast'.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.
        classwise (bool): Whether to evaluate the AP for each class.
        proposal_nums (Sequence[int]): Proposal number used for evaluating
            recalls, such as recall@100, recall@1000.
            Default: (100, 300, 1000).
        iou_thrs (Sequence[float], optional): IoU threshold used for
            evaluating recalls/mAPs. If set to a list, the average of all
            IoUs will also be computed. If not specified, [0.50, 0.55,
            0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95] will be used.
            Default: None.
        metric_items (list[str] | str, optional): Metric items that will
            be returned. If not specified, ``['AR@100', 'AR@300',
            'AR@1000', 'AR_s@1000', 'AR_m@1000', 'AR_l@1000']`` will be
            used when ``metric=='proposal'``, ``['mAP', 'mAP_50',
            'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l']`` will be used when
            ``metric=='bbox' or metric=='segm'``.

    Returns:
        dict[str, float]: COCO style evaluation metric.
    """
    if iou_thrs is None:
        iou_thrs = np.linspace(
            .5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
    if metric_items is not None:
        if not isinstance(metric_items, list):
            metric_items = [metric_items]

    eval_results = OrderedDict()
    for metric in metrics:
        msg = f'Evaluating {metric}...'
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)

        if metric == 'proposal_fast':
            if isinstance(results[0], tuple):
                raise KeyError('proposal_fast is not supported for '
                               'instance segmentation result.')
            ar = self.fast_eval_recall(
                results, proposal_nums, iou_thrs, logger='silent')
            log_msg = []
            for i, num in enumerate(proposal_nums):
                eval_results[f'AR@{num}'] = ar[i]
                log_msg.append(f'\nAR@{num}\t{ar[i]:.4f}')
            log_msg = ''.join(log_msg)
            print_log(log_msg, logger=logger)
            continue

        iou_type = 'bbox' if metric == 'proposal' else metric
        if metric not in result_files:
            raise KeyError(f'{metric} is not in results')
        try:
            predictions = mmcv.load(result_files[metric])
            if iou_type == 'segm':
                # Refer to https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L331  # noqa
                # When evaluating mask AP, if the results contain bbox,
                # cocoapi will use the box area instead of the mask area
                # for calculating the instance area. Though the overall AP
                # is not affected, this leads to different
                # small/medium/large mask AP results.
                for x in predictions:
                    x.pop('bbox')
                warnings.simplefilter('once')
                warnings.warn(
                    'The key "bbox" is deleted for more accurate mask AP '
                    'of small/medium/large instances since v2.12.0. This '
                    'does not change the overall mAP calculation.',
                    UserWarning)
            coco_det = coco_gt.loadRes(predictions)
        except IndexError:
            print_log(
                'The testing results of the whole dataset is empty.',
                logger=logger,
                level=logging.ERROR)
            break

        cocoEval = COCOeval(coco_gt, coco_det, iou_type)
        cocoEval.params.catIds = self.cat_ids
        cocoEval.params.imgIds = self.img_ids
        cocoEval.params.maxDets = list(proposal_nums)
        cocoEval.params.iouThrs = iou_thrs
        # mapping of cocoEval.stats
        coco_metric_names = {
            'mAP': 0,
            'mAP_50': 1,
            'mAP_75': 2,
            'mAP_s': 3,
            'mAP_m': 4,
            'mAP_l': 5,
            'AR@100': 6,
            'AR@300': 7,
            'AR@1000': 8,
            'AR_s@1000': 9,
            'AR_m@1000': 10,
            'AR_l@1000': 11
        }
        if metric_items is not None:
            for metric_item in metric_items:
                if metric_item not in coco_metric_names:
                    raise KeyError(
                        f'metric item {metric_item} is not supported')

        if metric == 'proposal':
            cocoEval.params.useCats = 0
            cocoEval.evaluate()
            cocoEval.accumulate()

            # Save coco summarize print information to logger
            redirect_string = io.StringIO()
            with contextlib.redirect_stdout(redirect_string):
                cocoEval.summarize()
            print_log('\n' + redirect_string.getvalue(), logger=logger)

            if metric_items is None:
                metric_items = [
                    'AR@100', 'AR@300', 'AR@1000', 'AR_s@1000',
                    'AR_m@1000', 'AR_l@1000'
                ]

            for item in metric_items:
                val = float(
                    f'{cocoEval.stats[coco_metric_names[item]]:.3f}')
                eval_results[item] = val
        else:
            cocoEval.evaluate()
            cocoEval.accumulate()

            # Save coco summarize print information to logger
            redirect_string = io.StringIO()
            with contextlib.redirect_stdout(redirect_string):
                cocoEval.summarize()
            print_log('\n' + redirect_string.getvalue(), logger=logger)

            if classwise:  # Compute per-category AP
                # Compute per-category AP
                # from https://github.com/facebookresearch/detectron2/
                precisions = cocoEval.eval['precision']
                # precision: (iou, recall, cls, area range, max dets)
                assert len(self.cat_ids) == precisions.shape[2]

                results_per_category = []
                for idx, catId in enumerate(self.cat_ids):
                    # area range index 0: all area ranges
                    # max dets index -1: typically 100 per image
                    nm = self.coco.loadCats(catId)[0]
                    precision = precisions[:, :, idx, 0, -1]
                    precision = precision[precision > -1]
                    if precision.size:
                        ap = np.mean(precision)
                    else:
                        ap = float('nan')
                    results_per_category.append(
                        (f'{nm["name"]}', f'{float(ap):0.3f}'))

                num_columns = min(6, len(results_per_category) * 2)
                results_flatten = list(
                    itertools.chain(*results_per_category))
                headers = ['category', 'AP'] * (num_columns // 2)
                results_2d = itertools.zip_longest(*[
                    results_flatten[i::num_columns]
                    for i in range(num_columns)
                ])
                table_data = [headers]
                table_data += [result for result in results_2d]
                table = AsciiTable(table_data)
                print_log('\n' + table.table, logger=logger)

            if metric_items is None:
                metric_items = [
                    'mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'
                ]

            for metric_item in metric_items:
                key = f'{metric}_{metric_item}'
                val = float(
                    f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}'
                )
                eval_results[key] = val
            ap = cocoEval.stats[:6]
            eval_results[f'{metric}_mAP_copypaste'] = (
                f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} '
                f'{ap[4]:.3f} {ap[5]:.3f}')

    return eval_results
def evaluate(self, results, metric='mAP', logger=None, proposal_nums=(100, 300, 1000), iou_thr=0.5, scale_ranges=None): """Evaluate in VOC protocol. Args: results (list[list | tuple]): Testing results of the dataset. metric (str | list[str]): Metrics to be evaluated. Options are 'mAP', 'recall'. logger (logging.Logger | str, optional): Logger used for printing related information during evaluation. Default: None. proposal_nums (Sequence[int]): Proposal number used for evaluating recalls, such as recall@100, recall@1000. Default: (100, 300, 1000). iou_thr (float | list[float]): IoU threshold. Default: 0.5. scale_ranges (list[tuple], optional): Scale ranges for evaluating mAP. If not specified, all bounding boxes would be included in evaluation. Default: None. Returns: dict[str, float]: AP/recall metrics. """ if not isinstance(metric, str): assert len(metric) == 1 metric = metric[0] allowed_metrics = ['mAP', 'recall'] if metric not in allowed_metrics: raise KeyError(f'metric {metric} is not supported') annotations = [self.get_ann_info(i) for i in range(len(self))] eval_results = OrderedDict() iou_thrs = [iou_thr] if isinstance(iou_thr, float) else iou_thr if metric == 'mAP': assert isinstance(iou_thrs, list) if self.year == 2007: ds_name = 'voc07' else: ds_name = self.CLASSES mean_aps = [] for iou_thr in iou_thrs: print_log(f'\n{"-" * 15}iou_thr: {iou_thr}{"-" * 15}') mean_ap, _ = eval_map(results, annotations, scale_ranges=None, iou_thr=iou_thr, dataset=ds_name, logger=logger) mean_aps.append(mean_ap) eval_results[f'AP{int(iou_thr * 100):02d}'] = round(mean_ap, 3) eval_results['mAP'] = sum(mean_aps) / len(mean_aps) elif metric == 'recall': gt_bboxes = [ann['bboxes'] for ann in annotations] recalls = eval_recalls(gt_bboxes, results, proposal_nums, iou_thr, logger=logger) for i, num in enumerate(proposal_nums): for j, iou in enumerate(iou_thr): eval_results[f'recall@{num}@{iou}'] = recalls[i, j] if recalls.shape[1] > 1: ar = recalls.mean(axis=1) for i, num in enumerate(proposal_nums): eval_results[f'AR@{num}'] = ar[i] return eval_results
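# A hedged usage sketch for the VOC-style evaluation above. It assumes the
# surrounding module already imports ``eval_map`` (as the method does) and
# NumPy; the single-image, single-class toy data is made up.
def _demo_voc_eval_map():
    import numpy as np
    # one image, one class: detections are rows of (x1, y1, x2, y2, score)
    det_results = [[np.array([[10., 10., 50., 50., 0.9],
                              [60., 60., 80., 80., 0.3]])]]
    annotations = [dict(bboxes=np.array([[12., 12., 48., 48.]]),
                        labels=np.array([0]))]
    mean_ap, per_class = eval_map(
        det_results, annotations, iou_thr=0.5, dataset=['object'])
    return mean_ap, per_class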
def evaluate(self, results, metrics='top_k_accuracy', metric_options=dict(top_k_accuracy=dict(topk=(1, 5))), logger=None, **deprecated_kwargs): """Perform evaluation for common datasets. Args: results (list): Output results. metrics (str | sequence[str]): Metrics to be performed. Defaults: 'top_k_accuracy'. metric_options (dict): Dict for metric options. Options are ``topk`` for ``top_k_accuracy``. Default: ``dict(top_k_accuracy=dict(topk=(1, 5)))``. logger (logging.Logger | None): Logger for recording. Default: None. deprecated_kwargs (dict): Used for containing deprecated arguments. See 'https://github.com/open-mmlab/mmaction2/pull/286'. Returns: dict: Evaluation results dict. """ # Protect ``metric_options`` since it uses mutable value as default metric_options = copy.deepcopy(metric_options) if deprecated_kwargs != {}: warnings.warn( 'Option arguments for metrics has been changed to ' "`metric_options`, See 'https://github.com/open-mmlab/mmaction2/pull/286' " # noqa: E501 'for more details') metric_options['top_k_accuracy'] = dict( metric_options['top_k_accuracy'], **deprecated_kwargs) if not isinstance(results, list): raise TypeError(f'results must be a list, but got {type(results)}') assert len(results) == len(self), ( f'The length of results is not equal to the dataset len: ' f'{len(results)} != {len(self)}') metrics = metrics if isinstance(metrics, (list, tuple)) else [metrics] allowed_metrics = [ 'top_k_accuracy', 'mean_class_accuracy', 'mean_average_precision', 'mmit_mean_average_precision' ] for metric in metrics: if metric not in allowed_metrics: raise KeyError(f'metric {metric} is not supported') eval_results = {} gt_labels = [ann['label'] for ann in self.video_infos] for metric in metrics: msg = f'Evaluating {metric} ...' if logger is None: msg = '\n' + msg print_log(msg, logger=logger) if metric == 'top_k_accuracy': topk = metric_options.setdefault('top_k_accuracy', {}).setdefault( 'topk', (1, 5)) if not isinstance(topk, (int, tuple)): raise TypeError('topk must be int or tuple of int, ' f'but got {type(topk)}') if isinstance(topk, int): topk = (topk, ) top_k_acc = top_k_accuracy(results, gt_labels, topk) log_msg = [] for k, acc in zip(topk, top_k_acc): eval_results[f'top{k}_acc'] = acc log_msg.append(f'\ntop{k}_acc\t{acc:.4f}') log_msg = ''.join(log_msg) print_log(log_msg, logger=logger) continue if metric == 'mean_class_accuracy': mean_acc = mean_class_accuracy(results, gt_labels) eval_results['mean_class_accuracy'] = mean_acc log_msg = f'\nmean_acc\t{mean_acc:.4f}' print_log(log_msg, logger=logger) continue if metric in [ 'mean_average_precision', 'mmit_mean_average_precision' ]: gt_labels = [ self.label2array(self.num_classes, label) for label in gt_labels ] if metric == 'mean_average_precision': mAP = mean_average_precision(results, gt_labels) elif metric == 'mmit_mean_average_precision': mAP = mmit_mean_average_precision(results, gt_labels) eval_results['mean_average_precision'] = mAP log_msg = f'\nmean_average_precision\t{mAP:.4f}' print_log(log_msg, logger=logger) continue return eval_results
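# A self-contained NumPy sketch of the ``top_k_accuracy`` computation the
# method above delegates to: a clip counts as correct at k if its true label
# is among the k highest-scoring classes. The scores and labels below are
# random placeholders, not dataset outputs.
def _demo_top_k_accuracy(topk=(1, 5)):
    import numpy as np
    scores = np.random.rand(8, 10)            # 8 clips, 10 classes
    labels = np.random.randint(0, 10, size=8)
    accs = []
    for k in topk:
        topk_preds = np.argsort(scores, axis=1)[:, -k:]   # top-k class ids
        hits = [label in preds for label, preds in zip(labels, topk_preds)]
        accs.append(float(np.mean(hits)))
    return accs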
def kaggle_map(det_results, annotations, iou_thrs=(0.5, 0.55, 0.6, 0.65, 0.7, 0.75), logger=None, n_jobs=4, by_sample=False): """Evaluate kaggle mAP of a dataset. Args: det_results (list[list]): [[cls1_det, cls2_det, ...], ...]. The outer list indicates images, and the inner list indicates per-class detected bboxes. annotations (list[dict]): Ground truth annotations where each item of the list indicates an image. Keys of annotations are: - `bboxes`: numpy array of shape (n, 4) - `labels`: numpy array of shape (n, ) - `bboxes_ignore` (optional): numpy array of shape (k, 4) - `labels_ignore` (optional): numpy array of shape (k, ) iou_thrs (list): IoU thresholds to be considered as matched. Default: (0.5, 0.55, 0.6, 0.65, 0.7, 0.75). logger (logging.Logger | str | None): The way to print the mAP summary. See `mmdet.utils.print_log()` for details. Default: None. n_jobs (int): Processes used for computing TP, FP and FN. Default: 4. by_sample (bool): Return AP by sample. Returns: tuple: (mAP, [dict, dict, ...]) """ assert len(det_results) == len(annotations) num_imgs = len(det_results) num_classes = len(det_results[0]) # positive class num pool = Pool(n_jobs) eval_results = [] for i in range(num_classes): # get gt and det bboxes of this class cls_dets, cls_gts, _ = get_cls_results(det_results, annotations, i) # compute tp and fp for each image with multiple processes aps_by_thrs = [] aps_by_sample = np.zeros(num_imgs) for iou_thr in iou_thrs: tpfpfn = pool.starmap( calc_tpfpfn, zip(cls_dets, cls_gts, [iou_thr for _ in range(num_imgs)])) iou_thr_aps = np.array( [tp / (tp + fp + fn) for tp, fp, fn in tpfpfn]) if by_sample: aps_by_sample += iou_thr_aps aps_by_thrs.append(np.mean(iou_thr_aps)) eval_results.append({ "num_gts": len(cls_gts), "num_dets": len(cls_dets), "ap": np.mean(aps_by_thrs), "ap_by_sample": None if not by_sample else aps_by_sample / len(iou_thrs), }) pool.close() aps = [] for cls_result in eval_results: if cls_result["num_gts"] > 0: aps.append(cls_result["ap"]) mean_ap = np.array(aps).mean().item() if aps else 0.0 print_log(f"\nKaggle mAP: {mean_ap}", logger=logger) return mean_ap, eval_results
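# ``kaggle_map`` averages the per-image precision TP / (TP + FP + FN) over a
# set of IoU thresholds. The self-contained sketch below reproduces that
# quantity for one class at a single threshold with a simple greedy matcher;
# it is an illustration only, not the repo's ``calc_tpfpfn``.
def _demo_kaggle_precision(iou_thr=0.5):
    import numpy as np

    def iou(a, b):
        x1, y1 = max(a[0], b[0]), max(a[1], b[1])
        x2, y2 = min(a[2], b[2]), min(a[3], b[3])
        inter = max(0., x2 - x1) * max(0., y2 - y1)
        area_a = (a[2] - a[0]) * (a[3] - a[1])
        area_b = (b[2] - b[0]) * (b[3] - b[1])
        return inter / (area_a + area_b - inter)

    dets = np.array([[10., 10., 50., 50., 0.9],     # matches the first gt
                     [70., 70., 90., 90., 0.4]])    # false positive
    gts = [np.array([12., 12., 48., 48.]),
           np.array([100., 100., 140., 140.])]      # missed -> false negative
    matched = set()
    tp = fp = 0
    for det in dets[np.argsort(-dets[:, 4])]:       # highest scores first
        best, best_iou = None, iou_thr
        for gi, gt in enumerate(gts):
            if gi not in matched and iou(det[:4], gt) >= best_iou:
                best, best_iou = gi, iou(det[:4], gt)
        if best is None:
            fp += 1
        else:
            tp += 1
            matched.add(best)
    fn = len(gts) - len(matched)
    return tp / (tp + fp + fn)    # 1 / (1 + 1 + 1) = 1/3 for this toy case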
def evaluate(self,
             results,
             metric='waymo',
             logger=None,
             pklfile_prefix=None,
             submission_prefix=None,
             show=False,
             out_dir=None):
    """Evaluation in KITTI protocol.

    Args:
        results (list[dict]): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated.
            Default: 'waymo'. Another supported metric is 'kitti'.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.
        pklfile_prefix (str | None): The prefix of pkl files. It includes
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temp file will be created. Default: None.
        submission_prefix (str | None): The prefix of submission data.
            If not specified, the submission data will not be generated.
        show (bool): Whether to visualize. Default: False.
        out_dir (str): Path to save the visualization results.
            Default: None.

    Returns:
        dict[str: float]: results of each evaluation metric
    """
    print("Input evaluation metric:", metric)
    metric = 'kitti'  # LKK add
    print("Using evaluation metric:", metric)
    assert ('waymo' in metric or 'kitti' in metric), \
        f'invalid metric {metric}'
    if 'kitti' in metric:
        result_files, tmp_dir = self.format_results(
            results,
            pklfile_prefix,
            submission_prefix,
            data_format='kitti')
        from mmdet3d.core.evaluation import kitti_eval
        gt_annos = [info['annos'] for info in self.data_infos]

        if isinstance(result_files, dict):
            ap_dict = dict()
            for name, result_files_ in result_files.items():
                eval_types = ['bev', '3d']
                ap_result_str, ap_dict_ = kitti_eval(
                    gt_annos,
                    result_files_,
                    self.CLASSES,
                    eval_types=eval_types)
                for ap_type, ap in ap_dict_.items():
                    ap_dict[f'{name}/{ap_type}'] = float(
                        '{:.4f}'.format(ap))

                print_log(
                    f'Results of {name}:\n' + ap_result_str, logger=logger)
        else:
            ap_result_str, ap_dict = kitti_eval(
                gt_annos,
                result_files,
                self.CLASSES,
                eval_types=['bev', '3d'])
            print_log('\n' + ap_result_str, logger=logger)
    if 'waymo' in metric:
        waymo_root = osp.join(
            self.data_root.split('kitti_format')[0],
            'kitti_format')  # LKK: change waymo_format to kitti_format folder name
        if pklfile_prefix is None:
            eval_tmp_dir = tempfile.TemporaryDirectory()
            pklfile_prefix = osp.join(eval_tmp_dir.name, 'results')
        else:
            eval_tmp_dir = None
        result_files, tmp_dir = self.format_results(
            results,
            pklfile_prefix,
            submission_prefix,
            data_format='waymo')
        import subprocess
        ret_bytes = subprocess.check_output(
            'mmdet3d/core/evaluation/waymo_utils/' +
            f'compute_detection_metrics_main {pklfile_prefix}.bin ' +
            f'{waymo_root}/gt.bin',
            shell=True)
        ret_texts = ret_bytes.decode('utf-8')
        print_log(ret_texts)
        # parse the text to get ap_dict
        ap_dict = {
            'Vehicle/L1 mAP': 0,
            'Vehicle/L1 mAPH': 0,
            'Vehicle/L2 mAP': 0,
            'Vehicle/L2 mAPH': 0,
            'Pedestrian/L1 mAP': 0,
            'Pedestrian/L1 mAPH': 0,
            'Pedestrian/L2 mAP': 0,
            'Pedestrian/L2 mAPH': 0,
            'Sign/L1 mAP': 0,
            'Sign/L1 mAPH': 0,
            'Sign/L2 mAP': 0,
            'Sign/L2 mAPH': 0,
            'Cyclist/L1 mAP': 0,
            'Cyclist/L1 mAPH': 0,
            'Cyclist/L2 mAP': 0,
            'Cyclist/L2 mAPH': 0,
            'Overall/L1 mAP': 0,
            'Overall/L1 mAPH': 0,
            'Overall/L2 mAP': 0,
            'Overall/L2 mAPH': 0
        }
        mAP_splits = ret_texts.split('mAP ')
        mAPH_splits = ret_texts.split('mAPH ')
        for idx, key in enumerate(ap_dict.keys()):
            split_idx = int(idx / 2) + 1
            if idx % 2 == 0:  # mAP
                ap_dict[key] = float(mAP_splits[split_idx].split(']')[0])
            else:  # mAPH
                ap_dict[key] = float(mAPH_splits[split_idx].split(']')[0])
        ap_dict['Overall/L1 mAP'] = \
            (ap_dict['Vehicle/L1 mAP'] + ap_dict['Pedestrian/L1 mAP'] +
             ap_dict['Cyclist/L1 mAP']) / 3
        ap_dict['Overall/L1 mAPH'] = \
            (ap_dict['Vehicle/L1 mAPH'] + ap_dict['Pedestrian/L1 mAPH'] +
             ap_dict['Cyclist/L1 mAPH']) / 3
        ap_dict['Overall/L2 mAP'] = \
            (ap_dict['Vehicle/L2 mAP'] + ap_dict['Pedestrian/L2 mAP'] +
             ap_dict['Cyclist/L2 mAP']) / 3
        ap_dict['Overall/L2 mAPH'] = \
            (ap_dict['Vehicle/L2 mAPH'] + ap_dict['Pedestrian/L2 mAPH'] +
             ap_dict['Cyclist/L2 mAPH']) / 3
        if eval_tmp_dir is not None:
            eval_tmp_dir.cleanup()

    if tmp_dir is not None:
        tmp_dir.cleanup()

    if show:
        self.show(results, out_dir)
    return ap_dict
def evaluate(self,
             results,
             metric='bbox',
             logger=None,
             outfile_prefix=None,
             classwise=False,
             proposal_nums=(100, 300, 1000),
             largest_max_dets=None,
             iou_thrs=np.arange(0.5, 0.96, 0.05)):
    """Evaluation in COCO protocol.

    Args:
        results (list[list | tuple]): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated. Options are
            'bbox', 'segm', 'proposal', 'proposal_fast'.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.
        outfile_prefix (str | None): The prefix of json files. It includes
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temp file will be created. Default: None.
        classwise (bool): Whether to evaluate the AP for each class.
        proposal_nums (Sequence[int]): Proposal number used for evaluating
            recalls, such as recall@100, recall@1000.
            Default: (100, 300, 1000).
        largest_max_dets (int | None): If specified (must be > 100), the
            last entry of ``maxDets`` is overridden so that more than 100
            detections per image are evaluated. Default: None.
        iou_thrs (Sequence[float]): IoU thresholds used for evaluation.
            If set to a list, the average over all IoUs will also be
            computed. Default: np.arange(0.5, 0.96, 0.05).

    Returns:
        dict[str, float]: COCO style evaluation metric.
    """
    metrics = metric if isinstance(metric, list) else [metric]
    allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast']
    for metric in metrics:
        if metric not in allowed_metrics:
            raise KeyError(f'metric {metric} is not supported')

    result_files, tmp_dir = self.format_results(
        results, outfile_prefix, format_type='coco')

    eval_results = {}
    cocoGt = self.coco
    for metric in metrics:
        msg = f'Evaluating {metric}...'
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)

        if metric == 'proposal_fast':
            ar = self.fast_eval_recall(
                results, proposal_nums, iou_thrs, logger='silent')
            log_msg = []
            for i, num in enumerate(proposal_nums):
                eval_results[f'AR@{num}'] = ar[i]
                log_msg.append(f'\nAR@{num}\t{ar[i]:.4f}')
            log_msg = ''.join(log_msg)
            print_log(log_msg, logger=logger)
            continue

        if metric not in result_files:
            raise KeyError(f'{metric} is not in results')
        try:
            cocoDt = cocoGt.loadRes(result_files[metric])
        except IndexError:
            print_log(
                'The testing results of the whole dataset is empty.',
                logger=logger,
                level=logging.ERROR)
            break

        iou_type = 'bbox' if metric == 'proposal' else metric
        cocoEval = COCOeval(cocoGt, cocoDt, iou_type)
        cocoEval.params.catIds = self.cat_ids
        cocoEval.params.imgIds = self.img_ids
        if metric == 'proposal':
            cocoEval.params.useCats = 0
            cocoEval.params.maxDets = list(proposal_nums)
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            metric_items = [
                'AR@100', 'AR@300', 'AR@1000', 'AR_s@1000', 'AR_m@1000',
                'AR_l@1000'
            ]
            for i, item in enumerate(metric_items):
                val = float(f'{cocoEval.stats[i + 6]:.3f}')
                eval_results[item] = val
        else:
            if largest_max_dets is not None:
                assert largest_max_dets > 100, \
                    'specify largest_max_dets only when ' \
                    'you need to evaluate more than 100 detections'
                cocoEval.params.maxDets[-1] = largest_max_dets
                cocoEval.params.maxDets[-2] = 100
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            if classwise:  # Compute per-category AP
                # Compute per-category AP
                # from https://github.com/facebookresearch/detectron2/
                precisions = cocoEval.eval['precision']
                # precision: (iou, recall, cls, area range, max dets)
                assert len(self.cat_ids) == precisions.shape[2]

                table_data = []
                waymo_iou_metrics = {}
                iouThr_dict = {None: 'AP', 0.5: 'AP 0.5', 0.7: 'AP 0.7'}
                for iouThr, metric_name in iouThr_dict.items():
                    results_per_category = []
                    for idx, catId in enumerate(self.cat_ids):
                        # area range index 0: all area ranges
                        # max dets index -1: typically 100 per image
                        nm = self.coco.loadCats(catId)[0]
                        precision = precisions[:, :, idx, 0, -1]
                        if iouThr is not None:
                            t = np.where(
                                iouThr == cocoEval.params.iouThrs)[0]
                            precision = precision[t]
                        precision = precision[precision > -1]
                        if precision.size:
                            ap = np.mean(precision)
                        else:
                            ap = float('nan')
                        results_per_category.append(
                            (f'{nm["name"]}', f'{float(ap):0.4f}'))
                        if self.CLASSWISE_IOU[nm["name"]] == iouThr:
                            waymo_iou_metrics[nm["name"]] = ap

                    num_columns = min(6, len(results_per_category) * 2)
                    results_flatten = list(
                        itertools.chain(*results_per_category))
                    headers = ['category', metric_name] * (num_columns // 2)
                    results_2d = itertools.zip_longest(*[
                        results_flatten[i::num_columns]
                        for i in range(num_columns)
                    ])
                    table_data += [headers]
                    table_data += [result for result in results_2d]

                table = AsciiTable(table_data)
                table.inner_heading_row_border = False
                table.inner_row_border = True
                print_log('\n' + table.table, logger=logger)

                for category, category_iou in self.CLASSWISE_IOU.items():
                    if category not in waymo_iou_metrics:
                        continue
                    print_log(
                        f'AP{category_iou} ({category}): ' +
                        f'{waymo_iou_metrics[category]:0.4f}',
                        logger=logger)
                ap_waymo = np.mean(list(waymo_iou_metrics.values()))
                print_log(
                    'AP (Waymo challenge IoU, COCO script): ' +
                    f'{ap_waymo:0.4f}',
                    logger=logger)

            metric_items = [
                'mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'
            ]
            for i in range(len(metric_items)):
                key = f'{metric}_{metric_items[i]}'
                val = float(f'{cocoEval.stats[i]:.3f}')
                eval_results[key] = val
            ap = cocoEval.stats[:6]
            map_copypaste = (f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} '
                             f'{ap[3]:.3f} {ap[4]:.3f} {ap[5]:.3f}')
            eval_results[f'{metric}_mAP_copypaste'] = map_copypaste
            print_log(
                f'{metric}_mAP_copypaste: {map_copypaste}', logger=logger)
    if tmp_dir is not None:
        tmp_dir.cleanup()
    return eval_results
def evaluate(self, results, metric='mIoU', logger=None, efficient_test=False, **kwargs): """Evaluate the dataset. Args: results (list): Testing results of the dataset. metric (str | list[str]): Metrics to be evaluated. 'mIoU' and 'mDice' are supported. logger (logging.Logger | None | str): Logger used for printing related information during evaluation. Default: None. Returns: dict[str, float]: Default metrics. """ if isinstance(metric, str): metric = [metric] allowed_metrics = ['mIoU', 'mDice'] if not set(metric).issubset(set(allowed_metrics)): raise KeyError('metric {} is not supported'.format(metric)) eval_results = {} gt_seg_maps = self.get_gt_seg_maps(efficient_test) if self.CLASSES is None: num_classes = len( reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps])) else: num_classes = len(self.CLASSES) ret_metrics = eval_metrics( results, gt_seg_maps, num_classes, self.ignore_index, metric, label_map=self.label_map, reduce_zero_label=self.reduce_zero_label) class_table_data = [['Class'] + [m[1:] for m in metric] + ['Acc']] if self.CLASSES is None: class_names = tuple(range(num_classes)) else: class_names = self.CLASSES ret_metrics_round = [ np.round(ret_metric * 100, 2) for ret_metric in ret_metrics ] for i in range(num_classes): class_table_data.append([class_names[i]] + [m[i] for m in ret_metrics_round[2:]] + [ret_metrics_round[1][i]]) summary_table_data = [['Scope'] + ['m' + head for head in class_table_data[0][1:]] + ['aAcc']] ret_metrics_mean = [ np.round(np.nanmean(ret_metric) * 100, 2) for ret_metric in ret_metrics ] summary_table_data.append(['global'] + ret_metrics_mean[2:] + [ret_metrics_mean[1]] + [ret_metrics_mean[0]]) print_log('per class results:', logger) table = AsciiTable(class_table_data) print_log('\n' + table.table, logger=logger) print_log('Summary:', logger) table = AsciiTable(summary_table_data) print_log('\n' + table.table, logger=logger) for i in range(1, len(summary_table_data[0])): eval_results[summary_table_data[0] [i]] = summary_table_data[1][i] / 100.0 if mmcv.is_list_of(results, str): for file_name in results: os.remove(file_name) return eval_results
def pq_compute(gt_json_file, pred_json_file, gt_folder=None, pred_folder=None, logger=None): start_time = time.time() with open(gt_json_file, 'r') as f: gt_json = json.load(f) with open(pred_json_file, 'r') as f: pred_json = json.load(f) if gt_folder is None: gt_folder = gt_json_file.replace('.json', '') if pred_folder is None: pred_folder = pred_json_file.replace('.json', '') categories = {el['id']: el for el in gt_json['categories']} print("Evaluation panoptic segmentation metrics:") print("Ground truth:") print("\tSegmentation folder: {}".format(gt_folder)) print("\tJSON file: {}".format(gt_json_file)) print("Prediction:") print("\tSegmentation folder: {}".format(pred_folder)) print("\tJSON file: {}".format(pred_json_file)) if not os.path.isdir(gt_folder): raise Exception("Folder {} with ground truth segmentations " "doesn't exist".format(gt_folder)) if not os.path.isdir(pred_folder): raise Exception("Folder {} with predicted segmentations " "doesn't exist".format(pred_folder)) pred_annotations = {el['image_id']: el for el in pred_json['annotations']} matched_annotations_list = [] for gt_ann in gt_json['annotations']: image_id = gt_ann['image_id'] if image_id not in pred_annotations: raise Exception('no prediction for the image ' 'with id: {}'.format(image_id)) matched_annotations_list.append((gt_ann, pred_annotations[image_id])) cpu_num = multiprocessing.cpu_count() annotations_split = np.array_split(matched_annotations_list, cpu_num) print("Number of cores: {}, images per core: {}".format( cpu_num, len(annotations_split[0]))) workers = multiprocessing.Pool(processes=cpu_num) processes = [] for proc_id, annotation_set in enumerate(annotations_split): p = workers.apply_async( pq_compute_single_core, (proc_id, annotation_set, gt_folder, pred_folder, categories)) processes.append(p) pq_stat = PQStat() for p in processes: pq_stat += p.get() metrics = [("All", None), ("Things", True), ("Stuff", False)] results = {} for name, isthing in metrics: results[name], per_class_results = pq_stat.pq_average(categories, isthing=isthing) if name == 'All': results['per_class'] = per_class_results print_log("{:10s}| {:>5s} {:>5s} {:>5s} {:>5s}".format( "", "PQ", "SQ", "RQ", "N"), logger=logger) print_log("-" * (10 + 7 * 4), logger=logger) for name, _isthing in metrics: print_log("{:10s}| {:5.1f} {:5.1f} {:5.1f} {:5d}".format( name, 100 * results[name]['pq'], 100 * results[name]['sq'], 100 * results[name]['rq'], results[name]['n']), logger=logger) t_delta = time.time() - start_time print("Time elapsed: {:0.2f} seconds".format(t_delta))
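# ``pq_compute`` aggregates, per category, the matched-segment IoUs (TP),
# unmatched predictions (FP) and unmatched ground truth segments (FN) into
# PQ = SQ * RQ. The self-contained sketch below shows that final aggregation
# step with made-up counts; the actual per-image matching happens in
# ``pq_compute_single_core``.
def _demo_pq_from_counts(tp_ious=(0.82, 0.74, 0.91), fp=1, fn=2):
    tp = len(tp_ious)
    sq = sum(tp_ious) / tp if tp else 0.0                  # segmentation quality
    rq = tp / (tp + 0.5 * fp + 0.5 * fn) if tp else 0.0    # recognition quality
    return dict(pq=sq * rq, sq=sq, rq=rq, n=1)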
def evaluate(self,
             results,
             metric='bbox',
             logger=None,
             jsonfile_prefix=None,
             classwise=False,
             proposal_nums=(100, 300, 1000),
             iou_thrs=np.arange(0.5, 0.96, 0.05)):
    """Evaluation in COCO protocol.

    Args:
        results (list): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.
        jsonfile_prefix (str | None): The prefix of json files. It includes
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temp file will be created. Default: None.
        classwise (bool): Whether to evaluate the AP for each class.
        proposal_nums (Sequence[int]): Proposal number used for evaluating
            recalls, such as recall@100, recall@1000.
            Default: (100, 300, 1000).
        iou_thrs (Sequence[float]): IoU thresholds used for evaluation.
            If set to a list, the average recall of all IoUs will also be
            computed. Default: np.arange(0.5, 0.96, 0.05).

    Returns:
        dict[str: float]
    """
    metrics = metric if isinstance(metric, list) else [metric]
    allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast']
    for metric in metrics:
        if metric not in allowed_metrics:
            raise KeyError('metric {} is not supported'.format(metric))

    result_files, tmp_dir = self.format_results(results, jsonfile_prefix)

    eval_results = {}
    cocoGt = self.coco
    for metric in metrics:
        msg = 'Evaluating {}...'.format(metric)
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)

        if metric == 'proposal_fast':
            ar = self.fast_eval_recall(
                results, proposal_nums, iou_thrs, logger='silent')
            log_msg = []
            for i, num in enumerate(proposal_nums):
                eval_results['AR@{}'.format(num)] = ar[i]
                log_msg.append('\nAR@{}\t{:.4f}'.format(num, ar[i]))
            log_msg = ''.join(log_msg)
            print_log(log_msg, logger=logger)
            continue

        if metric not in result_files:
            raise KeyError('{} is not in results'.format(metric))
        try:
            # load predictions through the standard pycocotools API
            cocoDt = cocoGt.loadRes(result_files[metric])
        except IndexError:
            print_log(
                'The testing results of the whole dataset is empty.',
                logger=logger,
                level=logging.ERROR)
            break

        iou_type = 'bbox' if metric == 'proposal' else metric
        cocoEval = COCOeval(cocoGt, cocoDt, iou_type)
        cocoEval.params.imgIds = self.img_ids
        if metric == 'proposal':
            cocoEval.params.useCats = 0
            cocoEval.params.maxDets = list(proposal_nums)
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            metric_items = [
                'AR@100', 'AR@300', 'AR@1000', 'AR_s@1000', 'AR_m@1000',
                'AR_l@1000'
            ]
            for i, item in enumerate(metric_items):
                val = float('{:.3f}'.format(cocoEval.stats[i + 6]))
                eval_results[item] = val
        else:
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            if classwise:  # Compute per-category AP
                pass  # TODO
            metric_items = [
                'mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'
            ]
            for i in range(len(metric_items)):
                key = '{}_{}'.format(metric, metric_items[i])
                val = float('{:.3f}'.format(cocoEval.stats[i]))
                eval_results[key] = val
            eval_results['{}_mAP_copypaste'.format(metric)] = (
                '{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} '
                '{ap[4]:.3f} {ap[5]:.3f}').format(ap=cocoEval.stats[:6])
    if tmp_dir is not None:
        tmp_dir.cleanup()
    return eval_results
def indoor_eval(gt_annos, dt_annos, metric, label2cat, logger=None, box_type_3d=None, box_mode_3d=None): """Indoor Evaluation. Evaluate the result of the detection. Args: gt_annos (list[dict]): Ground truth annotations. dt_annos (list[dict]): Detection annotations. the dict includes the following keys - labels_3d (torch.Tensor): Labels of boxes. - boxes_3d (:obj:`BaseInstance3DBoxes`): \ 3D bounding boxes in Depth coordinate. - scores_3d (torch.Tensor): Scores of boxes. metric (list[float]): IoU thresholds for computing average precisions. label2cat (dict): Map from label to category. logger (logging.Logger | str | None): The way to print the mAP summary. See `mmdet.utils.print_log()` for details. Default: None. Return: dict[str, float]: Dict of results. """ assert len(dt_annos) == len(gt_annos) pred = {} # map {class_id: pred} gt = {} # map {class_id: gt} for img_id in range(len(dt_annos)): # parse detected annotations det_anno = dt_annos[img_id] for i in range(len(det_anno['labels_3d'])): label = det_anno['labels_3d'].numpy()[i] bbox = det_anno['boxes_3d'].convert_to(box_mode_3d)[i] score = det_anno['scores_3d'].numpy()[i] if label not in pred: pred[int(label)] = {} if img_id not in pred[label]: pred[int(label)][img_id] = [] if label not in gt: gt[int(label)] = {} if img_id not in gt[label]: gt[int(label)][img_id] = [] pred[int(label)][img_id].append((bbox, score)) # parse gt annotations gt_anno = gt_annos[img_id] if gt_anno['gt_num'] != 0: gt_boxes = box_type_3d( gt_anno['gt_boxes_upright_depth'], box_dim=gt_anno['gt_boxes_upright_depth'].shape[-1], origin=(0.5, 0.5, 0.5)).convert_to(box_mode_3d) labels_3d = gt_anno['class'] else: gt_boxes = box_type_3d(np.array([], dtype=np.float32)) labels_3d = np.array([], dtype=np.int64) for i in range(len(labels_3d)): label = labels_3d[i] bbox = gt_boxes[i] if label not in gt: gt[label] = {} if img_id not in gt[label]: gt[label][img_id] = [] gt[label][img_id].append(bbox) rec, prec, ap = eval_map_recall(pred, gt, metric) ret_dict = dict() header = ['classes'] table_columns = [[label2cat[label] for label in ap[0].keys()] + ['Overall']] for i, iou_thresh in enumerate(metric): header.append(f'AP_{iou_thresh:.2f}') header.append(f'AR_{iou_thresh:.2f}') rec_list = [] for label in ap[i].keys(): ret_dict[f'{label2cat[label]}_AP_{iou_thresh:.2f}'] = float( ap[i][label][0]) ret_dict[f'mAP_{iou_thresh:.2f}'] = float(np.mean(list( ap[i].values()))) table_columns.append(list(map(float, list(ap[i].values())))) table_columns[-1] += [ret_dict[f'mAP_{iou_thresh:.2f}']] table_columns[-1] = [f'{x:.4f}' for x in table_columns[-1]] for label in rec[i].keys(): ret_dict[f'{label2cat[label]}_rec_{iou_thresh:.2f}'] = float( rec[i][label][-1]) rec_list.append(rec[i][label][-1]) ret_dict[f'mAR_{iou_thresh:.2f}'] = float(np.mean(rec_list)) table_columns.append(list(map(float, rec_list))) table_columns[-1] += [ret_dict[f'mAR_{iou_thresh:.2f}']] table_columns[-1] = [f'{x:.4f}' for x in table_columns[-1]] table_data = [header] table_rows = list(zip(*table_columns)) table_data += table_rows table = AsciiTable(table_data) table.inner_footing_row_border = True print_log('\n' + table.table, logger=logger) return ret_dict
def eval_hmean(results, img_infos, ann_infos, metrics={'hmean-iou'}, score_thr=0.3, rank_list=None, logger=None, **kwargs): """Evaluation in hmean metric. Args: results (list[dict]): Each dict corresponds to one image, containing the following keys: boundary_result img_infos (list[dict]): Each dict corresponds to one image, containing the following keys: filename, height, width ann_infos (list[dict]): Each dict corresponds to one image, containing the following keys: masks, masks_ignore score_thr (float): Score threshold of prediction map. metrics (set{str}): Hmean metric set, should be one or all of {'hmean-iou', 'hmean-ic13'} Returns: dict[str: float] """ assert utils.is_type_list(results, dict) assert utils.is_type_list(img_infos, dict) assert utils.is_type_list(ann_infos, dict) assert len(results) == len(img_infos) == len(ann_infos) assert isinstance(metrics, set) gts, gts_ignore = get_gt_masks(ann_infos) preds = [] pred_scores = [] for result in results: _, texts, scores = extract_boundary(result) if len(texts) > 0: assert utils.valid_boundary(texts[0], False) valid_texts, valid_text_scores = filter_2dlist_result( texts, scores, score_thr) preds.append(valid_texts) pred_scores.append(valid_text_scores) eval_results = {} for metric in metrics: msg = f'Evaluating {metric}...' if logger is None: msg = '\n' + msg print_log(msg, logger=logger) best_result = dict(hmean=-1) for iter in range(3, 10): thr = iter * 0.1 top_preds = select_top_boundary(preds, pred_scores, thr) if metric == 'hmean-iou': result, img_result = hmean_iou.eval_hmean_iou( top_preds, gts, gts_ignore) elif metric == 'hmean-ic13': result, img_result = hmean_ic13.eval_hmean_ic13( top_preds, gts, gts_ignore) else: raise NotImplementedError if rank_list is not None: output_ranklist(img_result, img_infos, rank_list) print_log('thr {0:.1f}, recall:{1[recall]:.3f}, ' 'precision: {1[precision]:.3f}, ' 'hmean:{1[hmean]:.3f}'.format(thr, result), logger=logger) if result['hmean'] > best_result['hmean']: best_result = result eval_results[metric + ':recall'] = best_result['recall'] eval_results[metric + ':precision'] = best_result['precision'] eval_results[metric + ':hmean'] = best_result['hmean'] return eval_results
def ref_img_sampling(self,
                     img_info,
                     frame_range,
                     stride=1,
                     num_ref_imgs=1,
                     filter_key_img=True,
                     method='uniform',
                     return_key_img=True):
    """Sampling reference frames in the same video for key frame.

    Args:
        img_info (dict): The information of key frame.
        frame_range (List(int) | int): The sampling range of reference
            frames in the same video for key frame.
        stride (int): The sampling frame stride when sampling reference
            images. Default: 1.
        num_ref_imgs (int): The number of sampled reference images.
            Default: 1.
        filter_key_img (bool): If False, the key image will be in the
            sampling reference candidates, otherwise, it is excluded.
            Default: True.
        method (str): The sampling method. Options are 'uniform',
            'bilateral_uniform', 'test_with_adaptive_stride',
            'test_with_fix_stride'. 'uniform' denotes reference images are
            randomly sampled from the nearby frames of key frame.
            'bilateral_uniform' denotes reference images are randomly
            sampled from the two sides of the nearby frames of key frame.
            'test_with_adaptive_stride' is only used in testing, and
            denotes the sampling frame stride is equal to (video length /
            the number of reference images). 'test_with_fix_stride' is
            only used in testing with sampling frame stride equalling to
            `stride`. Default: 'uniform'.
        return_key_img (bool): If True, the information of key frame is
            returned, otherwise, not returned. Default: True.

    Returns:
        list(dict): `img_info` and the reference images information or
            only the reference images information.
    """
    assert isinstance(img_info, dict)
    if isinstance(frame_range, int):
        assert frame_range >= 0, 'frame_range can not be a negative value.'
        frame_range = [-frame_range, frame_range]
    elif isinstance(frame_range, list):
        assert len(frame_range) == 2, 'The length must be 2.'
        assert frame_range[0] <= 0 and frame_range[1] >= 0
        for i in frame_range:
            assert isinstance(i, int), 'Each element must be int.'
    else:
        raise TypeError('The type of frame_range must be int or list.')

    if 'test' in method and \
            (frame_range[1] - frame_range[0]) != num_ref_imgs:
        print_log(
            'Warning: '
            "frame_range[1] - frame_range[0] isn't equal to num_ref_imgs. "
            'Set num_ref_imgs to frame_range[1] - frame_range[0].',
            logger=self.logger)
        self.ref_img_sampler[
            'num_ref_imgs'] = frame_range[1] - frame_range[0]

    if (not self.load_as_video) or img_info.get('frame_id', -1) < 0 \
            or (frame_range[0] == 0 and frame_range[1] == 0):
        ref_img_infos = []
        for i in range(num_ref_imgs):
            ref_img_infos.append(img_info.copy())
    else:
        vid_id, img_id, frame_id = img_info['video_id'], img_info[
            'id'], img_info['frame_id']
        img_ids = self.coco.get_img_ids_from_vid(vid_id)
        left = max(0, frame_id + frame_range[0])
        right = min(frame_id + frame_range[1], len(img_ids) - 1)

        ref_img_ids = []
        if method == 'uniform':
            valid_ids = img_ids[left:right + 1]
            if filter_key_img and img_id in valid_ids:
                valid_ids.remove(img_id)
            num_samples = min(num_ref_imgs, len(valid_ids))
            ref_img_ids.extend(random.sample(valid_ids, num_samples))
        elif method == 'bilateral_uniform':
            assert num_ref_imgs % 2 == 0, \
                'only support loading an even number of ref_imgs.'
            for mode in ['left', 'right']:
                if mode == 'left':
                    valid_ids = img_ids[left:frame_id + 1]
                else:
                    valid_ids = img_ids[frame_id:right + 1]
                if filter_key_img and img_id in valid_ids:
                    valid_ids.remove(img_id)
                num_samples = min(num_ref_imgs // 2, len(valid_ids))
                sampled_inds = random.sample(valid_ids, num_samples)
                ref_img_ids.extend(sampled_inds)
        elif method == 'test_with_adaptive_stride':
            if frame_id == 0:
                stride = float(len(img_ids) - 1) / (num_ref_imgs - 1)
                for i in range(num_ref_imgs):
                    ref_id = round(i * stride)
                    ref_img_ids.append(img_ids[ref_id])
        elif method == 'test_with_fix_stride':
            if frame_id == 0:
                for i in range(frame_range[0], 1):
                    ref_img_ids.append(img_ids[0])
                for i in range(1, frame_range[1] + 1):
                    ref_id = min(round(i * stride), len(img_ids) - 1)
                    ref_img_ids.append(img_ids[ref_id])
            elif frame_id % stride == 0:
                ref_id = min(
                    round(frame_id + frame_range[1] * stride),
                    len(img_ids) - 1)
                ref_img_ids.append(img_ids[ref_id])
            img_info['num_left_ref_imgs'] = abs(frame_range[0]) \
                if isinstance(frame_range, list) else frame_range
            img_info['frame_stride'] = stride
        else:
            raise NotImplementedError

        ref_img_infos = []
        for ref_img_id in ref_img_ids:
            ref_img_info = self.coco.load_imgs([ref_img_id])[0]
            ref_img_info['filename'] = ref_img_info['file_name']
            ref_img_infos.append(ref_img_info)
        ref_img_infos = sorted(ref_img_infos, key=lambda i: i['frame_id'])

    if return_key_img:
        return [img_info, *ref_img_infos]
    else:
        return ref_img_infos
def evaluate(self, results, logger=None, **kwargs):
    """Evaluate the results.

    Args:
        results (list[tuple[torch.Tensor]] | list[str]): per image
            pre_eval results or predicted segmentation maps for computing
            the evaluation metric.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.

    Returns:
        dict[str: float]: evaluate results of the total dataset
            or each separate dataset if `self.separate_eval=True`.
    """
    assert len(results) == self.cumulative_sizes[-1], \
        ('Dataset and results have different sizes: '
         f'{self.cumulative_sizes[-1]} v.s. {len(results)}')

    # Check whether all the datasets support evaluation
    for dataset in self.datasets:
        assert hasattr(dataset, 'evaluate'), \
            f'{type(dataset)} does not implement evaluate function'

    if self.separate_eval:
        dataset_idx = -1
        total_eval_results = dict()
        for size, dataset in zip(self.cumulative_sizes, self.datasets):
            start_idx = 0 if dataset_idx == -1 else \
                self.cumulative_sizes[dataset_idx]
            end_idx = self.cumulative_sizes[dataset_idx + 1]

            results_per_dataset = results[start_idx:end_idx]
            print_log(
                f'\nEvaluating {dataset.img_dir} with '
                f'{len(results_per_dataset)} images now',
                logger=logger)

            eval_results_per_dataset = dataset.evaluate(
                results_per_dataset, logger=logger, **kwargs)
            dataset_idx += 1
            for k, v in eval_results_per_dataset.items():
                total_eval_results.update({f'{dataset_idx}_{k}': v})

        return total_eval_results

    if len(set([type(ds) for ds in self.datasets])) != 1:
        raise NotImplementedError(
            'All the datasets should have same types when '
            'self.separate_eval=False')
    else:
        if mmcv.is_list_of(results, np.ndarray) or mmcv.is_list_of(
                results, str):
            # merge the generators of gt_seg_maps
            gt_seg_maps = chain(
                *[dataset.get_gt_seg_maps() for dataset in self.datasets])
        else:
            # if the results are `pre_eval` results,
            # we do not need gt_seg_maps to evaluate
            gt_seg_maps = None
        eval_results = self.datasets[0].evaluate(
            results, gt_seg_maps=gt_seg_maps, logger=logger, **kwargs)
        return eval_results
def evaluate(self, results, metrics='mean_average_precision', metric_options=None, logger=None): """Evaluation in HVU Video Dataset. We only support evaluating mAP for each tag categories. Since some tag categories are missing for some videos, we can not evaluate mAP for all tags. Args: results (list): Output results. metrics (str | sequence[str]): Metrics to be performed. Defaults: 'mean_average_precision'. metric_options (dict | None): Dict for metric options. Default: None. logger (logging.Logger | None): Logger for recording. Default: None. Returns: dict: Evaluation results dict. """ # Protect ``metric_options`` since it uses mutable value as default metric_options = copy.deepcopy(metric_options) if not isinstance(results, list): raise TypeError(f'results must be a list, but got {type(results)}') assert len(results) == len(self), ( f'The length of results is not equal to the dataset len: ' f'{len(results)} != {len(self)}') metrics = metrics if isinstance(metrics, (list, tuple)) else [metrics] # There should be only one metric in the metrics list: # 'mean_average_precision' assert len(metrics) == 1 metric = metrics[0] assert metric == 'mean_average_precision' gt_labels = [ann['label'] for ann in self.video_infos] eval_results = OrderedDict() for category in self.tag_categories: start_idx = self.category2startidx[category] num = self.category2num[category] preds = [ result[start_idx:start_idx + num] for video_idx, result in enumerate(results) if category in gt_labels[video_idx] ] gts = [ gt_label[category] for gt_label in gt_labels if category in gt_label ] gts = [self.label2array(num, item) for item in gts] mAP = mean_average_precision(preds, gts) eval_results[f'{category}_mAP'] = mAP log_msg = f'\n{category}_mAP\t{mAP:.4f}' print_log(log_msg, logger=logger) return eval_results
def print_map_summary(mean_ap, results, dataset=None, scale_ranges=None, logger=None): """Print mAP and results of each class. A table will be printed to show the gts/dets/recall/AP of each class and the mAP. Args: mean_ap (float): Calculated from `eval_map()`. results (list[dict]): Calculated from `eval_map()`. dataset (list[str] | str | None): Dataset name or dataset classes. scale_ranges (list[tuple] | None): Range of scales to be evaluated. logger (logging.Logger | str | None): The way to print the mAP summary. See `mmdet.utils.print_log()` for details. Default: None. """ if logger == 'silent': return if isinstance(results[0]['ap'], np.ndarray): num_scales = len(results[0]['ap']) else: num_scales = 1 if scale_ranges is not None: assert len(scale_ranges) == num_scales num_classes = len(results) recalls = np.zeros((num_scales, num_classes), dtype=np.float32) aps = np.zeros((num_scales, num_classes), dtype=np.float32) num_gts = np.zeros((num_scales, num_classes), dtype=int) for i, cls_result in enumerate(results): if cls_result['recall'].size > 0: recalls[:, i] = np.array(cls_result['recall'], ndmin=2)[:, -1] aps[:, i] = cls_result['ap'] num_gts[:, i] = cls_result['num_gts'] if dataset is None: label_names = [str(i) for i in range(num_classes)] elif mmcv.is_str(dataset): label_names = get_classes(dataset) else: label_names = dataset if not isinstance(mean_ap, list): mean_ap = [mean_ap] header = ['class', 'gts', 'dets', 'recall', 'ap'] for i in range(num_scales): if scale_ranges is not None: print_log(f'Scale range {scale_ranges[i]}', logger=logger) table_data = [header] for j in range(num_classes): row_data = [ label_names[j], num_gts[i, j], results[j]['num_dets'], f'{recalls[i, j]:.3f}', f'{aps[i, j]:.3f}' ] table_data.append(row_data) table_data.append(['mAP', '', '', '', f'{mean_ap[i]:.3f}']) table = AsciiTable(table_data) table.inner_footing_row_border = True print_log('\n' + table.table, logger=logger)
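# A hedged usage sketch for the variant of ``print_map_summary`` defined
# directly above (the one without TP/FP columns). The toy ``results`` below
# mimic the per-class dicts produced by ``eval_map()`` (keys ``num_gts``,
# ``num_dets``, ``recall``, ``ap``); the numbers are made up, and the call
# assumes the module-level imports (numpy, AsciiTable, print_log) used above.
def _demo_print_map_summary():
    import numpy as np
    results = [
        dict(num_gts=12, num_dets=30, recall=np.array([0.75]), ap=0.68),
        dict(num_gts=7, num_dets=15, recall=np.array([0.57]), ap=0.41),
    ]
    mean_ap = float(np.mean([r['ap'] for r in results]))
    print_map_summary(mean_ap, results, dataset=['pedestrian', 'cyclist'])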
def print_map_summary(mean_ap, results, dataset=None, scale_ranges=None, logger=None, model_name=None): """Print mAP and results of each class. A table will be printed to show the gts/dets/recall/AP of each class and the mAP. Args: mean_ap (float): Calculated from `eval_map()`. results (list[dict]): Calculated from `eval_map()`. dataset (list[str] | str | None): Dataset name or dataset classes. scale_ranges (list[tuple] | None): Range of scales to be evaluated. logger (logging.Logger | str | None): The way to print the mAP summary. See `mmcv.utils.print_log()` for details. Default: None. """ if model_name: print(model_name) if logger == 'silent': return if isinstance(results[0]['ap'], np.ndarray): num_scales = len(results[0]['ap']) else: num_scales = 1 if scale_ranges is not None: assert len(scale_ranges) == num_scales num_classes = len(results) recalls = np.zeros((num_scales, num_classes), dtype=np.float32) aps = np.zeros((num_scales, num_classes), dtype=np.float32) num_gts = np.zeros((num_scales, num_classes), dtype=int) for i, cls_result in enumerate(results): if cls_result['recall'].size > 0: recalls[:, i] = np.array(cls_result['recall'], ndmin=2)[:, -1] aps[:, i] = cls_result['ap'] num_gts[:, i] = cls_result['num_gts'] if dataset is None: label_names = [str(i) for i in range(num_classes)] elif mmcv.is_str(dataset): label_names = get_classes(dataset) else: label_names = dataset if not isinstance(mean_ap, list): mean_ap = [mean_ap] # TODO: Remove num_classes = 1 header = [ 'class', 'gts', 'dets', 'recall', 'ap', 'TP', 'FP', 'FPRed', 'FN' ] for i in range(num_scales): if scale_ranges is not None: print_log(f'Scale range {scale_ranges[i]}', logger=logger) table_data = [header] for j in range(num_classes): tps = int(sum([np.sum(tps) for tps in results[j]['tp']])) fps = int(sum([np.sum(fps) for fps in results[j]['fp']])) fps_red = int( sum([np.sum(fps) for fps in results[j]['fp_redundant']])) row_data = [ label_names[j], num_gts[i, j], results[j]['num_dets'], f'{recalls[i, j]*100:.2f}', f'{aps[i, j]*100:.2f}', tps, fps, fps_red, num_gts[i, j] - tps ] table_data.append(row_data) # table_data.append(['All', np.sum(num_gts), sum(t[2] for t in table_data[1:4]), # np.mean([float(t[3]) for t in table_data[1:4]]).round(2), # np.float32(mean_ap[i]*100).round(2), # sum(t[5] for t in table_data[1:4]), # sum(t[6] for t in table_data[1:4]), # sum(t[7] for t in table_data[1:4]), # sum(t[8] for t in table_data[1:4]) # ]) table = AsciiTable(table_data) table.inner_footing_row_border = True print_log('\n' + table.table, logger=logger) df_summary = pd.DataFrame(table_data[1:], columns=header) df_summary["model_name"] = [model_name for _ in range(1)] df_summary = df_summary.set_index('model_name') return df_summary
def evaluate(self, results, metric='bbox', logger=None, jsonfile_prefix=None, classwise=False, proposal_nums=(100, 300, 1000), iou_thrs=None, metric_items=None): metrics = metric if isinstance(metric, list) else [metric] allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast'] for metric in metrics: if metric not in allowed_metrics: raise KeyError(f'metric {metric} is not supported') if iou_thrs is None: iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) if metric_items is not None: if not isinstance(metric_items, list): metric_items = [metric_items] result_files, tmp_dir = self.format_results(results, jsonfile_prefix) eval_results = OrderedDict() cocoGt = self.coco for metric in metrics: msg = f'Evaluating {metric}...' if logger is None: msg = '\n' + msg print_log(msg, logger=logger) if metric == 'proposal_fast': ar = self.fast_eval_recall(results, proposal_nums, iou_thrs, logger='silent') log_msg = [] for i, num in enumerate(proposal_nums): eval_results[f'AR@{num}'] = ar[i] log_msg.append(f'\nAR@{num}\t{ar[i]:.4f}') log_msg = ''.join(log_msg) print_log(log_msg, logger=logger) continue if metric not in result_files: raise KeyError(f'{metric} is not in results') try: cocoDt = cocoGt.loadRes(result_files[metric]) except IndexError: print_log('The testing results of the whole dataset is empty.', logger=logger, level=logging.ERROR) break iou_type = 'bbox' if metric == 'proposal' else metric cocoEval = COCOeval(cocoGt, cocoDt, iou_type) cocoEval.params.catIds = self.cat_ids cocoEval.params.imgIds = self.img_ids cocoEval.params.maxDets = list(proposal_nums) cocoEval.params.iouThrs = iou_thrs # mapping of cocoEval.stats coco_metric_names = { 'mAP': 0, 'mAP_50': 1, 'mAP_75': 2, 'mAP_s': 3, 'mAP_m': 4, 'mAP_l': 5, 'AR@100': 6, 'AR@300': 7, 'AR@1000': 8, 'AR_s@1000': 9, 'AR_m@1000': 10, 'AR_l@1000': 11 } if metric_items is not None: for metric_item in metric_items: if metric_item not in coco_metric_names: raise KeyError( f'metric item {metric_item} is not supported') if metric == 'proposal': cocoEval.params.useCats = 0 cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize() if metric_items is None: metric_items = [ 'AR@100', 'AR@300', 'AR@1000', 'AR_s@1000', 'AR_m@1000', 'AR_l@1000' ] for item in metric_items: val = float( f'{cocoEval.stats[coco_metric_names[item]]:.3f}') eval_results[item] = val else: cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize() if classwise: # Compute per-category AP # Compute per-category AP # from https://github.com/facebookresearch/detectron2/ precisions = cocoEval.eval['precision'] # precision: (iou, recall, cls, area range, max dets) assert len(self.cat_ids) == precisions.shape[2] results_per_category = [] for idx, catId in enumerate(self.cat_ids): # area range index 0: all area ranges # max dets index -1: typically 100 per image nm = self.coco.loadCats(catId)[0] precision = precisions[:, :, idx, 0, -1] precision = precision[precision > -1] if precision.size: ap = np.mean(precision) else: ap = float('nan') results_per_category.append( (f'{nm["name"]}', f'{float(ap):0.3f}')) num_columns = min(6, len(results_per_category) * 2) results_flatten = list( itertools.chain(*results_per_category)) headers = ['category', 'AP'] * (num_columns // 2) results_2d = itertools.zip_longest(*[ results_flatten[i::num_columns] for i in range(num_columns) ]) table_data = [headers] table_data += [result for result in results_2d] table = AsciiTable(table_data) print_log('\n' + table.table, logger=logger) #################### 
eval_results['results_per_category'] \ = results_per_category #################### if metric_items is None: metric_items = [ 'mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l' ] for metric_item in metric_items: key = f'{metric}_{metric_item}' val = float( f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}' ) eval_results[key] = val ap = cocoEval.stats[:6] eval_results[f'{metric}_mAP_copypaste'] = ( f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' f'{ap[4]:.3f} {ap[5]:.3f}') if tmp_dir is not None: tmp_dir.cleanup() return eval_results
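The classwise branch above relies on the layout of cocoEval.eval['precision']; the sketch below uses a random array with the same axis order (iou, recall, class, area range, max dets) to show how per-category AP is read out. All numbers are synthetic.

import numpy as np

num_ious, num_recalls, num_classes = 10, 101, 3
precisions = np.random.rand(num_ious, num_recalls, num_classes, 4, 3)
precisions[precisions < 0.05] = -1  # COCOeval marks undefined entries with -1

for idx in range(num_classes):
    # area-range index 0 = all areas, max-dets index -1 = largest setting
    p = precisions[:, :, idx, 0, -1]
    p = p[p > -1]
    ap = p.mean() if p.size else float('nan')
    print(f'class {idx}: AP={ap:.3f}')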
def evaluate(self, results, metric='mIoU', logger=None, **kwargs): """Evaluate the dataset. Args: results (list[tuple[torch.Tensor]] | list[str]): per image pre_eval results or predict segmentation map for computing evaluation metric. metric (str | list[str]): Metrics to be evaluated. 'mIoU', 'mDice' and 'mFscore' are supported. logger (logging.Logger | None | str): Logger used for printing related information during evaluation. Default: None. Returns: dict[str, float]: Default metrics. """ if isinstance(metric, str): metric = [metric] allowed_metrics = ['mIoU', 'mDice', 'mFscore'] if not set(metric).issubset(set(allowed_metrics)): raise KeyError('metric {} is not supported'.format(metric)) eval_results = {} # test a list of files if mmcv.is_list_of(results, np.ndarray) or mmcv.is_list_of( results, str): gt_seg_maps = self.get_gt_seg_maps() if self.CLASSES is None: num_classes = len( reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps])) else: num_classes = len(self.CLASSES) # reset generator gt_seg_maps = self.get_gt_seg_maps() ret_metrics = eval_metrics( results, gt_seg_maps, num_classes, self.ignore_index, metric, label_map=self.label_map, reduce_zero_label=self.reduce_zero_label) # test a list of pre_eval_results else: ret_metrics = pre_eval_to_metrics(results, metric) # Because dataset.CLASSES is required for per-eval. if self.CLASSES is None: class_names = tuple(range(num_classes)) else: class_names = self.CLASSES # summary table ret_metrics_summary = OrderedDict({ ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2) for ret_metric, ret_metric_value in ret_metrics.items() }) # each class table ret_metrics.pop('aAcc', None) ret_metrics_class = OrderedDict({ ret_metric: np.round(ret_metric_value * 100, 2) for ret_metric, ret_metric_value in ret_metrics.items() }) ret_metrics_class.update({'Class': class_names}) ret_metrics_class.move_to_end('Class', last=False) # for logger class_table_data = PrettyTable() for key, val in ret_metrics_class.items(): class_table_data.add_column(key, val) summary_table_data = PrettyTable() for key, val in ret_metrics_summary.items(): if key == 'aAcc': summary_table_data.add_column(key, [val]) else: summary_table_data.add_column('m' + key, [val]) print_log('per class results:', logger) print_log('\n' + class_table_data.get_string(), logger=logger) print_log('Summary:', logger) print_log('\n' + summary_table_data.get_string(), logger=logger) # each metric dict for key, value in ret_metrics_summary.items(): if key == 'aAcc': eval_results[key] = value / 100.0 else: eval_results['m' + key] = value / 100.0 ret_metrics_class.pop('Class', None) for key, value in ret_metrics_class.items(): eval_results.update({ key + '.' + str(name): value[idx] / 100.0 for idx, name in enumerate(class_names) }) return eval_results
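A minimal sketch (toy counts, not mmseg's eval_metrics/pre_eval_to_metrics) of the per-class IoU computation and the two PrettyTable summaries built above.

import numpy as np
from prettytable import PrettyTable

class_names = ('road', 'car', 'person')
intersection = np.array([900.0, 400.0, 120.0])  # toy per-class intersections
union = np.array([1000.0, 520.0, 200.0])        # toy per-class unions

iou = intersection / union
class_table = PrettyTable()
class_table.add_column('Class', list(class_names))
class_table.add_column('IoU', list(np.round(iou * 100, 2)))

summary_table = PrettyTable()
summary_table.add_column('mIoU', [np.round(np.nanmean(iou) * 100, 2)])

print(class_table.get_string())
print(summary_table.get_string())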
def evaluate(self, results, metric=None, logger=None, pklfile_prefix=None, submission_prefix=None, show=False, out_dir=None): """Evaluation in KITTI protocol. Args: results (list[dict]): Testing results of the dataset. metric (str | list[str]): Metrics to be evaluated. logger (logging.Logger | str | None): Logger used for printing related information during evaluation. Default: None. pklfile_prefix (str | None): The prefix of pkl files. It includes the file path and the prefix of filename, e.g., "a/b/prefix". If not specified, a temp file will be created. Default: None. submission_prefix (str | None): The prefix of submission datas. If not specified, the submission data will not be generated. show (bool): Whether to visualize. Default: False. out_dir (str): Path to save the visualization results. Default: None. Returns: dict[str, float]: Results of each evaluation metric. """ result_files, tmp_dir = self.format_results(results, pklfile_prefix) from mmdet3d.core.evaluation import kitti_eval gt_annos = [info['annos'] for info in self.data_infos] if isinstance(result_files, dict): ap_dict = dict() for name, result_files_ in result_files.items(): eval_types = ['bbox', 'bev', '3d'] if 'img' in name: eval_types = ['bbox'] ap_result_str, ap_dict_ = kitti_eval( gt_annos, result_files_, self.CLASSES, eval_types=eval_types) for ap_type, ap in ap_dict_.items(): ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap)) print_log( f'Results of {name}:\n' + ap_result_str, logger=logger) else: if metric == 'img_bbox': ap_result_str, ap_dict = kitti_eval( gt_annos, result_files, self.CLASSES, eval_types=['bbox']) else: ap_result_str, ap_dict = kitti_eval(gt_annos, result_files, self.CLASSES) print_log('\n' + ap_result_str, logger=logger) if tmp_dir is not None: tmp_dir.cleanup() if show: self.show(results, out_dir) return ap_dict
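A toy illustration of the result-flattening step above, where per-type KITTI APs are merged into a single dict with `name/metric` keys and four-decimal rounding; the keys and values here are invented, not real kitti_eval output.

ap_dict_ = {'bbox/moderate': 88.123456, 'bev/moderate': 79.98765, '3d/moderate': 72.4567}
name = 'pts_bbox'  # hypothetical result-file key
ap_dict = {f'{name}/{ap_type}': float('{:.4f}'.format(ap))
           for ap_type, ap in ap_dict_.items()}
print(ap_dict)  # {'pts_bbox/bbox/moderate': 88.1235, ...}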
def evaluate2(self,
              result_files,
              metric='bbox',
              logger=None,
              jsonfile_prefix=None,
              classwise=False,
              proposal_nums=(100, 300, 1000),
              iou_thrs=np.arange(0.5, 0.96, 0.05)):
    """COCO-style evaluation on pre-formatted result files.

    Unlike ``evaluate()``, this variant takes already formatted
    ``result_files``, loads them through a local ``loadRes`` helper and,
    for non-proposal metrics, also computes and saves centroids via
    ``cocoevalhelper`` before accumulating and summarizing.
    """
    metrics = metric if isinstance(metric, list) else [metric]
    allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast']
    for metric in metrics:
        if metric not in allowed_metrics:
            raise KeyError(f'metric {metric} is not supported')

    eval_results = {}
    cocoGt = self.coco
    for metric in metrics:
        msg = f'Evaluating {metric}...'
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)

        if metric not in result_files:
            raise KeyError(f'{metric} is not in results')
        try:
            # Results are loaded with the project's loadRes helper rather
            # than cocoGt.loadRes().
            from mmdet.helper.helpers import loadRes
            cocoDt = loadRes(cocoGt, result_files[metric])
        except IndexError:
            print_log(
                'The testing results of the whole dataset is empty.',
                logger=logger,
                level=logging.ERROR)
            break

        iou_type = 'bbox' if metric == 'proposal' else metric
        cocoEval = COCOeval(cocoGt, cocoDt, iou_type)
        cocoEval.params.catIds = self.cat_ids
        cocoEval.params.imgIds = self.img_ids
        if metric == 'proposal':
            cocoEval.params.useCats = 0
            cocoEval.params.maxDets = list(proposal_nums)
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            metric_items = [
                'AR@100', 'AR@300', 'AR@1000', 'AR_s@1000', 'AR_m@1000',
                'AR_l@1000'
            ]
            for i, item in enumerate(metric_items):
                val = float(f'{cocoEval.stats[i + 6]:.3f}')
                eval_results[item] = val
        else:
            # Evaluation is delegated to cocoevalhelper instead of
            # COCOeval.evaluate().
            cocoevalhelper.evaluate(cocoEval)

            # Compute and save centroids.
            # NOTE: the centroids are written to this hardcoded directory.
            print('Start calculating centroids.')
            centroid_save_dir = '/home/xuma/mmdet/work_dirs/mask_rcnn_osr50/'
            cocoevalhelper.computeCentroids(
                cocoEval, save_dir=centroid_save_dir)

            # Accumulation and summarization use the dataset's own
            # implementations instead of COCOeval's.
            self.accumulate(cocoEval)
            self.summarize(cocoEval)

            if classwise:
                # Compute per-category AP
                # from https://github.com/facebookresearch/detectron2/
                precisions = cocoEval.eval['precision']
                # precision: (iou, recall, cls, area range, max dets)
                assert len(self.cat_ids) == precisions.shape[2]

                results_per_category = []
                for idx, catId in enumerate(self.cat_ids):
                    # area range index 0: all area ranges
                    # max dets index -1: typically 100 per image
                    nm = self.coco.loadCats(catId)[0]
                    precision = precisions[:, :, idx, 0, -1]
                    precision = precision[precision > -1]
                    if precision.size:
                        ap = np.mean(precision)
                    else:
                        ap = float('nan')
                    results_per_category.append(
                        (f'{nm["name"]}', f'{float(ap):0.3f}'))

                num_columns = min(6, len(results_per_category) * 2)
                results_flatten = list(
                    itertools.chain(*results_per_category))
                headers = ['category', 'AP'] * (num_columns // 2)
                results_2d = itertools.zip_longest(*[
                    results_flatten[i::num_columns]
                    for i in range(num_columns)
                ])
                table_data = [headers]
                table_data += [result for result in results_2d]
                table = AsciiTable(table_data)
                print_log('\n' + table.table, logger=logger)

            metric_items = [
                'mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'
            ]
            for i, item in enumerate(metric_items):
                key = f'{metric}_{item}'
                val = float(f'{cocoEval.stats[i]:.3f}')
                eval_results[key] = val
            ap = cocoEval.stats[:6]
            eval_results[f'{metric}_mAP_copypaste'] = (
                f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} '
                f'{ap[4]:.3f} {ap[5]:.3f}')
    return eval_results
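The `cocoEval.stats[i]` / `cocoEval.stats[i + 6]` indexing above relies on COCOeval's fixed summary layout (entries 0-5 are the AP metrics, 6-11 the AR metrics). A toy sketch of that mapping, with random stand-in stats:

import numpy as np

coco_metric_names = {
    'mAP': 0, 'mAP_50': 1, 'mAP_75': 2, 'mAP_s': 3, 'mAP_m': 4, 'mAP_l': 5,
    'AR@100': 6, 'AR@300': 7, 'AR@1000': 8,
    'AR_s@1000': 9, 'AR_m@1000': 10, 'AR_l@1000': 11,
}
stats = np.random.rand(12)  # stands in for cocoEval.stats after summarize()
eval_results = {name: float(f'{stats[idx]:.3f}')
                for name, idx in coco_metric_names.items()}
print(eval_results)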
def evaluate(self, results, metric='bbox', logger=None, jsonfile_prefix=None, classwise=False, proposal_nums=(100, 300, 1000), iou_thrs=np.arange(0.5, 0.96, 0.05)): """Evaluation in LVIS protocol. Args: results (list): Testing results of the dataset. metric (str | list[str]): Metrics to be evaluated. logger (logging.Logger | str | None): Logger used for printing related information during evaluation. Default: None. jsonfile_prefix (str | None): classwise (bool): Whether to evaluating the AP for each class. proposal_nums (Sequence[int]): Proposal number used for evaluating recalls, such as recall@100, recall@1000. Default: (100, 300, 1000). iou_thrs (Sequence[float]): IoU threshold used for evaluating recalls. If set to a list, the average recall of all IoUs will also be computed. Default: 0.5. Returns: dict[str: float] """ try: from lvis import LVISResults, LVISEval except ImportError: raise ImportError('Please follow install.md to ' 'install open-mmlab forked cocoapi first.') assert isinstance(results, list), 'results must be a list' assert len(results) == len(self), ( 'The length of results is not equal to the dataset len: {} != {}'. format(len(results), len(self))) metrics = metric if isinstance(metric, list) else [metric] allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast'] for metric in metrics: if metric not in allowed_metrics: raise KeyError('metric {} is not supported'.format(metric)) if jsonfile_prefix is None: tmp_dir = tempfile.TemporaryDirectory() jsonfile_prefix = osp.join(tmp_dir.name, 'results') else: tmp_dir = None result_files = self.results2json(results, jsonfile_prefix) eval_results = {} # get original api lvis_gt = self.coco for metric in metrics: msg = 'Evaluating {}...'.format(metric) if logger is None: msg = '\n' + msg print_log(msg, logger=logger) if metric == 'proposal_fast': ar = self.fast_eval_recall( results, proposal_nums, iou_thrs, logger='silent') log_msg = [] for i, num in enumerate(proposal_nums): eval_results['AR@{}'.format(num)] = ar[i] log_msg.append('\nAR@{}\t{:.4f}'.format(num, ar[i])) log_msg = ''.join(log_msg) print_log(log_msg, logger=logger) continue if metric not in result_files: raise KeyError('{} is not in results'.format(metric)) try: lvis_dt = LVISResults(lvis_gt, result_files[metric]) except IndexError: print_log( 'The testing results of the whole dataset is empty.', logger=logger, level=logging.ERROR) break iou_type = 'bbox' if metric == 'proposal' else metric lvis_eval = LVISEval(lvis_gt, lvis_dt, iou_type) lvis_eval.params.imgIds = self.img_ids if metric == 'proposal': lvis_eval.params.useCats = 0 lvis_eval.params.maxDets = list(proposal_nums) lvis_eval.evaluate() lvis_eval.accumulate() lvis_eval.summarize() for k, v in lvis_eval.get_results().items(): if k.startswith('AR'): val = float('{:.3f}'.format(float(v))) eval_results[k] = val else: lvis_eval.evaluate() lvis_eval.accumulate() lvis_eval.summarize() lvis_results = lvis_eval.get_results() if classwise: # Compute per-category AP # Compute per-category AP # from https://github.com/facebookresearch/detectron2/ precisions = lvis_eval.eval['precision'] # precision: (iou, recall, cls, area range, max dets) assert len(self.cat_ids) == precisions.shape[2] results_per_category = [] for idx, catId in enumerate(self.cat_ids): # area range index 0: all area ranges # max dets index -1: typically 100 per image nm = self.coco.load_cats(catId)[0] precision = precisions[:, :, idx, 0, -1] precision = precision[precision > -1] if precision.size: ap = np.mean(precision) else: ap = float('nan') 
results_per_category.append( (f'{nm["name"]}', f'{float(ap):0.3f}')) num_columns = min(6, len(results_per_category) * 2) results_flatten = list( itertools.chain(*results_per_category)) headers = ['category', 'AP'] * (num_columns // 2) results_2d = itertools.zip_longest(*[ results_flatten[i::num_columns] for i in range(num_columns) ]) table_data = [headers] table_data += [result for result in results_2d] table = AsciiTable(table_data) print_log('\n' + table.table, logger=logger) for k, v in lvis_results.items(): if k.startswith('AP'): key = '{}_{}'.format(metric, k) val = float('{:.3f}'.format(float(v))) eval_results[key] = val ap_summary = ' '.join([ '{}:{:.3f}'.format(k, float(v)) for k, v in lvis_results.items() if k.startswith('AP') ]) eval_results['{}_mAP_copypaste'.format(metric)] = ap_summary lvis_eval.print_results() if tmp_dir is not None: tmp_dir.cleanup() return eval_results
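The multi-column category/AP table above is built purely with itertools; here is a self-contained version with made-up APs showing how the flattened pairs are re-chunked into up to three (category, AP) column pairs per row.

import itertools
from terminaltables import AsciiTable

results_per_category = [('person', '0.512'), ('bicycle', '0.233'),
                        ('car', '0.401'), ('dog', '0.377'),
                        ('cat', '0.298'), ('bus', '0.455')]
num_columns = min(6, len(results_per_category) * 2)
results_flatten = list(itertools.chain(*results_per_category))
headers = ['category', 'AP'] * (num_columns // 2)
results_2d = itertools.zip_longest(
    *[results_flatten[i::num_columns] for i in range(num_columns)])
table_data = [headers] + [list(row) for row in results_2d]
print(AsciiTable(table_data).table)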
def openevaluate(self, results, metric='bbox', logger=None, jsonfile_prefix=None, classwise=False, proposal_nums=(100, 300, 1000), iou_thrs=np.arange(0.5, 0.96, 0.05)): """Evaluation in COCO protocol. Args: results (list[list | tuple]): Testing results of the dataset. metric (str | list[str]): Metrics to be evaluated. Options are 'bbox', 'segm', 'proposal', 'proposal_fast'. logger (logging.Logger | str | None): Logger used for printing related information during evaluation. Default: None. jsonfile_prefix (str | None): The prefix of json files. It includes the file path and the prefix of filename, e.g., "a/b/prefix". If not specified, a temp file will be created. Default: None. classwise (bool): Whether to evaluating the AP for each class. proposal_nums (Sequence[int]): Proposal number used for evaluating recalls, such as recall@100, recall@1000. Default: (100, 300, 1000). iou_thrs (Sequence[float]): IoU threshold used for evaluating recalls. If set to a list, the average recall of all IoUs will also be computed. Default: 0.5. Returns: dict[str, float]: COCO style evaluation metric. """ metrics = metric if isinstance(metric, list) else [metric] allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast'] for metric in metrics: if metric not in allowed_metrics: raise KeyError(f'metric {metric} is not supported') result_files, tmp_dir = self.format_results(results, jsonfile_prefix) eval_results = {} cocoGt = self.coco for metric in metrics: msg = f'Evaluating {metric}...' if logger is None: msg = '\n' + msg print_log(msg, logger=logger) if metric == 'proposal_fast': ar = self.fast_eval_recall( results, proposal_nums, iou_thrs, logger='silent') log_msg = [] for i, num in enumerate(proposal_nums): eval_results[f'AR@{num}'] = ar[i] log_msg.append(f'\nAR@{num}\t{ar[i]:.4f}') log_msg = ''.join(log_msg) print_log(log_msg, logger=logger) continue if metric not in result_files: raise KeyError(f'{metric} is not in results') try: cocoDt = cocoGt.loadRes(result_files[metric]) except IndexError: print_log( 'The testing results of the whole dataset is empty.', logger=logger, level=logging.ERROR) break iou_type = 'bbox' if metric == 'proposal' else metric cocoEval = COCOeval(cocoGt, cocoDt, iou_type) cocoEval.params.catIds = self.cat_ids cocoEval.params.imgIds = self.img_ids cocoEval.evaluate() # ############calculate centroids ###################### # cocoevalhelper.computeCentroids(cocoEval) # cocoEval.accumulate() self.accumulate(cocoEval) cocoevalhelper.opensummarize(cocoEval) metric_items = [ 'mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l' ] for i in range(len(metric_items)): key = f'{metric}_{metric_items[i]}' val = float(f'{cocoEval.stats[i]:.3f}') eval_results[key] = val ap = cocoEval.stats[:6] eval_results[f'{metric}_mAP_copypaste'] = ( f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' f'{ap[4]:.3f} {ap[5]:.3f}') if tmp_dir is not None: tmp_dir.cleanup() return eval_results
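A toy sketch of the `mAP_copypaste` string assembled at the end of the metric loop above; the six stats values are invented.

import numpy as np

stats = np.array([0.382, 0.591, 0.410, 0.214, 0.420, 0.503])  # stand-in for cocoEval.stats[:6]
metric = 'bbox'
copypaste = ' '.join(f'{v:.3f}' for v in stats[:6])
print({f'{metric}_mAP_copypaste': copypaste})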
def evaluate(self, results, metrics='top_k_accuracy', topk=(1, 5), logger=None): """Evaluation in rawframe dataset. Args: results (list): Output results. metrics (str | sequence[str]): Metrics to be performed. Defaults: 'top_k_accuracy'. logger (obj): Training logger. Defaults: None. topk (int | tuple[int]): K value for top_k_accuracy metric. Defaults: (1, 5). logger (logging.Logger | None): Logger for recording. Default: None. Returns: dict: Evaluation results dict. """ if not isinstance(results, list): raise TypeError(f'results must be a list, but got {type(results)}') assert len(results) == len(self), ( f'The length of results is not equal to the dataset len: ' f'{len(results)} != {len(self)}') if not isinstance(topk, (int, tuple)): raise TypeError( f'topk must be int or tuple of int, but got {type(topk)}') if isinstance(topk, int): topk = (topk, ) metrics = metrics if isinstance(metrics, (list, tuple)) else [metrics] allowed_metrics = [ 'top_k_accuracy', 'mean_class_accuracy', 'mean_average_precision' ] for metric in metrics: if metric not in allowed_metrics: raise KeyError(f'metric {metric} is not supported') eval_results = {} gt_labels = [ann['label'] for ann in self.video_infos] for metric in metrics: msg = f'Evaluating {metric}...' if logger is None: msg = '\n' + msg print_log(msg, logger=logger) if metric == 'top_k_accuracy': top_k_acc = top_k_accuracy(results, gt_labels, topk) log_msg = [] for k, acc in zip(topk, top_k_acc): eval_results[f'top{k}_acc'] = acc log_msg.append(f'\ntop{k}_acc\t{acc:.4f}') log_msg = ''.join(log_msg) print_log(log_msg, logger=logger) continue if metric == 'mean_class_accuracy': mean_acc = mean_class_accuracy(results, gt_labels) eval_results['mean_class_accuracy'] = mean_acc log_msg = f'\nmean_acc\t{mean_acc:.4f}' print_log(log_msg, logger=logger) continue if metric == 'mean_average_precision': gt_labels = [label.cpu().numpy() for label in gt_labels] mAP = mean_average_precision(results, gt_labels) eval_results['mean_average_precision'] = mAP log_msg = f'\nmean_average_precision\t{mAP:.4f}' print_log(log_msg, logger=logger) continue return eval_results
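A minimal top-k accuracy sketch, not mmaction's top_k_accuracy implementation but the same idea, for the metric loop above; scores and labels are random toy data.

import numpy as np

def topk_accuracy_sketch(scores, labels, topk=(1, 5)):
    # For each k, count clips whose ground-truth label is among the k
    # highest-scoring classes.
    scores = np.asarray(scores)
    labels = np.asarray(labels)
    res = []
    for k in topk:
        topk_idx = np.argsort(scores, axis=1)[:, -k:]
        hits = np.any(topk_idx == labels[:, None], axis=1)
        res.append(float(hits.mean()))
    return res

scores = np.random.rand(8, 10)            # 8 clips, 10 action classes
labels = np.random.randint(0, 10, size=8)
for k, acc in zip((1, 5), topk_accuracy_sketch(scores, labels)):
    print(f'top{k}_acc\t{acc:.4f}')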
def _evaluate_cityscapes(self, results, txtfile_prefix, logger): """Evaluation in Cityscapes protocol. Args: results (list): Testing results of the dataset. txtfile_prefix (str | None): The prefix of output txt file logger (logging.Logger | str | None): Logger used for printing related information during evaluation. Default: None. Returns: dict[str: float]: Cityscapes evaluation results, contains 'mAP' \ and 'AP@50'. """ try: import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as CSEval # noqa except ImportError: raise ImportError('Please run "pip install citscapesscripts" to ' 'install cityscapesscripts first.') msg = 'Evaluating in Cityscapes style' if logger is None: msg = '\n' + msg print_log(msg, logger=logger) result_files, tmp_dir = self.format_results(results, txtfile_prefix) if tmp_dir is None: result_dir = osp.join(txtfile_prefix, 'results') else: result_dir = osp.join(tmp_dir.name, 'results') eval_results = {} print_log(f'Evaluating results under {result_dir} ...', logger=logger) # set global states in cityscapes evaluation API CSEval.args.cityscapesPath = os.path.join(self.img_prefix, '../..') CSEval.args.predictionPath = os.path.abspath(result_dir) CSEval.args.predictionWalk = None CSEval.args.JSONOutput = False CSEval.args.colorized = False CSEval.args.gtInstancesFile = os.path.join(result_dir, 'gtInstances.json') CSEval.args.groundTruthSearch = os.path.join( self.img_prefix.replace('leftImg8bit', 'gtFine'), '*/*_gtFine_instanceIds.png') groundTruthImgList = glob.glob(CSEval.args.groundTruthSearch) assert len(groundTruthImgList), 'Cannot find ground truth images' \ f' in {CSEval.args.groundTruthSearch}.' predictionImgList = [] for gt in groundTruthImgList: predictionImgList.append(CSEval.getPrediction(gt, CSEval.args)) CSEval_results = CSEval.evaluateImgLists(predictionImgList, groundTruthImgList, CSEval.args)['averages'] eval_results['mAP'] = CSEval_results['allAp'] eval_results['AP@50'] = CSEval_results['allAp50%'] if tmp_dir is not None: tmp_dir.cleanup() return eval_results
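A small sketch of the ground-truth search pattern assembled above: the image prefix is mapped from leftImg8bit to gtFine and globbed for instance-ID masks. The prefix 'data/cityscapes/leftImg8bit/val' is a hypothetical example path.

import os.path as osp

img_prefix = 'data/cityscapes/leftImg8bit/val'
ground_truth_search = osp.join(
    img_prefix.replace('leftImg8bit', 'gtFine'),
    '*/*_gtFine_instanceIds.png')
print(ground_truth_search)
# data/cityscapes/gtFine/val/*/*_gtFine_instanceIds.png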
def eval_mot(results, annotations, logger=None, classes=None, iou_thr=0.5, ignore_iof_thr=0.5, ignore_by_classes=False, nproc=4): """Evaluation CLEAR MOT metrics. Args: results (list[list[list[ndarray]]]): The first list indicates videos, The second list indicates images. The third list indicates categories. The ndarray indicates the tracking results. annotations (list[list[dict]]): The first list indicates videos, The second list indicates images. The third list indicates the annotations of each video. Keys of annotations are - `bboxes`: numpy array of shape (n, 4) - `labels`: numpy array of shape (n, ) - `instance_ids`: numpy array of shape (n, ) - `bboxes_ignore` (optional): numpy array of shape (k, 4) - `labels_ignore` (optional): numpy array of shape (k, ) logger (logging.Logger | str | None, optional): The way to print the evaluation results. Defaults to None. classes (list, optional): Classes in the dataset. Defaults to None. iou_thr (float, optional): IoU threshold for evaluation. Defaults to 0.5. ignore_iof_thr (float, optional): Iof threshold to ignore results. Defaults to 0.5. ignore_by_classes (bool, optional): Whether ignore the results by classes or not. Defaults to False. nproc (int, optional): Number of the processes. Defaults to 4. Returns: dict[str, float]: Evaluation results. """ print_log('---CLEAR MOT Evaluation---', logger) t = time.time() gts = annotations.copy() if classes is None: classes = [i + 1 for i in range(len(results[0]))] assert len(results) == len(gts) metrics = METRIC_MAPS.keys() print_log('Accumulating...', logger) pool = Pool(nproc) accs = pool.starmap( acc_single_video, zip(results, gts, [iou_thr for _ in range(len(gts))], [ignore_iof_thr for _ in range(len(gts))], [ignore_by_classes for _ in range(len(gts))])) names, accs, items = aggregate_accs(accs, classes) print_log('Evaluating...', logger) eval_results = pd.DataFrame(columns=metrics) summaries = pool.starmap(eval_single_class, zip(names, accs)) pool.close() # category and overall results for i, item in enumerate(items): eval_results.loc[item] = summaries[i] dtypes = {m: type(d) for m, d in zip(metrics, summaries[0])} # average results avg_results = [] for i, m in enumerate(metrics): v = np.array([s[i] for s in summaries[:len(classes)]]) v = np.nan_to_num(v, nan=0) if dtypes[m] == int: avg_results.append(int(v.sum())) elif dtypes[m] == float: avg_results.append(float(v.mean())) else: raise TypeError() eval_results.loc['AVERAGE'] = avg_results eval_results = eval_results.astype(dtypes) print_log('Rendering...', logger) strsummary = mm.io.render_summary( eval_results, formatters=mm.metrics.create().formatters, namemap=METRIC_MAPS) print_log('\n' + strsummary, logger) print_log(f'Evaluation finishes with {(time.time() - t):.2f} s.', logger) eval_results = eval_results.to_dict() out = {METRIC_MAPS[k]: v['OVERALL'] for k, v in eval_results.items()} for k, v in out.items(): out[k] = float(f'{(v):.3f}') if isinstance(v, float) else int(f'{v}') for m in ['OVERALL', 'AVERAGE']: out[f'track_{m}_copypaste'] = '' for k in METRIC_MAPS.keys(): v = eval_results[k][m] v = f'{(v):.3f} ' if isinstance(v, float) else f'{v} ' out[f'track_{m}_copypaste'] += v return out
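A toy sketch of the AVERAGE-row logic above: integer metrics are summed across the per-class summaries, float metrics are nan-safe averaged; metric names and values here are placeholders, not real motmetrics output.

import numpy as np

metrics = ['num_misses', 'mota', 'motp']
summaries = [(3, 0.71, 0.12), (5, float('nan'), 0.15)]  # one tuple per class

dtypes = {m: type(d) for m, d in zip(metrics, summaries[0])}
avg_results = []
for i, m in enumerate(metrics):
    v = np.nan_to_num(np.array([s[i] for s in summaries]), nan=0)
    avg_results.append(int(v.sum()) if dtypes[m] == int else float(v.mean()))
print(dict(zip(metrics, avg_results)))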
def coco_evaluate(model, data_loader, classwise=True, feature=False): device = 'cuda' n_threads = torch.get_num_threads() # FIXME remove this and make paste_masks_in_image run on the GPU torch.set_num_threads(1) cpu_device = torch.device("cpu") model.eval() metric_logger = utils.MetricLogger(delimiter=" ") header = 'Test:' coco = get_coco_api_from_dataset(data_loader.dataset) iou_types = _get_iou_types(model) coco_evaluator = CocoEvaluator(coco, iou_types) for images, targets in metric_logger.log_every(data_loader, 1000, header): images = list(img.to(device) for img in images) torch.cuda.synchronize() model_time = time.time() if feature: _, outputs = model(images) else: outputs = model(images) for output in outputs: if 'features' in output.keys(): del output['features'] outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] model_time = time.time() - model_time res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} evaluator_time = time.time() coco_evaluator.update(res) evaluator_time = time.time() - evaluator_time metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) # gather the stats from all processes metric_logger.synchronize_between_processes() print("Averaged stats:", metric_logger) coco_evaluator.synchronize_between_processes() # accumulate predictions from all images # coco_evaluator.evaluate() coco_evaluator.accumulate() coco_evaluator.summarize() if classwise: # Compute per-category AP cat_ids = coco.get_cat_ids(cat_names=COCO_CLASSES) # Compute per-category AP # from https://github.com/facebookresearch/detectron2/ precisions = coco_evaluator.coco_eval['bbox'].eval['precision'] # precision: (iou, recall, cls, area range, max dets) assert len(cat_ids) == precisions.shape[2] results_per_category = [] for idx, catId in enumerate(cat_ids): # area range index 0: all area ranges # max dets index -1: typically 100 per image nm = coco.loadCats(catId)[0] precision = precisions[:, :, idx, 0, -1] precision = precision[precision > -1] if precision.size: ap = np.mean(precision) else: ap = float('nan') results_per_category.append( (f'{nm["name"]}', f'{float(ap):0.3f}')) num_columns = min(6, len(results_per_category) * 2) results_flatten = list( itertools.chain(*results_per_category)) headers = ['category', 'AP'] * (num_columns // 2) results_2d = itertools.zip_longest(*[ results_flatten[i::num_columns] for i in range(num_columns) ]) table_data = [headers] table_data += [result for result in results_2d] table = AsciiTable(table_data) print_log('\n' + table.table) torch.set_num_threads(n_threads) return coco_evaluator
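A minimal sketch (fabricated tensors) of the {image_id: prediction} mapping that coco_evaluate() feeds to CocoEvaluator.update() above.

import torch

targets = [{'image_id': torch.tensor(42)}, {'image_id': torch.tensor(43)}]
outputs = [
    {'boxes': torch.rand(3, 4), 'scores': torch.rand(3),
     'labels': torch.ones(3, dtype=torch.int64)},
    {'boxes': torch.rand(1, 4), 'scores': torch.rand(1),
     'labels': torch.ones(1, dtype=torch.int64)},
]
cpu_device = torch.device('cpu')
outputs = [{k: v.to(cpu_device) for k, v in o.items()} for o in outputs]
res = {t['image_id'].item(): o for t, o in zip(targets, outputs)}
print(sorted(res.keys()))  # [42, 43]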