def evaluate(self):
    """Summarise prediction counts per image and confidence per prediction.

    In distributed mode the per-rank lists are gathered on rank 0 and all
    other ranks return an empty dict.

    Returns:
        dict: in the format expected by detectron2's ``tools/train_net.py``:

            * key: the name of the task (here ``"false_positives"``)
            * value: a dict of {metric name: score}
    """
    if not self._distributed:
        counts = self.prediction_counts
        scores = self.confidence_scores
    else:
        comm.synchronize()
        gathered_counts = comm.gather(self.prediction_counts, dst=0)
        counts = list(itertools.chain.from_iterable(gathered_counts))
        gathered_scores = comm.gather(self.confidence_scores, dst=0)
        scores = list(itertools.chain.from_iterable(gathered_scores))
        if not comm.is_main_process():
            return {}

    mpi = np.mean(counts)
    mcp = np.mean(scores)

    output_metrics = OrderedDict()
    output_metrics["false_positives"] = {
        "predictions_per_image": mpi,
        "confidence_per_prediction": mcp,
    }

    logger.info(f"mean predictions per image: {mpi}")
    logger.info(f"mean confidence per prediction: {mcp}")
    return output_metrics
def evaluate(self):
    """Evaluate gathered predictions against the CrowdHuman ground truth.

    In distributed mode, predictions and submission records are gathered on
    rank 0; every other rank returns an empty dict.

    The COCO-format detections are written to
    ``crowdhuman_evaluate_results.json`` because scoring loads the results
    from that file. The file is placed in ``self._output_dir`` when it is
    set, otherwise in a temporary directory that is removed afterwards.
    ``submission.txt`` is only written when an output directory is set.

    Returns:
        OrderedDict: ``{"PedestrianDetection": {"<gt name> <metric>": value}}``,
        or an empty dict on non-main ranks / when there are no predictions.
    """
    # Local import: only this method needs it, for the no-output-dir fallback.
    import tempfile

    if self._distributed:
        comm.synchronize()
        self._predictions = comm.gather(self._predictions, dst=0)
        self._predictions = list(itertools.chain(*self._predictions))
        self.submit_results = comm.gather(self.submit_results, dst=0)
        self.submit_results = list(itertools.chain(*self.submit_results))
        if not comm.is_main_process():
            return {}

    if len(self._predictions) == 0:
        self._logger.warning(
            "[COCOEvaluator] Did not receive valid predictions.")
        return {}

    self._logger.info("Preparing results for COCO format ...")
    self._coco_results = list(
        itertools.chain(*[x["instances"] for x in self._predictions]))

    with tempfile.TemporaryDirectory(prefix="crowdhuman_eval_") as scratch_dir:
        # Previously ``res_file`` was only bound when an output directory
        # was configured, which raised NameError at ``loadRes`` otherwise.
        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            results_dir = self._output_dir
        else:
            results_dir = scratch_dir

        res_file = os.path.join(results_dir,
                                "crowdhuman_evaluate_results.json")
        self._logger.info("Saving results to {}".format(res_file))
        with PathManager.open(res_file, "w") as f:
            f.write(json.dumps(self._coco_results))
            f.flush()

        if self._output_dir:
            submit_file = os.path.join(self._output_dir, "submission.txt")
            self._logger.info("Saving results to {}".format(submit_file))
            with PathManager.open(submit_file, "w") as f:
                for result in self.submit_results:
                    f.write(json.dumps(result))
                    f.write("\n")
                f.flush()

        self._logger.info("Evaluating predictions ...")

        metrics = ["ALL"]
        results = {}
        ret_results = OrderedDict()
        for gt_json in [self._metadata.gt_file]:
            name = gt_json.split("/")[-1].split(".")[0]
            for id_setup in range(len(metrics)):
                cocoGt = COCO(gt_json)
                cocoDt = cocoGt.loadRes(res_file)
                imgIds = sorted(cocoGt.getImgIds())
                cocoEval = CrowdHumanEval(cocoGt, cocoDt, "bbox")
                cocoEval.params.imgIds = imgIds
                cocoEval.evaluate(id_setup)
                cocoEval.accumulate()
                performance_dict = cocoEval.summarize(id_setup)
                for key, value in performance_dict.items():
                    results[name + " " + key] = value

            self._logger.info(
                "Evaluation results for Pedestrian Detection on CrowdHuman: \n"
                + create_small_table(results))
            ret_results["PedestrianDetection"] = copy.deepcopy(results)

    return ret_results
def evaluate(
    self, max_table_size: int = 25, output_filename: str = "topk_accuracies.pkl"
) -> "OrderedDict[str, Dict[str, Any]]":
    """Turn the accumulated top-k hit counts into accuracies.

    In distributed mode the per-rank totals and hit counts are gathered on
    rank 0 and summed; other ranks return an empty ``OrderedDict``.

    Args:
        max_table_size: the accuracy table is only logged when the number of
            configured ``top_ks`` does not exceed this value.
        output_filename: name of the pickle file written inside
            ``self._output_dir`` (when an output directory is set).

    Returns:
        OrderedDict: ``{"classification": {"top<k>": accuracy}}``. Empty when
        called on a non-main rank or when no samples were counted.
    """
    if self._distributed:
        comm.synchronize()
        totals = comm.gather(self.total, dst=0)
        citks = comm.gather(self.correct_in_top_k, dst=0)
        if not comm.is_main_process():
            return OrderedDict()
        total = sum(totals)
        # merge the per-rank count dictionaries
        correct_in_top_k = {k: 0 for k in self.top_ks}
        for d_citk in citks:
            for k, count in d_citk.items():
                correct_in_top_k[k] += count
    else:
        total = self.total
        correct_in_top_k = self.correct_in_top_k

    # Without samples the normalisation below would divide by zero.
    if total == 0:
        self._logger.warning(
            "Did not receive any samples; skipping top-k accuracy computation.")
        return OrderedDict()

    # normalize into accuracies
    accuracies = {k: v / total for k, v in correct_in_top_k.items()}

    # saving accuracies
    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, output_filename)
        with PathManager.open(file_path, "wb") as f:
            pickle.dump(accuracies, f)

    # displaying accuracies by k as table (if it isn't too huge)
    if len(self.top_ks) <= max_table_size:
        table = tabulate(
            {str(k): (v,) for k, v in accuracies.items()},
            headers="keys",
            showindex="default",
            floatfmt=".3f",
            numalign="left",
        )
        self._logger.info(table)

    # collect and return results
    results = OrderedDict(
        [("classification", {f"top{k}": v for k, v in accuracies.items()})]
    )
    self._logger.info(results)
    return results
def evaluate(self):
    """Gather predictions, optionally save them, and evaluate instances.

    In distributed mode ``self._predictions`` is replaced by the flattened
    list gathered on rank 0; all other ranks return an empty dict.

    Returns:
        dict: a deep copy of ``self._results``, or an empty dict when this
        is not the main process or no predictions were received.
    """
    if self._distributed:
        comm.synchronize()
        per_rank = comm.gather(self._predictions, dst=0)
        self._predictions = list(itertools.chain.from_iterable(per_rank))
        if not comm.is_main_process():
            return {}

    if len(self._predictions) == 0:
        self._logger.warning(
            "[COCOEvaluator] Did not receive valid predictions.")
        return {}

    output_dir = self._output_dir
    if output_dir:
        PathManager.mkdirs(output_dir)
        dump_path = os.path.join(output_dir, "instances_predictions.pth")
        with PathManager.open(dump_path, "wb") as handle:
            torch.save(self._predictions, handle)

    self._results = OrderedDict()
    has_instances = "instances" in self._predictions[0]
    if has_instances:
        self._eval_predictions()

    # Hand back a copy so callers cannot mutate the evaluator's state.
    return copy.deepcopy(self._results)
def evaluate(self):
    """Gather predictions, optionally save them, and run all evaluations.

    In distributed mode predictions are gathered on rank 0 and every other
    rank returns an empty dict.

    Returns:
        dict: a deep copy of ``self._results``, or an empty dict when this
        is not the main process or no predictions were received.
    """
    if not self._distributed:
        predictions = self._predictions
    else:
        comm.synchronize()
        per_rank = comm.gather(self._predictions, dst=0)
        predictions = list(itertools.chain.from_iterable(per_rank))
        if not comm.is_main_process():
            return {}

    # predictions: list of dict
    # [{'image_id', 'instances' (list of dict
    #   [{'image_id', 'category_id', bbox, score}])}]
    if len(predictions) == 0:
        self._logger.warning(
            "[SMDEvaluator] Did not receive valid predictions.")
        return {}

    output_dir = self._output_dir
    if output_dir:
        PathManager.mkdirs(output_dir)
        dump_path = os.path.join(output_dir, "instances_predictions.pth")
        with PathManager.open(dump_path, "wb") as handle:
            torch.save(predictions, handle)

    self._results = OrderedDict()
    first = predictions[0]
    if "proposals" in first:
        self._eval_box_proposals(predictions)
    if "instances" in first:
        self._eval_predictions(set(self._tasks), predictions)
        # NOTE(review): assumed to belong to the "instances" branch;
        # confirm against the intended nesting.
        self._eval_predictions_others(self._coco_api, predictions)

    # Hand back a copy so callers cannot mutate the evaluator's state.
    return copy.deepcopy(self._results)
def evaluate(self):
    """Evaluate once all predictions have been collected.

    In distributed mode predictions are gathered on rank 0 and every other
    rank returns an empty dict.

    Returns:
        dict: a deep copy of ``self._results``, or an empty dict when this
        is not the main process or no predictions were received.
    """
    # Runs once all the predictions are available.
    if not self._distributed:
        predictions = self._predictions
    else:
        comm.synchronize()
        per_rank = comm.gather(self._predictions, dst=0)
        predictions = list(itertools.chain.from_iterable(per_rank))
        if not comm.is_main_process():
            return {}

    if len(predictions) == 0:
        self._logger.warning(
            "[COCOEvaluator] Did not receive valid predictions.")
        return {}

    output_dir = self._output_dir
    if output_dir:
        PathManager.mkdirs(output_dir)
        dump_path = os.path.join(output_dir, "instances_predictions.pth")
        with PathManager.open(dump_path, "wb") as handle:
            torch.save(predictions, handle)

    self._results = OrderedDict()
    first = predictions[0]
    if "proposals" in first:
        self._eval_box_proposals(predictions)
    if "instances" in first:
        self._eval_predictions(set(self._tasks), predictions)

    # Hand back a copy so callers cannot mutate the evaluator's state.
    return copy.deepcopy(self._results)
def _write_metrics(self, metrics_dict: dict):
    """Gather scalar metrics from all workers and log them on the main process.

    ``data_time`` (when present) is logged as the maximum over workers; all
    other metrics are averaged. ``total_loss`` is the sum of the averaged
    metrics whose name starts with ``"loss"``.

    Args:
        metrics_dict (dict): dict of scalar metrics
    """
    scalars = {}
    for name, value in metrics_dict.items():
        if isinstance(value, torch.Tensor):
            scalars[name] = value.detach().cpu().item()
        else:
            scalars[name] = float(value)

    # Gather metrics among all workers for logging. This assumes DDP-style
    # training, which is currently the only method supported in detectron2.
    per_worker = comm.gather(scalars)
    if not comm.is_main_process():
        return

    if "data_time" in per_worker[0]:
        data_time = np.max([m.pop("data_time") for m in per_worker])
        self.storage.put_scalar("data_time", data_time)

    averaged = {
        name: np.mean([m[name] for m in per_worker])
        for name in per_worker[0].keys()
    }

    # Only entries named "loss..." contribute to the reported total.
    loss_terms = {
        name: value for name, value in averaged.items() if name[:4] == "loss"
    }
    total_losses_reduced = sum(loss_terms.values())

    self.storage.put_scalar("total_loss", total_losses_reduced)
    if len(averaged) > 1:
        self.storage.put_scalars(**averaged)
def _write_metrics(self, metrics_dict: dict):
    """Gather scalar metrics from all workers and log them on the main process.

    ``data_time`` (when present) is logged as the maximum over workers; all
    remaining metrics are averaged and their sum is logged as ``total_loss``.

    Args:
        metrics_dict (dict): dict of scalar metrics
    """

    def _to_python_scalar(value):
        # Tensors are detached and moved to CPU before extracting the number.
        if isinstance(value, torch.Tensor):
            return value.detach().cpu().item()
        return float(value)

    local_metrics = {
        name: _to_python_scalar(value) for name, value in metrics_dict.items()
    }

    # Gather metrics among all workers for logging. This assumes DDP-style
    # training, which is currently the only method supported in detectron2.
    gathered = comm.gather(local_metrics)
    if not comm.is_main_process():
        return

    if "data_time" in gathered[0]:
        # data_time among workers can have high variance. The actual latency
        # caused by data_time is the maximum among workers.
        slowest = np.max([worker.pop("data_time") for worker in gathered])
        self.storage.put_scalar("data_time", slowest)

    # average the remaining metrics
    mean_metrics = {}
    for name in gathered[0].keys():
        mean_metrics[name] = np.mean([worker[name] for worker in gathered])

    total_losses_reduced = sum(mean_metrics.values())
    self.storage.put_scalar("total_loss", total_losses_reduced)
    if len(mean_metrics) > 1:
        self.storage.put_scalars(**mean_metrics)
def evaluate(self):
    """Collect raw per-image prediction fields and per-category annotation counts.

    In distributed mode predictions are gathered on rank 0 and every other
    rank returns an empty dict.

    Returns:
        dict: ``{"predictions": {...}, "category_counts": {...}}`` where
        ``predictions`` holds one list per field
        (``image_contains_mixed_unknowns``, ``scores``, ``correct``,
        ``pred_classes``) with one entry per prediction record, and
        ``category_counts`` maps each internal category id to the number of
        annotation ids of the dataset categories mapped to it.
    """
    if self._distributed:
        comm.synchronize()
        predictions = comm.gather(self._predictions, dst=0)
        predictions = list(itertools.chain(*predictions))
        if not comm.is_main_process():
            return {}
    else:
        predictions = self._predictions

    image_contains_mixed_unknowns = [
        prediction['image_contains_mixed_unknowns']
        for prediction in predictions
    ]
    scores = [prediction['scores'] for prediction in predictions]
    correct = [prediction['correct'] for prediction in predictions]
    pred_classes = [prediction['pred_classes'] for prediction in predictions]

    category_counts = {}
    for category in self._coco_api.cats:
        internal_id = self.internal_dataset_mapping[category]
        # Accumulate under the mapped id. The previous membership test used
        # the raw dataset id, which reset the count whenever several
        # categories shared one internal id.
        category_counts[internal_id] = category_counts.get(
            internal_id, 0) + len(self._coco_api.getAnnIds(catIds=[category]))

    return dict(
        predictions=dict(
            image_contains_mixed_unknowns=image_contains_mixed_unknowns,
            scores=scores,
            correct=correct,
            pred_classes=pred_classes),
        category_counts=category_counts)
def evaluate(self):
    """Run the Pascal-VOC style box evaluation on the gathered predictions.

    Per-class detection lines are written to a temporary directory and
    scored with ``voc_eval`` at IoU thresholds 0.50, 0.55, ..., 0.95.

    Returns:
        dict: has a key "bbox", whose value is a dict of "AP", "AP50", and
        "AP75". Non-main processes return ``None``.
    """
    gathered = comm.gather(self._predictions, dst=0)
    if not comm.is_main_process():
        return

    # Merge the per-rank {class id: [detection lines]} dictionaries.
    merged = defaultdict(list)
    for rank_predictions in gathered:
        for class_id, det_lines in rank_predictions.items():
            merged[class_id].extend(det_lines)
    del gathered

    metric_year = 2007 if self._is_2007 else 2012
    self._logger.info(
        "Evaluating {} using {} metric. "
        "Note that results do not use the official Matlab API.".format(
            self._dataset_name, metric_year
        )
    )

    ap_by_iou = defaultdict(list)  # iou -> ap per class
    with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as tmp_dir:
        res_file_template = os.path.join(tmp_dir, "{}.txt")

        for class_id, class_name in enumerate(self._class_names):
            det_lines = merged.get(class_id, [""])
            with open(res_file_template.format(class_name), "w") as out_file:
                out_file.write("\n".join(det_lines))

            for iou_percent in range(50, 100, 5):
                _rec, _prec, ap = voc_eval(
                    res_file_template,
                    self._anno_file_template,
                    self._image_set_path,
                    class_name,
                    ovthresh=iou_percent / 100.0,
                    use_07_metric=self._is_2007,
                )
                ap_by_iou[iou_percent].append(ap * 100)

    mean_ap = {iou: np.mean(values) for iou, values in ap_by_iou.items()}
    ret = OrderedDict()
    ret["bbox"] = {
        "AP": np.mean(list(mean_ap.values())),
        "AP50": mean_ap[50],
        "AP75": mean_ap[75],
    }
    return ret
def evaluate(self, img_ids=None):
    """Gather predictions, optionally save them, and run the evaluations.

    Args:
        img_ids: a list of image IDs to evaluate on. Default to None for the
            whole dataset

    Returns:
        dict: a deep copy of ``self._results``, or an empty dict when this
        is not the main process or no predictions were received.
    """
    if not self._distributed:
        predictions = self._predictions
    else:
        comm.synchronize()
        per_rank = comm.gather(self._predictions, dst=0)
        predictions = list(itertools.chain.from_iterable(per_rank))
        if not comm.is_main_process():
            return {}

    if len(predictions) == 0:
        self._logger.warning("[COCOEvaluator] Did not receive valid predictions.")
        return {}

    output_dir = self._output_dir
    if output_dir:
        PathManager.mkdirs(output_dir)
        dump_path = os.path.join(output_dir, "instances_predictions.pth")
        with PathManager.open(dump_path, "wb") as handle:
            torch.save(predictions, handle)

    self._results = OrderedDict()
    first = predictions[0]
    if "proposals" in first:
        self._eval_box_proposals(predictions)
    if "instances" in first:
        self._eval_predictions(predictions, img_ids=img_ids)

    # Hand back a copy so callers cannot mutate the evaluator's state.
    return copy.deepcopy(self._results)
def evaluate(self):
    """Gather predictions, optionally save them, and compute the mIoU.

    In distributed mode ``self._predictions`` is replaced by the flattened
    list gathered on rank 0; all other ranks return an empty dict.

    Returns:
        dict: ``{"miou": value}`` with the value returned by
        ``self._eval_predictions`` when the predictions carry "instances".
        Otherwise a deep copy of ``self._results`` is returned (previously
        this path failed because ``miou`` was never assigned). An empty dict
        is returned on non-main ranks or when there are no predictions.
    """
    if self._distributed:
        comm.synchronize()
        self._predictions = comm.gather(self._predictions, dst=0)
        self._predictions = list(itertools.chain(*self._predictions))
        if not comm.is_main_process():
            return {}

    if len(self._predictions) == 0:
        self._logger.warning(
            "[COCOEvaluator] Did not receive valid predictions.")
        return {}

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir,
                                 "instances_predictions.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(self._predictions, f)

    self._results = OrderedDict()
    first = self._predictions[0]
    if "proposals" in first:
        self._eval_box_proposals()
    if "instances" in first:
        return {'miou': self._eval_predictions(set(self._tasks))}

    # No instance predictions: hand back whatever was recorded in
    # ``self._results`` instead of referencing an unbound local.
    return copy.deepcopy(self._results)
def evaluate(self):
    """Run the VOC-style box evaluation and log a per-class breakdown.

    Per-class detection lines are written to a temporary directory and
    scored with ``voc_eval`` at IoU thresholds 0.50, 0.55, ..., 0.95.

    Returns:
        dict: has a key "bbox", whose value is a dict of "AP", "AP50", and
        "AP75". Non-main processes return ``None``.
    """
    all_predictions = comm.gather(self._predictions, dst=0)
    if not comm.is_main_process():
        return

    # Merge the per-rank {class id: [detection lines]} dictionaries.
    predictions = defaultdict(list)
    for predictions_per_rank in all_predictions:
        for clsid, lines in predictions_per_rank.items():
            predictions[clsid].extend(lines)
    del all_predictions

    self._logger.info(f"Evaluating {self._dataset_name}")

    aps = defaultdict(list)  # iou -> ap per class
    with tempfile.TemporaryDirectory(prefix="digits_voc_eval_") as dirname:
        res_file_template = os.path.join(dirname, "{}.txt")

        for cls_id, cls_name in enumerate(self._classes):
            lines = predictions.get(cls_id, [""])
            with open(res_file_template.format(cls_name), "w") as f:
                f.write("\n".join(lines))

            for thresh in range(50, 100, 5):
                _rec, _prec, ap = voc_eval(
                    res_file_template,
                    self._anno_file_template,
                    self._image_set_path,
                    cls_name,
                    ovthresh=thresh / 100.0,
                )
                aps[thresh].append(ap * 100)

    ret = OrderedDict()
    mAP = {iou: np.mean(x) for iou, x in aps.items()}
    ret["bbox"] = {
        "AP": np.mean(list(mAP.values())),
        "AP50": mAP[50],
        "AP75": mAP[75]
    }

    # write per class AP to logger
    per_class_res = dict(zip(self._classes, aps[50]))
    self._logger.info("Evaluate per-class mAP50:\n" +
                      create_small_table(per_class_res))
    self._logger.info("Evaluate overall bbox:\n" +
                      create_small_table(ret["bbox"]))
    return ret
def split_epoch_end(self, outputs, split='val'):
    """Aggregate the step outputs of one evaluation split on the main process.

    The outputs of all workers are gathered. The main process averages the
    loss, saves the predictions under ``eval_results/``, optionally runs the
    language evaluation and steps the plateau scheduler. The resulting
    metrics are then shared with every worker through ``all_gather``.

    Args:
        outputs: list of per-step dicts with the keys ``loss``,
            ``predictions`` and ``n_predictions``.
        split: name of the evaluated split; used in the saved file name and
            passed to the language evaluation.

    Returns:
        dict: metric name -> tensor. Always contains ``loss`` and
        ``to_monitor``; the language metrics are included when
        ``opt.language_eval`` is enabled.
    """
    outputs = d2comm.gather(outputs)
    # master node
    if d2comm.is_main_process():
        assert self.trainer.node_rank == 0 and self.trainer.local_rank == 0
        # Flatten the per-worker lists in a single pass.
        outputs = [step for worker_outputs in outputs for step in worker_outputs]

        opt = self.opt

        loss_mean = sum([_['loss'].item() for _ in outputs]) / len(outputs)

        predictions = [p for _ in outputs for p in _['predictions']]
        if len(outputs[0]['n_predictions']) != 0:
            n_predictions = [p for _ in outputs for p in _['n_predictions']]
        else:
            n_predictions = []

        lang_stats = None
        if len(n_predictions) > 0 and 'perplexity' in n_predictions[0]:
            n_predictions = sorted(n_predictions,
                                   key=lambda x: x['perplexity'])

        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs('eval_results', exist_ok=True)
        torch.save(
            (predictions, n_predictions),
            os.path.join('eval_results/',
                         '.saved_pred_' + opt.id + '_' + split + '.pth'))

        if opt.language_eval:
            lang_stats = eval_utils.language_eval(opt.input_json,
                                                  predictions,
                                                  n_predictions, vars(opt),
                                                  split)

        # ``lang_stats`` stays None when language evaluation is disabled, so
        # every use below has to tolerate that.
        has_cider = lang_stats is not None and 'CIDEr' in lang_stats

        if opt.reduce_on_plateau:
            optimizer = self.trainer.optimizers[0]
            if has_cider:
                optimizer.scheduler_step(-lang_stats['CIDEr'])
            else:
                optimizer.scheduler_step(loss_mean)

        out = {'loss': loss_mean}
        if lang_stats is not None:
            out.update(lang_stats)
        out['to_monitor'] = lang_stats['CIDEr'] if has_cider else -loss_mean
    else:
        out = {}

    out = d2comm.all_gather(out)[0]  # Only the one from master node
    assert len(out) > 0  # make sure the head has index 0

    # must all be tensors
    out = {
        k: torch.tensor(v) if not torch.is_tensor(v) else v
        for k, v in out.items()
    }
    return out
def evaluate(self):
    """Run the VOC-style evaluation and report full / rare / non-rare mAP.

    Per-class detection lines are written to a temporary directory and
    scored with ``voc_eval`` at an IoU threshold of 0.5. Classes listed in
    ``rare_hoi`` contribute to the rare mAP, all others to the non-rare mAP.

    Returns:
        dict: has a key "bbox", whose value is a dict of "full mAP",
        "rare mAP" and "non rare mAP". Non-main processes return ``None``.
    """
    gathered = comm.gather(self._predictions, dst=0)
    if not comm.is_main_process():
        return

    # Merge the per-rank {class key: [detection lines]} dictionaries.
    merged = defaultdict(list)
    for rank_predictions in gathered:
        for class_key, det_lines in rank_predictions.items():
            merged[class_key].extend(det_lines)
    del gathered

    self._logger.info(
        "Evaluating {} . "
        "Note that results do not use the official Matlab API.".format(
            self._dataset_name))

    full_aps = defaultdict(list)  # iou -> ap per class
    rare_aps = defaultdict(list)
    non_rare_aps = defaultdict(list)

    with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as tmp_dir:
        res_file_template = os.path.join(tmp_dir, "{}.txt")

        for class_name in self._class_names:
            # Predictions are keyed by class name here, not by class index.
            det_lines = merged.get(class_name, [""])
            with open(res_file_template.format(class_name), "w") as out_file:
                out_file.write("\n".join(det_lines))

            # FIX: use inside class variable
            for iou_percent in [50]:
                _rec, _prec, ap = voc_eval(
                    self._gts,
                    res_file_template,
                    self._json_folder,
                    class_name,
                    ovthresh=iou_percent / 100.0,
                )
                scaled_ap = ap * 100
                full_aps[iou_percent].append(scaled_ap)
                bucket = rare_aps if class_name in rare_hoi else non_rare_aps
                bucket[iou_percent].append(scaled_ap)

    full_mAP = {iou: np.mean(values) for iou, values in full_aps.items()}
    rare_mAP = {iou: np.mean(values) for iou, values in rare_aps.items()}
    non_rare_mAP = {iou: np.mean(values) for iou, values in non_rare_aps.items()}

    ret = OrderedDict()
    ret["bbox"] = {
        "full mAP": full_mAP[50],
        "rare mAP": rare_mAP[50],
        "non rare mAP": non_rare_mAP[50]
    }
    print(ret)
    return ret
def _do_eval_loss(self, data_loader):
    """Compute and log the validation losses for every batch of ``data_loader``.

    For each batch the model is run under ``torch.no_grad()``, the resulting
    loss values are converted to Python floats under a ``val_`` prefix and
    gathered from all workers. The main process averages them over workers,
    forms a weighted total from the entries that have a weight in
    ``self.weight_dict``, logs a progress line and stores the scalars in the
    current event storage.

    Args:
        data_loader: iterable of model inputs; must support ``len()``.

    Raises:
        FloatingPointError: if the weighted total loss is not finite.
    """
    # NOTE(review): block nesting below was reconstructed from a flattened
    # source; confirm that logging/storage belong inside the per-batch,
    # main-process branch.
    total = len(data_loader)
    with torch.no_grad():
        for idx, inputs in enumerate(data_loader):
            loss_dict = self._model(inputs)
            # loss_dict_scaled = {k: v * self.weight_dict[k] if k in self.weight_dict else v for k, v in loss_dict.items()}
            device = next(iter(loss_dict.values())).device
            # Use a separate CUDA stream for the transfer to CPU when the
            # losses live on a CUDA device.
            with torch.cuda.stream(torch.cuda.Stream() if device.type == "cuda" else None):
                metrics_dict = {
                    'val_' + k: v.detach().cpu().item()
                    for k, v in loss_dict.items()
                }
                all_metrics_dict = comm.gather(metrics_dict)
            if comm.is_main_process():
                # Average every metric over the workers.
                metrics_dict = {
                    k: np.mean([x[k] for x in all_metrics_dict])
                    for k in all_metrics_dict[0].keys()
                }
                # Weighted sum over the metrics whose un-prefixed name has a
                # weight in self.weight_dict; others are ignored here.
                total_losses_reduced = sum(
                    metrics_dict[k] * self.weight_dict[k.split('val_')[-1]]
                    for k in metrics_dict.keys()
                    if k.split('val_')[-1] in self.weight_dict)
                if not np.isfinite(total_losses_reduced):
                    raise FloatingPointError(
                        f"Loss became infinite or NaN at iteration={idx}!\n"
                        f"loss_dict = {metrics_dict}")
                if torch.cuda.is_available():
                    # Bytes -> MiB.
                    max_mem_mb = torch.cuda.max_memory_allocated(
                    ) / 1024.0 / 1024.0
                else:
                    max_mem_mb = None
                log_every_n_seconds(
                    logging.INFO,
                    msg=
                    " iter: {iter}/{total} val_loss:{val_loss} {losses} {memory}"
                    .format(iter=idx + 1,
                            total=total,
                            val_loss='{:.3f}'.format(total_losses_reduced),
                            losses=" ".join([
                                "{}: {:.3f}".format(
                                    k.split('val_loss_')[-1], v)
                                for k, v in metrics_dict.items()
                            ]),
                            memory="max_mem: {:.0f}M".format(max_mem_mb)
                            if max_mem_mb is not None else ""),
                    n=5,
                    name=self.logger)
                storage = get_event_storage()
                if len(metrics_dict) > 1:
                    storage.put_scalars(
                        total_val_loss=total_losses_reduced, **metrics_dict)
def evaluate(self):
    """Save the predictions in unified and OID format and evaluate them.

    In distributed mode ``self._predictions`` is replaced by the flattened
    list gathered on rank 0; all other ranks return ``None``.

    Returns:
        dict: a deep copy of ``self._results`` with the key ``"bbox"``.
        An empty dict when no predictions were received, and ``None`` on
        non-main ranks or when ``self._do_evaluation`` is false.
    """
    if self._distributed:
        comm.synchronize()
        per_rank = comm.gather(self._predictions, dst=0)
        self._predictions = list(itertools.chain.from_iterable(per_rank))
        if not comm.is_main_process():
            return

    if len(self._predictions) == 0:
        self._logger.warning(
            "[LVISEvaluator] Did not receive valid predictions.")
        return {}

    self._logger.info("Preparing results in the OID format ...")
    unified_results = list(
        itertools.chain.from_iterable(
            [record["instances"] for record in self._predictions]))

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        unified_path = os.path.join(self._output_dir,
                                    "unified_instances_results.json")
        self._logger.info("Saving results to {}".format(unified_path))
        with PathManager.open(unified_path, "w") as handle:
            handle.write(json.dumps(unified_results))
            handle.flush()

    self._oid_results = map_back_unified_id(unified_results, self.map_back)

    # unmap the category ids for LVIS (from 0-indexed to 1-indexed)
    for entry in self._oid_results:
        entry["category_id"] += 1

    # The OID results file is always written; the evaluation below reads it.
    PathManager.mkdirs(self._output_dir)
    file_path = os.path.join(self._output_dir, "oid_instances_results.json")
    self._logger.info("Saving results to {}".format(file_path))
    with PathManager.open(file_path, "w") as handle:
        handle.write(json.dumps(self._oid_results))
        handle.flush()

    if not self._do_evaluation:
        self._logger.info("Annotations are not available for evaluation.")
        return

    self._logger.info("Evaluating predictions ...")
    self._results = OrderedDict()
    self._results['bbox'] = _evaluate_predictions_on_oid(
        self._oid_api, file_path, eval_seg=self._mask_on)
    return copy.deepcopy(self._results)
def evaluate(
        self,
        max_table_size: int = 25) -> "OrderedDict[str, Dict[str, Any]]":
    """Reduce the confusion matrix, save it and report top-1 accuracy.

    In distributed mode the per-rank confusion matrices are gathered on
    rank 0 and summed; other ranks return an empty ``OrderedDict``.

    Args:
        max_table_size: the confusion matrix is only logged as a table when
            ``self.num_classes`` does not exceed this value.

    Returns:
        OrderedDict: ``{self._task_name: {"top1": accuracy}}``.
    """
    if not self._distributed:
        cm = self._cm
    else:
        # if distributed, gather and sum confusion matrices
        comm.synchronize()
        per_rank = comm.gather(self._cm, dst=0)
        if not comm.is_main_process():
            return OrderedDict()
        cm = torch.stack(per_rank, dim=0).sum(dim=0)

    # saving confusion matrix (binary and JSON form)
    if self._output_dir:
        PathManager.mkdirs(self._output_dir)

        pth_path = os.path.join(self._output_dir,
                                self._output_name + ".pth")
        with PathManager.open(pth_path, "wb") as handle:
            torch.save(cm, handle)

        json_path = os.path.join(self._output_dir,
                                 self._output_name + ".json")
        with PathManager.open(json_path, "w") as handle:
            json.dump({"confusion_matrix": cm.to("cpu").tolist()}, handle)

    # calculating accuracy
    accuracy = self.accuracy(cm)

    # displaying confusion matrix as table (if it isn't too huge)
    if self.num_classes <= max_table_size:
        headers = ()
        showindex = "default"
        if self._metadata is not None:
            headers = self._metadata.get("classes", default=headers)
            showindex = self._metadata.get("classes", default=showindex)
        self._logger.info(
            tabulate(
                cm,
                headers=headers,
                showindex=showindex,
                tablefmt="pipe",
                floatfmt=".0f",
                numalign="left",
            ))

    # collect and return results
    results = OrderedDict()
    results[self._task_name] = {"top1": accuracy}
    self._logger.info(results)
    return results
def evaluate(self):
    """Run the hand evaluation on the collected predictions and ground truths.

    In distributed mode predictions and ground truths are gathered on rank 0
    and every other rank returns an empty dict. In single-process mode the
    locally collected ``self._preds`` / ``self._gts`` are used (previously
    this path failed because ``preds`` and ``gts`` were never assigned).

    Returns:
        ``self._results``. NOTE(review): the return value of
        ``hand_evaluation`` is not used here; presumably results are stored
        elsewhere — confirm.
    """
    if self._distributed:
        comm.synchronize()
        preds = comm.gather(self._preds, dst=0)
        preds = list(itertools.chain(*preds))
        gts = comm.gather(self._gts, dst=0)
        gts = list(itertools.chain(*gts))
        if not comm.is_main_process():
            return {}
    else:
        preds = self._preds
        gts = self._gts

    hand_evaluation(preds, gts, self.cfg)
    return self._results
def evaluate(self):
    """Run the Pascal-VOC style box evaluation with a worker pool.

    Per-class detection lines are written to ``tmp/dets`` and scored with
    ``voc_eval`` at IoU thresholds 0.50, 0.55, ..., 0.95. The thresholds of
    one class are evaluated in parallel.

    Returns:
        dict: has a key "bbox", whose value is a dict of "AP", "AP50", and
        "AP75". Non-main processes return ``None``.
    """
    all_predictions = comm.gather(self._predictions, dst=0)
    if not comm.is_main_process():
        return

    # Merge the per-rank {class id: [detection lines]} dictionaries.
    predictions = defaultdict(list)
    for predictions_per_rank in all_predictions:
        for clsid, lines in predictions_per_rank.items():
            predictions[clsid].extend(lines)
    del all_predictions

    self._logger.info(
        "Evaluating {} using {} metric. "
        "Note that results do not use the official Matlab API.".format(
            self._dataset_name, 2007 if self._is_2007 else 2012))

    dirname = Path("tmp/dets")
    dirname.mkdir(parents=True, exist_ok=True)
    res_file_template = os.path.join(dirname, "{}.txt")

    thresholds = range(50, 100, 5)
    aps = defaultdict(list)  # iou -> ap per class
    pool = mp.Pool(10)
    try:
        for cls_id, cls_name in enumerate(self._class_names):
            lines = predictions.get(cls_id, [""])
            with open(res_file_template.format(cls_name), "w") as f:
                f.write("\n".join(lines))

            args = [[
                res_file_template, self._anno_file_template,
                self._image_set_path, cls_name, thresh / 100.0,
                self._is_2007
            ] for thresh in thresholds]
            results = pool.starmap(voc_eval, args)
            for thresh, (_rec, _prec, ap) in zip(thresholds, results):
                aps[thresh].append(ap * 100)
    finally:
        # Always release the worker processes, even if writing a result
        # file or an evaluation task raised.
        pool.close()
        pool.join()

    ret = OrderedDict()
    mAP = {iou: np.mean(x) for iou, x in aps.items()}
    ret["bbox"] = {
        "AP": np.mean(list(mAP.values())),
        "AP50": mAP[50],
        "AP75": mAP[75]
    }
    return ret
def evaluate(self):
    """Write one result file per image and run the WIDER FACE evaluation.

    Each file contains the file stem, the number of boxes, and one
    ``x y w h confidence`` line per box. Boxes are read as
    ``(x1, y1, x2, y2, score)`` and converted to integer ``x y w h``.

    Returns:
        dict: has a key "bbox", whose value is a dict of "Easy", "Medium"
        and "Hard". Non-main processes return ``None``.
    """
    gathered = comm.gather(self._predictions, dst=0)
    if not comm.is_main_process():
        return

    # Merge the per-rank {image id: [boxes]} dictionaries.
    merged = defaultdict(list)
    for rank_predictions in gathered:
        for image_key, boxes in rank_predictions.items():
            merged[image_key].extend(boxes)
    del gathered

    tmp_results_path = os.path.join(self._output_folder,
                                    "wider_face_val_results")

    for image_id, boxes in merged.items():
        # Drop the 4-character extension of the image id.
        result_file = f"{tmp_results_path}/{image_id[:-4]}.txt"
        parent_dir = os.path.dirname(result_file)
        if not os.path.isdir(parent_dir):
            os.makedirs(parent_dir)

        with open(result_file, "w") as fd:
            fd.write(os.path.basename(result_file)[:-4] + "\n")
            fd.write(str(len(boxes)) + "\n")
            for box in boxes:
                left = int(box[0])
                top = int(box[1])
                width = int(box[2]) - int(box[0])
                height = int(box[3]) - int(box[1])
                confidence = str(float(box[4]))
                fd.write(f"{left} {top} {width} {height} {confidence} \n")

    aps = evaluation(tmp_results_path, "datasets/widerface/val/ground_truth")

    ret = OrderedDict()
    ret["bbox"] = {"Easy": aps[0], "Medium": aps[1], "Hard": aps[2]}
    return ret
def _file_storage_gather(
    storage: SingleProcessFileTensorStorage,
    dst_rank: int = 0,
    mode: str = "rb",
) -> Optional[MultiProcessFileTensorStorage]:
    """Combine the file storages of all ranks into one multi-process storage.

    The local storage is closed first, then the file paths of all ranks are
    gathered on ``dst_rank``.

    Args:
        storage: the single-process storage of the calling rank.
        dst_rank: rank that receives the combined storage.
        mode: mode passed on to ``MultiProcessFileTensorStorage``.

    Returns:
        The combined storage on ``dst_rank``; ``None`` on every other rank.
    """
    storage.storage_impl.close()
    gathered_paths = gather(storage.fpath, dst=dst_rank)
    if get_rank() != dst_rank:
        return None
    # Position in the gathered list is used as the rank key.
    rank_to_fpath = dict(enumerate(gathered_paths))
    return MultiProcessFileTensorStorage(storage.data_schema, rank_to_fpath, mode)
def _write_metrics(
    self,
    loss_dict: Dict[str, torch.Tensor],
    data_time: float,
    prefix: str = "",
):
    """Gather losses from all workers and log supervised and CSD totals.

    Only the main process writes to the event storage. ``data_time`` is
    logged as the maximum over workers and the losses as the mean over
    workers.

    Args:
        loss_dict (dict): dict of scalar losses
        data_time (float): time taken by the dataloader iteration
        prefix (str): prefix for the ``total_loss`` scalar name

    Raises:
        FloatingPointError: if the supervised total loss is not finite.
    """
    local_metrics = {
        name: tensor.detach().cpu().item() for name, tensor in loss_dict.items()
    }
    local_metrics["data_time"] = data_time

    # Gather metrics among all workers for logging. This assumes DDP-style
    # training, which is currently the only method supported in detectron2.
    gathered = comm.gather(local_metrics)
    if not comm.is_main_process():
        return

    storage = get_event_storage()

    # data_time among workers can have high variance. The actual latency
    # caused by data_time is the maximum among workers.
    data_time = np.max([worker.pop("data_time") for worker in gathered])
    storage.put_scalar("data_time", data_time)

    # average the remaining metrics
    metrics_dict = {
        name: np.mean([worker[name] for worker in gathered])
        for name in gathered[0].keys()
    }

    supervised_keys = ["loss_rpn_cls", "loss_rpn_loc", "loss_cls", "loss_box_reg"]
    total_sup_loss = sum(metrics_dict[key] for key in supervised_keys)
    if not np.isfinite(total_sup_loss):
        raise FloatingPointError(
            f"Loss became infinite or NaN at iteration={self.iter}!\n"
            f"loss_dict = {metrics_dict}"
        )

    storage.put_scalar("{}total_loss".format(prefix), total_sup_loss)
    if len(metrics_dict) > 1:
        storage.put_scalars(**metrics_dict)

    storage.put_scalar("csd_weight", self.solver_csd_loss_weight)  # CSD loss weight

    # Store aggregates
    csd_keys = (
        "sup_csd_loss_cls",
        "sup_csd_loss_box_reg",
        "unsup_csd_loss_cls",
        "unsup_csd_loss_box_reg",
    )
    csd_loss = sum(metrics_dict[key] for key in csd_keys) * self.solver_csd_loss_weight
    storage.put_scalar("total_csd_loss", csd_loss)  # Sum of the CSD losses
    storage.put_scalar("total_all_loss", total_sup_loss + csd_loss)  # Sum of all losses
def evaluate(self):
    """Write the panoptic predictions to disk and score them with panopticapi.

    Predictions of all ranks are gathered; only the main process evaluates.
    The PNGs are written to a temporary directory and the prediction JSON to
    ``self._output_dir`` (or the same temporary directory when unset).

    Returns:
        OrderedDict: ``{"panoptic_seg": {...}}`` with PQ/SQ/RQ (in percent)
        for all, "thing" (``_th``) and "stuff" (``_st``) categories.
        Non-main processes return ``None``.
    """
    comm.synchronize()
    per_rank = comm.gather(self._predictions)
    self._predictions = list(itertools.chain.from_iterable(per_rank))
    if not comm.is_main_process():
        return

    # PanopticApi requires local files
    gt_json = PathManager.get_local_path(self._metadata.panoptic_json)
    gt_folder = PathManager.get_local_path(self._metadata.panoptic_root)

    with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir:
        logger.info(
            "Writing all panoptic predictions to {} ...".format(pred_dir))
        for prediction in self._predictions:
            png_path = os.path.join(pred_dir, prediction["file_name"])
            with open(png_path, "wb") as png_file:
                # The PNG bytes are removed from the record so that the
                # record can be serialised to JSON below.
                png_file.write(prediction.pop("png_string"))

        with open(gt_json, "r") as gt_file:
            json_data = json.load(gt_file)
        json_data["annotations"] = self._predictions

        output_dir = self._output_dir or pred_dir
        predictions_json = os.path.join(output_dir, "predictions.json")
        with PathManager.open(predictions_json, "w") as out_file:
            out_file.write(json.dumps(json_data))

        from panopticapi.evaluation import pq_compute

        # Silence the progress output that pq_compute prints to stdout.
        with contextlib.redirect_stdout(io.StringIO()):
            pq_res = pq_compute(
                gt_json,
                PathManager.get_local_path(predictions_json),
                gt_folder=gt_folder,
                pred_folder=pred_dir,
            )

    res = {}
    for suffix, group in (("", "All"), ("_th", "Things"), ("_st", "Stuff")):
        for metric in ("PQ", "SQ", "RQ"):
            res[metric + suffix] = 100 * pq_res[group][metric.lower()]

    results = OrderedDict({"panoptic_seg": res})
    _print_panoptic_results(pq_res)
    return results
def evaluate(self):
    """Gather predictions, export DOTA detections and run the evaluations.

    In distributed mode predictions are gathered on rank 0 and every other
    rank returns an empty dict. The ``dota_instances`` entry is removed from
    every prediction record and collected per class id.

    Returns:
        dict: a deep copy of ``self._results``, or an empty dict when this
        is not the main process or no predictions were received.
    """
    if not self._distributed:
        predictions = self._predictions
    else:
        comm.synchronize()
        per_rank = comm.gather(self._predictions, dst=0)
        predictions = list(itertools.chain.from_iterable(per_rank))
        if not comm.is_main_process():
            return {}

    if len(predictions) == 0:
        self._logger.warning(
            "[COCOEvaluator] Did not receive valid predictions.")
        return {}

    class_names = self._metadata.thing_classes

    # class id -> all DOTA detection lines of that class
    dota_det_dict = {cls_id: [] for cls_id in range(len(class_names))}
    for record in predictions:
        for cls_id in record['dota_instances'].keys():
            dota_det_dict[cls_id] += record['dota_instances'][cls_id]
        record.pop('dota_instances')

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        dump_path = os.path.join(self._output_dir,
                                 "instances_predictions.pth")
        with PathManager.open(dump_path, "wb") as handle:
            torch.save(predictions, handle)

        # NOTE(review): assumed to be nested under the output-dir check;
        # confirm against the intended nesting.
        if self.save_dota:
            dirpath = os.path.join(self._output_dir, "dota_dets")
            # Start from an empty directory: wipe a previous export first.
            if PathManager.exists(dirpath) is not False:
                shutil.rmtree(dirpath)
            PathManager.mkdirs(dirpath)

            # Write class detections
            for cls_id, cls_name in enumerate(class_names):
                dfilename = os.path.join(dirpath, f'Task1_{cls_name}.txt')
                with open(dfilename, 'w') as det_file:
                    for p in dota_det_dict[cls_id]:
                        det_file.write("%s\n" % p)

    self._results = OrderedDict()
    first = predictions[0]
    if "proposals" in first:
        self._eval_box_proposals(predictions)
    if "instances" in first:
        self._eval_predictions(set(self._tasks), predictions)

    # Hand back a copy so callers cannot mutate the evaluator's state.
    return copy.deepcopy(self._results)
def _write_metrics(
    self,
    loss_dict: Dict[str, torch.Tensor],
    data_time: float,
    prefix: str = "",
):
    """Log the reduced losses to the event storage and to Comet.

    Patch for the existing Default Trainer ``_write_metrics`` method so that
    metrics can also be logged to Comet. Only the main process logs.

    Args:
        loss_dict (dict): dict of scalar losses
        data_time (float): time taken by the dataloader iteration
        prefix (str): prefix used for the logged metric names

    Raises:
        FloatingPointError: if the summed loss is not finite.
    """
    local_metrics = {}
    for name, tensor in loss_dict.items():
        local_metrics[name] = tensor.detach().cpu().item()
    local_metrics["data_time"] = data_time

    # Gather metrics among all workers for logging. This assumes DDP-style
    # training, which is currently the only method supported in detectron2.
    gathered = comm.gather(local_metrics)
    if not comm.is_main_process():
        return

    storage = get_event_storage()

    # data_time among workers can have high variance. The actual latency
    # caused by data_time is the maximum among workers.
    data_time = np.max([worker.pop("data_time") for worker in gathered])
    storage.put_scalar("data_time", data_time)

    # average the remaining metrics
    metrics_dict = {
        name: np.mean([worker[name] for worker in gathered])
        for name in gathered[0].keys()
    }
    total_losses_reduced = sum(metrics_dict.values())
    if not np.isfinite(total_losses_reduced):
        raise FloatingPointError(
            f"Loss became infinite or NaN at iteration={self.iter}!\n"
            f"loss_dict = {metrics_dict}")

    total_name = "{}total_loss".format(prefix)

    # Comet logging
    self.experiment.log_metrics(metrics_dict, prefix=prefix)
    self.experiment.log_metric(total_name, total_losses_reduced)

    # detectron2 event storage
    storage.put_scalar(total_name, total_losses_reduced)
    if len(metrics_dict) > 1:
        storage.put_scalars(**metrics_dict)
def evaluate(self):
    """Save the predictions and score them, or return the component list.

    In distributed mode ``self._predictions`` is replaced by the result of
    gathering on rank 0, and every other rank returns an empty dict.

    Returns:
        An empty dict on non-main ranks or when there are no predictions;
        a deep copy of ``self.return_component_list()`` when
        ``self.inference_only`` is set; otherwise a deep copy of
        ``self._results``, whose ``"bbox"`` entry holds the four fail
        fractions and the ``correct_catalogue`` score.
    """
    # for parallel execution
    if self._distributed:
        comm.synchronize()
        # NOTE: the gathered value is stored exactly as comm.gather returns
        # it; the per-rank results are not flattened into one list here.
        self._predictions = comm.gather(self._predictions, dst=0)
        if not comm.is_main_process():
            return {}

    # Nothing to evaluate
    if len(self._predictions) == 0:
        logger.warning(
            "[LOFAREvaluator] Did not receive valid predictions.")
        return {}

    # Persist the predicted instances
    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        predictions_path = os.path.join(self._output_dir,
                                        "instances_predictions.pth")
        with PathManager.open(predictions_path, "wb") as out_file:
            torch.save(self._predictions, out_file)

    if self.inference_only:
        return copy.deepcopy(self.return_component_list())

    assoc_fail, unassoc_fail = self._evaluate_predictions_on_lofar_score()

    # Calculate/print catalogue improvement
    base_score = self.baseline()
    correct_cat = self.our_score(assoc_fail, unassoc_fail)
    self.improv(base_score, correct_cat)

    bbox_metrics = {
        "assoc_single_fail_fraction": assoc_fail[0],
        "assoc_multi_fail_fraction": assoc_fail[1],
        "unassoc_single_fail_fraction": unassoc_fail[0],
        "unassoc_multi_fail_fraction": unassoc_fail[1],
        "correct_catalogue": correct_cat,
    }
    self._results = OrderedDict()
    self._results["bbox"] = bbox_metrics
    # Copy so the caller can do whatever with results
    return copy.deepcopy(self._results)
def evaluate(self):
    """Merge the per-rank detections and compute Pascal VOC average precision.

    The detections of every class are written to a file in a temporary
    directory and scored with ``voc_eval`` at IoU thresholds 0.50 to 0.95 in
    steps of 0.05.

    Returns:
        dict: has a key "bbox", whose value is a dict of "AP", "AP50", and
        "AP75" (in percent). Non-main ranks return ``None``.
    """
    gathered = comm.gather(self._predictions, dst=0)
    if not comm.is_main_process():
        return

    # Merge the {class id: lines} mappings of all ranks.
    merged = defaultdict(list)
    for rank_predictions in gathered:
        for cls_id, cls_lines in rank_predictions.items():
            merged[cls_id].extend(cls_lines)
    del gathered

    metric_year = 2007 if self._is_2007 else 2012
    self._logger.info(
        "Evaluating {} using {} metric. "
        "Note that results do not use the official Matlab API.".format(
            self._dataset_name, metric_year))

    ap_by_iou = defaultdict(list)  # iou -> ap per class
    with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as tmp_dir:
        det_file_template = os.path.join(tmp_dir, "{}.txt")

        for cls_id, cls_name in enumerate(self._class_names):
            # A class without detections still gets an (empty) file.
            cls_lines = merged.get(cls_id, [""])
            with open(det_file_template.format(cls_name), "w") as det_file:
                det_file.write("\n".join(cls_lines))

            for iou_percent in range(50, 100, 5):
                _rec, _prec, ap = voc_eval(
                    det_file_template,
                    self._anno_file_template,
                    self._image_set_path,
                    cls_name,
                    ovthresh=iou_percent / 100.0,
                    use_07_metric=self._is_2007,
                )
                ap_by_iou[iou_percent].append(ap * 100)

    self._logger.info("Class-wise breakdown of AP at 0.5 IoU")
    for cls_name, cls_ap in zip(self._class_names, ap_by_iou[50]):
        self._logger.info("{} --> {}".format(cls_name, cls_ap))

    mean_ap_by_iou = {
        iou_percent: np.mean(class_aps)
        for iou_percent, class_aps in ap_by_iou.items()
    }
    results = OrderedDict()
    results["bbox"] = {
        "AP": np.mean(list(mean_ap_by_iou.values())),
        "AP50": mean_ap_by_iou[50],
        "AP75": mean_ap_by_iou[75],
    }
    return results
def _ram_storage_gather(
        storage: SingleProcessRamTensorStorage,
        dst_rank: int = 0) -> MultiProcessRamTensorStorage:
    """Gather the contents of every rank's RAM storage on ``dst_rank``.

    Args:
        storage: this process's storage; its ``storage_impl`` is rewound to
            the start and read in full.
        dst_rank: rank that receives the gathered data.

    Returns:
        On ``dst_rank``, a ``MultiProcessRamTensorStorage`` built from
        ``storage.data_schema`` and one ``io.BytesIO`` per gathered entry,
        keyed by its position in the gathered list. ``None`` on every other
        rank.
    """
    storage.storage_impl.seek(0, os.SEEK_SET)
    # TODO: overhead, pickling a bytes object, can just pass bytes in a tensor directly
    # see detectron2/utils.comm.py
    payloads = gather(storage.storage_impl.read(), dst=dst_rank)
    if get_rank() != dst_rank:
        return None

    buffers_by_rank = {
        rank: io.BytesIO(payload) for rank, payload in enumerate(payloads)
    }
    return MultiProcessRamTensorStorage(storage.data_schema, buffers_by_rank)
def _write_metrics(self, loss_dict: Dict[str, torch.Tensor],
                   data_time: float):
    """Reduce the losses across workers and write them to the event storage.

    Only the main process writes. ``data_time`` is logged as the maximum
    over workers, every other metric as its mean over the workers that
    reported it, and their sum as ``total_loss``.

    Args:
        loss_dict (dict): dict of scalar losses
        data_time (float): time taken by the dataloader iteration

    Raises:
        FloatingPointError: if the summed, averaged loss is not finite.
    """
    device = next(iter(loss_dict.values())).device

    # Use a new stream so these ops don't wait for DDP or backward
    with torch.cuda.stream(
            torch.cuda.Stream() if device.type == "cuda" else None):
        metrics_dict = {
            k: v.detach().cpu().item() for k, v in loss_dict.items()
        }
        metrics_dict["data_time"] = data_time

        # Gather metrics among all workers for logging
        # This assumes we do DDP-style training, which is currently the only
        # supported method in detectron2.
        all_metrics_dict = comm.gather(metrics_dict)

    if not comm.is_main_process():
        return

    storage = get_event_storage()

    # data_time among workers can have high variance. The actual latency
    # caused by data_time is the maximum among workers.
    data_time = np.max([x.pop("data_time") for x in all_metrics_dict])
    storage.put_scalar("data_time", data_time)

    # Average the remaining metrics. Values are grouped per name first, so
    # a worker that lacks a loss term another worker reported cannot raise;
    # previously a bare ``except`` hid that failure and this rank's
    # un-averaged values were logged in place of the mean.
    values_by_name = {}
    for worker_metrics in all_metrics_dict:
        for name, value in worker_metrics.items():
            values_by_name.setdefault(name, []).append(value)
    metrics_dict = {
        name: np.mean(values) for name, values in values_by_name.items()
    }

    total_losses_reduced = sum(metrics_dict.values())
    if not np.isfinite(total_losses_reduced):
        raise FloatingPointError(
            f"Loss became infinite or NaN at iteration={self.iter}!\n"
            f"loss_dict = {metrics_dict}")

    storage.put_scalar("total_loss", total_losses_reduced)
    if len(metrics_dict) > 1:
        storage.put_scalars(**metrics_dict)