def after_epoch(self): # No way to use **kwargs storage = get_event_storage() metric_dict = dict( metric=storage.latest()[self.metric_name][0] if self.metric_name in storage.latest() else -1 ) self.step(self.trainer.epoch, **metric_dict)
def _log_accuracy(self, pred_class_logits, gt_classes): """ Log the accuracy metrics to EventStorage. """ bsz = pred_class_logits.size(0) maxk = max(self._topk) _, pred_class = pred_class_logits.topk(maxk, 1, True, True) pred_class = pred_class.t() correct = pred_class.eq(gt_classes.view(1, -1).expand_as(pred_class)) ret = [] for k in self._topk: correct_k = correct[:k].view(-1).float().sum(dim=0, keepdim=True) ret.append(correct_k.mul_(1. / bsz)) storage = get_event_storage() storage.put_scalar("cls_accuracy", ret[0])
def _write_metrics(self, loss_dict: Dict[str, torch.Tensor], data_time: float): """ Args: loss_dict (dict): dict of scalar losses data_time (float): time taken by the dataloader iteration """ device = next(iter(loss_dict.values())).device # Use a new stream so these ops don't wait for DDP or backward with torch.cuda.stream(torch.cuda.Stream() if device.type == "cuda" else None): metrics_dict = { k: v.detach().cpu().item() for k, v in loss_dict.items() } metrics_dict["data_time"] = data_time # Gather metrics among all workers for logging # This assumes we do DDP-style training, which is currently the only # supported method in detectron2. all_metrics_dict = comm.gather(metrics_dict) if comm.is_main_process(): storage = get_event_storage() # data_time among workers can have high variance. The actual latency # caused by data_time is the maximum among workers. data_time = np.max([x.pop("data_time") for x in all_metrics_dict]) storage.put_scalar("data_time", data_time) # average the rest metrics metrics_dict = { k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys() } total_losses_reduced = sum(metrics_dict.values()) if not np.isfinite(total_losses_reduced): raise FloatingPointError( f"Loss became infinite or NaN at iteration={self.iter}!\n" f"loss_dict = {metrics_dict}") storage.put_scalar("total_loss", total_losses_reduced) if len(metrics_dict) > 1: storage.put_scalars(**metrics_dict)