def _calculate_step_metrics(self, logits, y):
    # prepare the metrics
    loss = self._loss_function(logits[1], y)  # loss = F.cross_entropy(logits[1], y)
    preds = torch.argmax(logits[1], dim=1)
    num_correct = torch.eq(preds.view(-1), y.view(-1)).sum()
    acc = accuracy(preds, y)
    f1_score = f1(preds, y, num_classes=2, average='weighted')
    fb05_score = fbeta(preds, y, num_classes=2, average='weighted', beta=0.5)
    fb2_score = fbeta(preds, y, num_classes=2, average='weighted', beta=2)
    cm = confusion_matrix(preds, y, num_classes=2)
    prec = precision(preds, y, num_classes=2, class_reduction='weighted')
    rec = recall(preds, y, num_classes=2, class_reduction='weighted')
    # au_roc = auroc(preds, y, pos_label=1)
    return {
        'loss': loss,
        'acc': acc,
        'f1_score': f1_score,
        'f05_score': fb05_score,
        'f2_score': fb2_score,
        'precision': prec,
        'recall': rec,
        # 'auroc': au_roc,
        'confusion_matrix': cm,
        'num_correct': num_correct
    }
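# Context for the two fbeta calls above: F_beta weights precision and recall as
# F_beta = (1 + beta^2) * P * R / (beta^2 * P + R), so beta=0.5 favours precision
# and beta=2 favours recall. A minimal pure-Python sanity check (values below are
# illustrative, not from the original code):
def f_beta(p: float, r: float, beta: float) -> float:
    return (1 + beta ** 2) * p * r / (beta ** 2 * p + r)

# f_beta(0.8, 0.4, 0.5) ≈ 0.667  (pulled toward the precision of 0.8)
# f_beta(0.8, 0.4, 2.0) ≈ 0.444  (pulled toward the recall of 0.4)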
def metrics(self, pred, target, num_classes=3, remove_bg=False, is_swnet=False):
    if is_swnet:
        pred[pred == 2] = 1
        target[target == 2] = 1
        num_classes = 2
    confusion_m = mf.confusion_matrix(pred, target, num_classes=num_classes)
    # acc
    accuracy = confusion_m.diag().sum() / len(pred)
    # kappa
    p0 = accuracy
    pc = 0
    for i in range(confusion_m.shape[0]):
        pc = pc + confusion_m[i].sum() * confusion_m[:, i].sum()
    pc = pc / len(pred) ** 2
    kc = (p0 - pc) / (1 - pc)
    # iou
    if remove_bg:
        iou = mf.iou(pred, target, num_classes=num_classes, ignore_index=0)
    else:
        iou = mf.iou(pred, target, num_classes=num_classes)
    f1 = mf.f1_score(pred, target, num_classes=num_classes, class_reduction='none')
    precision = mf.precision(pred, target, num_classes=num_classes, class_reduction='none')
    recall = mf.recall(pred, target, num_classes=num_classes, class_reduction='none')
    return accuracy, kc, iou, f1, precision, recall
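# A minimal standalone sketch of the Cohen's kappa computation above, assuming
# `confusion_m` is a (C, C) confusion matrix with rows as true classes and
# columns as predicted classes; the function name here is illustrative:
import torch

def cohens_kappa(confusion_m: torch.Tensor) -> torch.Tensor:
    n = confusion_m.sum()
    p0 = confusion_m.diag().sum() / n                                       # observed agreement
    pc = (confusion_m.sum(dim=1) * confusion_m.sum(dim=0)).sum() / n ** 2   # chance agreement
    return (p0 - pc) / (1 - pc)

# e.g. a perfect 2-class prediction yields kappa == 1:
# cohens_kappa(torch.tensor([[3., 0.], [0., 2.]]))  # -> tensor(1.)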
def compute_metrics(self, pred, target):
    metrics = dict()
    metrics['accuracy'] = accuracy(pred, target, num_classes=self.num_classes)
    metrics['precision'] = precision(pred, target, num_classes=self.num_classes)
    metrics['recall'] = recall(pred, target, num_classes=self.num_classes)
    metrics['f1'] = f1(pred, target, num_classes=self.num_classes)
    return metrics
def _epoch_end(self, stage, steps):
    y_true = torch.cat([x["y_true"] for x in steps]).reshape(-1, 1)
    y_pred = torch.cat([x["y_pred"] for x in steps]).reshape(-1, 1)
    return {
        f"{stage}_acc": metrics.accuracy(y_pred, y_true),
        f"{stage}_f1": metrics.f1(y_pred, y_true, num_classes=self.num_classes),
        f"{stage}_recall": metrics.recall(y_pred, y_true),
        f"{stage}_precision": metrics.precision(y_pred, y_true),
    }
def forward(self, logits: torch.FloatTensor, labels: torch.LongTensor):
    assert logits.ndim == 2
    assert labels.ndim == 1
    with torch.no_grad():
        if self.average == 'macro':
            return precision(pred=nn.functional.softmax(logits, dim=1),
                             target=labels,
                             num_classes=self.num_classes,
                             reduction='elementwise_mean')
        else:
            raise NotImplementedError
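# Hedged usage sketch for the metric wrapper above. The enclosing class is not
# shown, so the constructor below is hypothetical; it only assumes that
# `self.average` and `self.num_classes` are set in __init__:
#
#   metric = PrecisionModule(num_classes=4, average='macro')  # hypothetical name
#   logits = torch.randn(8, 4)                                # (batch, num_classes)
#   labels = torch.randint(0, 4, (8,))
#   score = metric(logits, labels)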
def validation_step(self, batch, batch_idx) -> Dict[str, Tensor]:
    logits = self.forward(batch)
    labels = batch['label']
    loss_fct = CrossEntropyLoss()
    loss = loss_fct(logits.view(-1, self.num_classes), labels.view(-1))
    preds = torch.argmax(logits, dim=1)
    val_acc = accuracy(preds, labels.view(-1), num_classes=self.num_classes)
    val_pr = precision(preds, labels.view(-1), num_classes=self.num_classes)
    val_rc = recall(preds, labels.view(-1), num_classes=self.num_classes)
    return {'val_loss': loss, 'val_acc': val_acc, 'val_pr': val_pr, 'val_rc': val_rc}
def validation_step(self, batch: tuple, batch_nb: int, *args, **kwargs) -> dict:
    """
    Similar to the training step but with the model in eval mode.

    Returns:
        - dictionary passed to the validation_end function.
    """
    inputs, targets = batch
    model_out = self.forward(**inputs)
    loss_val = self.loss(model_out, targets)
    y = targets["labels"]
    y_hat = model_out["logits"]

    # acc
    labels_hat = torch.argmax(y_hat, dim=1)
    val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
    val_acc = torch.tensor(val_acc)

    if self.on_gpu:
        val_acc = val_acc.cuda(loss_val.device.index)

    # in DP mode (default) make sure if result is scalar, there's another dim in the beginning
    if self.trainer.use_dp or self.trainer.use_ddp2:
        loss_val = loss_val.unsqueeze(0)
        val_acc = val_acc.unsqueeze(0)

    self.log('val_loss', loss_val)

    f1 = metrics.f1(labels_hat, y, average='weighted', num_classes=3)
    prec = metrics.precision(labels_hat, y, class_reduction='weighted', num_classes=3)
    recall = metrics.recall(labels_hat, y, class_reduction='weighted', num_classes=3)
    acc = metrics.accuracy(labels_hat, y, class_reduction='weighted', num_classes=3)

    self.log('val_prec', prec)
    self.log('val_f1', f1)
    self.log('val_recall', recall)
    self.log('val_acc_weighted', acc)
def test_v1_5_metric_precision_recall():
    AveragePrecision.__init__.warned = False
    with pytest.deprecated_call(match='It will be removed in v1.5.0'):
        AveragePrecision()

    Precision.__init__.warned = False
    with pytest.deprecated_call(match='It will be removed in v1.5.0'):
        Precision()

    Recall.__init__.warned = False
    with pytest.deprecated_call(match='It will be removed in v1.5.0'):
        Recall()

    PrecisionRecallCurve.__init__.warned = False
    with pytest.deprecated_call(match='It will be removed in v1.5.0'):
        PrecisionRecallCurve()

    pred = torch.tensor([0, 1, 2, 3])
    target = torch.tensor([0, 1, 1, 1])

    average_precision.warned = False
    with pytest.deprecated_call(match='It will be removed in v1.5.0'):
        assert average_precision(pred, target) == torch.tensor(1.)

    precision.warned = False
    with pytest.deprecated_call(match='It will be removed in v1.5.0'):
        assert precision(pred, target) == torch.tensor(0.5)

    recall.warned = False
    with pytest.deprecated_call(match='It will be removed in v1.5.0'):
        assert recall(pred, target) == torch.tensor(0.5)

    precision_recall.warned = False
    with pytest.deprecated_call(match='It will be removed in v1.5.0'):
        prec, rc = precision_recall(pred, target)
        assert prec == torch.tensor(0.5)
        assert rc == torch.tensor(0.5)

    precision_recall_curve.warned = False
    with pytest.deprecated_call(match='It will be removed in v1.5.0'):
        prec, rc, thrs = precision_recall_curve(pred, target)
        assert torch.equal(prec, torch.tensor([1., 1., 1., 1.]))
        assert torch.allclose(rc, torch.tensor([1., 0.6667, 0.3333, 0.]), atol=1e-4)
        assert torch.equal(thrs, torch.tensor([1, 2, 3]))
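# Context for the deprecation test above: the pytorch_lightning.metrics package
# was split out into the standalone torchmetrics package (deprecated in PL v1.3,
# removed in v1.5). A minimal migration sketch, assuming torchmetrics is installed:
from torchmetrics import Precision, Recall  # class-based replacements keep the same names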
def test_step(self, batch, batch_idx):
    v, c, m, target = batch
    logits = self.forward(v, c, m)
    loss = self.model_loss(logits, target)
    logs = {
        'test_loss': loss,
        'test_acc': accuracy(logits, target, num_classes=self.config['nb_classes']),
        'test_recall': recall(logits, target, num_classes=self.config['nb_classes']),
        'test_prec': precision(logits, target, num_classes=self.config['nb_classes'])
    }
    # pp.pprint(logs)
    return {'loss': loss, 'log': logs}
def test_step(self, batch: tuple, batch_nb: int, *args, **kwargs) -> dict:
    """
    Runs one test step. This usually consists in the forward function followed
    by the loss function.

    :param batch: The output of your dataloader.
    :param batch_nb: Integer displaying which batch this is

    Returns:
        - dictionary containing the loss and the metrics to be added to the lightning logger.
    """
    inputs, targets = batch
    model_out = self.forward(**inputs)
    loss_val = self.loss(model_out, targets)

    # in DP mode (default) make sure if result is scalar, there's another dim in the beginning
    if self.trainer.use_dp or self.trainer.use_ddp2:
        loss_val = loss_val.unsqueeze(0)

    self.log('test_loss', loss_val)

    y_hat = model_out['logits']
    labels_hat = torch.argmax(y_hat, dim=1)
    y = targets['labels']

    f1 = metrics.f1(labels_hat, y, average='weighted', num_classes=3)
    prec = metrics.precision(labels_hat, y, class_reduction='weighted', num_classes=3)
    recall = metrics.recall(labels_hat, y, class_reduction='weighted', num_classes=3)
    acc = metrics.accuracy(labels_hat, y, class_reduction='weighted', num_classes=3)
    self.confusion_matrix.update(labels_hat, y)

    self.log('test_batch_prec', prec)
    self.log('test_batch_f1', f1)
    self.log('test_batch_recall', recall)
    self.log('test_batch_weighted_acc', acc)
def metrics(self, pred, target, num_classes=3, is_swnet=False):
    if is_swnet:
        pred[pred == 2] = 1
        target[target == 2] = 1
        num_classes = 2
    confusion_m = mf.confusion_matrix(pred, target, num_classes=num_classes)
    # acc
    accuracy = confusion_m.diag().sum() / len(pred)
    # kappa
    p0 = accuracy
    pc = 0
    for i in range(confusion_m.shape[0]):
        pc = pc + confusion_m[i].sum() * confusion_m[:, i].sum()
    pc = pc / len(pred) ** 2
    # guard against division by zero when the chance agreement pc is 1
    if pc != 1:
        kc = (p0 - pc) / (1 - pc)
    else:
        kc = torch.tensor(1.0, device=p0.device)
    f1 = mf.f1_score(pred, target, num_classes=num_classes, class_reduction='none')
    precision = mf.precision(pred, target, num_classes=num_classes, class_reduction='none')
    recall = mf.recall(pred, target, num_classes=num_classes, class_reduction='none')
    return accuracy, kc, f1, precision, recall
def test_step(self, batch: Dict = None, batch_idx: int = None) -> Dict[str, Tensor]:
    logits = self.forward(batch)
    if self.num_classes == 3:
        labels = batch['label_major']
    else:
        labels = batch['label_minor']
    loss_fct = CrossEntropyLoss()
    loss = loss_fct(logits.view(-1, self.num_classes), labels.view(-1))
    preds = torch.argmax(logits, dim=1)
    test_acc = accuracy(preds, labels.view(-1), num_classes=self.num_classes)
    test_pr = precision(preds, labels.view(-1), num_classes=self.num_classes)
    test_rc = recall(preds, labels.view(-1), num_classes=self.num_classes)
    return {'test_loss': loss, 'test_acc': test_acc, 'test_pr': test_pr, 'test_rc': test_rc}
def test_precision_recall_joint(average):
    """A simple test of the joint precision_recall metric.

    No need to test this thoroughly, as it is just a combination of precision and recall,
    which are already tested thoroughly.
    """
    precision_result = precision(
        _input_mcls_prob.preds[0], _input_mcls_prob.target[0], average=average, num_classes=NUM_CLASSES
    )
    recall_result = recall(
        _input_mcls_prob.preds[0], _input_mcls_prob.target[0], average=average, num_classes=NUM_CLASSES
    )

    prec_recall_result = precision_recall(
        _input_mcls_prob.preds[0], _input_mcls_prob.target[0], average=average, num_classes=NUM_CLASSES
    )

    assert torch.equal(precision_result, prec_recall_result[0])
    assert torch.equal(recall_result, prec_recall_result[1])
def __metrics_per_batch(self, batch):
    # 1. Forward pass:
    x, y_true = batch
    logits = self.forward(x)

    # 2. Compute loss & performance metrics:
    # class prediction: if binary (n_outputs == 1) then class label is 0 if logit < 0 else it's 1;
    # if multiclass then simply run argmax to find the index of the most confident class
    y_hat = torch.argmax(logits, dim=1) if self.n_outputs > 1 else (logits > 0.0).squeeze(1).long()
    loss = self.loss(logits, y_true if self.n_outputs > 1 else y_true.view((-1, 1)).type_as(x))
    num_correct = torch.eq(y_hat, y_true.view(-1)).sum()
    acc = num_correct.float() / self.batch_size
    prec = plm.precision(y_hat, y_true, num_classes=self.n_classes)
    rec = plm.recall(y_hat, y_true, num_classes=self.n_classes)
    f1 = plm.f1_score(y_hat, y_true, num_classes=self.n_classes)
    conf_matrix = plm.confusion_matrix(y_hat.long(), y_true.long())
    acc1, acc5 = self.__accuracy(logits, y_true, topk=(1, 5))

    return (y_true, y_hat, logits, loss, num_correct, acc, prec, rec, f1,
            conf_matrix, acc1, acc5)
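# A hedged sketch of the top-k accuracy helper referenced above; the original
# __accuracy is not shown, so this is the standard topk pattern, assuming
# `logits` has shape (N, C) and `target` shape (N,):
import torch

def topk_accuracy(logits: torch.Tensor, target: torch.Tensor, topk=(1, 5)):
    maxk = max(topk)
    _, pred = logits.topk(maxk, dim=1, largest=True, sorted=True)  # (N, maxk)
    correct = pred.t().eq(target.view(1, -1))                      # (maxk, N)
    # percentage of samples whose true class appears in the top-k predictions
    return [correct[:k].reshape(-1).float().sum() * (100.0 / target.size(0))
            for k in topk]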
def precision(preds, target):
    return metrics.precision(preds[:, :, 0] > 0.5, target[:, :, 0],
                             class_reduction='none', num_classes=2)[1]
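# Note on the trailing [1] above (and in the similar wrappers below): with
# class_reduction='none', the lightning-style functional precision returns a
# per-class tensor, so [1] selects the positive-class score. Illustrative call
# (tensor shapes assumed, not from the original code):
#
#   preds = torch.rand(8, 16, 1)              # probabilities in the first channel
#   target = torch.randint(0, 2, (8, 16, 1))
#   precision(preds, target)                  # -> precision of class 1 only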
val_loss = 0
val_acc = 0
val_recall = 0
val_precision = 0
val_f1 = 0

for X_batch, y_batch in val_loader:
    # send values to device (GPU)
    X_batch, y_batch = X_batch.to(device), y_batch.to(device)

    y_val_pred = model(X_batch)
    val_loss += criterion(y_val_pred, y_batch)
    # the lightning-style functional metrics used here take (pred, target)
    val_acc += accuracy(y_val_pred, y_batch)
    val_recall += recall(y_val_pred, y_batch)
    val_precision += precision(y_val_pred, y_batch)
    val_f1 += f1_score(y_val_pred, y_batch)

history['val_loss'].append(val_loss / len(val_loader))
history['val_acc'].append(val_acc / len(val_loader))
history['val_recall'].append(val_recall / len(val_loader))
history['val_precision'].append(val_precision / len(val_loader))
history['val_f1'].append(val_f1 / len(val_loader))

logger.info(f"EPOCH: {e} (validation)")
logger.info(f"{'':<10}Loss{'':<5} ----> {val_loss / len(val_loader):.5f}")
logger.info(f"{'':<10}Accuracy{'':<1} ----> {val_acc / len(val_loader):.3f}")
logger.info(
def precision_v2(all_preds, target):
    preds, _, _ = all_preds
    return metrics.precision(preds > 0.5, target,
                             class_reduction='none', num_classes=2)[1]
def calculate_performance_pl(true: torch.Tensor, pred: torch.Tensor,
                             return_list=True, print_result=False) -> dict:
    """
    :param true: Multi-hot ground-truth labels
    :param pred: Multi-hot predictions
    :param return_list: convert each score tensor to a plain Python list
    :param print_result: log the computed scores
    """
    scores = {}
    scores["accuracy"] = accuracy_pl(true, pred)

    scores["precision"] = {}
    scores["precision"]["classes"] = precision(true, pred, class_reduction='none')
    scores["precision"]["micro"] = precision(true, pred, class_reduction='micro')
    scores["precision"]["macro"] = precision(true, pred, class_reduction='macro')
    scores["precision"]["weighted"] = precision(true, pred, class_reduction='weighted')
    # scores["precision"]["samples"] = precision(true, pred, reduction='samples')

    scores["recall"] = {}
    scores["recall"]["classes"] = recall(true, pred, class_reduction='none')
    scores["recall"]["micro"] = recall(true, pred, class_reduction='micro')
    scores["recall"]["macro"] = recall(true, pred, class_reduction='macro')
    scores["recall"]["weighted"] = recall(true, pred, class_reduction='weighted')
    # scores["recall"]["samples"] = recall(true, pred, reduction='samples')

    scores["f1"] = {}
    scores["f1"]["classes"] = f1_pl(true, pred, class_reduction='none')
    scores["f1"]["micro"] = f1_pl(true, pred, class_reduction='micro')
    scores["f1"]["macro"] = f1_pl(true, pred, class_reduction='macro')
    scores["f1"]["weighted"] = f1_pl(true, pred, class_reduction='weighted')
    # scores["f1"]["samples"] = f1_pl(true, pred, reduction='samples')

    if return_list:
        scores["accuracy"] = scores["accuracy"].tolist()
        for metric in ("precision", "recall", "f1"):
            for key in ("classes", "micro", "macro", "weighted"):
                scores[metric][key] = scores[metric][key].tolist()
        if print_result:
            logger.info(dumps(scores, indent=4))
    else:
        if print_result:
            logger.info(scores)
    return scores
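# Hypothetical usage of calculate_performance_pl (tensor values are illustrative,
# not from the original code):
#
#   true = torch.tensor([[1, 0, 1], [0, 1, 0]])   # multi-hot ground truth
#   pred = torch.tensor([[1, 0, 0], [0, 1, 1]])   # multi-hot predictions
#   scores = calculate_performance_pl(true, pred, return_list=True, print_result=True)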
def precision_dist(preds, target):
    return metrics.precision((preds < MIN_EPS_HOLDER.MIN_EPS / 2).int(),
                             (target == 0.0).int(),
                             class_reduction='none', num_classes=2)[1]