def _test(y_pred, y, batch_size):
    """Check the enclosing scope's multilabel ``re`` metric against scikit-learn.

    ``re`` and ``average`` come from the enclosing test function.  When
    ``batch_size`` is greater than 1 the data is fed in slices, otherwise in a
    single ``update`` call.  Afterwards an averaged and a non-averaged
    ``Recall`` are updated with the same data and compared with each other.
    """
    re.reset()
    assert re._updated is False

    if batch_size > 1:
        n_iters = y.shape[0] // batch_size + 1
        for start in range(0, n_iters * batch_size, batch_size):
            stop = start + batch_size
            re.update((y_pred[start:stop], y[start:stop]))
    else:
        re.update((y_pred, y))

    expected_pred = to_numpy_multilabel(y_pred)
    expected_true = to_numpy_multilabel(y)

    assert re._type == "multilabel"
    assert re._updated is True
    if average:
        re_compute = re.compute()
    else:
        re_compute = re.compute().mean().item()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        reference = recall_score(expected_true, expected_pred, average="samples")
        assert reference == pytest.approx(re_compute)

    # An averaged metric must agree with the mean of the non-averaged one.
    re1 = Recall(is_multilabel=True, average=True)
    re2 = Recall(is_multilabel=True, average=False)
    pair = (re1, re2)
    for metric in pair:
        assert metric._updated is False
    for metric in pair:
        metric.update((y_pred, y))
    for metric in pair:
        assert metric._updated is True
    assert re1.compute() == pytest.approx(re2.compute().mean().item())
    # compute() is expected to leave the flag set.
    for metric in pair:
        assert metric._updated is True
def test_multilabel_wrong_inputs():
    """Multilabel ``Recall`` must raise ``ValueError`` on malformed inputs.

    ``_updated`` is expected to stay ``False`` after every rejected call and
    to be ``True`` at the end, once one update has been accepted.
    """
    re = Recall(average=True, is_multilabel=True)
    assert re._updated is False

    rejected_cases = (
        # incompatible shapes
        lambda: (torch.randint(0, 2, size=(10, )), torch.randint(0, 2, size=(10, )).long()),
        # incompatible y_pred
        lambda: (torch.rand(10, 5), torch.randint(0, 2, size=(10, 5)).long()),
        # incompatible y
        lambda: (torch.randint(0, 5, size=(10, 5, 6)), torch.rand(10)),
    )
    for make_output in rejected_cases:
        with pytest.raises(ValueError):
            re.update(make_output())
        assert re._updated is False

    # incompatible shapes between two updates: the first call is accepted,
    # the second one (different number of labels) is the one expected to raise.
    with pytest.raises(ValueError):
        for num_labels in (5, 6):
            preds = torch.randint(0, 2, size=(20, num_labels))
            targets = torch.randint(0, 2, size=(20, num_labels)).long()
            re.update((preds, targets))

    assert re._updated is True
def test_binary_wrong_inputs():
    """Binary ``Recall`` must raise ``ValueError`` with a matching message.

    Each case pairs the regular expression the error message has to match
    with a factory building the offending ``(y_pred, y)`` tuple.
    """
    re = Recall()

    cases = [
        # y has not only 0 or 1 values
        (
            r"For binary cases, y must be comprised of 0's and 1's",
            lambda: (torch.randint(0, 2, size=(10, )), torch.arange(0, 10).long()),
        ),
        # y_pred values are not thresholded to 0, 1 values
        (
            r"For binary cases, y_pred must be comprised of 0's and 1's",
            lambda: (torch.rand(10, 1), torch.randint(0, 2, size=(10, )).long()),
        ),
        # incompatible shapes
        (
            r"y must have shape of",
            lambda: (torch.randint(0, 2, size=(10, )), torch.randint(0, 2, size=(10, 5)).long()),
        ),
        # incompatible shapes
        (
            r"y must have shape of",
            lambda: (torch.randint(0, 2, size=(10, 5, 6)), torch.randint(0, 2, size=(10, )).long()),
        ),
        # incompatible shapes
        (
            r"y must have shape of",
            lambda: (torch.randint(0, 2, size=(10, )), torch.randint(0, 2, size=(10, 5, 6)).long()),
        ),
    ]

    for pattern, make_output in cases:
        with pytest.raises(ValueError, match=pattern):
            re.update(make_output())
def _test(average):
    """Compare multiclass ``Recall`` on 4-D predictions with scikit-learn.

    Two rounds run on the same metric instance, separated by ``reset()``,
    each on freshly drawn random data of a different shape.
    """
    re = Recall(average=average)
    sklearn_average_parameter = 'macro' if average else None

    def _run_round(pred_shape, label_bound):
        # Targets drop the channel axis of the prediction shape.
        batch, _, height, width = pred_shape
        y_pred = torch.rand(*pred_shape)
        y = torch.randint(0, label_bound, size=(batch, height, width)).type(torch.LongTensor)
        re.update((y_pred, y))

        np_y_pred = y_pred.numpy().argmax(axis=1).ravel()
        np_y = y.numpy().ravel()

        assert re._type == 'multiclass'
        assert isinstance(re.compute(), float if average else torch.Tensor)
        if average:
            re_compute = re.compute()
        else:
            re_compute = re.compute().numpy()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            reference = recall_score(np_y, np_y_pred, average=sklearn_average_parameter)
            assert reference == pytest.approx(re_compute)

    _run_round((10, 5, 18, 16), 4)
    re.reset()
    _run_round((10, 7, 20, 12), 6)
def _test(average):
    """Compare binary ``Recall`` on multi-dimensional inputs with scikit-learn.

    Predictions are raw random floats; the scikit-learn reference is built by
    thresholding them at 0.5.
    """
    re = Recall(average=average)

    def _run_round(shape):
        # TODO: y_pred should be binary after 0.1.2 release, i.e. drawn with
        # torch.randint(0, 2, size=shape) and compared without thresholding.
        y_pred = torch.rand(*shape)
        y = torch.randint(0, 2, size=shape).type(torch.LongTensor)
        re.update((y_pred, y))

        np_y = y.numpy().ravel()
        np_y_pred = (y_pred.numpy().ravel() > 0.5).astype('int')

        assert re._type == 'binary'
        assert isinstance(re.compute(), float if average else torch.Tensor)
        if average:
            re_compute = re.compute()
        else:
            re_compute = re.compute().numpy()
        assert recall_score(np_y, np_y_pred, average='binary') == pytest.approx(re_compute)

    _run_round((10, 12, 10))
    re.reset()
    _run_round((10, 1, 12, 10))
def test_incorrect_type():
    """Changing the input type between two updates must raise ``RuntimeError``.

    The test finishes by checking that an averaged multilabel ``Recall``
    equals the mean of the non-averaged one on the same data.
    """
    # Tests changing of type during training

    def _test(average):
        re = Recall(average=average)

        # First update: (4, 4) float scores with 1-D integer targets.
        scores = torch.softmax(torch.rand(4, 4), dim=1)
        targets = torch.ones(4).long()
        re.update((scores, targets))

        # Second update: 1-D predictions, expected to be rejected.
        flat_pred = torch.zeros(4, )
        targets = torch.ones(4).long()
        with pytest.raises(RuntimeError):
            re.update((flat_pred, targets))

    for flag in (True, False):
        _test(average=flag)

    re1 = Recall(is_multilabel=True, average=True)
    re2 = Recall(is_multilabel=True, average=False)
    y_pred = torch.randint(0, 2, size=(10, 4, 20, 23))
    y = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
    output = (y_pred, y)
    re1.update(output)
    re2.update(output)
    assert re1.compute() == pytest.approx(re2.compute().mean().item())
def test_binary_wrong_inputs():
    """Binary ``Recall`` must raise ``ValueError`` on each malformed input."""
    re = Recall()

    # Each factory builds one offending (y_pred, y) tuple.
    offending_outputs = (
        # y has not only 0 or 1 values
        lambda: (torch.randint(0, 2, size=(10, )), torch.arange(0, 10).long()),
        # y_pred values are not thresholded to 0, 1 values
        lambda: (torch.rand(10, 1), torch.randint(0, 2, size=(10, )).long()),
        # incompatible shapes
        lambda: (torch.randint(0, 2, size=(10, )), torch.randint(0, 2, size=(10, 5)).long()),
        # incompatible shapes
        lambda: (torch.randint(0, 2, size=(10, 5, 6)), torch.randint(0, 2, size=(10, )).long()),
        # incompatible shapes
        lambda: (torch.randint(0, 2, size=(10, )), torch.randint(0, 2, size=(10, 5, 6)).long()),
    )

    for make_output in offending_outputs:
        with pytest.raises(ValueError):
            re.update(make_output())
def test_multilabel_input_NCHW():
    """Compare multilabel ``Recall`` on 4-D inputs with scikit-learn.

    Covers two single-update rounds, one round of batched updates, and a
    final consistency check between averaged and non-averaged metrics.
    """

    def _test(average):
        re = Recall(average=average, is_multilabel=True)

        def _random_pair(shape):
            # Predictions are drawn before targets in every round.
            preds = torch.randint(0, 2, size=shape)
            targets = torch.randint(0, 2, size=shape).long()
            return preds, targets

        def _verify(preds, targets):
            np_y_pred = to_numpy_multilabel(preds)
            np_y = to_numpy_multilabel(targets)
            assert re._type == 'multilabel'
            if average:
                re_compute = re.compute()
            else:
                re_compute = re.compute().mean().item()
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=UndefinedMetricWarning)
                reference = recall_score(np_y, np_y_pred, average='samples')
                assert reference == pytest.approx(re_compute)

        y_pred, y = _random_pair((10, 5, 18, 16))
        re.update((y_pred, y))
        _verify(y_pred, y)

        re.reset()
        y_pred, y = _random_pair((10, 4, 20, 23))
        re.update((y_pred, y))
        _verify(y_pred, y)

        # Batched Updates
        re.reset()
        y_pred, y = _random_pair((100, 5, 12, 14))
        batch_size = 16
        n_iters = y.shape[0] // batch_size + 1
        for start in range(0, n_iters * batch_size, batch_size):
            stop = start + batch_size
            re.update((y_pred[start:stop], y[start:stop]))
        _verify(y_pred, y)

    for _ in range(5):
        _test(average=True)
        _test(average=False)

    re1 = Recall(is_multilabel=True, average=True)
    re2 = Recall(is_multilabel=True, average=False)
    y_pred = torch.randint(0, 2, size=(10, 4, 20, 23))
    y = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
    output = (y_pred, y)
    re1.update(output)
    re2.update(output)
    assert re1.compute() == pytest.approx(re2.compute().mean().item())
def test_compute_average():
    """Averaged ``Recall`` on identity predictions with all-ones targets is 0.0625."""
    targets = torch.ones(4).type(torch.LongTensor)
    predictions = torch.eye(4)

    recall = Recall(average=True)
    recall.update((predictions, targets))

    averaged = recall.compute()
    assert averaged == 0.0625
def _test(average):
    """``update`` must raise ``ValueError`` for the (10, 4) / all-4 input pair.

    ``torch.randint(4, 5, ...)`` only ever yields the value 4, while the
    predictions have 4 columns.
    """
    metric = Recall(average=average)
    predictions = torch.randint(0, 2, size=(10, 4)).float()
    targets = torch.randint(4, 5, size=(10, )).long()
    output = (predictions, targets)
    with pytest.raises(ValueError):
        metric.update(output)
def test_compute_all_wrong():
    """Both computed entries are 0.0 when every prediction misses its target.

    The targets are all 1, while both prediction rows put their full score on
    column 0.
    """
    predictions = torch.FloatTensor([[1.0, 0.0], [1.0, 0.0]])
    targets = torch.ones(2).type(torch.LongTensor)

    recall = Recall()
    recall.update((predictions, targets))

    result = list(recall.compute())
    for position in (0, 1):
        assert result[position] == 0.0
def _test(average):
    """A second update with a (4, 1) prediction must raise ``RuntimeError``.

    The first update uses (4, 4) softmax scores, so the second one changes
    the shape of the predictions for the same kind of targets.
    """
    metric = Recall(average=average)

    first_pred = torch.softmax(torch.rand(4, 4), dim=1)
    first_target = torch.ones(4).type(torch.LongTensor)
    metric.update((first_pred, first_target))

    second_pred = torch.rand(4, 1)
    second_target = torch.ones(4).type(torch.LongTensor)
    with pytest.raises(RuntimeError):
        metric.update((second_pred, second_target))
def _test(average):
    """A second update with 1-D predictions must raise ``RuntimeError``.

    The first update uses (4, 4) softmax scores; the second passes a flat
    all-zero prediction vector for the same kind of targets.
    """
    metric = Recall(average=average)

    first_pred = torch.softmax(torch.rand(4, 4), dim=1)
    first_target = torch.ones(4).long()
    metric.update((first_pred, first_target))

    second_pred = torch.zeros(4, )
    second_target = torch.ones(4).long()
    with pytest.raises(RuntimeError):
        metric.update((second_pred, second_target))
class FbetaScore(Metric):
    """F-beta score built from an internal ``Precision`` and ``Recall`` pair.

    Both sub-metrics receive every ``update`` and ``reset`` call.  With
    ``average="macro"`` they are created with ``average=False`` and the mean
    of the resulting F-beta values is returned as a Python number; for any
    other ``average`` value they are created with ``average=True`` and the
    combined value is returned as is.
    """

    def __init__(
        self,
        beta: int = 1,
        output_transform: Callable = lambda x: x,
        average: str = "macro",
        is_multilabel: bool = False,
        device: Optional[Union[str, torch.device]] = None,
    ):
        self._beta = beta
        self._average = average

        # Identical construction arguments for both sub-metrics.
        sub_metric_kwargs = dict(
            output_transform=output_transform,
            average=self._average != "macro",
            is_multilabel=is_multilabel,
            device=device,
        )
        # NOTE(review): the sub-metrics are created before the base
        # constructor runs, presumably because it calls reset() - confirm.
        self._precision = Precision(**sub_metric_kwargs)
        self._recall = Recall(**sub_metric_kwargs)
        super().__init__(output_transform=output_transform, device=device)

    @reinit__is_reduced
    def reset(self) -> None:
        """Reset both sub-metrics."""
        self._precision.reset()
        self._recall.reset()

    @reinit__is_reduced
    def update(self, output: Sequence[torch.Tensor]) -> None:
        """Forward ``output`` unchanged to both sub-metrics."""
        self._precision.update(output)
        self._recall.update(output)

    def compute(self) -> torch.Tensor:
        """Combine the current precision and recall into the F-beta value."""
        precision_val = self._precision.compute()
        recall_val = self._recall.compute()
        beta_sq = self._beta ** 2
        # The 1e-15 term keeps the denominator non-zero.
        denominator = beta_sq * precision_val + recall_val + 1e-15
        fbeta_val = (1.0 + beta_sq) * precision_val * recall_val / denominator
        if self._average != "macro":
            return fbeta_val
        return torch.mean(fbeta_val).item()
def test_binary_wrong_inputs():
    """Binary ``Recall`` must raise ``ValueError`` on each malformed input."""
    re = Recall()

    def _binary_long(*shape):
        # Random 0/1 tensor of the given shape, converted to LongTensor.
        return torch.randint(0, 2, size=shape).type(torch.LongTensor)

    # TODO: after the 0.1.2 release, add a case where y_pred is not
    # thresholded to 0/1 values, e.g. y_pred = torch.rand(10, 1) with
    # y = _binary_long(10); it is expected to raise ValueError as well.
    offending_outputs = (
        # y has not only 0 or 1 values
        lambda: (_binary_long(10), torch.arange(0, 10).type(torch.LongTensor)),
        # incompatible shapes
        lambda: (_binary_long(10), _binary_long(10, 5)),
        # incompatible shapes
        lambda: (_binary_long(10, 5, 6), _binary_long(10)),
        # incompatible shapes
        lambda: (_binary_long(10), _binary_long(10, 5, 6)),
    )

    for make_output in offending_outputs:
        with pytest.raises(ValueError):
            re.update(make_output())
def test_incorrect_shape():
    """``update`` must raise ``ValueError`` when target and prediction shapes clash."""
    recall = Recall()
    prediction_shape = (2, 3, 2, 2)

    # Each target shape is paired with a (2, 3, 2, 2) prediction.
    for target_shape in ((2, 3), (2, 3, 4, 4)):
        y_pred = torch.zeros(*prediction_shape)
        y = torch.zeros(*target_shape)
        with pytest.raises(ValueError):
            recall.update((y_pred, y))
def evaluate_epoch(eval_dl, model, criterion, num_classes=21):
    """Evaluate ``model`` on a validation set for one epoch.

    Args:
        eval_dl (DataLoader): DataLoader of the validation set, yielding
            ``(inputs, targets)`` batches.
        model (nn.Module): model in PyTorch; it is switched to eval mode.
        criterion (loss): PyTorch loss, called as ``criterion(outputs, targets)``.
        num_classes (int): number of classes handed to ``iou.IoU``.  Defaults
            to 21, the value that used to be hard-coded.

    Returns:
        tuple: ``(val_loss, iou_class, mean_iou)``.  ``val_loss`` is the sum
        of the per-batch loss values divided by ``len(eval_dl)``;
        ``iou_class`` and ``mean_iou`` are the pair returned by
        ``iou_metric.value()``.

    Raises:
        ZeroDivisionError: if ``eval_dl`` has length 0.
    """
    device = next(model.parameters()).device
    model.eval()

    val_loss = 0.0
    iou_metric = iou.IoU(num_classes, normalized=False)

    # The Recall/Precision objects and the mean_* lists of the earlier
    # version were never read, so they are no longer created or updated.
    with torch.no_grad():
        for inputs, targets in eval_dl:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, targets).item()

            # Predicted class per position: arg-max over dim 1.
            pred_class = torch.argmax(nn.functional.softmax(outputs, dim=1), dim=1)
            iou_metric.add(pred_class, target=targets)

    iou_class, mean_iou = iou_metric.value()
    val_loss /= len(eval_dl)
    return val_loss, iou_class, mean_iou
def _test(average):
    """Compare binary ``Recall`` on 1-D inputs with scikit-learn.

    Runs two rounds on random 0/1 predictions, one on rounded float
    predictions and one with batched updates, resetting the metric between
    rounds.
    """
    re = Recall(average=average)

    def _verify(preds, targets):
        np_y = targets.numpy().ravel()
        np_y_pred = preds.numpy().ravel()
        assert re._type == "binary"
        assert isinstance(re.compute(), float if average else torch.Tensor)
        if average:
            re_compute = re.compute()
        else:
            re_compute = re.compute().numpy()
        assert recall_score(np_y, np_y_pred, average="binary") == pytest.approx(re_compute)

    # Two rounds on freshly drawn random 0/1 data.
    for round_index in range(2):
        if round_index > 0:
            re.reset()
        y_pred = torch.randint(0, 2, size=(10, ))
        y = torch.randint(0, 2, size=(10, )).long()
        re.update((y_pred, y))
        _verify(y_pred, y)

    # Float predictions rounded to 0/1 before the update.
    re.reset()
    y_pred = torch.round(torch.Tensor([0.5] * 9 + [0.51]))
    y = torch.randint(0, 2, size=(10, )).long()
    re.update((y_pred, y))
    _verify(y_pred, y)

    # Batched Updates
    re.reset()
    y_pred = torch.randint(0, 2, size=(100, ))
    y = torch.randint(0, 2, size=(100, )).long()
    batch_size = 16
    n_iters = y.shape[0] // batch_size + 1
    for start in range(0, n_iters * batch_size, batch_size):
        stop = start + batch_size
        re.update((y_pred[start:stop], y[start:stop]))
    _verify(y_pred, y)
def _test(average, metric_device):
    """Check that the accumulators of ``Recall`` end up on ``metric_device``."""
    re = Recall(average=average, device=metric_device)
    assert re._device == metric_device

    # The accumulators are only inspected after the first update: their shape
    # is not known earlier, so before that call they are not yet tensors on
    # the requested device.
    predictions = torch.randint(0, 2, size=(10, ))
    targets = torch.randint(0, 2, size=(10, )).long()
    output = (predictions, targets)
    re.update(output)

    true_positives_ok = re._true_positives.device == metric_device
    assert (
        true_positives_ok
    ), f"{type(re._true_positives.device)}:{re._true_positives.device} vs {type(metric_device)}:{metric_device}"

    positives_ok = re._positives.device == metric_device
    assert (
        positives_ok
    ), f"{type(re._positives.device)}:{re._positives.device} vs {type(metric_device)}:{metric_device}"
def evaluate_epoch(eval_dl, model, criterion, epoch, writer):
    """Evaluate ``model`` for one epoch and log the results.

    Args:
        eval_dl (DataLoader): DataLoader of the validation set, yielding
            ``(inputs, targets)`` batches.
        model (nn.Module): model in PyTorch; it is switched to eval mode.
        criterion (loss): PyTorch loss, called as ``criterion(outputs, targets)``.
        epoch (int): epoch number, used in the printed header and as the
            logging step.
        writer (SummaryWriter): instance of SummaryWriter for TensorBoard.

    Returns:
        None.  Loss, precision, recall and F1 are printed and written to
        ``writer`` under the ``test/`` tags.
    """
    print('\neval epoch {}'.format(epoch))
    device = next(model.parameters()).device
    model.eval()
    recall = Recall(lambda x: (x[0], x[1]))
    precision = Precision(lambda x: (x[0], x[1]))
    mean_loss = []

    with torch.no_grad():
        for idx, (inputs, targets) in enumerate(eval_dl):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            preds = outputs.argmax(1)

            precision.update((preds, targets))
            recall.update((preds, targets))
            mean_loss.append(loss.item())
            writer.add_scalar('test/loss', loss.item(), len(eval_dl) * epoch + idx)

    # The metrics are never reset inside the loop, so they accumulate over
    # all batches.  They are read once here; the earlier version averaged the
    # running values taken after every batch, which over-weighted early
    # batches.
    if mean_loss:
        mean_precision = precision.compute().item()
        mean_recall = recall.compute().item()
    else:
        # Empty loader: keep reporting NaN as before.
        mean_precision = mean_recall = float('nan')

    # The 1e-20 term guards against a zero denominator.
    f1 = mean_precision * mean_recall * 2 / (mean_precision + mean_recall + 1e-20)

    print('precision: {:07.5}, recall: {:07.5}, f1: {:07.5}\n'.format(mean_precision, mean_recall, f1))

    writer.add_scalar('test/epoch-loss', np.array(mean_loss).mean(), epoch)
    writer.add_scalar('test/f1', f1, epoch)
    writer.add_scalar('test/precision', mean_precision, epoch)
    writer.add_scalar('test/recall', mean_recall, epoch)
def test_ner_example():
    """Compare per-class ``Recall`` on a (2, 3, 8) input with scikit-learn.

    Only the classes that actually occur among the arg-max predictions are
    compared.
    """
    recall = Recall()

    y = torch.Tensor([[0] + [1] * 7, [2] * 8]).type(torch.LongTensor)
    y_pred = torch.softmax(torch.rand(2, 3, 8), dim=1)
    _, indices = torch.max(y_pred, dim=1)

    flat_indices = indices.view(-1)
    y_pred_labels = list(set(flat_indices.tolist()))

    recall_sk = recall_score(
        y.view(-1).data.numpy(),
        flat_indices.data.numpy(),
        labels=y_pred_labels,
        average=None,
    )

    recall.update((y_pred, y))
    per_class = recall.compute().tolist()
    recall_ig = [per_class[label] for label in y_pred_labels]

    assert all(expected == pytest.approx(actual) for expected, actual in zip(recall_sk, recall_ig))
def evalidation(epoch, dataloader, model, criterion, device, writer, tb_test_imgs):
    """Evaluate a two-input model for one epoch and log the results.

    Args:
        epoch (int): epoch number, used in the printed header and as the
            logging step.
        dataloader: iterable with a length, yielding
            ``(pre_img, post_img, targets)`` batches.
        model: callable as ``model(pre_img, post_img)``; switched to eval mode.
        criterion: loss called as ``criterion(outputs, targets)``.
        device: device the batch tensors are moved to.
        writer: TensorBoard-style writer providing ``add_scalar`` and
            ``add_image``.
        tb_test_imgs (int): images are logged for batches with an index
            below this value.

    Returns:
        None.  Loss, precision, recall and F1 are printed and written to
        ``writer`` under the ``test/`` tags.
    """
    print('\neval epoch {}'.format(epoch))
    model.eval()
    recall = Recall(lambda x: (x[0], x[1]))
    precision = Precision(lambda x: (x[0], x[1]))
    mean_loss = []

    with torch.no_grad():
        for idx, (pre_img, post_img, targets) in enumerate(dataloader):
            pre_img = pre_img.to(device)
            post_img = post_img.to(device)
            targets = targets.to(device)

            outputs = model(pre_img, post_img)
            loss = criterion(outputs, targets)
            preds = outputs.argmax(1)

            precision.update((preds, targets))
            recall.update((preds, targets))
            mean_loss.append(loss.item())
            writer.add_scalar('test/loss', loss.item(), len(dataloader) * epoch + idx)

            if idx < tb_test_imgs:
                writer.add_image('test/pre', pre_img[0], idx)
                writer.add_image('test/post', post_img[0], idx)
                # Fixed: this used the undefined name `label`, which raised
                # NameError as soon as an image was logged.
                writer.add_image('test/label', targets[0], idx)
                # NOTE(review): the whole prediction batch is logged here
                # while the other tags log element 0 only - confirm intent.
                writer.add_image('test/pred', preds, idx)

    # The metrics are never reset inside the loop, so they accumulate over
    # all batches.  They are read once here; the earlier version averaged the
    # running values taken after every batch, which over-weighted early
    # batches.
    if mean_loss:
        mean_precision = precision.compute().item()
        mean_recall = recall.compute().item()
    else:
        # Empty loader: keep reporting NaN as before.
        mean_precision = mean_recall = float('nan')

    # The 1e-20 term guards against a zero denominator.
    f1 = mean_precision * mean_recall * 2 / (mean_precision + mean_recall + 1e-20)

    print('precision: {:07.5}, recall: {:07.5}, f1: {:07.5}\n'.format(
        mean_precision, mean_recall, f1))

    writer.add_scalar('test/epoch-loss', np.array(mean_loss).mean(), epoch)
    writer.add_scalar('test/f1', f1, epoch)
    writer.add_scalar('test/precision', mean_precision, epoch)
    writer.add_scalar('test/recall', mean_recall, epoch)
def test_sklearn_compute():
    """Compare non-averaged ``Recall`` on a (5, 5) input with scikit-learn.

    Only the classes that actually occur among the arg-max predictions are
    compared.
    """
    recall = Recall(average=False)

    y = torch.Tensor(range(5)).type(torch.LongTensor)
    y_pred = torch.softmax(torch.rand(5, 5), dim=1)
    _, indices = torch.max(y_pred, dim=1)
    recall.update((y_pred, y))

    y_pred_labels = list(set(indices.tolist()))
    recall_sk = recall_score(
        y.data.numpy(),
        indices.data.numpy(),
        labels=y_pred_labels,
        average=None,
    )

    per_class = recall.compute().tolist()
    recall_ig = [per_class[label] for label in y_pred_labels]

    assert all(expected == pytest.approx(actual) for expected, actual in zip(recall_sk, recall_ig))
def test_binary_shapes():
    """Averaged binary ``Recall`` is 1.0 for two target/prediction layouts.

    The first round pairs a 1-D target with a (2, 1) prediction, the second
    a (2, 1) target with a (2, 1) prediction.
    """
    recall = Recall(average=True)

    def _reference_indices(pred_column):
        # Arg-max over the two columns [1 - p, p].
        two_columns = torch.cat([1.0 - pred_column, pred_column], dim=1)
        return torch.max(two_columns, dim=1)[1]

    def _verify(targets, indices):
        reference = recall_score(targets.data.numpy(), indices.data.numpy(), average='macro')
        assert recall.compute() == pytest.approx(reference)
        assert recall.compute() == 1.0

    y = torch.LongTensor([1, 0])
    y_pred = torch.FloatTensor([0.9, 0.2]).unsqueeze(1)
    indices = _reference_indices(y_pred)
    recall.update((y_pred, y))
    _verify(y, indices)

    y = torch.LongTensor([[1], [0]])
    y_pred = torch.FloatTensor([[0.9], [0.2]])
    indices = _reference_indices(y_pred)
    recall.reset()
    recall.update((y_pred, y))
    _verify(y, indices)
def _test(average, metric_device):
    """Check the accumulator devices of a multilabel ``Recall``.

    The accumulators are checked right after construction and again after
    one update.
    """
    re = Recall(is_multilabel=True, average=average, device=metric_device)
    assert re._device == metric_device

    def _assert_accumulators_on_device():
        assert (
            re._true_positives.device == metric_device
        ), f"{type(re._true_positives.device)}:{re._true_positives.device} vs {type(metric_device)}:{metric_device}"
        assert (
            re._positives.device == metric_device
        ), f"{type(re._positives.device)}:{re._positives.device} vs {type(metric_device)}:{metric_device}"

    _assert_accumulators_on_device()

    predictions = torch.randint(0, 2, size=(10, 4, 20, 23))
    targets = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
    re.update((predictions, targets))

    _assert_accumulators_on_device()
def _test(average, metric_device):
    """Check the accumulator devices of a multilabel ``Recall``.

    The accumulators are checked right after construction and again after
    one update.
    """
    re = Recall(is_multilabel=True, average=average, device=metric_device)
    assert re._device == metric_device

    def _assert_accumulators_on_device():
        assert re._true_positives.device == metric_device, "{}:{} vs {}:{}".format(
            type(re._true_positives.device), re._true_positives.device,
            type(metric_device), metric_device)
        assert re._positives.device == metric_device, "{}:{} vs {}:{}".format(
            type(re._positives.device), re._positives.device,
            type(metric_device), metric_device)

    _assert_accumulators_on_device()

    predictions = torch.randint(0, 2, size=(10, 4, 20, 23))
    targets = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
    re.update((predictions, targets))

    _assert_accumulators_on_device()
def test_compute():
    """Check the non-averaged ``Recall`` entries for two hand-built cases.

    The first case uses a single update; the second uses two updates with
    the same predictions and different targets.
    """
    recall = Recall()

    # Case 1: identity predictions over 4 columns, all targets equal to 1.
    recall.update((torch.eye(4), torch.ones(4).type(torch.LongTensor)))
    result = list(recall.compute())
    for position, expected in enumerate((0.0, 0.25, 0.0, 0.0)):
        assert result[position] == expected

    # Case 2: two updates, targets all 1 and then all 0.
    recall.reset()
    y_pred = torch.eye(2)
    for make_targets in (torch.ones, torch.zeros):
        y = make_targets(2).type(torch.LongTensor)
        recall.update((y_pred, y))
    result = list(recall.compute())
    for position, expected in enumerate((0.5, 0.5)):
        assert result[position] == expected
def test_predict(model, dataloader_test, use_cuda):
    """Run ``model`` over ``dataloader_test`` and print the F1 score.

    Args:
        model: callable as ``model(img)``; moved to CUDA when ``use_cuda``
            is true.
        dataloader_test: iterable yielding ``(img, label)`` batches.  The
            class index is taken as the arg-max of ``label`` over dim 1.
        use_cuda (bool): move the model and every batch to CUDA.

    Returns:
        None.  The F1 score, scaled by 100, is printed.
    """
    if use_cuda:
        model = model.cuda()

    precision = Precision()
    recall = Recall()
    f1 = Fbeta(beta=1.0, average=True, precision=precision, recall=recall)

    # NOTE(review): model.eval() is not called here - confirm that the
    # caller has already put the model into evaluation mode.
    # Inference only, so no autograd graph is needed.
    with torch.no_grad():
        for img, label in dataloader_test:
            if use_cuda:
                img = img.cuda()
                label = label.cuda()
            pred = model(img)
            _, my_label = torch.max(label, dim=1)

            output = (pred, my_label)
            precision.update(output)
            recall.update(output)
            f1.update(output)

    print("\tF1 Score: {:0.2f}".format(f1.compute()*100))
def test_multilabel_wrong_inputs():
    """Multilabel ``Recall`` must raise ``ValueError`` on malformed inputs."""
    re = Recall(average=True, is_multilabel=True)

    single_call_cases = (
        # incompatible shapes
        lambda: (torch.randint(0, 2, size=(10, )),
                 torch.randint(0, 2, size=(10, )).type(torch.LongTensor)),
        # incompatible y_pred
        lambda: (torch.rand(10, 5),
                 torch.randint(0, 2, size=(10, 5)).type(torch.LongTensor)),
        # incompatible y
        lambda: (torch.randint(0, 5, size=(10, 5, 6)), torch.rand(10)),
    )
    for make_output in single_call_cases:
        with pytest.raises(ValueError):
            re.update(make_output())

    # incompatible shapes between two updates: both calls run inside the
    # same context, with a different number of labels in the second one.
    with pytest.raises(ValueError):
        for num_labels in (5, 6):
            preds = torch.randint(0, 2, size=(20, num_labels))
            targets = torch.randint(0, 2, size=(20, num_labels)).type(torch.LongTensor)
            re.update((preds, targets))
def _test(average):
    """Compare multiclass ``Recall`` on 4-D predictions with scikit-learn.

    Covers two single-update rounds and one round of batched updates on the
    same metric instance, with ``reset()`` between rounds.  The scikit-learn
    reference is computed over every class index of the prediction tensor.
    """
    re = Recall(average=average)
    sk_average_parameter = "macro" if average else None

    def _random_pair(batch, num_classes, height, width):
        # Predictions are drawn before targets in every round.
        preds = torch.rand(batch, num_classes, height, width)
        targets = torch.randint(0, num_classes, size=(batch, height, width)).long()
        return preds, targets

    def _verify(preds, targets):
        num_classes = preds.shape[1]
        np_y_pred = preds.argmax(dim=1).numpy().ravel()
        np_y = targets.numpy().ravel()

        assert re._type == "multiclass"
        assert isinstance(re.compute(), float if average else torch.Tensor)
        if average:
            re_compute = re.compute()
        else:
            re_compute = re.compute().numpy()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            sk_compute = recall_score(np_y, np_y_pred, labels=range(0, num_classes), average=sk_average_parameter)
            assert sk_compute == pytest.approx(re_compute)

    y_pred, y = _random_pair(10, 5, 18, 16)
    re.update((y_pred, y))
    _verify(y_pred, y)

    re.reset()
    y_pred, y = _random_pair(10, 7, 20, 12)
    re.update((y_pred, y))
    _verify(y_pred, y)

    # Batched Updates
    re.reset()
    y_pred, y = _random_pair(100, 10, 12, 14)
    batch_size = 16
    n_iters = y.shape[0] // batch_size + 1
    for start in range(0, n_iters * batch_size, batch_size):
        stop = start + batch_size
        re.update((y_pred[start:stop], y[start:stop]))
    _verify(y_pred, y)