示例#1
0
def test_error_multiclass_no_num_classes():
    """Multiclass-shaped input without ``num_classes`` must raise ``ValueError``."""
    expected_message = (
        "Detected input to `multiclass` but you did not provide `num_classes` argument"
    )
    # 20 samples with a softmax distribution over 3 columns, and integer
    # labels drawn from {0, 1, 2}.
    probabilities = torch.randn(20, 3).softmax(dim=-1)
    labels = torch.randint(3, (20, ))

    with pytest.raises(ValueError, match=expected_message):
        _ = auroc(probabilities, labels)
示例#2
0
def train_clf(modality: str, img_size: int):
    """Train a classifier for one modality, save its weights and print per-label metrics.

    The function builds the datasets and loaders, fits the Lightning module
    with early stopping on ``val_loss``, writes the model's ``state_dict`` to
    ``state_dicts/{modality}_clf_{img_size}.pth`` and finally prints AUROC,
    average precision, accuracy, precision and recall for every label on the
    evaluation loader.

    :param modality: modality name forwarded to ``MimicIMG`` and used in the
        checkpoint file name
    :param img_size: image size forwarded to ``MimicIMG`` and used in the
        checkpoint file name
    """
    # temp: training currently reuses the 'eval' split without transforms.
    # train_ds = MimicIMG(modality=modality, split='train', img_size=img_size, undersample_dataset=False, transform=True)
    train_ds = MimicIMG(modality=modality, split='eval', img_size=img_size, undersample_dataset=False, transform=False)
    eval_ds = MimicIMG(modality=modality, split='eval', img_size=img_size, undersample_dataset=False, transform=False)

    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=DL_WORKERS)
    eval_loader = DataLoader(eval_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=DL_WORKERS, drop_last=False)

    # Sanity check: show the shape of one evaluation batch before training.
    imgs, _ = next(iter(eval_loader))
    print(imgs.shape)

    # Train model
    lightning_module = LM(str_labels=train_ds.str_labels)
    trainer = pl.Trainer(gpus=NUM_GPUS, max_epochs=NUM_EPOCHS,
                         gradient_clip_val=0.5,
                         stochastic_weight_avg=True,
                         callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.00,
                                                  patience=5, verbose=True, mode='min')]
                         )

    trainer.fit(lightning_module, train_loader, eval_loader)

    save_dir = Path('state_dicts')
    save_dir.mkdir(exist_ok=True)
    torch.save(lightning_module.model.state_dict(), save_dir / f'{modality}_clf_{img_size}.pth')

    # Evaluate
    lightning_module.model.eval()
    lightning_module.model.to(DEVICE)

    # Collect per-batch results and concatenate once at the end; concatenating
    # inside the loop re-copies everything gathered so far on every batch.
    prediction_batches = []
    target_batches = []
    with torch.no_grad():
        for x, y in eval_loader:
            output = lightning_module(x.to(DEVICE))
            target_batches.append(y.cpu())
            prediction_batches.append(output.cpu())
    predictions = torch.cat(prediction_batches)
    targets = torch.cat(target_batches)

    # One column per label: score each label independently.
    for idx, label in enumerate(train_ds.str_labels):
        preds_label = predictions[:, idx]
        y_label = targets[:, idx].int()
        auroc_score = auroc(preds_label, y_label)
        av_precision_score = average_precision(preds_label, y_label)
        preds_thr = (preds_label > 0.5).int()
        acc = accuracy_metric(preds_thr, y_label)
        prec = precision(preds_thr, y_label)
        rec = recall(preds_thr, y_label)
        print(f'auroc__{label}', auroc_score)
        print(f'avg_precision__{label}', av_precision_score)
        print(f'acc__{label}', acc)
        print(f'precision__{label}', prec)
        print(f'recall__{label}', rec)
        # Tensor.sum() instead of the builtin sum(): the builtin walks the
        # tensor element by element in Python and returns a plain int for an
        # empty tensor, which has no .item().
        print(f'pred_pos__{label}', preds_thr.sum().item())
        print(f'true_pos__{label}', y_label.sum().item())
示例#3
0
def test_weighted_with_empty_classes(device):
    """Weighted multiclass AUROC must not change when an extra, empty class is added.

    Also checks that a ``UserWarning`` is emitted for the empty class and that
    a ``ValueError`` is raised when only one class has observations.
    """
    if device == "cuda" and not torch.cuda.is_available():
        pytest.skip("Test requires gpu to run")

    num_classes = 3
    preds = torch.tensor([
        [0.90, 0.05, 0.05],
        [0.05, 0.90, 0.05],
        [0.05, 0.05, 0.90],
        [0.85, 0.05, 0.10],
        [0.10, 0.10, 0.80],
    ]).to(device)
    target = torch.tensor([0, 1, 1, 2, 2]).to(device)
    baseline = auroc(preds, target, average="weighted", num_classes=num_classes)

    # Insert a column of random scores at the second-to-last position; it
    # stands for a class that never occurs in ``target``.
    last_class = num_classes - 1
    filler_column = torch.rand_like(preds[:, 0:1])
    preds = torch.cat(
        (preds[:, :last_class], filler_column, preds[:, last_class:]),
        dim=1,
    )
    # The former last class (2) is relabelled to 3, leaving class 2 empty.
    target[target == last_class] = num_classes

    expected_warning = "Class 2 had 0 observations, omitted from AUROC calculation"
    with pytest.warns(UserWarning, match=expected_warning):
        with_empty_class = auroc(
            preds, target, average="weighted", num_classes=num_classes + 1
        )
    assert baseline == with_empty_class

    # With every target set to 0 only one class is non-empty.
    target = torch.zeros_like(target)
    expected_error = "Found 1 non-empty class in `multiclass` AUROC calculation"
    with pytest.raises(ValueError, match=expected_error):
        _ = auroc(preds, target, average="weighted", num_classes=num_classes + 1)
示例#4
0
def plot_roc_curve(
    y: Union[list, torch.Tensor],
    y_proba: Union[list, torch.Tensor],
    n_class: int = 2,
    save_path: Union[Path, None] = None,
    figsize: tuple = (10, 8),
):
    """
    Plot a ROC curve annotated with its AUROC value.

    :param y: true values
    :param y_proba: probabilities of predicted values
    :param n_class: number of classes, forwarded to ``roc`` and ``auroc``
    :param save_path: optional path where the figure will be saved; when
        omitted the figure is shown instead
    :param figsize: size of the figure
    :raises ValueError: if ``roc`` returns curves for more than two classes
    """

    if isinstance(y, list):
        y = torch.tensor(y)
    if isinstance(y_proba, list):
        y_proba = torch.tensor(y_proba)

    fpr, tpr, _ = roc(y_proba, y, num_classes=n_class)
    auroc_value = auroc(y_proba, y, num_classes=n_class)

    if isinstance(fpr, list):
        if len(fpr) > 2:
            raise ValueError("ROC curve not implemented for multiclass")
        if len(fpr) == 2:
            # Take only positive class for multi-class with 2 class (equivalent to binary case)
            fpr = fpr[1]
            tpr = tpr[1]

    # Create the figure only after validation so the error path above does
    # not leave an empty figure registered with pyplot.
    fig = plt.figure(figsize=figsize)

    plt.plot(fpr, tpr, label="ROC curve")
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], 'r--')
    # Empty, invisible series whose only purpose is to put the AUROC in the legend.
    plt.plot([], [], ' ', label=f"AUROC = {auroc_value:.3f}")
    plt.xlim([-0.005, 1.005])
    plt.ylim([-0.005, 1.005])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.legend(loc=4)
    plt.grid(alpha=0.75)

    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)
        # pyplot keeps every figure alive until it is closed; release it once
        # it is on disk so repeated calls do not accumulate figures.
        plt.close(fig)
示例#5
0
    def validation_epoch_end(self, outputs):
        """Compute per-label validation metrics over all step outputs and log them.

        Each element of ``outputs`` is read through its ``'predictions'`` and
        ``'targets'`` entries. Column ``idx`` of the stacked tensors is scored
        against ``self.str_labels[idx]``, and every metric is logged as a
        ``{label: value}`` dict.
        """
        # The leading empty tensor keeps the result identical to growing the
        # tensors batch by batch from ``torch.Tensor()``.
        predictions = torch.cat(
            [torch.Tensor()] + [step['predictions'] for step in outputs], dim=0)
        targets = torch.cat(
            [torch.Tensor()] + [step['targets'] for step in outputs], dim=0)

        metric_names = ('auroc', 'avg_precision', 'acc', 'precision', 'recall')
        metrics = {name: {} for name in metric_names}
        for column, label in enumerate(self.str_labels):
            scores = predictions[:, column]
            truth = targets[:, column].int()
            # Hard decisions at a 0.5 threshold for the thresholded metrics.
            decisions = (scores > 0.5).int()
            metrics['avg_precision'][label] = average_precision(scores, truth).item()
            metrics['auroc'][label] = auroc(scores, truth).item()
            metrics['acc'][label] = accuracy_metric(decisions, truth).item()
            metrics['precision'][label] = precision(decisions, truth).item()
            metrics['recall'][label] = recall(decisions, truth).item()

        for metric_name, per_label in metrics.items():
            self.log(f'{metric_name}', per_label, prog_bar=True)
示例#6
0
def auc(targets: csr_matrix, user_ids: (np.array, torch.tensor),
        preds: (np.array, torch.tensor), **kwargs) -> float:
    """
    Average the per-user area under the ROC curve (AUC).

    Parameters
    ----------
    targets: scipy.sparse.csr_matrix
        Interaction matrix; row ``user_id`` is densified and used as that user's labels
    user_ids: np.array or torch.tensor
        Users to score; the ``i``-th entry is paired with row ``i`` of ``preds``
    preds: torch.tensor
        Per-user item scores, read as ``preds[i, :]``
    kwargs: keyword arguments
        Only a lone ``k`` is accepted (and not used); anything else raises

    Returns
    ----------
    auc_score: float

    Raises
    ----------
    ValueError
        If ``kwargs`` is non-empty and is not exactly ``k``

    """
    if kwargs and list(kwargs) != ['k']:
        raise ValueError(f'Unexpected ``kwargs``: {kwargs}')

    running_total = 0
    for row, user_id in enumerate(user_ids):
        dense_row = targets[user_id].toarray()
        labels = torch.tensor(dense_row,
                              device=preds.device,
                              dtype=torch.long).view(-1)
        # ``preds`` may be unbounded when the model has no final activation, so
        # squash the scores into [0, 1] first; otherwise ``auroc`` can reject
        # them with a ``ValueError`` for not being probabilities
        probabilities = torch.sigmoid(preds[row, :])
        user_score = auroc(probabilities, target=labels, pos_label=1)
        running_total = running_total + user_score

    mean_score = running_total / len(user_ids)
    return mean_score.item()
示例#7
0
 def compute(self) -> torch.Tensor:
     """Return the AUROC of the collected predictions, or NaN when all targets share one value."""
     preds, targets = self._get_preds_and_targets()
     distinct_targets = torch.unique(targets)
     if distinct_targets.numel() != 1:
         return auroc(preds, targets)
     # A single distinct target value: skip ``auroc`` and report NaN instead.
     return torch.tensor(np.nan)
示例#8
0
    def compute(self):
        """Return the AUROC of ``pred_score`` against ``binary_target``.

        When the targets contain no positives or no negatives the ranking is
        undefined, so a NaN tensor on ``pred_score``'s device is returned
        instead of calling ``auroc``.

        NOTE(review): assumes ``binary_target`` holds only 0/1 (or boolean)
        values, so its sum counts the positives; confirm against the caller.
        """
        positives = self.binary_target.sum()
        # The all-positive case was already guarded; the all-negative case
        # (sum == 0) is equally degenerate and is now handled the same way.
        if positives == 0 or positives == self.binary_target.numel():
            return torch.tensor(float("nan"), device=self.pred_score.device)

        return auroc(self.pred_score, self.binary_target.byte())
示例#9
0
 def update(self, preds: torch.Tensor, target: torch.Tensor):
     """Add the AUROC of one ``preds``/``target`` pair to the running sum and count it.

     Dimension 0 is squeezed from both tensors first (a no-op unless it has
     size one); afterwards their shapes must match.
     """
     preds = preds.squeeze(dim=0)
     target = target.squeeze(dim=0)
     assert preds.shape == target.shape
     # ``self.auc`` accumulates scores and ``self.count`` the number of updates.
     self.auc += auroc(preds, target)
     self.count += 1.0
示例#10
0
    persistent_workers=True,
    use_prefetch_thread=True,  # TBD: could probably remove this argument
    exclude='reverse_id',
    reverse_eids=torch.arange(num_edges) ^ 1,
    negative_sampler=dgl.dataloading.negative_sampler.Uniform(5))

# Wall-clock duration of each of the 10 passes over ``dataloader``.
durations = []
for _ in range(10):
    # Start of this pass. Kept separate from ``t0``, which is reset after
    # every progress print and therefore cannot be used to time the pass.
    epoch_start = time.time()
    t0 = epoch_start
    for it, (input_nodes, pair_graph, neg_pair_graph,
             blocks) in enumerate(dataloader):
        # Input features are read from the source nodes of the first block.
        x = blocks[0].srcdata['feat']
        pos_score, neg_score = model(pair_graph, neg_pair_graph, blocks, x)
        # Positive pairs are labelled 1 and sampled negative pairs 0.
        pos_label = torch.ones_like(pos_score)
        neg_label = torch.zeros_like(neg_score)
        score = torch.cat([pos_score, neg_score])
        labels = torch.cat([pos_label, neg_label])
        loss = F.binary_cross_entropy_with_logits(score, labels)
        opt.zero_grad()
        loss.backward()
        opt.step()
        if it % 20 == 0:
            # The value printed under 'Acc' is the AUROC of the current batch.
            acc = MF.auroc(score, labels.long())
            mem = torch.cuda.max_memory_allocated() / 1000000
            print('Loss', loss.item(), 'Acc', acc.item(), 'GPU Mem', mem, 'MB')
            # Time elapsed since the previous progress print.
            tt = time.time()
            print(tt - t0)
            t0 = time.time()
    # Previously ``tt - t0`` was appended here: ``t0`` is reset after ``tt``
    # is taken, so that value was a tiny negative number, and ``tt`` was
    # unbound when the loader produced no batch.
    durations.append(time.time() - epoch_start)
# The first four passes are left out of the statistics (presumably warm-up).
print(np.mean(durations[4:]), np.std(durations[4:]))