Example #1
    def compute_precision_and_recall(
            output: Tensor,
            labels: Tensor,
            num_gt: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:
        """
        Arguments:
            output(Tensor[N, K])
            labels(Tensor[N, K])
            num_gt(Tensor[K])
        Returns:
            prec(Tensor[N, K])
            rec(Tensor[N, K])
        """
        order = output.argsort(0, descending=True)
        # Gather each column's labels in score order; the arange must live on
        # the same device as the labels for CUDA inputs.
        tp = labels[order,
                    torch.ones_like(order) *
                    torch.arange(output.shape[1], device=output.device)]
        fp = 1 - tp
        tp = tp.cumsum(0)
        fp = fp.cumsum(0)

        prec = tp / (tp + fp)
        rec = div(tp, labels.sum(0)) if num_gt is None \
            else div(tp, num_gt)

        return prec, rec
Example #2
    def compute_pr_for_each(
            output: Tensor,
            labels: Tensor,
            num_gt: Optional[Union[int,
                                   float]] = None) -> Tuple[Tensor, Tensor]:
        """
        Arguments:
            output(Tensor[N])
            labels(Tensor[N]): Binary labels for each sample
            num_gt(int or float): Number of ground truth instances
        Returns:
            prec(Tensor[N])
            rec(Tensor[N])
        """
        order = output.argsort(descending=True)

        tp = labels[order]
        fp = 1 - tp
        tp = tp.cumsum(0)
        fp = fp.cumsum(0)

        prec = tp / (tp + fp)
        rec = div(tp, labels.sum().item()) if num_gt is None \
            else div(tp, num_gt)

        return prec, rec
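Examples #1 and #2 both call a div helper that neither snippet defines. A minimal zero-safe sketch consistent with the call sites (the name and exact semantics are assumptions, not the original implementation):

    import torch
    from torch import Tensor

    def div(numerator: Tensor, denom) -> Tensor:
        # Hypothetical zero-safe division: where the denominator is 0 (a class
        # with no ground-truth instances), return 0 instead of NaN/inf.
        num = numerator.float()
        denom = torch.as_tensor(denom, dtype=torch.float, device=num.device)
        return torch.where(denom > 0, num / denom, torch.zeros_like(num))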
Example #3
def _calculate(y: torch.Tensor, y_pred: torch.Tensor):
    assert y.ndimension() == y_pred.ndimension() == 1 and len(y) == len(
        y_pred), "y and y_pred must be 1-dimensional tensors of the same length."
    assert y.unique().equal(
        torch.tensor([0, 1], dtype=y.dtype, device=y.device)
    ), "y values must contain both 0 and 1 (not all 0 or all 1)."
    n = len(y)
    indexes = y_pred.argsort()
    y = y[indexes].cpu().numpy()
    y_pred = y_pred[indexes].cpu().numpy()
    nneg = auc = tmp_pos = tmp_neg = 0.0

    for i in range(n):
        y_i = cast(float, y[i])
        if i + 1 < n and y_pred[i] == y_pred[i + 1]:
            tmp_pos += y_i
            tmp_neg += 1 - y_i
            continue
        if tmp_pos + tmp_neg > 0:
            tmp_pos += y_i
            tmp_neg += 1 - y_i
            nneg += tmp_neg
            auc += tmp_pos * (nneg - tmp_neg / 2)
            tmp_pos = tmp_neg = 0
            continue
        if y_i == 1:
            auc += nneg
        else:
            nneg += 1
    return auc / (nneg * (n - nneg))
Example #4
def _calculate(y_pred: torch.Tensor, y: torch.Tensor) -> float:
    if not (y.ndimension() == y_pred.ndimension() == 1 and len(y) == len(y_pred)):
        raise AssertionError("y and y_pred must be 1-dimensional tensors of the same length.")
    y_unique = y.unique()
    if len(y_unique) == 1:
        warnings.warn(f"y values cannot be all {y_unique.item()}, skip AUC computation and return `NaN`.")
        return float("nan")
    if not y_unique.equal(torch.tensor([0, 1], dtype=y.dtype, device=y.device)):
        warnings.warn(f"y values must be 0 or 1, but got {y_unique.tolist()}, skip AUC computation and return `NaN`.")
        return float("nan")

    n = len(y)
    indices = y_pred.argsort()
    y = y[indices].cpu().numpy()
    y_pred = y_pred[indices].cpu().numpy()
    nneg = auc = tmp_pos = tmp_neg = 0.0

    for i in range(n):
        y_i = cast(float, y[i])
        if i + 1 < n and y_pred[i] == y_pred[i + 1]:
            tmp_pos += y_i
            tmp_neg += 1 - y_i
            continue
        if tmp_pos + tmp_neg > 0:
            tmp_pos += y_i
            tmp_neg += 1 - y_i
            nneg += tmp_neg
            auc += tmp_pos * (nneg - tmp_neg / 2)
            tmp_pos = tmp_neg = 0
            continue
        if y_i == 1:
            auc += nneg
        else:
            nneg += 1
    return auc / (nneg * (n - nneg))
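A quick sanity check of the rank-based AUC in Examples #3 and #4 (using Example #4's (y_pred, y) argument order, and assuming the omitted imports: import warnings, import torch, and from typing import cast). sklearn.metrics.roc_auc_score returns the same 0.75 for this classic four-point case:

    y_pred = torch.tensor([0.1, 0.4, 0.35, 0.8])
    y = torch.tensor([0, 0, 1, 1])
    print(_calculate(y_pred, y))  # 0.75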
Example #5
def gp_posterior(ax,
                 x: torch.Tensor,
                 preds: MultivariateNormal,
                 ewma_alpha: float = 0.0,
                 label: Optional[str] = None,
                 sort=True,
                 fill_alpha=0.05,
                 **kwargs):
    x = x.view(-1)
    if sort:
        i = x.argsort()
        if i.equal(torch.arange(i.size(0))):
            # Already in order; a no-op slice avoids an unnecessary gather.
            i = slice(None, None, None)
    else:
        i = slice(None, None, None)
    x = n(x[i])

    preds_mean = preds.mean.view(-1)
    mean = ewma(n(preds_mean[i]), ewma_alpha)
    line, *_ = ax.plot(x, mean, **kwargs)
    if label is not None:
        line.set_label(label)

    C = line.get_color()
    lower, upper = (p.view(-1) for p in preds.confidence_region())

    lower = ewma(n(lower[i]), ewma_alpha)
    upper = ewma(n(upper[i]), ewma_alpha)
    ax.fill_between(x, lower, upper, alpha=fill_alpha, color=C)
    ax.plot(x, lower, color=C, linewidth=0.5)
    ax.plot(x, upper, color=C, linewidth=0.5)
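Example #5 plots a gpytorch MultivariateNormal posterior with matplotlib and leans on two helpers, n and ewma, that the snippet does not define. A plausible minimal sketch, with names and behavior inferred from the call sites (assumptions, not the original code):

    import numpy as np
    import torch

    def n(t: torch.Tensor) -> np.ndarray:
        # Detach a tensor and move it to NumPy for matplotlib.
        return t.detach().cpu().numpy()

    def ewma(values: np.ndarray, alpha: float) -> np.ndarray:
        # Exponentially weighted moving average; alpha == 0.0 (the default
        # above) leaves the values unsmoothed.
        if alpha <= 0.0:
            return values
        out = np.empty(len(values))
        out[0] = values[0]
        for i in range(1, len(values)):
            out[i] = alpha * out[i - 1] + (1.0 - alpha) * values[i]
        return out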
Example #6
    def __call__(self, logits: torch.Tensor, labels: torch.Tensor, mask: torch.Tensor):
        """
        logits and labels should be the same shape. Labels should be
        an array of 0/1s to indicate if the document is relevant.

        We don't need a mask here since we select nonzero labels and
        masked entries in labels are never equal to 1 (Pedro is pretty sure)
        """
        n_relevent = labels.sum().item()
        if n_relevent == 0:
            # None are relevent, no-op
            return

        preds = logits.argsort(dim=-1, descending=True)
        # nonzeros occur where there are predictions to make
        # (n_nonzero, 3)
        # 3 = dims for batch, turn and fact
        indices = labels.nonzero()

        # TODO: This could be batched, but it's a pain.
        all_ranks = []
        for batch_idx, turn_idx, fact_idx in indices:
            # List of predictions, first element is index
            # of top ranked document, second of second-top, etc
            inst_preds = preds[batch_idx, turn_idx]
            rank = (inst_preds == fact_idx).nonzero().reshape(-1)
            all_ranks.append(rank)
        all_ranks = torch.cat(all_ranks)
        # Ranks from torch start at zero; add 1 before taking the reciprocal.
        reciprocal_ranks = 1 / (1 + all_ranks).float()
        self._reciprocal_ranks.extend(reciprocal_ranks.cpu().numpy().tolist())
        return reciprocal_ranks.mean()
Example #7
    def _erfinv(data: torch.Tensor, eps):
        rank = data.argsort().argsort().float()

        rank_scaled = (rank / rank.max() - 0.5) * 2
        rank_scaled = rank_scaled.clamp(-1 + eps, 1 - eps)

        tformed = rank_scaled.erfinv()

        return tformed
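Example #7 is the rank-Gauss transform: the double argsort converts values to 0-based ranks, which are rescaled into (-1, 1) and passed through the inverse error function so the output is approximately Gaussian; eps keeps the endpoints away from ±1, where erfinv diverges. Called as a plain function here (it is written like a static method):

    x = torch.tensor([3.0, 1.0, 10.0, 2.0])
    print(_erfinv(x, eps=1e-6))  # monotone in x, roughly Gaussian-spaced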
Example #8
def get_class_name(ranks: torch.Tensor):
    CLASSNAMES = "imagenet1000_clsidx_to_labels.json"
    classes = []
    with open(os.path.join(file_path, CLASSNAMES), "r") as f:
        for line in f.readlines():
            raw = line.strip("{").strip("}")
            key, value = raw.split(": ")
            classes.append(value.strip("'").strip("',\n"))
    sorted_ranks = ranks.argsort(descending=True)
    return np.array(classes)[sorted_ranks]
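Example #8 parses the class-label file line by line with string stripping, and file_path is an undefined module-level name. If the file were valid JSON mapping class indices to labels, a more robust sketch would be possible (the path parameter and JSON layout here are assumptions; the well-known imagenet1000_clsidx_to_labels file is actually a Python dict literal, which is why the original resorts to manual stripping):

    import json
    import os

    import numpy as np
    import torch

    def get_class_name(ranks: torch.Tensor, file_path: str = "."):
        fname = os.path.join(file_path, "imagenet1000_clsidx_to_labels.json")
        with open(fname) as f:
            idx_to_label = json.load(f)  # e.g. {"0": "tench, Tinca tinca", ...}
        classes = [idx_to_label[str(i)] for i in range(len(idx_to_label))]
        return np.array(classes)[ranks.argsort(descending=True).cpu().numpy()]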
Example #9
    def __init__(self, X: torch.Tensor, y: torch.Tensor) -> None:
        assert X.dtype == torch.float
        assert y.dtype == torch.long
        assert len(X.shape) == 2
        assert len(y.shape) == 1
        assert X.shape[0] == y.shape[0]

        self.sort_indices = y.argsort()
        self.X = X[self.sort_indices, :]
        self.y = y[self.sort_indices]
Example #10
def manifold_dist_loss_relu_sum(model: nn.Module,
                                inputs: torch.Tensor,
                                train_distances: torch.Tensor,
                                manifold: RiemannianManifold,
                                margin=0.01,
                                discount_factor=0.9):
    """
    See write up for details on this loss function -- encourages embeddings to preserve graph topology
    Parameters:
        model (nn.Module): model that takes in graph indices and outputs embeddings in output manifold space
        inputs (torch.Tensor): LongTensor of shape [batch_size, num_samples+1] giving the indices of 
            the vertices to be trained with the first vertex in each element of the batch being the 
            main vertex and the others being samples
        train_distances (torch.Tensor): floating point tensor of shape [batch_size, num_samples]
            containing the training distances from the input vertex to the sampled vertices
        manifold (RiemannianManifold): Manifold that model embeds vertices into

    Returns:
        pytorch scalar: Computed loss
    """

    input_embeddings = model(inputs)

    sample_vertices = input_embeddings.narrow(1, 1,
                                              input_embeddings.size(1) - 1)
    main_vertices = input_embeddings.narrow(1, 0, 1).expand_as(sample_vertices)
    manifold_dists = manifold.dist(main_vertices, sample_vertices)

    sorted_indices = train_distances.argsort(dim=-1)
    manifold_dists_sorted = torch.gather(manifold_dists, -1, sorted_indices)
    manifold_dists_sorted.add_(EPSILON).log_()
    diff_matrix_shape = [
        manifold_dists.size()[0],
        manifold_dists.size()[1],
        manifold_dists.size()[1]
    ]
    row_expanded = manifold_dists_sorted.unsqueeze(2).expand(
        *diff_matrix_shape)
    column_expanded = manifold_dists_sorted.unsqueeze(1).expand(
        *diff_matrix_shape)
    diff_matrix = row_expanded - column_expanded + margin

    train_dists_sorted = torch.gather(train_distances, -1, sorted_indices)
    train_row_expanded = train_dists_sorted.unsqueeze(2).expand(
        *diff_matrix_shape)
    train_column_expanded = train_dists_sorted.unsqueeze(1).expand(
        *diff_matrix_shape)
    diff_matrix_train = train_row_expanded - train_column_expanded
    masked_diff_matrix = torch.where(diff_matrix_train == 0, diff_matrix_train,
                                     diff_matrix)
    masked_diff_matrix.triu_()
    relu_(masked_diff_matrix)
    masked_diff_matrix = masked_diff_matrix.sum(-1)
    loss = masked_diff_matrix.sum(-1).mean()
    return loss
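Example #10 references two names that are not shown: an EPSILON constant guarding the log of the manifold distances, and an in-place relu_; RiemannianManifold comes from the surrounding project. Plausible definitions for the first two (assumptions consistent with the call sites):

    from torch.nn.functional import relu_

    EPSILON = 1e-9  # keeps log() finite when a manifold distance is zero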
Example #11
def mrr(predictions: torch.Tensor, ground_truth_idx: torch.Tensor) -> float:
    """Calculates mean reciprocal rank (MRR) for given predictions and ground truth values.
    :param predictions: BxN tensor of prediction values where B is batch size and N number of classes. Predictions
    must be sorted in class ids order
    :param ground_truth_idx: Bx1 tensor with index of ground truth class
    :return: Mean reciprocal rank score
    """
    assert predictions.size(0) == ground_truth_idx.size(0)

    # Ascending argsort assumes lower prediction values rank better (e.g.
    # distance scores); the ground-truth column position is rank - 1.
    indices = predictions.argsort()
    ranks = (indices == ground_truth_idx).nonzero()[:, 1].float().add(1.0)
    return ranks.reciprocal().mean().item()
Example #12
def mapr(input: torch.Tensor, targs: torch.LongTensor, mapn: int):
    """
        Compute the mean average precision
    
        > map5 = partial(mapr, mapn=5)
    """
    n = targs.shape[0]  # number for samples
    input = input.argsort(dim=-1, descending=True)[:, :mapn]
    targs = targs.view(n, -1)
    return ((input == targs).float() /
            torch.arange(1, mapn + 1, device=input.device).float()).sum(
                dim=-1).mean()
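A worked check of Example #12 (single-label targets; the map5 alias in the docstring assumes from functools import partial): a true class ranked second among the top-n predictions contributes an average precision of 1/2.

    logits = torch.tensor([[0.1, 0.9, 0.8]])  # true class 2 is ranked 2nd
    targs = torch.tensor([2])
    print(mapr(logits, targs, mapn=3))  # tensor(0.5000)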
Example #13
def _rank_data(data: Tensor) -> Tensor:
    """ Calculate the rank for each element of a tensor. The rank refers to the indices of an element in the
    corresponding sorted tensor (starting from 1). Duplicates of the same value will be assigned the mean of
    their rank

    Adapted from:
        https://github.com/scipy/scipy/blob/v1.6.2/scipy/stats/stats.py#L4140-L4303
    """
    n = data.numel()
    rank = torch.empty_like(data)
    idx = data.argsort()
    rank[idx[:n]] = torch.arange(1, n + 1, dtype=data.dtype, device=data.device)

    repeats = _find_repeats(data)
    for r in repeats:
        # Average the ranks of tied values (compare against the data, not the ranks).
        condition = data == r
        rank[condition] = rank[condition].mean()
    return rank
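Example #13 (and the near-duplicate Example #15 below) depends on a _find_repeats helper that is not shown. In torchmetrics it returns every value occurring more than once; a minimal stand-in:

    def _find_repeats(data: Tensor) -> Tensor:
        # Values that appear at least twice, in sorted order.
        values, counts = data.unique(return_counts=True)
        return values[counts > 1]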
Example #14
def roc_curve(input: Tensor, targ: Tensor):
    "Computes the receiver operator characteristic (ROC) curve by determining the true positive ratio (TPR) and false positive ratio (FPR) for various classification thresholds. Restricted binary classification tasks."
    # wassname: fix this by making LongTensor([0]=>device)
    targ = targ == 1
    desc_score_indices = torch.flip(input.argsort(-1), [-1])
    input = input[desc_score_indices]
    targ = targ[desc_score_indices]
    d = input[1:] - input[:-1]
    distinct_value_indices = torch.nonzero(d).transpose(0, 1)[0]
    threshold_idxs = torch.cat(
        (distinct_value_indices, LongTensor([len(targ) - 1]).to(targ.device)))
    tps = torch.cumsum(targ * 1, dim=-1)[threshold_idxs]
    fps = 1 + threshold_idxs - tps
    if tps[0] != 0 or fps[0] != 0:
        zer = torch.zeros(1, dtype=fps.dtype, device=fps.device)
        fps = torch.cat((zer, fps))
        tps = torch.cat((zer, tps))
    fpr, tpr = fps.float() / fps[-1], tps.float() / tps[-1]
    return fpr, tpr
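A quick check of Example #14 (assuming from torch import LongTensor, Tensor), reusing the four-point example from above; the resulting curve integrates to the 0.75 AUC computed earlier:

    scores = torch.tensor([0.1, 0.4, 0.35, 0.8])
    labels = torch.tensor([0, 0, 1, 1])
    fpr, tpr = roc_curve(scores, labels)
    # fpr: 0.0, 0.0, 0.5, 0.5, 1.0
    # tpr: 0.0, 0.5, 0.5, 1.0, 1.0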
Example #15
def _rank_data(data: Tensor) -> Tensor:
    """Calculate the rank for each element of a tensor. The rank refers to the indices of an element in the
    corresponding sorted tensor (starting from 1). Duplicates of the same value will be assigned the mean of their
    rank.

    Adapted from:     `Rank of element tensor`_
    """
    n = data.numel()
    rank = torch.empty_like(data)
    idx = data.argsort()
    rank[idx[:n]] = torch.arange(1,
                                 n + 1,
                                 dtype=data.dtype,
                                 device=data.device)

    repeats = _find_repeats(data)
    for r in repeats:
        condition = data == r
        rank[condition] = rank[condition].mean()
    return rank
Example #16
def unique1d(tensor: torch.Tensor, return_index: bool = False):
    """Port of np.unique to PyTorch with `return_index` functionality"""
    assert len(tensor.shape) == 1

    if return_index:
        perm = tensor.argsort()
        aux = tensor[perm]
    else:
        # Tensors have no in-place `sort_`; take the sorted copy instead.
        aux, _ = tensor.sort()

    # Boolean mask marking the first element of each run of equal values.
    mask = torch.zeros(aux.shape, dtype=torch.bool)
    mask[:1] = True
    mask[1:] = aux[1:] != aux[:-1]

    ret = (aux[mask], )
    if return_index:
        ret += (perm[mask], )

    return ret
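Usage sketch for Example #16. One caveat: np.unique's return_index reports the first occurrence of each value, which is only guaranteed here if the argsort is stable (e.g. tensor.argsort(stable=True) on recent PyTorch versions):

    t = torch.tensor([3, 1, 3, 2, 1])
    values, first_idx = unique1d(t, return_index=True)
    print(values)     # tensor([1, 2, 3])
    print(first_idx)  # tensor([1, 3, 0]) with a stable sort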
Example #17
    def __call__(self,
                 predictions: torch.Tensor,
                 gold_labels: torch.Tensor,
                 mask: Optional[torch.Tensor] = None) -> None:
        predictions, gold_labels, mask = \
            self.detach_tensors(predictions, gold_labels, mask)

        num_classes = predictions.size(-1)

        predictions = predictions.view((-1, num_classes))
        gold_labels = gold_labels.view(-1).long()

        predicted_ids = predictions.argsort(-1, descending=True)
        correct = predicted_ids.eq(gold_labels.unsqueeze(-1)).float()
        reciprocals = torch.arange(1, num_classes + 1,
                                   device=correct.device).float().reciprocal()
        reciprocal_ranks = torch.matmul(correct, reciprocals)

        if mask is not None:
            # Flatten the mask to match the flattened predictions.
            mask = mask.view(-1).bool()
            self.summed_reciprocal_ranks += reciprocal_ranks[mask].sum().item()
            self.total_count += mask.sum().item()
        else:
            self.summed_reciprocal_ranks += reciprocal_ranks.sum().item()
            self.total_count += gold_labels.numel()
Example #18
def _label_ranking_loss_update(
    preds: Tensor,
    target: Tensor,
    sample_weight: Optional[Tensor] = None
) -> Tuple[Tensor, int, Optional[Tensor]]:
    """Accumulate state for label ranking loss.

    Args:
        preds: tensor with predictions
        target: tensor with ground truth labels
        sample_weight: optional tensor with weight for each sample
    """
    _check_ranking_input(preds, target, sample_weight)
    n_preds, n_labels = preds.shape
    relevant = target == 1
    n_relevant = relevant.sum(dim=1)

    # Ignore instances where number of true labels is 0 or n_labels
    mask = (n_relevant > 0) & (n_relevant < n_labels)
    preds = preds[mask]
    relevant = relevant[mask]
    n_relevant = n_relevant[mask]

    # No instances left after masking; return a zero loss
    if len(preds) == 0:
        return torch.tensor(0.0, device=preds.device), 1, sample_weight

    inverse = preds.argsort(dim=1).argsort(dim=1)
    per_label_loss = ((n_labels - inverse) * relevant).to(torch.float32)
    correction = 0.5 * n_relevant * (n_relevant + 1)
    denom = n_relevant * (n_labels - n_relevant)
    loss = (per_label_loss.sum(dim=1) - correction) / denom
    if isinstance(sample_weight, Tensor):
        loss *= sample_weight[mask]
        sample_weight = sample_weight.sum()
    return loss.sum(), n_preds, sample_weight
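A worked example for Example #18 (assuming torchmetrics' _check_ranking_input validation helper is in scope): both relevant labels are scored below the single irrelevant one, so both (relevant, irrelevant) pairs are mis-ordered and the per-sample loss is 1.0, matching sklearn.metrics.label_ranking_loss.

    preds = torch.tensor([[0.1, 0.9, 0.5]])
    target = torch.tensor([[1, 0, 1]])
    loss_sum, n_preds, _ = _label_ranking_loss_update(preds, target)
    print(loss_sum / n_preds)  # tensor(1.)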
Example #19
def scatter_split(src: torch.Tensor, indexes: torch.Tensor) -> List[torch.Tensor]:
    sorted_src = src[indexes.argsort()]
    indexes_count = torch.unique(indexes, return_counts=True)[1]

    return torch.split(sorted_src, indexes_count.tolist(), dim=0)
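Usage sketch for Example #19: rows of src are regrouped by their index, yielding one tensor per group (assumes indexes holds contiguous group ids starting at 0):

    src = torch.tensor([[1.], [2.], [3.], [4.]])
    groups = torch.tensor([1, 0, 1, 0])
    print(scatter_split(src, groups))
    # [tensor([[2.], [4.]]), tensor([[1.], [3.]])]
    # (within-group ordering depends on the sort's tie-breaking)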
Example #20
def _get_ranks(x: torch.Tensor) -> torch.Tensor:
    tmp = x.argsort()
    ranks = torch.zeros_like(tmp)
    ranks[tmp] = torch.arange(len(x), device=x.device)
    return ranks
Example #21
 def _get_ranks(self, x: torch.Tensor) -> torch.Tensor:
     tmp = x.argsort()
     ranks = torch.zeros_like(tmp)
     ranks[tmp] = torch.arange(x.size(0), device=ranks.device)
     return ranks
Example #22
def get_index(logit: torch.Tensor):
    return logit.argsort(dim=0)
Example #23
def _get_ranks(x: torch.Tensor) -> torch.Tensor:
    argsort = x.argsort()
    ranks = torch.zeros_like(argsort, device=x.device)
    ranks[argsort] = torch.arange(len(x), device=x.device)
    return ranks
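Examples #20, #21, and #23 are the same scatter-based ranking idiom: argsort yields the permutation that sorts x, and scattering an arange through that permutation gives each element's 0-based rank.

    x = torch.tensor([0.3, 0.1, 0.9])
    print(_get_ranks(x))  # tensor([1, 0, 2])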
Example #24
def evaluate(distmat: torch.Tensor,
             qpids: torch.Tensor,
             gpids: torch.Tensor,
             qcamids: torch.Tensor,
             gcamids: torch.Tensor,
             max_rank=50):
    """
    Evaluate match # by rank
    :param distmat:
    :param qpids:
    :param gpids:
    :param qcamids:
    :param gcamids:
    :param max_rank:
    :return:
        all_cmc: percentage of queries matched until each rank r
        ap_list: average precision for each query image (mean for mAP)
        inp_list: inp score for each query image (mean for mINP)
    """
    device = distmat.device
    q, g = distmat.shape

    if g < max_rank:
        max_rank = g
        print(
            "Note: number of gallery samples is quite small, got {}".format(g))
    indices = distmat.argsort(dim=1)
    matches = gpids[indices].eq(qpids.reshape(-1, 1)).int()

    order = indices
    remove = torch.logical_and(gpids[order].eq(qpids.reshape(-1, 1)),
                               gcamids[order].eq(qcamids.reshape(-1, 1)))
    keep = remove.logical_not()
    kept = keep.cumsum(dim=1)

    q, g = len(qpids), len(gpids)

    valid_matches = matches * keep
    valid_query = valid_matches.sum(dim=1).gt(
        0)  # at least one matchable (== matched) gallery image
    assert (valid_query.all())  # reid dataset queries should all be valid
    assert (valid_matches.sum() != 0
            )  # error: all query identities do not appear in gallery

    final_rank_positions = (valid_matches *
                            torch.arange(1, g + 1, device=device)).argmax(
                                dim=1)
    final_rank_valid = kept[torch.arange(q, device=device),
                            final_rank_positions]
    all_INP = valid_matches.sum(dim=1).float() / final_rank_valid.float()

    # `kept` is analogous to index within only-valid instances
    cum_precision = valid_matches.cumsum(dim=1).float() / kept.float()
    cum_precision[cum_precision.isnan()] = 1
    all_AP = (cum_precision *
              valid_matches).sum(dim=1) / valid_matches.sum(dim=1)

    # Compute CMC (need to go query-by-query) (assume that at least 10% are valid)
    buffer = 10
    keep = keep[:, :max_rank * buffer]
    matches = matches[:, :max_rank * buffer]
    all_cmc = []
    for i in range(q):
        mc = matches[i][keep[i]][:max_rank]
        if len(mc) < max_rank:
            raise AssertionError(
                "Not enough matching galleries. Consider higher `buffer` value."
            )
        cmc = mc[:max_rank].cumsum(dim=0)
        # E.g., 0 1 x x x x ... to 0 1 1 1 1 1 ...
        cmc[cmc > 1] = 1
        all_cmc.append(cmc)

    all_cmc = torch.stack(all_cmc).float()
    all_cmc = all_cmc.sum(dim=0) / valid_query.float().sum()
    # mAP = all_AP[valid_query].mean()
    # mINP = all_INP[valid_query].mean()

    return all_cmc, all_AP, all_INP
Example #25
def evaluate(distmat: torch.Tensor,
             q_pids: torch.Tensor,
             g_pids: torch.Tensor,
             q_camids: torch.Tensor,
             g_camids: torch.Tensor,
             max_rank=50,
             device=None):
    """
    Torch implementation of evaluate. Slower on CPU.

    :param distmat:
    :param q_pids:
    :param g_pids:
    :param q_camids:
    :param g_camids:
    :param max_rank:
    :return:
    """
    distmat = torch.as_tensor(distmat, device=device)
    q_pids = torch.as_tensor(q_pids, device=device)
    g_pids = torch.as_tensor(g_pids, device=device)
    q_camids = torch.as_tensor(q_camids, device=device)
    g_camids = torch.as_tensor(g_camids, device=device)
    num_q, num_g = distmat.shape

    if num_g < max_rank:
        max_rank = num_g
        print("Note: number of gallery samples is quite small, got {}".format(
            num_g))
    indices = distmat.argsort(dim=1)
    matches = (g_pids[indices] == q_pids.reshape(-1, 1)).int()

    order = indices
    remove = (g_pids[order] == q_pids.reshape(
        -1, 1)) & (g_camids[order] == q_camids.reshape(-1, 1))
    keep = ~remove
    kept = keep.cumsum(dim=1)

    q, g = len(q_pids), len(g_pids)

    valid_matches = matches * keep
    valid_query = (valid_matches.sum(dim=1) > 0
                   )  # at least one matchable (== matched) gallery image
    assert (valid_matches.sum() != 0
            )  # error: all query identities do not appear in gallery

    final_rank_positions = (valid_matches *
                            torch.arange(1, g + 1, device=device)).argmax(
                                dim=1)
    final_rank_valid = kept[torch.arange(q, device=device),
                            final_rank_positions]
    all_INP = valid_matches.sum(dim=1).float() / final_rank_valid.float()

    # `kept` is analogous to index within only-valid instances
    cum_precision = valid_matches.cumsum(dim=1).float() / kept.float()
    cum_precision[cum_precision.isnan()] = 1
    all_AP = (cum_precision *
              valid_matches).sum(dim=1) / valid_matches.sum(dim=1)

    # Compute CMC (need to go query-by-query) (assume that at least 10% are valid)
    buffer = 10
    keep = keep[:, :max_rank * buffer]
    matches = matches[:, :max_rank * buffer]
    all_cmc = []
    for i in range(q):
        mc = matches[i][keep[i]][:max_rank]
        if len(mc) < max_rank:
            raise AssertionError(
                "Not enough matching galleries. Consider higher `buffer` value."
            )
        cmc = mc[:max_rank].cumsum(dim=0)
        # E.g., 0 1 x x x x ... to 0 1 1 1 1 1 ...
        cmc[cmc > 1] = 1
        all_cmc.append(cmc)

    all_cmc = torch.stack(all_cmc).float()
    all_cmc = all_cmc.sum(dim=0) / valid_query.float().sum()

    mAP = all_AP[valid_query].mean()
    mINP = all_INP[valid_query].mean()

    return all_cmc.cpu().numpy(), mAP.item(), mINP.item()
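A minimal smoke test for Example #25 (random distances; the IDs are made up purely to exercise the shapes, with query and gallery cameras kept distinct so no match is filtered out; Example #24's variant takes the same positional arguments):

    torch.manual_seed(0)
    distmat = torch.rand(3, 60)
    q_pids = torch.tensor([0, 1, 2])
    g_pids = torch.arange(60) % 3
    q_camids = torch.zeros(3, dtype=torch.long)
    g_camids = torch.ones(60, dtype=torch.long)
    cmc, mAP, mINP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids,
                              max_rank=10)
    print(cmc.shape, mAP, mINP)  # (10,) and two floats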