def compute_precision_and_recall(
    output: Tensor, labels: Tensor, num_gt: Optional[Tensor] = None
) -> Tuple[Tensor, Tensor]:
    """Compute precision and recall at every score cut-off, column by column.

    Arguments:
        output(Tensor[N, K]): Scores; each column is ranked independently,
            highest score first.
        labels(Tensor[N, K]): Labels aligned with ``output``. They are used
            arithmetically (``1 - label``), so 0/1 values are expected.
        num_gt(Tensor[K]): Optional recall denominator per column. When
            omitted, the column sums of ``labels`` are used.
    Returns:
        prec(Tensor[N, K])
        rec(Tensor[N, K])
    """
    # Per-column ranking of the samples, best score first.
    order = output.argsort(0, descending=True)
    # gather(0, order)[i, k] == labels[order[i, k], k]. Unlike building the
    # column index with a bare torch.arange (always created on the CPU),
    # this works on whatever device the inputs live on.
    tp = labels.gather(0, order)
    fp = 1 - tp
    # Running counts of true / false positives down the ranking.
    tp = tp.cumsum(0)
    fp = fp.cumsum(0)
    prec = tp / (tp + fp)
    # NOTE(review): `div` is a helper defined elsewhere in the project;
    # presumably a division that tolerates zero denominators - confirm.
    denominator = labels.sum(0) if num_gt is None else num_gt
    rec = div(tp, denominator)
    return prec, rec
def compute_pr_for_each(
    output: Tensor,
    labels: Tensor,
    num_gt: Optional[Union[int, float]] = None,
) -> Tuple[Tensor, Tensor]:
    """Precision and recall along a single score ranking.

    Arguments:
        output(Tensor[N]): Scores used to rank the samples (descending)
        labels(Tensor[N]): Binary labels for each sample
        num_gt(int or float): Number of ground truth instances; when not
            given, the sum of ``labels`` is used instead
    Returns:
        prec(Tensor[N])
        rec(Tensor[N])
    """
    ranking = output.argsort(descending=True)
    hits = labels[ranking]
    misses = 1 - hits

    # Cumulative true / false positive counts down the ranking.
    cum_hits = hits.cumsum(0)
    cum_misses = misses.cumsum(0)
    prec = cum_hits / (cum_hits + cum_misses)

    if num_gt is None:
        total = labels.sum().item()
    else:
        total = num_gt
    # `div` is a helper defined elsewhere in the project.
    rec = div(cum_hits, total)
    return prec, rec
def _calculate(y: torch.Tensor, y_pred: torch.Tensor):
    """Compute the ROC AUC of ``y_pred`` against binary labels ``y``.

    Samples are visited in ascending score order. Every positive sample
    contributes the number of negatives ranked below it; samples that share
    a score are handled as a group, where each positive counts half of the
    negatives in the same group.

    Args:
        y: 1-D tensor of labels containing both 0 and 1.
        y_pred: 1-D tensor of scores with the same length as ``y``.

    Returns:
        The AUC as a float value.

    Raises:
        AssertionError: if the inputs are not 1-D with equal length, or if
            ``y`` does not contain exactly the values 0 and 1.
    """
    # Explicit raises instead of `assert`: asserts vanish under `python -O`,
    # and a single-class `y` would then end in a division by zero below.
    if not (y.ndimension() == y_pred.ndimension() == 1 and len(y) == len(y_pred)):
        raise AssertionError("y and y_pred must be 1 dimension data with same length.")
    expected_labels = torch.tensor([0, 1], dtype=y.dtype, device=y.device)
    if not y.unique().equal(expected_labels):
        raise AssertionError("y values must be 0 or 1, can not be all 0 or all 1.")

    n = len(y)
    indexes = y_pred.argsort()
    y = y[indexes].cpu().numpy()
    y_pred = y_pred[indexes].cpu().numpy()

    # nneg: negatives seen so far; tmp_pos / tmp_neg: positives / negatives
    # inside the current run of tied scores.
    nneg = auc = tmp_pos = tmp_neg = 0.0
    for i in range(n):
        y_i = cast(float, y[i])
        if i + 1 < n and y_pred[i] == y_pred[i + 1]:
            # Same score as the next sample: keep collecting the tie group.
            tmp_pos += y_i
            tmp_neg += 1 - y_i
            continue
        if tmp_pos + tmp_neg > 0:
            # Last member of a tie group: flush the group. Positives count
            # all earlier negatives plus half of the tied negatives.
            tmp_pos += y_i
            tmp_neg += 1 - y_i
            nneg += tmp_neg
            auc += tmp_pos * (nneg - tmp_neg / 2)
            tmp_pos = tmp_neg = 0
            continue
        if y_i == 1:
            auc += nneg
        else:
            nneg += 1
    # Normalise by (#negatives * #positives).
    return auc / (nneg * (n - nneg))
def _calculate(y_pred: torch.Tensor, y: torch.Tensor) -> float:
    """Compute the ROC AUC of the scores ``y_pred`` for binary labels ``y``.

    Returns ``nan`` (after emitting a warning) when ``y`` holds a single
    distinct value or values other than exactly 0 and 1.

    Args:
        y_pred: 1-D tensor of scores.
        y: 1-D tensor of labels with the same length as ``y_pred``.

    Raises:
        AssertionError: if the inputs are not 1-D tensors of equal length.
    """
    if y.ndimension() != 1 or y_pred.ndimension() != 1 or len(y) != len(y_pred):
        raise AssertionError("y and y_pred must be 1 dimension data with same length.")

    y_unique = y.unique()
    if len(y_unique) == 1:
        warnings.warn(f"y values can not be all {y_unique.item()}, skip AUC computation and return `Nan`.")
        return float("nan")
    binary = torch.tensor([0, 1], dtype=y.dtype, device=y.device)
    if not y_unique.equal(binary):
        warnings.warn(f"y values must be 0 or 1, but in {y_unique.tolist()}, skip AUC computation and return `Nan`.")
        return float("nan")

    n = len(y)
    order = y_pred.argsort()
    y = y[order].cpu().numpy()
    y_pred = y_pred[order].cpu().numpy()

    # nneg counts negatives already passed; tmp_pos / tmp_neg collect the
    # members of the current group of equal scores.
    nneg = auc = tmp_pos = tmp_neg = 0.0
    for i in range(n):
        label = cast(float, y[i])
        tied_with_next = i + 1 < n and y_pred[i] == y_pred[i + 1]
        if tied_with_next or tmp_pos + tmp_neg > 0:
            tmp_pos += label
            tmp_neg += 1 - label
            if not tied_with_next:
                # Group complete: tied negatives count as one half each.
                nneg += tmp_neg
                auc += tmp_pos * (nneg - tmp_neg / 2)
                tmp_pos = tmp_neg = 0
        elif label == 1:
            auc += nneg
        else:
            nneg += 1
    return auc / (nneg * (n - nneg))
def gp_posterior(ax, x: torch.Tensor, preds: MultivariateNormal,
                 ewma_alpha: float = 0.0, label: Optional[str] = None,
                 sort=True, fill_alpha=0.05, **kwargs):
    """Plot the mean of ``preds`` over ``x`` together with its confidence band.

    Args:
        ax: object providing ``plot`` and ``fill_between`` (used like a
            matplotlib axes).
        x: inputs; flattened to 1-D before plotting.
        preds: predictive distribution; its ``mean`` and
            ``confidence_region()`` are flattened and plotted.
        ewma_alpha: forwarded to the ``ewma`` helper for mean, lower and upper.
        label: optional label set on the mean line.
        sort: when true, all curves are reordered by ascending ``x``.
        fill_alpha: opacity of the filled confidence band.
        **kwargs: forwarded to ``ax.plot`` for the mean line.
    """
    # NOTE(review): `n` and `ewma` are helpers defined elsewhere; presumably
    # `n` converts a tensor for plotting and `ewma` smooths - confirm.
    x = x.view(-1)

    # Default: take everything in its existing order.
    selector = slice(None, None, None)
    if sort:
        order = x.argsort()
        # Only index with the permutation when x is not already sorted.
        if not order.equal(torch.arange(order.size(0))):
            selector = order

    x = n(x[selector])
    flat_mean = preds.mean.view(-1)
    smoothed_mean = ewma(n(flat_mean[selector]), ewma_alpha)

    line, *_ = ax.plot(x, smoothed_mean, **kwargs)
    if label is not None:
        line.set_label(label)
    # Reuse the mean line's colour for the band and its borders.
    colour = line.get_color()

    lower, upper = (bound.view(-1) for bound in preds.confidence_region())
    lower = ewma(n(lower[selector]), ewma_alpha)
    upper = ewma(n(upper[selector]), ewma_alpha)

    ax.fill_between(x, lower, upper, alpha=fill_alpha, color=colour)
    ax.plot(x, lower, color=colour, linewidth=0.5)
    ax.plot(x, upper, color=colour, linewidth=0.5)
def __call__(self, logits: torch.Tensor, labels: torch.Tensor, mask: torch.Tensor):
    """Accumulate reciprocal ranks for every relevant (label != 0) entry.

    ``logits`` and ``labels`` should have the same shape; the loop below
    expects three dimensions (batch, turn, fact). ``labels`` marks relevant
    documents with non-zero values. ``mask`` is accepted but not used: only
    non-zero labels are selected, and masked positions are assumed never to
    carry a label of 1 (an assumption inherited from the original author).

    Returns:
        The mean reciprocal rank of this call as a tensor, or ``None`` when
        ``labels`` sums to zero (nothing is recorded in that case).
    """
    if labels.sum().item() == 0:
        # Nothing is relevant: no-op.
        return

    # For every (batch, turn): document indices ordered best-first.
    ranking = logits.argsort(dim=-1, descending=True)

    # TODO: This could be batched, but it is a pain.
    # labels.nonzero() yields one (batch, turn, fact) triple per relevant
    # entry; look up where that fact sits in the corresponding ranking.
    per_fact_ranks = [
        (ranking[batch_idx, turn_idx] == fact_idx).nonzero().reshape(-1)
        for batch_idx, turn_idx, fact_idx in labels.nonzero()
    ]
    ranks = torch.cat(per_fact_ranks)

    # Ranks are zero-based, hence the +1 before inverting.
    reciprocal_ranks = 1 / (1 + ranks).float()
    self._reciprocal_ranks.extend(reciprocal_ranks.cpu().numpy().tolist())
    return reciprocal_ranks.mean()
def _erfinv(data: torch.Tensor, eps):
    """Map ``data`` to inverse-error-function values of its scaled ranks.

    The rank of each element (argsort of argsort along the last dimension)
    is rescaled linearly to [-1, 1], clamped to [-1 + eps, 1 - eps] so the
    inverse error function stays finite, and passed through ``erfinv``.
    """
    order = torch.argsort(data)
    rank = torch.argsort(order).float()
    # 0 .. max  ->  -1 .. 1
    scaled = 2 * (rank / rank.max() - 0.5)
    clipped = torch.clamp(scaled, -1 + eps, 1 - eps)
    return torch.erfinv(clipped)
def get_class_name(ranks: torch.Tensor):
    """Return the class names ordered by descending value of ``ranks``.

    The names are read line by line from ``imagenet1000_clsidx_to_labels.json``
    located under ``file_path`` (a name defined elsewhere in this module).
    Each line is expected to split into exactly two parts on ``": "``; the
    second part, with surrounding quotes, comma and newline stripped, is the
    class name.
    """
    CLASSNAMES = "imagenet1000_clsidx_to_labels.json"
    label_file = os.path.join(file_path, CLASSNAMES)

    names = []
    with open(label_file, "r") as handle:
        for line in handle:
            entry = line.strip("{").strip("}")
            _, value = entry.split(": ")
            names.append(value.strip("'").strip("',\n"))

    # Highest-scoring class first.
    best_first = ranks.argsort(descending=True)
    return np.array(names)[best_first]
def __init__(self, X: torch.Tensor, y: torch.Tensor) -> None:
    """Store ``X`` and ``y`` reordered by ascending value of ``y``.

    Args:
        X: 2-D ``torch.float`` tensor, one row per sample.
        y: 1-D ``torch.long`` tensor with one entry per row of ``X``.

    The permutation that sorts ``y`` is kept in ``self.sort_indices``.
    """
    assert X.dtype == torch.float
    assert y.dtype == torch.long
    assert X.dim() == 2
    assert y.dim() == 1
    assert X.size(0) == y.size(0)

    order = torch.argsort(y)
    self.sort_indices = order
    self.X = X[order]
    self.y = y[order]
def manifold_dist_loss_relu_sum(model: nn.Module, inputs: torch.Tensor,
                                train_distances: torch.Tensor,
                                manifold: RiemannianManifold, margin=0.01,
                                discount_factor=0.9):
    """
    See write up for details on this loss function -- encourages embeddings
    to preserve graph topology.

    Parameters:
        model (nn.Module): model that takes in graph indices and outputs
            embeddings in output manifold space
        inputs (torch.Tensor): LongTensor of shape [batch_size, num_samples+1]
            giving the indices of the vertices to be trained, with the first
            vertex in each element of the batch being the main vertex and the
            others being samples
        train_distances (torch.Tensor): floating point tensor of shape
            [batch_size, num_samples] containing the training distances from
            the input vertex to the sampled vertices
        manifold (RiemannianManifold): Manifold that model embeds vertices into
        margin: constant added to every pairwise difference of log distances
        discount_factor: accepted but not referenced in this function

    Returns:
        pytorch scalar: Computed loss
    """
    embeddings = model(inputs)
    sample_count = embeddings.size(1) - 1
    samples = embeddings.narrow(1, 1, sample_count)
    anchors = embeddings.narrow(1, 0, 1).expand_as(samples)
    dists = manifold.dist(anchors, samples)

    # Put the samples in order of increasing training distance, then work
    # with log(distance + EPSILON) (computed in place on the gathered copy).
    order = train_distances.argsort(dim=-1)
    log_dists = torch.gather(dists, -1, order)
    log_dists.add_(EPSILON).log_()
    target_sorted = torch.gather(train_distances, -1, order)

    pair_shape = [dists.size()[0], dists.size()[1], dists.size()[1]]

    def pairwise_difference(values):
        # result[b, i, j] = values[b, i] - values[b, j]
        as_rows = values.unsqueeze(2).expand(*pair_shape)
        as_cols = values.unsqueeze(1).expand(*pair_shape)
        return as_rows - as_cols

    violation = pairwise_difference(log_dists) + margin
    target_gap = pairwise_difference(target_sorted)

    # Pairs with equal training distance contribute zero (the gap itself).
    masked = torch.where(target_gap == 0, target_gap, violation)
    # Keep the upper triangle only, then clamp negatives to zero.
    masked.triu_()
    relu_(masked)
    return masked.sum(-1).sum(-1).mean()
def mrr(predictions: torch.Tensor, ground_truth_idx: torch.Tensor) -> float:
    """Calculates mean reciprocal rank (MRR) for given predictions and ground truth values.

    :param predictions: BxN tensor of prediction values where B is batch size
        and N number of classes. Predictions must be sorted in class ids
        order. A higher value means a better (earlier) rank.
    :param ground_truth_idx: Bx1 tensor with index of ground truth class
        (a flat tensor of length B is accepted as well)
    :return: Mean reciprocal rank score
    :raises AssertionError: if the batch sizes of the two inputs differ
    """
    if predictions.size(0) != ground_truth_idx.size(0):
        raise AssertionError(
            "predictions and ground_truth_idx must have the same batch size")

    # Best prediction first, so that column position + 1 is the rank.
    ranking = predictions.argsort(dim=-1, descending=True)
    # Force a column so the comparison broadcasts one target per row.
    targets = ground_truth_idx.reshape(-1, 1)
    ranks = (ranking == targets).nonzero()[:, 1].float().add(1.0)
    # Mean of the reciprocal ranks (previously the plain sum of ranks taken
    # from an ascending sort was returned, which is not an MRR).
    return ranks.reciprocal().mean().item()
def mapr(input: torch.Tensor, targs: torch.LongTensor, mapn: int):
    """
    Compute the mean average precision at ``mapn`` for single-label targets.

    Each row of ``input`` is ranked by descending score; a row scores
    ``1 / position`` if its target is among the top ``mapn`` entries and 0
    otherwise. The result is the mean over rows (a 0-dim tensor).

    When ``mapn`` is larger than the number of columns of ``input``, all
    columns are used.

    > map5 = partial(mapr, mapn=5)
    """
    n = targs.shape[0]  # number of samples
    top = input.argsort(dim=-1, descending=True)[:, :mapn]
    # The slice may be narrower than mapn; size the weights from what was
    # actually kept so the division below always broadcasts.
    depth = top.shape[-1]
    targs = targs.view(n, -1)
    positions = torch.arange(1, depth + 1, device=top.device).float()
    return ((top == targs).float() / positions).sum(dim=-1).mean()
def _rank_data(data: Tensor) -> Tensor:
    """Calculate the rank for each element of a tensor.

    The rank refers to the indices of an element in the corresponding sorted
    tensor (starting from 1). Duplicates of the same value will be assigned
    the mean of their rank.

    Adopted from:
        https://github.com/scipy/scipy/blob/v1.6.2/scipy/stats/stats.py#L4140-L4303
    """
    n = data.numel()
    rank = torch.empty_like(data)
    idx = data.argsort()
    # Element idx[k] is the k-th smallest, so it receives rank k + 1.
    rank[idx[:n]] = torch.arange(1, n + 1, dtype=data.dtype, device=data.device)

    # NOTE(review): `_find_repeats` is defined elsewhere; it is used here as
    # returning the *data values* that occur more than once - confirm.
    repeats = _find_repeats(data)
    for r in repeats:
        # Select the tied elements by their data value. Comparing `rank`
        # with `r` (as before) picks an unrelated element whose rank merely
        # happens to equal the repeated value, leaving ties unaveraged.
        condition = data == r
        rank[condition] = rank[condition].mean()
    return rank
def roc_curve(input: Tensor, targ: Tensor):
    """Computes the receiver operator characteristic (ROC) curve by determining the true positive ratio (TPR) and false positive ratio (FPR) for various classification thresholds. Restricted to binary classification tasks.

    Returns the pair ``(fpr, tpr)``; both start at 0.
    """
    positives = targ == 1

    # Order samples by descending score.
    order = input.argsort(-1).flip(-1)
    scores = input[order]
    positives = positives[order]

    # Positions where the score changes, plus the very last sample, are the
    # candidate thresholds.
    steps = scores[1:] - scores[:-1]
    change_points = torch.nonzero(steps)[:, 0]
    last_index = LongTensor([len(positives) - 1]).to(positives.device)
    threshold_idxs = torch.cat((change_points, last_index))

    # `positives * 1` turns the boolean mask into integers for cumsum.
    tps = torch.cumsum(positives * 1, dim=-1)[threshold_idxs]
    # Everything up to a threshold that is not a true positive is a false one.
    fps = 1 + threshold_idxs - tps

    if tps[0] != 0 or fps[0] != 0:
        # Make the curve start at the origin.
        origin = torch.zeros(1, dtype=fps.dtype, device=fps.device)
        fps = torch.cat((origin, fps))
        tps = torch.cat((origin, tps))

    fpr = fps.float() / fps[-1]
    tpr = tps.float() / tps[-1]
    return fpr, tpr
def _rank_data(data: Tensor) -> Tensor:
    """Rank the elements of ``data``, averaging the ranks of tied values.

    An element's rank is its 1-based position in the sorted tensor. Values
    reported as repeated by ``_find_repeats`` (defined elsewhere) all receive
    the mean of the ranks they occupy.

    Adopted from:
        `Rank of element tensor`_
    """
    size = data.numel()
    order = data.argsort()
    positions = torch.arange(1, size + 1, dtype=data.dtype, device=data.device)

    rank = torch.empty_like(data)
    rank[order[:size]] = positions

    for value in _find_repeats(data):
        ties = data == value
        rank[ties] = rank[ties].mean()
    return rank
def unique1d(tensor: torch.Tensor, return_index: bool = False):
    """Port of np.unique to PyTorch with `return_index` functionality.

    Args:
        tensor: 1-D tensor. It is not modified.
        return_index: when true, also return, for every unique value, the
            index of its first occurrence in ``tensor``.

    Returns:
        A tuple ``(unique_values,)`` or ``(unique_values, indices)``;
        ``unique_values`` is sorted ascending.

    Raises:
        AssertionError: if ``tensor`` is not 1-D.
    """
    if len(tensor.shape) != 1:
        raise AssertionError("unique1d expects a 1-dimensional tensor")

    # Out-of-place sort: the caller's tensor stays untouched. A stable sort
    # keeps equal values in input order, so the first element of each run is
    # the first occurrence (np.unique's return_index semantics).
    aux, perm = torch.sort(tensor, stable=True)

    # True where a new run of equal values starts. Built as a bool mask on
    # aux's device instead of a CPU float tensor cast through `.byte()`.
    mask = torch.ones_like(aux, dtype=torch.bool)
    mask[1:] = aux[1:] != aux[:-1]

    ret = (aux[mask], )
    if return_index:
        ret += (perm[mask], )
    return ret
def __call__(self, predictions: torch.Tensor, gold_labels: torch.Tensor,
             mask: Optional[torch.Tensor] = None) -> None:
    """Accumulate reciprocal ranks of the gold labels.

    Args:
        predictions: scores whose last dimension enumerates the classes;
            all leading dimensions are flattened.
        gold_labels: gold class ids, one per flattened prediction row.
        mask: optional tensor with one entry per gold label; entries that
            are zero/False are excluded from the sum and from the count.

    Updates ``self.summed_reciprocal_ranks`` and ``self.total_count``.
    """
    predictions, gold_labels, mask = \
        self.detach_tensors(predictions, gold_labels, mask)

    num_classes = predictions.size(-1)
    predictions = predictions.view((-1, num_classes))
    gold_labels = gold_labels.view(-1).long()

    # Class ids ordered best-first; `correct` is one-hot at the gold rank.
    predicted_ids = predictions.argsort(-1, descending=True)
    correct = predicted_ids.eq(gold_labels.unsqueeze(-1)).float()
    # [1, 1/2, 1/3, ...]: the dot product picks 1 / rank of the gold class.
    reciprocals = torch.arange(1, num_classes + 1,
                               device=correct.device).float().reciprocal()
    reciprocal_ranks = torch.matmul(correct, reciprocals)

    if mask is not None:
        # `reciprocal_ranks` is flat, so the mask must be flattened too, and
        # it must be boolean: a 0/1 integer mask would otherwise be treated
        # as a list of positions rather than as a selection.
        flat_mask = mask.reshape(-1).bool()
        self.summed_reciprocal_ranks += reciprocal_ranks[flat_mask].sum().item()
        self.total_count += flat_mask.sum().item()
    else:
        self.summed_reciprocal_ranks += reciprocal_ranks.sum().item()
        self.total_count += gold_labels.numel()
def _label_ranking_loss_update(
    preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None
) -> Tuple[Tensor, int, Optional[Tensor]]:
    """Accumulate state for label ranking loss.

    Args:
        preds: tensor with predictions
        target: tensor with ground truth labels
        sample_weight: optional tensor with weight for each sample

    Returns:
        The summed loss, the number of samples and the summed sample weights
        (``None`` when no weights were given). When no sample has a usable
        label set, ``(0.0, 1, sample_weight)`` is returned with
        ``sample_weight`` passed through unchanged.
    """
    _check_ranking_input(preds, target, sample_weight)
    n_preds, n_labels = preds.shape

    is_relevant = target == 1
    relevant_count = is_relevant.sum(dim=1)

    # Ignore instances where number of true labels is 0 or n_labels
    usable = (relevant_count > 0) & (relevant_count < n_labels)
    preds, is_relevant, relevant_count = (
        preds[usable], is_relevant[usable], relevant_count[usable])

    # Nothing is relevant
    if len(preds) == 0:
        return torch.tensor(0.0, device=preds.device), 1, sample_weight

    # argsort of argsort: position of each label in the ascending ordering.
    positions = preds.argsort(dim=1).argsort(dim=1)
    per_label_loss = ((n_labels - positions) * is_relevant).to(torch.float32)
    correction = 0.5 * relevant_count * (relevant_count + 1)
    denom = relevant_count * (n_labels - relevant_count)
    loss = (per_label_loss.sum(dim=1) - correction) / denom

    if isinstance(sample_weight, Tensor):
        loss *= sample_weight[usable]
        sample_weight = sample_weight.sum()
    return loss.sum(), n_preds, sample_weight
def scatter_split(src: torch.Tensor, indexes: torch.Tensor) -> List[torch.Tensor]:
    """Split ``src`` along dim 0 into one chunk per distinct value of ``indexes``.

    Args:
        src: tensor whose first dimension lines up with ``indexes``.
        indexes: 1-D tensor of group ids, one per row of ``src``.

    Returns:
        The sequence produced by ``torch.split``: one tensor per distinct id,
        in ascending id order. Rows inside a chunk keep their order from
        ``src``.
    """
    # A stable sort keeps rows with the same id in input order; a plain
    # argsort gives no such guarantee, so chunk contents could come back in
    # an arbitrary order.
    order = torch.sort(indexes, stable=True).indices
    group_sizes = torch.unique(indexes, return_counts=True)[1]
    return torch.split(src[order], group_sizes.tolist(), dim=0)
def _get_ranks(x: torch.Tensor) -> torch.Tensor:
    """Return the zero-based rank of every element of ``x`` (ascending).

    ``ranks[i]`` is the position element ``i`` takes once ``x`` is sorted.
    """
    order = torch.argsort(x)
    positions = torch.arange(len(x), device=x.device)
    ranks = torch.zeros_like(order)
    # The element at order[k] is the k-th smallest.
    ranks[order] = positions
    return ranks
def _get_ranks(self, x: torch.Tensor) -> torch.Tensor:
    """Return, for each element of ``x``, its zero-based position in ascending order."""
    sorted_idx = torch.argsort(x)
    ranks = torch.zeros_like(sorted_idx)
    count = x.size(0)
    # Invert the sorting permutation: the k-th smallest element gets rank k.
    ranks[sorted_idx] = torch.arange(count, device=ranks.device)
    return ranks
def get_index(logit: torch.Tensor):
    """Return the indices that sort ``logit`` in ascending order along dim 0."""
    ascending_order = torch.argsort(logit, dim=0)
    return ascending_order
def _get_ranks(x: torch.Tensor) -> torch.Tensor:
    """Compute zero-based ascending ranks for the elements of ``x``.

    The result has the integer dtype of the sort indices and lives on the
    device of ``x``.
    """
    permutation = torch.argsort(x)
    slots = torch.arange(len(x), device=x.device)
    ranks = torch.zeros_like(permutation, device=x.device)
    # permutation[k] names the k-th smallest element; give it rank k.
    ranks[permutation] = slots
    return ranks
def evaluate(distmat: torch.Tensor,
             qpids: torch.Tensor,
             gpids: torch.Tensor,
             qcamids: torch.Tensor,
             gcamids: torch.Tensor,
             max_rank=50):
    """
    Evaluate match # by rank

    :param distmat: (num_query, num_gallery) distances; for every query the
        gallery is ranked by ascending distance
    :param qpids: identity of every query
    :param gpids: identity of every gallery entry
    :param qcamids: camera id of every query
    :param gcamids: camera id of every gallery entry
    :param max_rank: number of ranks reported in the CMC curve; reduced to
        the gallery size when the gallery is smaller
    :return:
        all_cmc: percentage of queries matched until each rank r
        ap_list: average precision for each query image (mean for mAP)
        inp_list: inp score for each query image (mean for mINP)
    :raises AssertionError: if a query has no valid gallery match, or if
        fewer than ``max_rank`` usable gallery entries are found within the
        first ``max_rank * 10`` ranked entries of a query
    """
    device = distmat.device
    _, num_gallery = distmat.shape
    if num_gallery < max_rank:
        max_rank = num_gallery
        print(
            "Note: number of gallery samples is quite small, got {}".format(
                num_gallery))

    indices = distmat.argsort(dim=1)
    same_pid = gpids[indices].eq(qpids.reshape(-1, 1))
    matches = same_pid.int()

    # Gallery entries showing the query identity from the query's own camera
    # are excluded from the evaluation.
    remove = torch.logical_and(same_pid,
                               gcamids[indices].eq(qcamids.reshape(-1, 1)))
    keep = remove.logical_not()
    # `kept` is analogous to index within only-valid instances
    kept = keep.cumsum(dim=1)

    q, g = len(qpids), len(gpids)

    valid_matches = matches * keep
    # at least one matchable (== matched) gallery image
    valid_query = valid_matches.sum(dim=1).gt(0)
    assert valid_query.all()  # reid dataset queries should all be valid
    # error: all query identities do not appear in gallery
    assert valid_matches.sum() != 0

    # Position of the last valid match: weighting by 1..g makes argmax pick
    # the right-most non-zero entry.
    final_rank_positions = (
        valid_matches * torch.arange(1, g + 1, device=device)).argmax(dim=1)
    final_rank_valid = kept[torch.arange(q, device=device),
                            final_rank_positions]
    all_INP = valid_matches.sum(dim=1).float() / final_rank_valid.float()

    cum_precision = valid_matches.cumsum(dim=1).float() / kept.float()
    # 0 / 0 occurs before the first kept entry of a row.
    cum_precision[cum_precision.isnan()] = 1
    all_AP = (cum_precision *
              valid_matches).sum(dim=1) / valid_matches.sum(dim=1)

    # Compute CMC (need to go query-by-query) (assume that at least 10% are valid)
    buffer = 10
    keep = keep[:, :max_rank * buffer]
    matches = matches[:, :max_rank * buffer]
    all_cmc = []
    for i in range(q):
        # Truncate to max_rank (this used to be a fixed 50, which made every
        # call with max_rank > 50 fail the length check below).
        mc = matches[i][keep[i]][:max_rank]
        if len(mc) < max_rank:
            raise AssertionError(
                "Not enough matching galleries. Consider higher `buffer` value."
            )
        cmc = mc.cumsum(dim=0)
        # E.g., 0 1 x x x x ... to 0 1 1 1 1 1 ...
        cmc[cmc > 1] = 1
        all_cmc.append(cmc)
    all_cmc = torch.stack(all_cmc).float()
    all_cmc = all_cmc.sum(dim=0) / valid_query.float().sum()

    return all_cmc, all_AP, all_INP
def evaluate(distmat: torch.Tensor,
             q_pids: torch.Tensor,
             g_pids: torch.Tensor,
             q_camids: torch.Tensor,
             g_camids: torch.Tensor,
             max_rank=50,
             device=None):
    """
    Torch implementation of evaluate. Slower on CPU.

    :param distmat: (num_query, num_gallery) distances; for every query the
        gallery is ranked by ascending distance
    :param q_pids: identity of every query
    :param g_pids: identity of every gallery entry
    :param q_camids: camera id of every query
    :param g_camids: camera id of every gallery entry
    :param max_rank: number of ranks reported in the CMC curve; reduced to
        the gallery size when the gallery is smaller
    :param device: device all inputs are converted to via ``torch.as_tensor``
    :return: tuple ``(all_cmc, mAP, mINP)``: the CMC curve as a numpy array,
        and the mean AP / mean INP over the valid queries as floats
    :raises AssertionError: if no query has a valid gallery match, or if
        fewer than ``max_rank`` usable gallery entries are found within the
        first ``max_rank * 10`` ranked entries of a query
    """
    distmat = torch.as_tensor(distmat, device=device)
    q_pids = torch.as_tensor(q_pids, device=device)
    g_pids = torch.as_tensor(g_pids, device=device)
    q_camids = torch.as_tensor(q_camids, device=device)
    g_camids = torch.as_tensor(g_camids, device=device)

    num_q, num_g = distmat.shape
    if num_g < max_rank:
        max_rank = num_g
        print("Note: number of gallery samples is quite small, got {}".format(
            num_g))

    indices = distmat.argsort(dim=1)
    same_pid = g_pids[indices] == q_pids.reshape(-1, 1)
    matches = same_pid.int()

    # Gallery entries showing the query identity from the query's own camera
    # are excluded from the evaluation.
    remove = same_pid & (g_camids[indices] == q_camids.reshape(-1, 1))
    keep = ~remove
    # `kept` is analogous to index within only-valid instances
    kept = keep.cumsum(dim=1)

    q, g = len(q_pids), len(g_pids)

    valid_matches = matches * keep
    # at least one matchable (== matched) gallery image
    valid_query = valid_matches.sum(dim=1) > 0
    # error: all query identities do not appear in gallery
    assert valid_matches.sum() != 0

    # Position of the last valid match: weighting by 1..g makes argmax pick
    # the right-most non-zero entry.
    final_rank_positions = (
        valid_matches * torch.arange(1, g + 1, device=device)).argmax(dim=1)
    final_rank_valid = kept[torch.arange(q, device=device),
                            final_rank_positions]
    all_INP = valid_matches.sum(dim=1).float() / final_rank_valid.float()

    cum_precision = valid_matches.cumsum(dim=1).float() / kept.float()
    # 0 / 0 occurs before the first kept entry of a row.
    cum_precision[cum_precision.isnan()] = 1
    all_AP = (cum_precision *
              valid_matches).sum(dim=1) / valid_matches.sum(dim=1)

    # Compute CMC (need to go query-by-query) (assume that at least 10% are valid)
    buffer = 10
    keep = keep[:, :max_rank * buffer]
    matches = matches[:, :max_rank * buffer]
    all_cmc = []
    for i in range(q):
        # Truncate to max_rank (this used to be a fixed 50, which made every
        # call with max_rank > 50 fail the length check below).
        mc = matches[i][keep[i]][:max_rank]
        if len(mc) < max_rank:
            raise AssertionError(
                "Not enough matching galleries. Consider higher `buffer` value."
            )
        cmc = mc.cumsum(dim=0)
        # E.g., 0 1 x x x x ... to 0 1 1 1 1 1 ...
        cmc[cmc > 1] = 1
        all_cmc.append(cmc)
    all_cmc = torch.stack(all_cmc).float()
    all_cmc = all_cmc.sum(dim=0) / valid_query.float().sum()

    # Queries without any valid match are left out of the means.
    mAP = all_AP[valid_query].mean()
    mINP = all_INP[valid_query].mean()
    return all_cmc.cpu().numpy(), mAP.item(), mINP.item()