def get_metric(self, reset: bool = False) -> Dict[str, float]:
    """
    Returns
    -------
    A dictionary with the average exact match ("em") and F1 score ("f1"), as computed by
    the official SQuAD script over all inputs, along with the total "count" of scored
    predictions.
    """
    if is_distributed():
        device = "cuda" if dist.get_backend() == "nccl" else "cpu"
    else:
        device = "cpu"
    _total_em = torch.tensor(self._total_em, device=device)
    _total_f1 = torch.tensor(self._total_f1, device=device)
    _count = torch.tensor(self._count, device=device)
    if is_distributed():
        dist.all_reduce(_total_em, op=dist.ReduceOp.SUM)
        dist.all_reduce(_total_f1, op=dist.ReduceOp.SUM)
        dist.all_reduce(_count, op=dist.ReduceOp.SUM)
    if reset:
        self.reset()
    count = _count.item()
    exact_match = _total_em.item() / count if count > 0 else 0
    f1_score = _total_f1.item() / count if count > 0 else 0
    return {"em": exact_match, "f1": f1_score, "count": count}
def __init__(
    self,
    serialization_dir: Union[str, os.PathLike],
    save_completed_epochs: bool = True,
    save_every_num_seconds: Optional[float] = None,
    save_every_num_batches: Optional[int] = None,
    keep_most_recent_by_count: Optional[int] = 2,
    keep_most_recent_by_age: Optional[int] = None,
) -> None:
    self._serialization_dir = str(serialization_dir)
    self._save_completed_epochs = save_completed_epochs
    self._save_every_num_seconds = save_every_num_seconds
    self._save_every_num_batches = save_every_num_batches
    self._keep_most_recent_by_count = keep_most_recent_by_count
    self._keep_most_recent_by_age = keep_most_recent_by_age

    self._last_save_time = time.time()
    self._last_save_num_epochs_completed = 0
    self._last_save_num_batches_in_epoch_completed = 0

    self._rank = 0 if not is_distributed() else dist.get_rank()
    self.state_is_sharded = False

    if is_distributed() and save_every_num_seconds is not None:
        # This would involve extra overhead to keep synchronized between workers,
        # so we don't support it.
        raise ValueError(
            "Checkpointer parameter 'save_every_num_seconds' is not supported in distributed training"
        )
def __call__(
    self,  # type: ignore
    predictions: torch.LongTensor,
    gold_targets: torch.LongTensor,
) -> None:
    """
    Update precision counts.

    # Parameters

    predictions : `torch.LongTensor`, required
        Batched predicted tokens of shape `(batch_size, max_sequence_length)`.
    gold_targets : `torch.LongTensor`, required
        Batched reference (gold) translations with shape `(batch_size, max_gold_sequence_length)`.

    # Returns

    None
    """
    predictions, gold_targets = self.detach_tensors(predictions, gold_targets)
    device = gold_targets.device
    if is_distributed():
        world_size = dist.get_world_size()

    for ngram_size, _ in enumerate(self._ngram_weights, start=1):
        precision_matches, precision_totals = self._get_modified_precision_counts(
            predictions, gold_targets, ngram_size
        )
        if is_distributed():
            _precision_matches = torch.tensor(precision_matches).to(device)
            _precision_totals = torch.tensor(precision_totals).to(device)
            dist.all_reduce(_precision_matches, op=dist.ReduceOp.SUM)
            dist.all_reduce(_precision_totals, op=dist.ReduceOp.SUM)
            precision_matches = _precision_matches.item() / world_size
            precision_totals = _precision_totals.item() / world_size

        self._precision_matches[ngram_size] += precision_matches
        self._precision_totals[ngram_size] += precision_totals

    if not self._exclude_indices:
        self._prediction_lengths += predictions.size(0) * predictions.size(1)
        self._reference_lengths += gold_targets.size(0) * gold_targets.size(1)
    else:
        from allennlp.training.util import get_valid_tokens_mask

        valid_predictions_mask = get_valid_tokens_mask(predictions, self._exclude_indices)
        self._prediction_lengths += valid_predictions_mask.sum().item()
        valid_gold_targets_mask = get_valid_tokens_mask(gold_targets, self._exclude_indices)
        self._reference_lengths += valid_gold_targets_mask.sum().item()

    if is_distributed():
        _prediction_lengths = torch.tensor(self._prediction_lengths).to(device)
        _reference_lengths = torch.tensor(self._reference_lengths).to(device)
        dist.all_reduce(_prediction_lengths, op=dist.ReduceOp.SUM)
        dist.all_reduce(_reference_lengths, op=dist.ReduceOp.SUM)
        self._prediction_lengths = _prediction_lengths.item()
        self._reference_lengths = _reference_lengths.item()
def __call__(
    self,  # type: ignore
    batched_top_spans: torch.Tensor,
    batched_metadata: List[Dict[str, Any]],
):
    num_gold_mentions = 0
    num_recalled_mentions = 0
    for top_spans, metadata in zip(batched_top_spans.tolist(), batched_metadata):
        gold_mentions: Set[Tuple[int, int]] = {
            mention for cluster in metadata["clusters"] for mention in cluster
        }
        predicted_spans: Set[Tuple[int, int]] = {(span[0], span[1]) for span in top_spans}

        num_gold_mentions += len(gold_mentions)
        num_recalled_mentions += len(gold_mentions & predicted_spans)

    if is_distributed():
        device = batched_top_spans.device
        _num_gold_mentions = torch.tensor(num_gold_mentions).to(device)
        _num_recalled_mentions = torch.tensor(num_recalled_mentions).to(device)
        dist.all_reduce(_num_gold_mentions, op=dist.ReduceOp.SUM)
        dist.all_reduce(_num_recalled_mentions, op=dist.ReduceOp.SUM)
        num_gold_mentions = _num_gold_mentions.item()
        num_recalled_mentions = _num_recalled_mentions.item()

    self._num_gold_mentions += num_gold_mentions
    self._num_recalled_mentions += num_recalled_mentions
def __call__( self, predictions: torch.Tensor, gold_labels: torch.Tensor, mask: Optional[torch.BoolTensor] = None, ): """ # Parameters predictions : `torch.Tensor`, required. A tensor of predictions of shape (batch_size, ...). gold_labels : `torch.Tensor`, required. A tensor of the same shape as `predictions`. mask : `torch.BoolTensor`, optional (default = `None`). A tensor of the same shape as `predictions`. """ predictions, gold_labels, mask = self.detach_tensors( predictions, gold_labels, mask) device = gold_labels.device absolute_errors = torch.abs(predictions - gold_labels) if mask is not None: absolute_errors *= mask self._total_count += torch.sum(mask) else: self._total_count += gold_labels.numel() self._absolute_error += torch.sum(absolute_errors) if is_distributed(): _absolute_error = torch.tensor(self._absolute_error).to(device) _total_count = torch.tensor(self._total_count).to(device) dist.all_reduce(_absolute_error, op=dist.ReduceOp.SUM) dist.all_reduce(_total_count, op=dist.ReduceOp.SUM) self._absolute_error = _absolute_error.item() self._total_count = _total_count.item()
def get_metric(self, reset: bool = False): """ # Returns The accumulated sample Pearson correlation. """ covariance = self._predictions_labels_covariance.get_metric( reset=reset) predictions_variance = self._predictions_variance.get_metric( reset=reset) labels_variance = self._labels_variance.get_metric(reset=reset) denominator = math.sqrt(predictions_variance) * math.sqrt( labels_variance) if is_distributed(): # Note: this gives an approximate aggregation of the covariance. device = self._device _covariance = torch.tensor(covariance).to(device) dist.all_reduce(_covariance, op=dist.ReduceOp.SUM) covariance = _covariance.item() _denominator = torch.tensor(denominator).to(device) dist.all_reduce(_denominator, op=dist.ReduceOp.SUM) denominator = _denominator.item() if reset: self.reset() if np.around(denominator, decimals=5) == 0: pearson_r = 0 else: pearson_r = covariance / denominator return pearson_r
def _get_rouge_l_score(
    self, predicted_tokens: torch.LongTensor, reference_tokens: torch.LongTensor
) -> float:
    """
    Compute the sum of sentence-level ROUGE-L F1 scores given a batch of predictions and references.
    """
    from allennlp.training.util import get_valid_tokens_mask

    total_f1 = 0.0
    for predicted_seq, reference_seq in zip(predicted_tokens, reference_tokens):
        m = get_valid_tokens_mask(reference_seq, self._exclude_indices).sum().item()
        n = get_valid_tokens_mask(predicted_seq, self._exclude_indices).sum().item()

        lcs = self._longest_common_subsequence(reference_seq, predicted_seq)

        # This also rules out the case that m or n are 0, so we don't worry about it later.
        if lcs == 0:
            continue

        recall_lcs = lcs / m
        precision_lcs = lcs / n
        f1 = 2 * recall_lcs * precision_lcs / (recall_lcs + precision_lcs)
        total_f1 += f1

    if is_distributed():
        device = predicted_tokens.device
        _total_f1 = torch.tensor(total_f1).to(device)
        dist.all_reduce(_total_f1, op=dist.ReduceOp.SUM)
        total_f1 = _total_f1.item()

    return total_f1
def __init__(self, base_reader: DatasetReader, **kwargs) -> None:
    # ShardedDatasetReader is a wrapper around the original base_reader, so some of its
    # parameters, like 'lazy', can be safely inherited. However, ShardedDatasetReader is
    # itself a DatasetReader instance, so we give priority to the parameters for the
    # current instance stored in 'kwargs'. If a parameter is not present there, we fall
    # back to the one on the base reader.
    kwargs["lazy"] = kwargs.get("lazy", base_reader.lazy)

    super().__init__(manual_distributed_sharding=True, **kwargs)

    if util.is_distributed():
        self._rank = torch.distributed.get_rank()
        self._world_size = torch.distributed.get_world_size()
    else:
        self._rank = 0
        self._world_size = 1

    self.reader = base_reader
    # We have to check that the base reader doesn't implement manual distributed
    # sharding itself, because if it does, then only a fraction of the instances
    # will be read.
    if getattr(self.reader, "manual_distributed_sharding", False):
        raise ValueError(
            "The base reader of a sharded dataset reader should not implement "
            "manual distributed sharding itself."
        )
    # However, we still need to set this flag to `True` after the fact so that
    # all of the instances within each shard are used.
    self.reader.manual_distributed_sharding = True
def __call__(
    self,  # type: ignore
    logits: torch.Tensor,
    mask: Optional[torch.BoolTensor] = None,
):
    """
    # Parameters

    logits : `torch.Tensor`, required.
        A tensor of unnormalized log probabilities of shape (batch_size, ..., num_classes).
    mask : `torch.BoolTensor`, optional (default = `None`).
        A masking tensor of shape (batch_size, ...).
    """
    logits, mask = self.detach_tensors(logits, mask)
    device = logits.device

    if mask is None:
        mask = torch.ones(logits.size()[:-1], device=logits.device).bool()

    log_probs = torch.nn.functional.log_softmax(logits, dim=-1)
    probabilities = torch.exp(log_probs) * mask.unsqueeze(-1)
    weighted_negative_likelihood = -log_probs * probabilities
    entropy = weighted_negative_likelihood.sum(-1)

    _entropy = entropy.sum() / mask.sum()
    _count = 1

    if is_distributed():
        count = torch.tensor(_count, device=device)
        dist.all_reduce(_entropy, op=dist.ReduceOp.SUM)
        dist.all_reduce(count, op=dist.ReduceOp.SUM)
        _count = count.item()

    self._entropy += _entropy.item()
    self._count += _count
def get_metric(self, reset: bool = False): """ # Returns The accumulated sample Pearson correlation. """ if is_distributed(): raise RuntimeError( "Distributed aggregation for PearsonCorrelation is currently not supported." ) covariance = self._predictions_labels_covariance.get_metric( reset=reset) predictions_variance = self._predictions_variance.get_metric( reset=reset) labels_variance = self._labels_variance.get_metric(reset=reset) denominator = math.sqrt(predictions_variance) * math.sqrt( labels_variance) if reset: self.reset() if np.around(denominator, decimals=5) == 0: pearson_r = 0 else: pearson_r = covariance / denominator return pearson_r
def shard_iterable(self, iterable: Iterable[_T]) -> Iterator[_T]:
    """
    Helper method that determines which items in an iterable object to skip based
    on the current node rank (for distributed training) and worker ID (for
    multi-process data loading).
    """
    if not self.manual_distributed_sharding or not self.manual_multiprocess_sharding:
        raise ValueError(
            "self.shard_iterable() was called but self.manual_distributed_sharding and "
            "self.manual_multiprocess_sharding were not set to True. Did you forget to call "
            "super().__init__(manual_distributed_sharding=True, manual_multiprocess_sharding=True) "
            "in your constructor?"
        )

    sharded_slice: Iterator[_T] = iter(iterable)

    if util.is_distributed():
        sharded_slice = itertools.islice(
            sharded_slice, dist.get_rank(), None, dist.get_world_size()
        )

    if self._worker_info is not None:
        sharded_slice = itertools.islice(
            sharded_slice, self._worker_info.id, None, self._worker_info.num_workers
        )

    # We don't know for sure how many instances we have to produce.
    # _multi_worker_islice() figures that out. But we know for sure
    # it won't be more than max_instances.
    if self.max_instances is not None:
        sharded_slice = itertools.islice(sharded_slice, self.max_instances)

    return sharded_slice
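For context, a minimal usage sketch of `shard_iterable()` follows. The reader class, file format, and field names here are hypothetical illustrations (they are not part of the snippet above); the point is only that both `manual_*_sharding` flags must be passed to `super().__init__()` before `shard_iterable()` can be used inside `_read()`.

# Minimal usage sketch (hypothetical reader, not part of the code above).
from allennlp.data import DatasetReader, Instance
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer
from allennlp.data.tokenizers import WhitespaceTokenizer


class MyLineReader(DatasetReader):
    """Hypothetical reader: one instance per line of a plain-text file."""

    def __init__(self, **kwargs) -> None:
        # Both flags must be True, otherwise shard_iterable() raises a ValueError.
        super().__init__(
            manual_distributed_sharding=True,
            manual_multiprocess_sharding=True,
            **kwargs,
        )
        self._tokenizer = WhitespaceTokenizer()
        self._token_indexers = {"tokens": SingleIdTokenIndexer()}

    def _read(self, file_path: str):
        with open(file_path, "r") as data_file:
            # Each rank (and each data-loader worker) only materializes its own slice of the lines.
            for line in self.shard_iterable(data_file):
                yield self.text_to_instance(line.strip())

    def text_to_instance(self, text: str) -> Instance:  # type: ignore
        tokens = self._tokenizer.tokenize(text)
        return Instance({"text": TextField(tokens, self._token_indexers)})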
def __call__(self, best_span_string, answer_strings):
    """
    Parameters
    ----------
    best_span_string : ``str``
        The predicted answer string.
    answer_strings : ``List[str]``
        The gold answer strings to compare against.
    """
    exact_match = squad.metric_max_over_ground_truths(
        squad.exact_match_score, best_span_string, answer_strings
    )
    f1_score = squad.metric_max_over_ground_truths(
        squad.f1_score, best_span_string, answer_strings
    )

    count = 1
    if is_distributed():
        if dist.get_backend() == "nccl":
            device = torch.cuda.current_device()
        else:
            device = torch.device("cpu")
        # Converting bool to int here, since we want to count the number of exact matches.
        _exact_match = torch.tensor(exact_match, dtype=torch.int).to(device)
        _f1_score = torch.tensor(f1_score).to(device)
        _count = torch.tensor(count).to(device)
        dist.all_reduce(_exact_match, op=dist.ReduceOp.SUM)
        dist.all_reduce(_f1_score, op=dist.ReduceOp.SUM)
        dist.all_reduce(_count, op=dist.ReduceOp.SUM)
        exact_match = _exact_match.item()
        f1_score = _f1_score.item()
        count = _count.item()

    self._total_em += exact_match
    self._total_f1 += f1_score
    self._count += count
def __call__( self, predictions: torch.Tensor, gold_labels: torch.Tensor, mask: Optional[torch.BoolTensor] = None, ): """ # Parameters predictions : `torch.Tensor`, required. A tensor of predictions of shape (batch_size, ...). gold_labels : `torch.Tensor`, required. A tensor of the same shape as `predictions`. mask : `torch.BoolTensor`, optional (default = `None`). A tensor of the same shape as `predictions`. """ predictions, gold_labels, mask = self.detach_tensors(predictions, gold_labels, mask) # Flatten predictions, gold_labels, and mask. We calculate the Spearman correlation between # the vectors, since each element in the predictions and gold_labels tensor is assumed # to be a separate observation. predictions = predictions.reshape(-1) gold_labels = gold_labels.reshape(-1) self.total_predictions = self.total_predictions.to(predictions.device) self.total_gold_labels = self.total_gold_labels.to(gold_labels.device) if mask is not None: mask = mask.reshape(-1) self.total_predictions = torch.cat((self.total_predictions, predictions * mask), 0) self.total_gold_labels = torch.cat((self.total_gold_labels, gold_labels * mask), 0) else: self.total_predictions = torch.cat((self.total_predictions, predictions), 0) self.total_gold_labels = torch.cat((self.total_gold_labels, gold_labels), 0) if is_distributed(): world_size = dist.get_world_size() device = gold_labels.device # Check if batch lengths are equal. _all_batch_lengths = [torch.tensor(0) for i in range(world_size)] dist.all_gather( _all_batch_lengths, torch.tensor(self.total_predictions.shape[0], device=device) ) _all_batch_lengths = [batch_length.item() for batch_length in _all_batch_lengths] if len(set(_all_batch_lengths)) > 1: # Subsequent dist.all_gather() calls currently do not handle tensors of different length. raise RuntimeError( "Distributed aggregation for SpearmanCorrelation is currently not supported " "for batches of unequal length." ) _total_predictions = [ torch.zeros(self.total_predictions.shape, device=device) for i in range(world_size) ] _total_gold_labels = [ torch.zeros(self.total_gold_labels.shape, device=device) for i in range(world_size) ] dist.all_gather(_total_predictions, self.total_predictions) dist.all_gather(_total_gold_labels, self.total_gold_labels) self.total_predictions = torch.cat(_total_predictions, dim=0) self.total_gold_labels = torch.cat(_total_gold_labels, dim=0)
def _read(self, file_path: str):
    # if `file_path` is a URL, redirect to the cache
    file_path = cached_path(file_path)

    import torch.distributed as dist
    from allennlp.common.util import is_distributed

    if is_distributed():
        start_index = dist.get_rank()
        step_size = dist.get_world_size()
        logger.info(
            "Reading SNLI instances %% %d from jsonl dataset at: %s", step_size, file_path
        )
    else:
        start_index = 0
        step_size = 1
        logger.info("Reading SNLI instances from jsonl dataset at: %s", file_path)

    with open(file_path, "r") as snli_file:
        example_iter = (json.loads(line) for line in snli_file)
        filtered_example_iter = (
            example for example in example_iter if example["gold_label"] != "-"
        )
        for example in itertools.islice(filtered_example_iter, start_index, None, step_size):
            label = example["gold_label"]
            premise = example["sentence1"]
            hypothesis = example["sentence2"]
            yield self.text_to_instance(premise, hypothesis, label)
def __call__(self, logits: torch.Tensor, labels: torch.Tensor, label_weights: torch.Tensor):
    """
    # Parameters

    logits : `torch.Tensor`, required.
        A tensor of predictions of shape (batch_size, num_classes).
    labels : `torch.Tensor`, required.
        A tensor of integer class labels of shape (batch_size, num_labels).
    label_weights : `torch.Tensor`, required.
        A tensor of floats of shape (batch_size, num_labels), giving a weight or score to
        every one of the labels.
    """
    logits, labels, label_weights = self.detach_tensors(logits, labels, label_weights)
    predictions = logits.argmax(dim=1)

    # Sum over dimension 1 gives the score per question. We care about the overall sum though,
    # so we sum over all dimensions.
    local_sum_of_scores = (
        (label_weights * (labels == predictions.unsqueeze(-1))).sum().to(torch.float32)
    )
    local_score_count = torch.tensor(labels.size(0), dtype=torch.int32, device=labels.device)

    from allennlp.common.util import is_distributed

    if is_distributed():
        dist.all_reduce(local_sum_of_scores, op=dist.ReduceOp.SUM)
        dist.all_reduce(local_score_count, op=dist.ReduceOp.SUM)

    self._sum_of_scores += local_sum_of_scores.item()
    self._score_count += local_score_count.item()
def __call__( self, predictions: torch.Tensor, gold_labels: torch.Tensor, mask: Optional[torch.BoolTensor] = None, ): """ # Parameters predictions : `torch.Tensor`, required. A tensor of predictions of shape (batch_size, k, sequence_length). gold_labels : `torch.Tensor`, required. A tensor of integer class label of shape (batch_size, sequence_length). mask : `torch.BoolTensor`, optional (default = `None`). A masking tensor the same size as `gold_labels`. """ predictions, gold_labels, mask = self.detach_tensors(predictions, gold_labels, mask) device = gold_labels.device # Some sanity checks. if gold_labels.dim() != predictions.dim() - 1: raise ConfigurationError( "gold_labels must have dimension == predictions.dim() - 1 but " "found tensor of shape: {}".format(gold_labels.size()) ) if mask is not None and mask.size() != gold_labels.size(): raise ConfigurationError( "mask must have the same size as predictions but " "found tensor of shape: {}".format(mask.size()) ) k = predictions.size()[1] expanded_size = list(gold_labels.size()) expanded_size.insert(1, k) expanded_gold = gold_labels.unsqueeze(1).expand(expanded_size) if mask is not None: expanded_mask = mask.unsqueeze(1).expand(expanded_size) masked_gold = expanded_mask * expanded_gold masked_predictions = expanded_mask * predictions else: masked_gold = expanded_gold masked_predictions = predictions eqs = masked_gold.eq(masked_predictions) matches_per_question = eqs.min(dim=2)[0] some_match = matches_per_question.max(dim=1)[0] correct = some_match.sum().item() self.total_count += predictions.size()[0] self.correct_count += correct if is_distributed(): _correct_count = torch.tensor(self.correct_count).to(device) _total_count = torch.tensor(self.total_count).to(device) dist.all_reduce(_correct_count, op=dist.ReduceOp.SUM) dist.all_reduce(_total_count, op=dist.ReduceOp.SUM) self.correct_count = _correct_count.item() self.total_count = _total_count.item()
def _read(self, file_path):
    start_index = 0
    step_size = 1
    if common_util.is_distributed():
        start_index += dist.get_rank()
        step_size *= dist.get_world_size()
    for i in islice(range(TOTAL_INSTANCES), start_index, None, step_size):
        yield self.text_to_instance(i)
def _get_rouge_n_stats(
    self,
    predicted_tokens: torch.LongTensor,
    reference_tokens: torch.LongTensor,
    ngram_size: int,
) -> Tuple[float, float, float]:
    """
    Compare the predicted tokens to the reference (gold) tokens at the desired
    ngram size and compute the recall, precision and f1 sums.
    """
    from allennlp.training.util import ngrams

    total_recall = 0.0
    total_precision = 0.0
    total_f1 = 0.0

    for predicted_seq, reference_seq in zip(predicted_tokens, reference_tokens):
        predicted_ngram_counts = ngrams(predicted_seq, ngram_size, self._exclude_indices)
        reference_ngram_counts = ngrams(reference_seq, ngram_size, self._exclude_indices)

        matches = 0
        total_reference_ngrams = 0
        for ngram, count in reference_ngram_counts.items():
            matches += min(predicted_ngram_counts[ngram], count)
            total_reference_ngrams += count

        total_predicted_ngrams = sum(predicted_ngram_counts.values())

        if total_reference_ngrams == 0 or total_predicted_ngrams == 0 or matches == 0:
            continue

        recall = matches / total_reference_ngrams
        precision = matches / total_predicted_ngrams
        f1 = 2.0 * recall * precision / (recall + precision)

        # Accumulate stats
        total_recall += recall
        total_precision += precision
        total_f1 += f1

    if is_distributed():
        device = predicted_tokens.device
        _total_recall = torch.tensor(total_recall, device=device)
        _total_precision = torch.tensor(total_precision, device=device)
        _total_f1 = torch.tensor(total_f1, device=device)
        dist.all_reduce(_total_recall, op=dist.ReduceOp.SUM)
        dist.all_reduce(_total_precision, op=dist.ReduceOp.SUM)
        dist.all_reduce(_total_f1, op=dist.ReduceOp.SUM)
        total_recall = _total_recall.item()
        total_precision = _total_precision.item()
        total_f1 = _total_f1.item()

    return total_recall, total_precision, total_f1
def __init__(self, base_reader: DatasetReader, **kwargs) -> None:
    super().__init__(**kwargs)
    if util.is_distributed():
        self._rank = torch.distributed.get_rank()
        self._world_size = torch.distributed.get_world_size()
    else:
        self._rank = 0
        self._world_size = 1
    self.reader = base_reader
def __init__(
    self,
    offload_to_cpu: Optional[bool] = True,
    maintain_forward_counter: Optional[bool] = None,
) -> None:
    self._offload_to_cpu = offload_to_cpu
    if maintain_forward_counter is None:
        from allennlp.common.util import is_distributed

        # Better to assume we need this in the distributed case, since we definitely
        # need this when the model is wrapped with FairScale's FSDP.
        self._maintain_forward_counter = is_distributed()
    else:
        self._maintain_forward_counter = maintain_forward_counter
def get_metric(self, reset: bool = False): """ # Returns The accumulated covariance. """ if is_distributed(): raise RuntimeError("Distributed aggregation for Covariance is currently not supported.") covariance = self._total_co_moment / (self._total_count - 1) if reset: self.reset() return covariance
def get_metric(self, reset: bool = False): """ # Returns A Dict per label containing following the span based metrics: - precision : `float` - recall : `float` - f1-measure : `float` Additionally, an `overall` key is included, which provides the precision, recall and f1-measure for all spans. """ if is_distributed(): raise RuntimeError( "Distributed aggregation for `SrlEvalScorer` is currently not supported." ) all_tags: Set[str] = set() all_tags.update(self._true_positives.keys()) all_tags.update(self._false_positives.keys()) all_tags.update(self._false_negatives.keys()) all_metrics = {} for tag in all_tags: if tag == "overall": raise ValueError( "'overall' is disallowed as a tag type, " "rename the tag type to something else if necessary.") precision, recall, f1_measure = self._compute_metrics( self._true_positives[tag], self._false_positives[tag], self._false_negatives[tag]) precision_key = "precision" + "-" + tag recall_key = "recall" + "-" + tag f1_key = "f1-measure" + "-" + tag all_metrics[precision_key] = precision all_metrics[recall_key] = recall all_metrics[f1_key] = f1_measure # Compute the precision, recall and f1 for all spans jointly. precision, recall, f1_measure = self._compute_metrics( sum(self._true_positives.values()), sum(self._false_positives.values()), sum(self._false_negatives.values()), ) all_metrics["precision-overall"] = precision all_metrics["recall-overall"] = recall all_metrics["f1-measure-overall"] = f1_measure if reset: self.reset() return all_metrics
def all_gather_anchor_positive_pairs(
    anchors: torch.Tensor, positives: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """If training on 2 or more GPUs, `all_gather`s the embeddings produced on each replica,
    ensuring that the gradients for the embeddings produced on each replica are not lost. The
    returned anchor, positive pairs can be fed to a contrastive loss. This method is necessary
    to ensure that we train against the expected number of negatives, 2 * (batch size - 1) per
    batch, as a naive implementation would end up training against only
    2 * (batch size / n_gpus - 1) negatives.

    If we are not training on 2 or more GPUs, this method is a no-op and returns its inputs.

    # Parameters

    anchors : torch.Tensor
        Embedded text representing the anchors.
    positives : torch.Tensor
        Embedded text representing the positives.

    # Returns

    Tuple[torch.Tensor, torch.Tensor]
        Embedded anchor, positive pairs that can be fed to a contrastive loss.
    """
    # If we are not using distributed training, this is a no-op.
    if not util.is_distributed():
        return anchors, positives

    # Gather the encoded anchors and positives on all replicas.
    anchors_list = [torch.ones_like(anchors) for _ in range(dist.get_world_size())]
    positives_list = [torch.ones_like(positives) for _ in range(dist.get_world_size())]
    dist.all_gather(anchors_list, anchors.contiguous())
    dist.all_gather(positives_list, positives.contiguous())

    # The gathered copies of the current replica's anchor, positive pairs have no gradients, so we
    # overwrite them with the pairs generated on this replica, which DO have gradients.
    anchors_list[dist.get_rank()] = anchors
    positives_list[dist.get_rank()] = positives

    # Finally, we concatenate the pairs so they can be fed to the contrastive loss.
    anchors = torch.cat(anchors_list)
    positives = torch.cat(positives_list)
    return anchors, positives
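A possible way to use `all_gather_anchor_positive_pairs` inside a training step is sketched below. The encoder, temperature, and InfoNCE-style loss are assumptions for illustration only; the gather function itself is the one defined above.

# Usage sketch (hypothetical encoder and loss, not part of the code above).
import torch
import torch.nn.functional as F


def contrastive_step(
    encoder: torch.nn.Module,
    anchor_inputs: torch.Tensor,
    positive_inputs: torch.Tensor,
    temperature: float = 0.05,
) -> torch.Tensor:
    anchors = encoder(anchor_inputs)      # shape: (local_batch_size, embedding_dim)
    positives = encoder(positive_inputs)  # shape: (local_batch_size, embedding_dim)

    # Gather embeddings from every replica so each example is contrasted against
    # negatives from the *global* batch, not just the local one.
    anchors, positives = all_gather_anchor_positive_pairs(anchors, positives)

    # Standard InfoNCE: the i-th anchor should match the i-th positive.
    anchors = F.normalize(anchors, dim=-1)
    positives = F.normalize(positives, dim=-1)
    logits = anchors @ positives.t() / temperature
    targets = torch.arange(logits.size(0), device=logits.device)
    return F.cross_entropy(logits, targets)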
def __call__(self, value): """ # Parameters value : `float` The value to average. """ self._total_value += list(self.detach_tensors(value))[0] self._count += 1 if is_distributed(): device = torch.device("cpu") _count = torch.tensor(self._count).to(device) _total_value = torch.tensor(self._total_value).to(device) dist.all_reduce(_count, op=dist.ReduceOp.SUM) dist.all_reduce(_total_value, op=dist.ReduceOp.SUM) self._count = _count.item() self._total_value = _total_value.item()
def __call__(
    self,
    best_span_strings: Union[str, List[str]],
    answer_strings: Union[List[str], List[List[str]]],
):
    if not isinstance(best_span_strings, list):
        best_span_strings = [best_span_strings]
        answer_strings = [answer_strings]  # type: ignore

    cast(List[str], best_span_strings)
    cast(List[List[str]], answer_strings)

    assert len(best_span_strings) == len(answer_strings)

    count = len(best_span_strings)
    exact_match = 0
    f1_score = 0.0

    for prediction, gold_answers in zip(best_span_strings, answer_strings):
        exact_match += squad.metric_max_over_ground_truths(
            squad.compute_exact, prediction, gold_answers
        )
        f1_score += squad.metric_max_over_ground_truths(
            squad.compute_f1, prediction, gold_answers
        )

    if is_distributed():
        if dist.get_backend() == "nccl":
            device = torch.cuda.current_device()
        else:
            device = torch.device("cpu")
        # Converting bool to int here, since we want to count the number of exact matches.
        _exact_match = torch.tensor(exact_match, dtype=torch.int).to(device)
        _f1_score = torch.tensor(f1_score, dtype=torch.double).to(device)
        _count = torch.tensor(count).to(device)
        dist.all_reduce(_exact_match, op=dist.ReduceOp.SUM)
        dist.all_reduce(_f1_score, op=dist.ReduceOp.SUM)
        dist.all_reduce(_count, op=dist.ReduceOp.SUM)
        exact_match = _exact_match.item()
        f1_score = _f1_score.item()
        count = _count.item()

    self._total_em += exact_match
    self._total_f1 += f1_score
    self._count += count
def get_metric(self, reset: bool = False): """ # Returns `Dict[str, float]` A Dict per label containing following the span based metrics: - precision : `float` - recall : `float` - f1-measure : `float` Additionally, an `overall` key is included, which provides the precision, recall and f1-measure for all spans. """ if is_distributed(): raise RuntimeError( "Distributed aggregation for SpanBasedF1Measure is currently not supported." ) all_tags: Set[str] = set() all_tags.update(self._true_positives.keys()) all_tags.update(self._false_positives.keys()) all_tags.update(self._false_negatives.keys()) all_metrics = {} # for tag in all_tags: # precision, recall, f1_measure = self._compute_metrics( # self._true_positives[tag], self._false_positives[tag], self._false_negatives[tag] # ) # precision_key = "precision" + "-" + tag # recall_key = "recall" + "-" + tag # f1_key = "FR-" + "-" + tag # all_metrics[precision_key] = precision # all_metrics[recall_key] = recall # all_metrics[f1_key] = f1_measure # Compute the precision, recall and f1 for all spans jointly. precision, recall, f1_measure = self._compute_metrics( sum(self._true_positives.values()), sum(self._false_positives.values()), sum(self._false_negatives.values()), ) all_metrics["PR-overall"] = precision all_metrics["RR-overall"] = recall all_metrics["FR-overall"] = f1_measure if reset: self.reset() return precision, recall, f1_measure
def __call__(self, value): """ # Parameters value : `float` The value to average. """ _total_value = list(self.detach_tensors(value))[0] _count = 1 if is_distributed(): device = torch.device("cuda" if dist.get_backend() == "nccl" else "cpu") count = torch.tensor(_count).to(device) total_value = torch.tensor(_total_value).to(device) dist.all_reduce(count, op=dist.ReduceOp.SUM) dist.all_reduce(total_value, op=dist.ReduceOp.SUM) _count = count.item() _total_value = total_value.item() self._count += _count self._total_value += _total_value
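The sum-reduce pattern that this metric (and most of the metrics above) relies on can be demonstrated with a small standalone script. Everything in this sketch is illustrative and not part of the library: it simply spawns two CPU processes with the `gloo` backend and shows that, after `dist.all_reduce(..., op=dist.ReduceOp.SUM)`, every rank holds the same global total and count and therefore computes the same average.

# Standalone sketch of the SUM all-reduce aggregation pattern (illustrative only).
import torch
import torch.distributed as dist
import torch.multiprocessing as mp


def _worker(rank: int, world_size: int) -> None:
    dist.init_process_group(
        backend="gloo",
        init_method="tcp://127.0.0.1:29500",
        rank=rank,
        world_size=world_size,
    )
    # Each rank contributes one local value; rank 0 contributes 1.0, rank 1 contributes 2.0.
    local_total = torch.tensor(float(rank + 1))
    local_count = torch.tensor(1)
    dist.all_reduce(local_total, op=dist.ReduceOp.SUM)
    dist.all_reduce(local_count, op=dist.ReduceOp.SUM)
    # Both ranks print the same global average: 1.5.
    print(f"rank {rank}: average = {local_total.item() / local_count.item()}")
    dist.destroy_process_group()


if __name__ == "__main__":
    mp.spawn(_worker, args=(2,), nprocs=2)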
def __init__(
    self,
    max_instances: Optional[int] = None,
    manual_distributed_sharding: bool = False,
    manual_multiprocess_sharding: bool = False,
    serialization_dir: Optional[str] = None,
) -> None:
    # Do some validation.
    if max_instances is not None and max_instances < 0:
        raise ValueError("If specified, max_instances should be a positive int")

    self.max_instances = max_instances
    self.manual_distributed_sharding = manual_distributed_sharding
    self.manual_multiprocess_sharding = manual_multiprocess_sharding
    self.serialization_dir = serialization_dir
    self._worker_info: Optional[WorkerInfo] = None
    self._distributed_info: Optional[DistributedInfo] = None
    # If we're actually in the main process, we can find the info using torch utils.
    if util.is_distributed():
        self._distributed_info = DistributedInfo(dist.get_world_size(), dist.get_rank())
def __call__( self, # type: ignore predictions: torch.LongTensor, gold_targets: torch.LongTensor, ) -> None: """ Update recall counts. # Parameters predictions : `torch.LongTensor` Batched predicted tokens of shape `(batch_size, max_sequence_length)`. references : `torch.LongTensor` Batched reference (gold) sequences with shape `(batch_size, max_gold_sequence_length)`. # Returns None """ # ROUGE-N predictions, gold_targets = self.detach_tensors( predictions, gold_targets) for n in range(1, self._ngram_size + 1): recall, precision, f1 = self._get_rouge_n_stats( predictions, gold_targets, n) self._total_rouge_n_recalls[n] += recall self._total_rouge_n_precisions[n] += precision self._total_rouge_n_f1s[n] += f1 # ROUGE-L self._total_rouge_l_f1 += self._get_rouge_l_score( predictions, gold_targets) sequence_count = len(predictions) if is_distributed(): device = predictions.device _sequence_count = torch.tensor(sequence_count, device=device) dist.all_reduce(_sequence_count, op=dist.ReduceOp.SUM) sequence_count = _sequence_count.item() self._total_sequence_count += sequence_count
def __call__(self, prediction: Union[str, List], ground_truths: List):  # type: ignore
    """
    Parameters
    ----------
    prediction : ``Union[str, List]``
        The predicted answer from the model evaluated. This could be a string, or a list
        of strings when multiple spans are predicted as the answer.
    ground_truths : ``List``
        All the ground truth answer annotations.
    """
    # If you wanted to split this out by answer type, you could look at [1] here and group by
    # that, instead of only keeping [0].
    ground_truth_answer_strings = [
        answer_json_to_strings(annotation)[0] for annotation in ground_truths
    ]
    exact_match, f1_score = metric_max_over_ground_truths(
        drop_em_and_f1, prediction, ground_truth_answer_strings
    )
    count = 1

    if is_distributed():
        if dist.get_backend() == "nccl":
            device = torch.cuda.current_device()
        else:
            device = torch.device("cpu")
        # Converting bool to int here, since we want to count the number of exact matches.
        _exact_match = torch.tensor(exact_match, dtype=torch.int).to(device)
        _f1_score = torch.tensor(f1_score).to(device)
        _count = torch.tensor(count).to(device)
        dist.all_reduce(_exact_match, op=dist.ReduceOp.SUM)
        dist.all_reduce(_f1_score, op=dist.ReduceOp.SUM)
        dist.all_reduce(_count, op=dist.ReduceOp.SUM)
        exact_match = _exact_match.item()
        f1_score = _f1_score.item()
        count = _count.item()

    self._total_em += exact_match
    self._total_f1 += f1_score
    self._count += count