def aggregate_logging_outputs(logging_outputs):
    """Aggregate logging outputs from data parallel training.

    Sums per-worker counters ('loss', 'ntokens', 'nsentences',
    'sample_size'), normalizes the loss to base-2 (bits), merges in the
    custom TrainingMetrics aggregates, and adds a per-token 'nll_loss'
    when the loss was normalized by something other than the token count.
    """
    def _total(key):
        # Missing keys count as zero so partial logging outputs still aggregate.
        return sum(entry.get(key, 0) for entry in logging_outputs)

    loss_total = _total('loss')
    token_total = _total('ntokens')
    sentence_total = _total('nsentences')
    size_total = _total('sample_size')

    aggregated = {
        # Divide by ln(2) to convert nats to bits; guard against empty batches.
        'loss': loss_total / size_total / math.log(2) if size_total > 0 else 0.,
        'ntokens': token_total,
        'nsentences': sentence_total,
        'sample_size': size_total,
    }

    # Deferred import mirrors the original code: keeps the custom-metrics
    # dependency out of module import time.
    from fairseq.custom.metrics import TrainingMetrics
    aggregated.update(TrainingMetrics.aggregate_and_normalize(logging_outputs))

    # If sample_size is not the token count, also report token-normalized
    # loss (in bits) so perplexity can be derived from it.
    if size_total != token_total:
        aggregated['nll_loss'] = (
            loss_total / token_total / math.log(2) if token_total > 0 else 0.
        )
    return aggregated
def eval_single_token_prediction(model, itr, dictionary, singletoken_topp=0.0, singletoken_topk=1):
    """Evaluate next-token prediction over *itr*, one batch at a time.

    For every batch: records argmax predictions and targets, accumulates
    the MLE (negative log-likelihood) loss over non-pad tokens, collects
    TrainingMetrics ranking statistics, and accumulates the softmax mass
    assigned to tokens scored above the true token ("wrong mass").

    Returns (predicted_tokens, target_tokens, custom_metrics) where
    custom_metrics includes 'ppl' and 'avg_wrong_mass'.
    """
    predicted_tokens = []
    target_tokens = []
    nll_total = 0
    token_total = 0
    wrong_mass_total = 0
    logging_outputs = []

    for _, sample in tqdm(enumerate(itr)):
        sample = utils.move_to_cuda(sample)
        net_output = model(**sample['net_input'])

        # net_output[0] holds the model scores; the extra [0] selects the
        # logits tensor itself — assumes (tokens, vocab) shape; TODO confirm.
        logits = net_output[0][0]
        # Forbid predicting the pad symbol before taking the argmax.
        logits[:, dictionary.pad()] = -1e19
        predicted_tokens.append(logits.argmax(1).tolist())

        target = sample['target'].view(-1)
        target_tokens.append(target.tolist())

        # Per-token negative log-probabilities of the true tokens (pad ignored).
        lprobs = model.get_normalized_probs(net_output, log_probs=True)
        lprobs = lprobs.view(-1, lprobs.size(-1))
        true_token_lprobs = F.nll_loss(
            lprobs,
            target,
            ignore_index=dictionary.pad_index,
            reduction='none',
        )
        # nll_loss negates its input, so negating the result recovers the
        # raw logit of each true token.
        true_token_logits = -F.nll_loss(
            logits,
            target,
            ignore_index=dictionary.pad_index,
            reduction='none',
        )

        ntokens = utils.strip_pad(target, dictionary.pad_index).numel()
        nll_total += true_token_lprobs.sum().item()
        token_total += ntokens

        logging_outputs.append(
            TrainingMetrics.ranking_metrics(
                logits,
                true_token_logits,
                sample,
                ntokens,
                target,
                topk=singletoken_topk,
                topp=singletoken_topp,
            )
        )

        # Probability mass placed on tokens the model ranked above the truth.
        higher_ranked = (logits > true_token_logits[:, None]).float()
        wrong_mass_total += (higher_ranked * F.softmax(logits, dim=1)).sum()

    # pow(2, nats / ln 2) == exp(mean nll): standard perplexity.
    ppl = math.pow(2, nll_total / token_total / math.log(2))
    custom_metrics = TrainingMetrics.aggregate_and_normalize(logging_outputs)
    custom_metrics['ppl'] = ppl
    custom_metrics['avg_wrong_mass'] = (wrong_mass_total / token_total).item()
    return predicted_tokens, target_tokens, custom_metrics