def reduce_metrics(self, logging_outputs, criterion, logging_output_keys=None):
    logging_output_keys = logging_output_keys or self.eval_lang_pairs

    # aggregate logging outputs for each language pair
    agg_logging_outputs = {}
    for key in logging_output_keys:
        with metrics.aggregate() as agg:
            logging_outputs_key = [
                logging_output.get(key, {}) for logging_output in logging_outputs
            ]
            for k in ['sample_size', 'nsentences', 'ntokens']:
                metrics.log_scalar(k, sum(l[k] for l in logging_outputs_key))
            super().reduce_metrics(logging_outputs_key, criterion)
        agg_logging_outputs[key] = agg.get_smoothed_values()

    def sum_over_languages(key):
        return sum(logging_output[key] for logging_output in agg_logging_outputs.values())

    # flatten logging outputs
    flat_logging_output = {
        '{}:{}'.format(lang_pair, k): v
        for lang_pair, agg_logging_output in agg_logging_outputs.items()
        for k, v in agg_logging_output.items()
    }
    flat_logging_output['loss'] = sum_over_languages('loss')
    if any('nll_loss' in logging_output for logging_output in agg_logging_outputs.values()):
        flat_logging_output['nll_loss'] = sum_over_languages('nll_loss')
    flat_logging_output['sample_size'] = sum_over_languages('sample_size')
    flat_logging_output['nsentences'] = sum_over_languages('nsentences')
    flat_logging_output['ntokens'] = sum_over_languages('ntokens')
    return flat_logging_output

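# A minimal sketch of the flattening step above, using plain dicts instead of
# fairseq's metrics meters. The language pairs and values are invented for
# illustration; only the key-naming scheme ('{lang_pair}:{metric}') and the
# summation over pairs mirror the function above.
agg_logging_outputs = {
    'en-de': {'loss': 2.0, 'sample_size': 100},
    'en-fr': {'loss': 3.0, 'sample_size': 200},
}
flat = {
    '{}:{}'.format(pair, k): v
    for pair, out in agg_logging_outputs.items()
    for k, v in out.items()
}
flat['loss'] = sum(out['loss'] for out in agg_logging_outputs.values())
assert flat['en-de:loss'] == 2.0 and flat['loss'] == 5.0
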
def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs))
    ntokens = utils.item(sum(log.get("ntokens", 0) for log in logging_outputs))
    nsentences = utils.item(
        sum(log.get("nsentences", 0) for log in logging_outputs)
    )
    sample_size = utils.item(
        sum(log.get("sample_size", 0) for log in logging_outputs)
    )

    metrics.log_scalar("loss", loss_sum / sample_size, sample_size, round=3)
    metrics.log_scalar("ntokens", ntokens)
    metrics.log_scalar("nsentences", nsentences)

    builtin_keys = {
        "loss",
        "ntokens",
        "nsentences",
        "sample_size",
        "_world_size",
    }

    world_size = utils.item(
        sum(log.get("_world_size", 0) for log in logging_outputs)
    )

    for k in logging_outputs[0]:
        if k not in builtin_keys:
            val = sum(log.get(k, 0) for log in logging_outputs)
            if k.startswith("loss_"):
                metrics.log_scalar(k, val / sample_size, sample_size, round=3)
            else:
                metrics.log_scalar(k, val / world_size, round=3)

def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = sum(log.get('loss', 0) for log in logging_outputs)
    nll_loss_sum = sum(log.get('nll_loss', 0) for log in logging_outputs)
    ntokens = sum(log.get('ntokens', 0) for log in logging_outputs)
    sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)

    metrics.log_scalar('loss', loss_sum / sample_size / math.log(2),
                       sample_size, round=3)
    metrics.log_scalar('nll_loss', nll_loss_sum / ntokens / math.log(2),
                       ntokens, round=3)
    metrics.log_derived(
        'ppl', lambda meters: utils.get_perplexity(meters['nll_loss'].avg))

def reduce_metrics(cls, logging_outputs):
    """Aggregate logging outputs from data parallel training."""
    agg_logging_outputs = cls.aggregate_logging_outputs(logging_outputs)
    for k, v in agg_logging_outputs.items():
        if k in {'nsentences', 'ntokens', 'sample_size'}:
            continue
        metrics.log_scalar(k, v, round=3)

def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training (copied from normal cross entropy)."""
    loss_sum = sum(log.get("loss", 0) for log in logging_outputs)
    ntokens = sum(log.get("ntokens", 0) for log in logging_outputs)
    sample_size = sum(log.get("sample_size", 0) for log in logging_outputs)

    metrics.log_scalar(
        "loss", loss_sum / sample_size / math.log(2), sample_size, round=3
    )
    if sample_size != ntokens:
        metrics.log_scalar(
            "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3
        )
        metrics.log_derived(
            "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg)
        )
    else:
        metrics.log_derived(
            "ppl", lambda meters: utils.get_perplexity(meters["loss"].avg)
        )

    counts = {}
    for lk in logging_outputs[0].keys():
        if lk.startswith("count_"):
            val = sum(log[lk] for log in logging_outputs)
            metrics.log_scalar(lk, val)
            counts[lk] = val

    for lk in logging_outputs[0].keys():
        if lk.startswith("loss_"):
            val = sum(log[lk] for log in logging_outputs)
            metrics.log_scalar(lk, val / sample_size / math.log(2), round=3)
        elif lk.startswith("correct_"):
            val = sum(log[lk] for log in logging_outputs)
            metrics.log_scalar(lk, val / counts[re.sub("correct", "count", lk)])

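# A self-contained sketch of the paired "correct_*" / "count_*" convention used
# above: each "correct_<name>" key is divided by its matching "count_<name>"
# key to yield an accuracy. The key names and numbers here are invented for
# illustration.
import re

logging_outputs = [
    {"correct_m_0": 8, "count_m_0": 10},
    {"correct_m_0": 9, "count_m_0": 10},
]
counts = {
    lk: sum(log[lk] for log in logging_outputs)
    for lk in logging_outputs[0]
    if lk.startswith("count_")
}
for lk in logging_outputs[0]:
    if lk.startswith("correct_"):
        correct = sum(log[lk] for log in logging_outputs)
        acc = correct / counts[re.sub("correct", "count", lk)]
        assert acc == 17 / 20
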
def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = utils.item(
        sum(log.get('loss', 0) for log in logging_outputs))
    ntokens = utils.item(
        sum(log.get('ntokens', 0) for log in logging_outputs))
    nsentences = utils.item(
        sum(log.get('nsentences', 0) for log in logging_outputs))
    sample_size = utils.item(
        sum(log.get('sample_size', 0) for log in logging_outputs))

    metrics.log_scalar('loss', loss_sum / sample_size / math.log(2),
                       sample_size, round=3)
    metrics.log_scalar('ntokens', ntokens)
    metrics.log_scalar('nsentences', nsentences)

    builtin_keys = {'loss', 'ntokens', 'nsentences', 'sample_size'}
    for k in logging_outputs[0]:
        if k not in builtin_keys:
            val = sum(log.get(k, 0) for log in logging_outputs) / len(logging_outputs)
            if k.startswith('loss'):
                metrics.log_scalar(k, val / sample_size / math.log(2), sample_size)
            else:
                metrics.log_scalar(k, val, round=3)

def reduce_metrics(self, logging_outputs, criterion):
    super().reduce_metrics(logging_outputs, criterion)
    metrics.log_scalar(
        "posterior",
        sum(log["posterior"] for log in logging_outputs if "posterior" in log),
    )

def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = sum(log.get('loss', 0) for log in logging_outputs)
    sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)

    metrics.log_scalar('loss', loss_sum / sample_size / math.log(2),
                       sample_size, round=3)
    metrics.log_derived(
        'ppl', lambda meters: utils.get_perplexity(meters['loss'].avg))

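# Why the recurring "/ math.log(2)": the criterion accumulates cross entropy
# in nats (natural log), and dividing by ln(2) converts it to bits, so
# perplexity can be reported as 2 ** avg_loss. A quick self-contained check
# with invented numbers:
import math

loss_sum_nats = 138.6   # hypothetical summed loss in nats
sample_size = 100       # hypothetical sample size
loss_bits = loss_sum_nats / sample_size / math.log(2)
ppl = 2 ** loss_bits
# 2 ** (x / ln 2) == e ** x, so the two conversions agree
assert abs(ppl - math.exp(loss_sum_nats / sample_size)) < 1e-9
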
def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = sum(log.get('loss', 0) for log in logging_outputs)
    ntokens = sum(log.get('ntokens', 0) for log in logging_outputs)
    nsentences = sum(log.get('nsentences', 0) for log in logging_outputs)
    sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)
    TP = sum(log.get('TP', 0) for log in logging_outputs)
    TN = sum(log.get('TN', 0) for log in logging_outputs)
    FP = sum(log.get('FP', 0) for log in logging_outputs)
    FN = sum(log.get('FN', 0) for log in logging_outputs)

    metrics.log_scalar('loss', loss_sum / sample_size / math.log(2),
                       sample_size, round=3)
    if sample_size != ntokens:
        metrics.log_scalar('nll_loss', loss_sum / ntokens / math.log(2),
                           ntokens, round=3)

    if len(logging_outputs) > 0 and 'ncorrect' in logging_outputs[0]:
        ncorrect = sum(log.get('ncorrect', 0) for log in logging_outputs)
        metrics.log_scalar('accuracy', 100.0 * ncorrect / nsentences,
                           nsentences, round=2)
        metrics.log_scalar(
            'mcc',
            100.0 * (TP * TN - FP * FN)
            / (((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)) ** .5),
            round=2)

    if 'logits' in logging_outputs[0]:
        logits = np.concatenate([log.get('logits') for log in logging_outputs])
        targets = np.concatenate([log.get('targets') for log in logging_outputs])
        spearman_corr = stats.spearmanr(logits, targets).correlation
        metrics.log_scalar('sprcorr', 100.0 * spearman_corr, round=2)

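# A small sanity check of the Matthews correlation coefficient formula used
# above, on an invented confusion matrix. MCC is 1 for perfect predictions,
# 0 at chance level, and -1 for total disagreement.
TP, TN, FP, FN = 40, 45, 5, 10
mcc = (TP * TN - FP * FN) / (
    ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)) ** 0.5
)
assert 0.0 < mcc < 1.0  # a decent but imperfect classifier
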
def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = sum(log.get("loss", 0) for log in logging_outputs)
    nll_loss_sum = sum(log.get("nll_loss", 0) for log in logging_outputs)
    ntokens = sum(log.get("ntokens", 0) for log in logging_outputs)
    sample_size = sum(log.get("sample_size", 0) for log in logging_outputs)
    symm_kl_sum = sum(log.get("symm_kl", 0) for log in logging_outputs)
    self_kl_sum = sum(log.get("self_kl", 0) for log in logging_outputs)
    self_cv_sum = sum(log.get("self_cv", 0) for log in logging_outputs)

    metrics.log_scalar("symm_kl", symm_kl_sum / sample_size, sample_size, round=3)
    metrics.log_scalar("self_kl", self_kl_sum / sample_size, sample_size, round=3)
    metrics.log_scalar("self_cv", self_cv_sum / sample_size, sample_size, round=3)
    metrics.log_scalar(
        "loss", loss_sum / sample_size / math.log(2), sample_size, round=3
    )
    metrics.log_scalar(
        "nll_loss", nll_loss_sum / ntokens / math.log(2), ntokens, round=3
    )
    metrics.log_derived(
        "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg)
    )

def reduce_metrics(cls, logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = sum(log.get('loss', 0) for log in logging_outputs)
    nll_loss_sum = sum(log.get('nll_loss', 0) for log in logging_outputs)
    kd_loss_sum = sum(log.get('kd_loss', 0) for log in logging_outputs)
    ntokens = sum(log.get('ntokens', 0) for log in logging_outputs)
    sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)

    metrics.log_scalar('loss', loss_sum / sample_size / math.log(2),
                       sample_size, round=3)
    metrics.log_scalar('nll_loss', nll_loss_sum / ntokens / math.log(2),
                       ntokens, round=3)
    metrics.log_scalar('kd_loss', kd_loss_sum / ntokens / math.log(2),
                       ntokens, round=3)
    metrics.log_derived(
        'ppl', lambda meters: utils.get_perplexity(meters['nll_loss'].avg))

    total = utils.item(sum(log.get('total', 0) for log in logging_outputs))
    if total > 0:
        metrics.log_scalar('total', total)
        n_correct = utils.item(
            sum(log.get('n_correct', 0) for log in logging_outputs))
        metrics.log_scalar('n_correct', n_correct)
        metrics.log_derived(
            'accuracy',
            lambda meters: round(
                meters['n_correct'].sum * 100.0 / meters['total'].sum, 3)
            if meters['total'].sum > 0
            else float('nan'),
        )

def reduce_metrics(cls, logging_outputs) -> None:
    for task_name in logging_outputs[0]["multitask"].keys():
        # different criteria may return different logging outputs; currently
        # we only reduce the loss, the most common one. Ideally the way losses
        # are reduced should also depend on the task type.
        loss_sum = sum(
            log["multitask"][task_name].get("loss", 0) for log in logging_outputs
        )
        sample_size = sum(
            log["multitask"][task_name].get("sample_size", 0)
            for log in logging_outputs
        )
        metrics.log_scalar(
            f"multitask_{task_name}_loss",
            loss_sum / sample_size / math.log(2),
            sample_size,
            round=3,
        )
        loss_weight = logging_outputs[0]["multitask"][task_name].get(
            "loss_weight", 0
        )
        metrics.log_scalar(
            f"multitask_{task_name}_loss_weight",
            loss_weight,
            weight=0,
            priority=250,
        )

def reduce_metrics(cls, logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = sum(log.get("loss", 0) for log in logging_outputs)
    nll_loss_sum = sum(log.get("nll_loss", 0) for log in logging_outputs)
    kd_loss_sum = sum(log.get("kd_loss", 0) for log in logging_outputs)
    ntokens = sum(log.get("ntokens", 0) for log in logging_outputs)
    sample_size = sum(log.get("sample_size", 0) for log in logging_outputs)

    metrics.log_scalar("loss", loss_sum / sample_size / math.log(2),
                       sample_size, round=3)
    metrics.log_scalar("nll_loss", nll_loss_sum / ntokens / math.log(2),
                       ntokens, round=3)
    metrics.log_scalar("kd_loss", kd_loss_sum / ntokens / math.log(2),
                       ntokens, round=3)
    metrics.log_derived(
        "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg))

    total = utils.item(sum(log.get("total", 0) for log in logging_outputs))
    if total > 0:
        metrics.log_scalar("total", total)
        n_correct = utils.item(
            sum(log.get("n_correct", 0) for log in logging_outputs))
        metrics.log_scalar("n_correct", n_correct)
        metrics.log_derived(
            "accuracy",
            lambda meters: round(
                meters["n_correct"].sum * 100.0 / meters["total"].sum, 3)
            if meters["total"].sum > 0
            else float("nan"),
        )

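# The accuracy above is registered with log_derived rather than log_scalar:
# the lambda re-computes it from the summed meters each time stats are
# reported, so the ratio stays consistent however many updates were logged.
# A minimal sketch with invented counts, assuming fairseq's metrics module is
# importable as below:
from fairseq.logging import metrics

with metrics.aggregate() as agg:
    metrics.log_scalar("n_correct", 30)
    metrics.log_scalar("total", 40)
    metrics.log_derived(
        "accuracy",
        lambda meters: round(meters["n_correct"].sum * 100.0 / meters["total"].sum, 3),
    )
assert agg.get_smoothed_values()["accuracy"] == 75.0
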
def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training.

    Since we use NCE, our actual batch size is 1 per GPU, so we take the
    mean over workers.
    """
    loss_sum = sum(log.get("loss", 0.0) for log in logging_outputs)
    sample_size = sum(log.get("sample_size", 0) for log in logging_outputs)
    metrics.log_scalar("loss", loss_sum / sample_size, round=3)

def test_new_root(self):
    with metrics.aggregate() as a:
        metrics.log_scalar('loss', 1)
        with metrics.aggregate(new_root=True) as b:
            metrics.log_scalar('loss', 2)

    self.assertEqual(a.get_smoothed_values()['loss'], 1)
    self.assertEqual(b.get_smoothed_values()['loss'], 2)

def set_num_updates(self, num_updates):
    """Set the number of parameter updates."""
    self._num_updates = num_updates
    self.lr_step_update()
    metrics.log_scalar("num_updates", self._num_updates, weight=0, priority=200)

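# Why weight=0 above: with fairseq's AverageMeter, a zero weight leaves the
# meter's running count untouched, so get_smoothed_values() reports the most
# recent value rather than an average -- appropriate for a monotone counter
# like num_updates. A minimal sketch, assuming fairseq's metrics module is
# importable as below:
from fairseq.logging import metrics

with metrics.aggregate() as agg:
    metrics.log_scalar("num_updates", 10, weight=0)
    metrics.log_scalar("num_updates", 20, weight=0)
assert agg.get_smoothed_values()["num_updates"] == 20  # latest value wins
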
def test_nesting(self):
    with metrics.aggregate() as a:
        metrics.log_scalar("loss", 1)
        with metrics.aggregate() as b:
            metrics.log_scalar("loss", 2)

    self.assertEqual(a.get_smoothed_values()["loss"], 1.5)
    self.assertEqual(b.get_smoothed_values()["loss"], 2)

def reduce_metrics(cls, logging_outputs) -> None:
    super().reduce_metrics(logging_outputs)
    loss_sum = sum(log.get("ctc_loss", 0) for log in logging_outputs)
    sample_size = sum(log.get("sample_size", 0) for log in logging_outputs)
    metrics.log_scalar("ctc_loss", loss_sum / sample_size / math.log(2),
                       sample_size, round=3)

def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = sum(log.get('loss', 0) for log in logging_outputs)
    nll_loss_sum = sum(log.get('nll_loss', 0) for log in logging_outputs)
    ntokens = sum(log.get('ntokens', 0) for log in logging_outputs)
    sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)

    metrics.log_scalar('loss', loss_sum / sample_size / math.log(2),
                       sample_size, round=3)
    metrics.log_scalar('nll_loss', nll_loss_sum / ntokens / math.log(2),
                       ntokens, round=3)
    metrics.log_derived(
        'ppl', lambda meters: round(2 ** meters['nll_loss'].avg, 3))

def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs))
    sample_size = utils.item(
        sum(log.get("sample_size", 0) for log in logging_outputs)
    )
    loss = loss_sum / sample_size / math.log(2)
    metrics.log_scalar("loss", loss, sample_size, round=3)

def reduce_metrics(cls, logging_outputs: List[Dict[str, Any]]) -> None:
    """Aggregate logging outputs from data parallel training."""
    utils.deprecation_warning(
        'Criterions should implement the reduce_metrics API. '
        'Falling back to deprecated aggregate_logging_outputs API.')
    agg_logging_outputs = cls.aggregate_logging_outputs(logging_outputs)
    for k, v in agg_logging_outputs.items():
        if k in {'nsentences', 'ntokens', 'sample_size'}:
            continue
        metrics.log_scalar(k, v)

def reduce_metrics(cls, logging_outputs: List[Dict[str, Any]]) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = sum(log.get('loss', 0) for log in logging_outputs)
    ntokens = sum(log.get('ntokens', 0) for log in logging_outputs)
    sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)
    nsentences = sum(log.get('nsentences', 0) for log in logging_outputs)
    metrics.log_scalar('loss', loss_sum / sample_size / math.log(2),
                       sample_size, round=3)

def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = sum(log.get('loss', 0) for log in logging_outputs)
    n_sentences = sum(log.get('n_sentences', 0) for log in logging_outputs)
    n_correct = sum(log.get('n_correct', 0) for log in logging_outputs)
    total_n = sum(log.get('total_n', 0) for log in logging_outputs)

    metrics.log_scalar('loss', loss_sum / n_sentences, n_sentences, round=3)
    metrics.log_scalar('accuracy', float(n_correct) / float(total_n),
                       total_n, round=3)

def reduce_metrics(cls, logging_outputs) -> None:
    super().reduce_metrics(logging_outputs)
    nsentences = utils.item(
        sum(log.get("nsentences", 0) for log in logging_outputs))
    contrastive_loss = utils.item(
        sum(log.get("contrastive_loss", 0) for log in logging_outputs))
    metrics.log_scalar(
        "contrastive_loss",
        contrastive_loss / nsentences / math.log(2),
        nsentences,
        round=3,
    )

def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = utils.item(sum(log.get('loss', 0) for log in logging_outputs))
    ntokens = utils.item(sum(log.get('ntokens', 0) for log in logging_outputs))
    sample_size = utils.item(
        sum(log.get('sample_size', 0) for log in logging_outputs))

    metrics.log_scalar('loss', loss_sum / sample_size / math.log(2),
                       sample_size, round=3)
    if sample_size != ntokens:
        metrics.log_scalar('nll_loss', loss_sum / ntokens / math.log(2),
                           ntokens, round=3)
        metrics.log_derived(
            'ppl', lambda meters: utils.get_perplexity(meters['nll_loss'].avg))
    else:
        metrics.log_derived(
            'ppl', lambda meters: utils.get_perplexity(meters['loss'].avg))

def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    CrossEntropyCriterion.reduce_metrics(logging_outputs)
    word_error = sum(log.get('word_error', 0) for log in logging_outputs)
    word_count = sum(log.get('word_count', 0) for log in logging_outputs)
    char_error = sum(log.get('char_error', 0) for log in logging_outputs)
    char_count = sum(log.get('char_count', 0) for log in logging_outputs)
    if word_count > 0:  # model.training == False
        metrics.log_scalar('wer', float(word_error) / word_count * 100,
                           word_count, round=4)
    if char_count > 0:  # model.training == False
        metrics.log_scalar('cer', float(char_error) / char_count * 100,
                           char_count, round=4)

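# A quick sanity check of the WER/CER arithmetic above, on invented error and
# reference counts: each rate is edit errors as a percentage of the reference
# length in the corresponding unit (words or characters).
word_error, word_count = 12, 100
char_error, char_count = 30, 600
wer = float(word_error) / word_count * 100
cer = float(char_error) / char_count * 100
assert (wer, cer) == (12.0, 5.0)
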
def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    lm_loss_sum = sum(log.get("lm_loss", 0) for log in logging_outputs)
    sentence_loss_sum = sum(
        log.get("sentence_loss", 0) for log in logging_outputs)
    ntokens = sum(log.get("ntokens", 0) for log in logging_outputs)
    nsentences = sum(log.get("nsentences", 0) for log in logging_outputs)
    sample_size = sum(log.get("sample_size", 0) for log in logging_outputs)
    agg_loss = sum(log.get("loss", 0) for log in logging_outputs)

    metrics.log_scalar(
        "loss",
        agg_loss / sample_size / math.log(2) if sample_size > 0 else 0.0,
        sample_size,
        round=3,
    )
    metrics.log_scalar(
        "lm_loss",
        lm_loss_sum / ntokens / math.log(2) if ntokens > 0 else 0.0,
        ntokens,
        round=3,
    )
    metrics.log_scalar(
        "sentence_loss",
        sentence_loss_sum / nsentences / math.log(2) if nsentences > 0 else 0.0,
        nsentences,
        round=3,
    )
    metrics.log_scalar(
        "nll_loss",
        lm_loss_sum / ntokens / math.log(2) if ntokens > 0 else 0.0,
        ntokens,
        round=3,
    )

def reduce_metrics(self, logging_outputs, criterion):
    super().reduce_metrics(logging_outputs, criterion)
    if self.args.eval_bleu:

        def sum_logs(key):
            return sum(log.get(key, 0) for log in logging_outputs)

        counts, totals = [], []
        for i in range(EVAL_BLEU_ORDER):
            counts.append(sum_logs("_bleu_counts_" + str(i)))
            totals.append(sum_logs("_bleu_totals_" + str(i)))

        if max(totals) > 0:
            # log counts as numpy arrays -- log_scalar will sum them correctly
            metrics.log_scalar("_bleu_counts", np.array(counts))
            metrics.log_scalar("_bleu_totals", np.array(totals))
            metrics.log_scalar("_bleu_sys_len", sum_logs("_bleu_sys_len"))
            metrics.log_scalar("_bleu_ref_len", sum_logs("_bleu_ref_len"))

            def compute_bleu(meters):
                import inspect
                import sacrebleu

                fn_sig = inspect.getfullargspec(sacrebleu.compute_bleu)[0]
                if "smooth_method" in fn_sig:
                    smooth = {"smooth_method": "exp"}
                else:
                    smooth = {"smooth": "exp"}
                bleu = sacrebleu.compute_bleu(
                    correct=meters["_bleu_counts"].sum,
                    total=meters["_bleu_totals"].sum,
                    sys_len=meters["_bleu_sys_len"].sum,
                    ref_len=meters["_bleu_ref_len"].sum,
                    **smooth)
                return round(bleu.score, 2)

            metrics.log_derived("bleu", compute_bleu)

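# Why the counts above are logged as numpy arrays: the meters sum logged
# values, and summing arrays elementwise accumulates the per-order n-gram
# counts across workers in a single scalar slot. A self-contained sketch with
# invented counts for BLEU orders 1..4:
import numpy as np

worker_counts = [np.array([9, 7, 5, 3]), np.array([8, 6, 4, 2])]
worker_totals = [np.array([10, 9, 8, 7]), np.array([10, 9, 8, 7])]
counts = sum(worker_counts)   # elementwise: array([17, 13, 9, 5])
totals = sum(worker_totals)   # elementwise: array([20, 18, 16, 14])
precisions = counts / totals  # per-order modified n-gram precision
assert precisions.shape == (4,)
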
def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    LabelSmoothedCrossEntropyCriterion.reduce_metrics(logging_outputs)
    mask_loss_sum = sum(log.get('mask_loss', 0) for log in logging_outputs)
    p_sum = sum(log.get('p2', 0) for log in logging_outputs)
    sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)
    mask_sum = sum(log.get('mask_ave', 0) for log in logging_outputs)

    metrics.log_scalar('mask_loss', mask_loss_sum / sample_size / math.log(2),
                       sample_size, round=6)
    metrics.log_scalar('p_2', p_sum / sample_size, sample_size, round=5)
    metrics.log_scalar('mask_ave', mask_sum / sample_size, sample_size, round=3)
    metrics.log_scalar('new_weight', logging_outputs[0].get("new_weight", 0) / 4,
                       len(logging_outputs), round=3)

def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    loss_sum = sum(log.get('loss', 0) for log in logging_outputs)
    loss_ce = sum(log.get('ce_loss', 0) for log in logging_outputs)
    loss_kd = sum(log.get('kd_loss', 0) for log in logging_outputs)
    if 'ce_loss_teacher' in logging_outputs[0]:
        loss_ce_teacher = sum(
            log.get('ce_loss_teacher', 0) for log in logging_outputs)
    sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)

    metrics.log_scalar('loss', loss_sum / sample_size / math.log(2),
                       sample_size, round=3)
    metrics.log_scalar('loss_ce', loss_ce / sample_size / math.log(2),
                       sample_size, round=3)
    metrics.log_scalar('loss_kd', loss_kd / sample_size / math.log(2),
                       sample_size, round=3)
    if 'ce_loss_teacher' in logging_outputs[0]:
        metrics.log_scalar('loss_ce_teacher',
                           loss_ce_teacher / sample_size / math.log(2),
                           sample_size, round=3)
    metrics.log_derived(
        'ppl', lambda meters: utils.get_perplexity(meters['loss'].avg))