def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training.

    Delegates the base loss aggregation to ``CrossEntropyCriterion`` and then
    logs an ``accuracy`` scalar computed from the per-worker correct/total
    counts.

    :param logging_outputs: iterable of per-worker logging dicts; each may
        carry ``num_corr`` and ``num_tot`` entries (missing entries count as 0).
    """
    CrossEntropyCriterion.reduce_metrics(logging_outputs)
    num_corr = sum(log.get("num_corr", 0) for log in logging_outputs)
    num_tot = sum(log.get("num_tot", 0) for log in logging_outputs)
    # BUG FIX: the original used `num_corr.float()`, which raises
    # AttributeError whenever no log contains "num_corr" and the sum
    # collapses to the plain int 0. Built-in float() handles both python
    # ints and 0-dim tensors.
    metrics.log_scalar(
        "accuracy",
        float(num_corr) / num_tot * 100 if num_tot > 0 else 0.0,
        num_tot,
        round=3,
    )
def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training.

    Delegates base loss aggregation to ``CrossEntropyCriterion`` and, when
    evaluation-time error counts are present, logs word error rate (``wer``)
    and character error rate (``cer``) scalars.

    :param logging_outputs: iterable of per-worker logging dicts; each may
        carry ``word_error``/``word_count``/``char_error``/``char_count``
        entries (missing entries count as 0).
    """
    CrossEntropyCriterion.reduce_metrics(logging_outputs)
    word_error = sum(log.get('word_error', 0) for log in logging_outputs)
    word_count = sum(log.get('word_count', 0) for log in logging_outputs)
    char_error = sum(log.get('char_error', 0) for log in logging_outputs)
    char_count = sum(log.get('char_count', 0) for log in logging_outputs)
    if word_count > 0:  # model.training == False
        metrics.log_scalar('wer', float(word_error) / word_count * 100, word_count, round=4)
    if char_count > 0:  # model.training == False
        # BUG FIX: this branch previously re-logged 'wer' from the word
        # statistics, leaving char_error/char_count computed but unused.
        # It must log character error rate from the char statistics.
        metrics.log_scalar('cer', float(char_error) / char_count * 100, char_count, round=4)
def test_zero_eps(self):
    """With label smoothing disabled, the smoothed loss must equal plain NLL."""
    self.args.label_smoothing = 0.0
    args, task = self.args, self.task

    plain_criterion = CrossEntropyCriterion.build_criterion(args, task)
    smoothed_criterion = LabelSmoothedCrossEntropyCriterion.build_criterion(args, task)

    plain_loss, _plain_size, _plain_logs = plain_criterion(self.model, self.sample)
    smoothed_loss, _smooth_size, _smooth_logs = smoothed_criterion(self.model, self.sample)

    self.assertAlmostEqual(plain_loss, smoothed_loss)
def test_nll_loss(self):
    """The smoothed criterion's reported nll_loss must match plain cross-entropy."""
    self.args.label_smoothing = 0.1
    args, task = self.args, self.task

    plain_criterion = CrossEntropyCriterion.build_criterion(args, task)
    smoothed_criterion = LabelSmoothedCrossEntropyCriterion.build_criterion(args, task)

    plain_loss, _plain_size, plain_logs = plain_criterion(self.model, self.sample)
    _smoothed_loss, _smooth_size, smoothed_logs = smoothed_criterion(self.model, self.sample)

    tolerance = 1e-6
    # Logged loss agrees with the returned loss, and the smoothed criterion's
    # nll_loss component agrees with the plain criterion's loss.
    self.assertLess(abs(plain_loss - plain_logs['loss']), tolerance)
    self.assertLess(abs(plain_loss - smoothed_logs['nll_loss']), tolerance)
def aggregate_logging_outputs(logging_outputs):
    """Aggregate logging outputs from data parallel training."""
    agg_output = CrossEntropyCriterion.aggregate_logging_outputs(logging_outputs)

    def _total(key):
        # Sum one field across all workers, treating missing entries as zero.
        return sum(log.get(key, 0) for log in logging_outputs)

    word_error, word_count = _total('word_error'), _total('word_count')
    char_error, char_count = _total('char_error'), _total('char_count')

    if word_count > 0:  # model.training == False
        agg_output['word_error'] = word_error
        agg_output['word_count'] = word_count
    if char_count > 0:  # model.training == False
        agg_output['char_error'] = char_error
        agg_output['char_count'] = char_count
    return agg_output
def __init__(self, opt, shared=None):
    """Construct the fairseq-backed agent.

    When ``shared`` is None, performs the full (expensive) setup: wraps the
    ParlAI option dict into fairseq's argparse-style args, builds the
    dictionary, model, beam-search generator, criterion and trainer, loads an
    existing checkpoint if one is on disk, and moves everything to the GPU
    when available. When ``shared`` is provided, only the base-class
    initialization runs.

    :param opt: ParlAI option dict.
    :param shared: shared state from another instance of this agent, or None.
    """
    # In general use a basic TorchAgent wherever possible
    super().__init__(opt, shared)
    if not shared:
        # this is not a shared instance of this class, so do full initialization
        # fairseq expects options to be in argparse format, instead of a dict
        # We also need to do some argument postprocessing and whatnot
        self.args, self.opt = _fairseq_opt_wrapper(opt)
        # seed the RNG
        torch.manual_seed(self.args.seed)
        # Just some identifying info
        self.id = "fairseq:{}".format(self.args.arch)
        # construct dictionaries for parlai frontend and fairseq backend
        self.dict = _FairseqDictionary(self.opt)
        # We need a placeholder task for fairseq
        self.task = _ParlaiTask(self.dict)
        # actually construct the model and generator
        model_class = models.ARCH_MODEL_REGISTRY[self.args.arch]
        self.model = model_class.build_model(self.args, self.task)
        self.generator = SequenceGenerator(
            [self.model],
            tgt_dict=self.dict,
            beam_size=self.args.beam,
            stop_early=(not self.args.no_early_stop),
            normalize_scores=(not self.args.unnormalized),
            len_penalty=self.args.lenpen,
        )
        # set up the grader and the trainer
        # TODO: maybe support label smoothing here
        self.criterion = CrossEntropyCriterion(self.args, self.task)
        if self.args.fp16:
            self.trainer = fp16_trainer.FP16Trainer(
                self.args, self.task, self.model, self.criterion
            )
        else:
            # TODO: we might choose to add a --no-fp16 opt in the future to
            # explicitly disable fp16 instead
            # NOTE(review): capability >= 7 means Volta-or-newer tensor cores;
            # this only prints a hint, it does not switch precision.
            if torch.cuda.get_device_capability(0)[0] >= 7:
                print("Heads up: using --fp16 could be a lot faster!")
            self.trainer = trainer.Trainer(
                self.args, self.task, self.model, self.criterion
            )
        # if the model already existed, let's preload it and the trainer
        if self.opt.get('model_file') and os.path.isfile(self.opt['model_file']):
            print('Loading existing model params from ' + self.opt['model_file'])
            self.load(self.opt.get('model_file'))
        # move things to the GPU if possible
        if self.use_cuda:
            self.model = self.model.cuda()
            self.generator = self.generator.cuda()
    # Start things off clean
    self.reset()
def __init__(self, args, task, sentence_avg):
    """Initialize the criterion wrapper.

    :param args: fairseq-style argument namespace, kept for later use.
    :param task: fairseq task passed through to the base class and to the
        inner cross-entropy criterion.
    :param sentence_avg: whether the inner criterion averages the loss over
        sentences rather than tokens.
    """
    super().__init__(task)
    self.task = task
    self.args = args
    # BUG FIX: `sentence_avg` was accepted but ignored — the inner criterion
    # was always constructed with a hard-coded False. Forward the caller's
    # value instead.
    self.loss_function = CrossEntropyCriterion(task, sentence_avg)