def __init__(self, opt, shared=None):
    # In general use a basic TorchAgent wherever possible
    super().__init__(opt, shared)
    if not shared:
        # this is not a shared instance of this class, so do full initialization
        # fairseq expects options to be in argparse format, instead of a dict
        # We also need to do some argument postprocessing and whatnot
        self.args, self.opt = _fairseq_opt_wrapper(opt)

        # seed the RNG
        torch.manual_seed(self.args.seed)

        # Just some identifying info
        self.id = "fairseq:{}".format(self.args.arch)

        # construct dictionaries for parlai frontend and fairseq backend
        self.dict = _FairseqDictionary(self.opt)

        # We need a placeholder task for fairseq
        self.task = _ParlaiTask(self.dict)

        # actually construct the model and generator
        model_class = models.ARCH_MODEL_REGISTRY[self.args.arch]
        self.model = model_class.build_model(self.args, self.task)
        self.generator = SequenceGenerator(
            [self.model],
            tgt_dict=self.dict,
            beam_size=self.args.beam,
            stop_early=(not self.args.no_early_stop),
            normalize_scores=(not self.args.unnormalized),
            len_penalty=self.args.lenpen,
        )
        # set up the grader and the trainer
        # TODO: maybe support label smoothing here
        self.criterion = CrossEntropyCriterion(self.args, self.task)
        if self.args.fp16:
            self.trainer = fp16_trainer.FP16Trainer(
                self.args, self.task, self.model, self.criterion
            )
        else:
            # TODO: we might choose to add a --no-fp16 opt in the future to
            # explicitly disable fp16 instead
            if torch.cuda.get_device_capability(0)[0] >= 7:
                print("Heads up: using --fp16 could be a lot faster!")
            self.trainer = trainer.Trainer(
                self.args, self.task, self.model, self.criterion
            )

        # if the model already existed, let's preload it and the trainer
        if self.opt.get('model_file') and os.path.isfile(self.opt['model_file']):
            print('Loading existing model params from ' + self.opt['model_file'])
            self.load(self.opt.get('model_file'))

        # move things to the GPU if possible
        if self.use_cuda:
            self.model = self.model.cuda()
            self.generator = self.generator.cuda()

    # Start things off clean
    self.reset()
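# For illustration only: a minimal sketch of the dict-to-argparse conversion that
# _fairseq_opt_wrapper performs above. This is a hypothetical helper, not the real
# wrapper (which also post-processes arguments); it shows just the core idea that
# fairseq code paths expect attribute access (args.seed) rather than key access
# (opt['seed']).
import argparse

def _opt_to_namespace_sketch(opt):
    # Wrap the ParlAI opt dict in an argparse.Namespace so downstream fairseq
    # code can read options as attributes.
    return argparse.Namespace(**opt)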
def __init__(self, opt, shared=None):
    # In general use a basic TorchAgent wherever possible
    super().__init__(opt, shared)
    if not shared:
        # this is not a shared instance of this class, so do full initialization
        # check early if we're going to be loading the model from a checkpoint
        model_file_exists = (
            self.opt.get('model_file') and os.path.isfile(self.opt['model_file'])
        )

        # fairseq expects options to be in argparse format, instead of a dict
        # We also need to do some argument postprocessing and whatnot
        # We'll skip pretrained embeddings if we're going to override them with
        # a model checkpoint anyway
        self.args, self.opt = _fairseq_opt_wrapper(opt, model_file_exists)

        # seed the RNG
        torch.manual_seed(self.args.seed)

        # Just some identifying info
        self.id = "fairseq:{}".format(self.args.arch)

        # construct dictionaries for parlai frontend and fairseq backend
        self.dict = _FairseqDictionary(self.opt)

        # We need a placeholder task for fairseq
        self.task = _ParlaiTask(self.dict)

        # actually construct the model
        self.model = self.build_model()

        # Construct the generator and scorer
        self.generator = SequenceGenerator(
            [self.model],
            tgt_dict=self.dict,
            beam_size=self.args.beam,
            stop_early=(not self.args.no_early_stop),
            normalize_scores=(not self.args.unnormalized),
            len_penalty=self.args.lenpen,
            unk_penalty=self.args.unkpen,
            sampling=self.args.sampling,
            sampling_topk=self.args.sampling_topk,
            sampling_temperature=self.args.sampling_temperature,
        )
        self.scorer = SequenceScorer([self.model], self.dict)

        # set up the grader and the trainer
        self.criterion = criterions.build_criterion(self.args, self.task)

        if getattr(self.args, 'fp16', None):
            self.trainer = fp16_trainer.FP16Trainer(
                self.args, self.task, self.model, self.criterion
            )
        else:
            # TODO: we might choose to add a --no-fp16 opt in the future to
            # explicitly disable fp16 instead
            if torch.cuda.get_device_capability(0)[0] >= 7:
                print("Heads up: using --fp16 could be a lot faster!")
            self.trainer = trainer.Trainer(
                self.args, self.task, self.model, self.criterion
            )

        # if the model already existed, let's preload it and the trainer
        if model_file_exists:
            print('Loading existing model params from ' + self.opt['model_file'])
            self.load(self.opt.get('model_file'))

        # move things to the GPU if possible
        if self.use_cuda:
            self.model = self.model.cuda()
            self.generator = self.generator.cuda()
    else:
        self.model = shared['model']
        self.trainer = shared['trainer']
        self.generator = shared['generator']
        self.dict = shared['dict']
        self.args = shared['args']

    # Start things off clean
    self.reset()
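# For illustration only: a hypothetical share() counterpart matching the keys the
# shared branch of __init__ above reads. The agent's actual share() may carry more
# state; this sketch mirrors exactly what __init__ consumes from `shared`.
def share(self):
    # Start from whatever the parent TorchAgent shares, then add the heavyweight
    # objects so shared instances skip full re-initialization.
    shared = super().share()
    shared['model'] = self.model
    shared['trainer'] = self.trainer
    shared['generator'] = self.generator
    shared['dict'] = self.dict
    shared['args'] = self.args
    return shared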