def custom_build_model(opt, dict, lm=False):

    if not lm:
        model = build_model(opt, dict)
    else:
        model = build_language_model(opt, dict)

    return model
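# Usage sketch (illustrative, not from the original source): `opt` and `dict`
# are assumed to come from a saved checkpoint, matching how checkpoints are
# unpacked elsewhere in this file; 'model.pt' is a placeholder path.
#
#   checkpoint = torch.load('model.pt', map_location=lambda storage, loc: storage)
#   nmt = custom_build_model(checkpoint['opt'], checkpoint['dicts'])           # seq2seq model
#   lm = custom_build_model(checkpoint['opt'], checkpoint['dicts'], lm=True)   # language model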
def __init__(self, opt):
    self.opt = opt
    self.tt = torch.cuda if opt.cuda else torch
    self.beam_accum = None
    self.beta = opt.beta
    self.alpha = opt.alpha
    self.start_with_bos = opt.start_with_bos
    self.fp16 = opt.fp16
    self.attributes = opt.attributes  # attributes separated by |, for example: de|domain1
    self.bos_token = opt.bos_token
    self.sampling = opt.sampling
    self.src_lang = opt.src_lang
    self.tgt_lang = opt.tgt_lang

    if self.attributes:
        self.attributes = self.attributes.split("|")

    self.models = list()
    self.model_types = list()

    # model paths are given as a single |-delimited string
    models = opt.model.split("|")
    print(models)
    self.n_models = len(models)
    self._type = 'text'

    for i, model in enumerate(models):
        if opt.verbose:
            print('Loading model from %s' % model)
        checkpoint = torch.load(model,
                                map_location=lambda storage, loc: storage)

        model_opt = checkpoint['opt']
        dicts = checkpoint['dicts']

        if i == 0:
            if "src" in checkpoint['dicts']:
                self.src_dict = checkpoint['dicts']['src']
            else:
                self._type = "audio"
            self.tgt_dict = checkpoint['dicts']['tgt']

            if "langs" in checkpoint["dicts"]:
                self.lang_dict = checkpoint['dicts']['langs']
            else:
                self.lang_dict = {'src': 0, 'tgt': 1}

            self.bos_id = self.tgt_dict.labelToIdx[self.bos_token]

        # Build model from the saved options
        # if hasattr(model_opt, 'fusion') and model_opt.fusion == True:
        #     print("* Loading a FUSION model")
        #     model = build_fusion(model_opt, checkpoint['dicts'])
        # else:
        #     model = build_model(model_opt, checkpoint['dicts'])
        model = build_model(model_opt, checkpoint['dicts'])
        optimize_model(model)
        model.load_state_dict(checkpoint['model'])

        if model_opt.model in model_list:
            # if model.decoder.positional_encoder.len_max < self.opt.max_sent_length:
            #     print("Not enough len to decode. Renewing .. ")
            #     model.decoder.renew_buffer(self.opt.max_sent_length)
            model.renew_buffer(self.opt.max_sent_length)

        if opt.fp16:
            model = model.half()

        if opt.cuda:
            model = model.cuda()
        else:
            model = model.cpu()

        if opt.dynamic_quantile == 1:
            engines = torch.backends.quantized.supported_engines
            if 'fbgemm' in engines:
                torch.backends.quantized.engine = 'fbgemm'
            else:
                torch.backends.quantized.engine = 'qnnpack'

            model = torch.quantization.quantize_dynamic(
                model, {torch.nn.LSTM, torch.nn.Linear}, dtype=torch.qint8)

        model.eval()

        self.models.append(model)
        self.model_types.append(model_opt.model)

    # language model
    if opt.lm is not None:
        if opt.verbose:
            print('Loading language model from %s' % opt.lm)

        lm_chkpoint = torch.load(opt.lm,
                                 map_location=lambda storage, loc: storage)
        lm_opt = lm_chkpoint['opt']

        lm_model = build_language_model(lm_opt, checkpoint['dicts'])

        if opt.fp16:
            lm_model = lm_model.half()

        if opt.cuda:
            lm_model = lm_model.cuda()
        else:
            lm_model = lm_model.cpu()

        self.lm_model = lm_model

    self.cuda = opt.cuda
    self.ensemble_op = opt.ensemble_op

    if opt.autoencoder is not None:
        if opt.verbose:
            print('Loading autoencoder from %s' % opt.autoencoder)
        checkpoint = torch.load(opt.autoencoder,
                                map_location=lambda storage, loc: storage)
        model_opt = checkpoint['opt']

        # posSize = checkpoint['autoencoder']['nmt.decoder.positional_encoder.pos_emb'].size(0)
        # self.models[0].decoder.renew_buffer(posSize)
        # self.models[0].decoder.renew_buffer(posSize)

        # Build model from the saved options
        self.autoencoder = Autoencoder(self.models[0], model_opt)
        self.autoencoder.load_state_dict(checkpoint['autoencoder'])

        if opt.cuda:
            self.autoencoder = self.autoencoder.cuda()
            self.models[0] = self.models[0].cuda()
        else:
            self.autoencoder = self.autoencoder.cpu()
            self.models[0] = self.models[0].cpu()

        self.models[0].autoencoder = self.autoencoder

    if opt.verbose:
        print('Done')
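# Construction sketch (illustrative): this __init__ appears to belong to a
# translator/ensemble-decoder class whose name is not shown in this section,
# so `Translator` below is a placeholder, and the option values are example
# assumptions mirroring the fields this __init__ reads from `opt`.
#
#   from argparse import Namespace
#   opt = Namespace(
#       model='model_a.pt|model_b.pt',     # |-delimited ensemble of checkpoints
#       cuda=True, fp16=False, verbose=True,
#       beta=0.0, alpha=0.0, start_with_bos=False,
#       attributes=None, bos_token='<s>', sampling=False,
#       src_lang='src', tgt_lang='tgt',
#       max_sent_length=256, dynamic_quantile=0,
#       lm=None, autoencoder=None, ensemble_op='mean',
#   )
#   translator = Translator(opt)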
def __init__(self, opt):
    self.opt = opt
    self.tt = torch.cuda if opt.cuda else torch
    self.beam_accum = None
    self.beta = opt.beta
    self.alpha = opt.alpha
    self.start_with_bos = opt.start_with_bos
    self.fp16 = opt.fp16
    self.attributes = opt.attributes  # attributes separated by |, for example: de|domain1
    # self.bos_token = opt.bos_token
    self.sampling = opt.sampling
    self.src_lang = opt.src_lang
    self.tgt_lang = opt.tgt_lang

    if self.attributes:
        self.attributes = self.attributes.split("|")

    self.models = list()
    self.model_types = list()

    # model paths are given as a single |-delimited string
    models = opt.model.split("|")
    print(models)
    self.n_models = len(models)
    self._type = 'text'

    for i, model_path in enumerate(models):
        checkpoint = torch.load(model_path,
                                map_location=lambda storage, loc: storage)

        model_opt = checkpoint['opt']
        model_opt = backward_compatible(model_opt)
        if hasattr(model_opt, "enc_state_dict"):
            model_opt.enc_state_dict = None
            model_opt.dec_state_dict = None

        self.main_model_opt = model_opt
        dicts = checkpoint['dicts']

        # update special tokens
        onmt.constants = add_tokenidx(model_opt, onmt.constants, dicts)
        self.bos_token = model_opt.tgt_bos_word

        if i == 0:
            if "src" in checkpoint['dicts']:
                self.src_dict = checkpoint['dicts']['src']
            else:
                self._type = "audio"
                # self.src_dict = self.tgt_dict
            self.tgt_dict = checkpoint['dicts']['tgt']

            if "langs" in checkpoint["dicts"]:
                self.lang_dict = checkpoint['dicts']['langs']
            else:
                self.lang_dict = {'src': 0, 'tgt': 1}

            self.bos_id = self.tgt_dict.labelToIdx[self.bos_token]

        model = build_model(model_opt, checkpoint['dicts'])
        optimize_model(model)
        if opt.verbose:
            print('Loading model from %s' % model_path)
        model.load_state_dict(checkpoint['model'])

        if model_opt.model in model_list:
            # if model.decoder.positional_encoder.len_max < self.opt.max_sent_length:
            #     print("Not enough len to decode. Renewing .. ")
            #     model.decoder.renew_buffer(self.opt.max_sent_length)
            model.renew_buffer(self.opt.max_sent_length)

        # model.convert_autograd()

        if opt.fp16:
            model = model.half()

        if opt.cuda:
            model = model.cuda()
        else:
            model = model.cpu()

        if opt.dynamic_quantile == 1:
            engines = torch.backends.quantized.supported_engines
            if 'fbgemm' in engines:
                torch.backends.quantized.engine = 'fbgemm'
            else:
                print("[INFO] fbgemm was not found in the available engines;"
                      " possibly the CPU does not support AVX2."
                      " It is recommended to disable quantization (set to 0).")
                torch.backends.quantized.engine = 'qnnpack'

            # convert the custom functions to their autograd equivalents first
            model.convert_autograd()

            model = torch.quantization.quantize_dynamic(
                model, {torch.nn.LSTM, torch.nn.Linear}, dtype=torch.qint8)

        model.eval()

        self.models.append(model)
        self.model_types.append(model_opt.model)

    # language model
    if opt.lm is not None:
        if opt.verbose:
            print('Loading language model from %s' % opt.lm)

        lm_chkpoint = torch.load(opt.lm,
                                 map_location=lambda storage, loc: storage)
        lm_opt = lm_chkpoint['opt']

        lm_model = build_language_model(lm_opt, checkpoint['dicts'])

        if opt.fp16:
            lm_model = lm_model.half()

        if opt.cuda:
            lm_model = lm_model.cuda()
        else:
            lm_model = lm_model.cpu()

        self.lm_model = lm_model

    self.cuda = opt.cuda
    self.ensemble_op = opt.ensemble_op

    if opt.autoencoder is not None:
        if opt.verbose:
            print('Loading autoencoder from %s' % opt.autoencoder)
        checkpoint = torch.load(opt.autoencoder,
                                map_location=lambda storage, loc: storage)
        model_opt = checkpoint['opt']

        # posSize = checkpoint['autoencoder']['nmt.decoder.positional_encoder.pos_emb'].size(0)
        # self.models[0].decoder.renew_buffer(posSize)
        # self.models[0].decoder.renew_buffer(posSize)

        # Build model from the saved options
        self.autoencoder = Autoencoder(self.models[0], model_opt)
        self.autoencoder.load_state_dict(checkpoint['autoencoder'])

        if opt.cuda:
            self.autoencoder = self.autoencoder.cuda()
            self.models[0] = self.models[0].cuda()
        else:
            self.autoencoder = self.autoencoder.cpu()
            self.models[0] = self.models[0].cpu()

        self.models[0].autoencoder = self.autoencoder

    if opt.verbose:
        print('Done')
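# Standalone sketch of the dynamic-quantization step used above (generic
# PyTorch, not tied to this repo's model classes). Dynamic quantization
# stores Linear/LSTM weights as int8 and quantizes activations on the fly,
# which is why it is a CPU, inference-only optimization.
import torch


def quantize_for_cpu_inference(model):
    engines = torch.backends.quantized.supported_engines
    # fbgemm targets x86 CPUs with AVX2; qnnpack is the ARM fallback.
    torch.backends.quantized.engine = 'fbgemm' if 'fbgemm' in engines else 'qnnpack'
    return torch.quantization.quantize_dynamic(
        model, {torch.nn.LSTM, torch.nn.Linear}, dtype=torch.qint8)


# e.g. quantized = quantize_for_cpu_inference(torch.nn.Sequential(torch.nn.Linear(8, 8)))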
def main():
    start = time.time()
    print("Loading data from '%s'" % opt.data)

    if opt.data_format == 'raw':
        dataset = torch.load(opt.data)
        elapse = str(datetime.timedelta(seconds=int(time.time() - start)))
        print("Done after %s" % elapse)

        dicts = dataset['dicts']

        # For backward compatibility
        train_dict = defaultdict(lambda: None, dataset['train'])
        valid_dict = defaultdict(lambda: None, dataset['valid'])

        if train_dict['src_lang'] is not None:
            assert 'langs' in dicts
            train_src_langs = train_dict['src_lang']
            train_tgt_langs = train_dict['tgt_lang']
        else:
            # allocate new languages
            dicts['langs'] = {'src': 0, 'tgt': 1}
            train_src_langs = list()
            train_tgt_langs = list()
            # Allocate one for the bilingual case
            train_src_langs.append(torch.Tensor([dicts['langs']['src']]))
            train_tgt_langs.append(torch.Tensor([dicts['langs']['tgt']]))

        train_data = LanguageModelDataset(
            dataset['train']['tgt'], train_tgt_langs,
            batch_size_sents=opt.batch_size_sents,
            seq_length=opt.lm_seq_length)

        if valid_dict['src_lang'] is not None:
            assert 'langs' in dicts
            valid_src_langs = valid_dict['src_lang']
            valid_tgt_langs = valid_dict['tgt_lang']
        else:
            # allocate new languages
            valid_src_langs = list()
            valid_tgt_langs = list()
            # Allocate one for the bilingual case
            valid_src_langs.append(torch.Tensor([dicts['langs']['src']]))
            valid_tgt_langs.append(torch.Tensor([dicts['langs']['tgt']]))

        valid_data = LanguageModelDataset(
            dataset['valid']['tgt'], valid_tgt_langs,
            batch_size_sents=opt.batch_size_sents,
            seq_length=opt.lm_seq_length)

        if opt.load_from:
            checkpoint = torch.load(opt.load_from,
                                    map_location=lambda storage, loc: storage)
            print("* Loading dictionaries from the checkpoint")
            dicts = checkpoint['dicts']
        else:
            dicts['tgt'].patch(opt.patch_vocab_multiplier)
            checkpoint = None

        if "src" in dicts:
            print(' * vocabulary size. source = %d; target = %d' %
                  (dicts['src'].size(), dicts['tgt'].size()))
        else:
            print(' * vocabulary size. target = %d' % (dicts['tgt'].size()))

        print(' * number of training sentences. %d' % train_data.size())
        print(' * maximum batch size (words per batch). %d' %
              (opt.batch_size_sents * opt.lm_seq_length))
    else:
        raise NotImplementedError

    print('Building model...')
    model = build_language_model(opt, dicts)
    optimize_model(model)

    # Building the loss function
    loss_function = NMTLossFunc(opt.model_size, dicts['tgt'].size(),
                                label_smoothing=opt.label_smoothing)

    n_params = sum([p.nelement() for p in model.parameters()])
    print('* number of parameters: %d' % n_params)

    if len(opt.gpus) > 1 or opt.virtual_gpu > 1:
        raise NotImplementedError("Multi-GPU training is not supported ATM.")
    else:
        # if opt.fp16:
        #     trainer = FP16XETrainer(model, loss_function, train_data, valid_data, dicts, opt)
        # else:
        trainer = XETrainer(model, loss_function, train_data, valid_data, dicts, opt)

        trainer.run(checkpoint=checkpoint)
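# Generic label-smoothing cross-entropy sketch (an assumption: this is the
# textbook formulation; the repo's NMTLossFunc may differ in details such as
# padding handling or how the smoothing mass is distributed).
import torch
import torch.nn.functional as F


def label_smoothed_nll(logits, target, smoothing=0.1):
    # logits: (batch, vocab); target: (batch,) of gold token indices
    log_probs = F.log_softmax(logits, dim=-1)
    nll = -log_probs.gather(dim=-1, index=target.unsqueeze(-1)).squeeze(-1)
    smooth = -log_probs.mean(dim=-1)  # expected NLL under a uniform target
    return ((1.0 - smoothing) * nll + smoothing * smooth).mean()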
def __init__(self, opt):
    self.opt = opt
    self.tt = torch.cuda if opt.cuda else torch
    self.beam_accum = None
    self.beta = opt.beta
    self.alpha = opt.alpha
    self.start_with_bos = opt.start_with_bos
    self.fp16 = opt.fp16
    self.models = list()
    self.model_types = list()

    # model paths are given as a single |-delimited string
    models = opt.model.split("|")
    print(models)
    self.n_models = len(models)
    self._type = 'text'

    for i, model in enumerate(models):
        if opt.verbose:
            print('Loading model from %s' % model)
        checkpoint = torch.load(model,
                                map_location=lambda storage, loc: storage)

        model_opt = checkpoint['opt']

        if i == 0:
            if "src" in checkpoint['dicts']:
                self.src_dict = checkpoint['dicts']['src']
            else:
                self._type = "audio"
            self.tgt_dict = checkpoint['dicts']['tgt']

        # Build model from the saved options
        # if hasattr(model_opt, 'fusion') and model_opt.fusion == True:
        #     print("* Loading a FUSION model")
        #     model = build_fusion(model_opt, checkpoint['dicts'])
        # else:
        #     model = build_model(model_opt, checkpoint['dicts'])
        model = build_model(model_opt, checkpoint['dicts'])
        model.load_state_dict(checkpoint['model'])

        if model_opt.model in model_list:
            # if model.decoder.positional_encoder.len_max < self.opt.max_sent_length:
            #     print("Not enough len to decode. Renewing .. ")
            #     model.decoder.renew_buffer(self.opt.max_sent_length)
            model.renew_buffer(self.opt.max_sent_length)

        if opt.fp16:
            model = model.half()

        if opt.cuda:
            model = model.cuda()
        else:
            model = model.cpu()

        model.eval()

        self.models.append(model)
        self.model_types.append(model_opt.model)

    # language model
    if opt.lm is not None:
        if opt.verbose:
            print('Loading language model from %s' % opt.lm)

        lm_chkpoint = torch.load(opt.lm,
                                 map_location=lambda storage, loc: storage)
        lm_opt = lm_chkpoint['opt']

        lm_model = build_language_model(lm_opt, lm_chkpoint['dicts'])

        if opt.fp16:
            lm_model = lm_model.half()

        if opt.cuda:
            lm_model = lm_model.cuda()
        else:
            lm_model = lm_model.cpu()

        self.lm_model = lm_model

    self.cuda = opt.cuda
    self.ensemble_op = opt.ensemble_op

    if opt.autoencoder is not None:
        if opt.verbose:
            print('Loading autoencoder from %s' % opt.autoencoder)
        checkpoint = torch.load(opt.autoencoder,
                                map_location=lambda storage, loc: storage)
        model_opt = checkpoint['opt']

        # posSize = checkpoint['autoencoder']['nmt.decoder.positional_encoder.pos_emb'].size(0)
        # self.models[0].decoder.renew_buffer(posSize)
        # self.models[0].decoder.renew_buffer(posSize)

        # Build model from the saved options
        self.autoencoder = Autoencoder(self.models[0], model_opt)
        self.autoencoder.load_state_dict(checkpoint['autoencoder'])

        if opt.cuda:
            self.autoencoder = self.autoencoder.cuda()
            self.models[0] = self.models[0].cuda()
        else:
            self.autoencoder = self.autoencoder.cpu()
            self.models[0] = self.models[0].cpu()

        if opt.fp16:
            self.autoencoder = self.autoencoder.half()
            self.models[0] = self.models[0].half()

    if opt.verbose:
        print('Done')
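# Hedged sketch of how `ensemble_op` might combine per-model output
# distributions at decoding time (illustrative only: the actual combination
# lives in the decoding code, which is not part of this section, and the op
# names and semantics below are assumptions).
import torch


def combine_ensemble(log_probs, ensemble_op='mean'):
    # log_probs: list of (batch, vocab) log-probability tensors, one per model
    stacked = torch.stack(log_probs, dim=0)
    if ensemble_op == 'mean':
        # arithmetic mean in probability space
        return torch.logsumexp(stacked, dim=0) - torch.log(
            torch.tensor(float(stacked.size(0))))
    elif ensemble_op == 'gmean':
        # geometric mean: plain average in log space
        return stacked.mean(dim=0)
    raise NotImplementedError(ensemble_op)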