def custom_build_model(opt, dict, lm=False):
    """Build a translation model or a language model.

    Args:
        opt: Options object, forwarded unchanged to the selected builder.
        dict: Vocabulary dictionaries, forwarded unchanged to the selected
            builder. (The name shadows the builtin but is part of the
            public signature.)
        lm: When truthy, ``build_language_model`` is used; otherwise
            ``build_model`` is used.

    Returns:
        The object returned by the selected builder.
    """
    # Only the builder that is actually needed gets looked up.
    builder = build_language_model if lm else build_model
    return builder(opt, dict)
def main():
    """Load the serialized dataset, build the model and launch training.

    All settings are read from the module-level ``opt`` object.

    Raises:
        NotImplementedError: If ``opt.data_format`` is not ``'raw'``, or if
            more than one GPU / virtual GPU is requested.
    """
    began = time.time()
    print("Loading data from '%s'" % opt.data)

    # 'raw' is the only data format handled here.
    if opt.data_format != 'raw':
        raise NotImplementedError

    dataset = torch.load(opt.data)
    elapsed = datetime.timedelta(seconds=int(time.time() - began))
    print("Done after %s" % str(elapsed))

    train_split = dataset['train']
    train_data = onmt.Dataset(
        train_split['src'],
        train_split['tgt'],
        opt.batch_size_words,
        data_type=dataset.get("type", "text"),
        batch_size_sents=opt.batch_size_sents,
        multiplier=opt.batch_size_multiplier,
        sort_by_target=opt.sort_by_target,
    )

    valid_split = dataset['valid']
    valid_data = onmt.Dataset(
        valid_split['src'],
        valid_split['tgt'],
        opt.batch_size_words,
        data_type=dataset.get("type", "text"),
        batch_size_sents=opt.batch_size_sents,
    )

    dicts = dataset['dicts']
    # A source vocabulary may be absent; report only what is present.
    if "src" not in dicts:
        print(' * vocabulary size. target = %d' % (dicts['tgt'].size()))
    else:
        print(' * vocabulary size. source = %d; target = %d' %
              (dicts['src'].size(), dicts['tgt'].size()))

    print(' * number of training sentences. %d' % train_data.size())
    print(' * maximum batch size (words per batch). %d' % opt.batch_size_words)

    print('Building model...')
    model = build_language_model(opt, dicts)

    # Loss function, sized by the target vocabulary.
    loss_function = NMTLossFunc(dicts['tgt'].size(),
                                label_smoothing=opt.label_smoothing)

    n_params = sum(p.nelement() for p in model.parameters())
    print('* number of parameters: %d' % n_params)

    # Multi-GPU (real or virtual) training is rejected outright.
    if len(opt.gpus) > 1 or opt.virtual_gpu > 1:
        raise NotImplementedError("Warning! Multi-GPU training is not fully tested and potential bugs can happen.")

    # Pick the trainer implementation from the fp16 flag.
    trainer_cls = FP16XETrainer if opt.fp16 else XETrainer
    trainer = trainer_cls(model, loss_function, train_data, valid_data, dicts, opt)

    trainer.run(save_file=opt.load_from)
def main():
    """Load the serialized dataset, build a language model and train it.

    All settings are read from the module-level ``opt`` object. Only the
    ``'tgt'`` side of the train/valid splits is wrapped in
    ``LanguageModelDataset``.

    Raises:
        NotImplementedError: If ``opt.data_format`` is not ``'raw'``, or if
            more than one GPU / virtual GPU is requested.
    """
    began = time.time()
    print("Loading data from '%s'" % opt.data)

    # 'raw' is the only data format handled here.
    if opt.data_format != 'raw':
        raise NotImplementedError

    dataset = torch.load(opt.data)
    elapsed = datetime.timedelta(seconds=int(time.time() - began))
    print("Done after %s" % str(elapsed))

    train_data = LanguageModelDataset(
        dataset['train']['tgt'],
        batch_size_sents=opt.batch_size_sents,
        seq_length=opt.lm_seq_length,
    )
    valid_data = LanguageModelDataset(
        dataset['valid']['tgt'],
        batch_size_sents=opt.batch_size_sents,
        seq_length=opt.lm_seq_length,
    )

    dicts = dataset['dicts']
    # A source vocabulary may be absent; report only what is present.
    if "src" not in dicts:
        print(' * vocabulary size. target = %d' % (dicts['tgt'].size()))
    else:
        print(' * vocabulary size. source = %d; target = %d' %
              (dicts['src'].size(), dicts['tgt'].size()))

    print(' * number of training sentences. %d' % train_data.size())
    # NOTE: this reports opt.batch_size_words although the datasets above
    # are configured with batch_size_sents / lm_seq_length.
    print(' * maximum batch size (words per batch). %d' % opt.batch_size_words)

    print('Building model...')
    model = build_language_model(opt, dicts)
    print(model)

    # Loss function, sized by the target vocabulary.
    loss_function = NMTLossFunc(dicts['tgt'].size(),
                                label_smoothing=opt.label_smoothing)

    n_params = sum(p.nelement() for p in model.parameters())
    print('* number of parameters: %d' % n_params)

    # Multi-GPU (real or virtual) training is rejected outright.
    if len(opt.gpus) > 1 or opt.virtual_gpu > 1:
        raise NotImplementedError("Multi-GPU training is not supported ATM.")

    # NOTE: XETrainer is always used here, regardless of opt.fp16.
    trainer = XETrainer(model, loss_function, train_data, valid_data, dicts, opt)

    trainer.run(save_file=opt.load_from)
def custom_build_model(opt, dict, lm=False):
    """Build a language model, or a translation model with a scalar mix.

    Args:
        opt: Options object, forwarded unchanged to the selected builder.
        dict: Vocabulary dictionaries, forwarded unchanged to the selected
            builder. (The name shadows the builtin but is part of the
            public signature.)
        lm: When truthy, the result of ``build_language_model`` is returned
            as-is. Otherwise ``build_model`` is used and a trainable
            ``ScalarMix`` is registered on the result.

    Returns:
        The built model.
    """
    if lm:
        return build_language_model(opt, dict)

    model = build_model(opt, dict)

    # Register a trainable ScalarMix as the "scalar_mix" submodule.
    # Presumably onmt.Constants.BERT_LAYERS is the number of layers being
    # mixed -- confirm against the ScalarMix signature.
    mixer = ScalarMix(
        onmt.Constants.BERT_LAYERS,
        do_layer_norm=False,
        initial_scalar_parameters=None,
        trainable=True,
    )
    model.add_module("scalar_mix", mixer)
    return model
def __init__(self, opt):
    """Load the translation model(s) and optional extras used for decoding.

    ``opt.model`` is a ``|``-separated list of checkpoint paths. Each
    checkpoint is loaded onto CPU, rebuilt with ``build_model`` from its
    saved options, optionally cast to half precision, moved to the
    requested device and put in eval mode. The dictionaries are taken from
    the first checkpoint. If ``opt.lm`` is set, a language model is loaded
    as well; if ``opt.autoencoder`` is set, an ``Autoencoder`` wrapping the
    first model is loaded and attached to it.

    Args:
        opt: Decoding options. Fields read here: ``cuda``, ``beta``,
            ``alpha``, ``start_with_bos``, ``fp16``, ``attributes``,
            ``bos_token``, ``sampling``, ``model``, ``verbose``,
            ``max_sent_length``, ``lm``, ``ensemble_op``, ``autoencoder``.

    Raises:
        KeyError: If a checkpoint lacks an expected entry, or if
            ``opt.bos_token`` is not in the target dictionary.
    """

    def _keep_on_cpu(storage, loc):
        # map_location hook: leave every storage where it was deserialized.
        return storage

    def _place(module):
        # Optional half-precision cast, then move to the requested device.
        if opt.fp16:
            module = module.half()
        return module.cuda() if opt.cuda else module.cpu()

    self.opt = opt
    self.tt = torch.cuda if opt.cuda else torch
    self.beam_accum = None
    self.beta = opt.beta
    self.alpha = opt.alpha
    self.start_with_bos = opt.start_with_bos
    self.fp16 = opt.fp16
    self.attributes = opt.attributes  # attributes split by |. for example: de|domain1
    self.bos_token = opt.bos_token
    self.sampling = opt.sampling

    if self.attributes:
        self.attributes = self.attributes.split("|")

    self.models = list()
    self.model_types = list()

    # Model paths are given as one string with | as delimiter.
    model_paths = opt.model.split("|")

    print(model_paths)
    self.n_models = len(model_paths)
    self._type = 'text'

    # NOTE(security): torch.load unpickles its input; only load checkpoints
    # from trusted sources.
    for i, model_path in enumerate(model_paths):
        if opt.verbose:
            print('Loading model from %s' % model_path)
        checkpoint = torch.load(model_path, map_location=_keep_on_cpu)

        model_opt = checkpoint['opt']

        # The first checkpoint defines the dictionaries for the ensemble.
        if i == 0:
            if "src" in checkpoint['dicts']:
                self.src_dict = checkpoint['dicts']['src']
            else:
                # No source dictionary: the input is treated as audio.
                self._type = "audio"
            self.tgt_dict = checkpoint['dicts']['tgt']

            if "atb" in checkpoint["dicts"]:
                self.atb_dict = checkpoint['dicts']['atb']
            else:
                self.atb_dict = None

            self.bos_id = self.tgt_dict.labelToIdx[self.bos_token]

        # Build the model from the saved options and restore its weights.
        model = build_model(model_opt, checkpoint['dicts'])
        model.load_state_dict(checkpoint['model'])

        if model_opt.model in model_list:
            model.renew_buffer(self.opt.max_sent_length)

        model = _place(model)
        model.eval()

        self.models.append(model)
        self.model_types.append(model_opt.model)

    # Optional language model.
    if opt.lm is not None:
        if opt.verbose:
            print('Loading language model from %s' % opt.lm)

        lm_chkpoint = torch.load(opt.lm, map_location=_keep_on_cpu)

        lm_opt = lm_chkpoint['opt']

        # Built with the dictionaries of the last translation checkpoint.
        lm_model = build_language_model(lm_opt, checkpoint['dicts'])

        # Restore the trained weights; without this the language model would
        # run with freshly initialised parameters.
        # Assumes LM checkpoints store weights under 'model', like the
        # translation checkpoints above -- TODO confirm against the trainer.
        lm_model.load_state_dict(lm_chkpoint['model'])

        lm_model = _place(lm_model)

        # Inference only: put the module in eval mode, as is done for the
        # translation models.
        lm_model.eval()

        self.lm_model = lm_model

    self.cuda = opt.cuda
    self.ensemble_op = opt.ensemble_op

    # Optional autoencoder wrapped around the first translation model.
    if opt.autoencoder is not None:
        if opt.verbose:
            print('Loading autoencoder from %s' % opt.autoencoder)
        checkpoint = torch.load(opt.autoencoder, map_location=_keep_on_cpu)
        model_opt = checkpoint['opt']

        # Build the autoencoder from the saved options.
        self.autoencoder = Autoencoder(self.models[0], model_opt)

        self.autoencoder.load_state_dict(checkpoint['autoencoder'])

        if opt.cuda:
            self.autoencoder = self.autoencoder.cuda()
            self.models[0] = self.models[0].cuda()
        else:
            self.autoencoder = self.autoencoder.cpu()
            self.models[0] = self.models[0].cpu()

        self.models[0].autoencoder = self.autoencoder

    if opt.verbose:
        print('Done')