def encoder_forward(self, opt, source_l=3, bsize=1):
    """Build an encoder from ``opt`` and check the sizes of what it returns.

    Args:
        opt: set of options. ``opt.enc_rnn_size`` is overwritten with
            ``opt.rnn_size`` when the latter is positive.
        source_l: Length of generated input sentence
        bsize: Batchsize of generated input
    """
    if opt.rnn_size > 0:
        opt.enc_rnn_size = opt.rnn_size

    word_field = self.get_field()
    feature_fields = []
    embeddings = build_embeddings(opt, word_field, feature_fields)
    enc = build_encoder(opt, embeddings)

    # Only the source batch and its lengths are fed to the encoder.
    test_src, _, test_length = self.get_batch(source_l=source_l, bsize=bsize)
    hidden_t, outputs, _ = enc(test_src, test_length)

    # Reference tensors, used only for their sizes.
    # NOTE(review): the layer count is read from self.opt rather than the
    # `opt` argument, and the output width from opt.dec_rnn_size rather than
    # opt.enc_rnn_size -- kept as-is, confirm this is intended.
    test_hid = torch.zeros(self.opt.enc_layers, bsize, opt.enc_rnn_size)
    test_out = torch.zeros(source_l, bsize, opt.dec_rnn_size)

    # assertEqual(first, second, msg) compares only its first two arguments;
    # a third positional value is just the failure message. Check both
    # elements of the hidden state with separate assertions.
    self.assertEqual(test_hid.size(), hidden_t[0].size())
    self.assertEqual(test_hid.size(), hidden_t[1].size())
    self.assertEqual(test_out.size(), outputs.size())
    self.assertEqual(type(outputs), torch.Tensor)
def nmtmodel_forward(self, opt, source_l=3, bsize=1):
    """
    Assemble an NMTModel from ``opt``, forward one generated batch through
    it and verify the size and type of the output.

    Args:
        opt: Namespace with options; ``enc_rnn_size`` and ``dec_rnn_size``
            are overwritten with ``rnn_size`` when the latter is positive
        source_l: length of input sequence
        bsize: batchsize
    """
    if opt.rnn_size > 0:
        opt.enc_rnn_size = opt.dec_rnn_size = opt.rnn_size

    field = self.get_field()
    feats = []

    # Encoder side first, then decoder side (construction order is kept).
    encoder = build_encoder(opt, build_embeddings(opt, field, feats))
    decoder = build_decoder(
        opt, build_embeddings(opt, field, feats, for_encoder=False))
    model = onmt.models.model.NMTModel(encoder, decoder)

    src, tgt, lengths = self.get_batch(source_l=source_l, bsize=bsize)
    outputs, _ = model(src, tgt, lengths)

    # The expected output is one step shorter than the generated sequence.
    expected_size = torch.zeros(
        source_l - 1, bsize, opt.dec_rnn_size).size()

    self.assertEqual(outputs.size(), expected_size)
    self.assertEqual(type(outputs), torch.Tensor)
def nmtmodel_forward(self, opt, source_l=3, bsize=1):
    """
    Assemble an NMTModel from ``opt``, forward one generated batch through
    it and verify the size and type of the output.

    Args:
        opt: Namespace with options; ``enc_rnn_size`` and ``dec_rnn_size``
            are overwritten with ``rnn_size`` when the latter is positive
        source_l: length of input sequence
        bsize: batchsize
    """
    if opt.rnn_size > 0:
        opt.enc_rnn_size = opt.dec_rnn_size = opt.rnn_size

    field = self.get_field()

    # Encoder side first, then decoder side (construction order is kept).
    encoder = build_encoder(opt, build_embeddings(opt, field))
    decoder = build_decoder(
        opt, build_embeddings(opt, field, for_encoder=False))
    model = onmt.models.model.NMTModel(encoder, decoder)

    src, tgt, lengths = self.get_batch(source_l=source_l, bsize=bsize)
    outputs, _ = model(src, tgt, lengths)

    # The expected output is one step shorter than the generated sequence.
    expected_size = torch.zeros(
        source_l - 1, bsize, opt.dec_rnn_size).size()

    self.assertEqual(outputs.size(), expected_size)
    self.assertEqual(type(outputs), torch.Tensor)
def encoder_forward(self, opt, source_l=3, bsize=1):
    """Build an encoder from ``opt`` and check the sizes of what it returns.

    Args:
        opt: set of options. ``opt.enc_rnn_size`` is overwritten with
            ``opt.rnn_size`` when the latter is positive.
        source_l: Length of generated input sentence
        bsize: Batchsize of generated input
    """
    if opt.rnn_size > 0:
        opt.enc_rnn_size = opt.rnn_size

    word_field = self.get_field()
    embeddings = build_embeddings(opt, word_field)
    enc = build_encoder(opt, embeddings)

    # Only the source batch and its lengths are fed to the encoder.
    test_src, _, test_length = self.get_batch(source_l=source_l, bsize=bsize)
    hidden_t, outputs, _ = enc(test_src, test_length)

    # Reference tensors, used only for their sizes.
    # NOTE(review): the layer count is read from self.opt rather than the
    # `opt` argument, and the output width from opt.dec_rnn_size rather than
    # opt.enc_rnn_size -- kept as-is, confirm this is intended.
    test_hid = torch.zeros(self.opt.enc_layers, bsize, opt.enc_rnn_size)
    test_out = torch.zeros(source_l, bsize, opt.dec_rnn_size)

    # assertEqual(first, second, msg) compares only its first two arguments;
    # a third positional value is just the failure message. Check both
    # elements of the hidden state with separate assertions.
    self.assertEqual(test_hid.size(), hidden_t[0].size())
    self.assertEqual(test_hid.size(), hidden_t[1].size())
    self.assertEqual(test_out.size(), outputs.size())
    self.assertEqual(type(outputs), torch.Tensor)
def build_base_model_with_projection_layer(model_opt, fields, checkpoint=None):
    """Build an NMTModel whose generator contains an extra projection layer.

    When ``model_opt.copy_attn`` is false the generator is
    ``Linear(dec_rnn_size, src_word_vec_size)`` followed by
    ``Linear(src_word_vec_size, target vocab size)``, a cast to float32 and a
    log-probability function. Otherwise a ``CopyGenerator`` is used.

    Args:
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated.
            See :class:`onmt.utils.parse.ArgumentParser`.
        fields (dict[str, torchtext.data.Field]):
            `Field` objects for the model.
        checkpoint: the model generated by train phase, or a resumed snapshot
            model from a stopped training. When ``None`` the parameters are
            initialized from ``model_opt`` instead.

    Returns:
        the NMTModel, with the generator attached as ``model.generator`` and
        moved to ``cuda:0`` when CUDA is available, otherwise to the CPU.

    Raises:
        ValueError: if ``share_decoder_embeddings`` is set but the decoder
            embedding matrix does not have the shape of the vocabulary
            projection weight.
    """
    # For back compat when attention_dropout was not defined.
    if not hasattr(model_opt, "attention_dropout"):
        model_opt.attention_dropout = model_opt.dropout

    # Build embeddings.
    if model_opt.model_type in ("text", "vec"):
        src_field = fields["src"]
        src_emb = build_embeddings(model_opt, src_field)
    else:
        src_emb = None

    # Build encoder.
    encoder = build_encoder(model_opt, src_emb)

    # Build decoder.
    tgt_field = fields["tgt"]
    tgt_emb = build_embeddings(model_opt, tgt_field, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
            "preprocess with -share_vocab if you use share_embeddings"

        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    decoder = build_decoder(model_opt, tgt_emb)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # Build Generator.
    tgt_base_field = tgt_field.base_field
    vocab_size = len(tgt_base_field.vocab)
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            # Projection layer: decoder state -> word-vector space. The input
            # width is dec_rnn_size (as in the copy-generator branch) because
            # rnn_size is only an override and may be left unset.
            nn.Linear(model_opt.dec_rnn_size, model_opt.src_word_vec_size),
            # Vocabulary layer: word-vector space -> target vocabulary.
            nn.Linear(model_opt.src_word_vec_size, vocab_size),
            Cast(torch.float32),
            gen_func,
        )
        if model_opt.share_decoder_embeddings:
            # The vocabulary layer sits at index 1, after the projection.
            shared_weight = decoder.embeddings.word_lut.weight
            if shared_weight.shape != generator[1].weight.shape:
                raise ValueError(
                    "share_decoder_embeddings requires the decoder embedding "
                    "matrix %s to match the generator projection weight %s"
                    % (tuple(shared_weight.shape),
                       tuple(generator[1].weight.shape)))
            generator[1].weight = shared_weight
    else:
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = CopyGenerator(model_opt.dec_rnn_size, vocab_size, pad_idx)
        if model_opt.share_decoder_embeddings:
            generator.linear.weight = decoder.embeddings.word_lut.weight

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using custom layernorm:
        # parameter names ending in a_2 / b_2 are renamed to weight / bias.
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = {fix_key(k): v
                               for k, v in checkpoint['model'].items()}
        # end of patch for backward compatibility

        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Xavier init is applied to matrices only (dim > 1).
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec)

    model.generator = generator
    model.to(device)
    if model_opt.model_dtype == 'fp16' and model_opt.optim == 'fusedadam':
        model.half()
    return model