Example #1
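An older-API variant of `test_py_lstm_mask`: components are built with `self.exp_global`, the data is packed into batches of 3 by `TrgBatcher`, and for each of the first three batches the test checks that the mask attached to the pyramidal encoder's output matches the mask of the batched source (or that both are `None`).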
    def test_py_lstm_mask(self):
        model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
            encoder=PyramidalLSTMSeqTransducer(self.exp_global, layers=1),
            attender=MlpAttender(self.exp_global),
            trg_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
            decoder=MlpSoftmaxDecoder(self.exp_global, vocab_size=100),
        )

        batcher = xnmt.batcher.TrgBatcher(batch_size=3)
        train_src, _ = \
          batcher.pack(self.src_data, self.trg_data)

        self.set_train(True)
        for sent_i in range(3):
            dy.renew_cg()
            src = train_src[sent_i]
            self.start_sent(src)
            embeddings = model.src_embedder.embed_sent(src)
            encodings = model.encoder(embeddings)
            if train_src[sent_i].mask is None:
                assert encodings.mask is None
            else:
                np.testing.assert_array_almost_equal(
                    train_src[sent_i].mask.np_arr, encodings.mask.np_arr)
Example #2
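A `test_loss_model2` variant with explicit layer dimensions: the `MlpSoftmaxDecoder` is assembled from a `UniLSTMSeqTransducer`, an `MLP`, and a `CopyBridge`, and the test verifies that per-sentence losses equal the batched loss when sources are padded to a multiple of 4.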
 def test_loss_model2(self):
     layer_dim = 512
     model = DefaultTranslator(
         src_reader=self.src_reader,
         trg_reader=self.trg_reader,
         src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
         encoder=PyramidalLSTMSeqTransducer(layers=3,
                                            input_dim=layer_dim,
                                            hidden_dim=layer_dim),
         attender=MlpAttender(input_dim=layer_dim,
                              state_dim=layer_dim,
                              hidden_dim=layer_dim),
         trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
         decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                   trg_embed_dim=layer_dim,
                                   rnn_layer=UniLSTMSeqTransducer(
                                       input_dim=layer_dim,
                                       hidden_dim=layer_dim,
                                       decoder_input_dim=layer_dim,
                                       yaml_path="model.decoder.rnn_layer"),
                                   mlp_layer=MLP(
                                       input_dim=layer_dim,
                                       hidden_dim=layer_dim,
                                       decoder_rnn_dim=layer_dim,
                                       vocab_size=100,
                                       yaml_path="model.decoder.mlp_layer"),
                                   bridge=CopyBridge(dec_dim=layer_dim,
                                                     dec_layers=1)),
     )
     model.set_train(False)
     self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)
Example #3
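The same mask test written against the newer component set: an `AutoRegressiveDecoder` with a `UniLSTMSeqTransducer` rnn, a `NonLinear` transform, and a `Softmax` scorer, with the encoder invoked via `transduce` instead of being called directly.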
  def test_py_lstm_mask(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=1),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                                trg_embed_dim=layer_dim,
                                rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
                                transform=NonLinear(input_dim=layer_dim*2, output_dim=layer_dim),
                                scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )

    batcher = xnmt.batcher.TrgBatcher(batch_size=3)
    train_src, _ = \
      batcher.pack(self.src_data, self.trg_data)

    self.set_train(True)
    for sent_i in range(3):
      dy.renew_cg()
      src = train_src[sent_i]
      self.start_sent(src)
      embeddings = model.src_embedder.embed_sent(src)
      encodings = model.encoder.transduce(embeddings)
      if train_src[sent_i].mask is None:
        assert encodings.mask is None
      else:
        np.testing.assert_array_almost_equal(train_src[sent_i].mask.np_arr, encodings.mask.np_arr)
Example #4
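`test_py_lstm_encoder_len` pads each of the first ten source sentences to a multiple of 4 and asserts that the 3-layer `PyramidalLSTMSeqTransducer` shortens the sequence to `ceil(len / 4)` output states.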
 def test_py_lstm_encoder_len(self):
     layer_dim = 512
     model = DefaultTranslator(
         src_reader=self.src_reader,
         trg_reader=self.trg_reader,
         src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
         encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim,
                                            hidden_dim=layer_dim,
                                            layers=3),
         attender=MlpAttender(input_dim=layer_dim,
                              state_dim=layer_dim,
                              hidden_dim=layer_dim),
         trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
         decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                   lstm_dim=layer_dim,
                                   mlp_hidden_dim=layer_dim,
                                   trg_embed_dim=layer_dim,
                                   vocab_size=100),
     )
     self.set_train(True)
     for sent_i in range(10):
         dy.renew_cg()
         src = self.src_data[sent_i].get_padded_sent(
             Vocab.ES, 4 - (len(self.src_data[sent_i]) % 4))
         self.start_sent(src)
         embeddings = model.src_embedder.embed_sent(src)
         encodings = model.encoder(embeddings)
         self.assertEqual(int(math.ceil(len(embeddings) / float(4))),
                          len(encodings))
Example #5
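The oldest `test_loss_model2` variant: components take `self.model_context` with default dimensions, and `DefaultTranslator` is constructed without reader arguments.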
 def test_loss_model2(self):
     model = DefaultTranslator(
         src_embedder=SimpleWordEmbedder(self.model_context,
                                         vocab_size=100),
         encoder=PyramidalLSTMSeqTransducer(self.model_context, layers=3),
         attender=MlpAttender(self.model_context),
         trg_embedder=SimpleWordEmbedder(self.model_context,
                                         vocab_size=100),
         decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100),
     )
     model.set_train(False)
     self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)
Example #6
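An `exp_global`-based `test_loss_model2` variant that additionally passes an explicit single-layer `CopyBridge` to the decoder before comparing single-sentence and batched losses.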
 def test_loss_model2(self):
     model = DefaultTranslator(
         src_reader=self.src_reader,
         trg_reader=self.trg_reader,
         src_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
         encoder=PyramidalLSTMSeqTransducer(self.exp_global, layers=3),
         attender=MlpAttender(self.exp_global),
         trg_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
         decoder=MlpSoftmaxDecoder(self.exp_global,
                                   vocab_size=100,
                                   bridge=CopyBridge(
                                       exp_global=self.exp_global,
                                       dec_layers=1)),
     )
     model.set_train(False)
     self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)
Example #7
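The encoder-length test in the newer style (`AutoRegressiveDecoder`, `sent_len()`, `transduce`), again asserting that the 3-layer pyramidal encoder reduces the padded input to `ceil(len / 4)` states.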
 def test_py_lstm_encoder_len(self):
   layer_dim = 512
   model = DefaultTranslator(
     src_reader=self.src_reader,
     trg_reader=self.trg_reader,
     src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
     encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
     attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
     trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
     decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                               trg_embed_dim=layer_dim,
                               rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
                               transform=NonLinear(input_dim=layer_dim*2, output_dim=layer_dim),
                               scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                               bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
   )
   self.set_train(True)
   for sent_i in range(10):
     dy.renew_cg()
     src = self.src_data[sent_i].get_padded_sent(Vocab.ES, 4 - (self.src_data[sent_i].sent_len() % 4))
     self.start_sent(src)
     embeddings = model.src_embedder.embed_sent(src)
     encodings = model.encoder.transduce(embeddings)
     self.assertEqual(int(math.ceil(len(embeddings) / float(4))), len(encodings))