Example #1
  def test_train_dev_loss_equal(self):
    layer_dim = 512
    batcher = SrcBatcher(batch_size=5, break_ties_randomly=False)
    train_args = {}
    train_args['src_file'] = "examples/data/head.ja"
    train_args['trg_file'] = "examples/data/head.en"
    train_args['loss_calculator'] = AutoRegressiveMLELoss()
    train_args['model'] = DefaultTranslator(
      src_reader=PlainTextReader(),
      trg_reader=PlainTextReader(),
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        trg_embed_dim=layer_dim,
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                 hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim,
                                 yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'],
                                            src_file="examples/data/head.ja",
                                            ref_file="examples/data/head.en",
                                            batcher=batcher)]
    train_args['trainer'] = DummyTrainer()
    train_args['batcher'] = batcher
    train_args['run_for_epochs'] = 1
    training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(**train_args)
    training_regimen.run_training(save_fct=lambda: None)
    # Train and dev read the same files, and DummyTrainer presumably performs
    # no parameter updates, so the per-word training loss should match the
    # dev loss up to numerical precision.
    self.assertAlmostEqual(
      training_regimen.train_loss_tracker.epoch_loss.sum_factors() /
      training_regimen.train_loss_tracker.epoch_words,
      training_regimen.dev_loss_tracker.dev_score.loss, places=5)
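What the final assertion compares, spelled out with hypothetical numbers: the accumulated epoch loss normalized per target word on the training side against the dev score. The tracker names below stand in for the attributes used in the test; the values are made up purely to show the arithmetic.

# Hypothetical tracker values, illustrating the normalization in the assert.
epoch_loss_sum = 123.4          # stands in for epoch_loss.sum_factors()
epoch_words = 56                # stands in for epoch_words
per_word_train_loss = epoch_loss_sum / epoch_words
print(round(per_word_train_loss, 5))  # compared against dev_score.loss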
Example #2
    def setUp(self):
        layer_dim = 512
        xnmt.events.clear()
        ParamManager.init_param_col()
        self.model = DefaultTranslator(
            src_reader=PlainTextReader(),
            trg_reader=PlainTextReader(),
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                        hidden_dim=layer_dim),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=AutoRegressiveDecoder(
                input_dim=layer_dim,
                trg_embed_dim=layer_dim,
                rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim,
                                         decoder_input_dim=layer_dim,
                                         yaml_path="model.decoder.rnn"),
                transform=NonLinear(input_dim=layer_dim * 2,
                                    output_dim=layer_dim),
                scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
        )
        self.model.set_train(False)

        self.src_data = list(
            self.model.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("examples/data/head.en"))
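A small follow-on sketch (not part of the original test class) showing a sanity check this fixture enables; it relies only on what setUp defines above.

    def test_corpus_aligned(self):  # hypothetical test method, not in the suite
        # head.ja and head.en are a parallel corpus, so the two readers
        # must yield the same number of sentences.
        self.assertEqual(len(self.src_data), len(self.trg_data))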
Example #3
  def test_py_lstm_mask(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=1),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        trg_embed_dim=layer_dim,
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                 hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim,
                                 yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )

    batcher = xnmt.batcher.TrgBatcher(batch_size=3)
    train_src, _ = batcher.pack(self.src_data, self.trg_data)

    self.set_train(True)
    for sent_i in range(3):
      dy.renew_cg()
      src = train_src[sent_i]
      self.start_sent(src)
      embeddings = model.src_embedder.embed_sent(src)
      encodings = model.encoder.transduce(embeddings)
      # A single-layer pyramidal LSTM does not downsample, so the encoder
      # must pass the batch's padding mask through unchanged.
      if train_src[sent_i].mask is None:
        assert encodings.mask is None
      else:
        np.testing.assert_array_almost_equal(train_src[sent_i].mask.np_arr, encodings.mask.np_arr)
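The masking invariant being asserted, reduced to pure numpy. The convention that padded positions carry 1 in Mask.np_arr is an assumption about xnmt; the test itself only needs the input and output arrays to match.

import numpy as np

# A hypothetical (batch x time) mask: 1 marks padding. A non-downsampling
# encoder must hand this array back unchanged.
in_mask = np.array([[0, 0, 1],
                    [0, 0, 0]])
out_mask = in_mask.copy()
np.testing.assert_array_almost_equal(in_mask, out_mask)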
Example #4
    def setUp(self):
        # Seeding
        numpy.random.seed(2)
        random.seed(2)
        layer_dim = 64
        xnmt.events.clear()
        ParamManager.init_param_col()
        self.segment_composer = SumComposer()
        self.src_reader = CharFromWordTextReader()
        self.trg_reader = PlainTextReader()
        self.loss_calculator = AutoRegressiveMLELoss()
        self.segmenting_encoder = SegmentingSeqTransducer(
            segment_composer=self.segment_composer,
            final_transducer=BiLSTMSeqTransducer(input_dim=layer_dim,
                                                 hidden_dim=layer_dim),
        )

        self.model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=self.segmenting_encoder,
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=AutoRegressiveDecoder(
                input_dim=layer_dim,
                rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim,
                                         decoder_input_dim=layer_dim,
                                         yaml_path="decoder"),
                transform=AuxNonLinear(input_dim=layer_dim,
                                       output_dim=layer_dim,
                                       aux_input_dim=layer_dim),
                scorer=Softmax(vocab_size=100, input_dim=layer_dim),
                trg_embed_dim=layer_dim,
                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
        )
        self.model.set_train(True)

        self.layer_dim = layer_dim
        self.src_data = list(
            self.model.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("examples/data/head.en"))
        my_batcher = xnmt.batcher.TrgBatcher(batch_size=3,
                                             src_pad_token=1,
                                             trg_pad_token=2)
        self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
        dy.renew_cg(immediate_compute=True, check_validity=True)
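A hedged follow-on sketch (not in the original suite) showing how a test might drive this fixture. It reuses only calls that appear elsewhere in these examples (embed_sent, transduce); immediate_compute is already enabled in setUp, so any dimension mismatch would surface right away.

    def test_segmenting_encoder_runs(self):  # hypothetical test method
        # Push the first packed batch through the segmenting encoder.
        embeddings = self.model.src_embedder.embed_sent(self.src[0])
        encodings = self.model.encoder.transduce(embeddings)
        self.assertTrue(len(encodings) > 0)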
Example #5
  def test_uni_lstm_encoder_len(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        trg_embed_dim=layer_dim,
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                 hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim,
                                 yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    # A unidirectional LSTM transducer preserves sequence length.
    self.assert_in_out_len_equal(model)
Example #6
  def test_loss_model2(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        trg_embed_dim=layer_dim,
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                 hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim,
                                 yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    model.set_train(False)
    # The 3-layer pyramidal encoder downsamples by a factor of 4, so source
    # batches are padded to a multiple of 4 before comparing losses.
    self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)
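Why pad_src_to_multiple=4: assuming xnmt's PyramidalLSTMSeqTransducer halves the sequence between successive layers, a 3-layer stack shortens the input by 2**(3-1) = 4, the same factor Example #7's assertion uses. A quick arithmetic sketch:

layers = 3
factor = 2 ** (layers - 1)  # assumed 2x downsampling between layers
for input_len in (4, 8, 12):
    # Once padded to a multiple of 4, the length divides evenly,
    # so floor and ceil division agree.
    print(input_len, input_len // factor)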
Example #7
  def test_py_lstm_encoder_len(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        trg_embed_dim=layer_dim,
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                 hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim,
                                 yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    self.set_train(True)
    for sent_i in range(10):
      dy.renew_cg()
      # Pad each source sentence up to a multiple of 4 so the 3-layer
      # pyramidal encoder (downsampling factor 2**(3-1) = 4) divides evenly.
      src = self.src_data[sent_i].get_padded_sent(
        Vocab.ES, 4 - (self.src_data[sent_i].sent_len() % 4))
      self.start_sent(src)
      embeddings = model.src_embedder.embed_sent(src)
      encodings = model.encoder.transduce(embeddings)
      self.assertEqual(int(math.ceil(len(embeddings) / float(4))), len(encodings))
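The padding arithmetic used above, shown standalone: note that a length already divisible by 4 still receives 4 extra pad tokens, which keeps the total a multiple of 4 and leaves the ceil-division check valid.

for sent_len in (6, 7, 8):
    pad = 4 - (sent_len % 4)       # same expression as in the test
    padded = sent_len + pad
    print(sent_len, pad, padded, padded % 4 == 0)  # always a multiple of 4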
Example #8
# Context assumed from earlier in this script: src_vocab, trg_vocab, batcher,
# and EXP are defined before this point.
inference = AutoRegressiveInference(batcher=InOrderBatcher(batch_size=1))

layer_dim = 512

model = DefaultTranslator(
  src_reader=PlainTextReader(vocab=src_vocab),
  trg_reader=PlainTextReader(vocab=trg_vocab),
  src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=len(src_vocab)),
  encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=1),
  attender=MlpAttender(hidden_dim=layer_dim, state_dim=layer_dim, input_dim=layer_dim),
  trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=len(trg_vocab)),
  decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                                rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                                         decoder_input_dim=layer_dim, yaml_path="decoder"),
                                transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim,
                                                       aux_input_dim=layer_dim),
                                scorer=Softmax(vocab_size=len(trg_vocab), input_dim=layer_dim),
                                trg_embed_dim=layer_dim,
                                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  inference=inference
)

train = SimpleTrainingRegimen(
  name=f"{EXP}",
  model=model,
  batcher=batcher,
  trainer=AdamTrainer(alpha=0.001),
  run_for_epochs=2,
  src_file="examples/data/head.ja",
  trg_file="examples/data/head.en",
  # The original snippet is truncated after the next line; the remaining
  # LossEvalTask arguments and closing parentheses are an assumed completion
  # modeled on the call in Example #1.
  dev_tasks=[LossEvalTask(src_file="examples/data/head.ja",
                          ref_file="examples/data/head.en",
                          model=model,
                          batcher=batcher)],
)
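With the regimen constructed, training could be launched the same way Example #1 does; a minimal, hedged sketch, with the no-op save_fct mirroring that test:

train.run_training(save_fct=lambda: None)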
Example #9
    def setUp(self):
        # Seeding
        numpy.random.seed(2)
        random.seed(2)
        layer_dim = 64
        xnmt.events.clear()
        ParamManager.init_param_col()
        self.segment_encoder_bilstm = BiLSTMSeqTransducer(input_dim=layer_dim,
                                                          hidden_dim=layer_dim)
        self.segment_composer = SumComposer()
        self.src_reader = CharFromWordTextReader()
        self.trg_reader = PlainTextReader()
        self.loss_calculator = AutoRegressiveMLELoss()

        baseline = Linear(input_dim=layer_dim, output_dim=1)
        policy_network = Linear(input_dim=layer_dim, output_dim=2)
        self.poisson_prior = PoissonPrior(mu=3.3)
        self.eps_greedy = EpsilonGreedy(eps_prob=0.0, prior=self.poisson_prior)
        self.conf_penalty = ConfidencePenalty()
        self.policy_gradient = PolicyGradient(input_dim=layer_dim,
                                              output_dim=2,
                                              baseline=baseline,
                                              policy_network=policy_network,
                                              z_normalization=True,
                                              conf_penalty=self.conf_penalty,
                                              sample=5)
        self.length_prior = PoissonLengthPrior(lmbd=3.3, weight=1)
        self.segmenting_encoder = SegmentingSeqTransducer(
            embed_encoder=self.segment_encoder_bilstm,
            segment_composer=self.segment_composer,
            final_transducer=BiLSTMSeqTransducer(input_dim=layer_dim,
                                                 hidden_dim=layer_dim),
            policy_learning=self.policy_gradient,
            eps_greedy=self.eps_greedy,
            length_prior=self.length_prior,
        )

        self.model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=self.segmenting_encoder,
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=AutoRegressiveDecoder(
                input_dim=layer_dim,
                rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim,
                                         decoder_input_dim=layer_dim,
                                         yaml_path="decoder"),
                transform=AuxNonLinear(input_dim=layer_dim,
                                       output_dim=layer_dim,
                                       aux_input_dim=layer_dim),
                scorer=Softmax(vocab_size=100, input_dim=layer_dim),
                trg_embed_dim=layer_dim,
                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
        )
        self.model.set_train(True)

        self.layer_dim = layer_dim
        self.src_data = list(
            self.model.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("examples/data/head.en"))
        my_batcher = xnmt.batcher.TrgBatcher(batch_size=3,
                                             src_pad_token=1,
                                             trg_pad_token=2)
        self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
        dy.renew_cg(immediate_compute=True, check_validity=True)
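One detail of this setup worth noting: eps_prob=0.0 means the epsilon-greedy wrapper should never fall back to the Poisson prior, so segmentation actions come from the learned policy throughout the test. A generic illustration of the rule follows; this is a hypothetical helper, not xnmt's EpsilonGreedy class.

import random

def epsilon_greedy(eps_prob, policy_action, prior_action):
    # With probability eps_prob take the prior's action, otherwise the
    # policy's; at eps_prob=0.0 the prior branch is never taken.
    return prior_action if random.random() < eps_prob else policy_action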