def setUp(self): layer_dim = 512 events.clear() ParamManager.init_param_col() src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab") trg_vocab = Vocab(vocab_file="examples/data/head.en.vocab") self.model = DefaultTranslator( src_reader=PlainTextReader(vocab=src_vocab), trg_reader=PlainTextReader(vocab=trg_vocab), src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), decoder=AutoRegressiveDecoder( input_dim=layer_dim, embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"), transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim), scorer=Softmax(input_dim=layer_dim, vocab_size=100), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) event_trigger.set_train(False) self.src_data = list( self.model.src_reader.read_sents("examples/data/head.ja"))
def setUp(self): # Seeding numpy.random.seed(2) random.seed(2) layer_dim = 32 xnmt.events.clear() ParamManager.init_param_col() edge_vocab = Vocab(vocab_file="examples/data/parse/head.en.edge_vocab") node_vocab = Vocab(vocab_file="examples/data/parse/head.en.node_vocab") value_vocab = Vocab(vocab_file="examples/data/head.en.vocab") self.src_reader = input_readers.PlainTextReader(vocab=value_vocab) self.trg_reader = input_readers.CoNLLToRNNGActionsReader( surface_vocab=value_vocab, nt_vocab=node_vocab, edg_vocab=edge_vocab) self.layer_dim = layer_dim self.src_data = list( self.src_reader.read_sents("examples/data/head.en")) self.trg_data = list( self.trg_reader.read_sents("examples/data/parse/head.en.conll")) self.loss_calculator = MLELoss() self.head_composer = composer.DyerHeadComposer( fwd_combinator=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), bwd_combinator=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), transform=AuxNonLinear(input_dim=layer_dim, aux_input_dim=layer_dim, output_dim=layer_dim)) self.model = DefaultTranslator( src_reader=self.src_reader, trg_reader=self.trg_reader, src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=len(value_vocab)), encoder=IdentitySeqTransducer(), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), decoder=RNNGDecoder( input_dim=layer_dim, rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim), transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim), bridge=NoBridge(dec_dim=layer_dim, dec_layers=1), graph_reader=self.trg_reader, head_composer=self.head_composer)) event_trigger.set_train(True) my_batcher = batchers.TrgBatcher(batch_size=1) self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data) dy.renew_cg(immediate_compute=True, check_validity=True)
class TestGreedyVsBeam(unittest.TestCase): """ Test if greedy search produces same output as beam search with beam 1. """ def setUp(self): layer_dim = 512 events.clear() ParamManager.init_param_col() src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab") trg_vocab = Vocab(vocab_file="examples/data/head.en.vocab") self.model = DefaultTranslator( src_reader=PlainTextReader(vocab=src_vocab), trg_reader=PlainTextReader(vocab=trg_vocab), src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), decoder=AutoRegressiveDecoder( input_dim=layer_dim, embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"), transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim), scorer=Softmax(input_dim=layer_dim, vocab_size=100), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) event_trigger.set_train(False) self.src_data = list( self.model.src_reader.read_sents("examples/data/head.ja")) def test_greedy_vs_beam(self): dy.renew_cg() outputs = self.model.generate( batchers.mark_as_batch([self.src_data[0]]), BeamSearch(beam_size=1)) output_score1 = outputs[0].score dy.renew_cg() outputs = self.model.generate( batchers.mark_as_batch([self.src_data[0]]), GreedySearch()) output_score2 = outputs[0].score self.assertAlmostEqual(output_score1, output_score2)
def test_loss_model1(self): layer_dim = 512 model = DefaultTranslator( src_reader=self.src_reader, trg_reader=self.trg_reader, src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100), encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), decoder=AutoRegressiveDecoder( input_dim=layer_dim, embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100), rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"), transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim), scorer=Softmax(input_dim=layer_dim, vocab_size=100), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) event_trigger.set_train(False) self.assert_single_loss_equals_batch_loss(model)
def test_bi_lstm_encoder_len(self): layer_dim = 512 model = DefaultTranslator( src_reader=self.src_reader, trg_reader=self.trg_reader, src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), decoder=AutoRegressiveDecoder( input_dim=layer_dim, embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"), transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim), scorer=Softmax(input_dim=layer_dim, vocab_size=100), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) self.assert_in_out_len_equal(model)
def test_py_lstm_encoder_len(self): layer_dim = 512 model = DefaultTranslator( src_reader=self.src_reader, trg_reader=self.trg_reader, src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), decoder=AutoRegressiveDecoder( input_dim=layer_dim, embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"), transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim), scorer=Softmax(input_dim=layer_dim, vocab_size=100), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) event_trigger.set_train(True) for sent_i in range(10): dy.renew_cg() src = self.src_data[sent_i].create_padded_sent( 4 - (self.src_data[sent_i].sent_len() % 4)) event_trigger.start_sent(src) embeddings = model.src_embedder.embed_sent(src) encodings = model.encoder.transduce(embeddings) self.assertEqual(int(math.ceil(len(embeddings) / float(4))), len(encodings))
def test_train_dev_loss_equal(self): layer_dim = 512 batcher = SrcBatcher(batch_size=5, break_ties_randomly=False) train_args = {} train_args['src_file'] = "examples/data/head.ja" train_args['trg_file'] = "examples/data/head.en" train_args['loss_calculator'] = MLELoss() train_args['model'] = DefaultTranslator(src_reader=PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.ja.vocab")), trg_reader=PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.en.vocab")), src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), decoder=AutoRegressiveDecoder(input_dim=layer_dim, embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"), transform=NonLinear(input_dim=layer_dim*2, output_dim=layer_dim), scorer=Softmax(input_dim=layer_dim, vocab_size=100), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'], src_file="examples/data/head.ja", ref_file="examples/data/head.en", batcher=batcher)] train_args['trainer'] = DummyTrainer() train_args['batcher'] = batcher train_args['run_for_epochs'] = 1 training_regimen = regimens.SimpleTrainingRegimen(**train_args) training_regimen.run_training(save_fct = lambda: None) self.assertAlmostEqual(training_regimen.train_loss_tracker.epoch_loss.sum_factors() / training_regimen.train_loss_tracker.epoch_words, training_regimen.dev_loss_tracker.dev_score.loss, places=5)
class TestFreeDecodingLoss(unittest.TestCase): def setUp(self): layer_dim = 512 events.clear() ParamManager.init_param_col() src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab") trg_vocab = Vocab(vocab_file="examples/data/head.en.vocab") self.model = DefaultTranslator( src_reader=PlainTextReader(vocab=src_vocab), trg_reader=PlainTextReader(vocab=trg_vocab), src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100), encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), decoder=AutoRegressiveDecoder( input_dim=layer_dim, embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100), rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"), transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim), scorer=Softmax(input_dim=layer_dim, vocab_size=100), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) event_trigger.set_train(False) self.src_data = list( self.model.src_reader.read_sents("examples/data/head.ja")) self.trg_data = list( self.model.trg_reader.read_sents("examples/data/head.en")) def test_single(self): dy.renew_cg() outputs = self.model.generate( batchers.mark_as_batch([self.src_data[0]]), GreedySearch()) output_score = outputs[0].score dy.renew_cg() train_loss = self.model.calc_nll(src=self.src_data[0], trg=outputs[0]).value() self.assertAlmostEqual(-output_score, train_loss, places=3)
def setUp(self): # Seeding numpy.random.seed(2) random.seed(2) layer_dim = 4 xnmt.events.clear() ParamManager.init_param_col() self.segment_encoder_bilstm = BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim) self.segment_composer = SumComposer() self.src_reader = CharFromWordTextReader(vocab=Vocab(vocab_file="test/data/head.ja.charvocab")) self.trg_reader = PlainTextReader(vocab=Vocab(vocab_file="test/data/head.en.vocab")) self.loss_calculator = FeedbackLoss(child_loss=MLELoss(), repeat=5) baseline = Linear(input_dim=layer_dim, output_dim=1) policy_network = Linear(input_dim=layer_dim, output_dim=2) self.poisson_prior = PoissonPrior(mu=3.3) self.eps_greedy = EpsilonGreedy(eps_prob=0.0, prior=self.poisson_prior) self.conf_penalty = ConfidencePenalty() self.policy_gradient = PolicyGradient(input_dim=layer_dim, output_dim=2, baseline=baseline, policy_network=policy_network, z_normalization=True, conf_penalty=self.conf_penalty) self.length_prior = PoissonLengthPrior(lmbd=3.3, weight=1) self.segmenting_encoder = SegmentingSeqTransducer( embed_encoder = self.segment_encoder_bilstm, segment_composer = self.segment_composer, final_transducer = BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), policy_learning = self.policy_gradient, eps_greedy = self.eps_greedy, length_prior = self.length_prior, ) self.model = DefaultTranslator( src_reader=self.src_reader, trg_reader=self.trg_reader, src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), encoder=self.segmenting_encoder, attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), decoder=AutoRegressiveDecoder(input_dim=layer_dim, rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="decoder"), transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim), scorer=Softmax(vocab_size=100, input_dim=layer_dim), embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) event_trigger.set_train(True) self.layer_dim = layer_dim self.src_data = list(self.model.src_reader.read_sents("test/data/head.ja")) self.trg_data = list(self.model.trg_reader.read_sents("test/data/head.en")) my_batcher = batchers.TrgBatcher(batch_size=3) self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data) dy.renew_cg(immediate_compute=True, check_validity=True)
def setUp(self): # Seeding numpy.random.seed(2) random.seed(2) layer_dim = 4 xnmt.events.clear() ParamManager.init_param_col() self.segment_composer = SumComposer() self.src_reader = CharFromWordTextReader(vocab=Vocab( vocab_file="examples/data/head.ja.charvocab")) self.trg_reader = PlainTextReader(vocab=Vocab( vocab_file="examples/data/head.en.vocab")) self.loss_calculator = FeedbackLoss(child_loss=MLELoss(), repeat=5) self.segmenting_encoder = SegmentingSeqTransducer( segment_composer=self.segment_composer, final_transducer=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), ) self.model = DefaultTranslator( src_reader=self.src_reader, trg_reader=self.trg_reader, src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), encoder=self.segmenting_encoder, attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), decoder=AutoRegressiveDecoder( input_dim=layer_dim, rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="decoder"), transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim), scorer=Softmax(vocab_size=100, input_dim=layer_dim), embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) event_trigger.set_train(True) self.layer_dim = layer_dim self.src_data = list( self.model.src_reader.read_sents("examples/data/head.ja")) self.trg_data = list( self.model.trg_reader.read_sents("examples/data/head.en")) my_batcher = batchers.TrgBatcher(batch_size=3) self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data) dy.renew_cg(immediate_compute=True, check_validity=True)
def test_py_lstm_mask(self): layer_dim = 512 model = DefaultTranslator( src_reader=self.src_reader, trg_reader=self.trg_reader, src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=1), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), decoder=AutoRegressiveDecoder( input_dim=layer_dim, embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"), transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim), scorer=Softmax(input_dim=layer_dim, vocab_size=100), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) batcher = batchers.TrgBatcher(batch_size=3) train_src, _ = \ batcher.pack(self.src_data, self.trg_data) event_trigger.set_train(True) for sent_i in range(3): dy.renew_cg() src = train_src[sent_i] event_trigger.start_sent(src) embeddings = model.src_embedder.embed_sent(src) encodings = model.encoder.transduce(embeddings) if train_src[sent_i].mask is None: assert encodings.mask is None else: np.testing.assert_array_almost_equal( train_src[sent_i].mask.np_arr, encodings.mask.np_arr)
def add_input(self, prev_word, state) -> DefaultTranslator.Output: if batchers.is_batched(self.src_sents): src = self.src_sents[0] else: src = self.src_sents force_actions = None look_oracle = self.policy_train_oracle if self.train else self.policy_test_oracle if type(src) == sent.CompoundSentence: src, force_actions = src.sents[0], src.sents[1].words force_actions = force_actions if look_oracle else None if type(prev_word) == list: prev_word = prev_word[0] while True: force_action = None # If we look at the oracle, fill the value of force action accordingly if look_oracle: now_position = state.has_been_read + state.has_been_written # Case when in the inference the produced sentence is longer than the reference. # The number of enough reads have been taken, just need to write the output until the end. if now_position < len(force_actions): force_action = force_actions[now_position] else: force_action = self.Action.WRITE.value # Taking the next action next_action = self._next_action(state, src.len_unpadded(), force_action) if next_action.content == self.Action.WRITE.value: state = state.write(self.src_encoding, prev_word, next_action) break elif next_action.content == self.Action.READ.value: state = state.read(self.src_encoding[state.has_been_read], next_action) else: raise ValueError(next_action.content) return DefaultTranslator.Output(state, state.decoder_state.attention)
def test_overfitting(self): layer_dim = 16 batcher = SrcBatcher(batch_size=10, break_ties_randomly=False) train_args = {} train_args['src_file'] = "examples/data/head.ja" train_args['trg_file'] = "examples/data/head.en" train_args['loss_calculator'] = MLELoss() train_args['model'] = DefaultTranslator( src_reader=PlainTextReader(vocab=Vocab( vocab_file="examples/data/head.ja.vocab")), trg_reader=PlainTextReader(vocab=Vocab( vocab_file="examples/data/head.en.vocab")), src_embedder=LookupEmbedder(vocab_size=100, emb_dim=layer_dim), encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), decoder=AutoRegressiveDecoder( input_dim=layer_dim, embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100), rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"), transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim), scorer=Softmax(input_dim=layer_dim, vocab_size=100), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) train_args['dev_tasks'] = [ LossEvalTask(model=train_args['model'], src_file="examples/data/head.ja", ref_file="examples/data/head.en", batcher=batcher) ] train_args['run_for_epochs'] = 1 train_args['trainer'] = AdamTrainer(alpha=0.1) train_args['batcher'] = batcher training_regimen = regimens.SimpleTrainingRegimen(**train_args)
layer_dim = 512 model = DefaultTranslator( src_reader=PlainTextReader(vocab=src_vocab), trg_reader=PlainTextReader(vocab=trg_vocab), src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=len(src_vocab)), encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=1), attender=MlpAttender(hidden_dim=layer_dim, state_dim=layer_dim, input_dim=layer_dim), decoder=AutoRegressiveDecoder( input_dim=layer_dim, embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=len(trg_vocab)), rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="decoder"), transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim), scorer=Softmax(vocab_size=len(trg_vocab), input_dim=layer_dim), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), inference=inference) train = SimpleTrainingRegimen( name=f"{EXP}",