Example #1
    def test_full_beam_decoder_char_rnn_vocab_reduction(self):
        test_args = test_utils.ModelParamsDict(encoder_bidirectional=True,
                                               sequence_lstm=True)
        lexical_dictionaries = test_utils.create_lexical_dictionaries()
        test_args.vocab_reduction_params = {
            "lexical_dictionaries": lexical_dictionaries,
            "num_top_words": 10,
            "max_translation_candidates_per_word": 1,
        }

        test_args.arch = "char_source"
        test_args.char_source_dict_size = 126
        test_args.char_embed_dim = 8
        test_args.char_rnn_units = 12
        test_args.char_rnn_layers = 2

        self._test_full_beam_decoder(test_args)

    def test_char_rnn_generate(self):
        test_args = test_utils.ModelParamsDict(sequence_lstm=True)
        test_args.arch = "char_source"
        test_args.char_source_dict_size = 126
        test_args.char_embed_dim = 8
        test_args.char_rnn_units = 12
        test_args.char_rnn_layers = 2

        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        model = models.build_model(test_args, src_dict, tgt_dict)
        translator = beam_decode.SequenceGenerator([model], tgt_dict)
        src_tokens = torch.LongTensor([[0, 0, 0], [0, 0, 0]])
        src_lengths = torch.LongTensor([3, 3])
        char_inds = torch.LongTensor(np.zeros((2, 3, 5)))
        word_lengths = torch.LongTensor([[5, 5, 5], [5, 5, 5]])
        encoder_input = (src_tokens, src_lengths, char_inds, word_lengths)
        translator.generate(encoder_input, maxlen=7)
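
For reference, the char_source encoder consumes the 4-tuple built above, and the shapes must line up: char_inds is (batch, words per sentence, max word length), and word_lengths matches its first two dimensions. A minimal sketch of a toy batch under those assumptions:

import torch

# Toy batch: 2 sentences, 3 words each, every word padded to 5 characters.
bsz, seq_len, max_word_len = 2, 3, 5
src_tokens = torch.zeros(bsz, seq_len, dtype=torch.long)       # word ids
src_lengths = torch.full((bsz,), seq_len, dtype=torch.long)    # words per sentence
char_inds = torch.zeros(bsz, seq_len, max_word_len, dtype=torch.long)  # char ids
word_lengths = torch.full((bsz, seq_len), max_word_len, dtype=torch.long)

encoder_input = (src_tokens, src_lengths, char_inds, word_lengths)
assert char_inds.shape == (bsz, seq_len, max_word_len)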
Example #3
 def test_gpu_pretrained_embedding(self):
     encoder_embedding = open(test_utils.make_temp_file(), "wb")
     decoder_embedding = open(test_utils.make_temp_file(), "wb")
     test_args = test_utils.ModelParamsDict(
         encoder_pretrained_embed=encoder_embedding.name,
         decoder_pretrained_embed=decoder_embedding.name,
         encoder_freeze_embed=True,
         decoder_freeze_embed=True,
     )
     # The vocabulary defaults to 103 in test_utils.prepare_inputs.
     np.save(encoder_embedding, np.zeros(
         (103, test_args.encoder_embed_dim)))
     encoder_embedding.close()
     np.save(decoder_embedding, np.zeros(
         (103, test_args.decoder_embed_dim)))
     decoder_embedding.close()
     self._gpu_train_step(test_args)
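
The test only verifies that a GPU training step runs; the embedding files themselves are plain .npy arrays. A small sketch of the save/load round trip (embed_dim is an illustrative stand-in for test_args.encoder_embed_dim):

import tempfile
import numpy as np

embed_dim = 32  # stands in for test_args.encoder_embed_dim
with tempfile.NamedTemporaryFile(suffix=".npy", delete=False) as f:
    # np.save writes the .npy header plus the raw array to the open file.
    np.save(f, np.zeros((103, embed_dim)))

weights = np.load(f.name)
assert weights.shape == (103, embed_dim)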
Example #4
    def test_combine_weighted_scores(self):
        test_args = test_utils.ModelParamsDict()
        test_args.enable_rescoring = True
        test_args.l2r_model_path = ""
        test_args.l2r_model_weight = 1.0
        test_args.r2l_model_weight = 0.0
        test_args.reverse_model_weight = 0.0
        test_args.cloze_transformer_weight = 0.0
        test_args.lm_model_weight = 1.01
        test_args.length_penalty = 1.0

        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
        model = task.build_model(test_args)
        with patch(
                "pytorch_translate.utils.load_diverse_ensemble_for_inference",
                return_value=([model], test_args, task),
        ):
            scores = torch.tensor([[80, 0, 0, 0, 0], [0, 0, 0, 80, 0]],
                                  dtype=torch.float)
            src_tokens = torch.tensor([1, 2, 3, 4, 5])
            hypos = [{
                "tokens": torch.tensor([1, 2])
            }, {
                "tokens": torch.tensor([1, 2])
            }]

            src_len = len(src_tokens)
            tgt_len = torch.tensor([len(hypo["tokens"]) for hypo in hypos],
                                   dtype=torch.float)
            weights = [
                test_args.l2r_model_weight,
                test_args.r2l_model_weight,
                test_args.reverse_model_weight,
                test_args.lm_model_weight,
                test_args.cloze_transformer_weight,
            ]
            combined_scores = combine_weighted_scores(scores, weights, src_len,
                                                      tgt_len, 1)

            # hypo 1: 80 * 1.0 / 2^1 = 40.0; hypo 2: 80 * 1.01 / 2^1 = 40.4
            expected = torch.tensor([40.0, 40.4], dtype=torch.float)
            assert torch.equal(combined_scores, expected)
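
The expected values pin down the arithmetic: assuming combine_weighted_scores takes a weighted sum over the per-model score columns and normalizes by tgt_len ** length_penalty, the assertion can be reproduced by hand:

import torch

scores = torch.tensor([[80, 0, 0, 0, 0], [0, 0, 0, 80, 0]], dtype=torch.float)
weights = torch.tensor([1.0, 0.0, 0.0, 1.01, 0.0])  # l2r, r2l, reverse, lm, cloze
tgt_len = torch.tensor([2.0, 2.0])                  # both hypos have 2 tokens
length_penalty = 1.0

combined = (scores * weights).sum(dim=1) / tgt_len ** length_penalty
assert torch.allclose(combined, torch.tensor([40.0, 40.4]))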
Example #5
    def test_reverse_scorer_prepare_inputs(self):
        test_args = test_utils.ModelParamsDict()
        test_args.append_eos_to_source = True
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
        model = task.build_model(test_args)

        pad = task.tgt_dict.pad()
        eos = task.tgt_dict.eos()

        with patch(
                "pytorch_translate.utils.load_diverse_ensemble_for_inference",
                return_value=([model], test_args, task),
        ):
            scorer = ReverseModelScorer(test_args, "/tmp/model_path.txt", None,
                                        task)
            src_tokens = torch.tensor([6, 7, 8], dtype=torch.int)
            hypos = [
                {
                    "tokens": torch.tensor([12, 13, 14, eos], dtype=torch.int)
                },
                {
                    "tokens": torch.tensor([22, 23, eos], dtype=torch.int)
                },
            ]

            (encoder_inputs,
             tgt_tokens) = scorer.prepare_inputs(src_tokens, hypos)

            # Test encoder inputs
            assert torch.equal(
                encoder_inputs[0],
                torch.tensor([[12, 13, 14, eos], [22, 23, eos, pad]],
                             dtype=torch.int),
            ), "Encoder inputs are not as expected"
            max_tgt_len = max(len(hypo["tokens"]) for hypo in hypos)
            assert encoder_inputs[1][0] == max_tgt_len, \
                "Src length is not as expected"

            # Test target tokens
            assert torch.equal(
                tgt_tokens,
                torch.tensor([[eos, 6, 7, 8, eos], [eos, 6, 7, 8, eos]],
                             dtype=torch.int),
            ), "Target tokens are not as expected"
    def test_reverse_tgt_tokens(self):
        test_args = test_utils.ModelParamsDict()
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
        model = task.build_model(test_args)

        with patch(
                "pytorch_translate.utils.load_diverse_ensemble_for_inference",
                return_value=([model], test_args, task),
        ):
            scorer = R2LModelScorer(test_args, None)

            pad = task.tgt_dict.pad()
            tgt_tokens = torch.Tensor([[1, 2, 3], [1, 2, pad], [1, pad, pad]])
            expected_tokens = torch.Tensor([[3, 2, 1], [2, 1, pad],
                                            [1, pad, pad]])
            reversed_tgt_tokens = scorer.reverse_tgt_tokens(tgt_tokens)
            assert torch.equal(reversed_tgt_tokens, expected_tokens)
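
The expected tensor in test_reverse_tgt_tokens pins down the behavior: each row's non-pad prefix is reversed in place while trailing pads stay where they are. A minimal reimplementation sketch (reverse_nonpad is a hypothetical name, and it assumes pads appear only as right-padding):

import torch

def reverse_nonpad(tgt_tokens, pad):
    # Reverse each row's non-pad prefix; trailing pads stay in place.
    out = tgt_tokens.clone()
    for i, row in enumerate(tgt_tokens.tolist()):
        n = len(row)
        while n > 0 and row[n - 1] == pad:
            n -= 1
        out[i, :n] = torch.tensor(row[:n][::-1])
    return out

pad = 0  # illustrative pad index
t = torch.tensor([[1, 2, 3], [1, 2, pad], [1, pad, pad]])
expected = torch.tensor([[3, 2, 1], [2, 1, pad], [1, pad, pad]])
assert torch.equal(reverse_nonpad(t, pad), expected)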
Example #7
    def test_ensemble_encoder_export_char_cnn_vocab_reduction(self):
        test_args = test_utils.ModelParamsDict(encoder_bidirectional=True,
                                               sequence_lstm=True)
        lexical_dictionaries = test_utils.create_lexical_dictionaries()
        test_args.vocab_reduction_params = {
            "lexical_dictionaries": lexical_dictionaries,
            "num_top_words": 5,
            "max_translation_candidates_per_word": 1,
        }

        test_args.arch = "char_source"
        test_args.char_source_dict_size = 126
        test_args.char_embed_dim = 8
        test_args.char_cnn_params = "[(10, 3), (10, 5)]"
        test_args.char_cnn_nonlinear_fn = "tanh"
        test_args.char_cnn_num_highway_layers = 2

        self._test_ensemble_encoder_export_char_source(test_args)
Example #8
 def test_load_data_single_path_idx_bin(self):
     test_args = test_utils.ModelParamsDict()
     test_args.source_lang = "en"
     test_args.target_lang = "fr"
     test_args.log_verbose = False
     src_dict, tgt_dict = test_utils.create_vocab_dictionaries()
     src_text_file, tgt_text_file = test_utils.create_test_text_files()
     task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
     with tempfile.TemporaryDirectory() as destdir:
         preprocess_args = [
             "--source-lang",
             test_args.source_lang,
             "--target-lang",
             test_args.target_lang,
             "--destdir",
             destdir,
         ]
         preproc_parser = preprocess_options.get_preprocessing_parser()
         preproc_args = preproc_parser.parse_args(preprocess_args)
         preproc_args.dataset_impl = "regular"  # no mmap
         split = "train"
         binarize(
             preproc_args,
             src_text_file,
             src_dict,
             split,
             test_args.source_lang,
             offset=0,
             end=-1,
         )
         binarize(
             preproc_args,
             tgt_text_file,
             tgt_dict,
             split,
             test_args.target_lang,
             offset=0,
             end=-1,
         )
         src_path = dataset_dest_prefix(preproc_args, split, test_args.source_lang)
         tgt_path = dataset_dest_prefix(preproc_args, split, test_args.target_lang)
         task.load_dataset(split, src_path, tgt_path, is_npz=False)
         self.assertEqual(len(task.datasets[split]), 4)
         self.assertIsInstance(task.datasets[split], LanguagePairDataset)
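
Conceptually, binarize maps each text line to token ids and stores them in a flat id buffer plus per-sentence offsets, which is the idea behind the .idx/.bin pair this test loads. A toy sketch with an illustrative vocabulary, independent of the actual fairseq binarizer API:

vocab = {"<pad>": 1, "<eos>": 2, "hello": 4, "world": 5}

def binarize_line(line, vocab, unk=3, eos=2):
    # Map tokens to ids, falling back to unk, and append EOS.
    return [vocab.get(tok, unk) for tok in line.split()] + [eos]

buffer, offsets = [], [0]
for line in ["hello world", "world hello hello"]:
    buffer.extend(binarize_line(line, vocab))
    offsets.append(len(buffer))

# The flat id buffer plays the role of the .bin file, offsets of the .idx file.
second = buffer[offsets[1]:offsets[2]]  # ids of the second sentence
print(buffer, offsets, second)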
Example #9
    def test_topk_kd_loss(self):
        """
        Makes sure that we can build the KD loss without problems.
        """
        test_args = test_utils.ModelParamsDict()
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        self.task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        sample = self._dummy_sample()
        model = self.task.build_model(test_args)
        net_output = model(**sample["net_input"])
        student_probs = model.get_normalized_probs(net_output, log_probs=True)
        # [bsz, seqlen, vocab] -> [bsz*seqlen, vocab]
        lprobs = student_probs.view(-1, student_probs.size(-1))

        teacher_model = self.task.build_model(test_args)
        teacher_probs = teacher_model.get_normalized_probs(net_output,
                                                           log_probs=False)
        top_k_teacher_probs, indices = torch.topk(teacher_probs, k=3)
        top_k_teacher_probs_normalized = F.normalize(top_k_teacher_probs,
                                                     p=1,
                                                     dim=2).detach()
        sample["top_k_scores"] = top_k_teacher_probs_normalized
        sample["top_k_indices"] = indices

        kd_criterion = knowledge_distillation_loss.KnowledgeDistillationCriterion(
            test_args, self.task)
        kd_loss, topk_probs = kd_criterion.get_kd_loss(sample, student_probs,
                                                       lprobs)

        # Assert that the top-k values are correctly scattered into topk_probs.
        for row in range(indices.shape[0]):
            for col in range(indices.shape[1]):
                # testing if values are normalized.
                assert round(float(torch.sum(topk_probs[row][col][:])),
                             0) == 1.0
                for i, val in enumerate(indices[row][col]):
                    # testing if scattering is done correctly.
                    assert (topk_probs[row][col][val] ==
                            top_k_teacher_probs_normalized[row][col][i])

        topk_probs_flat = topk_probs.view(-1, topk_probs.size(-1))
        kd_loss_2 = -torch.sum(topk_probs_flat * lprobs)
        np.testing.assert_almost_equal(kd_loss.item(), kd_loss_2.item(),
                                       decimal=4)
        assert kd_loss >= 0
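
The scatter check above is the core of top-k knowledge distillation: the teacher's renormalized top-k mass is scattered back into a full-vocabulary matrix and dotted with the student log-probabilities. A self-contained sketch of that computation, with random tensors standing in for model outputs:

import torch
import torch.nn.functional as F

bsz, seqlen, vocab, k = 2, 3, 10, 3
lprobs = torch.log_softmax(torch.randn(bsz, seqlen, vocab), dim=-1)    # student
teacher_probs = torch.softmax(torch.randn(bsz, seqlen, vocab), dim=-1)

top_k_probs, indices = torch.topk(teacher_probs, k=k)
top_k_probs = F.normalize(top_k_probs, p=1, dim=2)  # renormalize top-k mass

# Scatter the top-k probabilities back into a full-vocab matrix.
full = torch.zeros(bsz, seqlen, vocab).scatter(2, indices, top_k_probs)
kd_loss = -(full.view(-1, vocab) * lprobs.view(-1, vocab)).sum()
assert kd_loss >= 0  # each term -p * log q is non-negative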
Example #10
 def test_load_data_noising(self):
     test_args = test_utils.ModelParamsDict()
     test_args.source_lang = "en"
     test_args.target_lang = "fr"
     test_args.log_verbose = False
     src_dict, tgt_dict = test_utils.create_vocab_dictionaries()
     num_paths = 4
     src_bin_path, tgt_bin_path = {}, {}
     for i in range(num_paths):
         src_text_file, tgt_text_file = test_utils.create_test_text_files()
         src_bin_path[i] = preprocess.binarize_text_file(
             text_file=src_text_file,
             dictionary=src_dict,
             output_path=tempfile.NamedTemporaryFile().name,
             append_eos=True,
             reverse_order=False,
         )
         tgt_bin_path[i] = preprocess.binarize_text_file(
             text_file=tgt_text_file,
             dictionary=tgt_dict,
             output_path=tempfile.NamedTemporaryFile().name,
             append_eos=True,
             reverse_order=False,
         )
     task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
     split = "1"
     task.load_dataset(
         split,
         src_bin_path,
         tgt_bin_path,
         noiser={
             0:
             UnsupervisedMTNoising(
                 dictionary=src_dict,
                 max_word_shuffle_distance=3,
                 word_dropout_prob=0.2,
                 word_blanking_prob=0.2,
             )
         },
     )
     self.assertEqual(len(task.datasets[split]), 16)
     self.assertIsInstance(task.datasets[split].datasets[0].src,
                           NoisingDataset)
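
UnsupervisedMTNoising combines three corruptions: bounded word shuffling, word dropout, and word blanking. A toy sketch of those operations on a token list, mirroring the parameters above but not the exact fairseq implementation:

import random

def noise(tokens, k=3, p_drop=0.2, p_blank=0.2, blank="<unk>"):
    # Word shuffle: each word may move up to k positions (sort by noisy index).
    keys = [i + random.uniform(0, k) for i in range(len(tokens))]
    shuffled = [tok for _, tok in sorted(zip(keys, tokens))]
    out = []
    for tok in shuffled:
        r = random.random()
        if r < p_drop:
            continue                                        # word dropout
        out.append(blank if r < p_drop + p_blank else tok)  # word blanking
    return out

print(noise("the quick brown fox jumps over".split()))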
Example #11
 def test_diversity_sibling_rank(self):
     """
     Testing calculation of sibling_rank() function.
     """
     test_args = test_utils.ModelParamsDict()
     _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
     task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
     model = task.build_model(test_args)
     translator = beam_decode.SequenceGenerator([model],
                                                task.target_dictionary)
     logprobs = torch.FloatTensor([[[2, 1, 3, 5, 6], [0, 1, 3, 2, 4]],
                                   [[2, 3, 1, 5, 0], [3, 1, 5, 2, 0]]])
     logprobs_out = torch.FloatTensor([
         [[-1, -3, 1, 4, 6], [-4, -2, 2, 0, 4]],
         [[0, 2, -2, 5, -4], [2, -2, 5, 0, -4]],
     ])
     logprobs = translator.diversity_sibling_rank(logprobs, 1)
     np.testing.assert_allclose(actual=logprobs.numpy(),
                                desired=logprobs_out.view(-1, 5).numpy(),
                                atol=1e-5)
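
The fixture pins down the rule: within each row, every candidate's log-probability is penalized by gamma times its 0-based rank in descending order (gamma=1 here). A minimal sketch that reproduces the expected tensor:

import torch

def sibling_rank_penalty(logprobs, gamma):
    # Penalize each candidate by gamma times its 0-based rank within its row.
    vals, idx = logprobs.sort(dim=-1, descending=True)
    ranks = torch.arange(logprobs.size(-1), dtype=logprobs.dtype)
    out = torch.empty_like(logprobs)
    out.scatter_(-1, idx, vals - gamma * ranks)
    return out

x = torch.tensor([[2.0, 1.0, 3.0, 5.0, 6.0]])
print(sibling_rank_penalty(x, gamma=1))  # tensor([[-1., -3., 1., 4., 6.]])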
Example #12
    def test_convert_hypos_to_tgt_tokens(self):
        test_args = test_utils.ModelParamsDict()
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
        model = task.build_model(test_args)

        with patch(
                "pytorch_translate.utils.load_diverse_ensemble_for_inference",
                return_value=([model], test_args, task),
        ):
            scorer = SimpleModelScorer(test_args, None)

            hypos = [
                {
                    "tokens": torch.Tensor([1, 2, 3, 4, 5])
                },
                {
                    "tokens": torch.Tensor([1, 2, 3, 4])
                },
                {
                    "tokens": torch.Tensor([1, 2, 3])
                },
                {
                    "tokens": torch.Tensor([1, 2])
                },
                {
                    "tokens": torch.Tensor([1])
                },
            ]
            tgt_tokens = scorer.convert_hypos_to_tgt_tokens(hypos)

            pad = task.tgt_dict.pad()
            eos = task.tgt_dict.eos()
            expected_tgt_tokens = torch.Tensor([
                [eos, 1, 2, 3, 4, 5],
                [eos, 1, 2, 3, 4, pad],
                [eos, 1, 2, 3, pad, pad],
                [eos, 1, 2, pad, pad, pad],
                [eos, 1, pad, pad, pad, pad],
            ]).type_as(tgt_tokens)
            assert torch.equal(tgt_tokens, expected_tgt_tokens)
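
As the expected tensor shows, convert_hypos_to_tgt_tokens prepends EOS to each hypothesis and right-pads every row to the longest length. A sketch of that packing (eos=2 and pad=1 are fairseq's usual defaults, assumed here):

import torch

def to_tgt_tokens(hypos, eos, pad):
    # Prepend EOS to every hypothesis, then right-pad to a common length.
    max_len = 1 + max(len(h["tokens"]) for h in hypos)
    rows = []
    for h in hypos:
        toks = [eos] + h["tokens"].tolist()
        rows.append(toks + [pad] * (max_len - len(toks)))
    return torch.tensor(rows)

eos, pad = 2, 1
hypos = [{"tokens": torch.tensor([4, 5, 6])}, {"tokens": torch.tensor([4, 5])}]
print(to_tgt_tokens(hypos, eos, pad))
# tensor([[2, 4, 5, 6], [2, 4, 5, 1]])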
Example #13
    def test_model_passing_as_parameter(self):
        test_args = test_utils.ModelParamsDict("transformer")
        test_args.enable_rescoring = True
        test_args.l2r_model_weight = 1.0
        test_args.r2l_model_weight = 0.0
        test_args.reverse_model_weight = 0.0
        test_args.lm_model_weight = 1.01
        test_args.cloze_transformer_weight = 1.0
        test_args.length_penalty = 1.0

        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
        model = task.build_model(test_args)
        src_tokens = torch.tensor([[1, 2, 3, 4, 5]])
        hypos = [{"tokens": torch.tensor([1, 2])}, {"tokens": torch.tensor([1, 2])}]
        rescorer = Rescorer(
            test_args, task, {"l2r_model": {"model": model, "task": task}}
        )
        scores = rescorer.score(src_tokens, hypos)
        assert scores.size()[1] == 5
    def test_topk_kd_loss(self):
        """
        Makes sure that we can build the KD loss without problems.
        """
        test_args = test_utils.ModelParamsDict()
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        self.task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        sample = self._dummy_sample()
        model = self.task.build_model(test_args)
        net_output = model(**sample["net_input"])
        student_lprobs = model.get_normalized_probs(net_output, log_probs=True)
        # [bsz, seqlen, vocab] -> [bsz*seqlen, vocab]
        lprobs = student_lprobs.view(-1, student_lprobs.size(-1))

        teacher_model = self.task.build_model(test_args)
        teacher_probs = teacher_model.get_normalized_probs(net_output,
                                                           log_probs=False)
        top_k_teacher_probs, indices = torch.topk(teacher_probs, k=3)
        top_k_teacher_probs_normalized = F.normalize(top_k_teacher_probs,
                                                     p=1,
                                                     dim=2).detach()
        sample["top_k_scores"] = top_k_teacher_probs_normalized
        sample["top_k_indices"] = indices

        kd_criterion = (knowledge_distillation_loss.
                        KnowledgeDistillationCriterion.build_criterion(
                            test_args, self.task))
        kd_loss = kd_criterion.get_kd_loss(sample, student_lprobs, lprobs)

        # Calculate kd_loss using full matrix and compare
        topk_mask = torch.zeros(student_lprobs.shape).type_as(student_lprobs)
        topk_probs = topk_mask.scatter(2, indices,
                                       top_k_teacher_probs_normalized.float())
        topk_probs_flat = topk_probs.view(-1, topk_probs.size(-1))
        kd_loss_2 = -(torch.sum(topk_probs_flat * lprobs))
        np.testing.assert_almost_equal(kd_loss.item(),
                                       kd_loss_2.item(),
                                       decimal=4)
        assert kd_loss >= 0
Example #15
    def test_ensemble_encoder_export_unk_only_char_cnn_vocab_reduction(self):
        test_args = test_utils.ModelParamsDict(encoder_bidirectional=True,
                                               sequence_lstm=True)
        lexical_dictionaries = test_utils.create_lexical_dictionaries()
        test_args.vocab_reduction_params = {
            "lexical_dictionaries": lexical_dictionaries,
            "num_top_words": 5,
            "max_translation_candidates_per_word": 1,
        }

        test_args.arch = "char_source"
        test_args.char_source_dict_size = 126
        test_args.char_embed_dim = 8
        test_args.char_cnn_params = "[(50, 1), (76, 2), (130, 3)]"
        test_args.char_cnn_nonlinear_fn = "relu"
        test_args.char_cnn_pool_type = "max"
        test_args.char_cnn_num_highway_layers = 2
        test_args.char_cnn_output_dim = 64
        test_args.encoder_embed_dim = 64
        test_args.unk_only_char_encoding = True

        self._test_ensemble_encoder_export_char_source(test_args)
Example #16
    def test_batch_computation(self):
        test_args = test_utils.ModelParamsDict("transformer")
        test_args.enable_rescoring = True
        test_args.l2r_model_path = "/tmp/test_rescorer_model.pt"
        test_args.l2r_model_weight = 1.0
        test_args.r2l_model_weight = 0.0
        test_args.reverse_model_weight = 0.0
        test_args.cloze_transformer_weight = 1.0
        test_args.lm_model_weight = 0.0
        test_args.length_penalty = 1.0

        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
        model = task.build_model(test_args)
        torch.save(model, test_args.l2r_model_path)
        with patch(
            "pytorch_translate.utils.load_diverse_ensemble_for_inference",
            return_value=([model], test_args, task),
        ):
            rescorer = Rescorer(test_args)
            src_tokens = torch.tensor([[1, 3, 3, 4, 2], [1, 3, 2, 0, 0]])
            hypos = [
                {"tokens": torch.tensor([1, 5, 2])},
                {"tokens": torch.tensor([6, 3, 5, 2])},
                {"tokens": torch.tensor([1, 2])},
                {"tokens": torch.tensor([1, 5, 6, 2])},
            ]
            scores = rescorer.score(src_tokens, hypos)

            src_tokens = torch.tensor([[1, 3, 3, 4, 2]])
            hypos = [
                {"tokens": torch.tensor([1, 5, 2])},
                {"tokens": torch.tensor([6, 3, 5, 2])},
            ]
            scores_single = rescorer.score(src_tokens, hypos)

            assert torch.equal(scores[0], scores_single[0])
Example #17
 def test_batched_beam_decoder_transformer(self):
     test_args = test_utils.ModelParamsDict(transformer=True)
     self._test_batched_beam_decoder_step(test_args)
Example #18
 def test_ensemble_encoder_export_dual_decoder(self):
     test_args = test_utils.ModelParamsDict(arch="dual_decoder_kd")
     self._test_ensemble_encoder_export(test_args)
Example #19
    def test_forward_training(self):
        """
        We test that if we shuffle the input sample, we get correspondingly
        shuffled forward values, both in training mode (without dropout) and
        in eval mode.
        For the time being, we use an auxiliary hybrid_transformer_rnn
        to obtain the encoder output.
        """
        test_word_decoder_args = test_utils.ModelParamsDict(
            arch="hybrid_transformer_rnn")
        self.task = tasks.DictionaryHolderTask(self.word_dict, self.word_dict)
        word_model = maybe_cuda(self.task.build_model(test_word_decoder_args))
        word_model.eval()  # Make sure we do not apply dropout.

        test_args = test_utils.ModelParamsDict(arch="char_aware_hybrid")

        decoder_embed_tokens = maybe_cuda(
            transformer.build_embedding(dictionary=self.word_dict,
                                        embed_dim=10))
        decoder = maybe_cuda(
            char_aware_hybrid.CharAwareHybridRNNDecoder(
                args=test_args,
                src_dict=self.word_dict,
                dst_dict=self.word_dict,
                embed_tokens=decoder_embed_tokens,
                num_chars=len(self.char_dict),
            ))

        src_tokens = maybe_cuda(self.sample["net_input"]["src_tokens"])
        src_lengths = maybe_cuda(self.sample["net_input"]["src_lengths"])
        prev_output_chars = maybe_cuda(
            self.sample["net_input"]["prev_output_chars"][:, -1:, :].squeeze(1))
        prev_output_tokens = maybe_cuda(
            self.sample["net_input"]["prev_output_tokens"][:, 0:1])

        encoder_out = word_model.encoder(src_tokens, src_lengths)

        embed_output = decoder._embed_prev_outputs(
            prev_output_tokens=prev_output_tokens,
            prev_output_chars=prev_output_chars)[0]
        forward_output = decoder(
            prev_output_tokens=prev_output_tokens,
            encoder_out=encoder_out,
            prev_output_chars=prev_output_chars,
        )
        output_logits = forward_output[0]

        prev_output_tokens_shuffled = torch.cat(
            [prev_output_tokens[1:], prev_output_tokens[0].unsqueeze(0)],
            dim=0)
        prev_output_chars_shuffled = torch.cat(
            [prev_output_chars[1:], prev_output_chars[0].unsqueeze(0)], dim=0)
        src_tokens_shuffled = torch.cat(
            [src_tokens[1:], src_tokens[0].unsqueeze(0)], dim=0)

        # Making sure shuffling is done correctly.
        assert torch.equal(src_tokens[0], src_tokens_shuffled[2])
        assert torch.equal(src_tokens[1], src_tokens_shuffled[0])
        assert torch.equal(src_tokens[2], src_tokens_shuffled[1])
        assert torch.equal(prev_output_chars[0], prev_output_chars_shuffled[2])
        assert torch.equal(prev_output_chars[1], prev_output_chars_shuffled[0])
        assert torch.equal(prev_output_chars[2], prev_output_chars_shuffled[1])
        assert torch.equal(prev_output_tokens[0],
                           prev_output_tokens_shuffled[2])
        assert torch.equal(prev_output_tokens[1],
                           prev_output_tokens_shuffled[0])
        assert torch.equal(prev_output_tokens[2],
                           prev_output_tokens_shuffled[1])

        # Making sure that we embed the inputs correctly.
        encoder_out_shuffled = word_model.encoder(src_tokens_shuffled,
                                                  src_lengths)
        embed_output_shuffled = decoder._embed_prev_outputs(
            prev_output_tokens=prev_output_tokens_shuffled,
            prev_output_chars=prev_output_chars_shuffled,
        )[0]
        assert embed_output[0, 0].equal(embed_output_shuffled[0, 2])
        assert embed_output[0, 1].equal(embed_output_shuffled[0, 0])
        assert embed_output[0, 2].equal(embed_output_shuffled[0, 1])

        # Making sure the output of the forward function is correct.
        forward_output_shuffled = decoder(
            prev_output_tokens=prev_output_tokens_shuffled,
            encoder_out=encoder_out_shuffled,
            prev_output_chars=prev_output_chars_shuffled,
        )
        output_logits_shuffled = forward_output_shuffled[0]

        assert encoder_out[0][:, 0, :].equal(encoder_out_shuffled[0][:, 2, :])
        assert encoder_out[0][:, 1, :].equal(encoder_out_shuffled[0][:, 0, :])
        assert encoder_out[0][:, 2, :].equal(encoder_out_shuffled[0][:, 1, :])

        assert output_logits[0].equal(output_logits_shuffled[2])
        assert output_logits[1].equal(output_logits_shuffled[0])
        assert output_logits[2].equal(output_logits_shuffled[1])
        """
        Now trying in the eval mode.
        """
        decoder.eval()
        forward_output = decoder(
            prev_output_tokens=prev_output_tokens,
            encoder_out=encoder_out,
            prev_output_chars=prev_output_chars,
        )
        output_logits = forward_output[0]
        forward_output_shuffled = decoder(
            prev_output_tokens=prev_output_tokens_shuffled,
            encoder_out=encoder_out_shuffled,
            prev_output_chars=prev_output_chars_shuffled,
        )
        output_logits_shuffled = forward_output_shuffled[0]
        assert output_logits[0].equal(output_logits_shuffled[2])
        assert output_logits[1].equal(output_logits_shuffled[0])
        assert output_logits[2].equal(output_logits_shuffled[1])
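
The shuffle-and-compare pattern used throughout this test generalizes: a deterministic, batch-first module should be equivariant to any permutation of the batch. A generic sketch of the check (check_batch_equivariance is a hypothetical helper; it assumes the module returns a single batch-first tensor):

import torch

def check_batch_equivariance(module, inputs, atol=1e-6):
    # Roll each batch-first input by one and check the outputs roll accordingly.
    # Call module.eval() first so dropout cannot break determinism.
    rolled = [torch.cat([x[1:], x[:1]], dim=0) for x in inputs]
    out = module(*inputs)
    out_rolled = module(*rolled)
    assert torch.allclose(torch.cat([out[1:], out[:1]], dim=0), out_rolled,
                          atol=atol)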
Example #20
 def test_batched_beam_decoder_default(self):
     test_args = test_utils.ModelParamsDict(
         encoder_bidirectional=True,
         sequence_lstm=True,
     )
     self._test_batched_beam_decoder_step(test_args)
Example #21
 def test_merge_transpose_and_batchmatmul(self):
     test_args = test_utils.ModelParamsDict(transformer=True)
     caffe2_rep = self._test_batched_beam_decoder_step(
         test_args, return_caffe2_rep=True)
     merge_transpose_and_batchmatmul(caffe2_rep)
Example #22
 def test_full_beam_decoder(self):
     test_args = test_utils.ModelParamsDict(encoder_bidirectional=True,
                                            sequence_lstm=True)
     self._test_full_beam_decoder(test_args)
Example #23
 def test_forced_decoder_export_default(self):
     test_args = test_utils.ModelParamsDict(encoder_bidirectional=True,
                                            sequence_lstm=True)
     self._test_forced_decoder_export(test_args)
Example #24
 def test_gpu_freeze_embedding(self):
     test_args = test_utils.ModelParamsDict(encoder_freeze_embed=True,
                                            decoder_freeze_embed=True)
     self._gpu_train_step(test_args)
Example #25
 def test_ensemble_transformer_encoder_export(self):
     test_args = test_utils.ModelParamsDict(transformer=True)
     self._test_ensemble_encoder_export(test_args)
Example #26
 def test_sequence_lstm_encoder(self):
     test_args = test_utils.ModelParamsDict(encoder_bidirectional=True,
                                            sequence_lstm=True)
     trainer, _ = self._gpu_train_step(test_args)
     assert trainer.get_meter("gnorm").avg > 0
Example #27
 def test_full_ensemble_export_default(self):
     test_args = test_utils.ModelParamsDict(
         encoder_bidirectional=True,
         sequence_lstm=True,
     )
     self._test_full_ensemble_export(test_args)
Example #28
    def test_layer_norm_lstm_cell(self):
        test_args = test_utils.ModelParamsDict(cell_type="layer_norm_lstm")
        trainer, _ = self._gpu_train_step(test_args)
        assert trainer.get_meter("gnorm").avg > 0

    def test_beam_search_and_decode_generate(self):
        """
        A basic test that the output given by the BeamSearchAndDecode class
        is the same as SequenceGenerator's.
        """
        test_args = test_utils.ModelParamsDict(arch="rnn")
        test_args.sequence_lstm = True
        BEAM_SIZE = 1
        WORD_REWARD = 1
        UNK_REWARD = -1
        LENGTH_PENALTY = 0

        PLACEHOLDER_SEQ_LENGTH = 5
        NBEST = 2
        MAX_SEQ_LEN = 7

        src_tokens = torch.LongTensor([[0, 0, 0]])
        src_lengths = torch.LongTensor([3])

        # Build model list
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        model = task.build_model(test_args)

        # Placeholder inputs for BeamSearchAndDecode
        placeholder_src_tokens = torch.LongTensor(
            np.ones((PLACEHOLDER_SEQ_LENGTH, 1), dtype="int64"))
        placeholder_src_lengths = torch.IntTensor(
            np.array([PLACEHOLDER_SEQ_LENGTH], dtype="int32"))
        prev_token = torch.LongTensor([tgt_dict.eos()])
        prev_scores = torch.FloatTensor([0.0])
        attn_weights = torch.zeros(src_lengths[0].item())
        prev_hypos_indices = torch.zeros(BEAM_SIZE, dtype=torch.int64)
        num_steps = torch.LongTensor([MAX_SEQ_LEN])

        # Generate output using SequenceGenerator
        translator = SequenceGenerator(
            [model],
            task.target_dictionary,
            beam_size=BEAM_SIZE,
            word_reward=WORD_REWARD,
            unk_reward=UNK_REWARD,
        )

        encoder_input = {"src_tokens": src_tokens, "src_lengths": src_lengths}
        top_seq_gen_hypothesis = translator.generate(encoder_input,
                                                     beam_size=BEAM_SIZE,
                                                     maxlen=MAX_SEQ_LEN)[0]

        # Generate output using BeamSearchAndDecode class
        beam_search_and_decode = BeamSearchAndDecode(
            [model],
            tgt_dict=tgt_dict,
            src_tokens=placeholder_src_tokens,
            src_lengths=placeholder_src_lengths,
            eos_token_id=tgt_dict.eos(),
            length_penalty=LENGTH_PENALTY,
            nbest=NBEST,
            beam_size=BEAM_SIZE,
            stop_at_eos=True,
            word_reward=WORD_REWARD,
            unk_reward=UNK_REWARD,
            quantize=True,
        )
        beam_search_and_decode_output = beam_search_and_decode(
            src_tokens.transpose(0, 1),
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            num_steps[0],
        )

        for hyp_index in range(
                min(len(beam_search_and_decode_output),
                    len(top_seq_gen_hypothesis))):
            beam_search_and_decode_hypothesis = beam_search_and_decode_output[
                hyp_index]

            # Compare two outputs
            # We always look only from 0 to MAX_SEQ_LEN, because sequence generator
            # adds an EOS at the end after MAX_SEQ_LEN

            # Compare two hypotheses
            np.testing.assert_array_equal(
                top_seq_gen_hypothesis[hyp_index]["tokens"].tolist()
                [0:MAX_SEQ_LEN],
                beam_search_and_decode_hypothesis[0].tolist()[0:MAX_SEQ_LEN],
            )
            # Compare token level scores
            np.testing.assert_array_almost_equal(
                top_seq_gen_hypothesis[hyp_index]
                ["positional_scores"].tolist()[0:MAX_SEQ_LEN],
                beam_search_and_decode_hypothesis[2][0:MAX_SEQ_LEN],
                decimal=1,
            )

            # Compare attention weights
            np.testing.assert_array_almost_equal(
                top_seq_gen_hypothesis[hyp_index]["attention"].numpy()
                [:, 0:MAX_SEQ_LEN],
                beam_search_and_decode_hypothesis[3].numpy()[:, 0:MAX_SEQ_LEN],
                decimal=1,
            )
Example #30
 def test_beam_component_equivalence_default(self):
     test_args = test_utils.ModelParamsDict(encoder_bidirectional=True,
                                            sequence_lstm=True)
     self._test_beam_component_equivalence(test_args)