Exemplo n.º 1
0
    def _test_ensemble_encoder_export(self, test_args):
        """Export an ensemble of encoders to ONNX and verify the Caffe2
        backend reproduces the PyTorch outputs, then exercise the
        predictor-export (save_to_db) path.

        Cleanup: parallel outputs are iterated with zip() instead of
        range(len(...)), and the model list is built with a comprehension.
        """
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        # Several identical-architecture models to exercise the ensemble path.
        num_models = 3
        model_list = [task.build_model(test_args) for _ in range(num_models)]
        encoder_ensemble = EncoderEnsemble(model_list)

        tmp_dir = tempfile.mkdtemp()
        encoder_pb_path = os.path.join(tmp_dir, "encoder.pb")
        encoder_ensemble.onnx_export(encoder_pb_path)

        # test equivalence
        # The discrepancy in types here is a temporary expedient.
        # PyTorch indexing requires int64 while support for tracing
        # pack_padded_sequence() requires int32.
        sample = next(samples)
        src_tokens = sample["net_input"]["src_tokens"][0:1].t()
        src_lengths = sample["net_input"]["src_lengths"][0:1].int()

        pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

        onnx_encoder = caffe2_backend.prepare_zip_archive(encoder_pb_path)

        caffe2_encoder_outputs = onnx_encoder.run(
            (src_tokens.numpy(), src_lengths.numpy()))

        # Every exported output tensor must numerically match its PyTorch
        # counterpart.
        for caffe2_out_value, pytorch_out in zip(caffe2_encoder_outputs,
                                                 pytorch_encoder_outputs):
            np.testing.assert_allclose(caffe2_out_value,
                                       pytorch_out.detach().numpy(),
                                       rtol=1e-4,
                                       atol=1e-6)

        # Also exercise the predictor-export code path.
        encoder_ensemble.save_to_db(
            os.path.join(tmp_dir, "encoder.predictor_export"))
Exemplo n.º 2
0
    def test_batch_computation(self):
        """Rescoring a hypothesis in a large batch must give the same score
        as rescoring it in a smaller batch.

        Fix: `length_penalty` was assigned twice (first `1`, then `1.0`);
        the redundant first assignment is removed.
        """
        test_args = test_utils.ModelParamsDict("transformer")
        test_args.enable_rescoring = True
        test_args.l2r_model_path = "/tmp/test_rescorer_model.pt"
        test_args.l2r_model_weight = 1.0
        test_args.r2l_model_weight = 0.0
        test_args.reverse_model_weight = 0.0
        test_args.cloze_transformer_weight = 1.0
        test_args.lm_model_weight = 0.0
        test_args.length_penalty = 1.0

        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
        model = task.build_model(test_args)
        torch.save(model, test_args.l2r_model_path)
        # Patch ensemble loading so Rescorer uses the model built above
        # rather than reading a real checkpoint from disk.
        with patch(
            "pytorch_translate.utils.load_diverse_ensemble_for_inference",
            return_value=([model], test_args, task),
        ):
            rescorer = Rescorer(test_args)
            src_tokens = torch.tensor([[1, 3, 3, 4, 2], [1, 3, 2, 0, 0]])
            hypos = [
                {"tokens": torch.tensor([1, 5, 2])},
                {"tokens": torch.tensor([6, 3, 5, 2])},
                {"tokens": torch.tensor([1, 2])},
                {"tokens": torch.tensor([1, 5, 6, 2])},
            ]
            scores = rescorer.score(src_tokens, hypos)

            # Re-score only the first source with two hypotheses.
            src_tokens = torch.tensor([[1, 3, 3, 4, 2]])
            hypos = [
                {"tokens": torch.tensor([1, 5, 2])},
                {"tokens": torch.tensor([6, 3, 5, 2])},
            ]
            scores_single = rescorer.score(src_tokens, hypos)

            # Batch composition must not affect a hypothesis's score.
            assert torch.equal(scores[0], scores_single[0])
    def test_topk_kd_loss(self):
        """
        Makes sure that we can build KD loss without problem.

        Computes the top-k knowledge-distillation loss through the
        criterion's get_kd_loss() and cross-checks it against a direct
        dense computation of -sum(teacher_probs * student_log_probs).
        """
        test_args = test_utils.ModelParamsDict()
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        self.task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        sample = self._dummy_sample()
        model = self.task.build_model(test_args)
        net_output = model(**sample["net_input"])
        student_lprobs = model.get_normalized_probs(net_output, log_probs=True)
        # [bsz, seqlen, vocab] -> [bsz*seqlen, vocab]
        lprobs = student_lprobs.view(-1, student_lprobs.size(-1))

        # NOTE(review): the teacher probabilities below are normalized from
        # the *student's* net_output — presumably sufficient for this test;
        # confirm whether a separate teacher forward pass was intended.
        teacher_model = self.task.build_model(test_args)
        teacher_probs = teacher_model.get_normalized_probs(net_output, log_probs=False)
        # Keep the k=3 most probable tokens per position and renormalize
        # them to sum to 1 along the vocab dimension.
        top_k_teacher_probs, indices = torch.topk(teacher_probs, k=3)
        top_k_teacher_probs_normalized = F.normalize(
            top_k_teacher_probs, p=1, dim=2
        ).detach()
        sample["top_k_scores"] = top_k_teacher_probs_normalized
        sample["top_k_indices"] = indices

        kd_criterion = knowledge_distillation_loss.KnowledgeDistillationCriterion.build_criterion(
            test_args, self.task
        )
        kd_loss = kd_criterion.get_kd_loss(sample, student_lprobs, lprobs)

        # Calculate kd_loss using full matrix and compare
        # Scatter the sparse top-k probabilities into a dense zero tensor
        # of shape [bsz, seqlen, vocab].
        topk_mask = torch.zeros(student_lprobs.shape).type_as(student_lprobs)
        topk_probs = topk_mask.scatter(
            2, indices, top_k_teacher_probs_normalized.float()
        )
        topk_probs_flat = topk_probs.view(-1, topk_probs.size(-1))
        kd_loss_2 = -torch.sum(topk_probs_flat * lprobs)
        np.testing.assert_almost_equal(kd_loss.item(), kd_loss_2.item(), decimal=4)
        # Cross-entropy of non-negative probs with log-probs is >= 0.
        assert kd_loss >= 0
Exemplo n.º 4
0
    def test_model_passing_as_parameter(self):
        """Rescorer should accept pre-built models passed in directly
        instead of loading them from checkpoint paths.

        Fix: `length_penalty` was assigned twice (first `1`, then `1.0`);
        the redundant first assignment is removed.
        """
        test_args = test_utils.ModelParamsDict("transformer")
        test_args.enable_rescoring = True
        test_args.l2r_model_weight = 1.0
        test_args.r2l_model_weight = 0.0
        test_args.reverse_model_weight = 0.0
        test_args.lm_model_weight = 1.01
        test_args.cloze_transformer_weight = 1.0
        test_args.length_penalty = 1.0

        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
        model = task.build_model(test_args)
        # NOTE(review): this test requires a CUDA device — presumably gated
        # to GPU CI; confirm before running on CPU-only hosts.
        src_tokens = torch.tensor([1, 2, 3, 4, 5]).cuda()
        hypos = [
            {"tokens": torch.tensor([1, 2]).cuda()},
            {"tokens": torch.tensor([1, 2]).cuda()},
        ]
        # Pass the already-built model/task directly; no checkpoint load.
        rescorer = Rescorer(
            test_args, task, {"l2r_model": {"model": model, "task": task}}
        )
        scores = rescorer.score(src_tokens, hypos)
        # Expect 5 score columns per hypothesis from the rescorer.
        assert scores.size()[1] == 5
    def test_beam_search_and_decode_generate(self):
        """
        A basic test that the output given by BeamSearchAndDecode class
        is the same as SequenceGenerator

        Fix: the placeholder src token/length tensors were constructed
        twice; the dead first construction is removed.
        """
        test_args = test_utils.ModelParamsDict(arch="rnn")
        test_args.sequence_lstm = True
        BEAM_SIZE = 1
        WORD_REWARD = 1
        UNK_REWARD = -1
        LENGTH_PENALTY = 0

        PLACEHOLDER_SEQ_LENGTH = 5
        NBEST = 2
        MAX_SEQ_LEN = 7

        src_tokens = torch.LongTensor([[0, 0, 0]])
        src_lengths = torch.LongTensor([3])

        # Build model list
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        models = task.build_model(test_args)

        # Initial single-hypothesis decoder inputs for BeamSearchAndDecode.
        prev_token = torch.LongTensor([tgt_dict.eos()])
        prev_scores = torch.FloatTensor([0.0])
        attn_weights = torch.zeros(src_lengths[0].item())
        prev_hypos_indices = torch.zeros(BEAM_SIZE, dtype=torch.int64)
        num_steps = torch.LongTensor([MAX_SEQ_LEN])

        # Generate output using SequenceGenerator
        translator = SequenceGenerator(
            [models],
            task.target_dictionary,
            beam_size=BEAM_SIZE,
            word_reward=WORD_REWARD,
            unk_reward=UNK_REWARD,
        )

        encoder_input = {"src_tokens": src_tokens, "src_lengths": src_lengths}
        top_seq_gen_hypothesis = translator.generate(encoder_input,
                                                     beam_size=BEAM_SIZE,
                                                     maxlen=MAX_SEQ_LEN)[0]

        # Placeholder inputs used only to initialize BeamSearchAndDecode;
        # the actual call below uses different shapes.
        placeholder_src_tokens = torch.LongTensor(
            np.ones((PLACEHOLDER_SEQ_LENGTH, 1), dtype="int64"))
        placeholder_src_lengths = torch.IntTensor(
            np.array([PLACEHOLDER_SEQ_LENGTH], dtype="int32"))

        # Generate output using BeamSearchAndDecode class
        beam_search_and_decode = BeamSearchAndDecode(
            [models],
            tgt_dict=tgt_dict,
            src_tokens=placeholder_src_tokens,
            src_lengths=placeholder_src_lengths,
            eos_token_id=tgt_dict.eos(),
            length_penalty=LENGTH_PENALTY,
            nbest=NBEST,
            beam_size=BEAM_SIZE,
            stop_at_eos=True,
            word_reward=WORD_REWARD,
            unk_reward=UNK_REWARD,
            quantize=True,
        )
        beam_search_and_decode_output = beam_search_and_decode(
            src_tokens.transpose(0, 1),
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            num_steps[0],
        )

        for hyp_index in range(
                min(len(beam_search_and_decode_output),
                    len(top_seq_gen_hypothesis))):
            beam_search_and_decode_hypothesis = beam_search_and_decode_output[
                hyp_index]

            # Compare two outputs
            # We always look only from 0 to MAX_SEQ_LEN, because sequence generator
            # adds an EOS at the end after MAX_SEQ_LEN

            # Compare two hypotheses
            np.testing.assert_array_equal(
                top_seq_gen_hypothesis[hyp_index]["tokens"].tolist()
                [0:MAX_SEQ_LEN],
                beam_search_and_decode_hypothesis[0].tolist()[0:MAX_SEQ_LEN],
            )
            # Compare token level scores
            np.testing.assert_array_almost_equal(
                top_seq_gen_hypothesis[hyp_index]
                ["positional_scores"].tolist()[0:MAX_SEQ_LEN],
                beam_search_and_decode_hypothesis[2][0:MAX_SEQ_LEN],
                decimal=1,
            )

            # Compare attention weights
            np.testing.assert_array_almost_equal(
                top_seq_gen_hypothesis[hyp_index]["attention"].numpy()
                [:, 0:MAX_SEQ_LEN],
                beam_search_and_decode_hypothesis[3].numpy()[:, 0:MAX_SEQ_LEN],
                decimal=1,
            )
Exemplo n.º 6
0
    def _test_batched_beam_decoder_step(self, test_args):
        """Export a DecoderBatchedStepEnsemble to ONNX and verify the
        Caffe2 backend reproduces PyTorch outputs for the second decoding
        step (beam of size `beam_size` after the initial EOS step)."""
        beam_size = 5
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)

        # Several identical-architecture models to exercise the ensemble path.
        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(models.build_model(test_args, src_dict,
                                                 tgt_dict))
        encoder_ensemble = EncoderEnsemble(model_list)

        # test equivalence
        # The discrepancy in types here is a temporary expedient.
        # PyTorch indexing requires int64 while support for tracing
        # pack_padded_sequence() requires int32.
        sample = next(samples)
        src_tokens = sample['net_input']['src_tokens'][0:1].t()
        src_lengths = sample['net_input']['src_lengths'][0:1].int()

        pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

        decoder_step_ensemble = DecoderBatchedStepEnsemble(
            model_list,
            beam_size=beam_size,
        )

        tmp_dir = tempfile.mkdtemp()
        decoder_step_pb_path = os.path.join(tmp_dir, 'decoder_step.pb')
        decoder_step_ensemble.onnx_export(
            decoder_step_pb_path,
            pytorch_encoder_outputs,
        )

        # single EOS in flat array
        input_tokens = torch.LongTensor(
            np.array([model_list[0].dst_dict.eos()]), )
        prev_scores = torch.FloatTensor(np.array([0.0]))
        timestep = torch.LongTensor(np.array([0]))

        pytorch_first_step_outputs = decoder_step_ensemble(
            input_tokens, prev_scores, timestep, *pytorch_encoder_outputs)

        # next step inputs (input_tokens shape: [beam_size])
        next_input_tokens = torch.LongTensor(
            np.array([i for i in range(4, 9)]), )

        next_prev_scores = pytorch_first_step_outputs[1]
        next_timestep = timestep + 1
        # Outputs after index 3 are fed back into the next decoder step —
        # presumably per-model recurrent states; confirm against
        # DecoderBatchedStepEnsemble's output layout.
        next_states = pytorch_first_step_outputs[4:]

        step_inputs = []

        # encoder outputs need to be replicated for each input hypothesis
        for encoder_rep in pytorch_encoder_outputs[:len(model_list)]:
            step_inputs.append(encoder_rep.repeat(1, beam_size, 1))

        # Vocab-reduction models produce one extra encoder-output tensor.
        if model_list[0].decoder.vocab_reduction_module is not None:
            step_inputs.append(pytorch_encoder_outputs[len(model_list)])

        step_inputs.extend(list(next_states))

        pytorch_next_step_outputs = decoder_step_ensemble(
            next_input_tokens, next_prev_scores, next_timestep, *step_inputs)

        # Load the exported graph and run it under the Caffe2 backend.
        with open(decoder_step_pb_path, 'r+b') as f:
            onnx_model = onnx.load(f)
        onnx_decoder = caffe2_backend.prepare(onnx_model)

        decoder_inputs_numpy = [
            next_input_tokens.numpy(),
            next_prev_scores.detach().numpy(),
            next_timestep.detach().numpy(),
        ]
        for tensor in step_inputs:
            decoder_inputs_numpy.append(tensor.detach().numpy())

        caffe2_next_step_outputs = onnx_decoder.run(
            tuple(decoder_inputs_numpy), )

        # Exported model must numerically match the PyTorch ensemble.
        for i in range(len(pytorch_next_step_outputs)):
            caffe2_out_value = caffe2_next_step_outputs[i]
            pytorch_out_value = pytorch_next_step_outputs[i].data.numpy()
            np.testing.assert_allclose(
                caffe2_out_value,
                pytorch_out_value,
                rtol=1e-4,
                atol=1e-6,
            )
Exemplo n.º 7
0
    def _test_full_ensemble_export(self, test_args):
        """Export a DecoderStepEnsemble to ONNX, verify the Caffe2 backend
        matches PyTorch for a single (initial EOS) decoding step, then
        exercise the predictor-export (save_to_db) path."""
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)

        # Several identical-architecture models to exercise the ensemble path.
        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(models.build_model(test_args, src_dict,
                                                 tgt_dict))
        encoder_ensemble = EncoderEnsemble(model_list)

        # test equivalence
        # The discrepancy in types here is a temporary expedient.
        # PyTorch indexing requires int64 while support for tracing
        # pack_padded_sequence() requires int32.
        sample = next(samples)
        src_tokens = sample['net_input']['src_tokens'][0:1].t()
        src_lengths = sample['net_input']['src_lengths'][0:1].int()

        pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

        decoder_step_ensemble = DecoderStepEnsemble(
            model_list,
            beam_size=5,
        )

        tmp_dir = tempfile.mkdtemp()
        decoder_step_pb_path = os.path.join(tmp_dir, 'decoder_step.pb')
        decoder_step_ensemble.onnx_export(
            decoder_step_pb_path,
            pytorch_encoder_outputs,
        )

        # single EOS
        input_token = torch.LongTensor(
            np.array([[model_list[0].dst_dict.eos()]]), )
        timestep = torch.LongTensor(np.array([[0]]))

        pytorch_decoder_outputs = decoder_step_ensemble(
            input_token, timestep, *pytorch_encoder_outputs)

        # Load the exported graph and run it under the Caffe2 backend.
        with open(decoder_step_pb_path, 'r+b') as f:
            onnx_model = onnx.load(f)
        onnx_decoder = caffe2_backend.prepare(onnx_model)

        decoder_inputs_numpy = [input_token.numpy(), timestep.numpy()]
        for tensor in pytorch_encoder_outputs:
            decoder_inputs_numpy.append(tensor.detach().numpy())

        caffe2_decoder_outputs = onnx_decoder.run(tuple(decoder_inputs_numpy))

        # Exported model must numerically match the PyTorch ensemble.
        for i in range(len(pytorch_decoder_outputs)):
            caffe2_out_value = caffe2_decoder_outputs[i]
            pytorch_out_value = pytorch_decoder_outputs[i].data.numpy()
            np.testing.assert_allclose(
                caffe2_out_value,
                pytorch_out_value,
                rtol=1e-4,
                atol=1e-6,
            )

        # Also exercise save_to_db (predictor export) with the same outputs.
        decoder_step_ensemble.save_to_db(
            os.path.join(tmp_dir, 'decoder_step.predictor_export'),
            pytorch_encoder_outputs,
        )
Exemplo n.º 8
0
    def _test_full_beam_decoder(self, test_args):
        """Run BeamSearch end to end, export it with torch.onnx._export to
        an in-memory buffer, and smoke-test the exported graph under the
        Caffe2 backend.

        Cleanup: the mid-body `import io` is hoisted to the top of the
        function, and the model list is built with a comprehension.
        """
        import io

        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        sample = next(samples)
        src_tokens = sample["net_input"]["src_tokens"][0:1].t()
        src_lengths = sample["net_input"]["src_lengths"][0:1].int()

        num_models = 3
        model_list = [task.build_model(test_args) for _ in range(num_models)]

        bs = BeamSearch(model_list,
                        tgt_dict,
                        src_tokens,
                        src_lengths,
                        beam_size=6)
        # Initial single-hypothesis decoder-step inputs.
        prev_token = torch.LongTensor([0])
        prev_scores = torch.FloatTensor([0.0])
        attn_weights = torch.zeros(11)
        prev_hypos_indices = torch.zeros(6, dtype=torch.int64)

        outs = bs(
            src_tokens,
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            torch.LongTensor([20]),
        )

        # Export the traced module to an in-memory ONNX protobuf.
        f = io.BytesIO()
        torch.onnx._export(
            bs,
            (
                src_tokens,
                src_lengths,
                prev_token,
                prev_scores,
                attn_weights,
                prev_hypos_indices,
                torch.LongTensor([20]),
            ),
            f,
            export_params=True,
            verbose=False,
            example_outputs=outs,
        )

        f.seek(0)

        # Smoke test: the exported graph must at least run under Caffe2
        # with the same inputs (outputs are not compared here).
        onnx_model = onnx.load(f)
        c2_model = caffe2_backend.prepare(onnx_model)
        c2_model.run((
            src_tokens.numpy(),
            src_lengths.numpy(),
            prev_token.numpy(),
            prev_scores.numpy(),
            attn_weights.numpy(),
            prev_hypos_indices.numpy(),
            np.array([20]),
        ))
Exemplo n.º 9
0
    def _test_batched_beam_decoder_step(self,
                                        test_args,
                                        return_caffe2_rep=False):
        """Export a DecoderBatchedStepEnsemble (zip-archive format) to ONNX
        and verify the Caffe2 backend matches PyTorch for the second
        decoding step; optionally return the Caffe2 rep for reuse.

        Args:
            test_args: model/config parameters for building the models.
            return_caffe2_rep: if True, stop after preparing the Caffe2
                backend and return it instead of running the comparison.
        """
        beam_size = 5
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        # Several identical-architecture models to exercise the ensemble path.
        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(task.build_model(test_args))
        encoder_ensemble = EncoderEnsemble(model_list)

        # test equivalence
        # The discrepancy in types here is a temporary expedient.
        # PyTorch indexing requires int64 while support for tracing
        # pack_padded_sequence() requires int32.
        sample = next(samples)
        src_tokens = sample["net_input"]["src_tokens"][0:1].t()
        src_lengths = sample["net_input"]["src_lengths"][0:1].int()

        pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

        decoder_step_ensemble = DecoderBatchedStepEnsemble(model_list,
                                                           tgt_dict,
                                                           beam_size=beam_size)

        tmp_dir = tempfile.mkdtemp()
        decoder_step_pb_path = os.path.join(tmp_dir, "decoder_step.pb")
        decoder_step_ensemble.onnx_export(decoder_step_pb_path,
                                          pytorch_encoder_outputs)

        # single EOS in flat array
        input_tokens = torch.LongTensor(np.array([tgt_dict.eos()]))
        prev_scores = torch.FloatTensor(np.array([0.0]))
        timestep = torch.LongTensor(np.array([0]))

        pytorch_first_step_outputs = decoder_step_ensemble(
            input_tokens, prev_scores, timestep, *pytorch_encoder_outputs)

        # next step inputs (input_tokens shape: [beam_size])
        next_input_tokens = torch.LongTensor(np.array([i
                                                       for i in range(4, 9)]))

        next_prev_scores = pytorch_first_step_outputs[1]
        next_timestep = timestep + 1
        # Outputs after index 3 are fed back into the next decoder step —
        # presumably per-model recurrent states; confirm against
        # DecoderBatchedStepEnsemble's output layout.
        next_states = list(pytorch_first_step_outputs[4:])

        # Tile these for the next timestep
        for i in range(len(model_list)):
            next_states[i] = next_states[i].repeat(1, beam_size, 1)

        pytorch_next_step_outputs = decoder_step_ensemble(
            next_input_tokens, next_prev_scores, next_timestep, *next_states)

        onnx_decoder = caffe2_backend.prepare_zip_archive(decoder_step_pb_path)

        if return_caffe2_rep:
            return onnx_decoder

        decoder_inputs_numpy = [
            next_input_tokens.numpy(),
            next_prev_scores.detach().numpy(),
            next_timestep.detach().numpy(),
        ]
        for tensor in next_states:
            decoder_inputs_numpy.append(tensor.detach().numpy())

        caffe2_next_step_outputs = onnx_decoder.run(
            tuple(decoder_inputs_numpy))

        # Exported model must numerically match the PyTorch ensemble.
        for i in range(len(pytorch_next_step_outputs)):
            caffe2_out_value = caffe2_next_step_outputs[i]
            pytorch_out_value = pytorch_next_step_outputs[i].detach().numpy()
            np.testing.assert_allclose(caffe2_out_value,
                                       pytorch_out_value,
                                       rtol=1e-4,
                                       atol=1e-6)
        # Also exercise save_to_db (predictor export) with the same outputs.
        decoder_step_ensemble.save_to_db(
            output_path=os.path.join(tmp_dir, "decoder.predictor_export"),
            encoder_ensemble_outputs=pytorch_encoder_outputs,
        )
Exemplo n.º 10
0
    def _test_beam_component_equivalence(self, test_args):
        """Run EncoderEnsemble + DecoderBatchedStepEnsemble by hand for two
        decoding steps and check the result matches the monolithic
        BeamSearch module.

        Fix: `beam_size = 5` was assigned twice; the redundant second
        assignment is removed.
        """
        beam_size = 5
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(task.build_model(test_args))

        # to initialize BeamSearch object
        sample = next(samples)
        # [seq len, batch size=1]
        src_tokens = sample["net_input"]["src_tokens"][0:1].t()
        # [seq len]
        src_lengths = sample["net_input"]["src_lengths"][0:1].long()

        full_beam_search = BeamSearch(model_list,
                                      tgt_dict,
                                      src_tokens,
                                      src_lengths,
                                      beam_size=beam_size)

        encoder_ensemble = EncoderEnsemble(model_list)

        # to initialize decoder_step_ensemble
        with torch.no_grad():
            pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

        decoder_step_ensemble = DecoderBatchedStepEnsemble(model_list,
                                                           tgt_dict,
                                                           beam_size=beam_size)

        prev_token = torch.LongTensor([tgt_dict.eos()])
        prev_scores = torch.FloatTensor([0.0])
        attn_weights = torch.zeros(src_tokens.shape[0])
        prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)
        num_steps = torch.LongTensor([2])

        # Reference: run the monolithic beam search for num_steps steps.
        with torch.no_grad():
            (
                bs_out_tokens,
                bs_out_scores,
                bs_out_weights,
                bs_out_prev_indices,
            ) = full_beam_search(
                src_tokens,
                src_lengths,
                prev_token,
                prev_scores,
                attn_weights,
                prev_hypos_indices,
                num_steps,
            )

        # Buffers for the manually-composed outputs; row 0 is left at its
        # initialized value (EOS tokens / zeros).
        comp_out_tokens = (np.ones([num_steps + 1, beam_size], dtype="int64") *
                           tgt_dict.eos())
        comp_out_scores = np.zeros([num_steps + 1, beam_size])
        comp_out_weights = np.zeros(
            [num_steps + 1, beam_size,
             src_lengths.numpy()[0]])
        comp_out_prev_indices = np.zeros([num_steps + 1, beam_size],
                                         dtype="int64")

        # single EOS in flat array
        input_tokens = torch.LongTensor(np.array([tgt_dict.eos()]))
        prev_scores = torch.FloatTensor(np.array([0.0]))
        timestep = torch.LongTensor(np.array([0]))

        with torch.no_grad():
            pytorch_first_step_outputs = decoder_step_ensemble(
                input_tokens, prev_scores, timestep, *pytorch_encoder_outputs)

        comp_out_tokens[1, :] = pytorch_first_step_outputs[0]
        comp_out_scores[1, :] = pytorch_first_step_outputs[1]
        comp_out_prev_indices[1, :] = pytorch_first_step_outputs[2]
        comp_out_weights[1, :, :] = pytorch_first_step_outputs[3]

        next_input_tokens = pytorch_first_step_outputs[0]
        next_prev_scores = pytorch_first_step_outputs[1]
        timestep += 1

        # Tile states after first timestep
        next_states = list(pytorch_first_step_outputs[4:])
        for i in range(len(model_list)):
            next_states[i] = next_states[i].repeat(1, beam_size, 1)

        with torch.no_grad():
            pytorch_next_step_outputs = decoder_step_ensemble(
                next_input_tokens, next_prev_scores, timestep, *next_states)

        comp_out_tokens[2, :] = pytorch_next_step_outputs[0]
        comp_out_scores[2, :] = pytorch_next_step_outputs[1]
        comp_out_prev_indices[2, :] = pytorch_next_step_outputs[2]
        comp_out_weights[2, :, :] = pytorch_next_step_outputs[3]

        # Component-wise results must match the monolithic beam search.
        np.testing.assert_array_equal(comp_out_tokens, bs_out_tokens.numpy())
        np.testing.assert_allclose(comp_out_scores,
                                   bs_out_scores.numpy(),
                                   rtol=1e-4,
                                   atol=1e-6)
        np.testing.assert_array_equal(comp_out_prev_indices,
                                      bs_out_prev_indices.numpy())
        np.testing.assert_allclose(comp_out_weights,
                                   bs_out_weights.numpy(),
                                   rtol=1e-4,
                                   atol=1e-6)
Exemplo n.º 11
0
    def _test_full_beam_search_decoder(self, test_args, quantize=False):
        """Serialize a BeamSearchAndDecode module via save_to_pytorch,
        reload it with torch.jit.load, and verify the reloaded module
        produces the same hypotheses on an input of a new length."""
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        sample = next(samples)
        # [seq len, batch size=1]
        init_src_tokens = sample["net_input"]["src_tokens"][0:1].t()
        # [seq len]
        init_src_lengths = sample["net_input"]["src_lengths"][0:1].long()

        model_list = [task.build_model(test_args) for _ in range(3)]

        eos_token_id = 8
        length_penalty = 0.25
        nbest = 3
        stop_at_eos = True
        num_steps = torch.LongTensor([20])
        beam_size = 6

        bsd = BeamSearchAndDecode(
            model_list,
            tgt_dict,
            init_src_tokens,
            init_src_lengths,
            eos_token_id=eos_token_id,
            length_penalty=length_penalty,
            nbest=nbest,
            beam_size=beam_size,
            stop_at_eos=stop_at_eos,
            quantize=quantize,
        )
        serialized = io.BytesIO()
        bsd.save_to_pytorch(serialized)

        # Test generalization with a different sequence length
        src_tokens = torch.LongTensor([1, 2, 3, 4, 5, 6, 7, 9, 9, 10, 11]).unsqueeze(1)
        src_lengths = torch.LongTensor([11])
        prev_token = torch.LongTensor([0])
        prev_scores = torch.FloatTensor([0.0])
        attn_weights = torch.zeros(src_tokens.shape[0])
        prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)
        # Identical argument tuple for the original and reloaded modules.
        call_args = (
            src_tokens,
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            num_steps[0],
        )

        outs = bsd(*call_args)

        serialized.seek(0)
        reloaded_bsd = torch.jit.load(serialized)
        reloaded_bsd.apply(lambda s: s._unpack() if hasattr(s, "_unpack") else None)
        outs_deserialized = reloaded_bsd(*call_args)

        # Each hypothesis must survive the serialize/deserialize round trip:
        # exact token ids, approximately equal scores and attention weights.
        for hypo, hypo_deserialized in zip(outs, outs_deserialized):
            np.testing.assert_array_equal(
                hypo[0].tolist(), hypo_deserialized[0].tolist()
            )
            np.testing.assert_array_almost_equal(
                hypo[2], hypo_deserialized[2], decimal=1
            )
            np.testing.assert_array_almost_equal(
                hypo[3].numpy(), hypo_deserialized[3].numpy(), decimal=1
            )
Exemplo n.º 12
0
    def _test_full_beam_decoder(self, test_args, quantize=False):
        """Serialize a BeamSearch module (optionally with character-level
        source inputs) via save_to_pytorch, reload with torch.jit.load,
        and verify numerically identical outputs on a longer input.

        Args:
            test_args: model/config parameters for building the models.
            quantize: forwarded to BeamSearch to exercise the quantized
                export path.
        """
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        sample = next(samples)
        # [seq len, batch size=1]
        src_tokens = sample["net_input"]["src_tokens"][0:1].t()
        # [seq len]
        src_lengths = sample["net_input"]["src_lengths"][0:1].long()

        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(task.build_model(test_args))

        # Char-source architectures additionally take per-word character
        # indices and word lengths; other archs pass None for both.
        length, word_length = 11, 7
        if test_args.arch in constants.ARCHS_FOR_CHAR_SOURCE:
            char_inds = torch.LongTensor(
                np.random.randint(0,
                                  126, (1, length, word_length),
                                  dtype="int64"))
            word_lengths = torch.IntTensor(
                np.array([word_length] * length, dtype="int32")).reshape(
                    (1, length))
        else:
            char_inds, word_lengths = None, None

        beam_size = 6
        bs = BeamSearch(
            model_list,
            tgt_dict,
            src_tokens,
            src_lengths,
            beam_size=beam_size,
            quantize=quantize,
            char_inds=char_inds,
            word_lengths=word_lengths,
        )
        f = io.BytesIO()
        bs.save_to_pytorch(f)

        # Test generalization with a different sequence length
        src_tokens = torch.LongTensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                                       11]).unsqueeze(1)
        src_lengths = torch.LongTensor([11])
        prev_token = torch.LongTensor([0])
        prev_scores = torch.FloatTensor([0.0])
        attn_weights = torch.zeros(src_tokens.shape[0])
        prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)

        outs = bs(
            src_tokens,
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            torch.LongTensor([20]),
            char_inds=char_inds,
            word_lengths=word_lengths,
        )

        f.seek(0)
        deserialized_bs = torch.jit.load(f)
        # _unpack() is invoked on submodules that define it — presumably to
        # unpack quantized/packed parameters after loading; confirm against
        # the BeamSearch implementation.
        deserialized_bs.apply(lambda s: s._unpack()
                              if hasattr(s, "_unpack") else None)
        outs_deserialized = deserialized_bs(
            src_tokens,
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            torch.LongTensor([20]),
            char_inds=char_inds,
            word_lengths=word_lengths,
        )

        # The round trip must be exact for every output tensor.
        for a, b in zip(outs_deserialized, outs):
            np.testing.assert_allclose(a.detach().numpy(), b.detach().numpy())
Exemplo n.º 13
0
    def test_collate(self):
        """
        Makes sure that we can memoize in collate if we give a particular data
        index in different orders.

        Builds four 2-sample datasets from the same four dummy examples, runs
        TeacherDataset.collate over them while sharing the top-k score/index
        caches, and checks that re-collating in a different order does not
        change the memoized per-example teacher outputs.
        """
        test_args = test_utils.ModelParamsDict()
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        self.task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        teacher_model = pytorch_translate_utils.maybe_cuda(
            self.task.build_model(test_args)
        )

        d0, d1, d2, d3 = self._dummy_datasets(src_dict.eos(), tgt_dict.eos())
        dataset1 = [d0, d1]
        dataset2 = [d2, d3]
        dataset3 = [d3, d0]
        dataset4 = [d1, d2]

        # Shared memoization caches, filled in by the collate calls below.
        top_k_teacher_scores = {}
        top_k_teacher_indices = {}
        b1 = TeacherDataset.collate(
            dataset1,
            [teacher_model],
            3,
            src_dict.pad(),
            src_dict.eos(),
            top_k_teacher_scores,
            top_k_teacher_indices,
        )
        TeacherDataset.collate(
            dataset2,
            [teacher_model],
            3,
            src_dict.pad(),
            src_dict.eos(),
            top_k_teacher_scores,
            top_k_teacher_indices,
        )
        before_scores = [top_k_teacher_scores[i].cpu().numpy() for i in range(4)]
        before_indices = [top_k_teacher_indices[i].cpu().numpy() for i in range(4)]

        TeacherDataset.collate(
            dataset3,
            [teacher_model],
            3,
            src_dict.pad(),
            src_dict.eos(),
            top_k_teacher_scores,
            top_k_teacher_indices,
        )
        TeacherDataset.collate(
            dataset4,
            [teacher_model],
            3,
            src_dict.pad(),
            src_dict.eos(),
            top_k_teacher_scores,
            top_k_teacher_indices,
        )
        after_scores = [top_k_teacher_scores[i].cpu().numpy() for i in range(4)]
        after_indices = [top_k_teacher_indices[i].cpu().numpy() for i in range(4)]

        # Re-collating the same examples in a different order must not change
        # the memoized values. NOTE: the original code called np.array_equal
        # here without asserting on the result, making these checks no-ops.
        for i in range(4):
            assert np.array_equal(after_scores[i], before_scores[i])
            assert np.array_equal(after_indices[i], before_indices[i])

        b5 = TeacherDataset.collate(
            dataset1,
            [teacher_model],
            3,
            src_dict.pad(),
            src_dict.eos(),
            top_k_teacher_scores,
            top_k_teacher_indices,
        )
        probs_before = b1["top_k_scores"].numpy()
        indices_before = b1["top_k_indices"].numpy()
        probs_after = b5["top_k_scores"].numpy()
        indices_after = b5["top_k_indices"].numpy()

        # The first example has a different length, so the last four positions
        # of the first batch hold irrelevant (padding) values: compare only the
        # meaningful prefix, and expect zero padding in the tail of b5.
        # Plain `assert expr` rather than `assert expr is True`: identity
        # comparison would spuriously fail on a numpy bool.
        assert np.array_equal(probs_before[0][:-4], probs_after[0][:-4])
        assert np.array_equal(indices_before[0][:-4], indices_after[0][:-4])
        assert np.array_equal(probs_after[0][-4:], np.zeros((4, 3)))
        assert np.array_equal(indices_after[0][-4:], np.zeros((4, 3)))

        assert np.array_equal(probs_before[1], probs_after[1])
        assert np.array_equal(indices_before[1], indices_after[1])
    def test_decoder_ensemble_with_eos(self):
        """
        This is to test the functionality of DecoderBatchedStepEnsembleWithEOS
        class. We expect it generates the same outputs as
        DecoderBatchedStepEnsemble before the final step. At the final step, it
        generates EOS tokens.
        """
        test_args = test_utils.ModelParamsDict(arch="rnn")
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        model = task.build_model(test_args)
        eos_token = tgt_dict.eos()

        # Encode one toy source sentence of length 5.
        encoder_ensemble = EncoderEnsemble([model])
        src_tokens = torch.LongTensor([4, 5, 6, 7, 8]).unsqueeze(1)
        src_lengths = torch.LongTensor([5])
        enc_inputs = (src_tokens, src_lengths)
        encoder_outputs = encoder_ensemble(*enc_inputs)

        beam_size = 8
        word_reward = 1
        unk_reward = -1
        decoder_ensemble = DecoderBatchedStepEnsemble(
            models=[model],
            tgt_dict=tgt_dict,
            beam_size=beam_size,
            word_reward=word_reward,
            unk_reward=unk_reward,
        )
        decoder_ensemble_with_eos = DecoderBatchedStepEnsembleWithEOS(
            models=[model],
            tgt_dict=tgt_dict,
            beam_size=beam_size,
            word_reward=word_reward,
            unk_reward=unk_reward,
        )

        # Decoding starts from the EOS token with a zero score at timestep 0.
        prev_tokens = torch.LongTensor([eos_token])
        prev_scores = torch.FloatTensor([0.0])
        timestep = torch.LongTensor([0])
        final_step = torch.tensor([False], dtype=torch.bool)
        max_len = 5
        num_steps = torch.LongTensor([max_len])

        decoder_first_step_outputs = decoder_ensemble(
            prev_tokens, prev_scores, timestep, *encoder_outputs
        )

        decoder_with_eos_first_step_outputs = decoder_ensemble_with_eos(
            prev_tokens, prev_scores, timestep, final_step, *encoder_outputs
        )

        # Test results at first step
        self._test_base(
            decoder_first_step_outputs, decoder_with_eos_first_step_outputs
        )

        (
            prev_tokens,
            prev_scores,
            prev_hypos_indices,
            attn_weights,
            *states,
        ) = decoder_first_step_outputs

        # Tile is needed after first step: the first len(models) entries of
        # `states` (one hidden state per model in the single-model ensemble)
        # must be repeated across the beam.
        num_models = len([model])
        for i in range(num_models):
            states[i] = states[i].repeat(1, beam_size, 1)

        (
            prev_tokens_with_eos,
            prev_scores_with_eos,
            prev_hypos_indices_with_eos,
            attn_weights_with_eos,
            *states_with_eos,
        ) = decoder_with_eos_first_step_outputs

        for i in range(num_models):
            states_with_eos[i] = states_with_eos[i].repeat(1, beam_size, 1)

        # Iterate over a plain int rather than the 1-element tensor
        # (`range(num_steps - 1)` only worked via Tensor.__index__).
        for step in range(max_len - 1):
            decoder_step_outputs = decoder_ensemble(
                prev_tokens, prev_scores, torch.tensor([step + 1]), *states
            )
            (
                prev_tokens,
                prev_scores,
                prev_hypos_indices,
                attn_weights,
                *states,
            ) = decoder_step_outputs
            decoder_step_with_eos_outputs = decoder_ensemble_with_eos(
                prev_tokens_with_eos,
                prev_scores_with_eos,
                torch.tensor([step + 1]),
                final_step,
                *states_with_eos,
            )
            (
                prev_tokens_with_eos,
                prev_scores_with_eos,
                prev_hypos_indices_with_eos,
                attn_weights_with_eos,
                *states_with_eos,
            ) = decoder_step_with_eos_outputs

            # Test results at each step
            self._test_base(decoder_step_outputs, decoder_step_with_eos_outputs)

        # Test the outputs of the final step. Pass `num_steps` directly: the
        # original wrapped this 1-element tensor in torch.tensor([...]),
        # producing an inconsistent nested (1, 1) timestep value.
        decoder_final_with_eos_outputs = decoder_ensemble_with_eos(
            prev_tokens_with_eos,
            prev_scores_with_eos,
            num_steps,
            torch.tensor([True]),
            *states_with_eos,
        )

        # At the final step every beam slot must emit EOS, and the hypothesis
        # indices must be the identity permutation 0..beam_size-1.
        np.testing.assert_array_equal(
            decoder_final_with_eos_outputs[0],
            torch.LongTensor([eos_token]).repeat(beam_size),
        )
        np.testing.assert_array_equal(
            decoder_final_with_eos_outputs[2],
            torch.arange(beam_size, dtype=torch.int64),
        )
Exemplo n.º 15
0
 def setUp(self):
     """Build a translation task and model over dummy dictionaries for the tests."""
     self.args = test_utils.ModelParamsDict()
     _, src_dict, tgt_dict = test_utils.prepare_inputs(self.args)
     self.task = tasks.PytorchTranslateTask(self.args, src_dict, tgt_dict)
     self.model = self.task.build_model(self.args)