def test_batched_beam_from_checkpoints_vr(self):
        check_dir = (
            '/mnt/gfsdataswarm-global/namespaces/search/language-technology-mt/'
            'nnmt_tmp/tl_XX-en_XX-pytorch-256-dim-vocab-reduction'
        )
        checkpoints = [
            'averaged_checkpoint_best_3.pt',
            'averaged_checkpoint_best_4.pt',
            'averaged_checkpoint_best_5.pt',
        ]
        checkpoint_filenames = [os.path.join(check_dir, f) for f in checkpoints]

        encoder_ensemble = EncoderEnsemble.build_from_checkpoints(
            checkpoint_filenames,
            os.path.join(check_dir, 'dictionary-tl.txt'),
            os.path.join(check_dir, 'dictionary-en.txt'),
        )

        decoder_step_ensemble = DecoderBatchedStepEnsemble.build_from_checkpoints(
            checkpoint_filenames,
            os.path.join(check_dir, 'dictionary-tl.txt'),
            os.path.join(check_dir, 'dictionary-en.txt'),
            beam_size=5,
        )

        self._test_full_ensemble(
            encoder_ensemble,
            decoder_step_ensemble,
            batched_beam=True,
        )
Example #2
def export(args):
    assert_required_args_are_set(args)
    checkpoint_filenames = args.path.split(":")

    if args.char_source:
        encoder_class = CharSourceEncoderEnsemble
    else:
        encoder_class = EncoderEnsemble

    encoder_ensemble = encoder_class.build_from_checkpoints(
        checkpoint_filenames=checkpoint_filenames,
        src_dict_filename=args.source_vocab_file,
        dst_dict_filename=args.target_vocab_file,
    )
    if args.encoder_output_file != "":
        encoder_ensemble.save_to_db(args.encoder_output_file)

    if args.decoder_output_file != "":
        decoder_step_ensemble = DecoderBatchedStepEnsemble.build_from_checkpoints(
            checkpoint_filenames=checkpoint_filenames,
            src_dict_filename=args.source_vocab_file,
            dst_dict_filename=args.target_vocab_file,
            beam_size=args.beam_size,
            word_reward=args.word_reward,
            unk_reward=args.unk_reward,
        )

        # need example encoder outputs to pass through network
        # (source length 5 is arbitrary)
        src_dict = encoder_ensemble.src_dict
        token_list = [src_dict.unk()] * 4 + [src_dict.eos()]
        src_tokens = torch.LongTensor(
            np.array(token_list, dtype="int64").reshape(-1, 1)
        )
        src_lengths = torch.IntTensor(np.array([len(token_list)], dtype="int32"))
        if args.char_source:
            char_inds = torch.LongTensor(np.ones((1, 5, 3), dtype="int64"))
            word_lengths = torch.LongTensor(np.array([3] * 5, dtype="int64")).reshape(
                1, 5
            )
            pytorch_encoder_outputs = encoder_ensemble(
                src_tokens, src_lengths, char_inds, word_lengths
            )
        else:
            pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

        decoder_step_ensemble.save_to_db(
            args.decoder_output_file, pytorch_encoder_outputs
        )
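
# A minimal sketch of how this entry point might be invoked, assuming only the
# argument names export() reads above; the checkpoint and vocabulary paths
# below are placeholders, not real files.
from types import SimpleNamespace

hypothetical_args = SimpleNamespace(
    path="model1.pt:model2.pt",  # colon-separated checkpoint list
    source_vocab_file="dictionary-src.txt",
    target_vocab_file="dictionary-tgt.txt",
    char_source=False,
    encoder_output_file="encoder.predictor_export",
    decoder_output_file="decoder.predictor_export",
    beam_size=5,
    word_reward=0.0,
    unk_reward=0.0,
)
export(hypothetical_args)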
Example #3
    def _test_batched_beam_decoder_step(self, test_args):
        beam_size = 5
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)

        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(models.build_model(test_args, src_dict,
                                                 tgt_dict))
        encoder_ensemble = EncoderEnsemble(model_list)

        # test equivalence
        # The discrepancy in types here is a temporary expedient.
        # PyTorch indexing requires int64 while support for tracing
        # pack_padded_sequence() requires int32.
        sample = next(samples)
        src_tokens = sample['net_input']['src_tokens'][0:1].t()
        src_lengths = sample['net_input']['src_lengths'][0:1].int()

        pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

        decoder_step_ensemble = DecoderBatchedStepEnsemble(
            model_list,
            beam_size=beam_size,
        )

        tmp_dir = tempfile.mkdtemp()
        decoder_step_pb_path = os.path.join(tmp_dir, 'decoder_step.pb')
        decoder_step_ensemble.onnx_export(
            decoder_step_pb_path,
            pytorch_encoder_outputs,
        )

        # single EOS in flat array
        input_tokens = torch.LongTensor(np.array([model_list[0].dst_dict.eos()]))
        prev_scores = torch.FloatTensor(np.array([0.0]))
        timestep = torch.LongTensor(np.array([0]))

        pytorch_first_step_outputs = decoder_step_ensemble(
            input_tokens, prev_scores, timestep, *pytorch_encoder_outputs)

        # next step inputs (input_tokens shape: [beam_size])
        next_input_tokens = torch.LongTensor(np.arange(4, 9))

        next_prev_scores = pytorch_first_step_outputs[1]
        next_timestep = timestep + 1
        next_states = pytorch_first_step_outputs[4:]

        step_inputs = []

        # encoder outputs need to be replicated for each input hypothesis
        for encoder_rep in pytorch_encoder_outputs[:len(model_list)]:
            step_inputs.append(encoder_rep.repeat(1, beam_size, 1))

        if model_list[0].decoder.vocab_reduction_module is not None:
            step_inputs.append(pytorch_encoder_outputs[len(model_list)])

        step_inputs.extend(list(next_states))

        pytorch_next_step_outputs = decoder_step_ensemble(
            next_input_tokens, next_prev_scores, next_timestep, *step_inputs)

        with open(decoder_step_pb_path, 'rb') as f:
            onnx_model = onnx.load(f)
        onnx_decoder = caffe2_backend.prepare(onnx_model)

        decoder_inputs_numpy = [
            next_input_tokens.numpy(),
            next_prev_scores.detach().numpy(),
            next_timestep.detach().numpy(),
        ]
        for tensor in step_inputs:
            decoder_inputs_numpy.append(tensor.detach().numpy())

        caffe2_next_step_outputs = onnx_decoder.run(tuple(decoder_inputs_numpy))

        for i in range(len(pytorch_next_step_outputs)):
            caffe2_out_value = caffe2_next_step_outputs[i]
            pytorch_out_value = pytorch_next_step_outputs[i].detach().numpy()
            np.testing.assert_allclose(
                caffe2_out_value,
                pytorch_out_value,
                rtol=1e-4,
                atol=1e-6,
            )
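
# The test above follows a generic export-and-verify pattern: trace to ONNX,
# load the proto, prepare a Caffe2 rep, and compare its outputs to PyTorch's.
# A self-contained sketch of that pattern with a toy module; every name below
# is illustrative and not part of pytorch_translate.
import numpy as np
import onnx
import torch
from caffe2.python.onnx import backend as caffe2_backend

class Toy(torch.nn.Module):
    def forward(self, x):
        return x * 2.0 + 1.0

toy = Toy()
example_input = torch.randn(3, 4)
torch.onnx.export(toy, (example_input,), "toy.onnx")  # trace and serialize

caffe2_rep = caffe2_backend.prepare(onnx.load("toy.onnx"))
(caffe2_out,) = caffe2_rep.run((example_input.numpy(),))
np.testing.assert_allclose(
    caffe2_out, toy(example_input).numpy(), rtol=1e-4, atol=1e-6
)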
Example #4
    def _test_batched_beam_decoder_step(self,
                                        test_args,
                                        return_caffe2_rep=False):
        beam_size = 5
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(task.build_model(test_args))
        encoder_ensemble = EncoderEnsemble(model_list)

        # test equivalence
        # The discrepancy in types here is a temporary expedient.
        # PyTorch indexing requires int64 while support for tracing
        # pack_padded_sequence() requires int32.
        sample = next(samples)
        src_tokens = sample["net_input"]["src_tokens"][0:1].t()
        src_lengths = sample["net_input"]["src_lengths"][0:1].int()

        pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

        decoder_step_ensemble = DecoderBatchedStepEnsemble(model_list,
                                                           tgt_dict,
                                                           beam_size=beam_size)

        tmp_dir = tempfile.mkdtemp()
        decoder_step_pb_path = os.path.join(tmp_dir, "decoder_step.pb")
        decoder_step_ensemble.onnx_export(decoder_step_pb_path,
                                          pytorch_encoder_outputs)

        # single EOS in flat array
        input_tokens = torch.LongTensor(np.array([tgt_dict.eos()]))
        prev_scores = torch.FloatTensor(np.array([0.0]))
        timestep = torch.LongTensor(np.array([0]))

        pytorch_first_step_outputs = decoder_step_ensemble(
            input_tokens, prev_scores, timestep, *pytorch_encoder_outputs)

        # next step inputs (input_tokens shape: [beam_size])
        next_input_tokens = torch.LongTensor(np.arange(4, 9))

        next_prev_scores = pytorch_first_step_outputs[1]
        next_timestep = timestep + 1
        next_states = list(pytorch_first_step_outputs[4:])

        # Tile these for the next timestep
        for i in range(len(model_list)):
            next_states[i] = next_states[i].repeat(1, beam_size, 1)

        pytorch_next_step_outputs = decoder_step_ensemble(
            next_input_tokens, next_prev_scores, next_timestep, *next_states)

        onnx_decoder = caffe2_backend.prepare_zip_archive(decoder_step_pb_path)

        if return_caffe2_rep:
            return onnx_decoder

        decoder_inputs_numpy = [
            next_input_tokens.numpy(),
            next_prev_scores.detach().numpy(),
            next_timestep.detach().numpy(),
        ]
        for tensor in next_states:
            decoder_inputs_numpy.append(tensor.detach().numpy())

        caffe2_next_step_outputs = onnx_decoder.run(
            tuple(decoder_inputs_numpy))

        for i in range(len(pytorch_next_step_outputs)):
            caffe2_out_value = caffe2_next_step_outputs[i]
            pytorch_out_value = pytorch_next_step_outputs[i].detach().numpy()
            np.testing.assert_allclose(caffe2_out_value,
                                       pytorch_out_value,
                                       rtol=1e-4,
                                       atol=1e-6)
        decoder_step_ensemble.save_to_db(
            output_path=os.path.join(tmp_dir, "decoder.predictor_export"),
            encoder_ensemble_outputs=pytorch_encoder_outputs,
        )
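
# The (tokens, scores, prev_hypos_indices) outputs compared above come from a
# flat top-k over all beam_size * vocab_size candidate extensions. A small
# sketch of that bookkeeping; shapes are illustrative, not the ensemble's
# actual internals.
import torch

beam_size, vocab_size = 5, 100
# cumulative score of extending each live hypothesis with each vocabulary word
total_scores = torch.randn(beam_size, vocab_size)
best_scores, best_flat = torch.topk(total_scores.view(-1), k=beam_size)
prev_hypos_indices = best_flat // vocab_size  # hypothesis each survivor extends
tokens = best_flat % vocab_size  # word that extends it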
Example #5
    def _test_beam_component_equivalence(self, test_args):
        beam_size = 5
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(task.build_model(test_args))

        # to initialize BeamSearch object
        sample = next(samples)
        # [seq len, batch size=1]
        src_tokens = sample["net_input"]["src_tokens"][0:1].t()
        # [seq len]
        src_lengths = sample["net_input"]["src_lengths"][0:1].long()

        full_beam_search = BeamSearch(model_list,
                                      tgt_dict,
                                      src_tokens,
                                      src_lengths,
                                      beam_size=beam_size)

        encoder_ensemble = EncoderEnsemble(model_list)

        # to initialize decoder_step_ensemble
        with torch.no_grad():
            pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

        decoder_step_ensemble = DecoderBatchedStepEnsemble(model_list,
                                                           tgt_dict,
                                                           beam_size=beam_size)

        prev_token = torch.LongTensor([tgt_dict.eos()])
        prev_scores = torch.FloatTensor([0.0])
        attn_weights = torch.zeros(src_tokens.shape[0])
        prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)
        num_steps = torch.LongTensor([2])

        with torch.no_grad():
            (
                bs_out_tokens,
                bs_out_scores,
                bs_out_weights,
                bs_out_prev_indices,
            ) = full_beam_search(
                src_tokens,
                src_lengths,
                prev_token,
                prev_scores,
                attn_weights,
                prev_hypos_indices,
                num_steps,
            )

        comp_out_tokens = (np.ones([num_steps + 1, beam_size], dtype="int64") *
                           tgt_dict.eos())
        comp_out_scores = np.zeros([num_steps + 1, beam_size])
        comp_out_weights = np.zeros(
            [num_steps + 1, beam_size,
             src_lengths.numpy()[0]])
        comp_out_prev_indices = np.zeros([num_steps + 1, beam_size],
                                         dtype="int64")

        # single EOS in flat array
        input_tokens = torch.LongTensor(np.array([tgt_dict.eos()]))
        prev_scores = torch.FloatTensor(np.array([0.0]))
        timestep = torch.LongTensor(np.array([0]))

        with torch.no_grad():
            pytorch_first_step_outputs = decoder_step_ensemble(
                input_tokens, prev_scores, timestep, *pytorch_encoder_outputs)

        comp_out_tokens[1, :] = pytorch_first_step_outputs[0]
        comp_out_scores[1, :] = pytorch_first_step_outputs[1]
        comp_out_prev_indices[1, :] = pytorch_first_step_outputs[2]
        comp_out_weights[1, :, :] = pytorch_first_step_outputs[3]

        next_input_tokens = pytorch_first_step_outputs[0]
        next_prev_scores = pytorch_first_step_outputs[1]
        timestep += 1

        # Tile states after first timestep
        next_states = list(pytorch_first_step_outputs[4:])
        for i in range(len(model_list)):
            next_states[i] = next_states[i].repeat(1, beam_size, 1)

        with torch.no_grad():
            pytorch_next_step_outputs = decoder_step_ensemble(
                next_input_tokens, next_prev_scores, timestep, *next_states)

        comp_out_tokens[2, :] = pytorch_next_step_outputs[0]
        comp_out_scores[2, :] = pytorch_next_step_outputs[1]
        comp_out_prev_indices[2, :] = pytorch_next_step_outputs[2]
        comp_out_weights[2, :, :] = pytorch_next_step_outputs[3]

        np.testing.assert_array_equal(comp_out_tokens, bs_out_tokens.numpy())
        np.testing.assert_allclose(comp_out_scores,
                                   bs_out_scores.numpy(),
                                   rtol=1e-4,
                                   atol=1e-6)
        np.testing.assert_array_equal(comp_out_prev_indices,
                                      bs_out_prev_indices.numpy())
        np.testing.assert_allclose(comp_out_weights,
                                   bs_out_weights.numpy(),
                                   rtol=1e-4,
                                   atol=1e-6)
    def test_decoder_ensemble_with_eos(self):
        """
        This is to test the functionality of DecoderBatchedStepEnsembleWithEOS class.
        We expect it generates same outputs with DecoderBatchedStepEnsemble before
        final step. At final step, it generates EOS tokens.
        """
        test_args = test_utils.ModelParamsDict(arch="rnn")
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        model = task.build_model(test_args)
        eos_token = tgt_dict.eos()

        encoder_ensemble = EncoderEnsemble([model])
        src_tokens = torch.LongTensor([4, 5, 6, 7, 8]).unsqueeze(1)
        src_lengths = torch.LongTensor([5])
        enc_inputs = (src_tokens, src_lengths)
        encoder_outputs = encoder_ensemble(*enc_inputs)

        beam_size = 8
        word_reward = 1
        unk_reward = -1
        decoder_ensemble = DecoderBatchedStepEnsemble(
            models=[model],
            tgt_dict=tgt_dict,
            beam_size=beam_size,
            word_reward=word_reward,
            unk_reward=unk_reward,
        )
        decoder_ensemble_with_eos = DecoderBatchedStepEnsembleWithEOS(
            models=[model],
            tgt_dict=tgt_dict,
            beam_size=beam_size,
            word_reward=word_reward,
            unk_reward=unk_reward,
        )

        prev_tokens = torch.LongTensor([eos_token])
        prev_scores = torch.FloatTensor([0.0])
        timestep = torch.LongTensor([0])
        final_step = torch.tensor([False], dtype=torch.bool)
        max_len = 5
        num_steps = torch.LongTensor([max_len])

        decoder_first_step_outputs = decoder_ensemble(prev_tokens, prev_scores,
                                                      timestep,
                                                      *encoder_outputs)

        decoder_with_eos_first_step_outputs = decoder_ensemble_with_eos(
            prev_tokens, prev_scores, timestep, final_step, *encoder_outputs)

        # Test results at first step
        self._test_base(decoder_first_step_outputs,
                        decoder_with_eos_first_step_outputs)

        (
            prev_tokens,
            prev_scores,
            prev_hypos_indices,
            attn_weights,
            *states,
        ) = decoder_first_step_outputs

        # Tiling the recurrent states is needed after the first step
        # (one state per model in this single-model ensemble)
        for i in range(len([model])):
            states[i] = states[i].repeat(1, beam_size, 1)

        (
            prev_tokens_with_eos,
            prev_scores_with_eos,
            prev_hypos_indices_with_eos,
            attn_weights_with_eos,
            *states_with_eos,
        ) = decoder_with_eos_first_step_outputs

        for i in range(len([model])):
            states_with_eos[i] = states_with_eos[i].repeat(1, beam_size, 1)

        for i in range(num_steps - 1):
            decoder_step_outputs = decoder_ensemble(prev_tokens, prev_scores,
                                                    torch.tensor([i + 1]),
                                                    *states)
            (
                prev_tokens,
                prev_scores,
                prev_hypos_indices,
                attn_weights,
                *states,
            ) = decoder_step_outputs
            decoder_step_with_eos_outputs = decoder_ensemble_with_eos(
                prev_tokens_with_eos,
                prev_scores_with_eos,
                torch.tensor([i + 1]),
                final_step,
                *states_with_eos,
            )
            (
                prev_tokens_with_eos,
                prev_scores_with_eos,
                prev_hypos_indices_with_eos,
                attn_weights_with_eos,
                *states_with_eos,
            ) = decoder_step_with_eos_outputs

            # Test results at each step
            self._test_base(decoder_step_outputs,
                            decoder_step_with_eos_outputs)

        # Test the outputs of the final step
        decoder_final_with_eos_outputs = decoder_ensemble_with_eos(
            prev_tokens_with_eos,
            prev_scores_with_eos,
            num_steps,  # already a LongTensor; wrapping it again would add a dim
            torch.tensor([True]),
            *states_with_eos,
        )

        np.testing.assert_array_equal(
            decoder_final_with_eos_outputs[0],
            torch.LongTensor([eos_token]).repeat(beam_size),
        )
        np.testing.assert_array_equal(
            decoder_final_with_eos_outputs[2],
            torch.arange(beam_size),
        )
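
    # _test_base is not shown in this snippet; a plausible sketch, assuming it
    # simply compares the two ensembles' corresponding outputs elementwise:
    def _test_base(self, outputs, outputs_with_eos):
        self.assertEqual(len(outputs), len(outputs_with_eos))
        for out, out_with_eos in zip(outputs, outputs_with_eos):
            np.testing.assert_allclose(
                out.detach().numpy(),
                out_with_eos.detach().numpy(),
                rtol=1e-4,
                atol=1e-6,
            )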