Example #1
    def test_combine_weighted_scores(self):
        test_args = test_utils.ModelParamsDict()
        test_args.enable_rescoring = True
        test_args.length_penalty = 1
        test_args.original_model_weight = 1
        test_args.l2r_model_path = ""
        test_args.l2r_model_weight = 1
        test_args.r2l_model_weight = 0
        test_args.reverse_model_weight = 0.5
        test_args.lm_model_weight = 0.75

        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
        model = task.build_model(test_args)
        with patch(
            "pytorch_translate.utils.load_diverse_ensemble_for_inference",
            return_value=([model], test_args, task),
        ):
            rescorer = Rescorer(test_args)

            scores = torch.tensor([[10, 20, 30, 40]], dtype=torch.float)
            src_tokens = torch.tensor([1, 2, 3, 4, 5])
            hypos = [{"tokens": torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])}]
            rescorer.combine_weighted_scores(scores, src_tokens, hypos)

            # 10*1=10. , 20*0=0. , 30*(0.5/5)=3. , 40*(0.75/5)=6.
            expected = torch.tensor([[10.0, 0.0, 3.0, 6.0]], dtype=torch.float)
            assert torch.equal(scores, expected)
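The expected tensor can be reproduced without the library: each score column is scaled by its model weight, and the reverse- and LM-model columns are additionally divided by the source length (5 tokens here). A minimal standalone sketch of that arithmetic, inferred from the inline comment above (the column order is an assumption of this sketch):

import torch

# Assumed column order: l2r (w=1), r2l (w=0), reverse (w=0.5), lm (w=0.75);
# the reverse and lm weights are divided by the source length of 5.
weights = torch.tensor([1.0, 0.0, 0.5 / 5, 0.75 / 5])
scores = torch.tensor([[10.0, 20.0, 30.0, 40.0]])
assert torch.equal(scores * weights, torch.tensor([[10.0, 0.0, 3.0, 6.0]]))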
Example #2
    def test_model_passing_as_parameter(self):
        test_args = test_utils.ModelParamsDict("transformer")
        test_args.enable_rescoring = True
        test_args.length_penalty = 1
        test_args.l2r_model_weight = 1.0
        test_args.r2l_model_weight = 0.0
        test_args.reverse_model_weight = 0.0
        test_args.lm_model_weight = 1.01
        test_args.cloze_transformer_weight = 1.0

        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
        model = task.build_model(test_args)
        src_tokens = torch.tensor([[1, 2, 3, 4, 5]])
        hypos = [{"tokens": torch.tensor([1, 2])}, {"tokens": torch.tensor([1, 2])}]
        rescorer = Rescorer(
            test_args, task, {"l2r_model": {"model": model, "task": task}}
        )
        scores = rescorer.score(src_tokens, hypos)
        assert scores.size()[1] == 5
Example #3
    def test_batch_computation(self):
        test_args = test_utils.ModelParamsDict("transformer")
        test_args.enable_rescoring = True
        test_args.length_penalty = 1
        test_args.l2r_model_path = "/tmp/test_rescorer_model.pt"
        test_args.l2r_model_weight = 1.0
        test_args.r2l_model_weight = 0.0
        test_args.reverse_model_weight = 0.0
        test_args.cloze_transformer_weight = 1.0
        test_args.lm_model_weight = 0.0

        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
        model = task.build_model(test_args)
        torch.save(model, test_args.l2r_model_path)
        with patch(
            "pytorch_translate.utils.load_diverse_ensemble_for_inference",
            return_value=([model], test_args, task),
        ):
            rescorer = Rescorer(test_args)
            src_tokens = torch.tensor([[1, 3, 3, 4, 2], [1, 3, 2, 0, 0]])
            hypos = [
                {"tokens": torch.tensor([1, 5, 2])},
                {"tokens": torch.tensor([6, 3, 5, 2])},
                {"tokens": torch.tensor([1, 2])},
                {"tokens": torch.tensor([1, 5, 6, 2])},
            ]
            scores = rescorer.score(src_tokens, hypos)

            src_tokens = torch.tensor([[1, 3, 3, 4, 2]])
            hypos = [
                {"tokens": torch.tensor([1, 5, 2])},
                {"tokens": torch.tensor([6, 3, 5, 2])},
            ]
            scores_single = rescorer.score(src_tokens, hypos)

            assert torch.equal(scores[0], scores_single[0])
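The final assertion relies on the flattened hypothesis list being ordered source-major, i.e., all hypotheses for source sentence 0 come first, so scores[0] in the batched call and scores_single[0] in the single-sentence call refer to the same (source, hypothesis) pair. A minimal index sketch of that assumed layout:

import torch

# With 2 hypotheses per source sentence, hypothesis i maps to source i // 2.
num_hypos, hypos_per_src = 4, 2
src_index = torch.arange(num_hypos) // hypos_per_src  # tensor([0, 0, 1, 1])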
Example #4
    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        src_tokens = sample["net_input"]["src_tokens"]
        beam_size = self.args.rl_num_trajectory
        bsz, srclen = src_tokens.size()
        encoder_input = {
            "src_tokens": sample["net_input"]["src_tokens"],
            "src_lengths": sample["net_input"]["src_lengths"],
        }

        # 1) Generate hypos
        translator = generate.build_sequence_generator(self.args, self.task, [model])
        with torch.no_grad():
            seq_hypos = translator.generate(
                encoder_input,
                beam_size,
                maxlen=int(self.args.max_len_a * srclen + self.args.max_len_b),
            )

        # wrap each target sentence as a single word-level hypothesis
        word_hypos = [[{"tokens": sample["target"][k]}] for k in range(bsz)]

        ## Mix sequence-level and word-level hypos; flattened length is
        ## bsz * (beam_size + 1)
        hypos = [seq_hypos[j] + word_hypos[j] for j in range(bsz)]
        hypos = [hypo for sent_hypos in hypos for hypo in sent_hypos]
        hypos_len = (
            torch.tensor([len(hypo["tokens"]) for hypo in hypos])
            .type_as(src_tokens)
            .float()
        )
        # mask index for word-level hypos, e.g., target sentence
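        # with beam_size=2 and bsz=2, hypos come in groups of 3 per sentence
        # (2 generated + 1 appended target), so targets land at indices [2, 5]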
        mask_index = torch.arange(beam_size, (beam_size + 1) * bsz, beam_size + 1)

        # 2) Compute (log)-probs via forward models
        self.self_rescorer.model = model
        self.self_rescorer.task = self.task
        model.train()
        assert self.self_rescorer.model.training, "model should be in training phase"

        hypo_encoder_inputs, hypo_tokens = self.self_rescorer.prepare_inputs(
            src_tokens, hypos
        )
        hypo_logprobs, hypo_encoder_outs, forward_logprobs = self.self_rescorer.score_tokens(
            hypo_encoder_inputs, hypo_tokens
        )
        hypo_logprobs /= hypos_len ** self.args.rescore_length_penalty

        # 3) Sequence level
        seq_loss = torch.zeros(1).type_as(hypo_logprobs)
        if self.args.rl_weight > 0.0:
            ## 3.1) Compute seq-level rewards
            with torch.no_grad():
                rescorer = Rescorer(self.args, self.task, self.rescore_models)
                scores = rescorer.score(src_tokens, hypos)
                rewards = self.combine_score(src_tokens, hypos, hypos_len, scores)
            assert not rewards.requires_grad, "no grads flow back to generation"
            ## 3.2) Compute Policy Gradient loss
            rewards = rewards.type_as(hypo_logprobs)
            seq_mask = hypo_logprobs.new_ones(hypo_logprobs.size())
            seq_mask[mask_index] = 0.0
            seq_loss = -1.0 * (seq_mask * hypo_logprobs * rewards).sum()

        # 4) Word-level
        word_loss = torch.zeros(1).type_as(hypo_logprobs)
        if self.args.word_weight > 0.0:
            ## 4.1) Compute word-level rewards from a left-right rescoring model
            with torch.no_grad():
                teacher_model = self.rescore_models[self.args.word_model]
                teacher = SimpleModelScorer(self.args, None, teacher_model, self.task)
                _, _, teacher_logprobs = teacher.score_tokens(
                    hypo_encoder_inputs, hypo_tokens
                )
            ## 4.2) Compute word-level loss
            f_logprob, f_index = forward_logprobs.topk(self.args.topk_words)
            word_mask = f_logprob.new_zeros(f_logprob.size())
            word_mask[mask_index, :, :] = 1.0
            ## KL(p_s || p_t) = \sum p_s log p_s - \sum p_s log p_t, aka RL + maxEnt
            word_loss = (
                word_mask
                * f_logprob.exp()
                * (f_logprob - 1.0 * teacher_logprobs.gather(-1, f_index))
            ).sum()

        # 5) Compute Cross-entropy loss
        eos = self.task.target_dictionary.eos()
        target_tokens = torch.cat(
            (
                torch.zeros(bsz, 1).fill_(eos).type_as(sample["target"]),
                sample["target"],
            ),
            dim=1,
        )
        target_encoder_inputs = (
            encoder_input["src_tokens"],
            [encoder_input["src_lengths"][0].item()],
        )
        target_logprobs, target_encoder_out, _ = self.self_rescorer.score_tokens(
            target_encoder_inputs, target_tokens
        )
        nll_loss = -1.0 * target_logprobs.sum()

        # 6) Gather losses
        loss = (
            self.args.rl_weight * seq_loss
            + self.args.word_weight * word_loss
            + nll_loss
        )

        # Logging
        sample_size = (
            sample["target"].size(0) if self.args.sentence_avg else sample["ntokens"]
        )
        logging_output = {
            "loss": utils.item(loss.data) if reduce else loss.data,
            "nll_loss": utils.item(nll_loss.data) if reduce else nll_loss.data,
            "ntokens": sample["ntokens"],
            "nsentences": sample["target"].size(0),
            "sample_size": sample_size,
        }
        return loss, sample_size, logging_output
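In equation form, the objective gathered in step 6 is a direct transcription of the code above: the hypothesis log-probabilities are length-normalized, the sequence mask $m$ keeps only generated hypotheses, the word mask $\bar{m}$ keeps only the appended targets, and the word term sums over hypotheses, positions, and top-k words:

\mathcal{L} \;=\; w_{\mathrm{rl}}\Big(-\sum_{h} m_h\,\tilde{\ell}_h\,R_h\Big)
\;+\; w_{\mathrm{word}}\sum \bar{m}\,p_s\big(\log p_s - \log p_t\big)
\;-\; \sum_{t}\log p_\theta(y_t \mid y_{<t}, x),
\qquad \tilde{\ell}_h = \frac{\log p_\theta(h)}{|h|^{\alpha}}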
Example #5
    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        src_tokens = sample["net_input"]["src_tokens"]
        beam_size = self.args.rl_num_trajectory
        bsz, srclen = src_tokens.size()
        encoder_input = {
            "src_tokens": sample["net_input"]["src_tokens"],
            "src_lengths": sample["net_input"]["src_lengths"],
        }

        # 1) Generate hypos
        translator = generate.build_sequence_generator(self.args, self.task, [model])
        with torch.no_grad():
            hypos = translator.generate(
                encoder_input,
                beam_size,
                maxlen=int(self.args.max_len_a * srclen + self.args.max_len_b),
            )
        ## flatten the nested list; final length is bsz * beam_size
        hypos = [hypo for sent_hypos in hypos for hypo in sent_hypos]
        hypos_len = (
            torch.tensor([len(hypo["tokens"]) for hypo in hypos])
            .type_as(src_tokens)
            .float()
        )

        # 2) Compute (log)-probs via forward models
        self.self_rescorer.model = model
        self.self_rescorer.task = self.task
        model.train()
        assert self.self_rescorer.model.training, "model should be in training phase"

        hypo_encoder_inputs, hypo_tokens = self.self_rescorer.prepare_inputs(
            src_tokens, hypos
        )
        hypo_logprobs, hypo_encoder_outs, _ = self.self_rescorer.score_tokens(
            hypo_encoder_inputs, hypo_tokens
        )
        hypo_logprobs /= hypos_len ** self.args.rescore_length_penalty

        # 3) Compute rewards from rescoring models
        with torch.no_grad():
            rescorer = Rescorer(self.args, self.task, self.rescore_models)
            scores = rescorer.score(src_tokens, hypos)
            rewards = self.combine_score(src_tokens, hypos, hypos_len, scores)
        assert not rewards.requires_grad, "no grads flow back to generation"

        # 4) Compute Policy Gradient loss
        rewards = rewards.type_as(hypo_logprobs)
        rl_loss = -1.0 * (hypo_logprobs * rewards).sum()

        # 5) Compute Cross-entropy loss
        eos = self.task.target_dictionary.eos()
        target_tokens = torch.cat(
            (
                torch.zeros(bsz, 1).fill_(eos).type_as(sample["target"]),
                sample["target"],
            ),
            dim=1,
        )
        target_encoder_inputs = (
            encoder_input["src_tokens"],
            [encoder_input["src_lengths"][0].item()],
        )
        target_logprobs, target_encoder_out, _ = self.self_rescorer.score_tokens(
            target_encoder_inputs, target_tokens
        )
        nll_loss = -1.0 * target_logprobs.sum()

        # 6) Gather losses
        loss = self.args.rl_weight * rl_loss + nll_loss

        # Logging
        sample_size = (
            sample["target"].size(0) if self.args.sentence_avg else sample["ntokens"]
        )
        logging_output = {
            "loss": utils.item(loss.data) if reduce else loss.data,
            "nll_loss": utils.item(nll_loss.data) if reduce else nll_loss.data,
            "ntokens": sample["ntokens"],
            "nsentences": sample["target"].size(0),
            "sample_size": sample_size,
        }
        return loss, sample_size, logging_output
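Relative to Example #4, this variant keeps only the policy-gradient and cross-entropy terms: there is no word-level KL term, and no target sentences are mixed into the hypothesis list, so no sequence mask is needed:

\mathcal{L} \;=\; w_{\mathrm{rl}}\Big(-\sum_{h}\tilde{\ell}_h\,R_h\Big)
\;-\; \sum_{t}\log p_\theta(y_t \mid y_{<t}, x),
\qquad \tilde{\ell}_h = \frac{\log p_\theta(h)}{|h|^{\alpha}}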
Example #6
def _generate_score(models, args, task, dataset, optimize=True):
    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load ensemble
    if not args.quiet:
        print("| loading model(s) from {}".format(", ".join(
            args.path.split(":"))))

    # Optimize ensemble for generation
    if optimize:
        for model in models:
            model.make_generation_fast_(
                beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
                need_attn=True,
            )

    translator = build_sequence_generator(args, task, models)
    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Keep track of translations; initialize with empty translations
    # and zero scores
    translated_sentences = [""] * len(dataset)
    translated_scores = [0.0] * len(dataset)

    collect_output_hypos = getattr(args, "output_hypos_binary_path", False)
    if collect_output_hypos:
        output_hypos_token_arrays = [None] * len(dataset)

    # Generate and compute BLEU score
    dst_dict = task.target_dictionary
    if args.sacrebleu:
        scorer = bleu.SacrebleuScorer()
    else:
        scorer = bleu.Scorer(dst_dict.pad(), dst_dict.eos(), dst_dict.unk())
    itr = get_eval_itr(args, models, task, dataset)

    oracle_scorer = None
    if args.report_oracle_bleu:
        oracle_scorer = bleu.Scorer(dst_dict.pad(), dst_dict.eos(), dst_dict.unk())

    rescorer = None
    rescoring_bleu_scorer = None
    if args.enable_rescoring:
        rescorer = Rescorer(args)
        rescoring_bleu_scorer = bleu.Scorer(
            dst_dict.pad(), dst_dict.eos(), dst_dict.unk()
        )

    num_sentences = 0
    translation_samples = []
    with progress_bar.build_progress_bar(args, itr) as t:
        wps_meter = TimeMeter()
        gen_timer = StopwatchMeter()
        translations = translator.generate_batched_itr(
            t,
            maxlen_a=args.max_len_a,
            maxlen_b=args.max_len_b,
            cuda=use_cuda,
            timer=gen_timer,
            prefix_size=1 if pytorch_translate_data.is_multilingual(args) else 0,
        )

        for trans_info in _iter_translations(
            args, task, dataset, translations, align_dict, rescorer
        ):
            scorer.add(trans_info.target_tokens, trans_info.hypo_tokens)
            if oracle_scorer is not None:
                oracle_scorer.add(trans_info.target_tokens, trans_info.best_hypo_tokens)
            if rescoring_bleu_scorer is not None:
                rescoring_bleu_scorer.add(
                    trans_info.target_tokens, trans_info.hypo_tokens_after_rescoring
                )

            translated_sentences[trans_info.sample_id] = trans_info.hypo_str
            translated_scores[trans_info.sample_id] = trans_info.hypo_score
            if collect_output_hypos:
                output_hypos_token_arrays[
                    trans_info.sample_id
                ] = trans_info.best_hypo_tokens
            translation_samples.append(
                collections.OrderedDict(
                    {
                        "sample_id": trans_info.sample_id.item(),
                        "src_str": trans_info.src_str,
                        "target_str": trans_info.target_str,
                        "hypo_str": trans_info.hypo_str,
                    }
                )
            )
            wps_meter.update(trans_info.src_tokens.size(0))
            t.log({"wps": round(wps_meter.avg)})
            num_sentences += 1

    # If applicable, save collected hypothesis tokens to binary output file
    if collect_output_hypos:
        output_dataset = pytorch_translate_data.InMemoryNumpyDataset()
        output_dataset.load_from_sequences(output_hypos_token_arrays)
        output_dataset.save(args.output_hypos_binary_path)

    # If applicable, save the translations to the output file,
    # e.g., for external evaluation
    if getattr(args, "translation_output_file", False):
        with open(args.translation_output_file, "w") as out_file:
            for hypo_str in translated_sentences:
                print(hypo_str, file=out_file)

    if getattr(args, "translation_probs_file", False):
        with open(args.translation_probs_file, "w") as out_file:
            for hypo_score in translated_scores:
                print(np.exp(hypo_score), file=out_file)

    if oracle_scorer is not None:
        print(f"| Oracle BLEU (best hypo in beam): {oracle_scorer.result_string()}")

    if rescoring_bleu_scorer is not None:
        print(
            "| Rescoring BLEU (top hypo in beam after rescoring): "
            f"{rescoring_bleu_scorer.result_string()}"
        )

    return scorer, num_sentences, gen_timer, translation_samples
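Note that the scores written to translation_probs_file are converted from log-probabilities to probabilities via exp. A minimal illustration of just that conversion (the file name here is hypothetical):

import numpy as np

translated_scores = [-0.5, -2.3]  # per-sentence log-probabilities
with open("probs.txt", "w") as out_file:  # hypothetical output path
    for hypo_score in translated_scores:
        print(np.exp(hypo_score), file=out_file)  # writes ~0.6065 and ~0.1003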