Example #1
    def _get_reward(self, y_hat, y, n_gram=6):
        # Compute the GLEU score between y and y_hat using NLTK's sentence_gleu and return it.
        # This method gets the reward based on the sampling result and the reference sentence.
        # For now, we use GLEU from NLTK, but you can use your own well-defined reward function.
        # In addition, GLEU is a variation of BLEU that is better suited to reinforcement learning.

        # Since we don't calculate the reward score exactly the same way as multi-bleu.perl
        # (in particular, the tokenization differs), I recommend setting n_gram to 6.

        # |y| = (batch_size, length1)
        # |y_hat| = (batch_size, length2)

        scores = []

        # Note that the loop below is far from a parallelized operation,
        # so it may slow down training.
        for b in range(y.size(0)):
            ref = []
            hyp = []
            for t in range(y.size(1)):
                ref += [str(int(y[b, t]))]
                if y[b, t] == data_loader.EOS:
                    break

            for t in range(y_hat.size(1)):
                hyp += [str(int(y_hat[b, t]))]
                if y_hat[b, t] == data_loader.EOS:
                    break

            # for nltk.bleu & nltk.gleu
            scores += [score_func([ref], hyp, max_len=n_gram) * 100.]

            # for utils.score_sentence
            # scores += [score_func(ref, hyp, 4, smooth = 1)[-1] * 100.]
        scores = torch.FloatTensor(scores).to(y.device)
        # |scores| = (batch_size)

        return scores
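
Both examples assume two names defined elsewhere in the repository: score_func, which the comments identify as NLTK's sentence_gleu, and data_loader.EOS, an integer end-of-sentence token id. A minimal, self-contained sketch of that assumed setup with a toy call is shown below; the EOS value and the tensors are made up purely for illustration.

    import torch
    from nltk.translate.gleu_score import sentence_gleu

    score_func = sentence_gleu   # assumed binding for the score_func used above
    EOS = 3                      # hypothetical id, stands in for data_loader.EOS

    # Toy batch of token-id sequences: |y| = (1, 4), |y_hat| = (1, 5)
    y = torch.tensor([[5, 7, 9, EOS]])
    y_hat = torch.tensor([[5, 7, 7, 9, EOS]])

    # Same per-sentence scoring as in the loop above: token ids compared as strings.
    ref = [str(int(t)) for t in y[0]]
    hyp = [str(int(t)) for t in y_hat[0]]
    reward = score_func([ref], hyp, max_len=6) * 100.
    print(reward)  # single-sentence GLEU reward in [0, 100]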
Example #2
    @staticmethod
    def _get_reward(y_hat, y, n_gram=6):
        # This method gets the reward based on the sampling result and the reference sentence.
        # For now, we use GLEU from NLTK, but you can use your own well-defined reward function.
        # In addition, GLEU is a variation of BLEU that is better suited to reinforcement learning.

        # Since we don't calculate the reward score exactly the same way as multi-bleu.perl
        # (in particular, the tokenization differs), I recommend setting n_gram to 6.

        # |y| = (batch_size, length1)
        # |y_hat| = (batch_size, length2)

        with torch.no_grad():
            scores = []

            for b in range(y.size(0)):
                ref = []
                hyp = []
                for t in range(y.size(-1)):
                    ref += [str(int(y[b, t]))]
                    if y[b, t] == data_loader.EOS:
                        break

                for t in range(y_hat.size(-1)):
                    hyp += [str(int(y_hat[b, t]))]
                    if y_hat[b, t] == data_loader.EOS:
                        break
                # The lines below are slower than the naive for-loops above.
                # ref = y[b].masked_select(y[b] != data_loader.PAD).tolist()
                # hyp = y_hat[b].masked_select(y_hat[b] != data_loader.PAD).tolist()

                # for nltk.bleu & nltk.gleu
                scores += [score_func([ref], hyp, max_len=n_gram) * 100.]
            scores = torch.FloatTensor(scores).to(y.device)
            # |scores| = (batch_size)

            return scores
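
The comments frame GLEU as a reward for reinforcement learning. A common way to consume such a per-sentence reward is a REINFORCE-style loss that weights the log-probability of each sampled sentence by its baseline-subtracted reward; the sketch below is only an illustration of that idea under assumed tensor shapes, not the surrounding trainer's actual code.

    import torch

    def policy_gradient_loss(log_probs, rewards, baseline):
        # log_probs: (batch_size,) summed log-probabilities of the sampled y_hat
        # rewards:   (batch_size,) per-sentence GLEU from _get_reward(y_hat, y)
        # baseline:  (batch_size,) e.g. GLEU of a greedy decode, to reduce variance
        advantage = (rewards - baseline).detach()  # no gradient flows through the reward
        # Maximizing expected reward == minimizing the negative reward-weighted log-likelihood.
        return -(advantage * log_probs).mean()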