Example #1
def evaluate_specific(train_network, input_lang, pair, name='tracking_pair'):
    print('>', pair[0])
    print('=', pair[1])
    output_words = evaluate(train_network, input_lang, pair[0])
    output_sentence = ' '.join(output_words)
    print('<', output_sentence)
    # corpus_bleu takes references first, as token lists (not joined strings)
    print('BLEU Score',
          bleu_score.corpus_bleu([[pair[1].split()]], [output_words]))
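A recurring pitfall in these examples is the argument shape corpus_bleu expects: references come first, as one list of reference token lists per hypothesis, and every sentence is a token list rather than a joined string. A minimal, self-contained sketch with toy data (not taken from the snippet above):

from nltk.translate import bleu_score

hypotheses = [['the', 'cat', 'sat', 'on', 'the', 'mat']]
references = [[['the', 'cat', 'sat', 'on', 'the', 'mat']]]  # one reference set per hypothesis
print(bleu_score.corpus_bleu(references, hypotheses))  # 1.0 for an exact match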
Example #2
    def evaluate(self):
        ys_pred = []
        ys_true = []
        if self.measure == 'accuracy':
            for inputs, targets in self.data_loader:
                predicted = self.model.predict(inputs)
                predicted, targets = self._pad(
                    predicted, targets)  # when the model over- or under-generates
                ys_pred.append(np.concatenate(predicted))
                ys_true.append(np.concatenate(targets))
            ys_pred = np.concatenate(ys_pred)
            ys_true = np.concatenate(ys_true)
            value = accuracy_score(ys_true, ys_pred)
        elif self.measure == 'BLEU':
            for inputs, targets in self.data_loader:
                ys_pred += [np.array(p) for p in self.model.predict(inputs)]
                ys_true += [np.array(t)[np.newaxis, :] for t in targets]
            value = bleu_score.corpus_bleu(ys_true, ys_pred)
        elif self.measure == 'sent_BLEU':
            sent_bleues = []
            for inputs, targets in self.data_loader:
                predicted = self.model.predict(inputs)
                # sentence_bleu takes the reference list first, then the candidate
                sent_bleues += [
                    sentence_bleu([np.array(tgt)], np.array(cand))
                    for cand, tgt in zip(predicted, targets)
                ]
            value = sum(sent_bleues) / len(sent_bleues)
        else:
            raise ValueError("measure: ['accuracy', 'BLEU', 'sent_BLEU']")
        return value
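In the 'sent_BLEU' branch, note that sentence_bleu likewise takes the reference(s) first and the candidate second. A self-contained check with toy tokens (the names here are illustrative, not from the snippet):

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

reference = ['the', 'quick', 'brown', 'fox']
candidate = ['the', 'quick', 'brown', 'dog']
# method1 smoothing keeps the score nonzero when higher-order n-grams miss
print(sentence_bleu([reference], candidate,
                    smoothing_function=SmoothingFunction().method1))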
Example #3
    def evaluate_specific(self,
                          in_seq,
                          out_seq,
                          in_len,
                          person,
                          name='tracking_pair'):
        dialogue = [self.index2word[j] for j in in_seq]
        response = [self.index2word[j] for j in out_seq]
        print('>', dialogue, 'By :', person[0].data)
        print('=', response, 'By :', person[1].data)

        _, output_words, attentions = self.evaluate([in_seq], [out_seq],
                                                    [in_len],
                                                    person[0].view(1, 1))
        try:
            target_index = output_words[0].index('<EOS>') + 1
        except ValueError:
            target_index = len(output_words[0])

        output_words = output_words[0][:target_index]
        attentions = attentions[0, :target_index, :].view(target_index, -1)

        output_sentence = ' '.join(output_words)
        print('<', output_sentence)

        # references first (one reference set per hypothesis), as token lists
        print('BLEU Score',
              bleu_score.corpus_bleu([[response]], [output_words]))
        self.help_fn.show_attention(dialogue,
                                    output_words,
                                    attentions,
                                    name=name)
Example #4
def evaluate_specific(model, in_seq, out_seq, in_len, person, senti, types,
                      types_2, index2word):

    response = [index2word[j] for j in out_seq]

    criterion = nn.NLLLoss(ignore_index=0)

    loss_eva, output_words = model.evaluate([in_seq], [out_seq], [in_len],
                                            [person], [senti],
                                            criterion)  # runs a single sentence here

    try:
        target_index = output_words[0].index('<EOS>') + 1
    except ValueError:
        target_index = len(output_words[0])

    # TODO: Remove this false target_index, so the part below is hidden for now

    output_words = output_words[0][:target_index]

    output_sentence = ' '.join(output_words)
    #print('<', output_sentence)

    # Isn't this computed in reverse? The reference should come first and the
    # candidate second -- already swapped (and both passed as token lists)
    bleu1 = bleu_score.corpus_bleu([[response]], [output_words],
                                   weights=(1, 0, 0, 0))
    bleu2 = bleu_score.corpus_bleu([[response]], [output_words],
                                   weights=(0, 1, 0, 0))
    bleu3 = bleu_score.corpus_bleu([[response]], [output_words],
                                   weights=(0, 0, 1, 0))
    bleu4 = bleu_score.corpus_bleu([[response]], [output_words],
                                   weights=(0, 0, 0, 1))

    #print('BLEU1 Score', bleu1,'BLEU4 Score', bleu4)

    num_token, types = distinct1(output_words, types)

    num_token_2, types_2 = distinct2(output_words, types_2)

    #help_fn.show_attention(dialogue, output_words, attentions, name=name)
    return loss_eva, bleu1, bleu2, bleu3, bleu4, num_token, types, num_token_2, types_2
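distinct1 and distinct2 are helpers this snippet does not define. Purely as a hypothetical sketch consistent with how `types` and `types_2` are threaded through repeated calls (an assumption, not the original code):

def distinct1(tokens, types):
    # accumulate unique unigrams across calls; the caller can later compute
    # distinct-1 as len(types) / total token count (hypothetical contract)
    types = set(types) | set(tokens)
    return len(tokens), types

def distinct2(tokens, types_2):
    # same idea at the bigram level
    types_2 = set(types_2) | set(zip(tokens, tokens[1:]))
    return max(len(tokens) - 1, 0), types_2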
Example #5
def calc_bleu(refs, hyps):
    """
    BLEUスコアを計算する関数
    :param refs: list, 参照訳。単語のリストのリスト (例: [['I', 'have', 'a', 'pen'], ...])
    :param hyps: list, モデルの生成した訳。
    単語のリストのリスト (例: [['I', 'have', 'a', 'pen'], ...])
    :return: float, BLEUスコア(0~100)
    """
    # truncate at <EOS>; references are assumed to always contain it
    refs = [[ref[:ref.index(word2id['<EOS>'])]] for ref in refs]
    hyps = [hyp[:hyp.index(word2id['<EOS>'])]
            if word2id['<EOS>'] in hyp else hyp for hyp in hyps]
    return 100 * bleu_score.corpus_bleu(refs, hyps)
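Toy usage of calc_bleu, with a stand-in word2id (the real mapping comes from the surrounding training code):

word2id = {'<EOS>': 0, 'I': 1, 'have': 2, 'a': 3, 'pen': 4}  # stand-in vocab

refs = [[1, 2, 3, 4, 0]]      # 'I have a pen <EOS>' as ids
hyps = [[1, 2, 3, 4, 0]]      # identical hypothesis
print(calc_bleu(refs, hyps))  # 100.0 for an exact match after <EOS> truncation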
Example #6
    def calc_bleu(self, refs: List[List[T]], hyps: List[List[T]]):
        """
        Args:
            refs: reference sentences split into words / word indices
            hyps: generated sentences split into words / word indices
        Returns:
            bleu_score (float): score in [0, 100] (higher is better)
        """
        refs = [[ref[:ref.index(self.EOS)]] for ref in refs]
        hyps = [
            hyp[:hyp.index(self.EOS)] if self.EOS in hyp else hyp
            for hyp in hyps
        ]
        return 100 * bleu_score.corpus_bleu(
            refs, hyps, smoothing_function=self.smoothing_function)
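Why a smoothing function is worth carrying on the instance: without one, a single n-gram order with zero matches drives the whole score to effectively zero on short sentences. A quick self-contained illustration:

from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

refs = [[['a', 'b', 'c']]]
hyps = [['a', 'b', 'd']]
print(corpus_bleu(refs, hyps))  # vanishingly small: no trigram match (warns)
print(corpus_bleu(refs, hyps,
                  smoothing_function=SmoothingFunction().method1))  # small but meaningful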
Example #7
def Validation(transformer, n, epoch):
    all_BLEUscore = 0
    
    with torch.no_grad():
        pair = pairs_dev
        inputs, outputs = map(list, zip(*pair))
        dev_pairs = [tensorsFromPair(inputs[i].split(" "), outputs[i].split(" "))
                     for i in range(n)]
        dataset = MyDataset(dev_pairs)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=50, shuffle=False, num_workers=0, collate_fn=my_collate_fn)
        
        cnt = 0
        res = []
        hy = []
        chencherry = bleu_score.SmoothingFunction()
        for idx in tqdm(dataloader, ascii=True):
            batch = len(idx[0][0])
            dev_pair = idx[0]
            
            input_tensor = torch.tensor(dev_pair[0], dtype=torch.long, device=device)
            re_tensor = torch.tensor(dev_pair[1], dtype=torch.long, device=device)
            
            output_words = evaluate(transformer, input_tensor, batch, 0)
            #output_words = evaluate(transformer, input_tensor, batch, -1)  # debug
            
            for i in range(len(output_words)):
                re = outputs[cnt].split(" ")
                res.append([ re ])
                if len(output_words[i]) != 0:   # guard against empty output
                    hy.append(output_words[i].split(" "))
                else:
                    hy.append([""])
                
                cnt += 1
                
        # optionally smoothed: corpus_bleu(res, hy, smoothing_function=chencherry.method4)
        all_BLEUscore += bleu_score.corpus_bleu(res, hy)
        all_BLEUscore *= 100
        
    return all_BLEUscore
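Note that the corpus-level BLEU computed here is not the mean of per-sentence scores: n-gram counts are pooled over the whole corpus before the precisions are taken. A contrast on toy data:

from nltk.translate.bleu_score import corpus_bleu, sentence_bleu, SmoothingFunction

sm = SmoothingFunction().method1
refs = [[['a', 'b', 'c', 'd']], [['e', 'f', 'g', 'h']]]
hyps = [['a', 'b', 'c', 'd'], ['e', 'f', 'x', 'y']]

corpus = corpus_bleu(refs, hyps, smoothing_function=sm)
mean_sent = sum(sentence_bleu(r, h, smoothing_function=sm)
                for r, h in zip(refs, hyps)) / len(hyps)
print(corpus, mean_sent)  # the two aggregates generally differ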
Example #8
#!/usr/bin/env python
# -*- coding: utf8 -*-

# for python3
# read the reference from txt1.txt and the MT output from txt2.txt, then compute corpus BLEU
from nltk import word_tokenize
from nltk.translate import bleu_score
from nltk.translate.bleu_score import SmoothingFunction
cc = SmoothingFunction()

txt1 = open("txt1.txt", encoding='utf-8').read().splitlines()
txt2 = open("txt2.txt", encoding='utf-8').read().splitlines()

# corpus_bleu expects one list of references per hypothesis,
# so each tokenized reference line is wrapped in a list
ref = [[word_tokenize(line)] for line in txt1]
hyp = [word_tokenize(line) for line in txt2]

print(bleu_score.corpus_bleu(ref, hyp, smoothing_function=cc.method7))

Example #9
from nltk.translate.bleu_score import sentence_bleu

reference = [['this', 'is', 'small', 'test']]
candidate = ['this', 'is', 'a', 'test']
score = sentence_bleu(reference, candidate, weights=(0.5, 0.5, 0, 0))
print(score)
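With weights (0.5, 0.5, 0, 0) this is BLEU-2: p1 = 3/4 ('this', 'is', 'test' match), p2 = 1/3 (only 'this is' matches), the lengths are equal so there is no brevity penalty, and the score is the geometric mean sqrt(3/4 * 1/3) = 0.5:

assert abs(score - 0.5) < 1e-9  # arithmetic check of the printed value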


# In[126]:

from nltk.translate import bleu_score
from nltk.translate.bleu_score import SmoothingFunction
from nltk.translate.bleu_score import corpus_bleu
# `actual` and `predicted` come from earlier notebook cells: a list of
# reference sets and a list of hypothesis token lists, respectively
reference = actual
hypothesis = predicted
smoothie = SmoothingFunction().method4
print('bleu_score.corpus_bleu(reference, hypothesis): {0}'.format(
    bleu_score.corpus_bleu(reference, hypothesis, smoothing_function=smoothie)))


# In[1]:

import nltk
import pkg_resources
pkg_resources.get_distribution("nltk").version


# In[15]:

# Attention Model

# training data
train_X = encode_text(ger_tokenizer, ger_max, train[:, 1])
Example #10
def calc_bleu(refs, hyps):
    # truncate everything after <EOS> (references are assumed to contain it),
    # wrap one reference set per hypothesis, and score on the 0-100 scale
    _refs = [[ref[:ref.index(EOS)]] for ref in refs]
    _hyps = [hyp[:hyp.index(EOS)] if EOS in hyp else hyp for hyp in hyps]
    return 100 * bleu_score.corpus_bleu(_refs, _hyps)