def score_article(self,test,ref):
    """Return a document-level BLEU score for ``test`` against ``ref``.

    Sentence i of ``test`` is scored against sentence i of ``ref``
    (position-aligned), using the ``bleu`` module's cook/score helpers
    with the n-gram order taken from ``self.options['bleu_ngrams']``.
    """
    order = self.options['bleu_ngrams']
    # Pre-process every reference sentence once.
    cooked_refs = [bleu.cook_refs([sentence], order) for sentence in ref]
    # Cook each test sentence against its position-matched reference.
    cooked_tests = [bleu.cook_test(sentence, cooked_refs[idx], order)
                    for idx, sentence in enumerate(test)]
    return bleu.score_cooked(cooked_tests, order)
def score_article(self, test, ref):
    """Compute corpus BLEU of translated sentences ``test`` versus the
    reference sentences ``ref``, aligned one-to-one by list position.

    Delegates all BLEU bookkeeping to the ``bleu`` module; the n-gram
    order comes from ``self.options["bleu_ngrams"]``.
    """
    ngrams = self.options["bleu_ngrams"]
    cooked_refs = []
    for reference_sentence in ref:
        # Each reference is cooked as a singleton reference set.
        cooked_refs.append(bleu.cook_refs([reference_sentence], ngrams))
    cooked_test = [
        bleu.cook_test(sentence, cooked_refs[position], ngrams)
        for position, sentence in enumerate(test)
    ]
    return bleu.score_cooked(cooked_test, ngrams)
def eval_sents(self,translist,targetlist):
    """Score every sentence in ``translist`` against every sentence in
    ``targetlist`` with a symmetrized sentence-level BLEU.

    For each candidate pair that shares at least one n-gram of the
    highest order, BLEU is computed in both directions (test-vs-ref and
    ref-vs-test) and the two scores are combined as a harmonic mean.

    Returns a dict mapping each index of ``translist`` to a list of up
    to ``self.options['maxalternatives']`` tuples
    ``(meanscore, refID, correct_counts)``, sorted best-first.
    """
    scoredict = {}
    cooked_test = {}   # reused across candidates; relevant keys are rewritten each time
    cooked_test2 = {}  # holds only the 'guess' counts for the reverse-direction BLEU
    # Pre-cook every target sentence once: (refID, (reflens, refmaxcounts)).
    cooktarget = [(items[0],bleu.cook_refs([items[1]],self.options['bleu_ngrams'])) for items in enumerate(targetlist)]
    # Attach the set of each reference's n-grams for fast intersection tests below.
    cooktarget = [(refID,(reflens, refmaxcounts, set(refmaxcounts))) for (refID,(reflens, refmaxcounts)) in cooktarget]
    for testID,testSent in enumerate(translist):
        scorelist = []

        #copied over from bleu.py to minimize redundancy
        test_normalized = bleu.normalize(testSent)
        cooked_test["testlen"] = len(test_normalized)
        # 'guess' = total number of n-grams of each order in the test sentence.
        cooked_test["guess"] = [max(len(test_normalized)-k+1,0) for k in range(1,self.options['bleu_ngrams']+1)]
        counts = bleu.count_ngrams(test_normalized, self.options['bleu_ngrams'])

        #separate by n-gram length. if we have no matching bigrams, we don't have to compare unigrams
        ngrams_sorted = dict([(x,set()) for x in range(self.options['bleu_ngrams'])])
        for ngram in counts:
            ngrams_sorted[len(ngram)-1].add(ngram)

        for (refID,(reflens, refmaxcounts, refset)) in cooktarget:
            # Cheap pre-filter: only score pairs sharing a highest-order n-gram.
            ngrams_filtered = ngrams_sorted[self.options['bleu_ngrams']-1].intersection(refset)
            if ngrams_filtered:
                cooked_test["reflen"] = reflens[0]
                cooked_test['correct'] = [0]*self.options['bleu_ngrams']
                # Highest order: clipped counts over the pre-filtered matches.
                for ngram in ngrams_filtered:
                    cooked_test["correct"][self.options['bleu_ngrams']-1] += min(refmaxcounts[ngram], counts[ngram])
                # Lower orders: clipped counts of all matching n-grams.
                for order in range(self.options['bleu_ngrams']-1):
                    for ngram in ngrams_sorted[order].intersection(refset):
                        cooked_test["correct"][order] += min(refmaxcounts[ngram], counts[ngram])

                #copied over from bleu.py to minimize redundancy
                # Geometric mean of the n-gram precisions, in log space.
                # NOTE(review): math.log raises ValueError if a 'guess' entry is 0,
                # i.e. if the test sentence is shorter than bleu_ngrams tokens —
                # presumably inputs are long enough; confirm upstream.
                logbleu = 0.0
                for k in range(self.options['bleu_ngrams']):
                    logbleu += math.log(cooked_test['correct'][k])-math.log(cooked_test['guess'][k])
                logbleu /= self.options['bleu_ngrams']
                # Brevity penalty (no-op when the test is longer than the reference).
                logbleu += min(0,1-float(cooked_test['reflen'])/cooked_test['testlen'])
                score = math.exp(logbleu)

                if score > 0:
                    #calculate bleu score in reverse direction
                    cooked_test2["guess"] = [max(cooked_test['reflen']-k+1,0) for k in range(1,self.options['bleu_ngrams']+1)]
                    logbleu = 0.0
                    for k in range(self.options['bleu_ngrams']):
                        logbleu += math.log(cooked_test['correct'][k])-math.log(cooked_test2['guess'][k])
                    logbleu /= self.options['bleu_ngrams']
                    logbleu += min(0,1-float(cooked_test['testlen'])/cooked_test['reflen'])
                    score2 = math.exp(logbleu)
                    # Harmonic mean of the two directional scores.
                    meanscore = (2*score*score2)/(score+score2)
                    scorelist.append((meanscore,refID,cooked_test['correct']))

        # Keep only the top 'maxalternatives' candidates for this test sentence.
        scoredict[testID] = sorted(scorelist,key=itemgetter(0),reverse=True)[:self.options['maxalternatives']]

    return scoredict
def eval_sents(self, translist, targetlist):
    """Score every sentence in ``translist`` against every sentence in
    ``targetlist`` with a symmetrized sentence-level BLEU.

    For each candidate pair that shares at least one n-gram of the
    highest order, BLEU is computed in both directions (test-vs-ref and
    ref-vs-test) and the two scores are combined as a harmonic mean.

    Returns a dict mapping each index of ``translist`` to a list of up
    to ``self.options["maxalternatives"]`` tuples
    ``(meanscore, refID, correct_counts)``, sorted best-first.
    """
    scoredict = {}
    cooked_test = {}   # reused across candidates; relevant keys are rewritten each time
    cooked_test2 = {}  # holds only the "guess" counts for the reverse-direction BLEU
    # Pre-cook every target sentence once: (refID, (reflens, refmaxcounts)).
    cooktarget = [
        (items[0], bleu.cook_refs([items[1]], self.options["bleu_ngrams"])) for items in enumerate(targetlist)
    ]
    # Attach the set of each reference's n-grams for fast intersection tests below.
    cooktarget = [
        (refID, (reflens, refmaxcounts, set(refmaxcounts))) for (refID, (reflens, refmaxcounts)) in cooktarget
    ]
    for testID, testSent in enumerate(translist):
        scorelist = []

        # copied over from bleu.py to minimize redundancy
        test_normalized = bleu.normalize(testSent)
        cooked_test["testlen"] = len(test_normalized)
        # "guess" = total number of n-grams of each order in the test sentence.
        cooked_test["guess"] = [
            max(len(test_normalized) - k + 1, 0) for k in range(1, self.options["bleu_ngrams"] + 1)
        ]
        counts = bleu.count_ngrams(test_normalized, self.options["bleu_ngrams"])

        # separate by n-gram length. if we have no matching bigrams, we don't have to compare unigrams
        ngrams_sorted = dict([(x, set()) for x in range(self.options["bleu_ngrams"])])
        for ngram in counts:
            ngrams_sorted[len(ngram) - 1].add(ngram)

        for (refID, (reflens, refmaxcounts, refset)) in cooktarget:
            # Cheap pre-filter: only score pairs sharing a highest-order n-gram.
            ngrams_filtered = ngrams_sorted[self.options["bleu_ngrams"] - 1].intersection(refset)
            if ngrams_filtered:
                cooked_test["reflen"] = reflens[0]
                cooked_test["correct"] = [0] * self.options["bleu_ngrams"]
                # Highest order: clipped counts over the pre-filtered matches.
                for ngram in ngrams_filtered:
                    cooked_test["correct"][self.options["bleu_ngrams"] - 1] += min(
                        refmaxcounts[ngram], counts[ngram]
                    )
                # Lower orders: clipped counts of all matching n-grams.
                for order in range(self.options["bleu_ngrams"] - 1):
                    for ngram in ngrams_sorted[order].intersection(refset):
                        cooked_test["correct"][order] += min(refmaxcounts[ngram], counts[ngram])

                # copied over from bleu.py to minimize redundancy
                # Geometric mean of the n-gram precisions, in log space.
                # NOTE(review): math.log raises ValueError if a "guess" entry is 0,
                # i.e. if the test sentence is shorter than bleu_ngrams tokens —
                # presumably inputs are long enough; confirm upstream.
                logbleu = 0.0
                for k in range(self.options["bleu_ngrams"]):
                    logbleu += math.log(cooked_test["correct"][k]) - math.log(cooked_test["guess"][k])
                logbleu /= self.options["bleu_ngrams"]
                # Brevity penalty (no-op when the test is longer than the reference).
                logbleu += min(0, 1 - float(cooked_test["reflen"]) / cooked_test["testlen"])
                score = math.exp(logbleu)

                if score > 0:
                    # calculate bleu score in reverse direction
                    cooked_test2["guess"] = [
                        max(cooked_test["reflen"] - k + 1, 0) for k in range(1, self.options["bleu_ngrams"] + 1)
                    ]
                    logbleu = 0.0
                    for k in range(self.options["bleu_ngrams"]):
                        logbleu += math.log(cooked_test["correct"][k]) - math.log(cooked_test2["guess"][k])
                    logbleu /= self.options["bleu_ngrams"]
                    logbleu += min(0, 1 - float(cooked_test["testlen"]) / cooked_test["reflen"])
                    score2 = math.exp(logbleu)
                    # Harmonic mean of the two directional scores.
                    meanscore = (2 * score * score2) / (score + score2)
                    scorelist.append((meanscore, refID, cooked_test["correct"]))

        # Keep only the top "maxalternatives" candidates for this test sentence.
        scoredict[testID] = sorted(scorelist, key=itemgetter(0), reverse=True)[: self.options["maxalternatives"]]

    return scoredict