class MeteorBleu:
    """
    Computes Meteor and BLEU feature values for a pair of hypothesis
    sentences (h1, h2) scored against one reference sentence.
    """

    def __init__(self, alpha=0.5):
        self.simple_meteor = SimpleMeteor(alpha=alpha, beta=0.16)
        self.tri_bleu = Bleu(3)
        self.four_bleu = Bleu(4, beta=0.13)
        self.p = Preprocessor()

    def _score_triple(self, scorer, tokline, posline, stem, lowercase,
                      pos_keyword=None, wts=None):
        """
        Scores h1 and h2 against the reference for one configuration.

        scorer      -- object whose score() method is called
        tokline     -- tokens for [h1, h2, ref], handed to the preprocessor
        posline     -- postags for [h1, h2, ref]; only preprocessed when
                       pos_keyword is given
        stem        -- forwarded to the preprocessor for tokline
        lowercase   -- forwarded to the preprocessor for tokline
        pos_keyword -- name of the boolean keyword that switches the scorer
                       into postag mode ("postags" or "postag"), or None
                       for plain token scoring
        wts         -- optional weights, forwarded as wts= in postag mode

        Returns [h1score, h2score, h1score - h2score]
        """
        h1p, h2p, refp = self.p.preprocess(
            tokline, stem=stem, lowercase=lowercase)
        if pos_keyword is None:
            h1score = scorer.score(h1p, refp)
            h2score = scorer.score(h2p, refp)
        else:
            h1pos, h2pos, refpos = self.p.preprocess(posline)
            # Keyword arguments common to both hypothesis calls; the flag
            # name differs between scorers, so it is passed in by name.
            shared = {pos_keyword: True, "refpos": refpos}
            if wts is not None:
                shared["wts"] = wts
            h1score = scorer.score(h1p, refp, hpos=h1pos, **shared)
            h2score = scorer.score(h2p, refp, hpos=h2pos, **shared)
        return [h1score, h2score, h1score - h2score]

    def features(self, tokline, posline):
        """
        The workhorse function
        Takes lists of tokens and postags for [h1, h2, ref]
        Returns feature values for h1, h2, h1-h2

        The result is a flat list of 27 values: one
        [h1score, h2score, h1score - h2score] triple for each of the nine
        configurations listed below, in that order.
        """
        meteor = self.simple_meteor
        tri_bleu = self.tri_bleu
        four_bleu = self.four_bleu
        weights = [10, 5, 2, 1]

        # (scorer, stem, lowercase, postag keyword, weights)
        configurations = [
            # Simple Meteor
            (meteor, False, False, None, None),
            # Simple Meteor lowercase
            (meteor, False, True, None, None),
            # Simple Meteor lowercase, stemmed
            (meteor, True, True, None, None),
            # Simple Meteor referencing sequence of postags,
            # lowercase, stemmed
            (meteor, True, True, "postags", None),
            # trigram BLEU, lowercased, stemmed
            (tri_bleu, True, True, None, None),
            # postag-smoothed 4-gram BLEU
            (four_bleu, False, False, "postag", None),
            # postag-smoothed 4-gram BLEU, lowercased
            (four_bleu, False, True, "postag", None),
            # postag-smoothed 4-gram BLEU, lowercased, stemmed
            (four_bleu, True, True, "postag", None),
            # postag-smoothed 4-gram BLEU, lowercased, stemmed, weighted
            (four_bleu, True, True, "postag", weights),
        ]

        features = []
        for scorer, stem, lowercase, pos_keyword, wts in configurations:
            features += self._score_triple(
                scorer, tokline, posline, stem, lowercase,
                pos_keyword=pos_keyword, wts=wts)
        return features

    def evaluate(self, h1score, h2score):
        """
        Compares the two hypothesis scores and prints the verdict:
        -1 if h1score is greater, 0 if they are equal, 1 otherwise.
        """
        # print(...) with a single argument behaves the same on
        # Python 2 and Python 3.
        if h1score > h2score:
            print(-1)
        elif h1score == h2score:
            print(0)
        else:
            print(1)
# End-of-iteration bookkeeping: average the weights, log timings, then score
# train/dev/test. NOTE(review): this is a fragment; `it`, `i`, `iterstart`,
# `logs`, `decoder`, `avgtime`, `preloaded`, `parseval`, `opts` and
# `evaluate_all` are all defined outside the visible code.

# NOTE(review): two averages are computed with the same divisor from two
# different accumulators; only `avged_weights` is used in the lines below.
avg_weights = sum_weights / ((it+1) * (i+1))
avged_weights = total_weights / ((it+1) * (i+1))
itertime = time.time() - iterstart

# The trailing commas keep the log line open (Python 2 print statement), so
# the timing pieces below end up on a single line.
print >> logs, "iteration %d time %.2lf (%.2lf per sent)" % (it+1, itertime, itertime/(i+1)),
print >> logs, "decode %.2lf, averaging %.2lf" % (decoder.decode_time, avgtime),
if not preloaded:
    print >> logs, "load %.2lf, oracle %.2lf, extract %.2lf" % \
          (decoder.load_time(), decoder.oracle_time, decoder.extract_time)
else:
    # Nothing more to report: just terminate the log line.
    print >> logs

trainscore = parseval.score() #.fscore()

## re-load
if opts.devfile is not None:
    devforests = decoder.load(opts.devfile)
    devscore = evaluate_all(avged_weights, devforests).score() #.fscore()
else:
    # No dev file: -1 placeholder (shows up as -100.00 in the summary below).
    devscore = -1

if opts.testfile is not None:
    testforests = decoder.load(opts.testfile)
# The conditional expression repeats the testfile check, so `testforests` is
# only read when it was assigned above; otherwise the test score is 0.
testscore = evaluate_all(avged_weights, testforests).score() \
            if opts.testfile is not None else 0

# dev and test scores are scaled by 100 for display; the train score is not.
scores = "train = %.4lf, dev = %.2lf, test = %.2lf" % (trainscore, devscore*100, testscore*100)
def test_score():
    """
    Scores a candidate string against a reference that extends it by two
    characters, using a Bleu built with N_SIZE, and prints the score.
    """
    candidate = "中华人民共和国"
    reference = "中华人民共和国公民"
    scorer = Bleu(N_SIZE)
    print('score: {}'.format(scorer.score(candidate, reference)))
class MeteorBleu:
    """
    Computes Meteor and BLEU feature values for a pair of hypothesis
    sentences (h1, h2) scored against one reference sentence.
    """

    def __init__(self, alpha=0.5):
        self.simple_meteor = SimpleMeteor(alpha=alpha, beta=0.16)
        self.tri_bleu = Bleu(3)
        self.four_bleu = Bleu(4, beta=0.13)
        self.p = Preprocessor()

    def _score_triple(self, scorer, tokline, posline, stem, lowercase,
                      pos_keyword=None, wts=None):
        """
        Scores h1 and h2 against the reference for one configuration.

        scorer      -- object whose score() method is called
        tokline     -- tokens for [h1, h2, ref], handed to the preprocessor
        posline     -- postags for [h1, h2, ref]; only preprocessed when
                       pos_keyword is given
        stem        -- forwarded to the preprocessor for tokline
        lowercase   -- forwarded to the preprocessor for tokline
        pos_keyword -- name of the boolean keyword that switches the scorer
                       into postag mode ("postags" or "postag"), or None
                       for plain token scoring
        wts         -- optional weights, forwarded as wts= in postag mode

        Returns [h1score, h2score, h1score - h2score]
        """
        h1p, h2p, refp = self.p.preprocess(
            tokline, stem=stem, lowercase=lowercase)
        if pos_keyword is None:
            h1score = scorer.score(h1p, refp)
            h2score = scorer.score(h2p, refp)
        else:
            h1pos, h2pos, refpos = self.p.preprocess(posline)
            # Keyword arguments common to both hypothesis calls; the flag
            # name differs between scorers, so it is passed in by name.
            shared = {pos_keyword: True, "refpos": refpos}
            if wts is not None:
                shared["wts"] = wts
            h1score = scorer.score(h1p, refp, hpos=h1pos, **shared)
            h2score = scorer.score(h2p, refp, hpos=h2pos, **shared)
        return [h1score, h2score, h1score - h2score]

    def features(self, tokline, posline):
        """
        The workhorse function
        Takes lists of tokens and postags for [h1, h2, ref]
        Returns feature values for h1, h2, h1-h2

        The result is a flat list of 27 values: one
        [h1score, h2score, h1score - h2score] triple for each of the nine
        configurations listed below, in that order.
        """
        meteor = self.simple_meteor
        tri_bleu = self.tri_bleu
        four_bleu = self.four_bleu
        weights = [10, 5, 2, 1]

        # (scorer, stem, lowercase, postag keyword, weights)
        configurations = [
            # Simple Meteor
            (meteor, False, False, None, None),
            # Simple Meteor lowercase
            (meteor, False, True, None, None),
            # Simple Meteor lowercase, stemmed
            (meteor, True, True, None, None),
            # Simple Meteor referencing sequence of postags,
            # lowercase, stemmed
            (meteor, True, True, "postags", None),
            # trigram BLEU, lowercased, stemmed
            (tri_bleu, True, True, None, None),
            # postag-smoothed 4-gram BLEU
            (four_bleu, False, False, "postag", None),
            # postag-smoothed 4-gram BLEU, lowercased
            (four_bleu, False, True, "postag", None),
            # postag-smoothed 4-gram BLEU, lowercased, stemmed
            (four_bleu, True, True, "postag", None),
            # postag-smoothed 4-gram BLEU, lowercased, stemmed, weighted
            (four_bleu, True, True, "postag", weights),
        ]

        features = []
        for scorer, stem, lowercase, pos_keyword, wts in configurations:
            features += self._score_triple(
                scorer, tokline, posline, stem, lowercase,
                pos_keyword=pos_keyword, wts=wts)
        return features

    def evaluate(self, h1score, h2score):
        """
        Compares the two hypothesis scores and prints the verdict:
        -1 if h1score is greater, 0 if they are equal, 1 otherwise.
        """
        # print(...) with a single argument behaves the same on
        # Python 2 and Python 3.
        if h1score > h2score:
            print(-1)
        elif h1score == h2score:
            print(0)
        else:
            print(1)