示例#1
0
def compute_bleu(net, word_dict, index_dict, tokens, initial=None, IM=None):
    """
    Return BLEU scores for reference tokens.

    For each reference caption, a candidate caption of the same length is
    sampled from net and scored against that single reference.

    Parameters:
        net        -- model handed to ``sample``; ``net.context`` is the number
                      of leading tokens dropped from every reference
        word_dict  -- passed through unchanged to ``sample``
        index_dict -- passed through unchanged to ``sample``
        tokens     -- sequence of reference token sequences
        initial    -- optional value forwarded to ``sample`` as ``initial``;
                      a fresh deep copy is made for every reference
        IM         -- optional indexable; ``IM[i]`` is forwarded to ``sample``
                      for the i-th reference

    Returns:
        Array of shape ``(len(tokens), 3)``; column k holds the score
        computed with n = k + 1.
    """
    orders = (1, 2, 3)
    bleu_scores = np.zeros((len(tokens), len(orders)))
    for i, ref in enumerate(tokens):
        # Identity tests, not ``!= None``: when ``initial`` or ``IM`` is a
        # numpy array, ``!=`` compares elementwise and the result cannot be
        # used as an ``if`` condition.
        if initial is not None:
            init = copy.deepcopy(initial)
        else:
            init = None

        # Drop the first ``net.context`` tokens and the final token.
        ref = ref[net.context:][:-1]

        if IM is not None:
            can = sample(net,
                         word_dict,
                         index_dict,
                         len(ref),
                         IM[i],
                         initial=init)
        else:
            can = sample(net, word_dict, index_dict, len(ref), initial=init)

        # Compute bleu using n = (1,2,3); the references are cooked again for
        # each n because cook_refs takes the order as an argument.
        row = []
        for n in orders:
            cooked_refs = bleu.cook_refs([ref], n=n)
            cooked_test = bleu.cook_test(can, cooked_refs, n=n)
            row.append(bleu.score_cooked([cooked_test], n=n))
        bleu_scores[i] = row

    return bleu_scores
示例#2
0
def main():
    """
    Score the n-best lists in ``test_scorer_data`` with the cmert bleu module.

    Reads ``reference.txt`` (one reference per line) and ``nbest.out`` (lines
    of the form ``<sentence id> ||| <text> ...``), then:

    1. scores the first entry of every n-best list, printing the per-sentence
       "correct guess" pairs followed by the reference length;
    2. scores again with the entry at index 8 chosen for sentence 7 and the
       entry at index 2 chosen for sentence 1;
    3. prints both corpus scores.
    """
    sys.path.append("../scripts/training/cmert-0.5")
    import bleu
    data_dir = "test_scorer_data"
    nbest_file = os.path.join(data_dir, "nbest.out")
    ref_file = os.path.join(data_dir, "reference.txt")
    bleu.preserve_case = False
    bleu.eff_ref_len = "shortest"
    bleu.nonorm = 0

    # ``with`` closes the files even if cooking or parsing raises.
    with open(ref_file) as ref_fh:
        cookedrefs = [bleu.cook_refs([ref]) for ref in ref_fh]

    # Group consecutive n-best lines that share a sentence id.
    tests = []
    last_id = -1
    with open(nbest_file) as nbest_fh:
        for line in nbest_fh:
            fields = line.split("||| ")
            current_id = int(fields[0])
            text = fields[1]
            if current_id != last_id:
                tests.append([])
                last_id = current_id
            tests[-1].append(text)

    #  score with first best
    cookedtests = []
    for i, nbest in enumerate(tests):
        cookedtest = bleu.cook_test(nbest[0], cookedrefs[i])
        stats = " ".join([
            "%d %d" % (c, g)
            for (c, g) in zip(cookedtest['correct'], cookedtest['guess'])
        ])
        # Single-argument print(...) emits the same text whether print is a
        # statement or a function.
        print(" %s %d" % (stats, cookedtest['reflen']))
        cookedtests.append(cookedtest)
    bleu1 = bleu.score_cooked(cookedtests)

    # vary, and score again
    cookedtests = []
    for i, nbest in enumerate(tests):
        if i == 7:
            sentence = nbest[8]
        elif i == 1:
            # Previously assigned to a misspelled name ("sentences"), so the
            # first-best entry was silently kept for this sentence.
            sentence = nbest[2]
        else:
            sentence = nbest[0]
        cookedtests.append(bleu.cook_test(sentence, cookedrefs[i]))
    bleu2 = bleu.score_cooked(cookedtests)

    # Two spaces after the colon reproduce the old comma-separated print of
    # the literal "Bleus: " followed by the two scores.
    print("Bleus:  %s %s" % (bleu1, bleu2))
示例#3
0
def main():
    """
    Score the n-best lists in ``test_scorer_data`` with the cmert bleu module.

    Input files:
        reference.txt -- one reference per line
        nbest.out     -- lines of the form ``<sentence id> ||| <text> ...``;
                         consecutive lines with the same id form one list

    Output (stdout): for every sentence the "correct guess" pairs and the
    reference length obtained with the first-best entry, then the corpus
    score for first-best and the corpus score after choosing the entry at
    index 8 for sentence 7 and the entry at index 2 for sentence 1.
    """
    sys.path.append("../scripts/training/cmert-0.5")
    import bleu
    data_dir = "test_scorer_data"
    nbest_file = os.path.join(data_dir, "nbest.out")
    ref_file = os.path.join(data_dir, "reference.txt")
    bleu.preserve_case = False
    bleu.eff_ref_len = "shortest"
    bleu.nonorm = 0

    # Context managers guarantee the handles are released on any error.
    cookedrefs = []
    with open(ref_file) as ref_fh:
        for ref in ref_fh:
            cookedrefs.append(bleu.cook_refs([ref]))

    tests = []
    previous_id = -1
    with open(nbest_file) as nbest_fh:
        for line in nbest_fh:
            fields = line.split("||| ")
            current_id = int(fields[0])
            text = fields[1]
            # A change of id starts the n-best list of the next sentence.
            if previous_id != current_id:
                tests.append([])
                previous_id = current_id
            tests[-1].append(text)

    #  score with first best
    cookedtests = []
    for i, candidates in enumerate(tests):
        cookedtest = bleu.cook_test(candidates[0], cookedrefs[i])
        pairs = zip(cookedtest['correct'], cookedtest['guess'])
        stats = " ".join(["%d %d" % (c, g) for (c, g) in pairs])
        # One parenthesised argument prints identically as a statement or as
        # a function call.
        print(" %s %d" % (stats, cookedtest['reflen']))
        cookedtests.append(cookedtest)
    bleu1 = bleu.score_cooked(cookedtests)

    # vary, and score again
    # Index of the n-best entry to use for selected sentences; all others
    # keep their first-best entry.  Sentence 1 used to be a no-op because the
    # choice was stored under a misspelled variable name.
    alternative = {7: 8, 1: 2}
    cookedtests = []
    for i, candidates in enumerate(tests):
        sentence = candidates[alternative.get(i, 0)]
        cookedtests.append(bleu.cook_test(sentence, cookedrefs[i]))
    bleu2 = bleu.score_cooked(cookedtests)

    # The double space matches the old comma-separated print, which put a
    # separator after the literal "Bleus: ".
    print("Bleus:  %s %s" % (bleu1, bleu2))
示例#4
0
def batch_bleu(cans, refs):
    """
    Score every candidate against its own set of references for n = 1, 2, 3.

    cans : [ 'XXX', 'XXX', ... ]                      one candidate per entry
    refs : [ ['XXX', 'XXX', ... ], ['XXX', ... ], ... ]  refs[i] belongs to cans[i]

    Returns an array with one row per candidate; the three columns hold the
    scores for n = 1, n = 2 and n = 3 in that order.
    """
    orders = (1, 2, 3)
    scores = np.zeros((len(cans), 3))
    for row, candidate in enumerate(cans):
        references = refs[row]
        per_order = []
        for order in orders:
            # References are cooked separately for each n-gram order.
            cooked_refs = bleu.cook_refs(references, n=order)
            cooked_test = bleu.cook_test(candidate, cooked_refs, n=order)
            per_order.append(bleu.score_cooked([cooked_test], n=order))
        scores[row] = per_order
    return scores
def bleu_single(test, cookedrefs, n=4, addprec=1):
    """
    Return a smoothed sentence-level BLEU score for one candidate.

    test       -- candidate, in whatever form bleu.cook_test accepts
    cookedrefs -- references already processed by bleu.cook_refs
    n          -- highest n-gram order included
    addprec    -- constant added to both the matched and the proposed n-gram
                  count of every order before taking their ratio

    The score is the geometric mean of the n smoothed precisions, multiplied
    by the brevity penalty exp(1 - reflen/testlen) when the candidate is
    shorter than the reference.  An empty candidate scores 0.
    """
    comps = bleu.cook_test(test, cookedrefs, n=n)

    # The brevity penalty tends to 0 as the candidate length tends to 0, but
    # it cannot be evaluated at testlen == 0.  Without this guard the penalty
    # was simply skipped and an empty candidate kept the smoothed-precision
    # product, which addprec > 0 makes strictly positive.
    if comps['testlen'] == 0:
        return 0.

    p = 1.
    for k in range(n):
        p *= float(comps['correct'][k] + addprec) / (comps['guess'][k] + addprec)
    p = p ** (1. / n)
    if 0 < comps['testlen'] < comps['reflen']:
        p *= math.exp(1 - float(comps['reflen']) / comps['testlen'])
    return p
示例#6
0
def process(sentnum, testsents):
    """
    Write the output records for all candidates of one source sentence.

    sentnum   -- sentence number; written in the header line and used to
                 index cookedrefs
    testsents -- sequence of (sent, vector) pairs: candidate text and its
                 feature values

    One "<sentnum> <number of candidates>" line goes to candsfile.  For every
    candidate one line goes to featsfile: the feature values, the
    "correct guess" counts for each n-gram order, and the reference length.
    """
    # Use the parameter rather than the module-level cur_sentnum so the header
    # always names the same sentence whose references are used below.
    candsfile.write("%d %d\n" % (sentnum, len(testsents)))
    for (sent, vector) in testsents:
        comps = bleu.cook_test(sent, cookedrefs[sentnum])

        # Sanity check only: report the mismatch but still emit the record.
        if comps['testlen'] != comps['guess'][0]:
            sys.stderr.write("ERROR: test length != guessed 1-grams\n")

        # Space-indented (the original used a tab here, which mixes
        # inconsistently with the surrounding spaces).
        feature_text = " ".join([str(v) for v in vector])
        ngram_text = " ".join(["%d %d" % (c, g)
                               for (c, g) in zip(comps['correct'], comps['guess'])])
        featsfile.write("%s %s %d\n" % (feature_text, ngram_text, comps['reflen']))
示例#7
0
def bleu_single(test, cookedrefs, n=4, addprec=1):
    """
    Compute a smoothed BLEU score for a single candidate sentence.

    Arguments:
        test       -- the candidate, as accepted by bleu.cook_test
        cookedrefs -- references prepared with bleu.cook_refs
        n          -- maximum n-gram order
        addprec    -- smoothing constant added to the numerator and the
                      denominator of every n-gram precision

    Returns the n-th root of the product of the smoothed precisions, scaled
    by exp(1 - reflen/testlen) if the candidate is shorter than the
    reference.  Returns 0 for an empty candidate.
    """
    comps = bleu.cook_test(test, cookedrefs, n=n)
    test_len = comps['testlen']
    ref_len = comps['reflen']

    if test_len == 0:
        # exp(1 - reflen/testlen) is undefined here and its limit is 0.  The
        # former code skipped the penalty in this case, so an empty candidate
        # was returned with an unpenalised, non-zero smoothed score.
        return 0.

    p = 1.
    for k in range(n):
        p *= float(comps['correct'][k] + addprec) / (comps['guess'][k] +
                                                     addprec)
    p = p**(1. / n)
    if 0 < test_len < ref_len:
        p *= math.exp(1 - float(ref_len) / test_len)
    return p
示例#8
0
def process(sentnum, testsents):
    """
    Emit the candidate-count header and one statistics line per candidate.

    sentnum   -- sentence number, used both in the header written to
                 candsfile and as the index into cookedrefs
    testsents -- iterable of (sent, vector) pairs (candidate text, feature
                 values); must support len()

    featsfile receives, per candidate, the feature values, then a
    "correct guess" pair for each n-gram order, then the reference length.
    """
    # The header used to read the module-level cur_sentnum; taking the value
    # from the parameter keeps it in step with the references selected below
    # and removes the hidden dependency on a global.
    candsfile.write("%d %d\n" % (sentnum, len(testsents)))
    refs_for_sentence = cookedrefs[sentnum]
    for (sent, vector) in testsents:
        comps = bleu.cook_test(sent, refs_for_sentence)

        # Diagnostic only; the record is still written.
        if comps['testlen'] != comps['guess'][0]:
            sys.stderr.write("ERROR: test length != guessed 1-grams\n")

        features = " ".join([str(v) for v in vector])
        counts = " ".join([
            "%d %d" % (c, g)
            for (c, g) in zip(comps['correct'], comps['guess'])
        ])
        featsfile.write("%s %s %d\n" % (features, counts, comps['reflen']))
def compute_bleu(net, word_dict, index_dict, tokens, initial=None, IM=None):
    """
    Return BLEU scores for reference tokens.

    For each reference caption, a candidate caption is sampled from net with
    the same length as the (trimmed) reference and scored against it.

    Arguments:
        net        -- model given to ``sample``; ``net.context`` leading tokens
                      are removed from each reference before scoring
        word_dict  -- forwarded to ``sample``
        index_dict -- forwarded to ``sample``
        tokens     -- sequence of reference token sequences
        initial    -- optional; deep-copied per reference and forwarded to
                      ``sample`` as ``initial``
        IM         -- optional indexable; ``IM[i]`` accompanies the i-th
                      reference in the call to ``sample``

    Returns an array with one row per reference and three columns holding the
    scores for n = 1, 2 and 3.
    """
    bleu_scores = np.zeros((len(tokens), 3))
    for i, ref in enumerate(tokens):
        # ``is not None`` instead of ``!= None``: the latter is evaluated
        # elementwise on numpy arrays and then fails as an ``if`` condition.
        init = copy.deepcopy(initial) if initial is not None else None

        # Strip the first ``net.context`` tokens and the last token.
        ref = ref[net.context:][:-1]

        if IM is not None:
            can = sample(net, word_dict, index_dict, len(ref), IM[i], initial=init)
        else:
            can = sample(net, word_dict, index_dict, len(ref), initial=init)

        # Compute bleu using n = (1,2,3)
        bleu_scores[i] = [
            bleu.score_cooked([bleu.cook_test(can, bleu.cook_refs([ref], n=n), n=n)], n=n)
            for n in (1, 2, 3)
        ]

    return bleu_scores
示例#10
0
    # Read all reference files (args[1:]) in lockstep: each iteration yields
    # one line from every file, and that group is cooked as the reference set
    # of one sentence.
    for lines in itertools.izip(*[file(filename) for filename in args[1:]]):
        cookedrefs.append(bleu.cook_refs(lines, n=n))

    # linemap gives, for each test line in order, the index into cookedrefs
    # to score it against: one integer per line of the map file when a map
    # file is given, otherwise 0, 1, 2, ...
    if opts.mapfilename is not None:
        linemap = []
        for line in file(opts.mapfilename):
            linemap.append(int(line))
    else:
        linemap = range(len(cookedrefs))

    # "-" as the first argument selects standard input as the test source.
    if args[0] == "-":
        infile = sys.stdin
    else:
        infile = open(args[0])
    test1 = []
    # izip stops as soon as either the test input or linemap is exhausted.
    for (line,i) in itertools.izip(infile, linemap):
        test1.append(bleu.cook_test(line, cookedrefs[i], n=n))

    total = 0.
    n_sent = 0

    # One score per sentence goes to stdout (labelled "bleu+1"); the running
    # sum and count feed the average reported below.
    for comps in test1:

        score = score_single_cooked(comps)
        sys.stdout.write("bleu+1=%f\n" % score)
        total += score
        n_sent += 1

    # NOTE(review): n_sent is 0 when no test line was read, in which case
    # this division raises ZeroDivisionError.
    sys.stderr.write("average: %s\n" % (total/n_sent))
    
示例#11
0
    # Highest n-gram order used for cooking and for the score below.
    n = 4

    # Read the reference files (args[1:]) in lockstep; every line is split on
    # whitespace before the group is cooked as one sentence's reference set.
    cookedrefs = []
    for lines in itertools.izip(*[file(filename) for filename in args[1:]]):
        cookedrefs.append(bleu.cook_refs([line.split() for line in lines], n=n))

    # linemap gives, for each test line in order, the index into cookedrefs:
    # taken from the map file (one integer per line) or 0, 1, 2, ... by default.
    if opts.mapfilename is not None:
        linemap = []
        for line in file(opts.mapfilename):
            linemap.append(int(line))
    else:
        linemap = range(len(cookedrefs))

    # izip stops at the shorter of the test file and linemap.
    test1 = []
    for (line,i) in itertools.izip(file(args[0]), linemap):
        test1.append(bleu.cook_test(line.split(), cookedrefs[i], n=n))

    total = 0.
    n_sent = 0

    for comps in test1:
        # An empty test sentence is reported as "0" and skips the rest of
        # this iteration.
        if comps['testlen'] == 0:
            sys.stdout.write("0\n")
            continue
        # Mean over the n orders of log((correct + 1) / (guess + 1)), i.e.
        # the log of the geometric mean of the add-one smoothed precisions.
        logbleu = 0.0
        for k in xrange(n):
            logbleu += math.log(comps['correct'][k]+1)-math.log(comps['guess'][k]+1)
            #sys.stdout.write("%d/%d " % (comps['correct'][k], comps['guess'][k]))
        logbleu /= float(n)

        # The body of this branch lies outside the visible part of the file.
        if opts.brevitypenalty:
示例#12
0
  # Convert each n-best line into one hypothesis line (hypfile) and one
  # tuning record (tunefile) of the form
  #   <sent> ||| <correct guess> x bleun <reflen> ||| <tuned feats> <modelscore>
  for line in infile:
    prefeats = parse_nbest(line.strip())
    # Features missing from the parsed line read as the string "0".
    feats = dd(lambda: "0")
    feats.update(prefeats)
    # Remove any leading "{" and trailing "}" characters from the hypothesis.
    hyp = feats[hypkey].lstrip("{").rstrip("}")
    # The id is reduced by one before it is used to index cookedrefs.
    sent = int(feats[sentkey])-1

    # write hyp to temp file
    hypfile.write(hyp+"\n")

    # write id, components, features to tuning file

    tunefile.write("%d ||| " % sent)

    # convert hyp to components using bleu stuff
    cook = bleu.cook_test(hyp.split(), cookedrefs[sent], n=bleun)
    # One "correct guess" pair per n-gram order, then the reference length.
    for k in range(bleun):
      tunefile.write("%d " % cook["correct"][k])
      tunefile.write("%d " % cook["guess"][k])
    tunefile.write("%d ||| " % cook["reflen"])

    # pull out tuned features
    # Each value is written with its sign flipped.
    for feat in args.feats:
      tunefile.write(str(-(float(feats[feat])))+" ")
    # form model feature from untuned features
    # Weighted sum of the sign-flipped values of every feature that has an
    # entry in modelweights; features without a weight are ignored.
    modelscore = 0.0
    for fname, fval in feats.iteritems():
      if fname in modelweights:
        modelscore += -(float(fval))*modelweights[fname]
    tunefile.write("%f\n" % modelscore)
  hypfile.close()
示例#13
0
    # Accepted flags: -r -c -t -p -v (none takes a value).  -r is accepted
    # but not acted on in the lines visible here.
    (opts, args) = getopt.getopt(sys.argv[1:], "rctpv", [])
    for (opt, parm) in opts:
        if opt == "-c":
            bleu.preserve_case = True
        elif opt == "-t":
            bleu.nist_tokenize = False
        elif opt == "-p":
            bleu.clip_len = True
        elif opt == "-v":
            verbose = True

    # The input files are read in lockstep: the first file is system 1, the
    # second is system 2, and all remaining files are references.
    test1 = []
    test2 = []
    for lines in itertools.izip(*[file(filename) for filename in args]):
        cookedrefs = bleu.cook_refs(lines[2:])
        test1.append(bleu.cook_test(lines[0], cookedrefs))
        test2.append(bleu.cook_test(lines[1], cookedrefs))

    score1 = bleu.score_cooked(test1)
    print "System 1: %f" % score1
    print "System 2: %f" % bleu.score_cooked(test2)

    # Put system 2's i-th sentence into a copy of system 1's output, re-score
    # the whole set, and count how often the score rises or falls relative to
    # system 1 alone.  The remainder of the loop body is outside the visible
    # part of the file.
    better = worse = 0
    fake = test1[:]
    for i in xrange(len(fake)):
        fake[i] = test2[i]

        fake_score = bleu.score_cooked(fake)
        if fake_score > score1:
            better += 1
        elif fake_score < score1:
示例#14
0
    # Read "<sentnum> ||| <sentence> ||| <feature vector>" lines and write, for
    # each one, "<sentnum> ||| <correct guess pairs> <reflen> ||| <vector>".
    line = infile.readline()
    while line != "":
        try:
            (sentnum, sent, vector) = line.split('|||')
        except ValueError:
            # Raised when the line does not split into exactly three fields.
            sys.stderr.write("ERROR: bad input line: %s\n" % line)
            # Advance before re-testing the loop condition; otherwise the
            # same malformed line would be parsed again forever.
            line = infile.readline()
            continue
        sentnum = int(sentnum)
        # Collapse runs of whitespace and trim both ends.
        sent = " ".join(sent.split())
        vector = vector.strip()
        # Disabled branch ("False and ..."): never taken.
        if False and sent == "":
            progress += 1
            line = infile.readline()
            continue
        comps = bleu.cook_test(sent, cookedrefs[sentnum])
        # Sanity check only: the record is still written.
        if comps['testlen'] != comps['guess'][0]:
            sys.stderr.write("ERROR: test length != guessed 1-grams\n")
        ngram_counts = " ".join(["%d %d" % (c,g) for (c,g) in zip(comps['correct'], comps['guess'])])
        sys.stdout.write("%d ||| %s %d ||| %s\n" % (sentnum,
                                                   ngram_counts,
                                                   comps['reflen'],
                                                   vector))

        sys.stdout.flush()

        # Print one progress dot on stderr each time the sentence number changes.
        if sentnum != cur_sentnum:
            sys.stderr.write(".")
            sys.stderr.flush()
            cur_sentnum = sentnum
示例#15
0
    # For every n-best line, write the hypothesis text to hypfile and a
    # record to tunefile laid out as
    #   <sent> ||| <correct guess> x bleun <reflen> ||| <tuned feats> <modelscore>
    for line in infile:
        prefeats = parse_nbest(line.strip())
        # Looking up a feature that was not parsed yields the string "0".
        feats = dd(lambda: "0")
        feats.update(prefeats)
        # Strip any leading "{" and trailing "}" characters.
        hyp = feats[hypkey].lstrip("{").rstrip("}")
        # One is subtracted from the id before it indexes cookedrefs.
        sent = int(feats[sentkey]) - 1

        # write hyp to temp file
        hypfile.write(hyp + "\n")

        # write id, components, features to tuning file

        tunefile.write("%d ||| " % sent)

        # convert hyp to components using bleu stuff
        cook = bleu.cook_test(hyp.split(), cookedrefs[sent], n=bleun)
        # A "correct guess" pair for each n-gram order, then the reference
        # length.
        for k in range(bleun):
            tunefile.write("%d " % cook["correct"][k])
            tunefile.write("%d " % cook["guess"][k])
        tunefile.write("%d ||| " % cook["reflen"])

        # pull out tuned features
        # Values are negated on output.
        for feat in args.feats:
            tunefile.write(str(-(float(feats[feat]))) + " ")
        # form model feature from untuned features
        # Sum of negated value times weight over the features present in
        # modelweights; features without a weight contribute nothing.
        modelscore = 0.0
        for fname, fval in feats.iteritems():
            if fname in modelweights:
                modelscore += -(float(fval)) * modelweights[fname]
        tunefile.write("%f\n" % modelscore)
    hypfile.close()