Example #1
def main():
    nbests = defaultdict(list)
    references = {}
    for i, line in enumerate(open(opts.en)):
        '''
        Initialize references to correct english sentences
        '''
        references[i] = line

    for line in open(opts.nbest):
        (i, sentence, features) = line.strip().split("|||")
        stats =  list(bleu_stats(sentence, references[int(i)]))
        bleu_score = bleu(stats)
        smoothed_bleu_score = smoothed_bleu(stats)
        # convert the feature string to a list of floats
        feature_list = [float(x) for x in features.split()]
        nbests[int(i)].append((sentence, bleu_score, smoothed_bleu_score, feature_list))

    theta = [1.0/6 for _ in xrange(6)] #initialization
    

    for i in range(0, opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbests[nbest])
            sample.sort(key=lambda i: i[0][2] - i[1][2], reverse=True)
            for i in range(0, min(len(sample), opts.xi)):
                for j in range(0, 6):
                    if theta[j] * sample[i][0][3][j] <= theta[j] * sample[i][1][3][j]:
                        mistake = mistake + 1
                        theta[j] = theta[j] + opts.eta * (sample[i][0][3][j] - sample[i][1][3][j])
        sys.stderr.write("Mistake:  %s\n" % (mistake,))
    print "\n".join([str(weight) for weight in theta])
Example #2
def main(opts, references, input_nbest, theta0=None):
    entry = namedtuple("entry", "sentence, smoothed_bleu, feature_list")
    nbests = None  # no on-disk cache in this variant, so the branch below always runs
    if nbests is None:
        nbests = []
        sys.stderr.write("No nbests on disk, so calculating nbests ... \n")
        for j,line in enumerate(input_nbest):
            (i, sentence, features) = line.strip().split("|||")
            i = int(i)

            # lst_smoothed_bleu_score = []
            # for ref in references:
            #     stats = list(bleu.bleu_stats(sentence, ref[i]))
            #     lst_smoothed_bleu_score.append( bleu.smoothed_bleu(stats) )
            # # making the feature string to float list
            # avg_smoothed_bleu_score = float(sum(lst_smoothed_bleu_score)) / len(lst_smoothed_bleu_score)

            stats = list(bleu.bleu_stats(sentence, references[i]))
            smoothed_bleu_score = bleu.smoothed_bleu(stats)
            
            feature_list = [float(x) for x in features.split()]
            if len(nbests)<=i:
                nbests.append([])
            # nbests[i].append(entry(sentence, avg_smoothed_bleu_score, feature_list))
            nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list))

            if j%5000 == 0:
                sys.stderr.write(".")

    arg_num = len(nbests[0][0].feature_list)

    theta = theta0
    if theta is None:
        theta = [1.0/arg_num for _ in xrange(arg_num)] #initialization

    avg_theta = [ 0.0 for _ in xrange(arg_num)]
    avg_cnt = 0
    sys.stderr.write("\nTraining...\n")
    for j in xrange(opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest, opts)
            sample.sort(key=lambda i: i[0].smoothed_bleu - i[1].smoothed_bleu, reverse=True)
            for i in xrange(min(len(sample), opts.xi)):
                v1 = sample[i][0].feature_list
                v2 = sample[i][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta)
                    
                avg_theta = vector_plus(avg_theta, theta)
                avg_cnt += 1

        sys.stderr.write("Mistake:  %s\n" % (mistake,))
    

    weights = [ avg / avg_cnt if avg_cnt != 0 else 1/float(arg_num) for avg in avg_theta ]
    sys.stderr.write("Computing best BLEU score and outputting...\n")
    # TODO: instead of returning the averaged-out weights, return the weights that maximize the BLEU score
    return "\n".join([str(weight) for weight in weights])
Example #3
def gold_score(hyp_line, ref_line):
    """Return the gold score for a translation hypothesis based on the
    data in *hyp_line* and *ref_line*."""
    _, hyp, _ = hyp_line.split(" ||| ")
    hyp_words = hyp.split()
    ref_words = ref_line.split()
    return bleu.bleu(tuple(bleu.bleu_stats(hyp_words, ref_words)))
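gold_score expects a reranker hypothesis line of the form "i ||| hypothesis ||| features" and a raw reference line. A hypothetical call, with both sentences invented for illustration:

hyp_line = "0 ||| the cat sat on the mat ||| -12.3 -4.5 -0.7"
ref_line = "the cat sat on the mat"
print gold_score(hyp_line, ref_line)  # 1.0 for an exact match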
Example #4
def main():
    nbests = defaultdict(list)
    references = {}
    for i, line in enumerate(open(opts.en)):
        '''
        Initialize references to correct english sentences
        '''
        references[i] = line

    for line in open(opts.nbest):
        (i, sentence, features) = line.strip().split("|||")
        stats = list(bleu_stats(sentence, references[int(i)]))
        bleu_score = bleu(stats)
        smoothed_bleu_score = smoothed_bleu(stats)
        # convert the feature string to a list of floats
        feature_list = [float(x) for x in features.split()]
        nbests[int(i)].append(
            (sentence, bleu_score, smoothed_bleu_score, feature_list))

    theta = [1.0 / 6 for _ in xrange(6)]  #initialization

    for i in range(0, opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbests[nbest])
            sample.sort(key=lambda i: i[0][2] - i[1][2], reverse=True)
            for i in range(0, min(len(sample), opts.xi)):
                for j in range(0, 6):
                    if theta[j] * sample[i][0][3][j] <= theta[j] * sample[i][
                            1][3][j]:
                        mistake = mistake + 1
                        theta[j] = theta[j] + opts.eta * (sample[i][0][3][j] -
                                                          sample[i][1][3][j])
        sys.stderr.write("Mistake:  %s\n" % (mistake, ))
    print "\n".join([str(weight) for weight in theta])
Example #5
def computeBleu(system, reference):
    stats = [0 for i in xrange(10)]
    stats = [
        sum(scores)
        for scores in zip(stats, bleu.bleu_stats(system, reference))
    ]
    return bleu.smoothed_bleu(stats)
Example #6
def main():
    references = []
    sys.stderr.write("Reading English Sentences\n")
    for i, line in enumerate(open(opts.en)):
        '''Initialize references to correct english sentences'''
        references.append(line)
        if i%100 == 0:
            sys.stderr.write(".")

    sys.stderr.write("\nTry reading nbests datastructure from disk ... \n")
    nbests = read_ds_from_file(opts.nbestDS)
    if nbests is None:
        nbests = []
        sys.stderr.write("No nbests on disk, so calculating ndests ... \n")
        for j,line in enumerate(open(opts.nbest)):
            (i, sentence, features) = line.strip().split("|||")
            i = int(i)
            stats = list(bleu.bleu_stats(sentence, references[i]))
            # bleu_score = bleu.bleu(stats)
            smoothed_bleu_score = bleu.smoothed_bleu(stats)
            # convert the feature string to a list of floats
            feature_list = [float(x) for x in features.split()]
            if len(nbests)<=i:
                nbests.append([])
            # nbests[i].append(entry(sentence, bleu_score, smoothed_bleu_score, feature_list))
            nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list))

            if j%5000 == 0:
                sys.stderr.write(".")
        write_ds_to_file(nbests, opts.nbestDS)

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0/arg_num for _ in xrange(arg_num)] #initialization

    avg_theta = [ 0.0 for _ in xrange(arg_num)]
    avg_cnt = 0
    sys.stderr.write("\nTraining...\n")
    for j in xrange(opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest)
            sample.sort(key=lambda i: i[0].smoothed_bleu - i[1].smoothed_bleu, reverse=True)
            for i in xrange(min(len(sample), opts.xi)):
                v1 = sample[i][0].feature_list
                v2 = sample[i][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta)
                    
                avg_theta = vector_plus(avg_theta, theta)
                avg_cnt += 1

        sys.stderr.write("Mistake:  %s\n" % (mistake,))
    

    weights = [ avg / avg_cnt if avg_cnt != 0 else 1/float(arg_num) for avg in avg_theta ]
    sys.stderr.write("Computing best BLEU score and outputting...\n")
    # instead of printing the averaged-out weights, print the weights that maximize the BLEU score
    print "\n".join([str(weight) for weight in weights])
Example #7
def bleu_score(input_string):
    ref = [line.strip().split() for line in open(opts.reference)]
    hyp = [line.strip().split() for line in input_string.split('\n')]

    stats = [0 for i in xrange(10)]
    for (r,h) in zip(ref, hyp):
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(h,r))]
    return bleu.bleu(stats)
Example #8
def score(predicted, reference):
    ref = [line.strip().split() for line in open(reference)]
    system = [line.strip().split() for line in predicted]

    stats = [0 for i in range(10)]
    for (r, s) in zip(ref, system):
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r))]

    return bleu.bleu(stats)
Example #9
def main(opts, sysstdin):
	ref = [line.strip().split() for line in open(opts.en)]
	system = [line.strip().split() for line in sysstdin]

	stats = [0 for i in xrange(10)]
	for (r,s) in zip(ref, system):
	    stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s,r))]
	
	return bleu.bleu(stats)
Example #10
def sum_bleu_scores_per_range(range_markers_dict):
    range_bleu_scores = {}
    for k, v in range_markers_dict.items():
        sum_bs = 0.0
        for h, r in v:
            b_stats = list(bleu.bleu_stats(h, r))  # wrap in list(), as the other examples do, before scoring
            bs = bleu.bleu(b_stats)
            sum_bs += bs
        range_bleu_scores[k] = sum_bs
    return range_bleu_scores
Example #11
def compute_bleu(hypo, ref="data/dev.ref"):
    f_ref = open(ref, 'r')
    f_hypo = open(hypo, 'r')
    ref = [line.strip().split() for line in f_ref]
    hyp = [line.strip().split() for line in f_hypo]
    f_hypo.close()
    f_ref.close()

    stats = [0 for i in xrange(10)]
    for (r, h) in zip(ref, hyp):
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(h, r))]
    return (100 * bleu.bleu(stats))
Example #12
def bleu_score(mt_para_corpus, si_para_corpus, N=4):
    '''
    BLEU score between trans and inter
    '''
    stats = [0 for i in xrange(10)]
    for mt_sent_pair, si_sent_pair in zip(mt_para_corpus.sent_pairs,
                                          si_para_corpus.sent_pairs):
        ref = [w.tok for w in mt_sent_pair.tgt_sent.words]
        output = [w.tok for w in si_sent_pair.tgt_sent.words]
        stats = [
            sum(scores) for scores in zip(stats, bleu.bleu_stats(output, ref))
        ]
    return bleu.bleu(stats)
Example #13
def main():
    nbests = []
    references = []
    sys.stderr.write("Reading English Sentences")
    for i, line in enumerate(open(opts.en)):
        '''Initialize references to correct english sentences'''
        references.append(line)
        if i % 100 == 0:
            sys.stderr.write(".")

    sys.stderr.write("\nReading ndests")
    for j, line in enumerate(open(opts.nbest)):
        (i, sentence, features) = line.strip().split("|||")
        i = int(i)
        stats = list(bleu_stats(sentence, references[i]))
        bleu_score = bleu(stats)
        smoothed_bleu_score = smoothed_bleu(stats)
        # convert the feature string to a list of floats
        feature_list = [float(x) for x in features.split()]
        if len(nbests) <= i:
            nbests.append([])
        nbests[i].append(
            entry(sentence, bleu_score, smoothed_bleu_score, feature_list))
        if j % 5000 == 0:
            sys.stderr.write(".")

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0 / arg_num for _ in xrange(arg_num)]  #initialization

    sys.stderr.write("\nTraining...\n")
    for i in xrange(opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest)
            sample.sort(key=lambda i: i[0].smoothed_bleu - i[1].smoothed_bleu,
                        reverse=True)
            for i in xrange(min(len(sample), opts.xi)):
                v1 = sample[i][0].feature_list
                v2 = sample[i][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1),
                                        opts.eta)


#                for j in xrange(arg_num):
#                    if theta[j] * sample[i][0][3][j] <= theta[j] * sample[i][1][3][j]:
#                        mistake = mistake + 1
#                        theta[j] += opts.eta * (sample[i][0].feature_list[j] - sample[i][1].feature_list[j])
        sys.stderr.write("Mistake:  %s\n" % (mistake, ))
    print "\n".join([str(weight) for weight in theta])
Example #14
def main():
    nbests = []
    references = []
    sys.stderr.write("Reading English Sentences")
    for i, line in enumerate(open(opts.en)):
        '''Initialize references to correct english sentences'''
        references.append(line)
        if i%100 == 0:
            sys.stderr.write(".")

    sys.stderr.write("\nReading ndests")
    for j,line in enumerate(open(opts.nbest)):
        (i, sentence, features) = line.strip().split("|||")
        i = int(i)
        stats = list(bleu_stats(sentence, references[i]))
        # bleu_score = bleu(stats)
        smoothed_bleu_score = smoothed_bleu(stats)
        # convert the feature string to a list of floats
        feature_list = [float(x) for x in features.split()]
        if j == 10:  # NOTE: debugging limit in the original; stops after reading 10 candidates
            break
        if len(nbests)<=i:
            nbests.append([])
        # nbests[i].append(entry(sentence, bleu_score, smoothed_bleu_score, feature_list))
        nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list))
        if j%5000 == 0:
            sys.stderr.write(".")

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0/arg_num for _ in xrange(arg_num)] #initialization

    avg_theta = [ 0 for _ in xrange(arg_num)]
    avg_cnt = 0
    sys.stderr.write("\nTraining...\n")
    for i in xrange(opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest)
            sample.sort(key=lambda i: i[0].smoothed_bleu - i[1].smoothed_bleu, reverse=True)
            for i in xrange(min(len(sample), opts.xi)):
                v1 = sample[i][0].feature_list
                v2 = sample[i][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta)
                    avg_theta = vector_plus(avg_theta, theta)
                    avg_cnt += 1
        sys.stderr.write("Mistake:  %s\n" % (mistake,))
    final_theta = [ t / avg_cnt for t in avg_theta]
    print "\n".join([str(weight) for weight in final_theta])
Example #15
def update_param(feature, current_param_dict):
	sentence_dict = {}
	for m in xrange(0, num_sents):
		ref = reference[m]
		candidates = all_hyps[m * 100:m * 100 + 100]
		line_dict, steepest_line = define_sentence_lines(feature, candidates, current_param_dict)
		sequence = find_line_sequence(line_dict, [(steepest_line, -999999)])
		interval_stats_dict = {}
		for candidate, interval_start, interval_end in sequence:
			interval_stats_dict[(interval_start, interval_end)] = list(bleu.bleu_stats(candidate[2].split(), ref))
		sentence_dict[m] = interval_stats_dict

	all_interval_ends = sorted(set([item[1] for sublist in [d.keys() for d in sentence_dict.values()] for item in sublist]))
	best_interval, best_BLEU = choose_best_interval(all_interval_ends, sentence_dict)
	return_param_dict = {}
	for f in current_param_dict:
		return_param_dict[f] = current_param_dict[f] if f != feature else sum(best_interval)/2
	return return_param_dict, best_BLEU
Example #16
def get_nbest(nbest, source, target):
    src = [line.strip().split() for line in open(source).readlines()]
    ref = [line.strip().split() for line in open(target).readlines()]
    translations = [
        line.strip().split("|||") for line in open(nbest).readlines()
    ]
    nbests = [[] for _ in ref]
    original_feature_count = 0
    sys.stderr.write("Computing smoothed bleu...")
    translation = namedtuple("translation", "features, smoothed_bleu")
    for (i, sentence, features) in translations:
        (i, sentence,
         features) = (int(i), sentence.strip(),
                      [float(f) for f in features.strip().split()])
        sentence_split = sentence.strip().split()
        stats = tuple(bleu.bleu_stats(sentence_split, ref[i]))
        nbests[i].append(translation(features, bleu.smoothed_bleu(stats)))
    return nbests
Example #17
def compute_score(weights, refs, hyps):
    tot_stats = [0 for i in xrange(10)]
    hyp_list = []
    for s in xrange(0, num_sents):
        hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
        (best_score, best) = (-1e300, '')
        for (num, hyp, feats) in hyps_for_one_sent:
            score = 0.0
            for feat in feats.split(' '):
                (k, v) = feat.split('=')
                score += weights[k] * float(v)
            if score > best_score:
                (best_score, best) = (score, hyp)
        hyp_list.append("%s\n" % best)
    for (r, h) in zip(refs, hyp_list):
        # bleu_stats takes the hypothesis first, as in the other examples
        tot_stats = [sum(s) for s in zip(tot_stats, bleu.bleu_stats(h, r))]
        # for i in xrange(len(tot_stats)):
        #     tot_stats[i] += int(best[i])
    return bleu.bleu(tot_stats)
Example #18
File: mert.py  Project: nsaphra/en600.468
def performance(weights, dev_src, dev_kbest, dev_ref):
  old_weights = copy.deepcopy(weights)
  all_hyps = [pair.split(' ||| ') for pair in open(dev_kbest)]
  all_src = [s.split(' ||| ') for s in open(dev_src)]
  num_sents = len(all_hyps) / 100
  stats = []
  ref_file = open(dev_ref)

  for (r_ind, (ref, src, s)) in enumerate(zip(ref_file, all_src, xrange(0, num_sents))):
    hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
    ref = ref.strip().split()
    src = src[1].strip().split()
    (best_score, best, best_ind) = (-1e300, '', -1)
    for (h_ind, (num, h_sent, feats)) in enumerate(hyps_for_one_sent):
      score = 0.0
      hyp = h_sent.strip().split()
      for (k,v) in get_feats(hyp, src, feats).items():
        score += weights[k] * v

      if score > best_score:
        (best_score, best, best_ind) = (score, h_sent, h_ind)
    stats.append([i for i in bleu.bleu_stats(best.strip().split(),ref)])

  return score_bleu_stats(stats)
Example #19
candidate = namedtuple("candidate", "english, features, score")
nbests = []
cnt = 0  # count of sentences
# we only need to run the first part once and save the result
### 1st part: compute the BLEU score for each candidate translation.
for line in open(opts.nbest):
    cnt = cnt + 1
    #print '{0}\r'.format("\rIteration: %d/%d." %(cnt, 432303)),
    (i, sentence, features) = line.strip().split("|||")
    if len(nbests) <= int(i):
        nbests.append([])
    features = [float(h) for h in features.strip().split()]
    stats = [0 for kk in xrange(10)]  #code from score-reranker.py
    stats = [
        sum(scores) for scores in zip(
            stats, bleu.bleu_stats(sentence.strip().split(), ref[int(i)]))
    ]
    score = bleu.smoothed_bleu(stats)
    nbests[int(i)].append(candidate(sentence.strip(), features, score))
cPickle.dump(nbests, open(
    'my_nbests_add.p',
    'wb'))  #save the result. no need to run the first part each time
#print "finished calculating nbests."
nbests = cPickle.load(open('my_nbests_add.p', 'rb'))  #load pickled file

#2nd part,learn the optimal weight
epochs = 20  #setup parameters mentioned in pseudocode
tau_maxsize = 100  #5000
xi = 10  #50
tau = []
alpha = 0.05
Example #20
def main():
    references = []
    sys.stderr.write("Reading English Sentences\n")
    for i, line in enumerate(open(opts.en)):
        '''Initialize references to correct english sentences'''
        references.append(line)
        if i%100 == 0:
            sys.stderr.write(".")

    sys.stderr.write("\nTry reading %s from disk ... \n" % opts.nbestDS)
    nbests = read_ds_from_file(opts.nbestDS)
    if nbests is None:
        nbests = []
        sys.stderr.write("%s is not on disk, so calculating it ... \n" % opts.nbestDS)
        for j,line in enumerate(open(opts.nbest)):
            (i, sentence, features) = line.strip().split("|||")
            i = int(i)
            stats = list(bleu.bleu_stats(sentence, references[i]))
            # bleu_score = bleu.bleu(stats)
            smoothed_bleu_score = bleu.smoothed_bleu(stats)
            # convert the feature string to a list of floats
            feature_list = [float(x) for x in features.split()]
            if len(nbests)<=i:
                nbests.append([])
            # nbests[i].append(entry(sentence, bleu_score, smoothed_bleu_score, feature_list))
            nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list))

            if j%5000 == 0:
                sys.stderr.write(".")
        sys.stderr.write("\nWriting %s to disk ... \n" % opts.nbestDS)
        write_ds_to_file(nbests, opts.nbestDS)
        sys.stderr.write("Finish writing %s\n" % opts.nbestDS)

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0/arg_num for _ in xrange(arg_num)] #initialization

    # avg_theta = [ 0.0 for _ in xrange(arg_num)]
    # avg_cnt = 0

    tau = opts.tau # positive learning margin
    sys.stderr.write("\nTraining...\n")
    for iter_num in xrange(opts.epo):
        sys.stderr.write("\nIteration#{} ".format(iter_num + 1))
        cnt = 0
        # sentence wise updating

        for i, nbest in enumerate(nbests):
            y = sorted(nbest, key = lambda h: h.smoothed_bleu, reverse = True)
            mu = [0.0]*len(nbest)
            w_times_x = [0.0]*len(nbest)
            for j, best in enumerate(nbest):
                # calculate linear function result
                w_times_x[j] = dot_product(theta, best.feature_list)

            # processing pairs 
            top_r = int(len(y)*opts.r)
            bottom_k = int(len(y)*opts.k)
            for j in xrange(len(nbest) - 1):
                for l in xrange(j+1, len(nbest)):
                    if nbest[j].smoothed_bleu <= y[top_r].smoothed_bleu \
                    and nbest[l].smoothed_bleu >= y[- bottom_k].smoothed_bleu \
                    and w_times_x[j] > w_times_x[l] + tau:
                        mu[j] = mu[j] + 1
                        mu[l] = mu[l] - 1
                    elif nbest[j].smoothed_bleu >= y[- bottom_k].smoothed_bleu \
                    and nbest[l].smoothed_bleu <= y[top_r].smoothed_bleu \
                    and w_times_x[j] > w_times_x[l] - tau:
                        mu[j] = mu[j] - 1
                        mu[l] = mu[l] + 1
                    else:
                        cnt += 1
                if (j + 1) % 100 == 0:
                    sys.stderr.write(".")

            vector_sum = [0 for _ in xrange(len(nbest[0].feature_list))]
            for m, best in enumerate(nbest):
                vector_sum = vector_plus(vector_sum, scale_product(mu[m], best.feature_list))

            theta = vector_plus(theta, vector_sum, opts.eta)

            # avg_theta = vector_plus(avg_theta, theta)
            # avg_cnt += 1

        sys.stderr.write("\n Non-supported vectors:  %s\n" % (cnt,))
    

    # weights = [ avg / avg_cnt if avg_cnt !=0 else 1/float(arg_num) for avg in avg_theta ]
    sys.stderr.write("Computing best BLEU score and outputing...\n")
    # instead of print the averaged-out weights, print the weights that maximize the BLEU score    
    print "\n".join([str(weight) for weight in theta])
Example #21
File: rerank.py  Project: stothe2/468-MT
weights = {'p(e)'       : float(opts.lm) ,
           'p(e|f)'     : float(opts.tm1),
           'p_lex(f|e)' : float(opts.tm2)}

ref = [line.strip().split() for line in open(opts.reference)]
all_hyps = [pair.split(' ||| ') for pair in open(opts.input)]
num_sents = len(zip(ref, all_hyps))

# Calculate the "gold" scoring function G
# which is just the local BLEU score here.
all_scores = defaultdict(list)
for s in xrange(0, num_sents):
    hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
    for (num, hyp, feats) in hyps_for_one_sent:
        stats = [0 for i in xrange(10)]
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(hyp.split(" "), ref[s]))]
        score = bleu.bleu(stats)
        all_scores[s].append( (score, hyp, feats) )

def tune():
    ''' Finds best weight w '''
    w = array([[float(opts.lm), float(opts.tm1), float(opts.tm2)]])
    binary_classifier = svm.SVC(kernel="linear")

    for _ in range(0,5): # for desired number of iterations
        X, y = [], []
        for s in xrange(0, num_sents):
            samples = sampler(s, 5000, 50, 0.05)
            for (feats, sign) in samples:
                X.append(feats)
                y.append(sign)
Example #22
def train(nbest_candidates,
          reference_files,
          init_weights=None,
          epochs=5,
          alpha=0.04,
          tau=100,
          xi=20,
          eta=0.0001):

    # initialization
    print >> sys.stderr, "Initializing training data"
    candidate = namedtuple("candidate",
                           "sentence, features, bleu, smoothed_bleu")
    refs = []
    for reference_file in reference_files:
        refs.append([line.strip().split() for line in open(reference_file)])
    nbests = []
    for n, line in enumerate(nbest_candidates):
        (i, sentence, features) = line.strip().split("|||")
        i = int(i)
        sentence = sentence.strip()
        features = np.array([float(h) for h in features.strip().split()])

        # calculate bleu score and smoothed bleu score
        max_bleu_score = -float('inf')
        for ref in refs:
            stats = tuple(bleu.bleu_stats(sentence.split(), ref[i]))
            bleu_score = bleu.bleu(stats)
            smoothed_bleu_score = bleu.smoothed_bleu(stats)
            max_bleu_score = max(max_bleu_score, smoothed_bleu_score)

        while len(nbests) <= i:
            nbests.append([])
        nbests[i].append(
            candidate(sentence, features, bleu_score, max_bleu_score))

        if n % 2000 == 0:
            sys.stderr.write(".")
    print >> sys.stderr, "\nRetrieved %d candidates for %d sentences" % (
        n, len(nbests))

    # set weights to default
    w = init_weights if init_weights is not None else \
        np.array([1.0/len(nbests[0][0].features)] * len(nbests[0][0].features))
    assert len(w) == len(nbests[0][0].features)
    w_sum = np.zeros(len(nbests[0][0].features))

    # training
    random.seed()
    for i in range(epochs):
        print >> sys.stderr, "Training epoch %d:" % i
        mistakes = 0
        for nbest in nbests:
            if len(nbest) < 2:
                continue

            sample = []
            for j in range(tau):
                (s1, s2) = (nbest[k]
                            for k in random.sample(range(len(nbest)), 2))
                if fabs(s1.smoothed_bleu - s2.smoothed_bleu) > alpha:
                    if s1.smoothed_bleu > s2.smoothed_bleu:
                        sample.append((s1, s2))
                    else:
                        sample.append((s2, s1))
                else:
                    continue

            sample.sort(key=lambda s: s[0].smoothed_bleu - s[1].smoothed_bleu,
                        reverse=True)
            for (s1, s2) in sample[:xi]:
                if np.dot(w, s1.features) <= np.dot(w, s2.features):
                    mistakes += 1
                    w += eta * (s1.features - s2.features
                                )  # this is vector addition!

        w_sum += w
        print >> sys.stderr, "Number of mistakes: %d" % mistakes

    w = w_sum / float(epochs)
    return w
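A hypothetical way to drive train() above, assuming an n-best file whose lines look like "i ||| sentence ||| f1 f2 ..." and a single reference file (both paths invented):

weights = train(open("data/train.nbest"), ["data/train.en"])
print "\n".join(str(w) for w in weights)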
Example #23
def main():
    references = []
    sys.stderr.write("Reading English Sentences\n")
    for i, line in enumerate(open(opts.en)):
        '''Initialize references to correct english sentences'''
        references.append(line)
        if i % 100 == 0:
            sys.stderr.write(".")

    sys.stderr.write("\nTry reading nbests datastructure from disk ... \n")
    nbests = read_ds_from_file(opts.nbestDS)
    if nbests is None:
        nbests = []
        sys.stderr.write("No nbests on disk, so calculating ndests ... \n")
        for j, line in enumerate(open(opts.nbest)):
            (i, sentence, features) = line.strip().split("|||")
            i = int(i)
            stats = list(bleu.bleu_stats(sentence, references[i]))
            # bleu_score = bleu.bleu(stats)
            smoothed_bleu_score = bleu.smoothed_bleu(stats)
            # convert the feature string to a list of floats
            feature_list = [float(x) for x in features.split()]
            if len(nbests) <= i:
                nbests.append([])
            # nbests[i].append(entry(sentence, bleu_score, smoothed_bleu_score, feature_list))
            nbests[i].append(entry(sentence, smoothed_bleu_score,
                                   feature_list))

            if j % 5000 == 0:
                sys.stderr.write(".")
        write_ds_to_file(nbests, opts.nbestDS)

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0 / arg_num for _ in xrange(arg_num)]  #initialization

    avg_theta = [0.0 for _ in xrange(arg_num)]
    avg_cnt = 0
    sys.stderr.write("\nTraining...\n")
    for j in xrange(opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest)
            sample.sort(key=lambda i: i[0].smoothed_bleu - i[1].smoothed_bleu,
                        reverse=True)
            for i in xrange(min(len(sample), opts.xi)):
                v1 = sample[i][0].feature_list
                v2 = sample[i][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1),
                                        opts.eta)

                avg_theta = vector_plus(avg_theta, theta)
                avg_cnt += 1

        sys.stderr.write("Mistake:  %s\n" % (mistake, ))

    weights = [
        avg / avg_cnt if avg_cnt != 0 else 1 / float(arg_num)
        for avg in avg_theta
    ]
    sys.stderr.write("Computing best BLEU score and outputing...\n")
    # instead of print the averaged-out weights, print the weights that maximize the BLEU score
    print "\n".join([str(weight) for weight in weights])
Example #24
#     if best_dev > dev_loss:
#         print("[DEV] Best model so far, saving snapshot.")
#         torch.save(model, "batched_enc_dec_model.pt")
#         best_dev = dev_loss

# this is how you generate; replace with the sentences you want to generate
model = torch.load("batched_enc_dec_model.pt")
_, _, test_it, _, _ = get_datasets(1)
sentences = []
stats = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
model.eval()
sample_batch_id = random.choice(range(len(test_it)))
for sent_id, sent in enumerate(test_it):
    pred, attn_matrix = generate(sent.src[:, 0])
    pred_txt = " ".join(map(lambda x: trg_vocab.vocab.itos[x], pred))
    target = list(map(lambda x: trg_vocab.vocab.itos[x], sent.trg[1:, 0]))
    target_txt = " ".join(target[:target.index("<eos>")])
    stats += np.array(bleu_stats(pred_txt, target_txt))
    sentences.append([pred_txt, target_txt])
    if sample_batch_id == sent_id:
        # now let's visualize its attention
        plot_attention([src_vocab.vocab.itos[x] for x in sent.src[:, 0]],
                       [trg_vocab.vocab.itos[x] for x in pred], attn_matrix,
                       'attention_matrix.png')
print("Corpus BLEU: %.2f" % (100 * bleu(stats)))
for pred, target in sentences[:10]:
    print("%s => %s" % (pred, target))
Example #25
    num1 = random.randint(0, 99)
    num2 = random.randint(0, 99)
    if num1 == num2:
      if num1 == 0:
        num2 = num2 + 1
      else:
        num2 = num2 - 1  # just making sure they aren't the same example
    hyp1 = hyps_for_one_sent[num1][1]
    #print(hyp1)
    hyp2 = hyps_for_one_sent[num2][1]
    #print(hyp2)
    feats1 = hyps_for_one_sent[num1][2]
    feats2 = hyps_for_one_sent[num2][2]

    #calculate bleu score for each example
    s1 = list(bleu.bleu_stats(hyp1, ref))
    bs1 = bleu.bleu(s1)
    s2 = list(bleu.bleu_stats(hyp2, ref))
    bs2 = bleu.bleu(s2)
    #print(bs1, bs2)
    #make training vector with difference in values of feats and indicator
    if bs1 > bs2:
      indic = 1
    elif bs1 < bs2:
      indic = -1
    else:
      continue  # ignore pairs that have the same bleu score
    #get feat values for each pair of features and subtract
    trainfeats = []
Example #26
theta = numpy.random.rand(6)
eta = 0.1

(opts, _) = optparser.parse_args()

source = open(opts.ref).read().splitlines()
target = open(opts.tar).read().splitlines()
a_translation = namedtuple('a_translation',
                           'sentence, features, smoothed_bleu')
nbests = [[] for i in range(len(source))]

for line in open(opts.nbest):
    (i, sentence, features) = line.strip().split("|||")
    ind = int(i)
    #stats=bleu.bleu_stats(sentence, source[ind])
    stats = list(bleu.bleu_stats(sentence, target[ind]))
    #test1=test[0]
    bleu_smooth_score = bleu.smoothed_bleu(stats)
    feature_vec = numpy.fromstring(features, sep=' ')
    nbests[ind].append(a_translation(sentence, feature_vec, bleu_smooth_score))


def get_sample(nbest):
    sample = []
    for i in range(0, tau):
        #random_items = random.sample(nbest, 2)
        #s1 = random_items[0]
        #s2 = random_items[1]
        s1 = random.choice(nbest)
        s2 = random.choice(nbest)
        if math.fabs(s1.smoothed_bleu - s2.smoothed_bleu) > alpha:
Example #27
def get_bleu_stats(h, r):
    h = h.split()
    r = r.split()
    stats = [0 for i in xrange(10)]
    stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(h, r))]
    return stats
Example #28
#!/usr/bin/env python
import optparse, sys, os
import bleu

optparser = optparse.OptionParser()
optparser.add_option("-r", "--reference", dest="reference", default=os.path.join("/usr/shared/CMPT/nlp-class/project/test/", "all.cn-en.en0"), help="English reference sentences")
optparser.add_option("-i", "--input", dest="input", default=os.path.join("/home/yongyiw/Documents/Github/final-project/Code", "output_1"), help="decoder output")
(opts,_) = optparser.parse_args()

# print opts.reference, opts.input
ref = [line.strip().split() for line in open(opts.reference)]
system = [line.strip().split() for line in open(opts.input)]

stats = [0 for i in xrange(10)]
for (r,s) in zip(ref, system):
  stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s,r))]
print bleu.bleu(stats)
Example #29
optparser = optparse.OptionParser()
optparser.add_option("-r", "--reference", dest="reference", default="data/test.en", help="English reference sentences")
optparser.add_option("-n", "--nbest", dest="nbest", default="data/test.nbest", help="N-best lists")
(opts,_) = optparser.parse_args()

ref = [line.strip().split() for line in open(opts.reference)]
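
# translation_candidate is used below but never defined in this excerpt; a
# plausible definition, inferred from the fields it is constructed with:
from collections import namedtuple
translation_candidate = namedtuple("translation_candidate", "sentence, scores, inverse_scores")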

nbests = []
for n, line in enumerate(open(opts.nbest)):
    (i, sentence, _) = line.strip().split("|||")
    (i, sentence) = (int(i), sentence.strip())
    if len(ref) <= i:
        break
    while len(nbests) <= i:
        nbests.append([])
    scores = tuple(bleu.bleu_stats(sentence.split(), ref[i]))
    inverse_scores = tuple([-x for x in scores])
    nbests[i].append(translation_candidate(sentence, scores, inverse_scores))
    if n % 2000 == 0:
        sys.stderr.write(".")

oracle = [nbest[0] for nbest in nbests]

stats = [0 for i in xrange(10)]
for candidate in oracle:
    stats = [sum(scores) for scores in zip(stats, candidate.scores)]

prev_score = 0
score = bleu.bleu(stats)

# greedy search for a better oracle: for each sentence, choose the candidate
# that most improves the corpus-level BLEU
Example #30
optparser = optparse.OptionParser()
optparser.add_option("-r", "--reference", dest="reference", default=os.path.join("data", "test.en"), help="English reference sentences")
optparser.add_option("-n", "--nbest", dest="nbest", default=os.path.join("data", "test.nbest"), help="N-best lists")
(opts,_) = optparser.parse_args()

ref = [line.strip().split() for line in open(opts.reference)]

nbests = []
for n, line in enumerate(open(opts.nbest)):
  (i, sentence, _) = line.strip().split("|||")
  (i, sentence) = (int(i), sentence.strip())
  if len(ref) <= i:
    break
  while len(nbests) <= i:
    nbests.append([])
  scores = tuple(bleu.bleu_stats(sentence.split(), ref[i]))
  inverse_scores = tuple([-x for x in scores])
  nbests[i].append(translation_candidate(sentence, scores, inverse_scores))
  if n % 2000 == 0:
    sys.stderr.write(".")

oracle = [nbest[0] for nbest in nbests]

stats = [0 for i in xrange(10)]
for candidate in oracle:
  stats = [sum(scores) for scores in zip(stats, candidate.scores)]

prev_score = 0
score = bleu.bleu(stats)

# greedy search for a better oracle: for each sentence, choose the candidate
# that most improves the corpus-level BLEU
Example #31
def get_validation_bleu(hypotheses):
    stats = numpy.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
    for hyp, ref in zip(hypotheses, dev_tgt):
        hyp, ref = (hyp.strip().split(), ref.strip().split())
        stats += numpy.array(bleu_stats(hyp, ref))
    return "%.2f" % (100 * bleu(stats))
Example #32
#!/usr/bin/env python
import optparse
import sys
import bleu

optparser = optparse.OptionParser()
optparser.add_option("-r", "--reference", dest="reference", default="data/test.en", help="English reference sentences")
(opts,_) = optparser.parse_args()

ref = [line.strip().split() for line in open(opts.reference)]
system = [line.strip().split() for line in sys.stdin]

stats = [0 for i in xrange(10)]
for (r,s) in zip(ref, system):
    stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s,r))]
print bleu.bleu(stats)
Example #33
def cal_store(ref, system):
	stats = [0 for i in xrange(10)]
	for (r,s) in zip(ref, system):
		stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s,r))]
	return bleu.bleu(stats)
Example #34
def writeFeatureVector():
    hypothesis_sentences = namedtuple("hyp", "features, bleu")
    ref = [line.strip().split() for line in open(opts.ref)][:sys.maxint]
    src_dev = [line.strip().split("|||")[1] for line in open(opts.src_dev)][:sys.maxint]
    
    sys.stderr.write("reading dev data...")
    nbests = [[] for _ in ref]
    all_hyps = [pair.split(' ||| ') for pair in open(opts.dev)]
    num_sents = len(all_hyps) / 100
    for s in xrange(0, num_sents):
        hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
        for (num, hyp, feats) in hyps_for_one_sent:           
            feats = [float(h.split('=')[1]) for h in feats.strip().split()]
            stats = tuple(bleu.bleu_stats(hyp.strip().split(), ref[s]))
            #TODO: add extra feature here
            # 1. adding number of target words
            enWordsNO = len(hyp.strip().split())
            feats.append(enWordsNO)
            
            #2. adding number of untranslated source words
            feats.append(calcNotTranslatedWords(src_dev[s], hyp))
            
            nbests[s].append(hypothesis_sentences(feats, bleu.bleu(stats)))
        
    # pairwise sampling. Figure 4 of the paper
    random.seed(0)
    sampling_hypothesis = namedtuple("sample", "hyp1, hyp2, gDiff")
    def sampling():
        V = []
        for _ in xrange(opts.tau):
            c1 = random.choice(nbest)
            c2 = random.choice(nbest)
            if c1 != c2 and math.fabs(c1.bleu - c2.bleu) > opts.alpha:
                V.append(sampling_hypothesis(c1, c2, math.fabs(c1.bleu - c2.bleu))) 
        return V
    
    x = []
    nbest_count = 0
    for nbest in nbests:
        nbest_count = nbest_count +1
        
        V = sampling()
        sortedV = sorted(V , key=lambda h: h.gDiff, reverse=True)[:opts.xi]  
        x_count = 0
        for idx, sample in enumerate(sortedV):
            x_count = x_count + 1
             
            tmp = [c1j-c2j for c1j,c2j in zip(sample.hyp1.features, sample.hyp2.features)]
            tmp.append(cmp(sample.hyp1.bleu , sample.hyp2.bleu))
            x.append(tmp)
            tmp = [c2j-c1j for c1j,c2j in zip(sample.hyp1.features, sample.hyp2.features)]
            tmp.append(cmp(sample.hyp2.bleu , sample.hyp1.bleu))
            x.append(tmp)
            
        if x_count != opts.xi: 
            sys.stderr.write("%d\n" % (x_count))
    
        
    #writing feature vector
    for f in x:
        print ",".join(str(f0) for f0 in f)
Example #35
File: rerank.py  Project: klee166/468-MT
weights = {'p(e)'       : float(opts.lm) ,
           'p(e|f)'     : float(opts.tm1),
           'p_lex(f|e)' : float(opts.tm2)}

ref = [line.strip().split() for line in open(opts.reference)]
all_hyps = [pair.split(' ||| ') for pair in open(opts.input)]
num_sents = len(zip(ref, all_hyps))

# Calculate the "gold" scoring function G
# which is just the local BLEU score here.
all_scores = defaultdict(list)
for s in xrange(0, num_sents):
    hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
    for (num, hyp, feats) in hyps_for_one_sent:
        stats = [0 for i in xrange(10)]
        stats = [
            sum(scores)
            for scores in zip(stats, bleu.bleu_stats(hyp.split(" "), ref[s]))
        ]
        score = bleu.bleu(stats)
        all_scores[s].append((score, hyp, feats))


def tune():
    ''' Finds best weight w '''
    w = array([[float(opts.lm), float(opts.tm1), float(opts.tm2)]])
    binary_classifier = svm.SVC(kernel="linear")

    for _ in range(0, 5):  # for desired number of iterations
        X, y = [], []
        for s in xrange(0, num_sents):
            samples = sampler(s, 5000, 50, 0.05)
            for (feats, sign) in samples:
Example #36
def get_bleu_score(tgt, src):
    stats = [0 for i in range(10)]
    stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(tgt,src))]
    return bleu.bleu(stats)
Example #37
def main():
    nbests = []
    references = []
    sys.stderr.write("Reading English Sentences")
    for i, line in enumerate(open(opts.en)):
        '''Initialize references to correct english sentences'''
        references.append(line)
        if i%100 == 0:
            sys.stderr.write(".")

    sys.stderr.write("\nReading ndests")
    for j,line in enumerate(open(opts.nbest)):
        (i, sentence, features) = line.strip().split("|||")
        i = int(i)
        stats = list(bleu.bleu_stats(sentence, references[i]))
        # bleu_score = bleu.bleu(stats)
        smoothed_bleu_score = bleu.smoothed_bleu(stats)
        # convert the feature string to a list of floats
        feature_list = [float(x) for x in features.split()]
        if len(nbests)<=i:
            nbests.append([])
        # nbests[i].append(entry(sentence, bleu_score, smoothed_bleu_score, feature_list))
        nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list))

        if j%5000 == 0:
            sys.stderr.write(".")

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0/arg_num for _ in xrange(arg_num)] #initialization

    weights = [ [] for _ in xrange(opts.epo)]
    sys.stderr.write("\nTraining...\n")
    for j in xrange(opts.epo):
        avg_theta = [ 0.0 for _ in xrange(arg_num)]
        avg_cnt = 0
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest)
            sample.sort(key=lambda i: i[0].smoothed_bleu - i[1].smoothed_bleu, reverse=True)
            for i in xrange(min(len(sample), opts.xi)):
                v1 = sample[i][0].feature_list
                v2 = sample[i][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta)
                    
                avg_theta = vector_plus(avg_theta, theta)
                avg_cnt += 1

        sys.stderr.write("Mistake:  %s\n" % (mistake,))
        weights[j] = [ avg / avg_cnt if avg_cnt != 0 else 1/float(arg_num) for avg in avg_theta ]



    sys.stderr.write("Computing best BLEU score and outputing...\n")
    # instead of print the averaged-out weights, print the weights that maximize the BLEU score    
    # print "\n".join([str(weight) for weight in final_theta])

    bleu_score = [0 for _ in weights]
    for j, w in enumerate(weights):
        trans = []
        translation = namedtuple("translation", "english, score")
        system = []
        for i, nbest in enumerate(nbests):
            # for one sentence
            for et in nbest:
                if len(trans) <= int(i):
                    trans.append([])

                trans[int(i)].append(translation(et.sentence, sum([x*y for x,y in zip(w, et.feature_list)])))

        # pick the best-scoring candidate for each sentence (moved outside the
        # per-sentence loop so each sentence contributes exactly one hypothesis)
        for tran in trans:
            system.append(sorted(tran, key=lambda x: -x.score)[0].english)
        
        stats = [0 for i in xrange(10)]
        for (r,s) in zip(references, system):
            stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s,r))]

        bleu_score[j] = bleu.bleu(stats)

    idx = [i for i, bscore in enumerate(bleu_score) if bscore == max(bleu_score)][0]
    sys.stderr.write("Maximum BLEU score of training data is: {}\n".format(max(bleu_score)))
    sys.stderr.write("Corresponding weights are: {}\n".format(" ".join([ str(w) for w in weights[idx] ])))
    print "\n".join([str(weight) for weight in weights[idx]])
Example #38
File: mert.py  Project: nsaphra/en600.468
def main():
  optparser = optparse.OptionParser()
  optparser.add_option("-s", "--src", dest="src", default="data/train.src", help="source sentences")
  optparser.add_option("-k", "--kbest-list", dest="kbest", default="data/train.100best", help="100-best translation lists")
  optparser.add_option("-r", "--ref", dest="ref", default="data/train.ref", help="reference translations")

  optparser.add_option("-l", "--lm", dest="lm", default=-0.5, type="float", help="Language model weight")
  optparser.add_option("-t", "--tm1", dest="tm1", default=-0.5, type="float", help="Translation model p(e|f) weight")
  optparser.add_option("-u", "--tm2", dest="tm2", default=-0.5, type="float", help="Lexical translation model p_lex(f|e) weight")
  optparser.add_option("-c", "--word_cnt", dest="bp", default=0.5, type="float", help="brevity penalty weight")
  optparser.add_option("-g", "--greek_to_me", dest="untranslated", default=0.5, type="float", help="untranslated token weight")
  (opts, _) = optparser.parse_args()

  data = [{'ref':r.strip().split()} for r in open(opts.ref)]
  all_hyps = [pair.split(' ||| ') for pair in open(opts.kbest)]
  for (ind, s) in enumerate(open(opts.src)):
    if ind >= len(data):
      break
    (sent_id, src_sent) = s.split(' ||| ', 1)
    src = src_sent.strip().split()
    ref = data[ind]['ref']
    data[ind]['src'] = src
    hyps_for_one_sent = all_hyps[ind * 100:ind * 100 + 100]
    data[ind]['kbest'] = [-1 for i in hyps_for_one_sent]
    data[ind]['kbest_feats'] = [-1 for i in hyps_for_one_sent]
    data[ind]['bleu'] = [-1 for i in hyps_for_one_sent]
    for (h_ind, (num, h_sent, feats)) in enumerate(hyps_for_one_sent):
      h = h_sent.strip().split()
      data[ind]['kbest'][h_ind] = h
      data[ind]['kbest_feats'][h_ind] = get_feats(h, src, feats)
      data[ind]['bleu'][h_ind] = [i for i in bleu.bleu_stats(h,ref)]

  shortcuts = {'p(e)' : 'l', 'p(e|f)' : 't', 'p_lex(f|e)' : 'u', 'word_cnt' : 'c', 'untranslated_cnt': 'g'}
  weights = {'p(e)' : opts.lm, 'p(e|f)' : opts.tm1, 'p_lex(f|e)' : opts.tm2, 'word_cnt':opts.bp, 'untranslated_cnt':opts.untranslated}
  sys.stderr.write( "iter -1\n")
  sys.stderr.write( "train BLEU %f\n" % performance(weights, opts.src, opts.kbest, opts.ref))
  sys.stderr.write( "test BLEU %f\n" % performance(weights, "data/dev+test.src", "data/dev+test.100best", "data/dev.ref"))
  out = ""
  for (n, w) in weights.items():
    out += "-%s %s " % (shortcuts[n], w)
  sys.stderr.write( out + "\n")
  best_bleu = 0.0
  best_test = 0.0
  best_w = weights
  it = 0
  prev_bleu = 0.0
  while it < 5:
    old_weights = copy.deepcopy(weights)
    mert(weights, data)
    sys.stderr.write( "iter %d\n" % it)
    train_bleu = performance(weights, opts.src, opts.kbest, opts.ref)
    test_bleu = performance(weights, "data/dev+test.src", "data/dev+test.100best", "data/dev.ref")
    sys.stderr.write( "train BLEU %f\n" % train_bleu)
    sys.stderr.write( "test BLEU %f\n" % test_bleu)
    out = ""
    for (n, w) in weights.items():
      out += "-%s %s " % (shortcuts[n], w)
    sys.stderr.write( out + "\n")
    if train_bleu > best_bleu:
      best_bleu = train_bleu
      best_test = test_bleu
      best_w = weights
    diff = 0.0
    for (n, w) in old_weights.items():
      diff += abs(w - weights[n])
    it += 1
    if diff <= eps or abs(train_bleu - prev_bleu) < eps:
      break
      it += 1
      sys.stderr.write( "RANDOM RESTART\n")
      for name in weights.keys():
        weights[name] = random.uniform(MIN_W, -MIN_W)
  prev_bleu = train_bleu
  sys.stderr.write( "BEST:\n")
  sys.stderr.write( "overall BLEU %f\n" % best_bleu)
  sys.stderr.write( "train BLEU %f\n" % performance(best_w, train_src_, train_kbest_, train_ref_))
  sys.stderr.write( "test BLEU %f\n" % performance(best_w, "data/dev+test.src", "data/dev+test.100best", "data/dev.ref"))
  out = ""
  for (n, w) in best_w.items():
    out += "-%s %s " % (shortcuts[n], w)
  sys.stderr.write( out + "\n")
Example #39
def minimum_error_rate_training(weights, all_hyps, num_sents):
    # # repeat till convergence
    # # for all parameters
    # weight_hypothesis = [weights.copy()] # initialize the possible weights
    rand_weights = {
        'p(e)'       : random.uniform(-3,3),
        'p(e|f)'     : random.uniform(-3,3),
        'p_lex(f|e)' : random.uniform(-3,3)}
    # # append randomized weights to hypothesis
    # weight_hypothesis.append(rand_weights)
    # # for each weight hypothesis
    # for w_hyp in weight_hypothesis:
    #     # set of threshold points T
    #     threshold_set = set()
    #     # for all sentences
    #     for s in xrange(0, num_sents):
    #         hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
    #         # for all translations
    #         for t in hyps_for_one_sent:
    #             compute_line(t)

    # for all parameters
    for w in rand_weights:
        print weights
        # set of threshold points T
        threshold_set = []
        # for all sentences
        for s in xrange(0, num_sents):
            print 'for sentence', s
            reference = all_refs[s]
            # for all translations
            hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
            hyp_lines = []
            for (num, hyp, feats) in hyps_for_one_sent:
                print '\tnum', num, 'hyp', hyp
                # get slope and intersection to define line
                gradient = 0.0 # gradient = value of the feature
                y_intersect = 0.0 # y_intersect = sum of other weights * value of feature
                alt_weight_sum = 0.0
                for feat in feats.split(' '):
                    (k, v) = feat.split('=')
                    print '\t\tfeature key', k, 'feature value', v
                    # get the parameter that we are interested in
                    if k == w:
                        gradient = float(v)
                    else:
                        alt_weight_sum += float(rand_weights[k])
                y_intersect = float(alt_weight_sum * gradient)
                print 'gradient', gradient, 'combined weight', alt_weight_sum
                # line = (gradient, y_intersect,
                #           hypothesis, sentence number for reference)
                line = {'m': gradient, 'c': y_intersect, 'hyp': hyp, 'ref': reference}
                hyp_lines.append(line)
                # sort lines in descending order,
                # with steepest gradient first, then sort by y intersection
            sorted_hyp_lines = sorted(hyp_lines, key=lambda element: (-element['m'], -element['c']))
            # get steepest lines
            steepest_lines = {}
            for i,line in enumerate(sorted_hyp_lines):
                if line['m'] in steepest_lines:
                    if line['c'] > steepest_lines[line['m']]['c']:
                        steepest_lines[line['m']] = line
                else:
                    steepest_lines[line['m']] = line
            # find upper envelope:
            upper_envelope = []
            i = 0
            # repeatedly find the line l_2 that intersects l_1 first
            while i+1 < len(sorted_hyp_lines):
                # intersection points in order
                intersection_points = {}
                l_1 = sorted_hyp_lines[i] # y = ax + c

                # find line l_2 that intersects with l_1 first
                for j in xrange(i+1, len(sorted_hyp_lines)):
                    l_2 = sorted_hyp_lines[j] # y = bx + d
                    # Check if m is the same (lines are parallel, take the line with higher c)
                    if l_1['m'] == l_2['m']:
                        continue
                    # intersection point x,y
                    # x = (d-c)/(a-b)
                    x_numerator = float(l_2['c']) - float(l_1['c'])
                    x_denominator = float(l_1['m']) - float(l_2['m'])
                    x = float(x_numerator / x_denominator)
                    # y = a(x) + c
                    y = l_1['m'] * x + l_1['c']
                    # save all intersection points of other lines with l_1
                    # [0]: x, [1]: y, [2]: (l_1['hyp'], l_2['hyp']), [3]: reference
                    intersection_points[(x,y,(l_1['hyp'], l_2['hyp']),reference)] = j

                if len(intersection_points) == 0:
                    print 'finished calculating upper envelope'
                    break
                else:
                    # minimum intersection point with l_1 = first intersection with l_2
                    min_line_intersect = min(intersection_points)
                    upper_envelope.append(min_line_intersect)

                    # l = l_2
                    i = intersection_points[min_line_intersect]
            # add parameter value at intersection
            # parameter points in the format:
            # x_1, x_2, bleu score, tuple(hypothesis 1, ref)
            # where x_1 is the start of the interval, and x_2 is the end of the interval
            for index, point in enumerate(upper_envelope):
                # first interval starts at negative infinity
                if index == 0:
                    parameter = {
                                    'x_1': float('-inf'),
                                    'x_2': point[1],
                                     'score': bleu.bleu_stats(point[2][0], point[3]),
                                     'hyp': point[2][0],
                                     'ref': point[3]
                                 }
                else:
                    parameter = {
                                    'x_1': previous_x,
                                    'x_2': point[1],
                                    'score': bleu.bleu_stats(point[2][0], point[3]),
                                    'hyp': point[2][0],
                                    'ref': point[3]
                                 }
                threshold_set.append(parameter)
                previous_x = point[1]
                # last interval ends at positive infinity
                if index+1 == len(upper_envelope):
                    parameter = {
                                    'x_1': previous_x,
                                    'x_2': float('inf'),
                                    'score': bleu.bleu_stats(point[2][1], point[3]),
                                    'hyp': point[2][1],
                                    'ref': point[3]
                                 }
                    threshold_set.append(parameter)
Example #40
          " ".join(map(lambda x: i2w_trg[x], generate(sample_dev))))
    # Evaluate on dev set
    dev_words, dev_loss = 0, 0.0
    start_time = time.time()
    for sent_id, (start, length) in enumerate(dev_order):
        dev_batch = dev[start:start + length]
        my_loss, num_words = calc_loss(dev_batch, 0.0)
        dev_loss += my_loss.item()
        dev_words += num_words
    print("[DEV] iter %r: dev loss/word=%.4f, ppl=%.4f, time=%.2fs" %
          (ITER, dev_loss / dev_words, math.exp(
              dev_loss / dev_words), time.time() - start_time))
    if best_dev > dev_loss:
        print("[DEV] Best model so far, saving snapshot.")
        torch.save(model, "batched_enc_dec_model.pt")
        best_dev = dev_loss

    # this is how you generate; replace with the sentences you want to generate
model = torch.load("batched_enc_dec_model.pt")
sentences = []
stats = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
for sent in test:
    hyp = generate(sent)
    sentences.append(hyp)
    stats += np.array(
        bleu_stats(" ".join(map(lambda x: i2w_trg[x], hyp)),
                   " ".join(map(lambda x: i2w_trg[x], sent[1]))))
print("Corpus BLEU: %.2f" % (100 * bleu(stats)))
for sent in sentences[:10]:
    print(" ".join(map(lambda x: i2w_trg[x], sent)))
Example #41
0
def minimum_error_rate_training(weights, all_hyps, num_sents):
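    # Minimum error rate training in the style of Och (2003): for each feature
    # weight in turn, sweep it along a line, build the upper envelope of the
    # hypotheses' piecewise-linear model scores, and keep the weight value
    # whose interval yields the best BLEU.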
    # # repeat till convergence
    # # for all parameters
    # weight_hypothesis = [weights.copy()] #initialize the possible weights
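    # random restart: draw each of the three feature weights uniformly from [-3, 3]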
    rand_weights = {
        'p(e)'       : random.uniform(-3,3),
        'p(e|f)'     : random.uniform(-3,3),
        'p_lex(f|e)' : random.uniform(-3,3)}
    # # append randomized weights to hypothesis
    # weight_hypothesis.append(rand_weights)
    # # for each weight hypothesis
    # for w_hyp in weight_hypothesis:
    #     # set of threshold points T
    #     threshold_set = set()
    #     # for all sentences
    #     for s in xrange(0, num_sents):
    #         hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
    #         # for all translations
    #         for t in hyps_for_one_sent:
    #             compute_line(t)

    # for all parameters
    curr_score = compute_score(rand_weights, all_refs, all_hyps)
    for w in rand_weights:

        # print weights
        # set of threshold points T
        threshold_set = []
        # for all sentences
        for s in xrange(0, num_sents):
            # print 'for sentence', s
            reference = all_refs[s]
            # for all translations
            hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
            hyp_lines = []
            for (num, hyp, feats) in hyps_for_one_sent:
                # print '\tnum', num, 'hyp', hyp
                # get slope and intersection to define line
                gradient = 0.0  # slope = value of the feature being tuned
                y_intersect = 0.0  # y-intercept = sum of weight * value over the other features
                alt_weight_sum = 0.0
                for feat in feats.split(' '):
                    (k, v) = feat.split('=')
                    # print '\t\tfeature key', k, 'feature value', v
                    # get the parameter that we are interested in
                    if k == w:
                        gradient = float(v)
                    else:
                        # fixed contribution of every other feature: weight * value
                        alt_weight_sum += float(rand_weights[k]) * float(v)
                y_intersect = alt_weight_sum
                # print 'gradient', gradient, 'combined weight', alt_weight_sum
                # line = (gradient, y_intersect,
                #           hypothesis, sentence number for reference)
                line = {'m': gradient, 'c': y_intersect, 'hyp': hyp, 'ref': reference}
                hyp_lines.append(line)
            # sort lines in descending order:
            # steepest gradient first, ties broken by larger y-intercept
            sorted_hyp_lines = sorted(hyp_lines, key=lambda element: (-element['m'], -element['c']))
            # get steepest lines
            steepest_lines = {}
            for i,line in enumerate(sorted_hyp_lines):
                if line['m'] in steepest_lines:
                    if line['c'] > steepest_lines[line['m']]['c']:
                        steepest_lines[line['m']] = line
                else:
                    steepest_lines[line['m']] = line
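            # NOTE: steepest_lines is collected but never used below; the
            # envelope walk operates on sorted_hyp_lines directly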
            # find upper envelope:
            upper_envelope = []
            i = 0
            # repeatedly find the line l_2 that intersects the current line l_1 first
            while i+1 < len(sorted_hyp_lines):
                # intersection points in order
                intersection_points = {}
                l_1 = sorted_hyp_lines[i] # y = ax + c

                # find line l_2 that intersects with l_1 first
                for j in xrange(i+1, len(sorted_hyp_lines)):
                    l_2 = sorted_hyp_lines[j] # y = bx + d
                    # Check if m is the same (lines are parallel, take the line with higher c)
                    if l_1['m'] == l_2['m']:
                        continue
                    # intersection point x,y
                    # x = (d-c)/(a-b)
                    x_numerator = float(l_2['c']) - float(l_1['c'])
                    x_denominator = float(l_1['m']) - float(l_2['m'])
                    x = float(x_numerator / x_denominator)
                    # y = a(x) + c
                    y = l_1['m'] * x + l_1['c']
                    # save all intersection points of other lines with l_1
                    # [0]: x, [1]: y, [2]: (l_1['hyp'], l_2['hyp']), [3]: reference
                    intersection_points[(x,y,(l_1['hyp'], l_2['hyp']),reference)] = j

                if len(intersection_points) == 0:
                    print 'finished calculating upper envelope'
                    break
                else:
                    # minimum intersection point with l_1 = first intersection with l_2
                    min_line_intersect = min(intersection_points)
                    upper_envelope.append(min_line_intersect)

                    # l = l_2
                    i = intersection_points[min_line_intersect]
            # add parameter value at intersection
            # parameter points in the format:
            # x_1, x_2, bleu score, tuple(hypothesis 1, ref)
            # where x_1 is the start of the interval, and x_2 is the end of the interval
            for index, point in enumerate(upper_envelope):
                # first point starts at infinity
                if index == 0:
                    parameter = {
                                    'x_1': float('-inf'),
                                    'x_2': point[0],
                                    'score': list(bleu.bleu_stats(point[2][0], point[3])),
                                    'hyp': point[2][0],
                                    'ref': point[3]
                                 }
                else:
                    parameter = {
                                    'x_1': previous_x,
                                    'x_2': point[0],
                                    'score': list(bleu.bleu_stats(point[2][0], point[3])),
                                    'hyp': point[2][0],
                                    'ref': point[3]
                                 }
                threshold_set.append(parameter)
                previous_x = point[0]
                # last point ends at infinity
                if index+1 == len(upper_envelope):
                    parameter = {
                                    'x_1': previous_x,
                                    'x_2': float('inf'),
                                    'score': list(bleu.bleu_stats(point[2][1], point[3])),
                                    'hyp': point[2][1],
                                    'ref': point[3]
                                 }
                    threshold_set.append(parameter)


        # --- END SAMANTHA'S STUFF ---
        # sort T by parameter value
        # compute score for value before first threshold point
        # for all t in T
            # compute score for value after t
            # if score is highest
                # record max score and t
        # if max score > current score
            # update parameter value

        # sort T by parameter value (the 'score' field holds raw BLEU stats,
        # not a scalar score, so it cannot serve as the sort key)
        threshold_set = sorted(threshold_set, key=lambda x: x["x_1"])
        # flatten all interval endpoints into one sorted list of candidate points
        point_list = []
        for dic in threshold_set:
            point_list.append(dic['x_1'])
            point_list.append(dic['x_2'])

        point_list.sort()

        t_weights = rand_weights.copy()  # copy so trial values don't mutate rand_weights
        start, end = get_interval(point_list)

        max_score = compute_score(rand_weights, all_refs, all_hyps)
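        # sweep the candidate intervals: score the midpoint of each one,
        # tracking the best value found for this weight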
        while point_list:
            print "."
            val = (start+end)/float(2)
            t_weights[w] = val
            score = compute_score(t_weights, all_refs, all_hyps)
            if score > max_score:
                max_score = score
                best_val = val
            start = end
            end = point_list.pop()

        if max_score > curr_score:
            print "!"
            curr_score = max_score
            best_w = w
            best_v = best_val
            weights[w] = best_v
            print weights

    print "PRINTING SENTENCES"
    for s in xrange(0, num_sents):
        hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
        (best_score, best) = (-1e300, '')
        for (num, hyp, feats) in hyps_for_one_sent:
            score = 0.0
            for feat in feats.split(' '):
                (k, v) = feat.split('=')
                score += weights[k] * float(v)
            if score > best_score:
                (best_score, best) = (score, hyp)
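        # write the best hypothesis; the except clause guards a closed stdout
        # (e.g. a downstream pipe such as head exiting early)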
        try:
            sys.stdout.write("%s\n" % best)
        except Exception:
            sys.exit(1)
Example #42
0
def main():
    nbests = []
    references = []
    sys.stderr.write("Reading English Sentences")
    for i, line in enumerate(open(opts.en)):
        '''Initialize references to correct english sentences'''
        references.append(line)
        if i % 100 == 0:
            sys.stderr.write(".")

    sys.stderr.write("\nReading ndests")
    for j, line in enumerate(open(opts.nbest)):
        (i, sentence, features) = line.strip().split("|||")
        i = int(i)
        stats = list(bleu.bleu_stats(sentence, references[i]))
        # bleu_score = bleu.bleu(stats)
        smoothed_bleu_score = bleu.smoothed_bleu(stats)
        # making the feature string to float list
        feature_list = [float(x) for x in features.split()]
        if len(nbests) <= i:
            nbests.append([])
        # nbests[i].append(entry(sentence, bleu_score, smoothed_bleu_score, feature_list))
        nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list))

        if j % 5000 == 0:
            sys.stderr.write(".")

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0 / arg_num for _ in xrange(arg_num)]  #initialization

    weights = [[] for _ in xrange(opts.epo)]
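    # weights[j] will hold the averaged perceptron weights after epoch j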
    sys.stderr.write("\nTraining...\n")
    for j in xrange(opts.epo):
        avg_theta = [0.0 for _ in xrange(arg_num)]
        avg_cnt = 0
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest)
            sample.sort(key=lambda i: i[0].smoothed_bleu - i[1].smoothed_bleu,
                        reverse=True)
            for i in xrange(min(len(sample), opts.xi)):
                v1 = sample[i][0].feature_list
                v2 = sample[i][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1),
                                        opts.eta)

                avg_theta = vector_plus(avg_theta, theta)
                avg_cnt += 1

        sys.stderr.write("Mistake:  %s\n" % (mistake, ))
        weights[j] = [
            avg / avg_cnt if avg_cnt != 0 else 1 / float(arg_num)
            for avg in avg_theta
        ]

    sys.stderr.write("Computing best BLEU score and outputing...\n")
    # instead of print the averaged-out weights, print the weights that maximize the BLEU score
    # print "\n".join([str(weight) for weight in final_theta])

    bleu_score = [0 for _ in weights]
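    # re-score every n-best list with each epoch's weights; keep the epoch
    # whose 1-best translations achieve the highest corpus BLEU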
    for j, w in enumerate(weights):
        trans = []
        translation = namedtuple("translation", "english, score")
        system = []
        for i, nbest in enumerate(nbests):
            # for one sentence
            for et in nbest:
                if len(trans) <= i:
                    trans.append([])

                trans[i].append(
                    translation(
                        et.sentence,
                        sum([x * y for x, y in zip(w, et.feature_list)])))

        # after scoring every sentence's candidates, keep the best per sentence
        for tran in trans:
            system.append(sorted(tran, key=lambda x: -x.score)[0].english)

        stats = [0 for i in xrange(10)]
        for (r, s) in zip(references, system):
            stats = [
                sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r))
            ]

        bleu_score[j] = bleu.bleu(stats)

    idx = bleu_score.index(max(bleu_score))
    sys.stderr.write("Maximum BLEU score of training data is: {}\n".format(
        max(bleu_score)))
    sys.stderr.write("Corresponding weights are: {}\n".format(" ".join(
        [str(w) for w in weights[idx]])))
    print "\n".join([str(weight) for weight in weights[idx]])
Example #43
0
File: nmt.py Project: rooa/sp2016.11-731
def get_validation_bleu(hypotheses):
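    # sum the 10 BLEU sufficient statistics over the dev set, then report
    # corpus-level BLEU as a percentage string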
    stats = numpy.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
    for hyp, ref in zip(hypotheses, dev_tgt):
        hyp, ref = (hyp.strip().split(), ref.strip().split())
        stats += numpy.array(bleu_stats(hyp, ref))
    return "%.2f" % (100 * bleu(stats))
Example #44
0
def main():
    references = []
    sys.stderr.write("Reading English Sentences\n")
    for i, line in enumerate(open(opts.en)):
        '''Initialize references to correct english sentences'''
        references.append(line)
        if i % 100 == 0:
            sys.stderr.write(".")

    sys.stderr.write("\nTry reading %s from disk ... \n" % opts.nbestDS)
    nbests = read_ds_from_file(opts.nbestDS)
    if nbests is None:
        nbests = []
        sys.stderr.write("%s is not on disk, so calculating it ... \n" %
                         opts.nbestDS)
        for j, line in enumerate(open(opts.nbest)):
            (i, sentence, features) = line.strip().split("|||")
            i = int(i)
            stats = list(bleu.bleu_stats(sentence, references[i]))
            # bleu_score = bleu.bleu(stats)
            smoothed_bleu_score = bleu.smoothed_bleu(stats)
            # making the feature string to float list
            feature_list = [float(x) for x in features.split()]
            if len(nbests) <= i:
                nbests.append([])
            # nbests[i].append(entry(sentence, bleu_score, smoothed_bleu_score, feature_list))
            nbests[i].append(entry(sentence, smoothed_bleu_score,
                                   feature_list))

            if j % 5000 == 0:
                sys.stderr.write(".")
        sys.stderr.write("\nWriting %s to disk ... \n" % opts.nbestDS)
        write_ds_to_file(nbests, opts.nbestDS)
        sys.stderr.write("Finish writing %s\n" % opts.nbestDS)

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0 / arg_num for _ in xrange(arg_num)]  #initialization

    # avg_theta = [ 0.0 for _ in xrange(arg_num)]
    # avg_cnt = 0

    tau = opts.tau  # positive learning margin
    sys.stderr.write("\nTraining...\n")
    for iter_num in xrange(opts.epo):
        sys.stderr.write("\nIteration#{} ".format(iter_num + 1))
        cnt = 0
        # sentence wise updating

        for i, nbest in enumerate(nbests):
            y = sorted(nbest, key=lambda h: h.smoothed_bleu, reverse=True)
            mu = [0.0] * len(nbest)
            w_times_x = [0.0] * len(nbest)
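            # mu[m] accumulates the pairwise update coefficient for hypothesis m;
            # w_times_x caches each hypothesis's current model score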
            for j, best in enumerate(nbest):
                # calculate linear function result
                w_times_x[j] = dot_product(theta, best.feature_list)

            # processing pairs
            top_r = int(len(y) * opts.r)
            bottom_k = int(len(y) * opts.k)
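            # NOTE: top_r and bottom_k are computed but never used in the pair loop below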
            for j in xrange(len(nbest) - 1):
                for l in xrange(j + 1, len(nbest)):
                    yj = nbest[j].smoothed_bleu
                    yl = nbest[l].smoothed_bleu
                    if yj < yl \
                    and dist(yj, yl) > opts.epsilon \
                    and w_times_x[j] - w_times_x[l] < g_learn(yj, yl)*tau:
                        mu[j] = mu[j] + g_learn(yj, yl)
                        mu[l] = mu[l] - g_learn(yj, yl)
                    elif yj > yl \
                    and dist(yj, yl) > opts.epsilon \
                    and w_times_x[l] - w_times_x[j] < g_learn(yl, yj)*tau:
                        mu[j] = mu[j] - g_learn(yl, yj)
                        mu[l] = mu[l] + g_learn(yl, yj)
                    else:
                        cnt += 1
                if (j + 1) % 10000 == 0:
                    sys.stderr.write(".")

            vector_sum = [0 for _ in xrange(len(nbest[0].feature_list))]
            for m, best in enumerate(nbest):
                vector_sum = vector_plus(
                    vector_sum, scale_product(mu[m], best.feature_list))

            # update step: theta += eta * sum over m of mu[m] * feature_vector_m
            theta = vector_plus(theta, vector_sum, opts.eta)

            # avg_theta = vector_plus(avg_theta, theta)
            # avg_cnt += 1

        sys.stderr.write("\n Non-supported vectors:  %s\n" % (cnt, ))

    # weights = [ avg / avg_cnt if avg_cnt !=0 else 1/float(arg_num) for avg in avg_theta ]
    sys.stderr.write("Computing best BLEU score and outputing...\n")
    # instead of print the averaged-out weights, print the weights that maximize the BLEU score
    print "\n".join([str(weight) for weight in theta])

def Score(self, hyp, ref):
    stats = [0 for i in xrange(10)]
    stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(hyp, ref))]
    return bleu.bleu(stats)
                  " ".join(target_txt[:target_txt.index("<eos>")]))
            print("[DEV] Pred:\t" + " ".join(pred_txt))
        my_loss = calc_loss(dev_batch, 0.0)
        dev_loss += my_loss.item()
    print("[DEV] iter %r: dev loss=%.4f, time=%.2fs" % (
        ITER,
        dev_loss,
        time.time() - start_time,
    ))
    if best_dev > dev_loss:
        print("[DEV] Best model so far, saving snapshot.")
        torch.save(model, "batched_enc_dec_model.pt")
        best_dev = dev_loss

    # this is how you generate; replace with the desired sentences to generate
model = torch.load("batched_enc_dec_model.pt")
_, _, test_it, _, _ = get_datasets(1)
sentences = []
stats = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
model.eval()
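# greedy-decode each test sentence and accumulate corpus-level BLEU statistics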
for sent in test_it:
    pred_txt = " ".join(
        map(lambda x: trg_vocab.vocab.itos[x], generate(sent.src[:, 0])))
    sentences.append(pred_txt)
    target_txt = list(map(lambda x: trg_vocab.vocab.itos[x], sent.trg[1:, 0]))
    stats += np.array(
        bleu_stats(pred_txt, " ".join(target_txt[:target_txt.index("<eos>")])))
print("Corpus BLEU: %.2f" % (100 * bleu(stats)))
for sent in sentences[:10]:
    print(sent)