def main():
    """Train six per-feature weights from an n-best list and print them.

    Reads reference sentences from ``opts.en`` and ``|||``-separated n-best
    entries (sentence id, sentence, features) from ``opts.nbest``.  Runs
    ``opts.epo`` passes of a per-feature perceptron-style update over the
    pairs produced by ``get_sample`` and prints one weight per line.
    """
    # Reference sentences keyed by their 0-based line number.
    references = {}
    with open(opts.en) as en_file:
        for i, line in enumerate(en_file):
            references[i] = line

    # sentence id -> list of (sentence, bleu, smoothed_bleu, feature_list)
    nbests = defaultdict(list)
    with open(opts.nbest) as nbest_file:
        for line in nbest_file:
            (i, sentence, features) = line.strip().split("|||")
            stats = list(bleu_stats(sentence, references[int(i)]))
            bleu_score = bleu(stats)
            smoothed_bleu_score = smoothed_bleu(stats)
            # Turn the whitespace-separated feature string into floats.
            feature_list = [float(x) for x in features.split()]
            nbests[int(i)].append(
                (sentence, bleu_score, smoothed_bleu_score, feature_list))

    theta = [1.0 / 6 for _ in xrange(6)]  # uniform initialization
    for _ in range(0, opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbests[nbest])
            # Largest smoothed-BLEU gap first (index 2 of each entry).
            sample.sort(key=lambda pair: pair[0][2] - pair[1][2], reverse=True)
            for k in range(0, min(len(sample), opts.xi)):
                first_feats = sample[k][0][3]
                second_feats = sample[k][1][3]
                for j in range(0, 6):
                    if theta[j] * first_feats[j] <= theta[j] * second_feats[j]:
                        mistake = mistake + 1
                        theta[j] = theta[j] + opts.eta * (first_feats[j] - second_feats[j])
        sys.stderr.write("Mistake: %s\n" % (mistake,))
    print("\n".join([str(weight) for weight in theta]))
def main(opts, references, input_nbest, theta0=None):
    """Train averaged-perceptron reranker weights and return them as text.

    Args:
        opts: options object; ``epo``, ``xi`` and ``eta`` are read here and
            the whole object is forwarded to ``get_sample``.
        references: sequence indexed by sentence id, passed to
            ``bleu.bleu_stats`` as the reference.
        input_nbest: iterable of ``id ||| sentence ||| features`` lines.
        theta0: optional initial weight vector; uniform weights are used
            when it is ``None``.

    Returns:
        The averaged weights, one per line, joined with newlines.
    """
    entry = namedtuple("entry", "sentence, smoothed_bleu, feature_list")
    nbests = []
    sys.stderr.write("No nbests on disk, so calculating ndests ... \n")
    for j, line in enumerate(input_nbest):
        (i, sentence, features) = line.strip().split("|||")
        i = int(i)
        stats = list(bleu.bleu_stats(sentence, references[i]))
        smoothed_bleu_score = bleu.smoothed_bleu(stats)
        feature_list = [float(x) for x in features.split()]
        # Grow the table as far as needed so a skipped sentence id cannot
        # cause an IndexError on the append below.
        while len(nbests) <= i:
            nbests.append([])
        nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list))
        if j % 5000 == 0:
            sys.stderr.write(".")

    arg_num = len(nbests[0][0].feature_list)
    theta = theta0
    if theta is None:
        theta = [1.0 / arg_num for _ in xrange(arg_num)]  # uniform initialization
    avg_theta = [0.0 for _ in xrange(arg_num)]
    avg_cnt = 0

    sys.stderr.write("\nTraining...\n")
    for _ in xrange(opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest, opts)
            # Largest smoothed-BLEU gap first.
            sample.sort(key=lambda pair: pair[0].smoothed_bleu - pair[1].smoothed_bleu,
                        reverse=True)
            for k in xrange(min(len(sample), opts.xi)):
                v1 = sample[k][0].feature_list
                v2 = sample[k][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta)
                # Accumulate after every examined pair for the final average.
                avg_theta = vector_plus(avg_theta, theta)
                avg_cnt += 1
        sys.stderr.write("Mistake: %s\n" % (mistake,))

    # Fall back to uniform weights when no pair was ever examined.
    weights = [avg / avg_cnt if avg_cnt != 0 else 1 / float(arg_num)
               for avg in avg_theta]
    sys.stderr.write("Computing best BLEU score and outputing...\n")
    # The averaged weights are what gets returned; no BLEU-maximising
    # selection is performed here.
    return "\n".join([str(weight) for weight in weights])
def gold_score(hyp_line, ref_line):
    """Compute the gold (BLEU) score of one n-best entry against its reference.

    *hyp_line* must split into exactly three fields on ``" ||| "``; the middle
    field is the hypothesis text.  Both the hypothesis and *ref_line* are
    tokenized on whitespace before scoring.
    """
    fields = hyp_line.split(" ||| ")
    _, hypothesis, _ = fields  # ValueError unless there are exactly 3 fields
    hypothesis_tokens = hypothesis.split()
    reference_tokens = ref_line.split()
    stats = tuple(bleu.bleu_stats(hypothesis_tokens, reference_tokens))
    return bleu.bleu(stats)
def main():
    """Learn six per-feature reranking weights and print one per line.

    Reference sentences come from ``opts.en``; n-best entries come from
    ``opts.nbest`` as ``id ||| sentence ||| features`` lines.  Training runs
    for ``opts.epo`` epochs over pairs drawn by ``get_sample``, using at most
    ``opts.xi`` pairs per sentence and learning rate ``opts.eta``.
    """
    nbests = defaultdict(list)
    references = {}

    # Context managers close both input files deterministically.
    with open(opts.en) as reference_file:
        # References are keyed by their 0-based line number.
        for i, line in enumerate(reference_file):
            references[i] = line

    with open(opts.nbest) as nbest_file:
        for line in nbest_file:
            (i, sentence, features) = line.strip().split("|||")
            sentence_id = int(i)
            stats = list(bleu_stats(sentence, references[sentence_id]))
            bleu_score = bleu(stats)
            smoothed_bleu_score = smoothed_bleu(stats)
            # Feature string -> list of floats.
            feature_list = [float(x) for x in features.split()]
            nbests[sentence_id].append(
                (sentence, bleu_score, smoothed_bleu_score, feature_list))

    theta = [1.0 / 6 for _ in xrange(6)]  # uniform initialization
    for _ in range(0, opts.epo):
        mistake = 0
        for sentence_id in nbests:
            sample = get_sample(nbests[sentence_id])
            # Entry index 2 is the smoothed BLEU; biggest gap first.
            sample.sort(key=lambda pair: pair[0][2] - pair[1][2], reverse=True)
            for k in range(0, min(len(sample), opts.xi)):
                feats_a = sample[k][0][3]
                feats_b = sample[k][1][3]
                for j in range(0, 6):
                    if theta[j] * feats_a[j] <= theta[j] * feats_b[j]:
                        mistake = mistake + 1
                        theta[j] = theta[j] + opts.eta * (feats_a[j] - feats_b[j])
        sys.stderr.write("Mistake: %s\n" % (mistake, ))
    print("\n".join([str(weight) for weight in theta]))
def computeBleu(system, reference):
    """Return the smoothed BLEU of *system* against *reference*.

    The statistics from ``bleu.bleu_stats`` are folded onto a ten-slot zero
    vector, so at most ten of them are kept.
    """
    zeros = [0] * 10
    sentence_stats = bleu.bleu_stats(system, reference)
    folded = [zero + stat for zero, stat in zip(zeros, sentence_stats)]
    return bleu.smoothed_bleu(folded)
def main():
    """Train averaged-perceptron reranker weights and print one per line.

    References are read from ``opts.en``.  The scored n-best table is loaded
    through ``read_ds_from_file(opts.nbestDS)``; when that returns ``None`` it
    is rebuilt from ``opts.nbest`` and handed to ``write_ds_to_file``.
    Training runs ``opts.epo`` epochs with at most ``opts.xi`` pairs per
    sentence and learning rate ``opts.eta``.
    """
    references = []
    sys.stderr.write("Reading English Sentences\n")
    with open(opts.en) as en_file:
        for i, line in enumerate(en_file):
            references.append(line)
            if i % 100 == 0:
                sys.stderr.write(".")

    sys.stderr.write("\nTry reading nbests datastructure from disk ... \n")
    nbests = read_ds_from_file(opts.nbestDS)
    if nbests is None:
        nbests = []
        sys.stderr.write("No nbests on disk, so calculating ndests ... \n")
        with open(opts.nbest) as nbest_file:
            for j, line in enumerate(nbest_file):
                (i, sentence, features) = line.strip().split("|||")
                i = int(i)
                stats = list(bleu.bleu_stats(sentence, references[i]))
                smoothed_bleu_score = bleu.smoothed_bleu(stats)
                # Feature string -> list of floats.
                feature_list = [float(x) for x in features.split()]
                # Tolerate skipped sentence ids instead of raising IndexError.
                while len(nbests) <= i:
                    nbests.append([])
                nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list))
                if j % 5000 == 0:
                    sys.stderr.write(".")
        write_ds_to_file(nbests, opts.nbestDS)

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0 / arg_num for _ in xrange(arg_num)]  # uniform initialization
    avg_theta = [0.0 for _ in xrange(arg_num)]
    avg_cnt = 0

    sys.stderr.write("\nTraining...\n")
    for _ in xrange(opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest)
            # Largest smoothed-BLEU gap first.
            sample.sort(key=lambda pair: pair[0].smoothed_bleu - pair[1].smoothed_bleu,
                        reverse=True)
            for k in xrange(min(len(sample), opts.xi)):
                v1 = sample[k][0].feature_list
                v2 = sample[k][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta)
                # Accumulate after every examined pair for the final average.
                avg_theta = vector_plus(avg_theta, theta)
                avg_cnt += 1
        sys.stderr.write("Mistake: %s\n" % (mistake,))

    # Fall back to uniform weights when no pair was ever examined.
    weights = [avg / avg_cnt if avg_cnt != 0 else 1 / float(arg_num)
               for avg in avg_theta]
    sys.stderr.write("Computing best BLEU score and outputing...\n")
    # The averaged weights are printed; no BLEU-maximising selection happens here.
    print("\n".join([str(weight) for weight in weights]))
def bleu_score(input_string):
    """Return the corpus BLEU of *input_string* against ``opts.reference``.

    *input_string* holds one hypothesis per ``'\\n'``-separated line.
    Hypotheses and references are paired positionally; surplus lines on
    either side are ignored by ``zip``.
    """
    # Close the reference file as soon as it has been tokenized.
    with open(opts.reference) as ref_file:
        ref = [line.strip().split() for line in ref_file]
    hyp = [line.strip().split() for line in input_string.split('\n')]

    stats = [0] * 10
    for (r, h) in zip(ref, hyp):
        # Element-wise accumulation of the per-sentence statistics.
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(h, r))]
    return bleu.bleu(stats)
def score(predicted, reference):
    """Return the corpus BLEU of *predicted* against a reference file.

    Args:
        predicted: iterable of hypothesis lines (strings).
        reference: path of the reference file, one sentence per line.

    Lines are paired positionally; surplus lines on either side are ignored.
    """
    # Close the reference file as soon as it has been tokenized.
    with open(reference) as ref_file:
        ref = [line.strip().split() for line in ref_file]
    system = [line.strip().split() for line in predicted]

    stats = [0] * 10
    for (r, s) in zip(ref, system):
        # Element-wise accumulation of the per-sentence statistics.
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r))]
    return bleu.bleu(stats)
def main(opts, sysstdin):
    """Return the corpus BLEU of the lines in *sysstdin* against ``opts.en``.

    Args:
        opts: options object; only ``opts.en`` (reference file path) is read.
        sysstdin: iterable of system-output lines.

    Lines are paired positionally; surplus lines on either side are ignored.
    """
    # Close the reference file as soon as it has been tokenized.
    with open(opts.en) as ref_file:
        ref = [line.strip().split() for line in ref_file]
    system = [line.strip().split() for line in sysstdin]

    stats = [0] * 10
    for (r, s) in zip(ref, system):
        # Element-wise accumulation of the per-sentence statistics.
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r))]
    return bleu.bleu(stats)
def sum_bleu_scores_per_range(range_marker_dict):
    """Sum sentence-level BLEU scores for each key of *range_marker_dict*.

    Args:
        range_marker_dict: mapping from a range key to an iterable of
            ``(hypothesis, reference)`` pairs.

    Returns:
        A dict with the same keys, each mapped to the float sum of
        ``bleu.bleu`` over its pairs (``0.0`` for an empty iterable).
    """
    range_bleu_scores = {}
    # Iterate the parameter itself; the previous body referenced the
    # undefined name ``range_markers_dict``.
    for key, pairs in range_marker_dict.items():
        total = 0.0
        for hyp, ref in pairs:
            # Materialise the statistics, as the other callers in this file do,
            # so bleu.bleu receives a sequence, not a one-shot iterator.
            total += bleu.bleu(list(bleu.bleu_stats(hyp, ref)))
        range_bleu_scores[key] = total
    return range_bleu_scores
def compute_bleu(hypo, ref="data/dev.ref"):
    """Return the corpus BLEU, scaled by 100, of one file against another.

    Args:
        hypo: path of the hypothesis file, one sentence per line.
        ref: path of the reference file, one sentence per line.

    Lines are paired positionally; surplus lines on either side are ignored.
    """
    # ``with`` closes both files even if tokenizing raises, and the ``ref``
    # parameter is no longer rebound to a list.
    with open(ref, 'r') as f_ref:
        ref_sentences = [line.strip().split() for line in f_ref]
    with open(hypo, 'r') as f_hypo:
        hyp_sentences = [line.strip().split() for line in f_hypo]

    stats = [0] * 10
    for (r, h) in zip(ref_sentences, hyp_sentences):
        # Element-wise accumulation of the per-sentence statistics.
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(h, r))]
    return (100 * bleu.bleu(stats))
def bleu_score(mt_para_corpus, si_para_corpus, N=4):
    """Corpus BLEU of the *si_para_corpus* targets against the *mt_para_corpus* targets.

    Sentence pairs of the two corpora are matched positionally.  The target
    tokens of the ``mt`` pair are the reference and those of the ``si`` pair
    are the scored output.  *N* is accepted but not used.
    """
    totals = [0] * 10
    paired = zip(mt_para_corpus.sent_pairs, si_para_corpus.sent_pairs)
    for mt_pair, si_pair in paired:
        reference_tokens = [word.tok for word in mt_pair.tgt_sent.words]
        output_tokens = [word.tok for word in si_pair.tgt_sent.words]
        sentence_stats = bleu.bleu_stats(output_tokens, reference_tokens)
        totals = [running + current
                  for running, current in zip(totals, sentence_stats)]
    return bleu.bleu(totals)
def main():
    """Train perceptron reranker weights and print one per line.

    References are read from ``opts.en`` and n-best entries from
    ``opts.nbest`` (``id ||| sentence ||| features``).  Training runs
    ``opts.epo`` epochs with at most ``opts.xi`` pairs per sentence and
    learning rate ``opts.eta``; the final (non-averaged) weights are printed.
    """
    nbests = []
    references = []

    sys.stderr.write("Reading English Sentences")
    with open(opts.en) as en_file:
        for i, line in enumerate(en_file):
            references.append(line)
            if i % 100 == 0:
                sys.stderr.write(".")

    sys.stderr.write("\nReading ndests")
    with open(opts.nbest) as nbest_file:
        for j, line in enumerate(nbest_file):
            (i, sentence, features) = line.strip().split("|||")
            i = int(i)
            stats = list(bleu_stats(sentence, references[i]))
            bleu_score = bleu(stats)
            smoothed_bleu_score = smoothed_bleu(stats)
            # Feature string -> list of floats.
            feature_list = [float(x) for x in features.split()]
            # Tolerate skipped sentence ids instead of raising IndexError.
            while len(nbests) <= i:
                nbests.append([])
            nbests[i].append(
                entry(sentence, bleu_score, smoothed_bleu_score, feature_list))
            if j % 5000 == 0:
                sys.stderr.write(".")

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0 / arg_num for _ in xrange(arg_num)]  # uniform initialization

    sys.stderr.write("\nTraining...\n")
    for _ in xrange(opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest)
            # Largest smoothed-BLEU gap first.
            sample.sort(key=lambda pair: pair[0].smoothed_bleu - pair[1].smoothed_bleu,
                        reverse=True)
            for k in xrange(min(len(sample), opts.xi)):
                v1 = sample[k][0].feature_list
                v2 = sample[k][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta)
        sys.stderr.write("Mistake: %s\n" % (mistake, ))
    print("\n".join([str(weight) for weight in theta]))
def main():
    """Train averaged-perceptron reranker weights and print one per line.

    References are read from ``opts.en`` and n-best entries from
    ``opts.nbest`` (``id ||| sentence ||| features``).  Training runs
    ``opts.epo`` epochs with at most ``opts.xi`` pairs per sentence and
    learning rate ``opts.eta``.
    """
    nbests = []
    references = []

    sys.stderr.write("Reading English Sentences")
    with open(opts.en) as en_file:
        for i, line in enumerate(en_file):
            references.append(line)
            if i % 100 == 0:
                sys.stderr.write(".")

    sys.stderr.write("\nReading ndests")
    with open(opts.nbest) as nbest_file:
        for j, line in enumerate(nbest_file):
            (i, sentence, features) = line.strip().split("|||")
            i = int(i)
            stats = list(bleu_stats(sentence, references[i]))
            smoothed_bleu_score = smoothed_bleu(stats)
            # Feature string -> list of floats.
            feature_list = [float(x) for x in features.split()]
            # NOTE(review): this stops after the first ten n-best lines, which
            # looks like a leftover debugging cap -- confirm before training
            # on real data.
            if j == 10:
                break
            # Tolerate skipped sentence ids instead of raising IndexError.
            while len(nbests) <= i:
                nbests.append([])
            nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list))
            if j % 5000 == 0:
                sys.stderr.write(".")

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0 / arg_num for _ in xrange(arg_num)]  # uniform initialization
    avg_theta = [0 for _ in xrange(arg_num)]
    avg_cnt = 0

    sys.stderr.write("\nTraining...\n")
    for _ in xrange(opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest)
            # Largest smoothed-BLEU gap first.
            sample.sort(key=lambda pair: pair[0].smoothed_bleu - pair[1].smoothed_bleu,
                        reverse=True)
            for k in xrange(min(len(sample), opts.xi)):
                v1 = sample[k][0].feature_list
                v2 = sample[k][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta)
                # Accumulate after every examined pair for the final average.
                avg_theta = vector_plus(avg_theta, theta)
                avg_cnt += 1
        sys.stderr.write("Mistake: %s\n" % (mistake,))

    # Guard the average: with no examined pairs avg_cnt is 0, so fall back
    # to uniform weights instead of dividing by zero.
    final_theta = [t / avg_cnt if avg_cnt != 0 else 1 / float(arg_num)
                   for t in avg_theta]
    print("\n".join([str(weight) for weight in final_theta]))
def update_param(feature, current_param_dict):
    """Re-optimize the weight of *feature* and return the new parameters.

    For each of the ``num_sents`` sentences, the 100 candidates belonging to
    it are turned into lines (``define_sentence_lines``) and then into an
    interval sequence (``find_line_sequence``); BLEU statistics are recorded
    per interval.  ``choose_best_interval`` picks the winning interval, and
    *feature* is set to half the sum of its two bounds.

    Returns:
        ``(new_param_dict, best_BLEU)``; every parameter other than *feature*
        is copied unchanged from *current_param_dict*.
    """
    stats_by_sentence = {}
    for sent_idx in xrange(0, num_sents):
        sent_ref = reference[sent_idx]
        first = sent_idx * 100
        candidates = all_hyps[first:first + 100]
        line_dict, steepest_line = define_sentence_lines(
            feature, candidates, current_param_dict)
        sequence = find_line_sequence(line_dict, [(steepest_line, -999999)])

        stats_by_interval = {}
        for candidate, interval_start, interval_end in sequence:
            interval = (interval_start, interval_end)
            stats_by_interval[interval] = list(
                bleu.bleu_stats(candidate[2].split(), sent_ref))
        stats_by_sentence[sent_idx] = stats_by_interval

    # Collect the distinct right-hand bounds of every recorded interval.
    distinct_ends = set()
    for intervals in stats_by_sentence.values():
        for interval in intervals.keys():
            distinct_ends.add(interval[1])
    all_interval_ends = sorted(distinct_ends)

    best_interval, best_BLEU = choose_best_interval(all_interval_ends,
                                                    stats_by_sentence)

    return_param_dict = {}
    for f in current_param_dict:
        if f != feature:
            return_param_dict[f] = current_param_dict[f]
        else:
            return_param_dict[f] = sum(best_interval) / 2
    return return_param_dict, best_BLEU
def get_nbest(nbest, source, target):
    """Load an n-best file and score every candidate with smoothed BLEU.

    Args:
        nbest: path of the n-best file (``id ||| sentence ||| features``).
        source: path of the source file.  It is accepted for interface
            compatibility but is not read, because nothing in this function
            uses its contents.
        target: path of the reference file, one sentence per line.

    Returns:
        A list with one entry per reference sentence; each entry is a list of
        ``translation(features, smoothed_bleu)`` named tuples.
    """
    with open(target) as target_file:
        ref = [line.strip().split() for line in target_file]

    nbests = [[] for _ in ref]
    sys.stderr.write("Computing smoothed bleu...")
    translation = namedtuple("translation", "features, smoothed_bleu")

    # Stream the n-best file line by line and close it when done.
    with open(nbest) as nbest_file:
        for line in nbest_file:
            (i, sentence, features) = line.strip().split("|||")
            i = int(i)
            feature_values = [float(f) for f in features.strip().split()]
            sentence_split = sentence.strip().split()
            stats = tuple(bleu.bleu_stats(sentence_split, ref[i]))
            nbests[i].append(translation(feature_values, bleu.smoothed_bleu(stats)))
    return nbests
def compute_score(weights, refs, hyps):
    """Return the corpus BLEU of the best-scoring hypothesis per sentence.

    For each of the ``num_sents`` sentences, the 100 entries of the
    module-level ``all_hyps`` that belong to it are scored as the weighted sum
    of their ``name=value`` features, and the highest-scoring hypothesis is
    kept.  The kept hypotheses are then scored against *refs*.

    Args:
        weights: mapping from feature name to weight.
        refs: references, paired positionally with the selected hypotheses.
        hyps: accepted but not used; candidates come from ``all_hyps``.
    """
    tot_stats = [0] * 10
    hyp_list = []
    for s in xrange(0, num_sents):
        hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
        (best_score, best) = (-1e300, '')
        for (num, hyp, feats) in hyps_for_one_sent:
            score = 0.0
            for feat in feats.split(' '):
                (k, v) = feat.split('=')
                score += weights[k] * float(v)
            if score > best_score:
                (best_score, best) = (score, hyp)
        hyp_list.append("%s\n" % best)

    for (r, h) in zip(refs, hyp_list):
        # Hypothesis first, reference second -- the argument order used by
        # every other bleu_stats call in this file.
        # NOTE(review): h is an untokenized string here; confirm that refs
        # hold the same kind of object before trusting the score.
        tot_stats = [sum(pair) for pair in zip(tot_stats, bleu.bleu_stats(h, r))]
    return bleu.bleu(tot_stats)
def performance(weights, dev_src, dev_kbest, dev_ref):
    """Score the top-ranked hypothesis of every dev sentence under *weights*.

    Args:
        weights: mapping from feature name to weight.
        dev_src: path of the source file; field 1 of each `` ||| ``-separated
            line is the source sentence.
        dev_kbest: path of the k-best file, 100 `` ||| ``-separated entries
            per sentence.
        dev_ref: path of the reference file, one sentence per line.

    Returns:
        Whatever ``score_bleu_stats`` returns for the list of per-sentence
        BLEU statistics of the selected hypotheses.
    """
    with open(dev_kbest) as kbest_file:
        all_hyps = [pair.split(' ||| ') for pair in kbest_file]
    with open(dev_src) as src_file:
        all_src = [s.split(' ||| ') for s in src_file]
    # Floor division keeps the sentence count an integer.
    num_sents = len(all_hyps) // 100

    stats = []
    with open(dev_ref) as ref_file:
        for (ref, src, s) in zip(ref_file, all_src, range(num_sents)):
            hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
            ref = ref.strip().split()
            src = src[1].strip().split()
            (best_score, best) = (-1e300, '')
            for (num, h_sent, feats) in hyps_for_one_sent:
                score = 0.0
                hyp = h_sent.strip().split()
                for (k, v) in get_feats(hyp, src, feats).items():
                    score += weights[k] * v
                if score > best_score:
                    (best_score, best) = (score, h_sent)
            stats.append(list(bleu.bleu_stats(best.strip().split(), ref)))
    return score_bleu_stats(stats)
candidate = namedtuple("candidate", "english, features , score") nbests = [] cnt = 0 #count # of sentence #we can run the first part for only once and save it. ###1st part,compute blue score for each candidate translation. for line in open(opts.nbest): cnt = cnt + 1 #print '{0}\r'.format("\rIteration: %d/%d." %(cnt, 432303)), (i, sentence, features) = line.strip().split("|||") if len(nbests) <= int(i): nbests.append([]) features = [float(h) for h in features.strip().split()] stats = [0 for kk in xrange(10)] #code from stats = [ sum(scores) for scores in zip( stats, bleu.bleu_stats(sentence.strip().split(), ref[int(i)])) ] score = bleu.smoothed_bleu(stats) nbests[int(i)].append(candidate(sentence.strip(), features, score)) cPickle.dump(nbests, open( 'my_nbests_add.p', 'wb')) #save the result. no need to run the first part each time #print "finished calculating nbests." nbests = cPickle.load(open('my_nbests_add.p', 'rb')) #load pickled file #2nd part,learn the optimal weight epochs = 20 #setup parameters mentioned in pseudocode tau_maxsize = 100 #5000 xi = 10 #50 tau = [] alpha = 0.05
def main():
    """Train reranker weights with a margin-based pairwise update and print them.

    Reads references from ``opts.en``, loads the scored n-best table through
    ``read_ds_from_file(opts.nbestDS)`` (rebuilding it from ``opts.nbest`` and
    writing it back when that returns ``None``), then performs ``opts.epo``
    passes in which ``theta`` is updated once per sentence.  The final
    ``theta`` is printed, one weight per line.
    """
    references = []
    sys.stderr.write("Reading English Sentences\n")
    for i, line in enumerate(open(opts.en)):
        '''Initialize references to correct english sentences'''
        references.append(line)
        if i%100 == 0:
            sys.stderr.write(".")

    sys.stderr.write("\nTry reading %s from disk ... \n" % opts.nbestDS)
    nbests = read_ds_from_file(opts.nbestDS)
    if nbests is None:
        nbests = []
        sys.stderr.write("%s is not on disk, so calculating it ... \n" % opts.nbestDS)
        for j,line in enumerate(open(opts.nbest)):
            (i, sentence, features) = line.strip().split("|||")
            i = int(i)
            stats = list(bleu.bleu_stats(sentence, references[i]))
            # bleu_score = bleu.bleu(stats)
            smoothed_bleu_score = bleu.smoothed_bleu(stats)
            # making the feature string to float list
            feature_list = [float(x) for x in features.split()]
            if len(nbests)<=i:
                nbests.append([])
            # nbests[i].append(entry(sentence, bleu_score, smoothed_bleu_score, feature_list))
            nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list))
            if j%5000 == 0:
                sys.stderr.write(".")
        sys.stderr.write("\nWriting %s to disk ... \n" % opts.nbestDS)
        write_ds_to_file(nbests, opts.nbestDS)
        sys.stderr.write("Finish writing %s\n" % opts.nbestDS)

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0/arg_num for _ in xrange(arg_num)] #initialization
    # avg_theta = [ 0.0 for _ in xrange(arg_num)]
    # avg_cnt = 0
    tau = opts.tau # positive learning margin

    sys.stderr.write("\nTraining...\n")
    for iter_num in xrange(opts.epo):
        sys.stderr.write("\nIteration#{} ".format(iter_num + 1))
        # cnt counts the pairs that matched neither update branch below.
        cnt = 0;
        # sentence wise updating
        for i, nbest in enumerate(nbests):
            # y is the n-best list ordered from highest to lowest smoothed BLEU.
            y = sorted(nbest, key = lambda h: h.smoothed_bleu, reverse = True)
            # mu[j] is the signed update count accumulated for candidate j.
            mu = [0.0]*len(nbest)
            w_times_x = [0.0]*len(nbest)
            for j, best in enumerate(nbest):
                # calculate linear function result
                w_times_x[j] = dot_product(theta, best.feature_list)
            # processing pairs
            # NOTE(review): when bottom_k is 0, y[- bottom_k] is y[0] (the
            # highest-BLEU entry); confirm opts.k never makes it 0.
            top_r = int(len(y)*opts.r)
            bottom_k = int(len(y)*opts.k)
            for j in xrange(len(nbest) - 1):
                for l in xrange(j+1, len(nbest)):
                    if nbest[j].smoothed_bleu <= y[top_r].smoothed_bleu \
                            and nbest[l].smoothed_bleu >= y[- bottom_k].smoothed_bleu \
                            and w_times_x[j] > w_times_x[l] + tau:
                        mu[j] = mu[j] + 1
                        mu[l] = mu[l] - 1
                    elif nbest[j].smoothed_bleu >= y[- bottom_k].smoothed_bleu \
                            and nbest[l].smoothed_bleu <= y[top_r].smoothed_bleu \
                            and w_times_x[j] > w_times_x[l] - tau:
                        mu[j] = mu[j] - 1
                        mu[l] = mu[l] + 1
                    else:
                        cnt += 1
                if (j + 1) % 100 == 0:
                    sys.stderr.write(".")
            # Combine the feature vectors weighted by mu and move theta by
            # opts.eta times that sum.
            vector_sum = [0 for _ in xrange(len(nbest[0].feature_list))]
            for m, best in enumerate(nbest):
                vector_sum = vector_plus(vector_sum, scale_product(mu[m], best.feature_list))
            theta = vector_plus(theta, vector_sum, opts.eta)
            # avg_theta = vector_plus(avg_theta, theta)
            # avg_cnt += 1
        sys.stderr.write("\n Non-supported vectors: %s\n" % (cnt,))
    # weights = [ avg / avg_cnt if avg_cnt !=0 else 1/float(arg_num) for avg in avg_theta ]
    sys.stderr.write("Computing best BLEU score and outputing...\n")
    # NOTE(review): the final theta is printed as-is; no averaging or
    # BLEU-based selection happens in this function.
    print "\n".join([str(weight) for weight in theta])
weights = {'p(e)' : float(opts.lm) , 'p(e|f)' : float(opts.tm1), 'p_lex(f|e)' : float(opts.tm2)} ref = [line.strip().split() for line in open(opts.reference)] all_hyps = [pair.split(' ||| ') for pair in open(opts.input)] num_sents = len(zip(ref, all_hyps)) # Calculate the "gold" scoring function G # which is just the local BLEU score here. all_scores = defaultdict(list) for s in xrange(0, num_sents): hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100] for (num, hyp, feats) in hyps_for_one_sent: stats = [0 for i in xrange(10)] stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(hyp.split(" "), ref[s]))] score = bleu.bleu(stats) all_scores[s].append( (score, hyp, feats) ) def tune(): ''' Finds best weight w ''' w = array([[float(opts.lm), float(opts.tm1), float(opts.tm2)]]) binary_classifier = svm.SVC(kernel="linear") for _ in range(0,5): # for desired number of iterations X, y = [], [] for s in xrange(0, num_sents): samples = sampler(s, 5000, 50, 0.05) for (feats, sign) in samples: X.append(feats) y.append(sign)
def train(nbest_candidates,
          reference_files,
          init_weights=None,
          epochs=5,
          alpha=0.04,
          tau=100,
          xi=20,
          eta=0.0001):
    """Train reranker weights with a pairwise perceptron and return their average.

    Args:
        nbest_candidates: iterable of ``id ||| sentence ||| features`` lines.
        reference_files: paths of reference files, one sentence per line.
            Each candidate keeps its best smoothed BLEU over all references.
        init_weights: optional initial weight vector; it is copied, so the
            caller's object is never modified.  Defaults to uniform weights.
        epochs: number of passes over the n-best lists.
        alpha: minimum smoothed-BLEU gap for a sampled pair to be kept.
        tau: number of pairs sampled per sentence and epoch.
        xi: number of kept pairs (largest gap first) used for updates.
        eta: learning rate.

    Returns:
        A numpy array: the mean of the weight vector taken at the end of each
        epoch, or the initial weights when ``epochs`` is 0.
    """
    sys.stderr.write("Initializing training data\n")
    candidate = namedtuple("candidate", "sentence, features, bleu, smoothed_bleu")

    refs = []
    for reference_file in reference_files:
        with open(reference_file) as ref_file:
            refs.append([line.strip().split() for line in ref_file])

    nbests = []
    num_candidates = 0
    for n, line in enumerate(nbest_candidates):
        (i, sentence, features) = line.strip().split("|||")
        i = int(i)
        sentence = sentence.strip()
        features = np.array([float(h) for h in features.strip().split()])

        # Keep the best smoothed BLEU over all references; ``bleu_score``
        # ends up holding the plain BLEU against the last reference.
        max_bleu_score = -float('inf')
        for ref in refs:
            stats = tuple(bleu.bleu_stats(sentence.split(), ref[i]))
            bleu_score = bleu.bleu(stats)
            smoothed_bleu_score = bleu.smoothed_bleu(stats)
            max_bleu_score = max(max_bleu_score, smoothed_bleu_score)

        while len(nbests) <= i:
            nbests.append([])
        nbests[i].append(
            candidate(sentence, features, bleu_score, max_bleu_score))
        num_candidates += 1
        if n % 2000 == 0:
            sys.stderr.write(".")
    # Report a true count; the loop index is one short of it and is
    # undefined for empty input.
    sys.stderr.write("\nRetrieved %d candidates for %d sentences\n" %
                     (num_candidates, len(nbests)))

    num_features = len(nbests[0][0].features)
    if init_weights is not None:
        # Copy: the in-place updates below must not alter the caller's array.
        w = np.array(init_weights, dtype=float)
    else:
        w = np.array([1.0 / num_features] * num_features)
    assert len(w) == num_features
    w_sum = np.zeros(num_features)

    random.seed()
    for i in range(epochs):
        sys.stderr.write("Training epoch %d:\n" % i)
        mistakes = 0
        for nbest in nbests:
            if len(nbest) < 2:
                continue
            # Sample tau pairs; keep those with a clear BLEU gap, better first.
            sample = []
            for _ in range(tau):
                (s1, s2) = (nbest[k] for k in random.sample(range(len(nbest)), 2))
                if fabs(s1.smoothed_bleu - s2.smoothed_bleu) > alpha:
                    if s1.smoothed_bleu > s2.smoothed_bleu:
                        sample.append((s1, s2))
                    else:
                        sample.append((s2, s1))
            sample.sort(key=lambda s: s[0].smoothed_bleu - s[1].smoothed_bleu,
                        reverse=True)
            for (s1, s2) in sample[:xi]:
                # Mistake: the model does not score the better candidate higher.
                if np.dot(w, s1.features) <= np.dot(w, s2.features):
                    mistakes += 1
                    w += eta * (s1.features - s2.features)  # vector addition
        # One accumulation per epoch, matching the division by ``epochs``.
        w_sum += w
        sys.stderr.write("Number of mistakes: %d\n" % mistakes)

    if epochs > 0:
        w = w_sum / float(epochs)
    return w
def main():
    """Train averaged-perceptron reranker weights and print one per line.

    References are read from ``opts.en``.  The scored n-best table comes from
    ``read_ds_from_file(opts.nbestDS)``; if that returns ``None`` the table is
    computed from ``opts.nbest`` and passed to ``write_ds_to_file``.  Training
    performs ``opts.epo`` epochs, using at most ``opts.xi`` pairs per sentence
    and learning rate ``opts.eta``.
    """
    references = []
    sys.stderr.write("Reading English Sentences\n")
    with open(opts.en) as reference_file:
        for i, line in enumerate(reference_file):
            references.append(line)
            if i % 100 == 0:
                sys.stderr.write(".")

    sys.stderr.write("\nTry reading nbests datastructure from disk ... \n")
    nbests = read_ds_from_file(opts.nbestDS)
    if nbests is None:
        nbests = []
        sys.stderr.write("No nbests on disk, so calculating ndests ... \n")
        with open(opts.nbest) as nbest_file:
            for j, line in enumerate(nbest_file):
                (i, sentence, features) = line.strip().split("|||")
                i = int(i)
                stats = list(bleu.bleu_stats(sentence, references[i]))
                smoothed_bleu_score = bleu.smoothed_bleu(stats)
                # Feature string -> list of floats.
                feature_list = [float(x) for x in features.split()]
                # Pad for skipped sentence ids so the append cannot fail.
                while len(nbests) <= i:
                    nbests.append([])
                nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list))
                if j % 5000 == 0:
                    sys.stderr.write(".")
        write_ds_to_file(nbests, opts.nbestDS)

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0 / arg_num for _ in xrange(arg_num)]  # uniform initialization
    avg_theta = [0.0 for _ in xrange(arg_num)]
    avg_cnt = 0

    sys.stderr.write("\nTraining...\n")
    for _ in xrange(opts.epo):
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest)
            # Largest smoothed-BLEU gap first.
            sample.sort(key=lambda pair: pair[0].smoothed_bleu - pair[1].smoothed_bleu,
                        reverse=True)
            for k in xrange(min(len(sample), opts.xi)):
                v1 = sample[k][0].feature_list
                v2 = sample[k][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta)
                # Accumulate after every examined pair for the final average.
                avg_theta = vector_plus(avg_theta, theta)
                avg_cnt += 1
        sys.stderr.write("Mistake: %s\n" % (mistake, ))

    # Fall back to uniform weights when no pair was ever examined.
    weights = [
        avg / avg_cnt if avg_cnt != 0 else 1 / float(arg_num)
        for avg in avg_theta
    ]
    sys.stderr.write("Computing best BLEU score and outputing...\n")
    # The averaged weights are printed; no BLEU-maximising selection happens here.
    print("\n".join([str(weight) for weight in weights]))
# time.time() - start_time, # ) # ) # if best_dev > dev_loss: # print("[DEV] Best model so far, saving snapshot.") #, "") # best_dev = dev_loss # this is how you generate, can replace with desired sentenced to generate model = torch.load("") _, _, test_it, _, _ = get_datasets(1) sentences = [] stats = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) model.eval() sample_batch_id = random.choice(range(len(test_it))) for sent_id, sent in enumerate(test_it): pred, attn_matrix = generate(sent.src[:, 0]) pred_txt = " ".join(map(lambda x: trg_vocab.vocab.itos[x], pred)) target = list(map(lambda x: trg_vocab.vocab.itos[x], sent.trg[1:, 0])) target_txt = " ".join(target[:target.index("<eos>")]) stats += np.array(bleu_stats(pred_txt, target_txt)) sentences.append([pred_txt, target_txt]) if sample_batch_id == sent_id: #now let's visualize it's attention plot_attention([src_vocab.vocab.itos[x] for x in sent.src[:, 0]], [trg_vocab.vocab.itos[x] for x in pred], attn_matrix, 'attention_matrix.png') print("Corpus BLEU: %.2f" % (100 * bleu(stats))) for pred, target in sentences[:10]: print("%s => %s" % (pred, target))
num1 = random.randint(0, 99) num2 = random.randint(0, 99) if num1 == num2: if num1 == 0: num2 = num2 + 1 else: num2 = num2 - 1 #just making sure they arent the same example hyp1 = hyps_for_one_sent[num1][1] #print(hyp1) hyp2 = hyps_for_one_sent[num2][1] #print(hyp2) feats1 = hyps_for_one_sent[num1][2] feats2 = hyps_for_one_sent[num2][2] #calculate bleu score for each example s1=list(bleu.bleu_stats(hyp1, ref)) bs1=bleu.bleu(s1) s2=list(bleu.bleu_stats(hyp2, ref)) bs2=bleu.bleu(s2) #print(bs1, bs2) #make training vector with difference in values of feats and indicator if bs1 > bs2: indic = 1 else: if bs1 < bs2: indic = -1 else: continue #ignore the ones that have same bleu score? #get feat values for each pair of features and subtract trainfeats = []
theta = numpy.random.rand(6) eta = 0.1 (opts, _) = optparser.parse_args() source = open(opts.ref).read().splitlines() target = open(opts.tar).read().splitlines() a_translation = namedtuple('a_translation', 'sentence, features, smoothed_bleu') nbests = [[] for i in range(len(source))] for line in open(opts.nbest): (i, sentence, features) = line.strip().split("|||") ind = int(i) #stats=bleu.bleu_stats(sentence, source[ind]) stats = list(bleu.bleu_stats(sentence, target[ind])) #test1=test[0] bleu_smooth_score = bleu.smoothed_bleu(stats) feature_vec = numpy.fromstring(features, sep=' ') nbests[ind].append(a_translation(sentence, feature_vec, bleu_smooth_score)) def get_sample(nbest): sample = [] for i in range(0, tau): #random_items = random.sample(nbest, 2) #s1 = random_items[0] #s2 = random_items[1] s1 = random.choice(nbest) s2 = random.choice(nbest) if math.fabs(s1.smoothed_bleu - s2.smoothed_bleu) > alpha:
def get_bleu_stats(h, r):
    """Return the BLEU statistics of hypothesis string *h* against reference string *r*.

    Both strings are tokenized on whitespace.  The result is a list holding
    at most ten statistics from ``bleu.bleu_stats``.
    """
    hypothesis_tokens = h.split()
    reference_tokens = r.split()
    zeros = [0] * 10
    sentence_stats = bleu.bleu_stats(hypothesis_tokens, reference_tokens)
    return [zero + stat for zero, stat in zip(zeros, sentence_stats)]
#!/usr/bin/env python
"""Print the corpus BLEU of a decoder output file against a reference file."""
import optparse
import os
import sys

import bleu

optparser = optparse.OptionParser()
optparser.add_option("-r", "--reference", dest="reference", default=os.path.join("/usr/shared/CMPT/nlp-class/project/test/", ""), help="English reference sentences")
optparser.add_option("-i", "--input", dest="input", default=os.path.join("/home/yongyiw/Documents/Github/final-project/Code", "output_1"), help="decoder output")
(opts, _) = optparser.parse_args()

# Read and tokenize both files, closing each as soon as it has been consumed.
with open(opts.reference) as reference_file:
    ref = [line.strip().split() for line in reference_file]
with open(opts.input) as input_file:
    system = [line.strip().split() for line in input_file]

# Accumulate the sentence statistics element-wise, then score the corpus.
stats = [0] * 10
for (r, s) in zip(ref, system):
    stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r))]
print(bleu.bleu(stats))
optparser = optparse.OptionParser() optparser.add_option("-r", "--reference", dest="reference", default="data/test.en", help="English reference sentences") optparser.add_option("-n", "--nbest", dest="nbest", default="data/test.nbest", help="N-best lists") (opts,_) = optparser.parse_args() ref = [line.strip().split() for line in open(opts.reference)] nbests = [] for n, line in enumerate(open(opts.nbest)): (i, sentence, _) = line.strip().split("|||") (i, sentence) = (int(i), sentence.strip()) if len(ref) <= i: break while len(nbests) <= i: nbests.append([]) scores = tuple(bleu.bleu_stats(sentence.split(), ref[i])) inverse_scores = tuple([-x for x in scores]) nbests[i].append(translation_candidate(sentence, scores, inverse_scores)) if n % 2000 == 0: sys.stderr.write(".") oracle = [nbest[0] for nbest in nbests] stats = [0 for i in xrange(10)] for candidate in oracle: stats = [sum(scores) for scores in zip(stats, candidate.scores)] prev_score = 0 score = bleu.bleu(stats) # greedy search for better oracle. For each sentence, choose the
optparser = optparse.OptionParser() optparser.add_option("-r", "--reference", dest="reference", default=os.path.join("data", "test.en"), help="English reference sentences") optparser.add_option("-n", "--nbest", dest="nbest", default=os.path.join("data", "test.nbest"), help="N-best lists") (opts,_) = optparser.parse_args() ref = [line.strip().split() for line in open(opts.reference)] nbests = [] for n, line in enumerate(open(opts.nbest)): (i, sentence, _) = line.strip().split("|||") (i, sentence) = (int(i), sentence.strip()) if len(ref) <= i: break while len(nbests) <= i: nbests.append([]) scores = tuple(bleu.bleu_stats(sentence.split(), ref[i])) inverse_scores = tuple([-x for x in scores]) nbests[i].append(translation_candidate(sentence, scores, inverse_scores)) if n % 2000 == 0: sys.stderr.write(".") oracle = [nbest[0] for nbest in nbests] stats = [0 for i in xrange(10)] for candidate in oracle: stats = [sum(scores) for scores in zip(stats, candidate.scores)] prev_score = 0 score = bleu.bleu(stats) # greedy search for better oracle. For each sentence, choose the
def get_validation_bleu(hypotheses):
    """Return the corpus BLEU of *hypotheses* against ``dev_tgt`` as a string.

    Hypotheses and the ``dev_tgt`` references are paired positionally and
    tokenized on whitespace.  The score is multiplied by 100 and formatted
    with two decimals.
    """
    totals = numpy.zeros(10)
    for hyp_line, ref_line in zip(hypotheses, dev_tgt):
        hyp_tokens = hyp_line.strip().split()
        ref_tokens = ref_line.strip().split()
        totals += numpy.array(bleu_stats(hyp_tokens, ref_tokens))
    percent = 100 * bleu(totals)
    return "%.2f" % percent
#!/usr/bin/env python
"""Print the corpus BLEU of the sentences on stdin against a reference file."""
import optparse
import sys

import bleu

optparser = optparse.OptionParser()
optparser.add_option("-r", "--reference", dest="reference", default="data/test.en", help="English reference sentences")
(opts, _) = optparser.parse_args()

# Close the reference file as soon as it has been tokenized.
with open(opts.reference) as reference_file:
    ref = [line.strip().split() for line in reference_file]
system = [line.strip().split() for line in sys.stdin]

# Accumulate the sentence statistics element-wise, then score the corpus.
stats = [0] * 10
for (r, s) in zip(ref, system):
    stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r))]
print(bleu.bleu(stats))
def cal_store(ref, system):
    """Return the corpus BLEU of *system* sentences against *ref* sentences.

    The two sequences are paired positionally; surplus items on either side
    are ignored.
    """
    totals = [0] * 10
    for reference, hypothesis in zip(ref, system):
        sentence_stats = bleu.bleu_stats(hypothesis, reference)
        totals = [running + current
                  for running, current in zip(totals, sentence_stats)]
    return bleu.bleu(totals)
def writeFeatureVector():
    """Print pairwise training vectors built from the dev-set n-best lists.

    Reads the references (``opts.ref``), the dev source sentences
    (``opts.src_dev``) and the hypothesis file, extends every hypothesis'
    feature list with two extra features, samples hypothesis pairs per
    sentence and prints one comma-separated line per training vector: the
    feature differences followed by the sign of the BLEU difference.

    Returns nothing; the vectors go to stdout.
    """
    hypothesis_sentences = namedtuple("hyp", "features, bleu")
    ref = [line.strip().split() for line in open(opts.ref)][:sys.maxint]
    # Only the second "|||"-separated field of each source line is kept.
    src_dev = [line.strip().split("|||")[1] for line in open(opts.src_dev)][:sys.maxint]
    sys.stderr.write("reading dev data...")
    nbests = [[] for _ in ref]
    # NOTE(review): the argument of open() is missing here, so this line does
    # not parse; the path of the n-best file has to be restored.
    all_hyps = [pair.split(' ||| ') for pair in open(]
    # The slicing below takes the hypotheses in groups of 100 per sentence.
    num_sents = len(all_hyps) / 100
    for s in xrange(0, num_sents):
        hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
        for (num, hyp, feats) in hyps_for_one_sent:
            # Features are written as "name=value"; keep only the values.
            feats = [float(h.split('=')[1]) for h in feats.strip().split()]
            stats = tuple(bleu.bleu_stats(hyp.strip().split(), ref[s]))
            #TODO: add extra feature here
            # 1. adding number of target words
            enWordsNO = len(hyp.strip().split())
            feats.append(enWordsNO)
            #2. adding number of untranslated source words
            feats.append(calcNotTranslatedWords(src_dev[s], hyp))
            nbests[s].append(hypothesis_sentences(feats, bleu.bleu(stats)))
    # pairwise sampling. Figure 4 of the paper
    random.seed(0)  # fixed seed, so repeated runs draw the same samples
    sampling_hypothesis = namedtuple("sample", "hyp1, hyp2, gDiff")
    def sampling():
        # Draws opts.tau random pairs from ``nbest`` (the loop variable of
        # the enclosing ``for nbest in nbests`` below) and keeps the pairs
        # whose members differ and whose BLEU gap exceeds opts.alpha.
        V = []
        for _ in xrange(opts.tau):
            c1 = random.choice(nbest)
            c2 = random.choice(nbest)
            if c1 != c2 and math.fabs(c1.bleu - c2.bleu) > opts.alpha:
                V.append(sampling_hypothesis(c1, c2, math.fabs(c1.bleu - c2.bleu)))
        return V
    x = []
    nbest_count = 0
    for nbest in nbests:
        nbest_count = nbest_count +1
        V = sampling()
        # Keep the opts.xi pairs with the largest BLEU gap.
        sortedV = sorted(V , key=lambda h: h.gDiff, reverse=True)[:opts.xi]
        x_count = 0
        for idx, sample in enumerate(sortedV):
            x_count = x_count + 1
            # Each pair yields two vectors: (hyp1 - hyp2) and its mirror
            # image (hyp2 - hyp1), each labelled with the matching sign.
            tmp = [c1j-c2j for c1j,c2j in zip(sample.hyp1.features, sample.hyp2.features)]
            tmp.append(cmp(sample.hyp1.bleu , sample.hyp2.bleu))
            x.append(tmp)
            tmp = [c2j-c1j for c1j,c2j in zip(sample.hyp1.features, sample.hyp2.features)]
            tmp.append(cmp(sample.hyp2.bleu , sample.hyp1.bleu))
            x.append(tmp)
        # Report sentences that produced fewer than opts.xi usable pairs.
        if x_count != opts.xi:
            sys.stderr.write("%d\n" % (x_count))
    #writing feature vector
    for f in x:
        print ",".join(str(f0) for f0 in f)
} ref = [line.strip().split() for line in open(opts.reference)] all_hyps = [pair.split(' ||| ') for pair in open(opts.input)] num_sents = len(zip(ref, all_hyps)) # Calculate the "gold" scoring function G # which is just the local BLEU score here. all_scores = defaultdict(list) for s in xrange(0, num_sents): hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100] for (num, hyp, feats) in hyps_for_one_sent: stats = [0 for i in xrange(10)] stats = [ sum(scores) for scores in zip(stats, bleu.bleu_stats(hyp.split(" "), ref[s])) ] score = bleu.bleu(stats) all_scores[s].append((score, hyp, feats)) def tune(): ''' Finds best weight w ''' w = array([[float(opts.lm), float(opts.tm1), float(opts.tm2)]]) binary_classifier = svm.SVC(kernel="linear") for _ in range(0, 5): # for desired number of iterations X, y = [], [] for s in xrange(0, num_sents): samples = sampler(s, 5000, 50, 0.05) for (feats, sign) in samples:
def get_bleu_score(tgt, src):
    """Return the BLEU score computed from the statistics of one sentence pair.

    ``tgt`` and ``src`` are handed to ``bleu.bleu_stats`` in that order; at
    most the first ten statistics are used.
    """
    zeros = [0] * 10
    stats = [base + count
             for base, count in zip(zeros, bleu.bleu_stats(tgt, src))]
    return bleu.bleu(stats)
def main(): nbests = [] references = [] sys.stderr.write("Reading English Sentences") for i, line in enumerate(open(opts.en)): '''Initialize references to correct english sentences''' references.append(line) if i%100 == 0: sys.stderr.write(".") sys.stderr.write("\nReading ndests") for j,line in enumerate(open(opts.nbest)): (i, sentence, features) = line.strip().split("|||") i = int(i) stats = list(bleu.bleu_stats(sentence, references[i])) # bleu_score = bleu.bleu(stats) smoothed_bleu_score = bleu.smoothed_bleu(stats) # making the feature string to float list feature_list = [float(x) for x in features.split()] if len(nbests)<=i: nbests.append([]) # nbests[i].append(entry(sentence, bleu_score, smoothed_bleu_score, feature_list)) nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list)) if j%5000 == 0: sys.stderr.write(".") arg_num = len(nbests[0][0].feature_list) theta = [1.0/arg_num for _ in xrange(arg_num)] #initialization weights = [ [] for _ in xrange(opts.epo)] sys.stderr.write("\nTraining...\n") for j in xrange(opts.epo): avg_theta = [ 0.0 for _ in xrange(arg_num)] avg_cnt = 0 mistake = 0; for nbest in nbests: sample = get_sample(nbest) sample.sort(key=lambda i: i[0].smoothed_bleu - i[1].smoothed_bleu, reverse=True) for i in xrange(min(len(sample), opts.xi)): v1 = sample[i][0].feature_list v2 = sample[i][1].feature_list if dot_product(theta, v1) <= dot_product(theta, v2): mistake += 1 theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta) avg_theta = vector_plus(avg_theta, theta) avg_cnt += 1 sys.stderr.write("Mistake: %s\n" % (mistake,)) weights[j] = [ avg / avg_cnt if avg_cnt !=0 else 1/float(arg_num) for avg in avg_theta ] sys.stderr.write("Computing best BLEU score and outputing...\n") # instead of print the averaged-out weights, print the weights that maximize the BLEU score # print "\n".join([str(weight) for weight in final_theta]) bleu_score = [0 for _ in weights] for j, w in enumerate(weights): trans = [] translation = 
namedtuple("translation", "english, score") system = [] for i, nbest in enumerate(nbests): # for one sentence for et in nbest: if len(trans) <= int(i): trans.append([]) trans[int(i)].append(translation(et.sentence, sum([x*y for x,y in zip(w, et.feature_list)]))) for tran in trans: system.append(sorted(tran, key=lambda x: -x.score)[0].english) stats = [0 for i in xrange(10)] for (r,s) in zip(references, system): stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s,r))] bleu_score[j] = bleu.bleu(stats) idx = [i for i, bscore in enumerate(bleu_score) if bscore == max(bleu_score)][0] sys.stderr.write("Maximum BLEU score of training data is: {}\n".format(max(bleu_score))) sys.stderr.write("Corresponding weights are: {}\n".format(" ".join([ str(w) for w in weights[idx] ]))) print "\n".join([str(weight) for weight in weights[idx]])
def main():
    """Tune the feature weights with repeated MERT passes and report BLEU.

    Parses the command line, loads the references, the source sentences and
    the k-best lists (100 hypotheses per sentence) into ``data``, then calls
    ``mert(weights, data)`` up to five times, logging train/test BLEU and the
    current weights to stderr after every pass and finally the best weights
    seen.  Relies on module-level ``mert``, ``performance``, ``get_feats``,
    ``eps`` and ``MIN_W``.
    """
    optparser = optparse.OptionParser()
    optparser.add_option("-s", "--src", dest="src", default="data/train.src", help="source sentences")
    optparser.add_option("-k", "--kbest-list", dest="kbest", default="data/train.100best", help="100-best translation lists")
    optparser.add_option("-r", "--ref", dest="ref", default="data/train.ref", help="reference translations")
    optparser.add_option("-l", "--lm", dest="lm", default=-0.5, type="float", help="Language model weight")
    optparser.add_option("-t", "--tm1", dest="tm1", default=-0.5, type="float", help="Translation model p(e|f) weight")
    optparser.add_option("-u", "--tm2", dest="tm2", default=-0.5, type="float", help="Lexical translation model p_lex(f|e) weight")
    optparser.add_option("-c", "--word_cnt", dest="bp", default=0.5, type="float", help="brevity penalty weight")
    optparser.add_option("-g", "--greek_to_me", dest="untranslated", default=0.5, type="float", help="untranslated token weight")
    (opts, _) = optparser.parse_args()

    # Context managers close the input files once they have been read.
    with open(opts.ref) as ref_file:
        data = [{'ref': r.strip().split()} for r in ref_file]
    with open(opts.kbest) as kbest_file:
        all_hyps = [pair.split(' ||| ') for pair in kbest_file]

    with open(opts.src) as src_file:
        for (ind, s) in enumerate(src_file):
            # Source lines beyond the last reference are ignored.
            if ind >= len(data):
                break
            (sent_id, src_sent) = s.split(' ||| ', 1)
            src = src_sent.strip().split()
            ref = data[ind]['ref']
            data[ind]['src'] = src
            hyps_for_one_sent = all_hyps[ind * 100:ind * 100 + 100]
            # Pre-size the per-hypothesis tables, then fill them by index.
            data[ind]['kbest'] = [-1 for i in hyps_for_one_sent]
            data[ind]['kbest_feats'] = [-1 for i in hyps_for_one_sent]
            data[ind]['bleu'] = [-1 for i in hyps_for_one_sent]
            for (h_ind, (num, h_sent, feats)) in enumerate(hyps_for_one_sent):
                h = h_sent.strip().split()
                data[ind]['kbest'][h_ind] = h
                data[ind]['kbest_feats'][h_ind] = get_feats(h, src, feats)
                data[ind]['bleu'][h_ind] = [i for i in bleu.bleu_stats(h, ref)]

    # Feature name -> command-line flag letter, used when logging weights.
    shortcuts = {'p(e)' : 'l', 'p(e|f)' : 't', 'p_lex(f|e)' : 'u', 'word_cnt' : 'c', 'untranslated_cnt': 'g'}
    weights = {'p(e)' : opts.lm, 'p(e|f)' : opts.tm1, 'p_lex(f|e)' : opts.tm2, 'word_cnt':opts.bp, 'untranslated_cnt':opts.untranslated}

    def describe(current):
        # Render a weight dict as "-flag value " pairs, one per feature.
        return "".join("-%s %s " % (shortcuts[n], w) for (n, w) in current.items())

    sys.stderr.write( "iter -1\n")
    sys.stderr.write( "train BLEU %f\n" % performance(weights, opts.src, opts.kbest, opts.ref))
    sys.stderr.write( "test BLEU %f\n" % performance(weights, "data/dev+test.src", "data/dev+test.100best", "data/dev.ref"))
    sys.stderr.write( describe(weights) + "\n")

    best_bleu = 0.0
    # ``weights`` is modified in place by mert() and by the random restart
    # below, so the best weights must be a snapshot; keeping a reference
    # would make best_w always equal to the most recent weights.
    best_w = copy.deepcopy(weights)
    it = 0
    prev_bleu = 0.0
    while it < 5:
        old_weights = copy.deepcopy(weights)
        mert(weights, data)
        sys.stderr.write( "iter %d\n" % it)
        train_bleu = performance(weights, opts.src, opts.kbest, opts.ref)
        test_bleu = performance(weights, "data/dev+test.src", "data/dev+test.100best", "data/dev.ref")
        sys.stderr.write( "train BLEU %f\n" % train_bleu)
        sys.stderr.write( "test BLEU %f\n" % test_bleu)
        sys.stderr.write( describe(weights) + "\n")
        if train_bleu > best_bleu:
            best_bleu = train_bleu
            best_w = copy.deepcopy(weights)
        # Total absolute change of the weights during this pass.
        diff = 0.0
        for (n, w) in old_weights.items():
            diff += abs(w - weights[n])
        it += 1
        if diff <= eps or abs(train_bleu - prev_bleu) < eps:
            break
        # NOTE(review): the source this was recovered from had lost its
        # indentation; the statements below are placed at loop level because
        # code directly after ``break`` in the same branch would be
        # unreachable.  Confirm the second increment and the unconditional
        # restart are intended.
        it += 1
        sys.stderr.write( "RANDOM RESTART\n")
        for name in weights.keys():
            weights[name] = random.uniform(MIN_W, -MIN_W)
        prev_bleu = train_bleu

    sys.stderr.write( "BEST:\n")
    sys.stderr.write( "overall BLEU %f\n" % best_bleu)
    # NOTE(review): train_src_, train_kbest_ and train_ref_ are not defined in
    # this function; they must exist at module level or this line raises
    # NameError (the per-iteration reports use opts.src/opts.kbest/opts.ref).
    sys.stderr.write( "train BLEU %f\n" % performance(best_w, train_src_, train_kbest_, train_ref_))
    sys.stderr.write( "test BLEU %f\n" % performance(best_w, "data/dev+test.src", "data/dev+test.100best", "data/dev.ref"))
    sys.stderr.write( describe(best_w) + "\n")
def minimum_error_rate_training(weights, all_hyps, num_sents):
    """Build, for every feature, the interval records of each sentence's lines.

    For each of three randomly initialised feature weights, every hypothesis
    of every sentence is turned into a line (slope ``m``, intercept ``c``),
    the lines are sorted, consecutive intersection points are chained into
    ``upper_envelope`` and one record per interval is appended to
    ``threshold_set``.  Debug output is printed along the way.

    Args:
        weights: only printed in this block.
        all_hyps: flat list of ``(num, hyp, feats)`` triples, read in slices
            of 100 per sentence; ``feats`` is a space-separated string of
            ``name=value`` items.
        num_sents: number of sentences to process.

    This block has no return statement, and ``threshold_set`` is rebuilt for
    every feature without being stored anywhere.  ``all_refs`` is not a
    parameter; it is read from an enclosing scope.
    """
    # # repeat till convergence
    # # for all parameters
    # weight_hypothesis = [weights.copy()]

    # initialize the possible weights
    rand_weights = { 'p(e)' : random.uniform(-3,3), 'p(e|f)' : random.uniform(-3,3), 'p_lex(f|e)' : random.uniform(-3,3)}

    # # append randomized weights to hypothesis
    # weight_hypothesis.append(rand_weights)
    # # for each weight hypothesis
    # for w_hyp in weight_hypothesis:
    #     # set of threshold points T
    #     threshold_set = set()
    #     # for all sentences
    #     for s in xrange(0, num_sents):
    #         hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
    #         # for all translations
    #         for t in hyps_for_one_sent:
    #             compute_line(t)

    # for all parameters
    for w in rand_weights:
        print weights
        # set of threshold points T
        threshold_set = []
        # for all sentences
        for s in xrange(0, num_sents):
            print 'for sentence', s
            reference = all_refs[s]
            # for all translations
            hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
            hyp_lines = []
            for (num, hyp, feats) in hyps_for_one_sent:
                print '\tnum', num, 'hyp', hyp
                # get slope and intersection to define line
                gradient = 0.0 # gradient = value of the feature
                y_intersect = 0.0 # y_intersect = sum of other weights * value of feature
                alt_weight_sum = 0.0
                for feat in feats.split(' '):
                    (k, v) = feat.split('=')
                    print '\t\tfeature key', k, 'feature value', v
                    # get the parameter that we are interested in
                    if k == w:
                        gradient = float(v)
                    else:
                        # NOTE(review): this adds the other features' weights
                        # only; their values v are not used -- confirm that a
                        # weighted sum (weight * value) was not intended.
                        alt_weight_sum += float(rand_weights[k])
                y_intersect = float(alt_weight_sum * gradient)
                print 'gradient', gradient, 'combined weight', alt_weight_sum
                # line = (gradient, y_intersect,
                # hypothesis, sentence number for reference)
                line = {'m': gradient, 'c': y_intersect, 'hyp': hyp, 'ref': reference}
                hyp_lines.append(line)
            # sort lines in descending order,
            # with steepest gradient first, then sort by y intersection
            sorted_hyp_lines = sorted(hyp_lines, key=lambda element: (-element['m'], -element['c']))
            # get steepest lines
            # (for every distinct slope, keep the line with the largest
            # intercept; steepest_lines is not read again in this block)
            steepest_lines = {}
            for i,line in enumerate(sorted_hyp_lines):
                if line['m'] in steepest_lines:
                    if line['c'] > steepest_lines[line['m']]['c']:
                        steepest_lines[line['m']] = line
                else:
                    steepest_lines[line['m']] = line
            # find upper envelope:
            upper_envelope = []
            i = 0
            # while find line l_2 that intersects with l first
            while i+1 < len(sorted_hyp_lines):
                # intersection points in order
                intersection_points = {}
                l_1 = sorted_hyp_lines[i] # y = ax + c
                # find line l_2 that intersects with l_1 first
                for j in xrange(i+1, len(sorted_hyp_lines)):
                    l_2 = sorted_hyp_lines[j] # y = bx + d
                    # Check if m is the same (lines are parallel, take the line with higher c)
                    if l_1['m'] == l_2['m']:
                        continue
                    # intersection point x,y
                    # x = (d-c)/(a-b)
                    x_numerator = float(l_2['c']) - float(l_1['c'])
                    x_denominator = float(l_1['m']) - float(l_2['m'])
                    x = float(x_numerator / x_denominator)
                    # y = a(x) + c
                    y = l_1['m'] * x + l_1['c']
                    # save all intersection points of other lines with l_1
                    # [0]: x, [1]: y, [2]: (l_1['hyp'], l_2['hyp']), [3]: reference
                    intersection_points[(x,y,(l_1['hyp'], l_2['hyp']),reference)] = j
                if len(intersection_points) == 0:
                    print 'finished calculating upper envelope'
                    break
                else:
                    # minimum intersection point with l_1 = first intersection with l_2
                    # (tuples compare element by element, so this is the
                    # point with the smallest x, ties broken by y)
                    min_line_intersect = min(intersection_points)
                    upper_envelope.append(min_line_intersect)
                    # l = l_2
                    i = intersection_points[min_line_intersect]
            # add parameter value at intersection
            # parameter points in the format:
            # x_1, x_2, bleu score, tuple(hypothesis 1, ref)
            # where x_1 is the start of the interval, and x_2 is the end of the interval
            for index, point in enumerate(upper_envelope):
                # NOTE(review): point[1] is the y coordinate of the
                # intersection tuple (x is point[0]), yet it is stored as the
                # interval bounds x_2 / previous_x -- confirm.
                # 'score' holds the raw return value of bleu.bleu_stats.
                # first point starts at infinity
                if index == 0:
                    parameter = {
                        'x_1': float('-inf'),
                        'x_2': point[1],
                        'score': bleu.bleu_stats(point[2][0], point[3]),
                        'hyp': point[2][0],
                        'ref': point[3]
                    }
                else:
                    parameter = {
                        'x_1': previous_x,
                        'x_2': point[1],
                        'score': bleu.bleu_stats(point[2][0], point[3]),
                        'hyp': point[2][0],
                        'ref': point[3]
                    }
                threshold_set.append(parameter)
                previous_x = point[1]
                # last point ends at infinity
                # NOTE(review): the closing bound is float('-inf'), not
                # float('inf') -- confirm.
                if index+1 == len(upper_envelope):
                    parameter = {
                        'x_1': previous_x,
                        'x_2': float('-inf'),
                        'score': bleu.bleu_stats(point[2][1], point[3]),
                        'hyp': point[2][1],
                        'ref': point[3]
                    }
                    threshold_set.append(parameter)
" ".join(map(lambda x: i2w_trg[x], generate(sample_dev)))) # Evaluate on dev set dev_words, dev_loss = 0, 0.0 start_time = time.time() for sent_id, (start, length) in enumerate(dev_order): dev_batch = dev[start:start + length] my_loss, num_words = calc_loss(dev_batch, 0.0) dev_loss += my_loss.item() dev_words += num_words print("[DEV] iter %r: dev loss/word=%.4f, ppl=%.4f, time=%.2fs" % (ITER, dev_loss / dev_words, math.exp( dev_loss / dev_words), time.time() - start_time)) if best_dev > dev_loss: print("[DEV] Best model so far, saving snapshot."), "") best_dev = dev_loss # this is how you generate, can replace with desired sentenced to generate model = torch.load("") sentences = [] stats = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) for sent in test: hyp = generate(sent) sentences.append(hyp) stats += np.array( bleu_stats(" ".join(map(lambda x: i2w_trg[x], hyp)), " ".join(map(lambda x: i2w_trg[x], sent[1])))) print("Corpus BLEU: %.2f" % (100 * bleu(stats))) for sent in sentences[:10]: print(" ".join(map(lambda x: i2w_trg[x], sent)))
def minimum_error_rate_training(weights, all_hyps, num_sents):
    """Search one feature weight at a time and print the reranked 1-best output.

    For each of three randomly initialised feature weights, every hypothesis
    of every sentence is turned into a line (slope ``m``, intercept ``c``),
    intersection points are chained into ``upper_envelope`` and turned into
    interval records.  Candidate values taken from the interval bounds are
    then scored with ``compute_score``; when a value beats the current score
    it is written into ``weights``.  Finally the best hypothesis of every
    sentence under ``weights`` is written to stdout.

    Args:
        weights: dict of feature weights; updated in place and used for the
            final rescoring.
        all_hyps: flat list of ``(num, hyp, feats)`` triples, read in slices
            of 100 per sentence; ``feats`` is a space-separated string of
            ``name=value`` items.
        num_sents: number of sentences to process.

    Returns nothing.  ``all_refs`` is not a parameter; it is read from an
    enclosing scope, as are ``compute_score`` and ``get_interval``.
    """
    # # repeat till convergence
    # # for all parameters
    # weight_hypothesis = [weights.copy()]

    # initialize the possible weights
    rand_weights = { 'p(e)' : random.uniform(-3,3), 'p(e|f)' : random.uniform(-3,3), 'p_lex(f|e)' : random.uniform(-3,3)}

    # # append randomized weights to hypothesis
    # weight_hypothesis.append(rand_weights)
    # # for each weight hypothesis
    # for w_hyp in weight_hypothesis:
    #     # set of threshold points T
    #     threshold_set = set()
    #     # for all sentences
    #     for s in xrange(0, num_sents):
    #         hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
    #         # for all translations
    #         for t in hyps_for_one_sent:
    #             compute_line(t)

    # for all parameters
    curr_score = compute_score(rand_weights, all_refs, all_hyps)
    for w in rand_weights:
        # print weights
        # set of threshold points T
        threshold_set = []
        # for all sentences
        for s in xrange(0, num_sents):
            # print 'for sentence', s
            reference = all_refs[s]
            # for all translations
            hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
            hyp_lines = []
            for (num, hyp, feats) in hyps_for_one_sent:
                # print '\tnum', num, 'hyp', hyp
                # get slope and intersection to define line
                gradient = 0.0 # gradient = value of the feature
                y_intersect = 0.0 # y_intersect = sum of other weights * value of feature
                alt_weight_sum = 0.0
                for feat in feats.split(' '):
                    (k, v) = feat.split('=')
                    # print '\t\tfeature key', k, 'feature value', v
                    # get the parameter that we are interested in
                    if k == w:
                        gradient = float(v)
                    else:
                        # NOTE(review): this adds the other features' weights
                        # only; their values v are not used -- confirm that a
                        # weighted sum (weight * value) was not intended.
                        alt_weight_sum += float(rand_weights[k])
                y_intersect = float(alt_weight_sum * gradient)
                # print 'gradient', gradient, 'combined weight', alt_weight_sum
                # line = (gradient, y_intersect,
                # hypothesis, sentence number for reference)
                line = {'m': gradient, 'c': y_intersect, 'hyp': hyp, 'ref': reference}
                hyp_lines.append(line)
            # sort lines in descending order,
            # with steepest gradient first, then sort by y intersection
            sorted_hyp_lines = sorted(hyp_lines, key=lambda element: (-element['m'], -element['c']))
            # get steepest lines
            # (for every distinct slope, keep the line with the largest
            # intercept; steepest_lines is not read again in this function)
            steepest_lines = {}
            for i,line in enumerate(sorted_hyp_lines):
                if line['m'] in steepest_lines:
                    if line['c'] > steepest_lines[line['m']]['c']:
                        steepest_lines[line['m']] = line
                else:
                    steepest_lines[line['m']] = line
            # find upper envelope:
            upper_envelope = []
            i = 0
            # while find line l_2 that intersects with l first
            while i+1 < len(sorted_hyp_lines):
                # intersection points in order
                intersection_points = {}
                l_1 = sorted_hyp_lines[i] # y = ax + c
                # find line l_2 that intersects with l_1 first
                for j in xrange(i+1, len(sorted_hyp_lines)):
                    l_2 = sorted_hyp_lines[j] # y = bx + d
                    # Check if m is the same (lines are parallel, take the line with higher c)
                    if l_1['m'] == l_2['m']:
                        continue
                    # intersection point x,y
                    # x = (d-c)/(a-b)
                    x_numerator = float(l_2['c']) - float(l_1['c'])
                    x_denominator = float(l_1['m']) - float(l_2['m'])
                    x = float(x_numerator / x_denominator)
                    # y = a(x) + c
                    y = l_1['m'] * x + l_1['c']
                    # save all intersection points of other lines with l_1
                    # [0]: x, [1]: y, [2]: (l_1['hyp'], l_2['hyp']), [3]: reference
                    intersection_points[(x,y,(l_1['hyp'], l_2['hyp']),reference)] = j
                if len(intersection_points) == 0:
                    print 'finished calculating upper envelope'
                    break
                else:
                    # minimum intersection point with l_1 = first intersection with l_2
                    # (tuples compare element by element, so this is the
                    # point with the smallest x, ties broken by y)
                    min_line_intersect = min(intersection_points)
                    upper_envelope.append(min_line_intersect)
                    # l = l_2
                    i = intersection_points[min_line_intersect]
            # add parameter value at intersection
            # parameter points in the format:
            # x_1, x_2, bleu score, tuple(hypothesis 1, ref)
            # where x_1 is the start of the interval, and x_2 is the end of the interval
            for index, point in enumerate(upper_envelope):
                # NOTE(review): point[1] is the y coordinate of the
                # intersection tuple (x is point[0]), yet it is stored as the
                # interval bounds x_2 / previous_x -- confirm.
                # 'score' holds the raw return value of bleu.bleu_stats.
                # first point starts at infinity
                if index == 0:
                    parameter = {
                        'x_1': float('-inf'),
                        'x_2': point[1],
                        'score': bleu.bleu_stats(point[2][0], point[3]),
                        'hyp': point[2][0],
                        'ref': point[3]
                    }
                else:
                    parameter = {
                        'x_1': previous_x,
                        'x_2': point[1],
                        'score': bleu.bleu_stats(point[2][0], point[3]),
                        'hyp': point[2][0],
                        'ref': point[3]
                    }
                threshold_set.append(parameter)
                previous_x = point[1]
                # last point ends at infinity
                # NOTE(review): the closing bound is float('-inf'), not
                # float('inf') -- confirm.
                if index+1 == len(upper_envelope):
                    parameter = {
                        'x_1': previous_x,
                        'x_2': float('-inf'),
                        'score': bleu.bleu_stats(point[2][1], point[3]),
                        'hyp': point[2][1],
                        'ref': point[3]
                    }
                    threshold_set.append(parameter)
        # --- END SAMANTHA'S STUFF ---
        # sort T by parameter value
        # compute score for value before first threshold point
        # for all t in T
        #     compute score for value after t
        #     if score is highest
        #         record max score and t
        # if max score > current score
        #     update parameter value

        #sort threshold set based on the bleu score
        threshold_set = sorted(threshold_set, key=lambda x: x["score"])
        # Flatten every interval's two bounds into one ascending list.
        points = []
        for dic in threshold_set:
            points.append((dic['x_1'], dic['x_2']))
        point_list = []
        for point in points:
            point_list.append(point[0])
            point_list.append(point[1])
        point_list.sort()
        # t_weights is the same dict object as rand_weights, so the trial
        # values assigned below also change rand_weights.
        t_weights = rand_weights
        # presumably returns the first pair of bounds to try -- verify
        # against get_interval
        start, end = get_interval(point_list)
        max_score = compute_score(rand_weights, all_refs, all_hyps)
        while point_list:
            print "."
            # Try the midpoint of the current interval as the value of w.
            val = (start+end)/float(2)
            t_weights[w] = val
            score = compute_score(t_weights, all_refs, all_hyps)
            if score > max_score:
                max_score = score
                best_val = val
            start = end
            # pop() removes the last, i.e. largest remaining, bound.
            end = point_list.pop()
        # NOTE(review): best_val is assigned only when a trial value improved
        # max_score inside the loop above; confirm it is always bound (or not
        # stale from an earlier feature) when this branch is taken.
        if max_score > curr_score:
            print "!"
            curr_score = max_score
            best_w = w
            best_v = best_val
            weights[w] = best_v
    print weights
    print "PRINTING SENTENCES"
    # Rescore every sentence's hypotheses with the final weights and write
    # the highest-scoring one to stdout.
    for s in xrange(0, num_sents):
        hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
        (best_score, best) = (-1e300, '')
        for (num, hyp, feats) in hyps_for_one_sent:
            score = 0.0
            for feat in feats.split(' '):
                (k, v) = feat.split('=')
                score += weights[k] * float(v)
            if score > best_score:
                (best_score, best) = (score, hyp)
        try:
            sys.stdout.write("%s\n" % best)
        except (Exception):
            # Any failure while writing ends the process with exit status 1.
            sys.exit(1)
def main(): nbests = [] references = [] sys.stderr.write("Reading English Sentences") for i, line in enumerate(open(opts.en)): '''Initialize references to correct english sentences''' references.append(line) if i % 100 == 0: sys.stderr.write(".") sys.stderr.write("\nReading ndests") for j, line in enumerate(open(opts.nbest)): (i, sentence, features) = line.strip().split("|||") i = int(i) stats = list(bleu.bleu_stats(sentence, references[i])) # bleu_score = bleu.bleu(stats) smoothed_bleu_score = bleu.smoothed_bleu(stats) # making the feature string to float list feature_list = [float(x) for x in features.split()] if len(nbests) <= i: nbests.append([]) # nbests[i].append(entry(sentence, bleu_score, smoothed_bleu_score, feature_list)) nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list)) if j % 5000 == 0: sys.stderr.write(".") arg_num = len(nbests[0][0].feature_list) theta = [1.0 / arg_num for _ in xrange(arg_num)] #initialization weights = [[] for _ in xrange(opts.epo)] sys.stderr.write("\nTraining...\n") for j in xrange(opts.epo): avg_theta = [0.0 for _ in xrange(arg_num)] avg_cnt = 0 mistake = 0 for nbest in nbests: sample = get_sample(nbest) sample.sort(key=lambda i: i[0].smoothed_bleu - i[1].smoothed_bleu, reverse=True) for i in xrange(min(len(sample), opts.xi)): v1 = sample[i][0].feature_list v2 = sample[i][1].feature_list if dot_product(theta, v1) <= dot_product(theta, v2): mistake += 1 theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta) avg_theta = vector_plus(avg_theta, theta) avg_cnt += 1 sys.stderr.write("Mistake: %s\n" % (mistake, )) weights[j] = [ avg / avg_cnt if avg_cnt != 0 else 1 / float(arg_num) for avg in avg_theta ] sys.stderr.write("Computing best BLEU score and outputing...\n") # instead of print the averaged-out weights, print the weights that maximize the BLEU score # print "\n".join([str(weight) for weight in final_theta]) bleu_score = [0 for _ in weights] for j, w in enumerate(weights): trans = [] translation = 
namedtuple("translation", "english, score") system = [] for i, nbest in enumerate(nbests): # for one sentence for et in nbest: if len(trans) <= int(i): trans.append([]) trans[int(i)].append( translation( et.sentence, sum([x * y for x, y in zip(w, et.feature_list)]))) for tran in trans: system.append(sorted(tran, key=lambda x: -x.score)[0].english) stats = [0 for i in xrange(10)] for (r, s) in zip(references, system): stats = [ sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r)) ] bleu_score[j] = bleu.bleu(stats) idx = [ i for i, bscore in enumerate(bleu_score) if bscore == max(bleu_score) ][0] sys.stderr.write("Maximum BLEU score of training data is: {}\n".format( max(bleu_score))) sys.stderr.write("Corresponding weights are: {}\n".format(" ".join( [str(w) for w in weights[idx]]))) print "\n".join([str(weight) for weight in weights[idx]])
def main(): references = [] sys.stderr.write("Reading English Sentences\n") for i, line in enumerate(open(opts.en)): '''Initialize references to correct english sentences''' references.append(line) if i % 100 == 0: sys.stderr.write(".") sys.stderr.write("\nTry reading %s from disk ... \n" % opts.nbestDS) nbests = read_ds_from_file(opts.nbestDS) if nbests is None: nbests = [] sys.stderr.write("%s is not on disk, so calculating it ... \n" % opts.nbestDS) for j, line in enumerate(open(opts.nbest)): (i, sentence, features) = line.strip().split("|||") i = int(i) stats = list(bleu.bleu_stats(sentence, references[i])) # bleu_score = bleu.bleu(stats) smoothed_bleu_score = bleu.smoothed_bleu(stats) # making the feature string to float list feature_list = [float(x) for x in features.split()] if len(nbests) <= i: nbests.append([]) # nbests[i].append(entry(sentence, bleu_score, smoothed_bleu_score, feature_list)) nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list)) if j % 5000 == 0: sys.stderr.write(".") sys.stderr.write("\nWriting %s to disk ... 
\n" % opts.nbestDS) write_ds_to_file(nbests, opts.nbestDS) sys.stderr.write("Finish writing %s\n" % opts.nbestDS) arg_num = len(nbests[0][0].feature_list) theta = [1.0 / arg_num for _ in xrange(arg_num)] #initialization # avg_theta = [ 0.0 for _ in xrange(arg_num)] # avg_cnt = 0 tau = opts.tau # positive learning margin sys.stderr.write("\nTraining...\n") for iter_num in xrange(opts.epo): sys.stderr.write("\nIteration#{} ".format(iter_num + 1)) cnt = 0 # sentence wise updating for i, nbest in enumerate(nbests): y = sorted(nbest, key=lambda h: h.smoothed_bleu, reverse=True) mu = [0.0] * len(nbest) w_times_x = [0.0] * len(nbest) for j, best in enumerate(nbest): # calculate linear function result w_times_x[j] = dot_product(theta, best.feature_list) # processing pairs top_r = int(len(y) * opts.r) bottom_k = int(len(y) * opts.k) for j in xrange(len(nbest) - 1): for l in xrange(j + 1, len(nbest)): yj = nbest[j].smoothed_bleu yl = nbest[l].smoothed_bleu if yj < yl \ and dist(yj, yl) > opts.epsilon \ and w_times_x[j] - w_times_x[l] < g_learn(yj, yl)*tau: mu[j] = mu[j] + g_learn(yj, yl) mu[l] = mu[l] - g_learn(yj, yl) elif yj > yl \ and dist(yj, yl) > opts.epsilon \ and w_times_x[l] - w_times_x[y] < g_learn(yl, yj)*tau: mu[j] = mu[j] - g_learn(yl, yj) mu[l] = mu[l] + g_learn(yl, yj) else: cnt += 1 if (j + 1) % 10000 == 0: sys.stderr.write(".") vector_sum = [0 for _ in xrange(len(nbest[0].feature_list))] for m, best in enumerate(nbest): vector_sum = vector_plus( vector_sum, scale_product(mu[m], best.feature_list)) theta = vector_plus(theta, vector_sum, opts.eta) # avg_theta = vector_plus(avg_theta, theta) # avg_cnt += 1 sys.stderr.write("\n Non-supported vectors: %s\n" % (cnt, )) # weights = [ avg / avg_cnt if avg_cnt !=0 else 1/float(arg_num) for avg in avg_theta ] sys.stderr.write("Computing best BLEU score and outputing...\n") # instead of print the averaged-out weights, print the weights that maximize the BLEU score print "\n".join([str(weight) for weight in theta])
def Score(self, hyp, ref):
    """Return the BLEU score of one hypothesis against its reference.

    At most the first ten statistics produced by ``bleu.bleu_stats`` are
    passed on to ``bleu.bleu``.
    """
    zeros = [0] * 10
    stats = [base + count
             for base, count in zip(zeros, bleu.bleu_stats(hyp, ref))]
    return bleu.bleu(stats)
" ".join(target_txt[:target_txt.index("<eos>")])) print("[DEV] Pred:\t" + " ".join(pred_txt)) my_loss = calc_loss(dev_batch, 0.0) dev_loss += my_loss.item() print("[DEV] iter %r: dev loss=%.4f, time=%.2fs" % ( ITER, dev_loss, time.time() - start_time, )) if best_dev > dev_loss: print("[DEV] Best model so far, saving snapshot."), "") best_dev = dev_loss # this is how you generate, can replace with desired sentenced to generate model = torch.load("") _, _, test_it, _, _ = get_datasets(1) sentences = [] stats = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) model.eval() for sent in test_it: pred_txt = " ".join( map(lambda x: trg_vocab.vocab.itos[x], generate(sent.src[:, 0]))) sentences.append(pred_txt) target_txt = list(map(lambda x: trg_vocab.vocab.itos[x], sent.trg[1:, 0])) stats += np.array( bleu_stats(pred_txt, " ".join(target_txt[:target_txt.index("<eos>")]))) print("Corpus BLEU: %.2f" % (100 * bleu(stats))) for sent in sentences[:10]: print(sent)