def language_eval(dataset, preds, model_id, split):
    """Score predicted captions against the first reference sentence of each image.

    Writes predictions and matching references to text files under
    ``eval_results/`` and runs nlg-eval's ``compute_metrics`` on them.

    Args:
        dataset: path to a JSON list of image records with "split", "dataid"
            and "sentences" fields.
        preds: list of dicts with "image_id" and "caption" keys.
        model_id: unused here; kept for interface compatibility with callers.
        split: dataset split name to evaluate ("val", "test", ...).

    Returns:
        dict of metric scores from ``compute_metrics``.
    """
    # Presumably ``encoder`` is ``json.encoder``; limits float repr to 3 decimals.
    encoder.FLOAT_REPR = lambda o: format(o, '.3f')
    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    label = {}
    # Close the dataset file deterministically (original leaked the handle).
    with open(dataset, 'r') as f:
        imgs = json.load(f)
    for img in imgs:
        if img["split"] == split:
            label[img["dataid"]] = img
    pred_ids = []
    with open("eval_results/preds.txt", "w") as text_file:
        for pred in preds:
            pred_id = pred["image_id"]
            pred_txt = pred["caption"]
            pred_ids.append(pred_id)
            text_file.write(pred_txt + "\n")
    with open("eval_results/refer.txt", "w") as text_file:
        for pred_id in pred_ids:
            # Fix: the original encoded to bytes and then concatenated with a
            # str and wrote to a text-mode file -> TypeError on Python 3.
            raw_sent = label[pred_id]["sentences"][0]["raw"].lower()
            text_file.write(raw_sent + "\n")
    metrics_dict = compute_metrics(hypothesis='eval_results/preds.txt',
                                   references=['eval_results/refer.txt'])
    return metrics_dict
def eval(net, dataloader, beam_size):
    """Run beam-search inference over a dataloader and score the output.

    Generated captions go to ``output.txt``; the five per-sample reference
    captions go to ``labels0.txt``..``labels4.txt``; both are then scored
    with nlg-eval's overlap metrics.

    Args:
        net: model exposing ``forward_single_inference`` and a ``vectorizer``.
        dataloader: yields (image_batch, _, all_labels) tuples.
        beam_size: beam width for the model's inference routine.

    Returns:
        dict of metric scores from ``compute_metrics``.
    """
    net.eval()
    torch.set_grad_enabled(False)
    torch.backends.cudnn.benchmark = False
    sample_count = 0
    out_file = open("output.txt", "w")
    lab_file_names = ["labels{}.txt".format(i) for i in range(5)]
    lab_files = [open(f, "w") for f in lab_file_names]
    try:
        for i, data in enumerate(tqdm(dataloader)):
            image, _, all_labels = data
            # Fix: transfer the batch to the GPU once and reuse it; the
            # original computed ``image_cuda`` and then called ``.cuda()``
            # a second time, doing a redundant host-to-device copy.
            image_cuda = image.cuda()
            outputs = net.forward_single_inference(image_cuda, beam_size)
            sample_count += image.shape[0]
            for j in range(image.shape[0]):
                h = outputs[j]
                ls = []
                for p, l in enumerate(all_labels[j]):
                    ll = l[1:]  # drop the leading (start) token
                    ls.append(ll)
                    l_text = utils.get_output_text(net.vectorizer, ll)
                    lab_files[p].write(l_text + os.linesep)
                out_text = utils.get_output_text(net.vectorizer, h)
                out_file.write(out_text + os.linesep)
    finally:
        # Close files even if inference raises mid-loop.
        out_file.close()
        for l in lab_files:
            l.close()
    metrics_dict = compute_metrics(references=lab_file_names,
                                   hypothesis='output.txt',
                                   no_overlap=False,
                                   no_skipthoughts=True,
                                   no_glove=True)
    return metrics_dict
def evaluation():
    """Print nlg-eval metrics, Self-BLEU diversity scores, and the project's
    own bleu/rouge/accuracy measures for the global prediction/reference
    file paths."""
    from nlgeval import compute_metrics
    scores = compute_metrics(hypothesis=prediction_file_path,
                             references=[reference_file_path])
    print(scores)

    from measures import selfbleu
    # Self-BLEU at n-gram orders 1..4 measures diversity among predictions.
    for order in (1, 2, 3, 4):
        bleu_calc = selfbleu.SelfBleu(prediction_file_path, order)
        print("selfbleu-%d" % order, bleu_calc.get_score())

    eval_log = {}
    for metric in ['bleu', 'rouge', 'accuracy', 'word_accuracy']:
        score = evaluation_utils.evaluate(reference_file_path,
                                          prediction_file_path, metric)
        eval_log[metric] = score
        if metric == "bleu":
            print(" bleu-1, bleu-2, bleu-3, bleu-4: %.5f, %.5f, %.5f, %.5f"
                  % score)
        elif metric == "rouge":
            print(" rouge-1, rouge-2, rouge-l: %.5f, %.5f, %.5f" % score)
        else:
            print(" %s: %.5f" % (metric, score))
def test_compute_metrics(self):
    """Run compute_metrics on the README example and check every score."""
    root_dir = os.path.join(os.path.dirname(__file__), '..', '..')
    hypothesis = os.path.join(root_dir, 'examples/hyp.txt')
    references = (os.path.join(root_dir, 'examples/ref1.txt'),
                  os.path.join(root_dir, 'examples/ref2.txt'))
    scores = nlgeval.compute_metrics(hypothesis, references)
    # Expected values from the project README; the misspelled
    # 'EmbeddingAverageCosineSimilairty' key is nlg-eval's actual key name.
    expected = {
        'Bleu_1': 0.55,
        'Bleu_2': 0.428174,
        'Bleu_3': 0.284043,
        'Bleu_4': 0.201143,
        'METEOR': 0.295797,
        'ROUGE_L': 0.522104,
        'CIDEr': 1.242192,
        'SkipThoughtCS': 0.626149,
        'EmbeddingAverageCosineSimilairty': 0.88469,
        'VectorExtremaCosineSimilarity': 0.568696,
        'GreedyMatchingScore': 0.784205,
    }
    for name, value in expected.items():
        self.assertAlmostEqual(value, scores[name], places=5)
    self.assertEqual(11, len(scores))
def sample_test_compute_metrics():
    """Run nlg-eval on the bundled example files and return the scores.

    Fix: the original computed the scores and then discarded them with a
    bare ``return``; returning the dict makes the sample usable
    programmatically (callers that ignored the previous ``None`` return
    are unaffected).

    Returns:
        dict of metric scores from ``nlgeval.compute_metrics``.
    """
    root_dir = os.path.join(os.path.dirname(__file__), 'evaluation_folder')
    hypothesis = os.path.join(root_dir, 'hyp.txt')
    references = [os.path.join(root_dir, 'ref1.txt')]
    scores = nlgeval.compute_metrics(hypothesis, references)
    return scores
def eval_top1_acc(pred, ref, step):
    """Score the top-1 candidate of every group of ``step`` generations.

    Args:
        pred: raw predictions; passed through ``remove_mask`` to get texts.
        ref: path to the reference file, one reference per line.
        step: number of candidates generated per reference; the first of
            each group is taken as the top-1 hypothesis.

    Returns:
        dict of metric scores with keys prefixed ``top1_``.
    """
    all_texts = remove_mask(pred)
    top1 = [all_texts[i] for i in range(0, len(all_texts), step)]
    # Fix: the original leaked the file handle from open(ref).readlines().
    with open(ref, 'r') as f:
        refs = f.readlines()
    metrics_dict = compute_metrics(hypothesis=top1, references=refs)
    metrics_dict = {f'top1_{k}': v for k, v in metrics_dict.items()}
    return metrics_dict
def get_all_scores(reply, answer, scores):
    """Compute nlg-eval metrics for ``reply`` against ``answer`` and dump
    the resulting dict (as its ``str`` form) into the ``scores`` file."""
    from nlgeval import compute_metrics
    results = compute_metrics(hypothesis=reply, references=[answer])
    print(results)

    import codecs
    with codecs.open(scores, 'w', encoding='utf-8') as fo:
        fo.write(str(results))
def main():
    """Score the hypothesis file given by ``-i`` against the dataset's
    lower-cased test questions for the configured language and print the
    overlap metrics."""
    params = get_params()
    reference_path = os.path.join(params.dataset_dir,
                                  "test.q.%s.lc" % params.lang)
    scores = compute_metrics(params.i,
                             [reference_path],
                             no_overlap=False,
                             no_skipthoughts=True,
                             no_glove=True)
    print(scores)
def getMetrics(df, candidate, reference):
    """Write candidate/reference lines to files and run nlg-eval on them.

    Args:
        df: unused; kept for interface compatibility with existing callers.
        candidate: iterable of hypothesis strings (one per line).
        reference: iterable of reference strings (one per line).

    Returns:
        dict of metric scores from ``compute_metrics``.
    """
    # Fix: the original called ``writelines`` on a single string, which
    # only works because strings iterate character-by-character; use
    # ``write`` explicitly. The ``list()`` wrappers were also redundant.
    with open('ref1.txt', 'w') as ref:
        for line in reference:
            ref.write(line + '\n')
    with open('hyp.txt', 'w') as hyp:
        for line in candidate:
            hyp.write(line + '\n')
    metrics_dict = compute_metrics(hypothesis='hyp.txt',
                                   references=['ref1.txt'])
    return metrics_dict
def test_compute_metrics(mode='model'):
    """Run nlg-eval comparing a generated file against the human baseline.

    Fix: the original computed the scores and discarded them with a bare
    ``return``; returning the dict makes the result usable (callers that
    ignored the previous ``None`` return are unaffected).

    Args:
        mode: prefix of the generated file, e.g. 'model' ->
            ``evaluation_folder/model_base_gen.txt``.

    Returns:
        dict of metric scores from ``nlgeval.compute_metrics``.
    """
    root_dir = Path(os.path.dirname(__file__)) / 'evaluation_folder'
    hypothesis = root_dir / '{}_base_gen.txt'.format(mode)
    references = [root_dir / 'human_base_gen.txt']
    assert hypothesis.exists()
    assert references[0].exists()
    scores = nlgeval.compute_metrics(hypothesis, references)
    return scores
def compute(annotations, outputs):
    """Score ``outputs`` against ``annotations`` with nlg-eval's overlap
    metrics (skip-thought and GloVe embedding metrics disabled).

    Both lists are written one-entry-per-line to ``ref.txt`` and
    ``output.txt`` before scoring."""
    with open('ref.txt', 'w') as ref_file:
        ref_file.write('\n'.join(annotations))
    with open('output.txt', 'w') as hyp_file:
        hyp_file.write('\n'.join(outputs))
    metrics_dict = compute_metrics(hypothesis='output.txt',
                                   references=['ref.txt'],
                                   no_skipthoughts=True,
                                   no_glove=True)
    # Blank line separates nlg-eval's own console output from what follows.
    print()
    return metrics_dict
def score(self):
    """Write hypotheses and references to temp files and run nlg-eval.

    Stores the resulting metric dict on ``self.metrics_dict``.
    Reads ``self.hypothesis`` and ``self.references`` (iterables of
    strings); embedded newlines are flattened so each entry is one line.
    """
    hyp_test_str = "\n".join(
        [h.replace('\n', '') for h in self.hypothesis])
    ref_test_str = "\n".join(
        [r.replace('\n', '') for r in self.references])
    # Fix: ``with`` already closes the files; the explicit fd.close()
    # calls inside the original with-blocks were redundant.
    with open("/tmp/hyp.txt", 'w') as fd_hyp:
        fd_hyp.write(hyp_test_str)
    with open("/tmp/ref.txt", 'w') as fd_ref:
        fd_ref.write(ref_test_str)
    self.metrics_dict = compute_metrics(hypothesis="/tmp/hyp.txt",
                                        references=["/tmp/ref.txt"],
                                        no_glove=True,
                                        no_skipthoughts=True)
def eval_topk_acc(pred, ref, step):
    """Oracle top-k scoring: for each reference, pick the candidate (of
    ``step`` generations) with the highest BLEU-4 and score those picks.

    Args:
        pred: raw predictions; passed through ``remove_mask`` to get texts.
        ref: path to the reference file, one reference per line.
        step: number of candidates generated per reference.

    Returns:
        dict of metric scores with keys prefixed ``topk_``.
    """
    # Fix: the original leaked the file handle from open(ref).readlines().
    with open(ref, 'r') as f:
        ref_by_idx = f.readlines()
    all_texts = remove_mask(pred)
    gen_by_idx = [
        all_texts[i:i + step] for i in range(0, len(all_texts), step)
    ]
    gens = []
    for i in range(len(ref_by_idx)):
        # Renamed from ``ref`` to avoid shadowing the path parameter.
        ref_line = ref_by_idx[i]
        gen = gen_by_idx[i]
        # Best candidate by per-pair BLEU-4 (oracle selection).
        metric = [cie(ref_line, g)['Bleu_4'] for g in gen]
        gens.append(gen[np.argmax(metric)])
    metrics_dict = compute_metrics(hypothesis=gens, references=ref_by_idx)
    metrics_dict = {f'topk_{k}': v for k, v in metrics_dict.items()}
    return metrics_dict
from nltk.tokenize import word_tokenize
import codecs
import re

# Tokenize two parallel text files in place (argv[1] = hypothesis,
# argv[2] = reference), score them with nlg-eval, then rewrite the
# hypothesis file without the temporary trailing space.
print(sys.argv[1], sys.argv[2])
x = codecs.open(sys.argv[1], encoding="utf-8")
x = x.readlines()
y = codecs.open(sys.argv[2], encoding="utf-8")
y = y.readlines()
x = [" ".join(word_tokenize(i)) for i in x]
y = [" ".join(word_tokenize(i)) for i in y]
# Hypotheses are lower-cased and written with a trailing space before the
# newline; references keep their case.
x_w = codecs.open(sys.argv[1], "w", encoding="utf-8")
for i in x:
    x_w.write(i.strip().lower() + " \n")
y_w = codecs.open(sys.argv[2], "w", encoding="utf-8")
for i in y:
    y_w.write(i.strip() + "\n")
x_w.close()
y_w.close()
print(compute_metrics(sys.argv[1], [sys.argv[2]]))
# Re-read the hypothesis file and strip the trailing space again.
# Fix: the original had a double assignment typo ("x = x = codecs.open(...)").
x = codecs.open(sys.argv[1], encoding="utf-8")
x = x.readlines()
x_w = codecs.open(sys.argv[1], "w", encoding="utf-8")
for i in x:
    x_w.write(i.strip() + "\n")
# Fix: the original never closed this writer, risking unflushed/lost writes.
x_w.close()
def get_score():
    """Score generated replies against four-way references with nlg-eval
    and combine BLEU with dictionary-hit and common-word rates into a
    weighted harmonic-mean A-score.

    Reads from fixed paths (./result, testdata/, data/), writes scoring
    files under ./tmp/.

    Returns:
        (BLEU, hit, com, Ascore) floats, each floored at 0.0001.
    """
    results = open('./result/tmp.out.txt', 'r', encoding='utf-8').readlines()
    # NOTE(review): the first 1015 result lines are skipped — presumably a
    # header/validation portion; confirm against the generation script.
    results = results[1015:]
    sources = open('testdata/test.moses.pro', 'r').readlines()
    sources = [x.replace('\n', '') for x in sources]
    ref = pickle.load(open('testdata/test.cus.pkl', 'rb'))
    dics = pickle.load(open('testdata/test_dic.pkl', 'rb'))
    sen2code = pickle.load(open('data/sen2code.pkl', 'rb'))
    # Rebuild the sentence->code map with lower-cased keys so lookups are
    # case-insensitive.
    sen2code_new = {}
    for key, value in sen2code.items():
        sen2code_new[key.lower()] = value
    del sen2code
    # Tally occurrences per code; sources missing from the map are counted
    # in ``count`` via the (overly broad) bare except.
    count = 0
    code_exist = {}
    for source in sources:
        try:
            if sen2code_new[source] not in code_exist.keys():
                code_exist[sen2code_new[source]] = 1
            else:
                code_exist[sen2code_new[source]] += 1
        except:
            count += 1
    # print(count)
    # print(len(code_exist.keys()))
    test_subjects = np.array(results)
    test_targets = np.array(ref)
    test_dics = np.array(dics)
    # Token length of each source sentence, used for length bucketing.
    len_sen = [len(nltk.word_tokenize(x)) for x in sources]
    len_sen = np.array(len_sen)
    # print(len_sen.mean(), len_sen.max(), len_sen.min())
    # A single (0, 100000) bucket: effectively no length filtering.
    len_spilt = [(0, 100000)]
    for len_current in len_spilt:
        index = np.where((len_sen >= len_current[0])
                         & (len_sen < len_current[1]))
        ref = test_targets[index].tolist()
        hyp = test_subjects[index].tolist()
        # Hypotheses come from readlines() and already end with '\n';
        # references get one appended below.
        # NOTE(review): these file handles are never closed explicitly —
        # flushing relies on CPython's GC.
        open('./tmp/hyp.txt', 'w',
             encoding='utf-8').writelines([x for x in hyp])
        ref0 = [x[0] for x in ref]
        ref1 = [x[1] for x in ref]
        ref2 = [x[2] for x in ref]
        ref3 = [x[3] for x in ref]
        open('./tmp/ref0.txt', 'w',
             encoding='utf-8').writelines([x + '\n' for x in ref0])
        open('./tmp/ref1.txt', 'w',
             encoding='utf-8').writelines([x + '\n' for x in ref1])
        open('./tmp/ref2.txt', 'w',
             encoding='utf-8').writelines([x + '\n' for x in ref2])
        open('./tmp/ref3.txt', 'w',
             encoding='utf-8').writelines([x + '\n' for x in ref3])
        dics = test_dics[index].tolist()
        metrics_dict = compute_metrics(hypothesis='./tmp/hyp.txt',
                                       references=['./tmp/ref0.txt',
                                                   './tmp/ref1.txt',
                                                   './tmp/ref2.txt',
                                                   './tmp/ref3.txt'],
                                       no_glove=True,
                                       no_overlap=False,
                                       no_skipthoughts=True)
        # print(metrics_dict)
        hyp = [nltk.word_tokenize(x) for x in hyp]
        hit = count_hit(hyp, dics)
        # hit=1
        com = count_common(hyp)
        # Average of BLEU-1..4; each component is floored to avoid
        # division by zero in the harmonic mean below.
        BLEU = (metrics_dict['Bleu_1'] + metrics_dict['Bleu_2']
                + metrics_dict['Bleu_3'] + metrics_dict['Bleu_4']) / 4
        if BLEU < 0.0001:
            BLEU = 0.0001
        if hit < 0.0001:
            hit = 0.0001
        if com < 0.0001:
            com = 0.0001
        # Weighted harmonic mean: BLEU weight 4, hit 2.25, com 1.
        Ascore = (1 + 2.25 + 4) / (4 / BLEU + 2.25 / hit + 1 / com)
    return BLEU, hit, com, Ascore
def fit_epoch(net, dataloader, lr_rate, train, epoch=1):
    """Run one training or evaluation epoch of the captioning model.

    In train mode: forward, attention-regularized cross-entropy loss,
    backprop with gradient clipping. In eval mode: additionally writes
    hypotheses to output.txt and the five references per sample to
    labels0..4.txt, and scores them with nlg-eval.

    Args:
        net: model with a ``vectorizer`` (vocab, pad_token) attribute.
        dataloader: yields (image, labels, all_labels) batches.
        lr_rate: Adam learning rate (train mode only).
        train: True for a training epoch, False for evaluation.
        epoch: unused here; kept for caller compatibility.

    Returns:
        (mean_loss, mean_att_loss, mean_crit_loss, output_images,
         mean_acc, metrics_dict) — metrics_dict is {} in train mode.
    """
    if train:
        net.train()
        optimizer = torch.optim.Adam(net.parameters(), lr_rate)
    else:
        net.eval()
    # Ignore the padding token's index when computing the loss.
    criterion = torch.nn.CrossEntropyLoss(ignore_index=np.where(
        net.vectorizer.vocab == net.vectorizer.pad_token)[0][0])
    torch.set_grad_enabled(train)
    torch.backends.cudnn.benchmark = train
    losses = 0.0
    att_losses = 0.0
    crit_losses = 0.0
    output_images = []   # rendered sample plots from the last batches
    accs = 0
    sample_count = 0
    label_texts = []
    output_texts = []
    references = []
    hypotheses = []
    out_file = open("output.txt", "w")
    lab_file_names = ["labels{}.txt".format(i) for i in range(5)]
    lab_files = [open(f, "w") for f in lab_file_names]
    # Default attention loss when the model returns no attention maps.
    att_loss = torch.tensor(0)
    for i, data in enumerate(tqdm(dataloader)):
        image, labels, all_labels = data
        # labels_lens (seq_length, batch_size)
        if image is None:
            continue
        if train:
            optimizer.zero_grad()
        labels_cuda = labels.cuda()
        # Teacher forcing: feed labels shifted right, predict shifted left.
        outputs, atts = net(image.cuda(), labels_cuda[:, :-1])
        if len(atts) != 0:
            # Penalize attention columns that deviate from the mean
            # attention mass (doubly-stochastic-style regularizer, x5).
            att_loss = 5 * ((atts.mean(
                (1, 2)).unsqueeze(-1) - atts.sum(2))**2).mean()
        crit_loss = criterion(outputs, labels_cuda[:, 1:])
        loss = crit_loss + att_loss
        if train:
            loss.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), 5)
            optimizer.step()
        # Accumulate per-sample-weighted sums for epoch averages.
        losses += loss.item() * labels.shape[0]
        att_losses += att_loss.item() * labels.shape[0]
        crit_losses += crit_loss.item() * labels.shape[0]
        sample_count += labels_cuda.shape[0]
        for j in range(outputs.shape[0]):
            accs += utils.get_acc(outputs[j].argmax(0), labels_cuda[j, 1:])
            if not train:
                # Greedy decode: argmax over the vocab dimension.
                h = outputs[j].detach().argmax(0).cpu().numpy().tolist()
                hypotheses.append(h)
                lab_text = utils.get_output_text(
                    net.vectorizer,
                    labels[j, 1:].detach().cpu().numpy().tolist())
                out_text = utils.get_output_text(net.vectorizer, h)
                ls = []
                for p, l in enumerate(all_labels[j]):
                    ll = l[1:]  # drop the leading (start) token
                    ls.append(ll)
                    l_text = utils.get_output_text(net.vectorizer, ll)
                    lab_files[p].write(l_text + os.linesep)
                references.append(ls)
                out_file.write(out_text + os.linesep)
        # For the last ~10 batches, render the first sample with its
        # prediction/label as an image for logging.
        if i >= len(dataloader) - 10:
            image = image[0]
            lab_text = utils.get_output_text(
                net.vectorizer,
                labels[0, 1:].detach().cpu().numpy().tolist())
            out_text = utils.get_output_text(
                net.vectorizer,
                outputs[0].detach().argmax(0).cpu().numpy().tolist())
            # Undo ImageNet normalization (mean/std per channel).
            for t, m, s in zip(image, [0.485, 0.456, 0.406],
                               [0.229, 0.224, 0.225]):
                t.mul_(s).add_(m)
            image = image.permute(1, 2, 0).detach().cpu().numpy()
            plt.gcf().subplots_adjust(bottom=0.15)
            plt.imshow(image)
            plt.xlabel(out_text + '\n' + lab_text)
            output_images.append(plt_to_np(plt))
    out_file.close()
    for l in lab_files:
        l.close()
    metrics_dict = {}
    if not train:
        metrics_dict = compute_metrics(references=lab_file_names,
                                       hypothesis='output.txt',
                                       no_overlap=False,
                                       no_skipthoughts=True,
                                       no_glove=True)
    return losses / sample_count, att_losses / sample_count, crit_losses / sample_count, output_images, accs / sample_count, metrics_dict
def main(args):
    """Run nlg-eval on ``args.pred_file`` against ``args.ref_file``.

    Fix: the original computed the metrics and discarded them; returning
    the dict makes the result usable (callers ignoring the previous
    ``None`` return are unaffected).

    Args:
        args: namespace with ``pred_file`` and ``ref_file`` path attributes.

    Returns:
        dict of overlap metric scores (skip-thought/GloVe disabled).
    """
    metrics_dict = compute_metrics(hypothesis=args.pred_file,
                                   references=[args.ref_file],
                                   no_skipthoughts=True,
                                   no_glove=True)
    return metrics_dict
def get_score(config, is_val=True):
    """Build the test split from the raw corpus, score generated replies
    against four-way references with nlg-eval, and combine BLEU with
    dictionary-hit and common-word rates into a weighted harmonic-mean
    A-score.

    Args:
        config: object with ``data_dir`` and ``test_file`` attributes.
        is_val: True scores the first half of the data, False the second.

    Returns:
        (BLEU, hit, com, Ascore) floats, each floored at 0.0001.
    """
    results = open('./result/tmp.out.txt', 'r', encoding='utf-8').readlines()
    txt = open(os.path.join(config.data_dir, config.test_file), 'r').read()
    txt = txt.lower()
    # Records are separated by blank lines in the raw corpus.
    txt = txt.split('\n\n')
    # Validation uses the first half, test the second half.
    if is_val:
        txt = txt[0:len(txt) // 2]
        results = results[0:len(results)//2]
    else:
        txt = txt[len(txt) // 2:]
        results = results[len(results)//2:]
    src = []
    tar = []
    exist_dic = []
    # ``obtain`` (defined elsewhere) fills exist_dic/src/tar in place.
    obtain(txt, exist_dic, src, tar, config)
    # Debug: flag any target with fewer than 4 references.
    for u in tar:
        if len(u) < 4:
            print(u)
    # NOTE(review): these pickle file handles are never closed explicitly.
    pickle.dump(exist_dic, open('./data/test_dic.pkl', 'wb'))
    pickle.dump(src, open('./data/test.pro.pkl', 'wb'))
    pickle.dump(tar, open('./data/test.cus.pkl', 'wb'))
    sources = pickle.load(open('./data/test.pro.pkl', 'rb'))
    sources = [x.replace('\n', '') for x in sources]
    ref = pickle.load(open('./data/test.cus.pkl', 'rb'))
    dics = pickle.load(open('./data/test_dic.pkl', 'rb'))
    sen2code = pickle.load(open('./data/sen2code.pkl', 'rb'))
    # Rebuild the sentence->code map with lower-cased keys.
    sen2code_new = {}
    for key, value in sen2code.items():
        sen2code_new[key.lower()] = value
    del sen2code
    # Tally occurrences per code; unmapped sources are counted in ``count``
    # via the (overly broad) bare except.
    count = 0
    code_exist = {}
    for source in sources:
        try:
            if sen2code_new[source] not in code_exist.keys():
                code_exist[sen2code_new[source]] = 1
            else:
                code_exist[sen2code_new[source]] += 1
        except:
            count += 1
    test_subjects = np.array(results)
    test_targets = np.array(ref)
    test_dics = np.array(dics)
    # Token length of each source sentence, used for length bucketing.
    len_sen = [len(nltk.word_tokenize(x)) for x in sources]
    len_sen = np.array(len_sen)
    # print(len_sen.mean(), len_sen.max(), len_sen.min())
    # A single (0, 100000) bucket: effectively no length filtering.
    len_spilt = [(0, 100000)]
    for len_current in len_spilt:
        index = np.where((len_sen >= len_current[0])
                         & (len_sen < len_current[1]))
        ref = test_targets[index].tolist()
        hyp = test_subjects[index].tolist()
        # Hypotheses come from readlines() and already end with '\n';
        # references get one appended below.
        open('./tmp/hyp.txt', 'w',
             encoding='utf-8').writelines([x for x in hyp])
        ref0 = [x[0] for x in ref]
        ref1 = [x[1] for x in ref]
        ref2 = [x[2] for x in ref]
        ref3 = [x[3] for x in ref]
        open('./tmp/ref0.txt', 'w',
             encoding='utf-8').writelines([x + '\n' for x in ref0])
        open('./tmp/ref1.txt', 'w',
             encoding='utf-8').writelines([x + '\n' for x in ref1])
        open('./tmp/ref2.txt', 'w',
             encoding='utf-8').writelines([x + '\n' for x in ref2])
        open('./tmp/ref3.txt', 'w',
             encoding='utf-8').writelines([x + '\n' for x in ref3])
        dics = test_dics[index].tolist()
        metrics_dict = compute_metrics(hypothesis='./tmp/hyp.txt',
                                       references=['./tmp/ref0.txt',
                                                   './tmp/ref1.txt',
                                                   './tmp/ref2.txt',
                                                   './tmp/ref3.txt'],
                                       no_glove=True,
                                       no_overlap=False,
                                       no_skipthoughts=True)
        hyp = [nltk.word_tokenize(x) for x in hyp]
        hit = count_hit(hyp, dics)
        com = count_common(hyp)
        # Average of BLEU-1..4; each component is floored to avoid
        # division by zero in the harmonic mean below.
        BLEU = (metrics_dict['Bleu_1'] + metrics_dict['Bleu_2']
                + metrics_dict['Bleu_3'] + metrics_dict['Bleu_4']) / 4
        if BLEU < 0.0001:
            BLEU = 0.0001
        if hit < 0.0001:
            hit = 0.0001
        if com < 0.0001:
            com = 0.0001
        # Weighted harmonic mean: BLEU weight 4, hit 2.25, com 1.
        Ascore = (1 + 2.25 + 4) / (4 / BLEU + 2.25 / hit + 1 / com)
    return BLEU, hit, com, Ascore
def Meteor(labels, preds):
    # Custom metric hook passed to the trainer below; delegates to
    # ``getListMeteor`` (defined elsewhere in the project).
    score = getListMeteor(preds, labels)
    return score


# Train the model with the METEOR hook.
model.train_model(train_df, eval_data=eval_df, Meteor=Meteor)

# Load the locally trained best model checkpoint.
model = BartModel(pretrained_model='Bart/best_model',
                  args=model_args,
                  model_config='Bart/best_model/config.json',
                  vocab_file='Bart/best_model')

# Predict on the test split.
test_list = test_df['input_text'].tolist()
pred_list = model.predict(test_list)
true_list = test_df['target_text'].tolist()

# Save references and predictions as single-column CSVs (no header/index,
# so each row is a plain text line nlg-eval can read).
column_name = ['title']
nl_df = pd.DataFrame(true_list, columns=column_name)
nl_df.to_csv('result/code_true_bart.csv', index=None, header=False)
nl_df = pd.DataFrame(pred_list, columns=column_name)
nl_df.to_csv('result/code_pred_bart.csv', index=None, header=False)

# NOTE(review): the returned metrics dict is discarded here — confirm the
# intent is to rely on compute_metrics printing its own results.
compute_metrics(hypothesis='result/code_pred_bart.csv',
                references=['result/code_true_bart.csv'],
                no_glove=True,
                no_skipthoughts=True)
import sys

from nlgeval import compute_metrics

# CLI usage: argv[1] = hypothesis file, argv[2] = reference file.
# Skip-thought metrics are disabled; all other nlg-eval metrics run.
metrics_dict = compute_metrics(hypothesis=sys.argv[1],
                               references=[sys.argv[2]],
                               no_skipthoughts=True)
print(metrics_dict)
metrics_dict = compute_metrics( #hypothesis='/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/hypothesis_files/hypothesis_wordrnn10.txt', #hypothesis='/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/hypothesis_files/hypothesis_wordrnn07.txt', #hypothesis='/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/hypothesis_files/hypothesis_wordrnn05.txt', #hypothesis='/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/hypothesis_files/hypothesis_googlelm.txt', #hypothesis='/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/hypothesis_files/hypothesis_attentionac.txt', #hypothesis='/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/hypothesis_files/hypothesis_noattentionac.txt', #hypothesis='/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/hypothesis_files/hypothesis_skipconnections.txt', #hypothesis='/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/hypothesis_files/hypothesis_mleseqgan.txt', #hypothesis='/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/hypothesis_files/hypothesis_ss.txt', 
#hypothesis='/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/hypothesis_files/hypothesis_seqgan.txt', #hypothesis='/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/hypothesis_files/hypothesis_rankgan.txt', hypothesis= '/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/hypothesis_files/hypothesis_leakgan.txt', references=[ #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn10_most_similar/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn10_most_similar/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn10_most_similar/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn10_most_similar/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn10_most_similar/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn10_most_similar/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn10_most_similar/ref7.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn10_most_similar/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn10_most_similar/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn10_most_similar/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn07_most_similar/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn07_most_similar/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn07_most_similar/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn07_most_similar/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn07_most_similar/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn07_most_similar/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn07_most_similar/ref7.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn07_most_similar/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn07_most_similar/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn07_most_similar/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn05_most_similar/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn05_most_similar/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn05_most_similar/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn05_most_similar/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn05_most_similar/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn05_most_similar/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn05_most_similar/ref7.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn05_most_similar/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn05_most_similar/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/word_rnn05_most_similar/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/google_lm/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/google_lm/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/google_lm/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/google_lm/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/google_lm/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/google_lm/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/google_lm/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/google_lm/ref8.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/google_lm/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/google_lm/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/attention_ac/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/attention_ac/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/attention_ac/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/attention_ac/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/attention_ac/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/attention_ac/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/attention_ac/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/attention_ac/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/attention_ac/ref9.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/attention_ac/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/no_attention_ac/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/no_attention_ac/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/no_attention_ac/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/no_attention_ac/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/no_attention_ac/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/no_attention_ac/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/no_attention_ac/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/no_attention_ac/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/no_attention_ac/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/no_attention_ac/ref10.txt' 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/skip_connections/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/skip_connections/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/skip_connections/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/skip_connections/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/skip_connections/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/skip_connections/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/skip_connections/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/skip_connections/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/skip_connections/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/skip_connections/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/mle_seqgan/ref1.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/mle_seqgan/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/mle_seqgan/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/mle_seqgan/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/mle_seqgan/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/mle_seqgan/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/mle_seqgan/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/mle_seqgan/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/mle_seqgan/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/mle_seqgan/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/ss/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/ss/ref2.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/ss/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/ss/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/ss/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/ss/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/ss/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/ss/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/ss/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/ss/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/seqgan/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/seqgan/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/seqgan/ref3.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/seqgan/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/seqgan/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/seqgan/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/seqgan/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/seqgan/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/seqgan/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/seqgan/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/rankgan/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/rankgan/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/rankgan/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/rankgan/ref4.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/rankgan/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/rankgan/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/rankgan/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/rankgan/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/rankgan/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/rankgan/ref10.txt' '/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/leakgan/ref1.txt', '/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/leakgan/ref2.txt', '/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/leakgan/ref3.txt', '/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/leakgan/ref4.txt', '/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/leakgan/ref5.txt', 
'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/leakgan/ref6.txt', '/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/leakgan/ref7.txt', '/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/leakgan/ref8.txt', '/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/leakgan/ref9.txt', '/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references_correctedbug_human_labels_majority_vote/leakgan/ref10.txt' ################################### #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/skip_connections/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/skip_connections/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/skip_connections/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/skip_connections/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/skip_connections/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/skip_connections/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/skip_connections/ref7.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/skip_connections/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/skip_connections/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/skip_connections/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/no_attention_ac/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/no_attention_ac/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/no_attention_ac/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/no_attention_ac/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/no_attention_ac/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/no_attention_ac/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/no_attention_ac/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/no_attention_ac/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/no_attention_ac/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/no_attention_ac/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/attention_ac/ref1.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/attention_ac/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/attention_ac/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/attention_ac/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/attention_ac/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/attention_ac/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/attention_ac/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/attention_ac/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/attention_ac/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/attention_ac/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn10_most_similar/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn10_most_similar/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn10_most_similar/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn10_most_similar/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn10_most_similar/ref5.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn10_most_similar/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn10_most_similar/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn10_most_similar/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn10_most_similar/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn10_most_similar/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/leakgan/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/leakgan/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/leakgan/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/leakgan/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/leakgan/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/leakgan/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/leakgan/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/leakgan/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/leakgan/ref9.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/leakgan/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/rankgan/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/rankgan/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/rankgan/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/rankgan/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/rankgan/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/rankgan/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/rankgan/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/rankgan/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/rankgan/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/rankgan/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/google_lm/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/google_lm/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/google_lm/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/google_lm/ref4.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/google_lm/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/google_lm/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/google_lm/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/google_lm/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/google_lm/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/google_lm/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/seqgan/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/seqgan/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/seqgan/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/seqgan/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/seqgan/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/seqgan/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/seqgan/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/seqgan/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/seqgan/ref9.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/seqgan/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn07_most_similar/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn07_most_similar/ref2.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn07_most_similar/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn07_most_similar/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn07_most_similar/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn07_most_similar/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn07_most_similar/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn07_most_similar/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn07_most_similar/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn07_most_similar/ref10.txt' #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn05_most_similar/ref1.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn05_most_similar/ref2.txt', 
#'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn05_most_similar/ref3.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn05_most_similar/ref4.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn05_most_similar/ref5.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn05_most_similar/ref6.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn05_most_similar/ref7.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn05_most_similar/ref8.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn05_most_similar/ref9.txt', #'/srv/disk01/ggarbace/EvaluationProj/fake_text/saved_models_normalized/nlp_metrics_eval/most_similar_references/word_rnn05_most_similar/ref10.txt' ])
# Score the generated sentences in candidate.txt against the single
# reference file with nlg-eval (BLEU-1..4, METEOR, ROUGE-L, CIDEr and the
# embedding-based metrics, depending on the installed extras).
from nlgeval import compute_metrics

metrics_dict = compute_metrics(hypothesis='candidate.txt',
                               references=['reference.txt'])
def translate(self, src, tmpl=None, src2=None, tgt=None, src_seq_len=50,
              tgt_seq_len=50, batch_size=None, attn_debug=False):
    """Translate `src` and, when `tgt` is given, also score the gold targets.

    Note: batch_size must not be None.
    Note: `src` must not be None.

    Args:
        src: source data accepted by ``inputters.build_dataset``.
        tmpl: optional template stream for the dataset builder.
        src2: optional second source stream.
        tgt: optional gold targets; enables gold scoring and reporting.
        src_seq_len (int): source truncation length.
        tgt_seq_len (int): target truncation length.
        batch_size (int): size of examples per mini-batch (required).
        attn_debug (bool): enables the attention logging.

    Returns:
        (`list`, `list`)

        * all_scores is a list of `batch_size` lists of `n_best` scores
        * all_predictions is a list of `batch_size` lists of `n_best`
          predictions
    """
    assert src is not None
    if batch_size is None:
        raise ValueError("batch_size must be set")
    data = inputters.build_dataset(
        self.fields, src=src, tgt=tgt, src2=src2, tmpl=tmpl,
        src_seq_len=src_seq_len,
        # NOTE(review): the template is truncated with the *target* length
        # (tmpl_seq_len=tgt_seq_len) — confirm this sharing is intentional.
        tmpl_seq_len=tgt_seq_len,
        tgt_seq_len=tgt_seq_len,
        use_filter_pred=self.use_filter_pred)

    cur_device = "cuda" if self.cuda else "cpu"

    # Sort within each batch (needed for packed sequences) but keep the
    # overall corpus order so TranslationBuilder can realign outputs.
    data_iter = inputters.OrderedIterator(
        dataset=data, device=cur_device,
        batch_size=batch_size, train=False, sort=False,
        sort_within_batch=True, shuffle=False)

    builder = monmt.translate.TranslationBuilder(
        data, self.fields, self.n_best, self.replace_unk, tgt)

    # Statistics
    counter = count(1)
    pred_score_total, pred_words_total = 0, 0
    gold_score_total, gold_words_total = 0, 0

    all_scores = []
    all_predictions = []

    for batch in data_iter:
        # pass1: presumably selects a first-pass decode mode — behavior is
        # defined in translate_batch / TranslationBuilder, not visible here.
        batch_data = self.translate_batch(batch, data, attn_debug,
                                          fast=self.fast, pass1=self.pass1)
        translations = builder.from_batch(batch_data, pass1=self.pass1)

        for trans in translations:
            all_scores += [trans.pred_scores[:self.n_best]]
            pred_score_total += trans.pred_scores[0]
            pred_words_total += len(trans.pred_sents[0])
            if tgt is not None:
                gold_score_total += trans.gold_score
                # +1 presumably accounts for the EOS token in the gold
                # length — confirm against _report_score's convention.
                gold_words_total += len(trans.gold_sent) + 1

            n_best_preds = [" ".join(pred)
                            for pred in trans.pred_sents[:self.n_best]]
            all_predictions += [n_best_preds]
            # Stream every n-best hypothesis to the output file as we go.
            self.out_file.write('\n'.join(n_best_preds) + '\n')
            self.out_file.flush()

            if self.verbose:
                sent_number = next(counter)
                output = trans.log(sent_number)
                if self.logger:
                    self.logger.info(output)
                else:
                    os.write(1, output.encode('utf-8'))

            if attn_debug:
                # Dump an attention table: one row per predicted word, one
                # column per source token; the maximum weight of each row
                # is highlighted with a '*' fill character.
                preds = trans.pred_sents[0]
                preds.append('<eos>')
                attns = trans.attns[0].tolist()
                srcs = trans.src_raw
                header_format = "{:>10.10} " + "{:>10.7} " * len(srcs)
                row_format = "{:>10.10} " + "{:>10.7f} " * len(srcs)
                output = header_format.format("", *srcs) + '\n'
                for word, row in zip(preds, attns):
                    max_index = row.index(max(row))
                    # Star-fill only the max column: rewrite the first
                    # max_index+1 float specs with a '*' fill, then undo
                    # the first max_index of them again.
                    row_format = row_format.replace(
                        "{:>10.7f} ", "{:*>10.7f} ", max_index + 1)
                    row_format = row_format.replace(
                        "{:*>10.7f} ", "{:>10.7f} ", max_index)
                    output += row_format.format(word, *row) + '\n'
                    # Reset the template before formatting the next row.
                    row_format = "{:>10.10} " + "{:>10.7f} " * len(srcs)
                os.write(1, output.encode('utf-8'))

    if self.report_score:
        msg = self._report_score('PRED', pred_score_total,
                                 pred_words_total)
        if self.logger:
            self.logger.info(msg)
        else:
            print(msg)
        if tgt is not None:
            msg = self._report_score('GOLD', gold_score_total,
                                     gold_words_total)
            if self.logger:
                self.logger.info(msg)
            else:
                print(msg)

    if self.report_rouge or self.report_bleu:
        # nlg-eval computes BLEU/ROUGE (and more) over the whole files.
        from nlgeval import compute_metrics
        compute_metrics(self.pred_file, [self.tgt_file],
                        logger=self.logger)

    if self.dump_beam:
        import json
        json.dump(self.translator.beam_accum,
                  codecs.open(self.dump_beam, 'w', 'utf-8'))

    return all_scores, all_predictions
from nlgeval import compute_metrics, compute_individual_metrics
import numpy as np

# Collect nlg-eval scores for each of the 20 hypothesis/reference pairs so
# we can report a per-metric mean and confidence interval across runs.
metrics_lt = {}
for i in range(20):
    metrics_dict = compute_metrics(hypothesis=f'gen_hyp/hyp_{i}.txt',
                                   references=[f'gen_hyp/ref_{i}.txt'])
    for name in metrics_dict:
        metrics_lt.setdefault(name, []).append(metrics_dict[name])

import scipy.stats
import math


def mean_confidence_interval(data, confidence=0.95):
    """Return (mean, lower, upper) of the t-based CI for the mean of `data`.

    Args:
        data: sequence of numeric samples (one score per run).
        confidence: two-sided confidence level, default 0.95.
    """
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    # BUG FIX: the half-width of a CI for the mean is t * SEM.  The old code
    # used sqrt(n) * se * t, and since SEM = std / sqrt(n) that turned the
    # SEM back into the sample standard deviation, inflating the interval
    # by a factor of sqrt(n).
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, m - h, m + h


print('************ overall result ***************')
for name in metrics_lt:
    lt = metrics_lt[name]
    m, lb, hb = mean_confidence_interval(lt)
    print(f'name: {name}, mean: {m}, CI(95%): [{lb}, {hb}]')
from nlgeval import compute_metrics

# Generated responses and the single-reference ground truth (one per line).
file_generate = "/local/ssd_1/chengzhang/SA_dialog/dialogue/result/nlpcc2017_tune_output/nlpcc2017_diverse_e0_output.txt"
file_truth = '/local/ssd_1/chengzhang/SA_dialog/big_data/nlpcc2017_reference.txt'

generate = []
truth = []
# Iterate the file objects directly instead of materializing readlines().
with open(file_generate, "r", encoding="utf-8") as f:
    for line in f:
        generate.append(line.strip())
with open(file_truth, "r", encoding="utf-8") as f:
    for line in f:
        # Each reference is wrapped in a one-element list (one reference set
        # per hypothesis).
        truth.append([line.strip()])

# Sanity check: hypothesis and reference files should be line-aligned.
print(len(truth))
print(len(generate))

# BUG FIX: the banner misspelled the metric family ("BLUE" -> "BLEU").
print("BLEU score:", compute_metrics(hypothesis=file_generate,
                                     references=[file_truth]))
) parser.add_argument('--result_dir', default='eval_seq2seq15200', help='directory of test_pred to save.') args = parser.parse_args() argparams = vars(args) ckpt_name = 'BS_test_data-ckpt_from-' + argparams['result_dir'] test_genarated_file = '../{}/{}/{}'.format(argparams['exp_name'], ckpt_name, 'test_data_results.txt') test_plot_file = '{}/{}'.format(argparams['data_path'], 'test_plot.txt') test_ending_file = '{}/{}'.format(argparams['data_path'], 'test_ending.txt') print( 'compute word overlap scores between target ending and generated ending' ) metrics_dict_target_generated = compute_metrics( hypothesis=test_genarated_file, references=[test_ending_file]) print('-' * 50) # print('compute similarity scores between generated ending and target_ending') # metrics_dict_plot_generated = compute_metrics(hypothesis=test_genarated_file, references=[test_ending_file], # no_overlap=True) # print('-' * 50) print('compute similarity scores between generated ending and plot') metrics_dict_plot_target = compute_metrics(hypothesis=test_genarated_file, references=[test_plot_file], no_overlap=True) # print('-' * 50)
import bert_score
import sys
from nlgeval import compute_metrics

print(sys.argv[1])
fn = sys.argv[1]


def _clean(line):
    # Strip the decoder's sentence markers and surrounding whitespace.
    return line.replace("_go", "").replace("_eos", "").strip()


# Ground-truth targets and model predictions, one sentence per line.
with open("ap_data/valid_freq.tgt") as f:
    gt = [_clean(e) for e in f]
with open(fn) as f:
    pred = [_clean(e) for e in f]

# Write cleaned copies for nlg-eval; `with` guarantees the handles are
# closed before compute_metrics reads the files (the old code leaked them).
with open("mt_tmp/pred.txt", "w+") as f:
    f.writelines([e + "\n" for e in pred])
with open("mt_tmp/gt.txt", "w+") as f:
    f.writelines([e + "\n" for e in gt])

results = compute_metrics(hypothesis="mt_tmp/pred.txt",
                          references=["mt_tmp/gt.txt"],
                          no_skipthoughts=True,
                          no_glove=True)

base = fn.split(".")[0]
with open("{0}.meteor".format(base), "w+") as out:
    out.write(str(results["METEOR"]))

# BUG FIX: torch has no 'gpu' device string; bert-score expects 'cuda'
# (or 'cpu').  score() returns (P, R, F1); keep the F1 tensor.
bert_out = bert_score.score(pred, gt, device='cuda',
                            model_type='roberta-base')
scores = bert_out[-1].tolist()
with open("{0}.scores".format(base), "w+") as out:
    out.write(str(scores))
# NOTE(review): this first statement looks like the tail of a decoding loop
# cut off above — `predicted` and `yhat` must be defined earlier in the file.
predicted.append(yhat.split())


def save_to_file(path, file):
    """Append the text chunk `file` to the file at `path`."""
    fh = open(path, 'a')
    fh.write(file)
    fh.close()


for i in range(len(predicted)):
    # Space-join the tokens of one prediction and terminate the line.
    # BUG FIX: the original built this with a manual loop into a local named
    # `str`, shadowing the builtin; " ".join produces the identical string.
    line = " ".join(predicted[i]) + "\n"
    save_to_file("/rap_blues/lunwen/paras#/4/predicted.txt", line)  # prediction value

from nlgeval import compute_metrics

# NOTE(review): compute_metrics is invoked twice on the same files; the first
# result is discarded.  Kept because compute_metrics also prints each metric
# as a side effect — confirm whether the duplicate run is intentional.
compute_metrics(
    hypothesis='H:/deep_front/nlg-eval-master/test_comparison/predicted.txt',
    references=[
        'H:/deep_front/nlg-eval-master/test_comparison/test_labels2.txt'
    ])
metrics_dict = compute_metrics(
    hypothesis='H:/deep_front/nlg-eval-master/test_comparison/predicted.txt',
    references=[
        'H:/deep_front/nlg-eval-master/test_comparison/test_labels2.txt'
    ])
def __init__(self, hypothesis_file, reference_file):
    """Score `hypothesis_file` against `reference_file` with nlg-eval.

    Only the fast word-overlap metrics are computed (the skip-thoughts and
    GloVe embedding metrics are disabled); the result dict is kept on the
    instance as `metrics_dict`.
    """
    scores = nlgeval.compute_metrics(
        hypothesis=hypothesis_file,
        references=[reference_file],
        no_skipthoughts=True,
        no_glove=True)
    self.metrics_dict = scores
from nlgeval import compute_metrics
import csv
import os
import sys

if len(sys.argv) < 2:
    print("Please provide the result_xxx.csv file as an argument.")
else:
    res_file = sys.argv[1]
    # Derive <name>_ref.txt / <name>_hyp.txt next to the csv.  splitext only
    # strips the final extension, so the old split('.')[0] — which truncated
    # at the first dot anywhere in the path — is fixed.
    file_name = os.path.splitext(res_file)[0]
    ref_file = file_name + '_ref.txt'
    hyp_file = file_name + '_hyp.txt'
    # BUG FIX: the 'rU' universal-newline mode was removed in Python 3.11;
    # csv wants newline='' so quoted embedded newlines survive.  All three
    # files are managed by `with`, so no handle is leaked on error and the
    # redundant f.close() inside the block is gone.
    with open(res_file, 'r', newline='') as f, \
            open(ref_file, 'w+') as fout_1, \
            open(hyp_file, 'w+') as fout_2:
        # Each row holds "reference;hypothesis".
        for row in csv.reader(f, delimiter=';'):
            reference = row[0]
            hypothesis = row[1]
            fout_1.write(reference + '\n')
            fout_2.write(hypothesis + '\n')
    metrics_dict = compute_metrics(hypothesis=hyp_file,
                                   references=[ref_file])
    print(metrics_dict)