def eval(out_file, src_file, tgt_file, isDIn=False, num_pairs=500):
    """Compute QG metric scores (via QGEvalCap) for a prediction file.

    out_file: one predicted question per line, aligned with src/tgt files.
    src_file: one tokenized source sentence per line.
    tgt_file: one gold tokenized question per line.
    isDIn:    whether the input file is DirectIn.txt (currently unused).
    num_pairs: currently unused.

    Returns whatever QGEvalCap.evaluate() returns (the metric scores).

    BUGFIX: the original stripped the newline with str.strip('\r\n') and
    then ALSO sliced off the last character with line[:-1], which deleted
    the final real character of every sentence/question.  Only one trim
    is needed; we keep the rstrip.
    """
    pairs = []
    with open(src_file, 'r') as infile:
        for line in infile:
            pair = {}
            pair['tokenized_sentence'] = line.rstrip('\r\n')
            pairs.append(pair)

    # Gold questions are aligned line-by-line with the source sentences.
    with open(tgt_file, "r") as infile:
        cnt = 0
        for line in infile:
            pairs[cnt]['tokenized_question'] = line.rstrip('\r\n')
            cnt += 1

    output = []
    with open(out_file, 'r') as infile:
        for line in infile:
            output.append(line.rstrip('\r\n'))
    for idx, pair in enumerate(pairs):
        pair['prediction'] = output[idx]

    ## eval
    from eval import QGEvalCap
    import json
    from json import encoder
    encoder.FLOAT_REPR = lambda o: format(o, '.4f')

    # res: sentence -> [prediction]; gts: sentence -> all gold questions
    # for that sentence (duplicate sentences accumulate their questions).
    res = defaultdict(lambda: [])
    gts = defaultdict(lambda: [])
    for pair in pairs[:]:
        key = pair['tokenized_sentence']
        res[key] = [pair['prediction'].encode('utf-8')]
        ## gts
        gts[key].append(pair['tokenized_question'].encode('utf-8'))

    QGEval = QGEvalCap(gts, res)
    return QGEval.evaluate()
def eval(out_file, src_file, tgt_file, isDIn=False, num_pairs=500):
    """Compute QG metric scores (via QGEvalCap) for a prediction file.

    out_file: predicted questions, one per line (re-tokenized, lowercased).
    src_file: tokenized source sentences, one per line (lowercased).
    tgt_file: gold tokenized questions, one per line.
    isDIn:    whether the input file is DirectIn.txt (currently unused).
    num_pairs: currently unused.
    """
    # One dict per source line; further keys are attached below.
    with open(src_file, 'r') as fh:
        pairs = [{'tokenized_sentence': ln[:-1].strip().lower()} for ln in fh]

    # Gold questions are aligned line-by-line with the sentences.
    with open(tgt_file, "r") as fh:
        for cnt, ln in enumerate(fh):
            pairs[cnt]['tokenized_question'] = ln[:-1].strip()

    # Predictions get detokenization fixes applied and are lowercased.
    with open(out_file, 'r') as fh:
        output = [fix_tokenization(ln[:-1].strip()).lower() for ln in fh]
    for idx, pair in enumerate(pairs):
        pair['prediction'] = output[idx]

    # eval
    from eval import QGEvalCap
    import json
    from json import encoder
    encoder.FLOAT_REPR = lambda o: format(o, '.4f')

    res = defaultdict(list)
    gts = defaultdict(list)
    # Each pair dict holds three keys: tokenized_sentence,
    # tokenized_question and prediction.
    for pair in pairs[:]:
        sentence = pair['tokenized_sentence']
        res[sentence] = [pair['prediction'].encode('utf-8')]
        # gts accumulates every gold question seen for this sentence.
        gts[sentence].append(pair['tokenized_question'].encode('utf-8'))

    scorer = QGEvalCap(gts, res)
    return scorer.evaluate()
def eval(out_file, src_file, tgt_file, isDIn = False, num_pairs = 500):
    """Compute QG metric scores (via QGEvalCap) for a prediction file.

    out_file: predicted questions, one per line.
    src_file: tokenized source sentences, one per line.
    tgt_file: gold tokenized questions, one per line.
    isDIn:    whether the input file is DirectIn.txt (currently unused).
    num_pairs: currently unused.
    """
    # Build one dict per source sentence; the trailing newline is
    # removed with the [:-1] slice, matching the rest of this file.
    with open(src_file, 'r') as fh:
        pairs = [{'tokenized_sentence': ln[:-1]} for ln in fh]

    # Gold questions align line-by-line with the source sentences.
    with open(tgt_file, "r") as fh:
        for cnt, ln in enumerate(fh):
            pairs[cnt]['tokenized_question'] = ln[:-1]

    with open(out_file, 'r') as fh:
        output = [ln[:-1] for ln in fh]
    for idx, pair in enumerate(pairs):
        pair['prediction'] = output[idx]

    ## eval
    from eval import QGEvalCap
    import json
    from json import encoder
    encoder.FLOAT_REPR = lambda o: format(o, '.4f')

    res = defaultdict(list)
    gts = defaultdict(list)
    for pair in pairs[:]:
        sentence = pair['tokenized_sentence']
        res[sentence] = [pair['prediction'].encode('utf-8')]
        ## gts
        gts[sentence].append(pair['tokenized_question'].encode('utf-8'))

    scorer = QGEvalCap(gts, res)
    return scorer.evaluate()
def _read_stripped_lines(path):
    """Read *path* and return its lines with the trailing newline sliced off."""
    with open(path, 'r') as fh:
        return [ln[:-1] for ln in fh]


def eval_squad(out_file, simplified_para_file, para_file, src_file, tgt_file, n_best, isDIn = False, num_pairs = 500):
    """Compute QG metric scores (via QGEvalCap) grouped by paragraph.

    out_file:             n_best predictions per simplified paragraph,
                          one prediction per line.
    simplified_para_file: one simplified paragraph per line.
    para_file:            one (full) paragraph per line, aligned with
                          src_file / tgt_file.
    src_file:             one source sentence per line.
    tgt_file:             one gold question per line.
    n_best:               number of prediction lines per paragraph.
    isDIn, num_pairs:     currently unused.

    Raises ValueError when out_file does not contain n_best lines for
    every simplified paragraph.

    Improvements over the original: the misspelled local
    `para_prediciton_map` is renamed, the O(n*m) truncation loop of
    repeated `del predictions[-1]` is replaced by a slice, the
    validation `assert` (stripped under -O) is an explicit ValueError,
    and the three identical file-reading loops share a helper.
    """
    simplified_paragraphs = _read_stripped_lines(simplified_para_file)

    # out_file carries n_best consecutive prediction lines per paragraph.
    predictions = []
    with open(out_file, 'r') as infile:
        for _ in range(len(simplified_paragraphs)):
            predictions.append([infile.readline()[:-1] for _ in range(n_best)])
    if len(simplified_paragraphs) != len(predictions):
        raise ValueError("prediction count does not match paragraph count")

    # Mapping between simplified paragraphs and their n_best predictions.
    para_prediction_map = dict(zip(simplified_paragraphs, predictions))

    paragraphs = _read_stripped_lines(para_file)
    sentences = _read_stripped_lines(src_file)
    questions = _read_stripped_lines(tgt_file)

    # Group sentences/questions by their (full) paragraph; every row of
    # the same paragraph shares the same prediction list.
    pairs = defaultdict(lambda: {'sentences': set(), 'questions': [], 'predictions': []})
    for paragraph, sentence, question in zip(paragraphs, sentences, questions):
        pairs[paragraph]['sentences'].add(sentence)
        pairs[paragraph]['questions'].append(question)
        pairs[paragraph]['predictions'] = para_prediction_map[paragraph]

    # Clean up redundant predictions: keep only as many predictions as
    # there are distinct sentences for the paragraph.  (The original
    # crashed with IndexError when there were FEWER predictions than
    # sentences; truncation via slice leaves that case untouched.)
    for entry in pairs.values():
        entry['predictions'] = entry['predictions'][:len(entry['sentences'])]

    ## eval
    from eval import QGEvalCap
    import json
    from json import encoder
    encoder.FLOAT_REPR = lambda o: format(o, '.4f')

    # res: paragraph -> predictions; gts: paragraph -> gold questions.
    res = defaultdict(lambda: [])
    gts = defaultdict(lambda: [])
    for paragraph in pairs.keys():
        for prediction in pairs[paragraph]['predictions']:
            res[paragraph].append(prediction.encode('utf-8'))
        ## gts
        for question in pairs[paragraph]['questions']:
            gts[paragraph].append(question.encode('utf-8'))

    QGEval = QGEvalCap(gts, res)
    return QGEval.evaluate()
def eval(out_file, src_file, tgt_file, isDIn=False, num_pairs=500):
    """Compute QG metric scores (via QGEvalCap) for a prediction file.

    out_file: predicted questions, one per line.
    src_file: tokenized source sentences, one per line.
    tgt_file: gold tokenized questions, one per line.
    isDIn:    whether the input file is DirectIn.txt (currently unused).
    num_pairs: currently unused.

    Predictions containing "<UNK>" are dropped; each surviving
    (prediction, gold question) pair is scored individually, keyed by
    its position, so duplicate source sentences are all evaluated.

    Cleanup over the original: two dead `if 0:` branches (alternative
    keying schemes with their debug prints) and an unused
    target_dict/predict_dict computation inside the live branch were
    removed; the live path is unchanged.
    """
    # pairs: one dict per source line with keys 'tokenized_sentence',
    # 'tokenized_question' and 'prediction'.
    pairs = []
    with open(src_file, 'r') as infile:
        for line in infile:
            pair = {}
            pair['tokenized_sentence'] = line[:-1]
            pairs.append(pair)

    with open(tgt_file, "r") as infile:
        cnt = 0
        for line in infile:
            pairs[cnt]['tokenized_question'] = line[:-1]
            cnt += 1

    output = []
    with open(out_file, 'r') as infile:
        for line in infile:
            output.append(line[:-1])
    for idx, pair in enumerate(pairs):
        pair['prediction'] = output[idx]

    ## eval
    from eval import QGEvalCap
    import json
    from json import encoder
    encoder.FLOAT_REPR = lambda o: format(o, '.4f')

    # Keep only well-formed predictions (no <UNK> token); score every
    # remaining pair on its own, keyed by index.
    pairs = [p for p in pairs if "<UNK>" not in p["prediction"]]
    gts = {i: [p["tokenized_question"]] for i, p in enumerate(pairs)}
    res = {i: [p["prediction"]] for i, p in enumerate(pairs)}
    print("size of items:{}".format(len(res.items())))

    QGEval = QGEvalCap(gts, res)
    return QGEval.evaluate()