def report_run_per_query(qrels, run_file_name, remove_docs_with_zero_score=False):
    run = TrecRun(run_file_name)
    system = run.run_data['system'][0]

    if remove_docs_with_zero_score:
        run.run_data = run.run_data[run.run_data['score'] > 0]

    trec_eval = TrecEval(run, qrels)
    bpref = trec_eval.getBpref(per_query=True)
    ndcg_10 = trec_eval.getNDCG(depth=10, per_query='query')
    ndcg = trec_eval.getNDCG(per_query='query')

    ret = bpref.join(ndcg_10, on='query')
    ret = ret.join(ndcg, on='query')

    for query, r in ret.iterrows():
        yield json.dumps({
            'corpus': extract_corpus(run_file_name),
            'topic': query,
            'tag': system,
            "bpref": r['Bpref@1000'],
            "pseudoNDCG@10": r['NDCG@10'],
            "pseudoNDCG": r['NDCG@1000']
        })
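# Hypothetical driver for report_run_per_query above (the qrels and run file
# paths are placeholders, and the extract_corpus helper from the surrounding
# project is assumed to be in scope): the function is a generator, so it
# yields one JSON line per topic.
from trectools import TrecQrel

qrels = TrecQrel("qrels/clueweb09.qrels")
for line in report_run_per_query(qrels, "runs/bm25.run", remove_docs_with_zero_score=True):
    print(line)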
def eval_rank_df(e: trectools.TrecEval, per_query=True) -> pd.Series:
    return pd.Series({
        "nDCG@5": e.get_ndcg(depth=5, per_query=per_query),
        "nDCG@10": e.get_ndcg(depth=10, per_query=per_query),
        "nDCG@20": e.get_ndcg(depth=20, per_query=per_query),
        "RR": e.get_reciprocal_rank(per_query=per_query)
    })
def compute_map(valid_codes, pred, gs_out_path=None):
    """
    Custom function to compute MAP evaluation metric.
    Code adapted from
    https://github.com/TeMU-BSC/CodiEsp-Evaluation-Script/blob/master/codiespD_P_evaluation.py
    """
    # Input args default values
    if gs_out_path is None:
        gs_out_path = './intermediate_gs_file.txt'

    pred_out_path = './intermediate_predictions_file.txt'

    ###### 2. Format predictions as TrecRun format: ######
    format_predictions(pred, pred_out_path, valid_codes)

    ###### 3. Calculate MAP ######
    # Load GS from qrel file
    qrels = TrecQrel(gs_out_path)

    # Load pred from run file
    run = TrecRun(pred_out_path)

    # Calculate MAP
    te = TrecEval(run, qrels)
    MAP = te.get_map(trec_eval=False)  # With this option False, rank order is taken from the given document order

    ###### 4. Return results ######
    return MAP
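# A minimal sketch of the MAP computation at the heart of compute_map, in
# isolation, assuming a qrels file and a run file already exist on disk
# (both paths here are placeholders):
from trectools import TrecQrel, TrecRun, TrecEval

qrels = TrecQrel('./intermediate_gs_file.txt')          # gold standard in qrel format
run = TrecRun('./intermediate_predictions_file.txt')    # predictions in run format

# trec_eval=False keeps the rank order exactly as given in the run file
# instead of re-sorting by score, matching compute_map above.
print(TrecEval(run, qrels).get_map(trec_eval=False))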
def eval(topx):
    qrels = TrecQrel("./relevance-args-v2.qrels")

    # Evaluate every run in a directory: print judgment coverage and nDCG@topx for each
    path_to_runs = "./runs/"
    runs = procedures.list_of_runs_from_path(path_to_runs, "*")

    for run in runs:
        print(run.get_filename())
        te = TrecEval(run, qrels)
        rbp, residuals = te.get_rbp()

        coverage = run.get_mean_coverage(qrels, topX=topx)
        print("Average number of documents judged among top %.0f: %.2f, that's about %.2f percent."
              % (topx, coverage, coverage / topx * 100))

        # precision = te.get_precision(depth=topx)
        # print("precision (p" + str(topx) + "): ", precision)

        ndcg = te.get_ndcg(depth=topx, removeUnjudged=False)
        print("nDCG (n=" + str(topx) + " removeUnjudged=False): " + str(ndcg))

        ndcg = te.get_ndcg(depth=topx, removeUnjudged=True)
        print("nDCG (n=" + str(topx) + " removeUnjudged=True): " + str(ndcg))

        print("--------\n")
def precision(e: trectools.TrecEval, per_query=False):
    # Precision = relevant retrieved / retrieved
    rel_ret = e.get_relevant_retrieved_documents(per_query=per_query)
    retrieved = e.get_retrieved_documents(per_query=per_query)
    if per_query:
        return (rel_ret / retrieved).fillna(0)
    return rel_ret / retrieved
def trec_eval(file):
    r1 = TrecRun(file)
    qrels = TrecQrel("./dataset/.txt")

    results = TrecEval(r1, qrels)
    p5 = results.get_precision(5)
    p10 = results.get_precision(10)
    p15 = results.get_precision(15)
    print(p5)
    print(p10)
    print(p15)
def main(gs_path, pred_path, codes_path):
    '''
    Load GS, predictions and valid codes; format GS and predictions according
    to TREC specifications; compute MAP and print it.

    Parameters
    ----------
    gs_path : str
        Path to Gold Standard TSV with 2 columns: filename, code
        It has no headers row.
    pred_path : str
        Path to predictions TSV with 2 columns: filename, code
        It has no headers row.
    codes_path : str
        Path to TSV file with valid codes.
        It has no headers row.

    Returns
    -------
    None.
    '''
    ###### 0. Load valid codes lists: ######
    valid_codes = set(pd.read_csv(codes_path, sep='\t', header=None,
                                  usecols=[0])[0].tolist())
    valid_codes = set([x.lower() for x in valid_codes])

    ###### 1. Format GS as TrecQrel format: ######
    qid_gs = format_gs(gs_path, './intermediate_gs_file.txt')

    ###### 2. Format predictions as TrecRun format: ######
    format_predictions(pred_path, './intermediate_predictions_file.txt',
                       valid_codes, qid_gs)

    ###### 3. Calculate MAP ######
    # Load GS from qrel file
    qrels = TrecQrel('./intermediate_gs_file.txt')

    # Load pred from run file
    run = TrecRun('./intermediate_predictions_file.txt')

    # Calculate MAP
    te = TrecEval(run, qrels)
    MAP = te.get_map(trec_eval=False)  # With this option False, rank order is taken from the given document order

    ###### 4. Show results ######
    print('\nMAP estimate: {}\n'.format(round(MAP, 3)))
    # print('\n{}'.format(round(MAP, 3)))
    print('{}|{}'.format(pred_path, round(MAP, 3)))
def setUp(self):
    run1 = TrecRun("./files/r4.run")
    qrels1 = TrecQrel("./files/qrel1.txt")

    run2 = TrecRun("./files/input.uic0301")
    qrels2 = TrecQrel("./files/robust03_cs_qrels.txt")

    # Contains the first 30 documents for the first 10 topics in input.uic0301
    run3 = TrecRun("./files/input.uic0301_top30")

    self.commontopics = [303, 307, 310, 314, 320, 322, 325, 330, 336, 341]
    self.teval1 = TrecEval(run1, qrels1)
    self.teval2 = TrecEval(run2, qrels2)
    self.teval3 = TrecEval(run3, qrels2)
def report_run(qrels, corpus, topics, run_file_name):
    run = TrecRun(run_file_name)
    trec_eval = TrecEval(run, qrels)
    ret = {
        'corpus': corpus,
        'topics': topics,
        'tag': run.run_data['system'][0],
        "bpref": trec_eval.getBpref(),
        "pseudoNDCG@10": trec_eval.getNDCG(depth=10, removeUnjudged=True),
        "pseudoNDCG": trec_eval.getNDCG(removeUnjudged=True),
    }
    return json.dumps(ret)
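# Hypothetical invocation of report_run above (the qrels path, corpus/topics
# labels, and run file name are placeholders): it returns a single JSON
# object serialized as a string.
from trectools import TrecQrel

qrels = TrecQrel("qrels/clueweb09.qrels")
print(report_run(qrels, "clueweb09", "1-50", "runs/bm25.run"))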
def wss(e: trectools.TrecEval) -> float:
    """Work Saved over Sampling: WSS = (N - ret) / N - (1 - recall)."""
    ret = e.get_retrieved_documents()
    r = recall(e)
    N = 30000000  # assumed collection size
    wss = ((N - ret) / N) - (1.0 - r)
    if wss < 0:
        return 0
    return wss
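# Quick numeric sanity check of the WSS formula used above, with the same
# assumed collection size N: retrieving 3M of 30M documents at recall 0.95
# saves (27M / 30M) - 0.05 = 0.85 of the screening work.
N = 30_000_000
ret, r = 3_000_000, 0.95
assert abs((((N - ret) / N) - (1.0 - r)) - 0.85) < 1e-9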
def report_run(qrels, run_file_name, remove_docs_with_zero_score=False):
    run = TrecRun(run_file_name)
    system = run.run_data['system'][0]

    if remove_docs_with_zero_score:
        run.run_data = run.run_data[run.run_data['score'] > 0]

    trec_eval = TrecEval(run, qrels)
    ret = {
        'corpus': extract_corpus(run_file_name),
        'topics': extract_topics(run_file_name),
        'tag': system,
        "bpref": trec_eval.getBpref(),
        "pseudoNDCG@10": trec_eval.getNDCG(depth=10),
        "pseudoNDCG": trec_eval.getNDCG()
    }
    return json.dumps(ret)
def trec_eval_ndcg(run_name, data_path='./data/',
                   depths=[5, 10, 15, 20, 30, 100, 200, 500, 1000]):
    qrel_name = os.path.join(data_path, '2019qrels-pass.txt')
    qrel = TrecQrel(qrel_name)
    res = TrecRun(run_name)
    for depth in depths:
        score = TrecEval(res, qrel).get_ndcg(depth=depth)
        print('ndcg_cut_%d \t all \t %.4f' % (depth, score))
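# Hypothetical invocation of trec_eval_ndcg above (the run file name is a
# placeholder; the qrels file is expected at data_path/2019qrels-pass.txt):
# it prints one ndcg_cut line per depth, mimicking trec_eval's output format.
trec_eval_ndcg('./runs/bm25.run', data_path='./data/', depths=[10, 100, 1000])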
def to_trec_df(e: trectools.TrecEval, per_query=True) -> pd.Series:
    return join_series(
        pd.Series({
            "P": precision(e, per_query=per_query),
            "R": recall(e, per_query=per_query),
            # "F$_{0.5}$": f_measure(e, 0.5, per_query=per_query),
            # "F$_1$": f_measure(e, 1, per_query=per_query),
            "NumRet": e.get_retrieved_documents(per_query=per_query)
            # "F$_3$": f_measure(e, 3, per_query=per_query),
        }),
        eval_rank_df(e, per_query=per_query))
def wss(e: trectools.TrecEval, per_query=False):
    ret = e.get_retrieved_documents(per_query=per_query)
    r = recall(e, per_query=per_query)
    N = 30000000  # assumed collection size

    if per_query:
        return pd.Series(
            dict([(t, (((N - ret.T[t]) / N) - (1.0 - r.T[t])))
                  for t in ret.index]))

    wss = ((N - ret) / N) - (1.0 - r)
    if wss < 0:
        return 0  # clamp negative WSS to zero
    return wss
def eval(qrel_file_path, run_file_path):
    """Evaluate a run file against a qrel file.

    Arguments:
        qrel_file_path {string} -- path of the qrel file, usually located in the source language folder
        run_file_path {string} -- path of the run file, usually located in the results folder of a language

    Returns:
        tuple -- P@5, P@10, P@20, MAP and R-Prec, each rounded to four digits
    """
    r1 = TrecRun(run_file_path)
    qrels = TrecQrel(qrel_file_path)

    te = TrecEval(r1, qrels)
    p5 = te.get_precision(depth=5)
    p10 = te.get_precision(depth=10)
    p20 = te.get_precision(depth=20)
    map = te.get_map()
    rprec = te.get_rprec()

    run_object = r1.evaluate_run(qrels, per_query=True)

    return round(p5, 4), round(p10, 4), round(p20, 4), round(map, 4), round(rprec, 4)
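# Hypothetical call of the eval helper above (both paths are placeholders):
p5, p10, p20, map_, rprec = eval("./qrels/english.qrel", "./results/english/bm25.run")
print(f"P@5={p5} P@10={p10} P@20={p20} MAP={map_} R-Prec={rprec}")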
def main(args):
    gold_labels = TrecQrel(args.gold_labels)
    prediction = TrecRun(args.scores)

    results = TrecEval(prediction, gold_labels)
    metrics = extract_metrics(results, args.metrics)
    metrics.loc[:, '@depth'] = metrics.loc[:, '@depth'].astype(str)
    metrics.loc[:, '@depth'] = metrics.loc[:, '@depth'].replace(str(MAX_DEPTH), 'all')

    if args.output:
        metrics.to_csv(args.output, sep="\t", index=False)
        logger.info(f"Saved results to {args.output}")
    else:
        print(metrics.to_string(index=False))
def evaluate(qrels, runs_file, topics, model):
    runs = TrecRun(runs_file)
    ev = TrecEval(runs, qrels)
    path_to_csv = os.path.join("eval", model, "results.csv")
    n_topics = len(topics)

    # Calculate various metrics for each query considering the runs/judgment files provided
    print("Calculating metrics...")
    res = ev.evaluate_all(per_query=True)

    # Write results of evaluation to csv file
    res.printresults(path_to_csv, "csv", perquery=True)

    # Calculate NDCG@100 for each query, since the previous metrics don't include it,
    # and append it to each line of the new csv file
    ndcgs = ev.get_ndcg(depth=100, per_query=True)
    values = [row['NDCG@100'] for i, row in ndcgs.iterrows()]  # 'NDCG@100' is the column name of the Pandas dataframe

    with open(path_to_csv, 'r') as f:
        lines = [line[:-1] for line in f]  # Remove '\n' from the end of each line

    lines[0] += ",ndcg@100\n"  # Add new column to header

    # Lines 1 to n of the csv hold the metric values for the n queries;
    # line i takes values[i - 1] because the list is zero-indexed
    for i in range(1, n_topics + 1):
        lines[i] += "," + str(values[i - 1]) + "\n"

    global_ndcg = ev.get_ndcg(depth=100, per_query=False)  # Calculate global NDCG
    lines[n_topics + 1] += "," + str(global_ndcg) + "\n"  # Append global NDCG to last line

    with open(path_to_csv, 'w') as f:
        f.writelines(lines)  # Overwrite csv file with new content
def main(args):
    format_check_passed = run_checks(args.scores)
    if not format_check_passed:
        return

    gold_labels = TrecQrel(args.gold_labels)
    prediction = TrecRun(args.scores)

    results = TrecEval(prediction, gold_labels)
    metrics = extract_metrics(results, args.metrics, args.depths)
    metrics.loc[:, '@depth'] = metrics.loc[:, '@depth'].astype(str)
    metrics.loc[:, '@depth'] = metrics.loc[:, '@depth'].replace(str(MAX_DEPTH), 'all')

    if args.output:
        metrics.to_csv(args.output, sep='\t', index=False)
        logger.info(f'Saved results to file: {args.output}')
    else:
        print(metrics.to_string(index=False))
from pathlib import Path
from typing import Union


def trec_eval(runs_file_path: Union[Path, str], qrels_file_path: Union[Path, str]):
    metrics = dict()

    # Coerce to Path so both str and Path arguments work
    r1 = TrecRun(str(Path(runs_file_path).absolute()))
    qrels = TrecQrel(str(Path(qrels_file_path).absolute()))

    results = TrecEval(r1, qrels)
    metrics["P@5"] = results.get_precision(5)
    metrics["P@10"] = results.get_precision(10)
    metrics["P@15"] = results.get_precision(15)
    metrics["bpref"] = results.get_bpref()
    metrics["map"] = results.get_map()

    metrics = {k: round(v, 4) for k, v in metrics.items()}
    return metrics
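# Hypothetical usage of the trec_eval helper above (both paths are placeholders):
scores = trec_eval(Path("runs/bm25.run"), Path("qrels/test.qrels"))
for name, value in scores.items():
    print(f"{name}\t{value}")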
def evaluate_run(self, trec_qrel_obj, per_query):
    from trectools import TrecEval
    evaluator = TrecEval(self, trec_qrel_obj)
    result = evaluator.evaluate_all(per_query)
    return result
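# Hypothetical usage, assuming evaluate_run above is a method of TrecRun (as
# its use of TrecEval(self, ...) suggests, and as the eval helper earlier in
# this section also does); file paths are placeholders.
from trectools import TrecQrel, TrecRun

run = TrecRun("runs/bm25.run")
res = run.evaluate_run(TrecQrel("qrels/test.qrels"), per_query=True)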
class TestTrecEval(unittest.TestCase):

    def setUp(self):
        run1 = TrecRun("./files/r4.run")
        qrels1 = TrecQrel("./files/qrel1.txt")

        run2 = TrecRun("./files/input.uic0301")
        qrels2 = TrecQrel("./files/robust03_cs_qrels.txt")

        # Contains the first 30 documents for the first 10 topics in input.uic0301
        run3 = TrecRun("./files/input.uic0301_top30")

        self.commontopics = [303, 307, 310, 314, 320, 322, 325, 330, 336, 341]
        self.teval1 = TrecEval(run1, qrels1)
        self.teval2 = TrecEval(run2, qrels2)
        self.teval3 = TrecEval(run3, qrels2)

    def tearDown(self):
        pass

    def test_getReciprocalRank(self):
        value = self.teval1.getReciprocalRank(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.5000, places=4)

        value = self.teval2.getReciprocalRank(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.6466, places=4)

        values1 = self.teval2.getReciprocalRank(depth=30, per_query=True,
                                                trec_eval=True).loc[self.commontopics].values
        values2 = self.teval3.getReciprocalRank(depth=1000, per_query=True,
                                                trec_eval=True).loc[self.commontopics].values
        for v1, v2 in zip(values1, values2):
            self.assertAlmostEqual(v1, v2, places=4)

        results = self.teval2.getReciprocalRank(depth=1000, trec_eval=True, per_query=True)
        correct_results = [0.0017, 0.1429, 0.3333]
        values = results.loc[[378, 650, 624]].values
        for v, c in zip(values, correct_results):
            self.assertAlmostEqual(v, c, places=4)

    def test_getMAP(self):
        value = self.teval1.getMAP(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.2685, places=4)

        value = self.teval2.getMAP(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.2396, places=4)

        values1 = self.teval2.getMAP(depth=30, per_query=True,
                                     trec_eval=True).loc[self.commontopics].values
        values2 = self.teval3.getMAP(depth=1000, per_query=True,
                                     trec_eval=True).loc[self.commontopics].values
        for v1, v2 in zip(values1, values2):
            self.assertAlmostEqual(v1, v2, places=4)

        results = self.teval2.getMAP(depth=1000, trec_eval=True, per_query=True)
        correct_results = [0.4926, 0.2808, 0.2335]
        values = results.loc[[622, 609, 320]].values
        for v, c in zip(values, correct_results):
            self.assertAlmostEqual(v, c, places=4)

    def test_getPrecision(self):
        value = self.teval1.getPrecision(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.0010, places=4)

        value = self.teval2.getPrecision(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.0371, places=4)

        values1 = self.teval2.getPrecision(depth=30, per_query=True,
                                           trec_eval=True).loc[self.commontopics].values
        values2 = self.teval3.getPrecision(depth=30, per_query=True,
                                           trec_eval=True).loc[self.commontopics].values
        for v1, v2 in zip(values1, values2):
            self.assertAlmostEqual(v1, v2, places=4)

        values1 = self.teval2.getPrecision(depth=500, per_query=True,
                                           trec_eval=True).loc[self.commontopics].values
        values2 = self.teval3.getPrecision(depth=500, per_query=True,
                                           trec_eval=True).loc[self.commontopics].values
        for v1, v2 in zip(values1, values2):
            self.assertNotAlmostEqual(v1, v2, places=4)

        results = self.teval2.getPrecision(depth=30, trec_eval=True, per_query=True)
        correct_results = [0.1333, 0.0333, 0.5333]
        values = results.loc[[607, 433, 375]].values
        for v, c in zip(values, correct_results):
            self.assertAlmostEqual(v, c, places=4)
r1 = TrecRun("/storage/proj/petra/projects/podcasts/experiments/experiment5/test_output.5")
r2 = TrecRun("/storage/proj/petra/projects/podcasts/experiments/experiment5/test_output.6")

# Easy way to create new baselines by fusing existing runs:
# fused_run = fusion.reciprocal_rank_fusion([r1, r2])
fused_run = fusion.combos([r1, r2], strategy="mnz")
print(fused_run)

qrels_file = "/storage/proj/petra/projects/podcasts/podcasts_2020_train.1-8.qrels"
qrels = TrecQrel(qrels_file)

r1_p10 = TrecEval(r1, qrels).get_precision(depth=10)
r2_p10 = TrecEval(r2, qrels).get_precision(depth=10)
fused_run_p10 = TrecEval(fused_run, qrels).get_precision(depth=10)

r1_map = TrecEval(r1, qrels).get_map()
r2_map = TrecEval(r2, qrels).get_map()
fused_run_map = TrecEval(fused_run, qrels).get_map()

r1_ndcg = TrecEval(r1, qrels).get_ndcg()
r2_ndcg = TrecEval(r2, qrels).get_ndcg()
fused_run_ndcg = TrecEval(fused_run, qrels).get_ndcg()

print("NDCG -- Run 1: %.3f, Run 2: %.3f, Fusion Run: %.3f" %
      (r1_ndcg, r2_ndcg, fused_run_ndcg))
print("MAP -- Run 1: %.3f, Run 2: %.3f, Fusion Run: %.3f" %
      (r1_map, r2_map, fused_run_map))
def precision(e: trectools.TrecEval) -> float:
    return e.get_precision(depth=e.get_retrieved_documents(per_query=False),
                           per_query=False)
def eval_rank_df(e: trectools.TrecEval) -> pd.Series:
    return pd.Series({
        "nDCG@5": e.get_ndcg(depth=5, per_query=False),
        "nDCG@10": e.get_ndcg(depth=10, per_query=False),
        "nDCG@20": e.get_ndcg(depth=20, per_query=False),
    })
def map(self):
    qrels = TrecQrel("./Data/qrel.txt")
    run = TrecRun("./Data/run.txt")
    te = TrecEval(run, qrels)
    dic = {"map": te.get_map(), "ndcg": te.get_ndcg()}
    return dic
def recall(e: trectools.TrecEval) -> float:
    return (e.get_relevant_retrieved_documents(per_query=False)
            / e.get_relevant_documents(per_query=False))
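# A minimal sketch composing the set-based helpers defined above (precision,
# recall, wss) into one summary Series; the run and qrels paths are placeholders.
import pandas as pd
from trectools import TrecEval, TrecQrel, TrecRun

e = TrecEval(TrecRun("runs/bm25.run"), TrecQrel("qrels/test.qrels"))
summary = pd.Series({"P": precision(e), "R": recall(e), "WSS": wss(e)})
print(summary.round(4))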
def plot_rp_curve(qrels, topics, runs_file, results, model):
    runs = TrecRun(runs_file)
    ev = TrecEval(runs, qrels)

    # Get the relevant documents for each one of the topics
    new_qrels = ev.qrels.qrels_data.copy()
    relevant_docs = {topic: [] for topic in topics}
    for i, row in new_qrels.iterrows():
        # If the judged document is relevant, add it to the list of relevant docs of the respective topic
        if row["rel"] > 0:
            relevant_docs[row["query"]].append(row["docid"])

    num_relevant_docs = {topic: num for topic, num in
                         ev.get_relevant_documents(per_query=True).items()}

    # TrecTools' precision calculations are very slow, so they are calculated "directly".
    # Obtain the recall and precision @k values for every k up to p for each topic and plot them
    for i, topic in enumerate(topics):
        precisions_aux = [0]
        recalls_aux = [0]

        # Count the true positives for the given topic at each rank
        # (p is the maximum rank considered, assumed to be defined elsewhere in the module)
        for j in range(min(p + 1, len(results[i]))):
            # Check if the docid is in the list of relevant documents for that topic
            if results[i][j][0] in relevant_docs[topic]:
                recalls_aux.append(recalls_aux[j] + 1)
                precisions_aux.append(precisions_aux[j] + 1)
            else:
                recalls_aux.append(recalls_aux[j])
                precisions_aux.append(precisions_aux[j])

        # Calculate precision and recall values based on the previous counts
        recalls = [x / num_relevant_docs[topic] for x in recalls_aux]
        precisions = [(x / i if i > 0 else 1) for i, x in enumerate(precisions_aux)]

        # Interpolate the precisions calculated before (needed to plot the recall-precision curve)
        interpolated_precisions = precisions.copy()
        j = len(interpolated_precisions) - 2
        while j >= 0:
            if interpolated_precisions[j + 1] > interpolated_precisions[j]:
                interpolated_precisions[j] = interpolated_precisions[j + 1]
            j -= 1

        # Reduce the number of points to plot to avoid excessive memory usage
        def keep(j):
            return not ((100 < j < 1000 and j % 10 != 0) or (j > 1000 and j % 100 != 0))

        recalls = [value for j, value in enumerate(recalls) if keep(j)]
        precisions = [value for j, value in enumerate(precisions) if keep(j)]
        interpolated_precisions = [value for j, value in enumerate(interpolated_precisions)
                                   if keep(j)]

        # Plot the precision-recall curve of the topic
        fig, ax = plt.subplots()
        for j in range(len(recalls) - 2):
            ax.plot((recalls[j], recalls[j]),
                    (interpolated_precisions[j], interpolated_precisions[j + 1]),
                    'k-', label='', color='red')
            ax.plot((recalls[j], recalls[j + 1]),
                    (interpolated_precisions[j + 1], interpolated_precisions[j + 1]),
                    'k-', label='', color='red')
        ax.plot(recalls, precisions, 'k--', color='blue')
        ax.title.set_text("R" + str(topic))
        ax.set_xlabel("recall")
        ax.set_ylabel("precision")

        # Save plot in eval folder
        fig.savefig(os.path.join("eval", model, f"R{topic}.png"))
        plt.close()
gs_path, pred_path, codes_path = parse_arguments()

###### 0. Load valid codes lists: ######
valid_codes = set(pd.read_csv(codes_path, sep='\t', header=None,
                              usecols=[0])[0].tolist())
valid_codes = set([x.lower() for x in valid_codes])

###### 1. Format GS as TrecQrel format: ######
format_gs(gs_path, './intermediate_gs_file.txt')

###### 2. Format predictions as TrecRun format: ######
format_predictions(pred_path, './intermediate_predictions_file.txt', valid_codes)

###### 3. Calculate MAP ######
# Load GS from qrel file
qrels = TrecQrel('./intermediate_gs_file.txt')

# Load pred from run file
run = TrecRun('./intermediate_predictions_file.txt')

# Calculate MAP
te = TrecEval(run, qrels)
MAP = te.get_map(trec_eval=False)  # With this option False, rank order is taken from the given document order

###### 4. Show results ######
print('\nMAP estimate: {}\n'.format(round(MAP, 3)))