Example #1
def report_run_per_query(qrels,
                         run_file_name,
                         remove_docs_with_zero_score=False):
    run = TrecRun(run_file_name)
    system = run.run_data['system'][0]
    if remove_docs_with_zero_score:
        run.run_data = run.run_data[run.run_data['score'] > 0]

    trec_eval = TrecEval(run, qrels)

    bpref = trec_eval.getBpref(per_query=True)
    ndcg_10 = trec_eval.getNDCG(depth=10, per_query=True)
    ndcg = trec_eval.getNDCG(per_query=True)

    ret = bpref.join(ndcg_10, on='query')
    ret = ret.join(ndcg, on='query')

    for query, r in ret.iterrows():
        yield json.dumps({
            'corpus': extract_corpus(run_file_name),
            'topic': query,
            'tag': system,
            "bpref": r['Bpref@1000'],
            "pseudoNDCG@10": r['NDCG@10'],
            "pseudoNDCG": r['NDCG@1000']
        })
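
A minimal driver for report_run_per_query might look like the sketch below; the qrels path, run file name, and output file are placeholders, not part of the original example.

# Sketch only: feed the generator above and write one JSON line per query.
from trectools import TrecQrel

qrels = TrecQrel('topics.qrels')                  # placeholder qrels file
with open('per_query_report.jsonl', 'w') as out:  # placeholder output file
    for line in report_run_per_query(qrels, 'bm25.run', remove_docs_with_zero_score=True):
        out.write(line + '\n')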
Example #2
def eval_rank_df(e: trectools.TrecEval, per_query=True) -> pd.Series:
    return pd.Series({
        "nDCG@5": e.get_ndcg(depth=5, per_query=per_query),
        "nDCG@10": e.get_ndcg(depth=10, per_query=per_query),
        "nDCG@20": e.get_ndcg(depth=20, per_query=per_query),
        "RR": e.get_reciprocal_rank(per_query=per_query)
    })
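
The helper expects a ready-made trectools.TrecEval; a hedged usage sketch follows (the run and qrels file names are placeholders).

# Sketch only: build a TrecEval and collect the ranking metrics defined above.
from trectools import TrecRun, TrecQrel, TrecEval

e = TrecEval(TrecRun('bm25.run'), TrecQrel('topics.qrels'))  # placeholder paths
print(eval_rank_df(e, per_query=False))                      # nDCG@{5,10,20} and RR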
Example #3
def compute_map(valid_codes, pred, gs_out_path=None):
    """
    Custom function to compute MAP evaluation metric. 
    Code adapted from https://github.com/TeMU-BSC/CodiEsp-Evaluation-Script/blob/master/codiespD_P_evaluation.py
    """
    
    # Input args default values
    if gs_out_path is None: gs_out_path = './intermediate_gs_file.txt' 
    
    pred_out_path = './intermediate_predictions_file.txt'
    ###### 2. Format predictions as TrecRun format: ######
    format_predictions(pred, pred_out_path, valid_codes)
    
    
    ###### 3. Calculate MAP ######
    # Load GS from qrel file
    qrels = TrecQrel(gs_out_path)

    # Load pred from run file
    run = TrecRun(pred_out_path)

    # Calculate MAP
    te = TrecEval(run, qrels)
    MAP = te.get_map(trec_eval=False) # With this option False, rank order is taken from the given document order
    
    ###### 4. Return results ######
    return MAP
Example #4
def eval(topx):
    qrels = TrecQrel("./relevance-args-v2.qrels")

    # Generates a P@10 graph with all the runs in a directory
    path_to_runs = "./runs/"
    runs = procedures.list_of_runs_from_path(path_to_runs, "*")

    for run in runs:
        print(run.get_filename())
        te = TrecEval(run, qrels)

        rbp, residuals = te.get_rbp()

        coverage = run.get_mean_coverage(qrels, topX=topx)
        print(
            "Average number of documents judged among top %.0f: %.2f, thats about: %.2f percent."
            % (topx, coverage, coverage / topx * 100))
        # precision = te.get_precision(depth=topx)
        # print("precision (p"+str(topx)+"): ",precision)

        ndcg = te.get_ndcg(depth=topx, removeUnjudged=False)
        print("nDCG (n=" + str(topx) + " removeUnjudged=False): " + str(ndcg))

        ndcg = te.get_ndcg(depth=topx, removeUnjudged=True)
        print("nDCG (n=" + str(topx) + " removeUnjudged=True): " + str(ndcg))

        print("--------\n")
Example #5
def precision(e: trectools.TrecEval, per_query=False):
    rel_ret = e.get_relevant_retrieved_documents(per_query=per_query)
    ret = e.get_retrieved_documents(per_query=per_query)
    if per_query:
        return (rel_ret / ret).fillna(0)
    else:
        return rel_ret / ret
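
Because both counts come back per topic when per_query=True, the helper can also be averaged over topics; a sketch under the same assumptions (placeholder paths):

# Sketch only: per-topic precision over everything retrieved, then the macro average.
from trectools import TrecRun, TrecQrel, TrecEval

e = TrecEval(TrecRun('bm25.run'), TrecQrel('topics.qrels'))  # placeholder paths
per_topic = precision(e, per_query=True)                     # pandas object indexed by query
print(per_topic.mean())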
Example #6
def trec_eval(file):
    r1 = TrecRun(file)
    qrels = TrecQrel("./dataset/.txt")
    results = TrecEval(r1, qrels)
    p5 = results.get_precision(5)
    p10 = results.get_precision(10)
    p15 = results.get_precision(15)
    print(p5)
    print(p10)
    print(p15)
Example #7
def main(gs_path, pred_path, codes_path):
    '''
    Load GS, predictions and valid codes; format GS and predictions according
    to TREC specifications; compute MAP and print it.

    Parameters
    ----------
    gs_path : str
        Path to Gold Standard TSV with 2 columns: filename, code.
        It has no header row.
    pred_path : str
        Path to predictions TSV with 2 columns: filename, code.
        It has no header row.
    codes_path : str
        Path to TSV file with valid codes.
        It has no header row.

    Returns
    -------
    None.

    '''

    ###### 0. Load valid codes lists: ######
    valid_codes = set(
        pd.read_csv(codes_path, sep='\t', header=None,
                    usecols=[0])[0].tolist())
    valid_codes = set([x.lower() for x in valid_codes])

    ###### 1. Format GS as TrecQrel format: ######
    qid_gs = format_gs(gs_path, './intermediate_gs_file.txt')

    ###### 2. Format predictions as TrecRun format: ######
    format_predictions(pred_path, './intermediate_predictions_file.txt',
                       valid_codes, qid_gs)

    ###### 3. Calculate MAP ######
    # Load GS from qrel file
    qrels = TrecQrel('./intermediate_gs_file.txt')

    # Load pred from run file
    run = TrecRun('./intermediate_predictions_file.txt')

    # Calculate MAP
    te = TrecEval(run, qrels)
    MAP = te.get_map(
        trec_eval=False
    )  # With this option False, rank order is taken from the given document order

    ###### 4. Show results ######
    print('\nMAP estimate: {}\n'.format(round(MAP, 3)))
    #print('\n{}'.format(round(MAP, 3)))
    print('{}|{}'.format(pred_path, round(MAP, 3)))
Example #8
    def setUp(self):
        run1 = TrecRun("./files/r4.run")
        qrels1 = TrecQrel("./files/qrel1.txt")

        run2 = TrecRun("./files/input.uic0301")
        qrels2 = TrecQrel("./files/robust03_cs_qrels.txt")

        # Contains the first 30 documents for the first 10 topics in input.uic0301
        run3 = TrecRun("./files/input.uic0301_top30")
        self.commontopics = [303, 307, 310, 314, 320, 322, 325, 330, 336, 341]
        self.teval1 = TrecEval(run1, qrels1)
        self.teval2 = TrecEval(run2, qrels2)
        self.teval3 = TrecEval(run3, qrels2)
Example #9
def report_run(qrels, corpus, topics, run_file_name):
    run = TrecRun(run_file_name)
    trec_eval = TrecEval(run, qrels)

    ret = {
        'corpus': corpus,
        'topics': topics,
        'tag': run.run_data['system'][0],
        "bpref": trec_eval.getBpref(),
        "pseudoNDCG@10": trec_eval.getNDCG(depth=10, removeUnjudged=True),
        "pseudoNDCG": trec_eval.getNDCG(removeUnjudged=True),
    }

    return json.dumps(ret)
Example #10
def wss(e: trectools.TrecEval) -> float:
    ret = e.get_retrieved_documents()
    r = recall(e)
    N = 30000000
    wss = ((N - ret) / N) - (1.0 - r)
    if wss < 0: return 0
    return wss
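
wss computes Work Saved over Sampling, WSS = (N - retrieved)/N - (1 - recall), with the collection size N hard-coded to 30,000,000. A purely numeric sketch of that arithmetic (toy figures, not taken from any run):

# Toy numbers only, to show the arithmetic behind wss().
N = 30_000_000            # collection size, as hard-coded above
retrieved = 1_500_000     # hypothetical number of retrieved documents
recall_at_cutoff = 0.95   # hypothetical recall at the screening cutoff

wss_value = ((N - retrieved) / N) - (1.0 - recall_at_cutoff)
print(round(wss_value, 2))   # 0.9: reading only 5% of the collection while keeping 95% recall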
Example #11
def report_run(qrels, run_file_name, remove_docs_with_zero_score=False):
    run = TrecRun(run_file_name)
    system = run.run_data['system'][0]
    if remove_docs_with_zero_score:
        run.run_data = run.run_data[run.run_data['score'] > 0]

    trec_eval = TrecEval(run, qrels)

    ret = {
        'corpus': extract_corpus(run_file_name),
        'topics': extract_topics(run_file_name),
        'tag': system,
        "bpref": trec_eval.getBpref(),
        "pseudoNDCG@10": trec_eval.getNDCG(depth=10),
        "pseudoNDCG": trec_eval.getNDCG()
    }

    return json.dumps(ret)
Example #12
def trec_eval_ndcg(run_name,
                   data_path='./data/',
                   depths=[5, 10, 15, 20, 30, 100, 200, 500, 1000]):
    qrel_name = os.path.join(data_path, '2019qrels-pass.txt')
    qrel = TrecQrel(qrel_name)
    res = TrecRun(run_name)
    for depth in depths:
        score = TrecEval(res, qrel).get_ndcg(depth=depth)
        print('ndcg_cut_%d \t all \t %.4f' % (depth, score))
Example #13
def to_trec_df(e: trectools.TrecEval, per_query=True) -> pd.Series:
    return join_series(
        pd.Series({
            "P": precision(e, per_query=per_query),
            "R": recall(e, per_query=per_query),
            # "F$_{0.5}$": f_measure(e, 0.5, per_query=per_query),
            # "F$_1$": f_measure(e, 1, per_query=per_query),
            "NumRet": e.get_retrieved_documents(per_query=per_query)
            # "F$_3$": f_measure(e, 3, per_query=per_query),
        }),
        eval_rank_df(e, per_query=per_query))
Example #14
def wss(e: trectools.TrecEval, per_query=False):
    ret = e.get_retrieved_documents(per_query=per_query)
    r = recall(e, per_query=per_query)
    N = 30000000
    if per_query:
        return pd.Series(
            dict([(t, (((N - ret.T[t]) / N) - (1.0 - r.T[t])))
                  for t in ret.index]))
    else:
        wss = ((N - ret) / N) - (1.0 - r)
        if wss < 0: return 0  # don't ask.
        return wss
Example #15
def eval(qrel_file_path, run_file_path):
    """[summary]
    
    Arguments:
        qrel_file_path {[string]} -- [path of the qrel file usually located at the source language folder]
        run_file_path {[string]} -- [path of the run file usually located at the results folder of a language]
    
    Returns:
        [type] -- [precision@10, precision@20, precision@30, mAP rounded up to four digits]
    """

    r1 = TrecRun(run_file_path)
    qrels = TrecQrel(qrel_file_path)

    te = TrecEval(r1, qrels)
    p5 = te.get_precision(depth=5)
    p10 = te.get_precision(depth=10)
    p20 = te.get_precision(depth=20)
    map = te.get_map()
    rprec = te.get_rprec()
    run_object = r1.evaluate_run(qrels, per_query=True)

    return round(p5, 4), round(p10, 4), round(p20, 4), round(map, 4), round(rprec, 4)
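
A hedged usage sketch of the function above; the qrel and run paths are placeholders.

# Sketch only: unpack the rounded scores returned by the eval() defined above.
p5, p10, p20, map_score, rprec = eval('topics.qrels', 'bm25.run')  # placeholder paths
print(f"P@5={p5}  P@10={p10}  P@20={p20}  MAP={map_score}  Rprec={rprec}")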
Example #16
def main(args):
    gold_labels = TrecQrel(args.gold_labels)
    prediction = TrecRun(args.scores)

    results = TrecEval(prediction, gold_labels)
    metrics = extract_metrics(results, args.metrics)

    metrics.loc[:, '@depth'] = metrics.loc[:, '@depth'].astype(str)
    metrics.loc[:, '@depth'] = metrics.loc[:, '@depth'].replace(str(MAX_DEPTH), 'all')
    if args.output:
        metrics.to_csv(args.output, sep="\t", index=False)
        logger.info(f"Saved results to {args.output}")
    else:
        print(metrics.to_string(index=False))
Example #17
def evaluate(qrels, runs_file, topics, model):
    runs = TrecRun(runs_file)
    ev = TrecEval(runs, qrels)

    path_to_csv = os.path.join("eval", model, "results.csv")

    n_topics = len(topics)

    # Calculate various metrics for each query considering the runs/judgment files provided
    print("Calculating metrics...")
    res = ev.evaluate_all(per_query=True)

    # Write results of evaluation to csv file
    res.printresults(path_to_csv, "csv", perquery=True)

    # Calculate NDCG@100 for each query, since the previous metrics don't include it,
    # and append it to each line of the new csv file
    ndcgs = ev.get_ndcg(depth=100, per_query=True)
    values = [row['NDCG@100'] for i, row in ndcgs.iterrows()]  # 'NDCG@100' is the column name in the returned dataframe
    with open(path_to_csv, 'r') as f:
        lines = [line[:-1] for line in f]  # Remove '\n' from the end of each line
        lines[0] += ",ndcg@100\n"  # Add new column to header
        # Lines 1 to n contain metric values for each of the n queries;
        # line i stores values[i - 1] because the values list starts at 0
        for i in range(1, n_topics + 1):
            lines[i] += "," + str(values[i - 1]) + "\n"
        global_ndcg = ev.get_ndcg(depth=100, per_query=False)  # Calculate global NDCG
        lines[n_topics + 1] += "," + str(global_ndcg) + "\n"  # Append global NDCG to last line
    with open(path_to_csv, 'w') as f:
        f.writelines(lines)  # Overwrite csv file with new content
Example #18
def main(args):
    format_check_passed = run_checks(args.scores)
    if not format_check_passed:
        return
    gold_labels = TrecQrel(args.gold_labels)
    prediction = TrecRun(args.scores)

    results = TrecEval(prediction, gold_labels)
    metrics = extract_metrics(results, args.metrics, args.depths)

    metrics.loc[:, '@depth'] = metrics.loc[:, '@depth'].astype(str)
    metrics.loc[:, '@depth'] = metrics.loc[:, '@depth'].replace(str(MAX_DEPTH), 'all')
    if args.output:
        metrics.to_csv(args.output, sep='\t', index=False)
        logger.info(f'Saved results to file: {args.output}')
    else:
        print(metrics.to_string(index=False))
Example #19
def trec_eval(runs_file_path: Path, qrels_file_path: Path):
    metrics = dict()
    r1 = TrecRun(str(runs_file_path.absolute()))
    qrels = TrecQrel(str(qrels_file_path.absolute()))
    results = TrecEval(r1, qrels)
    metrics["P@5"] = results.get_precision(5)
    metrics["P@10"] = results.get_precision(10)
    metrics["P@15"] = results.get_precision(15)
    metrics["bpref"] = results.get_bpref()
    metrics["map"] = results.get_map()

    metrics = {k: round(v, 4) for k, v in metrics.items()}
    return metrics
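
Since the function returns a plain dict of rounded scores, usage is a one-liner; a sketch with placeholder paths:

# Sketch only: call trec_eval() with pathlib paths and inspect the resulting dict.
from pathlib import Path

scores = trec_eval(Path('runs/bm25.run'), Path('qrels/topics.qrels'))  # placeholder paths
print(scores)   # {'P@5': ..., 'P@10': ..., 'P@15': ..., 'bpref': ..., 'map': ...}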
Example #20
    def setUp(self):
        run1 = TrecRun("./files/r4.run")
        qrels1 = TrecQrel("./files/qrel1.txt")

        run2 = TrecRun("./files/input.uic0301")
        qrels2 = TrecQrel("./files/robust03_cs_qrels.txt")

        # Contains the first 30 documents for the first 10 topics in input.uic0301
        run3 = TrecRun("./files/input.uic0301_top30")
        self.commontopics = [303, 307, 310, 314, 320, 322, 325, 330, 336, 341]
        self.teval1 = TrecEval(run1, qrels1)
        self.teval2 = TrecEval(run2, qrels2)
        self.teval3 = TrecEval(run3, qrels2)
Example #21
    def evaluate_run(self, trec_qrel_obj, per_query):
        from trectools import TrecEval
        evaluator = TrecEval(self, trec_qrel_obj)
        result = evaluator.evaluate_all(per_query)
        return result
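
With the method attached to TrecRun as shown, evaluation collapses into a single call; a hedged sketch with placeholder file names:

# Sketch only: evaluate a run directly through TrecRun.evaluate_run.
from trectools import TrecRun, TrecQrel

result = TrecRun('bm25.run').evaluate_run(TrecQrel('topics.qrels'), per_query=True)
# 'result' is whatever TrecEval.evaluate_all returns for this run/qrels pair.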
Example #22
class TestTrecEval(unittest.TestCase):

    def setUp(self):
        run1 = TrecRun("./files/r4.run")
        qrels1 = TrecQrel("./files/qrel1.txt")

        run2 = TrecRun("./files/input.uic0301")
        qrels2 = TrecQrel("./files/robust03_cs_qrels.txt")

        # Contains the first 30 documents for the first 10 topics in input.uic0301
        run3 = TrecRun("./files/input.uic0301_top30")
        self.commontopics = [303, 307, 310, 314, 320, 322, 325, 330, 336, 341]
        self.teval1 = TrecEval(run1, qrels1)
        self.teval2 = TrecEval(run2, qrels2)
        self.teval3 = TrecEval(run3, qrels2)

    def tearDown(self):
        pass

    def test_getReciprocalRank(self):

        value = self.teval1.getReciprocalRank(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.5000, places=4)

        value = self.teval2.getReciprocalRank(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.6466, places=4)

        values1 = self.teval2.getReciprocalRank(depth=30, per_query=True, trec_eval=True).loc[self.commontopics].values
        values2 = self.teval3.getReciprocalRank(depth=1000, per_query=True, trec_eval=True).loc[self.commontopics].values
        for v1, v2 in zip(values1, values2):
            self.assertAlmostEqual(v1, v2, places=4)

        results = self.teval2.getReciprocalRank(depth=1000, trec_eval=True, per_query=True)
        correct_results = [0.0017, 0.1429, 0.3333]
        values = results.loc[[378,650,624]].values
        for v, c in zip(values, correct_results):
            self.assertAlmostEqual(v, c, places=4)

    def test_getMAP(self):
        value = self.teval1.getMAP(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.2685, places=4)

        value = self.teval2.getMAP(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.2396, places=4)

        values1 = self.teval2.getMAP(depth=30, per_query=True, trec_eval=True).loc[self.commontopics].values
        values2 = self.teval3.getMAP(depth=1000, per_query=True, trec_eval=True).loc[self.commontopics].values
        for v1, v2 in zip(values1, values2):
            self.assertAlmostEqual(v1, v2, places=4)

        results = self.teval2.getMAP(depth=1000, trec_eval=True, per_query=True)
        correct_results = [0.4926, 0.2808, 0.2335]
        values = results.loc[[622, 609, 320]].values
        for v, c in zip(values, correct_results):
            self.assertAlmostEqual(v, c, places=4)

    def test_getPrecision(self):
        value = self.teval1.getPrecision(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.0010, places=4)

        value = self.teval2.getPrecision(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.0371, places=4)

        values1 = self.teval2.getPrecision(depth=30, per_query=True, trec_eval=True).loc[self.commontopics].values
        values2 = self.teval3.getPrecision(depth=30, per_query=True, trec_eval=True).loc[self.commontopics].values
        for v1, v2 in zip(values1, values2):
            self.assertAlmostEqual(v1, v2, places=4)

        values1 = self.teval2.getPrecision(depth=500, per_query=True, trec_eval=True).loc[self.commontopics].values
        values2 = self.teval3.getPrecision(depth=500, per_query=True, trec_eval=True).loc[self.commontopics].values
        for v1, v2 in zip(values1, values2):
            self.assertNotAlmostEqual(v1, v2, places=4)

        results = self.teval2.getPrecision(depth=30, trec_eval=True, per_query=True)
        correct_results = [0.1333, 0.0333, 0.5333]
        values = results.loc[[607, 433, 375]].values
        for v, c in zip(values, correct_results):
            self.assertAlmostEqual(v, c, places=4)
Example #23
class TestTrecEval(unittest.TestCase):
    def setUp(self):
        run1 = TrecRun("./files/r4.run")
        qrels1 = TrecQrel("./files/qrel1.txt")

        run2 = TrecRun("./files/input.uic0301")
        qrels2 = TrecQrel("./files/robust03_cs_qrels.txt")

        # Contains the first 30 documents for the first 10 topics in input.uic0301
        run3 = TrecRun("./files/input.uic0301_top30")
        self.commontopics = [303, 307, 310, 314, 320, 322, 325, 330, 336, 341]
        self.teval1 = TrecEval(run1, qrels1)
        self.teval2 = TrecEval(run2, qrels2)
        self.teval3 = TrecEval(run3, qrels2)

    def tearDown(self):
        pass

    def test_getReciprocalRank(self):

        value = self.teval1.getReciprocalRank(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.5000, places=4)

        value = self.teval2.getReciprocalRank(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.6466, places=4)

        values1 = self.teval2.getReciprocalRank(
            depth=30, per_query=True,
            trec_eval=True).loc[self.commontopics].values
        values2 = self.teval3.getReciprocalRank(
            depth=1000, per_query=True,
            trec_eval=True).loc[self.commontopics].values
        for v1, v2 in zip(values1, values2):
            self.assertAlmostEqual(v1, v2, places=4)

        results = self.teval2.getReciprocalRank(depth=1000,
                                                trec_eval=True,
                                                per_query=True)
        correct_results = [0.0017, 0.1429, 0.3333]
        values = results.loc[[378, 650, 624]].values
        for v, c in zip(values, correct_results):
            self.assertAlmostEqual(v, c, places=4)

    def test_getMAP(self):
        value = self.teval1.getMAP(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.2685, places=4)

        value = self.teval2.getMAP(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.2396, places=4)

        values1 = self.teval2.getMAP(
            depth=30, per_query=True,
            trec_eval=True).loc[self.commontopics].values
        values2 = self.teval3.getMAP(
            depth=1000, per_query=True,
            trec_eval=True).loc[self.commontopics].values
        for v1, v2 in zip(values1, values2):
            self.assertAlmostEqual(v1, v2, places=4)

        results = self.teval2.getMAP(depth=1000,
                                     trec_eval=True,
                                     per_query=True)
        correct_results = [0.4926, 0.2808, 0.2335]
        values = results.loc[[622, 609, 320]].values
        for v, c in zip(values, correct_results):
            self.assertAlmostEqual(v, c, places=4)

    def test_getPrecision(self):
        value = self.teval1.getPrecision(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.0010, places=4)

        value = self.teval2.getPrecision(depth=1000, trec_eval=True)
        self.assertAlmostEqual(value, 0.0371, places=4)

        values1 = self.teval2.getPrecision(
            depth=30, per_query=True,
            trec_eval=True).loc[self.commontopics].values
        values2 = self.teval3.getPrecision(
            depth=30, per_query=True,
            trec_eval=True).loc[self.commontopics].values
        for v1, v2 in zip(values1, values2):
            self.assertAlmostEqual(v1, v2, places=4)

        values1 = self.teval2.getPrecision(
            depth=500, per_query=True,
            trec_eval=True).loc[self.commontopics].values
        values2 = self.teval3.getPrecision(
            depth=500, per_query=True,
            trec_eval=True).loc[self.commontopics].values
        for v1, v2 in zip(values1, values2):
            self.assertNotAlmostEqual(v1, v2, places=4)

        results = self.teval2.getPrecision(depth=30,
                                           trec_eval=True,
                                           per_query=True)
        correct_results = [0.1333, 0.0333, 0.5333]
        values = results.loc[[607, 433, 375]].values
        for v, c in zip(values, correct_results):
            self.assertAlmostEqual(v, c, places=4)
Example #24
from trectools import TrecRun, TrecQrel, TrecEval, fusion

r1 = TrecRun(
    "/storage/proj/petra/projects/podcasts/experiments/experiment5/test_output.5"
)
r2 = TrecRun(
    "/storage/proj/petra/projects/podcasts/experiments/experiment5/test_output.6"
)

# Easy way to create new baselines by fusing existing runs:
#fused_run = fusion.reciprocal_rank_fusion([r1,r2])
fused_run = fusion.combos([r1, r2], strategy="mnz")
print(fused_run)

qrels_file = "/storage/proj/petra/projects/podcasts/podcasts_2020_train.1-8.qrels"
qrels = TrecQrel(qrels_file)

r1_p10 = TrecEval(r1, qrels).get_precision(depth=10)
r2_p10 = TrecEval(r2, qrels).get_precision(depth=10)
fused_run_p10 = TrecEval(fused_run, qrels).get_precision(depth=10)

r1_map = TrecEval(r1, qrels).get_map()
r2_map = TrecEval(r2, qrels).get_map()
fused_run_map = TrecEval(fused_run, qrels).get_map()

r1_ndcg = TrecEval(r1, qrels).get_ndcg()
r2_ndcg = TrecEval(r2, qrels).get_ndcg()
fused_run_ndcg = TrecEval(fused_run, qrels).get_ndcg()

print("NDCG -- Run 1: %.3f, Run 2: %.3f, Fusion Run: %.3f" %
      (r1_ndcg, r2_ndcg, fused_run_ndcg))
print("MAP -- Run 1: %.3f, Run 2: %.3f, Fusion Run: %.3f" %
Example #25
def precision(e: trectools.TrecEval) -> float:
    return e.get_precision(depth=e.get_retrieved_documents(per_query=False), per_query=False)
Example #26
def eval_rank_df(e: trectools.TrecEval) -> pd.Series:
    return pd.Series({
        "nDCG@5": e.get_ndcg(depth=5, per_query=False),
        "nDCG@10": e.get_ndcg(depth=10, per_query=False),
        "nDCG@20": e.get_ndcg(depth=20, per_query=False),
    })
Example #27
    def map(self):
        qrels_file = TrecQrel("./Data/qrel.txt")
        path_to_runs = TrecRun("./Data/run.txt")
        te = TrecEval(path_to_runs, qrels_file)
        dic = {"map": te.get_map(), "ndcg": te.get_ndcg()}
        return dic
Example #28
def recall(e: trectools.TrecEval) -> float:
    return e.get_relevant_retrieved_documents(per_query=False) / e.get_relevant_documents(per_query=False)
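
Together with the precision helper from Example #25, an F-measure can be derived in the usual way; f_measure below is not part of the listed code (Example #13 only references it in comments) and is added here as an illustration.

# Hypothetical helper built on top of the precision()/recall() functions above.
import trectools

def f_measure(e: trectools.TrecEval, beta: float = 1.0) -> float:
    p, r = precision(e), recall(e)
    if p == 0 and r == 0:
        return 0.0
    b2 = beta * beta
    return (1 + b2) * p * r / (b2 * p + r)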
Example #29
def plot_rp_curve(qrels, topics, runs_file, results, model):
    runs = TrecRun(runs_file)
    ev = TrecEval(runs, qrels)

    # Get the relevant documents for each one of the topics
    new_qrels = ev.qrels.qrels_data.copy()
    relevant_docs = {topic: [] for topic in topics}
    for i, row in new_qrels.iterrows():
        # If the returned document is relevant, add it to the list of relevant docs of the respective topic
        if row["rel"] > 0:
            relevant_docs[row["query"]].append(row["docid"])

    num_relevant_docs = {
        topic: num
        for topic, num in ev.get_relevant_documents(per_query=True).items()
    }

    # TrecTools' precision calculations are very slow, so they are calculated "directly"
    # Obtain the recall and precision @k values for every k up to p for each topic and plot them
    for i, topic in enumerate(topics):
        precisions_aux = [0]
        recalls_aux = [0]

        # Get the number of true positives for the given topic
        for j in range(min(p + 1, len(results[i]))):
            # Check if the docid is in the list of relevant documents for that topic
            if results[i][j][0] in relevant_docs[topic]:
                recalls_aux.append(recalls_aux[j] + 1)
                precisions_aux.append(precisions_aux[j] + 1)
            else:
                recalls_aux.append(recalls_aux[j])
                precisions_aux.append(precisions_aux[j])

        # Calculate precision and recall values based on the previous values
        recalls = [x / num_relevant_docs[topic] for x in recalls_aux]
        precisions = [(x / i if i > 0 else 1)
                      for i, x in enumerate(precisions_aux)]

        # Interpolate the precisions calculated before (needed to plot the recall-precision curve)
        interpolated_precisions = precisions.copy()
        j = len(interpolated_precisions) - 2
        while j >= 0:
            if interpolated_precisions[j + 1] > interpolated_precisions[j]:
                interpolated_precisions[j] = interpolated_precisions[j + 1]
            j -= 1

        # Reduce the number of points to plot to avoid excessive memory usage
        recalls = [
            value for j, value in enumerate(recalls)
            if not ((100 < j < 1000 and j % 10 != 0) or
                    (j > 1000 and j % 100 != 0))
        ]
        precisions = [
            value for j, value in enumerate(precisions)
            if not ((100 < j < 1000 and j % 10 != 0) or
                    (j > 1000 and j % 100 != 0))
        ]
        interpolated_precisions = [
            value for j, value in enumerate(interpolated_precisions)
            if not ((100 < j < 1000 and j % 10 != 0) or
                    (j > 1000 and j % 100 != 0))
        ]

        # Plot the precision-recall curve of the topic
        fig, ax = plt.subplots()
        for j in range(len(recalls) - 2):
            ax.plot(
                (recalls[j], recalls[j]),
                (interpolated_precisions[j], interpolated_precisions[j + 1]),
                'k-',
                label='',
                color='red')
            ax.plot((recalls[j], recalls[j + 1]),
                    (interpolated_precisions[j + 1],
                     interpolated_precisions[j + 1]),
                    'k-',
                    label='',
                    color='red')
        ax.plot(recalls, precisions, 'k--', color='blue')
        ax.title.set_text("R" + str(topic))
        ax.set_xlabel("recall")
        ax.set_ylabel("precision")

        # Save plot in eval folder
        fig.savefig(os.path.join("eval", model, f"R{topic}.png"))

        plt.close()

if __name__ == '__main__':
    gs_path, pred_path, codes_path = parse_arguments()

    ###### 0. Load valid codes lists: ######
    valid_codes = set(
        pd.read_csv(codes_path, sep='\t', header=None,
                    usecols=[0])[0].tolist())
    valid_codes = set([x.lower() for x in valid_codes])

    ###### 1. Format GS as TrecQrel format: ######
    format_gs(gs_path, './intermediate_gs_file.txt')

    ###### 2. Format predictions as TrecRun format: ######
    format_predictions(pred_path, './intermediate_predictions_file.txt',
                       valid_codes)

    ###### 3. Calculate MAP ######
    # Load GS from qrel file
    qrels = TrecQrel('./intermediate_gs_file.txt')

    # Load pred from run file
    run = TrecRun('./intermediate_predictions_file.txt')

    # Calculate MAP
    te = TrecEval(run, qrels)
    MAP = te.get_map(
        trec_eval=False
    )  # With this option False, rank order is taken from the given document order

    ###### 4. Show results ######
    print('\nMAP estimate: {}\n'.format(round(MAP, 3)))