示例#1
0
    def test_documents_to_remove_for_leave_one_out_with_single_topic_and_multiple_runs_reverse(
            cls):
        """Check the leave-one-out judgments when the two sample runs are passed in reverse order.

        The header-less CSV rendering of the result is handed to ``verify``.
        """
        first_run, second_run = (
            TrecRun('test/resources/sample-run-file-01'),
            TrecRun('test/resources/sample-run-file-02'),
        )

        # Reverse order on purpose: run 02 comes before run 01.
        judgments_to_remove = identify_judgments_to_remove_for_leave_one_out(
            [second_run, first_run])
        csv_text = judgments_to_remove.to_csv(header=False)

        verify(csv_text)
示例#2
0
    def setUp(self):
        """Create three TrecEval fixtures from the run and qrel files under ``./files``."""
        small_run = TrecRun("./files/r4.run")
        small_qrels = TrecQrel("./files/qrel1.txt")

        robust_run = TrecRun("./files/input.uic0301")
        robust_qrels = TrecQrel("./files/robust03_cs_qrels.txt")

        # Contains the first 30 documents for the first 10 topics in input.uic0301
        robust_top30_run = TrecRun("./files/input.uic0301_top30")

        self.commontopics = [303, 307, 310, 314, 320, 322, 325, 330, 336, 341]

        # teval2 and teval3 share the same qrels object.
        self.teval1 = TrecEval(small_run, small_qrels)
        self.teval2 = TrecEval(robust_run, robust_qrels)
        self.teval3 = TrecEval(robust_top30_run, robust_qrels)
示例#3
0
def make_pool_from_files(filenames,
                         strategy="topX",
                         topX=10,
                         rbp_strategy="sum",
                         rbp_p=0.80,
                         rrf_den=60):
    """
        Loads every file in ``filenames`` as a TrecRun and delegates to ``make_pool``.
        ------
        strategy = (topX, rbp, rrf). Default: topX

        * TOP X options:
        topX = Integer value. The number of documents per query to make the pool.

        * RBP options:
        topX = Integer value. The number of documents per query to make the pool. Default 10.
        rbp_strategy = (max, sum). Only in case strategy=rbp. Default: "sum"
        rbp_p = A float value for RBP's p. Only in case strategy=rbp. Default: 0.80

        * RRF options:
        rrf_den = value for the Reciprocal Rank Fusion denominator. Default: 60

        Returns whatever ``make_pool`` returns for the loaded runs.
    """
    loaded_runs = [TrecRun(path) for path in filenames]

    # All tuning knobs are forwarded by keyword; only the strategy is positional.
    pool_options = dict(topX=topX,
                        rbp_p=rbp_p,
                        rbp_strategy=rbp_strategy,
                        rrf_den=rrf_den)
    return make_pool(loaded_runs, strategy, **pool_options)
示例#4
0
def fuse_runs(run1, run2):
    """Fuse two run files from ``runs/`` with reciprocal rank fusion and save the result.

    ``run1`` and ``run2`` are file names relative to ``runs/``. The fused run
    (limited via ``max_docs=100``) is written to
    ``runs/fuse_<name1>_<name2>.txt``, where each name is the input file name
    with every occurrence of ".txt" removed.
    """
    # https://dl.acm.org/doi/10.1145/1571941.1572114
    loaded = [TrecRun(f"runs/{run1}"), TrecRun(f"runs/{run2}")]
    fused = fusion.reciprocal_rank_fusion(loaded, max_docs=100)

    # Strip ".txt" so the output name does not repeat it for every input.
    stem1, stem2 = (name.replace(".txt", "") for name in (run1, run2))

    output_path = f"runs/fuse_{stem1}_{stem2}.txt"
    fused.print_subset(output_path, topics=fused.topics())
示例#5
0
def eval(qrel_file_path, run_file_path):
    """Evaluate a run file against a qrel file.

    Arguments:
        qrel_file_path {string} -- path of the qrel file
        run_file_path {string} -- path of the run file

    Returns:
        tuple -- (precision@5, precision@10, precision@20, MAP, R-precision),
        each passed through ``round(value, 4)``
    """
    run = TrecRun(run_file_path)
    qrels = TrecQrel(qrel_file_path)
    evaluator = TrecEval(run, qrels)

    # The former ``run.evaluate_run(qrels, per_query=True)`` call was dropped:
    # its result was never used, so it only added cost and a failure point.
    scores = (
        evaluator.get_precision(depth=5),
        evaluator.get_precision(depth=10),
        evaluator.get_precision(depth=20),
        evaluator.get_map(),
        evaluator.get_rprec(),
    )
    return tuple(round(score, 4) for score in scores)
示例#6
0
    def run(self, index, topics, debug=True, model="PL2", ndocs=1000, result_dir=None, result_file="trec_terrier.run", terrierc=None, qexp=False, expTerms=5, expDocs=3, expModel="Bo1", showoutput=False):
        """Build and execute a ``batchretrieve`` command line and load the produced run.

        Parameters:
            index: not referenced by the command built here.
            topics: value passed to ``-t``.
            debug: when true, the command is printed before it is executed.
            model: value passed to ``-w``.
            ndocs: used for both ``-Dmatching.retrieved_set_size`` and
                ``-Dtrec.output.format.length``.
            result_dir: value for ``-Dtrec.results``; the current working
                directory is used when it is None.
            result_file: value passed to ``-o``.
            terrierc: optional value emitted as ``-c c:<value>``. It is
                formatted as given, so fractional values are not truncated.
            qexp: when True, adds ``-q`` plus the expansion settings below.
            expTerms, expDocs, expModel: values for ``-Dexpansion.terms``,
                ``-Dexpansion.documents`` and ``-c qemodel:``.
            showoutput: when False, stdout and stderr are redirected to
                ``os.devnull``.

        Returns:
            A TrecRun loaded from ``result_dir/result_file`` when the command
            exits with status 0; otherwise None after printing an error line.
        """
        if result_dir is None:
            # Current dir is used if result_dir is not set
            result_dir = os.getcwd()

        cmd = "%s batchretrieve -t %s -w %s -Dtrec.results=%s -o %s" % (self.bin_path, topics, model,
                result_dir, result_file)

        cmd += " -Dmatching.retrieved_set_size=%d -Dtrec.output.format.length=%d " % (ndocs, ndocs)

        if terrierc is not None:
            # %s instead of %d: "%d" % 0.75 would silently become "0".
            cmd += " -c c:%s " % (terrierc,)

        if qexp == True:
            cmd += " -q -Dexpansion.terms=%d -Dexpansion.documents=%d -c qemodel:%s" % (expTerms, expDocs, expModel)

        if showoutput == False:
            cmd += (" > %s 2> %s" % (os.devnull, os.devnull))

        if debug:
            print("Running: %s " % (cmd))

        returncode = sarge.run(cmd).returncode

        if returncode != 0:
            print("ERROR with command %s" % (cmd))
            return None
        return TrecRun(os.path.join(result_dir, result_file))
示例#7
0
def report_run_per_query(qrels,
                         run_file_name,
                         remove_docs_with_zero_score=False):
    """Yield one JSON string per query with Bpref and NDCG values for a run file.

    The run is loaded from ``run_file_name``. When
    ``remove_docs_with_zero_score`` is true, only rows with ``score > 0`` are
    kept before evaluation. Each yielded JSON object has the keys ``corpus``,
    ``topic``, ``tag``, ``bpref``, ``pseudoNDCG@10`` and ``pseudoNDCG``.
    """
    run = TrecRun(run_file_name)

    # The system tag is read before any filtering, from the row labelled 0.
    system = run.run_data['system'][0]
    if remove_docs_with_zero_score:
        has_positive_score = run.run_data['score'] > 0
        run.run_data = run.run_data[has_positive_score]

    evaluator = TrecEval(run, qrels)
    bpref = evaluator.getBpref(per_query=True)
    ndcg_10 = evaluator.getNDCG(depth=10, per_query='query')
    ndcg = evaluator.getNDCG(per_query='query')

    per_query = bpref.join(ndcg_10, on='query').join(ndcg, on='query')

    for query, row in per_query.iterrows():
        record = {
            'corpus': extract_corpus(run_file_name),
            'topic': query,
            'tag': system,
            "bpref": row['Bpref@1000'],
            "pseudoNDCG@10": row['NDCG@10'],
            "pseudoNDCG": row['NDCG@1000']
        }
        yield json.dumps(record)
示例#8
0
def compute_map(valid_codes, pred, gs_out_path=None):
    """
    Compute the MAP of ``pred`` against a gold-standard qrel file.

    The predictions are first written to './intermediate_predictions_file.txt'
    by ``format_predictions``; the gold standard is read from ``gs_out_path``
    (default './intermediate_gs_file.txt').
    Code adapted from https://github.com/TeMU-BSC/CodiEsp-Evaluation-Script/blob/master/codiespD_P_evaluation.py
    """
    gs_path = './intermediate_gs_file.txt' if gs_out_path is None else gs_out_path
    predictions_path = './intermediate_predictions_file.txt'

    # Write the predictions in run-file form so TrecRun can load them.
    format_predictions(pred, predictions_path, valid_codes)

    gold = TrecQrel(gs_path)
    run = TrecRun(predictions_path)

    # trec_eval=False: per the original note, rank order is then taken from
    # the given document order.
    return TrecEval(run, gold).get_map(trec_eval=False)
示例#9
0
    def run(self, index, metadata, documents_vector, terms_vector, topics, topic_format="TREC", index_type="block_simdbp", algorithm="block_max_wand", result_dir=None, result_file="trec_pisa.run", ndocs=1000, showerrors=True, debug=True):
        """Build and execute an ``evaluate_queries`` command line and load the produced run.

        ``self.extract_topics(topics, topic_format)`` is called first; the
        command then always reads its queries from ``topics.title``. Standard
        output is redirected to ``result_dir/result_file`` (``result_dir``
        defaults to the current working directory). Unless ``showerrors`` is
        True, stderr is sent to ``os.devnull``.

        Returns a TrecRun for the output file when the command exits with
        status 0; otherwise prints an error line and returns None.
        """
        if result_dir is None:
            # Current dir is used if result_dir is not set
            result_dir = os.getcwd()

        # result_dir is never None past this point, so only result_file decides the target.
        outpath = "" if result_file is None else os.path.join(result_dir, result_file)

        self.extract_topics(topics, topic_format)

        cmd_values = (self.bin_path, index_type, algorithm, index, metadata, documents_vector, terms_vector, ndocs)
        cmd = "%s/evaluate_queries -t %s -a %s -i %s -w %s --documents %s --terms %s -k %s -q topics.title" % cmd_values

        if showerrors == True:
            redirection = " > %s " % (outpath)
        else:
            redirection = " 2> %s > %s "  % (os.devnull, outpath)
        cmd += redirection

        if debug:
            print("Running: %s " % (cmd))

        exit_status = sarge.run(cmd).returncode

        if exit_status != 0:
            print("ERROR with command %s" % (cmd))
            return None
        return TrecRun(os.path.join(result_dir, result_file))
示例#10
0
def list_of_runs_from_path(path, suffix="*"):
    """Load every file matching ``suffix`` (a glob pattern) inside ``path`` as a TrecRun.

    Prints how many runs were found and returns them as a list, in the order
    ``glob`` reported the files.
    """
    pattern = os.path.join(path, suffix)
    runs = [TrecRun(match) for match in glob(pattern)]

    print("Found %s runs in path %s" % (len(runs), path))
    return runs
示例#11
0
def trec_eval_ndcg(run_name,
                   data_path='./data/',
                   depths=(5, 10, 15, 20, 30, 100, 200, 500, 1000)):
    """Print the NDCG of a run at several depths.

    Parameters:
        run_name: path of the run file to load.
        data_path: directory that contains '2019qrels-pass.txt'.
        depths: iterable of cut-off depths; one line is printed per depth.
            The default is a tuple so it cannot be mutated between calls.

    Returns:
        None. Results are written to stdout.
    """
    qrel = TrecQrel(os.path.join(data_path, '2019qrels-pass.txt'))
    run = TrecRun(run_name)

    # Built once: the run and qrels do not change between depths.
    evaluator = TrecEval(run, qrel)
    for depth in depths:
        score = evaluator.get_ndcg(depth=depth)
        print('ndcg_cur_%d \t all \t %.4f' % (depth, score))
示例#12
0
    def run(self,
            index,
            topics,
            model="LM",
            server=None,
            stopper=None,
            result_dir=None,
            result_file="trec_indri.run",
            ndocs=1000,
            qexp=False,
            expTerms=5,
            expDocs=3,
            showerrors=True,
            debug=True,
            queryOffset=1):
        """Build and execute an ``IndriRunQuery`` command line and load the produced run.

        Parameters:
            index: value passed to ``-index=``.
            topics: placed directly after the executable on the command line.
            model: not referenced by the command built here.
            server: optional value for ``-server=``.
            stopper: optional value for ``-stopper.word=``.
            result_dir: directory of the output file; the current working
                directory is used when it is None.
            result_file: name of the output file that stdout is redirected to.
            ndocs: value passed to ``-count=``.
            qexp: when True, adds ``-fbDocs`` and ``-fbTerms``.
            expTerms: value for ``-fbTerms=``.
            expDocs: value for ``-fbDocs=``.
            showerrors: unless True, stderr is redirected to ``os.devnull``.
            debug: when true, the command is printed before it is executed.
            queryOffset: value passed to ``-queryOffset=``.

        Returns:
            A TrecRun loaded from ``result_dir/result_file`` when the command
            exits with status 0; otherwise None after printing an error line.
        """
        if result_dir is None:
            # Current dir is used if result_dir is not set
            result_dir = os.getcwd()

        # result_dir is always set here, so only result_file decides the target.
        outpath = ""
        if result_file is not None:
            outpath = os.path.join(result_dir, result_file)

        cmd = "%s/IndriRunQuery %s -index=%s -trecFormat=true -queryOffset=%d " % (
            self.bin_path, topics, index, queryOffset)

        # Specify number of documents to retrieve
        cmd += " -count=%d " % (ndocs)

        if server is not None:
            cmd += " -server=%s " % (server)

        if stopper is not None:
            cmd += " -stopper.word=%s " % (stopper)

        if qexp == True:
            # fbDocs takes the document count and fbTerms the term count;
            # the two values used to be passed the wrong way round.
            cmd += " -fbDocs=%d -fbTerms=%d " % (expDocs, expTerms)

        if showerrors == True:
            cmd += (" > %s " % (outpath))
        else:
            cmd += (" 2> %s > %s " % (os.devnull, outpath))

        if debug:
            print("Running: %s " % (cmd))

        returncode = sarge.run(cmd).returncode

        if returncode != 0:
            print("ERROR with command %s" % (cmd))
            return None
        return TrecRun(os.path.join(result_dir, result_file))
示例#13
0
def trec_eval(file):
    """Print the precision of the run in ``file`` for the cut-offs 5, 10 and 15.

    The qrels are always read from "./dataset/.txt". All three values are
    computed before anything is printed.
    """
    run = TrecRun(file)
    qrels = TrecQrel("./dataset/.txt")
    evaluator = TrecEval(run, qrels)

    precisions = [evaluator.get_precision(cutoff) for cutoff in (5, 10, 15)]
    for value in precisions:
        print(value)
示例#14
0
def run_file_to_jsonl(input_file, output_file):
    """Convert a run file to JSON Lines, one line per query.

    Each output line is a JSON list holding one dict per row of that query's
    group in the run's ``run_data``.
    """
    from trectools import TrecRun
    import json
    with open(output_file, 'w') as out:
        by_query = TrecRun(input_file).run_data.groupby('query')
        for query_id in by_query.groups:
            records = [
                row.to_dict()
                for _, row in by_query.get_group(query_id).iterrows()
            ]
            out.write(json.dumps(records) + '\n')
示例#15
0
def main(gs_path, pred_path, codes_path):
    '''
    Load the gold standard, the predictions and the valid codes; write the
    gold standard and predictions to intermediate qrel/run files; compute MAP
    and print it.

    Parameters
    ----------
    gs_path : str
        Path to the Gold Standard TSV with 2 columns: filename, code.
        It has no headers row.
    pred_path : str
        Path to the predictions TSV with 2 columns: filename, code.
        It has no headers row.
    codes_path : str
        Path to the TSV file with valid codes (first column is used).
        It has no headers row.

    Returns
    -------
    None.

    '''
    gs_file = './intermediate_gs_file.txt'
    pred_file = './intermediate_predictions_file.txt'

    # 0. Valid codes: first column of the TSV, de-duplicated and lower-cased.
    codes_column = pd.read_csv(codes_path, sep='\t', header=None,
                               usecols=[0])[0]
    valid_codes = {code.lower() for code in set(codes_column.tolist())}

    # 1. Gold standard -> qrel file.
    qid_gs = format_gs(gs_path, gs_file)

    # 2. Predictions -> run file.
    format_predictions(pred_path, pred_file, valid_codes, qid_gs)

    # 3. MAP. With trec_eval=False, per the original note, rank order is
    # taken from the given document order.
    qrels = TrecQrel(gs_file)
    run = TrecRun(pred_file)
    MAP = TrecEval(run, qrels).get_map(trec_eval=False)

    # 4. Report, once human-readable and once as "<pred_path>|<MAP>".
    rounded_map = round(MAP, 3)
    print('\nMAP estimate: {}\n'.format(rounded_map))
    print('{}|{}'.format(pred_path, rounded_map))
示例#16
0
def trec_eval(runs_file_path: "Path | str", qrels_file_path: "Path | str") -> dict:
    """Evaluate a run file against a qrels file and return rounded metrics.

    Parameters:
        runs_file_path: location of the run file, as a ``Path`` or a string.
        qrels_file_path: location of the qrels file, as a ``Path`` or a string.

    Returns:
        A dict with the keys "P@5", "P@10", "P@15", "bpref" and "map", each
        value passed through ``round(value, 4)``.
    """
    # Path(...) accepts both str and Path, so string arguments no longer fail
    # on the ``.absolute()`` call. (``Path or str`` as an annotation simply
    # evaluated to ``Path``.)
    run = TrecRun(str(Path(runs_file_path).absolute()))
    qrels = TrecQrel(str(Path(qrels_file_path).absolute()))
    results = TrecEval(run, qrels)

    metrics = {
        "P@5": results.get_precision(5),
        "P@10": results.get_precision(10),
        "P@15": results.get_precision(15),
        "bpref": results.get_bpref(),
        "map": results.get_map(),
    }
    return {name: round(value, 4) for name, value in metrics.items()}
def report_run(qrels, corpus, topics, run_file_name):
    """Return a JSON string with Bpref and NDCG values for one run file.

    The JSON object has the keys ``corpus``, ``topics``, ``tag`` (the
    ``system`` value of the run's row labelled 0), ``bpref``,
    ``pseudoNDCG@10`` and ``pseudoNDCG``. Both NDCG values are requested with
    ``removeUnjudged=True``.
    """
    run = TrecRun(run_file_name)
    evaluator = TrecEval(run, qrels)

    system_tag = run.run_data['system'][0]
    bpref = evaluator.getBpref()
    ndcg_at_10 = evaluator.getNDCG(depth=10, removeUnjudged=True)
    ndcg_full = evaluator.getNDCG(removeUnjudged=True)

    report = {
        'corpus': corpus,
        'topics': topics,
        'tag': system_tag,
        "bpref": bpref,
        "pseudoNDCG@10": ndcg_at_10,
        "pseudoNDCG": ndcg_full,
    }
    return json.dumps(report)
def main(args):
    """Evaluate ``args.scores`` against ``args.gold_labels`` and output the metrics table.

    The table comes from ``extract_metrics``. Its '@depth' column is converted
    to strings and the value ``str(MAX_DEPTH)`` is replaced by 'all'. The
    table is saved as a tab-separated file when ``args.output`` is set and
    printed otherwise.
    """
    gold_labels = TrecQrel(args.gold_labels)
    prediction = TrecRun(args.scores)

    evaluator = TrecEval(prediction, gold_labels)
    metrics = extract_metrics(evaluator, args.metrics)

    depth_as_text = metrics.loc[:, '@depth'].astype(str)
    metrics.loc[:, '@depth'] = depth_as_text
    relabelled = metrics.loc[:, '@depth'].replace(str(MAX_DEPTH), 'all')
    metrics.loc[:, '@depth'] = relabelled

    if not args.output:
        print(metrics.to_string(index=False))
        return

    metrics.to_csv(args.output, sep="\t", index=False)
    logger.info(f"Saved results to {args.output}")
示例#19
0
def main(args):
    """Check the format of ``args.scores``, evaluate it and output the metrics table.

    Nothing is evaluated when ``run_checks`` reports a failure. Otherwise the
    table from ``extract_metrics`` gets its '@depth' column converted to
    strings, with ``str(MAX_DEPTH)`` replaced by 'all'. The table is saved as
    a tab-separated file when ``args.output`` is set and printed otherwise.
    """
    if not run_checks(args.scores):
        return

    gold_labels = TrecQrel(args.gold_labels)
    prediction = TrecRun(args.scores)

    evaluator = TrecEval(prediction, gold_labels)
    metrics = extract_metrics(evaluator, args.metrics, args.depths)

    depth_as_text = metrics.loc[:, '@depth'].astype(str)
    metrics.loc[:, '@depth'] = depth_as_text
    relabelled = metrics.loc[:, '@depth'].replace(str(MAX_DEPTH), 'all')
    metrics.loc[:, '@depth'] = relabelled

    if not args.output:
        print(metrics.to_string(index=False))
        return

    metrics.to_csv(args.output, sep='\t', index=False)
    logger.info(f'Saved results to file: {args.output}')
示例#20
0
def report_run(qrels, run_file_name, remove_docs_with_zero_score=False):
    """Return a JSON string with Bpref and NDCG values for one run file.

    When ``remove_docs_with_zero_score`` is true, only rows with ``score > 0``
    are kept before evaluation. The JSON object has the keys ``corpus`` and
    ``topics`` (both derived from the file name), ``tag``, ``bpref``,
    ``pseudoNDCG@10`` and ``pseudoNDCG``.
    """
    run = TrecRun(run_file_name)

    # The system tag is read before any filtering, from the row labelled 0.
    system = run.run_data['system'][0]
    if remove_docs_with_zero_score:
        has_positive_score = run.run_data['score'] > 0
        run.run_data = run.run_data[has_positive_score]

    evaluator = TrecEval(run, qrels)

    corpus = extract_corpus(run_file_name)
    topics = extract_topics(run_file_name)
    bpref = evaluator.getBpref()
    ndcg_at_10 = evaluator.getNDCG(depth=10)
    ndcg_full = evaluator.getNDCG()

    report = {
        'corpus': corpus,
        'topics': topics,
        'tag': system,
        "bpref": bpref,
        "pseudoNDCG@10": ndcg_at_10,
        "pseudoNDCG": ndcg_full
    }
    return json.dumps(report)
def collect(qrelsFilePath, baseDir):
    """Average the P_10 score of every run file over all topic directories.

    ``baseDir`` is expected to contain one directory per topic (named so that
    ``int(name)`` works), each holding one directory per model, each holding
    run files. The last four characters of every model directory name are
    dropped to form the model name.

    Returns a dict ``{modelName: {fileName: mean score}}``. Empty run files
    count as a score of 0.
    """
    qrels = TrecQrel(qrelsFilePath)

    result = {}
    topic_dirs = sorted(_getDirectoryContent(baseDir, directory=True),
                        key=lambda entry: int(entry[1]))
    for topicPath, topicNum in topic_dirs:
        for modelPath, modelName in _getDirectoryContent(topicPath,
                                                         directory=True):
            modelName = modelName[:-4]
            scores_by_file = result.setdefault(modelName, {})

            for filePath, fileName in _getDirectoryContent(modelPath,
                                                           file=True):
                # only evaluate non empty files
                if os.path.getsize(filePath) > 0:
                    evaluated = TrecRun(filePath).evaluate_run(qrels, True)
                    per_query = list(
                        evaluated.get_results_for_metric('P_10').values())
                    score = np.mean(per_query)
                else:
                    score = 0

                scores_by_file.setdefault(fileName, []).append(score)
            print("Finished processing model {} of topic {}".format(
                modelName, topicNum))
        print("Finished processing topic: ", topicNum)

    # Collapse each per-topic score list into its average.
    for scores_by_file in result.values():
        for comparisonName, scores in scores_by_file.items():
            scores_by_file[comparisonName] = sum(scores) / len(scores)

    return result
示例#22
0
def reciprocal_rank_fusion(trec_runs, k=60, max_docs=1000, output=sys.stdout):
    """
        Implements reciprocal rank fusion as defined in
        ``Reciprocal Rank fusion outperforms Condorcet and individual Rank Learning Methods`` by Cormack, Clarke and Buettcher.

        Every document contributes ``1 / (k + position)`` per run, counting
        positions from 1 within the top 1000 documents of each run. Documents
        are ranked by descending fused score, ties broken by ascending docid.
        Only the topics of the first run are fused.

        Parameters:
            trec_runs: non-empty list of runs to fuse.
            k: term to avoid vanishing importance of lower-ranked documents. Default value is 60 (default value used in their paper).
            max_docs: maximum number of documents kept per topic.
            output: unused; kept so existing callers that pass it keep working.

        Returns:
            A new TrecRun whose ``run_data`` has the columns
            query, q0, docid, rank, score and system.
    """
    column_names = ["query", "q0", "docid", "rank", "score", "system"]

    fused_run = TrecRun()
    rows = []

    for topic in sorted(trec_runs[0].topics()):
        doc_scores = {}
        for run in trec_runs:
            top_docs = run.get_top_documents(topic, n=1000)
            for pos, docid in enumerate(top_docs, start=1):
                doc_scores[docid] = doc_scores.get(docid, 0.0) + 1.0 / (k + pos)

        ranked = sorted(doc_scores.items(),
                        key=lambda item: (-item[1], item[0]))[:max_docs]
        for rank, (docid, score) in enumerate(ranked, start=1):
            rows.append((topic, "Q0", docid, rank, score,
                         "reciprocal_rank_fusion_k=%d" % k))

    # Naming the columns in the constructor also works when ``rows`` is empty;
    # assigning ``df.columns`` afterwards raised a length mismatch in that case.
    df = pd.DataFrame(rows, columns=column_names)
    # ``np.str`` was removed from NumPy; the builtin ``str`` is the same dtype.
    df["q0"] = df["q0"].astype(str)
    fused_run.run_data = df

    return fused_run
示例#23
0
def evaluate(qrels, runs_file, topics, model):
    """Evaluate a run file per query, write the results to CSV and append an NDCG@100 column.

    The CSV is written to ``eval/<model>/results.csv`` and then rewritten in
    place: the header gets an extra ``ndcg@100`` field, lines 1..len(topics)
    get the per-query NDCG@100 values, and line len(topics)+1 gets the overall
    NDCG@100.

    Parameters:
        qrels: judgments object handed to ``TrecEval``.
        runs_file: path of the run file to load.
        topics: only its length is used, as the number of per-query lines.
        model: name of the sub-directory of ``eval`` that receives the CSV.

    NOTE(review): assumes the CSV holds one header line, then one line per
    topic in the same order as the rows of ``ndcgs``, then a summary line —
    confirm against the output of ``printresults``.
    """
    runs = TrecRun(runs_file)
    ev = TrecEval(runs, qrels)

    path_to_csv = os.path.join("eval", model, "results.csv")

    n_topics = len(topics)

    # Calculate various metrics for each query considering the runs/judgment files provided
    print("Calculating metrics...")
    res = ev.evaluate_all(per_query=True)

    # Write results of evaluation to csv file
    res.printresults(path_to_csv, "csv", perquery=True)

    # Calculate NDCG@100 for each query, since the previous metrics don't include it,
    # and append it to each line of the new csv file
    ndcgs = ev.get_ndcg(depth=100, per_query=True)
    values = [row['NDCG@100'] for i, row in ndcgs.iterrows()
              ]  # Column name of Pandas dataframe storing the data
    with open(path_to_csv, 'r') as f:
        # NOTE(review): line[:-1] drops the last character unconditionally, so
        # a final line without a trailing '\n' would lose a real character.
        lines = [line[:-1]
                 for line in f]  # Drop the last character ('\n' when present) of each line
        lines[0] += ",ndcg@100\n"  # Add new column to header
        for i in range(
                1, n_topics + 1
        ):  # Lines 1 to n contain metric values for each of the n queries
            lines[i] += "," + str(
                values[i - 1]
            ) + "\n"  # Line i receives value i-1 because ``values`` is 0-based
        global_ndcg = ev.get_ndcg(depth=100,
                                  per_query=False)  # Calculate global NDCG
        # NOTE(review): any line after index n_topics + 1 is written back
        # without a newline, since '\n' is only re-added to lines 0..n_topics+1.
        lines[n_topics + 1] += "," + str(
            global_ndcg) + "\n"  # Append global NDCG to line n_topics + 1
    with open(path_to_csv, 'w') as f:
        f.writelines(lines)  # Overwrite csv file with new content
示例#24
0
 def setUp(self):
     """Load the run file ./files/r1.run into ``self.run``."""
     self.run = TrecRun("./files/r1.run")
示例#25
0
 def test_topics_intersection_with(self):
     """The topics shared by ``self.run`` and ./files/r2.run must be exactly {1}."""
     other_run = TrecRun("./files/r2.run")
     shared_topics = self.run.topics_intersection_with(other_run)
     self.assertSetEqual(shared_topics, {1})
示例#26
0
from trectools import TrecRun, TrecEval, TrecQrel, fusion

# Load the two runs that are going to be fused and compared.
r1 = TrecRun(
    "/storage/proj/petra/projects/podcasts/experiments/experiment5/test_output.5"
)
r2 = TrecRun(
    "/storage/proj/petra/projects/podcasts/experiments/experiment5/test_output.6"
)

# Easy way to create new baselines by fusing existing runs:
#fused_run = fusion.reciprocal_rank_fusion([r1,r2])
fused_run = fusion.combos([r1, r2], strategy="mnz")
print(fused_run)

qrels_file = "/storage/proj/petra/projects/podcasts/podcasts_2020_train.1-8.qrels"
qrels = TrecQrel(qrels_file)

# Precision at depth 10 for both inputs and for the fused run.
# NOTE(review): these lines used to carry trailing "P@25: ..." values
# (0.3392 / 0.2872 / 0.3436) although depth=10 is requested; the same values
# were also pasted onto the MAP and NDCG lines. They look like leftovers from
# another example and were dropped — re-measure before quoting numbers here.
r1_p10 = TrecEval(r1, qrels).get_precision(depth=10)
r2_p10 = TrecEval(r2, qrels).get_precision(depth=10)
fused_run_p10 = TrecEval(fused_run,
                         qrels).get_precision(depth=10)

# MAP for both inputs and for the fused run.
r1_map = TrecEval(r1, qrels).get_map()
r2_map = TrecEval(r2, qrels).get_map()
fused_run_map = TrecEval(fused_run, qrels).get_map()

# NDCG (default arguments) for both inputs and for the fused run.
r1_ndcg = TrecEval(r1, qrels).get_ndcg()
r2_ndcg = TrecEval(r2, qrels).get_ndcg()
fused_run_ndcg = TrecEval(fused_run, qrels).get_ndcg()

print("NDCG -- Run 1: %.3f, Run 2: %.3f, Fusion Run: %.3f" %
示例#27
0
    elif retrieval_approach == 'lr':
        mypath = "../../data/runs/iterative_lr_run_dir/clean"
    else:
        mypath = "../../data/runs/iterative_lr_ir_run_dir/"

    run_files = [
        join(mypath, f) for f in listdir(mypath)
        if isfile(join(mypath, f)) and f.endswith('.run')
    ]
    print run_files
    p_10 = np.zeros(30)
    p_20 = np.zeros(30)
    count = 0

    for run_file in run_files:
        run = TrecRun(run_file)
        #print 'run loaded'
        res = run.evaluate_run(myQrel)
        #print 'run evaluated'
        keys = [item for item in res.get_results_for_metric("P_20").keys()]
        keys = sorted(keys, key=int)
        values_p20 = [res.get_results_for_metric("P_20")[i] for i in keys]
        values_p20 = np.asarray(values_p20)
        p_20 += values_p20

        keys = [item for item in res.get_results_for_metric("P_10").keys()]
        keys = sorted(keys, key=int)
        values_p10 = [res.get_results_for_metric("P_10")[i] for i in keys]
        values_p10 = np.asarray(values_p10)
        p_10 += values_p10
        count += 1
示例#28
0
def load_trec_runs(paths: List[str]) -> List[TrecRun]:
    """Announce how many runs will be loaded, then load each path as a TrecRun, in order."""
    print(f'Loading {len(paths)} runs')
    return list(map(TrecRun, paths))
示例#29
0
 def map(self):
     """Evaluate ./Data/run.txt against ./Data/qrel.txt.

     Returns a dict with the keys "map" and "ndcg".
     """
     qrels = TrecQrel("./Data/qrel.txt")
     run = TrecRun("./Data/run.txt")
     evaluator = TrecEval(run, qrels)

     mean_average_precision = evaluator.get_map()
     ndcg = evaluator.get_ndcg()
     return {"map": mean_average_precision, "ndcg": ndcg}
from trectools import TrecRun, TrecQrel
from trectools import procedures
import glob
import os

# Directory holding the task-1 run files and the qrels used to judge them.
task1_run_filepath = "../runs_t1/"
qrels_top = "../qrels/task1.qrels"

# Every *.txt file in the run directory is treated as one run.
filepath = glob.glob(os.path.join(task1_run_filepath, "*.txt"))
topqrels = TrecQrel(qrels_top)

# Evaluation result of each run, in the order glob returned the files.
results = []

for filename in filepath:
    r = TrecRun(filename)
    res = r.evaluate_run(topqrels)
    results.append(res)

# For each metric: collect the value across all runs, then plot the system
# ranking into a JPEG in the current directory.
p10 = procedures.get_results(results, "P_10")
procedures.plot_system_rank("task1_p10.jpg", p10, "P@10")

bpref = procedures.get_results(results, "bpref")
procedures.plot_system_rank("task1_bpref.jpg", bpref, "BPREF")

# Trailing underscore keeps the builtin ``map`` usable.
map_ = procedures.get_results(results, "map")
procedures.plot_system_rank("task1_map.jpg", map_, "MAP")