Exemplos de uso da função covid_baseline_tools.evaluate_runs em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: generate_round4_doc2query_baselines.py Projeto: mayankanand007/anserini

def main():
    """Regenerate the round 4 'expanded' runs and check them against expected metrics.

    Steps, in order: confirm the three index directories exist, verify the stored runs,
    perform the runs, fuse them, prepare the final submissions, then evaluate
    ``cumulative_runs`` against the round 4 cumulative qrels and ``final_runs`` against
    the round 4 qrels.

    Returns early (after printing a message) when any of the three index directories is
    missing, instead of continuing into steps that the message says cannot work.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Abort here: the message asks the user to download the indexes before running.
        return

    round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt'
    round4_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4.txt'
    round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'

    # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669
    check_md5_flag = False

    verify_stored_runs(stored_runs)
    perform_runs(round3_cumulative_qrels)
    perform_fusion(check_md5=check_md5_flag)
    prepare_final_submissions(round3_cumulative_qrels, check_md5=check_md5_flag)

    # Expected scores for the cumulative runs, keyed by run file name.
    cumulative_expected = {
        'expanded.anserini.covid-r4.abstract.qq.bm25.txt':
            {'topics': 45, 'ndcg_cut_10': 0.6115, 'judged_cut_10': 0.8022, 'ndcg_cut_20': 0.5823,
             'judged_cut_20': 0.7900, 'map': 0.2499, 'recall_1000': 0.5038, 'judged_cut_1000': 0.2676},
        'expanded.anserini.covid-r4.abstract.qdel.bm25.txt':
            {'topics': 45, 'ndcg_cut_10': 0.6321, 'judged_cut_10': 0.8022, 'ndcg_cut_20': 0.5922,
             'judged_cut_20': 0.7678, 'map': 0.2528, 'recall_1000': 0.5098, 'judged_cut_1000': 0.2672},
        'expanded.anserini.covid-r4.full-text.qq.bm25.txt':
            {'topics': 45, 'ndcg_cut_10': 0.6045, 'judged_cut_10': 0.9044, 'ndcg_cut_20': 0.5640,
             'judged_cut_20': 0.8522, 'map': 0.2420, 'recall_1000': 0.4996, 'judged_cut_1000': 0.3037},
        'expanded.anserini.covid-r4.full-text.qdel.bm25.txt':
            {'topics': 45, 'ndcg_cut_10': 0.6514, 'judged_cut_10': 0.9289, 'ndcg_cut_20': 0.5991,
             'judged_cut_20': 0.8711, 'map': 0.2665, 'recall_1000': 0.5240, 'judged_cut_1000': 0.3114},
        'expanded.anserini.covid-r4.paragraph.qq.bm25.txt':
            {'topics': 45, 'ndcg_cut_10': 0.6429, 'judged_cut_10': 0.8622, 'ndcg_cut_20': 0.6080,
             'judged_cut_20': 0.8333, 'map': 0.2932, 'recall_1000': 0.5635, 'judged_cut_1000': 0.3256},
        'expanded.anserini.covid-r4.paragraph.qdel.bm25.txt':
            {'topics': 45, 'ndcg_cut_10': 0.6694, 'judged_cut_10': 0.8622, 'ndcg_cut_20': 0.6229,
             'judged_cut_20': 0.8411, 'map': 0.2953, 'recall_1000': 0.5677, 'judged_cut_1000': 0.3232},
        'expanded.anserini.covid-r4.fusion1.txt':
            {'topics': 45, 'ndcg_cut_10': 0.6739, 'judged_cut_10': 0.8778, 'ndcg_cut_20': 0.6188,
             'judged_cut_20': 0.8533, 'map': 0.2914, 'recall_1000': 0.5750, 'judged_cut_1000': 0.3362},
        'expanded.anserini.covid-r4.fusion2.txt':
            {'topics': 45, 'ndcg_cut_10': 0.6618, 'judged_cut_10': 0.8622, 'ndcg_cut_20': 0.6331,
             'judged_cut_20': 0.8444, 'map': 0.2974, 'recall_1000': 0.5847, 'judged_cut_1000': 0.3344},
        'expanded.anserini.covid-r4.abstract.qdel.bm25+rm3Rf.txt':
            {'topics': 45, 'ndcg_cut_10': 0.7447, 'judged_cut_10': 0.8933, 'ndcg_cut_20': 0.7067,
             'judged_cut_20': 0.8589, 'map': 0.3182, 'recall_1000': 0.5812, 'judged_cut_1000': 0.2904},
    }
    evaluate_runs(round4_cumulative_qrels, cumulative_runs, expected=cumulative_expected, check_md5=check_md5_flag)

    # Expected scores for the final submissions, keyed by run file name.
    final_expected = {
        'expanded.anserini.final-r4.fusion1.txt':
            {'topics': 45, 'ndcg_cut_10': 0.5395, 'judged_cut_10': 0.7222, 'ndcg_cut_20': 0.5115,
             'judged_cut_20': 0.6944, 'map': 0.2498, 'recall_1000': 0.6717, 'judged_cut_1000': 0.1424},
        'expanded.anserini.final-r4.fusion2.txt':
            {'topics': 45, 'ndcg_cut_10': 0.5630, 'judged_cut_10': 0.7444, 'ndcg_cut_20': 0.5175,
             'judged_cut_20': 0.6911, 'map': 0.2550, 'recall_1000': 0.6800, 'judged_cut_1000': 0.1434},
        'expanded.anserini.final-r4.rf.txt':
            {'topics': 45, 'ndcg_cut_10': 0.6062, 'judged_cut_10': 0.7378, 'ndcg_cut_20': 0.5606,
             'judged_cut_20': 0.6833, 'map': 0.2658, 'recall_1000': 0.6759, 'judged_cut_1000': 0.1284},
    }
    evaluate_runs(round4_qrels, final_runs, expected=final_expected, check_md5=check_md5_flag)

Exemplo n.º 2

0

Exibir arquivo

def main():
    """Regenerate the round 1 runs and check them against expected metrics.

    Steps, in order: confirm the three index directories exist, perform the runs, fuse
    them, then evaluate ``runs`` against the round 1 qrels.

    Returns early (after printing a message) when any of the three index directories is
    missing, instead of continuing into steps that the message says cannot work.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Abort here: the message asks the user to download the indexes before running.
        return

    round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'

    # Note that this script was written after this issue was noted: https://github.com/castorini/anserini/issues/1669
    # Thus, no point in checking MD5.
    check_md5_flag = False

    perform_runs()
    perform_fusion()

    # Expected scores per run file name.
    expected_metrics = {
        'anserini.covid-r1.abstract.query.bm25.txt':
            {'ndcg_cut_10': 0.4100, 'judged_cut_10': 0.8267, 'recall_1000': 0.5279},
        'anserini.covid-r1.abstract.question.bm25.txt':
            {'ndcg_cut_10': 0.5179, 'judged_cut_10': 0.9833, 'recall_1000': 0.6313},
        'anserini.covid-r1.abstract.query+question.bm25.txt':
            {'ndcg_cut_10': 0.5514, 'judged_cut_10': 0.9833, 'recall_1000': 0.6989},
        'anserini.covid-r1.abstract.query+question+narrative.bm25.txt':
            {'ndcg_cut_10': 0.5294, 'judged_cut_10': 0.9333, 'recall_1000': 0.6929},
        'anserini.covid-r1.abstract.query-udel.bm25.txt':
            {'ndcg_cut_10': 0.5824, 'judged_cut_10': 0.9567, 'recall_1000': 0.6927},
        'anserini.covid-r1.abstract.query-covid19.bm25.txt':
            {'ndcg_cut_10': 0.4520, 'judged_cut_10': 0.6500, 'recall_1000': 0.5061},
        'anserini.covid-r1.full-text.query.bm25.txt':
            {'ndcg_cut_10': 0.3900, 'judged_cut_10': 0.7433, 'recall_1000': 0.6277},
        'anserini.covid-r1.full-text.question.bm25.txt':
            {'ndcg_cut_10': 0.3439, 'judged_cut_10': 0.9267, 'recall_1000': 0.6389},
        'anserini.covid-r1.full-text.query+question.bm25.txt':
            {'ndcg_cut_10': 0.4064, 'judged_cut_10': 0.9367, 'recall_1000': 0.6714},
        'anserini.covid-r1.full-text.query+question+narrative.bm25.txt':
            {'ndcg_cut_10': 0.3280, 'judged_cut_10': 0.7567, 'recall_1000': 0.6591},
        'anserini.covid-r1.full-text.query-udel.bm25.txt':
            {'ndcg_cut_10': 0.5407, 'judged_cut_10': 0.9067, 'recall_1000': 0.7214},
        'anserini.covid-r1.full-text.query-covid19.bm25.txt':
            {'ndcg_cut_10': 0.2434, 'judged_cut_10': 0.5233, 'recall_1000': 0.5692},
        'anserini.covid-r1.paragraph.query.bm25.txt':
            {'ndcg_cut_10': 0.4302, 'judged_cut_10': 0.8400, 'recall_1000': 0.4327},
        'anserini.covid-r1.paragraph.question.bm25.txt':
            {'ndcg_cut_10': 0.4410, 'judged_cut_10': 0.9167, 'recall_1000': 0.5111},
        'anserini.covid-r1.paragraph.query+question.bm25.txt':
            {'ndcg_cut_10': 0.5450, 'judged_cut_10': 0.9733, 'recall_1000': 0.5743},
        'anserini.covid-r1.paragraph.query+question+narrative.bm25.txt':
            {'ndcg_cut_10': 0.4899, 'judged_cut_10': 0.8967, 'recall_1000': 0.5918},
        'anserini.covid-r1.paragraph.query-udel.bm25.txt':
            {'ndcg_cut_10': 0.5544, 'judged_cut_10': 0.9200, 'recall_1000': 0.5640},
        'anserini.covid-r1.paragraph.query-covid19.bm25.txt':
            {'ndcg_cut_10': 0.3180, 'judged_cut_10': 0.5333, 'recall_1000': 0.3552},
        'anserini.covid-r1.fusion1.txt':
            {'ndcg_cut_10': 0.5716, 'judged_cut_10': 0.9867, 'recall_1000': 0.8122},
        'anserini.covid-r1.fusion2.txt':
            {'ndcg_cut_10': 0.6019, 'judged_cut_10': 0.9733, 'recall_1000': 0.8121},
    }
    evaluate_runs(round1_qrels, runs, expected=expected_metrics, check_md5=check_md5_flag)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: generate_round5_baselines.py Projeto: wangcongcong123/anserini

def main():
    """Regenerate the runs and evaluate them against the round 4 cumulative qrels.

    Steps, in order: confirm the three index directories exist, verify the stored runs,
    perform the runs, fuse them, prepare the final submissions, then evaluate
    ``cumulative_runs``.

    Returns early (after printing a message) when any of the three index directories is
    missing, instead of continuing into steps that the message says cannot work.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Abort here: the message asks the user to download the indexes before running.
        return

    cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'

    verify_stored_runs(stored_runs)
    perform_runs()
    perform_fusion()
    prepare_final_submissions(cumulative_qrels)
    evaluate_runs(cumulative_qrels, cumulative_runs)

Exemplo n.º 4

0

Exibir arquivo

def main():
    """Evaluate the already-generated cumulative runs against the round 4 cumulative qrels.

    This variant only evaluates: the run-generation steps (verify_stored_runs, perform_runs,
    perform_fusion, prepare_final_submissions) are intentionally not invoked here.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        # Warning only: no step that is still enabled below takes the indexes as input.
        # NOTE(review): confirm evaluate_runs does not need them; if it does, return here.
        print('Required indexes do not exist. Please download first.')

    # Only this qrels file is used. Earlier assignments to 'qrels.covid-round12.txt' and
    # 'qrels.covid-round4-30topics.txt' (same directory) were overwritten before use and
    # have been removed as dead code.
    # NOTE(review): the path is an absolute, machine-specific location.
    cumulative_qrels = 'C:/Users/Allemaal/Desktop/ubuntu/Desktop/anserini/src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'

    evaluate_runs(cumulative_qrels, cumulative_runs, check_md5=False)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: generate_round5_baselines.py Projeto: kasys-lab/anserini-jp

def main():
    """Regenerate the round 5 runs and evaluate them against three qrels files.

    Steps, in order: confirm the three index directories exist, verify the stored runs,
    perform the runs, fuse them, prepare the final submissions, then evaluate
    ``cumulative_runs`` against the round 4 cumulative and the complete qrels, and
    ``final_runs`` against the round 5 qrels. MD5 checking is enabled throughout.

    Returns early (after printing a message) when any of the three index directories is
    missing, instead of continuing into steps that the message says cannot work.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Abort here: perform_runs below is handed these indexes.
        return

    round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'
    complete_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-complete.txt'
    round5_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round5.txt'

    verify_stored_runs(stored_runs)
    perform_runs(5, indexes)
    perform_fusion(5, cumulative_runs, check_md5=True)
    prepare_final_submissions(5, final_runs, check_md5=True)

    evaluate_runs(round4_cumulative_qrels, cumulative_runs, check_md5=True)
    evaluate_runs(complete_qrels, cumulative_runs, check_md5=True)
    evaluate_runs(round5_qrels, final_runs, check_md5=True)

Exemplo n.º 6

0

Exibir arquivo

Arquivo: generate_round3_baselines.py Projeto: kasys-lab/anserini-jp

def main():
    """Regenerate the round 3 runs and evaluate them against three qrels files.

    Steps, in order: confirm the three index directories exist, build the round 2
    cumulative qrels by concatenating the round 1 and round 2 qrels, verify the stored
    runs, perform the runs, fuse them, prepare the final submissions, then evaluate
    ``cumulative_runs`` against the round 2 and round 3 cumulative qrels and
    ``final_runs`` against the round 3 qrels. MD5 checking is enabled throughout.

    Returns early (after printing a message) when any of the three index directories is
    missing, instead of continuing into steps that the message says cannot work.

    Raises:
        OSError: if a per-round qrels file cannot be read or the cumulative file cannot
            be written (previously a failing shell ``cat`` was silently ignored).
    """
    import shutil  # local import: only needed for the qrels concatenation below

    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Abort here: perform_runs below is handed these indexes.
        return

    round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'
    round2_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2.txt'
    round2_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt'
    round3_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3.txt'
    round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt'

    # Byte-for-byte concatenation (round 1 then round 2), done in Python instead of a shell
    # `cat ... > ...` so it needs no POSIX shell and a missing input raises an error.
    with open(round2_cumulative_qrels, 'wb') as merged:
        for part_path in (round1_qrels, round2_qrels):
            with open(part_path, 'rb') as part:
                shutil.copyfileobj(part, merged)

    verify_stored_runs(stored_runs)
    perform_runs(3, indexes)
    perform_fusion(3, cumulative_runs, check_md5=True)
    prepare_final_submissions(3, final_runs, check_md5=True)

    evaluate_runs(round2_cumulative_qrels, cumulative_runs, check_md5=True)
    evaluate_runs(round3_cumulative_qrels, cumulative_runs, check_md5=True)
    evaluate_runs(round3_qrels, final_runs, check_md5=True)

Exemplo n.º 7

0

Exibir arquivo

def main():
    """Regenerate the round 3 runs, fetch the post-processed runs, and evaluate everything.

    Steps, in order: confirm the three index directories exist, build the combined
    round 1+2 qrels file, verify the stored runs, perform the runs, fuse them, prepare
    the final submissions, evaluate ``cumulative_runs`` against the round 1+2 and the
    round 3 cumulative qrels, download three post-processed runs into ``runs``, and
    finally evaluate ``final_runs`` against the round 3 qrels.

    Returns early (after printing a message) when any of the three index directories is
    missing, instead of continuing into steps that the message says cannot work.

    Raises:
        OSError: if a per-round qrels file cannot be read or the combined file cannot
            be written (previously a failing shell ``cat`` was silently ignored).
    """
    import shutil  # local import: only needed for the qrels concatenation below

    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Abort here: the message asks the user to download the indexes before running.
        return

    round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'
    round2_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2.txt'
    round3_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3.txt'
    round2_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round12.txt'
    round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt'

    # Byte-for-byte concatenation (round 1 then round 2), done in Python instead of a shell
    # `cat ... > ...` so it needs no POSIX shell and a missing input raises an error.
    with open(round2_cumulative_qrels, 'wb') as merged:
        for part_path in (round1_qrels, round2_qrels):
            with open(part_path, 'rb') as part:
                shutil.copyfileobj(part, merged)

    verify_stored_runs(stored_runs)
    perform_runs()
    perform_fusion()
    prepare_final_submissions(round2_cumulative_qrels)
    evaluate_runs(round2_cumulative_qrels, cumulative_runs)
    evaluate_runs(round3_cumulative_qrels, cumulative_runs)

    # Download the NIST post-processed runs.
    print('')
    post_processed_urls = (
        'https://www.dropbox.com/s/ilqgky1tti0zvez/anserini.final-r3.fusion1.post-processed.txt?dl=1',
        'https://www.dropbox.com/s/ue3z6xxxca9krkb/anserini.final-r3.fusion2.post-processed.txt?dl=1',
        'https://www.dropbox.com/s/95vk831wp1ldnpm/anserini.final-r3.rf.post-processed.txt?dl=1',
    )
    for url in post_processed_urls:
        download_url(url, 'runs', force=True)

    evaluate_runs(round3_qrels, final_runs)

Exemplo n.º 8

0

Exibir arquivo

def main():
    """Regenerate the round 5 runs and check them against expected metrics.

    Steps, in order: confirm the three index directories exist, verify the stored runs,
    perform the runs, fuse them, prepare the final submissions, then evaluate
    ``cumulative_runs`` against the round 4 cumulative qrels and the complete qrels, and
    ``final_runs`` against the round 5 qrels, each time with a table of expected scores.

    Returns early (after printing a message) when any of the three index directories is
    missing, instead of continuing into steps that the message says cannot work.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Abort here: perform_runs below is handed these indexes.
        return

    round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'
    complete_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-complete.txt'
    round5_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round5.txt'

    # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669
    check_md5_flag = False

    verify_stored_runs(stored_runs)
    perform_runs(5, indexes)
    perform_fusion(5, cumulative_runs, check_md5=check_md5_flag)
    prepare_final_submissions(5, final_runs, check_md5=check_md5_flag)

    # Expected scores of the cumulative runs against the round 4 cumulative qrels.
    expected_round4_cumulative = {
        'anserini.covid-r5.abstract.qq.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4580, 'judged_cut_10': 0.5880, 'ndcg_cut_20': 0.4379,
             'judged_cut_20': 0.5940, 'map': 0.1903, 'recall_1000': 0.4525, 'judged_cut_1000': 0.2264},
        'anserini.covid-r5.abstract.qdel.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4912, 'judged_cut_10': 0.6240, 'ndcg_cut_20': 0.4596,
             'judged_cut_20': 0.6040, 'map': 0.2042, 'recall_1000': 0.4714, 'judged_cut_1000': 0.2351},
        'anserini.covid-r5.full-text.qq.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.3240, 'judged_cut_10': 0.5660, 'ndcg_cut_20': 0.3055,
             'judged_cut_20': 0.5250, 'map': 0.1324, 'recall_1000': 0.3758, 'judged_cut_1000': 0.2171},
        'anserini.covid-r5.full-text.qdel.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4634, 'judged_cut_10': 0.6460, 'ndcg_cut_20': 0.4387,
             'judged_cut_20': 0.6280, 'map': 0.1793, 'recall_1000': 0.4368, 'judged_cut_1000': 0.2425},
        'anserini.covid-r5.paragraph.qq.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4077, 'judged_cut_10': 0.6160, 'ndcg_cut_20': 0.3907,
             'judged_cut_20': 0.5920, 'map': 0.1981, 'recall_1000': 0.4877, 'judged_cut_1000': 0.2661},
        'anserini.covid-r5.paragraph.qdel.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4918, 'judged_cut_10': 0.6440, 'ndcg_cut_20': 0.4569,
             'judged_cut_20': 0.6250, 'map': 0.2163, 'recall_1000': 0.5101, 'judged_cut_1000': 0.2710},
        'anserini.covid-r5.fusion1.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4696, 'judged_cut_10': 0.6520, 'ndcg_cut_20': 0.4539,
             'judged_cut_20': 0.6490, 'map': 0.2044, 'recall_1000': 0.5027, 'judged_cut_1000': 0.2751},
        'anserini.covid-r5.fusion2.txt':
            {'topics': 50, 'ndcg_cut_10': 0.5077, 'judged_cut_10': 0.6800, 'ndcg_cut_20': 0.4956,
             'judged_cut_20': 0.6690, 'map': 0.2304, 'recall_1000': 0.5378, 'judged_cut_1000': 0.2851},
        'anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6177, 'judged_cut_10': 0.6620, 'ndcg_cut_20': 0.5738,
             'judged_cut_20': 0.6510, 'map': 0.2657, 'recall_1000': 0.5505, 'judged_cut_1000': 0.2562},
    }
    evaluate_runs(round4_cumulative_qrels,
                  cumulative_runs,
                  expected=expected_round4_cumulative,
                  check_md5=check_md5_flag)

    # Expected scores of the same cumulative runs against the complete qrels.
    expected_complete = {
        'anserini.covid-r5.abstract.qq.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6925, 'judged_cut_10': 0.9740, 'ndcg_cut_20': 0.6586,
             'judged_cut_20': 0.9700, 'map': 0.3010, 'recall_1000': 0.4636, 'judged_cut_1000': 0.4159},
        'anserini.covid-r5.abstract.qdel.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.7301, 'judged_cut_10': 0.9980, 'ndcg_cut_20': 0.6979,
             'judged_cut_20': 0.9900, 'map': 0.3230, 'recall_1000': 0.4839, 'judged_cut_1000': 0.4286},
        'anserini.covid-r5.full-text.qq.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4709, 'judged_cut_10': 0.8920, 'ndcg_cut_20': 0.4382,
             'judged_cut_20': 0.8370, 'map': 0.1777, 'recall_1000': 0.3427, 'judged_cut_1000': 0.3397},
        'anserini.covid-r5.full-text.qdel.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6286, 'judged_cut_10': 0.9840, 'ndcg_cut_20': 0.5973,
             'judged_cut_20': 0.9630, 'map': 0.2391, 'recall_1000': 0.4087, 'judged_cut_1000': 0.3875},
        'anserini.covid-r5.paragraph.qq.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.5832, 'judged_cut_10': 0.9600, 'ndcg_cut_20': 0.5659,
             'judged_cut_20': 0.9390, 'map': 0.2808, 'recall_1000': 0.4695, 'judged_cut_1000': 0.4412},
        'anserini.covid-r5.paragraph.qdel.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6764, 'judged_cut_10': 0.9840, 'ndcg_cut_20': 0.6368,
             'judged_cut_20': 0.9740, 'map': 0.3089, 'recall_1000': 0.4949, 'judged_cut_1000': 0.4542},
        'anserini.covid-r5.fusion1.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6469, 'judged_cut_10': 0.9860, 'ndcg_cut_20': 0.6184,
             'judged_cut_20': 0.9800, 'map': 0.2952, 'recall_1000': 0.4967, 'judged_cut_1000': 0.4675},
        'anserini.covid-r5.fusion2.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6972, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.6785,
             'judged_cut_20': 1.0000, 'map': 0.3329, 'recall_1000': 0.5313, 'judged_cut_1000': 0.4869},
        'anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt':
            {'topics': 50, 'ndcg_cut_10': 0.8395, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.7955,
             'judged_cut_20': 0.9990, 'map': 0.3911, 'recall_1000': 0.5536, 'judged_cut_1000': 0.4607},
    }
    evaluate_runs(complete_qrels,
                  cumulative_runs,
                  expected=expected_complete,
                  check_md5=check_md5_flag)

    # Expected scores of the final submissions against the round 5 qrels.
    expected_final = {
        'anserini.final-r5.fusion1.txt':
            {'topics': 50, 'ndcg_cut_10': 0.5668, 'judged_cut_10': 0.9140, 'ndcg_cut_20': 0.5244,
             'judged_cut_20': 0.8490, 'map': 0.2302, 'recall_1000': 0.5615, 'judged_cut_1000': 0.2148},
        'anserini.final-r5.fusion1.post-processed.txt':
            {'topics': 50, 'ndcg_cut_10': 0.5726, 'judged_cut_10': 0.9240, 'ndcg_cut_20': 0.5313,
             'judged_cut_20': 0.8570, 'map': 0.2314, 'recall_1000': 0.5615, 'judged_cut_1000': 0.2151},
        'anserini.final-r5.fusion2.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6366, 'judged_cut_10': 0.9640, 'ndcg_cut_20': 0.5941,
             'judged_cut_20': 0.9080, 'map': 0.2716, 'recall_1000': 0.6012, 'judged_cut_1000': 0.2263},
        'anserini.final-r5.fusion2.post-processed.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6474, 'judged_cut_10': 0.9780, 'ndcg_cut_20': 0.6007,
             'judged_cut_20': 0.9150, 'map': 0.2734, 'recall_1000': 0.6012, 'judged_cut_1000': 0.2267},
        'anserini.final-r5.rf.txt':
            {'topics': 50, 'ndcg_cut_10': 0.7777, 'judged_cut_10': 0.9680, 'ndcg_cut_20': 0.7193,
             'judged_cut_20': 0.9270, 'map': 0.3235, 'recall_1000': 0.6378, 'judged_cut_1000': 0.2197},
        'anserini.final-r5.rf.post-processed.txt':
            {'topics': 50, 'ndcg_cut_10': 0.7944, 'judged_cut_10': 0.9860, 'ndcg_cut_20': 0.7346,
             'judged_cut_20': 0.9470, 'map': 0.3280, 'recall_1000': 0.6378, 'judged_cut_1000': 0.2201},
    }
    evaluate_runs(round5_qrels,
                  final_runs,
                  expected=expected_final,
                  check_md5=check_md5_flag)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: generate_round3_baselines.py Projeto: mayankanand007/anserini

def main():
    """Regenerate the round 3 runs and check them against expected metrics.

    Steps, in order: confirm the three index directories exist, build the round 2
    cumulative qrels by concatenating the round 1 and round 2 qrels, verify the stored
    runs, perform the runs, fuse them, prepare the final submissions, then evaluate
    ``cumulative_runs`` against the round 2 and round 3 cumulative qrels and
    ``final_runs`` against the round 3 qrels, each time with a table of expected scores.

    Returns early (after printing a message) when any of the three index directories is
    missing, instead of continuing into steps that the message says cannot work.

    Raises:
        OSError: if a per-round qrels file cannot be read or the cumulative file cannot
            be written (previously a failing shell ``cat`` was silently ignored).
    """
    import shutil  # local import: only needed for the qrels concatenation below

    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Abort here: perform_runs below is handed these indexes.
        return

    round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'
    round2_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2.txt'
    round2_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt'
    round3_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3.txt'
    round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt'

    # Byte-for-byte concatenation (round 1 then round 2), done in Python instead of a shell
    # `cat ... > ...` so it needs no POSIX shell and a missing input raises an error.
    with open(round2_cumulative_qrels, 'wb') as merged:
        for part_path in (round1_qrels, round2_qrels):
            with open(part_path, 'rb') as part:
                shutil.copyfileobj(part, merged)

    # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669
    check_md5_flag = False

    verify_stored_runs(stored_runs)
    perform_runs(3, indexes)
    perform_fusion(3, cumulative_runs, check_md5=check_md5_flag)
    prepare_final_submissions(3, final_runs, check_md5=check_md5_flag)

    # Expected scores of the cumulative runs against the round 2 cumulative qrels.
    expected_round2_cumulative = {
        'anserini.covid-r3.abstract.qq.bm25.txt':
            {'topics': 40, 'ndcg_cut_10': 0.2118, 'judged_cut_10': 0.3300, 'ndcg_cut_20': 0.2043,
             'judged_cut_20': 0.3150, 'map': 0.0951, 'recall_1000': 0.4398, 'judged_cut_1000': 0.1275},
        'anserini.covid-r3.abstract.qdel.bm25.txt':
            {'topics': 40, 'ndcg_cut_10': 0.2470, 'judged_cut_10': 0.3375, 'ndcg_cut_20': 0.2256,
             'judged_cut_20': 0.3175, 'map': 0.1023, 'recall_1000': 0.4537, 'judged_cut_1000': 0.1248},
        'anserini.covid-r3.full-text.qq.bm25.txt':
            {'topics': 40, 'ndcg_cut_10': 0.2337, 'judged_cut_10': 0.4650, 'ndcg_cut_20': 0.2259,
             'judged_cut_20': 0.4425, 'map': 0.1099, 'recall_1000': 0.4817, 'judged_cut_1000': 0.1490},
        'anserini.covid-r3.full-text.qdel.bm25.txt':
            {'topics': 40, 'ndcg_cut_10': 0.3430, 'judged_cut_10': 0.5025, 'ndcg_cut_20': 0.3077,
             'judged_cut_20': 0.4888, 'map': 0.1426, 'recall_1000': 0.5267, 'judged_cut_1000': 0.1575},
        'anserini.covid-r3.paragraph.qq.bm25.txt':
            {'topics': 40, 'ndcg_cut_10': 0.2848, 'judged_cut_10': 0.5175, 'ndcg_cut_20': 0.2734,
             'judged_cut_20': 0.4938, 'map': 0.1390, 'recall_1000': 0.5527, 'judged_cut_1000': 0.1727},
        'anserini.covid-r3.paragraph.qdel.bm25.txt':
            {'topics': 40, 'ndcg_cut_10': 0.3604, 'judged_cut_10': 0.5050, 'ndcg_cut_20': 0.3213,
             'judged_cut_20': 0.4875, 'map': 0.1520, 'recall_1000': 0.5676, 'judged_cut_1000': 0.1672},
        'anserini.covid-r3.fusion1.txt':
            {'topics': 40, 'ndcg_cut_10': 0.3093, 'judged_cut_10': 0.4975, 'ndcg_cut_20': 0.2933,
             'judged_cut_20': 0.5025, 'map': 0.1400, 'recall_1000': 0.5566, 'judged_cut_1000': 0.1750},
        'anserini.covid-r3.fusion2.txt':
            {'topics': 40, 'ndcg_cut_10': 0.3568, 'judged_cut_10': 0.5250, 'ndcg_cut_20': 0.3273,
             'judged_cut_20': 0.4925, 'map': 0.1564, 'recall_1000': 0.5769, 'judged_cut_1000': 0.1715},
        'anserini.covid-r3.abstract.qdel.bm25+rm3Rf.txt':
            {'topics': 40, 'ndcg_cut_10': 0.3633, 'judged_cut_10': 0.3800, 'ndcg_cut_20': 0.3175,
             'judged_cut_20': 0.3600, 'map': 0.1526, 'recall_1000': 0.5722, 'judged_cut_1000': 0.1398},
    }
    evaluate_runs(round2_cumulative_qrels,
                  cumulative_runs,
                  expected=expected_round2_cumulative,
                  check_md5=check_md5_flag)

    # Expected scores of the same cumulative runs against the round 3 cumulative qrels.
    expected_round3_cumulative = {
        'anserini.covid-r3.abstract.qq.bm25.txt':
            {'topics': 40, 'ndcg_cut_10': 0.5781, 'judged_cut_10': 0.8875, 'ndcg_cut_20': 0.5359,
             'judged_cut_20': 0.8325, 'map': 0.2348, 'recall_1000': 0.5040, 'judged_cut_1000': 0.2351},
        'anserini.covid-r3.abstract.qdel.bm25.txt':
            {'topics': 40, 'ndcg_cut_10': 0.6291, 'judged_cut_10': 0.9300, 'ndcg_cut_20': 0.5972,
             'judged_cut_20': 0.8925, 'map': 0.2525, 'recall_1000': 0.5215, 'judged_cut_1000': 0.2370},
        'anserini.covid-r3.full-text.qq.bm25.txt':
            {'topics': 40, 'ndcg_cut_10': 0.3977, 'judged_cut_10': 0.7500, 'ndcg_cut_20': 0.3681,
             'judged_cut_20': 0.7213, 'map': 0.1646, 'recall_1000': 0.4708, 'judged_cut_1000': 0.2471},
        'anserini.covid-r3.full-text.qdel.bm25.txt':
            {'topics': 40, 'ndcg_cut_10': 0.5790, 'judged_cut_10': 0.9050, 'ndcg_cut_20': 0.5234,
             'judged_cut_20': 0.8525, 'map': 0.2236, 'recall_1000': 0.5313, 'judged_cut_1000': 0.2693},
        'anserini.covid-r3.paragraph.qq.bm25.txt':
            {'topics': 40, 'ndcg_cut_10': 0.5396, 'judged_cut_10': 0.9425, 'ndcg_cut_20': 0.5079,
             'judged_cut_20': 0.9050, 'map': 0.2498, 'recall_1000': 0.5766, 'judged_cut_1000': 0.2978},
        'anserini.covid-r3.paragraph.qdel.bm25.txt':
            {'topics': 40, 'ndcg_cut_10': 0.6327, 'judged_cut_10': 0.9600, 'ndcg_cut_20': 0.5793,
             'judged_cut_20': 0.9162, 'map': 0.2753, 'recall_1000': 0.5923, 'judged_cut_1000': 0.2956},
        'anserini.covid-r3.fusion1.txt':
            {'topics': 40, 'ndcg_cut_10': 0.5924, 'judged_cut_10': 0.9625, 'ndcg_cut_20': 0.5563,
             'judged_cut_20': 0.9362, 'map': 0.2700, 'recall_1000': 0.5956, 'judged_cut_1000': 0.3045},
        'anserini.covid-r3.fusion2.txt':
            {'topics': 40, 'ndcg_cut_10': 0.6515, 'judged_cut_10': 0.9875, 'ndcg_cut_20': 0.6200,
             'judged_cut_20': 0.9675, 'map': 0.3027, 'recall_1000': 0.6194, 'judged_cut_1000': 0.3076},
        'anserini.covid-r3.abstract.qdel.bm25+rm3Rf.txt':
            {'topics': 40, 'ndcg_cut_10': 0.7459, 'judged_cut_10': 0.9875, 'ndcg_cut_20': 0.7023,
             'judged_cut_20': 0.9637, 'map': 0.3190, 'recall_1000': 0.6125, 'judged_cut_1000': 0.2600},
    }
    evaluate_runs(round3_cumulative_qrels,
                  cumulative_runs,
                  expected=expected_round3_cumulative,
                  check_md5=check_md5_flag)

    # Expected scores of the final submissions against the round 3 qrels.
    expected_final = {
        'anserini.final-r3.fusion1.txt':
            {'topics': 40, 'ndcg_cut_10': 0.5339, 'judged_cut_10': 0.8400, 'ndcg_cut_20': 0.4875,
             'judged_cut_20': 0.7637, 'map': 0.2283, 'recall_1000': 0.6160, 'judged_cut_1000': 0.1370},
        'anserini.final-r3.fusion1.post-processed.txt':
            {'topics': 40, 'ndcg_cut_10': 0.5359, 'judged_cut_10': 0.8475, 'ndcg_cut_20': 0.4902,
             'judged_cut_20': 0.7675, 'map': 0.2293, 'recall_1000': 0.6160, 'judged_cut_1000': 0.1373},
        'anserini.final-r3.fusion2.txt':
            {'topics': 40, 'ndcg_cut_10': 0.6072, 'judged_cut_10': 0.9025, 'ndcg_cut_20': 0.5599,
             'judged_cut_20': 0.8337, 'map': 0.2631, 'recall_1000': 0.6441, 'judged_cut_1000': 0.1431},
        'anserini.final-r3.fusion2.post-processed.txt':
            {'topics': 40, 'ndcg_cut_10': 0.6100, 'judged_cut_10': 0.9100, 'ndcg_cut_20': 0.5617,
             'judged_cut_20': 0.8375, 'map': 0.2641, 'recall_1000': 0.6441, 'judged_cut_1000': 0.1434},
        'anserini.final-r3.rf.txt':
            {'topics': 40, 'ndcg_cut_10': 0.6812, 'judged_cut_10': 0.9600, 'ndcg_cut_20': 0.6255,
             'judged_cut_20': 0.8450, 'map': 0.2787, 'recall_1000': 0.6399, 'judged_cut_1000': 0.1246},
        'anserini.final-r3.rf.post-processed.txt':
            {'topics': 40, 'ndcg_cut_10': 0.6883, 'judged_cut_10': 0.9750, 'ndcg_cut_20': 0.6321,
             'judged_cut_20': 0.8538, 'map': 0.2817, 'recall_1000': 0.6399, 'judged_cut_1000': 0.1250},
    }
    evaluate_runs(round3_qrels,
                  final_runs,
                  expected=expected_final,
                  check_md5=check_md5_flag)

Exemplo n.º 10

0

Exibir arquivo

Arquivo: generate_round2_baselines.py Projeto: mayankanand007/anserini

def main():
    """Regenerate the round 2 baseline runs and check their metrics.

    Steps, in order:

    1. Confirm that the first three entries of the module-level ``indexes``
       are existing directories; if not, print a message and stop.
    2. Call ``perform_runs``, ``perform_fusion`` and
       ``prepare_final_submissions`` to produce the runs.
    3. Call ``evaluate_runs`` on ``cumulative_runs`` with the round 1 qrels
       and on ``final_runs`` with the round 2 qrels, passing the expected
       metric values hard-coded below.

    MD5 checking is disabled for every step (see the linked issue).
    """
    # The message calls these indexes "required", so stop here rather than
    # falling through to the run/fusion/evaluation steps without them.
    if not all(os.path.isdir(indexes[i]) for i in range(3)):
        print('Required indexes do not exist. Please download first.')
        return

    round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'
    round2_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2.txt'

    # Note that this script was written after this issue was noted: https://github.com/castorini/anserini/issues/1669
    # Thus, no point in checking MD5.
    check_md5_flag = False

    perform_runs()
    perform_fusion(cumulative_runs, check_md5=check_md5_flag)
    prepare_final_submissions(final_runs, check_md5=check_md5_flag)

    # Expected metrics for the cumulative runs, evaluated with round 1 qrels.
    expected_metrics = {
        'anserini.covid-r2.abstract.qq.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3522,
            'judged_cut_10': 0.5371,
            'ndcg_cut_20': 0.3171,
            'judged_cut_20': 0.5100,
            'map': 0.1752,
            'recall_1000': 0.6601,
            'judged_cut_1000': 0.1013
        },
        'anserini.covid-r2.abstract.qdel.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3781,
            'judged_cut_10': 0.5371,
            'ndcg_cut_20': 0.3462,
            'judged_cut_20': 0.4829,
            'map': 0.1804,
            'recall_1000': 0.6485,
            'judged_cut_1000': 0.0958
        },
        'anserini.covid-r2.full-text.qq.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.2070,
            'judged_cut_10': 0.4286,
            'ndcg_cut_20': 0.1931,
            'judged_cut_20': 0.3929,
            'map': 0.1159,
            'recall_1000': 0.5953,
            'judged_cut_1000': 0.0995
        },
        'anserini.covid-r2.full-text.qdel.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3123,
            'judged_cut_10': 0.4229,
            'ndcg_cut_20': 0.2738,
            'judged_cut_20': 0.3929,
            'map': 0.1473,
            'recall_1000': 0.6517,
            'judged_cut_1000': 0.1022
        },
        'anserini.covid-r2.paragraph.qq.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.2772,
            'judged_cut_10': 0.4400,
            'ndcg_cut_20': 0.2579,
            'judged_cut_20': 0.4529,
            'map': 0.1607,
            'recall_1000': 0.7248,
            'judged_cut_1000': 0.1220
        },
        'anserini.covid-r2.paragraph.qdel.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3353,
            'judged_cut_10': 0.4343,
            'ndcg_cut_20': 0.2956,
            'judged_cut_20': 0.4329,
            'map': 0.1772,
            'recall_1000': 0.7196,
            'judged_cut_1000': 0.1136
        },
        'anserini.covid-r2.fusion1.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3297,
            'judged_cut_10': 0.4657,
            'ndcg_cut_20': 0.3060,
            'judged_cut_20': 0.4643,
            'map': 0.1914,
            'recall_1000': 0.7561,
            'judged_cut_1000': 0.1304
        },
        'anserini.covid-r2.fusion2.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3679,
            'judged_cut_10': 0.4829,
            'ndcg_cut_20': 0.3360,
            'judged_cut_20': 0.4557,
            'map': 0.2066,
            'recall_1000': 0.7511,
            'judged_cut_1000': 0.1200
        },
    }
    evaluate_runs(round1_qrels,
                  cumulative_runs,
                  expected=expected_metrics,
                  check_md5=check_md5_flag)

    # Expected metrics for the final submissions, evaluated with round 2 qrels.
    # Note that recall@1k doesn't match the figures reported here:
    # https://github.com/castorini/anserini/blob/master/docs/experiments-covid.md
    expected_metrics = {
        'anserini.final-r2.fusion1.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.4827,
            'judged_cut_10': 0.9543,
            'ndcg_cut_20': 0.4512,
            'judged_cut_20': 0.8614,
            'map': 0.2431,
            'recall_1000': 0.6475,
            'judged_cut_1000': 0.1463
        },
        'anserini.final-r2.fusion2.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.5553,
            'judged_cut_10': 0.9743,
            'ndcg_cut_20': 0.5058,
            'judged_cut_20': 0.8957,
            'map': 0.2739,
            'recall_1000': 0.6832,
            'judged_cut_1000': 0.1528
        },
    }
    evaluate_runs(round2_qrels,
                  final_runs,
                  expected=expected_metrics,
                  check_md5=check_md5_flag)

Exemplo n.º 11

0

Exibir arquivo

Arquivo: generate_round5_doc2query_baselines.py Projeto: mayankanand007/anserini

def main():
    """Regenerate the round 5 doc2query baseline runs and check their metrics.

    Steps, in order:

    1. Confirm that the first three entries of the module-level ``indexes``
       are existing directories; if not, print a message and stop.
    2. Call ``verify_stored_runs``, ``perform_runs``, ``perform_fusion`` and
       ``prepare_final_submissions`` (the last one with the round 4
       cumulative qrels).
    3. Call ``evaluate_runs`` three times: ``cumulative_runs`` against the
       round 4 cumulative qrels, ``cumulative_runs`` against the complete
       qrels, and ``final_runs`` against the round 5 qrels, each time passing
       the expected metric values hard-coded below.

    MD5 checking is disabled for every step (see the linked issue).
    """
    # The message calls these indexes "required", so stop here rather than
    # falling through to the run/fusion/evaluation steps without them.
    if not all(os.path.isdir(indexes[i]) for i in range(3)):
        print('Required indexes do not exist. Please download first.')
        return

    round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'
    complete_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-complete.txt'
    round5_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round5.txt'

    # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669
    check_md5_flag = False

    verify_stored_runs(stored_runs)
    perform_runs()
    perform_fusion(check_md5=check_md5_flag)
    prepare_final_submissions(round4_cumulative_qrels, check_md5=check_md5_flag)

    # Expected metrics for the cumulative runs, evaluated with the round 4
    # cumulative qrels.
    expected_metrics = {
        'expanded.anserini.covid-r5.abstract.qq.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4635, 'judged_cut_10': 0.5300, 'ndcg_cut_20': 0.4326,
             'judged_cut_20': 0.5120, 'map': 0.1728, 'recall_1000': 0.4462, 'judged_cut_1000': 0.2059},
        'expanded.anserini.covid-r5.abstract.qdel.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4548, 'judged_cut_10': 0.5000, 'ndcg_cut_20': 0.4260,
             'judged_cut_20': 0.4880, 'map': 0.1742, 'recall_1000': 0.4527, 'judged_cut_1000': 0.2051},
        'expanded.anserini.covid-r5.full-text.qq.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4450, 'judged_cut_10': 0.6020, 'ndcg_cut_20': 0.4208,
             'judged_cut_20': 0.5820, 'map': 0.1801, 'recall_1000': 0.4473, 'judged_cut_1000': 0.2393},
        'expanded.anserini.covid-r5.full-text.qdel.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4817, 'judged_cut_10': 0.6040, 'ndcg_cut_20': 0.4598,
             'judged_cut_20': 0.5920, 'map': 0.1970, 'recall_1000': 0.4711, 'judged_cut_1000': 0.2439},
        'expanded.anserini.covid-r5.paragraph.qq.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4904, 'judged_cut_10': 0.5820, 'ndcg_cut_20': 0.4622,
             'judged_cut_20': 0.5630, 'map': 0.2107, 'recall_1000': 0.5004, 'judged_cut_1000': 0.2511},
        'expanded.anserini.covid-r5.paragraph.qdel.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4940, 'judged_cut_10': 0.5700, 'ndcg_cut_20': 0.4542,
             'judged_cut_20': 0.5420, 'map': 0.2107, 'recall_1000': 0.5070, 'judged_cut_1000': 0.2486},
        # NOTE(review): this key lacks the 'expanded.' prefix that every other
        # entry here (and the matching fusion1 entry in the next dict) carries.
        # Possibly a typo -- confirm against the keys of cumulative_runs
        # before changing it.
        'anserini.covid-r5.fusion1.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4908, 'judged_cut_10': 0.5880, 'ndcg_cut_20': 0.4753,
             'judged_cut_20': 0.5800, 'map': 0.2017, 'recall_1000': 0.5119, 'judged_cut_1000': 0.2599},
        'expanded.anserini.covid-r5.fusion2.txt':
            {'topics': 50, 'ndcg_cut_10': 0.4846, 'judged_cut_10': 0.5740, 'ndcg_cut_20': 0.4565,
             'judged_cut_20': 0.5400, 'map': 0.2045, 'recall_1000': 0.5218, 'judged_cut_1000': 0.2578},
        'expanded.anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6095, 'judged_cut_10': 0.6320, 'ndcg_cut_20': 0.5693,
             'judged_cut_20': 0.5990, 'map': 0.2344, 'recall_1000': 0.5280, 'judged_cut_1000': 0.2257},
    }
    evaluate_runs(round4_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)

    # Expected metrics for the same cumulative runs, evaluated with the
    # complete qrels.
    expected_metrics = {
        'expanded.anserini.covid-r5.abstract.qq.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6808, 'judged_cut_10': 0.9980, 'ndcg_cut_20': 0.6375,
             'judged_cut_20': 0.9600, 'map': 0.2718, 'recall_1000': 0.4550, 'judged_cut_1000': 0.3845},
        'expanded.anserini.covid-r5.abstract.qdel.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6939, 'judged_cut_10': 0.9920, 'ndcg_cut_20': 0.6524,
             'judged_cut_20': 0.9610, 'map': 0.2752, 'recall_1000': 0.4595, 'judged_cut_1000': 0.3825},
        'expanded.anserini.covid-r5.full-text.qq.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6300, 'judged_cut_10': 0.9680, 'ndcg_cut_20': 0.5843,
             'judged_cut_20': 0.9260, 'map': 0.2475, 'recall_1000': 0.4201, 'judged_cut_1000': 0.3921},
        'expanded.anserini.covid-r5.full-text.qdel.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6611, 'judged_cut_10': 0.9800, 'ndcg_cut_20': 0.6360,
             'judged_cut_20': 0.9610, 'map': 0.2746, 'recall_1000': 0.4496, 'judged_cut_1000': 0.4073},
        'expanded.anserini.covid-r5.paragraph.qq.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6827, 'judged_cut_10': 0.9800, 'ndcg_cut_20': 0.6477,
             'judged_cut_20': 0.9670, 'map': 0.3080, 'recall_1000': 0.4936, 'judged_cut_1000': 0.4360},
        'expanded.anserini.covid-r5.paragraph.qdel.bm25.txt':
            {'topics': 50, 'ndcg_cut_10': 0.7067, 'judged_cut_10': 0.9960, 'ndcg_cut_20': 0.6614,
             'judged_cut_20': 0.9760, 'map': 0.3127, 'recall_1000': 0.4985, 'judged_cut_1000': 0.4328},
        'expanded.anserini.covid-r5.fusion1.txt':
            {'topics': 50, 'ndcg_cut_10': 0.7072, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.6731,
             'judged_cut_20': 0.9920, 'map': 0.2964, 'recall_1000': 0.5063, 'judged_cut_1000': 0.4528},
        'expanded.anserini.covid-r5.fusion2.txt':
            {'topics': 50, 'ndcg_cut_10': 0.7131, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.6755,
             'judged_cut_20': 0.9910, 'map': 0.3036, 'recall_1000': 0.5166, 'judged_cut_1000': 0.4518},
        'expanded.anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt':
            {'topics': 50, 'ndcg_cut_10': 0.8160, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.7787,
             'judged_cut_20': 0.9960, 'map': 0.3421, 'recall_1000': 0.5249, 'judged_cut_1000': 0.4107},
    }
    evaluate_runs(complete_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)

    # Expected metrics for the final submissions, evaluated with round 5 qrels.
    expected_metrics = {
        'expanded.anserini.final-r5.fusion1.txt':
            {'topics': 50, 'ndcg_cut_10': 0.5789, 'judged_cut_10': 0.9520, 'ndcg_cut_20': 0.5374,
             'judged_cut_20': 0.8530, 'map': 0.2236, 'recall_1000': 0.5798, 'judged_cut_1000': 0.2132},
        'expanded.anserini.final-r5.fusion1.post-processed.txt':
            {'topics': 50, 'ndcg_cut_10': 0.5817, 'judged_cut_10': 0.9580, 'ndcg_cut_20': 0.5414,
             'judged_cut_20': 0.8610, 'map': 0.2246, 'recall_1000': 0.5798, 'judged_cut_1000': 0.2135},
        'expanded.anserini.final-r5.fusion2.txt':
            {'topics': 50, 'ndcg_cut_10': 0.5801, 'judged_cut_10': 0.9620, 'ndcg_cut_20': 0.5393,
             'judged_cut_20': 0.8650, 'map': 0.2310, 'recall_1000': 0.5861, 'judged_cut_1000': 0.2135},
        'expanded.anserini.final-r5.fusion2.post-processed.txt':
            {'topics': 50, 'ndcg_cut_10': 0.5825, 'judged_cut_10': 0.9680, 'ndcg_cut_20': 0.5436,
             'judged_cut_20': 0.8700, 'map': 0.2319, 'recall_1000': 0.5861, 'judged_cut_1000': 0.2138},
        'expanded.anserini.final-r5.rf.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6628, 'judged_cut_10': 0.9460, 'ndcg_cut_20': 0.6040,
             'judged_cut_20': 0.8370, 'map': 0.2410, 'recall_1000': 0.6039, 'judged_cut_1000': 0.1995},
        'expanded.anserini.final-r5.rf.post-processed.txt':
            {'topics': 50, 'ndcg_cut_10': 0.6757, 'judged_cut_10': 0.9620, 'ndcg_cut_20': 0.6124,
             'judged_cut_20': 0.8470, 'map': 0.2433, 'recall_1000': 0.6039, 'judged_cut_1000': 0.1998},
    }
    evaluate_runs(round5_qrels, final_runs, expected=expected_metrics, check_md5=check_md5_flag)