def prepare_final_submissions(qrels):
    """Produce the three expanded round 5 final runs via the qrels filter script.

    Each job shells out to ``tools/scripts/filter_run_with_qrels.py`` with
    ``--discard`` and writes its result under ``runs/``. The MD5 of every
    output is computed and, unless the ``generate_md5`` flag (defined outside
    this function) is truthy, asserted equal to the matching ``final_runs``
    entry. When ``generate_md5`` is truthy, the checksums of all keys in
    ``final_runs`` are printed at the end instead.

    Args:
        qrels: Value passed to the filter script's ``--qrels`` option.
    """
    print('')
    print('## Preparing final submission files by removing qrels...')
    print('')

    # (output file, input file, run tag), in the order they are generated.
    jobs = (
        (
            'expanded.anserini.final-r5.fusion1.txt',
            'expanded.anserini.covid-r5.fusion1.txt',
            'r5.fusion1',
        ),
        (
            'expanded.anserini.final-r5.fusion2.txt',
            'expanded.anserini.covid-r5.fusion2.txt',
            'r5.fusion2',
        ),
        (
            'expanded.anserini.final-r5.rf.txt',
            'expanded.anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt',
            'r5.rf',
        ),
    )

    for target, source, tag in jobs:
        print(f'Generating {target}')
        command = (
            f'python tools/scripts/filter_run_with_qrels.py --discard --qrels {qrels} '
            f'--input runs/{source} --output runs/{target} --runtag {tag}'
        )
        os.system(command)
        # The checksum is always computed; it is only enforced when we are
        # not in checksum-generation mode.
        digest = compute_md5(f'runs/{target}')
        assert generate_md5 or digest == final_runs[target], f'Error in producing {target}!'

    if generate_md5:
        final_md5 = {run: compute_md5(f'runs/{run}') for run in final_runs}
        print(f'Checksums for final runs: {final_md5}')
def perform_fusion():
    """Fuse the expanded round 5 BM25 runs into the two fusion runs.

    For each fusion run, ``python -m pyserini.fusion`` is invoked through
    ``os.system`` (with ``PYTHONPATH=../pyserini``) using reciprocal rank
    fusion over three input runs. Unless the ``generate_md5`` flag (defined
    outside this function) is truthy, the MD5 of the fused run is asserted
    equal to its ``cumulative_runs`` entry. When ``generate_md5`` is truthy,
    checksums for every key in ``cumulative_runs`` are printed at the end.
    """
    print('')
    print('## Performing fusion...')
    print('')

    # Fused output file -> the three runs that feed it, in command-line order.
    fusion_jobs = (
        (
            'expanded.anserini.covid-r5.fusion1.txt',
            (
                'expanded.anserini.covid-r5.abstract.qq.bm25.txt',
                'expanded.anserini.covid-r5.full-text.qq.bm25.txt',
                'expanded.anserini.covid-r5.paragraph.qq.bm25.txt',
            ),
        ),
        (
            'expanded.anserini.covid-r5.fusion2.txt',
            (
                'expanded.anserini.covid-r5.abstract.qdel.bm25.txt',
                'expanded.anserini.covid-r5.full-text.qdel.bm25.txt',
                'expanded.anserini.covid-r5.paragraph.qdel.bm25.txt',
            ),
        ),
    )

    for fused, (first, second, third) in fusion_jobs:
        print(f'Performing fusion to create {fused}')
        command = (
            'PYTHONPATH=../pyserini '
            'python -m pyserini.fusion --method rrf --runtag reciprocal_rank_fusion_k=60 --k 10000 '
            f'--out runs/{fused} --runs runs/{first} runs/{second} runs/{third}'
        )
        os.system(command)
        # Short-circuit keeps compute_md5 from running in generation mode.
        assert generate_md5 or compute_md5(f'runs/{fused}') == cumulative_runs[fused], \
            f'Error in producing {fused}!'

    if generate_md5:
        cumulative_md5 = {run: compute_md5(f'runs/{run}') for run in cumulative_runs}
        print(f'Checksums for cumulative runs: {cumulative_md5}')
def prepare_final_submissions(cumulative_qrels, check_md5=False):
    """Produce the three round 4 final runs via the qrels filter script.

    Each job shells out to ``tools/scripts/filter_run_with_qrels.py`` with
    ``--discard`` and writes its result under ``runs/``. After all three
    outputs exist, a summary of file names (padded to 35 columns) and their
    MD5 checksums is printed.

    Args:
        cumulative_qrels: Value passed to the filter script's ``--qrels`` option.
        check_md5: When true, assert that each output's MD5 equals the
            matching entry in ``final_runs``.
    """
    print('')
    print('## Preparing final submission files by removing qrels...')
    print('')

    # (output file, input file, run tag), in the order they are generated.
    jobs = (
        ('anserini.final-r4.fusion1.txt', 'anserini.covid-r4.fusion1.txt', 'r4.fusion1'),
        ('anserini.final-r4.fusion2.txt', 'anserini.covid-r4.fusion2.txt', 'r4.fusion2'),
        ('anserini.final-r4.rf.txt', 'anserini.covid-r4.abstract.qdel.bm25+rm3Rf.txt', 'r4.rf'),
    )

    summary = []
    for target, source, tag in jobs:
        print(f'Generating {target}')
        command = (
            f'python tools/scripts/filter_run_with_qrels.py --discard --qrels {cumulative_qrels} '
            f'--input runs/{source} --output runs/{target} --runtag {tag}'
        )
        os.system(command)
        digest = compute_md5(f'runs/{target}')
        if check_md5:
            assert digest == final_runs[target], f'Error in producing {target}!'
        summary.append((target, digest))

    # The summary is emitted only after every run has been produced.
    print('')
    for target, digest in summary:
        print(target.ljust(35) + digest)
def perform_fusion(check_md5=True):
    """Fuse the round 4 BM25 runs into the two fusion runs.

    For each fusion run, ``python -m pyserini.fusion`` is invoked through
    ``os.system`` (with ``PYTHONPATH=../pyserini``) using reciprocal rank
    fusion over three input runs.

    Args:
        check_md5: When true, assert that the MD5 of each fused run equals
            its entry in ``cumulative_runs``.
    """
    print('')
    print('## Performing fusion...')
    print('')

    # Fused output file paired with the three runs that feed it, in
    # command-line order.
    fusion_jobs = (
        (
            'anserini.covid-r4.fusion1.txt',
            (
                'anserini.covid-r4.abstract.qq.bm25.txt',
                'anserini.covid-r4.full-text.qq.bm25.txt',
                'anserini.covid-r4.paragraph.qq.bm25.txt',
            ),
        ),
        (
            'anserini.covid-r4.fusion2.txt',
            (
                'anserini.covid-r4.abstract.qdel.bm25.txt',
                'anserini.covid-r4.full-text.qdel.bm25.txt',
                'anserini.covid-r4.paragraph.qdel.bm25.txt',
            ),
        ),
    )

    for fused, (first, second, third) in fusion_jobs:
        print(f'Performing fusion to create {fused}')
        command = (
            'PYTHONPATH=../pyserini '
            'python -m pyserini.fusion --method rrf --runtag reciprocal_rank_fusion_k=60 --k 10000 '
            f'--out runs/{fused} --runs runs/{first} runs/{second} runs/{third}'
        )
        os.system(command)
        if check_md5:
            assert compute_md5(f'runs/{fused}') == cumulative_runs[fused], \
                f'Error in producing {fused}!'
def prepare_final_submissions(qrels):
    """Produce the three round 3 final runs via the qrels filter script.

    Each job shells out to ``tools/scripts/filter_run_with_qrels.py`` with
    ``--discard`` and writes its result under ``runs/``. The MD5 of every
    output is asserted equal to the matching ``final_runs`` entry. Once all
    three runs are produced, a summary of file names (left-aligned in 35
    columns) and checksums is printed.

    Args:
        qrels: Value passed to the filter script's ``--qrels`` option.
    """
    print('')
    print('## Preparing final submission files by removing qrels...')
    print('')

    # (output file, input file, run tag), in the order they are generated.
    jobs = (
        ('anserini.final-r3.fusion1.txt', 'anserini.covid-r3.fusion1.txt', 'r3.fusion1'),
        ('anserini.final-r3.fusion2.txt', 'anserini.covid-r3.fusion2.txt', 'r3.fusion2'),
        ('anserini.final-r3.rf.txt', 'anserini.covid-r3.abstract.qdel.bm25+rm3Rf.txt', 'r3.rf'),
    )

    summary = []
    for target, source, tag in jobs:
        print(f'Generating {target}')
        command = (
            f'python tools/scripts/filter_run_with_qrels.py --discard --qrels {qrels} '
            f'--input runs/{source} --output runs/{target} --runtag {tag}'
        )
        os.system(command)
        digest = compute_md5(f'runs/{target}')
        assert digest == final_runs[target], f'Error in producing {target}!'
        summary.append((target, digest))

    # The summary is emitted only after every run has passed its check.
    print('')
    for target, digest in summary:
        print(f'{target:<35}{digest}')
def prepare_final_submissions(cumulative_qrels, check_md5=False):
    """Produce all round 5 final runs via the qrels filter script.

    Each job shells out to ``tools/scripts/filter_run_with_qrels.py`` with
    ``--discard`` and writes its result under ``runs/``. After every output
    has been generated, a summary of file names (padded to 35 columns) and
    MD5 checksums is printed, one line per generated run.

    Args:
        cumulative_qrels: Value passed to the filter script's ``--qrels`` option.
        check_md5: When true, assert that the MD5 of each of the three
            ``anserini.final-r5.*`` runs equals its entry in ``final_runs``.
            The remaining runs are never verified.
    """
    print('')
    print('## Preparing final submission files by removing qrels...')
    print('')

    # (output file, input file, run tag, verify against final_runs), in the
    # order the runs are generated.
    # NOTE(review): most ruir runs reuse the 'r5.rf' run tag; this looks like
    # copy-paste but is kept as-is because the tag is written into the output
    # files — confirm before changing.
    jobs = (
        ('anserini.final-r5.fusion1.txt', 'anserini.covid-r5.fusion1.txt', 'r5.fusion1', True),
        ('anserini.final-r5.fusion2.txt', 'anserini.covid-r5.fusion2.txt', 'r5.fusion2', True),
        ('anserini.final-r5.rf.txt', 'anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt', 'r5.rf', True),
        ('final.ruir1.txt', 'ruir.fusion1.txt', 'r5.rf', False),
        ('final.ruir2.txt', 'ruir.fusion2.txt', 'r5.rf', False),
        ('final.ruir3.txt', 'ruir.fusion3.txt', 'r5.rf', False),
        ('final.qruir33.txt', 'ruir33f.txt', 'final.qruir33.txt', False),
        ('final.ruir52.txt', 'ruir52f.txt', 'r5.rf', False),
        ('final.ruirm2.txt', 'ruirm2f.txt', 'r5.rf', False),
        ('final.ruirs2.txt', 'ruirs2f.txt', 'r5.rf', False),
        ('final.qruir.txt', 'anserini.covid-r5.fusionq.txt', 'final.qruir.txt', False),
        ('final.qruir.filtered.txt', 'ruir33f.txt-filtered', 'final.qruir.filtered.txt', False),
        ('final.qonly.txt', 'anserini.covid-r5.full-text.qonly.bm25.txt', 'final.qonly.txt', False),
    )

    # Every run keeps its own checksum so that each summary line pairs a file
    # with the digest of that same file (no shared variable to clobber).
    summary = []
    for target, source, tag, verify in jobs:
        print(f'Generating {target}')
        command = (
            f'python tools/scripts/filter_run_with_qrels.py --discard --qrels {cumulative_qrels} '
            f'--input runs/{source} --output runs/{target} --runtag {tag}'
        )
        os.system(command)
        digest = compute_md5(f'runs/{target}')
        if verify and check_md5:
            assert digest == final_runs[target], f'Error in producing {target}!'
        summary.append((target, digest))

    print('')
    for target, digest in summary:
        print(target.ljust(35) + digest)