def test_covid_round3_qrel_conversion():
    """
    Check that the round-3 COVID benchmark qrels agree with the qrels published
    under the new docids, after mapping old docids to new ones.

    Downloads the docid-change CSV and the round-3 qrels, then verifies that every
    (qid, docid) judgment in ``benchmark.qrels`` has the same label in the
    downloaded qrels once the docid is translated.
    """
    collection_config = {"name": "covid", "round": 3, "coll_type": "abstract"}
    benchmark_config = {"name": "covid", "udelqexpand": False, "useprevqrels": False}
    collection = CovidCollection(collection_config)
    # The key was previously misspelled "collectoin", so the collection built above
    # was never handed over under the "collection" key.
    # NOTE(review): presumably "collection" is the dependency key the benchmark expects - confirm.
    benchmark = CovidBenchmark(benchmark_config, provide={"collection": collection})
    benchmark.download_if_missing()

    docid_map_tmp = "/tmp/docid.map"
    newdocid_qrels_fn = "/tmp/new.docid.qrels"
    qrel_url = "https://ir.nist.gov/covidSubmit/data/qrels-covid_d3_j0.5-3.txt"
    docid_map_url = "https://ir.nist.gov/covidSubmit/data/changedIds-May19.csv"

    download_file(docid_map_url, docid_map_tmp)
    download_file(qrel_url, newdocid_qrels_fn)

    # Build the old-docid -> new-docid mapping from the first two CSV columns.
    # Each line is stripped first: otherwise, on a two-column row, the new docid
    # keeps its trailing newline and can never match a key in the qrels.
    old2new = {}
    with open(docid_map_tmp) as f:
        for line in f:
            fields = line.strip().split(",")
            if len(fields) < 2:
                # blank or malformed row: nothing to map
                continue
            old2new[fields[0]] = fields[1]

    newdocid_qrels = load_qrels(newdocid_qrels_fn)
    olddocid_qrels = benchmark.qrels

    # benchmark.qrels drops some entries (the ones that appeared in previous
    # judgements), so the converted old-docid qrels have fewer entries than
    # newdocid_qrels. A whole-dict equality check therefore cannot be used here;
    # compare entry by entry instead.
    for qid, doc_labels in olddocid_qrels.items():
        for docid, label in doc_labels.items():
            # docids absent from the mapping are looked up unchanged
            newdocid = old2new.get(docid, docid)
            assert label == newdocid_qrels[qid][newdocid]
def test_load_norelevant_qrels(tmpdir):
    """ qids whose documents are all non-relevant should be dropped by load_qrels """
    # q5 only has a label-0 judgment, so it is expected to be absent after loading
    expected = {"q3": {"d1": 1}}
    qrels_txt = """
    q3 0 d1 1
    q5 0 d5 0
    """

    qrels_path = tmpdir / "qrels"
    with open(qrels_path, "wt", encoding="utf-8") as handle:
        handle.write(qrels_txt)

    loaded = load_qrels(qrels_path)
    assert sorted(loaded.items()) == sorted(expected.items())
def test_load_qrels(tmpdir):
    """ a TREC qrels file should load into a {qid: {docid: label}} dict """
    expected = {"q3": {"d1": 1, "d2": 0, "d3": 2}, "q5": {"d5": 1}}
    qrels_txt = """
    q3 0 d1 1
    q3 0 d2 0
    q3 0 d3 2
    q5 0 d5 1
    """

    qrels_path = tmpdir / "qrels"
    with open(qrels_path, "wt", encoding="utf-8") as handle:
        handle.write(qrels_txt)

    loaded = load_qrels(qrels_path)
    assert sorted(loaded.items()) == sorted(expected.items())
def search(self):
    """
    Run the searcher over the benchmark's topics file and return the location
    that ``query_from_file`` reports for the results.

    If the searcher has an ``index`` attribute, the index is created first.
    When ``config["filter"]`` is truthy, the qrels at ``benchmark.qrel_ignore``
    are loaded and passed to the searcher as a per-query list of docids to remove.
    """
    topics_fn = self.benchmark.get_topics_file()
    output_dir = self.get_results_path()

    if hasattr(self.searcher, "index"):
        self.searcher.index.create_index()

    # Positional arguments for query_from_file; the exclusion mapping is
    # appended only when filtering is enabled.
    query_args = [topics_fn, output_dir]
    if self.config["filter"]:
        ignored = load_qrels(self.benchmark.qrel_ignore)
        query_args.append({qid: list(docs) for qid, docs in ignored.items()})

    search_results_folder = self.searcher.query_from_file(*query_args)
    logger.info("searcher results written to: %s", search_results_folder)
    return search_results_folder
def qrels(self):
    """
    Return the qrels, loading them from ``config["qrels"]["path"]`` on first
    access and caching the result on the instance as ``_qrels``.
    """
    if hasattr(self, "_qrels"):
        # already loaded (or set explicitly); reuse the cached value
        return self._qrels

    qrels_path = self.config["qrels"]["path"]
    self._qrels = load_qrels(qrels_path)
    return self._qrels
def set_qrels(self, path):
    """
    Load the qrels file at ``path`` and store the result as ``_qrels``,
    replacing any previously cached value.
    """
    loaded = load_qrels(path)
    self._qrels = loaded
def qrels(self):
    """
    Return the qrels, loading them from ``self.qrel_file`` on first access
    and caching the result on the instance as ``_qrels``.
    """
    already_loaded = hasattr(self, "_qrels")
    if already_loaded:
        return self._qrels

    self._qrels = load_qrels(self.qrel_file)
    return self._qrels
def unsampled_qrels(self):
    """
    Return the unsampled qrels, loading them from ``self.unsampled_qrel_file``
    on first access and caching the result as ``_unsampled_qrels``.
    """
    if hasattr(self, "_unsampled_qrels"):
        # cached from an earlier call
        return self._unsampled_qrels

    source_file = self.unsampled_qrel_file
    self._unsampled_qrels = load_qrels(source_file)
    return self._unsampled_qrels