Пример #1
0
def test_covid_round3_qrel_conversion():
    """Check the benchmark's round-3 COVID qrels against the published qrels.

    The benchmark qrels are keyed by the docids the benchmark uses, while the
    downloaded qrels file is keyed by the ids in the second column of the
    changed-id CSV. Every judgement in ``benchmark.qrels`` must carry the same
    label as the corresponding (remapped) entry in the downloaded qrels.

    Requires network access: both reference files are downloaded.
    """
    collection_config = {"name": "covid", "round": 3, "coll_type": "abstract"}
    benchmark_config = {
        "name": "covid",
        "udelqexpand": False,
        "useprevqrels": False
    }
    collection = CovidCollection(collection_config)
    # The key was previously misspelled as "collectoin"; presumably the
    # benchmark looks its dependency up under "collection" -- confirm against
    # CovidBenchmark's declared dependencies.
    benchmark = CovidBenchmark(benchmark_config,
                               provide={"collection": collection})

    benchmark.download_if_missing()

    docid_map_tmp = "/tmp/docid.map"
    newdocid_qrels_fn = "/tmp/new.docid.qrels"
    qrel_url = "https://ir.nist.gov/covidSubmit/data/qrels-covid_d3_j0.5-3.txt"
    docid_map_url = "https://ir.nist.gov/covidSubmit/data/changedIds-May19.csv"

    download_file(docid_map_url, docid_map_tmp)
    download_file(qrel_url, newdocid_qrels_fn)

    # Build the first-column -> second-column docid mapping. Each line is
    # stripped so a trailing newline never leaks into the mapped id, and split
    # only once; blank or single-column lines are skipped instead of raising.
    old2new = {}
    with open(docid_map_tmp) as f:
        for line in f:
            fields = line.strip().split(",")
            if len(fields) < 2:
                continue
            old2new[fields[0]] = fields[1]

    newdocid_qrels = load_qrels(newdocid_qrels_fn)
    olddocid_qrels = benchmark.qrels

    # benchmark.qrels drops the entries that already appeared in previous
    # judgements, so after conversion it has fewer entries than
    # newdocid_qrels. A whole-dict equality assertion therefore cannot be
    # used; compare label by label instead.
    for qid in olddocid_qrels:
        for docid in olddocid_qrels[qid]:
            # Docids missing from the map are looked up unchanged.
            newdocid = old2new.get(docid, docid)
            assert olddocid_qrels[qid][docid] == newdocid_qrels[qid][newdocid]
Пример #2
0
def test_load_norelevant_qrels(tmpdir):
    """Loading a qrels file must drop qids that have no relevant documents."""

    # q5 only has a judgement with label 0, so it is expected to be absent
    # from the loaded result.
    raw_qrels = """
               q3 0 d1 1
               q5 0 d5 0
    """

    qrels_path = tmpdir / "qrels"
    with open(qrels_path, "wt", encoding="utf-8") as handle:
        handle.write(raw_qrels)

    expected = {"q3": {"d1": 1}}
    loaded = load_qrels(qrels_path)

    assert sorted(loaded.items()) == sorted(expected.items())
Пример #3
0
def test_load_qrels(tmpdir):
    """A TREC-format qrels file is loaded into a {qid: {docid: label}} dict."""

    raw_qrels = """
               q3 0 d1 1
               q3 0 d2 0
               q3 0 d3 2
               q5 0 d5 1
    """

    qrels_path = tmpdir / "qrels"
    with open(qrels_path, "wt", encoding="utf-8") as handle:
        handle.write(raw_qrels)

    # Both queries have at least one non-zero label, so both are expected,
    # including the zero-labelled d2 under q3.
    expected = {
        "q3": {"d1": 1, "d2": 0, "d3": 2},
        "q5": {"d5": 1},
    }
    loaded = load_qrels(qrels_path)

    assert sorted(loaded.items()) == sorted(expected.items())
Пример #4
0
    def search(self):
        """Run the searcher over the benchmark topics and return its output folder.

        If the searcher exposes an ``index`` attribute, ``create_index()`` is
        called on it first. When ``self.config["filter"]`` is truthy, the qrels
        at ``self.benchmark.qrel_ignore`` are loaded and their docids are
        passed to the searcher, per query id, as a third argument.

        Returns:
            Whatever ``self.searcher.query_from_file`` returns (logged as the
            location of the search results).
        """
        topics_path = self.benchmark.get_topics_file()
        results_dir = self.get_results_path()

        if hasattr(self.searcher, "index"):
            self.searcher.index.create_index()

        # The filtered and unfiltered calls differ only by the optional third
        # positional argument, so build the argument list once.
        query_args = [topics_path, results_dir]
        if self.config["filter"]:
            ignored = load_qrels(self.benchmark.qrel_ignore)
            query_args.append({qid: list(docs) for qid, docs in ignored.items()})

        search_results_folder = self.searcher.query_from_file(*query_args)

        logger.info("searcher results written to: %s", search_results_folder)
        return search_results_folder
Пример #5
0
 def qrels(self):
     """Return the qrels, loading them from ``self.config["qrels"]["path"]`` on first use.

     The loaded value is stored on ``self._qrels`` and reused afterwards.
     """
     if hasattr(self, "_qrels"):
         # Already loaded (or set explicitly): reuse the stored value.
         return self._qrels

     qrels_path = self.config["qrels"]["path"]
     self._qrels = load_qrels(qrels_path)
     return self._qrels
Пример #6
0
 def set_qrels(self, path):
     """Load the qrels file at ``path`` and store the result on ``self._qrels``."""
     loaded = load_qrels(path)
     self._qrels = loaded
Пример #7
0
 def qrels(self):
     """Return the qrels, loading them from ``self.qrel_file`` on first use.

     The loaded value is stored on ``self._qrels`` and reused afterwards.
     """
     if hasattr(self, "_qrels"):
         # Already loaded: reuse the stored value.
         return self._qrels

     self._qrels = load_qrels(self.qrel_file)
     return self._qrels
Пример #8
0
 def unsampled_qrels(self):
     """Return the unsampled qrels, loading them from ``self.unsampled_qrel_file`` on first use.

     The loaded value is stored on ``self._unsampled_qrels`` and reused
     afterwards.
     """
     if hasattr(self, "_unsampled_qrels"):
         # Already loaded: reuse the stored value.
         return self._unsampled_qrels

     self._unsampled_qrels = load_qrels(self.unsampled_qrel_file)
     return self._unsampled_qrels