[q_text, str(q_times[q_text])])

    # Path of the corpus text file, built from config["output_folder"] and a
    # fixed file name.
    out_trec_f = join(config["output_folder"], "trec_corpus.txt")
    # Opened for writing as UTF-8. The handle is not closed within the visible
    # lines -- NOTE(review): confirm it is closed further down.
    out_t = codecs.open(out_trec_f, "w", encoding='utf8')

    print("Collection2Text ...")
    nl = 0  # replaced below by the value save_corpus returns, then logged
    relations = []  # filled by whichever source branch below applies

    if config["from_qrels"]:
        # Source: the relevance judgements. Per the original notes, get_qrels
        # yields qrels[(q, doc)] = rel.
        qrels = get_qrels(config["relevance_judgements"])
        ranked_documents = {pair[1] for pair in qrels}
        if config["rerank_run"]:
            # Also keep every document listed in the run to be re-ranked.
            ranked_documents.update(get_docs_from_run(config["rerank_run"]))
        print("totalling: %d documents" % len(ranked_documents))

        nl = save_corpus(queries_text, ranked_documents, index, id2token,
                         externalDocId, out_t)
        logging.info("Corpus file saved to " + out_trec_f + " with " +
                     str(nl) + " lines")

        # One ((query, document), relevance) entry per judgement.
        relations = [((pair[0], pair[1]), rel) for pair, rel in qrels.items()]
        logging.info('From relevance judgements : ' +
                     config["relevance_judgements"])

    elif config["from_run"]:
        logging.info("From run: " + config["train_run"])
        qrels = get_qrels(config["relevance_judgements"]) if bool(
Exemplo n.º 2
0
        """
		print("Append qrels ...")
		for q,d in tqdm(itertools.product(list(queries_text.keys()),list(externelDocId.keys()))):
			if (q,d) not in qrels:
				qrels[(q,d)] = '0'
		"""
        print("Construct relation.txt ...")
        # Write one "<rel> <query> <doc>" line per judgement, in sorted key
        # order, counting the lines as they go out.
        # (A second, "{q} 0 {d} {r}"-formatted line to out3 is disabled here.)
        for pair in tqdm(sorted(qrels)):
            out2.write("{r} {q} {d}\n".format(
                r=qrels[pair], q=pair[0], d=pair[1]))
            nl2 += 1

    elif bool(args["--rank"]) and not bool(args["--r"]):
        print("From rank file: " + args["--rank"])
        print("Construct relation.txt ...")
        ranked_documents = get_docs_from_run(args["--rank"])
        nl = save_corpus(queries_text, ranked_documents, index, id2token,
                         externelDocId, out)
        relations = rank2relations(args["--rank"], bool(args["--bin"]), out3)
        for r in relations:
            rel, q, doc = r
            out2.write("{r} {q} {d}\n".format(r=rel, q=q, d=doc))
            nl2 += 1

    elif bool(args["--rank"]) and bool(
            args["--r"]) and not (bool(args["--train_all"])):
        print("From rank file {s} with relevance judgements in {t}".format(
            s=args["--rank"], t=args["--r"]))
        qrels = get_qrels(args["--r"])
        #print(qrels)
        ranked_documents = get_docs_from_run(args["--rank"])
Exemplo n.º 3
0
    if config["relevance_judgements"] and not (config["run_file"]
                                               or config["runs_folder"]):
        # Only relevance judgements are configured: the judged documents make
        # up the corpus and the judgements themselves are the relations.
        ranked_documents = {key[1] for key in qrels}
        print("totalling: %d documents" % len(ranked_documents))
        nl = save_corpus(queries_text, ranked_documents, index, id2token,
                         externelDocId, out_t)
        logging.info("Corpus file saved to " + out_trec_f + " with " +
                     str(nl) + " lines")

        relations = list(qrels.items())  # (key, relevance) pairs
        logging.info('From relevance judgements : ' +
                     config["relevance_judgements"])

    elif bool(config["run_file"]) and not bool(config["runs_folder"]):
        logging.info("From run: " + config["run_file"])
        ranked_documents = get_docs_from_run(config["run_file"])
        print("totalling: %d documents" % len(ranked_documents))
        nl = save_corpus(queries_text, ranked_documents, index, id2token,
                         externelDocId, out_t)
        logging.info("Corpus file saved to " + out_trec_f + " with " +
                     str(nl) + " lines")

        with open(config["run_file"], "r") as rank:
            for line in tqdm(rank):
                if line != None:
                    # print(line)
                    q = int(line.strip().split()[0])
                    doc = line.strip().split()[2]
                    try:
                        rel = qrels[(q, doc)]  # line.strip().split()[4]
                    except:
    # Load the relevance judgements when a file is configured, otherwise start
    # from an empty mapping. Per the original note, entries have the form
    # qrels[(q, doc)] = rel, with q and rel being ints.
    qrels = (get_qrels(config["relevance_judgements"])
             if config["relevance_judgements"] else {})

    print("Collection2Text ...")
    nl, relations = 0, []
    logging.info("From a set of runs in " + config["runs_folder"])

    # Gather every document to export: those appearing in any run of the runs
    # folder, the judged ones, and those of the optional single run file.
    ranked_documents = set()
    for f in os.listdir(config["runs_folder"]):
        # update() grows the set in place; reassigning the result of union()
        # copied the whole accumulated set once per run file.
        ranked_documents.update(
            get_docs_from_run(join(config["runs_folder"], f)))
    # qrels was loaded earlier from the same judgements file (and is empty
    # when none is configured), so it is reused instead of parsing the file a
    # second time.
    ranked_documents.update(key[1] for key in qrels)
    if config["run_file"]:
        ranked_documents.update(get_docs_from_run(config["run_file"]))

    print("totalling: %d documents" % len(ranked_documents))
    # save_corpus returns the value logged below as the corpus line count.
    nl = save_corpus(queries_text, ranked_documents, index, id2token,
                     externalDocId, out_t)
    logging.info("Corpus file saved to " + out_trec_f + " with " + str(nl) +
                 " lines")

    print("Reading runs to relations ...")
    for f in os.listdir(config["runs_folder"]):