Exemplo n.º 1
0
def main():
    """Write an HTML page showing the ranked list of every imperfect query.

    The ranked lists are read from ``ranked_list/robust_V_10K_10000.txt``
    under ``output_path``. For each query that has both candidates and
    relevance judgements, one collapsible section is written to
    ``robust_V_predictions.html``: a button with the query id, the query
    text and P@10, followed by one row per ranked document (green when the
    document is judged relevant, red otherwise) and the document text.

    Queries whose ranking is already "perfect" (no relevant document is
    placed after a non-relevant one) are skipped.
    """
    bert_ranked_list = load_ranked_list_grouped(
        os.path.join(output_path, "ranked_list", "robust_V_10K_10000.txt")
    )

    queries: Dict[str, str] = load_robust04_desc2()
    # Result is not used below; the call is kept so that any work done by
    # to_qck_queries still happens exactly as before.
    qck_queries = to_qck_queries(queries)
    qrels = load_robust04_qrels()
    candidates_d = load_candidate_d()

    css_blocks = [get_collapsible_css(), get_scroll_css()]
    html = HtmlVisualizer(
        "robust_V_predictions.html",
        additional_styles=css_blocks,
    )

    def label_for(judgement, doc_id):
        # Documents without a judgement count as non-relevant (label 0).
        return judgement[doc_id] if doc_id in judgement else 0

    def get_labels(judgement, ranked_list):
        # Labels in rank order, one per entry of the ranked list.
        return [label_for(judgement, entry.doc_id) for entry in ranked_list]

    def is_perfect(judgement, ranked_list):
        # Perfect means: once a non-relevant document has appeared, no
        # relevant document appears further down the list.
        seen_non_relevant = False
        for label in get_labels(judgement, ranked_list):
            if label:
                if seen_non_relevant:
                    return False
            else:
                seen_non_relevant = True
        return True

    def p_at_k(judgement, ranked_list, k=10):
        # Always divides by k, even when fewer than k documents are ranked.
        top_labels = get_labels(judgement, ranked_list)[:k]
        return sum(1 for label in top_labels if label) / k

    row_base_style = "font-size: 13px; padding: 8px;"

    for qid in bert_ranked_list:
        # Only queries that have both candidate texts and judgements.
        if qid not in candidates_d or qid not in qrels:
            continue

        judgement = qrels[qid]
        q_text = queries[qid]
        ranked_list = bert_ranked_list[qid]
        if is_perfect(judgement, ranked_list):
            continue

        html.write_div_open()
        precision = p_at_k(judgement, ranked_list)
        html.write_elem(
            "button",
            f"{qid}: {q_text} ({precision:.2f})",
            "collapsible",
        )
        html.write_div_open("content")
        doc_text_d = dict(candidates_d[qid])

        for entry in ranked_list:
            doc_id = entry.doc_id
            label = label_for(judgement, doc_id)
            row_style = row_base_style + (
                " background-color: DarkGreen"
                if label
                else " background-color: DarkRed"
            )
            header = f"{entry.rank}] {doc_id} ({entry.score:.2f})"
            html.write_elem("p", header, "collapsible", row_style)
            html.write_div(doc_text_d[doc_id], "c_content")

        # Close the "content" div, then the per-query wrapper div.
        html.write_div_close()
        html.write_div_close()

    html.write_script(get_collapsible_script())
    html.close()
Exemplo n.º 2
0
def main(config):
    """Write an HTML page showing the scored passages for each query.

    Scores are loaded with ``load_qk_score_as_dict(config)`` and looked up
    by ``(query_id, "<doc_id>-<passage_idx>")``. The query/passage pairs
    come from the ``robust_on_clueweb_qk_candidate_filtered`` pickle.

    For every query with at least one passage, a collapsible section is
    written to ``robust_k_docs_filtered.html``: a button showing the query
    id, the query text and the fraction of passages scoring above 0.5,
    followed by one row per passage (green when its score is above 0.5,
    red otherwise) and the passage tokens joined by spaces.
    """
    score_d = load_qk_score_as_dict(config)
    qk_candidate: List[QKUnit] = load_from_pickle(
        "robust_on_clueweb_qk_candidate_filtered")

    css_blocks = [get_collapsible_css(), get_scroll_css()]
    html = HtmlVisualizer(
        "robust_k_docs_filtered.html",
        additional_styles=css_blocks,
    )

    row_base_style = "font-size: 13px; padding: 8px;"

    for query, k_list in qk_candidate:
        qid = query.query_id
        q_text = query.text
        if not k_list:
            continue

        # Resolve every passage's id and score once, before any HTML for
        # this query is written; both passes below reuse these values.
        scored = []
        for passage in k_list:
            kdp_id = f"{passage.doc_id}-{passage.passage_idx}"
            scored.append((passage, kdp_id, score_d[qid, kdp_id]))

        num_positive = sum(1 for _, _, score in scored if score > 0.5)
        # scored is non-empty here because empty k_list was skipped above.
        pos_rate = num_positive / len(scored)

        html.write_div_open()
        html.write_elem(
            "button",
            f"{qid}: {q_text} ({pos_rate:.2f})",
            "collapsible",
        )
        html.write_div_open("content")
        for passage, kdp_id, score in scored:
            passage_text = " ".join(passage.tokens)
            row_style = row_base_style + (
                " background-color: DarkGreen"
                if score > 0.5
                else " background-color: DarkRed"
            )
            html.write_elem("p", f"{kdp_id} : {score:.2f}",
                            "collapsible", row_style)
            html.write_div(passage_text, "c_content")

        # Close the "content" div, then the per-query wrapper div.
        html.write_div_close()
        html.write_div_close()

    html.write_script(get_collapsible_script())
    html.close()