Пример #1
0
def db(ctx, propbank_db):
    """Process the grouped-headword query with no headword filter.

    Args:
        ctx: Context object whose ``obj`` mapping provides ``"typ"`` (passed
            to ``headword_grouped``) and ``"out"`` (passed to
            ``proc_results`` as ``outf``).
        propbank_db: Forwarded unchanged as the first argument of
            ``proc_results``.
    """
    conn = get_connection()
    query = headword_grouped(typ=ctx.obj["typ"])
    rows = conn.execute(query)
    proc_results(propbank_db, rows, outf=ctx.obj["out"])
Пример #2
0
def single(ctx, propbank_db, headword):
    """Process the grouped-headword query restricted to one headword.

    Args:
        ctx: Context object whose ``obj`` mapping provides ``"typ"`` (passed
            to ``headword_grouped``) and ``"out"`` (passed to
            ``proc_results`` as ``outf``).
        propbank_db: Forwarded unchanged as the first argument of
            ``proc_results``.
        headword: The single headword to query; wrapped in a 1-tuple for
            ``headword_grouped``.
    """
    conn = get_connection()
    query = headword_grouped((headword, ), typ=ctx.obj["typ"])
    rows = conn.execute(query)
    proc_results(propbank_db, rows, outf=ctx.obj["out"])
Пример #3
0
def propbank(ctx, propbank_db, propbank_tsv):
    """Process the headwords listed in a tab-separated file.

    The first column of every row in ``propbank_tsv`` is taken as a headword.
    The number of rows seen for each headword is passed to ``proc_results``
    as ``frame_counts``.

    Args:
        ctx: Context object whose ``obj`` mapping provides ``"typ"`` (passed
            to ``headword_grouped``) and ``"out"`` (passed to
            ``proc_results`` as ``outf``).
        propbank_db: Forwarded unchanged as the first argument of
            ``proc_results``.
        propbank_tsv: Iterable of text lines (e.g. an open file) in
            tab-separated format.
    """
    # Local import keeps this block self-contained.
    from collections import Counter

    # Count with a Counter rather than itertools.groupby: groupby only merges
    # *adjacent* equal keys, so on input that is not sorted by headword the
    # count of an earlier run would be overwritten by a later one.
    # Blank lines yield empty rows from csv.reader and are skipped.
    # Converted back to a plain dict so lookups of unknown headwords still
    # raise KeyError instead of returning 0.
    headwords = dict(
        Counter(row[0] for row in csv.reader(propbank_tsv, delimiter="\t")
                if row))
    conn = get_connection()
    proc_results(
        propbank_db,
        conn.execute(headword_grouped(headwords.keys(), typ=ctx.obj["typ"])),
        outf=ctx.obj["out"],
        frame_counts=headwords,
    )
Пример #4
0
def confmat(ctx, propbank_db, headword, figout):
    """Draw a heatmap of the contingency matrix for a single headword.

    The rows returned for ``headword`` are turned into queries, which
    ``cont_mat`` evaluates against a searcher opened on ``propbank_db``. The
    resulting counts are put into a DataFrame whose columns are the keys of
    the returned frame mapping and whose index is the labels derived from the
    returned bit-vector mapping. The DataFrame is rendered with seaborn.

    Args:
        ctx: Context object whose ``obj`` mapping provides ``"typ"``.
        propbank_db: Passed to the searcher as its ``database`` setting.
        headword: Headword to analyse.
        figout: Output path for the figure. When falsy the figure is shown
            interactively instead of being saved. A path ending in ``.pgf``
            switches the label separator to ``\\~{}``.
    """
    conn = get_connection()
    rows = list(
        conn.execute(headword_grouped((headword, ), typ=ctx.obj["typ"])))
    gapped_mwes = [row[4] for row in rows]
    print("gapped_mwes", gapped_mwes)
    searcher_args = objectview(
        query_dir="/tmp/",
        case=False,
        database=propbank_db,
    )
    with dep_searcher(searcher_args) as searcher:
        result = cont_mat(searcher, extract_queries(rows))
        # Exactly six values are expected; only the first three are used.
        (counters, frame_dict, bitvec_dict, _frame_marginal, _query_marginal,
         _hits) = result
        contingency = array(counters)
        print("contingency", contingency)
        # XXX: Here
        table = pd.DataFrame(contingency)
        table.columns = list(frame_dict.keys())
        print("gapped_mwes", gapped_mwes)
        if figout and figout.endswith(".pgf"):
            tilde = "\\~{}"
        else:
            tilde = "~"
        bitvec_labels = list(
            bitvecs_to_labels(bitvec_dict.keys(), gapped_mwes, headword,
                              tilde))
        print("bitvec_labels", bitvec_labels)
        table.index = bitvec_labels
        print(table)

        # A fixed-size figure is only created when writing to a file.
        if figout:
            plt.figure(figsize=(5, 3.4))
        sns.set(font_scale=0.5)
        sns.heatmap(table, annot=True, fmt="d", cmap="YlGnBu")
        plt.xticks(rotation=90)
        plt.yticks(rotation=0, ha="right")
        if not figout:
            plt.show()
        else:
            plt.tight_layout()
            plt.savefig(figout)
Пример #5
0
def _wikihw_breakdown(conn, joined, mwe_typ, typ_cnt):
    """Print the level-3/4 rows for one MWE type under the "wikihw" link."""
    if mwe_typ == MweType.inflection:
        for has_sense, cnt in wiki_hw_group(
                conn, joined, mwe_typ,
                tables["wiktionary_hw_link"].c.has_senses):
            if has_sense:
                label = "from a page with definitions"
            else:
                label = "from a page without definitions"
            fmt_row(3, label, cnt, typ_cnt)
            if not has_sense:
                continue
            pospos_case = postpositional_case(
                conn,
                joined,
                tables["ud_mwe"].c.typ == MweType.inflection,
                tables["wiktionary_hw_link"].c.has_senses,
            )
            fmt_row(4, "postpositional case", pospos_case, cnt)
    elif mwe_typ == MweType.multiword:
        for page_exists, cnt in wiki_hw_group(
                conn, joined, mwe_typ,
                tables["wiktionary_hw_link"].c.page_exists):
            label = ("have a Wiktionary page"
                     if page_exists else "are a redlink")
            fmt_row(3, label, cnt, typ_cnt)


def _wnhw_inflection_breakdown(conn, joined, typ_cnt):
    """Print the level-3 row for inflections under the "wnhw" link."""
    pospos_case = postpositional_case(
        conn,
        joined,
        tables["link"].c.name == "wnhw",
        tables["ud_mwe"].c.typ == MweType.inflection,
    )
    fmt_row(3, "postpositional case", pospos_case, typ_cnt)


def mwesize(insert):
    """Print a breakdown of multiword counts, optionally storing the totals.

    Output is written between ``BEGIN`` and ``END`` via ``fmt_row``, with a
    ``\\midrule`` line before each link source and after the last one
    (presumably a LaTeX table -- confirm against ``BEGIN``/``END``). For each
    link name the function prints the source count, the "have head" count
    and, except for ``"wikidefn"``, a per-type breakdown with extra detail
    rows for the ``"wikihw"`` and ``"wnhw"`` links.

    Args:
        insert: When truthy, the overall total and the per-link counts are
            written to ``tables["meta"]`` through ``insert_metadata``, with
            each key prefixed by ``"mweproc_"``.
    """
    conn = get_connection()
    setup_dist()
    joined = mk_joined()
    print(BEGIN)

    total_query = select([func.count(tables["ud_mwe"].c.id)
                          ]).select_from(tables["ud_mwe"])
    total_cnt = conn.execute(total_query).scalar()
    fmt_row(0, "Total multiwords", total_cnt)
    cnts = {"total": total_cnt}

    per_link_query = select(
        [tables["link"].c.name,
         func.count(tables["ud_mwe"].c.id)]).select_from(joined).group_by(
             tables["link"].c.name)
    for link_name, src_cnt in conn.execute(per_link_query):
        cnts[link_name] = src_cnt
        print("\\midrule\n")
        fmt_row(1, LINK_NAME_MAP[link_name], src_cnt, total_cnt)
        fmt_row(2, "have head", have_head_cnt(conn, joined, link_name),
                src_cnt)
        # No per-type breakdown is printed for the "wikidefn" link.
        if link_name == "wikidefn":
            continue
        for mwe_typ, typ_cnt in mwe_typ_group(conn, joined, link_name):
            fmt_row(2, TYP_MAP[mwe_typ], typ_cnt, src_cnt)
            if link_name == "wikihw":
                _wikihw_breakdown(conn, joined, mwe_typ, typ_cnt)
            elif link_name == "wnhw" and mwe_typ == MweType.inflection:
                _wnhw_inflection_breakdown(conn, joined, typ_cnt)

    print("\\midrule\n")
    # Disabled rows kept from the original:
    # fmt_row(1, "syntactic frames", src_cnt, total_cnt)
    # fmt_row(2, "with more than one token", src_cnt, total_cnt)
    if insert:
        prefixed = {"mweproc_" + key: val for key, val in cnts.items()}
        insert_metadata(conn, prefixed, table=tables["meta"])
    print(END)
Пример #6
0
def main():
    """Drop the ``joined`` view and create everything in ``metadata``.

    The view is dropped without cascading, then ``metadata.create_all`` is
    run on the same connection.
    """
    conn = get_connection()
    setup_dist()
    drop_joined = DropView("joined", cascade=False)
    conn.execute(drop_joined)
    metadata.create_all(conn)