def db(ctx, propbank_db):
    """Process headword groups for the whole database.

    Runs the headword-grouped query (typed via ``ctx.obj["typ"]``) and feeds
    the result rows to ``proc_results``, writing to ``ctx.obj["out"]``.
    """
    connection = get_connection()
    rows = connection.execute(headword_grouped(typ=ctx.obj["typ"]))
    proc_results(propbank_db, rows, outf=ctx.obj["out"])
def single(ctx, propbank_db, headword):
    """Process headword groups restricted to a single *headword*.

    Same pipeline as ``db`` but the grouped query is filtered to the one
    supplied headword (passed as a 1-tuple).
    """
    connection = get_connection()
    rows = connection.execute(
        headword_grouped((headword, ), typ=ctx.obj["typ"]))
    proc_results(propbank_db, rows, outf=ctx.obj["out"])
def propbank(ctx, propbank_db, propbank_tsv):
    """Process headword groups for the headwords listed in a PropBank TSV.

    Reads the first column of *propbank_tsv* (tab-separated), counts how many
    frame rows each headword has, then runs the headword-grouped query over
    those headwords, forwarding the per-headword counts as ``frame_counts``.
    """
    from collections import Counter

    # BUG FIX: the original used itertools.groupby, which only groups
    # *consecutive* equal keys. If the TSV is not sorted by headword, a
    # repeated headword yields several runs, and the dict comprehension kept
    # only the LAST run's length, silently under-counting frames. Counter
    # tallies every occurrence regardless of ordering and produces an
    # identical mapping when the input happens to be sorted.
    headwords = Counter(
        row[0] for row in csv.reader(propbank_tsv, delimiter="\t"))
    conn = get_connection()
    proc_results(
        propbank_db,
        conn.execute(headword_grouped(headwords.keys(), typ=ctx.obj["typ"])),
        outf=ctx.obj["out"],
        frame_counts=headwords,
    )
def confmat(ctx, propbank_db, headword, figout):
    """Build and display a contingency matrix for one headword.

    Fetches the grouped rows for *headword*, runs the extracted queries
    through a dependency searcher, turns the resulting counters into a
    labelled DataFrame, prints it, and renders a seaborn heatmap — saved to
    *figout* when given, otherwise shown interactively.
    """
    conn = get_connection()
    group = conn.execute(headword_grouped((headword, ), typ=ctx.obj["typ"]))
    # Materialise once: the result set is iterated more than once below.
    group = list(group)
    # Column 4 of each row — presumably the gapped-MWE strings; used later
    # for the heatmap row labels. TODO confirm against headword_grouped.
    gapped_mwes = [tpl[4] for tpl in group]
    print("gapped_mwes", gapped_mwes)
    args = objectview(
        query_dir="/tmp/",
        case=False,
        database=propbank_db,
    )
    with dep_searcher(args) as searcher:
        queries = extract_queries(group)
        # cont_mat returns six values; only the first three are used here —
        # the marginals and hit list are unpacked but ignored.
        (
            counters,
            frame_dict,
            bitvec_dict,
            frame_marginal,
            query_marginal,
            hits,
        ) = cont_mat(searcher, queries)
    contingency = array(counters)
    print("contingency", contingency)
    # XXX: Here
    df = pd.DataFrame(contingency)
    df.columns = list(frame_dict.keys())
    print("gapped_mwes", gapped_mwes)
    bitvec_labels = list(
        bitvecs_to_labels(
            bitvec_dict.keys(),
            gapped_mwes,
            headword,
            # PGF output is consumed by LaTeX, so "~" must be escaped there.
            "\\~{}" if figout and figout.endswith(".pgf") else "~",
        ))
    print("bitvec_labels", bitvec_labels)
    df.index = bitvec_labels
    print(df)
    # Only pre-size the figure when saving to a file; the interactive window
    # otherwise uses matplotlib's default size.
    if figout:
        plt.figure(figsize=(5, 3.4))
    sns.set(font_scale=0.5)
    sns.heatmap(df, annot=True, fmt="d", cmap="YlGnBu")
    plt.xticks(rotation=90)
    plt.yticks(rotation=0, ha="right")
    if figout:
        plt.tight_layout()
        plt.savefig(figout)
    else:
        plt.show()
def mwesize(insert):
    """Print a LaTeX table of multiword-expression counts broken down by
    source link, MWE type, and Wiktionary/WordNet-specific sub-categories.

    Counts are accumulated in ``cnts`` and, when *insert* is true, written to
    the ``meta`` table with a ``mweproc_`` key prefix.
    """
    conn = get_connection()
    setup_dist()
    joined = mk_joined()
    print(BEGIN)
    cnts = {}
    # Grand total of MWEs; every per-link row is shown relative to this.
    total_cnt = conn.execute(
        select([func.count(tables["ud_mwe"].c.id)
                ]).select_from(tables["ud_mwe"])).scalar()
    fmt_row(0, "Total multiwords", total_cnt)
    cnts["total"] = total_cnt
    # Per-link-source counts over the joined view.
    link_cnts = conn.execute(
        select([tables["link"].c.name,
                func.count(tables["ud_mwe"].c.id)
                ]).select_from(joined).group_by(tables["link"].c.name))
    # NOTE(review): idx is never used in the loop body.
    for idx, (link_name, src_cnt) in enumerate(link_cnts):
        cnts[link_name] = src_cnt
        print("\\midrule\n")
        fmt_row(1, LINK_NAME_MAP[link_name], src_cnt, total_cnt)
        fmt_row(2, "have head", have_head_cnt(conn, joined, link_name),
                src_cnt)
        # "wikidefn" has no per-type breakdown; all other links do.
        if link_name != "wikidefn":
            for mwe_typ, typ_cnt in mwe_typ_group(conn, joined, link_name):
                fmt_row(2, TYP_MAP[mwe_typ], typ_cnt, src_cnt)
                if link_name == "wikihw":
                    if mwe_typ == MweType.inflection:
                        # Split inflections by whether their source page has
                        # sense definitions.
                        for has_sense, cnt in wiki_hw_group(
                                conn,
                                joined,
                                mwe_typ,
                                tables["wiktionary_hw_link"].c.has_senses,
                        ):
                            fmt_row(
                                3,
                                "from a page with definitions"
                                if has_sense else
                                "from a page without definitions",
                                cnt,
                                typ_cnt,
                            )
                            if has_sense:
                                pospos_case = postpositional_case(
                                    conn,
                                    joined,
                                    tables["ud_mwe"].c.typ ==
                                    MweType.inflection,
                                    tables["wiktionary_hw_link"].c.has_senses,
                                )
                                fmt_row(
                                    4,
                                    "postpositional case",
                                    pospos_case,
                                    cnt,
                                )
                    elif mwe_typ == MweType.multiword:
                        # Split multiwords by whether their Wiktionary page
                        # exists (vs. a redlink).
                        for page_exists, cnt in wiki_hw_group(
                                conn,
                                joined,
                                mwe_typ,
                                tables["wiktionary_hw_link"].c.page_exists,
                        ):
                            fmt_row(
                                3,
                                "have a Wiktionary page"
                                if page_exists else "are a redlink",
                                cnt,
                                typ_cnt,
                            )
                elif link_name == "wnhw" and mwe_typ == MweType.inflection:
                    pospos_case = postpositional_case(
                        conn,
                        joined,
                        tables["link"].c.name == "wnhw",
                        tables["ud_mwe"].c.typ == MweType.inflection,
                    )
                    fmt_row(
                        3,
                        "postpositional case",
                        pospos_case,
                        typ_cnt,
                    )
    print("\\midrule\n")
    # fmt_row(1, "syntactic frames", src_cnt, total_cnt)
    # fmt_row(2, "with more than one token", src_cnt, total_cnt)
    if insert:
        # Persist the accumulated counts, namespaced under "mweproc_".
        insert_metadata(conn,
                        {"mweproc_" + k: v for k, v in cnts.items()},
                        table=tables["meta"])
    print(END)
def main():
    """Reset the derived schema: drop the ``joined`` view (non-cascading)
    and (re)create all tables registered on ``metadata``."""
    connection = get_connection()
    setup_dist()
    drop_stmt = DropView("joined", cascade=False)
    connection.execute(drop_stmt)
    metadata.create_all(connection)