示例#1
0
def proc(method, inf, outf, head=None, no_zstd_out=False):
    """
    Run the filter pipeline selected by `method` from `inf` to `outf`.

    `inf` is read with zstdcat using the "zstd-compression-dictionary"
    dictionary and wrapped by add_head together with `head`. The stream then
    goes through the "fold-support fi" and "lang fi" filter_py stages,
    followed by one filter_py stage per entry of METHODS[method] (each entry
    is expanded with lookup_stage and split on single spaces).

    Unless `no_zstd_out` is true the result is piped into zstdmt, which
    writes `outf` using the same dictionary; otherwise the pipeline's stdout
    is redirected straight to `outf`.

    When the TRACE_PIPELINE environment variable is set to a non-empty
    value, `method` is printed before the pipeline is built.
    """
    from plumbum.cmd import zstdcat, zstdmt

    if os.environ.get("TRACE_PIPELINE"):
        print(method)

    source = zstdcat["-D", "zstd-compression-dictionary", inf]
    pipeline = add_head(filter_py, source, head)

    # Stages that are applied regardless of the chosen method.
    pipeline = pipeline | python[filter_py, "fold-support", "fi", "-", "-"]
    pipeline = pipeline | python[filter_py, "lang", "fi", "-", "-"]

    # Method-specific stages, each reading stdin and writing stdout.
    for stage in METHODS[method]:
        stage_args = [filter_py]
        stage_args.extend(lookup_stage(stage).split(" "))
        stage_args.extend(["-", "-"])
        pipeline = pipeline | python[stage_args]

    if no_zstd_out:
        pipeline = pipeline > outf
    else:
        compress = zstdmt["-D", "zstd-compression-dictionary", "-", "-o", outf]
        pipeline = pipeline | compress

    # NOTE(review): -13 is presumably accepted to tolerate SIGPIPE when an
    # early stage stops reading -- confirm against exec_pipeline.
    exec_pipeline(pipeline, retcode=[-13, 0])
示例#2
0
def eurosense2stifflike(inf, outf, head, babel2wn_map):
    """
    Build, print and run the pipeline that turns `inf` into `outf`.

    The input is wrapped with add_zstd and add_head, extended by
    mk_eurosense2stifflike_pipeline using `babel2wn_map`, and finished with
    the munge_py "eurosense-add-anchor-positions" command, which reads stdin
    and is given `outf` as its output argument.
    """
    source = add_head(filter_py, add_zstd(inf), head)
    stifflike = mk_eurosense2stifflike_pipeline(source, babel2wn_map)
    add_anchors = python[
        munge_py, "eurosense-add-anchor-positions", "-", outf
    ]
    pipeline = stifflike | add_anchors

    # Show the assembled command line before running it.
    print(pipeline)
    # Exit codes 0 and -13 both count as success; stderr is passed through.
    pipeline(retcode=[-13, 0], stderr=sys.stderr)
示例#3
0
def stiff2unified(inf, outf, keyout, head, input_fmt):
    """
    Run the munge/filter stages that convert `inf` into `outf` and `keyout`.

    The input is wrapped with add_zstd and add_head, then piped in order
    through: "stiff-select-wn --wn qf2", "tok-span-dom", "lemma-to-synset",
    "stiff-to-unified --input-fmt <input_fmt>" and finally "unified-split",
    which receives `outf` and `keyout` as its output arguments.
    """
    pipeline = add_head(filter_py, add_zstd(inf), head)

    stages = (
        python[munge_py, "stiff-select-wn", "--wn", "qf2", "-", "-"],
        python[filter_py, "tok-span-dom", "-", "-"],
        python[munge_py, "lemma-to-synset", "-", "-"],
        python[munge_py, "stiff-to-unified", "--input-fmt", input_fmt,
               "-", "-"],
        python[munge_py, "unified-split", "-", outf, keyout],
    )
    for stage in stages:
        pipeline = pipeline | stage

    # Exit codes 0 and -13 both count as success; stderr is passed through.
    pipeline(retcode=[-13, 0], stderr=sys.stderr)
示例#4
0
def eurosense2unified(inf, outf, keyout, head, babel2wn_map):
    """
    Convert Eurosense-format data to the Unified format, so that
    Eurosense-tagged data can be compared with STIFF.

    The input is wrapped with add_zstd and add_head, extended by
    mk_eurosense2stifflike_pipeline using `babel2wn_map`, then piped through
    the munge_py "eurosense-to-unified" and "unified-split" commands; the
    latter receives `outf` and `keyout` as its output arguments.
    """
    source = add_head(filter_py, add_zstd(inf), head)
    stifflike = mk_eurosense2stifflike_pipeline(source, babel2wn_map)

    to_unified = python[munge_py, "eurosense-to-unified", "-", "-"]
    split_unified = python[munge_py, "unified-split", "-", outf, keyout]
    pipeline = stifflike | to_unified | split_unified

    # Exit codes 0 and -13 both count as success; stderr is passed through.
    pipeline(retcode=[-13, 0], stderr=sys.stderr)