def proc(method, inf, outf, head=None, no_zstd_out=False):
    """Run the filter stages registered for ``method`` over ``inf``.

    The input is read with ``zstdcat`` (using the shared compression
    dictionary), passed through ``add_head`` together with ``head``, and then
    through the fixed ``fold-support fi`` and ``lang fi`` filters.  After
    that, every stage listed in ``METHODS[method]`` is expanded with
    ``lookup_stage`` and appended as a further ``filter_py`` invocation
    reading stdin and writing stdout.

    Args:
        method: Key into ``METHODS`` selecting the list of stages to run.
        inf: Path of the zstd-compressed input.
        outf: Path the result is written to.
        head: Forwarded to ``add_head`` unchanged.
        no_zstd_out: When true, the pipeline's stdout is redirected to
            ``outf`` as-is instead of being recompressed with ``zstdmt``.
    """
    from plumbum.cmd import zstdcat, zstdmt

    # Both the decompressor and the compressor use the same dictionary flags.
    zstd_dict_args = ("-D", "zstd-compression-dictionary")

    if os.environ.get("TRACE_PIPELINE"):
        print(method)

    chain = add_head(filter_py, zstdcat[zstd_dict_args + (inf,)], head)
    chain = chain | python[filter_py, "fold-support", "fi", "-", "-"]
    chain = chain | python[filter_py, "lang", "fi", "-", "-"]

    for short_name in METHODS[method]:
        # lookup_stage yields a space-separated command line for the stage.
        stage_words = lookup_stage(short_name).split(" ")
        chain = chain | python[[filter_py, *stage_words, "-", "-"]]

    if no_zstd_out:
        chain = chain > outf
    else:
        chain = chain | zstdmt[zstd_dict_args + ("-", "-o", outf)]

    # NOTE(review): -13 is presumably accepted so that a stage killed by
    # SIGPIPE (e.g. when the input is truncated via head) is not treated as
    # a failure -- confirm against exec_pipeline.
    exec_pipeline(chain, retcode=[-13, 0])
def eurosense2stifflike(inf, outf, head, babel2wn_map):
    """Run the Eurosense-to-STIFF-like conversion and add anchor positions.

    ``inf`` is wrapped with ``add_zstd`` and ``add_head``, passed through the
    pipeline built by ``mk_eurosense2stifflike_pipeline`` (which receives
    ``babel2wn_map``), and finally through the ``munge_py``
    ``eurosense-add-anchor-positions`` command, which writes to ``outf``.
    The assembled pipeline is printed before it is executed.
    """
    source = add_head(filter_py, add_zstd(inf), head)
    stifflike = mk_eurosense2stifflike_pipeline(source, babel2wn_map)
    add_anchors = python[munge_py, "eurosense-add-anchor-positions", "-", outf]
    full_pipeline = stifflike | add_anchors

    print(full_pipeline)
    # Exit codes 0 and -13 are both accepted; stderr of the stages is
    # forwarded to this process's stderr.
    full_pipeline(retcode=[-13, 0], stderr=sys.stderr)
def stiff2unified(inf, outf, keyout, head, input_fmt):
    """Convert STIFF-format input to the Unified format.

    ``inf`` is wrapped with ``add_zstd`` and ``add_head`` and then piped
    through the ``munge_py``/``filter_py`` sub-commands listed below, in
    order.  The last stage, ``unified-split``, is given ``outf`` and
    ``keyout`` as its output arguments.

    Args:
        inf: Input path, handed to ``add_zstd``.
        outf: First output argument of ``unified-split``.
        keyout: Second output argument of ``unified-split``.
        head: Forwarded to ``add_head`` unchanged.
        input_fmt: Value passed as ``--input-fmt`` to ``stiff-to-unified``.
    """
    chain = add_head(filter_py, add_zstd(inf), head)

    # Each entry is the full argument tuple for one ``python`` invocation.
    stages = (
        (munge_py, "stiff-select-wn", "--wn", "qf2", "-", "-"),
        (filter_py, "tok-span-dom", "-", "-"),
        (munge_py, "lemma-to-synset", "-", "-"),
        (munge_py, "stiff-to-unified", "--input-fmt", input_fmt, "-", "-"),
        (munge_py, "unified-split", "-", outf, keyout),
    )
    for stage_args in stages:
        chain = chain | python[stage_args]

    chain(retcode=[-13, 0], stderr=sys.stderr)
def eurosense2unified(inf, outf, keyout, head, babel2wn_map):
    """
    Convert from the Eurosense format to the Unified format so that Eurosense
    tagged data can be compared with STIFF.

    The input is wrapped with ``add_zstd`` and ``add_head``, run through the
    pipeline from ``mk_eurosense2stifflike_pipeline`` (given
    ``babel2wn_map``), then through ``eurosense-to-unified`` and finally
    ``unified-split``, which receives ``outf`` and ``keyout`` as its output
    arguments.
    """
    source = add_head(filter_py, add_zstd(inf), head)
    stifflike = mk_eurosense2stifflike_pipeline(source, babel2wn_map)
    to_unified = python[munge_py, "eurosense-to-unified", "-", "-"]
    split_unified = python[munge_py, "unified-split", "-", outf, keyout]

    full_pipeline = stifflike | to_unified | split_unified
    full_pipeline(retcode=[-13, 0], stderr=sys.stderr)