Exemplo n.º 1
0
        if val == 'sfst':
            impl = hfst.ImplementationType.SFST_TYPE
        elif val == 'openfst-tropical':
            impl = hfst.ImplementationType.TROPICAL_OPENFST_TYPE
        elif val == 'foma':
            impl = hfst.ImplementationType.FOMA_TYPE
        else:
            raise RuntimeError('type not recognized: ' + val)
    elif arg == '-H' or arg == '--do-not-harmonize':
        harmonize = False
    elif arg == '-S' or arg == '--semicolon':
        semicolons = True
    else:
        infile = open(arg, 'r')

ostr = hfst.HfstOutputStream(type=impl)
comp = hfst.XreCompiler(impl)
comp.set_harmonization(harmonize)
if (semicolons):
    data = infile.read()
    i = 0
    while (i < len(data)):
        tr_and_chars_read = comp.compile_first(
            data[i:])  # HFST 4.0: document this
        tr = tr_and_chars_read[0]
        i = i + tr_and_chars_read[1]
        if tr != None:
            ostr.write(tr)
            transducers_written = transducers_written + 1
        else:
            if comp.contained_only_comments(
Exemplo n.º 2
0
        val = argv[i+1]
        if val == 'sfst':
            impl = hfst.ImplementationType.SFST_TYPE
        elif val == 'openfst-tropical':
            impl = hfst.ImplementationType.TROPICAL_OPENFST_TYPE
        elif val == 'foma':
            impl = hfst.ImplementationType.FOMA_TYPE
        else:
            raise RuntimeError('type not recognized: ' + val)
    elif arg == '-o':
        skip_next= True
        outputfilename = argv[i+1]
    elif arg == '-w':
        impl = hfst.ImplementationType.TROPICAL_OPENFST_TYPE
    else:
        raise RuntimeError('argument not recognized: ' + arg)

# Input stream is created without a filename (presumably standard input --
# confirm against the HFST API).
istr = hfst.HfstInputStream()

# Only pass a filename to the output stream when one was given on the
# command line.
stream_kwargs = {'type': impl}
if outputfilename is not None:
    stream_kwargs['filename'] = outputfilename
ostr = hfst.HfstOutputStream(**stream_kwargs)

# Convert every transducer to the requested implementation type and copy it
# to the output stream.
for transducer in istr:
    transducer.convert(impl)
    ostr.write(transducer)

istr.close()
ostr.close()
Exemplo n.º 3
0
import hfst
import hfst_commandline

# Write every transducer of the input stream into its own file, named
# 1.hfst, 2.hfst, ... in reading order.
options = hfst_commandline.hfst_getopt('', [], 0)
istr = hfst_commandline.get_one_hfst_input_stream(options)[0]
for index, transducer in enumerate(istr, start=1):
    target = "{}.hfst".format(index)
    ostr = hfst.HfstOutputStream(filename=target, type=transducer.get_type())
    ostr.write(transducer)
    ostr.flush()
    ostr.close()
istr.close()
Exemplo n.º 4
0
# Add every implementation type that this HFST build reports as available to
# `types` (the list itself is created outside this excerpt).
if hfst.HfstTransducer.is_implementation_type_available(hfst.ImplementationType.SFST_TYPE):
    types.append(hfst.ImplementationType.SFST_TYPE)
if hfst.HfstTransducer.is_implementation_type_available(hfst.ImplementationType.TROPICAL_OPENFST_TYPE):
    types.append(hfst.ImplementationType.TROPICAL_OPENFST_TYPE)
if hfst.HfstTransducer.is_implementation_type_available(hfst.ImplementationType.FOMA_TYPE):
    types.append(hfst.ImplementationType.FOMA_TYPE)

# Run the exception demonstrations once per collected type.
# NOTE(review): the loop variable `type` shadows the builtin of the same name.
for type in types:
    # NOTE(review): the types appended above were already checked for
    # availability, so this re-check only matters if `types` held other
    # entries beforehand -- confirm.
    if hfst.HfstTransducer.is_implementation_type_available(type):

        hfst.set_default_fst_type(type)
        
        # StreamIsClosedException
        # The stream is closed *before* writing on purpose, so that the write
        # triggers the exception handled below.
        try:
            tr = hfst.regex('foo')
            outstr = hfst.HfstOutputStream(filename='testfile')
            outstr.close()
            outstr.write(tr)
        except hfst.exceptions.StreamIsClosedException:
            print("Could not write transducer: stream to file was closed.")
            
        # TransducerIsCyclicException
        # '[a:b]*' contains a loop; extracting paths without a cycle limit is
        # expected to end up in the except branch, which retries with
        # max_cycles=5.
        transducer = hfst.regex('[a:b]*')
        try:
            results = transducer.extract_paths(output='text')
            print("The transducer has %i paths:" % len(results))
            print(results)
        except hfst.exceptions.TransducerIsCyclicException:
            print("The transducer is cyclic and has an infinite number of paths. Some of them:")
            results = transducer.extract_paths(output='text', max_cycles=5)
            print(results)
Exemplo n.º 5
0
def save_transducer(transducer, filename):
    """Write ``transducer`` to ``filename`` inside the working directory.

    The target path is ``shared.options['working_dir']`` joined with
    ``filename``, and the output stream is created with the transducer's own
    type (``transducer.get_type()``).

    The stream is closed even when writing or flushing raises, so a failed
    write does not leave the stream open; the exception still propagates.
    """
    path = os.path.join(shared.options['working_dir'], filename)
    ostr = hfst.HfstOutputStream(filename=path, type=transducer.get_type())
    try:
        ostr.write(transducer)
        ostr.flush()
    finally:
        ostr.close()
Exemplo n.º 6
0
import hfst
defs = None
from sys import argv
# The script accepts at most one argument (the input file).
if len(argv) > 2:
    raise RuntimeError('error: hfst-pmatch2fst.py [INFILE]')

if len(argv) == 2:
    # A file name was given: compile the pmatch definitions from that file.
    defs = hfst.compile_pmatch_file(argv[1])
else:
    # No file name: compile the expression read from standard input.
    from sys import stdin
    exp = str(stdin.read())
    defs = hfst.compile_pmatch_expression(exp)

# Write all compiled transducers to one output stream of type HFST_OLW_TYPE.
ostr = hfst.HfstOutputStream(type=hfst.ImplementationType.HFST_OLW_TYPE)
for compiled in defs:
    ostr.write(compiled)
ostr.close()
Exemplo n.º 7
0
file_prefix = args.input.partition(".")[0]

# For each (position, language) pair write two files:
#   <file_prefix>2<lang>.fst  -- maps each correspondence to the symbol at
#                                this language's position, and
#   <lang>2<file_prefix>.fst  -- the inverse of that transducer.
for pos, lang in zip(range(alig_width), lang_lst):
    bfst = hfst.HfstBasicTransducer()
    for corr in correspondences:
        # A correspondence of length 1 maps to itself; otherwise take the
        # single character found at this language's position.
        phoneme = corr if len(corr) == 1 else corr[pos:pos+1]
        pth = ((corr, phoneme),)
        bfst.disjunct(pth, 0.0)
    fst = hfst.HfstTransducer(bfst)
    fst.repeat_star()
    fst.minimize()

    outfilename = file_prefix + "2" + lang + ".fst"
    ostream = hfst.HfstOutputStream(filename=outfilename)
    fst.write(ostream)
    # close must actually be *called*; a bare `ostream.close` is a no-op
    # attribute lookup and leaves the stream open.
    ostream.close()

    if args.verbosity > 0:
        print(outfilename, "written")

    # Invert regardless of verbosity: the second file has to contain the
    # inverse mapping even when no progress messages are printed.
    fst.invert()
    outfilename = lang + "2" + file_prefix + ".fst"
    ostream = hfst.HfstOutputStream(filename=outfilename)
    fst.write(ostream)
    ostream.close()

    if args.verbosity > 0:
        print(outfilename, "written")
    
Exemplo n.º 8
0
    elif infile1 == None:
        infile1 = arg
    elif infile2 == None:
        infile2 = arg
    else:
        raise RuntimeError(
            'Usage: hfst-compose-intersect.py [-1] INFILE1 [-2] INFILE2')

# Open both inputs and insist that they hold transducers of the same type.
istr1 = hfst.HfstInputStream(infile1)
istr2 = hfst.HfstInputStream(infile2)
if istr1.get_type() != istr2.get_type():
    type_error = ('Error: transducer types differ in ' + infile1 +
                  ' and ' + infile2)
    raise RuntimeError(type_error)

# The first input must consist of exactly one transducer.
tr1 = istr1.read()
first_is_exhausted = istr1.is_eof()
if not first_is_exhausted:
    raise RuntimeError('Error: ' + infile1 +
                       ' must contain exactly one transducer')
istr1.close()

# Collect every transducer of the second input.
transducers = []
while True:
    if istr2.is_eof():
        break
    transducers.append(istr2.read())
istr2.close()

# Compose-intersect the first transducer with the collected ones and write
# the result to an output stream of the same type.
tr1.compose_intersect(transducers, False)
ostr = hfst.HfstOutputStream(type=tr1.get_type())
ostr.write(tr1)
ostr.flush()
ostr.close()
Exemplo n.º 9
0
def main():
    """Command-line entry point: compile two-level rules and test them.

    Parses the command line, loads the examples (one ``.fst`` file or a list
    of pair-string files) and then reads the rule file(s) line by line.
    Lines are accumulated until one ends with ``;``; the joined text is
    passed to ``parse_rule`` and, depending on the operator it returns, to
    one of the ``twrule`` rule builders.

    With ``--thorough`` > 0 each rule is checked against the examples picked
    by its selector FST; with ``--thorough`` > 1 it is additionally checked
    against generated negative examples. Depending on ``--lost``, ``--wrong``
    and ``--output`` the lost examples, the wrongly accepted strings and the
    compiled rule transducers are written to files.

    NOTE(review): ``--examples`` and ``--rules`` default to ``[None]``;
    without ``-e`` the ``args.examples[0].endswith`` call below fails on
    ``None`` -- confirm whether these options are meant to be required.
    """

    version = cfg.timestamp(__file__)
    import argparse
    arpar = argparse.ArgumentParser(
        description="A compiler and tester for two-level rules."\
        " Version {}."\
        " See https://pytwolc.readthedocs.io/en/latest/index.html"\
        " or https://github.com/koskenni/twol"\
        " for more information.".format(version))
    arpar.add_argument(
        "-e", "--examples", action='store', nargs='+',
        help="""Either one name of a FST file that contains the examples or
            a list of names of files which contain the PSTR form examples
            used for compiling the rules.""",
        default=[None])
    arpar.add_argument(
        "-r", "--rules", action='store', nargs='+',
        help="""One or more files which contain the rules,
             either just one rule file or a file of defines
             as the first one and a part of the whole rule set
             as the second""",
        default=[None])
    arpar.add_argument(
        "-o", "--output",
        help="File to which write the compiled rules if a name is given",
        default="")
    arpar.add_argument(
        "-l", "--lost",
        help="File to which write the examples"\
        " that were not accepted by all rules"\
        " -- it is written as a FST",
        default="")
    arpar.add_argument(
        "-w", "--wrong",
        help="file to which write the wrong strings"\
        " that are accepted by all rules -- it is written as a FST",
        default="")
    arpar.add_argument(
        "-t", "--thorough",
        help="test each rule separately: 0 if no testing is desired,"\
        " 1 if against positive examples,"
        " 2 against both positive and negative examples."\
        " Default is 2.",
        type=int, choices=[0, 1, 2], default=2)
    arpar.add_argument(
        "--recursion",
        help="set the limit for recursion depth",
        type=int)
    arpar.add_argument(
        "-v", "--verbosity",
        help="level of  diagnostic output",
        type=int, default=0)

    args = arpar.parse_args()

    cfg.verbosity = args.verbosity
    # Only touch the interpreter's recursion limit when --recursion was given.
    if args.recursion:
        sys.setrecursionlimit(args.recursion)

    # A single name ending in ".fst" is read as a ready-made examples FST;
    # anything else is handed to read_examples as a list of file names.
    if len(args.examples) == 1 and args.examples[0].endswith(".fst"):
        twexamp.read_fst(args.examples[0])
    else:
        twexamp.read_examples(args.examples)

    if cfg.verbosity >= 30:
        twbt.ppfst(cfg.examples_fst, title="examples_fst")

    parser = twparser_init()

    # The examples encoded as an FSA (pairs joined with "^"); used further
    # down when the negative examples are built.
    examples_fsa = hfst.fst_to_fsa(cfg.examples_fst, separator="^")

    # Input-side projection of a copy of the examples.
    examples_up_fsa = cfg.examples_fst.copy()
    examples_up_fsa.input_project()
    if cfg.verbosity >= 30:
        twbt.ppfst(examples_up_fsa, title="examples_up_fsa")

    twrule.init()

    i = 0                   # number of physical lines read so far
    skip = False            # True between a STOP line and the next START line
    all_rules_fst_lst = []  # compiled rules kept for --lost/--wrong/--output
    line_lst = []           # comment-stripped lines of the rule being collected

    for line_nl in fileinput.input(args.rules):
        i += 1
        # No partial rule pending: start a fresh list of raw lines.
        if not line_lst:
            line_nl_lst = []
        line_nl_lst.append(line_nl)
        # Keep only the text before the first "!" and strip whitespace.
        line = line_nl.split('!', maxsplit=1)[0].strip()
        if line == "START":
            skip = False
            continue
        elif line == "STOP":
            skip = True
        # NOTE(review): after the split above `line` cannot contain "!", so
        # the startswith("!") test looks like it can never be true.
        if skip or (not line) or line.startswith("!"):
            continue
        line_lst.append(line)
        # A rule is complete only once a line ends with ";".
        if not line.endswith(";"):
            continue
        else:
            rule_str = " ".join(line_lst)
            line_lst = []

        op, left, right = parse_rule(parser, rule_str, i, line_nl_lst)
        # Skip when the operator is "?" or either side is empty/missing.
        if op == "?" or not (left and right):
            continue

        if (args.thorough > 0 and op != "=") or cfg.verbosity > 0:
            print("\n")
            print(rule_str)

        # NOTE(review): the twrule builders below receive `line`, i.e. only
        # the last physical line of a multi-line rule, not `rule_str` --
        # confirm this is intended.
        if op == "=":
            #        if cfg.verbosity > 0:
            #            print(line)
            if cfg.verbosity >= 10:
                print(left, op)
                twbt.ppfst(right)
            # "=" rules produce no rule FST here; go on to the next line.
            continue
        elif op == "=>":
            R, selector_fst, MIXe = twrule.rightarrow(line, left, *right)
        elif op == "<=":
            R, selector_fst, MIXe = twrule.output_coercion(line, left, *right)
        elif op == "<--":
            R, selector_fst, MIXe = twrule.input_coercion(line, left, *right)
        elif op == "<=>":
            R, selector_fst, MIXe = twrule.doublearrow(line, left, *right)
        elif op == "/<=":
            R, selector_fst, MIXe = twrule.center_exclusion(line, left, *right)
        else:
            print("Error: not a valid type of a rule", op)
            continue
        if cfg.verbosity >= 10:
            twbt.ppfst(R)
        # The rule FST is only kept when one of the output options needs it.
        if args.lost or args.wrong or args.output:
            all_rules_fst_lst.append(R)
        if args.thorough > 0:
            # Restrict the selector to the actual examples, then compare it
            # with its intersection with the rule.
            selector_fst.intersect(cfg.examples_fst)
            # selector_fst.n_best(5)
            selector_fst.minimize()
            if cfg.verbosity >= 20:
                paths = selector_fst.extract_paths(output='raw')
                print_raw_paths(paths[0:20])
            passed_pos_examples_fst = selector_fst.copy()
            passed_pos_examples_fst.intersect(R)
            # NOTE(review): this inner test repeats the enclosing condition.
            if args.thorough > 0:
                if passed_pos_examples_fst.compare(selector_fst):
                    print("All positive examples accepted")
                else:
                    # Selected examples that the rule does not accept; at
                    # most the first 20 paths are printed.
                    lost_examples_fst = selector_fst.copy()
                    lost_examples_fst.minus(passed_pos_examples_fst)
                    lost_examples_fst.minimize()
                    print("** Some positive examples were rejected:")
                    lost_paths = lost_examples_fst.extract_paths(output='raw')
                    print_raw_paths(lost_paths[0:20])
        # Negative testing is done for every rule type except "/<=".
        if args.thorough > 1 and op in {"=>", "<=", "<=>", "<--"}:
            # Compose the example FSA with MIXe, project to the output side,
            # turn the result back into an FST and remove the real examples.
            neg_examples_fsa = examples_fsa.copy()
            neg_examples_fsa.compose(MIXe)
            neg_examples_fsa.output_project()
            neg_examples_fst = hfst.fsa_to_fst(neg_examples_fsa, separator="^")
            neg_examples_fst.minus(cfg.examples_fst)
            NG = examples_up_fsa.copy()
            NG.compose(neg_examples_fst)
            # NOTE(review): this value is only used by the commented-out
            # print below and is overwritten before any other use.
            npaths = NG.extract_paths(output='raw')
            #print_raw_paths(npaths)
            passed_neg_examples_fst = NG.copy()
            passed_neg_examples_fst.intersect(R)
            if passed_neg_examples_fst.compare(hfst.empty_fst()):
                print("All negative examples rejected")
            else:
                print("** Some negative examples accepted:")
                npaths = passed_neg_examples_fst.extract_paths(output='raw')
                print_raw_paths(npaths[0:20])

    # RESU: the input-projected examples compose-intersected with all kept
    # rules; shared by the --lost and --wrong outputs below.
    if args.lost or args.wrong:
        RESU = examples_up_fsa.copy()
        print(RESU.number_of_arcs(), "arcs in RESU")
        RESU.compose_intersect(tuple(all_rules_fst_lst))
        RESU.minimize()
    if args.lost:
        # Examples that are not in RESU.
        lost_positive_examples_fst = cfg.examples_fst.copy()
        lost_positive_examples_fst.minus(RESU)
        lost_positive_examples_fst.minimize()
        lost_stream = hfst.HfstOutputStream(filename=args.lost)
        lost_stream.write(lost_positive_examples_fst)
        lost_stream.flush()
        lost_stream.close()
        print("wrote lost examples to", args.lost)
    if args.wrong:
        # Content of RESU that is not among the examples.
        WRONG = RESU.copy()
        WRONG.subtract(cfg.examples_fst)
        WRONG.minimize()
        wrong_stream = hfst.HfstOutputStream(filename=args.wrong)
        wrong_stream.write(WRONG)
        wrong_stream.flush()
        wrong_stream.close()
        print("wrote wrongly accepted examples to", args.wrong)
    if args.output:
        # All kept rule transducers go into one file, in reading order.
        outstream = hfst.HfstOutputStream(filename=args.output)
        for fst in all_rules_fst_lst:
            outstream.write(fst)
        outstream.flush()
        outstream.close()
        print("wrote {} rule transducers to {}".format(len(all_rules_fst_lst),
                                                        args.output))
    return
Exemplo n.º 10
0
    return


# Script entry point: read the example pair strings, build the example FST
# and, when an output name is given, write that FST to the file.
if __name__ == "__main__":
    import hfst
    import argparse
    arpar = argparse.ArgumentParser("python3 twexamp.py")
    # nargs="?" makes the positional arguments optional. Without it argparse
    # treats them as required and the declared defaults can never apply.
    arpar.add_argument("examples",
                       nargs="?",
                       help="example pair strings file",
                       default="examples.pstr")
    arpar.add_argument("output",
                       nargs="?",
                       help="file to which write the example FST",
                       default="")
    arpar.add_argument("-v",
                       "--verbosity",
                       help="level of  diagnostic output",
                       type=int,
                       default=0)
    args = arpar.parse_args()

    cfg.verbosity = args.verbosity

    read_examples(args.examples, build_fsts=True)

    # An empty output name (the default) means: build only, write nothing.
    if args.output:
        exfile = hfst.HfstOutputStream(filename=args.output)
        exfile.write(cfg.examples_fst)
        exfile.flush()
        exfile.close()
        print("--- example fst written to ", args.output, " ---")
Exemplo n.º 11
0
import hfst
from sys import argv
# Default implementation type; may be overridden with "-f NAME".
impl = hfst.ImplementationType.TROPICAL_OPENFST_TYPE

# Format names accepted after -f, mapped to the ImplementationType attribute
# that is selected for each of them.
format_attrs = {
    'sfst': 'SFST_TYPE',
    'openfst-tropical': 'TROPICAL_OPENFST_TYPE',
    'foma': 'FOMA_TYPE',
}

# Walk the arguments by index; "-f" consumes the argument that follows it.
i = 1
while i < len(argv):
    arg = argv[i]
    if arg != '-f':
        raise RuntimeError('argument not recognized: ' + arg)
    val = argv[i+1]
    if val not in format_attrs:
        raise RuntimeError('type not recognized: ' + val)
    impl = getattr(hfst.ImplementationType, format_attrs[val])
    i += 2

# Compile with the chosen type as default, convert the result to that type
# and write it to an output stream of the default type.
hfst.set_default_fst_type(impl)
result = hfst.compile_sfst_file("")
result.convert(impl)
ostr = hfst.HfstOutputStream(type=hfst.get_default_fst_type())
ostr.write(result)
def save_fst(fst, fn):
    """Write ``fst`` to the file named ``fn``.

    The output stream is closed even when writing or flushing raises, so a
    failed write does not leave the stream open; the exception still
    propagates to the caller.
    """
    out = hfst.HfstOutputStream(filename=fn)
    try:
        out.write(fst)
        out.flush()
    finally:
        out.close()
Exemplo n.º 13
0
def main():
    """Test a complete set of compiled two-level rules against examples.

    Parses the command line, loads the examples (a ``.fst`` file or
    pair-string files), reads every transducer from the rule FST files and
    derives a negative example FST from the positive one. With ``--lost``
    the positive examples not accepted by all rules are written to that
    file; with ``--wrong`` the negative examples accepted by all rules are
    written to that file.

    Raises:
        SystemExit: when a rule file whose name ends in ``.fst`` does not
            exist.

    NOTE(review): ``--examples`` and ``--rules`` default to ``[None]``;
    omitting either looks like it fails on ``None`` below -- confirm whether
    they are meant to be required.
    """
    import argparse
    arpar = argparse.ArgumentParser(
        "twol-tester",
        description="""A program for testing complete sets of twol rules""")
    arpar.add_argument(
        "-e",
        "--examples",
        action='store',
        nargs='+',
        help="""Either one name of a FST file that contains the examples or
            a list of names of files which contain the PSTR form examples
            used for compiling the rules.""",
        default=[None])
    arpar.add_argument(
        "-r",
        "--rules",
        action='store',
        nargs='+',
        help="""One or more files which contain the compiled rules as FSTs.
            The set  of rules ought to cover (almost) all morphophonemes.""",
        default=[None])
    arpar.add_argument("-l",
                       "--lost",
                       help="""an FST that lists positive examples that are not
            accepted by all rules""",
                       default="")
    arpar.add_argument("-w",
                       "--wrong",
                       help="""an FST that lists the negative examples that are
            accepted gy by all rules""",
                       default="")
    arpar.add_argument("-v",
                       "--verbosity",
                       help="level of  diagnostic output",
                       type=int,
                       default=0)

    args = arpar.parse_args()
    cfg.verbosity = args.verbosity
    #
    # Build the FST of the example pair strings and
    # store in cfg.examples_fst and the cfg.input_symbol_set,
    # cfg.output_symbol_set, cfg.symbol_pair_set and
    # cfg.all_pairs_set
    #
    if len(args.examples) and args.examples[0].endswith(".fst"):
        twexamp.read_fst(args.examples[0])
    elif len(args.examples) > 0:
        twexamp.read_examples(args.examples, build_fsts=True)
    else:
        error("ERROR IN EXAMPLE FILE NAMES: {}".format(args.examples))
    #
    # Read in the compiled twol rule FST or FSTs
    #
    rule_fst_lst = []
    for rule_file_name in args.rules:
        if (not os.path.isfile(rule_file_name)
                and rule_file_name.endswith(".fst")):
            # Exit with a readable message. The message must be formatted
            # with the name of the file that is actually being checked.
            raise SystemExit(
                "RULE FST FILE {} DOES NOT EXIST".format(rule_file_name))
        fst_stream = hfst.HfstInputStream(rule_file_name)
        while not fst_stream.is_eof():
            fst = fst_stream.read()
            rule_fst_lst.append(fst)
        fst_stream.close()
    #
    # Build positive and negative examples
    #
    pos_fst = cfg.examples_fst.copy()
    neg_fst = pos_fst.copy()
    # NOTE(review): at this point neg_fst is still a plain copy of the
    # positive examples, which matches the label passed to paths().
    paths("positive examples", neg_fst)
    # Input side of the examples composed with any sequence of known pairs,
    # minus the positive examples themselves.
    neg_fst.input_project()
    pistar_fst = cfg.all_pairs_fst.copy()
    pistar_fst.repeat_star()
    neg_fst.compose(pistar_fst)
    neg_fst.minimize()
    neg_fst.subtract(pos_fst)
    paths("negative examples", neg_fst)
    #
    # Lost and wrong examples
    #
    if args.lost:
        # Positive examples that do not survive intersection with every rule.
        remain_fst = pos_fst.copy()
        for fst in rule_fst_lst:
            remain_fst.intersect(fst)
        lost_fst = pos_fst.copy()
        lost_fst.subtract(remain_fst)
        lost_stream = hfst.HfstOutputStream(filename=args.lost)
        lost_stream.write(lost_fst)
        lost_stream.close()
    if args.wrong:
        # Negative examples that survive intersection with every rule.
        wrong_fst = neg_fst.copy()
        for fst in rule_fst_lst:
            wrong_fst.intersect(fst)
        wrong_stream = hfst.HfstOutputStream(filename=args.wrong)
        wrong_stream.write(wrong_fst)
        wrong_stream.close()
Exemplo n.º 14
0
if hfst.HfstTransducer.is_implementation_type_available(hfst.ImplementationType.FOMA_TYPE):
    types.append(hfst.ImplementationType.FOMA_TYPE)
    print('HERE!!!')

for type in types:

    print('\n--- Testing implementation type %s ---\n' % hfst.fst_type_to_string(type))

    hfst.set_default_fst_type(type)

    tr1 = None
    tr2 = None
    tr3 = None

    type_ = hfst.ImplementationType.TROPICAL_OPENFST_TYPE
    ostr = hfst.HfstOutputStream(filename='foobar.hfst', type=type_)

    tr_ = hfst.regex('{foo}:{bar}::0.5')
    tr_.convert(type_)

    ostr.write(tr_)
    ostr.write(tr_)
    ostr.flush()
    ostr.close()

    if not os.path.isfile('foobar.hfst'):
        raise RuntimeError('Missing file: foobar.hfst')

    istr = hfst.HfstInputStream('foobar.hfst')
    numtr = 0
    try:
Exemplo n.º 15
0
def save(fst, outf):
    """Write ``fst`` to the file ``outf`` using the transducer's own type.

    The output stream is closed even when writing or flushing raises, so a
    failed write does not leave the stream open; the exception still
    propagates to the caller.
    """
    ostr = hfst.HfstOutputStream(filename=outf, type=fst.get_type())
    try:
        ostr.write(fst)
        ostr.flush()
    finally:
        ostr.close()
Exemplo n.º 16
0
            print("All negative examples rejected")
        else:
            print("** Some negative examples accepted:")
            npaths = passed_neg_examples_fst.extract_paths(output='raw')
            print_raw_paths(npaths[0:20])

if args.lost or args.wrong:
    RESU = examples_up_fsa.copy()
    print(RESU.number_of_arcs(), "arcs in RESU")
    RESU.compose_intersect(tuple(all_rules_fst_lst))
    RESU.minimize()
if args.lost:
    lost_positive_examples_fst = cfg.examples_fst.copy()
    lost_positive_examples_fst.minus(RESU)
    lost_positive_examples_fst.minimize()
    lost_stream = hfst.HfstOutputStream(filename=args.lost)
    lost_stream.write(lost_positive_examples_fst)
    lost_stream.flush()
    lost_stream.close()
    print("wrote lost examples to", args.lost)
if args.wrong:
    WRONG = RESU.copy()
    WRONG.subtract(cfg.examples_fst)
    WRONG.minimize()
    wrong_stream = hfst.HfstOutputStream(filename=args.wrong)
    wrong_stream.write(WRONG)
    wrong_stream.flush()
    wrong_stream.close()
    print("wrote wrongly accepted examples to", args.wrong)
if args.output:
    outstream = hfst.HfstOutputStream(filename=args.output)
Exemplo n.º 17
0
    raise RuntimeError('Transducer format must be given as first argument')

# Command-line format names mapped to the ImplementationType attribute that
# each of them selects.
format_attr = {
    'sfst': 'SFST_TYPE',
    'foma': 'FOMA_TYPE',
    'openfst': 'TROPICAL_OPENFST_TYPE',
}.get(sys.argv[1])
if format_attr is None:
    raise RuntimeError('implementation format not recognized')

# Exit with status 77 when the requested type is not available in this
# build; otherwise make it the default type.
fst_type = getattr(hfst.ImplementationType, format_attr)
if not hfst.HfstTransducer.is_implementation_type_available(fst_type):
    sys.exit(77)
hfst.set_default_fst_type(fst_type)

tr1 = hfst.regex('föö:bär')
tr2 = hfst.regex('0')
tr3 = hfst.regex('0-0')

# Write the three transducers, in order, to a default output stream.
ostr = hfst.HfstOutputStream()
for transducer in (tr1, tr2, tr3):
    ostr.write(transducer)
ostr.flush()
ostr.close()
Exemplo n.º 18
0
import hfst
import hfst_commandline

# Accept -p/--project with a value; the third argument (1) is passed through
# to hfst_getopt unchanged.
options = hfst_commandline.hfst_getopt('p:', ['project='], 1)

# The last -p/--project occurrence wins.
level = None
for opt in options[0]:
    if opt[0] in ('-p', '--project'):
        level = opt[1]

istr = hfst_commandline.get_one_hfst_input_stream(options)[0]
ostr = hfst.HfstOutputStream(type=istr.get_type())

# Project every transducer of the input to the requested side and write it
# out, flushing after each one.
while not istr.is_eof():
    tr = istr.read()
    if level not in ('input', 'output'):
        raise RuntimeError(
            'hfst-project: projection level must be defined with -p [input|output]'
        )
    if level == 'input':
        tr.input_project()
    else:
        tr.output_project()
    tr.write(ostr)
    ostr.flush()

istr.close()
ostr.close()
Exemplo n.º 19
0
    elif ifile == None:
        ifile = arg
    elif ofile == None:
        ofile = arg
    else:
        raise RuntimeError('Error: hfst-substitute.py: unknown option: ' + arg)

# Open the named input file when one was given; otherwise create the input
# stream without a filename.
istr = hfst.HfstInputStream(ifile) if ifile is not None else hfst.HfstInputStream()

# The output stream always takes the input's type and gets a filename only
# when one was given.
out_kwargs = {'type': istr.get_type()}
if ofile is not None:
    out_kwargs['filename'] = ofile
ostr = hfst.HfstOutputStream(**out_kwargs)


def eps(s):
    """Return ``hfst.EPSILON`` for the string ``"@0@"``, else ``s`` unchanged."""
    return hfst.EPSILON if s == "@0@" else s


substitutions = {}
if from_file != None:
    f = open(from_file)
    for line in f: