if val == 'sfst': impl = hfst.ImplementationType.SFST_TYPE elif val == 'openfst-tropical': impl = hfst.ImplementationType.TROPICAL_OPENFST_TYPE elif val == 'foma': impl = hfst.ImplementationType.FOMA_TYPE else: raise RuntimeError('type not recognized: ' + val) elif arg == '-H' or arg == '--do-not-harmonize': harmonize = False elif arg == '-S' or arg == '--semicolon': semicolons = True else: infile = open(arg, 'r') ostr = hfst.HfstOutputStream(type=impl) comp = hfst.XreCompiler(impl) comp.set_harmonization(harmonize) if (semicolons): data = infile.read() i = 0 while (i < len(data)): tr_and_chars_read = comp.compile_first( data[i:]) # HFST 4.0: document this tr = tr_and_chars_read[0] i = i + tr_and_chars_read[1] if tr != None: ostr.write(tr) transducers_written = transducers_written + 1 else: if comp.contained_only_comments(
val = argv[i+1] if val == 'sfst': impl = hfst.ImplementationType.SFST_TYPE elif val == 'openfst-tropical': impl = hfst.ImplementationType.TROPICAL_OPENFST_TYPE elif val == 'foma': impl = hfst.ImplementationType.FOMA_TYPE else: raise RuntimeError('type not recognized: ' + val) elif arg == '-o': skip_next= True outputfilename = argv[i+1] elif arg == '-w': impl = hfst.ImplementationType.TROPICAL_OPENFST_TYPE else: raise RuntimeError('argument not recognized: ' + arg) istr = hfst.HfstInputStream() ostr = None if outputfilename != None: ostr = hfst.HfstOutputStream(filename=outputfilename, type=impl) else: ostr = hfst.HfstOutputStream(type=impl) for tr in istr: tr.convert(impl) ostr.write(tr) istr.close() ostr.close()
# Split a stream of transducers into numbered files: 1.hfst, 2.hfst, ...
import hfst
import hfst_commandline

options = hfst_commandline.hfst_getopt('', [], 0)
istr = hfst_commandline.get_one_hfst_input_stream(options)[0]

# Numbering starts at 1; every transducer is written to its own file using
# the transducer's own implementation type.
for n, tr in enumerate(istr, start=1):
    target = str(n) + ".hfst"
    ostr = hfst.HfstOutputStream(filename=target, type=tr.get_type())
    ostr.write(tr)
    ostr.flush()
    ostr.close()

istr.close()
if hfst.HfstTransducer.is_implementation_type_available(hfst.ImplementationType.SFST_TYPE): types.append(hfst.ImplementationType.SFST_TYPE) if hfst.HfstTransducer.is_implementation_type_available(hfst.ImplementationType.TROPICAL_OPENFST_TYPE): types.append(hfst.ImplementationType.TROPICAL_OPENFST_TYPE) if hfst.HfstTransducer.is_implementation_type_available(hfst.ImplementationType.FOMA_TYPE): types.append(hfst.ImplementationType.FOMA_TYPE) for type in types: if hfst.HfstTransducer.is_implementation_type_available(type): hfst.set_default_fst_type(type) # StreamIsClosedException try: tr = hfst.regex('foo') outstr = hfst.HfstOutputStream(filename='testfile') outstr.close() outstr.write(tr) except hfst.exceptions.StreamIsClosedException: print("Could not write transducer: stream to file was closed.") # TransducerIsCyclicException transducer = hfst.regex('[a:b]*') try: results = transducer.extract_paths(output='text') print("The transducer has %i paths:" % len(results)) print(results) except hfst.exceptions.TransducerIsCyclicException: print("The transducer is cyclic and has an infinite number of paths. Some of them:") results = transducer.extract_paths(output='text', max_cycles=5) print(results)
def save_transducer(transducer, filename):
    """Write ``transducer`` to ``filename`` inside the working directory.

    The target path is ``shared.options['working_dir']`` joined with
    ``filename``.  The output stream is created with the transducer's own
    implementation type (``transducer.get_type()``).

    The stream is closed even when writing fails, so a failed write does
    not leave the file handle open.
    """
    path = os.path.join(shared.options['working_dir'], filename)
    ostr = hfst.HfstOutputStream(filename=path, type=transducer.get_type())
    try:
        ostr.write(transducer)
        ostr.flush()
    finally:
        ostr.close()
# Compile pmatch definitions (from a file, or from standard input when no
# file is given) and write the results as HFST_OLW_TYPE transducers.
import hfst
from sys import argv

defs = None
if len(argv) > 2:
    # At most one positional argument (the input file) is accepted.
    raise RuntimeError('error: hfst-pmatch2fst.py [INFILE]')

if len(argv) == 2:
    defs = hfst.compile_pmatch_file(argv[1])
else:
    # No input file: the whole of standard input is the expression.
    from sys import stdin
    exp = str(stdin.read())
    defs = hfst.compile_pmatch_expression(exp)

ostr = hfst.HfstOutputStream(type=hfst.ImplementationType.HFST_OLW_TYPE)
for compiled in defs:
    ostr.write(compiled)
ostr.close()
# For every language column, build an FST mapping each correspondence to
# that language's slice of it, and write the FST and its inverse to
# "<prefix>2<lang>.fst" and "<lang>2<prefix>.fst".
# NOTE(review): alig_width, lang_lst and correspondences are defined
# outside this view; presumably each correspondence holds one symbol per
# language -- confirm against the caller.
file_prefix = args.input.partition(".")[0]
for pos, lang in zip(range(alig_width), lang_lst):
    bfst = hfst.HfstBasicTransducer()
    for corr in correspondences:
        # A one-character correspondence maps to itself; otherwise take
        # the single character at this language's position.
        if len(corr) == 1:
            phoneme = corr
        else:
            phoneme = corr[pos:pos+1]
        pth = ((corr, phoneme),)
        bfst.disjunct(pth, 0.0)
    fst = hfst.HfstTransducer(bfst)
    fst.repeat_star()
    fst.minimize()

    outfilename = file_prefix + "2" + lang + ".fst"
    ostream = hfst.HfstOutputStream(filename=outfilename)
    fst.write(ostream)
    # The original read `ostream.close` (no call), so the stream was
    # never explicitly closed.
    ostream.close()
    if args.verbosity > 0:
        print(outfilename, "written")

    # Same relation in the opposite direction.
    fst.invert()
    outfilename = lang + "2" + file_prefix + ".fst"
    ostream = hfst.HfstOutputStream(filename=outfilename)
    fst.write(ostream)
    ostream.close()
    if args.verbosity > 0:
        print(outfilename, "written")
elif infile1 == None: infile1 = arg elif infile2 == None: infile2 = arg else: raise RuntimeError( 'Usage: hfst-compose-intersect.py [-1] INFILE1 [-2] INFILE2') istr1 = hfst.HfstInputStream(infile1) istr2 = hfst.HfstInputStream(infile2) if (istr1.get_type() != istr2.get_type()): raise RuntimeError('Error: transducer types differ in ' + infile1 + ' and ' + infile2) tr1 = istr1.read() if not istr1.is_eof(): raise RuntimeError('Error: ' + infile1 + ' must contain exactly one transducer') istr1.close() transducers = [] while (not istr2.is_eof()): transducers.append(istr2.read()) istr2.close() tr1.compose_intersect(transducers, False) ostr = hfst.HfstOutputStream(type=tr1.get_type()) ostr.write(tr1) ostr.flush() ostr.close()
def main():
    """Compile two-level rules and test them against the examples.

    Command line driven: reads the examples (``-e``), compiles every rule
    found in the rule files (``-r``) and, depending on ``--thorough``,
    tests each rule against positive (and negative) examples.  Optionally
    writes the compiled rules (``-o``), the examples rejected by the rule
    set (``-l``) and the wrong strings accepted by it (``-w``) as FSTs.

    Exits through ``argparse`` with a usage error when ``-e`` or ``-r``
    is missing; previously that case crashed with an ``AttributeError``
    (examples) or inside ``fileinput`` (rules).
    """
    version = cfg.timestamp(__file__)
    import argparse
    arpar = argparse.ArgumentParser(
        description="A compiler and tester for two-level rules."
        " Version {}."
        " See https://pytwolc.readthedocs.io/en/latest/index.html"
        " or https://github.com/koskenni/twol"
        " for more information.".format(version))
    arpar.add_argument(
        "-e", "--examples", action='store', nargs='+',
        help="Either one name of a FST file that contains the examples or"
        " a list of names of files which contain the PSTR form examples"
        " used for compiling the rules.",
        default=[None])
    arpar.add_argument(
        "-r", "--rules", action='store', nargs='+',
        help="One or more files which contain the rules, either just one"
        " rule file or a file of defines as the first one and a part of"
        " the whole rule set as the second",
        default=[None])
    arpar.add_argument(
        "-o", "--output",
        help="File to which write the compiled rules if a name is given",
        default="")
    arpar.add_argument(
        "-l", "--lost",
        help="File to which write the examples"
        " that were not accepted by all rules"
        " -- it is written as a FST",
        default="")
    arpar.add_argument(
        "-w", "--wrong",
        help="file to which write the wrong strings"
        " that are accepted by all rules -- it is written as a FST",
        default="")
    arpar.add_argument(
        "-t", "--thorough",
        help="test each rule separately: 0 if no testing is desired,"
        " 1 if against positive examples,"
        " 2 against both positive and negative examples."
        " Default is 2.",
        type=int, choices=[0, 1, 2], default=2)
    arpar.add_argument(
        "--recursion",
        help="set the limit for recursion depth",
        type=int)
    arpar.add_argument(
        "-v", "--verbosity",
        help="level of diagnostic output",
        type=int, default=0)
    args = arpar.parse_args()

    cfg.verbosity = args.verbosity
    if args.recursion:
        sys.setrecursionlimit(args.recursion)

    # The defaults are [None]; without this check a missing option only
    # surfaces later as an AttributeError/TypeError on None.
    if args.examples[0] is None:
        arpar.error("no example files given (use -e/--examples)")
    if args.rules[0] is None:
        arpar.error("no rule files given (use -r/--rules)")

    # A single *.fst argument is a ready-made example FST; anything else
    # is handed to read_examples as a list of file names.
    if len(args.examples) == 1 and args.examples[0].endswith(".fst"):
        twexamp.read_fst(args.examples[0])
    else:
        twexamp.read_examples(args.examples)
    if cfg.verbosity >= 30:
        twbt.ppfst(cfg.examples_fst, title="examples_fst")

    parser = twparser_init()

    examples_fsa = hfst.fst_to_fsa(cfg.examples_fst, separator="^")

    examples_up_fsa = cfg.examples_fst.copy()
    examples_up_fsa.input_project()
    if cfg.verbosity >= 30:
        twbt.ppfst(examples_up_fsa, title="examples_up_fsa")

    twrule.init()

    i = 0                   # running line number over all rule files
    skip = False            # True between a STOP line and the next START
    all_rules_fst_lst = []
    line_lst = []           # stripped lines of the rule being collected
    for line_nl in fileinput.input(args.rules):
        i += 1
        if not line_lst:
            # A new rule starts here: restart the list of raw lines.
            line_nl_lst = []
        line_nl_lst.append(line_nl)
        # Everything from the first '!' onwards is dropped from the line.
        line = line_nl.split('!', maxsplit=1)[0].strip()
        if line == "START":
            skip = False
            continue
        elif line == "STOP":
            skip = True
        if skip or (not line) or line.startswith("!"):
            continue
        line_lst.append(line)
        if not line.endswith(";"):
            # The rule continues on the next line.
            continue
        else:
            rule_str = " ".join(line_lst)
            line_lst = []

        op, left, right = parse_rule(parser, rule_str, i, line_nl_lst)
        if op == "?" or not (left and right):
            continue

        if (args.thorough > 0 and op != "=") or cfg.verbosity > 0:
            print("\n")
            print(rule_str)

        if op == "=":
            # A definition: nothing to compile or test.
            if cfg.verbosity >= 10:
                print(left, op)
                twbt.ppfst(right)
            continue
        elif op == "=>":
            R, selector_fst, MIXe = twrule.rightarrow(line, left, *right)
        elif op == "<=":
            R, selector_fst, MIXe = twrule.output_coercion(line, left, *right)
        elif op == "<--":
            R, selector_fst, MIXe = twrule.input_coercion(line, left, *right)
        elif op == "<=>":
            R, selector_fst, MIXe = twrule.doublearrow(line, left, *right)
        elif op == "/<=":
            R, selector_fst, MIXe = twrule.center_exclusion(line, left, *right)
        else:
            print("Error: not a valid type of a rule", op)
            continue
        if cfg.verbosity >= 10:
            twbt.ppfst(R)
        if args.lost or args.wrong or args.output:
            all_rules_fst_lst.append(R)

        if args.thorough > 0:
            # Positive test: examples picked by the selector must all
            # survive intersection with the rule.
            selector_fst.intersect(cfg.examples_fst)
            # selector_fst.n_best(5)
            selector_fst.minimize()
            if cfg.verbosity >= 20:
                paths = selector_fst.extract_paths(output='raw')
                print_raw_paths(paths[0:20])
            passed_pos_examples_fst = selector_fst.copy()
            passed_pos_examples_fst.intersect(R)
            if args.thorough > 0:
                if passed_pos_examples_fst.compare(selector_fst):
                    print("All positive examples accepted")
                else:
                    lost_examples_fst = selector_fst.copy()
                    lost_examples_fst.minus(passed_pos_examples_fst)
                    lost_examples_fst.minimize()
                    print("** Some positive examples were rejected:")
                    lost_paths = lost_examples_fst.extract_paths(output='raw')
                    print_raw_paths(lost_paths[0:20])

        if args.thorough > 1 and op in {"=>", "<=", "<=>", "<--"}:
            # Negative test: strings derived from the examples through
            # MIXe, minus the examples themselves, must be rejected.
            neg_examples_fsa = examples_fsa.copy()
            neg_examples_fsa.compose(MIXe)
            neg_examples_fsa.output_project()
            neg_examples_fst = hfst.fsa_to_fst(neg_examples_fsa, separator="^")
            neg_examples_fst.minus(cfg.examples_fst)
            NG = examples_up_fsa.copy()
            NG.compose(neg_examples_fst)
            npaths = NG.extract_paths(output='raw')
            #print_raw_paths(npaths)
            passed_neg_examples_fst = NG.copy()
            passed_neg_examples_fst.intersect(R)
            if passed_neg_examples_fst.compare(hfst.empty_fst()):
                print("All negative examples rejected")
            else:
                print("** Some negative examples accepted:")
                npaths = passed_neg_examples_fst.extract_paths(output='raw')
                print_raw_paths(npaths[0:20])

    if args.lost or args.wrong:
        # Apply the whole rule set to the input side of the examples.
        RESU = examples_up_fsa.copy()
        print(RESU.number_of_arcs(), "arcs in RESU")
        RESU.compose_intersect(tuple(all_rules_fst_lst))
        RESU.minimize()
        if args.lost:
            lost_positive_examples_fst = cfg.examples_fst.copy()
            lost_positive_examples_fst.minus(RESU)
            lost_positive_examples_fst.minimize()
            lost_stream = hfst.HfstOutputStream(filename=args.lost)
            lost_stream.write(lost_positive_examples_fst)
            lost_stream.flush()
            lost_stream.close()
            print("wrote lost examples to", args.lost)
        if args.wrong:
            WRONG = RESU.copy()
            WRONG.subtract(cfg.examples_fst)
            WRONG.minimize()
            wrong_stream = hfst.HfstOutputStream(filename=args.wrong)
            wrong_stream.write(WRONG)
            wrong_stream.flush()
            wrong_stream.close()
            print("wrote wrongly accepted examples to", args.wrong)

    if args.output:
        outstream = hfst.HfstOutputStream(filename=args.output)
        for fst in all_rules_fst_lst:
            outstream.write(fst)
        outstream.flush()
        outstream.close()
        print("wrote {} rule transducers to {}".format(len(all_rules_fst_lst),
                                                       args.output))
    return
return if __name__ == "__main__": import hfst import argparse arpar = argparse.ArgumentParser("python3 twexamp.py") arpar.add_argument("examples", help="example pair strings file", default="examples.pstr") arpar.add_argument("output", help="file to which write the example FST", default="") arpar.add_argument("-v", "--verbosity", help="level of diagnostic output", type=int, default=0) args = arpar.parse_args() cfg.verbosity = args.verbosity read_examples(args.examples, build_fsts=True) if args.output: exfile = hfst.HfstOutputStream(filename=args.output) exfile.write(cfg.examples_fst) exfile.flush() exfile.close() print("--- example fst written to ", args.output, " ---")
# Compile an SFST program and write the result with the implementation
# type selected by "-f TYPE" (default: tropical OpenFst).
import hfst
from sys import argv

# Type names accepted after '-f'.
type_by_name = {
    'sfst': hfst.ImplementationType.SFST_TYPE,
    'openfst-tropical': hfst.ImplementationType.TROPICAL_OPENFST_TYPE,
    'foma': hfst.ImplementationType.FOMA_TYPE,
}

impl = hfst.ImplementationType.TROPICAL_OPENFST_TYPE
skip_next = False
for i in range(1, len(argv)):
    if skip_next:
        # This argument was already consumed as the value of '-f'.
        skip_next = False
        continue
    arg = argv[i]
    if arg != '-f':
        raise RuntimeError('argument not recognized: ' + arg)
    skip_next = True
    val = argv[i+1]
    if val not in type_by_name:
        raise RuntimeError('type not recognized: ' + val)
    impl = type_by_name[val]

hfst.set_default_fst_type(impl)
result = hfst.compile_sfst_file("")
result.convert(impl)
ostr = hfst.HfstOutputStream(type=hfst.get_default_fst_type())
ostr.write(result)
def save_fst(fst, fn):
    """Write the transducer ``fst`` to the file named ``fn``.

    The output stream is created without an explicit type argument.  It
    is closed even when writing fails, so a failed write does not leave
    the file handle open.
    """
    out = hfst.HfstOutputStream(filename=fn)
    try:
        out.write(fst)
        out.flush()
    finally:
        out.close()
def main():
    """Test a complete set of compiled twol rules against the examples.

    Reads the examples (``-e``) and the compiled rule FSTs (``-r``),
    builds the positive and negative example sets, and optionally writes
    the positive examples rejected by the rules (``-l``) and the negative
    examples accepted by all rules (``-w``) as FSTs.

    Exits through ``argparse`` with a usage error when ``-e`` or ``-r``
    is missing, and raises ``SystemExit`` with a message when a ``.fst``
    rule file does not exist.
    """
    import argparse
    arpar = argparse.ArgumentParser(
        "twol-tester",
        description="""A program for testing complete sets of twol rules""")
    arpar.add_argument(
        "-e", "--examples", action='store', nargs='+',
        help="Either one name of a FST file that contains the examples or"
        " a list of names of files which contain the PSTR form examples"
        " used for compiling the rules.",
        default=[None])
    arpar.add_argument(
        "-r", "--rules", action='store', nargs='+',
        help="One or more files which contain the compiled rules as FSTs."
        " The set of rules ought to cover (almost) all morphophonemes.",
        default=[None])
    arpar.add_argument(
        "-l", "--lost",
        help="an FST that lists positive examples that are not accepted"
        " by all rules",
        default="")
    arpar.add_argument(
        "-w", "--wrong",
        help="an FST that lists the negative examples that are accepted"
        " gy by all rules",
        default="")
    arpar.add_argument(
        "-v", "--verbosity",
        help="level of diagnostic output",
        type=int, default=0)
    args = arpar.parse_args()

    cfg.verbosity = args.verbosity

    # The defaults are [None]; without this check a missing option only
    # surfaces later as an AttributeError/TypeError on None.
    if args.examples[0] is None:
        arpar.error("no example files given (use -e/--examples)")
    if args.rules[0] is None:
        arpar.error("no rule files given (use -r/--rules)")

    #
    # Build the FST of the example pair strings and
    # store in cfg.examples_fst and the cfg.input_symbol_set,
    # cfg.output_symbol_set, cfg.symbol_pair_set and
    # cfg.all_pairs_set
    #
    if len(args.examples) and args.examples[0].endswith(".fst"):
        twexamp.read_fst(args.examples[0])
    elif len(args.examples) > 0:
        twexamp.read_examples(args.examples, build_fsts=True)
    else:
        error("ERROR IN EXAMPLE FILE NAMES: {}".format(args.examples))

    #
    # Read in the compiled twol rule FST or FSTs
    #
    rule_fst_lst = []
    for rule_file_name in args.rules:
        if (not os.path.isfile(rule_file_name)
                and rule_file_name.endswith(".fst")):
            # The original call used a comma instead of ".format" and an
            # undefined name, so this path raised NameError instead of
            # reporting the missing file.
            raise SystemExit(
                "RULE FST FILE {} DOES NOT EXIST".format(rule_file_name))
        fst_stream = hfst.HfstInputStream(rule_file_name)
        # A rule file may hold several transducers; collect them all.
        while not fst_stream.is_eof():
            fst = fst_stream.read()
            rule_fst_lst.append(fst)
        fst_stream.close()

    #
    # Build positive and negative examples
    #
    pos_fst = cfg.examples_fst.copy()
    neg_fst = pos_fst.copy()
    paths("positive examples", neg_fst)
    # Negative examples: the input side of the examples composed with the
    # closure of all pairs, minus the positive examples.
    neg_fst.input_project()
    pistar_fst = cfg.all_pairs_fst.copy()
    pistar_fst.repeat_star()
    neg_fst.compose(pistar_fst)
    neg_fst.minimize()
    neg_fst.subtract(pos_fst)
    paths("negative examples", neg_fst)

    #
    # Lost and wrong examples
    #
    if args.lost:
        # Positive examples not accepted by every rule.
        remain_fst = pos_fst.copy()
        for fst in rule_fst_lst:
            remain_fst.intersect(fst)
        lost_fst = pos_fst.copy()
        lost_fst.subtract(remain_fst)
        lost_stream = hfst.HfstOutputStream(filename=args.lost)
        lost_stream.write(lost_fst)
        lost_stream.close()

    if args.wrong:
        # Negative examples accepted by every rule.
        wrong_fst = neg_fst.copy()
        for fst in rule_fst_lst:
            wrong_fst.intersect(fst)
        wrong_stream = hfst.HfstOutputStream(filename=args.wrong)
        wrong_stream.write(wrong_fst)
        wrong_stream.close()
if hfst.HfstTransducer.is_implementation_type_available(hfst.ImplementationType.FOMA_TYPE): types.append(hfst.ImplementationType.FOMA_TYPE) print('HERE!!!') for type in types: print('\n--- Testing implementation type %s ---\n' % hfst.fst_type_to_string(type)) hfst.set_default_fst_type(type) tr1 = None tr2 = None tr3 = None type_ = hfst.ImplementationType.TROPICAL_OPENFST_TYPE ostr = hfst.HfstOutputStream(filename='foobar.hfst', type=type_) tr_ = hfst.regex('{foo}:{bar}::0.5') tr_.convert(type_) ostr.write(tr_) ostr.write(tr_) ostr.flush() ostr.close() if not os.path.isfile('foobar.hfst'): raise RuntimeError('Missing file: foobar.hfst') istr = hfst.HfstInputStream('foobar.hfst') numtr = 0 try:
def save(fst, outf):
    """Write the transducer ``fst`` to the file named ``outf``.

    The output stream is created with the transducer's own implementation
    type (``fst.get_type()``).  It is closed even when writing fails, so
    a failed write does not leave the file handle open.
    """
    ostr = hfst.HfstOutputStream(filename=outf, type=fst.get_type())
    try:
        ostr.write(fst)
        ostr.flush()
    finally:
        ostr.close()
print("All negative examples rejected") else: print("** Some negative examples accepted:") npaths = passed_neg_examples_fst.extract_paths(output='raw') print_raw_paths(npaths[0:20]) if args.lost or args.wrong: RESU = examples_up_fsa.copy() print(RESU.number_of_arcs(), "arcs in RESU") RESU.compose_intersect(tuple(all_rules_fst_lst)) RESU.minimize() if args.lost: lost_positive_examples_fst = cfg.examples_fst.copy() lost_positive_examples_fst.minus(RESU) lost_positive_examples_fst.minimize() lost_stream = hfst.HfstOutputStream(filename=args.lost) lost_stream.write(lost_positive_examples_fst) lost_stream.flush() lost_stream.close() print("wrote lost examples to", args.lost) if args.wrong: WRONG = RESU.copy() WRONG.subtract(cfg.examples_fst) WRONG.minimize() wrong_stream = hfst.HfstOutputStream(filename=args.wrong) wrong_stream.write(WRONG) wrong_stream.flush() wrong_stream.close() print("wrote wrongly accepted examples to", args.wrong) if args.output: outstream = hfst.HfstOutputStream(filename=args.output)
raise RuntimeError('Transducer format must be given as first argument') if sys.argv[1] == 'sfst': if not hfst.HfstTransducer.is_implementation_type_available( hfst.ImplementationType.SFST_TYPE): sys.exit(77) hfst.set_default_fst_type(hfst.ImplementationType.SFST_TYPE) elif sys.argv[1] == 'foma': if not hfst.HfstTransducer.is_implementation_type_available( hfst.ImplementationType.FOMA_TYPE): sys.exit(77) hfst.set_default_fst_type(hfst.ImplementationType.FOMA_TYPE) elif sys.argv[1] == 'openfst': if not hfst.HfstTransducer.is_implementation_type_available( hfst.ImplementationType.TROPICAL_OPENFST_TYPE): sys.exit(77) hfst.set_default_fst_type(hfst.ImplementationType.TROPICAL_OPENFST_TYPE) else: raise RuntimeError('implementation format not recognized') tr1 = hfst.regex('föö:bär') tr2 = hfst.regex('0') tr3 = hfst.regex('0-0') ostr = hfst.HfstOutputStream() ostr.write(tr1) ostr.write(tr2) ostr.write(tr3) ostr.flush() ostr.close()
# Project every transducer of the input stream to its input or output
# side, as chosen with -p/--project, and write the results out.
import hfst
import hfst_commandline

short_getopts = 'p:'
long_getopts = ['project=']
options = hfst_commandline.hfst_getopt(short_getopts, long_getopts, 1)

# The last -p/--project occurrence on the command line wins.
level = None
for opt in options[0]:
    if opt[0] in ('-p', '--project'):
        level = opt[1]

istr = hfst_commandline.get_one_hfst_input_stream(options)[0]
ostr = hfst.HfstOutputStream(type=istr.get_type())

while not istr.is_eof():
    tr = istr.read()
    # The level is only validated once a transducer has been read.
    if level == 'input':
        tr.input_project()
    elif level == 'output':
        tr.output_project()
    else:
        raise RuntimeError(
            'hfst-project: projection level must be defined with -p [input|output]'
        )
    tr.write(ostr)
    ostr.flush()

istr.close()
ostr.close()
elif ifile == None: ifile = arg elif ofile == None: ofile = arg else: raise RuntimeError('Error: hfst-substitute.py: unknown option: ' + arg) istr = None if ifile != None: istr = hfst.HfstInputStream(ifile) else: istr = hfst.HfstInputStream() ostr = None if ofile != None: ostr = hfst.HfstOutputStream(filename=ofile, type=istr.get_type()) else: ostr = hfst.HfstOutputStream(type=istr.get_type()) def eps(s): if s == "@0@": return hfst.EPSILON else: return s substitutions = {} if from_file != None: f = open(from_file) for line in f: