def get_struct_feat(labels, weight):
    """Build the rule for a structured feature from a label sequence.

    The final element of ``labels`` is the center label; it is expanded by
    ``get_struct_center(center_label, weight)`` into a ``(center,
    center_context)`` pair.  Every earlier label is expanded by
    ``get_label_context`` and those expansions are joined with single
    spaces to form the left context.

    Returns:
        A 2-tuple ``(regex(rule_str), rule_str)`` where ``rule_str`` is
        ``center``, the left context and ``center_context`` joined by
        single spaces, in that order.
    """
    center, center_context = get_struct_center(labels[-1], weight)
    left_context = ' '.join(map(get_label_context, labels[:-1]))
    rule_str = ' '.join((center, left_context, center_context))
    return regex(rule_str), rule_str
def test_tokenized(tok, pathin, pathout, exp, weight=0):
    """Check that tokenizing a path produces the transducer described by *exp*.

    Args:
        tok: Tokenizer object.  ``tok.tokenize_one_level(pathin)`` is used
            when ``pathout`` is ``None``; otherwise
            ``tok.tokenize(pathin, pathout)`` is used.
        pathin: Input side of the path (must be a string, since it is
            concatenated into the error message).
        pathout: Output side of the path, or ``None`` for a one-level path.
        exp: Regular expression, compiled with ``libhfst.regex``, that the
            tokenized result is expected to equal.
        weight: Weight forwarded to ``libhfst.tokenized_fst``.

    Raises:
        RuntimeError: If the transducer built by ``libhfst.tokenized_fst``
            does not ``compare`` equal to the compiled ``exp``.
    """
    # Identity test: ``None`` is a singleton and ``==`` can be overridden.
    one_level = pathout is None
    if one_level:
        tokenized = tok.tokenize_one_level(pathin)
    else:
        tokenized = tok.tokenize(pathin, pathout)

    if libhfst.tokenized_fst(tokenized, weight).compare(libhfst.regex(exp)):
        return

    # Build the description only on failure, as the original did.
    described = pathin if one_level else pathin + ", " + pathout
    raise RuntimeError('test_tokenized failed with input: ' + described)
def get_top_outputs(outputs, str_model, top_n):
    """Rank candidate output sequences against a structured model.

    ``outputs`` is a list in which each element is a list of
    ``[symbol, score]`` candidates for one position.  Two ``['_#_', 0]``
    boundary positions are added on each side.  For every position the
    candidates are turned into a disjunction of weighted expressions
    (``LS symbol::score LS``) and the positions are concatenated in order.
    The result is composed with ``str_model``, epsilon-removed,
    determinized and pruned with ``n_best(top_n)``.

    Args:
        outputs: List of per-position candidate lists (must be a ``list``,
            because it is concatenated with the boundary padding).
        str_model: Transducer passed to ``compose``.
        top_n: Number of paths kept by ``n_best``.

    Returns:
        A list of ``(remove_markers(path), key)`` tuples in ascending order
        of ``(key, path)``, where ``key`` is the second field of the first
        entry of each extracted path -- presumably its weight; confirm
        against the ``extract_paths`` documentation.
    """
    boundary = [['_#_', 0]]
    padded = [boundary] * 2 + outputs + [boundary] * 2

    # Start from the expression "0" so that concatenation below simply
    # appends one position after another.
    fst = regex("0")
    for candidates in padded:
        alternatives = empty_fst()
        for symbol, score in candidates:
            alternatives.disjunct(
                regex("%s %s::%f %s" % (LS, get_symbs(symbol), score, LS))
            )
        fst.concatenate(alternatives)

    fst.compose(str_model)
    fst.remove_epsilons()
    fst.determinize()
    fst.n_best(top_n)

    # Put the numeric field first so that sorting orders by it, then swap
    # back when building the result.
    ranked = sorted((p[0][1], p[0][0]) for p in fst.extract_paths().values())
    return [(remove_markers(path), key) for key, path in ranked]
import libhfst

# Compile the three test expressions first, in order, then push them through
# one default-constructed output stream.
# NOTE(review): a default HfstOutputStream presumably writes to standard
# output -- confirm against the HFST API documentation.
tr1, tr2, tr3 = (libhfst.regex(expr) for expr in ('föö:bär', '0', '0-0'))

ostr = libhfst.HfstOutputStream()
for transducer in (tr1, tr2, tr3):
    ostr.write(transducer)
ostr.flush()
ostr.close()
import libhfst

# Push the weights of one weighted transducer first towards the initial
# state, then towards the final state(s), printing it after each step.
tr = libhfst.regex('[a::1 a:b::0.3 (b::0)]::0.7;')
for direction in (libhfst.TO_INITIAL_STATE, libhfst.TO_FINAL_STATE):
    tr.push_weights(direction)
    print(tr)
import libhfst

# Expressions the incoming transducers must be equivalent to, in order.
EXPECTED_EXPRESSIONS = ['föö:bär', '0', '0-0']

# Read every transducer from a default-constructed input stream.
# NOTE(review): HfstInputStream() with no arguments presumably reads from
# standard input -- confirm against the HFST API documentation.
transducers = []
istr = libhfst.HfstInputStream()
try:
    while not istr.is_eof():
        transducers.append(istr.read())
finally:
    # Close the stream even when a read fails part-way through.
    istr.close()

if len(transducers) != len(EXPECTED_EXPRESSIONS):
    raise RuntimeError('Wrong number of transducers read.')

for transducer, expression in zip(transducers, EXPECTED_EXPRESSIONS):
    if not transducer.compare(libhfst.regex(expression)):
        raise RuntimeError('Transducers are not equivalent.')

# Dump each transducer with write_att, separated by a '--' line.
if transducers:
    f = libhfst.hfst_stdout()
    for position, transducer in enumerate(transducers):
        if position:
            f.write('--\n')
        transducer.write_att(f)
def test_fst(input, result):
    """Check that ``libhfst.fst(input)`` matches the regex ``result``.

    Args:
        input: Value handed to ``libhfst.fst``; it is also concatenated
            into the error message, so it must be a string.
        result: Regular expression compiled with ``libhfst.regex``.

    Raises:
        RuntimeError: If the two transducers do not ``compare`` equal.
    """
    built = libhfst.fst(input)
    expected = libhfst.regex(result)
    if built.compare(expected):
        return
    raise RuntimeError('test_fst failed with input: ' + input)
raise RuntimeError(get_linenumber()) # Copy constructor transducer = libhfst.HfstTransducer(TR1) if not (TR1.compare(transducer)): raise RuntimeError(get_linenumber()) if not (transducer.compare(TR1)): raise RuntimeError(get_linenumber()) # Read lexc tr = libhfst.compile_lexc_file('test.lexc') tr.insert_freely(tr1).minimize() tr.insert_freely(('A','B')).minimize() # Substitute tr = libhfst.regex('a a:b b;') tr.substitute('a', 'A', input=True, output=False) eq = libhfst.regex('A:a A:b b;') if not (tr.compare(eq)): raise RuntimeError(get_linenumber()) tr = libhfst.regex('a a:b b;') tr.substitute('a', 'A', input=False, output=True) eq = libhfst.regex('a:A a:b b;') if not (tr.compare(eq)): raise RuntimeError(get_linenumber()) tr = libhfst.regex('a a:b b;') tr.substitute('a','A') eq = libhfst.regex('A A:b b;') if not (tr.compare(eq)):
import libhfst

# The stream is expected to hold transducers equivalent to these
# expressions, in this order.
EXPECTED_EXPRESSIONS = ['föö:bär', '0', '0-0']

# NOTE(review): a default-constructed HfstInputStream presumably reads from
# standard input -- confirm against the HFST API documentation.
transducers = []
istr = libhfst.HfstInputStream()
try:
    while not istr.is_eof():
        transducers.append(istr.read())
finally:
    # Release the stream even if read() raises.
    istr.close()

if len(transducers) != len(EXPECTED_EXPRESSIONS):
    raise RuntimeError('Wrong number of transducers read.')

for expression, transducer in zip(EXPECTED_EXPRESSIONS, transducers):
    if not transducer.compare(libhfst.regex(expression)):
        raise RuntimeError('Transducers are not equivalent.')

# Write all transducers with write_att; a '--' line goes between
# consecutive transducers but not before the first one.
if transducers:
    f = libhfst.hfst_stdout()
    first, *remaining = transducers
    first.write_att(f)
    for transducer in remaining:
        f.write('--\n')
        transducer.write_att(f)
center, center_context = get_struct_center(center_label, weight) context_labels = labels[:-1] left_context = ' '.join([get_label_context(l) for l in context_labels]) rule_str = ' '.join([center, left_context, center_context]) return regex(rule_str), rule_str if __name__ == '__main__': is_structured = 0 unstructured_model = {} structured_rules = regex('?*') structured_model = regex('?*') oustr = open(argv[1] + '.ustr', 'wb') ostr = create_hfst_output_stream(argv[1] + '.str', TROPICAL_OPENFST_TYPE, 1) seen_struct_feats = set() for i, line in enumerate(map(lambda x: x.strip(), stdin)): if line == '': continue if line == STRUCTID: stderr.write("Structured features.\n") is_structured = 1
import libhfst

# Write the weighted transducer a:b::2.8 through an output stream created
# with hfst_format=False, then flush and close the stream.
# NOTE(review): no filename is given, so the stream presumably targets
# standard output -- confirm against the HFST API documentation.
out = libhfst.HfstOutputStream(hfst_format=False)
out.write(libhfst.regex('a:b::2.8'))
out.flush()
out.close()
# -*- coding: utf-8 -*- import libhfst from sys import argv, stderr base = "[\£ \£ £]*" all_ustr_fsts = [] all_str_fsts = [] ustr_fst = libhfst.regex(base) str_fst = libhfst.regex(base) ustr_fsts = libhfst.regex(base) str_fsts = libhfst.regex(base) fst_count = 0 ustr = 1 for line in open(argv[1], "r"): stderr.write("LINE: %u\r" % fst_count) line = line.strip() if line == "": continue if line == "UNSTRUCTURED FEATURES": ustr = 1 elif line == "STRUCTURED FEATURES": ustr = 0 else: if ustr: if fst_count % 100 == 0:
import libhfst


def _expect_equivalent(final_weight, transitions, expression):
    """Build a two-state basic transducer and compare it with a regex.

    State 1 is added and given ``final_weight``; each ``(input, output)``
    pair in ``transitions`` becomes a transition from state 0 to state 1.
    The result is converted to an ``HfstTransducer`` and must ``compare``
    equal to ``libhfst.regex(expression)``.

    Raises:
        RuntimeError: With an empty message when the comparison fails.
    """
    fsm = libhfst.HfstBasicTransducer()
    fsm.add_state(1)
    fsm.set_final_weight(1, final_weight)
    for input_symbol, output_symbol in transitions:
        fsm.add_transition(0, 1, input_symbol, output_symbol)
    if not libhfst.HfstTransducer(fsm).compare(libhfst.regex(expression)):
        raise RuntimeError('')


# Epsilon on the output side.
_expect_equivalent(2.0, [("foo", libhfst.EPSILON)], 'foo:0::2.0')

# Unknown on the output side, plus the explicit foo:foo transition.
_expect_equivalent(
    -0.5,
    [("foo", libhfst.UNKNOWN), ("foo", "foo")],
    'foo:?::-0.5',
)

# Identity on both sides.
_expect_equivalent(1.5, [(libhfst.IDENTITY, libhfst.IDENTITY)], '?::1.5')
outputs['_#_'].append('_#_') out = libhfst.create_hfst_output_stream("", libhfst.TROPICAL_OPENFST_TYPE, 1) ustr_model = libhfst.HfstInputStream(argv[2]).read() str_model = libhfst.HfstInputStream(argv[3]).read() for i, line in enumerate(imap(lambda x: x.strip(), stdin)): stderr.write("LINE: %u\r" % i) expr = '' if line == '': continue chars = ('_#_ _#_ # ' + line.replace('0','"0"') + ' # _#_ _#_').split(' ') for char in chars: expr += ('%s [%s] £ ' % (escape(char), '|'.join([escape(c) for c in outputs[char]]))) re = libhfst.regex(expr) re.compose(ustr_model) re.minimize() re.compose(str_model) re.minimize() re.n_best(NBEST) for p in get_sorted_paths(re): print p[1] print "<SEP>" stdout.flush() stderr.write('\n')
# The examples given in doxygen documentation import libhfst # StreamIsClosedException try: tr = libhfst.regex('foo') outstr = libhfst.HfstOutputStream(filename='testfile') outstr.close() outstr.write(tr) except libhfst.StreamIsClosedException: print("Could not write transducer: stream to file was closed.") # TransducerIsCyclicException transducer = libhfst.regex('[a:b]*') try: results = transducer.extract_paths(output='text') print("The transducer has %i paths:" % len(results)) print(results) except libhfst.TransducerIsCyclicException: print( "The transducer is cyclic and has an infinite number of paths. Some of them:" ) results = transducer.extract_paths(output='text', max_cycles=5) print(results) # NotTransducerStreamException f = open('foofile', 'w') f.write('This is an ordinary text file.\n') f.close() try:
import libhfst

# Select foma as the default transducer type before anything is built, then
# write the transducer a:b through an output stream created with
# hfst_format=False, flushing and closing it afterwards.
# NOTE(review): no filename is given, so the stream presumably targets
# standard output -- confirm against the HFST API documentation.
libhfst.set_default_fst_type(libhfst.FOMA_TYPE)

out = libhfst.HfstOutputStream(hfst_format=False)
out.write(libhfst.regex('a:b'))
out.flush()
out.close()
# The examples given in doxygen documentation import libhfst # StreamIsClosedException try: tr = libhfst.regex("foo") outstr = libhfst.HfstOutputStream(filename="testfile") outstr.close() outstr.write(tr) except libhfst.StreamIsClosedException: print("Could not write transducer: stream to file was closed.") # TransducerIsCyclicException transducer = libhfst.regex("[a:b]*") try: results = transducer.extract_paths(output="text") print("The transducer has %i paths:" % len(results)) print(results) except libhfst.TransducerIsCyclicException: print("The transducer is cyclic and has an infinite number of paths. Some of them:") results = transducer.extract_paths(output="text", max_cycles=5) print(results) # NotTransducerStreamException f = open("foofile", "w") f.write("This is an ordinary text file.\n") f.close() try: instr = libhfst.HfstInputStream("foofile") tr = instr.read()