def getPhoneticAnalyser(self):
    """Return a lookup-ready transducer for phonetic analysis.

    The base transducer is obtained from ``getTransducer(self.fsa_analysis)``
    the first time it is needed and stored on ``self.phonetictransducer``;
    later calls reuse the stored object. Every call constructs a new
    ``libhfst.HfstTransducer`` from the stored one, calls
    ``remove_epsilons()`` and ``lookup_optimize()`` on it, and returns it.
    """
    base = self.phonetictransducer
    if not base:
        # First use: load the transducer and remember it for later calls.
        base = getTransducer(self.fsa_analysis)
        self.phonetictransducer = base

    # Work on a new object built from the cached one, so the calls below
    # are made on the returned transducer rather than on the cache entry.
    prepared = libhfst.HfstTransducer(base)
    prepared.remove_epsilons()
    prepared.lookup_optimize()
    return prepared
def getSyllableAnalyser(self):
    """Return a lookup-ready transducer for syllable analysis.

    The base transducer is obtained from ``getTransducer(self.fsa_syl)`` the
    first time it is needed and stored on ``self.syllabletransducer``; later
    calls reuse the stored object. Every call constructs a new
    ``libhfst.HfstTransducer`` from the stored one, calls
    ``remove_epsilons()`` and ``lookup_optimize()`` on it, and returns it.
    """
    base = self.syllabletransducer
    if not base:
        # First use: load the transducer and remember it for later calls.
        base = getTransducer(self.fsa_syl)
        self.syllabletransducer = base

    # Work on a new object built from the cached one, so the calls below
    # are made on the returned transducer rather than on the cache entry.
    prepared = libhfst.HfstTransducer(base)
    prepared.remove_epsilons()
    prepared.lookup_optimize()
    return prepared
def getG2PConverter(self):
    """Return a lookup-ready G2P converter transducer.

    The base transducer is obtained from ``getTransducer(self.fsa_g2p)`` the
    first time it is needed and stored on ``self.g2ptransducer``; later calls
    reuse the stored object. Every call constructs a new
    ``libhfst.HfstTransducer`` from the stored one, calls
    ``remove_epsilons()`` and ``lookup_optimize()`` on it, and returns it.
    """
    base = self.g2ptransducer
    if not base:
        # First use: load the transducer and remember it for later calls.
        base = getTransducer(self.fsa_g2p)
        self.g2ptransducer = base

    # Work on a new object built from the cached one, so the calls below
    # are made on the returned transducer rather than on the cache entry.
    prepared = libhfst.HfstTransducer(base)
    prepared.remove_epsilons()
    prepared.lookup_optimize()
    return prepared
def getAnalyser(self):
    """Return a lookup-ready analyser transducer.

    The base transducer is obtained from ``getTransducer(self.fsa)`` the
    first time it is needed and stored on ``self.transducer``; later calls
    reuse the stored object. Every call constructs a new
    ``libhfst.HfstTransducer`` from the stored one, calls
    ``remove_epsilons()`` and ``lookup_optimize()`` on it, and returns it.
    """
    base = self.transducer
    if not base:
        # First use: load the transducer and remember it for later calls.
        base = getTransducer(self.fsa)
        self.transducer = base

    # Work on a new object built from the cached one, so the calls below
    # are made on the returned transducer rather than on the cache entry.
    prepared = libhfst.HfstTransducer(base)
    prepared.remove_epsilons()
    prepared.lookup_optimize()
    return prepared
def get_generator(self):
    """Return a lookup-ready generator transducer.

    The base transducer is obtained from ``get_transducer(self.fsa)`` the
    first time it is needed and stored on ``self.transducer``; later calls
    reuse the stored object. Every call constructs a new
    ``libhfst.HfstTransducer`` from the stored one, calls
    ``remove_epsilons()`` and ``lookup_optimize()`` on it, and returns it.
    """
    base = self.transducer
    if not base:
        # First use: load the transducer and remember it for later calls.
        base = get_transducer(self.fsa)
        self.transducer = base

    # Work on a new object built from the cached one, so the calls below
    # are made on the returned transducer rather than on the cache entry.
    prepared = libhfst.HfstTransducer(base)
    prepared.remove_epsilons()
    prepared.lookup_optimize()
    return prepared
def getP2GConverter(self):
    """Return a lookup-ready P2G converter transducer.

    The G2P base transducer is obtained from ``getTransducer(self.fsa_g2p)``
    the first time it is needed and stored on ``self.g2ptransducer``; later
    calls reuse the stored object. Every call constructs a new
    ``libhfst.HfstTransducer`` from the stored one, inverts it, calls
    ``remove_epsilons()`` and ``lookup_optimize()`` on it, and returns it.
    """
    base = self.g2ptransducer
    if not base:
        # First use: load the G2P transducer and remember it for later calls.
        base = getTransducer(self.fsa_g2p)
        self.g2ptransducer = base

    prepared = libhfst.HfstTransducer(base)
    # The P2G converter is obtained by inverting the G2P transducer; the
    # inversion is applied to the new object, not to the cached one.
    prepared.invert()
    prepared.remove_epsilons()
    prepared.lookup_optimize()
    return prepared
import sys

import libhfst

# Test script: build a one-transition transducer, push its weights toward the
# final state and check the resulting final weight. Exits with status 0 on
# success and 1 on failure.

# Create a HFST basic transducer [a:b] with transition weight 0.3 and final weight 0.5.
t = libhfst.HfstBasicTransducer()
t.add_state(1)
t.add_transition(0, 1, 'a', 'b', 0.3)
t.set_final_weight(1, 0.5)

# Convert to tropical OpenFst format (the default) and push weights toward final state.
T = libhfst.HfstTransducer(t, libhfst.get_default_fst_type())
T.push_weights(libhfst.TO_FINAL_STATE)

# Convert back to HFST basic transducer.
tc = libhfst.HfstBasicTransducer(T)

# Only the weight query can raise HfstException, so only it is guarded.
try:
    # Raises if the state does not exist or is not final.
    final_weight = tc.get_final_weight(1)
except libhfst.HfstException:
    print("TEST FAILED: An exception thrown.")
    sys.exit(1)

# Rounding might affect the precision, so accept a small window.
# sys.exit is used instead of the site-provided exit() helper, which is not
# guaranteed to exist in every interpreter configuration.
if 0.79 < final_weight < 0.81:
    print("TEST PASSED")
    sys.exit(0)
else:
    print("TEST FAILED")
    sys.exit(1)
def __init__(self, filename):
    """Open *filename* as an HFST input stream and build a transducer from it.

    The stream is stored on ``self.istr`` and the transducer constructed
    from that stream on ``self.transducer``.
    """
    stream = libhfst.HfstInputStream(filename)
    self.istr = stream
    self.transducer = libhfst.HfstTransducer(stream)
tr2 = libhfst.HfstBasicTransducer() tr2.add_state(1) tr2.add_state(2) tr2.set_final_weight(2, 0) tr2.add_transition( 0, libhfst.HfstBasicTransition(1, "@_IDENTITY_SYMBOL_@", "@_IDENTITY_SYMBOL_@", 0)) tr2.add_transition(1, libhfst.HfstBasicTransition(2, "bar", "bar", 0)) # tr2 is now [ [ ?:? ] [ bar:bar ] ] # print(tr2) if libhfst.HfstTransducer.is_implementation_type_available(libhfst.SFST_TYPE): Tr1 = libhfst.HfstTransducer(tr1, libhfst.SFST_TYPE) Tr2 = libhfst.HfstTransducer(tr2, libhfst.SFST_TYPE) Tr1.disjunct(Tr2).minimize() # Tr1 is now [ [ ?:foo | bar:foo ] | [[ ?:? | foo:foo ] [ bar:bar ]]] # print(Tr1) print("HfstBasicTransducer: iterating through") for state in t.states(): for transition in t.transitions(state): # print '%i\t%i\t%s\t%s\t%f' % (state, transition.get_target_state(), transition.get_input_symbol(), transition.get_output_symbol(), transition.get_weight()) print("{0}\t{1}\t{2}\t{3}\t{4}".format(state, transition.get_target_state(), transition.get_input_symbol(), transition.get_output_symbol(), transition.get_weight()))
import libhfst
import io

# Part 1: extract the paths of an a:b transducer in tropical OpenFst format
# and print each one as "input:output weight".
tr = libhfst.HfstTransducer('a', 'b', libhfst.TROPICAL_OPENFST_TYPE)
paths = libhfst.extract_paths(tr)
detokenized = libhfst.detokenize_paths(paths)
for path in detokenized:
    print("{p.input}:{p.output} {p.weight}".format(p=path))

# Part 2: build the same transducer again, convert it to HFST_OLW_TYPE,
# look up "a" and print each result as "output weight".
tr = libhfst.HfstTransducer('a', 'b', libhfst.TROPICAL_OPENFST_TYPE)
tr.convert(libhfst.HFST_OLW_TYPE)
lookup_results = tr.lookup("a")
for path in libhfst.detokenize_paths(lookup_results):
    print("{p.output} {p.weight}".format(p=path))
elif sys.argv[1] == 'openfst': ttype = libhfst.TROPICAL_OPENFST_TYPE elif sys.argv[1] == 'foma': ttype = libhfst.FOMA_TYPE else: print("ERROR: could not parse transducer format argument.") sys.exit(1) transducers_in_stream = int(sys.argv[2]) istr = libhfst.HfstInputStream() ostr = libhfst.HfstOutputStream(ttype) transducers_read = 0 transducers_written = 0 while True: try: tr = libhfst.HfstTransducer(istr) transducers_read += 1 ostr.redirect(tr) transducers_written += 1 except: # libhfst.EndOfStreamException: assert(libhfst.hfst_get_exception() == "EndOfStreamException") break; if transducers_read != transducers_in_stream: print("ERROR: wrong number of transducers read") sys.exit(1) istr.close() ostr.close()
print('%i %f' % (state, fsm.get_final_weight(state))) for state in fsm.states(): for arc in fsm.transitions(state): print('%i ' % (state), end='') print(arc) if fsm.is_final_state(state): print('%i %f' % (state, fsm.get_final_weight(state))) # HfstBasicTransducer.disjunct lexicon = libhfst.HfstBasicTransducer() tok = libhfst.HfstTokenizer() lexicon.disjunct(tok.tokenize('dog'), 0.3) lexicon.disjunct(tok.tokenize('cat'), 0.5) lexicon.disjunct(tok.tokenize('elephant'), 1.6) lexicon = libhfst.HfstTransducer(lexicon) if not lexicon.compare(libhfst.regex('{dog}::0.3|{cat}::0.5|{elephant}::1.6')): raise RuntimeError('') # HfstBasicTransducer.transitions for state in fsm.states(): for arc in fsm.transitions(state): print('%i ' % (state), end='') print(arc) if fsm.is_final_state(state): print('%i %f' % (state, fsm.get_final_weight(state))) # HfstBasicTransducer.substitute and HfstTransducer.substitute hfst = libhfst.regex('a:a') basic = libhfst.HfstBasicTransducer(hfst) hfst.substitute('a', 'A', input=True, output=False)
import libhfst import sys import os def remove_generated_files(): # fails on MinGW.. #os.remove('foo.att') #os.remove('foo.hfst') pass for ttype in (libhfst.SFST_TYPE, libhfst.TROPICAL_OPENFST_TYPE, libhfst.FOMA_TYPE): tr1 = libhfst.HfstTransducer('a', 'b', ttype) tr2 = libhfst.HfstTransducer('c', 'd', ttype) ostr = libhfst.HfstOutputStream('foo.hfst', tr1.get_type()) ostr.redirect(tr1) ostr.redirect(tr2) ostr.close() att_file = libhfst.hfst_open('foo.att', 'w') istr = libhfst.HfstInputStream('foo.hfst') transducers_read = 0 while True: try: tr = libhfst.HfstTransducer(istr) transducers_read += 1 if transducers_read == 1:
import sys for file in 'foofile', 'testfile', 'testfile.att', 'testfile.hfst': if os.path.exists(file): pass # doesn't work on MinGW.. #os.remove(file) types = [libhfst.SFST_TYPE, libhfst.TROPICAL_OPENFST_TYPE, libhfst.FOMA_TYPE] # The library required by the implementation type requested is not linked to HFST. # -------------------------------------------------------------------------------- print("ImplementationTypeNotAvailableException") for type in types: try: tr = libhfst.HfstTransducer("foo", "bar", type) except: # libhfst.ImplementationTypeNotAvailableException: assert (libhfst.hfst_get_exception() == "ImplementationTypeNotAvailableException") pass assert (libhfst.hfst_get_exception() == "") # Stream cannot be read. # ---------------------- #print "StreamNotReadableException" # #try: # instr = libhfst.HfstInputStream("foofile") #except libhfst.StreamNotReadableException: # print "ERROR: file cannot be read."
import libhfst types = [libhfst.TROPICAL_OPENFST_TYPE, libhfst.SFST_TYPE, libhfst.FOMA_TYPE] # Transducers which the rule transducers are compared with rule1 = libhfst.HfstTransducer(libhfst.TROPICAL_OPENFST_TYPE) rule2 = libhfst.HfstTransducer(libhfst.TROPICAL_OPENFST_TYPE) rule3 = libhfst.HfstTransducer(libhfst.TROPICAL_OPENFST_TYPE) for type in types: if not libhfst.HfstTransducer.is_implementation_type_available(type): continue print( "HfstTransducer two_level_if(HfstTransducerPair &context, StringPairSet &mappings, StringPairSet &alphabet)" ) print( "HfstTransducer two_level_only_if(HfstTransducerPair &context, StringPairSet &mappings, StringPairSet &alphabet)" ) print( "HfstTransducer two_level_if_and_only_if(HfstTransducerPair &context, StringPairSet &mappings, StringPairSet &alphabet)" ) leftc = libhfst.HfstTransducer("c", type) rightc = libhfst.HfstTransducer("c", type) context = (leftc, rightc) mapping = ("a", "b") mappings = ("a", "b"), alphabet = ('a', 'a'), ('a', 'b'), ('b', 'b'), ('c', 'c')
# Test file for HfstTransducer constructors, destructor, operator= # and member functions set_name, get_name and get_type. # import libhfst types = [libhfst.SFST_TYPE, libhfst.TROPICAL_OPENFST_TYPE, libhfst.FOMA_TYPE] for type in types: if not libhfst.HfstTransducer.is_implementation_type_available(type): continue # The empty transducer print("The empty transducer") empty = libhfst.HfstTransducer(type) # The epsilon transducer print("The epsilon transducer") epsilon = libhfst.HfstTransducer("@_EPSILON_SYMBOL_@", type) # One-transition transducer print("One-transition transducer") foo = libhfst.HfstTransducer("foo", type) foobar = libhfst.HfstTransducer("foo", "bar", type) # The copy constructor print("The copy constructor") foobar_copy = libhfst.HfstTransducer(foobar) assert (foobar.compare(foobar_copy))
except libhfst.EndOfStreamException: pass except: raise RuntimeError(get_linenumber()) istr.close() if numtr != 2: raise RuntimeError(get_linenumber()) if not (TR1.compare(tr1)): raise RuntimeError(get_linenumber()) if not (TR2.compare(tr2)): raise RuntimeError(get_linenumber()) # Copy constructor transducer = libhfst.HfstTransducer(TR1) if not (TR1.compare(transducer)): raise RuntimeError(get_linenumber()) if not (transducer.compare(TR1)): raise RuntimeError(get_linenumber()) # Read lexc tr = libhfst.compile_lexc_file('test.lexc') tr.insert_freely(tr1).minimize() tr.insert_freely(('A','B')).minimize() # Substitute tr = libhfst.regex('a a:b b;') tr.substitute('a', 'A', input=True, output=False) eq = libhfst.regex('A:a A:b b;') if not (tr.compare(eq)):
t.add_transition(s2, libhfst.HfstBasicTransition(s3, fd2, fd2, 0)) t.add_transition(s3, libhfst.HfstBasicTransition(s4, "c", "c", 0)) t.add_transition(s3, libhfst.HfstBasicTransition(s5, "d", "d", 0)) t.add_transition(s4, libhfst.HfstBasicTransition(s6, fd2, fd2, 0)) t.add_transition(s5, libhfst.HfstBasicTransition(s6, fd1, fd1, 0)) types = [libhfst.SFST_TYPE, libhfst.TROPICAL_OPENFST_TYPE, libhfst.FOMA_TYPE] for type in types: if not libhfst.HfstTransducer.is_implementation_type_available(type): continue print("Identitites with flags") id = libhfst.HfstTransducer("@_IDENTITY_SYMBOL_@", type) id.repeat_star() ab_flag = libhfst.HfstTransducer("a", "b", type) flag = libhfst.HfstTransducer("@U.F.A@", type) ab_flag.disjunct(flag) ab_flag.concatenate(id) id.minimize() a_tr = libhfst.HfstTransducer("a", type) b_tr = libhfst.HfstTransducer("b", type) abid = libhfst.HfstTransducer("@_IDENTITY_SYMBOL_@", type) abid.disjunct(a_tr) abid.disjunct(b_tr) abid.repeat_star() abid.minimize()
if not libhfst.HfstTransducer.is_implementation_type_available(type): continue # Test the lexc parser print(" Testing...") # (1) A file in valid lexc format print(" valid file, parse... ") compiler = libhfst.LexcCompiler(type) compiler.parse("test_lexc.lexc") print("") parsed = compiler.compileLexical() assert (parsed != 0) tok = libhfst.HfstTokenizer() cat = libhfst.HfstTransducer("cat", tok, type) dog = libhfst.HfstTransducer("dog", tok, type) mouse = libhfst.HfstTransducer("mouse", tok, type) animals = libhfst.HfstTransducer(type) animals.disjunct(cat) animals.disjunct(dog) animals.disjunct(mouse) assert (animals.compare(libhfst.ptrvalue(parsed))) del (parsed) #try: # print(" valid file, read_lexc... ") # rlexc = libhfst.HfstTransducer.read_lexc("test_lexc.lexc", type) # assert(animals.compare(rlexc))