Exemplo n.º 1
0
def make_suffix_guesser(fsa: hfst.HfstBasicTransducer, verbose: bool):
    """Wrap *fsa* so that any state can be entered after an arbitrary prefix.

    State 0 of the result only carries self-loops: one on the identity
    symbol weighted ``PENALTY_`` and one per entry of
    ``count_sigma_weight_map(fsa)``.  Every state ``s`` of *fsa* is copied
    to state ``s + 1`` (final weights and arcs included) and is reachable
    from state 0 through a zero-weight epsilon arc.

    fsa -- the basic transducer to copy
    verbose -- if true, progress messages are printed

    Returns the new HfstBasicTransducer.
    """
    # State 0 has to stay the start state, hence the copy of `fsa` is
    # shifted by one state number instead of getting a new start state.
    symbol_weights = count_sigma_weight_map(fsa)
    if verbose:
        print("making prefix sigma star...", end=" ")
    identity = "@_IDENTITY_SYMBOL_@"
    epsilon = "@_EPSILON_SYMBOL_@"
    loop_state = 0
    guesser = hfst.HfstBasicTransducer()
    guesser.add_transition(loop_state, loop_state, identity, identity,
                           PENALTY_)
    for sym, sym_weight in symbol_weights.items():
        guesser.add_transition(loop_state, loop_state, sym, sym, sym_weight)
    if verbose:
        print("replicating...", end=" ")
    # First pass: create the shifted states and copy their finality.
    for old_state, _ in enumerate(fsa):
        new_state = old_state + 1
        guesser.add_state(new_state)
        if fsa.is_final_state(old_state):
            guesser.set_final_weight(new_state,
                                     fsa.get_final_weight(old_state))
    # Second pass: epsilon entry arcs plus the shifted copies of all arcs.
    for old_state, outgoing in enumerate(fsa):
        new_state = old_state + 1
        guesser.add_transition(loop_state, new_state, epsilon, epsilon, 0)
        for arc in outgoing:
            guesser.add_transition(new_state,
                                   arc.get_target_state() + 1,
                                   arc.get_input_symbol(),
                                   arc.get_output_symbol(),
                                   arc.get_weight())
    return guesser
Exemplo n.º 2
0
def seq_to_transducer(alignment, weight=0.0, type=None, alphabet=None):
    """Compile a sequence of symbol pairs into a chain-shaped transducer.

    alignment -- sequence of ``(input, output)`` symbol tuples
    weight -- final weight of the last state of the chain
    type -- HFST transducer type; when None it is read from
            ``shared.config['FST']['transducer_type']``
    alphabet -- optional extra symbols to add to the transducer alphabet

    An ``(IDENTITY, IDENTITY)`` pair is expanded into a step that can be
    taken once and then repeated, on the identity symbol as well as on
    every alphabet symbol that does not start with ``'@_'``.

    Returns an HfstTransducer of the requested type.
    """
    if type is None:
        type = shared.config['FST'].getint('transducer_type')
    builder = hfst.HfstBasicTransducer()
    extra_symbols = () if alphabet is None else alphabet
    all_symbols = set(extra_symbols) | set(sum(alignment, ()))
    builder.add_symbols_to_alphabet(tuple(sorted(all_symbols)))

    previous = 0
    for x, y in alignment:
        current = builder.add_state()
        if x == hfst.IDENTITY and y == hfst.IDENTITY:
            # Entering arc first, then the self-loop on the new state.
            for origin in (previous, current):
                builder.add_transition(
                    origin,
                    hfst.HfstBasicTransition(current, hfst.IDENTITY,
                                             hfst.IDENTITY, 0.0))
            for sym in builder.get_alphabet():
                if sym.startswith('@_'):
                    continue
                for origin in (previous, current):
                    builder.add_transition(
                        origin,
                        hfst.HfstBasicTransition(current, sym, sym, 0.0))
        else:
            builder.add_transition(
                previous, hfst.HfstBasicTransition(current, x, y, 0.0))
        previous = current
    builder.set_final_weight(previous, weight)
    return hfst.HfstTransducer(builder, type)
Exemplo n.º 3
0
def symbol_pair_to_fst(insym, outsym):
    """Return an FST which accepts the one-pair string 'insym:outsym'.

    insym -- input symbol of the pair
    outsym -- output symbol of the pair
    """
    bfst = hfst.HfstBasicTransducer()
    # disjunct() expects a path, i.e. a tuple of (input, output) pairs, so
    # the single pair must be wrapped in a one-element tuple.
    string_pair_path = ((insym, outsym),)
    bfst.disjunct(string_pair_path, 0)
    # Convert the basic transducer with the HfstTransducer constructor;
    # hfst.fst() builds transducers from strings/tuples/dicts instead.
    fst = hfst.HfstTransducer(bfst)
    return fst
Exemplo n.º 4
0
def read_examples(filename="test.pstr", build_fsts=True):
    """Reads the examples from the file whose name is 'filename'.

    The file must contain one example per line and each line consists of
    a space separated sequence of pair-symbols.  Empty lines and lines
    starting with '!' are skipped.  The examples are processed into the
    following attributes of the ``cfg`` module:

    example_lst, example_set -- the examples as strings of pair symbols
        joined with single spaces
    symbol_pair_set -- all (insym, outsym) pairs occurring in the examples
    input_symbol_set, output_symbol_set -- the two projections of the pairs
    pair_symbol_set -- the pairs rendered by ``cfg.sympair2pairsym``
    examples_fst -- (only when build_fsts is true) a minimized FST of all
        examples, named after the file and carrying the sorted
        'insym:outsym' list in its "x-pair_symbols" property

    filename -- name of the example file
    build_fsts -- if true, hfst is imported and ``cfg.examples_fst`` is built
    """
    if build_fsts:
        import hfst
        examples_bfst = hfst.HfstBasicTransducer()
    # The context manager closes the file even when a line fails to parse.
    with open(filename, "r") as exfile:
        for line_nl in exfile:
            line = line_nl.strip()
            if not line or line.startswith("!"):
                continue
            # Raw string: "\s" in a plain literal is an invalid escape.
            pairsym_lst = re.split(r"\s+", line)
            symbol_pair_lst = [
                cfg.pairsym2sympair(pairsym) for pairsym in pairsym_lst
            ]
            pair_symbol_str = " ".join([
                cfg.sympair2pairsym(insym, outsym)
                for insym, outsym in symbol_pair_lst
            ])
            cfg.example_lst.append(pair_symbol_str)
            cfg.example_set.add(pair_symbol_str)  # spaces normalized
            if build_fsts:
                examples_bfst.disjunct(symbol_pair_lst, 0)
            for insym, outsym in symbol_pair_lst:
                cfg.symbol_pair_set.add((insym, outsym))
    if cfg.verbosity >= 30:
        print("List of examples:", cfg.example_lst)
        print("List of alphabet symbol pairs:", sorted(cfg.symbol_pair_set))
    if build_fsts:
        cfg.examples_fst = hfst.HfstTransducer(examples_bfst)
        cfg.examples_fst.set_name(filename)
        cfg.examples_fst.minimize()
        if cfg.verbosity >= 30:
            twbt.ppfst(cfg.examples_fst, False,
                       title="Example file as FST")
    for insym, outsym in cfg.symbol_pair_set:
        cfg.input_symbol_set.add(insym)
        cfg.output_symbol_set.add(outsym)
    for insym, outsym in cfg.symbol_pair_set:
        pair_symbol = cfg.sympair2pairsym(insym, outsym)
        cfg.pair_symbol_set.add(pair_symbol)
    if build_fsts:
        pair_symbol_lst = [
            insym + ':' + outsym for insym, outsym in cfg.symbol_pair_set
        ]
        pair_symbol_str = " ".join(sorted(pair_symbol_lst))
        cfg.examples_fst.set_property("x-pair_symbols", pair_symbol_str)
    return
Exemplo n.º 5
0
def ppdef(XRC, name, displayed_formula):
    """Compile the definition *name* with *XRC* and pretty-print it.

    XRC -- object whose ``compile(name)`` returns the transducer
    name -- name of the definition to compile
    displayed_formula -- text shown after "name = " in the heading
    """
    compiled = XRC.compile(name)
    # Round trip through a basic transducer before naming and printing.
    as_basic = hfst.HfstBasicTransducer(compiled)
    printable = hfst.HfstTransducer(as_basic)
    heading = name + " = " + displayed_formula
    printable.set_name(heading)
    ppfst(printable, True)
Exemplo n.º 6
0
def tag_acceptor(tag, alphabet):
    """Build a transducer: a loop over plain symbols followed by *tag*.

    tag -- sequence of symbols; each one is paired with itself and the
           sequence is compiled with ``seq_to_transducer``
    alphabet -- iterable of symbols; those matching
                ``shared.compiled_patterns['symbol']`` get a self-loop

    Returns the concatenation as an HfstTransducer.
    """
    loop = hfst.HfstBasicTransducer()
    for sym in alphabet:
        if not shared.compiled_patterns['symbol'].match(sym):
            continue
        loop.add_transition(0, hfst.HfstBasicTransition(0, sym, sym, 0.0))
    loop.set_final_weight(0, 0.0)

    acceptor = hfst.HfstTransducer(loop)
    tag_pairs = tuple(zip(tag, tag))
    acceptor.concatenate(seq_to_transducer(tag_pairs))
    return acceptor
Exemplo n.º 7
0
def remove_bad_transitions(fsa):
    """Copy *fsa*, keeping only arcs whose input symbol is a valid mphon.

    An arc is kept when ``mphon_is_valid`` accepts its input symbol; the
    copy uses that symbol on both sides with weight 0.  Final states stay
    final but their weight is reset to 0.0.  The result is minimized.
    """
    source = hfst.HfstBasicTransducer(fsa)
    filtered = hfst.HfstBasicTransducer()
    for state in source.states():
        filtered.add_state(state)
        if source.is_final_state(state):
            filtered.set_final_weight(state, 0.0)
        for arc in source.transitions(state):
            label = arc.get_input_symbol()
            if not mphon_is_valid(label):
                continue
            filtered.add_transition(state, arc.get_target_state(),
                                    label, label, 0)
    result_fsa = hfst.HfstTransducer(filtered)
    result_fsa.minimize()
    if cfg.verbosity >= 20:
        print("remove_bad_transitions:")
        print(result_fsa)
    return result_fsa
Exemplo n.º 8
0
def pairs_to_fst(pair_set):
    """Return an FST that accepts any single symbol pair of *pair_set*.

    pair_set -- iterable of (input symbol, output symbol) pairs
    """
    builder = hfst.HfstBasicTransducer()
    for pair in pair_set:
        # A path in tokenized format is a tuple of pairs; here of length 1.
        one_pair_path = (pair,)
        builder.disjunct(one_pair_path, 0)
    compiled = hfst.HfstTransducer(builder)
    compiled.remove_epsilons()
    compiled.minimize()
    return compiled
Exemplo n.º 9
0
def fsa_to_fst(FSA, separator='^'):
    """Split the joined labels of *FSA* back into input:output pairs.

    For every transition pair of *FSA* the output symbol is split at
    *separator* into exactly two parts (anything else raises ValueError)
    and the pair is substituted by ``(first part, second part)``.

    Returns the result as an HfstTransducer.
    """
    basic = hfst.HfstBasicTransducer(FSA)
    replacements = {}
    for joined_pair in basic.get_transition_pairs():
        _, joined_label = joined_pair
        left, right = joined_label.split(separator)
        replacements[joined_pair] = (left, right)
    basic.substitute(replacements)
    return hfst.HfstTransducer(basic)
Exemplo n.º 10
0
def fst2dicfst(FST):
    """Convert *FST* into a plain dict representation.

    Returns a dict mapping each state to a tuple ``(is_final, arcs)``
    where ``arcs`` maps the ``pairname`` of an arc's input and output
    symbols to the arc's target state.
    """
    basic = hfst.HfstBasicTransducer(FST)
    table = {}
    for state in basic.states():
        arcs_by_label = {
            pairname(arc.get_input_symbol(), arc.get_output_symbol()):
                arc.get_target_state()
            for arc in basic.transitions(state)
        }
        table[state] = (basic.is_final_state(state), arcs_by_label)
    return table
Exemplo n.º 11
0
def string_to_fsa(grapheme_string):
    """Return an FSA accepting the grapheme sequence of *grapheme_string*.

    The string is segmented with ``grapheme.graphemes`` and every
    grapheme is paired with itself to form the single accepted path.
    """
    units = list(grapheme.graphemes(grapheme_string))
    identity_path = tuple(zip(units, units))
    if cfg.verbosity >= 10:
        print(units)
        print(identity_path)
    builder = hfst.HfstBasicTransducer()
    builder.disjunct(identity_path, 0)
    return hfst.HfstTransducer(builder)
Exemplo n.º 12
0
def symbol_to_fsa(sym):
    """Return a FSA which accepts the one letter string 'sym'

    The symbol 'sym' may be e.g. a composed Unicode grapheme, i.e. a
    string of two or more Unicode characters.
    """
    bfsa = hfst.HfstBasicTransducer()
    # disjunct() expects a path, i.e. a tuple of (input, output) pairs, so
    # the single identity pair must be wrapped in a one-element tuple.
    string_pair_path = ((sym, sym),)
    bfsa.disjunct(string_pair_path, 0)
    # Convert the basic transducer with the HfstTransducer constructor;
    # hfst.fst() builds transducers from strings/tuples/dicts instead.
    fsa = hfst.HfstTransducer(bfsa)
    return fsa
Exemplo n.º 13
0
def accum_input_labels(fst, separator=""):
    """Encode, weight and prune a transducer.

    fst -- transducer to be processed
    separator -- string inserted between the input and the output label
                 when they are concatenated

    Returns a minimized transducer with the same states and final weights.
    Each arc is relabelled on both sides with
    ``input + separator + output`` of the original arc and weighted with
    ``alphabet.mphon_weight`` of that label; arcs whose new label is
    rejected by ``alphabet.mphon_is_valid`` are dropped.
    """
    if cfg.verbosity >= 10:
        print("to be accumulated:\n", fst)
    source = hfst.HfstBasicTransducer(fst)
    accumulated = hfst.HfstBasicTransducer()
    for state in source.states():
        accumulated.add_state(state)
        if source.is_final_state(state):
            accumulated.set_final_weight(state,
                                         source.get_final_weight(state))
        for arc in source.transitions(state):
            target = arc.get_target_state()
            insym = arc.get_input_symbol()
            outsym = arc.get_output_symbol()
            old_weight = arc.get_weight()
            joined = insym + separator + outsym
            if cfg.verbosity >= 25:
                print("arc", state, target, insym, outsym, old_weight)
            if alphabet.mphon_is_valid(joined):
                relabelled = hfst.HfstBasicTransition(
                    target, joined, joined, alphabet.mphon_weight(joined))
                accumulated.add_transition(state, relabelled)
                if cfg.verbosity >= 25:
                    print("after addition of transition:\n", accumulated)
    result_fst = hfst.HfstTransducer(accumulated)
    result_fst.minimize()
    if cfg.verbosity >= 10:
        print("accumulated fst:\n", result_fst)
    return result_fst
Exemplo n.º 14
0
def rootgen_transducer(rootdist):
    """Create a one-state weighted automaton for word generation.

    rootdist -- object whose ``features[0].log_probs`` maps 1-tuples of
                symbols to weights

    The weight stored under ``('#',)`` becomes the final weight of
    state 0; every other entry becomes a self-loop on its symbol.

    Raises NotImplementedError unless the configured ``rootdist_n`` is 1.
    """
    if shared.config['Features'].getint('rootdist_n') != 1:
        raise NotImplementedError('Not implemented for rootdist_n != 1')
    end_marker = ('#',)
    log_probs = rootdist.features[0].log_probs
    builder = hfst.HfstBasicTransducer()
    builder.set_final_weight(0, log_probs[end_marker])
    for key, cost in log_probs.items():
        if key == end_marker:
            continue
        symbol = key[0]
        builder.add_transition(
            0, hfst.HfstBasicTransition(0, symbol, symbol, cost))
    return hfst.HfstTransducer(builder)
Exemplo n.º 15
0
def make_substring_guesser(fsa: hfst.HfstTransducer, verbose: bool):
    """Add weighted self-loops to every state of *fsa* (an infix guesser).

    Each state of the copy gets a loop on the identity symbol weighted
    ``PENALTY_`` and one loop per entry of ``count_sigma_weight_map(fsa)``.

    fsa -- automaton to copy into a basic transducer
    verbose -- if true, a progress message is printed

    Returns the HfstBasicTransducer with the loops added.
    """
    symbol_weights = count_sigma_weight_map(fsa)
    guesser = hfst.HfstBasicTransducer(fsa)
    if verbose:
        print("adding loops...", end=" ")
    identity = "@_IDENTITY_SYMBOL_@"
    for state, _ in enumerate(guesser):
        guesser.add_transition(state, state, identity, identity, PENALTY_)
        for sym, sym_weight in symbol_weights.items():
            guesser.add_transition(state, state, sym, sym, sym_weight)
    return guesser
Exemplo n.º 16
0
def fst_to_fsa(FST):
    """Turn *FST* into an FSA by joining each arc's two symbols.

    Every transition pair ``(in, out)`` is substituted by an identity
    pair on ``in + mphon_separator + out``, where ``mphon_separator`` is
    read from module scope.

    Returns the result as an HfstTransducer.
    """
    basic = hfst.HfstBasicTransducer(FST)
    replacements = {}
    for pair in basic.get_transition_pairs():
        left, right = pair
        joined = left + mphon_separator + right
        replacements[pair] = (joined, joined)
    basic.substitute(replacements)
    return hfst.HfstTransducer(basic)
Exemplo n.º 17
0
def ppfst(FST, print_equiv_classes=True, title=""):
    """Pretty-print a transducer state by state.

    FST -- the transducer to be pretty-printed
    print_equiv_classes -- if True, then print also the classes of
                           equivalent symbols that have several members
    title -- optional heading printed before the states

    If the transducer has a name, it is printed as a heading too.  Each
    state is printed on its own line, marked with ':' when final and '.'
    otherwise, followed by one "labels -> target ;" group per target
    state.  Only labels that equal their own entry in the first mapping
    returned by ``equivpairs`` are listed.
    """
    heading = FST.get_name()
    if heading:
        print("\n" + heading)
    if title:
        print("\n" + title)
    basic = hfst.HfstBasicTransducer(FST)
    representative, classes = equivpairs(basic)
    for state in basic.states():
        # Group the arc labels of this state by target state.
        labels_by_target = {}
        for arc in basic.transitions(state):
            label = pairname(arc.get_input_symbol(),
                             arc.get_output_symbol())
            labels_by_target.setdefault(arc.get_target_state(),
                                        []).append(label)
        marker = ": " if basic.is_final_state(state) else ". "
        print(" ", state, marker, end="")
        for target, labels in labels_by_target.items():
            shown = [lab for lab in labels if lab == representative[lab]]
            print(" " + (" ".join(shown)) + " -> " + str(target), end=" ;")
        print()
    if not print_equiv_classes:
        return
    # The heading is printed only if some class has more than one member.
    if any(len(members) > 1 for _, members in classes.items()):
        print("Classes of equivalent symbols:")
        for _, members in classes.items():
            if len(members) > 1:
                print(" ", " ".join(sorted(members)))
    return
Exemplo n.º 18
0
def fst_to_fsa(FST, separator='^'):
    """Convert FST into an FSA by joining input and output symbols.

    Every transition pair ``(in, out)`` is substituted by an identity
    pair on the single symbol ``in + separator + out``.

    Returns the result as an HfstTransducer.
    """
    basic = hfst.HfstBasicTransducer(FST)
    replacements = {}
    for pair in basic.get_transition_pairs():
        left, right = pair
        joined = left + separator + right
        replacements[pair] = (joined, joined)
    basic.substitute(replacements)
    return hfst.HfstTransducer(basic)
Exemplo n.º 19
0
def tag_absorber(alphabet):
    """Build a two-state transducer that maps tag symbols to epsilon.

    Symbols matching ``shared.compiled_patterns['symbol']`` loop on
    state 0 unchanged.  Symbols matching ``shared.compiled_patterns['tag']``
    lead from state 0 to state 1, and loop on state 1, with epsilon as
    output.  Both states are final with weight 0.0.
    """
    def arc(target, insym, outsym):
        return hfst.HfstBasicTransition(target, insym, outsym, 0.0)

    builder = hfst.HfstBasicTransducer()
    for sym in alphabet:
        if shared.compiled_patterns['symbol'].match(sym):
            builder.add_transition(0, arc(0, sym, sym))
            continue
        if shared.compiled_patterns['tag'].match(sym):
            builder.add_transition(0, arc(1, sym, hfst.EPSILON))
            builder.add_transition(1, arc(1, sym, hfst.EPSILON))
    for final_state in (0, 1):
        builder.set_final_weight(final_state, 0.0)
    return hfst.HfstTransducer(builder)
Exemplo n.º 20
0
 def to_hfst(self) -> hfst.HfstTransducer:
     """Export the states and transitions of this object to HFST.

     Every transition becomes an identity arc on its symbol, weighted
     with the negative log of its frequency relative to the total
     frequency of its source state.  A state with a positive
     ``final_freq`` becomes final with the analogous weight.
     """
     builder = hfst.HfstBasicTransducer()
     for node in self.states.values():
         denominator = node.get_total_freq()
         for edge in node.transitions.values():
             cost = -math.log(edge.freq / denominator)
             arc = hfst.HfstBasicTransition(edge.target_state_id,
                                            edge.symbol, edge.symbol, cost)
             builder.add_transition(node.id, arc)
         if node.final_freq > 0:
             builder.set_final_weight(
                 node.id, -math.log(node.final_freq / denominator))
     return hfst.HfstTransducer(builder)
Exemplo n.º 21
0
def loadTransducer():
    """Load a transducer from the file ``italian_verb_analyzer.hfst``.

    All transducers in the file are read and the one at index 1 is
    returned as an HfstBasicTransducer.  If anything goes wrong (file
    missing or unreadable, fewer than two transducers, conversion
    failure) a message is printed and, after a 3 second pause, the
    program exits.
    """
    try:
        istr = libhfst.HfstInputStream(r"italian_verb_analyzer.hfst")
        try:
            transducers = []
            while not istr.is_eof():
                transducers.append(istr.read())
        finally:
            # Release the stream even when reading fails part-way.
            istr.close()
        td = hfst.HfstBasicTransducer(transducers[1])
        return td
    except Exception:
        # `except Exception` rather than a bare `except:` so that
        # KeyboardInterrupt and SystemExit are not reported as a bad file.
        print('Transducer file was invalid or not found.')
        time.sleep(3)
        exit()
Exemplo n.º 22
0
def set_weights(fsa):
    """Weight every transition of *fsa* by its input symbol.

    The weight of each arc is set to ``mphon_weight`` of the arc's input
    symbol.  Returns the result as an HfstTransducer.
    """
    basic = hfst.HfstBasicTransducer(fsa)
    for state in basic.states():
        for arc in basic.transitions(state):
            arc.set_weight(mphon_weight(arc.get_input_symbol()))
    weighted_fsa = hfst.HfstTransducer(basic)
    if cfg.verbosity >= 20:
        print("set_weights:\n", weighted_fsa)
    return weighted_fsa
Exemplo n.º 23
0
def delenv(alphabet, max_affix_size, max_infix_size, max_infix_slots,
           deletion_symbol='@_DEL_@', deletion_slot_symbol='@_DELSLOT_@'):
    """Build a transducer that marks deletable stretches of a string.

    The machine is a chain: a deletion chain of up to ``max_affix_size``
    symbols, an identity loop, then ``max_infix_slots`` repetitions of
    (deletion chain of up to ``max_infix_size`` symbols, identity loop),
    and finally another deletion chain of up to ``max_affix_size``
    symbols.  A deletion chain emits ``deletion_slot_symbol`` on entry and
    maps each consumed alphabet symbol to ``deletion_symbol``; it can be
    left early, or skipped entirely, through epsilon arcs.  Epsilon,
    identity and unknown symbols in *alphabet* are ignored.

    Returns the epsilon-free, minimized HfstTransducer.
    """
    special_symbols = (hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN)

    def arc(target, insym, outsym):
        # All arcs of this machine are unweighted.
        return hfst.HfstBasicTransition(target, insym, outsym, 0.0)

    def deletion_chain(builder, first, length):
        # Entry arc emitting the slot marker.
        builder.add_transition(
            first, arc(first + 1, hfst.EPSILON, deletion_slot_symbol))
        for offset in range(1, length + 1):
            for sym in alphabet:
                if sym in special_symbols:
                    continue
                builder.add_transition(
                    first + offset,
                    arc(first + offset + 1, sym, deletion_symbol))
        exit_state = first + length + 1
        # Every state of the chain may jump straight to the exit.
        for offset in range(length + 1):
            builder.add_transition(
                first + offset,
                arc(exit_state, hfst.EPSILON, hfst.EPSILON))
        return exit_state

    def identity_loop(builder, first):
        # One obligatory symbol into the next state, then a loop there.
        for sym in alphabet:
            if sym in special_symbols:
                continue
            builder.add_transition(first, arc(first + 1, sym, sym))
            builder.add_transition(first + 1, arc(first + 1, sym, sym))
        return first + 1

    builder = hfst.HfstBasicTransducer()
    # prefix
    state = deletion_chain(builder, 0, max_affix_size)
    state = identity_loop(builder, state)
    # infixes
    for _ in range(max_infix_slots):
        state = deletion_chain(builder, state, max_infix_size)
        state = identity_loop(builder, state)
    # suffix
    final_state = deletion_chain(builder, state, max_affix_size)
    builder.set_final_weight(final_state, 0.0)

    compiled = hfst.HfstTransducer(builder)
    compiled.remove_epsilons()
    compiled.minimize()
    return compiled
Exemplo n.º 24
0
def make_prefix_guesser(fsa: hfst.HfstBasicTransducer, verbose: bool):
    """Extend a copy of *fsa* so that any state may be followed by anything.

    A new final state (weight 0) is appended.  It loops on the identity
    symbol at ``PENALTY_`` and on every entry of
    ``count_sigma_weight_map(fsa)``.  Afterwards every state of the
    transducer gets a zero-weight epsilon arc to that new state.

    fsa -- the basic transducer to copy
    verbose -- if true, a progress message is printed

    Returns the extended HfstBasicTransducer.
    """
    symbol_weights = count_sigma_weight_map(fsa)
    guesser = hfst.HfstBasicTransducer(fsa)
    identity = "@_IDENTITY_SYMBOL_@"
    epsilon = "@_EPSILON_SYMBOL_@"
    sink = guesser.add_state()
    guesser.set_final_weight(sink, 0)
    guesser.add_transition(sink, sink, identity, identity, PENALTY_)
    for sym, sym_weight in symbol_weights.items():
        guesser.add_transition(sink, sink, sym, sym, sym_weight)
    if verbose:
        print("connecting...", end=" ")
    for state, _ in enumerate(guesser):
        guesser.add_transition(state, sink, epsilon, epsilon, 0)
    return guesser
Exemplo n.º 25
0
def expanded_examples(TR, insyms, symbol_pair_set):
    """Expand the pairs of *TR* to all pairs sharing their input symbol.

    TR -- the transducer to expand
    insyms -- input symbols whose pairs are to be expanded
    symbol_pair_set -- iterable of (input symbol, output symbol) pairs

    For each symbol in *insyms*, every pair of *symbol_pair_set* with
    that input symbol is substituted by the whole group of such pairs.

    Returns the minimized result, named "negative and positive together".
    """
    basic = hfst.HfstBasicTransducer(TR)
    for insym in insyms:
        alternatives = tuple(
            (ins, outs) for ins, outs in symbol_pair_set if ins == insym
        )
        for pair in alternatives:
            basic.substitute(pair, alternatives)
    expanded = hfst.HfstTransducer(basic)
    expanded.set_name("negative and positive together")
    expanded.minimize()
    return expanded
Exemplo n.º 26
0
def dict_rule(rule_fst):
    """Convert *rule_fst* into a transition table and a set of final states.

    Returns a tuple ``(rule_dict, final_states)`` where ``rule_dict`` maps
    each state to a dict from ``(insym, outsym)`` to the target state.

    As a side effect every pair seen is recorded in the module-level
    mapping ``pairs_with_insym`` under its input symbol.
    """
    basic = hfst.HfstBasicTransducer(rule_fst)
    table = {}
    finals = set()
    for state in basic.states():
        if basic.is_final_state(state):
            finals.add(state)
        row = {}
        for arc in basic.transitions(state):
            pair = (arc.get_input_symbol(), arc.get_output_symbol())
            row[pair] = arc.get_target_state()
            pairs_with_insym.setdefault(pair[0], set()).add(pair)
        table[state] = row
    return table, finals
Exemplo n.º 27
0
def delfilter(alphabet, length, deletion_symbol='@_DEL_@',
              deletion_slot_symbol='@_DELSLOT_@'):
    """Build a transducer that maps deletion symbols to epsilon.

    alphabet -- iterable of symbols; epsilon, identity, unknown and
                ``deletion_symbol`` are excluded from the ordinary symbols
    length -- number of states on each side of state 0

    States ``0 .. length`` are final: an ordinary symbol moves one state
    up and a deletion symbol one state back down.  States from
    ``length + 1`` on are not made final: a deletion symbol moves one
    state further and an ordinary symbol one state back towards state 0.
    ``deletion_slot_symbol`` loops on every state except the first of
    the non-final ones.

    Returns the result as an HfstTransducer.
    """
    def arc(target, insym, outsym):
        return hfst.HfstBasicTransition(target, insym, outsym, 0.0)

    def slot_loop(state):
        return arc(state, deletion_slot_symbol, deletion_slot_symbol)

    def delete_to(target):
        return arc(target, deletion_symbol, hfst.EPSILON)

    builder = hfst.HfstBasicTransducer()
    builder.set_final_weight(0, 0.0)
    builder.add_transition(0, slot_loop(0))
    reserved = {hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN, deletion_symbol}
    printable_chars = set(alphabet) - reserved

    # Final side: states 1 .. length.
    for depth in range(length):
        deeper = depth + 1
        for sym in printable_chars:
            builder.add_transition(depth, arc(deeper, sym, sym))
        builder.add_transition(deeper, delete_to(depth))
        builder.add_transition(deeper, slot_loop(deeper))
        builder.set_final_weight(deeper, 0.0)

    # Non-final side: states length + 1 onwards.
    negative_base = length + 1
    builder.add_transition(0, delete_to(negative_base))
    for sym in printable_chars:
        builder.add_transition(negative_base, arc(0, sym, sym))
    for offset in range(length - 1):
        here = negative_base + offset
        beyond = here + 1
        builder.add_transition(here, delete_to(beyond))
        builder.add_transition(beyond, slot_loop(beyond))
        for sym in printable_chars:
            builder.add_transition(beyond, arc(here, sym, sym))

    return hfst.HfstTransducer(builder)
Exemplo n.º 28
0
 def _extract_unique_io_pairs(transducer):
     """Collect the distinct input/output string pairs of *transducer*.

     The paths are followed step by step from state 0; whenever a final
     state is reached, the string pairs accumulated so far are recorded.
     Epsilon contributes the empty string.  The loop ends once no state
     holds any pair.

     Returns a dict mapping each input string to the list of its output
     strings.

     Raises RuntimeError if an arc carries the identity or unknown symbol.
     """
     basic = hfst.HfstBasicTransducer(transducer)

     def as_text(symbol):
         # Epsilon is written as nothing; identity/unknown have no spelling.
         if symbol == hfst.EPSILON:
             return ''
         if symbol in (hfst.IDENTITY, hfst.UNKNOWN):
             raise RuntimeError('Illegal symbol!')
         return symbol

     # frontier[s] holds the string pairs of all paths of the current
     # length that end in state s.
     frontier = [set() for _ in basic.states()]
     frontier[0].add(('', ''))
     accepted = set()
     while any(frontier):
         following = [set() for _ in basic.states()]
         for state, pairs in enumerate(frontier):
             if basic.is_final_state(state):
                 accepted |= pairs
             for text_in, text_out in pairs:
                 for arc in basic.transitions(state):
                     step_in = as_text(arc.get_input_symbol())
                     step_out = as_text(arc.get_output_symbol())
                     following[arc.get_target_state()].add(
                         (text_in + step_in, text_out + step_out))
         frontier = following

     # Group the accepted pairs by their input string.
     outputs_by_input = {}
     for word_in, word_out in accepted:
         outputs_by_input.setdefault(word_in, []).append(word_out)
     return outputs_by_input
Exemplo n.º 29
0
def make_guesser(fsa: hfst.HfstTransducer, prefix: bool, suffix: bool,
                 substring: bool, verbose: bool):
    """Make a guesser from an automaton.

    fsa -- the automaton to start from
    prefix -- apply ``make_prefix_guesser`` (only if *suffix* is false)
    suffix -- apply ``make_suffix_guesser``; takes precedence over *prefix*
    substring -- additionally apply ``make_substring_guesser``
    verbose -- if true, progress messages are printed

    Returns the guesser as an HfstTransducer.
    """
    if verbose:
        print("Converting...", end=" ")
    basic = hfst.HfstBasicTransducer(fsa)
    if suffix:
        basic = make_suffix_guesser(basic, verbose)
    elif prefix:
        basic = make_prefix_guesser(basic, verbose)
    # The substring guesser can be combined with either affix guesser.
    if substring:
        basic = make_substring_guesser(basic, verbose)
    if verbose:
        print("Converting...", end=" ")
    result = hfst.HfstTransducer(basic)
    if verbose:
        print("done!")
    return result
Exemplo n.º 30
0
def number_of_paths(transducer):
    """Count the paths of *transducer* that end in a final state.

    In the n-th round ``counts[s]`` is the number of paths of length n
    from state 0 that end in state s.  After every round the counts of
    the final states are added to the total.  The loop stops after the
    first round in which no transition could be followed.

    Only paths with at least one transition are counted.
    """
    basic = hfst.HfstBasicTransducer(transducer)
    counts = [1] + [0] * (len(basic.states()) - 1)
    total = 0
    while True:
        next_counts = [0] * len(basic.states())
        advanced = False
        for state in basic.states():
            if counts[state] <= 0:
                continue
            for arc in basic.transitions(state):
                next_counts[arc.get_target_state()] += counts[state]
                advanced = True
        for state in basic.states():
            if basic.is_final_state(state):
                total += next_counts[state]
        counts = next_counts
        if not advanced:
            break
    return total