示例#1
0
 def _compile_fst(self) -> None:
     rules_tr = FST.load_transducer(shared.filenames['rules-tr'])
     self.inv_rules_tr = hfst.HfstTransducer(rules_tr)
     self.inv_rules_tr.invert()
     logging.getLogger('main').info('Building lexicon transducer...')
     lexicon_tr = FST.load_transducer(\
                    shared.filenames['lexicon-tr'])
     self.fst = hfst.HfstTransducer(lexicon_tr)
     logging.getLogger('main').info('Composing with rules...')
     self.fst.compose(rules_tr)
     self.fst.minimize()
     self.fst.invert()
     self.fst.convert(hfst.ImplementationType.HFST_OLW_TYPE)
示例#2
0
 def getGenerator(self):
     if not self.transducer:
         self.load_filename(self.fsa)
     generator = hfst.HfstTransducer(self.transducer)
     generator.remove_epsilons()
     generator.lookup_optimize()
     self.generator = generator
示例#3
0
def seq_to_transducer(alignment, weight=0.0, type=None, alphabet=None):
    if type is None:
        type=shared.config['FST'].getint('transducer_type')
    tr = hfst.HfstBasicTransducer()
    if alphabet is None:
        alphabet = ()
    alphabet = tuple(sorted(set(alphabet) | set(sum(alignment, ()))))
    tr.add_symbols_to_alphabet(alphabet)
    last_state_id = 0
    for (x, y) in alignment:
        state_id = tr.add_state()
        if (x, y) == (hfst.IDENTITY, hfst.IDENTITY):
            tr.add_transition(last_state_id, 
                              hfst.HfstBasicTransition(state_id,
                                                          hfst.IDENTITY,
                                                          hfst.IDENTITY,
                                                          0.0))
            tr.add_transition(state_id, 
                              hfst.HfstBasicTransition(state_id,
                                                          hfst.IDENTITY,
                                                          hfst.IDENTITY,
                                                          0.0))
            for a in tr.get_alphabet():
                if not a.startswith('@_'):
                    tr.add_transition(last_state_id, hfst.HfstBasicTransition(state_id, a, a, 0.0))
                    tr.add_transition(state_id, hfst.HfstBasicTransition(state_id, a, a, 0.0))
        else:
            tr.add_transition(last_state_id, 
                              hfst.HfstBasicTransition(state_id, x, y, 0.0))
        last_state_id = state_id
    tr.set_final_weight(last_state_id, weight)
    return hfst.HfstTransducer(tr, type)
示例#4
0
 def getAnalyser(self):
     if not self.transducer:
         self.load_filename(self.fsa)
     analyser = hfst.HfstTransducer(self.transducer)
     analyser.remove_epsilons()
     analyser.lookup_optimize()
     self.analyser = analyser
示例#5
0
 def apply(self, line):
     tok = hfst.HfstTokenizer()
     Transducer = hfst.HfstTransducer(self.fallbackTransducer)
     Transducer.push_weights_to_end()
     words = hfst.tokenized_fst(tok.tokenize(line))
     words.compose(Transducer)
     words.minimize()
     return words
示例#6
0
 def load(filename: str, lexicon, model, **kwargs) -> None:
     kwargs['compile'] = False
     analyzer = Analyzer(lexicon, model, **kwargs)
     analyzer.fst = FST.load_transducer(filename)
     rules_tr = FST.load_transducer(shared.filenames['rules-tr'])
     analyzer.inv_rules_tr = hfst.HfstTransducer(rules_tr)
     analyzer.inv_rules_tr.invert()
     return analyzer
示例#7
0
def read_examples(filename="test.pstr", build_fsts=True):
    """Reads the examples from the file whose name is 'filename'.
    
    The file must contain one example per line and each line consists of
    a space separated sequence of pair-symbols.  The examples are processed into 
    """
    if build_fsts:
        import hfst
        examples_bfst = hfst.HfstBasicTransducer()
    exfile = open(filename, "r")
    for line_nl in exfile:
        line = line_nl.strip()
        if not line or line.startswith("!"):
            continue
        pairsym_lst = re.split("\s+", line)
        symbol_pair_lst = [
            cfg.pairsym2sympair(pairsym) for pairsym in pairsym_lst
        ]
        # print("symbol_pair_lst:", symbol_pair_lst) ##
        pair_symbol_str = " ".join([
            cfg.sympair2pairsym(insym, outsym)
            for insym, outsym in symbol_pair_lst
        ])
        # print("pair_symbol_lst:", pair_symbol_lst) ##
        cfg.example_lst.append(pair_symbol_str)
        cfg.example_set.add(pair_symbol_str)  # spaces normalized
        #LINE_FST = hfst.tokenized_fst(symbol_pair_lst)
        # twbt.printfst(LINE_FST, True) ##
        if build_fsts:
            examples_bfst.disjunct(symbol_pair_lst, 0)
        for insym, outsym in symbol_pair_lst:
            cfg.symbol_pair_set.add((insym, outsym))
    exfile.close()
    if cfg.verbosity >= 30:
        print("List of examples:", cfg.example_lst)
        print("List of alphabet symbol pairs:", sorted(cfg.symbol_pair_set))
    if build_fsts:
        cfg.examples_fst = hfst.HfstTransducer(examples_bfst)
        cfg.examples_fst.set_name(filename)
        cfg.examples_fst.minimize()
        if cfg.verbosity >= 30:
            twbt.ppfst(cfg.examples_fst, False,
                       title="Example file as FST")  ##
    for insym, outsym in cfg.symbol_pair_set:
        cfg.input_symbol_set.add(insym)
        cfg.output_symbol_set.add(outsym)
    for insym, outsym in cfg.symbol_pair_set:
        pair_symbol = cfg.sympair2pairsym(insym, outsym)
        cfg.pair_symbol_set.add(pair_symbol)
    if build_fsts:
        pair_symbol_lst = [
            insym + ':' + outsym for insym, outsym in cfg.symbol_pair_set
        ]
        pair_symbol_str = " ".join(sorted(pair_symbol_lst))
        # print("symbol pairs:", pair_symbol_str) ##
        cfg.examples_fst.set_property("x-pair_symbols", pair_symbol_str)
    return
示例#8
0
文件: twbt.py 项目: koskenni/pytwolc
def ppdef(XRC, name, displayed_formula):
    FST = XRC.compile(name)
    BFST = hfst.HfstBasicTransducer(FST)
    FST = hfst.HfstTransducer(BFST)
    FST.set_name(name + " = " + displayed_formula)
    ppfst(FST, True)
    #alph = [pairname(insym, outsym) for insym, outsym
    #        in FST.get_transition_pairs()]
    #print(name, '=',', '.join(sorted(alph)))
    return
示例#9
0
def tag_acceptor(tag, alphabet):
    tr = hfst.HfstBasicTransducer()
    for c in alphabet:
        if shared.compiled_patterns['symbol'].match(c):
            tr.add_transition(0,
                hfst.HfstBasicTransition(0, c, c, 0.0))
    tr.set_final_weight(0, 0.0)
    tr_c = hfst.HfstTransducer(tr)
    tr_c.concatenate(seq_to_transducer(tuple(zip(tag, tag))))
    return tr_c
示例#10
0
def create_new_words_acceptor_if_not_exists(filename, analyzer, lexicon):
    if not file_exists(filename):
        new_words_acceptor = hfst.HfstTransducer(analyzer.fst)
        new_words_acceptor.convert(
            hfst.ImplementationType.TROPICAL_OPENFST_TYPE)
        new_words_acceptor.input_project()
        new_words_acceptor.minimize()
        new_words_acceptor.subtract(lexicon.to_fst())
        new_words_acceptor.minimize()
        FST.save_transducer(new_words_acceptor, filename)
示例#11
0
def pairs_to_fst(pair_set):
    """Converts a seq of symbol pairs into a fst that accepts any of them
"""
    pairs_bfst = hfst.HfstBasicTransducer()
    for pair in pair_set:
        pairs_bfst.disjunct((pair, ), 0)  # arg in tokenized format
    fst = hfst.HfstTransducer(pairs_bfst)
    fst.remove_epsilons()
    fst.minimize()
    return fst
示例#12
0
文件: twbt.py 项目: koskenni/pytwolc
def fsa_to_fst(FSA, separator='^'):
    BFSA = hfst.HfstBasicTransducer(FSA)
    sym_pairs = BFSA.get_transition_pairs()
    dic = {}
    for sym_pair in sym_pairs:
        insym, outsym = sym_pair
        in_sym, out_sym = outsym.split(separator)
        dic[sym_pair] = (in_sym, out_sym)
    BFSA.substitute(dic)
    FST = hfst.HfstTransducer(BFSA)
    return FST
示例#13
0
文件: fs.py 项目: koskenni/pytwolc
def string_to_fsa(grapheme_string):
    """Return a FSA which accepts the sequence of graphemes in the string"""
    bfsa = hfst.HfstBasicTransducer()
    grapheme_list = list(grapheme.graphemes(grapheme_string))
    string_pair_path = tuple(zip(grapheme_list, grapheme_list))
    if cfg.verbosity >= 10:
        print(grapheme_list)
        print(string_pair_path)
    bfsa.disjunct(string_pair_path, 0)
    fsa = hfst.HfstTransducer(bfsa)
    return (fsa)
示例#14
0
def fst_to_fsa(FST):
    global mphon_separator
    FB = hfst.HfstBasicTransducer(FST)
    sym_pairs = FB.get_transition_pairs()
    dict = {}
    for sym_pair in sym_pairs:
        in_sym, out_sym = sym_pair
        joint_sym = in_sym + mphon_separator + out_sym
        dict[sym_pair] = (joint_sym, joint_sym)
    FB.substitute(dict)
    RES = hfst.HfstTransducer(FB)
    return RES
示例#15
0
def rootgen_transducer(rootdist):
    # create an automaton for word generation
    if shared.config['Features'].getint('rootdist_n') != 1:
        raise NotImplementedError('Not implemented for rootdist_n != 1')
    weights = rootdist.features[0].log_probs
    tr = hfst.HfstBasicTransducer()
    tr.set_final_weight(0, weights[('#',)])
    for char, weight in weights.items():
        if char != ('#',):
            tr.add_transition(0, 
                hfst.HfstBasicTransition(0, char[0], char[0], weight))
    return hfst.HfstTransducer(tr)
示例#16
0
文件: twbt.py 项目: koskenni/pytwolc
def fst_to_fsa(FST, separator='^'):
    """Converts FST into an FSA by joining input and output symbols with separator"""
    FB = hfst.HfstBasicTransducer(FST)
    sym_pairs = FB.get_transition_pairs()
    dict = {}
    for sym_pair in sym_pairs:
        in_sym, out_sym = sym_pair
        joint_sym = in_sym + separator + out_sym
        dict[sym_pair] = (joint_sym, joint_sym)
    FB.substitute(dict)
    FSA = hfst.HfstTransducer(FB)
    # print("fst_to_fsa:\n", FSA) ##
    return FSA
示例#17
0
    def _compute_leaf_prob(self):
        logging.getLogger('main').info('Computing leaf probabilities...')
        self.leaf_prob = np.ones((len(self.lexicon), len(self.tagset)),
                                 dtype=np.float64)
        edge_set = EdgeSet(lexicon)

        def _empty_edge_set(edge_set):
            lexicon = edge_set.lexicon
            n = len(edge_set)
            probs = 1 - self.model.edges_prob(edge_set)
            for e_id, edge in enumerate(edge_set):
                word = lexicon.get_by_symstr(''.join(edge.source.word))[0]
                w_id = lexicon.get_id(word)
                t_id = self.tag_idx[edge.source.tag]
                self.leaf_prob[w_id, t_id] *= probs[e_id]
            edge_set = EdgeSet(lexicon)
            print(n)
            return edge_set

        lexicon_tr = self.lexicon.to_fst()
        lexicon_tr.concatenate(FST.generator(self.tagset))
        rules_tr = self.model.rule_set.to_fst()
        tr = hfst.HfstTransducer(lexicon_tr)
        tr.compose(rules_tr)
        tr.determinize()
        tr.minimize()
        FST.save_transducer(tr, 'tr.fsm')

        tr_path = full_path('tr.fsm')
        cmd = ['hfst-fst2strings', tr_path]
        p = subprocess.Popen(cmd,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.DEVNULL,
                             universal_newlines=True,
                             bufsize=1)
        while True:
            line = p.stdout.readline().strip()
            if line:
                w1, w2 = line.split(':')
                n1 = LexiconEntry(w1)
                n2 = LexiconEntry(w2)
                rules = extract_all_rules(n1, n2)
                for rule in rules:
                    if rule in rule_set:
                        edge_set.add(GraphEdge(n1, n2, rule))
            else:
                break
            if len(edge_set) > 300000:
                edge_set = _empty_edge_set(edge_set)
        edge_set = _empty_edge_set(edge_set)
示例#18
0
def tag_absorber(alphabet):
    tr = hfst.HfstBasicTransducer()
    for c in alphabet:
        if shared.compiled_patterns['symbol'].match(c):
            tr.add_transition(0,
                hfst.HfstBasicTransition(0, c, c, 0.0))
        elif shared.compiled_patterns['tag'].match(c):
            tr.add_transition(0,
                hfst.HfstBasicTransition(1, c, hfst.EPSILON, 0.0))
            tr.add_transition(1,
                hfst.HfstBasicTransition(1, c, hfst.EPSILON, 0.0))
    tr.set_final_weight(0, 0.0)
    tr.set_final_weight(1, 0.0)
    return hfst.HfstTransducer(tr)
示例#19
0
 def to_hfst(self) -> hfst.HfstTransducer:
     result = hfst.HfstBasicTransducer()
     for state in self.states.values():
         total_freq = state.get_total_freq()
         for t in state.transitions.values():
             weight = -math.log(t.freq / total_freq)
             result.add_transition(
                 state.id,
                 hfst.HfstBasicTransition(t.target_state_id, t.symbol,
                                          t.symbol, weight))
         if state.final_freq > 0:
             final_weight = -math.log(state.final_freq / total_freq)
             result.set_final_weight(state.id, final_weight)
     return hfst.HfstTransducer(result)
示例#20
0
def set_weights(fsa):
    """Sets weights to transitions using mphon_weight()
    """
    bfsa = hfst.HfstBasicTransducer(fsa)
    for state in bfsa.states():
        for arc in bfsa.transitions(state):
            tostate = arc.get_target_state()
            insym = arc.get_input_symbol()
            outsym = arc.get_output_symbol()
            w = mphon_weight(insym)
            arc.set_weight(w)
    weighted_fsa = hfst.HfstTransducer(bfsa)
    if cfg.verbosity >=20:
        print("set_weights:\n", weighted_fsa)
    return weighted_fsa
示例#21
0
def delenv(alphabet, max_affix_size, max_infix_size, max_infix_slots,
           deletion_symbol='@_DEL_@', deletion_slot_symbol='@_DELSLOT_@'):
    
    def add_deletion_chain(tr, alphabet, state, length):
        tr.add_transition(state,
                          hfst.HfstBasicTransition(
                              state+1, hfst.EPSILON, deletion_slot_symbol, 0.0))
        for i in range(1, length+1):
            for c in alphabet:
                if c not in (hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN):
                    tr.add_transition(state+i,
                                      hfst.HfstBasicTransition(
                                          state+i+1, 
                                          c, deletion_symbol, 0.0))
        last_state = state + length + 1
        for i in range(length+1):
            tr.add_transition(state+i,
                              hfst.HfstBasicTransition(
                                  last_state,
                                  hfst.EPSILON, hfst.EPSILON, 0.0))
        return last_state

    def add_identity_loop(tr, alphabet, state):
        for c in alphabet:
            if c not in (hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN):
                tr.add_transition(state,
                                  hfst.HfstBasicTransition(state+1, c, c, 0.0))
                tr.add_transition(state+1,
                                  hfst.HfstBasicTransition(state+1, c, c, 0.0))
        return state+1

    tr = hfst.HfstBasicTransducer()
    # prefix
    state = add_deletion_chain(tr, alphabet, 0, max_affix_size)
    state = add_identity_loop(tr, alphabet, state)
    # infixes
    for i in range(max_infix_slots):
        state = add_deletion_chain(tr, alphabet, state, max_infix_size)
        state = add_identity_loop(tr, alphabet, state)
    # suffix
    state = add_deletion_chain(tr, alphabet, state, max_affix_size)
    tr.set_final_weight(state, 0.0)
    tr_c = hfst.HfstTransducer(tr)
    tr_c.remove_epsilons()
    tr_c.minimize()
    return tr_c
示例#22
0
文件: twbt.py 项目: koskenni/pytwolc
def expanded_examples(TR, insyms, symbol_pair_set):
    # print("symbol_pair_set =", symbol_pair_set) ##
    BT = hfst.HfstBasicTransducer(TR)
    # print("BT.get_transition_pairs() =", BT.get_transition_pairs()) ##
    for insym in insyms:
        lst = [(ins, outs)
               for ins, outs
               in symbol_pair_set if ins == insym]
        for sympair in lst:
            # print("sympair, lst =", sympair, lst) ##
            BT.substitute(sympair, tuple(lst))
    T = hfst.HfstTransducer(BT)
    T.set_name("negative and positive together")
    T.minimize()
    # ppfst(T, True) ##
    #T.minus(TR)
    #T.minimize()
    return(T)
示例#23
0
def compute_possible_edges(lexicon: Lexicon, rule_set: RuleSet) -> EdgeSet:
    # build the transducer
    lexicon_tr = lexicon.to_fst()
    tag_seqs = extract_tag_symbols_from_rules(rule_set)
    if tag_seqs:
        lexicon_tr.concatenate(FST.generator(tag_seqs))
    rules_tr = rule_set.to_fst()
    tr = hfst.HfstTransducer(lexicon_tr)
    tr.compose(rules_tr)
    tr.determinize()
    tr.minimize()
    lexicon_tr.invert()
    tr.compose(lexicon_tr)
    tr.determinize()
    tr.minimize()
    FST.save_transducer(tr, 'tr.fsm')

    tr_path = full_path('tr.fsm')
    cmd = ['hfst-fst2strings', tr_path]
    p = subprocess.Popen(cmd,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.DEVNULL,
                         universal_newlines=True,
                         bufsize=1)
    edge_set = EdgeSet(lexicon)
    while True:
        line = p.stdout.readline().strip()
        if line:
            w1, w2 = line.split(':')
            w1_without_tag = re.sub(shared.compiled_patterns['tag'], '', w1)
            w2_without_tag = re.sub(shared.compiled_patterns['tag'], '', w2)
            if w1_without_tag != w2_without_tag:
                n1 = LexiconEntry(w1)
                n2 = LexiconEntry(w2)
                rules = algorithms.align.extract_all_rules(n1, n2)
                for rule in rules:
                    if rule in rule_set:
                        n1_wt = lexicon.get_by_symstr(w1_without_tag)[0]
                        n2_wt = lexicon.get_by_symstr(w2_without_tag)[0]
                        edge_set.add(GraphEdge(n1_wt, n2_wt, rule))
        else:
            break
    return edge_set
示例#24
0
def contexts_to_condition(*contexts):
    """A list of contexsts is converted into a condition.
    
    Each context in the list is converted separately and
    the result is the union of these and is returned as an FST.
    """
    global pistar_fst
    result_fst = hfst.HfstTransducer()
    for leftc, rightc in contexts:
        #twbt.ppfst(leftc, title="leftc") ##
        #twbt.ppfst(rightc, title="rightc") ##
        context_fst = context_to_condition(leftc, rightc)
        result_fst.disjunct(context_fst)
        result_fst.minimize()
        leftc_name = leftc.get_name()
        rightc_name = rightc.get_name()
        result_fst.set_name(leftc_name + "_" + rightc_name)
        #twbt.ppfst(result_fst, title="result") ##
    return (result_fst)
示例#25
0
def remove_bad_transitions(fsa):
    """Copy the FSA excluding transitions with consonants and vowels"""
    old_bfsa = hfst.HfstBasicTransducer(fsa)
    new_bfsa = hfst.HfstBasicTransducer()
    for state in old_bfsa.states():
        new_bfsa.add_state(state)
        if old_bfsa.is_final_state(state):
            new_bfsa.set_final_weight(state, 0.0)
        for arc in old_bfsa.transitions(state):
            in_sym = arc.get_input_symbol()
            if mphon_is_valid(in_sym):
                target_st = arc.get_target_state()
                new_bfsa.add_transition(state, target_st, in_sym, in_sym, 0)
    result_fsa = hfst.HfstTransducer(new_bfsa)
    result_fsa.minimize()
    if cfg.verbosity >= 20:
        print("remove_bad_transitions:")
        print(result_fsa)
    return result_fsa
示例#26
0
def delfilter(alphabet, length, deletion_symbol='@_DEL_@',
              deletion_slot_symbol='@_DELSLOT_@'):
    tr = hfst.HfstBasicTransducer()
    tr.set_final_weight(0, 0.0)
    tr.add_transition(0,
                      hfst.HfstBasicTransition(
                          0, deletion_slot_symbol, deletion_slot_symbol, 0.0))
    printable_chars = set(alphabet) -\
                      { hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN,
                        deletion_symbol }
    for i in range(length):
        for c in printable_chars:
            tr.add_transition(i,
                              hfst.HfstBasicTransition(i+1, c, c, 0.0))
        tr.add_transition(i+1,
                          hfst.HfstBasicTransition(
                              i, deletion_symbol, hfst.EPSILON, 0.0))
        tr.add_transition(i+1,
                          hfst.HfstBasicTransition(
                              i+1, deletion_slot_symbol, deletion_slot_symbol, 0.0))
        tr.set_final_weight(i+1, 0.0)
    first_negative_state = length+1
    tr.add_transition(0, hfst.HfstBasicTransition(
                             first_negative_state, deletion_symbol,
                             hfst.EPSILON, 0.0))
    for c in printable_chars:
        tr.add_transition(first_negative_state, 
                          hfst.HfstBasicTransition(0, c, c, 0.0))
    for i in range(length-1):
        tr.add_transition(first_negative_state+i,
                          hfst.HfstBasicTransition(
                              first_negative_state+i+1, 
                              deletion_symbol, hfst.EPSILON, 0.0))
        tr.add_transition(first_negative_state+i+1,
                          hfst.HfstBasicTransition(
                              first_negative_state+i+1, deletion_slot_symbol, deletion_slot_symbol, 0.0))
        for c in printable_chars:
            tr.add_transition(first_negative_state+i+1,
                              hfst.HfstBasicTransition(
                                  first_negative_state+i, c, c, 0.0))
    tr_c = hfst.HfstTransducer(tr)
    return tr_c
示例#27
0
def accum_input_labels(fst, separator=""):
    """Encode, weight and prune a transducer

fst -- transducer to be processed, input labels are strings of alphabet symbols and output labels are single alphabet symbols

separator -- null string or a symbol not part of the alphabet

Returns a transducer where input labels of thrasitions are concatenations of the input label and the output label of the original transition, the weights are according to the weights of the resulting morphophonemes and all transitions with invalid morphophoneme labels are discarded.
"""
    if cfg.verbosity >= 10:
        print("to be accumulated:\n", fst)
    bfst = hfst.HfstBasicTransducer(fst)
    result_bfst = hfst.HfstBasicTransducer()
    for state in bfst.states():
        result_bfst.add_state(state)
        if bfst.is_final_state(state):
            weight = bfst.get_final_weight(state)
            result_bfst.set_final_weight(state, weight)
        for arc in bfst.transitions(state):
            tostate = arc.get_target_state()
            insym = arc.get_input_symbol()
            outsym = arc.get_output_symbol()
            weight = arc.get_weight()
            new_insym = insym + separator + outsym
            if cfg.verbosity >= 25:
                print("arc", state, tostate, insym, outsym, weight)
            if not alphabet.mphon_is_valid(new_insym):
                continue
            new_weight = alphabet.mphon_weight(new_insym)
            result_arc = hfst.HfstBasicTransition(tostate,
                                                  new_insym,
                                                  new_insym,
                                                  new_weight)
            result_bfst.add_transition(state, result_arc)
            if cfg.verbosity >= 25:
                print("after addition of transition:\n", result_bfst)
    result_fst = hfst.HfstTransducer(result_bfst)
    result_fst.minimize()
    if cfg.verbosity >= 10:
        print("accumulated fst:\n", result_fst)
    return result_fst
示例#28
0
def make_guesser(fsa: hfst.HfstTransducer, prefix: bool, suffix: bool,
                 substring: bool, verbose: bool):
    """Make guesser from automaton."""
    if verbose:
        print("Converting...", end=" ")
    guesser = hfst.HfstBasicTransducer(fsa)
    if suffix:
        prefixloop = make_suffix_guesser(guesser, verbose)
        guesser = prefixloop
    elif prefix:
        suffixloop = make_prefix_guesser(guesser, verbose)
        guesser = suffixloop
    # substring guesser can combine with affix guesser
    if substring:
        substringer = make_substring_guesser(guesser, verbose)
        guesser = substringer
    if verbose:
        print("Converting...", end=" ")
    fsa = hfst.HfstTransducer(guesser)
    if verbose:
        print("done!")
    return fsa
示例#29
0
文件: aligner.py 项目: koskenni/twol
def align_two_words(in_word, out_word, aligner_fst, zero, number):
    w1 = hfst.fst(in_word)
    w1.insert_freely((zero, zero))
    w1.minimize()
    ###print(w1)

    w2 = hfst.fst(out_word)
    w2.insert_freely((zero, zero))
    w2.minimize()
    ###print(w2)

    w3 = hfst.HfstTransducer(w1)
    w3.compose(aligner_fst)
    w3.compose(w2)
    ###print(w1)

    w3.n_best(number)
    w3.minimize()
    ###print(w3)

    raw_paths = w3.extract_paths(output='raw')
    if cfg.verbosity >= 10:
        print("raw_paths:", raw_paths)
    return raw_paths
示例#30
0
import hfst

force = False
from sys import argv
if len(argv) > 3:
    raise RuntimeError(
        'Usage: hfst-prune-alphabet.py [-f|--force] [-S|--safe]')
for arg in argv[1:]:
    if arg == '-f' or arg == '--force':
        force = True
    elif arg == '-S' or arg == '--safe':
        force = False
    else:
        raise RuntimeError('unknown option: ' + arg)

istr = hfst.HfstInputStream()
ostr = hfst.HfstOutputStream(type=istr.get_type())

while (not istr.is_eof()):
    tr = istr.read()
    tr = hfst.HfstBasicTransducer(tr)
    tr.prune_alphabet(force)
    tr = hfst.HfstTransducer(tr, istr.get_type())
    tr.write(ostr)
    ostr.flush()

istr.close()
ostr.close()