Exemplo n.º 1
0
def make_edit(sigma):
    """Build an edit-distance function over the alphabet ``sigma``.

    A one-state transducer is created with a self-loop for every pair of
    symbols drawn from ``sigma`` plus ``'<eps>'`` (the ``<eps>:<eps>`` pair is
    skipped). A loop costs 0 when both symbols are equal and 1 otherwise.

    Args:
        sigma: iterable of alphabet symbols. It is copied, not modified.

    Returns:
        A function ``distance(a, b)`` that returns ``(dist, align)``:
        ``dist`` is the integer shortest distance through
        ``a o edit o b`` and ``align`` is a list of
        ``(input_symbol, output_symbol)`` pairs along the shortest path,
        with ``'<eps>'`` shown as ``'-'``.
    """
    # Create transducer
    syms = fst.SymbolTable()
    # Work on a private copy: adding '<eps>' to the caller's own collection
    # would be a surprising side effect of building the transducer.
    alphabet = set(sigma)
    alphabet.add('<eps>')
    edit = fst.StdVectorFst()
    edit.start = edit.add_state()
    edit[0].final = True
    for x in alphabet:
        for y in alphabet:
            if x == y == '<eps>':
                continue
            edit.add_arc(0, 0, syms[x], syms[y], (0 if x == y else 1))

    # Define edit distance
    def distance(a, b):
        """Return ``(dist, align)`` for the sequences ``a`` and ``b``."""
        # Compose a o edit transducer o b
        comp = make_input(a, syms) >> edit >> make_input(b, syms)
        # Compute distance; with reverse=True entry 0 belongs to state 0
        distances = comp.shortest_distance(reverse=True)
        dist = int(distances[0])
        # Find best alignment
        alignment = comp.shortest_path()
        # Re-order states
        alignment.top_sort()
        # Replace "<eps>" -> "-"
        dash = syms['-']
        eps = syms['<eps>']
        alignment.relabel(ipairs=[(eps, dash)], opairs=[(eps, dash)])
        align = []
        for state in alignment:
            # Take the first outgoing arc of each state. Stop explicitly at
            # the first state without arcs: letting next() raise
            # StopIteration inside a generator expression turns into a
            # RuntimeError on Python 3.7+ (PEP 479).
            arc = next(iter(state), None)
            if arc is None:
                break
            align.append((syms.find(arc.ilabel), syms.find(arc.olabel)))
        return dist, align

    return distance
def make_compounder(words, word_ids):
    """Build a one-state transducer that rewrites spaces and copies words.

    The single state is both start and final. On it, ``<space>`` may be
    mapped to ``<eps>``, ``+C+`` or ``+D+``, and every id in ``word_ids``
    is mapped to itself.

    Args:
        words: unused here; kept so the signature stays unchanged.
        word_ids: iterable of word label ids to pass through unchanged.

    Returns:
        The constructed ``fst.StdVectorFst``.

    Note:
        ``syms`` is not a parameter; it is looked up in the enclosing
        (module) scope.
    """
    compounder = fst.StdVectorFst()
    hub = compounder.add_state()
    compounder.start = hub

    # The three alternative rewrites of a space, in their original order.
    space_id = syms["<space>"]
    for replacement in ("<eps>", "+C+", "+D+"):
        compounder.add_arc(hub, hub, space_id, syms[replacement])

    # Identity loops for the words themselves.
    for wid in word_ids:
        compounder.add_arc(hub, hub, wid, wid)

    compounder[hub].final = True
    return compounder
Exemplo n.º 3
0
def lattice_to_nbest(lat, n=1):
    """Extract the ``n`` shortest paths of a lattice as Python lists.

    The lattice is first copied into a ``fst.StdVectorFst`` (tropical
    semiring), because shortest-path search is not available in the log
    semiring.

    Args:
        lat: the lattice, an ``fst.LogVectorFst`` or ``fst.StdVectorFst``.
        n (int): number of paths to extract.

    Returns:
        The result of ``fst_shortest_path_to_lists`` applied to the
        n-shortest-path FST (n-best lists of output label ids, per the
        original description; the helper is defined elsewhere).
    """
    tropical = fst.StdVectorFst(lat)
    return fst_shortest_path_to_lists(tropical.shortest_path(n))
Exemplo n.º 4
0
def make_input(word, syms):
    """Build a linear transducer that maps ``word`` onto itself.

    One arc is created per element of ``word``, with the same label on
    the input and output side:
    [0] =w:w=> 1 =o:o=> 2 =r:r=> 3 =d:d=> (4)

    Args:
        word: iterable of symbols (e.g. a string of characters).
        syms: mapping from symbol to label id, indexed as ``syms[symbol]``.

    Returns:
        The ``fst.StdVectorFst`` whose last state is final. For an empty
        ``word`` the start state itself is final.
    """
    chain = fst.StdVectorFst()
    chain.start = chain.add_state()
    tail = chain.start
    for symbol in word:
        label = syms[symbol]
        head = chain.add_state()
        chain.add_arc(tail, head, label, label)
        tail = head
    chain[tail].final = True
    return chain
def make_sentence_fsa(words, word_ids):
    """Build a linear acceptor for a sentence with spaces between words.

    The label sequence is ``word_ids`` with the ``<space>`` id inserted
    between consecutive words; each label becomes one arc with identical
    input and output labels.

    Args:
        words: unused here; kept so the signature stays unchanged.
        word_ids: iterable of word label ids, in sentence order.

    Returns:
        The ``fst.StdVectorFst`` whose last state is final. For empty
        ``word_ids`` the start state itself is final.

    Note:
        ``syms`` is not a parameter; it is looked up in the enclosing
        (module) scope.
    """
    sentence = fst.StdVectorFst()
    sentence.start = sentence.add_state()
    space_id = syms["<space>"]

    # Interleave the words with the space label.
    labels = []
    for wid in word_ids:
        if labels:
            labels.append(space_id)
        labels.append(wid)

    # State k is the source of the k-th label; states are numbered in
    # creation order starting from the start state 0.
    for src, label in enumerate(labels):
        sentence.add_state()
        sentence.add_arc(src, src + 1, label, label)

    sentence[len(labels)].final = True
    return sentence
Exemplo n.º 6
0
def load_lat(fn):
    """Read an FST from ``fn`` and return it as an ``fst.StdVectorFst``.

    Args:
        fn: location of the FST, passed unchanged to ``fst.read``.

    Returns:
        The result of ``fst.StdVectorFst`` applied to the loaded FST.
    """
    return fst.StdVectorFst(fst.read(fn))
Exemplo n.º 7
0
def lattice_to_word_posterior_lists(lat, n=1):
    """Convert the ``n`` shortest paths of a lattice into word lists.

    The lattice is first copied into a ``fst.StdVectorFst`` (tropical
    semiring), because shortest-path search is not available in the log
    semiring.

    Args:
        lat: the lattice to search.
        n (int): number of paths to extract.

    Returns:
        The result of ``fst_shortest_path_to_word_lists`` applied to the
        n-shortest-path FST (the helper is defined elsewhere).
    """
    tropical = fst.StdVectorFst(lat)
    return fst_shortest_path_to_word_lists(tropical.shortest_path(n))