Example #1
def normalize_fst(f):
    # Subtract the total log-semiring weight z from each final weight so that
    # the normalized FST's path probabilities sum to one.
    f2 = f.copy()
    z = fst.shortestdistance(fst.arcmap(f, map_type="to_log"),
                             reverse=True)[0]
    for s in f2.states():
        w = f2.final(s)
        nw = fst.Weight(f2.weight_type(), float(w) - float(z))
        f2.set_final(s, nw)
    return f2
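A minimal usage sketch, assuming `fst` above is pywrapfst imported as `fst` and OpenFst >= 1.8 (where `VectorFst` is the mutable FST type):

import pywrapfst as fst

a = fst.VectorFst()  # tropical weights by default
s0, s1, s2 = a.add_state(), a.add_state(), a.add_state()
a.set_start(s0)
a.add_arc(s0, fst.Arc(1, 1, fst.Weight(a.weight_type(), 1.0), s1))
a.add_arc(s0, fst.Arc(2, 2, fst.Weight(a.weight_type(), 2.0), s2))
a.set_final(s1, fst.Weight.one(a.weight_type()))
a.set_final(s2, fst.Weight.one(a.weight_type()))

normalized = normalize_fst(a)
# After normalization the path probabilities sum to ~1, so the total
# log-semiring weight of the normalized FST is ~0.
total = fst.shortestdistance(fst.arcmap(normalized, map_type="to_log"),
                             reverse=True)[0]
print(float(total))  # ~0.0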
Example #2
def calculate_cost(fst_in):
    """
    Calculates the cost of the shortest path through an FST.
    :param fst_in: <openfst.Fst>
    :return: <float>, or None on failure (e.g. an FST with no states)
    """
    try:
        # shortestdistance() returns one weight per state; the last entry is
        # the distance from the start state to the highest-numbered state.
        return float(openfst.shortestdistance(fst_in)[-1].to_string())
    except Exception:
        return None
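A quick sketch of how this might be called, assuming `openfst` is pywrapfst imported under that alias, OpenFst >= 1.8 (where `VectorFst` is the mutable type), and a pywrapfst version whose `Weight` exposes `to_string()` as used above:

import pywrapfst as openfst

f = openfst.VectorFst()
s0, s1 = f.add_state(), f.add_state()
f.set_start(s0)
f.add_arc(s0, openfst.Arc(1, 1, openfst.Weight(f.weight_type(), 2.5), s1))
f.set_final(s1, openfst.Weight.one(f.weight_type()))

print(calculate_cost(f))                    # expected 2.5: distance from the start to the last state
print(calculate_cost(openfst.VectorFst()))  # None: an empty FST raises inside the try block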
Example #3
def get_likelihood_for_fsas_over_paths(d_fsa,
                                       w_fsas,
                                       w,
                                       num_paths=10,
                                       return_type="probability"):
    '''Compose the word FSA w_fsas[w] with d_fsa and return, depending on
    return_type, the n-best paths themselves, their path weights, or the
    summed probability of the num_paths shortest paths.'''
    if num_paths <= 0:
        raise ValueError('num_paths must be a positive integer')

    w_fsa = w_fsas[w]
    dw_composed = pywrapfst.compose(w_fsa, d_fsa)
    dw_composed.arcsort(sort_type="ilabel")

    if num_paths > 1:
        shortest_paths = pywrapfst.epsnormalize(
            pywrapfst.shortestpath(dw_composed, nshortest=num_paths))
        if return_type == "shortest_paths":
            return shortest_paths
        if shortest_paths.num_states() > 0:

            # Take the reverse distance: with multiple shortest paths, state 0
            # is the start state and state 1 is the final state (the result is
            # not used below).
            shortest_distance = pywrapfst.shortestdistance(shortest_paths,
                                                           reverse=True)

            # One weight per path, gathered by iterating over the outgoing
            # arcs from the start state.
            path_weights = get_weights_for_paths(shortest_paths)
            if return_type == "path_weights":
                return path_weights
            # Weights are negative log probabilities, so sum their exponentials.
            shortest_paths_sum = np.sum(np.exp(-1. * np.array(path_weights)))
            if return_type == "probability":
                return shortest_paths_sum
        else:
            # d_fsa and w_fsa share no accepting path, so the composition is
            # empty; return a small floor probability instead of zero.
            return 10**-20

    else:
        shortest_path = pywrapfst.shortestpath(dw_composed)
        if shortest_path.num_states() > 0:
            shortest_distance = pywrapfst.shortestdistance(shortest_path)
            return np.exp(-1 * float(shortest_distance[0]))
        else:
            return 10**-20
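The helper `get_weights_for_paths` is not shown in this example. The sketch below is only an assumption about what it does, based on the comments above: enumerate every complete path of the (acyclic) n-best FST and return one total weight per path, using `float()` on weights as Example #1 does.

def get_weights_for_paths(paths_fst):
    """Hypothetical sketch: one total (negative log) weight per complete path
    of an acyclic n-best paths FST."""
    weights = []

    def walk(state, acc):
        final_w = float(paths_fst.final(state))
        if final_w != float('inf'):           # a final state closes one complete path
            weights.append(acc + final_w)
        for arc in paths_fst.arcs(state):     # keep extending along every outgoing arc
            walk(arc.nextstate, acc + float(arc.weight))

    walk(paths_fst.start(), 0.0)
    return weights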
Example #4
def backward(lattice, neglog_to_log=False):
    # Work in the log semiring so that shortestdistance sums over paths
    # rather than taking the minimum.
    if lattice.arc_type() != 'log':
        lattice = openfst.arcmap(lattice, map_type='to_log')

    if neglog_to_log:
        # Invert every weight (Divide(One, w), i.e. a sign flip of the log
        # value), recurse, then invert the results back.
        inverted = openfst.arcmap(lattice, map_type='invert')
        one = openfst.Weight.one(lattice.weight_type())
        betas = [openfst.divide(one, a) for a in backward(inverted)]
        return betas

    # beta[s]: total (negative log) probability mass from state s to the final states.
    betas = openfst.shortestdistance(lattice, reverse=True)
    return betas
Example #5
def forward(lattice, neglog_to_log=False):
    # Work in the log semiring so that shortestdistance sums over paths
    # rather than taking the minimum.
    if lattice.arc_type() != 'log':
        lattice = openfst.arcmap(lattice, map_type='to_log')

    if neglog_to_log:
        # Invert every weight (Divide(One, w), i.e. a sign flip of the log
        # value), recurse, then invert the results back.
        inverted = openfst.arcmap(lattice, map_type='invert')
        one = openfst.Weight.one(lattice.weight_type())
        alphas = [openfst.divide(one, a) for a in forward(inverted)]
        return alphas

    # alpha[s]: total (negative log) probability mass from the start state to state s.
    alphas = openfst.shortestdistance(lattice)
    return alphas
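A sketch combining the two recursions to get state posteriors, assuming `openfst` is pywrapfst imported under that alias and OpenFst >= 1.8 (`VectorFst` as the mutable type); the tiny lattice is made up for illustration:

import math
import pywrapfst as openfst

# A small two-path lattice with tropical (negative log) arc weights.
lat = openfst.VectorFst()
s0, s1, s2 = lat.add_state(), lat.add_state(), lat.add_state()
lat.set_start(s0)
lat.add_arc(s0, openfst.Arc(1, 1, openfst.Weight(lat.weight_type(), 0.7), s1))
lat.add_arc(s0, openfst.Arc(2, 2, openfst.Weight(lat.weight_type(), 1.2), s2))
lat.add_arc(s1, openfst.Arc(3, 3, openfst.Weight(lat.weight_type(), 0.4), s2))
lat.set_final(s2, openfst.Weight.one(lat.weight_type()))

alphas = forward(lat)              # alpha[s]: -log mass of all prefixes reaching s
betas = backward(lat)              # beta[s]:  -log mass of all suffixes from s to a final state
total = float(betas[lat.start()])  # -log of the lattice's total probability mass

for s in lat.states():
    # Posterior probability that a path through the lattice visits state s.
    posterior = math.exp(-(float(alphas[s]) + float(betas[s]) - total))
    print(s, posterior)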
Example #6
    def initialize_heuristic(self, src_sentence):
        """Precomputes, for every state of the current FST, the shortest
        distance to a final state, used as the search heuristic."""
        self.distances = fst.shortestdistance(self.cur_fst, reverse=True)
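The precomputed distances could then back an A*-style estimate; a hypothetical companion method (not part of the original class) might be:

    def heuristic(self, state):
        # Cheapest remaining cost from `state` to a final state of the FST,
        # i.e. an admissible estimate for best-first search.
        return float(self.distances[state])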
Example #7
    #	print(sample_index)
    sample = candidates[sample_index]  #OOV_array[cand_index]
    # Chinese-restaurant-style table probabilities: start with the probability
    # of opening a new cluster (-1), then append one entry per existing
    # cluster that the sample can compose with.
    prob_new_table = alpha / (sum(cluster_sizes) + alpha)
    nonzero_tables = [-1]
    table_probs = [prob_new_table]

    for cluster_model_ind in range(0, len(cluster_models)):
        #		print("comparing with model " + str(cluster_model_ind))
        cluster_model = cluster_models[cluster_model_ind]
        cluster_size = cluster_sizes[cluster_model_ind]
        comp = fst.determinize(
            fst.compose(sample.arcsort(sort_type="olabel"),
                        cluster_model.arcsort())).minimize()
        if comp.num_states() > 0:
            #			print("nonzero composition")
            shortest_dis_list = fst.shortestdistance(comp)
            score = 0.0
            for state in comp.states():
                if comp.num_arcs(state) == 0:
                    str_w = shortest_dis_list[state].to_string()
                    score_float = float(
                        str_w[:str_w.find(b' ')])  #float(str_w)
                    score = score + np.exp(-1 * score_float)
            prob_merge = cluster_size / (sum(cluster_sizes) + alpha) * score
            #			print(str(prob_merge))
            table_probs.append(prob_merge)
            nonzero_tables.append(
                cluster_model_ind)  #cluster_names[cluster_model_ind])

    sum_probs = sum(table_probs)
    table_probs_norm = [x / sum_probs