def normalize_fst(f):
    """Return a copy of ``f`` with every final weight shifted by a constant.

    The constant is entry 0 of the reverse shortest-distance vector of ``f``
    after mapping it with ``map_type="to_log"``. Each state's final weight in
    the copy is replaced by ``float(final) - float(constant)``, rebuilt as a
    weight of the copy's own weight type. ``f`` itself is not modified.

    :param f: the FST to normalize
    :return: the adjusted copy of ``f``
    """
    normalized = f.copy()
    log_view = fst.arcmap(f, map_type="to_log")
    # NOTE(review): index 0 presumes the start state is state 0 - confirm
    # against callers.
    total = float(fst.shortestdistance(log_view, reverse=True)[0])
    weight_type = normalized.weight_type()
    for state in normalized.states():
        shifted = float(normalized.final(state)) - total
        normalized.set_final(state, fst.Weight(weight_type, shifted))
    return normalized
def calculate_cost(fst_in):
    """
    Calculates the cost of shortest path through an FST.

    The cost is read from the last entry of the shortest-distance vector and
    parsed from that weight's string form.

    :param fst_in: <openfst.Fst>
    :return: <float>, or None when the cost cannot be computed or parsed
    """
    try:
        distances = openfst.shortestdistance(fst_in)
        return float(distances[-1].to_string())
    except Exception:
        # Best-effort contract: any failure (empty distance vector,
        # unparsable weight string, library error) yields None. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt / SystemExit propagate.
        return None
def get_likelihood_for_fsas_over_paths(d_fsa, w_fsas, w, num_paths=10, return_type="probability"):
    """Score the composition of ``w_fsas[w]`` with ``d_fsa`` over its best paths.

    ``w_fsas[w]`` is composed with ``d_fsa`` and the result is arc-sorted by
    input label.

    * ``num_paths > 1``: the ``num_paths`` shortest paths are extracted and
      epsilon-normalized. Depending on ``return_type`` the function returns
      the shortest-paths FST (``"shortest_paths"``), the list produced by
      ``get_weights_for_paths`` (``"path_weights"``), or the sum of
      ``exp(-weight)`` over those weights (``"probability"``). Any other
      ``return_type`` yields ``None``.
    * otherwise: the single shortest path is extracted and
      ``exp(-distance[0])`` is returned; ``return_type`` is ignored.

    When the shortest-path FST has no states, ``10**-20`` is returned instead
    (except for ``return_type="shortest_paths"``, which returns the empty FST).

    :param d_fsa: FST composed on the right-hand side
    :param w_fsas: mapping/sequence of FSTs indexed by ``w``
    :param w: key selecting the FST from ``w_fsas``
    :param num_paths: number of shortest paths to consider; must be positive
    :param return_type: ``"probability"``, ``"path_weights"`` or
        ``"shortest_paths"``
    :raises ValueError: if ``num_paths`` is not positive
    """
    if num_paths <= 0:
        raise ValueError('num_paths must be a positive integer')

    # Floor value returned when there is no way to compose d_fsa and w_fsa.
    no_path_likelihood = 10**-20

    w_fsa = w_fsas[w]
    dw_composed = pywrapfst.compose(w_fsa, d_fsa)
    dw_composed.arcsort(sort_type="ilabel")

    if num_paths > 1:
        shortest_paths = pywrapfst.epsnormalize(
            pywrapfst.shortestpath(dw_composed, nshortest=num_paths))
        if return_type == "shortest_paths":
            return shortest_paths
        if shortest_paths.num_states() == 0:
            return no_path_likelihood

        # The per-path weights come from a helper defined elsewhere in the
        # file; the reverse shortest-distance vector previously computed here
        # was never read, so it is no longer computed.
        path_weights = get_weights_for_paths(shortest_paths)
        if return_type == "path_weights":
            return path_weights
        if return_type == "probability":
            return np.sum(np.exp(-1. * np.array(path_weights)))
        # Unrecognized return_type: keep the historical result of None.
        return None

    shortest_path = pywrapfst.shortestpath(dw_composed)
    if shortest_path.num_states() == 0:
        return no_path_likelihood
    shortest_distance = pywrapfst.shortestdistance(shortest_path)
    return np.exp(-1 * float(shortest_distance[0]))
def backward(lattice, neglog_to_log=False):
    """Return the reverse shortest distances (betas) of ``lattice``.

    The lattice is mapped with ``map_type='to_log'`` first unless its arc type
    is already ``'log'``. With ``neglog_to_log`` set, the weights are mapped
    with ``map_type='invert'``, the distances are computed on that lattice,
    and each one is replaced by ``one / distance`` in the lattice's weight
    type, giving a list.

    :param lattice: the FST to process
    :param neglog_to_log: whether to compute on the weight-inverted lattice
    :return: the reverse shortest-distance values, one per entry returned by
        ``openfst.shortestdistance``
    """
    log_lattice = lattice
    if log_lattice.arc_type() != 'log':
        log_lattice = openfst.arcmap(log_lattice, map_type='to_log')

    if not neglog_to_log:
        return openfst.shortestdistance(log_lattice, reverse=True)

    flipped = openfst.arcmap(log_lattice, map_type='invert')
    unit = openfst.Weight.one(log_lattice.weight_type())
    betas = []
    # Recurse without the flag so the inverted lattice takes the plain path.
    for dist in backward(flipped):
        betas.append(openfst.divide(unit, dist))
    return betas
def forward(lattice, neglog_to_log=False):
    """Return the forward shortest distances (alphas) of ``lattice``.

    The lattice is mapped with ``map_type='to_log'`` first unless its arc type
    is already ``'log'``. With ``neglog_to_log`` set, the weights are mapped
    with ``map_type='invert'``, the distances are computed on that lattice,
    and each one is replaced by ``one / distance`` in the lattice's weight
    type, giving a list.

    :param lattice: the FST to process
    :param neglog_to_log: whether to compute on the weight-inverted lattice
    :return: the shortest-distance values, one per entry returned by
        ``openfst.shortestdistance``
    """
    log_lattice = lattice
    if log_lattice.arc_type() != 'log':
        log_lattice = openfst.arcmap(log_lattice, map_type='to_log')

    if not neglog_to_log:
        return openfst.shortestdistance(log_lattice)

    flipped = openfst.arcmap(log_lattice, map_type='invert')
    unit = openfst.Weight.one(log_lattice.weight_type())
    alphas = []
    # Recurse without the flag so the inverted lattice takes the plain path.
    for dist in forward(flipped):
        alphas.append(openfst.divide(unit, dist))
    return alphas
def initialize_heuristic(self, src_sentence):
    """Compute the reverse shortest distances of the current FST and cache them.

    Stores the result of ``fst.shortestdistance`` (called with
    ``reverse=True`` on ``self.cur_fst``) in ``self.distances``.
    ``src_sentence`` is not used by this method.
    """
    reverse_distances = fst.shortestdistance(self.cur_fst, reverse=True)
    self.distances = reverse_distances
# print(sample_index) sample = candidates[sample_index] #OOV_array[cand_index] prob_new_table = alpha / (sum(cluster_sizes) + alpha) nonzero_tables = [-1] table_probs = [prob_new_table] for cluster_model_ind in range(0, len(cluster_models)): # print("comparing with model " + str(cluster_model_ind)) cluster_model = cluster_models[cluster_model_ind] cluster_size = cluster_sizes[cluster_model_ind] comp = fst.determinize( fst.compose(sample.arcsort(sort_type="olabel"), cluster_model.arcsort())).minimize() if comp.num_states() > 0: # print("nonzero composition") shortest_dis_list = fst.shortestdistance(comp) score = 0.0 for state in comp.states(): if comp.num_arcs(state) == 0: str_w = shortest_dis_list[state].to_string() score_float = float( str_w[:str_w.find(b' ')]) #float(str_w) score = score + np.exp(-1 * score_float) prob_merge = cluster_size / (sum(cluster_sizes) + alpha) * score # print(str(prob_merge)) table_probs.append(prob_merge) nonzero_tables.append( cluster_model_ind) #cluster_names[cluster_model_ind]) sum_probs = sum(table_probs) table_probs_norm = [x / sum_probs