def remove_suboptimal_paths(transducer):
    """Prune ``transducer`` in place down to the arcs that lie on best-cost paths.

    A cost is computed for every state, starting from the initial state:
    the state picked by ``get_cheapest_state`` is settled, and the arcs
    leaving it towards the still-pending states are used to update their
    costs.  Afterwards the final state picked by ``get_cheapest_state`` is
    handed to ``set_final_state`` and only arcs whose cost exactly bridges
    the costs of their two end states are kept.

    Returns the same ``transducer`` object that was passed in.
    """
    pending = set(transducer.states)
    costs = {state: CostVector.get_inf_vector() for state in pending}
    costs[transducer.initial_state] = CostVector.get_vector(
        transducer.get_length_of_cost_vectors(), 0)

    while pending:
        # A list is handed over (not the set) exactly as before, so the
        # selection sees the same kind of container.
        settled = get_cheapest_state(list(pending), costs)
        pending.remove(settled)
        for candidate in pending:
            outgoing = transducer.get_arcs_by_origin_and_terminal_state(
                settled, candidate)
            for arc in outgoing:
                # ``max`` keeps the greater vector under CostVector's ordering.
                costs[candidate] = max(costs[candidate],
                                       costs[settled] + arc.cost_vector)

    try:
        # TODO: the handler below only re-raises; it is a hook for debug prints.
        best_final_state = get_cheapest_state(transducer.get_final_states(), costs)
    except KeyError:
        raise

    transducer.set_final_state(best_final_state)
    transducer.set_arcs([
        arc for arc in transducer.get_arcs()
        if costs[arc.origin_state] + arc.cost_vector == costs[arc.terminal_state]
    ])
    return transducer
def _make_transducer(self):
    """Build the transducer for this DEP constraint.

    All arcs are self-loops on the state returned by
    ``_base_faithfulness_transducer``.  Identity arcs and deletion arcs
    (segment -> NULL_SEGMENT) are free; an insertion arc
    (NULL_SEGMENT -> segment) costs 1 when the inserted segment has
    ``self.feature_bundle`` and 0 otherwise.  When the
    ``ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS`` configuration is set, a free
    arc is added for every ordered pair of distinct segments.
    """
    transducer, segments, state = super(
        DepConstraint, self)._base_faithfulness_transducer()

    def add_loop(input_segment, output_segment, violations):
        # A fresh cost vector is created for every arc, as before.
        transducer.add_arc(
            Arc(state, input_segment, output_segment,
                CostVector.get_vector(1, violations), state))

    for segment in segments:
        add_loop(segment, segment, 0)
        add_loop(segment, NULL_SEGMENT, 0)
        insertion_cost = 1 if segment.has_feature_bundle(self.feature_bundle) else 0
        add_loop(NULL_SEGMENT, segment, insertion_cost)

    if get_configuration("ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS"):
        for source, target in permutations(segments, 2):
            add_loop(source, target, 0)

    return transducer
def _make_transducer(self):
    """Build the transducer for the general faithfulness constraint.

    All arcs are self-loops on the state returned by
    ``_base_faithfulness_transducer``.  For every segment, the insertion
    arc (NULL_SEGMENT -> segment) and the deletion arc
    (segment -> NULL_SEGMENT) cost 1, while the identity arc is free.  When
    ``configurations["ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS"]`` is truthy,
    an arc of cost 1 is added for every ordered pair of distinct segments.
    """
    transducer, segments, state = super(
        FaithConstraint, self)._base_faithfulness_transducer()

    def add_loop(input_segment, output_segment, violations):
        # A fresh cost vector is created for every arc, as before.
        transducer.add_arc(
            Arc(state, input_segment, output_segment,
                CostVector.get_vector(1, violations), state))

    for segment in segments:
        add_loop(NULL_SEGMENT, segment, 1)
        add_loop(segment, NULL_SEGMENT, 1)
        add_loop(segment, segment, 0)

    if configurations["ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS"]:
        for source, target in permutations(segments, 2):
            add_loop(source, target, 1)

    return transducer
def _make_transducer(self):
    """Build the chain-shaped transducer for this word.

    One state ``q0 .. qn`` is created per position, where ``n`` is
    ``len(self.word_string)``.  Every state carries a
    NULL_SEGMENT -> JOKER_SEGMENT self-loop, and each state but the last
    has an arc reading the word segment at that position into
    JOKER_SEGMENT and leading to the next state.  All arcs carry an empty
    cost vector.  ``q0`` is the initial state and ``qn`` is added as a
    final state.
    """
    alphabet = self.feature_table.get_segments()
    transducer = Transducer(alphabet, length_of_cost_vectors=0)
    word_segments = self.get_segments()
    last_position = len(self.word_string)
    chain = [
        State("q{}".format(position), position)
        for position in range(last_position + 1)
    ]

    for position, state in enumerate(chain):
        transducer.add_state(state)
        transducer.add_arc(
            Arc(state, NULL_SEGMENT, JOKER_SEGMENT,
                CostVector.get_empty_vector(), state))
        if position == last_position:
            # The last state has no following segment to consume.
            continue
        transducer.add_arc(
            Arc(state, word_segments[position], JOKER_SEGMENT,
                CostVector.get_empty_vector(), chain[position + 1]))

    transducer.initial_state = chain[0]
    transducer.add_final_state(chain[last_position])
    return transducer
def _make_transducer(self):
    """Build the transducer for this IDENT constraint.

    All arcs are self-loops on the state returned by
    ``_base_faithfulness_transducer``.  Identity, deletion and insertion
    arcs are free.  In addition, an arc is added for every
    (input segment, output segment) pair; it costs 1 only when the input
    segment has ``self.feature_bundle`` and the output segment does not,
    and 0 in every other case.
    """
    transducer, segments, state = super(
        IdentConstraint, self)._base_faithfulness_transducer()

    def add_loop(input_segment, output_segment, violations):
        # A fresh cost vector is created for every arc, as before.
        transducer.add_arc(
            Arc(state, input_segment, output_segment,
                CostVector.get_vector(1, violations), state))

    for segment in segments:
        add_loop(segment, segment, 0)
        add_loop(segment, NULL_SEGMENT, 0)
        add_loop(NULL_SEGMENT, segment, 0)

        input_has_bundle = segment.has_feature_bundle(self.feature_bundle)
        for output_segment in segments:
            # Short-circuit: the output segment is only inspected when the
            # input segment carries the bundle.
            bundle_lost = input_has_bundle and not output_segment.has_feature_bundle(
                self.feature_bundle)
            add_loop(segment, output_segment, 1 if bundle_lost else 0)

    return transducer
def _get_path_cost(transducer):
    """Return the summed cost of one path from a final state back to the initial state.

    Starting at ``transducer.get_a_final_state()``, the walk repeatedly
    follows the first arc returned by ``get_arcs_by_terminal_state`` to its
    origin state, adding the arc's cost vector, until the initial state is
    reached.

    Raises:
        TransducerOptimizationError: if the walk reaches a state with no
            incoming arcs, or if it comes back to a state it has already
            visited (a cycle of any length, including a self-loop).
    """
    current_state = transducer.get_a_final_state()
    path_cost = CostVector.get_vector(transducer.get_length_of_cost_vectors(), 0)
    initial_state = transducer.initial_state

    # States already walked through.  Because the walk always takes the first
    # incoming arc, revisiting any of them would repeat forever.
    visited = set()

    while current_state != initial_state:
        visited.add(current_state)
        arcs_to_current_state = transducer.get_arcs_by_terminal_state(current_state)
        if not arcs_to_current_state:
            raise TransducerOptimizationError(
                "No arcs leading to the current state. It is a dead state.")

        arc = arcs_to_current_state[0]
        # The explicit self-loop comparison is kept alongside the visited-set
        # lookup so the original check is preserved verbatim.
        if arc.origin_state == current_state or arc.origin_state in visited:
            raise TransducerOptimizationError('Cyclic Transducer')

        current_state = arc.origin_state
        path_cost += arc.cost_vector

    return path_cost
def optimize_transducer_grammar_for_word(word, eval):
    """Build a new transducer holding only the arcs chosen by ``_best_arcs``.

    Arcs of ``eval`` are grouped by the ``index`` of their origin state.
    For each position ``0 .. len(word.get_segments()) - 1`` the arcs leaving
    that position are filtered through ``_best_arcs``; the surviving arcs
    and their terminal states are copied into the new transducer and the
    accumulated cost of each terminal state is recorded.  Of ``eval``'s
    final states, those whose recorded cost is greatest under CostVector's
    ordering become final states of the result.

    Args:
        word: object exposing ``get_segments()``.
        eval: the transducer to optimize.  The name shadows the builtin but
            is kept so keyword callers keep working.

    Returns:
        The newly built ``Transducer``.

    Raises:
        KeyError: if no arc leaves a state with one of the visited indices,
            or if a final state of ``eval`` never received a cost.
        IndexError: if ``eval.final_states`` is empty.
    """
    # Plain dict + setdefault: a missing index must still raise KeyError below.
    arcs_by_index = {}
    for arc in eval._arcs:
        arcs_by_index.setdefault(arc.origin_state.index, []).append(arc)

    new_transducer = Transducer(eval.get_alphabet())
    initial_state = eval.initial_state
    new_transducer.add_state(initial_state)
    new_transducer.initial_state = initial_state
    state_costs = {
        initial_state: CostVector.get_vector(eval.get_length_of_cost_vectors(), 0),
    }

    for index in range(len(word.get_segments())):
        for arc in _best_arcs(arcs_by_index[index], state_costs):
            new_transducer.add_arc(arc)
            new_transducer.add_state(arc.terminal_state)
            state_costs[arc.terminal_state] = (
                state_costs[arc.origin_state] + arc.cost_vector)

    # Keep every final state that ties for the greatest cost vector.
    new_final_states = [eval.final_states[0]]
    for state in eval.final_states[1:]:
        state_cost = state_costs[state]
        final_cost = state_costs[new_final_states[0]]
        if state_cost > final_cost:
            new_final_states = [state]
        elif state_cost == final_cost:
            new_final_states.append(state)

    for state in new_final_states:
        new_transducer.add_final_state(state)

    # TODO: try new_transducer.clear_dead_states(with_impasse_states=True) here.
    return new_transducer
def _make_transducer(self):
    """Build the transducer for this IDENT constraint.

    Every arc loops on the state handed back by
    ``_base_faithfulness_transducer``.  Per segment, the identity, deletion
    (segment -> NULL_SEGMENT) and insertion (NULL_SEGMENT -> segment) arcs
    are added with cost 0.  Then one arc per output segment is added; its
    cost is 1 when the input segment has ``self.feature_bundle`` but the
    output segment lacks it, and 0 otherwise.
    """
    transducer, segments, state = super(IdentConstraint, self)._base_faithfulness_transducer()

    for source in segments:
        free_pairs = (
            (source, source),
            (source, NULL_SEGMENT),
            (NULL_SEGMENT, source),
        )
        for input_segment, output_segment in free_pairs:
            transducer.add_arc(
                Arc(state, input_segment, output_segment, CostVector.get_vector(1, 0), state))

        source_has_bundle = source.has_feature_bundle(self.feature_bundle)
        for target in segments:
            # The target is only inspected when the source carries the bundle.
            if source_has_bundle and not target.has_feature_bundle(self.feature_bundle):
                violations = 1
            else:
                violations = 0
            transducer.add_arc(
                Arc(state, source, target, CostVector.get_vector(1, violations), state))

    return transducer
def _make_transducer(self):
    """Build the transducer for a constraint over the sequence ``self.feature_bundles``.

    Every arc reads JOKER_SEGMENT and writes an alphabet segment (or
    NULL_SEGMENT) and carries a one-element cost vector.  With a single
    bundle, one state suffices and writing a segment costs 1 exactly when
    the segment has that bundle.  With several bundles, states are
    arranged in a ``states[level][memory]`` grid labelled
    ``'q<level>|<memory>'``; arcs are free except those leaving a
    level-``n`` state, which cost 1 when the written segment has the last
    bundle.

    Returns the constructed ``Transducer``.
    """
    def compute_num_of_max_satisfied_bundle(segment):
        # Count how many consecutive bundles, starting at index 0 and
        # stopping before index n, the segment has.
        i = 0
        while i < n and symbol_bundle_characteristic_matrix[segment][i]:
            i += 1
        return i

    def compute_highest_num_of_satisfied_bundle(segment, j):
        # Scan k = j + 1 down to 1 and return the first k whose bundle
        # (index k - 1) the segment has.
        for k in range(j + 1, 0, -1):
            if symbol_bundle_characteristic_matrix[segment][k - 1]:
                return k
        else:
            # for/else: reached only when no k in the scanned range matched.
            return 0

    # n is the index of the last bundle.
    n = len(self.feature_bundles) - 1
    segments = self.feature_table.get_segments()
    transducer = Transducer(segments, name=str(self))

    # matrix[segment][i] is the result of segment.has_feature_bundle(bundle i).
    symbol_bundle_characteristic_matrix = {
        segment: [
            segment.has_feature_bundle(self.feature_bundles[i])
            for i in range(n + 1)
        ]
        for segment in segments
    }

    # Grid of placeholders (0) keyed [i][j] with j < i; the placeholders are
    # replaced by State objects below.  Row 0 starts empty.
    states = {i: {j: 0 for j in range(i)} for i in range(n + 1)}

    initial_state = State('q0|0')  # the label is a plain 'level|memory' string
    states[0][0] = initial_state

    transducer.set_as_single_state(initial_state)

    if not n:
        # Single bundle: everything loops on the one state.
        for segment in segments:
            transducer.add_arc(
                Arc(
                    states[0][0], JOKER_SEGMENT, segment,
                    CostVector([
                        int(symbol_bundle_characteristic_matrix[segment][0])
                    ]), states[0][0]))
        transducer.add_arc(
            Arc(states[0][0], JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]),
                states[0][0]))

    else:
        # Create and register the state for every grid cell.
        for i in range(0, n + 1):
            for j in range(i):
                state = State('q{0}|{1}'.format(i, j))
                states[i][j] = state
                transducer.add_state(state)
        max_num_of_satisfied_bundle_by_segment = {
            segment: compute_num_of_max_satisfied_bundle(segment)
            for segment in segments
        }
        # Arcs out of the initial state.  The matrix entry is used directly
        # as the level index -- presumably a bool, so True -> level 1 and
        # False -> level 0; confirm has_feature_bundle's return type.
        for segment in segments:
            transducer.add_arc(
                Arc(
                    states[0][0], JOKER_SEGMENT, segment, CostVector([0]),
                    states[symbol_bundle_characteristic_matrix[segment][0]][0]))
        for i in range(n + 1):
            for j in range(i):
                state = states[i][j]
                transducer.add_final_state(state)
                if i != n:
                    for segment in segments:
                        if symbol_bundle_characteristic_matrix[segment][i]:
                            # Segment has bundle i: advance one level.
                            new_state_level = i + 1
                            new_state_mem = min([
                                j + 1,
                                max_num_of_satisfied_bundle_by_segment[segment]
                            ])
                        else:
                            # Otherwise fall back to the level computed from
                            # the memory value j.
                            new_state_level = compute_highest_num_of_satisfied_bundle(
                                segment, j)
                            new_state_mem = min([
                                max_num_of_satisfied_bundle_by_segment[segment],
                                abs(new_state_level - 1)
                            ])
                        new_terminus = states[new_state_level][new_state_mem]
                        transducer.add_arc(
                            Arc(state, JOKER_SEGMENT, segment, CostVector([0]),
                                new_terminus))
                        # A free self-loop on the target state is added for
                        # the same segment.
                        transducer.add_arc(
                            Arc(new_terminus, JOKER_SEGMENT, segment,
                                CostVector([0]), new_terminus))
                else:  # i = n
                    for segment in segments:
                        new_state_level = compute_highest_num_of_satisfied_bundle(
                            segment, j)
                        new_state_mem = min([
                            max_num_of_satisfied_bundle_by_segment[segment],
                            abs(new_state_level - 1)
                        ])
                        new_terminus = states[new_state_level][new_state_mem]
                        # At the last level the arc costs 1 when the segment
                        # has the last bundle.
                        transducer.add_arc(
                            Arc(
                                state, JOKER_SEGMENT, segment,
                                CostVector([
                                    int(symbol_bundle_characteristic_matrix[segment][i])
                                ]), new_terminus))

        # NOTE(review): this clean-up is placed inside the multi-bundle
        # branch because the single-bundle branch already adds its own
        # NULL_SEGMENT arc -- confirm the nesting against version control.
        transducer.clear_dead_states()
        # Every remaining state gets a free arc writing NULL_SEGMENT.
        for state in transducer.states:
            transducer.add_arc(
                Arc(state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), state))
    return transducer
def test_costVector_get_vector_with_size_n_and_number_m(self):
    # get_vector(size, number) must compare equal to a CostVector built from
    # a list holding `number` repeated `size` times; size 0 gives CostVector([]).
    expectations = (
        (4, 0, [0, 0, 0, 0]),
        (1, 1, [1]),
        (0, 0, []),
    )
    for size, number, expected_values in expectations:
        self.assertEqual(CostVector.get_vector(size, number),
                         CostVector(expected_values))

    # get_empty_vector() must compare equal to CostVector([]) as well.
    self.assertEqual(CostVector.get_empty_vector(), CostVector([]))
def test_costVector_comparison(self):
    # Pins the behaviour of `>` on CostVector, including the infinite vector.

    # A vector of zeros is greater than one with a single violation.
    all_zeros = CostVector([0, 0, 0, 0, 0])
    one_violation = CostVector([0, 0, 1, 0, 0])
    self.assertTrue(all_zeros > one_violation)

    # [1, 0, 1] is not greater than [0, 2, 0].
    left = CostVector([1, 0, 1])
    right = CostVector([0, 2, 0])
    self.assertFalse(left > right)

    # Any finite vector is greater than the infinite vector ...
    finite = CostVector([1000, 0, 76])
    self.assertTrue(finite > CostVector.get_inf_vector())

    # ... while the infinite vector is greater than neither a finite vector
    # nor another infinite vector.
    infinite = CostVector.get_inf_vector()
    self.assertFalse(infinite > CostVector([0, 1, 2]))
    first_infinite = CostVector.get_inf_vector()
    second_infinite = CostVector.get_inf_vector()
    self.assertFalse(first_infinite > second_infinite)