def _make_transducer(self):
    """Build the two-state transducer for this constraint.

    The machine starts in ``Precede1`` and moves to ``Precede2`` on the
    stress symbol ``'``; it never leaves ``Precede2``. An arc writing a
    segment from ``yimas_vowels`` while still in ``Precede1`` costs 1; every
    other arc costs 0. Both states are final.

    Returns:
        The constructed ``Transducer``.

    Raises:
        ConstraintError: if a segment symbol is neither in ``yimas_vowels``,
            nor ``'``, nor in ``yimas_cons``.
    """
    alphabet = self.feature_table.get_segments()
    machine = Transducer(alphabet, name=str(self))

    before_stress = State('Precede1')
    after_stress = State('Precede2')  # reached after the stress symbol; vowels are free here
    both = (before_stress, after_stress)

    for node in both:
        machine.add_state(node)
    machine.initial_state = before_stress
    for node in both:
        machine.add_final_state(node)

    for seg in alphabet:
        symbol = seg.get_symbol()
        # Decide what the arc leaving Precede1 looks like; the arc leaving
        # Precede2 is always a zero-cost self-loop.
        if symbol in yimas_vowels:
            cost, target = 1, before_stress
        elif symbol == "'":
            cost, target = 0, after_stress
        elif symbol in yimas_cons:
            cost, target = 0, before_stress
        else:
            raise ConstraintError("{} not supported in this constraint".format(symbol))

        machine.add_arc(Arc(before_stress, JOKER_SEGMENT, seg, CostVector([cost]), target))
        machine.add_arc(Arc(after_stress, JOKER_SEGMENT, seg, CostVector([0]), after_stress))

    # Zero-cost JOKER/NULL self-loop on every state.
    for node in machine.states:
        machine.add_arc(Arc(node, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), node))

    return machine
def _make_transducer(self):
    """Build the two-state transducer for this constraint.

    ``Contiguity1`` is the initial state. A segment-to-itself arc on the
    stress symbol ``'`` leads to ``Contiguity2`` from either state; a
    segment-to-itself arc on any vowel or consonant leads back to
    ``Contiguity1``. Arcs that pair a segment with ``NULL_SEGMENT`` (in
    either position) cost 0 out of ``Contiguity1`` and 1 out of
    ``Contiguity2``, and always end in ``Contiguity1``. Both states are
    final.

    Returns:
        The constructed ``Transducer``.

    Raises:
        ConstraintError: if a segment symbol is neither in ``yimas_vowels``,
            nor ``'``, nor in ``yimas_cons``.
    """
    alphabet = self.feature_table.get_segments()
    machine = Transducer(alphabet, name=str(self))

    base = State('Contiguity1')
    post_stress = State('Contiguity2')
    both = (base, post_stress)

    for node in both:
        machine.add_state(node)
    machine.initial_state = base
    for node in both:
        machine.add_final_state(node)

    for seg in alphabet:
        # Arcs pairing the segment with NULL_SEGMENT: free from the base
        # state, penalised right after stress.
        for origin, penalty in ((base, 0), (post_stress, 1)):
            machine.add_arc(Arc(origin, NULL_SEGMENT, seg, CostVector([penalty]), base))
            machine.add_arc(Arc(origin, seg, NULL_SEGMENT, CostVector([penalty]), base))

        symbol = seg.get_symbol()
        if symbol in yimas_vowels:
            target = base
        elif symbol == "'":
            target = post_stress
        elif symbol in yimas_cons:
            target = base
        else:
            raise ConstraintError("{} not supported in this constraint".format(symbol))

        # Identity arcs are always free; only their destination varies.
        for origin in both:
            machine.add_arc(Arc(origin, seg, seg, CostVector([0]), target))

    return machine
def _make_transducer(self):
    """Build a linear chain transducer for this word.

    One state ``q0 .. qn`` is created per position, where ``n`` is
    ``len(self.word_string)``. Every state carries a
    ``NULL_SEGMENT``/``JOKER_SEGMENT`` self-loop, and state ``k`` is linked
    to state ``k + 1`` by an arc on the k-th word segment. All arcs use the
    empty cost vector. ``q0`` is initial and ``qn`` is the only final state.

    Returns:
        The constructed ``Transducer``.
    """
    alphabet = self.feature_table.get_segments()
    machine = Transducer(alphabet, length_of_cost_vectors=0)
    symbols = self.get_segments()
    length = len(self.word_string)

    chain = [State("q{}".format(position), position) for position in range(length + 1)]

    for position, node in enumerate(chain):
        machine.add_state(node)
        machine.add_arc(Arc(node, NULL_SEGMENT, JOKER_SEGMENT,
                            CostVector.get_empty_vector(), node))
        if position == length:
            # The last state has no successor to advance to.
            continue
        machine.add_arc(Arc(node, symbols[position], JOKER_SEGMENT,
                            CostVector.get_empty_vector(), chain[position + 1]))

    machine.initial_state = chain[0]
    machine.add_final_state(chain[length])
    return machine
def optimize_transducer_grammar_for_word(word, eval):
    """Return a pruned copy of ``eval`` keeping only the arcs picked by ``_best_arcs``.

    Arcs of ``eval`` are grouped by the ``index`` of their origin state. For
    each position ``0 .. len(word.get_segments()) - 1`` the group for that
    index is filtered through ``_best_arcs`` together with the accumulated
    cost of every state reached so far; the surviving arcs and their
    terminal states are copied into a new ``Transducer``.

    Args:
        word: object whose ``get_segments()`` length gives the number of
            positions to process.
        eval: the transducer to prune. (The name shadows the builtin but is
            kept so existing keyword callers keep working.)

    Returns:
        A new ``Transducer`` over ``eval``'s alphabet.

    Raises:
        KeyError: if no arc originates at one of the processed indices, or
            if a cost is looked up for a state that was never reached.
        IndexError: if ``eval.final_states`` is empty (assuming it is a
            list - TODO confirm).
    """
    # Group arcs by the index of the state they leave.
    arcs_by_index = {}
    for arc in eval._arcs:
        arcs_by_index.setdefault(arc.origin_state.index, []).append(arc)

    new_transducer = Transducer(eval.get_alphabet())
    new_transducer.add_state(eval.initial_state)
    new_transducer.initial_state = eval.initial_state

    # Accumulated cost vector of every state reached so far.
    state_costs = {
        eval.initial_state: CostVector.get_vector(eval.get_length_of_cost_vectors(), 0)
    }

    for index in range(len(word.get_segments())):
        new_arcs = _best_arcs(arcs_by_index[index], state_costs)
        for arc in new_arcs:
            new_transducer.add_arc(arc)
            new_transducer.add_state(arc.terminal_state)
            state_costs[arc.terminal_state] = state_costs[arc.origin_state] + arc.cost_vector

    # Keep only the final state(s) whose accumulated cost wins the comparison.
    # NOTE(review): this relies on CostVector's ``>`` meaning "better than";
    # confirm against CostVector's comparison semantics.
    new_final_states = [eval.final_states[0]]
    for state in eval.final_states[1:]:
        state_cost = state_costs[state]
        final_cost = state_costs[new_final_states[0]]
        if state_cost > final_cost:
            new_final_states = [state]
        elif state_cost == final_cost:
            new_final_states.append(state)

    for state in new_final_states:
        new_transducer.add_final_state(state)

    # TODO: try new_transducer.clear_dead_states(with_impasse_states=True) here.
    return new_transducer
def test_transducer_clear_dead_states(self):
    """clear_dead_states() output must equal the pickled reference transducer.

    The fixture has four states: q1 (initial) and q2 (final) are linked,
    while q3 and q4 only have arcs among themselves and into q3.
    """
    machine = Transducer(self.feature_table.get_segments())

    q1, q2, q3, q4 = [State(label) for label in ('q1', 'q2', 'q3', 'q4')]
    for node in (q1, q2, q3, q4):
        machine.add_state(node)

    machine.initial_state = q1
    machine.add_final_state(q2)

    # (origin, terminus) pairs, in insertion order.
    links = (
        (q1, q2),
        (q1, q1),
        (q2, q2),
        (q3, q3),
        (q4, q3),
    )
    for origin, terminus in links:
        machine.add_arc(Arc(origin, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), terminus))

    machine.clear_dead_states()

    expected = get_pickle("clear_dead_states_test_transducer")
    self.assertEqual(machine, expected)
def _make_transducer(self):
    """Build the transducer for a constraint over ``self.feature_bundles``.

    With a single feature bundle the result is one state with a self-loop per
    segment, costing 1 when the segment has that bundle and 0 otherwise.
    With more bundles, states ``q{i}|{j}`` (1 <= i <= n, 0 <= j < i, where
    n = number of bundles - 1) are created next to the initial state
    ``q0|0``; only arcs leaving a level-n state can carry a non-zero cost,
    namely 1 when the written segment has the last bundle.

    NOTE(review): this block arrived with its indentation collapsed; the
    nesting below (in particular the ``for ... else`` in
    ``compute_highest_num_of_satisfied_bundle`` and the placement of
    ``clear_dead_states`` inside the ``else`` branch) is reconstructed and
    should be confirmed against the repository.

    Returns:
        The constructed ``Transducer``.
    """
    def compute_num_of_max_satisfied_bundle(segment):
        # Number of leading bundles (among the first n) that the segment
        # satisfies consecutively, starting from bundle 0.
        i = 0
        while i < n and symbol_bundle_characteristic_matrix[segment][i]:
            i += 1
        return i

    def compute_highest_num_of_satisfied_bundle(segment, j):
        # Scan k = j+1 .. 1 and return the first k whose bundle k-1 is
        # satisfied by the segment; 0 when none is.
        for k in range(j + 1, 0,-1):
            if symbol_bundle_characteristic_matrix[segment][k-1]:
                return k
        else:
            return 0

    # n is the index of the last feature bundle.
    n = len(self.feature_bundles) - 1
    segments = self.feature_table.get_segments()
    transducer = Transducer(segments, name=str(self))

    # segment -> list of flags, one per bundle: does the segment have it?
    symbol_bundle_characteristic_matrix = {segment: [segment.has_feature_bundle(self.feature_bundles[i]) for i in range(n+1)] for segment in segments}

    # states[i][j] placeholders (0) are replaced by State objects below.
    states = {i: {j: 0 for j in range(i)} for i in range(n+1)}

    initial_state = State('q0|0')  # the label is the string 'q<i>|<j>' for the state at states[i][j]
    states[0][0] = initial_state

    transducer.set_as_single_state(initial_state)

    if not n:
        # Single bundle: one state, cost 1 for every segment that has it.
        for segment in segments:
            transducer.add_arc(Arc(states[0][0], JOKER_SEGMENT, segment, CostVector([int(symbol_bundle_characteristic_matrix[segment][0])]), states[0][0]))
        transducer.add_arc(Arc(states[0][0], JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), states[0][0]))

    else:
        # Create every q{i}|{j} state with j < i.
        for i in range(0, n+1):
            for j in range(i):
                state = State('q{0}|{1}'.format(i,j))
                states[i][j] = state
                transducer.add_state(state)

        max_num_of_satisfied_bundle_by_segment = {segment: compute_num_of_max_satisfied_bundle(segment) for segment in segments}

        # From the initial state: go to level 1 when the segment has bundle 0
        # (the boolean flag is used directly as the level key), else stay.
        for segment in segments:
            transducer.add_arc(Arc(states[0][0], JOKER_SEGMENT, segment, CostVector([0]), states[symbol_bundle_characteristic_matrix[segment][0]][0]))

        for i in range(n+1):
            for j in range(i):
                state = states[i][j]
                transducer.add_final_state(state)
                if i != n:
                    for segment in segments:
                        if symbol_bundle_characteristic_matrix[segment][i]:
                            # Segment has bundle i: advance one level.
                            new_state_level = i+1
                            new_state_mem = min([j+1, max_num_of_satisfied_bundle_by_segment[segment]])
                        else:
                            # Otherwise fall back to the level returned by
                            # the helper for (segment, j).
                            new_state_level = compute_highest_num_of_satisfied_bundle(segment, j)
                            new_state_mem = min([max_num_of_satisfied_bundle_by_segment[segment], abs(new_state_level - 1)])
                        new_terminus = states[new_state_level][new_state_mem]
                        transducer.add_arc(Arc(state, JOKER_SEGMENT, segment, CostVector([0]), new_terminus))
                else:  # i = n
                    for segment in segments:
                        new_state_level = compute_highest_num_of_satisfied_bundle(segment, j)
                        new_state_mem = min([max_num_of_satisfied_bundle_by_segment[segment], abs(new_state_level - 1)])
                        new_terminus = states[new_state_level][new_state_mem]
                        # Cost 1 exactly when the segment has the last bundle.
                        transducer.add_arc(Arc(state, JOKER_SEGMENT, segment, CostVector([int(symbol_bundle_characteristic_matrix[segment][i])]), new_terminus))

        transducer.clear_dead_states()
        # Zero-cost JOKER/NULL self-loop on every state left after cleanup.
        for state in transducer.states:
            transducer.add_arc(Arc( state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), state))
    return transducer
def _make_transducer(self):
    """Build the transducer for a constraint over ``self.feature_bundles``.

    With a single feature bundle the result is one state with a self-loop per
    segment, costing 1 when the segment has that bundle and 0 otherwise.
    With more bundles, states ``q{i}|{j}`` (1 <= i <= n, 0 <= j < i, where
    n = number of bundles - 1) are created next to the initial state
    ``q0|0``. Below level n, each computed target state also receives a
    zero-cost self-loop on the segment that led to it. Only arcs leaving a
    level-n state can carry a non-zero cost, namely 1 when the written
    segment has the last bundle.

    NOTE(review): this block arrived with its indentation collapsed; the
    nesting below (in particular the ``for ... else`` in
    ``compute_highest_num_of_satisfied_bundle`` and the placement of
    ``clear_dead_states`` inside the ``else`` branch) is reconstructed and
    should be confirmed against the repository.

    Returns:
        The constructed ``Transducer``.
    """
    def compute_num_of_max_satisfied_bundle(segment):
        # Number of leading bundles (among the first n) that the segment
        # satisfies consecutively, starting from bundle 0.
        i = 0
        while i < n and symbol_bundle_characteristic_matrix[segment][i]:
            i += 1
        return i

    def compute_highest_num_of_satisfied_bundle(segment, j):
        # Scan k = j+1 .. 1 and return the first k whose bundle k-1 is
        # satisfied by the segment; 0 when none is.
        for k in range(j + 1, 0, -1):
            if symbol_bundle_characteristic_matrix[segment][k - 1]:
                return k
        else:
            return 0

    # n is the index of the last feature bundle.
    n = len(self.feature_bundles) - 1
    segments = self.feature_table.get_segments()
    transducer = Transducer(segments, name=str(self))

    # segment -> list of flags, one per bundle: does the segment have it?
    symbol_bundle_characteristic_matrix = {
        segment: [
            segment.has_feature_bundle(self.feature_bundles[i])
            for i in range(n + 1)
        ]
        for segment in segments
    }

    # states[i][j] placeholders (0) are replaced by State objects below.
    states = {i: {j: 0 for j in range(i)} for i in range(n + 1)}

    initial_state = State(
        'q0|0'
    )  # the label is the string 'q<i>|<j>' for the state at states[i][j]
    states[0][0] = initial_state

    transducer.set_as_single_state(initial_state)

    if not n:
        # Single bundle: one state, cost 1 for every segment that has it.
        for segment in segments:
            transducer.add_arc(
                Arc(
                    states[0][0], JOKER_SEGMENT, segment,
                    CostVector([
                        int(symbol_bundle_characteristic_matrix[segment]
                            [0])
                    ]), states[0][0]))
        transducer.add_arc(
            Arc(states[0][0], JOKER_SEGMENT, NULL_SEGMENT,
                CostVector([0]), states[0][0]))

    else:
        # Create every q{i}|{j} state with j < i.
        for i in range(0, n + 1):
            for j in range(i):
                state = State('q{0}|{1}'.format(i, j))
                states[i][j] = state
                transducer.add_state(state)

        max_num_of_satisfied_bundle_by_segment = {
            segment: compute_num_of_max_satisfied_bundle(segment)
            for segment in segments
        }

        # From the initial state: go to level 1 when the segment has bundle 0
        # (the boolean flag is used directly as the level key), else stay.
        for segment in segments:
            transducer.add_arc(
                Arc(
                    states[0][0], JOKER_SEGMENT, segment, CostVector([0]),
                    states[symbol_bundle_characteristic_matrix[segment]
                           [0]][0]))

        for i in range(n + 1):
            for j in range(i):
                state = states[i][j]
                transducer.add_final_state(state)
                if i != n:
                    for segment in segments:
                        if symbol_bundle_characteristic_matrix[segment][i]:
                            # Segment has bundle i: advance one level.
                            new_state_level = i + 1
                            new_state_mem = min([
                                j + 1,
                                max_num_of_satisfied_bundle_by_segment[
                                    segment]
                            ])
                        else:
                            # Otherwise fall back to the level returned by
                            # the helper for (segment, j).
                            new_state_level = compute_highest_num_of_satisfied_bundle(
                                segment, j)
                            new_state_mem = min([
                                max_num_of_satisfied_bundle_by_segment[
                                    segment],
                                abs(new_state_level - 1)
                            ])
                        new_terminus = states[new_state_level][
                            new_state_mem]
                        transducer.add_arc(
                            Arc(state, JOKER_SEGMENT, segment,
                                CostVector([0]), new_terminus))
                        # Zero-cost self-loop on the target for the same segment.
                        # NOTE(review): this runs once per (state, segment)
                        # pair, so the same self-loop can be passed to
                        # add_arc repeatedly - confirm add_arc tolerates
                        # duplicates.
                        transducer.add_arc(
                            Arc(new_terminus, JOKER_SEGMENT, segment,
                                CostVector([0]), new_terminus))
                else:  # i = n
                    for segment in segments:
                        new_state_level = compute_highest_num_of_satisfied_bundle(
                            segment, j)
                        new_state_mem = min([
                            max_num_of_satisfied_bundle_by_segment[
                                segment],
                            abs(new_state_level - 1)
                        ])
                        new_terminus = states[new_state_level][
                            new_state_mem]
                        # Cost 1 exactly when the segment has the last bundle.
                        transducer.add_arc(
                            Arc(
                                state, JOKER_SEGMENT, segment,
                                CostVector([
                                    int(symbol_bundle_characteristic_matrix[
                                        segment][i])
                                ]), new_terminus))

        transducer.clear_dead_states()
        # Zero-cost JOKER/NULL self-loop on every state left after cleanup.
        for state in transducer.states:
            transducer.add_arc(
                Arc(state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]),
                    state))
    return transducer
class TestTransducer(unittest.TestCase):
    """Tests for Transducer, State, Arc and CostVector.

    Several tests compare freshly built objects against pickled reference
    fixtures loaded through ``get_pickle``.
    """

    def setUp(self):
        """Create the feature tables and the small transducers shared by the tests."""
        self.feature_table = FeatureTable.load(get_feature_table_fixture("feature_table.json"))
        self.phonotactic_test_feature_table = FeatureTable.load(get_feature_table_fixture(
            "phonotactic_test_feature_table.json"))

        # Two-state transducer with a single a:b arc from q1 (initial) to q2 (final).
        self.transducer = Transducer(self.feature_table.get_segments())
        self.state1 = State('q1')
        self.state2 = State('q2')
        self.transducer.add_state(self.state1)
        self.transducer.add_state(self.state2)
        self.transducer.initial_state = self.state1
        self.transducer.add_final_state(self.state2)
        self.cost_vector1 = CostVector([3, 1, 0])
        self.cost_vector2 = CostVector([2, 0, 0])
        self.arc = Arc(self.state1, Segment('a', self.feature_table), Segment('b', self.feature_table),
                       CostVector([0, 1, 0]), self.state2)
        self.transducer.add_arc(self.arc)

        self.simple_transducer = self.transducer

        # Copy of the simple transducer with extra self-loops on both states.
        self.loops_transducer = deepcopy(self.transducer)
        zero_cost_vector = CostVector([0])
        segment_a = Segment('a', self.feature_table)
        segment_b = Segment('b', self.feature_table)
        self.loops_transducer.add_arc(Arc(self.state1, JOKER_SEGMENT, segment_a, zero_cost_vector, self.state1))
        self.loops_transducer.add_arc(Arc(self.state1, JOKER_SEGMENT, segment_b, zero_cost_vector, self.state1))
        self.loops_transducer.add_arc(Arc(self.state2, NULL_SEGMENT, segment_a, zero_cost_vector, self.state2))
        self.loops_transducer.add_arc(Arc(self.state2, NULL_SEGMENT, segment_b, zero_cost_vector, self.state2))

        phonotactic = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                            self.phonotactic_test_feature_table).get_transducer()
        dep = DepConstraint([{'labial': '-'}], self.phonotactic_test_feature_table).get_transducer()
        # Named max_transducer so the builtin max() is not shadowed.
        max_transducer = MaxConstraint([{'voice': '-'}], self.phonotactic_test_feature_table).get_transducer()
        self.intersection_test_transducer = Transducer.intersection(phonotactic, dep, max_transducer)

    # Transducer tests:
    def test_transducer_equality(self):
        """A three-way intersection equals the same intersection done in two steps."""
        feature_table = FeatureTable.load(get_feature_table_fixture("a_b_and_cons_feature_table.json"))
        faith = FaithConstraint([], feature_table).get_transducer()
        phonotactic = PhonotacticConstraint([{'cons': '+'}], feature_table).get_transducer()
        max_transducer = MaxConstraint([{'cons': '+'}], feature_table).get_transducer()
        # One built with a single intersection call ...
        transducer1 = Transducer.intersection(faith, phonotactic, max_transducer)
        # ... and one built by intersecting manually in two steps.
        temp_transducer = Transducer.intersection(phonotactic, max_transducer)
        transducer2 = Transducer.intersection(faith, temp_transducer)
        # To inspect a failure, dump each side with
        # write_to_dot_to_file(transducer, "name").
        self.assertEqual(transducer1, transducer2)

    def test_transducer_equality_with_deepcopy(self):
        """A deep copy of a transducer compares equal to the original."""
        phonotactic_transducer = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                                       self.phonotactic_test_feature_table).get_transducer()
        phonotactic_transducer_copy = deepcopy(phonotactic_transducer)
        self.assertEqual(phonotactic_transducer, phonotactic_transducer_copy)

    def test_transducer_equality_with_pickle(self):
        """A freshly built transducer compares equal to its pickled reference."""
        phonotactic_transducer = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                                       self.phonotactic_test_feature_table).get_transducer()
        pickled_phonotactic_transducer = get_pickle("equality_with_pickle_transducer")
        self.assertEqual(phonotactic_transducer, pickled_phonotactic_transducer)

    def test_transducer_intersection(self):
        """The intersection built in setUp matches the pickled reference."""
        self.assertEqual(self.intersection_test_transducer, get_pickle("intersection_test_transducer"))

    def test_transducer_clear_dead_states(self):
        """clear_dead_states() output must equal the pickled reference transducer."""
        transducer = Transducer(self.feature_table.get_segments())
        state1 = State('q1')
        state2 = State('q2')
        state3 = State('q3')
        state4 = State('q4')
        transducer.add_state(state1)
        transducer.add_state(state2)
        transducer.add_state(state3)
        transducer.add_state(state4)
        transducer.initial_state = state1
        transducer.add_final_state(state2)
        transducer.add_arc(Arc(state1, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state2))
        transducer.add_arc(Arc(state1, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state1))
        transducer.add_arc(Arc(state2, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state2))
        transducer.add_arc(Arc(state3, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state3))
        transducer.add_arc(Arc(state4, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state3))
        transducer.clear_dead_states()
        self.assertEqual(transducer, get_pickle("clear_dead_states_test_transducer"))

    def test_get_arcs_by_origin_state(self):
        """Arcs leaving the initial state match the pickled list."""
        initial_state = self.intersection_test_transducer.initial_state
        arc_list = self.intersection_test_transducer.get_arcs_by_origin_state(initial_state)
        pickled_arc_list = get_pickle("get_arcs_by_origin_state_arc_list")
        self.assertTrue(_are_lists_equal(arc_list, pickled_arc_list))

    def test_get_arcs_by_terminal_state(self):
        """Compare an arc list of the initial state against the terminal-state fixture."""
        initial_state = self.intersection_test_transducer.initial_state
        # NOTE(review): this calls get_arcs_by_origin_state although the test
        # and fixture are named for the terminal state. Left unchanged because
        # the pickled fixture may have been generated from this very call;
        # confirm and regenerate the fixture before switching accessors.
        arc_list = self.intersection_test_transducer.get_arcs_by_origin_state(initial_state)
        pickled_arc_list = get_pickle("get_arcs_by_terminal_state_arc_list")
        self.assertTrue(_are_lists_equal(arc_list, pickled_arc_list))

    def test_get_range(self):
        pass  # see TestingParserSuite.test_geneare

    # State tests:
    def test_state_str(self):
        """str(State) renders as '(label,index)'."""
        self.assertEqual(str(self.state1), "(q1,0)")

    def test_states_addition(self):
        """states_addition joins labels with '|'; a second call gives the same result."""
        new_state = State.states_addition(self.state1, self.state2)
        self.assertEqual(str(new_state), "(q1|q2,0)")
        new_state = State.states_addition(self.state1, self.state2)
        self.assertEqual(str(new_state), "(q1|q2,0)")

    # Arcs tests:
    def test_arc_str(self):
        """str(Arc) lists origin, the two segments, cost vector and terminus."""
        self.assertEqual(str(self.arc), "['(q1,0)', 'a', 'b', '[0, 1, 0]', '(q2,0)']")

    # CostVector tests:
    def test_costVector_operations(self):
        """'+' and '-' act element-wise; '*' concatenates."""
        self.assertEqual(self.cost_vector1 + self.cost_vector2, CostVector([5, 1, 0]))
        self.assertEqual(self.cost_vector1 * self.cost_vector2, CostVector([3, 1, 0, 2, 0, 0]))
        self.assertEqual(self.cost_vector1 - self.cost_vector2, CostVector([1, 1, 0]))

    def test_costVector_comparison(self):
        """Pin the ordering of cost vectors, including the infinite vector."""
        self.assertTrue(CostVector([0, 0, 0, 0, 0]) > CostVector([0, 0, 1, 0, 0]))
        self.assertFalse(CostVector([1, 0, 1]) > CostVector([0, 2, 0]))
        self.assertTrue(CostVector([1000, 0, 76]) > CostVector.get_inf_vector())
        self.assertFalse(CostVector.get_inf_vector() > CostVector([0, 1, 2]))
        self.assertFalse(CostVector.get_inf_vector() > CostVector.get_inf_vector())

    def test_costVector_get_vector_with_size_n_and_number_m(self):
        """get_vector(n, m) yields n copies of m; get_empty_vector() is the empty vector."""
        self.assertEqual(CostVector.get_vector(4, 0), CostVector([0, 0, 0, 0]))
        self.assertEqual(CostVector.get_vector(1, 1), CostVector([1]))
        self.assertEqual(CostVector.get_vector(0, 0), CostVector([]))
        self.assertEqual(CostVector.get_empty_vector(), CostVector([]))

    def test_costVector_str(self):
        """str(CostVector) renders like a list."""
        self.assertEqual(str(CostVector([1, 1, 0])), "[1, 1, 0]")

    def test_costVector_illegal_operation(self):
        """Adding vectors of different lengths raises CostVectorOperationError."""
        with self.assertRaises(CostVectorOperationError):
            CostVector([1, 1]) + CostVector([1])

    def test_costVector_concatenation_with_empty_vector(self):
        """Concatenating with the empty vector on either side is a no-op."""
        cost_vector3 = CostVector([])
        self.assertEqual(self.cost_vector1 * cost_vector3, CostVector([3, 1, 0]))
        self.assertEqual(cost_vector3 * self.cost_vector1, CostVector([3, 1, 0]))