def time_test():
    """Benchmark the reference RNN-T loss on random data.

    Builds a synthetic batch of activations and labels, then times
    ``iters`` forward evaluations of the Transducer loss and prints the
    average wall-clock time per iteration.
    """
    blank = 0
    batch_size = 32
    vocab_size = 30
    input_len = 400
    output_len = 80
    acts = np.random.rand(batch_size, input_len, output_len + 1, vocab_size)
    labels = np.random.randint(1, vocab_size, (batch_size, output_len))

    acts = torch.FloatTensor(acts)
    # Every utterance in this synthetic batch spans the full input length.
    lengths = [acts.shape[1]] * acts.shape[0]
    label_lengths = [len(l) for l in labels]
    # Flatten the per-utterance labels into one 1-D array.
    labels = np.array([l for label in labels for l in label])

    labels = torch.IntTensor(labels)
    lengths = torch.IntTensor(lengths)
    label_lengths = torch.IntTensor(label_lengths)
    log_probs = nn.functional.log_softmax(acts, dim=3)

    start = time.time()
    iters = 10
    for _ in range(iters):
        # Fix: pass the named `blank` constant instead of leaving it dead.
        tfn = Transducer(blank_label=blank)
        costs = tfn.apply(log_probs, labels, lengths, label_lengths)
    end = time.time()
    print("Time per iteration: {:.3f}(s)".format((end - start) / iters))
def test_fwd_trivial(self):
    """One-hot emissions must yield a forward score of exactly zero."""
    T, N = 3, 2
    probs = torch.FloatTensor([1.0, 0.0, 0.0, 1.0, 1.0, 0.0]).view(1, T, N)
    lp = torch.log(probs)

    # Check without blank:
    crit = Transducer(tokens=["a", "b"], graphemes_to_idx={"a": 0, "b": 1})
    self.assertAlmostEqual(crit(lp, [[0, 1, 0]]).item(), 0.0)

    # Check with blank:
    crit = Transducer(tokens=["a"], graphemes_to_idx={"a": 0}, blank="optional")
    self.assertAlmostEqual(crit(lp, [[0, 0]]).item(), 0.0)

    # Check with repeats not allowed:
    crit = Transducer(
        tokens=["a"],
        graphemes_to_idx={"a": 0},
        blank="optional",
        allow_repeats=False,
    )
    self.assertAlmostEqual(crit(lp, [[0, 0]]).item(), 0.0)
def _construct_hypothesis(self):
    """
    Utilize the observation table to construct a Mealy Machine.

    Returns:
        Transducer: A Mealy machine built from a closed and consistent
            observation table, or None if the table turns out not to be
            closed (the caller must close it and retry).
    """
    mm = Transducer()
    for access_string in self.ot.access_strings:
        for i in self.I:
            dst = self.ot.equiv_classes[access_string + (i,)]
            # If dst is None then the table is not closed.
            if dst is None:
                logging.debug('Conjecture attempt on non closed table.')
                return None
            src_id = self.ot.access_strings.index(access_string)
            dst_id = self.ot.access_strings.index(dst)
            # Fix: look the cell up once (the original assigned `out` and
            # immediately overwrote it, querying the table three times).
            cell = self.ot[access_string, (i,)]
            if not cell:
                # An empty output cell maps to the epsilon output symbol.
                out = [EPSILON]
            else:
                out = [int(x) for x in cell]
            mm.add_arc(src_id, dst_id, [int(i)], out)

    # This is for format compatibility with the DFA/SFAs.
    for state in mm.states:
        state.final = True
    return mm
def _construct_hypothesis(self):
    """
    Utilize the observation table to construct a Mealy Machine.

    Returns:
        Transducer: A Mealy machine built from a closed and consistent
            observation table, or None if the table turns out not to be
            closed (the caller must close it and retry).
    """
    mm = Transducer()
    for access_string in self.ot.access_strings:
        for i in self.I:
            dst = self.ot.equiv_classes[access_string + (i,)]
            # If dst is None then the table is not closed.
            if dst is None:
                logging.debug('Conjecture attempt on non closed table.')
                return None
            src_id = self.ot.access_strings.index(access_string)
            dst_id = self.ot.access_strings.index(dst)
            # Fix: look the cell up once (the original assigned `out` and
            # immediately overwrote it, querying the table three times).
            cell = self.ot[access_string, (i,)]
            if not cell:
                # An empty output cell maps to the epsilon output symbol.
                out = [EPSILON]
            else:
                out = [int(x) for x in cell]
            mm.add_arc(src_id, dst_id, [int(i)], out)

    # This is for format compatibility with the DFA/SFAs.
    for state in mm.states:
        state.final = True
    return mm
def setUp(self):
    """Build the fixtures shared by the transducer tests: a minimal
    two-state transducer, a looped copy of it, and an intersection of
    three constraint transducers."""
    # Feature tables backing the transducers under test.
    self.feature_table = FeatureTable.load(get_feature_table_fixture("feature_table.json"))
    self.phonotactic_test_feature_table = FeatureTable.load(get_feature_table_fixture(
        "phonotactic_test_feature_table.json"))
    # Minimal transducer: two states, single a->b arc with cost [0, 1, 0].
    self.transducer = Transducer(self.feature_table.get_segments())
    self.state1 = State('q1')
    self.state2 = State('q2')
    self.transducer.add_state(self.state1)
    self.transducer.add_state(self.state2)
    self.transducer.initial_state = self.state1
    self.transducer.add_final_state(self.state2)
    self.cost_vector1 = CostVector([3, 1, 0])
    self.cost_vector2 = CostVector([2, 0, 0])
    self.arc = Arc(self.state1, Segment('a', self.feature_table),
                   Segment('b', self.feature_table),
                   CostVector([0, 1, 0]), self.state2)
    self.transducer.add_arc(self.arc)
    self.simple_transducer = self.transducer
    # Deep copy of the simple transducer, augmented with zero-cost self
    # loops on both states.
    self.loops_transducer = deepcopy(self.transducer)
    zero_cost_vector = CostVector([0])
    segment_a = Segment('a', self.feature_table)
    segment_b = Segment('b', self.feature_table)
    self.loops_transducer.add_arc(Arc(self.state1, JOKER_SEGMENT, segment_a, zero_cost_vector, self.state1))
    self.loops_transducer.add_arc(Arc(self.state1, JOKER_SEGMENT, segment_b, zero_cost_vector, self.state1))
    self.loops_transducer.add_arc(Arc(self.state2, NULL_SEGMENT, segment_a, zero_cost_vector, self.state2))
    self.loops_transducer.add_arc(Arc(self.state2, NULL_SEGMENT, segment_b, zero_cost_vector, self.state2))
    # Intersection of three constraint transducers, used by the
    # intersection-related tests.
    phonotactic = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                        self.phonotactic_test_feature_table).get_transducer()
    dep = DepConstraint([{'labial': '-'}], self.phonotactic_test_feature_table).get_transducer()
    max = MaxConstraint([{'voice': '-'}], self.phonotactic_test_feature_table).get_transducer()
    self.intersection_test_transducer = Transducer.intersection(phonotactic, dep, max)
def test_transducer_equality(self):
    """Intersection result should not depend on argument grouping:
    (faith & phonotactic & max) == (faith & (phonotactic & max))."""
    table = FeatureTable.load(get_feature_table_fixture("a_b_and_cons_feature_table.json"))
    faith = FaithConstraint([], table).get_transducer()
    phonotactic = PhonotacticConstraint([{'cons': '+'}], table).get_transducer()
    max = MaxConstraint([{'cons': '+'}], table).get_transducer()

    all_at_once = Transducer.intersection(faith, phonotactic, max)
    pairwise = Transducer.intersection(phonotactic, max)
    nested = Transducer.intersection(faith, pairwise)
    self.assertEqual(all_at_once, nested)
def __init__(self, filename):
    '''Read a transducer from filename.

    Parses the header and the alphabet first, then dispatches to the
    weighted or unweighted transducer reader based on the header flag.
    '''
    # Fix: use a context manager so the file handle is closed even if
    # Header/Alphabet/Transducer parsing raises (the original leaked it).
    with open(filename, "rb") as handle:
        self.header = Header(handle)
        self.alphabet = Alphabet(handle, self.header.number_of_symbols)
        if self.header.weighted:
            self.transducer = TransducerW(handle, self.header, self.alphabet)
        else:
            self.transducer = Transducer(handle, self.header, self.alphabet)
def launch_decoder_through_transducer(coder):
    """Receive frames from the transducer forever, decode each non-empty
    frame to an ASCII string, print it, and forward it to the keyboard
    handler."""
    print("Listening...")
    channel = Transducer(mode=2, debug=0, coder=coder)
    while True:
        frame = channel.receive()
        if not frame:
            continue
        # print("Transducer::Decode Decimalize: %s" % [int(d) for d in frame])
        # print("Transducer::Decode Binarize: %s" % [format(int(d), 'b') for d in frame])
        decoded = data_to_ascii_string(frame)
        print("Decoded: %s" % decoded)
        operate_keyboard_if_necessary(decoded)
def main():
    """ Simple interface to convert transducers from text format to BEK programs """
    filename = 'transducer.txt'
    if len(argv) > 1:
        filename = argv[1]
    trans = Transducer()
    trans.load(filename)
    bek = BekProgram()
    bek.create_from_transducer(trans)
    # Fix: Python-2-only print statement; the call form works on 2 and 3.
    print(bek.bek_program)
def _make_transducer(self):
    """Build the word's linear transducer: state i corresponds to having
    consumed the first i segments; each state also carries a cost-free
    NULL/JOKER self loop."""
    segments = self.feature_table.get_segments()
    machine = Transducer(segments, length_of_cost_vectors=0)
    word_segments = self.get_segments()
    last = len(self.word_string)
    chain = [State("q{}".format(idx), idx) for idx in range(last + 1)]
    for idx, node in enumerate(chain):
        machine.add_state(node)
        # Self loop with an empty cost vector on every state.
        machine.add_arc(Arc(node, NULL_SEGMENT, JOKER_SEGMENT,
                            CostVector.get_empty_vector(), node))
        if idx != last:
            # Consuming the idx-th word segment advances along the chain.
            machine.add_arc(Arc(chain[idx], word_segments[idx], JOKER_SEGMENT,
                                CostVector.get_empty_vector(), chain[idx + 1]))
    machine.initial_state = chain[0]
    machine.add_final_state(chain[last])
    return machine
def test_backoff_transitions(self):
    """Validate transition-parameter gradients against central finite
    differences for a transducer built from a backoff transition graph."""
    transitions = gtn.loadtxt("trans_backoff_test.txt")
    T, N = 4, 5
    inputs = torch.randn(1, T, N, dtype=torch.float, requires_grad=True)
    labels = [[0, 1, 0]]
    transducer = Transducer(
        tokens=[(n,) for n in range(N)],
        graphemes_to_idx={n: n for n in range(N)},
        blank="optional",
        allow_repeats=False,
        transitions=transitions,
    )
    loss = transducer(inputs, labels)
    loss.backward()
    params = transducer.transition_params
    analytic_grad = params.grad

    # Perturb each transition parameter up and down by epsilon and
    # estimate the derivative with a central difference.
    epsilon = 1e-3
    estimates = []
    with torch.no_grad():
        for idx in range(params.numel()):
            params.data[idx] += epsilon
            loss_up = transducer(inputs, labels).item()
            params.data[idx] -= 2 * epsilon
            loss_down = transducer(inputs, labels).item()
            estimates.append((loss_up - loss_down) / (2 * epsilon))
            params.data[idx] += epsilon
    numerical_grad = torch.tensor(estimates)
    self.assertTrue(
        torch.allclose(analytic_grad, numerical_grad, rtol=1e-3, atol=1e-3))
def reference_rnnt_loss(input_data, target_data, input_lengths, target_lengths):
    """Run the reference RNN-T implementation on the given batch.

    Returns the (cost, gradients-w.r.t.-logits, gradients-w.r.t.-log-probs)
    triple produced by wrap_and_call.
    """
    reference = Transducer(blank_label=0)
    return wrap_and_call(reference, input_data, input_lengths,
                         target_data, target_lengths)
def make_optimal_paths(transducer_input):
    """For every alphabet segment and every ordered state pair (s1, s2),
    intersect the single-segment word transducer with the input machine,
    keep only the optimal paths, and record the surviving arcs on a copy
    of the input transducer.

    Returns the copied transducer with its arcs replaced by the new set.
    """
    # pickle round-trip is used as a fast deep copy throughout.
    transducer = pickle.loads(pickle.dumps(transducer_input, -1))
    alphabet = transducer.get_alphabet()
    new_arcs = list()
    for segment in alphabet:
        word = Word(segment.get_symbol())
        word_transducer = word.get_transducer()
        intersected_machine = Transducer.intersection(word_transducer, transducer)
        states = transducer.get_states()
        for state1, state2 in itertools.product(states, states):
            initial_state = word_transducer.initial_state & state1
            final_state = word_transducer.get_a_final_state() & state2
            temp_transducer = pickle.loads(pickle.dumps(intersected_machine, -1))
            temp_transducer.initial_state = initial_state
            temp_transducer.set_final_state(final_state)
            temp_transducer.clear_dead_states()
            if final_state in temp_transducer.get_final_states():  # otherwise no path.
                try:
                    temp_transducer = remove_suboptimal_paths(temp_transducer)
                    # Fix: renamed from "range" so the builtin is not shadowed.
                    transducer_range = temp_transducer.get_range()
                    arc = Arc(state1, segment, transducer_range,
                              _get_path_cost(temp_transducer), state2)
                    new_arcs.append(arc)
                except KeyError:
                    # No optimal path survives for this pair; skip it.
                    pass
    transducer.set_arcs(new_arcs)
    return transducer
def small_test():
    """Single-example RNN-T check against hand-computed cost and gradients."""
    acts = np.array([[[0.1, 0.6, 0.1, 0.1, 0.1],
                      [0.1, 0.1, 0.6, 0.1, 0.1],
                      [0.1, 0.1, 0.2, 0.8, 0.1]],
                     [[0.1, 0.6, 0.1, 0.1, 0.1],
                      [0.1, 0.1, 0.2, 0.1, 0.1],
                      [0.7, 0.1, 0.2, 0.1, 0.1]]])
    labels = [[1, 2]]

    print("Acts.shape", acts.shape)
    acts = acts[None, ...]  # prepend the batch axis
    print("Acts.shape", acts.shape)

    loss_fn = Transducer(blank_label=0)
    cost, grads = wrap_and_call(loss_fn, acts, labels)

    expected_cost = 4.495666
    expected_grads = np.array([[[-0.308198071906, -0.6918019280939998, 0.0, 0.0, 0.0],
                                [-0.308198071906, 0.0, -0.3836038561880001, 0.0, 0.0],
                                [-0.3836038561880001, 0.0, 0.0, 0.0, 0.0]],
                               [[0.0, -0.308198071906, 0.0, 0.0, 0.0],
                                [0.0, 0.0, -0.6163961438119995, 0.0, 0.0],
                                [-0.9999999999999991, 0.0, 0.0, 0.0, 0.0]]])
    assert np.allclose(cost, expected_cost, rtol=1e-6), \
        "small_test costs mismatch."
    assert np.allclose(grads, expected_grads), \
        "small_test gradient mismatch."
def test_simple_decomposition(self):
    """The transducer loss must match a hand-built alignment graph that
    enumerates every token decomposition of the label sequence, in both
    value and gradient."""
    T = 5
    tokens = ["a", "b", "ab", "ba", "aba"]
    scores = torch.randn((1, T, len(tokens)), requires_grad=True)
    labels = [[0, 1, 0]]
    transducer = Transducer(tokens=tokens, graphemes_to_idx={
        "a": 0,
        "b": 1
    })

    # Hand construct the alignment graph with all of the decompositions
    alignments = gtn.Graph(False)
    alignments.add_node(True)  # start node (node 0)

    # Add the path ['a', 'b', 'a']
    alignments.add_node()
    alignments.add_arc(0, 1, 0)
    alignments.add_arc(1, 1, 0)
    alignments.add_node()
    alignments.add_arc(1, 2, 1)
    alignments.add_arc(2, 2, 1)
    alignments.add_node(False, True)
    alignments.add_arc(2, 3, 0)
    alignments.add_arc(3, 3, 0)

    # Add the path ['a', 'ba']
    alignments.add_node(False, True)
    alignments.add_arc(1, 4, 3)
    alignments.add_arc(4, 4, 3)

    # Add the path ['ab', 'a']
    alignments.add_node()
    alignments.add_arc(0, 5, 2)
    alignments.add_arc(5, 5, 2)
    alignments.add_arc(5, 3, 0)

    # Add the path ['aba']
    alignments.add_node(False, True)
    alignments.add_arc(0, 6, 4)
    alignments.add_arc(6, 6, 4)

    emissions = gtn.linear_graph(T, len(tokens), True)
    emissions.set_weights(scores.data_ptr())
    # Expected loss: full forward score minus the alignment-constrained one.
    expected_loss = gtn.subtract(
        gtn.forward_score(emissions),
        gtn.forward_score(gtn.intersect(emissions, alignments)),
    )
    loss = transducer(scores, labels)
    self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5)

    # Gradients w.r.t. the emission scores must agree as well.
    loss.backward()
    gtn.backward(expected_loss)
    expected_grad = torch.tensor(emissions.grad().weights_to_numpy())
    expected_grad = expected_grad.view((1, T, len(tokens)))
    self.assertTrue(
        torch.allclose(scores.grad, expected_grad, rtol=1e-4, atol=1e-5))
def _get_outputs(self, word):
    """Intersect the grammar transducer with the word transducer, prune
    and optimize the product, and return its range (the set of outputs
    the grammar assigns to the word)."""
    grammar_machine = self.get_transducer()
    word_machine = word.get_transducer()
    write_to_dot(grammar_machine, "grammar_transducer")
    write_to_dot(word_machine, "word_transducer")
    # word machine: NULLs on inputs and JOKERs on outputs;
    # grammar machine: segments on inputs and sets on outputs.
    product = Transducer.intersection(word_machine, grammar_machine)
    product.clear_dead_states()
    product = optimize_transducer_grammar_for_word(word, product)
    return product.get_range()
def _get_outputs(self, word):
    """Return the outputs the grammar assigns to ``word``: the range of
    the optimized intersection of the word and grammar transducers."""
    grammar_machine = self.get_transducer()
    word_machine = word.get_transducer()
    # word machine carries NULLs on inputs and JOKERs on outputs;
    # grammar machine carries segments on inputs and sets on outputs.
    product = Transducer.intersection(word_machine, grammar_machine)
    product.clear_dead_states()
    product = optimize_transducer_grammar_for_word(word, product)
    #dot(product, 'intersected')
    outputs = product.get_range()
    return outputs
def test_fwd(self):
    """Forward score on uniform emissions must equal
    -log(0.25 * 0.25 * 0.25 * 5)."""
    T = 3
    N = 4
    labels = [[1, 2]]
    emissions = torch.FloatTensor([1.0] * T * N).view(1, T, N)
    # Fix: removed the dead `log_probs = torch.log(emissions)` assignment
    # that was immediately overwritten in the original.
    log_probs = torch.nn.functional.log_softmax(torch.log(emissions), 2)
    transducer = Transducer(
        tokens=["a", "b", "c"],
        graphemes_to_idx={
            "a": 0,
            "b": 1,
            "c": 2
        },
        blank="optional",
    )
    fwd = transducer(log_probs, labels)
    self.assertAlmostEqual(fwd.item(), -math.log(0.25 * 0.25 * 0.25 * 5))
class OlTransducer:
    """Wrapper around a transducer file, dispatching to the weighted or
    unweighted reader based on the file header."""

    def __init__(self, filename):
        '''Read a transducer from filename.'''
        # Fix: use a context manager so the file handle is closed even if
        # parsing raises (the original leaked it in that case).
        with open(filename, "rb") as handle:
            self.header = Header(handle)
            self.alphabet = Alphabet(handle, self.header.number_of_symbols)
            if self.header.weighted:
                self.transducer = TransducerW(handle, self.header, self.alphabet)
            else:
                self.transducer = Transducer(handle, self.header, self.alphabet)

    def analyse(self, string):
        '''Take string to analyse, return a vector of (string, weight) pairs.

        Returns an empty list when the string cannot be analysed.
        '''
        if self.transducer.analyze(string):
            return self.transducer.displayVector
        else:
            return []
def test_ctc_compare(self):
    """A Transducer with optional blank and repeats disallowed must
    reproduce the CTC loss and its input gradients, for both reduction
    modes."""
    T = 20
    N = 15
    B = 5
    # One target per batch element; lengths and symbols vary on purpose.
    tgt = [
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        [1, 1],
        [0, 2, 3],
        [0, 0, 0, 0, 0],
        [0, 4, 8, 12],
    ]
    tokens = list((t, ) for t in range(N - 1))
    graphemes_to_idx = {t: t for t in range(N - 1)}
    inputs = torch.randn(B, T, N, dtype=torch.float, requires_grad=True)

    # With and without target length reduction:
    for reduction in ["none", "mean"]:
        transducer = Transducer(
            tokens=tokens,
            graphemes_to_idx=graphemes_to_idx,
            blank="optional",
            allow_repeats=False,
            reduction=reduction,
        )
        # Reference: CTC on log-softmaxed inputs. The N - 1 argument is
        # presumably the blank index -- confirm against CTCLoss's signature.
        ctc_inputs = torch.nn.functional.log_softmax(inputs, 2)
        ctc_result = CTCLoss(ctc_inputs, tgt, N - 1, reduction)
        ctc_result.backward()
        ctc_grad = inputs.grad
        inputs.grad = None

        transducer_result = transducer(inputs, tgt)
        transducer_result.backward()
        transducer_grad = inputs.grad
        inputs.grad = None

        self.assertAlmostEqual(ctc_result.item(),
                               transducer_result.item(),
                               places=4)
        self.assertTrue(
            torch.allclose(ctc_grad, transducer_grad, rtol=1e-4, atol=1e-5))
def test_asg_viterbi(self):
    """Viterbi decoding with zeroed ASG transitions overridden by explicit
    transition parameters must recover the expected path."""
    T, N = 4, 3
    inputs = torch.tensor([0, 0, 7, 5, 4, 3, 5, 8, 5, 5, 4, 3],
                          dtype=torch.float32).view(1, T, N)
    transitions = torch.tensor([0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0],
                               dtype=torch.float32)
    expected_path = [2, 1, 0]

    asg_transitions = ASGLossFunction.create_transitions_graph(
        torch.zeros(N + 1, N))
    decoder = Transducer(
        tokens=[(n,) for n in range(N)],
        graphemes_to_idx={n: n for n in range(N)},
        transitions=asg_transitions,
    )
    # Overwrite the (zero) transition weights with the hand-picked ones.
    decoder.transition_params.data = transitions

    best = decoder.viterbi(inputs)[0].tolist()
    self.assertTrue(best == expected_path)
def big_test():
    """Check RNN-T costs and gradients on a two-element batch against
    precomputed reference values."""
    # minibatch x T x U x alphabet_size
    activations = [
        [[[0.06535690384862791, 0.7875301411923206, 0.08159176605666074],
          [0.5297155426466327, 0.7506749639230854, 0.7541348379087998],
          [0.6097641124736383, 0.8681404965673826, 0.6225318186056529]],
         [[0.6685222872103057, 0.8580392805336061, 0.16453892311765583],
          [0.989779515236694, 0.944298460961015, 0.6031678586829663],
          [0.9467833543605416, 0.666202507295747, 0.28688179752461884]],
         [[0.09418426230195986, 0.3666735970751962, 0.736168049462793],
          [0.1666804425271342, 0.7141542198635192, 0.3993997272216727],
          [0.5359823524146038, 0.29182076440286386, 0.6126422611507932]],
         [[0.3242405528768486, 0.8007644367291621, 0.5241057606558068],
          [0.779194617063042, 0.18331417220174862, 0.113745182072432],
          [0.24022162381327106, 0.3394695622533106, 0.1341595066017014]]],
        [[[0.5055615569388828, 0.051597282072282646, 0.6402903936686337],
          [0.43073311517251, 0.8294731834714112, 0.1774668847323424],
          [0.3207001991262245, 0.04288308912457006, 0.30280282975568984]],
         [[0.6751777088333762, 0.569537369330242, 0.5584738347504452],
          [0.08313242153985256, 0.06016544344162322, 0.10795752845152584],
          [0.7486153608562472, 0.943918041459349, 0.4863558118797222]],
         [[0.4181986264486809, 0.6524078485043804, 0.024242983423721887],
          [0.13458171554507403, 0.3663418070512402, 0.2958297395361563],
          [0.9236695822497084, 0.6899291482654177, 0.7418981733448822]],
         [[0.25000547599982104, 0.6034295486281007, 0.9872887878887768],
          [0.5926057265215715, 0.8846724004467684, 0.5434495396894328],
          [0.6607698886038497, 0.3771277082495921, 0.3580209022231813]]]]
    print("Acts2", len(activations), len(activations[0]),
          len(activations[0][0]), len(activations[0][0][0]))
    # Reference values: one cost per batch element, and the matching
    # per-activation gradients.
    expected_costs = [4.2806528590890736, 3.9384369822503591]
    expected_grads = [
        [[[-0.4322264564338117, -0.5677735435661883, 0.0],
          [-0.36565009313836844, 0.0, -0.20212345042782007],
          [-0.20212345042782007, 0.0, 0.0]],
         [[-0.16521672442463506, -0.2670097320091765, 0.0],
          [-0.3943653886107811, 0.0, -0.2382944365367636],
          [-0.44041788696458367, 0.0, 0.0]],
         [[-0.052129794015740985, -0.11308693040889405, 0.0],
          [-0.18313786985332664, 0.0, -0.3243144491663483],
          [-0.7647323361309323, 0.0, 0.0]],
         [[0.0, -0.052129794015740985, 0.0],
          [0.0, 0.0, -0.23526766386906767],
          [-1.0, 0.0, 0.0]]],
        [[[-0.7161424128232795, -0.2838575871767207, 0.0],
          [-0.18382932237365335, -0.10002826480306751, 0.0],
          [-0.10002826480306751, 0.0, 0.0]],
         [[-0.41121794618117213, -0.3049244666421072, 0.0],
          [-0.3295759402552584, -0.15917784876050195, 0.0],
          [-0.2592061135635692, 0.0, 0.0]],
         [[-0.11607642141651396, -0.29514152476465827, 0.0],
          [-0.2865333615432337, -0.3381841034766833, 0.0],
          [-0.5973902170402529, 0.0, 0.0]],
         [[0.0, -0.11607642141651396, 0.0],
          [0.0, -0.4026097829597475, 0.0],
          [-1.0, 0.0, 0.0]]]]
    activations = np.array(activations)
    labels = [[1, 2], [1, 1]]
    tfn = Transducer(blank_label=0)
    costs, grads = wrap_and_call(tfn, activations, labels)
    assert np.allclose(costs, expected_costs), \
        "big_test average costs mismatch."
    assert np.allclose(grads, expected_grads), \
        "big_test grads for average cost mismatch."
class TestTransducer(unittest.TestCase):
    """Tests for Transducer, State, Arc and CostVector, checked against
    pickled reference fixtures."""

    def setUp(self):
        """Build the shared fixtures: a minimal two-state transducer, a
        looped copy of it, and an intersection of three constraint
        transducers."""
        # Feature tables backing the transducers under test.
        self.feature_table = FeatureTable.load(get_feature_table_fixture("feature_table.json"))
        self.phonotactic_test_feature_table = FeatureTable.load(get_feature_table_fixture(
            "phonotactic_test_feature_table.json"))
        # Minimal transducer: two states, single a->b arc with cost [0, 1, 0].
        self.transducer = Transducer(self.feature_table.get_segments())
        self.state1 = State('q1')
        self.state2 = State('q2')
        self.transducer.add_state(self.state1)
        self.transducer.add_state(self.state2)
        self.transducer.initial_state = self.state1
        self.transducer.add_final_state(self.state2)
        self.cost_vector1 = CostVector([3, 1, 0])
        self.cost_vector2 = CostVector([2, 0, 0])
        self.arc = Arc(self.state1, Segment('a', self.feature_table),
                       Segment('b', self.feature_table),
                       CostVector([0, 1, 0]), self.state2)
        self.transducer.add_arc(self.arc)
        self.simple_transducer = self.transducer
        # Deep copy augmented with zero-cost self loops on both states.
        self.loops_transducer = deepcopy(self.transducer)
        zero_cost_vector = CostVector([0])
        segment_a = Segment('a', self.feature_table)
        segment_b = Segment('b', self.feature_table)
        self.loops_transducer.add_arc(Arc(self.state1, JOKER_SEGMENT, segment_a, zero_cost_vector, self.state1))
        self.loops_transducer.add_arc(Arc(self.state1, JOKER_SEGMENT, segment_b, zero_cost_vector, self.state1))
        self.loops_transducer.add_arc(Arc(self.state2, NULL_SEGMENT, segment_a, zero_cost_vector, self.state2))
        self.loops_transducer.add_arc(Arc(self.state2, NULL_SEGMENT, segment_b, zero_cost_vector, self.state2))
        # Intersection of three constraint transducers for the tests below.
        phonotactic = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                            self.phonotactic_test_feature_table).get_transducer()
        dep = DepConstraint([{'labial': '-'}], self.phonotactic_test_feature_table).get_transducer()
        max = MaxConstraint([{'voice': '-'}], self.phonotactic_test_feature_table).get_transducer()
        self.intersection_test_transducer = Transducer.intersection(phonotactic, dep, max)

    #Transducer tests:
    def test_transducer_equality(self):
        # Intersection should not depend on how its arguments are grouped.
        feature_table = FeatureTable.load(get_feature_table_fixture("a_b_and_cons_feature_table.json"))
        faith = FaithConstraint([], feature_table).get_transducer()
        phonotactic = PhonotacticConstraint([{'cons': '+'}], feature_table).get_transducer()
        max = MaxConstraint([{'cons': '+'}], feature_table).get_transducer()
        transducer1 = Transducer.intersection(faith, phonotactic, max)
        temp_transducer = Transducer.intersection(phonotactic, max)
        transducer2 = Transducer.intersection(faith, temp_transducer)
        self.assertEqual(transducer1, transducer2)
        #write_to_dot_to_file(transducer1, "transducer1")
        #write_to_dot_to_file(transducer2, "transducer2")
        #one with constraint set
        #create with manual intersection

    def test_transducer_equality_with_deepcopy(self):
        phonotactic_transducer = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                                       self.phonotactic_test_feature_table).get_transducer()
        phonotactic_transducer_copy = deepcopy(phonotactic_transducer)
        self.assertEqual(phonotactic_transducer, phonotactic_transducer_copy)

    def test_transducer_equality_with_pickle(self):
        phonotactic_transducer = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                                       self.phonotactic_test_feature_table).get_transducer()
        pickled_phonotactic_transducer = get_pickle("equality_with_pickle_transducer")
        # NOTE(review): the next line computes an equality and discards the
        # result -- it has no effect; likely debugging leftovers.
        phonotactic_transducer == pickled_phonotactic_transducer
        self.assertEqual(phonotactic_transducer, pickled_phonotactic_transducer)

    def test_transducer_intersection(self):
        self.assertEqual(self.intersection_test_transducer,
                         get_pickle("intersection_test_transducer"))

    def test_transducer_clear_dead_states(self):
        # q3 and q4 are not reachable into the final state path; the pruned
        # machine must match the pickled reference.
        transducer = Transducer(self.feature_table.get_segments())
        state1 = State('q1')
        state2 = State('q2')
        state3 = State('q3')
        state4 = State('q4')
        transducer.add_state(state1)
        transducer.add_state(state2)
        transducer.add_state(state3)
        transducer.add_state(state4)
        transducer.initial_state = state1
        transducer.add_final_state(state2)
        transducer.add_arc(Arc(state1, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state2))
        transducer.add_arc(Arc(state1, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state1))
        transducer.add_arc(Arc(state2, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state2))
        transducer.add_arc(Arc(state3, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state3))
        transducer.add_arc(Arc(state4, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state3))
        transducer.clear_dead_states()
        self.assertEqual(transducer, get_pickle("clear_dead_states_test_transducer"))

    def test_get_arcs_by_origin_state(self):
        initial_state = self.intersection_test_transducer.initial_state
        arc_list = self.intersection_test_transducer.get_arcs_by_origin_state(initial_state)
        pickled_arc_list = get_pickle("get_arcs_by_origin_state_arc_list")
        self.assertTrue(_are_lists_equal(arc_list, pickled_arc_list))

    def test_get_arcs_by_terminal_state(self):
        # NOTE(review): this calls get_arcs_by_origin_state, not
        # get_arcs_by_terminal_state -- looks like a copy-paste bug; confirm.
        initial_state = self.intersection_test_transducer.initial_state
        arc_list = self.intersection_test_transducer.get_arcs_by_origin_state(initial_state)
        pickled_arc_list = get_pickle("get_arcs_by_terminal_state_arc_list")
        self.assertTrue(_are_lists_equal(arc_list, pickled_arc_list))

    def test_get_range(self):
        pass  # see TestingParserSuite.test_geneare

    #State tests:
    def test_state_str(self):
        self.assertEqual(str(self.state1), "(q1,0)")

    def test_states_addition(self):
        new_state = State.states_addition(self.state1, self.state2)
        self.assertEqual(str(new_state), "(q1|q2,0)")
        # The duplicate call below is present in the original; kept as-is.
        new_state = State.states_addition(self.state1, self.state2)
        self.assertEqual(str(new_state), "(q1|q2,0)")

    #Arcs tests:
    def test_arc_str(self):
        self.assertEqual(str(self.arc), "['(q1,0)', 'a', 'b', '[0, 1, 0]', '(q2,0)']")

    #CostVector tests:
    def test_costVector_operations(self):
        # + is element-wise addition, * is concatenation, - is element-wise
        # subtraction (per the expected values below).
        self.assertEqual(self.cost_vector1 + self.cost_vector2, CostVector([5, 1, 0]))
        self.assertEqual(self.cost_vector1 * self.cost_vector2, CostVector([3, 1, 0, 2, 0, 0]))
        self.assertEqual(self.cost_vector1 - self.cost_vector2, CostVector([1, 1, 0]))

    def test_costVector_comparison(self):
        # Per these assertions, a lower-cost vector compares as "greater",
        # and the inf vector loses every comparison (including to itself).
        self.assertTrue(CostVector([0, 0, 0, 0, 0]) > CostVector([0, 0, 1, 0, 0]))
        self.assertFalse(CostVector([1, 0, 1]) > CostVector([0, 2, 0]))
        self.assertTrue(CostVector([1000, 0, 76]) > CostVector.get_inf_vector())
        self.assertFalse(CostVector.get_inf_vector() > CostVector([0, 1, 2]))
        self.assertFalse(CostVector.get_inf_vector() > CostVector.get_inf_vector())

    def test_costVector_get_vector_with_size_n_and_number_m(self):
        self.assertEqual(CostVector.get_vector(4, 0), CostVector([0, 0, 0, 0]))
        self.assertEqual(CostVector.get_vector(1, 1), CostVector([1]))
        self.assertEqual(CostVector.get_vector(0, 0), CostVector([]))
        self.assertEqual(CostVector.get_empty_vector(), CostVector([]))

    def test_costVector_str(self):
        self.assertEqual(str(CostVector([1, 1, 0])), "[1, 1, 0]")

    def test_costVector_illegal_operation(self):
        # Adding vectors of different lengths must raise.
        with self.assertRaises(CostVectorOperationError):
            CostVector([1,1]) + CostVector([1])

    def test_costVector_concatenation_with_empty_vector(self):
        cost_vector3 = CostVector([])
        self.assertEqual(self.cost_vector1 * cost_vector3, CostVector([3, 1, 0]))
        self.assertEqual(cost_vector3 * self.cost_vector1, CostVector([3, 1, 0]))
# NOTE(review): this chunk starts mid-stream -- "c" below is presumably the
# loop variable of an alphabet-building loop that lies above this excerpt;
# confirm against the full file.
if c not in Sigma:
    Sigma[c] = len(Sigma)

# Inverse mapping: integer id -> symbol.
Sigma_inv = {}
for x, y in Sigma.items():
    Sigma_inv[y] = x

# test training data
train = numerize(train_str, Sigma)

# number of total insertions per string
INSERTION_LIMIT = 3

# transducer
t = Transducer(len(Sigma), INSERTION_LIMIT)

# First training pair, kept around as individual strings.
string1 = train[0][0]
string2 = train[0][1]

features = Features(Sigma, Sigma_inv)
for upper, lower in train_str:
    #print upper, lower, len(features.features)
    features.extract(upper, URC=0, ULC=0, create=True)

# get tensor
# This is equivalent to the earlier tensor.
# tensor_features is a list of sparse W tensors.
# Every element of tensor_feature is 5 dimensional
# where the first 4 are the same as the W tensor.
# And the last dimension is feature_index into set of features.
def _make_transducer(self): if len(self.constraints) is 1: # if there is only on constraint in the return pickle.loads(pickle.dumps(self.constraints[0].get_transducer(), -1)) # constraint set there is no need to intersect else: constraints_transducers = [constraint.get_transducer() for constraint in self.constraints] return Transducer.intersection(*constraints_transducers)
def test_transducer_clear_dead_states(self):
    """After clear_dead_states the machine must match the pickled
    reference (q3/q4 do not reach the final state q2)."""
    machine = Transducer(self.feature_table.get_segments())
    q1, q2, q3, q4 = State('q1'), State('q2'), State('q3'), State('q4')
    for node in (q1, q2, q3, q4):
        machine.add_state(node)
    machine.initial_state = q1
    machine.add_final_state(q2)

    # Same arc set as the reference fixture expects.
    for origin, terminus in ((q1, q2), (q1, q1), (q2, q2), (q3, q3), (q4, q3)):
        machine.add_arc(Arc(origin, JOKER_SEGMENT, NULL_SEGMENT,
                            CostVector([]), terminus))

    machine.clear_dead_states()
    self.assertEqual(machine, get_pickle("clear_dead_states_test_transducer"))
        # NOTE(review): this "return []" is the tail of a method whose
        # definition lies outside this chunk (the fallback when analysis
        # fails); indentation reconstructed -- confirm against the full file.
        return []

# Command-line driver. NOTE(review): this block uses Python 2 syntax
# (print statements, raw_input); it will not run under Python 3 as-is.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        print "Usage: python HfstRuntimeReader FILE"
        sys.exit()
    transducerfile = open(sys.argv[1], "rb")
    header = Header(transducerfile)
    print "header read"
    alphabet = Alphabet(transducerfile, header.number_of_symbols)
    print "alphabet read"
    # Header flag selects the weighted or unweighted reader.
    if header.weighted:
        transducer = TransducerW(transducerfile, header, alphabet)
    else:
        transducer = Transducer(transducerfile, header, alphabet)
    print "transducer ready"
    print
    # Read one line at a time from stdin and print its analyses.
    while True:
        try:
            string = raw_input()
        except EOFError:
            sys.exit(0)
        print string + ":"
        if transducer.analyze(string):
            transducer.printAnalyses()
            print
        else:
            # tokenization failed
            pass
def test_viterbi(self):
    """Viterbi decoding on hand-crafted emissions, with and without an
    optional blank token."""
    T = 5
    N = 4
    B = 2
    # fmt: off
    emissions1 = torch.tensor(
        (
            0, 4, 0, 1,
            0, 2, 1, 1,
            0, 0, 0, 2,
            0, 0, 0, 2,
            8, 0, 0, 2,
        ),
        dtype=torch.float,
    ).view(T, N)
    emissions2 = torch.tensor(
        (
            0, 2, 1, 7,
            0, 2, 9, 1,
            0, 0, 0, 2,
            0, 0, 5, 2,
            1, 0, 0, 2,
        ),
        dtype=torch.float,
    ).view(T, N)
    # fmt: on

    # Test without blank:
    labels = [[1, 3, 0], [3, 2, 3, 2, 3]]
    transducer = Transducer(
        tokens=["a", "b", "c", "d"],
        graphemes_to_idx={
            "a": 0,
            "b": 1,
            "c": 2,
            "d": 3
        },
        blank="none",
    )
    emissions = torch.stack([emissions1, emissions2], dim=0)
    predictions = transducer.viterbi(emissions)
    self.assertEqual([p.tolist() for p in predictions], labels)

    # Test with blank without repeats:
    labels = [[1, 0], [2, 2]]
    transducer = Transducer(
        tokens=["a", "b", "c"],
        graphemes_to_idx={
            "a": 0,
            "b": 1,
            "c": 2
        },
        blank="optional",
        allow_repeats=False,
    )
    emissions = torch.stack([emissions1, emissions2], dim=0)
    predictions = transducer.viterbi(emissions)
    self.assertEqual([p.tolist() for p in predictions], labels)
def _make_transducer(self):
    """Two-state transducer penalizing vowels that precede the stress mark.

    State 'Precede1': no stress seen yet -- each vowel there costs 1.
    State 'Precede2': stress has been seen -- every segment costs 0.
    Both states are final; consonants never incur a cost.

    Raises:
        ConstraintError: if a segment is neither a vowel, the stress mark,
            nor a consonant of the Yimas inventory.
    """
    segments = self.feature_table.get_segments()
    transducer = Transducer(segments, name=str(self))
    state1 = State('Precede1')
    state2 = State('Precede2')  # After seeing +stress (now it is okay to see +vowel)
    transducer.add_state(state1)
    transducer.add_state(state2)
    transducer.initial_state = state1
    transducer.add_final_state(state1)
    transducer.add_final_state(state2)
    for segment in segments:
        segment_symbol = segment.get_symbol()
        if segment_symbol in yimas_vowels:  # segment is vowel
            # A vowel before any stress mark incurs one violation.
            transducer.add_arc(Arc(state1, JOKER_SEGMENT, segment, CostVector([1]), state1))
            transducer.add_arc(Arc(state2, JOKER_SEGMENT, segment, CostVector([0]), state2))
        elif segment_symbol == "'":  # segment is stress
            # Stress moves (or keeps) the machine in the "seen" state.
            transducer.add_arc(Arc(state1, JOKER_SEGMENT, segment, CostVector([0]), state2))
            transducer.add_arc(Arc(state2, JOKER_SEGMENT, segment, CostVector([0]), state2))
        elif segment_symbol in yimas_cons:  # segment is consonant
            transducer.add_arc(Arc(state1, JOKER_SEGMENT, segment, CostVector([0]), state1))
            transducer.add_arc(Arc(state2, JOKER_SEGMENT, segment, CostVector([0]), state2))
        else:
            raise ConstraintError("{} not supported in this constraint".format(segment_symbol))
    # Zero-cost NULL-segment self loops on every state.
    for state in transducer.states:
        transducer.add_arc(Arc(state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), state))
    return transducer
def _make_transducer(self):
    """Build the weighted transducer for this multi-bundle markedness constraint.

    NOTE(review): states are labeled 'q<i>|<j>'; i appears to count how many
    leading feature bundles have been matched so far and j appears to be
    back-tracking memory for overlapping partial matches — confirm against
    the accompanying tests before relying on this reading. A cost of 1 is
    emitted only at level n when the final bundle is satisfied.
    """

    def compute_num_of_max_satisfied_bundle(segment):
        # Length of the longest prefix of feature bundles that `segment` satisfies.
        i = 0
        while i < n and symbol_bundle_characteristic_matrix[segment][i]:
            i += 1
        return i

    def compute_highest_num_of_satisfied_bundle(segment, j):
        # Highest k in [1, j+1] such that `segment` satisfies bundle k-1;
        # 0 when no such k exists.
        for k in range(j + 1, 0, -1):
            if symbol_bundle_characteristic_matrix[segment][k-1]:
                return k
        else:
            return 0

    n = len(self.feature_bundles) - 1  # index of the last feature bundle
    segments = self.feature_table.get_segments()
    transducer = Transducer(segments, name=str(self))
    # matrix[segment][i] is True iff `segment` satisfies feature bundle i.
    symbol_bundle_characteristic_matrix = {segment: [segment.has_feature_bundle(self.feature_bundles[i]) for i in range(n+1)]
                                           for segment in segments}
    # states[i][j] will hold State objects; 0 is just a placeholder.
    states = {i: {j: 0 for j in range(i)} for i in range(n+1)}
    initial_state = State('q0|0')  # here we use a tuple as label. it will change at the end of this function
    states[0][0] = initial_state
    transducer.set_as_single_state(initial_state)
    if not n:
        # Single-bundle constraint: one state; pay 1 whenever the segment
        # satisfies the bundle.
        for segment in segments:
            transducer.add_arc(Arc(states[0][0], JOKER_SEGMENT, segment,
                                   CostVector([int(symbol_bundle_characteristic_matrix[segment][0])]),
                                   states[0][0]))
        # NOTE(review): indentation reconstructed — this free-deletion arc is
        # assumed to be added once, outside the loop above; confirm.
        transducer.add_arc(Arc(states[0][0], JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), states[0][0]))
    else:
        # Create the interior states q(i)|j for i >= 1.
        for i in range(0, n+1):
            for j in range(i):
                state = State('q{0}|{1}'.format(i,j))
                states[i][j] = state
                transducer.add_state(state)
        max_num_of_satisfied_bundle_by_segment = {segment: compute_num_of_max_satisfied_bundle(segment)
                                                  for segment in segments}
        # Arcs out of the initial state: advance to level 1 iff the segment
        # satisfies bundle 0 (the bool indexes states[0] or states[1]).
        for segment in segments:
            transducer.add_arc(Arc(states[0][0], JOKER_SEGMENT, segment, CostVector([0]),
                                   states[symbol_bundle_characteristic_matrix[segment][0]][0]))
        for i in range(n+1):
            for j in range(i):
                state = states[i][j]
                transducer.add_final_state(state)
                if i != n:
                    for segment in segments:
                        if symbol_bundle_characteristic_matrix[segment][i]:
                            # Segment extends the current partial match.
                            new_state_level = i+1
                            new_state_mem = min([j+1, max_num_of_satisfied_bundle_by_segment[segment]])
                        else:
                            # Match broken: fall back to the longest restartable prefix.
                            new_state_level = compute_highest_num_of_satisfied_bundle(segment, j)
                            new_state_mem = min([max_num_of_satisfied_bundle_by_segment[segment],
                                                 abs(new_state_level - 1)])
                        new_terminus = states[new_state_level][new_state_mem]
                        transducer.add_arc(Arc(state, JOKER_SEGMENT, segment, CostVector([0]), new_terminus))
                else:  # i = n
                    for segment in segments:
                        new_state_level = compute_highest_num_of_satisfied_bundle(segment, j)
                        new_state_mem = min([max_num_of_satisfied_bundle_by_segment[segment],
                                             abs(new_state_level - 1)])
                        new_terminus = states[new_state_level][new_state_mem]
                        # The only costed arcs: completing the full bundle sequence.
                        transducer.add_arc(Arc(state, JOKER_SEGMENT, segment,
                                               CostVector([int(symbol_bundle_characteristic_matrix[segment][i])]),
                                               new_terminus))
    transducer.clear_dead_states()
    # Free deletion (null output) self-loops on every surviving state.
    for state in transducer.states:
        transducer.add_arc(Arc(
            state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), state))
    return transducer
def send_by_transducer(coder, target_str):
    """Encode ``target_str`` through a Transducer and transmit it."""
    print("Encoding...")
    encoder = Transducer(mode=1, debug=0, coder=coder)
    payload = list(map(ord, target_str))
    encoder.send(payload)
def write_by_transducer(coder, target_str, file_name):
    """Encode ``target_str`` through a Transducer and write it to ``file_name``."""
    print("Encoding...")
    encoder = Transducer(mode=1, debug=0, coder=coder)
    payload = list(map(ord, target_str))
    encoder.write_to_file(payload, file_name)
def optimize_transducer_grammar_for_word(word, eval):
    """Prune the evaluation transducer ``eval`` down to its best paths for ``word``.

    Walks the transducer one word-segment index at a time, keeping only the
    arcs selected by ``_best_arcs`` and accumulating per-state path costs,
    then keeps every final state that ties for the best accumulated cost.

    NOTE(review): ``eval`` shadows the builtin but is part of the public
    signature, so it is kept. The ``>`` / ``==`` comparisons rely on
    CostVector's ordering semantics ("greater" apparently meaning better) —
    confirm against CostVector's comparison operators.
    """
    # Group states by word-position index.
    # NOTE(review): states_by_index is never read below — kept for parity
    # with the original; consider removing if truly unused.
    states_by_index = {}
    for state in eval.states:
        states_by_index.setdefault(state.index, []).append(state)
    # Group outgoing arcs by the index of their origin state.
    arcs_by_index = {}
    for arc in eval._arcs:
        arcs_by_index.setdefault(arc.origin_state.index, []).append(arc)

    new_transducer = Transducer(eval.get_alphabet())
    state_costs = {}
    new_transducer.add_state(eval.initial_state)
    new_transducer.initial_state = eval.initial_state
    state_costs[eval.initial_state] = CostVector.get_vector(eval.get_length_of_cost_vectors(), 0)

    # Extend the kept paths one segment at a time, tracking each state's cost.
    for index in range(len(word.get_segments())):
        for arc in _best_arcs(arcs_by_index[index], state_costs):
            new_transducer.add_arc(arc)
            new_transducer.add_state(arc.terminal_state)
            state_costs[arc.terminal_state] = state_costs[arc.origin_state] + arc.cost_vector

    # Keep every final state whose accumulated cost ties for the best one.
    new_final_states = [eval.final_states[0]]
    for state in eval.final_states[1:]:
        state_cost = state_costs[state]
        final_cost = state_costs[new_final_states[0]]
        if state_cost > final_cost:
            new_final_states = [state]
        elif state_cost == final_cost:
            new_final_states.append(state)
    for state in new_final_states:
        new_transducer.add_final_state(state)
    # new_transducer.clear_dead_states(with_impasse_states=True)  # TODO give it a try
    return new_transducer
def _make_transducer(self):
    """Build the weighted transducer for this multi-bundle markedness constraint.

    This variant additionally adds a free self-loop on the destination state
    for every non-final-level transition (see the i != n branch).

    NOTE(review): states are labeled 'q<i>|<j>'; i appears to count how many
    leading feature bundles have been matched and j appears to be
    back-tracking memory for overlapping partial matches — confirm before
    relying on this reading. A cost of 1 is emitted only at level n when the
    last bundle is satisfied.
    """

    def compute_num_of_max_satisfied_bundle(segment):
        # Length of the longest prefix of feature bundles that `segment` satisfies.
        i = 0
        while i < n and symbol_bundle_characteristic_matrix[segment][i]:
            i += 1
        return i

    def compute_highest_num_of_satisfied_bundle(segment, j):
        # Highest k in [1, j+1] such that `segment` satisfies bundle k-1;
        # 0 when no such k exists.
        for k in range(j + 1, 0, -1):
            if symbol_bundle_characteristic_matrix[segment][k - 1]:
                return k
        else:
            return 0

    n = len(self.feature_bundles) - 1  # index of the last feature bundle
    segments = self.feature_table.get_segments()
    transducer = Transducer(segments, name=str(self))
    # matrix[segment][i] is True iff `segment` satisfies feature bundle i.
    symbol_bundle_characteristic_matrix = {
        segment: [
            segment.has_feature_bundle(self.feature_bundles[i])
            for i in range(n + 1)
        ]
        for segment in segments
    }
    # states[i][j] will hold State objects; 0 is just a placeholder.
    states = {i: {j: 0 for j in range(i)} for i in range(n + 1)}
    initial_state = State(
        'q0|0'
    )  # here we use a tuple as label. it will change at the end of this function
    states[0][0] = initial_state
    transducer.set_as_single_state(initial_state)
    if not n:
        # Single-bundle constraint: one state; pay 1 whenever the segment
        # satisfies the bundle.
        for segment in segments:
            transducer.add_arc(
                Arc(
                    states[0][0], JOKER_SEGMENT, segment,
                    CostVector([
                        int(symbol_bundle_characteristic_matrix[segment][0])
                    ]), states[0][0]))
        # NOTE(review): indentation reconstructed — this free-deletion arc is
        # assumed to be added once, outside the loop above; confirm.
        transducer.add_arc(
            Arc(states[0][0], JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]),
                states[0][0]))
    else:
        # Create the interior states q(i)|j for i >= 1.
        for i in range(0, n + 1):
            for j in range(i):
                state = State('q{0}|{1}'.format(i, j))
                states[i][j] = state
                transducer.add_state(state)
        max_num_of_satisfied_bundle_by_segment = {
            segment: compute_num_of_max_satisfied_bundle(segment)
            for segment in segments
        }
        # Arcs out of the initial state: advance to level 1 iff the segment
        # satisfies bundle 0 (the bool indexes states[0] or states[1]).
        for segment in segments:
            transducer.add_arc(
                Arc(states[0][0], JOKER_SEGMENT, segment, CostVector([0]),
                    states[symbol_bundle_characteristic_matrix[segment][0]][0]))
        for i in range(n + 1):
            for j in range(i):
                state = states[i][j]
                transducer.add_final_state(state)
                if i != n:
                    for segment in segments:
                        if symbol_bundle_characteristic_matrix[segment][i]:
                            # Segment extends the current partial match.
                            new_state_level = i + 1
                            new_state_mem = min([
                                j + 1,
                                max_num_of_satisfied_bundle_by_segment[segment]
                            ])
                        else:
                            # Match broken: fall back to the longest restartable prefix.
                            new_state_level = compute_highest_num_of_satisfied_bundle(segment, j)
                            new_state_mem = min([
                                max_num_of_satisfied_bundle_by_segment[segment],
                                abs(new_state_level - 1)
                            ])
                        new_terminus = states[new_state_level][new_state_mem]
                        transducer.add_arc(
                            Arc(state, JOKER_SEGMENT, segment, CostVector([0]),
                                new_terminus))
                        # Extra free self-loop on the destination state
                        # (this is what distinguishes this variant).
                        transducer.add_arc(
                            Arc(new_terminus, JOKER_SEGMENT, segment,
                                CostVector([0]), new_terminus))
                else:  # i = n
                    for segment in segments:
                        new_state_level = compute_highest_num_of_satisfied_bundle(segment, j)
                        new_state_mem = min([
                            max_num_of_satisfied_bundle_by_segment[segment],
                            abs(new_state_level - 1)
                        ])
                        new_terminus = states[new_state_level][new_state_mem]
                        # The only costed arcs: completing the full bundle sequence.
                        transducer.add_arc(
                            Arc(
                                state, JOKER_SEGMENT, segment,
                                CostVector([
                                    int(symbol_bundle_characteristic_matrix[segment][i])
                                ]), new_terminus))
    transducer.clear_dead_states()
    # Free deletion (null output) self-loops on every surviving state.
    for state in transducer.states:
        transducer.add_arc(
            Arc(state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), state))
    return transducer
def test_ctc(self):
    """CTC-style losses and gradients computed through the Transducer graph."""
    T = 5  # time steps (emissions are viewed as (1, T, N))
    N = 6  # output tokens, a..e plus blank
    # Test 1
    labels = [[0, 1, 2, 1, 0]]
    # fmt: off
    emissions = torch.tensor(
        (
            0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553,
            0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436,
            0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688,
            0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533,
            0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107,
        ),
        requires_grad=True,
    )
    # fmt: on
    log_emissions = torch.log(emissions.view(1, T, N))
    # Gradients are checked on the log-probabilities, so keep them.
    log_emissions.retain_grad()
    transducer = Transducer(
        tokens=["a", "b", "c", "d", "e"],
        graphemes_to_idx={"a": 0, "b": 1, "c": 2, "d": 3, "e": 4},
        blank="optional",
    )
    loss = transducer(log_emissions, labels)
    self.assertAlmostEqual(loss.item(), 3.34211, places=4)
    # retain_graph so the same graph could be backpropagated again.
    loss.backward(retain_graph=True)
    # fmt: off
    expected_grad = torch.tensor((
        -0.366234, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553,
        0.111121, -0.411608, 0.278779, 0.0055756, 0.00569609, 0.010436,
        0.0357786, 0.633813, -0.678582, 0.00249248, 0.00272882, 0.0037688,
        0.0663296, -0.356151, 0.280111, 0.00283995, 0.0035545, 0.00331533,
        -0.541765, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107,
    )).view(1, T, N)
    # fmt: on
    self.assertTrue(log_emissions.grad.allclose(expected_grad))

    # Test 2: same setup but with repeats disallowed.
    labels = [[0, 1, 1, 0]]
    # fmt: off
    emissions = torch.tensor(
        (
            0.30176, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508,
            0.24082, 0.397533, 0.0557226, 0.0546814, 0.0557528, 0.19549,
            0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, 0.202456,
            0.280884, 0.429522, 0.0326593, 0.0339046, 0.0326856, 0.190345,
            0.423286, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046,
        ),
        requires_grad=True,
    )
    # fmt: on
    log_emissions = torch.log(emissions.view(1, T, N))
    log_emissions.retain_grad()
    transducer = Transducer(
        tokens=["a", "b", "c", "d", "e"],
        graphemes_to_idx={"a": 0, "b": 1, "c": 2, "d": 3, "e": 4},
        blank="optional",
        allow_repeats=False,
    )
    loss = transducer(log_emissions, labels)
    self.assertAlmostEqual(loss.item(), 5.42262, places=4)
    loss.backward()
    # fmt: off
    expected_grad = torch.tensor((
        -0.69824, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508,
        0.24082, -0.602467, 0.0557226, 0.0546814, 0.0557528, 0.19549,
        0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, -0.797544,
        0.280884, -0.570478, 0.0326593, 0.0339046, 0.0326856, 0.190345,
        -0.576714, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046,
    )).view(1, T, N)
    # fmt: on
    self.assertTrue(log_emissions.grad.allclose(expected_grad))
def _make_transducer(self):
    """Build the two-state transducer for this Contiguity constraint.

    'Contiguity1' is the default state; 'Contiguity2' is entered by a
    faithfully-mapped stress mark. Insertions/deletions launched from the
    post-stress state each incur one violation.
    """
    segments = self.feature_table.get_segments()
    transducer = Transducer(segments, name=str(self))
    plain = State('Contiguity1')
    post_stress = State('Contiguity2')
    transducer.add_state(plain)
    transducer.add_state(post_stress)
    transducer.initial_state = plain
    transducer.add_final_state(plain)
    transducer.add_final_state(post_stress)
    for segment in segments:
        # Insertion/deletion arcs: free from the plain state, one violation
        # when they immediately follow a stress mark. All return to plain.
        transducer.add_arc(Arc(plain, NULL_SEGMENT, segment, CostVector([0]), plain))
        transducer.add_arc(Arc(plain, segment, NULL_SEGMENT, CostVector([0]), plain))
        transducer.add_arc(Arc(post_stress, NULL_SEGMENT, segment, CostVector([1]), plain))
        transducer.add_arc(Arc(post_stress, segment, NULL_SEGMENT, CostVector([1]), plain))
        symbol = segment.get_symbol()
        if symbol in yimas_vowels:
            # Faithful vowels drop back to the plain state.
            transducer.add_arc(Arc(plain, segment, segment, CostVector([0]), plain))
            transducer.add_arc(Arc(post_stress, segment, segment, CostVector([0]), plain))
        elif symbol == "'":
            # A faithful stress mark enters (or stays in) the post-stress state.
            transducer.add_arc(Arc(plain, segment, segment, CostVector([0]), post_stress))
            transducer.add_arc(Arc(post_stress, segment, segment, CostVector([0]), post_stress))
        elif symbol in yimas_cons:
            # Faithful consonants also drop back to the plain state.
            transducer.add_arc(Arc(plain, segment, segment, CostVector([0]), plain))
            transducer.add_arc(Arc(post_stress, segment, segment, CostVector([0]), plain))
        else:
            raise ConstraintError("{} not supported in this constraint".format(symbol))
    return transducer
test = numerize(test_str, Sigma) <<<<<<< HEAD def g(theta): theta_g = zeros_like(theta) for i, (x, y) in enumerate(train): t.grad_features(x, y, i, theta, theta_g, features, threshold) return theta_g ======= >>>>>>> 76148e8887cbf535c1574c441fac8eecd4f467d5 # number of total insertions per string INSERTION_LIMIT = 5 # transducer t = Transducer(len(Sigma), INSERTION_LIMIT, features) #string1 = train[0][0] #string2 = train[0][1] theta = zeros((features.num_features)) theta[0] = 10.0 #theta = npr.rand(features.num_features) def f(theta): val = 0.0 for i, (x, y) in enumerate(train): val += t.func_features(x, y, i, theta, features, 20) return val #return np.asarray(t.func_features(string1, string2, 0, theta, features)) def f_tropical(theta):
def test_asg(self):
    """ASG loss and gradients (emissions and transitions) through the Transducer."""
    T = 5  # time steps per batch item
    N = 6  # output tokens
    B = 3  # batch size; the loss is averaged over it below
    labels = [[2, 1, 5, 1, 3], [4, 3, 5], [3, 2, 2, 1]]
    emissions = torch.tensor(
        [
            [
                [-0.4340, -0.0254, 0.3667, 0.4180, -0.3805, -0.1707],
                [0.1060, 0.3631, -0.1122, -0.3825, -0.0031, -0.3801],
                [0.0443, -0.3795, 0.3194, -0.3130, 0.0094, 0.1560],
                [0.1252, 0.2877, 0.1997, -0.4554, 0.2774, -0.2526],
                [-0.4001, -0.2402, 0.1295, 0.0172, 0.1805, -0.3299],
            ],
            [
                [0.3298, -0.2259, -0.0959, 0.4909, 0.2996, -0.2543],
                [-0.2863, 0.3239, -0.3988, 0.0732, -0.2107, -0.4739],
                [-0.0906, 0.0480, -0.1301, 0.3975, -0.3317, -0.1967],
                [0.4372, -0.2006, 0.0094, 0.3281, 0.1873, -0.2945],
                [0.2399, 0.0320, -0.3768, -0.2849, -0.2248, 0.3186],
            ],
            [
                [0.0225, -0.3867, -0.1929, -0.2904, -0.4958, -0.2533],
                [0.4001, -0.1517, -0.2799, -0.2915, 0.4198, 0.4506],
                [0.1446, -0.4753, -0.0711, 0.2876, -0.1851, -0.1066],
                [0.2081, -0.1190, -0.3902, -0.1668, 0.1911, -0.2848],
                [-0.3846, 0.1175, 0.1052, 0.2172, -0.0362, 0.3055],
            ],
        ],
        requires_grad=True,
    )
    # Single-grapheme tokens 0..N-1 with an identity grapheme mapping.
    tokens = [(n, ) for n in range(N)]
    graphemes_to_idx = {n: n for n in range(N)}
    # Zero-initialized transition graph: (N+1) x N scores incl. start row.
    asg_transitions = ASGLossFunction.create_transitions_graph(
        torch.zeros(N + 1, N))
    transducer = Transducer(
        tokens=tokens,
        graphemes_to_idx=graphemes_to_idx,
        transitions=asg_transitions,
    )
    loss = transducer(emissions, labels)
    self.assertAlmostEqual(loss.item(), 7.47995, places=4)
    loss.backward()
    expected_grad = torch.tensor([
        [
            [0.1060, 0.1595, -0.7639, 0.2485, 0.1118, 0.1380],
            [0.1915, -0.7524, 0.1539, 0.1175, 0.1717, 0.1178],
            [0.1738, 0.1137, 0.2288, 0.1216, 0.1678, -0.8057],
            [0.1766, -0.7923, 0.1902, 0.0988, 0.2056, 0.1210],
            [0.1212, 0.1422, 0.2059, -0.8160, 0.2166, 0.1300],
        ],
        [
            [0.2029, 0.1164, 0.1325, 0.2383, -0.8032, 0.1131],
            [0.1414, 0.2602, 0.1263, -0.3441, -0.3009, 0.1172],
            [0.1557, 0.1788, 0.1496, -0.5498, 0.0140, 0.0516],
            [0.2306, 0.1219, 0.1503, -0.4244, 0.1796, -0.2579],
            [0.2149, 0.1745, 0.1160, 0.1271, 0.1350, -0.7675],
        ],
        [
            [0.2195, 0.1458, 0.1770, -0.8395, 0.1307, 0.1666],
            [0.2148, 0.1237, -0.6613, -0.1223, 0.2191, 0.2259],
            [0.2002, 0.1077, -0.8386, 0.2310, 0.1440, 0.1557],
            [0.2197, -0.1466, -0.5742, 0.1510, 0.2160, 0.1342],
            [0.1050, -0.8265, 0.1714, 0.1917, 0.1488, 0.2094],
        ],
    ])
    # Reference gradients are per-sample; the loss averages over the batch.
    expected_grad = expected_grad / B
    self.assertTrue(emissions.grad.allclose(expected_grad, rtol=1e-03))
    expected_trans_grad = (torch.tensor([
        [0.3990, 0.3396, 0.3486, 0.3922, 0.3504, 0.3155],
        [0.3666, 0.0116, -1.6678, 0.3737, 0.3361, -0.7152],
        [0.3468, 0.3163, -1.1583, -0.6803, 0.3216, 0.2722],
        [0.3694, -0.6688, 0.3047, -0.8531, -0.6571, 0.2870],
        [0.3866, 0.3321, 0.3447, 0.3664, -0.2163, 0.3039],
        [0.3640, -0.6943, 0.2988, -0.6722, 0.3215, -0.1860],
    ]).view(N, N) / B)
    # Skip the first N entries (start transitions); check the N x N body.
    trans_grad = transducer.transition_params.grad[N:].view(N, N)
    self.assertTrue(trans_grad.allclose(expected_trans_grad, rtol=1e-02))
def _base_faithfulness_transducer(self):
    """Create the single-state transducer skeleton shared by faithfulness constraints.

    Returns:
        tuple: (transducer, segments, state) — the empty transducer, the
        feature-table segments, and its sole state.
    """
    segments = self.feature_table.get_segments()
    transducer = Transducer(segments, name=str(self))
    sole_state = State('q0')
    transducer.set_as_single_state(sole_state)
    return transducer, segments, sole_state
#!/usr/bin/env python """ This module implements the BekProgram class which is used to convert Transducer() objects into BEK programs. """ from sys import argv from collections import defaultdict from operator import attrgetter from transducer import Transducer, EPSILON class _BekState(object): """ Simple storage class, it holds information regarding the lookahead path each BEK state process. """ def __init__(self): self.la_trans_list = set([]) self.prefix = [] self.prefix_out = [] class BekProgram(object): """ Implements a compiler to transform Transducer objects into BEK programs which can then be further analyzed using the BEK infrastructure. For more information see http://rise4fun.com/Bek/tutorial. The main public method is create_from_transducer() which will compile the BEK program from a transducer which can then be accessed in the bek_program public variable.