def sample_model(
        num_features, num_classes, potentials, transitions,
        num_samples, max_len=20):
    """
    Draw `num_samples` `(X, Y)` pairs from a linear-chain CRF.

    The CRF is given by a `potentials` graph and a `transitions` graph. Each
    `X` is a chain graph over a random feature sequence whose length is drawn
    uniformly from `[1, max_len]`; the matching `Y` is the Viterbi path of
    `X` composed with the model, projected onto its output labels and with
    all of its weights set to zero.

    `num_classes` is accepted for interface symmetry but is not read here.

    Returns a list of `(X, Y)` tuples of graphs.
    """
    crf = gtn.compose(potentials, transitions)

    def draw_pair():
        # Sample the input sequence first (length, then features).
        length = np.random.randint(1, max_len + 1)
        features = np.random.randint(0, num_features, size=(length,))
        x_graph = make_chain_graph(features)

        # Most likely labelling of the sampled input under the model.
        best = gtn.viterbi_path(gtn.compose(x_graph, crf))

        # Keep only the output labels and drop the path scores.
        y_graph = gtn.project_output(best)
        y_graph.set_weights(np.zeros(y_graph.num_arcs()))
        return x_graph, y_graph

    return [draw_pair() for _ in range(num_samples)]
def process(b):
    """
    Build the loss graph for batch element `b`.

    Reads `inputs`, `targets`, `T`, `C`, `lexicon`, `tokens` and
    `transitions` from the enclosing scope. Stores the negated (and, when
    `transitions` is given, normalized) forward score in `losses[b]`, and
    keeps the emissions graph in `emissions_graphs[b]` when it tracks
    gradients.
    """
    use_transitions = transitions is not None

    # Emissions graph filled with the scores of example `b`. The CPU copy is
    # bound to a name so it stays referenced while its pointer is consumed.
    emissions = gtn.linear_graph(T, C, inputs.requires_grad)
    cpu_data = inputs[b].cpu().contiguous()
    emissions.set_weights(cpu_data.data_ptr())

    target = make_chain_graph(targets[b])
    target.arc_sort(True)

    # Token-to-grapheme decomposition of the target.
    decomposed = gtn.compose(target, lexicon)
    tokens_target = gtn.remove(gtn.project_output(decomposed))
    tokens_target.arc_sort()

    # Alignment graph.
    aligned = gtn.remove(gtn.compose(tokens, tokens_target))
    alignments = gtn.project_input(aligned)
    alignments.arc_sort()

    # Add transition scores when a transitions graph is supplied.
    if use_transitions:
        alignments = gtn.intersect(transitions, alignments)
        alignments.arc_sort()

    loss = gtn.forward_score(gtn.intersect(emissions, alignments))

    # With transitions the score must be normalized over all paths.
    if use_transitions:
        norm = gtn.forward_score(gtn.intersect(emissions, transitions))
        loss = gtn.subtract(loss, norm)

    losses[b] = gtn.negate(loss)

    # Saved for the backward pass.
    if emissions.calc_grad:
        emissions_graphs[b] = emissions
def crf_loss(X, Y, potentials, transitions):
    """
    Return the CRF loss graph for the pair `(X, Y)`.

    The result is the forward score of `X` composed with the full model (the
    partition function) minus the forward score of the paths that are also
    constrained to the labelling `Y`.
    """
    scored_features = gtn.compose(X, potentials)

    # Unnormalized score of `(X, Y)`: transitions restricted to `Y`.
    constrained = gtn.intersect(Y, transitions)
    target_score = gtn.forward_score(gtn.compose(scored_features, constrained))

    # Partition function over every labelling.
    norm_score = gtn.forward_score(gtn.compose(scored_features, transitions))

    return gtn.subtract(norm_score, target_score)
def test_closure_grad(self):
    """Check the gradient through `closure` numerically."""
    # Two-node graph whose closure is differentiated.
    g1 = gtn.Graph()
    g1.add_node(True)
    g1.add_node(False, True)
    g1.add_arc(0, 1, 0, 0, 1.3)
    g1.add_arc(1, 1, 1, 1, 2.1)

    # Five-node chain with labels 0 and 1 on every step.
    g2 = gtn.Graph()
    g2.add_node(True)
    for _ in range(3):
        g2.add_node()
    g2.add_node(False, True)
    for src in range(4):
        for label in (0, 1):
            g2.add_arc(src, src + 1, label)

    def forward_fn(g, g2=g2):
        return gtn.forward_score(gtn.compose(closure(g), g2))

    # Run one analytic backward pass before the numerical check.
    gtn.backward(forward_fn(g1))

    self.assertTrue(numerical_grad_check(forward_fn, g1, 1e-3, 1e-3))
def seq_loss(batch_index):
    """
    Compute the sequence score difference for one batch element.

    Reads `arc_scores`, `arc_labels`, `num_samples` and `seq_fsts` from the
    enclosing scope. Writes the numerator-minus-denominator forward score to
    `losses[batch_index]` and the observation graph to
    `obs_fsts[batch_index]`.
    """
    scores = arc_scores[batch_index].reshape(num_samples, -1)
    obs_fst = linearFstFromArray(scores)
    gt_fst = fromSequence(arc_labels[batch_index])

    # Each sequence fst is composed in one at a time: composition appears to
    # work only for lattices.
    denom_fst = obs_fst
    for seq_fst in seq_fsts:
        denom_fst = gtn.project_output(gtn.compose(denom_fst, seq_fst))

    num_fst = gtn.compose(denom_fst, gt_fst)

    num_score = gtn.forward_score(num_fst)
    denom_score = gtn.forward_score(denom_fst)

    losses[batch_index] = gtn.subtract(num_score, denom_score)
    obs_fsts[batch_index] = obs_fst
def __test_basic(base_dir=None, draw=False):
    """
    Build a small set of score / transition / sequence graphs.

    Random transition scores and random observation scores are generated for
    a six-symbol vocabulary, turned into graphs, and composed into a
    denominator and a numerator graph. When `draw` is true every graph is
    also rendered to a PNG under `base_dir` (the home directory by default).

    Returns `(fsts, names)`, two parallel tuples.
    """
    if base_dir is None:
        base_dir = os.path.expanduser('~')

    vocabulary = list('abcdef')
    vocab_size = len(vocabulary)

    transitions = np.random.randn(vocab_size, vocab_size)
    transition_vocab, transition_ids = makeTransitionVocabulary(transitions)

    seq = range(vocab_size)
    seq_transitions = [transition_ids[t] for t in toTransitionSeq(seq)]
    seq_fst = fromSequence(seq_transitions)
    num_samples = len(seq_transitions) - 2

    # Symbol tables used when drawing.
    seq_mapper = vocab_mapper(dict(enumerate(vocabulary)))
    tx_symbols = {}
    for index, transition in enumerate(transition_vocab):
        tx_symbols[index] = ','.join((seq_mapper[j] for j in transition))
    tx_mapper = vocab_mapper(tx_symbols)

    tx_fst = fromTransitions(transitions, transition_ids=transition_ids)

    scores = torch.tensor(np.random.randn(num_samples, vocab_size**2))
    scores_fst = linearFstFromArray(scores)

    denom = gtn.compose(scores_fst, tx_fst)
    num = gtn.compose(denom, seq_fst)

    labelled = (
        ('SCORES', scores_fst),
        ('TRANSITIONS', tx_fst),
        ('SEQUENCE', seq_fst),
        ('DENOMINATOR', denom),
        ('NUMERATOR', num),
    )
    names = tuple(name for name, _ in labelled)
    fsts = tuple(fst for _, fst in labelled)

    if not draw:
        return fsts, names

    for name, fst in labelled:
        gtn.draw(
            fst, os.path.join(base_dir, f'TEST_FROM{name}.png'),
            isymbols=tx_mapper, osymbols=tx_mapper)

    return fsts, names
def pred_seq(batch_index):
    """
    Decode the best path for one batch element.

    Reads `arc_scores`, `num_samples` and `seq_fsts` from the enclosing
    scope and stores the output projection of the Viterbi path in
    `best_paths[batch_index]`.
    """
    scores = arc_scores[batch_index].reshape(num_samples, -1)
    lattice = linearFstFromArray(scores)

    # Each sequence fst is composed in one at a time: composition appears to
    # work only for lattices.
    for seq_fst in seq_fsts:
        lattice = gtn.compose(lattice, seq_fst)

    viterbi_path = gtn.viterbi_path(lattice)
    projected = gtn.project_output(viterbi_path)
    best_paths[batch_index] = gtn.remove(projected)
def test_asg_viterbi_path(self):
    """Viterbi decoding of emissions composed with an ASG transition graph."""
    # Test adapted from wav2letter https://tinyurl.com/yc6nxex9
    T = 4
    N = 3

    # fmt: off
    emission_scores = [
        0, 0, 7,
        5, 4, 3,
        5, 8, 5,
        5, 4, 3,
    ]
    trans = [
        0, 2, 0,
        0, 0, 2,
        2, 0, 0,
    ]
    expectedPath = [2, 1, 1, 0]
    # fmt: on

    transitions = gtn.Graph()
    transitions.add_node(True)
    for _ in range(N):
        transitions.add_node(False, True)

    # p(i | <s>)
    for label in range(N):
        transitions.add_arc(0, label + 1, label)

    # p(i | j): the destination label is the outer loop, as in `trans`.
    for i in range(N):
        row = trans[i * N:(i + 1) * N]
        for j, score in enumerate(row):
            transitions.add_arc(j + 1, i + 1, i, i, score)

    emissions = emissions_graph(emission_scores, T, N, True)
    best = gtn.viterbi_path(gtn.compose(emissions, transitions))

    self.assertEqual(best.labels_to_list(), expectedPath)
def process(b):
    """
    Decode batch element `b` and store its label list in `paths[b]`.

    Reads `outputs`, `T`, `C`, `paths` and `self` from the enclosing scope.
    """
    # Emissions graph (no gradient tracking). The CPU copy is bound to a name
    # so it stays referenced while its pointer is consumed.
    emissions = gtn.linear_graph(T, C, False)
    cpu_data = outputs[b].cpu().contiguous()
    emissions.set_weights(cpu_data.data_ptr())

    full_graph = (
        emissions if self.transitions is None
        else gtn.intersect(emissions, self.transitions)
    )

    # Best path, with back-off arcs removed.
    best_alignment = gtn.remove(gtn.viterbi_path(full_graph))

    # Left-compose the Viterbi path with the "alignment to token" transducer
    # to obtain the outputs.
    token_paths = gtn.compose(best_alignment, self.tokens)

    # When there are ambiguous paths (allow_repeats is true), take the
    # shortest.
    best_tokens = gtn.viterbi_path(token_paths)
    decoded = gtn.remove(gtn.project_output(best_tokens))

    paths[b] = decoded.labels_to_list()
def time_compose():
    """
    Time the batched forward and backward passes of `gtn.compose`.

    Builds `B` pairs of linear graphs, adds a self-loop for every label on
    the leading nodes of each second graph, and reports both timings through
    `time_func`.
    """
    first_len, first_labels = 100, 20
    second_len, second_labels = 50, 500

    graphs1 = [gtn.linear_graph(first_len, first_labels) for _ in range(B)]
    graphs2 = [gtn.linear_graph(second_len, second_labels) for _ in range(B)]

    # One self-loop per label on each of the first `second_len` nodes.
    for graph in graphs2:
        for node in range(second_len):
            for label in range(second_labels):
                graph.add_arc(node, node, label)

    def fwd():
        gtn.compose(graphs1, graphs2)

    time_func(fwd, 20, "parallel compose Fwd")

    # The backward pass is timed on one fixed forward result.
    out = gtn.compose(graphs1, graphs2)

    def bwd():
        gtn.backward(out, [True])

    time_func(bwd, 20, "parallel compose bwd")
def test_compose_grad(self):
    """Check the gradients that `compose` propagates to both inputs."""
    # Four-step chain; every step offers labels 0..2 weighted by the label.
    first = gtn.Graph()
    first.add_node(True)
    for _ in range(3):
        first.add_node()
    first.add_node(False, True)
    for src in range(4):
        for label in range(3):
            first.add_arc(src, src + 1, label, label, label)

    second = gtn.Graph()
    second.add_node(True)
    second.add_node()
    second.add_node(False, True)
    second_arcs = (
        (0, 1, 0, 0, 3.5),
        (1, 1, 0, 0, 2.5),
        (1, 2, 1, 1, 1.5),
        (2, 2, 1, 1, 4.5),
    )
    for arc in second_arcs:
        second.add_arc(*arc)

    composed = gtn.compose(first, second)
    gtn.backward(composed)

    gradsFirst = [1, 0, 0, 1, 1, 0, 1, 2, 0, 0, 2, 0]
    gradsSecond = [1, 2, 3, 2]

    first_grads = first.grad().weights_to_list()
    second_grads = second.grad().weights_to_list()
    self.assertEqual(gradsFirst, first_grads)
    self.assertEqual(gradsSecond, second_grads)
def process(b):
    """Store the forward score of `graphs1[b]` composed with `graphs2[b]` in `out[b]`."""
    composed = gtn.compose(graphs1[b], graphs2[b])
    out[b] = gtn.forward_score(composed)
def test_ctc_criterion(self):
    """CTC loss values and emission gradients on four reference cases."""
    # These test cases are taken from wav2letter: https://fburl.com/msom2e4v

    def grads_close(graph, expected, count, tol=1e-5):
        # True when the first `count` gradient weights match `expected`.
        weights = graph.grad().weights_to_list()
        return all(abs(expected[i] - weights[i]) < tol for i in range(count))

    # Test case 1
    ctc = ctc_graph([0, 0], 1)
    emissions = emissions_graph([1.0, 0.0, 0.0, 1.0, 1.0, 0.0], 3, 2)

    loss = gtn.forward_score(gtn.compose(ctc, emissions))
    self.assertEqual(loss.item(), 0.0)

    # Should be 0 since scores are normalized
    z = gtn.forward_score(emissions)
    self.assertEqual(z.item(), 0.0)

    # Test case 2
    T = 3
    N = 4
    ctc = ctc_graph([1, 2], N - 1)
    emissions = emissions_graph([1.0] * (T * N), T, N)

    expected_loss = -math.log(0.25 * 0.25 * 0.25 * 5)

    target_score = gtn.forward_score(gtn.compose(ctc, emissions))
    loss = gtn.subtract(target_score, gtn.forward_score(emissions))
    self.assertAlmostEqual(-loss.item(), expected_loss)

    # Test case 3
    T = 5
    N = 6
    target = [0, 1, 2, 1, 0]

    # generate CTC graph
    ctc = ctc_graph(target, N - 1)

    # fmt: off
    emissions_vec = [
        0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553,
        0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436,
        0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688,
        0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533,
        0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107,
    ]
    # fmt: on

    emissions = emissions_graph(emissions_vec, T, N)

    # The log probabilities are already normalized,
    # so this should be close to 0
    z = gtn.forward_score(emissions)
    self.assertTrue(abs(z.item()) < 1e-5)

    loss = gtn.subtract(z, gtn.forward_score(gtn.compose(ctc, emissions)))
    expected_loss = 3.34211
    self.assertAlmostEqual(loss.item(), expected_loss, places=5)

    # Check the gradients
    gtn.backward(loss)

    # fmt: off
    expected_grad = [
        -0.366234, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553,
        0.111121, -0.411608, 0.278779, 0.0055756, 0.00569609, 0.010436,
        0.0357786, 0.633813, -0.678582, 0.00249248, 0.00272882, 0.0037688,
        0.0663296, -0.356151, 0.280111, 0.00283995, 0.0035545, 0.00331533,
        -0.541765, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107
    ]
    # fmt: on
    self.assertTrue(grads_close(emissions, expected_grad, T * N))

    # Test case 4
    # This test case is taken from Tensor Flow CTC implementation
    # tinyurl.com/y9du5v5a
    T = 5
    N = 6
    target = [0, 1, 1, 0]

    # generate CTC graph
    ctc = ctc_graph(target, N - 1)

    # fmt: off
    emissions_vec = [
        0.30176, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508,
        0.24082, 0.397533, 0.0557226, 0.0546814, 0.0557528, 0.19549,
        0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, 0.202456,
        0.280884, 0.429522, 0.0326593, 0.0339046, 0.0326856, 0.190345,
        0.423286, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046,
    ]
    # fmt: on

    emissions = emissions_graph(emissions_vec, T, N)

    # The log probabilities are already normalized,
    # so this should be close to 0
    z = gtn.forward_score(emissions)
    self.assertTrue(abs(z.item()) < 1e-5)

    loss = gtn.subtract(z, gtn.forward_score(gtn.compose(ctc, emissions)))
    expected_loss = 5.42262
    self.assertAlmostEqual(loss.item(), expected_loss, places=4)

    # Check the gradients
    gtn.backward(loss)

    # fmt: off
    expected_grad = [
        -0.69824, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508,
        0.24082, -0.602467, 0.0557226, 0.0546814, 0.0557528, 0.19549,
        0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, -0.797544,
        0.280884, -0.570478, 0.0326593, 0.0339046, 0.0326856, 0.190345,
        -0.576714, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046,
    ]
    # fmt: on
    self.assertTrue(grads_close(emissions, expected_grad, T * N))
def test_asg_criterion(self):
    """ASG loss values, emission gradients and transition gradients."""
    # These test cases are taken from wav2letter: https://fburl.com/msom2e4v
    T = 5
    N = 6

    # fmt: off
    targets = [
        [2, 1, 5, 1, 3],
        [4, 3, 5],
        [3, 2, 2, 1],
    ]

    expected_loss = [
        7.7417464256287,
        6.4200420379639,
        8.2780694961548,
    ]

    emissions_vecs = [
        [
            -0.4340, -0.0254, 0.3667, 0.4180, -0.3805, -0.1707,
            0.1060, 0.3631, -0.1122, -0.3825, -0.0031, -0.3801,
            0.0443, -0.3795, 0.3194, -0.3130, 0.0094, 0.1560,
            0.1252, 0.2877, 0.1997, -0.4554, 0.2774, -0.2526,
            -0.4001, -0.2402, 0.1295, 0.0172, 0.1805, -0.3299
        ],
        [
            0.3298, -0.2259, -0.0959, 0.4909, 0.2996, -0.2543,
            -0.2863, 0.3239, -0.3988, 0.0732, -0.2107, -0.4739,
            -0.0906, 0.0480, -0.1301, 0.3975, -0.3317, -0.1967,
            0.4372, -0.2006, 0.0094, 0.3281, 0.1873, -0.2945,
            0.2399, 0.0320, -0.3768, -0.2849, -0.2248, 0.3186,
        ],
        [
            0.0225, -0.3867, -0.1929, -0.2904, -0.4958, -0.2533,
            0.4001, -0.1517, -0.2799, -0.2915, 0.4198, 0.4506,
            0.1446, -0.4753, -0.0711, 0.2876, -0.1851, -0.1066,
            0.2081, -0.1190, -0.3902, -0.1668, 0.1911, -0.2848,
            -0.3846, 0.1175, 0.1052, 0.2172, -0.0362, 0.3055,
        ],
    ]

    emissions_grads = [
        [
            0.1060, 0.1595, -0.7639, 0.2485, 0.1118, 0.1380,
            0.1915, -0.7524, 0.1539, 0.1175, 0.1717, 0.1178,
            0.1738, 0.1137, 0.2288, 0.1216, 0.1678, -0.8057,
            0.1766, -0.7923, 0.1902, 0.0988, 0.2056, 0.1210,
            0.1212, 0.1422, 0.2059, -0.8160, 0.2166, 0.1300,
        ],
        [
            0.2029, 0.1164, 0.1325, 0.2383, -0.8032, 0.1131,
            0.1414, 0.2602, 0.1263, -0.3441, -0.3009, 0.1172,
            0.1557, 0.1788, 0.1496, -0.5498, 0.0140, 0.0516,
            0.2306, 0.1219, 0.1503, -0.4244, 0.1796, -0.2579,
            0.2149, 0.1745, 0.1160, 0.1271, 0.1350, -0.7675,
        ],
        [
            0.2195, 0.1458, 0.1770, -0.8395, 0.1307, 0.1666,
            0.2148, 0.1237, -0.6613, -0.1223, 0.2191, 0.2259,
            0.2002, 0.1077, -0.8386, 0.2310, 0.1440, 0.1557,
            0.2197, -0.1466, -0.5742, 0.1510, 0.2160, 0.1342,
            0.1050, -0.8265, 0.1714, 0.1917, 0.1488, 0.2094,
        ],
    ]
    # fmt: on

    def grads_close(graph, expected, count, offset=0, tol=1e-4):
        # Compare `count` gradient weights, starting at `offset`, to `expected`.
        weights = graph.grad().weights_to_list()
        return all(
            abs(expected[i] - weights[i + offset]) < tol for i in range(count)
        )

    transitions = gtn.Graph()
    transitions.add_node(True)
    for _ in range(N):
        transitions.add_node(False, True)

    # p(i | <s>)
    for label in range(N):
        transitions.add_arc(0, label + 1, label)

    # p(i | j)
    for i in range(N):
        for j in range(N):
            transitions.add_arc(j + 1, i + 1, i)

    cases = zip(targets, emissions_vecs, emissions_grads, expected_loss)
    for target, emissions_vec, emissions_grad, wanted_loss in cases:
        # Forced-alignment graph: each target label, optionally repeated.
        fal = gtn.Graph()
        fal.add_node(True)
        last = len(target)
        for position, label in enumerate(target, start=1):
            fal.add_node(False, position == last)
            fal.add_arc(position - 1, position, label)
            fal.add_arc(position, position, label)

        emissions = emissions_graph(emissions_vec, T, N, True)

        full_score = gtn.forward_score(gtn.compose(emissions, transitions))
        constrained = gtn.compose(gtn.compose(fal, transitions), emissions)
        loss = gtn.subtract(full_score, gtn.forward_score(constrained))

        self.assertAlmostEqual(loss.item(), wanted_loss, places=3)

        # Check the gradients
        gtn.backward(loss)
        self.assertTrue(grads_close(emissions, emissions_grad, T * N))

    # fmt: off
    trans_grad = [
        0.3990, 0.3396, 0.3486, 0.3922, 0.3504, 0.3155,
        0.3666, 0.0116, -1.6678, 0.3737, 0.3361, -0.7152,
        0.3468, 0.3163, -1.1583, -0.6803, 0.3216, 0.2722,
        0.3694, -0.6688, 0.3047, -0.8531, -0.6571, 0.2870,
        0.3866, 0.3321, 0.3447, 0.3664, -0.2163, 0.3039,
        0.3640, -0.6943, 0.2988, -0.6722, 0.3215, -0.1860,
    ]
    # fmt: on

    # The first N arcs are the start arcs; the N * N transition arcs follow.
    self.assertTrue(grads_close(transitions, trans_grad, N * N, offset=N))
def forward_fn(g, g2=g2):
    """Return the forward score of the closure of `g` composed with `g2`."""
    closed = closure(g)
    composed = gtn.compose(closed, g2)
    return gtn.forward_score(composed)
def test_epsilon_composition(self):
    """Composition of transducers that carry epsilon labels."""
    eps = gtn.epsilon

    def build(num_nodes, arcs):
        # Chain-like graph: first node is the start, last node accepts.
        graph = gtn.Graph()
        graph.add_node(True)
        for _ in range(num_nodes - 2):
            graph.add_node()
        graph.add_node(False, True)
        for arc in arcs:
            graph.add_arc(*arc)
        return graph

    # Simple test case for output epsilon on first graph
    g1 = build(2, [(0, 0, 0, eps, 1.0), (0, 1, 1, 2)])
    g2 = build(2, [(0, 1, 2, 3)])
    expected = build(2, [(0, 0, 0, eps, 1.0), (0, 1, 1, 3)])
    self.assertTrue(gtn.equal(gtn.compose(g1, g2), expected))

    # Simple test case for input epsilon on second graph
    g1 = build(2, [(0, 1, 1, 2)])
    g2 = build(2, [(0, 1, 2, 3), (1, 1, eps, 0, 2.0)])
    expected = build(2, [(0, 1, 1, 3), (1, 1, eps, 0, 2.0)])
    self.assertTrue(gtn.equal(gtn.compose(g1, g2), expected))

    # This test case is taken from "Weighted Automata Algorithms", Mehryar
    # Mohri, https://cs.nyu.edu/~mohri/pub/hwa.pdf Section 5.1, Figure 7
    symbols = {"a": 0, "b": 1, "c": 2, "d": 3, "e": 4}
    g1 = build(5, [
        (0, 1, symbols["a"], symbols["a"]),
        (1, 2, symbols["b"], eps),
        (2, 3, symbols["c"], eps),
        (3, 4, symbols["d"], symbols["d"]),
    ])
    g2 = build(4, [
        (0, 1, symbols["a"], symbols["d"]),
        (1, 2, eps, symbols["e"]),
        (2, 3, symbols["d"], symbols["a"]),
    ])
    expected = build(5, [
        (0, 1, symbols["a"], symbols["d"]),
        (1, 2, symbols["b"], symbols["e"]),
        (2, 3, symbols["c"], eps),
        (3, 4, symbols["d"], symbols["a"]),
    ])
    self.assertTrue(gtn.rand_equivalent(gtn.compose(g1, g2), expected))

    # Test multiple input/output epsilon transitions per node
    g1 = build(2, [
        (0, 0, 1, eps, 1.1),
        (0, 1, 2, eps, 2.1),
        (0, 1, 3, eps, 3.1),
    ])
    g2 = build(2, [(0, 1, eps, 3, 2.1), (0, 1, 1, 2)])
    expected = build(2, [
        (0, 0, 1, eps, 1.1),
        (0, 1, 2, 3, 4.2),
        (0, 1, 3, 3, 5.2),
    ])
    self.assertTrue(gtn.rand_equivalent(gtn.compose(g1, g2), expected))
def test_composition(self):
    """`compose` / `intersect` on empty, looping and weighted graphs."""

    def build(num_nodes, arcs):
        # First node is the start, last node accepts, the rest are plain.
        graph = gtn.Graph()
        graph.add_node(True)
        for _ in range(num_nodes - 2):
            graph.add_node()
        graph.add_node(False, True)
        for arc in arcs:
            graph.add_arc(*arc)
        return graph

    def check_isomorphic(g1, g2, expected):
        # Unsorted compose and intersect, then singly and doubly sorted.
        self.assertTrue(gtn.isomorphic(gtn.compose(g1, g2), expected))
        self.assertTrue(gtn.isomorphic(gtn.intersect(g1, g2), expected))
        g1.arc_sort(True)
        self.assertTrue(gtn.isomorphic(gtn.compose(g1, g2), expected))
        g2.arc_sort()
        self.assertTrue(gtn.isomorphic(gtn.compose(g1, g2), expected))

    # Composing with an empty graph gives an empty graph
    g1 = gtn.Graph()
    g2 = gtn.Graph()
    self.assertTrue(gtn.equal(gtn.compose(g1, g2), gtn.Graph()))

    # `g1` has no accepting node, so every composition stays empty.
    g1.add_node(True)
    g1.add_node()
    g1.add_arc(0, 1, 0)

    g2.add_node(True)
    g2.add_node(False, True)
    g2.add_arc(0, 1, 0)
    g2.add_arc(0, 1, 0)

    self.assertTrue(gtn.equal(gtn.compose(g1, g2), gtn.Graph()))
    self.assertTrue(gtn.equal(gtn.compose(g2, g1), gtn.Graph()))
    self.assertTrue(gtn.equal(gtn.intersect(g2, g1), gtn.Graph()))

    # Check singly sorted version
    g1.arc_sort(True)
    self.assertTrue(gtn.equal(gtn.compose(g1, g2), gtn.Graph()))

    # Check doubly sorted version
    g2.arc_sort()
    self.assertTrue(gtn.equal(gtn.compose(g1, g2), gtn.Graph()))

    # Self-loop in the composed graph
    g1 = build(2, [(0, 0, 0), (0, 1, 1), (1, 1, 2)])
    g2 = build(3, [(0, 1, 0), (1, 1, 0), (1, 2, 1)])
    g_str = ["0", "2", "0 1 0", "1 1 0", "1 2 1"]
    check_isomorphic(g1, g2, create_graph_from_text(g_str))

    # Loop in the composed graph
    g1 = build(2, [(0, 1, 0), (1, 1, 1), (1, 0, 0)])
    g2 = build(2, [(0, 0, 0), (0, 1, 1), (1, 0, 1)])
    g_str = ["0", "2", "0 1 0", "1 0 0", "1 2 1", "2 1 1"]
    check_isomorphic(g1, g2, create_graph_from_text(g_str))

    # Weighted chain composed with a weighted graph that has self-loops
    g1 = build(5, [(i, i + 1, j, j, j) for i in range(4) for j in range(3)])
    g2 = build(3, [
        (0, 1, 0, 0, 3.5),
        (1, 1, 0, 0, 2.5),
        (1, 2, 1, 1, 1.5),
        (2, 2, 1, 1, 4.5),
    ])
    g_str = [
        "0",
        "6",
        "0 1 0 0 3.5",
        "1 2 0 0 2.5",
        "1 4 1 1 2.5",
        "2 3 0 0 2.5",
        "2 5 1 1 2.5",
        "4 5 1 1 5.5",
        "3 6 1 1 2.5",
        "5 6 1 1 5.5",
    ]
    check_isomorphic(g1, g2, create_graph_from_text(g_str))
# Token graph over the word pieces.
tokens = token_graph(word_pieces)
gtn.draw(tokens, "tokens.pdf", idx_to_wp, idx_to_wp)

# Acceptor that recognizes "abc": one arc per letter along a four-node chain.
abc = gtn.Graph(False)
abc.add_node(True)
abc.add_node()
abc.add_node()
abc.add_node(False, True)
for position, letter in enumerate("abc"):
    abc.add_arc(position, position + 1, let_to_idx[letter])
gtn.draw(abc, "abc.pdf", idx_to_let)

# Decomposition graph for "abc": every word-piece sequence the lexicon
# allows for it.
abc_in_lexicon = gtn.compose(abc, lex)
abc_decomps = gtn.remove(gtn.project_output(abc_in_lexicon))
gtn.draw(abc_decomps, "abc_decomps.pdf", idx_to_wp, idx_to_wp)

# Alignment graph for "abc".
abc_token_paths = gtn.remove(gtn.compose(tokens, abc_decomps))
abc_alignments = gtn.project_input(abc_token_paths)
gtn.draw(abc_alignments, "abc_alignments.pdf", idx_to_wp)

# From here the alignment graph can be combined with an emissions graph and
# transitions graphs to compute the sequence level criterion:
emissions = gtn.linear_graph(10, len(word_pieces), True)
norm_score = gtn.forward_score(emissions)
aligned_score = gtn.forward_score(gtn.intersect(emissions, abc_alignments))
loss = gtn.subtract(norm_score, aligned_score)
print(f"Loss is {loss.item():.2f}")
def main():
    """
    Train a linear-chain CRF on data sampled from a ground-truth model.

    A ground-truth model is generated and used to sample a training set and
    a test set. A second, learnable model is then fit with plain SGD for at
    most `max_iter` steps, printing the average loss every 1000 steps, and
    finally its label accuracy on the test set is printed.
    """
    num_features = 3  # number of input features
    num_classes = 2  # number of output classes
    num_train = 1000  # size of the training set
    num_test = 200  # size of the testing set

    # Setup ground-truth model:
    gt_potentials, gt_transitions = gen_model(num_features, num_classes)

    # Sample training and test datasets:
    samples = sample_model(
        num_features, num_classes,
        gt_potentials, gt_transitions,
        num_train + num_test)
    train, test = samples[:num_train], samples[num_train:]
    print(f"Using {len(train)} samples for the training set")
    print(f"Using {len(test)} samples for the test set")

    # Make the graphs to be learned. They are built exactly once so that the
    # graphs described below are the ones that get trained.
    potentials, transitions = gen_model(
        num_features, num_classes, calc_grad=True, init=False)
    print("Unary potential graph has {} nodes and {} arcs".format(
        potentials.num_nodes(), potentials.num_arcs()))
    print("Transition graph has {} nodes and {} arcs".format(
        transitions.num_nodes(), transitions.num_arcs()))

    # Run the SGD loop:
    learning_rate = 1e-2
    max_iter = 10000
    losses = []
    for it, (X, Y) in enumerate(sampler(train)):
        # Compute the loss and take a gradient step:
        loss = crf_loss(X, Y, potentials, transitions)
        gtn.backward(loss)
        update_params(-learning_rate, potentials, transitions)

        # Clear the gradients:
        transitions.zero_grad()
        potentials.zero_grad()

        losses.append(loss.item())
        if (it + 1) % 1000 == 0:
            print("=" * 50)
            print(f"Iteration {it + 1}, Avg. Loss {np.mean(losses):.3f}")
            losses = []

        # `it` is zero-based, so stop once `max_iter` steps have been taken.
        if it + 1 >= max_iter:
            break

    # Evaluate on the test set:
    correct = 0.0
    total = 0
    for X, Y in test:
        full_graph = gtn.compose(gtn.compose(X, potentials), transitions)
        prediction = gtn.viterbi_path(full_graph).labels_to_list(False)
        correct += np.sum(np.array(Y.labels_to_list()) == prediction)
        total += len(prediction)
    print("Test: Accuracy {:.3f}".format(correct / total))
def main(
        out_dir=None, gpu_dev_id=None,
        num_samples=10, random_seed=None,
        learning_rate=1e-3, num_epochs=500,
        dataset_kwargs=None, dataloader_kwargs=None, model_kwargs=None):
    """
    Simulate sequences, sanity-check a `LatticeCrf` on them, then train it.

    Parameters
    ----------
    out_dir : str, optional
        Output directory; `~` is expanded. Defaults to
        `~/data/output/seqtools/test_gtn`. Figures go in `<out_dir>/figures`.
    gpu_dev_id, random_seed : optional
        Accepted for interface compatibility; not read by this function.
    num_samples : int
        Number of sequences to simulate.
    learning_rate : float
        SGD learning rate.
    num_epochs : int
        Number of training epochs passed to `torchutils.trainModel`.
    dataset_kwargs, dataloader_kwargs, model_kwargs : dict, optional
        Extra keyword arguments for `torchutils.SequenceDataset`,
        `torch.utils.data.DataLoader` and `libfst.LatticeCrf`. `None` means
        no extra arguments.
    """
    # Fresh dicts per call instead of shared mutable defaults.
    dataset_kwargs = {} if dataset_kwargs is None else dataset_kwargs
    dataloader_kwargs = {} if dataloader_kwargs is None else dataloader_kwargs
    model_kwargs = {} if model_kwargs is None else model_kwargs

    if out_dir is None:
        out_dir = os.path.join('~', 'data', 'output', 'seqtools', 'test_gtn')
    out_dir = os.path.expanduser(out_dir)
    fig_dir = os.path.join(out_dir, 'figures')
    os.makedirs(out_dir, exist_ok=True)
    os.makedirs(fig_dir, exist_ok=True)

    vocabulary = ['a', 'b', 'c', 'd', 'e']

    transition = np.array(
        [[0, 1, 0, 0, 0],
         [0, 0, 1, 1, 0],
         [0, 0, 0, 0, 1],
         [0, 1, 0, 0, 1],
         [0, 0, 0, 0, 0]], dtype=float)
    initial = np.array([1, 0, 1, 0, 0], dtype=float)
    final = np.array([0, 1, 0, 0, 1], dtype=float) / 10

    simulated_dataset = simulate(num_samples, transition, initial, final)
    label_seqs, obsv_seqs = tuple(zip(*simulated_dataset))

    dataset = torchutils.SequenceDataset(obsv_seqs, label_seqs, **dataset_kwargs)
    data_loader = torch.utils.data.DataLoader(dataset, **dataloader_kwargs)

    # The same loader is used for training and validation.
    train_loader = data_loader
    val_loader = data_loader

    transition_weights = torch.tensor(transition, dtype=torch.float).log()
    initial_weights = torch.tensor(initial, dtype=torch.float).log()
    final_weights = torch.tensor(final, dtype=torch.float).log()

    model = libfst.LatticeCrf(
        vocabulary,
        transition_weights=transition_weights,
        initial_weights=initial_weights, final_weights=final_weights,
        debug_output_dir=fig_dir,
        **model_kwargs)

    def draw_fst(fst, file_name):
        # Render `fst` into the figure directory using the model's arc
        # symbols. `model` is looked up at call time, so after training this
        # draws with the trained model.
        gtn.draw(
            fst, os.path.join(fig_dir, file_name),
            isymbols=model._arc_symbols, osymbols=model._arc_symbols)

    draw_fst(model._transition_fst, 'transitions-init.png')
    draw_fst(model._duration_fst, 'durations-init.png')

    # Sanity check before training: score the first sequence of each batch
    # by hand and dump diagnostic figures if the loss is infinite.
    for inputs, targets, _ in train_loader:
        arc_scores = model.scores_to_arc(inputs)
        arc_labels = model.labels_to_arc(targets)

        # Unpacking requires a three-axis score array; only the middle axis
        # is needed for the reshape below.
        _, num_frames, _ = arc_scores.shape

        obs_fst = libfst.linearFstFromArray(
            arc_scores[0].reshape(num_frames, -1))
        gt_fst = libfst.fromSequence(arc_labels[0])

        d1_fst = gtn.project_output(gtn.compose(obs_fst, model._duration_fst))
        denom_fst = gtn.compose(d1_fst, model._transition_fst)
        num_fst = gtn.compose(denom_fst, gt_fst)
        viterbi_fst = gtn.viterbi_path(denom_fst)
        pred_fst = gtn.remove(gtn.project_output(viterbi_fst))

        loss = gtn.subtract(
            gtn.forward_score(num_fst), gtn.forward_score(denom_fst))
        loss = torch.tensor(loss.item())

        if torch.isinf(loss).any():
            # Alternative numerator / denominator built from the
            # epsilon-removed duration lattice, for comparison.
            d1_min = gtn.remove(gtn.project_output(d1_fst))
            denom_alt = gtn.compose(d1_min, model._transition_fst)
            num_alt = gtn.compose(denom_alt, gt_fst)

            debug_figures = (
                (obs_fst, 'observations-init.png'),
                (gt_fst, 'labels-init.png'),
                (d1_fst, 'd1-init.png'),
                (d1_min, 'd1-min-init.png'),
                (denom_fst, 'denominator-init.png'),
                (denom_alt, 'denominator-alt-init.png'),
                (num_fst, 'numerator-init.png'),
                (num_alt, 'numerator-alt-init.png'),
                (viterbi_fst, 'viterbi-init.png'),
                (pred_fst, 'pred-init.png'),
            )
            for fst, file_name in debug_figures:
                draw_fst(fst, file_name)

            # NOTE(review): interactive breakpoint kept from the original;
            # it blocks non-interactive runs. Confirm it is still wanted.
            import pdb
            pdb.set_trace()

    # Train the model
    train_epoch_log = collections.defaultdict(list)
    val_epoch_log = collections.defaultdict(list)
    metric_dict = {
        'Avg Loss': metrics.AverageLoss(),
        'Accuracy': metrics.Accuracy()
    }

    criterion = model.nllLoss
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    # gamma=1.00 keeps the learning rate constant.
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=1, gamma=1.00)

    model, last_model_wts = torchutils.trainModel(
        model, criterion, optimizer, scheduler, train_loader, val_loader,
        metrics=metric_dict, test_metric='Avg Loss',
        train_epoch_log=train_epoch_log, val_epoch_log=val_epoch_log,
        num_epochs=num_epochs)

    draw_fst(model._transition_fst, 'transitions-trained.png')
    draw_fst(model._duration_fst, 'durations-trained.png')

    torchutils.plotEpochLog(
        train_epoch_log, title="Train Epoch Log",
        fn=os.path.join(fig_dir, "train-log.png"))
# Nodes 1..8 of the lexicon graph; node 0 is created before this point.
# Accepting nodes: 3 (end of a-k-a), 4 (end of a-o, "ao") and 8 (end of
# k-u-r-o, "kuro").
for is_accept in (False, False, True, True, False, False, False, True):
    L.add_node(False, is_accept)

# (source, destination, phone, output label, weight). A word is emitted only
# on the last phone of its pronunciation; the other arcs emit epsilon.
lexicon_arcs = (
    (0, 1, 'a', gtn.epsilon, 0.5),
    (1, 4, 'o', words2id['青'], 0.2),
    (1, 2, 'k', gtn.epsilon, 0.7),
    (2, 3, 'a', words2id['赤'], 0.6),
    (0, 5, 'k', gtn.epsilon, 0.3),
    (5, 6, 'u', gtn.epsilon, 0.4),
    (6, 7, 'r', gtn.epsilon, 0.1),
    (7, 8, 'o', words2id['黒'], 0.5),
)
for src, dst, phone, word_label, arc_weight in lexicon_arcs:
    L.add_arc(src, dst, phones2id[phone], word_label, weight=arc_weight)

gtn.draw(L, "L.pdf", id2phones, id2words)

# ============================================
# compose
# ============================================
compose = gtn.compose(L, G)
gtn.draw(compose, "compose.pdf", id2phones, id2words)
def fwd():
    """Run one batched forward composition of `graphs1` with `graphs2`."""
    # The result is discarded on purpose; only the call itself matters here.
    gtn.compose(graphs1, graphs2)