def compile(self, texts: Iterable[str],
            P: k2.Fsa) -> Tuple[k2.Fsa, k2.Fsa]:
    '''Create numerator and denominator graphs from transcripts
    and the bigram phone LM.

    Args:
      texts:
        A list of transcripts. Within a transcript, words are
        separated by spaces.
      P:
        The bigram phone LM created by :func:`create_bigram_phone_lm`.

    Returns:
      A tuple (num_graph, den_graph), where

        - `num_graph` is the numerator graph. It is an FsaVec with
          shape `(len(texts), None, None)`.

        - `den_graph` is the denominator graph. It is an FsaVec with the
          same shape as `num_graph`.
    '''
    assert P.is_cpu()

    ctc_topo_P = k2.intersect(self.ctc_topo, P).invert_()
    ctc_topo_P = k2.connect(ctc_topo_P)

    num_graphs = k2.create_fsa_vec(
        [self.compile_one_and_cache(text) for text in texts])

    num = k2.compose(ctc_topo_P, num_graphs)
    num = k2.connect(num)
    num = k2.arc_sort(num)

    den = k2.create_fsa_vec([ctc_topo_P.detach()] * len(texts))

    return num, den
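
# A minimal, hedged sketch (not part of the original code) of the
# intersect/invert/connect step used in `compile` above, on two tiny FSAs
# standing in for `self.ctc_topo` (a transducer) and `P` (an acceptor).
# Only k2 calls already used in this file appear; depending on the k2
# version, `from_str` may take `acceptor=False` instead of `num_aux_labels`.
def _sketch_ctc_topo_P() -> k2.Fsa:
    topo = k2.arc_sort(k2.Fsa.from_str('''
        0 0 1 1 0.0
        0 1 -1 -1 0.0
        1
    ''', num_aux_labels=1))
    P = k2.arc_sort(k2.Fsa.from_str('''
        0 0 1 0.5
        0 1 -1 0.0
        1
    '''))
    # intersect keeps topo's aux_labels, so invert_() swaps them with labels
    return k2.connect(k2.intersect(topo, P).invert_())
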
def test_fsa(self):
    if torch.cuda.is_available() is False:
        print('skip it since CUDA is not available')
        return
    if torch.cuda.device_count() < 2:
        print('skip it since number of GPUs is 1')
        return
    if not k2.with_cuda:
        return

    device0 = torch.device('cuda', 0)
    device1 = torch.device('cuda', 1)

    torch.cuda.set_device(device1)

    s = '''
        0 1 1 0.1
        1 2 -1 0.2
        2
    '''
    fsa0 = k2.Fsa.from_str(s).to(device0).requires_grad_(True)
    fsa1 = k2.Fsa.from_str(s).to(device1).requires_grad_(True)

    fsa0 = k2.create_fsa_vec([fsa0, fsa0])
    fsa1 = k2.create_fsa_vec([fsa1, fsa1])

    tot_scores0 = fsa0.get_forward_scores(True, True)
    (tot_scores0[0] * 2 + tot_scores0[1]).backward()

    tot_scores1 = fsa1.get_forward_scores(True, True)
    (tot_scores1[0] * 2 + tot_scores1[1]).backward()
def test_autograd(self):
    s0 = '''
        0 1 1 0.1
        0 2 2 0.2
        1 3 -1 0.3
        1 2 2 0.4
        2 3 -1 0.5
        3
    '''
    s1 = '''
        0 2 -1 0.6
        0 1 1 0.7
        1 2 -1 0.8
        2
    '''
    s2 = '''
        0 1 1 1.1
        1 2 -1 1.2
        2
    '''
    devices = [torch.device('cpu')]
    if torch.cuda.is_available():
        devices.append(torch.device('cuda', 0))

    for device in devices:
        fsa0 = k2.Fsa.from_str(s0).to(device).requires_grad_(True)
        fsa1 = k2.Fsa.from_str(s1).to(device).requires_grad_(True)
        fsa2 = k2.Fsa.from_str(s2).to(device).requires_grad_(True)

        fsa_vec = k2.create_fsa_vec([fsa0, fsa1, fsa2])
        fsa = k2.union(fsa_vec)
        fsa_vec = k2.create_fsa_vec([fsa])
        log_like = k2.get_tot_scores(fsa_vec,
                                     log_semiring=True,
                                     use_double_scores=False)
        # expected log_like and gradients are computed using gtn.
        # See https://bit.ly/35uVaUv
        log_like.backward()

        expected_log_like = torch.tensor([3.1136]).to(log_like)
        assert torch.allclose(log_like, expected_log_like)

        expected_grad_fsa0 = torch.tensor([
            0.18710044026374817, 0.08949274569749832, 0.06629786640405655,
            0.12080258131027222, 0.21029533445835114
        ]).to(device)

        expected_grad_fsa1 = torch.tensor([
            0.08097638934850693, 0.19916976988315582, 0.19916976988315582
        ]).to(device)

        expected_grad_fsa2 = torch.tensor(
            [0.4432605803012848, 0.4432605803012848]).to(device)

        assert torch.allclose(fsa0.grad, expected_grad_fsa0)
        assert torch.allclose(fsa1.grad, expected_grad_fsa1)
        assert torch.allclose(fsa2.grad, expected_grad_fsa2)
def test_simple_fsa_case_1(self):
    # see https://git.io/JtttZ
    s = '''
        0 1 1 0.0
        0 1 2 0.1
        0 2 3 2.2
        1 2 4 0.5
        1 2 5 0.6
        1 3 -1 3.0
        2 3 -1 0.8
        3
    '''
    for device in self.devices:
        for use_double_scores in [True, False]:
            fsa = k2.Fsa.from_str(s).to(device).requires_grad_(True)
            fsa_vec = k2.create_fsa_vec([fsa])
            backward_scores = fsa_vec.get_backward_scores(
                use_double_scores=use_double_scores, log_semiring=False)
            expected_backward_scores = torch.empty_like(backward_scores)
            scores = fsa.scores.detach().clone().requires_grad_(True)
            expected_backward_scores[3] = 0
            # yapf:disable
            expected_backward_scores[2] = expected_backward_scores[3] + scores[6]  # noqa
            expected_backward_scores[1] = expected_backward_scores[3] + scores[5]  # noqa
            expected_backward_scores[0] = expected_backward_scores[1] + scores[1]  # noqa
            # yapf:enable
            assert torch.allclose(backward_scores,
                                  expected_backward_scores)

            scale = torch.arange(backward_scores.numel()).to(device)
            (scale * backward_scores).sum().backward()
            (scale * expected_backward_scores).sum().backward()
            assert torch.allclose(fsa.grad, scores.grad)

            # now for log semiring
            fsa.scores.grad = None
            fsa_vec = k2.create_fsa_vec([fsa])
            backward_scores = fsa_vec.get_backward_scores(
                use_double_scores=use_double_scores, log_semiring=True)
            expected_backward_scores = torch.zeros_like(backward_scores)
            scores = fsa.scores.detach().clone().requires_grad_(True)
            expected_backward_scores[3] = 0
            # yapf:disable
            expected_backward_scores[2] = expected_backward_scores[3] + scores[6]  # noqa
            # yapf:enable
            expected_backward_scores[1] = (
                (expected_backward_scores[2] + scores[3]).exp() +
                (expected_backward_scores[2] + scores[4]).exp() +
                (expected_backward_scores[3] + scores[5]).exp()).log()
            expected_backward_scores[0] = (
                (expected_backward_scores[1] + scores[0]).exp() +
                (expected_backward_scores[1] + scores[1]).exp() +
                (expected_backward_scores[2] + scores[2]).exp()).log()
            assert torch.allclose(backward_scores,
                                  expected_backward_scores)

            (scale * backward_scores).sum().backward()
            (scale * expected_backward_scores).sum().backward()
            assert torch.allclose(fsa.grad, scores.grad)
def test_simple_fsa_case_1(self):
    # see https://git.io/JtttZ
    s = '''
        0 1 1 0.0
        0 1 2 0.1
        0 2 3 2.2
        1 2 4 0.5
        1 2 5 0.6
        1 3 -1 3.0
        2 3 -1 0.8
        3
    '''
    devices = [torch.device('cpu')]
    if torch.cuda.is_available():
        devices.append(torch.device('cuda'))

    for device in devices:
        for use_double_scores in [True, False]:
            fsa = k2.Fsa.from_str(s).to(device).requires_grad_(True)
            fsa_vec = k2.create_fsa_vec([fsa])
            forward_scores = fsa_vec.get_forward_scores(
                use_double_scores=use_double_scores, log_semiring=False)
            expected_forward_scores = torch.tensor([
                0,    # start state
                0.1,  # state 1, arc: 0 -> 1 (2/0.1)
                2.2,  # state 2, arc: 0 -> 2 (3/2.2)
                3.1,  # state 3, arc: 1 -> 3 (-1/3.0)
            ]).to(forward_scores)
            assert torch.allclose(forward_scores, expected_forward_scores)

            scale = torch.arange(forward_scores.numel()).to(device)
            (scale * forward_scores).sum().backward()
            expected_grad = torch.tensor([0, 4, 2, 0, 0, 3,
                                          0]).to(fsa.grad)
            assert torch.allclose(fsa.grad, expected_grad)

            # now for log semiring
            fsa.scores.grad = None
            fsa_vec = k2.create_fsa_vec([fsa])
            forward_scores = fsa_vec.get_forward_scores(
                use_double_scores=use_double_scores, log_semiring=True)
            scores = fsa.scores.detach().clone().requires_grad_(True)
            expected_forward_scores = torch.empty_like(forward_scores)
            expected_forward_scores[0] = 0
            expected_forward_scores[1] = scores[:2].exp().sum().log()
            expected_forward_scores[2] = (
                scores[2].exp() +
                (expected_forward_scores[1] + scores[3]).exp() +
                (expected_forward_scores[1] + scores[4]).exp()).log()
            expected_forward_scores[3] = (
                (expected_forward_scores[1] + scores[5]).exp() +
                (expected_forward_scores[2] + scores[6]).exp()).log()
            assert torch.allclose(forward_scores, expected_forward_scores)

            (scale * forward_scores).sum().backward()
            (scale * expected_forward_scores).sum().backward()
            assert torch.allclose(fsa.grad, scores.grad)
def test_top_k(self):
    fsa0 = k2.Fsa.from_str('''
        0 1 -1 0
        1
    ''')
    fsas = [fsa0.clone() for i in range(10)]
    fsa_vec = k2.create_fsa_vec(fsas)
    fsa_vec.scores = torch.tensor([3, 0, 1, 5, 4, 2, 8, 1, 9, 6],
                                  dtype=torch.float)
    # index:   0 1    2 3 4    5 6 7 8 9
    # grouped: [ [3 0] [1 5 4] [2 8 1 9 6] ]
    shape = k2.RaggedShape('[ [x x] [x x x] [x x x x x] ]')
    nbest = k2.Nbest(fsa_vec, shape)

    # top_k: k is 1
    nbest1 = nbest.top_k(1)
    expected_fsa = k2.create_fsa_vec([fsa_vec[0], fsa_vec[3], fsa_vec[8]])
    assert str(nbest1.fsa) == str(expected_fsa)

    expected_shape = k2.RaggedShape('[ [x] [x] [x] ]')
    assert nbest1.shape == expected_shape

    # top_k: k is 2
    nbest2 = nbest.top_k(2)
    expected_fsa = k2.create_fsa_vec([
        fsa_vec[0], fsa_vec[1], fsa_vec[3], fsa_vec[4], fsa_vec[8],
        fsa_vec[6]
    ])
    assert str(nbest2.fsa) == str(expected_fsa)

    expected_shape = k2.RaggedShape('[ [x x] [x x] [x x] ]')
    assert nbest2.shape == expected_shape

    # top_k: k is 3
    nbest3 = nbest.top_k(3)
    expected_fsa = k2.create_fsa_vec([
        fsa_vec[0], fsa_vec[1], fsa_vec[1], fsa_vec[3], fsa_vec[4],
        fsa_vec[2], fsa_vec[8], fsa_vec[6], fsa_vec[9]
    ])
    assert str(nbest3.fsa) == str(expected_fsa)

    expected_shape = k2.RaggedShape('[ [x x x] [x x x] [x x x] ]')
    assert nbest3.shape == expected_shape

    # top_k: k is 4
    nbest4 = nbest.top_k(4)
    expected_fsa = k2.create_fsa_vec([
        fsa_vec[0], fsa_vec[1], fsa_vec[1], fsa_vec[1], fsa_vec[3],
        fsa_vec[4], fsa_vec[2], fsa_vec[2], fsa_vec[8], fsa_vec[6],
        fsa_vec[9], fsa_vec[5]
    ])
    assert str(nbest4.fsa) == str(expected_fsa)

    expected_shape = k2.RaggedShape('[ [x x x x] [x x x x] [x x x x] ]')
    assert nbest4.shape == expected_shape
def test_cat_fsa_vec(self):
    for device in self.devices:
        s = '''
            0 1 1 0.1
            0 1 2 0.2
            1 2 -1 0.3
            2
        '''
        fsa1 = k2.Fsa.from_str(s).to(device)
        fsa1.tensor_attr1 = torch.tensor([1, 2, 3]).to(device)
        fsa1.tensor_attr2 = torch.tensor([4, 5, 6]).to(device)
        fsa1.non_tensor_attr1 = 'fsa1'
        fsa1.ragged_tensor_attr1 = \
                k2.RaggedTensor('[[1 2] [] [3 4 5]]').to(device)
        fsa1.ragged_tensor_attr2 = \
                k2.RaggedTensor('[[1 20] [30] [5]]').to(device)

        fsa2 = k2.Fsa.from_str(s).to(device)
        fsa2.tensor_attr1 = torch.tensor([10, 20, 30]).to(device)
        fsa2.tensor_attr3 = torch.tensor([40, 50, 60]).to(device)
        fsa2.non_tensor_attr1 = 'fsa'
        fsa2.non_tensor_attr2 = 'fsa2'
        fsa2.ragged_tensor_attr1 = \
                k2.RaggedTensor('[[3] [4 5] [6 7]]').to(device)
        fsa2.ragged_tensor_attr3 = \
                k2.RaggedTensor('[[1 0] [0] [-1]]').to(device)

        fsa_vec1 = k2.create_fsa_vec([fsa1])
        fsa_vec2 = k2.create_fsa_vec([fsa2])
        fsa_vec = k2.cat([fsa_vec1, fsa_vec2])

        assert str(fsa_vec[0].arcs) == str(fsa1.arcs)
        assert str(fsa_vec[1].arcs) == str(fsa2.arcs)

        assert not hasattr(fsa_vec, 'tensor_attr2')
        assert not hasattr(fsa_vec, 'tensor_attr3')

        assert fsa_vec.non_tensor_attr1 == fsa1.non_tensor_attr1
        assert fsa_vec.non_tensor_attr2 == fsa2.non_tensor_attr2

        assert torch.all(
            torch.eq(fsa_vec.tensor_attr1,
                     torch.tensor([1, 2, 3, 10, 20, 30]).to(device)))

        assert fsa_vec.ragged_tensor_attr1 == k2.RaggedTensor([
            [1, 2],
            [],
            [3, 4, 5],
            [3],
            [4, 5],
            [6, 7],
        ]).to(device)

        assert not hasattr(fsa_vec, 'ragged_tensor_attr2')
        assert not hasattr(fsa_vec, 'ragged_tensor_attr3')
def test_fsa_vec_as_dict_ragged(self):
    r = k2.RaggedInt(
        k2.RaggedShape('[ [ x x ] [x] [ x x ] [x]]'),
        torch.tensor([3, 4, 5, 6, 7, 8], dtype=torch.int32))

    g = k2.Fsa.from_str('0 1 3 0.0\n 1 2 -1 0.0\n 2')
    h = k2.create_fsa_vec([g, g])
    h.aux_labels = r
    assert (h[0].aux_labels.dim0() == h[0].labels.shape[0])
def __init__(
    self,
    num_classes: int,
    blank: int,
    cfg: Optional[DictConfig] = None,
    intersect_pruned: bool = False,
    intersect_conf: GraphIntersectDenseConfig = GraphIntersectDenseConfig(),
    topo_type: str = "default",
    topo_with_self_loops: bool = True,
    device: torch.device = torch.device("cpu"),
):
    # use k2 import guard
    k2_import_guard()

    if cfg is not None:
        intersect_pruned = cfg.get("intersect_pruned", intersect_pruned)
        intersect_conf = cfg.get("intersect_conf", intersect_conf)
        topo_type = cfg.get("topo_type", topo_type)
        topo_with_self_loops = cfg.get("topo_with_self_loops", topo_with_self_loops)

    self.num_classes = num_classes
    self.blank = blank
    self.intersect_pruned = intersect_pruned
    self.device = device
    self.topo_type = topo_type
    self.topo_with_self_loops = topo_with_self_loops
    self.pad_fsavec = self.topo_type == "ctc_compact"
    self.intersect_conf = intersect_conf

    if not hasattr(self, "graph_compiler") or self.graph_compiler is None:
        self.graph_compiler = CtcTopologyCompiler(
            self.num_classes, self.topo_type, self.topo_with_self_loops, self.device
        )
    if not hasattr(self, "base_graph") or self.base_graph is None:
        self.base_graph = k2.create_fsa_vec([self.graph_compiler.ctc_topo_inv.invert()]).to(self.device)
    self.decoding_graph = None
def test_compose(self):
    s = '''
        0 1 11 1 1.0
        0 2 12 2 2.5
        1 3 -1 -1 0
        2 3 -1 -1 2.5
        3
    '''
    a_fsa = k2.Fsa.from_str(s, num_aux_labels=1).requires_grad_(True)

    s = '''
        0 1 1 1 1.0
        0 2 2 3 3.0
        1 2 3 2 2.5
        2 3 -1 -1 2.0
        3
    '''
    b_fsa = k2.Fsa.from_str(s, num_aux_labels=1).requires_grad_(True)

    ans = k2.compose(a_fsa, b_fsa, inner_labels='inner')
    ans = k2.connect(ans)

    ans = k2.create_fsa_vec([ans])
    scores = ans.get_tot_scores(log_semiring=True,
                                use_double_scores=False)
    # The reference values for `scores`, `a_fsa.grad` and `b_fsa.grad`
    # are computed using GTN.
    # See https://bit.ly/3heLAJq
    assert scores.item() == 10
    scores.backward()
    assert torch.allclose(a_fsa.grad, torch.tensor([0., 1., 0., 1.]))
    assert torch.allclose(b_fsa.grad, torch.tensor([0., 1., 0., 1.]))
def test_fsa_vec(self):
    for device in self.devices:
        # See https://git.io/JY7r4
        s = '''
            0 1 0 0.1
            0 2 0 0.2
            0 0 0 0.3
            1 1 0 0.4
            1 2 0 0.5
            2 3 -1 0.6
            3
        '''
        fsa1 = k2.Fsa.from_str(s).to(device).requires_grad_(True)
        fsa2 = k2.Fsa.from_str(s).to(device).requires_grad_(True)

        fsa_vec = k2.create_fsa_vec([fsa1, fsa2])
        ans = k2.remove_epsilon_self_loops(fsa_vec)

        # See https://git.io/JY7oC
        expected_fsa = k2.Fsa.from_str('''
            0 1 0 0.1
            0 2 0 0.2
            1 2 0 0.5
            2 3 -1 0.6
            3
        ''')
        assert str(ans[0]) == str(expected_fsa)
        assert str(ans[1]) == str(expected_fsa)

        (ans.scores.sum() * 2).backward()
        expected_grad = torch.tensor([2, 2, 0, 0, 2,
                                      2.]).to(fsa1.scores.grad)
        assert torch.all(torch.eq(fsa1.scores.grad, expected_grad))
        assert torch.all(torch.eq(fsa2.scores.grad, expected_grad))
def test(self):
    s0 = '''
        0 1 1 0.1
        0 2 2 0.2
        1 2 3 0.3
        1 3 -1 0.4
        2 3 -1 0.5
        2 1 5 0.55
        3
    '''
    s1 = '''
        0 1 -1 0.6
        1
    '''
    s2 = '''
        0 1 6 0.7
        1 0 7 0.8
        1 0 8 0.9
        1 2 -1 1.0
        2
    '''
    fsa0 = k2.Fsa.from_str(s0)
    fsa1 = k2.Fsa.from_str(s1)
    fsa2 = k2.Fsa.from_str(s2)

    fsa_vec = k2.create_fsa_vec([fsa0, fsa1, fsa2])
    fsa = k2.union(fsa_vec)
    dot = k2.to_dot(fsa)
    dot.render('/tmp/fsa', format='pdf')
    # the fsa is saved to /tmp/fsa.pdf
    print(fsa)
def test_two_fsas(self):
    s1 = '''
        0 1 1 1.0
        1 1 1 50.0
        1 2 2 2.0
        2 3 -1 3.0
        3
    '''
    s2 = '''
        0 1 1 1.0
        1 2 2 2.0
        2 3 -1 3.0
        3
    '''
    fsa1 = k2.Fsa.from_str(s1)
    fsa2 = k2.Fsa.from_str(s2)

    fsa1.requires_grad_(True)
    fsa2.requires_grad_(True)

    fsa_vec = k2.create_fsa_vec([fsa1, fsa2])
    log_prob = torch.tensor(
        [[[0.1, 0.2, 0.3], [0.04, 0.05, 0.06], [0.0, 0.0, 0.0]],
         [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.0, 0.0, 0.0]]],
        dtype=torch.float32,
        requires_grad=True)

    supervision_segments = torch.tensor([[0, 0, 3], [1, 0, 2]],
                                        dtype=torch.int32)
    dense_fsa_vec = k2.DenseFsaVec(log_prob, supervision_segments)
    out_fsa = k2.intersect_dense(fsa_vec,
                                 dense_fsa_vec,
                                 output_beam=100000)
    assert out_fsa.shape == (2, None, None), 'There should be two FSAs!'

    scores = k2.get_tot_scores(out_fsa,
                               log_semiring=False,
                               use_float_scores=True)
    scores.sum().backward()

    # `expected` results are computed using gtn.
    # See https://bit.ly/3oYObeb
    # expected_scores_out_fsa = torch.tensor(
    #     [1.2, 2.06, 3.0, 1.2, 50.5, 2.0, 3.0])
    expected_grad_fsa1 = torch.tensor([1.0, 1.0, 1.0, 1.0])
    expected_grad_fsa2 = torch.tensor([1.0, 1.0, 1.0])
    print("fsa2 is ", fsa2.__str__())
    # TODO(dan):: fix this..
    # expected_grad_log_prob = torch.tensor([
    #     0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0, 0, 0, 0.0, 1.0, 0.0, 0.0, 1.0,
    #     0.0, 0.0, 0.0, 1.0
    # ]).reshape_as(log_prob)

    # assert torch.allclose(out_fsa.scores, expected_scores_out_fsa)
    assert torch.allclose(expected_grad_fsa1, fsa1.scores.grad)
    assert torch.allclose(expected_grad_fsa2, fsa2.scores.grad)
def test_simple(self):
    s = '''
        0 1 1 1.0
        1 1 1 50.0
        1 2 2 2.0
        2 3 -1 3.0
        3
    '''
    fsa = k2.Fsa.from_str(s)
    fsa.requires_grad_(True)
    fsa_vec = k2.create_fsa_vec([fsa])
    log_prob = torch.tensor([[[0.1, 0.2, 0.3], [0.04, 0.05, 0.06]]],
                            dtype=torch.float32,
                            requires_grad=True)

    supervision_segments = torch.tensor([[0, 0, 2]], dtype=torch.int32)
    dense_fsa_vec = k2.DenseFsaVec(log_prob, supervision_segments)
    out_fsa = k2.intersect_dense(fsa_vec,
                                 dense_fsa_vec,
                                 output_beam=100000)
    scores = k2.get_tot_scores(out_fsa,
                               log_semiring=False,
                               use_float_scores=True)
    scores.sum().backward()

    # `expected` results are computed using gtn.
    # See https://bit.ly/3oYObeb
    expected_scores_out_fsa = torch.tensor([1.2, 2.06, 3.0])
    expected_grad_fsa = torch.tensor([1.0, 0.0, 1.0, 1.0])
    expected_grad_log_prob = torch.tensor(
        [0.0, 1.0, 0.0, 0.0, 0.0, 1.0]).reshape_as(log_prob)

    assert torch.allclose(out_fsa.scores, expected_scores_out_fsa)
    assert torch.allclose(expected_grad_fsa, fsa.scores.grad)
    assert torch.allclose(expected_grad_log_prob, log_prob.grad)
def get_hierarchical_targets(ys: List[List[int]],
                             lexicon: k2.Fsa) -> List[Tensor]:
    """Get hierarchical transcripts (i.e., phone level transcripts) from
    transcripts (i.e., word level transcripts).

    Args:
        ys: Word level transcripts.
        lexicon: Its labels are words, while its aux_labels are phones.

    Returns:
        List[Tensor]: Phone level transcripts.
    """
    if lexicon is None:
        return ys
    else:
        L_inv = lexicon

    n_batch = len(ys)
    indices = torch.tensor(range(n_batch))

    transcripts = k2.create_fsa_vec([k2.linear_fsa(x) for x in ys])
    transcripts_lexicon = k2.intersect(transcripts, L_inv)
    transcripts_lexicon = k2.arc_sort(k2.connect(transcripts_lexicon))
    transcripts_lexicon = k2.remove_epsilon(transcripts_lexicon)
    transcripts_lexicon = k2.shortest_path(transcripts_lexicon,
                                           use_double_scores=True)

    ys = get_texts(transcripts_lexicon, indices)
    ys = [torch.tensor(y) for y in ys]

    return ys
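
# A minimal, hedged usage sketch (not from the original code) for
# `get_hierarchical_targets`: with `lexicon=None` the word-level targets pass
# through unchanged.  With a real lexicon, the caller is expected to pass an
# FSA arranged so that intersecting the word-level linear FSAs with it (the
# variable called `L_inv` above) yields phone sequences on the best path.
def _sketch_hierarchical_targets():
    word_level_targets = [[2, 5, 3], [4, 4]]
    assert get_hierarchical_targets(word_level_targets,
                                    lexicon=None) == word_level_targets
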
def test_single_fsa(self):
    s = '''
        0 4 1 1
        0 1 1 1
        1 2 1 2
        1 3 1 3
        2 7 1 4
        3 7 1 5
        4 6 1 2
        4 8 1 3
        5 9 -1 4
        6 9 -1 3
        7 9 -1 5
        8 9 -1 6
        9
    '''
    for device in self.devices:
        fsa = k2.Fsa.from_str(s).to(device)
        fsa = k2.create_fsa_vec([fsa])
        fsa.requires_grad_(True)
        best_path = k2.shortest_path(fsa, use_double_scores=False)

        # we recompute the total_scores for backprop
        total_scores = best_path.scores.sum()

        assert total_scores == 14
        expected = torch.zeros(12)
        expected[torch.tensor([1, 3, 5, 10])] = 1
        total_scores.backward()
        assert torch.allclose(fsa.scores.grad, expected.to(device))
def __init__(
    self,
    num_classes: int,
    topo_type: str = "default",
    topo_with_self_loops: bool = True,
    device: torch.device = torch.device("cpu"),
    aux_graph: Optional['k2.Fsa'] = None,
):
    super().__init__(num_classes, topo_type, topo_with_self_loops, device, aux_graph)
    if aux_graph is None:
        self.den_graph = k2.create_fsa_vec([self.ctc_topo_inv.invert()]).to(self.device)
    else:
        self.den_graph = k2.create_fsa_vec([self.base_graph.detach()]).to(self.device)
def test_getitem(self):
    s0 = '''
        0 1 1 0.1
        1 2 2 0.2
        2 3 -1 0.3
        3
    '''
    s1 = '''
        0 1 -1 0.4
        1
    '''
    fsa0 = k2.Fsa.from_str(s0).requires_grad_(True)
    fsa1 = k2.Fsa.from_str(s1).requires_grad_(True)

    fsa_vec = k2.create_fsa_vec([fsa0, fsa1])
    assert fsa_vec.shape == (2, None, None)

    new_fsa0 = fsa_vec[0]
    assert new_fsa0.shape == (4, None)  # it has 4 states

    scale = torch.arange(new_fsa0.scores.numel())
    (new_fsa0.scores * scale).sum().backward()
    assert torch.allclose(fsa0.scores.grad, torch.tensor([0., 1., 2.]))

    new_fsa1 = fsa_vec[1]
    assert new_fsa1.shape == (2, None)  # it has 2 states
    (new_fsa1.scores * 5).sum().backward()
    assert torch.allclose(fsa1.scores.grad, torch.tensor([5.]))
def compile(self, texts: Iterable[str]) -> k2.Fsa:
    decoding_graphs = k2.create_fsa_vec(
        [self.compile_one_and_cache(text) for text in texts])

    # make sure the gradient is not accumulated
    decoding_graphs.requires_grad_(False)
    return decoding_graphs
def test(self):
    s0 = '''
        0 1 1 0.1
        0 2 2 0.2
        1 2 3 0.3
        1 3 -1 0.4
        2 3 -1 0.5
        2 1 5 0.55
        3
    '''
    s1 = '''
        0 1 -1 0.6
        1
    '''
    s2 = '''
        0 1 6 0.7
        1 0 7 0.8
        1 0 8 0.9
        1 2 -1 1.0
        2
    '''
    cpu_device = torch.device('cpu')
    cuda_device = torch.device('cuda', 0)
    for device in (cpu_device, cuda_device):
        fsa0 = k2.Fsa.from_str(s0)
        fsa1 = k2.Fsa.from_str(s1)
        fsa2 = k2.Fsa.from_str(s2)

        fsa_vec = k2.create_fsa_vec([fsa0, fsa1, fsa2]).to_(device)
        fsa = k2.union(fsa_vec)
        assert torch.allclose(
            fsa.arcs.values()[:, :3],
            torch.tensor([
                [0, 1, 0],  # fsa 0
                [0, 4, 0],  # fsa 1
                [0, 5, 0],  # fsa 2
                # now for fsa0
                [1, 2, 1],
                [1, 3, 2],
                [2, 3, 3],
                [2, 7, -1],
                [3, 7, -1],
                [3, 2, 5],
                # fsa1
                [4, 7, -1],
                # fsa2
                [5, 6, 6],
                [6, 5, 7],
                [6, 5, 8],
                [6, 7, -1]
            ]).to(torch.int32).to(device))

        assert torch.allclose(
            fsa.scores,
            torch.tensor([
                0., 0., 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.55, 0.6, 0.7, 0.8,
                0.9, 1.0
            ]).to(device))
def my_func(scores: torch.Tensor,
            switch: torch.Tensor) -> torch.Tensor:
    s = '''
        0 4 1 0
        0 1 1 0
        1 2 1 0
        1 3 1 0
        2 7 1 0
        3 7 1 0
        4 6 1 0
        4 8 1 0
        5 9 -1 0
        6 9 -1 0
        7 9 -1 0
        8 9 -1 0
        9
    '''
    fsa = k2.Fsa.from_str(s).to(scores.device)
    fsa_vec = k2.create_fsa_vec([fsa])
    assert scores.requires_grad is True
    fsa_vec.scores = scores.to(torch.float32)
    log_semiring = switch[0].item() == 1
    log_like = fsa_vec.get_tot_scores(log_semiring=log_semiring,
                                      use_double_scores=True)
    return -2 * log_like
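
# A minimal, hedged sketch (not part of the original test) of driving
# `my_func` above: the 12-element scores tensor matches the 12 arcs of the
# hard-coded FSA, and `switch` selects the semiring.  It only demonstrates
# the forward call and backward().
def _run_my_func_sketch() -> torch.Tensor:
    scores = torch.randn(12, requires_grad=True)
    switch = torch.tensor([1])  # 1 -> log semiring, anything else -> tropical
    loss = my_func(scores, switch).sum()
    loss.backward()
    return scores.grad
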
def lexicon_fst(args):
    '''This program creates lexicon.fst.pdf and lexicon.fst.txt
    based on args.word_file.

    input:
        args: namespace

    return:
        lexicon: k2.Fsa, the lexicon FST

    output:
        lexicon.fst.txt and lexicon.fst.pdf in args.data_directory

    The lexicon FST compresses repeated characters in the emission FST.
    '''
    symbols_str = symboletable(args)
    symbol_pairs = symbols_str.split('\n')
    num_noneps = len(symbol_pairs) - 1
    symbol2fst = [None]  # <eps> has no fst
    for i in range(1, num_noneps + 1):
        s = '''
            0 1 %d %d 0.0
            1 1 %d 0 0.0
            1 2 -1 -1 0.0
            2
        ''' % (i, i, i)
        g = k2.Fsa.from_str(s, acceptor=False)
        symbol2fst.append(g)

    fst_vec = k2.create_fsa_vec(symbol2fst[1:])
    fst_union = k2.union(fst_vec)
    lexicon = k2.closure(fst_union)
    lexicon.draw(os.path.join(args.data_directory, 'lexicon.fst.pdf'),
                 title='lexicon')
    # lexicon.symbols = k2.SymbolTable.from_str(symbols_str)
    # lexicon.aux_symbols = k2.SymbolTable.from_str(symbols_str)
    with open(os.path.join(args.data_directory, 'lexicon.fst.txt'),
              'w') as f:
        f.write(k2.to_str(lexicon))
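
# A hedged, self-contained illustration (not from the original script) of the
# per-symbol pattern built in the loop above: the FST for symbol 1 emits 1
# once and maps any further repeats of 1 to epsilon (0); union over all
# symbols followed by closure accepts arbitrary sequences of such runs.
# Only k2 calls already used in `lexicon_fst` appear here.
def _sketch_single_symbol_fst() -> k2.Fsa:
    s = '''
        0 1 1 1 0.0
        1 1 1 0 0.0
        1 2 -1 -1 0.0
        2
    '''
    g = k2.Fsa.from_str(s, acceptor=False)
    return k2.closure(k2.union(k2.create_fsa_vec([g])))
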
def test_index_fsa(self):
    devices = [torch.device('cpu')]
    if torch.cuda.is_available():
        devices.append(torch.device('cuda', 0))

    for device in devices:
        s1 = '''
            0 1 1 0.1
            1 2 -1 0.2
            2
        '''
        s2 = '''
            0 1 -1 1.0
            1
        '''
        fsa1 = k2.Fsa.from_str(s1)
        fsa1.tensor_attr = torch.tensor([10, 20], dtype=torch.int32)
        fsa1.ragged_attr = k2.ragged.create_ragged2([[11, 12],
                                                     [21, 22, 23]])

        fsa2 = k2.Fsa.from_str(s2)
        fsa2.tensor_attr = torch.tensor([100], dtype=torch.int32)
        fsa2.ragged_attr = k2.ragged.create_ragged2([[111]])

        fsa1 = fsa1.to(device)
        fsa2 = fsa2.to(device)

        fsa_vec = k2.create_fsa_vec([fsa1, fsa2])

        single1 = k2.index_fsa(
            fsa_vec, torch.tensor([0], dtype=torch.int32, device=device))
        assert torch.all(torch.eq(fsa1.tensor_attr, single1.tensor_attr))
        assert str(single1.ragged_attr) == str(fsa1.ragged_attr)
        assert single1.device == device

        single2 = k2.index_fsa(
            fsa_vec, torch.tensor([1], dtype=torch.int32, device=device))
        assert torch.all(torch.eq(fsa2.tensor_attr, single2.tensor_attr))
        assert str(single2.ragged_attr) == str(fsa2.ragged_attr)
        assert single2.device == device

        multiples = k2.index_fsa(
            fsa_vec,
            torch.tensor([0, 1, 0, 1, 1], dtype=torch.int32,
                         device=device))
        assert multiples.shape == (5, None, None)
        assert torch.all(
            torch.eq(
                multiples.tensor_attr,
                torch.cat(
                    (fsa1.tensor_attr, fsa2.tensor_attr, fsa1.tensor_attr,
                     fsa2.tensor_attr, fsa2.tensor_attr))))
        assert str(multiples.ragged_attr) == str(
            k2.ragged.append([
                fsa1.ragged_attr, fsa2.ragged_attr, fsa1.ragged_attr,
                fsa2.ragged_attr, fsa2.ragged_attr
            ], axis=0))  # noqa
        assert multiples.device == device
def test_two_dense(self):
    s = '''
        0 1 1 1.0
        1 1 1 50.0
        1 2 2 2.0
        2 3 -1 3.0
        3
    '''
    for device in self.devices:
        fsa = k2.Fsa.from_str(s).to(device)
        fsa.requires_grad_(True)
        fsa_vec = k2.create_fsa_vec([fsa])
        log_prob = torch.tensor(
            [[[0.1, 0.2, 0.3], [0.04, 0.05, 0.06], [0.0, 0.0, 0.0]],
             [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.0, 0.0, 0.0]]],
            dtype=torch.float32,
            device=device,
            requires_grad=True)

        supervision_segments = torch.tensor([[0, 0, 2], [1, 0, 3]],
                                            dtype=torch.int32)
        dense_fsa_vec = k2.DenseFsaVec(log_prob, supervision_segments)
        out_fsa = k2.intersect_dense_pruned(fsa_vec,
                                            dense_fsa_vec,
                                            search_beam=100000,
                                            output_beam=100000,
                                            min_active_states=0,
                                            max_active_states=10000,
                                            seqframe_idx_name='seqframe',
                                            frame_idx_name='frame')
        assert torch.all(
            torch.eq(out_fsa.seqframe,
                     torch.tensor([0, 1, 2, 3, 4, 5, 6], device=device)))

        assert torch.all(
            torch.eq(out_fsa.frame,
                     torch.tensor([0, 1, 2, 0, 1, 2, 3], device=device)))

        assert out_fsa.shape == (2, None,
                                 None), 'There should be two FSAs!'

        scores = out_fsa.get_tot_scores(log_semiring=False,
                                        use_double_scores=False)
        scores.sum().backward()

        # `expected` results are computed using gtn.
        # See https://bit.ly/3oYObeb
        expected_scores_out_fsa = torch.tensor(
            [1.2, 2.06, 3.0, 1.2, 50.5, 2.0, 3.0], device=device)
        expected_grad_fsa = torch.tensor([2.0, 1.0, 2.0, 2.0],
                                         device=device)
        expected_grad_log_prob = torch.tensor([
            0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0, 0, 0, 0.0, 1.0, 0.0, 0.0,
            1.0, 0.0, 0.0, 0.0, 1.0
        ]).reshape_as(log_prob).to(device)

        assert torch.allclose(out_fsa.scores, expected_scores_out_fsa)
        assert torch.allclose(expected_grad_fsa, fsa.scores.grad)
        assert torch.allclose(expected_grad_log_prob, log_prob.grad)
def test_two_fsas_long_pruned(self):
    # as test_two_fsas_long in intersect_dense_test.py,
    # but with pruned intersection
    s1 = '''
        0 1 1 1.0
        1 1 1 50.0
        1 2 2 2.0
        2 3 -1 3.0
        3
    '''
    s2 = '''
        0 1 1 1.0
        1 2 2 2.0
        2 3 -1 3.0
        3
    '''
    devices = [torch.device('cpu')]
    if torch.cuda.is_available():
        devices.append(torch.device('cuda', 0))

    for device in devices:
        fsa1 = k2.Fsa.from_str(s1)
        fsa2 = k2.Fsa.from_str(s2)

        fsa1.requires_grad_(True)
        fsa2.requires_grad_(True)

        fsa_vec = k2.create_fsa_vec([fsa1, fsa2])
        log_prob = torch.rand((2, 100, 3),
                              dtype=torch.float32,
                              device=device,
                              requires_grad=True)

        supervision_segments = torch.tensor([[0, 1, 95], [1, 20, 50]],
                                            dtype=torch.int32)
        dense_fsa_vec = k2.DenseFsaVec(log_prob, supervision_segments)
        fsa_vec = fsa_vec.to(device)
        out_fsa = k2.intersect_dense_pruned(fsa_vec,
                                            dense_fsa_vec,
                                            search_beam=100,
                                            output_beam=100,
                                            min_active_states=1,
                                            max_active_states=10,
                                            seqframe_idx_name='seqframe',
                                            frame_idx_name='frame')
        expected_seqframe = torch.arange(96).to(torch.int32).to(device)
        assert torch.allclose(out_fsa.seqframe, expected_seqframe)

        # the second output FSA is empty since there is no self-loop in fsa2
        assert torch.allclose(out_fsa.frame, expected_seqframe)

        assert out_fsa.shape == (2, None,
                                 None), 'There should be two FSAs!'

        scores = out_fsa.get_tot_scores(log_semiring=False,
                                        use_double_scores=False)
        scores.sum().backward()
def test(self):
    s0 = '''
        0 1 1 0.1
        0 2 2 0.2
        1 2 3 0.3
        2 3 -1 0.4
        3
    '''
    s1 = '''
        0 1 -1 0.5
        1
    '''
    s2 = '''
        0 2 1 0.6
        0 1 2 0.7
        1 3 -1 0.8
        2 1 3 0.9
        3
    '''
    fsa0 = k2.Fsa.from_str(s0).requires_grad_(True)
    fsa1 = k2.Fsa.from_str(s1).requires_grad_(True)
    fsa2 = k2.Fsa.from_str(s2).requires_grad_(True)

    fsa_vec = k2.create_fsa_vec([fsa0, fsa1, fsa2])

    new_fsa21 = k2.index(fsa_vec, torch.tensor([2, 1],
                                               dtype=torch.int32))
    assert new_fsa21.shape == (2, None, None)
    assert torch.allclose(
        new_fsa21.arcs.values()[:, :3],
        torch.tensor([
            # fsa 2
            [0, 2, 1],
            [0, 1, 2],
            [1, 3, -1],
            [2, 1, 3],
            # fsa 1
            [0, 1, -1]
        ]).to(torch.int32))

    scale = torch.arange(new_fsa21.scores.numel())
    (new_fsa21.scores * scale).sum().backward()
    assert torch.allclose(fsa0.scores.grad, torch.tensor([0., 0, 0, 0]))
    assert torch.allclose(fsa1.scores.grad, torch.tensor([4.]))
    assert torch.allclose(fsa2.scores.grad, torch.tensor([0., 1., 2., 3.]))

    # now select only a single FSA
    fsa0.scores.grad = None
    fsa1.scores.grad = None
    fsa2.scores.grad = None

    new_fsa0 = k2.index(fsa_vec, torch.tensor([0], dtype=torch.int32))
    assert new_fsa0.shape == (1, None, None)

    scale = torch.arange(new_fsa0.scores.numel())
    (new_fsa0.scores * scale).sum().backward()
    assert torch.allclose(fsa0.scores.grad, torch.tensor([0., 1., 2., 3.]))
    assert torch.allclose(fsa1.scores.grad, torch.tensor([0.]))
    assert torch.allclose(fsa2.scores.grad, torch.tensor([0., 0., 0., 0.]))
def compile(self,
            texts: Iterable[str],
            P: k2.Fsa,
            replicate_den: bool = True) -> Tuple[k2.Fsa, k2.Fsa]:
    '''Create numerator and denominator graphs from transcripts
    and the bigram phone LM.

    Args:
      texts:
        A list of transcripts. Within a transcript, words are
        separated by spaces.
      P:
        The bigram phone LM created by :func:`create_bigram_phone_lm`.
      replicate_den:
        If True, the returned den_graph is replicated to match the number
        of FSAs in the returned num_graph; if False, the returned
        den_graph contains only a single FSA.

    Returns:
      A tuple (num_graph, den_graph), where

        - `num_graph` is the numerator graph. It is an FsaVec with
          shape `(len(texts), None, None)`.

        - `den_graph` is the denominator graph. It is an FsaVec with the
          same shape as `num_graph` if replicate_den is True; otherwise,
          it is an FsaVec containing only a single FSA.
    '''
    assert P.device == self.device
    P_with_self_loops = k2.add_epsilon_self_loops(P)

    ctc_topo_P = k2.intersect(self.ctc_topo_inv,
                              P_with_self_loops,
                              treat_epsilons_specially=False).invert()

    ctc_topo_P = k2.arc_sort(ctc_topo_P)

    num_graphs = self.build_num_graphs(texts)
    num_graphs_with_self_loops = k2.remove_epsilon_and_add_self_loops(
        num_graphs)

    num_graphs_with_self_loops = k2.arc_sort(num_graphs_with_self_loops)

    num = k2.compose(ctc_topo_P,
                     num_graphs_with_self_loops,
                     treat_epsilons_specially=False)
    num = k2.arc_sort(num)

    ctc_topo_P_vec = k2.create_fsa_vec([ctc_topo_P.detach()])
    if replicate_den:
        indexes = torch.zeros(len(texts),
                              dtype=torch.int32,
                              device=self.device)
        den = k2.index_fsa(ctc_topo_P_vec, indexes)
    else:
        den = ctc_topo_P_vec

    return num, den
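
# A hedged, self-contained illustration (not from the original code) of the
# `replicate_den` trick used above: k2.index_fsa with an all-zero index tensor
# replicates a single-FSA FsaVec so that it matches the batch size of the
# numerator graphs.  The tiny FSA here is only for demonstration.
def _sketch_replicate_den(batch_size: int = 3) -> k2.Fsa:
    tiny = k2.Fsa.from_str('''
        0 1 -1 0.0
        1
    ''')
    vec = k2.create_fsa_vec([tiny])
    indexes = torch.zeros(batch_size, dtype=torch.int32)
    replicated = k2.index_fsa(vec, indexes)
    assert replicated.shape == (batch_size, None, None)
    return replicated
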
def test_nbest_constructor(self):
    fsa = k2.Fsa.from_str('''
        0 1 -1 0.1
        1
    ''')

    fsa_vec = k2.create_fsa_vec([fsa, fsa, fsa])
    shape = k2.RaggedShape('[[x x] [x]]')
    nbest = k2.Nbest(fsa_vec, shape)
def test(self):
    # for the symbol table
    # <eps> 0
    # a 1
    # b 2
    # c 3

    # an FSA that recognizes a+(b|c)
    s = '''
        0 1 1 0.1
        1 1 1 0.2
        1 2 2 0.3
        1 3 3 0.4
        2 4 -1 0.5
        3 4 -1 0.6
        4
    '''
    a_fsa = k2.Fsa.from_str(s)
    a_fsa.requires_grad_(True)

    # an FSA that recognizes ab
    s = '''
        0 1 1 10
        1 2 2 20
        2 3 -1 30
        3
    '''
    b_fsa = k2.Fsa.from_str(s)
    b_fsa.requires_grad_(True)

    fsa = k2.intersect(a_fsa, b_fsa)
    assert len(fsa.shape) == 2

    actual_str = k2.to_str(fsa)
    expected_str = '\n'.join(
        ['0 1 1 10.1', '1 2 2 20.3', '2 3 -1 30.5', '3'])
    assert actual_str.strip() == expected_str

    loss = fsa.scores.sum()
    loss.backward()
    # arc 0, 2, and 4 of a_fsa are kept in the final intersected FSA
    assert torch.allclose(
        a_fsa.scores.grad,
        torch.tensor([1, 0, 1, 0, 1, 0], dtype=torch.float32))

    assert torch.allclose(b_fsa.scores.grad,
                          torch.tensor([1, 1, 1], dtype=torch.float32))

    # if any of the input FSAs is an FsaVec,
    # the output FSA is also an FsaVec.
    a_fsa.scores.grad = None
    b_fsa.scores.grad = None
    a_fsa = k2.create_fsa_vec([a_fsa])
    fsa = k2.intersect(a_fsa, b_fsa)
    assert len(fsa.shape) == 3
def compile(self, texts: Iterable[str],
            P: k2.Fsa) -> Tuple[k2.Fsa, k2.Fsa, k2.Fsa]:
    '''Create numerator and denominator graphs from transcripts
    and the bigram phone LM.

    Args:
      texts:
        A list of transcripts. Within a transcript, words are
        separated by spaces.
      P:
        The bigram phone LM created by :func:`create_bigram_phone_lm`.

    Returns:
      A tuple (num_graph, den_graph, decoding_graph), where

        - `num_graph` is the numerator graph. It is an FsaVec with
          shape `(len(texts), None, None)`.
          It is the result of compose(ctc_topo, P, L, transcript).

        - `den_graph` is the denominator graph. It is an FsaVec with the
          same shape as `num_graph`. It is the result of
          compose(ctc_topo, P).

        - `decoding_graph` is the result of
          compose(ctc_topo, L_disambig, G).
          Note that it is a single Fsa, not an FsaVec.
    '''
    assert P.device == self.device
    P_with_self_loops = k2.add_epsilon_self_loops(P)

    ctc_topo_P = k2.intersect(self.ctc_topo_inv,
                              P_with_self_loops,
                              treat_epsilons_specially=False).invert()

    ctc_topo_P = k2.arc_sort(ctc_topo_P)

    num_graphs = self.build_num_graphs(texts)
    num_graphs_with_self_loops = k2.remove_epsilon_and_add_self_loops(
        num_graphs)

    num_graphs_with_self_loops = k2.arc_sort(num_graphs_with_self_loops)

    num = k2.compose(ctc_topo_P,
                     num_graphs_with_self_loops,
                     treat_epsilons_specially=False,
                     inner_labels='phones')
    num = k2.arc_sort(num)

    ctc_topo_P_vec = k2.create_fsa_vec([ctc_topo_P.detach()])
    indexes = torch.zeros(len(texts),
                          dtype=torch.int32,
                          device=self.device)
    den = k2.index_fsa(ctc_topo_P_vec, indexes)

    return num, den, self.decoding_graph