def testStateToOlabelUniqueSinglePath(self):
  labels = [
      [3, 4, 3],
      [1, 0, 0],
  ]
  num_labels = 8

  # 3 frames, 2 batch, 8 states (4 label, 4 blank).
  #
  # There is only a single valid path for each sequence because the frame
  # lengths and the label lengths are the same.
  states = [
      [[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
       [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
      [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
       [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
      [[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
       [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
  ]
  labels = ops.convert_to_tensor(labels)
  states = math_ops.log(states)

  olabel = ctc_ops._state_to_olabel_unique(
      labels, num_labels, states, ctc_ops.ctc_unique_labels(labels))
  olabel = math_ops.exp(olabel)
  blank = olabel[:, :, 0]

  self.assertAllClose(blank, [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]])
  self.assertAllClose(olabel[:, :, 1:], [
      [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
       [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
      [[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
       [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
      [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
       [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
  ])
def testCtcLossDenseUniqueFastPathIsSameAsCtcLoss(self):
  random_seed.set_random_seed(5)

  batch_size = 8
  num_labels = 6
  label_length = 5
  num_frames = 12
  logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
  labels = random_ops.random_uniform(
      [batch_size, label_length],
      minval=1,
      maxval=num_labels,
      dtype=dtypes.int64)

  label_lengths = random_ops.random_uniform(
      [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
  label_mask = array_ops.sequence_mask(
      label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
  labels *= label_mask

  logit_lengths = [num_frames] * batch_size

  ctc_loss = ctc_ops.ctc_loss_dense(
      labels=labels,
      logits=logits,
      label_length=label_lengths,
      logit_length=logit_lengths,
      unique=ctc_ops.ctc_unique_labels(labels))
  ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

  # Shift labels down by one (move blank from 0 to num_labels - 1).
  tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32) - 1
  tf_nn_ctc_logits = array_ops.concat([
      logits[:, :, 1:],
      logits[:, :, 0:1],
  ], axis=2)

  tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
      tf_ctc_loss_labels, label_lengths)

  tf_nn_ctc_loss = ctc_ops.ctc_loss(
      labels=tf_ctc_loss_labels,
      inputs=tf_nn_ctc_logits,
      sequence_length=logit_lengths,
      time_major=True)
  tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

  with self.cached_session() as sess:
    for _ in range(32):
      self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
      self.assertAllClose(
          *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
          rtol=2e-06,
          atol=2e-06)
def testUnique(self):
  labels = [
      [3, 4, 4, 3],
      [1, 1, 1, 0],
  ]
  unique, idx = ctc_ops.ctc_unique_labels(labels)
  self.assertAllEqual([
      [3, 4, 0, 0],
      [1, 0, 0, 0],
  ], unique)
  self.assertAllEqual([
      [0, 1, 1, 0],
      [0, 0, 0, 1],
  ], idx)
def testStateToOlabelUnique(self):
  labels = [
      [3, 4, 3, 4],
      [1, 1, 1, 0],
  ]
  num_labels = 8

  # 3 frames, 2 batch, 10 states (5 label, 5 blank).
  states = [
      [[0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20],
       [0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.30]],
      [[1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0],
       [2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0]],
      [[11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0],
       [21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0]],
  ]
  labels = ops.convert_to_tensor(labels)
  states = math_ops.log(states)

  olabel = ctc_ops._state_to_olabel_unique(
      labels, num_labels, states, ctc_ops.ctc_unique_labels(labels))
  olabel = math_ops.exp(olabel)
  blank = olabel[:, :, 0]

  self.assertAllClose(blank, [
      [0.16 + 0.17 + 0.18 + 0.19 + 0.20, 0.26 + 0.27 + 0.28 + 0.29 + 0.30],
      [1.6 + 1.7 + 1.8 + 1.9 + 2.0, 2.6 + 2.7 + 2.8 + 2.9 + 3.0],
      [16.0 + 17.0 + 18.0 + 19.0 + 20.0, 26.0 + 27.0 + 28.0 + 29.0 + 30.0],
  ])
  self.assertAllClose(olabel[:, :, 1:], [
      [[0.0, 0.0, 0.12 + 0.14, 0.13 + 0.15, 0.0, 0.0, 0.0],
       [0.22 + 0.23 + 0.24, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
      [[0.0, 0.0, 1.2 + 1.4, 1.3 + 1.5, 0.0, 0.0, 0.0],
       [2.2 + 2.3 + 2.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
      [[0.0, 0.0, 12.0 + 14.0, 13.0 + 15.0, 0.0, 0.0, 0.0],
       [22.0 + 23.0 + 24.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
  ])
def testCtcLossDenseUniqueFastPathWithBlankIndexIsSameAsCtcLoss(self):
  random_seed.set_random_seed(5)

  batch_size = 8
  num_labels = 6
  label_length = 5
  num_frames = 12
  logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
  labels = random_ops.random_uniform(
      [batch_size, label_length],
      minval=0,
      maxval=num_labels - 1,
      dtype=dtypes.int64)

  label_lengths = random_ops.random_uniform(
      [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
  label_mask = array_ops.sequence_mask(
      label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
  labels *= label_mask

  logit_lengths = [num_frames] * batch_size

  tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32)
  tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
      tf_ctc_loss_labels, label_lengths)

  tf_nn_ctc_loss = ctc_ops.ctc_loss(
      labels=tf_ctc_loss_labels,
      inputs=logits,
      sequence_length=logit_lengths,
      time_major=True)
  tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

  # Shift the blank logits/labels to be somewhere in the middle.
  blank_index = 2
  shifted_logits = array_ops.concat([
      logits[:, :, :blank_index],
      logits[:, :, -1:],
      logits[:, :, blank_index:-1],
  ], axis=2)
  shifted_labels = array_ops.where_v2(labels < blank_index, labels, labels + 1)

  ctc_loss = ctc_ops.ctc_loss_dense(
      labels=shifted_labels,
      logits=shifted_logits,
      label_length=label_lengths,
      logit_length=logit_lengths,
      blank_index=blank_index,
      unique=ctc_ops.ctc_unique_labels(shifted_labels))
  ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

  with self.cached_session() as sess:
    for _ in range(32):
      self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
      self.assertAllClose(
          *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
          rtol=2e-06,
          atol=2e-06)