Example 1
    def testStateToOlabelUniqueSinglePath(self):
        labels = [
            [3, 4, 3],
            [1, 0, 0],
        ]
        num_labels = 8

        # 3 frames, 2 batch, 8 states (4 label, 4 blank).
        #
        # There is only a single valid path for each sequence because the frame
        # lengths and the label lengths are the same.
        states = [[[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                   [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
                  [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                   [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
                  [[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
                   [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]]
        labels = ops.convert_to_tensor(labels)
        states = math_ops.log(states)
        olabel = ctc_ops._state_to_olabel_unique(
            labels, num_labels, states, ctc_ops.ctc_unique_labels(labels))
        olabel = math_ops.exp(olabel)
        blank = olabel[:, :, 0]

        self.assertAllClose(blank, [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]])
        self.assertAllClose(olabel[:, :, 1:], [
            [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
             [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
            [[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
             [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
            [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
             [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
        ])
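
The expected values above follow from the state layout the test relies on. As a reading aid only (this sketch is not the library's implementation, and the array names below are illustrative): for a padded label row of length L, state column 0 is the initial state and is ignored, columns 1..L hold the posterior of being at the corresponding label position, and the remaining columns are blank states; in the output, index 0 pools the blank mass, so olabel[:, :, 1:] column k corresponds to label k + 1.

import numpy as np

# Batch 0 of the test above: labels [3, 4, 3] and 8 state columns (L = 3).
labels = [3, 4, 3]
states = np.array([
    [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],  # frame 0: all mass on label position 1
    [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0],  # frame 1: label position 2
    [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],  # frame 2: label position 3
])
num_labels = 8

olabel = np.zeros((states.shape[0], num_labels))
olabel[:, 0] = states[:, len(labels) + 1:].sum(axis=1)  # pooled blank states
for pos, label in enumerate(labels, start=1):           # label positions are columns 1..L
    if label != 0:                                       # 0 is padding, not a real label
        olabel[:, label] += states[:, pos]

print(olabel[:, 1:])  # reproduces the batch-0 rows asserted above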
Example 2
  def testCtcLossDenseUniqueFastPathIsSameAsCtcLoss(self):
    random_seed.set_random_seed(5)

    batch_size = 8
    num_labels = 6
    label_length = 5
    num_frames = 12
    logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
    labels = random_ops.random_uniform(
        [batch_size, label_length], minval=1, maxval=num_labels,
        dtype=dtypes.int64)

    label_lengths = random_ops.random_uniform(
        [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
    label_mask = array_ops.sequence_mask(
        label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
    labels *= label_mask

    logit_lengths = [num_frames] * batch_size

    ctc_loss = ctc_ops.ctc_loss_dense(
        labels=labels,
        logits=logits,
        label_length=label_lengths,
        logit_length=logit_lengths,
        unique=ctc_ops.ctc_unique_labels(labels))
    ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

    # Shift labels down by one (move blank from 0 to num_labels - 1)
    tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32) - 1
    tf_nn_ctc_logits = array_ops.concat([
        logits[:, :, 1:],
        logits[:, :, 0:1],
    ], axis=2)

    tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
        tf_ctc_loss_labels, label_lengths)

    tf_nn_ctc_loss = ctc_ops.ctc_loss(
        labels=tf_ctc_loss_labels,
        inputs=tf_nn_ctc_logits,
        sequence_length=logit_lengths,
        time_major=True)
    tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

    with self.cached_session() as sess:
      for _ in range(32):
        self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
        self.assertAllClose(
            *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
            rtol=2e-06,
            atol=2e-06)
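
ctc_loss_dense treats class 0 as the blank, while the classic ctc_ops.ctc_loss op expects the blank in the last class; that is why the test rotates the logit channels and subtracts one from the label ids before converting them to sparse. A minimal NumPy sketch of that re-indexing (array names are illustrative, not from the library):

import numpy as np

num_labels = 6
frame_logits = np.arange(num_labels)  # stand-in scores for one frame; class 0 is the blank
dense_labels = np.array([3, 1, 2])    # non-blank ids are 1 .. num_labels - 1

# Move the blank from class 0 to the last class; every other class slides down by one.
rotated_logits = np.concatenate([frame_logits[1:], frame_logits[:1]])
shifted_labels = dense_labels - 1     # label ids follow the same slide

assert (rotated_logits[shifted_labels] == frame_logits[dense_labels]).all()
assert rotated_logits[-1] == frame_logits[0]  # the blank now lives in the last slot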
Example 3
 def testUnique(self):
   labels = [
       [3, 4, 4, 3],
       [1, 1, 1, 0],
   ]
   unique, idx = ctc_ops.ctc_unique_labels(labels)
   self.assertAllEqual([
       [3, 4, 0, 0],
       [1, 0, 0, 0],
   ], unique)
   self.assertAllEqual([
       [0, 1, 1, 0],
       [0, 0, 0, 1],
   ], idx)
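
The pair returned by ctc_unique_labels can be read as follows: unique keeps each distinct id of a row once, in order of first appearance, zero-padded back to the row width, while idx maps every original position to the slot of its id inside unique (note how the trailing 0 in the second row gets its own slot). The helper below is a rough pure-Python restatement of that contract, written only to make the assertions easy to check by hand; it is not the library's algorithm:

def unique_labels_sketch(row):
    # Deduplicate in order of first appearance and record each position's slot.
    seen, idx = [], []
    for label in row:
        if label not in seen:
            seen.append(label)
        idx.append(seen.index(label))
    return seen + [0] * (len(row) - len(seen)), idx

print(unique_labels_sketch([3, 4, 4, 3]))  # ([3, 4, 0, 0], [0, 1, 1, 0])
print(unique_labels_sketch([1, 1, 1, 0]))  # ([1, 0, 0, 0], [0, 0, 0, 1])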
Example 4
  def testStateToOlabelUnique(self):
    labels = [
        [3, 4, 3, 4],
        [1, 1, 1, 0],
    ]
    num_labels = 8

    # 3 frames, 2 batch, 10 states (5 label, 5 blank).
    states = [
        [[0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20],
         [0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.30]],
        [[1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0],
         [2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0]],
        [[11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0],
         [21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0]],
    ]
    labels = ops.convert_to_tensor(labels)
    states = math_ops.log(states)
    olabel = ctc_ops._state_to_olabel_unique(
        labels, num_labels, states, ctc_ops.ctc_unique_labels(labels))
    olabel = math_ops.exp(olabel)
    blank = olabel[:, :, 0]
    self.assertAllClose(blank, [
        [0.16 + 0.17 + 0.18 + 0.19 + 0.20,
         0.26 + 0.27 + 0.28 + 0.29 + 0.30],
        [1.6 + 1.7 + 1.8 + 1.9 + 2.0,
         2.6 + 2.7 + 2.8 + 2.9 + 3.0],
        [16.0 + 17.0 + 18.0 + 19.0 + 20.0,
         26.0 + 27.0 + 28.0 + 29.0 + 30.0]])
    self.assertAllClose(olabel[:, :, 1:], [
        [[0.0, 0.0, 0.12 + 0.14, 0.13 + 0.15, 0.0, 0.0, 0.0],
         [0.22 + 0.23 + 0.24, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
        [[0.0, 0.0, 1.2 + 1.4, 1.3 + 1.5, 0.0, 0.0, 0.0],
         [2.2 + 2.3 + 2.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
        [[0.0, 0.0, 12.0 + 14.0, 13.0 + 15.0, 0.0, 0.0, 0.0],
         [22.0 + 23.0 + 24.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
    ])
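
The expected numbers here can be spot-checked by hand under the same assumed state layout as in the first example: with four label positions, columns 1-4 of a state row carry the label-position posteriors, columns 5-9 are blank states, column 0 is ignored, and repeated labels pool the mass of all their positions. For frame 0 of batch 0:

frame0_batch0 = [0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20]

blank = sum(frame0_batch0[5:])                 # 0.16 + 0.17 + 0.18 + 0.19 + 0.20
label_3 = frame0_batch0[1] + frame0_batch0[3]  # label 3 sits at positions 0 and 2
label_4 = frame0_batch0[2] + frame0_batch0[4]  # label 4 sits at positions 1 and 3
print(blank, label_3, label_4)                 # ~0.90, ~0.26, ~0.28, matching the assertions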
Example 5
    def testCtcLossDenseUniqueFastPathWithBlankIndexIsSameAsCtcLoss(self):
        random_seed.set_random_seed(5)

        batch_size = 8
        num_labels = 6
        label_length = 5
        num_frames = 12
        logits = random_ops.random_uniform(
            [num_frames, batch_size, num_labels])
        labels = random_ops.random_uniform([batch_size, label_length],
                                           minval=0,
                                           maxval=num_labels - 1,
                                           dtype=dtypes.int64)

        label_lengths = random_ops.random_uniform([batch_size],
                                                  minval=2,
                                                  maxval=label_length,
                                                  dtype=dtypes.int64)
        label_mask = array_ops.sequence_mask(label_lengths,
                                             maxlen=label_length,
                                             dtype=label_lengths.dtype)
        labels *= label_mask

        logit_lengths = [num_frames] * batch_size

        tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32)
        tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
            tf_ctc_loss_labels, label_lengths)

        tf_nn_ctc_loss = ctc_ops.ctc_loss(labels=tf_ctc_loss_labels,
                                          inputs=logits,
                                          sequence_length=logit_lengths,
                                          time_major=True)
        tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

        # Shift the blank logits/labels to be somewhere in the middle.
        blank_index = 2
        shifted_logits = array_ops.concat([
            logits[:, :, :blank_index],
            logits[:, :, -1:],
            logits[:, :, blank_index:-1],
        ], axis=2)
        shifted_labels = array_ops.where_v2(labels < blank_index, labels,
                                            labels + 1)

        ctc_loss = ctc_ops.ctc_loss_dense(
            labels=shifted_labels,
            logits=shifted_logits,
            label_length=label_lengths,
            logit_length=logit_lengths,
            blank_index=blank_index,
            unique=ctc_ops.ctc_unique_labels(shifted_labels))
        ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

        with self.cached_session() as sess:
            for _ in range(32):
                self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
                self.assertAllClose(
                    *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
                    rtol=2e-06,
                    atol=2e-06)
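
The splice in this last test generalizes the single-slot rotation used in the dense-vs-sparse comparison above: the last channel (the classic op's blank) is inserted at blank_index, and every label id at or above blank_index is bumped by one so that it keeps addressing the same channel. A small NumPy sketch of just that index bookkeeping (names are illustrative only):

import numpy as np

blank_index = 2
frame_logits = np.arange(6)               # one frame, 6 classes; class 5 is the blank here
dense_labels = np.array([0, 1, 2, 3, 4])  # label ids before the shift

shifted_logits = np.concatenate([
    frame_logits[:blank_index],
    frame_logits[-1:],
    frame_logits[blank_index:-1],
])
shifted_labels = np.where(dense_labels < blank_index, dense_labels, dense_labels + 1)

assert (shifted_logits[shifted_labels] == frame_logits[dense_labels]).all()
assert shifted_logits[blank_index] == frame_logits[-1]  # the blank now sits at blank_index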