    def _run_rnnt(self,
                  acts,
                  labels,
                  input_lengths,
                  label_lengths,
                  expected_costs,
                  expected_grads,
                  blank,
                  use_gpu=False):

        self.assertEqual(acts.shape, expected_grads.shape)

        acts_t = tf.constant(acts)
        labels_t = tf.constant(labels)
        input_lengths_t = tf.constant(input_lengths)
        label_lengths_t = tf.constant(label_lengths)

        with tf.GradientTape() as tape:

            tape.watch(acts_t)
            #logits = acts_t if use_gpu else tf.nn.log_softmax(acts_t)
            tf.print(acts_t)
            logits = tf.nn.log_softmax(acts_t)
            tf.print(logits)
            costs = rnnt_loss(logits, labels_t, input_lengths_t,
                              label_lengths_t, blank)

        grads = tape.gradient(costs, [acts_t])[0]

        self.assertAllClose(costs, expected_costs, atol=1e-6)
        self.assertAllClose(grads, expected_grads, atol=1e-6)
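For reference, the call pattern this test exercises can be run standalone. A minimal sketch, assuming warprnnt_tensorflow is importable; shapes follow the tests on this page (acts of shape [B, T, U+1, V] with the blank included in V, int32 labels and lengths), and log-probabilities are passed as in the CPU path above.

import tensorflow as tf
from warprnnt_tensorflow import rnnt_loss

B, T, U, V = 1, 2, 2, 5                       # batch, frames, target length, vocab incl. blank
acts = tf.random.uniform((B, T, U + 1, V))    # raw joint-network outputs
log_probs = tf.nn.log_softmax(acts, axis=-1)  # the CPU path in these examples takes log-probs

labels = tf.constant([[1, 2]], dtype=tf.int32)
input_lengths = tf.constant([T], dtype=tf.int32)
label_lengths = tf.constant([U], dtype=tf.int32)

costs = rnnt_loss(log_probs, labels, input_lengths, label_lengths, 0)  # last argument: blank index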
Example #2
def _loss_fn(y_true, y_pred):
    # spec_lengths and label_lengths are taken from the enclosing scope
    y_true = tf.cast(y_true, dtype=tf.int32)
    if len(tf.config.list_physical_devices('GPU')) == 0:
        y_pred = tf.nn.log_softmax(y_pred)
    loss = rnnt_loss(y_pred, y_true, spec_lengths, label_lengths)
    return loss
Example #3
def train_step(slice_input, label_input, slice_len, label_len):
	with tf.GradientTape() as tape:
		outputs = model([slice_input, label_input])
		loss = tf.reduce_mean(warprnnt_tensorflow.rnnt_loss(outputs, label_input[:, 1:], slice_len, label_len))
		tf.print(loss)
	gradient = tape.gradient(loss, model.trainable_variables)
	optimizer.apply_gradients(zip(gradient, model.trainable_variables))
Example #4
    def _run_rnnt(self,
                  acts,
                  labels,
                  input_lengths,
                  label_lengths,
                  expected_costs,
                  expected_grads,
                  blank,
                  use_gpu=False):
        self.assertEqual(acts.shape, expected_grads.shape)
        acts_t = tf.constant(acts)
        labels_t = tf.constant(labels)
        input_lengths_t = tf.constant(input_lengths)
        label_lengths_t = tf.constant(label_lengths)

        logits = acts_t if use_gpu else tf.nn.log_softmax(acts_t)
        costs = rnnt_loss(logits, labels_t, input_lengths_t, label_lengths_t,
                          blank)

        grads = tf.gradients(costs, [acts_t])[0]

        with self.test_session(use_gpu=use_gpu) as sess:
            (tf_costs, tf_grad) = sess.run([costs, grads])
            self.assertAllClose(tf_costs, expected_costs, atol=1e-6)
            self.assertAllClose(tf_grad, expected_grads, atol=1e-6)
Example #5
    def train_step(fb, labels, fb_lengths, labels_lengths, enc_state):

        pred_inp = labels[:, :-1]
        pred_out = labels[:, 1:]

        with tf.GradientTape() as tape:
            predictions, _ = model([fb, pred_inp, enc_state], training=True)
            if len(tf.config.list_physical_devices(
                    'GPU')) == 0 and _has_loss_func:
                predictions = tf.nn.log_softmax(predictions)
            if _has_loss_func:
                loss = rnnt_loss(predictions, pred_out, fb_lengths,
                                 labels_lengths)
            else:
                loss = 0
                if verbose:
                    logging.info(
                        'Loss function not available, not computing gradients or optimizing.'
                    )

        if _has_loss_func:
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients,
                                          model.trainable_variables))

        train_loss(loss)
        train_accuracy(pred_out, predictions[:, -1, :, :])
Example #6
    def rnnt_lambda_func(args):
        y_trans, y_pred, labels, input_length, label_length = args
        import keras.backend as K
        import tensorflow as tf

        # the 2 is critical here since the first couple outputs of the RNN tend to
        # be garbage:
        shift = 2
        y_trans = y_trans[:, shift:, :]
        input_length -= shift

        # calculating lattices from the output from the prediction network and
        # the transcription network.
        batch_size = K.shape(y_trans)[0]
        y_trans = K.expand_dims(y_trans, axis=2)  # BT1H
        y_pred = K.expand_dims(y_pred, axis=1)  # B1UH
        acts = tf.nn.log_softmax(y_trans + y_pred)
        input_length = K.reshape(input_length, [batch_size])
        label_length = K.reshape(label_length, [batch_size])

        from warprnnt_tensorflow import rnnt_loss
        list_value = rnnt_loss(acts,
                               labels,
                               input_length,
                               label_length,
                               blank_label=39)

        return K.reshape(list_value, [batch_size])
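A quick shape check of the broadcast used above, with illustrative sizes: expanding to B,T,1,H and B,1,U,H lets the addition produce the full B,T,U,H joint lattice.

import tensorflow as tf

B, T, U, H = 2, 5, 3, 4
y_trans = tf.zeros((B, T, 1, H))   # transcription-network output, expanded on the U axis
y_pred = tf.zeros((B, 1, U, H))    # prediction-network output, expanded on the T axis
print((y_trans + y_pred).shape)    # broadcasts to (2, 5, 3, 4), i.e. B x T x U x H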
Example #7
    def rnnt_lambda_func_v2(args):
        acts, labels, input_length, label_length = args
        import keras.backend as K
        import tensorflow as tf

        batch_size = K.shape(acts)[0]

        # the 2 is critical here since the first couple outputs of the RNN tend to
        # be garbage:
        shift = 2
        acts = acts[:, shift:, :, :]  # B T U V
        input_length -= shift

        acts = tf.nn.log_softmax(acts)
        input_length = K.reshape(input_length, [batch_size])
        label_length = K.reshape(label_length, [batch_size])

        from warprnnt_tensorflow import rnnt_loss
        list_value = rnnt_loss(acts,
                               labels,
                               input_length,
                               label_length,
                               blank_label=39)

        return K.reshape(list_value, [batch_size])
Example #8
def dev_step(x, y, x_len, y_len):
    logits, x_len, y_len = model(x, y, x_len, y_len, training=False)
    if not tf.config.list_physical_devices('GPU'):
        logits = tf.nn.log_softmax(logits)
    loss = rnnt_loss(logits, y, x_len, y_len, blank)
    loss = loss / tf.cast(y_len, dtype=tf.float32)
    error = 0
    return tf.reduce_mean(loss), error
Example #9
def _loss_fn(y_true, y_pred, spec_lengths, label_lengths):
    # reduction_factor (the encoder's time downsampling) comes from the enclosing scope
    y_true = tf.cast(y_true, dtype=tf.int32)
    if not tf.test.is_built_with_cuda():
        y_pred = tf.nn.log_softmax(y_pred)
    spec_lengths = tf.cast(tf.math.ceil(spec_lengths / reduction_factor),
                           dtype=tf.int32)
    loss = rnnt_loss(y_pred, y_true, spec_lengths, label_lengths)
    return loss
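The ceil-division above shortens the frame lengths to match the encoder's time downsampling; a small check, assuming a reduction_factor of 2:

import tensorflow as tf

reduction_factor = 2                # assumed encoder downsampling factor
spec_lengths = tf.constant([7.0, 8.0])
print(tf.cast(tf.math.ceil(spec_lengths / reduction_factor), dtype=tf.int32))  # [4 4]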
Example #10
    def train_step(x, y, x_len, y_len):
        with tf.GradientTape() as tape:
            logits, x_len, y_len = model(x, y, x_len, y_len, training=True)
            if not tf.config.list_physical_devices('GPU'):
                logits = tf.nn.log_softmax(logits)
            loss = rnnt_loss(logits, y, x_len, y_len, blank)
            loss = loss / tf.cast(y_len, dtype=tf.float32)
            error = 0

        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))
        return tf.reduce_mean(loss), error
Example #11
        def rnnt_loss_wrapper(labels, outputs):
            logit_lengths = tf.math.floordiv(
                self.model.inputs[2][:, 0] +
                tf.math.floormod(self.model.inputs[2][:, 0], 2), 2)
            logit_lengths = keras.backend.print_tensor(logit_lengths,
                                                       'logit_lengths')

            label_lengths = keras.backend.print_tensor(
                self.model.inputs[3][:, 0], 'label_lengths')
            labels_ = keras.backend.print_tensor(labels, 'labels')
            outputs_ = keras.backend.print_tensor(outputs, 'logits')
            # use the print_tensor-wrapped tensors so the debug prints actually fire
            return rnnt_loss(
                outputs_,
                labels_,
                logit_lengths,
                label_lengths,
                # keras.backend.print_tensor(tf.shape(outputs_), 'dddd'),
                blank_label=self._alphabet.blank_token)
Example #12
def gen_test_case(batch_num, max_label_length, max_input_length,
                  output_vocab_size):
    # Inputs
    label_lengths = np.random.randint(low=1,
                                      high=max_label_length + 1,
                                      size=batch_num,
                                      dtype=np.int32)
    # label_lengths = np.asarray([max_label_length] * batch_num)
    blank_label = 0  # assuming zero for now; np.random.randint(low=0, high=output_vocab_size+1, size=())
    input_lengths = np.random.randint(low=1,
                                      high=max_input_length + 1,
                                      size=batch_num,
                                      dtype=np.int32)
    # input_lengths = np.asarray([max_input_length] * batch_num)
    labels = np.random.randint(low=1,
                               high=output_vocab_size + 1,
                               size=(batch_num, max_label_length),
                               dtype=np.int32)
    with tf.GradientTape(persistent=True) as g:
        acts = tf.convert_to_tensor(
            np.random.rand(batch_num, max_input_length, max_label_length + 1,
                           output_vocab_size + 1).astype(np.float32))
        g.watch(acts)
        log_probs = tf.nn.log_softmax(acts, axis=3)
        g.watch(log_probs)

        # Outputs
        final_loss = warprnnt_tensorflow.rnnt_loss(log_probs, labels,
                                                   input_lengths,
                                                   label_lengths, blank_label)

    grads = g.gradient(final_loss, acts)
    grads_p = g.gradient(final_loss, log_probs)

    return {
        'acts': acts,
        'log_probs': log_probs.numpy(),
        'labels': labels,
        'input_lengths': input_lengths,
        'label_lengths': label_lengths,
        'blank_label': blank_label,
        'final_loss': final_loss.numpy(),
        'grads': grads.numpy(),
        'grads_p': grads_p
    }
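An illustrative invocation of gen_test_case with small sizes; it assumes the warp-RNNT op is built and importable.

case = gen_test_case(batch_num=2, max_label_length=3,
                     max_input_length=4, output_vocab_size=5)
print(case['final_loss'])      # one RNN-T cost per batch element
print(case['grads'].shape)     # same shape as acts: (B, T, U+1, V+1) = (2, 4, 4, 6)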
Example #13
    def test_forward(self):
        # Softmax activations for the following inputs:
        acts = np.array([0.1, 0.6, 0.1, 0.1, 0.1, 0.1, 
                        0.1, 0.6, 0.1, 0.1, 0.1, 0.1, 
                        0.2, 0.8, 0.1, 0.1, 0.6, 0.1, 
                        0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 
                        0.1, 0.7, 0.1, 0.2, 0.1, 0.1], dtype=np.float32).reshape(1, 2, 3, 5)

        expected_costs = np.array([4.495667], dtype=np.float32)

        labels = np.array([[1, 2]], dtype=np.int32)
        input_lengths = np.array([2], dtype=np.int32)
        label_lengths = np.array([2], dtype=np.int32)

        acts_t = tf.constant(acts)
        labels_t = tf.constant(labels)
        input_lengths_t = tf.constant(input_lengths)
        label_lengths_t = tf.constant(label_lengths)
        acts_t = tf.nn.log_softmax(acts_t) # NOTE cpu
        costs = rnnt_loss(acts_t, labels_t, input_lengths_t, label_lengths_t)
        self.assertAllClose(costs, expected_costs, atol=1e-6)
Example #14
    def test_forward(self):
        # Softmax activations for the following inputs:
        acts = np.array([
            0.1, 0.6, 0.1, 0.1, 0.1, 0.1, 0.1, 0.6, 0.1, 0.1, 0.1, 0.1, 0.2,
            0.8, 0.1, 0.1, 0.6, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.7,
            0.1, 0.2, 0.1, 0.1
        ],
                        dtype=np.float32).reshape(1, 2, 3, 5)

        labels = np.array([[1, 2]], dtype=np.int32)
        input_lengths = np.array([2], dtype=np.int32)
        label_lengths = np.array([2], dtype=np.int32)

        acts_t = tf.constant(acts)
        labels_t = tf.constant(labels)
        input_lengths_t = tf.constant(input_lengths)
        label_lengths_t = tf.constant(label_lengths)
        acts_t = tf.nn.log_softmax(acts_t)  # NOTE cpu
        costs = rnnt_loss(acts_t, labels_t, input_lengths_t, label_lengths_t)
        with self.test_session():
            print(costs.eval())
Example #15
def test_step(slice_input, label_input, slice_len, label_len):
	outputs = model([slice_input, label_input])
	loss = tf.reduce_mean(warprnnt_tensorflow.rnnt_loss(outputs, label_input[:, 1:], slice_len, label_len))
	tf.print(loss)
Example #16
            [0.779194617063042, 0.18331417220174862, 0.113745182072432],
            [0.24022162381327106, 0.3394695622533106, 0.1341595066017014]]],

        [[[0.5055615569388828, 0.051597282072282646, 0.6402903936686337],
            [0.43073311517251, 0.8294731834714112, 0.1774668847323424],
            [0.3207001991262245, 0.04288308912457006, 0.30280282975568984]],

            [[0.6751777088333762, 0.569537369330242, 0.5584738347504452],
            [0.08313242153985256, 0.06016544344162322, 0.10795752845152584],
            [0.7486153608562472, 0.943918041459349, 0.4863558118797222]],

            [[0.4181986264486809, 0.6524078485043804, 0.024242983423721887],
            [0.13458171554507403, 0.3663418070512402, 0.2958297395361563],
            [0.9236695822497084, 0.6899291482654177, 0.7418981733448822]],

            [[0.25000547599982104, 0.6034295486281007, 0.9872887878887768],
            [0.5926057265215715, 0.8846724004467684, 0.5434495396894328],
            [0.6607698886038497, 0.3771277082495921, 0.3580209022231813]]]], dtype=tf.float32)

labels = tf.constant([[1, 2, 3], [1, 1, 1]], dtype=np.int32)
input_length = tf.constant([4, 4], dtype=tf.int32)
label_length = tf.constant([2, 2], dtype=tf.int32)

with tf.GradientTape() as tape:
    tape.watch(acts)
    logits = tf.nn.log_softmax(acts)
    costs = rnnt_loss(logits, labels, input_length, label_length, 0)

grads = tape.gradient(costs, [acts])
print(costs)
print(grads)
Example #17
def warp_loss(logits, labels, label_lengths, logit_lengths):
    log_probs = tf.nn.log_softmax(logits, axis=3)
    loss = warprnnt_tensorflow.rnnt_loss(
        log_probs, labels, logit_lengths, label_lengths)
    return loss
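A minimal smoke test for the wrapper above, with illustrative shapes; note that its parameter order puts label_lengths before logit_lengths, the reverse of the underlying rnnt_loss call. It assumes the warp-RNNT op is available.

import tensorflow as tf

B, T, U, V = 2, 6, 3, 10
logits = tf.random.uniform((B, T, U + 1, V))
labels = tf.random.uniform((B, U), minval=1, maxval=V, dtype=tf.int32)
print(warp_loss(logits, labels,
                label_lengths=tf.fill([B], U),
                logit_lengths=tf.fill([B], T)))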
Example #18
])

x = Input(shape=(1024, 128))
y = Input(shape=(1, ))

h_enc = encoder_net(x)
h_pre = prediction_net(y)
p = joint_net(tf.concat([h_enc, h_pre], -1))

model = Model(inputs=[x, y], outputs=p)

# Train it!
optim = Adam(1e-4)
train_loss = tf.keras.metrics.Mean(name='train_loss')

for epoch in range(20):
    train_loss.reset_states()

    for batch in train:
        with tf.GradientTape() as tape:
            pred = model([batch['x'], batch['y']])
            loss = rnnt_loss(batch['y'], pred)

        grads = tape.gradient(loss, model.trainable_variables)
        optim.apply_gradients(zip(grads, model.trainable_variables))

        train_loss(loss)

# Save the model
model.save('models/librispeech_subwords8k_rnnt')