def _run_rnnt(self, acts, labels, input_lengths, label_lengths,
              expected_costs, expected_grads, blank, use_gpu=False):
    self.assertEqual(acts.shape, expected_grads.shape)
    acts_t = tf.constant(acts)
    labels_t = tf.constant(labels)
    input_lengths_t = tf.constant(input_lengths)
    label_lengths_t = tf.constant(label_lengths)
    with tf.GradientTape() as tape:
        tape.watch(acts_t)
        # logits = acts_t if use_gpu else tf.nn.log_softmax(acts_t)
        tf.print(acts_t)
        logits = tf.nn.log_softmax(acts_t)
        tf.print(logits)
        costs = rnnt_loss(logits, labels_t, input_lengths_t, label_lengths_t, blank)
    grads = tape.gradient(costs, [acts_t])[0]
    self.assertAllClose(costs, expected_costs, atol=1e-6)
    self.assertAllClose(grads, expected_grads, atol=1e-6)
def _loss_fn(y_true, y_pred):
    y_true = tf.cast(y_true, dtype=tf.int32)
    # The CPU kernel expects log-softmax inputs; the GPU kernel applies softmax internally.
    if len(tf.config.list_physical_devices('GPU')) == 0:
        y_pred = tf.nn.log_softmax(y_pred)
    loss = rnnt_loss(y_pred, y_true, spec_lengths, label_lengths)
    return loss
def train_step(slice_input, label_input, slice_len, label_len):
    with tf.GradientTape() as tape:
        outputs = model([slice_input, label_input])
        loss = tf.reduce_mean(
            warprnnt_tensorflow.rnnt_loss(outputs, label_input[:, 1:],
                                          slice_len, label_len))
        tf.print(loss)
    gradient = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradient, model.trainable_variables))
def _run_rnnt(self, acts, labels, input_lengths, label_lengths,
              expected_costs, expected_grads, blank, use_gpu=False):
    self.assertEqual(acts.shape, expected_grads.shape)
    acts_t = tf.constant(acts)
    labels_t = tf.constant(labels)
    input_lengths_t = tf.constant(input_lengths)
    label_lengths_t = tf.constant(label_lengths)
    # GPU kernel takes raw activations; CPU kernel expects log-softmax inputs.
    logits = acts_t if use_gpu else tf.nn.log_softmax(acts_t)
    costs = rnnt_loss(logits, labels_t, input_lengths_t, label_lengths_t, blank)
    grads = tf.gradients(costs, [acts_t])[0]
    with self.test_session(use_gpu=use_gpu) as sess:
        (tf_costs, tf_grad) = sess.run([costs, grads])
        self.assertAllClose(tf_costs, expected_costs, atol=1e-6)
        self.assertAllClose(tf_grad, expected_grads, atol=1e-6)
def train_step(fb, labels, fb_lengths, labels_lengths, enc_state):
    pred_inp = labels[:, :-1]
    pred_out = labels[:, 1:]
    with tf.GradientTape() as tape:
        predictions, _ = model([fb, pred_inp, enc_state], training=True)
        if len(tf.config.list_physical_devices('GPU')) == 0 and _has_loss_func:
            predictions = tf.nn.log_softmax(predictions)
        if _has_loss_func:
            loss = rnnt_loss(predictions, pred_out, fb_lengths, labels_lengths)
        else:
            loss = 0
            if verbose:
                logging.info('Loss function not available, not computing '
                             'gradients or optimizing.')
    if _has_loss_func:
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(pred_out, predictions[:, -1, :, :])
def rnnt_lambda_func(args):
    y_trans, y_pred, labels, input_length, label_length = args
    import keras.backend as K
    import tensorflow as tf

    # the 2 is critical here since the first couple outputs of the RNN tend to
    # be garbage:
    shift = 2
    y_trans = y_trans[:, shift:, :]
    input_length -= shift

    # calculating lattices from the output from the prediction network and
    # the transcription network.
    batch_size = K.shape(y_trans)[0]
    y_trans = K.expand_dims(y_trans, axis=2)  # B T 1 H
    y_pred = K.expand_dims(y_pred, axis=1)    # B 1 U H
    acts = tf.nn.log_softmax(y_trans + y_pred)

    input_length = K.reshape(input_length, [batch_size])
    label_length = K.reshape(label_length, [batch_size])

    from warprnnt_tensorflow import rnnt_loss
    list_value = rnnt_loss(acts, labels, input_length, label_length,
                           blank_label=39)
    return K.reshape(list_value, [batch_size])
def rnnt_lambda_func_v2(args):
    acts, labels, input_length, label_length = args
    import keras.backend as K
    import tensorflow as tf

    batch_size = K.shape(acts)[0]
    # the 2 is critical here since the first couple outputs of the RNN tend to
    # be garbage:
    shift = 2
    acts = acts[:, shift:, :, :]  # B T U V
    input_length -= shift

    acts = tf.nn.log_softmax(acts)
    input_length = K.reshape(input_length, [batch_size])
    label_length = K.reshape(label_length, [batch_size])

    from warprnnt_tensorflow import rnnt_loss
    list_value = rnnt_loss(acts, labels, input_length, label_length,
                           blank_label=39)
    return K.reshape(list_value, [batch_size])
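# A minimal sketch (not part of the original snippets) of how a lambda function like
# rnnt_lambda_func_v2 is typically wired into a Keras Lambda layer, following the
# standard CTC-style pattern. The input names, shapes, and vocabulary size below are
# illustrative assumptions.
from keras.layers import Input, Lambda
from keras.models import Model

acts_in = Input(shape=(None, None, 40), name='acts')             # B x T x U x V (assumed V=40)
labels_in = Input(shape=(None,), dtype='int32', name='labels')
input_len_in = Input(shape=(1,), dtype='int32', name='input_length')
label_len_in = Input(shape=(1,), dtype='int32', name='label_length')

loss_out = Lambda(rnnt_lambda_func_v2, name='rnnt')(
    [acts_in, labels_in, input_len_in, label_len_in])

loss_model = Model(inputs=[acts_in, labels_in, input_len_in, label_len_in],
                   outputs=loss_out)
# The Lambda layer already returns the per-utterance loss, so compile with an
# identity "loss" that simply passes it through.
loss_model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)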
def dev_step(x, y, x_len, y_len):
    logits, x_len, y_len = model(x, y, x_len, y_len, training=False)
    if not tf.config.list_physical_devices('GPU'):
        logits = tf.nn.log_softmax(logits)
    loss = rnnt_loss(logits, y, x_len, y_len, blank)
    loss = loss / tf.cast(y_len, dtype=tf.float32)
    error = 0
    return tf.reduce_mean(loss), error
def _loss_fn(y_true, y_pred, spec_lengths, label_lengths):
    y_true = tf.cast(y_true, dtype=tf.int32)
    if not tf.test.is_built_with_cuda():
        y_pred = tf.nn.log_softmax(y_pred)
    spec_lengths = tf.cast(tf.math.ceil(spec_lengths / reduction_factor),
                           dtype=tf.int32)
    loss = rnnt_loss(y_pred, y_true, spec_lengths, label_lengths)
    return loss
def train_step(x, y, x_len, y_len):
    with tf.GradientTape() as tape:
        logits, x_len, y_len = model(x, y, x_len, y_len, training=True)
        if not tf.config.list_physical_devices('GPU'):
            logits = tf.nn.log_softmax(logits)
        loss = rnnt_loss(logits, y, x_len, y_len, blank)
        loss = loss / tf.cast(y_len, dtype=tf.float32)
    error = 0
    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return tf.reduce_mean(loss), error
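# A sketch (under assumptions) of the outer loop driving train_step above.
# `train_dataset` and `num_epochs` are hypothetical names; the dataset is assumed
# to yield (features, labels, feature_lengths, label_lengths) batches.
num_epochs = 10
for epoch in range(num_epochs):
    for x, y, x_len, y_len in train_dataset:
        loss, error = train_step(x, y, x_len, y_len)
    tf.print('epoch', epoch, 'last batch mean loss', loss)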
def rnnt_loss_wrapper(labels, outputs):
    logit_lengths = tf.math.floordiv(
        self.model.inputs[2][:, 0] +
        tf.math.floormod(self.model.inputs[2][:, 0], 2), 2)
    logit_lengths = keras.backend.print_tensor(logit_lengths, 'logit_lengths')
    label_lengths = keras.backend.print_tensor(
        self.model.inputs[3][:, 0], 'label_lengths')
    labels_ = keras.backend.print_tensor(labels, 'labels')
    outputs_ = keras.backend.print_tensor(outputs, 'logits')
    return rnnt_loss(
        outputs,
        labels,
        logit_lengths,
        label_lengths,
        # keras.backend.print_tensor(tf.shape(outputs_), 'dddd'),
        blank_label=self._alphabet.blank_token)
def gen_test_case(batch_num, max_label_length, max_input_length,
                  output_vocab_size):
    # Inputs
    label_lengths = np.random.randint(low=1, high=max_label_length + 1,
                                      size=batch_num)
    # label_lengths = np.asarray([max_label_length] * batch_num)
    blank_label = 0  # assuming zero for now; np.random.randint(low=0, high=output_vocab_size+1, size=())
    input_lengths = np.random.randint(low=1, high=max_input_length + 1,
                                      size=batch_num)
    # input_lengths = np.asarray([max_input_length] * batch_num)
    labels = np.random.randint(low=1, high=output_vocab_size + 1,
                               size=(batch_num, max_label_length))

    with tf.GradientTape(persistent=True) as g:
        acts = tf.convert_to_tensor(
            np.random.rand(batch_num, max_input_length, max_label_length + 1,
                           output_vocab_size + 1).astype(np.float32))
        g.watch(acts)
        log_probs = tf.nn.log_softmax(acts, axis=3)
        g.watch(log_probs)
        # Outputs
        final_loss = warprnnt_tensorflow.rnnt_loss(log_probs, labels,
                                                   input_lengths,
                                                   label_lengths, blank_label)
    grads = g.gradient(final_loss, acts)
    grads_p = g.gradient(final_loss, log_probs)
    return {
        'acts': acts,
        'log_probs': log_probs.numpy(),
        'labels': labels,
        'input_lengths': input_lengths,
        'label_lengths': label_lengths,
        'blank_label': blank_label,
        'final_loss': final_loss.numpy(),
        'grads': grads.numpy(),
        'grads_p': grads_p
    }
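# Example of calling the generator above for one small random case; the concrete
# sizes here are arbitrary choices, not values from the original snippet.
case = gen_test_case(batch_num=2, max_label_length=3, max_input_length=4,
                     output_vocab_size=5)
print('per-utterance loss:', case['final_loss'])
print('gradient shape w.r.t. raw activations:', case['grads'].shape)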
def test_forward(self):
    # Softmax activations for the following inputs:
    acts = np.array([0.1, 0.6, 0.1, 0.1, 0.1,
                     0.1, 0.1, 0.6, 0.1, 0.1,
                     0.1, 0.1, 0.2, 0.8, 0.1,
                     0.1, 0.6, 0.1, 0.1, 0.1,
                     0.1, 0.1, 0.2, 0.1, 0.1,
                     0.7, 0.1, 0.2, 0.1, 0.1],
                    dtype=np.float32).reshape(1, 2, 3, 5)
    expected_costs = np.array([4.495667], dtype=np.float32)
    labels = np.array([[1, 2]], dtype=np.int32)
    input_lengths = np.array([2], dtype=np.int32)
    label_lengths = np.array([2], dtype=np.int32)

    acts_t = tf.constant(acts)
    labels_t = tf.constant(labels)
    input_lengths_t = tf.constant(input_lengths)
    label_lengths_t = tf.constant(label_lengths)
    acts_t = tf.nn.log_softmax(acts_t)  # NOTE cpu
    costs = rnnt_loss(acts_t, labels_t, input_lengths_t, label_lengths_t)
    self.assertAllClose(costs, expected_costs, atol=1e-6)
def test_forward(self):
    # Softmax activations for the following inputs:
    acts = np.array([
        0.1, 0.6, 0.1, 0.1, 0.1, 0.1,
        0.1, 0.6, 0.1, 0.1, 0.1, 0.1,
        0.2, 0.8, 0.1, 0.1, 0.6, 0.1,
        0.1, 0.1, 0.1, 0.1, 0.2, 0.1,
        0.1, 0.7, 0.1, 0.2, 0.1, 0.1
    ], dtype=np.float32).reshape(1, 2, 3, 5)
    labels = np.array([[1, 2]], dtype=np.int32)
    input_lengths = np.array([2], dtype=np.int32)
    label_lengths = np.array([2], dtype=np.int32)

    acts_t = tf.constant(acts)
    labels_t = tf.constant(labels)
    input_lengths_t = tf.constant(input_lengths)
    label_lengths_t = tf.constant(label_lengths)
    acts_t = tf.nn.log_softmax(acts_t)  # NOTE cpu
    costs = rnnt_loss(acts_t, labels_t, input_lengths_t, label_lengths_t)
    with self.test_session():
        print(costs.eval())
def test_step(slice_input, label_input, slice_len, label_len):
    outputs = model([slice_input, label_input])
    loss = tf.reduce_mean(
        warprnnt_tensorflow.rnnt_loss(outputs, label_input[:, 1:],
                                      slice_len, label_len))
    tf.print(loss)
        [0.779194617063042, 0.18331417220174862, 0.113745182072432],
        [0.24022162381327106, 0.3394695622533106, 0.1341595066017014]]],
      [[[0.5055615569388828, 0.051597282072282646, 0.6402903936686337],
        [0.43073311517251, 0.8294731834714112, 0.1774668847323424],
        [0.3207001991262245, 0.04288308912457006, 0.30280282975568984]],
       [[0.6751777088333762, 0.569537369330242, 0.5584738347504452],
        [0.08313242153985256, 0.06016544344162322, 0.10795752845152584],
        [0.7486153608562472, 0.943918041459349, 0.4863558118797222]],
       [[0.4181986264486809, 0.6524078485043804, 0.024242983423721887],
        [0.13458171554507403, 0.3663418070512402, 0.2958297395361563],
        [0.9236695822497084, 0.6899291482654177, 0.7418981733448822]],
       [[0.25000547599982104, 0.6034295486281007, 0.9872887878887768],
        [0.5926057265215715, 0.8846724004467684, 0.5434495396894328],
        [0.6607698886038497, 0.3771277082495921, 0.3580209022231813]]]],
    dtype=tf.float32)
labels = tf.constant([[1, 2, 3], [1, 1, 1]], dtype=np.int32)
input_length = tf.constant([4, 4], dtype=tf.int32)
label_length = tf.constant([2, 2], dtype=tf.int32)

with tf.GradientTape() as tape:
    tape.watch(acts)
    logits = tf.nn.log_softmax(acts)
    costs = rnnt_loss(logits, labels, input_length, label_length, 0)
grads = tape.gradient(costs, [acts])
print(costs)
print(grads)
def warp_loss(logits, labels, label_lengths, logit_lengths):
    log_probs = tf.nn.log_softmax(logits, axis=3)
    loss = warprnnt_tensorflow.rnnt_loss(log_probs, labels, logit_lengths,
                                         label_lengths)
    return loss
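# A self-contained sketch of calling warp_loss on random data. The B x T x (U+1) x V
# logit layout follows the snippet above; the concrete sizes are assumptions.
import numpy as np
import tensorflow as tf
import warprnnt_tensorflow

B, T, U, V = 2, 6, 3, 5  # batch, time steps, target length, vocab size incl. blank
logits = tf.constant(np.random.rand(B, T, U + 1, V).astype(np.float32))
labels = tf.constant(np.random.randint(1, V, size=(B, U)), dtype=tf.int32)
logit_lengths = tf.constant([T, T], dtype=tf.int32)
label_lengths = tf.constant([U, U], dtype=tf.int32)

per_example_loss = warp_loss(logits, labels, label_lengths, logit_lengths)
print(per_example_loss)  # one RNN-T loss value per batch element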
])

x = Input(shape=(1024, 128))
y = Input(shape=(1,))
h_enc = encoder_net(x)
h_pre = prediction_net(y)
p = joint_net(tf.concat([h_enc, h_pre], -1))
model = Model(inputs=[x, y], outputs=p)

# Train it!
optim = Adam(1e-4)
train_loss = tf.keras.metrics.Mean(name='train_loss')

for epoch in range(20):
    train_loss.reset_states()
    for batch in train:
        with tf.GradientTape() as tape:
            pred = model([batch['x'], batch['y']])
            # NOTE: warprnnt's rnnt_loss also expects input/label length tensors;
            # they are omitted in this sketch.
            loss = rnnt_loss(pred, batch['y'])
        grads = tape.gradient(loss, model.trainable_variables)
        optim.apply_gradients(zip(grads, model.trainable_variables))
        train_loss(loss)

# Save the model
model.save('models/librispeech_subwords8k_rnnt')