def model_fn(features, labels, mode, params):
    """Build the EstimatorSpec for the DeepSpeech2 model.

    Args:
      features: a dictionary of input data. The keys "input_length",
        "label_length" and "features" are read from it.
      labels: the labels for the input data, passed to the CTC loss.
      mode: current estimator mode; should be one of
        `tf.estimator.ModeKeys.TRAIN`, `EVALUATE`, `PREDICT`.
      params: a dict of hyper parameters; "num_classes" is read from it.

    Returns:
      A `tf.estimator.EstimatorSpec`. In PREDICT mode it carries only the
      predictions dict; in every other mode it carries the CTC loss and the
      train op.
    """
    class_count = params["num_classes"]
    input_length = features["input_length"]
    label_length = features["label_length"]
    spectrogram = features["features"]

    # Network hyper parameters come from the flags; only the number of
    # output classes is taken from `params`.
    model = deep_speech_model.DeepSpeech2(
        flags_obj.rnn_hidden_layers,
        flags_obj.rnn_type,
        flags_obj.is_bidirectional,
        flags_obj.rnn_hidden_size,
        class_count,
        flags_obj.use_bias,
    )

    if mode == tf.estimator.ModeKeys.PREDICT:
        logits = model(spectrogram, training=False)
        # The model output is exposed unchanged under both "probabilities"
        # and "logits".
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                "classes": tf.argmax(logits, axis=2),
                "probabilities": logits,
                "logits": logits
            },
        )

    # Every mode other than PREDICT falls through to here and runs the
    # model with training=True.
    logits = model(spectrogram, training=True)
    input_time_steps = tf.shape(spectrogram)[1]
    output_time_steps = tf.shape(logits)[1]
    ctc_input_length = compute_length_after_conv(
        input_time_steps, output_time_steps, input_length)

    # CTC loss, averaged over the values returned by ctc_batch_cost.
    per_example_cost = tf.keras.backend.ctc_batch_cost(
        labels, logits, ctc_input_length, label_length)
    loss = tf.reduce_mean(per_example_cost)

    adam = tf.compat.v1.train.AdamOptimizer(
        learning_rate=flags_obj.learning_rate)
    step = tf.compat.v1.train.get_or_create_global_step()
    minimize_op = adam.minimize(loss, global_step=step)
    pending_updates = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.UPDATE_OPS)
    # Group the optimizer step with whatever is registered under UPDATE_OPS
    # so that both run as part of a single train op.
    train_op = tf.group(minimize_op, pending_updates)

    # NOTE(review): this export runs every time model_fn reaches this point
    # and always writes to the same relative path 'myModel.h5' -- confirm
    # that this is intended beyond debugging.
    print('Saving Keras Model')
    keras_model = tf.keras.Model(inputs=spectrogram, outputs=logits)
    keras_model.save('myModel.h5')

    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
def model_fn(features, labels, mode, params):
    """Define model function for deep speech model (TPUEstimator variant).

    Args:
      features: a dictionary of input_data features. It includes the data
        input_length, label_length and the spectrogram features (read from
        the keys "input_length", "label_length" and "features").
      labels: a list of labels for the input data.
      mode: current estimator mode; should be one of
        `tf.estimator.ModeKeys.TRAIN`, `EVALUATE`, `PREDICT`.
      params: a dict of hyper parameters to be passed to model_fn;
        "num_classes" is read from it.

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec` parameterized according to the
      input params and the current mode: predictions only in PREDICT mode,
      otherwise the CTC loss and the train op.
    """
    num_classes = params["num_classes"]
    # Both lengths are required further down to compute the CTC loss, so
    # they must be read before `features` is rebound to the spectrogram.
    input_length = features["input_length"]
    label_length = features["label_length"]
    features = features["features"]

    # Create DeepSpeech2 model.
    model = deep_speech_model.DeepSpeech2(
        flags_obj.rnn_hidden_layers,
        flags_obj.rnn_type,
        flags_obj.is_bidirectional,
        flags_obj.rnn_hidden_size,
        num_classes,
        flags_obj.use_bias,
    )

    if mode == tf.estimator.ModeKeys.PREDICT:
        logits = model(features, training=False)
        predictions = {
            "classes": tf.argmax(logits, axis=2),
            "probabilities": tf.nn.softmax(logits),
            "logits": logits,
        }
        return tf.contrib.tpu.TPUEstimatorSpec(mode, predictions=predictions)

    # Every mode other than PREDICT falls through to here and runs the
    # model with training=True.
    logits = model(features, training=True)
    probs = tf.nn.softmax(logits)
    ctc_input_length = compute_length_after_conv(
        tf.shape(features)[1], tf.shape(probs)[1], input_length
    )
    # Compute CTC loss
    loss = tf.reduce_mean(
        ctc_loss(label_length, ctc_input_length, labels, probs)
    )

    optimizer = tf.train.AdamOptimizer(learning_rate=flags_obj.learning_rate)
    if flags_obj.use_tpu:
        # Wrap the optimizer for TPU runs, as selected by the use_tpu flag.
        optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

    global_step = tf.train.get_or_create_global_step()
    minimize_op = optimizer.minimize(loss, global_step=global_step)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Create the train_op that groups both minimize_ops and update_ops
    train_op = tf.group(minimize_op, update_ops)

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, loss=loss, train_op=train_op
    )