def main(_):
    """Build the inference graph and write its serialized GraphDef to disk.

    The graph reads raw features from the ``input`` placeholder, normalizes
    them with the input statistics from ``train_cmvn.npz`` (looked up under
    ``FLAGS.data_dir``), runs the model, and applies the reverse
    normalization with the label statistics. The result is exposed under
    the node name ``FLAGS.output_node_name``.

    No checkpoint is loaded here: only the graph definition is written to
    ``FLAGS.output_file``.

    Args:
        _: Ignored.

    Raises:
        ValueError: If ``--output_file`` is not supplied.
    """
    if not FLAGS.output_file:
        raise ValueError(
            'You must supply the path to save to with --output_file')

    # Read cmvn to do reverse mean variance normalization.
    cmvn = np.load(os.path.join(FLAGS.data_dir, "train_cmvn.npz"))

    with tf.Graph().as_default() as graph:
        model = TfModel(rnn_cell=FLAGS.rnn_cell,
                        num_hidden=FLAGS.num_hidden,
                        dnn_depth=FLAGS.dnn_depth,
                        rnn_depth=FLAGS.rnn_depth,
                        output_size=FLAGS.output_dim,
                        bidirectional=FLAGS.bidirectional,
                        rnn_output=FLAGS.rnn_output,
                        cnn_output=FLAGS.cnn_output,
                        look_ahead=FLAGS.look_ahead,
                        mdn_output=FLAGS.mdn_output,
                        mix_num=FLAGS.mix_num,
                        name="tf_model")

        # A single utterance of shape [num_frames, input_dim].
        input_sequence = tf.placeholder(name='input',
                                        dtype=tf.float32,
                                        shape=[None, FLAGS.input_dim])
        # The model takes one length per batch entry; the batch size is 1.
        length = tf.expand_dims(tf.shape(input_sequence)[0], 0)

        # Apply normalization for input before inference.
        mean_inputs = tf.constant(cmvn["mean_inputs"], dtype=tf.float32)
        stddev_inputs = tf.constant(cmvn["stddev_inputs"], dtype=tf.float32)
        input_sequence = (input_sequence - mean_inputs) / stddev_inputs
        # Add the batch axis expected by the model.
        input_sequence = tf.expand_dims(input_sequence, 0)

        output_sequence_logits, _ = model(input_sequence, length)

        # Apply reverse cmvn for output after inference.
        mean_labels = tf.constant(cmvn["mean_labels"], dtype=tf.float32)
        stddev_labels = tf.constant(cmvn["stddev_labels"], dtype=tf.float32)
        output_sequence_logits = (
            output_sequence_logits * stddev_labels + mean_labels)

        # Drop only the batch axis added above. A bare tf.squeeze() would
        # also drop the frame axis for one-frame inputs and the feature axis
        # when output_dim is 1, changing the rank of the exported tensor.
        # Assumes the model keeps the batch axis first -- confirm against
        # TfModel.
        output_sequence_logits = tf.squeeze(output_sequence_logits, axis=[0])
        output_sequence_logits = tf.identity(output_sequence_logits,
                                             name=FLAGS.output_node_name)

        show_all_variables()

        graph_def = graph.as_graph_def()
        with gfile.GFile(FLAGS.output_file, 'wb') as f:
            f.write(graph_def.SerializeToString())

    tf.logging.info("Inference graph has been written to %s" %
                    FLAGS.output_file)
def main(_):
    """Build the inference graph and export it frozen with checkpoint weights.

    The graph reads raw features from the ``input`` placeholder, normalizes
    them with the input statistics from ``train_cmvn.npz`` (looked up under
    ``FLAGS.data_dir``), runs the model, and applies the reverse
    normalization with the label statistics. The result is exposed under
    the node name ``FLAGS.output_node_name``. The latest checkpoint found
    in ``FLAGS.checkpoint_path`` is then passed to
    ``freeze_graph.freeze_graph_with_def_protos`` together with the graph,
    and the output is written to ``FLAGS.output_file``.

    Exits the process with status -1 when no checkpoint is found.

    Args:
        _: Ignored.

    Raises:
        ValueError: If ``--output_file`` is not supplied.
    """
    if not FLAGS.output_file:
        raise ValueError(
            'You must supply the path to save to with --output_file')

    # Read cmvn to do reverse mean variance normalization.
    cmvn = np.load(os.path.join(FLAGS.data_dir, "train_cmvn.npz"))

    with tf.Graph().as_default() as graph:
        model = TfModel(rnn_cell=FLAGS.rnn_cell,
                        dnn_depth=FLAGS.dnn_depth,
                        dnn_num_hidden=FLAGS.dnn_num_hidden,
                        rnn_depth=FLAGS.rnn_depth,
                        rnn_num_hidden=FLAGS.rnn_num_hidden,
                        output_size=FLAGS.output_dim,
                        bidirectional=FLAGS.bidirectional,
                        rnn_output=FLAGS.rnn_output,
                        cnn_output=FLAGS.cnn_output,
                        look_ahead=FLAGS.look_ahead,
                        mdn_output=FLAGS.mdn_output,
                        mix_num=FLAGS.mix_num,
                        name="tf_model")

        # A single utterance of shape [num_frames, input_dim].
        input_sequence = tf.placeholder(name='input',
                                        dtype=tf.float32,
                                        shape=[None, FLAGS.input_dim])
        # The model takes one length per batch entry; the batch size is 1.
        length = tf.expand_dims(tf.shape(input_sequence)[0], 0)

        # Apply normalization for input before inference.
        mean_inputs = tf.constant(cmvn["mean_inputs"], dtype=tf.float32)
        stddev_inputs = tf.constant(cmvn["stddev_inputs"], dtype=tf.float32)
        input_sequence = (input_sequence - mean_inputs) / stddev_inputs
        # Add the batch axis expected by the model.
        input_sequence = tf.expand_dims(input_sequence, 0)

        output_sequence_logits, _ = model(input_sequence, length)

        # Apply reverse cmvn for output after inference.
        mean_labels = tf.constant(cmvn["mean_labels"], dtype=tf.float32)
        stddev_labels = tf.constant(cmvn["stddev_labels"], dtype=tf.float32)
        output_sequence_logits = (
            output_sequence_logits * stddev_labels + mean_labels)

        # Drop only the batch axis added above. A bare tf.squeeze() would
        # also drop the frame axis for one-frame inputs and the feature axis
        # when output_dim is 1, changing the rank of the exported tensor.
        # Assumes the model keeps the batch axis first -- confirm against
        # TfModel.
        output_sequence_logits = tf.squeeze(output_sequence_logits, axis=[0])
        output_sequence_logits = tf.identity(output_sequence_logits,
                                             name=FLAGS.output_node_name)

        show_all_variables()

        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
        if not ckpt:
            # Report the directory that was actually searched. The previous
            # code referenced an undefined `args` object here and therefore
            # died with a NameError instead of logging and exiting.
            tf.logging.warning("Cannot find checkpoint in {}".format(
                FLAGS.checkpoint_path))
            sys.exit(-1)
        saver = tf.train.Saver()

        freeze_graph.freeze_graph_with_def_protos(
            input_graph_def=graph.as_graph_def(),
            input_saver_def=saver.as_saver_def(),
            input_checkpoint=ckpt.model_checkpoint_path,
            output_node_names=FLAGS.output_node_name,
            restore_op_name=None,
            filename_tensor_name=None,
            output_graph=FLAGS.output_file,
            clear_devices=True,
            initializer_nodes="",
            variable_names_blacklist=None)

    tf.logging.info("Inference graph has been written to %s" %
                    FLAGS.output_file)
def _select_mdn_means(logits, mix_num, output_dim):
    """Pick, for every frame, the mean vector of the most likely mixture.

    Args:
        logits: 2-D array whose columns are laid out as ``mix_num`` mixture
            weights, then ``mix_num * output_dim`` means (one contiguous
            ``output_dim`` slice per mixture), then the remaining columns.
        mix_num: Number of mixture components.
        output_dim: Size of one mean vector.

    Returns:
        Array of shape ``(num_frames, output_dim)``; it is empty (rather
        than an error) when ``logits`` has no rows.
    """
    num_frames = logits.shape[0]
    mu_end = mix_num + mix_num * output_dim
    out_pi = logits[:, :mix_num]
    out_mu = logits[:, mix_num:mu_end].reshape(num_frames, mix_num,
                                               output_dim)
    max_index_pi = out_pi.argmax(axis=1)
    return out_mu[np.arange(num_frames), max_index_pi]


def decode():
    """Run the decoding of the acoustic or duration model.

    Builds the test dataset and the model, restores the model from a
    checkpoint via ``restore_from_ckpt`` (exiting with status -1 when that
    fails), then runs the network once per test batch (batch size 1). Each
    output is de-normalized with the label statistics from
    ``train_cmvn.npz`` and written with ``write_binary_file`` to
    ``<FLAGS.save_dir>/test/cmp/<name>.cmp``, where ``<name>`` is derived
    from the corresponding entry of ``dataset_test.tfrecords_lst``.

    When ``FLAGS.mdn_output`` is set, the mean of the mixture with the
    largest weight is used as the prediction for each frame.
    """
    with tf.device('/cpu:0'):
        dataset_test = SequenceDataset(
            subset="test",
            config_dir=FLAGS.config_dir,
            data_dir=FLAGS.data_dir,
            batch_size=1,
            input_size=FLAGS.input_dim,
            output_size=FLAGS.output_dim,
            infer=True,
            name="dataset_test")()

    model = TfModel(rnn_cell=FLAGS.rnn_cell,
                    dnn_depth=FLAGS.dnn_depth,
                    dnn_num_hidden=FLAGS.dnn_num_hidden,
                    rnn_depth=FLAGS.rnn_depth,
                    rnn_num_hidden=FLAGS.rnn_num_hidden,
                    output_size=FLAGS.output_dim,
                    bidirectional=FLAGS.bidirectional,
                    rnn_output=FLAGS.rnn_output,
                    cnn_output=FLAGS.cnn_output,
                    look_ahead=FLAGS.look_ahead,
                    mdn_output=FLAGS.mdn_output,
                    mix_num=FLAGS.mix_num,
                    name="tf_model")

    # Build the testing model and get test output sequence.
    test_iterator = dataset_test.batched_dataset.make_one_shot_iterator()
    input_sequence, input_sequence_length = test_iterator.get_next()
    test_output_sequence_logits, _ = model(input_sequence,
                                           input_sequence_length)

    show_all_variables()

    saver = tf.train.Saver()

    # Decode.
    with tf.Session() as sess:
        # Run init
        sess.run(tf.global_variables_initializer())

        if not restore_from_ckpt(sess, saver):
            sys.exit(-1)

        # Read cmvn to do reverse mean variance normalization. The arrays
        # are pulled out once: indexing the archive re-reads it from disk,
        # which the loop below would otherwise do for every utterance.
        with np.load(os.path.join(FLAGS.data_dir, "train_cmvn.npz")) as cmvn:
            stddev_labels = cmvn["stddev_labels"]
            mean_labels = cmvn["mean_labels"]

        out_dir_name = os.path.join(FLAGS.save_dir, "test", "cmp")

        num_batches = 0
        used_time_sum = frames_sum = 0.0
        while True:
            time_start = time.time()
            try:
                logits = sess.run(test_output_sequence_logits)
            except tf.errors.OutOfRangeError:
                # The one-shot iterator is exhausted: all utterances done.
                break
            used_time = time.time() - time_start
            used_time_sum += used_time
            frame_num = logits.shape[1]
            frames_sum += frame_num

            # Squeeze batch dimension.
            logits = logits.squeeze(axis=0)

            if FLAGS.mdn_output:
                logits = _select_mdn_means(logits, FLAGS.mix_num,
                                           FLAGS.output_dim)

            sequence = logits * stddev_labels + mean_labels

            out_file_name = os.path.basename(
                dataset_test.tfrecords_lst[num_batches]).split(
                    '.')[0] + ".cmp"
            out_path = os.path.join(out_dir_name, out_file_name)
            write_binary_file(sequence, out_path, with_dim=False)
            tf.logging.info(
                "writing inferred cmp to %s (%d frames in %.4f seconds)" %
                (out_path, frame_num, used_time))
            num_batches += 1

        # Guard against an empty test set, where no time was accumulated.
        if used_time_sum > 0:
            frames_per_second = int(frames_sum / used_time_sum)
        else:
            frames_per_second = 0
        tf.logging.info("Done decoding -- epoch limit reached (%d "
                        "frames per second)" % frames_per_second)
def train():
    """Train the acoustic or duration model with validation-gated epochs.

    Builds the train/valid datasets, the model, the loss and an Adam
    optimizer with global-norm gradient clipping, then runs up to
    ``FLAGS.max_epochs`` epochs. After each epoch the validation loss is
    compared with the best one seen so far:

    * lower: a checkpoint is saved under ``<FLAGS.save_dir>/nnet`` and the
      new loss becomes the reference ("nnet accepted");
    * otherwise: ``restore_from_ckpt`` is called and the learning rate is
      multiplied by ``FLAGS.reduce_learning_rate_multiplier``
      ("nnet rejected").

    ``FLAGS.learning_rate`` is overwritten every epoch with the value
    currently held by the learning-rate variable.
    """
    # Arguments common to the training and validation datasets.
    shared_dataset_args = dict(
        config_dir=FLAGS.config_dir,
        data_dir=FLAGS.data_dir,
        batch_size=FLAGS.batch_size,
        input_size=FLAGS.input_dim,
        output_size=FLAGS.output_dim,
        num_threads=FLAGS.num_threads,
        use_bucket=True,
        infer=False)
    dataset_train = SequenceDataset(
        subset="train", name="dataset_train", **shared_dataset_args)()
    dataset_valid = SequenceDataset(
        subset="valid", name="dataset_valid", **shared_dataset_args)()

    model = TfModel(
        rnn_cell=FLAGS.rnn_cell,
        dnn_depth=FLAGS.dnn_depth,
        dnn_num_hidden=FLAGS.dnn_num_hidden,
        rnn_depth=FLAGS.rnn_depth,
        rnn_num_hidden=FLAGS.rnn_num_hidden,
        output_size=FLAGS.output_dim,
        bidirectional=FLAGS.bidirectional,
        rnn_output=FLAGS.rnn_output,
        cnn_output=FLAGS.cnn_output,
        look_ahead=FLAGS.look_ahead,
        mdn_output=FLAGS.mdn_output,
        mix_num=FLAGS.mix_num,
        name="tf_model")

    # One reinitializable iterator serves both datasets; the init ops below
    # switch it between training and validation data.
    train_batches = dataset_train.batched_dataset
    data_iterator = tf.data.Iterator.from_structure(
        train_batches.output_types, train_batches.output_shapes)
    next_batch = data_iterator.get_next()
    (input_sequence, input_sequence_length,
     target_sequence, target_sequence_length) = next_batch

    training_init_op = data_iterator.make_initializer(
        dataset_train.batched_dataset)
    validation_init_op = data_iterator.make_initializer(
        dataset_valid.batched_dataset)

    # Forward pass and loss.
    output_sequence_logits, _final_state = model(input_sequence,
                                                 input_sequence_length)
    loss = model.loss(output_sequence_logits, target_sequence,
                      target_sequence_length)
    tf.summary.scalar("loss", loss)

    # The learning rate lives in a non-trainable variable so that it can be
    # decayed from inside the session.
    learning_rate = tf.get_variable(
        "learning_rate",
        shape=[],
        dtype=tf.float32,
        initializer=tf.constant_initializer(FLAGS.learning_rate),
        trainable=False)
    decayed_rate = learning_rate * FLAGS.reduce_learning_rate_multiplier
    reduce_learning_rate = learning_rate.assign(decayed_rate)

    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.int64,
        initializer=tf.zeros_initializer(),
        trainable=False,
        collections=[tf.GraphKeys.GLOBAL_VARIABLES,
                     tf.GraphKeys.GLOBAL_STEP])

    # Adam update on gradients clipped by their global norm.
    trainable_variables = tf.trainable_variables()
    optimizer = tf.train.AdamOptimizer(learning_rate)
    raw_grads = tf.gradients(loss, trainable_variables)
    clipped_grads, _ = tf.clip_by_global_norm(raw_grads,
                                              FLAGS.max_grad_norm)
    train_step = optimizer.apply_gradients(
        zip(clipped_grads, trainable_variables), global_step=global_step)

    show_all_variables()
    merged_all = tf.summary.merge_all()
    saver = tf.train.Saver(max_to_keep=FLAGS.max_epochs)

    # Let GPU memory grow on demand instead of grabbing all of it up front.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    nnet_dir = os.path.join(FLAGS.save_dir, "nnet")
    epoch_report = ("EPOCH %d: TRAIN AVG.LOSS %.4f, (lrate%g) "
                    "CROSSVAL AVG.LOSS %.4f, TIME USED %.2f, %s")

    with tf.Session(config=session_config) as sess:
        sess.run(tf.global_variables_initializer())

        summary_writer = tf.summary.FileWriter(nnet_dir, sess.graph)

        if FLAGS.resume_training:
            restore_from_ckpt(sess, saver)
            # Re-apply the learning rate requested on the command line after
            # the restore.
            sess.run(tf.assign(learning_rate, FLAGS.learning_rate))

        # Blank line to keep the log readable.
        print()
        sys.stdout.flush()

        # Baseline validation loss before any training epoch.
        sess.run(validation_init_op)
        loss_prev = eval_one_epoch(sess, loss, dataset_valid.num_batches)
        tf.logging.info("CROSSVAL PRERUN AVG.LOSS %.4f\n" % loss_prev)

        for epoch_number in range(1, FLAGS.max_epochs + 1):
            # Training pass.
            time_start = time.time()
            sess.run(training_init_op)
            tr_loss = train_one_epoch(sess, summary_writer, merged_all,
                                      global_step, train_step, loss,
                                      dataset_train.num_batches)
            used_time = time.time() - time_start

            # Validation pass.
            sess.run(validation_init_op)
            val_loss = eval_one_epoch(sess, loss, dataset_valid.num_batches)

            # The checkpoint name records epoch, learning rate and losses.
            FLAGS.learning_rate = sess.run(learning_rate)
            ckpt_name = 'nnet_epoch%d_lrate%g_tr%.4f_cv%.4f' % (
                epoch_number, FLAGS.learning_rate, tr_loss, val_loss)
            checkpoint_path = os.path.join(FLAGS.save_dir, "nnet", ckpt_name)

            report_fields = (epoch_number, tr_loss, FLAGS.learning_rate,
                             val_loss, used_time)

            if val_loss < loss_prev:
                # Accept: keep the new parameters.
                saver.save(sess, checkpoint_path)
                tf.logging.info(
                    epoch_report % (report_fields + ("nnet accepted",)))
                loss_prev = val_loss
            else:
                # Reject: go back to the last checkpoint and decay the rate.
                tf.logging.info(
                    epoch_report % (report_fields + ("nnet rejected",)))
                restore_from_ckpt(sess, saver)
                # NOTE(review): the saver presumably covers the
                # learning_rate variable too, so this restore may reset it
                # to the checkpointed value before the decay below --
                # confirm that consecutive rejections decay as intended.
                sess.run(reduce_learning_rate)

            # Blank line to keep the log readable.
            print()
            sys.stdout.flush()
train_accuracy = tf.keras.metrics.BinaryAccuracy(name='train_accuracy') train_precision = tf.keras.metrics.Precision(name='train_precision', dtype='float32') train_recall = tf.keras.metrics.Recall(name='train_recall', dtype='float32') # Metrics that will measure loss and accuracy of the model over the testing process test_loss = tf.keras.metrics.Mean(name='test_loss') test_accuracy = tf.keras.metrics.BinaryAccuracy(name='test_accuracy') test_precision = tf.keras.metrics.Precision(name='test_precision', dtype='float32') test_recall = tf.keras.metrics.Recall(name='test_recall', dtype='float32') model = TfModel(example_dim, loss_object, optimizer, train_loss, train_accuracy, train_precision, train_recall, test_loss, test_accuracy, test_precision, test_recall) bestModel = None confusion = None if retrain: if (model_type == 'functional'): template = '\n###### Test results ######\n\nTest Loss: {},\nTest Accuracy: {},\nTest Precision: {},\nTest Recall: {},\nTest AUC: {},\nTest F-Score: {}\n' earlyStopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)