Python TfModel示例，models.tf_model.TfModel Python示例

示例#1

0

显示文件

文件： export_inference_graph.py 项目： whyxzh/voicenet

def main(_):
    """Build the inference graph with CMVN folded in and write it to disk.

    The exported graph takes a single un-normalized utterance through the
    placeholder named ``input`` (shape ``[frames, FLAGS.input_dim]``),
    normalizes it with the training-set input statistics, runs the model on
    a batch of one, and de-normalizes the result with the label statistics.
    The final tensor is exposed under ``FLAGS.output_node_name``.

    Only the graph definition is serialized here; no checkpoint is restored
    and no variable values are embedded by this function.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``--output_file`` was not supplied.
    """
    if not FLAGS.output_file:
        raise ValueError(
            'You must supply the path to save to with --output_file')

    # Read cmvn to do reverse mean variance normalization.
    cmvn = np.load(os.path.join(FLAGS.data_dir, "train_cmvn.npz"))
    with tf.Graph().as_default() as graph:
        model = TfModel(rnn_cell=FLAGS.rnn_cell,
                        num_hidden=FLAGS.num_hidden,
                        dnn_depth=FLAGS.dnn_depth,
                        rnn_depth=FLAGS.rnn_depth,
                        output_size=FLAGS.output_dim,
                        bidirectional=FLAGS.bidirectional,
                        rnn_output=FLAGS.rnn_output,
                        cnn_output=FLAGS.cnn_output,
                        look_ahead=FLAGS.look_ahead,
                        mdn_output=FLAGS.mdn_output,
                        mix_num=FLAGS.mix_num,
                        name="tf_model")

        input_sequence = tf.placeholder(name='input',
                                        dtype=tf.float32,
                                        shape=[None, FLAGS.input_dim])
        # The model is called with a batch of lengths; wrap the dynamic
        # frame count of the single utterance into a 1-element vector.
        length = tf.expand_dims(tf.shape(input_sequence)[0], 0)

        # Apply normalization for input before inference.
        mean_inputs = tf.constant(cmvn["mean_inputs"], dtype=tf.float32)
        stddev_inputs = tf.constant(cmvn["stddev_inputs"], dtype=tf.float32)
        input_sequence = (input_sequence - mean_inputs) / stddev_inputs
        # Add the leading batch dimension (batch size 1).
        input_sequence = tf.expand_dims(input_sequence, 0)

        output_sequence_logits, final_state = model(input_sequence, length)

        # Apply reverse cmvn for output after inference
        mean_labels = tf.constant(cmvn["mean_labels"], dtype=tf.float32)
        stddev_labels = tf.constant(cmvn["stddev_labels"], dtype=tf.float32)
        output_sequence_logits = output_sequence_logits * stddev_labels + mean_labels
        # Drop only the batch axis added above. Squeezing without an axis
        # would also remove the time axis for a single-frame input (or the
        # feature axis when the output dimension is 1).
        output_sequence_logits = tf.squeeze(output_sequence_logits, axis=[0])
        output_sequence_logits = tf.identity(output_sequence_logits,
                                             name=FLAGS.output_node_name)

        show_all_variables()

        graph_def = graph.as_graph_def()
        with gfile.GFile(FLAGS.output_file, 'wb') as f:
            f.write(graph_def.SerializeToString())
        tf.logging.info("Inference graph has been written to %s" %
                        FLAGS.output_file)

示例#2

0

显示文件

文件： export_inference_graph.py 项目： npujcong/speaker_adaptation_voicenet

def main(_):
    """Build the inference graph, freeze it with checkpoint weights, save it.

    The graph takes a single un-normalized utterance through the placeholder
    named ``input`` (shape ``[frames, FLAGS.input_dim]``), normalizes it with
    the training-set input statistics, runs the model on a batch of one, and
    de-normalizes the result with the label statistics. The final tensor is
    exposed under ``FLAGS.output_node_name``. The graph is then frozen using
    the checkpoint state found in ``FLAGS.checkpoint_path`` and written to
    ``FLAGS.output_file``.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``--output_file`` was not supplied.
        SystemExit: With status -1 if no checkpoint state is found in
            ``FLAGS.checkpoint_path``.
    """
    if not FLAGS.output_file:
        raise ValueError(
            'You must supply the path to save to with --output_file')

    # Read cmvn to do reverse mean variance normalization.
    cmvn = np.load(os.path.join(FLAGS.data_dir, "train_cmvn.npz"))
    with tf.Graph().as_default() as graph:
        model = TfModel(rnn_cell=FLAGS.rnn_cell,
                        dnn_depth=FLAGS.dnn_depth,
                        dnn_num_hidden=FLAGS.dnn_num_hidden,
                        rnn_depth=FLAGS.rnn_depth,
                        rnn_num_hidden=FLAGS.rnn_num_hidden,
                        output_size=FLAGS.output_dim,
                        bidirectional=FLAGS.bidirectional,
                        rnn_output=FLAGS.rnn_output,
                        cnn_output=FLAGS.cnn_output,
                        look_ahead=FLAGS.look_ahead,
                        mdn_output=FLAGS.mdn_output,
                        mix_num=FLAGS.mix_num,
                        name="tf_model")

        input_sequence = tf.placeholder(name='input',
                                        dtype=tf.float32,
                                        shape=[None, FLAGS.input_dim])
        # The model is called with a batch of lengths; wrap the dynamic
        # frame count of the single utterance into a 1-element vector.
        length = tf.expand_dims(tf.shape(input_sequence)[0], 0)

        # Apply normalization for input before inference.
        mean_inputs = tf.constant(cmvn["mean_inputs"], dtype=tf.float32)
        stddev_inputs = tf.constant(cmvn["stddev_inputs"], dtype=tf.float32)
        input_sequence = (input_sequence - mean_inputs) / stddev_inputs
        # Add the leading batch dimension (batch size 1).
        input_sequence = tf.expand_dims(input_sequence, 0)

        output_sequence_logits, final_state = model(input_sequence, length)

        # Apply reverse cmvn for output after inference
        mean_labels = tf.constant(cmvn["mean_labels"], dtype=tf.float32)
        stddev_labels = tf.constant(cmvn["stddev_labels"], dtype=tf.float32)
        output_sequence_logits = output_sequence_logits * stddev_labels + mean_labels
        # Drop only the batch axis added above. Squeezing without an axis
        # would also remove the time axis for a single-frame input (or the
        # feature axis when the output dimension is 1).
        output_sequence_logits = tf.squeeze(output_sequence_logits, axis=[0])
        output_sequence_logits = tf.identity(output_sequence_logits,
                                             name=FLAGS.output_node_name)

        show_all_variables()

        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
        if not ckpt:
            # Report the directory that was actually searched. The original
            # code referenced an undefined ``args`` object here, which
            # raised NameError instead of logging and exiting.
            tf.logging.warning("Cannot find checkpoint in {}".format(
                FLAGS.checkpoint_path))
            sys.exit(-1)
        saver = tf.train.Saver()

        freeze_graph.freeze_graph_with_def_protos(
            input_graph_def=graph.as_graph_def(),
            input_saver_def=saver.as_saver_def(),
            input_checkpoint=ckpt.model_checkpoint_path,
            output_node_names=FLAGS.output_node_name,
            restore_op_name=None,
            filename_tensor_name=None,
            output_graph=FLAGS.output_file,
            clear_devices=True,
            initializer_nodes="",
            variable_names_blacklist=None)

        tf.logging.info("Inference graph has been written to %s" %
                        FLAGS.output_file)

示例#3

0

显示文件

文件： run_tts.py 项目： npujcong/speaker_adaptation_voicenet

def _select_mdn_means(logits, mix_num, output_dim):
    """Pick, for every frame, the mean vector of the highest-weight mixture.

    The columns of ``logits`` are read as ``mix_num`` mixture weights
    followed by ``mix_num * output_dim`` means (one ``output_dim``-wide
    slice per mixture); any remaining columns are ignored.

    Args:
        logits: 2-D array with one row per frame.
        mix_num: Number of mixture components.
        output_dim: Width of each mixture's mean vector.

    Returns:
        Array of shape ``[frames, output_dim]``.
    """
    mu_end = mix_num + mix_num * output_dim
    out_pi = logits[:, :mix_num]
    out_mu = logits[:, mix_num:mu_end]

    num_frames = out_mu.shape[0]
    best_mix = out_pi.argmax(axis=1)
    # View the means as [frames, mixture, output_dim] and gather the winning
    # mixture of each frame in one indexing operation.
    per_mixture = out_mu.reshape(num_frames, mix_num, output_dim)
    return per_mixture[np.arange(num_frames), best_mix]


def decode():
    """Run the decoding of the acoustic or duration model.

    Builds the test dataset and model, restores weights from a checkpoint,
    then runs the model one utterance at a time (batch size 1). Each result
    is de-normalized with the label statistics from ``train_cmvn.npz`` and
    written as ``<save_dir>/test/cmp/<name>.cmp``. Exits with status -1 if
    the checkpoint cannot be restored.
    """

    with tf.device('/cpu:0'):
        dataset_test = SequenceDataset(subset="test",
                                       config_dir=FLAGS.config_dir,
                                       data_dir=FLAGS.data_dir,
                                       batch_size=1,
                                       input_size=FLAGS.input_dim,
                                       output_size=FLAGS.output_dim,
                                       infer=True,
                                       name="dataset_test")()

        model = TfModel(rnn_cell=FLAGS.rnn_cell,
                        dnn_depth=FLAGS.dnn_depth,
                        dnn_num_hidden=FLAGS.dnn_num_hidden,
                        rnn_depth=FLAGS.rnn_depth,
                        rnn_num_hidden=FLAGS.rnn_num_hidden,
                        output_size=FLAGS.output_dim,
                        bidirectional=FLAGS.bidirectional,
                        rnn_output=FLAGS.rnn_output,
                        cnn_output=FLAGS.cnn_output,
                        look_ahead=FLAGS.look_ahead,
                        mdn_output=FLAGS.mdn_output,
                        mix_num=FLAGS.mix_num,
                        name="tf_model")

        # Build the testing model and get test output sequence.
        test_iterator = dataset_test.batched_dataset.make_one_shot_iterator()
        input_sequence, input_sequence_length = test_iterator.get_next()
        test_output_sequence_logits, test_final_state = model(
            input_sequence, input_sequence_length)

    show_all_variables()

    saver = tf.train.Saver()

    # Decode.
    with tf.Session() as sess:
        # Run init
        sess.run(tf.global_variables_initializer())

        if not restore_from_ckpt(sess, saver):
            sys.exit(-1)

        # Read cmvn to do reverse mean variance normalization
        cmvn = np.load(os.path.join(FLAGS.data_dir, "train_cmvn.npz"))

        out_dir_name = os.path.join(FLAGS.save_dir, "test", "cmp")

        num_batches = 0
        used_time_sum = frames_sum = 0.0
        while True:
            # Only the session run can signal the end of the dataset, so
            # keep the try body limited to it.
            try:
                time_start = time.time()
                logits = sess.run(test_output_sequence_logits)
                time_end = time.time()
            except tf.errors.OutOfRangeError:
                break

            used_time = time_end - time_start
            used_time_sum += used_time
            frame_num = logits.shape[1]
            frames_sum += frame_num

            # Squeeze batch dimension.
            logits = logits.squeeze(axis=0)

            if FLAGS.mdn_output:
                logits = _select_mdn_means(logits, FLAGS.mix_num,
                                           FLAGS.output_dim)

            sequence = logits * cmvn["stddev_labels"] + cmvn["mean_labels"]

            # Name the output after the tfrecords entry at the same index
            # as this batch, keeping the basename up to its first dot.
            out_file_name = os.path.basename(
                dataset_test.tfrecords_lst[num_batches]).split(
                    '.')[0] + ".cmp"
            out_path = os.path.join(out_dir_name, out_file_name)
            write_binary_file(sequence, out_path, with_dim=False)

            tf.logging.info(
                "writing inferred cmp to %s (%d frames in %.4f seconds)" %
                (out_path, frame_num, used_time))
            num_batches += 1

        # Avoid dividing by zero when nothing was decoded (empty test set)
        # or the timer resolution reported no elapsed time.
        if used_time_sum > 0:
            frames_per_second = int(frames_sum / used_time_sum)
        else:
            frames_per_second = 0
        tf.logging.info("Done decoding -- epoch limit reached (%d "
                        "frames per second)" % frames_per_second)

示例#4

0

显示文件

文件： run_tts.py 项目： npujcong/speaker_adaptation_voicenet

def train():
    """Run the training of the acoustic or duration model.

    Builds the train/valid datasets, the model, its loss and an Adam
    optimizer with global-norm gradient clipping, then trains for up to
    ``FLAGS.max_epochs`` epochs. After every epoch the validation loss is
    compared with the best value seen so far: if it improved, a checkpoint
    is saved; otherwise ``restore_from_ckpt`` is called and the learning
    rate is multiplied by ``FLAGS.reduce_learning_rate_multiplier``.

    Side effects visible in this function: writes summaries and checkpoints
    under ``<FLAGS.save_dir>/nnet`` and overwrites ``FLAGS.learning_rate``
    with the current learning rate at every epoch.
    """

    dataset_train = SequenceDataset(subset="train",
                                    config_dir=FLAGS.config_dir,
                                    data_dir=FLAGS.data_dir,
                                    batch_size=FLAGS.batch_size,
                                    input_size=FLAGS.input_dim,
                                    output_size=FLAGS.output_dim,
                                    num_threads=FLAGS.num_threads,
                                    use_bucket=True,
                                    infer=False,
                                    name="dataset_train")()

    dataset_valid = SequenceDataset(subset="valid",
                                    config_dir=FLAGS.config_dir,
                                    data_dir=FLAGS.data_dir,
                                    batch_size=FLAGS.batch_size,
                                    input_size=FLAGS.input_dim,
                                    output_size=FLAGS.output_dim,
                                    num_threads=FLAGS.num_threads,
                                    use_bucket=True,
                                    infer=False,
                                    name="dataset_valid")()

    model = TfModel(rnn_cell=FLAGS.rnn_cell,
                    dnn_depth=FLAGS.dnn_depth,
                    dnn_num_hidden=FLAGS.dnn_num_hidden,
                    rnn_depth=FLAGS.rnn_depth,
                    rnn_num_hidden=FLAGS.rnn_num_hidden,
                    output_size=FLAGS.output_dim,
                    bidirectional=FLAGS.bidirectional,
                    rnn_output=FLAGS.rnn_output,
                    cnn_output=FLAGS.cnn_output,
                    look_ahead=FLAGS.look_ahead,
                    mdn_output=FLAGS.mdn_output,
                    mix_num=FLAGS.mix_num,
                    name="tf_model")

    # Build a reinitializable iterator for both dataset_train and dataset_valid.
    # The iterator is defined by the training dataset's types and shapes, so
    # the same graph tensors can be fed from either dataset.
    iterator = tf.data.Iterator.from_structure(
        dataset_train.batched_dataset.output_types,
        dataset_train.batched_dataset.output_shapes)
    (input_sequence, input_sequence_length, target_sequence,
     target_sequence_length) = iterator.get_next()

    # Running one of these ops points the shared iterator at that dataset.
    training_init_op = iterator.make_initializer(dataset_train.batched_dataset)
    validation_init_op = iterator.make_initializer(
        dataset_valid.batched_dataset)

    # Build the model and get the loss.
    output_sequence_logits, train_final_state = model(input_sequence,
                                                      input_sequence_length)
    loss = model.loss(output_sequence_logits, target_sequence,
                      target_sequence_length)
    tf.summary.scalar("loss", loss)

    # The learning rate lives in a non-trainable graph variable so that it
    # can be changed between epochs with an assign op.
    learning_rate = tf.get_variable("learning_rate",
                                    shape=[],
                                    dtype=tf.float32,
                                    initializer=tf.constant_initializer(
                                        FLAGS.learning_rate),
                                    trainable=False)
    # Op that scales the current learning rate in place when run.
    reduce_learning_rate = learning_rate.assign(
        learning_rate * FLAGS.reduce_learning_rate_multiplier)

    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.int64,
        initializer=tf.zeros_initializer(),
        trainable=False,
        collections=[tf.GraphKeys.GLOBAL_VARIABLES, tf.GraphKeys.GLOBAL_STEP])

    # Set up optimizer with global norm clipping.
    trainable_variables = tf.trainable_variables()
    optimizer = tf.train.AdamOptimizer(learning_rate)
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, trainable_variables),
                                      FLAGS.max_grad_norm)

    train_step = optimizer.apply_gradients(zip(grads, trainable_variables),
                                           global_step=global_step)

    show_all_variables()
    merged_all = tf.summary.merge_all()
    # At most one checkpoint is saved per epoch below, so keeping max_epochs
    # of them means none saved during this run is rotated out.
    saver = tf.train.Saver(max_to_keep=FLAGS.max_epochs)

    # Train
    config = tf.ConfigProto()
    # Prevent exhausting all the gpu memories
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Run init
        sess.run(tf.global_variables_initializer())

        summary_writer = tf.summary.FileWriter(
            os.path.join(FLAGS.save_dir, "nnet"), sess.graph)

        if FLAGS.resume_training:
            restore_from_ckpt(sess, saver)
        # Runs after the optional restore, so the learning-rate variable is
        # always set to the flag value, overriding any value a restore may
        # have loaded.
        sess.run(tf.assign(learning_rate, FLAGS.learning_rate))
        # add a blank line for log readability
        print()
        sys.stdout.flush()

        # Baseline validation loss before any training in this run; the
        # first epoch must beat it to be accepted.
        sess.run(validation_init_op)
        loss_prev = eval_one_epoch(sess, loss, dataset_valid.num_batches)
        tf.logging.info("CROSSVAL PRERUN AVG.LOSS %.4f\n" % loss_prev)

        for epoch in range(FLAGS.max_epochs):
            # Train one epoch
            time_start = time.time()
            sess.run(training_init_op)
            tr_loss = train_one_epoch(sess, summary_writer, merged_all,
                                      global_step, train_step, loss,
                                      dataset_train.num_batches)
            time_end = time.time()
            used_time = time_end - time_start

            # Validate one epoch
            sess.run(validation_init_op)
            val_loss = eval_one_epoch(sess, loss, dataset_valid.num_batches)

            # Determine checkpoint path
            # The flag is overwritten with the variable's current value so
            # the checkpoint name and log lines show the rate actually used.
            FLAGS.learning_rate = sess.run(learning_rate)
            cptk_name = 'nnet_epoch%d_lrate%g_tr%.4f_cv%.4f' % (
                epoch + 1, FLAGS.learning_rate, tr_loss, val_loss)
            checkpoint_path = os.path.join(FLAGS.save_dir, "nnet", cptk_name)

            # accept or reject new parameters
            if val_loss < loss_prev:
                saver.save(sess, checkpoint_path)
                # logging training loss along with validation loss
                tf.logging.info("EPOCH %d: TRAIN AVG.LOSS %.4f, (lrate%g) "
                                "CROSSVAL AVG.LOSS %.4f, TIME USED %.2f, %s" %
                                (epoch + 1, tr_loss, FLAGS.learning_rate,
                                 val_loss, used_time, "nnet accepted"))
                loss_prev = val_loss
            else:
                tf.logging.info("EPOCH %d: TRAIN AVG.LOSS %.4f, (lrate%g) "
                                "CROSSVAL AVG.LOSS %.4f, TIME USED %.2f, %s" %
                                (epoch + 1, tr_loss, FLAGS.learning_rate,
                                 val_loss, used_time, "nnet rejected"))
                # NOTE(review): presumably reloads the last saved checkpoint
                # to discard this epoch's update; the return value is not
                # checked, so confirm what happens when no checkpoint has
                # been saved yet.
                restore_from_ckpt(sess, saver)
                # Reducing learning rate.
                sess.run(reduce_learning_rate)

            # add a blank line for log readability
            print()
            sys.stdout.flush()

示例#5

0

显示文件

    train_accuracy = tf.keras.metrics.BinaryAccuracy(name='train_accuracy')
    train_precision = tf.keras.metrics.Precision(name='train_precision', dtype='float32')
    train_recall = tf.keras.metrics.Recall(name='train_recall', dtype='float32')

    # Metrics that will measure loss and accuracy of the model over the testing process
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.BinaryAccuracy(name='test_accuracy')
    test_precision = tf.keras.metrics.Precision(name='test_precision', dtype='float32')
    test_recall = tf.keras.metrics.Recall(name='test_recall', dtype='float32')

    model = TfModel(example_dim,
                loss_object, 
                optimizer, 
                train_loss, 
                train_accuracy, 
                train_precision, 
                train_recall, 
                test_loss, 
                test_accuracy, 
                test_precision, 
                test_recall)

bestModel = None
confusion = None
if retrain:
    if (model_type == 'functional'):
        template = '\n###### Test results ######\n\nTest Loss: {},\nTest Accuracy: {},\nTest Precision: {},\nTest Recall: {},\nTest AUC: {},\nTest F-Score: {}\n'
        
        earlyStopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', 
                                                    patience=5,
                                                    restore_best_weights=True)