def classifier_gru_train_in_fn(train_file, test_file, batch_size,
                               sequence_length, input_dimension,
                               shuffle_capacity, shuffle_threads,
                               shuffle_min_after_dequeue):

    with tf.name_scope('Input_Queue') as scope:

        train_data_queue = tf.train.string_input_producer([train_file])
        train_features, train_labels = gen_data.read_and_decode(
            train_data_queue, batch_size, sequence_length, input_dimension,
            shuffle_capacity, shuffle_threads, shuffle_min_after_dequeue)
        test_data_queue = tf.train.string_input_producer([test_file])
        test_features, test_labels = gen_data.read_and_decode(
            test_data_queue, batch_size, sequence_length, input_dimension,
            shuffle_capacity, shuffle_threads, shuffle_min_after_dequeue)

        data_dict = {
            'train_features': train_features,
            'test_features': test_features
        }

        labels_dict = {
            'train_labels': train_labels,
            'test_labels': test_labels
        }

    return data_dict, labels_dict
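
A minimal sketch of how this input function could be consumed in a TF1 session; the file names and hyperparameters below are illustrative assumptions, not part of the example:

# Hypothetical usage (TF1 queue-runner style); values are placeholders.
data_dict, labels_dict = classifier_gru_train_in_fn(
    'train.tfrecord', 'test.tfrecord', batch_size=32,
    sequence_length=100, input_dimension=1,
    shuffle_capacity=2000, shuffle_threads=2,
    shuffle_min_after_dequeue=1000)

with tf.Session() as sess:
    sess.run(tf.group(tf.global_variables_initializer(),
                      tf.local_variables_initializer()))
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    features, labels = sess.run([data_dict['train_features'],
                                 labels_dict['train_labels']])
    coord.request_stop()
    coord.join(threads)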
Example #2
def classifier_gru_train_in_fn(train_file, test_file, batch_size,
                               sequence_length, input_dimension,
                               shuffle_capacity, shuffle_threads,
                               shuffle_min_after_dequeue):

    with tf.name_scope('Input_Queue') as scope:

        train_data_queue = tf.train.string_input_producer([train_file])
        train_features_dict, train_labels = gen_data.read_and_decode(
            train_data_queue, batch_size, sequence_length, input_dimension,
            shuffle_capacity, shuffle_threads, shuffle_min_after_dequeue)
        test_data_queue = tf.train.string_input_producer([test_file])
        test_features_dict, test_labels = gen_data.read_and_decode(
            test_data_queue, batch_size, sequence_length, input_dimension,
            shuffle_capacity, shuffle_threads, shuffle_min_after_dequeue)
        train_features = train_features_dict['sequence_values']
        test_features = test_features_dict['sequence_values']

        # The modification mentioned above:
        # take elements 1 to seqlen-1 as the (shifted) features
        train_features_s = tf.slice(train_features, [0, 1, 0], [
            train_features.shape[0], train_features.shape[1] - 1,
            train_features.shape[2]
        ])
        test_features_s = tf.slice(test_features, [0, 1, 0], [
            test_features.shape[0], test_features.shape[1] - 1,
            test_features.shape[2]
        ])
        # and elements 0 to seqlen-2 as the labels
        train_labels = tf.slice(train_features, [0, 0, 0], [
            train_features.shape[0], train_features.shape[1] - 1,
            train_features.shape[2]
        ])
        test_labels = tf.slice(test_features, [0, 0, 0], [
            test_features.shape[0], test_features.shape[1] - 1,
            test_features.shape[2]
        ])

        data_dict = {
            'train_features': {
                'sequence_values': train_features_s
            },
            'test_features': {
                'sequence_values': test_features_s
            }
        }
        labels_dict = {
            'train_labels': train_labels,
            'test_labels': test_labels
        }

    return data_dict, labels_dict
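
The effect of the slicing above can be checked on a toy batch; a NumPy sketch (shapes and values are illustrative):

import numpy as np

x = np.arange(2 * 5 * 1).reshape(2, 5, 1)  # [batch, seqlen, input_dim]
features = x[:, 1:, :]   # steps 1..seqlen-1, shape (2, 4, 1)
labels = x[:, :-1, :]    # steps 0..seqlen-2, shape (2, 4, 1)
# features[:, t] is paired with labels[:, t], the value one step earlier,
# which is exactly what the tf.slice calls above produce.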
Example #3
def main():

    # Create the checkpoint directory if it does not exist.
    if not os.path.exists(config.checkpoint_path):
        os.makedirs(config.checkpoint_path)

    # Create training data.
    if GENERATE_DATA or not os.path.exists(config.data_tmp_folder):
        if not os.path.exists(config.data_tmp_folder):
            os.makedirs(config.data_tmp_folder)
        print("Generating Data CSV")
        # List of lambdas: [lambda x: math.sin(x)]
        gen_data.gen_function_vals_csv(
            -50, -50 + (config.epoch_size + config.test_epoch_size + config.sequence_length) * 0.02,
            0.02, lambda x: math.sin(x),
            config.data_tmp_folder + 'sine.csv')
        gen_data.gen_function_vals_csv(
            -50, -50 + (config.epoch_size + config.test_epoch_size + config.sequence_length) * 0.02,
            0.02, lambda x: x * 0.8 + 0.04,
            config.data_tmp_folder + 'lin.csv')

        print("Reading Data from CSV")
        sine_x, data_sine = gen_data.read_function_vals_csv('x', 'y', config.data_tmp_folder + 'sine.csv')
        # sine_x: [TOTAL_LENGTH, 1]
        # data_sine:  [TOTAL_LENGTH, INPUT_DIMENSION]
        lin_x, data_lin = gen_data.read_function_vals_csv('x', 'y', config.data_tmp_folder + 'lin.csv')
        # lin_x: [TOTAL_LENGTH, 1]
        # data_lin:  [TOTAL_LENGTH, INPUT_DIMENSION]

        print("Writing TFRecords")
        datasequences = np.stack((data_sine, data_lin), axis=0)
        # datasequences: [ OUTPUT_DIMENSION, TOTAL_LENGTH, INPUT_DIMENSION ]

        functionsequences, labels = gen_data.all_sequences_from_datasequence(datasequences, config.sequence_length)
        # functionsequences: [ TOTAL_SEQUENCE_NUM, SEQUENCE_LENGTH, INPUT_DIMENSION ]
        # labels: [ TOTAL_SEQUENCE_NUM ]
        # Set apart some test data
        test_functionsequences, test_labels = gen_data.rand_sequences_from_datasequences(
            functionsequences, labels, config.test_epoch_size, True)
        # test_functionsequences: [ TEST_EPOCH_SIZE, SEQUENCE_LENGTH, INPUT_DIMENSION ]
        # test_labels: [ TEST_EPOCH_SIZE ]
        # functionsequences: [ SEQUENCE_NUM, SEQUENCE_LENGTH, INPUT_DIMENSION ]
        # labels: [ SEQUENCE_NUM ]

        gen_data.function_sequences_to_tfrecord(
            functionsequences, labels,
            config.data_tmp_folder + config.data_tfrecord_filename)
        gen_data.function_sequences_to_tfrecord(
            test_functionsequences, test_labels,
            config.data_tmp_folder + config.test_tfrecord_filename)
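        # The gen_data helpers are not shown in this example. A plausible
        # minimal implementation of gen_function_vals_csv, assuming it writes
        # simple x,y columns (matching the 'x'/'y' names read back above):
        #
        #     import csv
        #
        #     def gen_function_vals_csv(start, stop, step, fn, path):
        #         with open(path, 'w', newline='') as f:
        #             writer = csv.writer(f)
        #             writer.writerow(['x', 'y'])
        #             x = start
        #             while x < stop:
        #                 writer.writerow([x, fn(x)])
        #                 x += step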

    print('Setup Input Queue')
    with tf.name_scope('Input_Queue') as scope:

        data_queue = tf.train.string_input_producer([config.data_tmp_folder + config.data_tfrecord_filename])
        test_queue = tf.train.string_input_producer([config.data_tmp_folder + config.test_tfrecord_filename])

        sequences_batch, labels_batch = gen_data.read_and_decode(
            data_queue, config.batch_size, config.sequence_length,
            config.input_dimension, config.shuffle_capacity,
            config.shuffle_threads, config.shuffle_min_after_dequeue)
        test_sequences_batch, test_labels_batch = gen_data.read_and_decode(
            test_queue, config.batch_size, config.sequence_length,
            config.input_dimension, config.shuffle_capacity,
            config.shuffle_threads, config.shuffle_min_after_dequeue)
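        # Note: tf.train.string_input_producer is deprecated in later TF 1.x
        # releases in favor of tf.data. A hedged sketch of an equivalent
        # pipeline (parse_fn is hypothetical and would have to mirror the
        # record format that gen_data.read_and_decode expects):
        #
        #     dataset = (tf.data.TFRecordDataset(
        #                    config.data_tmp_folder + config.data_tfrecord_filename)
        #                .map(parse_fn)
        #                .shuffle(config.shuffle_capacity)
        #                .batch(config.batch_size)
        #                .repeat())
        #     sequences_batch, labels_batch = (
        #         dataset.make_one_shot_iterator().get_next())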

    # Global Step Counter
    with tf.name_scope('Global_Step') as scope:
        global_step = tf.Variable(0, trainable=False, name='Global_Step_Var')
        increment_global_step_op = tf.assign(global_step, global_step + 1)
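        # Idiomatic TF1 alternative (sketch only): let the framework own the
        # step and have the optimizer increment it, instead of a manual assign:
        #
        #     global_step = tf.train.get_or_create_global_step()
        #     # ... optimizer.minimize(loss, global_step=global_step)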

    # Create model
    print("Creating Model")

    # Model
    Hin = np.zeros([config.batch_size, config.hidden_layer_size *
                    config.hidden_layer_depth], dtype=np.float32)
    # Hin: [ BATCH_SIZE, INTERNALSIZE * NLAYERS ]

    train_H, train_keep, train_step, train_summary_op = lstmnet(
        sequences_batch, labels_batch, global_step, "train", False)
    test_H, test_keep, test_step, test_summary_op = lstmnet(
        test_sequences_batch, test_labels_batch, global_step, "test", True)

    # Setup logging with Tensorboard
    print("Setup Tensorboard")
    graph_location = tempfile.mkdtemp(prefix='log')  # unique log dir per run
    print(colored('    Saving graph to: ' + graph_location, 'red'))
    writer = tf.summary.FileWriter(graph_location, graph=tf.get_default_graph())
    saver = tf.train.Saver()

    # Limit used gpu memory.
    print("Configuring Tensorflow")
    tfconfig = tf.ConfigProto()
    # tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.75
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # train model.
    with tf.Session(config=tfconfig) as sess:
        print("Setup")
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        print("Training")
        for step in range(config.iters):

            if step % config.summary_iters == 0:  # summary step
                _, training_summary, test_summary = sess.run([train_step, train_summary_op, test_summary_op],
                                                             feed_dict={train_keep: config.pkeep, train_H: Hin,
                                                                        test_keep: 1.0, test_H: Hin})

                # saver.save expects a path prefix, not a bare directory;
                # the 'model.ckpt' prefix here is an illustrative choice.
                saver.save(sess, os.path.join(config.checkpoint_path, 'model.ckpt'))
                writer.add_summary(training_summary, step)
                writer.add_summary(test_summary, step)
            else:
                _ = sess.run([train_step], feed_dict={train_keep: config.pkeep, train_H: Hin})

            # Increment global step Counter
            # sess.run(increment_global_step_op)

        coord.request_stop()
        coord.join(threads)
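
Most of the session bookkeeping above (variable init, queue runners, checkpointing, summaries) can also be delegated to tf.train.MonitoredTrainingSession; a hedged sketch of the same loop, reusing the tensors defined in main():

# Sketch only: train_step, train_keep, train_H and Hin come from main().
with tf.train.MonitoredTrainingSession(
        checkpoint_dir=config.checkpoint_path,
        save_summaries_steps=config.summary_iters) as sess:
    for step in range(config.iters):
        sess.run(train_step,
                 feed_dict={train_keep: config.pkeep, train_H: Hin})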