Example #1

import time
from datetime import datetime

import tensorflow as tf

# VanillaRNN, LSTM and get_batch are assumed to be provided by the
# accompanying assignment modules (TensorFlow 1.x API).

def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Setup the model that we are going to use
    if config.model_type == 'RNN':
        print("Initializing Vanilla RNN model...")
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size)
    else:
        print("Initializing LSTM model...")
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size)

    ###########################################################################
    # Implement code here.
    ###########################################################################
    # Load test data
    test_size = int(config.batch_size * config.train_steps / 3)
    x_test, y_test = get_batch(test_size, config.input_length)

    input_placeholder = tf.placeholder(tf.float32,
                                       shape=(config.input_length - 1, None,
                                              config.input_dim))
    labels_placeholder = tf.placeholder(tf.int32,
                                        shape=(None, config.num_classes))
    logits = model.compute_logits(input_placeholder)
    logits_test = model.compute_logits_test(input_placeholder, test_size)

    # Define the optimizer
    optimizer = tf.train.RMSPropOptimizer(config.learning_rate)

    ###########################################################################
    # QUESTION: what happens here and why?
    ###########################################################################
    loss = model.compute_loss(logits, labels_placeholder)
    grads_and_vars = optimizer.compute_gradients(loss)

    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(
        zip(grads_clipped, variables))
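    # A worked illustration of the clipping step (numbers are hypothetical):
    # with two gradient tensors of norms 3.0 and 4.0, the global norm is
    # sqrt(3.0**2 + 4.0**2) = 5.0. If config.max_norm_gradient were 1.0,
    # tf.clip_by_global_norm would scale every gradient by 1.0 / 5.0,
    # preserving their directions while bounding the overall update size.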
    ############################################################################
    accuracy = model.accuracy(logits_test, labels_placeholder)
    # Record at least one summary; otherwise tf.summary.merge_all() below
    # returns None and sess.run(summary) would fail.
    tf.summary.scalar('accuracy', accuracy)

    summary = tf.summary.merge_all()
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    sess = tf.Session()
    summary_writer = tf.summary.FileWriter(config.summary_path, sess.graph)
    sess.run(init)
    ############################################################################

    for train_step in range(config.train_steps):

        # Only for time measurement of step through network
        t1 = time.time()

        # Load palindromes
        x, y = get_batch(config.batch_size, config.input_length)
        feed_dict = {
            input_placeholder: x,
            labels_placeholder: y,
        }
        #_, loss_value, accuracy_value = sess.run([apply_gradients_op, dummy, accuracy], feed_dict=feed_dict)
        _, loss_value = sess.run([apply_gradients_op, dummy],
                                 feed_dict=feed_dict)

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Print the training progress
        if train_step % config.print_every == 0:
            feed_dict = {
                input_placeholder: x_test,
                labels_placeholder: y_test,
            }
            accuracy_value = sess.run(accuracy, feed_dict=feed_dict)
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.2f}".
                  format(datetime.now().strftime("%Y-%m-%d %H:%M"), train_step,
                         config.train_steps, config.batch_size,
                         examples_per_second, accuracy_value, loss_value))
            # Update the events file.
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, train_step)
            summary_writer.flush()

    summary_writer.close()
    sess.close()
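
Example #1 relies on a get_batch helper that is not shown. The sketch below is
a minimal NumPy stand-in matching the placeholder shapes above: time-major
one-hot inputs of shape (input_length - 1, batch_size, input_dim) built from
the first T-1 digits of random palindromes, and one-hot labels for the final
digit. The construction and the assumption input_dim == num_classes == 10 are
guesses, not the assignment's actual implementation.

import numpy as np

def get_batch(batch_size, input_length, num_digits=10):
    # Random palindromes of length input_length over the digits 0-9:
    # a random left half (plus a middle digit for odd lengths), mirrored.
    half = np.random.randint(num_digits, size=(batch_size, input_length // 2))
    middle = np.random.randint(num_digits, size=(batch_size, input_length % 2))
    palindromes = np.concatenate([half, middle, half[:, ::-1]], axis=1)

    # Inputs: first T-1 digits, one-hot encoded, time-major
    # -> shape (input_length - 1, batch_size, num_digits).
    x = np.eye(num_digits)[palindromes[:, :-1]].transpose(1, 0, 2)

    # Labels: last digit, one-hot encoded -> shape (batch_size, num_digits).
    y = np.eye(num_digits)[palindromes[:, -1]]
    return x.astype(np.float32), y.astype(np.int32)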
Example #2

import time
from datetime import datetime

import tensorflow as tf

import utils  # assumed to provide generate_palindrome_batch
# VanillaRNN and LSTM are assumed to be provided by the accompanying
# assignment modules (TensorFlow 1.x API).

def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Setup the model that we are going to use
    if config.model_type == 'RNN':
        print("Initializing Vanilla RNN model...")
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size)
    else:
        print("Initializing LSTM model...")
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    # Create placeholders
    with tf.name_scope('input'):
        inputs = tf.placeholder(
            tf.int32,
            shape=[config.batch_size, config.input_length - 1],
            name='inputs')
        labels = tf.placeholder(tf.int32,
                                shape=[config.batch_size],
                                name='labels')
        test_inputs = tf.placeholder(
            tf.int32,
            shape=[config.batch_size, config.input_length - 1],
            name='test_inputs')
        test_labels = tf.placeholder(tf.int32,
                                     shape=[config.batch_size],
                                     name='test_labels')

    # Compute the logits
    with tf.name_scope('logits'):
        logits = model.compute_logits(inputs)
    # Compute the loss
    with tf.name_scope('loss'):
        loss = model.compute_loss(logits, labels)
    tf.summary.scalar('loss', loss)
    # Define the optimizer
    optimizer = tf.train.RMSPropOptimizer(config.learning_rate)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    ###########################################################################
    # QUESTION: what happens here and why? -> The gradients are clipped to a
    # maximum global norm to prevent exploding gradients (gradient clipping).
    ###########################################################################
    global_step = tf.Variable(0, trainable=False, name='global_step')

    grads_and_vars = optimizer.compute_gradients(loss)

    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(zip(grads_clipped,
                                                       variables),
                                                   global_step=global_step)

    # Compute the accuracy
    with tf.name_scope('accuracy'):
        with tf.name_scope('predictions'):
            predictions = model.compute_logits(test_inputs)
        with tf.name_scope('accuracy'):
            accuracy = model.accuracy(predictions, test_labels)
    tf.summary.scalar('accuracy', accuracy)

    merged = tf.summary.merge_all()
    test_writer = tf.summary.FileWriter(config.summary_path + '/test',
                                        graph=tf.get_default_graph())

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    ############################################################################

    ###########################################################################
    # Implement code here.
    ###########################################################################

    for train_step in range(config.train_steps + 1):

        # Only for time measurement of step through network
        t1 = time.time()

        batch = utils.generate_palindrome_batch(config.batch_size,
                                                config.input_length)
        # Take the first T-1 digits as input
        batch_x = batch[:, 0:(config.input_length - 1)]
        # Take the last digit as the label (the correct class)
        batch_y = batch[:, -1]
        sess.run(apply_gradients_op,
                 feed_dict={
                     inputs: batch_x,
                     labels: batch_y
                 })

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Print the training progress
        if train_step % config.print_every == 0:
            # Create a fresh batch for evaluation
            batch_test = utils.generate_palindrome_batch(
                config.batch_size, config.input_length)
            batch_x_test = batch_test[:, 0:(config.input_length - 1)]
            batch_y_test = batch_test[:, -1]

            l, acc, summary = sess.run(
                [loss, accuracy, merged],
                feed_dict={
                    inputs: batch_x,
                    labels: batch_y,
                    test_inputs: batch_x_test,
                    test_labels: batch_y_test
                })

            test_writer.add_summary(summary, train_step)
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Accuracy = {}%, Loss = {}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), train_step,
                      config.train_steps, config.batch_size,
                      examples_per_second, acc, l))
    test_writer.close()
    sess.close()
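
Example #2 assumes utils.generate_palindrome_batch returns an integer array of
shape (batch_size, input_length) whose rows read the same forwards and
backwards; the first T-1 columns are then fed as inputs and the last column is
the class to predict. A minimal sketch under that assumption (the real helper
may differ):

import numpy as np

def generate_palindrome_batch(batch_size, input_length, num_digits=10):
    # Random left half (plus a middle digit for odd lengths), mirrored so
    # that every row is a palindrome; shape (batch_size, input_length).
    half = np.random.randint(num_digits, size=(batch_size, input_length // 2))
    middle = np.random.randint(num_digits, size=(batch_size, input_length % 2))
    return np.concatenate([half, middle, half[:, ::-1]], axis=1)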