def main(_=None):
    """Train the MNIST classifier chosen by ``FLAGS.model`` for 10 epochs.

    After every epoch the model is evaluated on the test split and the
    classification accuracy is printed.

    Args:
        _: Ignored; present so the function can be used as an app entry point.

    Raises:
        ValueError: If ``FLAGS.model`` is neither 'full' nor 'conv'.
    """
    # The data is supplied as numpy arrays, so the graph reads it through
    # placeholders that are filled via the feed dict on every step.
    images_ph = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    labels_ph = tf.placeholder(tf.float32, [BATCH_SIZE, 10])
    feeds = (images_ph, labels_ph)

    # Reject unknown model names up front, then build the requested network.
    # The returned object exposes ``.loss`` and ``.softmax``.
    model_name = FLAGS.model
    if model_name not in ('full', 'conv'):
        raise ValueError('model must be full or conv: %s' % model_name)
    build_model = (
        multilayer_fully_connected if model_name == 'full' else lenet5)
    model = build_model(images_ph, labels_ph)

    # The evaluation node is only wanted at test time, hence the explicit
    # test phase (some ops behave differently in test vs. train).
    accuracy_op = model.softmax.evaluate_classifier(
        labels_ph, phase=pt.Phase.test)

    # Load both splits as numpy arrays.
    train_images, train_labels = data_utils.mnist(training=True)
    test_images, test_labels = data_utils.mnist(training=False)

    # pt.apply_optimizer adds regularization losses and sets up a step
    # counter (pt.global_step()).
    sgd = tf.train.GradientDescentOptimizer(0.01)
    train_step = pt.apply_optimizer(sgd, losses=[model.loss])

    # With a save_path the runner checkpoints periodically; otherwise the
    # model is lost when the session ends.
    trainer = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch_number in xrange(1, 11):
            # Reshuffle the training data before each pass.
            shuffled = data_utils.permute_data((train_images, train_labels))
            train_images, train_labels = shuffled

            train_feed = pt.train.feed_numpy(
                BATCH_SIZE, train_images, train_labels)
            trainer.train_model(
                train_step,
                model.loss,
                EPOCH_SIZE,
                feed_vars=feeds,
                feed_data=train_feed,
                print_every=100)

            test_feed = pt.train.feed_numpy(
                BATCH_SIZE, test_images, test_labels)
            classification_accuracy = trainer.evaluate_model(
                accuracy_op,
                TEST_SIZE,
                feed_vars=feeds,
                feed_data=test_feed)
            print('Accuracy after %d epoch %g%%' % (
                epoch_number, classification_accuracy * 100))
def main(_=None):
    """Build, train and evaluate an MNIST classifier.

    ``FLAGS.model`` selects the architecture ('full' or 'conv'). Training
    runs for 10 epochs of ``EPOCH_SIZE`` batches; after each epoch the test
    accuracy over ``TEST_SIZE`` batches is printed.

    Args:
        _: Unused positional argument accepted for entry-point compatibility.

    Raises:
        ValueError: If ``FLAGS.model`` is not 'full' or 'conv'.
    """
    # Numpy data enters the graph through placeholders fed by the feed dict.
    image_input = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    label_input = tf.placeholder(tf.float32, [BATCH_SIZE, 10])

    # Guard clause first; what remains is a two-way choice of network.
    selected = FLAGS.model
    if selected not in ('full', 'conv'):
        raise ValueError('model must be full or conv: %s' % selected)
    if selected == 'full':
        net = multilayer_fully_connected(image_input, label_input)
    else:
        net = lenet5(image_input, label_input)

    # Accuracy node, built for the test phase only.
    eval_accuracy = net.softmax.evaluate_classifier(
        label_input, phase=pt.Phase.test)

    # Training and test splits as numpy arrays.
    train_images, train_labels = data_utils.mnist(training=True)
    test_images, test_labels = data_utils.mnist(training=False)

    # apply_optimizer also adds regularization losses and creates the
    # global step counter.
    train_op = pt.apply_optimizer(
        tf.train.GradientDescentOptimizer(0.01), losses=[net.loss])

    # save_path lets the runner checkpoint automatically.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    placeholders = (image_input, label_input)
    with tf.Session():
        for completed in xrange(10):
            # Shuffle the training data.
            train_images, train_labels = data_utils.permute_data(
                (train_images, train_labels))

            runner.train_model(
                train_op,
                net.loss,
                EPOCH_SIZE,
                feed_vars=placeholders,
                feed_data=pt.train.feed_numpy(
                    BATCH_SIZE, train_images, train_labels),
                print_every=100)

            score = runner.evaluate_model(
                eval_accuracy,
                TEST_SIZE,
                feed_vars=placeholders,
                feed_data=pt.train.feed_numpy(
                    BATCH_SIZE, test_images, test_labels))
            print('Accuracy after %d epoch %g%%' % (completed + 1, score * 100))
def main(_=None):
    """Train the classifier chosen by ``FLAGS.model`` for 20 epochs.

    Prints the image shape reported by ``inp`` for ``FLAGS.input_folder``,
    builds the selected network, then alternates one training pass and one
    evaluation pass per epoch, printing the test accuracy each time.

    Args:
        _: Ignored; present so the function can be used as an app entry point.

    Raises:
        ValueError: If ``FLAGS.model`` is neither 'full' nor 'conv'.
    """
    image_shape = inp.get_image_shape(FLAGS.input_folder)
    batch_shape = (BATCH_SIZE,) + image_shape
    print('>>', image_shape, batch_shape)

    # NOTE(review): batch_shape is only printed; the placeholder is still
    # hard-coded to 28x28x1 images. Confirm whether it should use batch_shape.
    image_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 10])

    if FLAGS.model == 'full':
        print('fully connected network')
        result = multilayer_fully_connected(
            image_placeholder, labels_placeholder)
    elif FLAGS.model == 'conv':
        print('conv network')
        result = lenet5(image_placeholder, labels_placeholder)
    else:
        # Fail with a clear message instead of a NameError on `result` below.
        raise ValueError('model must be full or conv: %s' % FLAGS.model)

    # Evaluation node, built for the test phase only.
    accuracy = result.softmax.evaluate_classifier(
        labels_placeholder, phase=pt.Phase.test)

    # Grab the data as numpy arrays.
    train_images, train_labels = data_utils.mnist(training=True)
    test_images, test_labels = data_utils.mnist(training=False)
    print(train_images.shape)
    print(train_labels.shape)

    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(20):
            # Shuffle the training data.
            train_images, train_labels = data_utils.permute_data(
                (train_images, train_labels))
            # NOTE(review): this replaces the freshly shuffled images with the
            # images read from FLAGS.input_folder while keeping the shuffled
            # labels. Confirm that images and labels are meant to be paired
            # this way.
            train_images = inp.get_images(FLAGS.input_folder)

            runner.train_model(
                train_op,
                result.loss,
                _epoch_size,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(
                    BATCH_SIZE, train_images, train_labels),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                _test_size,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(
                    BATCH_SIZE, test_images, test_labels))
            print('Accuracy after %d epoch %g%%' % (
                epoch + 1, classification_accuracy * 100))
def main(_=None):
    """Train the Shakespeare next-character model and print samples from it.

    Builds three copies of the model that share variables under the
    'shakespeare' scope (train, test and single-character inference), trains
    for ``FLAGS.epochs`` epochs, and after each epoch prints the
    next-character accuracy and a short sample. A final sample is printed
    once training ends.

    Args:
        _: Ignored; present so the function can be used as an app entry point.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('Starting Shakespeare')

    # Since we are feeding our data as numpy arrays, we need to create
    # placeholders in the graph. These must then be fed using the feed dict.
    input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
    output_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])

    merged_size = BATCH_SIZE * TIMESTEPS

    inp = data_utils.reshape_data(input_placeholder)

    # We need a dense output to calculate loss and accuracy.
    # sparse_to_dense does a lookup using the indices from the first Tensor.
    # Because we are filling in a 2D array, the indices need to be
    # 2 dimensional.
    t = tf.concat(1, [
        tf.constant(
            numpy.arange(merged_size).reshape((merged_size, 1)),
            dtype=tf.int32),
        data_utils.reshape_data(output_placeholder)
    ])

    labels = tf.sparse_to_dense(t, [merged_size, CHARS], 1.0, 0.0)

    # Some ops have different behaviors in test vs train and these take a
    # phase argument.
    with tf.variable_scope('shakespeare'):
        training_logits = create_model(inp, TIMESTEPS, pt.Phase.train)
        # Create the result. Softmax applies softmax and creates a cross
        # entropy loss. The result is a namedtuple.
        training_result = training_logits.softmax(labels)

    # Create the gradient optimizer and apply it to the graph.
    # pt.apply_optimizer adds regularization losses and sets up a step
    # counter (pt.global_step()) for you.
    optimizer = tf.train.AdagradOptimizer(0.5)
    train_op = pt.apply_optimizer(optimizer, losses=[training_result.loss])

    # For tracking accuracy in evaluation, we need to add an evaluation node.
    # We only run this when testing, so we need to specify that in the phase.
    # We also want to disable dropout, so we pass the phase to create_model.

    # Call variable scope by name so we also create a name scope. This
    # ensures that we share variables and our names are properly organized.
    with tf.variable_scope('shakespeare', reuse=True):
        test_logits = create_model(inp, TIMESTEPS, pt.Phase.test)
        test_result = test_logits.softmax(labels)

    # Accuracy creates variables, so make it outside of the above scope.
    accuracy = test_result.softmax.evaluate_classifier(
        labels, phase=pt.Phase.test)

    # Create an inference model so that we can sample. The big difference is
    # that the input is a single character and it requires reset nodes.
    with tf.variable_scope('shakespeare', reuse=True):
        inference_input = tf.placeholder(tf.int32, [])
        # Needs to be 2 dimensional so that it matches the dims of the other
        # models.
        reshaped = pt.wrap(inference_input).reshape([1, 1])
        inference_logits = create_model(reshaped, 1, pt.Phase.infer)

    # Grab the data as numpy arrays. Inputs and targets are the same
    # sequences offset by one position.
    shakespeare = data_utils.shakespeare(TIMESTEPS + 1)
    shakespeare_in = shakespeare[:, :-1]
    shakespeare_out = shakespeare[:, 1:]

    # We can set a save_path in the runner to automatically checkpoint every
    # so often. Otherwise at the end of the session, the model will be lost.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(FLAGS.epochs):
            # Shuffle the training data.
            shakespeare_in, shakespeare_out = data_utils.permute_data(
                (shakespeare_in, shakespeare_out))

            # Floor division keeps the step count an int even under true
            # division (Python 3 or `from __future__ import division`).
            batches_per_epoch = len(shakespeare_in) // BATCH_SIZE

            runner.train_model(
                train_op,
                training_result.loss,
                batches_per_epoch,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(
                    BATCH_SIZE, shakespeare_in, shakespeare_out),
                print_every=10)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                batches_per_epoch,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(
                    BATCH_SIZE, shakespeare_in, shakespeare_out))

            print('Next character accuracy after epoch %d: %g%%' % (
                epoch + 1, classification_accuracy * 100))

            # Use a temperature smaller than 1 because the early stages of
            # the model don't assign much confidence.
            print(sample(inference_input,
                         inference_logits,
                         max_length=128,
                         temperature=0.5))

        # Print a sampling from the model.
        print(sample(inference_input, inference_logits))
def main(_=None):
    """Train the Baby Names sequence classifier and print accuracy per epoch.

    Builds a training and a test copy of the model that share variables under
    the 'baby_names' scope, trains for 100 epochs, and prints the evaluated
    accuracy after each epoch.

    Args:
        _: Ignored; present so the function can be used as an app entry point.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('Starting Baby Names')

    # Since we are feeding our data as numpy arrays, we need to create
    # placeholders in the graph. These must then be fed using the feed dict.
    input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
    output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, SEXES])

    inp = data_utils.reshape_data(input_placeholder)

    # Create a label for each timestep.
    labels = data_utils.reshape_data(
        tf.reshape(
            tf.tile(output_placeholder, [1, TIMESTEPS]),
            [BATCH_SIZE, TIMESTEPS, SEXES]),
        per_example_length=2)

    # We also need to set per example weights so that the softmax doesn't
    # output a prediction on intermediate nodes.
    length_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, 1])

    # We need a dense multiplier for the per example weights. The only place
    # that has a non-zero loss is the first EOS after the last character of
    # the name; the characters in the name and the trailing EOS characters
    # are given a 0 loss by assigning the weight to 0.0 and in the end only
    # one character in each batch has a weight of 1.0.
    # sparse_to_dense does a lookup using the indices from the first Tensor.
    # Because we are filling in a 2D array, the indices need to be
    # 2 dimensional. Since we want to assign 1 value for each row, the first
    # dimension can just be a sequence.
    t = tf.concat(1, [
        tf.constant(
            numpy.arange(BATCH_SIZE).reshape((BATCH_SIZE, 1)),
            dtype=tf.int32),
        length_placeholder
    ])

    # Squeeze removes dimensions that are equal to 1. per_example_weights
    # must end up as 1 dimensional.
    per_example_weights = data_utils.reshape_data(
        tf.sparse_to_dense(
            t, [BATCH_SIZE, TIMESTEPS], 1.0, default_value=0.0)).squeeze()

    # We need 2 copies of the graph that share variables. The first copy runs
    # training and will do dropout if specified and the second will not
    # include dropout. Dropout is controlled by the phase argument, which
    # sets the mode consistently throughout a graph.
    with tf.variable_scope('baby_names'):
        result = create_model(inp, labels, TIMESTEPS, per_example_weights)

    # Call variable scope by name so we also create a name scope. This
    # ensures that we share variables and our names are properly organized.
    with tf.variable_scope('baby_names', reuse=True):
        # Some ops have different behaviors in test vs train and these take a
        # phase argument.
        test_result = create_model(
            inp, labels, TIMESTEPS, per_example_weights, phase=pt.Phase.test)

    # For tracking accuracy in evaluation, we need to add an evaluation node.
    # We only run this when testing, so we need to specify that in the phase.
    accuracy = test_result.softmax.evaluate_classifier(
        labels, phase=pt.Phase.test, per_example_weights=per_example_weights)

    # We can also compute a batch accuracy to monitor progress.
    batch_accuracy = result.softmax.evaluate_classifier(
        labels, phase=pt.Phase.train, per_example_weights=per_example_weights)

    # Grab the inputs, outputs and lengths as numpy arrays.
    # Lengths could have been calculated from names, but it was easier to
    # calculate inside the utility function.
    names, sex, lengths = data_utils.baby_names(TIMESTEPS)

    # Floor division keeps the step count an int even under true division
    # (Python 3 or `from __future__ import division`).
    epoch_size = len(names) // BATCH_SIZE

    # Create the gradient optimizer and apply it to the graph.
    # pt.apply_optimizer adds regularization losses and sets up a step
    # counter (pt.global_step()) for you.
    # This sequence model does very well with initially high rates.
    optimizer = tf.train.AdagradOptimizer(
        tf.train.exponential_decay(
            1.0, pt.global_step(), epoch_size, 0.95, staircase=True))
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    # We can set a save_path in the runner to automatically checkpoint every
    # so often. Otherwise at the end of the session, the model will be lost.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(100):
            # Shuffle the training data.
            names, sex, lengths = data_utils.permute_data(
                (names, sex, lengths))

            runner.train_model(
                train_op,
                [result.loss, batch_accuracy],
                epoch_size,
                feed_vars=(input_placeholder,
                           output_placeholder,
                           length_placeholder),
                feed_data=pt.train.feed_numpy(
                    BATCH_SIZE, names, sex, lengths),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                epoch_size,
                print_every=0,
                feed_vars=(input_placeholder,
                           output_placeholder,
                           length_placeholder),
                feed_data=pt.train.feed_numpy(
                    BATCH_SIZE, names, sex, lengths))

            print('Accuracy after epoch %d: %g%%' % (
                epoch + 1, classification_accuracy * 100))
def main(_=None):
    """Train the Shakespeare next-character model and print samples from it.

    Builds train, test and single-character inference copies of the model
    that share variables under the 'shakespeare' scope, trains for
    ``FLAGS.epochs`` epochs, and after each epoch prints the next-character
    accuracy and a short sample. A final sample is printed after training.

    Args:
        _: Ignored; present so the function can be used as an app entry point.
    """
    print('Starting Shakespeare')

    # Since we are feeding our data as numpy arrays, we need to create
    # placeholders in the graph. These must then be fed using the feed dict.
    input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
    output_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])

    merged_size = BATCH_SIZE * TIMESTEPS

    inp = data_utils.reshape_data(input_placeholder)

    # We need a dense output to calculate loss and accuracy.
    # sparse_to_dense does a lookup using the indices from the first Tensor.
    # Because we are filling in a 2D array, the indices need to be
    # 2 dimensional.
    t = tf.concat(1, [
        tf.constant(
            numpy.arange(merged_size).reshape((merged_size, 1)),
            dtype=tf.int32),
        data_utils.reshape_data(output_placeholder)
    ])

    labels = tf.sparse_to_dense(t, [merged_size, CHARS], 1.0, 0.0)

    # Some ops have different behaviors in test vs train and these take a
    # phase argument.
    with tf.variable_scope('shakespeare'):
        training_logits = create_model(inp, TIMESTEPS, pt.Phase.train)
        # Create the result. Softmax applies softmax and creates a cross
        # entropy loss. The result is a namedtuple.
        training_result = training_logits.softmax(labels)

    # Create the gradient optimizer and apply it to the graph.
    # pt.apply_optimizer adds regularization losses and sets up a step
    # counter (pt.global_step()) for you.
    optimizer = tf.train.AdagradOptimizer(0.5)
    train_op = pt.apply_optimizer(optimizer, losses=[training_result.loss])

    # For tracking accuracy in evaluation, we need to add an evaluation node.
    # We only run this when testing, so we need to specify that in the phase.
    # We also want to disable dropout, so we pass the phase to create_model.

    # Call variable scope by name so we also create a name scope. This
    # ensures that we share variables and our names are properly organized.
    with tf.variable_scope('shakespeare', reuse=True):
        test_logits = create_model(inp, TIMESTEPS, pt.Phase.test)
        test_result = test_logits.softmax(labels)

    # Accuracy creates variables, so make it outside of the above scope.
    accuracy = test_result.softmax.evaluate_classifier(
        labels, phase=pt.Phase.test)

    # Create an inference model so that we can sample. The big difference is
    # that the input is a single character and it requires reset nodes.
    with tf.variable_scope('shakespeare', reuse=True):
        inference_input = tf.placeholder(tf.int32, [])
        # Needs to be 2 dimensional so that it matches the dims of the other
        # models.
        reshaped = pt.wrap(inference_input).reshape([1, 1])
        inference_logits = create_model(reshaped, 1, pt.Phase.infer)

    # Grab the data as numpy arrays. Inputs and targets are the same
    # sequences offset by one position.
    shakespeare = data_utils.shakespeare(TIMESTEPS + 1)
    shakespeare_in = shakespeare[:, :-1]
    shakespeare_out = shakespeare[:, 1:]

    # We can set a save_path in the runner to automatically checkpoint every
    # so often. Otherwise at the end of the session, the model will be lost.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(FLAGS.epochs):
            # Shuffle the training data.
            shakespeare_in, shakespeare_out = data_utils.permute_data(
                (shakespeare_in, shakespeare_out))

            # Floor division: a plain `/` yields a float step count under
            # Python 3 true division.
            batches_per_epoch = len(shakespeare_in) // BATCH_SIZE

            runner.train_model(
                train_op,
                training_result.loss,
                batches_per_epoch,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(
                    BATCH_SIZE, shakespeare_in, shakespeare_out),
                print_every=10)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                batches_per_epoch,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(
                    BATCH_SIZE, shakespeare_in, shakespeare_out))

            print('Next character accuracy after epoch %d: %g%%' % (
                epoch + 1, classification_accuracy * 100))

            # Use a temperature smaller than 1 because the early stages of
            # the model don't assign much confidence.
            print(sample(inference_input,
                         inference_logits,
                         max_length=128,
                         temperature=0.5))

        # Print a sampling from the model.
        print(sample(inference_input, inference_logits))
result = lenet5(image_placeholder, labels_placeholder) else: raise ValueError('model must be full or conv: %s' % FLAGS.model) accuracy = result.softmax.evaluate_classifier(labels_placeholder, phase=pt.Phase.test) train_images, train_labels = data_utils.mnist(training=True) test_images, test_labels = data_utils.mnist(training=False) optimizer = tf.train.GradientDescentOptimizer(0.01) train_op = pt.apply_optimizer(optimizer, losses=[result.loss]) runner = pt.train.Runner(save_path=FLAGS.save_path) with tf.Session(): for epoch in range(10): train_images, train_labels = data_utils.permute_data( (train_images, train_labels)) runner.train_model(train_op, result.loss, EPOCH_SIZE, feed_vars=(image_placeholder, labels_placeholder), feed_data=pt.train.feed_numpy( BATCH_SIZE, train_images, train_labels), print_every=100) classification_accuracy = runner.evaluate_model( accuracy, TEST_SIZE, feed_vars=(image_placeholder, labels_placeholder), feed_data=pt.train.feed_numpy(BATCH_SIZE, test_images, test_labels))
def main(_=None, weight_init=None, activation_f=tf.nn.sigmoid, data_min=0, data_scale=1.0, epochs=3,learning_rate=None):
    """Train an encoder/decoder from 2-wide inputs to 28x28x1 images.

    The default graph is reset, the model is trained with Adam for `epochs`
    epochs, and snapshots of the model output for one fixed batch are
    collected along the way and handed to `ut.reconstruct_images_epochs`.

    Args:
        _: Ignored.
        weight_init: Forwarded to `decoder` as `weight_init`.
        activation_f: Default activation function for the Pretty Tensor scope.
        data_min: Forwarded to `ut.mnist_select_n_classes` as `min`.
        data_scale: Forwarded to `ut.mnist_select_n_classes` as `scale`.
        epochs: Number of training epochs.
        learning_rate: Learning rate handed to `tf.train.AdamOptimizer`.
    """
    tf.reset_default_graph()

    input_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 2])
    output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])

    # Grab the data as numpy arrays.
    train_input, train_output = data_utils.mnist(training=True)
    test_input, test_output = data_utils.mnist(training=False)
    train_set = ut.mnist_select_n_classes(train_input, train_output, NUM_CLASSES, min=data_min, scale=data_scale)
    test_set = ut.mnist_select_n_classes(test_input, test_output, NUM_CLASSES, min=data_min, scale=data_scale)
    # Element 1 of each set feeds the model and element 0 is the target.
    # NOTE(review): presumably element 0 holds images and element 1 holds
    # 2-wide labels -- confirm against ut.mnist_select_n_classes.
    train_input, train_output = train_set[1], train_set[0]
    test_input, test_output = test_set[1], test_set[0]
    ut.print_info('train (min, max): (%f, %f)' % (np.min(train_set[0]), np.max(train_set[0])))
    # One fixed batch that is re-run during training to record model output.
    visual_inputs, visual_output = train_set[1][0:BATCH_SIZE], train_set[0][0:BATCH_SIZE]

    epoch_reconstruction = []

    # Whole batches per pass; these locals are used in place of any
    # module-level constants of the same name.
    EPOCH_SIZE = len(train_input) // BATCH_SIZE
    TEST_SIZE = len(test_input) // BATCH_SIZE

    ut.print_info('train: %s' % str(train_input.shape))
    ut.print_info('test: %s' % str(test_input.shape))
    ut.print_info('output shape: %s' % str(train_output[0].shape))

    # Sanity checks: the fixed batch must match the placeholder shapes exactly
    # and the full data sets must at least have the same rank.
    assert visual_inputs.shape == input_placeholder.get_shape()
    assert len(train_input.shape) == len(input_placeholder.get_shape())
    assert len(test_input.shape) == len(input_placeholder.get_shape())
    assert visual_output.shape == output_placeholder.get_shape()
    assert len(train_output.shape) == len(output_placeholder.get_shape())
    assert len(test_output.shape) == len(output_placeholder.get_shape())

    with pt.defaults_scope(activation_fn=activation_f,
                           # batch_normalize=True,
                           # learned_moments_update_rate=0.0003,
                           # variance_epsilon=0.001,
                           # scale_after_normalization=True
                           ):
        with pt.defaults_scope(phase=pt.Phase.train):
            with tf.variable_scope("model") as scope:
                output_tensor = decoder(encoder(input_placeholder), weight_init=weight_init)

    pretty_loss = loss(output_tensor, output_placeholder)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train = pt.apply_optimizer(optimizer, losses=[pretty_loss])

    init = tf.initialize_all_variables()
    runner = pt.train.Runner(save_path=FLAGS.save_path)

    # Lowest scaled evaluation loss seen so far; starts at a large sentinel.
    best_q = 100000
    with tf.Session() as sess:
        sess.run(init)
        for epoch in xrange(epochs):
            # Record the model output for the fixed batch every
            # ceil(epochs / 40) epochs and on the final epoch.
            if epoch % np.ceil(epochs / 40.0) == 0 or epoch + 1 == epochs:
                reconstruct, loss_value = sess.run([output_tensor, pretty_loss], {input_placeholder: visual_inputs, output_placeholder: visual_output})
                epoch_reconstruction.append(reconstruct)
                ut.print_info('epoch:%d (min, max): (%f %f)' %(epoch, np.min(reconstruct), np.max(reconstruct)))

            # Shuffle the training data.
            train_input, train_output = data_utils.permute_data(
                (train_input, train_output))

            runner.train_model(
                train,
                pretty_loss,
                EPOCH_SIZE,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_input, train_output)
            )
            # `accuracy` holds the evaluated `pretty_loss` on the test set, so
            # lower is better.
            accuracy = runner.evaluate_model(
                pretty_loss,
                TEST_SIZE,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_input, test_output))
            ut.print_time('Accuracy after %d epoch %g%%' % (
                epoch + 1, accuracy * 100))
            if best_q > accuracy * 10:
                best_q = accuracy * 10

    # Hand the collected snapshots, the target batch and the run parameters
    # to the reconstruction helper.
    ut.reconstruct_images_epochs(np.asarray(epoch_reconstruction), visual_output, save_params={'suf':'mn_trivs', 'act':activation_f, 'e':epochs, 'opt':optimizer, 'lr': learning_rate, 'init':weight_init, 'acu': int(best_q)})
def main(_=None):
    """Train the Baby Names classifier for 100 epochs, reporting accuracy.

    Two copies of the model share variables under the 'baby_names' scope:
    one built with default arguments for training and one built with the
    test phase for evaluation. After every epoch the evaluated accuracy is
    printed.

    Args:
        _: Ignored; present so the function can be used as an app entry point.
    """
    print('Starting Baby Names')

    # Numpy data reaches the graph through placeholders fed by the feed dict.
    names_ph = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
    sex_ph = tf.placeholder(tf.float32, [BATCH_SIZE, SEXES])

    model_input = data_utils.reshape_data(names_ph)

    # Repeat the label for every timestep.
    tiled_labels = tf.tile(sex_ph, [1, TIMESTEPS])
    per_step_labels = tf.reshape(
        tiled_labels, [BATCH_SIZE, TIMESTEPS, SEXES])
    labels = data_utils.reshape_data(per_step_labels, per_example_length=2)

    # Per example weights stop the softmax from producing a prediction on
    # intermediate nodes.
    length_ph = tf.placeholder(tf.int32, [BATCH_SIZE, 1])

    # Only the first EOS after the last character of a name carries a
    # non-zero loss weight; every other position is weighted 0.0.
    # sparse_to_dense looks values up by 2-D index, so each row number is
    # paired with the fed length to mark the single weighted position.
    row_numbers = tf.constant(
        numpy.arange(BATCH_SIZE).reshape((BATCH_SIZE, 1)), dtype=tf.int32)
    weight_indices = tf.concat_v2([row_numbers, length_ph], 1)
    dense_weights = tf.sparse_to_dense(
        weight_indices, [BATCH_SIZE, TIMESTEPS], 1.0, default_value=0.0)
    # squeeze drops size-1 dimensions; the weights must end up 1 dimensional.
    per_example_weights = data_utils.reshape_data(dense_weights).squeeze()

    # Training copy of the graph.
    with tf.variable_scope('baby_names'):
        train_model = create_model(
            model_input, labels, TIMESTEPS, per_example_weights)

    # Test copy: re-entering the scope by name with reuse=True shares the
    # variables, and the test phase switches phase-dependent ops.
    with tf.variable_scope('baby_names', reuse=True):
        eval_model = create_model(
            model_input,
            labels,
            TIMESTEPS,
            per_example_weights,
            phase=pt.Phase.test)

    # Evaluation node, run only when testing.
    accuracy = eval_model.softmax.evaluate_classifier(
        labels, phase=pt.Phase.test, per_example_weights=per_example_weights)

    # Batch accuracy on the training copy, used to monitor progress.
    batch_accuracy = train_model.softmax.evaluate_classifier(
        labels, phase=pt.Phase.train, per_example_weights=per_example_weights)

    # Inputs, outputs and lengths as numpy arrays.
    names, sex, lengths = data_utils.baby_names(TIMESTEPS)

    epoch_size = len(names) // BATCH_SIZE

    # Start with a high learning rate and decay it once per epoch.
    # pt.apply_optimizer also adds regularization losses and sets up the
    # step counter.
    decayed_rate = tf.train.exponential_decay(
        1.0, pt.global_step(), epoch_size, 0.95, staircase=True)
    adagrad = tf.train.AdagradOptimizer(decayed_rate)
    train_op = pt.apply_optimizer(adagrad, losses=[train_model.loss])

    # A save_path makes the runner checkpoint periodically.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    feeds = (names_ph, sex_ph, length_ph)
    with tf.Session():
        for epoch_number in xrange(1, 101):
            # Shuffle the training data.
            shuffled = data_utils.permute_data((names, sex, lengths))
            names, sex, lengths = shuffled

            runner.train_model(
                train_op,
                [train_model.loss, batch_accuracy],
                epoch_size,
                feed_vars=feeds,
                feed_data=pt.train.feed_numpy(
                    BATCH_SIZE, names, sex, lengths),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                epoch_size,
                print_every=0,
                feed_vars=feeds,
                feed_data=pt.train.feed_numpy(
                    BATCH_SIZE, names, sex, lengths))

            print('Accuracy after epoch %d: %g%%' % (
                epoch_number, classification_accuracy * 100))
def main(_=None, weight_init=tf.random_normal, activation_f=tf.nn.sigmoid, data_min=0, data_scale=1.0, epochs=50, learning_rate=0.01, prefix=None):
    """Train an image-to-image encoder/decoder and return its best test loss.

    Input and target are the same array (element 0 of the selected data set),
    so the model is trained to reproduce its input. Snapshots of the model
    output for one fixed batch are collected during training and handed to
    `ut.reconstruct_images_epochs`.

    Args:
        _: Ignored.
        weight_init: Forwarded to `decoder` as `weight_init`.
        activation_f: Default activation function for the Pretty Tensor scope.
        data_min: Forwarded to `ut.mnist_select_n_classes` as `min`.
        data_scale: Forwarded to `ut.mnist_select_n_classes` as `scale`.
        epochs: Number of training epochs.
        learning_rate: Learning rate handed to `tf.train.AdamOptimizer`.
        prefix: Stored under key 'i' in the saved parameters.

    Returns:
        The lowest evaluated loss seen over all epochs, or the initial
        sentinel 100000 if no epoch produced a lower value.
    """
    tf.reset_default_graph()

    input_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])

    # Grab the data as numpy arrays.
    train_input, train_output = data_utils.mnist(training=True)
    test_input, test_output = data_utils.mnist(training=False)
    train_set = ut.mnist_select_n_classes(train_input, train_output, NUM_CLASSES, min=data_min, scale=data_scale)
    test_set = ut.mnist_select_n_classes(test_input, test_output, NUM_CLASSES, min=data_min, scale=data_scale)
    # Input and target are the same array.
    train_input, train_output = train_set[0], train_set[0]
    test_input, test_output = test_set[0], test_set[0]
    ut.print_info('train (min, max): (%f, %f)' % (np.min(train_set[0]), np.max(train_set[0])))
    # One fixed batch that is re-run during training to record model output.
    visual_inputs, visual_output = train_set[0][0:BATCH_SIZE], train_set[0][0:BATCH_SIZE]

    epoch_reconstruction = []

    # Whole batches per pass; these locals are used in place of any
    # module-level constants of the same name.
    EPOCH_SIZE = len(train_input) // BATCH_SIZE
    TEST_SIZE = len(test_input) // BATCH_SIZE

    assert_model(input_placeholder, output_placeholder, test_input, test_output, train_input, train_output, visual_inputs, visual_output)

    with pt.defaults_scope(activation_fn=activation_f,
                           # batch_normalize=True,
                           # learned_moments_update_rate=0.0003,
                           # variance_epsilon=0.001,
                           # scale_after_normalization=True
                           ):
        with pt.defaults_scope(phase=pt.Phase.train):
            with tf.variable_scope("model") as scope:
                output_tensor = decoder(encoder(input_placeholder), weight_init=weight_init)

    pretty_loss = loss(output_tensor, output_placeholder)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train = pt.apply_optimizer(optimizer, losses=[pretty_loss])

    init = tf.initialize_all_variables()
    runner = pt.train.Runner(save_path=FLAGS.save_path)

    # Lowest evaluation loss seen so far; starts at a large sentinel.
    best_q = 100000
    with tf.Session() as sess:
        sess.run(init)
        for epoch in xrange(epochs):
            additional_info = ''
            # Record the model output for the fixed batch every
            # ceil(epochs / 40) epochs and on the final epoch.
            if epoch % np.ceil(epochs / 40.0) == 0 or epoch + 1 == epochs:
                reconstruct, loss_value = sess.run([output_tensor, pretty_loss], {input_placeholder: visual_inputs, output_placeholder: visual_output})
                epoch_reconstruction.append(reconstruct)
                additional_info += 'epoch:%d (min, max): (%f %f)' %(epoch, np.min(reconstruct), np.max(reconstruct))

            # Shuffle the training data.
            train_input, train_output = data_utils.permute_data(
                (train_input, train_output))

            runner.train_model(
                train,
                pretty_loss,
                EPOCH_SIZE,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_input, train_output),
                print_every=None
            )
            # `accuracy` holds the evaluated `pretty_loss` on the test set, so
            # lower is better.
            accuracy = runner.evaluate_model(
                pretty_loss,
                TEST_SIZE,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_input, test_output))
            ut.print_time('Accuracy after %2d/%d epoch %.2f; %s' % (epoch + 1, epochs, accuracy, additional_info))
            if best_q > accuracy:
                best_q = accuracy

    # Parameters describing this run, passed to the reconstruction helper and
    # to ut.to_file_name for the final log line.
    save_params = {'suf': 'mn_basic', 'act': activation_f, 'e': epochs, 'opt': optimizer, 'lr': learning_rate, 'init': weight_init, 'acu': int(best_q), 'bs': BATCH_SIZE, 'h': HIDDEN_0_SIZE, 'i':prefix}
    ut.reconstruct_images_epochs(np.asarray(epoch_reconstruction), visual_output, save_params=save_params)
    ut.print_time('Best Quality: %f for %s' % (best_q, ut.to_file_name(save_params)))
    ut.reset_start_time()
    return best_q