def evaluate(dataset):
    """Evaluate model on Dataset for a number of steps."""
    with tf.Graph().as_default():
        # Graph creation
        batch_size = dataset.num_examples
        images_placeholder, labels_placeholder = mnist.placeholder_inputs(batch_size)
        logits = mnist.inference(images_placeholder, train=False)
        validation_accuracy = tf.reduce_sum(
            mnist.evaluation(logits, labels_placeholder)) / tf.constant(batch_size)
        validation_loss = mnist.loss(logits, labels_placeholder)

        # Reference to sess and saver
        sess = tf.Session()
        saver = tf.train.Saver()

        # Create summary writer
        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, graph_def=graph_def)

        step = -1
        while True:
            step = do_eval(saver, summary_writer, validation_accuracy,
                           validation_loss, images_placeholder,
                           labels_placeholder, dataset, prev_global_step=step)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)

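# The evaluate() loop above delegates to a do_eval() helper that it never defines.
# Below is a minimal sketch of a checkpoint-polling do_eval() matching that call
# signature; it is an assumption, not the original helper. FLAGS.checkpoint_dir,
# dataset.images and dataset.labels are assumed names, and the same module-level
# imports (tf, FLAGS) as the surrounding snippets are presumed.
def do_eval(saver, summary_writer, accuracy_op, loss_op,
            images_placeholder, labels_placeholder, dataset, prev_global_step):
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if not ckpt or not ckpt.model_checkpoint_path:
        return prev_global_step  # no checkpoint written yet; poll again later
    global_step = int(ckpt.model_checkpoint_path.split('-')[-1])
    if global_step == prev_global_step:
        return prev_global_step  # nothing new to evaluate
    with tf.Session() as sess:
        saver.restore(sess, ckpt.model_checkpoint_path)
        feed_dict = {images_placeholder: dataset.images,
                     labels_placeholder: dataset.labels}
        acc, loss = sess.run([accuracy_op, loss_op], feed_dict=feed_dict)
        print('step %d: validation accuracy = %.4f, loss = %.4f'
              % (global_step, acc, loss))
        # Write the scalars to the events file so they show up in TensorBoard.
        summary = tf.Summary()
        summary.value.add(tag='validation_accuracy', simple_value=float(acc))
        summary.value.add(tag='validation_loss', simple_value=float(loss))
        summary_writer.add_summary(summary, global_step)
    return global_step
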
def run_training():
    data_sets = data_mnist.read_data_sets()
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        loss = mnist.loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        summary_op = tf.merge_all_summaries()
        saver = tf.train.Saver()
        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
                saver.save(sess, checkpoint_file, global_step=step)
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.train)
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.validation)
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.test)

def run_test():
    """Evaluate MNIST from a saved checkpoint."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    train, validation, test = datasets_mnist.read_data_sets(
        FLAGS.input_data_dir, FLAGS.fake_data)
    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder, phase_pl = placeholder_inputs(
            FLAGS.batch_size)
        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2,
                                 phase_pl)
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        # Collect the variables so the moving averages can be inspected below.
        all_variable = tf.global_variables()
        # Create a saver for restoring training checkpoints.
        saver = tf.train.Saver()
        # Create a session for running Ops on the Graph.
        with tf.Session() as sess:
            saver.restore(sess, "log/model.ckpt-1999")
            for variable in all_variable:
                if "moving" in variable.name:
                    print(variable.name, variable.eval())
            do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                    phase_pl, test)

def run_training():
    """Train MNIST for a number of steps."""
    data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        loss = mnist.calculate_loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        # Collect all summaries generated by the default graph.
        summary_op = tf.merge_all_summaries()
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        sess = tf.Session()
        init = tf.initialize_all_variables()
        sess.run(init)
        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        # Training loop
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            # Run one step of the model. The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                saver.save(sess, FLAGS.train_dir, global_step=step)
                print('Training Data Evaluation:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.train)
                print('Validation Data Evaluation:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.validation)
                print('Test Data Evaluation:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.test)

def run_training():
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        loss = mnist.loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        # Build the summary tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        sess = tf.Session()
        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        sess.run(init)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.test)

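# Nearly every run_training() in this collection calls placeholder_inputs() and
# fill_feed_dict() without defining them. The sketches below follow the standard
# TensorFlow MNIST tutorial helpers and the call sites above; mnist.IMAGE_PIXELS
# and FLAGS.fake_data are assumptions carried over from that tutorial, and a few
# snippets use different helper names (fill_placeholder, fill_feed_dictionary).
def placeholder_inputs(batch_size):
    # Placeholders shaped for a fixed batch of flattened 28x28 images and int labels.
    images_placeholder = tf.placeholder(tf.float32,
                                        shape=(batch_size, mnist.IMAGE_PIXELS))
    labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size,))
    return images_placeholder, labels_placeholder


def fill_feed_dict(data_set, images_pl, labels_pl):
    # Pull the next batch from the DataSet and map it onto the graph's placeholders.
    images_feed, labels_feed = data_set.next_batch(FLAGS.batch_size, FLAGS.fake_data)
    return {images_pl: images_feed, labels_pl: labels_feed}
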
def run_training():
    data_sets = input_data.read_data_sets(fake_data)
    with tf.Graph().as_default():
        image_placeholder, label_placeholder = placeholder_inputs(batch_size)
        logits = mnist.inference(image_placeholder, hidden1_unit, hidden2_unit)
        # Compute the loss
        loss = mnist.loss(logits, label_placeholder)
        # Training op
        train_op = mnist.training(loss, 0.01)
        # Count the number of correct classifications
        eval_correct = mnist.evaluation(logits, label_placeholder)
        # Merge all summary ops in the default graph
        summary_op = tf.merge_all_summaries()
        # Saver for the variables in the network
        saver = tf.train.Saver()
        sess = tf.Session()
        # Initialize all variables
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(train_dir, sess.graph)

        for step in xrange(max_steps):
            start_time = time.time()
            # Build the feed_dict
            feed_dict = fill_placeholder(data_sets.train, image_placeholder,
                                         label_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            if step % 1000 == 0:
                saver.save(sess, train_dir, global_step=step)
                print('Training Data Eval:')
                do_eval(sess, eval_correct, image_placeholder, label_placeholder,
                        data_sets.train)
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, image_placeholder, label_placeholder,
                        data_sets.validation)
                print('Test Data Eval:')
                do_eval(sess, eval_correct, image_placeholder, label_placeholder,
                        data_sets.test)

def run_training():
    """Train MNIST for a number of steps."""
    data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        loss = mnist.calculate_loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        # Collect all summaries generated by the default graph.
        summary_op = tf.merge_all_summaries()
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        sess = tf.Session()
        init = tf.initialize_all_variables()
        sess.run(init)
        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        # Training loop
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            # Run one step of the model. The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                saver.save(sess, FLAGS.train_dir, global_step=step)
                print('Training Data Evaluation:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.train)
                print('Validation Data Evaluation:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.validation)
                print('Test Data Evaluation:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.test)

def run_training():
    data_sets = input_data.read_data_sets('MNIST_data', FLAGS.fake_data)
    with tf.Graph().as_default():
        image_placeholder, label_placeholder = placeholder_inputs(FLAGS.batch_size)
        logits = mnist.inference(image_placeholder, FLAGS.hidden1_unit,
                                 FLAGS.hidden2_unit)
        loss = mnist.loss(logits, label_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, label_placeholder)
        summary_op = tf.summary.merge_all()
        saver = tf.train.Saver()
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        for step in range(FLAGS.max_steps):
            start_time = time.time()
            feed_dict = fill_placeholder(data_sets.train, image_placeholder,
                                         label_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            if step % 1000 == 0:
                saver.save(sess, FLAGS.train_dir, global_step=step)
                print('Training Data Eval:')
                do_eval(sess, eval_correct, image_placeholder, label_placeholder,
                        data_sets.train)
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, image_placeholder, label_placeholder,
                        data_sets.validation)
                print('Test Data Eval:')
                do_eval(sess, eval_correct, image_placeholder, label_placeholder,
                        data_sets.test)

def run_training():
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        # The model
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        loss = mnist.loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        sess = tf.Session()
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        sess.run(init)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.train)
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.validation)
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.test)

def run_training():
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
    with tf.Graph().as_default():
        # Create the input placeholders
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        # Build the neural network
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        # Add the loss function
        loss = mnist.loss(logits, labels_placeholder)
        # Training op
        train_op = mnist.training(loss, FLAGS.learning_rate)
        # Accuracy evaluation
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        init = tf.global_variables_initializer()
        sess = tf.Session()
        sess.run(init)

        start_time = time.time()
        for step in range(FLAGS.max_steps):
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            if step % 100 == 0:
                duration = time.time() - start_time
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                start_time = time.time()

            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.train)
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.validation)
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.test)
        print(FLAGS)

def run_training():
    data_sets = input_data.read_data_sets(data_dir)
    with tf.Graph().as_default():
        input_holder, label_holder = generate_placeholder(50, input_size)
        logits = mnist.inference(input_holder, input_size, 128, 32, label_classes)
        loss = mnist.loss(logits, label_holder)
        train_op = mnist.training(loss, 0.01)
        eval_correct = mnist.evaluation(logits, label_holder)
        summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        sess = tf.Session()
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        sess.run(init)

        for step in range(2000):
            start_time = time.time()
            feed_dict = fill_feed_dict(data_sets.train, input_holder, label_holder, 50)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            if (step + 1) % 1000 == 0 or (step + 1) == 2000:
                checkpoint_file = os.path.join(log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                print('Training Data Eval:')
                do_eval(sess, eval_correct, input_holder, label_holder,
                        data_sets.train, 50)
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, input_holder, label_holder,
                        data_sets.validation, 50)
                print('Test Data Eval:')
                do_eval(sess, eval_correct, input_holder, label_holder,
                        data_sets.test, 50)

def run_training():
    data_sets = data_mnist.read_data_sets()
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        loss = mnist.loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        summary_op = tf.merge_all_summaries()
        saver = tf.train.Saver()
        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
                saver.save(sess, checkpoint_file, global_step=step)
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.train)
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.validation)
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.test)

def mnist_training():
    x = tf.placeholder(tf.float32, shape=[None, IMG_SIZE * IMG_SIZE], name='x_ph')
    y_ = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES], name='y_ph')
    logits = mnist.inference(x)
    loss = mnist.loss(logits, y_)
    train_op = mnist.training(loss, LR)
    eval_correct = mnist.evaluation(logits, y_)
    summary = tf.summary.merge_all()
    init = tf.global_variables_initializer()
    saver = tf.train.Saver(max_to_keep=cfg.MNIST.RUN.models_to_save)
    sess = tf.Session()
    # Instantiate a SummaryWriter to output summaries and the Graph.
    # summary_writer = tf.summary.FileWriter(, sess.graph)
    sess.run(init)
    print("*****TRAINING STARTED*******")

    for i in range(MAX_ITER):
        if i % 100 == 0 and i > 0:
            print('Step %d: loss = %.2f' % (i, loss_val))
            # targets = sess.run(tf.cast(mnist_db.test.labels, tf.int32))
            # prediction = sess.run(eval_correct,
            #                       feed_dict={x: mnist_db.test.images, y_: targets})
            # print('Step %d: loss = %.2f, accuracy %.4f' % (i, loss_val, prediction))
            saver.save(sess, os.path.join(cfg.MNIST.RUN.models_dir, 'model'),
                       global_step=i)
        batch = mnist_db.train.next_batch(BATCH_SIZE)
        _, loss_val = sess.run([train_op, loss], feed_dict={x: batch[0], y_: batch[1]})

    saver.save(sess, os.path.join(cfg.MNIST.RUN.models_dir,
                                  cfg.MNIST.RUN.last_model_name))

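# A hedged sketch (not part of the original) of reloading the newest checkpoint that
# mnist_training() writes and scoring the test split. It assumes the same module-level
# names used above (mnist_db, IMG_SIZE, NUM_CLASSES, cfg) and that mnist.evaluation()
# returns the number of correctly classified examples, as it does elsewhere here.
def mnist_testing():
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, shape=[None, IMG_SIZE * IMG_SIZE], name='x_ph')
        y_ = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES], name='y_ph')
        logits = mnist.inference(x)
        eval_correct = mnist.evaluation(logits, y_)
        saver = tf.train.Saver()
        with tf.Session() as sess:
            # latest_checkpoint reads the 'checkpoint' state file in models_dir.
            saver.restore(sess, tf.train.latest_checkpoint(cfg.MNIST.RUN.models_dir))
            correct = sess.run(eval_correct,
                               feed_dict={x: mnist_db.test.images,
                                          y_: mnist_db.test.labels})
            print('test accuracy: %.4f' %
                  (float(correct) / mnist_db.test.num_examples))
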
def train(dataset, testset):
    """Train on dataset for a number of steps."""
    with tf.Graph().as_default(), tf.device('/gpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.Variable(0, name="global_step", trainable=False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (dataset.num_examples / FLAGS.batch_size)
        decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                        global_step,
                                        decay_steps,
                                        FLAGS.learning_rate_decay_factor,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.AdamOptimizer(lr)

        # Fetch the data batch from the training set.
        images, labels = mnist.placeholder_inputs(FLAGS.batch_size)
        logits = mnist.inference(images)

        # Calculate the loss and gradients.
        total_loss = mnist.loss(logits, labels)
        grads = opt.compute_gradients(total_loss)

        # Apply the gradients to adjust the shared variables.
        apply_gradients_op = opt.apply_gradients(grads, global_step=global_step)
        with tf.control_dependencies([apply_gradients_op]):
            train_op = tf.identity(total_loss, name='train_op')

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge_all()

        # For testing the trained model.
        test_size = testset.num_examples
        test_images_placeholder, test_labels_placeholder = mnist.placeholder_inputs(
            FLAGS.batch_size)
        # logits_test = mnist.inference(test_images_placeholder, train=False)
        # pred = mnist.predictions(logits_test)
        validation_accuracy = tf.reduce_sum(
            mnist.evaluation(logits, labels)) / tf.constant(FLAGS.batch_size)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        for step in range(FLAGS.max_steps):
            feed_dict = mnist.fill_feed_dict(dataset, images, labels,
                                             FLAGS.batch_size)
            start_time = time.time()
            _, loss_value, acc = sess.run(
                [train_op, total_loss, validation_accuracy], feed_dict=feed_dict)
            # acc = sess.run(validation_accuracy, feed_dict=feed_dict_test)
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            examples_per_sec = FLAGS.batch_size / float(duration)
            format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                          'sec/batch); acc=%.4f')
            tf.logging.info(format_str % (datetime.now(), step, loss_value,
                                          examples_per_sec, duration, acc))

            if step % 500 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

def run_training():
    # Create input pipelines for the training set and validation set.
    train_image_batch, train_label_batch, TRAIN_SIZE = create_input_pipeline(
        LABELS_FILE_TRAIN, FLAGS.batch_size, num_epochs=None, produceVGGInput=False)
    val_image_batch, val_label_batch, VAL_SIZE = create_input_pipeline(
        LABELS_FILE_VAL, FLAGS.batch_size, num_epochs=None, produceVGGInput=False)
    printdebug("TRAIN_SIZE: %d VAL_SIZE: %d BATCH_SIZE: %d " %
               (TRAIN_SIZE, VAL_SIZE, FLAGS.batch_size))

    # Tell TensorFlow that the model will be built into the default Graph.
    # with tf.Graph().as_default():
    if True:
        # Generate placeholders for the images and labels.
        images_placeholder = tf.placeholder(tf.float32,
                                            shape=(FLAGS.batch_size, NUM_PIXELS))
        labels_placeholder = tf.placeholder(tf.int32, shape=(FLAGS.batch_size))
        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)
        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate, FLAGS.eps)
        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        # Add the variable initializer Op.
        init = tf.initialize_all_variables()
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        # Create a session for running Ops on the Graph.
        sess = tf.Session()
        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        # And then after everything is built:
        # Run the Op to initialize the variables.
        sess.run(init)
        tf.train.start_queue_runners(sess=sess)

        # Start the training loop.
        duration = 0.0
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            # Never, ever run image_batch.eval() + label_batch.eval() separately.
            np_image_batch, np_label_batch = sess.run(
                [train_image_batch, train_label_batch])
            train_feed_dict = {images_placeholder: np_image_batch,
                               labels_placeholder: np_label_batch}
            # Run one step of the model. The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op. To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            _, loss_value = sess.run([train_op, loss], feed_dict=train_feed_dict)
            duration += time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                # Print status to stdout.
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                duration = 0.0
                # Update the events file.
                summary_str = sess.run(summary_op, feed_dict=train_feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, TRAIN_SIZE, images_placeholder,
                        labels_placeholder, train_image_batch, train_label_batch)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, VAL_SIZE, images_placeholder,
                        labels_placeholder, val_image_batch, val_label_batch)
                print('\n')

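# The input-pipeline variant above calls a do_eval() with a different signature
# (it also receives the set size and the batch tensors). This is a sketch consistent
# with those calls, not the original helper; FLAGS.batch_size is assumed and any
# partial final batch is simply dropped.
def do_eval(sess, eval_correct, set_size, images_placeholder, labels_placeholder,
            image_batch, label_batch):
    true_count = 0
    steps_per_epoch = set_size // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size
    for _ in xrange(steps_per_epoch):
        # Dequeue one batch from the pipeline and feed it through the placeholders.
        np_images, np_labels = sess.run([image_batch, label_batch])
        true_count += sess.run(eval_correct,
                               feed_dict={images_placeholder: np_images,
                                          labels_placeholder: np_labels})
    print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
          (num_examples, true_count, float(true_count) / num_examples))
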
def train(target, dataset, dataset_test, cluster_spec):
    """Train Inception on a dataset for a number of steps."""
    # Number of workers and parameter servers are inferred from the workers and ps
    # hosts string.
    num_workers = len(cluster_spec.as_dict()['worker'])
    num_parameter_servers = len(cluster_spec.as_dict()['ps'])
    # If no value is given, num_replicas_to_aggregate defaults to be the number of
    # workers.
    if FLAGS.num_replicas_to_aggregate == -1:
        num_replicas_to_aggregate = num_workers
    else:
        num_replicas_to_aggregate = FLAGS.num_replicas_to_aggregate
    # Both should be greater than 0 in a distributed training.
    assert num_workers > 0 and num_parameter_servers > 0, (
        'num_workers and num_parameter_servers must be > 0.')
    # Choose worker 0 as the chief. Note that any worker could be the chief
    # but there should be only one chief.
    is_chief = (FLAGS.task_id == 0)

    # Ops are assigned to worker by default.
    with tf.device(
            tf.train.replica_device_setter(
                worker_device='/job:worker/task:%d' % FLAGS.task_id,
                cluster=cluster_spec)):
        # Create a variable to count the number of train() calls. This equals the
        # number of updates applied to the variables. The PS holds the global step.
        global_step = tf.Variable(0, name="global_step", trainable=False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (dataset.num_examples / FLAGS.batch_size)
        # Decay steps need to be divided by the number of replicas to aggregate.
        # This was the old decay schedule. Don't want this since it decays too fast
        # with a fixed learning rate.
        decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay /
                          num_replicas_to_aggregate)
        # New decay schedule. Decay every few steps.
        # decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay /
        #                   num_workers)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                        global_step,
                                        decay_steps,
                                        FLAGS.learning_rate_decay_factor,
                                        staircase=True)

        images, labels = mnist.placeholder_inputs(FLAGS.batch_size)
        # images_test, labels_test = mnist.placeholder_inputs(int(FLAGS.batch_size/6))

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        logits = mnist.inference(images, train=True)
        # Test logits
        # logits_test = mnist.inference(images_test, train=False)

        # Add classification loss.
        total_loss = mnist.loss(logits, labels)
        # Add train accuracy.
        train_acc = mnist.evaluation(logits, labels)
        # Test accuracy
        # test_acc = mnist.evaluation(logits_test, labels_test)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.GradientDescentOptimizer(lr)

        # Use the SyncReplicasOptimizer.
        if FLAGS.interval_method or FLAGS.worker_times_cdf_method:
            opt = TimeoutReplicasOptimizer(opt, global_step,
                                           total_num_replicas=num_workers)
        else:
            opt = tf.train.SyncReplicasOptimizer(
                opt,
                replicas_to_aggregate=num_replicas_to_aggregate,
                total_num_replicas=num_workers)

        # Compute gradients with respect to the loss.
        grads = opt.compute_gradients(total_loss)

        # Apply drop connect if FLAGS.drop_connect is True.
        if FLAGS.drop_connect:
            bernoulli_sampler = tf.contrib.distributions.Bernoulli(
                p=FLAGS.drop_connect_probability)
            dropped_grads = [(drop_connect(gv[0], bernoulli_sampler), gv[1])
                             for gv in grads]

        if FLAGS.interval_method or FLAGS.worker_times_cdf_method:
            apply_gradients_op = opt.apply_gradients(
                grads, FLAGS.task_id, global_step=global_step,
                collect_cdfs=FLAGS.worker_times_cdf_method)
        else:
            if FLAGS.drop_connect:
                apply_gradients_op = opt.apply_gradients(dropped_grads,
                                                         global_step=global_step)
            else:
                apply_gradients_op = opt.apply_gradients(grads,
                                                         global_step=global_step)

        '''
        This part is an old version; the new version only uses apply_gradients_op.
        with tf.control_dependencies([apply_gradients_op]):
            train_op = tf.identity(total_loss, name='train_op')
        '''

        # Get chief queue_runners, init_tokens and clean_up_op, which are used to
        # synchronize replicas.
        # More details can be found in sync_replicas_optimizer.
        chief_queue_runners = [opt.get_chief_queue_runner()]
        init_tokens_op = opt.get_init_tokens_op()
        # clean_up_op = opt.get_clean_up_op()

        # Create a saver.
        saver = tf.train.Saver()

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init_op = tf.initialize_all_variables()

        test_print_op = logging_ops.Print(0, [0], message="Test print success")

        # We run the summaries in the same thread as the training operations by
        # passing in None for summary_op to avoid a summary_thread being started.
        # Running summaries and training operations in parallel could run out of
        # GPU memory.
        if is_chief:
            local_init_op = opt.chief_init_op
        else:
            local_init_op = opt.local_step_init_op
        local_init_opt = [local_init_op]
        ready_for_local_init_op = opt.ready_for_local_init_op

        sv = tf.train.Supervisor(is_chief=is_chief,
                                 local_init_op=local_init_op,
                                 ready_for_local_init_op=ready_for_local_init_op,
                                 logdir=FLAGS.train_dir,
                                 init_op=init_op,
                                 summary_op=None,
                                 global_step=global_step,
                                 saver=saver,
                                 save_model_secs=FLAGS.save_interval_secs)

        tf.logging.info('%s Supervisor' % datetime.now())

        sess_config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement)

        # Get a session.
        sess = sv.prepare_or_wait_for_session(target, config=sess_config)

        # Start the queue runners.
        queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
        sv.start_queue_runners(sess, queue_runners)
        tf.logging.info('Started %d queues for processing input data.',
                        len(queue_runners))

        if is_chief:
            if not FLAGS.interval_method or FLAGS.worker_times_cdf_method:
                sv.start_queue_runners(sess, chief_queue_runners)
            sess.run(init_tokens_op)

        # TIMEOUT client overseer.
        # Even if not using timeout, we want to wait until all machines are ready.
        timeout_client, timeout_server = launch_manager(sess, FLAGS)

        # Train, checking for NaNs. Concurrently run the summary operation at a
        # specified interval. Note that the summary_op and train_op never run
        # simultaneously in order to prevent running out of GPU memory.
        next_summary_time = time.time() + FLAGS.save_summaries_secs
        begin_time = time.time()
        cur_iteration = -1
        iterations_finished = set()

        if FLAGS.task_id == 0 and FLAGS.interval_method:
            opt.start_interval_updates(sess, timeout_client)

        time_acc_list = []
        while not sv.should_stop():
            try:
                sys.stdout.flush()
                tf.logging.info("A new iteration...")

                # Increment the current iteration.
                cur_iteration += 1

                # sess.run([opt._wait_op], options=tf.RunOptions(timeout_in_ms=10000))
                # sess.run([opt._wait_op])
                # sess.run([test_print_op])
                if FLAGS.worker_times_cdf_method:
                    sess.run([opt._wait_op])
                    timeout_client.broadcast_worker_dequeued_token(cur_iteration)

                start_time = time.time()
                feed_dict = mnist.fill_feed_dict(dataset, images, labels,
                                                 FLAGS.batch_size)
                # feed_dict_test = mnist.fill_feed_dict(dataset_test, images_test,
                #                                       labels_test,
                #                                       int(FLAGS.batch_size/6))

                run_options = tf.RunOptions()
                run_metadata = tf.RunMetadata()

                if FLAGS.timeline_logging:
                    run_options.trace_level = tf.RunOptions.FULL_TRACE
                    run_options.output_partition_graphs = True

                # timeout_ms = random.randint(300, 1200)
                # tf.logging.info("SETTING TIMEOUT FOR %d ms" % timeout_ms)
                # run_options.timeout_in_ms = 1000 * 60 * 1

                tf.logging.info("RUNNING SESSION... %f" % time.time())
                # if FLAGS.drop_connect:
                #     sess.run(drop_connect_op, feed_dict=feed_dict,
                #              run_metadata=run_metadata, options=run_options)
                # print(sess.run(grads, feed_dict=feed_dict,
                #                run_metadata=run_metadata, options=run_options))
                sess.run(apply_gradients_op, feed_dict=feed_dict,
                         run_metadata=run_metadata, options=run_options)
                loss_value, step, train_acc_value = sess.run(
                    [total_loss, global_step, train_acc],
                    feed_dict=feed_dict, run_metadata=run_metadata,
                    options=run_options)
                # test_acc_value = sess.run(test_acc, feed_dict=feed_dict_test,
                #                           run_metadata=run_metadata,
                #                           options=run_options)
                # step, train_acc_value = sess.run([global_step, train_acc],
                #     feed_dict=feed_dict, run_metadata=run_metadata,
                #     options=run_options)
                tf.logging.info("Global step attained: %d" % step)
                tf.logging.info("DONE RUNNING SESSION...")

                if FLAGS.worker_times_cdf_method:
                    timeout_client.broadcast_worker_finished_computing_gradients(
                        cur_iteration)

                # The following assert sometimes causes problems; removed for now.
                # assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                # Log the elapsed time per iteration.
                finish_time = time.time()

                # Create the Timeline object, and write it to a JSON file.
                if FLAGS.timeline_logging:
                    tl = timeline.Timeline(run_metadata.step_stats)
                    ctf = tl.generate_chrome_trace_format()
                    with open('%s/worker=%d_timeline_iter=%d.json' %
                              (FLAGS.train_dir, FLAGS.task_id, step), 'w') as f:
                        f.write(ctf)

                if step > FLAGS.max_steps:
                    break

                test_acc_value = 0.0
                duration = finish_time - start_time
                examples_per_sec = FLAGS.batch_size / float(duration)
                format_str = ('Worker %d: %s: step %d, loss = %f, train_acc = %f, '
                              'test_acc = %f (%.1f examples/sec; %.3f sec/batch)')
                tf.logging.info(format_str %
                                (FLAGS.task_id, datetime.now(), step, loss_value,
                                 train_acc_value, test_acc_value,
                                 examples_per_sec, duration))
                time_acc_list.append(
                    (finish_time, train_acc_value, test_acc_value, loss_value))

                # Save the results when step % FLAGS.save_results_period == 0.
                if step % FLAGS.save_results_period == 0:
                    time_acc_file_name = FLAGS.train_dir + (
                        '/worker%d_time_acc.npy' % FLAGS.task_id)
                    # np.save(loss_file_name, loss_list)
                    np.save(time_acc_file_name, time_acc_list)

                # Determine if the summary_op should be run on the chief worker.
                if (is_chief and next_summary_time < time.time()
                        and FLAGS.should_summarize):
                    tf.logging.info('Running Summary operation on the chief.')
                    summary_str = sess.run(summary_op)
                    sv.summary_computed(sess, summary_str)
                    tf.logging.info('Finished running Summary operation.')
                    # Determine the next time for running the summary.
                    next_summary_time += FLAGS.save_summaries_secs
            except tf.errors.DeadlineExceededError:
                tf.logging.info("Killed at time %f" % time.time())
                sess.reset_kill()
            except:
                tf.logging.info("Unexpected error: %s" % str(sys.exc_info()[0]))
                sess.reset_kill()

        if is_chief:
            tf.logging.info('Elapsed Time: %f' % (time.time() - begin_time))

        # Stop the supervisor. This also waits for service threads to finish.
        sv.stop()

        # Save after the training ends.
        if is_chief:
            saver.save(sess,
                       os.path.join(FLAGS.train_dir, 'model.ckpt'),
                       global_step=global_step)

def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)
        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)
        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        # Create a session for running Ops on the Graph.
        sess = tf.Session()
        # Run the Op to initialize the variables.
        init = tf.initialize_all_variables()
        sess.run(init)
        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        # And then after everything is built, start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            # Run one step of the model. The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op. To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                # Print status to stdout.
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                saver.save(sess, FLAGS.train_dir, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.test)

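# Sketch of the feed-dict do_eval() these training loops call; it follows the
# standard tutorial helper, and its inner loop also appears among the excerpts
# near the end of this collection. fill_feed_dict() and FLAGS.batch_size are the
# same helpers/flags assumed elsewhere in these snippets.
def do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_set):
    true_count = 0  # Counts the number of correct predictions.
    steps_per_epoch = data_set.num_examples // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size
    for step in xrange(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, images_placeholder, labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)
    precision = float(true_count) / num_examples
    print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
          (num_examples, true_count, precision))
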
def run_training():
    """ Train MNIST for a number of steps """
    data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        loss = mnist.loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        summary_op = tf.merge_all_summaries()
        saver = tf.train.Saver()
        sess = tf.Session()
        init = tf.initialize_all_variables()
        sess.run(init)
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)

            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                saver.save(sess, FLAGS.train_dir, global_step=step)
                print('Evaluating on training data...')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.train)
                print('Evaluating on validation data...')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.validation)
                print('Evaluating on test data...')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.test)

def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
    # labels and images are properties of the DataSet class, which is
    # defined at tensorflow/contrib/learn/python/learn/datasets/mnist.py
    # numpy.savetxt("/tmp/xx.csv", data_sets.train.labels, delimiter=",")
    # numpy.savetxt("/tmp/yy.csv", data_sets.test.images, delimiter=",")
    # sys.exit(1)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)
        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)
        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        # Add the variable initializer Op.
        init = tf.initialize_all_variables()
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        # Create a session for running Ops on the Graph.
        sess = tf.Session()
        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        # And then after everything is built:
        # Run the Op to initialize the variables.
        sess.run(init)

        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            # Run one step of the model. The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op. To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                # Print status to stdout.
                print("Step %d: loss = %.2f (%.3f sec)" % (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.train_dir, "checkpoint")
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print("Training Data Eval:")
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.train)
                # Evaluate against the validation set.
                print("Validation Data Eval:")
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.validation)
                # Evaluate against the test set.
                print("Test Data Eval:")
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.test)

                if (step + 1) == FLAGS.max_steps:
                    float_formatter = lambda x: "%.2f" % x
                    numpy.set_printoptions(formatter={"float_kind": float_formatter})
                    feed_dict = fill_feed_dict(data_sets.test, images_placeholder,
                                               labels_placeholder)
                    # output with softmax
                    # output = sess.run(tf.nn.softmax(logits), feed_dict=feed_dict)
                    # output without softmax
                    output = sess.run(tf.argmax(logits, dimension=1),
                                      feed_dict=feed_dict)
                    numpy.savetxt("/tmp/outputX.csv", output, delimiter=",")

def run_training():
    # Load the data.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
    # Run under the default Graph.
    with tf.Graph().as_default():
        # Set up the graph.
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        # logits has shape (batch_size, NUM_CLASSES): the unnormalized class scores
        # predicted for each example.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        # Loss function.
        loss = mnist.loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        # Merge the summary tensors.
        summary = tf.summary.merge_all()
        # Build the initialization op.
        init = tf.global_variables_initializer()
        # Build the checkpoint saver.
        saver = tf.train.Saver()
        # Create the Session.
        sess = tf.Session()
        # Create a SummaryWriter to output the merged summaries.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # Start executing.
        # Run the variable initializer.
        sess.run(init)

        # Start training: max_steps iterations.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            # Fetch the data for this iteration.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            # The train_op output is discarded and the loss value is kept;
            # sess.run(train_op) alone would be the simplest training call.
            # run(self, fetches, feed_dict=None, options=None, run_metadata=None)
            # fetches is quite flexible; see help(tf.Session.run) for details.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            # Every 100 steps, print the current loss and record summaries.
            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Every 1000 steps, checkpoint and evaluate the model.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.test)

def run_training():
    """ Train MNIST for a number of steps """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)
        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)
        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()
        # Add the variable initializer Op.
        init = tf.global_variables_initializer()
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        # Create a session for running Ops on the Graph.
        sess = tf.Session()
        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # And then after everything is built:
        # Run the Op to initialize the variables.
        sess.run(init)

        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            """ TODO: Add progress bar """
            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            # Run one step of the model. The return values are the activations
            # from the 'train_op' (which is discarded) and the 'loss' Op.
            # To inspect the values of your Ops or variables, you may include
            # them in the list passed to sess.run() and the value tensors
            # will be returned in the tuple from the call.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                # Print status to stdout.
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                        data_sets.test)

def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and test on MNIST.
    data_sets = aymericdamien.input_data.read_data_sets(FLAGS.train_dir,
                                                        FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)
        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)
        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        # Create a session for running Ops on the Graph.
        session = tf.Session()
        # Run the Op to initialize the variables.
        init = tf.initialize_all_variables()
        session.run(init)
        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=session.graph_def)

        # After everything is built, start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time()
            # Fill the feed dictionary with the actual set of images and labels
            # for this training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            # Run one step of the model; the return values are the activations from
            # the 'train_op' (which is discarded) and the 'loss' Op. To inspect the
            # values of your Ops or Variables, you may include them in the list
            # passed to session.run() and the value tensors will be returned in the
            # tuple from the call.
            _, loss_value = session.run([train_op, loss], feed_dict=feed_dict)
            duration = time() - start_time

            if step % 100 == 0:
                # Print a status update.
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                # Update the events file.
                summary_str = session.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                saver.save(session, FLAGS.train_dir, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_evaluation(session, eval_correct, images_placeholder,
                              labels_placeholder, data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_evaluation(session, eval_correct, images_placeholder,
                              labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_evaluation(session, eval_correct, images_placeholder,
                              labels_placeholder, data_sets.test)

def train_model(): """ This method deals with training the model in mnist-model.py by running optimization over a number of steps. """ #read input da(ta first #read_data_sets() function will ensure that the correct data has been downloaded to your local training folder #and then unpack that data to return a dictionary of DataSet instances. #FLAGS.fake_data can be ignored as it is used-for unit testing purposes print("Training Started! ") data = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data) #We need to specify that our model will be used with the Default global Tensor Flow graph. #A default global TF graph tf.Graph() is a collection of ops that may be executed as a group with tf.Graph().as_default(): # Generate placeholders for the images and labels. images_ph, labels_ph = input_placeholders(FLAGS.batch_size) # Build a Graph that computes predictions from the inference model. logits = mnist.inference(images_ph, FLAGS.num_hidden1_nodes, FLAGS.num_hidden1_nodes) # Add the loss calculation op to the default Graph loss = mnist.loss(logits, labels_ph) # Add the minimization op the Graph train_op = mnist.training(loss, FLAGS.eta) # Add the Evaluation to test predictions to the Graph eval_correct = mnist.evaluation(logits, labels_ph) # Build the summary Tensor based on the TF collection of Summaries. summary = tf.summary.merge_all() # Add the variable initializer Op. to the Graph init = tf.global_variables_initializer() #Create a Tensor Flow Saver for writing Training Checkpoints saver = tf.train.Saver() #Now once all the build preparation is completed and all the ops are added to the Graph, #we need to create a Session in order to run the computaional Graph sess = tf.Session() #We also need to create a TF Summary Writer in order to record all the summaries and the Graph summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph) #Now all the required Ops are attached to the Default Graph and all is built, #Start the session by initialing all the TF variables. sess.run(init) #Start the Training for step in xrange(FLAGS.max_steps): start_time = time.time() #Update the feed_dict with the next batch of samples to train with. feed_dict = fill_feed_dictionary(data.train, images_ph, labels_ph) #Run one step of the training by running Ops train_op and loss #No need to store the activations returned by the train_op minimization step _, loss_val = sess.run([train_op, loss], feed_dict=feed_dict) duration = time.time() - start_time #Record all the training summaries generates and print the training progress/statistics #after every 100th iteration if step % 100 == 0: # Print status to stdout. print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_val, duration)) #Now we need to update the events file with summaries #Run the summary Op attached to the Graph #Everytime the summary is evaluated, new summaries are written into the events files summary_str = sess.run(summary, feed_dict=feed_dict) summary_writer.add_summary(summary_str, step) summary_writer.flush() #Save the Model at every 1000th iteration and perform evaluation on complete data if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt') saver.save(sess, checkpoint_file, global_step=step) #Evaluate against the training set print('Training Data Evaluation:') do_eval(sess, eval_correct, images_ph, labels_ph, data.train) print('Validation Data Evaluation:') do_eval(sess, eval_correct, images_ph, labels_ph, data.validation) # Evaluate against the test set. 
print('Test Data Evaluation:') do_eval(sess, eval_correct, images_ph, labels_ph, data.test)
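train_model above reads its configuration from FLAGS, whose definitions are not shown. A plausible set of flag definitions using the tf.app.flags style of the era; the flag names are taken from the references in train_model, while the defaults and help strings are guesses:

import tensorflow as tf

flags = tf.app.flags
FLAGS = flags.FLAGS

flags.DEFINE_string('input_data_dir', '/tmp/mnist/input_data', 'Directory holding the MNIST data.')
flags.DEFINE_string('log_dir', '/tmp/mnist/logs', 'Directory for summaries and checkpoints.')
flags.DEFINE_boolean('fake_data', False, 'Use fake data, for unit testing.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.')
flags.DEFINE_integer('num_hidden1_nodes', 128, 'Units in the first hidden layer.')
flags.DEFINE_float('eta', 0.01, 'Learning rate.')
flags.DEFINE_integer('max_steps', 2000, 'Number of training steps.')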
# Assorted excerpts from the same training script, grouped by purpose. # Run one training step. feed_dict = fill_feed_dict(data_sets.train, images_placeholder, labels_placeholder) _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict) # Print a status update every 100 steps. if step % 100 == 0: print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration)) # Build the summary op and writer, then record summaries. summary_op = tf.merge_all_summaries() summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph) summary_str = sess.run(summary_op, feed_dict=feed_dict) summary_writer.add_summary(summary_str, step) # Save and restore checkpoints. saver = tf.train.Saver() saver.save(sess, FLAGS.train_dir, global_step=step) saver.restore(sess, FLAGS.train_dir) # Evaluate against each data split. print('Training Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.train) print('Validation Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.validation) print('Test Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.test) # mnist.evaluation builds eval_correct from in_top_k. eval_correct = mnist.evaluation(logits, labels_placeholder) eval_correct = tf.nn.in_top_k(logits, labels, 1) # Evaluation loop and precision, i.e. the body of do_eval. for step in xrange(steps_per_epoch): feed_dict = fill_feed_dict(data_set, images_placeholder, labels_placeholder) true_count += sess.run(eval_correct, feed_dict=feed_dict) precision = true_count / num_examples print(' Num examples: %d Num correct: %d Precision @ 1: %0.04f' % (num_examples, true_count, precision))
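The last few fragments are essentially the internals of do_eval from the standard tutorial. Assembled into one function, assuming the fill_feed_dict helper sketched earlier and the tutorial's Python 2 style, it might look like:

def do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_set, batch_size=100):
    # Count correct predictions over as many full batches as the DataSet holds.
    true_count = 0
    steps_per_epoch = data_set.num_examples // batch_size
    num_examples = steps_per_epoch * batch_size
    for step in xrange(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, images_placeholder, labels_placeholder, batch_size)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)
    precision = float(true_count) / num_examples
    print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
          (num_examples, true_count, precision))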
def run_training(): data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data) with tf.Graph().as_default(): # Generate placeholders for the images and labels. images_placeholder, labels_placeholder = placeholder_inputs( FLAGS.batch_size) # Build a Graph that computes predictions from the inference model. logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2) # See the implementation in mnist.py. # Add to the Graph the Ops for loss calculation. loss = mnist.loss(logits, labels_placeholder) # See the implementation in mnist.py. # Add to the Graph the Ops that calculate and apply gradients. train_op = mnist.training(loss, FLAGS.learning_rate) # See the implementation in mnist.py. # Add the Op to compare the logits to the labels during evaluation. eval_correct = mnist.evaluation(logits, labels_placeholder) # See the implementation in mnist.py. summary = tf.summary.merge_all() # Used for TensorBoard. init = tf.global_variables_initializer() # Will record training checkpoints. saver = tf.train.Saver() sess = tf.Session() summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph) sess.run(init) for step in xrange(FLAGS.max_steps): start_time = time.time() feed_dict = fill_feed_dict(data_sets.train, images_placeholder, labels_placeholder) _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict) duration = time.time() - start_time if step % 100 == 0: # Print status to stdout. print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration)) # Update the events file. summary_str = sess.run(summary, feed_dict=feed_dict) summary_writer.add_summary(summary_str, step) # Write to the log. summary_writer.flush() # This shows how to save a checkpoint, and evaluate the model periodically. if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt') # Important! print("checkpoint saved in: " + checkpoint_file) saver.save(sess, checkpoint_file, global_step=step) # Evaluate against the training set. print('Training Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.train) # Only the dataset changes. # Evaluate against the validation set. print('Validation Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.validation) # Evaluate against the test set. print('Test Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.test)
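Because the loop above writes model.ckpt files (suffixed with the step number) under FLAGS.log_dir, a later run can resume or evaluate from the newest checkpoint. A minimal restore sketch; the helper name and error handling are assumptions, not part of the source:

import tensorflow as tf


def restore_latest(sess, saver, log_dir):
    # Restore the most recent checkpoint in log_dir, if one exists.
    ckpt = tf.train.latest_checkpoint(log_dir)
    if ckpt is None:
        raise IOError('no checkpoint found in %s' % log_dir)
    saver.restore(sess, ckpt)
    return ckpt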
def run_training(learning_rate=FLAGS.learning_rate, momentum=FLAGS.momentum, max_norm=FLAGS.max_norm, weight_decay=FLAGS.weight_decay, keep_prob=FLAGS.keep_prob, keep_input=FLAGS.keep_input, beta2=FLAGS.beta2, num_layers=FLAGS.num_layers): """Train MNIST for a number of steps.""" # Get the sets of images and labels for training, validation, and # test on MNIST. data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data) # Tell TensorFlow that the model will be built into the default Graph. with tf.Graph().as_default(): # Generate placeholders for the images and labels. images_placeholder = tf.placeholder(tf.float32, shape=(None, mnist.IMAGE_PIXELS), name='images') labels_placeholder = tf.placeholder(tf.int32, shape=[None], name='labels') keep_prob_pl = tf.placeholder(tf.float32, name='keep_prob_pl') keep_input_pl = tf.placeholder(tf.float32, name='keep_input_pl') learning_rate_pl = tf.placeholder(tf.float32, name='learning_rate_pl') def fill_feed_dict(data_set, batch_size=FLAGS.batch_size): # Create the feed_dict for the placeholders filled with the next # `batch_size` examples. images_feed, labels_feed = data_set.next_batch(batch_size, FLAGS.fake_data) feed_dict = { images_placeholder: images_feed, labels_placeholder: labels_feed, keep_prob_pl: keep_prob, keep_input_pl: keep_input, learning_rate_pl: learning_rate } return feed_dict def fill_feed_dict_eval(data_set): return { images_placeholder: data_set._images, labels_placeholder: data_set._labels, keep_prob_pl: 1.0, keep_input_pl: 1.0, } # Build a Graph that computes predictions from the inference model. with tf.variable_scope('feed_forward_model') as scope: logits, bn = mnist.inference(images_placeholder, FLAGS.hidden1, num_layers, weight_decay, keep_prob_pl, keep_input_pl, max_norm) # Add to the Graph the Ops for loss calculation. loss = mnist.loss(logits, labels_placeholder) #loss_eval = mnist.loss( logits_eval, labels_placeholder) # Add to the Graph the Ops that calculate and apply gradients. train_op = mnist.training(loss, learning_rate_pl, momentum, beta2) with tf.control_dependencies([train_op]): train_op = tf.group(*[b.get_assigner() for b in bn]) # Add the Op to compare the logits to the labels during evaluation. eval_correct = mnist.evaluation(logits, labels_placeholder) results = tf.placeholder(tf.float32, [4]) summarize_evaluation = tf.scalar_summary(['correct_train', 'loss_train', 'correct_test', 'loss_test'], results) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() # Create a saver for writing training checkpoints. saver = tf.train.Saver(max_to_keep=2) train_loss = test_loss = 0 train_cor = test_cor = 0.97 previous_test_loss = None first_step = 0 # Create a session for running Ops on the Graph. sess = tf.Session() summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def) restore_path = tf.train.latest_checkpoint("/Users/mikowals/projects/mnist") if restore_path: saver.restore(sess, restore_path) first_step = int(restore_path.split('/')[-1].split('-')[-1]) print('restored variables from ', restore_path) else: # Run the Op to initialize the variables. print('initializing variables') init = tf.initialize_all_variables() sess.run(init) # And then after everything is built, start the training loop. for step in range(first_step, FLAGS.max_steps): start_time = time.time() # Fill a feed dictionary with the actual set of images and labels # for this particular training step.
feed_dict = fill_feed_dict(data_sets.train) # Run one step of the model. The return values are the activations # from the `train_op` (which is discarded) and the `loss` Op. To # inspect the values of your Ops or variables, you may include them # in the list passed to sess.run() and the value tensors will be # returned in the tuple from the call. _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict) duration = time.time() - start_time # Write the summaries and print an overview fairly often. # Save a checkpoint and evaluate the model periodically. if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps: saver.save(sess, FLAGS.train_dir, global_step=step) # Evaluate against the training set. print('Training Data Eval:') feed_dict = fill_feed_dict_eval(data_sets.train) train_cor, train_loss = sess.run([eval_correct, loss], feed_dict=feed_dict) train_cor = train_cor / data_sets.train.num_examples print(train_cor, train_loss) # Evaluate against the validation set. print('Validation Data Eval:') feed_dict = fill_feed_dict_eval(data_sets.validation) test_cor, test_loss = sess.run([eval_correct, loss], feed_dict=feed_dict) test_cor = test_cor / data_sets.validation.num_examples print(test_cor, test_loss) #if previous_test_loss and test_loss > previous_test_loss: # learning_rate = learning_rate * 0.6 #if previous_test_loss and test_loss < previous_test_loss: # learning_rate = learning_rate * 1.02 #previous_test_loss = test_loss if step > 1000 and step % 100 == 0: # Print status to stdout. print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration)) # Update the events file. feed_dict[results] = [train_cor, train_loss, test_cor, test_loss] summary_str = sess.run(summary_op, feed_dict=feed_dict) summary_writer.add_summary(summary_str, step) return -test_cor
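Since this variant of run_training takes its hyperparameters as arguments and returns the negative validation accuracy, it can be dropped straight into a search loop. A hypothetical random-search driver; the parameter ranges and trial count are illustrative only:

import random


def random_search(num_trials=10):
    # Each run_training call builds its own graph, so repeated calls are safe.
    best_params, best_score = None, float('inf')
    for _ in range(num_trials):
        params = {
            'learning_rate': 10 ** random.uniform(-4, -1),
            'keep_prob': random.uniform(0.5, 1.0),
            'weight_decay': 10 ** random.uniform(-6, -3),
        }
        score = run_training(**params)  # lower is better (negative accuracy)
        if score < best_score:
            best_params, best_score = params, score
    return best_params, best_score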
def main(_): # build a model data = input_data.read_data_sets(FLAGS.data_dir, one_hot=False, fake_data=False) # make placeholders images = tf.placeholder(tf.float32, [FLAGS.batch_size, mnist.IMAGE_PIXELS], name='inputs') labels = tf.placeholder(tf.int32, [FLAGS.batch_size], name='labels') # build model up to inference logits = mnist.inference(images, FLAGS.hidden_size, FLAGS.num_layers, do_weightnorm=FLAGS.weightnorm, do_batchnorm=FLAGS.batchnorm, train=True) if not FLAGS.batchnorm: eval_logits = logits else: eval_logits = mnist.inference(images, FLAGS.hidden_size, FLAGS.num_layers, do_weightnorm=FLAGS.weightnorm, do_batchnorm=FLAGS.batchnorm, train=False) # get a loss function loss = mnist.loss(logits, labels, 'train_xent') eval_loss = mnist.loss(eval_logits, labels, 'eval_xent') # add a summary of this to track the training loss # get training ops train_op, gstep = mnist.training(loss, FLAGS.learning_rate, FLAGS.momentum) # get an op to return precision on a batch eval_op = mnist.evaluation(eval_logits, labels) valid_var = tf.Variable(0, name='validation_performance') valid_summ = tf.scalar_summary('validation accuracy', valid_var) # get summary op summarise = tf.merge_all_summaries() with tf.Session() as sess: writer = tf.train.SummaryWriter(FLAGS.logdir, sess.graph_def) tf.initialize_all_variables().run() # do some training print('nb: {} steps per epoch'.format( data.train.num_examples // FLAGS.batch_size)) print('Step 0/{}.'.format(FLAGS.max_steps), end='') for i in range(FLAGS.max_steps): if (i+1) % 5 == 0: # write summaries, check on validation set if (i+1) % 100 == 0: valid_perf = evaluate(sess, data.validation, logits, [eval_op, eval_loss], FLAGS.batch_size, images, labels, gstep, writer) print() summ_str, _, _ = sess.run([summarise, loss, train_op], fill_feed(data.train, images, labels, FLAGS.batch_size)) writer.add_summary(summ_str, gstep.eval(session=sess)) else: # do a step of training loss_val, _ = sess.run([loss, train_op], fill_feed(data.train, images, labels, FLAGS.batch_size)) print('\rStep {} (loss {})'.format(i+1, loss_val), end='', flush=True) print('\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') print('Test evaluation:') evaluate(sess, data.test, logits, [eval_op, eval_loss], FLAGS.batch_size, images, labels, gstep, writer) print('\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
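main above calls an evaluate helper that walks a DataSet in batches and reports precision and loss; its definition is not shown. A sketch that matches the call signature used here; the summary-writing step is one plausible way to record validation accuracy and is an assumption, not necessarily how the original helper does it (logits is accepted but unused in this sketch):

import tensorflow as tf


def evaluate(sess, data_set, logits, eval_ops, batch_size, images_pl, labels_pl, gstep, writer):
    # eval_ops is [eval_op, eval_loss]; average both over all full batches.
    eval_op, eval_loss = eval_ops
    steps = data_set.num_examples // batch_size
    total_correct, total_loss = 0, 0.0
    for _ in range(steps):
        images, labels = data_set.next_batch(batch_size)
        correct, loss_val = sess.run([eval_op, eval_loss],
                                     {images_pl: images, labels_pl: labels})
        total_correct += correct
        total_loss += loss_val
    precision = total_correct / float(steps * batch_size)
    print('eval: precision @ 1 = %.4f, loss = %.4f' % (precision, total_loss / steps))
    # Record the result against the current global step (assumed approach).
    summ = tf.Summary(value=[tf.Summary.Value(tag='validation accuracy',
                                              simple_value=precision)])
    writer.add_summary(summ, gstep.eval(session=sess))
    return precision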
def run_training(): """Train MNIST for a number of steps.""" # Get the sets of images and labels for training, validation, and # test on MNIST. data_sets=reader(patchlength=0,\ maxlength=300,\ embedding_size=100,\ num_verbs=10,\ allinclude=False,\ shorten=False,\ shorten_front=False,\ testflag=False,\ passnum=0,\ dpflag=False) # Tell TensorFlow that the model will be built into the default Graph. with tf.Graph().as_default(): # Generate placeholders for the images and labels. images_placeholder, labels_placeholder = placeholder_inputs( FLAGS.batch_size) # Build a Graph that computes predictions from the inference model. logits, keep_prob = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2) # Add to the Graph the Ops for loss calculation. loss = mnist.loss(logits, labels_placeholder) # Add to the Graph the Ops that calculate and apply gradients. train_op = mnist.training(loss, FLAGS.learning_rate) # Add the Op to compare the logits to the labels during evaluation. eval_correct = mnist.evaluation(logits, labels_placeholder) # Build the summary Tensor based on the TF collection of Summaries. summary = tf.summary.merge_all() # Add the variable initializer Op. init = tf.global_variables_initializer() # Create a saver for writing training checkpoints. saver = tf.train.Saver() # Create a session for running Ops on the Graph. sess = tf.Session() # Instantiate a SummaryWriter to output summaries and the Graph. summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph) # And then after everything is built: # Run the Op to initialize the variables. with tf.Session() as session: sess.run(init) if True: model_file = tf.train.latest_checkpoint(FLAGS.log_dir) saver.restore(sess, model_file) # Start the training loop. start_time = time.time() for step in xrange(FLAGS.max_steps): # Fill a feed dictionary with the actual set of images and labels # for this particular training step. inputs, answers = data_sets.list_tags(FLAGS.batch_size, test=False) # print(len(inputs),len(inputs[0]),inputs[0]) # input() inputs2 = [] for i in range(len(inputs)): inputs2.append(inputs[i] / 255) # print(len(inputs2),len(inputs2[0]),inputs2[0]) # input() feed_dict = { images_placeholder: inputs2, labels_placeholder: answers, keep_prob: 0.5 } # Run one step of the model. The return values are the activations # from the `train_op` (which is discarded) and the `loss` Op. To # inspect the values of your Ops or variables, you may include them # in the list passed to sess.run() and the value tensors will be # returned in the tuple from the call. _, loss_value, logi = sess.run([train_op, loss, logits], feed_dict=feed_dict) duration = time.time() - start_time # Write the summaries and print an overview fairly often. if step % 100 == 0: # Print status to stdout. print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration)) # print(logi) # print(answers) for i0 in range(FLAGS.batch_size): lgans = np.argmax(logi[i0]) if (lgans != answers[i0] and False): for tt in range(784): if (tt % 28 == 0): print(' ') if (inputs[i0][tt] != 0): print('1', end=' ') else: print('0', end=' ') # print('np',np.argmax(i),answers,answers[i0],'np') print(lgans, answers[i0]) # Update the events file. 
summary_str = sess.run(summary, feed_dict=feed_dict) summary_writer.add_summary(summary_str, step) summary_writer.flush() if (step + 1) % 500 == 0 or (step + 1) == FLAGS.max_steps: #print('Training Data Eval:') do_eval(sess, eval_correct, data_sets, FLAGS.batch_size, images_placeholder, labels_placeholder, keep_prob) do_evalfake(sess, eval_correct, data_sets, FLAGS.batch_size, images_placeholder, labels_placeholder, logits, keep_prob) # Save a checkpoint and evaluate the model periodically. #if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt') saver.save(sess, checkpoint_file, global_step=step) print('saved to', checkpoint_file)
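The per-sample loop above that divides each image by 255 can be done in one vectorized step. A small sketch, assuming inputs is a list (or array) of flattened uint8 images as returned by the reader:

import numpy as np


def normalize_batch(inputs):
    # Scale pixel values from [0, 255] to [0.0, 1.0] in a single vectorized operation.
    return np.asarray(inputs, dtype=np.float32) / 255.0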
def run_training(): # Get the sets of images and labels for training, validation, and testing. data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data) # Tell TensorFlow that the model will be built into the default Graph. with tf.Graph().as_default(): # Create input placeholders for the image feature vectors and the class labels. images_placeholder, labels_placeholder = placeholder_inputs( FLAGS.batch_size) # Build a Graph that computes predictions from the inference model. logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2) # Add the Ops for loss calculation to the Graph. loss = mnist.loss(logits, labels_placeholder) # Add the training Ops that calculate and apply gradients to the Graph. train_op = mnist.training(loss, FLAGS.learning_rate) # Add the Op to compare the logits to the ground-truth labels during evaluation. eval_correct = mnist.evaluation(logits, labels_placeholder) # Build the summary Tensor based on the TF collection of Summaries. merged_summaries = tf.summary.merge_all() # Create a saver for writing checkpoints of the model during training. saver = tf.train.Saver() # Add the variable initializer Op. init = tf.global_variables_initializer() # Create a session for running Ops on the Graph. sess = tf.Session() # Instantiate a SummaryWriter to output summaries and the Graph. summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph) summary_writer.flush() # Run the initializer Op to initialize all the Variables. sess.run(init) # Start the training loop. for step in xrange(FLAGS.max_steps): start_time = time.time() # Fill the feed dictionary with the actual set of images and labels for this particular training step. feed_dict = fill_feed_dict(data_sets.train, images_placeholder, labels_placeholder) # Run one step of the model on the current batch. # The return values are the activations from the `train_op` and `loss` Ops. _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict) # Measure how long training on this batch took. duration = time.time() - start_time # Write the summaries and print an overview every 100 batches. if step % 100 == 0: print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration)) # Update the events file, again via sess.run(). summary_str = sess.run(merged_summaries, feed_dict=feed_dict) summary_writer.add_summary(summary_str, step) summary_writer.flush() # Periodically save a checkpoint and evaluate the current model. if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt') saver.save(sess, checkpoint_file, global_step=step) # Evaluate against the full training set. print('Training Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.train) # Evaluate against the validation set. print('Validation Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.validation) # Evaluate against the test set. print('Test Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.test)
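merged_summaries only has something to merge if summaries were registered while the graph was built; in the tutorial that typically happens when the training op is added. A sketch of roughly what the tutorial's mnist.training does, reproduced here for context rather than taken from this file:

import tensorflow as tf


def training(loss, learning_rate):
    # Record the loss as a scalar summary, then minimize it with SGD,
    # tracking the step count in a non-trainable global_step variable.
    tf.summary.scalar('loss', loss)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op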
def run_training(): """Train MNIST for a number of steps.""" # Get the sets of images and labels for training, validation, and # test on MNIST. data_sets = input_data.read_data_sets(FLAGS.data_dir, FLAGS.fake_data) # Tell TensorFlow that the model will be built into the default Graph. with tf.Graph().as_default(): # Generate placeholders for the images and labels. images_placeholder, labels_placeholder = placeholder_inputs( FLAGS.batch_size) # Build a Graph that computes predictions from the inference model. logits = mnist.inference(images_placeholder) # Add to the Graph the Ops for loss calculation. loss = mnist.loss(logits, labels_placeholder) # Add to the Graph the Ops that calculate and apply gradients. train_op = mnist.training(loss, FLAGS.batch_size) # Add the Op to compare the logits to the labels during evaluation. eval_correct = mnist.evaluation(logits, labels_placeholder) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Create a saver for writing training checkpoints. saver = tf.train.Saver() # Create a session for running Ops on the Graph. sess = tf.Session() # Run the Op to initialize the variables. init = tf.global_variables_initializer() sess.run(init) # Instantiate a SummaryWriter to output summaries and the Graph. summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph_def=sess.graph_def) # And then after everything is built, start the training loop. for step in range(FLAGS.max_steps): start_time = time.time() # Fill a feed dictionary with the actual set of images and labels # for this particular training step. feed_dict = fill_feed_dict(data_sets.train, images_placeholder, labels_placeholder) # Run one step of the model. The return values are the activations # from the `train_op` (which is discarded) and the `loss` Op. To # inspect the values of your Ops or variables, you may include them # in the list passed to sess.run() and the value tensors will be # returned in the tuple from the call. _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict) duration = time.time() - start_time # Write the summaries and print an overview fairly often. if step % 100 == 0: # Print status to stdout. print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration)) # Update the events file. summary_str = sess.run(summary_op, feed_dict=feed_dict) summary_writer.add_summary(summary_str, step) # Save a checkpoint and evaluate the model periodically. if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps: saver.save(sess, FLAGS.train_dir, global_step=step) # Evaluate against the training set. print('Training Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.train) # Evaluate against the validation set. print('Validation Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.validation) # Evaluate against the test set. print('Test Data Eval:') do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.test)
def run_training(settings: Settings) -> float: tf.gfile.MakeDirs(settings.log_dir) data_sets = copy.deepcopy(DATASETS) with tf.Graph().as_default(): images_placeholder, labels_placeholder = placeholder_inputs( settings.batch_size, ) logits = mnist.inference( images_placeholder, settings.hidden1, settings.hidden2, ) loss = mnist.loss(logits, labels_placeholder) train_op = mnist.training(loss, settings.learning_rate) eval_correct = mnist.evaluation(logits, labels_placeholder) summary = tf.summary.merge_all() init = tf.global_variables_initializer() saver = tf.train.Saver() sess = tf.Session() summary_writer = tf.summary.FileWriter(settings.log_dir, sess.graph) sess.run(init) for step in range(settings.max_steps): feed_dict = fill_feed_dict( data_sets.train, images_placeholder, labels_placeholder, settings, ) _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict) # Write the summaries and print an overview fairly often. if step % 100 == 0: # Update the events file. summary_str = sess.run(summary, feed_dict=feed_dict) summary_writer.add_summary(summary_str, step) summary_writer.flush() # Save a checkpoint and evaluate the model periodically. if (step + 1) % 1000 == 0 or (step + 1) == settings.max_steps: checkpoint_file = os.path.join(settings.log_dir, 'model.ckpt') saver.save(sess, checkpoint_file, global_step=step) # Evaluate against the training set. # print('Training Data Eval:') do_eval( sess, eval_correct, images_placeholder, labels_placeholder, data_sets.train, settings, ) # Evaluate against the validation set. # print('Validation Data Eval:') do_eval( sess, eval_correct, images_placeholder, labels_placeholder, data_sets.validation, settings, ) # Evaluate against the test set. # print('Test Data Eval:') acc = do_eval( sess, eval_correct, images_placeholder, labels_placeholder, data_sets.test, settings, ) return 1 - acc
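The Settings object consumed above is not defined in this excerpt. A plausible shape, assuming a typed NamedTuple holding the fields the function reads; the defaults are illustrative and the real object may carry more fields (for example whatever fill_feed_dict and do_eval need):

from typing import NamedTuple


class Settings(NamedTuple):
    # Fields referenced by run_training(settings); defaults are illustrative only.
    log_dir: str = '/tmp/mnist_logs'
    batch_size: int = 100
    hidden1: int = 128
    hidden2: int = 32
    learning_rate: float = 0.01
    max_steps: int = 2000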