def evaluate(): """Eval CIFAR-10 for a number of steps.""" dataset = input_data.read(FLAGS.input_dir) image_size = dataset.image_size with tf.Graph().as_default(): # Build a Graph that computes the logits predictions from the # inference model. eval_images = tf.placeholder(tf.float32, shape=(2, FLAGS.batch_size, image_size[0], image_size[1], image_size[2])) labels = tf.placeholder(tf.float32, shape=(FLAGS.batch_size)) images, images_p = tf.split(0, 2, train_images) with tf.variable_scope('inference') as scope: logits = cifar10.inference(images) scope.reuse_variables() logits2 = cifar10.inference(images_p) # Calculate predictions. accuracy = cifar10.accuracy(logits, logits2, labels) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() graph_def = tf.get_default_graph().as_graph_def() summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, graph_def=graph_def)
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() testImg, testlabels = cifar10.inputs(eval_data=True) # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) test_pre = cifar10.inference(testImg,test=True) # Calculate loss. loss = cifar10.loss(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() # Build an initialization operation to run below. init = tf.initialize_all_variables() # Start running operations on the Graph. sess = tf.Session(config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) sess.run(init) # Start the queue runners. tf.train.start_queue_runners(sess=sess) summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph) for step in xrange(FLAGS.max_steps): start_time = time.time() _, loss_value = sess.run([train_op, loss]) duration = time.time() - start_time if step % 10 == 0: print ('loss '+str(loss_value)) if step % 100 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, step) # Save the model checkpoint periodically. if step % 10 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step) #eval if step%10==0: cifar10.accuracy(test_pre,testlabels)
def eval_once(filename): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: # Get loss. # Create placeholder. images = tf.placeholder(tf.float32, shape=(FLAGS.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3)) labels = tf.placeholder(tf.int32, shape = (FLAGS.batch_size,)) domain_labels = tf.placeholder(tf.int32, shape = (FLAGS.batch_size,)) #loss_label, loss_domain = cifar10.inference(images) logits1, logits2= cifar10.inference(images) loss_label = cifar10.loss_without_decay(logits1, labels) loss_domain = cifar10.loss_without_decay(logits2, domain_labels) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) x_tr, y_tr, d_tr, nl_x_v, nl_y_v, nl_d_v, l_x_te, l_y_te, l_d_te = input.input() # Calculate losses of training, non-lifelog validation, and lifelog test. y_tr_loss, d_tr_loss = eval_a_dataset(saver, filename, x_tr, y_tr, d_tr, images, labels, domain_labels, loss_label, loss_domain) y_nl_loss, d_nl_loss = eval_a_dataset(saver, filename, nl_x_v, nl_y_v, nl_d_v, images, labels, domain_labels, loss_label, loss_domain) y_l_loss, d_l_loss = eval_a_dataset(saver, filename, l_x_te, l_y_te, l_d_te, images, labels, domain_labels, loss_label, loss_domain) return y_tr_loss, d_tr_loss, y_nl_loss, d_nl_loss, y_l_loss, d_l_loss
def test_once(filename): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: # Get loss. # Create placeholder. images = tf.placeholder(tf.float32, shape=(FLAGS.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3)) loss_label, loss_domain = cifar10.inference(images) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) _, _, _, _, _, _, \ x, y, domain= input.input() # Supplement the number of examples to multiplier of 25. num_of_examples = np.shape(x)[0] remainder = FLAGS.batch_size - int(math.ceil(num_of_examples/FLAGS.batch_size)) index = range(num_of_examples) + [0] * remainder with tf.Session() as sess: #need to modify for test saver.restore(sess, filename) global_step = int(filename.split('-')[1]) # Allocate results in a list. losses_label = [] losses_domain = [] # Start the queue runners. step = 0 while step + FLAGS.batch_size <= len(index): label_loss_value, domain_loss_value = sess.run([loss_label, loss_domain], feed_dict = {images:x[index[step:step+FLAGS.batch_size], :]}) losses_label.append(label_loss_value) losses_domain.append(domain_loss_value) step = step + FLAGS.batch_size # Convert list of lists to numpy array. losses_label = np.asarray(losses_label) losses_domain = np.asarray(losses_domain) losses_label = losses_label.reshape((-1, 21)) losses_domain = losses_domain.reshape((-1, 2)) losses_label = losses_label[:num_of_examples, :] losses_domain = losses_domain[:num_of_examples, :] sp.savez('test.npz', losses_label = losses_label, losses_domain = losses_domain, y = y, domain = domain) return losses_label, losses_domain, y, domain
def tower_loss(scope, images, labels): # Build inference Graph. logits = cifar10.inference(images, None, None, None, None, None, None, None, train=True) # Build the portion of the Graph calculating the losses. Note that we will # assemble the total_loss using a custom function below. _ = cifar10.loss(logits, labels) # Assemble all of the losses for the current tower only. losses = tf.get_collection('losses', scope) # Calculate the total loss for the current tower. total_loss = tf.add_n(losses, name='total_loss') # Attach a scalar summary to all individual losses and the total loss; do the # same for the averaged version of the losses. for l in losses + [total_loss]: # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training # session. This helps the clarity of presentation on tensorboard. loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name) tf.summary.scalar(loss_name, l) return total_loss
def tower_loss(scope): """Calculate the total loss on a single tower running the CIFAR model. Args: scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0' Returns: Tensor of shape [] containing the total loss for a batch of data """ # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() # Build inference Graph. logits = cifar10.inference(images) # Build the portion of the Graph calculating the losses. Note that we will # assemble the total_loss using a custom function below. _ = cifar10.loss(logits, labels) # Assemble all of the losses for the current tower only. losses = tf.get_collection('losses', scope) # Calculate the total loss for the current tower. total_loss = tf.add_n(losses, name='total_loss') # Compute the moving average of all individual losses and the total loss. loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') loss_averages_op = loss_averages.apply(losses + [total_loss]) # Attach a scalar summmary to all individual losses and the total loss; do the # same for the averaged version of the losses. for l in losses + [total_loss]: # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training # session. This helps the clarity of presentation on tensorboard. loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name) # Name each loss as '(raw)' and name the moving average version of the loss # as the original loss name. tf.scalar_summary(loss_name +' (raw)', l) tf.scalar_summary(loss_name, loss_averages.average(l)) with tf.control_dependencies([loss_averages_op]): total_loss = tf.identity(total_loss) return total_loss
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: # Get images and labels for CIFAR-10. eval_data = FLAGS.eval_data == 'test' images, _ = cifar10.inputs(eval_data=eval_data) logits = cifar10.inference(images) prediction = tf.nn.softmax(logits) variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) with tf.Session() as sess: ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: # Restores from checkpoint saver.restore(sess, ckpt.model_checkpoint_path) else: print('No checkpoint file found') return # Build a Graph that computes the logits predictions from the # inference model. result = tf.argmax(prediction, 1) tf.print(result) print('DONE')
def tower_loss(scope): images, labels = cifar10.distorted_inputs() logits = cifar10.inference(images) _ = cifar10.loss(logits, labels) losses = tf.get_collection('losses', scope) total_loss = tf.add_n(losses, name='total_loss') return total_loss
def main(_): with tf.Graph().as_default(): global_step = tf.contrib.framework.get_or_create_global_step() images, labels = cifar10.distorted_inputs() logits = cifar10.inference(images) loss = cifar10.loss(logits, labels) train_op = cifar10.train(loss, global_step) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord) summary = tf.summary.merge_all() writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph) print('Training started') for i in range(FLAGS.max_step): start_time = time.time() _, loss_value = sess.run([train_op, loss]) duration = time.time() - start_time if i % 100 == 0: print('Step %6d: loss = %.2f (%.3f sec)' % (i, loss_value, duration)) summary_str = sess.run(summary) writer.add_summary(summary_str, i) writer.flush() coord.request_stop() coord.join(threads)
def tower_loss(scope, images, labels): """Calculate the total loss on a single tower running the CIFAR model. Args: scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0' images: Images. 4D tensor of shape [batch_size, height, width, 3]. labels: Labels. 1D tensor of shape [batch_size]. Returns: Tensor of shape [] containing the total loss for a batch of data """ # Build inference Graph. logits = cifar10.inference(images) # Build the portion of the Graph calculating the losses. Note that we will # assemble the total_loss using a custom function below. _ = cifar10.loss(logits, labels) # Assemble all of the losses for the current tower only. losses = tf.get_collection('losses', scope) # Calculate the total loss for the current tower. total_loss = tf.add_n(losses, name='total_loss') # Attach a scalar summary to all individual losses and the total loss; do the # same for the averaged version of the losses. for l in losses + [total_loss]: # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training # session. This helps the clarity of presentation on tensorboard. loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name) tf.summary.scalar(loss_name, l) return total_loss
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: # Get images and labels for CIFAR-10. eval_data = FLAGS.eval_data == 'test' images, labels = cifar10.inputs(eval_data=eval_data) # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) # Calculate predictions. top_k_op = tf.nn.in_top_k(logits, labels, 1) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g) while True: eval_once(saver, summary_writer, top_k_op, summary_op) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs)
def train(): with tf.Graph().as_default(): # get global step global_step = tf.train.get_or_create_global_step() # get data through cpu with tf.device('/cpu:0'): images, labels = cifar10.distorted_inputs() # get loss and logit # logits = cifar10.inference(images=images, r=low_ranks) logits = cifar10.inference(images=images, r=low_ranks) loss = cifar10.loss(logits=logits, labels=labels) # set train_op train_op = cifar10.train(loss, global_step) for v in tf.trainable_variables(): print(v) nonzero = tf.count_nonzero(tf.get_collection('sparse_components')[-1]) # define a LoggerHook to log something # clean_list = tf.get_collection('sparse_components') # clean_list = clean_s(clean_list) # clean_op = [c.op for c in clean_list] class _LoggerHook(tf.train.SessionRunHook): """ log session and runtime info """ def begin(self): self._step = -1 self._start_time = time.time() def before_run(self, run_context): self._step += 1 return tf.train.SessionRunArgs(loss) def after_run(self, run_context, run_values): if self._step % FLAGS.log_frequency == 0: current_time = time.time() duration = current_time - self._start_time self._start_time = current_time loss_value = run_values.results example_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration sec_per_batch = float(duration / FLAGS.log_frequency) format_str = ( '%s: step %d, loss = %.6f (%.1f examples/sec;' '%.3f sec/batch)') print(format_str % (datetime.now(), self._step, loss_value, example_per_sec, sec_per_batch)) with tf.train.MonitoredTrainingSession( checkpoint_dir=FLAGS.train_dir, hooks=[ tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss), _LoggerHook() ], config=tf.ConfigProto(log_device_placement=FLAGS. log_device_placement)) as mon_sess: while not mon_sess.should_stop(): mon_sess.run(train_op)
def evaluate(): """ 多次评价CIFAR-10 """ with tf.Graph().as_default() as g: # 从CIFAR-10中读取图像和标签 eval_data = (FLAGS.eval_data == 'test') images, labels = cifar10.input(eval_data = eval_data) # 构建一个图计算推理模型的logits logits = cifar10.inference(images) # 计算准确度 top_k_op = tf.nn.in_top_k(logits, labels, 1) # 读取已经训练好的模型参数 variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # 用TF collection of Summaries构建summary operation summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g) run_time = FLAGS.run_once while True: eval_once(saver, summary_writer, top_k_op, summary_op) if run_time <= 0: break run_time -= 1 time.sleep(FLAGS.eval_interval_secs)
def train(): with tf.Graph().as_default(): images, labels = cifar10.distorted_inputs() logits = cifar10.inference(images) loss = cifar10.loss(logits, labels) global_step = tf.Variable(0, trainable=False) train_op = cifar10.train(loss, global_step=global_step) summary_op = tf.merge_all_summaries() init = tf.global_variables_initializer() sess = tf.Session() sess.run(init) tf.train.start_queue_runners(sess=sess) summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph) trainable_var = tf.trainable_variables() print([v.name for v in trainable_var]) var = trainable_var[0] res = sess.run(var) print(type(res))
def train(): with tf.Graph().as_default(): global_step = tf.train.get_or_create_global_step() images, labels = cifar10.distorted_inputs() logits = cifar10.inference(images, train = True) loss = cifar10.loss(logits, labels) accuracy = cifar10.accuracy(logits, labels) train_op = cifar10.train(loss, global_step) class _LoggerHook(tf.train.SessionRunHook): def begin(self): self._step = -1 def before_run(self, run_context): self._step += 1 return tf.train.SessionRunArgs([loss, accuracy]) def after_run(self, run_context, run_values): if self._step % 10 == 0: loss_value, acc_value = run_values.results format_str = ('step %d, loss = %.2f, accuracy = %.2f ') print (format_str %(self._step, loss_value, acc_value)) with tf.train.MonitoredTrainingSession( checkpoint_dir=train_dir, hooks=[tf.train.StopAtStepHook(last_step=max_step), tf.train.NanTensorHook(loss), _LoggerHook()], config=tf.ConfigProto( log_device_placement=False)) as mon_sess: while not mon_sess.should_stop(): mon_sess.run(train_op)
def tower_loss(scope): """Calculate the total loss on a single tower running the CIFAR model. Args: scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0' Returns: Tensor of shape [] containing the total loss for a batch of data """ # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() # Build inference Graph. logits = cifar10.inference(images) # Build the portion of the Graph calculating the losses. Note that we will # assemble the total_loss using a custom function below. _ = cifar10.loss(logits, labels) # Assemble all of the losses for the current tower only. losses = tf.get_collection('losses', scope) # Calculate the total loss for the current tower. total_loss = tf.add_n(losses, name='total_loss') # Compute the moving average of all individual losses and the total loss. # loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') # loss_averages_op = loss_averages.apply(losses + [total_loss]) # Attach a scalar summary to all individual losses and the total loss; do the # same for the averaged version of the losses. # for l in losses + [total_loss]: # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training # session. This helps the clarity of presentation on tensorboard. # loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name) # Name each loss as '(raw)' and name the moving average version of the loss # as the original loss name. # tf.scalar_summary(loss_name +' (raw)', l) # tf.scalar_summary(loss_name, loss_averages.average(l)) # with tf.control_dependencies([loss_averages_op]): # total_loss = tf.identity(total_loss) return total_loss
def tower_loss(scope): """Calculate the total loss on a single tower running the CIFAR model. Args: scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0' Returns: Tensor of shape [] containing the total loss for a batch of data """ # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() # Build inference Graph. logits = cifar10.inference(images) # Build the portion of the Graph calculating the losses. Note that we will # assemble the total_loss using a custom function below. _ = cifar10.loss(logits, labels) # Assemble all of the losses for the current tower only. losses = tf.get_collection('losses', scope) # Calculate the total loss for the current tower. total_loss = tf.add_n(losses, name='total_loss') # Attach a scalar summary to all individual losses and the total loss; do the # same for the averaged version of the losses. for l in losses + [total_loss]: # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training # session. This helps the clarity of presentation on tensorboard. loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name) tf.summary.scalar(loss_name, l) return total_loss
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) images, labels = cifar10.distorted_inputs() logits = cifar10.inference(images) loss = cifar10.loss(logits, labels) # loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)) # train_op = tf.train.GradientDescentOptimizer(1e-2).minimize(loss) train_op = cifar10.train(loss, global_step) top_k_op = tf.nn.in_top_k(logits, labels, 1) saver = tf.train.Saver(tf.all_variables()) init = tf.initialize_all_variables() sess = tf.Session(config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) sess.run(init) tf.train.start_queue_runners(sess=sess) true_count = 0 for step in xrange(FLAGS.max_steps): start_time = time.time() _, loss_value, precisions = sess.run([train_op, loss, top_k_op]) true_count += np.sum(precisions) if step % 10 == 0: checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step) duration = time.time() - start_time print(' step %d, loss = %.3f, acc = %.3f, dur = %.2f' % (step, loss_value, true_count/(FLAGS.batch_size*10), duration)) true_count = 0
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: # Get images and labels for CIFAR-10. eval_data = FLAGS.eval_data == 'test' images, labels = cifar10.inputs(eval_data=eval_data) tf.summary.image('images', images) # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) # Calculate predictions. top_k_op = tf.nn.in_top_k(logits, labels, 1) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g) while True: eval_once(saver, summary_writer, top_k_op, summary_op) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs)
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: # Get images and labels for CIFAR-10. # eval_data = FLAGS.eval_data == 'test' # images, labels = cifar10.inputs(eval_data=eval_data) f = tf.read_file('/images/input.jpg') image = tf.image.decode_jpeg(f, channels=3) image = tf.image.resize_images(image, (24, 24)) tf.summary.image('images', [image]) # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference([image]) # Calculate predictions. # top_k_op = tf.nn.in_top_k(logits, labels, 1) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g) eval_once(saver, summary_writer, logits, summary_op)
def train(): """Train the cifar-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.train.get_or_create_global_step() # Get images and labels for CIFAR-10. # Force the input pipeline to CPU:0 to avoid operation sometimes ended up on # GPU and resulting in a slow down. with tf.device('/cpu:0'): images, labels = cifar10.distorted_inputs() # Build a Graph that computes the logits predictions from the # inference model logits = cifar10.inference(images) # calculate the loss. loss = cifar10.loss(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameter train_op = cifar10.train(loss, global_step) class _LoggerHook(tf.train.SessionRunHook): """Logs loss and runtime.""" def begin(self): self._step = -1 self._start_time = time.time() def before_run(self, run_context): self._step += 1 return tf.train.SessionRunArgs( loss) # asks for the loss value. def after_run(self, run_context, run_values): if self._step % FLAGS.log_frequency == 0: current_time = time.time() duration = current_time - self._start_time self._start_time = current_time loss_value = run_values.results examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration sec_per_batch = float(duration / FLAGS.log_frequency) format_str = ( '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f', 'sec/batch)') print(format_str % (datetime.now(), self._step, loss_value, examples_per_sec, sec_per_batch)) with tf.train.MonitoredTrainingSession( checkpoint_dir=FLAGS.train_dir, hooks=[ tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss), _LoggerHook() ], config=tf.ConfigProto(log_device_placement=FLAGS. log_device_placement)) as mon_sess: while not mon_sess.should_stop(): mon_sess.run(train_op)
def evaluate(): """ Evaluate cifar10 for a number of steps """ with tf.Graph().as_default() as g: # get images and labels for cifar-10 eval_data = FLAGS.eval_data == "test" images, labels = cifar10.inputs(eval_data=eval_data) # build a tf.Graph that computes logits from inference model logits = cifar10.inference(images) # calculate prediction top_k_op = tf.nn.in_top_k(logits, labels, 1) # restore moving average version of the learned variables for eval variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # build the summary operation based on the tf collection of summaries summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g) while True: eval_once(saver, summary_writer, top_k_op, summary_op) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs)
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: with tf.device('/cpu:0'): images, labels = cifar10.distorted_inputs() # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) # Calculate predictions. top_k_op = tf.nn.in_top_k(logits, labels, 1) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(FLAGS.train_dir, g) eval_once(saver, summary_writer, top_k_op, summary_op)
def evaluate_image(filename): with tf.Graph().as_default() as g: # Number of images to process FLAGS.batch_size = 1 image = img_read(filename) logit = cifar10.inference(image) output = tf.nn.softmax(logit) top_k_pred = tf.nn.top_k(output, k=1) variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) with tf.Session() as sess: #sess.run(tf.global_variables_initializer()) ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: # Restores from checkpoint saver.restore(sess, ckpt.model_checkpoint_path) else: print("No checkpoint file found") return values, indices = sess.run(top_k_pred) print LABEL_NAMES[indices[0][0] - 1], values[0][0]
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: # Get images and labels for CIFAR-10. inputs = cifar10.ram_inputs(unit_variance=True, is_train=False) images = inputs['images'] labels = inputs['labels'] # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images, 3, use_batchnorm=True, use_nrelu=False, id_decay=False, add_shortcuts=True, is_train=False) # Calculate predictions. top_k_op = tf.nn.in_top_k(logits, labels, 1) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g) while True: eval_once(saver, summary_writer, top_k_op, summary_op, inputs) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs)
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: # Get images and labels for CIFAR-10. images, labels = cifar10.inputs(eval_data='test') # need modify for test logits, _ = cifar10.inference(images) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() del variables_to_restore['input_producer/limit_epochs/epochs'] saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() summary_writer = tf.train.SummaryWriter(FLAGS.test_dir, g) while True: eval_once_given_model(saver, summary_writer, logits, labels, summary_op, images) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs)
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: # Get images and labels for CIFAR-10. eval_data = FLAGS.eval_data == 'test' images, labels = cifar10.inputs(eval_data=eval_data) # Generate a batch of images and labels # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) # Calculate predictions. top_k_op = tf.nn.in_top_k(logits, labels, 1) # k = 1, this outputs a batch_size bool array, an entry out[i] is true if the prediction for the target class is among the top k predictions among all predictions for example i # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() # Returns a map of names to Variables to restore. saver = tf.train.Saver(variables_to_restore) # The Saver class adds ops to save and restore variables to and from checkpoints # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Merges all summaries collected in the default graph. summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g) # create an event file in a given directory and add summaries and events to it while True: eval_once(saver, summary_writer, top_k_op, summary_op) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs) # Suspend execution of the calling thread for the given number of seconds
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) images, labels = cifar10.distorted_inputs() # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) # Calculate loss. loss = cifar10.loss(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() # Build an initialization operation to run below. init = tf.initialize_all_variables() # Start running operations on the Graph. sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)) sess.run(init) # Start the queue runners. tf.train.start_queue_runners(sess=sess) summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph) for step in xrange(FLAGS.max_steps): start_time = time.time() _, loss_value = sess.run([train_op, loss]) duration = time.time() - start_time assert not np.isnan(loss_value), "Model diverged with loss = NaN" if step % 10 == 0: num_examples_per_step = FLAGS.batch_size examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = "%s: step %d, loss = %.2f (%.1f examples/sec; %.3f " "sec/batch)" print(format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) if step % 100 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, step) # Save the model checkpoint periodically. if step % 1000 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt") saver.save(sess, checkpoint_path, global_step=step)
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.contrib.framework.get_or_create_global_step() # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) # Calculate loss. loss = cifar10.loss(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) class _LoggerHook(tf.train.SessionRunHook): """Logs loss and runtime.""" def begin(self): self._step = -1 def before_run(self, run_context): self._step += 1 self._start_time = time.time() return tf.train.SessionRunArgs(loss) # Asks for loss value. def after_run(self, run_context, run_values): duration = time.time() - self._start_time loss_value = run_values.results if self._step % 10 == 0: num_examples_per_step = FLAGS.batch_size examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = ( '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print(format_str % (datetime.now(), self._step, loss_value, examples_per_sec, sec_per_batch)) global step_no step_no = self._step with tf.train.MonitoredTrainingSession( checkpoint_dir=FLAGS.train_dir, hooks=[ tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss), _LoggerHook() ], config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement, inter_op_parallelism_threads=4, intra_op_parallelism_threads=0)) as mon_sess: while not mon_sess.should_stop(): mon_sess.run(train_op) """run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
def train(): """ 多步训练CIFAR-10数据集 """ with tf.Graph().as_default(): global_step = tf.contrib.framework.get_or_create_global_step() # 指定 CPU:0 进行数据的变形处理 with tf.device('/cpu:0'): images, labels = cifar10.distorted_inputs() # inference 前向预测,定义在cifar10.py文件 logits = cifar10.inference(images) # loss. loss = cifar10.loss(logits, labels) # train train_op = cifar10.train(loss, global_step) class _LoggerHook(tf.train.SessionRunHook): """记录 loss 和 runtime.""" def __init__(self): self._start_time = time.time() self._step = -1 def begin(self): pass def before_run(self, run_context): self._step += 1 return tf.train.SessionRunArgs( loss) # 将loss 操作加到Session.run()调用 def after_run(self, run_context, run_values): if self._step % FLAGS.log_frequency == 0: current_time = time.time() duration = current_time - self._start_time self._start_time = current_time loss_value = run_values.results examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration sec_per_batch = float(duration / FLAGS.log_frequency) format_str = '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)' print(format_str % (datetime.now(), self._step, loss_value, examples_per_sec, sec_per_batch)) with tf.train.MonitoredTrainingSession( checkpoint_dir=FLAGS.train_dir, hooks=[ tf.train.StopAtStepHook( last_step=FLAGS.max_steps), # 在运行指定步长后停止 tf.train.NanTensorHook(loss), # 监视loss,如果loss==NaN,停止训练 _LoggerHook() ], # log_device_placement是否打印设备分配日志 config=tf.ConfigProto(log_device_placement=FLAGS. log_device_placement)) as mon_sess: while not mon_sess.should_stop(): mon_sess.run(train_op)
def train(): """Train CIFAR-10 for a number of steps.""" g1 = tf.Graph() with g1.as_default(): global_step = tf.contrib.framework.get_or_create_global_step() # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) # Calculate loss. loss = cifar10.loss(logits, labels) grads = cifar10.train_part1(loss, global_step) only_gradients = [g for g, _ in grads] only_vars = [v for _, v in grads] placeholder_gradients = [] #with tf.device("/gpu:0"): for grad_var in grads: placeholder_gradients.append( (tf.placeholder('float', shape=grad_var[0].get_shape()), grad_var[1])) feed_dict = {} for i, grad_var in enumerate(grads): feed_dict[placeholder_gradients[i][0]] = np.zeros( placeholder_gradients[i][0].shape) train_op = cifar10.train_part2(global_step, placeholder_gradients) sess = tf.Session() sess.run(tf.global_variables_initializer()) feeds = [] print("Reached here") for i, grad_var in enumerate(grads): feeds.append(placeholder_gradients[i][0]) # Partial Run print("Reached here", len(feeds)) for x in feeds: print(x, ) h = sess.partial_run_setup([only_gradients, train_op], feeds) print("Reached here") for i in xrange(10): res_grads = sess.partial_run(h, only_gradients, feed_dict=feed_dict) feed_dict = {} for i, grad_var in enumerate(res_grads): feed_dict[placeholder_gradients[i][0]] = res_grads[i] res_train_op = sess.partial_run(h, train_op, feed_dict=feed_dict)
def evaluate(): """Eval CIFAR-10 for a number of steps.""" dt = Transformer.Transformer() dt.checkpoint_version = FLAGS.checkpoint_version dt.trainable = False #with dt.graph.as_default() as g: with tf.Graph().as_default() as g: if FLAGS.is_compressed: dt.load_flow('/tmp/cifar10_train_' + str(dt.checkpoint_version - 1) + '/pruned_flow_graph.pkl') # prefetch flow graph dt.flow = dt.cached_flow else: dt.flow = NeuralFlow() with g.device('/cpu:0'): # Get images and labels for CIFAR-10. eval_data = FLAGS.eval_data == 'test' images, labels = cifar10.inputs(eval_data=eval_data) # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(dt, images, is_training=False, is_compressed=FLAGS.is_compressed) total_params, _ = slim.model_analyzer.analyze_vars( slim.get_model_variables()) print('total params: %d' % total_params) # Calculate predictions. top_k_op = tf.nn.in_top_k(logits, labels, 1) # Calculate loss. loss_op = cifar10.loss(dt, logits, labels) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() variables_to_restore_simple = tf.global_variables() if FLAGS.is_compressed: saver = tf.train.Saver(variables_to_restore_simple) else: saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g) while True: eval_once(dt, saver, summary_writer, top_k_op, loss_op, summary_op) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs)
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: # Get images and labels for CIFAR-10. eval_data = FLAGS.eval_data == 'test' images, labels = cifar10.inputs(eval_data=eval_data) # Build a Graph that computes the logits predictions from the # # inference model. 推理模型 logits = cifar10.inference(images) # Calculate predictions. top_k_op = tf.nn.in_top_k(logits, labels, 1) ### # tf.nn.in_top_k组要是用于计算预测的结果和实际结果的是否相等,返回一个bool类型的张量, # tf.nn.in_top_k(prediction, target, K): # prediction就是表示你预测的结果,大小就是预测样本的数量乘以输出的维度,类型是tf.float32 # 等。target就是实际样本类别的标签,大小就是样本数量的个数。K表示每个样本的预测结果的前K # 个最大的数里面是否含有target中的值。一般都是取1。 # # import tensorflow as tf; # A = [[0.8,0.6,0.3], [0.1,0.6,0.4]] # B = [1, 1] # out = tf.nn.in_top_k(A, B, 1) # with tf.Session() as sess: # sess.run(tf.initialize_all_variables()) # print sess.run(out) # # Running Result:[False, True] ! 注意0起址index ### # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) ### # tf.train.ExponentialMovingAverage这个函数用于更新参数,就是采用滑动平均的方法更新参数。 # 这个函数初始化需要提供一个衰减速率(decay),用于控制模型的更新速度。这个函数还会维护一个 # 影子变量(也就是更新参数后的参数值),这个影子变量的初始值就是这个变量的初始值,影子变量 # 值的更新方式如下: # shadow_variable = decay * shadow_variable + (1-decay) * variable # shadow_variable是影子变量,variable表示待更新的变量,也就是变量被赋予的值,decay为衰减速率。 # decay一般设为接近于1的数(0.99,0.999)。decay越大模型越稳定,因为decay越大,参数更新的速度 # 就越慢,趋于稳定。 ### variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g) while True: eval_once(saver, summary_writer, top_k_op, summary_op) if FLAGS.run_once: break # # 休眠一顿时间后再评估 time.sleep(FLAGS.eval_interval_secs)
def build_model_func(): images, labels = cifar10.distorted_inputs() logits = cifar10.inference(images) loss = cifar10.loss(logits, labels) return loss
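# build_model_func only returns the loss tensor, so a caller has to wire it to an
# optimizer itself. A hedged usage sketch; the optimizer and learning rate below are
# placeholders, not taken from the original code:
with tf.Graph().as_default():
  loss = build_model_func()
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
  train_op = optimizer.minimize(loss)
  # MonitoredTrainingSession starts the queue runners needed by distorted_inputs().
  with tf.train.MonitoredTrainingSession() as sess:
    while not sess.should_stop():
      sess.run(train_op)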
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): # Get images and labels for CIFAR-10. eval_data = FLAGS.eval_data == 'test' images, labels = inputs() # cifar10.inputs(eval_data=eval_data) print "images dimension @ evaluation:", images print "image label @ evaluation:", labels # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) # Calculate predictions. top_k_op = tf.nn.in_top_k(logits, labels, 1) top_k_predict_op = tf.argmax(logits, 1) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = {} for v in tf.all_variables(): if v in tf.trainable_variables(): restore_name = variable_averages.average_name(v) else: restore_name = v.op.name variables_to_restore[restore_name] = v saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() graph_def = tf.get_default_graph().as_graph_def() summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, graph_def=graph_def) # test_image(saver, logits) ###Test one image print "#################" print "##gonna test this:" print "#################" ### while True: eval_once(saver, summary_writer, top_k_op, top_k_predict_op, summary_op, images) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs)
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.train.get_or_create_global_step() # Get images and labels for CIFAR-10. # Force input pipeline to CPU:0 to avoid operations sometimes ending up on # GPU and resulting in a slow down. with tf.device('/cpu:0'): images, labels = cifar10.distorted_inputs() # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) # Calculate loss. loss = cifar10.loss(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) class _LoggerHook(tf.train.SessionRunHook): """Logs loss and runtime.""" def begin(self): self._step = -1 self._start_time = time.time() def before_run(self, run_context): self._step += 1 return tf.train.SessionRunArgs(loss) # Asks for loss value. def after_run(self, run_context, run_values): if self._step % FLAGS.log_frequency == 0: current_time = time.time() duration = current_time - self._start_time self._start_time = current_time loss_value = run_values.results examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration sec_per_batch = float(duration / FLAGS.log_frequency) format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print (format_str % (datetime.now(), self._step, loss_value, examples_per_sec, sec_per_batch)) with tf.train.MonitoredTrainingSession( checkpoint_dir=FLAGS.train_dir, hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss), _LoggerHook()], config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) as mon_sess: while not mon_sess.should_stop(): mon_sess.run(train_op)
def train(): """ Train the CIFAR10 model for a number of steps""" global_step = tf.train.get_or_create_global_step() # Get images and labels for the cifar10 data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin') images_batch, labels_batch = cifar10_input.train_inputs( data_dir, FLAGS.batch_size) # Build a Graph that computes the logits predictions from the # inference model logits = cifar10.inference(images_batch) # Compute loss loss = cifar10.loss(logits, labels_batch) # Build a Graph that trains the model with one batch of examples and # updates the model parameters train_op = cifar10.train(loss, global_step) class _LoggerHook(tf.train.SessionRunHook): """Logs loss and runtime""" def begin(self): self._step = -1 self._start_time = time.time() def before_run(self, run_context): self._step += 1 return tf.train.SessionRunArgs(loss) # Asks for loss value def after_run(self, run_context, run_values): if self._step % FLAGS.log_frequency == 0: current_time = time.time() duration = current_time - self._start_time self._start_time = current_time loss_value = run_values.results examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration sec_per_batch = float(duration / FLAGS.log_frequency) format_str = ( '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f' 'sec/batch)') print(format_str % (datetime.now(), self._step, loss_value, examples_per_sec, sec_per_batch)) with tf.train.MonitoredTrainingSession( checkpoint_dir=FLAGS.checkpoint_dir, hooks=[ tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss), _LoggerHook() ], config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) as mon_sess: while not mon_sess.should_stop(): mon_sess.run(train_op)
def main(): # Get images and labels for CIFAR-10. eval_data = FLAGS.eval_data == 'test' images, labels = cifar10.inputs(eval_data=eval_data) with tf.Session() as sess: # Build a Graph that computes the logits predictions from the # inference model. probabilities = tf.nn.softmax(cifar10.inference(images)) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: # Restores from checkpoint saver.restore(sess, ckpt.model_checkpoint_path) else: print('No checkpoint file found') return # Start the queue runners. coord = tf.train.Coordinator() try: threads = [] for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS): threads.extend( qr.create_threads(sess, coord=coord, daemon=True, start=True)) num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size)) submission = [] true_labels = [] step = 0 while step < num_iter and not coord.should_stop(): submission_batch, true_labels_batch = sess.run( [probabilities, labels]) submission.append(submission_batch) true_labels.append(true_labels_batch) step += 1 submission = np.vstack(submission) true_labels = np.concatenate(true_labels) except Exception as e: # pylint: disable=broad-except coord.request_stop(e) coord.request_stop() coord.join(threads, stop_grace_period_secs=10) return submission, true_labels
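# main() above returns per-example class probabilities together with the matching
# labels. A small usage sketch that turns them into a top-1 accuracy; the metric
# itself is an addition, only the names come from the function above:
submission, true_labels = main()
predictions = np.argmax(submission, axis=1)
print('top-1 accuracy: %.4f' % np.mean(predictions == true_labels))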
def run(name): image_name = 'living.jpg' image_string = open(image_name, 'rb').read() image = tf.image.decode_jpeg(image_string, channels=1) if image.dtype != tf.float32: image = tf.image.convert_image_dtype(image, dtype=tf.float32) image = tf.expand_dims(image, 0) # Save the processed image for Fathom with tf.Session() as sess: npimage = image.eval() np.save('image.npy', npimage.squeeze()) # Run the network with Tensorflow with tf.Graph().as_default(): image = tf.placeholder("float", [1,45,44,1], name="input") #with slim.arg_scope(inception.inception_v1_arg_scope()): logits = cifar10.inference(image) #probabilities = tf.nn.softmax(logits) probabilities = logits #init_fn = slim.assign_from_checkpoint_fn('./model.ckpt-19999', slim.get_model_variables('model')) variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) with tf.Session() as sess: ckpt = tf.train.get_checkpoint_state('ecotaxa_train/') saver.restore(sess, ckpt.model_checkpoint_path) init_fn = slim.assign_from_checkpoint_fn('./ecotaxa_train/model.ckpt-0', tf.global_variables()) with tf.Session() as sess: init_fn(sess) prob = probabilities.eval(feed_dict = {"input:0" : npimage}) # Save the network for Fathom saver = tf.train.Saver(tf.global_variables()) saver.save(sess, name) """ i = 1 for n in tf.get_default_graph().as_graph_def().node: if i < 100: print("name:",n.name) print("op",n.op) print("input:",n.input) print(" ") i = i +1 """ # Dump output prob = prob.squeeze() probidx = np.argsort(-prob) print('Top-2:') for i in range(2): print(probidx[i], prob[probidx[i]])
def evaluate(images, labels=None): with tf.Graph().as_default(): image_holder = tf.placeholder(tf.uint8, [32, 32, 3]) resized_image = tf.cast(image_holder, tf.float32) resized_image = tf.image.resize_image_with_crop_or_pad( image_holder, IMAGE_SIZE, IMAGE_SIZE) # Subtract off the mean and divide by the variance of the pixels. float_image = tf.image.per_image_standardization(resized_image) # batch size = 1 float_image_batch = tf.reshape(float_image, [1, IMAGE_SIZE, IMAGE_SIZE, 3]) # inference model. logits = cifar10.inference(float_image_batch, False) softmax = tf.nn.softmax(logits) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) with tf.Session() as sess: ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) global_step = ckpt.model_checkpoint_path.split('/')[-1].split( '-')[-1] else: print('No checkpoint file found') return total = 0 correct = 0 statics = {} for cls in CLASSES: statics[cls] = 0 for i, image in enumerate(images): total += 1 softmax_out = sess.run(softmax, feed_dict={image_holder: image}) index = np.argmax(softmax_out) if labels: if labels[i] == index: correct += 1 statics[CLASSES[index]] += 1 for cls in CLASSES: print('%-12s: %4d/%d' % (cls, statics[cls], total)) if labels: print('%d/%d, %.1f%%' % (correct, total, correct * 100 / float(total)))
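# One possible way to feed evaluate() above with real data is to unpickle a CIFAR-10
# python batch; the file path below and the local layout are assumptions about the
# standard cifar-10-batches-py download, not part of the original code.
import pickle

with open('cifar-10-batches-py/test_batch', 'rb') as f:
  batch = pickle.load(f)  # under Python 3 use pickle.load(f, encoding='latin1')
test_images = batch['data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)  # NHWC uint8
test_labels = list(batch['labels'])
evaluate(test_images[:100], labels=test_labels[:100])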
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.contrib.framework.get_or_create_global_step() # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) # Calculate loss. loss = cifar10.loss(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) class _LoggerHook(tf.train.SessionRunHook): """Logs loss and runtime.""" def begin(self): self._step = -1 def before_run(self, run_context): self._step += 1 self._start_time = time.time() return tf.train.SessionRunArgs(loss) # Asks for loss value. def after_run(self, run_context, run_values): duration = time.time() - self._start_time loss_value = run_values.results if self._step % 10 == 0: num_examples_per_step = FLAGS.batch_size examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print (format_str % (datetime.now(), self._step, loss_value, examples_per_sec, sec_per_batch)) global step_no step_no = self._step with tf.train.MonitoredTrainingSession( checkpoint_dir=FLAGS.train_dir, hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss), _LoggerHook()], config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement, inter_op_parallelism_threads=4,intra_op_parallelism_threads=0)) as mon_sess: while not mon_sess.should_stop(): mon_sess.run(train_op) """run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: eval_data = FLAGS.eval_data == 'test' images, labels = cifar10.inputs(eval_data=eval_data) logits = cifar10.inference(images) top_k_op = tf.nn.in_top_k(logits, labels, 1) loss = cifar10.loss(logits, labels) saver = tf.train.Saver(tf.all_variables()) while True: eval_once(saver, top_k_op, loss) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs)
def train(): with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) images, labels = cifar10.distorted_inputs() logits = cifar10.inference(images) loss = cifar10.loss(logits, labels) train_op = cifar10.train(loss, global_step) saver = tf.train.Saver(tf.all_variables()) summary_op = tf.merge_all_summaries() init = tf.initialize_all_variables() sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)) sess.run(init) tf.train.start_queue_runners(sess=sess) summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, graph_def=sess.graph_def) for step in xrange(FLAGS.max_steps): start_time = time.time() _, loss_value = sess.run([train_op, loss]) duration = time.time() - start_time assert not np.isnan(loss_value), "Model diverged with loss = NaN" if step % 10 == 0: num_examples_per_step = FLAGS.batch_size examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = "%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)" print(format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) if step % 100 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, step) if step % 1000 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt") saver.save(sess, checkpoint_path, global_step=step)
def evaluate(): """Eval CIFAR-10 for a number of steps.""" with tf.Graph().as_default() as g: # Get images and labels for CIFAR-10. eval_data = FLAGS.eval_data == "test" print(eval_data) images, labels, ground_truth = cifar10.inputs(eval_data=eval_data) # Build a Graph that computes the logits predictions from the # inference model. logits, _ = cifar10.inference(images) print(logits) print(logits.get_shape()) print("after inference node creation") loss = cifar10.loss(logits, labels) accuracy, precision, accuracies = cifar10.accuracy(logits, ground_truth) labels = tf.cast(labels, tf.int64) label_shape = labels.get_shape().as_list() reshaped_labels = tf.reshape(labels, [label_shape[0] * label_shape[1] * label_shape[2]]) logits_shape = logits.get_shape().as_list() reshaped_logits = tf.reshape(logits, [logits_shape[0] * logits_shape[1] * logits_shape[2], logits_shape[3]]) # Calculate predictions. # top_k_op = tf.nn.in_top_k(logits, labels, 1) # top_k_op = tf.nn.in_top_k(reshaped_logits, reshaped_labels, 1) # Restore the moving average version of the learned variables for eval. variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g) while True: print("evaluate:") eval_once(saver, summary_writer, summary_op, accuracy, precision, accuracies) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs)
def evaluate(): with tf.Graph().as_default(): eval_data = FLAGS.eval_data == 'test' images, labels = cifar10.inputs(eval_data=eval_data) logits = cifar10.inference(images) top_k_op = tf.nn.in_top_k(logits, labels, 1) variable_averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) summary_op = tf.merge_all_summaries() graph_def = tf.get_default_graph().as_graph_def() summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, graph_def=graph_def) while True: eval_once(saver, summary_writer, top_k_op, summary_op) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs)
def train(): """Train a model for a number of steps.""" with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) # Get images and labels for a segmentation model. images, labels, ground_truth = cifar10.distorted_inputs() tf.histogram_summary('label_hist/with_ignore', labels) tf.histogram_summary('label_hist/ground_truth', ground_truth) # Build a Graph that computes the logits predictions from the # inference model. print("before inference") print(images.get_shape()) logits, nr_params = cifar10.inference(images) print("nr_params: "+str(nr_params) ) print("after inference") # Calculate loss. loss = cifar10.loss(logits, labels) accuracy, precision, cat_accs = cifar10.accuracy(logits, ground_truth) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # tf.image_summary('images2', images) print (logits) # tf.image_summary('predictions', logits) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() # Build an initialization operation to run below. init = tf.initialize_all_variables() # Start running operations on the Graph. sess = tf.Session(config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: # Restores from checkpoint saver.restore(sess, ckpt.model_checkpoint_path) # Assuming model_checkpoint_path looks something like: # /my-favorite-path/cifar10_train/model.ckpt-0, # extract global_step from it. global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) else: print('No checkpoint file found') print('Initializing new model') sess.run(init) global_step = 0 # Start the queue runners. tf.train.start_queue_runners(sess=sess) summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph) for step in xrange(global_step, FLAGS.max_steps): start_time = time.time() _, loss_value, accuracy_value, precision_value, cat_accs_val = sess.run([train_op, loss, accuracy, precision, cat_accs]) duration = time.time() - start_time print (precision_value) print (cat_accs_val) assert not np.isnan(loss_value), 'Model diverged with loss = NaN' #precision_value = [0 if np.isnan(p) else p for p in precision_value] #print (precision_value) if step % 10 == 0: num_examples_per_step = FLAGS.batch_size examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)\n Accuracy = %.4f, mean average precision = %.4f') print (format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch, accuracy_value, np.mean(precision_value))) if step % 100 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, step) summary = tf.Summary() summary.value.add(tag='Accuracy (raw)', simple_value=float(accuracy_value)) for i,s in enumerate(CLASSES): summary.value.add(tag="precision/"+s+" (raw)",simple_value=float(precision_value[i])) summary.value.add(tag="accs/"+s+" (raw)",simple_value=float(cat_accs_val[i])) # summary.value.add(tag='Human precision (raw)', simple_value=float(precision_value)) summary_writer.add_summary(summary, step) print("hundred steps") # Save the model checkpoint periodically. 
if step % 1000 == 0 or (step + 1) == FLAGS.max_steps: print("thousand steps") checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step)
def main_fun(argv, ctx):
  import tensorflow as tf
  import cifar10

  sys.argv = argv
  FLAGS = tf.app.flags.FLAGS
  tf.app.flags.DEFINE_string('train_dir', '/tmp/cifar10_train',
                             """Directory where to write event logs """
                             """and checkpoint.""")
  tf.app.flags.DEFINE_integer('max_steps', 1000000,
                              """Number of batches to run.""")
  tf.app.flags.DEFINE_boolean('log_device_placement', False,
                              """Whether to log device placement.""")
  tf.app.flags.DEFINE_boolean('rdma', False, """Whether to use rdma.""")

  # cifar10.maybe_download_and_extract()
  if tf.gfile.Exists(FLAGS.train_dir):
    tf.gfile.DeleteRecursively(FLAGS.train_dir)
  tf.gfile.MakeDirs(FLAGS.train_dir)

  cluster_spec, server = TFNode.start_cluster_server(ctx, 1, FLAGS.rdma)

  # Train CIFAR-10 for a number of steps.
  with tf.Graph().as_default():
    global_step = tf.contrib.framework.get_or_create_global_step()

    # Get images and labels for CIFAR-10.
    images, labels = cifar10.distorted_inputs()

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
      """Logs loss and runtime."""

      def begin(self):
        self._step = -1

      def before_run(self, run_context):
        self._step += 1
        self._start_time = time.time()
        return tf.train.SessionRunArgs(loss)  # Asks for loss value.

      def after_run(self, run_context, run_values):
        duration = time.time() - self._start_time
        loss_value = run_values.results
        if self._step % 10 == 0:
          num_examples_per_step = FLAGS.batch_size
          examples_per_sec = num_examples_per_step / duration
          sec_per_batch = float(duration)

          format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
          print(format_str % (datetime.now(), self._step, loss_value,
                              examples_per_sec, sec_per_batch))

    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=FLAGS.train_dir,
        hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
               tf.train.NanTensorHook(loss),
               _LoggerHook()],
        config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)) as mon_sess:
      while not mon_sess.should_stop():
        mon_sess.run(train_op)
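# main_fun(argv, ctx) follows the TensorFlowOnSpark convention of a
# per-executor map function. The sketch below shows one plausible way to
# launch it from a Spark driver via TFCluster; it is an assumption, not part
# of the original snippet, and the executor/parameter-server counts and the
# existing SparkContext `sc` are placeholders.
from tensorflowonspark import TFCluster
import sys

num_executors = 4   # assumed cluster size
num_ps = 1          # assumed number of parameter servers

cluster = TFCluster.run(sc, main_fun, sys.argv, num_executors, num_ps,
                        tensorboard=False,
                        input_mode=TFCluster.InputMode.TENSORFLOW)
cluster.shutdown()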
def main(_):

  class _LoggerHook(tf.train.SessionRunHook):
    """Logs loss and runtime."""

    def begin(self):
      self._step = -1

    def before_run(self, run_context):
      self._step += 1
      self._start_time = time.time()
      # `loss` is resolved from main()'s scope when before_run is called;
      # it is defined in the worker branch below.
      return tf.train.SessionRunArgs(loss)  # Asks for loss value.

    def after_run(self, run_context, run_values):
      duration = time.time() - self._start_time
      loss_value = run_values.results
      if self._step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print(format_str % (datetime.now(), self._step, loss_value,
                            examples_per_sec, sec_per_batch))

  ps_hosts = FLAGS.ps_hosts.split(",")
  worker_hosts = FLAGS.worker_hosts.split(",")

  # Create a cluster from the parameter server and worker hosts.
  cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

  # Create and start a server for the local task.
  server = tf.train.Server(cluster,
                           job_name=FLAGS.job_name,
                           task_index=FLAGS.task_index)

  if FLAGS.job_name == "ps":
    server.join()
  elif FLAGS.job_name == "worker":
    # Assigns ops to the local worker by default.
    with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d" % FLAGS.task_index,
        cluster=cluster)):
      global_step = tf.contrib.framework.get_or_create_global_step()

      # Get images and labels for CIFAR-10.
      images, labels = cifar10.distorted_inputs()

      # Build inference Graph.
      logits = cifar10.inference(images)

      # Build the portion of the Graph calculating the losses.
      loss = cifar10.loss(logits, labels)

      # Build a Graph that trains the model with one batch of examples and
      # updates the model parameters.
      train_op = cifar10.train(loss, global_step)

    # The StopAtStepHook handles stopping after running given steps.
    hooks = [tf.train.StopAtStepHook(last_step=FLAGS.max_steps), _LoggerHook()]

    # The MonitoredTrainingSession takes care of session initialization,
    # restoring from a checkpoint, saving to a checkpoint, and closing when
    # done or an error occurs.
    with tf.train.MonitoredTrainingSession(master=server.target,
                                           is_chief=(FLAGS.task_index == 0),
                                           checkpoint_dir=FLAGS.train_dir,
                                           save_checkpoint_secs=60,
                                           hooks=hooks) as mon_sess:
      while not mon_sess.should_stop():
        # Run a training step asynchronously.
        # See `tf.train.SyncReplicasOptimizer` for additional details on how
        # to perform *synchronous* training.
        # mon_sess.run handles AbortedError in case of preempted PS.
        mon_sess.run(train_op)
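# This between-graph replication setup reads cluster membership from flags
# that are not shown in the snippet. The definitions below are a hedged
# sketch of what it assumes; the names mirror the FLAGS attributes accessed
# above, while the default host:port values are placeholders.
tf.app.flags.DEFINE_string('ps_hosts', 'localhost:2222',
                           'Comma-separated parameter server host:port pairs.')
tf.app.flags.DEFINE_string('worker_hosts', 'localhost:2223,localhost:2224',
                           'Comma-separated worker host:port pairs.')
tf.app.flags.DEFINE_string('job_name', 'worker', "Either 'ps' or 'worker'.")
tf.app.flags.DEFINE_integer('task_index', 0, 'Index of the task within its job.')
tf.app.flags.DEFINE_string('train_dir', '/tmp/cifar10_dist_train',
                           'Directory for checkpoints and event logs.')
tf.app.flags.DEFINE_integer('max_steps', 1000000, 'Number of batches to run.')
FLAGS = tf.app.flags.FLAGS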
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) # Get images and labels for CIFAR-10. # images, labels = cifar10.standard_distorted_inputs() inputs = cifar10.ram_inputs(unit_variance=True, is_train=True) images = inputs['images'] labels = inputs['labels'] # Batch generator batcher = cifar10.Cifar10BatchGenerator( inputs['data_images'], inputs['data_labels'], True, FLAGS.max_epochs) # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images, 3, use_batchnorm=True, use_nrelu=False, id_decay=False, add_shortcuts=True, is_train=True) # Calculate loss. loss = cifar10.loss(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() # Build an initialization operation to run below. init = tf.initialize_all_variables() # Start running operations on the Graph. gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) sess = tf.Session(config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement, gpu_options=gpu_options)) sess.run(init) # Start the queue runners. tf.train.start_queue_runners(sess=sess) summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph) step = -1 while not batcher.is_done(): step += 1 batch_im, batch_labs = batcher.next_batch() feed_dict = { inputs['images_pl']: batch_im, inputs['labels_pl']: batch_labs, } start_time = time.time() _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict) duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' if step % 10 == 0: num_examples_per_step = FLAGS.batch_size examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print (format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) if step % 10 == 0: summary_str = sess.run(summary_op, feed_dict=feed_dict) summary_writer.add_summary(summary_str, step) # Save the model checkpoint periodically. if step % 10 == 0 or batcher.is_done(): checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step)
import tensorflow as tf  # assumed import; the snippet uses tf but does not show it
import cifar10
from PIL import Image

FLAGS = tf.app.flags.FLAGS
FLAGS.batch_size = 1

filename = tf.placeholder(tf.string)
value = tf.read_file(filename)
images = tf.image.decode_png(value, channels=3)
resized_images = tf.image.resize_images(images, 124, 124)
img = tf.image.per_image_whitening(resized_images)

logits = cifar10.inference(tf.expand_dims(img, 0))
outputs = tf.nn.softmax(logits)

k_pl = tf.placeholder(tf.int32)
top_k_op = tf.nn.top_k(outputs, k=k_pl)


def classify(session, file_name, k=1):
  values, indices = session.run(top_k_op,
                                feed_dict={filename: file_name, k_pl: k})
  return zip(indices.flatten().tolist(), values.flatten().tolist())


def session_with_checkpoint(checkpoint_dir="./train_old", gpu_enabled=True):
  # disable gpu
  config = tf.ConfigProto(
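# The snippet above breaks off in the middle of session_with_checkpoint. The
# completion below is a hedged sketch, assuming the intent is to optionally
# hide the GPU and restore the latest checkpoint from checkpoint_dir; the
# device_count trick and the use of tf.train.get_checkpoint_state are
# assumptions consistent with TF 0.x, not the original code.
def session_with_checkpoint(checkpoint_dir="./train_old", gpu_enabled=True):
  # Expose zero GPU devices when gpu_enabled is False (assumed intent of the
  # "# disable gpu" comment in the original).
  config = tf.ConfigProto(
      device_count={'GPU': 1 if gpu_enabled else 0})
  session = tf.Session(config=config)

  # Restore the latest checkpoint so classify() can be called directly.
  saver = tf.train.Saver()
  ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
  if ckpt and ckpt.model_checkpoint_path:
    saver.restore(session, ckpt.model_checkpoint_path)
  else:
    raise IOError('No checkpoint found in %s' % checkpoint_dir)
  return session

# Example usage (hypothetical file path):
#   sess = session_with_checkpoint()
#   print(classify(sess, 'example.png', k=3))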
def train():
  # # debug
  # true_classes = np.ndarray(shape=(FLAGS.batch_size, 1), dtype=int)
  # true_classes.fill(2)
  # # Create a pair of constant ops, add the numpy array matrices.
  # true_classes_tf_matrix = tf.constant(true_classes, dtype=tf.int64)
  #
  # # playing with introducing the sampler
  # classes_sampler = tf.nn.learned_unigram_candidate_sampler(
  #     true_classes_tf_matrix,
  #     1,      # true_classes
  #     5,      # num_sampled
  #     False,  # unique
  #     10,     # range_max
  #     seed=None,
  #     name="my_classes_sampler")
  # # print(classes_sampler)
  # # print("debug")
  # # print(classes_sampler.set_sampler)
  # # exit()
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Get images and labels for CIFAR-10.
    images, labels = cifar10.distorted_inputs()
    # print("images")
    # print(images)
    # images = tf.Print(images, [images])
    # print()
    # print(images[1])

    print("------------------- train calling inference ---------------------")
    print(cifar10.__file__)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()

    # Start running operations on the Graph.
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    for step in xrange(FLAGS.max_steps):
      # Manually loading images and labels before this sess.run() would mean:
      #   1. have the CIFAR-10 dataset in memory
      #   2. create a mini-batch
      #   3. set the placeholders/vars to the mini-batch data
      #   4. run one forward-backward step
      # print("training step: " + str(step))
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print(format_str % (datetime.now(), step, loss_value,
                            examples_per_sec, sec_per_batch))

        # Debug: temporarily write summaries every 10 steps; the original
        # every-100-steps block is kept below, commented out.
        summary_str = sess.run(summary_op)
        # print("summary: " + summary_str)
        summary_writer.add_summary(summary_str, step)
        summary_writer.flush()

      # if step % 100 == 0:
      #   summary_str = sess.run(summary_op)
      #   # print("summary: " + summary_str)
      #   summary_writer.add_summary(summary_str, step)
      #   summary_writer.flush()

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
def train(lambs):
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Get images and labels for CIFAR-10.
    images, labels = cifar10.distorted_inputs()

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate loss.
    lambs = tf.constant(lambs, dtype=tf.float32)
    loss = infor.loss(logits, labels, lambs)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op, lr_op = cifar10.train(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())

    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement))

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      # Restores from checkpoint.
      saver.restore(sess, ckpt.model_checkpoint_path)
      # Assuming model_checkpoint_path looks something like:
      #   /my-favorite-path/cifar10_train/model.ckpt-0,
      # extract global_step from it.
      previous_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
      print('Resuming training from step %d' % previous_step)
    else:
      print('Starting training from step 0')
      previous_step = 0
      # Only initialize when there is no checkpoint, so restored variables
      # are not overwritten.
      init = tf.initialize_all_variables()
      sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                            graph_def=sess.graph_def)

    step = previous_step
    while step <= FLAGS.max_steps:
      start_time = time.time()
      _, loss_value, lr_value = sess.run([train_op, loss, lr_op])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f, lr_value = %.4f '
                      '(%.1f examples/sec; %.3f sec/batch)')
        print(format_str % (datetime.now(), step, loss_value, lr_value,
                            examples_per_sec, sec_per_batch))

      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)

      step += 1
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() # Build a Graph that computes the logits predictions from the # inference model. logits = cifar10.inference(images) # Calculate loss. loss = cifar10.loss(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = cifar10.train(loss, global_step) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() # # Visualize conv1 features # with tf.variable_scope('conv1') as scope_conv: # #tf.get_variable_scope().reuse_variables() # scope_conv.reuse_variables() # weights = tf.get_variable('weights') # grid_x = grid_y = 8 # to get a square grid for 64 conv1 features # grid = put_kernels_on_grid (weights, (grid_y, grid_x)) # tf.image_summary('conv1/features', grid, max_images=1) # Build an initialization operation to run below. init = tf.initialize_all_variables() # Start running operations on the Graph. sess = tf.Session(config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) sess.run(init) # Start the queue runners. tf.train.start_queue_runners(sess=sess) summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, graph_def=sess.graph_def) for step in xrange(FLAGS.max_steps): start_time = time.time() _, loss_value = sess.run([train_op, loss]) duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' if step % 10 == 0: num_examples_per_step = FLAGS.batch_size examples_per_sec = num_examples_per_step / float(duration) sec_per_batch = float(duration) format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print (format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) if step % 100 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, step) # Save the model checkpoint periodically. if step % 1000 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step)