def testGetVariablesToRestore(self):
  with self.test_session():
    with variable_scope.variable_scope('A'):
      a = variables_lib2.variable('a', [5])
    with variable_scope.variable_scope('B'):
      b = variables_lib2.variable('a', [5])
    self.assertEquals([a, b], variables_lib2.get_variables_to_restore())

def testVariableRestoreWithArgScopeNested(self):
  with self.test_session():
    a = variables_lib2.variable('a', [])
    with arg_scope(
        [variables_lib2.variable], trainable=False, collections=['A', 'B']):
      b = variables_lib2.variable('b', [])
    c = variables_lib2.variable('c', [], trainable=False)
    self.assertEquals([a, c], variables_lib2.get_variables_to_restore())
    self.assertEquals([a], variables_lib.trainable_variables())
    self.assertEquals([b], ops.get_collection('A'))
    self.assertEquals([b], ops.get_collection('B'))

def testExcludeGetMixedVariablesToRestore(self):
  with self.test_session():
    with variable_scope.variable_scope('A'):
      a = variables_lib2.variable('a', [5])
      b = variables_lib2.variable('b', [5])
    with variable_scope.variable_scope('B'):
      c = variables_lib2.variable('c', [5])
      d = variables_lib2.variable('d', [5])
    self.assertEquals([a, b, c, d], variables_lib2.get_variables())
    self.assertEquals(
        [b, d],
        variables_lib2.get_variables_to_restore(exclude=['A/a', 'B/c']))

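# Usage sketch of the API the tests above exercise: building a Saver that
# restores everything except a freshly initialized layer. This is a minimal
# sketch, assuming tf.contrib.slim; the scope name 'my_model/logits' and the
# checkpoint path are hypothetical placeholders, not taken from the source.
import tensorflow as tf
slim = tf.contrib.slim

variables_to_restore = slim.get_variables_to_restore(
    exclude=['my_model/logits'])
restorer = tf.train.Saver(variables_to_restore)
# Later, inside a session:
#   restorer.restore(sess, '/tmp/ckpt/model.ckpt')  # hypothetical path
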
def evaluate_once(master,
                  checkpoint_path,
                  logdir,
                  num_evals=1,
                  initial_op=None,
                  initial_op_feed_dict=None,
                  eval_op=None,
                  eval_op_feed_dict=None,
                  final_op=None,
                  final_op_feed_dict=None,
                  summary_op=_USE_DEFAULT,
                  summary_op_feed_dict=None,
                  variables_to_restore=None,
                  session_config=None):
  """Evaluates the model at the given checkpoint path.

  Args:
    master: The BNS address of the TensorFlow master.
    checkpoint_path: The path to a checkpoint to use for evaluation.
    logdir: The directory where the TensorFlow summaries are written to.
    num_evals: The number of times to run `eval_op`.
    initial_op: An operation run at the beginning of evaluation.
    initial_op_feed_dict: A feed dictionary to use when executing `initial_op`.
    eval_op: An operation run `num_evals` times.
    eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    final_op: An operation to execute after all of the `eval_op` executions.
      The value of `final_op` is returned.
    final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    summary_op: The summary_op to evaluate after running TF-Slim's metric ops.
      By default the summary_op is set to tf.merge_all_summaries().
    summary_op_feed_dict: An optional feed dictionary to use when running the
      `summary_op`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      slim.variables.GetVariablesToRestore() is used.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.

  Returns:
    The value of `final_op` or `None` if `final_op` is `None`.
  """
  if summary_op == _USE_DEFAULT:
    summary_op = logging_ops.merge_all_summaries()

  global_step = variables.get_or_create_global_step()

  saver = tf_saver.Saver(
      variables_to_restore or variables.get_variables_to_restore(),
      write_version=saver_pb2.SaverDef.V1)

  summary_writer = summary_io.SummaryWriter(logdir)

  sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                             logdir=logdir,
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=None)

  logging.info('Starting evaluation at ' +
               time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
  with sv.managed_session(
      master, start_standard_services=False, config=session_config) as sess:
    saver.restore(sess, checkpoint_path)
    sv.start_queue_runners(sess)
    final_op_value = evaluation(sess,
                                num_evals=num_evals,
                                initial_op=initial_op,
                                initial_op_feed_dict=initial_op_feed_dict,
                                eval_op=eval_op,
                                eval_op_feed_dict=eval_op_feed_dict,
                                final_op=final_op,
                                final_op_feed_dict=final_op_feed_dict,
                                summary_op=summary_op,
                                summary_op_feed_dict=summary_op_feed_dict,
                                summary_writer=summary_writer,
                                global_step=global_step)

  logging.info('Finished evaluation at ' +
               time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
  return final_op_value

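# Usage sketch for the slim-style evaluate_once() above. A minimal sketch,
# assuming TF-Slim metric ops; the tensors `predictions` and `labels` and the
# paths are hypothetical placeholders, not taken from the source.
import tensorflow as tf
slim = tf.contrib.slim

names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
    'accuracy': slim.metrics.streaming_accuracy(predictions, labels),
})

# `eval_op` runs `num_evals` times; `final_op` is fetched once at the end.
metric_values = slim.evaluation.evaluate_once(
    master='',
    checkpoint_path='/tmp/ckpt/model.ckpt-1000',  # hypothetical checkpoint
    logdir='/tmp/eval_logs',
    num_evals=100,
    eval_op=list(names_to_updates.values()),
    final_op=list(names_to_values.values()))
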
def evaluation_loop(master,
                    checkpoint_dir,
                    logdir,
                    num_evals=1,
                    eval_op=None,
                    eval_op_feed_dict=None,
                    final_op=None,
                    final_op_feed_dict=None,
                    summary_op=_USE_DEFAULT,
                    summary_op_feed_dict=None,
                    variables_to_restore=None,
                    eval_interval_secs=60,
                    max_number_of_evaluations=None):
  """Runs TF-Slim's Evaluation Loop.

  Args:
    master: The BNS address of the TensorFlow master.
    checkpoint_dir: The directory where checkpoints are stored.
    logdir: The directory where the TensorFlow summaries are written to.
    num_evals: The number of times to run `eval_op`.
    eval_op: An operation run `num_evals` times.
    eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    final_op: An operation to execute after all of the `eval_op` executions.
      The value of `final_op` is returned.
    final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    summary_op: The summary_op to evaluate after running TF-Slim's metric ops.
      By default the summary_op is set to tf.merge_all_summaries().
    summary_op_feed_dict: An optional feed dictionary to use when running the
      `summary_op`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      slim.variables.GetVariablesToRestore() is used.
    eval_interval_secs: The minimum number of seconds between evaluations.
    max_number_of_evaluations: The max number of iterations of the evaluation.
      If the value is left as `None`, the evaluation continues indefinitely.
  """
  if summary_op == _USE_DEFAULT:
    summary_op = logging_ops.merge_all_summaries()

  global_step = variables.get_or_create_global_step()

  init_op = control_flow_ops.group(tf_variables.initialize_all_variables(),
                                   tf_variables.initialize_local_variables(),
                                   data_flow_ops.initialize_all_tables())

  saver = tf_saver.Saver(variables_to_restore or
                         variables.get_variables_to_restore())

  summary_writer = summary_io.SummaryWriter(logdir)

  sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                             logdir=logdir,
                             init_op=init_op,
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=saver)

  last_checkpoint = None
  number_of_evaluations = 0
  while True:
    last_checkpoint = wait_for_new_checkpoint(checkpoint_dir, last_checkpoint)
    start = time.time()
    logging.info('Starting evaluation at ' +
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))

    with sv.managed_session(master, start_standard_services=False) as sess:
      sv.saver.restore(sess, last_checkpoint)
      sv.start_queue_runners(sess)
      evaluation(sess,
                 num_evals=num_evals,
                 eval_op=eval_op,
                 eval_op_feed_dict=eval_op_feed_dict,
                 final_op=final_op,
                 final_op_feed_dict=final_op_feed_dict,
                 summary_op=summary_op,
                 summary_op_feed_dict=summary_op_feed_dict,
                 summary_writer=summary_writer,
                 global_step=global_step)

    logging.info('Finished evaluation at ' +
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
    number_of_evaluations += 1
    if (max_number_of_evaluations and
        number_of_evaluations >= max_number_of_evaluations):
      logging.info('Reached max_number_of_evaluations=%s. Exit',
                   max_number_of_evaluations)
      break

    time_to_next_eval = start + eval_interval_secs - time.time()
    if time_to_next_eval > 0:
      time.sleep(time_to_next_eval)

def evaluation_loop(master,
                    checkpoint_dir,
                    logdir,
                    num_evals=1,
                    initial_op=None,
                    initial_op_feed_dict=None,
                    eval_op=None,
                    eval_op_feed_dict=None,
                    final_op=None,
                    final_op_feed_dict=None,
                    summary_op=_USE_DEFAULT,
                    summary_op_feed_dict=None,
                    variables_to_restore=None,
                    eval_interval_secs=60,
                    max_number_of_evaluations=None,
                    session_config=None,
                    timeout=None):
  """Runs TF-Slim's Evaluation Loop.

  Args:
    master: The BNS address of the TensorFlow master.
    checkpoint_dir: The directory where checkpoints are stored.
    logdir: The directory where the TensorFlow summaries are written to.
    num_evals: The number of times to run `eval_op`.
    initial_op: An operation run at the beginning of evaluation.
    initial_op_feed_dict: A feed dictionary to use when executing `initial_op`.
    eval_op: An operation run `num_evals` times.
    eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    final_op: An operation to execute after all of the `eval_op` executions.
      The value of `final_op` is returned.
    final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    summary_op: The summary_op to evaluate after running TF-Slim's metric ops.
      By default the summary_op is set to tf.summary.merge_all().
    summary_op_feed_dict: An optional feed dictionary to use when running the
      `summary_op`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      slim.variables.GetVariablesToRestore() is used.
    eval_interval_secs: The minimum number of seconds between evaluations.
    max_number_of_evaluations: The max number of iterations of the evaluation.
      If the value is left as `None`, the evaluation continues indefinitely.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    timeout: The maximum amount of time to wait between checkpoints. If left as
      `None`, then the process will wait indefinitely.

  Returns:
    The value of `final_op` or `None` if `final_op` is `None`.
  """
  if summary_op == _USE_DEFAULT:
    summary_op = summary.merge_all()

  global_step = variables.get_or_create_global_step()

  saver = tf_saver.Saver(variables_to_restore or
                         variables.get_variables_to_restore())

  summary_writer = summary_io.SummaryWriter(logdir)

  sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                             logdir=logdir,
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=saver)

  number_of_evaluations = 0
  for checkpoint_path in checkpoints_iterator(checkpoint_dir,
                                              eval_interval_secs, timeout):
    logging.info('Starting evaluation at ' +
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))

    with sv.managed_session(master,
                            start_standard_services=False,
                            config=session_config) as sess:
      sv.saver.restore(sess, checkpoint_path)
      sv.start_queue_runners(sess)
      final_op_value = evaluation(
          sess,
          num_evals=num_evals,
          initial_op=initial_op,
          initial_op_feed_dict=initial_op_feed_dict,
          eval_op=eval_op,
          eval_op_feed_dict=eval_op_feed_dict,
          final_op=final_op,
          final_op_feed_dict=final_op_feed_dict,
          summary_op=summary_op,
          summary_op_feed_dict=summary_op_feed_dict,
          summary_writer=summary_writer,
          global_step=global_step)

    logging.info('Finished evaluation at ' +
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
    number_of_evaluations += 1
    if (max_number_of_evaluations and
        number_of_evaluations >= max_number_of_evaluations):
      logging.info('Reached max_number_of_evaluations=%s. Exit',
                   max_number_of_evaluations)
      return final_op_value

  logging.info(
      'Timed-out waiting for new checkpoint file. Exiting evaluation loop.')
  return final_op_value

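# Usage sketch for evaluation_loop() above. The directories and the metric
# update ops (`names_to_updates`, from an aggregate_metric_map as in the
# earlier sketch) are illustrative assumptions, not taken from the source.
slim.evaluation.evaluation_loop(
    master='',
    checkpoint_dir='/tmp/train_logs',  # watched for new checkpoints
    logdir='/tmp/eval_logs',
    num_evals=100,                     # eval_op runs this many times per ckpt
    eval_op=list(names_to_updates.values()),
    eval_interval_secs=60,             # minimum wait between evaluations
    max_number_of_evaluations=5,       # stop after five checkpoints
    timeout=600)                       # stop if no new checkpoint for 10 min
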
def evaluate_once(master,
                  checkpoint_path,
                  logdir,
                  num_evals=1,
                  initial_op=None,
                  initial_op_feed_dict=None,
                  eval_op=None,
                  eval_op_feed_dict=None,
                  final_op=None,
                  final_op_feed_dict=None,
                  summary_op=_USE_DEFAULT,
                  summary_op_feed_dict=None,
                  variables_to_restore=None,
                  session_config=None):
  """Evaluates the model at the given checkpoint path.

  Args:
    master: The BNS address of the TensorFlow master.
    checkpoint_path: The path to a checkpoint to use for evaluation.
    logdir: The directory where the TensorFlow summaries are written to.
    num_evals: The number of times to run `eval_op`.
    initial_op: An operation run at the beginning of evaluation.
    initial_op_feed_dict: A feed dictionary to use when executing `initial_op`.
    eval_op: An operation run `num_evals` times.
    eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    final_op: An operation to execute after all of the `eval_op` executions.
      The value of `final_op` is returned.
    final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    summary_op: The summary_op to evaluate after running TF-Slim's metric ops.
      By default the summary_op is set to tf.summary.merge_all().
    summary_op_feed_dict: An optional feed dictionary to use when running the
      `summary_op`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      slim.variables.GetVariablesToRestore() is used.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.

  Returns:
    The value of `final_op` or `None` if `final_op` is `None`.
  """
  if summary_op == _USE_DEFAULT:
    summary_op = summary.merge_all()

  global_step = variables.get_or_create_global_step()

  saver = tf_saver.Saver(
      variables_to_restore or variables.get_variables_to_restore(),
      write_version=saver_pb2.SaverDef.V1)

  summary_writer = summary_io.SummaryWriter(logdir)

  sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                             logdir=logdir,
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=None)

  logging.info('Starting evaluation at ' +
               time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
  with sv.managed_session(master,
                          start_standard_services=False,
                          config=session_config) as sess:
    saver.restore(sess, checkpoint_path)
    sv.start_queue_runners(sess)
    final_op_value = evaluation(sess,
                                num_evals=num_evals,
                                initial_op=initial_op,
                                initial_op_feed_dict=initial_op_feed_dict,
                                eval_op=eval_op,
                                eval_op_feed_dict=eval_op_feed_dict,
                                final_op=final_op,
                                final_op_feed_dict=final_op_feed_dict,
                                summary_op=summary_op,
                                summary_op_feed_dict=summary_op_feed_dict,
                                summary_writer=summary_writer,
                                global_step=global_step)

  logging.info('Finished evaluation at ' +
               time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
  return final_op_value

def _test(args):
  args.solver.master = ''
  container_name = ""
  checkpoint_dir = os.path.join(format(args.logdir))
  logging.error('Checkpoint_dir: %s', args.logdir)

  config = tf.ConfigProto()
  config.device_count['GPU'] = 1

  m = utils.Foo()
  m.tf_graph = tf.Graph()

  rng_data_seed = 0
  rng_action_seed = 0
  R = lambda: nav_env.get_multiplexer_class(args.navtask, rng_data_seed)
  with m.tf_graph.as_default():
    with tf.container(container_name):
      m = args.setup_to_run(
          m, args,
          is_training=False,
          batch_norm_is_training=args.control.force_batchnorm_is_training_at_test,
          summary_mode=args.control.test_mode)
      train_step_kwargs = args.setup_train_step_kwargs(
          m, R(),
          os.path.join(args.logdir, args.control.test_name),
          rng_seed=rng_data_seed,
          is_chief=True,
          num_steps=args.navtask.task_params.num_steps *
          args.navtask.task_params.num_goals,
          iters=args.summary.test_iters,
          train_display_interval=None,
          dagger_sample_bn_false=args.arch.dagger_sample_bn_false)

      saver = slim.learning.tf_saver.Saver(
          variables.get_variables_to_restore())

      sv = slim.learning.supervisor.Supervisor(
          graph=ops.get_default_graph(),
          logdir=None,
          init_op=m.init_op,
          summary_op=None,
          summary_writer=None,
          global_step=None,
          saver=m.saver_op)

      last_checkpoint = None
      reported = False
      while True:
        last_checkpoint_ = None
        while last_checkpoint_ is None:
          last_checkpoint_ = slim.evaluation.wait_for_new_checkpoint(
              checkpoint_dir, last_checkpoint, seconds_to_sleep=10, timeout=60)
        if last_checkpoint_ is None:
          break
        last_checkpoint = last_checkpoint_

        checkpoint_iter = int(os.path.basename(last_checkpoint).split('-')[1])
        logging.info('Starting evaluation at %s using checkpoint %s.',
                     time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()),
                     last_checkpoint)

        if (not args.control.only_eval_when_done or
            checkpoint_iter >= args.solver.max_steps):
          start = time.time()
          logging.info('Starting evaluation at %s using checkpoint %s.',
                       time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()),
                       last_checkpoint)

          with sv.managed_session(args.solver.master,
                                  config=config,
                                  start_standard_services=False) as sess:
            sess.run(m.init_op)
            sv.saver.restore(sess, last_checkpoint)
            sv.start_queue_runners(sess)
            if args.control.reset_rng_seed:
              train_step_kwargs['rng_data'] = [
                  np.random.RandomState(rng_data_seed),
                  np.random.RandomState(rng_data_seed)
              ]
              train_step_kwargs['rng_action'] = np.random.RandomState(
                  rng_action_seed)
            vals, _ = tf_utils.train_step_custom_online_sampling(
                sess, None, m.global_step_op, train_step_kwargs,
                mode=args.control.test_mode)

          should_stop = False
          if checkpoint_iter >= args.solver.max_steps:
            should_stop = True
          if should_stop:
            break

def _make_saver(self):
  return tf.train.Saver(
      variables.get_variables_to_restore(),
      write_version=tf.train.SaverDef.V1)

def _test(args):
  # Give checkpoint directory
  container_name = ""
  checkpoint_dir = os.path.join(format(args.logdir))
  logging.error('Checkpoint_dir: %s', args.logdir)

  # Load Agent
  agent = navi_env.Environment('5cf0e1e9493994e483e985c436b9d3bc', args.navi)

  # Add Configure
  config = tf.compat.v1.ConfigProto()
  config.device_count['GPU'] = 1

  Z = utils.Foo()
  Z.tf_graph = tf.Graph()
  with Z.tf_graph.as_default():
    with tf.compat.v1.container(container_name):
      Z = args.setup_to_run(
          Z, args,
          is_training=False,
          batch_norm_is_training=args.control.force_batchnorm_is_training_at_test,
          summary_mode=args.control.test_mode)
      train_step_kwargs = args.setup_train_step_kwargs(
          Z, agent,
          os.path.join(args.logdir, args.control.test_name),
          rng_seed=1008,
          is_chief=True,
          num_steps=args.navi.num_steps * args.navi.num_goals,
          iters=args.summary.test_iters,
          train_display_interval=None,
          dagger_sample_bn_false=args.solver.dagger_sample_bn_false)

      saver = slim.learning.tf_saver.Saver(
          variables.get_variables_to_restore())

      sv = slim.learning.supervisor.Supervisor(
          graph=ops.get_default_graph(),
          logdir=None,
          init_op=Z.init_op,
          summary_op=None,
          summary_writer=None,
          global_step=None,
          saver=Z.saver_op)

      last_checkpoint = None
      # reported = False
      while True:
        last_checkpoint_ = None
        while last_checkpoint_ is None:
          last_checkpoint_ = slim.evaluation.wait_for_new_checkpoint(
              checkpoint_dir, last_checkpoint, seconds_to_sleep=10, timeout=60)
        if last_checkpoint_ is None:
          break
        last_checkpoint = last_checkpoint_

        checkpoint_iter = int(os.path.basename(last_checkpoint).split('-')[1])
        logging.info('Starting evaluation at %s using checkpoint %s.',
                     time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()),
                     last_checkpoint)

        if (not args.control.only_eval_when_done or
            checkpoint_iter >= args.solver.max_steps):
          # start = time.time()
          logging.info('Starting evaluation at %s using checkpoint %s.',
                       time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()),
                       last_checkpoint)

          with sv.managed_session(args.solver.master,
                                  config=config,
                                  start_standard_services=False) as sess:
            sess.run(Z.init_op)
            sv.saver.restore(sess, last_checkpoint)
            sv.start_queue_runners(sess)
            if args.control.reset_rng_seed:
              train_step_kwargs['rng_data'] = [
                  np.random.RandomState(1008),
                  np.random.RandomState(1008)
              ]
              train_step_kwargs['rng_action'] = np.random.RandomState(1008)
            vals, _ = tf_utils.train_step_fn(
                sess, None, Z.global_step_op, train_step_kwargs,
                mode=args.control.test_mode)

          should_stop = False
          if checkpoint_iter >= args.solver.max_steps:
            should_stop = True
          if should_stop:
            break

def main(_):
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  times = {}
  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    start = time.time()
    tf_global_step = slim.get_or_create_global_step()
    times['global_step'] = time.time() - start

    ######################
    # Select the dataset #
    ######################
    start = time.time()
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir,
        suffix=FLAGS.dataset_name_suffix)
    times['get_dataset'] = time.time() - start

    ####################
    # Select the model #
    ####################
    start = time.time()
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        is_training=False)
    times['select_model'] = time.time() - start

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    start = time.time()
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=False,
        common_queue_capacity=2 * FLAGS.batch_size,
        common_queue_min=FLAGS.batch_size)
    times['get_provider'] = time.time() - start

    start = time.time()
    [image] = provider.get(['image'])
    times['get_image'] = time.time() - start

    #####################################
    # Select the preprocessing function #
    #####################################
    start = time.time()
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name, is_training=False)
    times['get_preprocessing'] = time.time() - start

    eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

    start = time.time()
    image = image_preprocessing_fn(image, eval_image_size, eval_image_size)
    times['preprocessing'] = time.time() - start

    start = time.time()
    images = tf.train.batch(
        [image],
        batch_size=FLAGS.batch_size,
        num_threads=FLAGS.num_preprocessing_threads,
        capacity=5 * FLAGS.batch_size)
    times['get_batch'] = time.time() - start

    start = time.time()
    tf.image_summary('test_images', images, FLAGS.batch_size)
    times['image_summary'] = time.time() - start

    ####################
    # Define the model #
    ####################
    start = time.time()
    logits, _ = network_fn(images)
    times['do_network'] = time.time() - start

    # with tf.variable_scope('resnet_v2_152/block1/unit_1/bottleneck_v2/conv1',
    #                        reuse=True):
    #   weights = tf.get_variable('weights')
    #   kernel_transposed = put_kernels_on_grid(weights)

    # scale weights to [0 1], type is still float
    # x_min = tf.reduce_min(weights)
    # x_max = tf.reduce_max(weights)
    # kernel_0_to_1 = (weights - x_min) / (x_max - x_min)
    #
    # to tf.image_summary format [batch_size, height, width, channels]
    # kernel_transposed = tf.transpose(kernel_0_to_1, [3, 0, 1, 2])

    # this will display random 3 filters from the 64 in conv1
    # tf.image_summary('conv1/filters', kernel_transposed, max_images=50)

    if FLAGS.moving_average_decay:
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, tf_global_step)
      variables_to_restore = variable_averages.variables_to_restore(
          slim.get_model_variables())
      variables_to_restore[tf_global_step.op.name] = tf_global_step
    else:
      variables_to_restore = slim.get_variables_to_restore()

    if len(logits.get_shape()) == 4:
      logits = tf.reshape(logits, [int(logits.get_shape()[0]), -1])

    softmax = tf.nn.softmax(logits)
    # predictions = tf.argmax(logits, 1)

    # Define the metrics:
    # names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
    #     'Predictions': predictions,
    #     'Predictions': slim.metrics.streaming_accuracy(predictions, labels),
    #     'Predictions@5': slim.metrics.streaming_recall_at_k(
    #         logits, labels, 5),
    # })

    # Print the summaries to screen.
    # for name, value in names_to_values.iteritems():
    #   summary_name = 'eval/%s' % name
    #   op = tf.scalar_summary(summary_name, value, collections=[])
    #   op = tf.Print(op, [value], summary_name)
    #   tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

    # TODO(sguada) use num_epochs=1
    if FLAGS.max_num_batches:
      num_batches = FLAGS.max_num_batches
    else:
      # This ensures that we make a single pass over all of the data.
      num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size))

    start = time.time()
    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    else:
      checkpoint_path = FLAGS.checkpoint_path
    times['load_checkpoint'] = time.time() - start

    tf.logging.info('Evaluating %s' % checkpoint_path)

    # evaluate_loop
    from tensorflow.contrib.framework.python.ops import variables
    from tensorflow.core.protobuf import saver_pb2
    from tensorflow.python.training import saver as tf_saver
    from tensorflow.python.framework import ops
    from tensorflow.python.training import supervisor

    saver = tf_saver.Saver(
        variables_to_restore or variables.get_variables_to_restore(),
        write_version=saver_pb2.SaverDef.V1)

    sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                               logdir=FLAGS.eval_dir,
                               summary_op=None,
                               summary_writer=None,
                               global_step=None,
                               saver=None)

    # init = tf.initialize_all_variables()
    # sess = tf.Session()
    with sv.managed_session(FLAGS.master,
                            start_standard_services=False) as sess:
      # sess.run(init)
      saver.restore(sess, checkpoint_path)
      sv.start_queue_runners(sess)
      start = time.time()
      # Fetch both tensors so final_op_value[1] below is the logits batch,
      # matching the commented-out evaluate_once() call.
      final_op_value = sess.run([softmax, logits])
      # final_op_value = slim.evaluation.evaluate_once(
      #     master=FLAGS.master,
      #     checkpoint_path=checkpoint_path,
      #     logdir=FLAGS.eval_dir,
      #     num_evals=num_batches,
      #     final_op=[softmax, logits],
      #     # eval_op=names_to_updates.values(),
      #     variables_to_restore=variables_to_restore)
      times['exec'] = time.time() - start

    print(final_op_value[1].shape)
    result_predict = np.reshape(
        final_op_value[1], (FLAGS.batch_size, final_op_value[1].shape[-1]))
    # print(final_op_value)
    print(result_predict)
    print(np.argsort(result_predict[:, 1])[-5:])
    print(times)

def evaluate_once(checkpoint_path,
                  master='',
                  scaffold=None,
                  eval_ops=None,
                  feed_dict=None,
                  final_ops=None,
                  final_ops_feed_dict=None,
                  variables_to_restore=None,
                  hooks=None,
                  config=None):
  """Evaluates the model at the given checkpoint path.

  During a single evaluation, the `eval_ops` is run until the session is
  interrupted or requested to finish. This is typically requested via a
  `tf.contrib.training.StopAfterNEvalsHook` which results in `eval_ops` running
  the requested number of times.

  Optionally, a user can pass in `final_ops`, a single `Tensor`, a list of
  `Tensors` or a dictionary from names to `Tensors`. The `final_ops` is
  evaluated a single time after `eval_ops` has finished running and the fetched
  values of `final_ops` are returned. If `final_ops` is left as `None`, then
  `None` is returned.

  One may also consider using a `tf.contrib.training.SummaryAtEndHook` to
  record summaries after the `eval_ops` have run. If `eval_ops` is `None`, the
  summaries run immediately after the model checkpoint has been restored.

  Note that `evaluate_once` creates a local variable used to track the number
  of evaluations run via `tf.contrib.training.get_or_create_eval_step`.
  Consequently, if a custom local init op is provided via a `scaffold`, the
  caller should ensure that the local init op also initializes the eval step.

  Args:
    checkpoint_path: The path to a checkpoint to use for evaluation.
    master: The BNS address of the TensorFlow master.
    scaffold: A tf.train.Scaffold instance for initializing variables and
      restoring variables. Note that `scaffold.init_fn` is used by the function
      to restore the checkpoint. If you supply a custom init_fn, then it must
      also take care of restoring the model from its checkpoint.
    eval_ops: An operation which is run until the session is requested to stop,
      commonly done by a `tf.contrib.training.StopAfterNEvalsHook`.
    feed_dict: The feed dictionary to use when executing the `eval_ops`.
    final_ops: A single `Tensor`, a list of `Tensors` or a dictionary of names
      to `Tensors`.
    final_ops_feed_dict: A feed dictionary to use when evaluating `final_ops`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      tf.contrib.framework.get_variables_to_restore() is used.
    hooks: List of `tf.train.SessionRunHook` callbacks which are run inside the
      evaluation loop.
    config: An instance of `tf.ConfigProto` that will be used to configure the
      `Session`. If left as `None`, the default will be used.

  Returns:
    The fetched values of `final_ops` or `None` if `final_ops` is `None`.
  """
  eval_step = get_or_create_eval_step()

  if eval_ops is not None:
    eval_ops = control_flow_ops.with_dependencies(
        [eval_ops], state_ops.assign_add(eval_step, 1))

  # Must come before the scaffold check.
  if scaffold and scaffold.saver:
    saver = scaffold.saver
  else:
    saver = tf_saver.Saver(
        variables_to_restore or variables.get_variables_to_restore(),
        write_version=saver_pb2.SaverDef.V2)

  scaffold = scaffold or monitored_session.Scaffold()
  scaffold = _scaffold_with_init(scaffold, saver, checkpoint_path)

  logging.info('Starting evaluation at ' +
               time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))

  # Prepare the session creator.
  session_creator = monitored_session.ChiefSessionCreator(
      scaffold=scaffold,
      checkpoint_dir=None,
      master=master,
      config=config)

  # Prepare the run hooks.
  hooks = hooks or []

  final_ops_hook = _FinalOpsHook(final_ops, final_ops_feed_dict)
  hooks.append(final_ops_hook)

  with monitored_session.MonitoredSession(
      session_creator=session_creator, hooks=hooks) as session:
    if eval_ops is not None:
      while not session.should_stop():
        session.run(eval_ops, feed_dict)

  logging.info('Finished evaluation at ' +
               time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
  return final_ops_hook.final_ops_values

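# Usage sketch for the hook-based evaluate_once() above. A minimal sketch,
# assuming tf.contrib.training; `labels`, `predictions`, and the paths are
# hypothetical placeholders, not taken from the source.
import tensorflow as tf

accuracy_value, accuracy_update = tf.metrics.accuracy(labels, predictions)

final_values = tf.contrib.training.evaluate_once(
    checkpoint_path='/tmp/ckpt/model.ckpt-1000',
    eval_ops=accuracy_update,  # run repeatedly until a hook requests a stop
    final_ops={'accuracy': accuracy_value},
    hooks=[
        # Stops the session after eval_ops has run 100 times.
        tf.contrib.training.StopAfterNEvalsHook(100),
        # Writes merged summaries once evaluation finishes.
        tf.contrib.training.SummaryAtEndHook(log_dir='/tmp/eval_logs'),
    ])
print(final_values['accuracy'])
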
def testLocalVariableNotInVariablesToRestore(self):
  with self.test_session():
    with variable_scope.variable_scope('A'):
      a = variables_lib2.local_variable(0)
      self.assertFalse(a in variables_lib2.get_variables_to_restore())
      self.assertTrue(a in variables_lib.local_variables())

def evaluation_loop(master,
                    checkpoint_dir,
                    logdir,
                    num_evals=1,
                    initial_op=None,
                    initial_op_feed_dict=None,
                    eval_op=None,
                    eval_op_feed_dict=None,
                    final_op=None,
                    final_op_feed_dict=None,
                    summary_op=_USE_DEFAULT,
                    summary_op_feed_dict=None,
                    variables_to_restore=None,
                    eval_interval_secs=60,
                    max_number_of_evaluations=None,
                    session_config=None,
                    timeout=None):
  """Runs TF-Slim's Evaluation Loop.

  Args:
    master: The BNS address of the TensorFlow master.
    checkpoint_dir: The directory where checkpoints are stored.
    logdir: The directory where the TensorFlow summaries are written to.
    num_evals: The number of times to run `eval_op`.
    initial_op: An operation run at the beginning of evaluation.
    initial_op_feed_dict: A feed dictionary to use when executing `initial_op`.
    eval_op: An operation run `num_evals` times.
    eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    final_op: An operation to execute after all of the `eval_op` executions.
      The value of `final_op` is returned.
    final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    summary_op: The summary_op to evaluate after running TF-Slim's metric ops.
      By default the summary_op is set to tf.merge_all_summaries().
    summary_op_feed_dict: An optional feed dictionary to use when running the
      `summary_op`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      slim.variables.GetVariablesToRestore() is used.
    eval_interval_secs: The minimum number of seconds between evaluations.
    max_number_of_evaluations: The max number of iterations of the evaluation.
      If the value is left as `None`, the evaluation continues indefinitely.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    timeout: The maximum amount of time to wait between checkpoints. If left as
      `None`, then the process will wait indefinitely.

  Returns:
    The value of `final_op` or `None` if `final_op` is `None`.
  """
  if summary_op == _USE_DEFAULT:
    summary_op = logging_ops.merge_all_summaries()

  global_step = variables.get_or_create_global_step()

  saver = tf_saver.Saver(variables_to_restore or
                         variables.get_variables_to_restore())

  summary_writer = summary_io.SummaryWriter(logdir)

  sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                             logdir=logdir,
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=saver)

  number_of_evaluations = 0
  for checkpoint_path in checkpoints_iterator(checkpoint_dir,
                                              eval_interval_secs, timeout):
    logging.info('Starting evaluation at ' +
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))

    with sv.managed_session(
        master, start_standard_services=False, config=session_config) as sess:
      sv.saver.restore(sess, checkpoint_path)
      sv.start_queue_runners(sess)
      final_op_value = evaluation(sess,
                                  num_evals=num_evals,
                                  initial_op=initial_op,
                                  initial_op_feed_dict=initial_op_feed_dict,
                                  eval_op=eval_op,
                                  eval_op_feed_dict=eval_op_feed_dict,
                                  final_op=final_op,
                                  final_op_feed_dict=final_op_feed_dict,
                                  summary_op=summary_op,
                                  summary_op_feed_dict=summary_op_feed_dict,
                                  summary_writer=summary_writer,
                                  global_step=global_step)

    logging.info('Finished evaluation at ' +
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
    number_of_evaluations += 1
    if (max_number_of_evaluations and
        number_of_evaluations >= max_number_of_evaluations):
      logging.info('Reached max_number_of_evaluations=%s. Exit',
                   max_number_of_evaluations)
      return final_op_value

  logging.info(
      'Timed-out waiting for new checkpoint file. Exiting evaluation loop.')
  return final_op_value

def evaluate_repeatedly(checkpoint_dir,
                        master='',
                        scaffold=None,
                        eval_ops=None,
                        feed_dict=None,
                        final_ops=None,
                        final_ops_feed_dict=None,
                        variables_to_restore=None,
                        eval_interval_secs=60,
                        hooks=None,
                        config=None,
                        max_number_of_evaluations=None,
                        timeout=None):
  """Repeatedly searches for a checkpoint in `checkpoint_dir` and evaluates it.

  During a single evaluation, the `eval_ops` is run until the session is
  interrupted or requested to finish. This is typically requested via a
  `tf.contrib.training.StopAfterNEvalsHook` which results in `eval_ops` running
  the requested number of times.

  Optionally, a user can pass in `final_ops`, a single `Tensor`, a list of
  `Tensors` or a dictionary from names to `Tensors`. The `final_ops` is
  evaluated a single time after `eval_ops` has finished running and the fetched
  values of `final_ops` are returned. If `final_ops` is left as `None`, then
  `None` is returned.

  One may also consider using a `tf.contrib.training.SummaryAtEndHook` to
  record summaries after the `eval_ops` have run. If `eval_ops` is `None`, the
  summaries run immediately after the model checkpoint has been restored.

  Note that `evaluate_once` creates a local variable used to track the number
  of evaluations run via `tf.contrib.training.get_or_create_eval_step`.
  Consequently, if a custom local init op is provided via a `scaffold`, the
  caller should ensure that the local init op also initializes the eval step.

  Args:
    checkpoint_dir: The directory where checkpoints are stored.
    master: The BNS address of the TensorFlow master.
    scaffold: A tf.train.Scaffold instance for initializing variables and
      restoring variables. Note that `scaffold.init_fn` is used by the function
      to restore the checkpoint. If you supply a custom init_fn, then it must
      also take care of restoring the model from its checkpoint.
    eval_ops: A single `Tensor`, a list of `Tensors` or a dictionary of names
      to `Tensors`, which is run until the session is requested to stop,
      commonly done by a `tf.contrib.training.StopAfterNEvalsHook`.
    feed_dict: The feed dictionary to use when executing the `eval_ops`.
    final_ops: A single `Tensor`, a list of `Tensors` or a dictionary of names
      to `Tensors`.
    final_ops_feed_dict: A feed dictionary to use when evaluating `final_ops`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      tf.contrib.framework.get_variables_to_restore() is used.
    eval_interval_secs: The minimum number of seconds between evaluations.
    hooks: List of `tf.train.SessionRunHook` callbacks which are run inside the
      evaluation loop.
    config: An instance of `tf.ConfigProto` that will be used to configure the
      `Session`. If left as `None`, the default will be used.
    max_number_of_evaluations: The maximum times to run the evaluation. If left
      as `None`, then evaluation runs indefinitely.
    timeout: The maximum amount of time to wait between checkpoints. If left as
      `None`, then the process will wait indefinitely.

  Returns:
    The fetched values of `final_ops` or `None` if `final_ops` is `None`.
  """
  eval_step = get_or_create_eval_step()

  if eval_ops is not None:
    update_eval_step = state_ops.assign_add(eval_step, 1)

    if isinstance(eval_ops, dict):
      eval_ops['update_eval_step'] = update_eval_step
    elif isinstance(eval_ops, (tuple, list)):
      eval_ops = list(eval_ops) + [update_eval_step]
    else:
      eval_ops = [eval_ops, update_eval_step]

  # Must come before the scaffold check.
  if scaffold and scaffold.saver:
    saver = scaffold.saver
  else:
    saver = tf_saver.Saver(
        variables_to_restore or variables.get_variables_to_restore())

  scaffold = scaffold or monitored_session.Scaffold()

  # Prepare the run hooks.
  hooks = hooks or []

  final_ops_hook = _FinalOpsHook(final_ops, final_ops_feed_dict)
  hooks.append(final_ops_hook)

  num_evaluations = 0
  for checkpoint_path in checkpoints_iterator(checkpoint_dir,
                                              eval_interval_secs, timeout):
    session_creator = monitored_session.ChiefSessionCreator(
        scaffold=_scaffold_with_init(scaffold, saver, checkpoint_path),
        checkpoint_dir=None,
        master=master,
        config=config)

    with monitored_session.MonitoredSession(
        session_creator=session_creator, hooks=hooks) as session:
      logging.info('Starting evaluation at ' +
                   time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
      if eval_ops is not None:
        while not session.should_stop():
          session.run(eval_ops, feed_dict)

      logging.info('Finished evaluation at ' +
                   time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
    num_evaluations += 1
    if (max_number_of_evaluations and
        num_evaluations >= max_number_of_evaluations):
      return final_ops_hook.final_ops_values

  logging.info('Timed-out waiting for a checkpoint.')
  return final_ops_hook.final_ops_values

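# Usage sketch for evaluate_repeatedly() above, reusing the hypothetical
# `accuracy_value`/`accuracy_update` metric ops from the previous sketch.
final_values = tf.contrib.training.evaluate_repeatedly(
    checkpoint_dir='/tmp/train_logs',  # polled for new checkpoints
    eval_ops=accuracy_update,
    final_ops={'accuracy': accuracy_value},
    eval_interval_secs=60,
    hooks=[tf.contrib.training.StopAfterNEvalsHook(100)],
    max_number_of_evaluations=5,
    timeout=600)                       # give up after 10 min with no new ckpt
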
def main(_):
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  print("START!")

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    tf_global_step = slim.get_or_create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                          FLAGS.dataset_split_name,
                                          FLAGS.dataset_dir)

    ####################
    # Select the model #
    ####################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        is_training=False)

    #print(dataset.num_classes)
    #print(dir(dataset))
    #print(dataset.num_samples)
    #print(dataset.get_shape())

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=False,
        common_queue_capacity=2 * FLAGS.batch_size,
        common_queue_min=FLAGS.batch_size)
    files = True
    if files:
      [image, label, filename] = provider.get(['image', 'label', 'filename'])
    else:
      [image, label] = provider.get(['image', 'label'])
    label -= FLAGS.labels_offset

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name, is_training=False)

    eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

    image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

    if files:
      images, labels, filenames = tf.train.batch(
          [image, label, filename],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=FLAGS.batch_size,
          allow_smaller_final_batch=True)
    else:
      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=FLAGS.batch_size,
          allow_smaller_final_batch=True)

    ####################
    # Define the model #
    ####################
    logits, endpoints = network_fn(images)

    if FLAGS.moving_average_decay:
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, tf_global_step)
      variables_to_restore = variable_averages.variables_to_restore(
          slim.get_model_variables())
      variables_to_restore[tf_global_step.op.name] = tf_global_step
    else:
      variables_to_restore = slim.get_variables_to_restore()

    probabilities = tf.nn.softmax(logits)

    # TODO(sguada) use num_epochs=1
    if FLAGS.max_num_batches:
      num_batches = FLAGS.max_num_batches
    else:
      # This ensures that we make a single pass over all of the data.
      num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size))

    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    else:
      checkpoint_path = FLAGS.checkpoint_path

    init_fn = slim.assign_from_checkpoint_fn(checkpoint_path,
                                             variables_to_restore)

    tf.logging.info('Evaluating %s' % checkpoint_path)

    ###
    import time
    from tensorflow.contrib.framework.python.ops import variables
    from tensorflow.python.framework import ops
    from tensorflow.python.ops import logging_ops
    from tensorflow.python.platform import tf_logging as logging
    from tensorflow.python.training import saver as tf_saver
    from tensorflow.python.training import summary_io
    from tensorflow.python.training import supervisor
    from tensorflow.python.training import training_util

    saver = tf_saver.Saver(variables_to_restore or
                           variables.get_variables_to_restore())
    #summary_writer = summary_io.SummaryWriter(logdir)
    sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                               logdir=FLAGS.eval_dir,
                               summary_op=None,
                               summary_writer=None,
                               global_step=None,
                               saver=None)

    logging.info('Starting evaluation at ' +
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))

    import collections
    with sv.managed_session(FLAGS.master,
                            start_standard_services=False,
                            config=None) as sess:
      saver.restore(sess, checkpoint_path)
      sv.start_queue_runners(sess)

      if FLAGS.result_type == "classify":
        ## export classification
        classifications = {"classifications": {}}
        filenamelist = []
        for i in xrange(int(num_batches) + 1):
          np_probabilities, np_labels, np_filenames, np_endpoints = sess.run(
              [probabilities, labels, filenames, endpoints])
          #print({i: endpoints[i].get_shape() for i in endpoints.keys()})
          #return -1
          for j in xrange(FLAGS.batch_size):
            if np_filenames[j] not in filenamelist:
              filenamelist.append(np_filenames[j])
              tmpprob = []
              # iterate over best 5 probs
              for l in np.argsort(np_probabilities[j, :])[::-1][:5]:
                tmpprob.append([
                    str(dataset.labels_to_names[l]).rstrip("\r"),
                    "{0:.2f}".format(np_probabilities[j, l] * 100)
                ])
              tmp = {np_filenames[j]: tmpprob}
              classifications["classifications"].update(tmp)
            else:
              pass
          print(i)
          print(len(classifications["classifications"]))
        #print(filenamelist)

        sortedclass = collections.OrderedDict()
        for k in sorted(classifications["classifications"]):
          sortedclass.update({k: classifications["classifications"][k]})
        classifications["classifications"] = sortedclass

        jsonecoded = json.dumps(classifications)
        loadconf = open(
            os.path.join(FLAGS.result_path, FLAGS.result_name + ".json"), 'wb')
        loadconf.write(jsonecoded)
        loadconf.close()

      if FLAGS.result_type == "stats":
        np_probabilities, np_labels, np_filenames, np_endpoints = sess.run(
            [probabilities, labels, filenames, endpoints])
        # layer shapes
        print({i: endpoints[i].get_shape() for i in endpoints.keys()})
        allparams = 0
        for variable in tf.trainable_variables():  # iterate over vars
          shape = variable.get_shape()
          currpar = 1
          for dim in shape:  # iterate over shape of var
            currpar *= dim.value
          allparams += currpar  # add
        print(allparams)
        return -1  # kill

      if FLAGS.result_type == "decaf":
        ## extract DeCAFs
        features = []
        filenamelist = []
        layerdefinition = {
            "alexnet_v2": "alexnet_v2/fc7/Relu:0",
            "inception_v1": "MaxPool_0a_7x7",
            "inception_v3": "AvgPool_1a_{}x{}",
            "inception_resnet_v2": "AvgPool_1a_8x8",
            "vgg_16": "vgg_16/fc7/Relu:0",
            "resnet_v1_152": "pool5"
        }
        for i in xrange(int(num_batches)):
          np_probabilities, np_labels, np_filenames, np_endpoints = sess.run(
              [probabilities, labels, filenames, endpoints])
          for j in xrange(FLAGS.batch_size):
            if np_filenames[j] not in filenamelist:
              filenamelist.append(np_filenames[j])
              tmp_descr = (np_endpoints[layerdefinition[
                  FLAGS.model_name]][j][0][0]).tolist()
              tmp_descr.insert(0, (np_filenames[j]).replace(".jpg", ""))
              features.append(tmp_descr)
          print(i)

        toARFF(features, FLAGS.result_name,
               os.path.join(FLAGS.result_path, FLAGS.result_name + ".arff"))
