def main(args):
    """Print the Inception v3 bottleneck features of the first input image.

    Builds an Inception v3 graph around ``FLAGS.image_path[0]``, restores the
    weights from ``FLAGS.checkpoint``, evaluates the ``PreLogits`` end point
    and writes the squeezed values to stdout as a single comma-separated
    line, each value formatted with three decimals.

    Args:
      args: Not used by this function.
    """
    if not os.path.exists(FLAGS.checkpoint):
        # NOTE(review): nothing here returns or raises after the log call, so
        # execution continues unless tf.logging.fatal itself aborts -- confirm
        # for the TensorFlow version in use.
        tf.logging.fatal(
            'Checkpoint %s does not exist. Have you download it? See tools/download_data.sh',
            FLAGS.checkpoint)

    g = tf.Graph()
    with g.as_default():
        input_image = PreprocessImage(FLAGS.image_path[0])
        with slim.arg_scope(inception.inception_v3_arg_scope()):
            _, end_points = inception.inception_v3(
                input_image, num_classes=FLAGS.num_classes, is_training=False)
        bottleneck = end_points['PreLogits']

        # No separate init op is built: the previous one was never run, and
        # the variable values come from the checkpoint restore below.
        saver = tf_saver.Saver()

        # The context manager closes the session (and frees its resources)
        # as soon as the single evaluation is done.
        with tf.Session() as sess:
            saver.restore(sess, FLAGS.checkpoint)
            # Run the evaluation on the image
            bottleneck_eval = np.squeeze(sess.run(bottleneck))

    sys.stdout.write(','.join('{:.3f}'.format(val) for val in bottleneck_eval))
    sys.stdout.write('\n')
def prep_graph():
    """Build the prediction graph once and publish its handles as globals.

    Reads one name per line from the file named by the module-level
    ``food_names``, builds an Inception v3 graph fed through a string
    placeholder, restores the weights from the module-level ``checkpoint``
    and loads the label maps.

    The following module-level names are (re)assigned: ``food_list``,
    ``input_image``, ``predictions``, ``sess``, ``labelmap`` and
    ``label_dict``.
    """
    global predictions
    global labelmap
    global label_dict
    global sess
    global input_image
    global food_list

    with open(food_names) as f:
        food_list = [line.rstrip() for line in f]

    g = tf.Graph()
    with g.as_default():
        input_image = tf.placeholder(tf.string)
        processed_image = PreprocessImage(input_image)
        with slim.arg_scope(inception.inception_v3_arg_scope()):
            logits, end_points = inception.inception_v3(
                processed_image, num_classes=6012, is_training=False)
        predictions = end_points['multi_predictions'] = tf.nn.sigmoid(
            logits, name='multi_predictions')

        # No separate init op is built: the previous one was never run, and
        # the variable values come from the checkpoint restore below.
        saver = tf_saver.Saver()

        # The session is stored in a module global, so it is deliberately
        # left open when this function returns.
        sess = tf.Session()
        saver.restore(sess, checkpoint)
        labelmap, label_dict = LoadLabelMaps(6012, labelmap_file, label_dict_file)
def _get_local_init_op():
    """Return the local init op, creating and registering one if none exists.

    Looks up the first op stored under ``ops.GraphKeys.LOCAL_INIT_OP``.  When
    there is none, groups the local-variable initializer with the table
    initializer, adds the grouped op to that collection and returns it.

    Returns:
      The existing or newly created local init op.
    """
    registered_op = _get_first_op_from_collection(ops.GraphKeys.LOCAL_INIT_OP)
    if registered_op is not None:
        return registered_op

    grouped_op = control_flow_ops.group(
        variables.initialize_local_variables(),
        data_flow_ops.initialize_all_tables())
    ops.add_to_collection(ops.GraphKeys.LOCAL_INIT_OP, grouped_op)
    return grouped_op
def _export_graph(graph, saver, checkpoint_path, export_dir,
                  default_graph_signature, named_graph_signatures,
                  exports_to_keep):
    """Exports graph via session_bundle, by creating a Session.

    Inside `graph`, a new session restores `checkpoint_path` through `saver`;
    an `Exporter` built on the same saver is then initialised with a grouped
    local-variable/table init op plus the given signatures, and asked to
    export to `export_dir` at the current global step.

    Args:
      graph: Graph made default while exporting.
      saver: Saver used both for the restore and for the `Exporter`.
      checkpoint_path: Checkpoint passed to `saver.restore`.
      export_dir: Destination passed to `Exporter.export`.
      default_graph_signature: Forwarded to `Exporter.init`.
      named_graph_signatures: Forwarded to `Exporter.init`.
      exports_to_keep: Forwarded to `Exporter.export`.
    """
    with graph.as_default(), tf_session.Session('') as session:
        # NOTE(review): these two calls only build ops; their return values
        # are discarded and nothing in this function runs them -- confirm
        # whether that is intended.
        variables.initialize_local_variables()
        data_flow_ops.initialize_all_tables()
        saver.restore(session, checkpoint_path)

        graph_exporter = exporter.Exporter(saver)
        local_and_table_init = control_flow_ops.group(
            variables.initialize_local_variables(),
            data_flow_ops.initialize_all_tables())
        graph_exporter.init(
            init_op=local_and_table_init,
            default_graph_signature=default_graph_signature,
            named_graph_signatures=named_graph_signatures)

        current_step = contrib_variables.get_global_step()
        graph_exporter.export(
            export_dir, current_step, session, exports_to_keep=exports_to_keep)
def _test_prepare_inputs_for_rnn(self, sequence_features, context_features,
                                 num_unroll, batch_size, expected):
    """Assert that `ssre._prepare_inputs_for_rnn` evaluates to `expected`.

    Args:
      sequence_features: Forwarded to `_prepare_inputs_for_rnn`.
      context_features: Forwarded to `_prepare_inputs_for_rnn`.
      num_unroll: Forwarded to `_prepare_inputs_for_rnn`.
      batch_size: Not used by this helper.
      expected: Value the evaluated result is compared against.
    """
    rnn_inputs = ssre._prepare_inputs_for_rnn(
        sequence_features, context_features, num_unroll)

    with self.test_session() as session:
        # Variables first, then lookup tables, each in its own run call.
        variable_init = variables.global_variables_initializer()
        session.run(variable_init)
        table_init = data_flow_ops.initialize_all_tables()
        session.run(table_init)
        actual = session.run(rnn_inputs)
        self.assertAllEqual(expected, actual)
def _get_local_init_op():
    """Fetch the graph's local init op, building a default one on first use.

    The first op in the ``ops.GraphKeys.LOCAL_INIT_OP`` collection wins.  If
    the lookup yields ``None``, a group of the local-variables initializer and
    the all-tables initializer is created, registered in that collection and
    returned.

    Returns:
      The existing or newly created local init op.
    """
    found = _get_first_op_from_collection(
        ops.GraphKeys.LOCAL_INIT_OP)
    if found is not None:
        return found

    default_op = control_flow_ops.group(
        variables.local_variables_initializer(),
        data_flow_ops.initialize_all_tables())
    ops.add_to_collection(ops.GraphKeys.LOCAL_INIT_OP, default_op)
    return default_op
def testPrepareFeaturesForSQSS(self):
    """Checks the key, sequence and context outputs of `_prepare_features_for_sqss`."""
    mode = model_fn_lib.ModeKeys.TRAIN
    seq_feature_name = 'seq_feature'
    ctx_feature_name = 'ctx_feature'
    input_key_column_name = 'input_key_column'
    sequence_length = 4

    # Inputs: one dense sequence feature, one scalar context feature and the
    # key column.
    seq_feature = constant_op.constant(1.0, shape=[sequence_length])
    ctx_feature = constant_op.constant(2.0)
    input_key0 = constant_op.constant('input0')
    features = {
        input_key_column_name: input_key0,
        seq_feature_name: seq_feature,
        ctx_feature_name: ctx_feature,
    }
    labels = constant_op.constant(5.0, shape=[sequence_length])

    sequence_feature_columns = [
        feature_column.real_valued_column(seq_feature_name, dimension=1),
    ]
    context_feature_columns = [
        feature_column.real_valued_column(ctx_feature_name, dimension=1),
    ]

    # Expectations.
    expected_input_key = b'input0'
    expected_sequence = {
        ssre.RNNKeys.LABELS_KEY: np.array([5., 5., 5., 5.]),
        seq_feature_name: np.array([1., 1., 1., 1.]),
    }
    expected_context = {ctx_feature_name: 2.}

    input_key, sequence, context = ssre._prepare_features_for_sqss(
        features, labels, mode, input_key_column_name,
        sequence_feature_columns, context_feature_columns)

    def assert_equal(a, b):
        """Same key set, and equal values for every key of `a`."""
        self.assertEqual(sorted(a), sorted(b))
        for key in a:
            self.assertAllEqual(a[key], b[key])

    with self.test_session() as sess:
        variable_init = variables.global_variables_initializer()
        sess.run(variable_init)
        table_init = data_flow_ops.initialize_all_tables()
        sess.run(table_init)
        fetched = sess.run([input_key, sequence, context])
        actual_input_key, actual_sequence, actual_context = fetched
        self.assertEqual(expected_input_key, actual_input_key)
        assert_equal(expected_sequence, actual_sequence)
        assert_equal(expected_context, actual_context)
def label(image_path,
          checkpoint="openimages_dataset/data/2016_08/model.ckpt",
          num_classes=6012,
          labelmap_path="openimages_dataset/data/2016_08/labelmap.txt",
          dict_path="openimages_dataset/dict.csv",
          threshold=0.5,
          rounding_digits=1):
    """Return the labels predicted for one image, highest score first.

    Builds a fresh Inception v3 graph around `image_path`, restores
    `checkpoint`, evaluates the sigmoid of the logits and walks the classes
    in descending score order.

    Args:
      image_path: Image handed to `PreprocessImage`.
      checkpoint: Checkpoint restored into the graph.
      num_classes: Number of classes of the model and of the label map.
      labelmap_path: First path handed to `LoadLabelMaps`.
      dict_path: Second path handed to `LoadLabelMaps`.
      threshold: Minimum score for a label to be kept; see the loop below for
        how it is relaxed when nothing has been collected yet.
      rounding_digits: Not used by this function.

    Returns:
      A list of `(display_name, score)` tuples.
    """
    if not os.path.exists(checkpoint):
        # NOTE(review): nothing here returns or raises after the log call, so
        # execution continues unless tf.logging.fatal itself aborts -- confirm
        # for the TensorFlow version in use.
        tf.logging.fatal(
            'Checkpoint %s does not exist. Have you download it? See tools/download_data.sh',
            checkpoint)

    g = tf.Graph()
    with g.as_default():
        input_image = PreprocessImage(image_path)
        with slim.arg_scope(inception.inception_v3_arg_scope()):
            logits, end_points = inception.inception_v3(
                input_image, num_classes=num_classes, is_training=False)
        predictions = end_points['multi_predictions'] = tf.nn.sigmoid(
            logits, name='multi_predictions')

        # No separate init op is built: the previous one was never run, and
        # the variable values come from the checkpoint restore below.
        saver = tf_saver.Saver()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        # A new session is created on every call; the context manager makes
        # sure it is closed instead of leaking.
        with tf.Session(config=config) as sess:
            saver.restore(sess, checkpoint)
            # Run the evaluation on the image
            predictions_eval = np.squeeze(sess.run(predictions))

        labelmap, label_dict = LoadLabelMaps(num_classes, labelmap_path, dict_path)

    # Class indices ordered from highest to lowest score.
    top_k = predictions_eval.argsort()[::-1]
    returned_labels = []
    for idx in top_k:
        mid = labelmap[idx]
        display_name = label_dict.get(mid, 'unknown')
        score = predictions_eval[idx]
        if score < threshold:
            if returned_labels:
                # Something was already collected and scores only go down
                # from here, so stop.
                break
            else:
                # Nothing collected yet: relax the threshold one notch and
                # give up entirely once it drops below 0.1.
                threshold -= 0.1
                if threshold < 0.1:
                    break
        returned_labels.append((display_name, score))
    return returned_labels
def main_op():
    """Returns a main op to init variables and tables.

    The returned op groups three initializers: global variables, local
    variables and all tables.

    Returns:
      The grouped op to be run as the main op upon the load operation.
    """
    return tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer(),
        tf_data_flow_ops.initialize_all_tables())
def _test_prepare_inputs_for_rnn(self, sequence_features, context_features,
                                 sequence_feature_columns, num_unroll,
                                 expected):
    """Assert that `ssre._prepare_inputs_for_rnn` evaluates to `expected`.

    Args:
      sequence_features: Forwarded to `_prepare_inputs_for_rnn`.
      context_features: Forwarded to `_prepare_inputs_for_rnn`.
      sequence_feature_columns: Forwarded to `_prepare_inputs_for_rnn`.
      num_unroll: Forwarded to `_prepare_inputs_for_rnn`.
      expected: Value the evaluated result is compared against.
    """
    rnn_inputs = ssre._prepare_inputs_for_rnn(
        sequence_features, context_features, sequence_feature_columns,
        num_unroll)

    with self.test_session() as session:
        # Variables first, then lookup tables, each in its own run call.
        variable_init = variables.global_variables_initializer()
        session.run(variable_init)
        table_init = data_flow_ops.initialize_all_tables()
        session.run(table_init)
        actual = session.run(rnn_inputs)
        self.assertAllEqual(expected, actual)
def run_feeds_iter(output_dict, feed_dicts, restore_checkpoint_path=None):
    """Yield the evaluated `output_dict` once per entry of `feed_dicts`.

    Variables are restored from `restore_checkpoint_path` when it is given;
    otherwise the global variables are initialized.  Resources, local
    variables and tables are initialized in either case.

    Because this is a generator, nothing (including argument validation)
    happens until the first item is requested.

    Args:
      output_dict: A `dict` mapping string names to `Output` objects to run.
        Tensors must all be from the same graph.
      feed_dicts: Iterable of `dict` objects of input values to feed.
      restore_checkpoint_path: A string containing the path to a checkpoint
        to restore.

    Yields:
      One dict per item of `feed_dicts`, with the keys of `output_dict` and
      the values read from the corresponding `Output`.

    Raises:
      ValueError: if `output_dict` or `feed_dicts` is None or empty.
    """
    if not output_dict:
        raise ValueError('output_dict is invalid: %s.' % output_dict)
    if not feed_dicts:
        raise ValueError('feed_dicts is invalid: %s.' % feed_dicts)

    graph = contrib_ops.get_graph_from_inputs(output_dict.values())
    with graph.as_default() as g, tf_session.Session('') as session:
        all_resources = (
            resources.shared_resources() + resources.local_resources())
        session.run(resources.initialize_resources(all_resources))

        if not restore_checkpoint_path:
            session.run(variables.global_variables_initializer())
        else:
            _restore_from_checkpoint(session, g, restore_checkpoint_path)
        session.run(variables.local_variables_initializer())
        session.run(data_flow_ops.initialize_all_tables())

        coord = coordinator.Coordinator()
        threads = None
        try:
            threads = queue_runner.start_queue_runners(session, coord=coord)
            for feed in feed_dicts:
                yield session.run(output_dict, feed)
        finally:
            # Always stop the queue runners, and wait for them if any started.
            coord.request_stop()
            if threads:
                coord.join(threads, stop_grace_period_secs=120)
def testBuildSequenceInputInput(self):
    """Checks the shape of the value produced by `build_sequence_input`."""
    columns_to_tensors = self.GetColumnsToTensors()
    sequence_input = dynamic_rnn_estimator.build_sequence_input(
        columns_to_tensors,
        self.sequence_feature_columns,
        self.context_feature_columns)

    with self.test_session() as sess:
        variable_init = variables.global_variables_initializer()
        sess.run(variable_init)
        table_init = data_flow_ops.initialize_all_tables()
        sess.run(table_init)
        sequence_input_val = sess.run(sequence_input)

    batch_size = 3  # expected batch size
    padded_length = 2  # padded sequence length
    # location keys + embedding dim + measurement dimension
    depth = 3 + 8 + 2
    expected_shape = np.array([batch_size, padded_length, depth])
    self.assertAllEqual(expected_shape, sequence_input_val.shape)
def run_feeds_iter(output_dict, feed_dicts, restore_checkpoint_path=None):
    """Run `output_dict` tensors once for every input in `feed_dicts`.

    With a `restore_checkpoint_path` the variables come from that checkpoint;
    without one the global variables are initialized instead.  Resources,
    local variables and tables are initialized either way.

    This is a generator: no work is done, and no argument is validated,
    before the first item is requested.

    Args:
      output_dict: A `dict` mapping string names to `Tensor` objects to run.
        Tensors must all be from the same graph.
      feed_dicts: Iterable of `dict` objects of input values to feed.
      restore_checkpoint_path: A string containing the path to a checkpoint
        to restore.

    Yields:
      A dict of values read from the `output_dict` tensors, one per item in
      `feed_dicts`.  Keys are the same as `output_dict`; values are the
      results read from the corresponding `Tensor`.

    Raises:
      ValueError: if `output_dict` or `feed_dicts` is None or empty.
    """
    if not output_dict:
        raise ValueError('output_dict is invalid: %s.' % output_dict)
    if not feed_dicts:
        raise ValueError('feed_dicts is invalid: %s.' % feed_dicts)

    fetches = output_dict
    graph = contrib_ops.get_graph_from_inputs(fetches.values())
    with graph.as_default() as g:
        with tf_session.Session('') as session:
            resource_list = (
                resources.shared_resources() + resources.local_resources())
            resource_init = resources.initialize_resources(resource_list)
            session.run(resource_init)

            if restore_checkpoint_path:
                _restore_from_checkpoint(session, g, restore_checkpoint_path)
            else:
                session.run(variables.global_variables_initializer())
            session.run(variables.local_variables_initializer())
            session.run(data_flow_ops.initialize_all_tables())

            runner_coordinator = coordinator.Coordinator()
            runner_threads = None
            try:
                runner_threads = queue_runner.start_queue_runners(
                    session, coord=runner_coordinator)
                for feed_dict in feed_dicts:
                    yield session.run(fetches, feed_dict)
            finally:
                # Stop the runners on every exit path; only join when some
                # threads were actually started.
                runner_coordinator.request_stop()
                if runner_threads:
                    runner_coordinator.join(
                        runner_threads, stop_grace_period_secs=120)
def main(args):
    """Classify every image in ``FLAGS.image_path`` and print the top labels.

    Builds an Inception v3 graph fed through a string placeholder, restores
    ``FLAGS.checkpoint`` and, for each image path, prints the path followed by
    the ``FLAGS.n`` highest-scoring classes (index, label id, display name and
    score) and a blank line.

    Args:
      args: Not used by this function.
    """
    if not os.path.exists(FLAGS.checkpoint):
        # NOTE(review): nothing here returns or raises after the log call, so
        # execution continues unless tf.logging.fatal itself aborts -- confirm
        # for the TensorFlow version in use.
        tf.logging.fatal(
            'Checkpoint %s does not exist. Have you download it? See tools/download_data.sh',
            FLAGS.checkpoint)

    g = tf.Graph()
    with g.as_default():
        input_image = tf.placeholder(tf.string)
        processed_image = PreprocessImage(input_image)
        with slim.arg_scope(inception.inception_v3_arg_scope()):
            logits, end_points = inception.inception_v3(
                processed_image, num_classes=FLAGS.num_classes,
                is_training=False)
        predictions = end_points['multi_predictions'] = tf.nn.sigmoid(
            logits, name='multi_predictions')

        # No separate init op is built: the previous one was never run, and
        # the variable values come from the checkpoint restore below.
        saver = tf_saver.Saver()

        with tf.Session() as sess:
            saver.restore(sess, FLAGS.checkpoint)

            # The label maps do not depend on the image, so load them once
            # instead of once per iteration.
            labelmap, label_dict = LoadLabelMaps(
                FLAGS.num_classes, FLAGS.labelmap, FLAGS.dict)

            # Run the evaluation on the images
            for image_path in FLAGS.image_path:
                if not os.path.exists(image_path):
                    # Report the path that is actually missing, not always
                    # the first one on the command line.
                    tf.logging.fatal('Input image does not exist %s',
                                     image_path)
                # Image files are binary: read bytes, and close the handle.
                with tf.gfile.FastGFile(image_path, 'rb') as image_file:
                    img_data = image_file.read()
                print(image_path)
                predictions_eval = np.squeeze(
                    sess.run(predictions, {input_image: img_data}))

                # Print top(n) results
                top_k = predictions_eval.argsort()[-FLAGS.n:][::-1]
                for idx in top_k:
                    mid = labelmap[idx]
                    display_name = label_dict.get(mid, 'unknown')
                    score = predictions_eval[idx]
                    print('{}: {} - {} (score = {:.2f})'.format(
                        idx, mid, display_name, score))
                print()
def _init_local_init_op(self, local_init_op=USE_DEFAULT):
    """Initializes local_init_op.

    Args:
      local_init_op: `Operation` run for every new supervisor instance.  If
        set to USE_DEFAULT, the first op of the `GraphKeys.LOCAL_INIT_OP`
        collection is used; if that lookup yields `None`, an op grouping the
        local-variable and table initializers is created and added to the
        collection.
    """
    chosen_op = local_init_op
    if chosen_op is Supervisor.USE_DEFAULT:
        chosen_op = self._get_first_op_from_collection(ops.GraphKeys.LOCAL_INIT_OP)
        if chosen_op is None:
            chosen_op = control_flow_ops.group(
                variables.initialize_local_variables(),
                data_flow_ops.initialize_all_tables())
            ops.add_to_collection(ops.GraphKeys.LOCAL_INIT_OP, chosen_op)
    self._local_init_op = chosen_op
def _init_local_init_op(self, local_init_op=USE_DEFAULT):
    """Initializes local_init_op.

    Args:
      local_init_op: `Operation` run for every new supervisor instance.  An
        explicit value (including `None`) is stored unchanged.  With
        USE_DEFAULT, the first op from the GraphKeys.LOCAL_INIT_OP collection
        is used, and when there is none an op that initializes all local
        variables and all tables is created and registered there.
    """
    if local_init_op is not Supervisor.USE_DEFAULT:
        # Caller supplied the op (or None) explicitly: take it as is.
        self._local_init_op = local_init_op
        return

    default_op = self._get_first_op_from_collection(ops.GraphKeys.LOCAL_INIT_OP)
    if default_op is None:
        local_vars_init = variables.initialize_local_variables()
        tables_init = data_flow_ops.initialize_all_tables()
        default_op = control_flow_ops.group(local_vars_init, tables_init)
        ops.add_to_collection(ops.GraphKeys.LOCAL_INIT_OP, default_op)
    self._local_init_op = default_op
def _init_local_init_op(self, local_init_op=USE_DEFAULT):
    """Initializes local_init_op.

    Args:
      local_init_op: `Operation` run for every new supervisor instance.  If
        set to USE_DEFAULT, the first op of the `GraphKeys.LOCAL_INIT_OP`
        collection is used; if that lookup yields `None`, an op grouping the
        local-variable and table initializers is created and added to the
        collection.
    """
    use_default = local_init_op is Supervisor.USE_DEFAULT
    resolved_op = local_init_op
    if use_default:
        resolved_op = self._get_first_op_from_collection(
            ops.GraphKeys.LOCAL_INIT_OP)
    if use_default and resolved_op is None:
        resolved_op = control_flow_ops.group(
            variables.initialize_local_variables(),
            data_flow_ops.initialize_all_tables())
        ops.add_to_collection(ops.GraphKeys.LOCAL_INIT_OP, resolved_op)
    self._local_init_op = resolved_op
def _init_local_init_op(self, local_init_op=USE_DEFAULT):
    """Initializes local_init_op.

    Args:
      local_init_op: `Operation` run for every new supervisor instance.  An
        explicit value (including `None`) is stored unchanged.  With
        USE_DEFAULT, the first op from the GraphKeys.LOCAL_INIT_OP collection
        is used, and when there is none an op that initializes all local
        variables and all tables is created and registered there.
    """
    if local_init_op is not Supervisor.USE_DEFAULT:
        # Caller supplied the op (or None) explicitly: take it as is.
        self._local_init_op = local_init_op
        return

    collection_key = ops.GraphKeys.LOCAL_INIT_OP
    default_op = self._get_first_op_from_collection(collection_key)
    if default_op is None:
        default_op = control_flow_ops.group(
            variables.local_variables_initializer(),
            data_flow_ops.initialize_all_tables())
        ops.add_to_collection(ops.GraphKeys.LOCAL_INIT_OP, default_op)
    self._local_init_op = default_op
def run_feeds(output_dict, feed_dicts, restore_checkpoint_path=None):
    """Run `output_dict` tensors with each input in `feed_dicts`.

    If `restore_checkpoint_path` is supplied, restore from checkpoint.
    Otherwise, init all variables.  Local variables and tables are
    initialized in both cases.

    Args:
      output_dict: A `dict` mapping string names to `Tensor` objects to run.
        Tensors must all be from the same graph.
      feed_dicts: Iterable of `dict` objects of input values to feed.
      restore_checkpoint_path: A string containing the path to a checkpoint
        to restore.

    Returns:
      A list of dicts of values read from `output_dict` tensors, one item in
      the list for each item in `feed_dicts`. Keys are the same as
      `output_dict`, values are the results read from the corresponding
      `Tensor` in `output_dict`.

    Raises:
      ValueError: if `output_dict` or `feed_dicts` is None or empty.
    """
    if not output_dict:
        raise ValueError('output_dict is invalid: %s.' % output_dict)
    if not feed_dicts:
        raise ValueError('feed_dicts is invalid: %s.' % feed_dicts)

    graph = contrib_ops.get_graph_from_inputs(output_dict.values())
    with graph.as_default() as g:
        with tf_session.Session('') as session:
            if restore_checkpoint_path:
                _restore_from_checkpoint(session, g, restore_checkpoint_path)
            else:
                session.run(variables.initialize_all_variables())
            session.run(variables.initialize_local_variables())
            session.run(data_flow_ops.initialize_all_tables())
            coord = Coordinator()
            threads = None
            try:
                threads = queue_runner.start_queue_runners(session, coord=coord)
                return [_run_dict(session, output_dict, f) for f in feed_dicts]
            finally:
                coord.request_stop()
                # Wait for the queue-runner threads before the session is
                # closed, as run_feeds_iter does, instead of leaving them
                # running against a closed session.
                if threads:
                    coord.join(threads, stop_grace_period_secs=120)
def testConstructRNN(self):
    """Checks the shapes of the activations and final state from `construct_rnn`."""
    initial_state = None
    columns_to_tensors = self.GetColumnsToTensors()
    sequence_input = dynamic_rnn_estimator.build_sequence_input(
        columns_to_tensors,
        self.sequence_feature_columns,
        self.context_feature_columns)
    rnn_outputs = dynamic_rnn_estimator.construct_rnn(
        initial_state, sequence_input, self.rnn_cell,
        self.mock_target_column.num_label_columns)
    activations_t, final_state_t = rnn_outputs

    # Obtain values of activations and final state.
    with session.Session() as sess:
        variable_init = variables.global_variables_initializer()
        sess.run(variable_init)
        table_init = data_flow_ops.initialize_all_tables()
        sess.run(table_init)
        activations, final_state = sess.run([activations_t, final_state_t])

    expected_activations_shape = np.array([3, 2, self.NUM_LABEL_COLUMNS])
    self.assertAllEqual(expected_activations_shape, activations.shape)

    expected_state_shape = np.array([3, self.NUM_RNN_CELL_UNITS])
    self.assertAllEqual(expected_state_shape, final_state.shape)
def testPrepareFeaturesForSQSS(self):
    """Checks `_prepare_features_for_sqss` with dense and sparse sequence features."""
    mode = model_fn_lib.ModeKeys.TRAIN
    seq_feature_name = 'seq_feature'
    sparse_seq_feature_name = 'wire_cast'
    ctx_feature_name = 'ctx_feature'
    sequence_length = 4
    embedding_dimension = 8

    # The sparse input feature and its expectation share the same layout.
    wire_cast_indices = [[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0],
                         [1, 1, 1], [2, 0, 0], [2, 1, 1]]
    wire_cast_values = [
        b'marlo', b'stringer', b'omar', b'stringer', b'marlo', b'marlo',
        b'omar'
    ]
    wire_cast_shape = [3, 2, 2]

    features = {
        sparse_seq_feature_name: sparse_tensor.SparseTensor(
            indices=wire_cast_indices,
            values=wire_cast_values,
            dense_shape=wire_cast_shape),
        seq_feature_name: constant_op.constant(
            1.0, shape=[sequence_length]),
        ctx_feature_name: constant_op.constant(2.0),
    }
    labels = constant_op.constant(5.0, shape=[sequence_length])

    wire_cast = feature_column.sparse_column_with_keys(
        'wire_cast', ['marlo', 'omar', 'stringer'])
    sequence_feature_columns = [
        feature_column.real_valued_column(seq_feature_name, dimension=1),
        feature_column.embedding_column(
            wire_cast,
            dimension=embedding_dimension,
            initializer=init_ops.ones_initializer()),
    ]
    context_feature_columns = [
        feature_column.real_valued_column(ctx_feature_name, dimension=1),
    ]

    expected_sequence = {
        rnn_common.RNNKeys.LABELS_KEY: np.array([5., 5., 5., 5.]),
        seq_feature_name: np.array([1., 1., 1., 1.]),
        sparse_seq_feature_name: sparse_tensor.SparseTensor(
            indices=wire_cast_indices,
            values=wire_cast_values,
            dense_shape=wire_cast_shape),
    }
    expected_context = {ctx_feature_name: 2.}

    sequence, context = ssre._prepare_features_for_sqss(
        features, labels, mode, sequence_feature_columns,
        context_feature_columns)

    def assert_equal(expected, got):
        """Same key set; sparse entries are compared component by component."""
        self.assertEqual(sorted(expected), sorted(got))
        for key, want in expected.items():
            have = got[key]
            if not isinstance(want, sparse_tensor.SparseTensor):
                self.assertAllEqual(want, have)
                continue
            # Expected sparse tensors are graph tensors and must be evaluated;
            # the fetched side is compared through its attributes directly.
            self.assertAllEqual(want.values.eval(), have.values)
            self.assertAllEqual(want.indices.eval(), have.indices)
            self.assertAllEqual(want.dense_shape.eval(), have.dense_shape)

    with self.test_session() as sess:
        variable_init = variables.global_variables_initializer()
        sess.run(variable_init)
        table_init = data_flow_ops.initialize_all_tables()
        sess.run(table_init)
        actual_sequence, actual_context = sess.run([sequence, context])
        assert_equal(expected_sequence, actual_sequence)
        assert_equal(expected_context, actual_context)
def evaluate_once(master,
                  checkpoint_path,
                  logdir,
                  num_evals=1,
                  eval_op=None,
                  eval_op_feed_dict=None,
                  final_op=None,
                  final_op_feed_dict=None,
                  summary_op=_USE_DEFAULT,
                  summary_op_feed_dict=None,
                  variables_to_restore=None,
                  session_config=None):
    """Evaluates the model at the given checkpoint path.

    Restores `checkpoint_path` inside a supervisor-managed session, starts the
    queue runners and hands the session to `evaluation`.

    Args:
      master: The BNS address of the TensorFlow master.
      checkpoint_path: The path to a checkpoint to use for evaluation.
      logdir: The directory where the TensorFlow summaries are written to.
      num_evals: The number of times to run `eval_op`.
      eval_op: A operation run `num_evals` times.
      eval_op_feed_dict: The feed dictionary to use when executing the
        `eval_op`.
      final_op: An operation to execute after all of the `eval_op`
        executions. The value of `final_op` is returned.
      final_op_feed_dict: A feed dictionary to use when executing `final_op`.
      summary_op: The summary_op to evaluate after running TF-Slims metric
        ops. When left at its default, the result of
        `logging_ops.merge_all_summaries()` is used.
      summary_op_feed_dict: An optional feed dictionary to use when running
        the `summary_op`.
      variables_to_restore: A list of TensorFlow variables to restore during
        evaluation. If it is `None` (or otherwise falsy),
        `variables.get_variables_to_restore()` is used.
      session_config: An instance of `tf.ConfigProto` that will be used to
        configure the `Session`. If left as `None`, the default will be used.

    Returns:
      The value returned by `evaluation` (documented upstream as the value of
      `final_op`, or `None` if `final_op` is `None`).
    """
    def _utc_timestamp():
        return time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime())

    if summary_op == _USE_DEFAULT:
        summary_op = logging_ops.merge_all_summaries()

    global_step = variables.get_or_create_global_step()

    init_op = control_flow_ops.group(
        tf_variables.initialize_all_variables(),
        tf_variables.initialize_local_variables(),
        data_flow_ops.initialize_all_tables())

    restore_vars = variables_to_restore or variables.get_variables_to_restore()
    saver = tf_saver.Saver(restore_vars)
    summary_writer = summary_io.SummaryWriter(logdir)

    # The supervisor only runs init_op; saving and summaries are handled
    # explicitly below.
    sv = supervisor.Supervisor(
        graph=ops.get_default_graph(),
        logdir=logdir,
        init_op=init_op,
        summary_op=None,
        summary_writer=None,
        global_step=None,
        saver=None)

    logging.info('Starting evaluation at ' + _utc_timestamp())

    with sv.managed_session(
            master, start_standard_services=False,
            config=session_config) as sess:
        saver.restore(sess, checkpoint_path)
        sv.start_queue_runners(sess)
        final_op_value = evaluation(
            sess,
            num_evals=num_evals,
            eval_op=eval_op,
            eval_op_feed_dict=eval_op_feed_dict,
            final_op=final_op,
            final_op_feed_dict=final_op_feed_dict,
            summary_op=summary_op,
            summary_op_feed_dict=summary_op_feed_dict,
            summary_writer=summary_writer,
            global_step=global_step)

        logging.info('Finished evaluation at ' + _utc_timestamp())

    return final_op_value
def testPrepareFeaturesForSQSS(self):
    """Checks `_prepare_features_for_sqss` with dense and sparse sequence features."""
    mode = model_fn_lib.ModeKeys.TRAIN
    seq_feature_name = 'seq_feature'
    sparse_seq_feature_name = 'wire_cast'
    ctx_feature_name = 'ctx_feature'
    sequence_length = 4
    embedding_dimension = 8

    def make_wire_cast_tensor():
        """Builds the sparse tensor used as both input and expectation."""
        return sparse_tensor.SparseTensor(
            indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0], [1, 1, 1],
                     [2, 0, 0], [2, 1, 1]],
            values=[
                b'marlo', b'stringer', b'omar', b'stringer', b'marlo',
                b'marlo', b'omar'
            ],
            dense_shape=[3, 2, 2])

    features = {
        sparse_seq_feature_name: make_wire_cast_tensor(),
        seq_feature_name: constant_op.constant(1.0, shape=[sequence_length]),
        ctx_feature_name: constant_op.constant(2.0),
    }
    labels = constant_op.constant(5.0, shape=[sequence_length])

    wire_cast = feature_column.sparse_column_with_keys(
        'wire_cast', ['marlo', 'omar', 'stringer'])
    dense_seq_column = feature_column.real_valued_column(
        seq_feature_name, dimension=1)
    embedded_seq_column = feature_column.embedding_column(
        wire_cast,
        dimension=embedding_dimension,
        initializer=init_ops.ones_initializer())
    sequence_feature_columns = [dense_seq_column, embedded_seq_column]
    context_feature_columns = [
        feature_column.real_valued_column(ctx_feature_name, dimension=1)
    ]

    expected_sequence = {
        rnn_common.RNNKeys.LABELS_KEY: np.array([5., 5., 5., 5.]),
        seq_feature_name: np.array([1., 1., 1., 1.]),
        sparse_seq_feature_name: make_wire_cast_tensor(),
    }
    expected_context = {ctx_feature_name: 2.}

    sequence, context = ssre._prepare_features_for_sqss(
        features, labels, mode, sequence_feature_columns,
        context_feature_columns)

    def assert_equal(expected, got):
        """Same key set; sparse entries are compared component by component."""
        self.assertEqual(sorted(expected), sorted(got))
        for name in expected:
            want, have = expected[name], got[name]
            if isinstance(want, sparse_tensor.SparseTensor):
                # Expected sparse tensors are graph tensors and must be
                # evaluated; the fetched side is compared through its
                # attributes directly.
                self.assertAllEqual(want.values.eval(), have.values)
                self.assertAllEqual(want.indices.eval(), have.indices)
                self.assertAllEqual(want.dense_shape.eval(), have.dense_shape)
            else:
                self.assertAllEqual(want, have)

    with self.test_session() as sess:
        variable_init = variables.global_variables_initializer()
        sess.run(variable_init)
        table_init = data_flow_ops.initialize_all_tables()
        sess.run(table_init)
        actual_sequence, actual_context = sess.run([sequence, context])
        assert_equal(expected_sequence, actual_sequence)
        assert_equal(expected_context, actual_context)
def _default_local_init_op():
    """Return a new op grouping the local-variable and table initializers."""
    local_vars_init = variables.local_variables_initializer()
    tables_init = data_flow_ops.initialize_all_tables()
    return control_flow_ops.group(local_vars_init, tables_init)
def evaluate_once(checkpoint_path,
                  logdir,
                  master='',
                  num_evals=1,
                  eval_op=None,
                  eval_op_feed_dict=None,
                  final_op=None,
                  final_op_feed_dict=None,
                  summary_op=_USE_DEFAULT,
                  summary_op_feed_dict=None,
                  variables_to_restore=None,
                  session_config=None):
    """Evaluates the model at the given checkpoint path.

    Restores `checkpoint_path` inside a supervisor-managed session, starts the
    queue runners and hands the session to `evaluation`.

    Args:
      checkpoint_path: The path to a checkpoint to use for evaluation.
      logdir: The directory where the TensorFlow summaries are written to.
      master: The BNS address of the TensorFlow master.
      num_evals: The number of times to run `eval_op`.
      eval_op: A operation run `num_evals` times.
      eval_op_feed_dict: The feed dictionary to use when executing the
        `eval_op`.
      final_op: An operation to execute after all of the `eval_op`
        executions. The value of `final_op` is returned.
      final_op_feed_dict: A feed dictionary to use when executing `final_op`.
      summary_op: The summary_op to evaluate after running TF-Slims metric
        ops. When left at its default, the result of
        `logging_ops.merge_all_summaries()` is used.
      summary_op_feed_dict: An optional feed dictionary to use when running
        the `summary_op`.
      variables_to_restore: A list of TensorFlow variables to restore during
        evaluation. If it is `None` (or otherwise falsy),
        `variables.get_variables_to_restore()` is used.
      session_config: An instance of `tf.ConfigProto` that will be used to
        configure the `Session`. If left as `None`, the default will be used.

    Returns:
      The value returned by `evaluation` (documented upstream as the value of
      `final_op`, or `None` if `final_op` is `None`).
    """
    time_format = '%Y-%m-%d-%H:%M:%S'

    if summary_op == _USE_DEFAULT:
        summary_op = logging_ops.merge_all_summaries()

    global_step = variables.get_or_create_global_step()

    all_vars_init = tf_variables.initialize_all_variables()
    local_vars_init = tf_variables.initialize_local_variables()
    tables_init = data_flow_ops.initialize_all_tables()
    init_op = control_flow_ops.group(all_vars_init, local_vars_init, tables_init)

    saver = tf_saver.Saver(
        variables_to_restore or variables.get_variables_to_restore())
    summary_writer = summary_io.SummaryWriter(logdir)

    # The supervisor only runs init_op; saving and summaries are handled
    # explicitly below.
    supervisor_kwargs = dict(
        graph=ops.get_default_graph(),
        logdir=logdir,
        init_op=init_op,
        summary_op=None,
        summary_writer=None,
        global_step=None,
        saver=None)
    sv = supervisor.Supervisor(**supervisor_kwargs)

    logging.info('Starting evaluation at ' +
                 time.strftime(time_format, time.gmtime()))

    with sv.managed_session(
            master, start_standard_services=False,
            config=session_config) as sess:
        saver.restore(sess, checkpoint_path)
        sv.start_queue_runners(sess)
        final_op_value = evaluation(
            sess,
            num_evals=num_evals,
            eval_op=eval_op,
            eval_op_feed_dict=eval_op_feed_dict,
            final_op=final_op,
            final_op_feed_dict=final_op_feed_dict,
            summary_op=summary_op,
            summary_op_feed_dict=summary_op_feed_dict,
            summary_writer=summary_writer,
            global_step=global_step)

        logging.info('Finished evaluation at ' +
                     time.strftime(time_format, time.gmtime()))

    return final_op_value
def train(train_op,
          logdir,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=_USE_DEFAULT,
          init_fn=None,
          ready_op=_USE_DEFAULT,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          summary_writer=_USE_DEFAULT,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None,
          session_config=None,
          trace_every_n_steps=None):
  """Runs a training loop using a TensorFlow supervisor.

  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronously.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written to. If None, model
      checkpoints and summaries will not be written.
    train_step_fn: The function to call in order to execute a single gradient
      step. The function must take exactly four arguments: the current
      session, the `train_op` `Tensor`, a global step `Tensor` and a
      dictionary.
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`.
      By default, two `Boolean`, scalar ops called "should_stop" and
      "should_log" are provided.
    log_every_n_steps: The frequency, in terms of global steps, that the loss
      and global step are logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The BNS name of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the
      primary replica during replica training.
    global_step: The `Tensor` representing the global step. If left as
      `None`, then slim.variables.get_or_create_global_step() is used.
    number_of_steps: The max number of gradient steps to take during
      training. If the value is left as None, training proceeds indefinitely.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling
      `tf.initialize_all_variables()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If left to its default
      value, then the session is initialized by calling
      `tf.initialize_local_variables()` and `tf.initialize_all_tables()`.
    init_fn: An optional callable to be executed after `init_op` is called.
      The callable must accept one argument, the session being initialized.
    ready_op: Operation to check if the model is ready to use. If left to its
      default value, then the session checks for readiness by calling
      `tf.report_uninitialized_variables()`.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    summary_writer: `SummaryWriter` to use. Can be `None` to indicate that no
      summaries should be written. If unset, we create a SummaryWriter.
    startup_delay_steps: The number of steps to wait for before beginning.
      Note that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If None, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer or
      tf.train.SyncReplicasOptimizerV2. If the argument is supplied, gradient
      updates will be synchronous. If left as `None`, gradient updates will
      be asynchronous.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    trace_every_n_steps: produce and save a `Timeline` in Chrome trace format
      and add it to the summaries every `trace_every_n_steps`. If None, no
      trace information will be produced or saved.

  Returns:
    The value of the loss function after training, or `None` if the training
    loop stopped before `train_step_fn` was ever executed.

  Raises:
    ValueError: if `train_op` is empty, if `startup_delay_steps` is non-zero
      when `sync_optimizer` is supplied, if `number_of_steps` is not
      positive, if `summary_op`, `saver` or `trace_every_n_steps` is provided
      while `logdir` is None, or if `sync_optimizer` is of an unsupported
      type.
  """
  if train_op is None:
    raise ValueError('train_op cannot be None.')

  if logdir is None:
    if summary_op != _USE_DEFAULT:
      raise ValueError('Cannot provide summary_op because logdir=None')
    if saver is not None:
      raise ValueError('Cannot provide saver because logdir=None')
    if trace_every_n_steps is not None:
      raise ValueError('Cannot provide trace_every_n_steps because '
                       'logdir=None')

  if sync_optimizer is not None and startup_delay_steps > 0:
    raise ValueError(
        'startup_delay_steps must be zero when sync_optimizer is supplied.')

  if number_of_steps is not None and number_of_steps <= 0:
    raise ValueError(
        '`number_of_steps` must be either None or a positive number.')

  graph = graph or ops.get_default_graph()
  with graph.as_default():
    if global_step is None:
      global_step = variables.get_or_create_global_step()
    saver = saver or tf_saver.Saver()

    with ops.name_scope('init_ops'):
      if init_op == _USE_DEFAULT:
        init_op = tf_variables.initialize_all_variables()

      if ready_op == _USE_DEFAULT:
        ready_op = tf_variables.report_uninitialized_variables()

      if local_init_op == _USE_DEFAULT:
        local_init_op = control_flow_ops.group(
            tf_variables.initialize_local_variables(),
            data_flow_ops.initialize_all_tables())

    if summary_op == _USE_DEFAULT:
      summary_op = logging_ops.merge_all_summaries()

    if summary_writer == _USE_DEFAULT:
      summary_writer = supervisor.Supervisor.USE_DEFAULT

    cleanup_op = None

    if is_chief and sync_optimizer is not None:
      if not isinstance(
          sync_optimizer,
          (sync_replicas_optimizer.SyncReplicasOptimizer,
           sync_replicas_optimizer.SyncReplicasOptimizerV2)):
        raise ValueError(
            '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer or '
            'tf.train.SyncReplicasOptimizerV2.')

      # Need to create these BEFORE the supervisor finalizes the graph:
      with ops.control_dependencies([init_op]):
        init_tokens_op = sync_optimizer.get_init_tokens_op()
      init_op = init_tokens_op
      chief_queue_runner = sync_optimizer.get_chief_queue_runner()
      # Only the original SyncReplicasOptimizer is asked for a clean-up op.
      if isinstance(sync_optimizer,
                    sync_replicas_optimizer.SyncReplicasOptimizer):
        cleanup_op = sync_optimizer.get_clean_up_op()

    if train_step_kwargs == _USE_DEFAULT:
      with ops.name_scope('train_step'):
        train_step_kwargs = {}

        if number_of_steps:
          should_stop_op = math_ops.greater_equal(global_step,
                                                  number_of_steps)
        else:
          should_stop_op = constant_op.constant(False)
        train_step_kwargs['should_stop'] = should_stop_op
        train_step_kwargs['should_log'] = math_ops.equal(
            math_ops.mod(global_step, log_every_n_steps), 0)
        if is_chief and trace_every_n_steps is not None:
          train_step_kwargs['should_trace'] = math_ops.equal(
              math_ops.mod(global_step, trace_every_n_steps), 0)
          train_step_kwargs['logdir'] = logdir

  sv = supervisor.Supervisor(
      graph=graph,
      is_chief=is_chief,
      logdir=logdir,
      init_op=init_op,
      init_feed_dict=init_feed_dict,
      local_init_op=local_init_op,
      ready_op=ready_op,
      summary_op=summary_op,
      summary_writer=summary_writer,
      global_step=global_step,
      saver=saver,
      save_summaries_secs=save_summaries_secs,
      save_model_secs=save_interval_secs,
      init_fn=init_fn)

  if summary_writer is not None:
    train_step_kwargs['summary_writer'] = sv.summary_writer

  # Bound up front so the final `return` cannot hit an unbound local when the
  # loop below exits before `train_step_fn` has run once.
  total_loss = None

  should_retry = True
  while should_retry:
    try:
      should_retry = False
      with sv.managed_session(
          master, start_standard_services=False,
          config=session_config) as sess:
        logging.info('Starting Session.')
        if is_chief:
          if logdir:
            sv.start_standard_services(sess)
        elif startup_delay_steps > 0:
          # `sys.maxsize` rather than the Python-2-only `sys.maxint`.
          _wait_for_step(
              sess, global_step,
              min(startup_delay_steps, number_of_steps or sys.maxsize))
        sv.start_queue_runners(sess)
        logging.info('Starting Queues.')
        if is_chief and sync_optimizer is not None:
          sv.start_queue_runners(sess, [chief_queue_runner])
        try:
          while not sv.should_stop():
            total_loss, should_stop = train_step_fn(
                sess, train_op, global_step, train_step_kwargs)
            if should_stop:
              logging.info('Stopping Training.')
              break
          if logdir and sv.is_chief:
            logging.info('Finished training! Saving model to disk.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
        except:
          # Deliberately bare: whatever interrupted training, the chief runs
          # the sync clean-up op (when there is one) and then re-raises.
          if sv.is_chief and cleanup_op is not None:
            logging.info('About to execute sync_clean_up_op!')
            sess.run(cleanup_op)
          raise

    except errors.AbortedError:
      # Always re-run on AbortedError as it indicates a restart of one of the
      # distributed tensorflow servers.
      logging.info('Retrying training!')
      should_retry = True

  return total_loss
def train(train_op,
          logdir,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=_USE_DEFAULT,
          init_fn=None,
          ready_op=_USE_DEFAULT,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          summary_writer=_USE_DEFAULT,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None,
          session_config=None):
  """Runs a training loop using a TensorFlow supervisor.

  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronously.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written to. If None, model
      checkpoints and summaries will not be written.
    train_step_fn: The function to call in order to execute a single gradient
      step. The function must take exactly four arguments: the current
      session, the `train_op` `Tensor`, a global step `Tensor` and a
      dictionary.
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`.
      By default, two `Boolean`, scalar ops called "should_stop" and
      "should_log" are provided.
    log_every_n_steps: The frequency, in terms of global steps, that the loss
      and global step are logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The BNS name of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the
      primary replica during replica training.
    global_step: The `Tensor` representing the global step. If left as
      `None`, then slim.variables.get_or_create_global_step() is used.
    number_of_steps: The max number of gradient steps to take during
      training. If the value is left as None, training proceeds indefinitely.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling
      `tf.initialize_all_variables()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If left to its default
      value, then the session is initialized by calling
      `tf.initialize_local_variables()` and `tf.initialize_all_tables()`.
    init_fn: An optional callable to be executed after `init_op` is called.
      The callable must accept one argument, the session being initialized.
    ready_op: Operation to check if the model is ready to use. If left to its
      default value, then the session checks for readiness by calling
      `tf.report_uninitialized_variables()`.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    summary_writer: `SummaryWriter` to use. Can be `None` to indicate that no
      summaries should be written. If unset, we create a SummaryWriter.
    startup_delay_steps: The number of steps to wait for before beginning.
      Note that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If None, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the
      argument is supplied, gradient updates will be synchronous. If left as
      `None`, gradient updates will be asynchronous.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.

  Returns:
    The value of the loss function after training, or `None` if the training
    loop stopped before `train_step_fn` was ever executed.

  Raises:
    ValueError: if `train_op` is empty, if `startup_delay_steps` is non-zero
      when `sync_optimizer` is supplied, if `number_of_steps` is not
      positive, if `summary_op` or `saver` is provided while `logdir` is
      None, or if `sync_optimizer` is not a tf.train.SyncReplicasOptimizer.
  """
  if train_op is None:
    raise ValueError('train_op cannot be None.')

  if logdir is None:
    if summary_op != _USE_DEFAULT:
      raise ValueError('Cannot provide summary_op because logdir=None')
    if saver is not None:
      raise ValueError('Cannot provide saver because logdir=None')

  # Explicit `is not None` so the check does not depend on the optimizer
  # object's truthiness.
  if sync_optimizer is not None and startup_delay_steps > 0:
    raise ValueError(
        'startup_delay_steps must be zero when sync_optimizer is supplied.')

  if number_of_steps is not None and number_of_steps <= 0:
    raise ValueError(
        '`number_of_steps` must be either None or a positive number.')

  graph = graph or ops.get_default_graph()
  with graph.as_default():
    if global_step is None:
      global_step = variables.get_or_create_global_step()
    saver = saver or tf_saver.Saver()

    if init_op == _USE_DEFAULT:
      init_op = tf_variables.initialize_all_variables()

    if ready_op == _USE_DEFAULT:
      ready_op = tf_variables.report_uninitialized_variables()

    if summary_op == _USE_DEFAULT:
      summary_op = logging_ops.merge_all_summaries()

    if summary_writer == _USE_DEFAULT:
      summary_writer = supervisor.Supervisor.USE_DEFAULT

    if local_init_op == _USE_DEFAULT:
      local_init_op = control_flow_ops.group(
          tf_variables.initialize_local_variables(),
          data_flow_ops.initialize_all_tables())

    cleanup_op = None

    if is_chief and sync_optimizer is not None:
      if not isinstance(sync_optimizer,
                        sync_replicas_optimizer.SyncReplicasOptimizer):
        raise ValueError(
            '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer')

      # Need to create these BEFORE the supervisor finalizes the graph:
      with ops.control_dependencies([init_op]):
        init_tokens_op = sync_optimizer.get_init_tokens_op()
      init_op = init_tokens_op
      chief_queue_runner = sync_optimizer.get_chief_queue_runner()
      cleanup_op = sync_optimizer.get_clean_up_op()

    if train_step_kwargs == _USE_DEFAULT:
      train_step_kwargs = {}

      if number_of_steps:
        should_stop_op = math_ops.greater_equal(global_step, number_of_steps)
      else:
        should_stop_op = constant_op.constant(False)
      train_step_kwargs['should_stop'] = should_stop_op
      train_step_kwargs['should_log'] = math_ops.equal(
          math_ops.mod(global_step, log_every_n_steps), 0)

  sv = supervisor.Supervisor(
      graph=graph,
      is_chief=is_chief,
      logdir=logdir,
      init_op=init_op,
      init_feed_dict=init_feed_dict,
      local_init_op=local_init_op,
      ready_op=ready_op,
      summary_op=summary_op,
      summary_writer=summary_writer,
      global_step=global_step,
      saver=saver,
      save_summaries_secs=save_summaries_secs,
      save_model_secs=save_interval_secs,
      init_fn=init_fn)

  # Bound up front so the final `return` cannot hit an unbound local when the
  # loop below exits before `train_step_fn` has run once.
  total_loss = None

  should_retry = True
  while should_retry:
    try:
      should_retry = False
      with sv.managed_session(
          master, start_standard_services=False,
          config=session_config) as sess:
        logging.info('Starting Session.')
        if is_chief:
          if logdir:
            sv.start_standard_services(sess)
        elif startup_delay_steps > 0:
          # `sys.maxsize` rather than the Python-2-only `sys.maxint`.
          _wait_for_step(
              sess, global_step,
              min(startup_delay_steps, number_of_steps or sys.maxsize))
        sv.start_queue_runners(sess)
        logging.info('Starting Queues.')
        if is_chief and sync_optimizer is not None:
          sv.start_queue_runners(sess, [chief_queue_runner])
        try:
          while not sv.should_stop():
            total_loss, should_stop = train_step_fn(
                sess, train_op, global_step, train_step_kwargs)
            if should_stop:
              logging.info('Stopping Training.')
              break
          if logdir and sv.is_chief:
            logging.info('Finished training! Saving model to disk.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
        except:
          # Deliberately bare: whatever interrupted training, the chief runs
          # the sync clean-up op (when there is one) and then re-raises.
          if sv.is_chief and cleanup_op is not None:
            logging.info('About to execute sync_clean_up_op!')
            sess.run(cleanup_op)
          raise

    except errors.AbortedError:
      # Always re-run on AbortedError as it indicates a restart of one of the
      # distributed tensorflow servers.
      logging.info('Retrying training!')
      should_retry = True

  return total_loss
def _default_local_init_op():
  """Builds the default local init op.

  Returns:
    A single op, created with `control_flow_ops.group`, that combines
    `variables.initialize_local_variables()` with
    `data_flow_ops.initialize_all_tables()`.
  """
  local_vars_init = variables.initialize_local_variables()
  tables_init = data_flow_ops.initialize_all_tables()
  return control_flow_ops.group(local_vars_init, tables_init)
def evaluation_loop(master,
                    checkpoint_dir,
                    logdir,
                    num_evals=1,
                    eval_op=None,
                    eval_op_feed_dict=None,
                    final_op=None,
                    final_op_feed_dict=None,
                    summary_op=_USE_DEFAULT,
                    summary_op_feed_dict=None,
                    variables_to_restore=None,
                    eval_interval_secs=60,
                    max_number_of_evaluations=None,
                    session_config=None):
  """Runs TF-Slim's Evaluation Loop.

  Repeatedly waits for a new checkpoint in `checkpoint_dir`, restores it and
  runs `evaluation` on it, pausing so that consecutive evaluations start at
  least `eval_interval_secs` apart.

  Args:
    master: The BNS address of the TensorFlow master.
    checkpoint_dir: The directory where checkpoints are stored.
    logdir: The directory where the TensorFlow summaries are written to.
    num_evals: The number of times to run `eval_op`.
    eval_op: An operation run `num_evals` times.
    eval_op_feed_dict: The feed dictionary to use when executing the
      `eval_op`.
    final_op: An operation to execute after all of the `eval_op` executions.
      The value of `final_op` is returned.
    final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    summary_op: The summary_op to evaluate after running TF-Slim's metric
      ops. When left at its default, `logging_ops.merge_all_summaries()` is
      used.
    summary_op_feed_dict: An optional feed dictionary to use when running the
      `summary_op`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If left as `None` (or otherwise falsy),
      `variables.get_variables_to_restore()` is used.
    eval_interval_secs: The minimum number of seconds between evaluations.
    max_number_of_evaluations: the max number of iterations of the
      evaluation. If the value is left as 'None', the evaluation continues
      indefinitely.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.

  Returns:
    The value returned by `evaluation` for the last checkpoint evaluated
    (i.e. the value of `final_op`). Only reachable when
    `max_number_of_evaluations` is set; otherwise the loop does not
    terminate on its own.
  """
  if summary_op == _USE_DEFAULT:
    summary_op = logging_ops.merge_all_summaries()

  global_step = variables.get_or_create_global_step()

  init_op = control_flow_ops.group(
      tf_variables.initialize_all_variables(),
      tf_variables.initialize_local_variables(),
      data_flow_ops.initialize_all_tables())

  saver = tf_saver.Saver(
      variables_to_restore or variables.get_variables_to_restore())

  summary_writer = summary_io.SummaryWriter(logdir)

  sv = supervisor.Supervisor(
      graph=ops.get_default_graph(),
      logdir=logdir,
      init_op=init_op,
      summary_op=None,
      summary_writer=None,
      global_step=None,
      saver=saver)

  last_checkpoint = None
  number_of_evaluations = 0
  # Kept across iterations so the documented return value (the value of
  # `final_op`) is available once the loop ends.
  final_op_value = None
  while True:
    last_checkpoint = wait_for_new_checkpoint(checkpoint_dir, last_checkpoint)
    start = time.time()
    logging.info('Starting evaluation at ' + time.strftime(
        '%Y-%m-%d-%H:%M:%S', time.gmtime()))

    with sv.managed_session(
        master, start_standard_services=False,
        config=session_config) as sess:
      sv.saver.restore(sess, last_checkpoint)
      sv.start_queue_runners(sess)
      final_op_value = evaluation(
          sess,
          num_evals=num_evals,
          eval_op=eval_op,
          eval_op_feed_dict=eval_op_feed_dict,
          final_op=final_op,
          final_op_feed_dict=final_op_feed_dict,
          summary_op=summary_op,
          summary_op_feed_dict=summary_op_feed_dict,
          summary_writer=summary_writer,
          global_step=global_step)

    logging.info('Finished evaluation at ' + time.strftime(
        '%Y-%m-%d-%H:%M:%S', time.gmtime()))
    number_of_evaluations += 1
    if (max_number_of_evaluations and
        number_of_evaluations >= max_number_of_evaluations):
      logging.info('Reached max_number_of_evaluations=%s. Exit',
                   max_number_of_evaluations)
      break

    # Sleep only for whatever is left of the interval after this evaluation.
    time_to_next_eval = start + eval_interval_secs - time.time()
    if time_to_next_eval > 0:
      time.sleep(time_to_next_eval)

  return final_op_value