def testIdTableWithHashBucketsWithMultipleInitializersDifferentDefault(self):
  vocab_file = self._createVocabFile("feat_to_id_6.txt")
  with self.test_session() as sess:
    default_value1 = -1
    vocab_size = 3
    oov_buckets = 0
    table1 = lookup_ops.IdTableWithHashBuckets(
        lookup_ops.HashTable(
            lookup_ops.TextFileIdTableInitializer(
                vocab_file, vocab_size=vocab_size), default_value1),
        oov_buckets)

    default_value2 = -2
    table2 = lookup_ops.IdTableWithHashBuckets(
        lookup_ops.HashTable(
            lookup_ops.TextFileIdTableInitializer(
                vocab_file, vocab_size=vocab_size), default_value2),
        oov_buckets)

    lookup_ops.tables_initializer().run()

    input_string_1 = constant_op.constant(
        ["brain", "salad", "surgery", "UNK"])
    input_string_2 = constant_op.constant(["fruit", "salad", "UNK"])

    out1 = table1.lookup(input_string_1)
    out2 = table2.lookup(input_string_2)

    out1, out2 = sess.run([out1, out2])

    self.assertAllEqual([0, 1, 2, -1], out1)
    self.assertAllEqual([-2, 1, -2], out2)
    self.assertEqual(vocab_size + oov_buckets, table1.size().eval())
    self.assertEqual(vocab_size + oov_buckets, table2.size().eval())

def test_asset_loading(self):
  first_path = self._v1_asset_saved_model()
  imported = load.load(first_path)
  self.evaluate(lookup_ops.tables_initializer())
  fn = imported.signatures["serving_default"]
  self.assertAllClose({"output": [2, 0]},
                      fn(start=constant_op.constant(["gamma", "alpha"])))

  second_path = os.path.join(self.get_temp_dir(), "saved_model",
                             str(ops.uid()))
  save.save(imported, second_path, signatures=imported.signatures)
  shutil.rmtree(first_path)
  del ops.get_collection_ref(ops.GraphKeys.TABLE_INITIALIZERS)[:]
  second_import = load.load(second_path)
  self.evaluate(lookup_ops.tables_initializer())
  fn = second_import.signatures["serving_default"]
  self.assertAllClose({"output": [2, 0]},
                      fn(start=constant_op.constant(["gamma", "alpha"])))

  third_path = os.path.join(self.get_temp_dir(), "saved_model",
                            str(ops.uid()))
  save.save(second_import, third_path, signatures=second_import.signatures)
  shutil.rmtree(second_path)
  del ops.get_collection_ref(ops.GraphKeys.TABLE_INITIALIZERS)[:]
  third_import = load.load(third_path)
  self.evaluate(lookup_ops.tables_initializer())
  fn = third_import.signatures["serving_default"]
  self.assertAllClose({"output": [2, 0]},
                      fn(start=constant_op.constant(["gamma", "alpha"])))

def testMultipleHashTables(self):
  with self.test_session() as sess:
    default_val = -1
    keys = constant_op.constant(["brain", "salad", "surgery"])
    values = constant_op.constant([0, 1, 2], dtypes.int64)

    table1 = lookup_ops.HashTable(
        lookup_ops.KeyValueTensorInitializer(keys, values), default_val)
    table2 = lookup_ops.HashTable(
        lookup_ops.KeyValueTensorInitializer(keys, values), default_val)
    table3 = lookup_ops.HashTable(
        lookup_ops.KeyValueTensorInitializer(keys, values), default_val)

    lookup_ops.tables_initializer().run()
    self.assertAllEqual(3, table1.size().eval())
    self.assertAllEqual(3, table2.size().eval())
    self.assertAllEqual(3, table3.size().eval())

    input_string = constant_op.constant(["brain", "salad", "tank"])
    output1 = table1.lookup(input_string)
    output2 = table2.lookup(input_string)
    output3 = table3.lookup(input_string)

    out1, out2, out3 = sess.run([output1, output2, output3])
    self.assertAllEqual([0, 1, -1], out1)
    self.assertAllEqual([0, 1, -1], out2)
    self.assertAllEqual([0, 1, -1], out3)

def test_index_table_from_tensor_with_tensor_init(self):
  with self.test_session():
    table = lookup_ops.index_table_from_tensor(
        vocabulary_list=("brain", "salad", "surgery"), num_oov_buckets=1)
    ids = table.lookup(constant_op.constant(("salad", "surgery", "tarkus")))

    self.assertRaises(errors_impl.OpError, ids.eval)
    lookup_ops.tables_initializer().run()
    self.assertAllEqual((1, 2, 3), ids.eval())

def test_duplicate_entries(self):
  with self.test_session():
    vocabulary_list = constant_op.constant(["hello", "hello"])
    table = lookup_ops.index_to_string_table_from_tensor(
        vocabulary_list=vocabulary_list)
    indices = constant_op.constant([0, 1, 4], dtypes.int64)
    features = table.lookup(indices)

    lookup_ops.tables_initializer().run()
    self.assertAllEqual((b"hello", b"hello", b"UNK"), features.eval())

def test_index_table_from_tensor_empty_vocabulary_list(self):
  with self.test_session():
    table = lookup_ops.index_table_from_tensor(
        vocabulary_list=np.array([], dtype=np.str_), num_oov_buckets=1)
    ids = table.lookup(constant_op.constant(["salad", "surgery", "brain"]))

    self.assertRaises(errors_impl.OpError, ids.eval)
    with self.assertRaisesRegexp(errors_impl.OpError,
                                 "keys and values cannot be empty"):
      lookup_ops.tables_initializer().run()

def test_string_index_table_from_file(self):
  vocabulary_file = self._createVocabFile("f2i_vocab1.txt")
  with self.test_session():
    table = lookup_ops.index_table_from_file(
        vocabulary_file=vocabulary_file, num_oov_buckets=1)
    ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"]))

    self.assertRaises(errors_impl.OpError, ids.eval)
    lookup_ops.tables_initializer().run()
    self.assertAllEqual((1, 2, 3), ids.eval())

def test_index_to_string_table_with_vocab_size(self):
  vocabulary_file = self._createVocabFile("f2i_vocab7.txt")
  with self.test_session():
    table = lookup_ops.index_to_string_table_from_file(
        vocabulary_file=vocabulary_file, vocab_size=3)
    features = table.lookup(constant_op.constant([1, 2, 4], dtypes.int64))

    self.assertRaises(errors_impl.OpError, features.eval)
    lookup_ops.tables_initializer().run()
    self.assertAllEqual((b"salad", b"surgery", b"UNK"), features.eval())

def test_int64_index_table_from_tensor_with_tensor_init(self):
  with self.test_session():
    table = lookup_ops.index_table_from_tensor(
        vocabulary_list=(42, 1, -1000), num_oov_buckets=1,
        dtype=dtypes.int64)
    ids = table.lookup(
        constant_op.constant((1, -1000, 11), dtype=dtypes.int64))

    self.assertRaises(errors_impl.OpError, ids.eval)
    lookup_ops.tables_initializer().run()
    self.assertAllEqual((1, 2, 3), ids.eval())

def test_index_table_from_file_with_default_value(self):
  default_value = -42
  vocabulary_file = self._createVocabFile("f2i_vocab4.txt")
  with self.test_session():
    table = lookup_ops.index_table_from_file(
        vocabulary_file=vocabulary_file, default_value=default_value)
    ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"]))

    self.assertRaises(errors_impl.OpError, ids.eval)
    lookup_ops.tables_initializer().run()
    self.assertAllEqual((1, 2, default_value), ids.eval())

def test_index_table_from_tensor_with_default_value(self):
  default_value = -42
  with self.test_session():
    table = lookup_ops.index_table_from_tensor(
        vocabulary_list=["brain", "salad", "surgery"],
        default_value=default_value)
    ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"]))

    self.assertRaises(errors_impl.OpError, ids.eval)
    lookup_ops.tables_initializer().run()
    self.assertAllEqual((1, 2, default_value), ids.eval())

def test_index_table_from_file_with_vocab_size_too_small(self):
  vocabulary_file = self._createVocabFile("f2i_vocab6.txt")
  with self.test_session():
    table = lookup_ops.index_table_from_file(
        vocabulary_file=vocabulary_file, vocab_size=2)
    ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"]))

    self.assertRaises(errors_impl.OpError, ids.eval)
    lookup_ops.tables_initializer().run()
    self.assertAllEqual((1, -1, -1), ids.eval())
    self.assertEqual(2, table.size().eval())

def test_index_to_string_with_default_value(self):
  default_value = b"NONE"
  with self.test_session():
    vocabulary_list = constant_op.constant(["brain", "salad", "surgery"])
    table = lookup_ops.index_to_string_table_from_tensor(
        vocabulary_list=vocabulary_list, default_value=default_value)
    indices = constant_op.constant([1, 2, 4], dtypes.int64)
    features = table.lookup(indices)

    self.assertRaises(errors_impl.OpError, features.eval)
    lookup_ops.tables_initializer().run()
    self.assertAllEqual((b"salad", b"surgery", default_value),
                        features.eval())

def test_index_to_string_table_with_vocab_size_too_small(self):
  default_value = b"NONE"
  vocabulary_file = self._createVocabFile("f2i_vocab2.txt")
  with self.test_session():
    table = lookup_ops.index_to_string_table_from_file(
        vocabulary_file=vocabulary_file,
        vocab_size=2,
        default_value=default_value)
    features = table.lookup(constant_op.constant([1, 2, 4], dtypes.int64))

    self.assertRaises(errors_impl.OpError, features.eval)
    lookup_ops.tables_initializer().run()
    self.assertAllEqual((b"salad", default_value, default_value),
                        features.eval())

def test_int64_index_table_from_file(self):
  vocabulary_file = self._createVocabFile(
      "f2i_vocab3.txt", values=("42", "1", "-1000"))
  with self.test_session():
    table = lookup_ops.index_table_from_file(
        vocabulary_file=vocabulary_file,
        num_oov_buckets=1,
        key_dtype=dtypes.int64)
    ids = table.lookup(
        constant_op.constant((1, -1000, 11), dtype=dtypes.int64))

    self.assertRaises(errors_impl.OpError, ids.eval)
    lookup_ops.tables_initializer().run()
    self.assertAllEqual((1, 2, 3), ids.eval())

def testDecodeExampleWithBranchedLookup(self):
  example = example_pb2.Example(features=feature_pb2.Features(feature={
      'image/object/class/text': self._BytesFeatureFromList(
          np.array(['cat', 'dog', 'guinea pig'])),
  }))
  serialized_example = example.SerializeToString()

  # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
  table = lookup_ops.index_table_from_tensor(
      constant_op.constant(['dog', 'guinea pig', 'cat']))

  with self.test_session() as sess:
    sess.run(lookup_ops.tables_initializer())

    serialized_example = array_ops.reshape(serialized_example, shape=[])

    keys_to_features = {
        'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
    }

    items_to_handlers = {
        'labels':
            tf_example_decoder.LookupTensor('image/object/class/text', table),
    }

    decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
                                                    items_to_handlers)
    obtained_class_ids = decoder.decode(serialized_example)[0].eval()

  self.assertAllClose([2, 0, 1], obtained_class_ids)

def _text_vocab_subsample_vocab_helper(self, vocab_freq_file, vocab_min_count,
                                       vocab_freq_dtype, corpus_size=None):
  # The outputs are non-deterministic, so set random seed to help ensure that
  # the outputs remain constant for testing.
  random_seed.set_random_seed(42)

  input_tensor = constant_op.constant([
      # keep_prob = (sqrt(30/(0.05*100)) + 1) * (0.05*100/30) = 0.57.
      b"the",
      b"answer",  # Not in vocab. (Always discarded)
      b"to",  # keep_prob = 0.75.
      b"life",  # keep_prob > 1. (Always kept)
      b"and",  # keep_prob = 0.48.
      b"universe"  # Below vocab threshold of 3. (Always discarded)
  ])
  # keep_prob calculated from vocab file with relative frequencies of:
  # and: 40
  # life: 8
  # the: 30
  # to: 20
  # universe: 2

  tokens, labels = text.skip_gram_sample_with_text_vocab(
      input_tensor=input_tensor,
      vocab_freq_file=vocab_freq_file,
      vocab_token_index=0,
      vocab_freq_index=1,
      vocab_freq_dtype=vocab_freq_dtype,
      vocab_min_count=vocab_min_count,
      vocab_subsampling=0.05,
      corpus_size=corpus_size,
      min_skips=1,
      max_skips=1,
      seed=123)

  expected_tokens, expected_labels = self._split_tokens_labels([
      (b"the", b"to"),
      (b"to", b"the"),
      (b"to", b"life"),
      (b"life", b"to"),
  ])

  with self.test_session() as sess:
    lookup_ops.tables_initializer().run()
    tokens_eval, labels_eval = sess.run([tokens, labels])
    self.assertAllEqual(expected_tokens, tokens_eval)
    self.assertAllEqual(expected_labels, labels_eval)

def test_index_table_from_file_with_oov_buckets(self):
  vocabulary_file = self._createVocabFile("f2i_vocab5.txt")
  with self.test_session():
    table = lookup_ops.index_table_from_file(
        vocabulary_file=vocabulary_file, num_oov_buckets=1000)
    ids = table.lookup(
        constant_op.constant(["salad", "surgery", "tarkus", "toccata"]))

    self.assertRaises(errors_impl.OpError, ids.eval)
    lookup_ops.tables_initializer().run()
    self.assertAllEqual(
        (
            1,  # From vocabulary file.
            2,  # From vocabulary file.
            867,  # 3 + fingerprint("tarkus") mod 1000.
            860),  # 3 + fingerprint("toccata") mod 1000.
        ids.eval())

def testMissingValueInOneHotColumnForSparseColumnWithKeys(self):
  ids = fc.sparse_column_with_keys("ids", ["marlo", "omar", "stringer"])
  one_hot = fc.one_hot_column(ids)
  features = {"ids": constant_op.constant([["marlo", "unknown", "omar"]])}
  one_hot_tensor = feature_column_ops.input_from_feature_columns(
      features, [one_hot])
  with self.test_session() as sess:
    sess.run(variables.global_variables_initializer())
    sess.run(lookup_ops.tables_initializer())
    self.assertAllEqual([[1., 1., 0.]], one_hot_tensor.eval())

def test_index_to_string_table_with_vocab_size_too_large(self):
  vocabulary_file = self._createVocabFile("f2i_vocab6.txt")
  with self.test_session():
    table = lookup_ops.index_to_string_table_from_file(
        vocabulary_file=vocabulary_file, vocab_size=4)
    features = table.lookup(constant_op.constant([1, 2, 4], dtypes.int64))

    self.assertRaises(errors_impl.OpError, features.eval)
    init = lookup_ops.tables_initializer()
    self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                            "Invalid vocab_size", init.run)

def _export_graph(graph, saver, checkpoint_path, export_dir,
                  default_graph_signature, named_graph_signatures,
                  exports_to_keep):
  """Exports graph via session_bundle, by creating a Session."""
  with graph.as_default():
    with tf_session.Session('') as session:
      variables.local_variables_initializer()
      lookup_ops.tables_initializer()
      saver.restore(session, checkpoint_path)

      export = exporter.Exporter(saver)
      export.init(
          init_op=control_flow_ops.group(
              variables.local_variables_initializer(),
              lookup_ops.tables_initializer()),
          default_graph_signature=default_graph_signature,
          named_graph_signatures=named_graph_signatures,
          assets_collection=ops.get_collection(
              ops.GraphKeys.ASSET_FILEPATHS))
      return export.export(export_dir, contrib_variables.get_global_step(),
                           session, exports_to_keep=exports_to_keep)

def testInitializeSameTableWithMultipleNodes(self):
  vocabulary_file = self._createVocabFile("one_column_5.txt")

  with self.test_session() as sess:
    shared_name = "shared-one-columm"
    default_value = -1
    table1 = lookup_ops.HashTable(
        lookup_ops.TextFileInitializer(vocabulary_file, dtypes.string,
                                       lookup_ops.TextFileIndex.WHOLE_LINE,
                                       dtypes.int64,
                                       lookup_ops.TextFileIndex.LINE_NUMBER),
        default_value,
        shared_name=shared_name)
    table2 = lookup_ops.HashTable(
        lookup_ops.TextFileInitializer(vocabulary_file, dtypes.string,
                                       lookup_ops.TextFileIndex.WHOLE_LINE,
                                       dtypes.int64,
                                       lookup_ops.TextFileIndex.LINE_NUMBER),
        default_value,
        shared_name=shared_name)
    table3 = lookup_ops.HashTable(
        lookup_ops.TextFileInitializer(vocabulary_file, dtypes.string,
                                       lookup_ops.TextFileIndex.WHOLE_LINE,
                                       dtypes.int64,
                                       lookup_ops.TextFileIndex.LINE_NUMBER),
        default_value,
        shared_name=shared_name)

    lookup_ops.tables_initializer().run()

    input_string = constant_op.constant(["brain", "salad", "tank"])

    output1 = table1.lookup(input_string)
    output2 = table2.lookup(input_string)
    output3 = table3.lookup(input_string)

    out1, out2, out3 = sess.run([output1, output2, output3])
    self.assertAllEqual([0, 1, -1], out1)
    self.assertAllEqual([0, 1, -1], out2)
    self.assertAllEqual([0, 1, -1], out3)

def testIdTableWithHashBucketsWithMultipleInitializers(self):
  vocab_file = self._createVocabFile("feat_to_id_4.txt")
  with self.test_session() as sess:
    default_value = -1
    vocab_size = 3
    oov_buckets = 3

    vocab_table = lookup_ops.HashTable(
        lookup_ops.TextFileIdTableInitializer(
            vocab_file, vocab_size=vocab_size), default_value)
    table1 = lookup_ops.IdTableWithHashBuckets(
        vocab_table,
        oov_buckets,
        hasher_spec=lookup_ops.FastHashSpec,
        name="table1")
    table2 = lookup_ops.IdTableWithHashBuckets(
        vocab_table,
        oov_buckets,
        hasher_spec=lookup_ops.StrongHashSpec((1, 2)),
        name="table2")

    lookup_ops.tables_initializer().run()

    input_string = constant_op.constant(
        ["fruit", "brain", "salad", "surgery", "UNK"])

    out1 = table1.lookup(input_string)
    out2 = table2.lookup(input_string)

    out1, out2 = sess.run([out1, out2])

    self.assertAllEqual([5, 0, 1, 2, 5], out1)
    self.assertAllEqual([5, 0, 1, 2, 3], out2)
    self.assertEqual(vocab_size + oov_buckets, table1.size().eval())
    self.assertEqual(vocab_size + oov_buckets, table2.size().eval())
    test_util.assert_ops_in_graph({
        "table1_Lookup/hash_bucket": "StringToHashBucketFast",
        "table2_Lookup/hash_bucket": "StringToHashBucketStrong",
    }, sess.graph)

def main_op():
  """Returns a main op to init variables and tables.

  Returns the main op, including the group of ops that initializes all
  variables, initializes local variables, and initializes all tables.

  Returns:
    The set of ops to be run as part of the main op upon the load operation.
  """
  init = variables.global_variables_initializer()
  init_local = variables.local_variables_initializer()
  init_tables = lookup_ops.tables_initializer()
  return control_flow_ops.group(init, init_local, init_tables)

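
# Usage sketch (not part of the original sources): a grouped main op like the
# one above is typically attached to a TF1 SavedModel export so variables and
# lookup tables are re-initialized when the model is loaded. The helper name
# below and the tf.compat.v1 symbol locations are assumptions for illustration.
import tensorflow as tf

def export_with_main_op(sess, export_dir, signature_def_map):
  builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_dir)
  builder.add_meta_graph_and_variables(
      sess,
      [tf.compat.v1.saved_model.tag_constants.SERVING],
      signature_def_map=signature_def_map,
      # Runs at load time; here it only re-runs the table initializers.
      main_op=tf.compat.v1.tables_initializer())
  builder.save()
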
def _get_local_init_op():
  """Returns the local init ops to initialize tables and local variables."""
  local_init_op = _get_first_op_from_collection(ops.GraphKeys.LOCAL_INIT_OP)
  if local_init_op is None:
    op_list = [
        variables.local_variables_initializer(),
        lookup_ops.tables_initializer()
    ]
    if op_list:
      local_init_op = control_flow_ops.group(*op_list)
      ops.add_to_collection(ops.GraphKeys.LOCAL_INIT_OP, local_init_op)
  return local_init_op

def _test_prepare_inputs_for_rnn(self, sequence_features, context_features,
                                 sequence_feature_columns, num_unroll,
                                 expected):
  features_by_time = ssre._prepare_inputs_for_rnn(sequence_features,
                                                  context_features,
                                                  sequence_feature_columns,
                                                  num_unroll)
  with self.test_session() as sess:
    sess.run(variables.global_variables_initializer())
    sess.run(lookup_ops.tables_initializer())
    features_val = sess.run(features_by_time)
    self.assertAllEqual(expected, features_val)

def testDecodeExampleWithBranchedBackupHandler(self):
  example1 = example_pb2.Example(
      features=feature_pb2.Features(
          feature={
              'image/object/class/text':
                  self._BytesFeatureFromList(
                      np.array(['cat', 'dog', 'guinea pig'])),
              'image/object/class/label':
                  self._Int64FeatureFromList(np.array([42, 10, 900]))
          }))
  example2 = example_pb2.Example(
      features=feature_pb2.Features(
          feature={
              'image/object/class/text':
                  self._BytesFeatureFromList(
                      np.array(['cat', 'dog', 'guinea pig'])),
          }))
  example3 = example_pb2.Example(
      features=feature_pb2.Features(
          feature={
              'image/object/class/label':
                  self._Int64FeatureFromList(np.array([42, 10, 901]))
          }))
  # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
  table = lookup_ops.index_table_from_tensor(
      constant_op.constant(['dog', 'guinea pig', 'cat']))
  keys_to_features = {
      'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
      'image/object/class/label': parsing_ops.VarLenFeature(dtypes.int64),
  }
  backup_handler = tf_example_decoder.BackupHandler(
      handler=slim_example_decoder.Tensor('image/object/class/label'),
      backup=tf_example_decoder.LookupTensor('image/object/class/text',
                                             table))
  items_to_handlers = {
      'labels': backup_handler,
  }
  decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
                                                  items_to_handlers)
  obtained_class_ids_each_example = []
  with self.test_session() as sess:
    sess.run(lookup_ops.tables_initializer())
    for example in [example1, example2, example3]:
      serialized_example = array_ops.reshape(
          example.SerializeToString(), shape=[])
      obtained_class_ids_each_example.append(
          decoder.decode(serialized_example)[0].eval())

  self.assertAllClose([42, 10, 900], obtained_class_ids_each_example[0])
  self.assertAllClose([2, 0, 1], obtained_class_ids_each_example[1])
  self.assertAllClose([42, 10, 901], obtained_class_ids_each_example[2])

def testBuildSequenceInputInput(self):
  sequence_input = dynamic_rnn_estimator.build_sequence_input(
      self.GetColumnsToTensors(), self.sequence_feature_columns,
      self.context_feature_columns)
  with self.test_session() as sess:
    sess.run(variables.global_variables_initializer())
    sess.run(lookup_ops.tables_initializer())
    sequence_input_val = sess.run(sequence_input)
    expected_shape = np.array([
        3,  # expected batch size
        2,  # padded sequence length
        3 + 8 + 2  # location keys + embedding dim + measurement dimension
    ])
    self.assertAllEqual(expected_shape, sequence_input_val.shape)

def run_feeds_iter(output_dict, feed_dicts, restore_checkpoint_path=None):
  """Run `output_dict` tensors with each input in `feed_dicts`.

  If `restore_checkpoint_path` is supplied, restore from checkpoint.
  Otherwise, init all variables.

  Args:
    output_dict: A `dict` mapping string names to `Tensor` objects to run.
      Tensors must all be from the same graph.
    feed_dicts: Iterable of `dict` objects of input values to feed.
    restore_checkpoint_path: A string containing the path to a checkpoint to
      restore.

  Yields:
    A sequence of dicts of values read from `output_dict` tensors, one item
    yielded for each item in `feed_dicts`. Keys are the same as `output_dict`,
    values are the results read from the corresponding `Tensor` in
    `output_dict`.

  Raises:
    ValueError: if `output_dict` or `feed_dicts` is None or empty.
  """
  if not output_dict:
    raise ValueError('output_dict is invalid: %s.' % output_dict)
  if not feed_dicts:
    raise ValueError('feed_dicts is invalid: %s.' % feed_dicts)

  graph = contrib_ops.get_graph_from_inputs(output_dict.values())
  with graph.as_default() as g:
    with tf_session.Session('') as session:
      session.run(
          resources.initialize_resources(resources.shared_resources() +
                                         resources.local_resources()))
      if restore_checkpoint_path:
        _restore_from_checkpoint(session, g, restore_checkpoint_path)
      else:
        session.run(variables.global_variables_initializer())
      session.run(variables.local_variables_initializer())
      session.run(lookup_ops.tables_initializer())
      coord = coordinator.Coordinator()
      threads = None
      try:
        threads = queue_runner.start_queue_runners(session, coord=coord)
        for f in feed_dicts:
          yield session.run(output_dict, f)
      finally:
        coord.request_stop()
        if threads:
          coord.join(threads, stop_grace_period_secs=120)

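
# Hypothetical caller sketch (an assumption, not from the original module):
# feeding two batches through a tiny graph and collecting the yielded results.
# The placeholder and output names here are illustrative only; graph mode
# (TF 1.x-style) is assumed, as in the rest of this listing.
x = array_ops.placeholder(dtypes.float32, shape=[None])
outputs = {'doubled': x * 2.0}
feeds = [{x: [1.0, 2.0]}, {x: [3.0]}]
results = list(run_feeds_iter(outputs, feeds))
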
def default_local_init_op():
  """Returns an op that groups the default local init ops.

  This op is used during session initialization when a Scaffold is
  initialized without specifying the local_init_op arg. It includes
  `tf.local_variables_initializer`, `tf.tables_initializer`, and also
  initializes local session resources.

  Returns:
    The default Scaffold local init op.
  """
  return control_flow_ops.group(
      variables.local_variables_initializer(),
      lookup_ops.tables_initializer(),
      resources.initialize_resources(resources.local_resources()))

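
# Minimal usage sketch (an assumption, not from the original file): building a
# Scaffold with an explicit local_init_op equivalent to the default above, so
# a monitored session initializes local variables and lookup tables before the
# first run call. The tf.compat.v1 names are assumed for illustration.
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
  scaffold = tf.compat.v1.train.Scaffold(
      local_init_op=tf.group(
          tf.compat.v1.local_variables_initializer(),
          tf.compat.v1.tables_initializer()))
  with tf.compat.v1.train.MonitoredTrainingSession(scaffold=scaffold) as sess:
    pass  # Training steps would run here; tables are already initialized.
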
def my_main_op():
  init_local = variables.local_variables_initializer()
  init_tables = lookup_ops.tables_initializer()
  return tf.group(init_local, init_tables)

def _default_local_init_op():
  return control_flow_ops.group(variables.local_variables_initializer(),
                                lookup_ops.tables_initializer())

def _initialize(self, init_op, sess):
  sess.run(variables.global_variables_initializer())
  sess.run(lookup_ops.tables_initializer())
  sess.run(init_op)

def _restore(self, saver, sess):
  sess.run(lookup_ops.tables_initializer())
  saver.restore(sess, self._latest_ckpt())

def test_dense_features(self, use_safe_embedding_lookup, partition_variables): # Inputs. vocabulary_size = 4 sparse_input = sparse_tensor.SparseTensorValue( # example 0, ids [2] # example 1, ids [0, 1] # example 2, ids [] # example 3, ids [1] indices=((0, 0), (1, 0), (1, 4), (3, 0)), values=(2, 0, 1, 1), dense_shape=(4, 5)) # Embedding variable. embedding_dimension = 2 embedding_values = ( (1., 2.), # id 0 (3., 5.), # id 1 (7., 11.), # id 2 (9., 13.) # id 3 ) def _initializer(shape, dtype, partition_info=None): if partition_variables: self.assertEqual([vocabulary_size, embedding_dimension], partition_info.full_shape) self.assertAllEqual((2, embedding_dimension), shape) else: self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertIsNone(partition_info) self.assertEqual(dtypes.float32, dtype) return embedding_values # Expected lookup result, using combiner='mean'. expected_lookups = ( # example 0, ids [2], embedding = [7, 11] (7., 11.), # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] (2., 3.5), # example 2, ids [], embedding = [0, 0] (0., 0.), # example 3, ids [1], embedding = [3, 5] (3., 5.), ) # Build columns. categorical_column = fc.categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) partitioner = None if partition_variables: partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0) with variable_scope.variable_scope('vars', partitioner=partitioner): embedding_column = fc.embedding_column( categorical_column, dimension=embedding_dimension, initializer=_initializer, use_safe_embedding_lookup=use_safe_embedding_lookup) # Provide sparse input and get dense result. l = df.DenseFeatures((embedding_column, )) dense_features = l({'aaa': sparse_input}) # Assert expected embedding variable and lookups. global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) if partition_variables: self.assertCountEqual(( 'vars/dense_features/aaa_embedding/embedding_weights/part_0:0', 'vars/dense_features/aaa_embedding/embedding_weights/part_1:0' ), tuple([v.name for v in global_vars])) else: self.assertCountEqual( ('vars/dense_features/aaa_embedding/embedding_weights:0', ), tuple([v.name for v in global_vars])) for v in global_vars: self.assertIsInstance(v, variables_lib.Variable) trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) if partition_variables: self.assertCountEqual(( 'vars/dense_features/aaa_embedding/embedding_weights/part_0:0', 'vars/dense_features/aaa_embedding/embedding_weights/part_1:0' ), tuple([v.name for v in trainable_vars])) else: self.assertCountEqual( ('vars/dense_features/aaa_embedding/embedding_weights:0', ), tuple([v.name for v in trainable_vars])) self.evaluate(variables_lib.global_variables_initializer()) self.evaluate(lookup_ops.tables_initializer()) self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0])) self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) if use_safe_embedding_lookup: self.assertIn( 'SparseFillEmptyRows', [x.type for x in ops.get_default_graph().get_operations()]) else: self.assertNotIn( 'SparseFillEmptyRows', [x.type for x in ops.get_default_graph().get_operations()])
def test_encode_features(self): # Inputs. vocabulary_size = 4 # -1 values are ignored. input_a = np.array([ [3, -1, -1], # example 0, ids [3] [0, 1, -1], # example 1, ids [0, 1] ]) input_b = np.array([ [0, -1, -1], # example 0, ids [0] [-1, -1, -1], # example 1, ids [] ]) input_features = {"aaa": input_a, "bbb": input_b} # Embedding variable. embedding_dimension = 2 embedding_values = ( (1., 2.), # id 0 (3., 5.), # id 1 (7., 11.), # id 2 (9., 13.) # id 3 ) # Expected lookup result, using combiner='mean'. expected_lookups_a = ( # example 0: (9., 13.), # ids [3], embedding = [9, 13] # example 1: (2., 3.5 ), # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] ) expected_lookups_b = ( # example 0: (1., 2.), # ids [0], embedding = [1, 2] # example 1: (0., 0.), # ids [], embedding = [0, 0] ) # Build columns. categorical_column_a = feature_column.categorical_column_with_identity( key="aaa", num_buckets=vocabulary_size) categorical_column_b = feature_column.categorical_column_with_identity( key="bbb", num_buckets=vocabulary_size) embed_column_a, embed_column_b = feature_column.shared_embedding_columns( [categorical_column_a, categorical_column_b], dimension=embedding_dimension, initializer=lambda shape, dtype, partition_info: embedding_values, shared_embedding_collection_name="custom_collection_name") feature_columns = {"aaa": embed_column_a, "bbb": embed_column_b} cols_to_tensors = feature_lib.encode_features( input_features, feature_columns.values(), mode=model_fn.ModeKeys.EVAL) embedding_lookup_a = cols_to_tensors[feature_columns["aaa"]] embedding_lookup_b = cols_to_tensors[feature_columns["bbb"]] # Assert expected embedding variable and lookups. global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) embedding_var = global_vars[0] with session.Session() as sess: sess.run(variables.global_variables_initializer()) sess.run(lookup_ops.tables_initializer()) self.assertAllEqual(embedding_values, embedding_var.eval()) self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval()) self.assertAllEqual(expected_lookups_b, embedding_lookup_b.eval())
def test_encode_listwise_features(self): # Batch size = 2, list_size = 2. features = { "query_length": ops.convert_to_tensor([[1], [2]]), "utility": ops.convert_to_tensor([[[1.0], [0.0]], [[0.0], [1.0]]]), "unigrams": sparse_tensor_lib.SparseTensor( indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]], values=["ranking", "regression", "classification", "ordinal"], dense_shape=[2, 2, 1]) } context_feature_columns = { "query_length": feature_column.numeric_column("query_length", shape=(1, ), default_value=0, dtype=dtypes.int64) } example_feature_columns = { "utility": feature_column.numeric_column("utility", shape=(1, ), default_value=0.0, dtype=dtypes.float32), "unigrams": feature_column.embedding_column( feature_column.categorical_column_with_vocabulary_list( "unigrams", vocabulary_list=[ "ranking", "regression", "classification", "ordinal" ]), dimension=10) } with self.assertRaisesRegexp( ValueError, r"2nd dimesion of tensor must be equal to input size: 3, but found .*" ): feature_lib.encode_listwise_features( features, input_size=3, context_feature_columns=context_feature_columns, example_feature_columns=example_feature_columns) context_features, example_features = feature_lib.encode_listwise_features( features, input_size=2, context_feature_columns=context_feature_columns, example_feature_columns=example_feature_columns) self.assertAllEqual(["query_length"], sorted(context_features)) self.assertAllEqual(["unigrams", "utility"], sorted(example_features)) self.assertAllEqual([2, 2, 10], example_features["unigrams"].get_shape().as_list()) with session.Session() as sess: sess.run(variables.global_variables_initializer()) sess.run(lookup_ops.tables_initializer()) context_features, example_features = sess.run( [context_features, example_features]) self.assertAllEqual([[1], [2]], context_features["query_length"]) self.assertAllEqual([[[1.0], [0.0]], [[0.0], [1.0]]], example_features["utility"])
def testPrepareFeaturesForSQSS(self): mode = model_fn_lib.ModeKeys.TRAIN seq_feature_name = 'seq_feature' sparse_seq_feature_name = 'wire_cast' ctx_feature_name = 'ctx_feature' sequence_length = 4 embedding_dimension = 8 features = { sparse_seq_feature_name: sparse_tensor.SparseTensor(indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0], [1, 1, 1], [2, 0, 0], [2, 1, 1]], values=[ b'marlo', b'stringer', b'omar', b'stringer', b'marlo', b'marlo', b'omar' ], dense_shape=[3, 2, 2]), seq_feature_name: constant_op.constant(1.0, shape=[sequence_length]), ctx_feature_name: constant_op.constant(2.0) } labels = constant_op.constant(5.0, shape=[sequence_length]) wire_cast = feature_column.sparse_column_with_keys( 'wire_cast', ['marlo', 'omar', 'stringer']) sequence_feature_columns = [ feature_column.real_valued_column(seq_feature_name, dimension=1), feature_column.embedding_column( wire_cast, dimension=embedding_dimension, initializer=init_ops.ones_initializer()) ] context_feature_columns = [ feature_column.real_valued_column(ctx_feature_name, dimension=1) ] expected_sequence = { rnn_common.RNNKeys.LABELS_KEY: np.array([5., 5., 5., 5.]), seq_feature_name: np.array([1., 1., 1., 1.]), sparse_seq_feature_name: sparse_tensor.SparseTensor(indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0], [1, 1, 1], [2, 0, 0], [2, 1, 1]], values=[ b'marlo', b'stringer', b'omar', b'stringer', b'marlo', b'marlo', b'omar' ], dense_shape=[3, 2, 2]), } expected_context = {ctx_feature_name: 2.} sequence, context = ssre._prepare_features_for_sqss( features, labels, mode, sequence_feature_columns, context_feature_columns) def assert_equal(expected, got): self.assertEqual(sorted(expected), sorted(got)) for k, v in expected.items(): if isinstance(v, sparse_tensor.SparseTensor): self.assertAllEqual(v.values.eval(), got[k].values) self.assertAllEqual(v.indices.eval(), got[k].indices) self.assertAllEqual(v.dense_shape.eval(), got[k].dense_shape) else: self.assertAllEqual(v, got[k]) with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) sess.run(lookup_ops.tables_initializer()) actual_sequence, actual_context = sess.run([sequence, context]) assert_equal(expected_sequence, actual_sequence) assert_equal(expected_context, actual_context)
def train(data_dicts, class_num, input_size, lr, n_epochs, num_clones, iters_cnt, val_every, model_init_fn, save_cback, atrous_rates=[6, 12, 18], fine_tune_batch_norm=True, output_stride=16): tf.logging.set_verbosity(tf.logging.INFO) # Set up deployment (i.e., multi-GPUs and/or multi-replicas). config = model_deploy.DeploymentConfig(num_clones=num_clones, clone_on_cpu=clone_on_cpu, replica_id=task, num_replicas=num_replicas, num_ps_tasks=num_ps_tasks) with tf.Graph().as_default(): with tf.device(config.inputs_device()): samples = get(data_dicts['train'], input_size, is_training=True, model_variant=model_variant) samples_val = get(data_dicts['val'], input_size, is_training=True, model_variant=model_variant) inputs_queue = prefetch_queue.prefetch_queue(samples, capacity=128 * config.num_clones, dynamic_pad=True) inputs_queue_val = prefetch_queue.prefetch_queue(samples_val, capacity=128 * config.num_clones, dynamic_pad=True) coord = tf.train.Coordinator() # Create the global step on the device storing the variables. with tf.device(config.variables_device()): global_step = tf.train.create_global_step() # Define the model and create clones. model_fn = _build_deeplab model_args = (inputs_queue, { 'semantic': class_num }, input_size, atrous_rates, output_stride, fine_tune_batch_norm) clones = model_deploy.create_clones(config, model_fn, args=model_args) # Gather update_ops from the first clone. These contain, for example, # the updates for the batch_norm variables created by model_fn. first_clone_scope = config.clone_scope(0) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope) # Build the optimizer based on the device specification. with tf.device(config.optimizer_device()): learning_rate = lr optimizer = tf.train.AdamOptimizer(learning_rate) with tf.device(config.variables_device()): total_loss, grads_and_vars = model_deploy.optimize_clones( clones, optimizer) total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.') model_fn_val = _build_deeplab_val model_args_val = (inputs_queue_val, { 'semantic': class_num }, input_size, atrous_rates, output_stride) val_clones, val_losses = create_val_clones(num_clones, config, model_fn_val, args=model_args_val) val_total_loss = get_clones_val_losses(val_clones, None, val_losses) # Modify the gradients for biases and last layer variables. last_layers = model.get_extra_layer_scopes() grad_mult = train_utils.get_model_gradient_multipliers( last_layers, last_layer_gradient_multiplier) if grad_mult: grads_and_vars = slim.learning.multiply_gradients( grads_and_vars, grad_mult) # Create gradient update op. 
grad_updates = optimizer.apply_gradients(grads_and_vars, global_step=global_step) update_ops.append(grad_updates) update_op = tf.group(*update_ops) with tf.control_dependencies([update_op]): train_tensor = tf.identity(total_loss, name='train_op') config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) coord.clear_stop() sess = tf.Session(config=config) graph = ops.get_default_graph() with graph.as_default(): with ops.name_scope('init_ops'): init_op = variables.global_variables_initializer() ready_op = variables.report_uninitialized_variables() local_init_op = control_flow_ops.group( variables.local_variables_initializer(), lookup_ops.tables_initializer()) # graph.finalize() sess.run([init_op, ready_op, local_init_op]) queue_runners = graph.get_collection(ops.GraphKeys.QUEUE_RUNNERS) threads = [] for qr in queue_runners: threads.extend( qr.create_threads(sess, coord=coord, daemon=True, start=True)) # # for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): # # print(i) # vary_23 = [v for v in tf.global_variables() if v.name == 'xception_65/middle_flow/block1/unit_8/xception_module/separable_conv3_depthwise/BatchNorm/moving_mean:0'][0] # # beta_23 = [v for v in tf.global_variables() if v.name == 'xception_65/middle_flow/block1/unit_8/xception_module/separable_conv3_depthwise/BatchNorm/gamma:0'][0] # for i in range(1000): # train_loss = sess.run(train_tensor) # print(train_loss) # vary, beta = sess.run([vary_23, beta_23]) # print('mean', vary[0:3]) # print('beta', beta[0:3]) # if (i + 1) % 10 == 0: # for i in range(10): # val_loss = sess.run(val_total_loss) # vary, beta = sess.run([vary_23, beta_23]) # print('mean val', vary[0:3]) # print('beta', beta[0:3]) # print('VAl_loss', val_loss) model_init_fn(sess) saver = tf.train.Saver() eval_planner = EvalPlanner(n_epochs, val_every) progress = sly.progress_counter_train(n_epochs, iters_cnt['train']) best_val_loss = float('inf') epoch_flt = 0 for epoch in range(n_epochs): logger.info("Before new epoch", extra={'epoch': epoch_flt}) for train_it in range(iters_cnt['train']): total_loss = sess.run(train_tensor) metrics_values_train = { 'loss': total_loss, } progress.iter_done_report() epoch_flt = epoch_float(epoch, train_it + 1, iters_cnt['train']) sly.report_metrics_training(epoch_flt, metrics_values_train) if eval_planner.need_validation(epoch_flt): logger.info("Before validation", extra={'epoch': epoch_flt}) overall_val_loss = 0 for val_it in range(iters_cnt['val']): overall_val_loss += sess.run(val_total_loss) logger.info("Validation in progress", extra={ 'epoch': epoch_flt, 'val_iter': val_it, 'val_iters': iters_cnt['val'] }) metrics_values_val = { 'loss': overall_val_loss / iters_cnt['val'], } sly.report_metrics_validation(epoch_flt, metrics_values_val) logger.info("Validation has been finished", extra={'epoch': epoch_flt}) eval_planner.validation_performed() val_loss = metrics_values_val['loss'] model_is_best = val_loss < best_val_loss if model_is_best: best_val_loss = val_loss logger.info( 'It\'s been determined that current model is the best one for a while.' ) save_cback(saver, sess, model_is_best, opt_data={ 'epoch': epoch_flt, 'val_metrics': metrics_values_val, }) logger.info("Epoch was finished", extra={'epoch': epoch_flt})
def _initialized_session(self, config=None):
  sess = session_lib.Session(config=config)
  sess.run(variables_lib.global_variables_initializer())
  sess.run(lookup_ops.tables_initializer())
  return sess

def train(datasets_dicts, epochs, val_every, iters_cnt, validate_with_eval_model, pipeline_config, num_clones=1, save_cback=None): logger.info('Start train') configs = configs_from_pipeline(pipeline_config) model_config = configs['model'] train_config = configs['train_config'] create_model_fn = functools.partial(model_builder.build, model_config=model_config, is_training=True) detection_model = create_model_fn() def get_next(dataset): return dataset_util.make_initializable_iterator( build_dataset(dataset)).get_next() create_tensor_dict_fn = functools.partial(get_next, datasets_dicts['train']) create_tensor_dict_fn_val = functools.partial(get_next, datasets_dicts['val']) data_augmentation_options = [ preprocessor_builder.build(step) for step in train_config.data_augmentation_options ] with tf.Graph().as_default(): # Build a configuration specifying multi-GPU and multi-replicas. deploy_config = model_deploy.DeploymentConfig( num_clones=4, clone_on_cpu=False, replica_id=0, num_replicas=1, num_ps_tasks=0, worker_job_name='lonely_worker') # Place the global step on the device storing the variables. with tf.device(deploy_config.variables_device()): global_step = slim.create_global_step() with tf.device(deploy_config.inputs_device()): coord = coordinator.Coordinator() input_queue = create_input_queue( train_config.batch_size, create_tensor_dict_fn, train_config.batch_queue_capacity, train_config.num_batch_queue_threads, train_config.prefetch_queue_capacity, data_augmentation_options) input_queue_val = create_input_queue( train_config.batch_size, create_tensor_dict_fn_val, train_config.batch_queue_capacity, train_config.num_batch_queue_threads, train_config.prefetch_queue_capacity, data_augmentation_options) # create validation graph create_model_fn_val = functools.partial( model_builder.build, model_config=model_config, is_training=not validate_with_eval_model) with tf.device(deploy_config.optimizer_device()): training_optimizer, optimizer_summary_vars = optimizer_builder.build( train_config.optimizer) for var in optimizer_summary_vars: tf.summary.scalar(var.op.name, var, family='LearningRate') train_losses = [] grads_and_vars = [] with slim.arg_scope([slim.model_variable, slim.variable], device='/device:CPU:0'): for curr_dev_id in range(num_clones): with tf.device('/gpu:{}'.format(curr_dev_id)): with tf.name_scope( 'clone_{}'.format(curr_dev_id)) as scope: with tf.variable_scope( tf.get_variable_scope(), reuse=True if curr_dev_id > 0 else None): losses = _create_losses_val( input_queue, create_model_fn, train_config) clones_loss = tf.add_n(losses) clones_loss = tf.divide(clones_loss, 1.0 * num_clones) grads = training_optimizer.compute_gradients( clones_loss) train_losses.append(clones_loss) grads_and_vars.append(grads) if curr_dev_id == 0: update_ops = tf.get_collection( tf.GraphKeys.UPDATE_OPS) val_total_loss = get_val_loss(num_clones, input_queue_val, create_model_fn_val, train_config) with tf.device(deploy_config.optimizer_device()): total_loss = tf.add_n(train_losses) grads_and_vars = model_deploy._sum_clones_gradients(grads_and_vars) total_loss = tf.check_numerics(total_loss, 'LossTensor is inf or nan.') # Optionally multiply bias gradients by train_config.bias_grad_multiplier. if train_config.bias_grad_multiplier: biases_regex_list = ['.*/biases'] grads_and_vars = variables_helper.multiply_gradients_matching_regex( grads_and_vars, biases_regex_list, multiplier=train_config.bias_grad_multiplier) # Optionally freeze some layers by setting their gradients to be zero. 
if train_config.freeze_variables: grads_and_vars = variables_helper.freeze_gradients_matching_regex( grads_and_vars, train_config.freeze_variables) # Optionally clip gradients if train_config.gradient_clipping_by_norm > 0: with tf.name_scope('clip_grads'): grads_and_vars = slim.learning.clip_gradient_norms( grads_and_vars, train_config.gradient_clipping_by_norm) # Create gradient updates. grad_updates = training_optimizer.apply_gradients( grads_and_vars, global_step=global_step) update_ops.append(grad_updates) update_op = tf.group(*update_ops, name='update_barrier') with tf.control_dependencies([update_op]): train_tensor = tf.identity(total_loss, name='train_op') config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) coord.clear_stop() sess = tf.Session(config=config) saver = tf.train.Saver() graph = ops.get_default_graph() with graph.as_default(): with ops.name_scope('init_ops'): init_op = variables.global_variables_initializer() ready_op = variables.report_uninitialized_variables() local_init_op = control_flow_ops.group( variables.local_variables_initializer(), lookup_ops.tables_initializer()) # graph.finalize() sess.run([init_op, ready_op, local_init_op]) queue_runners = graph.get_collection(ops.GraphKeys.QUEUE_RUNNERS) threads = [] for qr in queue_runners: threads.extend( qr.create_threads(sess, coord=coord, daemon=True, start=True)) logger.info('Start restore') if train_config.fine_tune_checkpoint: var_map = detection_model.restore_map( fine_tune_checkpoint_type=train_config. fine_tune_checkpoint_type, load_all_detection_checkpoint_vars=( train_config.load_all_detection_checkpoint_vars)) available_var_map = ( variables_helper.get_variables_available_in_checkpoint( var_map, train_config.fine_tune_checkpoint)) if 'global_step' in available_var_map: del available_var_map['global_step'] init_saver = tf.train.Saver(available_var_map) logger.info('Restoring model weights from previous checkpoint.') init_saver.restore(sess, train_config.fine_tune_checkpoint) logger.info('Model restored.') eval_planner = EvalPlanner(epochs, val_every) progress = sly.progress_counter_train(epochs, iters_cnt['train']) best_val_loss = float('inf') epoch_flt = 0 for epoch in range(epochs): logger.info("Before new epoch", extra={'epoch': epoch_flt}) for train_it in range(iters_cnt['train']): total_loss, np_global_step = sess.run( [train_tensor, global_step]) metrics_values_train = { 'loss': total_loss, } progress.iter_done_report() epoch_flt = epoch_float(epoch, train_it + 1, iters_cnt['train']) sly.report_metrics_training(epoch_flt, metrics_values_train) if eval_planner.need_validation(epoch_flt): logger.info("Before validation", extra={'epoch': epoch_flt}) overall_val_loss = 0 for val_it in range(iters_cnt['val']): overall_val_loss += sess.run(val_total_loss) logger.info("Validation in progress", extra={ 'epoch': epoch_flt, 'val_iter': val_it, 'val_iters': iters_cnt['val'] }) metrics_values_val = { 'loss': overall_val_loss / iters_cnt['val'], } sly.report_metrics_validation(epoch_flt, metrics_values_val) logger.info("Validation has been finished", extra={'epoch': epoch_flt}) eval_planner.validation_performed() val_loss = metrics_values_val['loss'] model_is_best = val_loss < best_val_loss if model_is_best: best_val_loss = val_loss logger.info( 'It\'s been determined that current model is the best one for a while.' 
) save_cback(saver, sess, model_is_best, opt_data={ 'epoch': epoch_flt, 'val_metrics': metrics_values_val, }) logger.info("Epoch was finished", extra={'epoch': epoch_flt}) coord.request_stop() coord.join(threads)
def main_op():
  init_local = variables.local_variables_initializer()
  init_tables = lookup_ops.tables_initializer()
  return control_flow_ops.group(init_local, init_tables)

def _initialized_session():
  sess = session.Session()
  sess.run(variables_lib.global_variables_initializer())
  sess.run(lookup_ops.tables_initializer())
  return sess

def train(train_op, logdir, train_step_fn=train_step, train_step_kwargs=_USE_DEFAULT, log_every_n_steps=1, graph=None, master='', is_chief=True, global_step=None, number_of_steps=None, init_op=_USE_DEFAULT, init_feed_dict=None, local_init_op=_USE_DEFAULT, init_fn=None, ready_op=_USE_DEFAULT, summary_op=_USE_DEFAULT, save_summaries_secs=600, summary_writer=_USE_DEFAULT, startup_delay_steps=0, saver=None, save_interval_secs=600, sync_optimizer=None, session_config=None, session_wrapper=None, trace_every_n_steps=None, ignore_live_threads=False): """Runs a training loop using a TensorFlow supervisor. When the sync_optimizer is supplied, gradient updates are applied synchronously. Otherwise, gradient updates are applied asynchronous. Args: train_op: A `Tensor` that, when executed, will apply the gradients and return the loss value. logdir: The directory where training logs are written to. If None, model checkpoints and summaries will not be written. train_step_fn: The function to call in order to execute a single gradient step. The function must have take exactly four arguments: the current session, the `train_op` `Tensor`, a global step `Tensor` and a dictionary. train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By default, two `Boolean`, scalar ops called "should_stop" and "should_log" are provided. log_every_n_steps: The frequency, in terms of global steps, that the loss and global step and logged. graph: The graph to pass to the supervisor. If no graph is supplied the default graph is used. master: The address of the tensorflow master. is_chief: Specifies whether or not the training is being run by the primary replica during replica training. global_step: The `Tensor` representing the global step. If left as `None`, then slim.variables.get_or_create_global_step() is used. number_of_steps: The max number of gradient steps to take during training, as measured by 'global_step': training will stop if global_step is greater than 'number_of_steps'. If the value is left as None, training proceeds indefinitely. init_op: The initialization operation. If left to its default value, then the session is initialized by calling `tf.global_variables_initializer()`. init_feed_dict: A feed dictionary to use when executing the `init_op`. local_init_op: The local initialization operation. If left to its default value, then the session is initialized by calling `tf.local_variables_initializer()` and `tf.tables_initializer()`. init_fn: An optional callable to be executed after `init_op` is called. The callable must accept one argument, the session being initialized. ready_op: Operation to check if the model is ready to use. If left to its default value, then the session checks for readiness by calling `tf.report_uninitialized_variables()`. summary_op: The summary operation. save_summaries_secs: How often, in seconds, to save summaries. summary_writer: `SummaryWriter` to use. Can be `None` to indicate that no summaries should be written. If unset, we create a SummaryWriter. startup_delay_steps: The number of steps to wait for before beginning. Note that this must be 0 if a sync_optimizer is supplied. saver: Saver to save checkpoints. If None, a default one will be created and used. save_interval_secs: How often, in seconds, to save the model to `logdir`. sync_optimizer: an instance of tf.train.SyncReplicasOptimizer, or a list of them. If the argument is supplied, gradient updates will be synchronous. If left as `None`, gradient updates will be asynchronous. 
session_config: An instance of `tf.ConfigProto` that will be used to configure the `Session`. If left as `None`, the default will be used. session_wrapper: A function that takes a `tf.Session` object as the only argument and returns a wrapped session object that has the same methods that the original object has, or `None`. Iff not `None`, the wrapped object will be used for training. trace_every_n_steps: produce and save a `Timeline` in Chrome trace format and add it to the summaries every `trace_every_n_steps`. If None, no trace information will be produced or saved. ignore_live_threads: If `True` ignores threads that remain running after a grace period when stopping the supervisor, instead of raising a RuntimeError. Returns: the value of the loss function after training. Raises: ValueError: if `train_op` is empty or if `startup_delay_steps` is non-zero when `sync_optimizer` is supplied, if `number_of_steps` is negative, or if `trace_every_n_steps` is not `None` and no `logdir` is provided. """ if train_op is None: raise ValueError('train_op cannot be None.') if logdir is None: if summary_op != _USE_DEFAULT: raise ValueError('Cannot provide summary_op because logdir=None') if saver is not None: raise ValueError('Cannot provide saver because logdir=None') if trace_every_n_steps is not None: raise ValueError('Cannot provide trace_every_n_steps because ' 'logdir=None') if isinstance(sync_optimizer, sync_replicas_optimizer.SyncReplicasOptimizer): sync_optimizer = [sync_optimizer] if sync_optimizer is not None and startup_delay_steps > 0: raise ValueError( 'startup_delay_steps must be zero when sync_optimizer is supplied.' ) if number_of_steps is not None and number_of_steps <= 0: raise ValueError( '`number_of_steps` must be either None or a positive number.') graph = graph or ops.get_default_graph() with graph.as_default(): if global_step is None: global_step = training_util.get_or_create_global_step() saver = saver or tf_saver.Saver() if sync_optimizer is not None: for opt in sync_optimizer: if not isinstance( opt, sync_replicas_optimizer.SyncReplicasOptimizer): raise ValueError( '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer.' 
) with ops.name_scope('init_ops'): if init_op == _USE_DEFAULT: init_op = variables.global_variables_initializer() if ready_op == _USE_DEFAULT: ready_op = variables.report_uninitialized_variables() if local_init_op == _USE_DEFAULT: local_init_op = control_flow_ops.group( variables.local_variables_initializer(), lookup_ops.tables_initializer()) if sync_optimizer is not None and isinstance(sync_optimizer, list): with ops.control_dependencies( [local_init_op] if local_init_op is not None else []): if is_chief: local_init_op = control_flow_ops.group( *[opt.chief_init_op for opt in sync_optimizer]) else: local_init_op = control_flow_ops.group( * [opt.local_step_init_op for opt in sync_optimizer]) ready_for_local_init_op = control_flow_ops.group( *[opt.ready_for_local_init_op for opt in sync_optimizer]) else: ready_for_local_init_op = None if summary_op == _USE_DEFAULT: summary_op = summary.merge_all() if summary_writer == _USE_DEFAULT: summary_writer = supervisor.Supervisor.USE_DEFAULT if is_chief and sync_optimizer is not None: # Need to create these BEFORE the supervisor finalizes the graph: init_tokens_op = [ opt.get_init_tokens_op() for opt in sync_optimizer ] chief_queue_runner = [ opt.get_chief_queue_runner() for opt in sync_optimizer ] if train_step_kwargs == _USE_DEFAULT: with ops.name_scope('train_step'): train_step_kwargs = {} if number_of_steps: should_stop_op = math_ops.greater_equal( global_step, number_of_steps) else: should_stop_op = constant_op.constant(False) train_step_kwargs['should_stop'] = should_stop_op if log_every_n_steps > 0: train_step_kwargs['should_log'] = math_ops.equal( math_ops.mod(global_step, log_every_n_steps), 0) if is_chief and trace_every_n_steps is not None: train_step_kwargs['should_trace'] = math_ops.equal( math_ops.mod(global_step, trace_every_n_steps), 0) train_step_kwargs['logdir'] = logdir sv = supervisor.Supervisor(graph=graph, is_chief=is_chief, logdir=logdir, init_op=init_op, init_feed_dict=init_feed_dict, local_init_op=local_init_op, ready_for_local_init_op=ready_for_local_init_op, ready_op=ready_op, summary_op=summary_op, summary_writer=summary_writer, global_step=global_step, saver=saver, save_summaries_secs=save_summaries_secs, save_model_secs=save_interval_secs, init_fn=init_fn) if summary_writer is not None: train_step_kwargs['summary_writer'] = sv.summary_writer total_loss = None should_retry = True while should_retry: try: should_retry = False with sv.managed_session(master, start_standard_services=False, config=session_config) as sess: logging.info('Starting Session.') if session_wrapper is not None: logging.info('Wrapping session with wrapper function: %s', session_wrapper) sess = session_wrapper(sess) if is_chief: if logdir: sv.start_standard_services(sess) elif startup_delay_steps > 0: # (use sys.maxsize because sys.maxint doesn't exist in Python 3) _wait_for_step( sess, global_step, min(startup_delay_steps, number_of_steps or sys.maxsize)) threads = sv.start_queue_runners(sess) logging.info('Starting Queues.') if is_chief and sync_optimizer is not None: sv.start_queue_runners(sess, chief_queue_runner) sess.run(init_tokens_op) try: while not sv.should_stop(): total_loss, should_stop = train_step_fn( sess, train_op, global_step, train_step_kwargs) if should_stop: logging.info('Stopping Training.') sv.request_stop() break except errors.OutOfRangeError as e: # OutOfRangeError is thrown when epoch limit per # tf.train.limit_epochs is reached. logging.info( 'Caught OutOfRangeError. Stopping Training. 
%s', e) if logdir and sv.is_chief: logging.info('Finished training! Saving model to disk.') sv.saver.save(sess, sv.save_path, global_step=sv.global_step) sv.stop(threads, close_summary_writer=True, ignore_live_threads=ignore_live_threads) except errors.AbortedError: # Always re-run on AbortedError as it indicates a restart of one of the # distributed tensorflow servers. logging.info('Retrying training!') should_retry = True return total_loss
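
# Hypothetical caller sketch (an assumption, not part of the original module):
# passing a custom local_init_op to the slim-style train() above, mirroring
# the local_variables_initializer + tables_initializer default described in
# its docstring. Construction of train_op is left to the caller; the step
# counts are placeholder values.
def train_with_explicit_table_init(train_op, logdir):
  custom_local_init_op = control_flow_ops.group(
      variables.local_variables_initializer(),
      lookup_ops.tables_initializer())
  return train(
      train_op,
      logdir,
      local_init_op=custom_local_init_op,
      number_of_steps=1000,
      log_every_n_steps=100)
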
def test_dense_features_not_trainable(self): # Inputs. vocabulary_size = 3 sparse_input = sparse_tensor.SparseTensorValue( # example 0, ids [2] # example 1, ids [0, 1] # example 2, ids [] # example 3, ids [1] indices=((0, 0), (1, 0), (1, 4), (3, 0)), values=(2, 0, 1, 1), dense_shape=(4, 5)) # Embedding variable. embedding_dimension = 2 embedding_values = ( (1., 2.), # id 0 (3., 5.), # id 1 (7., 11.) # id 2 ) def _initializer(shape, dtype, partition_info=None): self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertEqual(dtypes.float32, dtype) self.assertIsNone(partition_info) return embedding_values # Expected lookup result, using combiner='mean'. expected_lookups = ( # example 0, ids [2], embedding = [7, 11] (7., 11.), # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] (2., 3.5), # example 2, ids [], embedding = [0, 0] (0., 0.), # example 3, ids [1], embedding = [3, 5] (3., 5.), ) # Build columns. categorical_column = fc.categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) embedding_column = fc.embedding_column(categorical_column, dimension=embedding_dimension, initializer=_initializer, trainable=False) # Provide sparse input and get dense result. dense_features = df.DenseFeatures((embedding_column, ))({ 'aaa': sparse_input }) # Assert expected embedding variable and lookups. global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) self.assertCountEqual( ('dense_features/aaa_embedding/embedding_weights:0', ), tuple([v.name for v in global_vars])) self.assertCountEqual([], ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES)) self.evaluate(variables_lib.global_variables_initializer()) self.evaluate(lookup_ops.tables_initializer()) self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
def _test_dense_features(self, trainable=True): # Inputs. vocabulary_size = 3 sparse_input_a = sparse_tensor.SparseTensorValue( # example 0, ids [2] # example 1, ids [0, 1] indices=((0, 0), (1, 0), (1, 4)), values=(2, 0, 1), dense_shape=(2, 5)) sparse_input_b = sparse_tensor.SparseTensorValue( # example 0, ids [0] # example 1, ids [] indices=((0, 0), ), values=(0, ), dense_shape=(2, 5)) sparse_input_c = sparse_tensor.SparseTensorValue( # example 0, ids [2] # example 1, ids [0, 1] indices=((0, 1), (1, 1), (1, 3)), values=(2, 0, 1), dense_shape=(2, 5)) sparse_input_d = sparse_tensor.SparseTensorValue( # example 0, ids [2] # example 1, ids [] indices=((0, 1), ), values=(2, ), dense_shape=(2, 5)) # Embedding variable. embedding_dimension = 2 embedding_values = ( (1., 2.), # id 0 (3., 5.), # id 1 (7., 11.) # id 2 ) def _initializer(shape, dtype, partition_info=None): self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertEqual(dtypes.float32, dtype) self.assertIsNone(partition_info) return embedding_values # Expected lookup result, using combiner='mean'. expected_lookups = ( # example 0: # A ids [2], embedding = [7, 11] # B ids [0], embedding = [1, 2] # C ids [2], embedding = [7, 11] # D ids [2], embedding = [7, 11] (7., 11., 1., 2., 7., 11., 7., 11.), # example 1: # A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] # B ids [], embedding = [0, 0] # C ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] # D ids [], embedding = [0, 0] (2., 3.5, 0., 0., 2., 3.5, 0., 0.), ) # Build columns. categorical_column_a = fc.categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) categorical_column_b = fc.categorical_column_with_identity( key='bbb', num_buckets=vocabulary_size) categorical_column_c = fc.categorical_column_with_identity( key='ccc', num_buckets=vocabulary_size) categorical_column_d = fc.categorical_column_with_identity( key='ddd', num_buckets=vocabulary_size) embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( [categorical_column_a, categorical_column_b], dimension=embedding_dimension, initializer=_initializer, trainable=trainable) embedding_column_c, embedding_column_d = fc.shared_embedding_columns_v2( [categorical_column_c, categorical_column_d], dimension=embedding_dimension, initializer=_initializer, trainable=trainable) features = { 'aaa': sparse_input_a, 'bbb': sparse_input_b, 'ccc': sparse_input_c, 'ddd': sparse_input_d } # Provide sparse input and get dense result. dense_features = df.DenseFeatures( feature_columns=(embedding_column_b, embedding_column_a, embedding_column_c, embedding_column_d))(features) # Assert expected embedding variable and lookups. global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) self.assertCountEqual( ['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'], tuple([v.name for v in global_vars])) for v in global_vars: self.assertIsInstance(v, variables_lib.Variable) trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) if trainable: self.assertCountEqual( ['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'], tuple([v.name for v in trainable_vars])) else: self.assertCountEqual([], tuple([v.name for v in trainable_vars])) shared_embedding_vars = global_vars self.evaluate(variables_lib.global_variables_initializer()) self.evaluate(lookup_ops.tables_initializer()) self.assertAllEqual(embedding_values, self.evaluate(shared_embedding_vars[0])) self.assertAllEqual(expected_lookups, self.evaluate(dense_features))