class InterfaceTests(keras_parameterized.TestCase):
  """Tests for the object-tracking interface of wrapped data structures."""

  def testNoDependency(self):
    # An attribute wrapped in NoDependency is stored on the object but must
    # not appear among its checkpoint dependencies.
    root = tf.Module()
    hasdep = tf.Module()
    root.hasdep = hasdep
    nodep = tf.Module()
    root.nodep = data_structures.NoDependency(nodep)
    self.assertEqual(1, len(root._checkpoint_dependencies))
    self.assertIs(root._checkpoint_dependencies[0].ref, root.hasdep)
    self.assertIs(root.hasdep, hasdep)
    self.assertIs(root.nodep, nodep)

    class NoDependencyModel(training.Model):

      # Disables attribute tracking for everything assigned in __init__.
      @base.no_automatic_dependency_tracking
      def __init__(self):
        super(NoDependencyModel, self).__init__()
        self.a = []
        self.b = tf.Module()

    nodeps = NoDependencyModel()
    # With tracking disabled, only the model itself is visible to
    # checkpointing utilities.
    self.assertEqual([nodeps], util.list_objects(nodeps))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testDictionariesBasic(self):
    # Dict attributes are wrapped so their values (including values nested in
    # lists) become checkpoint dependencies and tracked layers.
    a = training.Model()
    b = training.Model()
    a.attribute = {"b": b}
    c = training.Model()
    a.attribute["c"] = []
    a.attribute["c"].append(c)
    a_deps = util.list_objects(a)
    self.assertIn(b, a_deps)
    self.assertIn(c, a_deps)
    self.assertIs(b, a.attribute["b"])
    six.assertCountEqual(
        self, ["b", "c"],
        [dep.name for dep in a.attribute._checkpoint_dependencies])
    self.assertEqual([b, c], a.layers)
    self.assertEqual([b, c], a.attribute.layers)
    self.assertEqual([c], a.attribute["c"].layers)
    checkpoint = tf.train.Checkpoint(a=a)
    save_path = checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
    with self.cached_session():
      checkpoint.restore(save_path).assert_consumed().initialize_or_restore()

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testNoDepList(self):
    # A NoDependency list stays a plain list and tolerates arbitrary
    # mutation; a tracked list raises if elements are moved after a save.
    a = training.Model()
    a.l1 = data_structures.NoDependency([])
    a.l1.insert(1, 0)
    self.assertIsInstance(a.l1, list)
    checkpoint = tf.train.Checkpoint(a=a)
    checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
    a.l2 = []
    a.l2.insert(1, tf.Module())
    with self.assertRaisesRegex(ValueError, "A list element was replaced"):
      checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
class MixedPrecisionTest(keras_parameterized.TestCase):
  """Tests for the V1 mixed-precision graph-rewrite entry points."""

  # Environment variable consulted by the grappler auto-mixed-precision pass.
  IGNORE_PERF_VAR = 'TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_IGNORE_PERFORMANCE'

  def setUp(self):
    super(MixedPrecisionTest, self).setUp()
    # Enable the tests to be run on pre-Volta GPUs by telling the grappler pass
    # to ignore performance and always transform the graph.
    self._original_ignore_perf_value = os.getenv(self.IGNORE_PERF_VAR)
    os.environ[self.IGNORE_PERF_VAR] = '1'

  def tearDown(self):
    # Set the IGNORE_PERF_VAR variable back to its original value.
    if self._original_ignore_perf_value is not None:
      os.environ[self.IGNORE_PERF_VAR] = self._original_ignore_perf_value
    else:
      del os.environ[self.IGNORE_PERF_VAR]
    # Undo any rewrite enabled by a test so state does not leak across tests.
    tf.compat.v1.mixed_precision.disable_mixed_precision_graph_rewrite()
    super(MixedPrecisionTest, self).tearDown()

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_wrap_optimizer(self):
    # Enabling the rewrite wraps the optimizer in a LossScaleOptimizerV1
    # carrying the requested loss scale.
    opt = gradient_descent_v2.SGD(1.0)
    opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
        opt, 123.)
    self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizerV1)
    self.assertEqual(self.evaluate(opt.loss_scale), 123.)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_optimizer_errors(self):
    # Passing an already-wrapped optimizer must raise, and must not leave the
    # auto_mixed_precision grappler option enabled.
    opt = gradient_descent_v2.SGD(1.0)
    opt = loss_scale_optimizer_v2.LossScaleOptimizerV1(opt, 'dynamic')
    with self.assertRaisesRegex(
        ValueError, '"opt" must not already be an instance of a '
        'LossScaleOptimizer.'):
      tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(opt)
    self.assertFalse(tf.config.optimizer.get_experimental_options()
                     .get('auto_mixed_precision', False))

  @testing_utils.enable_v2_dtype_behavior
  def test_error_if_policy_is_set(self):
    # The rewrite is incompatible with a global mixed Keras dtype policy.
    with policy.policy_scope('mixed_float16'):
      with self.assertRaisesRegex(ValueError,
                                  'the global Keras dtype Policy has been set'):
        tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            gradient_descent_v2.SGD(1.0))
    # Test no error is thrown when the policy is currently the default.
    tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
        gradient_descent_v2.SGD(1.0))
    # Test no error is thrown when the policy is a non-mixed policy.
    with policy.policy_scope('float64'):
      tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
          gradient_descent_v2.SGD(1.0))
class GRULayerGradientTapeTest(keras_parameterized.TestCase):
  """Checks that a GRU layer is differentiable under tf.GradientTape."""

  @combinations.generate(combinations.combine(mode=['eager']))
  def test_in_tape(self):
    # Builds a small GRU, runs it inside a tape, and takes gradients of a
    # scalar loss w.r.t. the layer variables; the call must not raise.
    with self.test_session(config=_config):
      time_steps = 10
      embedding_size = 11
      gru_unit_size = 12

      gru = rnn.GRU(gru_unit_size,
                    return_sequences=True,
                    return_state=True,
                    recurrent_activation='sigmoid',
                    recurrent_initializer='glorot_uniform')

      x = tf.random.uniform([1, time_steps, embedding_size])
      y = tf.random.uniform([1, gru_unit_size])

      with tf.GradientTape() as tape:
        hidden_state = tf.zeros([1, gru_unit_size], dtype=tf.float32)
        _, state = gru(x, initial_state=hidden_state)
        loss = tf.reduce_mean(tf.square(state - y))

      tape.gradient(loss, gru.variables)
class SequenceFeaturesSavingTest(tf.test.TestCase, parameterized.TestCase):
  """Round-trips a SequenceFeatures model through JSON (de)serialization."""

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_with_sequence_features(self):
    # Build a model over one numeric and one categorical sequence column.
    cols = [
        tf.feature_column.sequence_numeric_column('a'),
        tf.feature_column.indicator_column(
            tf.feature_column.sequence_categorical_column_with_vocabulary_list(
                'b', ['one', 'two']))
    ]
    input_layers = {
        'a':
            keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
        'b':
            keras.layers.Input(
                shape=(None, 1), sparse=True, name='b', dtype='string')
    }

    fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
    # TODO(tibell): Figure out the right dtype and apply masking.
    # sequence_length_mask = array_ops.sequence_mask(sequence_length)
    # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
    x = keras.layers.GRU(32)(fc_layer)
    output = keras.layers.Dense(10)(x)

    model = keras.models.Model(input_layers, output)

    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    # Serialize the architecture to JSON and load it back.
    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    batch_size = 10
    timesteps = 1

    values_a = np.arange(10, dtype=np.float32)
    indices_a = np.zeros((10, 3), dtype=np.int64)
    indices_a[:, 0] = np.arange(10)
    inputs_a = tf.SparseTensor(indices_a, values_a, (batch_size, timesteps, 1))

    # Use the builtin `str` dtype: the `np.str` alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24, where it raises AttributeError.
    values_b = np.zeros(10, dtype=str)
    indices_b = np.zeros((10, 3), dtype=np.int64)
    indices_b[:, 0] = np.arange(10)
    inputs_b = tf.SparseTensor(indices_b, values_b, (batch_size, timesteps, 1))

    with self.cached_session():
      # Initialize tables for V1 lookup.
      if not tf.executing_eagerly():
        self.evaluate(tf.compat.v1.tables_initializer())

      # The reloaded model must predict a full batch from sparse inputs.
      self.assertLen(
          loaded_model.predict({
              'a': inputs_a,
              'b': inputs_b
          }, steps=1), batch_size)
class BatchNormalizationV1Test(keras_parameterized.TestCase):
  """Tests the `fused` attribute selection logic of V1 BatchNormalization."""

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_v1_fused_attribute(self):
    # Default: 4D input enables the fused implementation.
    norm = batch_normalization_v1.BatchNormalization()
    inp = keras.layers.Input((4, 4, 4))
    norm(inp)
    self.assertEqual(norm.fused, True)

    # Explicit fused=False is preserved after building.
    norm = batch_normalization_v1.BatchNormalization(fused=False)
    self.assertEqual(norm.fused, False)
    inp = keras.layers.Input(shape=(4, 4, 4))
    norm(inp)
    self.assertEqual(norm.fused, False)

    # virtual_batch_size reports fused=True before building but falls back to
    # the unfused implementation once built.
    norm = batch_normalization_v1.BatchNormalization(virtual_batch_size=2)
    self.assertEqual(norm.fused, True)
    inp = keras.layers.Input(shape=(2, 2, 2))
    norm(inp)
    self.assertEqual(norm.fused, False)
class EmbeddingTest(keras_parameterized.TestCase):
  """Tests for the keras.layers.Embedding layer."""

  @keras_parameterized.run_all_keras_modes
  def test_embedding(self):
    if tf.test.is_gpu_available():
      self.skipTest('Only test embedding on CPU.')

    # Plain embedding with a fixed input_length.
    testing_utils.layer_test(
        keras.layers.Embedding,
        kwargs={'output_dim': 4, 'input_dim': 10, 'input_length': 2},
        input_shape=(3, 2),
        input_dtype='int32',
        expected_output_dtype='float32')

    # mask_zero variant on 2D input.
    testing_utils.layer_test(
        keras.layers.Embedding,
        kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True},
        input_shape=(3, 2),
        input_dtype='int32',
        expected_output_dtype='float32')

    # mask_zero variant on 3D input.
    testing_utils.layer_test(
        keras.layers.Embedding,
        kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True},
        input_shape=(3, 4, 2),
        input_dtype='int32',
        expected_output_dtype='float32')

    # mask_zero with a tuple input_length for 3D input.
    testing_utils.layer_test(
        keras.layers.Embedding,
        kwargs={'output_dim': 4,
                'input_dim': 10,
                'mask_zero': True,
                'input_length': (None, 2)},
        input_shape=(3, 4, 2),
        input_dtype='int32',
        expected_output_dtype='float32')

  @keras_parameterized.run_all_keras_modes
  def test_embedding_correctness(self):
    # With a known weight matrix, lookups must return the matching rows.
    layer = keras.layers.Embedding(output_dim=2, input_dim=2)
    model = keras.models.Sequential([layer])

    layer.set_weights([np.array([[1, 1], [2, 2]])])
    model.run_eagerly = testing_utils.should_run_eagerly()
    outputs = model.predict(np.array([[0, 1, 0]], dtype='int32'))
    self.assertAllClose(outputs, [[[1, 1], [2, 2], [1, 1]]])

  def test_embedding_incorrect_dimension(self):
    # Zero-sized input_dim/output_dim are rejected at construction time.
    with self.assertRaises(ValueError):
      keras.layers.Embedding(input_dim=0, output_dim=1)

    with self.assertRaises(ValueError):
      keras.layers.Embedding(input_dim=1, output_dim=0)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_eager_gpu_cpu(self):
    # Gradients through the embedding lookup must be computable and
    # applicable via a V1 optimizer.
    l = keras.layers.Embedding(output_dim=2, input_dim=2)
    l.build((None, 2))
    inputs = keras.backend.constant([[0, 1, 0]], dtype='int32')
    with tf.GradientTape() as tape:
      output = l(inputs)
    gs = tape.gradient(output, l.weights)
    opt = tf.compat.v1.train.AdagradOptimizer(0.1)
    opt.apply_gradients(zip(gs, l.weights))
    self.assertAllEqual(len(gs), 1)

  @keras_parameterized.run_all_keras_modes
  def test_embedding_with_ragged_input(self):
    # Embedding should pass through a Lambda and handle ragged inputs,
    # producing a ragged output with matching row lengths.
    layer = keras.layers.Embedding(
        input_dim=3,
        output_dim=2,
        weights=[np.array([[0., 0.], [1., 1.], [2., 2.]])])
    inputs = keras.layers.Input(shape=(None, ), dtype=tf.float32, ragged=True)
    # pylint: disable=unnecessary-lambda
    outputs = keras.layers.Lambda(
        lambda args: keras.backend.identity(args))(inputs)
    # pylint: enable=unnecessary-lambda
    outputs = layer(outputs)

    model = keras.Model(inputs, outputs)
    model.run_eagerly = testing_utils.should_run_eagerly()
    outputs = model.predict(
        tf.ragged.constant([[1., 2., 2.], [0.], [1., 2.]], ragged_rank=1))
    self.assertAllClose(
        outputs,
        tf.ragged.constant([[[1., 1.], [2., 2.], [2., 2.]],
                            [[0., 0.]],
                            [[1., 1.], [2., 2.]]],
                           ragged_rank=1))

  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
  def test_embedding_with_sharded_variable(self):
    # Lookups must work when the embedding table is a ShardedVariable
    # spanning several variable shards.
    layer = keras.layers.Embedding(input_dim=5, output_dim=2)
    v = [
        tf.Variable([[1., 2.], [3., 4.]]),
        tf.Variable([[5., 6.], [7., 8.]]),
        tf.Variable([[9., 10.]])
    ]
    model = keras.models.Sequential([layer])
    layer.embeddings = sharded_variable.ShardedVariable(v)
    model.run_eagerly = testing_utils.should_run_eagerly()
    outputs = model.predict(np.array([[0, 2, 4]], dtype='int32'))
    self.assertAllClose(outputs, [[[1., 2.], [5., 6.], [9., 10.]]])

  @testing_utils.enable_v2_dtype_behavior
  def test_mixed_precision_embedding(self):
    # Under mixed_float16 the layer adopts the policy and emits float16.
    try:
      policy.set_policy('mixed_float16')
      layer = keras.layers.Embedding(input_dim=5, output_dim=2)
      self.assertEqual(layer._dtype_policy.name, 'mixed_float16')
      outputs = layer(np.array([0, 1, 2]))
      self.assertEqual(outputs.dtype, 'float16')
    finally:
      # Restore the default policy so later tests are unaffected.
      policy.set_policy('float32')
class RMSpropOptimizerTest(tf.test.TestCase, parameterized.TestCase):
  """Compares the RMSprop optimizer against a NumPy reference implementation."""

  def _rmsprop_update_numpy(self, var, g, mg, rms, mom, lr, rho, momentum,
                            epsilon, centered):
    # Reference dense RMSprop step. Returns the updated
    # (var, mg, rms, mom) tuple.
    rms_t = rms * rho + (1 - rho) * g * g
    if centered:
      mg_t = mg * rho + (1 - rho) * g
      denom_t = rms_t - mg_t * mg_t
    else:
      mg_t = mg
      denom_t = rms_t
    if momentum > 0.:
      # NOTE: with momentum, epsilon is added INSIDE the sqrt; without it,
      # epsilon is added OUTSIDE. This asymmetry mirrors the TF kernels.
      mom_t = momentum * mom + lr * g / (np.sqrt(denom_t + epsilon))
      var_t = var - mom_t
    else:
      mom_t = mom
      var_t = var - lr * g / (np.sqrt(denom_t) + epsilon)
    return var_t, mg_t, rms_t, mom_t

  def _sparse_rmsprop_update_numpy(self, var, gindexs, gvalues, mg, rms, mom,
                                   lr, rho, momentum, epsilon, centered):
    # Reference sparse RMSprop step: only rows listed in `gindexs` are
    # updated; all other slot/variable rows are left untouched.
    mg_t = copy.deepcopy(mg)
    rms_t = copy.deepcopy(rms)
    mom_t = copy.deepcopy(mom)
    var_t = copy.deepcopy(var)
    for i in range(len(gindexs)):
      gindex = gindexs[i]
      gvalue = gvalues[i]
      rms_t[gindex] = rms[gindex] * rho + (1 - rho) * gvalue * gvalue
      if centered:
        mg_t[gindex] = mg_t[gindex] * rho + (1 - rho) * gvalue
        denom_t = rms_t[gindex] - mg_t[gindex] * mg_t[gindex]
      else:
        denom_t = rms_t[gindex]
      if momentum > 0.:
        # Same epsilon placement asymmetry as in the dense reference.
        mom_t[gindex] = momentum * mom[gindex] + lr * gvalue / np.sqrt(
            denom_t + epsilon)
        var_t[gindex] = var[gindex] - mom_t[gindex]
      else:
        mom_t[gindex] = mom[gindex]
        var_t[gindex] = var[gindex] - lr * gvalue / (np.sqrt(denom_t) + epsilon)
    return var_t, mg_t, rms_t, mom_t

  def testDense(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for (dtype, learning_rate, rho, momentum, epsilon,
         centered) in _TESTPARAMS:
      with tf.compat.v1.get_default_graph().as_default(
      ), testing_utils.use_gpu():
        # Initialize variables for numpy implementation.
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, dtype=dtype)
        var1 = tf.Variable(var1_np, dtype=dtype)
        grads0 = tf.constant(grads0_np, dtype=dtype)
        grads1 = tf.constant(grads1_np, dtype=dtype)
        opt = rmsprop.RMSprop(
            learning_rate=learning_rate,
            rho=rho,
            momentum=momentum,
            epsilon=epsilon,
            centered=centered)

        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Slot variables exist only for the configurations that need them.
        if centered:
          mg0 = opt.get_slot(var0, "mg")
          mg1 = opt.get_slot(var1, "mg")
        else:
          mg0 = None
          mg1 = None

        if momentum > 0.:
          mom0 = opt.get_slot(var0, "momentum")
          mom1 = opt.get_slot(var1, "momentum")
        else:
          mom0 = None
          mom1 = None

        rms0 = opt.get_slot(var0, "rms")
        self.assertIsNotNone(rms0)
        rms1 = opt.get_slot(var1, "rms")
        self.assertIsNotNone(rms1)

        # Shadow state for the NumPy reference implementation.
        mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Run 3 steps of RMSprop
        for _ in range(1, 4):
          self.evaluate(update)

          var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
              var0_np, grads0_np, mg0_np, rms0_np, mom0_np, learning_rate,
              rho, momentum, epsilon, centered)
          var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
              var1_np, grads1_np, mg1_np, rms1_np, mom1_np, learning_rate,
              rho, momentum, epsilon, centered)

          # Validate updated params
          if centered:
            self.assertAllCloseAccordingToType(mg0_np, self.evaluate(mg0))
            self.assertAllCloseAccordingToType(mg1_np, self.evaluate(mg1))
          if momentum > 0.:
            self.assertAllCloseAccordingToType(mom0_np, self.evaluate(mom0))
            self.assertAllCloseAccordingToType(mom1_np, self.evaluate(mom1))
          self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0))
          self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1))
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testDenseWithLearningRateDecay(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      var0_np = np.array([1.0, 2.0])
      grads0_np = np.array([0.1, 0.2])
      var1_np = np.array([3.0, 4.0])
      grads1_np = np.array([0.01, 0.2])

      var0 = tf.Variable(var0_np)
      var1 = tf.Variable(var1_np)
      grads0 = tf.constant(grads0_np)
      grads1 = tf.constant(grads1_np)
      learning_rate = 0.01
      rho = 0.9
      momentum = 0.0
      epsilon = 1e-7
      centered = False
      decay = 0.5
      opt = rmsprop.RMSprop(
          learning_rate=learning_rate,
          rho=rho,
          momentum=momentum,
          epsilon=epsilon,
          centered=centered,
          decay=decay)

      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      self.evaluate(tf.compat.v1.global_variables_initializer())

      rms0 = opt.get_slot(var0, "rms")
      self.assertIsNotNone(rms0)
      rms1 = opt.get_slot(var1, "rms")
      self.assertIsNotNone(rms1)
      if momentum > 0.:
        mom0 = opt.get_slot(var0, "momentum")
        mom1 = opt.get_slot(var1, "momentum")
      else:
        mom0 = None
        mom1 = None

      mg0_np = np.array([0.0, 0.0])
      mg1_np = np.array([0.0, 0.0])
      rms0_np = np.array([0.0, 0.0])
      rms1_np = np.array([0.0, 0.0])
      mom0_np = np.array([0.0, 0.0])
      mom1_np = np.array([0.0, 0.0])

      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))

      # Run 2 steps of RMSprop, applying the inverse-time lr decay by hand in
      # the NumPy reference.
      for t in range(2):
        self.evaluate(update)

        lr = learning_rate / (1 + decay * t)
        var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
            var0_np, grads0_np, mg0_np, rms0_np, mom0_np, lr, rho, momentum,
            epsilon, centered)
        var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
            var1_np, grads1_np, mg1_np, rms1_np, mom1_np, lr, rho, momentum,
            epsilon, centered)

        # Validate updated params
        self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0))
        self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1))
        if momentum > 0.:
          self.assertAllCloseAccordingToType(mom0_np, self.evaluate(mom0))
          self.assertAllCloseAccordingToType(mom1_np, self.evaluate(mom1))
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testDenseWithLearningRateInverseTimeDecay(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      var0_np = np.array([1.0, 2.0])
      grads0_np = np.array([0.1, 0.2])
      var1_np = np.array([3.0, 4.0])
      grads1_np = np.array([0.01, 0.2])

      var0 = tf.Variable(var0_np)
      var1 = tf.Variable(var1_np)
      grads0 = tf.constant(grads0_np)
      grads1 = tf.constant(grads1_np)
      learning_rate = 0.01
      rho = 0.9
      momentum = 0.0
      epsilon = 1e-7
      centered = False
      decay = 0.5
      # Same schedule as testDenseWithLearningRateDecay, but expressed as an
      # explicit InverseTimeDecay schedule object.
      lr_schedule = learning_rate_schedule.InverseTimeDecay(
          learning_rate, decay_steps=1.0, decay_rate=decay)
      opt = rmsprop.RMSprop(
          learning_rate=lr_schedule,
          rho=rho,
          momentum=momentum,
          epsilon=epsilon,
          centered=centered)

      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      self.evaluate(tf.compat.v1.global_variables_initializer())

      rms0 = opt.get_slot(var0, "rms")
      self.assertIsNotNone(rms0)
      rms1 = opt.get_slot(var1, "rms")
      self.assertIsNotNone(rms1)
      if momentum > 0.:
        mom0 = opt.get_slot(var0, "momentum")
        mom1 = opt.get_slot(var1, "momentum")
      else:
        mom0 = None
        mom1 = None

      mg0_np = np.array([0.0, 0.0])
      mg1_np = np.array([0.0, 0.0])
      rms0_np = np.array([0.0, 0.0])
      rms1_np = np.array([0.0, 0.0])
      mom0_np = np.array([0.0, 0.0])
      mom1_np = np.array([0.0, 0.0])

      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))

      # Run 2 steps of RMSprop
      for t in range(2):
        self.evaluate(update)

        lr = learning_rate / (1 + decay * t)
        var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
            var0_np, grads0_np, mg0_np, rms0_np, mom0_np, lr, rho, momentum,
            epsilon, centered)
        var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
            var1_np, grads1_np, mg1_np, rms1_np, mom1_np, lr, rho, momentum,
            epsilon, centered)

        # Validate updated params
        self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0))
        self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1))
        if momentum > 0.:
          self.assertAllCloseAccordingToType(mom0_np, self.evaluate(mom0))
          self.assertAllCloseAccordingToType(mom1_np, self.evaluate(mom1))
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testMinimizeSparseResourceVariable(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in _DATA_TYPES:
        var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
        x = tf.constant([[4.0], [5.0]], dtype=dtype)

        def loss():
          pred = tf.matmul(
              tf.compat.v1.nn.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
          return pred * pred

        sgd_op = rmsprop.RMSprop(
            learning_rate=1.0,
            rho=0.0,
            momentum=0.0,
            epsilon=0.0,
            centered=False).minimize(
                loss, var_list=[var0])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Fetch params to validate initial values
        self.assertAllCloseAccordingToType([[1.0, 2.0]], self.evaluate(var0))
        # Run 1 step of sgd
        self.evaluate(sgd_op)
        # Validate updated params
        self.assertAllCloseAccordingToType([[0., 1.]],
                                           self.evaluate(var0),
                                           atol=0.01)

  def testMinimizeSparseResourceVariableCentered(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in _DATA_TYPES:
        if test_util.is_xla_enabled() and dtype.is_complex:
          self.skipTest("b/143578550")
        var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
        x = tf.constant([[4.0], [5.0]], dtype=dtype)

        def loss():
          pred = tf.matmul(
              tf.compat.v1.nn.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
          return pred * pred

        # loss = lambda: pred * pred  # pylint: disable=cell-var-from-loop
        sgd_op = rmsprop.RMSprop(
            learning_rate=1.0,
            rho=0.0,
            momentum=0.0,
            epsilon=1.0,
            centered=True).minimize(
                loss, var_list=[var0])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Fetch params to validate initial values
        self.assertAllCloseAccordingToType([[1.0, 2.0]], self.evaluate(var0))
        # Run 1 step of sgd
        self.evaluate(sgd_op)
        # Validate updated params
        self.assertAllCloseAccordingToType([[-111, -138]],
                                           self.evaluate(var0),
                                           atol=0.01)

  def testSparse(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for (dtype, learning_rate, rho, momentum, epsilon,
         centered) in _TESTPARAMS:
      with tf.compat.v1.get_default_graph().as_default(
      ), testing_utils.use_gpu():
        # Initialize variables for numpy implementation.
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        # Gradients touch a single row of each variable.
        grads0_np_indices = np.array([0], dtype=np.int32)
        grads0 = tf.IndexedSlices(
            tf.constant(grads0_np), tf.constant(grads0_np_indices),
            tf.constant([1]))
        grads1_np_indices = np.array([1], dtype=np.int32)
        grads1 = tf.IndexedSlices(
            tf.constant(grads1_np), tf.constant(grads1_np_indices),
            tf.constant([1]))
        opt = rmsprop.RMSprop(
            learning_rate=learning_rate,
            rho=rho,
            momentum=momentum,
            epsilon=epsilon,
            centered=centered)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        if centered:
          mg0 = opt.get_slot(var0, "mg")
          self.assertEqual(mg0 is not None, centered)
          mg1 = opt.get_slot(var1, "mg")
          self.assertEqual(mg1 is not None, centered)
        else:
          mg0 = None
          mg1 = None
        rms0 = opt.get_slot(var0, "rms")
        self.assertIsNotNone(rms0)
        rms1 = opt.get_slot(var1, "rms")
        self.assertIsNotNone(rms1)
        if momentum > 0.:
          mom0 = opt.get_slot(var0, "momentum")
          mom1 = opt.get_slot(var1, "momentum")
        else:
          mom0 = None
          mom1 = None

        # Shadow state for the NumPy reference implementation.
        mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Run 3 steps of RMSprop
        for _ in range(1, 4):
          self.evaluate(update)

          var0_np, mg0_np, rms0_np, mom0_np = self._sparse_rmsprop_update_numpy(
              var0_np, grads0_np_indices, grads0_np, mg0_np, rms0_np,
              mom0_np, learning_rate, rho, momentum, epsilon, centered)
          var1_np, mg1_np, rms1_np, mom1_np = self._sparse_rmsprop_update_numpy(
              var1_np, grads1_np_indices, grads1_np, mg1_np, rms1_np,
              mom1_np, learning_rate, rho, momentum, epsilon, centered)

          # Validate updated params
          if centered:
            self.assertAllCloseAccordingToType(mg0_np, self.evaluate(mg0))
            self.assertAllCloseAccordingToType(mg1_np, self.evaluate(mg1))
          self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0))
          self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1))
          if momentum > 0.:
            self.assertAllCloseAccordingToType(mom0_np, self.evaluate(mom0))
            self.assertAllCloseAccordingToType(mom1_np, self.evaluate(mom1))
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testCallableParams(self):
    # Hyperparameters may be passed as zero-argument callables.
    for dtype in _DATA_TYPES:
      var0 = tf.Variable([1.0, 2.0], dtype=dtype)
      var1 = tf.Variable([3.0, 4.0], dtype=dtype)
      grads0 = tf.constant([0.1, 0.1], dtype=dtype)
      grads1 = tf.constant([0.01, 0.01], dtype=dtype)
      learning_rate = lambda: 2.0
      rho = lambda: 0.9
      momentum = lambda: 0.0
      epsilon = 1.0
      opt = rmsprop.RMSprop(learning_rate, rho, momentum, epsilon)

      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
      # Step 1: the rms accumulators where 1. So we should see a normal
      # update: v -= grad * learning_rate
      opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      # Check the parameters.
      self.assertAllCloseAccordingToType(
          np.array([
              1.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)),
              2.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0))
          ]), self.evaluate(var0))
      self.assertAllCloseAccordingToType(
          np.array([
              3.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)),
              4.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0))
          ]), self.evaluate(var1))
      # Step 2: the root mean square accumulators contain the previous update.
      opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      # Check the parameters.
      self.assertAllCloseAccordingToType(
          np.array([
              1.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)) -
              (0.1 * 2.0 / math.sqrt(0.001 * 0.9 + 0.001 + 1.0)),
              2.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)) -
              (0.1 * 2.0 / math.sqrt(0.001 * 0.9 + 0.001 + 1.0))
          ]), self.evaluate(var0))
      self.assertAllCloseAccordingToType(
          np.array([
              3.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)) -
              (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0)),
              4.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)) -
              (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0))
          ]), self.evaluate(var1))

  def testConstructRMSpropWithLR(self):
    # The legacy `lr` kwarg aliases `learning_rate`; when both are given,
    # `lr` wins.
    opt = rmsprop.RMSprop(lr=1.0)
    opt_2 = rmsprop.RMSprop(learning_rate=0.1, lr=1.0)
    opt_3 = rmsprop.RMSprop(learning_rate=0.1)
    self.assertIsInstance(opt.lr, tf.Variable)
    self.assertIsInstance(opt_2.lr, tf.Variable)
    self.assertIsInstance(opt_3.lr, tf.Variable)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(opt.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_3.lr), (0.1))

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testSlotsUniqueEager(self):
    # Slot-variable count = iterations + (slots per config) * (num vars).
    v1 = tf.Variable(1.)
    v2 = tf.Variable(1.)

    opt = rmsprop.RMSprop(1., momentum=0., centered=False)
    opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
    # There should be iteration, and one unique slot variable for v1 and v2.
    self.assertLen(set({id(v) for v in opt.variables()}), 3)
    self.assertEqual(
        self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations))

    opt = rmsprop.RMSprop(learning_rate=1., momentum=0.2, centered=False)
    opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
    # There should be iteration, and two unique slot variables for v1 and v2.
    self.assertLen(set({id(v) for v in opt.variables()}), 5)
    self.assertEqual(
        self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations))

    opt = rmsprop.RMSprop(learning_rate=1., momentum=0.2, centered=True)
    opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
    # There should be iteration, and three unique slot variables for v1 and v2
    self.assertLen(set({id(v) for v in opt.variables()}), 7)
    self.assertEqual(
        self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations))
class KerasFunctionalMetricsTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for the functional (stateless) Keras metrics."""

  def test_metrics(self):
    # Elementwise metrics reduce over the class axis, returning one value
    # per sample.
    with self.cached_session():
      y_a = K.variable(np.random.random((6, 7)))
      y_b = K.variable(np.random.random((6, 7)))
      for metric in [metrics.binary_accuracy, metrics.categorical_accuracy]:
        output = metric(y_a, y_b)
        self.assertEqual(K.eval(output).shape, (6,))

  def test_sparse_categorical_accuracy_int(self):
    with self.cached_session():
      metric = metrics.sparse_categorical_accuracy
      y_true = K.variable(np.random.randint(0, 7, (6,)))
      y_pred = K.variable(np.random.random((6, 7)))
      self.assertEqual(K.eval(metric(y_true, y_pred)).shape, (6,))

      # Test correctness if the shape of y_true is (num_samples,)
      y_true = K.variable([1., 0., 0., 0.])
      y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]])
      self.assertAllEqual(K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.])

      # Test correctness if the shape of y_true is (num_samples, 1)
      y_true = K.variable([[1.], [0.], [0.], [0.]])
      y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]])
      self.assertAllEqual(K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.])

      # Test correctness if the shape of y_true is (batch_size, seq_length) and
      # y_pred is (batch_size, seq_length, num_classes)
      y_pred = K.variable(
          np.array([[[0.2, 0.3, 0.1], [0.1, 0.2, 0.7]],
                    [[0.3, 0.2, 0.1], [0.7, 0.2, 0.1]]]))
      y_true = K.variable(np.array([[1, 0], [1, 0]]))
      self.assertAllEqual(
          K.eval(metric(y_true, y_pred)), [[1., 0.], [0., 1.]])

  def test_sparse_categorical_accuracy_float(self):
    # Float-typed labels must be accepted as well as ints.
    with self.cached_session():
      metric = metrics.sparse_categorical_accuracy
      y_true = K.variable(np.random.random((6,)))
      y_pred = K.variable(np.random.random((6, 7)))
      self.assertEqual(K.eval(metric(y_true, y_pred)).shape, (6,))

  @combinations.generate(combinations.combine(mode=['eager']))
  def test_sparse_categorical_accuracy_eager(self):
    """Tests that ints passed in via Eager return results. See b/113504761."""
    metric = metrics.sparse_categorical_accuracy
    y_true = np.arange(6).reshape([6, 1])
    y_pred = np.arange(36).reshape([6, 6])
    self.assertAllEqual(metric(y_true, y_pred), [0., 0., 0., 0., 0., 1.])

  @combinations.generate(combinations.combine(mode=['eager']))
  def test_sparse_categorical_accuracy_float_eager(self):
    """Tests that floats passed in via Eager return results. See b/113504761."""
    metric = metrics.sparse_categorical_accuracy
    y_true = np.arange(6, dtype=np.float32).reshape([6, 1])
    y_pred = np.arange(36).reshape([6, 6])
    self.assertAllEqual(metric(y_true, y_pred), [0., 0., 0., 0., 0., 1.])

  def test_sparse_top_k_categorical_accuracy(self):
    with self.cached_session():
      # Test correctness if the shape of y_true is (num_samples, 1)
      y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
      y_true = K.variable(np.array([[1], [0]]))
      result = K.eval(
          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
      self.assertEqual(np.mean(result), 1)
      result = K.eval(
          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
      self.assertEqual(np.mean(result), 0.5)
      result = K.eval(
          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
      self.assertEqual(np.mean(result), 0.)

      # Test correctness if the shape of y_true is (num_samples,)
      y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
      y_true = K.variable(np.array([1, 0]))
      result = K.eval(
          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
      self.assertEqual(np.mean(result), 1)
      result = K.eval(
          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
      self.assertEqual(np.mean(result), 0.5)
      result = K.eval(
          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
      self.assertEqual(np.mean(result), 0.)

      # Test correctness if the shape of y_true is (batch_size, seq_length) and
      # y_pred is (batch_size, seq_length, num_classes)
      y_pred = K.variable(
          np.array([[[0.3, 0.2, 0.1], [0.1, 0.2, 0.7], [0.1, 0.2, 0.7]],
                    [[0.3, 0.2, 0.1], [0.1, 0.2, 0.7], [0.3, 0.2, 0.1]]]))
      y_true = K.variable(np.array([[1, 0, 0], [1, 0, 1]]))
      result = K.eval(
          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
      self.assertEqual(np.mean(result), 1)
      result = K.eval(
          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
      self.assertEqual(np.mean(result), 0.5)
      result = K.eval(
          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
      self.assertEqual(np.mean(result), 0.)

  def test_top_k_categorical_accuracy(self):
    with self.cached_session():
      y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
      y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]]))
      result = K.eval(
          metrics.top_k_categorical_accuracy(y_true, y_pred, k=3))
      self.assertEqual(np.mean(result), 1)
      result = K.eval(
          metrics.top_k_categorical_accuracy(y_true, y_pred, k=2))
      self.assertEqual(np.mean(result), 0.5)
      result = K.eval(
          metrics.top_k_categorical_accuracy(y_true, y_pred, k=1))
      self.assertEqual(np.mean(result), 0.)
class MappingTests(keras_parameterized.TestCase):
  """Tests for dict attributes wrapped for checkpoint dependency tracking."""

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testTracking(self):
    """A dict attribute is wrapped and its layers/variables are tracked."""
    with self.test_session():
      net = HasMapping()
      out = net(tf.ones([32, 2]))
      self.assertAllEqual([32, 7], out.shape.as_list())
      self.assertEqual(5, len(net.layers))
      six.assertCountEqual(self, net.layers, net.layer_dict.layers)
      # The dict wrapper is the model's one and only checkpoint dependency.
      self.assertEqual(1, len(net._checkpoint_dependencies))
      self.assertIs(net.layer_dict, net._checkpoint_dependencies[0].ref)
      self.evaluate([v.initializer for v in net.variables])
      kernel_var = net.layer_dict["output"].kernel
      # Round-trip a weight value through save_weights/load_weights.
      self.evaluate(kernel_var.assign(tf.ones([6, 7])))
      ckpt_prefix = os.path.join(self.get_temp_dir(), "ckpt")
      net.save_weights(ckpt_prefix)
      self.evaluate(kernel_var.assign(tf.zeros([6, 7])))
      net.load_weights(ckpt_prefix)
      self.assertAllEqual(numpy.ones([6, 7]), self.evaluate(kernel_var))

  def testLayerCollectionWithExternalMutation(self):
    """Mutations of the raw dict are visible through the wrapper."""
    raw = {}
    holder = tf.Module()
    holder.wrapper = raw
    self.assertEqual([], holder.wrapper.layers)
    self.assertEqual([], holder.wrapper.trainable_weights)
    dense_a = core.Dense(1)
    dense_b = core.Dense(1)
    raw["a"] = dense_a
    raw["b"] = dense_b
    self.assertEqual([dense_a, dense_b], holder.wrapper.layers)
    # Neither layer has been built, so no variables exist yet.
    self.assertEqual([], holder.wrapper.trainable_weights)

  def testDictWrapperBadKeys(self):
    """Saving fails when a tracked dict uses a non-string key."""
    holder = tf.Module()
    holder.d = {}
    holder.d[1] = data_structures.wrap_or_unwrap([])
    outer = training.Model()
    outer.sub = holder
    prefix = os.path.join(self.get_temp_dir(), "ckpt")
    with self.assertRaisesRegex(ValueError, "non-string key"):
      outer.save_weights(prefix)

  def testDictWrapperNoDependency(self):
    """A NoDependency dict is untracked, so non-string keys are harmless."""
    holder = tf.Module()
    holder.d = data_structures.NoDependency({})
    holder.d[1] = [3]
    self.assertEqual([holder], util.list_objects(holder))
    outer = training.Model()
    outer.sub = holder
    prefix = os.path.join(self.get_temp_dir(), "ckpt")
    outer.save_weights(prefix)
    outer.load_weights(prefix)

  def testNonStringKeyNotTrackableValue(self):
    """A non-string key is fine as long as its value carries no dependency."""
    holder = tf.Module()
    holder.d = {}
    holder.d["a"] = [3]
    holder.d[1] = data_structures.NoDependency([3])
    self.assertEqual([holder, holder.d, holder.d["a"]],
                     util.list_objects(holder))
    outer = training.Model()
    outer.sub = holder
    prefix = os.path.join(self.get_temp_dir(), "ckpt")
    outer.save_weights(prefix)
    outer.load_weights(prefix)

  def testNonAppendNotTrackable(self):
    # Non-append mutations (deleting or overwriting values) are OK when the
    # values aren't tracked.
    holder = tf.Module()
    holder.d = {}
    holder.d["a"] = [3]
    holder.d[1] = 3
    holder.d[1] = 2
    self.assertEqual(2, holder.d[1])
    del holder.d[1]
    holder.d[2] = data_structures.NoDependency(tf.Module())
    replacement = tf.Module()
    holder.d[2] = data_structures.NoDependency(replacement)
    self.assertIs(replacement, holder.d[2])
    self.assertEqual([holder, holder.d, holder.d["a"]],
                     util.list_objects(holder))
    outer = training.Model()
    outer.sub = holder
    prefix = os.path.join(self.get_temp_dir(), "ckpt")
    outer.save_weights(prefix)
    outer.load_weights(prefix)

  def testPopNoSave(self):
    """Popping a tracked entry makes the checkpoint unsaveable."""
    outer = training.Model()
    outer.d = {}
    outer.d["a"] = []
    outer.d.pop("a")
    prefix = os.path.join(self.get_temp_dir(), "ckpt")
    with self.assertRaisesRegex(ValueError, "Unable to save"):
      outer.save_weights(prefix)

  def testExternalModificationNoSave(self):
    """Adding a trackable via the raw dict (not the wrapper) blocks saving."""
    outer = training.Model()
    shared_dict = {}
    outer.d = shared_dict
    shared_dict["a"] = []
    prefix = os.path.join(self.get_temp_dir(), "ckpt")
    with self.assertRaisesRegex(ValueError, "modified outside the wrapper"):
      outer.save_weights(prefix)

  def testOverwriteCanStillSave(self):
    """Overwriting a value through the wrapper keeps the dict saveable."""
    outer = training.Model()
    outer.d = {}
    outer.d["a"] = {}
    outer.d["a"] = {}
    prefix = os.path.join(self.get_temp_dir(), "ckpt")
    outer.save_weights(prefix)

  def testIter(self):
    """The wrapper iterates like a plain dict, including via dict.update."""
    outer = training.Model()
    outer.d = {1: 3}
    outer.d[1] = 3
    self.assertEqual([1], list(outer.d))
    plain_copy = {}
    # This update() is super tricky. If the dict wrapper subclasses dict,
    # CPython will access its storage directly instead of calling any
    # methods/properties on the object. So the options are either not to
    # subclass dict (in which case update will call normal iter methods, but
    # the object won't pass isinstance checks) or to subclass dict and keep
    # that storage updated (no shadowing all its methods like ListWrapper).
    plain_copy.update(outer.d)
    self.assertEqual({1: 3}, plain_copy)
class TraceModelCallTest(keras_parameterized.TestCase):
  """Tests for `saving_utils.trace_model_call`."""

  def _assert_all_close(self, expected, actual):
    # In graph mode, variables must be initialized in a session before their
    # values can be fetched and compared.
    if not tf.executing_eagerly():
      with self.cached_session() as sess:
        K._initialize_variables(sess)
        self.assertAllClose(expected, actual)
    else:
      self.assertAllClose(expected, actual)

  @keras_parameterized.run_with_all_model_types
  @keras_parameterized.run_all_keras_modes
  def test_trace_model_outputs(self):
    """Traced call outputs match a direct model call (single-output case)."""
    # Subclassed/sequential models without an input dim have unset shapes.
    input_dim = 5 if testing_utils.get_model_type() == 'functional' else None
    model = testing_utils.get_small_mlp(10, 3, input_dim)
    inputs = tf.ones((8, 5))

    if input_dim is None:
      # Tracing must fail until the model's inputs are set.
      with self.assertRaisesRegex(ValueError,
                                  'input shapes have not been set'):
        saving_utils.trace_model_call(model)
      model._set_inputs(inputs)

    fn = saving_utils.trace_model_call(model)
    signature_outputs = fn(inputs)
    if model.output_names:
      expected_outputs = {model.output_names[0]: model(inputs)}
    else:
      # Default output key used when the model has no output names.
      expected_outputs = {'output_1': model(inputs)}
    self._assert_all_close(expected_outputs, signature_outputs)

  @keras_parameterized.run_with_all_model_types
  @keras_parameterized.run_all_keras_modes
  def test_trace_model_outputs_after_fitting(self):
    """Tracing also works once fit() has built the model's inputs."""
    input_dim = 5 if testing_utils.get_model_type() == 'functional' else None
    model = testing_utils.get_small_mlp(10, 3, input_dim)
    model.compile(
        optimizer='sgd',
        loss='mse',
        run_eagerly=testing_utils.should_run_eagerly())
    model.fit(
        x=np.random.random((8, 5)).astype(np.float32),
        y=np.random.random((8, 3)).astype(np.float32),
        epochs=2)

    inputs = tf.ones((8, 5))

    fn = saving_utils.trace_model_call(model)
    signature_outputs = fn(inputs)
    if model.output_names:
      expected_outputs = {model.output_names[0]: model(inputs)}
    else:
      expected_outputs = {'output_1': model(inputs)}
    self._assert_all_close(expected_outputs, signature_outputs)

  @keras_parameterized.run_with_all_model_types(exclude_models='sequential')
  @keras_parameterized.run_all_keras_modes
  def test_trace_multi_io_model_outputs(self):
    """Traced outputs of a two-input/two-output model match direct calls."""
    input_dim = 5
    num_classes = 3
    num_classes_b = 4
    input_a = keras.layers.Input(shape=(input_dim,), name='input_a')
    input_b = keras.layers.Input(shape=(input_dim,), name='input_b')

    dense = keras.layers.Dense(num_classes, name='dense')
    dense2 = keras.layers.Dense(num_classes_b, name='dense2')
    dropout = keras.layers.Dropout(0.5, name='dropout')
    # branch_b reuses `dense`, so the two outputs share a layer.
    branch_a = [input_a, dense]
    branch_b = [input_b, dense, dense2, dropout]

    model = testing_utils.get_multi_io_model(branch_a, branch_b)

    input_a_np = np.random.random((10, input_dim)).astype(np.float32)
    input_b_np = np.random.random((10, input_dim)).astype(np.float32)

    if testing_utils.get_model_type() == 'subclass':
      # A subclassed model has no known input shapes before fit()/build().
      with self.assertRaisesRegex(ValueError,
                                  'input shapes have not been set'):
        saving_utils.trace_model_call(model)

    model.compile(
        optimizer='sgd',
        loss='mse',
        run_eagerly=testing_utils.should_run_eagerly())
    model.fit(x=[np.random.random((8, input_dim)).astype(np.float32),
                 np.random.random((8, input_dim)).astype(np.float32)],
              y=[np.random.random((8, num_classes)).astype(np.float32),
                 np.random.random((8, num_classes_b)).astype(np.float32)],
              epochs=2)

    fn = saving_utils.trace_model_call(model)
    signature_outputs = fn([input_a_np, input_b_np])
    outputs = model([input_a_np, input_b_np])
    if model.output_names:
      expected_outputs = {
          model.output_names[0]: outputs[0],
          model.output_names[1]: outputs[1]
      }
    else:
      expected_outputs = {'output_1': outputs[0], 'output_2': outputs[1]}
    self._assert_all_close(expected_outputs, signature_outputs)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_trace_features_layer(self):
    """DenseFeatures models trace with dict (feature-column) inputs."""
    columns = [tf.feature_column.numeric_column('x')]
    model = sequential.Sequential([dense_features.DenseFeatures(columns)])
    model_input = {'x': tf.constant([[1.]])}
    model.predict(model_input, steps=1)
    fn = saving_utils.trace_model_call(model)
    self.assertAllClose({'output_1': [[1.]]}, fn({'x': [[1.]]}))

    # Multiple columns: outputs are concatenated in column order.
    columns = [
        tf.feature_column.numeric_column('x'),
        tf.feature_column.numeric_column('y')
    ]
    model = sequential.Sequential([dense_features.DenseFeatures(columns)])
    model_input = {'x': tf.constant([[1.]]), 'y': tf.constant([[2.]])}
    model.predict(model_input, steps=1)
    fn = saving_utils.trace_model_call(model)
    self.assertAllClose({'output_1': [[1., 2.]]},
                        fn({'x': [[1.]], 'y': [[2.]]}))

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_specify_input_signature(self):
    """An explicit input signature substitutes for set input shapes."""
    model = testing_utils.get_small_sequential_mlp(10, 3, None)
    inputs = tf.ones((8, 5))

    with self.assertRaisesRegex(ValueError,
                                'input shapes have not been set'):
      saving_utils.trace_model_call(model)

    fn = saving_utils.trace_model_call(
        model, [tf.TensorSpec(shape=[None, 5], dtype=tf.float32)])
    signature_outputs = fn(inputs)
    if model.output_names:
      expected_outputs = {model.output_names[0]: model(inputs)}
    else:
      expected_outputs = {'output_1': model(inputs)}
    self._assert_all_close(expected_outputs, signature_outputs)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_subclassed_model_with_input_signature(self):
    """A model whose call() already has an input_signature traces directly."""

    class Model(keras.Model):

      def __init__(self):
        super(Model, self).__init__()
        self.dense = keras.layers.Dense(3, name='dense')

      @tf.function(
          input_signature=[[tf.TensorSpec([None, 5], tf.float32),
                            tf.TensorSpec([None], tf.float32)]],)
      def call(self, inputs, *args):
        x, y = inputs
        return self.dense(x) + y

    model = Model()
    fn = saving_utils.trace_model_call(model)
    x = tf.ones((8, 5), dtype=tf.float32)
    y = tf.ones((3,), dtype=tf.float32)
    expected_outputs = {'output_1': model([x, y])}
    signature_outputs = fn([x, y])
    self._assert_all_close(expected_outputs, signature_outputs)

  @keras_parameterized.run_with_all_model_types
  @keras_parameterized.run_all_keras_modes
  def test_model_with_fixed_input_dim(self):
    """Ensure that the batch_dim is removed when saving.

    When serving or retraining, it is important to reset the batch dim.
    This can be an issue inside of tf.function. See b/132783590 for context.
    """
    model = testing_utils.get_small_mlp(10, 3, 5)

    loss_object = keras.losses.MeanSquaredError()
    optimizer = gradient_descent.SGD()

    @tf.function
    def train_step(data, labels):
      with tf.GradientTape() as tape:
        predictions = model(data)
        loss = loss_object(labels, predictions)
      gradients = tape.gradient(loss, model.trainable_variables)
      optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # Train with a fixed batch size of 8, then check tracing relaxes it.
    x = np.random.random((8, 5))
    y = np.random.random((8, 3))

    train_step(x, y)

    fn = saving_utils.trace_model_call(model)
    # The traced signature must have batch dimension None, not 8.
    self.assertEqual(fn.input_signature[0].shape.as_list(),
                     tf.TensorShape([None, 5]).as_list())
class TestLayerCallTracing(tf.test.TestCase, parameterized.TestCase):
  """Tests for LayerCallCollection's call-function tracing bookkeeping."""

  def test_functions_have_same_trace(self):
    """Tracing one registered function traces every sibling with same args."""

    class Layer(keras.engine.base_layer.Layer):

      def call(self, inputs):
        return inputs

      def call2(self, inputs):
        return inputs * 2

    layer = Layer()
    call_collection = keras_save.LayerCallCollection(layer)
    fn = call_collection.add_function(layer.call, 'call', True)
    fn2 = call_collection.add_function(layer.call2, 'call2', True)

    with keras_save.tracing_scope():
      fn(np.ones((2, 3)))
      fn(np.ones((4, 5)))

    # Both registered functions must have a concrete trace for each shape,
    # even though only `fn` was called directly.
    self.assertLen(
        fn.wrapped_call._list_all_concrete_functions_for_serialization(), 2)
    self.assertLen(
        fn2.wrapped_call._list_all_concrete_functions_for_serialization(), 2)

    # Check that the shapes are correct
    self.assertEqual(
        {(2, 3), (4, 5)},
        set(tuple(c.structured_input_signature[0][0].shape.as_list())
            for c in
            fn2.wrapped_call._list_all_concrete_functions_for_serialization()))

  def test_training_arg_replacement(self):
    """Each call is traced with both training=True and training=False."""

    def assert_num_traces(layer_cls, training_keyword):
      # Every tracing_scope call should add two concrete functions: one per
      # training value.
      layer = layer_cls()
      call_collection = keras_save.LayerCallCollection(layer)
      fn = call_collection.add_function(layer.call, 'call', True)

      with keras_save.tracing_scope():
        fn(np.ones((2, 3)), training=True)
      self.assertLen(
          fn.wrapped_call._list_all_concrete_functions_for_serialization(), 2)
      with keras_save.tracing_scope():
        fn(np.ones((2, 4)), training=False)
      self.assertLen(
          fn.wrapped_call._list_all_concrete_functions_for_serialization(), 4)

      if training_keyword:
        # Passing training positionally and omitting it entirely should each
        # also produce the two-trace pair.
        with keras_save.tracing_scope():
          fn(np.ones((2, 5)), True)
        self.assertLen(
            fn.wrapped_call._list_all_concrete_functions_for_serialization(),
            6)
        with keras_save.tracing_scope():
          fn(np.ones((2, 6)))
        self.assertLen(
            fn.wrapped_call._list_all_concrete_functions_for_serialization(),
            8)

    class LayerWithTrainingKeyword(keras.engine.base_layer.Layer):

      def call(self, inputs, training=False):
        return inputs * training

    assert_num_traces(LayerWithTrainingKeyword, training_keyword=True)

    class LayerWithKwargs(keras.engine.base_layer.Layer):

      def call(self, inputs, **kwargs):
        return inputs * kwargs['training']

    assert_num_traces(LayerWithKwargs, training_keyword=False)

    class LayerWithChildLayer(keras.engine.base_layer.Layer):

      def __init__(self):
        self.child = LayerWithKwargs()
        super(LayerWithChildLayer, self).__init__()

      def call(self, inputs):
        return self.child(inputs)

    assert_num_traces(LayerWithChildLayer, training_keyword=False)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_maintains_losses(self):
    """Calling the wrapped function must not mutate the layer's losses."""
    layer = LayerWithLoss()
    layer(np.ones((2, 3)))
    previous_losses = layer.losses[:]

    call_collection = keras_save.LayerCallCollection(layer)
    fn = call_collection.add_function(layer.call, 'call', True)
    fn(np.ones((2, 3)))

    self.assertAllEqual(previous_losses, layer.losses)
class DatasetCreatorTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for `dataset_creator.DatasetCreator` validation and model.fit use."""

  def test_dataset_creator(self):
    """Constructor/call validation: non-callable and non-Dataset returns."""
    with self.assertRaisesRegex(
        TypeError, "`dataset_fn` for `DatasetCreator` must be a `callable`."):
      dataset_creator.DatasetCreator(2)

    dataset_fn = lambda: 3
    with self.assertRaisesRegex(
        TypeError, "The `callable` provided to `DatasetCreator` must return "
        "a Dataset."):
      dataset_creator.DatasetCreator(dataset_fn)()

    dataset_fn = lambda: tf.data.Dataset.from_tensor_slices([1, 1])
    got = dataset_creator.DatasetCreator(dataset_fn)()
    self.assertEqual(
        next(iter(got)),
        next(iter(tf.data.Dataset.from_tensor_slices([1, 1]))))

  def _get_dataset_fn(self):
    # Returns a dataset_fn of the shape model.fit expects: it receives an
    # InputContext and shards/batches accordingly.
    def dataset_fn(input_context):
      global_batch_size = 64
      batch_size = input_context.get_per_replica_batch_size(global_batch_size)
      dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat()
      dataset = dataset.shard(input_context.num_input_pipelines,
                              input_context.input_pipeline_id)
      dataset = dataset.batch(batch_size)
      dataset = dataset.prefetch(2)
      return dataset

    return dataset_fn

  @combinations.generate(combinations.combine(use_input_options=[True, False]))
  def test_dataset_creator_model_fit_without_strategy(self,
                                                      use_input_options):
    """model.fit accepts a DatasetCreator with no distribution strategy."""
    model = sequential.Sequential([core_layers.Dense(10)])
    model.compile(gradient_descent.SGD(), loss="mse")

    input_options = tf.distribute.InputOptions() if use_input_options else None
    history = model.fit(dataset_creator.DatasetCreator(
        self._get_dataset_fn(), input_options),
                        epochs=10,
                        steps_per_epoch=10,
                        verbose=0)
    self.assertLen(history.history["loss"], 10)

  def _get_parameter_server_strategy(self):
    # In-process cluster: 2 workers + 1 PS over grpc, for PS-strategy tests.
    cluster_def = multi_worker_testing_utils.create_in_process_cluster(
        num_workers=2, num_ps=1, rpc_layer="grpc")
    return tf.distribute.experimental.ParameterServerStrategy(
        SimpleClusterResolver(ClusterSpec(cluster_def), rpc_layer="grpc"))

  @combinations.generate(combinations.combine(use_input_options=[True, False]))
  def test_dataset_creator_usage_in_parameter_server_model_fit(
      self, use_input_options):
    """model.fit accepts a DatasetCreator under ParameterServerStrategy."""
    strategy = self._get_parameter_server_strategy()
    with strategy.scope():
      model = sequential.Sequential([core_layers.Dense(10)])
    model.compile(gradient_descent.SGD(), loss="mse")

    input_options = tf.distribute.InputOptions() if use_input_options else None
    history = model.fit(dataset_creator.DatasetCreator(
        self._get_dataset_fn(), input_options),
                        epochs=10,
                        steps_per_epoch=10,
                        verbose=0)
    self.assertLen(history.history["loss"], 10)

  def test_dataset_creator_input_options(self):
    """InputOptions survive into the distributed dataset built by fit."""
    dataset_fn = lambda _: tf.data.Dataset.from_tensor_slices([1, 1])
    input_options = tf.distribute.InputOptions(
        experimental_fetch_to_device=True,
        experimental_per_replica_buffer_size=2)
    x = dataset_creator.DatasetCreator(dataset_fn, input_options=input_options)
    with tf.distribute.MultiWorkerMirroredStrategy().scope():
      data_handler = data_adapter.get_data_handler(
          x,
          steps_per_epoch=2,
          model=sequential.Sequential([core_layers.Dense(10)]))

    # Ensuring the resulting `DistributedDatasetsFromFunction` has the right
    # options.
    self.assertTrue(data_handler._dataset._options.experimental_fetch_to_device)
    self.assertEqual(
        data_handler._dataset._options.experimental_per_replica_buffer_size, 2)

  def test_dataset_creator_input_options_with_cluster_coordinator(self):
    """InputOptions survive into per-worker iterators under a coordinator."""
    dataset_fn = lambda _: tf.data.Dataset.from_tensor_slices([1, 1])
    input_options = tf.distribute.InputOptions(
        experimental_fetch_to_device=True,
        experimental_per_replica_buffer_size=2)
    x = dataset_creator.DatasetCreator(dataset_fn, input_options=input_options)
    strategy = self._get_parameter_server_strategy()
    with strategy.scope():
      model = sequential.Sequential([core_layers.Dense(10)])
      model._cluster_coordinator = (
          tf.distribute.experimental.coordinator.ClusterCoordinator(strategy))
      data_handler = data_adapter.get_data_handler(
          x, steps_per_epoch=2, model=model)

    # Materialize the per-worker iterator on worker 0 to inspect its options.
    iter_rv = iter(data_handler._dataset)._values[0]
    iter_rv._rebuild_on(model._cluster_coordinator._cluster.workers[0])
    distributed_iterator = iter_rv._get_values()

    # Ensuring the resulting `DistributedIterator` has the right options.
    self.assertTrue(distributed_iterator._options.experimental_fetch_to_device)
    self.assertEqual(
        distributed_iterator._options.experimental_per_replica_buffer_size, 2)
class AdagradOptimizerTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for the Keras Adagrad optimizer against a NumPy reference.

  Each test runs a few optimizer steps and compares variable values with
  `adagrad_update_numpy` / `sparse_adagrad_update_numpy` (defined elsewhere
  in this file).
  """

  def doTestBasic(self, use_callable_params=False):
    """Dense updates match the NumPy reference; optionally a callable LR."""
    for dtype in _DATA_TYPES:
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
      var0 = tf.Variable(var0_np)
      var1 = tf.Variable(var1_np)
      grads0 = tf.constant(grads0_np)
      grads1 = tf.constant(grads1_np)

      learning_rate = lambda: 3.0
      if not use_callable_params:
        learning_rate = learning_rate()

      ada_opt = adagrad.Adagrad(learning_rate)

      # Adagrad's accumulator starts at the optimizer default of 0.1.
      accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

      if not tf.executing_eagerly():
        ada_update = ada_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

      # Fetch params to validate initial values
      v0_val, v1_val = self.evaluate([var0, var1])
      self.assertAllClose([1.0, 2.0], v0_val)
      self.assertAllClose([3.0, 4.0], v1_val)

      # Run 3 steps of adagrad
      for _ in range(3):
        if not tf.executing_eagerly():
          self.evaluate(ada_update)
        else:
          ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                  grads0_np, 3.0)
        var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                  grads1_np, 3.0)
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testBasic(self):
    self.doTestBasic()

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testBasicCallableParams(self):
    # Callable hyperparameters require eager execution.
    self.doTestBasic(use_callable_params=True)

  def testBasicWithLearningRateDecay(self):
    """The `decay` argument yields lr / (1 + decay * step)."""
    for dtype in _DATA_TYPES:
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
      var0 = tf.Variable(var0_np)
      var1 = tf.Variable(var1_np)
      grads0 = tf.constant(grads0_np)
      grads1 = tf.constant(grads1_np)

      learning_rate = 3.0
      decay = 0.5

      ada_opt = adagrad.Adagrad(learning_rate, decay=decay)

      accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

      if not tf.executing_eagerly():
        ada_update = ada_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

      # Fetch params to validate initial values
      v0_val, v1_val = self.evaluate([var0, var1])
      self.assertAllClose([1.0, 2.0], v0_val)
      self.assertAllClose([3.0, 4.0], v1_val)

      # Run 3 steps of adagrad
      for t in range(3):
        if not tf.executing_eagerly():
          self.evaluate(ada_update)
        else:
          ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        # Inverse-time decay of the learning rate, mirrored in NumPy.
        lr_np = learning_rate / (1 + decay * t)
        var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                  grads0_np, lr_np)
        var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                  grads1_np, lr_np)
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testBasicWithLargeEpsilon(self):
    """A non-default epsilon (1.0) is honored in the update."""
    var0_np = np.array([1.0, 2.0])
    var1_np = np.array([3.0, 4.0])
    grads0_np = np.array([0.1, 0.1])
    grads1_np = np.array([0.01, 0.01])
    var0 = tf.Variable(var0_np)
    var1 = tf.Variable(var1_np)
    grads0 = tf.constant(grads0_np)
    grads1 = tf.constant(grads1_np)

    learning_rate = 3.0

    ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.0)

    accum0_np = np.array([0.1, 0.1])
    accum1_np = np.array([0.1, 0.1])

    if not tf.executing_eagerly():
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.evaluate(tf.compat.v1.global_variables_initializer())

    # Fetch params to validate initial values
    v0_val, v1_val = self.evaluate([var0, var1])
    self.assertAllClose([1.0, 2.0], v0_val)
    self.assertAllClose([3.0, 4.0], v1_val)

    # Run 3 steps of adagrad
    for _ in range(3):
      if not tf.executing_eagerly():
        self.evaluate(ada_update)
      else:
        ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np, grads0_np,
                                                3.0, 1.0)
      var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np, grads1_np,
                                                3.0, 1.0)
      self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
      self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testBasicWithLearningRateInverseTimeDecay(self):
    """An InverseTimeDecay schedule matches the closed-form lr sequence."""
    for dtype in _DATA_TYPES:
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
      var0 = tf.Variable(var0_np)
      var1 = tf.Variable(var1_np)
      grads0 = tf.constant(grads0_np)
      grads1 = tf.constant(grads1_np)

      learning_rate = 3.0
      decay = 0.5
      lr_schedule = learning_rate_schedule.InverseTimeDecay(
          learning_rate, decay_steps=1.0, decay_rate=decay)

      ada_opt = adagrad.Adagrad(lr_schedule)

      accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

      if not tf.executing_eagerly():
        ada_update = ada_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

      # Fetch params to validate initial values
      v0_val, v1_val = self.evaluate([var0, var1])
      self.assertAllClose([1.0, 2.0], v0_val)
      self.assertAllClose([3.0, 4.0], v1_val)

      # Run 3 steps of adagrad
      for t in range(3):
        if not tf.executing_eagerly():
          self.evaluate(ada_update)
        else:
          ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        lr_np = learning_rate / (1 + decay * t)
        var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                  grads0_np, lr_np)
        var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                  grads1_np, lr_np)
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testMinimizeSparseResourceVariable(self):
    """minimize() with an embedding-lookup loss updates only row 0."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in _DATA_TYPES:
        var0 = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=dtype)
        x = tf.constant([[4.0], [5.0]], dtype=dtype)

        def loss():
          pred = tf.matmul(
              tf.compat.v1.nn.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
          return pred * pred

        sgd_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Fetch params to validate initial values
        self.assertAllCloseAccordingToType([[1.0, 2.0], [3.0, 4.0]],
                                           self.evaluate(var0))
        # Run 1 step of sgd
        self.evaluate(sgd_op)
        # Validate updated params: only the looked-up row changes.
        self.assertAllCloseAccordingToType([[0, 1], [3, 4]],
                                           self.evaluate(var0),
                                           atol=0.01)

  def testTensorLearningRate(self):
    """A Tensor learning rate behaves like the equivalent Python float."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in _DATA_TYPES:
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        learning_rate = tf.constant(3.0)
        ada_opt = adagrad.Adagrad(learning_rate)
        ada_update = ada_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        # Run 3 steps of adagrad
        for _ in range(3):
          self.evaluate(ada_update)
          var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                    grads0_np, learning_rate)
          var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                    grads1_np, learning_rate)
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testSparseBasic(self):
    """IndexedSlices gradients update only the sliced indices."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in _DATA_TYPES:
        var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        # Gradients present only at indices 0 and 2.
        grads0_np_indices = np.array([0, 2], dtype=np.int32)
        grads0 = tf.IndexedSlices(
            tf.constant(grads0_np[grads0_np_indices]),
            tf.constant(grads0_np_indices), tf.constant([3]))
        grads1_np_indices = np.array([0, 2], dtype=np.int32)
        grads1 = tf.IndexedSlices(
            tf.constant(grads1_np[grads1_np_indices]),
            tf.constant(grads1_np_indices), tf.constant([3]))
        learning_rate = 3.0
        ada_opt = adagrad.Adagrad(learning_rate)
        ada_update = ada_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1))

        accum0_np = np.array([0.1, 0.1, 0.1], dtype=dtype.as_numpy_dtype)
        accum1_np = np.array([0.1, 0.1, 0.1], dtype=dtype.as_numpy_dtype)

        # Run 3 step of sgd
        for _ in range(3):
          self.evaluate(ada_update)

          var0_np, accum0_np = sparse_adagrad_update_numpy(
              var0_np, accum0_np, grads0_np_indices,
              grads0_np[grads0_np_indices], learning_rate)
          var1_np, accum1_np = sparse_adagrad_update_numpy(
              var1_np, accum1_np, grads1_np_indices,
              grads1_np[grads1_np_indices], learning_rate)
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testSparseSingleVarDim(self):
    """Sparse update on a single-element variable with non-default epsilon."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in _DATA_TYPES:
        var0_np = np.array([1.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        grads0_np_indices = np.array([0], dtype=np.int32)
        # NOTE(review): the dense shape [3] does not match the 1-element
        # variable; presumably benign because the sparse apply path ignores
        # dense_shape -- confirm.
        grads0 = tf.IndexedSlices(
            tf.constant(grads0_np[grads0_np_indices]),
            tf.constant(grads0_np_indices), tf.constant([3]))
        learning_rate = 3.0
        ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.)
        ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        self.assertAllClose([1.0], self.evaluate(var0))

        accum0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)

        # Run 3 step of sgd
        for _ in range(3):
          self.evaluate(ada_update)

          var0_np, accum0_np = sparse_adagrad_update_numpy(
              var0_np, accum0_np, grads0_np_indices,
              grads0_np[grads0_np_indices], learning_rate, epsilon=1.)
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))

  def testSparseRepeatedIndices(self):
    """Repeated sparse indices accumulate like a single aggregated gradient."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in _DATA_TYPES:
        var_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype)

        repeated_index_update_var = tf.Variable(var_np, dtype=dtype)
        aggregated_update_var = tf.Variable(var_np, dtype=dtype)
        # Two 0.1 gradients at index 1 should equal one 0.2 gradient there.
        grad_repeated_index = tf.IndexedSlices(
            tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
            tf.constant([1, 1]), tf.constant([2, 1]))
        grad_aggregated = tf.IndexedSlices(
            tf.constant([0.2], shape=[1, 1], dtype=dtype),
            tf.constant([1]), tf.constant([2, 1]))
        repeated_update = adagrad.Adagrad(3.0).apply_gradients([
            (grad_repeated_index, repeated_index_update_var)
        ])
        aggregated_update = adagrad.Adagrad(3.0).apply_gradients([
            (grad_aggregated, aggregated_update_var)
        ])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(aggregated_update_var),
                            self.evaluate(repeated_index_update_var))
        for _ in range(3):
          self.evaluate(repeated_update)
          self.evaluate(aggregated_update)
          self.assertAllClose(
              self.evaluate(aggregated_update_var),
              self.evaluate(repeated_index_update_var))

  def testSparseRepeatedIndicesByEmbeddingLookUp(self):
    """Duplicate embedding lookups match the doubled single-lookup loss."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in _DATA_TYPES:
        var_repeated = tf.Variable([1.0, 2.0], dtype=dtype)
        loss_repeated = lambda: tf.reduce_sum(  # pylint: disable=g-long-lambda
            tf.compat.v1.nn.embedding_lookup(var_repeated, [0, 0]))  # pylint: disable=cell-var-from-loop
        var_aggregated = tf.Variable([1.0, 2.0], dtype=dtype)
        loss_aggregated = lambda: 2 * tf.reduce_sum(  # pylint: disable=g-long-lambda
            tf.compat.v1.nn.embedding_lookup(var_aggregated, [0]))  # pylint: disable=cell-var-from-loop
        update_op_repeated = adagrad.Adagrad(2.0).minimize(
            loss_repeated, var_list=[var_repeated])
        update_op_aggregated = adagrad.Adagrad(2.0).minimize(
            loss_aggregated, var_list=[var_aggregated])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllCloseAccordingToType(
            self.evaluate(var_repeated), self.evaluate(var_aggregated))
        for _ in range(3):
          self.evaluate(update_op_repeated)
          self.evaluate(update_op_aggregated)
          self.assertAllCloseAccordingToType(
              self.evaluate(var_repeated), self.evaluate(var_aggregated))

  def testSparseStability(self):
    """Half-precision sparse updates remain numerically stable."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in [tf.half]:
        shape = [1, 6]
        var0_np = np.array([[
            0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257, -0.0105945
        ]],
                           dtype=dtype.as_numpy_dtype)
        var0 = tf.Variable(var0_np)
        grads0_np = np.array([[
            -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05, -8.4877e-05,
            -9.48906e-05
        ]],
                             dtype=dtype.as_numpy_dtype)
        grads0 = tf.IndexedSlices(
            tf.constant(grads0_np), tf.constant([0]), tf.constant(shape))
        ada_opt = adagrad.Adagrad(1.0)
        ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
        slot0 = ada_opt.get_slot(var0, "accumulator")
        init = tf.compat.v1.global_variables_initializer()
        # NOTE(review): the initializer runs inside the loop, resetting the
        # variables each iteration, so every pass applies the same single
        # update -- apparently intentional for a stability check; confirm.
        for _ in range(100):
          self.evaluate(init)
          self.evaluate(ada_update)
          self.assertAllCloseAccordingToType(
              np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]),
              self.evaluate(slot0))
          self.assertAllCloseAccordingToType(
              np.array([[
                  0.00891194, -0.10712013, 0.11047515, 0.22636929, -0.0144573,
                  -0.01029443
              ]]), self.evaluate(var0))

  def testSharing(self):
    """Two apply_gradients ops from one optimizer share the accumulators."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in _DATA_TYPES:
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        learning_rate = 3.0
        ada_opt = adagrad.Adagrad(learning_rate)
        # Apply the optimizer twice. Both applications will use
        # the same accums.
        ada_update1 = ada_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        ada_update2 = ada_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        slot0 = ada_opt.get_slot(var0, "accumulator")
        self.assertEqual(slot0.shape, var0.shape)
        slot1 = ada_opt.get_slot(var1, "accumulator")
        self.assertEqual(slot1.shape, var1.shape)
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values.
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        # Mix the first and the second adagrad for 3 steps.
        self.evaluate(ada_update1)
        self.evaluate(ada_update2)
        self.evaluate(ada_update1)

        accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        for _ in range(3):
          var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                    grads0_np, learning_rate)
          var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                    grads1_np, learning_rate)
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testConstructAdagradWithLR(self):
    """The deprecated `lr` kwarg aliases `learning_rate` and wins over it."""
    opt = adagrad.Adagrad(lr=1.0)
    opt_2 = adagrad.Adagrad(learning_rate=0.1, lr=1.0)
    opt_3 = adagrad.Adagrad(learning_rate=0.1)
    self.assertIsInstance(opt.lr, tf.Variable)
    self.assertIsInstance(opt_2.lr, tf.Variable)
    self.assertIsInstance(opt_3.lr, tf.Variable)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(opt.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
class AdadeltaOptimizerTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for the Keras v2 Adadelta optimizer against a numpy reference."""

  def doTestBasic(self, use_resource=False, use_callable_params=False):
    """Runs Adadelta over a grid of dtypes/grads/lrs and checks each step.

    Args:
      use_resource: kept for signature parity with sibling tests; both
        branches below currently create identical `tf.Variable`s.
      use_callable_params: if True, pass learning_rate/rho as zero-arg
        callables instead of floats.
    """
    num_updates = 4  # number of ADADELTA steps to perform
    for dtype in _DATA_TYPES:
      for grad in [0.2, 0.1, 0.01]:
        for lr in [1.0, 0.5, 0.1]:
          var0_init = [1.0, 2.0]
          var1_init = [3.0, 4.0]
          if use_resource:
            var0 = tf.Variable(var0_init, dtype=dtype)
            var1 = tf.Variable(var1_init, dtype=dtype)
          else:
            var0 = tf.Variable(var0_init, dtype=dtype)
            var1 = tf.Variable(var1_init, dtype=dtype)

          grads = tf.constant([grad, grad], dtype=dtype)

          # Numpy-side accumulator state, updated in lockstep with the op.
          accum = 0.0
          accum_update = 0.0

          # ADADELTA gradient optimizer
          rho = 0.95
          epsilon = 1e-8
          if use_callable_params:
            adadelta_opt = adadelta.Adadelta(
                learning_rate=lambda: lr,  # pylint: disable=cell-var-from-loop
                rho=lambda: rho,  # pylint: disable=cell-var-from-loop
                epsilon=epsilon)  # pylint: disable=cell-var-from-loop
          else:
            adadelta_opt = adadelta.Adadelta(
                learning_rate=lr, rho=rho, epsilon=epsilon)
          if not tf.executing_eagerly():
            adadelta_update = adadelta_opt.apply_gradients(
                zip([grads, grads], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())

            # Assign slots
            slot = [None] * 2
            slot_update = [None] * 2
            slot[0] = adadelta_opt.get_slot(var0, "accum_grad")
            self.assertEqual(slot[0].shape, var0.shape)

            slot_update[0] = adadelta_opt.get_slot(var0, "accum_var")
            self.assertEqual(slot_update[0].shape, var0.shape)

            slot[1] = adadelta_opt.get_slot(var1, "accum_grad")
            self.assertEqual(slot[1].shape, var1.shape)

            slot_update[1] = adadelta_opt.get_slot(var1, "accum_var")
            self.assertEqual(slot_update[1].shape, var1.shape)

          # Fetch params to validate initial values
          self.assertAllClose(var0_init, self.evaluate(var0))
          self.assertAllClose(var1_init, self.evaluate(var1))

          update = [None] * num_updates
          tot_update = 0
          for step in range(num_updates):
            # Run adadelta update for comparison
            if not tf.executing_eagerly():
              self.evaluate(adadelta_update)
            else:
              adadelta_opt.apply_gradients(zip([grads, grads], [var0, var1]))

            # Perform initial update without previous accum values
            accum = accum * rho + (grad**2) * (1 - rho)
            update[step] = (
                np.sqrt(accum_update + epsilon) *
                (1. / np.sqrt(accum + epsilon)) * grad)
            accum_update = (
                accum_update * rho + (update[step]**2) * (1.0 - rho))
            tot_update += update[step] * lr

            if not tf.executing_eagerly():
              # Check that the accumulators have been updated
              # TODO(lxuechen): This is hard to test in eager mode
              for slot_idx in range(2):
                self.assertAllCloseAccordingToType(
                    np.array([accum, accum], dtype=dtype.as_numpy_dtype(0)),
                    self.evaluate(slot[slot_idx]),
                    rtol=1e-5)

                self.assertAllCloseAccordingToType(
                    np.array(
                        [accum_update, accum_update],
                        dtype=dtype.as_numpy_dtype(0)),
                    self.evaluate(slot_update[slot_idx]),
                    rtol=1e-5)

              # Check that the parameters have been updated
              self.assertAllCloseAccordingToType(
                  np.array(
                      [var0_init[0] - tot_update, var0_init[1] - tot_update],
                      dtype=dtype.as_numpy_dtype(0)),
                  self.evaluate(var0),
                  rtol=1e-5)

              self.assertAllCloseAccordingToType(
                  np.array(
                      [var1_init[0] - tot_update, var1_init[1] - tot_update],
                      dtype=dtype.as_numpy_dtype(0)),
                  self.evaluate(var1),
                  rtol=1e-5)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testResourceBasic(self):
    self.doTestBasic(use_resource=True)

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testBasicCallableParams(self):
    self.doTestBasic(use_resource=True, use_callable_params=True)

  def testMinimizeSparseResourceVariable(self):
    """Adadelta minimize() through a sparse embedding_lookup read."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in _DATA_TYPES:
        var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
        x = tf.constant([[4.0], [5.0]], dtype=dtype)

        def loss():
          pred = tf.matmul(tf.compat.v1.nn.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
          return pred * pred

        sgd_op = adadelta.Adadelta(1.0, 1.0, 1.0).minimize(
            loss, var_list=[var0])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Fetch params to validate initial values
        self.assertAllCloseAccordingToType([[1.0, 2.0]], self.evaluate(var0))
        # Run 1 step of sgd
        self.evaluate(sgd_op)
        # Validate updated params
        self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0))

  def testConstructAdadeltaWithLR(self):
    """Legacy `lr` kwarg aliases `learning_rate`; `lr` wins when both given."""
    opt = adadelta.Adadelta(lr=1.0, rho=0.9, epsilon=1.)
    opt_2 = adadelta.Adadelta(learning_rate=0.1, rho=0.9, epsilon=1., lr=1.0)
    opt_3 = adadelta.Adadelta(learning_rate=0.1, rho=0.9, epsilon=1.)
    self.assertIsInstance(opt.lr, tf.Variable)
    self.assertIsInstance(opt_2.lr, tf.Variable)
    self.assertIsInstance(opt_3.lr, tf.Variable)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(opt.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_3.lr), (0.1))

  def testConstructAdadeltaWithEpsilonValues(self):
    """epsilon=None falls back to the documented default of 1e-7."""
    opt = adadelta.Adadelta(epsilon=None)
    self.assertEqual(opt.epsilon, 1e-7)

    opt = adadelta.Adadelta(epsilon=1e-8)
    self.assertEqual(opt.epsilon, 1e-8)
class AdamOptimizerTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for the Keras v2 Adam optimizer against numpy reference updates."""

  def testSparse(self):
    """Sparse (IndexedSlices) gradients match the dense numpy reference."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.0, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.0, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        # The middle element has a zero gradient and is omitted from the
        # sparse slices.
        grads0_np_indices = np.array([0, 2], dtype=np.int32)
        grads0 = tf.IndexedSlices(
            tf.constant(grads0_np[grads0_np_indices]),
            tf.constant(grads0_np_indices), tf.constant([3]))
        grads1_np_indices = np.array([0, 2], dtype=np.int32)
        grads1 = tf.IndexedSlices(
            tf.constant(grads1_np[grads1_np_indices]),
            tf.constant(grads1_np_indices), tf.constant([3]))
        opt = adam.Adam()
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1))

        beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
        # Run 3 steps of Adam
        for t in range(3):
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          update.run()

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testSparseDevicePlacement(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for index_dtype in [tf.int32, tf.int64]:
      with tf.Graph().as_default(), self.cached_session(
          force_gpu=tf.test.is_gpu_available()):
        # If a GPU is available, tests that all optimizer ops can be placed on
        # it (i.e. they have GPU kernels).
        var = tf.Variable([[1.0], [2.0]])
        indices = tf.constant([0, 1], dtype=index_dtype)
        g_sum = lambda: tf.reduce_sum(tf.gather(var, indices))  # pylint: disable=cell-var-from-loop
        optimizer = adam.Adam(3.0)
        minimize_op = optimizer.minimize(g_sum, var_list=[var])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        minimize_op.run()

  def testSparseRepeatedIndices(self):
    """Repeated sparse indices accumulate like a single aggregated slice."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        repeated_index_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
        aggregated_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
        # Index 1 appears twice with 0.1 each; equivalent to one 0.2 update.
        grad_repeated_index = tf.IndexedSlices(
            tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
            tf.constant([1, 1]), tf.constant([2, 1]))
        grad_aggregated = tf.IndexedSlices(
            tf.constant([0.2], shape=[1, 1], dtype=dtype),
            tf.constant([1]), tf.constant([2, 1]))
        repeated_update = adam.Adam().apply_gradients(
            [(grad_repeated_index, repeated_index_update_var)])
        aggregated_update = adam.Adam().apply_gradients(
            [(grad_aggregated, aggregated_update_var)])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(aggregated_update_var,
                            self.evaluate(repeated_index_update_var))
        for _ in range(3):
          repeated_update.run()
          aggregated_update.run()
          self.assertAllClose(aggregated_update_var,
                              self.evaluate(repeated_index_update_var))

  def doTestBasic(self, use_callable_params=False):
    """Dense Adam over three dtypes, checked step-by-step against numpy.

    Args:
      use_callable_params: if True, the learning rate is passed as a zero-arg
        callable; otherwise all hyperparameters are resolved to floats first.
    """
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        # Hyperparameters start as callables and are unwrapped when plain
        # values are requested.
        learning_rate = lambda: 0.001
        beta1 = lambda: 0.9
        beta2 = lambda: 0.999
        epsilon = lambda: 1e-8
        if not use_callable_params:
          learning_rate = learning_rate()
          beta1 = beta1()
          beta2 = beta2()
          epsilon = epsilon()

        opt = adam.Adam(learning_rate=learning_rate)
        if not tf.executing_eagerly():
          update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 3 steps of Adam
        for t in range(3):
          beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          if not tf.executing_eagerly():
            self.evaluate(update)
          else:
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testResourceBasic(self):
    self.doTestBasic()

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testBasicCallableParams(self):
    self.doTestBasic(use_callable_params=True)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testBasicWithAmsgrad(self):
    """Dense Adam with amsgrad=True against the amsgrad numpy reference."""
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, v0hat, m1, v1, v1hat = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        opt = adam.Adam(amsgrad=True)
        if not tf.executing_eagerly():
          update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 3 steps of Adam
        for t in range(3):
          beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          if not tf.executing_eagerly():
            self.evaluate(update)
          else:
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

          var0_np, m0, v0, v0hat = adam_update_numpy_amsgrad(
              var0_np, grads0_np, t, m0, v0, v0hat)
          var1_np, m1, v1, v1hat = adam_update_numpy_amsgrad(
              var1_np, grads1_np, t, m1, v1, v1hat)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testSparseWithAmsgrad(self):
    """Sparse amsgrad updates: repeated indices equal the aggregated slice."""
    # dtypes.half does not work on gpu + eager.
    for dtype in [tf.float32, tf.float64]:
      with self.cached_session():
        m0 = np.array([[0.0], [0.0]])
        v0 = np.array([[0.0], [0.0]])
        v0hat = np.array([[0.0], [0.0]])
        indices_np = np.array([1])
        indices = tf.constant(indices_np, dtype=tf.int32)
        var0_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype)
        repeated_index_update_var = tf.Variable(var0_np, dtype=dtype)
        aggregated_update_var = tf.Variable(var0_np, dtype=dtype)
        grads0_np = np.array([[0.2]], dtype=dtype.as_numpy_dtype)
        grad_repeated_index = tf.IndexedSlices(
            tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
            tf.constant([1, 1]), tf.constant([2, 1]))
        grad_aggregated = tf.IndexedSlices(grads0_np, indices,
                                           tf.constant([2, 1]))
        opt_repeated = adam.Adam(amsgrad=True)
        opt_aggregated = adam.Adam(amsgrad=True)
        if not tf.executing_eagerly():
          repeated_update = opt_repeated.apply_gradients(
              [(grad_repeated_index, repeated_index_update_var)])
          aggregated_update = opt_aggregated.apply_gradients(
              [(grad_aggregated, aggregated_update_var)])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(
            self.evaluate(aggregated_update_var),
            self.evaluate(repeated_index_update_var))
        for t in range(3):
          if not tf.executing_eagerly():
            self.evaluate(repeated_update)
            self.evaluate(aggregated_update)
          else:
            opt_repeated.apply_gradients(
                [(grad_repeated_index, repeated_index_update_var)])
            opt_aggregated.apply_gradients(
                [(grad_aggregated, aggregated_update_var)])

          var0_np, m0, v0, v0hat = adam_sparse_update_numpy_amsgrad(
              var0_np, indices_np, grads0_np, t, m0, v0, v0hat)

          # Validate updated params
          self.assertAllCloseAccordingToType(
              var0_np, self.evaluate(aggregated_update_var))
          self.assertAllCloseAccordingToType(
              self.evaluate(aggregated_update_var),
              self.evaluate(repeated_index_update_var))

  def testBasicWithLearningRateDecay(self):
    """Adam with the legacy `decay` argument: lr decays as lr/(1 + decay*t)."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 0.001
        beta_1 = 0.9
        beta_2 = 0.999
        epsilon = 1e-7
        decay = 0.5

        opt = adam.Adam(
            learning_rate=learning_rate,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            decay=decay)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 3 steps of Adam
        for t in range(3):
          self.evaluate(update)

          # Mirror the inverse-time decay applied by the optimizer.
          lr_np = learning_rate / (1 + decay * t)

          var0_np, m0, v0 = adam_update_numpy(
              var0_np, grads0_np, t, m0, v0, lr=lr_np)
          var1_np, m1, v1 = adam_update_numpy(
              var1_np, grads1_np, t, m1, v1, lr=lr_np)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testBasicWithLearningRateInverseTimeDecay(self):
    """Adam with an InverseTimeDecay schedule matches the same numpy decay."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 0.001
        decay = 0.5
        lr_schedule = learning_rate_schedule.InverseTimeDecay(
            learning_rate, decay_steps=1.0, decay_rate=decay)
        beta_1 = 0.9
        beta_2 = 0.999
        epsilon = 1e-7

        opt = adam.Adam(
            learning_rate=lr_schedule,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 3 steps of Adam
        for t in range(3):
          self.evaluate(update)

          lr_np = learning_rate / (1 + decay * t)

          var0_np, m0, v0 = adam_update_numpy(
              var0_np, grads0_np, t, m0, v0, lr=lr_np)
          var1_np, m1, v1 = adam_update_numpy(
              var1_np, grads1_np, t, m1, v1, lr=lr_np)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testTensorLearningRate(self):
    """The learning rate may be given as a tf.constant tensor."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        opt = adam.Adam(tf.constant(0.001))
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
        # Run 3 steps of Adam
        for t in range(3):
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          update.run()

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testSharing(self):
    """Two apply_gradients ops from one Adam share slots and accumulators."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        opt = adam.Adam()
        update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Run 3 steps of intertwined Adam1 and Adam2.
        for t in range(3):
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          if t % 2 == 0:
            update1.run()
          else:
            update2.run()

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testSlotsUniqueEager(self):
    v1 = tf.Variable(1.)
    v2 = tf.Variable(1.)
    opt = adam.Adam(1.)
    opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
    # There should be iteration, and two unique slot variables for v1 and v2.
    self.assertLen(set(v.ref() for v in opt.variables()), 5)
    self.assertEqual(
        self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations))

  def testSetWeightsFromV1AdamWithoutMinimize(self):
    """A v2 Adam accepts weights from a v1 Adam that never ran minimize()."""
    keras_v1_adam = optimizer_v1.Adam()
    keras_v2_adam = adam.Adam()
    keras_v2_adam.set_weights(keras_v1_adam.get_weights())
    keras_v1_iteration = keras_v1_adam.iterations
    keras_v2_iteration = keras_v2_adam.iterations
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertEqual(
        self.evaluate(keras_v1_iteration), self.evaluate(keras_v2_iteration))

  def testConstructAdamWithLR(self):
    """Legacy `lr` kwarg aliases `learning_rate`; `lr` wins when both given."""
    opt = adam.Adam(lr=1.0)
    opt_2 = adam.Adam(learning_rate=0.1, lr=1.0)
    opt_3 = adam.Adam(learning_rate=0.1)
    self.assertIsInstance(opt.lr, tf.Variable)
    self.assertIsInstance(opt_2.lr, tf.Variable)
    self.assertIsInstance(opt_3.lr, tf.Variable)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(opt.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
class CheckpointCompatibilityTests(keras_parameterized.TestCase):
  """Compatibility between name-based (v1 Saver) and object-based checkpoints."""

  def _initialized_model(self):
    """Builds MyModel + Adam, runs one train step, sets sentinel values."""
    input_value = tf.constant([[3.]])
    model = MyModel()
    optimizer = adam.Adam(0.001)
    root_trackable = tf.train.Checkpoint(optimizer=optimizer, model=model)
    with tf.GradientTape() as tape:
      loss = model(input_value)
    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    train_op = optimizer.apply_gradients(zip(gradients, variables))
    self.evaluate(trackable_utils.gather_initializers(root_trackable))
    self.evaluate(train_op)
    # A regular variable, a slot variable, and a non-slot Optimizer variable
    # with known values to check when loading.
    self.evaluate(model._named_dense.bias.assign([1.]))
    self.evaluate(
        optimizer.get_slot(var=model._named_dense.bias,
                           slot_name="m").assign([2.]))
    self.evaluate(optimizer.beta_1.assign(3.))
    return root_trackable

  def _set_sentinels(self, root_trackable):
    """Overwrites the three sentinel values so a restore is detectable."""
    self.evaluate(root_trackable.model._named_dense.bias.assign([101.]))
    self.evaluate(
        root_trackable.optimizer.get_slot(
            var=root_trackable.model._named_dense.bias,
            slot_name="m").assign([102.]))
    self.evaluate(root_trackable.optimizer.beta_1.assign(103.))

  def _check_sentinels(self, root_trackable):
    """Asserts the sentinels hold the values written by _initialized_model."""
    self.assertAllEqual(
        [1.], self.evaluate(root_trackable.model._named_dense.bias))
    self.assertAllEqual([2.], self.evaluate(
        root_trackable.optimizer.get_slot(
            var=root_trackable.model._named_dense.bias, slot_name="m")))
    self.assertAllEqual(3., self.evaluate(root_trackable.optimizer.beta_1))

  def _write_name_based_checkpoint(self):
    """Saves an initialized model with the v1 name-based Saver; returns path."""
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    with context.graph_mode():
      save_graph = tf.Graph()
      with save_graph.as_default(), self.session(
          graph=save_graph) as session:
        root = self._initialized_model()
        name_saver = tf.compat.v1.train.Saver()
        return name_saver.save(
            sess=session,
            save_path=checkpoint_prefix,
            global_step=root.optimizer.iterations)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testLoadFromNameBasedSaver(self):
    """Save a name-based checkpoint, load it using the object-based API."""
    with testing_utils.device(should_use_gpu=True):
      with self.test_session():
        save_path = self._write_name_based_checkpoint()
        root = self._initialized_model()
        self._set_sentinels(root)
        with self.assertRaises(AssertionError):
          self._check_sentinels(root)
        object_saver = trackable_utils.TrackableSaver(
            graph_view.ObjectGraphView(root))
        self._set_sentinels(root)
        status = object_saver.restore(save_path)
        if tf.executing_eagerly():
          self._check_sentinels(root)
        if tf.executing_eagerly():
          status.assert_consumed()
          status.assert_existing_objects_matched()
          status.assert_nontrivial_match()
        else:
          # When graph building, we haven't read any keys, so we don't know
          # whether the restore will be complete.
          with self.assertRaisesRegex(AssertionError, "not restored"):
            status.assert_consumed()
          with self.assertRaisesRegex(AssertionError, "not restored"):
            status.assert_existing_objects_matched()
          with self.assertRaisesRegex(AssertionError, "not restored"):
            status.assert_nontrivial_match()
        status.run_restore_ops()
        self._check_sentinels(root)
        self._set_sentinels(root)
        status = object_saver.restore(save_path)
        status.initialize_or_restore()
        status.assert_nontrivial_match()
        self._check_sentinels(root)
        # Check that there is no error when keys are missing from the
        # name-based checkpoint.
        root.not_in_name_checkpoint = tf.Variable([1.])
        status = object_saver.restore(save_path)
        with self.assertRaises(AssertionError):
          status.assert_existing_objects_matched()

  def testSaveGraphLoadEager(self):
    """Object-based checkpoint written in graph mode restores in eager mode."""
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    with context.graph_mode():
      save_graph = tf.Graph()
      with save_graph.as_default(), self.session(graph=save_graph):
        root = self._initialized_model()
        save_path = root.save(file_prefix=checkpoint_prefix)
    with tf.__internal__.eager_context.eager_mode():
      root = self._initialized_model()
      self._set_sentinels(root)
      root.restore(save_path).assert_consumed()
      self._check_sentinels(root)

  def testSaveEagerLoadGraph(self):
    """Object-based checkpoint written in eager mode restores in graph mode."""
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    with tf.__internal__.eager_context.eager_mode():
      root = self._initialized_model()
      save_path = root.save(file_prefix=checkpoint_prefix)
    with context.graph_mode():
      save_graph = tf.Graph()
      with save_graph.as_default(), self.session(graph=save_graph):
        root = self._initialized_model()
        self._set_sentinels(root)
        root.restore(save_path).assert_consumed().run_restore_ops()
        self._check_sentinels(root)

  def testIgnoreSaveCounter(self):
    """A missing `save_counter` key is tolerated, but not an unrelated one."""
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    with self.cached_session() as session:
      # Create and save a model using Saver() before using a Checkpoint. This
      # generates a snapshot without the Checkpoint's `save_counter`.
      model = sequential.Sequential()
      model.add(core.Flatten(input_shape=(1,)))
      model.add(core.Dense(1))
      name_saver = tf.compat.v1.train.Saver(model.trainable_variables)
      save_path = name_saver.save(
          sess=session, save_path=checkpoint_prefix, global_step=1)
      # Checkpoint.restore must successfully load that checkpoint.
      ckpt = tf.train.Checkpoint(model=model)
      status = ckpt.restore(save_path)
      status.assert_existing_objects_matched()
      # It should, however, refuse to load a checkpoint where an unrelated
      # `save_counter` variable is missing.
      model.layers[1].var = tf.Variable(0., name="save_counter")
      status = ckpt.restore(save_path)
      with self.assertRaises(AssertionError):
        status.assert_existing_objects_matched()
class CheckpointingTests(keras_parameterized.TestCase):
  """Object-based checkpointing of a Model + Optimizer pair."""

  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
  def testNamingWithOptimizer(self):
    """Checks the serialized object-graph names and slot-variable keys."""
    input_value = tf.constant([[3.]])
    model = MyModel()
    # A nuisance Model using the same optimizer. Its slot variables should not
    # go in the checkpoint, since it is never depended on.
    other_model = MyModel()
    optimizer = adam.Adam(0.001)
    step = tf.compat.v1.train.get_or_create_global_step()
    root_trackable = tf.train.Checkpoint(
        optimizer=optimizer, model=model, step=step)

    with tf.GradientTape() as tape:
      loss = model(input_value)
    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    train_op = tf.group(
        optimizer.apply_gradients(zip(gradients, variables)),
        step.assign_add(1))

    with tf.GradientTape() as tape:
      loss = other_model(input_value)
    variables = other_model.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    self.evaluate(trackable_utils.gather_initializers(root_trackable))
    self.evaluate(train_op)
    named_variables, serialized_graph, _ = graph_view.ObjectGraphView(
        root_trackable).serialize_object_graph()
    expected_slot_keys = (
        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
    )
    expected_checkpoint_names = (
        # Created in the root node, so no prefix.
        "step",
        "model/_second/kernel",
        "model/_named_dense/kernel",
        "model/_named_dense/bias",
        # non-Layer dependency of the model
        "model/_non_layer/a_variable",
        "optimizer/learning_rate",
        "optimizer/beta_1",
        "optimizer/beta_2",
        "optimizer/iter",
        "optimizer/decay",
    ) + expected_slot_keys
    suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
    expected_checkpoint_names = [
        name + suffix for name in expected_checkpoint_names
    ]
    named_variables = {v.name: v for v in named_variables}
    self.assertEqual(
        len(expected_checkpoint_names), len(named_variables.keys()))
    # Check that we've mapped to the right variable objects (not exhaustive)
    self.assertEqual("global_step",
                     named_variables["step" + suffix].full_name)
    self.assertEqual(
        "my_model/dense_1/kernel",
        named_variables["model/_second/kernel" + suffix].full_name)
    self.assertEqual(
        "my_model/dense/kernel",
        named_variables["model/_named_dense/kernel" + suffix].full_name)
    self.assertEqual(
        "Adam/beta_1", named_variables["optimizer/beta_1" + suffix].full_name)
    self.assertEqual(
        "Adam/beta_2", named_variables["optimizer/beta_2" + suffix].full_name)
    # Spot check the generated protocol buffers.
    self.assertEqual("optimizer",
                     serialized_graph.nodes[0].children[1].local_name)
    optimizer_node = serialized_graph.nodes[
        serialized_graph.nodes[0].children[1].node_id]
    children = [node.local_name for node in optimizer_node.children]
    self.assertEqual(
        # hyper variable dependencies
        len(["beta_1", "beta_2", "iter", "decay", "learning_rate"]),
        len(children))
    serialized_slot_keys = []
    for slot in optimizer_node.slot_variables:
      for attribute in (serialized_graph.nodes[
          slot.slot_variable_node_id].attributes):
        serialized_slot_keys.append(attribute.checkpoint_key)
    self.assertEqual(
        len([key + suffix for key in expected_slot_keys]),
        len(serialized_slot_keys))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testSaveRestore(self):
    """Round-trips variables and optimizer slots, incl. deferred restore."""
    with self.test_session():
      model = MyModel()
      optimizer = adam.Adam(0.001)
      root_trackable = tf.train.Checkpoint(optimizer=optimizer, model=model)
      input_value = tf.constant([[3.]])
      with tf.GradientTape() as tape:
        loss = model(input_value)
      variables = model.trainable_variables
      gradients = tape.gradient(loss, variables)
      train_op = optimizer.apply_gradients(zip(gradients, variables))
      self.assertFalse(root_trackable.save_counter.trainable)
      self.evaluate(trackable_utils.gather_initializers(root_trackable))
      self.evaluate(train_op)
      prefix = os.path.join(self.get_temp_dir(), "ckpt")
      # Sentinel values written before saving, clobbered afterwards.
      self.evaluate(
          tf.compat.v1.assign(model._named_dense.variables[1], [42.]))
      m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m")
      self.evaluate(tf.compat.v1.assign(m_bias_slot, [1.5]))
      save_path = root_trackable.save(file_prefix=prefix)
      self.evaluate(
          tf.compat.v1.assign(model._named_dense.variables[1], [43.]))
      self.evaluate(tf.compat.v1.assign(root_trackable.save_counter, 3))
      optimizer_variables = self.evaluate(
          sorted(optimizer.variables(), key=lambda v: v.name))
      self.evaluate(tf.compat.v1.assign(m_bias_slot, [-2.]))
      # Immediate restoration
      status = root_trackable.restore(
          save_path=save_path).assert_consumed()
      status.run_restore_ops()
      self.assertAllEqual([42.],
                          self.evaluate(model._named_dense.variables[1]))
      self.assertAllEqual(1, self.evaluate(root_trackable.save_counter))
      self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
      if not tf.executing_eagerly():
        return  # Restore-on-create is only supported when executing eagerly
      on_create_model = MyModel()
      on_create_optimizer = adam.Adam(0.001)
      on_create_root = tf.train.Checkpoint(
          optimizer=on_create_optimizer, model=on_create_model)
      # Deferred restoration
      status = on_create_root.restore(save_path=save_path)
      status.assert_nontrivial_match()
      status.assert_existing_objects_matched()
      with self.assertRaises(AssertionError):
        status.assert_consumed()
      on_create_model(tf.constant([[3.]]))  # create variables
      self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
      self.assertAllEqual([42.],
                          self.evaluate(
                              on_create_model._named_dense.variables[1]))
      on_create_m_bias_slot = on_create_optimizer.get_slot(
          on_create_model._named_dense.variables[1], "m")
      status.assert_existing_objects_matched()
      if not tf.executing_eagerly():
        with self.assertRaises(AssertionError):
          status.assert_consumed()
      # Optimizer slot variables are created when the original variable is
      # restored.
      self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
      dummy_var = tf.Variable([1.])
      on_create_optimizer.minimize(
          loss=dummy_var.read_value, var_list=[dummy_var])
      status.assert_existing_objects_matched()
      status.assert_consumed()
      self.assertAllEqual(
          optimizer_variables,
          # Creation order is different, so .variables() needs to be
          # re-sorted.
          self.evaluate(
              sorted(optimizer.variables(), key=lambda v: v.name)))

  # TODO(allenl): Debug garbage created by this test in python3.
  def testDeferredRestorationUsageEager(self):
    """An idiomatic eager execution example."""
    num_training_steps = 10
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    # Three "continuations" of the same training run, each restarting from
    # the latest checkpoint written by the previous one.
    for training_continuation in range(3):
      model = MyModel()
      optimizer = adam.Adam(0.001)
      root = tf.train.Checkpoint(optimizer=optimizer, model=model)
      root.restore(tf.train.latest_checkpoint(checkpoint_directory))
      for _ in range(num_training_steps):
        # TODO(allenl): Use a Dataset and serialize/checkpoint it.
        input_value = tf.constant([[3.]])
        with tf.GradientTape() as tape:
          loss = model(input_value)
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))
      root.save(file_prefix=checkpoint_prefix)
      # The iteration counter accumulates across continuations, proving the
      # optimizer state round-tripped through the checkpoint.
      self.assertEqual((training_continuation + 1) * num_training_steps,
                       root.optimizer.iterations.numpy())

  def testUsageGraph(self):
    """Expected usage when graph building."""
    with context.graph_mode():
      num_training_steps = 10
      checkpoint_directory = self.get_temp_dir()
      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
      for training_continuation in range(3):
        # A fresh graph per continuation; restore happens inside a session.
        with tf.Graph().as_default():
          model = MyModel()
          optimizer = adam.Adam(0.001)
          root = tf.compat.v1.train.Checkpoint(optimizer=optimizer,
                                               model=model)
          input_value = tf.constant([[3.]])
          with tf.GradientTape() as tape:
            loss = model(input_value)
          variables = model.trainable_variables
          gradients = tape.gradient(loss, variables)
          train_op = optimizer.apply_gradients(zip(gradients, variables))
          checkpoint_path = tf.train.latest_checkpoint(checkpoint_directory)
          with self.session(
              graph=tf.compat.v1.get_default_graph()) as session:
            status = root.restore(save_path=checkpoint_path)
            status.initialize_or_restore(session=session)
            if checkpoint_path is None:
              # First continuation: nothing to restore yet, so neither
              # assertion about a consumed checkpoint can hold.
              self.assertEqual(0, training_continuation)
              with self.assertRaises(AssertionError):
                status.assert_consumed()
              with self.assertRaises(AssertionError):
                status.assert_existing_objects_matched()
            else:
              status.assert_consumed()
              status.assert_existing_objects_matched()
            for _ in range(num_training_steps):
              session.run(train_op)
            root.save(file_prefix=checkpoint_prefix, session=session)
            self.assertEqual((training_continuation + 1) * num_training_steps,
                             session.run(root.optimizer.iterations))
            self.assertEqual(training_continuation + 1,
                             session.run(root.save_counter))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testAgnosticUsage(self):
    """Graph/eager agnostic usage."""
    # Does create garbage when executing eagerly due to ops.Graph() creation.
    with self.test_session():
      num_training_steps = 10
      checkpoint_directory = self.get_temp_dir()
      optimizer = adam.Adam(0.001)

      def _train_fn(model, input_value):
        # One gradient step; returns the apply op (graph) or result (eager).
        with tf.GradientTape() as tape:
          loss = model(input_value)
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        return optimizer.apply_gradients(zip(gradients, variables))

      for training_continuation in range(3):
        with testing_utils.device(should_use_gpu=True):
          model = MyModel()
          root = tf.train.Checkpoint(optimizer=optimizer, model=model)
          manager = tf.train.CheckpointManager(root, checkpoint_directory,
                                               max_to_keep=1)
          status = root.restore(save_path=manager.latest_checkpoint)
          input_value = tf.constant([[3.]])
          train_fn = functools.partial(_train_fn, model, input_value)
          if not tf.executing_eagerly():
            # Build the op once and evaluate it per step when graph building.
            train_fn = functools.partial(self.evaluate, train_fn())
          status.initialize_or_restore()
          for _ in range(num_training_steps):
            train_fn()
          manager.save()
          self.assertEqual((training_continuation + 1) * num_training_steps,
                           self.evaluate(root.optimizer.iterations))
          self.assertEqual(training_continuation + 1,
                           self.evaluate(root.save_counter))

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testPartialRestoreWarningObject(self):
    """Deleting a partially-restored checkpoint logs the unused values."""
    optimizer = adam.Adam(0.0)
    original_root = tf.train.Checkpoint(v1=tf.Variable(2.),
                                        v2=tf.Variable(3.),
                                        optimizer=optimizer)
    # Create a slot variable to save
    optimizer.minimize(original_root.v1.read_value, [original_root.v1])
    prefix = os.path.join(self.get_temp_dir(), "ckpt")
    save_path = original_root.save(prefix)
    # Restore into a checkpoint that only knows about `v1`.
    partial_root = tf.train.Checkpoint(v1=tf.Variable(0.))
    weak_partial_root = weakref.ref(partial_root)
    weak_v1 = weakref.ref(partial_root.v1)
    partial_root.restore(save_path)
    self.assertEqual(2., partial_root.v1.numpy())
    with tf.compat.v1.test.mock.patch.object(logging,
                                             "warning") as mock_log:
      # Dropping the last reference should trigger the partial-restore
      # warning; the weakrefs prove the objects were actually collected.
      del partial_root
      self.assertIsNone(weak_partial_root())
      self.assertIsNone(weak_v1())
      messages = str(mock_log.call_args_list)
    # Unrestored values (v2, the "m" slot for v1) are named; restored v1 and
    # the expect_partial() hint must also appear/not appear as expected.
    self.assertIn("(root).v2'", messages)
    self.assertIn("(root).optimizer's state 'm' for (root).v1", messages)
    self.assertNotIn("(root).v1'", messages)
    self.assertIn("expect_partial()", messages)

  # pylint: disable=cell-var-from-loop
  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testWithDefun(self):
    """Checkpointing works when the model is called inside a tf.function."""
    with self.test_session():
      num_training_steps = 2
      checkpoint_directory = self.get_temp_dir()
      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
      for training_continuation in range(3):
        with testing_utils.device(should_use_gpu=True):
          model = MyModel()
          # Don't actually train so we can test variable values
          optimizer = adam.Adam(0.)
          root = tf.train.Checkpoint(optimizer=optimizer, model=model)
          checkpoint_path = tf.train.latest_checkpoint(checkpoint_directory)
          status = root.restore(save_path=checkpoint_path)

          def train_fn():

            @tf.function
            def _call_model(x):
              return model(x)

            with tf.GradientTape() as tape:
              loss = _call_model(tf.constant([[3.]]))
            gradients = tape.gradient(loss, model.variables)
            return optimizer.apply_gradients(zip(gradients, model.variables))

          if not tf.executing_eagerly():
            train_fn = functools.partial(self.evaluate, train_fn())
          status.initialize_or_restore()
          for _ in range(num_training_steps):
            train_fn()
          if training_continuation > 0:
            # The value assigned in the first continuation must survive since
            # the learning rate is 0 (no actual training happens).
            status.assert_consumed()
            self.assertAllClose([[42.]], self.evaluate(model.variables[0]))
          else:
            self.evaluate(model.variables[0].assign([[42.]]))
          root.save(file_prefix=checkpoint_prefix)
          self.assertEqual((training_continuation + 1) * num_training_steps,
                           self.evaluate(optimizer.iterations))
          self.assertEqual(training_continuation + 1,
                           self.evaluate(root.save_counter))
  # pylint: enable=cell-var-from-loop

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testAnonymousVarsInInit(self):
    """Variables created directly in __init__ are tracked and trainable."""

    class Model(training.Model):

      def __init__(self):
        super(Model, self).__init__()
        self.w = tf.Variable(0.0)
        self.b = tf.Variable(0.0)
        self.vars = [self.w, self.b]

      def call(self, x):
        return x * self.w + self.b

    model = Model()
    optimizer = adam.Adam(learning_rate=0.05)
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
    for _ in range(2):
      checkpoint.save(checkpoint_prefix)
      with tf.GradientTape() as tape:
        loss = (tf.constant(1.)
                - model(tf.constant(1.)))**2
      grad = tape.gradient(loss, model.vars)
      optimizer.apply_gradients([(g, v) for g, v in zip(grad, model.vars)])

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testDeferredSlotRestoration(self):
    """Slot variables restore correctly when created after restore()."""
    with self.test_session():
      checkpoint_directory = self.get_temp_dir()
      root = tf.train.Checkpoint()
      root.var = trackable_utils.add_variable(root, name="var",
                                              initializer=0.)
      optimizer = adam.Adam(0.1)
      variables = [root.var]
      gradients = [1.]
      train_op = optimizer.apply_gradients(zip(gradients, variables))
      # Note that `optimizer` has not been added as a dependency of
      # `root`. Create a one-off grouping so that slot variables for
      # `root.var` get initialized too.
      self.evaluate(trackable_utils.gather_initializers(
          tf.train.Checkpoint(root=root, optimizer=optimizer)))
      self.evaluate(train_op)
      self.evaluate(tf.compat.v1.assign(root.var, 12.))
      no_slots_path = root.save(
          os.path.join(checkpoint_directory, "no_slots"))
      root.optimizer = optimizer
      self.evaluate(tf.compat.v1.assign(root.var, 13.))
      self.evaluate(tf.compat.v1.assign(
          optimizer.get_slot(slot_name="m", var=root.var), 14.))
      slots_path = root.save(
          os.path.join(checkpoint_directory, "with_slots"))
      new_root = tf.train.Checkpoint()
      # Load the slot-containing checkpoint (deferred), then immediately
      # overwrite the non-slot variable (also deferred).
      slot_status = new_root.restore(slots_path)
      no_slot_status = new_root.restore(no_slots_path)
      with self.assertRaises(AssertionError):
        no_slot_status.assert_consumed()
      new_root.var = trackable_utils.add_variable(new_root, name="var",
                                                  shape=[])
      no_slot_status.assert_consumed()
      no_slot_status.run_restore_ops()
      # The later (no-slots) restore wins for the variable value: 12.
      self.assertEqual(12., self.evaluate(new_root.var))
      new_root.optimizer = adam.Adam(0.1)
      slot_status.assert_existing_objects_matched()
      if not tf.executing_eagerly():
        with self.assertRaisesRegex(AssertionError, "Unresolved object"):
          slot_status.assert_consumed()
      self.assertEqual(12., self.evaluate(new_root.var))
      if tf.executing_eagerly():
        # Slot variables are only created with restoring initializers when
        # executing eagerly.
        self.assertEqual(14., self.evaluate(
            new_root.optimizer.get_slot(slot_name="m", var=new_root.var)))
      else:
        # Slot variables are not created eagerly when graph building.
        with self.assertRaises(KeyError):
          new_root.optimizer.get_slot(slot_name="m", var=new_root.var)
      variables = [new_root.var]
      gradients = [1.]
      train_op = new_root.optimizer.apply_gradients(
          zip(gradients, variables))
      # The slot variable now exists; restore() didn't create it, but we
      # should now have a restore op for it.
      slot_status.run_restore_ops()
      if not tf.executing_eagerly():
        # The train op hasn't run when graph building, so the slot variable
        # has its restored value. It has run in eager, so the value will
        # be different.
        self.assertEqual(14., self.evaluate(
            new_root.optimizer.get_slot(slot_name="m", var=new_root.var)))
      self.evaluate(train_op)
      slot_status.assert_consumed()

  def testManySavesGraph(self):
    """Saves after the first should not modify the graph."""
    with context.graph_mode():
      graph = tf.Graph()
      with graph.as_default(), self.session(graph):
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        obj = tf.train.Checkpoint()
        obj.var = tf.Variable(0., name="v")
        obj.opt = adam.Adam(0.1)
        variables = [obj.var]
        gradients = [1.]
        obj.opt.apply_gradients(zip(gradients, variables))
        self.evaluate(trackable_utils.gather_initializers(obj))
        obj.save(checkpoint_prefix)
        # Finalizing the graph makes any further op creation raise, so the
        # second save proves saving adds no new ops.
        graph.finalize()
        obj.save(checkpoint_prefix)

  def testManyRestoresGraph(self):
    """Restores after the first should not modify the graph."""
    with context.graph_mode():
      graph = tf.Graph()
      with graph.as_default(), self.session(graph):
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        obj = tf.train.Checkpoint()
        obj.var = tf.Variable(0., name="v")
        obj.opt = adam.Adam(0.1)
        variables = [obj.var]
        gradients = [1.]
        obj.opt.apply_gradients(zip(gradients, variables))
        self.evaluate(trackable_utils.gather_initializers(obj))
        save_path = obj.save(checkpoint_prefix)
        obj.restore(save_path)
        # As above: a second restore on a finalized graph must not create ops.
        graph.finalize()
        obj.restore(save_path)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def test_sequential(self):
    """Sequential models checkpoint correctly, eagerly and deferred."""
    with self.test_session():
      model = sequential.Sequential()
      checkpoint = tf.train.Checkpoint(model=model)
      model.add(core.Dense(4))
      second_dense = core.Dense(5)
      model.add(second_dense)
      model(tf.constant([[1.]]))
      checkpoint.restore(None).initialize_or_restore()
      self.evaluate(
          second_dense.bias.assign(tf.constant([1., 2., 3., 4., 5.])))
      checkpoint_directory = self.get_temp_dir()
      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
      save_path = checkpoint.save(checkpoint_prefix)
      self.evaluate(
          second_dense.bias.assign(tf.constant([5., 6., 7., 8., 9.])))
      checkpoint.restore(save_path).assert_consumed().run_restore_ops()
      self.assertAllEqual([1., 2., 3., 4., 5.],
                          self.evaluate(second_dense.bias))
      # Deferred path: restore() before the layers (and their variables)
      # exist, then build the model and run the queued restore ops.
      deferred_sequential = sequential.Sequential()
      deferred_sequential_checkpoint = tf.train.Checkpoint(
          model=deferred_sequential)
      status = deferred_sequential_checkpoint.restore(save_path)
      deferred_sequential.add(core.Dense(4))
      deferred_second_dense = core.Dense(5)
      deferred_sequential.add(deferred_second_dense)
      deferred_sequential(tf.constant([[1.]]))
      status.run_restore_ops()
      self.assertAllEqual([1., 2., 3., 4., 5.],
                          self.evaluate(deferred_second_dense.bias))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def test_initialize_if_not_restoring(self):
    """initialize_or_restore initializes objects absent from the checkpoint."""
    with self.test_session():
      checkpoint_directory = self.get_temp_dir()
      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
      optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
      with testing_utils.device(should_use_gpu=True):
        model = MyModel()
        optimizer = adam.Adam(0.001)
        root = tf.train.Checkpoint(
            model=model)  # Do not save the optimizer with the checkpoint.
        optimizer_checkpoint = tf.train.Checkpoint(optimizer=optimizer)
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_directory)
        status = root.restore(save_path=checkpoint_path)
        input_value = tf.constant([[3.]])

        def train_fn():
          with tf.GradientTape() as tape:
            loss = model(input_value)
          variables = model.trainable_variables
          gradients = tape.gradient(loss, variables)
          return optimizer.apply_gradients(zip(gradients, variables))

        if not tf.executing_eagerly():
          train_fn = functools.partial(self.evaluate, train_fn())
        status.initialize_or_restore()
        # TODO(tanzheny): Add hyper variables to .variables(), and set them
        # with set_weights etc.
        variables_not_in_the_variables_property = [
            obj for obj in optimizer._hyper.values()
            if isinstance(obj, tf.Variable)
        ]
        self.evaluate([
            v.initializer for v in optimizer.variables() +
            variables_not_in_the_variables_property
        ])
        train_fn()
        model_save_path = root.save(file_prefix=checkpoint_prefix)
        self.evaluate(optimizer.beta_1.assign(42.))
        optimizer_save_path = optimizer_checkpoint.save(
            optimizer_only_prefix)
      del train_fn

      # Restore into a graph with the optimizer
      with testing_utils.device(should_use_gpu=True):
        model = MyModel()
        optimizer = adam.Adam(0.001)
        root = tf.train.Checkpoint(optimizer=optimizer, model=model)
        status = root.restore(save_path=model_save_path)
        input_value = tf.constant([[3.]])

        def train_fn1():
          with tf.GradientTape() as tape:
            loss = model(input_value)
          variables = model.trainable_variables
          gradients = tape.gradient(loss, variables)
          return optimizer.apply_gradients(zip(gradients, variables))

        if not tf.executing_eagerly():
          train_fn1 = functools.partial(self.evaluate, train_fn1())
        status.initialize_or_restore()
        train_fn1()
        # The model-only checkpoint cannot satisfy the optimizer dependency.
        with self.assertRaises(AssertionError):
          status.assert_existing_objects_matched()
        with self.assertRaises(AssertionError):
          status.assert_consumed()
      del train_fn1

      # Make sure initialization doesn't clobber later restores
      with testing_utils.device(should_use_gpu=True):
        model = MyModel()
        optimizer = adam.Adam(0.001, beta_1=1.0)
        root = tf.train.Checkpoint(optimizer=optimizer, model=model)
        opt_root = tf.train.Checkpoint(optimizer=optimizer)
        status = root.restore(save_path=model_save_path)
        init_only_optimizer_status = opt_root.restore(save_path=None)
        optimizer_status = opt_root.restore(save_path=optimizer_save_path)
        input_value = tf.constant([[3.]])

        def train_fn2():
          with tf.GradientTape() as tape:
            loss = model(input_value)
          variables = model.trainable_variables
          gradients = tape.gradient(loss, variables)
          return optimizer.apply_gradients(zip(gradients, variables))

        if not tf.executing_eagerly():
          train_fn2 = functools.partial(self.evaluate,
                                        train_fn2())
        optimizer_status.run_restore_ops()
        status.initialize_or_restore()
        init_only_optimizer_status.initialize_or_restore()
        train_fn2()
        # beta_1 keeps the restored 42., not the constructor's 1.0.
        self.assertEqual(42., self.evaluate(optimizer.beta_1))
class GradientDescentOptimizerTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for plain SGD updates, with and without learning-rate decay."""

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testBasic(self):
    """One SGD step subtracts learning_rate * gradient from each variable."""
    for dtype in [tf.half, tf.float32, tf.float64]:
      first_var = tf.Variable([1.0, 2.0], dtype=dtype)
      second_var = tf.Variable([3.0, 4.0], dtype=dtype)
      first_grad = tf.constant([0.1, 0.1], dtype=dtype)
      second_grad = tf.constant([0.01, 0.01], dtype=dtype)
      optimizer = gradient_descent.SGD(3.0)
      step_op = optimizer.apply_gradients(
          zip([first_grad, second_grad], [first_var, second_var]))
      self.evaluate(tf.compat.v1.global_variables_initializer())
      # Run 1 step of sgd.
      self.evaluate(step_op)
      # Each variable must have moved by learning_rate * gradient.
      self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
                                         self.evaluate(first_var))
      self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
                                         self.evaluate(second_var))

  def _test_basic_sgd_with_learning_rate_decay(self, sgd, dtype):
    """Runs two steps of `sgd` and checks the decayed per-step update sizes.

    Expects `sgd` configured so the effective learning rate is 3.0 on the
    first step and 2.0 on the second.
    """
    first_var = tf.Variable([1.0, 2.0], dtype=dtype)
    second_var = tf.Variable([3.0, 4.0], dtype=dtype)
    first_grad = tf.constant([0.1, 0.1], dtype=dtype)
    second_grad = tf.constant([0.01, 0.01], dtype=dtype)
    graph_mode = not tf.executing_eagerly()
    if graph_mode:
      # Graph mode builds the update op once and re-runs it each step.
      step_op = sgd.apply_gradients(
          zip([first_grad, second_grad], [first_var, second_var]))

    def run_one_step():
      if graph_mode:
        self.evaluate(step_op)
      else:
        # Eager mode applies fresh gradients on every call.
        sgd.apply_gradients(
            zip([first_grad, second_grad], [first_var, second_var]))

    self.evaluate(tf.compat.v1.global_variables_initializer())
    run_one_step()
    # Step 1 uses the undecayed learning rate of 3.0.
    self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
                                       self.evaluate(first_var))
    self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
                                       self.evaluate(second_var))
    run_one_step()
    # Step 2 uses the decayed learning rate of 2.0.
    self.assertAllCloseAccordingToType(
        [1.0 - 3.0 * 0.1 - 2.0 * 0.1, 2.0 - 3.0 * 0.1 - 2.0 * 0.1],
        self.evaluate(first_var))
    self.assertAllCloseAccordingToType(
        [3.0 - 3.0 * 0.01 - 2.0 * 0.01, 4.0 - 3.0 * 0.01 - 2.0 * 0.01],
        self.evaluate(second_var))
  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testBasicWithLearningRateDecay(self):
    """Two SGD steps with a fixed `decay` argument."""
    for dtype in [tf.half, tf.float32, tf.float64]:
      learning_rate = 3.0
      decay = 0.5
      sgd = gradient_descent.SGD(learning_rate=learning_rate, decay=decay)
      self._test_basic_sgd_with_learning_rate_decay(sgd, dtype)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testBasicWithLearningRateInverseTimeDecay(self):
    """Two SGD steps with an InverseTimeDecay schedule."""
    for dtype in [tf.half, tf.float32, tf.float64]:
      learning_rate = learning_rate_schedule.InverseTimeDecay(
          3.0, decay_steps=1.0, decay_rate=0.5)
      sgd = gradient_descent.SGD(learning_rate=learning_rate)
      self._test_basic_sgd_with_learning_rate_decay(sgd, dtype)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testBasicWithLearningRateInverseTimeDecaySerializeAndDeserialize(self):
    """The schedule survives a get_config/from_config round trip."""
    for dtype in [tf.half, tf.float32, tf.float64]:
      learning_rate = learning_rate_schedule.InverseTimeDecay(
          3.0, decay_steps=1.0, decay_rate=0.5)
      sgd = gradient_descent.SGD(learning_rate=learning_rate)
      sgd = gradient_descent.SGD.from_config(sgd.get_config())
      self._test_basic_sgd_with_learning_rate_decay(sgd, dtype)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testBasicCallableParams(self):
    """The learning rate may be supplied as a zero-argument callable."""
    for dtype in [tf.half, tf.float32, tf.float64]:
      var0 = tf.Variable([1.0, 2.0], dtype=dtype)
      var1 = tf.Variable([3.0, 4.0], dtype=dtype)
      grads0 = tf.constant([0.1, 0.1], dtype=dtype)
      grads1 = tf.constant([0.01, 0.01], dtype=dtype)
      lr = lambda: 3.0
      sgd = gradient_descent.SGD(lr)
      sgd_op = sgd.apply_gradients(zip([grads0, grads1], [var0, var1]))
      self.evaluate(tf.compat.v1.global_variables_initializer())
      # Run 1 step of sgd
      self.evaluate(sgd_op)
      # Validate updated params
      self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
                                         self.evaluate(var0))
      self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
                                         self.evaluate(var1))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testMinimizeResourceVariable(self):
    """minimize() on a linear loss produces the analytic gradient step."""
    for dtype in [tf.half, tf.float32, tf.float64]:
      var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
      var1 = tf.Variable([3.0], dtype=dtype)
      x = tf.constant([[4.0], [5.0]], dtype=dtype)
      loss = lambda: tf.matmul(var0, x) + var1  # pylint: disable=cell-var-from-loop
      sgd = gradient_descent.SGD(1.0)
      sgd_op = sgd.minimize(loss, [var0, var1])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      # Run 1 step of sgd
      self.evaluate(sgd_op)
      # Validate updated params: d(loss)/d(var0) = x^T, d(loss)/d(var1) = 1.
      self.assertAllCloseAccordingToType([[1.0 - 4.0, 2.0 - 5.0]],
                                         self.evaluate(var0))
      self.assertAllCloseAccordingToType([3.0 - 1.0], self.evaluate(var1))

  def testMinimizeSparseResourceVariable(self):
    """minimize() through an embedding lookup (sparse gradient path)."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in [tf.half, tf.float32, tf.float64]:
        var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
        var1 = tf.Variable([3.0], dtype=dtype)
        x = tf.constant([[4.0], [5.0]], dtype=dtype)

        def loss():
          pred = tf.matmul(
              tf.compat.v1.nn.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
          pred += var1  # pylint: disable=cell-var-from-loop
          return pred * pred

        sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0, var1])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 1 step of sgd
        self.evaluate(sgd_op)
        # Validate updated params against the hand-computed gradient of
        # pred**2: 2 * pred * d(pred)/d(var).
        np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
        np_grad = 2 * np_pred
        self.assertAllCloseAccordingToType(
            [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]],
            self.evaluate(var0))
        self.assertAllCloseAccordingToType([3.0 - np_grad],
                                           self.evaluate(var1))

  def testTensorLearningRate(self):
    """The learning rate may be supplied as a constant Tensor."""
    for dtype in [tf.half, tf.float32, tf.float64]:
      var0 = tf.Variable([1.0, 2.0], dtype=dtype)
      var1 = tf.Variable([3.0, 4.0], dtype=dtype)
      grads0 = tf.constant([0.1, 0.1], dtype=dtype)
      grads1 = tf.constant([0.01, 0.01], dtype=dtype)
      lrate = tf.constant(3.0)
      sgd_op = gradient_descent.SGD(lrate).apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.evaluate(tf.compat.v1.global_variables_initializer())
      # Run 1 step of sgd
      self.evaluate(sgd_op)
      # Validate updated params
      self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
                                         self.evaluate(var0))
      self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
                                         self.evaluate(var1))

  def testGradWrtRef(self):
    """_compute_gradients returns gradient 1.0 for a sum-of-vars loss."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in [tf.half, tf.float32, tf.float64]:
        opt = gradient_descent.SGD(3.0)
        values = [1.0, 3.0]
        vars_ = [tf.Variable([v], dtype=dtype) for v in values]
        loss = lambda: vars_[0] + vars_[1]  # pylint: disable=cell-var-from-loop
        grads_and_vars = opt._compute_gradients(loss, vars_)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        for grad, _ in grads_and_vars:
          self.assertAllCloseAccordingToType([1.0], self.evaluate(grad))

  def testSparseBasic(self):
    """IndexedSlices gradients update only the referenced rows."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in [tf.half, tf.float32, tf.float64]:
        var0 = tf.Variable([[1.0], [2.0]], dtype=dtype)
        var1 = tf.Variable([[3.0], [4.0]], dtype=dtype)
        grads0 = tf.IndexedSlices(
            tf.constant([0.1], shape=[1, 1], dtype=dtype), tf.constant([0]),
            tf.constant([2, 1]))
        grads1 = tf.IndexedSlices(
            tf.constant([0.01], shape=[1, 1], dtype=dtype), tf.constant([1]),
            tf.constant([2, 1]))
        sgd_op = gradient_descent.SGD(3.0).apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 1 step of sgd
        self.evaluate(sgd_op)
        # Validate updated params: row 0 of var0 and row 1 of var1 move, the
        # other rows are untouched.
        self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
                                           self.evaluate(var0))
        self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
                                           self.evaluate(var1))

  def testSparseBasicWithLearningRateDecay(self):
    """Sparse updates also honor learning-rate decay across steps."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in [tf.half, tf.float32, tf.float64]:
        var0 = tf.Variable([[1.0], [2.0]], dtype=dtype)
        var1 = tf.Variable([[3.0], [4.0]], dtype=dtype)
        grads0 = tf.IndexedSlices(
            tf.constant([0.1], shape=[1, 1], dtype=dtype), tf.constant([0]),
            tf.constant([2, 1]))
        grads1 = tf.IndexedSlices(
            tf.constant([0.01], shape=[1, 1], dtype=dtype), tf.constant([1]),
            tf.constant([2, 1]))
        sgd_op = gradient_descent.SGD(3.0, decay=0.5).apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 2 steps of sgd
        self.evaluate(sgd_op)
        # Validate updated params after step 1 (effective lr 3.0).
        self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
                                           self.evaluate(var0))
        self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
                                           self.evaluate(var1))
        self.evaluate(sgd_op)
        # Validate updated params after step 2 (decayed lr 2.0).
        self.assertAllCloseAccordingToType(
            [[1.0 - 3.0 * 0.1 - 2.0 * 0.1], [2.0]], self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            [[3.0], [4.0 - 3.0 * 0.01 - 2.0 * 0.01]], self.evaluate(var1))

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testCapturingInFunctionWhileExecutingEagerly(self):
    """Using the optimizer inside tf.function does not poison eager calls."""
    optimizer = gradient_descent.SGD(1.0)
    var_holder = {}

    def step():
      # Lazily create (or reset) the variable so both eager and traced calls
      # operate on the same object starting from 1.0.
      if not var_holder:
        var_holder["var"] = tf.Variable(1.0)
      else:
        var_holder["var"].assign(1.0)
      with tf.GradientTape() as tape:
        loss = var_holder["var"]**2
      grad = tape.gradient(loss, var_holder["var"])
      optimizer.apply_gradients([(grad, var_holder["var"])])
      return var_holder["var"].read_value()

    compiled_step = tf.function(step)
    self.assertEqual(float(step()), -1.0)
    self.assertEqual(float(compiled_step()), -1.0)
    # This shouldn't fail; in particular, the learning rate tensor should
    # be an EagerTensor once again, not a graph Tensor.
    self.assertEqual(float(step()), -1.0)

  def testConstructSGDWithLR(self):
    """The legacy `lr` kwarg aliases `learning_rate` and wins over it."""
    opt = gradient_descent.SGD(lr=1.0)
    opt_2 = gradient_descent.SGD(learning_rate=0.1, lr=1.0)
    opt_3 = gradient_descent.SGD(learning_rate=0.1)
    self.assertIsInstance(opt.lr, tf.Variable)
    self.assertIsInstance(opt_2.lr, tf.Variable)
    self.assertIsInstance(opt_3.lr, tf.Variable)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(opt.lr), (1.0))
    # When both are given, `lr` takes precedence.
    self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
class TrainingGPUTest(tf.test.TestCase, parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_model_with_crossentropy_losses_channels_first(self):
    """Tests use of all crossentropy losses with `channels_first`.

    Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`,
    and `binary_crossentropy`.
    Verifies that evaluate gives the same result with either
    `channels_first` or `channels_last` image_data_format.
    """

    def prepare_simple_model(input_tensor, loss_name, target):
      # Builds a one-Conv2D model whose loss reduces along the channel axis
      # implied by the current image_data_format.
      axis = 1 if backend.image_data_format() == 'channels_first' else -1
      loss = None
      num_channels = None
      activation = None
      if loss_name == 'sparse_categorical_crossentropy':
        loss = lambda y_true, y_pred: backend.sparse_categorical_crossentropy(  # pylint: disable=g-long-lambda
            y_true, y_pred, axis=axis)
        num_channels = int(np.amax(target) + 1)
        activation = 'softmax'
      elif loss_name == 'categorical_crossentropy':
        loss = lambda y_true, y_pred: backend.categorical_crossentropy(  # pylint: disable=g-long-lambda
            y_true, y_pred, axis=axis)
        num_channels = target.shape[axis]
        activation = 'softmax'
      elif loss_name == 'binary_crossentropy':
        loss = lambda y_true, y_pred: backend.binary_crossentropy(  # pylint: disable=g-long-lambda, unnecessary-lambda
            y_true, y_pred)
        num_channels = target.shape[axis]
        activation = 'sigmoid'
      # Constant initializers keep the comparison deterministic.
      predictions = Conv2D(num_channels, 1, activation=activation,
                           kernel_initializer='ones',
                           bias_initializer='ones')(input_tensor)
      simple_model = training.Model(inputs=input_tensor,
                                    outputs=predictions)
      simple_model.compile(optimizer='rmsprop', loss=loss)
      return simple_model

    # Whole test is a no-op without a CUDA GPU.
    if tf.test.is_gpu_available(cuda_only=True):
      with testing_utils.use_gpu():
        losses_to_test = ['sparse_categorical_crossentropy',
                          'categorical_crossentropy', 'binary_crossentropy']

        data_channels_first = np.array([[[[8., 7.1, 0.], [4.5, 2.6, 0.55],
                                          [0.9, 4.2, 11.2]]]],
                                       dtype=np.float32)
        # Labels for testing 4-class sparse_categorical_crossentropy, 4-class
        # categorical_crossentropy, and 2-class binary_crossentropy:
        labels_channels_first = [np.array([[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]], dtype=np.float32),  # pylint: disable=line-too-long
                                 np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 0]], [[1, 0, 0], [0, 0, 1], [0, 1, 0]], [[0, 0, 0], [1, 0, 0], [0, 0, 1]], [[0, 0, 1], [0, 0, 0], [1, 0, 0]]]], dtype=np.float32),  # pylint: disable=line-too-long
                                 np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 1]], [[1, 0, 1], [1, 0, 1], [1, 1, 0]]]], dtype=np.float32)]  # pylint: disable=line-too-long
        # Compute one loss for each loss function in the list
        # `losses_to_test`:
        loss_channels_last = [0., 0., 0.]
        loss_channels_first = [0., 0., 0.]

        old_data_format = backend.image_data_format()

        # Evaluate a simple network with channels last, with all three loss
        # functions:
        backend.set_image_data_format('channels_last')
        data = np.moveaxis(data_channels_first, 1, -1)
        for index, loss_function in enumerate(losses_to_test):
          labels = np.moveaxis(labels_channels_first[index], 1, -1)
          inputs = input_layer.Input(shape=(3, 3, 1))
          model = prepare_simple_model(inputs, loss_function, labels)
          loss_channels_last[index] = model.evaluate(x=data, y=labels,
                                                     batch_size=1, verbose=0)

        # Evaluate the same network with channels first, with all three loss
        # functions:
        backend.set_image_data_format('channels_first')
        data = data_channels_first
        for index, loss_function in enumerate(losses_to_test):
          labels = labels_channels_first[index]
          inputs = input_layer.Input(shape=(1, 3, 3))
          model = prepare_simple_model(inputs, loss_function, labels)
          loss_channels_first[index] = model.evaluate(x=data, y=labels,
                                                      batch_size=1,
                                                      verbose=0)

        # Restore the global image_data_format changed above.
        backend.set_image_data_format(old_data_format)

        np.testing.assert_allclose(
            loss_channels_first,
            loss_channels_last,
            rtol=1e-06,
            err_msg='{}{}'.format('Computed different losses for ',
                                  'channels_first and channels_last'))
class MomentumOptimizerTest(tf.test.TestCase, parameterized.TestCase):
  """Tests SGD-with-momentum updates against hand-computed expectations."""

  def _update_nesterov_momentum_numpy(self, var, accum, g, lr, momentum):
    """Numpy reference for one Nesterov momentum step; returns (var, accum)."""
    accum = accum * momentum - g * lr
    var += (accum * momentum - g * lr)
    return var, accum

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testBasic(self):
    """Two dense momentum steps match the closed-form expected values."""
    for _, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0")
      var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1")
      grads0 = tf.constant([0.1, 0.1], dtype=dtype)
      grads1 = tf.constant([0.01, 0.01], dtype=dtype)
      learning_rate = 2.0
      momentum = 0.9
      mom_opt = gradient_descent.SGD(
          learning_rate=learning_rate, momentum=momentum)
      mom_update = mom_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))

      # Check we have slots
      slot0 = mom_opt.get_slot(var0, "momentum")
      self.assertEqual(slot0.shape, var0.shape)
      slot1 = mom_opt.get_slot(var1, "momentum")
      self.assertEqual(slot1.shape, var1.shape)

      # Step 1: the momentum accumulators were 0. So we should see a normal
      # update: v -= grad * learning_rate
      self.evaluate(tf.compat.v1.global_variables_initializer())
      self.evaluate(mom_update)
      # Check that the momentum accumulators have been updated.
      self.assertAllCloseAccordingToType(np.array([-0.2, -0.2]),
                                         self.evaluate(slot0))
      self.assertAllCloseAccordingToType(np.array([-0.02, -0.02]),
                                         self.evaluate(slot1))
      # Check that the parameters have been updated.
      self.assertAllCloseAccordingToType(
          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
          self.evaluate(var0))
      self.assertAllCloseAccordingToType(
          np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
          self.evaluate(var1))
      # Step 2: the momentum accumulators contain the previous update.
      self.evaluate(mom_update)
      if tf.executing_eagerly():
        # In eager mode `mom_update` was applied once at construction, so
        # apply again explicitly to reach the second step.
        mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      # Check that the momentum accumulators have been updated.
      self.assertAllCloseAccordingToType(
          np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]),
          self.evaluate(slot0))
      self.assertAllCloseAccordingToType(
          np.array([(0.9 * (-0.02) - 2.0 * 0.01),
                    (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1))
      # Check that the parameters have been updated.
      self.assertAllCloseAccordingToType(
          np.array([
              1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
              2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
          ]), self.evaluate(var0))
      self.assertAllCloseAccordingToType(
          np.array([
              2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
              3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
          ]), self.evaluate(var1))

  def testNesterovMomentum(self):
    """Dense Nesterov updates track the numpy reference for 4 steps."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in [tf.float32, tf.float64]:
        var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0")
        var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1")
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        # d(loss)/d(var0) = 10 * var0; d(loss)/d(var1) = 3.
        loss = lambda: 5 * var0 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
        mom_op = gradient_descent.SGD(
            learning_rate=2.0, momentum=0.9, nesterov=True)
        opt_op = mom_op.minimize(loss, [var0, var1])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        for _ in range(1, 5):
          self.evaluate(opt_op)
          var0_np, accum0_np = self._update_nesterov_momentum_numpy(
              var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
          var1_np, accum1_np = self._update_nesterov_momentum_numpy(
              var1_np, accum1_np, 3, 2.0, 0.9)
          self.assertAllClose(var0_np, self.evaluate(var0))
          self.assertAllClose(var1_np, self.evaluate(var1))

  def testSparseNesterovMomentum(self):
    """Sparse (IndexedSlices) Nesterov updates match the numpy reference."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session() as sess:
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        # First pass: record the gradient fed at each step (depends on the
        # evolving var0_np, so it must be precomputed with the same recurrence).
        grads = []
        for t in range(1, 5):
          grads.append(var0_np * 10)
          var0_np, accum0_np = self._update_nesterov_momentum_numpy(
              var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
          var1_np, accum1_np = self._update_nesterov_momentum_numpy(
              var1_np, accum1_np, 3, 2.0, 0.9)
        # Reset the numpy state for the second (checked) pass.
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
        var0 = tf.Variable(var0_np, dtype=dtype, name="var0")
        var1 = tf.Variable(var1_np, dtype=dtype, name="var1")
        mom_op = gradient_descent.SGD(
            learning_rate=2.0, momentum=0.9, nesterov=True)
        x_feed = tf.compat.v1.placeholder(dtype)
        # var0's gradient arrives as IndexedSlices fed through a placeholder.
        y_feed = tf.IndexedSlices(x_feed, tf.constant([0, 1]),
                                  tf.constant([2]))
        grads_and_vars = [(y_feed, var0),
                          (tf.constant([3.0, 3.0], dtype=dtype), var1)]
        opt_update = mom_op.apply_gradients(grads_and_vars)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        for t in range(1, 5):
          sess.run(opt_update, feed_dict={x_feed: grads[t - 1]})
          var0_np, accum0_np = self._update_nesterov_momentum_numpy(
              var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
          var1_np, accum1_np = self._update_nesterov_momentum_numpy(
              var1_np, accum1_np, 3, 2.0, 0.9)
          self.assertAllClose(var0_np, self.evaluate(var0))
          self.assertAllClose(var1_np, self.evaluate(var1))

  def testMinimizeSparseResourceVariable(self):
    """minimize() through an embedding lookup updates the looked-up row."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in [tf.half, tf.float32, tf.float64]:
        var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)

        # pylint: disable=cell-var-from-loop
        def loss():
          x = tf.constant([[4.0], [5.0]], dtype=dtype)
          pred = tf.matmul(
              tf.compat.v1.nn.embedding_lookup([var0], [0]), x)
          return pred * pred
        # pylint: enable=cell-var-from-loop

        opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9)
        sgd_op = opt.minimize(loss, [var0])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 1 step of sgd
        self.evaluate(sgd_op)
        # Validate updated params
        self.assertAllCloseAccordingToType([[-111, -138]],
                                           self.evaluate(var0))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testMinimizeWith2DIndicesForEmbeddingLookup(self):
    """2-D lookup indices: only the selected row receives the update."""
    var0 = tf.Variable(tf.ones([2, 2]))

    def loss():
      return tf.reduce_sum(tf.compat.v1.nn.embedding_lookup(var0, [[1]]))

    opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9)
    sgd_op = opt.minimize(loss, [var0])
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.evaluate(sgd_op)
    self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0))

  def testTensorLearningRateAndMomentum(self):
    """Same as testBasic but with lr/momentum passed as tf.constant tensors."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in [tf.half, tf.float32, tf.float64]:
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        mom_opt = gradient_descent.SGD(
            learning_rate=tf.constant(2.0), momentum=tf.constant(0.9))
        mom_update = mom_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Check we have slots
        slot0 = mom_opt.get_slot(var0, "momentum")
        self.assertEqual(slot0.shape, var0.shape)
        slot1 = mom_opt.get_slot(var1, "momentum")
        self.assertEqual(slot1.shape, var1.shape)

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        # Step 1: the momentum accumulators were 0. So we should see a normal
        # update: v -= grad * learning_rate
        self.evaluate(mom_update)
        # Check that the momentum accumulators have been updated.
        self.assertAllCloseAccordingToType(np.array([-0.2, -0.2]),
                                           self.evaluate(slot0))
        self.assertAllCloseAccordingToType(np.array([-0.02, -0.02]),
                                           self.evaluate(slot1))
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(
            np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
            self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
            self.evaluate(var1))
        # Step 2: the momentum accumulators contain the previous update.
        self.evaluate(mom_update)
        # Check that the momentum accumulators have been updated.
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]),
            self.evaluate(slot0))
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * (-0.02) - 2.0 * 0.01),
                      (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1))
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(
            np.array([
                1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
            ]), self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([
                2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
            ]), self.evaluate(var1))

  def testSparse(self):
    """Sparse momentum: untouched rows stay at zero; touched rows update."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in [tf.half, tf.float32, tf.float64]:
        var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype))
        var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2]))
        grads0 = tf.IndexedSlices(
            tf.constant([[.1, .1]], dtype=dtype), tf.constant([1]),
            tf.constant([4, 2]))
        grads1 = tf.IndexedSlices(
            tf.constant([[.01, .01], [.01, .01]], dtype=dtype),
            tf.constant([2, 3]), tf.constant([4, 2]))
        mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9)
        mom_update = mom_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Check we have slots
        slot0 = mom_opt.get_slot(var0, "momentum")
        self.assertEqual(slot0.shape, var0.shape)
        slot1 = mom_opt.get_slot(var1, "momentum")
        self.assertEqual(slot1.shape, var1.shape)

        # Fetch params to validate initial values
        self.assertAllClose([0, 0], self.evaluate(var0)[0])
        self.assertAllClose([0, 0], self.evaluate(var0)[1])
        self.assertAllClose([1, 1], self.evaluate(var1)[2])

        # Step 1: the momentum accumulators are 0. So we should see a normal
        # update: v -= grad * learning_rate
        self.evaluate(mom_update)
        # Check that the momentum accumulators have been updated.
        # Row 0 received no gradient, so its accumulator stays zero.
        self.assertAllCloseAccordingToType(np.array([0, 0]),
                                           self.evaluate(slot0)[0])
        self.assertAllCloseAccordingToType(
            np.array([-2.0 * .1, -2.0 * .1]), self.evaluate(slot0)[1])
        self.assertAllCloseAccordingToType(
            np.array([-2.0 * .01, -2.0 * .01]), self.evaluate(slot1)[2])
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(np.array([0, 0]),
                                           self.evaluate(var0)[0])
        self.assertAllCloseAccordingToType(
            np.array([-(0.1 * 2.0), -(0.1 * 2.0)]), self.evaluate(var0)[1])
        self.assertAllCloseAccordingToType(
            np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]),
            self.evaluate(var1)[2])
        # Step 2: the momentum accumulators contain the previous update.
        self.evaluate(mom_update)
        # Check that the momentum accumulators have been updated.
        self.assertAllClose(np.array([0, 0]), self.evaluate(slot0)[0])
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]),
            self.evaluate(slot0)[1])
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * (-0.02) - 2.0 * 0.01),
                      (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1)[2])
        # Check that the parameters have been updated.
        self.assertAllClose(np.array([0, 0]), self.evaluate(var0)[0])
        self.assertAllCloseAccordingToType(
            np.array([
                -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
            ]), self.evaluate(var0)[1])
        self.assertAllCloseAccordingToType(
            np.array([
                0.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                0.98 - ((0.9 * 0.01 + 0.01) * 2.0)
            ]), self.evaluate(var1)[2])

  def testSharing(self):
    """Two apply_gradients ops from one optimizer share the same slots."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in [tf.half, tf.float32, tf.float64]:
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9)
        mom_update1 = mom_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        mom_update2 = mom_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        slot0 = mom_opt.get_slot(var0, "momentum")
        self.assertEqual(slot0.shape, var0.shape)
        slot1 = mom_opt.get_slot(var1, "momentum")
        self.assertEqual(slot1.shape, var1.shape)

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        # Step 1: the momentum accumulators were 0. So we should see a normal
        # update: v -= grad * learning_rate
        self.evaluate(mom_update1)
        # Check that the momentum accumulators have been updated.
        self.assertAllCloseAccordingToType(np.array([-0.2, -0.2]),
                                           self.evaluate(slot0))
        self.assertAllCloseAccordingToType(np.array([-0.02, -0.02]),
                                           self.evaluate(slot1))
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(
            np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
            self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
            self.evaluate(var1))
        # Step 2: the second momentum accumulators contain the previous update.
        self.evaluate(mom_update2)
        # Check that the momentum accumulators have been updated.
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]),
            self.evaluate(slot0))
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * (-0.02) - 2.0 * 0.01),
                      (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1))
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(
            np.array([
                1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
            ]), self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([
                2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
            ]), self.evaluate(var1))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testConfig(self):
    """get_config/from_config round-trips lr, momentum, decay and nesterov."""
    opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9, nesterov=True)
    config = opt.get_config()
    opt2 = gradient_descent.SGD.from_config(config)
    lr = opt.lr
    lr2 = opt2.lr
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(lr), self.evaluate(lr2))
    self.assertAllClose(
        self.evaluate(opt._get_hyper("momentum")),
        self.evaluate(opt2._get_hyper("momentum")))
    self.assertAllClose(
        self.evaluate(opt._get_hyper("decay")),
        self.evaluate(opt2._get_hyper("decay")))
    var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32)
    loss = lambda: 3 * var0
    # learning rate variable created when calling minimize.
    opt.minimize(loss, [var0])
    self.evaluate(tf.compat.v1.global_variables_initializer())
    # Round-trip again after minimize() so the config reflects created state.
    config = opt.get_config()
    opt3 = gradient_descent.SGD.from_config(config)
    lr3 = opt3.lr
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(lr), self.evaluate(lr3))
    self.assertAllClose(
        self.evaluate(opt._get_hyper("momentum")),
        self.evaluate(opt3._get_hyper("momentum")))
    self.assertAllClose(
        self.evaluate(opt._get_hyper("decay")),
        self.evaluate(opt3._get_hyper("decay")))
    self.assertTrue(opt3.nesterov)

  def testNesterovWithoutMomentum(self):
    # NOTE(review): despite the name, this checks that an out-of-range
    # momentum value (> 1) is rejected at construction time.
    with self.assertRaisesRegex(ValueError, "must be between"):
      gradient_descent.SGD(learning_rate=1.0, momentum=2.0)

  def testConstructMomentumWithLR(self):
    """The deprecated `lr` kwarg is accepted and overrides `learning_rate`."""
    opt = gradient_descent.SGD(lr=1.0, momentum=0.9)
    opt_2 = gradient_descent.SGD(learning_rate=0.1, momentum=0.9, lr=1.0)
    opt_3 = gradient_descent.SGD(learning_rate=0.1, momentum=0.9)
    self.assertIsInstance(opt.lr, tf.Variable)
    self.assertIsInstance(opt_2.lr, tf.Variable)
    self.assertIsInstance(opt_3.lr, tf.Variable)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(opt.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_3.lr), (0.1))

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testMinimizeLossTensor(self):
    """minimize() on a loss tensor requires an explicit GradientTape."""
    for dtype in [tf.half, tf.float32, tf.float64]:
      var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
      var1 = tf.Variable([3.0], dtype=dtype)
      x = tf.constant([[4.0], [5.0]], dtype=dtype)

      tape = tf.GradientTape()
      with tape:
        loss = tf.matmul(var0, x) + var1
      sgd = gradient_descent.SGD(1.0)
      # Without a tape the call must fail...
      with self.assertRaisesRegex(ValueError, "`tape` is required"):
        sgd.minimize(loss, [var0, var1])
      # ...and with the tape it performs one plain gradient-descent step.
      sgd.minimize(loss, [var0, var1], tape=tape)
      self.assertAllCloseAccordingToType([[1.0 - 4.0, 2.0 - 5.0]],
                                         self.evaluate(var0))
      self.assertAllCloseAccordingToType([3.0 - 1.0], self.evaluate(var1))
class TupleTests(keras_parameterized.TestCase):
  """Tests automatic dependency tracking of tuple attributes on models."""

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testTracking(self):
    """Layers held in tuple attributes are tracked and checkpointable."""
    with self.test_session():
      model = HasTuple()
      output = model(tf.ones([32, 2]))
      self.assertAllEqual([32, 5], output.shape.as_list())
      self.assertLen(model.layers, 4)
      self.assertLen(model.layer_list.layers, 3)
      six.assertCountEqual(
          self, model.layers,
          tuple(model.layer_list.layers) + model.layers_with_updates)
      self.assertEqual(3, model.layer_list.layers[0].units)
      self.assertEqual(4, model.layer_list.layers[1].units)
      self.assertEqual(5, model.layer_list.layers[2].units)
      # Only the two wrapped containers are direct checkpoint dependencies.
      self.assertLen(model._checkpoint_dependencies, 2)
      self.assertIs(model.layer_list, model._checkpoint_dependencies[0].ref)
      self.assertIs(model.layers_with_updates,
                    model._checkpoint_dependencies[1].ref)
      self.assertLen(
          model._checkpoint_dependencies[0].ref._checkpoint_dependencies, 3)
      self.evaluate([v.initializer for v in model.variables])
      self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]]))
      # Round-trip a checkpoint to verify tuple-held weights are saved.
      save_path = os.path.join(self.get_temp_dir(), "ckpt")
      model.save_weights(save_path)
      self.evaluate(model.variables[0].assign(tf.zeros([2, 3])))
      model.load_weights(save_path)
      self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]],
                          self.evaluate(model.variables[0]))
      # A variable assigned inside a new tuple attribute is also tracked.
      v = tf.Variable(1.)
      model.var_list = (v,)
      self.assertIn(id(v), [id(obj) for obj in model.variables])
      self.assertIn(id(v), [id(obj) for obj in model.trainable_variables])
      self.assertNotIn(id(v),
                       [id(obj) for obj in model.non_trainable_variables])
      self.assertIn(id(model.layer_list[0].trainable_weights[0]),
                    [id(obj) for obj in model.trainable_weights])

  @parameterized.named_parameters(
      ("Module", tf.Module),
      ("Model", training.Model),
  )
  def testSubModelTracking(self, module_subclass):
    """A module stored in a tuple exposes its variables via the parent."""
    model = module_subclass()
    model.v = tf.Variable(1.)
    self.assertIn(model.v, model.trainable_variables)
    model2 = module_subclass()
    model2.m = (model,)
    self.assertIn(model.v, model2.trainable_variables)

  def testSubSequentialTracking(self):
    """A Sequential reachable through a tuple contributes its weights."""

    class _Subclassed(training.Model):

      def __init__(self, wrapped):
        super(_Subclassed, self).__init__()
        self._wrapped = wrapped

      def call(self, x):
        return self._wrapped(x)

    model = sequential.Sequential()
    layer = core.Dense(1)
    model.add(layer)
    model2 = _Subclassed(model)
    model2(tf.ones([1, 2]))
    model2.m = (model,)
    self.assertIn(layer.kernel, model2.trainable_weights)

  def testUpdatesForwarded(self):
    """Layer updates propagate to the model in graph mode, not in eager."""
    with tf.Graph().as_default():
      model = HasTuple()
      model_input = tf.ones([32, 2])
      model(model_input)
      self.assertNotEmpty(model.layers_with_updates[0].updates)
      self.assertEqual(set(model.layers_with_updates[0].updates),
                       set(model.updates))

    # Outside the graph (eager) there are no pending update ops.
    model = HasTuple()
    model_input = tf.ones([32, 2])
    model(model_input)
    self.assertEmpty(model.updates)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testLossesForwarded(self):
    """Losses from tuple-held layers surface on the parent model."""
    model = HasTuple()
    model_input = tf.ones([32, 2])
    model(model_input)
    self.assertLen(model.losses, 1)

  def testModelContainersCompareEqual(self):
    """Wrapped tuples keep tuple equality/hashing semantics."""

    class HasEqualContainers(training.Model):

      def __init__(self):
        super(HasEqualContainers, self).__init__()
        self.l1 = ()
        self.l2 = ()

    model = HasEqualContainers()
    first_layer = HasEqualContainers()
    model.l1 = (first_layer,)
    second_layer = HasEqualContainers()
    model.l2 = (second_layer,)
    self.assertEqual((first_layer,), model.l1)
    # Wrapped tuples hash like the plain tuples they wrap.
    d = {model.l1: 1, model.l2: 2}
    self.assertEqual(1, d[model.l1])
    self.assertEqual(1, d[(first_layer,)])
    self.assertEqual(2, d[model.l2])
    self.assertEqual(2, d[(second_layer,)])
    self.assertEqual([first_layer, second_layer], model.layers)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testTensorConversion(self):
    """Wrapped tuples convert to tensors like plain tuples."""

    class TupleToTensor(training.Model):

      def __init__(self):
        super(TupleToTensor, self).__init__()
        self.l = (1., 2., 3.)

    self.assertAllEqual(
        (1., 2., 3.),
        self.evaluate(tf.constant(TupleToTensor().l)))
    self.assertAllEqual(
        (1., 2., 3.),
        self.evaluate(tf.raw_ops.Pack(values=TupleToTensor().l)))
class AdamaxOptimizerTest(tf.test.TestCase, parameterized.TestCase):
  """Tests the Adamax optimizer against numpy reference implementations."""

  def testResourceSparse(self):
    """Sparse (IndexedSlices) updates match adamax_sparse_update_numpy."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        zero_slots = lambda: np.zeros((3), dtype=dtype.as_numpy_dtype)  # pylint: disable=cell-var-from-loop
        m0, v0, m1, v1 = zero_slots(), zero_slots(), zero_slots(
        ), zero_slots()
        var0_np = np.array([1.0, 2.0, 3.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([4.0, 5.0, 6.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0_np_indices = np.array([0, 1], dtype=np.int32)
        grads0 = tf.IndexedSlices(
            tf.constant(grads0_np), tf.constant(grads0_np_indices),
            tf.constant([3]))
        grads1_np_indices = np.array([2, 1], dtype=np.int32)
        grads1 = tf.IndexedSlices(
            tf.constant(grads1_np), tf.constant(grads1_np_indices),
            tf.constant([3]))
        opt = adamax.Adamax()
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0, 3.0], var0)
        self.assertAllClose([4.0, 5.0, 6.0], var1)

        beta1_power = get_beta_accumulators(opt, dtype)

        # Run 3 steps of Adamax
        for t in range(3):
          # beta_1 accumulator decays by 0.9 each step.
          self.assertAllCloseAccordingToType(0.9**(t + 1), beta1_power)
          update.run()

          var0_np, m0, v0 = adamax_sparse_update_numpy(
              var0_np, grads0_np_indices, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adamax_sparse_update_numpy(
              var1_np, grads1_np_indices, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, var0)
          self.assertAllCloseAccordingToType(var1_np, var1)

  def testSparseDevicePlacement(self):
    """Sparse optimizer ops can run on GPU when one is available."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for index_dtype in [tf.int32, tf.int64]:
      with tf.Graph().as_default(), self.cached_session(
          force_gpu=tf.test.is_gpu_available()):
        # If a GPU is available, tests that all optimizer ops can be placed on
        # it (i.e. they have GPU kernels).
        var = tf.Variable([[1.0], [2.0]])
        indices = tf.constant([0, 1], dtype=index_dtype)
        g_sum = lambda: tf.reduce_sum(tf.compat.v1.gather(var, indices))  # pylint: disable=cell-var-from-loop
        optimizer = adamax.Adamax(3.0)
        minimize_op = optimizer.minimize(g_sum, var_list=[var])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        minimize_op.run()

  def testSparseRepeatedIndices(self):
    """Repeated sparse indices accumulate the same as one aggregated grad."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        repeated_index_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
        aggregated_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
        # Index 1 appears twice with 0.1 each vs. once with the summed 0.2.
        grad_repeated_index = tf.IndexedSlices(
            tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
            tf.constant([1, 1]), tf.constant([2, 1]))
        grad_aggregated = tf.IndexedSlices(
            tf.constant([0.2], shape=[1, 1], dtype=dtype),
            tf.constant([1]), tf.constant([2, 1]))
        repeated_update = adamax.Adamax().apply_gradients([
            (grad_repeated_index, repeated_index_update_var)
        ])
        aggregated_update = adamax.Adamax().apply_gradients([
            (grad_aggregated, aggregated_update_var)
        ])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(aggregated_update_var,
                            repeated_index_update_var.eval())
        for _ in range(3):
          repeated_update.run()
          aggregated_update.run()
          self.assertAllClose(aggregated_update_var,
                              repeated_index_update_var.eval())

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testBasic(self):
    """Dense updates match adamax_update_numpy for three steps."""
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with self.session(graph=tf.Graph(), use_gpu=True):
        # Initialize variables for numpy implementation.
        m0 = np.array([0.0, 0.0])
        v0 = np.array([0.0, 0.0])
        m1 = np.array([0.0, 0.0])
        v1 = np.array([0.0, 0.0])
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        opt = adamax.Adamax()
        if not tf.executing_eagerly():
          update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        if not tf.executing_eagerly():
          self.evaluate(tf.compat.v1.global_variables_initializer())
          # Fetch params to validate initial values
          self.assertAllClose([1.0, 2.0], self.evaluate(var0))
          self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Run 3 steps of Adamax
        for t in range(3):
          beta_1_power = get_beta_accumulators(opt, dtype)
          self.assertAllCloseAccordingToType(
              0.9**(t + 1), self.evaluate(beta_1_power))
          if not tf.executing_eagerly():
            self.evaluate(update)
          else:
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

          var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(
              var0_np, self.evaluate(var0), rtol=1e-2)
          self.assertAllCloseAccordingToType(
              var1_np, self.evaluate(var1), rtol=1e-2)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testBasicWithLearningRateDecay(self):
    """Dense updates with `decay` match the numpy reference with decayed lr."""
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with self.session(graph=tf.Graph(), use_gpu=True):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 0.001
        decay = 0.002
        opt = adamax.Adamax(learning_rate=learning_rate, decay=decay)
        if not tf.executing_eagerly():
          update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        if not tf.executing_eagerly():
          self.evaluate(tf.compat.v1.global_variables_initializer())
          # Fetch params to validate initial values
          self.assertAllClose([1.0, 2.0], self.evaluate(var0))
          self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Run 3 steps of Adamax
        for t in range(3):
          beta_1_power = get_beta_accumulators(opt, dtype)
          self.assertAllCloseAccordingToType(
              0.9**(t + 1), self.evaluate(beta_1_power))
          if not tf.executing_eagerly():
            self.evaluate(update)
          else:
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

          # Inverse-time decay: lr_t = lr / (1 + decay * step).
          lr = learning_rate / (1 + decay * t)

          var0_np, m0, v0 = adamax_update_numpy(
              var0_np, grads0_np, t, m0, v0, alpha=lr)
          var1_np, m1, v1 = adamax_update_numpy(
              var1_np, grads1_np, t, m1, v1, alpha=lr)

          # Validate updated params
          self.assertAllCloseAccordingToType(
              var0_np, self.evaluate(var0), rtol=1e-2)
          self.assertAllCloseAccordingToType(
              var1_np, self.evaluate(var1), rtol=1e-2)

  def testTensorLearningRate(self):
    """Learning rate supplied as a tf.constant behaves like a float."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        opt = adamax.Adamax(tf.constant(0.001))
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], var0)
        self.assertAllClose([3.0, 4.0], var1)

        beta1_power = get_beta_accumulators(opt, dtype)

        # Run 3 steps of Adamax
        for t in range(3):
          self.assertAllCloseAccordingToType(0.9**(t + 1), beta1_power)
          update.run()

          var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, var0)
          self.assertAllCloseAccordingToType(var1_np, var1)

  def testSharing(self):
    """Two update ops from one Adamax instance share the same slot state."""
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        opt = adamax.Adamax()
        update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        beta1_power = get_beta_accumulators(opt, dtype)

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], var0)
        self.assertAllClose([3.0, 4.0], var1)

        # Run 3 steps of intertwined Adamax1 and Adamax2.
        for t in range(3):
          self.assertAllCloseAccordingToType(0.9**(t + 1), beta1_power)
          # Alternate which update op runs; both feed the same accumulators.
          if t % 2 == 0:
            update1.run()
          else:
            update2.run()

          var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, var0)
          self.assertAllCloseAccordingToType(var1_np, var1)

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testSlotsUniqueEager(self):
    v1 = tf.Variable(1.)
    v2 = tf.Variable(1.)
    opt = adamax.Adamax(1.)
    opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
    # There should be iteration, and two unique slot variables for v1 and v2.
    self.assertLen({id(v) for v in opt.variables()}, 5)

  def testConstructAdamaxWithLR(self):
    """The deprecated `lr` kwarg is accepted and overrides `learning_rate`."""
    opt = adamax.Adamax(lr=1.0)
    opt_2 = adamax.Adamax(learning_rate=0.1, lr=1.0)
    opt_3 = adamax.Adamax(learning_rate=0.1)
    self.assertIsInstance(opt.lr, tf.Variable)
    self.assertIsInstance(opt_2.lr, tf.Variable)
    self.assertIsInstance(opt_3.lr, tf.Variable)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(opt.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
class ListTests(keras_parameterized.TestCase):
  """Tests that plain Python lists assigned to model attributes are tracked.

  Lists assigned to a `tf.Module`/`keras.Model` attribute are wrapped so
  that layers and variables inside them participate in checkpointing,
  `model.layers`, and the trainable/non-trainable weight collections.
  """

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testTracking(self):
    """Layers in a list attribute show up in layers, deps and weights."""
    with self.test_session():
      model = HasList()
      output = model(tf.ones([32, 2]))
      self.assertAllEqual([32, 12], output.shape)
      # HasList holds 10 layers in `layer_list` plus layers with updates.
      self.assertEqual(11, len(model.layers))
      self.assertEqual(10, len(model.layer_list.layers))
      six.assertCountEqual(
          self,
          model.layers,
          model.layer_list.layers + model.layers_with_updates)
      # Units were presumably set to 3 + index at construction time in
      # HasList (defined elsewhere in this file).
      for index in range(10):
        self.assertEqual(3 + index, model.layer_list.layers[index].units)
      # The two list attributes become the model's checkpoint dependencies.
      self.assertEqual(2, len(model._checkpoint_dependencies))
      self.assertIs(model.layer_list, model._checkpoint_dependencies[0].ref)
      self.assertIs(model.layers_with_updates,
                    model._checkpoint_dependencies[1].ref)
      self.assertEqual(
          10,
          len(model._checkpoint_dependencies[0].ref._checkpoint_dependencies))
      self.evaluate([v.initializer for v in model.variables])
      self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]]))
      save_path = os.path.join(self.get_temp_dir(), "ckpt")
      model.save_weights(save_path)
      # Clobber the weight, then check load_weights restores the saved value.
      self.evaluate(model.variables[0].assign(tf.zeros([2, 3])))
      model.load_weights(save_path)
      self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]],
                          self.evaluate(model.variables[0]))
      # A variable added to a fresh list attribute is tracked as trainable.
      v = tf.Variable(1.)
      model.var_list = [v]
      self.assertTrue(any(v is t for t in model.variables))
      self.assertTrue(any(v is t for t in model.trainable_variables))
      self.assertFalse(any(v is t for t in model.non_trainable_variables))
      self.assertTrue(any(model.layer_list[0].trainable_weights[0]
                          is t for t in model.trainable_weights))

  def testSubModelTracking(self):
    """A model placed in another model's list contributes its weights."""
    model = training.Model()
    model.v = tf.Variable(1.)
    self.assertIn(model.v, model.trainable_weights)
    model2 = training.Model()
    model2.m = [model]
    # model2 sees model.v through the tracked list.
    self.assertIn(model.v, model2.trainable_weights)

  def testSubSequentialTracking(self):
    """A Sequential wrapped by a subclassed model is still tracked."""

    class _Subclassed(training.Model):

      def __init__(self, wrapped):
        super(_Subclassed, self).__init__()
        self._wrapped = wrapped

      def call(self, x):
        return self._wrapped(x)

    model = sequential.Sequential()
    layer = core.Dense(1)
    model.add(layer)
    model2 = _Subclassed(model)
    model2(tf.ones([1, 2]))
    model2.m = [model]
    self.assertIn(layer.kernel, model2.trainable_weights)

  def testLayerTrackedThroughSequential(self):
    """Layers inside a Sequential inside a list attribute are tracked."""

    class AttrDict(dict):
      # Dict whose keys are also readable as attributes (config helper).

      def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

    def ffnet(layer_sizes, name):
      # Build a small feed-forward Sequential; relu on all but last layer.
      ff = sequential.Sequential(name=name)
      for i, width in enumerate(layer_sizes):
        ff.add(core.Dense(
            width,
            activation=("relu" if i < len(layer_sizes)-1 else None)))
      return ff

    class MyModel2(training.Model):

      def __init__(self, config, name="my_model_2"):
        super(MyModel2, self).__init__(name=name)
        self._num_tokens = config.num_tokens
        # list of sub-models
        self._ffnet = [ffnet(config.module_layers + (self._num_tokens,),
                             "ff")]

      def null_input(self):
        return tf.zeros([1, self._num_tokens], dtype=tf.float32)

      def call(self, input_, module_index=None):
        return self._ffnet[0](input_)

    m2 = MyModel2(AttrDict(
        num_tokens=5,
        module_layers=(50, 30)))
    # Construct
    m2(m2.null_input())
    # Three Dense layers (50, 30, 5) => 3 kernels + 3 biases.
    self.assertLen(m2.trainable_variables, 6)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testUpdatesForwarded(self):
    """Updates from layers in a tracked list surface on the model."""
    model = HasList()
    model_input = tf.ones([32, 2])
    model(model_input)
    if tf.executing_eagerly():
      # Eager mode has no update ops to collect.
      self.assertEqual(0, len(model.updates))
    else:
      self.assertGreater(len(model.layers_with_updates[0].updates), 0)
      self.assertEqual(set(model.layers_with_updates[0].updates),
                       set(model.updates))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testLossesForwarded(self):
    """Losses from layers in a tracked list surface on the model."""
    model = HasList()
    model_input = tf.ones([32, 2])
    model(model_input)
    self.assertEqual(2, len(model.losses))

  def testModelContainersCompareEqual(self):
    """Equal-comparing container models are still tracked as distinct layers."""

    class HasEqualContainers(training.Model):

      def __init__(self):
        super(HasEqualContainers, self).__init__()
        self.l1 = []
        self.l2 = []

    model = HasEqualContainers()
    first_layer = HasEqualContainers()
    model.l1.append(first_layer)
    second_layer = HasEqualContainers()
    model.l2.append(second_layer)
    # Both appear in model.layers even though l1 == l2 as (empty-ish) models.
    self.assertEqual([first_layer, second_layer], model.layers)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testTensorConversion(self):
    """A tracked list of floats still converts to a tensor."""

    class ListToTensor(training.Model):

      def __init__(self):
        super(ListToTensor, self).__init__()
        self.l = [1., 2., 3.]

    self.assertAllEqual(
        [1., 2., 3.],
        self.evaluate(tf.constant(ListToTensor().l)))

    self.assertAllEqual(
        [1., 2., 3.],
        self.evaluate(tf.raw_ops.Pack(values=ListToTensor().l)))
class DenseTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for the v1 `tf.layers` Dense layer and `core_layers.dense`.

  Covers properties, naming, variable creation/collections, shapes,
  regularizers, variable-scope reuse, and constraints in both graph and
  eager modes.
  """

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testDenseProperties(self):
    """Constructor arguments are exposed as attributes; names auto-increment."""
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')
    self.assertEqual(dense.units, 2)
    self.assertEqual(dense.activation, tf.nn.relu)
    self.assertEqual(dense.kernel_regularizer, None)
    self.assertEqual(dense.bias_regularizer, None)
    self.assertEqual(dense.activity_regularizer, None)
    self.assertEqual(dense.use_bias, True)

    # Test auto-naming
    dense = core_layers.Dense(2, activation=tf.nn.relu)
    dense.apply(tf.random.uniform((5, 2)))
    self.assertEqual(dense.name, 'dense_1')
    dense = core_layers.Dense(2, activation=tf.nn.relu)
    dense.apply(tf.random.uniform((5, 2)))
    self.assertEqual(dense.name, 'dense_2')

  @test_util.run_deprecated_v1
  def testVariableInput(self):
    """Calling Dense on a tf.Variable works like calling it on a tensor."""
    with self.cached_session():
      v = tf.compat.v1.get_variable(
          'X', initializer=tf.compat.v1.zeros_initializer(), shape=(1, 1))
      x = core_layers.Dense(1)(v)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      self.assertAllEqual(x, [[0.0]])

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testCall(self):
    """First call creates kernel+bias and registers them as trainable."""
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')
    inputs = tf.random.uniform((5, 4), seed=1)
    outputs = dense(inputs)
    self.assertListEqual([5, 2], outputs.get_shape().as_list())
    self.assertListEqual(dense.variables, [dense.kernel, dense.bias])
    self.assertListEqual(dense.trainable_variables,
                         [dense.kernel, dense.bias])
    self.assertListEqual(dense.non_trainable_variables, [])
    if not tf.executing_eagerly():
      # Graph mode also adds the variables to the TRAINABLE_VARIABLES
      # collection.
      self.assertEqual(
          len(tf.compat.v1.get_collection(
              tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 2)
    self.assertEqual(dense.kernel.name, 'my_dense/kernel:0')
    self.assertEqual(dense.bias.name, 'my_dense/bias:0')

  @test_util.assert_no_new_pyobjects_executing_eagerly
  def testNoEagerLeak(self):
    # Tests that repeatedly constructing and building a Layer does not leak
    # Python objects.
    inputs = tf.random.uniform((5, 4), seed=1)
    core_layers.Dense(5)(inputs)
    core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')(inputs)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testCallTensorDot(self):
    """Rank-3 input is contracted along the last axis (tensordot path)."""
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')
    inputs = tf.random.uniform((5, 4, 3), seed=1)
    outputs = dense(inputs)
    self.assertListEqual([5, 4, 2], outputs.get_shape().as_list())

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testNoBias(self):
    """use_bias=False creates only the kernel; bias stays None."""
    dense = core_layers.Dense(2, use_bias=False, name='my_dense')
    inputs = tf.random.uniform((5, 2), seed=1)
    _ = dense(inputs)
    self.assertListEqual(dense.variables, [dense.kernel])
    self.assertListEqual(dense.trainable_variables, [dense.kernel])
    self.assertListEqual(dense.non_trainable_variables, [])
    if not tf.executing_eagerly():
      self.assertEqual(
          len(tf.compat.v1.get_collection(
              tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 1)
    self.assertEqual(dense.kernel.name, 'my_dense/kernel:0')
    self.assertEqual(dense.bias, None)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testNonTrainable(self):
    """trainable=False puts all variables in the non-trainable set."""
    dense = core_layers.Dense(2, trainable=False, name='my_dense')
    inputs = tf.random.uniform((5, 2), seed=1)
    _ = dense(inputs)
    self.assertListEqual(dense.variables, [dense.kernel, dense.bias])
    self.assertListEqual(dense.non_trainable_variables,
                         [dense.kernel, dense.bias])
    self.assertListEqual(dense.trainable_variables, [])
    if not tf.executing_eagerly():
      # Nothing should land in the graph's TRAINABLE_VARIABLES collection.
      self.assertEqual(
          len(tf.compat.v1.get_collection(
              tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 0)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testOutputShape(self):
    """Only the last dimension changes, for inputs of rank 2, 3 and 4."""
    dense = core_layers.Dense(7, activation=tf.nn.relu, name='my_dense')
    inputs = tf.random.uniform((5, 3), seed=1)
    outputs = dense.apply(inputs)
    self.assertEqual(outputs.get_shape().as_list(), [5, 7])

    inputs = tf.random.uniform((5, 2, 3), seed=1)
    outputs = dense(inputs)
    self.assertEqual(outputs.get_shape().as_list(), [5, 2, 7])

    inputs = tf.random.uniform((1, 2, 4, 3), seed=1)
    outputs = dense.apply(inputs)
    self.assertEqual(outputs.get_shape().as_list(), [1, 2, 4, 7])

  @test_util.run_deprecated_v1
  def testCallOnPlaceHolder(self):
    """Building requires a known last dimension; unknown rank/dims raise."""
    inputs = tf.compat.v1.placeholder(dtype=tf.float32)
    dense = core_layers.Dense(4, name='my_dense')
    with self.assertRaises(ValueError):
      dense(inputs)

    inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, None])
    dense = core_layers.Dense(4, name='my_dense')
    with self.assertRaises(ValueError):
      dense(inputs)

    inputs = tf.compat.v1.placeholder(
        dtype=tf.float32, shape=[None, None, None])
    dense = core_layers.Dense(4, name='my_dense')
    with self.assertRaises(ValueError):
      dense(inputs)

    # Known last dimension: these builds succeed.
    inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, 3])
    dense = core_layers.Dense(4, name='my_dense')
    dense(inputs)

    inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, None, 3])
    dense = core_layers.Dense(4, name='my_dense')
    dense(inputs)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testActivation(self):
    """Graph op names reflect the final op: activation or BiasAdd."""
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='dense1')
    inputs = tf.random.uniform((5, 3), seed=1)
    outputs = dense(inputs)
    if not tf.executing_eagerly():
      self.assertEqual(outputs.op.name, 'dense1/Relu')

    dense = core_layers.Dense(2, name='dense2')
    inputs = tf.random.uniform((5, 3), seed=1)
    outputs = dense(inputs)
    if not tf.executing_eagerly():
      self.assertEqual(outputs.op.name, 'dense2/BiasAdd')

  @test_util.run_deprecated_v1
  def testActivityRegularizer(self):
    """Activity regularizer loss lands in REGULARIZATION_LOSSES."""
    regularizer = lambda x: tf.reduce_sum(x) * 1e-3
    dense = core_layers.Dense(
        2, name='my_dense', activity_regularizer=regularizer)
    inputs = tf.random.uniform((5, 3), seed=1)
    _ = dense(inputs)
    loss_keys = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
    self.assertEqual(len(loss_keys), 1)
    self.assertListEqual(dense.losses, loss_keys)

  @test_util.run_deprecated_v1
  def testKernelRegularizer(self):
    """Kernel regularizer loss matches the collection entry's value."""
    regularizer = lambda x: tf.reduce_sum(x) * 1e-3
    dense = core_layers.Dense(
        2, name='my_dense', kernel_regularizer=regularizer)
    inputs = tf.random.uniform((5, 3), seed=1)
    _ = dense(inputs)
    loss_keys = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
    self.assertEqual(len(loss_keys), 1)
    self.evaluate([v.initializer for v in dense.variables])
    self.assertAllEqual(self.evaluate(dense.losses), self.evaluate(loss_keys))

  @test_util.run_deprecated_v1
  def testKernelRegularizerWithReuse(self):
    """Reusing a dense layer does not duplicate its regularization loss."""
    regularizer = lambda x: tf.reduce_sum(x) * 1e-3
    inputs = tf.random.uniform((5, 3), seed=1)
    _ = core_layers.dense(
        inputs, 2, name='my_dense', kernel_regularizer=regularizer)
    self.assertEqual(
        len(tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)), 1)
    _ = core_layers.dense(
        inputs, 2, name='my_dense', kernel_regularizer=regularizer,
        reuse=True)
    # Still exactly one loss: the reused call added nothing.
    self.assertEqual(
        len(tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)), 1)

  @test_util.run_deprecated_v1
  def testBiasRegularizer(self):
    """Bias regularizer loss matches the collection entry's value."""
    regularizer = lambda x: tf.reduce_sum(x) * 1e-3
    dense = core_layers.Dense(2, name='my_dense',
                              bias_regularizer=regularizer)
    inputs = tf.random.uniform((5, 3), seed=1)
    _ = dense(inputs)
    loss_keys = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
    self.assertEqual(len(loss_keys), 1)
    self.evaluate([v.initializer for v in dense.variables])
    self.assertAllEqual(self.evaluate(dense.losses), self.evaluate(loss_keys))

  @test_util.run_deprecated_v1
  def testFunctionalDense(self):
    """The functional `core_layers.dense` creates variables and named ops."""
    with self.cached_session():
      inputs = tf.random.uniform((5, 3), seed=1)
      outputs = core_layers.dense(
          inputs, 2, activation=tf.nn.relu, name='my_dense')
      self.assertEqual(
          len(tf.compat.v1.get_collection(
              tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 2)
      self.assertEqual(outputs.op.name, 'my_dense/Relu')

  @test_util.run_deprecated_v1
  def testFunctionalDenseTwice(self):
    """Two functional calls without reuse create two sets of variables."""
    inputs = tf.random.uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2)
    vars1 = _get_variable_dict_from_varstore().values()
    core_layers.dense(inputs, 2)
    vars2 = _get_variable_dict_from_varstore().values()
    self.assertEqual(len(vars1), 2)
    self.assertEqual(len(vars2), 4)

  # TODO(alive): get this to work in eager mode.
  def testFunctionalDenseTwiceReuse(self):
    """reuse=True shares the variables created by the first call."""
    with self.cached_session():
      inputs = tf.random.uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2, name='my_dense')
      vars1 = tf.compat.v1.trainable_variables()
      core_layers.dense(inputs, 2, name='my_dense', reuse=True)
      vars2 = tf.compat.v1.trainable_variables()
      self.assertEqual(vars1, vars2)

  # TODO(alive): get this to work in eager mode.
  def testFunctionalDenseTwiceReuseFromScope(self):
    """Variable reuse via an enclosing reusing variable_scope."""
    with self.cached_session():
      with tf.compat.v1.variable_scope('scope'):
        inputs = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(inputs, 2, name='my_dense')
        vars1 = tf.compat.v1.trainable_variables()
      with tf.compat.v1.variable_scope('scope', reuse=True):
        core_layers.dense(inputs, 2, name='my_dense')
        vars2 = tf.compat.v1.trainable_variables()
      self.assertEqual(vars1, vars2)

  @test_util.run_deprecated_v1
  def testFunctionalDenseInitializerFromScope(self):
    """The scope's default initializer applies to the kernel, not the bias."""
    with tf.compat.v1.variable_scope(
        'scope',
        initializer=tf.compat.v1.ones_initializer()), self.cached_session():
      inputs = tf.random.uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      weights = _get_variable_dict_from_varstore()
      self.assertEqual(len(weights), 2)
      # Check that the matrix weights got initialized to ones (from scope).
      self.assertAllClose(weights['scope/dense/kernel'].read_value(),
                          np.ones((3, 2)))
      # Check that the bias still got initialized to zeros.
      self.assertAllClose(weights['scope/dense/bias'].read_value(),
                          np.zeros((2)))

  def testFunctionalDenseWithCustomGetter(self):
    """A scope custom_getter is invoked once per created variable."""
    called = [0]

    def custom_getter(getter, *args, **kwargs):
      called[0] += 1
      return getter(*args, **kwargs)

    with tf.compat.v1.variable_scope('test', custom_getter=custom_getter):
      inputs = tf.random.uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2)
    # kernel + bias => two getter invocations.
    self.assertEqual(called[0], 2)

  @test_util.run_deprecated_v1
  def testFunctionalDenseInScope(self):
    """Variable names are prefixed by the enclosing variable scope."""
    with self.cached_session():
      with tf.compat.v1.variable_scope('test'):
        inputs = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(inputs, 2, name='my_dense')
        var_dict = _get_variable_dict_from_varstore()
        var_key = 'test/my_dense/kernel'
        self.assertEqual(var_dict[var_key].name, '%s:0' % var_key)
      with tf.compat.v1.variable_scope('test1') as scope:
        inputs = tf.random.uniform((5, 3), seed=1)
        # Passing the scope object itself as the layer name.
        core_layers.dense(inputs, 2, name=scope)
        var_dict = _get_variable_dict_from_varstore()
        var_key = 'test1/kernel'
        self.assertEqual(var_dict[var_key].name, '%s:0' % var_key)
      with tf.compat.v1.variable_scope('test2'):
        inputs = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(inputs, 2)
        var_dict = _get_variable_dict_from_varstore()
        var_key = 'test2/dense/kernel'
        self.assertEqual(var_dict[var_key].name, '%s:0' % var_key)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testComputeOutputShape(self):
    """compute_output_shape rejects rank < 2 and maps the last dim to units."""
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='dense1')
    ts = tf.TensorShape
    # pylint: disable=protected-access
    with self.assertRaises(ValueError):
      dense.compute_output_shape(ts(None))
    with self.assertRaises(ValueError):
      dense.compute_output_shape(ts([]))
    with self.assertRaises(ValueError):
      dense.compute_output_shape(ts([1]))
    self.assertEqual(
        [None, 2],
        dense.compute_output_shape((None, 3)).as_list())
    self.assertEqual(
        [None, 2],
        dense.compute_output_shape(ts([None, 3])).as_list())
    self.assertEqual(
        [None, 4, 2],
        dense.compute_output_shape(ts([None, 4, 3])).as_list())
    # pylint: enable=protected-access

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testConstraints(self):
    """Constraint callables are stored on the layer after building."""
    k_constraint = lambda x: x / tf.reduce_sum(x)
    b_constraint = lambda x: x / tf.reduce_max(x)
    dense = core_layers.Dense(2,
                              kernel_constraint=k_constraint,
                              bias_constraint=b_constraint)
    inputs = tf.random.uniform((5, 3), seed=1)
    dense(inputs)
    self.assertEqual(dense.kernel_constraint, k_constraint)
    self.assertEqual(dense.bias_constraint, b_constraint)
opt = rmsprop.RMSprop(learning_rate=1., momentum=0.2, centered=False) opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) # There should be iteration, and two unique slot variables for v1 and v2. self.assertLen(set({id(v) for v in opt.variables()}), 5) self.assertEqual(self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations)) opt = rmsprop.RMSprop(learning_rate=1., momentum=0.2, centered=True) opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) # There should be iteration, and three unique slot variables for v1 and v2 self.assertLen(set({id(v) for v in opt.variables()}), 7) self.assertEqual(self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations)) @combinations.generate(combinations.combine(mode=["graph", "eager"])) class SlotColocationTest(tf.test.TestCase, parameterized.TestCase): @parameterized.parameters([True, False]) @test_util.run_gpu_only def testRunMinimizeOnGPUForCPUVariables(self, use_resource): with tf.compat.v1.device("/device:CPU:0"): if use_resource: var0 = tf.Variable([1.0, 2.0], dtype=tf.float32) var1 = tf.Variable([3.0, 4.0], dtype=tf.float32) else: var0 = tf.Variable([1.0, 2.0], dtype=tf.float32) var1 = tf.Variable([3.0, 4.0], dtype=tf.float32) def loss(): return 5 * var0 + 3 * var1
class DropoutTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for the v1 `tf.layers` Dropout layer and `core_layers.dropout`.

  Checks the training/inference switch (boolean, placeholder-fed, and
  dynamic-rate variants) and static/dynamic noise shapes.
  """

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testDropoutProperties(self):
    """Constructor arguments are exposed as attributes."""
    dp = core_layers.Dropout(0.5, name='dropout')
    self.assertEqual(dp.rate, 0.5)
    self.assertEqual(dp.noise_shape, None)
    dp.apply(tf.ones(()))
    self.assertEqual(dp.name, 'dropout')

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testBooleanLearningPhase(self):
    """training=True zeroes some units; training=False is the identity."""
    dp = core_layers.Dropout(0.5)
    inputs = tf.ones((5, 3))
    dropped = dp.apply(inputs, training=True)
    if not tf.executing_eagerly():
      self.evaluate(tf.compat.v1.global_variables_initializer())
    np_output = self.evaluate(dropped)
    # With rate 0.5 on an all-ones input, at least one unit is dropped to 0.
    self.assertAlmostEqual(0., np_output.min())
    dropped = dp.apply(inputs, training=False)
    np_output = self.evaluate(dropped)
    self.assertAllClose(np.ones((5, 3)), np_output)

  @test_util.run_deprecated_v1
  def testDynamicLearningPhase(self):
    """The training flag can be fed through a boolean placeholder."""
    with self.cached_session() as sess:
      dp = core_layers.Dropout(0.5, seed=1)
      inputs = tf.ones((5, 5))
      training = tf.compat.v1.placeholder(dtype='bool')
      dropped = dp.apply(inputs, training=training)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      np_output = sess.run(dropped, feed_dict={training: True})
      self.assertAlmostEqual(0., np_output.min())
      np_output = sess.run(dropped, feed_dict={training: False})
      self.assertAllClose(np.ones((5, 5)), np_output)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testDynamicNoiseShape(self):
    """noise_shape with None entries broadcasts the mask along axis 1."""
    inputs = tf.ones((5, 3, 2))
    noise_shape = [None, 1, None]
    dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1)
    dropped = dp.apply(inputs, training=True)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    np_output = self.evaluate(dropped)
    self.assertAlmostEqual(0., np_output.min())
    # Mask size 1 along axis 1 => identical slices across that axis.
    self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :])

  def testCustomNoiseShape(self):
    """A fully specified noise_shape also broadcasts along the size-1 axis."""
    inputs = tf.ones((5, 3, 2))
    noise_shape = [5, 1, 2]
    dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1)
    dropped = dp.apply(inputs, training=True)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    np_output = self.evaluate(dropped)
    self.assertAlmostEqual(0., np_output.min())
    self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :])

  @test_util.run_deprecated_v1
  def testFunctionalDropout(self):
    """The functional `core_layers.dropout` honors the training flag."""
    with self.cached_session():
      inputs = tf.ones((5, 5))
      dropped = core_layers.dropout(inputs, 0.5, training=True, seed=1)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      np_output = self.evaluate(dropped)
      self.assertAlmostEqual(0., np_output.min())
      dropped = core_layers.dropout(inputs, 0.5, training=False, seed=1)
      np_output = self.evaluate(dropped)
      self.assertAllClose(np.ones((5, 5)), np_output)

  @test_util.run_deprecated_v1
  def testDynamicRate(self):
    """The dropout rate itself can be a placeholder; rate 0.0 is identity."""
    with self.cached_session() as sess:
      rate = tf.compat.v1.placeholder(dtype='float32', name='rate')
      dp = core_layers.Dropout(rate, name='dropout')
      inputs = tf.ones((5, 5))
      dropped = dp.apply(inputs, training=True)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      np_output = sess.run(dropped, feed_dict={rate: 0.5})
      self.assertAlmostEqual(0., np_output.min())
      np_output = sess.run(dropped, feed_dict={rate: 0.0})
      self.assertAllClose(np.ones((5, 5)), np_output)
class TestWeightSavingAndLoadingTFFormat(tf.test.TestCase,
                                         parameterized.TestCase):
  """Tests save_weights/load_weights with the TensorFlow checkpoint format.

  Exercises overwrite prompting, graph hygiene, restore-on-create for
  subclassed models, incremental restore, partial/extra-layer checkpoints,
  and checkpoint-prefix path handling.
  """

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_tensorflow_format_overwrite(self):
    """overwrite=True silently overwrites; overwrite=False prompts (EOFError)."""
    with self.cached_session() as session:
      model = SubclassedModel()
      temp_dir = self.get_temp_dir()
      prefix = os.path.join(temp_dir, 'ckpt')

      x = tf.constant(np.random.random((3, 2)), dtype=tf.float32)
      executing_eagerly = tf.executing_eagerly()
      model(x)  # pylint: disable=not-callable
      if not executing_eagerly:
        session.run([v.initializer for v in model.variables])
      model.save_weights(prefix, save_format='tensorflow')
      model.save_weights(prefix, save_format='tensorflow', overwrite=True)
      with self.assertRaises(EOFError):
        # Indirectly tests that the user is prompted
        model.save_weights(prefix, save_format='tensorflow', overwrite=False)

  def test_no_default_session(self):
    """Training and checkpointing work without a default v1 session."""
    with tf.Graph().as_default():
      self.assertFalse(tf.compat.v1.get_default_session())
      data = np.random.random((1000, 32)).astype(np.float32)
      labels = np.random.random((1000, 10)).astype(np.float32)

      model = keras.models.Sequential([
          keras.layers.Dense(10, activation='softmax'),
          keras.layers.Dense(10, activation='softmax')])

      model.compile(optimizer=tf.compat.v1.train.RMSPropOptimizer(0.001),
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

      model.fit(data, labels)
      fname = os.path.join(self.get_temp_dir(), 'weights', 'ckpt')
      model.save_weights(fname)
      model.load_weights(fname)

  def test_no_graph_pollution(self):
    """Repeated save/load adds no new ops to the graph after the first."""
    with tf.compat.v1.get_default_graph().as_default():
      graph = tf.Graph()
      with graph.as_default(), self.session(graph) as session:
        model = SubclassedModel()
        temp_dir = self.get_temp_dir()
        prefix = os.path.join(temp_dir, 'ckpt')

        x = tf.constant(np.random.random((3, 2)), dtype=tf.float32)
        model(x)  # pylint: disable=not-callable
        session.run([v.initializer for v in model.variables])
        model.save_weights(prefix, save_format='tensorflow')
        op_count = len(graph.get_operations())
        model.save_weights(prefix, save_format='tensorflow')
        # Second save reuses the existing save ops.
        self.assertLen(graph.get_operations(), op_count)

        model.load_weights(prefix)
        op_count = len(graph.get_operations())
        model.load_weights(prefix)
        # Second load reuses the existing restore ops.
        self.assertLen(graph.get_operations(), op_count)

  def _weight_loading_test_template(self, make_model_fn):
    """Saves a trained model's weights, clobbers them, and restores.

    Args:
      make_model_fn: zero-arg callable returning a fresh, identically
        structured model.
    """
    with self.cached_session():
      model = make_model_fn()
      model.compile(
          loss='mse',
          optimizer=tf.compat.v1.train.RMSPropOptimizer(0.1),
          metrics=['acc', keras.metrics.CategoricalAccuracy()])
      temp_dir = self.get_temp_dir()
      prefix = os.path.join(temp_dir, 'ckpt')
      train_x = np.random.random((3, 2))
      train_y = np.random.random((3,))
      x = tf.constant(train_x, dtype=tf.float32)

      model.train_on_batch(train_x, train_y)
      model.save_weights(prefix, save_format='tf')
      ref_y_before_train = model.predict(train_x)
      model.train_on_batch(train_x, train_y)
      ref_y_after_train = model.predict(train_x)
      # Scramble every variable so a successful restore is observable.
      for v in model.variables:
        self.evaluate(
            v.assign(tf.random.normal(shape=tf.shape(v))))

      self.addCleanup(shutil.rmtree, temp_dir)

      model.load_weights(prefix)
      self.assertAllClose(ref_y_before_train, self.evaluate(model(x)))

      # Test restore-on-create if this is a subclassed Model (graph Networks
      # will have already created their variables).
      load_model = make_model_fn()
      load_model.load_weights(prefix)
      self.assertAllClose(
          ref_y_before_train,
          self.evaluate(load_model(x)))
      load_model = make_model_fn()
      load_model.load_weights(prefix)
      # We need to run some of the restore ops for predict(), but not all
      # variables have been created yet (optimizer slot variables). Tests
      # incremental restore.
      load_model.predict(train_x)
      load_model.compile(
          loss='mse',
          optimizer=tf.compat.v1.train.RMSPropOptimizer(0.1),
          metrics=['acc', keras.metrics.CategoricalAccuracy()])
      load_model.train_on_batch(train_x, train_y)
      self.assertAllClose(ref_y_after_train, self.evaluate(load_model(x)))

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_weight_loading_graph_model(self):
    """Round-trips weights through a functional (graph) model."""
    def _make_graph_model():
      a = keras.layers.Input(shape=(2,))
      x = keras.layers.Dense(3)(a)
      b = keras.layers.Dense(1)(x)
      return keras.models.Model(a, b)

    self._weight_loading_test_template(_make_graph_model)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_weight_loading_subclassed_model(self):
    """Round-trips weights through a subclassed model."""
    self._weight_loading_test_template(SubclassedModel)

  def _new_layer_weight_loading_test_template(
      self, first_model_fn, second_model_fn):
    """Saves from one architecture and loads into a partially matching one.

    Args:
      first_model_fn: builds the model whose weights are saved.
      second_model_fn: builds a model sharing a weight subset with the first.
    """
    with self.cached_session() as session:
      model = first_model_fn()
      temp_dir = self.get_temp_dir()
      prefix = os.path.join(temp_dir, 'ckpt')

      x = tf.constant(np.random.random((3, 2)), dtype=tf.float32)
      executing_eagerly = tf.executing_eagerly()
      ref_y_tensor = model(x)
      if not executing_eagerly:
        session.run([v.initializer for v in model.variables])
      ref_y = self.evaluate(ref_y_tensor)
      model.save_weights(prefix)
      self.assertEqual(
          prefix,
          tf.train.latest_checkpoint(temp_dir))
      # Scramble every variable so a successful restore is observable.
      for v in model.variables:
        self.evaluate(
            v.assign(tf.random.normal(shape=tf.shape(v))))

      self.addCleanup(shutil.rmtree, temp_dir)

      second_model = second_model_fn()
      status = second_model.load_weights(prefix)
      second_model(x)
      status.run_restore_ops()
      second_model.save_weights(prefix)
      # Check that the second model's checkpoint loads into the original model
      status = model.load_weights(prefix)
      status.run_restore_ops(session)
      y = self.evaluate(model(x))
      self.assertAllClose(ref_y, y)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_weight_loading_graph_model_added_layer(self):
    """Restoring model gains an extra weighted layer not in the checkpoint."""
    def _save_graph_model():
      a = keras.layers.Input(shape=(2,))
      x = keras.layers.Dense(3, name='first')(a)
      b = keras.layers.Dense(1, name='second')(x)
      return keras.models.Model(a, b)

    def _restore_graph_model():
      a = keras.layers.Input(shape=(2,))
      x = keras.layers.Dense(3, name='first')(a)
      y = keras.layers.Dense(1, name='second')(x)
      b = keras.layers.Dense(3, name='secondjr')(y)
      return keras.models.Model(a, b)

    self._new_layer_weight_loading_test_template(
        _save_graph_model, _restore_graph_model)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_weight_loading_graph_model_added_no_weight_layer(self):
    """Restoring model gains an extra weightless (Dropout) layer."""
    def _save_graph_model():
      a = keras.layers.Input(shape=(2,))
      x = keras.layers.Dense(3, name='first')(a)
      b = keras.layers.Dense(1, name='second')(x)
      return keras.models.Model(a, b)

    def _restore_graph_model():
      a = keras.layers.Input(shape=(2,))
      x = keras.layers.Dense(3, name='first')(a)
      b = keras.layers.Dense(1, name='second')(x)
      y = keras.layers.Dropout(rate=0.1)(b)
      return keras.models.Model(a, y)

    self._new_layer_weight_loading_test_template(
        _save_graph_model, _restore_graph_model)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_weight_loading_subclassed_model_added_layer(self):
    """Subclassed restoring model routes through an extra Dense layer."""

    class SubclassedModelRestore(training.Model):

      def __init__(self):
        super(SubclassedModelRestore, self).__init__()
        self.x_layer = keras.layers.Dense(3)
        self.y_layer = keras.layers.Dense(3)
        self.b_layer = keras.layers.Dense(1)

      def call(self, a):
        return self.b_layer(self.y_layer(self.x_layer(a)))

    self._new_layer_weight_loading_test_template(
        SubclassedModel, SubclassedModelRestore)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_incompatible_checkpoint(self):
    """Loading a checkpoint with no matching objects raises AssertionError."""
    save_path = tf.train.Checkpoint().save(
        os.path.join(self.get_temp_dir(), 'ckpt'))
    m = DummySubclassModel()
    with self.assertRaisesRegex(AssertionError, 'Nothing to load'):
      m.load_weights(save_path)
    m.dense = keras.layers.Dense(2)
    m.dense(tf.constant([[1.]]))
    with self.assertRaisesRegex(
        AssertionError, 'Nothing except the root object matched'):
      m.load_weights(save_path)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_directory_passed(self):
    """A prefix ending in a path separator still saves and restores."""
    with self.cached_session():
      m = DummySubclassModel()
      v = m.add_weight(name='v', shape=[])
      self.evaluate(v.assign(42.))
      prefix = os.path.join(self.get_temp_dir(), str(uuid.uuid4()), 'ckpt/')
      m.save_weights(prefix)
      self.evaluate(v.assign(2.))
      m.load_weights(prefix)
      self.assertEqual(42., self.evaluate(v))

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_relative_path(self):
    """Relative prefixes (bare, subdir, trailing slash) round-trip weights."""
    with self.cached_session():
      m = DummySubclassModel()
      v = m.add_weight(name='v', shape=[])
      os.chdir(self.get_temp_dir())

      prefix = 'ackpt'
      self.evaluate(v.assign(42.))
      m.save_weights(prefix)
      self.assertTrue(tf.io.gfile.exists('ackpt.index'))
      self.evaluate(v.assign(1.))
      m.load_weights(prefix)
      self.assertEqual(42., self.evaluate(v))

      prefix = 'subdir/ackpt'
      self.evaluate(v.assign(43.))
      m.save_weights(prefix)
      self.assertTrue(tf.io.gfile.exists('subdir/ackpt.index'))
      self.evaluate(v.assign(2.))
      m.load_weights(prefix)
      self.assertEqual(43., self.evaluate(v))

      prefix = 'ackpt/'
      self.evaluate(v.assign(44.))
      m.save_weights(prefix)
      self.assertTrue(tf.io.gfile.exists('ackpt/.index'))
      self.evaluate(v.assign(3.))
      m.load_weights(prefix)
      self.assertEqual(44., self.evaluate(v))

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_nonexistent_prefix_directory(self):
    """Saving to a prefix in a not-yet-existing directory creates it."""
    with self.cached_session():
      m = DummySubclassModel()
      v = m.add_weight(name='v', shape=[])
      self.evaluate(v.assign(42.))
      prefix = os.path.join(self.get_temp_dir(), str(uuid.uuid4()), 'bckpt')
      m.save_weights(prefix)
      self.evaluate(v.assign(2.))
      m.load_weights(prefix)
      self.assertEqual(42., self.evaluate(v))
class TestWholeModelSaving(keras_parameterized.TestCase):
  """Round-trip save/load tests for whole models (TF SavedModel and HDF5)."""

  def _save_model_dir(self, dirname='saved_model'):
    """Returns a save path inside a temp dir that is removed on cleanup."""
    temp_dir = self.get_temp_dir()
    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
    return os.path.join(temp_dir, dirname)

  def _assert_same_weights_and_metrics(self, model, loaded_model):
    """Checks that the loaded weights and metrics are the same as the original.

    Args:
      model: original model
      loaded_model: loaded model
    """
    self.assertAllClose(model.weights, loaded_model.weights)

    if loaded_model.optimizer:
      if testing_utils.get_save_format() == 'tf':
        # TODO(b/153110928): Keras TF format doesn't restore optimizer weights
        # currently.
        return
      self.assertAllClose(model.optimizer.weights,
                          loaded_model.optimizer.weights)

    # In V1/Graph mode, the model isn't built, so the metrics are not loaded
    # immediately (requires model to be called on some data before building
    # metrics).
    check_metrics = tf.__internal__.tf2.enabled() and tf.executing_eagerly()

    if check_metrics:
      self.assertAllEqual([m.name for m in model.metrics],
                          [m.name for m in loaded_model.metrics])

  @keras_parameterized.run_with_all_model_types
  @keras_parameterized.run_all_keras_modes
  def test_save_and_load(self):
    """Saves/reloads a compiled, trained model and compares outputs."""
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    save_kwargs = testing_utils.get_save_kwargs()

    if ((save_format == 'h5' or not save_kwargs.get('save_traces', True)) and
        testing_utils.get_model_type() == 'subclass'):
      # HDF5 format currently does not allow saving subclassed models.
      # When saving with `save_traces=False`, the subclassed model must have a
      # get_config/from_config, which the autogenerated model does not have.
      return

    with self.cached_session():
      model = testing_utils.get_model_from_layers(
          [keras.layers.Dense(2),
           keras.layers.RepeatVector(3),
           keras.layers.TimeDistributed(keras.layers.Dense(3))],
          input_shape=(3,))
      model.compile(
          loss=keras.losses.MSE,
          # `lr` is a deprecated alias of `learning_rate`; use the
          # supported keyword (same value, same behavior).
          optimizer=keras.optimizer_v2.rmsprop.RMSprop(learning_rate=0.0001),
          metrics=[
              keras.metrics.categorical_accuracy,
              keras.metrics.CategoricalCrossentropy(
                  name='cce', label_smoothing=tf.constant(0.2)),
          ],
          weighted_metrics=[
              keras.metrics.categorical_crossentropy,
              keras.metrics.CategoricalCrossentropy(
                  name='cce', label_smoothing=tf.constant(0.2)),
          ],
          sample_weight_mode='temporal')

      x = np.random.random((1, 3))
      y = np.random.random((1, 3, 3))
      model.train_on_batch(x, y)

      out = model.predict(x)
      keras.models.save_model(
          model, saved_model_dir, save_format=save_format, **save_kwargs)

      loaded_model = keras.models.load_model(saved_model_dir)
      self._assert_same_weights_and_metrics(model, loaded_model)

      out2 = loaded_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

      eval_out = model.evaluate(x, y)
      eval_out2 = loaded_model.evaluate(x, y)
      self.assertArrayNear(eval_out, eval_out2, 0.001)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_sequential_model_saving_without_input_shape(self):
    """Deferred-build Sequential models can still be saved and reloaded."""
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2))
      model.add(keras.layers.RepeatVector(3))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
      model.compile(
          loss=keras.losses.MSE,
          optimizer='rmsprop',
          metrics=[
              keras.metrics.categorical_accuracy,
              keras.metrics.CategoricalAccuracy(name='cat_acc')
          ],
          weighted_metrics=[
              keras.metrics.categorical_accuracy,
              keras.metrics.CategoricalAccuracy(name='cat_acc2')
          ],
          sample_weight_mode='temporal')
      x = np.random.random((1, 3))
      y = np.random.random((1, 3, 3))
      model.train_on_batch(x, y)

      out = model.predict(x)
      model.save(saved_model_dir, save_format=save_format)

      new_model = keras.models.load_model(saved_model_dir)
      self._assert_same_weights_and_metrics(model, new_model)

      out2 = new_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_sequential_model_saving_without_compile(self):
    """An uncompiled model round-trips with identical predictions."""
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.RepeatVector(3))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))

      x = np.random.random((1, 3))
      out = model.predict(x)

      # Save the model without any compilation or training.
      keras.models.save_model(model, saved_model_dir, save_format=save_format)

      new_model = keras.models.load_model(saved_model_dir)
      self._assert_same_weights_and_metrics(model, new_model)

      out2 = new_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_sequential_model_saving_2(self):
    """Round-trips a model with a custom optimizer class and custom loss."""
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()

    with tf.Graph().as_default(), self.cached_session():
      # test with custom optimizer, loss

      class CustomOp(optimizer_v1.RMSprop):
        pass

      def custom_loss(y_true, y_pred):
        return keras.losses.mse(y_true, y_pred)

      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.Dense(3))
      model.compile(loss=custom_loss, optimizer=CustomOp(), metrics=['acc'])

      x = np.random.random((1, 3))
      y = np.random.random((1, 3))
      model.train_on_batch(x, y)

      out = model.predict(x)
      keras.models.save_model(model, saved_model_dir, save_format=save_format)

      # Custom objects must be supplied explicitly at load time.
      new_model = keras.models.load_model(
          saved_model_dir,
          custom_objects={'CustomOp': CustomOp,
                          'custom_loss': custom_loss})
      self._assert_same_weights_and_metrics(model, new_model)

      out2 = new_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_saving_without_compilation(self):
    """Saving a compiled-but-untrained model succeeds."""
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(2, input_shape=(3,)))
    model.add(keras.layers.Dense(3))
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])

    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)

  def test_saving_with_tf_optimizer(self):
    """Saving works with a raw TF V1 optimizer (not a Keras optimizer)."""
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(2, input_shape=(3,)))
    model.add(keras.layers.Dense(3))
    model.compile(loss='mse',
                  optimizer=tf.compat.v1.train.AdadeltaOptimizer(0.1),
                  metrics=['acc'])

    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)

  def test_saving_right_after_compilation(self):
    """Saving immediately after compile (train function built) succeeds."""
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.Dense(3))
      model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
      if not tf.compat.v1.executing_eagerly_outside_functions():
        model._make_train_function()
      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      model = keras.models.load_model(saved_model_dir)

  def test_saving_lambda_numpy_array_arguments(self):
    """Lambda-layer numpy `arguments` survive a save/load round trip."""
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()

    if h5py is None:
      self.skipTest('h5py required to run this test')

    mean = np.random.random((4, 2, 3))
    std = np.abs(np.random.random((4, 2, 3))) + 1e-5
    inputs = keras.layers.Input(shape=(4, 2, 3))
    output = keras.layers.Lambda(lambda image, mu, std: (image - mu) / std,
                                 arguments={'mu': mean, 'std': std})(inputs)
    model = keras.models.Model(inputs, output)
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])

    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)

    self.assertAllClose(mean, model.layers[1].arguments['mu'])
    self.assertAllClose(std, model.layers[1].arguments['std'])

  def test_saving_model_with_long_layer_names(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    with self.cached_session():
      # This layer name will make the `layers_name` HDF5 attribute blow
      # out of proportion. Note that it fits into the internal HDF5
      # attribute memory limit on its own but because h5py converts
      # the list of layer names into numpy array, which uses the same
      # amount of memory for every item, it increases the memory
      # requirements substantially.
      x = keras.Input(shape=(2,), name='input_' + ('x' * (2**15)))
      f = x
      for i in range(4):
        f = keras.layers.Dense(2, name='dense_%d' % (i,))(f)
      model = keras.Model(inputs=[x], outputs=[f])
      model.compile(
          'adam', loss=keras.losses.MeanSquaredError(), metrics=['acc'])

      x = np.random.random((1, 2))
      y = np.random.random((1, 2))
      model.train_on_batch(x, y)
      out = model.predict(x)

      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      model = keras.models.load_model(saved_model_dir)

      if save_format in ['tf', 'tensorflow']:
        return
      # Check that the HDF5 files contains chunked array
      # of layer names.
      with h5py.File(saved_model_dir, 'r') as h5file:
        num_names_arrays = len([attr for attr in h5file['model_weights'].attrs
                                if attr.startswith('layer_names')])
      # The chunking of layer names array should have happened.
      self.assertGreater(num_names_arrays, 0)
      out2 = model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_saving_model_with_long_weights_names(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()

    with self.cached_session():
      x = keras.Input(shape=(2,), name='nested_model_input')
      f = x
      for i in range(4):
        f = keras.layers.Dense(2, name='nested_model_dense_%d' % (i,))(f)
      # This layer name will make the `weights_name`
      # HDF5 attribute blow out of proportion.
      f = keras.layers.Dense(2, name='nested_model_output' + ('x' * (2**14)))(f)
      nested_model = keras.Model(inputs=[x], outputs=[f], name='nested_model')

      x = keras.Input(shape=(2,), name='outer_model_input')
      f = nested_model(x)
      f = keras.layers.Dense(2, name='outer_model_output')(f)

      model = keras.Model(inputs=[x], outputs=[f])
      model.compile(loss='mse', optimizer='adam', metrics=['acc'])

      x = np.random.random((1, 2))
      y = np.random.random((1, 2))
      model.train_on_batch(x, y)
      out = model.predict(x)

      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      model = keras.models.load_model(saved_model_dir)

      if save_format in ['h5', 'hdf5', 'keras']:
        # Check that the HDF5 files contains chunked array
        # of weight names.
        with h5py.File(saved_model_dir, 'r') as h5file:
          num_weight_arrays = len(
              [attr for attr in h5file['model_weights']['nested_model'].attrs
               if attr.startswith('weight_names')])
        # The chunking of layer names array should have happened.
        self.assertGreater(num_weight_arrays, 0)
      out2 = model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_model_saving_to_pre_created_h5py_file(self):
    """Saving into a caller-created h5py.File object works."""
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    with tf.Graph().as_default(), self.cached_session():
      inputs = keras.Input(shape=(3,))
      x = keras.layers.Dense(2)(inputs)
      outputs = keras.layers.Dense(3)(x)

      model = keras.Model(inputs, outputs)
      model.compile(
          loss=keras.losses.MSE,
          optimizer=optimizer_v1.Adam(),
          metrics=[
              keras.metrics.categorical_accuracy,
              keras.metrics.CategoricalAccuracy()
          ])
      x = np.random.random((1, 3))
      y = np.random.random((1, 3))
      model.train_on_batch(x, y)

      out = model.predict(x)

      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      loaded_model = keras.models.load_model(saved_model_dir)
      out1 = loaded_model.predict(x)
      self.assertAllClose(out, out1, atol=1e-05)
      if save_format in ['tf', 'tensorflow']:
        return

      # Test h5 format specifically
      fd, fname = tempfile.mkstemp('.h5')
      with h5py.File(fname, mode='r+') as h5file:
        keras.models.save_model(model, h5file)
        loaded_model = keras.models.load_model(h5file)
        out2 = loaded_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

      # Test non-default options in h5
      with h5py.File(
          '_', driver='core', mode='w', backing_store=False) as h5file:
        keras.models.save_model(model, h5file)
        loaded_model = keras.models.load_model(h5file)
        out2 = loaded_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

      # Cleanup
      os.close(fd)
      os.remove(fname)

  def test_model_saving_to_new_dir_path(self):
    """save_model creates missing intermediate directories."""
    saved_model_dir = os.path.join(self._save_model_dir(), 'newdir',
                                   'saved_model')
    save_format = testing_utils.get_save_format()
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.RepeatVector(3))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))

      x = np.random.random((1, 3))
      out = model.predict(x)

      keras.models.save_model(model, saved_model_dir, save_format=save_format)

      new_model = keras.models.load_model(saved_model_dir)
      self._assert_same_weights_and_metrics(model, new_model)

      out2 = new_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_model_raise_exception_with_failed_saving(self):
    """Saving to an already-open (locked) HDF5 file raises OSError."""
    if h5py is None:
      self.skipTest('h5py required to run this test')

    saved_model_dir = self._save_model_dir()
    saved_model_path = os.path.join(saved_model_dir, 'saved_model.h5')

    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.RepeatVector(3))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))

      with self.assertRaisesRegex(OSError, 'Unable to create file'):
        with h5py.File(saved_model_path, 'w'):
          keras.models.save_model(model, saved_model_path)

  def test_saving_constant_initializer_with_numpy(self):
    """A numpy-backed Constant initializer serializes correctly."""
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()

    model = keras.models.Sequential()
    model.add(
        keras.layers.Dense(
            2,
            input_shape=(3,),
            kernel_initializer=keras.initializers.Constant(np.ones((3, 2)))))
    model.add(keras.layers.Dense(3))
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)

  def test_saving_group_naming_h5py(self):
    # Test saving model with layer which name is prefix to a previous layer
    # name.
    temp_dir = self.get_temp_dir()
    self.addCleanup(shutil.rmtree, temp_dir)
    h5_path = os.path.join(temp_dir, 'test.h5')

    input_layer = keras.layers.Input((None, None, 3), name='test_input')
    x = keras.layers.Conv2D(1, 1, name='conv1/conv')(input_layer)
    x = keras.layers.Activation('relu', name='conv1')(x)

    model = keras.models.Model(inputs=input_layer, outputs=x)
    model.save_weights(h5_path)
    model.load_weights(h5_path)

  def test_primitive_attrs_contain_no_extraneous_strings(self):
    """HDF5 `keras_version` attr is a clean semver string."""
    if h5py is None:
      self.skipTest('h5py required to run this test')

    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(1, input_shape=[2]))
    model.save(saved_model_dir, save_format=save_format)
    if save_format in ['tf', 'tensorflow']:
      return

    h5file = h5py.File(saved_model_dir, 'r')
    self.assertRegex(h5file.attrs['keras_version'], r'^[\d]+\.[\d]+\.[\S]+$')

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_functional_model_with_custom_loss_and_metric(self):
    """add_loss/add_metric outputs survive a save/load round trip."""

    def _make_model():
      inputs = keras.Input(shape=(4,))
      x = keras.layers.Dense(8, activation='relu')(inputs)
      outputs = keras.layers.Dense(3, activation='softmax')(x)
      model = keras.Model(inputs=inputs, outputs=outputs)
      custom_loss = keras.layers.Lambda(lambda x: keras.backend.sum(x * x))(x)
      model.add_loss(custom_loss)
      model.add_metric(custom_loss, aggregation='mean', name='custom_loss')
      return model

    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()

    with self.cached_session():
      model = _make_model()
      model.compile(
          loss=keras.losses.SparseCategoricalCrossentropy(),
          optimizer=optimizers.gradient_descent_v2.SGD(),
          metrics=[keras.metrics.SparseCategoricalCrossentropy()])
      x = np.random.normal(size=(32, 4))
      y = np.random.randint(0, 3, size=32)
      model.train_on_batch(x, y)
      evaluation_results = model.evaluate(x, y)
      # Save and reload model.
      model.save(saved_model_dir, save_format=save_format)
      del model  # Prevent misuse.
      loaded_model = keras.models.load_model(saved_model_dir)
      loaded_model_eval_results = loaded_model.evaluate(x, y)
      # Assert all evaluation results are the same.
      self.assertAllClose(evaluation_results, loaded_model_eval_results, 1e-9)
      # Check correctness of the loss calculation.
      self.assertAllGreater(evaluation_results, 0.)
      evaluation_results = dict(
          zip(loaded_model.metrics_names, evaluation_results))
      self.assertNear(
          evaluation_results['sparse_categorical_crossentropy'] +
          evaluation_results['custom_loss'], evaluation_results['loss'], 1e-6)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_save_uncompiled_model_with_optimizer(self):
    """A model with an optimizer attached but no compile still saves."""
    with self.cached_session() as session:
      saved_model_dir = self._save_model_dir()
      save_format = testing_utils.get_save_format()
      model = keras.models.Sequential([keras.layers.Dense(1, input_shape=(3,))])
      # Set the model's optimizer but don't compile. This can happen if the
      # model is trained with a custom training loop.
      # (`lr` kwarg replaced with the supported `learning_rate` alias.)
      model.optimizer = keras.optimizer_v2.rmsprop.RMSprop(learning_rate=0.0001)
      if not tf.executing_eagerly():
        session.run([v.initializer for v in model.variables])
      model.save(saved_model_dir, save_format=save_format)

      if save_format in ['tf', 'tensorflow']:
        loaded = keras.models.load_model(saved_model_dir)
        self.assertIsInstance(loaded.optimizer,
                              keras.optimizer_v2.optimizer_v2.OptimizerV2)

  @combinations.generate(combinations.combine(mode=['eager']))
  def test_functional_model_with_getitem_op_layer(self):
    """A model built with a `__getitem__` op layer saves and loads."""
    inp = keras.Input(shape=(8))

    out = inp[:]
    model = keras.Model(
        inputs=[inp],
        outputs=out)
    batch_size = 7
    x = tf.stack([
        tf.range(8)
        for _ in range(batch_size)])
    args = [x]
    expected = x[:]

    self.assertAllEqual(model(args), expected)
    self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)

    # Make sure it can be successfully saved and loaded
    save_format = testing_utils.get_save_format()
    saved_model_dir = self._save_model_dir()
    keras.models.save_model(model, saved_model_dir, save_format=save_format)

    loaded_model = keras.models.load_model(saved_model_dir)

    self.assertAllEqual(loaded_model(args), expected)
    self.assertAllEqual(loaded_model.predict(args, batch_size=batch_size),
                        expected)

  @combinations.generate(combinations.combine(mode=['eager']))
  def test_shared_objects(self):
    """A layer shared by two outputs must stay shared after reload."""

    class OuterLayer(keras.layers.Layer):

      def __init__(self, inner_layer):
        super(OuterLayer, self).__init__()
        self.inner_layer = inner_layer

      def call(self, inputs):
        return self.inner_layer(inputs)

      def get_config(self):
        return {
            'inner_layer': generic_utils.serialize_keras_object(
                self.inner_layer)
        }

      @classmethod
      def from_config(cls, config):
        return cls(generic_utils.deserialize_keras_object(
            config['inner_layer']))

    class InnerLayer(keras.layers.Layer):

      def __init__(self):
        super(InnerLayer, self).__init__()
        self.v = self.add_weight(name='v', shape=[], dtype=tf.float32)

      def call(self, inputs):
        return self.v + inputs

      @classmethod
      def from_config(cls, config):
        return cls()

    # Create a model with 2 output layers that share the same inner layer.
    inner_layer = InnerLayer()
    outer_layer_1 = OuterLayer(inner_layer)
    outer_layer_2 = OuterLayer(inner_layer)
    input_ = keras.Input(shape=(1,))
    model = keras.Model(
        inputs=input_, outputs=[outer_layer_1(input_), outer_layer_2(input_)])

    # Changes to the shared layer should affect both outputs.
    model.layers[1].inner_layer.v.assign(5)
    self.assertAllEqual(model(1), [6.0, 6.0])
    model.layers[1].inner_layer.v.assign(3)
    self.assertAllEqual(model(1), [4.0, 4.0])

    # After loading, changes to the shared layer should still affect both
    # outputs.
    def _do_assertions(loaded):
      loaded.layers[1].inner_layer.v.assign(5)
      self.assertAllEqual(loaded(1), [6.0, 6.0])
      loaded.layers[1].inner_layer.v.assign(3)
      self.assertAllEqual(loaded(1), [4.0, 4.0])
      loaded.layers[2].inner_layer.v.assign(5)
      self.assertAllEqual(loaded(1), [6.0, 6.0])
      loaded.layers[2].inner_layer.v.assign(3)
      self.assertAllEqual(loaded(1), [4.0, 4.0])

    # We'd like to make sure we only attach shared object IDs when strictly
    # necessary, so we'll recursively traverse the generated config to count
    # whether we have the exact number we expect.
    def _get_all_keys_recursive(dict_or_iterable):
      if isinstance(dict_or_iterable, dict):
        for key in dict_or_iterable.keys():
          yield key
        for key in _get_all_keys_recursive(dict_or_iterable.values()):
          yield key
      elif isinstance(dict_or_iterable, string_types):
        return
      else:
        try:
          for item in dict_or_iterable:
            for key in _get_all_keys_recursive(item):
              yield key
        # Not an iterable or dictionary
        except TypeError:
          return

    with generic_utils.CustomObjectScope({
        'OuterLayer': OuterLayer, 'InnerLayer': InnerLayer}):

      # Test saving and loading to disk
      save_format = testing_utils.get_save_format()
      saved_model_dir = self._save_model_dir()
      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      loaded = keras.models.load_model(saved_model_dir)
      _do_assertions(loaded)

      # Test recreating directly from config
      config = model.get_config()
      key_count = collections.Counter(_get_all_keys_recursive(config))
      self.assertEqual(key_count[generic_utils.SHARED_OBJECT_KEY], 2)
      loaded = keras.Model.from_config(config)
      _do_assertions(loaded)

  @combinations.generate(combinations.combine(mode=['eager']))
  def test_shared_objects_wrapper(self):
    """Tests that shared layers wrapped with `Wrapper` restore correctly."""
    input_ = keras.Input(shape=(1,))
    unwrapped = keras.layers.Layer(name='unwrapped')
    wrapped = keras.layers.Wrapper(unwrapped, name='wrapped')
    model = keras.Model(inputs=input_,
                        outputs=[unwrapped(input_), wrapped(input_)])

    # Test recreating directly from config
    config = model.get_config()
    loaded = keras.Model.from_config(config)
    self.assertIs(loaded.layers[1], loaded.layers[2].layer)

    # Test saving and loading to disk
    save_format = testing_utils.get_save_format()
    saved_model_dir = self._save_model_dir()
    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    loaded = keras.models.load_model(saved_model_dir)
    self.assertIs(loaded.layers[1], loaded.layers[2].layer)

  @combinations.generate(combinations.combine(mode=['eager']))
  def test_multi_output_metrics_name_stay_same(self):
    """Tests that metric names don't change with each save/load cycle.

    e.g. "head_0_accuracy" should not become "head_0_head_0_accuracy" after
    saving and loading a model.
    """
    input_ = keras.Input((4,))
    model = keras.Model(
        input_,
        [keras.layers.Softmax(name='head_0')(keras.layers.Dense(3)(input_)),
         keras.layers.Softmax(name='head_1')(keras.layers.Dense(5)(input_))])
    metric = keras.metrics.BinaryAccuracy()
    model.compile(optimizer='rmsprop',
                  loss='mse',
                  metrics={'head_0': [metric, 'accuracy']})

    # Run one iteration.
    x = np.random.rand(2, 4)
    y = {'head_0': np.random.randint(2, size=(2, 3)),
         'head_1': np.random.randint(2, size=(2, 5))}
    model.fit(x, y, verbose=0)

    # Save and reload.
    save_format = testing_utils.get_save_format()
    saved_model_dir = self._save_model_dir()
    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    loaded = keras.models.load_model(saved_model_dir)

    # Make sure the metrics names from the model before saving match the loaded
    # model.
    self.assertSequenceEqual(model.metrics_names, loaded.metrics_names)
self.assertAllEqual(out_dense, out_ragged) @parameterized.named_parameters( *testing_utils.generate_combinations_with_testcase_name(layer=[ keras.layers.Add, keras.layers.Subtract, keras.layers.Multiply, keras.layers.Minimum, keras.layers.Maximum, keras.layers.Average ])) def test_merge_with_scalar_input(self, layer): x1 = np.array((1)) x2 = np.array((2)) out = layer()([x1, x2]) self.assertEqual(out.shape, ()) @combinations.generate(combinations.combine(mode=['graph', 'eager'])) class MergeLayersTestNoExecution(tf.test.TestCase): def test_merge_elementwise_errors(self): i1 = keras.layers.Input(shape=(4, 5)) i2 = keras.layers.Input(shape=(4, 6)) with self.assertRaises(ValueError): keras.layers.add([i1, i2]) with self.assertRaises(ValueError): keras.layers.add([i1]) with self.assertRaises(ValueError): keras.layers.add(i1) with self.assertRaises(ValueError): keras.layers.add([i1]) def test_concatenate_errors(self): i1 = keras.layers.Input(shape=(4, 5))
class TestSaveModel(tf.test.TestCase, parameterized.TestCase):
  """Tests for the `save.save_model`/`save.load_model` entry points."""

  def setUp(self):
    super(TestSaveModel, self).setUp()
    # A buildable functional/sequential model and a subclassed one, used to
    # exercise format-specific restrictions.
    self.model = testing_utils.get_small_sequential_mlp(1, 2, 3)
    self.subclassed_model = testing_utils.get_small_subclass_mlp(1, 2)

  def assert_h5_format(self, path):
    """Asserts `path` is a valid HDF5 file (no-op when h5py is absent)."""
    if h5py is not None:
      self.assertTrue(h5py.is_hdf5(path),
                      'Model saved at path {} is not a valid hdf5 file.'
                      .format(path))

  def assert_saved_model(self, path):
    """Asserts `path` holds a parseable SavedModel."""
    loader_impl.parse_saved_model(path)

  @testing_utils.run_v2_only
  def test_save_format_defaults(self):
    path = os.path.join(self.get_temp_dir(), 'model_path')
    save.save_model(self.model, path)
    self.assert_saved_model(path)

  @testing_utils.run_v2_only
  def test_save_format_defaults_pathlib(self):
    if sys.version_info < (3, 6):
      self.skipTest('pathlib is only available for python version >= 3.6')
    path = pathlib.Path(self.get_temp_dir()) / 'model_path'
    save.save_model(self.model, path)
    self.assert_saved_model(path)

  @testing_utils.run_v2_only
  def test_save_hdf5(self):
    path = os.path.join(self.get_temp_dir(), 'model')
    save.save_model(self.model, path, save_format='h5')
    self.assert_h5_format(path)
    # Subclassed models cannot be saved in HDF5 format.
    with self.assertRaisesRegex(
        NotImplementedError,
        'requires the model to be a Functional model or a Sequential model.'):
      save.save_model(self.subclassed_model, path, save_format='h5')

  @testing_utils.run_v2_only
  def test_save_load_hdf5_pathlib(self):
    if sys.version_info < (3, 6):
      self.skipTest('pathlib is only available for python version >= 3.6')
    path = pathlib.Path(self.get_temp_dir()) / 'model'
    save.save_model(self.model, path, save_format='h5')
    save.load_model(path)

  @testing_utils.run_v2_only
  def test_save_tf(self):
    path = os.path.join(self.get_temp_dir(), 'model')
    save.save_model(self.model, path, save_format='tf')
    self.assert_saved_model(path)
    # An unbuilt subclassed model cannot be saved until it is called once.
    with self.assertRaisesRegex(ValueError, 'input shapes have not been set'):
      save.save_model(self.subclassed_model, path, save_format='tf')
    self.subclassed_model.predict(np.random.random((3, 5)))
    save.save_model(self.subclassed_model, path, save_format='tf')
    self.assert_saved_model(path)

  @testing_utils.run_v2_only
  def test_save_load_tf_string(self):
    path = os.path.join(self.get_temp_dir(), 'model')
    save.save_model(self.model, path, save_format='tf')
    save.load_model(path)

  @testing_utils.run_v2_only
  def test_save_load_tf_pathlib(self):
    if sys.version_info < (3, 6):
      self.skipTest('pathlib is only available for python version >= 3.6')
    path = pathlib.Path(self.get_temp_dir()) / 'model'
    save.save_model(self.model, path, save_format='tf')
    save.load_model(path)

  @testing_utils.run_v2_only
  def test_save_load_weights_tf_pathlib(self):
    if sys.version_info < (3, 6):
      self.skipTest('pathlib is only available for python version >= 3.6')
    path = pathlib.Path(self.get_temp_dir()) / 'model'
    self.model.save_weights(path, save_format='tf')
    self.model.load_weights(path)

  @testing_utils.run_v2_only
  def test_save_load_weights_hdf5_pathlib(self):
    if sys.version_info < (3, 6):
      self.skipTest('pathlib is only available for python version >= 3.6')
    path = pathlib.Path(self.get_temp_dir()) / 'model'
    self.model.save_weights(path, save_format='h5')
    self.model.load_weights(path)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_with_dense_features(self):
    """Models containing DenseFeatures columns round-trip via JSON config."""
    cols = [
        tf.feature_column.numeric_column('a'),
        tf.feature_column.indicator_column(
            tf.feature_column.categorical_column_with_vocabulary_list(
                'b', ['one', 'two']))
    ]
    input_layers = {
        'a': keras.layers.Input(shape=(1,), name='a'),
        'b': keras.layers.Input(shape=(1,), name='b', dtype='string')
    }

    fc_layer = dense_features.DenseFeatures(cols)(input_layers)
    output = keras.layers.Dense(10)(fc_layer)

    model = keras.models.Model(input_layers, output)

    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    inputs_a = np.arange(10).reshape(10, 1)
    inputs_b = np.arange(10).reshape(10, 1).astype('str')

    with self.cached_session():
      # Initialize tables for V1 lookup.
      if not tf.executing_eagerly():
        self.evaluate(tf.compat.v1.tables_initializer())

      self.assertLen(loaded_model.predict({'a': inputs_a, 'b': inputs_b}), 10)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_with_sequence_features(self):
    """Models containing SequenceFeatures columns round-trip via JSON."""
    cols = [
        tf.feature_column.sequence_numeric_column('a'),
        tf.feature_column.indicator_column(
            tf.feature_column.sequence_categorical_column_with_vocabulary_list(
                'b', ['one', 'two']))
    ]
    input_layers = {
        'a': keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
        'b': keras.layers.Input(
            shape=(None, 1), sparse=True, name='b', dtype='string')
    }

    fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
    # TODO(tibell): Figure out the right dtype and apply masking.
    # sequence_length_mask = array_ops.sequence_mask(sequence_length)
    # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
    x = keras.layers.GRU(32)(fc_layer)
    output = keras.layers.Dense(10)(x)

    model = keras.models.Model(input_layers, output)

    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    batch_size = 10
    timesteps = 1

    values_a = np.arange(10, dtype=np.float32)
    indices_a = np.zeros((10, 3), dtype=np.int64)
    indices_a[:, 0] = np.arange(10)
    inputs_a = tf.SparseTensor(indices_a, values_a,
                               (batch_size, timesteps, 1))

    # `np.str` was a deprecated alias of builtin `str` (removed in
    # NumPy 1.24); use `str` directly — identical dtype, no deprecation.
    values_b = np.zeros(10, dtype=str)
    indices_b = np.zeros((10, 3), dtype=np.int64)
    indices_b[:, 0] = np.arange(10)
    inputs_b = tf.SparseTensor(indices_b, values_b,
                               (batch_size, timesteps, 1))

    with self.cached_session():
      # Initialize tables for V1 lookup.
      if not tf.executing_eagerly():
        self.evaluate(tf.compat.v1.tables_initializer())

      self.assertLen(
          loaded_model.predict({
              'a': inputs_a,
              'b': inputs_b
          }, steps=1), batch_size)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_h5_for_rnn_layers(self):
    # See https://github.com/tensorflow/tensorflow/issues/35731 for details.
    inputs = keras.Input([10, 91], name='train_input')
    rnn_layers = [
        keras.layers.LSTMCell(size, recurrent_dropout=0, name='rnn_cell%d' % i)
        for i, size in enumerate([512, 512])
    ]
    rnn_output = keras.layers.RNN(
        rnn_layers, return_sequences=True, name='rnn_layer')(inputs)
    pred_feat = keras.layers.Dense(91, name='prediction_features')(rnn_output)
    pred = keras.layers.Softmax()(pred_feat)
    model = keras.Model(inputs=[inputs], outputs=[pred, pred_feat])
    path = os.path.join(self.get_temp_dir(), 'model_path.h5')
    model.save(path)

    # Make sure the variable name is unique.
    self.assertNotEqual(rnn_layers[0].kernel.name,
                        rnn_layers[1].kernel.name)
    self.assertIn('rnn_cell1', rnn_layers[1].kernel.name)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_optimizer_weights(self):
    """Restored optimizer slot variables reproduce the same batch loss."""

    class MyModel(keras.Model):

      def __init__(self):
        super(MyModel, self).__init__()
        self.layer = keras.layers.Dense(1)

      def call(self, x):
        return self.layer(x)

    path = os.path.join(self.get_temp_dir(), 'weights_path')
    x, y = np.ones((10, 10)), np.ones((10, 1))

    model = MyModel()
    model.compile('rmsprop', loss='bce')
    model.train_on_batch(x, y)
    model.reset_metrics()
    model.save_weights(path, save_format='tf')

    batch_loss = model.train_on_batch(x, y)

    new_model = MyModel()
    new_model.compile('rmsprop', loss='bce')
    new_model.train_on_batch(x, y)
    new_model.reset_metrics()

    new_model.load_weights(path)
    new_batch_loss = new_model.train_on_batch(x, y)

    self.assertAllClose(batch_loss, new_batch_loss)

  @combinations.generate(combinations.combine(mode=['eager', 'graph']))
  def test_save_include_optimizer_false(self):
    """`include_optimizer=False` must keep optimizer vars out of the export."""

    def get_variables(file_name):
      # Lists the variable names stored in a SavedModel checkpoint.
      reader = tf.train.load_checkpoint(
          os.path.join(file_name, 'variables/variables'))
      shape_from_key = reader.get_variable_to_shape_map()
      return sorted(shape_from_key.keys())

    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(1))
      model.compile('adam', loss='mse')
      x, y = np.ones((10, 10)), np.ones((10, 1))
      model.train_on_batch(x, y)
      path = os.path.join(self.get_temp_dir(), 'no_optimizer')
      model.save(path, save_format='tf', include_optimizer=False)
      variables = get_variables(path)

      for v in variables:
        self.assertNotIn('optimizer', v)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_model_with_custom_object(self):
    """A `register_keras_serializable` loss reloads without custom_objects."""
    with generic_utils.custom_object_scope(), self.cached_session():

      @generic_utils.register_keras_serializable()
      class CustomLoss(losses.MeanSquaredError):
        pass

      model = sequential.Sequential(
          [core.Dense(units=1, input_shape=(1,))])
      model.compile(optimizer='sgd', loss=CustomLoss())
      model.fit(np.zeros([10, 1]), np.zeros([10, 1]))

      temp_dir = self.get_temp_dir()
      filepath = os.path.join(temp_dir, 'saving')
      model.save(filepath)

      # Make sure the model can be correctly load back.
      _ = save.load_model(filepath, compile=True)