def test_autolambda(self, model_fn):
  """Fits a model, rebuilds it from its config, and compares both copies.

  Args:
    model_fn: Zero-argument callable returning the model under test.
  """

  def _compile(target):
    target.compile(
        adam.Adam(0.001),
        'mse',
        run_eagerly=testing_utils.should_run_eagerly())

  def _batch_of_ones(symbolic):
    # Two all-ones examples whose trailing dims follow the symbolic tensor.
    return np.ones((2,) + tuple(symbolic.shape[1:]), 'float32')

  original = model_fn()
  _compile(original)

  np_inputs = tf.nest.map_structure(_batch_of_ones, original.inputs)
  np_outputs = tf.nest.map_structure(_batch_of_ones, original.outputs)
  original.fit(np_inputs, np_outputs, batch_size=2)
  original(np_inputs)  # Test calling the model directly on inputs.

  rebuilt = keras.Model.from_config(
      original.get_config(),
      custom_objects={'LayerWithLayer': LayerWithLayer, 'MyAdd': MyAdd})
  _compile(rebuilt)
  rebuilt.fit(np_inputs, np_outputs, batch_size=2)
  rebuilt(np_inputs)  # Test calling the new model directly on inputs.

  # Assert that metrics are preserved and in the right order.
  self.assertAllEqual(original.metrics_names, rebuilt.metrics_names)
  # Assert that layer names don't change.
  original_names = [layer.name for layer in original.layers]
  rebuilt_names = [layer.name for layer in rebuilt.layers]
  self.assertAllEqual(original_names, rebuilt_names)
def testSparseRepeatedIndices(self):
  """Compares a sparse gradient with a repeated index to its summed form."""
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  for dtype in [tf.half, tf.float32, tf.float64]:
    with tf.Graph().as_default(), self.cached_session():
      var_repeated = tf.Variable([[1.0], [2.0]], dtype=dtype)
      var_summed = tf.Variable([[1.0], [2.0]], dtype=dtype)

      # Row 1 is listed twice, contributing 0.1 each time ...
      grad_repeated = tf.IndexedSlices(
          tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
          tf.constant([1, 1]),
          tf.constant([2, 1]))
      # ... versus once with the value 0.2.
      grad_summed = tf.IndexedSlices(
          tf.constant([0.2], shape=[1, 1], dtype=dtype),
          tf.constant([1]),
          tf.constant([2, 1]))

      step_repeated = adam.Adam().apply_gradients(
          [(grad_repeated, var_repeated)])
      step_summed = adam.Adam().apply_gradients(
          [(grad_summed, var_summed)])
      self.evaluate(tf.compat.v1.global_variables_initializer())

      self.assertAllClose(var_summed, self.evaluate(var_repeated))
      for _ in range(3):
        step_repeated.run()
        step_summed.run()
        self.assertAllClose(var_summed, self.evaluate(var_repeated))
def test_trackable_save_restore(self):
  """Saves a template plus optimizer and restores into a second template."""
  with self.test_session():

    def _templated():
      first = tf.compat.v1.get_variable(
          "v", shape=[1], initializer=tf.compat.v1.zeros_initializer(),
          use_resource=True)
      second = tf.compat.v1.get_variable(
          "v2", shape=[1], initializer=tf.compat.v1.zeros_initializer(),
          use_resource=True)
      scope_obj = _ManualScope()
      return first, first + 1., second, scope_obj, scope_obj()

    save_template = tf.compat.v1.make_template("s1", _templated)
    v1_save, _, v2_save, manual_scope, manual_scope_v = save_template()

    # The objects listed for the template must be exactly these five.
    expected_ids = [
        id(obj) for obj in (
            v1_save, v2_save, manual_scope, manual_scope_v, save_template)
    ]
    six.assertCountEqual(
        self,
        expected_ids,
        map(id, trackable_utils.list_objects(save_template)))

    manual_dep, = manual_scope._checkpoint_dependencies
    self.assertEqual("in_manual_scope", manual_dep.name)
    self.assertIs(manual_scope_v, manual_dep.ref)

    save_optimizer = adam.Adam(0.0)
    save_root = tf.train.Checkpoint(
        my_template=save_template, optimizer=save_optimizer)
    save_optimizer.minimize(v1_save.read_value, var_list=[v1_save])
    self.evaluate(
        [variable.initializer for variable in save_template.variables])
    optimizer_variables = save_optimizer.variables() + list(
        save_optimizer._hyper.values())
    self.evaluate(
        [variable.initializer for variable in optimizer_variables])
    self.evaluate(v1_save.assign([12.]))
    self.evaluate(v2_save.assign([14.]))

    checkpoint_prefix = os.path.join(self.get_temp_dir(), "ckpt")
    save_path = save_root.save(checkpoint_prefix)

    load_template = tf.compat.v1.make_template("s2", _templated)
    load_optimizer = adam.Adam(0.0)
    load_root = tf.train.Checkpoint(
        my_template=load_template, optimizer=load_optimizer)
    status = load_root.restore(save_path)
    var, var_plus_one, var2, _, _ = load_template()
    load_optimizer.minimize(var.read_value, var_list=[var])

    dependencies = load_template._checkpoint_dependencies
    self.assertLen(dependencies, 3)
    for position, dep_name in enumerate(("v", "v2", "ManualScope")):
      self.assertEqual(dep_name, dependencies[position].name)

    status.assert_consumed().run_restore_ops()
    self.assertAllEqual([12.], self.evaluate(var))
    self.assertAllEqual([13.], self.evaluate(var_plus_one))
    self.assertAllEqual([14.], self.evaluate(var2))
def testConstructAdamWithLR(self):
  """Checks the `lr` value for the `lr` / `learning_rate` constructor args."""
  opt = adam.Adam(lr=1.0)
  opt_2 = adam.Adam(learning_rate=0.1, lr=1.0)
  opt_3 = adam.Adam(learning_rate=0.1)
  optimizers = (opt, opt_2, opt_3)

  for optimizer in optimizers:
    self.assertIsInstance(optimizer.lr, tf.Variable)

  self.evaluate(tf.compat.v1.global_variables_initializer())
  # With both arguments supplied (opt_2), the expected value is that of `lr`.
  for optimizer, expected_lr in zip(optimizers, (1.0, 1.0, 0.1)):
    self.assertAllClose(self.evaluate(optimizer.lr), expected_lr)
def test_stack_preserves_correct_shape(self):
  """Checks `tf.stack` shapes in a model, including after a config reload."""

  def _check(wrap, expected_shape):
    # `wrap` decides whether the tensor is passed bare or inside a list.
    inp = keras.Input(shape=(), dtype='float32')
    out = tf.stack(wrap(inp))
    model = keras.Model(inputs=inp, outputs=out)
    model.compile(
        adam.Adam(0.001),
        'mse',
        run_eagerly=testing_utils.should_run_eagerly())

    x = tf.ones(shape=(4, 4))
    expected = tf.stack(wrap(x))
    self.assertAllEqual(expected.shape, expected_shape)
    self.assertAllEqual(model(x).shape, expected_shape)
    self.assertAllEqual(model(x), expected)

    reloaded = keras.Model.from_config(model.get_config())
    self.assertAllEqual(reloaded(x).shape, expected_shape)
    self.assertAllEqual(reloaded(x), expected)

  ## Test stack([x])
  _check(lambda tensor: [tensor], (1, 4, 4))

  ## Test stack(x)
  _check(lambda tensor: tensor, (4, 4))
def testSparseWithAmsgrad(self):
  """Checks sparse AMSGrad updates against the numpy reference helper."""
  # dtypes.half does not work on gpu + eager.
  for dtype in [tf.float32, tf.float64]:
    with self.cached_session():
      # State carried by the numpy reference implementation.
      m_ref = np.array([[0.0], [0.0]])
      v_ref = np.array([[0.0], [0.0]])
      vhat_ref = np.array([[0.0], [0.0]])
      indices_np = np.array([1])
      indices = tf.constant(indices_np, dtype=tf.int32)
      var_ref = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype)

      var_repeated = tf.Variable(var_ref, dtype=dtype)
      var_summed = tf.Variable(var_ref, dtype=dtype)
      grads_np = np.array([[0.2]], dtype=dtype.as_numpy_dtype)
      grad_repeated = tf.IndexedSlices(
          tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
          tf.constant([1, 1]),
          tf.constant([2, 1]))
      grad_summed = tf.IndexedSlices(
          grads_np, indices, tf.constant([2, 1]))

      opt_repeated = adam.Adam(amsgrad=True)
      opt_summed = adam.Adam(amsgrad=True)
      if not tf.executing_eagerly():
        # In graph mode the update ops are built once and re-run below.
        repeated_update = opt_repeated.apply_gradients(
            [(grad_repeated, var_repeated)])
        summed_update = opt_summed.apply_gradients(
            [(grad_summed, var_summed)])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      self.assertAllClose(
          self.evaluate(var_summed), self.evaluate(var_repeated))

      for t in range(3):
        if tf.executing_eagerly():
          opt_repeated.apply_gradients([(grad_repeated, var_repeated)])
          opt_summed.apply_gradients([(grad_summed, var_summed)])
        else:
          self.evaluate(repeated_update)
          self.evaluate(summed_update)

        var_ref, m_ref, v_ref, vhat_ref = adam_sparse_update_numpy_amsgrad(
            var_ref, indices_np, grads_np, t, m_ref, v_ref, vhat_ref)

        # Validate updated params
        self.assertAllCloseAccordingToType(
            var_ref, self.evaluate(var_summed))
        self.assertAllCloseAccordingToType(
            self.evaluate(var_summed), self.evaluate(var_repeated))
def testAnonymousVarsInInit(self):
  """Saves a checkpoint around training steps of a model with unnamed vars."""

  class Model(training.Model):

    def __init__(self):
      super(Model, self).__init__()
      self.w = resource_variable_ops.ResourceVariable(0.0)
      self.b = resource_variable_ops.ResourceVariable(0.0)
      self.vars = [self.w, self.b]

    def call(self, x):
      return x * self.w + self.b

  model = Model()
  optimizer = adam.Adam(learning_rate=0.05)
  checkpoint_prefix = os.path.join(self.get_temp_dir(), "ckpt")
  checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)

  for _ in range(2):
    # Save first, then take one optimizer step.
    checkpoint.save(checkpoint_prefix)
    with tf.GradientTape() as tape:
      residual = tf.constant(1.) - model(tf.constant(1.))
      loss = residual ** 2
    grads = tape.gradient(loss, model.vars)
    optimizer.apply_gradients(list(zip(grads, model.vars)))
def testAgnosticUsage(self):
  """Graph/eager agnostic usage."""
  # Does create garbage when executing eagerly due to ops.Graph() creation.
  with self.test_session():
    steps_per_round = 10
    checkpoint_directory = self.get_temp_dir()
    optimizer = adam.Adam(0.001)

    def _train_fn(model, input_value):
      with tf.GradientTape() as tape:
        loss = model(input_value)
      trainable = model.trainable_variables
      grads = tape.gradient(loss, trainable)
      return optimizer.apply_gradients(zip(grads, trainable))

    for training_continuation in range(3):
      with testing_utils.device(should_use_gpu=True):
        model = MyModel()
        root = tf.train.Checkpoint(optimizer=optimizer, model=model)
        manager = tf.train.CheckpointManager(
            root, checkpoint_directory, max_to_keep=1)
        status = root.restore(save_path=manager.latest_checkpoint)

        train_fn = functools.partial(
            _train_fn, model, tf.constant([[3.]]))
        if not tf.executing_eagerly():
          # Build the op once; each call then just evaluates it.
          train_fn = functools.partial(self.evaluate, train_fn())
        status.initialize_or_restore()
        for _ in range(steps_per_round):
          train_fn()
        manager.save()

        rounds_done = training_continuation + 1
        self.assertEqual(rounds_done * steps_per_round,
                         self.evaluate(root.optimizer.iterations))
        self.assertEqual(rounds_done, self.evaluate(root.save_counter))
def test_getitem_slice_real_tensor(self):
  """Slices a concrete tensor using a stop value taken from a Keras input."""
  if not tf.executing_eagerly():
    self.skipTest('Complex slicing like this fails in v1')

  values = tf.range(10.0)
  slice_stop = keras.Input(shape=(), dtype='int32')
  out = values[:slice_stop[0]]
  model = keras.Model(inputs=slice_stop, outputs=out)
  model.compile(
      adam.Adam(0.001),
      'mse',
      run_eagerly=testing_utils.should_run_eagerly())

  batch_size = 7
  stop = 6
  args = tf.constant(stop, shape=(batch_size,))
  expected = values[:stop]

  def _check_outputs(candidate):
    self.assertAllEqual(candidate(args), expected)
    self.assertAllEqual(
        candidate.predict(args, batch_size=batch_size), expected)

  if tf.compat.v1.executing_eagerly_outside_functions():
    self.assertIn('tf.__operators__.getitem',
                  (layer.name for layer in model.layers))
    # TODO(b/161925288): Fix the dispatch triggering then uncomment:
    # self.assertNotIn('tf.strided_slice', (
    #     x.name for x in model.layers))
  _check_outputs(model)

  # Repeat the checks on a model rebuilt from the config.
  _check_outputs(keras.Model.from_config(model.get_config()))
def test_validate_callbacks_predefined_callbacks(self):
  """Checks which predefined callbacks `validate_callbacks` accepts."""
  accepted = [
      callbacks.TensorBoard(),
      callbacks.CSVLogger(filename='./log.csv'),
      callbacks.EarlyStopping(),
      callbacks.ModelCheckpoint(filepath='./checkpoint'),
      callbacks.TerminateOnNaN(),
      callbacks.ProgbarLogger(),
      callbacks.History(),
      callbacks.RemoteMonitor(),
  ]
  # Expected to pass without raising.
  distributed_training_utils_v1.validate_callbacks(accepted, adam.Adam())

  rejected = [
      callbacks.ReduceLROnPlateau(),
      callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001),
  ]
  # Each of these, paired with a v1 optimizer, is expected to raise.
  for single_callback in rejected:
    with self.assertRaisesRegex(
        ValueError, 'You must specify a Keras Optimizer V2'):
      distributed_training_utils_v1.validate_callbacks(
          [single_callback], tf.compat.v1.train.AdamOptimizer())
def testOptimizerWithCallableVarList(self):
  """Passes callables for both loss and `var_list` to `minimize`."""
  input_dim = 1
  num_classes = 2
  (x, y), _ = testing_utils.get_test_data(
      train_samples=20,
      test_samples=10,
      input_shape=(input_dim,),
      num_classes=num_classes)
  y = np_utils.to_categorical(y)

  model = testing_utils.get_small_sequential_mlp(
      num_hidden=1, num_classes=num_classes)
  opt = adam.Adam()

  def loss():
    return losses.mean_squared_error(model(x), y)

  def var_list():
    return model.trainable_weights

  # Before the model has been called, asking for its weights must fail.
  with self.assertRaisesRegex(
      ValueError, 'Weights for model .* have not yet been created'):
    var_list()

  train_op = opt.minimize(loss, var_list)
  if not tf.executing_eagerly():
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertEqual(
        [[0.]], self.evaluate(opt.get_slot(var_list()[0], 'm')))
    self.evaluate(train_op)
  self.assertNotEqual(
      [[0.]], self.evaluate(opt.get_slot(var_list()[0], 'm')))
  self.assertLen(var_list(), 4)
def test_getitem_slice_with_stop_only(self):
  """Slices one Keras input with a stop value taken from another input."""
  if not tf.executing_eagerly():
    self.skipTest('Complex slicing like this fails in v1')

  inp = keras.Input(shape=(8,))
  slice_stop = keras.Input(shape=(), dtype='int32')
  out = inp[:slice_stop[0]]
  model = keras.Model(inputs=[inp, slice_stop], outputs=out)
  model.compile(
      adam.Adam(0.001),
      'mse',
      run_eagerly=testing_utils.should_run_eagerly())

  batch_size = 7
  stop = 6
  data = tf.stack([tf.range(8) for _ in range(batch_size)])
  args = [data, tf.constant(stop, shape=(batch_size,))]
  expected = data[:stop]

  def _check_outputs(candidate):
    self.assertAllEqual(candidate(args), expected)
    self.assertAllEqual(
        candidate.predict(args, batch_size=batch_size), expected)

  if keras_tensor.keras_tensors_enabled():
    self.assertIn('tf.__operators__.getitem',
                  (layer.name for layer in model.layers))
    self.assertNotIn('tf.strided_slice',
                     (layer.name for layer in model.layers))
  _check_outputs(model)

  # Make sure it can be successfully saved and loaded
  _check_outputs(keras.Model.from_config(model.get_config()))
def test_getitem_index_real_tensor(self):
  """Indexes a concrete tensor with a position taken from a Keras input."""
  if not tf.executing_eagerly():
    self.skipTest('Complex slicing like this fails in v1')

  values = tf.range(10.0)
  slice_stop = keras.Input(shape=(), dtype='int32')
  out = values[slice_stop[0]]
  model = keras.Model(inputs=slice_stop, outputs=out)
  model.compile(
      adam.Adam(0.001),
      'mse',
      run_eagerly=testing_utils.should_run_eagerly())

  batch_size = 7
  index = 6
  args = tf.constant(index, shape=(batch_size,))
  expected = values[index]

  def _check_outputs(candidate):
    self.assertAllEqual(candidate(args), expected)
    self.assertAllEqual(
        candidate.predict(args, batch_size=batch_size), expected)

  if keras_tensor.keras_tensors_enabled():
    self.assertIn('tf.__operators__.getitem',
                  (layer.name for layer in model.layers))
    # TODO(b/161925288): Fix the bug then uncomment:
    # self.assertNotIn('tf.strided_slice', (
    #     x.name for x in model.layers))
  _check_outputs(model)

  # Make sure it can be successfully saved and loaded
  _check_outputs(keras.Model.from_config(model.get_config()))
def testSlotsUniqueEager(self):
  """Counts the distinct variables the optimizer owns after one minimize."""
  first = tf.Variable(1.)
  second = tf.Variable(1.)
  opt = adam.Adam(1.)
  opt.minimize(lambda: first + second, var_list=[first, second])

  # Five distinct variables in total: the iteration counter plus two slot
  # variables for each of the two optimized variables.
  distinct_refs = {variable.ref() for variable in opt.variables()}
  self.assertLen(distinct_refs, 5)
  # The first optimizer variable holds the same value as `iterations`.
  self.assertEqual(
      self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations))
def testSetWeightsFromV1AdamWithoutMinimize(self):
  """Copies weights from a v1 Adam into a v2 Adam before any training."""
  v1_optimizer = optimizer_v1.Adam()
  v2_optimizer = adam.Adam()
  v2_optimizer.set_weights(v1_optimizer.get_weights())

  v1_iterations = v1_optimizer.iterations
  v2_iterations = v2_optimizer.iterations
  self.evaluate(tf.compat.v1.global_variables_initializer())
  self.assertEqual(
      self.evaluate(v1_iterations), self.evaluate(v2_iterations))
def test_getitem_complex_slicing(self):
  """Combines ellipsis, an index and a strided slice driven by Keras inputs."""
  if not tf.executing_eagerly():
    self.skipTest('Complex slicing like this fails in v1')

  inp = keras.Input(shape=(4, 3, 8))
  first_dim = keras.Input(shape=(), dtype='int32')
  slice_start = keras.Input(shape=(), dtype='int32')
  slice_stop = keras.Input(shape=(), dtype='int32')
  slice_stride = keras.Input(shape=(), dtype='int32')

  out = inp[..., first_dim[0], slice_start[0]:slice_stop[0]:slice_stride[0]]
  model = keras.Model(
      inputs=[inp, first_dim, slice_start, slice_stop, slice_stride],
      outputs=out)
  model.compile(
      adam.Adam(0.001),
      'mse',
      run_eagerly=testing_utils.should_run_eagerly())

  batch_size = 7
  start = 1
  stop = 6
  step = 2
  data = tf.stack([
      tf.stack(
          [tf.stack([tf.range(8) for _ in range(3)]) for _ in range(4)])
      for _ in range(batch_size)
  ])
  args = [
      data,
      tf.constant(0, shape=(batch_size,)),
      tf.constant(start, shape=(batch_size,)),
      tf.constant(stop, shape=(batch_size,)),
      tf.constant(step, shape=(batch_size,)),
  ]

  # Slice the innermost dim. only grab one index from the second-to-innermost
  # dim, removing that dim from the shape.
  expected = tf.stack([
      tf.stack([tf.range(8)[start:stop:step] for _ in range(4)])
      for _ in range(batch_size)
  ])

  def _check_outputs(candidate):
    self.assertAllEqual(candidate(args), expected)
    self.assertAllEqual(
        candidate.predict(args, batch_size=batch_size), expected)

  if keras_tensor.keras_tensors_enabled():
    self.assertIn('tf.__operators__.getitem',
                  (layer.name for layer in model.layers))
    self.assertNotIn('tf.strided_slice',
                     (layer.name for layer in model.layers))
  _check_outputs(model)

  # Make sure it can be successfully saved and loaded
  _check_outputs(keras.Model.from_config(model.get_config()))
def testBasicWithConstantDecay(self):
  """Runs two `minimize` calls inside a `tf.function` and checks the values."""
  var = tf.Variable([1.0, 2.0], dtype=tf.float32)
  opt = adam.Adam(learning_rate=1.0)

  def loss():
    return 3 * var

  @tf.function
  def train_step():
    opt.minimize(loss, [var])
    return var

  # Expected variable value after the first and the second call.
  for expected in ([0., 1.], [-1, 0.]):
    self.assertAllClose(expected, train_step(), atol=1e-4)
def testOptimizerSetIterations(self):
  """Assigns the global step as `iterations` and checks it advances by one."""
  global_step = tf.compat.v1.train.get_or_create_global_step()
  opt = adam.Adam(learning_rate=1.0)
  opt.iterations = global_step
  var = tf.Variable([1.0, 2.0], dtype=tf.float32)
  self.evaluate(tf.compat.v1.global_variables_initializer())
  step_before = self.evaluate(global_step)

  def loss():
    return 3 * var

  minimize_op = opt.minimize(loss, [var])
  self.evaluate(tf.compat.v1.global_variables_initializer())
  self.evaluate(minimize_op)
  step_after = self.evaluate(global_step)
  self.assertEqual(step_after, step_before + 1)
def testKerasOptimizerWithUnequalInput(self, distribution):
  """Checks Adam state when each replica contributes a different loss.

  Args:
    distribution: Distribution strategy the test runs under.
  """
  with distribution.scope():
    var = tf.Variable(
        2.0, name='var', aggregation=tf.compat.v1.VariableAggregation.SUM)
    optimizer = adam.Adam(learning_rate=0.01, beta_1=0.2, beta_2=0.2)
    # Filled on the first `train_fn` call with [var, slot 'm', slot 'v'].
    all_vars = []

    def model_fn():

      def loss_fn():
        # The loss scales with the replica id, so replicas differ.
        return tf.cast(_replica_id() + 1, dtype=tf.float32) * 0.5 * var

      minimize_op = optimizer.minimize(loss_fn, var_list=[var])
      return minimize_op, optimizer

    def train_fn():
      train_op, replica_opt = distribution.extended.call_for_each_replica(
          model_fn)
      if not all_vars:
        all_vars.extend([
            var,
            replica_opt.get_slot(var, 'm'),
            replica_opt.get_slot(var, 'v'),
        ])
      return distribution.group(train_op)

    def _assert_state(expected_var, expected_m, expected_v):
      self.assertAllClose(expected_var, self.evaluate(all_vars[0]))
      self.assertAllClose(expected_m, self.evaluate(all_vars[1]))
      self.assertAllClose(expected_v, self.evaluate(all_vars[2]))

    if not tf.executing_eagerly():
      with self.cached_session() as sess:
        train_fn = sess.make_callable(train_fn())
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # first step.
    train_fn()
    # var(1) = var(0) - lr * m(1) * sqrt(1 - beta2) / sqrt(v(1)) / (1 - beta1)
    #        = 2.0 - 0.01 * 1.2 * sqrt(0.8) / sqrt(1.8) / 0.8
    # m(1) = beta1 * m(0) + (1-beta1) * grad = 0.2 * 0 + 0.8 * (1 + 2) / 2
    # v(1) = beta2 * v(0) + (1-beta2) * grad^2 = 0.2 * 0 + 0.8 * 2.25
    _assert_state(1.99, 1.2, 1.8)

    # second step.
    train_fn()
    # var(2) = var(0) - lr * 2 = 1.98
    # m(2) = beta1 * m(1) + (1-beta1) * grad = 0.2 * 1.2 + 0.8 * 1.5
    # v(2) = beta2 * v(1) + (1-beta2) * grad^2 = 0.2 * 1.8 + 0.8 * 2.25
    _assert_state(1.98, 1.44, 2.16)
def testSparseDevicePlacement(self):
  """Runs one minimize step on a gather-based loss for each index dtype."""
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  for index_dtype in [tf.int32, tf.int64]:
    graph_ctx = tf.Graph().as_default()
    session_ctx = self.cached_session(force_gpu=tf.test.is_gpu_available())
    with graph_ctx, session_ctx:
      # If a GPU is available, tests that all optimizer ops can be placed on
      # it (i.e. they have GPU kernels).
      var = tf.Variable([[1.0], [2.0]])
      indices = tf.constant([0, 1], dtype=index_dtype)
      gathered_sum = lambda: tf.reduce_sum(tf.gather(var, indices))  # pylint: disable=cell-var-from-loop
      optimizer = adam.Adam(3.0)
      minimize_op = optimizer.minimize(gathered_sum, var_list=[var])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      minimize_op.run()
def doTestBasic(self, use_callable_params=False):
  """Runs three dense Adam steps and compares them to the numpy reference.

  Args:
    use_callable_params: When true, the learning rate is handed to the
      optimizer as a zero-argument callable instead of a plain float.
  """
  for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
    with self.cached_session():
      # Initialize variables for numpy implementation.
      m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

      var0 = tf.Variable(var0_np, name="var0_%d" % i)
      var1 = tf.Variable(var1_np, name="var1_%d" % i)
      grads0 = tf.constant(grads0_np)
      grads1 = tf.constant(grads1_np)

      # Only the learning rate is configured here. The former beta1, beta2
      # and epsilon locals were never passed to the optimizer and have been
      # removed as dead code.
      learning_rate = lambda: 0.001
      if not use_callable_params:
        learning_rate = learning_rate()

      opt = adam.Adam(learning_rate=learning_rate)
      if not tf.executing_eagerly():
        # Graph mode: build the update op once and re-run it each step.
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

      self.evaluate(tf.compat.v1.global_variables_initializer())
      # Run 3 steps of Adam
      for t in range(3):
        beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
        self.assertAllCloseAccordingToType(0.9**(t + 1),
                                           self.evaluate(beta_1_power))
        self.assertAllCloseAccordingToType(0.999**(t + 1),
                                           self.evaluate(beta_2_power))
        if not tf.executing_eagerly():
          self.evaluate(update)
        else:
          opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
        var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

        # Validate updated params
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def state():
  """Builds a variable, an Adam optimizer and a compiled training step.

  Returns:
    A `(variable, optimizer, step)` tuple, where `step` is a `tf.function`
    that applies one gradient update through `distribution.run`.
  """
  with distribution.scope():
    variable = tf.Variable(tf.random.normal([]))
  optimizer = adam.Adam(0.001)

  @tf.function
  def step():

    def replica_step():
      with tf.GradientTape() as tape:
        loss = variable + variable
      grads = tape.gradient(loss, [variable])
      optimizer.apply_gradients(zip(grads, [variable]))

    distribution.run(replica_step)

  return variable, optimizer, step
def testManySavesGraph(self):
  """Saves after the first should not modify the graph."""
  with context.graph_mode():
    graph = tf.Graph()
    with graph.as_default(), self.session(graph):
      checkpoint_prefix = os.path.join(self.get_temp_dir(), "ckpt")
      root = tf.train.Checkpoint()
      root.var = tf.Variable(0., name="v")
      root.opt = adam.Adam(0.1)
      root.opt.apply_gradients(zip([1.], [root.var]))
      self.evaluate(trackable_utils.gather_initializers(root))
      root.save(checkpoint_prefix)
      # Once finalized, the second save would fail if it added new ops.
      graph.finalize()
      root.save(checkpoint_prefix)
def testBasicWithLearningRateInverseTimeDecay(self):
  """Checks Adam with an `InverseTimeDecay` schedule against numpy."""
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
    with tf.Graph().as_default(), self.cached_session():
      np_dtype = dtype.as_numpy_dtype
      # Initialize variables for numpy implementation.
      m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
      var0_np = np.array([1.0, 2.0], dtype=np_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=np_dtype)
      var1_np = np.array([3.0, 4.0], dtype=np_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=np_dtype)

      var0 = tf.Variable(var0_np, name="var0_%d" % i)
      var1 = tf.Variable(var1_np, name="var1_%d" % i)
      grads0 = tf.constant(grads0_np)
      grads1 = tf.constant(grads1_np)

      base_lr = 0.001
      decay = 0.5
      lr_schedule = learning_rate_schedule.InverseTimeDecay(
          base_lr, decay_steps=1.0, decay_rate=decay)

      opt = adam.Adam(
          learning_rate=lr_schedule,
          beta_1=0.9,
          beta_2=0.999,
          epsilon=1e-7)
      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

      self.evaluate(tf.compat.v1.global_variables_initializer())
      # Run 3 steps of Adam
      for t in range(3):
        self.evaluate(update)

        # Learning rate the numpy reference uses for step t.
        lr_np = base_lr / (1 + decay * t)
        var0_np, m0, v0 = adam_update_numpy(
            var0_np, grads0_np, t, m0, v0, lr=lr_np)
        var1_np, m1, v1 = adam_update_numpy(
            var1_np, grads1_np, t, m1, v1, lr=lr_np)

        # Validate updated params
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def testVarName(self):
  """Checks names of optimizer variables, with and without a name scope."""
  with tf.compat.v1.get_default_graph().as_default():
    first_var = tf.Variable([1., 2.], name='var')
    first_loss = first_var + 1.
    opt = adam.Adam()
    opt.get_updates(first_loss, [first_var])
    created = opt.variables()
    self.assertLen(created, 3)
    self.assertEqual('Adam/iter:0', created[0].name)
    self.assertEqual('Adam/var/m:0', created[1].name)

    second_var = tf.Variable([1., 2.], name='var_2')
    second_loss = second_var + 1.
    # Slots created inside a name scope are expected to carry its prefix.
    with backend.name_scope('outter'):
      opt.get_updates(second_loss, [second_var])
    created = opt.variables()
    self.assertLen(created, 5)
    self.assertEqual('outter/Adam/var_2/m:0', created[3].name)
def testSparse(self):
  """Checks sparse (`IndexedSlices`) Adam updates against numpy."""
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  for dtype in [tf.half, tf.float32, tf.float64]:
    with tf.Graph().as_default(), self.cached_session():
      np_dtype = dtype.as_numpy_dtype
      # Initialize variables for numpy implementation.
      m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
      var0_np = np.array([1.0, 1.0, 2.0], dtype=np_dtype)
      grads0_np = np.array([0.1, 0.0, 0.1], dtype=np_dtype)
      var1_np = np.array([3.0, 3.0, 4.0], dtype=np_dtype)
      grads1_np = np.array([0.01, 0.0, 0.01], dtype=np_dtype)

      var0 = tf.Variable(var0_np)
      var1 = tf.Variable(var1_np)
      # Only rows 0 and 2 are present in the sparse gradients.
      rows0 = np.array([0, 2], dtype=np.int32)
      grads0 = tf.IndexedSlices(
          tf.constant(grads0_np[rows0]),
          tf.constant(rows0),
          tf.constant([3]))
      rows1 = np.array([0, 2], dtype=np.int32)
      grads1 = tf.IndexedSlices(
          tf.constant(grads1_np[rows1]),
          tf.constant(rows1),
          tf.constant([3]))
      opt = adam.Adam()
      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      self.evaluate(tf.compat.v1.global_variables_initializer())

      # Fetch params to validate initial values
      self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1))

      beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
      # Run 3 steps of Adam
      for t in range(3):
        step = t + 1
        self.assertAllCloseAccordingToType(0.9**step,
                                           self.evaluate(beta_1_power))
        self.assertAllCloseAccordingToType(0.999**step,
                                           self.evaluate(beta_2_power))
        update.run()

        var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
        var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

        # Validate updated params
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def testUsageGraph(self):
  """Expected usage when graph building."""
  with context.graph_mode():
    steps_per_round = 10
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    for training_continuation in range(3):
      with tf.Graph().as_default():
        model = MyModel()
        optimizer = adam.Adam(0.001)
        root = tf.compat.v1.train.Checkpoint(
            optimizer=optimizer, model=model)
        with tf.GradientTape() as tape:
          loss = model(tf.constant([[3.]]))
        trainable = model.trainable_variables
        grads = tape.gradient(loss, trainable)
        train_op = optimizer.apply_gradients(zip(grads, trainable))

        checkpoint_path = tf.train.latest_checkpoint(checkpoint_directory)

        with self.session(
            graph=tf.compat.v1.get_default_graph()) as session:
          status = root.restore(save_path=checkpoint_path)
          status.initialize_or_restore(session=session)
          if checkpoint_path is not None:
            status.assert_consumed()
            status.assert_existing_objects_matched()
          else:
            # No checkpoint yet: only valid on the very first round, and
            # both status assertions are expected to fail.
            self.assertEqual(0, training_continuation)
            with self.assertRaises(AssertionError):
              status.assert_consumed()
            with self.assertRaises(AssertionError):
              status.assert_existing_objects_matched()
          for _ in range(steps_per_round):
            session.run(train_op)
          root.save(file_prefix=checkpoint_prefix, session=session)

          rounds_done = training_continuation + 1
          self.assertEqual(rounds_done * steps_per_round,
                           session.run(root.optimizer.iterations))
          self.assertEqual(rounds_done, session.run(root.save_counter))
def testWithDefun(self):
  """Checkpoints a model that is called through a `tf.function`."""
  with self.test_session():
    steps_per_round = 2
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    for training_continuation in range(3):
      with testing_utils.device(should_use_gpu=True):
        model = MyModel()
        # Don't actually train so we can test variable values
        optimizer = adam.Adam(0.)
        root = tf.train.Checkpoint(optimizer=optimizer, model=model)
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_directory)
        status = root.restore(save_path=checkpoint_path)

        def train_fn():

          @tf.function
          def _call_model(x):
            return model(x)

          with tf.GradientTape() as tape:
            loss = _call_model(tf.constant([[3.]]))
          grads = tape.gradient(loss, model.variables)
          return optimizer.apply_gradients(zip(grads, model.variables))

        if not tf.executing_eagerly():
          # Build the op once; each call then just evaluates it.
          train_fn = functools.partial(self.evaluate, train_fn())
        status.initialize_or_restore()
        for _ in range(steps_per_round):
          train_fn()

        if training_continuation == 0:
          # First round: plant a recognizable value to be checkpointed.
          self.evaluate(model.variables[0].assign([[42.]]))
        else:
          status.assert_consumed()
          self.assertAllClose([[42.]], self.evaluate(model.variables[0]))
        root.save(file_prefix=checkpoint_prefix)

        rounds_done = training_continuation + 1
        self.assertEqual(rounds_done * steps_per_round,
                         self.evaluate(optimizer.iterations))
        self.assertEqual(rounds_done, self.evaluate(root.save_counter))
def testWeights(self):
  """Exercises `get_weights` / `set_weights` between two Adam optimizers."""

  def _float_var(values):
    return tf.Variable(values, dtype=tf.float32)

  with testing_utils.use_gpu():
    opt1 = adam.Adam(learning_rate=1.0)
    var1 = _float_var([1.0, 2.0])
    loss1 = lambda: 3 * var1
    opt_op_1 = opt1.minimize(loss1, [var1])
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # The second optimizer is rebuilt from the first one's config.
    opt2 = adam.Adam.from_config(opt1.get_config())
    var2 = _float_var([1.0, 2.0])
    loss2 = lambda: 3 * var2
    opt_op_2 = opt2.minimize(loss2, [var2])
    weights = opt1.get_weights()

    # Assert set_weights and both variables get updated to same value.
    self.evaluate(tf.compat.v1.global_variables_initializer())
    opt2.set_weights(weights)
    self.evaluate([opt_op_1, opt_op_2])
    self.assertAllClose(self.evaluate(var1), self.evaluate(var2))
    for optimizer in (opt1, opt2):
      self.assertEqual(1, self.evaluate(optimizer.iterations))

    var3 = _float_var([1.0, 2.0, 3.0])
    var4 = _float_var([4.0, 5.0, 6.0])
    loss3 = lambda: 3 * var3 + 5 * var4
    opt_op_3 = opt1.minimize(loss3, [var3, var4])

    # Assert set_weights with ValueError since weight list does not match.
    self.evaluate(tf.compat.v1.global_variables_initializer())
    weights = opt1.get_weights()
    with self.assertRaisesRegex(ValueError, 'but the optimizer was'):
      opt2.set_weights(weights)

    # Assert set_weights and variables get updated to same value.
    var5 = _float_var([1.0, 2.0, 3.0])
    var6 = _float_var([4.0, 5.0, 6.0])
    loss4 = lambda: 3 * var5 + 5 * var6
    opt_op_4 = opt2.minimize(loss4, [var5, var6])
    self.evaluate(tf.compat.v1.global_variables_initializer())
    opt2.set_weights(weights)
    self.evaluate([opt_op_3, opt_op_4])
    self.assertAllClose(
        self.evaluate([var3, var4]), self.evaluate([var5, var6]))
def testNumericEquivalenceForAmsgrad(self):
  """Trains twin models with v1 and v2 AMSGrad Adam and compares results."""
  if tf.executing_eagerly():
    self.skipTest(
        'v1 optimizer does not run in eager mode')
  np.random.seed(1331)
  with testing_utils.use_gpu():
    input_dim = 3
    num_classes = 2
    (x, y), _ = testing_utils.get_test_data(
        train_samples=20,
        test_samples=10,
        input_shape=(input_dim,),
        num_classes=num_classes)
    y = np_utils.to_categorical(y)

    def _build_mlp():
      return testing_utils.get_small_sequential_mlp(
          num_hidden=5, num_classes=num_classes, input_dim=input_dim)

    def _compile(target, optimizer):
      target.compile(
          optimizer,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly())

    model_v1 = _build_mlp()
    model_v2 = _build_mlp()
    # Both models start from identical weights.
    model_v2.set_weights(model_v1.get_weights())

    opt_v1 = optimizer_v1.Adam(amsgrad=True)
    opt_v2 = adam.Adam(amsgrad=True)
    _compile(model_v1, opt_v1)
    _compile(model_v2, opt_v2)

    fit_kwargs = dict(batch_size=5, epochs=10, shuffle=False)
    history_v1 = model_v1.fit(x, y, **fit_kwargs)
    history_v2 = model_v2.fit(x, y, **fit_kwargs)

    self.assertAllClose(model_v1.get_weights(), model_v2.get_weights())
    self.assertAllClose(opt_v1.get_weights(), opt_v2.get_weights())
    self.assertAllClose(history_v1.history['loss'],
                        history_v2.history['loss'])