def test_test_problem(self):
  weights, fn, grad_fn = optimizer_test_utils.test_quadratic_problem()
  # Function value at the initial point is not negligible and the gradient is
  # non-zero.
  self.assertGreater(fn(weights), 5.0)
  self.assertGreater(tf.linalg.norm(grad_fn(weights)), 0.01)
  # All-zeros is the optimum, with a function value of 0.0.
  self.assertAllClose(0.0, fn(tf.zeros_like(weights))[0, 0])
  self.assertAllClose(
      tf.zeros_like(weights), grad_fn(tf.zeros_like(weights)))
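# For reference, a minimal sketch of the kind of problem
# `optimizer_test_utils.test_quadratic_problem` provides. The dimension and
# constants below are illustrative assumptions, not the real implementation:
# it yields initial weights, a strictly convex quadratic `fn` whose optimum
# (value 0.0) is at all-zeros, and the corresponding gradient `grad_fn`.
def _sketch_quadratic_problem(dim=5):
  # A diagonal positive-definite matrix makes the quadratic strictly convex.
  matrix = tf.eye(dim) * tf.range(1.0, dim + 1.0)
  weights = tf.ones([dim, 1]) * 2.0

  def fn(w):
    # 0.5 * w^T A w; the minimum value 0.0 is attained at w == 0.
    return 0.5 * tf.matmul(w, tf.matmul(matrix, w), transpose_a=True)

  def grad_fn(w):
    # Gradient of the quadratic: A w.
    return tf.matmul(matrix, w)

  return weights, fn, grad_fn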
def test_convergence(self, momentum):
  weights, fn, grad_fn = optimizer_test_utils.test_quadratic_problem()
  self.assertGreater(fn(weights), 5.0)

  optimizer = sgdm.SGD(0.1, momentum=momentum)
  state = optimizer.initialize(tf.TensorSpec(weights.shape, weights.dtype))
  for _ in range(100):
    gradients = grad_fn(weights)
    state, weights = optimizer.next(state, weights, gradients)
  self.assertLess(fn(weights), 0.005)
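# A minimal sketch of the functional `initialize`/`next` contract exercised
# above, using plain SGD with momentum. Illustrative only; the real optimizer
# is `sgdm.SGD`, whose state structure may differ. Here the state is assumed
# to be just the momentum accumulator.
class _SketchSGDM:

  def __init__(self, learning_rate, momentum):
    self._lr = learning_rate
    self._momentum = momentum

  def initialize(self, spec):
    # The momentum accumulator starts at all-zeros, matching the weight spec.
    return tf.zeros(spec.shape, spec.dtype)

  def next(self, state, weights, gradients):
    # Classic momentum update: accumulate, then step along the accumulator.
    new_state = self._momentum * state + gradients
    new_weights = weights - self._lr * new_state
    return new_state, new_weights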
def test_disjoint_init_and_next_true(self, momentum):
  """Tests behavior expected of a 'TFF server optimizer'.

  This test creates two `tff.tf_computation`s, which would correspond to
  parts of the two arguments used to create a
  `tff.templates.IterativeProcess`. The `KerasOptimizer` is instantiated in
  both of these computations, and only one of its `initialize` and `next`
  methods is invoked in each of them. The state the optimizer needs is
  exposed by the `KerasOptimizer` and must be carried between the
  invocations of the created `tff.Computation`s.

  Note that even though the `variables` passed to the `single_step` method
  are expected to be `tf.Variable` instances, the code can still be written
  in a functional manner.

  Args:
    momentum: Momentum parameter to be used in `tf.keras.optimizers.SGD`.
  """
  init_w, fn, grad_fn = optimizer_test_utils.test_quadratic_problem()
  weights = init_w()
  self.assertGreater(fn(weights), 5.0)
  optimizer_fn = lambda: tf.keras.optimizers.SGD(0.1, momentum=momentum)

  @tensorflow_computation.tf_computation()
  def initialize_fn():
    variables = tf.Variable(tf.zeros([5, 1]))
    optimizer = keras_optimizer.KerasOptimizer(
        optimizer_fn, variables, disjoint_init_and_next=True)
    return optimizer.initialize(
        tf.TensorSpec(variables.shape, variables.dtype))

  @tf.function
  def single_step(optimizer, state, variables):
    gradients = grad_fn(variables)
    new_state, updated_weights = optimizer.next(state, variables, gradients)
    return new_state, updated_weights

  @tensorflow_computation.tf_computation()
  def next_fn(state, initial_weights):
    variables = tf.Variable(initial_weights)
    optimizer = keras_optimizer.KerasOptimizer(
        optimizer_fn, variables, disjoint_init_and_next=True)
    return single_step(optimizer, state, variables)

  state = initialize_fn()
  for _ in range(100):
    state, weights = next_fn(state, weights)
  self.assertLess(fn(weights), 0.005)
  # The optimizer variables are exposed by the KerasOptimizer. The first
  # variable of a Keras optimizer is the number of steps taken.
  self.assertEqual(100, state[0])
def test_convergence(self):
  init_w, fn, grad_fn = optimizer_test_utils.test_quadratic_problem()
  weights = init_w()
  self.assertGreater(fn(weights), 5.0)

  optimizer = adagrad.build_adagrad(0.5)
  state = optimizer.initialize(tf.TensorSpec(weights.shape, weights.dtype))
  for _ in range(100):
    gradients = grad_fn(weights)
    state, weights = optimizer.next(state, weights, gradients)
  self.assertLess(fn(weights), 0.005)
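# The same functional contract, sketched for Adagrad. Illustrative only; the
# real optimizer comes from `adagrad.build_adagrad`, whose state structure and
# defaults may differ. Here the state is assumed to be the accumulated sum of
# squared gradients, with a small epsilon for numerical stability.
class _SketchAdagrad:

  def __init__(self, learning_rate, initial_accumulator=0.1, epsilon=1e-7):
    self._lr = learning_rate
    self._initial = initial_accumulator
    self._epsilon = epsilon

  def initialize(self, spec):
    # Preconditioner: running sum of squared gradients.
    return tf.ones(spec.shape, spec.dtype) * self._initial

  def next(self, state, weights, gradients):
    new_state = state + tf.math.square(gradients)
    new_weights = weights - self._lr * gradients / (
        tf.math.sqrt(new_state) + self._epsilon)
    return new_state, new_weights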
def test_disjoint_init_and_next_false(self, momentum):
  """Tests behavior expected of a 'TFF client optimizer'.

  The main part of this test is the `training_loop` method, which works with
  an already instantiated `KerasOptimizer` and uses both its `initialize`
  and `next` methods to perform a number of training steps. This is the
  behavior expected to happen during local training at clients.

  Note that even though the `variables` passed to the `training_loop` method
  are expected to be `tf.Variable` instances, the code can still be written
  in a functional manner.

  Args:
    momentum: Momentum parameter to be used in `tf.keras.optimizers.SGD`.
  """
  init_w, fn, grad_fn = optimizer_test_utils.test_quadratic_problem()
  weights = init_w()
  self.assertGreater(fn(weights), 5.0)
  optimizer_fn = lambda: tf.keras.optimizers.SGD(0.1, momentum=momentum)

  @tf.function
  def training_loop(optimizer, variables):
    state = optimizer.initialize(
        tf.TensorSpec(variables.shape, variables.dtype))
    for _ in range(100):
      gradients = grad_fn(variables)
      state, variables = optimizer.next(state, variables, gradients)
    return state, variables

  @tensorflow_computation.tf_computation()
  def local_training(initial_weights):
    variables = tf.Variable(initial_weights)
    optimizer = keras_optimizer.KerasOptimizer(
        optimizer_fn, variables, disjoint_init_and_next=False)
    return training_loop(optimizer, variables)

  state, optimized_weights = local_training(weights)
  self.assertLess(fn(optimized_weights), 0.005)
  # The optimizer variables are handled internally in the KerasOptimizer.
  self.assertEmpty(state)