def test_ops_with_var_and_adadelta(self):
  var_list = [
      deo.get_variable('sp_var', initializer=0.0, dim=2),
  ]
  opt_list = [
      adadelta.AdadeltaOptimizer(),
  ]
  self.common_run_context(var_list, opt_list, name='adadelta_test')
def testMinimizeSparseResourceVariable(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
      x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
      pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
      loss = pred * pred
      sgd_op = adadelta.AdadeltaOptimizer(1.0, 1.0, 1.0).minimize(loss)
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllCloseAccordingToType([[1.0, 2.0]], self.evaluate(var0))
      # Run 1 step of sgd
      sgd_op.run()
      # Validate updated params
      self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0))
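# Sanity check of the hard-coded expectation above (an illustrative sketch,
# not part of the test): pred = [[1., 2.]] @ [[4.], [5.]] = 14 and
# loss = pred**2, so d(loss)/d(var0) = 2 * pred * x.T = [[112., 140.]].
# With learning_rate = rho = epsilon = 1.0 the squared-gradient accumulator
# stays at zero and the Adadelta step reduces to a plain gradient step:
import numpy as np

var0 = np.array([[1.0, 2.0]])
x = np.array([[4.0], [5.0]])
pred = float(var0 @ x)   # 14.0
grad = 2.0 * pred * x.T  # [[112.0, 140.0]]
print(var0 - grad)       # [[-111.0, -138.0]], matching the assert above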
def doTestBasic(self, use_resource=False, use_callable_params=False):
  num_updates = 4  # number of ADADELTA steps to perform
  for dtype in [dtypes.half, dtypes.float32]:
    for grad in [0.2, 0.1, 0.01]:
      for lr in [1.0, 0.5, 0.1]:
        var0_init = [1.0, 2.0]
        var1_init = [3.0, 4.0]
        if use_resource:
          var0 = resource_variable_ops.ResourceVariable(
              var0_init, dtype=dtype)
          var1 = resource_variable_ops.ResourceVariable(
              var1_init, dtype=dtype)
        else:
          var0 = variables.Variable(var0_init, dtype=dtype)
          var1 = variables.Variable(var1_init, dtype=dtype)

        grads = constant_op.constant([grad, grad], dtype=dtype)

        accum = 0.0
        accum_update = 0.0

        # ADADELTA gradient optimizer
        rho = 0.95
        epsilon = 1e-8
        if use_callable_params:
          adadelta_opt = adadelta.AdadeltaOptimizer(
              learning_rate=lambda: lr,  # pylint: disable=cell-var-from-loop
              rho=lambda: rho,  # pylint: disable=cell-var-from-loop
              epsilon=lambda: epsilon)  # pylint: disable=cell-var-from-loop
        else:
          adadelta_opt = adadelta.AdadeltaOptimizer(
              learning_rate=lr, rho=rho, epsilon=epsilon)
        if not context.executing_eagerly():
          adadelta_update = adadelta_opt.apply_gradients(
              zip([grads, grads], [var0, var1]))
          self.evaluate(variables.global_variables_initializer())

          # TODO(lxuechen): This is hard to test in eager mode,
          # since the optimizer is not fully initialized until the first
          # call to `apply_gradients`
          opt_vars = adadelta_opt.variables()
          self.assertStartsWith(opt_vars[0].name, var0._shared_name)
          self.assertStartsWith(opt_vars[1].name, var0._shared_name)
          self.assertStartsWith(opt_vars[2].name, var1._shared_name)
          self.assertStartsWith(opt_vars[3].name, var1._shared_name)
          self.assertEqual(4, len(opt_vars))

          # Assign slots
          slot = [None] * 2
          slot_update = [None] * 2
          self.assertEqual(["accum", "accum_update"],
                           adadelta_opt.get_slot_names())
          slot[0] = adadelta_opt.get_slot(var0, "accum")
          self.assertEqual(slot[0].get_shape(), var0.get_shape())
          self.assertNotIn(slot[0], variables.trainable_variables())

          slot_update[0] = adadelta_opt.get_slot(var0, "accum_update")
          self.assertEqual(slot_update[0].get_shape(), var0.get_shape())
          self.assertNotIn(slot_update[0], variables.trainable_variables())

          slot[1] = adadelta_opt.get_slot(var1, "accum")
          self.assertEqual(slot[1].get_shape(), var1.get_shape())
          self.assertNotIn(slot[1], variables.trainable_variables())

          slot_update[1] = adadelta_opt.get_slot(var1, "accum_update")
          self.assertEqual(slot_update[1].get_shape(), var1.get_shape())
          self.assertNotIn(slot_update[1], variables.trainable_variables())

        # Fetch params to validate initial values
        self.assertAllClose(var0_init, self.evaluate(var0))
        self.assertAllClose(var1_init, self.evaluate(var1))

        update = [None] * num_updates
        tot_update = 0
        for step in range(num_updates):
          # Run adadelta update for comparison
          if not context.executing_eagerly():
            self.evaluate(adadelta_update)
          else:
            adadelta_opt.apply_gradients(zip([grads, grads], [var0, var1]))

          # Perform initial update without previous accum values
          accum = accum * rho + (grad**2) * (1 - rho)
          update[step] = (np.sqrt(accum_update + epsilon) *
                          (1. / np.sqrt(accum + epsilon)) * grad)
          accum_update = (accum_update * rho +
                          (update[step]**2) * (1.0 - rho))
          tot_update += update[step] * lr

          if not context.executing_eagerly():
            # Check that the accumulators have been updated
            # TODO(lxuechen): This is hard to test in eager mode
            for slot_idx in range(2):
              self.assertAllCloseAccordingToType(
                  np.array([accum, accum], dtype=dtype.as_numpy_dtype()),
                  self.evaluate(slot[slot_idx]),
                  rtol=1e-5)
              self.assertAllCloseAccordingToType(
                  np.array([accum_update, accum_update],
                           dtype=dtype.as_numpy_dtype()),
                  self.evaluate(slot_update[slot_idx]),
                  rtol=1e-5)

            # Check that the parameters have been updated
            self.assertAllCloseAccordingToType(
                np.array(
                    [var0_init[0] - tot_update, var0_init[1] - tot_update],
                    dtype=dtype.as_numpy_dtype()),
                self.evaluate(var0),
                rtol=1e-5)
            self.assertAllCloseAccordingToType(
                np.array(
                    [var1_init[0] - tot_update, var1_init[1] - tot_update],
                    dtype=dtype.as_numpy_dtype()),
                self.evaluate(var1),
                rtol=1e-5)
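# For reference, the hand-rolled math in doTestBasic mirrors the Adadelta
# recurrence from Zeiler (2012), applied element-wise at each step:
#   accum        <- rho * accum + (1 - rho) * grad**2
#   update       <- sqrt(accum_update + eps) / sqrt(accum + eps) * grad
#   accum_update <- rho * accum_update + (1 - rho) * update**2
#   var          <- var - lr * update
# A minimal standalone NumPy sketch of one step; the helper name
# `adadelta_reference_step` is illustrative, not part of the test API, and it
# assumes the module-level `import numpy as np` these tests already use:
def adadelta_reference_step(var, grad, accum, accum_update,
                            lr=1.0, rho=0.95, epsilon=1e-8):
  """Returns (new_var, new_accum, new_accum_update) after one Adadelta step."""
  accum = rho * accum + (1 - rho) * grad**2
  update = np.sqrt(accum_update + epsilon) / np.sqrt(accum + epsilon) * grad
  accum_update = rho * accum_update + (1 - rho) * update**2
  return var - lr * update, accum, accum_update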
def doTestBasic(self, use_resource=False):
  num_updates = 4  # number of ADADELTA steps to perform
  for dtype in [dtypes.half, dtypes.float32]:
    for grad in [0.2, 0.1, 0.01]:
      for lr in [1.0, 0.5, 0.1]:
        with self.test_session():
          var0_init = [1.0, 2.0]
          var1_init = [3.0, 4.0]
          if use_resource:
            var0 = resource_variable_ops.ResourceVariable(
                var0_init, dtype=dtype)
            var1 = resource_variable_ops.ResourceVariable(
                var1_init, dtype=dtype)
          else:
            var0 = variables.Variable(var0_init, dtype=dtype)
            var1 = variables.Variable(var1_init, dtype=dtype)

          grads = constant_op.constant([grad, grad], dtype=dtype)

          accum = 0.0
          accum_update = 0.0

          # ADADELTA gradient optimizer
          rho = 0.95
          epsilon = 1e-8
          adadelta_opt = adadelta.AdadeltaOptimizer(lr, rho, epsilon)
          adadelta_update = adadelta_opt.apply_gradients(
              zip([grads, grads], [var0, var1]))
          variables.global_variables_initializer().run()

          # Assign slots
          slot = [None] * 2
          slot_update = [None] * 2
          self.assertEqual(["accum", "accum_update"],
                           adadelta_opt.get_slot_names())
          slot[0] = adadelta_opt.get_slot(var0, "accum")
          self.assertEqual(slot[0].get_shape(), var0.get_shape())
          self.assertNotIn(slot[0], variables.trainable_variables())

          slot_update[0] = adadelta_opt.get_slot(var0, "accum_update")
          self.assertEqual(slot_update[0].get_shape(), var0.get_shape())
          self.assertNotIn(slot_update[0], variables.trainable_variables())

          slot[1] = adadelta_opt.get_slot(var1, "accum")
          self.assertEqual(slot[1].get_shape(), var1.get_shape())
          self.assertNotIn(slot[1], variables.trainable_variables())

          slot_update[1] = adadelta_opt.get_slot(var1, "accum_update")
          self.assertEqual(slot_update[1].get_shape(), var1.get_shape())
          self.assertNotIn(slot_update[1], variables.trainable_variables())

          # Fetch params to validate initial values
          self.assertAllClose(var0_init, var0.eval())
          self.assertAllClose(var1_init, var1.eval())

          update = [None] * num_updates
          tot_update = 0
          for step in range(num_updates):
            # Run adadelta update for comparison
            adadelta_update.run()

            # Perform initial update without previous accum values
            accum = accum * rho + (grad**2) * (1 - rho)
            update[step] = (np.sqrt(accum_update + epsilon) *
                            (1. / np.sqrt(accum + epsilon)) * grad)
            accum_update = (accum_update * rho +
                            (update[step]**2) * (1.0 - rho))
            tot_update += update[step] * lr

            # Check that the accumulators have been updated
            for slot_idx in range(2):
              self.assertAllCloseAccordingToType(
                  np.array([accum, accum], dtype=dtype.as_numpy_dtype()),
                  slot[slot_idx].eval(),
                  rtol=1e-3)
              self.assertAllCloseAccordingToType(
                  np.array([accum_update, accum_update],
                           dtype=dtype.as_numpy_dtype()),
                  slot_update[slot_idx].eval(),
                  rtol=1e-3)

            # Check that the parameters have been updated
            self.assertAllCloseAccordingToType(
                np.array(
                    [var0_init[0] - tot_update, var0_init[1] - tot_update],
                    dtype=dtype.as_numpy_dtype()),
                var0.eval(),
                rtol=1e-3)
            self.assertAllCloseAccordingToType(
                np.array(
                    [var1_init[0] - tot_update, var1_init[1] - tot_update],
                    dtype=dtype.as_numpy_dtype()),
                var1.eval(),
                rtol=1e-3)
def test_adadelta_apply_restriction(self):
  opt = adadelta.AdadeltaOptimizer()
  self.commonly_apply_restriction_verify(opt)
def testBasic(self):
  num_updates = 4  # number of ADADELTA steps to perform
  if "CPU" in self.device:
    # To avoid timeout on CPU.
    all_grad = [0.2, 0.01]
    all_lr = [1.0, 0.1]
  else:
    all_grad = [0.2, 0.1, 0.01]
    all_lr = [1.0, 0.5, 0.1]

  for dtype in self.float_types | self.complex_types:
    with self.session(), self.test_scope():
      for grad in all_grad:
        for lr in all_lr:
          var0_init = [1.0, 2.0]
          var1_init = [3.0, 4.0]
          var0 = resource_variable_ops.ResourceVariable(
              var0_init, dtype=dtype)
          var1 = resource_variable_ops.ResourceVariable(
              var1_init, dtype=dtype)

          grads = constant_op.constant([grad, grad], dtype=dtype)

          accum = 0.0
          accum_update = 0.0

          # ADADELTA gradient optimizer
          rho = 0.95
          epsilon = 1e-8
          adadelta_opt = adadelta.AdadeltaOptimizer(
              learning_rate=lr, rho=rho, epsilon=epsilon)
          adadelta_update = adadelta_opt.apply_gradients(
              zip([grads, grads], [var0, var1]))
          self.evaluate(variables.global_variables_initializer())

          opt_vars = adadelta_opt.variables()
          self.assertStartsWith(opt_vars[0].name, var0._shared_name)
          self.assertStartsWith(opt_vars[1].name, var0._shared_name)
          self.assertStartsWith(opt_vars[2].name, var1._shared_name)
          self.assertStartsWith(opt_vars[3].name, var1._shared_name)
          self.assertEqual(4, len(opt_vars))

          # Assign slots
          slot = [None] * 2
          slot_update = [None] * 2
          self.assertEqual(["accum", "accum_update"],
                           adadelta_opt.get_slot_names())
          slot[0] = adadelta_opt.get_slot(var0, "accum")
          self.assertEqual(slot[0].get_shape(), var0.get_shape())
          self.assertNotIn(slot[0], variables.trainable_variables())

          slot_update[0] = adadelta_opt.get_slot(var0, "accum_update")
          self.assertEqual(slot_update[0].get_shape(), var0.get_shape())
          self.assertNotIn(slot_update[0], variables.trainable_variables())

          slot[1] = adadelta_opt.get_slot(var1, "accum")
          self.assertEqual(slot[1].get_shape(), var1.get_shape())
          self.assertNotIn(slot[1], variables.trainable_variables())

          slot_update[1] = adadelta_opt.get_slot(var1, "accum_update")
          self.assertEqual(slot_update[1].get_shape(), var1.get_shape())
          self.assertNotIn(slot_update[1], variables.trainable_variables())

          # Fetch params to validate initial values
          self.assertAllClose(var0_init, self.evaluate(var0))
          self.assertAllClose(var1_init, self.evaluate(var1))

          update = [None] * num_updates
          tot_update = 0
          for step in range(num_updates):
            # Run adadelta update for comparison
            self.evaluate(adadelta_update)

            # Perform initial update without previous accum values
            accum = accum * rho + (grad**2) * (1 - rho)
            update[step] = (np.sqrt(accum_update + epsilon) *
                            (1. / np.sqrt(accum + epsilon)) * grad)
            accum_update = (accum_update * rho +
                            (update[step]**2) * (1.0 - rho))
            tot_update += update[step] * lr

            # Check that the accumulators have been updated
            for slot_idx in range(2):
              self.assertAllCloseAccordingToType(
                  np.array([accum, accum], dtype=dtype),
                  self.evaluate(slot[slot_idx]),
                  rtol=1e-5)
              self.assertAllCloseAccordingToType(
                  np.array([accum_update, accum_update], dtype=dtype),
                  self.evaluate(slot_update[slot_idx]),
                  rtol=1e-5)

            # Check that the parameters have been updated
            self.assertAllCloseAccordingToType(
                np.array(
                    [var0_init[0] - tot_update, var0_init[1] - tot_update],
                    dtype=dtype),
                self.evaluate(var0),
                rtol=1e-5)
            self.assertAllCloseAccordingToType(
                np.array(
                    [var1_init[0] - tot_update, var1_init[1] - tot_update],
                    dtype=dtype),
                self.evaluate(var1),
                rtol=1e-5)
def test_adadelta_restrict_on_policy(self):
  opt = adadelta.AdadeltaOptimizer()
  self.common_single_step_restrict_verification(opt)
def test_adadelta_restrictor_update(self):
  opt = adadelta.AdadeltaOptimizer()
  self.common_single_step_update_verification(opt)
def test_adadelta_minimize_trainable(self):
  base_opt = adadelta.AdadeltaOptimizer(1.0)
  test_opt = adadelta.AdadeltaOptimizer(1.0)
  self.common_minimize_trainable(base_opt, test_opt, name="adadelta")