def testPrecomputedGradient(self):
  """SGD minimize() with an externally supplied grad_loss scales gradients."""
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      v0 = variables.Variable([1.0, 2.0], dtype=dtype)
      v1 = variables.Variable([3.0, 4.0], dtype=dtype)
      cost = 5 * v0 + 3 * v1
      grad_loss = constant_op.constant([42, -42], dtype=dtype)
      global_step = variables.Variable(
          array_ops.zeros([], dtypes.int64), name='global_step')
      sgd = gradient_descent.GradientDescentOptimizer(3.0)
      train_op = sgd.minimize(cost, global_step, [v0, v1],
                              grad_loss=grad_loss)
      variables.global_variables_initializer().run()
      # Parameters start at their initial values.
      self.assertAllClose([1.0, 2.0], v0.eval())
      self.assertAllClose([3.0, 4.0], v1.eval())
      # Take a single SGD step.
      train_op.run()
      # Each update is lr * coefficient * grad_loss.
      self.assertAllClose(
          [1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)], v0.eval())
      self.assertAllClose(
          [3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)], v1.eval())
def testBasicResourceVariable(self):
  """One SGD step on resource variables applies var -= lr * grad."""
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    # Use cached_session() for consistency with the other tests in this
    # file (test_session() is the deprecated spelling of the same thing).
    with self.cached_session():
      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
      sgd_op = gradient_descent.GradientDescentOptimizer(
          3.0).apply_gradients(zip([grads0, grads1], [var0, var1]))
      # TODO(apassos) calling initialize_resources on all resources here
      # doesn't work because the sessions and graph are reused across unit
      # tests and this would mean trying to reinitialize variables. Figure out
      # a long-term solution for this.
      resources.initialize_resources([var0, var1]).run()
      # Fetch params to validate initial values
      self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
      self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
      # Run 1 step of sgd
      sgd_op.run()
      # Validate updated params: each var moved by 3.0 * its gradient.
      self.assertAllCloseAccordingToType(
          [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], var0.eval())
      self.assertAllCloseAccordingToType(
          [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], var1.eval())
def testMinimizeResourceVariable(self):
  """Minimizing a matmul-based loss updates both resource variables."""
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      weight = resource_variable_ops.ResourceVariable([[1.0, 2.0]],
                                                      dtype=dtype)
      bias = resource_variable_ops.ResourceVariable([3.0], dtype=dtype)
      x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
      pred = math_ops.matmul(weight, x) + bias
      loss = pred * pred
      step = gradient_descent.GradientDescentOptimizer(1.0).minimize(loss)
      # TODO(apassos) calling initialize_resources on all resources here
      # doesn't work because the sessions and graph are reused across unit
      # tests and this would mean trying to reinitialize variables. Figure out
      # a long-term solution for this.
      resources.initialize_resources([weight, bias]).run()
      # Values are untouched before the step runs.
      self.assertAllCloseAccordingToType([[1.0, 2.0]], weight.eval())
      self.assertAllCloseAccordingToType([3.0], bias.eval())
      # Take a single gradient-descent step.
      step.run()
      # d(loss)/d(pred) = 2 * pred; the chain rule gives the updates below.
      np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
      np_grad = 2 * np_pred
      self.assertAllCloseAccordingToType(
          [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], weight.eval())
      self.assertAllCloseAccordingToType([3.0 - np_grad], bias.eval())
def testBasic(self):
  """A single SGD step via minimize(), exercised per dtype."""
  for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
    v0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
    v1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)

    def loss():
      return 5 * v0 + 3 * v1  # pylint: disable=cell-var-from-loop

    # Note that for eager execution, minimize expects a function instead of a
    # Tensor.
    global_step = resource_variable_ops.ResourceVariable(
        array_ops.zeros([], dtypes.int64), name='global_step_%d' % i)
    sgd = gradient_descent.GradientDescentOptimizer(3.0)

    self.evaluate(variables.global_variables_initializer())
    # Initial values are intact before the step.
    self.assertAllClose([1.0, 2.0], self.evaluate(v0))
    self.assertAllClose([3.0, 4.0], self.evaluate(v1))
    # One SGD step: v -= 3.0 * coefficient.
    opt_op = sgd.minimize(loss, global_step, [v0, v1])
    self.evaluate(opt_op)
    self.assertAllClose([-14., -13.], self.evaluate(v0))
    self.assertAllClose([-6., -5.], self.evaluate(v1))
def testSparseBasic(self):
  """SGD with IndexedSlices gradients only touches the indexed rows."""
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      v0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
      v1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
      # Sparse gradient hitting only row 0 of v0.
      sparse_grad0 = ops.IndexedSlices(
          constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
          constant_op.constant([0]),
          constant_op.constant([2, 1]))
      # Sparse gradient hitting only row 1 of v1.
      sparse_grad1 = ops.IndexedSlices(
          constant_op.constant([0.01], shape=[1, 1], dtype=dtype),
          constant_op.constant([1]),
          constant_op.constant([2, 1]))
      step = gradient_descent.GradientDescentOptimizer(3.0).apply_gradients(
          zip([sparse_grad0, sparse_grad1], [v0, v1]))
      variables.global_variables_initializer().run()
      # Initial values are intact before the step.
      self.assertAllCloseAccordingToType([[1.0], [2.0]], v0.eval())
      self.assertAllCloseAccordingToType([[3.0], [4.0]], v1.eval())
      # One SGD step.
      step.run()
      # Only the indexed rows move; all other rows are untouched.
      self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
                                         v0.eval())
      self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
                                         v1.eval())
def testAggregationMethod(self):
  """minimize() honors an explicit gradient aggregation_method."""
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      v0 = variables.Variable([1.0, 2.0], dtype=dtype)
      v1 = variables.Variable([3.0, 4.0], dtype=dtype)
      cost = 5 * v0 + 3 * v1
      global_step = variables.Variable(
          array_ops.zeros([], dtypes.int64), name='global_step')
      sgd = gradient_descent.GradientDescentOptimizer(3.0)
      train_op = sgd.minimize(
          cost,
          global_step, [v0, v1],
          aggregation_method=gradients_impl.AggregationMethod.
          EXPERIMENTAL_ACCUMULATE_N)
      variables.global_variables_initializer().run()
      # Initial values are intact before the step.
      self.assertAllClose([1.0, 2.0], v0.eval())
      self.assertAllClose([3.0, 4.0], v1.eval())
      # One SGD step: v -= 3.0 * coefficient.
      train_op.run()
      self.assertAllClose([-14., -13.], v0.eval())
      self.assertAllClose([-6., -5.], v1.eval())
def testGradWrtRef(self):
  """compute_gradients works when differentiating w.r.t. ref variables."""
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      opt = gradient_descent.GradientDescentOptimizer(3.0)
      vars_ = [variables.Variable([v], dtype=dtype) for v in [1.0, 3.0]]
      grads_and_vars = opt.compute_gradients(vars_[0] + vars_[1], vars_)
      variables.global_variables_initializer().run()
      # d(a + b)/da == d(a + b)/db == 1 for both variables.
      for grad, _ in grads_and_vars:
        self.assertAllCloseAccordingToType([1.0], grad.eval())
def testTrainOp(self):
  """minimize() registers the returned op in the TRAIN_OP collection."""
  with self.cached_session():
    var0 = variables.Variable([1.0, 2.0])
    var1 = variables.Variable([3.0, 4.0])
    cost = 5 * var0 + 3 * var1
    global_step = variables.Variable(
        array_ops.zeros([], dtypes.int64), name='global_step')
    sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
    opt_op = sgd_op.minimize(cost, global_step, [var0, var1])
    # assertIn gives a clearer failure message than assertTrue(x in y).
    self.assertIn(opt_op, ops.get_collection(ops.GraphKeys.TRAIN_OP))
def testNoGradientsForAnyVariables_ApplyGradients(self):
  """apply_gradients raises ValueError when every gradient is None."""
  # The enumerate index was unused; iterate over the dtypes directly.
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
    var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
    sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
    with self.assertRaisesRegexp(
        ValueError, 'No gradients provided for any variable'):
      sgd_op.apply_gradients([(None, var0), (None, var1)])
def testStopGradients(self):
  """stop_gradients blocks the gradient path through the listed tensors."""
  with self.cached_session():
    v0 = variables.Variable([1.0, 2.0], name='var0')
    v1 = variables.Variable([3.0, 4.0], name='var1')
    v0_id = array_ops.identity(v0)
    cost = 5 * v0_id + 3 * v1
    opt = gradient_descent.GradientDescentOptimizer(3.0)
    grads_and_vars = opt.compute_gradients(
        cost, [v0, v1], stop_gradients=[v0_id])
    grad_dict = {var.op.name: grad for grad, var in grads_and_vars}
    # var0's only path to the cost goes through the stopped identity,
    # so its gradient is None; var1 still receives a gradient.
    self.assertIsNone(grad_dict['var0'])
    self.assertIsNotNone(grad_dict['var1'])
def testNoGradientsForAnyVariables_Minimize(self):
  """minimize raises ValueError when the loss is constant w.r.t. var_list."""
  # The enumerate index was unused; iterate over the dtypes directly.
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
    var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)

    def loss():
      return constant_op.constant(5.0)

    sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
    with self.assertRaisesRegexp(
        ValueError, 'No gradients provided for any variable'):
      sgd_op.minimize(loss, var_list=[var0, var1])
def testNoGradientsForAnyVariables_ApplyGradients(self):
  """apply_gradients with all-None gradients raises ValueError."""
  for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
    # Note that we name the variables uniquely here since the variables don't
    # seem to be getting deleted at the end of the loop.
    va = resource_variable_ops.ResourceVariable(
        [1.0, 2.0], dtype=dtype, name='a_%d' % i)
    vb = resource_variable_ops.ResourceVariable(
        [3.0, 4.0], dtype=dtype, name='b_%d' % i)
    opt = gradient_descent.GradientDescentOptimizer(3.0)
    with self.assertRaisesRegexp(
        ValueError, 'No gradients provided for any variable'):
      opt.apply_gradients([(None, va), (None, vb)])
def testComputeGradientsWithTensors(self):
  """compute_gradients accepts plain tensors; apply_gradients rejects them."""
  x = ops.convert_to_tensor(1.0)

  def f():
    return x * x

  opt = gradient_descent.GradientDescentOptimizer(3.0)
  pairs = opt.compute_gradients(f, [x])
  self.assertEqual(1, len(pairs))
  grad, target = pairs[0]
  # The "variable" slot of the pair is the tensor itself.
  self.assertIs(x, target)
  # d(x*x)/dx at x == 1 is 2.
  self.assertEqual(2.0, self.evaluate(grad))
  # Tensors are not updatable, so applying must fail.
  with self.assertRaises(NotImplementedError):
    opt.apply_gradients(pairs)
def testNoGradients(self):
  """minimize raises when var_list holds a variable absent from the loss."""
  # The enumerate index was unused; iterate over the dtypes directly.
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
    var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)

    # pylint: disable=cell-var-from-loop
    def loss():
      return 5 * var0

    # pylint: enable=cell-var-from-loop
    sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
    with self.assertRaisesRegexp(ValueError, 'No gradients'):
      # var1 has no gradient
      sgd_op.minimize(loss, var_list=[var1])
def testNoVariables(self):
  """minimize raises when no trainable variables are reachable."""
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:

    # pylint: disable=cell-var-from-loop
    def loss():
      # Both variables are created non-trainable, leaving the optimizer
      # with nothing to update.
      va = resource_variable_ops.ResourceVariable(
          [1.0, 2.0], dtype=dtype, trainable=False, name='a')
      vb = resource_variable_ops.ResourceVariable(
          [3.0, 4.0], dtype=dtype, trainable=False, name='b')
      return 5 * va + vb

    # pylint: enable=cell-var-from-loop
    opt = gradient_descent.GradientDescentOptimizer(3.0)
    with self.assertRaisesRegexp(ValueError, 'No.*variables'):
      opt.minimize(loss)
def testGradientsAsVariables(self):
  """Gradients copied into variables still drive a correct SGD step."""
  for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
    # Note that we name the variables uniquely here since the variables don't
    # seem to be getting deleted at the end of the loop.
    var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype,
                                                  name='a%d' % i)
    var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype,
                                                  name='b%d' % i)

    def loss():
      return 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop

    sgd_op = gradient_descent.GradientDescentOptimizer(3.0)
    grads_and_vars = sgd_op.compute_gradients(loss, [var0, var1])
    # Convert gradients to tf.Variables.  Only the count of pairs matters
    # for building the holder variables, hence the unused `_`.
    converted_grads = [
        resource_variable_ops.ResourceVariable(
            array_ops.zeros([2], dtype), name='c_%d_%d' % (i, j))
        for j, _ in enumerate(grads_and_vars)
    ]
    convert_ops = [
        state_ops.assign(converted_grads[j], gv[0])
        for j, gv in enumerate(grads_and_vars)
    ]

    self.evaluate(variables.global_variables_initializer())
    # Run convert_ops to achieve the gradients converting
    self.evaluate(convert_ops)
    # Fetch params to validate initial values
    self.assertAllClose([1.0, 2.0], self.evaluate(var0))
    self.assertAllClose([3.0, 4.0], self.evaluate(var1))

    # Run 1 step of sgd through optimizer
    converted_grads_and_vars = list(zip(converted_grads, [var0, var1]))
    opt_op = sgd_op.apply_gradients(converted_grads_and_vars)
    self.evaluate(opt_op)
    # Validate updated params
    self.assertAllClose([-14., -13.], self.evaluate(var0))
    self.assertAllClose([-6., -5.], self.evaluate(var1))
def testNoGradientsForAnyVariables_Minimize(self):
  """minimize with a constant loss reports no gradients for any variable."""
  for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
    # Note that we name the variables uniquely here since the variables don't
    # seem to be getting deleted at the end of the loop.
    va = resource_variable_ops.ResourceVariable(
        [1.0, 2.0], dtype=dtype, name='a_%d' % i)
    vb = resource_variable_ops.ResourceVariable(
        [3.0, 4.0], dtype=dtype, name='b_%d' % i)

    def loss():
      return constant_op.constant(5.0)

    opt = gradient_descent.GradientDescentOptimizer(3.0)
    with self.assertRaisesRegexp(
        ValueError, 'No gradients provided for any variable'):
      opt.minimize(loss, var_list=[va, vb])
def testTensorLearningRate(self):
  """The learning rate may be a tensor rather than a Python float."""
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      v0 = variables.Variable([1.0, 2.0], dtype=dtype)
      v1 = variables.Variable([3.0, 4.0], dtype=dtype)
      g0 = constant_op.constant([0.1, 0.1], dtype=dtype)
      g1 = constant_op.constant([0.01, 0.01], dtype=dtype)
      lrate = constant_op.constant(3.0)
      step = gradient_descent.GradientDescentOptimizer(
          lrate).apply_gradients(zip([g0, g1], [v0, v1]))
      variables.global_variables_initializer().run()
      # Initial values are intact before the step.
      self.assertAllCloseAccordingToType([1.0, 2.0], v0.eval())
      self.assertAllCloseAccordingToType([3.0, 4.0], v1.eval())
      # One SGD step: v -= lrate * g.
      step.run()
      self.assertAllCloseAccordingToType(
          [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], v0.eval())
      self.assertAllCloseAccordingToType(
          [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], v1.eval())
def testConstraint(self):
  """Variable constraints are applied after the optimizer update."""
  constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
  constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.)
  with self.cached_session():
    v0 = variables.Variable([1.0, 2.0], constraint=constraint_01)
    v1 = variables.Variable([3.0, 4.0], constraint=constraint_0)
    cost = 5 * v0 + 3 * v1
    global_step = variables.Variable(
        array_ops.zeros([], dtypes.int64), name='global_step')
    sgd = gradient_descent.GradientDescentOptimizer(3.0)
    train_op = sgd.minimize(cost, global_step, [v0, v1])
    variables.global_variables_initializer().run()
    # Initial values are intact before the step.
    self.assertAllClose([1.0, 2.0], v0.eval())
    self.assertAllClose([3.0, 4.0], v1.eval())
    # One SGD step; the raw updates get clipped by each constraint.
    train_op.run()
    self.assertAllClose([-0.1, -0.1], v0.eval())
    self.assertAllClose([0., 0.], v1.eval())
def testNoGradients(self):
  """minimize raises when var_list holds only gradient-free variables."""
  for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
    # Note that we name the variables uniquely here since the variables don't
    # seem to be getting deleted at the end of the loop.
    va = resource_variable_ops.ResourceVariable(
        [1.0, 2.0], dtype=dtype, name='a%d' % i)
    vb = resource_variable_ops.ResourceVariable(
        [3.0, 4.0], dtype=dtype, name='b%d' % i)

    # pylint: disable=cell-var-from-loop
    def loss():
      return 5 * va

    # pylint: enable=cell-var-from-loop
    opt = gradient_descent.GradientDescentOptimizer(3.0)
    with self.assertRaisesRegexp(ValueError, 'No gradients'):
      # vb does not appear in the loss, so it has no gradient.
      opt.minimize(loss, var_list=[vb])
def testWithGlobalStep(self):
  """apply_gradients increments global_step alongside the variable update."""
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      global_step = variables.Variable(0, trainable=False)
      v0 = variables.Variable([1.0, 2.0], dtype=dtype)
      v1 = variables.Variable([3.0, 4.0], dtype=dtype)
      g0 = constant_op.constant([0.1, 0.1], dtype=dtype)
      g1 = constant_op.constant([0.01, 0.01], dtype=dtype)
      step = gradient_descent.GradientDescentOptimizer(3.0).apply_gradients(
          zip([g0, g1], [v0, v1]), global_step=global_step)
      variables.global_variables_initializer().run()
      # Initial values are intact before the step.
      self.assertAllCloseAccordingToType([1.0, 2.0], v0.eval())
      self.assertAllCloseAccordingToType([3.0, 4.0], v1.eval())
      # One SGD step.
      step.run()
      # Params move by lr * grad and the step counter advances to 1.
      self.assertAllCloseAccordingToType(
          [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], v0.eval())
      self.assertAllCloseAccordingToType(
          [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], v1.eval())
      self.assertAllCloseAccordingToType(1, global_step.eval())
gradient_descent_optimizer_v1_fn = NamedObject( "GradientDescentV1", lambda: gradient_descent.GradientDescentOptimizer(0.2)) adagrad_optimizer_v1_fn = NamedObject( "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001)) adam_optimizer_v1_fn = NamedObject("AdamV1", lambda: adam.AdamOptimizer(0.001, epsilon=1)) rmsprop_optimizer_v1_fn = NamedObject( "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001)) optimizers_v1 = [gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn] gradient_descent_optimizer_v2_fn = NamedObject( "GradientDescentV2", lambda: gradient_descent_v2.GradientDescentOptimizer(0.2)) adagrad_optimizer_v2_fn = NamedObject( "AdagradV2", lambda: adagrad_v2.AdagradOptimizer(0.001)) adam_optimizer_v2_fn = NamedObject( "AdamV2", lambda: adam_v2.AdamOptimizer(0.001, epsilon=1)) optimizers_v2 = [gradient_descent_optimizer_v2_fn, adagrad_optimizer_v2_fn] graph_and_eager_modes = ["graph", "eager"] def distributions_and_v1_optimizers(): """A common set of combination with DistributionStrategies and Optimizers.""" return combine( distribution=[ one_device_strategy,