def testConfig(self): opt = sgd.SGD(learning_rate=1.0, momentum=2.0, nesterov=True) config = opt.get_config() opt2 = sgd.SGD.from_config(config) self.assertEqual(opt._hyper["learning_rate"][1], opt2._hyper["learning_rate"][1]) self.assertEqual(opt._hyper["momentum"][1], opt2._hyper["momentum"][1]) self.assertEqual(opt2._use_nesterov, True) opt = sgd.SGD(momentum=None) config = opt.get_config() opt2 = sgd.SGD.from_config(config) self.assertEqual(False, opt2._use_momentum)
def testMinimizeResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype) x = constant_op.constant([[4.0], [5.0]], dtype=dtype) pred = math_ops.matmul(var0, x) + var1 loss = pred * pred sgd_op = sgd.SGD(1.0).minimize(loss) # TODO(apassos) calling initialize_resources on all resources here # doesn't work because the sessions and graph are reused across unit # tests and this would mean trying to reinitialize variables. Figure out # a long-term solution for this. resources.initialize_resources([var0, var1]).run() # Fetch params to validate initial values self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval()) self.assertAllCloseAccordingToType([3.0], var1.eval()) # Run 1 step of sgd sgd_op.run() # Validate updated params np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0 np_grad = 2 * np_pred self.assertAllCloseAccordingToType( [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], var0.eval()) self.assertAllCloseAccordingToType([3.0 - np_grad], var1.eval())
def testAggregationMethod(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) cost = 5 * var0 + 3 * var1 global_step = variables.Variable(array_ops.zeros([], dtypes.int64), name='global_step') sgd_op = sgd.SGD(3.0) opt_op = sgd_op.minimize( cost, global_step, [var0, var1], aggregation_method=gradients_impl.AggregationMethod. EXPERIMENTAL_ACCUMULATE_N) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], var0.eval()) self.assertAllClose([3.0, 4.0], var1.eval()) # Run 1 step of sgd through optimizer opt_op.run() # Validate updated params self.assertAllClose([-14., -13.], var0.eval()) self.assertAllClose([-6., -5.], var1.eval())
def testNesterovMomentum(self): for dtype in [dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) cost = 5 * var0 * var0 + 3 * var1 global_step = variables.Variable(array_ops.zeros([], dtypes.int64), name="global_step") mom_op = sgd.SGD(learning_rate=2.0, momentum=0.9, nesterov=True) opt_op = mom_op.minimize(cost, global_step, [var0, var1]) variables.global_variables_initializer().run() for t in range(1, 5): opt_op.run() var0_np, accum0_np = self._update_nesterov_momentum_numpy( var0_np, accum0_np, var0_np * 10, 2.0, 0.9) var1_np, accum1_np = self._update_nesterov_momentum_numpy( var1_np, accum1_np, 3, 2.0, 0.9) self.assertAllClose(var0_np, var0.eval()) self.assertAllClose(var1_np, var1.eval())
def testMinimizeSparseResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: # This test invokes the ResourceSparseApplyMomentum operation, which # did not have a registered GPU kernel as of April 2018. With graph # execution, the placement algorithm notices this and automatically # places the variable in CPU (host) memory. With eager execution, # the variable would be placed in GPU memory if available, which # would then conflict with the future invocation of the # ResourceSparseApplyMomentum operation. # To work around this discrepancy, for now we force the variable # to be placed on CPU. with ops.device("/cpu:0"): var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) # pylint: disable=cell-var-from-loop def loss(): x = constant_op.constant([[4.0], [5.0]], dtype=dtype) pred = math_ops.matmul( embedding_ops.embedding_lookup([var0], [0]), x) return pred * pred # pylint: enable=cell-var-from-loop opt = sgd.SGD(learning_rate=1.0, momentum=0.0) sgd_op = opt.minimize(loss) self.evaluate(variables.global_variables_initializer()) # Run 1 step of sgd self.evaluate(sgd_op) # Validate updated params self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0))
def testPrecomputedGradient(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) cost = 5 * var0 + 3 * var1 grad_loss = constant_op.constant([42, -42], dtype=dtype) global_step = variables.Variable(array_ops.zeros([], dtypes.int64), name='global_step') sgd_op = sgd.SGD(3.0) opt_op = sgd_op.minimize(cost, global_step, [var0, var1], grad_loss=grad_loss) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], var0.eval()) self.assertAllClose([3.0, 4.0], var1.eval()) # Run 1 step of sgd through optimizer opt_op.run() # Validate updated params self.assertAllClose( [1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)], var0.eval()) self.assertAllClose( [3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)], var1.eval())
def testVariablesAcrossGraphs(self): optimizer = sgd.SGD(0.01, 0.5) with ops.Graph().as_default(): var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtypes.float32, name="var0") var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtypes.float32, name="var1") loss = math_ops.reduce_sum(var0 + var1) optimizer.minimize(loss) optimizer_variables = optimizer.variables() self.assertStartsWith(optimizer_variables[0].name, "var0") self.assertStartsWith(optimizer_variables[1].name, "var1") self.assertEquals(2, len(optimizer_variables)) with ops.Graph().as_default(): var2 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtypes.float32, name="var2") var3 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtypes.float32, name="var3") loss = math_ops.reduce_sum(var2 + var3) optimizer.minimize(loss) optimizer_variables = optimizer.variables() self.assertStartsWith(optimizer_variables[0].name, "var2") self.assertStartsWith(optimizer_variables[1].name, "var3") self.assertEquals(2, len(optimizer_variables))
def testBasic(self): for i, dtype in enumerate( [dtypes.half, dtypes.float32, dtypes.float64]): var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype) var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype) def loss(): return 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop # Note that for eager execution, minimize expects a function instead of a # Tensor. global_step = resource_variable_ops.ResourceVariable( array_ops.zeros([], dtypes.int64), name='global_step_%d' % i) sgd_op = sgd.SGD(3.0) self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) self.assertAllClose([3.0, 4.0], self.evaluate(var1)) # Run 1 step of sgd through optimizer opt_op = sgd_op.minimize(loss, global_step, [var0, var1]) self.evaluate(opt_op) # Validate updated params self.assertAllClose([-14., -13.], self.evaluate(var0)) self.assertAllClose([-6., -5.], self.evaluate(var1))
def testBasicResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype) var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) sgd_op = sgd.SGD(3.0).apply_gradients( zip([grads0, grads1], [var0, var1])) # TODO(apassos) calling initialize_resources on all resources here # doesn't work because the sessions and graph are reused across unit # tests and this would mean trying to reinitialize variables. Figure out # a long-term solution for this. resources.initialize_resources([var0, var1]).run() # Fetch params to validate initial values self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval()) self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) # Run 1 step of sgd sgd_op.run() # Validate updated params self.assertAllCloseAccordingToType( [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], var0.eval()) self.assertAllCloseAccordingToType( [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], var1.eval())
def testTensorLearningRateAndMomentum(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) mom_opt = sgd.SGD(learning_rate=constant_op.constant(2.0), momentum=constant_op.constant(0.9)) mom_update = mom_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) variables.global_variables_initializer().run() # Check we have slots self.assertEqual(["momentum"], mom_opt.get_slot_names()) slot0 = mom_opt.get_slot(var0, "momentum") self.assertEquals(slot0.get_shape(), var0.get_shape()) self.assertFalse(slot0 in variables.trainable_variables()) slot1 = mom_opt.get_slot(var1, "momentum") self.assertEquals(slot1.get_shape(), var1.get_shape()) self.assertFalse(slot1 in variables.trainable_variables()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], var0.eval()) self.assertAllClose([3.0, 4.0], var1.eval()) # Step 1: the momentum accumulators where 0. So we should see a normal # update: v -= grad * learning_rate mom_update.run() # Check that the momentum accumulators have been updated. self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), slot0.eval()) self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), slot1.eval()) # Check that the parameters have been updated. self.assertAllCloseAccordingToType( np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), var0.eval()) self.assertAllCloseAccordingToType( np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), var1.eval()) # Step 2: the momentum accumulators contain the previous update. mom_update.run() # Check that the momentum accumulators have been updated. self.assertAllCloseAccordingToType( np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval()) self.assertAllCloseAccordingToType( np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), slot1.eval()) # Check that the parameters have been updated. self.assertAllCloseAccordingToType( np.array([ 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) ]), var0.eval()) self.assertAllCloseAccordingToType( np.array([ 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) ]), var1.eval())
def testTrainOp(self): with self.cached_session(): var0 = variables.Variable([1.0, 2.0]) var1 = variables.Variable([3.0, 4.0]) cost = 5 * var0 + 3 * var1 global_step = variables.Variable(array_ops.zeros([], dtypes.int64), name='global_step') sgd_op = sgd.SGD(3.0) opt_op = sgd_op.minimize(cost, global_step, [var0, var1]) self.assertTrue( opt_op in ops.get_collection(ops.GraphKeys.TRAIN_OP))
def testNoGradientsForAnyVariables_ApplyGradients(self): for i, dtype in enumerate( [dtypes.half, dtypes.float32, dtypes.float64]): var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype) var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype) sgd_op = sgd.SGD(3.0) with self.assertRaisesRegexp( ValueError, 'No gradients provided for any variable'): sgd_op.apply_gradients([(None, var0), (None, var1)])
def testGradWrtRef(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): opt = sgd.SGD(3.0) values = [1.0, 3.0] vars_ = [variables.Variable([v], dtype=dtype) for v in values] grads_and_vars = opt.compute_gradients(vars_[0] + vars_[1], vars_) variables.global_variables_initializer().run() for grad, _ in grads_and_vars: self.assertAllCloseAccordingToType([1.0], grad.eval())
def testStopGradients(self): with self.cached_session(): var0 = variables.Variable([1.0, 2.0], name='var0') var1 = variables.Variable([3.0, 4.0], name='var1') var0_id = array_ops.identity(var0) cost = 5 * var0_id + 3 * var1 sgd_op = sgd.SGD(3.0) grads_and_vars = sgd_op.compute_gradients(cost, [var0, var1], stop_gradients=[var0_id]) grad_dict = {var.op.name: grad for grad, var in grads_and_vars} self.assertIsNone(grad_dict['var0']) self.assertIsNotNone(grad_dict['var1'])
def testLikeDistBeliefMom01(self): with self.cached_session(): db_grad, db_out = self._dbParamsMom01() num_samples = len(db_grad) var0 = variables.Variable([0.0] * num_samples) grads0 = constant_op.constant([0.0] * num_samples) mom_opt = sgd.SGD(learning_rate=0.1, momentum=0.1) mom_update = mom_opt.apply_gradients(zip([grads0], [var0])) variables.global_variables_initializer().run() for i in xrange(num_samples): mom_update.run(feed_dict={grads0: db_grad[i]}) self.assertAllClose(np.array(db_out[i]), var0.eval())
def testNoGradientsForAnyVariables_Minimize(self): for i, dtype in enumerate( [dtypes.half, dtypes.float32, dtypes.float64]): var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype) var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype) def loss(): return constant_op.constant(5.0) sgd_op = sgd.SGD(3.0) with self.assertRaisesRegexp( ValueError, 'No gradients provided for any variable'): sgd_op.minimize(loss, var_list=[var0, var1])
def testComputeGradientsWithTensors(self): x = ops.convert_to_tensor(1.0) def f(): return x * x sgd_op = sgd.SGD(3.0) grads_and_vars = sgd_op.compute_gradients(f, [x]) self.assertEqual(1, len(grads_and_vars)) grad, x_as_var = grads_and_vars[0] self.assertIs(x, x_as_var) self.assertEqual(2.0, self.evaluate(grad)) with self.assertRaises(NotImplementedError): sgd_op.apply_gradients(grads_and_vars)
def testNoGradients(self): for i, dtype in enumerate( [dtypes.half, dtypes.float32, dtypes.float64]): var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype) var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype) # pylint: disable=cell-var-from-loop def loss(): return 5 * var0 # pylint: enable=cell-var-from-loop sgd_op = sgd.SGD(3.0) with self.assertRaisesRegexp(ValueError, 'No gradients'): # var1 has no gradient sgd_op.minimize(loss, var_list=[var1])
def testNoVariables(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: # pylint: disable=cell-var-from-loop def loss(): var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype, trainable=False, name='a') var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype, trainable=False, name='b') return 5 * var0 + var1 # pylint: enable=cell-var-from-loop sgd_op = sgd.SGD(3.0) with self.assertRaisesRegexp(ValueError, 'No.*variables'): sgd_op.minimize(loss)
def testSparseNesterovMomentum(self): for dtype in [dtypes.float32, dtypes.float64]: with self.cached_session(): var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) grads = [] for t in range(1, 5): grads.append(var0_np * 10) var0_np, accum0_np = self._update_nesterov_momentum_numpy( var0_np, accum0_np, var0_np * 10, 2.0, 0.9) var1_np, accum1_np = self._update_nesterov_momentum_numpy( var1_np, accum1_np, 3, 2.0, 0.9) var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) var0 = variables.Variable(var0_np) var1 = variables.Variable(var1_np) loss = 5 * var0 * var0 + 3 * var1 mom_op = sgd.SGD(learning_rate=2.0, momentum=0.9, nesterov=True) x_feed = array_ops.placeholder(dtype) y_feed = ops.IndexedSlices(x_feed, constant_op.constant([0, 1]), constant_op.constant([2])) grads_and_vars = [(y_feed, var0), (constant_op.constant([3.0, 3.0], dtype=dtype), var1)] opt_update = mom_op.apply_gradients(grads_and_vars) variables.global_variables_initializer().run() for t in range(1, 5): opt_update.run(feed_dict={x_feed: grads[t - 1]}) var0_np, accum0_np = self._update_nesterov_momentum_numpy( var0_np, accum0_np, var0_np * 10, 2.0, 0.9) var1_np, accum1_np = self._update_nesterov_momentum_numpy( var1_np, accum1_np, 3, 2.0, 0.9) self.assertAllClose(var0_np, var0.eval()) self.assertAllClose(var1_np, var1.eval())
def testConstraint(self): constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.) constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.) with self.cached_session(): var0 = variables.Variable([1.0, 2.0], constraint=constraint_01) var1 = variables.Variable([3.0, 4.0], constraint=constraint_0) cost = 5 * var0 + 3 * var1 global_step = variables.Variable(array_ops.zeros([], dtypes.int64), name='global_step') sgd_op = sgd.SGD(3.0) opt_op = sgd_op.minimize(cost, global_step, [var0, var1]) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], var0.eval()) self.assertAllClose([3.0, 4.0], var1.eval()) # Run 1 step of sgd through optimizer opt_op.run() # Validate updated params self.assertAllClose([-0.1, -0.1], var0.eval()) self.assertAllClose([0., 0.], var1.eval())
def testTensorLearningRate(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) lrate = constant_op.constant(3.0) sgd_op = sgd.SGD(lrate).apply_gradients( zip([grads0, grads1], [var0, var1])) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval()) self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) # Run 1 step of sgd sgd_op.run() # Validate updated params self.assertAllCloseAccordingToType( [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], var0.eval()) self.assertAllCloseAccordingToType( [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], var1.eval())
def testGradientsAsVariables(self): for i, dtype in enumerate( [dtypes.half, dtypes.float32, dtypes.float64]): var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype) var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype) def loss(): return 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop sgd_op = sgd.SGD(3.0) grads_and_vars = sgd_op.compute_gradients(loss, [var0, var1]) # Convert gradients to tf.Variables converted_grads = [ resource_variable_ops.ResourceVariable(array_ops.zeros([2], dtype), name='c_%d_%d' % (i, j)) for j, gv in enumerate(grads_and_vars) ] convert_ops = [ state_ops.assign(converted_grads[j], gv[0]) for j, gv in enumerate(grads_and_vars) ] self.evaluate(variables.global_variables_initializer()) # Run convert_ops to achieve the gradietns converting self.evaluate(convert_ops) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) self.assertAllClose([3.0, 4.0], self.evaluate(var1)) # Run 1 step of sgd through optimizer converted_grads_and_vars = list(zip(converted_grads, [var0, var1])) opt_op = sgd_op.apply_gradients(converted_grads_and_vars) self.evaluate(opt_op) # Validate updated params self.assertAllClose([-14., -13.], self.evaluate(var0)) self.assertAllClose([-6., -5.], self.evaluate(var1))
def testWithGlobalStep(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): global_step = variables.Variable(0, trainable=False) var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) sgd_op = sgd.SGD(3.0).apply_gradients(zip([grads0, grads1], [var0, var1]), global_step=global_step) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval()) self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) # Run 1 step of sgd sgd_op.run() # Validate updated params and global_step self.assertAllCloseAccordingToType( [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], var0.eval()) self.assertAllCloseAccordingToType( [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], var1.eval()) self.assertAllCloseAccordingToType(1, global_step.eval())
def testMinimizeWith2DIndiciesForEmbeddingLookup(self): # This test invokes the ResourceSparseApplyMomentum operation, which # did not have a registered GPU kernel as of April 2018. With graph # execution, the placement algorithm notices this and automatically # places the variable in CPU (host) memory. With eager execution, # the variable would be placed in GPU memory if available, which # would then conflict with the future invocation of the # ResourceSparseApplyMomentum operation. # To work around this discrepancy, for now we force the variable # to be placed on CPU. with ops.device("/cpu:0"): var0 = resource_variable_ops.ResourceVariable( array_ops.ones([2, 2])) def loss(): return math_ops.reduce_sum( embedding_ops.embedding_lookup(var0, [[1]])) opt = sgd.SGD(learning_rate=1.0, momentum=0.0) sgd_op = opt.minimize(loss) self.evaluate(variables.global_variables_initializer()) self.evaluate(sgd_op) self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0))
def testSparseBasic(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) grads0 = ops.IndexedSlices( constant_op.constant([0.1], shape=[1, 1], dtype=dtype), constant_op.constant([0]), constant_op.constant([2, 1])) grads1 = ops.IndexedSlices( constant_op.constant([0.01], shape=[1, 1], dtype=dtype), constant_op.constant([1]), constant_op.constant([2, 1])) sgd_op = sgd.SGD(3.0).apply_gradients( zip([grads0, grads1], [var0, var1])) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllCloseAccordingToType([[1.0], [2.0]], var0.eval()) self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval()) # Run 1 step of sgd sgd_op.run() # Validate updated params self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], var0.eval()) self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], var1.eval())
def testSparse(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype)) var1 = variables.Variable( constant_op.constant(1.0, dtype, [4, 2])) grads0 = ops.IndexedSlices( constant_op.constant([[.1, .1]], dtype=dtype), constant_op.constant([1]), constant_op.constant([4, 2])) grads1 = ops.IndexedSlices( constant_op.constant([[.01, .01], [.01, .01]], dtype=dtype), constant_op.constant([2, 3]), constant_op.constant([4, 2])) mom_opt = sgd.SGD(learning_rate=2.0, momentum=0.9) mom_update = mom_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) variables.global_variables_initializer().run() # Check we have slots self.assertEqual(["momentum"], mom_opt.get_slot_names()) slot0 = mom_opt.get_slot(var0, "momentum") self.assertEquals(slot0.get_shape(), var0.get_shape()) slot1 = mom_opt.get_slot(var1, "momentum") self.assertEquals(slot1.get_shape(), var1.get_shape()) # Fetch params to validate initial values self.assertAllClose([0, 0], var0.eval()[0]) self.assertAllClose([0, 0], var0.eval()[1]) self.assertAllClose([1, 1], var1.eval()[2]) # Step 1: the momentum accumulators are 0. So we should see a normal # update: v -= grad * learning_rate mom_update.run() # Check that the momentum accumulators have been updated. self.assertAllCloseAccordingToType(np.array([0, 0]), slot0.eval()[0]) self.assertAllCloseAccordingToType(np.array([.1, .1]), slot0.eval()[1]) self.assertAllCloseAccordingToType(np.array([.01, .01]), slot1.eval()[2]) # Check that the parameters have been updated. self.assertAllCloseAccordingToType(np.array([0, 0]), var0.eval()[0]) self.assertAllCloseAccordingToType( np.array([-(0.1 * 2.0), -(0.1 * 2.0)]), var0.eval()[1]) self.assertAllCloseAccordingToType( np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]), var1.eval()[2]) # Step 2: the momentum accumulators contain the previous update. mom_update.run() # Check that the momentum accumulators have been updated. self.assertAllClose(np.array([0, 0]), slot0.eval()[0]) self.assertAllCloseAccordingToType( np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval()[1]) self.assertAllCloseAccordingToType( np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), slot1.eval()[2]) # Check that the parameters have been updated. self.assertAllClose(np.array([0, 0]), var0.eval()[0]) self.assertAllCloseAccordingToType( np.array([ -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) ]), var0.eval()[1]) self.assertAllCloseAccordingToType( np.array([ 0.98 - ((0.9 * 0.01 + 0.01) * 2.0), 0.98 - ((0.9 * 0.01 + 0.01) * 2.0) ]), var1.eval()[2])
def doTestBasic(self, use_resource=False, use_callable_params=False): for i, dtype in enumerate( [dtypes.half, dtypes.float32, dtypes.float64]): if use_resource: var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype, name="var0_%d" % i) var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype, name="var1_%d" % i) else: var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) learning_rate = lambda: 2.0 momentum = lambda: 0.9 if not use_callable_params: learning_rate = learning_rate() momentum = momentum() mom_opt = sgd.SGD(learning_rate=learning_rate, momentum=momentum) mom_update = mom_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) if not context.executing_eagerly(): self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) self.assertAllClose([3.0, 4.0], self.evaluate(var1)) # Check we have slots self.assertEqual(["momentum"], mom_opt.get_slot_names()) slot0 = mom_opt.get_slot(var0, "momentum") self.assertEquals(slot0.get_shape(), var0.get_shape()) slot1 = mom_opt.get_slot(var1, "momentum") self.assertEquals(slot1.get_shape(), var1.get_shape()) if not context.executing_eagerly(): self.assertFalse(slot0 in variables.trainable_variables()) self.assertFalse(slot1 in variables.trainable_variables()) # Step 1: the momentum accumulators where 0. So we should see a normal # update: v -= grad * learning_rate if not context.executing_eagerly(): self.evaluate(mom_update) # Check that the momentum accumulators have been updated. self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), self.evaluate(slot0)) self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), self.evaluate(slot1)) # Check that the parameters have been updated. self.assertAllCloseAccordingToType( np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), self.evaluate(var0)) self.assertAllCloseAccordingToType( np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), self.evaluate(var1)) # Step 2: the momentum accumulators contain the previous update. if context.executing_eagerly(): mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) else: self.evaluate(mom_update) # Check that the momentum accumulators have been updated. self.assertAllCloseAccordingToType( np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), self.evaluate(slot0)) self.assertAllCloseAccordingToType( np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), self.evaluate(slot1)) # Check that the parameters have been updated. self.assertAllCloseAccordingToType( np.array([ 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) ]), self.evaluate(var0)) self.assertAllCloseAccordingToType( np.array([ 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) ]), self.evaluate(var1))