def testSparseRepeatedIndices(self):
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  with tf.Graph().as_default():
    for dtype in _DATA_TYPES:
      var_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype)

      repeated_index_update_var = tf.Variable(var_np, dtype=dtype)
      aggregated_update_var = tf.Variable(var_np, dtype=dtype)
      grad_repeated_index = tf.IndexedSlices(
          tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
          tf.constant([1, 1]), tf.constant([2, 1]))
      grad_aggregated = tf.IndexedSlices(
          tf.constant([0.2], shape=[1, 1], dtype=dtype), tf.constant([1]),
          tf.constant([2, 1]))
      repeated_update = adagrad.Adagrad(3.0).apply_gradients([
          (grad_repeated_index, repeated_index_update_var)
      ])
      aggregated_update = adagrad.Adagrad(3.0).apply_gradients([
          (grad_aggregated, aggregated_update_var)
      ])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      self.assertAllClose(self.evaluate(aggregated_update_var),
                          self.evaluate(repeated_index_update_var))
      for _ in range(3):
        self.evaluate(repeated_update)
        self.evaluate(aggregated_update)
        self.assertAllClose(
            self.evaluate(aggregated_update_var),
            self.evaluate(repeated_index_update_var))
def testConstructAdagradWithLR(self):
  opt = adagrad.Adagrad(lr=1.0)
  opt_2 = adagrad.Adagrad(learning_rate=0.1, lr=1.0)
  opt_3 = adagrad.Adagrad(learning_rate=0.1)
  self.assertIsInstance(opt.lr, tf.Variable)
  self.assertIsInstance(opt_2.lr, tf.Variable)
  self.assertIsInstance(opt_3.lr, tf.Variable)

  self.evaluate(tf.compat.v1.global_variables_initializer())
  self.assertAllClose(self.evaluate(opt.lr), (1.0))
  self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
  self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
def testSparseStability(self):
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  with tf.Graph().as_default():
    for dtype in [tf.half]:
      shape = [1, 6]
      var0_np = np.array(
          [[0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257,
            -0.0105945]],
          dtype=dtype.as_numpy_dtype)
      var0 = tf.Variable(var0_np)
      grads0_np = np.array(
          [[-5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05, -8.4877e-05,
            -9.48906e-05]],
          dtype=dtype.as_numpy_dtype)
      grads0 = tf.IndexedSlices(
          tf.constant(grads0_np), tf.constant([0]), tf.constant(shape))
      ada_opt = adagrad.Adagrad(1.0)
      ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
      slot0 = ada_opt.get_slot(var0, "accumulator")
      init = tf.compat.v1.global_variables_initializer()
      for _ in range(100):
        self.evaluate(init)
        self.evaluate(ada_update)
      self.assertAllCloseAccordingToType(
          np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]), self.evaluate(slot0))
      self.assertAllCloseAccordingToType(
          np.array([[
              0.00891194, -0.10712013, 0.11047515, 0.22636929, -0.0144573,
              -0.01029443
          ]]), self.evaluate(var0))
def testTensorLearningRate(self):
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  with tf.Graph().as_default():
    for dtype in _DATA_TYPES:
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

      var0 = tf.Variable(var0_np)
      var1 = tf.Variable(var1_np)
      grads0 = tf.constant(grads0_np)
      grads1 = tf.constant(grads1_np)

      learning_rate = tf.constant(3.0)
      ada_opt = adagrad.Adagrad(learning_rate)
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.evaluate(tf.compat.v1.global_variables_initializer())

      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))

      accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

      # Run 3 steps of adagrad
      for _ in range(3):
        self.evaluate(ada_update)
        var0_np, accum0_np = adagrad_update_numpy(
            var0_np, accum0_np, grads0_np, learning_rate)
        var1_np, accum1_np = adagrad_update_numpy(
            var1_np, accum1_np, grads1_np, learning_rate)
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
        self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
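# NOTE: the assertions above compare against a NumPy reference named
# `adagrad_update_numpy`, which is not included in this excerpt. Below is a
# minimal sketch of such a helper, assuming the standard Adagrad rule
# (accumulate the squared gradient, then divide the step by its square root)
# and the 1e-7 default epsilon of `adagrad.Adagrad`; the exact original
# definition may differ.
def adagrad_update_numpy(param, accum, g_t, lr=0.001, epsilon=1e-7):
  # Accumulate the squared gradient, then scale the step by
  # 1 / (sqrt(accumulator) + epsilon).
  accum_t = accum + g_t * g_t
  param_t = param - lr * g_t / (np.sqrt(accum_t) + epsilon)
  return param_t, accum_t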
def testSparseSingleVarDim(self):
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  with tf.Graph().as_default():
    for dtype in _DATA_TYPES:
      var0_np = np.array([1.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)

      var0 = tf.Variable(var0_np)
      grads0_np_indices = np.array([0], dtype=np.int32)
      grads0 = tf.IndexedSlices(
          tf.constant(grads0_np[grads0_np_indices]),
          tf.constant(grads0_np_indices), tf.constant([3]))
      learning_rate = 3.0
      ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.)
      ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
      self.evaluate(tf.compat.v1.global_variables_initializer())

      # Fetch params to validate initial values
      self.assertAllClose([1.0], self.evaluate(var0))

      accum0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)

      # Run 3 steps of adagrad
      for _ in range(3):
        self.evaluate(ada_update)
        var0_np, accum0_np = sparse_adagrad_update_numpy(
            var0_np, accum0_np, grads0_np_indices,
            grads0_np[grads0_np_indices], learning_rate, epsilon=1.)
        self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
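# NOTE: `sparse_adagrad_update_numpy` is likewise referenced but not defined
# in this excerpt. A possible sketch is given below, assuming squared
# gradients for all updated rows are accumulated before any parameter step is
# taken, so repeated indices behave like a single aggregated gradient (the
# behaviour that `testSparseRepeatedIndices` checks against the fused op).
def sparse_adagrad_update_numpy(param, accum, gindexs, gvalues,
                                lr=0.001, epsilon=1e-7):
  param_t, accum_t = param.copy(), accum.copy()
  # First pass: accumulate squared gradients, summing over repeated indices.
  for gindex, gvalue in zip(gindexs, gvalues):
    accum_t[gindex] = accum_t[gindex] + gvalue * gvalue
  # Second pass: apply the Adagrad step row by row.
  for gindex, gvalue in zip(gindexs, gvalues):
    param_t[gindex] = param_t[gindex] - lr * gvalue / (
        np.sqrt(accum_t[gindex]) + epsilon)
  return param_t, accum_t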
def test_wide_deep_model(self, distribution, use_dataset_creator, data_fn):
  if ((not use_dataset_creator) and isinstance(
      distribution, tf.distribute.experimental.ParameterServerStrategy)):
    self.skipTest(
        'Parameter Server strategy requires dataset creator to be used in '
        'model.fit.')
  if (not tf.__internal__.tf2.enabled() and use_dataset_creator and
      isinstance(distribution,
                 tf.distribute.experimental.ParameterServerStrategy)):
    self.skipTest(
        'Parameter Server strategy with dataset creator needs to be run when '
        'eager execution is enabled.')

  with distribution.scope():
    linear_model = linear.LinearModel(units=1)
    dnn_model = sequential.Sequential([core.Dense(units=1)])
    wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
    linear_opt = gradient_descent.SGD(learning_rate=0.05)
    dnn_opt = adagrad.Adagrad(learning_rate=0.1)
    wide_deep_model.compile(optimizer=[linear_opt, dnn_opt], loss='mse')

    if use_dataset_creator:
      x = dataset_creator.DatasetCreator(dataset_fn)
      hist = wide_deep_model.fit(x, epochs=5, steps_per_epoch=INPUT_SIZE)
    else:
      if data_fn == 'numpy':
        inputs, output = get_numpy()
        hist = wide_deep_model.fit(inputs, output, epochs=5)
      else:
        hist = wide_deep_model.fit(get_dataset(), epochs=5)
    self.assertLess(hist.history['loss'][4], 0.2)
def testSparseRepeatedIndicesByEmbeddingLookUp(self):
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  with tf.Graph().as_default():
    for dtype in _DATA_TYPES:
      var_repeated = tf.Variable([1.0, 2.0], dtype=dtype)
      loss_repeated = lambda: tf.reduce_sum(  # pylint: disable=g-long-lambda
          tf.compat.v1.nn.embedding_lookup(var_repeated, [0, 0]))  # pylint: disable=cell-var-from-loop
      var_aggregated = tf.Variable([1.0, 2.0], dtype=dtype)
      loss_aggregated = lambda: 2 * tf.reduce_sum(  # pylint: disable=g-long-lambda
          tf.compat.v1.nn.embedding_lookup(var_aggregated, [0]))  # pylint: disable=cell-var-from-loop
      update_op_repeated = adagrad.Adagrad(2.0).minimize(
          loss_repeated, var_list=[var_repeated])
      update_op_aggregated = adagrad.Adagrad(2.0).minimize(
          loss_aggregated, var_list=[var_aggregated])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      self.assertAllCloseAccordingToType(
          self.evaluate(var_repeated), self.evaluate(var_aggregated))
      for _ in range(3):
        self.evaluate(update_op_repeated)
        self.evaluate(update_op_aggregated)
        self.assertAllCloseAccordingToType(
            self.evaluate(var_repeated), self.evaluate(var_aggregated))
def test_wide_deep_model(self, distribution, data_fn):
  with distribution.scope():
    linear_model = linear.LinearModel(units=1)
    dnn_model = sequential.Sequential([core.Dense(units=1)])
    wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
    linear_opt = gradient_descent.SGD(learning_rate=0.05)
    dnn_opt = adagrad.Adagrad(learning_rate=0.1)
    wide_deep_model.compile(optimizer=[linear_opt, dnn_opt], loss='mse')
    if data_fn == 'numpy':
      inputs, output = get_numpy()
      hist = wide_deep_model.fit(inputs, output, epochs=5)
    else:
      hist = wide_deep_model.fit(get_dataset(), epochs=5)
    self.assertLess(hist.history['loss'][4], 0.2)
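# NOTE: both wide-deep tests rely on `get_numpy()` and `get_dataset()`
# helpers that are not part of this excerpt. A hypothetical sketch follows;
# the sizes, coefficients, and batching are illustrative assumptions only,
# chosen so that a simple linear relation can be fit to a loss below 0.2
# within five epochs.
def get_numpy():
  # Two input features and one scalar target with a known linear relation.
  inputs = np.random.uniform(low=-5., high=5., size=(64, 2)).astype(np.float32)
  output = 0.3 * inputs[:, 0] + 0.2 * inputs[:, 1]
  return inputs, output


def get_dataset():
  inputs, output = get_numpy()
  dataset = tf.data.Dataset.from_tensor_slices((inputs, output))
  return dataset.batch(10).repeat(100)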
def testSharing(self):
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  with tf.Graph().as_default():
    for dtype in _DATA_TYPES:
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

      var0 = tf.Variable(var0_np)
      var1 = tf.Variable(var1_np)
      grads0 = tf.constant(grads0_np)
      grads1 = tf.constant(grads1_np)

      learning_rate = 3.0
      ada_opt = adagrad.Adagrad(learning_rate)
      # Apply the optimizer twice. Both applications will use
      # the same accums.
      ada_update1 = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      ada_update2 = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      slot0 = ada_opt.get_slot(var0, "accumulator")
      self.assertEqual(slot0.shape, var0.shape)
      slot1 = ada_opt.get_slot(var1, "accumulator")
      self.assertEqual(slot1.shape, var1.shape)
      self.evaluate(tf.compat.v1.global_variables_initializer())

      # Fetch params to validate initial values.
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))

      # Mix the first and the second adagrad for 3 steps.
      self.evaluate(ada_update1)
      self.evaluate(ada_update2)
      self.evaluate(ada_update1)

      accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
      for _ in range(3):
        var0_np, accum0_np = adagrad_update_numpy(
            var0_np, accum0_np, grads0_np, learning_rate)
        var1_np, accum1_np = adagrad_update_numpy(
            var1_np, accum1_np, grads1_np, learning_rate)
      self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
      self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def testBasicWithLearningRateInverseTimeDecay(self):
  for dtype in _DATA_TYPES:
    var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
    var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
    grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
    grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

    var0 = tf.Variable(var0_np)
    var1 = tf.Variable(var1_np)
    grads0 = tf.constant(grads0_np)
    grads1 = tf.constant(grads1_np)

    learning_rate = 3.0
    decay = 0.5
    lr_schedule = learning_rate_schedule.InverseTimeDecay(
        learning_rate, decay_steps=1.0, decay_rate=decay)
    ada_opt = adagrad.Adagrad(lr_schedule)

    accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
    accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

    if not tf.executing_eagerly():
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.evaluate(tf.compat.v1.global_variables_initializer())

    # Fetch params to validate initial values
    v0_val, v1_val = self.evaluate([var0, var1])
    self.assertAllClose([1.0, 2.0], v0_val)
    self.assertAllClose([3.0, 4.0], v1_val)

    # Run 3 steps of adagrad
    for t in range(3):
      if not tf.executing_eagerly():
        self.evaluate(ada_update)
      else:
        ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      lr_np = learning_rate / (1 + decay * t)
      var0_np, accum0_np = adagrad_update_numpy(
          var0_np, accum0_np, grads0_np, lr_np)
      var1_np, accum1_np = adagrad_update_numpy(
          var1_np, accum1_np, grads1_np, lr_np)
      self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
      self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def doTestBasic(self, use_callable_params=False):
  for dtype in _DATA_TYPES:
    var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
    var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
    grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
    grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

    var0 = tf.Variable(var0_np)
    var1 = tf.Variable(var1_np)
    grads0 = tf.constant(grads0_np)
    grads1 = tf.constant(grads1_np)

    learning_rate = lambda: 3.0
    if not use_callable_params:
      learning_rate = learning_rate()

    ada_opt = adagrad.Adagrad(learning_rate)

    accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
    accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

    if not tf.executing_eagerly():
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.evaluate(tf.compat.v1.global_variables_initializer())

    # Fetch params to validate initial values
    v0_val, v1_val = self.evaluate([var0, var1])
    self.assertAllClose([1.0, 2.0], v0_val)
    self.assertAllClose([3.0, 4.0], v1_val)

    # Run 3 steps of adagrad
    for _ in range(3):
      if not tf.executing_eagerly():
        self.evaluate(ada_update)
      else:
        ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      var0_np, accum0_np = adagrad_update_numpy(
          var0_np, accum0_np, grads0_np, 3.0)
      var1_np, accum1_np = adagrad_update_numpy(
          var1_np, accum1_np, grads1_np, 3.0)
      self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
      self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def testBasicWithLargeEpsilon(self):
  var0_np = np.array([1.0, 2.0])
  var1_np = np.array([3.0, 4.0])
  grads0_np = np.array([0.1, 0.1])
  grads1_np = np.array([0.01, 0.01])

  var0 = tf.Variable(var0_np)
  var1 = tf.Variable(var1_np)
  grads0 = tf.constant(grads0_np)
  grads1 = tf.constant(grads1_np)

  learning_rate = 3.0
  ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.0)

  accum0_np = np.array([0.1, 0.1])
  accum1_np = np.array([0.1, 0.1])

  if not tf.executing_eagerly():
    ada_update = ada_opt.apply_gradients(
        zip([grads0, grads1], [var0, var1]))
    self.evaluate(tf.compat.v1.global_variables_initializer())

  # Fetch params to validate initial values
  v0_val, v1_val = self.evaluate([var0, var1])
  self.assertAllClose([1.0, 2.0], v0_val)
  self.assertAllClose([3.0, 4.0], v1_val)

  # Run 3 steps of adagrad
  for _ in range(3):
    if not tf.executing_eagerly():
      self.evaluate(ada_update)
    else:
      ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np, grads0_np,
                                              3.0, 1.0)
    var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np, grads1_np,
                                              3.0, 1.0)
    self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
    self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def testMinimizeSparseResourceVariable(self):
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  with tf.Graph().as_default():
    for dtype in _DATA_TYPES:
      var0 = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=dtype)
      x = tf.constant([[4.0], [5.0]], dtype=dtype)

      def loss():
        pred = tf.matmul(tf.compat.v1.nn.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
        return pred * pred

      sgd_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      # Fetch params to validate initial values
      self.assertAllCloseAccordingToType([[1.0, 2.0], [3.0, 4.0]],
                                         self.evaluate(var0))
      # Run 1 step of sgd
      self.evaluate(sgd_op)
      # Validate updated params
      self.assertAllCloseAccordingToType([[0, 1], [3, 4]],
                                         self.evaluate(var0),
                                         atol=0.01)
"AdamV1", lambda: tf.compat.v1.train.AdamOptimizer(0.001, epsilon=1)) ftrl_optimizer_v1_fn = tf.__internal__.test.combinations.NamedObject( "FtrlV1", lambda: tf.compat.v1.train.FtrlOptimizer(0.001)) rmsprop_optimizer_v1_fn = tf.__internal__.test.combinations.NamedObject( "RmsPropV1", lambda: tf.compat.v1.train.RMSPropOptimizer(0.001)) # TODO(shiningsun): consider adding the other v1 optimizers optimizers_v1 = [ gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn, ftrl_optimizer_v1_fn, rmsprop_optimizer_v1_fn ] adadelta_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "AdadeltaKerasV2", lambda: adadelta_keras_v2.Adadelta(0.001)) adagrad_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "AdagradKerasV2", lambda: adagrad_keras_v2.Adagrad(0.001)) adam_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "AdamKerasV2", lambda: adam_keras_v2.Adam(0.001, epsilon=1.0)) adam_experimental_fn = tf.__internal__.test.combinations.NamedObject( "AdamExperimental", lambda: adam_experimental.Adam(0.001)) adamax_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "AdamaxKerasV2", lambda: adamax_keras_v2.Adamax(0.001, epsilon=1.0)) nadam_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "NadamKerasV2", lambda: nadam_keras_v2.Nadam(0.001, epsilon=1.0)) ftrl_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "FtrlKerasV2", lambda: ftrl_keras_v2.Ftrl(0.001)) gradient_descent_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "GradientDescentKerasV2", lambda: gradient_descent_keras_v2.SGD(0.001)) rmsprop_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "RmsPropKerasV2", lambda: rmsprop_keras_v2.RMSprop(0.001))
def testAdagrad(self):
  self._compare_numerical(adagrad_old.Adagrad(), adagrad_new.Adagrad())
def testAdagradCompatibility(self):
  opt_v1 = optimizer_v1.Adagrad(lr=0.01)
  opt_v2 = adagrad.Adagrad(learning_rate=0.01)
  self._testOptimizersCompatibility(opt_v1, opt_v2)