def testSparseRepeatedIndicesResourceVariable(self):
  with ops.Graph().as_default():
    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
      with self.cached_session():
        var_repeated = resource_variable_ops.ResourceVariable(
            [1.0, 2.0], dtype=dtype)
        loss_repeated = math_ops.reduce_sum(
            embedding_ops.embedding_lookup(var_repeated, [0, 0]))
        var_aggregated = resource_variable_ops.ResourceVariable(
            [1.0, 2.0], dtype=dtype)
        loss_aggregated = 2 * math_ops.reduce_sum(
            embedding_ops.embedding_lookup(var_aggregated, [0]))
        update_op_repeated = adagrad.AdagradOptimizer(
            2.0).minimize(loss_repeated)
        update_op_aggregated = adagrad.AdagradOptimizer(
            2.0).minimize(loss_aggregated)
        self.evaluate(variables.global_variables_initializer())
        self.assertAllCloseAccordingToType(
            self.evaluate(var_repeated), self.evaluate(var_aggregated))
        for _ in range(3):
          update_op_repeated.run()
          update_op_aggregated.run()
          self.assertAllCloseAccordingToType(
              self.evaluate(var_repeated), self.evaluate(var_aggregated))
def testSparseRepeatedIndices(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      repeated_index_update_var = variables.Variable(
          [[1.0], [2.0]], dtype=dtype)
      aggregated_update_var = variables.Variable(
          [[1.0], [2.0]], dtype=dtype)
      grad_repeated_index = ops.IndexedSlices(
          constant_op.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
          constant_op.constant([1, 1]),
          constant_op.constant([2, 1]))
      grad_aggregated = ops.IndexedSlices(
          constant_op.constant([0.2], shape=[1, 1], dtype=dtype),
          constant_op.constant([1]),
          constant_op.constant([2, 1]))
      repeated_update = adagrad.AdagradOptimizer(3.0).apply_gradients(
          [(grad_repeated_index, repeated_index_update_var)])
      aggregated_update = adagrad.AdagradOptimizer(3.0).apply_gradients(
          [(grad_aggregated, aggregated_update_var)])
      self.evaluate(variables.global_variables_initializer())
      self.assertAllClose(aggregated_update_var,
                          self.evaluate(repeated_index_update_var))
      for _ in range(3):
        repeated_update.run()
        aggregated_update.run()
        self.assertAllClose(aggregated_update_var,
                            self.evaluate(repeated_index_update_var))
def get_opt(self):
  self.dnn_optimizer = adagrad.AdagradOptimizer(
      learning_rate=_DNN_LEARNING_RATE)
  self.seq_optimizer = adagrad.AdagradOptimizer(
      learning_rate=_SEQ_LEARNING_RATE)
  self.linear_optimizer = ftrl.FtrlOptimizer(
      # learning_rate=_linear_learning_rate(len(self.linear_feature_columns)),
      learning_rate=_LINEAR_LEARNING_RATE,
      learning_rate_power=-0.5,
      initial_accumulator_value=0.1,
      l1_regularization_strength=3.0,
      l2_regularization_strength=5.0)
def testTensorLearningRate(self):
  for dtype in self.float_types:
    with self.cached_session(), self.test_scope():
      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
      ada_opt = adagrad.AdagradOptimizer(
          constant_op.constant(3.0), initial_accumulator_value=0.1)
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], var0.eval())
      self.assertAllClose([3.0, 4.0], var1.eval())
      # Run 3 steps of adagrad
      for _ in range(3):
        ada_update.run()
      # Validate updated params
      self.assertAllCloseAccordingToType(
          np.array([-1.6026098728179932, -0.6026098728179932]),
          var0.eval(),
          float_rtol=1e-5)
      self.assertAllCloseAccordingToType(
          np.array([2.715679168701172, 3.715679168701172]),
          var1.eval(),
          float_rtol=1e-5)
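# Note: the expected constants used throughout these tests (e.g.
# -1.6026098728179932 and 2.715679168701172) can be reproduced with a small
# NumPy sketch of the standard Adagrad recurrence. The helper below is NOT
# part of the original test suite; it only illustrates, under the assumption
# that AdagradOptimizer follows accum += g**2; var -= lr * g / sqrt(accum),
# where those reference values come from.
def _adagrad_reference(var, grad, lr=3.0, accum0=0.1, steps=3):
  import numpy as np
  var = np.array(var, dtype=np.float64)
  grad = np.array(grad, dtype=np.float64)
  accum = np.full_like(var, accum0)
  for _ in range(steps):
    accum += grad * grad                # accumulate squared gradients
    var -= lr * grad / np.sqrt(accum)   # scale the step by 1/sqrt(accum)
  return var

# _adagrad_reference([1.0, 2.0], [0.1, 0.1])   -> ~[-1.60261, -0.60261]
# _adagrad_reference([3.0, 4.0], [0.01, 0.01]) -> ~[ 2.71568,  3.71568]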
def test_check_find_ops_number(self):
  self.assertTrue(deo.get_update_mode() == "convergence_priority")
  deo.enable_speed_priority()
  self.assertTrue(deo.get_update_mode() == "speed_priority")
  deo.enable_convergence_priority()
  self.assertTrue(deo.get_update_mode() == "convergence_priority")
  for fn, nm in [(deo.enable_speed_priority, 2),
                 (deo.enable_convergence_priority, 6)]:
    fn()
    embeddings = deo.get_variable('UpdateModeTest' + str(nm),
                                  key_dtype=dtypes.int64,
                                  value_dtype=dtypes.float32,
                                  devices=_get_devices(),
                                  initializer=1.,
                                  dim=8)
    ids = constant_op.constant([0, 1, 2, 3, 4], dtype=dtypes.int64)
    test_var, trainable = deo.embedding_lookup([embeddings],
                                               ids,
                                               return_trainable=True)
    pred = math_ops.add(test_var, 1)
    loss = pred * pred
    opt = adagrad.AdagradOptimizer(0.1)
    opt.minimize(loss, var_list=[trainable])
    op_list = ops.get_default_graph().get_operations()
    op_list = [op.name for op in op_list if "Find" in op.name]
    self.assertTrue(len(op_list) == nm)
    ops.reset_default_graph()
def testSparseBasic(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
      var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
      grads0 = ops.IndexedSlices(
          constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
          constant_op.constant([0]),
          constant_op.constant([2, 1]))
      grads1 = ops.IndexedSlices(
          constant_op.constant([0.01], shape=[1, 1], dtype=dtype),
          constant_op.constant([1]),
          constant_op.constant([2, 1]))
      ada_opt = adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1)
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.evaluate(variables.global_variables_initializer())
      # Fetch params to validate initial values
      self.assertAllClose([[1.0], [2.0]], self.evaluate(var0))
      self.assertAllClose([[3.0], [4.0]], self.evaluate(var1))
      # Run 3 steps of adagrad
      for _ in range(3):
        ada_update.run()
      # Validate updated params
      self.assertAllCloseAccordingToType(
          np.array([[-1.6026098728179932], [2.0]]), self.evaluate(var0))
      self.assertAllCloseAccordingToType(
          np.array([[3.0], [3.715679168701172]]), self.evaluate(var1))
def testTensorLearningRate(self):
  with ops.Graph().as_default():
    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
      with self.cached_session():
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
        ada_opt = adagrad.AdagradOptimizer(
            constant_op.constant(3.0), initial_accumulator_value=0.1)
        ada_update = ada_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(variables.global_variables_initializer())
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        # Run 3 steps of adagrad
        for _ in range(3):
          ada_update.run()
        # Validate updated params
        self.assertAllCloseAccordingToType(
            np.array([-1.6026098728179932, -0.6026098728179932]),
            self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([2.715679168701172, 3.715679168701172]),
            self.evaluate(var1))
def testSparseStability(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      shape = [1, 6]
      var0 = variables.Variable(
          [[
              0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257,
              -0.0105945
          ]],
          dtype=dtype)
      grads0 = ops.IndexedSlices(
          constant_op.constant(
              [[
                  -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05,
                  -8.4877e-05, -9.48906e-05
              ]],
              shape=shape,
              dtype=dtype),
          constant_op.constant([0]),
          constant_op.constant(shape))
      ada_opt = adagrad.AdagradOptimizer(1.0, initial_accumulator_value=0.1)
      ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
      self.assertEqual(["accumulator"], ada_opt.get_slot_names())
      slot0 = ada_opt.get_slot(var0, "accumulator")
      init = variables.global_variables_initializer()
      for _ in range(100):
        init.run()
        ada_update.run()
        self.assertAllCloseAccordingToType(
            np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]), self.evaluate(slot0))
        self.assertAllCloseAccordingToType(
            np.array([[
                0.00891194, -0.10712013, 0.11047515, 0.22636929, -0.0144573,
                -0.01029443
            ]]), self.evaluate(var0))
def doTestBasic(self, use_locking=False, use_resource=False):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.test_session():
      if use_resource:
        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
      else:
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
      ada_opt = adagrad.AdagradOptimizer(
          3.0, initial_accumulator_value=0.1, use_locking=use_locking)
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], var0.eval())
      self.assertAllClose([3.0, 4.0], var1.eval())
      # Run 3 steps of adagrad
      for _ in range(3):
        ada_update.run()
      # Validate updated params
      self.assertAllCloseAccordingToType(
          np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval())
      self.assertAllCloseAccordingToType(
          np.array([2.715679168701172, 3.715679168701172]), var1.eval())
def testSharing(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0 = variables.Variable([1.0, 2.0], dtype=dtype)
      var1 = variables.Variable([3.0, 4.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
      ada_opt = adagrad.AdagradOptimizer(3.0)
      # Apply the optimizer twice.  Both applications will use
      # the same accums.
      ada_update1 = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      ada_update2 = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.assertEqual(["accumulator"], ada_opt.get_slot_names())
      slot0 = ada_opt.get_slot(var0, "accumulator")
      self.assertEqual(slot0.get_shape(), var0.get_shape())
      slot1 = ada_opt.get_slot(var1, "accumulator")
      self.assertEqual(slot1.get_shape(), var1.get_shape())
      self.evaluate(variables.global_variables_initializer())
      # Fetch params to validate initial values.
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
      # Mix the first and the second adagrad for 3 steps.
      ada_update1.run()
      ada_update2.run()
      ada_update1.run()
      # Validate updated params (the same as with only 1 Adagrad).
      self.assertAllCloseAccordingToType(
          np.array([-1.6026098728179932, -0.6026098728179932]),
          self.evaluate(var0))
      self.assertAllCloseAccordingToType(
          np.array([2.715679168701172, 3.715679168701172]),
          self.evaluate(var1))
def testDynamicShapeVariableWithCallableInit(self):
  var0 = variable_scope.get_variable("var0",
                                     initializer=constant_op.constant(1.),
                                     validate_shape=False)
  self.assertFalse(var0.shape.is_fully_defined())

  grads0 = constant_op.constant(0.1, dtype=dtypes.float32)
  learning_rate = lambda: 3.0

  ada_opt = adagrad.AdagradOptimizer(
      learning_rate, initial_accumulator_value=0.1, use_locking=True)

  if not context.executing_eagerly():
    ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
    self.evaluate(variables.global_variables_initializer())

  # Fetch params to validate initial values
  v0_val = self.evaluate([var0])
  self.assertAllClose([1.0], v0_val)

  # Run 3 steps of adagrad
  for _ in range(3):
    if not context.executing_eagerly():
      self.evaluate(ada_update)
    else:
      ada_opt.apply_gradients(zip([grads0], [var0]))

  # Validate updated params
  v0_val = self.evaluate([var0])
  self.assertAllCloseAccordingToType(np.array([-1.6026098728179932]), v0_val)
def testDynamicShapeVariable_Ok(self):
  with self.cached_session():
    v = variable_scope.get_variable("v",
                                    initializer=constant_op.constant(1.),
                                    validate_shape=False)
    self.assertFalse(v.shape.is_fully_defined())
    # Creating optimizer should cause no exception.
    adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1)
def get_multiple_optimizers():
  return [
      adagrad.AdagradOptimizer(0.1),
      adam.AdamOptimizer(0.1),
      ftrl.FtrlOptimizer(0.1),
      momentum.MomentumOptimizer(0.1, 0.1),
      rmsprop.RMSPropOptimizer(0.1)
  ]
def test_ops_with_var_and_adagrad(self):
  var_list = [
      deo.get_variable('sp_var', initializer=0.0, dim=2),
  ]
  opt_list = [
      adagrad.AdagradOptimizer(0.1),
  ]
  self.common_run_context(var_list, opt_list, name='adagrad_test')
def test_eager_gpu_cpu(self):
  l = keras.layers.Embedding(output_dim=2, input_dim=2)
  l.build((None, 2))
  inputs = keras.backend.constant([[0, 1, 0]], dtype='int32')
  with backprop.GradientTape() as tape:
    output = l(inputs)
  gs = tape.gradient(output, l.weights)
  opt = adagrad.AdagradOptimizer(0.1)
  opt.apply_gradients(zip(gs, l.weights))
  self.assertAllEqual(len(gs), 1)
def test_w_grad():
  e = TokEmbed(ps)
  e.build((None, 3))
  ins = tf.constant([[0, 1, 0]], dtype='int32')
  with tf.GradientTape() as tape:
    out = e(ins)
  print('===', out, e.weights)
  gs = tape.gradient(out, e.weights)
  opt = adagrad.AdagradOptimizer(0.1)
  opt.apply_gradients(zip(gs, e.weights))
  print('###', len(gs), 1)
def _GetOptimizer(self, opt):
  if opt == "adagrad":
    return adagrad.AdagradOptimizer(learning_rate=1e-2)
  elif opt == "adam":
    return adam.AdamOptimizer(learning_rate=1e-2)
  elif opt == "rmsprop":
    return rmsprop.RMSPropOptimizer(learning_rate=1e-2)
  elif opt == "momentum":
    return momentum.MomentumOptimizer(learning_rate=1e-2, momentum=0.9)
  elif opt == "sgd":
    return gradient_descent.GradientDescentOptimizer(learning_rate=1e-2)
  else:
    raise ValueError("Unsupported optimizer: %s" % opt)
def equivAdagradTest_AdagradPart(self, steps, dtype):
  var0, var1, grads0, grads1 = self.initVariableAndGradient(dtype)
  opt = adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1)
  adagrad_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
  variables.global_variables_initializer().run()
  # Fetch params to validate initial values
  self.assertAllClose([0.0, 0.0], self.evaluate(var0))
  self.assertAllClose([0.0, 0.0], self.evaluate(var1))
  # Run Adagrad for a few steps
  for _ in range(steps):
    adagrad_update.run()
  return self.evaluate(var0), self.evaluate(var1)
def testEquivAdagradwithoutRegularization(self):
  with self.session(), self.test_scope():
    val0, val1 = self.applyOptimizer(
        proximal_adagrad.ProximalAdagradOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.0,
            l2_regularization_strength=0.0))
  with self.session(), self.test_scope():
    val2, val3 = self.applyOptimizer(
        adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1))
  self.assertAllClose(val0, val2)
  self.assertAllClose(val1, val3)
def doTestBasic(self,
                use_locking=False,
                use_resource=False,
                use_callable_params=False):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    if use_resource:
      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
    else:
      var0 = variables.Variable([1.0, 2.0], dtype=dtype)
      var1 = variables.Variable([3.0, 4.0], dtype=dtype)
    grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
    grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)

    learning_rate = lambda: 3.0
    if not use_callable_params:
      learning_rate = learning_rate()

    ada_opt = adagrad.AdagradOptimizer(
        learning_rate, initial_accumulator_value=0.1, use_locking=use_locking)

    if not context.executing_eagerly():
      ada_update = ada_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      self.evaluate(variables.global_variables_initializer())

    # Fetch params to validate initial values
    v0_val, v1_val = self.evaluate([var0, var1])
    self.assertAllClose([1.0, 2.0], v0_val)
    self.assertAllClose([3.0, 4.0], v1_val)

    # Run 3 steps of adagrad
    for _ in range(3):
      if not context.executing_eagerly():
        self.evaluate(ada_update)
      else:
        ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

    # Validate updated params
    v0_val, v1_val = self.evaluate([var0, var1])
    self.assertAllCloseAccordingToType(
        np.array([-1.6026098728179932, -0.6026098728179932]), v0_val)
    self.assertAllCloseAccordingToType(
        np.array([2.715679168701172, 3.715679168701172]), v1_val)
def testEquivAdagradwithoutRegularization(self):
  # ProximalAdagradOptimizer is supported only in V1.
  with ops.Graph().as_default(), self.cached_session():
    val0, val1 = self.applyOptimizer(
        proximal_adagrad.ProximalAdagradOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.0,
            l2_regularization_strength=0.0))
  with ops.Graph().as_default(), self.cached_session():
    val2, val3 = self.applyOptimizer(
        adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1))
  self.assertAllClose(val0, val2)
  self.assertAllClose(val1, val3)
def testEquivSparseAdagradwithoutRegularization(self):
  with self.cached_session():
    val0, val1 = self.applyOptimizer(
        proximal_adagrad.ProximalAdagradOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.0,
            l2_regularization_strength=0.0),
        is_sparse=True)
  with self.cached_session():
    val2, val3 = self.applyOptimizer(
        adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1),
        is_sparse=True)
  self.assertAllClose(val0, val2)
  self.assertAllClose(val1, val3)
def test_inference_numberic_correctness(self):
  train_pred = None
  infer_pred = None
  dim = 8
  initializer = init_ops.random_normal_initializer(0.0, 0.001)
  raw_init_vals = np.random.rand(100, dim)

  for fn in [de.enable_train_mode, de.enable_inference_mode]:
    with ops.Graph().as_default():
      fn()

      init_ids = constant_op.constant(list(range(100)), dtype=dtypes.int64)
      init_vals = constant_op.constant(raw_init_vals, dtype=dtypes.float32)
      with variable_scope.variable_scope("modelmode",
                                         reuse=variable_scope.AUTO_REUSE):
        embeddings = de.get_variable('ModelModeTest-numberic',
                                     key_dtype=dtypes.int64,
                                     value_dtype=dtypes.float32,
                                     devices=_get_devices() * 2,
                                     initializer=initializer,
                                     dim=dim)

        w = variables.Variable(1.0, name="w")
        _ = training_util.create_global_step()
      init_op = embeddings.upsert(init_ids, init_vals)

      ids = constant_op.constant([0, 1, 2, 3, 4], dtype=dtypes.int64)
      test_var, trainable = de.embedding_lookup([embeddings],
                                                ids,
                                                return_trainable=True)
      pred = math_ops.add(test_var, 1) * w
      loss = pred * pred
      opt = de.DynamicEmbeddingOptimizer(adagrad.AdagradOptimizer(0.1))
      opt.minimize(loss)

      with monitored_session.MonitoredTrainingSession(
          is_chief=True, config=default_config) as sess:
        if de.get_model_mode() == de.ModelMode.TRAIN:
          sess.run(init_op)
          train_pred = sess.run(pred)
        elif de.get_model_mode() == de.ModelMode.INFERENCE:
          sess.run(init_op)
          infer_pred = sess.run(pred)
      de.enable_train_mode()
      ops.reset_default_graph()

  self.assertAllEqual(train_pred, infer_pred)
def testMinimizeSparseResourceVariable(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0 = resource_variable_ops.ResourceVariable(
          [[1.0, 2.0], [3.0, 4.0]], dtype=dtype)
      x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
      pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
      loss = pred * pred
      sgd_op = adagrad.AdagradOptimizer(1.0).minimize(loss)
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllCloseAccordingToType([[1.0, 2.0], [3.0, 4.0]],
                                         self.evaluate(var0))
      # Run 1 step of adagrad
      sgd_op.run()
      # Validate updated params
      self.assertAllCloseAccordingToType([[0, 1], [3, 4]],
                                         self.evaluate(var0),
                                         atol=0.01)
def testEquivAdagradwithoutRegularization(self):
  for dtype in [dtypes.half, dtypes.float32]:
    with self.cached_session():
      val0, val1 = self.applyOptimizer(
          ftrl.FtrlOptimizer(
              3.0,  # Adagrad learning rate
              learning_rate_power=-0.5,
              initial_accumulator_value=0.1,
              l1_regularization_strength=0.0,
              l2_regularization_strength=0.0),
          dtype)
    with self.cached_session():
      val2, val3 = self.applyOptimizer(
          adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1), dtype)
    self.assertAllCloseAccordingToType(val0, val2)
    self.assertAllCloseAccordingToType(val1, val3)
def testEquivAdagradwithoutRegularization(self):
  # TODO(tanzheny, omalleyt): Fix test in eager mode.
  for dtype in [dtypes.half, dtypes.float32]:
    with ops.Graph().as_default(), self.cached_session():
      val0, val1 = self.applyOptimizer(
          ftrl.Ftrl(
              3.0,  # Adagrad learning rate
              learning_rate_power=-0.5,
              initial_accumulator_value=0.1,
              l1_regularization_strength=0.0,
              l2_regularization_strength=0.0),
          dtype)
    with ops.Graph().as_default(), self.cached_session():
      val2, val3 = self.applyOptimizer(
          adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1), dtype)
    self.assertAllCloseAccordingToType(val0, val2)
    self.assertAllCloseAccordingToType(val1, val3)
def benchmarkCustomOptimizer(self):
  iris = test_data.prepare_iris_data_for_logistic_regression()
  cont_feature = feature_column.real_valued_column('feature', dimension=4)
  bucketized_feature = feature_column.bucketized_column(
      cont_feature, test_data.get_quantile_based_buckets(iris.data, 10))

  classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
      model_dir=tempfile.mkdtemp(),
      linear_feature_columns=(bucketized_feature,),
      linear_optimizer=ftrl.FtrlOptimizer(learning_rate=0.1),
      dnn_feature_columns=(cont_feature,),
      dnn_hidden_units=(3, 3),
      dnn_optimizer=adagrad.AdagradOptimizer(learning_rate=0.1))

  input_fn = test_data.iris_input_logistic_fn
  metrics = classifier.fit(input_fn=input_fn,
                           steps=_ITERS).evaluate(input_fn=input_fn,
                                                  steps=100)
  self._assertSingleClassMetrics(metrics)
def testEquivAdagradwithoutRegularization(self):
  # The v1 optimizers do not support eager execution
  with ops.Graph().as_default():
    for dtype in [dtypes.half, dtypes.float32]:
      with self.cached_session():
        val0, val1 = self.applyOptimizer(
            ftrl.FtrlOptimizer(
                3.0,  # Adagrad learning rate
                learning_rate_power=-0.5,
                initial_accumulator_value=0.1,
                l1_regularization_strength=0.0,
                l2_regularization_strength=0.0),
            dtype)
      with self.cached_session():
        val2, val3 = self.applyOptimizer(
            adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1),
            dtype)
      self.assertAllCloseAccordingToType(val0, val2, half_rtol=2e-3)
      self.assertAllCloseAccordingToType(val1, val3, half_rtol=2e-3)
    lambda: mirrored_lib.MirroredStrategy(["/cpu:1", "/cpu:2"]))
central_storage_strategy_with_two_gpus = combinations.NamedDistribution(
    "CentralStorage2GPUs",
    lambda: central_storage_strategy.CentralStorageStrategy._from_num_gpus(2),  # pylint: disable=protected-access
    required_gpus=2)
central_storage_strategy_with_gpu_and_cpu = combinations.NamedDistribution(
    "CentralStorageCPUAndGPU",
    lambda: central_storage_strategy.CentralStorageStrategy(
        ["/gpu:0", "/cpu:0"]),
    required_gpus=1)

gradient_descent_optimizer_v1_fn = combinations.NamedObject(
    "GradientDescentV1",
    lambda: gradient_descent.GradientDescentOptimizer(0.2))
adagrad_optimizer_v1_fn = combinations.NamedObject(
    "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001))
adam_optimizer_v1_fn = combinations.NamedObject(
    "AdamV1", lambda: adam.AdamOptimizer(0.001, epsilon=1))
rmsprop_optimizer_v1_fn = combinations.NamedObject(
    "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001))

# TODO(shiningsun): consider adding the other v1 optimizers
optimizers_v1 = [gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn]

adadelta_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdadeltaKerasV2", lambda: adadelta_keras_v2.Adadelta(0.001))
adagrad_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdagradKerasV2", lambda: adagrad_keras_v2.Adagrad(0.001))
adam_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdamKerasV2", lambda: adam_keras_v2.Adam(0.001, epsilon=1.0))
adamax_optimizer_keras_v2_fn = combinations.NamedObject(
def test_adagrad_apply_restriction(self):
  opt = adagrad.AdagradOptimizer(0.1)
  self.commonly_apply_restriction_verify(opt)