def testFtrlWithL1(self):
  # The v1 optimizers do not support eager execution
  with ops.Graph().as_default():
    for dtype in [dtypes.half, dtypes.float32]:
      with self.cached_session():
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
        opt = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=0.0)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()

        v0_val, v1_val = self.evaluate([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run 10 steps FTRL
        for _ in range(10):
          update.run()

        v0_val, v1_val = self.evaluate([var0, var1])
        self.assertAllCloseAccordingToType(
            np.array([-7.66718769, -10.91273689]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.93460727, -1.86147261]), v1_val)
def testFtrlWithL1_L2(self):
  for dtype in self.float_types:
    with self.session(), self.test_scope():
      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
      var1 = resource_variable_ops.ResourceVariable([4.0, 3.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
      opt = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=0.1,
          l1_regularization_strength=0.001,
          l2_regularization_strength=2.0)
      ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      self.evaluate(variables.global_variables_initializer())
      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([4.0, 3.0], self.evaluate(var1))

      # Run 10 steps FTRL
      for _ in range(10):
        ftrl_update.run()

      # Validate updated params
      self.assertAllCloseAccordingToType(
          np.array([-0.24059935, -0.46829352]),
          self.evaluate(var0),
          rtol=1e-5)
      self.assertAllCloseAccordingToType(
          np.array([-0.02406147, -0.04830509]),
          self.evaluate(var1),
          rtol=1e-5)
def testFtrlWithL1_L2_L2ShrinkageSparse(self):
  """Tests the new FTRL op with support for l2 shrinkage on sparse grads."""
  for dtype in [dtypes.half, dtypes.float32]:
    with self.test_session() as sess:
      var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
      var1 = variables.Variable([[4.0], [3.0]], dtype=dtype)
      grads0 = ops.IndexedSlices(
          constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
          constant_op.constant([0]), constant_op.constant([2, 1]))
      grads1 = ops.IndexedSlices(
          constant_op.constant([0.02], shape=[1, 1], dtype=dtype),
          constant_op.constant([1]), constant_op.constant([2, 1]))
      opt = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=0.1,
          l1_regularization_strength=0.001,
          l2_regularization_strength=2.0,
          l2_shrinkage_regularization_strength=0.1)
      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()

      v0_val, v1_val = sess.run([var0, var1])
      self.assertAllCloseAccordingToType([[1.0], [2.0]], v0_val)
      self.assertAllCloseAccordingToType([[4.0], [3.0]], v1_val)

      # Run 10 steps FTRL
      for _ in range(10):
        update.run()

      v0_val, v1_val = sess.run([var0, var1])
      self.assertAllCloseAccordingToType([[-0.22578995], [2.]], v0_val)
      self.assertAllCloseAccordingToType([[4.], [-0.13229476]], v1_val)
def testFtrlwithoutRegularization2(self):
  for dtype in self.float_types:
    with self.session(), self.test_scope():
      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
      var1 = resource_variable_ops.ResourceVariable([4.0, 3.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
      opt = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=0.1,
          l1_regularization_strength=0.0,
          l2_regularization_strength=0.0)
      ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      self.evaluate(variables.global_variables_initializer())
      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([4.0, 3.0], self.evaluate(var1))

      # Run 3 steps FTRL
      for _ in range(3):
        ftrl_update.run()

      # Validate updated params
      self.assertAllCloseAccordingToType(
          np.array([-2.55607247, -3.98729396]),
          self.evaluate(var0),
          1e-5,
          1e-5,
          float_rtol=1e-4)
      self.assertAllCloseAccordingToType(
          np.array([-0.28232238, -0.56096673]),
          self.evaluate(var1),
          1e-5,
          1e-5)
def testFtrlwithoutRegularization2(self):
  for dtype in [dtypes.half, dtypes.float32]:
    with self.cached_session() as sess:
      var0 = variables.Variable([1.0, 2.0], dtype=dtype)
      var1 = variables.Variable([4.0, 3.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
      opt = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=0.1,
          l1_regularization_strength=0.0,
          l2_regularization_strength=0.0)
      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()

      v0_val, v1_val = sess.run([var0, var1])
      self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
      self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

      # Run 3 steps FTRL
      for _ in range(3):
        update.run()

      v0_val, v1_val = sess.run([var0, var1])
      self.assertAllCloseAccordingToType(
          np.array([-2.55607247, -3.98729396]), v0_val)
      self.assertAllCloseAccordingToType(
          np.array([-0.28232238, -0.56096673]), v1_val)
def testFtrlWithL1(self):
  for dtype in self.float_types:
    with self.test_session(), self.test_scope():
      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
      var1 = resource_variable_ops.ResourceVariable([4.0, 3.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
      opt = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=0.1,
          l1_regularization_strength=0.001,
          l2_regularization_strength=0.0)
      ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], var0.eval())
      self.assertAllClose([4.0, 3.0], var1.eval())

      # Run 10 steps FTRL
      for _ in range(10):
        ftrl_update.run()

      # Validate updated params
      self.assertAllClose(
          np.array([-7.66718769, -10.91273689]), var0.eval())
      self.assertAllClose(
          np.array([-0.93460727, -1.86147261]), var1.eval())
def testFtrlWithL1_L2_L2Shrinkage(self):
  """Tests the new FTRL op with support for l2 shrinkage.

  The addition of this parameter, which places a constant pressure on weights
  towards the origin, causes the gradient descent trajectory to differ. The
  weights will tend to have smaller magnitudes with this parameter set.
  """
  for dtype in self.float_types:
    with self.cached_session(), self.test_scope():
      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
      var1 = resource_variable_ops.ResourceVariable([4.0, 3.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
      opt = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=0.1,
          l1_regularization_strength=0.001,
          l2_regularization_strength=2.0,
          l2_shrinkage_regularization_strength=0.1)
      ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
      self.assertAllCloseAccordingToType([4.0, 3.0], var1.eval())

      # Run 10 steps FTRL
      for _ in range(10):
        ftrl_update.run()

      # Validate updated params
      self.assertAllCloseAccordingToType(
          np.array([-0.22578996, -0.44345799]), var0.eval(), rtol=1e-4)
      self.assertAllCloseAccordingToType(
          np.array([-0.14378493, -0.13229476]), var1.eval(), rtol=1e-4)
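# A hedged aside on l2_shrinkage_regularization_strength (reference sketch
# only; the helper below is illustrative and not part of the test suite).
# The shrinkage term enters the update as an extra gradient component that
# pulls weights toward the origin, while the gradient accumulator is still
# fed the unmodified gradient, so the adaptive learning-rate schedule is
# unchanged (see testFtrlWithL2ShrinkageDoesNotChangeLrSchedule below).
def shrinkage_gradient(grad, var, l2_shrinkage=0.1):
  """Gradient fed to the FTRL linear term when l2 shrinkage is enabled."""
  # Equivalent to adding l2_shrinkage * ||w||^2 to the loss before the step;
  # the accumulator update still uses grad * grad, not this value.
  return grad + 2.0 * l2_shrinkage * var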
def testFtrlwithoutRegularization(self):
  for dtype in self.float_types:
    with self.test_session(), self.test_scope():
      var0 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
      var1 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
      opt = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=0.1,
          l1_regularization_strength=0.0,
          l2_regularization_strength=0.0)
      ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllClose([0.0, 0.0], var0.eval())
      self.assertAllClose([0.0, 0.0], var1.eval())

      # Run 3 steps FTRL
      for _ in range(3):
        ftrl_update.run()

      # Validate updated params
      self.assertAllCloseAccordingToType(
          np.array([-2.60260963, -4.29698515]), var0.eval())
      self.assertAllCloseAccordingToType(
          np.array([-0.28432083, -0.56694895]), var1.eval())
def testFtrlWithL1_L2(self):
  for dtype in [dtypes.half, dtypes.float32]:
    with self.test_session() as sess:
      var0 = variables.Variable([1.0, 2.0], dtype=dtype)
      var1 = variables.Variable([4.0, 3.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
      opt = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=0.1,
          l1_regularization_strength=0.001,
          l2_regularization_strength=2.0)
      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()

      v0_val, v1_val = sess.run([var0, var1])
      self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
      self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

      # Run 10 steps FTRL
      for _ in range(10):
        update.run()

      v0_val, v1_val = sess.run([var0, var1])
      self.assertAllCloseAccordingToType(
          np.array([-0.24059935, -0.46829352]), v0_val)
      self.assertAllCloseAccordingToType(
          np.array([-0.02406147, -0.04830509]), v1_val)
def doTestFtrlwithoutRegularization(self, use_resource=False):
  for dtype in [dtypes.half, dtypes.float32]:
    with self.cached_session() as sess:
      if use_resource:
        var0 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
      else:
        var0 = variables.Variable([0.0, 0.0], dtype=dtype)
        var1 = variables.Variable([0.0, 0.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
      opt = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=0.1,
          l1_regularization_strength=0.0,
          l2_regularization_strength=0.0)
      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()

      v0_val, v1_val = sess.run([var0, var1])
      self.assertAllClose([0.0, 0.0], v0_val)
      self.assertAllClose([0.0, 0.0], v1_val)

      # Run 3 steps FTRL
      for _ in range(3):
        update.run()

      v0_val, v1_val = sess.run([var0, var1])
      self.assertAllCloseAccordingToType(
          np.array([-2.60260963, -4.29698515]), v0_val)
      self.assertAllCloseAccordingToType(
          np.array([-0.28432083, -0.56694895]), v1_val)
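# A minimal NumPy sketch of the dense FTRL-Proximal update that the tests
# above assert against (the helper name is illustrative, not part of the
# test suite). With learning_rate=3.0, initial_accumulator_value=0.1 and no
# regularization, three steps on grads [0.1, 0.2] starting from [0.0, 0.0]
# land close to [-2.60260963, -4.29698515], matching
# doTestFtrlwithoutRegularization.
import numpy as np


def ftrl_reference_step(var, accum, linear, grad, lr=3.0, lr_power=-0.5,
                        l1=0.0, l2=0.0):
  """Applies one dense FTRL-Proximal step element-wise (reference only)."""
  new_accum = accum + grad * grad
  # Per-coordinate learning-rate term; with lr_power=-0.5 this is sqrt(accum).
  sigma = (new_accum**(-lr_power) - accum**(-lr_power)) / lr
  linear = linear + grad - sigma * var
  quadratic = new_accum**(-lr_power) / lr + 2.0 * l2
  # Soft-thresholding by l1; coordinates with |linear| <= l1 are zeroed.
  var = np.where(
      np.abs(linear) > l1, (np.sign(linear) * l1 - linear) / quadratic, 0.0)
  return var, new_accum, linear


if __name__ == "__main__":
  var = np.array([0.0, 0.0])
  accum = np.array([0.1, 0.1])
  linear = np.array([0.0, 0.0])
  grad = np.array([0.1, 0.2])
  for _ in range(3):
    var, accum, linear = ftrl_reference_step(var, accum, linear, grad)
  print(var)  # ~[-2.6026, -4.2970]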
def testFtrlWithBeta(self):
  # The v1 optimizers do not support eager execution
  with ops.Graph().as_default():
    for dtype in [dtypes.half, dtypes.float32]:
      with self.cached_session():
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
        opt = ftrl.FtrlOptimizer(3.0, initial_accumulator_value=0.1, beta=0.1)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(variables.global_variables_initializer())

        v0_val, v1_val = self.evaluate([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run 10 steps FTRL
        for _ in range(10):
          update.run()

        v0_val, v1_val = self.evaluate([var0, var1])
        self.assertAllCloseAccordingToType(
            np.array([-6.096838, -9.162214]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.717741, -1.425132]), v1_val)
def testFtrlStatelesswithoutRegularization2(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    initial_accumulator_value = 0.1
    with self.cached_session() as sess:
      var0 = constant_op.constant([1.0, 2.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
      accum = constant_op.constant(
          [initial_accumulator_value, initial_accumulator_value], dtype=dtype)
      linear = constant_op.constant([0.0, 0.0], dtype=dtype)
      opt = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=initial_accumulator_value,
          l1_regularization_strength=0.0,
          l2_regularization_strength=0.0)
      opt._prepare()
      new_var0, new_accum0, new_linear0 = opt._apply_dense_hash(
          grads0, var0, accum, linear)
      variables.global_variables_initializer().run()

      v0_val = self.evaluate(var0)
      self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)

      # Run 1 step of FTRL
      v0_val = self.evaluate(new_var0)
      self.assertAllCloseAccordingToType(
          np.array([-0.857997, -1.293876]), v0_val)
def test_ops_with_var_and_ftrl(self):
  var_list = [
      deo.get_variable('sp_var', initializer=0.0, dim=2),
  ]
  opt_list = [
      ftrl.FtrlOptimizer(0.1),
  ]
  self.common_run_context(var_list, opt_list, name='ftrl_test')
def get_multiple_optimizers():
  return [
      adagrad.AdagradOptimizer(0.1),
      adam.AdamOptimizer(0.1),
      ftrl.FtrlOptimizer(0.1),
      momentum.MomentumOptimizer(0.1, 0.1),
      rmsprop.RMSPropOptimizer(0.1)
  ]
def testFtrlWithL2ShrinkageDoesNotChangeLrSchedule(self):
  """Verifies that l2 shrinkage in FTRL does not change lr schedule."""
  # The v1 optimizers do not support eager execution
  with ops.Graph().as_default():
    for dtype in [dtypes.half, dtypes.float32]:
      with self.cached_session():
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([1.0, 2.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.1, 0.2], dtype=dtype)
        opt0 = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=2.0,
            l2_shrinkage_regularization_strength=0.1)
        opt1 = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=2.0)
        update0 = opt0.apply_gradients([(grads0, var0)])
        update1 = opt1.apply_gradients([(grads1, var1)])
        self.evaluate(variables.global_variables_initializer())

        v0_val, v1_val = self.evaluate([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([1.0, 2.0], v1_val)

        # Run 10 steps FTRL
        for _ in range(10):
          update0.run()
          update1.run()

        v0_val, v1_val = self.evaluate([var0, var1])
        # var0 is experiencing L2 shrinkage so it should be smaller than var1
        # in magnitude.
        self.assertTrue((v0_val**2 < v1_val**2).all())
        accum0 = list(self.evaluate(opt0._slots)["accum"].values())[0]
        accum1 = list(self.evaluate(opt1._slots)["accum"].values())[0]
        # L2 shrinkage should not change how we update grad accumulator.
        self.assertAllCloseAccordingToType(accum0, accum1)
def testFtrlWithL2ShrinkageDoesNotChangeLrSchedule(self):
  """Verifies that l2 shrinkage in FTRL does not change lr schedule."""
  for dtype in self.float_types:
    with self.test_session(), self.test_scope():
      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
      var1 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
      grads1 = constant_op.constant([0.1, 0.2], dtype=dtype)
      opt0 = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=0.1,
          l1_regularization_strength=0.001,
          l2_regularization_strength=2.0,
          l2_shrinkage_regularization_strength=0.1)
      opt1 = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=0.1,
          l1_regularization_strength=0.001,
          l2_regularization_strength=2.0)
      update0 = opt0.apply_gradients([(grads0, var0)])
      update1 = opt1.apply_gradients([(grads1, var1)])
      variables.global_variables_initializer().run()

      self.assertAllCloseAccordingToType([1.0, 2.0], self.evaluate(var0))
      self.assertAllCloseAccordingToType([1.0, 2.0], self.evaluate(var1))

      # Run 10 steps FTRL
      for _ in range(10):
        update0.run()
        update1.run()

      # var0 is experiencing L2 shrinkage so it should be smaller than var1
      # in magnitude.
      self.assertTrue((var0.eval()**2 < self.evaluate(var1)**2).all())
      accum0 = list(opt0._slots["accum"].values())[0].eval()
      accum1 = list(opt1._slots["accum"].values())[0].eval()
      # L2 shrinkage should not change how we update grad accumulator.
      self.assertAllCloseAccordingToType(accum0, accum1)
def get_opt(self):
  self.dnn_optimizer = adagrad.AdagradOptimizer(
      learning_rate=_DNN_LEARNING_RATE)
  self.seq_optimizer = adagrad.AdagradOptimizer(
      learning_rate=_SEQ_LEARNING_RATE)
  self.linear_optimizer = ftrl.FtrlOptimizer(
      # learning_rate=_linear_learning_rate(len(self.linear_feature_columns)),
      learning_rate=_LINEAR_LEARNING_RATE,
      learning_rate_power=-0.5,
      initial_accumulator_value=0.1,
      l1_regularization_strength=3.0,
      l2_regularization_strength=5.0)
def testMinimizeSparseResourceVariable(self):
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
      x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
      pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
      loss = pred * pred
      sgd_op = ftrl.FtrlOptimizer(1.0).minimize(loss)
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
      # Run 1 step of sgd
      sgd_op.run()
      # Validate updated params
      self.assertAllCloseAccordingToType([[0, 1]], var0.eval(), atol=0.01)
def testEquivGradientDescentwithoutRegularization(self):
  for dtype in [dtypes.half, dtypes.float32]:
    with self.cached_session():
      val0, val1 = self.applyOptimizer(
          ftrl.FtrlOptimizer(
              3.0,  # Fixed learning rate
              learning_rate_power=-0.0,
              initial_accumulator_value=0.1,
              l1_regularization_strength=0.0,
              l2_regularization_strength=0.0), dtype)

    with self.cached_session():
      val2, val3 = self.applyOptimizer(
          gradient_descent.GradientDescentOptimizer(3.0), dtype)

    self.assertAllCloseAccordingToType(val0, val2)
    self.assertAllCloseAccordingToType(val1, val3)
def equivGradientDescentTest_FtrlPart(self, steps, dtype):
  var0, var1, grads0, grads1 = self.initVariableAndGradient(dtype)
  opt = ftrl.FtrlOptimizer(
      3.0,
      learning_rate_power=-0.0,  # using Fixed learning rate
      initial_accumulator_value=0.1,
      l1_regularization_strength=0.0,
      l2_regularization_strength=0.0)
  ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
  variables.global_variables_initializer().run()
  # Fetch params to validate initial values
  self.assertAllClose([0.0, 0.0], self.evaluate(var0))
  self.assertAllClose([0.0, 0.0], self.evaluate(var1))

  # Run Ftrl for a few steps
  for _ in range(steps):
    ftrl_update.run()

  return self.evaluate(var0), self.evaluate(var1)
def testEquivAdagradwithoutRegularization(self):
  for dtype in [dtypes.half, dtypes.float32]:
    with self.test_session():
      val0, val1 = self.applyOptimizer(
          ftrl.FtrlOptimizer(
              3.0,  # Adagrad learning rate
              learning_rate_power=-0.5,
              initial_accumulator_value=0.1,
              l1_regularization_strength=0.0,
              l2_regularization_strength=0.0), dtype)

    with self.test_session():
      val2, val3 = self.applyOptimizer(
          adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1), dtype)

    self.assertAllCloseAccordingToType(val0, val2)
    self.assertAllCloseAccordingToType(val1, val3)
def benchmarkCustomOptimizer(self):
  iris = test_data.prepare_iris_data_for_logistic_regression()
  cont_feature = feature_column.real_valued_column('feature', dimension=4)
  bucketized_feature = feature_column.bucketized_column(
      cont_feature, test_data.get_quantile_based_buckets(iris.data, 10))

  classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
      model_dir=tempfile.mkdtemp(),
      linear_feature_columns=(bucketized_feature,),
      linear_optimizer=ftrl.FtrlOptimizer(learning_rate=0.1),
      dnn_feature_columns=(cont_feature,),
      dnn_hidden_units=(3, 3),
      dnn_optimizer=adagrad.AdagradOptimizer(learning_rate=0.1))

  input_fn = test_data.iris_input_logistic_fn
  metrics = classifier.fit(
      input_fn=input_fn, steps=_ITERS).evaluate(
          input_fn=input_fn, steps=100)
  self._assertSingleClassMetrics(metrics)
def testFtrlStatelessWithL1_L2_L2Shrinkage(self):
  """Tests the new FTRL Stateless op with support for l2 shrinkage.

  The addition of this parameter, which places a constant pressure on weights
  towards the origin, causes the gradient descent trajectory to differ. The
  weights will tend to have smaller magnitudes with this parameter set.
  """
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    initial_accumulator_value = 0.1
    with self.cached_session() as sess:
      var0 = constant_op.constant([1.0, 2.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
      accum = constant_op.constant(
          [initial_accumulator_value, initial_accumulator_value], dtype=dtype)
      linear = constant_op.constant([0.0, 0.0], dtype=dtype)
      opt = ftrl.FtrlOptimizer(
          3.0,
          initial_accumulator_value=initial_accumulator_value,
          l1_regularization_strength=0.001,
          l2_regularization_strength=2.0,
          l2_shrinkage_regularization_strength=0.1)
      opt._prepare()
      new_var0, new_accum, new_linear = opt._apply_dense_hash(
          grads0, var0, accum, linear)

      v0_val = self.evaluate(var0)
      self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)

      new_var0_val = self.evaluate(new_var0)
      self.assertAllCloseAccordingToType(
          np.array([-0.07148796, -0.13585758]), new_var0_val)
      new_accum_val = self.evaluate(new_accum)
      self.assertAllCloseAccordingToType(np.array([0.11, 0.14]), new_accum_val)
      new_linear_val = self.evaluate(new_linear)
      self.assertAllCloseAccordingToType(
          np.array([0.29485512, 0.5613747]), new_linear_val)
def testEquivAdagradwithoutRegularization(self):
  # The v1 optimizers do not support eager execution
  with ops.Graph().as_default():
    for dtype in [dtypes.half, dtypes.float32]:
      with self.cached_session():
        val0, val1 = self.applyOptimizer(
            ftrl.FtrlOptimizer(
                3.0,  # Adagrad learning rate
                learning_rate_power=-0.5,
                initial_accumulator_value=0.1,
                l1_regularization_strength=0.0,
                l2_regularization_strength=0.0), dtype)

      with self.cached_session():
        val2, val3 = self.applyOptimizer(
            adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1),
            dtype)

      self.assertAllCloseAccordingToType(val0, val2, half_rtol=2e-3)
      self.assertAllCloseAccordingToType(val1, val3, half_rtol=2e-3)
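# Why the equivalence tests above hold (a small standalone check, not part of
# the test suite; helper names are illustrative): with l1 = l2 = 0, unrolling
# the FTRL linear/sigma terms gives the per-step update
# w <- w - lr * g / new_accum**(-lr_power). With lr_power = -0.5 that is
# exactly Adagrad's lr / sqrt(accum) step, and with lr_power = 0.0 the
# denominator is 1, i.e. plain gradient descent.
import numpy as np


def ftrl_step_no_reg(var, accum, linear, grad, lr, lr_power):
  """One unregularized dense FTRL step (reference only)."""
  new_accum = accum + grad * grad
  sigma = (new_accum**(-lr_power) - accum**(-lr_power)) / lr
  linear = linear + grad - sigma * var
  var = -linear * lr / new_accum**(-lr_power)
  return var, new_accum, linear


def adagrad_step(var, accum, grad, lr):
  """One Adagrad step for comparison."""
  accum = accum + grad * grad
  return var - lr * grad / np.sqrt(accum), accum


if __name__ == "__main__":
  grad = np.array([0.1, 0.2])
  f_var, f_accum, f_linear = np.zeros(2), np.full(2, 0.1), np.zeros(2)
  a_var, a_accum = np.zeros(2), np.full(2, 0.1)
  for _ in range(3):
    f_var, f_accum, f_linear = ftrl_step_no_reg(
        f_var, f_accum, f_linear, grad, lr=3.0, lr_power=-0.5)
    a_var, a_accum = adagrad_step(a_var, a_accum, grad, lr=3.0)
  np.testing.assert_allclose(f_var, a_var)  # the two trajectories coincide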
def testFtrlWithL1_L2_L2Shrinkage(self):
  """Tests the new FTRL op with support for l2 shrinkage.

  The addition of this parameter, which places a constant pressure on weights
  towards the origin, causes the gradient descent trajectory to differ. The
  weights will tend to have smaller magnitudes with this parameter set.
  """
  # The v1 optimizers do not support eager execution
  with ops.Graph().as_default():
    for dtype in [dtypes.half, dtypes.float32]:
      with self.cached_session():
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
        opt = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=2.0,
            l2_shrinkage_regularization_strength=0.1)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(variables.global_variables_initializer())

        v0_val, v1_val = self.evaluate([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run 10 steps FTRL
        for _ in range(10):
          update.run()

        v0_val, v1_val = self.evaluate([var0, var1])
        self.assertAllCloseAccordingToType(
            np.array([-0.22578995, -0.44345796]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.14378493, -0.13229476]), v1_val)
def testEquivSparseGradientDescentwithoutRegularization(self):
  # The v1 optimizers do not support eager execution
  with ops.Graph().as_default():
    for dtype in [dtypes.half, dtypes.float32]:
      with self.cached_session():
        val0, val1 = self.applyOptimizer(
            ftrl.FtrlOptimizer(
                3.0,  # Fixed learning rate
                learning_rate_power=-0.0,
                initial_accumulator_value=0.1,
                l1_regularization_strength=0.0,
                l2_regularization_strength=0.0),
            dtype,
            is_sparse=True)

      with self.cached_session():
        val2, val3 = self.applyOptimizer(
            gradient_descent.GradientDescentOptimizer(3.0),
            dtype,
            is_sparse=True)

      self.assertAllCloseAccordingToType(val0, val2)
      self.assertAllCloseAccordingToType(val1, val3)
def _get_default_optimizer(feature_columns):
  learning_rate = min(_LEARNING_RATE, 1.0 / math.sqrt(len(feature_columns)))
  return ftrl.FtrlOptimizer(learning_rate=learning_rate)
multi_worker_mirrored_4x1_cpu = combinations.NamedDistribution(
    "MultiWorkerMirrored4x1CPU",
    _get_multi_worker_mirrored_creator(required_gpus=0),
    has_chief=True,
    num_workers=3,
)

gradient_descent_optimizer_v1_fn = combinations.NamedObject(
    "GradientDescentV1",
    lambda: gradient_descent.GradientDescentOptimizer(0.001))
adagrad_optimizer_v1_fn = combinations.NamedObject(
    "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001))
adam_optimizer_v1_fn = combinations.NamedObject(
    "AdamV1", lambda: adam.AdamOptimizer(0.001, epsilon=1))
ftrl_optimizer_v1_fn = combinations.NamedObject(
    "FtrlV1", lambda: ftrl.FtrlOptimizer(0.001))
rmsprop_optimizer_v1_fn = combinations.NamedObject(
    "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001))

# TODO(shiningsun): consider adding the other v1 optimizers
optimizers_v1 = [
    gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn,
    ftrl_optimizer_v1_fn, rmsprop_optimizer_v1_fn
]

adadelta_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdadeltaKerasV2", lambda: adadelta_keras_v2.Adadelta(0.001))
adagrad_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdagradKerasV2", lambda: adagrad_keras_v2.Adagrad(0.001))
adam_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdamKerasV2", lambda: adam_keras_v2.Adam(0.001, epsilon=1.0))
def test_ftrl_apply_restriction(self):
  opt = ftrl.FtrlOptimizer(0.1)
  self.commonly_apply_restriction_verify(opt)
def test_ftrl_restrict_on_policy(self):
  opt = ftrl.FtrlOptimizer(0.1)
  self.common_single_step_restrict_verification(opt)