def test_tensor_learning_rate():
    for dtype in _dtypes_to_test(use_gpu=test_utils.is_gpu_available()):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 1.0, 0.0, 1.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        opt = yogi.Yogi(tf.constant(0.01), initial_accumulator_value=1.0)

        # Fetch params to validate initial values.
        np.testing.assert_allclose(np.asanyarray([1.0, 2.0]), var0.numpy())
        np.testing.assert_allclose(np.asanyarray([3.0, 4.0]), var1.numpy())

        # Run 3 steps of Yogi.
        for t in range(1, 4):
            beta1_power, beta2_power = get_beta_accumulators(opt, dtype)
            test_utils.assert_allclose_according_to_type(0.9 ** t, beta1_power)
            test_utils.assert_allclose_according_to_type(0.999 ** t, beta2_power)
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

            var0_np, m0, v0 = yogi_update_numpy(var0_np, grads0_np, t, m0, v0)
            var1_np, m1, v1 = yogi_update_numpy(var1_np, grads1_np, t, m1, v1)

            # Validate updated params.
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
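# The Yogi tests compare against yogi_update_numpy, which is defined elsewhere
# in this module. The sketch below is an assumption about the core
# (unregularized) update it implements, following the Yogi update rule; the
# real helper's name, defaults, and l1reg/l2reg handling may differ.
def _yogi_update_numpy_sketch(
    param, g_t, t, m, v, alpha=0.01, beta1=0.9, beta2=0.999, epsilon=1e-3
):
    beta1_power = beta1 ** t
    beta2_power = beta2 ** t
    # First moment: standard exponential moving average of the gradient.
    m_t = beta1 * m + (1 - beta1) * g_t
    # Second moment: Yogi's sign-based additive update instead of Adam's EMA.
    g2_t = g_t * g_t
    v_t = v - (1 - beta2) * np.sign(v - g2_t) * g2_t
    # Bias-corrected step size, then the Adam-style parameter update.
    lr_t = alpha * np.sqrt(1 - beta2_power) / (1 - beta1_power)
    param_t = param - lr_t * m_t / (np.sqrt(v_t) + epsilon)
    return param_t, m_t, v_t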
def test_sharing():
    for dtype in _dtypes_to_test(use_gpu=test_utils.is_gpu_available()):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        opt = lamb.LAMB()

        # Fetch params to validate initial values.
        np.testing.assert_allclose(np.asanyarray([1.0, 2.0]), var0.numpy())
        np.testing.assert_allclose(np.asanyarray([3.0, 4.0]), var1.numpy())

        # Run 3 steps of LAMB, sharing a single optimizer across both variables.
        for t in range(3):
            beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
            test_utils.assert_allclose_according_to_type(0.9 ** (t + 1), beta_1_power)
            test_utils.assert_allclose_according_to_type(0.999 ** (t + 1), beta_2_power)
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

            var0_np, m0, v0 = lamb_update_numpy(var0_np, grads0_np, t, m0, v0)
            var1_np, m1, v1 = lamb_update_numpy(var1_np, grads1_np, t, m1, v1)

            # Validate updated params.
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
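# lamb_update_numpy is the numpy reference used by the LAMB tests; it is
# defined elsewhere in this module. The sketch below is an assumed, simplified
# version of the LAMB rule (Adam-style moments, bias correction, weight decay,
# and the layer-wise trust ratio); the real helper's signature and defaults
# may differ.
def _lamb_update_numpy_sketch(
    param, g_t, t, m, v, lr=0.001, lamb_wd=0.0, beta1=0.9, beta2=0.999, epsilon=1e-6
):
    m_t = beta1 * m + (1 - beta1) * g_t
    v_t = beta2 * v + (1 - beta2) * g_t * g_t
    # Bias correction uses step t + 1 because the tests count steps from 0.
    m_hat = m_t / (1 - beta1 ** (t + 1))
    v_hat = v_t / (1 - beta2 ** (t + 1))
    update = m_hat / (np.sqrt(v_hat) + epsilon) + lamb_wd * param
    # Layer-wise trust ratio ||param|| / ||update||, guarded against zeros.
    w_norm = np.linalg.norm(param)
    u_norm = np.linalg.norm(update)
    ratio = w_norm / u_norm if w_norm > 0 and u_norm > 0 else 1.0
    return param - lr * ratio * update, m_t, v_t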
def test_fit_simple_linear_model_mixed_precision():
    if test_utils.is_gpu_available() and LooseVersion(tf.__version__) <= "2.2.0":
        pytest.xfail("See https://github.com/tensorflow/tensorflow/issues/39775")
    np.random.seed(0x2019)
    tf.random.set_seed(0x2019)
    x = np.random.standard_normal((10000, 3))
    w = np.random.standard_normal((3, 1))
    y = np.dot(x, w) + np.random.standard_normal((10000, 1)) * 1e-4

    try:
        tf.keras.mixed_precision.experimental.set_policy("mixed_float16")
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Dense(input_shape=(3,), units=1))
        model.compile(Lookahead("sgd"), loss="mse")
    finally:
        tf.keras.mixed_precision.experimental.set_policy("float32")

    model.fit(x, y, epochs=3)

    x = np.random.standard_normal((100, 3))
    y = np.dot(x, w)
    predicted = model.predict(x)

    max_abs_diff = np.max(np.abs(predicted - y))
    assert max_abs_diff < 2.3e-3
    # The fit should be measurably less precise than a float32 run, which
    # confirms the model really trained under the mixed_float16 policy.
    assert max_abs_diff >= 1e-3
def test_sparse_repeated_indices():
    for dtype in _dtypes_to_test(use_gpu=test_utils.is_gpu_available()):
        repeated_index_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
        aggregated_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
        grad_repeated_index = tf.IndexedSlices(
            tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
            tf.constant([1, 1]),
            tf.constant([2, 1]),
        )
        grad_aggregated = tf.IndexedSlices(
            tf.constant([0.2], shape=[1, 1], dtype=dtype),
            tf.constant([1]),
            tf.constant([2, 1]),
        )
        opt1 = yogi.Yogi()
        opt2 = yogi.Yogi()

        np.testing.assert_allclose(
            aggregated_update_var.numpy(), repeated_index_update_var.numpy()
        )

        for _ in range(3):
            opt1.apply_gradients([(grad_repeated_index, repeated_index_update_var)])
            opt2.apply_gradients([(grad_aggregated, aggregated_update_var)])

            np.testing.assert_allclose(
                aggregated_update_var.numpy(), repeated_index_update_var.numpy()
            )
def do_test_sparse(beta1=0.0, l1reg=0.0, l2reg=0.0):
    for dtype in _dtypes_to_test(use_gpu=test_utils.is_gpu_available()):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 1.0, 0.0, 1.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0_np_indices = np.array([0, 1], dtype=np.int32)
        grads0 = tf.IndexedSlices(
            tf.constant(grads0_np), tf.constant(grads0_np_indices), tf.constant([2])
        )
        grads1_np_indices = np.array([0, 1], dtype=np.int32)
        grads1 = tf.IndexedSlices(
            tf.constant(grads1_np), tf.constant(grads1_np_indices), tf.constant([2])
        )
        opt = yogi.Yogi(
            beta1=beta1,
            l1_regularization_strength=l1reg,
            l2_regularization_strength=l2reg,
            initial_accumulator_value=1.0,
        )

        # Fetch params to validate initial values.
        np.testing.assert_allclose(np.asanyarray([1.0, 2.0]), var0.numpy())
        np.testing.assert_allclose(np.asanyarray([3.0, 4.0]), var1.numpy())

        # Run 3 steps of Yogi.
        for t in range(1, 4):
            beta1_power, beta2_power = get_beta_accumulators(opt, dtype)
            test_utils.assert_allclose_according_to_type(beta1 ** t, beta1_power)
            test_utils.assert_allclose_according_to_type(0.999 ** t, beta2_power)
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

            var0_np, m0, v0 = yogi_update_numpy(
                var0_np, grads0_np, t, m0, v0, beta1=beta1, l1reg=l1reg, l2reg=l2reg
            )
            var1_np, m1, v1 = yogi_update_numpy(
                var1_np, grads1_np, t, m1, v1, beta1=beta1, l1reg=l1reg, l2reg=l2reg
            )

            # Validate updated params.
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
def test_basic_with_learning_rate_decay():
    for i, dtype in enumerate(_dtypes_to_test(use_gpu=test_utils.is_gpu_available())):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 0.001
        beta_1 = 0.9
        beta_2 = 0.999
        epsilon = 1e-7
        decay = 0.5
        lamb_wd = 0.01

        opt = lamb.LAMB(
            learning_rate=learning_rate,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            weight_decay=lamb_wd,
            decay=decay,
        )

        # Run 3 steps of LAMB.
        for t in range(3):
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

            lr_np = learning_rate / (1 + decay * t)

            var0_np, m0, v0 = lamb_update_numpy(
                var0_np, grads0_np, t, m0, v0, lr=lr_np, lamb_wd=lamb_wd
            )
            var1_np, m1, v1 = lamb_update_numpy(
                var1_np, grads1_np, t, m1, v1, lr=lr_np, lamb_wd=lamb_wd
            )

            # Validate updated params.
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
def test_dynamic_decode_tflite_conversion():
    if test_utils.is_gpu_available():
        pytest.skip("cpu-only test")
    units = 10
    vocab_size = 20
    cell = tf.keras.layers.LSTMCell(units)
    sampler = sampler_py.GreedyEmbeddingSampler()
    embeddings = tf.random.uniform([vocab_size, units])
    my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)

    @tf.function
    def _decode(start_tokens, end_token):
        batch_size = tf.size(start_tokens)
        initial_state = cell.get_initial_state(batch_size=batch_size, dtype=tf.float32)
        return decoder.dynamic_decode(
            my_decoder,
            maximum_iterations=5,
            enable_tflite_convertible=True,
            decoder_init_input=embeddings,
            decoder_init_kwargs=dict(
                initial_state=initial_state,
                start_tokens=start_tokens,
                end_token=end_token,
            ),
        )

    concrete_function = _decode.get_concrete_function(
        tf.TensorSpec([1], dtype=tf.int32), tf.TensorSpec([], dtype=tf.int32)
    )
    if tf.__version__[:3] >= "2.7":
        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [concrete_function], _decode
        )
    else:
        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_function])
    converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS,
        tf.lite.OpsSet.SELECT_TF_OPS,
    ]
    _ = converter.convert()

    with pytest.raises(tf.errors.InvalidArgumentError, match="batch size"):
        # Batch size > 1 should throw an error.
        _decode.get_concrete_function(
            tf.TensorSpec([2], dtype=tf.int32), tf.TensorSpec([], dtype=tf.int32)
        )
def do_test_sparse_repeated_indices(dtype, optimizer, **optimizer_kwargs):
    """Test for repeated indices in sparse updates.

    This test verifies that an update with repeated indices is the same as
    an update with two times the gradient.

    Args:
        dtype: The dtype of the variables and gradients under test.
        optimizer: The tensorflow optimizer class to be tested.
        **optimizer_kwargs: The parameters to pass to the constructor of the
            optimizer. Either a constant or a callable. This is also passed
            to the optimizer_params in the update_fn.
    """
    # TODO: Fix #347 issue
    if test_utils.is_gpu_available():
        pytest.skip("Wait #347 to be fixed")

    repeated_index_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
    aggregated_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
    grad_repeated_index = tf.IndexedSlices(
        tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
        tf.constant([1, 1]),
        tf.constant([2, 1]),
    )
    grad_aggregated = tf.IndexedSlices(
        tf.constant([0.2], shape=[1, 1], dtype=dtype),
        tf.constant([1]),
        tf.constant([2, 1]),
    )
    opt_repeated = optimizer(**optimizer_kwargs)
    _ = opt_repeated.apply_gradients([(grad_repeated_index, repeated_index_update_var)])
    opt_aggregated = optimizer(**optimizer_kwargs)
    _ = opt_aggregated.apply_gradients([(grad_aggregated, aggregated_update_var)])

    np.testing.assert_allclose(
        aggregated_update_var.numpy(), repeated_index_update_var.numpy()
    )

    for _ in range(3):
        opt_repeated.apply_gradients([(grad_repeated_index, repeated_index_update_var)])
        opt_aggregated.apply_gradients([(grad_aggregated, aggregated_update_var)])

        np.testing.assert_allclose(
            aggregated_update_var.numpy(), repeated_index_update_var.numpy()
        )
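# Illustrative usage of the helper above. The test name, the dtype
# parametrization, and the learning_rate value are assumptions about how a
# caller might wire it up; yogi.Yogi is used because it already appears in
# the tests in this file.
@pytest.mark.parametrize("dtype", [tf.float32, tf.float64])
def test_sparse_repeated_indices_via_helper(dtype):
    do_test_sparse_repeated_indices(dtype, yogi.Yogi, learning_rate=0.01)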
def test_minimize_sparse_resource_variable_nuclear():
    # TODO:
    # To address issue #347 and #36764.
    for dtype in _dtypes_with_checking_system(
        use_gpu=test_utils.is_gpu_available(), system=platform.system()
    ):
        var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)

        def loss():
            x = tf.constant([[4.0], [5.0]], dtype=dtype)
            pred = tf.matmul(tf.nn.embedding_lookup([var0], [0]), x)
            return pred * pred

        # Gradient of the loss above w.r.t. var0, computed by hand.
        grads0_0 = 32 * 1.0 + 40 * 2.0
        grads0_1 = 40 * 1.0 + 50 * 2.0
        grads0 = tf.constant([[grads0_0, grads0_1]], dtype=dtype)
        top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(grads0)

        learning_rate = 0.1
        lambda_ = 0.1
        ord = "nuclear"
        opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_, ord=ord
        )
        _ = opt.minimize(loss, var_list=[var0])

        # Validate updated params.
        test_utils.assert_allclose_according_to_type(
            [
                [
                    1.0 * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector0[0][0],
                    2.0 * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector0[0][1],
                ]
            ],
            var0.numpy(),
        )
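# The nuclear-norm tests rely on cg_lib.ConditionalGradient._top_singular_vector.
# The numpy sketch below is an assumption about what it returns: the rank-1
# matrix built from the leading singular vectors of the gradient, reshaped back
# to the gradient's shape. The helper name is illustrative, not part of the
# test module.
def _top_singular_vector_sketch(grad):
    g = np.asarray(grad, dtype=np.float64)
    # A 1-D gradient is treated as a single-row matrix, matching tests that
    # index the result as top_singular_vector0[0], top_singular_vector0[1].
    matrix = np.atleast_2d(g)
    u, _, vt = np.linalg.svd(matrix, full_matrices=False)
    return np.outer(u[:, 0], vt[0, :]).reshape(g.shape)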
def test_resource():
    for i, dtype in enumerate(_dtypes_to_test(use_gpu=test_utils.is_gpu_available())):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        def learning_rate():
            return 0.001

        opt = lamb.LAMB(learning_rate=learning_rate)

        # Run 3 steps of LAMB.
        for t in range(3):
            beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
            test_utils.assert_allclose_according_to_type(0.9 ** (t + 1), beta_1_power)
            test_utils.assert_allclose_according_to_type(0.999 ** (t + 1), beta_2_power)
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

            var0_np, m0, v0 = lamb_update_numpy(var0_np, grads0_np, t, m0, v0)
            var1_np, m1, v1 = lamb_update_numpy(var1_np, grads1_np, t, m1, v1)

            # Validate updated params.
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
def do_test(
    dtype,
    optimizer,
    update_fn,
    do_sparse=False,
    do_decay_var_list=False,
    **optimizer_kwargs,
):
    """The major test function.

    Args:
        dtype: A tuple (tf.DType, index); the dtype of the variables under
            test and an integer used only to give the variables unique names.
        optimizer: The tensorflow optimizer class to be tested.
        update_fn: The numpy update function of the optimizer. The function
            signature must be
            update_fn(var: np.array,
                      grad_t: np.array,
                      slot_vars: dict,
                      **kwargs) -> (updated_var, updated_slot_vars).
            Note that slot_vars will be initialized to an empty dictionary
            for each variable; initial values should be handled in the
            update_fn.
        do_sparse: If True, test sparse update. Defaults to False, i.e.,
            dense update.
        do_decay_var_list: If True, test by passing a list of vars to ensure
            hashing is handled correctly.
        **optimizer_kwargs: The parameters to pass to the constructor of the
            optimizer. Either a constant or a callable. This is also passed
            to the optimizer_params in the update_fn.
    """
    # TODO: Fix #347 issue
    if do_sparse and test_utils.is_gpu_available():
        pytest.skip("Wait #347 to be fixed")

    # Initialize variables for numpy implementation.
    np_slot_vars0, np_slot_vars1 = {}, {}
    var0_np = np.array([1.0, 2.0], dtype=dtype[0].as_numpy_dtype)
    grads0_np = np.array([0.1, 0.1], dtype=dtype[0].as_numpy_dtype)
    var1_np = np.array([3.0, 4.0], dtype=dtype[0].as_numpy_dtype)
    grads1_np = np.array([0.01, 0.01], dtype=dtype[0].as_numpy_dtype)

    # Create Tensorflow variables.
    var0 = tf.Variable(var0_np, name="var0_%d" % dtype[1])
    var1 = tf.Variable(var1_np, name="var1_%d" % dtype[1])

    if do_sparse:
        grads0_np_indices = np.array([0, 1], dtype=np.int32)
        grads0 = tf.IndexedSlices(
            tf.constant(grads0_np), tf.constant(grads0_np_indices), tf.constant([2])
        )
        grads1_np_indices = np.array([0, 1], dtype=np.int32)
        grads1 = tf.IndexedSlices(
            tf.constant(grads1_np), tf.constant(grads1_np_indices), tf.constant([2])
        )
    else:
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

    opt = optimizer(**optimizer_kwargs)

    # Create the update op and run 3 steps of the optimizer.
    for _ in range(3):
        if do_decay_var_list:
            opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]),
                decay_var_list=[var0, var1],
            )
        else:
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        var0_np, np_slot_vars0 = update_fn(
            var0_np, grads0_np, np_slot_vars0, **optimizer_kwargs
        )
        var1_np, np_slot_vars1 = update_fn(
            var1_np, grads1_np, np_slot_vars1, **optimizer_kwargs
        )

        # Validate updated params.
        test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
        test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
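# An update_fn matching the signature documented in do_test above, sketched
# here for plain SGD. The name and the learning_rate default are illustrative
# assumptions; the real tests pass optimizer-specific numpy reference
# implementations instead.
def _sgd_update_fn_sketch(var, grad_t, slot_vars, learning_rate=0.001, **_unused):
    # Per the do_test docstring, optimizer kwargs may be constants or callables.
    lr = learning_rate() if callable(learning_rate) else learning_rate
    # Vanilla SGD keeps no per-variable state, so slot_vars is returned unchanged.
    return var - lr * grad_t, slot_vars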
def test_sharing_nuclear():
    # TODO:
    # To address the issue #36764.
    for dtype in _dtypes_with_checking_system(
        use_gpu=test_utils.is_gpu_available(), system=platform.system()
    ):
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(grads0)
        top_singular_vector1 = cg_lib.ConditionalGradient._top_singular_vector(grads1)
        learning_rate = 0.1
        lambda_ = 0.1
        ord = "nuclear"
        cg_opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_, ord=ord
        )
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots.
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        # In eager mode the first update above executes as soon as
        # apply_gradients is called, so we cannot check the parameter values
        # after step 1 here; we only validate the values after step 2.
        # Step 2: the second conditional_gradient step contains the previous
        # update.
        # Check that the parameters have been updated.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        test_utils.assert_allclose_according_to_type(
            np.array([
                (1.0 * learning_rate
                 - (1 - learning_rate) * lambda_ * top_singular_vector0[0])
                * learning_rate
                - (1 - learning_rate) * lambda_ * top_singular_vector0[0],
                (2.0 * learning_rate
                 - (1 - learning_rate) * lambda_ * top_singular_vector0[1])
                * learning_rate
                - (1 - learning_rate) * lambda_ * top_singular_vector0[1],
            ]),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                (3.0 * learning_rate
                 - (1 - learning_rate) * lambda_ * top_singular_vector1[0])
                * learning_rate
                - (1 - learning_rate) * lambda_ * top_singular_vector1[0],
                (4.0 * learning_rate
                 - (1 - learning_rate) * lambda_ * top_singular_vector1[1])
                * learning_rate
                - (1 - learning_rate) * lambda_ * top_singular_vector1[1],
            ]),
            var1.numpy(),
        )
def test_sparse_frobenius():
    # TODO:
    # To address the issue #347.
    for dtype in _dtypes_to_test(use_gpu=test_utils.is_gpu_available()):
        var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype))
        var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2]))
        grads0 = tf.IndexedSlices(
            tf.constant([[0.1, 0.1]], dtype=dtype),
            tf.constant([1]),
            tf.constant([4, 2]),
        )
        grads1 = tf.IndexedSlices(
            tf.constant([[0.01, 0.01], [0.01, 0.01]], dtype=dtype),
            tf.constant([2, 3]),
            tf.constant([4, 2]),
        )
        norm0 = tf.math.reduce_sum(tf.math.multiply(grads0, grads0)) ** 0.5
        norm1 = tf.math.reduce_sum(tf.math.multiply(grads1, grads1)) ** 0.5
        learning_rate = 0.1
        lambda_ = 0.1
        ord = "fro"
        cg_opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_, ord=ord
        )
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots.
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        # Check that the parameters have been updated.
        test_utils.assert_allclose_according_to_type(
            np.array([
                0 - (1 - learning_rate) * lambda_ * 0 / norm0,
                0 - (1 - learning_rate) * lambda_ * 0 / norm0,
            ]),
            var0[0].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                0 - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                0 - (1 - learning_rate) * lambda_ * 0.1 / norm0,
            ]),
            var0[1].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                1.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                1.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.01 / norm1,
            ]),
            var1[2].numpy(),
        )

        # Step 2: the conditional_gradient update contains the previous update.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check that the parameters have been updated.
        np.testing.assert_allclose(np.array([0, 0]), var0[0].numpy())
        test_utils.assert_allclose_according_to_type(
            np.array([
                (0 - (1 - learning_rate) * lambda_ * 0.1 / norm0) * learning_rate
                - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                (0 - (1 - learning_rate) * lambda_ * 0.1 / norm0) * learning_rate
                - (1 - learning_rate) * lambda_ * 0.1 / norm0,
            ]),
            var0[1].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                (1.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.01 / norm1)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                (1.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.01 / norm1)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.01 / norm1,
            ]),
            var1[2].numpy(),
        )
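# The Frobenius-norm expectations in test_sparse_frobenius follow the
# conditional-gradient step
#     var_{t+1} = lr * var_t - (1 - lr) * lambda_ * grad / ||grad||_F,
# applied once per apply_gradients call. A minimal numpy sketch of a single
# step (the helper name is illustrative, not part of the test module):
def _cg_frobenius_step_sketch(var, grad, learning_rate, lambda_):
    frobenius_norm = np.sqrt(np.sum(grad * grad))
    return learning_rate * var - (1 - learning_rate) * lambda_ * grad / frobenius_norm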
def test_tensor_learning_rate_and_conditional_gradient_nuclear():
    for dtype in _dtypes_with_checking_system(
        use_gpu=test_utils.is_gpu_available(), system=platform.system()
    ):
        # TODO:
        # Based on issue #36764,
        # "https://github.com/tensorflow/tensorflow/issues/36764",
        # tf.half is not registered for the tf.linalg.svd function on the
        # Windows CPU build, so tf.half has to be excluded when testing on
        # Windows CPU.
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(grads0)
        top_singular_vector1 = cg_lib.ConditionalGradient._top_singular_vector(grads1)
        ord = "nuclear"
        cg_opt = cg_lib.ConditionalGradient(
            learning_rate=tf.constant(0.5), lambda_=tf.constant(0.01), ord=ord
        )
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots.
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        # Check that the parameters have been updated.
        test_utils.assert_allclose_according_to_type(
            np.array([
                1.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[0],
                2.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[1],
            ]),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                3.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[0],
                4.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[1],
            ]),
            var1.numpy(),
        )

        # Step 2: the conditional_gradient update contains the previous update.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check that the parameters have been updated.
        test_utils.assert_allclose_according_to_type(
            np.array([
                (1.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[0]) * 0.5
                - (1 - 0.5) * 0.01 * top_singular_vector0[0],
                (2.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[1]) * 0.5
                - (1 - 0.5) * 0.01 * top_singular_vector0[1],
            ]),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                (3.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[0]) * 0.5
                - (1 - 0.5) * 0.01 * top_singular_vector1[0],
                (4.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[1]) * 0.5
                - (1 - 0.5) * 0.01 * top_singular_vector1[1],
            ]),
            var1.numpy(),
        )
def test_basic_nuclear(use_resource):
    # TODO:
    # To address issue #36764.
    for i, dtype in enumerate(
        _dtypes_with_checking_system(
            use_gpu=test_utils.is_gpu_available(), system=platform.system()
        )
    ):
        if use_resource:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0_%d" % i)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1_%d" % i)
        else:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)

        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(grads0)
        top_singular_vector1 = cg_lib.ConditionalGradient._top_singular_vector(grads1)

        def learning_rate():
            return 0.5

        def lambda_():
            return 0.01

        ord = "nuclear"
        cg_opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_, ord=ord
        )
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots.
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        test_utils.assert_allclose_according_to_type(
            np.array([
                1.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[0],
                2.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[1],
            ]),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                3.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[0],
                4.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[1],
            ]),
            var1.numpy(),
        )

        # Step 2: the conditional_gradient update contains the previous update.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        test_utils.assert_allclose_according_to_type(
            np.array([
                (1.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[0]) * 0.5
                - (1 - 0.5) * 0.01 * top_singular_vector0[0],
                (2.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[1]) * 0.5
                - (1 - 0.5) * 0.01 * top_singular_vector0[1],
            ]),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                (3.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[0]) * 0.5
                - (1 - 0.5) * 0.01 * top_singular_vector1[0],
                (4.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[1]) * 0.5
                - (1 - 0.5) * 0.01 * top_singular_vector1[1],
            ]),
            var1.numpy(),
        )
def test_sparse_nuclear():
    # TODO:
    # To address the issue #347 and issue #36764.
    for dtype in _dtypes_with_checking_system(
        use_gpu=test_utils.is_gpu_available(), system=platform.system()
    ):
        var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype))
        var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2]))
        grads0 = tf.IndexedSlices(
            tf.constant([[0.1, 0.1]], dtype=dtype),
            tf.constant([1]),
            tf.constant([4, 2]),
        )
        grads1 = tf.IndexedSlices(
            tf.constant([[0.01, 0.01], [0.01, 0.01]], dtype=dtype),
            tf.constant([2, 3]),
            tf.constant([4, 2]),
        )
        top_singular_vector0 = tf.constant(
            [[0.0, 0.0], [0.7071067, 0.7071067], [0.0, 0.0], [0.0, 0.0]],
            dtype=dtype,
        )
        top_singular_vector1 = tf.constant(
            [
                [-4.2146844e-08, -4.2146844e-08],
                [0.0000000e00, 0.0000000e00],
                [4.9999994e-01, 4.9999994e-01],
                [4.9999994e-01, 4.9999994e-01],
            ],
            dtype=dtype,
        )
        learning_rate = 0.1
        lambda_ = 0.1
        ord = "nuclear"
        cg_opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_, ord=ord
        )
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots.
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        # Check that the parameters have been updated.
        test_utils.assert_allclose_according_to_type(
            np.array([
                0 - (1 - learning_rate) * lambda_ * top_singular_vector0[0][0],
                0 - (1 - learning_rate) * lambda_ * top_singular_vector0[0][1],
            ]),
            var0[0].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                0 - (1 - learning_rate) * lambda_ * top_singular_vector0[1][0],
                0 - (1 - learning_rate) * lambda_ * top_singular_vector0[1][1],
            ]),
            var0[1].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                1.0 * learning_rate
                - (1 - learning_rate) * lambda_ * top_singular_vector1[2][0],
                1.0 * learning_rate
                - (1 - learning_rate) * lambda_ * top_singular_vector1[2][1],
            ]),
            var1[2].numpy(),
        )

        # Step 2: the conditional_gradient update contains the previous update.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check that the parameters have been updated.
        np.testing.assert_allclose(np.array([0, 0]), var0[0].numpy())
        test_utils.assert_allclose_according_to_type(
            np.array([
                (0 - (1 - learning_rate) * lambda_ * top_singular_vector0[1][0])
                * learning_rate
                - (1 - learning_rate) * lambda_ * top_singular_vector0[1][0],
                (0 - (1 - learning_rate) * lambda_ * top_singular_vector0[1][1])
                * learning_rate
                - (1 - learning_rate) * lambda_ * top_singular_vector0[1][1],
            ]),
            var0[1].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array([
                (1.0 * learning_rate
                 - (1 - learning_rate) * lambda_ * top_singular_vector1[2][0])
                * learning_rate
                - (1 - learning_rate) * lambda_ * top_singular_vector1[2][0],
                (1.0 * learning_rate
                 - (1 - learning_rate) * lambda_ * top_singular_vector1[2][1])
                * learning_rate
                - (1 - learning_rate) * lambda_ * top_singular_vector1[2][1],
            ]),
            var1[2].numpy(),
        )