def testMinimizeWith2DIndiciesForEmbeddingLookup(self):
    # This test invokes the ResourceSparseApplyConditionalGradient
    # operation.
    var0 = tf.Variable(tf.ones([2, 2]))

    def loss():
        return tf.math.reduce_sum(tf.nn.embedding_lookup(var0, [[1]]))

    # the gradient for this loss function:
    grads0 = tf.constant([[0, 0], [1, 1]], dtype=tf.float32)
    norm0 = tf.math.reduce_sum(grads0**2)**0.5

    learning_rate = 0.1
    lambda_ = 0.1
    opt = cg_lib.ConditionalGradient(
        learning_rate=learning_rate, lambda_=lambda_)
    cg_op = opt.minimize(loss, var_list=[var0])
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # Run 1 step of cg_op
    self.evaluate(cg_op)
    norm0 = self.evaluate(norm0)
    self.assertAllCloseAccordingToType(
        [[1, 1],
         [
             learning_rate * 1 - (1 - learning_rate) * lambda_ * 1 / norm0,
             learning_rate * 1 - (1 - learning_rate) * lambda_ * 1 / norm0
         ]],
        self.evaluate(var0))
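# The assertions in these tests all encode the same conditional gradient
# update rule: for each variable w with gradient g,
#     w <- learning_rate * w - (1 - learning_rate) * lambda_ * g / ||g||_F,
# where ||g||_F is the Frobenius norm of the gradient. The helper below is an
# illustrative sketch of that expected value only; it is not part of cg_lib's
# API and is not used by the assertions, which keep their hard-coded
# expressions.
def _expected_cg_value(w, g, norm, learning_rate, lambda_):
    # Expected parameter value after a single conditional gradient step.
    return learning_rate * w - (1 - learning_rate) * lambda_ * g / norm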
def testVariablesAcrossGraphs(self):
    optimizer = cg_lib.ConditionalGradient(0.01, 0.5)
    with tf.Graph().as_default():
        var0 = tf.Variable([1.0, 2.0], dtype=tf.float32, name="var0")
        var1 = tf.Variable([3.0, 4.0], dtype=tf.float32, name="var1")
        loss = lambda: tf.math.reduce_sum(var0 + var1)
        optimizer.minimize(loss, var_list=[var0, var1])
        optimizer_variables = optimizer.variables()
        # There should be three items: the iteration counter, plus one slot
        # variable for each of var0 and var1.
        self.assertStartsWith(optimizer_variables[1].name,
                              "ConditionalGradient/var0")
        self.assertStartsWith(optimizer_variables[2].name,
                              "ConditionalGradient/var1")
        self.assertEqual(3, len(optimizer_variables))
def testMinimizeSparseResourceVariable(self):
    # This test invokes the ResourceSparseApplyConditionalGradient
    # operation, which in turn calls the 'ResourceScatterUpdate' OpKernel
    # on 'GPU' devices. tf.half is not registered for that kernel
    # (see issue #347), so we use the "_DtypesToTest" helper to select
    # the dtypes to test.
    #
    # TODO:
    # Once issue #347 is resolved, also test tf.half on 'GPU' devices.
    for dtype in self._DtypesToTest(use_gpu=tf.test.is_gpu_available()):
        var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)

        def loss():
            x = tf.constant([[4.0], [5.0]], dtype=dtype)
            pred = tf.matmul(tf.nn.embedding_lookup([var0], [0]), x)
            return pred * pred

        # The gradient of the current loss function with respect to var0.
        grads0_0 = 32 * 1.0 + 40 * 2.0
        grads0_1 = 40 * 1.0 + 50 * 2.0
        grads0 = tf.constant([[grads0_0, grads0_1]], dtype=dtype)
        norm0 = tf.math.reduce_sum(grads0**2)**0.5

        learning_rate = 0.1
        lambda_ = 0.1
        opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_)
        cg_op = opt.minimize(loss, var_list=[var0])
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Run 1 step of cg_op
        self.evaluate(cg_op)

        # Validate updated params
        norm0 = self.evaluate(norm0)
        self.assertAllCloseAccordingToType(
            [[
                1.0 * learning_rate -
                (1 - learning_rate) * lambda_ * grads0_0 / norm0,
                2.0 * learning_rate -
                (1 - learning_rate) * lambda_ * grads0_1 / norm0,
            ]],
            self.evaluate(var0),
        )
def testLikeDistBeliefCG01(self):
    with self.cached_session():
        db_grad, db_out = self._dbParamsCG01()
        num_samples = len(db_grad)
        var0 = tf.Variable([0.0] * num_samples)
        grads0 = tf.constant([0.0] * num_samples)
        cg_opt = cg_lib.ConditionalGradient(learning_rate=0.1, lambda_=0.1)
        if not tf.executing_eagerly():
            cg_update = cg_opt.apply_gradients(zip([grads0], [var0]))
            self.evaluate(tf.compat.v1.global_variables_initializer())
        for i in range(num_samples):
            if tf.executing_eagerly():
                grads0 = tf.constant(db_grad[i])
                cg_opt.apply_gradients(zip([grads0], [var0]))
            else:
                cg_update.run(feed_dict={grads0: db_grad[i]})
            self.assertAllClose(np.array(db_out[i]), self.evaluate(var0))
def testSharing(self):
    for dtype in [tf.half, tf.float32, tf.float64]:
        with self.cached_session():
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)
            norm0 = tf.math.reduce_sum(grads0**2)**0.5
            norm1 = tf.math.reduce_sum(grads1**2)**0.5
            learning_rate = 0.1
            lambda_ = 0.1
            cg_opt = cg_lib.ConditionalGradient(
                learning_rate=learning_rate, lambda_=lambda_)
            cg_update1 = cg_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))
            cg_update2 = cg_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))
            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Check we have slots
            self.assertEqual(["conditional_gradient"],
                             cg_opt.get_slot_names())
            slot0 = cg_opt.get_slot(var0, "conditional_gradient")
            self.assertEqual(slot0.get_shape(), var0.get_shape())
            slot1 = cg_opt.get_slot(var1, "conditional_gradient")
            self.assertEqual(slot1.get_shape(), var1.get_shape())
            if not tf.executing_eagerly():
                self.assertFalse(slot0 in tf.compat.v1.trainable_variables())
                self.assertFalse(slot1 in tf.compat.v1.trainable_variables())

            # In eager mode, the two apply_gradients calls above have
            # already executed, so we cannot check the parameter values
            # after only the first update; we can only check the final
            # values after the second update.
            if not tf.executing_eagerly():
                self.evaluate(cg_update1)
                # Check that the parameters have been updated.
                norm0 = self.evaluate(norm0)
                norm1 = self.evaluate(norm1)
                self.assertAllCloseAccordingToType(
                    np.array([
                        1.0 * learning_rate -
                        (1 - learning_rate) * lambda_ * 0.1 / norm0,
                        2.0 * learning_rate -
                        (1 - learning_rate) * lambda_ * 0.1 / norm0
                    ]), self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([
                        3.0 * learning_rate -
                        (1 - learning_rate) * lambda_ * 0.01 / norm1,
                        4.0 * learning_rate -
                        (1 - learning_rate) * lambda_ * 0.01 / norm1
                    ]), self.evaluate(var1))

            # Step 2: the second update reuses the conditional_gradient
            # slot, which contains the previous update.
            if not tf.executing_eagerly():
                self.evaluate(cg_update2)
            # Check that the parameters have been updated.
            self.assertAllCloseAccordingToType(
                np.array([
                    (1.0 * learning_rate -
                     (1 - learning_rate) * lambda_ * 0.1 / norm0) *
                    learning_rate -
                    (1 - learning_rate) * lambda_ * 0.1 / norm0,
                    (2.0 * learning_rate -
                     (1 - learning_rate) * lambda_ * 0.1 / norm0) *
                    learning_rate -
                    (1 - learning_rate) * lambda_ * 0.1 / norm0
                ]), self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([
                    (3.0 * learning_rate -
                     (1 - learning_rate) * lambda_ * 0.01 / norm1) *
                    learning_rate -
                    (1 - learning_rate) * lambda_ * 0.01 / norm1,
                    (4.0 * learning_rate -
                     (1 - learning_rate) * lambda_ * 0.01 / norm1) *
                    learning_rate -
                    (1 - learning_rate) * lambda_ * 0.01 / norm1
                ]), self.evaluate(var1))
def testSparse(self):
    # TODO: address issue #347.
    for dtype in self._DtypesToTest(use_gpu=tf.test.is_gpu_available()):
        with self.cached_session():
            var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype))
            var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2]))
            grads0 = tf.IndexedSlices(
                tf.constant([[.1, .1]], dtype=dtype),
                tf.constant([1]),
                tf.constant([4, 2]))
            grads1 = tf.IndexedSlices(
                tf.constant([[.01, .01], [.01, .01]], dtype=dtype),
                tf.constant([2, 3]),
                tf.constant([4, 2]))
            norm0 = tf.math.reduce_sum(tf.math.multiply(grads0, grads0))**0.5
            norm1 = tf.math.reduce_sum(tf.math.multiply(grads1, grads1))**0.5
            learning_rate = 0.1
            lambda_ = 0.1
            cg_opt = cg_lib.ConditionalGradient(
                learning_rate=learning_rate, lambda_=lambda_)
            cg_update = cg_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))
            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllClose([0, 0], self.evaluate(var0)[0])
                self.assertAllClose([0, 0], self.evaluate(var0)[1])
                self.assertAllClose([1, 1], self.evaluate(var1)[2])

            # Check we have slots
            self.assertEqual(["conditional_gradient"],
                             cg_opt.get_slot_names())
            slot0 = cg_opt.get_slot(var0, "conditional_gradient")
            self.assertEqual(slot0.get_shape(), var0.get_shape())
            slot1 = cg_opt.get_slot(var1, "conditional_gradient")
            self.assertEqual(slot1.get_shape(), var1.get_shape())
            if not tf.executing_eagerly():
                self.assertFalse(slot0 in tf.compat.v1.trainable_variables())
                self.assertFalse(slot1 in tf.compat.v1.trainable_variables())

            # Step 1:
            if not tf.executing_eagerly():
                self.evaluate(cg_update)
            # Check that the parameters have been updated.
            norm0 = self.evaluate(norm0)
            norm1 = self.evaluate(norm1)
            self.assertAllCloseAccordingToType(
                np.array([
                    0 - (1 - learning_rate) * lambda_ * 0 / norm0,
                    0 - (1 - learning_rate) * lambda_ * 0 / norm0
                ]), self.evaluate(var0)[0])
            self.assertAllCloseAccordingToType(
                np.array([
                    0 - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                    0 - (1 - learning_rate) * lambda_ * 0.1 / norm0
                ]), self.evaluate(var0)[1])
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 * learning_rate -
                    (1 - learning_rate) * lambda_ * 0.01 / norm1,
                    1.0 * learning_rate -
                    (1 - learning_rate) * lambda_ * 0.01 / norm1
                ]), self.evaluate(var1)[2])

            # Step 2: the conditional_gradient slot contains the
            # previous update.
            if tf.executing_eagerly():
                cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            else:
                self.evaluate(cg_update)
            # Check that the parameters have been updated.
            self.assertAllClose(np.array([0, 0]), self.evaluate(var0)[0])
            self.assertAllCloseAccordingToType(
                np.array([
                    (0 - (1 - learning_rate) * lambda_ * 0.1 / norm0) *
                    learning_rate -
                    (1 - learning_rate) * lambda_ * 0.1 / norm0,
                    (0 - (1 - learning_rate) * lambda_ * 0.1 / norm0) *
                    learning_rate -
                    (1 - learning_rate) * lambda_ * 0.1 / norm0
                ]), self.evaluate(var0)[1])
            self.assertAllCloseAccordingToType(
                np.array([
                    (1.0 * learning_rate -
                     (1 - learning_rate) * lambda_ * 0.01 / norm1) *
                    learning_rate -
                    (1 - learning_rate) * lambda_ * 0.01 / norm1,
                    (1.0 * learning_rate -
                     (1 - learning_rate) * lambda_ * 0.01 / norm1) *
                    learning_rate -
                    (1 - learning_rate) * lambda_ * 0.01 / norm1
                ]), self.evaluate(var1)[2])
def doTestBasic(self, use_resource=False, use_callable_params=False):
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
        if use_resource:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0_%d" % i)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1_%d" % i)
        else:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        norm0 = tf.math.reduce_sum(grads0**2)**0.5
        norm1 = tf.math.reduce_sum(grads1**2)**0.5

        learning_rate = lambda: 0.5
        lambda_ = lambda: 0.01
        if not use_callable_params:
            learning_rate = learning_rate()
            lambda_ = lambda_()

        cg_opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_)
        cg_update = cg_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        if not tf.executing_eagerly():
            self.evaluate(tf.compat.v1.global_variables_initializer())
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Check we have slots
        self.assertEqual(["conditional_gradient"], cg_opt.get_slot_names())
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        self.assertEqual(slot0.get_shape(), var0.get_shape())
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        self.assertEqual(slot1.get_shape(), var1.get_shape())
        if not tf.executing_eagerly():
            self.assertFalse(slot0 in tf.compat.v1.trainable_variables())
            self.assertFalse(slot1 in tf.compat.v1.trainable_variables())

        if not tf.executing_eagerly():
            self.evaluate(cg_update)
        # Check that the parameters have been updated.
        norm0 = self.evaluate(norm0)
        norm1 = self.evaluate(norm1)
        self.assertAllCloseAccordingToType(
            np.array([
                1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0,
                2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0
            ]), self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([
                3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1,
                4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1
            ]), self.evaluate(var1))

        # Step 2: the conditional_gradient slot contains the previous update.
        if tf.executing_eagerly():
            cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        else:
            self.evaluate(cg_update)
        self.assertAllCloseAccordingToType(
            np.array([
                (1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5 -
                (1 - 0.5) * 0.01 * 0.1 / norm0,
                (2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5 -
                (1 - 0.5) * 0.01 * 0.1 / norm0
            ]), self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([
                (3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5 -
                (1 - 0.5) * 0.01 * 0.01 / norm1,
                (4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5 -
                (1 - 0.5) * 0.01 * 0.01 / norm1
            ]), self.evaluate(var1))
def testTensorLearningRateAndConditionalGradient(self):
    for dtype in [tf.half, tf.float32, tf.float64]:
        with self.cached_session():
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)
            norm0 = tf.math.reduce_sum(grads0**2)**0.5
            norm1 = tf.math.reduce_sum(grads1**2)**0.5
            cg_opt = cg_lib.ConditionalGradient(
                learning_rate=tf.constant(0.5), lambda_=tf.constant(0.01))
            cg_update = cg_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))
            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Check we have slots
            self.assertEqual(["conditional_gradient"],
                             cg_opt.get_slot_names())
            slot0 = cg_opt.get_slot(var0, "conditional_gradient")
            self.assertEqual(slot0.get_shape(), var0.get_shape())
            slot1 = cg_opt.get_slot(var1, "conditional_gradient")
            self.assertEqual(slot1.get_shape(), var1.get_shape())
            if not tf.executing_eagerly():
                self.assertFalse(slot0 in tf.compat.v1.trainable_variables())
                self.assertFalse(slot1 in tf.compat.v1.trainable_variables())

            if not tf.executing_eagerly():
                self.evaluate(cg_update)
            # Check that the parameters have been updated.
            norm0 = self.evaluate(norm0)
            norm1 = self.evaluate(norm1)
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0,
                    2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0
                ]), self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([
                    3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1,
                    4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1
                ]), self.evaluate(var1))

            # Step 2: the conditional_gradient slot contains the
            # previous update.
            if tf.executing_eagerly():
                cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            else:
                self.evaluate(cg_update)
            # Check that the parameters have been updated.
            self.assertAllCloseAccordingToType(
                np.array([
                    (1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5 -
                    (1 - 0.5) * 0.01 * 0.1 / norm0,
                    (2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5 -
                    (1 - 0.5) * 0.01 * 0.1 / norm0
                ]), self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([
                    (3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5 -
                    (1 - 0.5) * 0.01 * 0.01 / norm1,
                    (4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5 -
                    (1 - 0.5) * 0.01 * 0.01 / norm1
                ]), self.evaluate(var1))