def testErrors(self):

  @custom_gradient.custom_gradient
  def f(x):

    def grad(_):
      raise RuntimeError('x')

    return x, grad

  # TODO(apassos) raise the right error here
  with self.assertRaises(RuntimeError):
    backprop.gradients_function(f)(constant_op.constant(1.0))

def testExceptionSafety(self):

  def f(unused_x):
    raise ValueError()

  try:
    backprop.gradients_function(f)(1.0)
  except ValueError:
    pass

  def real_f(x):
    return x * x

  self.assertAllEqual(backprop.gradients_function(real_f)(1.0)[0], 2.0)

def testGradient(self):

  def f(x):
    return x

  with self.test_scope():
    grad_fn = backprop.gradients_function(f)
    self.assertAllEqual(2., grad_fn(1., dy=2.)[0])

def testDifferentiableFunctionNoneOutputs(self):

  @function.defun
  def my_function(x):
    return x, None

  def wrapper(x):
    return my_function(x)[0]

  g = backprop.gradients_function(wrapper, [0])(constant_op.constant(0.0))
  self.assertAllEqual(g[0], 1.)

  @function.defun
  def foo(a):
    return None, a * a

  x = constant_op.constant(5.0)
  with backprop.GradientTape() as tp:
    tp.watch(x)
    none, r = foo(x)
  g = tp.gradient(r, x)

  self.assertIs(none, None)
  self.assertAllEqual(r, 25.0)
  self.assertAllEqual(g, 2 * 5.0)

def f(x):
  (y,) = backprop.gradients_function(lambda x: x * x)(x)

  def grad(dy):
    return [2 * dy]

  return y, grad

def testAggregateGradients(self):

  def fn(x):
    ind1 = tensor.Tensor(np.array([0, 1]))
    ind2 = tensor.Tensor(np.array([2, 3]))
    ind3 = tensor.Tensor(np.array([1, 3]))
    # A mixture of IndexedSlices and dense tensor to aggregate.
    g1 = embedding_ops.embedding_lookup(x, ind1)
    g2 = embedding_ops.embedding_lookup(x, ind2)
    g3 = embedding_ops.embedding_lookup(x, ind3)
    g4 = math_ops.reduce_sum(x * tensor.Tensor(2.0))
    return g1 * g2 * g3 * g4

  var_np = np.random.rand(4, 2).astype(np.float32)
  var = tensor.Tensor(var_np)
  grad = backprop.gradients_function(fn, [0])(var)[0]

  with context.graph_mode(), self.test_session():
    tf_var = array_ops.constant(var_np, dtypes.float32)
    tf_ind1 = array_ops.constant([0, 1])
    tf_ind2 = array_ops.constant([2, 3])
    tf_ind3 = array_ops.constant([1, 3])
    tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
    tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
    tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
    tf_g4 = math_ops.reduce_sum(tf_var * 2.0, reduction_indices=(0, 1))
    tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4
    tf_grad = gradients.gradients(tf_y, [tf_var])[0]

    tf_dense_grad = math_ops.unsorted_segment_sum(
        tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

    self.assertAllClose(grad.numpy(), tf_dense_grad.eval())

def testArgsGradientFunction(self):

  def f(*args):
    return args[0] * args[0]

  grad = backprop.gradients_function(f)
  self.assertAllEqual(grad(1.0)[0], 2.0)

def testArgmax(self):

  def argmax(x):
    i = math_ops.argmax(x)
    return array_ops.stop_gradient(i)

  grad = backprop.gradients_function(argmax)
  self.assertAllEqual(grad([0.0])[0], None)

def testDy(self):

  def f(x):
    return x

  grad_fn = backprop.gradients_function(f)
  self.assertAllEqual(2., grad_fn(1., dy=2.)[0])

def testAggregateGradients(self):

  def fn(x):
    ind1 = constant_op.constant(np.array([0, 1]))
    ind2 = constant_op.constant(np.array([2, 3]))
    ind3 = constant_op.constant(np.array([1, 3]))
    # A mixture of IndexedSlices and dense tensor to aggregate.
    g1 = embedding_ops.embedding_lookup(x, ind1)
    g2 = embedding_ops.embedding_lookup(x, ind2)
    g3 = embedding_ops.embedding_lookup(x, ind3)
    g4 = math_ops.reduce_sum(x * constant_op.constant(2.0))
    return g1 * g2 * g3 * g4

  var_np = np.random.rand(4, 2).astype(np.float32)
  var = constant_op.constant(var_np)
  grad = backprop.gradients_function(fn, [0])(var)[0]
  grad = self.evaluate(ops.convert_to_tensor(grad))

  if not context.executing_eagerly():
    tf_var = array_ops.constant(var_np, dtypes.float32)
    tf_ind1 = array_ops.constant([0, 1])
    tf_ind2 = array_ops.constant([2, 3])
    tf_ind3 = array_ops.constant([1, 3])
    tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
    tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
    tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
    tf_g4 = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1))
    tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4
    tf_grad = gradients.gradients(tf_y, [tf_var])[0]

    tf_dense_grad = math_ops.unsorted_segment_sum(
        tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

    self.assertAllClose(grad, self.evaluate(tf_dense_grad))

def testGradientInteger(self):

  def f(x):
    return x + x

  int_tensor = constant_op.constant(1)
  self.assertEqual(backprop.gradients_function(f)(int_tensor)[0], None)

def testMulType(self):

  def mul(x):
    return math_ops._mul_dispatch(x, x)  # pylint: disable=protected-access

  self.assertAllEqual(
      backprop.gradients_function(mul)(3.0)[0].numpy(),
      6.0)

def testTfTensor(self):

  def fn(x):
    return x

  t = constant_op.constant(1.0)
  g, = backprop.gradients_function(fn, [0])(t)
  self.assertAllEqual(g, 1.0)

def testPartial(self):

  def f(x, y):
    return x * y

  part = functools.partial(f, constant_op.constant(2.0))
  self.assertAllEqual(
      backprop.gradients_function(part)(constant_op.constant(1.0))[0], 2.0)

def testCPU(self):

  def fn(x):
    b = tensor.Tensor(2.0)
    c = math_ops.add(x, b)
    return math_ops.add(c, tensor.Tensor(3.0))

  grad = backprop.gradients_function(fn, [0])(tensor.Tensor(1.0))[0]
  self.assertEqual(grad.numpy(), 1.0)

def testOutput(self):

  def multiout(x):
    return x + 2, x * x

  x = constant_op.constant([0.0, 1.0, 2.0])
  grad = backprop.gradients_function(multiout)(x)[0]
  self.assertAllEqual([1.0, 3.0, 5.0], grad)

def testGradGradExp(self):

  def grad(x):
    value = backprop.gradients_function(math_ops.exp, [0])(x)[0]
    return value

  gradgrad = backprop.gradients_function(grad, [0])
  self.assertAllEqual(gradgrad(tensor.Tensor(0.0))[0].numpy(), 1.0)

def testCPU(self):

  def fn(x):
    b = constant_op.constant(2.0)
    c = math_ops.add(x, b)
    return math_ops.add(c, constant_op.constant(3.0))

  grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
  self.assertAllEqual(grad, 1.0)

def testGradGradExp(self):

  def grad(x):
    value = backprop.gradients_function(math_ops.exp, [0])(x)[0]
    return value

  gradgrad = backprop.gradients_function(grad, [0])
  self.assertAllEqual(gradgrad(constant_op.constant(0.0))[0], 1.0)

def testGradient(self):
  matmul = function.defun(math_ops.matmul)

  def sq(x):
    return matmul(x, x, transpose_a=True)

  t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
  grad_t, = backprop.gradients_function(sq, [0])(t)
  self.assertAllEqual(grad_t, [[6, 6], [14, 14]])

def testEmptyParams(self):

  def fn(a, b):
    return a * b

  x = tensor.Tensor(1.0)
  y = tensor.Tensor(2.0)
  dx, dy = backprop.gradients_function(fn)(x, y)
  self.assertAllEqual(dx.numpy(), y.numpy())
  self.assertAllEqual(dy.numpy(), x.numpy())

def testTensorCopyGPU2CPU2GPU(self):

  def f(a, b):
    return a.cpu() + b.cpu()

  with context.device('/gpu:0'):
    a = constant_op.constant(1.0)
    b = constant_op.constant(2.0)

  grad = backprop.gradients_function(f, [0])(a, b)[0]
  self.assertAllEqual(grad, 1.0)

def testGradient(self):
  # TODO(b/121134877): Remove the autograph override.
  matmul = def_function.function(math_ops.matmul, autograph=False)

  def sq(x):
    return matmul(x, x, transpose_a=True)

  t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
  grad_t, = backprop.gradients_function(sq, [0])(t)
  self.assertAllEqual(grad_t, [[6, 6], [14, 14]])

def testGcTwoOutputs(self):

  def fn(x, y):
    return nn_ops.sparse_softmax_cross_entropy_with_logits(
        logits=x, labels=y)[0]

  labels = constant_op.constant([0])
  logits = constant_op.constant([[0.0]])
  grad, = backprop.gradients_function(fn, [0])(logits, labels)
  self.assertAllEqual(grad, [[0.0]])

def testEmptyParams(self):

  def fn(a, b):
    return a * b

  x = constant_op.constant(1.0)
  y = constant_op.constant(2.0)
  dx, dy = backprop.gradients_function(fn)(x, y)
  self.assertAllEqual(dx, y.numpy())
  self.assertAllEqual(dy, x.numpy())

def testDifferentiableFunctionNoneOutputs(self):

  @function.defun
  def my_function(x):
    return x, None

  def wrapper(x):
    return my_function(x)[0]

  g = backprop.gradients_function(wrapper, [0])(constant_op.constant(0.0))
  self.assertAllEqual(g[0], 1.)

def testCustomGradientIdentity(self):

  @custom_gradient.custom_gradient
  def my_identity(x):

    def grad(dresult):
      return [2 * dresult]

    return x, grad

  self.assertAllEqual(backprop.gradients_function(my_identity)(1.0)[0], 2.0)

def testWhereGradient(self):
  # Note: where is special because only some of its arguments are of
  # differentiable dtypes.

  def f(x):
    return array_ops.where(x < 10, x, x * x)

  g = backprop.gradients_function(f)

  self.assertAllEqual(g(5.)[0], 1.0)
  self.assertAllEqual(g(50.)[0], 100.0)

def testMultiValuePreservesIfNotDiffedAgainst(self):

  def tfe_conv2d(timage, tkernel, conv2dstrides):
    return nn_ops.conv2d(timage, tkernel, conv2dstrides, 'SAME')

  i = constant_op.constant([[[[1.0]]]])
  k = constant_op.constant([[[[2.0]]]])
  s = [1, 1, 1, 1]

  grad = backprop.gradients_function(tfe_conv2d, params=(0,))(i, k, s)[0]
  self.assertAllEqual([[[[2.0]]]], grad)

def testDifferentiableFunctionNoneOutputs(self):

  @function.defun
  def my_function(x):
    return x, None

  def wrapper(x):
    return my_function(x)[0]

  g = backprop.gradients_function(wrapper, [0])(tensor.Tensor(0.0))
  self.assertAllEqual(g[0].numpy(), 1.)

def g(x):
  return backprop.gradients_function(math_ops.multiply, [0, 1])(x, x)

def f(x):
  return backprop.gradients_function(lambda y: y * y, [0])(x)[0]

def second(x):
  grad = backprop.gradients_function(first, [0])(x)[0]
  return math_ops.reduce_sum(grad, constant_op.constant([0]))

def testStopGradient(self):
  grad = backprop.gradients_function(
      lambda x: array_ops.stop_gradient(math_ops.argmax(x)))
  self.assertAllEqual(grad([0.0])[0], None)

def testReturnSameThing(self):

  def f(x):
    return x, 2 * x

  self.assertAllEqual(backprop.gradients_function(f)(1.0)[0], 3.0)

def g(x):
  return backprop.gradients_function(f, [0])(x)[0]

def benchmark_tf_gradient_function_no_op(self):
  with context.device(CPU):
    m = gen_array_ops.identity(self._m_2)
    self._run(lambda: backprop.gradients_function(lambda x: x, [0])(m), 30000)

def fn():
  backprop.gradients_function(math_ops.reduce_sum, [0])(tensor)

def grad(x):
  value = backprop.gradients_function(math_ops.exp, [0])(x)[0]
  return value

def grad(x):
  value = backprop.gradients_function(sq, [0])(x)[0]
  return value

def benchmark_tf_gradient_function_no_op(self):
  m = self._m_2
  self._run(lambda: backprop.gradients_function(lambda x: x, [0])(m), 30000)

def benchmark_tf_gradient_function_identity(self):
  m = self._m_2
  self._run(
      lambda: backprop.gradients_function(gen_array_ops.identity, [0])(m),
      30000)

def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param):
  """Computes the theoretical Jacobian for f regarding xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_shape: the shape of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "x_size" rows
    and "y_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
  """
  x = xs[param]
  # Complex vectors are treated as vectors of twice as many reals.
  x_shape = tuple(x.shape) + (2,) if x.dtype.is_complex else x.shape
  y_factor = 2 if y_dtype.is_complex else 1

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape)
  x_val_size = _product(x_shape[1:])  # This is used for sparse gradients
  y_size = _product(y_shape) * y_factor

  # Allocate 2-D Jacobian, with x dimensions smashed into the first
  # dimension and y dimensions smashed into the second.
  jacobian = np.zeros((x_size, y_size),
                      dtype=x.dtype.real_dtype.as_numpy_dtype)

  # For each entry of dy, we set it to 1 and everything else to 0 and
  # compute the gradients -- this gives us one column of the Jacobian matrix.
  dy_data = np.zeros(y_shape, dtype=y_dtype.as_numpy_dtype)
  dy_data_flat = dy_data.ravel().view(y_dtype.real_dtype.as_numpy_dtype)
  grad_fn_unprep = backprop.gradients_function(f, [param])
  grad_fn = _prepare(lambda dy, *xs: grad_fn_unprep(*xs, dy=dy),
                     [y_dtype] + [x.dtype for x in xs])
  for col in range(y_size):
    dy_data_flat[col] = 1
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    dy_data_flat[col] = 0
    if isinstance(grad, ops.IndexedSlicesValue):
      for i, v in zip(grad.indices, grad.values):
        r_begin = i * x_val_size
        r_end = r_begin + x_val_size
        jacobian[r_begin:r_end, col] += v.flat
    else:
      jacobian[:, col] = grad.ravel().view(jacobian.dtype)

  # If the output is empty, run the gradients at least once and make sure
  # they produce zeros.
  if y_size == 0:  # don't use 'not y_size', because y_size may not be an int
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    if grad.shape != x.shape:
      raise ValueError("Empty gradient has wrong shape: expected %s, got %s" %
                       (x.shape, grad.shape))
    if np.any(grad):
      raise ValueError("Empty tensor with nonzero gradients")

  logging.vlog(1, "Theoretical Jacobian =\n%s", jacobian)

  return jacobian
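
# A minimal, self-contained sketch (not part of the original test sources)
# illustrating the column-by-column construction described in the docstring of
# _compute_theoretical_jacobian above: seeding a single element of dy and
# backpropagating yields one column of the Jacobian (a vector-Jacobian
# product). The helper name `_jacobian_by_columns` is made up for illustration,
# and the public tf.GradientTape API is used instead of the internal backprop
# and _prepare/_to_numpy helpers; it handles only real, dense gradients.
import numpy as np
import tensorflow as tf


def _jacobian_by_columns(f, x):
  """Builds the Jacobian of y = f(x), with x entries as rows, y as columns."""
  x = tf.convert_to_tensor(x)
  with tf.GradientTape(persistent=True) as tape:
    tape.watch(x)
    y = f(x)
  x_size = int(np.prod(x.shape.as_list()))
  y_size = int(np.prod(y.shape.as_list()))
  jacobian = np.zeros((x_size, y_size), dtype=x.dtype.as_numpy_dtype)
  for col in range(y_size):
    # Set one element of dy to 1 and the rest to 0; the gradient of x under
    # that seed is exactly one column of the Jacobian.
    dy = np.zeros(y_size, dtype=y.dtype.as_numpy_dtype)
    dy[col] = 1.0
    grad = tape.gradient(
        y, x, output_gradients=tf.constant(dy.reshape(y.shape.as_list())))
    jacobian[:, col] = grad.numpy().ravel()
  del tape
  return jacobian


# For f(x) = x * x at x = [1., 2.] the Jacobian is diag(2 * x), so
# _jacobian_by_columns(lambda t: t * t, [1.0, 2.0]) returns
# [[2., 0.], [0., 4.]].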