def testGradientMatchesSegmentSum(self):
  # Strategy: compute the gradient for UnsortedSegmentSum and SegmentSum
  # and compare the outputs, which should be identical.
  # NB: for this test to work, the indices must be valid for SegmentSum:
  # they must be sorted and contiguous, and num_segments must be
  # max(indices) + 1.
  indices = [0, 0, 1, 1, 1, 2, 3, 4, 5]
  n = len(indices)
  num_cols = 2
  shape = [n, num_cols]
  num_segments = max(indices) + 1
  for dtype in self.differentiable_dtypes:
    with self.cached_session(use_gpu=True):
      tf_x, np_x = self._input(shape, dtype=dtype)
      # Results from UnsortedSegmentSum
      unsorted_s = math_ops.unsorted_segment_sum(
          data=tf_x, segment_ids=indices, num_segments=num_segments)
      unsorted_jacob_t, unsorted_jacob_n = (
          gradient_checker.compute_gradient(
              tf_x,
              shape,
              unsorted_s, [num_segments, num_cols],
              x_init_value=np_x,
              delta=1))
      # Results from SegmentSum
      sorted_s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
      sorted_jacob_t, sorted_jacob_n = gradient_checker.compute_gradient(
          tf_x,
          shape,
          sorted_s, [num_segments, num_cols],
          x_init_value=np_x,
          delta=1)
      self.assertAllClose(unsorted_jacob_t, sorted_jacob_t)
      self.assertAllClose(unsorted_jacob_n, sorted_jacob_n)

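# A minimal numpy sketch (not part of the test) of the semantics compared
# above: for sorted, contiguous segment ids, the sorted and unsorted variants
# both add each input row into the bucket named by its id, so their gradients
# must agree.  `np_segment_sum` is a hypothetical illustrative helper.
def np_segment_sum(data, segment_ids, num_segments):
  out = np.zeros((num_segments,) + data.shape[1:], dtype=data.dtype)
  for row, seg in zip(data, segment_ids):
    out[seg] += row  # Accumulation order does not matter, hence "unsorted".
  return out
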
def _compute_gradient_error_float16(self, x, x32, x_shape, y, y32, y_shape):
  """Computes the gradient error for float16 inputs and/or outputs.

  This returns the same value as gradient_checker.compute_gradient_error.
  The difference is that gradient_checker.compute_gradient_error does not
  compute the numeric gradients in a numerically stable way for float16
  tensors. To fix this, this function requires float32 versions of x and y
  for the numeric gradient computation, to compare against the float16
  symbolically computed gradients.

  Args:
    x: The input tensor.
    x32: A float32 version of x.
    x_shape: The shape of x.
    y: The output tensor.
    y32: A float32 version of y. Must be calculated based on x32, not x.
    y_shape: The shape of y.

  Returns:
    The maximum error between the two Jacobians, as in
    gradient_checker.compute_gradient_error.
  """
  x_init_val = np.random.random_sample(x_shape).astype(np.float16)
  x32_init_val = x_init_val.astype(np.float32)

  # TODO(reedwm): Do not perform the unnecessary computations in
  # compute_gradient, since they double the computation time of this function.
  theoretical_grad, _ = gradient_checker.compute_gradient(
      x, x_shape, y, y_shape, delta=1e-3, x_init_value=x_init_val)
  _, numerical_grad = gradient_checker.compute_gradient(
      x32, x_shape, y32, y_shape, delta=1e-3, x_init_value=x32_init_val)
  return np.fabs(theoretical_grad - numerical_grad).max()

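# Hypothetical usage sketch for the helper above (illustrative only;
# `some_op`, `x_np`, `x_shape`, `y_shape` and `tol` are stand-ins, not
# defined here):
#
#   x = constant_op.constant(x_np, dtype=dtypes.float16)
#   x32 = math_ops.cast(x, dtypes.float32)
#   y, y32 = some_op(x), some_op(x32)  # y32 must be computed from x32.
#   err = self._compute_gradient_error_float16(
#       x, x32, x_shape, y, y32, y_shape)
#   self.assertLess(err, tol)
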
def _compareGradientY(self, x, y, np_func, tf_func,
                      numeric_gradient_type=None):
  z = np_func(x, y)
  zs = list(z.shape)
  with self.cached_session():
    inx = ops.convert_to_tensor(x)
    iny = ops.convert_to_tensor(y)
    if x.dtype in (np.float32, np.float64):
      out = 1.1 * tf_func(inx, iny)
    else:
      out = tf_func(inx, iny)
    ys = list(np.shape(y))
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        iny, ys, out, zs, x_init_value=y)
    if numeric_gradient_type is not None:
      xf = x.astype(numeric_gradient_type)
      yf = y.astype(numeric_gradient_type)
      inxf = ops.convert_to_tensor(xf)
      inyf = ops.convert_to_tensor(yf)
      outf = tf_func(inxf, inyf)
      _, jacob_n = gradient_checker.compute_gradient(
          inyf, ys, outf, zs, x_init_value=yf)
      jacob_n = jacob_n.astype(x.dtype)
    tol = self._GRAD_TOL[dtypes_lib.as_dtype(x.dtype)]
    self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol)

def _compareGpu(self, x, p, conjugate=False):
  if p is None:
    rank = x.ndim
    perm = (rank - 1) - np.arange(rank)
  else:
    perm = p
  np_ans = self._np_transpose(x, perm)
  if conjugate:
    np_ans = np.conj(np_ans)
  with self.test_session(use_gpu=True):
    inx = ops.convert_to_tensor(x)
    y = array_ops.transpose(inx, p, conjugate=conjugate)
    tf_ans = y.eval()

    self.assertAllEqual(np_ans, tf_ans)
    self.assertShapeEqual(np_ans, y)

    jacob_t = None
    # Gradient check on GPU.
    xs = list(np.shape(x))
    ys = list(np.shape(tf_ans))
    if x.dtype == np.float32:
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          inx, xs, y, ys, x, 1e-2)
      self.assertAllClose(jacob_t, jacob_n, 1e-3, 1e-3)
    elif x.dtype == np.float64:
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          inx, xs, y, ys, x, 1e-2)
      self.assertAllClose(jacob_t, jacob_n, 1e-6, 1e-6)

    return tf_ans, jacob_t

def _compareCpu(self, x, p, conjugate=False):
  np_ans = self._np_transpose(x, p)
  if conjugate:
    np_ans = np.conj(np_ans)
  with self.test_session(use_gpu=False):
    inx = ops.convert_to_tensor(x)
    y = array_ops.transpose(inx, p, conjugate=conjugate)
    tf_ans = y.eval()

    self.assertShapeEqual(np_ans, y)
    self.assertAllEqual(np_ans, tf_ans)

    jacob_t = None
    # Gradient check on CPU.
    xs = list(np.shape(x))
    ys = list(np.shape(tf_ans))
    if x.dtype in [np.float32, np.complex64]:
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          inx, xs, y, ys, x, 1e-2)
      self.assertAllClose(jacob_t, jacob_n, 1e-3, 1e-3)
    elif x.dtype in [np.float64, np.complex128]:
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          inx, xs, y, ys, x, 1e-2)
      self.assertAllClose(jacob_t, jacob_n, 1e-6, 1e-6)

    return tf_ans, jacob_t

def testEmptyFails(self):
  with ops.Graph().as_default() as g:
    with self.session(graph=g):
      x = array_ops.placeholder(dtypes.float32)
      with g.gradient_override_map({"Identity": "BadGrad"}):
        y = array_ops.identity(x)
      bad = r"Empty gradient has wrong shape: expected \(0, 3\), got \(3, 0\)"
      with self.assertRaisesRegexp(ValueError, bad):
        gradient_checker.compute_gradient(x, (0, 3), y, (0, 3))
      with self.assertRaisesRegexp(ValueError, bad):
        gradient_checker.compute_gradient_error(x, (0, 3), y, (0, 3))

def Test(self):
  with self.session(use_gpu=True):
    np.random.seed(1)
    a_np = np.random.uniform(
        low=-1.0, high=1.0,
        size=np.prod(shape_)).reshape(shape_).astype(dtype_)
    a = constant_op.constant(a_np)
    if functor_.__name__ == 'matrix_square_root':
      # Square the input matrix to ensure that its matrix square root exists.
      a = math_ops.matmul(a, a)
      a_np = a.eval()
    b = functor_(a, **kwargs_)

    # Optimal stepsize for central difference is O(epsilon^{1/3}).
    epsilon = np.finfo(dtype_).eps
    delta = epsilon**(1.0 / 3.0)
    # Tolerance obtained by looking at actual differences using
    # np.linalg.norm(theoretical - numerical, np.inf) on an -mavx build.
    tol = 1e-6 if dtype_ == np.float64 else 0.05
    theoretical, numerical = gradient_checker.compute_gradient(
        a,
        a.get_shape().as_list(),
        b,
        b.get_shape().as_list(),
        x_init_value=a_np,
        delta=delta)
    self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)

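# Worked example of the stepsize heuristic above (a sketch, not part of the
# test): central differences have O(delta^2) truncation error and
# O(epsilon / delta) rounding error, which balance at delta ~ epsilon**(1/3).
#   np.finfo(np.float32).eps ** (1.0 / 3.0)  # ~4.9e-3
#   np.finfo(np.float64).eps ** (1.0 / 3.0)  # ~6.1e-6
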
def testGradientWithEmptySegmentsAtEnd(self):
  shape = [10, 4]
  num_segments = 5
  segment_indices = [0, 1, 2, 2]
  num_indices = len(segment_indices)
  for tf_op in [
      math_ops.sparse_segment_sum_with_num_segments,
      math_ops.sparse_segment_mean_with_num_segments,
  ]:
    with self.cached_session():
      tf_indices, _, tf_x, np_x = self._sparse_input(
          shape, num_indices, dtype=dtypes_lib.float64)
      s = tf_op(
          data=tf_x,
          indices=tf_indices,
          segment_ids=segment_indices,
          num_segments=num_segments)
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          tf_x,
          shape,
          s, [num_segments, 4],
          x_init_value=np_x.astype(np.double),
          delta=1)
      self.assertAllClose(jacob_t, jacob_n)

def test_high_dim_filter_grad(self):
  x_shape = [5, 10, 10]

  # Test inputs: unaries and RGB values
  unary_np = np.random.randn(*x_shape).astype(np.float32)
  rgb_np = np.random.randint(low=0, high=256, size=x_shape).astype(np.float32)

  with self.test_session():
    unary_tf = constant_op.constant(unary_np)
    rgb_tf = constant_op.constant(rgb_np)
    y_tf = custom_module.high_dim_filter(
        unary_tf,
        rgb_tf,
        bilateral=True,
        theta_alpha=1000.,
        theta_beta=1000.,
        theta_gamma=1000.)
    out = gradient_checker.compute_gradient(
        [unary_tf, rgb_tf], [x_shape, x_shape], y_tf, x_shape)

    # We only need to compare gradients w.r.t. unaries
    computed = out[0][0].flatten()
    estimated = out[0][1].flatten()

    mask = (computed != 0)
    computed = computed[mask]
    estimated = estimated[mask]

    difference = computed - estimated
    measure1 = np.mean(difference) / np.mean(computed)
    measure2 = np.max(difference) / np.max(computed)

    print('Gradient check: measure1 = {:.6f}, measure2 = {:.6f}'.format(
        measure1, measure2))
    self.assertLess(measure1, 1e-3,
                    'Errors found in the gradient computation.')
    self.assertLess(measure2, 2e-2,
                    'Errors found in the gradient computation.')
    print('Gradient check: success!')

def _compareMulGradient(self, data):
  # data is a float matrix of shape [n, 4].  data[:, 0], data[:, 1],
  # data[:, 2], data[:, 3] are the real parts of x, the imaginary parts of
  # x, the real parts of y, and the imaginary parts of y.
  with self.cached_session():
    inp = ops.convert_to_tensor(data)
    xr, xi, yr, yi = array_ops.split(value=inp, num_or_size_splits=4, axis=1)

    def vec(x):  # Reshape to a vector
      return array_ops.reshape(x, [-1])

    xr, xi, yr, yi = vec(xr), vec(xi), vec(yr), vec(yi)

    def cplx(r, i):  # Combine to a complex vector
      return math_ops.complex(r, i)

    x, y = cplx(xr, xi), cplx(yr, yi)
    # z is x times y in the complex plane.
    z = x * y
    # Defines the loss function as the sum of all coefficients of z.
    loss = math_ops.reduce_sum(math_ops.real(z) + math_ops.imag(z))
    epsilon = 0.005
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        inp, list(data.shape), loss, [1], x_init_value=data, delta=epsilon)
    self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon)

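# Derivation sketch for the loss above (not part of the test): with
# z = x * y = (xr*yr - xi*yi) + j*(xr*yi + xi*yr), the scalar loss
# sum(Re(z) + Im(z)) has analytic partials
#   d/dxr = yr + yi,   d/dxi = yr - yi,
#   d/dyr = xr + xi,   d/dyi = xr - xi,
# which is what the numeric Jacobian is checked against.
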
def Test(self):
  np.random.seed(42)
  a = np.random.uniform(low=-1.0, high=1.0, size=shape_).astype(dtype_)
  if dtype_ in [np.complex64, np.complex128]:
    a += 1j * np.random.uniform(
        low=-1.0, high=1.0, size=shape_).astype(dtype_)
  # Optimal stepsize for central difference is O(epsilon^{1/3}).
  epsilon = np.finfo(dtype_).eps
  delta = 0.1 * epsilon**(1.0 / 3.0)
  if dtype_ in [np.float32, np.complex64]:
    tol = 3e-2
  else:
    tol = 1e-6
  with self.session(use_gpu=True):
    tf_a = constant_op.constant(a)
    tf_b = linalg_ops.qr(tf_a, full_matrices=full_matrices_)
    for b in tf_b:
      x_init = np.random.uniform(
          low=-1.0, high=1.0, size=shape_).astype(dtype_)
      if dtype_ in [np.complex64, np.complex128]:
        x_init += 1j * np.random.uniform(
            low=-1.0, high=1.0, size=shape_).astype(dtype_)
      theoretical, numerical = gradient_checker.compute_gradient(
          tf_a,
          tf_a.get_shape().as_list(),
          b,
          b.get_shape().as_list(),
          x_init_value=x_init,
          delta=delta)
      self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)

def Test(self):
  # TODO(rmlarsen): Debug illegal address bug on CUDA and re-enable
  # GPU test for matrix_solve.
  use_gpu = functor_ != linalg_ops.matrix_solve
  with self.test_session(use_gpu=use_gpu):
    np.random.seed(1)
    a_np = np.random.uniform(
        low=-1.0, high=1.0,
        size=np.prod(shape_)).reshape(shape_).astype(dtype_)
    a = constant_op.constant(a_np)
    b_np = np.random.uniform(
        low=-1.0, high=1.0,
        size=np.prod(shape_)).reshape(shape_).astype(dtype_)
    b = constant_op.constant(b_np)
    c = functor_(a, b, **kwargs_)

    # Optimal stepsize for central difference is O(epsilon^{1/3}).
    epsilon = np.finfo(dtype_).eps
    delta = epsilon**(1.0 / 3.0)
    # Tolerance obtained by looking at actual differences using
    # np.linalg.norm(theoretical - numerical, np.inf) on an -mavx build.
    tol = 1e-6 if dtype_ == np.float64 else float32_tol_fudge * 0.04
    # The gradients for a and b may be of very different magnitudes,
    # so to avoid spurious failures we test them separately.
    for factor, factor_init in [a, a_np], [b, b_np]:
      theoretical, numerical = gradient_checker.compute_gradient(
          factor,
          factor.get_shape().as_list(),
          c,
          c.get_shape().as_list(),
          x_init_value=factor_init,
          delta=delta)
      self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)

def Test(self):
  if not use_static_shape_ or a_np_.dtype in (np.int32, np.float16):
    self.skipTest("Skipping infeasible gradient test.")

  # Transpose and possibly conjugate a_np_ and b_np_ according to the
  # attributes such that tf.matmul(effective_a_np, effective_b_np, **kwargs)
  # results in a valid matrix multiplication and produces the same result as
  # np.matrix(a_np_) * np.matrix(b_np_).
  effective_a_np = _GetTransposedMatrices(a_np_, "a", kwargs_)
  effective_b_np = _GetTransposedMatrices(b_np_, "b", kwargs_)

  epsilon = np.finfo(a_np_.dtype).eps
  delta = epsilon**(1.0 / 3.0)
  tol = 20 * delta
  with self.test_session(use_gpu=True):
    a = constant_op.constant(effective_a_np)
    b = constant_op.constant(effective_b_np)
    res = math_ops.matmul(a, b, **kwargs_)
    for x, x_init in [a, effective_a_np], [b, effective_b_np]:
      theoretical, numerical = gradient_checker.compute_gradient(
          x,
          x_init.shape,
          res, [a_np_.shape[0], b_np_.shape[1]],
          x_init_value=x_init,
          delta=delta)
      self.assertAllClose(theoretical, numerical, rtol=tol, atol=tol)

def Test(self):
  np.random.seed(1)
  n = shape_[-1]
  batch_shape = shape_[:-2]
  a = np.random.uniform(
      low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(dtype_)
  a += np.conj(a.T)
  a = np.tile(a, batch_shape + (1, 1))
  # Optimal stepsize for central difference is O(epsilon^{1/3}).
  epsilon = np.finfo(dtype_).eps
  delta = 0.1 * epsilon**(1.0 / 3.0)
  # Tolerance obtained by looking at actual differences using
  # np.linalg.norm(theoretical - numerical, np.inf) on an -mavx build.
  if dtype_ == np.float32:
    tol = 1e-2
  else:
    tol = 1e-7
  with self.test_session():
    tf_a = constant_op.constant(a)
    tf_e, tf_v = linalg_ops.self_adjoint_eig(tf_a)
    for b in tf_e, tf_v:
      x_init = np.random.uniform(
          low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(dtype_)
      x_init += np.conj(x_init.T)
      x_init = np.tile(x_init, batch_shape + (1, 1))
      theoretical, numerical = gradient_checker.compute_gradient(
          tf_a,
          tf_a.get_shape().as_list(),
          b,
          b.get_shape().as_list(),
          x_init_value=x_init,
          delta=delta)
      self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)

def _compareCpu(self, x, np_func, tf_func, grad_rtol=None, grad_atol=None):
  if grad_rtol is None:
    grad_rtol = _default_tolerance(x.dtype)
  if grad_atol is None:
    grad_atol = _default_tolerance(x.dtype)
  np_ans = np_func(x)
  with self.test_session(use_gpu=False):
    inx = ops.convert_to_tensor(x)
    if x.dtype in (np.float32, np.float64,
                   dtypes_lib.bfloat16.as_numpy_dtype):
      y = 1.1 * tf_func(inx)
      np_ans *= 1.1
    else:
      y = tf_func(inx)
    tf_cpu = y.eval()
    self.assertShapeEqual(np_ans, y)
    if x.dtype == np.float16:
      self.assertAllClose(np_ans, tf_cpu, rtol=1e-3, atol=1e-3)
    elif x.dtype == dtypes_lib.bfloat16.as_numpy_dtype:
      self.assertAllClose(np_ans, tf_cpu, rtol=1e-2, atol=1e-2)
    else:
      self.assertAllClose(np_ans, tf_cpu)

    if x.dtype in (np.complex64, np.complex128) and tf_func == math_ops.sign:
      return  # Return early

    if x.dtype == np.float16:
      s = list(np.shape(x))
      jacob_t, _ = gradient_checker.compute_gradient(
          inx, s, y, s, x_init_value=x)
      xf = x.astype(np.float64)
      inxf = ops.convert_to_tensor(xf)
      yf = tf_func(inxf)
      _, jacob_n = gradient_checker.compute_gradient(
          inxf, s, yf, s, x_init_value=xf, delta=1e-2)
      jacob_n = jacob_n.astype(np.float16)
      self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
    elif x.dtype in (np.float32, np.complex64):
      s = list(np.shape(x))
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          inx, s, y, s, x_init_value=x, delta=1e-3)
      self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
    elif x.dtype in (np.float64, np.complex128):
      s = list(np.shape(x))
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          inx, s, y, s, x_init_value=x, delta=1e-5)
      self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)

def _compareGradient(self, shape, axis, exclusive, reverse):
  x = np.arange(1, 9).reshape(shape).astype(np.float64)
  with self.cached_session(use_gpu=True):
    t = ops.convert_to_tensor(x)
    result = math_ops.cumprod(t, axis, exclusive, reverse)
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        t, shape, result, shape, x_init_value=x, delta=1)
  self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)

def testEmptySucceeds(self):
  with self.cached_session():
    x = array_ops.placeholder(dtypes.float32)
    y = array_ops.identity(x)
    for grad in gradient_checker.compute_gradient(x, (0, 3), y, (0, 3)):
      self.assertEqual(grad.shape, (0, 0))
    error = gradient_checker.compute_gradient_error(x, (0, 3), y, (0, 3))
    self.assertEqual(error, 0)

def testGradient4(self):
  s = [2, 3, 4, 2]
  x = np.arange(1.0, 49.0).reshape(s).astype(np.float64)
  with self.test_session():
    t = ops.convert_to_tensor(x)
    su = math_ops.reduce_max(t)
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        t, s, su, [1], x_init_value=x, delta=1)
  self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)

def _compareBroadcastGradient(self, x):
  x_ = ops.convert_to_tensor(x)
  epsilon = 1e-3
  with self.cached_session():
    for args in [(x_, 0.), (0., x_)]:
      z = math_ops.reduce_sum(math_ops.abs(math_ops.complex(*args)))
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          x_, list(x.shape), z, [1], x_init_value=x, delta=epsilon)
      self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon)

def _test_grad_accuracy(self, dtype, grid_spec, error_spec):
  raw_grid = _make_grid(dtype, grid_spec)
  grid = ops.convert_to_tensor(raw_grid)
  with self.cached_session():
    fn = sm.log_ndtr if self._use_log else sm.ndtr

    # If there are N points in the grid,
    # grad_eval.shape = (N, N), with grad_eval[i, j] the partial derivative of
    # the ith output point w.r.t. the jth grid point.  We only expect the
    # diagonal to be nonzero.
    # TODO(b/31131137): Replace tf.compat.v1.test.compute_gradient with our
    # own custom gradient evaluation to ensure we correctly handle small
    # function delta.
    grad_eval, _ = gradient_checker.compute_gradient(
        grid, grid_spec.shape, fn(grid), grid_spec.shape)
    grad_eval = np.diag(grad_eval)

    # Check for NaN separately in order to get informative failures.
    self.assert_all_false(np.isnan(grad_eval))
    self.assert_all_true(grad_eval > 0.)
    # isfinite checks for NaN and Inf.
    self.assert_all_true(np.isfinite(grad_eval))

    # Do the same checks but explicitly compute the gradient.
    # (We did this because we're not sure if we trust
    # tf.test.compute_gradient.)
    grad_eval = gradients_impl.gradients(fn(grid), grid)[0].eval()
    self.assert_all_false(np.isnan(grad_eval))
    if self._use_log:
      g = np.reshape(grad_eval, [-1])
      half = np.ceil(len(g) / 2)
      self.assert_all_true(g[:int(half)] > 0.)
      self.assert_all_true(g[int(half):] >= 0.)
    else:
      # The ndtr gradient will only be nonzero in the range [-14, 14] for
      # float32 and [-38, 38] for float64.
      self.assert_all_true(grad_eval >= 0.)
      # isfinite checks for NaN and Inf.
      self.assert_all_true(np.isfinite(grad_eval))

    # Versus scipy.
    if not (special and stats):
      return

    expected = stats.norm.pdf(raw_grid)
    if self._use_log:
      expected /= special.ndtr(raw_grid)
      expected[np.isnan(expected)] = 0.
    # Scipy prematurely goes to zero at some places that we don't.  So don't
    # include these in the comparison.
    self.assertAllClose(
        expected.astype(np.float64)[expected > 0],
        grad_eval.astype(np.float64)[expected > 0],
        rtol=error_spec.rtol,
        atol=error_spec.atol)

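# Reference sketch for the scipy comparison above (assumes scipy is
# available): the analytic gradients being checked are
#   d/dx ndtr(x)     = norm.pdf(x)
#   d/dx log_ndtr(x) = norm.pdf(x) / ndtr(x)
# e.g.:
#   expected = stats.norm.pdf(raw_grid)
#   expected_log = stats.norm.pdf(raw_grid) / special.ndtr(raw_grid)
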
def _compareGradient(self, x, reduction_axes, rtol=1e-8, atol=1e-8):
  if reduction_axes is not None and np.shape(reduction_axes) == (1,):
    # Test scalar reduction_axes argument
    self._compareGradient(x, reduction_axes[0], rtol=rtol, atol=atol)
  with self.test_session(use_gpu=True):
    t = ops.convert_to_tensor(x)
    su = self._tf_reduce(t, reduction_axes, False)
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        t, x.shape, su, su.get_shape().as_list(), x_init_value=x, delta=1)
  self.assertAllClose(jacob_t, jacob_n, rtol=rtol, atol=atol)

def _testGradient(self, x, a, mode):
  with self.test_session(use_gpu=True):
    inx = ops.convert_to_tensor(x)
    xs = list(x.shape)
    ina = ops.convert_to_tensor(a)
    y = array_ops.pad(inx, ina, mode=mode)
    # Expected shape of y: each dimension grows by the sum of its
    # before/after paddings.
    ys = list(np.array(x.shape) + np.sum(np.array(a), axis=1))
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        inx, xs, y, ys, x_init_value=x)
  self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)

def _testGradient(self, np_input, shift, axis):
  with self.test_session():
    inx = constant_op.constant(np_input.tolist())
    xs = list(np_input.shape)
    y = manip_ops.roll(inx, shift, axis)
    # y is expected to have the same shape as the input.
    ys = xs
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        inx, xs, y, ys, x_init_value=np_input)
  self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)

def testComplexConj(self):
  with self.test_session():
    size = ()
    x = constant_op.constant(11 - 13j, dtype=dtypes.complex64)
    y = math_ops.conj(x)
    analytical, numerical = gradient_checker.compute_gradient(
        x, size, y, size)
    correct = np.array([[1, 0], [0, -1]])
    self.assertAllEqual(correct, analytical)
    self.assertAllClose(correct, numerical, rtol=3e-6)
    self.assertLess(
        gradient_checker.compute_gradient_error(x, size, y, size), 2e-5)

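# Derivation sketch for `correct` above (not part of the test): writing
# x = u + vj, conj(x) = u - vj, so with compute_gradient's
# (input component, output component) layout the Jacobian of (Re, Im) is
# [[1, 0], [0, -1]].
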
def Test(self):
  np.random.seed(1)
  n = shape_[-1]
  batch_shape = shape_[:-2]
  np_dtype = dtype_.as_numpy_dtype
  a = np.random.uniform(
      low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
  if dtype_.is_complex:
    a += 1j * np.random.uniform(
        low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
  a += np.conj(a.T)
  a = np.tile(a, batch_shape + (1, 1))
  # Optimal stepsize for central difference is O(epsilon^{1/3}).
  epsilon = np.finfo(np_dtype).eps
  delta = 0.1 * epsilon**(1.0 / 3.0)
  # Tolerance obtained by looking at actual differences using
  # np.linalg.norm(theoretical - numerical, np.inf) on an -mavx build.
  if dtype_ in (dtypes_lib.float32, dtypes_lib.complex64):
    tol = 1e-2
  else:
    tol = 1e-7
  with self.session(use_gpu=True):
    tf_a = constant_op.constant(a)
    if compute_v_:
      tf_e, tf_v = linalg_ops.self_adjoint_eig(tf_a)

      # (Complex) eigenvectors are only unique up to an arbitrary phase.
      # We normalize the vectors such that the first component has phase 0.
      top_rows = tf_v[..., 0:1, :]
      if tf_a.dtype.is_complex:
        angle = -math_ops.angle(top_rows)
        phase = math_ops.complex(math_ops.cos(angle), math_ops.sin(angle))
      else:
        phase = math_ops.sign(top_rows)
      tf_v *= phase
      outputs = [tf_e, tf_v]
    else:
      tf_e = linalg_ops.self_adjoint_eigvals(tf_a)
      outputs = [tf_e]
    for b in outputs:
      x_init = np.random.uniform(
          low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
      if dtype_.is_complex:
        x_init += 1j * np.random.uniform(
            low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
      x_init += np.conj(x_init.T)
      x_init = np.tile(x_init, batch_shape + (1, 1))
      theoretical, numerical = gradient_checker.compute_gradient(
          tf_a,
          tf_a.get_shape().as_list(),
          b,
          b.get_shape().as_list(),
          x_init_value=x_init,
          delta=delta)
      self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)

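# Minimal numpy sketch of the phase normalization above (illustrative only;
# `normalize_phase` is a hypothetical helper): multiply each eigenvector by a
# unit complex number so that its first component has phase 0, removing the
# arbitrary per-vector phase freedom of complex eigenvectors.
def normalize_phase(v):
  top_rows = v[..., 0:1, :]  # first component of each eigenvector (column)
  phase = np.exp(-1j * np.angle(top_rows))
  return v * phase
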
def _compareGradient(self, shape, sum_shape, reduction_axes):
  if reduction_axes is not None and np.shape(reduction_axes) == (1,):
    # Test scalar reduction_axes argument
    self._compareGradient(shape, sum_shape, reduction_axes[0])
  x = np.arange(1.0, 49.0).reshape(shape).astype(np.float64)
  with self.test_session():
    t = ops.convert_to_tensor(x)
    su = math_ops.reduce_sum(t, reduction_axes)
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        t, shape, su, sum_shape, x_init_value=x, delta=1)
  self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)

def testComplexMul(self):
  with self.test_session():
    size = ()
    c = constant_op.constant(5 + 7j, dtype=dtypes.complex64)
    x = constant_op.constant(11 - 13j, dtype=dtypes.complex64)
    y = c * x
    analytical, numerical = gradient_checker.compute_gradient(
        x, size, y, size)
    correct = np.array([[5, 7], [-7, 5]])
    self.assertAllEqual(correct, analytical)
    self.assertAllClose(correct, numerical, rtol=1e-4)
    self.assertLess(
        gradient_checker.compute_gradient_error(x, size, y, size), 2e-4)

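# Derivation sketch for `correct` above (not part of the test): with
# c = a + bj and x = u + vj, y = c*x has Re(y) = a*u - b*v and
# Im(y) = b*u + a*v.  In compute_gradient's layout, rows are the input
# components (u, v), giving [[a, b], [-b, a]] = [[5, 7], [-7, 5]] here.
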
def testCompareGpuVsCpu(self):
  in_shape = [1, 4, 6, 3]
  out_shape = [1, 8, 16, 3]

  for nptype in self.TYPES:
    x = np.arange(0, np.prod(in_shape)).reshape(in_shape).astype(nptype)
    for align_corners in [True, False]:
      with self.test_session(use_gpu=False):
        input_tensor = constant_op.constant(x, shape=in_shape)
        resize_out = image_ops.resize_nearest_neighbor(
            input_tensor, out_shape[1:3], align_corners=align_corners)
        grad_cpu = gradient_checker.compute_gradient(
            input_tensor, in_shape, resize_out, out_shape, x_init_value=x)

      with self.test_session(use_gpu=True):
        input_tensor = constant_op.constant(x, shape=in_shape)
        resize_out = image_ops.resize_nearest_neighbor(
            input_tensor, out_shape[1:3], align_corners=align_corners)
        grad_gpu = gradient_checker.compute_gradient(
            input_tensor, in_shape, resize_out, out_shape, x_init_value=x)

      self.assertAllClose(grad_cpu, grad_gpu, rtol=1e-5, atol=1e-5)

def testGradientRandomValues(self):
  with self.cached_session():
    us = [2, 3]
    u = array_ops.reshape(
        [0.854, -0.616, 0.767, 0.725, -0.927, 0.159], shape=us)
    v = array_ops.reshape(
        [-0.522, 0.755, 0.407, -0.652, 0.241, 0.247], shape=us)
    s = math_ops.cross(u, v)
    jacob_u, jacob_v = gradient_checker.compute_gradient(
        [u, v], [us, us], s, us)
    self.assertAllClose(jacob_u[0], jacob_u[1], rtol=1e-3, atol=1e-3)
    self.assertAllClose(jacob_v[0], jacob_v[1], rtol=1e-3, atol=1e-3)

def _compareGradient(self, x):
  with self.test_session():
    t = ops.convert_to_tensor(x)

    su = math_ops.reduce_prod(t, [])
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        t, x.shape, su, [2, 3, 4, 2], x_init_value=x, delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

    su = math_ops.reduce_prod(t, [1, 2])
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        t, x.shape, su, [2, 2], x_init_value=x, delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

    su = math_ops.reduce_prod(t, [0, 1, 2, 3])
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        t, x.shape, su, [1], x_init_value=x, delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

    su = math_ops.reduce_prod(t, 0)
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        t, x.shape, su, [3, 4, 2], x_init_value=x, delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

def _compareCpu(self, x, np_func, tf_func, grad_rtol=None, grad_atol=None):
  if grad_rtol is None:
    grad_rtol = _default_tolerance(x.dtype)
  if grad_atol is None:
    grad_atol = _default_tolerance(x.dtype)
  np_ans = np_func(x)
  with self.cached_session(use_gpu=False):
    inx = ops.convert_to_tensor(x)
    if x.dtype in (np.float32, np.float64,
                   dtypes_lib.bfloat16.as_numpy_dtype):
      y = 1.1 * tf_func(inx)
      np_ans *= 1.1
    else:
      y = tf_func(inx)
    tf_cpu = self.evaluate(y)
    self.assertShapeEqual(np_ans, y)
    if x.dtype == np.float16:
      self.assertAllClose(np_ans, tf_cpu, rtol=1e-3, atol=1e-3)
    elif x.dtype == dtypes_lib.bfloat16.as_numpy_dtype:
      self.assertAllClose(np_ans, tf_cpu, rtol=1e-2, atol=1e-2)
    else:
      self.assertAllClose(np_ans, tf_cpu)

    if x.dtype in (np.complex64, np.complex128) and tf_func == math_ops.sign:
      return  # Return early

    if x.dtype == np.float16:
      s = list(np.shape(x))
      jacob_t, _ = gradient_checker.compute_gradient(
          inx, s, y, s, x_init_value=x)
      xf = x.astype(np.float64)
      inxf = ops.convert_to_tensor(xf)
      yf = tf_func(inxf)
      _, jacob_n = gradient_checker.compute_gradient(
          inxf, s, yf, s, x_init_value=xf, delta=1e-2)
      jacob_n = jacob_n.astype(np.float16)
      self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
    elif x.dtype in (np.float32, np.complex64):
      s = list(np.shape(x))
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          inx, s, y, s, x_init_value=x, delta=1e-3)
      self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
    elif x.dtype in (np.float64, np.complex128):
      s = list(np.shape(x))
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          inx, s, y, s, x_init_value=x, delta=1e-5)
      self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)

def _testLargeBatchSparseMatrixMatMulGrad(self, datatype, transpose_a,
                                          transpose_b, adjoint_a, adjoint_b,
                                          transpose_output, conjugate_output,
                                          batched_inputs):
  if batched_inputs:
    a_shape = (3, 5, 11)
    b_shape = (3, 11, 13)
    transpose = lambda x: np.transpose(x, (0, 2, 1))
  else:
    a_shape = (5, 11)
    b_shape = (11, 13)
    transpose = np.transpose

  sparsify = lambda m: m * (m > 0)
  a_mats_val = sparsify(
      np.random.randn(*a_shape) +
      1.j * np.random.randn(*a_shape)).astype(datatype)
  if transpose_a or adjoint_a:
    a_mats_val = transpose(a_mats_val)
  if adjoint_a:
    a_mats_val = np.conj(a_mats_val)

  b_mats_val = (np.random.randn(*b_shape) +
                1.j * np.random.randn(*b_shape)).astype(datatype)
  if transpose_b or adjoint_b:
    b_mats_val = transpose(b_mats_val)
  if adjoint_b:
    b_mats_val = np.conj(b_mats_val)

  with self.test_session():
    a_mats = ops.convert_to_tensor(a_mats_val, dtype=datatype)
    b_mats = ops.convert_to_tensor(b_mats_val, dtype=datatype)
    locs = array_ops.where(abs(a_mats_val) > 0)
    a_sm = sparse_csr_matrix_ops.dense_to_csr_sparse_matrix(a_mats, locs)
    c_mats = sparse_csr_matrix_ops.sparse_matrix_mat_mul(
        a_sm,
        b_mats,
        transpose_a=transpose_a,
        transpose_b=transpose_b,
        adjoint_a=adjoint_a,
        adjoint_b=adjoint_b,
        transpose_output=transpose_output,
        conjugate_output=conjugate_output)
    for [ten, val, nn] in [[a_mats, a_mats_val, "a"],
                           [b_mats, b_mats_val, "b"]]:
      tf_logging.info("Testing gradients for %s" % nn)
      theoretical, numerical = gradient_checker.compute_gradient(
          ten,
          ten.get_shape().as_list(),
          c_mats,
          c_mats.get_shape().as_list(),
          x_init_value=val,
          delta=1e-3)
      self.assertAllClose(theoretical, numerical, atol=1e-3, rtol=1e-3)

def _testGradient(self, np_input, bias, dtype, data_format, use_gpu):
  with self.test_session(use_gpu=use_gpu):
    if data_format == "NCHW":
      np_input = self._NHWCToNCHW(np_input)
    input_tensor = constant_op.constant(
        np_input, shape=np_input.shape, dtype=dtype)
    bias_tensor = constant_op.constant(bias, shape=bias.shape, dtype=dtype)
    output_tensor = nn_ops.bias_add(
        input_tensor, bias_tensor, data_format=data_format)
    tensor_jacob_t, tensor_jacob_n = gradient_checker.compute_gradient(
        input_tensor, np_input.shape, output_tensor, np_input.shape)
    bias_jacob_t, bias_jacob_n = gradient_checker.compute_gradient(
        bias_tensor, bias.shape, output_tensor, np_input.shape)

    # Test gradient of BiasAddGrad
    bias_add_grad = gradients_impl.gradients(
        nn_ops.l2_loss(output_tensor), bias_tensor)[0]
    grad_jacob_t, grad_jacob_n = gradient_checker.compute_gradient(
        output_tensor, np_input.shape, bias_add_grad, bias.shape)

    if dtype == np.float16:
      # Compare fp16 theoretical gradients to fp32 numerical gradients,
      # since fp16 numerical gradients are too imprecise unless great
      # care is taken with choosing the inputs and the delta. This is
      # a weaker check (in particular, it does not test the op itself,
      # only its gradient), but it's much better than nothing.
      input_tensor = constant_op.constant(
          np_input, shape=np_input.shape, dtype=np.float32)
      bias_tensor = constant_op.constant(
          bias, shape=bias.shape, dtype=np.float32)
      output_tensor = nn_ops.bias_add(
          input_tensor, bias_tensor, data_format=data_format)
      _, tensor_jacob_n = gradient_checker.compute_gradient(
          input_tensor, np_input.shape, output_tensor, np_input.shape)
      _, bias_jacob_n = gradient_checker.compute_gradient(
          bias_tensor, bias.shape, output_tensor, np_input.shape)
      bias_add_grad = gradients_impl.gradients(
          nn_ops.l2_loss(output_tensor), bias_tensor)[0]
      _, grad_jacob_n = gradient_checker.compute_gradient(
          output_tensor, np_input.shape, bias_add_grad, bias.shape)

    threshold = 2e-3
    if dtype == dtypes.float64:
      threshold = 1e-10
    self.assertAllClose(tensor_jacob_t, tensor_jacob_n, threshold, threshold)
    # TODO(annarev): Re-add assertion for float16, float32 dtypes and NCHW
    # once we figure out why this check started failing with cuda mavx.
    if dtype == dtypes.float64 or data_format != "NCHW":
      self.assertAllClose(bias_jacob_t, bias_jacob_n, threshold, threshold)
      self.assertAllClose(grad_jacob_t, grad_jacob_n, threshold, threshold)

def _ConstructAndTestGradientForConfig(self, batch, input_shape, filter_shape,
                                       in_depth, out_depth, stride, padding,
                                       test_input, data_format, use_gpu):
  input_planes, input_rows, input_cols = input_shape
  filter_planes, filter_rows, filter_cols = filter_shape

  input_shape = [batch, input_planes, input_rows, input_cols, in_depth]
  filter_shape = [
      filter_planes, filter_rows, filter_cols, in_depth, out_depth
  ]

  if isinstance(stride, collections.abc.Iterable):
    strides = [1] + list(stride) + [1]
  else:
    strides = [1, stride, stride, stride, 1]

  if padding == "VALID":
    output_planes = int(
        math.ceil((input_planes - filter_planes + 1.0) / strides[1]))
    output_rows = int(
        math.ceil((input_rows - filter_rows + 1.0) / strides[2]))
    output_cols = int(
        math.ceil((input_cols - filter_cols + 1.0) / strides[3]))
  else:
    output_planes = int(math.ceil(float(input_planes) / strides[1]))
    output_rows = int(math.ceil(float(input_rows) / strides[2]))
    output_cols = int(math.ceil(float(input_cols) / strides[3]))
  output_shape = [batch, output_planes, output_rows, output_cols, out_depth]

  input_size = 1
  for x in input_shape:
    input_size *= x
  filter_size = 1
  for x in filter_shape:
    filter_size *= x
  input_data = [x * 1.0 / input_size for x in range(0, input_size)]
  filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]

  for data_type in self._DtypesToTest(use_gpu=use_gpu):
    # TODO(mjanusz): Modify gradient_checker to also provide max relative
    # error and synchronize the tolerance levels between the tests for forward
    # and backward computations.
    if data_type == dtypes.float64:
      tolerance = 1e-8
    elif data_type == dtypes.float32:
      tolerance = 5e-3
    elif data_type == dtypes.float16:
      tolerance = 1e-3

    with self.test_session(use_gpu=use_gpu):
      orig_input_tensor = constant_op.constant(
          input_data, shape=input_shape, dtype=data_type, name="input")
      filter_tensor = constant_op.constant(
          filter_data, shape=filter_shape, dtype=data_type, name="filter")

      if data_format == "NCDHW":
        input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
        new_strides = test_util.NHWCToNCHW(strides)
      else:
        input_tensor = orig_input_tensor
        new_strides = strides

      conv = nn_ops.conv3d(
          input_tensor,
          filter_tensor,
          new_strides,
          padding,
          data_format=data_format,
          name="conv")

      if data_format == "NCDHW":
        conv = test_util.NCHWToNHWC(conv)

      self.assertEqual(conv.shape, tensor_shape.TensorShape(output_shape))

      if test_input:
        jacob_t, jacob_n = gradient_checker.compute_gradient(
            orig_input_tensor, input_shape, conv, output_shape)
      else:
        jacob_t, jacob_n = gradient_checker.compute_gradient(
            filter_tensor, filter_shape, conv, output_shape)

      if data_type != dtypes.float16:
        reference_jacob_t = jacob_t
        err = np.fabs(jacob_t - jacob_n).max()
      else:
        # Compare fp16 theoretical gradients to fp32 theoretical gradients,
        # since fp16 numerical gradients are too imprecise.
        err = np.fabs(jacob_t - reference_jacob_t).max()

      print("conv3d gradient error = ", err)
      self.assertLess(err, tolerance)

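# Worked example of the output-size formulas above (a sketch, not part of the
# test): with input_rows = 7, filter_rows = 3 and stride 2,
#   "VALID": ceil((7 - 3 + 1) / 2) = ceil(2.5) = 3
#   "SAME":  ceil(7 / 2)           = ceil(3.5) = 4
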