def _directional_backward_gradients(self, xs, ys, params, directions):
    no_gxs = self.no_gxs

    gys = [
        None if gy is None
        # Copy is needed to avoid being updated during backprop, which
        # would affect the numerical gradient.
        # TODO(niboshi): Preserve strides, for testing purpose.
        else chainer.Variable(gy.copy(), requires_grad=False)
        for gy in self.gys]

    # Backward
    chainer.backward(ys, gys)

    for no_gx, x in six.moves.zip(no_gxs, xs):
        if no_gx and x.grad is not None:
            raise RuntimeError('gradient of int variable must be None')

    grads = ([
        None if x is None else x.grad
        for x, no_gx in six.moves.zip(xs, no_gxs)
        if not no_gx
    ] + [p.grad for p in params])

    gx_accum = 0
    assert len(grads) == len(directions)
    for g, direction in six.moves.zip(grads, directions):
        if g is not None:
            assert direction is not None
            gx_accum += (g.astype(numpy.float64) * direction).sum()

    return gx_accum
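# The analytic directional gradient accumulated above is typically checked
# against a numerical estimate. A minimal sketch of that counterpart follows;
# it is not part of the source. `func`, `xs`, `gys`, `directions`, and `eps`
# are hypothetical names, and `func` is assumed to take and return plain
# numpy arrays.
def _directional_numeric_gradient(func, xs, gys, directions, eps=1e-3):
    # Contract the outputs with the upstream gradients `gys` so the central
    # difference approximates sum_i (dL/dx_i) . d_i, i.e. the same quantity
    # as `gx_accum` above.
    def weighted_output(sign):
        ys = func(*[x + sign * eps * d for x, d in zip(xs, directions)])
        return sum((y * gy).sum() for y, gy in zip(ys, gys))

    return (weighted_output(+1) - weighted_output(-1)) / (2 * eps)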
def first_order_grad(*inputs):
    xs = inputs[:n_x]
    gys = inputs[n_x:]

    ys = _as_tuple(func(*xs))
    _check_outputs_and_grad_outputs(ys, gys)

    chainer.backward(ys, gys, enable_double_backprop=True)

    gxs = []
    errors = []
    for i, (no_gx, x) in enumerate(six.moves.zip(first_order_no_gxs, xs)):
        if no_gx:
            if x.grad is not None:
                errors.append(
                    '[{}]: Gradient was calculated while it was expected '
                    'not to be.'.format(i))
        else:
            if x.grad is None:
                gxs.append(None)
            else:
                gxs.append(x.grad_var)

    if len(errors) > 0:
        f = six.StringIO()
        f.write('There are errors retrieving first-order gradients:\n')
        f.write('Inputs: {}\n'.format(utils._format_array_props(xs)))
        f.write('Skip: {}\n'.format(
            ', '.join(str(no_gx) for no_gx in first_order_no_gxs)))
        f.write('Errors:\n')
        for error in errors:
            f.write('{}\n'.format(error))
        raise RuntimeError(f.getvalue())

    return tuple(gxs + [p.grad_var for p in params])
def test_duplicate_outputs(self):
    x = chainer.Variable(np.array(0, np.float32))
    y = chainer.functions.identity(x)
    y.grad = np.array(3, np.float32)
    with testing.assert_warns(RuntimeWarning):
        chainer.backward([y, y])
    # 6 might be expected, but y.grad is used only once
    testing.assert_allclose(x.grad, np.array(3, np.float32))
def test_warn_no_grad(self):
    x = chainer.Variable(np.array(4, np.float32))
    x.grad = np.array(3, np.float32)
    y = x * 2
    with testing.assert_warns(RuntimeWarning):
        chainer.backward([y])
    testing.assert_allclose(x.grad, np.array(3, np.float32))
    assert y.grad is None
def check_multiple_output_2args(self, xp, skip_retain_grad_test=False):
    x = chainer.Variable(xp.array([1, 2], np.float32))
    h = x * 2
    y0 = h * 3
    y1 = h * 4
    gy0 = chainer.Variable(xp.array([1, 10], np.float32))
    gy1 = chainer.Variable(xp.array([100, 1000], np.float32))
    chainer.backward([y0, y1], [gy0, gy1])
    testing.assert_allclose(x.grad, np.array([806, 8060], np.float32))
    if skip_retain_grad_test:
        return
    assert y0.grad is None
    assert y1.grad is None
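# Hand check of the expected value above: with h = 2 * x, y0 = 3 * h = 6 * x
# and y1 = 4 * h = 8 * x, the accumulated gradient is
#     x.grad = 6 * gy0 + 8 * gy1
#            = 6 * [1, 10] + 8 * [100, 1000] = [806, 8060].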
def test_length_check(self):
    x = chainer.Variable(np.array(3, np.float32))
    y = chainer.functions.identity(x)
    gy = chainer.Variable(np.array(7, np.float32))

    with self.assertRaises(ValueError):
        chainer.backward([y], [])
    with self.assertRaises(ValueError):
        chainer.backward([y], [gy, gy])
    with self.assertRaises(ValueError):
        chainer.backward([], [gy])
    with self.assertRaises(ValueError):
        chainer.backward([y, y], [gy])

    chainer.backward([y], [gy])
def first_order_grad(*inputs):
    xs = inputs[:n_x]
    gys = inputs[n_x:]

    ys = _as_tuple(func(*xs))

    # `gys` (inputs to the `first_order_grad` forward function) may have
    # been cast to float64 by `numerical_grad`. For functions that demote
    # dtypes (e.g. `F.cast` to float16), the dtypes of `ys` (e.g. outputs
    # of `F.cast`) and `gys` (e.g. given by `numerical_grad`) may mismatch,
    # so those dtypes must be aligned here.
    gys = [
        None if gy is None
        else chainer.functions.cast(gy, y.dtype)
        for y, gy in zip(ys, gys)]

    _check_outputs_and_grad_outputs(ys, gys)

    chainer.backward(ys, gys, enable_double_backprop=True)

    gxs = []
    errors = []
    for i, (no_gx, x) in enumerate(six.moves.zip(first_order_no_gxs, xs)):
        if no_gx:
            if x.grad is not None:
                errors.append(
                    '[{}]: Gradient was calculated while it was expected '
                    'not to be.'.format(i))
        else:
            if x.grad is None:
                gxs.append(None)
            else:
                gxs.append(x.grad_var)

    if len(errors) > 0:
        f = six.StringIO()
        f.write('There are errors retrieving first-order gradients:\n')
        f.write('Inputs: {}\n'.format(utils._format_array_props(xs)))
        f.write('Skip: {}\n'.format(
            ', '.join(str(no_gx) for no_gx in first_order_no_gxs)))
        f.write('Errors:\n')
        for error in errors:
            f.write('{}\n'.format(error))
        raise RuntimeError(f.getvalue())

    return tuple(gxs + [p.grad_var for p in params])
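# Hedged illustration (not from the source) of the dtype mismatch that the
# cast above guards against: `numerical_grad` hands back float64 grad
# outputs, while a dtype-demoting function such as `F.cast` to float16
# produces float16 outputs. Aligning the grad output's dtype with the
# output's dtype makes backprop through such a function well-defined.
import numpy
import chainer
import chainer.functions as F

x = chainer.Variable(numpy.array([1.0, 2.0], numpy.float32))
y = F.cast(x, numpy.float16)                         # output dtype: float16
gy = chainer.Variable(numpy.ones(2, numpy.float64))  # e.g. from numerical_grad
gy_aligned = F.cast(gy, y.dtype)                     # align dtypes as above
chainer.backward([y], [gy_aligned], enable_double_backprop=True)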
def test_multiple_output_call_count(self):
    x = chainer.Variable(np.array([1, 2], np.float32))

    f = chainer.FunctionNode()
    f.forward = mock.MagicMock(
        side_effect=lambda xs: tuple(x * 2 for x in xs))
    f.backward = mock.MagicMock(
        side_effect=lambda _, gys: tuple(gy * 2 for gy in gys))

    h, = f.apply((x,))
    y0 = h * 3
    y1 = h * 4
    y0.grad = np.array([1, 10], np.float32)
    y1.grad = np.array([100, 1000], np.float32)
    chainer.backward([y0, y1])
    testing.assert_allclose(x.grad, np.array([806, 8060], np.float32))
    # The gradients flowing into `h` from both outputs are accumulated
    # before `f.backward` runs, so it must be called exactly once.
    assert f.backward.call_count == 1
def test_type_check(self):
    x = chainer.Variable(self._rand())
    y = x * x
    y.grad = self._rand()
    gy = chainer.Variable(self._rand())
    with self.assertRaises(TypeError):
        chainer.backward(y)
    with self.assertRaises(TypeError):
        chainer.backward([y], gy)
    chainer.backward([y])
    chainer.backward([y], [gy])
def test_no_output(self):
    chainer.backward([])
    chainer.backward([], [])
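# Minimal end-to-end usage sketch of `chainer.backward`, mirroring the
# pattern exercised by the tests above (seed `y.grad`, then call
# `chainer.backward` on a list of output variables). Not from the source.
import numpy as np
import chainer

x = chainer.Variable(np.array([1.0, 2.0], np.float32))
y = x * 3
y.grad = np.ones(2, np.float32)  # seed the output gradient
chainer.backward([y])            # equivalent to y.backward() here
print(x.grad)                    # -> [3. 3.]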