def test_backprop_multiple_graphs_non_existing(method):
    shape = (1,)
    dtype = chainerx.float32

    with chainerx.backprop_scope('bp1') as backprop_id1, \
            chainerx.backprop_scope('bp2') as backprop_id2:

        xs = (
            chainerx.full(shape, 2, dtype).require_grad(backprop_id1),
            chainerx.full(shape, 5, dtype).require_grad(backprop_id1),
        )
        y = xs[0] * xs[1]
        if method == 'backward':
            chainerx.backward(y, backprop_id2)
            assert xs[0].get_grad(backprop_id1) is None
            assert xs[1].get_grad(backprop_id1) is None
        elif method == 'grad':
            grads = chainerx.grad([y], xs, backprop_id2)
            assert len(grads) == 2
            assert grads[0] is None
            assert grads[1] is None
        else:
            assert False

        with pytest.raises(chainerx.ChainerxError):
            xs[0].get_grad(backprop_id2)
        with pytest.raises(chainerx.ChainerxError):
            xs[1].get_grad(backprop_id2)

def fprop(x0, x1):
    assert x0.is_grad_required()
    h = x0 * (x0 + x1)
    chainerx.backward(h, enable_double_backprop=True)
    gx0 = x0.get_grad()  # dh/dx0 = 2 * x0 + x1
    x0.cleargrad()
    return gx0,

def test_backward_keyword_arguments():
    x = chainerx.full((1,), 2, chainerx.float32)
    with chainerx.backprop_scope('bp1') as backprop_id1:
        x.require_grad(backprop_id=backprop_id1)
        chainerx.backward(x, backprop_id=backprop_id1)
        with pytest.raises(
                TypeError, match=r'.*incompatible function arguments.*'):
            chainerx.backward(body=x, backprop_id=backprop_id1)

def fprop(xs_, extra_xs_):
    x, = xs_
    t, = extra_xs_
    y = x * (x + t)
    chainerx.backward(y, enable_double_backprop=True)
    gx = x.get_grad()  # dy/dx = 2x + t
    x.cleargrad()
    return gx,

def test_backward_sole_array_node():
    shape = (1,)
    dtype = chainerx.float32
    x = chainerx.full(shape, 2, dtype)
    expected_gx = chainerx.full(shape, 1, dtype)
    x.require_grad()
    chainerx.backward(x)
    _assert_arrays_equal(x.get_grad(), expected_gx)

def test_backward_multiple_graphs_non_existing():
    shape = (1,)
    dtype = chainerx.float32
    x1 = chainerx.full(shape, 2, dtype)
    x2 = chainerx.full(shape, 5, dtype)
    with chainerx.backprop_scope('bp1') as backprop_id1, \
            chainerx.backprop_scope('bp2') as backprop_id2:
        x1.require_grad(backprop_id1)
        x2.require_grad(backprop_id1)
        y = x1 * x2
        with pytest.raises(chainerx.ChainerxError):
            chainerx.backward(y, backprop_id2)

def test_backprop_sole_array_node(method):
    shape = (1,)
    dtype = chainerx.float32
    x = chainerx.full(shape, 2, dtype).require_grad()
    expected_gx = chainerx.full(shape, 1, dtype)
    if method == 'backward':
        chainerx.backward(x)
        gx = x.get_grad()
    elif method == 'grad':
        gx, = chainerx.grad([x], [x])
    else:
        assert False
    _assert_arrays_equal(gx, expected_gx)

def fprop(x0, x1):
    assert x0.is_grad_required(bp_x0)
    h = x0 * (x0 + x1)
    if method0 == 'backward':
        chainerx.backward(h, backprop_id=bp_x0)
        gx0 = x0.get_grad(bp_x0)
    elif method0 == 'grad':
        gx0, = chainerx.grad([h], [x0], backprop_id=bp_x0)
    else:
        assert False
    # gx0 = dh/dx0 = 2 * x0 + x1. It is detached from the bp_x0 graph but
    # remains part of the bp_x1 graph.
    assert not gx0.is_backprop_required(bp_x0)
    assert gx0.is_backprop_required(bp_x1)
    return x0 * gx0,

def _check_backprop(
        xs, expected_gxs, fprop, extra_xs, gys=None, backprop_id=None):
    # Checks for test validity
    assert isinstance(xs, tuple)
    assert isinstance(expected_gxs, tuple)
    assert callable(fprop)
    assert isinstance(extra_xs, tuple)
    assert len(xs) == len(expected_gxs)
    assert all([isinstance(a, chainerx.ndarray) for a in xs])
    assert all([(isinstance(a, chainerx.ndarray)
                 or a is chainerx.ChainerxError) for a in expected_gxs])
    assert all([isinstance(a, chainerx.ndarray) for a in extra_xs])

    # Forward
    outputs = fprop(xs, extra_xs)

    # Set output gradients
    if gys is None:
        gys = (None,) * len(outputs)
    assert len(gys) == len(outputs)
    for output, gy in zip(outputs, gys):
        assert not output.is_grad_required()
        output.set_grad(gy, backprop_id)

    # Backward
    chainerx.backward(outputs, backprop_id)

    # Check gradients of input arrays
    for i, expected_gx in enumerate(expected_gxs):
        x = xs[i]
        if expected_gx is chainerx.ChainerxError:
            with pytest.raises(chainerx.ChainerxError):
                x.get_grad(backprop_id)
        else:
            gx = x.get_grad(backprop_id)
            _assert_arrays_equal(gx, expected_gx)

    # Check gradients of output arrays
    for output, gy in zip(outputs, gys):
        if gy is None:
            assert not output.is_grad_required(backprop_id)
            with pytest.raises(chainerx.ChainerxError):
                output.get_grad(backprop_id)
        else:
            assert output.is_grad_required(backprop_id)
            _assert_arrays_equal(gy, output.get_grad(backprop_id))

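# A minimal usage sketch for _check_backprop (a hypothetical test, not part
# of the original suite): for y = x * t with x = 2 and t = 3, the gradient
# of x is t = 3. The fprop callable receives the input and extra arrays as
# tuples, matching the helper's calling convention above.
def test_check_backprop_usage_example():
    shape = (1,)
    dtype = chainerx.float32
    xs = (chainerx.full(shape, 2, dtype).require_grad(),)
    extra_xs = (chainerx.full(shape, 3, dtype),)
    expected_gxs = (chainerx.full(shape, 3, dtype),)

    def fprop(xs_, extra_xs_):
        x, = xs_
        t, = extra_xs_
        return x * t,

    _check_backprop(xs, expected_gxs, fprop, extra_xs)
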
def _check_backward(fprop, xs, expected_gxs, gys=None, backprop_id=None):
    # Checks for test validity.
    assert callable(fprop)
    assert isinstance(xs, tuple)
    assert isinstance(expected_gxs, tuple)
    assert len(xs) == len(expected_gxs)
    assert all([isinstance(a, chainerx.ndarray) for a in xs])
    assert all(
        [isinstance(a, chainerx.ndarray) or a is None for a in expected_gxs])

    # Forward.
    ys = fprop(*xs)

    # Set output gradients.
    if gys is not None:
        assert len(gys) == len(ys)
        for y, gy in zip(ys, gys):
            assert not y.is_grad_required()
            y.set_grad(gy, backprop_id)

    # Backward.
    chainerx.backward(ys, backprop_id)

    # Check gradients of input arrays.
    for x, expected_gx in zip(xs, expected_gxs):
        if expected_gx is None:
            with pytest.raises(chainerx.ChainerxError):
                x.get_grad(backprop_id)
        else:
            gx = x.get_grad(backprop_id)
            _assert_arrays_equal(gx, expected_gx)

    # Check gradients of output arrays. The default gradients must pair
    # one-to-one with the outputs ys.
    if gys is None:
        gys = (None,) * len(ys)
    for y, gy in zip(ys, gys):
        if gy is None:
            assert not y.is_grad_required(backprop_id)
            with pytest.raises(chainerx.ChainerxError):
                y.get_grad(backprop_id)
        else:
            assert y.is_grad_required(backprop_id)
            _assert_arrays_equal(gy, y.get_grad(backprop_id))

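# A minimal usage sketch for _check_backward (a hypothetical test, not part
# of the original suite): for y = x0 * x1, the gradients are gx0 = x1 and
# gx1 = x0. Unlike _check_backprop, this helper unpacks the inputs when
# calling fprop.
def test_check_backward_usage_example():
    shape = (1,)
    dtype = chainerx.float32
    xs = (
        chainerx.full(shape, 2, dtype).require_grad(),
        chainerx.full(shape, 5, dtype).require_grad(),
    )
    expected_gxs = (
        chainerx.full(shape, 5, dtype),
        chainerx.full(shape, 2, dtype),
    )

    def fprop(x0, x1):
        return x0 * x1,

    _check_backward(fprop, xs, expected_gxs)
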
def test_backprop_multiple_graphs_non_existing(method):
    shape = (1,)
    dtype = chainerx.float32

    with chainerx.backprop_scope('bp1') as backprop_id1, \
            chainerx.backprop_scope('bp2') as backprop_id2:

        xs = (
            chainerx.full(shape, 2, dtype).require_grad(backprop_id1),
            chainerx.full(shape, 5, dtype).require_grad(backprop_id1),
        )
        y = xs[0] * xs[1]
        with pytest.raises(chainerx.ChainerxError):
            if method == 'backward':
                chainerx.backward(y, backprop_id2)
            elif method == 'grad':
                chainerx.grad([y], xs, backprop_id2)
            else:
                assert False

def test_multiple_graphs_double_backprop():
    with chainerx.backprop_scope('bp_y') as bp_y, \
            chainerx.backprop_scope('bp_x') as bp_x:

        x = chainerx.full((1,), 2, chainerx.float32)
        x.require_grad(backprop_id=bp_x)

        y = chainerx.full((1,), 3, chainerx.float32)
        y.require_grad(backprop_id=bp_y)

        z = x * (x + y)
        chainerx.backward(z, backprop_id=bp_x)

        gx = x.get_grad(bp_x)  # dz/dx = 2x + y
        # The gradient is detached from bp_x but still belongs to bp_y,
        # so it can be backpropagated through the bp_y graph.
        assert not gx.is_backprop_required(backprop_id=bp_x)
        assert gx.is_backprop_required(backprop_id=bp_y)

        w = x * gx  # w = x * (2x + y)
        chainerx.backward(w, backprop_id=bp_y)

        e = chainerx.full((1,), 2, chainerx.float32)
        _assert_arrays_equal(y.get_grad(bp_y), e)  # dw/dy = x = 2

def backward(outputs, grad_outputs=None, **kwargs):
    """backward(outputs, grad_outputs=None, *, enable_double_backprop=False)

    Runs backpropagation from variables simultaneously.

    .. warning::

        This feature is experimental. The interface can change in the
        future.

    Args:
        outputs (tuple or list of :class:`~chainer.Variable`):
            A sequence of output variables from which backprop starts.
        grad_outputs (None or tuple or list of :class:`~chainer.Variable`):
            A sequence of variables that gives the initial value of each
            output gradient.
            If this argument is ``None``, backprop uses
            :attr:`~chainer.Variable.grad_var` of ``outputs``.
        enable_double_backprop (bool): If ``True``,
            computational trace of the whole backpropagation procedure is
            recorded to the computational graph so that one can further do
            backpropagation from the resulting gradients. Note that
            enabling it results in larger memory consumption needed to
            store the gradients w.r.t. intermediate variables that are
            required for the second gradient computation.

    .. seealso::
       :meth:`chainer.Variable.backward`
       :func:`chainer.grad`

    """
    enable_double_backprop, = argument.parse_kwargs(
        kwargs, ('enable_double_backprop', False),
        retain_grad='semantics for retain_grad=True is under discussion',
        loss_scale='chainer.backward does not support loss_scale option',
    )
    if not isinstance(outputs, (tuple, list)):
        raise TypeError(
            'outputs must be a tuple or a list, not {}.'
            .format(type(outputs)))
    for v in outputs:
        if not isinstance(v, chainer.Variable):
            raise TypeError(
                'each output must be a Variable, not {}'.format(type(v)))
    if grad_outputs is not None:
        if not isinstance(grad_outputs, (tuple, list)):
            raise TypeError(
                'grad_outputs must be None, a tuple, or a list, not {}.'
                .format(type(grad_outputs)))
        if len(outputs) != len(grad_outputs):
            raise ValueError(
                'grad_outputs must be of the same length as outputs.\n'
                'len(outputs) = {}, len(grad_outputs) = {}'
                .format(len(outputs), len(grad_outputs)))

    is_chainerx = [v._has_chainerx_array for v in outputs]

    if any(is_chainerx):
        if not all(is_chainerx):
            # The restriction is required as soon as the workarounds below
            # are removed.
            raise ValueError('cannot mix chainerx and other backends')

        # Cannot use chainerx.backward directly, because it does not follow
        # retain_grad=False
        # TODO(kataoka): Fix chainerx.backward and remove this workaround
        if grad_outputs is None:
            grad_outputs = []
            for y in outputs:
                grad_outputs.append(y.grad_var)
                y.grad_var = None

        # The check is required because chainerx.backward sets default
        # grads.
        # TODO(kataoka): Fix chainerx.backward and remove this workaround
        indices = [i for i, gy in enumerate(grad_outputs) if gy is not None]
        outputs = [outputs[i] for i in indices]
        grad_outputs = [grad_outputs[i] for i in indices]

        # Use new variables to start backprop
        # TODO(kataoka): Implement chainerx.backward(output, grad_outputs)
        # and remove this workaround.
        outputs = chainer.functions.identity(*outputs)
        if not isinstance(outputs, tuple):
            outputs = outputs,
        grad_outputs = chainer.functions.identity(*grad_outputs)
        if not isinstance(grad_outputs, tuple):
            grad_outputs = grad_outputs,

        # TODO(kataoka): Even after F.identity, non-float grad cannot be
        # set. Move the check to elsewhere and remove this workaround.
        outputs_ = []
        for y, gy in zip(outputs, grad_outputs):
            if not y.requires_grad and gy is not None:
                warnings.warn(
                    'Some of grads are ignored by chainer.backward.\n'
                    'backend: ChainerX, '
                    'output.dtype: {}, grad_output.dtype: {}'.format(
                        y.dtype, gy.dtype),
                    RuntimeWarning)
                continue
            y.grad_var = gy
            outputs_.append(y)
        outputs = outputs_
        del outputs_

        # See also the ChainerX case of Variable.backward
        arrs = []
        for y in outputs:
            arr = y._data[0]
            assert isinstance(arr, chainerx.ndarray)
            arrs.append(arr)
        chainerx.backward(
            arrs, enable_double_backprop=enable_double_backprop)
        return

    if grad_outputs is None:
        grad_outputs = []
        for y in outputs:
            grad_var = y.grad_var
            if grad_var is None:
                warnings.warn(
                    'outputs contains a Variable without grad, or '
                    'duplicate outputs. Note that '
                    'chainer.backward does not set default grad.',
                    RuntimeWarning)
            y.grad_var = None
            grad_outputs.append(grad_var)
    outputs = [
        (y.node, gy) for y, gy in zip(outputs, grad_outputs)
        if gy is not None]
    with chainer.using_config('enable_backprop', enable_double_backprop):
        _backprop_to_all(outputs, False, None)
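

# A minimal usage sketch for the backward function above (assumed example,
# not part of the original module). The output gradient is seeded
# explicitly because, as the warning above notes, chainer.backward does not
# set default grads on outputs.
import numpy as np


def _backward_usage_example():
    x = chainer.Variable(np.array([2.0], dtype=np.float32))
    y = x * x
    y.grad = np.ones_like(y.array)  # seed the output gradient
    backward([y])
    assert np.allclose(x.grad, [4.0])  # d(x^2)/dx = 2x = 4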