Example #1
def test_backprop_multiple_graphs_non_existing(method):
    shape = (1, )
    dtype = chainerx.float32

    with chainerx.backprop_scope('bp1') as backprop_id1, \
            chainerx.backprop_scope('bp2') as backprop_id2:
        xs = (
            chainerx.full(shape, 2, dtype).require_grad(backprop_id1),
            chainerx.full(shape, 5, dtype).require_grad(backprop_id1),
        )

        y = xs[0] * xs[1]

        if method == 'backward':
            chainerx.backward(y, backprop_id2)
            assert xs[0].get_grad(backprop_id1) is None
            assert xs[1].get_grad(backprop_id1) is None
        elif method == 'grad':
            grads = chainerx.grad([y], xs, backprop_id2)
            assert len(grads) == 2
            assert grads[0] is None
            assert grads[1] is None
        else:
            assert False

        with pytest.raises(chainerx.ChainerxError):
            xs[0].get_grad(backprop_id2)
        with pytest.raises(chainerx.ChainerxError):
            xs[1].get_grad(backprop_id2)
Example #2
    def fprop(x0, x1):
        assert x0.is_grad_required()

        h = x0 * (x0 + x1)
        chainerx.backward(h, enable_double_backprop=True)
        gx0 = x0.get_grad()
        x0.cleargrad()
        return gx0,
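
The fprop fragment above is lifted out of its test harness, so a small driver is needed to exercise the double backprop it records. Below is a minimal, hypothetical sketch (the shapes and values are assumptions, not the original test):

import chainerx

x0 = chainerx.full((1,), 2, chainerx.float32).require_grad()
x1 = chainerx.full((1,), 3, chainerx.float32)

# fprop returns dh/dx0 == 2*x0 + x1; it stays on the graph because the first
# backward pass was run with enable_double_backprop=True.
gx0, = fprop(x0, x1)
chainerx.backward(gx0)   # second backward pass through the recorded trace
print(x0.get_grad())     # d(2*x0 + x1)/dx0 == 2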
Example #3
def test_backward_keyword_arguments():
    x = chainerx.full((1,), 2, chainerx.float32)
    with chainerx.backprop_scope('bp1') as backprop_id1:
        x.require_grad(backprop_id=backprop_id1)
        chainerx.backward(x, backprop_id=backprop_id1)
        with pytest.raises(
                TypeError, match=r'.*incompatible function arguments.*'):
            chainerx.backward(body=x, backprop_id=backprop_id1)
Example #4
def fprop(xs_, extra_xs_):
    x, = xs_
    t, = extra_xs_
    y = x * (x + t)
    chainerx.backward(y, enable_double_backprop=True)
    gx = x.get_grad()  # 2x + t
    x.cleargrad()
    return gx,
Example #5
def test_backward_sole_array_node():
    shape = (1,)
    dtype = chainerx.float32

    x = chainerx.full(shape, 2, dtype)
    expected_gx = chainerx.full(shape, 1, dtype)

    x.require_grad()

    chainerx.backward(x)

    _assert_arrays_equal(x.get_grad(), expected_gx)
Example #6
def test_backward_multiple_graphs_non_existing():
    shape = (1,)
    dtype = chainerx.float32

    x1 = chainerx.full(shape, 2, dtype)
    x2 = chainerx.full(shape, 5, dtype)

    with chainerx.backprop_scope('bp1') as backprop_id1, \
            chainerx.backprop_scope('bp2') as backprop_id2:

        x1.require_grad(backprop_id1)
        x2.require_grad(backprop_id1)

        y = x1 * x2
        with pytest.raises(chainerx.ChainerxError):
            chainerx.backward(y, backprop_id2)
Example #7
def test_backprop_sole_array_node(method):
    shape = (1,)
    dtype = chainerx.float32

    x = chainerx.full(shape, 2, dtype).require_grad()
    expected_gx = chainerx.full(shape, 1, dtype)

    if method == 'backward':
        chainerx.backward(x)
        gx = x.get_grad()
    elif method == 'grad':
        gx, = chainerx.grad([x], [x])
    else:
        assert False

    _assert_arrays_equal(gx, expected_gx)
Example #8
        def fprop(x0, x1):
            assert x0.is_grad_required(bp_x0)

            h = x0 * (x0 + x1)
            if method0 == 'backward':
                chainerx.backward(h, backprop_id=bp_x0)
                gx0 = x0.get_grad(bp_x0)
            elif method0 == 'grad':
                gx0, = chainerx.grad([h], [x0], backprop_id=bp_x0)
            else:
                assert False

            assert not gx0.is_backprop_required(bp_x0)
            assert gx0.is_backprop_required(bp_x1)

            return x0 * gx0,
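
The fragment above closes over bp_x0, bp_x1, and method0 from its enclosing test. A hypothetical setup that would satisfy its asserts could look as follows (the scope names, creation order, and values are assumptions, not the original harness):

import chainerx

method0 = 'backward'
with chainerx.backprop_scope('bp_x1') as bp_x1, \
        chainerx.backprop_scope('bp_x0') as bp_x0:
    x0 = chainerx.full((1,), 2, chainerx.float32).require_grad(bp_x0)
    x1 = chainerx.full((1,), 3, chainerx.float32).require_grad(bp_x1)

    # z = x0 * gx0 = x0 * (2*x0 + x1); gx0 is connected to bp_x1 only.
    z, = fprop(x0, x1)
    chainerx.backward(z, backprop_id=bp_x1)
    print(x1.get_grad(bp_x1))   # dz/dx1 = x0 == 2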
Example #9
def _check_backprop(
        xs, expected_gxs, fprop, extra_xs, gys=None, backprop_id=None):
    # Checks for test validity
    assert isinstance(xs, tuple)
    assert isinstance(expected_gxs, tuple)
    assert callable(fprop)
    assert isinstance(extra_xs, tuple)
    assert len(xs) == len(expected_gxs)
    assert all([isinstance(a, chainerx.ndarray) for a in xs])
    assert all([isinstance(a, chainerx.ndarray)
                or a is chainerx.ChainerxError for a in expected_gxs])
    assert all([isinstance(a, chainerx.ndarray) for a in extra_xs])

    # Forward
    outputs = fprop(xs, extra_xs)

    # Set output gradients
    if gys is None:
        gys = (None,) * len(outputs)
    assert len(gys) == len(outputs)
    for output, gy in zip(outputs, gys):
        assert not output.is_grad_required()
        output.set_grad(gy, backprop_id)

    # Backward
    chainerx.backward(outputs, backprop_id)

    # Check gradients of input arrays
    for i, expected_gx in enumerate(expected_gxs):
        x = xs[i]
        if expected_gx is chainerx.ChainerxError:
            with pytest.raises(chainerx.ChainerxError):
                x.get_grad(backprop_id)
        else:
            gx = x.get_grad(backprop_id)
            _assert_arrays_equal(gx, expected_gx)

    # Check gradients of output arrays
    for output, gy in zip(outputs, gys):
        if gy is None:
            assert not output.is_grad_required(backprop_id)
            with pytest.raises(chainerx.ChainerxError):
                output.get_grad(backprop_id)
        else:
            assert output.is_grad_required(backprop_id)
            _assert_arrays_equal(gy, output.get_grad(backprop_id))
Example #10
def _check_backward(fprop, xs, expected_gxs, gys=None, backprop_id=None):
    # Checks for test validity.
    assert callable(fprop)
    assert isinstance(xs, tuple)
    assert isinstance(expected_gxs, tuple)
    assert len(xs) == len(expected_gxs)
    assert all([isinstance(a, chainerx.ndarray) for a in xs])
    assert all(
        [isinstance(a, chainerx.ndarray) or a is None for a in expected_gxs])

    # Forward.
    ys = fprop(*xs)

    # Set output gradients.
    if gys is not None:
        assert len(gys) == len(ys)
        for y, gy in zip(ys, gys):
            assert not y.is_grad_required()
            y.set_grad(gy, backprop_id)

    # Backward.
    chainerx.backward(ys, backprop_id)

    # Check gradients of input arrays.
    for x, expected_gx in zip(xs, expected_gxs):
        if expected_gx is None:
            with pytest.raises(chainerx.ChainerxError):
                x.get_grad(backprop_id)
        else:
            gx = x.get_grad(backprop_id)
            _assert_arrays_equal(gx, expected_gx)

    # Check gradients of output arrays.
    if gys is None:
        gys = (None,) * len(ys)
    for y, gy in zip(ys, gys):
        if gy is None:
            assert not y.is_grad_required(backprop_id)
            with pytest.raises(chainerx.ChainerxError):
                y.get_grad(backprop_id)
        else:
            assert y.is_grad_required(backprop_id)
            _assert_arrays_equal(gy, y.get_grad(backprop_id))
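
A hypothetical call of this helper for a single multiplication (the fprop, shapes, and expected gradients below are illustrative assumptions, not a test from the original suite):

import chainerx

def fprop_mul(a, b):
    return a * b,

xs = (chainerx.full((1,), 2, chainerx.float32).require_grad(),
      chainerx.full((1,), 3, chainerx.float32).require_grad())
expected_gxs = (chainerx.full((1,), 3, chainerx.float32),   # d(a*b)/da == b
                chainerx.full((1,), 2, chainerx.float32))   # d(a*b)/db == a
_check_backward(fprop_mul, xs, expected_gxs)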
Example #11
def test_backprop_multiple_graphs_non_existing(method):
    shape = (1,)
    dtype = chainerx.float32

    with chainerx.backprop_scope('bp1') as backprop_id1, \
            chainerx.backprop_scope('bp2') as backprop_id2:
        xs = (
            chainerx.full(shape, 2, dtype).require_grad(backprop_id1),
            chainerx.full(shape, 5, dtype).require_grad(backprop_id1),)

        y = xs[0] * xs[1]

        with pytest.raises(chainerx.ChainerxError):
            if method == 'backward':
                chainerx.backward(y, backprop_id2)
            elif method == 'grad':
                chainerx.grad([y], xs, backprop_id2)
            else:
                assert False
Example #12
def test_multiple_graphs_double_backprop():
    with chainerx.backprop_scope('bp_y') as bp_y, \
            chainerx.backprop_scope('bp_x') as bp_x:

        x = chainerx.full((1,), 2, chainerx.float32)
        x.require_grad(backprop_id=bp_x)

        y = chainerx.full((1,), 3, chainerx.float32)
        y.require_grad(backprop_id=bp_y)

        z = x * (x + y)
        chainerx.backward(z, backprop_id=bp_x)

        gx = x.get_grad(bp_x)  # 2x + y
        assert not gx.is_backprop_required(backprop_id=bp_x)
        assert gx.is_backprop_required(backprop_id=bp_y)

        w = x * gx
        chainerx.backward(w, backprop_id=bp_y)

        e = chainerx.full((1,), 2, chainerx.float32)

        _assert_arrays_equal(y.get_grad(bp_y), e)  # x
Example #13
def backward(outputs, grad_outputs=None, **kwargs):
    """backward(outputs, grad_outputs=None, *, enable_double_backprop=False)

    Runs backpropagation from multiple output variables simultaneously.

    .. warning::

        This feature is experimental. The interface can change in the future.

    Args:
        outputs (tuple or list of :class:`~chainer.Variable`):
            A sequence of output variables from which backprop starts.
        grad_outputs (None or tuple or list of :class:`~chainer.Variable`):
            A sequence of variables that gives the initial value of each output
            gradient.
            If this argument is ``None``, backprop uses
            :attr:`~chainer.Variable.grad_var` of ``outputs``.
        enable_double_backprop (bool): If ``True``,
            computational trace of the whole backpropagation procedure is
            recorded to the computational graph so that one can further do
            backpropagation from the resulting gradients. Note that
            enabling it results in larger memory consumption needed to
            store the gradients w.r.t. intermediate variables that are
            required for the second gradient computation.

    .. seealso::
       :meth:`chainer.Variable.backward`
       :func:`chainer.grad`

    """
    enable_double_backprop, = argument.parse_kwargs(
        kwargs, ('enable_double_backprop', False),
        retain_grad='semantics for retain_grad=True is under discussion',
        loss_scale='chainer.backward does not support loss_scale option',
    )
    if not isinstance(outputs, (tuple, list)):
        raise TypeError(
            'outputs must be a tuple or a list, not {}.'.format(type(outputs)))
    for v in outputs:
        if not isinstance(v, chainer.Variable):
            raise TypeError(
                'each output must be a Variable, not {}'.format(type(v)))
    if grad_outputs is not None:
        if not isinstance(grad_outputs, (tuple, list)):
            raise TypeError(
                'grad_outputs must be None, a tuple, or a list, not {}.'
                .format(type(grad_outputs)))
        if len(outputs) != len(grad_outputs):
            raise ValueError(
                'grad_outputs must be of the same length as outputs.\n'
                'len(outputs) = {}, len(grad_outputs) = {}'
                .format(len(outputs), len(grad_outputs)))

    is_chainerx = [v._has_chainerx_array for v in outputs]

    if any(is_chainerx):
        if not all(is_chainerx):
            # The restriction is required as soon as the workarounds below
            # are removed.
            raise ValueError('cannot mix chainerx and other backends')

        # Cannot use chainerx.backward directly, because it does not follow
        # retain_grad=False
        # TODO(kataoka): Fix chainerx.backward and remove this workaround
        if grad_outputs is None:
            grad_outputs = []
            for y in outputs:
                grad_outputs.append(y.grad_var)
                y.grad_var = None

        # The check is required because chainerx.backward sets default grads.
        # TODO(kataoka): Fix chainerx.backward and remove this workaround
        indices = [i for i, gy in enumerate(grad_outputs) if gy is not None]
        outputs = [outputs[i] for i in indices]
        grad_outputs = [grad_outputs[i] for i in indices]

        # Use new variables to start backprop
        # TODO(kataoka): Implement chainerx.backward(output, grad_outputs)
        # and remove this workaround.
        outputs = chainer.functions.identity(*outputs)
        if not isinstance(outputs, tuple):
            outputs = outputs,
        grad_outputs = chainer.functions.identity(*grad_outputs)
        if not isinstance(grad_outputs, tuple):
            grad_outputs = grad_outputs,

        # TODO(kataoka): Even after F.identity, non-float grad cannot be set.
        # Move the check to elsewhere and remove this workaround.
        outputs_ = []
        for y, gy in zip(outputs, grad_outputs):
            if not y.requires_grad and gy is not None:
                warnings.warn(
                    'Some of grads are ignored by chainer.backward.\n'
                    'backend: ChainerX, '
                    'output.dtype: {}, grad_output.dtype: {}'.format(
                        y.dtype, gy.dtype),
                    RuntimeWarning)
                continue
            y.grad_var = gy
            outputs_.append(y)
        outputs = outputs_
        del outputs_

        # See also the ChainerX case of Variable.backward
        arrs = []
        for y in outputs:
            arr = y._data[0]
            assert isinstance(arr, chainerx.ndarray)
            arrs.append(arr)
        chainerx.backward(
            arrs, enable_double_backprop=enable_double_backprop)
        return

    if grad_outputs is None:
        grad_outputs = []
        for y in outputs:
            grad_var = y.grad_var
            if grad_var is None:
                warnings.warn(
                    'outputs contains a Variable without grad, or '
                    'duplicate outputs. Note that '
                    'chainer.backward does not set default grad.',
                    RuntimeWarning)
            y.grad_var = None
            grad_outputs.append(grad_var)
    outputs = [
        (y.node, gy) for y, gy in zip(outputs, grad_outputs) if gy is not None]
    with chainer.using_config('enable_backprop', enable_double_backprop):
        _backprop_to_all(outputs, False, None)
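
A minimal usage sketch of this function on the NumPy backend, assuming it is exposed as chainer.backward (the arrays and values below are arbitrary and not part of the original module):

import numpy as np
import chainer

x = chainer.Variable(np.array([2.0], dtype=np.float32))
y0 = x * x
y1 = x + 1
# chainer.backward does not set default ones-gradients, so output gradients
# are passed explicitly.
gy0 = chainer.Variable(np.ones_like(y0.array))
gy1 = chainer.Variable(np.ones_like(y1.array))
chainer.backward([y0, y1], [gy0, gy1])
print(x.grad)   # dy0/dx + dy1/dx = 2*x + 1 -> [5.]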