Example #1
    def check_forward(self, x1_data, x2_data, x3_data):
        xp = self.link.xp
        x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
        h1 = self.link(x1)
        device = backend.get_device_from_array(x1_data)
        with chainer.using_device(device):
            c0 = chainer.Variable(xp.zeros((len(self.x1), self.out_size),
                                           dtype=self.x1.dtype))
            c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
        testing.assert_allclose(h1.data, h1_expect.data)
        testing.assert_allclose(self.link.h.data, h1_expect.data)
        testing.assert_allclose(self.link.c.data, c1_expect.data)

        batch = len(x2_data)
        x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
        h1_in, h1_rest = functions.split_axis(
            self.link.h.data, [batch], axis=0)
        y2 = self.link(x2)
        device = backend.get_device_from_array(x1_data)
        with chainer.using_device(device):
            c2_expect, y2_expect = \
                functions.lstm(c1_expect,
                               self.link.upward(x2) + self.link.lateral(h1_in))
        testing.assert_allclose(y2.data, y2_expect.data)
        testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
        testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

        x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
        h2_rest = self.link.h
        y3 = self.link(x3)
        c3_expect, y3_expect = \
            functions.lstm(c2_expect, self.link.upward(x3))
        testing.assert_allclose(y3.data, y3_expect.data)
        testing.assert_allclose(self.link.h.data, h2_rest.data)
Example #2
 def check_equal_memory_shared(self, arr1, arr2):
     # Check that the two arrays share the internal memory.
     numpy.testing.assert_array_equal(backend.CpuDevice().send(arr1),
                                      backend.CpuDevice().send(arr2))
     with chainer.using_device(backend.get_device_from_array(arr1)):
         arr1 += 2
     numpy.testing.assert_array_equal(backend.CpuDevice().send(arr1),
                                      backend.CpuDevice().send(arr2))
     with chainer.using_device(backend.get_device_from_array(arr1)):
         arr1 -= 2
Example #3
 def check_equal_memory_shared(self, arr1, arr2):
     # Check that the two arrays share the internal memory.
     numpy.testing.assert_array_equal(
         backend.CpuDevice().send(arr1), backend.CpuDevice().send(arr2))
     with chainer.using_device(backend.get_device_from_array(arr1)):
         arr1 += 2
     numpy.testing.assert_array_equal(
         backend.CpuDevice().send(arr1), backend.CpuDevice().send(arr2))
     with chainer.using_device(backend.get_device_from_array(arr1)):
         arr1 -= 2
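This helper verifies memory sharing indirectly: an in-place update through one reference must be visible through the other, and the change is reverted afterwards so the fixture stays intact. A minimal NumPy-only sketch of the same idea, with hypothetical arrays a and b where b is a view of a:

import numpy

a = numpy.arange(6, dtype=numpy.float32)
b = a.reshape(2, 3)  # a view; shares memory with `a`

numpy.testing.assert_array_equal(a.reshape(2, 3), b)
a += 2  # the in-place update is visible through the view
numpy.testing.assert_array_equal(a.reshape(2, 3), b)
a -= 2  # undo the change, as the helper above does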
Example #4
    def __init__(self, func, x_data, y_grad, params, eps, atol, rtol, no_grads,
                 dtype, detect_nondifferentiable, is_immutable_params):
        # If `is_immutable_params` is `False`, `params` are expected to be of
        # type `chainer.Parameter` and are updated in-place.
        # To run `_CheckBackward` with ChainerX ndarrays however which cannot
        # be updated in-place when wrapped in `chainer.Parameter`s, this flag
        # should be `True` and parameters should be given as ndarrays.
        # `func` in the former case must take inputs as arguments only. In the
        # latter, it must take the parameters in addition.

        if dtype is not None and numpy.dtype(dtype).kind != 'f':
            raise ValueError('`dtype` is allowed only float type')
        if is_immutable_params:
            if not all(
                    isinstance(p, chainer.get_array_types()) for p in params):
                raise ValueError(
                    'All parameters in `params` must be ndarrays if '
                    '`is_immutable_params` is `True`. Actual: {}.'.format(
                        ', '.join(str(type(p)) for p in params)))

        x_data = _as_tuple(x_data)
        if y_grad is not None:
            y_grad = _as_tuple(y_grad)
        params = _as_tuple(params)

        if no_grads is None:
            no_grads = [x.dtype.kind != 'f' for x in x_data]
        else:
            if len(no_grads) != len(x_data):
                raise ValueError(
                    'Length of no_grads param and xs should be same.\n'
                    'Actual: {0} != {1}'.format(len(no_grads), len(x_data)))

        device = backend.get_device_from_array(*x_data)

        if device.xp is chainerx:
            if len(params) > 0 and not is_immutable_params:
                raise NotImplementedError(
                    'gradient_check must be called with '
                    'is_immutable_params=True to test parameters with '
                    'ChainerX.')
            if any(no_grads):
                raise NotImplementedError(
                    'gradient_check does not support no_grads argument for '
                    'ChainerX arrays')

        self.device = device

        self.func = func
        self.x_data = x_data
        self.y_grad = y_grad
        self.params = params
        self.no_grads = no_grads
        self.atol = atol
        self.rtol = rtol
        self.is_immutable_params = is_immutable_params
        # options for numeric gradients
        self.eps = eps
        self.dtype = dtype
        self.detect_nondifferentiable = detect_nondifferentiable
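_CheckBackward is typically driven through chainer.gradient_check.check_backward, which compares analytical gradients against numerical ones. A minimal sketch of such a call, assuming chainer and NumPy are installed; the choice of F.sin and the tolerances is illustrative only:

import numpy
from chainer import functions as F
from chainer import gradient_check

x = numpy.random.uniform(-1, 1, (3, 4))
gy = numpy.random.uniform(-1, 1, (3, 4))

# Checks that the backward pass of F.sin matches its numerical gradient.
gradient_check.check_backward(F.sin, x, gy, eps=1e-3, atol=1e-4, rtol=1e-4)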
Example #5
    def test_device(self, model_initial_backend_config, model_backend_config,
                    input_backend_config):
        model_initial_device = model_initial_backend_config.device
        device = model_backend_config.device
        input_device = input_backend_config.device

        model = chainer.Link()
        model.to_device(model_initial_device)
        optimizer = DummyOptimizer()
        optimizer.setup(model)
        iterator = DummyIterator([numpy.array(1), numpy.array(2)])

        updater = training.updaters.StandardUpdater(iterator,
                                                    optimizer,
                                                    device=device,
                                                    input_device=input_device)

        assert updater.device is device
        assert updater.input_device is input_device

        # Check the model device.
        assert model.device == device

        updater.update_core()

        assert optimizer.update.call_count == 1
        args, kwargs = optimizer.update.call_args
        assert len(args) == 2
        assert len(kwargs) == 0
        loss, v1 = args

        # Check the input device.
        assert backend.get_device_from_array(v1) == input_device
Example #6
    def test_double_backward(self, src_backend_config, dst_backend_config):
        src_device = src_backend_config.device
        dst_device = dst_backend_config.device
        if (src_device.xp is chainerx) is not (dst_device.xp is chainerx):
            raise unittest.SkipTest(
                'ChainerX to non-ChainerX does not support backward.')

        x = src_backend_config.get_array(self.x)
        gy = dst_backend_config.get_array(self.gy)
        ggx = src_backend_config.get_array(self.ggx)

        x_var = chainer.Variable(x, requires_grad=True)

        y_var = functions.copy(x_var, dst_device)

        y_var.grad = gy

        gy_var = y_var.grad_var
        y_var.backward(enable_double_backprop=True)

        assert x_var.grad_var.requires_grad is True

        x_var.grad_var.grad = ggx
        x_var.grad_var.backward()

        assert gy_var.grad_var.device == dst_device
        assert (backend.get_device_from_array(
            gy_var.grad_var.array) == dst_device)
        numpy.testing.assert_array_equal(
            _numpy_device.send(gy_var.grad_var.array), self.ggx)
Example #7
    def test_double_backward(self, src_backend_config, dst_backend_config):
        x = src_backend_config.get_array(self.x)
        gy = dst_backend_config.get_array(self.gy)
        ggx = src_backend_config.get_array(self.ggx)
        dst_device = dst_backend_config.device

        x_var = chainer.Variable(x, requires_grad=True)

        y_var = functions.copy(x_var, dst_device)

        y_var.grad = gy

        gy_var = y_var.grad_var
        y_var.backward(enable_double_backprop=True)

        assert x_var.grad_var.requires_grad is True

        x_var.grad_var.grad = ggx
        x_var.grad_var.backward()

        assert gy_var.grad_var.device == dst_device
        assert (backend.get_device_from_array(
            gy_var.grad_var.array) == dst_device)
        numpy.testing.assert_array_equal(
            _numpy_device.send(gy_var.grad_var.array), self.ggx)
Example #8
 def __call__(self, array):
     if self.dtype is not None:
         assert array.dtype == self.dtype
     device = backend.get_device_from_array(array)
     if not array.shape:  # 0-dim case
         array[...] = self.scale * (2 * numpy.random.randint(2) - 1)
     elif not array.size:
         raise ValueError('Array to be initialized must be non-empty.')
     else:
         # numpy.prod returns float value when the argument is empty.
         out_dim = len(array)
         in_dim = utils.size_of_shape(array.shape[1:])
         if (in_dim > out_dim and self._checks[0]) or (in_dim < out_dim
                                                       and self._checks[1]):
             raise ValueError('Cannot make orthogonal {}.'
                              'shape = {}, interpreted as '
                              '{}-dim input and {}-dim output.'.format(
                                  self.mode, array.shape, in_dim, out_dim))
         transpose = in_dim > out_dim
         a = numpy.random.normal(size=(out_dim, in_dim))
         if transpose:
             a = a.T
         # cupy.linalg.qr requires cusolver in CUDA 8+
         q, r = numpy.linalg.qr(a)
         q *= numpy.copysign(self.scale, numpy.diag(r))
         if transpose:
             q = q.T
         array[...] = device.xp.asarray(q.reshape(array.shape))
Example #9
 def visit_array(self, arr):
     assert isinstance(arr, chainer.get_array_types())
     device = backend.get_device_from_array(arr)
     if self._skip_visiting(device):
         self._warn_to_gpu(device, self._device)
         return arr
     return self._device.send(arr)
Example #10
File: uniform.py Project: zwcdp/chainer
 def __call__(self, array):
     if self.dtype is not None:
         assert array.dtype == self.dtype
     device = backend.get_device_from_array(array)
     array[...] = device.xp.random.uniform(low=-self.scale,
                                           high=self.scale,
                                           size=array.shape)
Example #11
    def test_double_backward(self, src_backend_config, dst_backend_config):
        x = src_backend_config.get_array(self.x)
        gy = dst_backend_config.get_array(self.gy)
        ggx = src_backend_config.get_array(self.ggx)
        dst_device = dst_backend_config.device

        x_var = chainer.Variable(x, requires_grad=True)

        y_var = functions.copy(x_var, dst_device)

        # TODO(niboshi): Remove this workaround after Variable.grad.setter is
        # fixed so that it calls gy.require_grad() internally.
        if dst_backend_config.xp is chainerx:
            gy.require_grad()

        y_var.grad = gy

        gy_var = y_var.grad_var
        y_var.backward(enable_double_backprop=True)

        assert x_var.grad_var.requires_grad is True

        x_var.grad_var.grad = ggx
        x_var.grad_var.backward()

        assert gy_var.grad_var.device == dst_device
        assert (backend.get_device_from_array(
            gy_var.grad_var.array) == dst_device)
        numpy.testing.assert_array_equal(
            _numpy_device.send(gy_var.grad_var.array), self.ggx)
Example #12
    def _check_forward_internal(self, dst_device_spec, src_device, dst_device,
                                x_mode):
        x = src_device.send(self.x)

        if x_mode == 'array':
            pass
        elif x_mode == 'non_requires_grad':
            x = chainer.Variable(x, requires_grad=False)
        elif x_mode == 'requires_grad':
            x = chainer.Variable(x, requires_grad=True)
        else:
            assert False, x_mode

        error_expected = ((src_device.xp is chainerx) !=
                          (dst_device.xp is chainerx)
                          and x_mode == 'requires_grad')
        if error_expected:
            with pytest.raises(RuntimeError):
                functions.copy(x, dst_device_spec)
            return

        y = functions.copy(x, dst_device_spec)

        assert y.device == dst_device
        assert backend.get_device_from_array(y.array) == dst_device
        assert y.dtype == self.dtype
        numpy.testing.assert_array_equal(_numpy_device.send(y.array), self.x)
Example #13
    def test_from_array(self):
        arr = numpy.ndarray((2, ), numpy.float32)
        expected_device = backend.CpuDevice()

        device = backend.CpuDevice.from_array(arr)
        assert device == expected_device

        device = backend.get_device_from_array(arr)
        assert device == expected_device
Example #14
    def check_concat_arrays(self, arrays, device, expected_device):
        array = self.converter(arrays, device)
        self.assertEqual(array.shape, (len(arrays),) + arrays[0].shape)

        assert backend.get_device_from_array(array) == expected_device

        np_array = backend.CpuDevice().send(array)
        for x, y in zip(np_array, arrays):
            numpy.testing.assert_array_equal(x, backend.CpuDevice().send(y))
Example #15
    def test_from_array(self):
        arr = numpy.ndarray((2,), numpy.float32)
        expected_device = backend.CpuDevice()

        device = backend.CpuDevice.from_array(arr)
        assert device == expected_device

        device = backend.get_device_from_array(arr)
        assert device == expected_device
Example #16
    def check_concat_arrays(self, arrays, device, expected_device):
        array = self.converter(arrays, device)
        self.assertEqual(array.shape, (len(arrays),) + arrays[0].shape)

        assert backend.get_device_from_array(array) == expected_device

        np_array = backend.CpuDevice().send(array)
        for x, y in zip(np_array, arrays):
            numpy.testing.assert_array_equal(x, backend.CpuDevice().send(y))
Example #17
    def __init__(
            self, func, xs, gys, params, eps, atol, rtol, no_gxs,
            dtype, detect_nondifferentiable, is_immutable_params):
        # If `is_immutable_params` is `False`, `params` are expected to be of
        # type `chainer.Parameter` and are updated in-place.
        # To run `_CheckBackward` with ChainerX ndarrays however which cannot
        # be updated in-place when wrapped in `chainer.Parameter`s, this flag
        # should be `True` and parameters should be given as ndarrays.
        # `func` in the former case must take inputs as arguments only. In the
        # latter, it must take the parameters in addition.

        if dtype is not None and numpy.dtype(dtype).kind != 'f':
            raise ValueError('`dtype` is allowed only float type')
        if is_immutable_params:
            if not all(
                    isinstance(p, chainer.get_array_types()) for p in params):
                raise ValueError(
                    'All parameters in `params` must be ndarrays if '
                    '`is_immutable_params` is `True`. Actual: {}.'.format(
                        ', '.join(str(type(p)) for p in params)))

        xs = _as_tuple(xs)
        if gys is not None:
            gys = _as_tuple(gys)
        params = _as_tuple(params)

        if no_gxs is None:
            no_gxs = [x.dtype.kind != 'f' for x in xs]
        else:
            if len(no_gxs) != len(xs):
                raise ValueError(
                    'Length of no_grads param and xs should be same.\n'
                    'Actual: {0} != {1}'.format(len(no_gxs), len(xs)))

        device = backend.get_device_from_array(*xs)

        if device.xp is chainerx:
            if params and not is_immutable_params:
                raise NotImplementedError(
                    'gradient_check does not support params argument for '
                    'ChainerX arrays')

        self.device = device

        self.func = func
        self.xs = xs
        self.gys = gys
        self.params = params
        self.no_gxs = no_gxs
        self.atol = atol
        self.rtol = rtol
        self.is_immutable_params = is_immutable_params
        # options for numeric gradients
        self.eps = eps
        self.dtype = dtype
        self.detect_nondifferentiable = detect_nondifferentiable
Example #18
    def __call__(self, array):
        if self.dtype is not None:
            assert array.dtype == self.dtype
        shape = array.shape
        if len(shape) != 2 or shape[0] != shape[1]:
            raise ValueError('Identity matrix initialization can only be used '
                             'for 2D squared matrices.')

        device = backend.get_device_from_array(array)
        array[...] = device.xp.identity(shape[0]) * self.scale
Example #19
File: normal.py Project: zwcdp/chainer
    def __call__(self, array):
        device = backend.get_device_from_array(array)
        args = {'loc': 0.0, 'scale': self.scale, 'size': array.shape}
        if device.xp is cuda.cupy:
            # Only CuPy supports dtype option
            if self.dtype == numpy.float32 or self.dtype == numpy.float16:
                # float16 is not supported in cuRAND
                args['dtype'] = numpy.float32

        array[...] = device.xp.random.normal(**args)
Example #20
    def check_forward(self, dst_device_spec, src_device, dst_device):
        x = src_device.send(self.x)

        x_var = chainer.Variable(x)
        y = functions.copy(x_var, dst_device_spec)

        assert y.device == dst_device
        assert backend.get_device_from_array(y.array) == dst_device
        assert y.dtype == self.dtype
        numpy.testing.assert_array_equal(_numpy_device.send(y.array), self.x)
Example #21
    def __call__(self, array):
        if self.dtype is not None:
            assert array.dtype == self.dtype

        # Calling copyto ensures that the fill_value array
        # is moved to the device where `array` resides.
        if isinstance(self.fill_value, chainer.get_array_types()):
            backend.copyto(array, self.fill_value)
        else:
            device = backend.get_device_from_array(array)
            array[...] = device.xp.asarray(self.fill_value)
Example #22
    def test_from_array(self, backend_config):
        arr = backend_config.get_array(numpy.ndarray((2, ), numpy.float32))
        # Test precondition check
        assert arr.device.name == backend_config.chainerx_device

        expected_device = backend_config.device

        device = backend.ChainerxDevice.from_array(arr)
        assert device == expected_device

        device = backend.get_device_from_array(arr)
        assert device == expected_device
Example #23
    def backward(self, target_input_indexes, grad_outputs):
        retained_inputs = self.get_retained_inputs()
        inputs = [None] * len(self.inputs)
        in_data = [None] * len(self.inputs)
        for retained, i_in in six.moves.zip(retained_inputs,
                                            self._input_indexes_to_retain):
            inputs[i_in] = retained
            in_data[i_in] = None if retained is None else retained.array
        in_data = tuple(in_data)

        grad_out_data = tuple(
            [None if grad is None else grad.array for grad in grad_outputs])

        is_chainerx_fallback_mode = self._is_chainerx_fallback_mode
        if is_chainerx_fallback_mode:
            # Convert input and output gradients to numpy/cupy
            in_data = backend.from_chx(in_data)
            grad_out_data = backend.from_chx(grad_out_data)

        # Call Function.backward
        with chainer.using_device(
                backend.get_device_from_array(*(in_data + grad_out_data))):
            if is_chainerx_fallback_mode:
                # Enable attribute fallback
                with function_node._chainerx_attribute_fallback(
                        self._function, self.chainerx_device):
                    gxs = self._function.backward(in_data, grad_out_data)
            else:
                gxs = self._function.backward(in_data, grad_out_data)

        # Check gradients
        for x, gx in six.moves.zip(self.inputs, gxs):
            if gx is not None:
                variable._check_grad_type(self, x, True, gx)

        # Convert input gradients back to ChainerX
        if is_chainerx_fallback_mode:
            gxs = backend.to_chx(gxs)

        ret = []
        for i in target_input_indexes:
            if gxs[i] is None:
                g = None
            else:
                # Intentionally not passing requires_grad=False so that
                # backprop routines can raise an error when a further backprop
                # is attempted against this gradient variable.
                g = variable.Variable(gxs[i])
                if g.xp is not chainerx:
                    g.node._old_style_grad_generator = self._function.label
            ret.append(g)

        return tuple(ret)
Example #24
    def test_from_array(self, backend_config):
        arr = backend_config.get_array(numpy.ndarray((2, ), numpy.float32))
        # Test precondition check
        assert isinstance(arr, intel64.mdarray)

        expected_device = backend.Intel64Device()

        device = backend.Intel64Device.from_array(arr)
        assert device == expected_device

        device = backend.get_device_from_array(arr)
        assert device == expected_device
Example #25
    def test_from_array(self, backend_config):
        arr = backend_config.get_array(numpy.ndarray((2,), numpy.float32))
        # Test precondition check
        assert arr.device.name == backend_config.chainerx_device

        expected_device = backend_config.device

        device = backend.ChainerxDevice.from_array(arr)
        assert device == expected_device

        device = backend.get_device_from_array(arr)
        assert device == expected_device
Example #26
    def test_from_array(self, backend_config):
        arr = backend_config.get_array(numpy.ndarray((2,), numpy.float32))
        # Test precondition check
        assert isinstance(arr, intel64.mdarray)

        expected_device = backend.Intel64Device()

        device = backend.Intel64Device.from_array(arr)
        assert device == expected_device

        device = backend.get_device_from_array(arr)
        assert device == expected_device
Example #27
def copy(x, dst):
    """Copies the input variable onto the specified device.

    If the input ``x`` already resides on the device specified by ``dst``, no
    copy will actually take place and the returned variable will hold a view
    of the input. In other cases, the input will be copied to ``dst``.
    When ``dst == -1``, the array is copied to the host memory.
    This function supports copies from host to host, from host to device,
    from device to device and from device to host.

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`):
            Variable to be copied.
        dst: Target device specifier.

    Returns:
        ~chainer.Variable: Output variable.

    .. admonition:: Example

        >>> import chainer.backends.cuda as cuda
        >>> x_arr = np.random.uniform(-1, 1, (5, 10))
        >>> x = chainer.Variable(x_arr)
        >>> x.device
        <CpuDevice (numpy)>
        >>> y = F.copy(x, '@cupy:0') # from CPU (NumPy) to GPU 0 (CuPy)
        >>> y.device
        <GpuDevice (cupy):0>

    .. note::
        Copies between non-ChainerX devices and ChainerX devices are not
        supported.

    """
    # For backward compatibility
    if dst is cuda.DummyDevice:
        dst = chainer.get_device('@numpy')

    in_device = backend.get_device_from_array(
        x.array if isinstance(x, chainer.Variable) else x)
    out_device = chainer.get_device(dst)

    is_chainerx = in_device.xp is chainerx
    if is_chainerx != (out_device.xp is chainerx):
        raise RuntimeError(
            'F.copy does not support copies between non-ChainerX devices and '
            'ChainerX devices.\n'
            'From: {}\n'
            'To: {}'.format(in_device, out_device))

    y, = Copy(in_device, out_device).apply((x,))
    return y
Example #28
    def test_get_device_from_array(self, backend_config):
        with cuda.Device(backend_config.cuda_device):
            arr = cuda.ndarray((), numpy.float32)
        # Test precondition check
        assert arr.device.id == backend_config.cuda_device

        expected_device = backend_config.device

        device = backend.GpuDevice.from_array(arr)
        assert device == expected_device

        device = backend.get_device_from_array(arr)
        assert device == expected_device
Example #29
    def check_concat_tuples(self, tuples, device, expected_device):
        arrays = self.converter(tuples, device)
        self.assertEqual(len(arrays), len(tuples[0]))
        for i in range(len(arrays)):
            shape = (len(tuples),) + tuples[0][i].shape
            self.assertEqual(arrays[i].shape, shape)

            assert backend.get_device_from_array(arrays[i]) == expected_device

            arr = backend.CpuDevice().send(arrays[i])
            for x, y in zip(arr, tuples):
                numpy.testing.assert_array_equal(
                    x, backend.CpuDevice().send(y[i]))
Example #30
    def check_concat_dicts(self, dicts, device, expected_device):
        arrays = self.converter(dicts, device)
        self.assertEqual(frozenset(arrays.keys()), frozenset(dicts[0].keys()))
        for key in arrays:
            shape = (len(dicts),) + dicts[0][key].shape
            self.assertEqual(arrays[key].shape, shape)
            self.assertEqual(
                backend.get_device_from_array(arrays[key]), expected_device)

            arr = backend.CpuDevice().send(arrays[key])
            for x, y in zip(arr, dicts):
                numpy.testing.assert_array_equal(
                    x, backend.CpuDevice().send(y[key]))
Example #31
    def check_concat_dicts(self, dicts, device, expected_device):
        arrays = self.converter(dicts, device)
        self.assertEqual(frozenset(arrays.keys()), frozenset(dicts[0].keys()))
        for key in arrays:
            shape = (len(dicts),) + dicts[0][key].shape
            self.assertEqual(arrays[key].shape, shape)
            self.assertEqual(
                backend.get_device_from_array(arrays[key]), expected_device)

            arr = backend.CpuDevice().send(arrays[key])
            for x, y in zip(arr, dicts):
                numpy.testing.assert_array_equal(
                    x, backend.CpuDevice().send(y[key]))
Example #32
    def check_concat_tuples(self, tuples, device, expected_device):
        arrays = self.converter(tuples, device)
        self.assertEqual(len(arrays), len(tuples[0]))
        for i in range(len(arrays)):
            shape = (len(tuples),) + tuples[0][i].shape
            self.assertEqual(arrays[i].shape, shape)

            assert backend.get_device_from_array(arrays[i]) == expected_device

            arr = backend.CpuDevice().send(arrays[i])
            for x, y in zip(arr, tuples):
                numpy.testing.assert_array_equal(
                    x, backend.CpuDevice().send(y[i]))
Example #33
    def test_get_device_from_array(self, backend_config):
        with cuda.Device(backend_config.cuda_device):
            arr = cuda.ndarray((), numpy.float32)
        # Test precondition check
        assert arr.device.id == backend_config.cuda_device

        expected_device = backend_config.device

        device = backend.GpuDevice.from_array(arr)
        assert device == expected_device

        device = backend.get_device_from_array(arr)
        assert device == expected_device
Example #34
    def make_statistics(self):
        """Computes and returns the mean and standard deviation values.

        Returns:
            tuple: Mean and standard deviation values.

        """
        x, n = self._x, self._n
        xp = backend.get_array_module(x)
        with chainer.using_device(backend.get_device_from_array(x)):
            mean = x / n
            var = self._x2 / n - mean * mean
            std = xp.sqrt(var)
            return mean, std
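The summary above assumes self._x and self._x2 hold the running sums of the observed values and of their squares, so that mean = Σx / n and var = Σx² / n − mean². A NumPy-only sketch of that accumulation scheme, with hypothetical variable names:

import numpy

xs = [numpy.random.uniform(size=(3,)) for _ in range(5)]

# Accumulate the sum and the sum of squares, one call at a time.
sum_x = sum(xs)
sum_x2 = sum(x * x for x in xs)
n = len(xs)

mean = sum_x / n
std = numpy.sqrt(sum_x2 / n - mean * mean)

numpy.testing.assert_allclose(mean, numpy.mean(xs, axis=0))
numpy.testing.assert_allclose(std, numpy.std(xs, axis=0))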
Example #35
def _concat_arrays(arrays, padding):
    # Convert `arrays` to numpy.ndarray if `arrays` consists of the built-in
    # types such as int, float or list.
    if not isinstance(arrays[0], chainer.get_array_types()):
        arrays = numpy.asarray(arrays)

    if padding is not None:
        arr_concat = _concat_arrays_with_padding(arrays, padding)
    else:
        device = backend.get_device_from_array(arrays[0])
        with chainer.using_device(device):
            arr_concat = device.xp.concatenate(
                [array[None] for array in arrays])

    return arr_concat
Example #36
 def __call__(self, array):
     if self.dtype is not None:
         assert array.dtype == self.dtype,\
             '{} != {}'.format(array.dtype, self.dtype)
     if self.rng is None:
         device = backend.get_device_from_array(array)
         array[...] = device.xp.random.uniform(low=-self.scale,
                                               high=self.scale,
                                               size=array.shape)
     else:
         backend.copyto(
             array,
             self.rng.uniform(low=-self.scale,
                              high=self.scale,
                              size=array.shape).astype(array.dtype,
                                                       copy=False))
Example #37
def _concat_arrays_with_padding(arrays, padding):
    shape = numpy.array(arrays[0].shape, dtype=int)
    for array in arrays[1:]:
        if numpy.any(shape != array.shape):
            numpy.maximum(shape, array.shape, shape)
    shape = tuple(numpy.insert(shape, 0, len(arrays)))

    device = backend.get_device_from_array(arrays[0])
    with chainer.using_device(device):
        result = device.xp.full(shape, padding, dtype=arrays[0].dtype)
        for i in six.moves.range(len(arrays)):
            src = arrays[i]
            slices = tuple(slice(dim) for dim in src.shape)
            result[(i,) + slices] = src

    return result
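The helper above pads every array up to the element-wise maximum shape and writes each source into its slot of the batch. A NumPy-only sketch of the same padding behaviour, with hypothetical inputs:

import numpy

arrays = [numpy.ones((2, 3), numpy.float32), numpy.ones((3, 2), numpy.float32)]
padding = 0

shape = numpy.maximum(arrays[0].shape, arrays[1].shape)  # element-wise max: (3, 3)
result = numpy.full((len(arrays),) + tuple(shape), padding,
                    dtype=arrays[0].dtype)
for i, src in enumerate(arrays):
    result[(i,) + tuple(slice(d) for d in src.shape)] = src

assert result.shape == (2, 3, 3)
assert result[0, :2, :3].sum() == 6 and result[1, :3, :2].sum() == 6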
Example #38
    def test_backward(self, src_backend_config, dst_backend_config):
        x = src_backend_config.get_array(self.x)
        gy = dst_backend_config.get_array(self.gy)
        src_device = src_backend_config.device
        dst_device = dst_backend_config.device

        x_var = chainer.Variable(x, requires_grad=True)

        y_var = functions.copy(x_var, dst_device)
        y_var.grad = gy

        y_var.backward()

        x_grad = x_var.grad
        assert x_var.grad_var.device == src_device
        assert backend.get_device_from_array(x_grad) == src_device
        numpy.testing.assert_array_equal(_numpy_device.send(x_grad), self.gy)
Example #39
 def __call__(self, opt):
     sqnorm = _sum_sqnorm([p.grad for p in opt.target.params(False)])
     device = backend.get_device_from_array(sqnorm)
     with chainer.using_device(device):
         norm = device.xp.sqrt(sqnorm)
         rate = self.threshold / norm
         # When no clipping is needed, skip the clipping on CPU and
         # multiply 1.0 on the device otherwise.
         if device.xp is numpy:
             if rate >= 1:
                 return
         else:
             rate = rate.clip(None, 1)
     for param in opt.target.params(False):
         grad = param.grad
         with cuda.get_device_from_array(grad):
             grad *= rate
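The hook above scales every gradient by rate = threshold / ‖g‖, where ‖g‖ is the global L2 norm over all parameters, and it skips the multiplication on CPU when no clipping is needed. A NumPy-only sketch of the same rule, with hypothetical gradient arrays:

import numpy

threshold = 1.0
grads = [numpy.array([3.0, 4.0]), numpy.array([0.0])]

sqnorm = sum(float((g * g).sum()) for g in grads)
norm = numpy.sqrt(sqnorm)            # 5.0 here
rate = min(threshold / norm, 1.0)    # only shrink; never amplify
grads = [g * rate for g in grads]

assert numpy.isclose(numpy.sqrt(sum((g * g).sum() for g in grads)), threshold)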
Example #40
    def __init__(
            self, func, x_data, y_grad, params, eps, atol, rtol, no_grads,
            dtype, detect_nondifferentiable):
        if dtype is not None and numpy.dtype(dtype).kind != 'f':
            raise ValueError('`dtype` is allowed only float type')

        x_data = _as_tuple(x_data)
        if y_grad is not None:
            y_grad = _as_tuple(y_grad)
        params = _as_tuple(params)

        if no_grads is None:
            no_grads = [x.dtype.kind != 'f' for x in x_data]
        else:
            if len(no_grads) != len(x_data):
                raise ValueError(
                    'Length of no_grads param and xs should be same.\n'
                    'Actual: {0} != {1}'.format(len(no_grads), len(x_data)))

        device = backend.get_device_from_array(*x_data)

        if device.xp is chainerx:
            if len(params) > 0:
                raise NotImplementedError(
                    'gradient_check does not support params argument for '
                    'ChainerX arrays')
            if any(no_grads):
                raise NotImplementedError(
                    'gradient_check does not support no_grads argument for '
                    'ChainerX arrays')

        self.device = device

        self.func = func
        self.x_data = x_data
        self.y_grad = y_grad
        self.params = params
        self.no_grads = no_grads
        self.atol = atol
        self.rtol = rtol
        # options for numeric gradients
        self.eps = eps
        self.dtype = dtype
        self.detect_nondifferentiable = detect_nondifferentiable
Example #41
    def _backward_chainerx(self, target_input_indexes, grad_outputs,
                           retained_inputs, retained_outputs):
        # Backward wrapper that is called from C++ via a Python binding in case
        # self.apply was called with chainerx.ndarrays.
        assert self._is_chainerx_fallback_mode
        assert len(target_input_indexes) > 0
        assert (
            (self._input_indexes_to_retain is None
             and len(retained_inputs) == 0)
            or (len(self._input_indexes_to_retain) == len(retained_inputs)))
        assert (
            (self._output_indexes_to_retain is None
             and len(retained_outputs) == 0)
            or (len(self._output_indexes_to_retain) == len(retained_outputs)))
        assert all([
            a is None or isinstance(a, chainerx.ndarray)
            for a in grad_outputs])

        self._chainerx_retained_inputs = tuple([
            variable.Variable(
                array, requires_grad=array.is_backprop_required())
            for array in retained_inputs])
        self._chainerx_retained_outputs = tuple([
            variable.Variable(
                array, requires_grad=(
                    False if array is None else array.is_backprop_required()))
            for array in retained_outputs])

        device = backend.get_device_from_array(
            *(retained_inputs + retained_outputs + grad_outputs))
        with chainer.using_device(device):
            gxs = self._backward_target_inputs(
                tuple(target_input_indexes),
                tuple([
                    None
                    if gy is None
                    else chainer.Variable(
                        gy, requires_grad=gy.is_backprop_required())
                    for gy in grad_outputs]))

        gx_arrs = [gx._data[0] for gx in gxs]
        assert all([isinstance(gx, chainerx.ndarray) for gx in gx_arrs])
        return gx_arrs
Example #42
File: array.py Project: asi1024/chainer
    def as_noncontiguous_array(a):
        if a is None:
            return None

        if a.size <= 1:
            return a

        device = backend.get_device_from_array(a)
        xp = device.xp
        slices = (slice(None, None, 2),) * a.ndim
        with chainer.using_device(device):
            ret = xp.empty(tuple([s * 2 for s in a.shape]), dtype=a.dtype)
            ret[slices] = a
            ret = ret[slices]
        if device.xp is chainerx:
            assert not ret.is_contiguous
        else:
            assert not ret.flags.c_contiguous

        return ret
Example #43
File: array.py Project: jnishi/chainer
    def as_noncontiguous_array(a):
        if a is None:
            return None

        if a.size <= 1:
            return a

        device = backend.get_device_from_array(a)
        xp = device.xp
        with chainer.using_device(device):
            ret = xp.empty(
                (a.shape[0] * 2,) + a.shape[1:], dtype=a.dtype)
        ret[::2] = a
        ret = ret[::2]
        if device.xp is chainerx:
            assert not ret.is_contiguous
        else:
            assert not ret.flags.c_contiguous

        return ret
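Both variants above allocate a larger buffer and then take a strided view, so the returned array holds the same values but is no longer C-contiguous. A NumPy-only sketch of the stride trick, with a hypothetical input a:

import numpy

a = numpy.arange(6, dtype=numpy.float32)

buf = numpy.empty((a.shape[0] * 2,) + a.shape[1:], dtype=a.dtype)
buf[::2] = a
view = buf[::2]  # every other element: same values, doubled stride

numpy.testing.assert_array_equal(view, a)
assert not view.flags.c_contiguous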
Example #44
 def check_device(self, array, device, expected_device):
     self.assertIsInstance(array, expected_device.xp.ndarray)
     self.assertEqual(
         backend.get_device_from_array(array), expected_device)
Example #45
def numerical_grad(
        f, inputs, grad_outputs, eps=1e-3,
        detect_nondifferentiable=False, diff_atol=0, diff_rtol=1e-2,
        center_outputs=None):
    """Computes numerical gradient by finite differences.

    This function is used to implement gradient check. For usage example, see
    unit tests of :mod:`chainer.functions`.

    By default, ``numerical_grad`` computes the gradient to the first order of
    ``eps``.

    Args:
        f (callable): Python function with no arguments that runs forward
            computation and returns the result.
        inputs (tuple of arrays): Tuple of arrays that should be treated as
            inputs. Each element of them is slightly modified to realize
            numerical gradient by finite differences.
        grad_outputs (tuple of arrays or scalars): Tuple of arrays or scalars
            that are treated as output gradients.
        eps (float): Epsilon value of finite differences.
        detect_nondifferentiable (bool):
            ``False`` by default.
            If ``True``, ``numerical_grad`` checks whether ``f`` is
            differentiable at ``inputs``.
            It requires evaluation of ``f`` at 5 points instead of 2.
            As a side effect, the accuracy of numerical gradient will be
            increased to the third order of ``eps``.
            If it turns out that ``f`` is non-differentiable at ``input``,
            ``numerical_grad`` raises
            :class:`~chainer.gradient_check.NondifferentiableError`.
        diff_atol (float):
            Absolute tolerance of fitting error of non-differentiable point
            detection.
        diff_rtol (float):
            Tolerance of fitting error of non-differentiable point detection
            relative to the output values of ``f``.
        center_outputs (tuple of arrays or None):
            Only used if ``detect_nondifferentiable`` is ``True``.
            If specified, these arrays are used as the outputs of ``f`` at
            ``inputs``.
            Otherwise, it is calculated.
            It can be used to reduce the computation if these arrays are
            already calculated before calling ``numerical_grad``.

    Returns:
        tuple: Numerical gradient arrays corresponding to ``inputs``.

    """
    # TODO(niboshi): Deprecate `center_outputs` argument.
    # If dtype of this argument is not float64, often the resolution is
    # insufficient for numerical gradient calculation. We might use it only
    # when its dtype is float64, but it would be better to simply remove it.
    center_outputs = None

    assert eps > 0
    assert isinstance(inputs, (tuple, list))
    for x in inputs:
        if x.dtype.kind != 'f':
            raise RuntimeError(
                'The dtype of input arrays must be kind of float')

    inputs = tuple(inputs)
    # Cast grad_outputs to float64
    grad_outputs = tuple([
        None if g is None
        else numpy.float64(g) if numpy.isscalar(g)
        else g.astype(numpy.float64)
        for g in grad_outputs])

    if not chainer.is_arrays_compatible(
            [a for a in inputs + grad_outputs if not numpy.isscalar(a)]):
        raise RuntimeError('Do not mix GPU and CPU arrays in `numerical_grad`')

    device = backend.get_device_from_array(*(inputs + grad_outputs))
    xp = device.xp

    if xp is cuda.cupy:
        numerical_grad_kernel_1 = cuda.reduce(
            'T y1, T y2, U gy, T eps', 'V gxi',
            '(y1 - y2) * gy', 'a + b', 'gxi += a / (eps * 2)', '0',
            'numerical_grad_kernel_1'
        )
        numerical_grad_kernel_3 = cuda.reduce(
            'T y1, T y2, T y3, T y4, U gy, T eps', 'V gxi',
            '(-y1 + 8 * y2 - 8 * y3 + y4) * gy',
            'a + b', 'gxi += a / (eps * 6)', '0',
            'numerical_grad_kernel_3'
        )

    if xp is chainerx:
        grads = [
            xp.zeros(x.shape, numpy.float64, device=x.device) for x in inputs]
    else:
        grads = [xp.zeros(x.shape, numpy.float64) for x in inputs]

    if detect_nondifferentiable:
        if center_outputs is None:
            ys0 = _copy_arrays(f())
        else:
            ys0 = center_outputs
        nout = len(ys0)
        shapes = [_.shape for _ in ys0]
        sizes = numpy.array([_.size for _ in ys0])
        cumsizes = numpy.cumsum(sizes)

    # Evaluate func at a single input
    def eval_func(x, i, delta, orig):
        x[i] = orig + delta
        y = _copy_arrays(f())
        assert len(y) == len(grad_outputs)
        assert all([
            gy is None
            for y_, gy in zip(y, grad_outputs)
            if y_ is None])
        assert all([
            gy is None or numpy.isscalar(gy) or y_.shape == gy.shape
            for y_, gy in zip(y, grad_outputs)])
        x[i] = orig
        return y

    # An iteration on a single input displacement
    def iterate_single_input(i_in, x, orig_x, i):
        orig = orig_x[i]
        # `yss` holds a list of output arrays for each of 2 or 5 sampling
        # points.
        if detect_nondifferentiable:
            yss = [
                eval_func(x, i, -eps * 1., orig),
                eval_func(x, i, -eps * .5, orig),
                ys0,
                eval_func(x, i, +eps * .5, orig),
                eval_func(x, i, +eps * 1., orig),
            ]
        else:
            yss = [
                eval_func(x, i, -eps * 1, orig),
                eval_func(x, i, +eps * 1, orig),
            ]

        if detect_nondifferentiable:
            # Detect non-differentiable point by quadratic fitting

            # Check for non-finite output.
            # If any single element in the output arrays has different
            # finiteness among sampled points, that means this is a
            # non-differentiable point.
            # If the function consistently generates non-finite values
            # around the point, we do not treat the point as
            # non-differentiable.
            # (Example: x<0 region for the logarithm function)
            any_nonfinite = False
            for i_out in range(nout):
                isfinites = [xp.isfinite(ys[i_out]) for ys in yss]
                if any((isfinites[0] != isfinites[i]).any()
                       for i in range(1, len(yss))):
                    s = six.StringIO()
                    s.write(
                        'Tried to compute the numeric gradient on a '
                        'non-differentiable point.\n\n')
                    s.write('i_in: {}\n'.format(i_in))
                    s.write('i_out: {}\n'.format(i_out))
                    s.write('x: {}\n'.format(inputs[i_in]))
                    s.write('index on x: {}\n'.format(i))
                    s.write('eps: {}\n'.format(eps))
                    s.write('y[x-eps  ]: {}\n'.format(yss[0][i_out]))
                    s.write('y[x-eps/2]: {}\n'.format(yss[1][i_out]))
                    s.write('y[x      ]: {}\n'.format(yss[2][i_out]))
                    s.write('y[x+eps/2]: {}\n'.format(yss[3][i_out]))
                    s.write('y[x+eps  ]: {}\n'.format(yss[4][i_out]))
                    raise NondifferentiableError(s.getvalue())

                any_nonfinite |= not all((_).all() for _ in isfinites)

            if not any_nonfinite:
                # Stack flattened outputs to make (5, *)-shaped 2D array
                ystack = xp.vstack(
                    [xp.hstack([y.ravel() for y in ys]) for ys in yss])
                assert ystack.ndim == 2 and ystack.shape[0] == len(yss)
                # Fit to quadratic
                if xp is not numpy:
                    ystack = _cpu._to_cpu(ystack)
                polyfit = numpy.polynomial.polynomial.polyfit
                _, (residuals, _, _, _) = polyfit(
                    range(len(yss)), ystack, deg=2, full=True)
                if xp is not numpy:
                    residuals = device.send(residuals)
                residuals = xp.sqrt(residuals / len(yss))

                # Check for error for each output array
                for i_out in range(nout):
                    size = sizes[i_out]
                    cumsize = cumsizes[i_out]
                    shape = shapes[i_out]
                    # TODO(niboshi): The following two lines could be
                    # rewritten using xp.stack, which is supported in
                    # NumPy>=1.10
                    ymax = xp.concatenate(
                        [ys[i_out][None] for ys in yss]).max(axis=0)
                    ymin = xp.concatenate(
                        [ys[i_out][None] for ys in yss]).min(axis=0)
                    # Restore the shape of flattened residual
                    res = residuals[cumsize - size:cumsize]
                    res = res.reshape(shape)
                    det = utils.force_array(
                        diff_atol + diff_rtol * (ymax - ymin) < res)
                    # Constant output = not nondifferentiable
                    det[ymax == ymin] = False
                    if det.any():
                        s = six.StringIO()
                        s.write(
                            'Tried to compute the numeric gradient on a '
                            'non-differentiable point.\n\n')
                        s.write('i_in: {}\n'.format(i_in))
                        s.write('i_out: {}\n'.format(i_out))
                        s.write('x: {}\n'.format(inputs[i_in]))
                        s.write('index on x: {}\n'.format(i))
                        s.write('eps: {}\n'.format(eps))
                        s.write('diff_rtol: {}\n'.format(diff_rtol))
                        s.write('diff_atol: {}\n'.format(diff_atol))
                        s.write('ymax: {}\n'.format(ymax))
                        s.write('ymin: {}\n'.format(ymin))
                        s.write(
                            'diff_atol + diff_rtol * (ymax-ymin): {}\n'.format(
                                diff_atol + diff_rtol * (ymax - ymin)))
                        s.write('fitting errors: {}\n'.format(res))
                        s.write('y[x-eps  ]: {}\n'.format(yss[0][i_out]))
                        s.write('y[x-eps/2]: {}\n'.format(yss[1][i_out]))
                        s.write('y[x      ]: {}\n'.format(yss[2][i_out]))
                        s.write('y[x+eps/2]: {}\n'.format(yss[3][i_out]))
                        s.write('y[x+eps  ]: {}\n'.format(yss[4][i_out]))
                        raise NondifferentiableError(s.getvalue())

        # Calculate numerical gradient
        for i_out, gy in enumerate(grad_outputs):
            if gy is None:
                continue
            if not numpy.isscalar(gy):
                gy = gy.astype(numpy.float64, copy=False)
            gpu_ = (xp is cuda.cupy and
                    all(isinstance(ys[i_out], cuda.ndarray)
                        for ys in yss))
            # If any output sample is None, all others must be.
            assert all([
                (yss[0][i_out] is None) == (yss[j][i_out] is None)
                for j in range(len(yss))])
            # If outputs samples are None, the part of numeric gradient for
            # this output is considered as zero: skip the accumulation.
            if yss[0][i_out] is None:
                continue

            if len(yss) == 2:  # 1st order
                y0 = yss[0][i_out]
                y1 = yss[1][i_out]
                if gpu_:
                    numerical_grad_kernel_1(
                        y1, y0, xp.asarray(gy), eps, gx[i])
                else:
                    dot = ((y1 - y0) * gy).sum()
                    gx[i] = gx[i] + dot / (2 * eps)
            elif len(yss) == 5:  # 3rd order
                y0 = yss[0][i_out]
                y1 = yss[1][i_out]
                y2 = yss[3][i_out]
                y3 = yss[4][i_out]
                if gpu_:
                    numerical_grad_kernel_3(
                        y3, y2, y1, y0, gy, eps, gx[i])
                else:
                    num = -y3 + 8 * y2 - 8 * y1 + y0
                    dot = (num * gy).sum()
                    gx[i] = gx[i] + dot / (6 * eps)
            else:
                assert False

    # Calculate numeric gradient
    with configuration.using_config('type_check', False):
        for i_in, (x, gx) in enumerate(six.moves.zip(inputs, grads)):
            orig_x = x.copy()  # hold original value
            for i in numpy.ndindex(x.shape):
                iterate_single_input(i_in, x, orig_x, i)

    return [g.astype(x.dtype, copy=False)
            for g, x in six.moves.zip(grads, inputs)]
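At its core the function accumulates central differences: to first order, ∂L/∂x_i ≈ Σ_k gy_k · (y_k(x_i + ε) − y_k(x_i − ε)) / (2ε). A NumPy-only sketch of that formula for a scalar-valued function (hypothetical f and inputs):

import numpy

def f(x):
    return numpy.sin(x).sum()

x = numpy.random.uniform(-1, 1, (3,))
eps = 1e-6

grad = numpy.empty_like(x)
for i in range(x.size):
    orig = x[i]
    x[i] = orig + eps
    y_plus = f(x)
    x[i] = orig - eps
    y_minus = f(x)
    x[i] = orig              # restore the input, as numerical_grad does
    grad[i] = (y_plus - y_minus) / (2 * eps)

numpy.testing.assert_allclose(grad, numpy.cos(x), rtol=1e-4)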
Example #46
 def forward(self, x, y):
     self.args.append((x, y))
     with chainer.using_device(backend.get_device_from_array(x, y)):
         chainer.report({'loss': x.sum() + y.sum()}, self)
Example #47
 def check_unrecognized(self, arg):
     device = backend.get_device_from_array(arg)
     assert device == backend.CpuDevice()
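As the test above relies on, backend.get_device_from_array falls back to CpuDevice() both for plain NumPy arrays and for objects it does not recognize as any backend's ndarray. A minimal sketch, assuming only chainer and NumPy are installed:

import numpy
from chainer import backend

assert backend.get_device_from_array(numpy.zeros(3)) == backend.CpuDevice()
assert backend.get_device_from_array(3.0) == backend.CpuDevice()  # unrecognized input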
Example #48
    def backward(self, indexes, grad_outputs):
        x, W, gy = self.get_retained_inputs()

        device = backend.get_device_from_array(x.data)
        xp = device.xp

        if 0 in indexes:
            gx = chainer.Variable(xp.zeros_like(x.data))
        if 1 in indexes:
            gW = chainer.Variable(xp.zeros_like(W.data))
        if 2 in indexes:
            ggy = chainer.Variable(xp.zeros_like(gy.data))

        ggx, _, ggW = grad_outputs

        pos_neg_mask = xp.ones(self.sample_size + 1)
        pos_neg_mask[0] *= -1

        with chainer.using_device(device):
            arange = xp.arange(len(self.ignore_mask))
        for i in arange[self.ignore_mask]:
            # Partial forward pass to obtain intermediate `Variable`s
            ix = x[i]
            k = self.samples[i]

            if self.reduce == 'sum':
                igy = gy
            else:
                igy = gy[i]

            w = W[k]
            f = chainer.functions.flatten(
                chainer.functions.matmul(w, ix[:, None])) * pos_neg_mask
            sigf = chainer.functions.sigmoid(f)
            g = chainer.functions.broadcast_to(igy, f.shape) * sigf \
                * pos_neg_mask

            dgW_dg = chainer.functions.flatten(
                chainer.functions.matmul(ggW[k], ix[:, None])) * pos_neg_mask
            dgW_df = chainer.functions.broadcast_to(igy, f.shape) \
                * _sigmoid_grad(f, sigf, dgW_dg) * pos_neg_mask
            dgx_dg = chainer.functions.flatten(
                chainer.functions.matmul(ggx[i][None, :], w, transb=True))
            dgx_df = chainer.functions.broadcast_to(igy, f.shape) \
                * _sigmoid_grad(f, sigf, dgx_dg)

            if 0 in indexes:
                # derivative of gx
                dgx = chainer.functions.matmul(w, dgx_df[:, None], transa=True)

                # derivative of gW
                dgx += chainer.functions.matmul(g[None, :], ggW[k]).T
                dgx += chainer.functions.matmul(
                    w, dgW_df[:, None], transa=True)

                gx = chainer.functions.scatter_add(
                    gx, i, chainer.functions.flatten(dgx))

            if 1 in indexes:
                # derivative of gx
                shape = ggx[i].shape
                for ik, ig, idgx_df in six.moves.zip(k, g, dgx_df):
                    ig = chainer.functions.broadcast_to(ig, shape)
                    idgx_df = chainer.functions.broadcast_to(idgx_df, shape)
                    gW = chainer.functions.scatter_add(
                        gW, ik, ig * ggx[i] + idgx_df * ix)

                # derivative of gW
                gW = chainer.functions.scatter_add(
                    gW, k,
                    chainer.functions.matmul(dgW_df[:, None], ix[None, :]))

            if 2 in indexes:
                dgx_dg *= pos_neg_mask
                dggy = chainer.functions.sum((dgx_dg + dgW_dg) * sigf)
                if self.reduce == 'sum':
                    ggy += dggy
                else:
                    ggy = chainer.functions.scatter_add(ggy, i, dggy)

        ret = []
        if 0 in indexes:
            ret.append(gx)
        if 1 in indexes:
            ret.append(gW)
        if 2 in indexes:
            ret.append(ggy)
        return ret