Example #1
    def setUp(self):
        self.xs = [cuda.cupy.random.uniform(
            -1, 1, (b, self.in_size)).astype('f')
            for b in self.batches]
        h_shape = (self.n_layers * 2, self.batches[0], self.out_size)
        self.hx = cuda.cupy.random.uniform(-1, 1, h_shape).astype('f')

        self.ws = []
        self.bs = []
        for i in range(self.n_layers):
            for di in [0, 1]:
                weights = []
                biases = []
                for j in range(6):
                    if i == 0 and j < 3:
                        w_in = self.in_size
                    elif i > 0 and j < 3:
                        w_in = self.out_size * 2
                    else:
                        w_in = self.out_size

                    weights.append(cuda.cupy.random.uniform(
                        -1, 1, (self.out_size, w_in)).astype('f'))
                    biases.append(cuda.cupy.random.uniform(
                        -1, 1, (self.out_size,)).astype('f'))

                self.ws.append(weights)
                self.bs.append(biases)

        self.dys = [cuda.cupy.random.uniform(
            -1, 1, (b, self.out_size * 2)).astype('f')
            for b in self.batches]
        self.dhy = cuda.cupy.random.uniform(-1, 1, h_shape).astype('f')
        with chainer.using_config('use_cudnn', self.use_cudnn):
            self.expect = chainer.should_use_cudnn('>=auto', 5000)
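
All of the snippets collected here follow the same pattern: the ``use_cudnn`` configuration key (``'always'``, ``'auto'`` or ``'never'``) is set with ``chainer.using_config``, and ``chainer.should_use_cudnn(level, lowest_version)`` then decides whether the cuDNN path is taken. A minimal sketch of that interplay (assuming a CUDA/cuDNN-enabled installation):

import chainer

# should_use_cudnn() combines the 'use_cudnn' setting with cuDNN
# availability and, when a second argument is given, with a minimum
# required cuDNN version (5000 == cuDNN v5 in the examples above).
for mode in ('always', 'auto', 'never'):
    with chainer.using_config('use_cudnn', mode):
        print(mode,
              chainer.should_use_cudnn('>=auto'),
              chainer.should_use_cudnn('>=auto', 5000))
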
Example #2
 def setUp(self):
     batches = 2
     in_channels_a_group = 3
     out_channels_a_group = 2
     in_channels = in_channels_a_group * self.group
     out_channels = out_channels_a_group * self.group
     kh, kw = (3, 3)
     self.stride = 2
     self.pad = (int(kh / 2) * self.dilate, int(kw / 2) * self.dilate)
     self.x = cuda.cupy.random.uniform(
         -1, 1, (batches, in_channels, 4, 3)).astype(self.dtype)
     self.W = cuda.cupy.random.normal(
         0, numpy.sqrt(1. / (kh * kw * in_channels_a_group)),
         (out_channels, in_channels_a_group, kh, kw)).astype(self.dtype)
     self.gy = cuda.cupy.random.uniform(
         -1, 1, (batches, out_channels, 2, 2)).astype(self.dtype)
     with chainer.using_config('use_cudnn', self.use_cudnn):
         self.should_call_cudnn = chainer.should_use_cudnn('>=auto')
         if self.dilate > 1 and cuda.cuda.cudnn.getVersion() < 6000:
             self.should_call_cudnn = False
         if self.group > 1 and cuda.cuda.cudnn.getVersion() < 7000:
             self.should_call_cudnn = False
         self.can_use_tensor_core = True
         if self.dilate > 1:
             self.can_use_tensor_core = False
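
The ``(2, 2)`` spatial shape of ``self.gy`` above follows from the standard convolution output-size arithmetic with the padding rule ``pad = (k // 2) * dilate`` used in ``setUp``. A small sketch checking it (not part of the original test):

def conv_outsize(size, k, s, p, d):
    # effective (dilated) kernel size is d * (k - 1) + 1
    return (size + 2 * p - (d * (k - 1) + 1)) // s + 1

for dilate in (1, 2):
    pad = (3 // 2) * dilate
    print(conv_outsize(4, 3, 2, pad, dilate),  # height: 4 -> 2
          conv_outsize(3, 3, 2, pad, dilate))  # width:  3 -> 2
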
Example #3
 def test_call_cudnn_forward(self):
     with chainer.using_config('use_cudnn', self.use_cudnn):
         with mock.patch('cupy.cudnn.cudnn.poolingForward') as func:
             self.forward()
             self.assertEqual(func.called,
                              chainer.should_use_cudnn('>=auto') and
                              self.ndim > 1)
Example #4
    def forward_gpu(self, inputs):
        self.retain_inputs((0, 1))  # only retain x and W
        if len(inputs) == 2:
            (x, W), b = inputs, None
        else:
            x, W, b = inputs

        self._calc_out_size(x, W)
        self._set_cover_all(x, W)

        use_cudnn = (
            chainer.should_use_cudnn('>=auto')
            and not self.cover_all
            and x.dtype == W.dtype
            and ((self.dy == 1 and self.dx == 1)
                 or (_cudnn_version >= 6000
                     and not configuration.config.cudnn_deterministic))
            and (self.groups <= 1 or _cudnn_version >= 7000)
        )

        if use_cudnn:
            # cuDNN implementation
            return self._forward_cudnn(x, W, b)

        elif self.groups > 1:
            return self._forward_grouped_convolution(x, W, b)

        else:
            return self._forward_gpu_core(x, W, b)
Example #5
 def _use_cudnn(self, x, gy):
     return (
         chainer.should_use_cudnn('>=auto')
         and not self.cover_all
         and x.dtype == self.W_dtype
         and gy.dtype == self.W_dtype
         and self.ndim > 1)
Example #6
    def forward(self, inputs):
        if inputs[0].shape[1] % self.groups != 0:
            raise ValueError('The number of channels {} is not divisible by '
                             '\'groups\' argument {}.'
                             .format(inputs[0].shape[1], self.groups))

        xp = backend.get_array_module(*inputs)
        if xp is cuda.cupy and chainer.should_use_cudnn('>=auto', 5000):
            return self.forward_cudnn(inputs)

        self.retain_inputs((0, 1))
        x, gamma, beta = inputs

        orig_shape = x.shape
        batch_size, channels = orig_shape[:2]
        groups = self.groups
        reduced_shape = (batch_size * groups, -1)
        x = x.reshape(reduced_shape)

        self.mean = x.mean(axis=1)
        x_hat = x - self.mean[:, None]
        var = (x_hat * x_hat).mean(axis=1)
        var += self.eps
        self.inv_std = var
        del var
        xp.sqrt(self.inv_std, out=self.inv_std, dtype=x.dtype)
        xp.reciprocal(self.inv_std, out=self.inv_std)
        x_hat *= self.inv_std[:, None]

        y = x_hat.reshape((batch_size, channels, -1))
        y *= gamma[:, None]
        y += beta[:, None]

        y = y.reshape(orig_shape)
        return y,
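
For reference, the non-cuDNN branch above is plain group normalization. A stand-alone NumPy sketch of the same computation (a hypothetical helper, assuming ``channels`` is divisible by ``groups``):

import numpy

def group_norm_reference(x, gamma, beta, groups, eps=1e-5):
    # x: (batch, channels, ...); gamma, beta: (channels,)
    batch, channels = x.shape[:2]
    xr = x.reshape(batch * groups, -1)
    mean = xr.mean(axis=1, keepdims=True)
    var = ((xr - mean) ** 2).mean(axis=1, keepdims=True)
    x_hat = (xr - mean) / numpy.sqrt(var + eps)
    y = x_hat.reshape((batch, channels, -1))
    y = y * gamma[:, None] + beta[:, None]
    return y.reshape(x.shape)
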
Example #7
    def forward_gpu(self, inputs):
        if not chainer.should_use_cudnn('>=auto', 5000):
            return self._forward(inputs)
        x, grid = inputs
        out_shape = x.shape[:2] + grid.shape[2:]
        y = cuda.cupy.empty(out_shape, dtype=x.dtype)
        shape = numpy.array(out_shape, dtype=numpy.int32)
        x = cuda.cupy.ascontiguousarray(x)
        grid_t = cuda.cupy.transpose(grid, (0, 2, 3, 1))
        grid_t = cuda.cupy.ascontiguousarray(grid_t)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y_desc = cudnn.create_tensor_descriptor(y)

        self.st_desc =\
            cuda.cupy.cudnn.create_spatial_transformer_descriptor(
                _sampler_type, grid.dtype, len(shape), shape.ctypes.data)

        one = numpy.array(1, dtype=x.dtype).ctypes
        zero = numpy.array(0, dtype=x.dtype).ctypes
        libcudnn.spatialTfSamplerForward(
            handle, self.st_desc.value, one.data,
            x_desc.value, x.data.ptr, grid_t.data.ptr, zero.data,
            y_desc.value, y.data.ptr)
        return y,
Example #8
    def backward_gpu(self, inputs, grad_outputs):
        if not chainer.should_use_cudnn('>=auto', 5000):
            return self._backward(inputs, grad_outputs)
        x, grid = inputs
        gy, = grad_outputs

        grid_t = cuda.cupy.transpose(grid, (0, 2, 3, 1))
        grid_t = cuda.cupy.ascontiguousarray(grid_t)
        x = cuda.cupy.ascontiguousarray(x)
        gy = cuda.cupy.ascontiguousarray(gy)
        gx = cuda.cupy.empty_like(x)
        ggrid_t = cuda.cupy.empty_like(grid_t)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        dx_desc = cudnn.create_tensor_descriptor(gx)
        dy_desc = cudnn.create_tensor_descriptor(gy)

        one = numpy.array(1, dtype=x.dtype).ctypes
        zero = numpy.array(0, dtype=x.dtype).ctypes
        libcudnn.spatialTfSamplerBackward(
            handle, self.st_desc.value,
            one.data,
            x_desc.value, x.data.ptr,
            zero.data,
            dx_desc.value, gx.data.ptr,
            one.data,
            dy_desc.value, gy.data.ptr,
            grid_t.data.ptr, zero.data, ggrid_t.data.ptr)
        ggrid = cuda.cupy.transpose(ggrid_t, axes=(0, 3, 1, 2))
        return gx, ggrid
Example #9
    def forward_gpu(self, x):
        if (chainer.should_use_cudnn('>=auto') and
                pooling_nd._check_cudnn_acceptable_type(x[0].dtype)):
            # With cuDNN v3 or greater, use cuDNN implementation for inputs
            # with spatial dimensions of two or more.
            if _cudnn_version >= 3000 and self.ndim >= 2:
                return super(AveragePoolingND, self).forward_gpu(x)
            # With cuDNN v2, use cuDNN implementation only for inputs with
            # spatial dimensions of two.
            elif self.ndim == 2:
                return super(AveragePoolingND, self).forward_gpu(x)

        self.retain_inputs(())
        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype

        n, c = x[0].shape[:2]
        dims = x[0].shape[2:]
        ys = tuple(conv_nd.get_conv_outsize(d, k, s, p,
                                            cover_all=self.cover_all)
                   for (d, k, s, p) in six.moves.zip(
                       dims, self.ksize, self.stride, self.pad))
        # (n, c, y_1, y_2, ..., y_N)
        y_shape = (n, c) + ys
        y = cuda.cupy.empty(y_shape, dtype=x[0].dtype)
        coeff = 1. / functools.reduce(operator.mul, self.ksize)

        in_params, out_params, operation, name = \
            average_pooling_nd_kernel.AveragePoolingNDKernelForward.generate(
                self.ndim)
        cuda.elementwise(in_params, out_params, operation, name)(
            x[0].reduced_view(),
            *(dims + ys + self.ksize + self.stride + self.pad + (coeff, y)))

        return y,
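
The ``coeff = 1. / prod(ksize)`` factor above is what makes the kernel an average rather than a sum. A pad-free 2-D NumPy reference of the same computation (a sketch, not the original kernel):

import numpy

def avg_pool2d_reference(x, k, s):
    # x: (n, c, h, w); no padding, cover_all=False
    n, c, h, w = x.shape
    out_h = (h - k) // s + 1
    out_w = (w - k) // s + 1
    y = numpy.empty((n, c, out_h, out_w), dtype=x.dtype)
    for i in range(out_h):
        for j in range(out_w):
            window = x[:, :, i * s:i * s + k, j * s:j * s + k]
            y[:, :, i, j] = window.sum(axis=(2, 3)) / (k * k)
    return y
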
Example #10
File: dropout.py  Project: jnishi/chainer
    def forward_gpu(self, x):
        if (chainer.should_use_cudnn('==always', 5000)
                and x[0].flags.c_contiguous
                and self.mask is None):
            self._use_cudnn = True

            if hasattr(self, 'states'):
                # if we already have a dropout mask,
                # the forward operation is equal to backward.
                return cuda.get_cudnn_dropout_states().backward(
                    None, x[0], self.dropout_ratio, self.states),

            self.states, y = cuda.get_cudnn_dropout_states().forward(
                None, x[0], self.dropout_ratio)
            return y,
        else:
            if self.mask is not None:
                y = x[0] * self.mask
            else:
                rand = cuda.cupy.random.rand(*x[0].shape, dtype=numpy.float32)
                scale = x[0].dtype.type(1. / (1 - self.dropout_ratio))
                self.mask, y = cuda.elementwise(
                    'T x, R r, T scale, T ratio', 'T mask, T y',
                    '''
                    mask = (r >= ratio) * scale;
                    y = x * mask;
                    ''',
                    'dropout_fwd',
                )(x[0], rand, scale, self.dropout_ratio)
            return y,
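
The non-cuDNN branch implements inverted dropout: units are zeroed with probability ``dropout_ratio`` and the survivors are scaled by ``1 / (1 - dropout_ratio)``, so the expected output matches the input. A quick NumPy check of that property (not part of the original function):

import numpy

ratio = 0.4
x = numpy.ones(100000, dtype=numpy.float32)
r = numpy.random.rand(x.size).astype(numpy.float32)
mask = (r >= ratio) / numpy.float32(1 - ratio)
print((x * mask).mean())  # close to 1.0
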
Example #11
    def __init__(self, comm, eps=2e-5, mean=None, var=None, decay=0.9):
        chainer.utils.experimental(
            'chainermn.functions.MultiNodeBatchNormalizationFunction')

        self.comm = comm
        self.running_mean = mean
        self.running_var = var

        # Note: cuDNN v5 requires that eps be at least 1e-5. Otherwise, an
        # error will occur.
        # See CUDNN_BN_MIN_EPSILON value in cudnn.h to verify minimum allowable
        # value.
        self.eps = eps
        if chainer.should_use_cudnn('>=auto'):
            if eps < 1e-5:
                msg = 'cuDNN does not allow an eps value less than 1e-5.'
                raise RuntimeError(msg)
        self.mean_cache = None
        self.decay = decay

        # We need to delay importing MPI4py (and modules that import MPI4py)
        import chainermn.communicators._memory_utility as memory_utility_module
        from mpi4py import MPI as mpi4py_module
        self.memory_utility_module = memory_utility_module
        self.mpi4py_module = mpi4py_module
Example #12
    def __init__(self, eps=2e-5, mean=None, var=None, decay=0.9, axis=None):
        self.running_mean = mean
        self.running_var = var

        # Note: cuDNN requires that eps be greater than or equal to
        # CUDNN_BN_MIN_EPSILON. Otherwise, an error will occur.
        # See CUDNN_BN_MIN_EPSILON value in cudnn.h to verify minimum allowable
        # value.
        self.eps = eps
        if chainer.should_use_cudnn('>=auto'):
            if eps < libcudnn.CUDNN_BN_MIN_EPSILON:
                raise RuntimeError(
                    'cuDNN does not allow an eps value '
                    'less than {}.'.format(libcudnn.CUDNN_BN_MIN_EPSILON))
        self.decay = decay
        if isinstance(axis, collections_abc.Sequence):
            for i in range(1, len(axis)):
                if axis[i - 1] >= axis[i]:
                    msg = 'numbers in axis must be sorted in ascending order'
                    raise RuntimeError(msg)
        elif isinstance(axis, int):
            axis = axis,
        elif axis is not None:
            raise RuntimeError('axis must be int, tuple of int or None')
        self.axis = axis
Example #13
    def __init__(self, comm, eps=2e-5, mean=None, var=None, decay=0.9,
                 communication_backend='auto'):
        chainer.utils.experimental(
            'chainermn.functions.MultiNodeBatchNormalizationFunction')

        self.comm = comm
        self.running_mean = mean
        self.running_var = var

        # Note: cuDNN v5 requires that eps be at least 1e-5. Otherwise, an
        # error will occur.
        # See CUDNN_BN_MIN_EPSILON value in cudnn.h to verify minimum allowable
        # value.
        self.eps = eps
        if chainer.should_use_cudnn('>=auto'):
            if eps < 1e-5:
                msg = 'cuDNN does not allow an eps value less than 1e-5.'
                raise RuntimeError(msg)
        self.mean_cache = None
        self.decay = decay

        selected_communication_backend = \
            get_communication_backend(comm, communication_backend)

        if selected_communication_backend == 'nccl':
            self._backend = _NcclBackend(comm)
        else:
            self._backend = _MpiBackend(comm)
Example #14
 def can_use_cudnn(self, xp):
     # TODO(bkvogel): Check for float16 support again in next cuDNN version.
     # cuDNN v5 batch normalization does not seem to support float16.
     return (xp is not numpy and
             chainer.should_use_cudnn('>=auto', 5000) and
             self.cudnn_dim_ok and
             self.cudnn_dtype_ok)
Example #15
File: softmax.py  Project: okuta/chainer
    def forward(self, inputs):
        self.retain_inputs((0, 1))
        y, gy = inputs
        xp = backend.get_array_module(*y)
        if xp is not numpy and chainer.should_use_cudnn('>=auto'):
            oz_dtype = 'd' if y[0].dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            handle = cudnn.get_handle()
            gx = xp.empty_like(y)
            gx_tensor4d = cuda.cupy.ascontiguousarray(
                gx.reshape(_get_tensor4d_shape(self.axis, gx.shape)))
            gy = cuda.cupy.ascontiguousarray(gy)
            desc = cudnn.create_tensor_descriptor(gx_tensor4d)
            cudnn_mode = _get_cudnn_mode(gx_tensor4d.shape)
            libcudnn.softmaxBackward(
                handle, _algorithm, cudnn_mode, one.data, desc.value,
                y.data.ptr, desc.value, gy.data.ptr, zero.data,
                desc.value, gx.data.ptr)
        else:
            gx = y * gy
            sumdx = gx.sum(axis=self.axis, keepdims=True)
            gx -= y * sumdx

        return gx,
Example #16
    def forward_gpu(self, inputs):
        self.retain_inputs((0, 1))  # retain only x and W
        if len(inputs) == 2:
            (x, W), b = inputs, None
        else:
            x, W, b = inputs

        out_c, _, kh, kw = W.shape
        n, _, h, w = x.shape

        out_h, out_w = self._get_out_size(inputs)
        y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)

        use_cudnn = (
            chainer.should_use_cudnn('>=auto')
            and not self.cover_all
            and x.dtype == W.dtype
            and ((self.dy == 1 and self.dx == 1) or _cudnn_version >= 6000)
            and (self.groups <= 1 or _cudnn_version >= 7000)
        )

        if use_cudnn:
            # cuDNN implementation
            return self._forward_cudnn(x, W, b, y)

        elif self.groups > 1:
            return self._forward_grouped_convolution(x, W, b)

        else:
            return self._forward_gpu_core(x, W, b)
Example #17
    def forward_gpu(self, x):
        if chainer.should_use_cudnn('>=auto') and 2 <= self.ndim <= 3:
            # With cuDNN v3 or greater, use the cuDNN implementation for
            # inputs with two or three spatial dimensions.
            return super(MaxPoolingND, self).forward_gpu(x)

        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype

        n, c = x[0].shape[:2]
        dims = x[0].shape[2:]
        ys = tuple(conv_nd.get_conv_outsize(d, k, s, p, self.cover_all)
                   for (d, k, s, p) in six.moves.zip(
                       dims, self.ksize, self.stride, self.pad))
        # (n, c, y_1, y_2, ..., y_N)
        y_shape = (n, c) + ys
        y = cuda.cupy.empty(y_shape, dtype=x[0].dtype)
        self.indexes = cuda.cupy.empty(y_shape, dtype=numpy.int32)

        in_params, out_params, operation, name = \
            max_pooling_nd_kernel.MaxPoolingNDKernelForward.generate(self.ndim)
        cuda.elementwise(in_params, out_params, operation, name)(
            x[0].reduced_view(),
            *(dims + ys + self.ksize + self.stride + self.pad +
              (y, self.indexes)))

        return y,
Example #18
 def setUp(self):
     self.x = cuda.cupy.random.uniform(-1, 1, (2, 3)).astype(self.dtype)
     self.gy = cuda.cupy.random.uniform(-1, 1, (2, 3)).astype(self.dtype)
     with chainer.using_config('use_cudnn', self.use_cudnn):
         self.expect = chainer.should_use_cudnn('==always') and (
             cuda.cudnn.cudnn.getVersion() >= 3000 or
             self.dtype != numpy.float16)
Example #19
 def backward(self, indexes, gy):
     y = self.get_retained_outputs()[0]
     if chainer.should_use_cudnn('==always') and self._use_cudnn:
         x = self.get_retained_inputs()[0]
         return ReLUGrad3(x, y).apply((gy[0],))
     else:
         return ReLUGrad2(y).apply((gy[0],))
Example #20
 def backward(self, indexes, grad_outputs):
     x, = self.get_retained_inputs()
     if chainer.should_use_cudnn('==always') and self._use_cudnn:
         y = self.get_retained_outputs()[0]
         return ClippedReLUGrad3(x.data, y.data, self.cap).apply(
             grad_outputs)
     else:
         return ClippedReLUGrad2(x.data, self.cap).apply(grad_outputs)
Example #21
 def check_call_cudnn_backward(self, use_cudnn):
     with chainer.using_config('use_cudnn', use_cudnn):
         expect = chainer.should_use_cudnn('>=auto', 5000)
         hy, ys = self.call_forward(True)
         hy.grad = _to_gpu(self.dhy)
         with mock.patch('cupy.cuda.cudnn.RNNBackwardWeights') as func:
             hy.backward()
         assert func.called == expect
Example #22
def _log_softmax(x, axis=1):
    if chainer.should_use_cudnn('>=auto'):
        xp = backend.get_array_module(x)
        if xp is cuda.cupy:
            return cudnn.softmax_forward(x, axis, _algorithm)
    log_z = logsumexp(x, axis)
    y = x - log_z
    return y
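
The fallback branch is the textbook identity ``log_softmax(x) = x - logsumexp(x)``. A plain NumPy check of that identity, independent of the chainer helpers used above:

import numpy

x = numpy.random.randn(2, 5).astype(numpy.float32)
log_z = numpy.log(numpy.exp(x).sum(axis=1, keepdims=True))
y = x - log_z
print(numpy.exp(y).sum(axis=1))  # each row sums to ~1.0
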
Example #23
 def test_call_cudnn_backward(self):
     with chainer.using_config('use_cudnn', self.use_cudnn):
         expect = chainer.should_use_cudnn('>=auto') and self.ndim > 1
         y = self.forward()
     # should be consistent with forward regardless of use_cudnn config
     y.grad = self.gy
     with testing.patch('cupy.cudnn.pooling_backward') as func:
         y.backward()
         assert func.called == expect
Example #24
 def test_call_cudnn_backward(self):
     with chainer.using_config('use_cudnn', self.use_cudnn):
         y = self.forward()
         y.grad = self.gy
         patch = 'cupy.cudnn.convolution_backward_data'
         with mock.patch(patch) as func:
             y.backward()
             self.assertEqual(func.called,
                              chainer.should_use_cudnn('>=auto'))
Example #25
 def forward(self, inputs):
     self.retain_inputs((0, 1))
     y, gy = inputs
     xp = self._x_xp
     if xp is cuda.cupy and chainer.should_use_cudnn('>=auto'):
         gx = cudnn.softmax_backward(y, gy, self.axis, _algorithm)
     else:
         gx = gy - xp.exp(y) * gy.sum(axis=self.axis, keepdims=True)
     return gx,
Example #26
File: relu.py  Project: asi1024/chainer
 def forward_gpu(self, inputs):
     x, = inputs
     if chainer.should_use_cudnn('>=auto') and x.flags.c_contiguous:
         self._use_cudnn = True
         y = cudnn.activation_forward(x, _mode)
     else:
         y = cuda.cupy.maximum(x, 0, dtype=x.dtype)
     self.retain_outputs((0,))
     return y,
Example #27
 def test_call_cudnn_backward(self):
     with chainer.using_config('use_cudnn', self.use_cudnn):
         expect = chainer.should_use_cudnn('>=auto')
         y = self.forward()
     y.grad = self.gy
     # should be consistent with forward regardless of use_cudnn config
     with mock.patch('cupy.cudnn.cudnn.poolingBackward') as func:
         y.backward()
         self.assertEqual(func.called, expect)
Example #28
    def forward_gpu(self, x):
        if (chainer.should_use_cudnn('>=auto') and
                pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
            return super(MaxPooling2D, self).forward_gpu(x)

        self.retain_inputs(())
        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype

        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(
            h, self.kh, self.sy, self.ph, self.cover_all)
        assert y_h > 0, 'Height in the output should be positive.'
        y_w = conv.get_conv_outsize(
            w, self.kw, self.sx, self.pw, self.cover_all)
        assert y_w > 0, 'Width in the output should be positive.'
        y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
        self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32)

        cuda.elementwise(
            'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,'
            'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw',
            'T out, S indexes',
            '''
               int c0    = i / (out_h * out_w);
               int out_y = i / out_w % out_h;
               int out_x = i % out_w;
               int in_y_0 = max(0, out_y * sy - ph);
               int in_y_1 = min(h, out_y * sy + kh - ph);
               int in_x_0 = max(0, out_x * sx - pw);
               int in_x_1 = min(w, out_x * sx + kw - pw);

               T maxval = in[in_x_0 + w * (in_y_0 + h * c0)];
               int argmax_y = in_y_0;
               int argmax_x = in_x_0;
               for (int y = in_y_0; y < in_y_1; ++y) {
                 int offset_y = w * (y + h * c0);
                 for (int x = in_x_0; x < in_x_1; ++x) {
                   float v = in[x + offset_y];
                   if (maxval < v) {
                     maxval   = v;
                     argmax_y = y;
                     argmax_x = x;
                   }
                 }
               }
               out = maxval;

               int argmax_ky = argmax_y + ph - out_y * sy;
               int argmax_kx = argmax_x + pw - out_x * sx;
               indexes = argmax_kx + kw * argmax_ky;
            ''', 'max_pool_fwd')(x[0].reduced_view(),
                                 h, w, y_h, y_w, self.kh, self.kw,
                                 self.sy, self.sx, self.ph, self.pw,
                                 y, self.indexes)
        return y,
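
The ``indexes`` array produced by the kernel above stores, for every output element, the argmax position relative to its pooling window, packed as ``argmax_kx + kw * argmax_ky``. A short sketch of how such an entry can be decoded back to absolute input coordinates:

def decode_index(idx, out_y, out_x, kw, sy, sx, ph, pw):
    # inverse of: indexes = argmax_kx + kw * argmax_ky
    ky, kx = divmod(idx, kw)
    in_y = out_y * sy - ph + ky
    in_x = out_x * sx - pw + kx
    return in_y, in_x
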
Example #29
    def forward_gpu(self, x):
        if chainer.should_use_cudnn('==always') and x[0].flags.c_contiguous:
            y = cudnn.activation_forward(x[0], _mode)
        else:
            y = cuda.cupy.empty_like(x[0])
            cuda.cupy.tanh(x[0], out=y)
            self.retain_inputs(())

        self.retain_outputs((0,))
        return y,
Example #30
File: relu.py  Project: delta2323/chainer
 def backward_gpu(self, x, gy):
     y = self.output_data[0]
     if chainer.should_use_cudnn('==always') and self._use_cudnn:
         gx = cudnn.activation_backward(x[0], y, gy[0], _mode)
     else:
         gx = cuda.elementwise(
             'T y, T gy', 'T gx',
             'gx = y > 0 ? gy : (T)0',
             'relu_bwd')(y, gy[0])
     return gx,
Example #31
 def _use_cudnn(self, x, W):
     return (not self.cover_all and
             chainer.should_use_cudnn('>=auto') and
             self.ndim > 1 and x.dtype == W.dtype)
Example #32
 def _use_cudnn(self, x, W):
     return (chainer.should_use_cudnn('>=auto') and self.ndim > 1
             and x.dtype == W.dtype)
Example #33
 def test_no_cudnn_available(self):
     with chainer.using_config('use_cudnn', 'always'):
         self.assertFalse(chainer.should_use_cudnn('==always'))
         self.assertFalse(chainer.should_use_cudnn('>=auto'))
Example #34
 def check_call_cudnn_forward_inference(self, use_cudnn):
     with chainer.using_config('use_cudnn', use_cudnn):
         expect = chainer.should_use_cudnn('>=auto', 5000)
         with testing.patch('cupy.cuda.cudnn.RNNForwardInference') as func:
             self.call_forward(False)
         assert func.called == expect
Example #35
def n_step_lstm_base(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction,
        **kwargs):
    """Base function for Stack LSTM/BiLSTM functions.

    This function is used by :func:`chainer.functions.n_step_lstm` and
    :func:`chainer.functions.n_step_bilstm`.
    This function's behavior depends on the argument ``use_bi_direction``.

    Args:
        n_layers(int): The number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (:class:`~chainer.Variable`):
            Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        cx (:class:`~chainer.Variable`): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of :class:`~chainer.Variable`): Weight matrices.
            ``ws[i]`` represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` corresponds to :math:`W_j` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` are ``(N, I)``-shape as they
            are multiplied with input variables, where ``I`` is the size of
            the input and ``N`` is the dimension of the hidden units. All
            other matrices are ``(N, N)``-shaped.
        bs (list of list of :class:`~chainer.Variable`): Bias vectors.
            ``bs[i]`` represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` corresponds to :math:`b_j` in the equation.
            The shape of each vector is ``(N,)``.
        xs (list of :class:`~chainer.Variable`):
            A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is the
            mini-batch size for time ``t``. The sequences must be transposed.
            :func:`~chainer.functions.transpose_sequence` can be used to
            transpose a list of :class:`~chainer.Variable`\\ s each
            representing a sequence.
            When sequences have different lengths, they must be
            sorted in descending order of their lengths before transposing.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses Bi-directional
            LSTM.

    Returns:
        tuple: This function returns a tuple containing three elements,
        ``hy``, ``cy`` and ``ys``.

            - ``hy`` is the updated hidden states whose shape is the same as
              ``hx``.
            - ``cy`` is the updated cell states whose shape is the same as
              ``cx``.
            - ``ys`` is a list of :class:`~chainer.Variable` . Each element
              ``ys[t]`` holds the hidden states of the last layer
              corresponding to the input ``xs[t]``. Its shape is ``(B_t, N)``
              where ``B_t`` is the mini-batch size for time ``t``. Note that
              ``B_t`` is the same as the batch size of ``xs[t]``.

    .. seealso::

       :func:`chainer.functions.n_step_lstm`
       :func:`chainer.functions.n_step_bilstm`

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    # Check input size consistency with xs and ws here.
    x_in = xs[0].shape[1]
    w_in = ws[0][0].shape[1]
    if x_in != w_in:
        raise ValueError('Inconsistent input size in input values and weight '
                         'parameters: {} != {}'.format(x_in, w_in))

    xp = backend.get_array_module(hx, hx.data)

    use_cuda = xp is cuda.cupy or (
        xp is chainerx and hx.device.device.backend.name == 'cuda')

    directions = 1
    if use_bi_direction:
        directions = 2

    combined = _combine_inputs(hx, cx, ws, bs, xs, n_layers, directions)
    has_chainerx_array, combined = _extract_apply_in_data(combined)
    hx_chx, cx_chx, ws_chx, bs_chx, xs_chx = _seperate_inputs(
        combined, n_layers, len(xs), directions)

    if has_chainerx_array and xp is chainerx and dropout_ratio == 0:
        if use_bi_direction:
            hy, cy, ys = chainerx.n_step_bilstm(
                n_layers, hx_chx, cx_chx, ws_chx, bs_chx, xs_chx)
        else:
            hy, cy, ys = chainerx.n_step_lstm(
                n_layers, hx_chx, cx_chx, ws_chx, bs_chx, xs_chx)
        hy = variable.Variable._init_unchecked(
            hy, requires_grad=hy.is_backprop_required(),
            is_chainerx_array=True)
        cy = variable.Variable._init_unchecked(
            cy, requires_grad=cy.is_backprop_required(),
            is_chainerx_array=True)
        ys = [variable.Variable._init_unchecked(
            y, requires_grad=y.is_backprop_required(),
            is_chainerx_array=True)
            for y in ys]
        return hy, cy, ys
    elif use_cuda and chainer.should_use_cudnn('>=auto', 5000):
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)
        with chainer.using_device(xs.device):
            states = cuda.get_cudnn_dropout_states()
            states.set_dropout_ratio(dropout_ratio)

        w = n_step_rnn.cudnn_rnn_weight_concat(
            n_layers, states, use_bi_direction, 'lstm', ws, bs)

        if use_bi_direction:
            rnn = NStepBiLSTM
        else:
            rnn = NStepLSTM

        hy, cy, ys = rnn(n_layers, states, lengths)(hx, cx, w, xs)
        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, cy, ys

    else:
        return n_step_rnn.n_step_rnn_impl(
            _lstm, n_layers, dropout_ratio, hx, cx, ws, bs, xs,
            use_bi_direction)
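
A minimal usage sketch assembled from the shape rules in the docstring above. The wrapper name and argument order (``n_layers, dropout_ratio, hx, cx, ws, bs, xs``) are those the docstring refers to via :func:`chainer.functions.n_step_lstm`; the concrete sizes here are arbitrary:

import numpy
import chainer.functions as F

n_layers, N, I = 1, 4, 3              # one layer, hidden size N, input size I
# two sequences of lengths 2 and 1, already sorted and transposed:
xs = [numpy.random.randn(2, I).astype('f'),
      numpy.random.randn(1, I).astype('f')]
hx = numpy.zeros((n_layers, 2, N), dtype='f')
cx = numpy.zeros((n_layers, 2, N), dtype='f')
# ws[0][j] is (N, I) for j < 4 and (N, N) otherwise; bs[0][j] is (N,).
ws = [[numpy.random.randn(N, I if j < 4 else N).astype('f') for j in range(8)]]
bs = [[numpy.zeros(N, dtype='f') for _ in range(8)]]

hy, cy, ys = F.n_step_lstm(n_layers, 0.0, hx, cx, ws, bs, xs)
print(hy.shape, cy.shape, [y.shape for y in ys])
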
Example #36
 def can_use_cudnn(self, xp):
     # TODO(bkvogel): Check for float16 support again in next cuDNN version.
     # cuDNN v5 batch normalization does not seem to support float16.
     return (xp is cuda.cupy and chainer.should_use_cudnn('>=auto', 5000)
             and self.cudnn_dim_ok and self.cudnn_dtype_ok)
Example #37
    def forward_gpu(self, inputs):
        self.retain_inputs((0, 1))  # only retain x and W
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not all([isinstance(i, cuda.ndarray) for i in inputs]):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape
        c = W.shape[1]  # out_c
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph,
                                                d=self.dy)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw,
                                                d=self.dx)
            assert self.outw > 0, 'Width in the output should be positive.'

        self._set_cover_all(x, W)

        if (not self.cover_all and chainer.should_use_cudnn('>=auto') and
                x.dtype == W.dtype and
                ((self.dy == 1 and self.dx == 1) or _cudnn_version_ >= 6000)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cuda.cupy.empty((n, c, self.outh, self.outw),
                                dtype=x.dtype)
            y_desc = cudnn.create_tensor_descriptor(y)

            filter_desc = cudnn.create_filter_descriptor(W)
            conv_param = (self.ph, self.pw), (self.sy, self.sx), x.dtype
            dilation = (self.dy, self.dx)
            conv_desc = cudnn.create_convolution_descriptor(
                *conv_param, dilation=dilation,
                use_tensor_core=use_tensor_core)
            if b is not None:
                bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = np.array(1, dtype=oz_dtype).ctypes
            zero = np.array(0, dtype=oz_dtype).ctypes

            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size,), dtype='b')

            if configuration.config.cudnn_deterministic:
                algo = libcudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
            elif configuration.config.autotune and _cudnn_version_ >= 5000:
                algo = get_algorithm(
                    W, x, y, conv_param + (dilation,), handle, filter_desc,
                    x_desc, conv_desc, y_desc, workspace)
            else:
                algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                    handle, filter_desc.value, x_desc.value, conv_desc.value,
                    y_desc.value, _bwd_data_pref, workspace_size)

            if use_tensor_core:
                # Only CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 supports
                # Tensor-Core in cuDNN7
                algo = libcudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1

            libcudnn.convolutionBackwardData_v3(
                handle, one.data, filter_desc.value, W.data.ptr,
                x_desc.value, x.data.ptr, conv_desc.value,
                algo, workspace.data.ptr, workspace_size,
                zero.data, y_desc.value, y.data.ptr)

            if b is not None:
                cudnn.add_tensor(
                    handle, one.data, bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            gcol = cuda.cupy.tensordot(W, x, (0, 1)).astype(x.dtype,
                                                            copy=False)
            # - k, m, n: shape of out_channel
            # - b: number of inputs
            # - h, w: height and width of kernels
            # k, m, n, b, h, w -> b, k, m, n, h, w
            gcol = cuda.cupy.rollaxis(gcol, 3)
            y = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw,
                dy=self.dy, dx=self.dx)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
        return y,
Example #38
    def forward_gpu(self, inputs):
        self.retain_inputs((0, 1))
        x, gy = inputs
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape

        if (self.cover_all or not chainer.should_use_cudnn('>=auto')
                or x.dtype != self.W_dtype
                or ((self.dy > 1 or self.dx > 1) and _cudnn_version < 6000)):
            col = conv.im2col_gpu(x,
                                  self.kh,
                                  self.kw,
                                  self.sy,
                                  self.sx,
                                  self.ph,
                                  self.pw,
                                  cover_all=self.cover_all,
                                  dy=self.dy,
                                  dx=self.dx)
            gW = cuda.cupy.tensordot(gy, col, ((0, 2, 3),
                                               (0, 4, 5))).astype(self.W_dtype,
                                                                  copy=False)
            return gW,

        gW = cuda.cupy.empty((out_c, c, self.kh, self.kw), dtype=self.W_dtype)
        x = cuda.cupy.ascontiguousarray(x)
        gy = cuda.cupy.ascontiguousarray(gy)

        use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        gy_desc = cudnn.create_tensor_descriptor(gy)

        filter_desc = cudnn.create_filter_descriptor(gW)
        conv_desc = cudnn.create_convolution_descriptor(
            (self.ph, self.pw), (self.sy, self.sx),
            x.dtype,
            dilation=(self.dy, self.dx),
            use_tensor_core=use_tensor_core)

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes

        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size, ), dtype='b')

        if configuration.config.cudnn_deterministic:
            algo = libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1
        else:
            algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                handle, x_desc.value, gy_desc.value, conv_desc.value,
                filter_desc.value, _bwd_filter_pref, workspace_size)

        if use_tensor_core:
            # Only CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 supports
            # Tensor-Core in cuDNN7.
            algo = libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1

        libcudnn.convolutionBackwardFilter_v3(handle, one.data, x_desc.value,
                                              x.data.ptr, gy_desc.value,
                                              gy.data.ptr, conv_desc.value,
                                              algo, workspace.data.ptr,
                                              workspace_size, zero.data,
                                              filter_desc.value, gW.data.ptr)

        return gW,
Example #39
 def setUp(self):
     self.x = cuda.cupy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
     self.t = cuda.cupy.random.randint(0, 3, (4, 3)).astype(numpy.int32)
     with chainer.using_config('use_cudnn', self.use_cudnn):
         self.expect = chainer.should_use_cudnn('==always')
Example #40
 def setUp(self):
     self.x = cuda.cupy.random.uniform(-1, 1, (2, 3)).astype(self.dtype)
     self.gy = cuda.cupy.random.uniform(-1, 1, (2, 3)).astype(self.dtype)
     with chainer.using_config('use_cudnn', self.use_cudnn):
         self.expect = chainer.should_use_cudnn('==always')
Example #41
 def forward_gpu(self, inputs):
     a = cuda.to_gpu(self.a)
     b = cuda.to_gpu(self.b)
     assert chainer.should_use_cudnn('==always')
     return cudnn.activation_backward(a, b, inputs[0], _mode),
Example #42
 def _use_cudnn(self, x, W):
     return (not self.cover_all and
             chainer.should_use_cudnn('>=auto') and
             self.ndim > 1 and
             _check_cudnn_acceptable_type(x.dtype, W.dtype))
Example #43
 def test_call_cudnn_forward(self):
     with chainer.using_config('use_cudnn', self.use_cudnn):
         with testing.patch('cupy.cuda.cudnn.poolingForward') as func:
             self.forward()
             self.assertEqual(func.called,
                              chainer.should_use_cudnn('>=auto'))
Example #44
File: dropout.py  Project: unokun/chainer
 def backward(self, x, gy):
     if chainer.should_use_cudnn('==always', 5000) and self._use_cudnn:
         return DropoutGradCuDNN(self.states, self.dropout_ratio).apply(gy)
     else:
         return DropoutGrad(self.mask).apply(gy)
Example #45
def n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs, activation,
                    use_bi_direction, **kwargs):
    """n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs, activation, use_bi_direction)

    Base function for Stack RNN/BiRNN functions.

    This function is used by :func:`chainer.functions.n_step_birnn` and
    :func:`chainer.functions.n_step_rnn`.
    This function's behavior depends on the following arguments,
    ``activation`` and ``use_bi_direction``.

    .. warning::

       ``train`` and ``use_cudnn`` arguments are not supported anymore since
       v2.
       Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('use_cudnn', use_cudnn)`` respectively.
       See :func:`chainer.using_config`.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of hidden units.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing two matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][0]`` has shape ``(N, I)``, as it is multiplied with
            the input variables; all other matrices have shape ``(N, N)``.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing two vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the dimension
            of hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds the input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is the
            mini-batch size for time ``t`` and ``I`` is the size of the input
            units. Note that this function supports variable-length sequences.
            When sequences have different lengths, sort them in descending
            order by length and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable`\\ s each holding a sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        activation (str): Activation function name.
            Please select ``tanh`` or ``relu``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional RNN.

    Returns:
        tuple: This function returns a tuple containing two elements,
            ``hy`` and ``ys``.

            - ``hy`` is the updated hidden states whose shape is the same
              as ``hx``.
            - ``ys`` is a list of :class:`~chainer.Variable` . Each element
              ``ys[t]`` holds the hidden states of the last layer
              corresponding to the input ``xs[t]``. Its shape is ``(B_t, N)``
              where ``B_t`` is the mini-batch size for time ``t``, and ``N``
              is the size of hidden units. Note that ``B_t`` is the same as
              the batch size of ``xs[t]``.

    .. seealso::
       :func:`chainer.functions.n_step_rnn`
       :func:`chainer.functions.n_step_birnn`

    """  # NOQA

    argument.check_unexpected_kwargs(
        kwargs,
        train='train argument is not supported anymore. '
        'Use chainer.using_config',
        use_cudnn='use_cudnn argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    activation_list = ['tanh', 'relu']
    if activation not in activation_list:
        candidate = ','.join(activation_list)
        raise ValueError('Invalid activation: "%s". Please select from [%s]' %
                         (activation, candidate))

    xp = cuda.get_array_module(hx)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(
            itertools.chain((hx, ), itertools.chain.from_iterable(ws),
                            itertools.chain.from_iterable(bs), xs))
        if use_bi_direction:
            # Bi-directional RNN
            if activation == 'tanh':
                rnn = NStepBiRNNTanh(n_layers, states)
            elif activation == 'relu':
                rnn = NStepBiRNNReLU(n_layers, states)
        else:
            # Uni-directional RNN
            if activation == 'tanh':
                rnn = NStepRNNTanh(n_layers, states)
            elif activation == 'relu':
                rnn = NStepRNNReLU(n_layers, states)

        ret = rnn(*inputs)
        hy, = ret[:1]
        ys = ret[1:]
        return hy, ys

    else:

        direction = 2 if use_bi_direction else 1
        hx = split_axis.split_axis(hx,
                                   n_layers * direction,
                                   axis=0,
                                   force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]

        xws = [_stack_weight([w[0]]) for w in ws]
        hws = [_stack_weight([w[1]]) for w in ws]
        xbs = [_stack_weight([b[0]]) for b in bs]
        hbs = [_stack_weight([b[1]]) for b in bs]

        xs_next = xs
        hy = []
        for layer in six.moves.range(n_layers):

            def _one_directional_loop(di):
                # di=0, forward RNN
                # di=1, backward RNN
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    rnn_in = (
                        linear.linear(x, xws[layer_idx], xbs[layer_idx]) +
                        linear.linear(h, hws[layer_idx], hbs[layer_idx]))
                    if activation == 'tanh':
                        h_bar = tanh.tanh(rnn_in)
                    elif activation == 'relu':
                        h_bar = relu.relu(rnn_in)

                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list

            # Forward RNN
            h, h_forward = _one_directional_loop(di=0)
            hy.append(h)

            if use_bi_direction:
                # Backward RNN
                h, h_backward = _one_directional_loop(di=1)
                h_backward.reverse()
                # Concat
                xs_next = [
                    concat.concat([hfi, hbi], axis=1)
                    for (hfi, hbi) in six.moves.zip(h_forward, h_backward)
                ]
                hy.append(h)
            else:
                # Uni-directional RNN
                xs_next = h_forward

        ys = xs_next
        hy = stack.stack(hy)
        return hy, tuple(ys)
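
Mirroring the ``linear.linear`` calls in the fallback branch above (``linear.linear(x, W, b)`` computes ``x.dot(W.T) + b``), a single-step NumPy sketch of the recurrence for one uni-directional layer with ``tanh`` activation:

import numpy

N, I, B = 3, 2, 4
W0 = numpy.random.randn(N, I).astype('f')   # input-to-hidden, ws[0][0]
W1 = numpy.random.randn(N, N).astype('f')   # hidden-to-hidden, ws[0][1]
b0 = numpy.zeros(N, dtype='f')
b1 = numpy.zeros(N, dtype='f')

x = numpy.random.randn(B, I).astype('f')    # one time step of input
h = numpy.zeros((B, N), dtype='f')          # previous hidden state

h = numpy.tanh(x.dot(W0.T) + b0 + h.dot(W1.T) + b1)
print(h.shape)  # (B, N)
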
Example #46
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs,
                    use_bi_direction, **kwargs):
    """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction)

    Base function for Stack GRU/BiGRU functions.

    This function is used by :func:`chainer.functions.n_step_bigru` and
    :func:`chainer.functions.n_step_gru`.
    This function's behavior depends on the argument ``use_bi_direction``.

    .. warning::

       ``train`` and ``use_cudnn`` arguments are not supported anymore since
       v2.
       Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('use_cudnn', use_cudnn)`` respectively.
       See :func:`chainer.using_config`.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of hidden units.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing six matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 3`` have shape ``(N, I)``, as
            they are multiplied with the input variables; all other matrices
            have shape ``(N, N)``.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing six vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the dimension
            of hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds the input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is the
            mini-batch size for time ``t`` and ``I`` is the size of the input
            units. Note that this function supports variable-length sequences.
            When sequences have different lengths, sort them in descending
            order by length and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable`\\ s each holding a sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-direction GRU.

    .. seealso::
       :func:`chainer.functions.n_step_rnn`
       :func:`chainer.functions.n_step_birnn`

    """  # NOQA
    argument.check_unexpected_kwargs(
        kwargs, train='train argument is not supported anymore. '
        'Use chainer.using_config',
        use_cudnn='use_cudnn argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    xp = cuda.get_array_module(hx, hx.data)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(itertools.chain(
            (hx, ),
            itertools.chain.from_iterable(ws),
            itertools.chain.from_iterable(bs),
            xs))
        if use_bi_direction:
            rnn = NStepBiGRU(n_layers, states)
        else:
            rnn = NStepGRU(n_layers, states)

        ret = rnn(*inputs)
        hy, = ret[:1]
        ys = ret[1:]
        return hy, ys

    else:
        direction = 2 if use_bi_direction else 1
        hx = split_axis.split_axis(hx, n_layers * direction, axis=0,
                                   force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]

        xws = [concat.concat([w[0], w[1], w[2]], axis=0) for w in ws]
        hws = [concat.concat([w[3], w[4], w[5]], axis=0) for w in ws]
        xbs = [concat.concat([b[0], b[1], b[2]], axis=0) for b in bs]
        hbs = [concat.concat([b[3], b[4], b[5]], axis=0) for b in bs]

        xs_next = xs
        hy = []
        for layer in six.moves.range(n_layers):

            def _one_directional_loop(di):
                # di=0, forward GRU
                # di=1, backward GRU
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
                    gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
                    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

                    r = sigmoid.sigmoid(W_r_x + U_r_h)
                    z = sigmoid.sigmoid(W_z_x + U_z_h)
                    h_bar = tanh.tanh(W_x + r * U_x)
                    h_bar = (1 - z) * h_bar + z * h
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list

            # Forward GRU
            h, h_forward = _one_directional_loop(di=0)
            hy.append(h)

            if use_bi_direction:
                # Backward GRU
                h, h_backward = _one_directional_loop(di=1)
                h_backward.reverse()
                # Concat
                xs_next = [concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in
                           six.moves.zip(h_forward, h_backward)]
                hy.append(h)
            else:
                # Uni-directional GRU
                xs_next = h_forward

        ys = xs_next
        hy = stack.stack(hy)
        return hy, tuple(ys)
Example #47
 def forward_gpu(self, inputs):
     assert chainer.should_use_cudnn('==always')
     return cudnn.activation_backward(self.x, self.y, inputs[0], _mode,
                                      self.cap),
Example #48
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction,
                    **kwargs):
    """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction)

    Base function for Stack GRU/BiGRU functions.

    This function is used by :func:`chainer.functions.n_step_bigru` and
    :func:`chainer.functions.n_step_gru`.
    This function's behavior depends on the argument ``use_bi_direction``.

    .. warning::

       ``train`` and ``use_cudnn`` arguments are not supported anymore since
       v2.
       Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('use_cudnn', use_cudnn)`` respectively.
       See :func:`chainer.using_config`.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimension of hidden units. Because of bi-direction, the
            first dimension length is ``2S``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing six matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 3`` are of ``(I, N)`` shape as
            they are multiplied with input variables. All other matrices have
            ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents biases for the i-th layer.
            Each ``bs[i]`` is a list containing six vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the dimension
            of hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort them in descending
            order by length, and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable`\\ s each holding a sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional GRU.

    .. seealso::
       :func:`chainer.functions.n_step_rnn`
       :func:`chainer.functions.n_step_birnn`

    """  # NOQA
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs,
            train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    xp = backend.get_array_module(hx, hx.data)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = cuda.get_cudnn_dropout_states()
        states.set_dropout_ratio(dropout_ratio)
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)

        w = n_step_rnn.cudnn_rnn_weight_concat(n_layers, states,
                                               use_bi_direction, 'gru', ws, bs)

        if use_bi_direction:
            rnn = NStepBiGRU
        else:
            rnn = NStepGRU

        hy, ys = rnn(n_layers, states, lengths)(hx, w, xs)
        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, ys

    else:
        hy, _, ys = n_step_rnn.n_step_rnn_impl(_gru, n_layers, dropout_ratio,
                                               hx, None, ws, bs, xs,
                                               use_bi_direction)
        return hy, ys
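In practice this base function is reached through the public wrappers :func:`chainer.functions.n_step_gru` and :func:`chainer.functions.n_step_bigru`. The following is a minimal usage sketch of the uni-directional wrapper on CPU; the sizes are arbitrary, and the weights are created as ``(out_size, in_size)`` arrays, which is what the pure-Python path's ``linear.linear`` calls expect.

import numpy as np
import chainer.functions as F

n_layers, B, I, N = 1, 3, 4, 5          # illustrative sizes only
rng = np.random.RandomState(0)

hx = rng.randn(n_layers, B, N).astype('f')
# Six weight matrices and six bias vectors per layer; for the first layer the
# first three matrices act on the input, the remaining three on the hidden
# state.
ws = [[rng.randn(N, I if j < 3 else N).astype('f') for j in range(6)]]
bs = [[np.zeros(N, dtype='f') for _ in range(6)]]
# Transposed, variable-length sequences: xs[t] has shape (B_t, I) with
# non-increasing B_t.
xs = [rng.randn(b, I).astype('f') for b in (3, 2, 1)]

hy, ys = F.n_step_gru(n_layers, 0.0, hx, ws, bs, xs)
print(hy.shape)                   # (1, 3, 5)
print([y.shape for y in ys])      # [(3, 5), (2, 5), (1, 5)]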
示例#49
0
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        out_c, _, kh, kw = W.shape
        n, c, h, w = x.shape

        out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                      cover_all=self.cover_all)
        assert out_h > 0, 'Height in the output should be positive.'
        out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                      cover_all=self.cover_all)
        assert out_w > 0, 'Width in the output should be positive.'

        y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)
        if (not self.cover_all and chainer.should_use_cudnn('>=auto') and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y_desc = cudnn.create_tensor_descriptor(y)

            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx), x.dtype)
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size,), dtype='b')
            algo = libcudnn.getConvolutionForwardAlgorithm(
                handle, x_desc.value, self.filter_desc.value,
                self.conv_desc.value, y_desc.value, _fwd_pref,
                workspace_size)

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            libcudnn.convolutionForward(
                handle, one.data, x_desc.value, x.data.ptr,
                self.filter_desc.value, W.data.ptr, self.conv_desc.value,
                algo, workspace.data.ptr, workspace_size, zero.data,
                y_desc.value, y.data.ptr)

            # TODO(beam2d): Support unshared bias
            if b is not None:
                cudnn.add_tensor(
                    handle, one.data, self.bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            # Implementation using im2col
            self.col = conv.im2col_gpu(
                x, kh, kw, self.sy, self.sx, self.ph, self.pw,
                cover_all=self.cover_all)
            y = cuda.cupy.tensordot(
                self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype,
                                                            copy=False)
            # TODO(beam2d): Support unshared bias
            if b is not None:
                y += b
            y = cuda.cupy.rollaxis(y, 3, 1)

        return y,
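The fallback branch above is the usual im2col formulation of convolution: unfold the input into patch columns, contract them with the filters via ``tensordot``, then roll the channel axis back to position 1. A minimal NumPy sketch of that formulation is given below; the ``im2col`` helper here is a simplified stand-in (no ``cover_all``, no dilation) written only for illustration.

import numpy as np


def im2col(x, kh, kw, sy, sx, ph, pw):
    # Simplified im2col: pad, then copy each kernel offset's strided slice.
    n, c, h, w = x.shape
    out_h = (h + 2 * ph - kh) // sy + 1
    out_w = (w + 2 * pw - kw) // sx + 1
    xp = np.pad(x, ((0, 0), (0, 0), (ph, ph), (pw, pw)), mode='constant')
    col = np.zeros((n, c, kh, kw, out_h, out_w), dtype=x.dtype)
    for j in range(kh):
        for i in range(kw):
            col[:, :, j, i] = xp[:, :, j:j + sy * out_h:sy,
                                 i:i + sx * out_w:sx]
    return col


rng = np.random.RandomState(0)
x = rng.randn(2, 3, 5, 5).astype('f')
W = rng.randn(4, 3, 3, 3).astype('f')
col = im2col(x, 3, 3, 1, 1, 1, 1)                 # (n, c, kh, kw, out_h, out_w)
y = np.tensordot(col, W, ((1, 2, 3), (1, 2, 3)))  # (n, out_h, out_w, out_c)
y = np.rollaxis(y, 3, 1)                          # (n, out_c, out_h, out_w)
print(y.shape)                                    # (2, 4, 5, 5)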
示例#50
0
    def check_call_cudnn_forward_training(self, use_cudnn):
        with chainer.using_config('use_cudnn', use_cudnn):
            expect = chainer.should_use_cudnn('>=auto', 5000)
            with testing.patch('cupy.cudnn.rnn_forward_training') as func:
                self.call_forward(True)
            assert func.called == expect
示例#51
0
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        if (not self.cover_all and chainer.should_use_cudnn('>=auto') and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            gx = cuda.cupy.empty_like(x)

            if _cudnn_version >= 3000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size,), dtype='b')

                if configuration.config.cudnn_deterministic:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1  # NOQA
                else:
                    algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                        handle, x_desc.value, gy_desc.value,
                        self.conv_desc.value, self.filter_desc.value,
                        _bwd_filter_pref, workspace_size)

                libcudnn.convolutionBackwardFilter_v3(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, self.filter_desc.value, gW.data.ptr)

                if configuration.config.cudnn_deterministic:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA
                else:
                    algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                        handle, self.filter_desc.value, gy_desc.value,
                        self.conv_desc.value, x_desc.value, _bwd_data_pref,
                        workspace_size)

                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, x_desc.value, gx.data.ptr)
            else:
                if configuration.config.cudnn_deterministic:
                    raise ValueError(
                        "`cudnn_deterministic` option must be False "
                        "if the backpropagation of "
                        "chainer.functions.Convolution2D "
                        "uses cuDNN and cuDNN versions < v3. "
                        "Turn off cudnn_deterministic option with "
                        "`chainer.using_config('cudnn_deterministic', False)` "
                        "context.")
                libcudnn.convolutionBackwardFilter_v2(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, self.filter_desc.value, gW.data.ptr)
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    zero.data, self.bias_desc.value, gb.data.ptr)
        else:
            gW = cuda.cupy.tensordot(
                gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                             copy=False)
            gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype,
                                                             copy=False)
            gcol = cuda.cupy.rollaxis(gcol, 3)

            gx = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
示例#52
0
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]
        dkh, dkw = kh + (kh - 1) * (self.dy - 1), kw + (kw - 1) * (self.dx - 1)

        gW = cuda.cupy.empty_like(W)
        if (not self.cover_all and chainer.should_use_cudnn('>=auto')
                and _check_cudnn_acceptable_type(x.dtype, W.dtype)):

            pad_x = cuda.cupy.zeros((n, c, h + 2 * self.ph, w + 2 * self.pw),
                                    dtype=x.dtype)
            pad_x[:, :, self.ph:self.ph + h, self.pw:self.pw + w] = x

            out_h_s1 = h + 2 * self.ph - dkh + 1
            out_w_s1 = w + 2 * self.pw - dkw + 1

            out_sh = out_h + (out_h - 1) * (self.sy - 1)
            out_sw = out_w + (out_w - 1) * (self.sx - 1)

            gy_ph = (h + dkh - out_sh - 1) // 2
            gy_pw = (w + dkw - out_sw - 1) // 2

            pad_gy = cuda.cupy.zeros((n, out_c, h + dkh - 1, w + dkw - 1),
                                     dtype=x.dtype)
            pad_gy[:, :, gy_ph:gy_ph + out_sh:self.sy,
                   gy_pw:gy_pw + out_sw:self.sx] = gy

            for j in moves.range(kh):
                for i in moves.range(kw):
                    xji = cuda.cupy.ascontiguousarray(
                        pad_x[:, :, j * self.dy:j * self.dy + out_h_s1,
                              i * self.dx:i * self.dx + out_w_s1])
                    gyji = cuda.cupy.ascontiguousarray(
                        pad_gy[:, :, j * self.dy:j * self.dy + h,
                               i * self.dx:i * self.dx + w])
                    Wji = cuda.cupy.ascontiguousarray(W[:, :, -1::-1,
                                                        -1::-1][:, :, j:j + 1,
                                                                i:i + 1])

                    if i == 0 and j == 0:
                        x = cuda.cupy.ascontiguousarray(x)
                        gy = cuda.cupy.ascontiguousarray(gy)

                        handle = cudnn.get_handle()
                        x_desc = cudnn.create_tensor_descriptor(x)
                        xji_desc = cudnn.create_tensor_descriptor(xji)
                        gy_desc = cudnn.create_tensor_descriptor(gy)
                        gyji_desc = cudnn.create_tensor_descriptor(gyji)
                        conv_desc_data = cudnn.create_convolution_descriptor(
                            (0, 0), (1, 1), xji.dtype)

                        oz_dtype = 'd' if x.dtype == 'd' else 'f'
                        one = numpy.array(1, dtype=oz_dtype).ctypes
                        zero = numpy.array(0, dtype=oz_dtype).ctypes
                        gx = cuda.cupy.zeros_like(x)
                        gWji = cuda.cupy.empty((out_c, c, 1, 1), dtype=W.dtype)

                        if _cudnn_version >= 4000:
                            workspace_size = cuda.get_max_workspace_size()
                            workspace = cuda.cupy.empty((workspace_size, ),
                                                        dtype='b')

                            algo_filter = (
                                libcudnn.getConvolutionBackwardFilterAlgorithm(
                                    handle, xji_desc.value, gy_desc.value,
                                    self.conv_desc.value,
                                    self.filter_desc.value, _bwd_filter_pref,
                                    workspace_size))
                            algo_data = (
                                libcudnn.getConvolutionBackwardDataAlgorithm(
                                    handle, self.filter_desc.value,
                                    gyji_desc.value, conv_desc_data.value,
                                    x_desc.value, _bwd_data_pref,
                                    workspace_size))

                    if _cudnn_version >= 4000:
                        libcudnn.convolutionBackwardFilter_v3(
                            handle, one.data, xji_desc.value, xji.data.ptr,
                            gy_desc.value, gy.data.ptr, self.conv_desc.value,
                            algo_filter, workspace.data.ptr, workspace_size,
                            zero.data, self.filter_desc.value, gWji.data.ptr)
                        libcudnn.convolutionBackwardData_v3(
                            handle, one.data, self.filter_desc.value,
                            Wji.data.ptr, gyji_desc.value, gyji.data.ptr,
                            conv_desc_data.value, algo_data,
                            workspace.data.ptr, workspace_size, one.data,
                            x_desc.value, gx.data.ptr)
                    else:
                        libcudnn.convolutionBackwardFilter_v2(
                            handle, one.data, xji_desc.value, xji.data.ptr,
                            gy_desc.value, gy.data.ptr, self.conv_desc.value,
                            zero.data, self.filter_desc.value, gWji.data.ptr)
                        libcudnn.convolutionBackwardData_v2(
                            handle, one.data, self.filter_desc.value,
                            Wji.data.ptr, gyji_desc.value, gyji.data.ptr,
                            conv_desc_data.value, one.data, x_desc.value,
                            gx.data.ptr)
                    gW[:, :, j:j + 1, i:i + 1] = gWji

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(handle, one.data,
                                                 gy_desc.value, gy.data.ptr,
                                                 zero.data,
                                                 self.bias_desc.value,
                                                 gb.data.ptr)
        else:
            gW = cuda.cupy.tensordot(gy, self.col,
                                     ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                                    copy=False)
            gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype,
                                                             copy=False)
            gcol = cuda.cupy.rollaxis(gcol, 3)
            gx = conv.col2im_gpu(gcol,
                                 self.sy,
                                 self.sx,
                                 self.ph,
                                 self.pw,
                                 h,
                                 w,
                                 dy=self.dy,
                                 dx=self.dx)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
示例#53
0
    def test_higher_version_required(self):
        with chainer.using_config('use_cudnn', 'always'):
            self.assertFalse(chainer.should_use_cudnn(
                '>=auto', cuda.cuda.cudnn.getVersion() + 1))
示例#54
0
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        out_c, _, kh, kw = W.shape
        n, c, h, w = x.shape
        dkh, dkw = kh + (kh - 1) * (self.dy - 1), kw + (kw - 1) * (self.dx - 1)

        out_h = conv.get_conv_outsize(h,
                                      kh,
                                      self.sy,
                                      self.ph,
                                      cover_all=self.cover_all,
                                      d=self.dy)
        out_w = conv.get_conv_outsize(w,
                                      kw,
                                      self.sx,
                                      self.pw,
                                      cover_all=self.cover_all,
                                      d=self.dx)

        y = cuda.cupy.zeros((n, out_c, out_h, out_w), dtype=x.dtype)
        if (not self.cover_all and chainer.should_use_cudnn('>=auto')
                and _check_cudnn_acceptable_type(x.dtype, W.dtype)):

            pad_x = cuda.cupy.zeros((n, c, h + 2 * self.ph, w + 2 * self.pw),
                                    dtype=x.dtype)
            pad_x[:, :, self.ph:self.ph + h, self.pw:self.pw + w] = x

            out_h_s1 = h + 2 * self.ph - dkh + 1
            out_w_s1 = w + 2 * self.pw - dkw + 1

            for j in moves.range(kh):
                for i in moves.range(kw):
                    xji = cuda.cupy.ascontiguousarray(
                        pad_x[:, :, j * self.dy:j * self.dy + out_h_s1,
                              i * self.dx:i * self.dx + out_w_s1])
                    Wji = cuda.cupy.ascontiguousarray(W[:, :, j:j + 1,
                                                        i:i + 1])

                    if i == 0 and j == 0:
                        handle = cudnn.get_handle()
                        xji_desc = cudnn.create_tensor_descriptor(xji)
                        y_desc = cudnn.create_tensor_descriptor(y)
                        self.filter_desc = cudnn.create_filter_descriptor(Wji)
                        self.conv_desc = cudnn.create_convolution_descriptor(
                            (0, 0), (self.sy, self.sx), xji.dtype)

                        workspace_size = cuda.get_max_workspace_size()
                        workspace = cuda.cupy.empty((workspace_size, ),
                                                    dtype='b')
                        algo = libcudnn.getConvolutionForwardAlgorithm(
                            handle, xji_desc.value, self.filter_desc.value,
                            self.conv_desc.value, y_desc.value, _fwd_pref,
                            workspace_size)

                        oz_dtype = 'd' if x.dtype == 'd' else 'f'
                        one = numpy.array(1, dtype=oz_dtype).ctypes

                    libcudnn.convolutionForward(
                        handle, one.data, xji_desc.value, xji.data.ptr,
                        self.filter_desc.value, Wji.data.ptr,
                        self.conv_desc.value, algo, workspace.data.ptr,
                        workspace_size, one.data, y_desc.value, y.data.ptr)

            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)
                self.bias_desc = cudnn.create_tensor_descriptor(b[None, :,
                                                                  None, None])
                cudnn.add_tensor(handle, one.data, self.bias_desc.value,
                                 b.data.ptr, one.data, y_desc.value,
                                 y.data.ptr)
        else:
            # Implementation using im2col
            self.col = conv.im2col_gpu(x,
                                       kh,
                                       kw,
                                       self.sy,
                                       self.sx,
                                       self.ph,
                                       self.pw,
                                       cover_all=self.cover_all,
                                       dy=self.dy,
                                       dx=self.dx)
            y = cuda.cupy.tensordot(self.col, W,
                                    ((1, 2, 3), (1, 2, 3))).astype(x.dtype,
                                                                   copy=False)
            # TODO(beam2d): Support unshared bias
            if b is not None:
                y += b
            y = cuda.cupy.rollaxis(y, 3, 1)

        return y,
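The output spatial size used throughout these snippets comes from the standard convolution output-size formula applied to the effective (dilated) kernel size. A small plain-Python sketch of that formula is given below; it is an illustrative reimplementation, not the actual ``conv.get_conv_outsize`` helper.

def conv_outsize(size, k, s, p, cover_all=False, d=1):
    """Illustrative reimplementation of the usual output-size formula."""
    dk = k + (k - 1) * (d - 1)              # effective (dilated) kernel size
    if cover_all:
        return (size + p * 2 - dk + s - 1) // s + 1
    return (size + p * 2 - dk) // s + 1

# A height of 4 with a 3x3 kernel, stride 2 and padding 1 gives 2 output rows;
# dilation enlarges the effective kernel and shrinks the output accordingly.
assert conv_outsize(4, 3, 2, 1) == 2
assert conv_outsize(7, 3, 1, 0, d=2) == 3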
示例#55
0
def n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs,
                    activation, use_bi_direction, **kwargs):
    """n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs, activation, use_bi_direction)

    Base function for Stack RNN/BiRNN functions.

    This function is used at  :func:`chainer.functions.n_step_birnn` and
    :func:`chainer.functions.n_step_rnn`.
    This function's behavior depends on following arguments,
    ``activation`` and ``use_bi_direction``.

    .. warning::

       ``train`` and ``use_cudnn`` arguments are not supported anymore since
       v2.
       Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('use_cudnn', use_cudnn)`` respectively.
       See :func:`chainer.using_config`.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimension of hidden units.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing two matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][0]`` is of ``(I, N)`` shape as it is multiplied with
            the input variables. All other matrices have ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents biases for the i-th layer.
            Each ``bs[i]`` is a list containing two vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the dimension
            of hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort them in descending
            order by length, and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable`\\ s each holding a sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        activation (str): Activation function name.
            Please select ``tanh`` or ``relu``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional RNN.

    Returns:
        tuple: This function returns a tuple containing two elements,
            ``hy`` and ``ys``.

            - ``hy`` is the updated hidden states whose shape is the same as
              ``hx``.
            - ``ys`` is a list of :class:`~chainer.Variable` . Each element
              ``ys[t]`` holds hidden states of the last layer corresponding
              to an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t``
              is mini-batch size for time ``t``, and ``N`` is size of hidden
              units. Note that ``B_t`` is the same as the mini-batch size of
              ``xs[t]``.

    .. seealso::
       :func:`chainer.functions.n_step_rnn`
       :func:`chainer.functions.n_step_birnn`

    """  # NOQA
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    activation_list = ['tanh', 'relu']
    if activation not in activation_list:
        candidate = ','.join(activation_list)
        raise ValueError('Invalid activation: "%s". Please select from [%s]'
                         % (activation, candidate))

    xp = backend.get_array_module(hx)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = cuda.get_cudnn_dropout_states()
        states.set_dropout_ratio(dropout_ratio)
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)

        rnn_mode = 'rnn_%s' % activation
        w = cudnn_rnn_weight_concat(
            n_layers, states, use_bi_direction, rnn_mode, ws, bs)

        if use_bi_direction:
            # Bi-directional RNN
            if activation == 'tanh':
                rnn = NStepBiRNNTanh
            elif activation == 'relu':
                rnn = NStepBiRNNReLU
        else:
            # Uni-directional RNN
            if activation == 'tanh':
                rnn = NStepRNNTanh
            elif activation == 'relu':
                rnn = NStepRNNReLU

        hy, ys = rnn(n_layers, states, lengths)(hx, w, xs)
        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, ys

    else:

        def f(x, h, c, w, b):
            xw, hw = w
            xb, hb = b
            rnn_in = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
            if activation == 'tanh':
                return tanh.tanh(rnn_in), None
            elif activation == 'relu':
                return relu.relu(rnn_in), None

        hy, _, ys = n_step_rnn_impl(
            f, n_layers, dropout_ratio, hx, None, ws, bs, xs, use_bi_direction)
        return hy, ys
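As with the GRU variant, this base function is normally reached through :func:`chainer.functions.n_step_rnn` or :func:`chainer.functions.n_step_birnn`. A minimal usage sketch of the uni-directional wrapper follows; the sizes are arbitrary, and the weights are created as ``(out_size, in_size)`` arrays as expected by the pure-Python path.

import numpy as np
import chainer.functions as F

n_layers, B, I, N = 1, 3, 4, 5          # illustrative sizes only
rng = np.random.RandomState(0)

hx = rng.randn(n_layers, B, N).astype('f')
# Two weight matrices and two bias vectors per layer: index 0 acts on the
# input, index 1 on the hidden state.
ws = [[rng.randn(N, I).astype('f'), rng.randn(N, N).astype('f')]]
bs = [[np.zeros(N, dtype='f'), np.zeros(N, dtype='f')]]
xs = [rng.randn(b, I).astype('f') for b in (3, 2, 1)]

hy, ys = F.n_step_rnn(n_layers, 0.0, hx, ws, bs, xs, activation='tanh')
print(hy.shape)                   # (1, 3, 5)
print([y.shape for y in ys])      # [(3, 5), (2, 5), (1, 5)]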
示例#56
0
File: relu.py  Project: yottabytt/chainer
    def forward(self, inputs):
        assert chainer.should_use_cudnn('==always')
        gy, = inputs
        return cudnn.activation_backward(self.x, self.y, gy, _mode),
示例#57
0
def n_step_lstm_base(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction,
        **kwargs):
    """Base function for Stack LSTM/BiLSTM functions.

    This function is used at :func:`chainer.functions.n_step_lstm` and
    :func:`chainer.functions.n_step_bilstm`.
    This function's behavior depends on the argument ``use_bi_direction``.

    Args:
        n_layers(int): The number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (~chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        cx (~chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of :class:`~chainer.Variable`): Weight matrices.
            ``ws[i]`` represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` corresponds to :math:`W_j` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` are ``(I, N)``-shape as they
            are multiplied with input variables, where ``I`` is the size of
            the input and ``N`` is the dimension of the hidden units. All
            other matrices are ``(N, N)``-shaped.
        bs (list of list of :class:`~chainer.Variable`): Bias vectors.
            ``bs[i]`` represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` corresponds to :math:`b_j` in the equation.
            The shape of each matrix is ``(N,)``.
        xs (list of :class:`~chainer.Variable`):
            A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is the
            mini-batch size for time ``t``. The sequences must be transposed.
            :func:`~chainer.functions.transpose_sequence` can be used to
            transpose a list of :class:`~chainer.Variable`\\ s each
            representing a sequence.
            When sequences have different lengths, they must be
            sorted in descending order of their lengths before transposing.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses Bi-directional
            LSTM.

    Returns:
        tuple: This function returns a tuple containing three elements,
        ``hy``, ``cy`` and ``ys``.

            - ``hy`` is an updated hidden states whose shape is the same as
              ``hx``.
            - ``cy`` is an updated cell states whose shape is the same as
              ``cx``.
            - ``ys`` is a list of :class:`~chainer.Variable` . Each element
              ``ys[t]`` holds hidden states of the last layer corresponding
              to an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is
              the mini-batch size for time ``t``. Note that ``B_t`` is the same
              value as ``xs[t]``.

    .. seealso::

       :func:`chainer.functions.n_step_lstm`
       :func:`chainer.functions.n_step_bilstm`

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    xp = cuda.get_array_module(hx, hx.data)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        handle = cudnn.get_handle()
        states = cuda.get_cudnn_dropout_states()
        cudnn.set_dropout_descriptor(states._desc, handle, dropout_ratio)
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)

        w = n_step_rnn.cudnn_rnn_weight_concat(
            n_layers, states, use_bi_direction, 'lstm', ws, bs)

        if use_bi_direction:
            rnn = NStepBiLSTM
        else:
            rnn = NStepLSTM

        hy, cy, ys = rnn(n_layers, states, lengths)(hx, cx, w, xs)
        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, cy, ys

    else:
        return n_step_rnn.n_step_rnn_impl(
            _lstm, n_layers, dropout_ratio, hx, cx, ws, bs, xs,
            use_bi_direction)
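A minimal usage sketch of the public wrapper :func:`chainer.functions.n_step_lstm` built on this base function. The sizes are arbitrary; each layer carries eight weight matrices and eight bias vectors, the first four weights acting on the input and the last four on the hidden state (created here as ``(out_size, in_size)`` arrays).

import numpy as np
import chainer.functions as F

n_layers, B, I, N = 1, 3, 4, 5          # illustrative sizes only
rng = np.random.RandomState(0)

hx = rng.randn(n_layers, B, N).astype('f')
cx = rng.randn(n_layers, B, N).astype('f')
ws = [[rng.randn(N, I if j < 4 else N).astype('f') for j in range(8)]]
bs = [[np.zeros(N, dtype='f') for _ in range(8)]]
xs = [rng.randn(b, I).astype('f') for b in (3, 2, 1)]

hy, cy, ys = F.n_step_lstm(n_layers, 0.0, hx, cx, ws, bs, xs)
print(hy.shape, cy.shape)         # (1, 3, 5) (1, 3, 5)
print([y.shape for y in ys])      # [(3, 5), (2, 5), (1, 5)]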
示例#58
0
def n_step_lstm_base(n_layers, dropout_ratio, hx, cx, ws, bs, xs,
                     use_bi_direction, **kwargs):
    """Base function for Stack LSTM/BiLSTM functions.

    This function is used at :func:`chainer.functions.n_step_lstm` and
    :func:`chainer.functions.n_step_bilstm`.
    This function's behavior depends on the argument ``use_bi_direction``.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimension of hidden units.
        cx (chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for the i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` are of ``(I, N)`` shape as
            they are multiplied with input variables. All other matrices have
            ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents biases for the i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the dimension
            of hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort them in descending
            order by length, and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable`\\ s each holding a sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses Bi-directional
            LSTM.

    Returns:
        tuple: This function returns a tuple containing three elements,
            ``hy``, ``cy`` and ``ys``.

            - ``hy`` is the updated hidden states whose shape is the same as
              ``hx``.
            - ``cy`` is the updated cell states whose shape is the same as
              ``cx``.
            - ``ys`` is a list of :class:`~chainer.Variable` . Each element
              ``ys[t]`` holds hidden states of the last layer corresponding
              to an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is
              mini-batch size for time ``t``, and ``N`` is size of hidden
              units. Note that ``B_t`` is the same as the mini-batch size of
              ``xs[t]``.

    .. seealso::

       :func:`chainer.functions.n_step_lstm`
       :func:`chainer.functions.n_step_bilstm`

    """

    argument.check_unexpected_kwargs(
        kwargs,
        train='train argument is not supported anymore. '
        'Use chainer.using_config',
        use_cudnn='use_cudnn argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    xp = cuda.get_array_module(hx, hx.data)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(
            itertools.chain((hx, cx), itertools.chain.from_iterable(ws),
                            itertools.chain.from_iterable(bs), xs))
        if use_bi_direction:
            rnn = NStepBiLSTM(n_layers, states)
        else:
            rnn = NStepLSTM(n_layers, states)

        ret = rnn(*inputs)
        hy, cy = ret[:2]
        ys = ret[2:]
        return hy, cy, ys

    else:
        direction = 2 if use_bi_direction else 1
        split_size = n_layers * direction
        hx = split_axis.split_axis(hx, split_size, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, split_size, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        xs_next = xs
        hy = []
        cy = []
        for layer in six.moves.range(n_layers):

            def _one_directional_loop(di):
                # di=0, forward LSTM
                # di=1, backward LSTM
                h_list = []
                c_list = []
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                c = cx[layer_idx]
                if di == 0:
                    xs_list = xs_next
                else:
                    xs_list = reversed(xs_next)
                counter = 0
                for x in xs_list:
                    counter += 1
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                        c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                    else:
                        h_rest = None
                        c_rest = None

                    if layer != 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)
                    if counter == 4:
                        lstm_in = linear.linear(x, xws[layer_idx],
                                                xbs[layer_idx])
                    else:
                        lstm_in = linear.linear(
                            x, xws[layer_idx], xbs[layer_idx]) + linear.linear(
                                h, hws[layer_idx], hbs[layer_idx])

                    c_bar, h_bar = lstm.lstm(c, lstm_in)
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                        c = concat.concat([c_bar, c_rest], axis=0)
                    else:
                        h = h_bar
                        c = c_bar
                    h_list.append(h_bar)
                    c_list.append(c_bar)
                return h, c, h_list, c_list

            h, c, h_forward, c_forward = _one_directional_loop(di=0)
            hy.append(h)
            cy.append(c)

            if use_bi_direction:
                # BiLSTM
                h, c, h_backward, c_backward = _one_directional_loop(di=1)
                hy.append(h)
                cy.append(c)

                h_backward.reverse()
                # concat
                xs_next = [
                    concat.concat([hfi, hbi], axis=1)
                    for (hfi, hbi) in zip(h_forward, h_backward)
                ]
            else:
                # Uni-directional RNN
                xs_next = h_forward

        ys = xs_next
        hy = stack.stack(hy)
        cy = stack.stack(cy)
        return hy, cy, tuple(ys)
示例#59
0
    def _use_cudnn(self, x, gy):
        return (chainer.should_use_cudnn('>=auto') and not self.cover_all
                and x.dtype == self.W_dtype and gy.dtype == self.W_dtype
                and self.ndim > 1)
示例#60
0
    def forward_gpu(self, inputs):
        self.retain_inputs((0, 1))  # retain only x and W
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not all([isinstance(i, cuda.ndarray) for i in inputs]):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        out_c, _, kh, kw = W.shape
        n, c, h, w = x.shape

        out_h = conv.get_conv_outsize(h,
                                      kh,
                                      self.sy,
                                      self.ph,
                                      cover_all=self.cover_all,
                                      d=self.dy)
        assert out_h > 0, 'Height in the output should be positive.'
        out_w = conv.get_conv_outsize(w,
                                      kw,
                                      self.sx,
                                      self.pw,
                                      cover_all=self.cover_all,
                                      d=self.dx)
        assert out_w > 0, 'Width in the output should be positive.'

        y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)
        if (not self.cover_all and chainer.should_use_cudnn('>=auto')
                and x.dtype == W.dtype and
            ((self.dy == 1 and self.dx == 1) or _cudnn_version >= 6000)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y_desc = cudnn.create_tensor_descriptor(y)

            filter_desc = cudnn.create_filter_descriptor(W)
            conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx),
                x.dtype,
                dilation=(self.dy, self.dx),
                use_tensor_core=use_tensor_core)
            if b is not None:
                bias_desc = cudnn.create_tensor_descriptor(b[None, :, None,
                                                             None])

            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size, ), dtype='b')
            algo = libcudnn.getConvolutionForwardAlgorithm(
                handle, x_desc.value, filter_desc.value, conv_desc.value,
                y_desc.value, _fwd_pref, workspace_size)

            if use_tensor_core:
                # Only CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
                # supports Tensor-Core in cuDNN7.
                algo = libcudnn.CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM  # NOQA

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            libcudnn.convolutionForward(handle, one.data, x_desc.value,
                                        x.data.ptr, filter_desc.value,
                                        W.data.ptr, conv_desc.value, algo,
                                        workspace.data.ptr, workspace_size,
                                        zero.data, y_desc.value, y.data.ptr)

            # TODO(beam2d): Support unshared bias
            if b is not None:
                cudnn.add_tensor(handle, one.data, bias_desc.value, b.data.ptr,
                                 one.data, y_desc.value, y.data.ptr)
        else:
            # Implementation using im2col
            col = conv.im2col_gpu(x,
                                  kh,
                                  kw,
                                  self.sy,
                                  self.sx,
                                  self.ph,
                                  self.pw,
                                  cover_all=self.cover_all,
                                  dy=self.dy,
                                  dx=self.dx)
            y = cuda.cupy.tensordot(col, W,
                                    ((1, 2, 3), (1, 2, 3))).astype(x.dtype,
                                                                   copy=False)
            # TODO(beam2d): Support unshared bias
            if b is not None:
                y += b
            y = cuda.cupy.rollaxis(y, 3, 1)

        return y,