Example No. 1
    def _backward_xp(self, x, W, b, gy, xp):
        dims = x.shape[2:]  # (n, c_I, d_1, d_2, ..., d_N)
        stride = self.stride
        pad = self.pad
        ndim = self.ndim

        # Compute filter weight gradient.
        # (n, _, out_1, out_2, ..., out_N)
        out_axes = (0, ) + tuple(moves.range(2, ndim + 2))
        # (n, _, _, ..., _, out_1, out_2, ..., out_N)
        col_axes = (0, ) + tuple(moves.range(ndim + 2, ndim * 2 + 2))
        gW = xp.tensordot(gy, self.col, (out_axes, col_axes)).astype(W.dtype)

        # Compute patch array gradient.
        gcol = xp.tensordot(W, gy, (0, 1)).astype(x.dtype)
        gcol = xp.rollaxis(gcol, ndim + 1)

        # Compute input gradient.
        if xp is numpy:
            gx = conv_nd.col2im_nd_cpu(gcol, stride, pad, dims)
        else:
            gx = conv_nd.col2im_nd_gpu(gcol, stride, pad, dims)

        # Compute bias gradient if given and return gradients.
        if b is None:
            return gx, gW
        else:
            # (n, _, out_1, out_2, ..., out_N)
            axis = (0, ) + tuple(moves.range(2, ndim + 2))
            gb = gy.sum(axis=axis)
            return gx, gW, gb
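
The axis bookkeeping in the tensordot above contracts gy, of shape (n, C_O, out_1, ..., out_N), with the stored patch array self.col, of shape (n, C_I, k_1, ..., k_N, out_1, ..., out_N), over the batch and output axes. Below is a minimal NumPy check of that contraction for a 2-D case; all sizes and names are illustrative, not taken from the surrounding class.

import numpy as np

# Illustrative sizes for ndim = 2.
n, c_i, c_o = 2, 3, 4          # batch, input channels, output channels
k1, k2 = 3, 3                  # kernel size
out1, out2 = 5, 6              # output spatial size

gy = np.random.randn(n, c_o, out1, out2)
col = np.random.randn(n, c_i, k1, k2, out1, out2)

ndim = 2
out_axes = (0,) + tuple(range(2, ndim + 2))             # (0, 2, 3)
col_axes = (0,) + tuple(range(ndim + 2, ndim * 2 + 2))  # (0, 4, 5)

# Contract over the batch and output axes; the result is (C_O, C_I, k_1, k_2).
gW = np.tensordot(gy, col, (out_axes, col_axes))

# The same contraction written out as an einsum.
gW_ref = np.einsum('nopq,ncijpq->ocij', gy, col)
assert gW.shape == (c_o, c_i, k1, k2)
assert np.allclose(gW, gW_ref)
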
Example No. 2
    def _forward_xp_core(self, x, W, b, xp):
        ndim = self.ndim
        stride = self.stride
        pad = self.pad
        dilate = self.dilate

        # gcol: C_O, k_1, ..., k_N, n, d_1, ..., d_N
        gcol = xp.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # Roll n, which is batch size, before the first.
        gcol = xp.rollaxis(gcol, ndim + 1)

        # y: n, C_O, d_1, d_2, ..., d_N
        if xp is numpy:
            y = conv_nd.col2im_nd_cpu(gcol,
                                      stride,
                                      pad,
                                      self.outs,
                                      dilate=dilate)
        else:
            y = conv_nd.col2im_nd_gpu(gcol,
                                      stride,
                                      pad,
                                      self.outs,
                                      dilate=dilate)
        if b is not None:
            b_shape = (1, -1) + (1, ) * ndim
            y += b.reshape(b_shape)

        return y,
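
The bias is added by reshaping b to (1, -1) + (1,) * ndim, so that the per-channel values broadcast over the batch axis and every spatial axis. A small NumPy illustration with made-up shapes:

import numpy as np

ndim = 3                             # number of spatial dimensions
y = np.zeros((2, 4, 5, 6, 7))        # (n, C_O, d_1, d_2, d_3)
b = np.arange(4, dtype=y.dtype)      # one bias value per output channel

b_shape = (1, -1) + (1,) * ndim      # reshapes b to (1, 4, 1, 1, 1)
y += b.reshape(b_shape)              # broadcasts over n and d_1..d_3

# Every spatial position of channel c now holds b[c].
assert np.allclose(y[:, 2], b[2])
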
Example No. 3
    def _forward_xp(self, x, W, b, xp):
        ndim = self.ndim
        ksize = W.shape[2:]     # W: C_I, C_O, k_1, k_2, ..., k_N
        dims = x.shape[2:]      # x: n, C_I, d_1, d_2, ..., d_N
        stride = self.stride
        pad = self.pad

        # gcol: C_O, k_1, ..., k_N, n, d_1, ..., d_N
        gcol = xp.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # Roll n, which is batch size, before the first.
        gcol = xp.rollaxis(gcol, ndim + 1)

        if self.outs is None:
            self.outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for d, k, s, p in zip(dims, ksize, stride, pad))
            assert all(out > 0 for out in self.outs), \
                'Output sizes should be positive.'
        # y: n, C_O, d_1, d_2, ..., d_N
        if xp is numpy:
            y = conv_nd.col2im_nd_cpu(gcol, stride, pad, self.outs)
        else:
            y = conv_nd.col2im_nd_gpu(gcol, stride, pad, self.outs)
        if b is not None:
            b_shape = (1, -1) + (1,) * ndim
            y += b.reshape(b_shape)

        return y,
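
conv.get_deconv_outsize computes the output extent of the transposed convolution for each spatial dimension. The sketch below is a plain-Python rendering of that arithmetic, assuming the default (non-cover_all) case; it is the inverse of the usual convolution output-size formula, not a copy of Chainer's implementation.

def deconv_outsize(size, k, s, p):
    # Transposed-convolution output size for one dimension,
    # assuming the non-cover_all case.
    return s * (size - 1) + k - 2 * p

def conv_outsize(size, k, s, p):
    # Ordinary convolution output size, for comparison.
    return (size + 2 * p - k) // s + 1

# The deconvolution size inverts the convolution size arithmetic.
d, k, s, p = 7, 3, 2, 1
out = deconv_outsize(d, k, s, p)        # 2 * 6 + 3 - 2 = 13
assert conv_outsize(out, k, s, p) == d
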
Example No. 4
    def _forward_xp(self, x, W, b, xp):
        ndim = self.ndim
        ksize = W.shape[2:]  # W: C_I, C_O, k_1, k_2, ..., k_N
        dims = x.shape[2:]  # x: n, C_I, d_1, d_2, ..., d_N
        stride = self.stride
        pad = self.pad

        # gcol: C_O, k_1, ..., k_N, n, d_1, ..., d_N
        gcol = xp.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # Roll n, which is batch size, before the first.
        gcol = xp.rollaxis(gcol, ndim + 1)

        if self.outs is None:
            self.outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for d, k, s, p in zip(dims, ksize, stride, pad))
            assert all(out > 0 for out in self.outs), \
                'Output sizes should be positive.'
        # y: n, C_O, d_1, d_2, ..., d_N
        if xp is numpy:
            y = conv_nd.col2im_nd_cpu(gcol, stride, pad, self.outs)
        else:
            y = conv_nd.col2im_nd_gpu(gcol, stride, pad, self.outs)
        if b is not None:
            b_shape = (1, -1) + (1, ) * ndim
            y += b.reshape(b_shape)

        return y,
Example No. 5
    def _backward_xp(self, x, W, b, gy, xp):
        dims = x.shape[2:]     # (n, c_I, d_1, d_2, ..., d_N)
        stride = self.stride
        pad = self.pad
        ndim = self.ndim

        # Compute filter weight gradient.
        # (n, _, out_1, out_2, ..., out_N)
        out_axes = (0,) + tuple(moves.range(2, ndim + 2))
        # (n, _, _, ..., _, out_1, out_2, ..., out_N)
        col_axes = (0,) + tuple(moves.range(ndim + 2, ndim * 2 + 2))
        gW = xp.tensordot(gy, self.col, (out_axes, col_axes)).astype(
            W.dtype, copy=False)

        # Compute patch array gradient.
        gcol = xp.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = xp.rollaxis(gcol, ndim + 1)

        # Compute input gradient.
        if xp is numpy:
            gx = conv_nd.col2im_nd_cpu(gcol, stride, pad, dims)
        else:
            gx = conv_nd.col2im_nd_gpu(gcol, stride, pad, dims)

        # Compute bias gradient if given and return gradients.
        if b is None:
            return gx, gW
        else:
            # (n, _, out_1, out_2, ..., out_N)
            axis = (0,) + tuple(moves.range(2, ndim + 2))
            gb = gy.sum(axis=axis)
            return gx, gW, gb
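
In each of these snippets, xp.rollaxis(gcol, ndim + 1) moves the batch axis, which sits at position ndim + 1 after the tensordot, to the front. A quick NumPy check of that axis move for ndim = 2, with arbitrary sizes for the remaining axes:

import numpy as np

ndim = 2
# Layout after the tensordot: (channels, k_1, k_2, n, spatial_1, spatial_2).
gcol = np.zeros((4, 3, 3, 2, 5, 6))

rolled = np.rollaxis(gcol, ndim + 1)      # bring the batch axis to the front
moved = np.moveaxis(gcol, ndim + 1, 0)    # equivalent, more explicit API

assert rolled.shape == (2, 4, 3, 3, 5, 6)
assert moved.shape == rolled.shape
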
Example No. 6
    def test_col2im_nd_gpu_parameter_ranks(self):
        # Invalid ksize length.
        col_shape = (2, 3) + (2,) + self.outs
        col = numpy.random.uniform(-1, 1, col_shape).astype(numpy.float32)
        col_gpu = cuda.to_gpu(col)
        with self.assertRaises(AssertionError):
            conv_nd.col2im_nd_gpu(col_gpu, self.stride, self.pad, self.dims)

        col_gpu = cuda.to_gpu(self.col)

        # Invalid stride length.
        with self.assertRaises(AssertionError):
            conv_nd.col2im_nd_gpu(col_gpu, (1,), self.pad, self.dims)

        # Invalid pad length.
        with self.assertRaises(AssertionError):
            conv_nd.col2im_nd_gpu(col_gpu, self.stride, (0,), self.dims)

        # Invalid dims length.
        with self.assertRaises(AssertionError):
            conv_nd.col2im_nd_gpu(col_gpu, self.stride, self.pad, (4,))
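
The test above exercises the rank checks of col2im_nd_gpu: the col array is laid out as (n, C, k_1, ..., k_N, out_1, ..., out_N), so it must carry 2 + 2 * N axes, and stride, pad and dims must each have length N. Below is a standalone sketch of those consistency checks, using a hypothetical helper rather than Chainer's own code.

def check_col2im_args(col_shape, stride, pad, dims):
    # col is expected to be (n, C, k_1, ..., k_N, out_1, ..., out_N),
    # so the number of spatial dimensions N follows from its rank.
    assert (len(col_shape) - 2) % 2 == 0, 'col must have 2 + 2 * N axes'
    ndim = (len(col_shape) - 2) // 2
    assert len(stride) == ndim, 'stride needs one entry per dimension'
    assert len(pad) == ndim, 'pad needs one entry per dimension'
    assert len(dims) == ndim, 'dims needs one entry per dimension'
    return ndim

# A consistent 2-D case passes; dropping one stride entry fails,
# mirroring the AssertionError expected in the test above.
check_col2im_args((2, 3, 3, 3, 2, 2), (1, 1), (0, 0), (4, 4))
try:
    check_col2im_args((2, 3, 3, 3, 2, 2), (1,), (0, 0), (4, 4))
except AssertionError:
    pass
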
Example No. 7
    def _forward_xp(self, x, W, b, xp):
        ndim = self.ndim
        stride = self.stride
        pad = self.pad

        # gcol: C_O, k_1, ..., k_N, n, d_1, ..., d_N
        gcol = xp.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # Roll n, which is batch size, before the first.
        gcol = xp.rollaxis(gcol, ndim + 1)

        # y: n, C_O, d_1, d_2, ..., d_N
        if xp is numpy:
            y = conv_nd.col2im_nd_cpu(gcol, stride, pad, self.outs)
        else:
            y = conv_nd.col2im_nd_gpu(gcol, stride, pad, self.outs)
        if b is not None:
            b_shape = (1, -1) + (1,) * ndim
            y += b.reshape(b_shape)

        return y,
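
All of these snippets branch on `xp is numpy` to choose between the CPU and GPU col2im implementations; xp is the array module (NumPy or CuPy) that owns the input arrays. The following is a minimal standalone version of that dispatch idiom, written so it also runs when CuPy is not installed; it is a sketch of the idiom, not Chainer's helper.

import numpy

try:
    import cupy
except ImportError:
    cupy = None

def get_array_module(array):
    # Return the module (NumPy or CuPy) that owns ``array``.
    if cupy is not None and isinstance(array, cupy.ndarray):
        return cupy
    return numpy

x = numpy.zeros((2, 3, 4, 4))
xp = get_array_module(x)
if xp is numpy:
    pass  # CPU path, e.g. col2im_nd_cpu
else:
    pass  # GPU path, e.g. col2im_nd_gpu
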
Example No. 8
    def _backward_xp(self, x, W, b, gy, xp):
        dims = x.shape[2:]     # (n, c_I, d_1, d_2, ..., d_N)
        stride = self.stride
        pad = self.pad
        ndim = self.ndim

        # Compute filter weight gradient.
        # (n, _, out_1, out_2, ..., out_N)
        out_axes = (0,) + tuple(moves.range(2, ndim + 2))
        # (n, _, _, ..., _, out_1, out_2, ..., out_N)
        col_axes = (0,) + tuple(moves.range(ndim + 2, ndim * 2 + 2))

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (xp is numpy and
                not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
                1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

        gW = xp.tensordot(gy, self.col, (out_axes, col_axes)).astype(
            W.dtype, copy=False)

        # Compute patch array gradient.
        gcol = xp.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = xp.rollaxis(gcol, ndim + 1)

        # Compute input gradient.
        if xp is numpy:
            gx = conv_nd.col2im_nd_cpu(gcol, stride, pad, dims)
        else:
            gx = conv_nd.col2im_nd_gpu(gcol, stride, pad, dims)

        # Compute bias gradient if given and return gradients.
        if b is None:
            return gx, gW
        else:
            # (n, _, out_1, out_2, ..., out_N)
            axis = (0,) + tuple(moves.range(2, ndim + 2))
            gb = gy.sum(axis=axis)
            return gx, gW, gb
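
The workaround above only fires when gy is neither C- nor F-contiguous and has an axis of size 1, the combination that triggered the NumPy issue linked in the comment. A small NumPy illustration of such an array and of the ascontiguousarray fix:

import numpy as np

# A single-column slice of a larger array: shape (4, 1), but the first-axis
# stride still comes from the parent array, so the view is not contiguous.
gy = np.arange(24.0).reshape(4, 6)[:, 1:2]

assert 1 in gy.shape
assert not (gy.flags.c_contiguous or gy.flags.f_contiguous)

gy = np.ascontiguousarray(gy)   # copies into a C-contiguous buffer
assert gy.flags.c_contiguous
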
Example No. 9
    def _backward_xp(self, x, W, b, gy, xp):
        dims = x.shape[2:]     # (n, c_I, d_1, d_2, ..., d_N)
        stride = self.stride
        pad = self.pad
        ndim = self.ndim

        # Compute filter weight gradient.
        # (n, _, out_1, out_2, ..., out_N)
        out_axes = (0,) + tuple(moves.range(2, ndim + 2))
        # (n, _, _, ..., _, out_1, out_2, ..., out_N)
        col_axes = (0,) + tuple(moves.range(ndim + 2, ndim * 2 + 2))

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (xp is numpy and
                not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
                1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

        gW = xp.tensordot(gy, self.col, (out_axes, col_axes)).astype(
            W.dtype, copy=False)

        # Compute patch array gradient.
        gcol = xp.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = xp.rollaxis(gcol, ndim + 1)

        # Compute input gradient.
        if xp is numpy:
            gx = conv_nd.col2im_nd_cpu(gcol, stride, pad, dims)
        else:
            gx = conv_nd.col2im_nd_gpu(gcol, stride, pad, dims)

        # Compute bias gradient if given and return gradients.
        if b is None:
            return gx, gW
        else:
            # (n, _, out_1, out_2, ..., out_N)
            axis = (0,) + tuple(moves.range(2, ndim + 2))
            gb = gy.sum(axis=axis)
            return gx, gW, gb
Example No. 10
    def test_col2im_consistency(self):
        col = conv_nd.im2col_nd_cpu(self.x, self.ksize, self.stride, self.pad)
        im_cpu = conv_nd.col2im_nd_cpu(col, self.stride, self.pad, self.dims)
        im_gpu = conv_nd.col2im_nd_gpu(cuda.to_gpu(col), self.stride, self.pad,
                                       self.dims)
        testing.assert_allclose(im_cpu, im_gpu.get())
Example No. 11
    def test_col2im_consistency(self):
        col = conv_nd.im2col_nd_cpu(self.x, self.ksize, self.stride, self.pad)
        im_cpu = conv_nd.col2im_nd_cpu(col, self.stride, self.pad, self.dims)
        im_gpu = conv_nd.col2im_nd_gpu(
            cuda.to_gpu(col), self.stride, self.pad, self.dims)
        testing.assert_allclose(im_cpu, im_gpu.get())
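
For reference, col2im_nd is the adjoint of im2col_nd: it scatters patch values back onto the image, summing positions that are covered by more than one window, which is why the consistency tests compare backends rather than the original input. Below is a hypothetical 1-D analogue of the pair (not Chainer's N-D implementation) that makes this behaviour explicit.

import numpy as np

def im2col_1d(x, k, s, p):
    # x: (n, c, d) -> col: (n, c, k, out), one column per sliding window.
    n, c, d = x.shape
    out = (d + 2 * p - k) // s + 1
    xpad = np.pad(x, ((0, 0), (0, 0), (p, p)))
    col = np.empty((n, c, k, out), dtype=x.dtype)
    for i in range(k):
        col[:, :, i, :] = xpad[:, :, i:i + s * out:s]
    return col

def col2im_1d(col, s, p, d):
    # col: (n, c, k, out) -> img: (n, c, d); overlapping entries are summed.
    n, c, k, out = col.shape
    img = np.zeros((n, c, d + 2 * p), dtype=col.dtype)
    for i in range(k):
        img[:, :, i:i + s * out:s] += col[:, :, i, :]
    return img[:, :, p:p + d]

x = np.random.randn(2, 3, 8)
col = im2col_1d(x, k=3, s=1, p=1)
y = col2im_1d(col, s=1, p=1, d=8)

# Each input element is counted once per window covering it, so the round
# trip is an element-wise weighted copy of x, not the identity.
counts = col2im_1d(np.ones_like(col), s=1, p=1, d=8)
assert np.allclose(y, x * counts)
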