Example No. 1
    def check_im2col(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
        if gpu:
            im2col = conv.im2col_gpu
            img = cuda.to_gpu(self.img)
        else:
            im2col = conv.im2col_cpu
            img = self.img

        col = im2col(img, kh, kw, sy, sx, ph, pw, dy=dy, dx=dx)
        col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy)
        col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx)
        self.assertEqual(col.shape, (2, 3, kh, kw, col_h, col_w))

        col = cuda.to_cpu(col)

        for n in moves.range(2):
            for c in moves.range(3):
                for y in moves.range(col_h):
                    for x in moves.range(col_w):
                        for ky in moves.range(kh):
                            for kx in moves.range(kw):
                                oy = y * sy - ph + ky * dy
                                ox = x * sx - pw + kx * dx
                                if 0 <= oy < self.h and 0 <= ox < self.w:
                                    self.assertEqual(
                                        col[n, c, ky, kx, y, x],
                                        self.img[n, c, oy, ox])
                                else:
                                    self.assertEqual(col[n, c, ky, kx, y, x],
                                                     0)
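These tests all derive the expected column shape from `conv.get_conv_outsize`. As a point of reference, here is a minimal pure-Python sketch of that formula, following Chainer's documented behavior (the real implementation lives in `chainer.utils.conv`); treat it as an illustration, not the library source:

def conv_outsize(size, k, s, p, cover_all=False, d=1):
    # Effective kernel size once dilation is applied.
    dk = d * (k - 1) + 1
    if cover_all:
        # Round up so every input pixel is covered by some window.
        return (size + p * 2 - dk + s - 1) // s + 1
    return (size + p * 2 - dk) // s + 1

# e.g. a 7-pixel axis with a 3-wide kernel, stride 2, no padding -> 3
assert conv_outsize(7, 3, 2, 0) == 3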
Example No. 2
    def forward_gpu(self, x):
        self._used_cudnn = True

        # Implementation using cudnn
        x = cuda.cupy.ascontiguousarray(x[0])
        n, c, h, w = x.shape
        y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph,
                                    self.cover_all)
        assert y_h > 0, 'Height in the output should be positive.'
        y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw,
                                    self.cover_all)
        assert y_w > 0, 'Width in the output should be positive.'
        y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x.dtype)

        handle = cudnn.get_handle()
        pool_desc = self.create_pool_desc()
        x_desc = cudnn.create_tensor_descriptor(x)
        y_desc = cudnn.create_tensor_descriptor(y)

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        libcudnn.poolingForward(handle, pool_desc.value, one.data,
                                x_desc.value, x.data.ptr, zero.data,
                                y_desc.value, y.data.ptr)
        self.retain_outputs((0, ))
        return y,
Example No. 3
    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)
        x_type, w_type = in_types[:2]

        type_check.expect(x_type.dtype.kind == 'f', w_type.dtype.kind == 'f',
                          x_type.ndim == 4, w_type.ndim == 4,
                          x_type.shape[1] == w_type.shape[0])

        if self.outh is not None:
            lower_bound = conv.get_conv_outsize(self.outh, w_type.shape[2],
                                                self.sy, self.ph)
            upper_bound = conv.get_conv_outsize(self.outh,
                                                w_type.shape[2],
                                                self.sy,
                                                self.ph,
                                                cover_all=True)
            type_check.expect(lower_bound <= x_type.shape[2],
                              x_type.shape[2] <= upper_bound)
        if self.outw is not None:
            lower_bound = conv.get_conv_outsize(self.outw, w_type.shape[3],
                                                self.sx, self.pw)
            upper_bound = conv.get_conv_outsize(self.outw,
                                                w_type.shape[3],
                                                self.sx,
                                                self.pw,
                                                cover_all=True)
            type_check.expect(lower_bound <= x_type.shape[3],
                              x_type.shape[3] <= upper_bound)

        if type_check.eval(n_in) == 3:
            b_type = in_types[2]
            type_check.expect(b_type.dtype == x_type.dtype, b_type.ndim == 1,
                              b_type.shape[0] == w_type.shape[1])
Example No. 4
    def check_im2col(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
        if gpu:
            im2col = conv.im2col_gpu
            img = cuda.to_gpu(self.img)
        else:
            im2col = conv.im2col_cpu
            img = self.img

        col = im2col(img, kh, kw, sy, sx, ph, pw, dy=dy, dx=dx)
        col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy)
        col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx)
        self.assertEqual(col.shape, (2, 3, kh, kw, col_h, col_w))

        col = cuda.to_cpu(col)

        for y in moves.range(col_h):
            for x in moves.range(col_w):
                for ky in moves.range(kh):
                    for kx in moves.range(kw):
                        oy = y * sy - ph + ky * dy
                        ox = x * sx - pw + kx * dx
                        if 0 <= oy < self.h and 0 <= ox < self.w:
                            testing.assert_allclose(col[:, :, ky, kx, y, x],
                                                    self.img[:, :, oy, ox])
                        else:
                            testing.assert_allclose(
                                col[:, :, ky, kx, y, x],
                                numpy.zeros((2, 3), self.dtype))
Example No. 5
    def forward_gpu(self, x):
        if (cuda.cudnn_enabled and self.use_cudnn
                and pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
            return super(AveragePooling2D, self).forward_gpu(x)

        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph)
        y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw)
        y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
        coeff = 1. / (self.kh * self.kw)
        kern = cuda.elementwise(
            'raw T in, int32 h, int32 w,'
            'int32 out_h, int32 out_w, int32 kh, int32 kw,'
            'int32 sy, int32 sx, int32 ph, int32 pw, T coeff', 'T out', '''
            int c0    = i / (out_h * out_w);
            int out_y = i / out_w % out_h;
            int out_x = i % out_w;
            int in_y_0 = max(0, out_y * sy - ph);
            int in_y_1 = min(h, out_y * sy + kh - ph);
            int in_x_0 = max(0, out_x * sx - pw);
            int in_x_1 = min(w, out_x * sx + kw - pw);

            T val = 0;
            for (int y = in_y_0; y < in_y_1; ++y) {
              int offset_y = w * (y + h * c0);
              for (int x = in_x_0; x < in_x_1; ++x) {
                val = val + in[x + offset_y];
              }
            }
            out = val * coeff;
            ''', 'avg_pool_fwd')
        kern(x[0].reduced_view(), h, w, y_h, y_w, self.kh, self.kw, self.sy,
             self.sx, self.ph, self.pw, coeff, y)
        return y,
Example No. 6
    def forward_gpu(self, x):
        self.retain_inputs((0,))
        self._used_cudnn = True

        # Implementation using cudnn
        x = cuda.cupy.ascontiguousarray(x[0])
        n, c, h, w = x.shape
        y_h = conv.get_conv_outsize(
            h, self.kh, self.sy, self.ph, self.cover_all)
        assert y_h > 0, 'Height in the output should be positive.'
        y_w = conv.get_conv_outsize(
            w, self.kw, self.sx, self.pw, self.cover_all)
        assert y_w > 0, 'Width in the output should be positive.'
        y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x.dtype)

        handle = cudnn.get_handle()
        pool_desc = self.create_pool_desc()
        x_desc = cudnn.create_tensor_descriptor(x)
        y_desc = cudnn.create_tensor_descriptor(y)

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        libcudnn.poolingForward(
            handle, pool_desc.value, one.data, x_desc.value,
            x.data.ptr, zero.data, y_desc.value, y.data.ptr)
        self.retain_outputs((0,))
        return y,
Example No. 7
    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(3 <= n_in, n_in <= 4)
        x_type = in_types[0]
        v_type = in_types[1]
        g_type = in_types[2]

        type_check.expect(
            x_type.dtype.kind == "f",
            v_type.dtype.kind == "f",
            g_type.dtype.kind == "f",
            x_type.ndim == 4,
            v_type.ndim == 4,
            g_type.ndim == 4,
            x_type.shape[1] == v_type.shape[0]
        )

        if self.outh is not None:
            type_check.expect(
                x_type.shape[2] ==
                conv.get_conv_outsize(self.outh, v_type.shape[2], self.sy, self.ph),
            )
        if self.outw is not None:
            type_check.expect(
                x_type.shape[3] ==
                conv.get_conv_outsize(self.outw, v_type.shape[3], self.sx, self.pw),
            )

        if type_check.eval(n_in) == 4:
            b_type = in_types[3]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                b_type.shape[0] == v_type.shape[1]
            )
Example No. 8
    def check_im2col(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
        if gpu:
            im2col = conv.im2col_gpu
            img = cuda.to_gpu(self.img)
        else:
            im2col = conv.im2col_cpu
            img = self.img

        col = im2col(img, kh, kw, sy, sx, ph, pw, dy=dy, dx=dx)
        col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy)
        col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx)
        self.assertEqual(col.shape, (2, 3, kh, kw, col_h, col_w))

        col = cuda.to_cpu(col)

        for y in moves.range(col_h):
            for x in moves.range(col_w):
                for ky in moves.range(kh):
                    for kx in moves.range(kw):
                        oy = y * sy - ph + ky * dy
                        ox = x * sx - pw + kx * dx
                        if 0 <= oy < self.h and 0 <= ox < self.w:
                            testing.assert_allclose(
                                col[:, :, ky, kx, y, x],
                                self.img[:, :, oy, ox])
                        else:
                            testing.assert_allclose(
                                col[:, :, ky, kx, y, x],
                                numpy.zeros((2, 3), self.dtype))
Example No. 9
    def check_col2im(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
        col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy)
        col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx)
        shape = (2, 3, kh, kw, col_h, col_w)
        col = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)

        if gpu:
            col2im = conv.col2im_gpu
            col_data = cuda.to_gpu(col)
        else:
            col2im = conv.col2im_cpu
            col_data = col

        img = col2im(col_data, sy, sx, ph, pw, self.h, self.w, dy=dy, dx=dx)
        img = cuda.to_cpu(img)
        self.assertEqual(img.shape, (2, 3, self.h, self.w))
        for n in moves.range(2):
            for c in moves.range(3):
                for y in moves.range(self.h):
                    for x in moves.range(self.w):
                        v = numpy.float32(0.0)
                        for ky in moves.range(kh):
                            for kx in moves.range(kw):
                                oy = (y + ph - ky * dy) // sy
                                ox = (x + pw - kx * dx) // sx
                                if (y + ph - ky * dy) % sy == 0 and \
                                   (x + pw - kx * dx) % sx == 0 and \
                                   0 <= oy < col_h and \
                                   0 <= ox < col_w:
                                    v += col[n, c, ky, kx, oy, ox]
                        self.assertAlmostEqual(img[n, c, y, x], v)
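The inversion in this check comes from solving `oy * sy - ph + ky * dy == y` for `oy`: kernel tap `ky` contributes to image row `y` only when `(y + ph - ky * dy)` is a non-negative multiple of `sy`. A one-axis sketch of that membership test, with made-up sizes:

# Hypothetical sizes: which column rows (oy) of tap ky touch image row y?
y, ph, sy, dy, col_h, kh = 5, 1, 2, 1, 4, 3
for ky in range(kh):
    q, r = divmod(y + ph - ky * dy, sy)
    if r == 0 and 0 <= q < col_h:
        print('tap', ky, 'accumulates column row', q)  # taps 0 and 2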
Example No. 10
def calc_max_pooling2d(func, in_data, **kwargs):
    """[MaxPooling2D](https://docs.chainer.org/en/v4.3.0/reference/generated/chainer.functions.max_pooling_2d.html)

    Each output pixel is calculated by taking max of $k * k$ elements from the
    input ($k*k - 1$ FLOPs). Output size is calculated by
    [chainer.utils.get_conv_outsize](https://docs.chainer.org/en/v4.3.0/reference/util/generated/chainer.utils.get_conv_outsize.html).

    | Item   | Value |
    |:-------|:------|
    | FLOPs  | $$ \| y \| (k_{\mathrm{w}} k_{\mathrm{h}} - 1) $$ |
    | mread  | $$\| x \|$$ |
    | mwrite | $$\| y \|$$ |
    | params | AvgPooling parameter `k`, `s` and `p` |
    """
    x, = in_data

    kh, kw = int(func.kh), int(func.kw)
    sy, sx = int(func.sy), int(func.sx)
    ph, pw = int(func.ph), int(func.pw)
    batch_size, in_c, in_h, in_w = x.shape
    out_h = get_conv_outsize(in_h, kh, sy, ph, cover_all=func.cover_all)
    out_w = get_conv_outsize(in_w, kw, sx, pw, cover_all=func.cover_all)

    out_size = batch_size * in_c * out_h * out_w
    flops = out_size * int(kw * kh - 1)

    params = {
        'k': kw if kw == kh else (kh, kw),
        's': sx if sx == sy else (sy, sx),
        'p': pw if pw == ph else (ph, pw)
    }
    return (flops, x.size, out_size, params)
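For a feel for the resulting numbers, here is a hypothetical stand-alone run of the same recipe (the shape and hyperparameters are made up for illustration):

# Max pooling a 1x64x56x56 input with k=2, s=2, p=0.
batch, c, h, w = 1, 64, 56, 56
kh = kw = 2; sy = sx = 2; ph = pw = 0
out_h = (h + 2 * ph - kh) // sy + 1   # get_conv_outsize, cover_all=False
out_w = (w + 2 * pw - kw) // sx + 1
out_size = batch * c * out_h * out_w
flops = out_size * (kh * kw - 1)      # |y| * (kh*kw - 1)
print(out_h, out_w, flops)            # 28 28 150528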
Example No. 11
    def check_forward(self, x, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
        x = x.copy()
        n, c, h, w = x.shape
        col = functions.im2col(
            x, (kh, kw), (sy, sx), (ph, pw), dilate=(dy, dx)).data
        col_h = get_conv_outsize(h, kh, sy, ph, d=dy)
        col_w = get_conv_outsize(w, kw, sx, pw, d=dx)

        self.assertEqual(col.shape, (n, c * kh * kw, col_h, col_w))
        col = col.reshape(n, c, kh, kw, col_h, col_w)
        col = cuda.to_cpu(col)

        for y in moves.range(col_h):
            for x in moves.range(col_w):
                for ky in moves.range(kh):
                    for kx in moves.range(kw):
                        oy = y * sy - ph + ky * dy
                        ox = x * sx - pw + kx * dx
                        if 0 <= oy < h and 0 <= ox < w:
                            testing.assert_allclose(
                                col[:, :, ky, kx, y, x],
                                self.x[:, :, oy, ox])
                        else:
                            testing.assert_allclose(
                                col[:, :, ky, kx, y, x],
                                numpy.zeros((2, 3), self.dtype))
Example No. 12
    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)
        x_type, w_type = in_types[:2]

        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim == 4,
            w_type.ndim == 4,
            x_type.shape[1] == w_type.shape[0]
        )

        if self.outh is not None:
            type_check.expect(
                x_type.shape[2] ==
                conv.get_conv_outsize(self.outh, w_type.shape[2],
                                      self.sy, self.ph),
            )
        if self.outw is not None:
            type_check.expect(
                x_type.shape[3] ==
                conv.get_conv_outsize(self.outw, w_type.shape[3],
                                      self.sx, self.pw),
            )

        if type_check.eval(n_in) == 3:
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                b_type.shape[0] == w_type.shape[1]
            )
Example No. 13
    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)
        x_type, w_type = in_types[:2]

        type_check.expect(x_type.dtype.kind == 'f', w_type.dtype.kind == 'f',
                          x_type.ndim == self.ndim + 2,
                          w_type.ndim == self.ndim + 2,
                          x_type.shape[1] == w_type.shape[0])

        if self.outs is not None:
            for i, (out, s, p, di) in enumerate(
                    zip(self.outs, self.stride, self.pad, self.dilate)):
                lower_bound = conv.get_conv_outsize(out,
                                                    w_type.shape[i + 2],
                                                    s,
                                                    p,
                                                    d=di)
                upper_bound = conv.get_conv_outsize(out,
                                                    w_type.shape[i + 2],
                                                    s,
                                                    p,
                                                    cover_all=True,
                                                    d=di)
                type_check.expect(lower_bound <= x_type.shape[i + 2],
                                  x_type.shape[i + 2] <= upper_bound)

        if type_check.eval(n_in) == 3:
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                # Need to consider the case that group count > 1.
                # b_type.shape[0] == w_type.shape[1]
            )
Example No. 14
    def check_col2im(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
        col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy)
        col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx)
        shape = (2, 3, kh, kw, col_h, col_w)
        col = numpy.random.uniform(-1, 1, shape).astype(self.dtype)

        if gpu:
            col2im = conv.col2im_gpu
            col_data = cuda.to_gpu(col)
        else:
            col2im = conv.col2im_cpu
            col_data = col

        img = col2im(col_data, sy, sx, ph, pw, self.h, self.w, dy=dy, dx=dx)
        img = cuda.to_cpu(img)
        self.assertEqual(img.shape, (2, 3, self.h, self.w))
        for y in moves.range(self.h):
            for x in moves.range(self.w):
                v = numpy.zeros((2, 3), self.dtype)
                for ky in moves.range(kh):
                    for kx in moves.range(kw):
                        oy = (y + ph - ky * dy) // sy
                        ox = (x + pw - kx * dx) // sx
                        if ((y + ph - ky * dy) % sy == 0
                                and (x + pw - kx * dx) % sx == 0
                                and 0 <= oy < col_h and 0 <= ox < col_w):
                            v += col[:, :, ky, kx, oy, ox]
                testing.assert_allclose(img[:, :, y, x], v)
Example No. 15
    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(n_in == 1)
        x_type = in_types[0]

        type_check.expect(
            x_type.dtype.kind == 'f',
            x_type.ndim == 4,
            x_type.shape == self.indexes.shape,
        )

        if self.outh is not None:
            expected_h = conv.get_conv_outsize(self.outh,
                                               self.kh,
                                               self.sy,
                                               self.ph,
                                               cover_all=self.cover_all)
            type_check.expect(x_type.shape[2] == expected_h)
        if self.outw is not None:
            expected_w = conv.get_conv_outsize(self.outw,
                                               self.kw,
                                               self.sx,
                                               self.pw,
                                               cover_all=self.cover_all)
            type_check.expect(x_type.shape[3] == expected_w)
Example No. 16
    def _process_conv2d(self, function, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        batch_size, in_c, in_h, in_w = x.shape
        out_c, _, kh, kw = W.shape

        out_h = conv.get_conv_outsize(in_h,
                                      kh,
                                      function.sy,
                                      function.ph,
                                      cover_all=function.cover_all)
        out_w = conv.get_conv_outsize(in_w,
                                      kw,
                                      function.sx,
                                      function.pw,
                                      cover_all=function.cover_all)
        ops = 2 * batch_size * in_c * out_c * kw * kh * out_w * out_h  # twice because of multiply-and-add
        if b is not None:
            ops += batch_size * out_c * out_w * out_h  # bias
        self._print(
            '%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%f' %
            (function.label, batch_size, in_w, in_h, in_c, out_w, out_h, out_c,
             kw, kh, function.pw, function.sx, ops / 1e9))
        self.total_ops += ops
Example No. 17
    def check_im2col(self, kh, kw, sy, sx, ph, pw, gpu):
        if gpu:
            im2col = conv.im2col_gpu
            img = cuda.to_gpu(self.img)
        else:
            im2col = conv.im2col_cpu
            img = self.img

        col = im2col(img, kh, kw, sy, sx, ph, pw)
        col_h = conv.get_conv_outsize(self.h, kh, sy, ph)
        col_w = conv.get_conv_outsize(self.w, kw, sx, pw)
        self.assertEqual(col.shape, (2, 3, kh, kw, col_h, col_w))

        col = cuda.to_cpu(col)

        for n in moves.range(2):
            for c in moves.range(3):
                for y in moves.range(col_h):
                    for x in moves.range(col_w):
                        for dy in moves.range(kh):
                            for dx in moves.range(kw):
                                oy = y * sy - ph + dy
                                ox = x * sx - pw + dx
                                if 0 <= oy < self.h and 0 <= ox < self.w:
                                    self.assertEqual(col[n, c, dy, dx, y, x],
                                                     self.img[n, c, oy, ox])
                                else:
                                    self.assertEqual(col[n, c, dy, dx, y, x],
                                                     0)
Example No. 18
    def check_col2im(self, kh, kw, sy, sx, ph, pw, gpu):
        col_h = conv.get_conv_outsize(self.h, kh, sy, ph)
        col_w = conv.get_conv_outsize(self.w, kw, sx, pw)
        shape = (2, 3, kh, kw, col_h, col_w)
        col = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)

        if gpu:
            col2im = conv.col2im_gpu
            col_data = cuda.to_gpu(col)
        else:
            col2im = conv.col2im_cpu
            col_data = col

        img = col2im(col_data, sy, sx, ph, pw, self.h, self.w)
        img = cuda.to_cpu(img)
        self.assertEqual(img.shape, (2, 3, self.h, self.w))
        for n in moves.range(2):
            for c in moves.range(3):
                for y in moves.range(self.h):
                    for x in moves.range(self.w):
                        v = numpy.float32(0.0)
                        for dy in moves.range(kh):
                            for dx in moves.range(kw):
                                oy = (y + ph - dy) // sy
                                ox = (x + pw - dx) // sx
                                if (y + ph - dy) % sy == 0 and \
                                   (x + pw - dx) % sx == 0 and \
                                   0 <= oy < col_h and \
                                   0 <= ox < col_w:
                                    v += col[n, c, dy, dx, oy, ox]
                        self.assertAlmostEqual(img[n, c, y, x], v)
Example No. 19
    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)
        x_type, w_type = in_types[:2]

        type_check.expect(
            x_type.dtype == numpy.float32,
            w_type.dtype == numpy.float32,
            x_type.ndim == 4,
            w_type.ndim == 4,
            x_type.shape[1] == w_type.shape[0]
        )

        if self.outh is not None:
            type_check.expect(
                x_type.shape[2] ==
                conv.get_conv_outsize(self.outh, w_type.shape[2],
                                      self.sy, self.ph),
            )
        if self.outw is not None:
            type_check.expect(
                x_type.shape[3] ==
                conv.get_conv_outsize(self.outw, w_type.shape[3],
                                      self.sx, self.pw),
            )

        if n_in.eval() == 3:
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == numpy.float32,
                b_type.ndim == 1,
                b_type.shape[0] == w_type.shape[1]
            )
Example No. 20
    def _forward_ideep(self, x):
        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype
        self.retain_inputs((0,))

        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(
            h, self.kh, self.sy, self.ph, self.cover_all)
        assert y_h > 0, 'Height in the output should be positive.'
        y_w = conv.get_conv_outsize(
            w, self.kw, self.sx, self.pw, self.cover_all)
        assert y_w > 0, 'Width in the output should be positive.'
        self.pd = self.sy * (y_h - 1) + self.kh - h - self.ph
        self.pr = self.sx * (y_w - 1) + self.kw - w - self.pw

        pp = intel64.ideep.pooling2DParam(
            (n, c, y_h, y_w),
            self.kh, self.kw,
            self.sy, self.sx,
            self.ph, self.pw,
            self.pd, self.pr,
            intel64.ideep.pooling2DParam.pooling_max)
        y, self.indexes = intel64.ideep.pooling2D.Forward(
            intel64.ideep.array(x[0]), pp)
        return y,
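The `pd`/`pr` lines compute the extra bottom/right padding iDeep expects, chosen so that the last pooling window ends exactly at the padded border, i.e. `sy * (y_h - 1) + kh == h + ph + pd`. A quick check of that identity with hypothetical one-axis sizes:

h, kh, sy, ph = 7, 3, 2, 1            # made-up sizes for illustration
y_h = (h + 2 * ph - kh) // sy + 1     # 4 output rows
pd = sy * (y_h - 1) + kh - h - ph     # 1 extra row of bottom padding
assert sy * (y_h - 1) + kh == h + ph + pd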
Example No. 21
    def forward_gpu(self, inputs):
        if self._used_cudnn:
            x, = self.mpool2d.get_retained_inputs()
            return self._forward_gpu_compute_indexes_again((x.data, inputs[0]))
        else:
            x, = inputs
            n, c, h, w = x.shape
            y_h = conv.get_conv_outsize(
                h, self.kh, self.sy, self.ph, self.cover_all)
            assert y_h > 0, 'Height in the output should be positive.'
            y_w = conv.get_conv_outsize(
                w, self.kw, self.sx, self.pw, self.cover_all)
            assert y_w > 0, 'Width in the output should be positive.'
            y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x.dtype)

            cuda.elementwise(
                'raw T in, raw S indexes, int32 h, int32 w, int32 out_h,'
                'int32 out_w, int32 kh, int32 kw, int32 sy, int32 sx,'
                'int32 ph, int32 pw', 'T out',
                '''
                int c0    = i / (out_h * out_w);
                int out_y = i / out_w % out_h;
                int out_x = i % out_w;
                int index = indexes[i];
                int max_y = max(0, out_y * sy - ph + index / kw);
                int max_x = max(0, out_x * sx - pw + index % kw);
                out = in[max_x + w * (max_y + h * c0)];
                ''', 'max_pool_grad_fwd')(
                    x.reduced_view(), self.indexes.reduced_view(), h, w,
                    y_h, y_w, self.kh, self.kw, self.sy, self.sx, self.ph,
                    self.pw, y)
            return y,
Example No. 22
    def forward_gpu(self, x):
        if (cuda.cudnn_enabled and self.use_cudnn and
                pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
            return super(AveragePooling2D, self).forward_gpu(x)

        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph)
        y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw)
        y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
        coeff = 1. / (self.kh * self.kw)
        kern = cuda.elementwise(
            'raw T in, int32 h, int32 w,'
            'int32 out_h, int32 out_w, int32 kh, int32 kw,'
            'int32 sy, int32 sx, int32 ph, int32 pw, T coeff',
            'T out', '''
            int c0    = i / (out_h * out_w);
            int out_y = i / out_w % out_h;
            int out_x = i % out_w;
            int in_y_0 = max(0, out_y * sy - ph);
            int in_y_1 = min(h, out_y * sy + kh - ph);
            int in_x_0 = max(0, out_x * sx - pw);
            int in_x_1 = min(w, out_x * sx + kw - pw);

            T val = 0;
            for (int y = in_y_0; y < in_y_1; ++y) {
              int offset_y = w * (y + h * c0);
              for (int x = in_x_0; x < in_x_1; ++x) {
                val = val + in[x + offset_y];
              }
            }
            out = val * coeff;
            ''', 'avg_pool_fwd')
        kern(x[0].reduced_view(), h, w, y_h, y_w, self.kh, self.kw,
             self.sy, self.sx, self.ph, self.pw, coeff, y)
        return y,
Example No. 23
    def _forward_ideep(self, x):
        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype
        self.retain_inputs((0,))

        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(
            h, self.kh, self.sy, self.ph, self.cover_all)
        assert y_h > 0, 'Height in the output should be positive.'
        y_w = conv.get_conv_outsize(
            w, self.kw, self.sx, self.pw, self.cover_all)
        assert y_w > 0, 'Width in the output should be positive.'
        self.pd = self.sy * (y_h - 1) + self.kh - h - self.ph
        self.pr = self.sx * (y_w - 1) + self.kw - w - self.pw

        pp = intel64.ideep.pooling2DParam(
            (n, c, y_h, y_w),
            self.kh, self.kw,
            self.sy, self.sx,
            self.ph, self.pw,
            self.pd, self.pr,
            intel64.ideep.pooling2DParam.pooling_max)
        y, self.indexes = intel64.ideep.pooling2D.Forward(
            intel64.ideep.array(x[0]), pp)
        return y,
Example No. 24
    def forward_gpu(self, x):
        if cudnn.enabled and self.use_cudnn:
            return super(AveragePooling2D, self).forward_gpu(x)

        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph)
        y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw)
        y = cuda.empty((n, c, y_h, y_w), dtype=numpy.float32)
        coeff = 1. / (self.kh * self.kw)

        cuda.elementwise(
            '''
               float* out, const float* in, int h, int w, int out_h, int out_w,
               int kh, int kw, int sy, int sx, int ph, int pw, float coeff
            ''', '''
               int c0    = i / (out_h * out_w);
               int out_y = i / out_w % out_h;
               int out_x = i % out_w;
               int in_y_0 = max(0, out_y * sy - ph);
               int in_y_1 = min(h, out_y * sy + kh - ph);
               int in_x_0 = max(0, out_x * sx - pw);
               int in_x_1 = min(w, out_x * sx + kw - pw);

               float val = 0;
               for (int y = in_y_0; y < in_y_1; ++y) {
                 int offset_y = w * (y + h * c0);
                 for (int x = in_x_0; x < in_x_1; ++x) {
                   val += in[x + offset_y];
                 }
               }
               out[i] = val * coeff;
            ''', 'avg_pool_fwd')(y, x[0], h, w, y_h, y_w, self.kh, self.kw,
                                 self.sy, self.sx, self.ph, self.pw, coeff)
        return y,
Example No. 25
    def forward_gpu(self, inputs):
        if self._used_cudnn:
            x, = self.mpool2d.get_retained_inputs()
            return self._forward_gpu_compute_indexes_again((x.data, inputs[0]))
        else:
            x, = inputs
            n, c, h, w = x.shape
            y_h = conv.get_conv_outsize(
                h, self.kh, self.sy, self.ph, self.cover_all)
            assert y_h > 0, 'Height in the output should be positive.'
            y_w = conv.get_conv_outsize(
                w, self.kw, self.sx, self.pw, self.cover_all)
            assert y_w > 0, 'Width in the output should be positive.'
            y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x.dtype)

            cuda.elementwise(
                'raw T in, raw S indexes, int32 h, int32 w, int32 out_h,'
                'int32 out_w, int32 kh, int32 kw, int32 sy, int32 sx,'
                'int32 ph, int32 pw', 'T out',
                '''
                int c0    = i / (out_h * out_w);
                int out_y = i / out_w % out_h;
                int out_x = i % out_w;
                int index = indexes[i];
                int max_y = max(0, out_y * sy - ph + index / kw);
                int max_x = max(0, out_x * sx - pw + index % kw);
                out = in[max_x + w * (max_y + h * c0)];
                ''', 'max_pool_grad_fwd')(
                    x.reduced_view(), self.indexes.reduced_view(), h, w,
                    y_h, y_w, self.kh, self.kw, self.sy, self.sx, self.ph,
                    self.pw, y)
            return y,
Example No. 26
    def setUp(self):
        self.x = numpy.random.uniform(
            size=self.in_shape).astype(self.dtype)

        kh, kw = _pair(self.ksize)
        sy, sx = _pair(self.stride)
        ph, pw = _pair(self.pad)
        dy, dx = _pair(self.dilate)

        N, C, H, W = self.in_shape

        o_H = get_conv_outsize(H, kh, sy, ph, cover_all=self.cover_all, d=dy)
        o_W = get_conv_outsize(W, kw, sx, pw, cover_all=self.cover_all, d=dx)

        self.gy = numpy.random.uniform(
            size=(N, C * kh * kw, o_H, o_W)).astype(self.dtype)
        self.ggx = numpy.random.uniform(
            size=self.in_shape).astype(self.dtype)

        self.check_backward_options = {'atol': 5e-4, 'rtol': 5e-3}
        if self.dtype is numpy.float16:
            self.check_backward_options.update({'atol': 1e-3, 'rtol': 1e-2})

        self.check_double_backward_options = {'atol': 5e-4, 'rtol': 5e-3}
        if self.dtype is numpy.float16:
            self.check_double_backward_options.update(
                {'atol': 1e-3, 'rtol': 1e-2})
Example No. 27
    def _set_cover_all(self, x, W):
        in_h, in_w = x.shape[2:]
        kh, kw = W.shape[2:]
        self.cover_all = (in_h != conv.get_conv_outsize(
            self.outh, kh, self.sy, self.ph, d=self.dy)
                          or in_w != conv.get_conv_outsize(
                              self.outw, kw, self.sx, self.pw, d=self.dx))
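The check works because deconvolution runs the convolution arithmetic in reverse: the layer's output size `outh` plays the role of the convolution input, and `in_h` the role of the convolution output. With stride > 1 several output sizes map to the same `in_h`, which is exactly what `cover_all` disambiguates. A one-axis sketch with hypothetical numbers:

def outsize(size, k, s, p, cover_all=False):
    # One-axis get_conv_outsize, dilation 1, for illustration only.
    if cover_all:
        return (size + 2 * p - k + s - 1) // s + 1
    return (size + 2 * p - k) // s + 1

kh, sy, ph = 3, 2, 1
print(outsize(9, kh, sy, ph))                   # 5
print(outsize(10, kh, sy, ph))                  # 5  (same in_h as outh=9)
print(outsize(10, kh, sy, ph, cover_all=True))  # 6  -> needs cover_all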
Example No. 28
    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)
        x_type, w_type = in_types[:2]

        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim == self.ndim + 2,
            w_type.ndim == self.ndim + 2,
            x_type.shape[1] == w_type.shape[0]
        )

        if self.outs is not None:
            for i, (out, s, p, di) in enumerate(zip(
                    self.outs, self.stride, self.pad, self.dilate)):
                lower_bound = conv.get_conv_outsize(
                    out, w_type.shape[i + 2], s, p, d=di)
                upper_bound = conv.get_conv_outsize(
                    out, w_type.shape[i + 2], s, p, cover_all=True, d=di)
                type_check.expect(
                    lower_bound <= x_type.shape[i + 2],
                    x_type.shape[i + 2] <= upper_bound)

        if type_check.eval(n_in) == 3:
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                # Need to consider the case that group count > 1.
                # b_type.shape[0] == w_type.shape[1]
            )
Example No. 29
    def forward_gpu(self, x):
        n, c, h, w = x[0].shape
        out_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph)
        out_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw)
        out_c = self.W.shape[0]
        y = cuda.empty((n, out_c, out_h, out_w), dtype=self.dtype)
        if cuda.cudnn_enabled and self.use_cudnn:
            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x[0])
            y_desc = cudnn.create_tensor_descriptor(y)

            self.filter_desc = cudnn.create_filter_descriptor(self.W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx))
            if self.b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(
                    self.b[None, :, None, None])

            algo = libcudnn.getConvolutionForwardAlgorithm(
                handle, x_desc.value, self.filter_desc.value,
                self.conv_desc.value, y_desc.value, _fwd_pref,
                self.max_workspace_size)
            workspace_size = libcudnn.getConvolutionForwardWorkspaceSize(
                handle, x_desc.value, self.filter_desc.value,
                self.conv_desc.value, y_desc.value, algo)
            workspace = cuda.empty(
                (max(workspace_size // 4, 1),), dtype=self.dtype)

            one = ctypes.c_float(1)
            zero = ctypes.c_float(0)
            libcudnn.convolutionForward(
                handle, one, x_desc.value, x[0].data.ptr,
                self.filter_desc.value, self.W.data.ptr, self.conv_desc.value,
                algo, workspace.data.ptr, workspace_size, zero, y_desc.value,
                y.data.ptr)

            # TODO(beam2d): Support unshared bias
            if self.b is not None:
                libcudnn.addTensor(
                    handle, libcudnn.CUDNN_ADD_SAME_C, one,
                    self.bias_desc.value, self.b.data.ptr, one, y_desc.value,
                    y.data.ptr)
        else:
            # Implementation using im2col
            self.col = conv.im2col_gpu(
                x[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw)

            # TODO(beam2d): Use streams
            W_mat = self.W.reshape(out_c, c * self.kh * self.kw)
            col_mats = self.col.reshape(
                n, c * self.kh * self.kw, out_h * out_w)
            y_mats = y.reshape(n, out_c, out_h * out_w)
            for i in moves.range(n):
                y_mats[i] = W_mat.dot(col_mats[i])

            # TODO(beam2d): Support unshared bias
            if self.b is not None:
                y += self.b.reshape((1, out_c, 1, 1))

        return y,
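The non-cuDNN branch is the classic im2col formulation: every kernel window is unrolled into a column so the whole convolution reduces to one matrix product per sample. A self-contained NumPy sketch of that reshape-and-dot pattern (toy sizes, stride 1, no padding, no bias):

import numpy

n, c, h, w, out_c, kh, kw = 1, 2, 4, 4, 3, 2, 2
out_h, out_w = h - kh + 1, w - kw + 1
x = numpy.random.rand(n, c, h, w).astype(numpy.float32)
W = numpy.random.rand(out_c, c, kh, kw).astype(numpy.float32)

# im2col: gather every kh x kw window -> (n, c, kh, kw, out_h, out_w)
col = numpy.empty((n, c, kh, kw, out_h, out_w), dtype=x.dtype)
for ky in range(kh):
    for kx in range(kw):
        col[:, :, ky, kx] = x[:, :, ky:ky + out_h, kx:kx + out_w]

W_mat = W.reshape(out_c, c * kh * kw)
col_mats = col.reshape(n, c * kh * kw, out_h * out_w)
y = numpy.stack([W_mat.dot(col_mats[i]) for i in range(n)])
print(y.reshape(n, out_c, out_h, out_w).shape)  # (1, 3, 3, 3)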
Example No. 30
    def check_col2im(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
        col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy)
        col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx)
        shape = (2, 3, kh, kw, col_h, col_w)
        col = numpy.random.uniform(-1, 1, shape).astype(self.dtype)

        if gpu:
            col2im = conv.col2im_gpu
            col_data = cuda.to_gpu(col)
        else:
            col2im = conv.col2im_cpu
            col_data = col

        img = col2im(col_data, sy, sx, ph, pw, self.h, self.w, dy=dy, dx=dx)
        img = cuda.to_cpu(img)
        self.assertEqual(img.shape, (2, 3, self.h, self.w))
        for y in moves.range(self.h):
            for x in moves.range(self.w):
                v = numpy.zeros((2, 3), self.dtype)
                for ky in moves.range(kh):
                    for kx in moves.range(kw):
                        oy = (y + ph - ky * dy) // sy
                        ox = (x + pw - kx * dx) // sx
                        if ((y + ph - ky * dy) % sy == 0 and
                            (x + pw - kx * dx) % sx == 0 and
                                0 <= oy < col_h and 0 <= ox < col_w):
                            v += col[:, :, ky, kx, oy, ox]
                testing.assert_allclose(img[:, :, y, x], v)
Example No. 31
    def _set_cover_all(self, x_shape, w_shape):
        _, _, kh, kw = w_shape
        _, _, in_h, in_w = x_shape

        self.cover_all = (in_h != conv.get_conv_outsize(
            self.outh, kh, self.sy, self.ph, d=self.dy)
                          or in_w != conv.get_conv_outsize(
                              self.outw, kw, self.sx, self.pw, d=self.dx))
Example No. 32
    def _set_cover_all(self, x, W):
        in_h, in_w = x.shape[2:]
        kh, kw = W.shape[2:]
        self.cover_all = (
            in_h != conv.get_conv_outsize(self.outh, kh, self.sy,
                                          self.ph, d=self.dy) or
            in_w != conv.get_conv_outsize(self.outw, kw, self.sx,
                                          self.pw, d=self.dx))
Example No. 33
    def _create_cc(self, x, gy, hint, y, ws, ksize, stride, pad, cover_all, e):
        self.ksize = ksize
        self.stride = stride
        self.pad = pad
        self.cover_all = cover_all
        self.x = array(x, m.memory.nchw, e)
        gy = array(gy, m.memory.nchw, e)
        if self.alg_kind is pooling_max:
            gy_md = y.memory.get_primitive_desc().desc()
        else:
            gy_md = gy.memory.get_primitive_desc().desc()
        gx_md = m.desc(x.shape, m.memory.f32, m.memory.any)
        # x_md = self.x.memory.get_primitive_desc().desc()

        n, c, h, w = x.shape
        sy, sx = _pair(stride)
        kh, kw = _pair(ksize)
        p_upper, p_left = _pair(pad)

        yh = conv.get_conv_outsize(h, kh, sy, p_upper, cover_all=cover_all)
        assert yh > 0, 'Height in the output should be positive.'
        yw = conv.get_conv_outsize(w, kw, sx, p_left, cover_all=cover_all)
        assert yw > 0, 'Width in the output should be positive.'

        p_down = sy * (yh - 1) + kh - h - p_upper
        p_right = sx * (yw - 1) + kw - w - p_left

        cc_d = pooling_backward.desc(self.alg_kind, gx_md, gy_md, stride,
                                     ksize, (p_upper, p_left),
                                     (p_down, p_right), zero)

        cc_pd = pooling_backward.primitive_desc(cc_d, e, hint)

        gx = mdarray(cc_pd.diff_src_primitive_desc())

        if self.alg_kind is pooling_max:
            # For max pooling reorder y if needed
            outputs = reorder_if_must(gy, y.memory.get_primitive_desc(), e,
                                      self.dag_)
            if len(outputs) == 2:
                self.reordered_gy, self.itm_arr = outputs[:2]
            else:
                self.reordered_gy = outputs[0]

            # Push the max-pooling backward primitive (uses the workspace)
            # regardless of whether gy had to be reordered.
            self.dag_.push_back(
                pooling_backward.pooling_backward(
                    cc_pd, at(self.reordered_gy.memory), at(ws.memory),
                    gx.memory))
        else:
            # There is no workspace for average pooling
            self.dag_.push_back(
                pooling_backward.pooling_backward(cc_pd, at(gy.memory),
                                                  gx.memory))

        self._hint = hint
        self.gy = gy
        self.outputs = gx,
Example No. 34
    def forward_gpu(self, x):
        if (chainer.should_use_cudnn('>=auto') and
                pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
            return super(MaxPooling2D, self).forward_gpu(x)

        self.retain_inputs(())
        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype

        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(
            h, self.kh, self.sy, self.ph, self.cover_all)
        assert y_h > 0, 'Height in the output should be positive.'
        y_w = conv.get_conv_outsize(
            w, self.kw, self.sx, self.pw, self.cover_all)
        assert y_w > 0, 'Width in the output should be positive.'
        y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
        self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32)

        cuda.elementwise(
            'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,'
            'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw',
            'T out, S indexes',
            '''
               int c0    = i / (out_h * out_w);
               int out_y = i / out_w % out_h;
               int out_x = i % out_w;
               int in_y_0 = max(0, out_y * sy - ph);
               int in_y_1 = min(h, out_y * sy + kh - ph);
               int in_x_0 = max(0, out_x * sx - pw);
               int in_x_1 = min(w, out_x * sx + kw - pw);

               T maxval = in[in_x_0 + w * (in_y_0 + h * c0)];
               int argmax_y = in_y_0;
               int argmax_x = in_x_0;
               for (int y = in_y_0; y < in_y_1; ++y) {
                 int offset_y = w * (y + h * c0);
                 for (int x = in_x_0; x < in_x_1; ++x) {
                   float v = in[x + offset_y];
                   if (maxval < v) {
                     maxval   = v;
                     argmax_y = y;
                     argmax_x = x;
                   }
                 }
               }
               out = maxval;

               int argmax_ky = argmax_y + ph - out_y * sy;
               int argmax_kx = argmax_x + pw - out_x * sx;
               indexes = argmax_kx + kw * argmax_ky;
            ''', 'max_pool_fwd')(x[0].reduced_view(),
                                 h, w, y_h, y_w, self.kh, self.kw,
                                 self.sy, self.sx, self.ph, self.pw,
                                 y, self.indexes)
        return y,
Example No. 35
    def forward_gpu(self, x):
        if chainer.should_use_cudnn('>=auto'):
            self.retain_inputs((0,))
            return super(MaxPooling2D, self).forward_gpu(x)

        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype

        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(
            h, self.kh, self.sy, self.ph, self.cover_all)
        assert y_h > 0, 'Height in the output should be positive.'
        y_w = conv.get_conv_outsize(
            w, self.kw, self.sx, self.pw, self.cover_all)
        assert y_w > 0, 'Width in the output should be positive.'
        y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
        self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32)

        cuda.elementwise(
            'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,'
            'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw',
            'T out, S indexes',
            '''
               int c0    = i / (out_h * out_w);
               int out_y = i / out_w % out_h;
               int out_x = i % out_w;
               int in_y_0 = max(0, out_y * sy - ph);
               int in_y_1 = min(h, out_y * sy + kh - ph);
               int in_x_0 = max(0, out_x * sx - pw);
               int in_x_1 = min(w, out_x * sx + kw - pw);

               T maxval = in[in_x_0 + w * (in_y_0 + h * c0)];
               int argmax_y = in_y_0;
               int argmax_x = in_x_0;
               for (int y = in_y_0; y < in_y_1; ++y) {
                 int offset_y = w * (y + h * c0);
                 for (int x = in_x_0; x < in_x_1; ++x) {
                   float v = in[x + offset_y];
                   if (maxval < v) {
                     maxval   = v;
                     argmax_y = y;
                     argmax_x = x;
                   }
                 }
               }
               out = maxval;

               int argmax_ky = argmax_y + ph - out_y * sy;
               int argmax_kx = argmax_x + pw - out_x * sx;
               indexes = argmax_kx + kw * argmax_ky;
            ''', 'max_pool_fwd')(x[0].reduced_view(),
                                 h, w, y_h, y_w, self.kh, self.kw,
                                 self.sy, self.sx, self.ph, self.pw,
                                 y, self.indexes)
        return y,
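The kernel encodes the argmax as a flat offset inside the pooling window, `indexes = argmax_kx + kw * argmax_ky`. Decoding it back on the host is a divmod plus the window origin; a small sketch with hypothetical values:

kw = 3
index = 5                      # made-up value read from self.indexes
ky, kx = divmod(index, kw)     # in-window argmax -> (1, 2)
out_y, out_x, sy, sx, ph, pw = 4, 7, 2, 2, 1, 1
in_y = out_y * sy - ph + ky    # absolute input row of the max element
in_x = out_x * sx - pw + kx    # absolute input column
print(ky, kx, in_y, in_x)      # 1 2 8 15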
Example No. 36
    def infer_return(self, conv, x_type):
        ksize = make_pair(conv.ksize)
        stride = make_pair(conv.stride)
        pad = make_pair(conv.pad)
        dilate = make_pair(conv.dilate)

        shape_2 = get_conv_outsize(
                x_type.shape[2], ksize[0], stride[0], pad[0], d=dilate[0])
        shape_3 = get_conv_outsize(
                x_type.shape[3], ksize[1], stride[1], pad[1], d=dilate[1])
        ret_shape = (x_type.shape[0], conv.out_channels, shape_2, shape_3)
        return TyChainerVariable(x_type.dtype, shape=ret_shape)
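A quick hand-check of the inferred shape, using hypothetical layer settings (ksize=3, stride=1, pad=1, dilate=1 behaves like 'same' padding):

def outsize(size, k, s, p, d=1):
    dk = d * (k - 1) + 1
    return (size + 2 * p - dk) // s + 1

n, in_c, h, w, out_channels = 8, 16, 32, 32, 64
ret_shape = (n, out_channels, outsize(h, 3, 1, 1), outsize(w, 3, 1, 1))
print(ret_shape)  # (8, 64, 32, 32)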
Example No. 37
    def __call__(self, x):
        xp = cuda.get_array_module(x.data)
        l = conv.get_conv_outsize(x.data.shape[2], self.ksize[0],
                                  self.stride[0], self.pad[0])
        h = conv.get_conv_outsize(x.data.shape[3], self.ksize[1],
                                  self.stride[1], self.pad[1])
        w = conv.get_conv_outsize(x.data.shape[4], self.ksize[2],
                                  self.stride[2], self.pad[2])
        shape = (len(x.data), self.out_channels, l, h, w)
        return variable.Variable(xp.random.uniform(-1, 1,
                                                   shape).astype(x.data.dtype),
                                 volatile=x.volatile)
Example No. 38
    def forward_gpu(self, x):
        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(
            h, self.kh, self.sy, self.ph, self.cover_all)
        y_w = conv.get_conv_outsize(
            w, self.kw, self.sx, self.pw, self.cover_all)
        y = cuda.cupy.zeros((n, c, y_h, y_w), dtype=x[0].dtype)
        self.indexes = cuda.cupy.zeros((n, c, y_h, y_w), dtype=numpy.int32)

        indices_unpooling = cuda.cupy.zeros((n, c, y_h, y_w),
                                            dtype=numpy.int32)
        cuda.elementwise(
            'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,'
            'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw',
            'T out, S indexes, S indices_unpooling',
            '''
               int c0    = (int)floor(double(i) / double(out_h * out_w));
               int out_y = int(floor(double(i) / double(out_w))) % out_h;
               int out_x = i % out_w;
               int in_y_0 = max(0, out_y * sy - ph);
               int in_y_1 = min(h, out_y * sy + kh - ph);
               int in_x_0 = max(0, out_x * sx - pw);
               int in_x_1 = min(w, out_x * sx + kw - pw);

               T maxval = in[in_x_0 + w * (in_y_0 + h * c0)];
               int argmax_y = in_y_0;
               int argmax_x = in_x_0;
               int max_ind = in_x_0 + w * (in_y_0 + h * c0);
               for (int y = in_y_0; y < in_y_1; ++y) {
                 int offset_y = w * (y + h * c0);
                 for (int x = in_x_0; x < in_x_1; ++x) {
                   float v = in[x + offset_y];
                   if (maxval < v) {
                     maxval   = v;
                     argmax_y = y;
                     argmax_x = x;
                     max_ind = x + offset_y;
                   }
                 }
               }
               out = maxval;

               int argmax_ky = argmax_y + ph - out_y * sy;
               int argmax_kx = argmax_x + pw - out_x * sx;
               indexes = argmax_kx + kw * argmax_ky;
               indices_unpooling = max_ind;
            ''', 'max_pool_fwd')(x[0].reduced_view(),
                                 h, w, y_h, y_w, self.kh, self.kw,
                                 self.sy, self.sx, self.ph, self.pw,
                                 y, self.indexes, indices_unpooling)

        return (y, indices_unpooling, cuda.cupy.array([n, c, h, w]))
Example No. 39
    def forward_gpu(self, x):
        """
        Commented out because we need the indexes for the unpooling process.

        if cuda.cudnn_enabled and self.use_cudnn:
            return super(MaxPooling2D, self).forward_gpu(x), self.indexes
        """
        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(
            h, self.kh, self.sy, self.ph, self.cover_all)
        y_w = conv.get_conv_outsize(
            w, self.kw, self.sx, self.pw, self.cover_all)
        y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
        self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32)

        cuda.elementwise(
            'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,'
            'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw',
            'T out, S indexes',
            '''
               int c0    = i / (out_h * out_w);
               int out_y = i / out_w % out_h;
               int out_x = i % out_w;
               int in_y_0 = max(0, out_y * sy - ph);
               int in_y_1 = min(h, out_y * sy + kh - ph);
               int in_x_0 = max(0, out_x * sx - pw);
               int in_x_1 = min(w, out_x * sx + kw - pw);

               T maxval = in[in_x_0 + w * (in_y_0 + h * c0)];
               int argmax_y = in_y_0;
               int argmax_x = in_x_0;
               for (int y = in_y_0; y < in_y_1; ++y) {
                 int offset_y = w * (y + h * c0);
                 for (int x = in_x_0; x < in_x_1; ++x) {
                   float v = in[x + offset_y];
                   if (maxval < v) {
                     maxval   = v;
                     argmax_y = y;
                     argmax_x = x;
                   }
                 }
               }
               out = maxval;

               int argmax_ky = argmax_y + ph - out_y * sy;
               int argmax_kx = argmax_x + pw - out_x * sx;
               indexes = argmax_kx + kw * argmax_ky;
            ''', 'max_pool_fwd')(x[0].reduced_view(),
                                 h, w, y_h, y_w, self.kh, self.kw,
                                 self.sy, self.sx, self.ph, self.pw,
                                 y, self.indexes)
        return y, self.indexes
Example No. 40
    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(n_in == 1)
        x_type = in_types[0]

        type_check.expect(x_type.dtype.kind == "f", x_type.ndim == 4)

        if self.outh is not None:
            expected_h = conv.get_conv_outsize(
                self.outh, self.kh, self.sy, self.ph,
                cover_all=self.cover_all)
            type_check.expect(x_type.shape[2] == expected_h)
        if self.outw is not None:
            expected_w = conv.get_conv_outsize(
                self.outw, self.kw, self.sx, self.pw,
                cover_all=self.cover_all)
            type_check.expect(x_type.shape[3] == expected_w)
Example No. 41
    def _get_out_size(self, inputs):
        x, W = inputs[:2]
        _, _, kh, kw = W.shape
        _, _, h, w = x.shape
        out_h = conv.get_conv_outsize(
            h, kh, self.sy, self.ph, cover_all=self.cover_all, d=self.dy)
        if out_h <= 0:
            raise RuntimeError('Height in the output should be positive.')
        out_w = conv.get_conv_outsize(
            w, kw, self.sx, self.pw, cover_all=self.cover_all, d=self.dx)
        if out_w <= 0:
            raise RuntimeError('Width in the output should be positive.')
        return out_h, out_w
Example No. 42
    def _get_out_size(self, inputs):
        x, W = inputs[:2]
        _, _, kh, kw = W.shape
        _, _, h, w = x.shape
        out_h = conv.get_conv_outsize(
            h, kh, self.sy, self.ph, cover_all=self.cover_all, d=self.dy)
        if out_h <= 0:
            raise RuntimeError('Height in the output should be positive.')
        out_w = conv.get_conv_outsize(
            w, kw, self.sx, self.pw, cover_all=self.cover_all, d=self.dx)
        if out_w <= 0:
            raise RuntimeError('Width in the output should be positive.')
        return out_h, out_w
Example No. 43
def max_pooling_3d(x, ksize, stride=None, pad=0, use_cudnn=True):
    xp = cuda.get_array_module(x.data)
    if stride is None:
        stride = ksize
    ksize = _triplet(ksize)
    stride = _triplet(stride)
    pad = _triplet(pad)
    l = conv.get_conv_outsize(x.data.shape[2], ksize[0], stride[0], pad[0])
    h = conv.get_conv_outsize(x.data.shape[3], ksize[1], stride[1], pad[1])
    w = conv.get_conv_outsize(x.data.shape[4], ksize[2], stride[2], pad[2])
    shape = (len(x.data), x.data.shape[1], l, h, w)
    return variable.Variable(xp.random.uniform(-1, 1,
                                               shape).astype(x.data.dtype),
                             volatile=x.volatile)
Example No. 44
    def forward_gpu(self, x):
        if cudnn.enabled and self.use_cudnn:
            return super(MaxPooling2D, self).forward_gpu(x)

        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(
            h, self.kh, self.sy, self.ph, self.cover_all)
        y_w = conv.get_conv_outsize(
            w, self.kw, self.sx, self.pw, self.cover_all)
        y = cuda.empty((n, c, y_h, y_w), dtype=numpy.float32)
        self.indexes = cuda.empty((n, c, y_h, y_w), dtype=numpy.int32)

        cuda.elementwise(
            '''
               float* out, int* indexes, const float* in,
               int h, int w, int out_h, int out_w,
               int kh, int kw, int sy, int sx, int ph, int pw
            ''', '''
               int c0    = i / (out_h * out_w);
               int out_y = i / out_w % out_h;
               int out_x = i % out_w;
               int in_y_0 = max(0, out_y * sy - ph);
               int in_y_1 = min(h, out_y * sy + kh - ph);
               int in_x_0 = max(0, out_x * sx - pw);
               int in_x_1 = min(w, out_x * sx + kw - pw);

               float maxval = in[in_x_0 + w * (in_y_0 + h * c0)];
               int argmax_y = in_y_0;
               int argmax_x = in_x_0;
               for (int y = in_y_0; y < in_y_1; ++y) {
                 int offset_y = w * (y + h * c0);
                 for (int x = in_x_0; x < in_x_1; ++x) {
                   float v = in[x + offset_y];
                   if (maxval < v) {
                     maxval   = v;
                     argmax_y = y;
                     argmax_x = x;
                   }
                 }
               }
               out[i] = maxval;

               int argmax_ky = argmax_y + ph - out_y * sy;
               int argmax_kx = argmax_x + pw - out_x * sx;
               indexes[i] = argmax_kx + kw * argmax_ky;
            ''', 'max_pool_fwd')(y, self.indexes, x[0], h, w, y_h, y_w,
                                 self.kh, self.kw, self.sy, self.sx, self.ph,
                                 self.pw)
        return y,
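The same selection logic in plain NumPy, restricted to one (h, w) plane; a sketch for reading the kernel, with names mirroring the CUDA variables (max_pool_plane is a hypothetical helper, not part of Chainer):

import numpy

def max_pool_plane(plane, kh, kw, sy, sx, ph, pw, out_h, out_w):
    h, w = plane.shape
    out = numpy.empty((out_h, out_w), plane.dtype)
    indexes = numpy.empty((out_h, out_w), numpy.int32)
    for out_y in range(out_h):
        for out_x in range(out_w):
            in_y_0 = max(0, out_y * sy - ph)
            in_y_1 = min(h, out_y * sy + kh - ph)
            in_x_0 = max(0, out_x * sx - pw)
            in_x_1 = min(w, out_x * sx + kw - pw)
            window = plane[in_y_0:in_y_1, in_x_0:in_x_1]
            flat = int(window.argmax())    # first max, like `maxval < v`
            ay = in_y_0 + flat // window.shape[1]
            ax = in_x_0 + flat % window.shape[1]
            out[out_y, out_x] = plane[ay, ax]
            # same packing as the kernel: offset within the pooling window
            indexes[out_y, out_x] = (
                (ax + pw - out_x * sx) + kw * (ay + ph - out_y * sy))
    return out, indexes

y, idx = max_pool_plane(numpy.arange(16.).reshape(4, 4), 2, 2, 2, 2, 0, 0, 2, 2)
# y == [[ 5.,  7.], [13., 15.]]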
Example No. 46
    def forward_gpu(self, x):
        if cuda.cudnn_enabled and self.use_cudnn:
            return super(MaxPooling2D, self).forward_gpu(x)

        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(
            h, self.kh, self.sy, self.ph, self.cover_all)
        y_w = conv.get_conv_outsize(
            w, self.kw, self.sx, self.pw, self.cover_all)
        y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
        self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32)

        cuda.elementwise(
            'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,'
            'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw',
            'T out, S indexes',
            '''
               int c0    = i / (out_h * out_w);
               int out_y = i / out_w % out_h;
               int out_x = i % out_w;
               int in_y_0 = max(0, out_y * sy - ph);
               int in_y_1 = min(h, out_y * sy + kh - ph);
               int in_x_0 = max(0, out_x * sx - pw);
               int in_x_1 = min(w, out_x * sx + kw - pw);

               T maxval = in[in_x_0 + w * (in_y_0 + h * c0)];
               int argmax_y = in_y_0;
               int argmax_x = in_x_0;
               for (int y = in_y_0; y < in_y_1; ++y) {
                 int offset_y = w * (y + h * c0);
                 for (int x = in_x_0; x < in_x_1; ++x) {
                    T v = in[x + offset_y];
                   if (maxval < v) {
                     maxval   = v;
                     argmax_y = y;
                     argmax_x = x;
                   }
                 }
               }
               out = maxval;

               int argmax_ky = argmax_y + ph - out_y * sy;
               int argmax_kx = argmax_x + pw - out_x * sx;
               indexes = argmax_kx + kw * argmax_ky;
            ''', 'max_pool_fwd')(x[0].reduced_view(),
                                 h, w, y_h, y_w, self.kh, self.kw,
                                 self.sy, self.sx, self.ph, self.pw,
                                 y, self.indexes)
        return y,
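The stored index packs the argmax position relative to the pooling window, so the backward pass can recover it without rescanning the window. A small decoding sketch with assumed example values:

kw, sy, sx, ph, pw = 3, 2, 2, 1, 1
out_y, out_x, index = 1, 1, 7           # example stored value
argmax_ky, argmax_kx = divmod(index, kw)
in_y = out_y * sy - ph + argmax_ky      # 1*2 - 1 + 2 = 3
in_x = out_x * sx - pw + argmax_kx      # 1*2 - 1 + 1 = 2
print(in_y, in_x)                       # 3 2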
Example No. 47
    def setUp(self):
        self.x = numpy.random.uniform(size=self.in_shape).astype(numpy.float32)

        kh, kw = _pair(self.ksize)
        sy, sx = _pair(self.stride)
        ph, pw = _pair(self.pad)
        dy, dx = _pair(self.dilate)

        N, C, H, W = self.in_shape

        o_H = get_conv_outsize(H, kh, sy, ph, cover_all=self.cover_all, d=dy)
        o_W = get_conv_outsize(W, kw, sx, pw, cover_all=self.cover_all, d=dx)

        self.gy = numpy.random.uniform(size=(N, C * kh * kw, o_H,
                                             o_W)).astype(numpy.float32)
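Worked numbers for the shapes above (a sketch with assumed parameters, since the real values come from the test parameterization): in_shape (2, 3, 7, 7), ksize 3, stride 2, pad 1, dilate 1.

N, C, H, W = 2, 3, 7, 7
kh = kw = 3
sy = sx = 2
ph = pw = 1
o_H = (H + 2 * ph - kh) // sy + 1   # 4
o_W = (W + 2 * pw - kw) // sx + 1   # 4
# im2col flattens (C, kh, kw) into the channel axis:
assert (N, C * kh * kw, o_H, o_W) == (2, 27, 4, 4)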
Example No. 48
    def setUp(self):
        self.ndim = len(self.dims)
        self.ksize = (3, ) * self.ndim
        self.stride = (2, ) * self.ndim
        self.pad = (1, ) * self.ndim

        # Avoid instability of the numerical gradient
        x_shape = (2, 3) + self.dims
        self.x = numpy.arange(functools.reduce(mul, x_shape),
                              dtype=self.dtype).reshape(x_shape)
        self.x = 2 * self.x / self.x.size - 1

        outs = tuple(
            conv.get_conv_outsize(d, k, s, p, self.cover_all) for (d, k, s, p)
            in six.moves.zip(self.dims, self.ksize, self.stride, self.pad))
        gy_shape = (2, 3) + outs
        self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype)

        self.check_backward_options = {'eps': 2.0**-8}
        if self.dtype == numpy.float16:
            self.check_backward_options = {
                'eps': 2.0**-8,
                'atol': 1e-03,
                'rtol': 1e-03
            }
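Why the arange-based input matters (a sketch): the numerical gradient of max pooling is unstable when two values inside a window tie, because a small eps perturbation can flip the argmax. Distinct, evenly spaced values rule ties out.

import numpy

x = numpy.arange(6, dtype=numpy.float32)
x = 2 * x / x.size - 1        # strictly increasing, values in [-1, 1)
assert len(numpy.unique(x)) == x.size   # no ties: pooling argmax is stable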
Example No. 49
 def _set_cover_all(self, x, W):
     x_shape = x.shape[2:]
     k_shape = W.shape[2:]
     self.cover_all = any(
         ix != conv.get_conv_outsize(oy, k, s, p)
         for (ix, oy, k, s, p)
         in zip(x_shape, self.outs, k_shape, self.stride, self.pad))
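What the detection above distinguishes, with concrete numbers (conv_outsize is a local stand-in for the library helper):

def conv_outsize(size, k, s, p, cover_all=False):
    if cover_all:
        return (size + 2 * p - k + s - 1) // s + 1
    return (size + 2 * p - k) // s + 1

# Requested output 9 with k=2, s=2, p=0: an input of 4 matches the plain
# grid, so cover_all stays False; an input of 5 does not, flipping it True.
assert conv_outsize(9, 2, 2, 0) == 4
assert conv_outsize(9, 2, 2, 0, cover_all=True) == 5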
Example No. 50
    def setUp(self):
        self.ndim = len(self.dims)
        self.ksize = (3,) * self.ndim
        self.stride = (2,) * self.ndim
        self.pad = (1,) * self.ndim

        # Avoid instability of the numerical gradient
        x_shape = (2, 3) + self.dims
        self.x = numpy.arange(
            functools.reduce(mul, x_shape), dtype=self.dtype).reshape(x_shape)
        self.x = 2 * self.x / self.x.size - 1

        outs = tuple(conv.get_conv_outsize(d, k, s, p, self.cover_all)
                     for (d, k, s, p)
                     in six.moves.zip(
                         self.dims, self.ksize, self.stride, self.pad))
        gy_shape = (2, 3) + outs
        self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype)
        self.ggx = numpy.random.uniform(
            -1, 1, x_shape).astype(self.dtype)

        self.check_backward_options = {}
        if self.dtype == numpy.float16:
            self.check_backward_options = {
                'atol': 1e-3, 'rtol': 1e-2}
            self.check_double_backward_options = {
                'atol': 1e-3, 'rtol': 1e-2}
        else:
            self.check_backward_options = {
                'atol': 1e-4, 'rtol': 1e-3}
            self.check_double_backward_options = {
                'atol': 1e-4, 'rtol': 1e-3}
Example No. 51
    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)
        x_type, w_type = in_types[:2]

        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim == self.ndim + 2,
            w_type.ndim == self.ndim + 2,
            x_type.shape[1] == w_type.shape[0]
        )

        if self.outs is not None:
            for i, (out, s, p) in enumerate(zip(
                    self.outs, self.stride, self.pad)):
                type_check.expect(
                    x_type.shape[i + 2] ==
                    conv.get_conv_outsize(out, w_type.shape[i + 2], s, p)
                )

        if type_check.eval(n_in) == 3:
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                b_type.shape[0] == w_type.shape[1]
            )
Example No. 52
    def setUp(self):
        in_channels = 3
        out_channels = 2
        ndim = len(self.dims)
        ksize = (3,) * ndim
        self.stride = (2,) * ndim
        self.pad = (1,) * ndim

        W_scale = numpy.sqrt(1. / functools.reduce(mul, ksize, in_channels))
        W_shape = (out_channels, in_channels) + ksize
        self.W = numpy.random.normal(0, W_scale, W_shape).astype(self.W_dtype)
        self.b = numpy.random.uniform(-1, 1, out_channels).astype(self.x_dtype)

        x_shape = (2, 3) + self.dims
        self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.x_dtype)
        gy_shape = (2, 2) + tuple(
            conv.get_conv_outsize(d, k, s, p, cover_all=self.cover_all)
            for (d, k, s, p) in zip(self.dims, ksize, self.stride, self.pad))
        self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.x_dtype)

        self.check_forward_options = {}
        self.check_backward_options = {'dtype': numpy.float64}
        if self.x_dtype == numpy.float16 or self.W_dtype == numpy.float16:
            self.check_forward_options = {'atol': 5e-4, 'rtol': 5e-3}
            self.check_backward_options = {
                'dtype': numpy.float64, 'atol': 2 ** -4, 'rtol': 2 ** -4}
Example No. 53
    def test_valid_insize(self):
        N = self.N
        c = self.c
        ksize = self.ksize
        stride = self.stride
        pad = self.pad
        outs = self.outsize
        cover_all = self.cover_all

        # Make input.
        dims = tuple(
            conv.get_conv_outsize(out, k, s, p, cover_all=cover_all)
            for (out, k, s, p) in zip(outs, ksize, stride, pad))
        x_shape = (N, c) + dims
        x_data = numpy.random.uniform(-1, 1, x_shape).astype(numpy.float32)
        x = chainer.Variable(x_data)

        # Compute unpooling.
        y = functions.unpooling_nd(x,
                                   ksize,
                                   stride,
                                   pad,
                                   outsize=outs,
                                   cover_all=cover_all)

        # Test output's value.
        y_expected = expected_unpooling_nd(x_data, outs, ksize, stride, pad)
        testing.assert_allclose(y_expected, y.data)
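expected_unpooling_nd is the test's reference implementation and is not shown here. A 1-D sketch of the semantics it assumes, following Chainer's unpooling (each input element is added into every output position whose pooling window would have covered it, so overlaps sum):

import numpy

def unpool_1d(x, out, k, s, p):
    y = numpy.zeros(out, x.dtype)
    for i, v in enumerate(x):      # input position i
        for kx in range(k):        # kernel offset
            o = i * s - p + kx     # output position covered by the window
            if 0 <= o < out:
                y[o] += v
    return y

print(unpool_1d(numpy.array([1., 2.]), 4, k=3, s=2, p=0))  # [1. 1. 3. 2.]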
Example No. 54
import itertools

import numpy
import six

from chainer.utils.conv import get_conv_outsize


def im2col_nd_cpu(img, ksize, stride, pad, pval=0, cover_all=False):
    n, c = img.shape[0:2]       # (n, c, d_1, d_2, ..., d_N)
    dims = img.shape[2:]
    ndim = len(dims)
    assert ndim == len(ksize) == len(stride) == len(pad)
    outs = tuple(get_conv_outsize(d, k, s, p, cover_all)
                 for (d, k, s, p) in zip(dims, ksize, stride, pad))
    assert all(out > 0 for out in outs), 'Output sizes should be positive.'

    # Pad around image.
    pad_width = ((0, 0), (0, 0)) + tuple(
        (p, p + s - 1) for (s, p) in zip(stride, pad))
    img = numpy.pad(img, pad_width, mode='constant', constant_values=(pval,))

    # Make patch array with which we will compute correlation with filter.
    # shape: (n, c, k_1, k_2, ..., k_N, out_1, out_2, ..., out_N)
    shape = (n, c) + ksize + outs
    col = numpy.ndarray(shape, dtype=img.dtype)

    # Fill the patch array.
    colon = slice(None)
    for kxs in itertools.product(*[six.moves.range(k) for k in ksize]):
        # col[:, :, kx_1, kx_2, ..., kx_N, :, :, ..., :]
        col_index = (colon, colon) + kxs + (colon,) * ndim
        # img[:, :, kx_1:kx_lim_1:s_1, ..., kx_N:kx_lim_N:s_N]
        kx_lims = tuple(kx + s * out
                        for (kx, s, out) in zip(kxs, stride, outs))
        img_index = (colon, colon) + tuple(
            slice(kx, kx_lim, s)
            for (kx, kx_lim, s) in zip(kxs, kx_lims, stride))
        col[col_index] = img[img_index]

    return col
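Usage sketch, assuming the function above: with the patches laid out this way, an N-D convolution reduces to a tensordot over the channel and kernel axes.

import numpy

img = numpy.random.rand(2, 3, 8, 8).astype(numpy.float32)
col = im2col_nd_cpu(img, ksize=(3, 3), stride=(1, 1), pad=(0, 0))
assert col.shape == (2, 3, 3, 3, 6, 6)   # (n, c, k_1, k_2, out_1, out_2)

# Convolution with a (out_c, in_c, k_1, k_2) filter is a tensordot over
# the (c, k_1, k_2) axes, followed by moving out_c back to axis 1.
W = numpy.random.rand(4, 3, 3, 3).astype(numpy.float32)
y = numpy.tensordot(col, W, axes=((1, 2, 3), (1, 2, 3)))   # (2, 6, 6, 4)
y = numpy.rollaxis(y, 3, 1)                                # (2, 4, 6, 6)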
Example No. 55
 def check_conv_outsize(self, size, k, s, p, d):
     # When cover_all == False, `outsize` is the maximum integer that
     # satisfies "(outsize - 1) * s + k <= w"
     w = size + p * 2
     dk = k + (k - 1) * (d - 1)
     outsize = conv.get_conv_outsize(size, k, s, p, cover_all=False, d=d)
     self.assertTrue((outsize - 1) * s + dk <= w < outsize * s + dk)
Example No. 56
 def check_conv_outsize_cover_all(self, size, k, s, p, d):
     # When cover_all == True, `outsize` is the minimum integer that
     # satisfies "w <= (outsize - 1) * s + k"
     w = size + p * 2
     dk = k + (k - 1) * (d - 1)
     outsize = conv.get_conv_outsize(size, k, s, p, cover_all=True, d=d)
     self.assertTrue((outsize - 2) * s + dk < w <= (outsize - 1) * s + dk)
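Concrete numbers for the two invariants (a sketch; this and the previous example check the floor and ceiling variants of the same relation):

size, k, s, p, d = 8, 3, 2, 1, 2
w = size + 2 * p                      # padded extent: 10
dk = k + (k - 1) * (d - 1)            # dilated kernel: 5
out = (w - dk) // s + 1               # cover_all=False: 3
out_ca = (w - dk + s - 1) // s + 1    # cover_all=True: 4
assert (out - 1) * s + dk <= w < out * s + dk
assert (out_ca - 2) * s + dk < w <= (out_ca - 1) * s + dk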
Example No. 57
    def forward_gpu(self, x):
        self._used_cudnn = True

        # Implementation using cuDNN.
        x = cuda.cupy.ascontiguousarray(x[0])
        n, c = x.shape[:2]
        dims = x.shape[2:]
        ys = tuple(
            conv.get_conv_outsize(d, k, s, p, self.cover_all) for d, k, s, p in
            six.moves.zip(dims, self.ksize, self.stride, self.pad))
        y_shape = (n, c) + ys
        y = cuda.cupy.empty(y_shape, dtype=x.dtype)

        handle = cudnn.get_handle()
        pool_desc = self.create_pool_desc()
        x_desc = cudnn.create_tensor_descriptor(x)
        y_desc = cudnn.create_tensor_descriptor(y)

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        libcudnn.poolingForward(handle, pool_desc.value, one.data,
                                x_desc.value, x.data.ptr, zero.data,
                                y_desc.value, y.data.ptr)
        self.retain_outputs((0, ))
        return y,
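The one/zero scalars are cuDNN's alpha/beta blending factors: cudnnPoolingForward computes y = alpha * pool(x) + beta * y, so passing 1 and 0 simply overwrites y. A sketch of how the host-side pointers are built:

import numpy

oz_dtype = 'f'                        # matches x.dtype: 'd' for float64
one = numpy.array(1, dtype=oz_dtype).ctypes
zero = numpy.array(0, dtype=oz_dtype).ctypes
# .data is the address of the host scalar, which cuDNN dereferences.
print(one.data, zero.data)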
Example No. 58
    def setUp(self):
        self.ndim = len(self.dims)
        self.ksize = (3, ) * self.ndim
        self.stride = (2, ) * self.ndim
        self.pad = (1, ) * self.ndim

        x_shape = (2, 3) + self.dims
        self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.dtype)

        outs = tuple(
            conv.get_conv_outsize(d, k, s, p, False) for (d, k, s, p) in
            six.moves.zip(self.dims, self.ksize, self.stride, self.pad))
        gy_shape = (2, 3) + outs
        self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype)
        self.ggx = numpy.random.uniform(-1, 1, x_shape).astype(self.dtype)

        self.check_forward_options = {}
        self.check_backward_options = {'eps': 1e-2}
        if self.dtype == numpy.float16:
            self.check_forward_options = {'atol': 5e-4, 'rtol': 5e-3}
            self.check_backward_options = {
                'eps': 1e-2,
                'atol': 5e-3,
                'rtol': 5e-2
            }