def _forward_cpu_core(self, x, gy):
    col = conv.im2col_cpu(
        x, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    gW = numpy.tensordot(
        gy, col, ((0, 2, 3), (0, 4, 5))).astype(self.W_dtype, copy=False)
    return gW
def _forward_cpu_core(self, x, gy):
    if self._use_ideep:
        return self._forward_ideep(x, gy)

    # NumPy raises an error when the array is not contiguous.
    # See: https://github.com/chainer/chainer/issues/2744
    # TODO(niboshi): Remove this code when NumPy is fixed.
    if (not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
            1 in gy.shape):
        gy = numpy.ascontiguousarray(gy)

    col = conv.im2col_cpu(
        x, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    gW = numpy.tensordot(
        gy, col, ((0, 2, 3), (0, 4, 5))).astype(self.W_dtype, copy=False)
    return gW,
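# A minimal standalone sketch (assuming chainer and numpy are importable)
# of the weight gradient above: contracting gy (n, out_c, out_h, out_w)
# with the im2col patches (n, c, kh, kw, out_h, out_w) over the batch and
# spatial axes leaves gW with shape (out_c, c, kh, kw), matching W.
import numpy
from chainer.utils import conv

n, c_in, c_out, k = 2, 3, 4, 3
x = numpy.random.randn(n, c_in, 5, 5).astype(numpy.float32)
gy = numpy.random.randn(n, c_out, 3, 3).astype(numpy.float32)
col = conv.im2col_cpu(x, k, k, 1, 1, 0, 0)  # (n, c_in, k, k, 3, 3)
gW = numpy.tensordot(gy, col, ((0, 2, 3), (0, 4, 5)))
assert gW.shape == (c_out, c_in, k, k)
# The same contraction written as explicit loops over kernel offsets.
ref = numpy.zeros_like(gW)
for ky in range(k):
    for kx in range(k):
        ref[:, :, ky, kx] = numpy.tensordot(
            gy, col[:, :, ky, kx], ((0, 2, 3), (0, 2, 3)))
assert numpy.allclose(gW, ref, atol=1e-4)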
def test_col2im_consistency(self):
    col = conv.im2col_cpu(self.x, 3, 3, 2, 2, 2, 2, dy=2, dx=2)
    h, w = self.x.shape[2:]
    im_cpu = conv.col2im_cpu(col, 2, 2, 2, 2, h, w, dy=2, dx=2)
    im_gpu = conv.col2im_gpu(
        cuda.to_gpu(col), 2, 2, 2, 2, h, w, dy=2, dx=2)
    testing.assert_allclose(im_cpu, im_gpu.get())
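# A minimal standalone sketch (assuming chainer and numpy are importable)
# of why the test above checks CPU/GPU consistency rather than a round
# trip: col2im is the transpose of im2col, not its inverse, so each pixel
# comes back multiplied by the number of patches that cover it.
import numpy
from chainer.utils import conv

x = numpy.arange(16, dtype=numpy.float32).reshape(1, 1, 4, 4)
col = conv.im2col_cpu(x, 3, 3, 1, 1, 1, 1)   # 3x3 kernel, stride 1, pad 1
back = conv.col2im_cpu(col, 1, 1, 1, 1, 4, 4)
counts = conv.col2im_cpu(
    conv.im2col_cpu(numpy.ones_like(x), 3, 3, 1, 1, 1, 1), 1, 1, 1, 1, 4, 4)
assert numpy.allclose(back, x * counts)      # 4..9 patches per pixel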
def forward_cpu(self, x):
    self.col = conv.im2col_cpu(
        x[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw)
    y = numpy.tensordot(self.col, self.W, ([1, 2, 3], [1, 2, 3]))
    if self.b is not None:
        y += self.b
    return numpy.rollaxis(y, 3, 1),
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    out_c, input_c, kh, kw = W.shape
    n, c, h, w = x.shape
    # The MKL-DNN backend only supports float32 for x and W.
    if mkld.enable_convF(inputs):
        out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                      cover_all=self.cover_all)
        assert out_h > 0, 'Height in the output should be positive.'
        out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                      cover_all=self.cover_all)
        assert out_w > 0, 'Width in the output should be positive.'
        self.pd = self.sy * (out_h - 1) + kh - h - self.ph
        self.pr = self.sx * (out_w - 1) + kw - w - self.pw
        y = numpy.empty(shape=(n, out_c, out_h, out_w), dtype=x.dtype)
        if b is not None:
            mkldnn.Convolution2D_F32.do_forward(
                x, W, b, y, kh, kw, self.sx, self.sy,
                self.ph, self.pw, self.pd, self.pr)
        else:
            mkldnn.Convolution2D_F32.do_forward(
                x, W, y, kh, kw, self.sx, self.sy,
                self.ph, self.pw, self.pd, self.pr)
        return y,
    else:
        self.col = conv.im2col_cpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
        y = numpy.tensordot(
            self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
        if b is not None:
            y += b
        y = numpy.rollaxis(y, 3, 1)
        return y,
def forward_cpu(self, x):
    self.retain_inputs(())
    self._in_dtype = x[0].dtype
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
    up_y = conv.im2col_cpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    for n in six.moves.range(up_y.shape[0]):
        for c in six.moves.range(up_y.shape[1]):
            for oy in six.moves.range(up_y.shape[4]):
                for ox in six.moves.range(up_y.shape[5]):
                    ky = self.indexes[n, c, oy, ox] // up_y.shape[3]
                    kx = self.indexes[n, c, oy, ox] % up_y.shape[3]
                    up_y[n, c, ky, kx, oy, ox] = x[0][n, c, oy, ox]
    up_y = conv.col2im_cpu(up_y, self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return up_y,
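# A minimal standalone sketch (assuming chainer is importable) of the size
# relation used by the get_deconv_outsize calls above: for kernel k,
# stride s and padding p (cover_all=False), the output size is
# s*(in - 1) + k - 2*p, the inverse of conv.get_conv_outsize.
from chainer.utils import conv

k, s, p = 3, 2, 1
for size in range(4, 8):
    out = conv.get_deconv_outsize(size, k, s, p)
    assert out == s * (size - 1) + k - 2 * p
    assert conv.get_conv_outsize(out, k, s, p) == size  # round trip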
def forward(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    xp = cuda.get_array_module(*x)
    if xp is numpy:
        self.col = conv.im2col_cpu(x, kh, kw, self.sy, self.sx,
                                   self.ph, self.pw)
    else:
        self.col = conv.im2col_gpu(x, kh, kw, self.sy, self.sx,
                                   self.ph, self.pw)
    B, C, KY, KX, IY, IX = self.col.shape
    D = W.shape[0]  # W is (D, C, KY, KX)
    c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
        .reshape((C, B * IY * IX, KY * KX))
    w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
    # (C, B*IY*IX, KY*KX), (C, KY*KX, D) -> (C, B*IY*IX, D)
    y = _matmul(c_, w_, xp).astype(x.dtype, copy=False)
    # (C, B*IY*IX, D) -> (B, C*D, IY, IX)
    y = y.reshape((C, B, IY * IX, D)).transpose(1, 0, 3, 2) \
        .reshape((B, C * D, IY, IX))
    if b is not None:
        y += b[None, :, None, None]
    return y,
def forward_cpu(self, x):
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, self.kh, self.sy, self.ph,
                                            cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, self.kw, self.sx, self.pw,
                                            cover_all=self.cover_all)
    up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
    up_y = conv.im2col_cpu(up_y, self.kh, self.kw, self.sy, self.sx,
                           self.ph, self.pw, cover_all=self.cover_all)
    for n in six.moves.range(up_y.shape[0]):
        for c in six.moves.range(up_y.shape[1]):
            for oy in six.moves.range(up_y.shape[4]):
                for ox in six.moves.range(up_y.shape[5]):
                    ky = self.indexes[n, c, oy, ox] // up_y.shape[3]
                    kx = self.indexes[n, c, oy, ox] % up_y.shape[3]
                    up_y[n, c, ky, kx, oy, ox] = x[0][n, c, oy, ox]
    up_y = conv.col2im_cpu(up_y, self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return up_y,
def forward_cpu(self, x):
    self.col = conv.im2col_cpu(
        x[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw)
    y = numpy.tensordot(self.col, self.W, ([1, 2, 3], [1, 2, 3]))
    if self.b is not None:
        y += self.b
    return numpy.rollaxis(y, 3, 1),
def forward_cpu(self, x):
    if (intel64.should_use_ideep('>=auto')
            and intel64.inputs_all_ready(x)):
        return self._forward_ideep(x)

    self._in_shape = x[0].shape
    self._in_dtype = x[0].dtype

    col = conv.im2col_cpu(x[0], self.kh, self.kw, self.sy, self.sx,
                          self.ph, self.pw, pval=-float('inf'),
                          cover_all=self.cover_all)
    n, c, kh, kw, out_h, out_w = col.shape
    col = col.reshape(n, c, kh * kw, out_h, out_w)

    # We select maximum twice, since the implementation using numpy.choose
    # hits its bug when kh * kw >= 32.
    self.indexes = col.argmax(axis=2)
    y = col.max(axis=2)
    return y,
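# A minimal standalone sketch (assuming chainer and numpy are importable)
# of the im2col-based max pooling above: padding with -inf keeps padded
# entries from ever winning the max, and argmax over the flattened kernel
# axis records which kernel position produced each output (used backward).
import numpy
from chainer.utils import conv

x = numpy.random.randn(2, 3, 6, 6).astype(numpy.float32)
col = conv.im2col_cpu(x, 2, 2, 2, 2, 0, 0, pval=-float('inf'))
n, c, kh, kw, out_h, out_w = col.shape
y = col.reshape(n, c, kh * kw, out_h, out_w).max(axis=2)
# Same result as pooling 2x2/stride-2 blocks directly.
blocks = x.reshape(2, 3, 3, 2, 3, 2)
assert numpy.allclose(y, blocks.max(axis=(3, 5)))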
def forward(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    xp = cuda.get_array_module(*x)
    if xp is numpy:
        self.col = conv.im2col_cpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw)
    else:
        self.col = conv.im2col_gpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw)
    B, C, KY, KX, IY, IX = self.col.shape
    D = W.shape[0]  # W is (D, C, KY, KX)
    c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
        .reshape((C, B * IY * IX, KY * KX))
    w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
    # (C, B*IY*IX, KY*KX), (C, KY*KX, D) -> (C, B*IY*IX, D)
    y = _matmul(c_, w_, xp).astype(x.dtype, copy=False)
    # (C, B*IY*IX, D) -> (B, C*D, IY, IX)
    y = y.reshape((C, B, IY * IX, D)).transpose(1, 0, 3, 2) \
        .reshape((B, C * D, IY, IX))
    if b is not None:
        y += b[None, :, None, None]
    return y,
def forward_cpu(self, x):
    self._in_dtype = x[0].dtype
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, self.kh, self.sy, self.ph,
                                            cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, self.kw, self.sx, self.pw,
                                            cover_all=self.cover_all)
    up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
    up_y = conv.im2col_cpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all).transpose(0, 1, 4, 5, 2, 3)
    colh, colw = up_y.shape[2:4]
    up_y = up_y.reshape(-1, self.kh * self.kw)
    indexes = self.indexes.ravel()
    up_y[numpy.arange(len(indexes)), indexes] = x[0].ravel()
    up_y = up_y.reshape(n, c, colh, colw, self.kh, self.kw)
    up_y = conv.col2im_cpu(up_y.transpose(0, 1, 4, 5, 2, 3),
                           self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return up_y,
def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

    gy = grad_outputs[0]
    kh, kw = W.shape[2:]

    col = conv.im2col_cpu(
        gy, kh, kw, self.sy, self.sx, self.ph, self.pw)
    gW = numpy.tensordot(
        x, col, ([0, 2, 3], [0, 4, 5])).astype(W.dtype, copy=False)
    gx = numpy.tensordot(
        col, W, ([1, 2, 3], [1, 2, 3])).astype(x.dtype, copy=False)
    gx = numpy.rollaxis(gx, 3, 1)

    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb
def forward(self, inputs):
    x, = inputs
    xp = cuda.get_array_module(x)
    if xp == numpy:
        y = im2col_cpu(x, self.kh, self.kw, self.sy, self.sx,
                       self.ph, self.pw, cover_all=self.cover_all,
                       dy=self.dy, dx=self.dx)
    else:
        y = im2col_gpu(x, self.kh, self.kw, self.sy, self.sx,
                       self.ph, self.pw, cover_all=self.cover_all,
                       dy=self.dy, dx=self.dx)
    n, c, kh, kw, out_h, out_w = y.shape
    y = y.reshape(n, c * kh * kw, out_h, out_w)
    return y,
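# A minimal standalone sketch (assuming chainer and numpy are importable)
# of the shape bookkeeping above: im2col produces patches of shape
# (n, c, kh, kw, out_h, out_w), and the reshape flattens them into the
# (n, c*kh*kw, out_h, out_w) layout returned to the caller.
import numpy
from chainer.utils import conv

x = numpy.arange(2 * 3 * 5 * 5, dtype=numpy.float32).reshape(2, 3, 5, 5)
col = conv.im2col_cpu(x, 3, 3, 1, 1, 0, 0)
assert col.shape == (2, 3, 3, 3, 3, 3)   # out_h = out_w = (5 - 3) + 1 = 3
flat = col.reshape(2, 3 * 3 * 3, 3, 3)
# Channel 0 of column (oy, ox) is the pixel at the patch's top-left corner.
assert numpy.array_equal(flat[:, 0, 1, 2], x[:, 0, 1, 2])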
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    olen, ilen, hlen, wlen = W.shape
    if self.coeffs is None:
        self.coeffs = numpy.ones(ilen)
    coeffs = numpy.copy(self.coeffs)
    coeffs = numpy.expand_dims(coeffs, 1)
    coeffs = numpy.expand_dims(coeffs, 1)
    coeffs = numpy.expand_dims(coeffs, 0)
    coeffs = numpy.broadcast_to(coeffs, W.shape)
    M = numpy.asarray(coeffs, numpy.float32).reshape(W.shape)
    self.M = M
    W = self.M * W

    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

    kh, kw = W.shape[2:]
    self.col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    y = numpy.tensordot(
        self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
def forward_cpu(self, inputs):
    self.retain_inputs((0, 1))  # retain only x and W
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None

    if not all([isinstance(i, numpy.ndarray) for i in inputs]):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}'.format(type(W), type(x)))

    kh, kw = W.shape[2:]
    col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    y = numpy.tensordot(
        col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}'.format(type(W), type(x)))

    gy = grad_outputs[0]
    kh, kw = W.shape[2:]
    col = conv.im2col_cpu(gy, kh, kw, self.sy, self.sx, self.ph, self.pw)
    gW = numpy.tensordot(
        x, col, ([0, 2, 3], [0, 4, 5])).astype(W.dtype, copy=False)
    gx = numpy.tensordot(
        col, W, ([1, 2, 3], [1, 2, 3])).astype(x.dtype, copy=False)
    gx = numpy.rollaxis(gx, 3, 1)

    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}'.format(type(W), type(x)))

    kh, kw = W.shape[2:]
    self.col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    y = numpy.tensordot(
        self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
def forward_cpu(self, x):
    self._in_shape = x[0].shape
    self._in_dtype = x[0].dtype
    col = conv.im2col_cpu(x[0], self.kh, self.kw, self.sy, self.sx,
                          self.ph, self.pw)
    y = col.mean(axis=(2, 3))
    return y,
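# A minimal standalone sketch (assuming chainer and numpy are importable)
# of the pooling above: taking the mean over the kernel axes (2, 3) of the
# im2col patches is average pooling. Note im2col pads with pval=0 by
# default, so any zero padding would be included in the average.
import numpy
from chainer.utils import conv

x = numpy.random.randn(1, 2, 4, 4).astype(numpy.float32)
y = conv.im2col_cpu(x, 2, 2, 2, 2, 0, 0).mean(axis=(2, 3))
assert numpy.allclose(y, x.reshape(1, 2, 2, 2, 2, 2).mean(axis=(3, 5)))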
def backward_cpu(self, x, gy):
    if self.gb is not None:
        self.gb += gy[0].sum(axis=(0, 2, 3))
    col = conv.im2col_cpu(
        gy[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw)
    self.gW += numpy.tensordot(x[0], col, ([0, 2, 3], [0, 4, 5]))
    gx = numpy.tensordot(col, self.W, ([1, 2, 3], [1, 2, 3]))
    gx = numpy.rollaxis(gx, 3, 1)
    return gx,
def forward_cpu(self, gy):
    gcol = conv.im2col_cpu(
        gy[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    n, c, kh, kw, out_h, out_w = gcol.shape
    gcol = gcol.transpose(0, 1, 4, 5, 2, 3).reshape(-1, kh * kw)
    indexes = self.indexes.ravel()
    gx = gcol[numpy.arange(len(indexes)), indexes]
    return gx.reshape(n, c, out_h, out_w),
def forward_cpu(self, inputs):
    self.retain_inputs((0, 1))
    x, gy = inputs
    col = conv.im2col_cpu(
        x, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    gW = numpy.tensordot(
        gy, col, ((0, 2, 3), (0, 4, 5))).astype(self.W_dtype, copy=False)
    return gW,
def forward_cpu(self, x):
    col = conv.im2col_cpu(
        x[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        pval=-float('inf'), cover_all=self.cover_all)
    n, c, kh, kw, out_h, out_w = col.shape
    col = numpy.rollaxis(col.reshape(n, c, kh * kw, out_h, out_w), 2)
    self.indexes = col.argmax(axis=0)
    y = self.indexes.choose(col)
    return y,
def forward_cpu(self, x):
    col = conv.im2col_cpu(
        x[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        pval=-float('inf'), cover_all=self.cover_all)
    n, c, kh, kw, out_h, out_w = col.shape
    col = col.reshape(n, c, kh * kw, out_h, out_w)
    col = col.transpose(0, 1, 3, 4, 2).reshape(-1, kh * kw)
    indexes = self.indexes.ravel()
    col = col[numpy.arange(len(indexes)), indexes]
    return col.reshape(n, c, out_h, out_w),
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    kh, kw = W.shape[2:]
    self.col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw)
    y = numpy.tensordot(self.col, W, ((1, 2, 3), (1, 2, 3)))
    if len(inputs) == 3:
        b = inputs[2]
        y += b
    return numpy.rollaxis(y, 3, 1),
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    self.col = conv.im2col_cpu(x, kh, kw, self.sy, self.sx,
                               self.ph, self.pw)
    y = numpy.tensordot(self.col, W, ((1, 2, 3), (1, 2, 3)))
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    self.col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    y = numpy.tensordot(self.col, W, ((1, 2, 3), (1, 2, 3)))
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    self.col = conv.im2col_cpu(x, kh, kw, self.sy, self.sx,
                               self.ph, self.pw)
    Wb = numpy.where(W >= 0, 1, -1).astype(numpy.float32, copy=False)
    y = numpy.tensordot(self.col, Wb, ((1, 2, 3), (1, 2, 3)))
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
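# A minimal standalone sketch (assuming numpy is importable) of the weight
# binarization above: numpy.where(W >= 0, 1, -1) is a sign function that
# maps zero to +1, so the convolution runs with weights in {-1, +1} while
# the real-valued W is kept for parameter updates.
import numpy

W = numpy.array([[-0.7, 0.0], [0.3, -0.1]], dtype=numpy.float32)
Wb = numpy.where(W >= 0, 1, -1).astype(numpy.float32, copy=False)
assert numpy.array_equal(
    Wb, numpy.array([[-1., 1.], [1., -1.]], dtype=numpy.float32))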
def _forward_cpu_core(self, x, W, b):
    kh, kw = W.shape[2:]
    col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    y = numpy.tensordot(
        col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
    if b is not None:
        y += b
    y = numpy.rollaxis(y, 3, 1)
    return y
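# A minimal standalone sketch (assuming chainer and numpy are importable)
# checking the im2col + tensordot convolution above against naive loops:
# contracting the (c, kh, kw) axes of the patches with the same axes of W
# yields y[n, oy, ox, out_c], which rollaxis reorders to (n, out_c, oy, ox).
import numpy
from chainer.utils import conv

n, c_in, c_out, k = 2, 3, 4, 3
x = numpy.random.randn(n, c_in, 6, 6).astype(numpy.float32)
W = numpy.random.randn(c_out, c_in, k, k).astype(numpy.float32)
col = conv.im2col_cpu(x, k, k, 1, 1, 0, 0)   # (n, c_in, k, k, 4, 4)
y = numpy.rollaxis(numpy.tensordot(col, W, ((1, 2, 3), (1, 2, 3))), 3, 1)
ref = numpy.zeros_like(y)
for oy in range(4):
    for ox in range(4):
        patch = x[:, :, oy:oy + k, ox:ox + k]     # (n, c_in, k, k)
        ref[:, :, oy, ox] = numpy.tensordot(patch, W, ((1, 2, 3), (1, 2, 3)))
assert numpy.allclose(y, ref, atol=1e-4)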
def backward(self, x, gy):
    if isinstance(gy[0], cuda.ndarray):
        gcol = conv.im2col_gpu(
            gy[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
    else:
        gcol = conv.im2col_cpu(
            gy[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
    gx = gcol.sum(axis=(2, 3))
    return (gx,)
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    self.col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw)
    Wb = numpy.where(W >= 0, 1, -1).astype(numpy.float32, copy=False)
    y = numpy.tensordot(self.col, Wb, ((1, 2, 3), (1, 2, 3)))
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
def forward_cpu(self, x):
    if (intel64.should_use_ideep('>=auto')
            and intel64.inputs_all_ready(x)):
        return self._forward_ideep(x)

    self._in_shape = x[0].shape
    self._in_dtype = x[0].dtype
    col = conv.im2col_cpu(x[0], self.kh, self.kw, self.sy, self.sx,
                          self.ph, self.pw)
    y = col.mean(axis=(2, 3))
    return y,
def forward_cpu(self, x):
    col = conv.im2col_cpu(
        x[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        pval=-float('inf'), cover_all=self.cover_all)
    n, c, kh, kw, out_h, out_w = col.shape
    col = col.reshape(n, c, kh * kw, out_h, out_w)

    # We select maximum twice, since the implementation using numpy.choose
    # hits its bug when kh * kw >= 32.
    self.indexes = col.argmax(axis=2)
    y = col.max(axis=2)
    return y,
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    self.col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    y = numpy.tensordot(
        self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
def test_im2col_consistency(self):
    col_cpu = conv.im2col_cpu(self.x, 3, 3, 2, 2, 2, 2, dy=2, dx=2)
    col_gpu = conv.im2col_gpu(cuda.to_gpu(self.x), 3, 3, 2, 2, 2, 2,
                              dy=2, dx=2)
    testing.assert_allclose(col_cpu, col_gpu.get(), atol=0, rtol=0)
def backward_cpu(self, x, gy):
    gcol = conv.im2col_cpu(gy[0], self.kh, self.kw, self.sy, self.sx,
                           self.ph, self.pw, cover_all=self.cover_all)
    gcol = gcol.transpose(0, 1, 4, 5, 2, 3)
    n, c, oy, ox, ky, kx = gcol.shape
    gcol = gcol.reshape((n, c, oy, ox, ky * kx))
    gx = numpy.empty((n, c, oy, ox), dtype=x[0].dtype)
    for n in six.moves.range(gcol.shape[0]):
        for c in six.moves.range(gcol.shape[1]):
            for oy in six.moves.range(gcol.shape[2]):
                for ox in six.moves.range(gcol.shape[3]):
                    gx[n, c, oy, ox] = gcol[n, c, oy, ox][
                        self.indexes[n, c, oy, ox]]
    return (gx,)
def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    gy = grad_outputs[0]
    kh, kw = W.shape[2:]
    col = conv.im2col_cpu(gy, kh, kw, self.sy, self.sx, self.ph, self.pw)
    gW = numpy.tensordot(x, col, ([0, 2, 3], [0, 4, 5]))
    gx = numpy.tensordot(col, W, ([1, 2, 3], [1, 2, 3]))
    gx = numpy.rollaxis(gx, 3, 1)
    if len(inputs) == 3:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb
    else:
        return gx, gW
def forward(self, inputs):
    x, = inputs
    xp = cuda.get_array_module(x)
    if xp == numpy:
        y = im2col_cpu(
            x, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    else:
        y = im2col_gpu(
            x, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    n, c, kh, kw, out_h, out_w = y.shape
    y = y.reshape(n, c * kh * kw, out_h, out_w)
    return y,
def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    gy = grad_outputs[0]
    kh, kw = W.shape[2:]
    col = conv.im2col_cpu(
        gy, kh, kw, self.sy, self.sx, self.ph, self.pw)
    gW = numpy.tensordot(x, col, ([0, 2, 3], [0, 4, 5]))
    gx = numpy.tensordot(col, W, ([1, 2, 3], [1, 2, 3]))
    gx = numpy.rollaxis(gx, 3, 1)
    if len(inputs) == 3:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb
    else:
        return gx, gW
def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    gy = grad_outputs[0]
    kh, kw = W.shape[2:]
    col = conv.im2col_cpu(gy, kh, kw, self.sy, self.sx, self.ph, self.pw)
    gW = numpy.tensordot(
        x, col, ([0, 2, 3], [0, 4, 5])).astype(W.dtype, copy=False)
    gx = numpy.tensordot(
        col, W, ([1, 2, 3], [1, 2, 3])).astype(x.dtype, copy=False)
    gx = numpy.rollaxis(gx, 3, 1)
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb
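# A minimal standalone sketch (assuming numpy is importable) of the bias
# gradient above: the bias is broadcast over the batch and both spatial
# axes in the forward pass, so its gradient is the sum of gy over exactly
# those axes (0, 2, 3), one value per output channel.
import numpy

gy = numpy.random.randn(2, 4, 3, 3).astype(numpy.float32)
gb = gy.sum(axis=(0, 2, 3))
assert gb.shape == (4,)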
def forward_cpu(self, inputs):
    self.retain_inputs((0, 1))
    x, gy = inputs
    col = conv.im2col_cpu(
        x, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)

    # NumPy raises an error when the array is not contiguous.
    # See: https://github.com/chainer/chainer/issues/2744
    # TODO(niboshi): Remove this code when NumPy is fixed.
    if (not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
            1 in gy.shape):
        gy = numpy.ascontiguousarray(gy)

    gW = numpy.tensordot(
        gy, col, ((0, 2, 3), (0, 4, 5))).astype(self.W_dtype, copy=False)
    return gW,
def _forward_cpu_core(self, x, gy):
    if self._use_ideep:
        return self._forward_ideep(x, gy)

    # NumPy raises an error when the array is not contiguous.
    # See: https://github.com/chainer/chainer/issues/2744
    # TODO(niboshi): Remove this code when NumPy is fixed.
    if (not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
            1 in gy.shape):
        gy = numpy.ascontiguousarray(gy)

    col = conv.im2col_cpu(
        x, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    gW = numpy.tensordot(
        gy, col, ((0, 2, 3), (0, 4, 5))).astype(self.W_dtype, copy=False)
    return gW,
def backward_cpu(self, x, gy):
    # x is a dummy variable, which is required only for compatibility
    # with pooling_2d.Pooling2D.
    col = conv.im2col_cpu(gy[0], self.kh, self.kw, self.sy, self.sx,
                          self.ph, self.pw, pval=-float('inf'),
                          cover_all=self.cover_all)
    n, c, kh, kw, out_h, out_w = col.shape
    col = col.reshape(n, c, kh * kw, out_h, out_w)

    # We select maximum twice, since the implementation using numpy.choose
    # hits its bug when kh * kw >= 32.
    gx = col.max(axis=2)
    return gx,
def forward_cpu(self, x):
    if (intel64.should_use_ideep('>=auto')
            and intel64.inputs_all_ready(x)):
        return self._forward_ideep(x)

    self._in_shape = x[0].shape
    self._in_dtype = x[0].dtype

    col = conv.im2col_cpu(
        x[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        pval=-float('inf'), cover_all=self.cover_all)
    n, c, kh, kw, out_h, out_w = col.shape
    col = col.reshape(n, c, kh * kw, out_h, out_w)

    # We select maximum twice, since the implementation using numpy.choose
    # hits its bug when kh * kw >= 32.
    self.indexes = col.argmax(axis=2)
    y = col.max(axis=2)
    return y,
def check_forward(self, y):
    y = F.upsampling_2d(
        self.pooled_y, self.p.indexes, ksize=(self.p.kh, self.p.kw),
        stride=(self.p.sy, self.p.sx), pad=(self.p.ph, self.p.pw),
        outsize=self.in_shape[2:], cover_all=self.p.cover_all)
    if isinstance(y.data, numpy.ndarray):
        y = conv.im2col_cpu(y.data, self.p.kh, self.p.kw, self.p.sy,
                            self.p.sx, self.p.ph, self.p.pw)
    else:
        y = conv.im2col_gpu(y.data, self.p.kh, self.p.kw, self.p.sy,
                            self.p.sx, self.p.ph, self.p.pw)
    for i in numpy.ndindex(y.shape):
        n, c, ky, kx, oy, ox = i
        up_y = y[n, c, ky, kx, oy, ox]
        if ky * y.shape[3] + kx == self.p.indexes[n, c, oy, ox]:
            in_y = self.pooled_y.data[n, c, oy, ox]
            testing.assert_allclose(in_y, up_y)
        else:
            testing.assert_allclose(up_y, 0)
def check_forward(self, y):
    y = F.upsampling_2d(
        self.pooled_y, self.indices, ksize=self.ksize,
        stride=self.stride, outsize=self.in_shape[2:])
    if isinstance(y.array, numpy.ndarray):
        y = conv.im2col_cpu(
            y.array, self.ksize, self.ksize, self.stride, self.stride,
            0, 0)
    else:
        y = conv.im2col_gpu(
            y.array, self.ksize, self.ksize, self.stride, self.stride,
            0, 0)
    for i in numpy.ndindex(y.shape):
        n, c, ky, kx, oy, ox = i
        up_y = y[n, c, ky, kx, oy, ox]
        if ky * y.shape[3] + kx == self.indices[n, c, oy, ox]:
            in_y = self.pooled_y.array[n, c, oy, ox]
            testing.assert_allclose(in_y, up_y)
        else:
            testing.assert_allclose(up_y, 0)
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

    kh, kw = W.shape[2:]
    self.col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    y = numpy.tensordot(
        self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
def forward_cpu(self, x):
    self._in_dtype = x[0].dtype
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
    up_y = conv.im2col_cpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all).transpose(0, 1, 4, 5, 2, 3)
    colh, colw = up_y.shape[2:4]
    up_y = up_y.reshape(-1, self.kh * self.kw)
    indexes = self.indexes.ravel()
    up_y[numpy.arange(len(indexes)), indexes] = x[0].ravel()
    up_y = up_y.reshape(n, c, colh, colw, self.kh, self.kw)
    up_y = conv.col2im_cpu(
        up_y.transpose(0, 1, 4, 5, 2, 3), self.sy, self.sx, self.ph,
        self.pw, self.outh, self.outw)
    return up_y,
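# A minimal standalone sketch (assuming numpy is importable) of the scatter
# used above: after flattening to (rows, kh*kw), fancy indexing with one
# kernel index per row writes each pooled value into the kernel slot that
# won the preceding max pooling, replacing the nested loops of the naive
# variant of this function.
import numpy

rows, ksize = 6, 4
up = numpy.zeros((rows, ksize), dtype=numpy.float32)
indexes = numpy.array([0, 3, 1, 2, 3, 0])
values = numpy.arange(1, rows + 1, dtype=numpy.float32)
up[numpy.arange(rows), indexes] = values
assert numpy.allclose(up.sum(axis=1), values)  # one nonzero slot per row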
def forward_cpu(self, inputs):
    self.retain_inputs((0, 1))  # retain only x and W
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None

    if not all([isinstance(i, numpy.ndarray) for i in inputs]):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

    kh, kw = W.shape[2:]
    col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    y = numpy.tensordot(
        col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
def forward_cpu(self, x):
    col = conv.im2col_cpu(x[0], self.kh, self.kw, self.sy, self.sx,
                          self.ph, self.pw)
    y = col.mean(axis=(2, 3))
    return y,