def _temp2mat(D, size, stride, padding, xp):
    """Convert a template to its equivalent matrix.

    Args:
        D: Template array. A leading axis is added before the patches are
            extracted, and ``D.shape[1]`` is read for 'VALID' padding.
        size: Window length handed to ``im2col_nd_cpu`` as the kernel size.
        stride: Stride handed to ``im2col_nd_cpu``.
        padding: ``'VALID'`` pads by ``size - D.shape[1]``; any other value
            is treated as ``'SAME'`` and pads by ``size - 1``.
        xp: Array module. Only the module bound to ``np`` is supported.

    Returns:
        The patch array with its leading axis squeezed out, axis 1 moved to
        the end, and the second axis reversed.

    Raises:
        NotImplementedError: If ``xp`` is not ``np``.
    """
    if not (xp == np):
        raise NotImplementedError('_temp2mat is not yet implemented for gpu.')

    patches = conv_nd.im2col_nd_cpu(
        xp.expand_dims(D, 0),
        (size, ),
        stride=(stride, ),
        pad=(size - D.shape[1] if padding == 'VALID' else size - 1, ))
    unbatched = xp.squeeze(patches, 0)
    return xp.moveaxis(unbatched, 1, -1)[:, ::-1]
def _forward_xp_core(self, x, gy, xp):
    """Compute the filter weight gradient from ``x`` and ``gy``.

    Patches are extracted from ``x`` with the CPU or GPU im2col routine
    (chosen by whether ``xp`` is ``numpy``) and contracted against ``gy``.

    Returns:
        tuple: One-element tuple ``(gW,)``, cast to ``self.W_dtype``.
    """
    n_spatial = self.ndim
    on_cpu = xp is numpy

    # Axes of gy to contract: (n, _, out_1, out_2, ..., out_N)
    gy_axes = (0, ) + tuple(moves.range(2, n_spatial + 2))
    # Axes of col to contract: (n, _, _, ..., _, out_1, out_2, ..., out_N)
    patch_axes = (0, ) + tuple(
        moves.range(n_spatial + 2, n_spatial * 2 + 2))

    # NumPy raises an error when the array is not contiguous.
    # See: https://github.com/chainer/chainer/issues/2744
    # TODO(niboshi): Remove this code when NumPy is fixed.
    if on_cpu:
        flags = gy.flags
        if (not flags.c_contiguous and not flags.f_contiguous
                and 1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

    im2col = conv_nd.im2col_nd_cpu if on_cpu else conv_nd.im2col_nd_gpu
    patches = im2col(
        x, self.ksize, self.stride, self.pad,
        cover_all=self.cover_all, dilate=self.dilate)

    contracted = xp.tensordot(gy, patches, (gy_axes, patch_axes))
    return (contracted.astype(self.W_dtype, copy=False), )
def _backward_xp(self, x, W, b, gy, xp):
    """Compute gradients from ``gy`` using im2col and ``tensordot``.

    Patches are extracted from ``gy`` with kernel size ``W.shape[2:]`` and
    contracted against ``x`` (for ``gW``) and ``W`` (for ``gx``).

    Returns:
        tuple: ``(gx, gW)`` when ``b`` is ``None``, otherwise
        ``(gx, gW, gb)`` where ``gb`` sums ``gy`` over every axis except
        axis 1.
    """
    n_spatial = self.ndim
    im2col = conv_nd.im2col_nd_cpu if xp is numpy else conv_nd.im2col_nd_gpu
    col = im2col(gy, W.shape[2:], self.stride, self.pad)

    # Layout notes from the original implementation:
    # x : n, C_I, d_1, d_2, ..., d_N
    # col: n, C_I, k_1, k_2, ..., k_N, d_1, d_2, ..., d_N
    # W : C_I, C_O, k_1, k_2, ..., k_N
    batch_spatial = (0, ) + tuple(six.moves.range(2, n_spatial + 2))
    col_trailing = (0, ) + tuple(
        six.moves.range(n_spatial + 2, n_spatial * 2 + 2))
    gW = xp.tensordot(x, col, (batch_spatial, col_trailing))
    gW = gW.astype(W.dtype, copy=False)

    chan_kernel = (1, ) + tuple(six.moves.range(2, n_spatial + 2))
    gx = xp.tensordot(col, W, (chan_kernel, chan_kernel))
    gx = gx.astype(x.dtype, copy=False)
    # Bring the last axis of the contraction result to position 1.
    gx = xp.rollaxis(gx, n_spatial + 1, 1)

    if b is not None:
        bias_axes = (0, ) + tuple(six.moves.range(2, n_spatial + 2))
        return gx, gW, gy.sum(axis=bias_axes)
    return gx, gW
def _forward_xp_core(self, x, W, b, xp):
    """Correlate ``x`` with ``W`` through im2col and ``tensordot``.

    Args:
        x: Input array.
        W: Filter array; ``W.shape[2:]`` is used as the kernel size.
        b: Optional bias added in place to the result, or ``None``.
        xp: Array module; ``numpy`` selects the CPU im2col routine,
            anything else the GPU one.

    Returns:
        tuple: One-element tuple holding the output with its last axis
        rolled to position 1.
    """
    n_spatial = self.ndim
    im2col = conv_nd.im2col_nd_cpu if xp is numpy else conv_nd.im2col_nd_gpu

    # Make patch array.
    patches = im2col(
        x, W.shape[2:], self.stride, self.pad,
        cover_all=self.cover_all, dilate=self.dilate)

    # Compute correlation over axes (1, 2, ..., N+1) of both operands.
    contract = tuple(moves.range(1, n_spatial + 2))
    y = xp.tensordot(patches, W, (contract, contract))
    y = y.astype(x.dtype, copy=False)

    # Apply bias if given.
    if b is not None:
        y += b

    # Roll c_O before the second in (n, y_1, y_2, ..., y_N, c_O).
    return (xp.rollaxis(y, n_spatial + 1, 1), )
def forward_cpu(self, x):
    """Take the maximum over each window of ``x[0]`` on CPU.

    Delegates to ``self._forward_2d_ideep`` when ``self.ndim == 2`` and the
    ``intel64`` checks pass. Otherwise the input is expanded with im2col
    (padding value ``-inf``), the kernel axes are flattened, and both the
    maximum and its arg-index along that axis are computed.

    Side effects:
        Sets ``self._in_shape``, ``self._in_dtype`` and ``self.indexes``.

    Returns:
        tuple: One-element tuple with the per-window maxima.
    """
    ideep_ok = (
        self.ndim == 2
        and intel64.should_use_ideep('>=auto')
        and intel64.inputs_all_ready(x))
    if ideep_ok:
        return self._forward_2d_ideep(x)

    data = x[0]
    in_shape, in_dtype = data.shape, data.dtype
    col = conv_nd.im2col_nd_cpu(
        data, self.ksize, self.stride, self.pad,
        pval=float('-inf'), cover_all=self.cover_all)

    shape = col.shape
    n, c = shape[:2]
    mid = (len(shape) - 2) // 2 + 2
    window = shape[2:mid]
    outs = shape[mid:]
    # (n, c, k_1 * k_2 * ... * k_N, out_1, out_2, ..., out_N)
    col = col.reshape((n, c, functools.reduce(mul, window)) + outs)

    # We select maximum twice, since the implementation using numpy.choose
    # hits its bug when kh * kw >= 32.
    y = col.max(axis=2)

    self._in_shape = in_shape
    self._in_dtype = in_dtype
    self.indexes = col.argmax(axis=2)
    return (y, )
def forward_cpu(self, x):
    """Gather from ``x[0]`` the window elements selected by stored indexes.

    The pooling parameters and the indexes come from ``self.func``. The
    input is expanded with im2col, the kernel axes are flattened and moved
    last, and one element per window is picked using ``func.indexes``.

    Returns:
        tuple: One-element tuple shaped ``(n, c) + outs``.
    """
    func = self.func
    n_spatial = func.ndim
    indexes = backend.from_chx(func.indexes)

    col = conv_nd.im2col_nd_cpu(
        x[0], func.ksize, func.stride, func.pad,
        pval=float('-inf'), cover_all=func.cover_all)

    shape = col.shape
    n, c = shape[:2]
    mid = (len(shape) - 2) // 2 + 2
    outs = shape[mid:]
    window_size = functools.reduce(mul, shape[2:mid])

    # (n, c, k_1 * k_2 * ... * k_N, out_1, out_2, ..., out_N)
    col = col.reshape((n, c, window_size) + outs)

    # (n, c, out_1, ..., out_N, k_1 * .. * k_N)
    order = (0, 1) + tuple(six.moves.range(3, 3 + n_spatial)) + (2, )
    rows = col.transpose(order).reshape(-1, window_size)

    flat_idx = indexes.ravel()
    picked = rows[numpy.arange(len(flat_idx)), flat_idx]
    return (picked.reshape((n, c) + outs), )
def _forward_xp_core(self, x, gy, xp):
    """Return ``(gW,)``, the filter weight gradient for ``x`` and ``gy``.

    ``x`` is expanded into patches by the CPU or GPU im2col routine,
    depending on whether ``xp`` is ``numpy``, and the patches are
    contracted with ``gy``. The result is cast to ``self.W_dtype``.
    """
    ndim = self.ndim
    # gy axes to sum over: (n, _, out_1, out_2, ..., out_N)
    out_axes = (0,) + tuple(moves.range(2, ndim + 2))
    # col axes to sum over: (n, _, _, ..., _, out_1, out_2, ..., out_N)
    col_axes = (0,) + tuple(moves.range(ndim + 2, ndim * 2 + 2))

    im2col_kwargs = {'cover_all': self.cover_all, 'dilate': self.dilate}
    if xp is numpy:
        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        contiguous = gy.flags.c_contiguous or gy.flags.f_contiguous
        if not contiguous and 1 in gy.shape:
            gy = numpy.ascontiguousarray(gy)
        col = conv_nd.im2col_nd_cpu(
            x, self.ksize, self.stride, self.pad, **im2col_kwargs)
    else:
        col = conv_nd.im2col_nd_gpu(
            x, self.ksize, self.stride, self.pad, **im2col_kwargs)

    gW = xp.tensordot(gy, col, (out_axes, col_axes))
    return (gW.astype(self.W_dtype, copy=False),)
def forward_cpu(self, inputs):
    """Average each window of the single input array on CPU.

    When ``self.pad_value`` is ``None`` the window sums are divided by the
    result of ``self._get_pooling_width``; otherwise ``pad_value`` must be
    0 and a plain mean over the kernel axes is used.

    Side effects:
        Sets ``self.width`` (``None`` in the plain-mean case),
        ``self._in_shape`` and ``self._in_dtype``.

    Returns:
        tuple: One-element tuple with the pooled array.
    """
    x, = inputs
    ksize = self.ksize
    pad_value = self.pad_value
    in_shape, in_dtype = x.shape, x.dtype

    col = conv_nd.im2col_nd_cpu(
        x, ksize, self.stride, self.pad, cover_all=self.cover_all)

    # mean along (_, _, k_1, k_2, ..., k_N, _, ..., _)
    kernel_axes = tuple(six.moves.range(2, 2 + len(ksize)))
    if pad_value is not None:
        assert pad_value == 0
        width = None
        y = col.mean(axis=kernel_axes)
    else:
        width = self._get_pooling_width(numpy, x.shape[2:], x.dtype)
        y = col.sum(axis=kernel_axes) / width

    self.width = width
    self._in_shape = in_shape
    self._in_dtype = in_dtype
    return (y, )
def _backward_xp(self, x, W, b, gy, xp):
    """Return ``(gx, gW)`` or ``(gx, gW, gb)`` computed from ``gy``.

    ``gy`` is expanded into patches (CPU routine when ``xp`` is ``numpy``,
    GPU routine otherwise) using ``W.shape[2:]`` as the kernel size. The
    patches are contracted with ``x`` to give ``gW`` and with ``W`` to give
    ``gx``. ``gb`` is only produced when a bias ``b`` was supplied.
    """
    ndim = self.ndim
    patch_args = (gy, W.shape[2:], self.stride, self.pad)
    if xp is numpy:
        col = conv_nd.im2col_nd_cpu(*patch_args)
    else:
        col = conv_nd.im2col_nd_gpu(*patch_args)

    # Axis 0 plus axes 2..N+1; used for x below and again for the gy sum.
    spatial_with_batch = (0,) + tuple(six.moves.range(2, ndim + 2))

    # x : n, C_I, d_1, d_2, ..., d_N
    # col: n, C_I, k_1, k_2, ..., k_N, d_1, d_2, ..., d_N
    col_axes = (0,) + tuple(six.moves.range(ndim + 2, ndim * 2 + 2))
    gW = xp.tensordot(x, col, (spatial_with_batch, col_axes)).astype(
        W.dtype, copy=False)

    # col: n, C_I, k_1, k_2, ..., k_N, d_1, d_2, ..., d_N
    # W : C_I, C_O, k_1, k_2, ..., k_N
    shared_axes = (1,) + tuple(six.moves.range(2, ndim + 2))
    gx = xp.rollaxis(
        xp.tensordot(col, W, (shared_axes, shared_axes)).astype(
            x.dtype, copy=False),
        ndim + 1, 1)

    if b is None:
        return gx, gW
    return gx, gW, gy.sum(axis=spatial_with_batch)
def _forward_xp(self, x, W, b, xp):
    """Correlate ``x`` with ``W`` through im2col and ``tensordot``.

    Args:
        x: Input array.
        W: Filter array; ``W.shape[2:]`` is used as the kernel size.
        b: Optional bias added in place to the result, or ``None``.
        xp: Array module; ``numpy`` selects the CPU im2col routine,
            anything else the GPU one.

    Side effects:
        Stores the patch array on ``self.col``.

    Returns:
        tuple: One-element tuple holding the output with its last axis
        rolled to position 1.
    """
    ndim = self.ndim
    ksize = W.shape[2:]
    stride = self.stride
    pad = self.pad

    # Make patch array.
    if xp is numpy:
        self.col = conv_nd.im2col_nd_cpu(
            x, ksize, stride, pad, cover_all=self.cover_all)
    else:
        self.col = conv_nd.im2col_nd_gpu(
            x, ksize, stride, pad, cover_all=self.cover_all)

    # Compute correlation.
    axes = tuple(moves.range(1, ndim + 2))  # (1, 2, ..., N+1)
    # tensordot already returns a fresh array, so copy=False only skips a
    # redundant copy when the dtype already matches; the in-place bias add
    # below therefore never touches caller-owned data.
    y = xp.tensordot(self.col, W, (axes, axes)).astype(x.dtype, copy=False)

    # Apply bias if given.
    if b is not None:
        y += b

    # Roll c_O before the second in (n, y_1, y_2, ..., y_N, c_O).
    return xp.rollaxis(y, ndim + 1, 1),
def forward_cpu(self, x):
    """Average each window of ``x[0]`` on CPU.

    The input is expanded into patches with im2col and the mean is taken
    over the kernel axes.

    Returns:
        tuple: One-element tuple with the pooled array.
    """
    patches = conv_nd.im2col_nd_cpu(
        x[0], self.ksize, self.stride, self.pad,
        cover_all=self.cover_all)
    # Kernel axes sit at positions 2 .. 1 + N:
    # (_, _, k_1, k_2, ..., k_N, _, ..., _)
    first_kernel_axis = 2
    kernel_axes = tuple(six.moves.range(
        first_kernel_axis, first_kernel_axis + len(self.ksize)))
    return (patches.mean(axis=kernel_axes), )
def forward_cpu(self, x):
    """Return the window-wise mean of ``x[0]`` as a one-element tuple.

    Patches are produced by ``conv_nd.im2col_nd_cpu`` and averaged over
    the ``len(self.ksize)`` kernel axes that start at axis 2.
    """
    data = x[0]
    col = conv_nd.im2col_nd_cpu(
        data, self.ksize, self.stride, self.pad, cover_all=self.cover_all)

    # mean along (_, _, k_1, k_2, ..., k_N, _, ..., _)
    n_kernel_axes = len(self.ksize)
    mean_axes = tuple(six.moves.range(2, 2 + n_kernel_axes))
    pooled = col.mean(axis=mean_axes)
    return (pooled, )
def forward_cpu(self, x):
    """Average each window of ``x[0]`` on CPU without retaining inputs.

    Side effects:
        Calls ``self.retain_inputs(())`` and records the input's shape and
        dtype on ``self._in_shape`` / ``self._in_dtype``.

    Returns:
        tuple: One-element tuple with the mean over the kernel axes of the
        im2col patches.
    """
    self.retain_inputs(())
    data = x[0]
    self._in_shape = data.shape
    self._in_dtype = data.dtype

    patches = conv_nd.im2col_nd_cpu(
        data, self.ksize, self.stride, self.pad,
        cover_all=self.cover_all)

    # mean along (_, _, k_1, k_2, ..., k_N, _, ..., _)
    kernel_axes = tuple(six.moves.range(2, 2 + len(self.ksize)))
    return (patches.mean(axis=kernel_axes), )
def _coef2mat(x_orig, size, template_size, stride, padding, xp):
    """Convert a coefficient vector to its equivalent matrix.

    Args:
        x_orig: Coefficient array. A 2-d input gets a leading axis added
            for processing, which is squeezed out again before returning.
        size: Window length handed to ``im2col_nd_cpu`` as the kernel size.
        template_size: Maximum length kept along the last axis of the
            patch array; longer results keep only the trailing entries.
        stride: When greater than 1, the coefficients are first scattered
            into a zero array at every ``stride``-th position.
        padding: ``'VALID'`` or anything else (treated as ``'SAME'``).
        xp: Array module. Only the module bound to ``np`` is supported.

    Returns:
        The patch array with its last two axes swapped and the resulting
        second-to-last-input axis reversed (axis 1 for 2-d input, axis 2
        otherwise).

    Raises:
        NotImplementedError: If ``xp`` is not ``np``.
    """
    # Fail fast, before any array work, and report this function's own
    # name (the message previously named ``_temp2mat``).
    if xp is not np:
        raise NotImplementedError('_coef2mat is not yet implemented for gpu.')

    is_unbatched = x_orig.ndim == 2
    x = xp.expand_dims(x_orig, 0) if is_unbatched else x_orig

    if stride > 1:
        # Length of the coefficients once spread out with ``stride`` gaps.
        pad_size = (x.shape[2] - 1) * stride + 1
        pad_start = 0
        if padding == 'VALID':
            coef_size = size - pad_size
            if coef_size > template_size - 1:
                pad_start = coef_size - template_size + 1
        else:
            coef_size = pad_size - size + 1
            if coef_size < template_size:
                pad_start = template_size - coef_size
        x_pad = xp.zeros(
            (x.shape[0], x.shape[1], pad_start + pad_size), dtype=x.dtype)
        x_pad[:, :, slice(pad_start, pad_size + pad_start, stride)] = x
        x = x_pad

    if padding == 'VALID':
        xmat = conv_nd.im2col_nd_cpu(
            x, (size, ), stride=(1, ), pad=(size - x.shape[2], ))
    else:  # 'SAME'
        xmat = conv_nd.im2col_nd_cpu(x, (size, ), stride=(1, ), pad=(0, ))

    # Keep only the trailing ``template_size`` entries of the last axis.
    if xmat.shape[-1] > template_size:
        xmat = xmat[..., slice(xmat.shape[-1] - template_size, None)]

    if is_unbatched:
        xmat = xp.squeeze(xmat, 0)
        return xp.moveaxis(xmat, -2, -1)[:, ::-1]
    return xp.moveaxis(xmat, -2, -1)[:, :, ::-1]
def forward_cpu(self, x):
    """Take the maximum over each window of ``x[0]`` on CPU.

    The input is expanded with im2col (padding value ``-inf``), the kernel
    axes are flattened into axis 2, and the arg-max and max along that
    axis are computed.

    Side effects:
        Stores the arg-max positions on ``self.indexes``.

    Returns:
        tuple: One-element tuple with the per-window maxima.
    """
    patches = conv_nd.im2col_nd_cpu(
        x[0], self.ksize, self.stride, self.pad,
        pval=float('-inf'), cover_all=self.cover_all)

    shape = patches.shape
    n, c = shape[:2]
    # The axes after (n, c) are split evenly into kernel and output axes.
    mid = (len(shape) - 2) // 2 + 2
    window_size = functools.reduce(mul, shape[2:mid])
    outs = shape[mid:]

    # (n, c, k_1 * k_2 * ... * k_N, out_1, out_2, ..., out_N)
    flat = patches.reshape((n, c, window_size) + outs)

    # We select maximum twice, since the implementation using numpy.choose
    # hits its bug when kh * kw >= 32.
    self.indexes = flat.argmax(axis=2)
    return (flat.max(axis=2), )
def test_im2col_nd_cpu_parameter_ranks(self):
    """Each mis-sized parameter must make ``im2col_nd_cpu`` assert."""
    bad_parameter_sets = (
        # Invalid ksize length.
        ((2,), self.stride, self.pad),
        # Invalid stride length.
        (self.ksize, (1,), self.pad),
        # Invalid pad length.
        (self.ksize, self.stride, (0,)),
    )
    for ksize, stride, pad in bad_parameter_sets:
        with self.assertRaises(AssertionError):
            conv_nd.im2col_nd_cpu(self.img, ksize, stride, pad)
def forward_cpu(self, inputs):
    """Average each window of the single input array on CPU.

    With ``self.pad_value`` set to ``None`` the window sums are divided by
    the result of ``self._get_pooling_width``; otherwise ``pad_value`` must
    be 0 and a plain mean over the kernel axes is returned.

    Side effects:
        Records the input's shape and dtype on ``self._in_shape`` and
        ``self._in_dtype``.

    Returns:
        tuple: One-element tuple with the pooled array.
    """
    x, = inputs
    self._in_shape = x.shape
    self._in_dtype = x.dtype

    patches = conv_nd.im2col_nd_cpu(
        x, self.ksize, self.stride, self.pad, cover_all=self.cover_all)

    # mean along (_, _, k_1, k_2, ..., k_N, _, ..., _)
    kernel_axes = tuple(six.moves.range(2, 2 + len(self.ksize)))

    if self.pad_value is not None:
        assert self.pad_value == 0
        return (patches.mean(axis=kernel_axes), )

    width = self._get_pooling_width(numpy, x.shape[2:], x.dtype)
    return (patches.sum(axis=kernel_axes) / width, )
def forward_cpu(self, x):
    """Gather from ``x[0]`` the window elements chosen by ``self.indexes``.

    The input is expanded with im2col, the kernel axes are flattened and
    moved to the end, and one element per window is selected with the
    stored indexes.

    Returns:
        tuple: One-element tuple shaped ``(n, c) + outs``.
    """
    patches = conv_nd.im2col_nd_cpu(
        x[0], self.ksize, self.stride, self.pad,
        pval=float('-inf'), cover_all=self.cover_all)

    shape = patches.shape
    n, c = shape[:2]
    mid = (len(shape) - 2) // 2 + 2
    outs = shape[mid:]
    window_size = functools.reduce(mul, shape[2:mid])

    # (n, c, k_1 * k_2 * ... * k_N, out_1, out_2, ..., out_N)
    flat = patches.reshape((n, c, window_size) + outs)

    # (n, c, out_1, ..., out_N, k_1 * .. * k_N)
    kernel_last = (0, 1) + tuple(six.moves.range(3, 3 + self.ndim)) + (2,)
    rows = flat.transpose(kernel_last).reshape(-1, window_size)

    flat_idx = self.indexes.ravel()
    selected = rows[numpy.arange(len(flat_idx)), flat_idx]
    return (selected.reshape((n, c) + outs), )
def test_col2im_consistency(self):
    """CPU and GPU col2im must agree on the same patch array."""
    stride, pad, dims = self.stride, self.pad, self.dims
    # Build the patch array once on CPU and feed it to both backends.
    col = conv_nd.im2col_nd_cpu(self.x, self.ksize, stride, pad)

    im_cpu = conv_nd.col2im_nd_cpu(col, stride, pad, dims)
    col_on_gpu = cuda.to_gpu(col)
    im_gpu = conv_nd.col2im_nd_gpu(col_on_gpu, stride, pad, dims)

    testing.assert_allclose(im_cpu, im_gpu.get())
def test_im2col_consistency(self):
    """CPU and GPU im2col must produce exactly equal patch arrays."""
    params = (self.ksize, self.stride, self.pad)

    col_cpu = conv_nd.im2col_nd_cpu(self.x, *params)
    x_on_gpu = cuda.to_gpu(self.x)
    col_gpu = conv_nd.im2col_nd_gpu(x_on_gpu, *params)

    # Zero tolerances: the two results have to match exactly.
    testing.assert_allclose(col_cpu, col_gpu.get(), atol=0, rtol=0)
def test_col2im_consistency(self):
    """Check that ``col2im_nd_cpu`` and ``col2im_nd_gpu`` give equal images.

    The patch array is produced on CPU and then reconstructed by both
    backends with the same stride, pad and target dims.
    """
    col = conv_nd.im2col_nd_cpu(self.x, self.ksize, self.stride, self.pad)
    col2im_args = (self.stride, self.pad, self.dims)

    im_cpu = conv_nd.col2im_nd_cpu(col, *col2im_args)
    im_gpu = conv_nd.col2im_nd_gpu(cuda.to_gpu(col), *col2im_args)

    testing.assert_allclose(im_cpu, im_gpu.get())
def test_im2col_consistency(self):
    """Check that ``im2col_nd_cpu`` and ``im2col_nd_gpu`` match exactly."""
    ksize, stride, pad = self.ksize, self.stride, self.pad

    expected = conv_nd.im2col_nd_cpu(self.x, ksize, stride, pad)
    on_gpu = conv_nd.im2col_nd_gpu(cuda.to_gpu(self.x), ksize, stride, pad)
    actual = on_gpu.get()

    # Both tolerances are zero, so any difference fails the test.
    testing.assert_allclose(expected, actual, atol=0, rtol=0)