def ssim_im2col(y, t, window_size, stride):
    n, c, h, w = y.shape
    # unfold both images into per-window columns: n x c*ws*ws x out_h x out_w
    ycol = F.im2col(y, window_size, stride)
    tcol = F.im2col(t, window_size, stride)
    mu_y = F.mean(ycol, 1)
    mu_t = F.mean(tcol, 1)
    mu_y_sq = F.mean(ycol * ycol, 1)
    mu_t_sq = F.mean(tcol * tcol, 1)
    mu_ty = F.mean(ycol * tcol, 1)
    muy_mut = mu_y * mu_t
    sq_mu_y = mu_y * mu_y
    sq_mu_t = mu_t * mu_t
    sigma_y_sq = mu_y_sq - sq_mu_y
    sigma_t_sq = mu_t_sq - sq_mu_t
    sigma_yt = mu_ty - muy_mut
    # SSIM stabilising constants (K1=0.01, K2=0.03, unit dynamic range)
    c1 = 0.01 ** 2
    c2 = 0.03 ** 2
    ssim_map = ((2 * muy_mut + c1) * (2 * sigma_yt + c2)) / \
        ((sq_mu_y + sq_mu_t + c1) * (sigma_y_sq + sigma_t_sq + c2))
    return ssim_map
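# --- Usage sketch (not from the original source) ---
# A minimal example of calling ssim_im2col above, assuming `F` is
# chainer.functions and the inputs are NCHW float32 arrays in [0, 1].
# The 8x8 window and stride of 4 are illustrative choices; averaging the
# per-window map gives a scalar SSIM score.
import numpy as np
import chainer.functions as F

def example_ssim():
    y = np.random.rand(2, 3, 32, 32).astype(np.float32)  # prediction
    t = np.random.rand(2, 3, 32, 32).astype(np.float32)  # target
    ssim_map = ssim_im2col(y, t, window_size=8, stride=4)
    return float(F.mean(ssim_map).array)  # scalar SSIM in [-1, 1]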
def process_convolution2d(self, func, in_data):
    """ Work on the convolution2d input. """
    assert len(in_data) == 2
    func_id = self.inc_counter(func)
    X, W = in_data
    xp = backend.get_array_module(X)
    ksize = W.shape[2]
    stride = 1
    pad = 1 if ksize == 3 else 0
    X_ = F.im2col(X, ksize, stride=stride, pad=pad).reshape(
        [X.shape[0], -1, X.shape[2] * X.shape[3]])
    X_ = F.transpose(X_, axes=(0, 2, 1)).reshape([-1, X_.shape[1]])
    X_ = X_.array
    W_ = W.reshape([W.shape[0], -1])
    W_nz = (W_ != 0).astype('bool')
    X_nz = (X_ != 0).astype('bool')
    n_zm = 0
    for i in range(W_.shape[0]):
        M = xp.multiply(X_, W_[i, :])  # multiply
        # zero mult
        ZM = xp.logical_and(M == 0, xp.logical_and(W_nz[i, :], X_nz))
        n_zm += ZM.sum()
    self.results.append([
        self.current_epoch, self.current_iteration, func_id, func.label,
        n_zm.item(),
        W_.shape[0] * X_.shape[0] * X_.shape[1]
    ])
def check_forward(self, x, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
    x = x.copy()
    n, c, h, w = x.shape
    col = functions.im2col(
        x, (kh, kw), (sy, sx), (ph, pw), dilate=(dy, dx)).data
    col_h = get_conv_outsize(h, kh, sy, ph, d=dy)
    col_w = get_conv_outsize(w, kw, sx, pw, d=dx)
    self.assertEqual(col.shape, (n, c * kh * kw, col_h, col_w))

    col = col.reshape(n, c, kh, kw, col_h, col_w)
    col = cuda.to_cpu(col)

    for y in moves.range(col_h):
        for x in moves.range(col_w):
            for ky in moves.range(kh):
                for kx in moves.range(kw):
                    oy = y * sy - ph + ky * dy
                    ox = x * sx - pw + kx * dx
                    if 0 <= oy < h and 0 <= ox < w:
                        testing.assert_allclose(
                            col[:, :, ky, kx, y, x],
                            self.x[:, :, oy, ox])
                    else:
                        testing.assert_allclose(
                            col[:, :, ky, kx, y, x],
                            numpy.zeros((2, 3), numpy.float32))
def f(x):
    return functions.im2col(x, ksize, stride=stride, pad=pad,
                            cover_all=cover_all, dilate=dilate)
def check_forward(self, x, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
    x = x.copy()
    n, c, h, w = x.shape
    col = functions.im2col(
        x, (kh, kw), (sy, sx), (ph, pw), dilate=(dy, dx)).data
    col_h = get_conv_outsize(h, kh, sy, ph, d=dy)
    col_w = get_conv_outsize(w, kw, sx, pw, d=dx)
    self.assertEqual(col.shape, (n, c * kh * kw, col_h, col_w))

    col = col.reshape(n, c, kh, kw, col_h, col_w)
    col = cuda.to_cpu(col)

    for y in moves.range(col_h):
        for x in moves.range(col_w):
            for ky in moves.range(kh):
                for kx in moves.range(kw):
                    oy = y * sy - ph + ky * dy
                    ox = x * sx - pw + kx * dx
                    if 0 <= oy < h and 0 <= ox < w:
                        testing.assert_allclose(
                            col[:, :, ky, kx, y, x],
                            self.x[:, :, oy, ox])
                    else:
                        testing.assert_allclose(
                            col[:, :, ky, kx, y, x],
                            numpy.zeros((2, 3), self.dtype))
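# --- Shape sketch (not from the original tests) ---
# The two check_forward tests above verify that F.im2col produces an
# (n, c*kh*kw, out_h, out_w) array whose spatial size matches
# get_conv_outsize. A standalone illustration, assuming Chainer is
# installed; the concrete sizes below are arbitrary.
import numpy as np
from chainer import functions
from chainer.utils import get_conv_outsize

def example_im2col_shape():
    x = np.arange(2 * 3 * 7 * 9, dtype=np.float32).reshape(2, 3, 7, 9)
    col = functions.im2col(x, ksize=3, stride=2, pad=1, dilate=1).array
    out_h = get_conv_outsize(7, 3, 2, 1, d=1)   # -> 4
    out_w = get_conv_outsize(9, 3, 2, 1, d=1)   # -> 5
    assert col.shape == (2, 3 * 3 * 3, out_h, out_w)
    return col.shape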
def acts_expand_convolution_2d(self):
    acts = self.in_acts
    ksize, stride, pad = self.conv2d_args
    # n x c*ksize*ksize x ho x wo
    acts_expand = im2col(acts, ksize, stride, pad).data
    n, _, ho, wo = acts_expand.shape
    # n x ho x wo x c*ksize*ksize
    acts_expand = acts_expand.transpose(0, 2, 3, 1)
    # n*ho*wo x c*ksize*ksize
    acts_expand = acts_expand.reshape(n * ho * wo, -1)
    return acts_expand
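# --- Sketch: im2col expansion reproduces a convolution (illustrative) ---
# The expansion above flattens each receptive field into a row so that a
# 2-D convolution becomes a plain matrix product. A self-contained check,
# assuming Chainer; the layer sizes are arbitrary.
import numpy as np
import chainer.functions as F

def example_expand_equals_conv():
    x = np.random.rand(2, 3, 8, 8).astype(np.float32)
    W = np.random.rand(4, 3, 3, 3).astype(np.float32)  # out_ch, in_ch, k, k
    ksize, stride, pad = 3, 1, 1

    col = F.im2col(x, ksize, stride, pad).array         # n x c*k*k x ho x wo
    n, _, ho, wo = col.shape
    col = col.transpose(0, 2, 3, 1).reshape(n * ho * wo, -1)

    out = col.dot(W.reshape(4, -1).T)                   # n*ho*wo x out_ch
    out = out.reshape(n, ho, wo, 4).transpose(0, 3, 1, 2)

    ref = F.convolution_2d(x, W, stride=stride, pad=pad).array
    np.testing.assert_allclose(out, ref, rtol=1e-4, atol=1e-5)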
def compute_A(self, in_data):
    x = in_data[0]
    ksize, stride, pad = \
        self._link.ksize, self._link.stride[0], self._link.pad[0]
    xp = cuda.get_array_module(x)
    x = im2col(x, ksize, stride, pad).data
    x = x.transpose(0, 2, 3, 1)  # NCHW -> NHWC
    n, ho, wo, _ = x.shape
    x = x.reshape(n * ho * wo, -1)
    if self._link.b is not None:
        ones = xp.ones(x.shape[0], dtype=x.dtype)
        x = xp.column_stack((x, ones))
    A_scale = 1 / n
    if x.dtype == xp.float16:
        x = cast(x, xp.float32).data
        A = x.T.dot(x) * A_scale
    else:
        A = x.T.dot(x) * A_scale
    return A
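# --- Shape sketch for the A factor (illustrative, not from the source) ---
# compute_A above builds the activation second-moment matrix used as a
# Kronecker factor in K-FAC: for a conv layer with c input channels and a
# k x k kernel, A is (c*k*k + 1) square when a bias column is appended.
# A standalone version with explicit arguments, assuming Chainer/NumPy only;
# it mirrors the 1/n scaling of the method above.
import numpy as np
import chainer.functions as F

def example_compute_A(x, ksize, stride, pad, has_bias=True):
    col = F.im2col(x, ksize, stride, pad).array      # n x c*k*k x ho x wo
    col = col.transpose(0, 2, 3, 1)
    n, ho, wo, d = col.shape
    col = col.reshape(n * ho * wo, d)
    if has_bias:
        col = np.column_stack((col, np.ones(col.shape[0], dtype=col.dtype)))
    return col.T.dot(col) / n                        # (d[+1]) x (d[+1])

# e.g. for x of shape (8, 3, 16, 16) and a 3x3 kernel with pad 1,
# example_compute_A(x, 3, 1, 1).shape == (28, 28)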
def f(x):
    return functions.im2col(
        x, ksize, stride=stride, pad=pad, cover_all=cover_all,
        dilate=dilate)
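# --- Usage sketch (assumption: the closures above feed gradient checks) ---
# Wrappers like f are typically passed to chainer.gradient_check to verify
# im2col's backward pass numerically. The concrete sizes and tolerances
# below are illustrative, not taken from the original test suite.
import numpy as np
from chainer import functions, gradient_check

def example_gradient_check():
    ksize, stride, pad, cover_all, dilate = (3, 3), 1, 1, False, 1

    def f(x):
        return functions.im2col(
            x, ksize, stride=stride, pad=pad, cover_all=cover_all,
            dilate=dilate)

    x = np.random.rand(2, 3, 6, 6).astype(np.float32)
    gy = np.random.rand(2, 27, 6, 6).astype(np.float32)  # matches output shape
    gradient_check.check_backward(f, x, gy, atol=1e-3, rtol=1e-3)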
def contextual_attention(f, b, mask=None, ksize=3, stride=1, rate=1,
                         fuse_k=3, softmax_scale=10., training=True,
                         fuse=True, return_flow=False):
    """Contextual attention layer implementation.

    Contextual attention is first introduced in the publication:
    Generative Image Inpainting with Contextual Attention, Yu et al.

    Args:
        f: Input feature to match (foreground).
        b: Input feature to match against (background).
        mask: Input mask for b, indicating patches not available.
        ksize: Kernel size for contextual attention.
        stride: Stride for extracting patches from b.
        rate: Dilation for matching.
        softmax_scale: Scaled softmax for attention.
        training: Indicating if current graph is training or inference.
    """
    xp = cuda.get_array_module(f.data)

    # get shapes
    raw_fs = f.shape
    raw_int_fs = f.shape
    raw_int_bs = b.shape

    # extract patches from background with stride and rate
    kernel = 2 * rate
    pad = (kernel - rate * stride + 1) // 2
    raw_w = F.im2col(b, kernel, rate * stride, pad=pad).transpose(0, 2, 3, 1)
    raw_w = raw_w.reshape(raw_int_bs[0], -1, raw_int_bs[1], kernel, kernel)
    # raw_w = raw_w.transpose(0, 1, 4, 2, 3)  # transpose to b*hw*c*k*k

    # downscaling foreground option: downscaling both foreground and
    # background for matching and use original background for reconstruction.
    f = f[:, :, ::rate, ::rate]
    b = b[:, :, ::rate, ::rate]
    if mask is not None:
        mask = mask[:, :, ::rate, ::rate]
    fs = f.shape
    int_fs = f.shape
    f_groups = F.split_axis(f, int_fs[0], axis=0)

    # from t(H*W*C) to w(b*k*k*c*h*w)
    bs = b.shape
    int_bs = b.shape
    pad = (ksize - stride + 1) // 2
    w = F.im2col(b, ksize, stride, pad=pad).transpose(0, 2, 3, 1)
    w = w.reshape(int_fs[0], -1, int_fs[1], ksize, ksize)
    # w = w.transpose(0, 1, 4, 2, 3)  # transpose to b*hw*c*k*k

    # process mask
    if mask is None:
        mask = xp.zeros([1, 1, bs[2], bs[3]])
    m = F.im2col(mask, ksize, stride, pad=pad).transpose(0, 2, 3, 1).data
    m = m.reshape(1, -1, 1, ksize, ksize)
    # m = m.transpose(0, 1, 4, 2, 3)  # transpose to b*hw*c*k*k
    # m = m[0]
    m = (m.mean(axis=(2, 3, 4)) == 0.).astype("float32").reshape(
        bs[0], 1, -1, 1, 1)

    w_groups = F.split_axis(w, int_bs[0], axis=0)
    raw_w_groups = F.split_axis(raw_w, int_bs[0], axis=0)
    y = []
    offsets = []
    k = fuse_k
    scale = softmax_scale
    fuse_weight = xp.eye(k).reshape(1, 1, k, k)
    for i, (xi, wi, raw_wi) in enumerate(
            zip(f_groups, w_groups, raw_w_groups)):
        # conv for compare
        wi = wi[0]
        mm = m[i]
        norm = F.sqrt(
            F.sum(F.square(wi), axis=(1, 2, 3), keepdims=True)) + 1e-4
        wi_normed = wi / F.tile(norm, (1, *wi.shape[1:]))
        pad = ksize // 2
        yi = F.convolution_2d(xi, wi_normed, pad=pad)

        # conv implementation for fuse scores to encourage large patches
        if fuse:
            yi = yi.reshape(1, 1, fs[2] * fs[3], bs[2] * bs[3])
            pad = fuse_k // 2
            yi = F.convolution_2d(yi, fuse_weight, pad=pad)
            yi = yi.reshape(1, fs[2], fs[3], bs[2], bs[3])
            yi = yi.transpose(0, 2, 1, 4, 3)
            yi = yi.reshape(1, 1, fs[2] * fs[3], bs[2] * bs[3])
            yi = F.convolution_2d(yi, fuse_weight, pad=pad)
            yi = yi.reshape(1, fs[3], fs[2], bs[3], bs[2])
            yi = yi.transpose(0, 4, 3, 2, 1)
        yi = yi.reshape(1, bs[2] * bs[3], fs[2], fs[3])

        # softmax to match
        yi *= mm  # mask
        yi = F.softmax(yi * scale, 1)
        yi *= mm  # mask

        # deconv for patch pasting
        # 3.1 paste center
        wi_center = raw_wi[0]
        pad = (kernel + rate * (yi.shape[2] - 1) - raw_fs[2]) // 2
        yi = F.deconvolution_2d(
            yi, wi_center, outsize=raw_fs[2:], stride=rate, pad=pad) / 4.
        y.append(yi)
        if return_flow:
            offset = xp.argmax(yi.data, axis=1)
            offset = xp.concatenate([offset // fs[2], offset % fs[2]], axis=0)
            offsets.append(offset)

    y = F.concat(y, axis=0).reshape(*raw_int_fs)
    if return_flow:
        offsets = xp.concatenate(offsets, axis=0).reshape(
            int_bs[0], 2, int_bs[2], int_bs[3])
        # case1: visualize optical flow: minus current position
        h_add = xp.tile(xp.reshape(xp.arange(bs[1]), [1, bs[1], 1, 1]),
                        [bs[0], 1, bs[2], 1])
        w_add = xp.tile(xp.reshape(xp.arange(bs[2]), [1, 1, bs[2], 1]),
                        [bs[0], bs[1], 1, 1])
        offsets = offsets - xp.concatenate([h_add, w_add], axis=3)
        # to flow image
        flow = flow_to_image_chainer(offsets)
        # case2: visualize which pixels are attended
        if rate != 1:
            flow = F.unpooling_2d(flow, rate)
        return y, flow
    return y, None
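# --- Usage sketch (illustrative; shapes and arguments are assumptions) ---
# contextual_attention above matches downscaled foreground patches against
# background patches and reassembles the foreground from the attended
# background. A minimal smoke test with random NCHW feature maps, assuming
# Chainer and no validity mask; fuse is disabled here because fuse_weight is
# created as float64 above while these features are float32.
import numpy as np
import chainer

def example_contextual_attention():
    f = chainer.Variable(np.random.rand(1, 64, 32, 32).astype(np.float32))
    b = chainer.Variable(np.random.rand(1, 64, 32, 32).astype(np.float32))
    y, flow = contextual_attention(f, b, mask=None, ksize=3, stride=1,
                                   rate=2, fuse=False, return_flow=False)
    assert y.shape == f.shape  # reconstruction has the foreground's shape
    return y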