def test_mode_1():
    input_shape = (1, 2, 3, 4, 5)
    data = np.random.rand(*input_shape).astype(np.float32)
    x = paddle.fluid.data(name="x", shape=input_shape)
    y = F.pad(x, pad=[1, 1, 1, 1, 1, 1], mode='reflect')
    place = paddle.NPUPlace(0)  # NPUPlace requires a device id
    exe = Executor(place)
    outputs = exe.run(feed={'x': data}, fetch_list=[y.name])
def test_dygraph_3(self):
    paddle.disable_static()
    input_shape = (3, 4, 5)
    pad = [3, 4]
    pad_3 = [3, 4, 5, 6, 7, 8]
    mode = "constant"
    value = 100
    input_data = np.random.rand(*input_shape).astype(np.float32)
    np_out1 = self._get_numpy_out(
        input_data, pad, mode, value, data_format="NCL")
    np_out2 = self._get_numpy_out(
        input_data, pad, mode, value, data_format="NLC")
    np_out3 = self._get_numpy_out(
        input_data, pad_3, mode, value, data_format="NCL")
    tensor_data = paddle.to_tensor(input_data)
    tensor_pad = paddle.to_tensor(pad, dtype="int32")

    y1 = F.pad(tensor_data,
               pad=tensor_pad,
               mode=mode,
               value=value,
               data_format="NCL")
    y2 = F.pad(tensor_data,
               pad=tensor_pad,
               mode=mode,
               value=value,
               data_format="NLC")
    y3 = F.pad(tensor_data,
               pad=pad_3,
               mode=mode,
               value=value,
               data_format="NCL")

    self.assertTrue(np.allclose(y1.numpy(), np_out1))
    self.assertTrue(np.allclose(y2.numpy(), np_out2))
    self.assertTrue(np.allclose(y3.numpy(), np_out3))
def forward(self, x):
    B, C, H, W = x.shape
    # assert [H, W] == self.img_size[:2], "Input image size ({}*{}) doesn't match model ({}*{}).".format(H, W, self.img_size[0], self.img_size[1])
    if W % self.patch_size[1] != 0:
        x = F.pad(x, [0, self.patch_size[1] - W % self.patch_size[1], 0, 0])
    if H % self.patch_size[0] != 0:
        x = F.pad(x, [0, 0, 0, self.patch_size[0] - H % self.patch_size[0]])
    x = self.proj(x)
    if self.norm is not None:
        _, _, Wh, Ww = x.shape
        x = x.flatten(2).transpose([0, 2, 1])
        x = self.norm(x)
        x = x.transpose([0, 2, 1]).reshape([-1, self.embed_dim, Wh, Ww])
    return x
def test_reflect_3():
    input_shape = (1, 2, 3, 4, 5)
    data = np.random.rand(*input_shape).astype(np.float32)
    x = paddle.to_tensor(data)
    y = F.pad(x,
              pad=[1, 1, 1, 1, 2, 3],
              value=1,
              mode='reflect',
              data_format="NCDHW")
def forward(self, input):
    if self.scale == 1.0:
        return input
    out = F.pad(input, [self.ka, self.kb, self.ka, self.kb])
    out = F.conv2d(out, weight=self.weight, groups=self.groups)
    out = F.interpolate(out, scale_factor=[self.scale, self.scale])
    return out
def forward(self, src, pha, err, hid, tri):
    '''
    Args:
        src: (B, 3, H, W) full resolution source image.
        pha: (B, 1, Hc, Wc) coarse alpha prediction.
        err: (B, 1, Hc, Wc) coarse error prediction.
        hid: (B, 32, Hc, Wc) coarse hidden encoding.
        tri: (B, 1, Hc, Wc) trimap prediction.
    '''
    h_full, w_full = paddle.shape(src)[2:]
    h_half, w_half = h_full // 2, w_full // 2
    h_quat, w_quat = h_full // 4, w_full // 4

    x = paddle.concat([hid, pha, tri], axis=1)
    x = F.interpolate(
        x,
        paddle.concat((h_half, w_half)),
        mode='bilinear',
        align_corners=False)
    y = F.interpolate(
        src,
        paddle.concat((h_half, w_half)),
        mode='bilinear',
        align_corners=False)

    if self.kernel_size == 3:
        x = F.pad(x, [3, 3, 3, 3])
        y = F.pad(y, [3, 3, 3, 3])

    x = self.conv1(paddle.concat([x, y], axis=1))
    x = self.conv2(x)

    if self.kernel_size == 3:
        x = F.interpolate(x, paddle.concat((h_full + 4, w_full + 4)))
        y = F.pad(src, [2, 2, 2, 2])
    else:
        x = F.interpolate(
            x, paddle.concat((h_full, w_full)), mode='nearest')
        y = src

    x = self.conv3(paddle.concat([x, y], axis=1))
    x = self.conv4(x)
    pha = x
    return pha
def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0,
                     pad_x1, pad_y0, pad_y1):
    _, channel, in_h, in_w = input.shape
    input = input.reshape((-1, in_h, in_w, 1))

    _, in_h, in_w, minor = input.shape
    kernel_h, kernel_w = kernel.shape

    out = input.reshape((-1, in_h, 1, in_w, 1, minor))
    out = out.transpose((0, 1, 3, 5, 2, 4))
    out = out.reshape((-1, 1, 1, 1))
    out = F.pad(out, [0, up_x - 1, 0, up_y - 1])
    out = out.reshape((-1, in_h, in_w, minor, up_y, up_x))
    out = out.transpose((0, 3, 1, 4, 2, 5))
    out = out.reshape((-1, minor, in_h * up_y, in_w * up_x))

    out = F.pad(
        out, [max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)])
    out = out[:, :,
              max(-pad_y0, 0):out.shape[2] - max(-pad_y1, 0),
              max(-pad_x0, 0):out.shape[3] - max(-pad_x1, 0), ]

    out = out.reshape(
        ([-1, 1, in_h * up_y + pad_y0 + pad_y1,
          in_w * up_x + pad_x0 + pad_x1]))
    w = paddle.flip(kernel, [0, 1]).reshape((1, 1, kernel_h, kernel_w))
    out = F.conv2d(out, w)
    out = out.reshape((
        -1,
        minor,
        in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1,
        in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1,
    ))
    out = out.transpose((0, 2, 3, 1))
    out = out[:, ::down_y, ::down_x, :]

    out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
    out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1

    return out.reshape((-1, channel, out_h, out_w))
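# Hedged usage sketch for upfirdn2d_native above (not from the original
# repo): the input, kernel, and up/down/pad values are illustrative. It
# checks the closed-form output size
#   out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1.
import numpy as np
import paddle
import paddle.nn.functional as F

x = paddle.randn([2, 3, 16, 16])  # (N, C, H, W)
k = paddle.to_tensor(
    np.outer([1., 3., 3., 1.], [1., 3., 3., 1.]).astype('float32'))
k = k / k.sum()  # normalized separable blur kernel
y = upfirdn2d_native(
    x, k, up_x=2, up_y=2, down_x=1, down_y=1,
    pad_x0=1, pad_x1=2, pad_y0=1, pad_y1=2)
print(y.shape)  # [2, 3, 32, 32]: (16*2 + 1 + 2 - 4) // 1 + 1 == 32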
def forward(self, x): """Forward function.""" # padding _, _, H, W = x.shape if W % self.patch_size[1] != 0: x = F.pad(x, [0, self.patch_size[1] - W % self.patch_size[1], 0, 0]) if H % self.patch_size[0] != 0: x = F.pad(x, [0, 0, 0, self.patch_size[0] - H % self.patch_size[0]]) x = self.proj(x) # B C Wh Ww if self.norm is not None: _, _, Wh, Ww = x.shape x = x.flatten(2).transpose([0, 2, 1]) x = self.norm(x) x = x.transpose([0, 2, 1]).reshape([-1, self.embed_dim, Wh, Ww]) return x
def inflate_tensor(tensor, scope):
    """Inflate tensor"""
    max_len = max([le for _, le in scope])
    batch_vecs = []
    for st, le in scope:
        cur_vecs = tensor[st:st + le]
        # paddle's constant-mode F.pad with a 2*ndim pad list pads from the
        # first dimension to the last, so the length axis (dim 0) takes the
        # first pair.
        cur_vecs = F.pad(cur_vecs, (0, max_len - le, 0, 0))
        batch_vecs.append(cur_vecs)
    return paddle.stack(batch_vecs, axis=0)
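# Hedged usage sketch for inflate_tensor (toy values, illustrative only):
# each (start, length) slice is right-padded along its first axis to the
# longest length in scope, then the slices are stacked into a batch.
import paddle

t = paddle.arange(12, dtype='float32').reshape([6, 2])
scope = [(0, 2), (2, 4)]  # (start, length) pairs
out = inflate_tensor(t, scope)
print(out.shape)  # [2, 4, 2]: batch of 2, both slices padded to length 4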
def forward(self, input):
    if self.scale == 1.0:
        return input
    out = F.pad(input, [self.ka, self.kb, self.ka, self.kb], mode='constant')
    out = self.conv(out)
    out = F.interpolate(
        out, scale_factor=self.scale, mode='nearest', align_corners=False)
    return out
def forward(self, inputs, gsrm_word_pos, gsrm_slf_attn_bias1,
            gsrm_slf_attn_bias2):
    # ===== GSRM Visual-to-semantic embedding block =====
    b, t, c = inputs.shape
    pvam_features = paddle.reshape(inputs, [-1, c])
    word_out = self.fc0(pvam_features)
    word_ids = paddle.argmax(F.softmax(word_out), axis=1)
    word_ids = paddle.reshape(x=word_ids, shape=[-1, t, 1])

    # ===== GSRM Semantic reasoning block =====
    """
    This module is built from bi-directional transformers:
    ngram_feature1 is the forward one, ngram_feature2 is the backward one.
    """
    pad_idx = self.char_num
    word1 = paddle.cast(word_ids, "float32")
    word1 = F.pad(word1, [1, 0], value=1.0 * pad_idx, data_format="NLC")
    word1 = paddle.cast(word1, "int64")
    word1 = word1[:, :-1, :]
    word2 = word_ids

    enc_inputs_1 = [word1, gsrm_word_pos, gsrm_slf_attn_bias1]
    enc_inputs_2 = [word2, gsrm_word_pos, gsrm_slf_attn_bias2]

    gsrm_feature1 = self.wrap_encoder0(enc_inputs_1)
    gsrm_feature2 = self.wrap_encoder1(enc_inputs_2)

    gsrm_feature2 = F.pad(
        gsrm_feature2, [0, 1], value=0., data_format="NLC")
    gsrm_feature2 = gsrm_feature2[:, 1:, ]
    gsrm_features = gsrm_feature1 + gsrm_feature2

    gsrm_out = self.mul(gsrm_features)
    b, t, c = gsrm_out.shape
    gsrm_out = paddle.reshape(gsrm_out, [-1, c])

    return gsrm_features, word_out, gsrm_out
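# Hedged illustration (toy ids, not from the original model) of the NLC
# pad-and-slice trick used above: padding on the left and dropping the last
# step yields the forward-shifted sequence; padding on the right and
# dropping the first step yields the backward-shifted one.
import paddle
import paddle.nn.functional as F

ids = paddle.to_tensor([1., 2., 3.]).reshape([1, 3, 1])  # (N, L, C)
pad_idx = 9.
fwd = F.pad(ids, [1, 0], value=pad_idx, data_format="NLC")[:, :-1, :]
bwd = F.pad(ids, [0, 1], value=0., data_format="NLC")[:, 1:, :]
print(fwd.numpy().ravel(), bwd.numpy().ravel())  # [9. 1. 2.] [2. 3. 0.]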
def test_static(self):
    paddle.enable_static()
    self.place = fluid.NPUPlace(
        0) if fluid.core.is_compiled_with_npu() else fluid.CPUPlace()
    with program_guard(Program(), Program()):
        input_shape = (1, 2, 3, 4, 5)
        pad = [1, 2, 1, 1, 3, 4]
        mode = "constant"
        value = 0
        input_data = np.random.rand(*input_shape).astype(np.float32)
        x = paddle.fluid.data(name="x", shape=input_shape)
        result1 = F.pad(
            x=x, pad=pad, value=value, mode=mode, data_format="NCDHW")
        result2 = F.pad(
            x=x, pad=pad, value=value, mode=mode, data_format="NDHWC")
        exe = Executor(self.place)
        fetches = exe.run(default_main_program(),
                          feed={"x": input_data},
                          fetch_list=[result1, result2])

        np_out1 = self._get_numpy_out(
            input_data, pad, mode, value, data_format="NCDHW")
        np_out2 = self._get_numpy_out(
            input_data, pad, mode, value, data_format="NDHWC")
        self.assertTrue(np.allclose(fetches[0], np_out1))
        self.assertTrue(np.allclose(fetches[1], np_out2))
def forward(self, input):
    if self.scale == 1.0:
        return input
    out = F.pad(input, [self.ka, self.kb, self.ka, self.kb])
    out = F.conv2d(out, weight=self.weight, groups=self.groups)
    out.stop_gradient = False
    inv_scale = 1 / self.scale
    int_inv_scale = int(inv_scale)
    assert (inv_scale == int_inv_scale)
    # out = out[:, :, ::int_inv_scale, ::int_inv_scale]
    # patch end
    out = paddle.fluid.layers.resize_nearest(out, scale=self.scale)
    return out
def forward(self, x):
    generated_filter = self.filter_gen_conv(self.avg_pool(x))
    x = self.input_redu_conv(x)
    b, c, h, w = x.shape
    x = x.reshape([1, b * c, h, w])
    generated_filter = generated_filter.reshape(
        [b * c, 1, self.filter_size, self.filter_size])
    x = F.pad(x, self.pad, mode='constant', value=0)
    output = F.conv2d(x, weight=generated_filter, groups=b * c)
    output = output.reshape([b, self.channels, h, w])
    output = self.norm(output)
    output = self.act(output)
    if self.fusion:
        output = self.fusion_conv(output)
    return output
def forward(self, x):
    ih, iw = x.shape[-2:]
    kh, kw = self.weight.shape[-2:]
    sh, sw = self.stride
    oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
    pad_h = max(
        (oh - 1) * self.stride[0] + (kh - 1) * self._dilation[0] + 1 - ih, 0)
    pad_w = max(
        (ow - 1) * self.stride[1] + (kw - 1) * self._dilation[1] + 1 - iw, 0)
    if pad_h > 0 or pad_w > 0:
        x = F.pad(
            x,
            [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
    return F.conv2d(x, self.weight, self.bias, self.stride, self._padding,
                    self._dilation, self._groups)
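# Hedged standalone sketch of the TF-style "SAME" padding computed in the
# forward above; the helper name and shapes are illustrative assumptions.
# Output spatial size equals ceil(input / stride) regardless of kernel size.
import math
import paddle
import paddle.nn.functional as F

def same_pad(x, kh, kw, sh, sw, dh=1, dw=1):
    ih, iw = x.shape[-2:]
    oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
    pad_h = max((oh - 1) * sh + (kh - 1) * dh + 1 - ih, 0)
    pad_w = max((ow - 1) * sw + (kw - 1) * dw + 1 - iw, 0)
    return F.pad(x, [pad_w // 2, pad_w - pad_w // 2,
                     pad_h // 2, pad_h - pad_h // 2])

x = same_pad(paddle.randn([1, 3, 7, 7]), kh=3, kw=3, sh=2, sw=2)
w = paddle.randn([8, 3, 3, 3])
print(F.conv2d(x, w, stride=2).shape)  # [1, 8, 4, 4], and ceil(7/2) == 4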
def forward(self, inputs: paddle.Tensor) -> paddle.Tensor:
    y = self.conv0(inputs)
    if self.dilation > 1:
        padding = self.dilation
        y = F.pad(y, [padding, padding, padding, padding])
    conv1 = self.conv1(y)
    conv2 = self.conv2(conv1)
    if self.shortcut:
        short = inputs
    else:
        short = self.short(inputs)
    y = paddle.add(x=short, y=conv2)
    y = F.relu(y)
    return y
def forward(self, feat_list):
    C3, C4 = feat_list
    x = self.in_conv(C4)
    x_shape = paddle.shape(x)
    P_h, P_w = self.down_factor
    Q_h, Q_w = paddle.ceil(x_shape[2] / P_h).astype('int32'), paddle.ceil(
        x_shape[3] / P_w).astype('int32')
    pad_h, pad_w = (Q_h * P_h - x_shape[2]).astype('int32'), (
        Q_w * P_w - x_shape[3]).astype('int32')
    if pad_h > 0 or pad_w > 0:
        padding = paddle.concat(
            [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2],
            axis=0)
        feat = F.pad(x, padding)
    else:
        feat = x

    feat = feat.reshape([0, x_shape[1], Q_h, P_h, Q_w, P_w])
    feat = feat.transpose([0, 3, 5, 1, 2, 4]).reshape(
        [-1, self.inter_channels, Q_h, Q_w])
    feat = self.global_relation(feat)

    feat = feat.reshape([x_shape[0], P_h, P_w, x_shape[1], Q_h, Q_w])
    feat = feat.transpose([0, 4, 5, 3, 1, 2]).reshape(
        [-1, self.inter_channels, P_h, P_w])
    feat = self.local_relation(feat)

    feat = feat.reshape([x_shape[0], Q_h, Q_w, x_shape[1], P_h, P_w])
    feat = feat.transpose([0, 3, 1, 4, 2, 5]).reshape(
        [0, self.inter_channels, P_h * Q_h, P_w * Q_w])
    if pad_h > 0 or pad_w > 0:
        feat = paddle.slice(
            feat,
            axes=[2, 3],
            starts=[pad_h // 2, pad_w // 2],
            ends=[pad_h // 2 + x_shape[2], pad_w // 2 + x_shape[3]])

    feat = self.out_conv(paddle.concat([feat, x], axis=1))
    output = self.cls(feat)
    if self.enable_auxiliary_loss:
        auxout = self.aux(C3)
        return [output, auxout]
    else:
        return [output]
def check_static_result_1(self, place):
    paddle.enable_static()
    with program_guard(Program(), Program()):
        input_shape = (1, 2, 3, 4, 5)
        pad = [1, 2, 1, 1, 3, 4]
        mode = "constant"
        value = 100
        input_data = np.random.rand(*input_shape).astype(np.float32)
        x = paddle.fluid.data(name="x", shape=input_shape)
        result = F.pad(
            x=x, pad=pad, value=value, mode=mode, data_format="NCDHW")
        exe = Executor(place)
        fetches = exe.run(default_main_program(),
                          feed={"x": input_data},
                          fetch_list=[result])

        np_out = self._get_numpy_out(input_data, pad, mode, value)
        self.assertTrue(np.allclose(fetches[0], np_out))
def forward(self, inputs):
    y = self.conv0(inputs)
    ####################################################################
    # If the dilation rate is > 1, apply the corresponding padding.
    # Performance drops without the following padding.
    if self.dilation > 1:
        padding = self.dilation
        y = F.pad(y, [padding, padding, padding, padding])
    #####################################################################
    conv1 = self.conv1(y)
    conv2 = self.conv2(conv1)
    if self.shortcut:
        short = inputs
    else:
        short = self.short(inputs)
    y = paddle.add(x=short, y=conv2)
    y = F.relu(y)
    return y
def local_pairwise_distances2(x, y, max_distance=9):
    """Computes pairwise squared l2 distances using a local search window.

    Naive implementation using map_fn.
    Used as a slow fallback for when correlation_cost is not available.

    Args:
        x: Float32 tensor of shape [height, width, feature_dim].
        y: Float32 tensor of shape [height, width, feature_dim].
        max_distance: Integer, the maximum distance in pixel coordinates
            per dimension which is considered to be in the search window.

    Returns:
        Float32 distances tensor of shape
            [height, width, (2 * max_distance + 1) ** 2].
    """
    ori_h, ori_w, _ = x.shape
    x = paddle.transpose(x, [2, 0, 1]).unsqueeze(0)
    x = F.avg_pool2d(x, (2, 2), (2, 2))
    y = paddle.transpose(y, [2, 0, 1]).unsqueeze(0)
    y = F.avg_pool2d(y, (2, 2), (2, 2))
    _, channels, height, width = x.shape
    padding_val = 1e20
    padded_y = F.pad(
        y, (max_distance, max_distance, max_distance, max_distance),
        mode='constant',
        value=padding_val)
    offset_y = F.unfold(
        padded_y, kernel_sizes=[height, width]).reshape(
            [1, channels, height, width, -1])
    x = x.reshape([1, channels, height, width, 1])
    minus = x - offset_y
    dists = paddle.sum(
        paddle.multiply(minus, minus),
        axis=1).reshape([1, height, width, -1]).transpose([0, 3, 1, 2])
    dists = (paddle.nn.functional.sigmoid(dists) - 0.5) * 2
    dists = F.interpolate(
        dists, size=[ori_h, ori_w], mode='bilinear', align_corners=True)
    dists = dists.squeeze(0).transpose([1, 2, 0])
    return dists
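# Hedged usage sketch for local_pairwise_distances2 (random features,
# illustrative shapes): the result has one channel per offset in the
# (2 * max_distance + 1) ** 2 search window, evaluated on the pooled grid
# and upsampled back to the input resolution.
import paddle

x = paddle.randn([32, 32, 64])  # (height, width, feature_dim)
y = paddle.randn([32, 32, 64])
d = local_pairwise_distances2(x, y, max_distance=4)
print(d.shape)  # [32, 32, 81]: (2*4 + 1) ** 2 == 81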
def fluid_layer(self, place):
    main = fluid.Program()
    start = fluid.Program()
    with fluid.unique_name.guard():
        with fluid.program_guard(main, start):
            input_shape = (-1, -1, -1, self.num_channels) \
                if self.channel_last else (-1, self.num_channels, -1, -1)
            x_var = fluid.data("input", input_shape, dtype=self.dtype)
            weight_attr = I.NumpyArrayInitializer(self.weight)
            if self.bias is None:
                bias_attr = False
            else:
                bias_attr = I.NumpyArrayInitializer(self.bias)
            if self.padding_mode != 'zeros':
                x_var = F.pad(x_var,
                              self._reversed_padding_repeated_twice,
                              mode=self.padding_mode,
                              data_format=self.data_format)
                padding = 0
            else:
                padding = self.padding
            y_var = fluid.layers.conv2d(
                x_var,
                self.num_filters,
                self.filter_size,
                padding=padding,
                stride=self.stride,
                dilation=self.dilation,
                groups=self.groups,
                param_attr=weight_attr,
                bias_attr=bias_attr,
                data_format=self.data_format)
    feed_dict = {"input": self.input}
    exe = fluid.Executor(place)
    exe.run(start)
    y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var])
    return y_np
def forward(self, input):
    if self.scale == 1.0:
        return input
    out = F.pad(input, [self.ka, self.kb, self.ka, self.kb])
    out = F.conv2d(out, weight=self.weight, groups=self.groups)
    out.stop_gradient = False
    # Recent versions of pytorch have a bug that affects the convergence of
    # this model.
    # original code
    # out = F.interpolate(out, scale_factor=[self.scale, self.scale])
    # original code end
    # A patch that 'might' work around this bug; see
    # https://github.com/AliaksandrSiarohin/first-order-model/issues/146#issue-624354694
    inv_scale = 1 / self.scale
    int_inv_scale = int(inv_scale)
    assert (inv_scale == int_inv_scale)
    out = out[:, :, ::int_inv_scale, ::int_inv_scale]
    # patch end
    return out
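# Hedged numeric check (toy tensor, an assumption rather than repo code) of
# the patch above: for an integer inverse scale k, strided slicing
# out[:, :, ::k, ::k] picks the same pixels as nearest-neighbour
# downsampling by 1/k.
import paddle
import paddle.nn.functional as F

x = paddle.arange(16, dtype='float32').reshape([1, 1, 4, 4])
a = x[:, :, ::2, ::2]
b = F.interpolate(x, scale_factor=0.5, mode='nearest')
print(bool((a == b).all()))  # True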
def relax_onehot(self, label, num_classes):
    # pad the label, and remap ignore_index pixels to num_classes
    if len(label.shape) == 3:
        label = label.unsqueeze(1)
    h, w = label.shape[-2], label.shape[-1]
    label = F.pad(label, [self.border] * 4, value=num_classes)
    label = label.squeeze(1)
    ignore_mask = (label == self.ignore_index).astype('int64')
    label = label * (1 - ignore_mask) + num_classes * ignore_mask
    onehot = 0
    for i in range(-self.border, self.border + 1):
        for j in range(-self.border, self.border + 1):
            h_start, h_end = 1 + i, h + 1 + i
            w_start, w_end = 1 + j, w + 1 + j
            label_ = label[:, h_start:h_end, w_start:w_end]
            onehot_ = F.one_hot(label_, num_classes + 1)
            onehot += onehot_
    onehot = (onehot > 0).astype('int64')
    onehot = paddle.transpose(onehot, (0, 3, 1, 2))
    return onehot
def functional(self, place):
    main = fluid.Program()
    start = fluid.Program()
    with fluid.unique_name.guard():
        with fluid.program_guard(main, start):
            input_shape = (-1, -1, -1, self.num_channels) \
                if self.channel_last else (-1, self.num_channels, -1, -1)
            x_var = fluid.data("input", input_shape, dtype=self.dtype)
            w_var = fluid.data(
                "weight", self.weight_shape, dtype=self.dtype)
            b_var = fluid.data(
                "bias", (self.num_filters, ), dtype=self.dtype)
            if self.padding_mode != 'zeros':
                x_var = F.pad(x_var,
                              self._reversed_padding_repeated_twice,
                              mode=self.padding_mode,
                              data_format=self.data_format)
                padding = 0
            else:
                padding = self.padding
            y_var = F.conv2d(
                x_var,
                w_var,
                b_var if not self.no_bias else None,
                padding=padding,
                stride=self.stride,
                dilation=self.dilation,
                groups=self.groups,
                data_format=self.data_format)
    feed_dict = {"input": self.input, "weight": self.weight}
    if self.bias is not None:
        feed_dict["bias"] = self.bias
    exe = fluid.Executor(place)
    exe.run(start)
    y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var])
    return y_np
def forward(self, feat_list):
    C3, C4 = feat_list
    x = self.in_conv(C4)
    n, c, h, w = x.shape
    P_h, P_w = self.down_factor
    Q_h, Q_w = math.ceil(h / P_h), math.ceil(w / P_w)
    pad_h, pad_w = Q_h * P_h - h, Q_w * P_w - w
    if pad_h > 0 or pad_w > 0:
        padding = [
            pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2
        ]
        feat = F.pad(x, padding)
    else:
        feat = x

    feat = feat.reshape([n, c, Q_h, P_h, Q_w, P_w])
    feat = feat.transpose([0, 3, 5, 1, 2, 4]).reshape([-1, c, Q_h, Q_w])
    feat = self.global_relation(feat)

    feat = feat.reshape([n, P_h, P_w, c, Q_h, Q_w])
    feat = feat.transpose([0, 4, 5, 3, 1, 2]).reshape([-1, c, P_h, P_w])
    feat = self.local_relation(feat)

    feat = feat.reshape([n, Q_h, Q_w, c, P_h, P_w])
    feat = feat.transpose([0, 3, 1, 4, 2, 5]).reshape(
        [n, c, P_h * Q_h, P_w * Q_w])
    if pad_h > 0 or pad_w > 0:
        feat = feat[:, :, pad_h // 2:pad_h // 2 + h,
                    pad_w // 2:pad_w // 2 + w]

    feat = self.out_conv(paddle.concat([feat, x], axis=1))
    output = self.cls(feat)
    if self.enable_auxiliary_loss:
        auxout = self.aux(C3)
        return [output, auxout]
    else:
        return [output]
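# Hedged round-trip check (toy sizes, illustrative) of the block-partition
# reshapes used above: splitting H x W into (Q_h * P_h, Q_w * P_w) blocks
# and applying the inverse transpose/reshape recovers the original tensor.
import paddle

n, c, P_h, P_w, Q_h, Q_w = 1, 2, 2, 2, 3, 3
x = paddle.randn([n, c, Q_h * P_h, Q_w * P_w])
f = x.reshape([n, c, Q_h, P_h, Q_w, P_w]).transpose([0, 3, 5, 1, 2, 4])
g = f.transpose([0, 3, 4, 1, 5, 2]).reshape([n, c, Q_h * P_h, Q_w * P_w])
print(bool((x == g).all()))  # True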
def forward(self, x):
    return F.pad(x, self.size, mode="replicate")
def forward(self, x, mask_matrix):
    """Forward function.

    Args:
        x: Input feature, tensor size (B, H*W, C).
        H, W: Spatial resolution of the input feature.
        mask_matrix: Attention mask for cyclic shift.
    """
    B, L, C = x.shape
    H, W = self.H, self.W
    assert L == H * W, "input feature has wrong size"

    shortcut = x
    x = self.norm1(x)
    x = x.reshape([B, H, W, C])

    # pad feature maps to multiples of window size
    pad_l = pad_t = 0
    pad_r = (self.window_size - W % self.window_size) % self.window_size
    pad_b = (self.window_size - H % self.window_size) % self.window_size
    x = F.pad(x, [0, pad_l, 0, pad_b, 0, pad_r, 0, pad_t])
    _, Hp, Wp, _ = x.shape

    # cyclic shift
    if self.shift_size > 0:
        shifted_x = paddle.roll(
            x, shifts=(-self.shift_size, -self.shift_size), axis=(1, 2))
        attn_mask = mask_matrix
    else:
        shifted_x = x
        attn_mask = None

    # partition windows
    x_windows = window_partition(
        shifted_x, self.window_size)  # nW*B, window_size, window_size, C
    x_windows = x_windows.reshape(
        [-1, self.window_size * self.window_size,
         C])  # nW*B, window_size*window_size, C

    # W-MSA/SW-MSA
    attn_windows = self.attn(
        x_windows, mask=attn_mask)  # nW*B, window_size*window_size, C

    # merge windows
    attn_windows = attn_windows.reshape(
        [-1, self.window_size, self.window_size, C])
    shifted_x = window_reverse(attn_windows, self.window_size, Hp,
                               Wp)  # B H' W' C

    # reverse cyclic shift
    if self.shift_size > 0:
        x = paddle.roll(
            shifted_x,
            shifts=(self.shift_size, self.shift_size),
            axis=(1, 2))
    else:
        x = shifted_x

    if pad_r > 0 or pad_b > 0:
        x = x[:, :H, :W, :]

    x = x.reshape([B, H * W, C])

    # FFN
    x = shortcut + self.drop_path(x)
    x = x + self.drop_path(self.mlp(self.norm2(x)))

    return x
def _forward(self, x):
    offset = x.shape[-1] + 1 - paddle.ones([x.shape[-1]]).cumsum(-1)
    z = F.sigmoid(x - offset.log())
    z_cumprod = (1 - z).cumprod(-1)
    return F.pad(z, [0] * 2 * (len(x.shape) - 1) + [0, 1], value=1) * \
        F.pad(z_cumprod, [0] * 2 * (len(x.shape) - 1) + [1, 0], value=1)
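# Hedged numeric check (random input, an assumption) of the stick-breaking
# construction in _forward above: the K + 1 weights along the last axis are
# non-negative, and the telescoping sum
# z_1 + z_2*(1 - z_1) + ... + prod_k(1 - z_k) equals 1.
import paddle
import paddle.nn.functional as F

x = paddle.randn([4, 5])
offset = x.shape[-1] + 1 - paddle.ones([x.shape[-1]]).cumsum(-1)
z = F.sigmoid(x - offset.log())
z_cumprod = (1 - z).cumprod(-1)
w = F.pad(z, [0, 0, 0, 1], value=1) * F.pad(z_cumprod, [0, 0, 1, 0], value=1)
print(w.shape, float(w.sum(-1).mean()))  # [4, 6], ~1.0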
def test_variable():
    input_shape = (1, 2, 3, 4, 5)
    data = np.random.rand(*input_shape).astype(np.float32)
    y = F.pad(x=data, pad=[1, 1, 1, 1, 1, 1], data_format="NCDHW")
def forward(self, x, offset, mask):
    in_C = self.in_channels
    out_C = self.out_channels
    stride = self.stride
    padding = self.padding
    # dilation = self.dilation
    groups = self.groups
    N, _, H, W = x.shape
    _, w_in, kH, kW = self.weight.shape
    out_W = (W + 2 * padding - (kW - 1)) // stride
    out_H = (H + 2 * padding - (kH - 1)) // stride

    # ===== 1. Pad the image x to obtain the padded image pad_x =====
    pad_x_H = H + padding * 2 + 1
    pad_x_W = W + padding * 2 + 1
    pad_x = F.pad(
        x,
        pad=[0, 0, 0, 0, padding, padding + 1, padding, padding + 1],
        value=0.0)

    # ===== 2. Compute the coordinates of all sampling points =====
    # Positions of the kernel centers in pad_x.
    y_outer, x_outer = paddle.meshgrid(
        [paddle.arange(out_H), paddle.arange(out_W)])
    y_outer = y_outer * stride + padding
    x_outer = x_outer * stride + padding
    # [out_H, out_W, 2], kernel-center positions in pad_x
    start_pos_yx = paddle.stack((y_outer, x_outer), 2).cast(dtype='float32')
    # [1, out_H, out_W, 1, 2]
    start_pos_yx = paddle.unsqueeze(start_pos_yx, axis=[0, 3])
    # [N, out_H, out_W, kH*kW, 2]
    start_pos_yx = paddle.tile(start_pos_yx, [N, 1, 1, kH * kW, 1])
    start_pos_y = start_pos_yx[:, :, :, :, :1]  # [N, out_H, out_W, kH*kW, 1]
    start_pos_x = start_pos_yx[:, :, :, :, 1:]  # [N, out_H, out_W, kH*kW, 1]
    start_pos_y.stop_gradient = True
    start_pos_x.stop_gradient = True

    # Offsets of each tap inside the kernel.
    half_W = (kW - 1) // 2
    half_H = (kH - 1) // 2
    y_inner, x_inner = paddle.meshgrid(
        [paddle.arange(kH), paddle.arange(kW)])
    y_inner -= half_H
    x_inner -= half_W
    # [kH, kW, 2], intra-kernel offsets
    filter_inner_offset_yx = paddle.stack(
        (y_inner, x_inner), 2).cast(dtype='float32')
    # [1, 1, 1, kH*kW, 2]
    filter_inner_offset_yx = paddle.reshape(
        filter_inner_offset_yx, (1, 1, 1, kH * kW, 2))
    # [N, out_H, out_W, kH*kW, 2]
    filter_inner_offset_yx = paddle.tile(
        filter_inner_offset_yx, [N, out_H, out_W, 1, 1])
    filter_inner_offset_y = filter_inner_offset_yx[:, :, :, :, :1]
    filter_inner_offset_x = filter_inner_offset_yx[:, :, :, :, 1:]
    filter_inner_offset_y.stop_gradient = True
    filter_inner_offset_x.stop_gradient = True

    # Predicted offsets.
    offset = paddle.transpose(
        offset, [0, 2, 3, 1])  # [N, out_H, out_W, kH*kW*2]
    offset_yx = paddle.reshape(offset, (N, out_H, out_W, kH * kW, 2))
    offset_y = offset_yx[:, :, :, :, :1]  # [N, out_H, out_W, kH*kW, 1]
    offset_x = offset_yx[:, :, :, :, 1:]  # [N, out_H, out_W, kH*kW, 1]

    # Final sampling positions, clipped to lie inside pad_x.
    pos_y = start_pos_y + filter_inner_offset_y + offset_y
    pos_x = start_pos_x + filter_inner_offset_x + offset_x
    pos_y = paddle.clip(pos_y, 0.0, H + padding * 2 - 1.0)
    pos_x = paddle.clip(pos_x, 0.0, W + padding * 2 - 1.0)

    # ===== 3. Sample with bilinear interpolation via F.grid_sample() =====
    pos_x = pos_x / (pad_x_W - 1) * 2.0 - 1.0
    pos_y = pos_y / (pad_x_H - 1) * 2.0 - 1.0
    xtyt = paddle.concat([pos_x, pos_y], -1)  # [N, out_H, out_W, kH*kW, 2]
    xtyt = paddle.reshape(xtyt, (N, out_H, out_W * kH * kW, 2))
    value = F.grid_sample(
        pad_x,
        xtyt,
        mode='bilinear',
        padding_mode='zeros',
        align_corners=True)  # [N, in_C, out_H, out_W*kH*kW]
    value = paddle.reshape(value, (N, in_C, out_H, out_W, kH * kW))
    value = value.transpose((0, 1, 4, 2, 3))  # [N, in_C, kH*kW, out_H, out_W]

    # ===== 4. Multiply by the modulation mask =====
    mask = paddle.unsqueeze(mask, [1])  # [N, 1, kH*kW, out_H, out_W]
    value = value * mask  # [N, in_C, kH*kW, out_H, out_W]
    new_x = paddle.reshape(
        value, (N, in_C * kH * kW, out_H, out_W))

    # ===== 5. Multiply by this layer's weight and add the bias =====
    # Reshape the weight into a 1x1 kernel:
    # [out_C, w_in, kH, kW] -> [out_C, w_in*kH*kW, 1, 1], then apply a
    # 1x1 convolution.
    rw = paddle.reshape(self.weight, (out_C, w_in * kH * kW, 1, 1))
    out = F.conv2d(new_x, rw, bias=self.bias, stride=1,
                   groups=groups)  # [N, out_C, out_H, out_W]
    return out
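# Hedged shape sketch (illustrative sizes, not from the original repo) for
# the deformable-conv forward above: offset carries 2 values (dy, dx) and
# mask 1 value per kernel tap and output location. Zero offsets with an
# all-ones mask reduce the layer to a plain convolution.
import paddle

N, in_C, H, W = 2, 16, 32, 32
kH = kW = 3
stride, padding = 1, 1
out_H = (H + 2 * padding - (kH - 1)) // stride  # as computed in forward
out_W = (W + 2 * padding - (kW - 1)) // stride
offset = paddle.zeros([N, 2 * kH * kW, out_H, out_W])
mask = paddle.ones([N, kH * kW, out_H, out_W])
print(out_H, out_W)  # 32 32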