def forward(self, x, *cond_inputs, norm_weights=(None, None), **kwargs): """ Spatially Adaptive Normalization (SPADE) forward. """ output = self.norm(x) for i in range(len(cond_inputs)): if cond_inputs[i] is None: continue if type(cond_inputs[i]) == list: cond_input, mask = cond_inputs[i] mask = L.image_resize(mask, size=x.shape[2:], resample='BILINEAR', align_corners=False) else: cond_input = cond_inputs[i] mask = None label_map = L.image_resize(cond_input, x.shape[2:]) if norm_weights is None or norm_weights[0] is None or i != 0: affine_params = self.mlps[i](label_map) else: affine_params = self.mlps[i](label_map, conv_weights=norm_weights) gamma, beta = L.split(affine_params, 2, 1) if mask is not None: gamma = gamma * (1 - mask) beta = beta * (1 - mask) output = output * (1 + gamma) + beta return output
def forward(self, ten_first, ten_second):
    """Run ``self.network`` on two images and return flow at input size.

    Both inputs are resized so that height and width become multiples of
    32, ``self.network`` is called inside ``dg.no_grad()``, and the result
    is resized back to the original size. Channel 0 is then scaled by the
    width ratio and channel 1 by the height ratio.
    """
    height, width = ten_first.shape[2:]
    padded_height = int(math.floor(math.ceil(height / 32.0) * 32.0))
    padded_width = int(math.floor(math.ceil(width / 32.0) * 32.0))
    padded_size = (padded_height, padded_width)

    ten_first = L.image_resize(ten_first, padded_size)
    ten_second = L.image_resize(ten_second, padded_size)

    with dg.no_grad():
        flow = self.network(ten_first, ten_second)

    flow = L.image_resize(flow, (height, width))
    # Undo the resize in the flow values themselves.
    flow[:, 0, :, :] *= float(width) / float(padded_width)
    flow[:, 1, :, :] *= float(height) / float(padded_height)
    return flow
def fast_preprocess_layer(img, input_size, normalize, subtract_means, to_float, mean=MEANS, std=STD):
    """Preprocess an image batch with paddle ops instead of numpy.

    According to the original author this is used at prediction time and
    is faster than the numpy path.

    The input is transposed with ``perm=[0, 3, 1, 2]``, resized to
    ``input_size x input_size`` (bilinear), then exactly one of the
    following is applied, in this priority order:

    * ``normalize``: ``(img - mean) / std``
    * ``subtract_means``: ``img - mean``
    * ``to_float``: ``img / 255``

    Finally the channel axis is reversed (the original comment describes
    this as converting to RGB).

    Returns:
        The preprocessed tensor in ``[n, c, h, w]`` order.
    """

    def _per_channel_constant(values, like):
        # Build a (1, 3, 1, 1) float32 constant and expand it to `like`.
        const = P.create_tensor(dtype='float32')
        P.assign(np.array(values).astype(np.float32), const)
        const = P.reshape(const, (1, 3, 1, 1))
        return P.expand_as(const, target_tensor=like)

    # To channel-first layout.
    img = P.transpose(img, perm=[0, 3, 1, 2])
    img = P.image_resize(img, out_shape=[input_size, input_size], resample="BILINEAR")

    if normalize:
        mean_map = _per_channel_constant(mean, img)
        std_map = _per_channel_constant(std, img)
        img = (img - mean_map) / std_map
    elif subtract_means:
        mean_map = _per_channel_constant(mean, img)
        img = (img - mean_map)
    elif to_float:
        # Only rescale to [0, 1].
        img = img / 255

    # Reverse the channel order (channel 2 first, channel 0 last).
    reversed_channels = [img[:, c:c + 1, :, :] for c in (2, 1, 0)]
    return P.concat(reversed_channels, axis=1)
def FPN(s8, s16, s32):
    """Build the feature-pyramid ops on top of three backbone features.

    Lateral 1x1 convolutions are applied to ``s32``, ``s16`` and ``s8``;
    the coarser result is bilinearly resized to the finer input's spatial
    shape and added to it. Each merged map then goes through a 3x3
    convolution followed by ReLU. Two extra stride-2 3x3 convolutions are
    stacked on the coarsest prediction map.

    Returns:
        ``(y3, y2, y1, s64, s128)`` where ``y3`` derives from ``s8``,
        ``y2`` from ``s16``, ``y1`` from ``s32``, and ``s64`` / ``s128``
        are the two extra downsampled maps.
    """

    def _conv(feat, param_prefix, kernel, **conv_kwargs):
        # 256-channel conv with the shared initializers and explicit names.
        weight_attr = ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.01),
                                name=param_prefix + ".weight")
        bias_attr = ParamAttr(initializer=fluid.initializer.Constant(0.0),
                              name=param_prefix + ".bias")
        return P.conv2d(feat, 256, filter_size=kernel,
                        param_attr=weight_attr, bias_attr=bias_attr,
                        **conv_kwargs)

    def _resize_like(feat, reference):
        # Bilinear resize of `feat` to the runtime spatial shape of `reference`.
        target_h, target_w = P.shape(reference)[2], P.shape(reference)[3]
        return P.image_resize(feat, out_shape=[target_h, target_w], resample="BILINEAR")

    # Top-down pathway with lateral connections.
    y1 = _conv(s32, "fpn.lat_layers.0", (1, 1))

    upsampled = _resize_like(y1, s16)
    y2 = _conv(s16, "fpn.lat_layers.1", (1, 1))
    y2 = P.elementwise_add(upsampled, y2, act=None)

    upsampled = _resize_like(y2, s8)
    y3 = _conv(s8, "fpn.lat_layers.2", (1, 1))
    y3 = P.elementwise_add(upsampled, y3, act=None)

    # Prediction convolutions.
    y1 = P.relu(_conv(y1, "fpn.pred_layers.0", (3, 3), padding=1))
    y2 = P.relu(_conv(y2, "fpn.pred_layers.1", (3, 3), padding=1))
    y3 = P.relu(_conv(y3, "fpn.pred_layers.2", (3, 3), padding=1))

    # Downsample y1 two more times.
    s64 = _conv(y1, "fpn.downsample_layers.0", (3, 3), stride=2, padding=1)
    s128 = _conv(s64, "fpn.downsample_layers.1", (3, 3), stride=2, padding=1)
    return y3, y2, y1, s64, s128
def __call__(self, img):
    """Crop the window chosen by ``self._get_params`` and resize it.

    The crop is taken from the first two axes of ``img``, given a leading
    axis of size 1, resized to ``self.output_size`` with
    ``data_format='NHWC'``, and returned without that leading axis.
    """
    col_start, row_start, n_cols, n_rows = self._get_params(img)
    patch = img[row_start:row_start + n_rows, col_start:col_start + n_cols]
    batched_shape = (1, patch.shape[0], patch.shape[1], patch.shape[2])
    batched = reshape(patch, shape=batched_shape)
    resized = image_resize(input=batched, out_shape=self.output_size, data_format='NHWC')
    return resized[0]
def forward(self, x):
    """Run the two parallel heads and their fused head.

    The backbone feature goes through the ``p`` branch
    (``conv5p -> sp -> conv6p -> conv7p``) and the ``c`` branch
    (``conv5c -> sc -> conv6c -> conv7c``). The two pre-output features
    are summed and passed to ``conv7pc``. All three outputs are resized
    to ``x.shape[2:]``.

    Returns:
        ``[p_out, c_out, sum_out]``.
    """
    feature = self.backbone(x)

    p_hidden = self.conv6p(self.sp(self.conv5p(feature)))
    p_logits = self.conv7p(p_hidden)

    c_hidden = self.conv6c(self.sc(self.conv5c(feature)))
    c_logits = self.conv7c(c_hidden)

    fused_logits = self.conv7pc(p_hidden + c_hidden)

    return [
        image_resize(logits, out_shape=x.shape[2:])
        for logits in (p_logits, c_logits, fused_logits)
    ]
def forward(self, tenFirst, tenSecond):
    """Estimate flow coarse-to-fine over five pyramid levels.

    Features are extracted for both images with ``self.moduleFeatures``.
    Image pyramids are built by repeatedly resizing the previous level to
    the spatial size of feature levels 1..5. Starting from the last level
    and moving to earlier ones, each level runs matching, subpixel and
    regularization modules in that order, each receiving the previous
    flow (``None`` at the first step).

    Returns:
        The final flow multiplied by 20.0.
    """
    features_first = self.moduleFeatures(tenFirst)
    features_second = self.moduleFeatures(tenSecond)

    pyramid_first = [tenFirst]
    pyramid_second = [tenSecond]
    for level in range(1, 6):
        height, width = features_first[level].shape[2:]
        for pyramid in (pyramid_first, pyramid_second):
            pyramid.append(
                L.image_resize(pyramid[-1], out_shape=(height, width), align_corners=False))

    flow = None
    for level in range(-1, -6, -1):
        level_inputs = (pyramid_first[level], pyramid_second[level],
                        features_first[level], features_second[level])
        # Module lists are indexed 0..4, hence the +5 shift of the negative level.
        for stage in (self.moduleMatching, self.moduleSubpixel, self.moduleRegularization):
            flow = stage[level + 5](*level_inputs, flow)

    return flow * 20.0
def prepare_data(first_image_path, second_image_path):
    """Load two images and turn them into network-ready tensors.

    Each image is opened as RGB, converted to float32, scaled by 1/255,
    has a fixed per-channel mean subtracted, and is transposed to
    channel-first. Both are then reshaped to ``(1, 3, h, w)`` using the
    first image's height and width, and resized so that both sides are
    multiples of 32.

    Returns:
        ``(tenFirst, tenSecond, (h, w), (r_h, r_w))`` with the original and
        the resized spatial sizes.
    """
    mean = np.array([0.411618, 0.434631, 0.454253]).astype("float32")

    chw_arrays = []
    for path in (first_image_path, second_image_path):
        pixels = np.array(Image.open(path).convert("RGB")).astype("float32")
        pixels = pixels / 255. - mean
        chw_arrays.append(pixels.transpose((2, 0, 1)))

    # The first image defines the size used for both tensors.
    h, w = chw_arrays[0].shape[1:]
    r_h = int(math.floor(math.ceil(h / 32.0) * 32.0))
    r_w = int(math.floor(math.ceil(w / 32.0) * 32.0))

    resized = []
    for chw in chw_arrays:
        batched = L.reshape(dg.to_variable(chw), (1, 3, h, w))
        resized.append(L.image_resize(batched, (r_h, r_w)))

    tenFirst, tenSecond = resized
    return tenFirst, tenSecond, (h, w), (r_h, r_w)
def forward(self, tensor_list: NestedTensor):
    """Run ``self.body`` and attach a resized mask to each output.

    For every named feature map returned by ``self.body``, the input mask
    is cast to float32, resized (nearest) to the feature's last two
    dimensions, cast back to bool, and wrapped together with the feature
    in a ``NestedTensor``.

    Returns:
        A dict mapping each feature name to its ``NestedTensor``.
    """
    features = self.body(tensor_list.tensors)
    out: Dict[str, NestedTensor] = {}
    for name, feature in features.items():
        padding_mask = tensor_list.mask
        assert padding_mask is not None
        # [batch_size, h, w] -> [batch_size, 1, h, w]
        mask_4d = L.unsqueeze(padding_mask, 1).astype("float32")
        resized = L.image_resize(mask_4d, out_shape=feature.shape[-2:], resample="NEAREST")
        # [batch_size, 1, h, w] -> [batch_size, h, w]
        mask_3d = L.squeeze(resized.astype("bool"), [1])
        out[name] = NestedTensor(feature, mask_3d)
    return out
def forward(self, x, *cond_inputs, **kwargs):
    """Apply the optional norm, then modulate with each conditional input.

    Every non-``None`` conditional input is resized (nearest) to
    ``x.shape[2:]`` and mapped to ``gamma`` / ``beta``: either through
    separate ``self.gammas[i]`` / ``self.betas[i]`` projections of the
    ``self.mlps[i]`` output (when ``self.separate_projection`` is set) or
    by splitting the ``self.mlps[i]`` output in two along axis 1. The
    running output is updated as ``output * (1 + gamma) + beta``.

    ``**kwargs`` is accepted but not used here.
    """
    if self.norm is not None:
        output = self.norm(x)
    else:
        output = x

    for i, cond in enumerate(cond_inputs):
        if cond is None:
            continue
        label_map = L.image_resize(cond, out_shape=x.shape[2:], resample='NEAREST')
        projected = self.mlps[i](label_map)
        if self.separate_projection:
            gamma = self.gammas[i](projected)
            beta = self.betas[i](projected)
        else:
            gamma, beta = L.split(projected, 2, 1)
        output = output * (1 + gamma) + beta
    return output
def forward(self, input_x):
    """Multi-resolution patch discriminator forward.

    The same ``self.discriminator`` is applied ``self.num_discriminators``
    times; after each application the input is resized with ``scale=0.5``.

    Args:
        input_x (tensor): Input images.

    Returns:
        (tuple):
          - output_list (list): outputs of the discriminator, one per scale.
          - features_list (list): feature lists of the discriminator, one
            per scale.
          - input_list (list): the input that was used at each scale.
    """
    input_list, output_list, features_list = [], [], []
    current = input_x
    for _ in range(self.num_discriminators):
        input_list.append(current)
        output, features = self.discriminator(current)
        output_list.append(output)
        features_list.append(features)
        current = L.image_resize(current, scale=0.5)
    return output_list, features_list, input_list
def crop_face_from_output(data_cfg, image, input_label, crop_smaller=0):
    """Crop the face region out of each image and resize it to a square.

    Args:
        data_cfg: Data configuration, forwarded to
            ``get_face_bbox_for_output``.
        image: A batch tensor, or a list of such tensors (handled
            recursively, returning a list of results).
        input_label: Label batch; ``input_label[i:i + 1]`` is used to find
            the bounding box for sample ``i``.
        crop_smaller: Forwarded to ``get_face_bbox_for_output``.

    Returns:
        The per-sample crops (last three channels of each sample, resized
        to ``face_size x face_size`` where
        ``face_size = image.shape[-2] // 32 * 8``) concatenated along the
        first axis, or ``None`` when ``input_label`` has no samples. For a
        list input, a list of such results.
    """
    if isinstance(image, list):
        return [
            crop_face_from_output(data_cfg, im, input_label, crop_smaller)
            for im in image
        ]

    face_size = image.shape[-2] // 32 * 8
    crops = []
    for i in range(input_label.shape[0]):
        ys, ye, xs, xe = get_face_bbox_for_output(data_cfg,
                                                  input_label[i:i + 1],
                                                  crop_smaller=crop_smaller)
        crops.append(
            L.image_resize(
                image[i:i + 1, -3:, ys:ye, xs:xe],
                out_shape=(face_size, face_size),
            ))

    if not crops:
        return None
    if len(crops) == 1:
        return crops[0]
    # Concatenate once instead of re-concatenating the growing result on
    # every iteration.
    return L.concat(crops)
def forward(self, x):
    """Build top-down pyramid features from the bottom-up network.

    The features named in ``self.in_features`` are taken from
    ``self.bottom_up(x)`` in that order. The first goes through lateral
    conv 0 and output conv 0. Each following feature is passed through
    its lateral conv and added to the previous merged map resized with
    ``scale=2`` (nearest), then through its output conv. If
    ``self.top_block`` is set, its result on the first output is inserted
    at the front of the list.

    Returns:
        The list of output feature maps.
    """
    bottom_up = self.bottom_up(x)
    selected = [bottom_up[name] for name in self.in_features]

    merged = self.lateral_convs[0](selected[0])
    results = [self.output_convs[0](merged)]

    remaining = zip(selected[1:], self.lateral_convs[1:], self.output_convs[1:])
    for feature, lateral_conv, output_conv in remaining:
        upsampled = L.image_resize(merged, scale=2, resample='NEAREST')
        merged = lateral_conv(feature) + upsampled
        results.append(output_conv(merged))

    if self.top_block is not None:
        results.insert(0, self.top_block(results[0]))
    return results
def forward(self, input_x):
    """Multi-resolution patch discriminator forward.

    Each discriminator in ``self.nets_discriminators`` is applied in turn;
    after each one the input is resized with ``scale=0.5`` for the next.

    Args:
        input_x (N x C x H x W tensor): Concatenation of images and
            semantic representations.

    Returns:
        (dict):
          - output (list): outputs of the individual discriminators.
          - features (list): feature lists of the individual
            discriminators.
    """
    outputs, feature_lists = [], []
    current = input_x
    for discriminator in self.nets_discriminators:
        output, features = discriminator(current)
        outputs.append(output)
        feature_lists.append(features)
        current = L.image_resize(current, scale=0.5)
    return {'output': outputs, 'features': feature_lists}
def net(self, class_dim=5, CAM=False):
    """Create the second-stage model.

    Declares the input placeholders, runs the lesion network on the
    scaled input, weights the resized input by each lesion probability
    map, and feeds the concatenated result to the selected backbone.

    Args:
        class_dim: dimension of the multi-class output vector.
        CAM: whether to also create the CAM (class activation map) heatmap.

    Returns:
        * A list containing 4 or 5 tensors / ops:
            - loss, from ``self.create_loss_op(predict, label)``
            - accuracy, from ``self.create_acc_op(predict, label)``
            - predict, model output activated by softmax
            - hacked_img_id, the image id cast to int32
            - cam_heatmap, only if ``CAM`` is true
        * reader, the reader op that feeds data into the placeholders

    Raises:
        ValueError: if ``self.main_arch`` is not a supported backbone.
    """
    # Input placeholders; the image is declared uint8 with a dynamic batch axis.
    self.input_feature = fluid.data(name='{}_input'.format(self.name),
                                    shape=[-1] + self.data_shape,
                                    dtype='uint8')
    self.label = fluid.data(name='{}_label'.format(self.name),
                            shape=[-1, 1],
                            dtype='int64')
    self.img_id = fluid.data(name='{}_img_id'.format(self.name),
                             shape=[-1, 1],
                             dtype='int64')

    # Lesion Net
    lesion = lesionnet.LesionNet()

    # Backbone
    if self.main_arch in ResNetModels:
        model = resnet.__dict__[self.main_arch]()
    elif self.main_arch in DenseNetModels:
        model = densenet.__dict__[self.main_arch]()
    elif self.main_arch == "inception":
        model = inception.InceptionV4()
    else:
        raise ValueError("Model {} is not supported.".format(
            self.main_arch))

    # Cast to float32, move axis 3 to position 1, and scale by 1/255.
    inp = FL.transpose(FL.cast(self.input_feature, "float32"),
                       perm=[0, 3, 1, 2]) / 255.

    # Element-wise multiplication of lesion probability maps and input image.
    lesion_probs = lesion.net(inp, class_dim=4)
    # Original author's shape note: bs, 4, 16, 16

    lesion_probs = FL.split(lesion_probs, num_or_sections=4, dim=1)
    # Original author's shape note: 4 maps of bs*1*16*16

    I = FL.image_resize(inp, out_shape=(512, 512), resample="BILINEAR")

    Is = []

    # One weighted copy of the resized input per lesion probability map.
    for L in lesion_probs:
        W = FL.image_resize(L, out_shape=(512, 512),
                            resample="NEAREST")  # author's note: bs, 1, 512, 512
        # The weight is (W + 1), repeated 3 times along axis 1.
        temp_I = FL.elementwise_mul(
            I, FL.expand(W + 1.,
                         expand_times=[1, 3, 1, 1]))  # author's note: bs, 3, 512, 512
        Is.append(temp_I)

    I = FL.concat(Is, axis=1)  # author's note: bs, 3*4, 512, 512
    # No gradient flows back through the weighted input.
    I.stop_gradient = True

    # Complement of the first lesion probability map.
    lesion_pos_prob = 1. - lesion_probs[0]

    main_arch_out = model.net(I,
                              class_dim=class_dim,
                              lesion_map=lesion_pos_prob,
                              CAM=CAM)

    # With CAM enabled the backbone returns (logit, heatmaps).
    if CAM:
        logit, heatmaps = main_arch_out
    else:
        logit = main_arch_out

    predict = FL.softmax(logit)
    accuracy = self.create_acc_op(predict, self.label)
    loss = self.create_loss_op(predict, self.label)
    reader = self.create_reader_op(
        [self.img_id, self.input_feature, self.label])

    # This is a hack (original author's note): the id is returned as int32.
    hacked_img_id = FL.cast(self.img_id, "int32")

    if CAM:
        cam_heatmap = self.create_cam_op(predict, class_dim, heatmaps)
        return [loss, accuracy, predict, hacked_img_id,
                cam_heatmap], reader

    return [loss, accuracy, predict, hacked_img_id], reader
def forward(self, x):
    """Resize ``x`` by ``self.scale_factor`` with nearest resampling."""
    resized = L.image_resize(x, scale=self.scale_factor, resample='NEAREST')
    return resized
def __call__(self, img):
    """Resize ``img`` to ``self.size``.

    A leading axis of size 1 is added before calling ``image_resize``
    with ``data_format='NHWC'`` and dropped again from the result.
    """
    batched_shape = (1, img.shape[0], img.shape[1], img.shape[2])
    batched = reshape(img, shape=batched_shape)
    resized = image_resize(batched, out_shape=self.size, data_format='NHWC')
    return resized[0]
def get_model(args):
    """Create a ``Network`` and load the weights at ``args.pretrained_model``."""
    network = Network()
    weights, _ = F.load_dygraph(args.pretrained_model)
    network.load_dict(weights)
    return network


if __name__ == '__main__':
    # Command-line entry point: estimate flow between two images and save
    # a visualization of it.
    parser = argparse.ArgumentParser('LiteFlownet Inference', add_help=False)
    cli_options = (
        ('--pretrained_model',
         './pretrained_models/network-default.pdparams',
         "path to the pretrained model"),
        ('--first', "./images/first.png", "path to the first image"),
        ('--second', "./images/second.png", "path to the second image"),
        ('--out', "./images/flow.png", "path to the output"),
    )
    for flag, default_value, help_text in cli_options:
        parser.add_argument(flag, default=default_value, type=str, help=help_text)
    args = parser.parse_args()

    with dg.guard():
        model = get_model(args)
        first, second, original_size, resized_size = prepare_data(args.first, args.second)
        flow = model(first, second)

        h, w = original_size
        r_h, r_w = resized_size
        # Back to the original size; flow values are rescaled by the same ratios.
        flow = L.image_resize(flow, (h, w))
        flow[:, 0, :, :] *= float(w) / float(r_w)
        flow[:, 1, :, :] *= float(h) / float(r_h)

        flow = L.transpose(flow[0], (1, 2, 0)).numpy()  # [h, w, 2]
        visulize_flow(flow, args.out)
def forward(self, x):
    """Resize ``x`` by ``self.scale`` using the ``self.resample`` method."""
    resized = layers.image_resize(x, scale=self.scale, resample=self.resample)
    return resized
def __call__(self, lr):
    """Resize ``lr`` by ``self.scale`` and refine it with three convolutions.

    The stack is a 9x9 conv (64 filters, ReLU), a 1x1 conv (32 filters,
    ReLU) and a 5x5 conv (3 filters, no activation argument).
    """
    layer_specs = (
        (64, (9, 9), {'padding': 4, 'act': 'relu', 'name': 'conv1_1'}),
        (32, (1, 1), {'act': 'relu', 'name': 'conv2_1'}),
        (3, (5, 5), {'padding': 2, 'name': 'conv3_1'}),
    )
    x = image_resize(lr, scale=self.scale)
    for num_filters, kernel, options in layer_specs:
        x = conv2d(x, num_filters, kernel, **options)
    return x