def get_fg_mask(densepose_map, has_fg):
    """Obtain the foreground mask for pose sequences, which only includes
    the human. This is done by looking at the body part map from DensePose.

    Args:
        densepose_map (NxCxHxW tensor, or list of such tensors): DensePose
            map(s). A 5-D input (presumably NxTxCxHxW) is reduced to its
            first entry along dim 1.
        has_fg (bool): Whether data has foreground or not.

    Returns:
        mask (Nx1xHxW tensor): Foreground mask; the scalar 1 when there is
        no foreground information; or a list of masks when the input is a
        list.
    """
    # Recurse over lists so callers can pass per-frame sequences.
    if isinstance(densepose_map, list):
        return [get_fg_mask(label, has_fg) for label in densepose_map]
    # No foreground info: return 1 so multiplying by the "mask" is a no-op.
    if not has_fg or densepose_map is None:
        return 1
    if len(densepose_map.shape) == 5:
        densepose_map = densepose_map[:, 0]
    # Get the body part map from DensePose (channel 2).
    mask = densepose_map[:, 2:3]
    # Make the mask slightly larger by dilating it with a 15x15 max pool.
    mask = L.pool2d(mask,
                    pool_size=15,
                    pool_type='max',
                    pool_stride=1,
                    pool_padding=7)
    # Binarize: any body part label (> -1) counts as foreground.
    mask = P.cast((mask > -1), "float32")
    return mask
def points_nms(heat, kernel=2):
    """Suppress non-peak points in a heat map.

    Max-pools with an asymmetric padding (kernel must be 2) and keeps only
    the positions whose value equals the local maximum.
    """
    pooled = L.pool2d(heat,
                      pool_size=kernel,
                      pool_stride=1,
                      pool_padding=[[0, 0], [0, 0], [1, 0], [1, 0]],
                      pool_type='max')
    # 1.0 where the point is a local maximum, 0.0 elsewhere.
    peak_mask = L.cast(L.equal(pooled, heat), 'float32')
    return heat * peak_mask
def compute_mask_losses(self, occ_mask, fake_image, warped_image, tgt_label,
                        tgt_image, fg_mask, ref_fg_mask, body_mask_diff):
    """Compute losses on the generated occlusion masks.

    Args:
        occ_mask (tensor or list of tensors): Generated occlusion masks.
            A list means masks for both reference -> target and
            previous -> target warping.
        fake_image (tensor): Generated image.
        warped_image (tensor or list of tensors): Warped images using the
            flow maps.
        tgt_label (tensor): Target label map.
        tgt_image (tensor): Target image for the warped image.
        fg_mask (tensor): Foreground mask — presumably for the target
            image (it is compared against ref_fg_mask below); confirm
            against the caller.
        ref_fg_mask (tensor): Foreground mask for the reference image.
        body_mask_diff (tensor): Difference between warped body part map
            and target body part map. Used for pose dataset only.

    Returns:
        loss_mask (1-element tensor): Accumulated occlusion-mask loss.
    """
    loss_mask = dg.to_variable(np.zeros((1, )).astype("float32"))
    if isinstance(occ_mask, list):
        # Compute occlusion mask losses for both warping
        # reference -> target and previous -> target.
        for i in range(len(occ_mask)):
            loss_mask += self.compute_mask_loss(occ_mask[i],
                                                warped_image[i], tgt_image)
    else:
        # Compute loss for warping either reference or previous images.
        loss_mask += self.compute_mask_loss(occ_mask, warped_image,
                                            tgt_image)
    if self.warp_ref:
        # occ_mask[0] is the mask for the reference -> target warp.
        ref_occ_mask = occ_mask[0]
        dummy0 = L.zeros_like(ref_occ_mask)
        dummy1 = L.ones_like(ref_occ_mask)
        if self.for_pose_dataset:
            # Enforce output to use more warped reference image for the
            # face region (channel 2 of the label map feeds the face mask).
            face_mask = L.unsqueeze(get_face_mask(tgt_label[:, 2]), [1])
            # Soften the face mask edges with an average pool.
            face_mask = L.pool2d(face_mask, pool_size=15, pool_type='avg',
                                 pool_stride=1, pool_padding=7)
            loss_mask += self.criterionMasked(ref_occ_mask, dummy0,
                                              face_mask)
            loss_mask += self.criterionMasked(fake_image, warped_image[0],
                                              face_mask)
            # Enforce output to use more hallucinated image for discrepancy
            # regions of body part masks between warped reference and
            # target image.
            loss_mask += self.criterionMasked(ref_occ_mask, dummy1,
                                              body_mask_diff)
        if self.has_fg:
            # Enforce output to use more hallucinated image for discrepancy
            # regions of foreground masks between reference and target
            # image.
            fg_mask_diff = ((ref_fg_mask - fg_mask) > 0).astype("float32")
            loss_mask += self.criterionMasked(ref_occ_mask, dummy1,
                                              fg_mask_diff)
    return loss_mask
def test_pool2d(self):
    """pool2d with list/tuple size, stride and padding builds a valid op."""
    prog = Program()
    with program_guard(prog):
        x = layers.data(name='x', shape=[3, 224, 224], dtype='float32')
        pooled = layers.pool2d(x,
                               pool_size=[5, 3],
                               pool_stride=[1, 2],
                               pool_padding=(2, 1))
        self.assertIsNotNone(pooled)
def forward(self, x):
    """Run the three CNN stages, average-pool each stage's output with a
    (3, 150) window, then classify the concatenated pooled features."""
    pooled = []
    for stage in (self.cnn1, self.cnn2, self.cnn3):
        x = stage(x)
        pooled.append(layers.pool2d(x, pool_size=(3, 150), pool_type='avg'))
    y = layers.concat(pooled, axis=1)
    # Flatten everything after the batch dimension.
    y = layers.reshape(y, shape=[y.shape[0], -1])
    y = self.cls(y)
    return layers.softmax(y, axis=1)
def func(self, place):
    """Double-gradient check for avg pool2d on an NCHW input."""
    inp = fluid.layers.data(name="input_NCHW",
                            shape=[2, 3, 5, 5],
                            append_batch_size=False,
                            dtype="float32")
    # The gradient checker needs the input variable to persist.
    inp.persistable = True
    out = layers.pool2d(inp, pool_size=2, pool_type="avg")
    init_vals = np.random.uniform(-1, 1, [2, 3, 5, 5]).astype(np.float32)
    gradient_checker.double_grad_check([inp],
                                       out,
                                       x_init=init_vals,
                                       place=place,
                                       eps=0.05)
def net(self, inputs):
    """VGG-16 style network: five conv blocks (each followed by a
    stride-2 max pool) and three fully connected layers.

    Shape prints after each pooling stage are kept for debugging parity.
    """
    print(inputs.shape)
    x = inputs
    # (number of 3x3 conv layers, output channels) per block.
    for n_convs, channels in ((2, 64), (2, 128), (3, 256), (3, 512),
                              (3, 512)):
        for _ in range(n_convs):
            x = conv2d(x, channels, 3, padding=1, act='relu')
        x = pool2d(x, 2, pool_stride=2)
        print(x.shape)
    x = flatten(x)
    x = fc(x, 4096, act='relu')
    x = fc(x, 4096, act='relu')
    out = fc(x, self.class_num)
    print(out.shape)
    return out
def get_feature(self, im: np.ndarray): """Get the feature. Generally, call this function. args: im: image patch """ # Return empty tensor if it should not be used is_color = im.shape[1] == 3 if is_color and not self.use_for_color or not is_color and not self.use_for_gray: return np.array([]) feat_list = self.extract(im) output_sz = [None] * len( feat_list) if self.output_size is None else self.output_size # Pool/downsample with fluid.dygraph.guard(): feat_list = [n2p(f) for f in feat_list] for i, (sz, s) in enumerate(zip(output_sz, self.pool_stride)): if sz is not None: feat_list[i] = layers.adaptive_pool2d(feat_list[i], sz, pool_type='avg') elif s != 1: feat_list[i] = layers.pool2d(feat_list[i], s, pool_stride=s, pool_type='avg') # Normalize if self.normalize_power is not None: new_feat_list = [] for feat in feat_list: norm = (layers.reduce_sum(layers.reshape( layers.abs(feat), [feat.shape[0], 1, 1, -1])** self.normalize_power, dim=3, keep_dim=True) / (feat.shape[1] * feat.shape[2] * feat.shape[3]) + 1e-10)**(1 / self.normalize_power) feat = broadcast_op(feat, norm, 'div') new_feat_list.append(feat) feat_list = new_feat_list # To numpy feat_list = TensorList([f.numpy() for f in feat_list]) return feat_list
def __call__(self, x, residual=None, children=None):
    """Recursive DLA-style tree forward pass.

    Downsamples the input when configured, projects it for the residual,
    and aggregates child outputs at the root of a depth-1 tree.
    """
    if children is None:
        children = []
    # Optionally downsample before projecting the residual.
    if self.downsample:
        bottom = L.pool2d(input=x,
                          pool_size=self.stride,
                          pool_stride=self.stride,
                          pool_type='max')
    else:
        bottom = x
    residual = self.project(bottom) if self.project else bottom
    if self.level_root:
        children.append(bottom)
    left = self.tree1(x, residual)
    if self.levels == 1:
        right = self.tree2(left)
        out = self.root(right, left, *children)
    else:
        children.append(left)
        out = self.tree2(left, children=children)
    return out
def forward(self, input, condition=None):
    """Residual block with optional conditioning, up/downsampling, skip
    projection and attention.

    Args:
        input: Input feature map.
        condition: Conditioning input; a list supplies separate conditions
            for the two conditional norms, otherwise the same condition is
            reused for both.
    """
    out = input
    if self.conditional:
        # First conditional normalization; condition[0] when a list.
        out = self.cond_norm1(
            out, condition[0] if isinstance(condition, list) else condition)
    out = self.activation(out)
    if self.upsample:
        out = unpool(out)
    out = self.conv0(out)
    if self.conditional:
        # Second conditional normalization; condition[1] when a list.
        out = self.cond_norm2(
            out, condition[1] if isinstance(condition, list) else condition)
    out = self.activation(out)
    out = self.conv1(out)
    if self.downsample:
        out = layers.pool2d(out, 2, pool_type='avg', pool_stride=2)
    if self.skip_proj:
        # Shortcut branch mirrors the main branch's resampling, then is
        # projected and added.
        skip = input
        if self.upsample:
            skip = unpool(skip)
        skip = self.conv_sc(skip)
        if self.downsample:
            skip = layers.pool2d(skip, 2, pool_type='avg', pool_stride=2)
        out = out + skip
    else:
        # NOTE(review): this assignment is dead — `skip` is never used and
        # no shortcut is added when skip_proj is False. Confirm whether an
        # `out = out + skip` was intended here.
        skip = input
    if self.use_attention:
        out = self.attention(out)
    return out
def forward(self, x):
    """Video classification forward pass with a representation-flow branch.

    The input is repeatedly folded between 5-D (batch, time, channel, H, W)
    and 4-D (batch*time, channel, H, W) so 2-D convs can process each frame
    while 3-D average pools downsample the temporal axis. Assumes x is a
    5-D tensor — TODO confirm the exact input layout against the caller.
    """
    # Temporal downsampling: move channels/time so pool3d strides over time.
    x = layers.transpose(x, perm=[0, 2, 1, 3, 4])
    x = fluid.layers.pool3d(x, pool_size=(3, 1, 1), pool_type='avg',
                            pool_stride=(2, 1, 1))
    b, c, t, h, w = x.shape
    x = layers.transpose(x, perm=[0, 2, 1, 3, 4])
    # Fold time into batch so 2-D layers run per frame.
    x = layers.reshape(x, shape=[b * t, c, h, w])
    x = self.stem(x)
    x = self.bn1(x)
    x = layers.pool2d(x, pool_size=3, pool_type='max', pool_stride=2,
                      pool_padding=1)
    x = self.res2(x)
    x = self.res3(x)
    bt, c, h, w = x.shape
    # Unfold back to 5-D for a second temporal pooling pass.
    x = layers.reshape(x, shape=[b, t, c, h, w])
    x = layers.transpose(x, perm=[0, 2, 1, 3, 4])
    x = fluid.layers.pool3d(x, pool_size=(3, 1, 1), pool_type='avg',
                            pool_stride=(2, 1, 1))
    b, c, t, h, w = x.shape
    x = layers.transpose(x, perm=[0, 2, 1, 3, 4])
    # Residual for the flow branch: drop first/last frames so shapes match
    # the flow output.
    res = layers.reshape(x[:, 1:-1], shape=[-1, c, h, w])
    x = layers.reshape(x, shape=[b * t, c, h, w])
    # Representation-flow branch.
    x = self.rep_flow(x)
    x = self.flow_conv(x)
    x = self.rep_flow2(x)
    x = layers.relu(res + x)
    x = self.res4(x)
    x = self.res5(x)
    x = self.dropout(x)
    # Global spatial average pooling via two mean reductions.
    x = layers.reduce_mean(x, dim=3)
    x = layers.reduce_mean(x, dim=2)
    x = layers.reshape(x, shape=[x.shape[0], -1])
    x = self.classify(x)
    # Average per-frame predictions over time.
    x = layers.reshape(x, shape=[b, -1, self.num_classes])
    x = layers.reduce_mean(x, dim=1)
    return x
def func(self, place):
    """Double-gradient check for avg pool2d (static graph and dygraph)."""
    input_NCHW = fluid.layers.data(
        name="input_NCHW",
        shape=[2, 3, 5, 5],
        append_batch_size=False,
        dtype="float32")
    # The gradient checker requires a persistable input variable.
    input_NCHW.persistable = True
    # NOTE(review): this `y` is immediately overwritten below, so only the
    # avg_pool2d result is checked. The pool2d op still exists in the built
    # program, so it is left in place rather than removed — confirm whether
    # checking both outputs was intended.
    y = layers.pool2d(input_NCHW, pool_size=[4, 4], pool_type="avg")
    y = paddle.nn.functional.avg_pool2d(input_NCHW, kernel_size=[4, 4])
    x_arr = np.random.uniform(-1, 1, [2, 3, 5, 5]).astype(np.float32)
    gradient_checker.double_grad_check(
        [input_NCHW], y, x_init=x_arr, place=place, eps=0.05)
    gradient_checker.double_grad_check_for_dygraph(
        self.pool2d_wrapper, [input_NCHW], y, x_init=x_arr, place=place)
def get_feature(self, im: np.ndarray):
    """Get the feature. Generally, call this function.

    args:
        im: image patch

    Returns the pooled (and optionally normalized) feature as a numpy
    array, or an empty array when this extractor is disabled for the
    patch's color mode.
    """
    # Skip extraction if this feature should not be used for the patch.
    has_color = im.shape[1] == 3
    if (has_color and not self.use_for_color) or \
            (not has_color and not self.use_for_gray):
        return np.array([])

    # Extract the raw feature map.
    feat = self.extract(im)

    with fluid.dygraph.guard():
        feat = n2p(feat)

        # Pool/downsample: a fixed output size wins over strided pooling.
        if self.output_size is not None:
            feat = layers.adaptive_pool2d(feat, self.output_size, 'avg')
        elif self.pool_stride != 1:
            feat = layers.pool2d(feat,
                                 self.pool_stride,
                                 pool_stride=self.pool_stride,
                                 pool_type='avg')

        # Normalize by the element-averaged p-norm (1e-10 avoids div by 0).
        if self.normalize_power is not None:
            p = self.normalize_power
            flat = layers.reshape(layers.abs(feat),
                                  [feat.shape[0], 1, 1, -1])
            n_elems = feat.shape[1] * feat.shape[2] * feat.shape[3]
            feat /= (layers.reduce_sum(flat**p, dim=3, keep_dim=True) /
                     n_elems + 1e-10)**(1 / p)

        feat = feat.numpy()
    return feat
def __call__(self, input_tensor):
    """Backbone forward pass.

    Runs the stem plus stages 2-5 and returns the intermediate feature
    maps (strides 4/8/16/32) selected by self.feature_maps, in ascending
    stage order.
    """
    x = self.conv1(input_tensor)
    x = L.pool2d(input=x,
                 pool_size=3,
                 pool_stride=2,
                 pool_padding=1,
                 pool_type='max')
    # stage2 -> stride-4 feature
    x = self.stage2_0(x)
    x = self.stage2_1(x)
    s4 = self.stage2_2(x)
    # stage3 -> stride-8 feature
    x = self.stage3_0(s4)
    x = self.stage3_1(x)
    x = self.stage3_2(x)
    s8 = self.stage3_3(x)
    # stage4 -> stride-16 feature
    x = self.stage4_0(s8)
    for block in self.stage4_layers:
        x = block(x)
    s16 = self.stage4_last_layer(x)
    # stage5 -> stride-32 feature
    x = self.stage5_0(s16)
    x = self.stage5_1(x)
    s32 = self.stage5_2(x)
    # Keep only the requested feature maps, in stage order.
    by_stage = {2: s4, 3: s8, 4: s16, 5: s32}
    return [by_stage[k] for k in (2, 3, 4, 5) if k in self.feature_maps]
def CNNCharEmbedding(input, vocab_size, cnn_dim, n_kernals, hidden_dim,
                     dropout_rate, output_dropout, embed_dim):
    """CNN generates character embedding.

    Structed as:
        - embed(x)       len_word X hidden_dim
        - Dropout(x)
        - CNN(x)         len_word X hidden_dim
        - activation(x)
        - pool           hidden_dim
        - fc             embed_dim
        - Dropout.

    Return: embedded Tensor shaped like [bsz, len_seq, embed_dim]
    """
    # input.size [batch_size, len_sentence, len_word]
    bsz, len_seq, len_word = input.shape
    h = fluid.embedding(input, size=[vocab_size, hidden_dim])
    # h.size [batch_size, len_sentence, len_word, hidden_dim]
    h = layers.dropout(x=h, dropout_prob=dropout_rate)
    # Fold sentence positions into the batch: each word becomes a single
    # 1-channel "image" of shape [len_word, hidden_dim].
    h = layers.reshape(x=h, shape=(bsz * len_seq, 1, len_word, hidden_dim))
    # Convolve over characters; padding keeps len_word positions.
    h = layers.conv2d(input=h,
                      num_filters=n_kernals,
                      filter_size=(cnn_dim, hidden_dim),
                      padding=(cnn_dim - 1, 0),
                      act='relu')
    # [bsz*len_seq, n_kernals, len_word, 1] -> [bsz*len_seq, 1, len_word, n_kernals]
    h = layers.transpose(x=h, perm=[0, 3, 2, 1])
    # Max-pool over the word length -> [bsz*len_seq, 1, 1, n_kernals].
    h = layers.pool2d(input=h, pool_size=[len_word, 1], pool_type='max')
    h = layers.fc(input=h, size=embed_dim, num_flatten_dims=-1, act='tanh')
    # Restore the [batch, sentence, embed] layout.
    h = layers.reshape(x=h, shape=(bsz, len_seq, embed_dim))
    return layers.dropout(x=h, dropout_prob=output_dropout)
def forward(self, x):
    """Spatially subsample by 2: a 1x1 max pool with stride 2 simply keeps
    every other pixel in each spatial dimension."""
    subsampled = L.pool2d(x,
                          pool_size=1,
                          pool_stride=2,
                          pool_padding=0,
                          pool_type="max")
    return subsampled
def __call__(self, image):
    """Estimating parameters of geometric transformation

    Args:
        image: input
    Return:
        batch_C_prime: the matrix of the geometric transformation,
        shaped [-1, F, 2] (F fiducial points, x/y coordinates each).
    """
    F = self.F
    loc_lr = self.loc_lr
    # Channel widths and FC size depend on the model variant.
    if self.model_name == "large":
        num_filters_list = [64, 128, 256, 512]
        fc_dim = 256
    else:
        num_filters_list = [16, 32, 64, 128]
        fc_dim = 64
    # Conv stack: each iteration consumes the previous iteration's `pool`
    # (the first uses the raw image); the last stage pools to 1x1.
    for fno in range(len(num_filters_list)):
        num_filters = num_filters_list[fno]
        name = "loc_conv%d" % fno
        if fno == 0:
            conv = self.conv_bn_layer(image,
                                      num_filters,
                                      3,
                                      act='relu',
                                      name=name)
        else:
            conv = self.conv_bn_layer(pool,
                                      num_filters,
                                      3,
                                      act='relu',
                                      name=name)
        if fno == len(num_filters_list) - 1:
            # Final stage: collapse spatial dims to 1x1.
            pool = layers.adaptive_pool2d(input=conv,
                                          pool_size=[1, 1],
                                          pool_type='avg')
        else:
            pool = layers.pool2d(input=conv,
                                 pool_size=2,
                                 pool_stride=2,
                                 pool_padding=0,
                                 pool_type='max')
    name = "loc_fc1"
    # Uniform init range scaled by fan-in.
    stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
    fc1 = layers.fc(input=pool,
                    size=fc_dim,
                    param_attr=fluid.param_attr.ParamAttr(
                        learning_rate=loc_lr,
                        initializer=fluid.initializer.Uniform(-stdv, stdv),
                        name=name + "_w"),
                    act='relu',
                    name=name)
    # Bias of the last FC starts at the initial fiducial-point layout so
    # the transform begins close to identity.
    initial_bias = self.get_initial_fiducials()
    initial_bias = initial_bias.reshape(-1)
    name = "loc_fc2"
    # Weights start at zero: the initial prediction is the bias alone.
    param_attr = fluid.param_attr.ParamAttr(
        learning_rate=loc_lr,
        initializer=fluid.initializer.NumpyArrayInitializer(
            np.zeros([fc_dim, F * 2])),
        name=name + "_w")
    bias_attr = fluid.param_attr.ParamAttr(
        learning_rate=loc_lr,
        initializer=fluid.initializer.NumpyArrayInitializer(initial_bias),
        name=name + "_b")
    fc2 = layers.fc(input=fc1,
                    size=F * 2,
                    param_attr=param_attr,
                    bias_attr=bias_attr,
                    name=name)
    batch_C_prime = layers.reshape(x=fc2, shape=[-1, F, 2], inplace=False)
    return batch_C_prime
# Training hyper-parameters (MNIST-style example).
batch_size = 128
num_classes = 10
epochs = 12
# NOTE(review): img_rows/img_cols are not used in the visible graph — the
# input shape is hard-coded to [-1, 1, 28, 28] below; possibly used later
# in the file.
img_rows = 28
img_cols = 28

# define the model
X = layers.data(name="img", shape=[-1, 1, 28, 28], dtype="float32")
Y = layers.data(name="label", shape=[-1, 1], dtype="int64")
# Two 3x3 conv layers, then 2x2 pooling and dropout.
h_conv = layers.conv2d(X, num_filters=32, filter_size=(3, 3), act="relu")
h_conv = layers.conv2d(h_conv, num_filters=64, filter_size=(3, 3),
                       act="relu")
h_pool = layers.pool2d(h_conv, pool_size=(2, 2))
h_dropout = layers.dropout(h_pool, dropout_prob=0.25)
h_flatten = layers.flatten(h_dropout)
# Named bias attrs ("b_0"/"b_1") — presumably referenced elsewhere for
# inspection or loading; verify before renaming.
h_fc = layers.fc(h_flatten, size=128, act="relu",
                 bias_attr=fluid.param_attr.ParamAttr(name="b_0"))
h_dropout2 = layers.dropout(h_fc, dropout_prob=0.25)
pred = layers.fc(h_dropout2, size=num_classes, act="softmax",
                 bias_attr=fluid.param_attr.ParamAttr(name="b_1"))
# Loss and accuracy metrics over the softmax output.
loss = layers.reduce_mean(layers.cross_entropy(input=pred, label=Y))
acc = layers.accuracy(input=pred, label=Y)
def Resnet101(inputs, is_test, trainable, use_dcn):
    """Build a ResNet-101 backbone and return multi-scale features.

    Args:
        inputs: Input image tensor (NCHW).
        is_test (bool): Inference mode flag for batch norm.
        trainable (bool): Whether backbone parameters receive gradients.
        use_dcn (bool): Use deformable convolutions in stages 3-5.

    Returns:
        (s8, s16, s32): Feature maps at strides 8, 16 and 32 — TODO
        confirm strides against conv_block/identity_block definitions.
    """
    # Stem: 7x7 stride-2 conv, BN (frozen-style: L2Decay(0.)), ReLU,
    # 3x3 stride-2 max pool.
    x = P.conv2d(inputs,
                 64,
                 filter_size=7,
                 stride=2,
                 padding=3,
                 param_attr=ParamAttr(initializer=fluid.initializer.Normal(
                     0.0, 0.01),
                                      name="backbone.conv1.weight",
                                      trainable=trainable),
                 bias_attr=False)
    x = P.batch_norm(
        input=x,
        act=None,
        is_test=is_test,
        param_attr=ParamAttr(initializer=fluid.initializer.Constant(1.0),
                             regularizer=L2Decay(0.),
                             trainable=trainable,
                             name='backbone.bn1.weight'),
        bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0),
                            regularizer=L2Decay(0.),
                            trainable=trainable,
                            name='backbone.bn1.bias'),
        moving_mean_name='backbone.bn1.running_mean',
        moving_variance_name='backbone.bn1.running_var')
    x = P.relu(x)
    x = P.pool2d(x, pool_size=3, pool_type="max", pool_stride=2,
                 pool_padding=1)
    # stage2: 3 blocks, channels [64, 64, 256].
    x = conv_block(x, [64, 64, 256], 'backbone.layers.0.0', is_test,
                   trainable, stride=1)
    x = identity_block(x, [64, 64, 256], 'backbone.layers.0.1', is_test,
                       trainable)
    x = identity_block(x, [64, 64, 256], 'backbone.layers.0.2', is_test,
                       trainable)
    # stage3: 4 blocks, channels [128, 128, 512].
    x = conv_block(x, [128, 128, 512], 'backbone.layers.1.0', is_test,
                   trainable, use_dcn=use_dcn)
    x = identity_block(x, [128, 128, 512], 'backbone.layers.1.1', is_test,
                       trainable, use_dcn=use_dcn)
    x = identity_block(x, [128, 128, 512], 'backbone.layers.1.2', is_test,
                       trainable, use_dcn=use_dcn)
    s8 = identity_block(x, [128, 128, 512], 'backbone.layers.1.3', is_test,
                        trainable, use_dcn=use_dcn)
    # stage4: 23 blocks, channels [256, 256, 1024] (the ResNet-101 stage).
    x = conv_block(s8, [256, 256, 1024], 'backbone.layers.2.0', is_test,
                   trainable, use_dcn=use_dcn)
    for i in range(1, 22):
        x = identity_block(x, [256, 256, 1024],
                           'backbone.layers.2.%d' % i, is_test, trainable,
                           use_dcn=use_dcn)
    s16 = identity_block(x, [256, 256, 1024], 'backbone.layers.2.22',
                         is_test, trainable, use_dcn=use_dcn)
    # stage5: 3 blocks, channels [512, 512, 2048].
    x = conv_block(s16, [512, 512, 2048], 'backbone.layers.3.0', is_test,
                   trainable, use_dcn=use_dcn)
    x = identity_block(x, [512, 512, 2048], 'backbone.layers.3.1', is_test,
                       trainable, use_dcn=use_dcn)
    s32 = identity_block(x, [512, 512, 2048], 'backbone.layers.3.2',
                         is_test, trainable, use_dcn=use_dcn)
    return s8, s16, s32
def build(self, boxNum=64, learning_rate=0.001, beta1=0.9, beta2=0.999,
          epsilon=1e-08, regularization=None, lazy_mode=False):
    """Build a tiny-YOLOv3-style detection graph with two YOLO heads.

    Args:
        boxNum: Max ground-truth boxes per image.
        learning_rate, beta1, beta2, epsilon, regularization, lazy_mode:
            Adam optimizer settings.

    Side effects: stores the two head outputs, the loss, and an Executor
    on self; minimizes the loss with Adam.
    """
    # Inputs: 416x416 RGB image, ground-truth boxes and labels.
    dataInput = pfl.data(name='data_input', shape=[3, 416, 416],
                         dtype='float32')
    gtbox = pfl.data(name='data_gtbox', shape=[boxNum, 4], dtype='float32')
    gtlabel = pfl.data(name='data_gtlabel', shape=[boxNum], dtype='int32')
    # Six anchor (w, h) pairs; masks below split them between the heads.
    anchors = [10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319]
    # Backbone: alternating _DBL (conv-bn-leaky, presumably) and 2x max
    # pools, doubling channels 16 -> 512.
    layer0_output = _DBL(input=dataInput, num_filters=16, filter_size=3,
                         name='layer0')
    layer1_output = pfl.pool2d(input=layer0_output, pool_size=2,
                               pool_type='max', pool_stride=2,
                               name='layer1_max')
    layer2_output = _DBL(input=layer1_output, num_filters=32, filter_size=3,
                         name='layer2')
    layer3_output = pfl.pool2d(input=layer2_output, pool_size=2,
                               pool_type='max', pool_stride=2,
                               name='layer3_max')
    layer4_output = _DBL(input=layer3_output, num_filters=64, filter_size=3,
                         name='layer4')
    layer5_output = pfl.pool2d(input=layer4_output, pool_size=2,
                               pool_type='max', pool_stride=2,
                               name='layer5_max')
    layer6_output = _DBL(input=layer5_output, num_filters=128,
                         filter_size=3, name='layer6')
    layer7_output = pfl.pool2d(input=layer6_output, pool_size=2,
                               pool_type='max', pool_stride=2,
                               name='layer7_max')
    layer8_output = _DBL(input=layer7_output, num_filters=256,
                         filter_size=3, name='layer8')
    layer9_output = pfl.pool2d(input=layer8_output, pool_size=2,
                               pool_type='max', pool_stride=2,
                               name='layer9_max')
    layer10_output = _DBL(input=layer9_output, num_filters=512,
                          filter_size=3, name='layer10')
    # Stride-1 pool with asymmetric padding keeps the 13x13 resolution.
    layer11_output = pfl.pool2d(input=pfl.pad(
        layer10_output, paddings=[0, 0, 0, 0, 0, 1, 0, 1]),
                                pool_size=2,
                                pool_type='max',
                                pool_stride=1,
                                name='layer11_max')
    layer12_output = _DBL(input=layer11_output, num_filters=1024,
                          filter_size=3, name='layer12')
    layer13_output = _DBL(input=layer12_output, num_filters=256,
                          filter_size=1, padding=0, name='layer13')
    layer14_output = _DBL(input=layer13_output, num_filters=512,
                          filter_size=3, name='layer14')
    # Head 1 (coarse, stride 32): 18 = 3 anchors * (4 box + 1 obj + 1 cls).
    layer15_output = pfl.conv2d(input=layer14_output, num_filters=18,
                                filter_size=1, name='layer15_conv')
    # layer16_yolo -> -1 x 18 x 13 x 13
    yolo1_loss = pfl.yolov3_loss(name='yolo1_loss', x=layer15_output,
                                 gtbox=gtbox, gtlabel=gtlabel,
                                 anchors=anchors, anchor_mask=[3, 4, 5],
                                 class_num=1, ignore_thresh=0.5,
                                 downsample_ratio=32)
    # layer17_route_13
    layer18_output = _DBL(input=layer13_output, num_filters=128,
                          filter_size=1, padding=0, name='layer18')
    # Nearest-neighbor 2x upsample via expand.
    layer19_output = pfl.expand(layer18_output, expand_times=[1, 1, 2, 2],
                                name='layer19_upsample')
    # layer20_route_19_8: concat upsampled features with the stride-16 map.
    layer20_output = pfl.concat([layer19_output, layer8_output], axis=1,
                                name='layer20_concat')
    layer21_output = _DBL(layer20_output, num_filters=256, filter_size=3,
                          name='layer21')
    # Head 2 (fine, stride 16).
    layer22_output = pfl.conv2d(input=layer21_output, num_filters=18,
                                filter_size=1, name='layer22_conv')
    # layer23_yolo -> -1 x 18 x 26 x 26
    yolo2_loss = pfl.yolov3_loss(name='yolo2_loss', x=layer22_output,
                                 gtbox=gtbox, gtlabel=gtlabel,
                                 anchors=anchors, anchor_mask=[0, 1, 2],
                                 class_num=1, ignore_thresh=0.5,
                                 downsample_ratio=16)
    # Total loss: mean of the two head losses.
    loss = pfl.reduce_mean(pfl.elementwise_add(yolo1_loss, yolo2_loss),
                           name="loss_output")
    optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=learning_rate,
        beta1=beta1,
        beta2=beta2,
        epsilon=epsilon,
        regularization=regularization,
        lazy_mode=lazy_mode)
    optimizer.minimize(loss)
    # Expose the head outputs, the loss, and the training executor.
    self._netOutput1, self._netOutput2 = layer15_output, layer22_output
    self._loss = loss
    self._trainExe = fluid.Executor(
        fluid.CUDAPlace(0)) if self._USE_CUDA else fluid.Executor(
            fluid.CPUPlace())