def _add_topdown_lateral(self, body_name, body_input, upper_output):
    """Build one FPN top-down step.

    A 1x1 lateral conv projects the backbone feature; the coarser level
    is upsampled 2x by nearest neighbor and the two are summed.
    """
    lateral_name = 'fpn_inner_' + body_name + '_lateral'
    topdown_name = 'fpn_topdown_' + body_name
    fan = body_input.shape[1]
    if self.norm_type:
        # Normalized variant: conv + norm fused by the ConvNorm helper.
        lat_feat = ConvNorm(
            body_input,
            self.num_chan,
            1,
            initializer=Xavier(fan_out=fan),
            norm_type=self.norm_type,
            name=lateral_name,
            bn_name=lateral_name)
    else:
        lat_feat = fluid.layers.conv2d(
            body_input,
            self.num_chan,
            1,
            param_attr=ParamAttr(
                name=lateral_name + "_w", initializer=Xavier(fan_out=fan)),
            bias_attr=ParamAttr(
                name=lateral_name + "_b",
                learning_rate=2.,
                regularizer=L2Decay(0.)),
            name=lateral_name)
    # Compute the upsample target (2x the coarser level's H and W) at
    # runtime so the graph works with variable input sizes.
    shape = fluid.layers.shape(upper_output)
    shape_hw = fluid.layers.slice(shape, axes=[0], starts=[2], ends=[4])
    out_shape = fluid.layers.cast(shape_hw * 2, dtype='int32')
    out_shape.stop_gradient = True
    up_feat = fluid.layers.resize_nearest(
        upper_output, scale=2., actual_shape=out_shape, name=topdown_name)
    return lat_feat + up_feat
def __call__(self, roi_feat):
    """Two-FC head over RoI features.

    When mixed precision is globally enabled the FC layers run in fp16
    and the result is cast back to fp32.
    """
    fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3]
    use_fp16 = mixed_precision_global_state() is not None
    if use_fp16:
        # Cast activations down for the heavy FC computation.
        roi_feat = fluid.layers.cast(roi_feat, 'float16')
    fc6 = fluid.layers.fc(
        input=roi_feat,
        size=self.mlp_dim,
        act='relu',
        name='fc6',
        param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)),
        bias_attr=ParamAttr(
            name='fc6_b', learning_rate=2., regularizer=L2Decay(0.)))
    head_feat = fluid.layers.fc(
        input=fc6,
        size=self.mlp_dim,
        act='relu',
        name='fc7',
        param_attr=ParamAttr(name='fc7_w', initializer=Xavier()),
        bias_attr=ParamAttr(
            name='fc7_b', learning_rate=2., regularizer=L2Decay(0.)))
    if use_fp16:
        # Restore fp32 for downstream consumers.
        head_feat = fluid.layers.cast(head_feat, 'float32')
    return head_feat
def __call__(self, inputs, name=''):
    """Swish-activated separable conv: depthwise 3x3 -> pointwise 1x1 -> BN."""
    feat = fluid.layers.swish(inputs)
    # Depthwise 3x3: one filter per channel, no bias.
    feat = fluid.layers.conv2d(
        feat,
        self.num_chan,
        filter_size=3,
        padding='SAME',
        groups=self.num_chan,
        param_attr=ParamAttr(initializer=Xavier(), name=name + '_dw_w'),
        bias_attr=False)
    # Pointwise 1x1 mixes channels; the bias carries no weight decay.
    feat = fluid.layers.conv2d(
        feat,
        self.num_chan,
        filter_size=1,
        param_attr=ParamAttr(initializer=Xavier(), name=name + '_pw_w'),
        bias_attr=ParamAttr(regularizer=L2Decay(0.), name=name + '_pw_b'))
    # Batch norm with EfficientNet-style statistics (momentum .997, eps 1e-4).
    feat = fluid.layers.batch_norm(
        feat,
        momentum=0.997,
        epsilon=1e-04,
        param_attr=ParamAttr(
            initializer=Constant(1.0),
            regularizer=L2Decay(0.),
            name=name + '_bn_w'),
        bias_attr=ParamAttr(regularizer=L2Decay(0.), name=name + '_bn_b'))
    return feat
def _add_topdown_lateral(self, body_name, body_input, upper_output):
    """Sum a 1x1 lateral projection of `body_input` with a 2x nearest
    upsample of the coarser-level `upper_output`."""
    lateral_name = 'fpn_inner_' + body_name + '_lateral'
    topdown_name = 'fpn_topdown_' + body_name
    fan = body_input.shape[1]
    if self.norm_type:
        # Conv + (optionally frozen) norm via the ConvNorm helper.
        lat_feat = ConvNorm(
            body_input,
            self.num_chan,
            1,
            initializer=Xavier(fan_out=fan),
            norm_type=self.norm_type,
            freeze_norm=self.freeze_norm,
            name=lateral_name,
            norm_name=lateral_name)
    else:
        lat_feat = fluid.layers.conv2d(
            body_input,
            self.num_chan,
            1,
            param_attr=ParamAttr(
                name=lateral_name + "_w", initializer=Xavier(fan_out=fan)),
            bias_attr=ParamAttr(
                name=lateral_name + "_b",
                learning_rate=2.,
                regularizer=L2Decay(0.)),
            name=lateral_name)
    up_feat = fluid.layers.resize_nearest(
        upper_output, scale=2., name=topdown_name)
    return lat_feat + up_feat
def __call__(self, roi_feat, wb_scalar=1.0, name=''):
    """Two-FC RoI head with learning-rate scaling.

    `wb_scalar` scales the weight LR; each bias uses twice that rate and
    no weight decay.
    """
    fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3]
    fc6 = fluid.layers.fc(
        input=roi_feat,
        size=self.mlp_dim,
        act='relu',
        name='fc6' + name,
        param_attr=ParamAttr(
            name='fc6%s_w' % name,
            initializer=Xavier(fan_out=fan),
            learning_rate=wb_scalar),
        bias_attr=ParamAttr(
            name='fc6%s_b' % name,
            learning_rate=wb_scalar * 2,
            regularizer=L2Decay(0.)))
    head_feat = fluid.layers.fc(
        input=fc6,
        size=self.mlp_dim,
        act='relu',
        name='fc7' + name,
        param_attr=ParamAttr(
            name='fc7%s_w' % name,
            initializer=Xavier(),
            learning_rate=wb_scalar),
        bias_attr=ParamAttr(
            name='fc7%s_b' % name,
            learning_rate=wb_scalar * 2,
            regularizer=L2Decay(0.)))
    return head_feat
def __call__(self, roi_feat):
    """RoI features -> fc6 -> fc7 (both ReLU); returns the fc7 output."""
    fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3]

    # Both biases share one recipe: double LR, no weight decay.
    def _bias(bname):
        return ParamAttr(
            name=bname, learning_rate=2., regularizer=L2Decay(0.))

    fc6 = fluid.layers.fc(
        input=roi_feat,
        size=self.mlp_dim,
        act='relu',
        name='fc6',
        param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)),
        bias_attr=_bias('fc6_b'))
    head_feat = fluid.layers.fc(
        input=fc6,
        size=self.mlp_dim,
        act='relu',
        name='fc7',
        param_attr=ParamAttr(name='fc7_w', initializer=Xavier()),
        bias_attr=_bias('fc7_b'))
    return head_feat
def DilConv(input, C_in, C_out, kernel_size, stride, padding, dilation,
            name='', affine=True):
    """ReLU -> dilated depthwise conv -> pointwise 1x1 conv -> batch norm.

    Args:
        input: input feature map variable.
        C_in / C_out: channels of the depthwise / pointwise convs.
        kernel_size, stride, padding, dilation: depthwise conv geometry.
        name: parameter-name prefix.
        affine: when False the BN scale/shift are frozen (learning rate 0).

    Returns:
        The batch-normalized output variable.
    """
    relu_a = fluid.layers.relu(input)
    # Depthwise dilated conv (groups == C_in); cuDNN disabled for this
    # dilated-grouped configuration.
    conv2d_a = fluid.layers.conv2d(
        relu_a,
        C_in,
        kernel_size,
        stride,
        padding,
        dilation,
        groups=C_in,
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name=name + 'op.1.weight'),
        bias_attr=False,
        use_cudnn=False)
    conv2d_b = fluid.layers.conv2d(
        conv2d_a,
        C_out,
        1,
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name=name + 'op.2.weight'),
        bias_attr=False)
    # ParamAttr's default learning_rate is 1.0, so one batch_norm call
    # covers both branches of the old duplicated affine/non-affine code.
    bn_lr = 1. if affine else 0.
    return fluid.layers.batch_norm(
        conv2d_b,
        param_attr=ParamAttr(
            initializer=Constant(1.),
            learning_rate=bn_lr,
            name=name + 'op.3.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.),
            learning_rate=bn_lr,
            name=name + 'op.3.bias'),
        moving_mean_name=name + 'op.3.running_mean',
        moving_variance_name=name + 'op.3.running_var')
def FactorizedReduce(input, C_out, name='', affine=True):
    """Halve spatial resolution with two stride-2 1x1 convs.

    The second branch reads the input shifted by one pixel so together
    the branches cover every position; their outputs are concatenated
    channel-wise and batch-normalized.

    Args:
        input: input feature map.
        C_out: total output channels (split evenly across the branches).
        name: parameter-name prefix.
        affine: when False the BN scale/shift are frozen (learning rate 0).

    Returns:
        The batch-normalized concatenated output.
    """
    relu_a = fluid.layers.relu(input)
    conv2d_a = fluid.layers.conv2d(
        relu_a, C_out // 2, 1, 2,
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name=name + 'conv_1.weight'),
        bias_attr=False)
    # Shift the grid by (1, 1) for the second branch.
    h_end = relu_a.shape[2]
    w_end = relu_a.shape[3]
    slice_a = fluid.layers.slice(relu_a, [2, 3], [1, 1], [h_end, w_end])
    conv2d_b = fluid.layers.conv2d(
        slice_a, C_out // 2, 1, 2,
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name=name + 'conv_2.weight'),
        bias_attr=False)
    out = fluid.layers.concat([conv2d_a, conv2d_b], axis=1)
    # ParamAttr's default learning_rate is 1.0, so one batch_norm call
    # replaces the old duplicated affine/non-affine branches.
    bn_lr = 1. if affine else 0.
    return fluid.layers.batch_norm(
        out,
        param_attr=ParamAttr(
            initializer=Constant(1.),
            learning_rate=bn_lr,
            name=name + 'bn.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.),
            learning_rate=bn_lr,
            name=name + 'bn.bias'),
        moving_mean_name=name + 'bn.running_mean',
        moving_variance_name=name + 'bn.running_var')
def __call__(self, roi_feat, wb_scalar=1.0, name=''):
    """Conv-norm stack over RoI features followed by one FC layer.

    Runs `self.num_conv` 3x3 ConvNorm blocks (ReLU, shared MSRA
    initializer) and then a ReLU FC of width `self.mlp_dim`.
    """
    conv = roi_feat
    fan = self.conv_dim * 3 * 3
    initializer = MSRA(uniform=False, fan_in=fan)
    for i in range(self.num_conv):
        # NOTE(review): this reassignment shadows the `name` parameter,
        # so the FC below is named after the LAST conv (e.g.
        # 'fc6bbox_head_conv2_w'), not the caller-supplied suffix.
        # Presumably kept for pretrained-weight name compatibility —
        # confirm before changing.
        name = 'bbox_head_conv' + str(i)
        conv = ConvNorm(
            conv,
            self.conv_dim,
            3,
            act='relu',
            initializer=initializer,
            norm_type=self.norm_type,
            freeze_norm=self.freeze_norm,
            lr_scale=wb_scalar,
            name=name,
            norm_name=name)
    # Fan-in for the FC: full flattened conv output.
    fan = conv.shape[1] * conv.shape[2] * conv.shape[3]
    head_heat = fluid.layers.fc(
        input=conv,
        size=self.mlp_dim,
        act='relu',
        name='fc6' + name,
        param_attr=ParamAttr(
            name='fc6%s_w' % name,
            initializer=Xavier(fan_out=fan),
            learning_rate=wb_scalar),
        bias_attr=ParamAttr(
            name='fc6%s_b' % name,
            regularizer=L2Decay(0.),
            learning_rate=wb_scalar * 2))
    return head_heat
def conv_block(input, groups, filters, ksizes, strides=None, with_pool=True):
    """Stack `groups` ReLU convs (shared weight/bias attrs), optionally
    followed by a ceil-mode 2x2 max pool.

    Returns (conv, pool) when `with_pool` is set, otherwise just conv.
    """
    assert len(filters) == groups
    assert len(ksizes) == groups
    if strides is None:
        strides = [1] * groups
    w_attr = ParamAttr(learning_rate=1., initializer=Xavier())
    b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))
    conv = input
    for n_filt, ksize, stride in zip(filters, ksizes, strides):
        conv = fluid.layers.conv2d(
            input=conv,
            num_filters=n_filt,
            filter_size=ksize,
            stride=stride,
            padding=(ksize - 1) // 2,  # "same" padding for odd kernels
            param_attr=w_attr,
            bias_attr=b_attr,
            act='relu')
    if not with_pool:
        return conv
    pool = fluid.layers.pool2d(
        input=conv,
        pool_size=2,
        pool_type='max',
        pool_stride=2,
        ceil_mode=True)
    return conv, pool
def __call__(self, inputs):
    """Fuse multi-resolution features with learned weights, then project
    the fused map with a 3x3 conv at the target stride."""
    # Bring every level up to the finest spatial scale in the list.
    for idx in range(len(inputs) - 1):
        up_scale = int(self.spatial_scales[-1] / self.spatial_scales[idx])
        inputs[idx] = fluid.layers.resize_nearest(
            inputs[idx], scale=up_scale)
    # Normalized per-level fusion weights.
    add_weights = self.get_add_weights(inputs)
    fused_feat = self.fuse_features(inputs, weights=add_weights)
    # Upsample once more if the fused map is coarser than `self.stride`.
    scale = int(self.stride / self.spatial_scales[-1])
    if scale > 1:
        fused_feat = fluid.layers.resize_nearest(fused_feat, scale=scale)
    fused_feat = fluid.layers.conv2d(
        fused_feat,
        self.num_channels,
        filter_size=3,
        stride=1,
        padding=1,
        param_attr=ParamAttr(
            name='centernet_fusefeat_w',
            initializer=Xavier(fan_out=fused_feat.shape[1])),
        bias_attr=ParamAttr(
            name='centernet_fusefeat_b', learning_rate=2.),
        name='centernet_fusefeat')
    return [fused_feat]
def conv3x3x3(in_planes, out_planes, stride=1):
    """3x3x3 Conv3D, padding 1, Xavier-initialized weights."""
    return Conv3D(
        in_planes,
        out_planes,
        filter_size=3,
        stride=stride,
        padding=1,
        param_attr=ParamAttr(initializer=Xavier()))
def _head_share(self, roi_feat, wb_scalar=2.0, name=''):
    """Shared FC6/FC7 trunk followed by the classification logits.

    Weight learning rates are scaled by `wb_scalar`; every bias uses a
    fixed 2x learning rate and no weight decay.
    """
    fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3]

    # All three biases share one recipe.
    def _bias(prefix):
        return ParamAttr(
            name='%s%s_b' % (prefix, name),
            learning_rate=2.0,
            regularizer=L2Decay(0.))

    fc6 = fluid.layers.fc(
        input=roi_feat,
        size=self.head.num_chan,
        act='relu',
        name='fc6' + name,
        param_attr=ParamAttr(
            name='fc6%s_w' % name,
            initializer=Xavier(fan_out=fan),
            learning_rate=wb_scalar),
        bias_attr=_bias('fc6'))
    fc7 = fluid.layers.fc(
        input=fc6,
        size=self.head.num_chan,
        act='relu',
        name='fc7' + name,
        param_attr=ParamAttr(
            name='fc7%s_w' % name,
            initializer=Xavier(),
            learning_rate=wb_scalar),
        bias_attr=_bias('fc7'))
    cls_score = fluid.layers.fc(
        input=fc7,
        size=self.num_classes,
        act=None,
        name='cls_score' + name,
        param_attr=ParamAttr(
            name='cls_score%s_w' % name,
            initializer=Normal(loc=0.0, scale=0.01),
            learning_rate=wb_scalar),
        bias_attr=_bias('cls_score'))
    return cls_score
def SevenConv(input, C_out, stride, name='', affine=True):
    """ReLU -> 1x7 conv -> 7x1 conv -> batch norm (a factorized 7x7).

    Args:
        input: input feature map.
        C_out: output channels of both factorized convs.
        stride: spatial stride (applied once per orientation).
        name: parameter-name prefix.
        affine: when False the BN scale/shift are frozen (learning rate 0).

    Returns:
        The batch-normalized output variable.  The original version
        computed `out` but fell through without returning it, so callers
        always received None — fixed here.
    """
    relu_a = fluid.layers.relu(input)
    conv2d_a = fluid.layers.conv2d(
        relu_a, C_out, (1, 7), (1, stride), (0, 3),
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name=name + 'op.1.weight'),
        bias_attr=False)
    conv2d_b = fluid.layers.conv2d(
        conv2d_a, C_out, (7, 1), (stride, 1), (3, 0),
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name=name + 'op.2.weight'),
        bias_attr=False)
    # ParamAttr's default learning_rate is 1.0, so one batch_norm call
    # covers both the trainable (affine) and frozen cases.
    bn_lr = 1. if affine else 0.
    out = fluid.layers.batch_norm(
        conv2d_b,
        param_attr=ParamAttr(
            initializer=Constant(1.),
            learning_rate=bn_lr,
            name=name + 'op.3.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.),
            learning_rate=bn_lr,
            name=name + 'op.3.bias'),
        moving_mean_name=name + 'op.3.running_mean',
        moving_variance_name=name + 'op.3.running_var')
    return out
def _lite_conv(self, x, out_c, act=None, name=None):
    """Lite conv block: 5x5 depthwise -> 1x1 -> 1x1 -> 5x5 depthwise.

    The two "linear" layers take the caller's `act` (often None); the
    other two use relu6.
    """
    # 5x5 depthwise over the input channels.
    feat = ConvNorm(
        input=x,
        num_filters=x.shape[1],
        filter_size=5,
        groups=x.shape[1],
        norm_type='bn',
        act='relu6',
        initializer=Xavier(),
        name=name + '.depthwise',
        norm_name=name + '.depthwise.bn')
    # 1x1 projection to out_c with caller-chosen activation.
    feat = ConvNorm(
        input=feat,
        num_filters=out_c,
        filter_size=1,
        norm_type='bn',
        act=act,
        initializer=Xavier(),
        name=name + '.pointwise_linear',
        norm_name=name + '.pointwise_linear.bn')
    # 1x1 with relu6.
    feat = ConvNorm(
        input=feat,
        num_filters=out_c,
        filter_size=1,
        norm_type='bn',
        act='relu6',
        initializer=Xavier(),
        name=name + '.pointwise',
        norm_name=name + '.pointwise.bn')
    # Final 5x5 depthwise, again with caller-chosen activation.
    feat = ConvNorm(
        input=feat,
        num_filters=out_c,
        filter_size=5,
        groups=out_c,
        norm_type='bn',
        act=act,
        initializer=Xavier(),
        name=name + '.depthwise_linear',
        norm_name=name + '.depthwise_linear.bn')
    return feat
def _deconv_upsample(self, x, out_c, name=None):
    """2x upsample: 1x1 ConvNorm -> depthwise 4x4 transposed conv + BN
    -> 1x1 ConvNorm, all relu6-activated."""
    pointwise = ConvNorm(
        input=x,
        num_filters=out_c,
        filter_size=1,
        norm_type='bn',
        act='relu6',
        name=name + '.pointwise',
        initializer=Xavier(),
        norm_name=name + '.pointwise.bn')
    # Depthwise transposed conv (groups == channels) doubles H and W.
    deconv = fluid.layers.conv2d_transpose(
        input=pointwise,
        num_filters=out_c,
        filter_size=4,
        padding=1,
        stride=2,
        groups=out_c,
        param_attr=ParamAttr(
            name=name + '.deconv.weights', initializer=Xavier()),
        bias_attr=False)
    deconv_bn = fluid.layers.batch_norm(
        input=deconv,
        act='relu6',
        param_attr=ParamAttr(
            name=name + '.deconv.bn.scale', regularizer=L2Decay(0.)),
        bias_attr=ParamAttr(
            name=name + '.deconv.bn.offset', regularizer=L2Decay(0.)),
        moving_mean_name=name + '.deconv.bn.mean',
        moving_variance_name=name + '.deconv.bn.variance')
    return ConvNorm(
        input=deconv_bn,
        num_filters=out_c,
        filter_size=1,
        norm_type='bn',
        act='relu6',
        name=name + '.normal',
        initializer=Xavier(),
        norm_name=name + '.normal.bn')
def __init__(self,
             block,
             layers,
             block_inplanes,
             n_input_channels=3,
             conv1_t_size=7,
             conv1_t_stride=1,
             no_max_pool=False,
             shortcut_type='B',
             widen_factor=1.0,
             n_classes=400):
    """3D ResNet: Conv3D+BN stem, four residual stages, sigmoid FC head."""
    super(ResNet, self).__init__()
    block_inplanes = [int(x * widen_factor) for x in block_inplanes]
    self.in_planes = block_inplanes[0]
    self.no_max_pool = no_max_pool
    # Stem: temporal kernel/stride are configurable, spatial is 7/2.
    self.conv1 = Conv3D(
        n_input_channels,
        self.in_planes,
        filter_size=(conv1_t_size, 7, 7),
        stride=(conv1_t_stride, 2, 2),
        padding=(conv1_t_size // 2, 3, 3))
    self.bn1 = BatchNorm(self.in_planes)
    # self.maxpool = MaxPool3D(filter_size=3, stride=2, padding=1)
    # Stage 1 keeps stride 1; stages 2-4 downsample by 2.
    self.layer1 = self._make_layer(block, block_inplanes[0], layers[0],
                                   shortcut_type)
    for idx in (2, 3, 4):
        setattr(self, 'layer%d' % idx,
                self._make_layer(block, block_inplanes[idx - 1],
                                 layers[idx - 1], shortcut_type, stride=2))
    # self.avgpool = AdaptiveAvgPool3d(1, 1, 1)
    self.fc = Linear(
        block_inplanes[3] * block.expansion,
        n_classes,
        param_attr=ParamAttr(initializer=Xavier()),
        act='sigmoid')
def __call__(self, inputs):
    """Fuse features from different resolutions.

    With `single_scale` only the last input is used; otherwise every
    level is upsampled to the finest scale and combined with normalized
    (ReLU, sum-to-one) learnable weights.  The fused map is upsampled to
    stride 4 if needed and projected by a 3x3 conv.
    """
    if self.single_scale:
        # One level only — no fusion needed.
        fused_feat = inputs[-1]
    else:
        # Upsample all coarser levels to the finest spatial scale.
        for idx in range(len(inputs) - 1):
            up_scale = int(self.spatial_scales[-1] /
                           self.spatial_scales[idx])
            inputs[idx] = fluid.layers.resize_nearest(
                inputs[idx], scale=up_scale)
        # ReLU keeps weights non-negative; eps guards the division.
        weights = fluid.layers.relu(self.add_weights)
        weights /= fluid.layers.reduce_sum(
            weights, dim=0, keep_dim=True) + self.eps
        # Weighted sum across levels.
        fused_feat = inputs[0] * weights[0]
        for idx in range(1, len(inputs)):
            fused_feat += inputs[idx] * weights[idx]
    # Final upsample to reach stride-4 features when still coarser.
    scale = int(0.25 / self.spatial_scales[-1])
    if scale > 1:
        fused_feat = fluid.layers.resize_nearest(fused_feat, scale=scale)
    fused_feat = fluid.layers.conv2d(
        fused_feat,
        fused_feat.shape[1],
        filter_size=3,
        stride=1,
        padding=1,
        param_attr=ParamAttr(
            name='centernet_fusefeat_w',
            initializer=Xavier(fan_out=fused_feat.shape[1])),
        bias_attr=ParamAttr(
            name='centernet_fusefeat_b', learning_rate=2.),
        name='centernet_fusefeat')
    return [fused_feat]
def ReLUConvBN(input, C_out, kernel_size, stride, padding, name='',
               affine=True):
    """ReLU -> conv -> batch norm.

    Args:
        input: input feature map.
        C_out: output channels.
        kernel_size, stride, padding: conv geometry.
        name: parameter-name prefix.
        affine: when False the BN scale/shift are frozen (learning rate 0).

    Returns:
        The batch-normalized output variable.
    """
    relu_a = fluid.layers.relu(input)
    conv2d_a = fluid.layers.conv2d(
        relu_a,
        C_out,
        kernel_size,
        stride,
        padding,
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name=name + 'op.1.weight'),
        bias_attr=False)
    # ParamAttr's default learning_rate is 1.0, so one batch_norm call
    # replaces the old duplicated affine/non-affine branches.
    bn_lr = 1. if affine else 0.
    return fluid.layers.batch_norm(
        conv2d_a,
        param_attr=ParamAttr(
            initializer=Constant(1.),
            learning_rate=bn_lr,
            name=name + 'op.2.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.),
            learning_rate=bn_lr,
            name=name + 'op.2.bias'),
        moving_mean_name=name + 'op.2.running_mean',
        moving_variance_name=name + 'op.2.running_var')
def __call__(self, inputs): feats = [] # NOTE add two extra levels for idx in range(self.levels): if idx <= len(inputs): if idx == len(inputs): feat = inputs[-1] else: feat = inputs[idx] if feat.shape[1] != self.num_chan: feat = fluid.layers.conv2d( feat, self.num_chan, filter_size=1, padding='SAME', param_attr=ParamAttr(initializer=Xavier()), bias_attr=ParamAttr(regularizer=L2Decay(0.))) feat = fluid.layers.batch_norm( feat, momentum=0.997, epsilon=1e-04, param_attr=ParamAttr( initializer=Constant(1.0), regularizer=L2Decay(0.)), bias_attr=ParamAttr(regularizer=L2Decay(0.))) if idx >= len(inputs): feat = fluid.layers.pool2d( feat, pool_type='max', pool_size=3, pool_stride=2, pool_padding='SAME') feats.append(feat) biFPN = BiFPNCell(self.num_chan, self.levels) for r in range(self.repeat): feats = biFPN(feats, 'bifpn_{}'.format(r)) return feats
def add_fpn_neck(
        inputs,
        out_channels=256,
        num_outs=5,
        start_level=0,
        end_level=-1, ):
    """Build an FPN neck over `inputs` (backbone feature variables).

    Args:
        inputs: backbone feature maps, fine to coarse.
        out_channels: channel count of every output level.
        num_outs: total output levels; extras come from stride-2 convs.
        start_level / end_level: slice of `inputs` to use (-1 = all).

    Returns:
        Tuple of `num_outs` feature-map variables.
    """
    if end_level == -1:
        end_level = len(inputs)
    # 1x1 lateral convs.
    lateral_convs = []
    for i in range(start_level, end_level):
        if i < (end_level - 1):
            lateral_name = 'fpn_inner_' + inputs[i].name + '_lateral'
        else:
            lateral_name = 'fpn_inner_' + inputs[i].name
        conv = fluid.layers.conv2d(
            input=inputs[i],
            num_filters=out_channels,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            param_attr=ParamAttr(
                name=lateral_name + '_w', initializer=Xavier()),
            bias_attr=ParamAttr(
                name=lateral_name + '_b',
                initializer=Constant(value=0.0),
                learning_rate=2.,
                regularizer=L2Decay(0.)),
            name=lateral_name)
        lateral_convs.append(conv)
    # Top-down path: upsample each coarser level to twice its own H/W
    # (computed at runtime) and add it into the next finer lateral.
    used_body_levels = len(lateral_convs)
    for i in range(used_body_levels - 1, 0, -1):
        shape = fluid.layers.shape(lateral_convs[i])
        shape_hw = fluid.layers.slice(
            shape, axes=[0], starts=[2], ends=[4])
        shape_hw.stop_gradient = True
        in_shape = fluid.layers.cast(shape_hw, dtype='int32')
        out_shape = in_shape * 2
        out_shape.stop_gradient = True
        fpn_inner_name = lateral_convs[i - 1].name.replace('lateral', '')
        topdown_name = fpn_inner_name + '_topdown'
        top_down = fluid.layers.resize_nearest(
            lateral_convs[i],
            scale=2.,
            actual_shape=out_shape,
            name=topdown_name)
        # Fix: the layer name must be the string, not the tensor itself
        # (the original passed `name=top_down`).
        lateral_convs[i - 1] = fluid.layers.elementwise_add(
            x=lateral_convs[i - 1], y=top_down, name=topdown_name)
    # Outputs, part 1: a 3x3 conv on every fused level.
    outs = []
    for i in range(used_body_levels):
        fpn_name = lateral_convs[i].name.replace('inner', '')
        conv = fluid.layers.conv2d(
            input=lateral_convs[i],
            num_filters=out_channels,
            filter_size=3,
            stride=1,
            padding=1,
            act=None,
            param_attr=ParamAttr(
                name=fpn_name + '_w', initializer=Xavier()),
            bias_attr=ParamAttr(
                name=fpn_name + '_b',
                initializer=Constant(value=0.0),
                learning_rate=2.,
                regularizer=L2Decay(0.)),
            name=fpn_name)
        outs.append(conv)
    # Outputs, part 2: extra coarser levels from repeated stride-2 ReLU
    # convs starting at the coarsest backbone input.
    if (num_outs > used_body_levels):
        fpn_blob = inputs[end_level - 1]
        for i in range(end_level, num_outs):
            fpn_blob_in = fpn_blob
            fpn_blob = fluid.layers.conv2d(
                input=fpn_blob_in,
                num_filters=out_channels,
                filter_size=3,
                stride=2,
                padding=1,
                act='relu',
                param_attr=ParamAttr(
                    name='fpn{}_w'.format(i + 3), initializer=Xavier()),
                bias_attr=ParamAttr(
                    name='fpn{}_b'.format(i + 3),
                    initializer=Constant(value=0.0),
                    learning_rate=2.,
                    regularizer=L2Decay(0.)),
                name="fpn{}".format(i))
            # Fix: append the newly built level; the original appended
            # the stale `conv` left over from the loop above.
            outs.append(fpn_blob)
    return tuple(outs)
def get_output(self, body_dict):
    """
    Add FPN onto backbone.

    Args:
        body_dict(OrderedDict): Dictionary of variables and each element is the
            output of backbone.

    Return:
        fpn_dict(OrderedDict): A dictionary represents the output of FPN with
            their name.
        spatial_scale(list): A list of multiplicative spatial scale factor.
    """
    # Process backbone stages coarsest-first (reversed key order).
    body_name_list = list(body_dict.keys())[::-1]
    num_backbone_stages = len(body_name_list)
    self.fpn_inner_output = [[] for _ in range(num_backbone_stages)]
    # Topmost (coarsest) lateral: plain 1x1 projection, no top-down input.
    fpn_inner_name = 'fpn_inner_' + body_name_list[0]
    body_input = body_dict[body_name_list[0]]
    fan = body_input.shape[1]
    if self.norm_type:
        initializer = Xavier(fan_out=fan)
        self.fpn_inner_output[0] = ConvNorm(
            body_input,
            self.num_chan,
            1,
            initializer=initializer,
            norm_type=self.norm_type,
            name=fpn_inner_name,
            bn_name=fpn_inner_name)
    else:
        self.fpn_inner_output[0] = fluid.layers.conv2d(
            body_input,
            self.num_chan,
            1,
            param_attr=ParamAttr(
                name=fpn_inner_name + "_w",
                initializer=Xavier(fan_out=fan)),
            bias_attr=ParamAttr(
                name=fpn_inner_name + "_b",
                learning_rate=2.,
                regularizer=L2Decay(0.)),
            name=fpn_inner_name)
    # Remaining stages: lateral conv + upsampled coarser level.
    for i in range(1, num_backbone_stages):
        body_name = body_name_list[i]
        body_input = body_dict[body_name]
        top_output = self.fpn_inner_output[i - 1]
        fpn_inner_single = self._add_topdown_lateral(body_name, body_input,
                                                     top_output)
        self.fpn_inner_output[i] = fpn_inner_single
    # 3x3 output convs on every fused level.
    fpn_dict = {}
    fpn_name_list = []
    for i in range(num_backbone_stages):
        fpn_name = 'fpn_' + body_name_list[i]
        fan = self.fpn_inner_output[i].shape[1] * 3 * 3
        if self.norm_type:
            initializer = Xavier(fan_out=fan)
            fpn_output = ConvNorm(
                self.fpn_inner_output[i],
                self.num_chan,
                3,
                initializer=initializer,
                norm_type=self.norm_type,
                name=fpn_name,
                bn_name=fpn_name)
        else:
            fpn_output = fluid.layers.conv2d(
                self.fpn_inner_output[i],
                self.num_chan,
                filter_size=3,
                padding=1,
                param_attr=ParamAttr(
                    name=fpn_name + "_w", initializer=Xavier(fan_out=fan)),
                bias_attr=ParamAttr(
                    name=fpn_name + "_b",
                    learning_rate=2.,
                    regularizer=L2Decay(0.)),
                name=fpn_name)
        fpn_dict[fpn_name] = fpn_output
        fpn_name_list.append(fpn_name)
    # Without extra convs: add one coarser level by 2x max-pool
    # subsampling of the top FPN output.
    # NOTE(review): this mutates self.spatial_scale in place, so calling
    # get_output twice would keep prepending scales — confirm callers
    # only build the graph once.
    if not self.has_extra_convs and self.max_level - self.min_level == len(
            self.spatial_scale):
        body_top_name = fpn_name_list[0]
        body_top_extension = fluid.layers.pool2d(
            fpn_dict[body_top_name],
            1,
            'max',
            pool_stride=2,
            name=body_top_name + '_subsampled_2x')
        fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension
        fpn_name_list.insert(0, body_top_name + '_subsampled_2x')
        self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)
    # Coarser FPN levels introduced for RetinaNet
    highest_backbone_level = self.min_level + len(self.spatial_scale) - 1
    if self.has_extra_convs and self.max_level > highest_backbone_level:
        # Extra levels: stride-2 convs from the coarsest backbone output,
        # with ReLU between consecutive extra convs.
        fpn_blob = body_dict[body_name_list[0]]
        for i in range(highest_backbone_level + 1, self.max_level + 1):
            fpn_blob_in = fpn_blob
            fpn_name = 'fpn_' + str(i)
            if i > highest_backbone_level + 1:
                fpn_blob_in = fluid.layers.relu(fpn_blob)
            fan = fpn_blob_in.shape[1] * 3 * 3
            fpn_blob = fluid.layers.conv2d(
                input=fpn_blob_in,
                num_filters=self.num_chan,
                filter_size=3,
                stride=2,
                padding=1,
                param_attr=ParamAttr(
                    name=fpn_name + "_w", initializer=Xavier(fan_out=fan)),
                bias_attr=ParamAttr(
                    name=fpn_name + "_b",
                    learning_rate=2.,
                    regularizer=L2Decay(0.)),
                name=fpn_name)
            fpn_dict[fpn_name] = fpn_blob
            fpn_name_list.insert(0, fpn_name)
            self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)
    # Preserve coarse-to-fine ordering in the returned dict.
    res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
    return res_dict, self.spatial_scale
def R_Net():
    """Build the MTCNN R-Net (24x24 refinement network) training graph.

    Returns the data layers, the three OHEM losses (classification, bbox,
    landmarks), the accuracy metric, and the three prediction heads.
    """
    # Define the input layers.
    image = fluid.layers.data(name='image', shape=[3, 24, 24], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    bbox_target = fluid.layers.data(
        name='bbox_target', shape=[4], dtype='float32')
    landmark_target = fluid.layers.data(
        name='landmark_target', shape=[10], dtype='float32')
    # First convolution layer.
    conv1 = fluid.layers.conv2d(
        input=image,
        num_filters=28,
        filter_size=3,
        param_attr=ParamAttr(
            initializer=Xavier(), regularizer=L2DecayRegularizer(0.0005)),
        name='conv1')
    conv1_prelu = fluid.layers.prelu(x=conv1, mode='all', name='conv1_prelu')
    # First pooling layer.
    pool1 = fluid.layers.pool2d(
        input=conv1_prelu, pool_size=3, pool_stride=2, name='pool1')
    # Second convolution layer.
    conv2 = fluid.layers.conv2d(
        input=pool1,
        num_filters=48,
        filter_size=3,
        param_attr=ParamAttr(
            initializer=Xavier(), regularizer=L2DecayRegularizer(0.0005)),
        name='conv2')
    conv2_prelu = fluid.layers.prelu(x=conv2, mode='all', name='conv2_prelu')
    # Second pooling layer.
    pool2 = fluid.layers.pool2d(
        input=conv2_prelu, pool_size=3, pool_stride=2, name='pool2')
    # Third convolution layer.
    conv3 = fluid.layers.conv2d(
        input=pool2,
        num_filters=64,
        filter_size=2,
        param_attr=ParamAttr(
            initializer=Xavier(), regularizer=L2DecayRegularizer(0.0005)),
        name='conv3')
    conv3_prelu = fluid.layers.prelu(x=conv3, mode='all', name='conv3_prelu')
    # Flatten the conv feature maps.
    fc_flatten = fluid.layers.flatten(conv3_prelu)
    # First fully-connected layer.
    fc1 = fluid.layers.fc(input=fc_flatten, size=128, name='fc1')
    # Face / non-face classification output layer.
    cls_prob = fluid.layers.fc(input=fc1, size=2, act='softmax', name='cls_fc')
    # Cross-entropy (OHEM) loss for the face classification.
    cls_loss = cls_ohem(cls_prob=cls_prob, label=label)
    # Face bounding-box output layer.
    bbox_pred = fluid.layers.fc(input=fc1, size=4, act=None, name='bbox_fc')
    # Squared-error (OHEM) loss for the bounding box.
    bbox_loss = bbox_ohem(
        bbox_pred=bbox_pred, bbox_target=bbox_target, label=label)
    # Output layer for the 5 facial landmarks.
    landmark_pred = fluid.layers.fc(
        input=fc1, size=10, act=None, name='landmark_fc')
    # Squared-error (OHEM) loss for the landmarks.
    landmark_loss = landmark_ohem(
        landmark_pred=landmark_pred,
        landmark_target=landmark_target,
        label=label)
    # Accuracy metric.
    accuracy = cal_accuracy(cls_prob=cls_prob, label=label)
    return image, label, bbox_target, landmark_target, cls_loss, bbox_loss, landmark_loss, accuracy, cls_prob, bbox_pred, landmark_pred
def P_Net():
    """Build the MTCNN P-Net (12x12 proposal network) training graph.

    Fully convolutional: the three heads are 1x1 convs whose outputs are
    transposed to NHWC and squeezed.  Returns the data layers, the three
    OHEM losses, the accuracy metric, and the three prediction heads.
    """
    # Define the input layers.
    image = fluid.layers.data(name='image', shape=[3, 12, 12], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    bbox_target = fluid.layers.data(
        name='bbox_target', shape=[4], dtype='float32')
    landmark_target = fluid.layers.data(
        name='landmark_target', shape=[10], dtype='float32')
    # First convolution layer.
    conv1 = fluid.layers.conv2d(
        input=image,
        num_filters=10,
        filter_size=3,
        param_attr=ParamAttr(
            initializer=Xavier(), regularizer=L2DecayRegularizer(0.0005)),
        name='conv1')
    conv1_prelu = fluid.layers.prelu(x=conv1, mode='all', name='conv1_prelu')
    # The only pooling layer.
    pool1 = fluid.layers.pool2d(
        input=conv1_prelu, pool_size=2, pool_stride=2, name='pool1')
    # Second convolution layer.
    conv2 = fluid.layers.conv2d(
        input=pool1,
        num_filters=16,
        filter_size=3,
        param_attr=ParamAttr(
            initializer=Xavier(), regularizer=L2DecayRegularizer(0.0005)),
        name='conv2')
    conv2_prelu = fluid.layers.prelu(x=conv2, mode='all', name='conv2_prelu')
    # Third convolution layer.
    conv3 = fluid.layers.conv2d(
        input=conv2_prelu,
        num_filters=32,
        filter_size=3,
        param_attr=ParamAttr(
            initializer=Xavier(), regularizer=L2DecayRegularizer(0.0005)),
        name='conv3')
    conv3_prelu = fluid.layers.prelu(x=conv3, mode='all', name='conv3_prelu')
    # 1x1 conv output layer for the face / non-face classification.
    conv4_1 = fluid.layers.conv2d(
        input=conv3_prelu,
        num_filters=2,
        filter_size=1,
        param_attr=ParamAttr(
            initializer=Xavier(), regularizer=L2DecayRegularizer(0.0005)),
        name='conv4_1')
    conv4_1 = fluid.layers.transpose(conv4_1, [0, 2, 3, 1])
    # axes=[] squeezes every size-1 dimension (the 1x1 spatial map
    # produced by 12x12 training crops).
    conv4_1 = fluid.layers.squeeze(input=conv4_1, axes=[])
    conv4_1_softmax = fluid.layers.softmax(input=conv4_1)
    # 1x1 conv output layer for the bounding-box regression.
    conv4_2 = fluid.layers.conv2d(
        input=conv3_prelu,
        num_filters=4,
        filter_size=1,
        param_attr=ParamAttr(
            initializer=Xavier(), regularizer=L2DecayRegularizer(0.0005)),
        name='conv4_2')
    conv4_2 = fluid.layers.transpose(conv4_2, [0, 2, 3, 1])
    # 1x1 conv output layer for the 5 facial landmarks.
    conv4_3 = fluid.layers.conv2d(
        input=conv3_prelu,
        num_filters=10,
        filter_size=1,
        param_attr=ParamAttr(
            initializer=Xavier(), regularizer=L2DecayRegularizer(0.0005)),
        name='conv4_3')
    conv4_3 = fluid.layers.transpose(conv4_3, [0, 2, 3, 1])
    # Cross-entropy (OHEM) loss for the face classification.
    cls_prob = fluid.layers.squeeze(
        input=conv4_1_softmax, axes=[], name='cls_prob')
    cls_loss = cls_ohem(cls_prob=cls_prob, label=label)
    # Squared-error (OHEM) loss for the bounding box.
    bbox_pred = fluid.layers.squeeze(input=conv4_2, axes=[], name='bbox_pred')
    bbox_loss = bbox_ohem(
        bbox_pred=bbox_pred, bbox_target=bbox_target, label=label)
    # Squared-error (OHEM) loss for the 5 landmarks.
    landmark_pred = fluid.layers.squeeze(
        input=conv4_3, axes=[], name='landmark_pred')
    landmark_loss = landmark_ohem(
        landmark_pred=landmark_pred,
        landmark_target=landmark_target,
        label=label)
    # Accuracy metric.
    accuracy = cal_accuracy(cls_prob=cls_prob, label=label)
    return image, label, bbox_target, landmark_target, cls_loss, bbox_loss, landmark_loss, accuracy, cls_prob, bbox_pred, landmark_pred
def __init__(self, name_scope, out_channels=256, num_outs=5, start_level=0,
             end_level=3):
    """Dygraph FPN neck: lateral 1x1 convs, output 3x3 convs, and two
    extra stride-2 convs when `num_outs` exceeds the body levels.

    Args:
        name_scope: dygraph layer name scope.
        out_channels: channels of every lateral/output conv.
        num_outs: total output levels.
        start_level / end_level: slice of the backbone outputs used.
    """
    super(add_fpn_neck, self).__init__(name_scope)
    self.inputs_name = ['res3_3_sum', 'res4_5_sum', 'res5_2_sum']
    self.start_level = start_level
    self.end_level = end_level
    self.num_outs = num_outs
    # 1x1 lateral convs, one per used backbone level.
    self.lateral_conv_list = []
    for i in range(self.start_level, self.end_level):
        if i < (self.end_level - 1):
            lateral_name = 'fpn_inner_' + self.inputs_name[i] + '_lateral'
        else:
            lateral_name = 'fpn_inner_' + self.inputs_name[i]
        lateral_conv = self.add_sublayer(
            lateral_name,
            Conv2D(lateral_name,
                   num_filters=out_channels,
                   filter_size=1,
                   stride=1,
                   padding=0,
                   act=None,
                   param_attr=ParamAttr(
                       name=lateral_name + '_w', initializer=Xavier()),
                   bias_attr=ParamAttr(
                       name=lateral_name + '_b',
                       initializer=Constant(value=0.0),
                       learning_rate=2.,
                       regularizer=L2Decay(0.))))
        self.lateral_conv_list.append(lateral_conv)
    # 3x3 output convs.
    self.out_conv_list = []
    for i in range(self.end_level - self.start_level):
        fpn_name = 'fpn_' + self.inputs_name[i]
        # Fix: the conv's name scope must be fpn_name; the original
        # reused the stale `lateral_name` left over from the loop above.
        out_conv = self.add_sublayer(
            fpn_name,
            Conv2D(fpn_name,
                   num_filters=out_channels,
                   filter_size=3,
                   stride=1,
                   padding=1,
                   act=None,
                   param_attr=ParamAttr(
                       name=fpn_name + '_w', initializer=Xavier()),
                   bias_attr=ParamAttr(
                       name=fpn_name + '_b',
                       initializer=Constant(value=0.0),
                       learning_rate=2.,
                       regularizer=L2Decay(0.))))
        self.out_conv_list.append(out_conv)
    # Extra stride-2 convs produce the coarser P6/P7 levels.
    if (self.num_outs > (self.end_level - self.start_level)):
        self.out_conv6 = Conv2D(
            "fpn_{}".format(6),
            num_filters=out_channels,
            filter_size=3,
            stride=2,
            padding=1,
            act=None,
            param_attr=ParamAttr(
                name='fpn_{}_w'.format(6), initializer=Xavier()),
            bias_attr=ParamAttr(
                name='fpn_{}_b'.format(6),
                initializer=Constant(value=0.0),
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        self.out_conv7 = Conv2D(
            "fpn_{}".format(7),
            num_filters=out_channels,
            filter_size=3,
            stride=2,
            padding=1,
            act=None,
            param_attr=ParamAttr(
                name='fpn_{}_w'.format(7), initializer=Xavier()),
            bias_attr=ParamAttr(
                name='fpn_{}_b'.format(7),
                initializer=Constant(value=0.0),
                learning_rate=2.,
                regularizer=L2Decay(0.)))
def __init__(self,
             in_channels=[2048, 1024, 512, 256],
             num_chan=256,
             min_level=2,
             max_level=6,
             spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. / 4.],
             has_extra_convs=False,
             norm_type=None,
             norm_decay=0.,
             freeze_norm=False,
             use_c5=True,
             relu_before_extra_convs=False,
             reverse_out=False):
    """FPN module (paddle 2.x LayerList style).

    Builds the 1x1 lateral convs, the 3x3 output convs, and optionally
    the extra stride-2 convs for levels above the backbone.

    NOTE(review): `in_channels` and `spatial_scale` are mutable default
    arguments — shared across instances if mutated; confirm nothing
    writes to them.
    """
    super(FPN, self).__init__()
    self.in_channels = in_channels
    self.freeze_norm = freeze_norm
    self.num_chan = num_chan
    self.min_level = min_level
    self.max_level = max_level
    self.spatial_scale = spatial_scale
    self.has_extra_convs = has_extra_convs
    self.norm_type = norm_type
    self.norm_decay = norm_decay
    self.use_c5 = use_c5
    self.relu_before_extra_convs = relu_before_extra_convs
    self.reverse_out = reverse_out
    # Number of backbone tensors entering the FPN.
    self.num_backbone_stages = len(in_channels)
    # Convs applied to the backbone tensors (s32, s16, s8, ...).
    self.fpn_inner_convs = paddle.nn.LayerList()
    # Convs applied to the fused maps (fs32, fs16, fs8, ...).
    self.fpn_convs = paddle.nn.LayerList()
    # fpn_inner_convs
    for i in range(0, self.num_backbone_stages):
        cname = 'fpn_inner_res%d_sum_lateral' % (5 - i, )
        if i == 0:
            # The coarsest level has no top-down input, hence no
            # '_lateral' suffix.
            cname = 'fpn_inner_res%d_sum' % (5 - i, )
        # Bias only when there is no norm layer to absorb it.
        use_bias = True if norm_type is None else False
        conv = Conv2dUnit(
            in_channels[i],
            self.num_chan,
            1,
            stride=1,
            bias_attr=use_bias,
            norm_type=norm_type,
            bias_lr=2.0,
            weight_init=Xavier(fan_out=in_channels[i] * 1 * 1),
            bias_init=Constant(0.0),
            act=None,
            freeze_norm=self.freeze_norm,
            norm_decay=self.norm_decay,
            name=cname)
        self.fpn_inner_convs.append(conv)
    # fpn_convs
    for i in range(0, self.num_backbone_stages):
        use_bias = True if norm_type is None else False
        conv = Conv2dUnit(
            self.num_chan,
            self.num_chan,
            3,
            stride=1,
            bias_attr=use_bias,
            norm_type=norm_type,
            bias_lr=2.0,
            weight_init=Xavier(fan_out=self.num_chan * 3 * 3),
            bias_init=Constant(0.0),
            act=None,
            freeze_norm=self.freeze_norm,
            norm_decay=self.norm_decay,
            name='fpn_res%d_sum' % (5 - i, ))
        self.fpn_convs.append(conv)
    # Extra-scale feature maps via pooling: not used here.
    # pass
    # Extra-scale feature maps via stride-2 conv layers:
    self.extra_convs = None
    highest_backbone_level = self.min_level + len(spatial_scale) - 1
    if self.has_extra_convs and self.max_level > highest_backbone_level:
        self.extra_convs = paddle.nn.LayerList()
        # First extra conv reads C5 directly or the last FPN output.
        if self.use_c5:
            in_c = in_channels[0]
            fan = in_c * 3 * 3
        else:
            in_c = self.num_chan
            fan = in_c * 3 * 3
        for i in range(highest_backbone_level + 1, self.max_level + 1):
            use_bias = True if norm_type is None else False
            conv = Conv2dUnit(
                in_c,
                self.num_chan,
                3,
                stride=2,
                bias_attr=use_bias,
                norm_type=norm_type,
                bias_lr=2.0,
                weight_init=Xavier(fan_out=fan),
                bias_init=Constant(0.0),
                act=None,
                freeze_norm=self.freeze_norm,
                norm_decay=self.norm_decay,
                name='fpn_%d' % (i, ))
            self.extra_convs.append(conv)
            # Subsequent extra convs read the previous extra output.
            in_c = self.num_chan
def _get_default_initializer(self, dtype):
    """Pick the default parameter initializer for `dtype`.

    Floating-point (or unspecified) dtypes get Xavier; other dtypes are
    zero-initialized, since Xavier only applies to real-valued weights.
    """
    # Direct truth test instead of the non-idiomatic `... is True`.
    if dtype is None or dtype_is_floating(dtype):
        return Xavier()
    # For integer and boolean types, initialize with all zeros.
    return Constant()