Python Constant示例，paddle.nn.initializer.Constant Python示例

示例#1

0

显示文件

文件： gfl_head.py 项目： xiegegege/PaddleDetection

 def __init__(self, reg_topk=4, reg_channels=64, add_mean=True):
     """Set up the two 1x1 convolution sublayers of this module.

     Args:
         reg_topk: Base value of ``total_dim``; also stored on the instance.
         reg_channels: Output channels of the first convolution and input
             channels of the second.
         add_mean: When truthy, ``total_dim`` becomes ``reg_topk + 1``.
     """
     super(DGQP, self).__init__()
     self.reg_topk = reg_topk
     self.reg_channels = reg_channels
     self.add_mean = add_mean
     self.total_dim = reg_topk + 1 if add_mean else reg_topk

     # First 1x1 conv: 4 * total_dim -> reg_channels.
     hidden_conv = nn.Conv2D(
         in_channels=4 * self.total_dim,
         out_channels=self.reg_channels,
         kernel_size=1,
         weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.01)),
         bias_attr=ParamAttr(initializer=Constant(value=0)))
     self.reg_conv1 = self.add_sublayer('dgqp_reg_conv1', hidden_conv)

     # Second 1x1 conv: reg_channels -> 1.
     output_conv = nn.Conv2D(
         in_channels=self.reg_channels,
         out_channels=1,
         kernel_size=1,
         weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.01)),
         bias_attr=ParamAttr(initializer=Constant(value=0)))
     self.reg_conv2 = self.add_sublayer('dgqp_reg_conv2', output_conv)

示例#2

0

显示文件

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 weight_attr=None,
                 bias_attr=None,
                 lr_scale=1,
                 regularizer=None,
                 name=None):
        """Create the offset/mask convolution and the deformable convolution.

        Args:
            in_channels: Input channels of both convolutions.
            out_channels: Output channels of the deformable convolution.
            kernel_size: Kernel size of both convolutions; also determines
                ``offset_channel``, ``mask_channel`` and the paddings.
            stride: Stride of both convolutions.
            padding: Not read by this method; paddings are derived from
                ``kernel_size`` (and ``dilation`` for the deformable conv).
            dilation: Dilation of the deformable convolution.
            groups: Groups of the deformable convolution.
            weight_attr: Weight attribute of the deformable convolution.
            bias_attr: Only its truthiness is used. When truthy, the
                deformable convolution gets a newly built bias attribute
                (zero init, ``L2Decay(0.)``, learning rate 2.); otherwise the
                bias is disabled.
            lr_scale: Learning rate of the offset convolution bias when it is
                not 1 or a regularizer is given.
            regularizer: Regularizer of the offset convolution bias.
            name: Prefix used to build parameter names.
        """
        super(DeformableConvV2, self).__init__()
        self.offset_channel = 2 * kernel_size**2
        self.mask_channel = kernel_size**2

        if lr_scale == 1 and regularizer is None:
            offset_bias_attr = ParamAttr(
                initializer=Constant(0.),
                name='{}._conv_offset.bias'.format(name))
        else:
            offset_bias_attr = ParamAttr(
                initializer=Constant(0.),
                learning_rate=lr_scale,
                regularizer=regularizer,
                name='{}._conv_offset.bias'.format(name))
        # Offsets and masks start at zero (weights and bias are Constant(0)).
        self.conv_offset = nn.Conv2D(
            in_channels,
            3 * kernel_size**2,
            kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            weight_attr=ParamAttr(
                initializer=Constant(0.0),
                name='{}._conv_offset.weight'.format(name)),
            bias_attr=offset_bias_attr)

        if bias_attr:
            # in FCOS-DCN head, specifically need learning_rate and regularizer
            # ``name`` defaults to None; concatenating onto None raised
            # TypeError, so fall back to an unnamed attribute in that case.
            dcn_bias_name = None if name is None else name + "_bias"
            dcn_bias_attr = ParamAttr(
                name=dcn_bias_name,
                initializer=Constant(value=0),
                regularizer=L2Decay(0.),
                learning_rate=2.)
        else:
            # in ResNet backbone, do not need bias
            dcn_bias_attr = False
        self.conv_dcn = DeformConv2D(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2 * dilation,
            dilation=dilation,
            groups=groups,
            weight_attr=weight_attr,
            bias_attr=dcn_bias_attr)

示例#3

0

显示文件

文件： layers.py 项目： AFei19911012/PythonSamples

def ConvTranspose2d(in_channels,
                    out_channels,
                    kernel_size,
                    stride=1,
                    padding=0,
                    output_padding=0,
                    groups=1,
                    bias=True,
                    dilation=1,
                    weight_init=Normal(std=0.001),
                    bias_init=Constant(0.)):
    """Build an ``nn.Conv2DTranspose`` layer with explicit initializers.

    Args:
        in_channels: Input channels.
        out_channels: Output channels.
        kernel_size: Kernel size.
        stride: Stride.
        padding: Padding.
        output_padding: Output padding.
        groups: Number of groups.
        bias: When truthy, a bias initialized by ``bias_init`` is created;
            otherwise the bias is disabled.
        dilation: Dilation.
        weight_init: Initializer for the weight.
        bias_init: Initializer for the bias.

    Returns:
        The constructed ``nn.Conv2DTranspose`` layer.
    """
    weight_attr = paddle.framework.ParamAttr(initializer=weight_init)
    if bias:
        bias_attr = paddle.framework.ParamAttr(initializer=bias_init)
    else:
        bias_attr = False
    # Pass everything after kernel_size by keyword: Conv2DTranspose declares
    # ``groups`` before ``dilation``, so the former positional
    # (..., dilation, groups) order silently swapped the two values.
    conv = nn.Conv2DTranspose(in_channels,
                              out_channels,
                              kernel_size,
                              stride=stride,
                              padding=padding,
                              output_padding=output_padding,
                              groups=groups,
                              dilation=dilation,
                              weight_attr=weight_attr,
                              bias_attr=bias_attr)
    return conv

示例#4

0

显示文件

    def basic_branch(self, num_conv_out_channels, input_ch):
        """Create one conv stack and one linear classifier per branch.

        Args:
            num_conv_out_channels: Output channels of each 1x1 convolution and
                input features of each linear layer.
            input_ch: Input channels of each 1x1 convolution.

        Returns:
            A ``(pyramid_conv_list, pyramid_fc_list)`` pair of
            ``nn.LayerList`` objects, each with ``self.num_branches`` entries.
        """
        pyramid_conv_list = nn.LayerList()
        pyramid_fc_list = nn.LayerList()

        # Every branch is built identically; the per-level counter that used
        # to be tracked here was never read, so it is not computed any more.
        # All conv stacks are created before any classifier.
        for _ in range(self.num_branches):
            pyramid_conv_list.append(
                nn.Sequential(nn.Conv2D(input_ch, num_conv_out_channels, 1),
                              nn.BatchNorm2D(num_conv_out_channels),
                              nn.ReLU()))

        for _ in range(self.num_branches):
            fc = nn.Linear(
                in_features=num_conv_out_channels,
                out_features=self.num_classes,
                weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.001)),
                bias_attr=ParamAttr(initializer=Constant(value=0.)))
            pyramid_fc_list.append(fc)
        return pyramid_conv_list, pyramid_fc_list

示例#5

0

显示文件

文件： quant_layers.py 项目： sandyhouse/Paddle

 def __init__(self,
              name=None,
              channel_num=None,
              quant_bits=8,
              quant_axis=0,
              dtype='float32',
              quant_on_weight=False):
     """Store the quantization settings and create the per-channel scale.

     Args:
         name: Optional prefix for the generated scale parameter name.
         channel_num: Length of the scale parameter.
         quant_bits: Stored as ``_quant_bits``.
         quant_axis: Stored as ``_quant_axis``.
         dtype: Stored as ``_dtype`` and used for the scale parameter.
         quant_on_weight: Must compare equal to True (checked by assert).
     """
     assert quant_on_weight == True, "Channel_wise only can be used on weight quantization."
     super(FakeQuantChannelWiseAbsMax, self).__init__()
     self._quant_bits = quant_bits
     self._quant_axis = quant_axis
     self._dtype = dtype
     self._name = name
     self._channel_num = channel_num

     if name:
         prefix = "{}.scale".format(name)
     else:
         prefix = 'quant_dequant.scale'
     self._scale_name = unique_name.generate(prefix)

     if not quant_on_weight:
         self._scale = None
     else:
         # Non-trainable, zero-initialized scale with one entry per channel.
         frozen_scale_attr = ParamAttr(
             name=self._scale_name,
             initializer=Constant(0.0),
             trainable=False)
         self._scale = self.create_parameter(
             shape=[self._channel_num],
             attr=frozen_scale_attr,
             dtype=self._dtype)
         self._scale.stop_gradient = True

示例#6

0

显示文件

 def __init__(self,
              ch_in,
              ch_out,
              kernel_size,
              stride=1,
              padding=0,
              dilation=1,
              groups=1,
              bias=False):
     """Create a Conv2D whose weight is uniformly initialized from fan-in.

     Args:
         ch_in: Input channels.
         ch_out: Output channels.
         kernel_size: Kernel size; also used to compute the fan-in.
         stride: Convolution stride.
         padding: Convolution padding.
         dilation: Convolution dilation.
         groups: Convolution groups.
         bias: When truthy, a zero-initialized bias is created; otherwise
             the bias is disabled.
     """
     super(ConvLayer, self).__init__()
     # Weight bound is 1 / sqrt(fan_in) with fan_in = ch_in * kernel_size^2.
     weight_bound = 1 / math.sqrt(ch_in * kernel_size**2)
     conv_weight_attr = paddle.ParamAttr(
         initializer=Uniform(-weight_bound, weight_bound))
     if bias:
         conv_bias_attr = paddle.ParamAttr(initializer=Constant(0.))
     else:
         conv_bias_attr = False
     self.conv = nn.Conv2D(
         in_channels=ch_in,
         out_channels=ch_out,
         kernel_size=kernel_size,
         stride=stride,
         padding=padding,
         dilation=dilation,
         groups=groups,
         weight_attr=conv_weight_attr,
         bias_attr=conv_bias_attr)

示例#7

0

显示文件

文件： ttf_head.py 项目： ttjy22/PaddleDetection

 def __init__(self, ch_in, ch_out=128, num_classes=80, conv_num=2):
     """Build the feature stack and the 1x1 output convolution.

     Args:
         ch_in: Input channels of the first 3x3 convolution.
         ch_out: Channels of every 3x3 convolution output.
         num_classes: Output channels of the final 1x1 convolution.
         conv_num: Number of conv + ReLU pairs in the feature stack.
     """
     super(HMHead, self).__init__()
     feat_stack = nn.Sequential()
     for idx in range(conv_num):
         layer_name = 'conv.{}'.format(idx)
         # Only the first conv sees ch_in; later ones chain on ch_out.
         conv_in = ch_in if idx == 0 else ch_out
         conv = nn.Conv2D(
             in_channels=conv_in,
             out_channels=ch_out,
             kernel_size=3,
             padding=1,
             weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
             bias_attr=ParamAttr(learning_rate=2., regularizer=L2Decay(0.)))
         feat_stack.add_sublayer(layer_name, conv)
         feat_stack.add_sublayer(layer_name + '.act', nn.ReLU())
     self.feat = self.add_sublayer('hm_feat', feat_stack)

     bias_init = float(-np.log((1 - 0.01) / 0.01))
     out_conv = nn.Conv2D(
         in_channels=ch_out,
         out_channels=num_classes,
         kernel_size=1,
         weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
         bias_attr=ParamAttr(
             learning_rate=2.,
             regularizer=L2Decay(0.),
             initializer=Constant(bias_init)))
     self.head = self.add_sublayer('hm_head', out_conv)

示例#8

0

显示文件

文件： ttf_fpn.py 项目： AFei19911012/PythonSamples

    def __init__(self, ch_in, ch_out, norm_type='bn'):
        """Create the deformable convolution and its normalization layer.

        Args:
            ch_in: Input channels of the deformable convolution.
            ch_out: Output channels; also passed to ``batch_norm``.
            norm_type: Forwarded to ``batch_norm``.
        """
        super(Upsample, self).__init__()
        # Uniform bound is 1 / sqrt(fan_in) for a 3x3 kernel.
        weight_bound = 1. / math.sqrt(ch_in * 3 * 3)
        dcn_weight_attr = ParamAttr(
            initializer=Uniform(-weight_bound, weight_bound))
        dcn_bias_attr = ParamAttr(
            initializer=Constant(0),
            regularizer=L2Decay(0.),
            learning_rate=2.)
        self.dcn = DeformableConvV2(
            ch_in,
            ch_out,
            kernel_size=3,
            weight_attr=dcn_weight_attr,
            bias_attr=dcn_bias_attr,
            lr_scale=2.,
            regularizer=L2Decay(0.))

        self.bn = batch_norm(
            ch_out, norm_type=norm_type, initializer=Constant(1.))

示例#9

0

显示文件

文件： levit.py 项目： AgentMaker/Paddle-Image-Models

    def __init__(self, a, b, bn_weight_init=1, resolution=-100000):
        """Register a bias-free linear layer "c" followed by a BatchNorm1D "bn".

        Args:
            a: Input features of the linear layer.
            b: Output features of the linear layer and size of the norm.
            bn_weight_init: Constant written into the norm weight.
            resolution: Not read by this method.
        """
        super().__init__()
        linear = nn.Linear(a, b, bias_attr=False)
        self.add_sublayer("c", linear)

        norm = nn.BatchNorm1D(b)
        # Overwrite the freshly created norm parameters in place.
        weight_init = Constant(bn_weight_init)
        weight_init(norm.weight)
        zeros_(norm.bias)

        self.add_sublayer("bn", norm)

示例#10

0

显示文件

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 groups=1,
                 norm_type=None,
                 norm_groups=32,
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None):
        """Create an optional normalization layer and then the convolution.

        Args:
            ch_in: Input channels of the convolution.
            ch_out: Output channels of the convolution and size of the norm.
            filter_size: Kernel size; padding is ``(filter_size - 1) // 2``.
            stride: Convolution stride.
            groups: Convolution groups.
            norm_type: None for no normalization, else one of
                'bn', 'sync_bn', 'gn'.
            norm_groups: Number of groups when ``norm_type`` is 'gn'.
            norm_decay: L2 decay coefficient for the norm parameters.
            freeze_norm: When truthy, norm parameters get learning rate 0,
                ``stop_gradient = True`` and batch norm uses global stats.
            act: Stored as ``self.act``.
        """
        super(ConvNormLayer, self).__init__()
        self.act = act
        norm_lr = 0. if freeze_norm else 1.

        if norm_type is None:
            # Without a norm layer the convolution keeps its own bias.
            conv_bias_attr = True
            self.norm = None
        else:
            assert norm_type in ['bn', 'sync_bn', 'gn'], \
                "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
            scale_attr = ParamAttr(
                initializer=Constant(1.0),
                learning_rate=norm_lr,
                regularizer=L2Decay(norm_decay))
            shift_attr = ParamAttr(
                learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
            if freeze_norm:
                global_stats = True
            else:
                global_stats = None

            if norm_type in ['bn', 'sync_bn']:
                self.norm = nn.BatchNorm2D(
                    ch_out,
                    weight_attr=scale_attr,
                    bias_attr=shift_attr,
                    use_global_stats=global_stats)
            elif norm_type == 'gn':
                self.norm = nn.GroupNorm(
                    num_groups=norm_groups,
                    num_channels=ch_out,
                    weight_attr=scale_attr,
                    bias_attr=shift_attr)

            frozen_candidates = self.norm.parameters()
            if freeze_norm:
                for norm_param in frozen_candidates:
                    norm_param.stop_gradient = True
            # The norm layer supplies the shift, so the conv bias is dropped.
            conv_bias_attr = False

        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.001)),
            bias_attr=conv_bias_attr)

示例#11

0

显示文件

文件： ttf_head.py 项目： xiegegege/PaddleDetection

 def __init__(
     self,
     ch_in,
     ch_out=128,
     num_classes=80,
     conv_num=2,
     dcn_head=False,
     lite_head=False,
     norm_type='bn',
 ):
     """Build the feature stack and the 1x1 output convolution.

     Args:
         ch_in: Input channels of the first layer in the stack.
         ch_out: Output channels of every layer in the stack.
         num_classes: Output channels of the final 1x1 convolution.
         conv_num: Number of layers in the feature stack.
         dcn_head: When truthy (and ``lite_head`` is falsy), use
             ``DeformableConvV2`` instead of ``nn.Conv2D``.
         lite_head: When truthy, use ``LiteConv`` layers (no separate ReLU
             is appended) and leave the output weight attribute as None.
         norm_type: Forwarded to ``LiteConv``.
     """
     super(HMHead, self).__init__()
     feat_stack = nn.Sequential()
     for idx in range(conv_num):
         layer_name = 'conv.{}'.format(idx)
         layer_in = ch_in if idx == 0 else ch_out

         if lite_head:
             feat_stack.add_sublayer(
                 'hm.' + layer_name,
                 LiteConv(
                     in_channels=layer_in,
                     out_channels=ch_out,
                     norm_type=norm_type))
             continue

         if dcn_head:
             conv = DeformableConvV2(
                 in_channels=layer_in,
                 out_channels=ch_out,
                 kernel_size=3,
                 weight_attr=ParamAttr(initializer=Normal(0, 0.01)))
         else:
             conv = nn.Conv2D(
                 in_channels=layer_in,
                 out_channels=ch_out,
                 kernel_size=3,
                 padding=1,
                 weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                 bias_attr=ParamAttr(
                     learning_rate=2., regularizer=L2Decay(0.)))
         feat_stack.add_sublayer(layer_name, conv)
         feat_stack.add_sublayer(layer_name + '.act', nn.ReLU())
     self.feat = feat_stack

     bias_init = float(-np.log((1 - 0.01) / 0.01))
     if lite_head:
         out_weight_attr = None
     else:
         out_weight_attr = ParamAttr(initializer=Normal(0, 0.01))
     out_bias_attr = ParamAttr(
         learning_rate=2.,
         regularizer=L2Decay(0.),
         initializer=Constant(bias_init))
     self.head = nn.Conv2D(
         in_channels=ch_out,
         out_channels=num_classes,
         kernel_size=1,
         weight_attr=out_weight_attr,
         bias_attr=out_bias_attr)

示例#12

0

显示文件

文件： retina_head.py 项目： ghostxsl/PaddleDetection

    def __init__(self,
                 num_classes=80,
                 conv_feat='RetinaFeat',
                 anchor_generator='RetinaAnchorGenerator',
                 bbox_assigner='MaxIoUAssigner',
                 loss_class='FocalLoss',
                 loss_bbox='SmoothL1Loss',
                 nms='MultiClassNMS',
                 prior_prob=0.01,
                 nms_pre=1000,
                 weights=[1., 1., 1., 1.]):
        """Store the head components and create the cls / reg convolutions.

        Args:
            num_classes: Number of classes; scales the cls output channels.
            conv_feat: Object whose ``feat_out`` gives the conv input channels.
            anchor_generator: Object whose ``num_anchors`` scales both outputs.
            bbox_assigner: Stored as ``self.bbox_assigner``.
            loss_class: Stored as ``self.loss_class``.
            loss_bbox: Stored as ``self.loss_bbox``.
            nms: Stored as ``self.nms``.
            prior_prob: Used to derive the classification bias init,
                ``-log((1 - prior_prob) / prior_prob)``.
            nms_pre: Stored as ``self.nms_pre``.
            weights: Stored as ``self.weights``.
        """
        super(RetinaHead, self).__init__()
        self.num_classes = num_classes
        self.conv_feat = conv_feat
        self.anchor_generator = anchor_generator
        self.bbox_assigner = bbox_assigner
        self.loss_class = loss_class
        self.loss_bbox = loss_bbox
        self.nms = nms
        self.nms_pre = nms_pre
        self.weights = weights

        cls_bias_value = -math.log((1 - prior_prob) / prior_prob)
        anchors_per_loc = self.anchor_generator.num_anchors

        # Classification branch: num_classes scores per anchor.
        cls_weight_attr = ParamAttr(initializer=Normal(mean=0.0, std=0.01))
        cls_bias_attr = ParamAttr(initializer=Constant(value=cls_bias_value))
        self.retina_cls = nn.Conv2D(
            in_channels=self.conv_feat.feat_out,
            out_channels=self.num_classes * anchors_per_loc,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=cls_weight_attr,
            bias_attr=cls_bias_attr)

        # Regression branch: 4 values per anchor.
        reg_weight_attr = ParamAttr(initializer=Normal(mean=0.0, std=0.01))
        reg_bias_attr = ParamAttr(initializer=Constant(value=0))
        self.retina_reg = nn.Conv2D(
            in_channels=self.conv_feat.feat_out,
            out_channels=4 * anchors_per_loc,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=reg_weight_attr,
            bias_attr=reg_bias_attr)

示例#13

0

显示文件

文件： bifpn.py 项目： xiegegege/PaddleDetection

    def __init__(self,
                 channels=256,
                 num_levels=5,
                 eps=1e-5,
                 use_weighted_fusion=True,
                 kernel_size=3,
                 norm_type='bn',
                 norm_groups=32,
                 act='swish'):
        """Create the up/down separable convolutions and fusion weights.

        Args:
            channels: Channel count passed to every ``SeparableConvLayer``.
            num_levels: Number of levels; ``num_levels - 1`` layers are
                created per direction.
            eps: Stored as ``self.eps``.
            use_weighted_fusion: When truthy, create the fusion weight
                parameters ``up_weights`` and ``down_weights``.
            kernel_size: Forwarded to ``SeparableConvLayer``.
            norm_type: Forwarded to ``SeparableConvLayer``.
            norm_groups: Forwarded to ``SeparableConvLayer``.
            act: Forwarded to ``SeparableConvLayer``.
        """
        super(BiFPNCell, self).__init__()
        self.channels = channels
        self.num_levels = num_levels
        self.eps = eps
        self.use_weighted_fusion = use_weighted_fusion

        # Both directions use identically configured layers.
        sep_conv_kwargs = dict(
            kernel_size=kernel_size,
            norm_type=norm_type,
            norm_groups=norm_groups,
            act=act)

        # up
        up_layers = [
            SeparableConvLayer(self.channels, **sep_conv_kwargs)
            for _ in range(self.num_levels - 1)
        ]
        self.conv_up = nn.LayerList(up_layers)
        # down
        down_layers = [
            SeparableConvLayer(self.channels, **sep_conv_kwargs)
            for _ in range(self.num_levels - 1)
        ]
        self.conv_down = nn.LayerList(down_layers)

        if not self.use_weighted_fusion:
            return
        # Fusion weights start at 1: two inputs per up node, three per down.
        self.up_weights = self.create_parameter(
            shape=[self.num_levels - 1, 2],
            attr=ParamAttr(initializer=Constant(1.)))
        self.down_weights = self.create_parameter(
            shape=[self.num_levels - 1, 3],
            attr=ParamAttr(initializer=Constant(1.)))

示例#14

0

显示文件

文件： quant_layers.py 项目： sandyhouse/Paddle

    def __init__(self,
                 name=None,
                 moving_rate=0.9,
                 quant_bits=8,
                 dtype='float32'):
        """Create the non-trainable scale, state and accum parameters.

        Args:
            name: Optional prefix for the generated parameter names; when
                falsy, 'quant_dequant' is used.
            moving_rate: Stored as ``_moving_rate``.
            quant_bits: Stored as ``_quant_bits``.
            dtype: Data type of the three parameters.
        """
        super(FakeQuantMovingAverageAbsMax, self).__init__()
        self._moving_rate = moving_rate
        self._quant_bits = quant_bits

        # scale: initialized to 0.001
        if name:
            scale_prefix = "{}.scale".format(name)
        else:
            scale_prefix = 'quant_dequant.scale'
        self._scale = self.create_parameter(
            shape=[1],
            attr=ParamAttr(
                name=unique_name.generate(scale_prefix),
                initializer=Constant(0.001),
                trainable=False),
            dtype=dtype)
        self._scale.stop_gradient = True

        # state: initialized to 1
        if name:
            state_prefix = "{}.state".format(name)
        else:
            state_prefix = 'quant_dequant.state'
        self._state = self.create_parameter(
            shape=[1],
            attr=ParamAttr(
                name=unique_name.generate(state_prefix),
                initializer=Constant(1),
                trainable=False),
            dtype=dtype)
        self._state.stop_gradient = True

        # accum: initialized to 1
        if name:
            accum_prefix = "{}.accum".format(name)
        else:
            accum_prefix = 'quant_dequant.accum'
        self._accum = self.create_parameter(
            shape=[1],
            attr=ParamAttr(
                name=unique_name.generate(accum_prefix),
                initializer=Constant(1),
                trainable=False),
            dtype=dtype)
        self._accum.stop_gradient = True

示例#15

0

显示文件

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride,
                 norm_type='bn',
                 norm_groups=32,
                 use_dcn=False,
                 norm_name=None,
                 bias_on=False,
                 lr_scale=1.,
                 name=None):
        """Create a convolution followed by a normalization layer.

        Args:
            ch_in: Input channels of the convolution.
            ch_out: Output channels of the convolution and size of the norm.
            filter_size: Kernel size; padding is ``(filter_size - 1) // 2``.
            stride: Convolution stride.
            norm_type: One of 'bn', 'sync_bn', 'gn'.
            norm_groups: Number of groups when ``norm_type`` is 'gn'.
            use_dcn: Not read by this method.
            norm_name: Prefix for the norm parameter names
                ("<norm_name>_scale", "<norm_name>_offset"); None leaves them
                unnamed.
            bias_on: When truthy, the convolution gets a zero-initialized
                bias with learning rate ``lr_scale``.
            lr_scale: Learning rate of the convolution bias.
            name: Prefix for the convolution parameter names
                ("<name>_weight", "<name>_bias"); None leaves them unnamed.
        """
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn']

        def _suffixed(prefix, suffix):
            # Both prefixes default to None; concatenating onto None raised
            # TypeError, so an absent prefix now yields an unnamed attribute.
            return None if prefix is None else prefix + suffix

        if bias_on:
            bias_attr = ParamAttr(name=_suffixed(name, "_bias"),
                                  initializer=Constant(value=0.),
                                  learning_rate=lr_scale)
        else:
            bias_attr = False

        self.conv = nn.Conv2D(in_channels=ch_in,
                              out_channels=ch_out,
                              kernel_size=filter_size,
                              stride=stride,
                              padding=(filter_size - 1) // 2,
                              groups=1,
                              weight_attr=ParamAttr(
                                  name=_suffixed(name, "_weight"),
                                  initializer=Normal(mean=0., std=0.01),
                                  learning_rate=1.),
                              bias_attr=bias_attr)

        param_attr = ParamAttr(name=_suffixed(norm_name, "_scale"),
                               learning_rate=1.,
                               regularizer=L2Decay(0.))
        bias_attr = ParamAttr(name=_suffixed(norm_name, "_offset"),
                              learning_rate=1.,
                              regularizer=L2Decay(0.))
        if norm_type in ['bn', 'sync_bn']:
            self.norm = nn.BatchNorm2D(ch_out,
                                       weight_attr=param_attr,
                                       bias_attr=bias_attr)
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(num_groups=norm_groups,
                                     num_channels=ch_out,
                                     weight_attr=param_attr,
                                     bias_attr=bias_attr)

示例#16

0

显示文件

文件： quant_layers.py 项目： sandyhouse/Paddle

    def __init__(self, name=None, moving_rate=0.9, dtype='float32'):
        r"""
        Create the scale, state and accum parameters of the
        MovingAverageMaxScale layer, which calculates the output quantization
        scale of a Layer. The computational formula is:

        :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
        :math:`Out = X`

        All three parameters have shape ``[1]``, start at 0 and are marked
        non-trainable with ``stop_gradient = True``.
        """
        super(MovingAverageAbsMaxScale, self).__init__()
        self._moving_rate = moving_rate

        # scale
        if name:
            scale_prefix = '{}.scale'.format(name)
        else:
            scale_prefix = 'outscale.scale'
        generated_scale_name = unique_name.generate(scale_prefix)
        self._scale = self.create_parameter(
            shape=[1],
            attr=ParamAttr(
                name=generated_scale_name,
                initializer=Constant(0),
                trainable=False),
            dtype=dtype)
        self._scale.stop_gradient = True

        # state
        if name:
            state_prefix = "{}.state".format(name)
        else:
            state_prefix = 'outscale.state'
        self._state = self.create_parameter(
            shape=[1],
            attr=ParamAttr(
                name=unique_name.generate(state_prefix),
                initializer=Constant(0),
                trainable=False),
            dtype=dtype)
        self._state.stop_gradient = True

        # accum
        if name:
            accum_prefix = "{}.accum".format(name)
        else:
            accum_prefix = 'outscale.accum'
        self._accum = self.create_parameter(
            shape=[1],
            attr=ParamAttr(
                name=unique_name.generate(accum_prefix),
                initializer=Constant(0),
                trainable=False),
            dtype=dtype)
        self._accum.stop_gradient = True

示例#17

0

显示文件

文件： quant_layers.py 项目： sandyhouse/Paddle

 def __init__(self,
              name=None,
              quant_bits=8,
              dtype='float32',
              quant_on_weight=False):
     """Store the quantization settings and optionally create the scale.

     Args:
         name: Optional prefix for the generated scale parameter name; when
             falsy, 'quant_dequant' is used.
         quant_bits: Stored as ``_quant_bits``.
         dtype: Data type of the scale parameter; stored as ``_dtype``.
         quant_on_weight: When truthy, a non-trainable scale parameter of
             shape ``[1]`` initialized to 0.001 is created; otherwise
             ``_scale`` is None.
     """
     super(FakeQuantAbsMax, self).__init__()
     self._quant_bits = quant_bits
     # The ``dtype`` argument used to be ignored: ``self._dtype`` was read
     # below without being assigned here. Store it so the requested dtype
     # is the one actually used for the scale parameter.
     self._dtype = dtype
     self._name = name
     scale_prefix = "{}.scale".format(
         name) if name else 'quant_dequant.scale'
     self._scale_name = unique_name.generate(scale_prefix)
     if quant_on_weight:
         scale_attr = ParamAttr(
             name=self._scale_name,
             initializer=Constant(0.001),
             trainable=False)
         self._scale = self.create_parameter(
             shape=[1], attr=scale_attr, dtype=self._dtype)
         self._scale.stop_gradient = True
     else:
         self._scale = None

示例#18

0

显示文件

    def initialize_parameters(self):
        """Re-initialize embedding, attention and MLP weights in place.

        Normal initializers are applied to the token / positional embeddings,
        to the attention-pool projections of a ``ModifiedResNet`` visual
        tower (if present), to every transformer residual block and to the
        text projection (if present). Parameters of the visual tower's four
        layers whose name ends with "bn3.weight" are set to zero.
        """
        Normal(std=0.02)(self.token_embedding.weight)
        Normal(std=0.01)(self.positional_embedding)

        visual = self.visual
        if isinstance(visual, ModifiedResNet):
            if visual.attnpool is not None:
                pool_init = Normal(std=self.embed_dim ** -0.5)
                pool_attn = visual.attnpool.attn
                for proj_name in ("q_proj", "k_proj", "v_proj", "out_proj"):
                    pool_init(getattr(pool_attn, proj_name).weight)

            stages = [
                visual.layer1,
                visual.layer2,
                visual.layer3,
                visual.layer4,
            ]
            for stage in stages:
                for param_name, param in stage.named_parameters():
                    if param_name.endswith("bn3.weight"):
                        Constant(value=0.0)(param)

        width = self.transformer.width
        depth = self.transformer.layers
        proj_std = (width ** -0.5) * ((2 * depth) ** -0.5)
        attn_std = width ** -0.5
        fc_std = (2 * width) ** -0.5

        for block in self.transformer.resblocks:
            qkv_init = Normal(std=attn_std)
            for proj_name in ("q_proj", "k_proj", "v_proj"):
                qkv_init(getattr(block.attn, proj_name).weight)
            Normal(std=proj_std)(block.attn.out_proj.weight)
            Normal(std=fc_std)(block.mlp.c_fc.weight)
            Normal(std=proj_std)(block.mlp.c_proj.weight)

        if self.text_projection is not None:
            Normal(std=width ** -0.5)(self.text_projection)

示例#19

0

显示文件

文件： levit.py 项目： AgentMaker/Paddle-Image-Models

    def __init__(
        self,
        a,
        b,
        ks=1,
        stride=1,
        pad=0,
        dilation=1,
        groups=1,
        bn_weight_init=1,
        resolution=-10000,
    ):
        """Register a bias-free Conv2D "c" followed by a BatchNorm2D "bn".

        Args:
            a: Input channels of the convolution.
            b: Output channels of the convolution and size of the norm.
            ks: Kernel size.
            stride: Convolution stride.
            pad: Convolution padding.
            dilation: Convolution dilation.
            groups: Convolution groups.
            bn_weight_init: Constant written into the norm weight.
            resolution: Not read by this method.
        """
        super().__init__()
        conv = nn.Conv2D(
            a, b, ks, stride, pad, dilation, groups, bias_attr=False)
        self.add_sublayer("c", conv)

        norm = nn.BatchNorm2D(b)
        # Overwrite the freshly created norm parameters in place.
        weight_init = Constant(bn_weight_init)
        weight_init(norm.weight)
        zeros_(norm.bias)

        self.add_sublayer("bn", norm)

示例#20

0

显示文件

文件： jde_embedding_head.py 项目： xiegegege/PaddleDetection

 def __init__(self, init_value=0., use_uncertainy=True):
     """Create the single-element float32 parameter ``loss_param``.

     Args:
         init_value: Constant the parameter is initialized to.
         use_uncertainy: Not read by this method.
     """
     super(LossParam, self).__init__()
     constant_attr = ParamAttr(initializer=Constant(value=init_value))
     self.loss_param = self.create_parameter(
         shape=[1], attr=constant_attr, dtype="float32")

示例#21

0

显示文件

文件： resnet.py 项目： xiegegege/PaddleDetection

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride,
                 groups=1,
                 act=None,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 lr=1.0,
                 dcn_v2=False):
        """Create a (deformable) convolution followed by batch normalization.

        Args:
            ch_in: Input channels of the convolution.
            ch_out: Output channels of the convolution and size of the norm.
            filter_size: Kernel size; padding is ``(filter_size - 1) // 2``.
            stride: Convolution stride.
            groups: Convolution groups.
            act: Stored as ``self.act``.
            norm_type: 'bn' or 'sync_bn'.
            norm_decay: L2 decay coefficient for the norm parameters.
            freeze_norm: When truthy, norm parameters get learning rate 0,
                are non-trainable, and have ``stop_gradient = True``.
            lr: Learning rate of the convolution weight (and of the norm
                parameters when not frozen).
            dcn_v2: When truthy, build an offset convolution plus a
                ``DeformConv2D`` instead of a plain ``nn.Conv2D``.
        """
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn']
        self.norm_type = norm_type
        self.act = act
        self.dcn_v2 = dcn_v2

        same_pad = (filter_size - 1) // 2
        if dcn_v2:
            self.offset_channel = 2 * filter_size**2
            self.mask_channel = filter_size**2

            # Offset/mask predictor starts from all-zero weights and bias.
            self.conv_offset = nn.Conv2D(
                in_channels=ch_in,
                out_channels=3 * filter_size**2,
                kernel_size=filter_size,
                stride=stride,
                padding=same_pad,
                weight_attr=ParamAttr(initializer=Constant(0.)),
                bias_attr=ParamAttr(initializer=Constant(0.)))
            self.conv = DeformConv2D(
                in_channels=ch_in,
                out_channels=ch_out,
                kernel_size=filter_size,
                stride=stride,
                padding=same_pad,
                dilation=1,
                groups=groups,
                weight_attr=ParamAttr(learning_rate=lr),
                bias_attr=False)
        else:
            self.conv = nn.Conv2D(
                in_channels=ch_in,
                out_channels=ch_out,
                kernel_size=filter_size,
                stride=stride,
                padding=same_pad,
                groups=groups,
                weight_attr=ParamAttr(learning_rate=lr),
                bias_attr=False)

        frozen = bool(freeze_norm)
        norm_lr = 0. if freeze_norm else lr
        scale_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            trainable=not frozen)
        shift_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            trainable=not frozen)

        if norm_type == 'sync_bn':
            self.norm = nn.SyncBatchNorm(
                ch_out, weight_attr=scale_attr, bias_attr=shift_attr)
        else:
            self.norm = nn.BatchNorm(
                ch_out,
                act=None,
                param_attr=scale_attr,
                bias_attr=shift_attr,
                use_global_stats=frozen)

        frozen_candidates = self.norm.parameters()
        if freeze_norm:
            for norm_param in frozen_candidates:
                norm_param.stop_gradient = True

示例#22

0

显示文件

 def __init__(self):
     """Create the two single-element parameters used by this loss.

     ``det_weight`` is initialized to -1.85 and ``reid_weight`` to -1.05.
     """
     super(FairMOTLoss, self).__init__()
     det_init = Constant(-1.85)
     self.det_weight = self.create_parameter(
         shape=[1], default_initializer=det_init)
     reid_init = Constant(-1.05)
     self.reid_weight = self.create_parameter(
         shape=[1], default_initializer=reid_init)

示例#23

0

显示文件

    def __init__(self,
                 ch_in: int = 3,
                 class_num: int = 20,
                 ignore_thresh: float = 0.7,
                 valid_thresh: float = 0.005,
                 nms_topk: int = 400,
                 nms_posk: int = 100,
                 nms_thresh: float = 0.45,
                 is_train: bool = True,
                 load_checkpoint: str = None):
        """Assemble the detector's sublayers and load a checkpoint.

        Args:
            ch_in: Input channels passed to the backbone.
            class_num: Number of classes; sets the output conv channels.
            ignore_thresh: Stored as ``self.ignore_thresh``.
            valid_thresh: Stored as ``self.valid_thresh``.
            nms_topk: Stored as ``self.nms_topk``.
            nms_posk: Stored as ``self.nms_posk``.
            nms_thresh: Stored as ``self.nms_thresh``.
            is_train: Sublayers are built with ``is_test=not is_train``.
            load_checkpoint: Path of a checkpoint to load. When None, the
                pretrained file under ``self.directory`` is used and is
                downloaded with ``wget`` if it does not exist yet.
        """
        # Local import: only the download fallback below needs it.
        import subprocess

        super(YOLOv3, self).__init__()

        self.is_train = is_train
        self.block = DarkNet53_conv_body(ch_in=ch_in,
                                         is_test=not self.is_train)
        self.block_outputs = []
        self.yolo_blocks = []
        self.route_blocks_2 = []
        self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        self.anchors = [
            10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
            373, 326
        ]
        self.class_num = class_num
        self.ignore_thresh = ignore_thresh
        self.valid_thresh = valid_thresh
        self.nms_topk = nms_topk
        self.nms_posk = nms_posk
        self.nms_thresh = nms_thresh
        ch_in_list = [1024, 768, 384]

        for i in range(3):
            yolo_block = self.add_sublayer(
                "yolo_detecton_block_%d" % (i),
                YoloDetectionBlock(ch_in_list[i],
                                   channel=512 // (2**i),
                                   is_test=not self.is_train))
            self.yolo_blocks.append(yolo_block)

            # One output conv per scale: (class_num + 5) values per anchor.
            num_filters = len(self.anchor_masks[i]) * (self.class_num + 5)
            block_out = self.add_sublayer(
                "block_out_%d" % (i),
                nn.Conv2d(
                    1024 // (2**i),
                    num_filters,
                    1,
                    stride=1,
                    padding=0,
                    weight_attr=paddle.ParamAttr(initializer=Normal(0., 0.02)),
                    bias_attr=paddle.ParamAttr(initializer=Constant(0.0),
                                               regularizer=L2Decay(0.))))
            self.block_outputs.append(block_out)

            if i < 2:
                route = self.add_sublayer(
                    "route2_%d" % i,
                    ConvBNLayer(ch_in=512 // (2**i),
                                ch_out=256 // (2**i),
                                filter_size=1,
                                stride=1,
                                padding=0,
                                is_test=(not self.is_train)))
                self.route_blocks_2.append(route)
            # NOTE(review): reassigned on every iteration; kept in place so
            # sublayer creation order is unchanged.
            self.upsample = Upsample()

        if load_checkpoint is not None:
            model_dict = paddle.load(load_checkpoint)[0]
            self.set_dict(model_dict)
            print("load custom checkpoint success")

        else:
            # ``self.directory`` is not assigned in this method; it is
            # expected to be provided elsewhere on the instance.
            checkpoint = os.path.join(self.directory,
                                      'yolov3_darknet53_voc.pdparams')
            if not os.path.exists(checkpoint):
                # Run wget with an argument list instead of a shell string so
                # a checkpoint path containing spaces or shell metacharacters
                # reaches wget intact.
                subprocess.run([
                    'wget',
                    'https://paddlehub.bj.bcebos.com/dygraph/detection/yolov3_darknet53_voc.pdparams',
                    '-O', checkpoint
                ])
            model_dict = paddle.load(checkpoint)[0]
            self.set_dict(model_dict)
            print("load pretrained checkpoint success")

示例#24

0

显示文件

文件： vision_transformer.py 项目： lvjian0706/PaddleClas

    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch16_384_pretrained.pdparams",
    "ViT_base_patch32_384":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch32_384_pretrained.pdparams",
    "ViT_large_patch16_224":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_224_pretrained.pdparams",
    "ViT_large_patch16_384":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_384_pretrained.pdparams",
    "ViT_large_patch32_384":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch32_384_pretrained.pdparams",
}

# Exported names: one entry per key of MODEL_URLS.
__all__ = list(MODEL_URLS.keys())

# Module-level initializer shortcuts.
trunc_normal_ = TruncatedNormal(std=.02)
normal_ = Normal  # bound to the class itself, not to an instance
zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)


def to_2tuple(x):
    """Return a 2-tuple that holds the same object ``x`` in both slots."""
    return (x, x)


def drop_path(x, drop_prob=0., training=False):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    The original name is misleading, as 'Drop Connect' is a different form of
    dropout described in a separate paper.
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956

    Args:
        x: input value; returned unchanged when dropping is disabled.
        drop_prob: drop probability; ``0.`` disables dropping.
        training: dropping is only applied when this is truthy.
    """
    # Fast path: nothing to drop, hand the input back untouched.
    if drop_prob == 0. or not training:
        return x
    # Keep probability as a paddle tensor.
    # NOTE(review): this excerpt ends here; the remainder of the function is
    # not visible in this view.
    keep_prob = paddle.to_tensor(1 - drop_prob)

示例#25

0

显示文件

    def __init__(self,
                 stacked_convs=2,
                 feat_in=256,
                 feat_out=256,
                 num_classes=15,
                 anchor_strides=[8, 16, 32, 64, 128],
                 anchor_scales=[4],
                 anchor_ratios=[1.0],
                 target_means=0.0,
                 target_stds=1.0,
                 align_conv_type='AlignConv',
                 align_conv_size=3,
                 use_sigmoid_cls=True,
                 anchor_assign=RBoxAssigner().__dict__,
                 reg_loss_weight=[1.0, 1.0, 1.0, 1.0, 1.1],
                 cls_loss_weight=[1.1, 1.05],
                 reg_loss_type='l1',
                 is_training=True):
        """Build the anchor generators and every convolution layer of the head.

        Args:
            stacked_convs: number of conv + ReLU pairs in each of the four
                stacked towers (fam_cls, fam_reg, odm_cls, odm_reg).
            feat_in: input channels of the first conv of the FAM towers.
            feat_out: channel width of the intermediate convolutions.
            num_classes: number of classification output channels when
                ``use_sigmoid_cls`` is true; otherwise 1 channel is used.
            anchor_strides: one entry per level; stored as a tensor and also
                used as the base size of each S2ANetAnchorGenerator.
            anchor_scales: forwarded to every anchor generator.
            anchor_ratios: forwarded to every anchor generator.
            target_means: scalar multiplied into a 5-element ones tensor.
            target_stds: scalar multiplied into a 5-element ones tensor.
            align_conv_type: 'AlignConv', 'Conv' or 'DCN'; selects which
                alignment layer is created.
            align_conv_size: kernel size of the alignment layer.
            use_sigmoid_cls: see ``num_classes``.
            anchor_assign: stored unchanged on the instance.
            reg_loss_weight: stored unchanged on the instance.
            cls_loss_weight: stored unchanged on the instance.
            reg_loss_type: stored unchanged on the instance.
            is_training: stored unchanged on the instance.

        Raises:
            AssertionError: if ``align_conv_type`` is not a supported value.

        NOTE(review): the list defaults and ``RBoxAssigner().__dict__`` are
        evaluated once, when the function is defined, so all instances that
        rely on the defaults share the same objects.
        """
        super(S2ANetHead, self).__init__()
        self.stacked_convs = stacked_convs
        self.feat_in = feat_in
        self.feat_out = feat_out
        self.anchor_list = None
        self.anchor_scales = anchor_scales
        self.anchor_ratios = anchor_ratios
        # NOTE(review): this assignment is replaced by the tensor version on
        # the very next line.
        self.anchor_strides = anchor_strides
        self.anchor_strides = paddle.to_tensor(anchor_strides)
        self.anchor_base_sizes = list(anchor_strides)
        # Scalars expanded to 5-element tensors.
        self.means = paddle.ones(shape=[5]) * target_means
        self.stds = paddle.ones(shape=[5]) * target_stds
        assert align_conv_type in ['AlignConv', 'Conv', 'DCN']
        self.align_conv_type = align_conv_type
        self.align_conv_size = align_conv_size

        self.use_sigmoid_cls = use_sigmoid_cls
        self.cls_out_channels = num_classes if self.use_sigmoid_cls else 1
        self.sampling = False
        self.anchor_assign = anchor_assign
        self.reg_loss_weight = reg_loss_weight
        self.cls_loss_weight = cls_loss_weight
        self.alpha = 1.0
        self.beta = 1.0
        self.reg_loss_type = reg_loss_type
        self.is_training = is_training

        self.s2anet_head_out = None

        # anchor: one generator per base size, wrapped in a LayerList below.
        self.anchor_generators = []
        for anchor_base in self.anchor_base_sizes:
            self.anchor_generators.append(
                S2ANetAnchorGenerator(anchor_base, anchor_scales,
                                      anchor_ratios))

        self.anchor_generators = nn.LayerList(self.anchor_generators)
        # FAM towers: `stacked_convs` 3x3 conv + ReLU pairs each.
        self.fam_cls_convs = nn.Sequential()
        self.fam_reg_convs = nn.Sequential()

        for i in range(self.stacked_convs):
            # Only the first conv of each tower reads `feat_in` channels.
            chan_in = self.feat_in if i == 0 else self.feat_out

            self.fam_cls_convs.add_sublayer(
                'fam_cls_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=chan_in,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))

            self.fam_cls_convs.add_sublayer('fam_cls_conv_{}_act'.format(i),
                                            nn.ReLU())

            self.fam_reg_convs.add_sublayer(
                'fam_reg_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=chan_in,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))

            self.fam_reg_convs.add_sublayer('fam_reg_conv_{}_act'.format(i),
                                            nn.ReLU())

        # FAM regression output: 1x1 conv producing 5 channels.
        self.fam_reg = nn.Conv2D(
            self.feat_out,
            5,
            1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))
        # Classification bias starts at the logit of `prior_prob`,
        # i.e. -log((1 - p) / p).
        prior_prob = 0.01
        bias_init = float(-np.log((1 - prior_prob) / prior_prob))
        # FAM classification output: 1x1 conv.
        self.fam_cls = nn.Conv2D(
            self.feat_out,
            self.cls_out_channels,
            1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(bias_init)))

        # Alignment layer, chosen by `align_conv_type`.
        if self.align_conv_type == "AlignConv":
            self.align_conv = AlignConv(self.feat_out, self.feat_out,
                                        self.align_conv_size)
        elif self.align_conv_type == "Conv":
            self.align_conv = nn.Conv2D(
                self.feat_out,
                self.feat_out,
                self.align_conv_size,
                padding=(self.align_conv_size - 1) // 2,
                bias_attr=ParamAttr(initializer=Constant(0)))

        elif self.align_conv_type == "DCN":
            # 1x1 conv emitting 2 * k^2 channels, created alongside the
            # deformable conv below (presumably its offsets - TODO confirm
            # against the forward pass).
            self.align_conv_offset = nn.Conv2D(
                self.feat_out,
                2 * self.align_conv_size**2,
                1,
                weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                bias_attr=ParamAttr(initializer=Constant(0)))

            self.align_conv = paddle.vision.ops.DeformConv2D(
                self.feat_out,
                self.feat_out,
                self.align_conv_size,
                padding=(self.align_conv_size - 1) // 2,
                weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                bias_attr=False)

        # 3x3 conv stored as `or_conv`; created for every align_conv_type.
        self.or_conv = nn.Conv2D(
            self.feat_out,
            self.feat_out,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))

        # ODM towers: same layout as the FAM towers, always `feat_out` wide.
        self.odm_cls_convs = nn.Sequential()
        self.odm_reg_convs = nn.Sequential()

        for i in range(self.stacked_convs):
            ch_in = self.feat_out
            # ch_in = int(self.feat_out / 8) if i == 0 else self.feat_out

            self.odm_cls_convs.add_sublayer(
                'odm_cls_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=ch_in,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))

            self.odm_cls_convs.add_sublayer('odm_cls_conv_{}_act'.format(i),
                                            nn.ReLU())

            self.odm_reg_convs.add_sublayer(
                'odm_reg_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=self.feat_out,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))

            self.odm_reg_convs.add_sublayer('odm_reg_conv_{}_act'.format(i),
                                            nn.ReLU())

        # ODM outputs: 3x3 convs (the FAM outputs above use 1x1 kernels).
        self.odm_cls = nn.Conv2D(
            self.feat_out,
            self.cls_out_channels,
            3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(bias_init)))
        self.odm_reg = nn.Conv2D(
            self.feat_out,
            5,
            3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))

        # Lists that start empty here.
        self.featmap_sizes = []
        self.base_anchors_list = []
        self.refine_anchor_list = []

示例#26

0

显示文件

    def __init__(self,
                 input_dim,
                 filters,
                 filter_size,
                 stride=1,
                 bias_attr=False,
                 norm_type=None,
                 groups=1,
                 norm_groups=32,
                 act=None,
                 freeze_norm=False,
                 is_test=False,
                 norm_decay=0.,
                 lr=1.,
                 bias_lr=None,
                 weight_init=None,
                 bias_init=None,
                 use_dcn=False,
                 name=''):
        """Assemble a convolution with optional normalisation and activation.

        Args:
            input_dim: input channels of the convolution.
            filters: output channels of the convolution (and channel count
                of the normalisation layer).
            filter_size: kernel size; padding is ``(filter_size - 1) // 2``.
            stride: convolution stride.
            bias_attr: when truthy the convolution gets a bias parameter,
                otherwise it has none.
            norm_type: one of None, 'bn', 'sync_bn', 'gn', 'affine_channel',
                'in', 'ln'; resolved through ``get_norm``.
            groups: convolution groups.
            norm_groups: group count for GroupNorm; replaced by ``filters``
                for 'in' and by 1 for 'ln'.
            act: None, 'relu', 'leaky' or 'mish'.
            freeze_norm: when truthy, norm parameters get learning rate 0
                and are marked non-trainable.
            is_test: stored on the instance.
            norm_decay: L2 decay coefficient for the norm parameters.
            lr: learning rate of the conv weight, of the conv bias unless
                ``bias_lr`` is given, and of the norm parameters unless
                they are frozen.
            bias_lr: optional learning rate for the conv bias.
            weight_init: initializer of the conv weight (plain conv only).
            bias_init: initializer of the conv bias.
            use_dcn: build a DeformConv2D plus its offset/mask conv instead
                of a plain Conv2D.
            name: layer name; also used to derive the norm parameter names.

        Raises:
            AssertionError: if ``norm_type`` is not a supported value.
            NotImplementedError: if ``act`` is not a supported activation.
        """
        super(Conv2dUnit, self).__init__()
        self.filters = filters
        self.filter_size = filter_size
        self.stride = stride
        self.padding = (filter_size - 1) // 2
        self.act = act
        self.freeze_norm = freeze_norm
        self.is_test = is_test
        self.norm_decay = norm_decay
        self.use_dcn = use_dcn
        self.name = name

        # conv
        conv_name = name
        self.conv_offset = None

        # Bias attribute shared by both convolution flavours. Conv biases are
        # among the parameters that should not be regularised, hence the
        # zero L2 decay.
        if bias_attr:
            conv_bias = ParamAttr(learning_rate=bias_lr if bias_lr else lr,
                                  initializer=bias_init,
                                  regularizer=L2Decay(0.))
        else:
            conv_bias = False

        if not use_dcn:
            self.conv = nn.Conv2D(
                in_channels=input_dim,
                out_channels=filters,
                kernel_size=filter_size,
                stride=stride,
                padding=self.padding,
                groups=groups,
                weight_attr=ParamAttr(learning_rate=lr,
                                      initializer=weight_init),
                bias_attr=conv_bias)
        else:
            self.offset_channel = 2 * filter_size**2
            self.mask_channel = filter_size**2

            # Zero-initialised conv emitting offset + mask channels.
            zero_weight = ParamAttr(initializer=Constant(0.))
            zero_bias = ParamAttr(initializer=Constant(0.))
            self.conv_offset = nn.Conv2D(
                in_channels=input_dim,
                out_channels=3 * filter_size**2,
                kernel_size=filter_size,
                stride=stride,
                padding=self.padding,
                weight_attr=zero_weight,
                bias_attr=zero_bias)
            # Official DCNv2 layer (a custom MyDCNv2 was an alternative here).
            self.conv = DeformConv2D(
                in_channels=input_dim,
                out_channels=filters,
                kernel_size=filter_size,
                stride=stride,
                padding=self.padding,
                dilation=1,
                groups=groups,
                weight_attr=ParamAttr(learning_rate=lr),
                bias_attr=conv_bias)

        # norm
        assert norm_type in [None, 'bn', 'sync_bn', 'gn', 'affine_channel', 'in', 'ln']
        bn, sync_bn, gn, af = get_norm(norm_type)
        if norm_type == 'in':
            norm_groups = filters
        elif norm_type == 'ln':
            norm_groups = 1

        # Prefix of the norm parameter names: af wins over gn, gn over bn.
        if af:
            norm_tag = "af"
        elif gn:
            norm_tag = "gn"
        else:
            norm_tag = "bn"
        if conv_name == "conv1":
            norm_name = norm_tag + "_" + conv_name
        else:
            norm_name = norm_tag + conv_name[3:]

        norm_lr = 0. if freeze_norm else lr
        norm_trainable = not freeze_norm
        # Scale/offset of the norm layers use their own decay `norm_decay`.
        pattr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            name=norm_name + "_scale",
            trainable=norm_trainable)
        battr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            name=norm_name + "_offset",
            trainable=norm_trainable)

        self.bn = None
        self.gn = None
        self.af = None
        if bn:
            self.bn = paddle.nn.BatchNorm2D(
                filters, weight_attr=pattr, bias_attr=battr)
        if sync_bn:
            self.bn = paddle.nn.SyncBatchNorm(
                filters, weight_attr=pattr, bias_attr=battr)
        if gn:
            self.gn = paddle.nn.GroupNorm(
                num_groups=norm_groups,
                num_channels=filters,
                weight_attr=pattr,
                bias_attr=battr)
        if af:
            # Affine-channel: plain learnable per-channel scale and offset.
            self.af = True
            self.scale = self.create_parameter(
                shape=[filters],
                dtype='float32',
                attr=pattr,
                default_initializer=Constant(1.))
            self.offset = self.create_parameter(
                shape=[filters],
                dtype='float32',
                attr=battr,
                default_initializer=Constant(0.),
                is_bias=True)

        # act
        self.act = None
        if act is not None:
            if act == 'relu':
                self.act = paddle.nn.ReLU()
            elif act == 'leaky':
                self.act = paddle.nn.LeakyReLU(0.1)
            elif act == 'mish':
                self.act = Mish()
            else:
                raise NotImplementedError(
                    "Activation \'{}\' is not implemented.".format(act))

示例#27

0

显示文件

文件： solov2_head.py 项目： bittersweet-tales/TargetDetection

    def __init__(self,
                 num_classes=80,
                 in_channels=256,
                 seg_feat_channels=256,
                 stacked_convs=4,
                 num_grids=[40, 36, 24, 16, 12],
                 kernel_out_channels=256,
                 dcn_v2_stages=[],
                 segm_strides=[8, 8, 16, 32, 32],
                 solov2_loss=None,
                 score_threshold=0.1,
                 mask_threshold=0.5,
                 mask_nms=None):
        """Create the kernel and category conv towers and their output convs.

        Args:
            num_classes: the category output has ``num_classes - 1`` channels.
            in_channels: input channels of the first category conv; the first
                kernel conv takes ``in_channels + 2``.
            seg_feat_channels: channel width of the stacked convs.
            stacked_convs: number of convs in each tower.
            num_grids: stored as ``self.seg_num_grids``.
            kernel_out_channels: output channels of ``solo_kernel``.
            dcn_v2_stages: tower indices from which on a different conv type
                is selected (see the review note in the loop).
            segm_strides: stored on the instance.
            solov2_loss: stored on the instance.
            score_threshold: stored on the instance.
            mask_threshold: stored on the instance.
            mask_nms: stored on the instance.

        NOTE(review): the list defaults are evaluated once at definition time
        and are therefore shared by all instances that rely on them.
        """
        super(SOLOv2Head, self).__init__()
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.seg_num_grids = num_grids
        # One channel fewer than num_classes (presumably num_classes counts a
        # background class - TODO confirm).
        self.cate_out_channels = self.num_classes - 1
        self.seg_feat_channels = seg_feat_channels
        self.stacked_convs = stacked_convs
        self.kernel_out_channels = kernel_out_channels
        self.dcn_v2_stages = dcn_v2_stages
        self.segm_strides = segm_strides
        self.solov2_loss = solov2_loss
        self.mask_nms = mask_nms
        self.score_threshold = score_threshold
        self.mask_threshold = mask_threshold

        conv_type = [ConvNormLayer]
        self.conv_func = conv_type[0]
        self.kernel_pred_convs = []
        self.cate_pred_convs = []
        for i in range(self.stacked_convs):
            if i in self.dcn_v2_stages:
                # NOTE(review): conv_type has a single element, so this
                # raises IndexError whenever dcn_v2_stages contains a tower
                # index. Also, conv_func is never reset, so a switch would
                # apply to all later iterations as well.
                self.conv_func = conv_type[1]
            # The first kernel conv takes two extra input channels.
            ch_in = self.in_channels + 2 if i == 0 else self.seg_feat_channels
            kernel_conv = self.add_sublayer(
                'bbox_head.kernel_convs.' + str(i),
                self.conv_func(
                    ch_in=ch_in,
                    ch_out=self.seg_feat_channels,
                    filter_size=3,
                    stride=1,
                    norm_type='gn',
                    norm_name='bbox_head.kernel_convs.{}.gn'.format(i),
                    name='bbox_head.kernel_convs.{}'.format(i)))
            self.kernel_pred_convs.append(kernel_conv)
            ch_in = self.in_channels if i == 0 else self.seg_feat_channels
            cate_conv = self.add_sublayer(
                'bbox_head.cate_convs.' + str(i),
                self.conv_func(
                    ch_in=ch_in,
                    ch_out=self.seg_feat_channels,
                    filter_size=3,
                    stride=1,
                    norm_type='gn',
                    norm_name='bbox_head.cate_convs.{}.gn'.format(i),
                    name='bbox_head.cate_convs.{}'.format(i)))
            self.cate_pred_convs.append(cate_conv)

        # Output conv of the kernel tower (bias keeps the default initializer).
        self.solo_kernel = self.add_sublayer(
            'bbox_head.solo_kernel',
            nn.Conv2D(self.seg_feat_channels,
                      self.kernel_out_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      weight_attr=ParamAttr(
                          name="bbox_head.solo_kernel.weight",
                          initializer=Normal(mean=0., std=0.01)),
                      bias_attr=ParamAttr(name="bbox_head.solo_kernel.bias")))
        # Output conv of the category tower; its bias starts at the logit of
        # 0.01, i.e. -log((1 - 0.01) / 0.01).
        self.solo_cate = self.add_sublayer(
            'bbox_head.solo_cate',
            nn.Conv2D(self.seg_feat_channels,
                      self.cate_out_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      weight_attr=ParamAttr(name="bbox_head.solo_cate.weight",
                                            initializer=Normal(mean=0.,
                                                               std=0.01)),
                      bias_attr=ParamAttr(
                          name="bbox_head.solo_cate.bias",
                          initializer=Constant(
                              value=float(-np.log((1 - 0.01) / 0.01))))))

示例#28

0

显示文件

import numpy as np

import paddle
import paddle.nn as nn

from paddle.nn.initializer import TruncatedNormal, KaimingNormal, Constant, Assign


# Common initializations: ready-made initializer instances.
ones_ = Constant(value=1.0)  # constant fill with 1.0
zeros_ = Constant(value=0.0)  # constant fill with 0.0
kaiming_normal_ = KaimingNormal()  # KaimingNormal with default arguments
trunc_normal_ = TruncatedNormal(std=0.02)  # TruncatedNormal with std 0.02


def orthogonal_(tensor, gain=1):
    r"""Fill the input tensor with a (semi) orthogonal matrix.

    The method is described in `Exact solutions to the nonlinear dynamics of
    learning in deep linear neural networks` - Saxe, A. et al. (2013). The
    input tensor must have at least 2 dimensions, and for tensors with more
    than 2 dimensions the trailing dimensions are flattened.

    NOTE(review): the wording and the example below still refer to PyTorch
    (`torch.Tensor`, `torch.empty`) although this module imports paddle;
    confirm the expected tensor type against the callers.

    Args:
        tensor: an n-dimensional `torch.Tensor`, where :math:`n \geq 2`
        gain: optional scaling factor

    Raises:
        ValueError: if ``tensor`` has fewer than 2 dimensions.

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.orthogonal_(w)
    """
    # Reject tensors with fewer than two dimensions.
    # NOTE(review): this excerpt ends after the check; the rest of the
    # function is not visible in this view.
    if tensor.ndimension() < 2:
        raise ValueError("Only tensors with 2 or more dimensions are supported")

示例#29

0

显示文件

文件： fcos_head.py 项目： rainbowbowbow/PaddleDetection

 def __init__(self):
     """Create the single float32 parameter ``scale_reg``, initialised to 1."""
     super(ScaleReg, self).__init__()
     unit_attr = ParamAttr(initializer=Constant(value=1.))
     self.scale_reg = self.create_parameter(
         shape=[1], attr=unit_attr, dtype="float32")

示例#30

0

显示文件

文件： fcos_head.py 项目： rainbowbowbow/PaddleDetection

    def __init__(self,
                 fcos_feat,
                 num_classes=80,
                 fpn_stride=[8, 16, 32, 64, 128],
                 prior_prob=0.01,
                 fcos_loss='FCOSLoss',
                 norm_reg_targets=True,
                 centerness_on_reg=True):
        """Create the three prediction convs and one ScaleReg per FPN stride.

        Args:
            fcos_feat: stored on the instance.
            num_classes: output channels of the classification conv.
            fpn_stride: strides; each one yields a ScaleReg sub-layer named
                ``p<log2(stride)>_feat``.
            prior_prob: the classification bias is initialised to
                ``-log((1 - prior_prob) / prior_prob)``.
            fcos_loss: stored on the instance.
            norm_reg_targets: stored on the instance.
            centerness_on_reg: stored on the instance.
        """
        super(FCOSHead, self).__init__()
        self.fcos_feat = fcos_feat
        self.num_classes = num_classes
        self.fpn_stride = fpn_stride
        self.prior_prob = prior_prob
        self.fcos_loss = fcos_loss
        self.norm_reg_targets = norm_reg_targets
        self.centerness_on_reg = centerness_on_reg

        def register_pred_conv(layer_name, num_outputs, bias_value):
            # 3x3 conv on 256 input channels, registered under `layer_name`;
            # its parameters are named "<layer_name>_weights" / "_bias".
            conv = nn.Conv2D(
                in_channels=256,
                out_channels=num_outputs,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(
                    name=layer_name + "_weights",
                    initializer=Normal(mean=0., std=0.01)),
                bias_attr=ParamAttr(
                    name=layer_name + "_bias",
                    initializer=Constant(value=bias_value)))
            return self.add_sublayer(layer_name, conv)

        # Classification bias starts at the logit of the prior probability.
        cls_bias = -math.log((1 - self.prior_prob) / self.prior_prob)
        self.fcos_head_cls = register_pred_conv(
            "fcos_head_cls", self.num_classes, cls_bias)
        self.fcos_head_reg = register_pred_conv("fcos_head_reg", 4, 0)
        self.fcos_head_centerness = register_pred_conv(
            "fcos_head_centerness", 1, 0)

        # One ScaleReg per stride, named after log2 of the stride.
        self.scales_regs = []
        for stride in self.fpn_stride:
            level = int(math.log(int(stride), 2))
            scale_layer = self.add_sublayer('p{}_feat'.format(level),
                                            ScaleReg())
            self.scales_regs.append(scale_layer)