def __init__(self, reg_topk=4, reg_channels=64, add_mean=True):
    """Create the two 1x1 convolutions owned by this layer.

    Args:
        reg_topk: Base size of ``total_dim``; also stored as
            ``self.reg_topk``.
        reg_channels: Output width of the first convolution and input
            width of the second.
        add_mean: When true, ``total_dim`` is enlarged by one.
    """
    super(DGQP, self).__init__()
    self.reg_topk = reg_topk
    self.reg_channels = reg_channels
    self.add_mean = add_mean
    self.total_dim = reg_topk + 1 if add_mean else reg_topk

    def _conv1x1(ch_in, ch_out):
        # Both convolutions share the same initialisation scheme:
        # N(0, 0.01) weights and zero bias.
        return nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.01)),
            bias_attr=ParamAttr(initializer=Constant(value=0)))

    # The first convolution consumes 4 * total_dim input channels.
    self.reg_conv1 = self.add_sublayer(
        'dgqp_reg_conv1', _conv1x1(4 * self.total_dim, self.reg_channels))
    # The second convolution reduces to a single output channel.
    self.reg_conv2 = self.add_sublayer(
        'dgqp_reg_conv2', _conv1x1(self.reg_channels, 1))
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride=1,
             padding=0,
             dilation=1,
             groups=1,
             weight_attr=None,
             bias_attr=None,
             lr_scale=1,
             regularizer=None,
             name=None):
    """Build the offset/mask convolution and the deformable convolution.

    Args:
        in_channels: Input channels of both convolutions.
        out_channels: Output channels of the deformable convolution.
        kernel_size: Square kernel size; also determines the number of
            offset (2 * k^2) and mask (k^2) channels.
        stride: Stride applied to both convolutions.
        padding: Accepted for interface compatibility but not used; the
            padding is always derived from ``kernel_size`` (and
            ``dilation`` for the deformable convolution).
        dilation: Dilation of the deformable convolution.
        groups: Groups of the deformable convolution.
        weight_attr: Weight attribute of the deformable convolution.
        bias_attr: Only its truthiness is used: a truthy value enables a
            bias with a fixed attribute, a falsy value disables the bias.
        lr_scale: Learning-rate multiplier of the offset-conv bias.
        regularizer: Regularizer of the offset-conv bias.
        name: Optional prefix for explicit parameter names. When ``None``
            no explicit names are passed to ``ParamAttr``.
    """
    super(DeformableConvV2, self).__init__()
    self.offset_channel = 2 * kernel_size**2
    self.mask_channel = kernel_size**2

    def _param_name(template):
        # With the default name=None the original code either crashed
        # (None + "_bias") or produced the literal prefix "None", which
        # is identical for every unnamed instance. Passing None to
        # ParamAttr instead leaves the naming to the framework.
        return None if name is None else template.format(name)

    offset_bias_name = _param_name('{}._conv_offset.bias')
    if lr_scale == 1 and regularizer is None:
        offset_bias_attr = ParamAttr(
            initializer=Constant(0.), name=offset_bias_name)
    else:
        offset_bias_attr = ParamAttr(
            initializer=Constant(0.),
            learning_rate=lr_scale,
            regularizer=regularizer,
            name=offset_bias_name)

    # One convolution predicts offsets and masks together
    # (2 * k^2 + k^2 = 3 * k^2 channels); it starts from all zeros.
    self.conv_offset = nn.Conv2D(
        in_channels,
        3 * kernel_size**2,
        kernel_size,
        stride=stride,
        padding=(kernel_size - 1) // 2,
        weight_attr=ParamAttr(
            initializer=Constant(0.0),
            name=_param_name('{}._conv_offset.weight')),
        bias_attr=offset_bias_attr)

    if bias_attr:
        # in FCOS-DCN head, specifically need learning_rate and regularizer
        dcn_bias_attr = ParamAttr(
            name=_param_name('{}_bias'),
            initializer=Constant(value=0),
            regularizer=L2Decay(0.),
            learning_rate=2.)
    else:
        # in ResNet backbone, do not need bias
        dcn_bias_attr = False

    self.conv_dcn = DeformConv2D(
        in_channels,
        out_channels,
        kernel_size,
        stride=stride,
        padding=(kernel_size - 1) // 2 * dilation,
        dilation=dilation,
        groups=groups,
        weight_attr=weight_attr,
        bias_attr=dcn_bias_attr)
def ConvTranspose2d(in_channels,
                    out_channels,
                    kernel_size,
                    stride=1,
                    padding=0,
                    output_padding=0,
                    groups=1,
                    bias=True,
                    dilation=1,
                    weight_init=Normal(std=0.001),
                    bias_init=Constant(0.)):
    """Create an ``nn.Conv2DTranspose`` with the given initializers.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Kernel size of the transposed convolution.
        stride: Stride of the transposed convolution.
        padding: Padding of the transposed convolution.
        output_padding: Extra size added to the output.
        groups: Number of groups.
        bias: Whether the layer has a bias term.
        dilation: Dilation of the kernel.
        weight_init: Initializer used for the weight.
        bias_init: Initializer used for the bias when ``bias`` is true.

    Returns:
        The constructed ``nn.Conv2DTranspose`` layer.
    """
    weight_attr = paddle.framework.ParamAttr(initializer=weight_init)
    if bias:
        bias_attr = paddle.framework.ParamAttr(initializer=bias_init)
    else:
        bias_attr = False
    # Pass everything by keyword: nn.Conv2DTranspose declares `groups`
    # before `dilation`, so the former positional call
    # (..., output_padding, dilation, groups) swapped the two values.
    return nn.Conv2DTranspose(
        in_channels,
        out_channels,
        kernel_size,
        stride=stride,
        padding=padding,
        output_padding=output_padding,
        groups=groups,
        dilation=dilation,
        weight_attr=weight_attr,
        bias_attr=bias_attr)
def basic_branch(self, num_conv_out_channels, input_ch):
    """Create one conv block and one classifier per pyramid branch.

    The level indexes are defined from fine to coarse; each level
    contains one more part than the previous one and the sliding step
    is 1.

    Args:
        num_conv_out_channels: Output channels of every 1x1 conv block
            and input features of every linear classifier.
        input_ch: Input channels of every 1x1 conv block.

    Returns:
        A ``(conv_list, fc_list)`` tuple of ``nn.LayerList`` objects, each
        holding ``self.num_branches`` entries.
    """
    conv_branches = nn.LayerList()
    fc_branches = nn.LayerList()

    # The level counter is advanced exactly as before, although nothing
    # built in this method depends on its value.
    level = 0
    for branch in range(self.num_branches):
        if branch >= sum(self.num_in_each_level[0:level + 1]):
            level += 1
        conv_block = nn.Sequential(
            nn.Conv2D(input_ch, num_conv_out_channels, 1),
            nn.BatchNorm2D(num_conv_out_channels),
            nn.ReLU())
        conv_branches.append(conv_block)

    level = 0
    for branch in range(self.num_branches):
        if branch >= sum(self.num_in_each_level[0:level + 1]):
            level += 1
        fc_branches.append(
            nn.Linear(
                in_features=num_conv_out_channels,
                out_features=self.num_classes,
                weight_attr=ParamAttr(
                    initializer=Normal(mean=0., std=0.001)),
                bias_attr=ParamAttr(initializer=Constant(value=0.))))

    return conv_branches, fc_branches
def __init__(self,
             name=None,
             channel_num=None,
             quant_bits=8,
             quant_axis=0,
             dtype='float32',
             quant_on_weight=False):
    """Store the quantization settings and create the per-channel scale.

    Args:
        name: Optional prefix of the generated scale name.
        channel_num: Length of the scale parameter.
        quant_bits: Stored as ``self._quant_bits``.
        quant_axis: Stored as ``self._quant_axis``.
        dtype: Data type of the scale parameter.
        quant_on_weight: Must be true; asserted on entry.
    """
    assert quant_on_weight == True, "Channel_wise only can be used on weight quantization."
    super(FakeQuantChannelWiseAbsMax, self).__init__()
    self._name = name
    self._dtype = dtype
    self._quant_bits = quant_bits
    self._quant_axis = quant_axis
    self._channel_num = channel_num

    if name:
        prefix = "{}.scale".format(name)
    else:
        prefix = 'quant_dequant.scale'
    self._scale_name = unique_name.generate(prefix)

    if not quant_on_weight:
        self._scale = None
        return

    # Non-trainable scale with one entry per channel, starting at zero.
    self._scale = self.create_parameter(
        shape=[self._channel_num],
        attr=ParamAttr(
            name=self._scale_name,
            initializer=Constant(0.0),
            trainable=False),
        dtype=self._dtype)
    self._scale.stop_gradient = True
def __init__(self,
             ch_in,
             ch_out,
             kernel_size,
             stride=1,
             padding=0,
             dilation=1,
             groups=1,
             bias=False):
    """Wrap an ``nn.Conv2D`` with fan-in based uniform weight init.

    Args:
        ch_in: Input channels.
        ch_out: Output channels.
        kernel_size: Square kernel size (used to compute the fan-in).
        stride: Convolution stride.
        padding: Convolution padding.
        dilation: Convolution dilation.
        groups: Convolution groups.
        bias: When true, a zero-initialised bias is added.
    """
    super(ConvLayer, self).__init__()
    # Weights are drawn from U(-1/sqrt(fan_in), 1/sqrt(fan_in)).
    limit = 1 / math.sqrt(ch_in * kernel_size**2)
    weight_attr = paddle.ParamAttr(initializer=Uniform(-limit, limit))
    if bias:
        conv_bias_attr = paddle.ParamAttr(initializer=Constant(0.))
    else:
        conv_bias_attr = False
    self.conv = nn.Conv2D(
        in_channels=ch_in,
        out_channels=ch_out,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        weight_attr=weight_attr,
        bias_attr=conv_bias_attr)
def __init__(self, ch_in, ch_out=128, num_classes=80, conv_num=2):
    """Build the conv tower and the 1x1 prediction conv of the head.

    Args:
        ch_in: Input channels of the first tower convolution.
        ch_out: Channels of every tower convolution output.
        num_classes: Output channels of the final 1x1 convolution.
        conv_num: Number of conv + ReLU pairs in the tower.
    """
    super(HMHead, self).__init__()

    tower = nn.Sequential()
    for idx in range(conv_num):
        layer_name = 'conv.{}'.format(idx)
        conv = nn.Conv2D(
            in_channels=ch_in if idx == 0 else ch_out,
            out_channels=ch_out,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
            bias_attr=ParamAttr(
                learning_rate=2., regularizer=L2Decay(0.)))
        tower.add_sublayer(layer_name, conv)
        tower.add_sublayer(layer_name + '.act', nn.ReLU())
    self.feat = self.add_sublayer('hm_feat', tower)

    # Bias of the prediction conv starts at -log((1 - p) / p), p = 0.01.
    bias_init = float(-np.log((1 - 0.01) / 0.01))
    predictor = nn.Conv2D(
        in_channels=ch_out,
        out_channels=num_classes,
        kernel_size=1,
        weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
        bias_attr=ParamAttr(
            learning_rate=2.,
            regularizer=L2Decay(0.),
            initializer=Constant(bias_init)))
    self.head = self.add_sublayer('hm_head', predictor)
def __init__(self, ch_in, ch_out, norm_type='bn'):
    """Create the 3x3 deformable convolution and its norm layer.

    Args:
        ch_in: Input channels of the deformable convolution.
        ch_out: Output channels; also the width of the norm layer.
        norm_type: Forwarded to ``batch_norm``.
    """
    super(Upsample, self).__init__()
    # Uniform weight range derived from the fan-in of a 3x3 kernel.
    bound = 1. / math.sqrt(ch_in * 3 * 3)
    dcn_weight_attr = ParamAttr(initializer=Uniform(-bound, bound))
    dcn_bias_attr = ParamAttr(
        initializer=Constant(0),
        regularizer=L2Decay(0.),
        learning_rate=2.)
    self.dcn = DeformableConvV2(
        ch_in,
        ch_out,
        kernel_size=3,
        weight_attr=dcn_weight_attr,
        bias_attr=dcn_bias_attr,
        lr_scale=2.,
        regularizer=L2Decay(0.))
    self.bn = batch_norm(
        ch_out, norm_type=norm_type, initializer=Constant(1.))
def __init__(self, a, b, bn_weight_init=1, resolution=-100000):
    """Register a bias-free linear layer followed by a 1-D batch norm.

    Args:
        a: Input features of the linear layer.
        b: Output features of the linear layer and width of the norm.
        bn_weight_init: Constant used to fill the batch-norm weight.
        resolution: Accepted but not used by this constructor.
    """
    super().__init__()
    linear = nn.Linear(a, b, bias_attr=False)
    self.add_sublayer("c", linear)

    norm = nn.BatchNorm1D(b)
    # Overwrite the default norm parameters: constant scale, zero shift.
    Constant(bn_weight_init)(norm.weight)
    zeros_(norm.bias)
    self.add_sublayer("bn", norm)
def __init__(self,
             ch_in,
             ch_out,
             filter_size,
             stride=1,
             groups=1,
             norm_type=None,
             norm_groups=32,
             norm_decay=0.,
             freeze_norm=False,
             act=None):
    """Create a convolution with an optional normalisation layer.

    Args:
        ch_in: Input channels of the convolution.
        ch_out: Output channels of the convolution and norm width.
        filter_size: Kernel size; padding is ``(filter_size - 1) // 2``.
        stride: Convolution stride.
        groups: Convolution groups.
        norm_type: ``None`` for no norm, otherwise one of ``'bn'``,
            ``'sync_bn'`` or ``'gn'``.
        norm_groups: Number of groups when ``norm_type`` is ``'gn'``.
        norm_decay: L2 decay coefficient of the norm parameters.
        freeze_norm: When true the norm parameters get learning rate 0
            and ``stop_gradient`` set, and batch norm uses global stats.
        act: Stored as ``self.act``.
    """
    super(ConvNormLayer, self).__init__()
    self.act = act
    norm_lr = 0. if freeze_norm else 1.

    if norm_type is None:
        # Without a norm layer the convolution keeps its own bias.
        conv_bias_attr = True
        self.norm = None
    else:
        assert norm_type in ['bn', 'sync_bn', 'gn'], \
            "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
        scale_attr = ParamAttr(
            initializer=Constant(1.0),
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay))
        shift_attr = ParamAttr(
            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
        use_global_stats = True if freeze_norm else None
        if norm_type == 'gn':
            self.norm = nn.GroupNorm(
                num_groups=norm_groups,
                num_channels=ch_out,
                weight_attr=scale_attr,
                bias_attr=shift_attr)
        else:
            # Both 'bn' and 'sync_bn' are built as nn.BatchNorm2D here.
            self.norm = nn.BatchNorm2D(
                ch_out,
                weight_attr=scale_attr,
                bias_attr=shift_attr,
                use_global_stats=use_global_stats)
        frozen_candidates = self.norm.parameters()
        if freeze_norm:
            for norm_param in frozen_candidates:
                norm_param.stop_gradient = True
        # The norm layer supplies the shift, so the conv has no bias.
        conv_bias_attr = False

    self.conv = nn.Conv2D(
        in_channels=ch_in,
        out_channels=ch_out,
        kernel_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        groups=groups,
        weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.001)),
        bias_attr=conv_bias_attr)
def __init__(
        self,
        ch_in,
        ch_out=128,
        num_classes=80,
        conv_num=2,
        dcn_head=False,
        lite_head=False,
        norm_type='bn', ):
    """Build the conv tower and the 1x1 prediction conv of the head.

    Args:
        ch_in: Input channels of the first tower layer.
        ch_out: Output channels of every tower layer.
        num_classes: Output channels of the final 1x1 convolution.
        conv_num: Number of tower layers.
        dcn_head: Use ``DeformableConvV2`` instead of ``nn.Conv2D`` in
            the tower (ignored when ``lite_head`` is true).
        lite_head: Use ``LiteConv`` tower layers and leave the weight
            attribute of the prediction conv unset.
        norm_type: Forwarded to ``LiteConv``.
    """
    super(HMHead, self).__init__()

    tower = nn.Sequential()
    for idx in range(conv_num):
        layer_name = 'conv.{}'.format(idx)
        layer_in = ch_in if idx == 0 else ch_out

        if lite_head:
            tower.add_sublayer(
                'hm.' + layer_name,
                LiteConv(
                    in_channels=layer_in,
                    out_channels=ch_out,
                    norm_type=norm_type))
            continue

        if dcn_head:
            layer = DeformableConvV2(
                in_channels=layer_in,
                out_channels=ch_out,
                kernel_size=3,
                weight_attr=ParamAttr(initializer=Normal(0, 0.01)))
        else:
            layer = nn.Conv2D(
                in_channels=layer_in,
                out_channels=ch_out,
                kernel_size=3,
                padding=1,
                weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                bias_attr=ParamAttr(
                    learning_rate=2., regularizer=L2Decay(0.)))
        tower.add_sublayer(layer_name, layer)
        # NOTE(review): the ReLU is attached to the non-lite layers only;
        # confirm this nesting against the original file layout.
        tower.add_sublayer(layer_name + '.act', nn.ReLU())
    self.feat = tower

    # Bias of the prediction conv starts at -log((1 - p) / p), p = 0.01.
    bias_init = float(-np.log((1 - 0.01) / 0.01))
    if lite_head:
        head_weight_attr = None
    else:
        head_weight_attr = ParamAttr(initializer=Normal(0, 0.01))
    self.head = nn.Conv2D(
        in_channels=ch_out,
        out_channels=num_classes,
        kernel_size=1,
        weight_attr=head_weight_attr,
        bias_attr=ParamAttr(
            learning_rate=2.,
            regularizer=L2Decay(0.),
            initializer=Constant(bias_init)))
def __init__(self,
             num_classes=80,
             conv_feat='RetinaFeat',
             anchor_generator='RetinaAnchorGenerator',
             bbox_assigner='MaxIoUAssigner',
             loss_class='FocalLoss',
             loss_bbox='SmoothL1Loss',
             nms='MultiClassNMS',
             prior_prob=0.01,
             nms_pre=1000,
             weights=[1., 1., 1., 1.]):
    """Store the head components and build the two prediction convs.

    Args:
        num_classes: Number of classes predicted per anchor.
        conv_feat: Feature module; its ``feat_out`` attribute gives the
            input channels of both prediction convolutions.
        anchor_generator: Anchor generator; its ``num_anchors`` attribute
            sizes the prediction outputs.
        bbox_assigner: Stored as ``self.bbox_assigner``.
        loss_class: Stored as ``self.loss_class``.
        loss_bbox: Stored as ``self.loss_bbox``.
        nms: Stored as ``self.nms``.
        prior_prob: Prior probability used to derive the classification
            bias ``-log((1 - p) / p)``.
        nms_pre: Stored as ``self.nms_pre``.
        weights: Stored as ``self.weights``.
    """
    super(RetinaHead, self).__init__()
    self.num_classes = num_classes
    self.conv_feat = conv_feat
    self.anchor_generator = anchor_generator
    self.bbox_assigner = bbox_assigner
    self.loss_class = loss_class
    self.loss_bbox = loss_bbox
    self.nms = nms
    self.nms_pre = nms_pre
    self.weights = weights

    cls_bias = -math.log((1 - prior_prob) / prior_prob)
    anchors_per_cell = self.anchor_generator.num_anchors

    def _predictor(ch_out, bias_value):
        # 3x3, stride 1, padding 1 conv with N(0, 0.01) weights and a
        # constant bias.
        return nn.Conv2D(
            in_channels=self.conv_feat.feat_out,
            out_channels=ch_out,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(mean=0.0, std=0.01)),
            bias_attr=ParamAttr(initializer=Constant(value=bias_value)))

    self.retina_cls = _predictor(
        self.num_classes * anchors_per_cell, cls_bias)
    self.retina_reg = _predictor(4 * anchors_per_cell, 0)
def __init__(self,
             channels=256,
             num_levels=5,
             eps=1e-5,
             use_weighted_fusion=True,
             kernel_size=3,
             norm_type='bn',
             norm_groups=32,
             act='swish'):
    """Create the up/down separable convs and optional fusion weights.

    Args:
        channels: Channel count passed to every ``SeparableConvLayer``.
        num_levels: Number of levels; ``num_levels - 1`` convolutions
            are created for each direction.
        eps: Stored as ``self.eps``.
        use_weighted_fusion: When true, learnable fusion weights are
            created for both directions.
        kernel_size: Kernel size of the separable convolutions.
        norm_type: Norm type of the separable convolutions.
        norm_groups: Norm groups of the separable convolutions.
        act: Activation of the separable convolutions.
    """
    super(BiFPNCell, self).__init__()
    self.channels = channels
    self.num_levels = num_levels
    self.eps = eps
    self.use_weighted_fusion = use_weighted_fusion

    fusion_nodes = self.num_levels - 1

    def _separable_convs():
        layers = []
        for _ in range(fusion_nodes):
            layers.append(
                SeparableConvLayer(
                    self.channels,
                    kernel_size=kernel_size,
                    norm_type=norm_type,
                    norm_groups=norm_groups,
                    act=act))
        return nn.LayerList(layers)

    # up
    self.conv_up = _separable_convs()
    # down
    self.conv_down = _separable_convs()

    if not self.use_weighted_fusion:
        return
    # Weights start at 1: two per node going up, three per node going down.
    self.up_weights = self.create_parameter(
        shape=[fusion_nodes, 2],
        attr=ParamAttr(initializer=Constant(1.)))
    self.down_weights = self.create_parameter(
        shape=[fusion_nodes, 3],
        attr=ParamAttr(initializer=Constant(1.)))
def __init__(self,
             name=None,
             moving_rate=0.9,
             quant_bits=8,
             dtype='float32'):
    """Create the scale, state and accum parameters of the quantizer.

    Args:
        name: Optional prefix of the generated parameter names; the
            prefix ``quant_dequant`` is used when it is empty.
        moving_rate: Stored as ``self._moving_rate``.
        quant_bits: Stored as ``self._quant_bits``.
        dtype: Data type of the three parameters.
    """
    super(FakeQuantMovingAverageAbsMax, self).__init__()
    self._moving_rate = moving_rate
    self._quant_bits = quant_bits

    def _scalar_param(kind, init_value):
        # One non-trainable parameter of shape [1] with a unique name.
        if name:
            prefix = "{}.{}".format(name, kind)
        else:
            prefix = 'quant_dequant.{}'.format(kind)
        attr = ParamAttr(
            name=unique_name.generate(prefix),
            initializer=Constant(init_value),
            trainable=False)
        return self.create_parameter(shape=[1], attr=attr, dtype=dtype)

    self._scale = _scalar_param('scale', 0.001)
    self._scale.stop_gradient = True

    self._state = _scalar_param('state', 1)
    self._state.stop_gradient = True

    self._accum = _scalar_param('accum', 1)
    self._accum.stop_gradient = True
def __init__(self,
             ch_in,
             ch_out,
             filter_size,
             stride,
             norm_type='bn',
             norm_groups=32,
             use_dcn=False,
             norm_name=None,
             bias_on=False,
             lr_scale=1.,
             name=None):
    """Create a convolution followed by a normalisation layer.

    Args:
        ch_in: Input channels of the convolution.
        ch_out: Output channels of the convolution and norm width.
        filter_size: Kernel size; padding is ``(filter_size - 1) // 2``.
        stride: Convolution stride.
        norm_type: One of ``'bn'``, ``'sync_bn'`` or ``'gn'``.
        norm_groups: Number of groups when ``norm_type`` is ``'gn'``.
        use_dcn: Accepted but not used by this constructor.
        norm_name: Optional prefix for the norm parameter names
            (``<norm_name>_scale`` / ``<norm_name>_offset``).
        bias_on: When true the convolution gets a zero-initialised bias.
        lr_scale: Learning-rate multiplier of the convolution bias.
        name: Optional prefix for the convolution parameter names
            (``<name>_weight`` / ``<name>_bias``).

    Raises:
        AssertionError: If ``norm_type`` is not a supported value.
    """
    super(ConvNormLayer, self).__init__()
    assert norm_type in ['bn', 'sync_bn', 'gn']

    def _suffixed(prefix, suffix):
        # `name` and `norm_name` default to None; concatenating None with
        # a string raised TypeError. Without a prefix no explicit name is
        # passed to ParamAttr.
        return None if prefix is None else prefix + suffix

    if bias_on:
        bias_attr = ParamAttr(
            name=_suffixed(name, "_bias"),
            initializer=Constant(value=0.),
            learning_rate=lr_scale)
    else:
        bias_attr = False

    self.conv = nn.Conv2D(
        in_channels=ch_in,
        out_channels=ch_out,
        kernel_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        groups=1,
        weight_attr=ParamAttr(
            name=_suffixed(name, "_weight"),
            initializer=Normal(mean=0., std=0.01),
            learning_rate=1.),
        bias_attr=bias_attr)

    # Norm parameters carry an explicit zero L2 decay.
    param_attr = ParamAttr(
        name=_suffixed(norm_name, "_scale"),
        learning_rate=1.,
        regularizer=L2Decay(0.))
    bias_attr = ParamAttr(
        name=_suffixed(norm_name, "_offset"),
        learning_rate=1.,
        regularizer=L2Decay(0.))
    if norm_type in ['bn', 'sync_bn']:
        self.norm = nn.BatchNorm2D(
            ch_out, weight_attr=param_attr, bias_attr=bias_attr)
    elif norm_type == 'gn':
        self.norm = nn.GroupNorm(
            num_groups=norm_groups,
            num_channels=ch_out,
            weight_attr=param_attr,
            bias_attr=bias_attr)
def __init__(self, name=None, moving_rate=0.9, dtype='float32'):
    r"""
    Create the scale, state and accum parameters of the layer.

    MovingAverageMaxScale layer is used to calculate the output
    quantization scale of a Layer. Its computational formula is:

    :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
    :math:`Out = X`

    Args:
        name: Optional prefix of the generated parameter names; the
            prefix ``outscale`` is used when it is empty.
        moving_rate: Stored as ``self._moving_rate``.
        dtype: Data type of the three parameters.
    """
    super(MovingAverageAbsMaxScale, self).__init__()
    self._moving_rate = moving_rate

    def _zero_scalar(kind):
        # One non-trainable, zero-initialised parameter of shape [1].
        if name:
            prefix = '{}.{}'.format(name, kind)
        else:
            prefix = 'outscale.{}'.format(kind)
        attr = ParamAttr(
            name=unique_name.generate(prefix),
            initializer=Constant(0),
            trainable=False)
        return self.create_parameter(shape=[1], attr=attr, dtype=dtype)

    self._scale = _zero_scalar('scale')
    self._scale.stop_gradient = True

    self._state = _zero_scalar('state')
    self._state.stop_gradient = True

    self._accum = _zero_scalar('accum')
    self._accum.stop_gradient = True
def __init__(self,
             name=None,
             quant_bits=8,
             dtype='float32',
             quant_on_weight=False):
    """Store the quantization settings and optionally create the scale.

    Args:
        name: Optional prefix of the generated scale name; the prefix
            ``quant_dequant`` is used when it is empty.
        quant_bits: Stored as ``self._quant_bits``.
        dtype: Data type of the scale parameter.
        quant_on_weight: When true a non-trainable scale parameter of
            shape ``[1]`` is created; otherwise ``self._scale`` is None.
    """
    super(FakeQuantAbsMax, self).__init__()
    self._quant_bits = quant_bits
    self._name = name
    # Record the requested dtype. The constructor previously read
    # self._dtype without ever assigning it, so the `dtype` argument had
    # no effect here.
    self._dtype = dtype
    scale_prefix = "{}.scale".format(
        name) if name else 'quant_dequant.scale'
    self._scale_name = unique_name.generate(scale_prefix)
    if quant_on_weight:
        scale_attr = ParamAttr(
            name=self._scale_name,
            initializer=Constant(0.001),
            trainable=False)
        self._scale = self.create_parameter(
            shape=[1], attr=scale_attr, dtype=self._dtype)
        self._scale.stop_gradient = True
    else:
        self._scale = None
def initialize_parameters(self):
    """Re-initialise embeddings, attention and MLP weights in place.

    Every initializer is applied directly to an existing parameter; the
    method returns nothing.
    """
    Normal(std=0.02)(self.token_embedding.weight)
    Normal(std=0.01)(self.positional_embedding)

    qkv_names = ("q_proj", "k_proj", "v_proj")

    if isinstance(self.visual, ModifiedResNet):
        if self.visual.attnpool is not None:
            pool_init = Normal(std=self.embed_dim ** -0.5)
            for proj_name in qkv_names + ("out_proj", ):
                pool_init(
                    getattr(self.visual.attnpool.attn, proj_name).weight)

        # Zero the weight of every parameter whose name ends in
        # "bn3.weight" inside the four residual stages.
        stages = [
            self.visual.layer1,
            self.visual.layer2,
            self.visual.layer3,
            self.visual.layer4,
        ]
        for stage in stages:
            for param_name, param in stage.named_parameters():
                if param_name.endswith("bn3.weight"):
                    Constant(value=0.0)(param)

    width = self.transformer.width
    proj_std = (width ** -0.5) * ((2 * self.transformer.layers) ** -0.5)
    attn_std = width ** -0.5
    fc_std = (2 * width) ** -0.5
    for block in self.transformer.resblocks:
        attn_init = Normal(std=attn_std)
        for proj_name in qkv_names:
            attn_init(getattr(block.attn, proj_name).weight)
        Normal(std=proj_std)(block.attn.out_proj.weight)
        Normal(std=fc_std)(block.mlp.c_fc.weight)
        Normal(std=proj_std)(block.mlp.c_proj.weight)

    if self.text_projection is not None:
        Normal(std=self.transformer.width ** -0.5)(self.text_projection)
def __init__(
        self,
        a,
        b,
        ks=1,
        stride=1,
        pad=0,
        dilation=1,
        groups=1,
        bn_weight_init=1,
        resolution=-10000, ):
    """Register a bias-free convolution followed by a 2-D batch norm.

    Args:
        a: Input channels of the convolution.
        b: Output channels of the convolution and width of the norm.
        ks: Kernel size.
        stride: Convolution stride.
        pad: Convolution padding.
        dilation: Convolution dilation.
        groups: Convolution groups.
        bn_weight_init: Constant used to fill the batch-norm weight.
        resolution: Accepted but not used by this constructor.
    """
    super().__init__()
    conv = nn.Conv2D(
        a,
        b,
        ks,
        stride=stride,
        padding=pad,
        dilation=dilation,
        groups=groups,
        bias_attr=False)
    self.add_sublayer("c", conv)

    norm = nn.BatchNorm2D(b)
    # Overwrite the default norm parameters: constant scale, zero shift.
    Constant(bn_weight_init)(norm.weight)
    zeros_(norm.bias)
    self.add_sublayer("bn", norm)
def __init__(self, init_value=0., use_uncertainy=True):
    """Create a single learnable float32 scalar.

    Args:
        init_value: Constant the parameter is initialised with.
        use_uncertainy: Accepted but not used by this constructor.
    """
    super(LossParam, self).__init__()
    scalar_attr = ParamAttr(initializer=Constant(value=init_value))
    self.loss_param = self.create_parameter(
        shape=[1], attr=scalar_attr, dtype="float32")
def __init__(self,
             ch_in,
             ch_out,
             filter_size,
             stride,
             groups=1,
             act=None,
             norm_type='bn',
             norm_decay=0.,
             freeze_norm=True,
             lr=1.0,
             dcn_v2=False):
    """Create a (deformable) convolution followed by batch norm.

    Args:
        ch_in: Input channels.
        ch_out: Output channels and width of the norm layer.
        filter_size: Kernel size; padding is ``(filter_size - 1) // 2``.
        stride: Convolution stride.
        groups: Convolution groups.
        act: Stored as ``self.act``.
        norm_type: ``'bn'`` or ``'sync_bn'``.
        norm_decay: L2 decay coefficient of the norm parameters.
        freeze_norm: When true the norm parameters are non-trainable,
            get learning rate 0 and have ``stop_gradient`` set.
        lr: Learning rate of the conv weight (and of the norm parameters
            when they are not frozen).
        dcn_v2: Build an offset conv plus ``DeformConv2D`` instead of a
            plain ``nn.Conv2D``.

    Raises:
        AssertionError: If ``norm_type`` is not a supported value.
    """
    super(ConvNormLayer, self).__init__()
    assert norm_type in ['bn', 'sync_bn']
    self.norm_type = norm_type
    self.act = act
    self.dcn_v2 = dcn_v2

    same_padding = (filter_size - 1) // 2
    if self.dcn_v2:
        self.offset_channel = 2 * filter_size**2
        self.mask_channel = filter_size**2
        # Offsets and masks come from one zero-initialised convolution
        # with 3 * k^2 output channels.
        self.conv_offset = nn.Conv2D(
            in_channels=ch_in,
            out_channels=3 * filter_size**2,
            kernel_size=filter_size,
            stride=stride,
            padding=same_padding,
            weight_attr=ParamAttr(initializer=Constant(0.)),
            bias_attr=ParamAttr(initializer=Constant(0.)))
        self.conv = DeformConv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=same_padding,
            dilation=1,
            groups=groups,
            weight_attr=ParamAttr(learning_rate=lr),
            bias_attr=False)
    else:
        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=same_padding,
            groups=groups,
            weight_attr=ParamAttr(learning_rate=lr),
            bias_attr=False)

    norm_lr = 0. if freeze_norm else lr
    norm_trainable = not freeze_norm
    scale_attr = ParamAttr(
        learning_rate=norm_lr,
        regularizer=L2Decay(norm_decay),
        trainable=norm_trainable)
    shift_attr = ParamAttr(
        learning_rate=norm_lr,
        regularizer=L2Decay(norm_decay),
        trainable=norm_trainable)

    if norm_type == 'sync_bn':
        self.norm = nn.SyncBatchNorm(
            ch_out, weight_attr=scale_attr, bias_attr=shift_attr)
    else:
        self.norm = nn.BatchNorm(
            ch_out,
            act=None,
            param_attr=scale_attr,
            bias_attr=shift_attr,
            use_global_stats=bool(freeze_norm))

    frozen_candidates = self.norm.parameters()
    if freeze_norm:
        for norm_param in frozen_candidates:
            norm_param.stop_gradient = True
def __init__(self):
    """Create the two learnable scalar loss weights."""
    super(FairMOTLoss, self).__init__()
    # (attribute name, initial value); created in this order.
    initial_weights = (("det_weight", -1.85), ("reid_weight", -1.05))
    for attr_name, start_value in initial_weights:
        weight = self.create_parameter(
            shape=[1], default_initializer=Constant(start_value))
        setattr(self, attr_name, weight)
def __init__(self,
             ch_in: int = 3,
             class_num: int = 20,
             ignore_thresh: float = 0.7,
             valid_thresh: float = 0.005,
             nms_topk: int = 400,
             nms_posk: int = 100,
             nms_thresh: float = 0.45,
             is_train: bool = True,
             load_checkpoint: str = None):
    """Build the detector and load its weights.

    Args:
        ch_in: Input channels of the backbone.
        class_num: Number of object classes.
        ignore_thresh: Stored as ``self.ignore_thresh``.
        valid_thresh: Stored as ``self.valid_thresh``.
        nms_topk: Stored as ``self.nms_topk``.
        nms_posk: Stored as ``self.nms_posk``.
        nms_thresh: Stored as ``self.nms_thresh``.
        is_train: Sub-modules are built with ``is_test=not is_train``.
        load_checkpoint: Path of a custom checkpoint. When ``None`` the
            pretrained checkpoint under ``self.directory`` is used and
            downloaded with ``wget`` if it is missing.

    Raises:
        OSError: If ``wget`` cannot be started.
        subprocess.CalledProcessError: If the download fails.
    """
    # Imported locally so the block does not depend on a file-level import.
    import subprocess

    super(YOLOv3, self).__init__()
    self.is_train = is_train
    self.block = DarkNet53_conv_body(ch_in=ch_in, is_test=not self.is_train)
    self.block_outputs = []
    self.yolo_blocks = []
    self.route_blocks_2 = []
    self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    self.anchors = [
        10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
        373, 326
    ]
    self.class_num = class_num
    self.ignore_thresh = ignore_thresh
    self.valid_thresh = valid_thresh
    self.nms_topk = nms_topk
    self.nms_posk = nms_posk
    self.nms_thresh = nms_thresh

    ch_in_list = [1024, 768, 384]
    for i in range(3):
        yolo_block = self.add_sublayer(
            "yolo_detecton_block_%d" % (i),
            YoloDetectionBlock(
                ch_in_list[i],
                channel=512 // (2**i),
                is_test=not self.is_train))
        self.yolo_blocks.append(yolo_block)

        # One prediction per masked anchor: class scores plus 5 values.
        num_filters = len(self.anchor_masks[i]) * (self.class_num + 5)
        # NOTE(review): `nn.Conv2d` is kept as written; confirm it is the
        # spelling exposed by the Paddle version this file targets.
        block_out = self.add_sublayer(
            "block_out_%d" % (i),
            nn.Conv2d(
                1024 // (2**i),
                num_filters,
                1,
                stride=1,
                padding=0,
                weight_attr=paddle.ParamAttr(initializer=Normal(0., 0.02)),
                bias_attr=paddle.ParamAttr(
                    initializer=Constant(0.0), regularizer=L2Decay(0.))))
        self.block_outputs.append(block_out)

        if i < 2:
            route = self.add_sublayer(
                "route2_%d" % i,
                ConvBNLayer(
                    ch_in=512 // (2**i),
                    ch_out=256 // (2**i),
                    filter_size=1,
                    stride=1,
                    padding=0,
                    is_test=(not self.is_train)))
            self.route_blocks_2.append(route)
    self.upsample = Upsample()

    if load_checkpoint is not None:
        model_dict = paddle.load(load_checkpoint)[0]
        self.set_dict(model_dict)
        print("load custom checkpoint success")
    else:
        checkpoint = os.path.join(self.directory,
                                  'yolov3_darknet53_voc.pdparams')
        if not os.path.exists(checkpoint):
            url = ('https://paddlehub.bj.bcebos.com/dygraph/detection/'
                   'yolov3_darknet53_voc.pdparams')
            try:
                # An argument list (no shell) keeps paths containing
                # spaces or shell metacharacters intact, and check=True
                # surfaces a failed download instead of ignoring it.
                subprocess.run(['wget', url, '-O', checkpoint], check=True)
            except (OSError, subprocess.CalledProcessError):
                # wget may leave an empty or partial target behind; remove
                # it so the next construction retries the download.
                if os.path.exists(checkpoint):
                    os.remove(checkpoint)
                raise
        model_dict = paddle.load(checkpoint)[0]
        self.set_dict(model_dict)
        print("load pretrained checkpoint success")
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch16_384_pretrained.pdparams", "ViT_base_patch32_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch32_384_pretrained.pdparams", "ViT_large_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_224_pretrained.pdparams", "ViT_large_patch16_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_384_pretrained.pdparams", "ViT_large_patch32_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch32_384_pretrained.pdparams", } __all__ = list(MODEL_URLS.keys()) trunc_normal_ = TruncatedNormal(std=.02) normal_ = Normal zeros_ = Constant(value=0.) ones_ = Constant(value=1.) def to_2tuple(x): return tuple([x] * 2) def drop_path(x, drop_prob=0., training=False): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... """ if drop_prob == 0. or not training: return x keep_prob = paddle.to_tensor(1 - drop_prob)
def __init__(self,
             stacked_convs=2,
             feat_in=256,
             feat_out=256,
             num_classes=15,
             anchor_strides=[8, 16, 32, 64, 128],
             anchor_scales=[4],
             anchor_ratios=[1.0],
             target_means=0.0,
             target_stds=1.0,
             align_conv_type='AlignConv',
             align_conv_size=3,
             use_sigmoid_cls=True,
             anchor_assign=RBoxAssigner().__dict__,
             reg_loss_weight=[1.0, 1.0, 1.0, 1.0, 1.1],
             cls_loss_weight=[1.1, 1.05],
             reg_loss_type='l1',
             is_training=True):
    """Build the FAM and ODM branches of the head.

    Args:
        stacked_convs: Number of conv + ReLU pairs in each tower.
        feat_in: Input channels of the first FAM tower convolution.
        feat_out: Channel width used everywhere else in the head.
        num_classes: Classification outputs when ``use_sigmoid_cls``.
        anchor_strides: Strides; also used as anchor base sizes.
        anchor_scales: Passed to every ``S2ANetAnchorGenerator``.
        anchor_ratios: Passed to every ``S2ANetAnchorGenerator``.
        target_means: Scalar broadcast to the 5-element ``self.means``.
        target_stds: Scalar broadcast to the 5-element ``self.stds``.
        align_conv_type: ``'AlignConv'``, ``'Conv'`` or ``'DCN'``.
        align_conv_size: Kernel size of the alignment convolution.
        use_sigmoid_cls: Selects ``num_classes`` (true) or 1 (false)
            classification channels.
        anchor_assign: Stored as ``self.anchor_assign``.
        reg_loss_weight: Stored as ``self.reg_loss_weight``.
        cls_loss_weight: Stored as ``self.cls_loss_weight``.
        reg_loss_type: Stored as ``self.reg_loss_type``.
        is_training: Stored as ``self.is_training``.

    Raises:
        AssertionError: If ``align_conv_type`` is not a supported value.
    """
    super(S2ANetHead, self).__init__()
    self.stacked_convs = stacked_convs
    self.feat_in = feat_in
    self.feat_out = feat_out
    self.anchor_list = None
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # The plain list assigned above is replaced by its tensor form.
    self.anchor_strides = paddle.to_tensor(anchor_strides)
    self.anchor_base_sizes = list(anchor_strides)
    self.means = paddle.ones(shape=[5]) * target_means
    self.stds = paddle.ones(shape=[5]) * target_stds
    assert align_conv_type in ['AlignConv', 'Conv', 'DCN']
    self.align_conv_type = align_conv_type
    self.align_conv_size = align_conv_size

    self.use_sigmoid_cls = use_sigmoid_cls
    self.cls_out_channels = num_classes if self.use_sigmoid_cls else 1
    self.sampling = False
    self.anchor_assign = anchor_assign
    self.reg_loss_weight = reg_loss_weight
    self.cls_loss_weight = cls_loss_weight
    self.alpha = 1.0
    self.beta = 1.0
    self.reg_loss_type = reg_loss_type
    self.is_training = is_training

    self.s2anet_head_out = None

    def _normal_weight():
        return ParamAttr(initializer=Normal(0.0, 0.01))

    def _const_bias(value=0):
        return ParamAttr(initializer=Constant(value))

    # anchor
    self.anchor_generators = nn.LayerList([
        S2ANetAnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ])

    # FAM towers
    self.fam_cls_convs = nn.Sequential()
    self.fam_reg_convs = nn.Sequential()
    for idx in range(self.stacked_convs):
        tower_in = self.feat_in if idx == 0 else self.feat_out

        self.fam_cls_convs.add_sublayer(
            'fam_cls_conv_{}'.format(idx),
            nn.Conv2D(
                in_channels=tower_in,
                out_channels=self.feat_out,
                kernel_size=3,
                padding=1,
                weight_attr=_normal_weight(),
                bias_attr=_const_bias()))
        self.fam_cls_convs.add_sublayer(
            'fam_cls_conv_{}_act'.format(idx), nn.ReLU())

        self.fam_reg_convs.add_sublayer(
            'fam_reg_conv_{}'.format(idx),
            nn.Conv2D(
                in_channels=tower_in,
                out_channels=self.feat_out,
                kernel_size=3,
                padding=1,
                weight_attr=_normal_weight(),
                bias_attr=_const_bias()))
        self.fam_reg_convs.add_sublayer(
            'fam_reg_conv_{}_act'.format(idx), nn.ReLU())

    self.fam_reg = nn.Conv2D(
        self.feat_out,
        5,
        1,
        weight_attr=_normal_weight(),
        bias_attr=_const_bias())
    # Classification bias starts at -log((1 - p) / p) with p = 0.01.
    prior_prob = 0.01
    bias_init = float(-np.log((1 - prior_prob) / prior_prob))
    self.fam_cls = nn.Conv2D(
        self.feat_out,
        self.cls_out_channels,
        1,
        weight_attr=_normal_weight(),
        bias_attr=_const_bias(bias_init))

    align_padding = (self.align_conv_size - 1) // 2
    if self.align_conv_type == "AlignConv":
        self.align_conv = AlignConv(self.feat_out, self.feat_out,
                                    self.align_conv_size)
    elif self.align_conv_type == "Conv":
        self.align_conv = nn.Conv2D(
            self.feat_out,
            self.feat_out,
            self.align_conv_size,
            padding=align_padding,
            bias_attr=_const_bias())
    elif self.align_conv_type == "DCN":
        self.align_conv_offset = nn.Conv2D(
            self.feat_out,
            2 * self.align_conv_size**2,
            1,
            weight_attr=_normal_weight(),
            bias_attr=_const_bias())
        self.align_conv = paddle.vision.ops.DeformConv2D(
            self.feat_out,
            self.feat_out,
            self.align_conv_size,
            padding=align_padding,
            weight_attr=_normal_weight(),
            bias_attr=False)

    self.or_conv = nn.Conv2D(
        self.feat_out,
        self.feat_out,
        kernel_size=3,
        padding=1,
        weight_attr=_normal_weight(),
        bias_attr=_const_bias())

    # ODM
    self.odm_cls_convs = nn.Sequential()
    self.odm_reg_convs = nn.Sequential()
    for idx in range(self.stacked_convs):
        self.odm_cls_convs.add_sublayer(
            'odm_cls_conv_{}'.format(idx),
            nn.Conv2D(
                in_channels=self.feat_out,
                out_channels=self.feat_out,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=_normal_weight(),
                bias_attr=_const_bias()))
        self.odm_cls_convs.add_sublayer(
            'odm_cls_conv_{}_act'.format(idx), nn.ReLU())

        self.odm_reg_convs.add_sublayer(
            'odm_reg_conv_{}'.format(idx),
            nn.Conv2D(
                in_channels=self.feat_out,
                out_channels=self.feat_out,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=_normal_weight(),
                bias_attr=_const_bias()))
        self.odm_reg_convs.add_sublayer(
            'odm_reg_conv_{}_act'.format(idx), nn.ReLU())

    self.odm_cls = nn.Conv2D(
        self.feat_out,
        self.cls_out_channels,
        3,
        padding=1,
        weight_attr=_normal_weight(),
        bias_attr=_const_bias(bias_init))
    self.odm_reg = nn.Conv2D(
        self.feat_out,
        5,
        3,
        padding=1,
        weight_attr=_normal_weight(),
        bias_attr=_const_bias())

    self.featmap_sizes = []
    self.base_anchors_list = []
    self.refine_anchor_list = []
def __init__(self,
             input_dim,
             filters,
             filter_size,
             stride=1,
             bias_attr=False,
             norm_type=None,
             groups=1,
             norm_groups=32,
             act=None,
             freeze_norm=False,
             is_test=False,
             norm_decay=0.,
             lr=1.,
             bias_lr=None,
             weight_init=None,
             bias_init=None,
             use_dcn=False,
             name=''):
    """Create a conv (plain or deformable) + norm + activation unit.

    Args:
        input_dim: Input channels of the convolution.
        filters: Output channels of the convolution and norm width.
        filter_size: Kernel size; padding is ``(filter_size - 1) // 2``.
        stride: Convolution stride.
        bias_attr: Truthy to give the convolution a bias.
        norm_type: ``None`` or one of ``'bn'``, ``'sync_bn'``, ``'gn'``,
            ``'affine_channel'``, ``'in'``, ``'ln'``.
        groups: Convolution groups.
        norm_groups: Group count for group norm; overridden to
            ``filters`` for ``'in'`` and to 1 for ``'ln'``.
        act: ``None``, ``'relu'``, ``'leaky'`` or ``'mish'``.
        freeze_norm: When true the norm parameters are non-trainable
            and get learning rate 0.
        is_test: Stored as ``self.is_test``.
        norm_decay: L2 decay coefficient of the norm parameters.
        lr: Learning rate of the conv weight (and of the norm parameters
            when they are not frozen).
        bias_lr: Learning rate of the conv bias; falls back to ``lr``
            when falsy.
        weight_init: Weight initializer of the plain convolution.
        bias_init: Initializer of the conv bias.
        use_dcn: Build an offset conv plus ``DeformConv2D`` instead of a
            plain ``nn.Conv2D``.
        name: Layer name; also used to derive the norm parameter names.

    Raises:
        AssertionError: If ``norm_type`` is not a supported value.
        NotImplementedError: If ``act`` is not a supported value.
    """
    super(Conv2dUnit, self).__init__()
    self.filters = filters
    self.filter_size = filter_size
    self.stride = stride
    self.padding = (filter_size - 1) // 2
    self.act = act
    self.freeze_norm = freeze_norm
    self.is_test = is_test
    self.norm_decay = norm_decay
    self.use_dcn = use_dcn
    self.name = name

    # conv
    conv_name = name
    self.conv_offset = None

    # Parameters that must not be regularised: the scale/offset of norm
    # layers (bn, affine_channel, gn) and the bias of convolutions. The
    # conv bias therefore carries an explicit zero L2 decay.
    conv_battr = False
    if bias_attr:
        conv_battr = ParamAttr(
            learning_rate=bias_lr if bias_lr else lr,
            initializer=bias_init,
            regularizer=L2Decay(0.))

    if use_dcn:
        self.offset_channel = 2 * filter_size**2
        self.mask_channel = filter_size**2
        # Offsets and masks come from one zero-initialised convolution
        # with 3 * k^2 output channels.
        self.conv_offset = nn.Conv2D(
            in_channels=input_dim,
            out_channels=3 * filter_size**2,
            kernel_size=filter_size,
            stride=stride,
            padding=self.padding,
            weight_attr=ParamAttr(initializer=Constant(0.)),
            bias_attr=ParamAttr(initializer=Constant(0.)))
        # The framework's own DCNv2 operator.
        self.conv = DeformConv2D(
            in_channels=input_dim,
            out_channels=filters,
            kernel_size=filter_size,
            stride=stride,
            padding=self.padding,
            dilation=1,
            groups=groups,
            weight_attr=ParamAttr(learning_rate=lr),
            bias_attr=conv_battr)
    else:
        self.conv = nn.Conv2D(
            in_channels=input_dim,
            out_channels=filters,
            kernel_size=filter_size,
            stride=stride,
            padding=self.padding,
            groups=groups,
            weight_attr=ParamAttr(
                learning_rate=lr, initializer=weight_init),
            bias_attr=conv_battr)

    # norm
    assert norm_type in [None, 'bn', 'sync_bn', 'gn', 'affine_channel', 'in', 'ln']
    bn, sync_bn, gn, af = get_norm(norm_type)
    if norm_type == 'in':
        norm_groups = filters
    if norm_type == 'ln':
        norm_groups = 1

    # The prefix depends on the norm kind; "af" wins over "gn" when both
    # flags are set.
    norm_prefix = "bn"
    if gn:
        norm_prefix = "gn"
    if af:
        norm_prefix = "af"
    if conv_name == "conv1":
        norm_name = norm_prefix + "_" + conv_name
    else:
        norm_name = norm_prefix + conv_name[3:]

    norm_lr = 0. if freeze_norm else lr
    norm_trainable = not freeze_norm
    # The norm scale/offset are among the parameters that must not be
    # regularised; their decay is whatever norm_decay the caller passes.
    pattr = ParamAttr(
        learning_rate=norm_lr,
        regularizer=L2Decay(norm_decay),
        name=norm_name + "_scale",
        trainable=norm_trainable)
    battr = ParamAttr(
        learning_rate=norm_lr,
        regularizer=L2Decay(norm_decay),
        name=norm_name + "_offset",
        trainable=norm_trainable)

    self.bn = None
    self.gn = None
    self.af = None
    if bn:
        self.bn = paddle.nn.BatchNorm2D(
            filters, weight_attr=pattr, bias_attr=battr)
    if sync_bn:
        self.bn = paddle.nn.SyncBatchNorm(
            filters, weight_attr=pattr, bias_attr=battr)
    if gn:
        self.gn = paddle.nn.GroupNorm(
            num_groups=norm_groups,
            num_channels=filters,
            weight_attr=pattr,
            bias_attr=battr)
    if af:
        self.af = True
        self.scale = self.create_parameter(
            shape=[filters],
            dtype='float32',
            attr=pattr,
            default_initializer=Constant(1.))
        self.offset = self.create_parameter(
            shape=[filters],
            dtype='float32',
            attr=battr,
            default_initializer=Constant(0.),
            is_bias=True)

    # act
    self.act = None
    if act is None:
        pass
    elif act == 'relu':
        self.act = paddle.nn.ReLU()
    elif act == 'leaky':
        self.act = paddle.nn.LeakyReLU(0.1)
    elif act == 'mish':
        self.act = Mish()
    else:
        raise NotImplementedError("Activation \'{}\' is not implemented.".format(act))
def __init__(self,
             num_classes=80,
             in_channels=256,
             seg_feat_channels=256,
             stacked_convs=4,
             num_grids=[40, 36, 24, 16, 12],
             kernel_out_channels=256,
             dcn_v2_stages=[],
             segm_strides=[8, 8, 16, 32, 32],
             solov2_loss=None,
             score_threshold=0.1,
             mask_threshold=0.5,
             mask_nms=None):
    """Create the stacked kernel/category conv towers and the two output convs.

    Args:
        num_classes: class count; the category conv emits ``num_classes - 1``
            channels.
        in_channels: channels of the incoming features. The first kernel
            conv takes ``in_channels + 2`` inputs, the first category conv
            takes ``in_channels``.
        seg_feat_channels: width of every stacked conv.
        stacked_convs: number of conv layers in each tower.
        num_grids: stored as ``seg_num_grids``.
        kernel_out_channels: output channels of the kernel prediction conv.
        dcn_v2_stages: tower indices meant to use a different conv type
            (see the review note in the loop).
        segm_strides: stored on the instance.
        solov2_loss: stored on the instance.
        score_threshold: stored on the instance.
        mask_threshold: stored on the instance.
        mask_nms: stored on the instance.
    """
    super(SOLOv2Head, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.seg_num_grids = num_grids
    self.cate_out_channels = self.num_classes - 1
    self.seg_feat_channels = seg_feat_channels
    self.stacked_convs = stacked_convs
    self.kernel_out_channels = kernel_out_channels
    self.dcn_v2_stages = dcn_v2_stages
    self.segm_strides = segm_strides
    self.solov2_loss = solov2_loss
    self.mask_nms = mask_nms
    self.score_threshold = score_threshold
    self.mask_threshold = mask_threshold

    conv_variants = [ConvNormLayer]
    self.conv_func = conv_variants[0]
    self.kernel_pred_convs = []
    self.cate_pred_convs = []
    for stage in range(self.stacked_convs):
        if stage in self.dcn_v2_stages:
            # NOTE(review): conv_variants holds a single entry, so this
            # lookup raises IndexError for any stage listed in
            # dcn_v2_stages. The assignment is also never reset, so later
            # stages would inherit the switched conv type. Confirm the
            # intended deformable layer and fix.
            self.conv_func = conv_variants[1]
        is_first = stage == 0

        # Kernel branch: the first layer takes two extra input channels.
        kernel_name = 'bbox_head.kernel_convs.{}'.format(stage)
        kernel_in = (self.in_channels + 2
                     if is_first else self.seg_feat_channels)
        kernel_layer = self.conv_func(
            ch_in=kernel_in,
            ch_out=self.seg_feat_channels,
            filter_size=3,
            stride=1,
            norm_type='gn',
            norm_name=kernel_name + '.gn',
            name=kernel_name)
        self.kernel_pred_convs.append(
            self.add_sublayer(kernel_name, kernel_layer))

        # Category branch.
        cate_name = 'bbox_head.cate_convs.{}'.format(stage)
        cate_in = self.in_channels if is_first else self.seg_feat_channels
        cate_layer = self.conv_func(
            ch_in=cate_in,
            ch_out=self.seg_feat_channels,
            filter_size=3,
            stride=1,
            norm_type='gn',
            norm_name=cate_name + '.gn',
            name=cate_name)
        self.cate_pred_convs.append(
            self.add_sublayer(cate_name, cate_layer))

    # Kernel prediction output conv.
    kernel_weight = ParamAttr(
        name="bbox_head.solo_kernel.weight",
        initializer=Normal(mean=0., std=0.01))
    kernel_bias = ParamAttr(name="bbox_head.solo_kernel.bias")
    self.solo_kernel = self.add_sublayer(
        'bbox_head.solo_kernel',
        nn.Conv2D(
            self.seg_feat_channels,
            self.kernel_out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=kernel_weight,
            bias_attr=kernel_bias))

    # Category prediction output conv; bias starts at -log((1 - p) / p)
    # with p = 0.01.
    cate_weight = ParamAttr(
        name="bbox_head.solo_cate.weight",
        initializer=Normal(mean=0., std=0.01))
    cate_bias = ParamAttr(
        name="bbox_head.solo_cate.bias",
        initializer=Constant(value=float(-np.log((1 - 0.01) / 0.01))))
    self.solo_cate = self.add_sublayer(
        'bbox_head.solo_cate',
        nn.Conv2D(
            self.seg_feat_channels,
            self.cate_out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=cate_weight,
            bias_attr=cate_bias))
import numpy as np
import paddle
import paddle.nn as nn
from paddle.nn.initializer import TruncatedNormal, KaimingNormal, Constant, Assign

# Shared initializer instances.
ones_ = Constant(value=1.0)
zeros_ = Constant(value=0.0)
kaiming_normal_ = KaimingNormal()
trunc_normal_ = TruncatedNormal(std=0.02)


def orthogonal_(tensor, gain=1):
    """Check that ``tensor`` is eligible for orthogonal initialization.

    Orthogonal initialization is described in "Exact solutions to the
    nonlinear dynamics of learning in deep linear neural networks"
    (Saxe, A. et al., 2013) and needs a tensor with at least 2 dimensions.

    NOTE(review): only the dimensionality check is implemented in this
    definition; the tensor is not modified and ``gain`` is not used.
    Confirm whether the fill step is expected here.

    Args:
        tensor: object exposing ``ndimension()``.
        gain: optional scaling factor (currently unused).

    Raises:
        ValueError: if ``tensor`` has fewer than 2 dimensions.
    """
    rank = tensor.ndimension()
    if rank < 2:
        raise ValueError("Only tensors with 2 or more dimensions are supported")
def __init__(self):
    """Create a single float32 parameter ``scale_reg`` initialised to 1."""
    super(ScaleReg, self).__init__()
    unit_init = ParamAttr(initializer=Constant(value=1.))
    self.scale_reg = self.create_parameter(
        shape=[1], attr=unit_init, dtype="float32")
def __init__(self,
             fcos_feat,
             num_classes=80,
             fpn_stride=[8, 16, 32, 64, 128],
             prior_prob=0.01,
             fcos_loss='FCOSLoss',
             norm_reg_targets=True,
             centerness_on_reg=True):
    """Create the classification, regression and centerness convs plus one
    ``ScaleReg`` sublayer per FPN stride.

    Args:
        fcos_feat: stored on the instance.
        num_classes: output channels of the classification conv.
        fpn_stride: one stride per level; each level's sublayer is named
            ``p<L>_feat`` with ``L = int(log2(stride))``.
        prior_prob: sets the classification bias to
            ``-log((1 - prior_prob) / prior_prob)``.
        fcos_loss: stored on the instance.
        norm_reg_targets: stored on the instance.
        centerness_on_reg: stored on the instance.
    """
    super(FCOSHead, self).__init__()
    self.fcos_feat = fcos_feat
    self.num_classes = num_classes
    self.fpn_stride = fpn_stride
    self.prior_prob = prior_prob
    self.fcos_loss = fcos_loss
    self.norm_reg_targets = norm_reg_targets
    self.centerness_on_reg = centerness_on_reg

    def build_pred_conv(layer_name, out_channels, bias_value):
        # 3x3 conv over 256 input channels, registered under layer_name.
        conv = nn.Conv2D(
            in_channels=256,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(
                name=layer_name + "_weights",
                initializer=Normal(mean=0., std=0.01)),
            bias_attr=ParamAttr(
                name=layer_name + "_bias",
                initializer=Constant(value=bias_value)))
        return self.add_sublayer(layer_name, conv)

    bias_init_value = -math.log((1 - self.prior_prob) / self.prior_prob)
    self.fcos_head_cls = build_pred_conv(
        "fcos_head_cls", self.num_classes, bias_init_value)
    self.fcos_head_reg = build_pred_conv("fcos_head_reg", 4, 0)
    self.fcos_head_centerness = build_pred_conv("fcos_head_centerness", 1, 0)

    # One learnable scale per FPN level.
    self.scales_regs = []
    for stride in self.fpn_stride:
        level = int(math.log(int(stride), 2))
        level_scale = self.add_sublayer('p{}_feat'.format(level), ScaleReg())
        self.scales_regs.append(level_scale)