def __init__(self, pRpn):
    """Declare the variables shared by the classification and regression towers.

    For each of the ``cls`` and ``bbox`` branches a weight/bias pair is
    created for ``conv1`` .. ``conv4`` and for the final ``pred`` layer, and
    stored on the instance as ``<branch>_<layer>_weight`` and
    ``<branch>_<layer>_bias``.  Weights use a Gaussian init (std 0.01) and
    biases a zero init, except ``cls_pred_bias`` which starts at
    ``-log((1 - 0.01) / 0.01)``.

    Args:
        pRpn: head configuration, forwarded unchanged to the parent
            constructor.
    """
    super().__init__(pRpn)

    # init bias for cls
    prior_prob = 0.01
    pi = -math.log((1 - prior_prob) / prior_prob)

    # "cls" is the shared classification tower, "bbox" the shared regression one
    for branch in ("cls", "bbox"):
        for layer in ("conv1", "conv2", "conv3", "conv4", "pred"):
            prefix = branch + "_" + layer
            weight_name = prefix + "_weight"
            bias_name = prefix + "_bias"
            setattr(self, weight_name,
                    X.var(weight_name, init=X.gauss(std=0.01)))
            if prefix == "cls_pred":
                # only the classification prediction bias uses the prior
                bias_init = X.constant(pi)
            else:
                bias_init = X.zero_init()
            setattr(self, bias_name, X.var(bias_name, init=bias_init))

    # output slots, initially empty
    self._cls_logit_dict = None
    self._bbox_delta_dict = None
def __init__(self, pBbox):
    """Set up a stage-specific bbox head and declare its named variables.

    Every variable name embeds ``pBbox.stage`` so that each stage owns its
    own parameters.  The two fc weights use a uniform Xavier init, the
    classification and regression weights use Gaussian inits (0.01 and
    0.001 respectively); the biases are declared without an explicit init.

    Args:
        pBbox: head configuration; must expose ``stage``.  It is also
            forwarded to the parent constructor.
    """
    super().__init__(pBbox)
    self.stage = pBbox.stage
    self._cls_logit = self._bbox_delta = self._proposal = None

    # declare weight and bias
    stage = self.stage

    def stage_name(prefix, suffix):
        # e.g. ("bbox_fc1", "weight") -> "bbox_fc1_%s_weight" % stage
        return (prefix + "_%s_" + suffix) % stage

    xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    self.fc1_weight = X.var(stage_name("bbox_fc1", "weight"), init=xavier_init)
    self.fc2_weight = X.var(stage_name("bbox_fc2", "weight"), init=xavier_init)
    self.cls_logit_weight = X.var(
        stage_name("bbox_cls_logit", "weight"), init=X.gauss(0.01))
    self.bbox_delta_weight = X.var(
        stage_name("bbox_reg_delta", "weight"), init=X.gauss(0.001))

    self.fc1_bias = X.var(stage_name("bbox_fc1", "bias"))
    self.fc2_bias = X.var(stage_name("bbox_fc2", "bias"))
    self.cls_logit_bias = X.var(stage_name("bbox_cls_logit", "bias"))
    self.bbox_delta_bias = X.var(stage_name("bbox_reg_delta", "bias"))
def get_output(self, conv_feat):
    """Build the classification and box-regression outputs of the bbox head.

    ``self._get_bbox_head_logit`` may return either a single feature or a
    dict with ``"classification"`` and ``"regression"`` entries; a single
    feature is used for both branches.

    Args:
        conv_feat: input feature handed to ``self._get_bbox_head_logit``.

    Returns:
        Tuple ``(cls_logit, bbox_delta)``.
    """
    p = self.p
    num_class = p.num_class
    if p.regress_target.class_agnostic:
        num_reg_class = 2
    else:
        num_reg_class = num_class

    head_feat = self._get_bbox_head_logit(conv_feat)
    if not isinstance(head_feat, dict):
        head_feat = dict(classification=head_feat, regression=head_feat)

    if p.fp16:
        # NOTE: when the helper returned a dict, it is updated in place here
        casts = (
            ("classification", "bbox_cls_head_to_fp32"),
            ("regression", "bbox_reg_head_to_fp32"),
        )
        for branch, cast_name in casts:
            head_feat[branch] = X.to_fp32(head_feat[branch], name=cast_name)

    cls_logit = X.fc(
        head_feat["classification"],
        filter=num_class,
        name='bbox_cls_logit',
        init=X.gauss(0.01),
    )
    bbox_delta = X.fc(
        head_feat["regression"],
        filter=4 * num_reg_class,
        name='bbox_reg_delta',
        init=X.gauss(0.001),
    )
    return cls_logit, bbox_delta
def get_output(self, conv_feat):
    """Build (once) and return the RPN classification and regression outputs.

    The result is memoised on ``self._cls_logit`` / ``self._bbox_delta``;
    later calls return the stored pair without rebuilding anything.

    Args:
        conv_feat: input feature for the 3x3 RPN convolution.

    Returns:
        Tuple ``(cls_logit, bbox_delta)``.
    """
    if not (self._cls_logit is None or self._bbox_delta is None):
        return self._cls_logit, self._bbox_delta

    cfg = self.p
    anchor_cfg = cfg.anchor_generate
    num_base_anchor = len(anchor_cfg.ratio) * len(anchor_cfg.scale)
    conv_channel = cfg.head.conv_channel

    hidden = X.convrelu(
        conv_feat,
        kernel=3,
        filter=conv_channel,
        name="rpn_conv_3x3",
        no_bias=False,
        init=X.gauss(0.01),
    )
    if cfg.fp16:
        hidden = X.to_fp32(hidden, name="rpn_conv_3x3_fp32")

    # 2 outputs per anchor for classification, 4 per anchor for regression
    logits = X.conv(
        hidden,
        filter=2 * num_base_anchor,
        name="rpn_cls_logit",
        no_bias=False,
        init=X.gauss(0.01),
    )
    deltas = X.conv(
        hidden,
        filter=4 * num_base_anchor,
        name="rpn_bbox_delta",
        no_bias=False,
        init=X.gauss(0.01),
    )

    self._cls_logit = logits
    self._bbox_delta = deltas
    return self._cls_logit, self._bbox_delta
def get_output(self, conv_fpn_feat):
    """Build (once) the per-stride RPN outputs with weights shared across strides.

    The result is memoised on ``self.cls_logit_dict`` /
    ``self.bbox_delta_dict``; later calls return the stored dicts.

    Args:
        conv_fpn_feat: mapping indexed with ``'stride<N>'`` keys, one per
            entry of ``p.anchor_generate.stride``.

    Returns:
        Tuple ``(cls_logit_dict, bbox_delta_dict)``, both keyed by stride.
    """
    if not (self.cls_logit_dict is None or self.bbox_delta_dict is None):
        return self.cls_logit_dict, self.bbox_delta_dict

    p = self.p
    anchor_cfg = p.anchor_generate
    num_base_anchor = len(anchor_cfg.ratio) * len(anchor_cfg.scale)
    conv_channel = p.head.conv_channel

    # FPN RPN share weight
    trunk_w = X.var('rpn_conv_weight', init=X.gauss(0.01))
    trunk_b = X.var('rpn_conv_bias', init=X.zero_init())
    cls_w = X.var('rpn_conv_cls_weight', init=X.gauss(0.01))
    cls_b = X.var('rpn_conv_cls_bias', init=X.zero_init())
    bbox_w = X.var('rpn_conv_bbox_weight', init=X.gauss(0.01))
    bbox_b = X.var('rpn_conv_bbox_bias', init=X.zero_init())

    logits_by_stride = {}
    deltas_by_stride = {}

    for stride in p.anchor_generate.stride:
        trunk = X.conv(
            conv_fpn_feat['stride%s' % stride],
            kernel=3,
            filter=conv_channel,
            name="rpn_conv_3x3_%s" % stride,
            no_bias=False,
            weight=trunk_w,
            bias=trunk_b,
        )
        activated = X.relu(trunk, name='rpn_relu_%s' % stride)
        if p.fp16:
            activated = X.to_fp32(activated, name="rpn_relu_%s_fp32" % stride)

        logits_by_stride[stride] = X.conv(
            activated,
            filter=2 * num_base_anchor,
            name="rpn_cls_score_stride%s" % stride,
            no_bias=False,
            weight=cls_w,
            bias=cls_b,
        )
        deltas_by_stride[stride] = X.conv(
            activated,
            filter=4 * num_base_anchor,
            name="rpn_bbox_pred_stride%s" % stride,
            no_bias=False,
            weight=bbox_w,
            bias=bbox_b,
        )

    self.cls_logit_dict = logits_by_stride
    self.bbox_delta_dict = deltas_by_stride
    return self.cls_logit_dict, self.bbox_delta_dict
def PConvModule(x, out_channels=256, kernel_size=(3, 3, 3), dilation=(1, 1, 1), groups=(1, 1, 1), ibn=None,
                part_deform=False, PConv_idx=-1, start_level=1, norm=None, bilinear_upsample=None,
                feat_sizes=None):
    """Fuse every pyramid level with its two neighbouring levels.

    Three ``sepc_conv`` operators are built, each with one weight/bias pair
    (plus an offset pair when ``part_deform`` is set) that is reused on all
    levels:

    * operator 0 (stride 1) runs on level ``i + 1``; its output is upsampled
      by 2 and cropped to level ``i`` with ``slice_like``;
    * operator 1 (stride 1) runs on level ``i`` itself;
    * operator 2 (stride 2) runs on level ``i - 1``.

    The available contributions are summed per level, optionally normalised
    through ``ibn_func`` and finally passed through ``relu``.

    Args:
        x: indexable sequence of per-level features (``len`` and integer
            indexing are used).
        out_channels: forwarded to ``sepc_conv``.
        kernel_size, dilation, groups: 3-element sequences, one entry per
            operator above.  The defaults are tuples so that no mutable
            object is shared between calls; lists are still accepted.
        ibn: when truthy, ``norm`` is required and the summed features go
            through ``ibn_func`` before the ReLU.
        part_deform: when truthy, zero-initialised offset variables are
            declared and passed to ``sepc_conv``.
        PConv_idx: index of this module, embedded in every name; must be
            greater than -1.
        start_level: forwarded to ``sepc_conv``.
        norm: normaliser callable wrapped with ``partial`` when ``ibn`` is set.
        bilinear_upsample: truthy selects ``BilinearResize2D``, otherwise
            nearest-neighbour ``UpSampling`` is used.
        feat_sizes: forwarded to ``ibn_func``; must not be ``None``.

    Returns:
        List with one fused feature per input level.

    Raises:
        AssertionError: if ``PConv_idx`` is not greater than -1, if
            ``feat_sizes`` is ``None``, or if ``ibn`` is set without ``norm``.
    """
    assert PConv_idx > -1 and feat_sizes is not None

    name_pref = 'PConv{}_sepc'.format(PConv_idx)

    # one (weight, bias) pair per operator, shared by all pyramid levels
    conv_params = [
        (X.var(name=name_pref + '%d_weight' % k, init=X.gauss(std=0.01)),
         X.var(name=name_pref + '%d_bias' % k, init=X.zero_init()))
        for k in range(3)
    ]
    if part_deform:
        # NOTE zero_init for offset's weight and bias
        offset_params = [
            (X.var(name=name_pref + '%d_offset_weight' % k, init=X.zero_init()),
             X.var(name=name_pref + '%d_offset_bias' % k, init=X.zero_init()))
            for k in range(3)
        ]
    else:
        offset_params = [(None, None)] * 3

    norm_func = []
    if ibn:
        assert norm is not None
        norm_func = partial(norm, name=name_pref + '_ibn')

    def make_conv(k, stride):
        # sepc_conv pre-bound with the k-th parameter set
        weight, bias = conv_params[k]
        offset_weight, offset_bias = offset_params[k]
        return partial(
            sepc_conv,
            name='PConv{}_sepc{}_'.format(PConv_idx, k),
            out_channels=out_channels,
            kernel_size=kernel_size[k],
            stride=stride,
            padding=(kernel_size[k] + (dilation[k] - 1) * 2) // 2,
            dilation=dilation[k],
            groups=groups[k],
            deformable_groups=1,
            part_deform=part_deform,
            start_level=start_level,
            weight=weight,
            bias=bias,
            weight_offset=offset_weight,
            bias_offset=offset_bias)

    sepc_conv0_func = make_conv(0, stride=1)  # applied to the next level
    sepc_conv1_func = make_conv(1, stride=1)  # applied to the level itself
    sepc_conv2_func = make_conv(2, stride=2)  # applied to the previous level

    next_x = []
    last_level = len(x) - 1
    for level, feature in enumerate(x):
        temp_fea = sepc_conv1_func(i=level, x=feature)
        if level > 0:
            temp_fea = temp_fea + sepc_conv2_func(i=level, x=x[level - 1])
        if level < last_level:
            tmp_x = sepc_conv0_func(i=level, x=x[level + 1])
            upsample_name = 'PConv{}_upsampling_level{}'.format(PConv_idx, level)
            if bilinear_upsample:
                tmp_x = mx.contrib.symbol.BilinearResize2D(
                    tmp_x, scale_height=2, scale_width=2, name=upsample_name)
            else:
                tmp_x = mx.sym.UpSampling(
                    tmp_x, scale=2, sample_type='nearest', num_args=1, name=upsample_name)
            # crop the upsampled map to the current level before summing
            tmp_x = mx.sym.slice_like(tmp_x, temp_fea)
            temp_fea = temp_fea + tmp_x
        next_x.append(temp_fea)

    if ibn:
        next_x = ibn_func(next_x, norm_func, feat_sizes)

    return [
        relu(item, name='PConv{}_level{}_relu'.format(PConv_idx, level))
        for level, item in enumerate(next_x)
    ]
def get_output(self, conv_feat):
    """Build (once) the RPN outputs, choosing the 3x3 block by normalizer name.

    ``fix_bn`` uses a plain conv+relu block, ``sync_bn`` and ``gn`` use a
    conv+norm+relu block built with ``p.normalizer``.  The result is
    memoised on ``self._cls_logit`` / ``self._bbox_delta``.

    Args:
        conv_feat: input feature for the 3x3 RPN convolution.

    Returns:
        Tuple ``(cls_logit, bbox_delta)``.

    Raises:
        NotImplementedError: for any other ``p.normalizer.__name__``.
    """
    if not (self._cls_logit is None or self._bbox_delta is None):
        return self._cls_logit, self._bbox_delta

    p = self.p
    anchor_cfg = p.anchor_generate
    num_base_anchor = len(anchor_cfg.ratio) * len(anchor_cfg.scale)
    conv_channel = p.head.conv_channel

    norm_name = p.normalizer.__name__
    if norm_name == "fix_bn":
        hidden = X.convrelu(
            conv_feat,
            kernel=3,
            filter=conv_channel,
            name="rpn_conv_3x3",
            no_bias=False,
            init=X.gauss(0.01),
        )
    elif norm_name in ("sync_bn", "gn"):
        hidden = X.convnormrelu(
            p.normalizer,
            conv_feat,
            kernel=3,
            filter=conv_channel,
            name="rpn_conv_3x3",
            no_bias=False,
            init=X.gauss(0.01),
        )
    else:
        raise NotImplementedError("Unsupported normalizer: {}".format(p.normalizer.__name__))

    if p.fp16:
        hidden = X.to_fp32(hidden, name="rpn_conv_3x3_fp32")

    logits = X.conv(
        hidden,
        filter=2 * num_base_anchor,
        name="rpn_cls_logit",
        no_bias=False,
        init=X.gauss(0.01),
    )
    deltas = X.conv(
        hidden,
        filter=4 * num_base_anchor,
        name="rpn_bbox_delta",
        no_bias=False,
        init=X.gauss(0.01),
    )

    self._cls_logit = logits
    self._bbox_delta = deltas
    return self._cls_logit, self._bbox_delta
def get_rcnn_feature(self, rcnn_feat):
    """Apply the ``conv_neck`` conv+relu block to the RCNN feature.

    Args:
        rcnn_feat: input feature.

    Returns:
        Output of ``X.convrelu`` with ``self.pNeck.conv_channel`` filters.
    """
    neck_cfg = self.pNeck
    return X.convrelu(
        rcnn_feat,
        filter=neck_cfg.conv_channel,
        no_bias=False,
        init=X.gauss(0.01),
        name="conv_neck",
    )
def SEPCFPN(inputs, out_channels=256, pconv_deform=False, lcconv_deform=None, ibn=None, Pconv_num=4,
            start_level=1, norm=None, bilinear_upsample=None, feat_sizes=None):
    """Stack ``Pconv_num`` PConv modules, optionally followed by LConv/CConv heads.

    With ``lcconv_deform=None`` the output of the PConv stack is returned
    directly.  Otherwise each level goes through a ``CConv`` and an
    ``LConv`` operator (weights shared across levels), optionally through
    ``ibn_func``, and the two ReLU-activated results are concatenated on
    axis 1.

    Args:
        inputs: per-level features fed to the first PConv module.
        out_channels: forwarded to ``PConvModule`` and ``sepc_conv``.
        pconv_deform: forwarded to ``PConvModule`` as ``part_deform``.
        lcconv_deform: ``None`` to skip the extra heads, otherwise a boolean
            forwarded to ``sepc_conv`` as ``part_deform``.
        ibn: when truthy, ``norm`` is required for the extra heads.
        Pconv_num: number of stacked PConv modules.
        start_level, norm, bilinear_upsample, feat_sizes: forwarded to the
            helpers; ``feat_sizes`` must not be ``None``.

    Returns:
        List of per-level features.

    Raises:
        AssertionError: if ``feat_sizes`` is ``None``, if ``lcconv_deform``
            is neither ``None`` nor a boolean, or if ``ibn`` is set without
            ``norm`` while the extra heads are enabled.
    """
    assert feat_sizes is not None

    pconv_stack = [
        partial(
            PConvModule,
            out_channels=out_channels,
            ibn=ibn,
            part_deform=pconv_deform,
            PConv_idx=idx,
            start_level=start_level,
            norm=norm,
            bilinear_upsample=bilinear_upsample,
            feat_sizes=feat_sizes)
        for idx in range(Pconv_num)
    ]

    with_heads = lcconv_deform is not None
    if with_heads:
        assert lcconv_deform in [False, True]

        lconv_weight = X.var(name='LConv_weight', init=X.gauss(std=0.01))
        lconv_bias = X.var(name='LConv_bias', init=X.zero_init())
        cconv_weight = X.var(name='CConv_weight', init=X.gauss(std=0.01))
        cconv_bias = X.var(name='CConv_bias', init=X.zero_init())

        lconv_offset = (None, None)
        cconv_offset = (None, None)
        if lcconv_deform:
            lconv_offset = (
                X.var(name='LConv_offset_weight', init=X.zero_init()),
                X.var(name='LConv_offset_bias', init=X.zero_init()),
            )
            cconv_offset = (
                X.var(name='CConv_offset_weight', init=X.zero_init()),
                X.var(name='CConv_offset_bias', init=X.zero_init()),
            )

        def make_head(tag, weight, bias, offset):
            # 3x3, stride-1 sepc_conv pre-bound with one shared parameter set
            return partial(
                sepc_conv,
                name=tag + '{}_',
                out_channels=out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                dilation=1,
                groups=1,
                deformable_groups=1,
                part_deform=lcconv_deform,
                start_level=start_level,
                weight=weight,
                bias=bias,
                weight_offset=offset[0],
                bias_offset=offset[1])

        lconv_func = make_head('LConv', lconv_weight, lconv_bias, lconv_offset)
        cconv_func = make_head('CConv', cconv_weight, cconv_bias, cconv_offset)

        if ibn:
            assert norm is not None
            lbn = partial(norm, name='lconv_ibn')
            cbn = partial(norm, name='cconv_ibn')

    feats = inputs
    for pconv in pconv_stack:
        feats = pconv(feats)

    if not with_heads:
        return feats

    cls_outs = [cconv_func(i=level, x=item) for level, item in enumerate(feats)]
    loc_outs = [lconv_func(i=level, x=item) for level, item in enumerate(feats)]
    if ibn:
        cls_outs = ibn_func(cls_outs, cbn, feat_sizes)
        loc_outs = ibn_func(loc_outs, lbn, feat_sizes)

    # classification branch first, localisation branch second
    merged = []
    for cls_item, loc_item in zip(cls_outs, loc_outs):
        merged.append(
            mx.sym.Concat(relu(cls_item), relu(loc_item), num_args=2, dim=1))
    return merged
def get_output(self, conv_feat):
    """Build the classification and box-regression outputs of the bbox head.

    Args:
        conv_feat: input feature handed to ``self._get_bbox_head_logit``.

    Returns:
        Tuple ``(cls_logit, bbox_delta)`` with ``num_class`` and
        ``4 * num_reg_class`` filters respectively, where ``num_reg_class``
        is 2 for class-agnostic regression and ``num_class`` otherwise.
    """
    cfg = self.p
    num_class = cfg.num_class
    if cfg.regress_target.class_agnostic:
        num_reg_class = 2
    else:
        num_reg_class = num_class

    feat = self._get_bbox_head_logit(conv_feat)
    if cfg.fp16:
        feat = X.to_fp32(feat, name="bbox_head_to_fp32")

    cls_logit = X.fc(
        feat, filter=num_class, name='bbox_cls_logit', init=X.gauss(0.01))
    bbox_delta = X.fc(
        feat, filter=4 * num_reg_class, name='bbox_reg_delta', init=X.gauss(0.001))
    return cls_logit, bbox_delta
def __init__(self, pBbox):
    """Set up the head and bind its variables to the owning stage's names.

    The variable names are derived from the part of ``pBbox.stage`` before
    the first underscore, so a stage such as ``'1st_3rd'`` refers to the
    variables named after ``'1st'``.

    Args:
        pBbox: head configuration; must expose ``stage``.  It is also
            forwarded to the parent constructor.
    """
    super(CascadeBbox2fcHead, self).__init__(pBbox)
    self.stage = pBbox.stage
    self._cls_logit = self._bbox_delta = self._proposal = None

    # for stage '1st_3rd', using weight from 1st stage
    weight_stage = self.stage.split('_')[0]

    def named(prefix, suffix):
        # e.g. ("bbox_fc1_", "_weight") -> "bbox_fc1_<weight_stage>_weight"
        return prefix + weight_stage + suffix

    self.fc1_weight = X.var(named("bbox_fc1_", "_weight"))
    self.fc2_weight = X.var(named("bbox_fc2_", "_weight"))
    self.cls_logit_weight = X.var(
        named("bbox_cls_logit_", "_weight"), init=X.gauss(0.01))
    self.cls_logit_bias = X.var(named("bbox_cls_logit_", "_bias"))
    self.bbox_delta_weight = X.var(
        named("bbox_reg_delta_", "_weight"), init=X.gauss(0.001))
    self.bbox_delta_bias = X.var(named("bbox_reg_delta_", "_bias"))
def _reg_head(self, conv_feat):
    """Run the regression branch: a stack of 3x3 conv -> norm -> relu blocks.

    Args:
        conv_feat: input feature.

    Returns:
        Feature after ``self.p.num_block`` blocks; 4 blocks are used when
        that setting is falsy.
    """
    block_count = self.p.num_block
    if not block_count:
        block_count = 4

    feat = conv_feat
    for block_no in range(1, block_count + 1):
        feat = X.conv(
            feat,
            kernel=3,
            filter=256,
            init=X.gauss(0.01),
            name="bbox_reg_block%s" % block_no,
        )
        feat = X.relu(self.add_norm(feat))
    return feat
def get_output(self, fpn_conv_feats, roi_feat, rois, is_train):
    '''
    Build the sibling head and the TSD (offset-driven) head.

    Two offset predictions, ``delta_c`` and ``delta_r``, are computed from
    the flattened RoI feature.  They drive ``self.delta_c_pool`` and
    ``self.delta_r_pool`` to re-pool features for the TSD classification
    and regression branches.  The plain branch works on ``roi_feat``
    directly.

    Shapes below are the ones given by the original author's documentation.

    Args:
        fpn_conv_feats: dict of FPN features, each [batch_image, in_channels, fh, fw]
        roi_feat: [batch_image * image_roi, 256, roi_size, roi_size]
        rois: [batch_image, image_roi, 4]
        is_train: boolean; at test time 1000 RoIs per image are assumed
            instead of ``p.image_roi``
    Returns:
        cls_logit: [batch_image * image_roi, num_class]
        bbox_delta: [batch_image * image_roi, num_class * 4]
        tsd_cls_logit: [batch_image * image_roi, num_class]
        tsd_bbox_delta: [batch_image * image_roi, num_class * 4]
        delta_c: [batch_image * image_roi, 2*roi_size*roi_size, 1, 1]
        delta_r: [batch_image * image_roi, 2, 1, 1]
    '''
    p = self.p
    tsd_cfg = p.TSD
    xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    def conv_fc_stack(feat, num_convs, num_fcs, name):
        # every stack uses the same conv / fc initialisers
        return self._convs_and_fcs(
            feat, num_convs, num_fcs, name=name,
            conv_init=xavier_init, fc_init=X.gauss(0.01))

    # ---- offset prediction -------------------------------------------------
    # flatten each RoI feature to (N, C*H*W, 1, 1) so 1x1 convs act like fc
    flat_roi = X.reshape(roi_feat, shape=(0, -1, 1, 1), name="bbox_feat_reshape")
    shared = X.relu(
        X.conv(data=flat_roi, kernel=1, filter=256, name="delta_shared_fc1", no_bias=False),
        name="delta_shared_fc1_relu")

    delta_c_hidden = X.relu(
        X.conv(shared, filter=256, name="delta_c_fc1", init=X.gauss(0.01)),
        name="delta_c_fc1_relu")
    # two offset values per RoI bin for the classification branch
    delta_c = X.conv(
        delta_c_hidden, filter=2 * p.roi_size**2, name="delta_c_fc2", init=X.gauss(0.01))

    delta_r_hidden = X.relu(
        X.conv(shared, filter=256, name="delta_r_fc1", init=X.gauss(0.01)),
        name="delta_r_fc1_relu")
    # a single pair of offset values per RoI for the regression branch
    delta_r = X.conv(delta_r_hidden, filter=2, name="delta_r_fc2", init=X.gauss(0.01))

    # ---- offset-driven re-pooling -----------------------------------------
    if is_train:
        image_roi = p.image_roi
    else:
        image_roi = 1000
    batch_image = p.batch_image

    TSD_cls_feats = self.delta_c_pool.get_roi_feature(
        fpn_conv_feats, rois, delta_c, image_rois=image_roi, batch_image=batch_image)
    TSD_loc_feats = self.delta_r_pool.get_roi_feature(
        fpn_conv_feats, rois, delta_r, image_rois=image_roi, batch_image=batch_image)

    # ---- TSD branch --------------------------------------------------------
    # NOTE(review): output shapes of _convs_and_fcs are not visible here
    TSD_x_cls = conv_fc_stack(
        TSD_cls_feats, tsd_cfg.num_shared_convs, tsd_cfg.num_shared_fcs, 'TSD_pc')
    TSD_x_reg = conv_fc_stack(
        TSD_loc_feats, tsd_cfg.num_shared_convs, tsd_cfg.num_shared_fcs, 'TSD_pr')
    TSD_x_cls = conv_fc_stack(TSD_x_cls, 0, tsd_cfg.num_cls_fcs, 'TSD_cls')
    TSD_x_reg = conv_fc_stack(TSD_x_reg, 0, tsd_cfg.num_reg_fcs, 'TSD_reg')

    num_class = p.num_class
    if p.regress_target.class_agnostic:
        num_reg_class = 2
    else:
        num_reg_class = num_class

    tsd_cls_logit = X.fc(
        TSD_x_cls, filter=num_class, name='tsd_cls_logit', init=X.gauss(0.01))
    tsd_bbox_delta = X.fc(
        TSD_x_reg, filter=4 * num_reg_class, name='tsd_reg_delta', init=X.gauss(0.01))

    # ---- plain sibling branch ---------------------------------------------
    trunk = conv_fc_stack(
        roi_feat, tsd_cfg.num_shared_convs, tsd_cfg.num_shared_fcs, 'shared_fc')
    x_cls = conv_fc_stack(trunk, 0, tsd_cfg.num_cls_fcs, 'cls')
    x_reg = conv_fc_stack(trunk, 0, tsd_cfg.num_reg_fcs, 'reg')

    cls_logit = X.fc(x_cls, filter=num_class, name='bbox_cls_logit', init=X.gauss(0.01))
    bbox_delta = X.fc(
        x_reg, filter=4 * num_reg_class, name='bbox_reg_delta', init=X.gauss(0.01))

    if p.fp16:
        cls_logit = X.to_fp32(cls_logit, name="cls_logits_fp32")
        bbox_delta = X.to_fp32(bbox_delta, name="bbox_delta_fp32")
        tsd_cls_logit = X.to_fp32(tsd_cls_logit, name="tsd_cls_logit_fp32")
        tsd_bbox_delta = X.to_fp32(tsd_bbox_delta, name="tsd_bbox_delta_fp32")
        delta_c = X.to_fp32(delta_c, name="delta_c_fp32")
        delta_r = X.to_fp32(delta_r, name="delta_r_fp32")

    return cls_logit, bbox_delta, tsd_cls_logit, tsd_bbox_delta, delta_c, delta_r
def get_output(self, conv_fpn_feat):
    """Build the centerness, classification and offset heads for every stride.

    All convolution weights and biases are declared once and reused on
    every stride listed in ``p.FCOSParam.stride``.  Per stride:

    * a 4-block (conv -> group norm -> relu) "shared" tower feeds the
      centerness convolution (1 filter) and the classification convolution
      (``p.FCOSParam.num_classifier`` filters);
    * a separate 4-block "offset" tower feeds a 4-filter convolution whose
      output is multiplied by a per-stride scale variable and passed
      through ``exp``.

    The fourth offset convolution used to be named ``offset_conv1_3x3_<stride>``,
    duplicating the first one's node name; it is now ``offset_conv4_3x3_<stride>``.
    Its weight and bias are passed as explicit variables, so the variable
    names are unaffected by the rename.

    Args:
        conv_fpn_feat: mapping indexed with ``'stride<N>'`` keys.

    Returns:
        Tuple ``(centerness_logit_dict, cls_logit_dict, offset_logit_dict)``,
        each keyed by stride.  The same dicts are stored on ``self``.
    """
    p = self.p

    def declare(prefix, bias_init=None):
        # weight/bias pair shared across strides; biases get lr_mult=2, wd_mult=0
        weight = X.var(name=prefix + "_weight", init=X.gauss(0.01))
        if bias_init is None:
            bias_init = X.zero_init()
        bias = X.var(name=prefix + "_bias", init=bias_init, lr_mult=2, wd_mult=0)
        return weight, bias

    def tower(feat, branch, params, stride):
        # conv -> gn -> relu, once per (weight, bias) pair in ``params``
        for idx, (weight, bias) in enumerate(params, start=1):
            feat = X.conv(
                feat,
                kernel=3,
                filter=256,
                no_bias=False,
                name="%s_conv%d_3x3_%s" % (branch, idx, stride),
                weight=weight,
                bias=bias,
            )
            feat = X.gn(feat, name="%s_gn%d_3x3_%s" % (branch, idx, stride), num_group=32)
            feat = X.relu(feat, name="%s_relu%d_3x3_%s" % (branch, idx, stride))
        return feat

    # heads are shared across stages
    shared_params = [declare("shared_conv%d_3x3" % i) for i in range(1, 5)]
    centerness_conv_w, centerness_conv_b = declare("centerness_conv_3x3")
    # init with -log((1-0.01)/0.01)
    cls_conv_w, cls_conv_b = declare("cls_conv_3x3", X.constant(-math.log(99)))
    offset_params = [declare("offset_conv%d_3x3" % i) for i in range(1, 6)]
    offset_tower_params = offset_params[:4]
    offset_conv5_w, offset_conv5_b = offset_params[4]

    centerness_logit_dict = {}
    cls_logit_dict = {}
    offset_logit_dict = {}

    for stride in p.FCOSParam.stride:
        fpn_feat = conv_fpn_feat['stride%s' % stride]

        # centerness & cls shared layer
        shared_feat = tower(fpn_feat, "shared", shared_params, stride)

        # centerness head
        center_logit = X.conv(
            shared_feat,
            kernel=3,
            filter=1,
            name="center_conv_3x3_%s" % stride,
            no_bias=False,
            weight=centerness_conv_w,
            bias=centerness_conv_b,
        )
        # cls head
        cls_logit = X.conv(
            shared_feat,
            kernel=3,
            filter=p.FCOSParam.num_classifier,  # remove bg channel
            name="cls_conv_3x3_%s" % stride,
            no_bias=False,
            weight=cls_conv_w,
            bias=cls_conv_b,
        )

        # offset head
        offset_feat = tower(fpn_feat, "offset", offset_tower_params, stride)
        offset_logit = X.conv(
            offset_feat,
            kernel=3,
            filter=4,
            name="offset_conv5_3x3_%s" % stride,
            no_bias=False,
            weight=offset_conv5_w,
            bias=offset_conv5_b,
        )
        # per-stride scale (initialised to 1), then exp
        offset_scale = X.var(
            name="offset_scale_%s_w" % stride, init=X.constant(1), shape=(1, 1, 1, 1))
        offset_logit = mx.sym.broadcast_mul(lhs=offset_logit, rhs=offset_scale)
        offset_logit = mx.sym.exp(offset_logit)

        centerness_logit_dict[stride] = center_logit
        cls_logit_dict[stride] = cls_logit
        offset_logit_dict[stride] = offset_logit

    self.centerness_logit_dict = centerness_logit_dict
    self.cls_logit_dict = cls_logit_dict
    self.offset_logit_dict = offset_logit_dict
    return self.centerness_logit_dict, self.cls_logit_dict, self.offset_logit_dict
def get_output(self, conv_fpn_feat):
    """Build (once) the per-stride RPN outputs with shared, optionally normalised convs.

    The convolution parameters and the normaliser parameters are declared
    once and reused on every stride.  ``fix_bn`` applies no normalisation
    after the 3x3 conv; ``sync_bn`` and ``gn`` call ``p.normalizer`` with
    the shared parameters.  The result is memoised on
    ``self.cls_logit_dict`` / ``self.bbox_delta_dict``.

    Args:
        conv_fpn_feat: mapping indexed with ``'stride<N>'`` keys.

    Returns:
        Tuple ``(cls_logit_dict, bbox_delta_dict)``, both keyed by stride.

    Raises:
        NotImplementedError: for any other ``p.normalizer.__name__``.
    """
    if not (self.cls_logit_dict is None or self.bbox_delta_dict is None):
        return self.cls_logit_dict, self.bbox_delta_dict

    p = self.p
    anchor_cfg = p.anchor_generate
    num_base_anchor = len(anchor_cfg.ratio) * len(anchor_cfg.scale)
    conv_channel = p.head.conv_channel

    # FPN RPN share weight
    trunk_w = X.var('rpn_conv_weight', init=X.gauss(0.01))
    trunk_b = X.var('rpn_conv_bias', init=X.zero_init())
    norm_gamma = X.var('rpn_conv_gamma')
    norm_beta = X.var('rpn_conv_beta')
    norm_mmean = X.var('rpn_conv_moving_mean')
    norm_mvar = X.var('rpn_conv_moving_var')
    cls_w = X.var('rpn_conv_cls_weight', init=X.gauss(0.01))
    cls_b = X.var('rpn_conv_cls_bias', init=X.zero_init())
    bbox_w = X.var('rpn_conv_bbox_weight', init=X.gauss(0.01))
    bbox_b = X.var('rpn_conv_bbox_bias', init=X.zero_init())

    logits_by_stride = {}
    deltas_by_stride = {}

    for stride in p.anchor_generate.stride:
        trunk = X.conv(
            conv_fpn_feat['stride%s' % stride],
            kernel=3,
            filter=conv_channel,
            name="rpn_conv_3x3_%s" % stride,
            no_bias=False,
            weight=trunk_w,
            bias=trunk_b,
        )

        norm_name = p.normalizer.__name__
        if norm_name == "sync_bn":
            trunk = p.normalizer(
                trunk,
                gamma=norm_gamma,
                beta=norm_beta,
                moving_mean=norm_mmean,
                moving_var=norm_mvar,
                name="rpn_conv_3x3_bn_%s" % stride,
            )
        elif norm_name == "gn":
            trunk = p.normalizer(
                trunk,
                gamma=norm_gamma,
                beta=norm_beta,
                name="rpn_conv_3x3_gn_%s" % stride,
            )
        elif norm_name != "fix_bn":
            # "fix_bn" deliberately leaves the conv output untouched
            raise NotImplementedError("Unsupported normalizer {}".format(p.normalizer.__name__))

        activated = X.relu(trunk, name='rpn_relu_%s' % stride)
        if p.fp16:
            activated = X.to_fp32(activated, name="rpn_relu_%s_fp32" % stride)

        logits_by_stride[stride] = X.conv(
            activated,
            filter=2 * num_base_anchor,
            name="rpn_cls_score_stride%s" % stride,
            no_bias=False,
            weight=cls_w,
            bias=cls_b,
        )
        deltas_by_stride[stride] = X.conv(
            activated,
            filter=4 * num_base_anchor,
            name="rpn_bbox_pred_stride%s" % stride,
            no_bias=False,
            weight=bbox_w,
            bias=bbox_b,
        )

    self.cls_logit_dict = logits_by_stride
    self.bbox_delta_dict = deltas_by_stride
    return self.cls_logit_dict, self.bbox_delta_dict
def __init__(self, pHead):
    """Validate the point configuration and declare every shared head variable.

    ``p.point_generate.num_points`` must be an odd perfect square; its
    square root becomes the deformable-conv kernel size ``dcn_kernel`` and
    ``dcn_pad`` is ``(dcn_kernel - 1) / 2`` truncated to int.

    For every layer prefix a ``<prefix>_weight`` (Gaussian, std 0.01) and a
    ``<prefix>_bias`` (zero) variable is created and stored on the
    instance.  The only exception is ``cls_out_bias``, which starts at
    ``-log((1 - 0.01) / 0.01)``.

    Args:
        pHead: head configuration, wrapped with ``patch_config_as_nothrow``.

    Raises:
        AssertionError: if ``num_points`` is not a square or its root is even.
    """
    self.p = patch_config_as_nothrow(pHead)

    num_points = self.p.point_generate.num_points
    self.dcn_kernel = int(math.sqrt(num_points))
    self.dcn_pad = int((self.dcn_kernel - 1) / 2)
    assert self.dcn_kernel * self.dcn_kernel == num_points, \
        "The points number should be square."
    assert self.dcn_kernel % 2 == 1, "The dcn kernel size should be odd."

    # init moment method
    if self.p.fp16:
        dtype = "float16"
    else:
        dtype = "float32"
    self.moment_transfer = X.var(
        name="moment_transfer",
        shape=(2, ),
        init=X.zero_init(),
        lr_mult=0.01,
        dtype=dtype,
    )

    # init bias for cls
    prior_prob = 0.01
    pi = -math.log((1 - prior_prob) / prior_prob)

    layer_prefixes = (
        # shared classification weight and bias
        "cls_conv1", "cls_conv2", "cls_conv3", "cls_conv", "cls_out",
        # shared regression weight and bias
        "reg_conv1", "reg_conv2", "reg_conv3",
        "pts_init_conv", "pts_init_out",
        "pts_refine_conv", "pts_refine_out",
    )
    for prefix in layer_prefixes:
        weight_name = prefix + "_weight"
        bias_name = prefix + "_bias"
        setattr(self, weight_name, X.var(weight_name, init=X.gauss(std=0.01)))
        if prefix == "cls_out":
            bias_init = X.constant(pi)
        else:
            bias_init = X.zero_init()
        setattr(self, bias_name, X.var(bias_name, init=bias_init))

    # output slots, initially empty
    self._pts_out_inits = None
    self._pts_out_refines = None
    self._cls_outs = None
def get_refine_output(self, roi_feature, cls_logit, bbox_delta, cls_sec_logit, bbox_sec_delta):
    """Refine two sets of predictions by feeding them back next to the RoI feature.

    For each prediction set, the softmax scores and box deltas are sliced
    from index 1 / 4 onwards on axis 1 (dropping the leading class entry),
    concatenated, tiled ``p.repeat_time`` times, and appended to
    ``roi_feature``.  Both sets share the ``fc3`` weight and bias, then get
    their own classification and regression fc layers.

    Args:
        roi_feature: RoI feature concatenated (axis 1) with the predictions.
        cls_logit, bbox_delta: first set of predictions.
        cls_sec_logit, bbox_sec_delta: second set of predictions.

    Returns:
        Tuple ``(refine_cls_logit, refine_bbox_delta, refine_cls_sec_logit,
        refine_bbox_sec_delta)``.
    """
    cfg = self.p
    num_class = cfg.num_class
    repeat_time = cfg.repeat_time
    if cfg.regress_target.class_agnostic:
        num_reg_class = 2
    else:
        num_reg_class = num_class

    def tail_scores(logit):
        # softmax scores without the first class column
        return mx.sym.slice_axis(mx.sym.softmax(logit), axis=1, begin=1, end=num_class)

    def tail_deltas(delta):
        # box deltas without the first group of four
        return mx.sym.slice_axis(delta, axis=1, begin=4, end=num_reg_class * 4)

    scores1 = tail_scores(cls_logit)
    scores2 = tail_scores(cls_sec_logit)
    deltas1 = tail_deltas(bbox_delta)
    deltas2 = tail_deltas(bbox_sec_delta)

    pred_feat1 = mx.sym.tile(
        mx.sym.concat(deltas1, scores1, dim=1), reps=(1, repeat_time))
    pred_feat2 = mx.sym.tile(
        mx.sym.concat(deltas2, scores2, dim=1), reps=(1, repeat_time))

    refine_feat1 = mx.sym.concat(roi_feature, pred_feat1, dim=1)
    refine_feat2 = mx.sym.concat(roi_feature, pred_feat2, dim=1)

    # both inputs go through the same fc3 parameters
    hidden1 = X.relu(X.fc(
        refine_feat1, filter=1024, weight=self.fc3_weight, bias=self.fc3_bias,
        name='fc3_conv_refine1'))
    hidden2 = X.relu(X.fc(
        refine_feat2, filter=1024, weight=self.fc3_weight, bias=self.fc3_bias,
        name='fc3_conv_refine2'))

    refine_cls_logit = X.fc(
        hidden1, filter=num_class, name='refine_bbox_cls_logit1', init=X.gauss(0.01))
    refine_cls_sec_logit = X.fc(
        hidden2, filter=num_class, name='refine_bbox_cls_logit2', init=X.gauss(0.01))
    refine_bbox_delta = X.fc(
        hidden1, filter=4 * num_reg_class, name='refine_bbox_reg_delta1', init=X.gauss(0.001))
    refine_bbox_sec_delta = X.fc(
        hidden2, filter=4 * num_reg_class, name='refine_bbox_reg_delta2', init=X.gauss(0.001))

    return refine_cls_logit, refine_bbox_delta, refine_cls_sec_logit, refine_bbox_sec_delta
def get_output(self, conv_feat):
    """Build (once) the per-level classification and box-regression outputs.

    A weight/bias pair is declared for ``conv1`` .. ``conv4`` and ``pred``
    of both the ``cls`` and ``bbox`` towers and stored on the instance.
    ``self._cls_subnet`` and ``self._bbox_subnet`` are then called once per
    entry of ``p.anchor_generate.stride``.  The result is memoised on
    ``self._cls_logit_list`` / ``self._bbox_delta_list``.

    A scalar ``stride`` is now wrapped into a 1-tuple.  The previous
    ``stride = (stride)`` was a no-op, so a scalar stride failed in
    ``enumerate``.  Tuples and lists are used as given.

    Args:
        conv_feat: indexable collection of features; ``conv_feat[i]`` is
            paired with the i-th stride.  NOTE(review): for a scalar stride
            this reads ``conv_feat[0]`` -- confirm callers pass an
            indexable object in that case.

    Returns:
        Tuple ``(cls_logit_list, bbox_delta_list)``, one entry per stride.
    """
    if self._cls_logit_list is not None and self._bbox_delta_list is not None:
        return self._cls_logit_list, self._bbox_delta_list

    p = self.p
    stride = p.anchor_generate.stride
    if not isinstance(stride, (tuple, list)):
        # trailing comma is required: "(stride)" is just "stride"
        stride = (stride,)
    conv_channel = p.head.conv_channel
    num_base_anchor = len(p.anchor_generate.ratio) * len(p.anchor_generate.scale)
    num_class = p.num_class

    # init bias for cls
    prior_prob = 0.01
    pi = -math.log((1 - prior_prob) / prior_prob)

    # shared classification ("cls") and regression ("bbox") weight and bias
    for branch in ("cls", "bbox"):
        for layer in ("conv1", "conv2", "conv3", "conv4", "pred"):
            prefix = branch + "_" + layer
            weight_name = prefix + "_weight"
            bias_name = prefix + "_bias"
            setattr(self, weight_name, X.var(weight_name, init=X.gauss(std=0.01)))
            if prefix == "cls_pred":
                bias_init = X.constant(pi)
            else:
                bias_init = X.zero_init()
            setattr(self, bias_name, X.var(bias_name, init=bias_init))

    cls_logit_list = []
    bbox_delta_list = []
    for level, _ in enumerate(stride):
        cls_logit = self._cls_subnet(
            conv_feat=conv_feat[level],
            conv_channel=conv_channel,
            num_base_anchor=num_base_anchor,
            num_class=num_class)
        bbox_delta = self._bbox_subnet(
            conv_feat=conv_feat[level],
            conv_channel=conv_channel,
            num_base_anchor=num_base_anchor,
            num_class=num_class)
        cls_logit_list.append(cls_logit)
        bbox_delta_list.append(bbox_delta)

    self._cls_logit_list = cls_logit_list
    self._bbox_delta_list = bbox_delta_list
    return self._cls_logit_list, self._bbox_delta_list