def get_output(self, conv_fpn_feat):
    if self.cls_logit_dict is not None and self.bbox_delta_dict is not None:
        return self.cls_logit_dict, self.bbox_delta_dict

    p = self.p
    num_base_anchor = len(p.anchor_generate.ratio) * len(p.anchor_generate.scale)
    conv_channel = p.head.conv_channel

    # the FPN RPN head shares one set of weights across all strides
    rpn_conv_weight = X.var('rpn_conv_weight', init=X.gauss(0.01))
    rpn_conv_bias = X.var('rpn_conv_bias', init=X.zero_init())
    rpn_conv_cls_weight = X.var('rpn_conv_cls_weight', init=X.gauss(0.01))
    rpn_conv_cls_bias = X.var('rpn_conv_cls_bias', init=X.zero_init())
    rpn_conv_bbox_weight = X.var('rpn_conv_bbox_weight', init=X.gauss(0.01))
    rpn_conv_bbox_bias = X.var('rpn_conv_bbox_bias', init=X.zero_init())

    cls_logit_dict = {}
    bbox_delta_dict = {}

    for stride in p.anchor_generate.stride:
        rpn_conv = X.conv(
            conv_fpn_feat['stride%s' % stride],
            kernel=3,
            filter=conv_channel,
            name="rpn_conv_3x3_%s" % stride,
            no_bias=False,
            weight=rpn_conv_weight,
            bias=rpn_conv_bias
        )
        rpn_relu = X.relu(rpn_conv, name='rpn_relu_%s' % stride)
        if p.fp16:
            rpn_relu = X.to_fp32(rpn_relu, name="rpn_relu_%s_fp32" % stride)
        cls_logit = X.conv(
            rpn_relu,
            filter=2 * num_base_anchor,
            name="rpn_cls_score_stride%s" % stride,
            no_bias=False,
            weight=rpn_conv_cls_weight,
            bias=rpn_conv_cls_bias
        )
        bbox_delta = X.conv(
            rpn_relu,
            filter=4 * num_base_anchor,
            name="rpn_bbox_pred_stride%s" % stride,
            no_bias=False,
            weight=rpn_conv_bbox_weight,
            bias=rpn_conv_bbox_bias
        )
        cls_logit_dict[stride] = cls_logit
        bbox_delta_dict[stride] = bbox_delta

    self.cls_logit_dict = cls_logit_dict
    self.bbox_delta_dict = bbox_delta_dict
    return self.cls_logit_dict, self.bbox_delta_dict
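# Note: the pattern above ties one set of RPN parameters across all FPN
# strides by passing the same weight/bias variables to every X.conv call.
# A minimal sketch of the same idea in plain MXNet symbols (the variable and
# layer names here are illustrative, not taken from the model above):
#
#   import mxnet as mx
#   w = mx.sym.var("rpn_conv_weight")
#   b = mx.sym.var("rpn_conv_bias")
#   outs = []
#   for stride in (4, 8, 16, 32, 64):
#       feat = mx.sym.var("stride%s_feat" % stride)
#       outs.append(mx.sym.Convolution(feat, weight=w, bias=b, num_filter=256,
#                                      kernel=(3, 3), pad=(1, 1),
#                                      name="rpn_conv_3x3_%s" % stride))
#   # mx.sym.Group(outs).list_arguments() lists rpn_conv_weight only once,
#   # confirming the convolutions share a single parameter set.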
def _bbox_subnet(self, conv_feat, conv_channel, num_base_anchor, num_class, stride):
    p = self.p
    norm = p.normalizer

    # regression subnet
    bbox_conv1 = X.conv(data=conv_feat, kernel=3, filter=conv_channel,
                        weight=self.bbox_conv1_weight, bias=self.bbox_conv1_bias,
                        no_bias=False, name="bbox_conv1")
    bbox_conv1 = norm(bbox_conv1, name="bbox_conv1_bn_s{}".format(stride))
    bbox_conv1_relu = X.relu(bbox_conv1)
    bbox_conv2 = X.conv(data=bbox_conv1_relu, kernel=3, filter=conv_channel,
                        weight=self.bbox_conv2_weight, bias=self.bbox_conv2_bias,
                        no_bias=False, name="bbox_conv2")
    bbox_conv2 = norm(bbox_conv2, name="bbox_conv2_bn_s{}".format(stride))
    bbox_conv2_relu = X.relu(bbox_conv2)
    bbox_conv3 = X.conv(data=bbox_conv2_relu, kernel=3, filter=conv_channel,
                        weight=self.bbox_conv3_weight, bias=self.bbox_conv3_bias,
                        no_bias=False, name="bbox_conv3")
    bbox_conv3 = norm(bbox_conv3, name="bbox_conv3_bn_s{}".format(stride))
    bbox_conv3_relu = X.relu(bbox_conv3)
    bbox_conv4 = X.conv(data=bbox_conv3_relu, kernel=3, filter=conv_channel,
                        weight=self.bbox_conv4_weight, bias=self.bbox_conv4_bias,
                        no_bias=False, name="bbox_conv4")
    bbox_conv4 = norm(bbox_conv4, name="bbox_conv4_bn_s{}".format(stride))
    bbox_conv4_relu = X.relu(bbox_conv4)

    if p.fp16:
        bbox_conv4_relu = X.to_fp32(bbox_conv4_relu, name="bbox_conv4_fp32")

    output_channel = num_base_anchor * 4
    output = X.conv(data=bbox_conv4_relu, kernel=3, filter=output_channel,
                    weight=self.bbox_pred_weight, bias=self.bbox_pred_bias,
                    no_bias=False, name="bbox_pred")
    return output
def _cls_subnet(self, conv_feat, conv_channel, num_base_anchor, num_class, stride):
    p = self.p
    norm = p.normalizer

    # classification subnet
    cls_conv1 = X.conv(data=conv_feat, kernel=3, filter=conv_channel,
                       weight=self.cls_conv1_weight, bias=self.cls_conv1_bias,
                       no_bias=False, name="cls_conv1")
    cls_conv1 = norm(cls_conv1, name="cls_conv1_bn_s{}".format(stride))
    cls_conv1_relu = X.relu(cls_conv1)
    cls_conv2 = X.conv(data=cls_conv1_relu, kernel=3, filter=conv_channel,
                       weight=self.cls_conv2_weight, bias=self.cls_conv2_bias,
                       no_bias=False, name="cls_conv2")
    cls_conv2 = norm(cls_conv2, name="cls_conv2_bn_s{}".format(stride))
    cls_conv2_relu = X.relu(cls_conv2)
    cls_conv3 = X.conv(data=cls_conv2_relu, kernel=3, filter=conv_channel,
                       weight=self.cls_conv3_weight, bias=self.cls_conv3_bias,
                       no_bias=False, name="cls_conv3")
    cls_conv3 = norm(cls_conv3, name="cls_conv3_bn_s{}".format(stride))
    cls_conv3_relu = X.relu(cls_conv3)
    cls_conv4 = X.conv(data=cls_conv3_relu, kernel=3, filter=conv_channel,
                       weight=self.cls_conv4_weight, bias=self.cls_conv4_bias,
                       no_bias=False, name="cls_conv4")
    cls_conv4 = norm(cls_conv4, name="cls_conv4_bn_s{}".format(stride))
    cls_conv4_relu = X.relu(cls_conv4)

    if p.fp16:
        cls_conv4_relu = X.to_fp32(cls_conv4_relu, name="cls_conv4_fp32")

    output_channel = num_base_anchor * (num_class - 1)
    output = X.conv(data=cls_conv4_relu, kernel=3, filter=output_channel,
                    weight=self.cls_pred_weight, bias=self.cls_pred_bias,
                    no_bias=False, name="cls_pred")
    return output
def get_output(self, conv_feat):
    if self._cls_logit is not None and self._bbox_delta is not None:
        return self._cls_logit, self._bbox_delta

    p = self.p
    num_base_anchor = len(p.anchor_generate.ratio) * len(p.anchor_generate.scale)
    conv_channel = p.head.conv_channel

    if p.normalizer.__name__ == "fix_bn":
        conv = X.convrelu(
            conv_feat,
            kernel=3,
            filter=conv_channel,
            name="rpn_conv_3x3",
            no_bias=False,
            init=X.gauss(0.01)
        )
    elif p.normalizer.__name__ in ["sync_bn", "gn"]:
        conv = X.convnormrelu(
            p.normalizer,
            conv_feat,
            kernel=3,
            filter=conv_channel,
            name="rpn_conv_3x3",
            no_bias=False,
            init=X.gauss(0.01)
        )
    else:
        raise NotImplementedError("Unsupported normalizer: {}".format(p.normalizer.__name__))

    if p.fp16:
        conv = X.to_fp32(conv, name="rpn_conv_3x3_fp32")

    cls_logit = X.conv(
        conv,
        filter=2 * num_base_anchor,
        name="rpn_cls_logit",
        no_bias=False,
        init=X.gauss(0.01)
    )
    bbox_delta = X.conv(
        conv,
        filter=4 * num_base_anchor,
        name="rpn_bbox_delta",
        no_bias=False,
        init=X.gauss(0.01)
    )

    self._cls_logit = cls_logit
    self._bbox_delta = bbox_delta
    return self._cls_logit, self._bbox_delta
def resnet_trident_stage(cls, data, name, num_block, filter, stride, dilate,
                         norm_type, norm_mom, ndev, num_branch, branch_ids,
                         branch_bn_shared, branch_conv_shared, branch_deform):
    """
    One resnet stage consists of multiple resnet units. Refer to the depth
    config for more information.
    :param data: input symbol
    :param name: stage name
    :param num_block: number of resnet units in this stage
    :param filter: number of output filters
    :param stride: stride of the first unit
    :param dilate: list of per-branch dilation rates
    :param norm_type: normalizer type
    :param norm_mom: normalizer momentum
    :param ndev: number of devices
    :param num_branch: number of trident branches
    :param branch_ids: ids of the branches to build
    :param branch_bn_shared: whether branches share bn parameters
    :param branch_conv_shared: whether branches share conv weights
    :param branch_deform: whether to use deformable conv in the last units
    :return: list of per-branch output symbols
    """
    assert isinstance(dilate, list) and len(dilate) == num_branch, \
        'dilate should be a list with num_branch items.'
    d = [(dil, dil) for dil in dilate]

    data = cls.resnet_unit(data, "{}_unit1".format(name), filter, stride, 1, True,
                           norm_type, norm_mom, ndev)
    data = [data] * num_branch

    for i in range(2, num_block + 1):
        # use deformable conv only in the last three units of the stage
        unit_deform = branch_deform and i >= num_block - 2
        # cast back to fp32 before the first deformable unit, as deformable
        # conv is not optimized for fp16
        if unit_deform and i == num_block - 2:
            for j in range(num_branch):
                data[j] = X.to_fp32(data[j], name="deform_to32")
        data = cls.resnet_trident_unit(
            data, "{}_unit{}".format(name, i), filter, (1, 1), d, False,
            norm_type, norm_mom, ndev, branch_ids, branch_bn_shared,
            branch_conv_shared, branch_deform=unit_deform)
    return data
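# A small illustration of the dilate argument, assuming the usual
# three-branch TridentNet setup with dilation rates 1, 2 and 3 (one per
# branch); the values below are illustrative:
#
#   dilate = [1, 2, 3]                   # len(dilate) must equal num_branch
#   d = [(dil, dil) for dil in dilate]   # -> [(1, 1), (2, 2), (3, 3)]
#   # each branch then runs the shared-weight units with its own (dy, dx)
#   # dilation, giving the branches different receptive fields.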
def get_output(self, conv_feat):
    p = self.p
    num_class = p.num_class
    num_reg_class = 2 if p.regress_target.class_agnostic else num_class

    head_feat = self._get_bbox_head_logit(conv_feat)
    if not isinstance(head_feat, dict):
        head_feat = dict(classification=head_feat, regression=head_feat)

    if p.fp16:
        head_feat["classification"] = X.to_fp32(
            head_feat["classification"], name="bbox_cls_head_to_fp32")
        head_feat["regression"] = X.to_fp32(
            head_feat["regression"], name="bbox_reg_head_to_fp32")

    cls_logit = X.fc(head_feat["classification"], filter=num_class,
                     name='bbox_cls_logit1', init=X.gauss(0.01))
    cls_sec_logit = X.fc(head_feat["classification"], filter=num_class,
                         name='bbox_cls_logit2', init=X.gauss(0.01))
    bbox_delta = X.fc(head_feat["regression"], filter=4 * num_reg_class,
                      name='bbox_reg_delta1', init=X.gauss(0.001))
    bbox_sec_delta = X.fc(head_feat["regression"], filter=4 * num_reg_class,
                          name='bbox_reg_delta2', init=X.gauss(0.001))

    return cls_logit, bbox_delta, cls_sec_logit, bbox_sec_delta, head_feat['regression']  # NOTE
def get_roi_feature(self, conv_fpn_feat, proposal):
    p = self.p
    rcnn_stride = p.stride
    roi_canonical_scale = p.roi_canonical_scale
    roi_canonical_level = p.roi_canonical_level

    group = mx.symbol.Custom(
        op_type="assign_layer_fpn",
        rois=proposal,
        rcnn_stride=rcnn_stride,
        roi_canonical_scale=roi_canonical_scale,
        roi_canonical_level=roi_canonical_level,
        name="assign_layer_fpn"
    )
    proposal_fpn = dict()
    for i, stride in enumerate(rcnn_stride):
        proposal_fpn["stride%s" % stride] = group[i]

    if p.fp16:
        for stride in rcnn_stride:
            conv_fpn_feat["stride%s" % stride] = X.to_fp32(
                conv_fpn_feat["stride%s" % stride],
                name="fpn_stride%s_to_fp32" % stride
            )

    fpn_roi_feats = list()
    for stride in rcnn_stride:
        feat_lvl = conv_fpn_feat["stride%s" % stride]
        proposal_lvl = proposal_fpn["stride%s" % stride]
        roi_feat = X.roi_align(
            feat_lvl,
            rois=proposal_lvl,
            out_size=p.out_size,
            stride=stride,
            name="roi_align"
        )
        roi_feat = X.reshape(
            data=roi_feat,
            shape=(-3, -2),
            name='roi_feat_reshape'
        )
        fpn_roi_feats.append(roi_feat)
    # merge the per-level features back into one batch
    roi_feat = X.add_n(*fpn_roi_feats)

    if p.fp16:
        roi_feat = X.to_fp16(roi_feat, name="roi_feat_to_fp16")

    return roi_feat
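# The assign_layer_fpn custom op distributes each RoI to a pyramid level
# based on its scale. A hedged sketch of the standard FPN assignment rule
# (Lin et al., 2017) that the roi_canonical_scale/roi_canonical_level
# parameters suggest, assuming strides (4, 8, 16, 32), i.e. levels 2-5;
# the clipping details inside the actual op may differ:
#
#   import math
#
#   def assign_level(w, h, canonical_scale=224, canonical_level=4,
#                    min_level=2, max_level=5):
#       k = canonical_level + math.log2(math.sqrt(w * h) / canonical_scale)
#       return int(min(max(math.floor(k), min_level), max_level))
#
#   assign_level(224, 224)  # -> 4, i.e. the stride-16 level
#   assign_level(112, 112)  # -> 3, i.e. the stride-8 level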
def _get_mask_head_logit(self, conv_feat):
    if self._head_feat is not None:
        return self._head_feat

    up_stride = int(self.pMask.resolution // self.pMaskRoi.out_size)
    dim_reduced = self.pMask.dim_reduced

    msra_init = mx.init.Xavier(rnd_type="gaussian", factor_type="out", magnitude=2)

    current = conv_feat
    for i in range(4):
        current = X.conv(
            current,
            name="mask_fcn_conv{}".format(i + 1),
            filter=dim_reduced,
            kernel=3,
            no_bias=False,
            init=msra_init
        )
        current = self.add_norm(current)
        current = X.relu(current)

    mask_up = current
    # number of 2x deconvs; matches up_stride for the usual values 2 and 4
    for i in range(up_stride // 2):
        weight = X.var(
            name="mask_up{}_weight".format(i),
            init=msra_init,
            lr_mult=1,
            wd_mult=1)
        mask_up = mx.sym.Deconvolution(
            mask_up,
            kernel=(2, 2),
            stride=(2, 2),
            num_filter=dim_reduced,
            no_bias=False,
            weight=weight,
            name="mask_up{}".format(i)
        )
        mask_up = X.relu(mask_up, name="mask_up{}_relu".format(i))

    mask_up = X.to_fp32(mask_up, name='mask_up_to_fp32')
    self._head_feat = mask_up
    return self._head_feat
def get_roi_feature(self, rcnn_feat, proposal):
    p = self.p

    if p.fp16:
        rcnn_feat = X.to_fp32(rcnn_feat, "rcnn_feat_to_fp32")
    roi_feat = X.roi_align(rcnn_feat, rois=proposal, out_size=p.out_size,
                           stride=p.stride, name="roi_align")
    if p.fp16:
        roi_feat = X.to_fp16(roi_feat, "roi_feat_to_fp16")
    roi_feat = X.reshape(roi_feat, (-3, -2))
    return roi_feat
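# A note on the (-3, -2) reshape above: MXNet reshape treats -3 as "merge
# the next two input axes" and -2 as "copy all remaining axes". A tiny
# runnable check with illustrative shapes:
#
#   import mxnet as mx
#   x = mx.nd.zeros((2, 3, 4, 5))
#   print(x.reshape((-3, -2)).shape)   # -> (6, 4, 5)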
def get_output(self, conv_feat):
    p = self.p
    num_class = p.num_class
    num_reg_class = 2 if p.regress_target.class_agnostic else num_class

    head_feat = self._get_bbox_head_logit(conv_feat)
    if p.fp16:
        head_feat = X.to_fp32(head_feat, name="bbox_head_to_fp32")

    cls_logit = X.fc(head_feat, filter=num_class, name='bbox_cls_logit',
                     init=X.gauss(0.01))
    bbox_delta = X.fc(head_feat, filter=4 * num_reg_class, name='bbox_reg_delta',
                      init=X.gauss(0.001))
    return cls_logit, bbox_delta
def get_output(self, conv_feat):
    pBbox = self.pBbox
    num_class = pBbox.num_class

    head_feat = self._get_mask_head_logit(conv_feat)
    msra_init = mx.init.Xavier(rnd_type="gaussian", factor_type="out", magnitude=2)

    if self.pMask.fp16:
        head_feat = X.to_fp32(head_feat, name="mask_head_to_fp32")

    mask_fcn_logit = X.conv(
        head_feat,
        filter=num_class,
        name="mask_fcn_logit",
        no_bias=False,
        init=msra_init
    )
    return mask_fcn_logit
def _get_bbox_head_logit(self, conv_feat):
    if self._head_feat is not None:
        return self._head_feat

    from mxnext.backbone.resnet_v1 import Builder
    unit = Builder.resnet_stage(
        conv_feat,
        name="stage4",
        num_block=3,
        filter=2048,
        stride=1,
        dilate=1,
        norm_type=self.p.normalizer,
        norm_mom=0.9,
        ndev=8
    )
    unit = X.to_fp32(unit, name='c5_to_fp32')
    pool1 = X.pool(unit, global_pool=True, name='pool1')

    self._head_feat = pool1
    return self._head_feat
def _cls_subnet(self, conv_feat, stride):
    p = self.p
    norm = p.normalizer
    conv_channel = p.head.conv_channel

    # classification subnet
    cls_conv1 = X.conv(data=conv_feat, kernel=3, filter=conv_channel,
                       weight=self.cls_conv1_weight, bias=self.cls_conv1_bias,
                       no_bias=False, name="cls_conv1")
    cls_conv1 = norm(cls_conv1, name="cls_conv1_bn_s{}".format(stride))
    cls_conv1_relu = X.relu(cls_conv1)
    cls_conv2 = X.conv(data=cls_conv1_relu, kernel=3, filter=conv_channel,
                       weight=self.cls_conv2_weight, bias=self.cls_conv2_bias,
                       no_bias=False, name="cls_conv2")
    cls_conv2 = norm(cls_conv2, name="cls_conv2_bn_s{}".format(stride))
    cls_conv2_relu = X.relu(cls_conv2)
    cls_conv3 = X.conv(data=cls_conv2_relu, kernel=3, filter=conv_channel,
                       weight=self.cls_conv3_weight, bias=self.cls_conv3_bias,
                       no_bias=False, name="cls_conv3")
    cls_conv3 = norm(cls_conv3, name="cls_conv3_bn_s{}".format(stride))
    cls_conv3_relu = X.relu(cls_conv3)

    if p.fp16:
        cls_conv3_relu = X.to_fp32(cls_conv3_relu, name="cls_conv3_fp32")

    return cls_conv3_relu
def _reg_subnet(self, conv_feat, stride):
    p = self.p
    norm = p.normalizer
    conv_channel = p.head.conv_channel

    # regression subnet
    reg_conv1 = X.conv(data=conv_feat, kernel=3, filter=conv_channel,
                       weight=self.reg_conv1_weight, bias=self.reg_conv1_bias,
                       no_bias=False, name="reg_conv1")
    reg_conv1 = norm(reg_conv1, name="reg_conv1_bn_s{}".format(stride))
    reg_conv1_relu = X.relu(reg_conv1)
    reg_conv2 = X.conv(data=reg_conv1_relu, kernel=3, filter=conv_channel,
                       weight=self.reg_conv2_weight, bias=self.reg_conv2_bias,
                       no_bias=False, name="reg_conv2")
    reg_conv2 = norm(reg_conv2, name="reg_conv2_bn_s{}".format(stride))
    reg_conv2_relu = X.relu(reg_conv2)
    reg_conv3 = X.conv(data=reg_conv2_relu, kernel=3, filter=conv_channel,
                       weight=self.reg_conv3_weight, bias=self.reg_conv3_bias,
                       no_bias=False, name="reg_conv3")
    reg_conv3 = norm(reg_conv3, name="reg_conv3_bn_s{}".format(stride))
    reg_conv3_relu = X.relu(reg_conv3)

    if p.fp16:
        reg_conv3_relu = X.to_fp32(reg_conv3_relu, name="reg_conv3_fp32")

    return reg_conv3_relu
def _bbox_subnet(self, conv_feat, conv_channel, num_base_anchor, num_class, stride, nb_conv=0):
    p = self.p

    if nb_conv <= 0:
        # degenerate case: no intermediate convs, predict directly from the
        # input feature (the name mirrors the base-class variable)
        bbox_conv4_relu = conv_feat
        if p.fp16:
            bbox_conv4_relu = X.to_fp32(bbox_conv4_relu, name="bbox_conv4_fp32")
        output_channel = num_base_anchor * 4
        output = X.conv(data=bbox_conv4_relu, kernel=3, filter=output_channel,
                        weight=self.bbox_pred_weight, bias=self.bbox_pred_bias,
                        no_bias=False, name="bbox_pred")
        return output

    return super()._bbox_subnet(conv_feat, conv_channel, num_base_anchor,
                                num_class, stride)
def get_roi_feature(self, conv_fpn_feat, proposal):
    p = self.p
    rcnn_stride = p.stride

    group = mx.symbol.Custom(rois=proposal, op_type='assign_layer_fpn')
    proposal_fpn = dict()
    proposal_fpn["stride4"] = group[1]
    proposal_fpn["stride8"] = group[2]
    proposal_fpn["stride16"] = group[3]
    proposal_fpn["stride32"] = group[4]

    if p.fp16:
        for stride in rcnn_stride:
            conv_fpn_feat["stride%s" % stride] = X.to_fp32(
                conv_fpn_feat["stride%s" % stride],
                name="fpn_stride%s_to_fp32" % stride)

    fpn_roi_feats = list()
    for stride in rcnn_stride:
        feat_lvl = conv_fpn_feat["stride%s" % stride]
        proposal_lvl = proposal_fpn["stride%s" % stride]
        roi_feat = X.roi_align(feat_lvl, rois=proposal_lvl, out_size=p.out_size,
                               stride=stride, name="roi_align")
        roi_feat = X.reshape(data=roi_feat, shape=(-3, -2), name='roi_feat_reshape')
        fpn_roi_feats.append(roi_feat)
    roi_feat = X.add_n(*fpn_roi_feats)

    if p.fp16:
        roi_feat = X.to_fp16(roi_feat, name="roi_feat_to_fp16")

    return roi_feat
def fpn_conv_down(self, data):
    if self.fpn_feat:
        return self.fpn_feat

    c2, c3, c4, c5 = data

    if self.p.fp16:
        c2 = X.to_fp32(c2, name="c2_to_fp32")
        c3 = X.to_fp32(c3, name="c3_to_fp32")
        c4 = X.to_fp32(c4, name="c4_to_fp32")
        c5 = X.to_fp32(c5, name="c5_to_fp32")

    xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    # P5
    p5 = X.conv(data=c5, filter=256, no_bias=False,
                weight=X.var(name="P5_lateral_weight", init=xavier_init),
                bias=X.var(name="P5_lateral_bias", init=X.zero_init()),
                name="P5_lateral")
    p5_conv = X.conv(data=p5, kernel=3, filter=256, no_bias=False,
                     weight=X.var(name="P5_conv_weight", init=xavier_init),
                     bias=X.var(name="P5_conv_bias", init=X.zero_init()),
                     name="P5_conv")

    # P4
    p5_up = mx.sym.UpSampling(p5, scale=2, sample_type="nearest",
                              name="P5_upsampling", num_args=1)
    p4_la = X.conv(data=c4, filter=256, no_bias=False,
                   weight=X.var(name="P4_lateral_weight", init=xavier_init),
                   bias=X.var(name="P4_lateral_bias", init=X.zero_init()),
                   name="P4_lateral")
    p5_clip = mx.sym.Crop(*[p5_up, p4_la], name="P4_clip")
    p4 = mx.sym.ElementWiseSum(*[p5_clip, p4_la], name="P4_sum")
    p4_conv = X.conv(data=p4, kernel=3, filter=256, no_bias=False,
                     weight=X.var(name="P4_conv_weight", init=xavier_init),
                     bias=X.var(name="P4_conv_bias", init=X.zero_init()),
                     name="P4_conv")

    # P3
    p4_up = mx.sym.UpSampling(p4, scale=2, sample_type="nearest",
                              name="P4_upsampling", num_args=1)
    p3_la = X.conv(data=c3, filter=256, no_bias=False,
                   weight=X.var(name="P3_lateral_weight", init=xavier_init),
                   bias=X.var(name="P3_lateral_bias", init=X.zero_init()),
                   name="P3_lateral")
    p4_clip = mx.sym.Crop(*[p4_up, p3_la], name="P3_clip")
    p3 = mx.sym.ElementWiseSum(*[p4_clip, p3_la], name="P3_sum")
    p3_conv = X.conv(data=p3, kernel=3, filter=256, no_bias=False,
                     weight=X.var(name="P3_conv_weight", init=xavier_init),
                     bias=X.var(name="P3_conv_bias", init=X.zero_init()),
                     name="P3_conv")

    # P2
    p3_up = mx.sym.UpSampling(p3, scale=2, sample_type="nearest",
                              name="P3_upsampling", num_args=1)
    p2_la = X.conv(data=c2, filter=256, no_bias=False,
                   weight=X.var(name="P2_lateral_weight", init=xavier_init),
                   bias=X.var(name="P2_lateral_bias", init=X.zero_init()),
                   name="P2_lateral")
    p3_clip = mx.sym.Crop(*[p3_up, p2_la], name="P2_clip")
    p2 = mx.sym.ElementWiseSum(*[p3_clip, p2_la], name="P2_sum")
    p2_conv = X.conv(data=p2, kernel=3, filter=256, no_bias=False,
                     weight=X.var(name="P2_conv_weight", init=xavier_init),
                     bias=X.var(name="P2_conv_bias", init=X.zero_init()),
                     name="P2_conv")

    # P6
    p6 = X.pool(p5_conv, name="P6_subsampling", kernel=1, stride=2, pad=0,
                pool_type='max')

    if self.p.fp16:
        p6 = X.to_fp16(p6, name="p6_to_fp16")
        p5_conv = X.to_fp16(p5_conv, name="p5_conv_to_fp16")
        p4_conv = X.to_fp16(p4_conv, name="p4_conv_to_fp16")
        p3_conv = X.to_fp16(p3_conv, name="p3_conv_to_fp16")
        p2_conv = X.to_fp16(p2_conv, name="p2_conv_to_fp16")

    conv_fpn_feat = dict()
    conv_fpn_feat.update({
        "stride64": p6,
        "stride32": p5_conv,
        "stride16": p4_conv,
        "stride8": p3_conv,
        "stride4": p2_conv
    })

    self.fpn_feat = conv_fpn_feat
    return self.fpn_feat
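# Why the Crop between upsampling and the lateral sum: nearest-neighbour
# UpSampling exactly doubles the spatial size, so when a lateral map has an
# odd dimension the upsampled map overshoots by one pixel and must be
# clipped before ElementWiseSum. A hedged numeric sketch (hypothetical
# feature-map sizes):
#
#   import mxnet as mx
#   p5 = mx.nd.zeros((1, 256, 13, 13))   # a stride-32 map
#   p5_up = mx.nd.UpSampling(p5, scale=2, sample_type="nearest", num_args=1)
#   print(p5_up.shape)                   # -> (1, 256, 26, 26)
#   # the corresponding stride-16 lateral map may be 25x25, hence the Crop.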
def get_output(self, fpn_conv_feats, roi_feat, rois, is_train):
    '''
    Args:
        fpn_conv_feats: dict of FPN features, each [batch_image, in_channels, fh, fw]
        roi_feat: [batch_image * image_roi, 256, roi_size, roi_size]
        rois: [batch_image, image_roi, 4]
        is_train: boolean
    Returns:
        cls_logit: [batch_image * image_roi, num_class]
        bbox_delta: [batch_image * image_roi, num_class * 4]
        tsd_cls_logit: [batch_image * image_roi, num_class]
        tsd_bbox_delta: [batch_image * image_roi, num_class * 4]
        delta_c: [batch_image * image_roi, 2*roi_size*roi_size, 1, 1]
        delta_r: [batch_image * image_roi, 2, 1, 1]
    '''
    xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    # roi_feat: [batch_roi, 256, 7, 7]
    flatten = X.reshape(roi_feat, shape=(0, -1, 1, 1),
                        name="bbox_feat_reshape")  # [batch_roi, 256*7*7, 1, 1]
    x1 = flatten
    x2 = X.relu(X.conv(data=x1, kernel=1, filter=256, name="delta_shared_fc1",
                       no_bias=False),
                name="delta_shared_fc1_relu")  # [batch_roi, 256, 1, 1]

    delta_c = X.relu(X.conv(x2, filter=256, name="delta_c_fc1", init=X.gauss(0.01)),
                     name="delta_c_fc1_relu")  # [batch_roi, 256, 1, 1]
    delta_c = X.conv(delta_c, filter=2 * self.p.roi_size**2, name="delta_c_fc2",
                     init=X.gauss(0.01))  # [batch_roi, 2*7*7, 1, 1]
    delta_r = X.relu(X.conv(x2, filter=256, name="delta_r_fc1", init=X.gauss(0.01)),
                     name="delta_r_fc1_relu")  # [batch_roi, 256, 1, 1]
    delta_r = X.conv(delta_r, filter=2, name="delta_r_fc2",
                     init=X.gauss(0.01))  # [batch_roi, 2, 1, 1]

    image_roi = self.p.image_roi if is_train else 1000  # test-time proposal count
    batch_image = self.p.batch_image
    TSD_cls_feats = self.delta_c_pool.get_roi_feature(
        fpn_conv_feats, rois, delta_c, image_rois=image_roi,
        batch_image=batch_image)  # [batch_roi, 256, 7, 7]
    TSD_loc_feats = self.delta_r_pool.get_roi_feature(
        fpn_conv_feats, rois, delta_r, image_rois=image_roi,
        batch_image=batch_image)  # [batch_roi, 256, 7, 7]

    TSD_x_cls = self._convs_and_fcs(
        TSD_cls_feats, self.p.TSD.num_shared_convs, self.p.TSD.num_shared_fcs,
        name='TSD_pc', conv_init=xavier_init, fc_init=X.gauss(0.01))  # [batch_roi, fc_dim, 1, 1]
    TSD_x_reg = self._convs_and_fcs(
        TSD_loc_feats, self.p.TSD.num_shared_convs, self.p.TSD.num_shared_fcs,
        name='TSD_pr', conv_init=xavier_init, fc_init=X.gauss(0.01))  # [batch_roi, fc_dim, 1, 1]
    TSD_x_cls = self._convs_and_fcs(
        TSD_x_cls, 0, self.p.TSD.num_cls_fcs, name='TSD_cls',
        conv_init=xavier_init, fc_init=X.gauss(0.01))  # [batch_roi, fc_dim, 1, 1]
    TSD_x_reg = self._convs_and_fcs(
        TSD_x_reg, 0, self.p.TSD.num_reg_fcs, name='TSD_reg',
        conv_init=xavier_init, fc_init=X.gauss(0.01))  # [batch_roi, fc_dim, 1, 1]

    num_class = self.p.num_class
    num_reg_class = 2 if self.p.regress_target.class_agnostic else num_class
    tsd_cls_logit = X.fc(TSD_x_cls, filter=num_class, name='tsd_cls_logit',
                         init=X.gauss(0.01))
    tsd_bbox_delta = X.fc(TSD_x_reg, filter=4 * num_reg_class, name='tsd_reg_delta',
                          init=X.gauss(0.01))

    # sibling head on the original RoI features
    x = self._convs_and_fcs(roi_feat, self.p.TSD.num_shared_convs,
                            self.p.TSD.num_shared_fcs, name='shared_fc',
                            conv_init=xavier_init, fc_init=X.gauss(0.01))
    x_cls = x
    x_reg = x
    x_cls = self._convs_and_fcs(x_cls, 0, self.p.TSD.num_cls_fcs, name='cls',
                                conv_init=xavier_init, fc_init=X.gauss(0.01))
    x_reg = self._convs_and_fcs(x_reg, 0, self.p.TSD.num_reg_fcs, name='reg',
                                conv_init=xavier_init, fc_init=X.gauss(0.01))
    cls_logit = X.fc(x_cls, filter=num_class, name='bbox_cls_logit',
                     init=X.gauss(0.01))
    bbox_delta = X.fc(x_reg, filter=4 * num_reg_class, name='bbox_reg_delta',
                      init=X.gauss(0.01))

    if self.p.fp16:
        cls_logit = X.to_fp32(cls_logit, name="cls_logits_fp32")
        bbox_delta = X.to_fp32(bbox_delta, name="bbox_delta_fp32")
        tsd_cls_logit = X.to_fp32(tsd_cls_logit, name="tsd_cls_logit_fp32")
        tsd_bbox_delta = X.to_fp32(tsd_bbox_delta, name="tsd_bbox_delta_fp32")
        delta_c = X.to_fp32(delta_c, name="delta_c_fp32")
        delta_r = X.to_fp32(delta_r, name="delta_r_fp32")

    return cls_logit, bbox_delta, tsd_cls_logit, tsd_bbox_delta, delta_c, delta_r
def get_output(self, conv_fpn_feat):
    if self.cls_logit_dict is not None and self.bbox_delta_dict is not None:
        return self.cls_logit_dict, self.bbox_delta_dict

    p = self.p
    num_base_anchor = len(p.anchor_generate.ratio) * len(p.anchor_generate.scale)
    conv_channel = p.head.conv_channel

    # the FPN RPN head shares weights (and norm parameters) across all strides
    rpn_conv_weight = X.var('rpn_conv_weight', init=X.gauss(0.01))
    rpn_conv_bias = X.var('rpn_conv_bias', init=X.zero_init())
    rpn_conv_gamma = X.var('rpn_conv_gamma')
    rpn_conv_beta = X.var('rpn_conv_beta')
    rpn_conv_mmean = X.var('rpn_conv_moving_mean')
    rpn_conv_mvar = X.var('rpn_conv_moving_var')
    rpn_conv_cls_weight = X.var('rpn_conv_cls_weight', init=X.gauss(0.01))
    rpn_conv_cls_bias = X.var('rpn_conv_cls_bias', init=X.zero_init())
    rpn_conv_bbox_weight = X.var('rpn_conv_bbox_weight', init=X.gauss(0.01))
    rpn_conv_bbox_bias = X.var('rpn_conv_bbox_bias', init=X.zero_init())

    cls_logit_dict = {}
    bbox_delta_dict = {}

    for stride in p.anchor_generate.stride:
        rpn_conv = X.conv(
            conv_fpn_feat['stride%s' % stride],
            kernel=3,
            filter=conv_channel,
            name="rpn_conv_3x3_%s" % stride,
            no_bias=False,
            weight=rpn_conv_weight,
            bias=rpn_conv_bias
        )
        if p.normalizer.__name__ == "fix_bn":
            pass  # frozen bn: use the conv output as-is
        elif p.normalizer.__name__ == "sync_bn":
            rpn_conv = p.normalizer(
                rpn_conv,
                gamma=rpn_conv_gamma,
                beta=rpn_conv_beta,
                moving_mean=rpn_conv_mmean,
                moving_var=rpn_conv_mvar,
                name="rpn_conv_3x3_bn_%s" % stride
            )
        elif p.normalizer.__name__ == "gn":
            rpn_conv = p.normalizer(
                rpn_conv,
                gamma=rpn_conv_gamma,
                beta=rpn_conv_beta,
                name="rpn_conv_3x3_gn_%s" % stride
            )
        else:
            raise NotImplementedError("Unsupported normalizer {}".format(p.normalizer.__name__))

        rpn_relu = X.relu(rpn_conv, name='rpn_relu_%s' % stride)
        if p.fp16:
            rpn_relu = X.to_fp32(rpn_relu, name="rpn_relu_%s_fp32" % stride)
        cls_logit = X.conv(
            rpn_relu,
            filter=2 * num_base_anchor,
            name="rpn_cls_score_stride%s" % stride,
            no_bias=False,
            weight=rpn_conv_cls_weight,
            bias=rpn_conv_cls_bias
        )
        bbox_delta = X.conv(
            rpn_relu,
            filter=4 * num_base_anchor,
            name="rpn_bbox_pred_stride%s" % stride,
            no_bias=False,
            weight=rpn_conv_bbox_weight,
            bias=rpn_conv_bbox_bias
        )
        cls_logit_dict[stride] = cls_logit
        bbox_delta_dict[stride] = bbox_delta

    self.cls_logit_dict = cls_logit_dict
    self.bbox_delta_dict = bbox_delta_dict
    return self.cls_logit_dict, self.bbox_delta_dict