def get_output(self, conv_feat):
    """Build the per-stage classification and box-regression outputs.

    The head feature comes from ``self._get_bbox_head_logit``. It may be a
    single symbol (shared by both branches) or a dict with the keys
    ``"classification"`` and ``"regression"``.

    Args:
        conv_feat: input feature forwarded to ``self._get_bbox_head_logit``.

    Returns:
        Tuple ``(cls_logit, bbox_delta)``. Both are also stored on the
        instance as ``self._cls_logit`` and ``self._bbox_delta``.
    """
    cfg = self.p
    stage_tag = self.stage
    n_cls = cfg.num_class
    # Class-agnostic regression uses 2 regression classes instead of one
    # set of deltas per class.
    n_reg_cls = 2 if cfg.regress_target.class_agnostic else n_cls

    feats = self._get_bbox_head_logit(conv_feat)
    if not isinstance(feats, dict):
        # A single feature feeds both branches.
        feats = {"classification": feats, "regression": feats}

    if cfg.fp16:
        # NOTE: the converted features are written back into the dict, so a
        # dict returned by the helper is modified in place.
        fp32_prefixes = (
            ("classification", "bbox_cls_head_to_fp32_"),
            ("regression", "bbox_reg_head_to_fp32_"),
        )
        for branch, prefix in fp32_prefixes:
            feats[branch] = X.to_fp32(feats[branch], name=prefix + stage_tag)

    # Final layers use the weight/bias variables held on the instance.
    logits = X.fc(
        feats["classification"],
        filter=n_cls,
        weight=self.cls_logit_weight,
        bias=self.cls_logit_bias,
        name='bbox_cls_logit_' + stage_tag,
    )
    deltas = X.fc(
        feats["regression"],
        filter=4 * n_reg_cls,
        weight=self.bbox_delta_weight,
        bias=self.bbox_delta_bias,
        name='bbox_reg_delta_' + stage_tag,
    )

    self._cls_logit = logits
    self._bbox_delta = deltas
    return logits, deltas
def get_output(self, conv_feat):
    """Build the classification and box-regression outputs of the head.

    The head feature comes from ``self._get_bbox_head_logit``. It may be a
    single symbol (shared by both branches) or a dict with the keys
    ``"classification"`` and ``"regression"``.

    Args:
        conv_feat: input feature forwarded to ``self._get_bbox_head_logit``.

    Returns:
        Tuple ``(cls_logit, bbox_delta)``.
    """
    cfg = self.p
    n_cls = cfg.num_class
    # Class-agnostic regression uses 2 regression classes instead of one
    # set of deltas per class.
    n_reg_cls = 2 if cfg.regress_target.class_agnostic else n_cls

    feats = self._get_bbox_head_logit(conv_feat)
    if not isinstance(feats, dict):
        # A single feature feeds both branches.
        feats = {"classification": feats, "regression": feats}

    if cfg.fp16:
        # NOTE: the converted features are written back into the dict, so a
        # dict returned by the helper is modified in place.
        fp32_names = (
            ("classification", "bbox_cls_head_to_fp32"),
            ("regression", "bbox_reg_head_to_fp32"),
        )
        for branch, op_name in fp32_names:
            feats[branch] = X.to_fp32(feats[branch], name=op_name)

    logits = X.fc(
        feats["classification"],
        filter=n_cls,
        name='bbox_cls_logit',
        init=X.gauss(0.01),
    )
    # The regression layer is initialised with a smaller std than the
    # classification layer (0.001 vs 0.01).
    deltas = X.fc(
        feats["regression"],
        filter=4 * n_reg_cls,
        name='bbox_reg_delta',
        init=X.gauss(0.001),
    )
    return logits, deltas
def _cls_head(self, conv_feat):
    """Build the classification branch: flatten, then two fc/norm/relu stages.

    Args:
        conv_feat: input feature; it is flattened before the first fc layer.

    Returns:
        The output symbol of the second fc -> norm -> relu stage.
    """
    fc_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    feat = X.flatten(conv_feat, name="bbox_feat_flatten")
    # Both stages are identical apart from the layer name.
    for layer_name in ("bbox_cls_fc1", "bbox_cls_fc2"):
        feat = X.fc(feat, filter=1024, name=layer_name, init=fc_init)
        feat = self.add_norm(feat)
        feat = X.relu(feat)
    return feat
def _get_bbox_head_logit(self, conv_feat):
    """Return the two-layer fc head feature, building it on first use.

    The result is cached on ``self._head_feat``. Once that attribute is not
    ``None`` it is returned as-is and ``conv_feat`` is ignored.

    Args:
        conv_feat: input feature; flattened and passed through two
            1024-unit fc + relu layers.

    Returns:
        The cached head feature symbol.
    """
    # Guard clause: reuse the previously built feature.
    if self._head_feat is not None:
        return self._head_feat

    fc_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    feat = X.flatten(conv_feat, name="bbox_feat_flatten")
    for layer_name in ("bbox_fc1", "bbox_fc2"):
        feat = X.fc(feat, filter=1024, name=layer_name, init=fc_init)
        feat = X.relu(feat)

    self._head_feat = feat
    return self._head_feat
def _get_output(self, mask_pred_logits, conv_feat):
    """Build the IoU-prediction head from mask logits and a feature map.

    The mask logits get an extra axis at position 1, are pooled with a
    2x2 / stride-2 pool, and are concatenated with ``conv_feat`` along
    axis 1. The result goes through four 3x3 conv + relu layers (the last
    with stride 2), two 1024-unit fc + relu layers, and a final fc with
    ``num_class`` outputs.

    Args:
        mask_pred_logits: mask logits; presumably one map per RoI, to be
            confirmed against the caller.
        conv_feat: feature map concatenated with the pooled mask logits.

    Returns:
        The IoU prediction logits symbol.
    """
    n_cls = self.pBbox.num_class

    conv_init = mx.init.Xavier(rnd_type="gaussian", factor_type="out", magnitude=2)
    pred_init = mx.init.Normal(0.01)
    fc_init = mx.init.Xavier(rnd_type='uniform', factor_type='in', magnitude=3)

    mask_in = mx.sym.expand_dims(mask_pred_logits, axis=1)
    pooled_mask = X.pool(
        mask_in,
        name='iou_head_maxpool_1',
        kernel=2,
        stride=2,
        pad=0,
    )
    feat = X.concat([conv_feat, pooled_mask], axis=1, name='iou_head_input')

    # Convs 0-2 keep the resolution; conv 3 uses stride 2.
    for idx, conv_stride in enumerate((1, 1, 1, 2)):
        feat = X.conv(
            feat,
            filter=256,
            kernel=3,
            stride=conv_stride,
            name='iou_head_conv_{}'.format(idx),
            no_bias=False,
            init=conv_init,
        )
        feat = X.relu(feat)

    feat = X.flatten(data=feat)

    for fc_name in ('iou_head_FC1', 'iou_head_FC2'):
        feat = X.fc(feat, filter=1024, name=fc_name, init=fc_init)
        feat = X.relu(feat)

    return X.fc(feat, filter=n_cls, name='iou_head_pred', init=pred_init)
def get_output(self, conv_feat):
    """Build the classification and box-regression outputs from one feature.

    Both output layers consume the same head feature returned by
    ``self._get_bbox_head_logit``.

    Args:
        conv_feat: input feature forwarded to ``self._get_bbox_head_logit``.

    Returns:
        Tuple ``(cls_logit, bbox_delta)``.
    """
    cfg = self.p
    n_cls = cfg.num_class
    # Class-agnostic regression uses 2 regression classes instead of one
    # set of deltas per class.
    n_reg_cls = 2 if cfg.regress_target.class_agnostic else n_cls

    shared_feat = self._get_bbox_head_logit(conv_feat)
    if cfg.fp16:
        # Convert the head feature to fp32 before the output layers.
        shared_feat = X.to_fp32(shared_feat, name="bbox_head_to_fp32")

    logits = X.fc(
        shared_feat,
        filter=n_cls,
        name='bbox_cls_logit',
        init=X.gauss(0.01),
    )
    deltas = X.fc(
        shared_feat,
        filter=4 * n_reg_cls,
        name='bbox_reg_delta',
        init=X.gauss(0.001),
    )
    return logits, deltas
def get_output(self, fpn_conv_feats, roi_feat, rois, is_train):
    """Build the TSD head outputs together with the plain sibling-head outputs.

    Shapes below are the original author's notes and are not verified here.

    Args:
        fpn_conv_feats: dict of FPN features, each
            [batch_image, in_channels, fh, fw].
        roi_feat: [batch_image * image_roi, 256, roi_size, roi_size].
        rois: [batch_image, image_roi, 4].
        is_train: boolean; selects ``self.p.image_roi`` (True) or a fixed
            1000 (False) as the per-image RoI count passed to the poolers.

    Returns:
        Tuple of six symbols:
            cls_logit: [batch_image * image_roi, num_class]
            bbox_delta: [batch_image * image_roi, 4 * num_reg_class]
            tsd_cls_logit: [batch_image * image_roi, num_class]
            tsd_bbox_delta: [batch_image * image_roi, 4 * num_reg_class]
            delta_c: [batch_image * image_roi, 2*roi_size*roi_size, 1, 1]
            delta_r: [batch_image * image_roi, 2, 1, 1]
    """
    cfg = self.p
    conv_xavier = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    def build_tower(feat, n_convs, n_fcs, tower_name):
        # Every tower shares the conv initializer and uses a fresh
        # gauss(0.01) initializer for its fc layers.
        return self._convs_and_fcs(
            feat,
            n_convs,
            n_fcs,
            name=tower_name,
            conv_init=conv_xavier,
            fc_init=X.gauss(0.01),
        )

    # ---- offset prediction -------------------------------------------------
    # Collapse each RoI feature to a channel vector with 1x1 spatial extent,
    # so the convs below act on it like fc layers.
    roi_vec = X.reshape(roi_feat, shape=(0, -1, 1, 1), name="bbox_feat_reshape")
    shared_conv = X.conv(
        data=roi_vec,
        kernel=1,
        filter=256,
        name="delta_shared_fc1",
        no_bias=False,
    )
    shared = X.relu(shared_conv, name="delta_shared_fc1_relu")

    # Classification-branch offsets: 2 values per RoI bin.
    delta_c_hidden = X.conv(shared, filter=256, name="delta_c_fc1", init=X.gauss(0.01))
    delta_c_hidden = X.relu(delta_c_hidden, name="delta_c_fc1_relu")
    delta_c = X.conv(
        delta_c_hidden,
        filter=2 * cfg.roi_size**2,
        name="delta_c_fc2",
        init=X.gauss(0.01),
    )

    # Localization-branch offsets: 2 values per RoI.
    delta_r_hidden = X.conv(shared, filter=256, name="delta_r_fc1", init=X.gauss(0.01))
    delta_r_hidden = X.relu(delta_r_hidden, name="delta_r_fc1_relu")
    delta_r = X.conv(delta_r_hidden, filter=2, name="delta_r_fc2", init=X.gauss(0.01))

    # ---- re-pool features with the predicted offsets -----------------------
    rois_per_image = cfg.image_roi if is_train else 1000
    images_per_batch = cfg.batch_image

    tsd_cls_feat = self.delta_c_pool.get_roi_feature(
        fpn_conv_feats,
        rois,
        delta_c,
        image_rois=rois_per_image,
        batch_image=images_per_batch,
    )
    tsd_loc_feat = self.delta_r_pool.get_roi_feature(
        fpn_conv_feats,
        rois,
        delta_r,
        image_rois=rois_per_image,
        batch_image=images_per_batch,
    )

    # ---- TSD towers --------------------------------------------------------
    # NOTE(review): the original annotated the tower outputs as
    # [batch_roi, batch_roi, 1, 1]; that looks doubtful -- confirm against
    # _convs_and_fcs.
    tsd_cls = build_tower(
        tsd_cls_feat, cfg.TSD.num_shared_convs, cfg.TSD.num_shared_fcs, 'TSD_pc')
    tsd_reg = build_tower(
        tsd_loc_feat, cfg.TSD.num_shared_convs, cfg.TSD.num_shared_fcs, 'TSD_pr')
    tsd_cls = build_tower(tsd_cls, 0, cfg.TSD.num_cls_fcs, 'TSD_cls')
    tsd_reg = build_tower(tsd_reg, 0, cfg.TSD.num_reg_fcs, 'TSD_reg')

    n_cls = cfg.num_class
    n_reg_cls = 2 if cfg.regress_target.class_agnostic else n_cls

    tsd_cls_logit = X.fc(
        tsd_cls, filter=n_cls, name='tsd_cls_logit', init=X.gauss(0.01))
    tsd_bbox_delta = X.fc(
        tsd_reg, filter=4 * n_reg_cls, name='tsd_reg_delta', init=X.gauss(0.01))

    # ---- plain sibling head on the original RoI feature --------------------
    trunk = build_tower(
        roi_feat, cfg.TSD.num_shared_convs, cfg.TSD.num_shared_fcs, 'shared_fc')
    sib_cls = build_tower(trunk, 0, cfg.TSD.num_cls_fcs, 'cls')
    sib_reg = build_tower(trunk, 0, cfg.TSD.num_reg_fcs, 'reg')

    cls_logit = X.fc(
        sib_cls, filter=n_cls, name='bbox_cls_logit', init=X.gauss(0.01))
    bbox_delta = X.fc(
        sib_reg, filter=4 * n_reg_cls, name='bbox_reg_delta', init=X.gauss(0.01))

    if cfg.fp16:
        # Convert every returned symbol to fp32.
        cls_logit = X.to_fp32(cls_logit, name="cls_logits_fp32")
        bbox_delta = X.to_fp32(bbox_delta, name="bbox_delta_fp32")
        tsd_cls_logit = X.to_fp32(tsd_cls_logit, name="tsd_cls_logit_fp32")
        tsd_bbox_delta = X.to_fp32(tsd_bbox_delta, name="tsd_bbox_delta_fp32")
        delta_c = X.to_fp32(delta_c, name="delta_c_fp32")
        delta_r = X.to_fp32(delta_r, name="delta_r_fp32")

    return cls_logit, bbox_delta, tsd_cls_logit, tsd_bbox_delta, delta_c, delta_r
def get_refine_output(self, roi_feature, cls_logit, bbox_delta, cls_sec_logit, bbox_sec_delta):
    """Build refined class/box outputs from RoI features and two predictions.

    For each of the two prediction sets (primary and "sec"), the softmaxed
    class scores without column 0 and the box deltas without their first
    four columns are concatenated, tiled ``repeat_time`` times along axis 1,
    and appended to ``roi_feature``. Both refine inputs go through an fc
    layer that uses the same ``self.fc3_weight`` / ``self.fc3_bias``
    variables. Each then gets its own classification and regression output
    layer.

    Args:
        roi_feature: per-RoI feature concatenated (dim=1) with the tiled
            predictions.
        cls_logit: primary classification logits.
        bbox_delta: primary box-regression deltas.
        cls_sec_logit: secondary classification logits.
        bbox_sec_delta: secondary box-regression deltas.

    Returns:
        Tuple ``(refine_cls_logit, refine_bbox_delta, refine_cls_sec_logit,
        refine_bbox_sec_delta)``.
    """
    cfg = self.p
    n_cls = cfg.num_class
    n_tiles = cfg.repeat_time
    agnostic = cfg.regress_target.class_agnostic
    n_reg_cls = 2 if agnostic else n_cls

    # Drop column 0 of the class probabilities (presumably the background
    # class -- confirm against the label convention).
    prob1 = mx.sym.softmax(cls_logit)
    prob1 = mx.sym.slice_axis(prob1, axis=1, begin=1, end=n_cls)
    prob2 = mx.sym.softmax(cls_sec_logit)
    prob2 = mx.sym.slice_axis(prob2, axis=1, begin=1, end=n_cls)

    # Drop the first four regression columns (the first regression class).
    delta1 = mx.sym.slice_axis(bbox_delta, axis=1, begin=4, end=n_reg_cls * 4)
    delta2 = mx.sym.slice_axis(bbox_sec_delta, axis=1, begin=4, end=n_reg_cls * 4)

    pred1 = mx.sym.concat(delta1, prob1, dim=1)
    pred1 = mx.sym.tile(pred1, reps=(1, n_tiles))
    pred2 = mx.sym.concat(delta2, prob2, dim=1)
    pred2 = mx.sym.tile(pred2, reps=(1, n_tiles))

    refine_in1 = mx.sym.concat(roi_feature, pred1, dim=1)
    refine_in2 = mx.sym.concat(roi_feature, pred2, dim=1)

    # Both refine branches use the same fc3 weight and bias variables.
    hidden1 = X.fc(
        refine_in1,
        filter=1024,
        weight=self.fc3_weight,
        bias=self.fc3_bias,
        name='fc3_conv_refine1',
    )
    hidden1 = X.relu(hidden1)
    hidden2 = X.fc(
        refine_in2,
        filter=1024,
        weight=self.fc3_weight,
        bias=self.fc3_bias,
        name='fc3_conv_refine2',
    )
    hidden2 = X.relu(hidden2)

    out_cls1 = X.fc(
        hidden1, filter=n_cls, name='refine_bbox_cls_logit1', init=X.gauss(0.01))
    out_cls2 = X.fc(
        hidden2, filter=n_cls, name='refine_bbox_cls_logit2', init=X.gauss(0.01))
    out_reg1 = X.fc(
        hidden1, filter=4 * n_reg_cls, name='refine_bbox_reg_delta1', init=X.gauss(0.001))
    out_reg2 = X.fc(
        hidden2, filter=4 * n_reg_cls, name='refine_bbox_reg_delta2', init=X.gauss(0.001))

    return out_cls1, out_reg1, out_cls2, out_reg2