def get_loss(self, conv_feat, cls_label, bbox_target, bbox_weight):
    p = self.p
    batch_image = p.batch_image
    image_anchor = p.anchor_generate.image_anchor

    cls_logit, bbox_delta = self.get_output(conv_feat)

    scale_loss_shift = 128.0 if p.fp16 else 1.0

    # classification loss
    cls_logit_reshape = X.reshape(
        cls_logit,
        shape=(0, -4, 2, -1, 0, 0),  # (N,C,H,W) -> (N,2,C/2,H,W)
        name="rpn_cls_logit_reshape")
    cls_loss = X.softmax_output(
        data=cls_logit_reshape,
        label=cls_label,
        multi_output=True,
        normalization='valid',
        use_ignore=True,
        ignore_label=-1,
        grad_scale=1.0 * scale_loss_shift,
        name="rpn_cls_loss")

    # regression loss
    reg_loss = X.smooth_l1(
        (bbox_delta - bbox_target),
        scalar=3.0,
        name='rpn_reg_l1')
    reg_loss = bbox_weight * reg_loss
    reg_loss = X.loss(
        reg_loss,
        grad_scale=1.0 / (batch_image * image_anchor) * scale_loss_shift,
        name='rpn_reg_loss')

    return cls_loss, reg_loss
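
# A minimal standalone sketch (not part of the head) of what the
# (0, -4, 2, -1, 0, 0) reshape above does. In MXNet's reshape grammar, 0 copies
# an input dim, -4 splits the next input dim into the two values that follow,
# and -1 is inferred. With 2*A fg/bg channels this turns (N, 2A, H, W) into
# (N, 2, A, H, W), so softmax_output(multi_output=True) normalizes over the
# fg/bg axis. The shapes below are made up for illustration.
import mxnet as mx

x = mx.nd.zeros((2, 2 * 9, 32, 32))       # N=2, A=9 anchors -> 18 channels
y = x.reshape((0, -4, 2, -1, 0, 0))
assert y.shape == (2, 2, 9, 32, 32)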
def get_loss(self, conv_feat, cls_label, bbox_target, bbox_weight):
    p = self.p
    batch_roi = p.image_roi * p.batch_image
    batch_image = p.batch_image

    cls_logit, bbox_delta = self.get_output(conv_feat)

    scale_loss_shift = 128.0 if p.fp16 else 1.0

    # classification loss
    cls_loss = X.softmax_output(
        data=cls_logit,
        label=cls_label,
        normalization='batch',
        grad_scale=1.0 * scale_loss_shift,
        name='bbox_cls_loss')

    # bounding box regression
    reg_loss = X.smooth_l1(
        bbox_delta - bbox_target,
        scalar=1.0,
        name='bbox_reg_l1')
    reg_loss = bbox_weight * reg_loss
    reg_loss = X.loss(
        reg_loss,
        grad_scale=1.0 / batch_roi * scale_loss_shift,
        name='bbox_reg_loss')

    # append label
    cls_label = X.reshape(
        cls_label,
        shape=(batch_image, -1),
        name='bbox_label_reshape')
    cls_label = X.block_grad(cls_label, name='bbox_label_blockgrad')

    # output
    return cls_loss, reg_loss, cls_label
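
# A hedged numpy reference for the bbox_reg_l1 term above, assuming X.smooth_l1
# wraps MXNet's smooth_l1 operator. With scalar = sigma, MXNet defines
#   f(x) = 0.5 * (sigma * x)^2    if |x| < 1 / sigma^2
#   f(x) = |x| - 0.5 / sigma^2    otherwise
# so scalar=1.0 here is the classic Fast R-CNN smooth L1, while the RPN heads
# in this file use scalar=3.0 (a narrower quadratic zone).
import numpy as np

def smooth_l1_ref(x, sigma=1.0):
    # elementwise smooth L1, mirroring mx.nd.smooth_l1(data, scalar=sigma)
    absx = np.abs(x)
    return np.where(absx < 1.0 / sigma ** 2,
                    0.5 * (sigma * x) ** 2,
                    absx - 0.5 / sigma ** 2)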
def get_loss(self, conv_fpn_feat, cls_label, bbox_target, bbox_weight):
    p = self.p
    batch_image = p.batch_image
    image_anchor = p.anchor_generate.image_anchor
    rpn_stride = p.anchor_generate.stride

    cls_logit_dict, bbox_delta_dict = self.get_output(conv_fpn_feat)

    scale_loss_shift = 128.0 if p.fp16 else 1.0

    rpn_cls_logit_list = []
    rpn_bbox_delta_list = []

    for stride in rpn_stride:
        rpn_cls_logit = cls_logit_dict[stride]
        rpn_bbox_delta = bbox_delta_dict[stride]
        rpn_cls_logit_reshape = X.reshape(
            data=rpn_cls_logit,
            shape=(0, 2, -1),
            name="rpn_cls_score_reshape_stride%s" % stride)
        rpn_bbox_delta_reshape = X.reshape(
            data=rpn_bbox_delta,
            shape=(0, 0, -1),
            name="rpn_bbox_pred_reshape_stride%s" % stride)
        rpn_bbox_delta_list.append(rpn_bbox_delta_reshape)
        rpn_cls_logit_list.append(rpn_cls_logit_reshape)

    # concat output of each level
    rpn_bbox_delta_concat = X.concat(
        rpn_bbox_delta_list, axis=2, name="rpn_bbox_pred_concat")
    rpn_cls_logit_concat = X.concat(
        rpn_cls_logit_list, axis=2, name="rpn_cls_score_concat")

    # classification loss
    cls_loss = X.softmax_output(
        data=rpn_cls_logit_concat,
        label=cls_label,
        multi_output=True,
        normalization='valid',
        use_ignore=True,
        ignore_label=-1,
        grad_scale=1.0 * scale_loss_shift,
        name="rpn_cls_loss")

    # regression loss
    reg_loss = X.smooth_l1(
        (rpn_bbox_delta_concat - bbox_target),
        scalar=3.0,
        name='rpn_reg_l1')
    reg_loss = bbox_weight * reg_loss
    reg_loss = X.loss(
        reg_loss,
        grad_scale=1.0 / (batch_image * image_anchor) * scale_loss_shift,
        name='rpn_reg_loss')

    return cls_loss, reg_loss
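
# A shape sketch (illustrative sizes, not from any config) of the per-level
# flatten-and-concat above: each level's (N, 2A, H, W) logits are reshaped to
# (N, 2, A*H*W) and the levels are joined along axis 2, so a single softmax
# and a single smooth L1 cover all FPN levels at once.
import mxnet as mx

levels = {4: (64, 64), 8: (32, 32)}       # stride -> (H, W), with A = 3 anchors
logits = [mx.nd.zeros((1, 2 * 3, h, w)).reshape((0, 2, -1))
          for h, w in levels.values()]
cat = mx.nd.concat(*logits, dim=2)
assert cat.shape == (1, 2, 3 * (64 * 64 + 32 * 32))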
def get_loss(self, conv_feat, gt_bboxes, im_infos, rpn_groups):
    p = self.p
    num_class = p.num_class
    batch_image = p.batch_image
    image_anchor = p.anchor_generate.image_anchor

    cls_logit, bbox_delta = self.get_output(conv_feat)

    scale_loss_shift = 128.0 if p.fp16 else 1.0

    cls_label = X.var("rpn_cls_label")
    bbox_target = X.var("rpn_reg_target")
    bbox_weight = X.var("rpn_reg_weight")

    # classification loss
    cls_logit_reshape = X.reshape(
        cls_logit,
        shape=(0, -4, num_class, -1, 0, 0),  # (N,C,H,W) -> (N,num_class,C/num_class,H,W)
        name="rpn_cls_logit_reshape")

    if p.use_groupsoftmax:
        cls_loss = mx.sym.contrib.GroupSoftmaxOutput(
            data=cls_logit_reshape,
            label=cls_label,
            group=rpn_groups,
            multi_output=True,
            normalization='valid',
            use_ignore=True,
            ignore_label=-1,
            grad_scale=1.0 * scale_loss_shift,
            name="rpn_cls_loss")
    else:
        cls_loss = X.softmax_output(
            data=cls_logit_reshape,
            label=cls_label,
            multi_output=True,
            normalization='valid',
            use_ignore=True,
            ignore_label=-1,
            grad_scale=1.0 * scale_loss_shift,
            name="rpn_cls_loss")

    # regression loss
    reg_loss = X.smooth_l1(
        (bbox_delta - bbox_target),
        scalar=3.0,
        name='rpn_reg_l1')
    reg_loss = bbox_weight * reg_loss
    reg_loss = X.loss(
        reg_loss,
        grad_scale=1.0 / (batch_image * image_anchor) * scale_loss_shift,
        name='rpn_reg_loss')

    return cls_loss, reg_loss
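
# A conceptual numpy sketch of group-wise softmax. Assumption: the contrib
# GroupSoftmaxOutput op used above restricts normalization to the classes that
# share a group id, which is what `rpn_groups` encodes; consult the operator's
# C++ source for its exact semantics. This reference is illustrative only.
import numpy as np

def group_softmax_ref(logits, group):
    # logits: [num_class] raw scores, group: [num_class] integer group ids
    out = np.zeros_like(logits)
    for g in np.unique(group):
        m = group == g
        e = np.exp(logits[m] - logits[m].max())   # numerically stable softmax per group
        out[m] = e / e.sum()
    return out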
def get_loss(self, conv_fpn_feat, gt_bbox, im_info):
    p = self.p
    batch_image = p.batch_image
    image_anchor = p.anchor_assign.image_anchor
    rpn_stride = p.anchor_generate.stride
    anchor_scale = p.anchor_generate.scale
    anchor_ratio = p.anchor_generate.ratio
    num_anchor = len(anchor_ratio) * len(anchor_scale)

    cls_logit_dict, bbox_delta_dict = self.get_output(conv_fpn_feat)

    scale_loss_shift = 128.0 if p.fp16 else 1.0

    rpn_cls_logit_list = []
    rpn_bbox_delta_list = []
    feat_list = []

    for stride in rpn_stride:
        rpn_cls_logit = cls_logit_dict[stride]
        rpn_bbox_delta = bbox_delta_dict[stride]
        rpn_cls_logit_reshape = X.reshape(
            data=rpn_cls_logit,
            shape=(0, 2, num_anchor, -1),
            name="rpn_cls_score_reshape_stride%s" % stride)
        rpn_bbox_delta_reshape = X.reshape(
            data=rpn_bbox_delta,
            shape=(0, 0, -1),
            name="rpn_bbox_pred_reshape_stride%s" % stride)
        rpn_bbox_delta_list.append(rpn_bbox_delta_reshape)
        rpn_cls_logit_list.append(rpn_cls_logit_reshape)
        feat_list.append(rpn_cls_logit)

    if p.nnvm_rpn_target:
        from mxnext.tvm.rpn_target import _fpn_rpn_target_batch
        anchor_list = [self.anchor_dict["stride%s" % s] for s in rpn_stride]
        gt_bbox = mx.sym.slice_axis(gt_bbox, axis=-1, begin=0, end=4)
        max_side = p.anchor_generate.max_side
        allowed_border = p.anchor_assign.allowed_border
        fg_fraction = p.anchor_assign.pos_fraction
        fg_thr = p.anchor_assign.pos_thr
        bg_thr = p.anchor_assign.neg_thr
        cls_label, bbox_target, bbox_weight = _fpn_rpn_target_batch(
            mx.sym, feat_list, anchor_list, gt_bbox, im_info,
            batch_image, num_anchor, max_side, rpn_stride, allowed_border,
            image_anchor, fg_fraction, fg_thr, bg_thr)
    else:
        cls_label = X.var("rpn_cls_label")
        bbox_target = X.var("rpn_reg_target")
        bbox_weight = X.var("rpn_reg_weight")

    # concat output of each level
    rpn_bbox_delta_concat = X.concat(
        rpn_bbox_delta_list, axis=2, name="rpn_bbox_pred_concat")
    rpn_cls_logit_concat = X.concat(
        rpn_cls_logit_list, axis=-1, name="rpn_cls_score_concat")

    # classification loss
    cls_loss = X.softmax_output(
        data=rpn_cls_logit_concat,
        label=cls_label,
        multi_output=True,
        normalization='valid',
        use_ignore=True,
        ignore_label=-1,
        grad_scale=1.0 * scale_loss_shift,
        name="rpn_cls_loss")

    # regression loss
    reg_loss = X.smooth_l1(
        (rpn_bbox_delta_concat - bbox_target),
        scalar=3.0,
        name='rpn_reg_l1')
    reg_loss = bbox_weight * reg_loss
    reg_loss = X.loss(
        reg_loss,
        grad_scale=1.0 / (batch_image * image_anchor) * scale_loss_shift,
        name='rpn_reg_loss')

    return cls_loss, reg_loss, X.stop_grad(cls_label, "rpn_cls_label_blockgrad")
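
# A minimal numpy sketch of the anchor assignment that _fpn_rpn_target_batch
# performs on device. Assumption: it follows the standard RPN rules — IoU >=
# fg_thr marks an anchor positive, IoU < bg_thr marks it negative, everything
# in between is ignored (label -1, matching ignore_label above), and at most
# image_anchor samples with pos_fraction positives are then kept.
import numpy as np

def assign_anchors_ref(iou, fg_thr=0.7, bg_thr=0.3):
    # iou: [num_anchor] best IoU of each anchor against any gt box
    label = np.full(iou.shape, -1, dtype=np.int64)  # -1 = ignored by the loss
    label[iou < bg_thr] = 0                         # background
    label[iou >= fg_thr] = 1                        # foreground
    return label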
def get_loss(self, rois, roi_feat, fpn_conv_feats, cls_label, bbox_target,
             bbox_weight, gt_bbox):
    '''
    Args:
        rois: [batch_image, image_roi, 4]
        roi_feat: [batch_image * image_roi, 256, roi_size, roi_size]
        fpn_conv_feats: dict of FPN features, each [batch_image, in_channels, fh, fw]
        cls_label: [batch_image * image_roi]
        bbox_target: [batch_image * image_roi, num_class * 4]
        bbox_weight: [batch_image * image_roi, num_class * 4]
        gt_bbox: [batch_image, max_gt_num, 4]
    Returns:
        cls_loss: [batch_image * image_roi, num_class]
        reg_loss: [batch_image * image_roi, num_class * 4]
        tsd_cls_loss: [batch_image * image_roi, num_class]
        tsd_reg_loss: [batch_image * image_roi, num_class * 4]
        tsd_cls_pc_loss (only if p.TSD.pc_cls): [batch_image * image_roi]
        tsd_reg_pc_loss (only if p.TSD.pc_reg): [batch_image * image_roi]
        cls_label: [batch_image, image_roi]
    '''
    p = self.p
    assert not p.regress_target.class_agnostic
    batch_image = p.batch_image
    image_roi = p.image_roi
    batch_roi = batch_image * image_roi
    smooth_l1_scalar = p.regress_target.smooth_l1_scalar or 1.0

    cls_logit, bbox_delta, tsd_cls_logit, tsd_bbox_delta, delta_c, delta_r = \
        self.get_output(fpn_conv_feats, roi_feat, rois, is_train=True)

    # decode the TSD-shifted proposals and build their regression targets
    rois_r = self._get_delta_r_box(delta_r, rois)
    tsd_reg_target = self.get_reg_target(rois_r, gt_bbox)  # [batch_roi, num_class * 4]

    scale_loss_shift = 128.0 if p.fp16 else 1.0

    # origin (sibling head) loss
    cls_loss = X.softmax_output(
        data=cls_logit,
        label=cls_label,
        normalization='batch',
        grad_scale=1.0 * scale_loss_shift,
        name='bbox_cls_loss')
    reg_loss = X.smooth_l1(
        bbox_delta - bbox_target,
        scalar=smooth_l1_scalar,
        name='bbox_reg_l1')
    reg_loss = bbox_weight * reg_loss
    reg_loss = X.loss(
        reg_loss,
        grad_scale=1.0 / batch_roi * scale_loss_shift,
        name='bbox_reg_loss')

    # tsd loss
    tsd_cls_loss = X.softmax_output(
        data=tsd_cls_logit,
        label=cls_label,
        normalization='batch',
        grad_scale=1.0 * scale_loss_shift,
        name='tsd_bbox_cls_loss')
    tsd_reg_loss = X.smooth_l1(
        tsd_bbox_delta - tsd_reg_target,
        scalar=smooth_l1_scalar,
        name='tsd_bbox_reg_l1')
    tsd_reg_loss = bbox_weight * tsd_reg_loss
    tsd_reg_loss = X.loss(
        tsd_reg_loss,
        grad_scale=1.0 / batch_roi * scale_loss_shift,
        name='tsd_bbox_reg_loss')

    # the progressive-constraint losses are only appended when enabled,
    # so they must not appear in the initial list
    losses = [cls_loss, reg_loss, tsd_cls_loss, tsd_reg_loss]

    if p.TSD.pc_cls:
        losses.append(
            self.cls_pc_loss(cls_logit, tsd_cls_logit, cls_label,
                             scale_loss_shift))
    if p.TSD.pc_reg:
        losses.append(
            self.reg_pc_loss(bbox_delta, tsd_bbox_delta, rois, rois_r,
                             gt_bbox, cls_label, scale_loss_shift))

    # append label
    cls_label = X.reshape(
        cls_label,
        shape=(batch_image, -1),
        name='bbox_label_reshape')
    cls_label = X.block_grad(cls_label, name='bbox_label_blockgrad')
    losses.append(cls_label)

    return tuple(losses)
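
# A hedged numpy sketch of the classification progressive constraint computed
# by self.cls_pc_loss above. Assumption: it follows the margin form of the TSD
# paper (Song et al., CVPR 2020), penalizing the TSD head when its confidence
# on the ground-truth class does not exceed the sibling head's by a margin m_c.
# The repo's actual implementation may differ in reduction and scaling.
import numpy as np

def cls_pc_loss_ref(sib_prob, tsd_prob, label, margin=0.2):
    # sib_prob, tsd_prob: [num_roi, num_class] softmax probabilities
    # label: [num_roi] ground-truth class indices
    idx = np.arange(label.shape[0])
    gap = tsd_prob[idx, label] - sib_prob[idx, label]
    return np.maximum(0.0, margin - gap).mean()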