def _squeeze_excitation(self, input, num_channels, name=None):
    """Gate ``input`` with weights computed from its global average pool.

    The input is globally average-pooled, fed through a ReLU fully
    connected layer of size ``int(num_channels / self.reduction_ratio)``
    and then a sigmoid fully connected layer of size ``num_channels``.
    The result is multiplied into ``input`` with ``axis=0``.

    Args:
        input: Feature variable to gate.
        num_channels: Output size of the second (sigmoid) layer.
        name: Optional prefix for the four parameter names. When ``None``
            the parameters are left unnamed instead of raising
            ``TypeError`` on ``None + str`` as the previous code did.

    Returns:
        The result of ``elementwise_mul(input, excitation, axis=0)``.
    """

    def _param_name(suffix):
        # Only build an explicit parameter name when a prefix was supplied;
        # a ``None`` name is presumably auto-named by the framework.
        return None if name is None else name + suffix

    # cuDNN pooling is only requested when mixed precision is active.
    mixed_precision_enabled = mixed_precision_global_state() is not None
    pool = fluid.layers.pool2d(
        input=input,
        pool_size=0,
        pool_type='avg',
        global_pooling=True,
        use_cudnn=mixed_precision_enabled)

    # Uniform init bound derived from the pooled feature's second dimension.
    stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
    squeeze = fluid.layers.fc(
        input=pool,
        size=int(num_channels / self.reduction_ratio),
        act='relu',
        param_attr=fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv),
            name=_param_name('_sqz_weights')),
        bias_attr=ParamAttr(name=_param_name('_sqz_offset')))

    # Same init rule, now based on the squeezed feature's second dimension.
    stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
    excitation = fluid.layers.fc(
        input=squeeze,
        size=num_channels,
        act='sigmoid',
        param_attr=fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv),
            name=_param_name('_exc_weights')),
        bias_attr=ParamAttr(name=_param_name('_exc_offset')))

    scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
    return scale
def build(self, feed_vars, mode='train'):
    """Assemble the network and return its loss or its predictions.

    Args:
        feed_vars: Mapping of input variables. ``'image'`` is always read;
            ``'gt_box'``, ``'gt_label'`` and ``'gt_score'`` are read when
            ``mode == 'train'``, ``'im_size'`` otherwise.
        mode: ``'train'`` builds the loss; any other value builds
            predictions.

    Returns:
        ``{'loss': ...}`` in train mode, otherwise whatever
        ``self.yolo_head.get_prediction`` returns.
    """
    image = feed_vars['image']
    use_fp16 = mixed_precision_global_state() is not None

    # Under mixed precision the backbone consumes an FP16 image.
    if use_fp16:
        image = fluid.layers.cast(image, 'float16')

    feats = self.backbone(image)
    if isinstance(feats, OrderedDict):
        # The head takes a plain list, in the backbone's key order.
        feats = list(feats.values())

    # The head runs in FP32, so cast the features back.
    if use_fp16:
        feats = [fluid.layers.cast(feat, 'float32') for feat in feats]

    if mode != 'train':
        return self.yolo_head.get_prediction(feats, feed_vars['im_size'])

    boxes = feed_vars['gt_box']
    labels = feed_vars['gt_label']
    scores = feed_vars['gt_score']
    return {'loss': self.yolo_head.get_loss(feats, boxes, labels, scores)}
def __call__(self, roi_feat):
    """Apply two ReLU fully connected layers (``fc6`` then ``fc7``).

    Under mixed precision the input is cast to FP16 before the layers and
    the output is cast back to FP32.

    Args:
        roi_feat: Feature variable. The product of ``shape[1]``,
            ``shape[2]`` and ``shape[3]`` is passed as ``fan_out`` to the
            ``fc6`` Xavier initializer.

    Returns:
        The output of the ``fc7`` layer.
    """
    dims = roi_feat.shape
    fan = dims[1] * dims[2] * dims[3]

    use_fp16 = mixed_precision_global_state() is not None
    if use_fp16:
        roi_feat = fluid.layers.cast(roi_feat, 'float16')

    fc6_weight = ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan))
    fc6_bias = ParamAttr(
        name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))
    fc6 = fluid.layers.fc(
        input=roi_feat,
        size=self.mlp_dim,
        act='relu',
        name='fc6',
        param_attr=fc6_weight,
        bias_attr=fc6_bias)

    fc7_weight = ParamAttr(name='fc7_w', initializer=Xavier())
    fc7_bias = ParamAttr(
        name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))
    head_feat = fluid.layers.fc(
        input=fc6,
        size=self.mlp_dim,
        act='relu',
        name='fc7',
        param_attr=fc7_weight,
        bias_attr=fc7_bias)

    if use_fp16:
        head_feat = fluid.layers.cast(head_feat, 'float32')
    return head_feat
def build(self, feed_vars, mode='train'):
    """Assemble backbone, FPN and head; return losses or predictions.

    Args:
        feed_vars: Mapping of input variables. ``'image'`` is always read;
            ``'gt_label'``, ``'gt_target'`` and ``'fg_num'`` are read in
            train mode, ``'im_info'`` otherwise.
        mode: ``'train'`` builds losses; any other value builds
            predictions.

    Returns:
        In train mode, the loss dict from ``self.efficient_head.get_loss``
        with an added ``'loss'`` entry equal to
        ``loss_cls + self.box_loss_weight * loss_bbox``; otherwise the
        result of ``self.efficient_head.get_prediction``.
    """
    training = mode == 'train'

    im = feed_vars['image']
    if training:
        gt_labels = feed_vars['gt_label']
        gt_targets = feed_vars['gt_target']
        fg_num = feed_vars['fg_num']
    else:
        im_info = feed_vars['im_info']

    use_fp16 = mixed_precision_global_state() is not None
    if use_fp16:
        im = fluid.layers.cast(im, 'float16')
    feats = self.backbone(im)
    if use_fp16:
        feats = [fluid.layers.cast(feat, 'float32') for feat in feats]
    feats = self.fpn(feats)

    # XXX not used for training, but the parameters are needed when
    # exporting inference model
    anchors = self.anchor_grid()

    if not training:
        return self.efficient_head.get_prediction(feats, anchors, im_info)

    loss = self.efficient_head.get_loss(feats, gt_labels, gt_targets, fg_num)
    cls_term = loss['loss_cls']
    box_term = loss['loss_bbox']
    loss.update({'loss': cls_term + self.box_loss_weight * box_term})
    return loss
def build(self, feed_vars, mode='train', exclude_nms=False):
    """Assemble backbone and TTF head; return losses or boxes.

    Args:
        feed_vars: Mapping of input variables. ``'image'`` is always read;
            ``'ttf_heatmap'``, ``'ttf_box_target'`` and ``'ttf_reg_weight'``
            are read in train mode, ``'scale_factor'`` otherwise.
        mode: ``'train'`` builds losses; any other value builds boxes. The
            head is told ``is_test=True`` only when ``mode == 'test'``.
        exclude_nms: Accepted but not read by this method.

    Returns:
        In train mode, the head's loss dict with an added ``'loss'`` entry
        holding the sum of its values; otherwise the result of
        ``self.ttf_head.get_bboxes``.
    """
    image = feed_vars['image']
    use_fp16 = mixed_precision_global_state() is not None

    # Backbone runs on an FP16 image under mixed precision.
    if use_fp16:
        image = fluid.layers.cast(image, 'float16')

    feats = self.backbone(image)
    if isinstance(feats, OrderedDict):
        feats = list(feats.values())

    # Head runs in FP32.
    if use_fp16:
        feats = [fluid.layers.cast(feat, 'float32') for feat in feats]

    predict_hm, predict_wh = self.ttf_head.get_output(
        feats, 'ttf_head', is_test=mode == 'test')

    if mode != 'train':
        return self.ttf_head.get_bboxes(predict_hm, predict_wh,
                                        feed_vars['scale_factor'])

    heatmap = feed_vars['ttf_heatmap']
    box_target = feed_vars['ttf_box_target']
    reg_weight = feed_vars['ttf_reg_weight']
    loss = self.ttf_head.get_loss(predict_hm, predict_wh, heatmap,
                                  box_target, reg_weight)
    loss.update({'loss': fluid.layers.sum(list(loss.values()))})
    return loss
def build(self, feed_vars, mode='train'):
    """Assemble backbone and multi-box head; return loss or decoded boxes.

    Args:
        feed_vars: Mapping of input variables. ``'image'`` is always read;
            ``'gt_bbox'`` and ``'gt_class'`` are read when ``mode`` is
            ``'train'`` or ``'eval'``.
        mode: ``'train'`` builds the loss; any other value builds
            predictions.

    Returns:
        ``{'loss': ...}`` (reduced sum of ``ssd_loss``) in train mode,
        otherwise ``{'bbox': ...}`` from ``self.output_decoder``.
    """
    im = feed_vars['image']
    if mode in ('train', 'eval'):
        gt_bbox = feed_vars['gt_bbox']
        gt_class = feed_vars['gt_class']

    use_fp16 = mixed_precision_global_state() is not None

    # Backbone input goes to FP16 under mixed precision.
    if use_fp16:
        im = fluid.layers.cast(im, 'float16')

    feats = self.backbone(im)
    if isinstance(feats, OrderedDict):
        feats = list(feats.values())

    # Features return to FP32 for the head.
    if use_fp16:
        feats = [fluid.layers.cast(feat, 'float32') for feat in feats]

    # Note: ``im`` here is the (possibly FP16-cast) image, as in the
    # original code.
    locs, confs, box, box_var = self.multi_box_head(
        inputs=feats, image=im, num_classes=self.num_classes)

    if mode != 'train':
        return {'bbox': self.output_decoder(locs, confs, box, box_var)}

    per_item_loss = fluid.layers.ssd_loss(locs, confs, gt_bbox, gt_class,
                                          box, box_var)
    return {'loss': fluid.layers.reduce_sum(per_item_loss)}
def build(self, feed_vars, mode='train'):
    """Assemble backbone, FPN and retina head; return losses or predictions.

    Args:
        feed_vars: Mapping of input variables. ``'image'`` and
            ``'im_info'`` are always read; ``'gt_bbox'``, ``'gt_class'``
            and ``'is_crowd'`` are read in train mode.
        mode: ``'train'`` builds losses; any other value builds
            predictions.

    Returns:
        In train mode, the head's loss dict with an added ``'loss'`` entry
        holding the sum of its values; otherwise the result of
        ``self.retina_head.get_prediction``.
    """
    training = mode == 'train'

    im = feed_vars['image']
    im_info = feed_vars['im_info']
    if training:
        gt_bbox = feed_vars['gt_bbox']
        gt_class = feed_vars['gt_class']
        is_crowd = feed_vars['is_crowd']

    use_fp16 = mixed_precision_global_state() is not None

    # FP16 input for the backbone under mixed precision.
    if use_fp16:
        im = fluid.layers.cast(im, 'float16')

    body_feats = self.backbone(im)

    # Back to FP32, keeping the backbone's key order.
    if use_fp16:
        body_feats = OrderedDict(
            (key, fluid.layers.cast(feat, 'float32'))
            for key, feat in body_feats.items())

    body_feats, spatial_scale = self.fpn.get_output(body_feats)

    if not training:
        return self.retina_head.get_prediction(body_feats, spatial_scale,
                                               im_info)

    loss = self.retina_head.get_loss(body_feats, spatial_scale, im_info,
                                     gt_bbox, gt_class, is_crowd)
    loss.update({'loss': fluid.layers.sum(list(loss.values()))})
    return loss
def depthwise_separable(self,
                        input,
                        num_filters1,
                        num_filters2,
                        num_groups,
                        stride,
                        scale,
                        name=None):
    """Stack a grouped 3x3 ``_conv_norm`` and a 1x1 ``_conv_norm``.

    Args:
        input: Input feature variable.
        num_filters1: Filter count of the 3x3 layer before scaling.
        num_filters2: Filter count of the 1x1 layer before scaling.
        num_groups: Group count of the 3x3 layer before scaling.
        stride: Stride of the 3x3 layer.
        scale: Multiplier applied (then truncated with ``int``) to the
            filter and group counts.
        name: Prefix for the layer names; ``"_dw"`` and ``"_sep"`` are
            appended, so it must be a string despite the ``None`` default.

    Returns:
        Output of the 1x1 layer.
    """
    # cuDNN is only requested for the grouped conv under mixed precision.
    use_cudnn = mixed_precision_global_state() is not None

    dw_out = self._conv_norm(
        input=input,
        filter_size=3,
        num_filters=int(num_filters1 * scale),
        stride=stride,
        padding=1,
        num_groups=int(num_groups * scale),
        use_cudnn=use_cudnn,
        name=name + "_dw")

    return self._conv_norm(
        input=dw_out,
        filter_size=1,
        num_filters=int(num_filters2 * scale),
        stride=1,
        padding=0,
        name=name + "_sep")
def build(self, feed_vars, mode='train'):
    """Assemble backbone, optional FPN, mask head and bbox head.

    Args:
        feed_vars: Mapping of input variables. ``'image'`` is always read.
            In train mode ``'fg_num'`` is read, plus any of
            ``'ins_label{i}'``, ``'cate_label{i}'`` and ``'grid_order{i}'``
            that are present for ``i`` in ``range(self.num_level)``.
            Otherwise ``'im_info'`` is read.
        mode: ``'train'`` builds losses; any other value builds
            predictions.

    Returns:
        In train mode, the head's loss dict with an added ``'loss'`` entry
        holding the sum of its values; otherwise the result of
        ``self.bbox_head.get_prediction``.
    """

    def _present(template):
        # Collect the per-level entries that actually exist in feed_vars.
        keys = (template.format(i) for i in range(self.num_level))
        return [feed_vars[key] for key in keys if key in feed_vars]

    im = feed_vars['image']
    use_fp16 = mixed_precision_global_state() is not None

    # FP16 input under mixed precision.
    if use_fp16:
        im = fluid.layers.cast(im, 'float16')

    feats = self.backbone(im)

    if self.fpn is not None:
        feats, spatial_scale = self.fpn.get_output(feats)

    if isinstance(feats, OrderedDict):
        feats = list(feats.values())

    # FP32 features for the heads.
    if use_fp16:
        feats = [fluid.layers.cast(feat, 'float32') for feat in feats]

    mask_feat_pred = self.mask_head.get_output(feats)

    if mode != 'train':
        im_info = feed_vars['im_info']
        outs = self.bbox_head.get_outputs(feats, is_eval=True)
        seg_inputs = outs + (mask_feat_pred, im_info)
        return self.bbox_head.get_prediction(*seg_inputs)

    fg_num = feed_vars['fg_num']
    ins_labels = _present('ins_label{}')
    cate_labels = _present('cate_label{}')
    grid_orders = _present('grid_order{}')

    cate_preds, kernel_preds = self.bbox_head.get_outputs(feats)
    losses = self.bbox_head.get_loss(cate_preds, kernel_preds,
                                     mask_feat_pred, ins_labels,
                                     cate_labels, grid_orders, fg_num)
    losses.update({'loss': fluid.layers.sum(list(losses.values()))})
    return losses
def build(self, feed_vars, mode='train'):
    """Assemble backbone, FPN, mask-feature head and SOLO head.

    Args:
        feed_vars: Mapping of input variables. ``'image'`` is always read.
            In train mode ``'layer{i}_gt_objs'``, ``'layer{i}_gt_clss'``,
            ``'layer{i}_gt_masks'`` and ``'layer{i}_gt_pos_idx'`` are read
            for every ``i`` in ``range(len(self.solo_head.strides))``.
            Otherwise ``'ori_shape'`` and ``'resize_shape'`` are read.
        mode: ``'train'`` builds losses; any other value builds
            segmentation predictions.

    Returns:
        In train mode, the head's loss dict with an added ``'loss'`` entry
        holding the sum of its values; otherwise the result of
        ``self.solo_head.get_seg``.
    """
    im = feed_vars['image']
    use_fp16 = mixed_precision_global_state() is not None

    # FP16 input under mixed precision.
    if use_fp16:
        im = fluid.layers.cast(im, 'float16')

    body_feats = self.backbone(im)

    # FP32 features for FPN and heads.
    if use_fp16:
        body_feats = OrderedDict(
            (key, fluid.layers.cast(feat, 'float32'))
            for key, feat in body_feats.items())

    body_feats, spatial_scale = self.fpn.get_output(body_feats)

    # MaskFeatHead: mask prototypes, [bs, 256, s4, s4] (per the original
    # author's note).
    mask_feats = self.mask_feat_head.get_mask_feats(body_feats)

    if mode != 'train':
        ori_shape = feed_vars['ori_shape']
        resize_shape = feed_vars['resize_shape']

        # Per the original author's notes: each kernel_preds element is
        # [N, 256, seg_num_grid, seg_num_grid] (the predicted conv kernel of
        # each grid cell); each cls_preds element is
        # [N, seg_num_grid, seg_num_grid, 80] (per-cell probabilities, already
        # sigmoid-activated). Both run from small to large receptive field.
        kernel_preds, cls_preds = self.solo_head.get_prediction(
            body_feats, eval=True)
        return self.solo_head.get_seg(kernel_preds, cls_preds, mask_feats,
                                      ori_shape, resize_shape)

    # Per the original author's notes: each kernel_preds element is
    # [N, 256, seg_num_grid, seg_num_grid] (the predicted conv kernel of each
    # grid cell); each cls_preds element is
    # [N, 80, seg_num_grid, seg_num_grid] (per-cell scores, not yet
    # sigmoid-activated). Both run from small to large receptive field.
    kernel_preds, cls_preds = self.solo_head.get_prediction(
        body_feats, eval=False)

    gt_objs, gt_clss, gt_masks, gt_pos_idx = [], [], [], []
    for level in range(len(self.solo_head.strides)):
        gt_objs.append(feed_vars['layer%d_gt_objs' % level])
        gt_clss.append(feed_vars['layer%d_gt_clss' % level])
        gt_masks.append(feed_vars['layer%d_gt_masks' % level])
        gt_pos_idx.append(feed_vars['layer%d_gt_pos_idx' % level])

    loss = self.solo_head.get_loss(kernel_preds, cls_preds, mask_feats,
                                   gt_objs, gt_clss, gt_masks, gt_pos_idx)
    loss.update({'loss': fluid.layers.sum(list(loss.values()))})
    return loss
def build(self, feed_vars, mode='train'):
    """Assemble backbone, FPN and YOLACT head; return losses or predictions.

    Leftover debugging output (two ``print`` calls) and a list literal
    that was evaluated and discarded have been removed; the graph that is
    built is unchanged.

    Args:
        feed_vars: Mapping of input variables. It is first passed to
            ``self._input_check`` with the mode's required field list.
            ``'image'`` and ``'im_info'`` are always read; in train mode
            ``'gt_bbox'``, ``'gt_class'``, ``'gt_segm'``, ``'is_crowd'``
            and ``'gt_num'`` are read as well.
        mode: ``'train'`` builds losses; any other value builds
            predictions.

    Returns:
        In train mode, the head's loss dict with an added ``'loss'`` entry
        holding the sum of its values; otherwise the result of
        ``self.yolact_head.get_prediction``.
    """
    if mode == 'train':
        # NOTE(review): 'gt_num' is read below but is not listed here;
        # confirm whether _input_check should also cover it.
        required_fields = [
            'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd',
            'gt_segm'
        ]
    else:
        required_fields = ['im_shape', 'im_info']
    self._input_check(required_fields, feed_vars)

    im = feed_vars['image']
    im_info = feed_vars['im_info']
    if mode == 'train':
        gt_box = feed_vars['gt_bbox']
        gt_class = feed_vars['gt_class']
        gt_segm = feed_vars['gt_segm']
        is_crowd = feed_vars['is_crowd']
        gt_num = feed_vars['gt_num']

    mixed_precision_enabled = mixed_precision_global_state() is not None

    # cast inputs to FP16
    if mixed_precision_enabled:
        im = fluid.layers.cast(im, 'float16')

    # backbone
    body_feats = self.backbone(im)

    # cast features back to FP32
    if mixed_precision_enabled:
        body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32'))
                                 for k, v in body_feats.items())

    # FPN
    body_feats, spatial_scale = self.fpn.get_output(body_feats)

    # yolact head
    if mode == 'train':
        loss = self.yolact_head.get_loss(body_feats, spatial_scale, im_info,
                                         gt_box, gt_class, gt_segm,
                                         is_crowd, gt_num)
        total_loss = fluid.layers.sum(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss
    else:
        pred = self.yolact_head.get_prediction(body_feats, spatial_scale,
                                               im_info)
        return pred
def build(self, feed_vars, mode='train'):
    """Assemble backbone, FPN and FCOS head; return losses or predictions.

    Args:
        feed_vars: Mapping of input variables. ``'image'`` and
            ``'im_info'`` are always read. In train mode any of
            ``'labels{i}'``, ``'reg_target{i}'`` and ``'centerness{i}'``
            that are present are collected for ``i`` in
            ``range(len(self.fcos_head.fpn_stride))``.
        mode: ``'train'`` builds losses; any other value builds
            predictions.

    Returns:
        In train mode, the head's loss dict with an added ``'loss'`` entry
        holding the sum of its values; otherwise the result of
        ``self.fcos_head.get_prediction``.
    """

    def _present(template):
        # Gather the per-level targets that exist in feed_vars, in level order.
        levels = range(len(self.fcos_head.fpn_stride))
        keys = (template.format(i) for i in levels)
        return [feed_vars[key] for key in keys if key in feed_vars]

    im = feed_vars['image']
    im_info = feed_vars['im_info']

    use_fp16 = mixed_precision_global_state() is not None

    # FP16 input under mixed precision.
    if use_fp16:
        im = fluid.layers.cast(im, 'float16')

    body_feats = self.backbone(im)

    # FP32 features for FPN and head.
    if use_fp16:
        body_feats = OrderedDict(
            (key, fluid.layers.cast(feat, 'float32'))
            for key, feat in body_feats.items())

    body_feats, spatial_scale = self.fpn.get_output(body_feats)

    if mode != 'train':
        return self.fcos_head.get_prediction(body_feats, im_info)

    tag_labels = _present('labels{}')
    tag_bboxes = _present('reg_target{}')
    tag_centerness = _present('centerness{}')

    loss = self.fcos_head.get_loss(body_feats, tag_labels, tag_bboxes,
                                   tag_centerness)
    loss.update({'loss': fluid.layers.sum(list(loss.values()))})
    return loss
def get_add_weights(self, inputs):
    """Derive per-input fusion weights from globally pooled features.

    Each input is globally average-pooled, the pooled tensors are
    concatenated on axis 1, passed through ``self.squeeze_excitation``,
    reshaped to ``(len(inputs), -1, self.num_channels)`` and softmaxed
    over axis 0.

    Args:
        inputs: Sequence of feature variables.

    Returns:
        The softmaxed weight variable.
    """
    # cuDNN pooling is only requested under mixed precision.
    use_cudnn = mixed_precision_global_state() is not None

    pooled = []
    for feat in inputs:
        pooled.append(
            fluid.layers.pool2d(
                input=feat,
                pool_size=0,
                pool_type='avg',
                global_pooling=True,
                use_cudnn=use_cudnn))
    stacked = fluid.layers.concat(pooled, axis=1)

    weights = self.squeeze_excitation(stacked,
                                      self.num_channels * len(inputs))
    weights = fluid.layers.reshape(
        weights, (len(inputs), -1, self.num_channels))
    return fluid.layers.softmax(weights, axis=0)
def build(self, feed_vars, mode='train', exclude_nms=False):
    """Assemble backbone and YOLO head; return losses or predictions.

    Args:
        feed_vars: Mapping of input variables. ``'image'`` is always read.
            In train mode ``'gt_bbox'``, ``'gt_class'`` and ``'gt_score'``
            are read, plus any ``'target{i}'`` present for ``i`` in
            ``range(len(self.yolo_head.anchor_masks))``. Otherwise
            ``'im_size'`` is read.
        mode: ``'train'`` builds losses; any other value builds
            predictions.
        exclude_nms: Forwarded to ``self.yolo_head.get_prediction``; per
            the original comment it exists for benchmarking without the
            NMS post-process.

    Returns:
        In train mode, the head's loss dict with an added ``'loss'`` entry
        holding the sum of its values; otherwise the head's prediction.
    """
    image = feed_vars['image']
    use_fp16 = mixed_precision_global_state() is not None

    # FP16 input under mixed precision.
    if use_fp16:
        image = fluid.layers.cast(image, 'float16')

    feats = self.backbone(image)
    if isinstance(feats, OrderedDict):
        feats = list(feats.values())

    # FP32 features for the head.
    if use_fp16:
        feats = [fluid.layers.cast(feat, 'float32') for feat in feats]

    if mode != 'train':
        im_size = feed_vars['im_size']
        # exclude_nms only for benchmark, postprocess(NMS) is not needed
        return self.yolo_head.get_prediction(
            feats, im_size, exclude_nms=exclude_nms)

    gt_bbox = feed_vars['gt_bbox']
    gt_class = feed_vars['gt_class']
    gt_score = feed_vars['gt_score']

    # Targets for the split YOLO loss: one optional entry per output layer.
    layer_keys = ('target{}'.format(i)
                  for i in range(len(self.yolo_head.anchor_masks)))
    targets = [feed_vars[key] for key in layer_keys if key in feed_vars]

    loss = self.yolo_head.get_loss(feats, gt_bbox, gt_class, gt_score,
                                   targets)
    loss.update({'loss': fluid.layers.sum(list(loss.values()))})
    return loss
def build(self, feed_vars, mode='train'):
    """Assemble backbone and YOLO head; return losses or predictions.

    Args:
        feed_vars: Mapping of input variables. ``'image'`` is always read.
            In train mode ``'gt_bbox'``, ``'gt_class'`` and ``'gt_score'``
            are read, plus any of ``'target0'``..``'target2'`` that are
            present. Otherwise ``'im_size'`` is read.
        mode: ``'train'`` builds losses; any other value builds
            predictions.

    Returns:
        In train mode, the head's loss dict with an added ``'loss'`` entry
        holding the sum of its values; otherwise the head's prediction.
    """
    image = feed_vars['image']
    use_fp16 = mixed_precision_global_state() is not None

    # FP16 input under mixed precision.
    if use_fp16:
        image = fluid.layers.cast(image, 'float16')

    feats = self.backbone(image)
    if isinstance(feats, OrderedDict):
        feats = list(feats.values())

    # FP32 features for the head.
    if use_fp16:
        feats = [fluid.layers.cast(feat, 'float32') for feat in feats]

    if mode != 'train':
        return self.yolo_head.get_prediction(feats, feed_vars['im_size'])

    gt_bbox = feed_vars['gt_bbox']
    gt_class = feed_vars['gt_class']
    gt_score = feed_vars['gt_score']

    # Targets for the split YOLO loss.
    # YOLOv3 supports up to 3 output layers currently
    layer_keys = ('target{}'.format(i) for i in range(3))
    targets = [feed_vars[key] for key in layer_keys if key in feed_vars]

    loss = self.yolo_head.get_loss(feats, gt_bbox, gt_class, gt_score,
                                   targets)
    loss.update({'loss': fluid.layers.sum(list(loss.values()))})
    return loss
def get_add_weights(self, inputs):
    """Compute spatial and channel fusion weights for ``inputs``.

    Channel weights: each input is globally average-pooled, the results
    are concatenated on axis 1, passed through
    ``self.squeeze_excitation``, reshaped and transposed so the input
    index leads, then softmaxed over axis 0.

    Spatial weights: the inputs are concatenated on axis 1, reduced to
    ``len(inputs)`` channels by a 1x1 convolution, unsqueezed on axis 2,
    transposed so the input index leads, then softmaxed over axis 0.

    Args:
        inputs: Sequence of feature variables.

    Returns:
        Tuple ``(spa_weights, cha_weights)``.
    """
    num_inputs = len(inputs)

    # --- channel weights ---
    use_cudnn = mixed_precision_global_state() is not None
    pooled = []
    for feat in inputs:
        pooled.append(
            fluid.layers.pool2d(
                input=feat,
                pool_size=0,
                pool_type='avg',
                global_pooling=True,
                use_cudnn=use_cudnn))
    channel = fluid.layers.concat(pooled, axis=1)
    channel = self.squeeze_excitation(channel,
                                      self.num_channels * num_inputs)
    # (n, 4c) --> (n, 4, c)
    channel = fluid.layers.reshape(channel,
                                   (-1, num_inputs, self.num_channels))
    # (n, 4, c) --> (4, n, c)
    channel = fluid.layers.transpose(channel, [1, 0, 2])
    channel = fluid.layers.softmax(channel, axis=0)

    # --- spatial weights ---
    spatial = fluid.layers.concat(inputs, axis=1)
    spatial = fluid.layers.conv2d(
        spatial,
        num_inputs,
        filter_size=1,
        param_attr=ParamAttr(name='spafuse_weights'),
        bias_attr=ParamAttr(name='spafuse_bias'))
    # (n, 4, h, w) --> (n, 4, 1, h, w)
    spatial = fluid.layers.unsqueeze(spatial, axes=[2])
    # (n, 4, 1, h, w) --> (4, n, 1, h, w)
    spatial = fluid.layers.transpose(spatial, [1, 0, 2, 3, 4])
    spatial = fluid.layers.softmax(spatial, axis=0)

    return spatial, channel
def BlazeBlock(self,
               input,
               in_channels,
               out_channels,
               double_channels=None,
               stride=1,
               use_5x5kernel=True,
               name=None):
    """Build one residual block from ``_conv_norm`` layers.

    The main path is a depthwise stage (one 5x5 layer, or two 3x3 layers
    when ``use_5x5kernel`` is false) followed by a 1x1 layer. When
    ``double_channels`` is given, a second such pair is stacked whose 1x1
    layer outputs ``double_channels`` filters. The result is added, with
    ReLU, to a shortcut: ``input`` itself when ``stride == 1``, otherwise
    ``input`` passed through ``self._pooling_block`` and a 1x1 layer.

    Args:
        input: Input feature variable.
        in_channels: Filter and group count of the first depthwise stage.
        out_channels: Filter count of the first 1x1 layer.
        double_channels: Optional filter count of the second 1x1 layer;
            enables the second pair when not ``None``.
        stride: 1 or 2 (asserted).
        use_5x5kernel: Choose a single 5x5 layer over two 3x3 layers.
        name: Prefix/suffix for layer names; it is concatenated with
            strings, so it must be a string despite the ``None`` default.

    Returns:
        Output of the final ``elementwise_add`` with ReLU.
    """
    assert stride in [1, 2]
    downsample = stride != 1
    doubled = double_channels is not None
    # The first 1x1 layer is only activated when a second pair follows it.
    first_pw_act = 'relu' if doubled else None
    use_cudnn = mixed_precision_global_state() is not None

    # First depthwise stage.
    if use_5x5kernel:
        dw = self._conv_norm(
            input=input,
            filter_size=5,
            num_filters=in_channels,
            stride=stride,
            padding=2,
            num_groups=in_channels,
            use_cudnn=use_cudnn,
            name=name + "1_dw")
    else:
        dw_first = self._conv_norm(
            input=input,
            filter_size=3,
            num_filters=in_channels,
            stride=1,
            padding=1,
            num_groups=in_channels,
            use_cudnn=use_cudnn,
            name=name + "1_dw_1")
        dw = self._conv_norm(
            input=dw_first,
            filter_size=3,
            num_filters=in_channels,
            stride=stride,
            padding=1,
            num_groups=in_channels,
            use_cudnn=use_cudnn,
            name=name + "1_dw_2")

    pw = self._conv_norm(
        input=dw,
        filter_size=1,
        num_filters=out_channels,
        stride=1,
        padding=0,
        act=first_pw_act,
        name=name + "1_sep")

    # Optional second pair.
    if doubled:
        if use_5x5kernel:
            # NOTE(review): unlike the other "*_dw*" layers this call passes
            # no num_groups; kept exactly as in the original — confirm
            # whether that is intentional.
            dw = self._conv_norm(
                input=pw,
                filter_size=5,
                num_filters=out_channels,
                stride=1,
                padding=2,
                use_cudnn=use_cudnn,
                name=name + "2_dw")
        else:
            dw_first = self._conv_norm(
                input=pw,
                filter_size=3,
                num_filters=out_channels,
                stride=1,
                padding=1,
                num_groups=out_channels,
                use_cudnn=use_cudnn,
                name=name + "2_dw_1")
            dw = self._conv_norm(
                input=dw_first,
                filter_size=3,
                num_filters=out_channels,
                stride=1,
                padding=1,
                num_groups=out_channels,
                use_cudnn=use_cudnn,
                name=name + "2_dw_2")

        pw = self._conv_norm(
            input=dw,
            filter_size=1,
            num_filters=double_channels,
            stride=1,
            padding=0,
            name=name + "2_sep")

    # Shortcut branch.
    shortcut = input
    if downsample:
        shortcut_channel = double_channels or out_channels
        pooled = self._pooling_block(input, stride, stride)
        shortcut = self._conv_norm(
            input=pooled,
            filter_size=1,
            num_filters=shortcut_channel,
            stride=1,
            padding=0,
            name="shortcut" + name)

    return fluid.layers.elementwise_add(x=shortcut, y=pw, act='relu')
def build(self, feed_vars, mode='train'):
    """Assemble a three-stage cascade box head plus a mask head.

    Args:
        feed_vars: Mapping of input variables, first validated by
            ``self._input_check``. ``'image'`` and ``'im_info'`` are always
            read; train mode also reads ``'gt_bbox'``, ``'is_crowd'``,
            ``'gt_class'`` and ``'gt_mask'``; the final eval path reads
            ``'im_shape'``.
        mode: ``'train'`` builds losses; any other value builds
            predictions.

    Returns:
        In train mode, a loss dict (box, RPN and mask losses) with an added
        ``'loss'`` entry holding the sum of its values. Otherwise
        ``{'proposal': ...}`` when ``self.rpn_only`` is set, else
        ``{'bbox': ..., 'mask': ...}``.
    """
    training = mode == 'train'

    if training:
        required_fields = [
            'gt_class', 'gt_bbox', 'gt_mask', 'is_crowd', 'im_info'
        ]
    else:
        required_fields = ['im_shape', 'im_info']
    self._input_check(required_fields, feed_vars)

    im = feed_vars['image']
    if training:
        gt_bbox = feed_vars['gt_bbox']
        is_crowd = feed_vars['is_crowd']
    im_info = feed_vars['im_info']

    use_fp16 = mixed_precision_global_state() is not None

    # FP16 input under mixed precision.
    if use_fp16:
        im = fluid.layers.cast(im, 'float16')

    body_feats = self.backbone(im)

    # FP32 features for everything after the backbone.
    if use_fp16:
        body_feats = OrderedDict(
            (key, fluid.layers.cast(feat, 'float32'))
            for key, feat in body_feats.items())

    # spatial_scale is only defined when an FPN is configured.
    if self.fpn is not None:
        body_feats, spatial_scale = self.fpn.get_output(body_feats)

    rpn_rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)

    if training:
        rpn_loss = self.rpn_head.get_loss(im_info, gt_bbox, is_crowd)
    elif self.rpn_only:
        im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
        im_scale = fluid.layers.sequence_expand(im_scale, rpn_rois)
        rois = rpn_rois / im_scale
        return {'proposal': rois}

    proposal_list = []
    roi_feat_list = []
    rcnn_pred_list = []
    rcnn_target_list = []

    proposals = None
    bbox_pred = None
    for stage in range(3):
        # Stage 0 starts from the RPN proposals; later stages refine the
        # previous stage's boxes.
        if stage == 0:
            refined_bbox = rpn_rois
        else:
            refined_bbox = self._decode_box(
                proposals, bbox_pred, curr_stage=stage - 1)

        if training:
            outs = self.bbox_assigner(
                input_rois=refined_bbox,
                feed_vars=feed_vars,
                curr_stage=stage)
            proposals = outs[0]
            rcnn_target_list.append(outs)
        else:
            proposals = refined_bbox
        proposal_list.append(proposals)

        roi_feat = self.roi_extractor(body_feats, proposals, spatial_scale)
        roi_feat_list.append(roi_feat)

        # The first stage uses an empty name suffix, later ones "_2", "_3".
        stage_suffix = '_' + str(stage + 1) if stage > 0 else ''
        cls_score, bbox_pred = self.bbox_head.get_output(
            roi_feat,
            wb_scalar=1.0 / self.cascade_rcnn_loss_weight[stage],
            name=stage_suffix)
        rcnn_pred_list.append((cls_score, bbox_pred))

    # Mask branch works on the last stage's proposals.
    rois = proposal_list[2]

    if not training:
        mask_name = 'mask_pred'
        mask_pred, bbox_pred = self.single_scale_eval(
            body_feats, spatial_scale, im_info, mask_name, bbox_pred,
            roi_feat_list, rcnn_pred_list, proposal_list,
            feed_vars['im_shape'])
        return {'bbox': bbox_pred, 'mask': mask_pred}

    loss = self.bbox_head.get_loss(rcnn_pred_list, rcnn_target_list,
                                   self.cascade_rcnn_loss_weight)
    loss.update(rpn_loss)

    labels_int32 = rcnn_target_list[2][1]

    mask_rois, roi_has_mask_int32, mask_int32 = self.mask_assigner(
        rois=rois,
        gt_classes=feed_vars['gt_class'],
        is_crowd=feed_vars['is_crowd'],
        gt_segms=feed_vars['gt_mask'],
        im_info=feed_vars['im_info'],
        labels_int32=labels_int32)

    if self.fpn is None:
        bbox_head_feat = self.bbox_head.get_head_feat()
        feat = fluid.layers.gather(bbox_head_feat, roi_has_mask_int32)
    else:
        feat = self.roi_extractor(
            body_feats, mask_rois, spatial_scale, is_mask=True)

    loss.update(self.mask_head.get_loss(feat, mask_int32))
    loss.update({'loss': fluid.layers.sum(list(loss.values()))})
    return loss
def build(self, feed_vars, mode='train'):
    """Assemble backbone, optional FPN, RPN, box head and mask head.

    Args:
        feed_vars: Mapping of input variables, first validated by
            ``self._input_check``. ``'image'`` and ``'im_info'`` are always
            read; train mode also reads ``'gt_bbox'``, ``'is_crowd'``,
            ``'gt_class'`` and ``'gt_mask'``; the final eval path reads
            ``'im_shape'``.
        mode: ``'train'`` builds losses; any other value builds
            predictions.

    Returns:
        In train mode, a loss dict (box, RPN and mask losses) with an added
        ``'loss'`` entry holding the sum of its values. Otherwise
        ``{'proposal': ...}`` when ``self.rpn_only`` is set, else
        ``{'bbox': ..., 'mask': ...}``.
    """
    training = mode == 'train'

    if training:
        required_fields = [
            'gt_class', 'gt_bbox', 'gt_mask', 'is_crowd', 'im_info'
        ]
    else:
        required_fields = ['im_shape', 'im_info']
    self._input_check(required_fields, feed_vars)

    im = feed_vars['image']
    im_info = feed_vars['im_info']

    use_fp16 = mixed_precision_global_state() is not None

    # FP16 input under mixed precision.
    if use_fp16:
        im = fluid.layers.cast(im, 'float16')

    body_feats = self.backbone(im)

    # FP32 features for everything after the backbone.
    if use_fp16:
        body_feats = OrderedDict(
            (key, fluid.layers.cast(feat, 'float32'))
            for key, feat in body_feats.items())

    spatial_scale = None
    if self.fpn is not None:
        body_feats, spatial_scale = self.fpn.get_output(body_feats)

    rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)

    if not training:
        if self.rpn_only:
            im_scale = fluid.layers.slice(
                im_info, [1], starts=[2], ends=[3])
            im_scale = fluid.layers.sequence_expand(im_scale, rois)
            rois = rois / im_scale
            return {'proposal': rois}

        mask_name = 'mask_pred'
        mask_pred, bbox_pred = self.single_scale_eval(
            body_feats, mask_name, rois, im_info, feed_vars['im_shape'],
            spatial_scale)
        return {'bbox': bbox_pred, 'mask': mask_pred}

    rpn_loss = self.rpn_head.get_loss(im_info, feed_vars['gt_bbox'],
                                      feed_vars['is_crowd'])

    outs = self.bbox_assigner(
        rpn_rois=rois,
        gt_classes=feed_vars['gt_class'],
        is_crowd=feed_vars['is_crowd'],
        gt_boxes=feed_vars['gt_bbox'],
        im_info=feed_vars['im_info'])
    rois = outs[0]
    labels_int32 = outs[1]

    if self.fpn is None:
        # Without an FPN only the backbone's last feature map is used.
        last_key = list(body_feats.keys())[-1]
        roi_feat = self.roi_extractor(body_feats[last_key], rois)
    else:
        roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)

    loss = self.bbox_head.get_loss(roi_feat, labels_int32, *outs[2:])
    loss.update(rpn_loss)

    mask_rois, roi_has_mask_int32, mask_int32 = self.mask_assigner(
        rois=rois,
        gt_classes=feed_vars['gt_class'],
        is_crowd=feed_vars['is_crowd'],
        gt_segms=feed_vars['gt_mask'],
        im_info=feed_vars['im_info'],
        labels_int32=labels_int32)

    if self.fpn is None:
        bbox_head_feat = self.bbox_head.get_head_feat()
        feat = fluid.layers.gather(bbox_head_feat, roi_has_mask_int32)
    else:
        feat = self.roi_extractor(
            body_feats, mask_rois, spatial_scale, is_mask=True)

    loss.update(self.mask_head.get_loss(feat, mask_int32))
    loss.update({'loss': fluid.layers.sum(list(loss.values()))})
    return loss
def build(self, feed_vars, mode='train'):
    """Assemble backbone, optional FPN, RPN and box head.

    Args:
        feed_vars: Mapping of input variables, first validated by
            ``self._input_check``. ``'image'`` and ``'im_info'`` are always
            read; train mode also reads ``'gt_bbox'``, ``'is_crowd'`` and
            ``'gt_class'``; other modes read ``'im_shape'``.
        mode: ``'train'`` builds losses; any other value builds
            predictions.

    Returns:
        In train mode, a loss dict (box and RPN losses) with an added
        ``'loss'`` entry holding the sum of its values. Otherwise
        ``{'proposal': ...}`` when ``self.rpn_only`` is set, else the
        result of ``self.bbox_head.get_prediction``.
    """
    training = mode == 'train'

    if training:
        required_fields = ['gt_class', 'gt_bbox', 'is_crowd', 'im_info']
    else:
        required_fields = ['im_shape', 'im_info']
    self._input_check(required_fields, feed_vars)

    im = feed_vars['image']
    im_info = feed_vars['im_info']
    if training:
        gt_bbox = feed_vars['gt_bbox']
        is_crowd = feed_vars['is_crowd']
    else:
        im_shape = feed_vars['im_shape']

    use_fp16 = mixed_precision_global_state() is not None

    # FP16 input under mixed precision.
    if use_fp16:
        im = fluid.layers.cast(im, 'float16')

    body_feats = self.backbone(im)
    # Remember the backbone's key order before any FPN replaces the dict.
    body_feat_names = list(body_feats.keys())

    # FP32 features for everything after the backbone.
    if use_fp16:
        body_feats = OrderedDict(
            (key, fluid.layers.cast(feat, 'float32'))
            for key, feat in body_feats.items())

    if self.fpn is not None:
        body_feats, spatial_scale = self.fpn.get_output(body_feats)

    rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)

    if training:
        rpn_loss = self.rpn_head.get_loss(im_info, gt_bbox, is_crowd)
        # sampled rpn proposals
        for var in ['gt_class', 'is_crowd', 'gt_bbox', 'im_info']:
            assert var in feed_vars, "{} has no {}".format(feed_vars, var)
        outs = self.bbox_assigner(
            rpn_rois=rois,
            gt_classes=feed_vars['gt_class'],
            is_crowd=feed_vars['is_crowd'],
            gt_boxes=feed_vars['gt_bbox'],
            im_info=feed_vars['im_info'])

        rois = outs[0]
        labels_int32 = outs[1]
        bbox_targets = outs[2]
        bbox_inside_weights = outs[3]
        bbox_outside_weights = outs[4]
    elif self.rpn_only:
        im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
        im_scale = fluid.layers.sequence_expand(im_scale, rois)
        rois = rois / im_scale
        return {'proposal': rois}

    if self.fpn is None:
        # Without an FPN the roi extractor only sees the last backbone
        # feature map, i.e. the one named by body_feat_names[-1].
        last_feat = body_feats[body_feat_names[-1]]
        roi_feat = self.roi_extractor(last_feat, rois)
    else:
        roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)

    if not training:
        return self.bbox_head.get_prediction(roi_feat, rois, im_info,
                                             im_shape)

    loss = self.bbox_head.get_loss(roi_feat, labels_int32, bbox_targets,
                                   bbox_inside_weights,
                                   bbox_outside_weights)
    loss.update(rpn_loss)
    loss.update({'loss': fluid.layers.sum(list(loss.values()))})
    return loss
def build(self, feed_vars, mode='train'):
    """Assemble a three-stage cascade box head on top of backbone/FPN/RPN.

    In train mode each stage's ``bbox_assigner`` call receives the
    previous stage's last output as ``max_overlap`` (``None`` for the
    first stage); that last element is left out of the stored targets.

    Args:
        feed_vars: Mapping of input variables, first validated by
            ``self._input_check``. ``'image'`` and ``'im_info'`` are always
            read; train mode also reads ``'gt_bbox'`` and ``'is_crowd'``;
            the final eval path reads ``'im_shape'``.
        mode: ``'train'`` builds losses; any other value builds
            predictions.

    Returns:
        In train mode, a loss dict (box and RPN losses) with an added
        ``'loss'`` entry holding the sum of its values. Otherwise
        ``{'proposal': ...}`` when ``self.rpn_only`` is set, else the
        result of ``self.bbox_head.get_prediction``.
    """
    training = mode == 'train'

    if training:
        required_fields = ['gt_class', 'gt_bbox', 'is_crowd', 'im_info']
    else:
        required_fields = ['im_shape', 'im_info']
    self._input_check(required_fields, feed_vars)

    im = feed_vars['image']
    im_info = feed_vars['im_info']
    if training:
        gt_bbox = feed_vars['gt_bbox']
        is_crowd = feed_vars['is_crowd']

    use_fp16 = mixed_precision_global_state() is not None

    # FP16 input under mixed precision.
    if use_fp16:
        im = fluid.layers.cast(im, 'float16')

    body_feats = self.backbone(im)

    # FP32 features for everything after the backbone.
    if use_fp16:
        body_feats = OrderedDict(
            (key, fluid.layers.cast(feat, 'float32'))
            for key, feat in body_feats.items())

    # spatial_scale is only defined when an FPN is configured.
    if self.fpn is not None:
        body_feats, spatial_scale = self.fpn.get_output(body_feats)

    rpn_rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)

    if training:
        rpn_loss = self.rpn_head.get_loss(im_info, gt_bbox, is_crowd)
    elif self.rpn_only:
        im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
        im_scale = fluid.layers.sequence_expand(im_scale, rpn_rois)
        rois = rpn_rois / im_scale
        return {'proposal': rois}

    proposal_list = []
    roi_feat_list = []
    rcnn_pred_list = []
    rcnn_target_list = []

    proposals = None
    bbox_pred = None
    max_overlap = None
    for stage in range(3):
        # Stage 0 starts from the RPN proposals; later stages refine the
        # previous stage's boxes.
        if stage == 0:
            refined_bbox = rpn_rois
        else:
            refined_bbox = self._decode_box(
                proposals, bbox_pred, curr_stage=stage - 1)

        if training:
            outs = self.bbox_assigner(
                input_rois=refined_bbox,
                feed_vars=feed_vars,
                curr_stage=stage,
                max_overlap=max_overlap)
            proposals = outs[0]
            # The last output feeds the next stage and is not a target.
            max_overlap = outs[-1]
            rcnn_target_list.append(outs[:-1])
        else:
            proposals = refined_bbox
        proposal_list.append(proposals)

        roi_feat = self.roi_extractor(body_feats, proposals, spatial_scale)
        roi_feat_list.append(roi_feat)

        # The first stage uses an empty name suffix, later ones "_2", "_3".
        stage_suffix = '_' + str(stage + 1) if stage > 0 else ''
        cls_score, bbox_pred = self.bbox_head.get_output(
            roi_feat,
            wb_scalar=1.0 / self.cascade_rcnn_loss_weight[stage],
            name=stage_suffix)
        rcnn_pred_list.append((cls_score, bbox_pred))

    if not training:
        return self.bbox_head.get_prediction(
            im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list,
            proposal_list, self.cascade_bbox_reg_weights,
            self.cls_agnostic_bbox_reg)

    loss = self.bbox_head.get_loss(rcnn_pred_list, rcnn_target_list,
                                   self.cascade_rcnn_loss_weight)
    loss.update(rpn_loss)
    loss.update({'loss': fluid.layers.sum(list(loss.values()))})
    return loss