def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): """Test for mask head with test time augmentation.""" if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head.num_classes)] else: aug_masks = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) mask_results = self._mask_forward(x, mask_rois) mask_results['mask_pred'] = self._mask_point_forward_test( x, mask_rois, det_labels, mask_results['mask_pred'], img_meta) # convert to numpy array to save memory aug_masks.append( mask_results['mask_pred'].sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head.get_seg_masks(merged_masks, det_bboxes, det_labels, self.test_cfg, ori_shape, scale_factor=1.0, rescale=False) return segm_result
def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] else: aug_masks = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) mask_feats = self.mask_roi_extractor( x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) if self.with_shared_head: mask_feats = self.shared_head(mask_feats) mask_pred = self.mask_head(mask_feats) # convert to numpy array to save memory aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head.get_seg_masks(merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, ori_shape, scale_factor=1.0, rescale=False) return segm_result
def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] else: aug_masks = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) mask_feats = self.mask_roi_extractor( x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) mask_pred = self.mask_head(mask_feats) # convert to numpy array to save memory aug_masks.append(mask_pred.sigmoid().cpu().numpy()) # here is to flip back the masks. merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] # sxg: for aug test, the masks are always fixed to # the size of img[0] segm_result = self.mask_head.get_seg_masks( merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, ori_shape, # the mask size is the ori ones. scale_factor=1.0, # and won't rescale to the rescale img. rescale=False) return segm_result
def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.point_rend_roihead.num_classes - 1)] else: aug_masks = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_pred = self.point_rend_roihead._forward_mask_test(x, _bboxes, det_labels) # convert to numpy array to save memory aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.point_rend_roihead.get_seg_masks(merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, ori_shape, scale_factor=1.0, rescale=False) return segm_result
def aug_test_cascade_mask(self, feats, img_metas, det_bboxes, det_labels): if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head[-1].num_classes - 1)] else: aug_masks = [] aug_img_metas = [] if self.mask_head.mask_score: aug_mask_ious = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) for i in range(self.num_stages): mask_roi_extractor = self.mask_roi_extractor[i] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) mask_pred = self.mask_head[i](mask_feats) aug_masks.append(mask_pred) aug_img_metas.append(img_meta) merged_masks = merge_aug_masks(aug_masks, aug_img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, ori_shape, scale_factor=1.0, rescale=False) return segm_result
def simple_test(self, img, img_meta, proposals=None, rescale=False): x = self.extract_feat(img) proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) if proposals is None else proposals if self.with_semantic: _, semantic_feat = self.semantic_head(x) else: semantic_feat = None file_name = img_meta[0]['filename'] img_shape = img_meta[0]['img_shape'] ori_shape = img_meta[0]['ori_shape'] scale_factor = img_meta[0]['scale_factor'] # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_6dof_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg.rcnn rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_head = self.bbox_head[i] cls_score, bbox_pred = self._bbox_forward_test(i, x, rois, semantic_feat=semantic_feat) ms_scores.append(cls_score) if self.test_cfg.keep_all_stages: det_bboxes, det_labels = bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, bbox_head.num_classes) ms_bbox_result['stage{}'.format(i)] = bbox_result if self.with_mask: mask_head = self.mask_head[i] if det_bboxes.shape[0] == 0: mask_classes = mask_head.num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: _bboxes = ( det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_pred = self._mask_forward_test( i, x, _bboxes, semantic_feat=semantic_feat) segm_result = mask_head.get_seg_masks( mask_pred, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['stage{}'.format(i)] = segm_result if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if det_bboxes.shape[0] == 0: mask_classes = self.mask_head[-1].num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: _bboxes = ( det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] mask_roi_extractor = self.mask_roi_extractor[-1] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_semantic and 'mask' in self.semantic_fusion: mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head(mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['ensemble'] = segm_result if self.with_car_cls_rot: if self.test_cfg.keep_all_stages: raise NotImplementedError else: car_cls_coco = 2 stage_num = self.num_stages-1 pos_box = det_bboxes[det_labels == car_cls_coco] # !!!!!!!!!!!!!!!!!!!!! Quite import bug below, scale is needed!!!!!!!!!!!!! pos_box = (pos_box * scale_factor if rescale else det_bboxes) if len(pos_box): car_cls_score_pred, quaternion_pred, car_cls_rot_feats = self._carcls_rot_forward_test(stage_num, x, pos_box, semantic_feat) else: car_cls_score_pred, quaternion_pred, car_cls_rot_feats = [], [], [] if self.with_translation: if len(pos_box): trans_pred_world = self._translation_forward_test(pos_box[:, :4], scale_factor, car_cls_rot_feats, ori_shape) else: trans_pred_world = [] ms_6dof_result['ensemble'] = {'car_cls_score_pred': car_cls_score_pred, 'quaternion_pred': quaternion_pred, 'trans_pred_world': trans_pred_world, 'file_name': file_name} if not self.test_cfg.keep_all_stages: if self.with_translation: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble'], ms_6dof_result['ensemble']) elif self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] else: if self.with_mask: results = { stage: (ms_bbox_result[stage], ms_segm_result[stage]) for stage in ms_bbox_result } else: results = ms_bbox_result return results
def simple_test(self, img, img_meta, proposals=None, rescale=False): """Run inference on a single image. Args: img (Tensor): must be in shape (N, C, H, W) img_meta (list[dict]): a list with one dictionary element. See `mmdet/datasets/pipelines/formatting.py:Collect` for details of meta dicts. proposals : if specified overrides rpn proposals rescale (bool): if True returns boxes in original image space Returns: dict: results """ x = self.extract_feat(img) proposal_list = self.simple_test_rpn( x, img_meta, self.test_cfg.rpn) if proposals is None else proposals img_shape = img_meta[0]['img_shape'] ori_shape = img_meta[0]['ori_shape'] scale_factor = img_meta[0]['scale_factor'] # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg.rcnn rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_roi_extractor = self.bbox_roi_extractor[i] bbox_head = self.bbox_head[i] bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides)], rois) if self.context_roi is not None: img_rois = bbox2roi([ torch.tensor( [[0, img.size(3), 0, img.size(2)]], dtype=torch.float).repeat(rois.size(0), 1).cuda() ]) context_feats = self.context_roi( x[:self.context_roi.num_inputs], img_rois) context_feats = self.context_head(context_feats) else: context_feats = None if self.with_shared_head: bbox_feats = self.shared_head(bbox_feats) if self.semantic_info_flow: # TODO last_feat = None for j in range(i): # cls_score, bbox_pred, last_feat = self.bbox_head[i](bbox_feats, last_feat, context_feats, # return_feats=True) cls_score, bbox_pred, last_feat = self.bbox_head[j]( bbox_feats, last_feat, context_feats, return_feats=True) cls_score, bbox_pred = bbox_head(bbox_feats, last_feat, context_feats, return_feats=False) else: # TODO ZSD cls_score, bbox_pred = bbox_head(bbox_feats, None, context_feats, return_feats=False) # OD # cls_score, bbox_pred = bbox_head(bbox_feats) # if self.context_roi is not None: # cls_score, bbox_pred = bbox_head(bbox_feats, context_feats) # else: # cls_score, bbox_pred = bbox_head(bbox_feats) if self.use_lsoftmax: cls_score = self.lsoftmax(cls_score) ms_scores.append(cls_score) if self.test_cfg.keep_all_stages: det_bboxes, det_labels = bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, bbox_head.num_classes) ms_bbox_result['stage{}'.format(i)] = bbox_result if self.with_mask: mask_roi_extractor = self.mask_roi_extractor[i] mask_head = self.mask_head[i] if det_bboxes.shape[0] == 0: mask_classes = mask_head.num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_shared_head: mask_feats = self.shared_head(mask_feats, i) mask_pred = mask_head(mask_feats) segm_result = mask_head.get_seg_masks( mask_pred, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['stage{}'.format(i)] = segm_result if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / self.num_stages det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) # TODO GZSD if self.gzsd_mode: if self.bbox_head[-1].num_classes == 66: results = bbox2result(det_bboxes, det_labels, 65 + 15 + 1) else: results = bbox2result(det_bboxes, det_labels, 48 + 17 + 1) return results bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) # TODO output num_classes open these line when unseen inference # bbox_result = bbox2result(det_bboxes, det_labels, 16) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if det_bboxes.shape[0] == 0: mask_classes = self.mask_head[-1].num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: if isinstance(scale_factor, float): # aspect ratio fixed _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) else: _bboxes = ( det_bboxes[:, :4] * torch.from_numpy(scale_factor).to(det_bboxes.device) if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] for i in range(self.num_stages): mask_roi_extractor = self.mask_roi_extractor[i] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_shared_head: mask_feats = self.shared_head(mask_feats) mask_pred = self.mask_head[i](mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['ensemble'] = segm_result if not self.test_cfg.keep_all_stages: if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] else: if self.with_mask: results = { stage: (ms_bbox_result[stage], ms_segm_result[stage]) for stage in ms_bbox_result } else: results = ms_bbox_result return results
def aug_test(self, imgs, img_metas, proposals=None, rescale=False): """Test with augmentations. If rescale is False, then returned bboxes and masks will fit the scale of imgs[0]. """ # recompute feats to save memory proposal_list = self.aug_test_rpn(self.extract_feats(imgs), img_metas, self.test_cfg.rpn) rcnn_test_cfg = self.test_cfg.rcnn aug_bboxes = [] aug_scores = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) # "ms" in variable names means multi-stage ms_scores = [] rois = bbox2roi([proposals]) for i in range(self.num_stages): bbox_roi_extractor = self.bbox_roi_extractor[i] bbox_head = self.bbox_head[i] bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides)], rois) if self.with_shared_head: bbox_feats = self.shared_head(bbox_feats) cls_score, bbox_pred = bbox_head(bbox_feats) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) bboxes, scores = self.bbox_head[-1].get_det_bboxes(rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head[-1].num_classes - 1)] else: aug_masks = [] aug_img_metas = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) for i in range(self.num_stages): mask_feats = self.mask_roi_extractor[i]( x[:len(self.mask_roi_extractor[i].featmap_strides )], mask_rois) if self.with_shared_head: mask_feats = self.shared_head(mask_feats) mask_pred = self.mask_head[i](mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) aug_img_metas.append(img_meta) merged_masks = merge_aug_masks(aug_masks, aug_img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor=1.0, rescale=False) return bbox_result, segm_result else: return bbox_result
def simple_test(self, img, img_meta, proposals=None, rescale=False): x = self.extract_feat(img) proposal_list = self.simple_test_rpn( x, img_meta, self.test_cfg.rpn) if proposals is None else proposals if self.with_semantic: _, semantic_feat = self.semantic_head(x) else: semantic_feat = None img_shape = img_meta[0]['img_shape'] ori_shape = img_meta[0]['ori_shape'] scale_factor = img_meta[0]['scale_factor'] # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg.rcnn rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_head = self.bbox_head[i] cls_score, bbox_pred = self._bbox_forward_test( i, x, rois, semantic_feat=semantic_feat) ms_scores.append(cls_score) if self.test_cfg.keep_all_stages: det_bboxes, det_labels = bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, nms_cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, bbox_head.num_classes) ms_bbox_result['stage{}'.format(i)] = bbox_result if self.with_mask: mask_head = self.mask_head[i] if det_bboxes.shape[0] == 0: segm_result = [ [] for _ in range(mask_head.num_classes - 1) ] else: _bboxes = ( det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_pred = self._mask_forward_test( i, x, _bboxes, semantic_feat=semantic_feat) segm_result = mask_head.get_seg_masks( mask_pred, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['stage{}'.format(i)] = segm_result if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [ [] for _ in range(self.mask_head[-1].num_classes - 1) ] else: _bboxes = ( det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] mask_roi_extractor = self.mask_roi_extractor[-1] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_semantic and 'mask' in self.semantic_fusion: mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head(mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['ensemble'] = segm_result if not self.test_cfg.keep_all_stages: if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] else: if self.with_mask: results = { stage: (ms_bbox_result[stage], ms_segm_result[stage]) for stage in ms_bbox_result } else: results = ms_bbox_result return results
def simple_test(self, img, img_meta, proposals=None, rescale=False): x = self.extract_feat(img) proposal_list = self.simple_test_rpn( x, img_meta, self.test_cfg.rpn) if proposals is None else proposals if self.with_semantic: semantic_pred, semantic_feat = self.semantic_head( x[:len(self.semantic_head.featmap_strides)]) semantic_result = self.simple_test_semantic(semantic_pred, img_meta, rescale=rescale) else: semantic_feat = None semantic_result = None img_shape = img_meta[0]['img_shape'] ori_shape = img_meta[0]['ori_shape'] scale_factor = img_meta[0]['scale_factor'] # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg.rcnn rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_head = self.bbox_head[i] cls_score, bbox_pred = self._bbox_forward_test( i, x, rois, semantic_feat=semantic_feat) ms_scores.append(cls_score) if self.test_cfg.keep_all_stages: det_bboxes, det_labels = bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, nms_cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, bbox_head.num_classes) ms_bbox_result['stage{}'.format(i)] = bbox_result if self.with_mask: mask_head = self.mask_head[i] if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(mask_head.num_classes - 1)] else: _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_pred = self._mask_forward_test( i, x, _bboxes, semantic_feat=semantic_feat) segm_result = mask_head.get_seg_masks( mask_pred, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['stage{}'.format(i)] = segm_result if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head[-1].num_classes - 1)] occ_matrix = [] occ_ind_map = {} else: _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] mask_roi_extractor = self.mask_roi_extractor[-1] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_semantic and 'mask' in self.semantic_fusion: mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head(mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) # srm assert self.with_srm merged_mask_pred = torch.from_numpy(merged_masks).cuda() aligned_masks = self.get_aligned_masks( merged_mask_pred, _bboxes, det_labels, rcnn_test_cfg, img_shape=img_meta[0]['img_shape']) srm_results_flat = self.simple_test_srm(aligned_masks, det_labels, return_flat=True) # occ # ========== occ ========== assert self.with_occ num_instances = det_labels.size()[0] assert det_bboxes.size()[0] == num_instances indices = list(range(num_instances)) indices.sort(key=lambda ind: srm_results_flat[ind] ) # sorted by srm scores indices.reverse() old2_ranking = [0] * len(indices) for rank, old in enumerate(indices): old2_ranking[old] = rank occ_ind_map = {} new_ind = -1 for label in range(80): for old_ind in range(num_instances): if det_labels[old_ind] == label: new_ind += 1 occ_ind_map[old2_ranking[ old_ind]] = new_ind # map score ranking to new_ind assert aligned_masks.size()[0] == num_instances occ_matrix = self.simple_test_occ(mask_feats, merged_mask_pred, aligned_masks, det_labels, old2_ranking) occ_matrix = occ_matrix.tolist() # ========== occ ========== ms_segm_result['ensemble'] = segm_result assert self.with_mask and self.with_semantic and not self.test_cfg.keep_all_stages assert self.with_occ aux_results = dict( occ_results=dict(matrix=occ_matrix, ind_map=occ_ind_map)) return ms_bbox_result['ensemble'], ms_segm_result[ 'ensemble'], semantic_result, aux_results
def _cascade_rcnn_simple_test(self, img, img_meta, proposals=None, rescale=False, numpy_res=True, decode_mask=True): assert numpy_res assert not self.test_cfg.keep_all_stages x = self.extract_feat(img) proposal_list = self.simple_test_rpn( x, img_meta, self.test_cfg.rpn) if proposals is None else proposals img_shape = img_meta[0]['img_shape'] ori_shape = img_meta[0]['ori_shape'] scale_factor = img_meta[0]['scale_factor'] # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg.rcnn rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_roi_extractor = self.bbox_roi_extractor[i] bbox_head = self.bbox_head[i] bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides)], rois) if self.with_shared_head: bbox_feats = self.shared_head(bbox_feats) cls_score, bbox_pred = bbox_head(bbox_feats) ms_scores.append(cls_score) if self.test_cfg.keep_all_stages: det_bboxes, det_labels = bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, bbox_head.num_classes) ms_bbox_result['stage{}'.format(i)] = bbox_result if self.with_mask: mask_roi_extractor = self.mask_roi_extractor[i] mask_head = self.mask_head[i] if det_bboxes.shape[0] == 0: mask_classes = mask_head.num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_shared_head: mask_feats = self.shared_head(mask_feats, i) mask_pred = mask_head(mask_feats) segm_result = mask_head.get_seg_masks( mask_pred, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['stage{}'.format(i)] = segm_result if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / self.num_stages det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) # bbox_result = bbox2result(det_bboxes, det_labels, # self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = [det_bboxes, det_labels] if self.with_mask: if det_bboxes.shape[0] == 0: mask_classes = self.mask_head[-1].num_classes - 1 if decode_mask: segm_result = [[] for _ in range(mask_classes)] else: segm_result = None else: _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] for i in range(self.num_stages): mask_roi_extractor = self.mask_roi_extractor[i] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_shared_head: mask_feats = self.shared_head(mask_feats) mask_pred = self.mask_head[i](mask_feats) if decode_mask: aug_masks.append(mask_pred.sigmoid().cpu().numpy()) else: aug_masks.append(mask_pred.sigmoid()) if decode_mask: merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) else: merged_masks = torch.stack(aug_masks).mean(dim=0) merged_masks = merged_masks.cpu().numpy() mask_encoded = partial( self.mask_head[-1].get_seg_masks, mask_pred=merged_masks, det_bboxes=_bboxes, det_labels=det_labels, rcnn_test_cfg=rcnn_test_cfg, ori_shape=ori_shape, scale_factor=scale_factor, rescale=rescale, ) # partial is a class # we need to transfer required attributes as well mask_encoded.num_classes = self.mask_head[-1].num_classes mask_encoded.class_agnostic = self.mask_head[-1].class_agnostic segm_result = mask_encoded ms_segm_result['ensemble'] = segm_result # if not self.test_cfg.keep_all_stages: # if self.with_mask: # results = (ms_bbox_result['ensemble'], # ms_segm_result['ensemble']) # else: # results = ms_bbox_result['ensemble'] # else: # if self.with_mask: # results = { # stage: (ms_bbox_result[stage], ms_segm_result[stage]) # for stage in ms_bbox_result # } # else: # results = ms_bbox_result if numpy_res: det_bboxes, det_labels = ms_bbox_result['ensemble'] det_bboxes = det_bboxes.cpu().numpy() det_labels = det_labels.cpu().numpy() if self.with_mask: masks = ms_segm_result['ensemble'] if decode_mask: # masks = mmcv.concat_list(masks) masks = np.asarray(masks) if self.with_mask: return det_bboxes, det_labels, masks else: return det_bboxes, det_labels
def simple_test(self, x, proposal_list, img_metas, rescale=False): """Test without augmentation. Args: x (tuple[Tensor]): Features from upstream network. Each has shape (batch_size, c, h, w). proposal_list (list(Tensor)): Proposals from rpn head. Each has shape (num_proposals, 5), last dimension 5 represent (x1, y1, x2, y2, score). img_metas (list[dict]): Meta information of images. rescale (bool): Whether to rescale the results to the original image. Default: True. Returns: list[list[np.ndarray]] or list[tuple]: When no mask branch, it is bbox results of each image and classes with type `list[list[np.ndarray]]`. The outer list corresponds to each image. The inner list corresponds to each class. When the model has mask branch, it contains bbox results and mask results. The outer list corresponds to each image, and first element of tuple is bbox results, second element is mask results. """ assert self.with_bbox, 'Bbox head must be implemented.' num_imgs = len(proposal_list) img_shapes = tuple(meta['img_shape'] for meta in img_metas) ori_shapes = tuple(meta['ori_shape'] for meta in img_metas) scale_factors = tuple(meta['scale_factor'] for meta in img_metas) # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg rois = bbox2roi(proposal_list) if rois.shape[0] == 0: # There is no proposal in the whole batch bbox_results = [[ np.zeros((0, 5), dtype=np.float32) for _ in range(self.bbox_head[-1].num_classes) ]] * num_imgs if self.with_mask: mask_classes = self.mask_head[-1].num_classes segm_results = [[[] for _ in range(mask_classes)] for _ in range(num_imgs)] results = list(zip(bbox_results, segm_results)) else: results = bbox_results return results for i in range(self.num_stages): bbox_results = self._bbox_forward(i, x, rois) # split batch bbox prediction back to each image cls_score = bbox_results['cls_score'] bbox_pred = bbox_results['bbox_pred'] num_proposals_per_img = tuple( len(proposals) for proposals in proposal_list) rois = rois.split(num_proposals_per_img, 0) cls_score = cls_score.split(num_proposals_per_img, 0) if isinstance(bbox_pred, torch.Tensor): bbox_pred = bbox_pred.split(num_proposals_per_img, 0) else: bbox_pred = self.bbox_head[i].bbox_pred_split( bbox_pred, num_proposals_per_img) ms_scores.append(cls_score) if i < self.num_stages - 1: if self.bbox_head[i].custom_activation: cls_score = [ self.bbox_head[i].loss_cls.get_activation(s) for s in cls_score ] refine_rois_list = [] for j in range(num_imgs): if rois[j].shape[0] > 0: bbox_label = cls_score[j][:, :-1].argmax(dim=1) refined_rois = self.bbox_head[i].regress_by_class( rois[j], bbox_label, bbox_pred[j], img_metas[j]) refine_rois_list.append(refined_rois) rois = torch.cat(refine_rois_list) # average scores of each image by stages cls_score = [ sum([score[i] for score in ms_scores]) / float(len(ms_scores)) for i in range(num_imgs) ] # apply bbox post-processing to each image individually det_bboxes = [] det_labels = [] for i in range(num_imgs): det_bbox, det_label = self.bbox_head[-1].get_bboxes( rois[i], cls_score[i], bbox_pred[i], img_shapes[i], scale_factors[i], rescale=rescale, cfg=rcnn_test_cfg) det_bboxes.append(det_bbox) det_labels.append(det_label) bbox_results = [ bbox2result(det_bboxes[i], det_labels[i], self.bbox_head[-1].num_classes) for i in range(num_imgs) ] ms_bbox_result['ensemble'] = bbox_results if self.with_mask: if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes): mask_classes = self.mask_head[-1].num_classes segm_results = [[[] for _ in range(mask_classes)] for _ in range(num_imgs)] else: if rescale and not isinstance(scale_factors[0], float): scale_factors = [ torch.from_numpy(scale_factor).to(det_bboxes[0].device) for scale_factor in scale_factors ] _bboxes = [ det_bboxes[i][:, :4] * scale_factors[i] if rescale else det_bboxes[i][:, :4] for i in range(len(det_bboxes)) ] mask_rois = bbox2roi(_bboxes) num_mask_rois_per_img = tuple( _bbox.size(0) for _bbox in _bboxes) aug_masks = [] for i in range(self.num_stages): mask_results = self._mask_forward(i, x, mask_rois) mask_pred = mask_results['mask_pred'] # split batch mask prediction back to each image mask_pred = mask_pred.split(num_mask_rois_per_img, 0) aug_masks.append([ m.sigmoid().cpu().detach().numpy() for m in mask_pred ]) # apply mask post-processing to each image individually segm_results = [] for i in range(num_imgs): if det_bboxes[i].shape[0] == 0: segm_results.append( [[] for _ in range(self.mask_head[-1].num_classes)]) else: aug_mask = [mask[i] for mask in aug_masks] merged_masks = merge_aug_masks( aug_mask, [[img_metas[i]]] * self.num_stages, rcnn_test_cfg) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes[i], det_labels[i], rcnn_test_cfg, ori_shapes[i], scale_factors[i], rescale) segm_results.append(segm_result) ms_segm_result['ensemble'] = segm_results if self.with_mask: results = list( zip(ms_bbox_result['ensemble'], ms_segm_result['ensemble'])) else: results = ms_bbox_result['ensemble'] return results
def simple_test(self, img, img_metas, proposals=None, rescale=False, Ensemble_Test=True ): """Run inference on a single image. Args: img (Tensor): must be in shape (N, C, H, W) img_metas (list[dict]): a list with one dictionary element. See `mmdet/datasets/pipelines/formatting.py:Collect` for details of meta dicts. proposals : if specified overrides rpn proposals rescale (bool): if True returns boxes in original image space Returns: dict: results """ # 经过残差网络 和 FPN后获得的 特征图 x = self.extract_feat(img) # 经过rpn网络,获取建议框列表 (或者选择覆盖当前建议框) proposal_list = self.simple_test_rpn( x, img_metas, self.test_cfg.rpn) if proposals is None else proposals img_shape = img_metas[0]['img_shape'] ori_shape = img_metas[0]['ori_shape'] scale_factor = img_metas[0]['scale_factor'] # "ms" in variable names means multi-stage 级联结果 ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg.rcnn # rois: 符合bbox_roi_extractor输入参数的建议框格式 rois = bbox2roi(proposal_list) ''' 获取其他模型的rois,roi_feats,bbox_pred,cls_score ''' other_rois, other_roi_feats, other_bbox_pred, other_cls_score = None, None, None, None if Ensemble_Test==True: other_rois, other_roi_feats, other_bbox_pred, other_cls_score = self.Ensemble_load_tensor(img_metas) # 为了继续往下传获取预测框所对应的特征图 # 级联结构,获取roi 而后抽取对应特征 final_bbox_feats = None for i in range(self.num_stages): # 第i个bbox的 roi_extractor 抽取器 bbox_roi_extractor = self.bbox_roi_extractor[i] # 第i个bbox的 head bbox_head = self.bbox_head[i] # 建议框roi后的特征 bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides)], rois) final_bbox_feats = bbox_feats ''' 进行roi_feats融合 ''' if i == 0 and Ensemble_Test==True: # 第一级联融合其他模型的roi_feats bbox_feats = torch.cat((bbox_feats,other_roi_feats),0) if self.with_shared_head: # 若设置了shared_head则使用 bbox_feats = self.shared_head(bbox_feats) ''' 进行分类 和 框回归 ''' cls_score, bbox_pred = bbox_head(bbox_feats) # 此级结果加入ms_scores ms_scores.append(cls_score) if i < self.num_stages - 1: # 如果使用的是级联结构 bbox_label = cls_score.argmax(dim=1) # 传入当前级数得到的回归框, 以当前回归框 计算其roi,将这个roi传入下一级模块 (这样的效果就会使得回归框不断接近真实框) if i == 0 and Ensemble_Test==True: # 第一级联 融合其他模型的 rois ''' rois融合 ''' ensemble_rois = torch.cat((rois, other_rois), 0) rois = bbox_head.regress_by_class(ensemble_rois, bbox_label, bbox_pred, img_metas[0]) else: rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_metas[0]) # 得到级联结构的得到平均 cls_score cls_score = sum(ms_scores) / self.num_stages # 获得det det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg, Ensemble_Test=Ensemble_Test, # 多模型集成识别 save_mode=False, # 表示开启识别图像并且保存模式, 保存:预测框、预测框对应特征图 roi_feats=final_bbox_feats, # 自定义参数 img_metas = img_metas, # 自定义参数 mode_name="CascadeRCNN" # 模型名字 ) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: # cascade_mask_rcnn时用到此分支 if det_bboxes.shape[0] == 0: mask_classes = self.mask_head[-1].num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: if isinstance(scale_factor, float): # aspect ratio fixed _bboxes = ( det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) else: _bboxes = ( det_bboxes[:, :4] * torch.from_numpy(scale_factor).to(det_bboxes.device) if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] for i in range(self.num_stages): mask_roi_extractor = self.mask_roi_extractor[i] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_shared_head: mask_feats = self.shared_head(mask_feats) mask_pred = self.mask_head[i](mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_metas] * self.num_stages, self.test_cfg.rcnn) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['ensemble'] = segm_result if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] return results
def aug_test(self, imgs, img_metas, proposals=None, rescale=False): ms_bbox_result = {} ms_segm_result = {} rcnn_test_cfg = self.test_cfg.rcnn proposal_list = self.aug_test_rpn( self.extract_feats(imgs), img_metas, self.test_cfg.rpn) aug_bboxes = [] aug_scores = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) rois = bbox2roi([proposals]) if self.with_semantic: _, semantic_feat = self.semantic_head(x) else: semantic_feat = None ms_scores = [] for i in range(self.num_stages): bbox_head = self.bbox_head[i] cls_score, bbox_pred = self._bbox_forward_test( i, x, rois, semantic_feat=semantic_feat) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / self.num_stages det_bboxes, det_scores = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(det_bboxes) aug_scores.append(det_scores) merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) # 由于集成所有的det_bboxes都为原始的大小 det_bboxes, det_labels = multiclass_nms( merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) if rescale: _det_bboxes = det_bboxes else: _det_bboxes = det_bboxes.clone() _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor'] bbox_results = bbox2result(_det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_results if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [ [] for _ in range(self.mask_head[-1].num_classes - 1)] else: aug_sum_masks = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): if self.with_semantic: _, semantic_feat = self.semantic_head(x) else: semantic_feat = None img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) aug_masks = [] mask_roi_extractor = self.mask_roi_extractor[-1] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_semantic and 'mask' in self.semantic_fusion: mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head(mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) # 先对所有stages的mask平均 merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) aug_sum_masks.append(merged_masks) merged_masks = np.mean(aug_sum_masks, axis=0) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, ori_shape, scale_factor=1.0, rescale=False) ms_segm_result['ensemble'] = segm_result # 简易测试tta是否正确 if not self.test_cfg.keep_all_stages: if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] else: if self.with_mask: results = { stage: (ms_bbox_result[stage], ms_segm_result[stage]) for stage in ms_bbox_result } else: results = ms_bbox_result return results
def simple_test(self, img, img_meta, proposals=None, rescale=False, use_semantic_mask=False): x = self.extract_feat(img) if self.save_feat is True: filename = img_meta[0]['filename'] P2 = x[0].detach().cpu().numpy() P3 = x[1].detach().cpu().numpy() P4 = x[2].detach().cpu().numpy() P5 = x[3].detach().cpu().numpy() P6 = x[4].detach().cpu().numpy() #print('P2.shape: ', P2.shape) #print('P3.shape: ', P3.shape) #print('P4.shape: ', P4.shape) #print('P5.shape: ', P5.shape) #print('P6.shape: ', P6.shape) import numpy as np p4_folder = os.path.join(self.save_folder, 'P4') p5_folder = os.path.join(self.save_folder, 'P5') p6_folder = os.path.join(self.save_folder, 'P6') if not os.path.exists(p4_folder): os.makedirs(p4_folder) if not os.path.exists(p5_folder): os.makedirs(p5_folder) if not os.path.exists(p6_folder): os.makedirs(p6_folder) np.save(os.path.join(p4_folder, filename + '.npy'), P4) np.save(os.path.join(p5_folder, filename + '.npy'), P5) np.save(os.path.join(p6_folder, filename + '.npy'), P6) proposal_list = self.simple_test_rpn( x, img_meta, self.test_cfg.rpn) if proposals is None else proposals if self.with_semantic: semantic_mask, semantic_feat = self.semantic_head(x) else: semantic_feat = None img_shape = img_meta[0]['img_shape'] ori_shape = img_meta[0]['ori_shape'] scale_factor = img_meta[0]['scale_factor'] # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg.rcnn rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_head = self.bbox_head[i] cls_score, bbox_pred = self._bbox_forward_test( i, x, rois, semantic_feat=semantic_feat) ms_scores.append(cls_score) if self.test_cfg.keep_all_stages: det_bboxes, det_labels = bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, nms_cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, bbox_head.num_classes) ms_bbox_result['stage{}'.format(i)] = bbox_result if self.with_mask: mask_head = self.mask_head[i] if det_bboxes.shape[0] == 0: mask_classes = mask_head.num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_pred = self._mask_forward_test( i, x, _bboxes, semantic_feat=semantic_feat) segm_result = mask_head.get_seg_masks( mask_pred, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['stage{}'.format(i)] = segm_result if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if det_bboxes.shape[0] == 0: mask_classes = self.mask_head[-1].num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] mask_roi_extractor = self.mask_roi_extractor[-1] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_semantic and 'mask' in self.semantic_fusion: mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head(mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['ensemble'] = segm_result if not self.test_cfg.keep_all_stages: if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] else: if self.with_mask: results = { stage: (ms_bbox_result[stage], ms_segm_result[stage]) for stage in ms_bbox_result } else: results = ms_bbox_result return results
def simple_test(self, img, img_meta, proposals=None, rescale=False): x = self.extract_feat(img) proposal_list = self.simple_test_rpn( x, img_meta, self.test_cfg.rpn) if proposals is None else proposals img_shape = img_meta[0]['img_shape'] ori_shape = img_meta[0]['ori_shape'] scale_factor = img_meta[0]['scale_factor'] # "ms" in variable names means multi-stage # bboxes of 3 rcnn stages. ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg.rcnn rois = bbox2roi(proposal_list) # get rois from proposal_list. for i in range(self.num_stages): bbox_roi_extractor = self.bbox_roi_extractor[i] # get roi for box head, and send roi to box_head. bbox_head = self.bbox_head[i] bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides)], rois) cls_score, bbox_pred = bbox_head(bbox_feats) ms_scores.append(cls_score) if self.test_cfg.keep_all_stages: # keep all bboxes that produced by all stages. det_bboxes, det_labels = bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, bbox_head.num_classes) ms_bbox_result['stage{}'.format(i)] = bbox_result if self.with_mask: mask_roi_extractor = self.mask_roi_extractor[i] mask_head = self.mask_head[i] if det_bboxes.shape[0] == 0: segm_result = [ [] for _ in range(mask_head.num_classes - 1) ] else: _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) mask_pred = mask_head(mask_feats) segm_result = mask_head.get_seg_masks( mask_pred, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['stage{}'.format(i)] = segm_result if i < self.num_stages - 1: # select the max label, and make new rois to generate # new pred bboxes. bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / self.num_stages det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [ [] for _ in range(self.mask_head[-1].num_classes - 1) ] else: # if rescale then resized to target size. _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] for i in range(self.num_stages): mask_roi_extractor = self.mask_roi_extractor[i] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) mask_pred = self.mask_head[i](mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['ensemble'] = segm_result if not self.test_cfg.keep_all_stages: if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] else: if self.with_mask: results = { stage: (ms_bbox_result[stage], ms_segm_result[stage]) for stage in ms_bbox_result } else: results = ms_bbox_result return results
def simple_test(self, x, proposal_list, img_metas, rescale=False): """Test without augmentation.""" assert self.with_bbox, 'Bbox head must be implemented.' num_imgs = len(proposal_list) img_shapes = tuple(meta['img_shape'] for meta in img_metas) ori_shapes = tuple(meta['ori_shape'] for meta in img_metas) scale_factors = tuple(meta['scale_factor'] for meta in img_metas) # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_results = self._bbox_forward(i, x, rois) # split batch bbox prediction back to each image cls_score = bbox_results['cls_score'] bbox_pred = bbox_results['bbox_pred'] num_proposals_per_img = tuple( len(proposals) for proposals in proposal_list) rois = rois.split(num_proposals_per_img, 0) cls_score = cls_score.split(num_proposals_per_img, 0) if isinstance(bbox_pred, torch.Tensor): bbox_pred = bbox_pred.split(num_proposals_per_img, 0) else: bbox_pred = self.bbox_head[i].bbox_pred_split( bbox_pred, num_proposals_per_img) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = [s[:, :-1].argmax(dim=1) for s in cls_score] rois = torch.cat([ self.bbox_head[i].regress_by_class(rois[j], bbox_label[j], bbox_pred[j], img_metas[j]) for j in range(num_imgs) ]) # average scores of each image by stages cls_score = [ sum([score[i] for score in ms_scores]) / float(len(ms_scores)) for i in range(num_imgs) ] # apply bbox post-processing to each image individually det_bboxes = [] det_labels = [] for i in range(num_imgs): det_bbox, det_label = self.bbox_head[-1].get_bboxes( rois[i], cls_score[i], bbox_pred[i], img_shapes[i], scale_factors[i], rescale=rescale, cfg=rcnn_test_cfg) det_bboxes.append(det_bbox) det_labels.append(det_label) if torch.onnx.is_in_onnx_export(): return det_bboxes, det_labels bbox_results = [ bbox2result(det_bboxes[i], det_labels[i], self.bbox_head[-1].num_classes) for i in range(num_imgs) ] ms_bbox_result['ensemble'] = bbox_results if self.with_mask: if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes): mask_classes = self.mask_head[-1].num_classes segm_results = [[[] for _ in range(mask_classes)] for _ in range(num_imgs)] else: if rescale and not isinstance(scale_factors[0], float): scale_factors = [ torch.from_numpy(scale_factor).to(det_bboxes[0].device) for scale_factor in scale_factors ] _bboxes = [ det_bboxes[i][:, :4] * scale_factors[i] if rescale else det_bboxes[i][:, :4] for i in range(len(det_bboxes)) ] mask_rois = bbox2roi(_bboxes) num_mask_rois_per_img = tuple( _bbox.size(0) for _bbox in _bboxes) aug_masks = [] for i in range(self.num_stages): mask_results = self._mask_forward(i, x, mask_rois) mask_pred = mask_results['mask_pred'] # split batch mask prediction back to each image mask_pred = mask_pred.split(num_mask_rois_per_img, 0) aug_masks.append( [m.sigmoid().cpu().numpy() for m in mask_pred]) # apply mask post-processing to each image individually segm_results = [] for i in range(num_imgs): if det_bboxes[i].shape[0] == 0: segm_results.append( [[] for _ in range(self.mask_head[-1].num_classes)]) else: aug_mask = [mask[i] for mask in aug_masks] merged_masks = merge_aug_masks( aug_mask, [[img_metas[i]]] * self.num_stages, rcnn_test_cfg) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes[i], det_labels[i], rcnn_test_cfg, ori_shapes[i], scale_factors[i], rescale) segm_results.append(segm_result) ms_segm_result['ensemble'] = segm_results if self.with_mask: results = list( zip(ms_bbox_result['ensemble'], ms_segm_result['ensemble'])) else: results = ms_bbox_result['ensemble'] return results
def aug_test(self, img_feats, proposal_list, img_metas, rescale=False): if self.with_semantic: semantic_feats = [ self.semantic_head(feat)[1] for feat in img_feats ] else: semantic_feats = [None] * len(img_metas) if self.with_glbctx: glbctx_feats = [self.glbctx_head(feat)[1] for feat in img_feats] else: glbctx_feats = [None] * len(img_metas) rcnn_test_cfg = self.test_cfg aug_bboxes = [] aug_scores = [] for x, img_meta, semantic_feat, glbctx_feat in zip( img_feats, img_metas, semantic_feats, glbctx_feats): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) # "ms" in variable names means multi-stage ms_scores = [] rois = bbox2roi([proposals]) for i in range(self.num_stages): bbox_head = self.bbox_head[i] bbox_results = self._bbox_forward(i, x, rois, semantic_feat=semantic_feat, glbctx_feat=glbctx_feat) ms_scores.append(bbox_results['cls_score']) if i < self.num_stages - 1: bbox_label = bbox_results['cls_score'].argmax(dim=1) rois = bbox_head.regress_by_class( rois, bbox_label, bbox_results['bbox_pred'], img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) bboxes, scores = self.bbox_head[-1].get_bboxes( rois, cls_score, bbox_results['bbox_pred'], img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) det_bbox_results = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) if self.with_mask: if det_bboxes.shape[0] == 0: det_segm_results = [[] for _ in range(self.mask_head.num_classes)] else: aug_masks = [] for x, img_meta, semantic_feat, glbctx_feat in zip( img_feats, img_metas, semantic_feats, glbctx_feats): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) # get relay feature on mask_rois bbox_results = self._bbox_forward( -1, x, mask_rois, semantic_feat=semantic_feat, glbctx_feat=glbctx_feat) relayed_feat = bbox_results['relayed_feat'] relayed_feat = self.feat_relay_head(relayed_feat) mask_results = self._mask_forward( x, mask_rois, semantic_feat=semantic_feat, glbctx_feat=glbctx_feat, relayed_feat=relayed_feat) mask_pred = mask_results['mask_pred'] aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg) ori_shape = img_metas[0][0]['ori_shape'] det_segm_results = self.mask_head.get_seg_masks( merged_masks, det_bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor=1.0, rescale=False) return [(det_bbox_results, det_segm_results)] else: return [det_bbox_results]
def aug_test(self, features, proposal_list, img_metas, rescale=False): """Test with augmentations. If rescale is False, then returned bboxes and masks will fit the scale of imgs[0]. """ rcnn_test_cfg = self.test_cfg aug_bboxes = [] aug_scores = [] for x, img_meta in zip(features, img_metas): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] flip_direction = img_meta[0]['flip_direction'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip, flip_direction) # "ms" in variable names means multi-stage ms_scores = [] rois = bbox2roi([proposals]) for i in range(self.num_stages): bbox_results = self._bbox_forward(i, x, rois) ms_scores.append(bbox_results['cls_score']) if i < self.num_stages - 1: bbox_label = bbox_results['cls_score'][:, :-1].argmax( dim=1) rois = self.bbox_head[i].regress_by_class( rois, bbox_label, bbox_results['bbox_pred'], img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) bboxes, scores = self.bbox_head[-1].get_bboxes( rois, cls_score, bbox_results['bbox_pred'], img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head[-1].num_classes)] else: aug_masks = [] aug_img_metas = [] for x, img_meta in zip(features, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] flip_direction = img_meta[0]['flip_direction'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip, flip_direction) mask_rois = bbox2roi([_bboxes]) for i in range(self.num_stages): mask_results = self._mask_forward(i, x, mask_rois) aug_masks.append( mask_results['mask_pred'].sigmoid().cpu().numpy()) aug_img_metas.append(img_meta) merged_masks = merge_aug_masks(aug_masks, aug_img_metas, self.test_cfg) ori_shape = img_metas[0][0]['ori_shape'] dummy_scale_factor = np.ones(4) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor=dummy_scale_factor, rescale=False) return [(bbox_result, segm_result)] else: return [bbox_result]
def aug_test(self, img_feats, proposal_list, img_metas, rescale=False): """Test with augmentations. If rescale is False, then returned bboxes and masks will fit the scale of imgs[0]. """ if self.with_semantic: semantic_feats = [ self.semantic_head(feat)[1] for feat in img_feats ] else: semantic_feats = [None] * len(img_metas) rcnn_test_cfg = self.test_cfg aug_bboxes = [] aug_scores = [] for x, img_meta, semantic in zip(img_feats, img_metas, semantic_feats): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] flip_direction = img_meta[0]['flip_direction'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip, flip_direction) # "ms" in variable names means multi-stage ms_scores = [] rois = bbox2roi([proposals]) for i in range(self.num_stages): bbox_head = self.bbox_head[i] bbox_results = self._bbox_forward(i, x, rois, semantic_feat=semantic) ms_scores.append(bbox_results['cls_score']) if i < self.num_stages - 1: bbox_label = bbox_results['cls_score'].argmax(dim=1) rois = bbox_head.regress_by_class( rois, bbox_label, bbox_results['bbox_pred'], img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) bboxes, scores = self.bbox_head[-1].get_bboxes( rois, cls_score, bbox_results['bbox_pred'], img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [[[] for _ in range(self.mask_head[-1].num_classes)] ] else: aug_masks = [] aug_img_metas = [] for x, img_meta, semantic in zip(img_feats, img_metas, semantic_feats): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] flip_direction = img_meta[0]['flip_direction'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip, flip_direction) mask_rois = bbox2roi([_bboxes]) mask_feats = self.mask_roi_extractor[-1]( x[:len(self.mask_roi_extractor[-1].featmap_strides)], mask_rois) if self.with_semantic: semantic_feat = semantic mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) if mask_semantic_feat.shape[-2:] != mask_feats.shape[ -2:]: mask_semantic_feat = F.adaptive_avg_pool2d( mask_semantic_feat, mask_feats.shape[-2:]) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head( mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) aug_img_metas.append(img_meta) merged_masks = merge_aug_masks(aug_masks, aug_img_metas, self.test_cfg) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor=1.0, rescale=False) return [(bbox_result, segm_result)] else: return [bbox_result]
def simple_test(self, img, img_meta, proposals=None, rescale=False, **kwargs): x = self.extract_feat(img) proposal_list = self.simple_test_rpn( x, img_meta, self.test_cfg.rpn) if proposals is None else proposals if self.with_semantic: _, semantic_feat = self.semantic_head(x) else: semantic_feat = None img_shape = img_meta[0]['img_shape'] ori_shape = img_meta[0]['ori_shape'] scale_factor = img_meta[0]['scale_factor'] # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg.rcnn rois = bbox2roi(proposal_list) olongtail_cls_score_all = [] for i in range(self.num_stages): bbox_head = self.bbox_head[i] cls_score, olongtail_cls_score, bbox_pred = self._bbox_forward_test( i, x, rois, kwargs['olongtailmodel'][i], semantic_feat=semantic_feat) ms_scores.append(cls_score) olongtail_cls_score_all.append(olongtail_cls_score) if self.test_cfg.keep_all_stages: det_bboxes, det_labels = bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, nms_cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, bbox_head.num_classes) ms_bbox_result['stage{}'.format(i)] = bbox_result if self.with_mask: mask_head = self.mask_head[i] if det_bboxes.shape[0] == 0: mask_classes = mask_head.num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_pred = self._mask_forward_test( i, x, _bboxes, semantic_feat=semantic_feat) segm_result = mask_head.get_seg_masks( mask_pred, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['stage{}'.format(i)] = segm_result if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) ## olongtail_cls_score_all = torch.stack(olongtail_cls_score_all).mean(0) ##to saveout the cls head prediction for ensembling # olongtail_scores = F.softmax(olongtail_cls_score_all, dim=1) # save_idx = img_meta[0]['filename'].split('/')[-1].split('.')[0] # dets_save_path = osp.join('./ensemble_non_ssd', 'ensemble_valset', 'det', 'htc_31d9_x101_64d_ms_dcn_longtail_clsscore', # save_idx + '_') # # dets_save_path = osp.join('./ensemble', 'ensemble_testset', 'det', 'htc_31d9_x101_64d_ms_dcn_longtail_clsscore', # # save_idx + '_') # # # if not osp.exists(osp.join('./ensemble', 'ensemble_testset', 'det', 'htc_31d9_x101_64d_ms_dcn_longtail_clsscore')): # # os.mkdir(osp.join('./ensemble', 'ensemble_testset', 'det', 'htc_31d9_x101_64d_ms_dcn_longtail_clsscore')) # # torch.save(olongtail_scores, dets_save_path + 'dets_cls.pt') # return 0 det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, olongtail_cls_score_all, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if det_bboxes.shape[0] == 0: mask_classes = self.mask_head[-1].num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] mask_roi_extractor = self.mask_roi_extractor[-1] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_semantic and 'mask' in self.semantic_fusion: mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head(mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['ensemble'] = segm_result if not self.test_cfg.keep_all_stages: if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] else: if self.with_mask: results = { stage: (ms_bbox_result[stage], ms_segm_result[stage]) for stage in ms_bbox_result } else: results = ms_bbox_result return results
def aug_test(self, imgs, img_metas, rescale=False): """Test with augmentation.""" single_stage_cfg = self.test_cfg.single_stage single_stage_cfg.update(dict(aug_test=True)) imgs_per_gpu = len(img_metas[0]) aug_bboxes = [[] for _ in range(imgs_per_gpu)] aug_scores = [[] for _ in range(imgs_per_gpu)] mask_roi_feats = [] stuff_outs = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): bbox_outs = self.bbox_head(x) bbox_inputs = bbox_outs[:2] + (img_meta, single_stage_cfg, False) results_list = self.bbox_head.get_bboxes(*bbox_inputs) # mask subnet mask_roi_feats.append(bbox_outs[-1]) # stuff subnet stuff_head_inputs = bbox_outs[-2] stuff_out = self.stuff_head(stuff_head_inputs) stuff_outs.append(stuff_out) for i, results in enumerate(results_list): mlvl_bboxes, mlvl_scores = results aug_bboxes[i].append(mlvl_bboxes) aug_scores[i].append(mlvl_scores) aug_img_metas = [] for i in range(imgs_per_gpu): aug_img_meta = [] for j in range(len(img_metas)): aug_img_meta.append(img_metas[j][i]) aug_img_metas.append(aug_img_meta) det_results = [] for aug_bbox, aug_score, aug_img_meta in zip(aug_bboxes, aug_scores, aug_img_metas): merged_bboxes, merged_scores = merge_aug_bboxes(aug_bbox, aug_score, aug_img_meta, single_stage_cfg, return_mean=False) det_bboxes, det_labels = multiclass_nms( merged_bboxes, merged_scores, single_stage_cfg.score_thr, single_stage_cfg.nms, single_stage_cfg.max_per_img) det_results.append((det_bboxes, det_labels)) bbox_results = [ bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) for det_bboxes, det_labels in det_results ] # mask imgs_per_gpu = len(img_metas[0]) aug_masks = [[] for _ in range(imgs_per_gpu)] for x, img_meta in zip(mask_roi_feats, img_metas): # we should rescale the det bboxes outside `simple_test_mask` # because there are `flip` for aug test setting, while in the # simple test setting, just have the scale scaled_det_results = [] for results, meta in zip(det_results, img_meta): det_bboxes, _ = results img_shape = meta['img_shape'] scale_factor = meta['scale_factor'] flip = meta['flip'] scaled_det_bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) scaled_det_results.append(scaled_det_bboxes) mask_rois = bbox2roi(scaled_det_results) mask_feats = self.mask_roi_extractor( x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) mask_pred = self.mask_head(mask_feats) for img_id, meta in enumerate(img_meta): idx_img = mask_rois[:, 0] == img_id mask_pred_img = mask_pred[idx_img] # convert to numpy array to save memory mask_pred_img_np = mask_pred_img.sigmoid().cpu().numpy() aug_masks[img_id].append(mask_pred_img_np) segm_results = [] for det_result, aug_mask, aug_img_meta in zip(det_results, aug_masks, aug_img_metas): det_bboxes, det_labels = det_result merged_masks = merge_aug_masks(aug_mask, aug_img_meta, self.test_cfg.single_stage_mask) # perform `get_seg_masks` here for `merged_masks` # `ori_shape` for all augmented images are the same here ori_shape = aug_img_meta[0]['ori_shape'] segm_result = self.mask_head.get_seg_masks( merged_masks, det_bboxes, det_labels, self.test_cfg.single_stage_mask, ori_shape, scale_factor=1.0, rescale=False) segm_results.append(segm_result) # stuff stuff_results = self.stuff_head.get_stuff_map_aug(stuff_outs, img_metas, rescale=rescale) bbox_segm_stuff_results = [] for bbox_result, segm_result, stuff_result in zip( bbox_results, segm_results, stuff_results): bbox_segm_stuff_results.append( (bbox_result, segm_result, stuff_result)) return bbox_segm_stuff_results[0]
def simple_test(self, x, proposal_list, img_metas, rescale=False): """Test without augmentation.""" if self.with_semantic: _, semantic_feat = self.semantic_head(x) else: semantic_feat = None num_imgs = len(proposal_list) img_shapes = tuple(meta['img_shape'] for meta in img_metas) ori_shapes = tuple(meta['ori_shape'] for meta in img_metas) scale_factors = tuple(meta['scale_factor'] for meta in img_metas) # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_head = self.bbox_head[i] bbox_results = self._bbox_forward(i, x, rois, semantic_feat=semantic_feat) # split batch bbox prediction back to each image cls_score = bbox_results['cls_score'] bbox_pred = bbox_results['bbox_pred'] num_proposals_per_img = tuple(len(p) for p in proposal_list) rois = rois.split(num_proposals_per_img, 0) cls_score = cls_score.split(num_proposals_per_img, 0) bbox_pred = bbox_pred.split(num_proposals_per_img, 0) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = [s[:, :-1].argmax(dim=1) for s in cls_score] rois = torch.cat([ bbox_head.regress_by_class(rois[i], bbox_label[i], bbox_pred[i], img_metas[i]) for i in range(num_imgs) ]) # average scores of each image by stages cls_score = [ sum([score[i] for score in ms_scores]) / float(len(ms_scores)) for i in range(num_imgs) ] # apply bbox post-processing to each image individually det_bboxes = [] det_labels = [] for i in range(num_imgs): det_bbox, det_label = self.bbox_head[-1].get_bboxes( rois[i], cls_score[i], bbox_pred[i], img_shapes[i], scale_factors[i], rescale=rescale, cfg=rcnn_test_cfg) det_bboxes.append(det_bbox) det_labels.append(det_label) bbox_result = [ bbox2result(det_bboxes[i], det_labels[i], self.bbox_head[-1].num_classes) for i in range(num_imgs) ] ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes): mask_classes = self.mask_head[-1].num_classes segm_results = [[[] for _ in range(mask_classes)] for _ in range(num_imgs)] else: if rescale and not isinstance(scale_factors[0], float): scale_factors = [ torch.from_numpy(scale_factor).to(det_bboxes[0].device) for scale_factor in scale_factors ] _bboxes = [ det_bboxes[i][:, :4] * scale_factors[i] if rescale else det_bboxes[i] for i in range(num_imgs) ] mask_rois = bbox2roi(_bboxes) aug_masks = [] mask_roi_extractor = self.mask_roi_extractor[-1] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_semantic and 'mask' in self.semantic_fusion: mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) mask_feats += mask_semantic_feat last_feat = None num_bbox_per_img = tuple(len(_bbox) for _bbox in _bboxes) for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head(mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) # split batch mask prediction back to each image mask_pred = mask_pred.split(num_bbox_per_img, 0) aug_masks.append( [mask.sigmoid().cpu().numpy() for mask in mask_pred]) # apply mask post-processing to each image individually segm_results = [] for i in range(num_imgs): if det_bboxes[i].shape[0] == 0: segm_results.append( [[] for _ in range(self.mask_head[-1].num_classes)]) else: aug_mask = [mask[i] for mask in aug_masks] merged_mask = merge_aug_masks( aug_mask, [[img_metas[i]]] * self.num_stages, rcnn_test_cfg) segm_result = self.mask_head[-1].get_seg_masks( merged_mask, _bboxes[i], det_labels[i], rcnn_test_cfg, ori_shapes[i], scale_factors[i], rescale) segm_results.append(segm_result) ms_segm_result['ensemble'] = segm_results if self.with_mask: results = list( zip(ms_bbox_result['ensemble'], ms_segm_result['ensemble'])) else: results = ms_bbox_result['ensemble'] return results
def simple_test(self, x, proposal_list, img_metas, rescale=False): if self.with_semantic: _, semantic_feat = self.semantic_head(x) else: semantic_feat = None img_shape = img_metas[0]['img_shape'] ori_shape = img_metas[0]['ori_shape'] scale_factor = img_metas[0]['scale_factor'] # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_head = self.bbox_head[i] bbox_results = self._bbox_forward(i, x, rois, semantic_feat=semantic_feat) ms_scores.append(bbox_results['cls_score']) if i < self.num_stages - 1: bbox_label = bbox_results['cls_score'].argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_results['bbox_pred'], img_metas[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) det_bboxes, det_labels = self.bbox_head[-1].get_bboxes( rois, cls_score, bbox_results['bbox_pred'], img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if det_bboxes.shape[0] == 0: mask_classes = self.mask_head[-1].num_classes segm_result = [[] for _ in range(mask_classes)] else: _bboxes = (det_bboxes[:, :4] * det_bboxes.new_tensor(scale_factor) if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] mask_roi_extractor = self.mask_roi_extractor[-1] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_semantic and 'mask' in self.semantic_fusion: mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head(mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_metas] * self.num_stages, self.test_cfg) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['ensemble'] = segm_result if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] return results
def simple_test(self, img, img_metas, proposals=None, rescale=False): x = self.extract_feat(img) proposal_list = self.simple_test_rpn( x, img_metas, self.test_cfg.rpn) if proposals is None else proposals if self.with_semantic: _, semantic_feat = self.semantic_head(x) else: semantic_feat = None img_shape = img_metas[0]['img_shape'] ori_shape = img_metas[0]['ori_shape'] scale_factor = img_metas[0]['scale_factor'] ms_bbox_result = {} ms_segm_result = {} ms_maskscore_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg.rcnn rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_head = self.bbox_head[i] cls_score, bbox_pred = self._bbox_forward_test( i, x, rois, semantic_feat=semantic_feat) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_metas[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if det_bboxes.shape[0] == 0: mask_classes = self.mask_head[-1].num_classes - 1 # NOTE segm_result = [[] for _ in range(mask_classes)] mask_scores = [[] for _ in range(mask_classes)] else: _bboxes = ( det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] aug_masks_wo_sigmoid = [] mask_roi_extractor = self.mask_roi_extractor[-1] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_semantic and 'mask' in self.semantic_fusion: mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head(mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) aug_masks_wo_sigmoid.append(mask_pred.cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_metas] * self.num_stages, self.test_cfg.rcnn) merged_masks_wo_sigmoid = merge_aug_masks(aug_masks_wo_sigmoid, [img_metas]*self.num_stages, self.test_cfg.rcnn) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) merged_masks_torch = torch.from_numpy(merged_masks_wo_sigmoid).to(mask_feats.dtype).to(mask_feats.device) mask_results = dict(mask_pred=merged_masks_torch, mask_feats=mask_feats) assert mask_feats.size(0) == merged_masks_torch.size(0)==det_labels.size(0) mask_iou_pred = self.mask_iou_head( mask_results['mask_feats'], mask_results['mask_pred'][range(det_labels.size(0)), det_labels]) mask_scores = self.mask_iou_head.get_mask_scores( mask_iou_pred, det_bboxes, det_labels) ms_segm_result['ensemble'] = segm_result ms_maskscore_result['ensemble'] = mask_scores if self.with_mask: results = ms_bbox_result['ensemble'], (ms_segm_result['ensemble'], ms_maskscore_result['ensemble']) else: results = ms_bbox_result['ensemble'] return results
def aug_test(self, imgs, img_metas, proposals=None, rescale=False): """Test with augmentations. If rescale is False, then returned bboxes and masks will fit the scale of imgs[0]. """ if self.with_semantic: semantic_feats = [ self.semantic_head(feat)[1] for feat in self.extract_feats(imgs) ] else: semantic_feats = [None] * len(img_metas) # recompute feats to save memory proposal_list = self.aug_test_rpn( self.extract_feats(imgs), img_metas, self.test_cfg.rpn) rcnn_test_cfg = self.test_cfg.rcnn aug_bboxes = [] aug_scores = [] for x, img_meta, semantic in zip( self.extract_feats(imgs), img_metas, semantic_feats): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) # "ms" in variable names means multi-stage ms_scores = [] rois = bbox2roi([proposals]) for i in range(self.num_stages): bbox_head = self.bbox_head[i] cls_score, bbox_pred = self._bbox_forward_test( i, x, rois, semantic_feat=semantic) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) bboxes, scores = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head[-1].num_classes - 1)] else: aug_masks = [] aug_img_metas = [] aug_masks_wo_sigmoid = [] for x, img_meta, semantic in zip( self.extract_feats(imgs), img_metas, semantic_feats): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) mask_feats = self.mask_roi_extractor[-1]( x[:len(self.mask_roi_extractor[-1].featmap_strides)], mask_rois) if self.with_semantic: semantic_feat = semantic mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) if mask_semantic_feat.shape[-2:] != mask_feats.shape[ -2:]: mask_semantic_feat = F.adaptive_avg_pool2d( mask_semantic_feat, mask_feats.shape[-2:]) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head( mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) aug_img_metas.append(img_meta) aug_masks_wo_sigmoid.append(mask_pred.cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, aug_img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor=1.0, rescale=False) merged_masks_wo_sigmoid = merge_aug_masks(aug_masks_wo_sigmoid, aug_img_metas, self.test_cfg.rcnn) merged_masks_torch = torch.from_numpy(merged_masks_wo_sigmoid).to(mask_feats.dtype).to(mask_feats.device) assert mask_feats.size(0) == merged_masks_torch.size(0)==det_labels.size(0) mask_iou_pred = self.mask_iou_head(mask_feats, merged_masks_torch[range(det_labels.size(0)), det_labels]) mask_scores = self.mask_iou_head.get_mask_scores(mask_iou_pred, det_bboxes, det_labels) return bbox_result, (segm_result, mask_scores) else: return bbox_result
def simple_test(self, img, img_meta, proposals=None, rescale=False): """Run inference on a single image. Args: img (Tensor): must be in shape (N, C, H, W) img_meta (list[dict]): a list with one dictionary element. See `mmdet/datasets/pipelines/formatting.py:Collect` for details of meta dicts. proposals : if specified overrides rpn proposals rescale (bool): if True returns boxes in original image space Returns: dict: results """ x = self.extract_feat(img) proposal_list = self.simple_test_rpn( x, img_meta, self.test_cfg.rpn) if proposals is None else proposals img_shape = img_meta[0]['img_shape'] ori_shape = img_meta[0]['ori_shape'] scale_factor = img_meta[0]['scale_factor'] # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg.rcnn rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_roi_extractor = self.bbox_roi_extractor[i] bbox_head = self.bbox_head[i] bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides)], rois) if self.with_shared_head: bbox_feats = self.shared_head(bbox_feats) cls_score, bbox_pred = bbox_head(bbox_feats) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / self.num_stages det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if det_bboxes.shape[0] == 0: mask_classes = self.mask_head[-1].num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: if isinstance(scale_factor, float): # aspect ratio fixed _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) else: _bboxes = ( det_bboxes[:, :4] * torch.from_numpy(scale_factor).to(det_bboxes.device) if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] for i in range(self.num_stages): mask_roi_extractor = self.mask_roi_extractor[i] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_shared_head: mask_feats = self.shared_head(mask_feats) mask_pred = self.mask_head[i](mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['ensemble'] = segm_result if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] return results
def simple_test(self, x, proposal_list, img_metas, rescale=False): """Test without augmentation.""" assert self.with_bbox, 'Bbox head must be implemented.' img_shape = img_metas[0]['img_shape'] ori_shape = img_metas[0]['ori_shape'] scale_factor = img_metas[0]['scale_factor'] # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_results = self._bbox_forward(i, x, rois) ms_scores.append(bbox_results['cls_score']) if i < self.num_stages - 1: bbox_label = bbox_results['cls_score'].argmax(dim=1) rois = self.bbox_head[i].regress_by_class( rois, bbox_label, bbox_results['bbox_pred'], img_metas[0]) cls_score = sum(ms_scores) / self.num_stages det_bboxes, det_labels = self.bbox_head[-1].get_bboxes( rois, cls_score, bbox_results['bbox_pred'], img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if det_bboxes.shape[0] == 0: mask_classes = self.mask_head[-1].num_classes segm_result = [[] for _ in range(mask_classes)] else: _bboxes = ( det_bboxes[:, :4] * det_bboxes.new_tensor(scale_factor) if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] for i in range(self.num_stages): mask_results = self._mask_forward(i, x, mask_rois) aug_masks.append( mask_results['mask_pred'].sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_metas] * self.num_stages, self.test_cfg) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['ensemble'] = segm_result if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] return results
def simple_test(self, img, img_meta, proposals=None, rescale=False): x = self.extract_feat(img) proposal_list = self.simple_test_rpn( x, img_meta, self.test_cfg.rpn) if proposals is None else proposals img_shape = img_meta[0]['img_shape'] ori_shape = img_meta[0]['ori_shape'] scale_factor = img_meta[0]['scale_factor'] # "ms" in variable names means multi-stage ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg.rcnn rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_roi_extractor = self.bbox_roi_extractor[i] bbox_head = self.bbox_head[i] bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides)], rois) if self.with_shared_head: bbox_feats = self.shared_head(bbox_feats) cls_score, bbox_pred = bbox_head(bbox_feats) ms_scores.append(cls_score) if self.test_cfg.keep_all_stages: det_bboxes, det_labels = bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, bbox_head.num_classes) ms_bbox_result['stage{}'.format(i)] = bbox_result if self.with_mask: mask_roi_extractor = self.mask_roi_extractor[i] mask_head = self.mask_head[i] if det_bboxes.shape[0] == 0: mask_classes = mask_head.num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_shared_head: mask_feats = self.shared_head(mask_feats, i) mask_pred = mask_head(mask_feats) segm_result = mask_head.get_seg_masks( mask_pred, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['stage{}'.format(i)] = segm_result if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / self.num_stages det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=rescale, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_result if self.with_mask: if det_bboxes.shape[0] == 0: mask_classes = self.mask_head[-1].num_classes - 1 segm_result = [[] for _ in range(mask_classes)] else: if isinstance(scale_factor, float): # aspect ratio fixed _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes) else: _bboxes = ( det_bboxes[:, :4] * torch.from_numpy(scale_factor).to(det_bboxes.device) if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) aug_masks = [] for i in range(self.num_stages): mask_roi_extractor = self.mask_roi_extractor[i] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_shared_head: mask_feats = self.shared_head(mask_feats) mask_pred = self.mask_head[i](mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, _bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor, rescale) ms_segm_result['ensemble'] = segm_result if not self.test_cfg.keep_all_stages: if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] else: if self.with_mask: results = { stage: (ms_bbox_result[stage], ms_segm_result[stage]) for stage in ms_bbox_result } else: results = ms_bbox_result return results
def aug_test(self, imgs, img_metas, proposals=None, rescale=False): ms_bbox_result = {} ms_segm_result = {} rcnn_test_cfg = self.test_cfg.rcnn proposal_list = self.aug_test_rpn(self.extract_feats(imgs), img_metas, self.test_cfg.rpn) aug_bboxes = [] aug_scores = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) rois = bbox2roi([proposals]) ms_scores = [] for i in range(self.num_stages): bbox_roi_extractor = self.bbox_roi_extractor[i] bbox_head = self.bbox_head[i] bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides)], rois) cls_score, bbox_pred = bbox_head(bbox_feats) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / self.num_stages det_bboxes, det_scores = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(det_bboxes) aug_scores.append(det_scores) merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) #由于集成所有的det_bboxes都为原始的大小 det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) if rescale: _det_bboxes = det_bboxes else: _det_bboxes = det_bboxes.clone() _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor'] bbox_results = bbox2result(_det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_results if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head[-1].num_classes - 1)] else: aug_sum_masks = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) aug_masks = [] for i in range(self.num_stages): mask_roi_extractor = self.mask_roi_extractor[i] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) mask_pred = self.mask_head[i](mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) #先对所有stages的mask平均 merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) aug_sum_masks.append(merged_masks) #再对所有的多尺度图片平均,由于mask大小都是28*28,所以无需复杂考虑,但是此处增强后所有图片均为正常模式,翻转的图片恢复 merged_masks = np.mean(aug_sum_masks, axis=0) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, ori_shape, scale_factor=1.0, rescale=False) ms_segm_result['ensemble'] = segm_result #简易测试tta是否正确 # img_h, img_w = ori_shape[:2] # if img_h == 506: # i = 0 # elif img_h == 480: # i = 1 # elif img_h == 551: # i = 2 # elif img_h == 546: # i = 3 # elif img_h == 463: # i = 4 # # img = plt.imread("/home/zhangyun/下载/津南比赛数据集/ori_coco/train2014/{}.jpg".format(i)) # img = np.array(img) # h, w, _ = img.shape # bboxes = np.vstack(bbox_results) # if segm_result is not None: # segms = mmcv.concat_list(segm_result) # inds = np.where(bboxes[:, -1] > 0.3)[0] # for i in inds: # color_mask = np.random.randint( # 0, 256, (1, 3), dtype=np.uint8) # mask = maskUtils.decode(segms[i]).astype(np.bool) # img[mask] = img[mask] * 0.3 + color_mask * 0.7 # # labels = [ # np.full(bbox.shape[0], i, dtype=np.int32) # for i, bbox in enumerate(bbox_results) # ] # labels = np.concatenate(labels) # if 0.3 > 0: # assert bboxes.shape[1] == 5 # scores = bboxes[:, -1] # inds = scores > 0.3 # bboxes = bboxes[inds, :] # labels = labels[inds] # plt.imshow(img) # for bbox in bboxes: # plt.gca().add_patch( # plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], # bbox[3] - bbox[1], fill=False, # edgecolor='r', linewidth=3) # ) # # plt.show() if not self.test_cfg.keep_all_stages: if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] else: if self.with_mask: results = { stage: (ms_bbox_result[stage], ms_segm_result[stage]) for stage in ms_bbox_result } else: results = ms_bbox_result return results