def aug_test_bboxes(self, feats, img_metas, rescale=False):
    """Detect boxes with test-time augmentation and a single final NMS.

    Every augmented view is run through the head with NMS disabled; the
    raw predictions are merged by ``self.merge_aug_bboxes`` and only then
    passed through ``multiclass_nms``.

    Args:
        feats (list[Tensor]): One entry per test-time augmentation; each
            entry is the feature input handed to ``self.forward``.
        img_metas (list[list[dict]]): Outer list follows the
            augmentations, inner list the images of the batch. Each dict
            holds image information; ``scale_factor`` of the first entry
            is read here.
        rescale (bool, optional): If True, return the boxes exactly as
            they come out of NMS on the merged predictions. If False,
            multiply them by the ``scale_factor`` of the first
            augmentation. Defaults to False.

    Returns:
        list[ndarray]: bbox results of each class.
    """
    # Both entry points must accept ``with_nms`` so that NMS can be
    # postponed until the augmented predictions have been merged.
    nms_aware = [
        'with_nms' in signature(fn).parameters
        for fn in (self.get_bboxes, self._get_bboxes_single)
    ]
    assert all(nms_aware), \
        f'{self.__class__.__name__}' \
        ' does not support test-time augmentation'

    per_aug_bboxes, per_aug_scores = [], []
    per_aug_factors = []  # optional score factors consumed by NMS
    for feat, metas in zip(feats, img_metas):
        # only one image in the batch, hence the trailing ``[0]``
        head_outs = self.forward(feat)
        call_args = head_outs + (metas, self.test_cfg, False, False)
        prediction = self.get_bboxes(*call_args)[0]
        per_aug_bboxes.append(prediction[0])
        per_aug_scores.append(prediction[1])
        # Some detectors (e.g., ATSS, FCOS, YOLOv3) return a third
        # element that adjusts the scores before NMS.
        if len(prediction) >= 3:
            per_aug_factors.append(prediction[2])

    # after merging, bboxes are rescaled to the original image size
    merged_bboxes, merged_scores = self.merge_aug_bboxes(
        per_aug_bboxes, per_aug_scores, img_metas)
    if per_aug_factors:
        merged_factors = torch.cat(per_aug_factors, dim=0)
    else:
        merged_factors = None

    nms_cfg = self.test_cfg
    det_bboxes, det_labels = multiclass_nms(
        merged_bboxes,
        merged_scores,
        nms_cfg.score_thr,
        nms_cfg.nms,
        nms_cfg.max_per_img,
        score_factors=merged_factors)

    if not rescale:
        # Work on a copy so the NMS output itself is left untouched.
        first_scale = img_metas[0][0]['scale_factor']
        scaled_bboxes = det_bboxes.clone()
        scaled_bboxes[:, :4] *= det_bboxes.new_tensor(first_scale)
        det_bboxes = scaled_bboxes
    return bbox2result(det_bboxes, det_labels, self.num_classes)
def simple_test(self, img, img_metas, rescale=False):
    """Run inference without test-time augmentation.

    Args:
        img (list[torch.Tensor]): Images to detect on; forwarded
            unchanged to ``self.extract_feat``.
        img_metas (list[dict]): List of image information.
        rescale (bool, optional): Forwarded to
            ``self.bbox_head.get_bboxes``. Defaults to False.

    Returns:
        list[list[np.ndarray]]: BBox results of each image and classes.
            The outer list corresponds to each image. The inner list
            corresponds to each class. During ONNX export the raw
            ``get_bboxes`` output is returned instead.
    """
    head = self.bbox_head
    feats = self.extract_feat(img)
    head_outs = head(feats)
    detections = head.get_bboxes(*head_outs, img_metas, rescale=rescale)

    # skip post-processing when exporting to ONNX
    if torch.onnx.is_in_onnx_export():
        return detections

    results = []
    for boxes, labels in detections:
        results.append(bbox2result(boxes, labels, head.num_classes))
    return results
def aug_test(self, x, proposal_list, img_metas, rescale=False):
    """Run inference with test-time augmentation.

    When ``rescale`` is False the returned boxes are multiplied by the
    ``scale_factor`` of the first augmentation; otherwise they are
    returned as produced by ``self.aug_test_bboxes``.

    Returns:
        list: A single-element list holding either the bbox results or,
            when ``self.with_mask`` is set, a ``(bbox_results,
            segm_results)`` tuple.
    """
    det_bboxes, det_labels = self.aug_test_bboxes(
        x, img_metas, proposal_list, self.test_cfg)

    boxes_out = det_bboxes
    if not rescale:
        first_scale = img_metas[0][0]['scale_factor']
        boxes_out = det_bboxes.clone()
        boxes_out[:, :4] *= det_bboxes.new_tensor(first_scale)
    bbox_results = bbox2result(boxes_out, det_labels,
                               self.bbox_head.num_classes)

    if not self.with_mask:
        return [bbox_results]

    # det_bboxes always keep the original scale, so the mask branch is
    # fed the unscaled detections rather than ``boxes_out``.
    segm_results = self.aug_test_mask(x, img_metas, det_bboxes, det_labels)
    return [(bbox_results, segm_results)]
def simple_test(self,
                x,
                proposal_list,
                img_metas,
                proposals=None,
                rescale=False):
    """Run inference without test-time augmentation.

    ``proposals`` is accepted but not read by this method.

    Returns:
        list: Per-image bbox results, or per-image ``(bbox, segm)``
            tuples when ``self.with_mask`` is set.
    """
    assert self.with_bbox, 'Bbox head must be implemented.'

    det_bboxes, det_labels = self.simple_test_bboxes(
        x, img_metas, proposal_list, self.test_cfg, rescale=rescale)

    bbox_results = []
    for idx, boxes in enumerate(det_bboxes):
        bbox_results.append(
            bbox2result(boxes, det_labels[idx],
                        self.bbox_head.num_classes))

    if self.with_mask:
        segm_results = self.simple_test_mask(
            x, img_metas, det_bboxes, det_labels, rescale=rescale)
        return list(zip(bbox_results, segm_results))
    return bbox_results
async def async_simple_test(self,
                            x,
                            proposal_list,
                            img_metas,
                            proposals=None,
                            rescale=False):
    """Asynchronously run inference without test-time augmentation.

    ``proposals`` is accepted but not read by this method.

    Returns:
        The bbox results, or a ``(bbox_results, segm_results)`` tuple
        when ``self.with_mask`` is set.
    """
    assert self.with_bbox, 'Bbox head must be implemented.'

    det_bboxes, det_labels = await self.async_test_bboxes(
        x, img_metas, proposal_list, self.test_cfg, rescale=rescale)
    bbox_results = bbox2result(det_bboxes, det_labels,
                               self.bbox_head.num_classes)

    if self.with_mask:
        # The mask branch gets its own sub-config, if one is defined.
        segm_results = await self.async_test_mask(
            x,
            img_metas,
            det_bboxes,
            det_labels,
            rescale=rescale,
            mask_test_cfg=self.test_cfg.get('mask'))
        return bbox_results, segm_results
    return bbox_results
def aug_test(self, imgs, img_metas, rescale=False):
    """Augment testing of CornerNet.

    Consecutive images are processed as (image, flipped image) pairs:
    each pair is concatenated, run through the detector once, and the
    two raw outputs are collected for ``self.merge_aug_results``.

    Args:
        imgs (list[Tensor]): Augmented images.
        img_metas (list[list[dict]]): Meta information of each image,
            e.g., image size, scaling factor, etc.
        rescale (bool): Not read by this method. Default: False.

    Note:
        ``imgs`` must include flipped image pairs.

    Returns:
        list[list[np.ndarray]]: BBox results of each image and classes.
            The outer list corresponds to each image. The inner list
            corresponds to each class.
    """
    num_imgs = len(imgs)

    assert img_metas[0][0]['flip'] + img_metas[1][0]['flip'], (
        'aug test must have flipped image pair')

    aug_results = []
    # Step through indices two at a time; a trailing unpaired image is
    # ignored.
    for first in range(0, num_imgs - 1, 2):
        second = first + 1
        paired = torch.cat([imgs[first], imgs[second]])
        feats = self.extract_feat(paired)
        head_outs = self.bbox_head(feats)
        pair_metas = [img_metas[first], img_metas[second]]
        pair_dets = self.bbox_head.get_bboxes(*head_outs, pair_metas,
                                              False, False)
        aug_results.extend([pair_dets[0], pair_dets[1]])

    bboxes, labels = self.merge_aug_results(aug_results, img_metas)
    return [bbox2result(bboxes, labels, self.bbox_head.num_classes)]
def aug_test(self, features, proposal_list, img_metas, rescale=False):
    """Test with augmentations.

    For every augmented view the proposals are mapped into that view,
    refined stage by stage, and scored with the stage-averaged
    classification score. The per-view predictions are merged, passed
    through NMS once, and (optionally) fed to the mask stages.

    Args:
        features: One feature input per augmentation, iterated in
            lockstep with ``img_metas``.
        proposal_list: Proposals; only ``proposal_list[0][:, :4]`` is
            read.
        img_metas (list[list[dict]]): Per-augmentation image info. The
            keys ``img_shape``, ``scale_factor``, ``flip`` and
            ``flip_direction`` of the first dict of each entry are
            read, plus ``ori_shape`` of the very first dict for masks.
        rescale (bool): Accepted for interface compatibility; this
            implementation does not read it.

    Returns:
        list: A single-element list holding either the bbox result or,
            when ``self.with_mask`` is set, a ``(bbox_result,
            segm_result)`` tuple. ``segm_result`` is a per-class list
            in both the empty and non-empty case.
    """
    rcnn_test_cfg = self.test_cfg
    aug_bboxes = []
    aug_scores = []
    for x, img_meta in zip(features, img_metas):
        # only one image in the batch
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        flip = img_meta[0]['flip']
        flip_direction = img_meta[0]['flip_direction']

        proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                 scale_factor, flip, flip_direction)
        # "ms" in variable names means multi-stage
        ms_scores = []

        rois = bbox2roi([proposals])
        for i in range(self.num_stages):
            bbox_results = self._bbox_forward(i, x, rois)
            ms_scores.append(bbox_results['cls_score'])

            # Every stage but the last refines the RoIs for the next.
            if i < self.num_stages - 1:
                bbox_label = bbox_results['cls_score'][:, :-1].argmax(
                    dim=1)
                rois = self.bbox_head[i].regress_by_class(
                    rois, bbox_label, bbox_results['bbox_pred'],
                    img_meta[0])

        # Average the classification scores over all stages; the box
        # regression of the last stage is used as-is.
        cls_score = sum(ms_scores) / float(len(ms_scores))
        bboxes, scores = self.bbox_head[-1].get_bboxes(
            rois,
            cls_score,
            bbox_results['bbox_pred'],
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        aug_bboxes.append(bboxes)
        aug_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)

    if not self.with_mask:
        return [bbox_result]

    if det_bboxes.shape[0] == 0:
        # One empty list per class. This matches the per-class layout
        # of the non-empty branch; wrapping it in a further list would
        # give callers a different shape when nothing is detected.
        segm_result = [[] for _ in range(self.mask_head[-1].num_classes)]
    else:
        aug_masks = []
        aug_img_metas = []
        for x, img_meta in zip(features, img_metas):
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            flip_direction = img_meta[0]['flip_direction']
            # Map the merged detections back into this augmented view.
            _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                   scale_factor, flip, flip_direction)
            mask_rois = bbox2roi([_bboxes])
            for i in range(self.num_stages):
                mask_results = self._mask_forward(i, x, mask_rois)
                aug_masks.append(
                    mask_results['mask_pred'].sigmoid().cpu().numpy())
                # One meta entry per collected mask prediction.
                aug_img_metas.append(img_meta)
        merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                       self.test_cfg)

        ori_shape = img_metas[0][0]['ori_shape']
        segm_result = self.mask_head[-1].get_seg_masks(
            merged_masks,
            det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=1.0,
            rescale=False)
    return [(bbox_result, segm_result)]
def simple_test(self, x, proposal_list, img_metas, rescale=False):
    """Run multi-stage inference without test-time augmentation.

    The proposals of all images are refined stage by stage, the
    classification scores of every stage are averaged per image, and
    the last stage's head turns them into detections. When
    ``self.with_mask`` is set, the mask predictions of all stages are
    merged per image as well.

    Returns:
        list: Per-image bbox results, or per-image ``(bbox, segm)``
            tuples when ``self.with_mask`` is set.
    """
    assert self.with_bbox, 'Bbox head must be implemented.'
    num_imgs = len(proposal_list)
    img_shapes = tuple(meta['img_shape'] for meta in img_metas)
    ori_shapes = tuple(meta['ori_shape'] for meta in img_metas)
    scale_factors = tuple(meta['scale_factor'] for meta in img_metas)
    rcnn_test_cfg = self.test_cfg

    # Scores of every stage, each already split per image.
    stage_scores = []

    rois = bbox2roi(proposal_list)
    # Sizes used to split the batched predictions back to each image.
    chunk_sizes = tuple(len(proposals) for proposals in proposal_list)
    for stage in range(self.num_stages):
        stage_out = self._bbox_forward(stage, x, rois)

        rois = rois.split(chunk_sizes, 0)
        cls_score = stage_out['cls_score'].split(chunk_sizes, 0)
        bbox_pred = stage_out['bbox_pred']
        if isinstance(bbox_pred, torch.Tensor):
            bbox_pred = bbox_pred.split(chunk_sizes, 0)
        else:
            # Non-tensor predictions are split by the head itself.
            bbox_pred = self.bbox_head[stage].bbox_pred_split(
                bbox_pred, chunk_sizes)
        stage_scores.append(cls_score)

        # All stages but the last refine the RoIs and re-batch them;
        # after the last stage ``rois`` stays split per image.
        if stage < self.num_stages - 1:
            refined = []
            for j in range(num_imgs):
                label_j = cls_score[j][:, :-1].argmax(dim=1)
                refined.append(self.bbox_head[stage].regress_by_class(
                    rois[j], label_j, bbox_pred[j], img_metas[j]))
            rois = torch.cat(refined)

    # average scores of each image by stages
    cls_score = [
        sum([scores[i] for scores in stage_scores]) /
        float(len(stage_scores)) for i in range(num_imgs)
    ]

    # apply bbox post-processing to each image individually
    det_bboxes, det_labels = [], []
    for i in range(num_imgs):
        boxes, labels = self.bbox_head[-1].get_bboxes(
            rois[i],
            cls_score[i],
            bbox_pred[i],
            img_shapes[i],
            scale_factors[i],
            rescale=rescale,
            cfg=rcnn_test_cfg)
        det_bboxes.append(boxes)
        det_labels.append(labels)

    bbox_results = [
        bbox2result(boxes, labels, self.bbox_head[-1].num_classes)
        for boxes, labels in zip(det_bboxes, det_labels)
    ]

    if not self.with_mask:
        return bbox_results

    if all(boxes.shape[0] == 0 for boxes in det_bboxes):
        # Nothing detected anywhere: one empty list per class and image.
        mask_classes = self.mask_head[-1].num_classes
        segm_results = [[[] for _ in range(mask_classes)]
                        for _ in range(num_imgs)]
    else:
        if rescale and not isinstance(scale_factors[0], float):
            scale_factors = [
                torch.from_numpy(factor).to(det_bboxes[0].device)
                for factor in scale_factors
            ]
        # With ``rescale`` the detections are multiplied by the scale
        # factor before being used as mask RoIs.
        mask_boxes = []
        for i, boxes in enumerate(det_bboxes):
            coords = boxes[:, :4]
            mask_boxes.append(coords * scale_factors[i] if rescale
                              else coords)
        mask_rois = bbox2roi(mask_boxes)
        rois_per_img = tuple(boxes.size(0) for boxes in mask_boxes)

        stage_masks = []
        for stage in range(self.num_stages):
            mask_pred = self._mask_forward(stage, x, mask_rois)['mask_pred']
            # split batch mask prediction back to each image
            stage_masks.append([
                m.sigmoid().cpu().numpy()
                for m in mask_pred.split(rois_per_img, 0)
            ])

        # apply mask post-processing to each image individually
        segm_results = []
        for i in range(num_imgs):
            if det_bboxes[i].shape[0] == 0:
                segm_results.append(
                    [[] for _ in range(self.mask_head[-1].num_classes)])
                continue
            masks_of_img = [masks[i] for masks in stage_masks]
            merged_masks = merge_aug_masks(
                masks_of_img, [[img_metas[i]]] * self.num_stages,
                rcnn_test_cfg)
            segm_results.append(self.mask_head[-1].get_seg_masks(
                merged_masks, mask_boxes[i], det_labels[i], rcnn_test_cfg,
                ori_shapes[i], scale_factors[i], rescale))

    return list(zip(bbox_results, segm_results))