Пример #1
0
    def aug_test(self, img_feats, proposal_list, img_metas, rescale=False):
        """Test with augmentations.

        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """
        if self.with_semantic:
            semantic_feats = [
                self.semantic_head(feat)[1] for feat in img_feats
            ]
        else:
            semantic_feats = [None] * len(img_metas)

        # recompute feats to save memory
        # proposal_list = self.aug_test_rpn(img_feats, img_metas,
        #                                   self.test_cfg.rpn)

        rcnn_test_cfg = self.test_cfg
        aug_bboxes = []
        aug_scores = []
        for x, img_meta, semantic in zip(img_feats, img_metas, semantic_feats):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']

            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip)
            # "ms" in variable names means multi-stage
            ms_scores = []

            rois = bbox2roi([proposals])
            for i in range(self.num_stages):
                bbox_head = self.bbox_head[i]
                bbox_results = self._bbox_forward(i,
                                                  x,
                                                  rois,
                                                  semantic_feat=semantic)
                ms_scores.append(bbox_results['cls_score'])

                if i < self.num_stages - 1:
                    bbox_label = bbox_results['cls_score'].argmax(dim=1)
                    rois = bbox_head.regress_by_class(
                        rois, bbox_label, bbox_results['bbox_pred'],
                        img_meta[0])

            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = self.bbox_head[-1].get_bboxes(
                rois,
                cls_score,
                bbox_results['bbox_pred'],
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)

        if self.with_mask:
            if det_bboxes.shape[0] == 0:
                mask_classes = self.mask_head[-1].num_classes
                segm_result = [[] for _ in range(mask_classes - 1)]
                mask_scores = [[] for _ in range(mask_classes - 1)]
            else:
                aug_masks = []
                aug_img_metas = []
                for x, img_meta, semantic in zip(img_feats, img_metas,
                                                 semantic_feats):
                    img_shape = img_meta[0]['img_shape']
                    scale_factor = img_meta[0]['scale_factor']
                    flip = img_meta[0]['flip']
                    _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                           scale_factor, flip)
                    mask_rois = bbox2roi([_bboxes])
                    mask_feats = self.mask_roi_extractor[-1](
                        x[:len(self.mask_roi_extractor[-1].featmap_strides)],
                        mask_rois)
                    if self.with_semantic:
                        semantic_feat = semantic
                        mask_semantic_feat = self.semantic_roi_extractor(
                            [semantic_feat], mask_rois)
                        if mask_semantic_feat.shape[-2:] != mask_feats.shape[
                                -2:]:
                            mask_semantic_feat = F.adaptive_avg_pool2d(
                                mask_semantic_feat, mask_feats.shape[-2:])
                        mask_feats += mask_semantic_feat
                    last_feat = None
                    for i in range(self.num_stages):
                        mask_head = self.mask_head[i]
                        if self.mask_info_flow:
                            mask_pred, last_feat = mask_head(
                                mask_feats, last_feat)
                        else:
                            mask_pred = mask_head(mask_feats)
                        aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                        aug_img_metas.append(img_meta)
                merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                               self.test_cfg)

                ori_shape = img_metas[0][0]['ori_shape']
                segm_result = self.mask_head[-1].get_seg_masks(
                    merged_masks,
                    det_bboxes,
                    det_labels,
                    rcnn_test_cfg,
                    ori_shape,
                    scale_factor=1.0,
                    rescale=False)

                import numpy as np
                merged_masks_torch = torch.from_numpy(merged_masks).to(
                    torch.float16).cuda()
                mask_results = dict(
                    mask_pred=merged_masks_torch, mask_feats=mask_feats
                )  # NOTE not sure whether mask features is correct here

                mask_iou_pred = self.mask_iou_head(
                    mask_results['mask_feats'],
                    mask_results['mask_pred'][range(det_labels.size(0)),
                                              det_labels])
                mask_scores = self.mask_iou_head.get_mask_scores(
                    mask_iou_pred, det_bboxes, det_labels)

            return bbox_result, (segm_result, mask_scores)
        else:
            return bbox_result
    def aug_test(self, imgs, img_metas, **kwargs):
        """Test with augmentations.

        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """
        rpn_test_cfg = self.models[0].test_cfg.rpn
        imgs_per_gpu = len(img_metas[0])
        aug_proposals = [[] for _ in range(imgs_per_gpu)]
        for model in self.models:
            # recompute feats to save memory
            for x, img_meta in zip(model.extract_feats(imgs), img_metas):
                proposal_list = model.rpn_head.simple_test_rpn(x, img_meta)
                for i, proposals in enumerate(proposal_list):
                    aug_proposals[i].append(proposals)
        # after merging, proposals will be rescaled to the original image size
        proposal_list = [
            merge_aug_proposals(proposals, img_meta, rpn_test_cfg)
            for proposals, img_meta in zip(aug_proposals, img_metas)
        ]
 
        semantic_feats = [None] * len(img_metas)

        rcnn_test_cfg = self.models[0].test_cfg
        aug_bboxes = []
        aug_scores = []
        for model in self.models:
            for x, img_meta, semantic in zip(imgs, img_metas, semantic_feats):
                # only one image in the batch
                img_shape = img_meta[0]['img_shape']
                scale_factor = img_meta[0]['scale_factor']
                flip = img_meta[0]['flip']
                flip_direction = img_meta[0]['flip_direction']
    
                proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                         scale_factor, flip, flip_direction)
                # "ms" in variable names means multi-stage
                ms_scores = []
    
                rois = bbox2roi([proposals])
                for i in range(model.roi_head.num_stages):
                    bbox_head = model.roi_head.bbox_head[i]
                    bbox_results = model.roi_head._bbox_forward(
                        i, x, rois, semantic_feat=semantic)
                    ms_scores.append(bbox_results['cls_score'])
    
                    if i < model.roi_head.num_stages - 1:
                        bbox_label = bbox_results['cls_score'].argmax(dim=1)
                        rois = bbox_head.regress_by_class(
                            rois, bbox_label, bbox_results['bbox_pred'],
                            img_meta[0])
    
                cls_score = sum(ms_scores) / float(len(ms_scores))
                bboxes, scores = model.bbox_head[-1].get_bboxes(
                    rois,
                    cls_score,
                    bbox_results['bbox_pred'],
                    img_shape,
                    scale_factor,
                    rescale=False,
                    cfg=None)
                aug_bboxes.append(bboxes)
                aug_scores.append(scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.models[0].bbox_head[-1].num_classes)

        if self.models[0].with_mask:
            if det_bboxes.shape[0] == 0:
                segm_result = [[]
                               for _ in range(self.models[0].mask_head[-1].num_classes -
                                              1)]
            else:
                aug_masks = []
                aug_img_metas = []
                for model in self.models:
                    for x, img_meta, semantic in zip(imgs, img_metas,
                                                     semantic_feats):
                        img_shape = img_meta[0]['img_shape']
                        scale_factor = img_meta[0]['scale_factor']
                        flip = img_meta[0]['flip']
                        flip_direction = img_meta[0]['flip_direction']
                        _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                               scale_factor, flip, flip_direction)
                        mask_rois = bbox2roi([_bboxes])
                        mask_feats = model.mask_roi_extractor[-1](
                            x[:len(model.mask_roi_extractor[-1].featmap_strides)],
                            mask_rois)
                        if model.with_semantic:
                            semantic_feat = semantic
                            mask_semantic_feat = model.semantic_roi_extractor(
                                [semantic_feat], mask_rois)
                            if mask_semantic_feat.shape[-2:] != mask_feats.shape[
                                    -2:]:
                                mask_semantic_feat = F.adaptive_avg_pool2d(
                                    mask_semantic_feat, mask_feats.shape[-2:])
                            mask_feats += mask_semantic_feat
                        last_feat = None
                        for i in range(model.num_stages):
                            mask_head = model.mask_head[i]
                            if model.mask_info_flow:
                                mask_pred, last_feat = mask_head(
                                    mask_feats, last_feat)
                            else:
                                mask_pred = mask_head(mask_feats)
                            aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                            aug_img_metas.append(img_meta)
                merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                               self.models[0].test_cfg)

                ori_shape = img_metas[0][0]['ori_shape']
                segm_result = self.models[0].mask_head[-1].get_seg_masks(
                    merged_masks,
                    det_bboxes,
                    det_labels,
                    rcnn_test_cfg,
                    ori_shape,
                    scale_factor=1.0,
                    rescale=False)
            return bbox_result, segm_result
        else:
            return bbox_result
Пример #3
0
    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           centernesses,
                           mlvl_anchors,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False):
        """Transform outputs for a single batch item into labeled boxes.

        Args:
            cls_scores (list[Tensor]): Box scores for a single scale level
                Has shape (num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for a single
                scale level with shape (num_anchors * 4, H, W).
            centernesses (list[Tensor]): Centerness for a single scale level
                Has shape (num_anchors * 1, H, W).
            mlvl_anchors (list[Tensor]): Box reference for a single scale level
                with shape (num_total_anchors, 4).
            img_shape (tuple[int]): Shape of the input image,
                (height, width, 3).
            scale_factor (ndarray): Scale factor of the image arange as
                (w_scale, h_scale, w_scale, h_scale).
            cfg (mmcv.Config | None): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Default: False.

        Returns:
            tuple(Tensor):
                det_bboxes (Tensor): BBox predictions in shape (n, 5), where
                    the first 4 columns are bounding box positions
                    (tl_x, tl_y, br_x, br_y) and the 5-th column is a score
                    between 0 and 1.
                det_labels (Tensor): A (n,) tensor where each item is the
                    predicted class label of the corresponding box.
        """
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_centerness = []
        for cls_score, bbox_pred, centerness, anchors in zip(
                cls_scores, bbox_preds, centernesses, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

            scores = cls_score.permute(1, 2, 0).reshape(
                -1, self.cls_out_channels).sigmoid()
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                max_scores, _ = (scores * centerness[:, None]).max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                centerness = centerness[topk_inds]

            bboxes = self.bbox_coder.decode(anchors,
                                            bbox_pred,
                                            max_shape=img_shape)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_centerness.append(centerness)

        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        # Add a dummy background class to the backend when using sigmoid
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
        mlvl_centerness = torch.cat(mlvl_centerness)

        det_bboxes, det_labels = multiclass_nms(mlvl_bboxes,
                                                mlvl_scores,
                                                cfg.score_thr,
                                                cfg.nms,
                                                cfg.max_per_img,
                                                score_factors=mlvl_centerness)
        return det_bboxes, det_labels
Пример #4
0
    def aug_test(self, features, proposal_list, img_metas, rescale=False):
        """Test with augmentations.

        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """
        rcnn_test_cfg = self.test_cfg
        aug_bboxes = []
        aug_scores = []
        for x, img_meta in zip(features, img_metas):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            flip_direction = img_meta[0]['flip_direction']

            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip, flip_direction)
            # "ms" in variable names means multi-stage
            ms_scores = []

            rois = bbox2roi([proposals])
            for i in range(self.num_stages):
                bbox_results = self._bbox_forward(i, x, rois)
                ms_scores.append(bbox_results['cls_score'])

                if i < self.num_stages - 1:
                    bbox_label = bbox_results['cls_score'][:, :-1].argmax(
                        dim=1)
                    rois = self.bbox_head[i].regress_by_class(
                        rois, bbox_label, bbox_results['bbox_pred'],
                        img_meta[0])

            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = self.bbox_head[-1].get_bboxes(
                rois,
                cls_score,
                bbox_results['bbox_pred'],
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)

        if self.with_mask:
            if det_bboxes.shape[0] == 0:
                segm_result = [[[]
                                for _ in range(self.mask_head[-1].num_classes)]
                               ]
            else:
                aug_masks = []
                aug_img_metas = []
                for x, img_meta in zip(features, img_metas):
                    img_shape = img_meta[0]['img_shape']
                    scale_factor = img_meta[0]['scale_factor']
                    flip = img_meta[0]['flip']
                    flip_direction = img_meta[0]['flip_direction']
                    _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                           scale_factor, flip, flip_direction)
                    mask_rois = bbox2roi([_bboxes])
                    for i in range(self.num_stages):
                        mask_results = self._mask_forward(i, x, mask_rois)
                        aug_masks.append(
                            mask_results['mask_pred'].sigmoid().cpu().numpy())
                        aug_img_metas.append(img_meta)
                merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                               self.test_cfg)

                ori_shape = img_metas[0][0]['ori_shape']
                segm_result = self.mask_head[-1].get_seg_masks(
                    merged_masks,
                    det_bboxes,
                    det_labels,
                    rcnn_test_cfg,
                    ori_shape,
                    scale_factor=1.0,
                    rescale=False)
            return [(bbox_result, segm_result)]
        else:
            return [bbox_result]
Пример #5
0
    def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
        """Test with augmentations.

        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """
        # recompute feats to save memory
        proposal_list = self.aug_test_rpn(self.extract_feats(imgs), img_metas,
                                          self.test_cfg.rpn)

        rcnn_test_cfg = self.test_cfg.rcnn
        aug_bboxes = []
        aug_scores = []
        for x, img_meta in zip(self.extract_feats(imgs), img_metas):
            # only one image in the batch
            img_shape = img_metas[0]['img_shape']
            scale_factor = img_metas[0]['scale_factor']
            flip = img_metas[0]['flip']

            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip)
            # "ms" in variable names means multi-stage
            ms_scores = []

            rois = bbox2roi([proposals])
            for i in range(self.num_stages):
                bbox_roi_extractor = self.bbox_roi_extractor[i]
                bbox_head = self.bbox_head[i]

                bbox_feats = bbox_roi_extractor(
                    x[:len(bbox_roi_extractor.featmap_strides)], rois)
                if self.with_shared_head:
                    bbox_feats = self.shared_head(bbox_feats)

                cls_score, bbox_pred = bbox_head(bbox_feats)
                ms_scores.append(cls_score)

                if i < self.num_stages - 1:
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(rois, bbox_label,
                                                      bbox_pred, img_metas[0])

            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = self.bbox_head[-1].get_det_bboxes(rois,
                                                               cls_score,
                                                               bbox_pred,
                                                               img_shape,
                                                               scale_factor,
                                                               rescale=False,
                                                               cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)

        if self.with_mask:
            if det_bboxes.shape[0] == 0:
                segm_result = [[]
                               for _ in range(self.mask_head[-1].num_classes -
                                              1)]
            else:
                aug_masks = []
                aug_img_metas = []
                for x, img_meta in zip(self.extract_feats(imgs), img_metas):
                    img_shape = img_metas[0]['img_shape']
                    scale_factor = img_metas[0]['scale_factor']
                    flip = img_metas[0]['flip']
                    _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                           scale_factor, flip)
                    mask_rois = bbox2roi([_bboxes])
                    for i in range(self.num_stages):
                        mask_feats = self.mask_roi_extractor[i](
                            x[:len(self.mask_roi_extractor[i].featmap_strides
                                   )], mask_rois)
                        if self.with_shared_head:
                            mask_feats = self.shared_head(mask_feats)
                        mask_pred = self.mask_head[i](mask_feats)
                        aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                        aug_img_metas.append(img_meta)
                merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                               self.test_cfg.rcnn)

                ori_shape = img_metas[0][0]['ori_shape']
                segm_result = self.mask_head[-1].get_seg_masks(
                    merged_masks,
                    det_bboxes,
                    det_labels,
                    rcnn_test_cfg,
                    ori_shape,
                    scale_factor=1.0,
                    rescale=False)
            return bbox_result, segm_result
        else:
            return bbox_result
Пример #6
0
    def _bbox_post_process(self,
                           mlvl_scores,
                           mlvl_bboxes,
                           scale_factor,
                           cfg,
                           rescale=False,
                           with_nms=True,
                           mlvl_score_factors=None,
                           **kwargs):
        """bbox post-processing method.

        The boxes would be rescaled to the original image scale and do
        the nms operation. Usually with_nms is False is used for aug test.

        Args:
            mlvl_scores (list[Tensor]): Box scores from all scale
                levels of a single image, each item has shape
                (num_bboxes, num_class).
            mlvl_bboxes (list[Tensor]): Decoded bboxes from all scale
                levels of a single image, each item has shape (num_bboxes, 4).
            scale_factor (ndarray, optional): Scale factor of the image arange
                as (w_scale, h_scale, w_scale, h_scale).
            cfg (mmcv.Config): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before return boxes.
                Default: True.
            mlvl_score_factors (list[Tensor], optional): Score factor from
                all scale levels of a single image, each item has shape
                (num_bboxes, ). Default: None.

        Returns:
            tuple[Tensor]: Results of detected bboxes and labels. If with_nms
                is False and mlvl_score_factor is None, return mlvl_bboxes and
                mlvl_scores, else return mlvl_bboxes, mlvl_scores and
                mlvl_score_factor. Usually with_nms is False is used for aug
                test. If with_nms is True, then return the following format

                - det_bboxes (Tensor): Predicted bboxes with shape \
                    [num_bboxes, 5], where the first 4 columns are bounding \
                    box positions (tl_x, tl_y, br_x, br_y) and the 5-th \
                    column are scores between 0 and 1.
                - det_labels (Tensor): Predicted labels of the corresponding \
                    box with shape [num_bboxes].
        """
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        # Add a dummy background class to the backend when using sigmoid
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)

        mlvl_iou_preds = torch.cat(mlvl_score_factors)
        mlvl_nms_scores = (mlvl_scores * mlvl_iou_preds[:, None]).sqrt()
        det_bboxes, det_labels = multiclass_nms(mlvl_bboxes,
                                                mlvl_nms_scores,
                                                cfg.score_thr,
                                                cfg.nms,
                                                cfg.max_per_img,
                                                score_factors=None)
        if self.with_score_voting and len(det_bboxes) > 0:
            det_bboxes, det_labels = self.score_voting(det_bboxes, det_labels,
                                                       mlvl_bboxes,
                                                       mlvl_nms_scores,
                                                       cfg.score_thr)

        return det_bboxes, det_labels
Пример #7
0
 def _get_bboxes_single(self,
                        cls_scores,
                        bbox_preds,
                        mlvl_anchors,
                        mlvl_masks,
                        img_shape,
                        scale_factor,
                        cfg,
                        rescale=False):
     cfg = self.test_cfg if cfg is None else cfg
     assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
     mlvl_bboxes = []
     mlvl_scores = []
     for cls_score, bbox_pred, anchors, mask in zip(cls_scores, bbox_preds,
                                                    mlvl_anchors,
                                                    mlvl_masks):
         assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
         # if no location is kept, end.
         if mask.sum() == 0:
             continue
         # reshape scores and bbox_pred
         cls_score = cls_score.permute(1, 2,
                                       0).reshape(-1, self.cls_out_channels)
         if self.use_sigmoid_cls:
             scores = cls_score.sigmoid()
         else:
             scores = cls_score.softmax(-1)
         bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
         # filter scores, bbox_pred w.r.t. mask.
         # anchors are filtered in get_anchors() beforehand.
         scores = scores[mask, :]
         bbox_pred = bbox_pred[mask, :]
         if scores.dim() == 0:
             anchors = anchors.unsqueeze(0)
             scores = scores.unsqueeze(0)
             bbox_pred = bbox_pred.unsqueeze(0)
         # filter anchors, bbox_pred, scores w.r.t. scores
         nms_pre = cfg.get('nms_pre', -1)
         if nms_pre > 0 and scores.shape[0] > nms_pre:
             if self.use_sigmoid_cls:
                 max_scores, _ = scores.max(dim=1)
             else:
                 # remind that we set FG labels to [0, num_class-1]
                 # since mmdet v2.0
                 # BG cat_id: num_class
                 max_scores, _ = scores[:, :-1].max(dim=1)
             _, topk_inds = max_scores.topk(nms_pre)
             anchors = anchors[topk_inds, :]
             bbox_pred = bbox_pred[topk_inds, :]
             scores = scores[topk_inds, :]
         bboxes = self.bbox_coder.decode(anchors,
                                         bbox_pred,
                                         max_shape=img_shape)
         mlvl_bboxes.append(bboxes)
         mlvl_scores.append(scores)
     mlvl_bboxes = torch.cat(mlvl_bboxes)
     if rescale:
         mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
     mlvl_scores = torch.cat(mlvl_scores)
     if self.use_sigmoid_cls:
         # Add a dummy background class to the backend when using sigmoid
         # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
         # BG cat_id: num_class
         padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
         mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
     # multi class NMS
     det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                             cfg.score_thr, cfg.nms,
                                             cfg.max_per_img)
     return det_bboxes, det_labels
Пример #8
0
 def _get_bboxes_single(self,
                        cls_scores,
                        bbox_preds,
                        mlvl_points,
                        img_shape,
                        scale_factor,
                        cfg,
                        rescale=False,
                        nms=True):
     cfg = self.test_cfg if cfg is None else cfg
     assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
     mlvl_bboxes = []
     mlvl_scores = []
     for i_lvl, (cls_score, bbox_pred, points) in enumerate(
             zip(cls_scores, bbox_preds, mlvl_points)):
         assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
         cls_score = cls_score.permute(1, 2,
                                       0).reshape(-1, self.cls_out_channels)
         if self.use_sigmoid_cls:
             scores = cls_score.sigmoid()
         else:
             scores = cls_score.softmax(-1)
         bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
         nms_pre = cfg.get('nms_pre', -1)
         if nms_pre > 0 and scores.shape[0] > nms_pre:
             if self.use_sigmoid_cls:
                 max_scores, _ = scores.max(dim=1)
             else:
                 # remind that we set FG labels to [0, num_class-1]
                 # since mmdet v2.0
                 # BG cat_id: num_class
                 max_scores, _ = scores[:, :-1].max(dim=1)
             _, topk_inds = max_scores.topk(nms_pre)
             points = points[topk_inds, :]
             bbox_pred = bbox_pred[topk_inds, :]
             scores = scores[topk_inds, :]
         bbox_pos_center = torch.cat([points[:, :2], points[:, :2]], dim=1)
         bboxes = bbox_pred * self.point_strides[i_lvl] + bbox_pos_center
         x1 = bboxes[:, 0].clamp(min=0, max=img_shape[1])
         y1 = bboxes[:, 1].clamp(min=0, max=img_shape[0])
         x2 = bboxes[:, 2].clamp(min=0, max=img_shape[1])
         y2 = bboxes[:, 3].clamp(min=0, max=img_shape[0])
         bboxes = torch.stack([x1, y1, x2, y2], dim=-1)
         mlvl_bboxes.append(bboxes)
         mlvl_scores.append(scores)
     mlvl_bboxes = torch.cat(mlvl_bboxes)
     if rescale:
         mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
     mlvl_scores = torch.cat(mlvl_scores)
     if self.use_sigmoid_cls:
         # Add a dummy background class to the backend when using sigmoid
         # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
         # BG cat_id: num_class
         padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
         mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
     if nms:
         det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                                 cfg.score_thr, cfg.nms,
                                                 cfg.max_per_img)
         return det_bboxes, det_labels
     else:
         return mlvl_bboxes, mlvl_scores
Пример #9
0
    def get_bboxes_single(self, results_raw, scale_factor, cfg, rescale=False):
        assert len(results_raw) == self.num_scales
        multi_lvl_bboxes = []
        multi_lvl_cls_scores = []
        multi_lvl_conf_scores = []
        for i_scale in range(self.num_scales):
            result_raw = results_raw[i_scale]
            num_grid_h = result_raw.size(1)
            num_grid_w = result_raw.size(2)

            # (anchors*(cxcywh+conf+class), h, w ) -- > (anchors, h, w, cxcywh+conf+class)
            prediction_raw = result_raw.view(self.num_anchors_per_scale,
                                             self.num_attrib,
                                             num_grid_h, num_grid_w).permute(
                                                 0, 2, 3, 1).contiguous()

            # grid x y offset, with stride step included

            stride = self.anchor_strides[i_scale]

            grid_x, grid_y = self._get_anchors_grid_xy(num_grid_h, num_grid_w,
                                                       stride,
                                                       result_raw.device)

            # Get outputs x, y

            x_center_pred = torch.sigmoid(
                prediction_raw[..., 0]) * stride + grid_x  # Center x
            y_center_pred = torch.sigmoid(
                prediction_raw[..., 1]) * stride + grid_y  # Center y

            anchors = torch.tensor(self.anchor_base_sizes[i_scale],
                                   device=result_raw.device,
                                   dtype=torch.float32)

            anchor_w = anchors[:, 0:1].view((-1, 1, 1))
            anchor_h = anchors[:, 1:2].view((-1, 1, 1))

            w_pred = torch.exp(prediction_raw[..., 2]) * anchor_w  # Width
            h_pred = torch.exp(prediction_raw[..., 3]) * anchor_h  # Height

            x1_pred = x_center_pred - w_pred / 2
            y1_pred = y_center_pred - h_pred / 2

            x2_pred = x_center_pred + w_pred / 2
            y2_pred = y_center_pred + h_pred / 2

            bbox_pred = torch.stack((x1_pred, y1_pred, x2_pred, y2_pred),
                                    dim=3).view((-1, 4))  # cxcywh
            conf_pred = torch.sigmoid(prediction_raw[..., 4]).view(-1)  # Conf
            if self.hierarchical_label:
                cls_pred = self.loss_cls.parase_output(prediction_raw[..., 5:])
            else:
                cls_pred = torch.sigmoid(prediction_raw[..., 5:]).view(
                    -1, self.num_classes_no_bkg)  # Cls pred one-hot.

            conf_thr = cfg.get('conf_thr', -1)
            conf_inds = conf_pred.ge(conf_thr).nonzero().flatten()
            bbox_pred = bbox_pred[conf_inds, :]
            cls_pred = cls_pred[conf_inds, :]
            conf_pred = conf_pred[conf_inds]

            nms_pre = cfg.get('nms_pre', -1)
            if 0 < nms_pre < conf_pred.size(0):
                _, topk_inds = conf_pred.topk(nms_pre)
                bbox_pred = bbox_pred[topk_inds, :]
                cls_pred = cls_pred[topk_inds, :]
                conf_pred = conf_pred[topk_inds]
            multi_lvl_bboxes.append(bbox_pred)
            multi_lvl_cls_scores.append(cls_pred)
            multi_lvl_conf_scores.append(conf_pred)

        multi_lvl_bboxes = torch.cat(multi_lvl_bboxes)
        multi_lvl_cls_scores = torch.cat(multi_lvl_cls_scores)
        multi_lvl_conf_scores = torch.cat(multi_lvl_conf_scores)

        if multi_lvl_conf_scores.size(0) == 0:
            return torch.zeros((0, 5)), torch.zeros((0, ))

        if rescale:
            multi_lvl_bboxes /= multi_lvl_bboxes.new_tensor(scale_factor)

        padding = multi_lvl_cls_scores.new_zeros(multi_lvl_cls_scores.shape[0],
                                                 1)
        multi_lvl_cls_scores = torch.cat([padding, multi_lvl_cls_scores],
                                         dim=1)

        det_bboxes, det_labels = multiclass_nms(
            multi_lvl_bboxes,
            multi_lvl_cls_scores,
            cfg.score_thr,
            cfg.nms,
            cfg.max_per_img,
            score_factors=multi_lvl_conf_scores)

        return det_bboxes, det_labels
Пример #10
0
    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           mlvl_anchors,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False,
                           with_nms=True):
        """Transform outputs for a single batch item into labeled boxes.

        Args:
            cls_scores (list[Tensor]): Box scores for a single scale level
                has shape (num_classes, H, W).
            bbox_preds (list[Tensor]): Box distribution logits for a single
                scale level with shape (4*(n+1), H, W), n is max value of
                integral set.
            mlvl_anchors (list[Tensor]): Box reference for a single scale level
                with shape (num_total_anchors, 4).
            img_shape (tuple[int]): Shape of the input image,
                (height, width, 3).
            scale_factor (ndarray): Scale factor of the image arange as
                (w_scale, h_scale, w_scale, h_scale).
            cfg (mmcv.Config | None): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before return boxes.
                Default: True.

        Returns:
            tuple(Tensor):
                det_bboxes (Tensor): Bbox predictions in shape (N, 5), where
                    the first 4 columns are bounding box positions
                    (tl_x, tl_y, br_x, br_y) and the 5-th column is a score
                    between 0 and 1.
                det_labels (Tensor): A (N,) tensor where each item is the
                    predicted class label of the corresponding box.
        """
        cfg = self.test_cfg if cfg is None else cfg
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        for cls_score, bbox_pred, stride, anchors in zip(
                cls_scores, bbox_preds, self.anchor_generator.strides,
                mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            assert stride[0] == stride[1]

            scores = cls_score.permute(1, 2, 0).reshape(
                -1, self.cls_out_channels).sigmoid()
            bbox_pred = bbox_pred.permute(1, 2, 0)
            if self.use_dfl:
                bbox_pred = self.integral(bbox_pred) * stride[0]
            else:
                bbox_pred = bbox_pred.reshape( (-1,4) ) * stride[0]

            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                max_scores, _ = scores.max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]

            bboxes = distance2bbox(
                self.anchor_center(anchors), bbox_pred, max_shape=img_shape)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)

        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)

        mlvl_scores = torch.cat(mlvl_scores)
        # Add a dummy background class to the backend when using sigmoid
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)

        if with_nms:
            det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                                    cfg.score_thr, cfg.nms,
                                                    cfg.max_per_img)
            return det_bboxes, det_labels
        else:
            return mlvl_bboxes, mlvl_scores
Пример #11
0
def single_gpu_test(args, cfg, model):
    testImgList = os.listdir(args.testImgpath)
    for imgfile in testImgList:
        imgfile = imgfile.strip()
        img = Image.open(os.path.join(args.testImgpath, imgfile))
        image = img.convert('RGB')
        img = np.array(image)
        width, height, channel = img.shape
        rows = int(math.ceil(1.0 * (width - args.cropsize) / args.stride)) + 1
        cols = int(math.ceil(1.0 * (height - args.cropsize) / args.stride)) + 1
        multi_bboxes = list()
        multi_scores = list()
        for row in range(rows):
            if width > args.cropsize:
                y_start = min(row * args.stride, width - args.cropsize)
                y_end = y_start + args.cropsize
            else:
                y_start = 0
                y_end = width
            for col in range(cols):
                if height > args.cropsize:
                    x_start = min(col * args.stride, height - args.cropsize)
                    x_end = x_start + args.cropsize
                else:
                    x_start = 0
                    x_end = height
                subimg = copy.deepcopy(img[y_start:y_end, x_start:x_end, :])
                w, h, c = np.shape(subimg)
                outimg = np.zeros((args.cropsize, args.cropsize, 3))
                outimg[0:w, 0:h, :] = subimg
                result = inference_detector(model, outimg)  #15
                bboxes = np.vstack(result)
                labels = [  #0-15
                    np.full(bbox.shape[0], i + 1, dtype=np.int32)
                    for i, bbox in enumerate(result)
                ]
                labels = np.concatenate(labels)
                if len(bboxes) > 0:
                    bboxes[:, :4] += [x_start, y_start, x_start, y_start]
                    multi_bboxes.append(bboxes[:, :4])
                    scores = np.zeros(
                        (bboxes.shape[0], len(ODAI_LABEL_MAP.keys())))  #0-15
                    for i, j in zip(range(bboxes.shape[0]), labels):
                        scores[i, j] = bboxes[i, 4]
                    multi_scores.append(scores)
        crop_num = len(multi_bboxes)
        if crop_num > 0:
            multi_bboxes = np.vstack(multi_bboxes)
            multi_scores = np.vstack(multi_scores)
            multi_bboxes = torch.Tensor(multi_bboxes)
            multi_scores = torch.Tensor(multi_scores)
            score_thr = 0.1
            nms = dict(type='nms', iou_thr=0.5)
            max_per_img = 2000
            det_bboxes, det_labels = multiclass_nms(multi_bboxes, multi_scores,
                                                    score_thr, nms,
                                                    max_per_img)

            if det_bboxes.shape[0] > 0:
                det_bboxes = np.array(det_bboxes)
                det_labels = np.array(det_labels)  #0-14
                image = draw_boxes_with_label_and_scores(
                    img, det_bboxes[:, :4], det_bboxes[:, 4], det_labels, 0)
                image.save(os.path.join(args.saveImgpath, imgfile))

                CLASS_DOTA = ODAI_LABEL_MAP.keys()
                LABEl_NAME_MAP = get_label_name_map()
                write_handle_r = {}
                osp(args.saveTxtpath)

                for sub_class in CLASS_DOTA:
                    if sub_class == 'back-ground':
                        continue
                    write_handle_r[sub_class] = open(
                        os.path.join(args.saveTxtpath,
                                     'Task2_%s.txt' % sub_class), 'a+')
                """
                :det_bboxes: format [x_c, y_c, w, h, theta, score]
                :det_labels: [label]
                """
                boxes = []

                for rect in det_bboxes[:, :4]:
                    boxes.append([rect[0], rect[1], rect[2], rect[3]])

                rboxes = np.array(boxes, dtype=np.float32)

                for i, rbox in enumerate(rboxes):
                    command = '%s %.5f %.5f %.5f %.5f %.5f\n' % (
                        imgfile[:-4], det_bboxes[i, 4], rbox[0], rbox[1],
                        rbox[2], rbox[3])

                    write_handle_r[LABEl_NAME_MAP[int(det_labels[i]) +
                                                  1]].write(command)

                for sub_class in CLASS_DOTA:
                    if sub_class == 'back-ground':
                        continue
                    write_handle_r[sub_class].close()
Пример #12
0
    def aug_test(self,
                 imgs,
                 img_metas,
                 templates,
                 proposals=None,
                 rescale=False):
        """Test with augmentations.

        If rescale is False, then returned bboxes will fit the scale
        of imgs[0].
        """
        # recompute feats to save memory
        #y = self.extract_feats(imgs, templates)
        proposal_list = self.aug_test_rpn(self.extract_feats(imgs, templates),
                                          img_metas, self.test_cfg.rpn)
        rcnn_test_cfg = self.test_cfg.rcnn
        aug_bboxes = []
        aug_scores = []
        for x, img_meta in zip(self.extract_feats(imgs, templates), img_metas):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']

            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip)
            # "ms" in variable names means multi-stage
            ms_scores = []

            rois = bbox2roi([proposals])
            for i in range(self.num_stages):
                bbox_head = self.bbox_head[i]
                cls_score, bbox_pred = self._bbox_forward_test(i, x, rois)
                ms_scores.append(cls_score)

                if i < self.num_stages - 1:
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(rois, bbox_label,
                                                      bbox_pred, img_meta[0])

            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = self.bbox_head[-1].get_det_bboxes(rois,
                                                               cls_score,
                                                               bbox_pred,
                                                               img_shape,
                                                               scale_factor,
                                                               rescale=False,
                                                               cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)
        return bbox_result
Пример #13
0
    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           mlvl_points,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False,
                           with_nms=True):
        """Transform outputs for a single batch item into bbox predictions.

        Args:
            cls_scores (list[Tensor]): Box iou-aware scores for a single scale
                level with shape (num_points * num_classes, H, W).
            bbox_preds (list[Tensor]): Box offsets for a single scale
                level with shape (num_points * 4, H, W).
            mlvl_points (list[Tensor]): Box reference for a single scale level
                with shape (num_total_points, 4).
            img_shape (tuple[int]): Shape of the input image,
                (height, width, 3).
            scale_factor (ndarray): Scale factor of the image arrange as
                (w_scale, h_scale, w_scale, h_scale).
            cfg (mmcv.Config | None): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before returning boxes.
                Default: True.

        Returns:
            tuple(Tensor):
                det_bboxes (Tensor): BBox predictions in shape (n, 5), where
                    the first 4 columns are bounding box positions
                    (tl_x, tl_y, br_x, br_y) and the 5-th column is a score
                    between 0 and 1.
                det_labels (Tensor): A (n,) tensor where each item is the
                    predicted class label of the corresponding box.
        """
        cfg = self.test_cfg if cfg is None else cfg
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
        mlvl_bboxes = []
        mlvl_scores = []
        for cls_score, bbox_pred, points in zip(cls_scores, bbox_preds,
                                                mlvl_points):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            scores = cls_score.permute(1, 2, 0).reshape(
                -1, self.cls_out_channels).contiguous().sigmoid()
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4).contiguous()

            nms_pre = cfg.get('nms_pre', -1)
            if 0 < nms_pre < scores.shape[0]:
                max_scores, _ = scores.max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                points = points[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
            bboxes = distance2bbox(points, bbox_pred, max_shape=img_shape)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
        if with_nms:
            det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                                    cfg.score_thr, cfg.nms,
                                                    cfg.max_per_img)
            return det_bboxes, det_labels
        else:
            return mlvl_bboxes, mlvl_scores
Пример #14
0
    def get_bboxes(self,
                   rois,
                   cls_score,
                   bbox_pred,
                   img_shape,
                   scale_factor,
                   rescale=False,
                   cfg=None):
        """Transform network output for a batch into bbox predictions.

        Args:
            rois (Tensor): Boxes to be transformed. Has shape (num_boxes, 5).
                last dimension 5 arrange as (batch_index, x1, y1, x2, y2).
            cls_score (Tensor): Box scores, has shape
                (num_boxes, num_classes + 1).
            bbox_pred (Tensor, optional): Box energies / deltas.
                has shape (num_boxes, num_classes * 4).
            img_shape (Sequence[int], optional): Maximum bounds for boxes,
                specifies (H, W, C) or (H, W).
            scale_factor (ndarray): Scale factor of the
               image arrange as (w_scale, h_scale, w_scale, h_scale).
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            cfg (obj:`ConfigDict`): `test_cfg` of Bbox Head. Default: None

        Returns:
            tuple[Tensor, Tensor]:
                Fisrt tensor is `det_bboxes`, has the shape
                (num_boxes, 5) and last
                dimension 5 represent (tl_x, tl_y, br_x, br_y, score).
                Second tensor is the labels with shape (num_boxes, ).
        """

        # some loss (Seesaw loss..) may have custom activation
        if self.custom_cls_channels:
            scores = self.loss_cls.get_activation(cls_score)
        else:
            scores = F.softmax(cls_score,
                               dim=-1) if cls_score is not None else None
        # bbox_pred would be None in some detector when with_reg is False,
        # e.g. Grid R-CNN.
        if bbox_pred is not None:
            bboxes = self.bbox_coder.decode(rois[..., 1:],
                                            bbox_pred,
                                            max_shape=img_shape)
        else:
            bboxes = rois[:, 1:].clone()
            if img_shape is not None:
                bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1])
                bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0])

        if rescale and bboxes.size(0) > 0:

            scale_factor = bboxes.new_tensor(scale_factor)
            bboxes = (bboxes.view(bboxes.size(0), -1, 4) / scale_factor).view(
                bboxes.size()[0], -1)

        if cfg is None:
            return bboxes, scores
        else:
            det_bboxes, det_labels = multiclass_nms(bboxes, scores,
                                                    cfg.score_thr, cfg.nms,
                                                    cfg.max_per_img)

            return det_bboxes, det_labels
Пример #15
0
 def get_bboxes_single(self,
                       cls_scores,
                       bbox_cls_preds,
                       bbox_reg_preds,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False):
     cfg = self.test_cfg if cfg is None else cfg
     mlvl_bboxes = []
     mlvl_scores = []
     mlvl_confids = []
     assert len(cls_scores) == len(bbox_cls_preds) == len(
         bbox_reg_preds) == len(mlvl_anchors)
     for cls_score, bbox_cls_pred, bbox_reg_pred, anchors in zip(
             cls_scores, bbox_cls_preds, bbox_reg_preds, mlvl_anchors):
         assert cls_score.size()[-2:] == bbox_cls_pred.size(
         )[-2:] == bbox_reg_pred.size()[-2::]
         cls_score = cls_score.permute(1, 2,
                                       0).reshape(-1, self.cls_out_channels)
         if self.use_sigmoid_cls:
             scores = cls_score.sigmoid()
         else:
             scores = cls_score.softmax(-1)
         bbox_cls_pred = bbox_cls_pred.permute(1, 2, 0).reshape(
             -1, self.side_num * 4)
         bbox_reg_pred = bbox_reg_pred.permute(1, 2, 0).reshape(
             -1, self.side_num * 4)
         nms_pre = cfg.get('nms_pre', -1)
         if nms_pre > 0 and scores.shape[0] > nms_pre:
             if self.use_sigmoid_cls:
                 max_scores, _ = scores.max(dim=1)
             else:
                 max_scores, _ = scores[:, :-1].max(dim=1)
             _, topk_inds = max_scores.topk(nms_pre)
             anchors = anchors[topk_inds, :]
             bbox_cls_pred = bbox_cls_pred[topk_inds, :]
             bbox_reg_pred = bbox_reg_pred[topk_inds, :]
             scores = scores[topk_inds, :]
         bbox_preds = [
             bbox_cls_pred.contiguous(),
             bbox_reg_pred.contiguous()
         ]
         bboxes, confids = self.bbox_coder.decode(
             anchors.contiguous(), bbox_preds, max_shape=img_shape)
         mlvl_bboxes.append(bboxes)
         mlvl_scores.append(scores)
         mlvl_confids.append(confids)
     mlvl_bboxes = torch.cat(mlvl_bboxes)
     if rescale:
         mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
     mlvl_scores = torch.cat(mlvl_scores)
     mlvl_confids = torch.cat(mlvl_confids)
     if self.use_sigmoid_cls:
         padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
         mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
     det_bboxes, det_labels = multiclass_nms(
         mlvl_bboxes,
         mlvl_scores,
         cfg.score_thr,
         cfg.nms,
         cfg.max_per_img,
         score_factors=mlvl_confids)
     return det_bboxes, det_labels
Пример #16
0
 def get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       mlvl_anchors,
                       mlvl_masks,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False):
     assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
     mlvl_bboxes = []
     mlvl_scores = []
     for cls_score, bbox_pred, anchors, mask in zip(cls_scores, bbox_preds,
                                                    mlvl_anchors,
                                                    mlvl_masks):
         assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
         # if no centeration is kept, end.
         if mask.sum() == 0:
             continue
         # reshape scores and bbox_pred
         cls_score = cls_score.permute(1, 2,
                                       0).reshape(-1, self.cls_out_channels)
         if self.use_sigmoid_cls:
             scores = cls_score.sigmoid()
         else:
             scores = cls_score.softmax(-1)
         bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
         # filter scores, bbox_pred w.r.t. mask.
         # anchors are filtered in get_anchors() beforehand.
         scores = scores[mask, :]
         bbox_pred = bbox_pred[mask, :]
         if scores.dim() == 0:
             anchors = anchors.unsqueeze(0)
             scores = scores.unsqueeze(0)
             bbox_pred = bbox_pred.unsqueeze(0)
         # filter anchors, bbox_pred, scores w.r.t. scores
         nms_pre = cfg.get('nms_pre', -1)
         if nms_pre > 0 and scores.shape[0] > nms_pre:
             if self.use_sigmoid_cls:
                 max_scores, _ = scores.max(dim=1)
             else:
                 max_scores, _ = scores[:, 1:].max(dim=1)
             _, topk_inds = max_scores.topk(nms_pre)
             anchors = anchors[topk_inds, :]
             bbox_pred = bbox_pred[topk_inds, :]
             scores = scores[topk_inds, :]
         bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                             self.target_stds, img_shape)
         mlvl_bboxes.append(bboxes)
         mlvl_scores.append(scores)
     mlvl_bboxes = torch.cat(mlvl_bboxes)
     if rescale:
         mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
     mlvl_scores = torch.cat(mlvl_scores)
     if self.use_sigmoid_cls:
         padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
         mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
     # multi class NMS
     det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                             cfg.score_thr, cfg.nms,
                                             cfg.max_per_img)
     return det_bboxes, det_labels
Пример #17
0
    def aug_test(self, imgs, img_metas, rescale=False, **kwargs):
        """
        Test with augmentations.
        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """

        rpn_test_cfg = self.models[0].test_cfg.rpn
        rcnn_test_cfg = self.models[0].test_cfg.rcnn

        # For each model, compute detections
        aug_bboxes = []
        aug_scores = []
        aug_img_metas = []
        for model in self.models:
            for x, img_meta in zip(model.extract_feats(imgs), img_metas):
                proposal_list = model.simple_test_rpn(x, img_meta,
                                                      rpn_test_cfg)

                _, semantic_feat = model.semantic_head(x)

                img_shape = img_meta[0]['img_shape']
                scale_factor = img_meta[0]['scale_factor']

                ms_scores = []
                rois = bbox2roi(proposal_list)
                for i in range(model.num_stages):
                    bbox_head = model.bbox_head[i]
                    cls_score, bbox_pred = model._bbox_forward_test(
                        i, x, rois, semantic_feat=semantic_feat)
                    ms_scores.append(cls_score)

                    if i < model.num_stages - 1:
                        bbox_label = cls_score.argmax(dim=1)
                        rois = bbox_head.regress_by_class(
                            rois, bbox_label, bbox_pred, img_meta[0])

                cls_score = sum(ms_scores) / float(len(ms_scores))
                bboxes, scores = model.bbox_head[-1].get_det_bboxes(
                    rois,
                    cls_score,
                    bbox_pred,
                    img_shape,
                    scale_factor,
                    rescale=False,
                    cfg=None)
                aug_bboxes.append(bboxes)
                aug_scores.append(scores)
                aug_img_metas.append(img_meta)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(aug_bboxes,
                                                        aug_scores,
                                                        aug_img_metas,
                                                        rcnn_test_cfg,
                                                        type='concat')
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.models[0].bbox_head[-1].num_classes)

        if self.models[0].with_mask:
            if det_bboxes.shape[0] == 0:
                segm_result = [
                    []
                    for _ in range(self.models[0].mask_head[-1].num_classes -
                                   1)
                ]
            else:
                aug_masks = []
                aug_img_metas = []
                for model in [self.models[0]]:
                    for x, img_meta in\
                            zip(model.extract_feats(imgs), img_metas):
                        scale_factor = img_meta[0]['scale_factor']
                        flip = img_meta[0]['flip']
                        img_shape = img_meta[0]['img_shape']
                        _bboxes = (det_bboxes[:, :4] *
                                   scale_factor if rescale else det_bboxes)
                        mask_rois = bbox2roi([_bboxes])
                        mask_roi_extractor = model.mask_roi_extractor[-1]
                        mask_feats = mask_roi_extractor(
                            x[:len(mask_roi_extractor.featmap_strides)],
                            mask_rois)

                        _, semantic_feat = model.semantic_head(x)
                        mask_semantic_feat = model.semantic_roi_extractor(
                            [semantic_feat], mask_rois)
                        mask_feats += mask_semantic_feat
                        last_feat = None

                        for i in range(model.num_stages):
                            mask_head = model.mask_head[i]
                            if model.mask_info_flow:
                                mask_pred, last_feat = mask_head(
                                    mask_feats, last_feat)
                            else:
                                mask_pred = mask_head(mask_feats)
                            aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                            aug_img_metas.append(img_meta)
                merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                               rcnn_test_cfg)

                ori_shape = img_metas[0][0]['ori_shape']
                scale_factor = img_metas[0][0]['scale_factor']
                segm_result = self.models[0].mask_head[-1].get_seg_masks(
                    merged_masks,
                    det_bboxes,
                    det_labels,
                    rcnn_test_cfg,
                    ori_shape,
                    scale_factor=1.0,
                    rescale=rescale)

                # compute relations
                rel_model = self.models[0]
                for x, img_meta in zip(rel_model.extract_feats(imgs),
                                       img_metas):
                    _, semantic_feat = rel_model.semantic_head(x)
                    filename = img_meta[0]['filename']
                    im_height, im_width, _ = img_meta[0]['img_shape']
                    scale_factor = img_meta[0]['scale_factor']
                    ori_shape = img_meta[0]['ori_shape']
                    relation_preds = rel_model._rel_forward_test(
                        x,
                        det_bboxes,
                        det_labels,
                        merged_masks,
                        scale_factor,
                        ori_shape,
                        semantic_feat=semantic_feat,
                        im_width=im_width,
                        im_height=im_height)

                    if rel_model.rel_save_folder is not None:
                        np.save(
                            os.path.join(rel_model.rel_save_folder,
                                         filename + '.npy'), relation_preds)

            return bbox_result, segm_result
        else:
            return bbox_result
    def _get_bboxes_single(self,
                           cls_score_list,
                           bbox_pred_list,
                           mlvl_anchors,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False,
                           with_nms=True):
        """Transform outputs for a single batch item into bbox predictions.

        Args:
            cls_score_list (list[Tensor]): Box scores for a single scale level
                Has shape (num_anchors * num_classes, H, W).
            bbox_pred_list (list[Tensor]): Box energies / deltas for a single
                scale level with shape (num_anchors * 4, H, W).
            mlvl_anchors (list[Tensor]): Box reference for a single scale level
                with shape (num_total_anchors, 4).
            img_shape (tuple[int]): Shape of the input image,
                (height, width, 3).
            scale_factor (ndarray): Scale factor of the image arange as
                (w_scale, h_scale, w_scale, h_scale).
            cfg (mmcv.Config): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before return boxes.
                Default: True.

        Returns:
            Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
                are bounding box positions (tl_x, tl_y, br_x, br_y) and the
                5-th column is a score between 0 and 1.
        """
        cfg = self.test_cfg if cfg is None else cfg
        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        for cls_score, bbox_pred, anchors in zip(cls_score_list,
                                                 bbox_pred_list, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            cls_score = cls_score.permute(1, 2,
                                          0).reshape(-1, self.cls_out_channels)
            if self.use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                # Get maximum scores for foreground classes.
                if self.use_sigmoid_cls:
                    max_scores, _ = scores.max(dim=1)
                else:
                    # remind that we set FG labels to [0, num_class-1]
                    # since mmdet v2.0
                    # BG cat_id: num_class
                    max_scores, _ = scores[:, :-1].max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
            bboxes = self.bbox_coder.decode(anchors,
                                            bbox_pred,
                                            max_shape=img_shape)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        if self.use_sigmoid_cls:
            # Add a dummy background class to the backend when using sigmoid
            # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
            # BG cat_id: num_class
            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)

        if with_nms:
            det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                                    cfg.score_thr, cfg.nms,
                                                    cfg.max_per_img)
            return det_bboxes, det_labels
        else:
            return mlvl_bboxes, mlvl_scores
Пример #19
0
    def _get_bboxes_single(self,
                           pred_maps_list,
                           scale_factor,
                           cfg,
                           rescale=False,
                           with_nms=True):
        """Transform outputs for a single batch item into bbox predictions.

        Args:
            pred_maps_list (list[Tensor]): Prediction maps for different scales
                of each single image in the batch.
            scale_factor (ndarray): Scale factor of the image arrange as
                (w_scale, h_scale, w_scale, h_scale).
            cfg (mmcv.Config | None): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before return boxes.
                Default: True.

        Returns:
            tuple(Tensor):
                det_bboxes (Tensor): BBox predictions in shape (n, 5), where
                    the first 4 columns are bounding box positions
                    (tl_x, tl_y, br_x, br_y) and the 5-th column is a score
                    between 0 and 1.
                det_labels (Tensor): A (n,) tensor where each item is the
                    predicted class label of the corresponding box.
        """
        cfg = self.test_cfg if cfg is None else cfg
        assert len(pred_maps_list) == self.num_levels
        multi_lvl_bboxes = []
        multi_lvl_cls_scores = []
        multi_lvl_conf_scores = []
        num_levels = len(pred_maps_list)
        featmap_sizes = [
            pred_maps_list[i].shape[-2:] for i in range(num_levels)
        ]
        multi_lvl_anchors = self.anchor_generator.grid_anchors(
            featmap_sizes, pred_maps_list[0][0].device)
        for i in range(self.num_levels):
            # get some key info for current scale
            pred_map = pred_maps_list[i]
            stride = self.featmap_strides[i]

            # (h, w, num_anchors*num_attrib) -> (h*w*num_anchors, num_attrib)
            pred_map = pred_map.permute(1, 2, 0).reshape(-1, self.num_attrib)

            pred_map[..., :2] = torch.sigmoid(pred_map[..., :2])
            bbox_pred = self.bbox_coder.decode(multi_lvl_anchors[i],
                                               pred_map[..., :4], stride)
            # conf and cls
            conf_pred = torch.sigmoid(pred_map[..., 4]).view(-1)
            cls_pred = torch.sigmoid(pred_map[..., 5:]).view(
                -1, self.num_classes)  # Cls pred one-hot.

            # Filtering out all predictions with conf < conf_thr
            conf_thr = cfg.get('conf_thr', -1)
            if conf_thr > 0:
                # add as_tuple=False for compatibility in Pytorch 1.6
                conf_inds = conf_pred.ge(conf_thr).nonzero(
                    as_tuple=False).flatten()
                bbox_pred = bbox_pred[conf_inds, :]
                cls_pred = cls_pred[conf_inds, :]
                conf_pred = conf_pred[conf_inds]

            # Get top-k prediction
            nms_pre = cfg.get('nms_pre', -1)
            if 0 < nms_pre < conf_pred.size(0):
                _, topk_inds = conf_pred.topk(nms_pre)
                bbox_pred = bbox_pred[topk_inds, :]
                cls_pred = cls_pred[topk_inds, :]
                conf_pred = conf_pred[topk_inds]

            # Save the result of current scale
            multi_lvl_bboxes.append(bbox_pred)
            multi_lvl_cls_scores.append(cls_pred)
            multi_lvl_conf_scores.append(conf_pred)

        # Merge the results of different scales together
        multi_lvl_bboxes = torch.cat(multi_lvl_bboxes)
        multi_lvl_cls_scores = torch.cat(multi_lvl_cls_scores)
        multi_lvl_conf_scores = torch.cat(multi_lvl_conf_scores)

        if with_nms and (multi_lvl_conf_scores.size(0) == 0):
            return torch.zeros((0, 5)), torch.zeros((0, ))

        if rescale:
            multi_lvl_bboxes /= multi_lvl_bboxes.new_tensor(scale_factor)

        # In mmdet 2.x, the class_id for background is num_classes.
        # i.e., the last column.
        padding = multi_lvl_cls_scores.new_zeros(multi_lvl_cls_scores.shape[0],
                                                 1)
        multi_lvl_cls_scores = torch.cat([multi_lvl_cls_scores, padding],
                                         dim=1)

        # Support exporting to onnx without nms
        if with_nms and cfg.get('nms', None) is not None:
            det_bboxes, det_labels = multiclass_nms(
                multi_lvl_bboxes,
                multi_lvl_cls_scores,
                cfg.score_thr,
                cfg.nms,
                cfg.max_per_img,
                score_factors=multi_lvl_conf_scores)
            return det_bboxes, det_labels
        else:
            return (multi_lvl_bboxes, multi_lvl_cls_scores,
                    multi_lvl_conf_scores)
Пример #20
0
    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           iou_preds,
                           mlvl_anchors,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False,
                           with_nms=True):
        """Transform outputs for a single batch item into labeled boxes.
        This method is almost same as `ATSSHead._get_bboxes_single()`.
        We use sqrt(iou_preds * cls_scores) in NMS process instead of just
        cls_scores. Besides, score voting is used when `` score_voting``
        is set to True.
        """
        assert with_nms, 'PAA only supports "with_nms=True" now'
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_iou_preds = []
        for cls_score, bbox_pred, iou_preds, anchors in zip(
                cls_scores, bbox_preds, iou_preds, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

            scores = cls_score.permute(1, 2, 0).reshape(
                -1, self.cls_out_channels).sigmoid()
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            iou_preds = iou_preds.permute(1, 2, 0).reshape(-1).sigmoid()
            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                max_scores, _ = (scores * iou_preds[:, None]).sqrt().max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                iou_preds = iou_preds[topk_inds]

            bboxes = self.bbox_coder.decode(anchors,
                                            bbox_pred,
                                            max_shape=img_shape)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_iou_preds.append(iou_preds)

        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        # Add a dummy background class to the backend when using sigmoid
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
        mlvl_iou_preds = torch.cat(mlvl_iou_preds)
        mlvl_nms_scores = (mlvl_scores * mlvl_iou_preds[:, None]).sqrt()
        det_bboxes, det_labels = multiclass_nms(mlvl_bboxes,
                                                mlvl_nms_scores,
                                                cfg.score_thr,
                                                cfg.nms,
                                                cfg.max_per_img,
                                                score_factors=None)
        if self.with_score_voting:
            det_bboxes, det_labels = self.score_voting(det_bboxes, det_labels,
                                                       mlvl_bboxes,
                                                       mlvl_nms_scores,
                                                       cfg.score_thr)

        return det_bboxes, det_labels
Пример #21
0
    def aug_test(self, features, proposal_list, img_metas, rescale=False):
        """Test with augmentations.
        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """
        rcnn_test_cfg = self.test_cfg
        aug_bboxes = []
        aug_scores = []
        aug_bboxes_tail = []
        aug_scores_tail = []
        for x, img_meta in zip(features, img_metas):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            flip_direction = img_meta[0]['flip_direction']

            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip, flip_direction)

            # "ms" in variable names means multi-stage
            ms_scores = []
            ms_scores_tail = []

            rois = bbox2roi([proposals])
            rois_tail = bbox2roi([proposals])
            for i in range(self.num_stages):
                bbox_results = self._bbox_forward(i, x, rois)
                ms_scores.append(bbox_results['cls_score'])

                bbox_results_tail = self._bbox_forward_tail(i, x, rois_tail)
                ms_scores_tail.append(bbox_results_tail['cls_score'])

                if i < self.num_stages - 1:
                    bbox_label = bbox_results['cls_score'][:, :-1].argmax(
                        dim=1)
                    rois = self.bbox_head[i].regress_by_class(
                        rois, bbox_label, bbox_results['bbox_pred'],
                        img_meta[0])

                    bbox_label_tail = bbox_results_tail[
                        'cls_score'][:, :-1].argmax(dim=1)
                    rois_tail = self.bbox_head_tail[i].regress_by_class(
                        rois_tail, bbox_label_tail,
                        bbox_results_tail['bbox_pred'], img_meta[0])

            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = self.bbox_head[-1].get_bboxes(
                rois,
                cls_score,
                bbox_results['bbox_pred'],
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            # print('a', bboxes.shape, scores.shape)
            cls_score_tail = sum(ms_scores_tail) / float(len(ms_scores_tail))
            bboxes_tail, scores_tail = self.bbox_head_tail[-1].get_bboxes(
                rois_tail,
                cls_score_tail,
                bbox_results_tail['bbox_pred'],
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            # print('b', bboxes_tail.shape, scores_tail.shape)
            # print(scores_tail)
            # print(scores)
            # if self.labels is not None:
            #     inds = []
            #     for label in self.labels:
            #         inds.append(torch.nonzero(scores == label, as_tuple=False).squeeze(1))
            #     inds = torch.cat(inds)
            #     bboxes = bboxes[inds]
            #     scores = scores[inds]
            # if self.labels_tail is not None:
            #     inds = []
            #     for label in self.labels_tail:
            #         inds.append(torch.nonzero(scores_tail == label, as_tuple=False).squeeze(1))
            #     inds = torch.cat(inds)
            #     bboxes_tail = bboxes_tail[inds]
            #     scores_tail = scores_tail[inds]
            # print(bboxes,bboxes.shape)
            # print(bboxes_tail, bboxes_tail.shape)
            # if bboxes.shape[0] == 0:
            #     det_bboxes = bboxes_tail
            #     det_labels = scores_tail
            # elif bboxes_tail.shape[0] == 0:
            #     det_bboxes = bboxes
            #     det_labels = scores
            # else:
            #     det_bboxes = torch.cat((bboxes, bboxes_tail))
            #     det_labels = torch.cat((scores, scores_tail))

            # aug_bboxes.append(det_bboxes)
            # aug_scores.append(det_labels)
            # print('c', det_bboxes.shape)
            # print('d', det_labels.shape)
            det_bboxes = torch.cat((bboxes, bboxes_tail))
            det_labels = torch.cat((scores, scores_tail))
            aug_bboxes.append(det_bboxes)
            aug_scores.append(det_labels)
            # aug_bboxes_tail.append(bboxes_tail)
            # aug_scores_tail.append(scores_tail)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        # print('e', merged_bboxes.shape, merged_scores.shape)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        # # after merging, bboxes will be rescaled to the original image size
        # merged_bboxes_tail, merged_scores_tail = merge_aug_bboxes(
        #     aug_bboxes_tail, aug_scores_tail, img_metas, rcnn_test_cfg)
        # # print('e', merged_bboxes.shape, merged_scores.shape)
        # det_bboxes_tail, det_labels_tail = multiclass_nms(merged_bboxes_tail, merged_scores_tail,
        #                                         rcnn_test_cfg.score_thr,
        #                                         rcnn_test_cfg.nms,
        #                                         rcnn_test_cfg.max_per_img)
        # if self.labels is not None:
        #     inds = []
        #     for label in self.labels:
        #         inds.append(torch.nonzero(det_labels == label, as_tuple=False).squeeze(1))
        #     inds = torch.cat(inds)
        #     det_bboxes_post = det_bboxes[inds]
        #     det_labels_post = det_labels[inds]
        # if self.labels_tail is not None:
        #     inds = []
        #     for label in self.labels_tail:
        #         inds.append(torch.nonzero(det_labels_tail == label, as_tuple=False).squeeze(1))
        #     inds = torch.cat(inds)
        #     det_bboxes_tail_post = det_bboxes_tail[inds]
        #     det_labels_tail_post = det_labels_tail[inds]

        # det_bboxes = torch.cat((det_bboxes_post, det_bboxes_tail_post))
        # det_labels = torch.cat((det_labels_post, det_labels_tail_post))

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)

        if self.with_mask:
            if det_bboxes.shape[0] == 0:
                segm_result = [[]
                               for _ in range(self.mask_head[-1].num_classes)]
            else:
                aug_masks = []
                aug_img_metas = []
                for x, img_meta in zip(features, img_metas):
                    img_shape = img_meta[0]['img_shape']
                    scale_factor = img_meta[0]['scale_factor']
                    flip = img_meta[0]['flip']
                    flip_direction = img_meta[0]['flip_direction']
                    _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                           scale_factor, flip, flip_direction)
                    mask_rois = bbox2roi([_bboxes])
                    for i in range(self.num_stages):
                        mask_results = self._mask_forward(i, x, mask_rois)
                        aug_masks.append(
                            mask_results['mask_pred'].sigmoid().cpu().numpy())
                        aug_img_metas.append(img_meta)
                merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                               self.test_cfg)

                ori_shape = img_metas[0][0]['ori_shape']
                segm_result = self.mask_head[-1].get_seg_masks(
                    merged_masks,
                    det_bboxes,
                    det_labels,
                    rcnn_test_cfg,
                    ori_shape,
                    scale_factor=1.0,
                    rescale=False)
            return [(bbox_result, segm_result)]
        else:
            return [bbox_result]