Exemplo n.º 1
0
 def loss_odm_single(self, odm_cls_score, odm_bbox_pred, labels,
                     label_weights, bbox_targets, bbox_weights,
                     refine_anchors, num_total_samples, cfg):
     """Compute the ODM classification and box-regression losses.

     The score and box maps are permuted with ``(0, 2, 3, 1)`` so the
     channel axis comes last, then flattened to one row per prediction
     before being handed to ``self.loss_odm_cls`` / ``self.loss_odm_bbox``.

     Args:
         odm_cls_score: 4-D score map; flattened to
             ``(-1, self.cls_out_channels)``.
         odm_bbox_pred: 4-D box map; flattened to ``(-1, 5)``.
         labels: classification targets, flattened to 1-D.
         label_weights: per-label weights, flattened to 1-D.
         bbox_targets: regression targets, reshaped to ``(-1, 5)``.
         bbox_weights: regression weights, reshaped to ``(-1, 5)``.
         refine_anchors: anchors reshaped to ``(-1, 5)``; only read when
             ``cfg['calc_offset']`` compares equal to ``False``.
         num_total_samples: forwarded to both losses as ``avg_factor``.
         cfg: mapping-like config providing ``get``.

     Returns:
         Tuple ``(loss_odm_cls, loss_odm_bbox)``.
     """
     # Classification: one row of class scores per prediction.
     flat_labels = labels.reshape(-1)
     flat_label_weights = label_weights.reshape(-1)
     flat_scores = odm_cls_score.permute(0, 2, 3, 1)
     flat_scores = flat_scores.reshape(-1, self.cls_out_channels)
     loss_odm_cls = self.loss_odm_cls(flat_scores,
                                      flat_labels,
                                      flat_label_weights,
                                      avg_factor=num_total_samples)

     # Regression: one 5-value row per prediction.
     flat_targets = bbox_targets.reshape(-1, 5)
     flat_weights = bbox_weights.reshape(-1, 5)
     flat_deltas = odm_bbox_pred.permute(0, 2, 3, 1).reshape(-1, 5)

     # Equality (not identity / truthiness) is intentional: a missing or
     # ``None`` setting must fall through to the raw-prediction branch.
     decode_first = cfg.get('calc_offset') == False  # noqa: E712
     if decode_first:
         # Turn the deltas into boxes relative to the refined anchors and
         # compute the loss on the decoded boxes.
         regression_input = delta2rbox(refine_anchors.reshape(-1, 5),
                                       flat_deltas)
     else:
         regression_input = flat_deltas

     loss_odm_bbox = self.loss_odm_bbox(regression_input,
                                        flat_targets,
                                        flat_weights,
                                        avg_factor=num_total_samples)
     return loss_odm_cls, loss_odm_bbox
Exemplo n.º 2
0
def bbox_decode(bbox_preds,
                anchors,
                means=[0, 0, 0, 0, 0],
                stds=[1, 1, 1, 1, 1]):
    """Decode box deltas for every image against one shared anchor set.

    Args:
        bbox_preds: predicted deltas, shape [N, 5, H, W].
        anchors: anchors of shape [H*W, 5], reused for each image.
        means: forwarded unchanged to ``delta2rbox``.
        stds: forwarded unchanged to ``delta2rbox``.

    Returns:
        Decoded boxes stacked over images, shape [N, H, W, 5].
    """
    num_imgs, _, H, W = bbox_preds.shape

    def _decode_image(img_id):
        # [5, H, W] -> [H*W, 5] so each row lines up with one anchor.
        deltas = bbox_preds[img_id].permute(1, 2, 0).reshape(-1, 5)
        decoded = delta2rbox(anchors,
                             deltas,
                             means,
                             stds,
                             wh_ratio_clip=1e-6)
        return decoded.reshape(H, W, 5)

    per_image = [_decode_image(img_id) for img_id in range(num_imgs)]
    return torch.stack(per_image, dim=0)
Exemplo n.º 3
0
    def get_bboxes_single(self,
                          cls_score_list,
                          bbox_pred_list,
                          mlvl_anchors,
                          img_shape,
                          scale_factor,
                          cfg,
                          rescale=False):
        """Turn the multi-level outputs of one image into detections.

        For every level the scores and box predictions are flattened to
        one row per anchor, optionally reduced to the ``nms_pre`` highest
        scoring rows, and converted to boxes. All levels are then
        concatenated and passed to ``multiclass_ml_nms_rbox``.

        Args:
            cls_score_list: per-level score maps (3-D, channel first).
            bbox_pred_list: per-level box maps (3-D, channel first).
            mlvl_anchors: per-level anchors, one row per prediction.
            img_shape: forwarded to ``delta2rbox``.
            scale_factor: divisor applied to the first four box values
                when ``rescale`` is true.
            cfg: test config; reads ``nms_pre``, ``calc_offset``,
                ``score_thr``, ``nms`` and ``max_per_img``.
            rescale: whether to divide boxes by ``scale_factor``.

        Returns:
            Tuple ``(det_bboxes, det_labels)`` from
            ``multiclass_ml_nms_rbox``.
        """
        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)

        boxes_per_level = []
        scores_per_level = []
        levels = zip(cls_score_list, bbox_pred_list, mlvl_anchors)
        for level_cls, level_reg, level_anchors in levels:
            assert level_cls.size()[-2:] == level_reg.size()[-2:]

            logits = level_cls.permute(1, 2, 0)
            logits = logits.reshape(-1, self.cls_out_channels)
            level_scores = (logits.sigmoid() if self.use_sigmoid_cls
                            else logits.softmax(-1))
            level_deltas = level_reg.permute(1, 2, 0).reshape(-1, 5)

            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and level_scores.shape[0] > nms_pre:
                # Rank rows by their best foreground score; with softmax
                # the first column is skipped as background.
                foreground = (level_scores if self.use_sigmoid_cls
                              else level_scores[:, 1:])
                best_score = foreground.max(dim=1)[0]
                keep = best_score.topk(nms_pre)[1]
                level_anchors = level_anchors[keep, :]
                level_deltas = level_deltas[keep, :]
                level_scores = level_scores[keep, :]

            # Only an explicit ``calc_offset == False`` means the head
            # already predicts boxes; otherwise decode against anchors.
            if cfg.get('calc_offset') == False:  # noqa: E712
                level_boxes = level_deltas
            else:
                level_boxes = delta2rbox(level_anchors, level_deltas,
                                         self.target_means,
                                         self.target_stds, img_shape)

            boxes_per_level.append(level_boxes)
            scores_per_level.append(level_scores)

        all_boxes = torch.cat(boxes_per_level)
        if rescale:
            all_boxes[..., :4] /= all_boxes.new_tensor(scale_factor)

        all_scores = torch.cat(scores_per_level)
        if self.use_sigmoid_cls:
            # Sigmoid scores have no background column; prepend a zero one.
            background = all_scores.new_zeros(all_scores.shape[0], 1)
            all_scores = torch.cat([background, all_scores], dim=1)

        det_bboxes, det_labels = multiclass_ml_nms_rbox(
            all_boxes, all_scores, cfg.score_thr, cfg.nms, cfg.max_per_img)
        return det_bboxes, det_labels
Exemplo n.º 4
0
    def get_det_bboxes(self,
                       rois,
                       cls_score,
                       bbox_pred,
                       img_shape,
                       scale_factor,
                       rescale=False,
                       cfg=None):
        """Convert RoI head outputs into boxes and, optionally, detections.

        Args:
            rois: 2-D tensor; the first column is dropped and the rest is
                used as the box (presumably the first column is a batch
                index -- confirm against the caller).
            cls_score: class logits, a list of logits (averaged), or
                ``None``. Must not be ``None`` when ``cfg`` is given,
                because the scores are then cast and passed to NMS.
            bbox_pred: box deltas, or ``None`` to use the RoIs directly.
            img_shape: ``(height, width, ...)``; forwarded to
                ``delta2rbox``, or used to clip the raw RoIs. ``None``
                disables clipping of the raw RoIs.
            scale_factor: a float, or a NumPy array converted with
                ``torch.from_numpy``; boxes are divided by it when
                ``rescale`` is true.
            rescale: whether to divide the boxes by ``scale_factor``.
            cfg: test config with ``score_thr``, ``nms`` and
                ``max_per_img``; ``None`` skips NMS.

        Returns:
            ``(bboxes, scores)`` when ``cfg`` is ``None``, otherwise the
            ``(det_bboxes, det_labels)`` pair from ``multiclass_nms_rbox``.
        """
        if isinstance(cls_score, list):
            cls_score = sum(cls_score) / float(len(cls_score))
        scores = F.softmax(cls_score, dim=1) if cls_score is not None else None

        if bbox_pred is not None:
            rois = rect2rbox(rois[:, 1:])
            bboxes = delta2rbox(rois, bbox_pred, self.target_means,
                                self.target_stds, img_shape)
        else:
            bboxes = rois[:, 1:].clone()
            if img_shape is not None:
                # Indexing with a list returns a copy, so an in-place
                # ``clamp_`` on it would be discarded; assign the clamped
                # values back so the boxes are really clipped.
                bboxes[:, [0, 2]] = bboxes[:, [0, 2]].clamp(
                    min=0, max=img_shape[1] - 1)
                bboxes[:, [1, 3]] = bboxes[:, [1, 3]].clamp(
                    min=0, max=img_shape[0] - 1)
        if rescale:
            if not isinstance(scale_factor, float):
                scale_factor = torch.from_numpy(scale_factor).to(bboxes.device)
            # Every 5th column starting at 0..3 is rescaled; the column at
            # offset 4 of each group of five is left untouched.
            bboxes[..., 0::5] = bboxes[..., 0::5] / scale_factor
            bboxes[..., 1::5] = bboxes[..., 1::5] / scale_factor
            bboxes[..., 2::5] = bboxes[..., 2::5] / scale_factor
            bboxes[..., 3::5] = bboxes[..., 3::5] / scale_factor
        if cfg is None:
            return bboxes, scores
        else:
            # bboxes = poly2rrect_torch(bboxes)
            # nms_rotated accepts degrees only.
            bboxes = bboxes.double()
            scores = scores.double()
            det_bboxes, det_labels = multiclass_nms_rbox(
                bboxes, scores, cfg.score_thr, cfg.nms, cfg.max_per_img)
            return det_bboxes, det_labels