def regress_by_class(self, rois, label, bbox_pred, img_meta):
    """Refine rotated RoIs with the deltas of their predicted class.

    Used in Cascade R-CNN.

    Args:
        rois (Tensor): shape (n, 5) or (n, 6). With 6 columns, column 0 is
            copied to the output untouched and columns 1: are decoded.
        label (Tensor): shape (n, ), class index used to pick the deltas.
        bbox_pred (Tensor): shape (n, 5*(#class+1)) or (n, 5)
        img_meta (dict): Image meta info; ``img_meta['img_shape']`` is
            forwarded to the decoder.

    Returns:
        Tensor: Regressed bboxes, the same shape as input rois.
    """
    num_cols = rois.size(1)
    assert num_cols in (5, 6)

    if not self.reg_class_agnostic:
        # Every class owns 5 consecutive delta columns starting at
        # label * 5; gather exactly that slice for each row.
        first_col = label * 5
        columns = torch.stack(tuple(first_col + k for k in range(5)), 1)
        bbox_pred = torch.gather(bbox_pred, 1, columns)
    assert bbox_pred.size(1) == 5

    boxes_only = num_cols == 5
    boxes = rois if boxes_only else rois[:, 1:]
    decoded = delta2bbox_rotated(boxes, bbox_pred, self.target_means,
                                 self.target_stds, img_meta['img_shape'])
    if boxes_only:
        return decoded
    # Put the leading column back in front of the refined boxes.
    return torch.cat((rois[:, [0]], decoded), dim=1)
def bbox_decode(bbox_preds, anchors, means=None, stds=None, num_anchors=1):
    """
    Decode bboxes from deltas, using the same anchors for every image.

    :param bbox_preds: [N,5,H,W]
    :param anchors: [H*W,5]
    :param means: mean value to decode bbox; ``None`` (the default) means
        ``[0, 0, 0, 0, 0]``
    :param stds: std value to decode bbox; ``None`` (the default) means
        ``[1, 1, 1, 1, 1]``
    :param num_anchors: leading size of each image's reshaped result; the
        decoder must return ``num_anchors * H * W`` boxes for the reshape
        to succeed
    :return: [N,num_anchors,H,W,5]
    """
    # Fresh lists per call: a list literal in the signature would be one
    # shared object that any callee could mutate for all later calls.
    if means is None:
        means = [0, 0, 0, 0, 0]
    if stds is None:
        stds = [1, 1, 1, 1, 1]

    # Unpacking four dims also enforces that the input is 4-D.
    num_imgs, _, H, W = bbox_preds.shape
    bboxes_list = []
    for img_id in range(num_imgs):
        # [5,H,W] -> [H,W,5] -> [H*W,5] so each row is one location's deltas.
        bbox_delta = bbox_preds[img_id].permute(1, 2, 0).reshape(-1, 5)
        bboxes = delta2bbox_rotated(anchors, bbox_delta, means, stds,
                                    wh_ratio_clip=1e-6)
        bboxes_list.append(bboxes.reshape(num_anchors, H, W, 5))
    return torch.stack(bboxes_list, dim=0)
def get_bboxes_single(self, cls_score_list, bbox_pred_list, mlvl_anchors,
                      img_shape, scale_factor, cfg, rescale=False):
    """
    Turn the multi-level outputs of one image into NMS-filtered detections.

    For every level the scores and deltas are flattened to one row per
    prediction, optionally cut down to the ``nms_pre`` best rows, and
    decoded against that level's anchors. All levels are then concatenated
    and passed to ``multiclass_nms_rotated``.

    Returns the ``(det_bboxes, det_labels)`` pair produced by
    ``multiclass_nms_rotated``.
    """
    assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)

    level_boxes = []
    level_scores = []
    for level_cls, level_reg, level_anchors in zip(
            cls_score_list, bbox_pred_list, mlvl_anchors):
        assert level_cls.size()[-2:] == level_reg.size()[-2:]

        logits = level_cls.permute(1, 2, 0).reshape(
            -1, self.cls_out_channels)
        probs = (logits.sigmoid()
                 if self.use_sigmoid_cls else logits.softmax(-1))
        deltas = level_reg.permute(1, 2, 0).reshape(-1, 5)

        nms_pre = cfg.get('nms_pre', -1)
        if 0 < nms_pre < probs.shape[0]:
            # Rank rows by their best foreground score; in softmax mode
            # column 0 is skipped.
            foreground = probs if self.use_sigmoid_cls else probs[:, 1:]
            best, _ = foreground.max(dim=1)
            _, keep = best.topk(nms_pre)
            level_anchors = level_anchors[keep, :]
            deltas = deltas[keep, :]
            probs = probs[keep, :]

        level_boxes.append(
            delta2bbox_rotated(level_anchors, deltas, self.target_means,
                               self.target_stds, img_shape))
        level_scores.append(probs)

    all_boxes = torch.cat(level_boxes)
    if rescale:
        # Only the first four box components are divided by the scale.
        all_boxes[..., :4] /= all_boxes.new_tensor(scale_factor)

    all_scores = torch.cat(level_scores)
    if self.use_sigmoid_cls:
        # Add a dummy background class to the front when using sigmoid
        background = all_scores.new_zeros(all_scores.shape[0], 1)
        all_scores = torch.cat([background, all_scores], dim=1)

    det_bboxes, det_labels = multiclass_nms_rotated(
        all_boxes, all_scores, cfg.score_thr, cfg.nms, cfg.max_per_img)
    return det_bboxes, det_labels
def bbox_decode(bbox_preds, anchors, means=None, stds=None):
    """
    Decode bboxes from deltas, using a separate anchor set for each image.

    :param bbox_preds: [N,5,H,W]
    :param anchors: indexable by image id; ``anchors[i]`` is decoded
        against image ``i`` (presumably [H*W,5] each -- confirm with caller)
    :param means: mean value to decode bbox; ``None`` (the default) means
        ``[0, 0, 0, 0, 0]``
    :param stds: std value to decode bbox; ``None`` (the default) means
        ``[1, 1, 1, 1, 1]``
    :return: [N,H,W,5]
    """
    # Fresh lists per call: a list literal in the signature would be one
    # shared object that any callee could mutate for all later calls.
    if means is None:
        means = [0, 0, 0, 0, 0]
    if stds is None:
        stds = [1, 1, 1, 1, 1]

    # Unpacking four dims also enforces that the input is 4-D.
    num_imgs, _, H, W = bbox_preds.shape
    bboxes_list = []
    for img_id in range(num_imgs):
        anchor = anchors[img_id]
        # [5,H,W] -> [H,W,5] -> [H*W,5] so each row is one location's deltas.
        bbox_delta = bbox_preds[img_id].permute(1, 2, 0).reshape(-1, 5)
        bboxes = delta2bbox_rotated(anchor, bbox_delta, means, stds,
                                    wh_ratio_clip=1e-6)
        bboxes_list.append(bboxes.reshape(H, W, 5))
    return torch.stack(bboxes_list, dim=0)
def get_det_bboxes(self, rois, cls_score, bbox_pred, img_shape, scale_factor,
                   rescale=False, cfg=None):
    """Produce rotated detections from RoIs, class scores and box deltas.

    A list of ``cls_score`` tensors is averaged before the softmax. The
    RoIs (without their first column) are converted to rotated boxes and
    either decoded with ``bbox_pred`` or, when ``bbox_pred`` is None, used
    as-is after clamping their polygon corners to the image.

    Returns:
        ``(bboxes, scores)`` when ``cfg`` is None, otherwise the
        ``(det_bboxes, det_labels)`` pair from ``multiclass_nms_rotated``.
    """
    if isinstance(cls_score, list):
        cls_score = sum(cls_score) / float(len(cls_score))
    if cls_score is None:
        scores = None
    else:
        scores = F.softmax(cls_score, dim=1)

    rotated_rois = bbox_to_rotated_box(rois[:, 1:])

    if bbox_pred is None:
        # No deltas: keep the RoIs, but clip them through their polygon
        # form so that every corner lies inside the image.
        polys = rotated_box_to_poly(rotated_rois.clone())
        if img_shape is not None:
            polys[:, 0::2].clamp_(min=0, max=img_shape[1] - 1)
            polys[:, 1::2].clamp_(min=0, max=img_shape[0] - 1)
        bboxes = poly_to_rotated_box(polys)
    else:
        bboxes = delta2bbox_rotated(rotated_rois, bbox_pred,
                                    self.target_means, self.target_stds,
                                    img_shape)

    if rescale:
        # Only the first four box components are divided by the scale.
        if isinstance(scale_factor, float):
            divisor = scale_factor
        else:
            divisor = torch.from_numpy(scale_factor).to(bboxes.device)
        bboxes[..., :4] /= divisor

    if cfg is None:
        return bboxes, scores

    det_bboxes, det_labels = multiclass_nms_rotated(
        bboxes, scores, cfg.score_thr, cfg.nms, cfg.max_per_img)
    return det_bboxes, det_labels
def get_refine_anchors(self, bbox_preds, init_anchors, featmap_sizes,
                       img_metas, device='cuda'):
    """Decode refined anchors and compute anchor validity flags.

    Args:
        bbox_preds: per-level predictions; ``bbox_preds[lvl][img_id]`` is
            permuted with ``(1, 2, 0)`` and flattened to rows of 5 deltas.
        init_anchors: ``init_anchors[img_id][lvl]`` are the anchors the
            deltas are applied to.
        featmap_sizes: per-level ``(feat_h, feat_w)`` pairs.
        img_metas: per-image meta dicts; ``'pad_shape'`` is read.
        device: device passed on to ``valid_flags``.

    Returns:
        tuple: ``(anchor_list, valid_flag_list)``, both nested lists
        indexed as ``[img_id][lvl]``.
    """
    level_ids = range(len(featmap_sizes))

    def _refined(img_id, lvl):
        # Predictions are detached before decoding, so the refined
        # anchors carry no gradient back to them.
        deltas = bbox_preds[lvl].detach()
        deltas = deltas[img_id].permute(1, 2, 0).reshape(-1, 5)
        return delta2bbox_rotated(init_anchors[img_id][lvl], deltas,
                                  self.target_means, self.target_stds,
                                  wh_ratio_clip=1e-6)

    def _flags(img_meta, lvl):
        stride = self.anchor_strides[lvl]
        feat_h, feat_w = featmap_sizes[lvl]
        pad_h, pad_w, _ = img_meta['pad_shape']
        # The valid region is the padded image size in feature-map cells,
        # capped at the feature-map size.
        valid_h = min(int(np.ceil(pad_h / stride)), feat_h)
        valid_w = min(int(np.ceil(pad_w / stride)), feat_w)
        return self.anchor_generators[lvl].valid_flags(
            (feat_h, feat_w), (valid_h, valid_w), device=device)

    anchor_list = [[_refined(img_id, lvl) for lvl in level_ids]
                   for img_id, _ in enumerate(img_metas)]
    valid_flag_list = [[_flags(img_meta, lvl) for lvl in level_ids]
                       for img_meta in img_metas]
    return anchor_list, valid_flag_list