def regress_by_class(self, rois, label, bbox_pred, img_meta):
    """Regress the bbox for the predicted class. Used in Cascade R-CNN.

    Args:
        rois (Tensor): shape (n, 4) or (n, 5); a 5-column input carries the
            batch index in column 0.
        label (Tensor): shape (n, ), predicted class index per roi.
        bbox_pred (Tensor): shape (n, 4*(#class+1)) or (n, 4).
        img_meta (dict): Image meta info; only 'img_shape' is read here.

    Returns:
        Tensor: Regressed bboxes, the same shape as input rois.
    """
    assert rois.size(1) in (4, 5)

    if not self.reg_class_agnostic:
        # Pick out the 4 regression columns belonging to each roi's class.
        base = label * 4
        col_inds = torch.stack((base, base + 1, base + 2, base + 3), 1)
        bbox_pred = torch.gather(bbox_pred, 1, col_inds)
    assert bbox_pred.size(1) == 4

    max_shape = img_meta['img_shape']
    if rois.size(1) == 4:
        return delta2bbox(rois, bbox_pred, self.target_means,
                          self.target_stds, max_shape)
    # 5-column rois: decode columns 1:5 and re-attach the batch index.
    decoded = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                         self.target_stds, max_shape)
    return torch.cat((rois[:, [0]], decoded), dim=1)
def get_track_bboxes(self,
                     rois,
                     cls_score,
                     bbox_pred,
                     img_shape,
                     scale_factor,
                     rescale=False,
                     cfg=None):
    """Decode tracking bboxes (and scores) from head outputs.

    Args:
        rois (Tensor): shape (n, 5), column 0 is the batch index.
        cls_score (Tensor | None): classification logits; sigmoid or
            2-way softmax depending on ``self.use_sigmoid_cls``.
        bbox_pred (Tensor | None): bbox deltas; when None the rois are
            returned as-is (clamped to the image).
        img_shape (tuple | None): (h, w, ...) used for clamping.
        scale_factor (float | Tensor): rescale divisor for bboxes.
        rescale (bool): if True, map bboxes back to original image scale.
        cfg: when None, scores are concatenated onto the bboxes.

    Returns:
        Tensor: (n, 5) bboxes+score when ``cfg`` is None, else (n, 4).
    """
    if self.use_sigmoid_cls:
        scores = torch.sigmoid(cls_score) if cls_score is not None else None
    else:
        # Keep only the foreground probability as an (n, 1) column.
        scores = F.softmax(
            cls_score, dim=1)[:, 1][:, None] if cls_score is not None else None

    if bbox_pred is not None:
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                            self.target_stds, img_shape)
    else:
        bboxes = rois[:, 1:].clone()
        if img_shape is not None:
            # BUGFIX: the previous ``bboxes[:, [0, 2]].clamp_(...)`` used
            # advanced indexing, which returns a COPY — the in-place clamp
            # silently did nothing.  Strided slices are views, so these
            # clamps actually modify ``bboxes`` (cols 0,2 = x; 1,3 = y).
            bboxes[:, 0::2].clamp_(min=0, max=img_shape[1] - 1)
            bboxes[:, 1::2].clamp_(min=0, max=img_shape[0] - 1)

    if rescale:
        bboxes /= scale_factor

    if cfg is None:
        bboxes = torch.cat([bboxes, scores], dim=1)
    return bboxes
def get_det_bboxes(self,
                   rois,
                   cls_score,
                   bbox_pred,
                   img_shape,
                   scale_factor,
                   rescale=False,
                   cfg=None):
    """Decode detection bboxes and scores, optionally running NMS.

    Args:
        rois (Tensor): shape (n, 5), column 0 is the batch index.
        cls_score (Tensor | list[Tensor]): classification logits; a list
            is averaged (e.g. from test-time augmentation).
        bbox_pred (Tensor | None): bbox deltas; when None the rois are
            returned as-is (clamped to the image).
        img_shape (tuple | None): (h, w, ...) used for clamping.
        scale_factor (float | Tensor): rescale divisor for bboxes.
        rescale (bool): if True, map bboxes back to original image scale.
        cfg: test config with ``score_thr``, ``nms``, ``max_per_img``;
            when None, raw (bboxes, scores) are returned.

    Returns:
        tuple: (bboxes, scores) when ``cfg`` is None, else the
        (det_bboxes, det_labels) produced by ``multiclass_nms``.
    """
    if isinstance(cls_score, list):
        cls_score = sum(cls_score) / float(len(cls_score))
    scores = F.softmax(cls_score, dim=1) if cls_score is not None else None

    if bbox_pred is not None:
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                            self.target_stds, img_shape)
    else:
        bboxes = rois[:, 1:].clone()
        if img_shape is not None:
            # BUGFIX: ``bboxes[:, [0, 2]].clamp_(...)`` clamped a COPY
            # (advanced indexing), so it was a no-op.  Strided slices are
            # views; this also covers per-class layouts (n, 4*C), where
            # even columns are x coordinates and odd columns are y.
            bboxes[:, 0::2].clamp_(min=0, max=img_shape[1] - 1)
            bboxes[:, 1::2].clamp_(min=0, max=img_shape[0] - 1)

    if rescale:
        bboxes /= scale_factor

    if cfg is None:
        return bboxes, scores
    det_bboxes, det_labels = multiclass_nms(bboxes, scores, cfg.score_thr,
                                            cfg.nms, cfg.max_per_img)
    return det_bboxes, det_labels
def get_bboxes_single(self,
                      cls_scores,
                      bbox_preds,
                      mlvl_anchors,
                      img_shape,
                      scale_factor,
                      cfg,
                      rescale=False):
    """Generate RPN proposals for a single image.

    Args:
        cls_scores (list[Tensor]): per-level objectness maps (C, H, W).
        bbox_preds (list[Tensor]): per-level delta maps (4, H, W).
        mlvl_anchors (list[Tensor]): per-level anchors, (H*W*A, 4).
        img_shape (tuple): image shape passed to ``delta2bbox`` clamping.
        scale_factor: unused here (kept for interface compatibility).
        cfg: proposal config; reads ``nms_pre``, ``min_bbox_size``,
            ``nms_thr``, ``nms_post``, ``nms_across_levels``, ``max_num``.
        rescale (bool): unused here (kept for interface compatibility).

    Returns:
        Tensor: proposals of shape (k, 5) — (x1, y1, x2, y2, score).
    """
    mlvl_proposals = []
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        anchors = mlvl_anchors[idx]
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            scores = rpn_cls_score.softmax(dim=1)[:, 1]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        # Keep only the top-scoring candidates before the (expensive) NMS.
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            _, topk_inds = scores.topk(cfg.nms_pre)
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
            scores = scores[topk_inds]
        proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                               self.target_stds, img_shape)
        if cfg.min_bbox_size > 0:
            w = proposals[:, 2] - proposals[:, 0] + 1
            h = proposals[:, 3] - proposals[:, 1] + 1
            # BUGFIX: ``.squeeze()`` collapsed the index tensor to 0-d when
            # exactly one proposal survived, producing 1-d proposals and a
            # scalar score that break the cat/NMS below.  ``.squeeze(1)``
            # only removes the trailing dim of nonzero's (k, 1) output.
            valid_inds = torch.nonzero((w >= cfg.min_bbox_size)
                                       & (h >= cfg.min_bbox_size)).squeeze(1)
            proposals = proposals[valid_inds, :]
            scores = scores[valid_inds]
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.nms_post, :]
        mlvl_proposals.append(proposals)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        scores = proposals[:, 4]
        num = min(cfg.max_num, proposals.shape[0])
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    return proposals
def get_bboxes_single(self,
                      cls_scores,
                      bbox_preds,
                      mlvl_anchors,
                      img_shape,
                      scale_factor,
                      cfg,
                      rescale=False):
    """Transform per-level head outputs of one image into detections.

    Args:
        cls_scores (list[Tensor]): per-level score maps (C, H, W).
        bbox_preds (list[Tensor]): per-level delta maps (4, H, W).
        mlvl_anchors (list[Tensor]): per-level anchors, (H*W*A, 4).
        img_shape (tuple): image shape used by ``delta2bbox``.
        scale_factor: divisor applied when ``rescale`` is True.
        cfg: test config; reads ``nms_pre``, ``score_thr``, ``nms``,
            ``max_per_img``.
        rescale (bool): if True, map bboxes back to original image scale.

    Returns:
        tuple: (det_bboxes, det_labels) from ``multiclass_nms``.
    """
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
    bboxes_per_level = []
    scores_per_level = []
    nms_pre = cfg.get('nms_pre', -1)
    for lvl in range(len(cls_scores)):
        score_map = cls_scores[lvl]
        delta_map = bbox_preds[lvl]
        level_anchors = mlvl_anchors[lvl]
        assert score_map.size()[-2:] == delta_map.size()[-2:]
        flat_logits = score_map.permute(1, 2,
                                        0).reshape(-1, self.cls_out_channels)
        if self.use_sigmoid_cls:
            probs = flat_logits.sigmoid()
        else:
            probs = flat_logits.softmax(-1)
        deltas = delta_map.permute(1, 2, 0).reshape(-1, 4)
        # Pre-NMS top-k filtering, ranked by the best per-anchor class score
        # (background column 0 is excluded in the softmax case).
        if 0 < nms_pre < probs.shape[0]:
            if self.use_sigmoid_cls:
                ranking, _ = probs.max(dim=1)
            else:
                ranking, _ = probs[:, 1:].max(dim=1)
            _, keep = ranking.topk(nms_pre)
            level_anchors = level_anchors[keep, :]
            deltas = deltas[keep, :]
            probs = probs[keep, :]
        decoded = delta2bbox(level_anchors, deltas, self.target_means,
                             self.target_stds, img_shape)
        bboxes_per_level.append(decoded)
        scores_per_level.append(probs)
    all_bboxes = torch.cat(bboxes_per_level)
    if rescale:
        all_bboxes /= all_bboxes.new_tensor(scale_factor)
    all_scores = torch.cat(scores_per_level)
    if self.use_sigmoid_cls:
        # Sigmoid heads have no background column; prepend a zero one so the
        # score layout matches what multiclass_nms expects.
        bg_pad = all_scores.new_zeros(all_scores.shape[0], 1)
        all_scores = torch.cat([bg_pad, all_scores], dim=1)
    det_bboxes, det_labels = multiclass_nms(all_bboxes, all_scores,
                                            cfg.score_thr, cfg.nms,
                                            cfg.max_per_img)
    return det_bboxes, det_labels
def get_det_bboxes(self,
                   rois,
                   cls_score,
                   bbox_pred,
                   depth_pred,
                   depth_uncertainty_pred,
                   dim_pred,
                   rot_pred,
                   cen_2d_pred,
                   img_shape,
                   scale_factor,
                   rescale=False,
                   cfg=None):
    """Decode 2D boxes plus 3D attributes (depth, dims, rotation, center).

    Args:
        rois (Tensor): shape (n, 5), column 0 is the batch index.
        cls_score (Tensor | list[Tensor] | None): classification logits;
            a list is averaged before softmax.
        bbox_pred (Tensor | None): bbox deltas; when None the rois are
            used directly (clamped to the image).
        depth_pred (Tensor): log-scaled depth predictions.
        depth_uncertainty_pred (Tensor | None): depth confidence in [0, 1].
        dim_pred (Tensor): log-scaled 3D dimensions (per class, 3 values).
        rot_pred (Tensor): 8-value two-bin CLSREG rotation encoding.
        cen_2d_pred (Tensor): scaled offsets from the 2D box center.
        img_shape (tuple | None): (h, w, ...) used for clamping.
        scale_factor (float | Tensor): rescale divisor for bboxes.
        rescale (bool): if True, map bboxes back to original image scale.
        cfg: test config for ``multiclass_3d_nms``; when None, the raw
            decoded outputs are returned.

    Returns:
        tuple: (bboxes, scores, depth, depth_uncertainty, dims, rot,
        center2d), raw when ``cfg`` is None, NMS-filtered otherwise.
    """
    if isinstance(cls_score, list):
        cls_score = sum(cls_score) / float(len(cls_score))
    scores = F.softmax(cls_score, dim=1) if cls_score is not None else None

    if bbox_pred is not None:
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                            self.target_stds, img_shape)
    else:
        bboxes = rois[:, 1:].clone()
        if img_shape is not None:
            # BUGFIX: ``bboxes[:, [0, 2]].clamp_(...)`` clamped a COPY
            # (advanced indexing), so the boxes were never clipped.
            # Strided slices are views, so these clamps take effect.
            bboxes[:, 0::2].clamp_(min=0, max=img_shape[1] - 1)
            bboxes[:, 1::2].clamp_(min=0, max=img_shape[0] - 1)

    if rescale:
        bboxes /= scale_factor

    def get_depth(pred, scale: float = 2.0):
        # Depth is regressed in log space; exp() maps it back to meters.
        return torch.exp(pred / scale).view(pred.size(0), -1, 1)

    def get_uncertainty_prob(depth_uncertainty_pred):
        if depth_uncertainty_pred is None:
            return None
        return torch.clamp(depth_uncertainty_pred, min=0.0, max=1.0)

    def get_dim(pred, scale: float = 2.0):
        # 3D dimensions are regressed in log space as well.
        return torch.exp(pred / scale).view(pred.size(0), -1, 3)

    def get_alpha(rot):
        """Generate alpha value from predicted CLSREG array.

        Args:
            rot (torch.Tensor): rotation CLSREG array: (B, num_classes, 8)
                [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos,
                 bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]

        Returns:
            torch.Tensor: (B, num_classes, 1)
        """
        alpha1 = torch.atan(rot[:, :, 2:3] / rot[:, :, 3:4]) + (-0.5 * np.pi)
        alpha2 = torch.atan(rot[:, :, 6:7] / rot[:, :, 7:8]) + (0.5 * np.pi)
        # NOTE: averaging the two bins in the overlap region was found
        # unstable, so we hard-select the bin with the higher "inside"
        # confidence instead.
        idx = (rot[:, :, 1:2] > rot[:, :, 5:6]).float()
        return alpha1 * idx + alpha2 * (1 - idx)

    def get_delta_2d(delta_cen, scale: float = 10.0):
        return delta_cen.view(delta_cen.size(0), -1, 2) * scale

    def get_box_cen(bbox):
        # Per-class 2D box centers, shape (n, num_classes, 2).
        return torch.cat([(bbox[:, 0::4, None] + bbox[:, 2::4, None]) / 2.0,
                          (bbox[:, 1::4, None] + bbox[:, 3::4, None]) / 2.0],
                         dim=2)

    def get_cen2d(delta_2d, box_cen):
        return delta_2d + box_cen

    depth_pred = get_depth(depth_pred)
    depth_uncertainty_prob = get_uncertainty_prob(depth_uncertainty_pred)
    # BUGFIX: guard against a None uncertainty before neutralizing it,
    # otherwise ``None * 0.`` raised a TypeError.
    if not self.use_uncertainty and depth_uncertainty_prob is not None:
        depth_uncertainty_prob = depth_uncertainty_prob * 0. + 1.0
    dim_pred = get_dim(dim_pred)
    rot_pred = get_alpha(rot_pred)
    delta_2d = get_delta_2d(cen_2d_pred, scale=self.center_scale)
    # detach(): 2D centers must not backprop through the box branch.
    cen2d_pred = get_cen2d(delta_2d, get_box_cen(bboxes.detach()))

    if cfg is None:
        return (bboxes, scores, depth_pred, depth_uncertainty_prob,
                dim_pred, rot_pred, cen2d_pred)
    (det_bboxes, det_labels, det_depths, det_depth_uncertainty, dim_preds,
     rot_preds, cen2d_preds) = multiclass_3d_nms(
         bboxes, scores, depth_pred, depth_uncertainty_prob, dim_pred,
         rot_pred, cen2d_pred, cfg.score_thr, cfg.nms, cfg.max_per_img)
    return (det_bboxes, det_labels, det_depths, det_depth_uncertainty,
            dim_preds, rot_preds, cen2d_preds)