def __call__(self, bboxes, gt_bboxes):
    """Compute the weighted IoU-based matching cost.

    Args:
        bboxes (Tensor): Predicted boxes with unnormalized coordinates
            (x1, y1, x2, y2). Shape [num_query, 4].
        gt_bboxes (Tensor): Ground truth boxes with unnormalized
            coordinates (x1, y1, x2, y2). Shape [num_gt, 4].

    Returns:
        torch.Tensor: Negated pairwise overlaps scaled by ``self.weight``.
    """
    # Pairwise (non-aligned) overlaps: one row per prediction, one column
    # per ground-truth box.
    pairwise_overlaps = bbox_overlaps(
        bboxes, gt_bboxes, mode=self.iou_mode, is_aligned=False)
    # The cost is conceptually ``1 - overlap``; the constant 1 does not
    # affect the matching, so only the negated overlap is used.
    return (-pairwise_overlaps) * self.weight
def fast_nms(multi_bboxes,
             multi_scores,
             multi_coeffs,
             score_thr,
             iou_thr,
             top_k,
             max_num=-1):
    """Fast NMS in `YOLACT <https://arxiv.org/abs/1904.02689>`_.

    Fast NMS lets already-removed detections suppress other detections, so
    the keep/discard decision for every instance is taken in parallel with
    plain matrix operations instead of the sequential loop of traditional
    NMS.

    Args:
        multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
        multi_scores (Tensor): shape (n, #class+1), where the last column
            contains scores of the background class, but this will be
            ignored.
        multi_coeffs (Tensor): shape (n, #class*coeffs_dim).
        score_thr (float): bbox threshold, bboxes with scores lower than it
            will not be considered.
        iou_thr (float): IoU threshold to be considered as conflicted.
        top_k (int): if there are more than top_k bboxes before NMS,
            only top top_k will be kept.
        max_num (int): if there are more than max_num bboxes after NMS,
            only top max_num will be kept. If -1, keep all the bboxes.
            Default: -1.

    Returns:
        tuple: (bboxes, labels, coefficients), tensors of shape (k, 5),
            (k,), and (k, coeffs_dim). Labels are 0-based.
    """
    # Drop the background column and go class-major: [#class, n].
    per_class_scores = multi_scores[:, :-1].t()
    per_class_scores, order = per_class_scores.sort(1, descending=True)

    # Keep only the top_k best-scoring candidates of every class.
    order = order[:, :top_k].contiguous()
    per_class_scores = per_class_scores[:, :top_k]  # [#class, topk]
    num_classes, num_dets = order.size()

    flat_order = order.view(-1)
    boxes = multi_bboxes[flat_order, :].view(num_classes, num_dets, 4)
    coeffs = multi_coeffs[flat_order, :].view(num_classes, num_dets, -1)

    # [#class, topk, topk]; keeping only the strict upper triangle means a
    # detection is compared against higher-scored detections only.
    iou = bbox_overlaps(boxes, boxes)
    iou.triu_(diagonal=1)
    highest_iou = iou.max(dim=1)[0]

    # Keep detections that are not conflicted and pass the second score
    # threshold.
    keep = (highest_iou <= iou_thr) & (per_class_scores > score_thr)

    # Label every kept detection with the index of its class row.
    class_ids = torch.arange(num_classes, device=boxes.device)
    class_ids = class_ids[:, None].expand_as(keep)

    kept_classes = class_ids[keep]
    kept_boxes = boxes[keep]
    kept_coeffs = coeffs[keep]
    kept_scores = per_class_scores[keep]

    # Rank across all classes and optionally truncate to max_num.
    kept_scores, ranking = kept_scores.sort(0, descending=True)
    if max_num > 0:
        ranking = ranking[:max_num]
        kept_scores = kept_scores[:max_num]

    kept_classes = kept_classes[ranking]
    kept_boxes = kept_boxes[ranking]
    kept_coeffs = kept_coeffs[ranking]

    cls_dets = torch.cat([kept_boxes, kept_scores[:, None]], dim=1)
    return cls_dets, kept_classes, kept_coeffs
def loss(self,
         cls_scores,
         bbox_preds,
         iou_preds,
         gt_bboxes,
         gt_labels,
         img_metas,
         gt_bboxes_ignore=None):
    """Compute losses of the head.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level
            Has shape (N, num_anchors * num_classes, H, W)
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (N, num_anchors * 4, H, W)
        iou_preds (list[Tensor]): iou_preds for each scale
            level with shape (N, num_anchors * 1, H, W)
        gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
            shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
        gt_labels (list[Tensor]): class indices corresponding to each box
        img_metas (list[dict]): Meta information of each image, e.g.,
            image size, scaling factor, etc.
        gt_bboxes_ignore (list[Tensor] | None): Specify which bounding
            boxes can be ignored when computing the loss.

    Returns:
        dict[str, Tensor]: A dictionary with the keys ``loss_cls``,
            ``loss_bbox`` and ``loss_iou``.
    """
    featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
    assert len(featmap_sizes) == self.anchor_generator.num_levels

    device = cls_scores[0].device
    anchor_list, valid_flag_list = self.get_anchors(
        featmap_sizes, img_metas, device=device)
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = self.get_targets(
        anchor_list,
        valid_flag_list,
        gt_bboxes,
        img_metas,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels,
    )
    (labels, labels_weight, bboxes_target, bboxes_weight, pos_inds,
     pos_gt_index) = cls_reg_targets

    # Regroup the per-level predictions into per-image 2-D tensors.
    cls_scores = levels_to_images(cls_scores)
    cls_scores = [
        item.reshape(-1, self.cls_out_channels) for item in cls_scores
    ]
    bbox_preds = levels_to_images(bbox_preds)
    bbox_preds = [item.reshape(-1, 4) for item in bbox_preds]
    iou_preds = levels_to_images(iou_preds)
    iou_preds = [item.reshape(-1, 1) for item in iou_preds]

    pos_losses_list, = multi_apply(self.get_pos_loss, anchor_list,
                                   cls_scores, bbox_preds, labels,
                                   labels_weight, bboxes_target,
                                   bboxes_weight, pos_inds)

    with torch.no_grad():
        # NOTE(review): the reassigned label/bbox weights returned here are
        # not used below; the pre-reassignment ``labels_weight`` list is
        # what gets concatenated. Confirm this is intended.
        labels, _label_weights, _bbox_weights, num_pos = multi_apply(
            self.paa_reassign,
            pos_losses_list,
            labels,
            labels_weight,
            bboxes_weight,
            pos_inds,
            pos_gt_index,
            anchor_list,
        )
        num_pos = sum(num_pos)

    # Fall back to the number of images as the classification normalizer
    # when there is no positive sample. ``num_pos`` itself is left
    # untouched so the ``if num_pos`` test below can still reach the
    # zero-loss branch instead of normalizing by an all-zero IoU sum.
    cls_avg_factor = num_pos if num_pos else len(img_metas)

    # convert all tensor list to a flatten tensor
    cls_scores = torch.cat(cls_scores, 0).view(-1, cls_scores[0].size(-1))
    bbox_preds = torch.cat(bbox_preds, 0).view(-1, bbox_preds[0].size(-1))
    iou_preds = torch.cat(iou_preds, 0).view(-1, iou_preds[0].size(-1))
    labels = torch.cat(labels, 0).view(-1)
    flatten_anchors = torch.cat(
        [torch.cat(item, 0) for item in anchor_list])
    labels_weight = torch.cat(labels_weight, 0).view(-1)
    bboxes_target = torch.cat(bboxes_target,
                              0).view(-1, bboxes_target[0].size(-1))

    # Foreground anchors are those whose label is a valid class index.
    pos_inds_flatten = ((labels >= 0)
                        & (labels < self.num_classes)).nonzero().reshape(-1)

    losses_cls = self.loss_cls(
        cls_scores, labels, labels_weight, avg_factor=cls_avg_factor)
    if num_pos:
        pos_bbox_pred = self.bbox_coder.decode(
            flatten_anchors[pos_inds_flatten],
            bbox_preds[pos_inds_flatten])
        pos_bbox_target = bboxes_target[pos_inds_flatten]
        # The IoU between the decoded prediction and its target is both
        # the target of the IoU branch and the weight of the box loss.
        iou_target = bbox_overlaps(
            pos_bbox_pred.detach(), pos_bbox_target, is_aligned=True)
        losses_iou = self.loss_centerness(
            iou_preds[pos_inds_flatten],
            iou_target.unsqueeze(-1),
            avg_factor=num_pos)
        # ``eps`` is not defined in this block; presumably a module-level
        # constant -- verify.
        losses_bbox = self.loss_bbox(
            pos_bbox_pred,
            pos_bbox_target,
            iou_target.clamp(min=eps),
            avg_factor=iou_target.sum())
    else:
        # No positives: emit zero losses that still depend on the
        # predictions.
        losses_iou = iou_preds.sum() * 0
        losses_bbox = bbox_preds.sum() * 0

    return dict(
        loss_cls=losses_cls, loss_bbox=losses_bbox, loss_iou=losses_iou)
def score_voting(self, det_bboxes, det_labels, mlvl_bboxes,
                 mlvl_nms_scores, score_thr):
    """Refine every box that survived NMS by score voting.

    Each remaining detection is replaced by a weighted average of the
    same-class candidate boxes that overlap it; the detection keeps its
    original score.

    Args:
        det_bboxes (Tensor): Remaining boxes after NMS procedure,
            with shape (k, 5), each dimension means
            (x1, y1, x2, y2, score).
        det_labels (Tensor): The label of remaining boxes, with shape
            (k, 1), Labels are 0-based.
        mlvl_bboxes (Tensor): All boxes before the NMS procedure,
            with shape (num_anchors, 4).
        mlvl_nms_scores (Tensor): The scores of all boxes which is used
            in the NMS procedure, with shape (num_anchors, num_class)
        score_thr (float): The score threshold of bboxes.

    Returns:
        tuple: Usually returns a tuple containing voting results.

            - det_bboxes_voted (Tensor): Remaining boxes after
                score voting procedure, with shape (k, 5), each
                dimension means (x1, y1, x2, y2, score).
            - det_labels_voted (Tensor): Label of each voted box, one
                entry per row of ``det_bboxes_voted``.

            Both tensors are empty (shapes (0, 5) and (0,)) when no box
            is voted.
    """
    # Candidates are all (box, class) pairs scoring above the threshold.
    candidate_mask = mlvl_nms_scores > score_thr
    candidate_mask_nozeros = candidate_mask.nonzero()
    candidate_inds = candidate_mask_nozeros[:, 0]
    candidate_labels = candidate_mask_nozeros[:, 1]
    candidate_bboxes = mlvl_bboxes[candidate_inds]
    candidate_scores = mlvl_nms_scores[candidate_mask]

    det_bboxes_voted = []
    det_labels_voted = []
    for cls in range(self.cls_out_channels):
        candidate_cls_mask = candidate_labels == cls
        if not candidate_cls_mask.any():
            continue
        candidate_cls_scores = candidate_scores[candidate_cls_mask]
        candidate_cls_bboxes = candidate_bboxes[candidate_cls_mask]
        det_cls_mask = det_labels == cls
        det_cls_bboxes = det_bboxes[det_cls_mask].view(
            -1, det_bboxes.size(-1))
        det_candidate_ious = bbox_overlaps(det_cls_bboxes[:, :4],
                                           candidate_cls_bboxes)
        for det_ind in range(len(det_cls_bboxes)):
            single_det_ious = det_candidate_ious[det_ind]
            # Only candidates that overlap this detection take part.
            pos_ious_mask = single_det_ious > 0.01
            pos_ious = single_det_ious[pos_ious_mask]
            pos_bboxes = candidate_cls_bboxes[pos_ious_mask]
            pos_scores = candidate_cls_scores[pos_ious_mask]
            # Vote weight: candidate score damped by how far its IoU
            # with the detection is from 1.
            pis = (torch.exp(-(1 - pos_ious)**2 / 0.025) *
                   pos_scores)[:, None]
            voted_box = torch.sum(
                pis * pos_bboxes, dim=0) / torch.sum(
                    pis, dim=0)
            voted_score = det_cls_bboxes[det_ind][-1:][None, :]
            det_bboxes_voted.append(
                torch.cat((voted_box[None, :], voted_score), dim=1))
            det_labels_voted.append(cls)

    if not det_bboxes_voted:
        # torch.cat() raises on an empty list; return empty results
        # instead when nothing was voted.
        return det_bboxes.new_zeros((0, 5)), det_labels.new_zeros((0, ))

    det_bboxes_voted = torch.cat(det_bboxes_voted, dim=0)
    det_labels_voted = det_labels.new_tensor(det_labels_voted)
    return det_bboxes_voted, det_labels_voted
def get_pos_loss(self, anchors, cls_score, bbox_pred, label, label_weight,
                 bbox_target, bbox_weight, pos_inds):
    """Calculate loss of all potential positive samples obtained from first
    match process.

    Only the box regression loss contributes to the returned value;
    ``cls_score`` is used solely to build the empty result, and ``label``
    and ``label_weight`` are not read.

    Args:
        anchors (list[Tensor]): Anchors of each scale.
        cls_score (Tensor): Box scores of single image with shape
            (num_anchors, num_classes)
        bbox_pred (Tensor): Box energies / deltas of single image
            with shape (num_anchors, 4)
        label (Tensor): classification target of each anchor with
            shape (num_anchors,)
        label_weight (Tensor): Classification loss weight of each
            anchor with shape (num_anchors).
        bbox_target (Tensor): Regression target of each anchor with
            shape (num_anchors, 4).
        bbox_weight (Tensor): Bbox weight of each anchor with shape
            (num_anchors, 4).
        pos_inds (Tensor): Index of all positive samples got from
            first assign process.

    Returns:
        tuple[Tensor]: 1-tuple holding the losses of all positive samples
            in single image (an empty tensor when ``pos_inds`` is empty).
    """
    if not len(pos_inds):
        return cls_score.new([]),

    anchors_all_level = torch.cat(anchors, 0)
    pos_anchors = anchors_all_level[pos_inds]
    pos_bbox_target = bbox_target[pos_inds]
    pos_bbox_weight = bbox_weight[pos_inds]
    pos_bbox_pred = self.bbox_coder.decode(pos_anchors, bbox_pred[pos_inds])

    # reduction_override='none' is requested to keep the loss dimension
    # (one value per positive sample). NOTE(review): the avg_factor is
    # taken from ``self.loss_cls``, not ``self.loss_bbox`` -- confirm.
    pos_loss = self.loss_bbox(
        pos_bbox_pred,
        pos_bbox_target,
        pos_bbox_weight,
        avg_factor=self.loss_cls.loss_weight,
        reduction_override='none')
    return pos_loss,
def forward_test(self, img, img_metas, proposals=None, rescale=False):
    """Run both wrapped detectors and rescore the first one's boxes.

    The detections ``x1[0][0]`` and ``x2[0][0]`` (presumably image 0,
    class 0 -- confirm) are paired by IoU, every overlapping pair is fed to
    ``self.fusion`` and the returned scores overwrite column 4 of
    ``x1[0][0]`` in place.

    Args:
        img: Input forwarded unchanged to both models.
        img_metas: Meta information forwarded unchanged to both models.
        proposals: Unused.
        rescale: Unused; both models are always called with
            ``rescale=True``.

    Returns:
        tuple: ``(o, x1)`` where ``o`` holds, per image and per class, the
            concatenated detections of both models (with their original
            scores) and ``x1`` is the first model's output with fused
            scores.
    """
    # Inference only: nothing here needs an autograd graph.
    with torch.no_grad():
        x1 = self.models[0](img, img_metas, return_loss=False, rescale=True)
        x2 = self.models[1](img, img_metas, return_loss=False, rescale=True)

        # Built before x1 is rescored; np.concatenate copies, so ``o``
        # keeps the unfused scores.
        o = [[np.concatenate(r_c) for r_c in zip(*r_img)]
             for r_img in zip(x1, x2)]

        # TODO: Add center distance
        dets_1 = torch.tensor(x1[0][0])
        dets_2 = torch.tensor(x2[0][0])
        K = dets_1.shape[0]
        N = dets_2.shape[0]

        # Pairwise feature tensor of shape [K, N, 3]:
        # (IoU, score of the model-1 box, score of the model-2 box).
        overlaps = bbox_overlaps(dets_1[:, :4], dets_2[:, :4])
        scores_1 = dets_1[:, 4].unsqueeze(1).expand(K, N)
        scores_2 = dets_2[:, 4].unsqueeze(0).expand(K, N)
        T = torch.stack(
            [overlaps.float(), scores_1.float(), scores_2.float()],
            dim=-1).cuda()

        # Only pairs with a non-zero IoU are passed to the fusion module.
        non_empty_indices = torch.nonzero(T[:, :, 0], as_tuple=True)
        non_empty_elements = T[non_empty_indices[0], non_empty_indices[1], :]
        # Shape [1, 3, 1, #non-zero]
        non_empty_elements_T = non_empty_elements.permute(
            1, 0).unsqueeze(1).unsqueeze(0)

        T_out = torch.zeros((1, K, N)).cuda()
        new_scores = self.fusion(non_empty_elements_T, T_out,
                                 non_empty_indices)

        # Write the fused scores back into the numpy detections.
        x1[0][0][:, 4] = new_scores.detach().cpu().numpy()

    return o, x1
def forward_train(self,
                  img,
                  img_metas,
                  gt_bboxes,
                  gt_labels,
                  gt_bboxes_ignore=None,
                  gt_masks=None,
                  proposals=None,
                  **kwargs):
    """Compute the training loss of the fusion module.

    Both wrapped detectors are run without gradients; the detections
    ``x1[0][0]`` and ``x2[0][0]`` (presumably image 0, class 0 -- confirm)
    are paired by IoU and passed to ``self.fusion``. The detections of the
    first model are then assigned and sampled against the ground truth and
    ``1 - new_scores`` is handed to ``self.loss`` together with the
    resulting targets.

    Args:
        img: Input image(s); wrapped in a list before being passed to the
            models.
        img_metas: Meta information; wrapped in a list for the models and
            also used to determine the number of images.
        gt_bboxes: Ground truth boxes, indexed per image.
        gt_labels: Ground truth labels, indexed per image.
        gt_bboxes_ignore: Boxes to ignore, indexed per image; defaults to
            ``None`` for every image.
        gt_masks: Unused.
        proposals: Unused.
        **kwargs: Unused.

    Returns:
        dict: The losses returned by ``self.loss``.
    """
    # The detectors only provide detections here, so no graph is built
    # for them.
    with torch.no_grad():
        x1 = self.models[0]([img], [img_metas],
                            return_loss=False,
                            rescale=True)
        x2 = self.models[1]([img], [img_metas],
                            return_loss=False,
                            rescale=True)

    # TODO: Add center distance
    # TODO: This part (tensor preparation) is very slow
    # Keep at most the 1000 highest-scoring detections of each model
    # (ascending argsort, reversed, then truncated).
    x1[0][0] = x1[0][0][x1[0][0][:, 4].argsort()][::-1][:1000].copy()
    x2[0][0] = x2[0][0][x2[0][0][:, 4].argsort()][::-1][:1000].copy()
    dets_1 = torch.tensor(x1[0][0])
    dets_2 = torch.tensor(x2[0][0])
    K = dets_1.shape[0]
    N = dets_2.shape[0]

    # Pairwise feature tensor of shape [K, N, 3]:
    # (IoU, score of the model-1 box, score of the model-2 box).
    overlaps = bbox_overlaps(dets_1[:, :4], dets_2[:, :4])
    scores_1 = dets_1[:, 4].unsqueeze(1).expand(K, N)
    scores_2 = dets_2[:, 4].unsqueeze(0).expand(K, N)
    T = torch.stack(
        [overlaps.float(), scores_1.float(), scores_2.float()],
        dim=-1).cuda().half()

    # Model-1 boxes that overlap nothing get a sentinel entry in the last
    # column (IoU -1, second score -1) so they survive the non-zero
    # selection below.
    non_overlapping_dets = ~overlaps.sum(dim=1).bool()
    T[non_overlapping_dets, -1, 0] = -1  # IoU -1
    T[non_overlapping_dets, -1, -1] = -1  # Score 2nd -1

    non_empty_indices = torch.nonzero(T[:, :, 0], as_tuple=True)
    non_empty_elements = T[non_empty_indices[0], non_empty_indices[1], :]
    # Shape [1, 3, 1, #non-zero]
    non_empty_elements_T = non_empty_elements.permute(
        1, 0).unsqueeze(1).unsqueeze(0)

    T_out = torch.zeros((1, K, N)).cuda().half()
    new_scores = self.fusion(non_empty_elements_T, T_out, non_empty_indices)
    # TODO: write the fused scores back into x1[0][0][:, 4] (currently
    # disabled).

    bboxes = x1  # [# images, #n_classes, # n_boxes]
    losses = dict()

    num_imgs = len(img_metas)
    if gt_bboxes_ignore is None:
        gt_bboxes_ignore = [None for _ in range(num_imgs)]
    sampling_results = []
    for i in range(num_imgs):
        # Convert once and share between assigner and sampler.
        dets_i = torch.tensor(bboxes[i][0]).cuda()
        assign_result = self.assigner.assign(dets_i, gt_bboxes[i],
                                             gt_bboxes_ignore[i],
                                             gt_labels[i])
        sampling_result = self.sampler.sample(assign_result, dets_i,
                                              gt_bboxes[i])
        sampling_results.append(sampling_result)

    bbox_targets = self.get_targets(
        sampling_results, gt_bboxes, gt_labels, rcnn_train_cfg=None)
    cls_score = 1 - new_scores
    loss_bbox = self.loss(cls_score, *bbox_targets)
    losses.update(loss_bbox)
    return losses