def match_targets_to_proposals(self, proposal, target):
    """Pair every proposal with a ground-truth box and return the matches.

    Args:
        proposal: box list of candidate regions.
        target: box list of ground-truth boxes; must carry a "labels" field.

    Returns:
        A box list obtained by indexing ``target`` (reduced to its "labels"
        field) with the matcher output, plus a "matched_idxs" field holding
        the raw matcher output.
    """
    # IoU between every ground-truth box and every proposal.
    iou_matrix = boxlist_iou(target, proposal)
    assignment = self.proposal_matcher(iou_matrix)

    # Only the class labels are needed downstream for Fast R-CNN targets.
    labels_only = target.copy_with_fields("labels")

    # Negative matcher outputs are clamped to 0 so the lookup stays in
    # bounds; the unclamped values are kept in "matched_idxs".
    matched = labels_only[assignment.clamp(min=0)]
    matched.add_field("matched_idxs", assignment)
    return matched
def __call__(self, proposals, source_score, labels, device, return_targets=False):
    """Build pseudo labels from the top-scoring proposal of each positive class.

    For every class ``c`` with ``labels[1:][c] == 1`` the proposal with the
    highest score in column ``c`` of ``source_score[:, 1:]`` becomes a pseudo
    ground-truth box of class ``c + 1``.

    Args:
        proposals: box list of candidate regions (``bbox``, ``size``, ``mode``
            are read).
        source_score: per-proposal class scores; column 0 is skipped.
        labels: image-level label vector; entry 0 is skipped.
        device: device on which new tensors are allocated.
        return_targets: when equal to ``True``, return the pseudo
            ground-truth boxes instead of per-proposal labels.

    Returns:
        If ``return_targets == True``: a ``BoxList`` of the pseudo
        ground-truth boxes with a float "labels" field.
        Otherwise: ``(pseudo_labels, loss_weights)``, one entry per row of
        ``source_score``.
    """
    # Work on a copy without the background column: rows are zeroed in place.
    fg_scores = source_score[:, 1:].clone()
    fg_labels = labels[1:]

    seed_boxes = torch.zeros((0, 4), dtype=torch.float, device=device)
    seed_classes = torch.zeros((0, 1), dtype=torch.long, device=device)
    seed_scores = torch.zeros((0, 1), dtype=torch.float, device=device)

    for cls_idx in fg_labels.eq(1).nonzero(as_tuple=False)[:, 0]:
        column = fg_scores[:, cls_idx]
        best = torch.argmax(column)
        seed_boxes = torch.cat(
            (seed_boxes, proposals.bbox[best].view(1, -1)), dim=0)
        # +1 restores the class index in the full (with background) layout.
        seed_classes = torch.cat(
            (seed_classes, cls_idx.add(1).view(1, 1)), dim=0)
        # The score must be copied out (cat) before the row is wiped below.
        seed_scores = torch.cat(
            (seed_scores, column[best].view(1, 1)), dim=0)
        # Zero every class score of the chosen proposal for later classes.
        fg_scores[best].fill_(0)

    if return_targets == True:  # noqa: E712 - equality test kept on purpose
        targets = BoxList(seed_boxes, proposals.size, mode=proposals.mode)
        targets.add_field('labels', seed_classes[:, 0].float())
        return targets

    if seed_boxes.shape[0] == 0:
        # No positive class: every RoI is background with zero weight.
        roi_count = len(source_score)
        return (
            torch.zeros(roi_count, dtype=torch.long, device=device),
            torch.zeros(roi_count, dtype=torch.float, device=device),
        )

    seeds = BoxList(seed_boxes, proposals.size, mode=proposals.mode)
    best_iou, nearest_seed = boxlist_iou(proposals, seeds).max(dim=1)
    pseudo_labels = seed_classes[nearest_seed, 0]
    loss_weights = seed_scores[nearest_seed, 0]

    # RoIs whose best IoU is <= FG_IOU_THRESHOLD are labelled background.
    # (The PCL trick of zeroing loss weights for very low IoU is not applied.)
    is_background = best_iou.le(cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD)
    pseudo_labels[is_background.nonzero(as_tuple=False)[:, 0]] = 0
    return pseudo_labels, loss_weights
def match_targets_to_proposals(self, proposal, target):
    """Pair every proposal with a ground-truth box, keeping labels and masks.

    Args:
        proposal: box list of candidate regions.
        target: box list of ground-truth boxes; must carry "labels" and
            "masks" fields.

    Returns:
        A box list obtained by indexing ``target`` (reduced to its "labels"
        and "masks" fields) with the matcher output, plus a "matched_idxs"
        field holding the raw matcher output.
    """
    # IoU between every ground-truth box and every proposal.
    iou_matrix = boxlist_iou(target, proposal)
    assignment = self.proposal_matcher(iou_matrix)

    # Mask R-CNN targets are built from the labels and the masks only.
    slim_target = target.copy_with_fields(["labels", "masks"])

    # The matcher can emit negative codes (e.g. -2). With a single GT box
    # those would index out of bounds, so they are clamped to 0 for the
    # lookup; the unclamped values are kept in "matched_idxs".
    safe_idxs = assignment.clamp(min=0)
    matched = slim_target[safe_idxs]
    matched.add_field("matched_idxs", assignment)
    return matched
def calc_detection_voc_prec_rec(gt_boxlists, pred_boxlists, iou_thresh=0.5):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.

    This function calculates precision and recall of predicted bounding boxes
    obtained from a dataset which has N images. The code is based on the
    evaluation code used in PASCAL VOC Challenge.

    Args:
        gt_boxlists: iterable of ground-truth box lists, one per image; each
            must provide ``bbox``, ``size`` and the fields "labels" and
            "difficult".
        pred_boxlists: iterable of predicted box lists, one per image; each
            must provide ``bbox`` and the fields "labels" and "scores".
        iou_thresh: minimum IoU for a prediction to be matched to a
            ground-truth box.

    Returns:
        tuple ``(prec, rec)`` of two lists indexed by class label.
        ``prec[label]`` / ``rec[label]`` are cumulative arrays over the
        score-sorted predictions of that class. Entries are ``None`` for
        labels that never occurred; ``rec[label]`` is also ``None`` when the
        class has no non-difficult ground truth. Both lists are empty when
        no label was seen at all (e.g. an empty dataset).
    """
    n_pos = defaultdict(int)
    score = defaultdict(list)
    # Per-prediction outcome: 1 = true positive, 0 = false positive,
    # -1 = matched a "difficult" ground truth and therefore ignored.
    match = defaultdict(list)

    for gt_boxlist, pred_boxlist in zip(gt_boxlists, pred_boxlists):
        pred_bbox = pred_boxlist.bbox.numpy()
        pred_label = pred_boxlist.get_field("labels").numpy()
        pred_score = pred_boxlist.get_field("scores").numpy()
        gt_bbox = gt_boxlist.bbox.numpy()
        gt_label = gt_boxlist.get_field("labels").numpy()
        gt_difficult = gt_boxlist.get_field("difficult").numpy()

        for label in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == label
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score, highest first
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == label
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[label] += np.logical_not(gt_difficult_l).sum()
            score[label].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                # Nothing to match against: every prediction is a false positive.
                match[label].extend((0, ) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1
            iou = boxlist_iou(
                BoxList(pred_bbox_l, gt_boxlist.size),
                BoxList(gt_bbox_l, gt_boxlist.size),
            ).numpy()
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            # selec[i] becomes True once ground-truth box i has been claimed.
            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx < 0:
                    match[label].append(0)
                    continue
                if gt_difficult_l[gt_idx]:
                    match[label].append(-1)
                elif not selec[gt_idx]:
                    match[label].append(1)
                else:
                    # Duplicate detection of an already claimed ground truth.
                    match[label].append(0)
                selec[gt_idx] = True

    # No label was observed (empty dataset): max() below would raise.
    if not n_pos:
        return [], []

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for label in n_pos.keys():
        score_l = np.array(score[label])
        match_l = np.array(match[label], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[label] is nan.
        prec[label] = tp / (fp + tp)
        # If n_pos[label] is 0, rec[label] is None.
        if n_pos[label] > 0:
            rec[label] = tp / n_pos[label]

    return prec, rec
def evaluate_box_proposals(predictions, dataset, thresholds=None, area="all", limit=None):
    """Evaluate detection proposal recall metrics.

    This function is a much faster alternative to the official COCO API
    recall evaluation code. However, it produces slightly different results.

    Args:
        predictions: sequence of per-image proposal box lists carrying an
            "objectness" field; the position in the sequence is used as the
            dataset image index.
        dataset: object providing ``id_to_img_map``, ``get_img_info`` and a
            ``coco`` handle with ``getAnnIds`` / ``loadAnns``.
        thresholds: tensor of IoU thresholds; defaults to 0.50:0.05:0.95.
        area: name of the ground-truth area range to evaluate on.
        limit: if given, only the ``limit`` highest-objectness proposals of
            each image are used.

    Returns:
        dict with keys "ar" (mean recall over thresholds), "recalls",
        "thresholds", "gt_overlaps" (sorted best IoU per matched ground
        truth) and "num_pos" (number of ground-truth boxes considered).
        When ``num_pos`` is 0 the recalls (and "ar") are NaN.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]

        img_info = dataset.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        prediction = prediction.resize((image_width, image_height))

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = prediction.get_field("objectness").sort(descending=True)[1]
        prediction = prediction[inds]

        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)
        gt_boxes = [obj["bbox"] for obj in anno if obj["iscrowd"] == 0]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = BoxList(gt_boxes, (image_width, image_height),
                           mode="xywh").convert("xyxy")
        gt_areas = torch.as_tensor(
            [obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        # Counted before the "no predictions" skip so that ground truth
        # without any proposal still lowers recall.
        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if len(prediction) == 0:
            continue

        if limit is not None and len(prediction) > limit:
            prediction = prediction[:limit]

        overlaps = boxlist_iou(prediction, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)

    # torch.cat rejects an empty list, which happens when no image
    # contributed (no predictions, or no ground truth in the area range).
    if gt_overlaps:
        gt_overlaps = torch.cat(gt_overlaps, dim=0)
    else:
        gt_overlaps = torch.zeros(0, dtype=torch.float32)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
def __call__(self, proposals, source_score, labels, device, return_targets=False):
    """Build pseudo labels from the top-k proposals of each positive class.

    For every class flagged in ``labels[1:]`` the ``k`` highest-scoring
    proposals are considered in score order; a proposal is kept as a pseudo
    ground-truth box when its IoU with every higher-ranked proposal of the
    same class is below ``self.iou_th`` (the top one is always kept). Each
    RoI then inherits the class and score of the pseudo ground truth it
    overlaps most, or background (0) if that overlap is below
    ``cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD``.

    Reads ``self.portion``, ``self.iou_th`` and, when ``return_targets`` is
    set, ``self.box_coder``.

    Args:
        proposals: box list of candidate regions (``bbox``, ``size``,
            ``mode`` and ``len`` are used).
        source_score: per-proposal class scores; column 0 is skipped.
        labels: image-level label vector; entry 0 is skipped.
        device: device on which new tensors are allocated.
        return_targets: when truthy, also return box regression targets.

    Returns:
        ``(pseudo_labels, loss_weights)`` or, with ``return_targets``,
        ``(pseudo_labels, loss_weights, regression_targets)``. All-zero
        tensors are returned when there is no positive class, no proposal,
        or no pseudo ground truth.
    """
    num_rois = len(proposals)
    # Keep at least the top-scoring proposal: with k == 0 the reshape to
    # (num_classes, -1, 4) and the k_ind[:, 0] write below would both fail.
    k = max(int(num_rois * self.portion), 1)
    num_gt_cls = labels[1:].sum()

    if num_gt_cls != 0 and num_rois != 0:
        cls_prob = source_score[:, 1:]
        gt_cls_inds = labels[1:].nonzero(as_tuple=False)[:, 0]
        sorted_scores, max_inds = cls_prob[:, gt_cls_inds].sort(
            dim=0, descending=True)
        sorted_scores = sorted_scores[:k]
        max_inds = max_inds[:k]

        # Gather the top-k boxes per class -> (num_classes, k, 4).
        _boxes = proposals.bbox[max_inds.t().contiguous().view(-1)].view(
            num_gt_cls.int(), -1, 4)
        _boxes = BatchBoxList(_boxes, proposals.size, mode=proposals.mode)
        # NOTE(review): presumably per-class pairwise IoU of shape
        # (num_classes, k, k) - confirm against batch_boxlist_iou.
        ious = batch_boxlist_iou(_boxes, _boxes)

        k_ind = torch.zeros(num_gt_cls.int(), k, dtype=torch.bool, device=device)
        k_ind[:, 0] = 1  # always take the one with max score
        for ii in range(1, k):
            # Highest IoU between candidate ii and all higher-ranked ones.
            max_iou, _ = torch.max(ious[:, ii:ii + 1, :ii], dim=2)
            k_ind[:, ii] = (max_iou < self.iou_th).byte().squeeze(-1)

        gt_boxes = _boxes.bbox[k_ind]
        # +1 restores the class index in the full (with background) layout.
        gt_cls_id = gt_cls_inds + 1
        temp_cls = torch.ones(
            (_boxes.bbox.shape[:2]), device=device) * gt_cls_id.view(-1, 1).float()
        gt_classes = temp_cls[k_ind].view(-1, 1).long()
        gt_scores = sorted_scores.t().contiguous()[k_ind].view(-1, 1)

        if gt_boxes.shape[0] != 0:
            gt_boxes = BoxList(gt_boxes, proposals.size, mode=proposals.mode)
            overlaps = boxlist_iou(proposals, gt_boxes)

            # TODO: pytorch and numpy argmax perform differently
            # max_overlaps, gt_assignment = overlaps.max(dim=1)
            # Convert once and reuse for both the max and the argmax.
            overlaps_np = overlaps.cpu().numpy()
            max_overlaps = torch.tensor(overlaps_np.max(axis=1), device=device)
            gt_assignment = torch.tensor(overlaps_np.argmax(axis=1), device=device)

            pseudo_labels = gt_classes[gt_assignment, 0]
            loss_weights = gt_scores[gt_assignment, 0]

            # Select background RoIs as those with < FG_IOU_THRESHOLD
            bg_inds = max_overlaps.lt(
                cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD).nonzero(
                    as_tuple=False)[:, 0]
            pseudo_labels[bg_inds] = 0

            # compute regression targets
            if return_targets:
                matched_targets = gt_boxes[gt_assignment]
                regression_targets = self.box_coder.encode(
                    matched_targets.bbox, proposals.bbox)
                return pseudo_labels, loss_weights, regression_targets

            return pseudo_labels, loss_weights

    # corner case: nothing to assign, every RoI is background with zero weight
    pseudo_labels = torch.zeros(num_rois, dtype=torch.long, device=device)
    loss_weights = torch.zeros(num_rois, dtype=torch.float, device=device)
    if return_targets:
        regression_targets = torch.zeros(num_rois, 4, dtype=torch.float, device=device)
        return pseudo_labels, loss_weights, regression_targets
    return pseudo_labels, loss_weights