def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from per-image proposal boxes.

    When ground truth is available for an image, every proposal that has a
    positive overlap with some ground-truth box gets that overlap stored in
    the column of the class of its best-overlapping ground-truth box.

    Args:
        box_list: sequence holding one array of boxes per image; it must
            contain exactly ``self.num_images`` entries.
        gt_roidb: ground-truth roidb (one dict per image providing
            ``'boxes'`` and ``'gt_classes'``) or ``None`` when there is no
            ground truth.

    Returns:
        A list with one dict per image containing ``'boxes'`` (the proposals
        as given), ``'gt_classes'`` (all zeros, int32), ``'gt_overlaps'``
        (``scipy.sparse.csr_matrix`` of shape
        ``(num_boxes, self.num_classes)``), ``'flipped'`` (always ``False``)
        and ``'seg_areas'`` (all zeros, float32).

    Raises:
        AssertionError: if ``len(box_list)`` differs from ``self.num_images``.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # np.float was only an alias of the builtin float (float64) and
            # has been removed from NumPy; np.float64 is the explicit
            # equivalent.
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            # Only proposals that actually touch a ground-truth box get an
            # entry; everything else stays zero (and thus sparse).
            pos_inds = np.where(maxes > 0)[0]
            overlaps[pos_inds, gt_classes[argmaxes[pos_inds]]] = maxes[pos_inds]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes, ), dtype=np.int32),
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes, ), dtype=np.float32),
        })
    return roidb
def prepare_targets(annos, anchors, topk=9, device=torch.device("cuda:0")):
    """Assign classification, regression and centerness targets to anchors.

    The assignment follows the ATSS (Adaptive Training Sample Selection)
    recipe, applied independently to every image in the batch:

    1. per pyramid level, pick the ``topk`` anchors whose centres are closest
       to each ground-truth centre;
    2. keep the candidates whose IoU is at least ``mean + std`` of that
       ground truth's candidate IoUs;
    3. keep only candidates whose centre lies inside the ground-truth box;
    4. an anchor selected for several ground truths goes to the one with the
       highest IoU.

    Args:
        annos: dict with ``'bboxes'`` (indexed as ``[image][gt, 0:4]`` with
            columns x1, y1, x2, y2), ``'obj_num'`` (number of valid boxes per
            image) and ``'cls'`` (labels, indexed as ``[image][gt]``).
        anchors: list with one ``(num_level_anchors, 4)`` tensor per pyramid
            level, columns x1, y1, x2, y2.
        topk: number of closest anchors picked per level and ground truth;
            clamped to the number of anchors a level actually has.
        device: device on which all targets are computed.

    Returns:
        Tuple ``(cls_targets, reg_targets, ctness_targets)`` stacked over the
        batch. ``cls_targets`` holds the matched label per anchor, ``-1`` for
        unassigned anchors. ``reg_targets`` holds ``box_encode`` of the
        matched box per anchor (zeros for images without objects).
        ``ctness_targets`` holds the centerness of every anchor centre with
        respect to its matched box; note that unassigned anchors are paired
        with ground truth 0 for the regression/centerness computation.

    Raises:
        AssertionError: if any computed centerness value is NaN.
    """
    anchor_nums = [level_anchors.shape[0] for level_anchors in anchors]
    anchors = torch.cat(anchors).to(device)
    total_anchors_num = anchors.shape[0]
    gt_boxes = annos['bboxes'].float().to(device)
    obj_nums = annos['obj_num'].to(device)
    obj_clses = annos['cls'].to(device)
    batch_size = gt_boxes.shape[0]

    anchors_cx = (anchors[:, 2] + anchors[:, 0]) / 2.0
    anchors_cy = (anchors[:, 3] + anchors[:, 1]) / 2.0
    anchor_points = torch.stack((anchors_cx, anchors_cy), dim=1)

    cls_targets = []
    reg_targets = []
    ctness_targets = []
    for i in range(batch_size):
        num_gt = int(obj_nums[i])
        if num_gt == 0:
            # No objects in this image: every anchor is unassigned.
            cls_targets.append(
                torch.full((total_anchors_num, ), -1, dtype=torch.int,
                           device=device))
            reg_targets.append(torch.zeros_like(anchors))
            ctness_targets.append(
                torch.zeros((total_anchors_num, ), dtype=torch.float,
                            device=device))
            continue

        bboxes_per_img = gt_boxes[i][:num_gt]
        labels_per_im = obj_clses[i][:num_gt]

        gt_cx = (bboxes_per_img[:, 2] + bboxes_per_img[:, 0]) / 2.0
        gt_cy = (bboxes_per_img[:, 3] + bboxes_per_img[:, 1]) / 2.0
        gt_points = torch.stack((gt_cx, gt_cy), dim=1)

        # (num_anchors, num_gt) Euclidean distance between centres.
        distances = (anchor_points[:, None, :] -
                     gt_points[None, :, :]).pow(2).sum(-1).sqrt()

        # Step 1: per level, the k closest anchors for every ground truth.
        candidate_idxs = []
        start_idx = 0
        for anchor_num_per_level in anchor_nums:
            end_idx = start_idx + anchor_num_per_level
            distances_per_level = distances[start_idx:end_idx, :]
            # topk() raises if k exceeds the dimension size, so clamp k for
            # levels holding fewer than ``topk`` anchors.
            k = min(topk, anchor_num_per_level)
            _, topk_idxs_per_level = distances_per_level.topk(
                k, dim=0, largest=False)
            candidate_idxs.append(topk_idxs_per_level + start_idx)
            start_idx = end_idx
        candidate_idxs = torch.cat(candidate_idxs, dim=0)

        # Step 2: adaptive IoU threshold (mean + std) per ground truth.
        ious = bbox_overlaps(anchors.float(), bboxes_per_img.float())
        gt_range = torch.arange(num_gt, device=candidate_idxs.device)
        candidate_ious = ious[candidate_idxs, gt_range]
        iou_thresh_per_gt = candidate_ious.mean(0) + candidate_ious.std(0)
        is_pos = candidate_ious >= iou_thresh_per_gt[None, :]

        # Step 3: the candidate's centre has to lie inside the ground truth.
        cand_cx = anchors_cx[candidate_idxs]
        cand_cy = anchors_cy[candidate_idxs]
        l = cand_cx - bboxes_per_img[:, 0]
        t = cand_cy - bboxes_per_img[:, 1]
        r = bboxes_per_img[:, 2] - cand_cx
        b = bboxes_per_img[:, 3] - cand_cy
        is_in_gts = torch.stack([l, t, r, b], dim=1).min(dim=1)[0] > 0.01
        is_pos = is_pos & is_in_gts

        # Step 4: if an anchor is assigned to multiple gts, the one with the
        # highest IoU wins. Work on a flattened (num_gt * num_anchors) view,
        # where entry ``g * num_anchors + a`` belongs to (gt g, anchor a).
        flat_candidate_idxs = (
            candidate_idxs + gt_range[None, :] * total_anchors_num
        ).reshape(-1)
        ious_inf = torch.full_like(ious, -INF).t().contiguous().view(-1)
        index = flat_candidate_idxs[is_pos.reshape(-1)]
        ious_inf[index] = ious.t().contiguous().view(-1)[index]
        ious_inf = ious_inf.view(num_gt, -1).t()

        anchors_to_gt_values, anchors_to_gt_indexs = ious_inf.max(dim=1)
        cls_labels_per_im = labels_per_im[anchors_to_gt_indexs]
        # Anchors that were never selected keep -INF and become "unassigned".
        cls_labels_per_im[anchors_to_gt_values == -INF] = -1
        matched_gts = bboxes_per_img[anchors_to_gt_indexs]

        reg_targets_per_im = box_encode(matched_gts, anchors)
        cls_targets.append(cls_labels_per_im)
        reg_targets.append(reg_targets_per_im)

        # Centerness of each anchor centre with respect to its matched box.
        l = anchors_cx - matched_gts[:, 0]
        t = anchors_cy - matched_gts[:, 1]
        r = matched_gts[:, 2] - anchors_cx
        b = matched_gts[:, 3] - anchors_cy
        left_right = torch.stack([l, r], dim=1).abs()
        top_bottom = torch.stack([t, b], dim=1).abs()
        centerness = torch.sqrt(
            (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) *
            (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0]))
        assert not torch.isnan(centerness).any()
        ctness_targets.append(centerness)

    cls_targets = torch.stack(cls_targets, dim=0)
    reg_targets = torch.stack(reg_targets, dim=0)
    ctness_targets = torch.stack(ctness_targets, dim=0)
    return cls_targets, reg_targets, ctness_targets
def tpfp_imagenet(det_bboxes,
                  gt_bboxes,
                  gt_bboxes_ignore=None,
                  default_iou_thr=0.5,
                  area_ranges=None):
    """Check if detected bboxes are true positive or false positive.

    Each ground-truth box gets its own IoU threshold,
    ``min(w * h / ((w + 10) * (h + 10)), default_iou_thr)``, so small boxes
    are matched more leniently. Unlike ``tpfp_default``, a detection whose
    best-overlapping gt is already covered may still match another gt.

    Args:
        det_bboxes (ndarray): Detected bboxes of this image, of shape (m, 5);
            the last column is the score.
        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).
        gt_bboxes_ignore (ndarray | None): Ignored gt bboxes of this image,
            of shape (k, 4). ``None`` means there are no ignored boxes.
            Default: None
        default_iou_thr (float): IoU threshold to be considered as matched
            for medium and large bboxes (small ones have special rules).
            Default: 0.5.
        area_ranges (list[tuple] | None): Range of bbox areas to be
            evaluated, in the format [(min1, max1), (min2, max2), ...].
            Default: None.

    Returns:
        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of
            each array is (num_scales, m).
    """
    if gt_bboxes_ignore is None:
        # The signature advertises None as the default; treat it as "no
        # ignored boxes" instead of failing on ``None.shape``.
        gt_bboxes_ignore = np.zeros((0, 4), dtype=gt_bboxes.dtype)

    # an indicator of ignored gts (np.bool was removed from NumPy; the
    # builtin bool is the equivalent dtype)
    gt_ignore_inds = np.concatenate(
        (np.zeros(gt_bboxes.shape[0], dtype=bool),
         np.ones(gt_bboxes_ignore.shape[0], dtype=bool)))
    # stack gt_bboxes and gt_bboxes_ignore for convenience
    gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))

    num_dets = det_bboxes.shape[0]
    num_gts = gt_bboxes.shape[0]
    if area_ranges is None:
        area_ranges = [(None, None)]
    num_scales = len(area_ranges)
    # tp and fp are of shape (num_scales, num_dets), each row is tp or fp
    # of a certain scale.
    tp = np.zeros((num_scales, num_dets), dtype=np.float32)
    fp = np.zeros((num_scales, num_dets), dtype=np.float32)

    # without any gt, every detection within the area range is a false
    # positive
    if gt_bboxes.shape[0] == 0:
        if area_ranges == [(None, None)]:
            fp[...] = 1
        else:
            det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0]) * (
                det_bboxes[:, 3] - det_bboxes[:, 1])
            for i, (min_area, max_area) in enumerate(area_ranges):
                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
        return tp, fp

    ious = bbox_overlaps(det_bboxes, gt_bboxes - 1)
    gt_w = gt_bboxes[:, 2] - gt_bboxes[:, 0]
    gt_h = gt_bboxes[:, 3] - gt_bboxes[:, 1]
    # per-gt threshold: relaxed for small boxes, capped at default_iou_thr
    iou_thrs = np.minimum((gt_w * gt_h) / ((gt_w + 10.0) * (gt_h + 10.0)),
                          default_iou_thr)
    # sort all detections by scores in descending order
    sort_inds = np.argsort(-det_bboxes[:, -1])
    for k, (min_area, max_area) in enumerate(area_ranges):
        gt_covered = np.zeros(num_gts, dtype=bool)
        # if no area range is specified, gt_area_ignore is all False
        if min_area is None:
            gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)
        else:
            gt_areas = gt_w * gt_h
            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
        for i in sort_inds:
            max_iou = -1
            matched_gt = -1
            # find best overlapped available gt
            for j in range(num_gts):
                # different from PASCAL VOC: allow finding other gts if the
                # best overlapped ones are already matched by other det
                # bboxes
                if gt_covered[j]:
                    continue
                if ious[i, j] >= iou_thrs[j] and ious[i, j] > max_iou:
                    max_iou = ious[i, j]
                    matched_gt = j
            # there are 4 cases for a det bbox:
            # 1. it matches a gt, tp = 1, fp = 0
            # 2. it matches an ignored gt, tp = 0, fp = 0
            # 3. it matches no gt and within area range, tp = 0, fp = 1
            # 4. it matches no gt but is beyond area range, tp = 0, fp = 0
            if matched_gt >= 0:
                gt_covered[matched_gt] = True
                if not (gt_ignore_inds[matched_gt]
                        or gt_area_ignore[matched_gt]):
                    tp[k, i] = 1
            elif min_area is None:
                fp[k, i] = 1
            else:
                bbox = det_bboxes[i, :4]
                area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                if area >= min_area and area < max_area:
                    fp[k, i] = 1
    return tp, fp
def tpfp_default(det_bboxes,
                 gt_bboxes,
                 gt_bboxes_ignore=None,
                 iou_thr=0.5,
                 area_ranges=None):
    """Check if detected bboxes are true positive or false positive.

    Every detection is compared against the single gt it overlaps most. If
    that gt has already been claimed by a higher-scoring detection, the
    detection counts as a false positive.

    Args:
        det_bboxes (ndarray): Detected bboxes of this image, of shape (m, 5);
            the last column is the score.
        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).
        gt_bboxes_ignore (ndarray | None): Ignored gt bboxes of this image,
            of shape (k, 4). ``None`` means there are no ignored boxes.
            Default: None
        iou_thr (float): IoU threshold to be considered as matched.
            Default: 0.5.
        area_ranges (list[tuple] | None): Range of bbox areas to be
            evaluated, in the format [(min1, max1), (min2, max2), ...].
            Default: None.

    Returns:
        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of
            each array is (num_scales, m).
    """
    if gt_bboxes_ignore is None:
        # The signature advertises None as the default; treat it as "no
        # ignored boxes" instead of failing on ``None.shape``.
        gt_bboxes_ignore = np.zeros((0, 4), dtype=gt_bboxes.dtype)

    # an indicator of ignored gts (np.bool was removed from NumPy; the
    # builtin bool is the equivalent dtype)
    gt_ignore_inds = np.concatenate(
        (np.zeros(gt_bboxes.shape[0], dtype=bool),
         np.ones(gt_bboxes_ignore.shape[0], dtype=bool)))
    # stack gt_bboxes and gt_bboxes_ignore for convenience
    gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))

    num_dets = det_bboxes.shape[0]
    num_gts = gt_bboxes.shape[0]
    if area_ranges is None:
        area_ranges = [(None, None)]
    num_scales = len(area_ranges)
    # tp and fp are of shape (num_scales, num_dets), each row is tp or fp of
    # a certain scale
    tp = np.zeros((num_scales, num_dets), dtype=np.float32)
    fp = np.zeros((num_scales, num_dets), dtype=np.float32)

    # if there is no gt bboxes in this image, then all det bboxes
    # within area range are false positives
    if gt_bboxes.shape[0] == 0:
        if area_ranges == [(None, None)]:
            fp[...] = 1
        else:
            det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0]) * (
                det_bboxes[:, 3] - det_bboxes[:, 1])
            for i, (min_area, max_area) in enumerate(area_ranges):
                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
        return tp, fp

    ious = bbox_overlaps(det_bboxes, gt_bboxes)
    # for each det, the max iou with all gts
    ious_max = ious.max(axis=1)
    # for each det, which gt overlaps most with it
    ious_argmax = ious.argmax(axis=1)
    # sort all dets in descending order by scores
    sort_inds = np.argsort(-det_bboxes[:, -1])
    for k, (min_area, max_area) in enumerate(area_ranges):
        gt_covered = np.zeros(num_gts, dtype=bool)
        # if no area range is specified, gt_area_ignore is all False
        if min_area is None:
            gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)
        else:
            gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (
                gt_bboxes[:, 3] - gt_bboxes[:, 1])
            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
        for i in sort_inds:
            if ious_max[i] >= iou_thr:
                matched_gt = ious_argmax[i]
                if not (gt_ignore_inds[matched_gt]
                        or gt_area_ignore[matched_gt]):
                    if not gt_covered[matched_gt]:
                        gt_covered[matched_gt] = True
                        tp[k, i] = 1
                    else:
                        # duplicate detection of an already matched gt
                        fp[k, i] = 1
                # otherwise ignore this detected bbox, tp = 0, fp = 0
            elif min_area is None:
                fp[k, i] = 1
            else:
                bbox = det_bboxes[i, :4]
                area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                if area >= min_area and area < max_area:
                    fp[k, i] = 1
    return tp, fp
def evaluate_recall(self,
                    candidate_boxes=None,
                    thresholds=None,
                    area='all',
                    limit=None):
    """Evaluate detection proposal recall metrics.

    Ground-truth boxes and proposals are matched greedily per image: the
    best-covered gt box is paired with its best proposal, both are removed,
    and the step repeats once per gt box.

    Args:
        candidate_boxes: optional sequence with one array of proposal boxes
            per image. When ``None``, the non-ground-truth boxes stored in
            ``self.roidb`` (``gt_classes == 0``) are used.
        thresholds: IoU thresholds at which recall is computed. Defaults to
            0.5, 0.55, ..., 0.95.
        area: name of the gt area range to evaluate; one of ``'all'``,
            ``'small'``, ``'medium'``, ``'large'``, ``'96-128'``,
            ``'128-256'``, ``'256-512'``, ``'512-inf'``.
        limit: optional maximum number of proposals used per image.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps

    Raises:
        AssertionError: if ``area`` is not a known area range name.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    # dict.has_key() no longer exists in Python 3; ``in`` works everywhere.
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
        entry = self.roidb[i]
        # Checking for max_overlaps == 1 avoids including crowd annotations
        # (...pretty hacking :/)
        max_gt_overlaps = entry['gt_overlaps'].toarray().max(axis=1)
        gt_inds = np.where((entry['gt_classes'] > 0)
                           & (max_gt_overlaps == 1))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0])
                                 & (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        if candidate_boxes is None:
            # If candidate_boxes is not supplied, the default is to use the
            # non-ground-truth boxes from this roidb
            non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
            boxes = entry['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        # np.float was removed from NumPy; np.float64 is what it aliased.
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(gt_boxes.shape[0]):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert (gt_ovr >= 0)
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert (_gt_overlaps[j] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps
    }
def cal_rpn(imgsize, featuresize, scale, gtboxes):
    """Compute RPN training labels and box regression targets.

    :param imgsize: input image size as (height, width)
    :param featuresize: feature-map size (e.g. w/16, h/16 for a VGG16
        backbone)
    :param scale: down-sampling ratio between input image and feature map
        (e.g. 16)
    :param gtboxes: ground-truth boxes as (x1, y1, x2, y2)
    :return: [labels, bbox_targets], base_anchor -- ``labels`` holds one
        value per anchor (1 positive, 0 negative, -1 don't care) and
        ``bbox_targets`` is ``bbox_transfrom`` of every anchor against its
        best-overlapping gt box
    """
    imgh, imgw = imgsize

    # generate the anchors for the feature map
    base_anchor = gen_anchor(featuresize, scale)

    # IoU between anchors and gt boxes, used to label the anchors.
    # With x anchors and y gt boxes the result is indexed as (x, y).
    # np.float was removed from NumPy; np.float64 is what it aliased.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(base_anchor, dtype=np.float64),
        np.ascontiguousarray(gtboxes, dtype=np.float64))

    # init labels: -1 don't care, 0 is negative, 1 is positive
    labels = np.empty(base_anchor.shape[0])
    labels.fill(-1)

    # for each gt box, the anchor with the highest IoU
    gt_argmax_overlaps = overlaps.argmax(axis=0)

    # for each anchor, the gt box with the highest IoU (and that IoU)
    anchor_argmax_overlaps = overlaps.argmax(axis=1)
    anchor_max_overlaps = overlaps[range(overlaps.shape[0]),
                                   anchor_argmax_overlaps]

    # IoU > IOU_POSITIVE
    labels[anchor_max_overlaps > IOU_POSITIVE] = 1
    # IoU < IOU_NEGATIVE
    labels[anchor_max_overlaps < IOU_NEGATIVE] = 0
    # ensure that every gt box has at least one positive RPN region
    labels[gt_argmax_overlaps] = 1

    # only keep anchors inside the image; anchors crossing the image border
    # are set back to "don't care"
    outside_anchor = np.where((base_anchor[:, 0] < 0)
                              | (base_anchor[:, 1] < 0)
                              | (base_anchor[:, 2] >= imgw)
                              | (base_anchor[:, 3] >= imgh))[0]
    labels[outside_anchor] = -1

    # subsample positive labels if there are more than RPN_POSITIVE_NUM
    fg_index = np.where(labels == 1)[0]
    if len(fg_index) > RPN_POSITIVE_NUM:
        labels[np.random.choice(fg_index,
                                len(fg_index) - RPN_POSITIVE_NUM,
                                replace=False)] = -1

    # subsample negative labels so that positives + negatives do not exceed
    # RPN_TOTAL_NUM
    bg_index = np.where(labels == 0)[0]
    num_bg = RPN_TOTAL_NUM - np.sum(labels == 1)
    if len(bg_index) > num_bg:
        labels[np.random.choice(bg_index,
                                len(bg_index) - num_bg,
                                replace=False)] = -1

    # calculate bbox targets against each anchor's best-overlapping gt box
    bbox_targets = bbox_transfrom(base_anchor,
                                  gtboxes[anchor_argmax_overlaps, :])

    return [labels, bbox_targets], base_anchor