def __call__(self, img, boxes, labels):
    """Randomly crop ``img`` subject to a minimum-overlap constraint.

    A mode is drawn from ``self.sample_mode``.  The value ``1`` returns the
    inputs untouched; any other value is used as the minimum overlap that
    ``bbox_overlaps`` must report between the crop window and every box.
    Up to 50 windows are tried for a mode before a new mode is drawn.

    Args:
        img: Image array with three dimensions, indexed (row, column,
            channel).
        boxes: Box array, reshaped to ``(-1, 4)`` and read as
            ``[x1, y1, x2, y2]``.
        labels: Per-box labels; filtered with the same mask as ``boxes``.

    Returns:
        tuple: ``(img, boxes, labels)`` after cropping.  Boxes whose centre
        falls outside the window are dropped, the rest are clipped to the
        window and shifted into its coordinate frame.
    """
    h, w, _ = img.shape
    while True:
        mode = random.choice(self.sample_mode)
        if mode == 1:
            return img, boxes, labels

        min_iou = mode
        for _ in range(50):
            new_w = random.uniform(self.min_crop_size * w, w)
            new_h = random.uniform(self.min_crop_size * h, h)

            # h / w in [0.5, 2]
            if new_h / new_w < 0.5 or new_h / new_w > 2:
                continue

            # Both bounds are passed explicitly: the one-argument form
            # ``uniform(w - new_w)`` means ``uniform(low=w - new_w, high=1.0)``
            # in numpy, which is documented as undefined when high < low.
            left = random.uniform(0, w - new_w)
            top = random.uniform(0, h - new_h)

            patch = np.array(
                (int(left), int(top), int(left + new_w), int(top + new_h)))
            # A zero-width or zero-height window would yield an empty image.
            if patch[2] == patch[0] or patch[3] == patch[1]:
                continue

            overlaps = bbox_overlaps(
                patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
            has_boxes = len(overlaps) > 0
            # ``min()`` of an empty array raises, so only test when boxes exist.
            if has_boxes and overlaps.min() < min_iou:
                continue

            if has_boxes:
                # Keep only the boxes whose centre lies inside the window.
                center = (boxes[:, :2] + boxes[:, 2:]) / 2
                mask = ((center[:, 0] > patch[0])
                        & (center[:, 1] > patch[1])
                        & (center[:, 0] < patch[2])
                        & (center[:, 1] < patch[3]))
                if not mask.any():
                    continue
                # Boolean indexing copies, so the caller's arrays stay intact.
                boxes = boxes[mask]
                labels = labels[mask]

                # Clip to the window, then move into window coordinates.
                boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
                boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
                boxes -= np.tile(patch[:2], 2)

            img = img[patch[1]:patch[3], patch[0]:patch[2]]
            return img, boxes, labels
def box_voting(top_dets, all_dets, thresh, scoring_method='ID', beta=1.0):
    """Refine each box in `top_dets` by score-weighted voting over `all_dets`.

    See: https://arxiv.org/abs/1505.01749. Score averaging, which is not part
    of the referenced paper, is selected with `scoring_method`.

    Args:
        top_dets: [N, 5] array, each row is [x1, y1, x2, y2, score].
        all_dets: [M, 5] array with the same row layout.
        thresh: Minimum overlap (as returned by `bbox_overlaps`) for a box of
            `all_dets` to take part in the vote for a top detection.
        scoring_method: One of 'ID', 'TEMP_AVG', 'AVG', 'IOU_AVG',
            'GENERALIZED_AVG' or 'QUASI_SUM'.
        beta: Temperature / exponent used by 'TEMP_AVG', 'GENERALIZED_AVG'
            and 'QUASI_SUM'.

    Returns:
        A copy of `top_dets` with voted boxes and, unless `scoring_method` is
        'ID', re-computed scores.

    Raises:
        NotImplementedError: If `scoring_method` is not recognised.
    """
    refined = top_dets.copy()
    voter_boxes = all_dets[:, :4]
    voter_scores = all_dets[:, 4]
    overlaps = bbox_overlaps(top_dets[:, :4], voter_boxes)

    for k in range(refined.shape[0]):
        voters = np.where(overlaps[k] >= thresh)[0]
        scores = voter_scores[voters]
        # The new box is the score-weighted mean of all voting boxes.
        refined[k, :4] = np.average(
            voter_boxes[voters, :], axis=0, weights=scores)

        if scoring_method == 'ID':
            # Identity: the original score is kept.
            continue

        if scoring_method == 'TEMP_AVG':
            # Average probabilities (considered as P(detected class) vs.
            # P(not the detected class)) after smoothing with a temperature
            # hyperparameter.
            probs = np.vstack((scores, 1.0 - scores))
            log_ratio = np.log(probs / np.max(probs, axis=0))
            tempered = np.exp(log_ratio / beta)
            normalised = tempered / np.sum(tempered, axis=0)
            new_score = normalised[0].mean()
        elif scoring_method == 'AVG':
            # Plain mean of the voters' scores.
            new_score = scores.mean()
        elif scoring_method == 'IOU_AVG':
            # Mean of the voters' scores weighted by their overlap.
            new_score = np.average(scores, weights=overlaps[k, voters])
        elif scoring_method == 'GENERALIZED_AVG':
            new_score = np.mean(scores**beta)**(1.0 / beta)
        elif scoring_method == 'QUASI_SUM':
            new_score = scores.sum() / float(len(scores))**beta
        else:
            raise NotImplementedError(
                'Unknown scoring method {}'.format(scoring_method)
            )
        refined[k, 4] = new_score

    return refined
def analyze_per_img_dets(confusion_matrix,
                         gt_bboxes,
                         gt_labels,
                         result,
                         score_thr=0,
                         tp_iou_thr=0.5,
                         nms_iou_thr=None):
    """Accumulate one image's detections into ``confusion_matrix`` in place.

    Args:
        confusion_matrix (ndarray): The confusion matrix, has shape
            (num_classes + 1, num_classes + 1). Rows are indexed by ground
            truth label and columns by detected label; the last row/column
            is used for background.
        gt_bboxes (ndarray): Ground truth bboxes, has shape (num_gt, 4).
        gt_labels (ndarray): Ground truth labels, has shape (num_gt).
        result (ndarray): Detection results, has shape
            (num_classes, num_bboxes, 5).
        score_thr (float): Detections scoring below this are skipped.
            Default: 0.
        tp_iou_thr (float): IoU threshold to be considered as matched.
            Default: 0.5.
        nms_iou_thr (float|optional): When truthy, ``nms`` is re-run on each
            class's detections with this threshold before matching.
            Default: None.
    """
    tp_per_gt = np.zeros_like(gt_labels)

    for det_label, det_bboxes in enumerate(result):
        if nms_iou_thr:
            det_bboxes, _ = nms(
                det_bboxes[:, :4],
                det_bboxes[:, -1],
                nms_iou_thr,
                score_threshold=score_thr)
        ious = bbox_overlaps(det_bboxes[:, :4], gt_bboxes)

        for det_idx, det_bbox in enumerate(det_bboxes):
            if not det_bbox[4] >= score_thr:
                continue

            matched_any_gt = False
            for gt_idx, gt_label in enumerate(gt_labels):
                if not ious[det_idx, gt_idx] >= tp_iou_thr:
                    continue
                matched_any_gt = True
                if gt_label == det_label:
                    tp_per_gt[gt_idx] += 1  # TP
                confusion_matrix[gt_label, det_label] += 1

            if not matched_any_gt:  # BG FP
                confusion_matrix[-1, det_label] += 1

    # Ground truths never hit by a same-class detection are false negatives.
    for gt_label, num_tp in zip(gt_labels, tp_per_gt):
        if num_tp == 0:  # FN
            confusion_matrix[gt_label, -1] += 1
def acc_single_video(results,
                     gts,
                     iou_thr=0.5,
                     ignore_iof_thr=0.5,
                     ignore_by_classes=False):
    """Accumulate results in a single video.

    Args:
        results: Per-frame predictions. ``results[t][c]`` is sliced as
            ``[:, 0]`` for the ids and ``[:, 1:-1]`` for the boxes of
            class ``c``.
        gts: Per-frame ground truth dicts providing the keys ``bboxes``,
            ``labels``, ``instance_ids``, ``bboxes_ignore`` and, when
            ``ignore_by_classes`` is set, ``labels_ignore``.
        iou_thr: Threshold forwarded to ``bbox_distances``.
        ignore_iof_thr: A prediction whose IoF with any ignore region
            exceeds this value is treated as lying in that region.
        ignore_by_classes: If True, ignore regions are split per class with
            ``outs2results``; otherwise every class shares all of them.

    Returns:
        list: One ``mm.MOTAccumulator`` per class.
    """
    num_classes = len(results[0])
    accumulators = [
        mm.MOTAccumulator(auto_id=True) for _ in range(num_classes)
    ]

    for result, gt in zip(results, gts):
        if ignore_by_classes:
            gt_ignore = outs2results(
                bboxes=gt['bboxes_ignore'],
                labels=gt['labels_ignore'],
                num_classes=num_classes)['bbox_results']
        else:
            gt_ignore = [gt['bboxes_ignore'] for _ in range(num_classes)]

        gt_per_class = outs2results(
            bboxes=gt['bboxes'],
            labels=gt['labels'],
            ids=gt['instance_ids'],
            num_classes=num_classes)['bbox_results']

        for i in range(num_classes):
            # ``np.int`` / ``np.bool`` were removed in NumPy 1.24; the
            # builtins are what those aliases always pointed to.
            gt_ids = gt_per_class[i][:, 0].astype(int)
            gt_bboxes = gt_per_class[i][:, 1:]
            pred_ids = result[i][:, 0].astype(int)
            pred_bboxes = result[i][:, 1:-1]
            dist = bbox_distances(gt_bboxes, pred_bboxes, iou_thr)

            if gt_ignore[i].shape[0] > 0:
                # 1. assign gt and preds: every prediction starts as a
                #    false positive until it receives a finite-cost match.
                fps = np.ones(pred_bboxes.shape[0], dtype=bool)
                row, col = linear_sum_assignment(dist)
                for m, n in zip(row, col):
                    if not np.isfinite(dist[m, n]):
                        continue
                    fps[n] = False
                # 2. ignore by iof
                iofs = bbox_overlaps(pred_bboxes, gt_ignore[i], mode='iof')
                ignores = (iofs > ignore_iof_thr).any(axis=1)
                # 3. filter preds: drop unmatched predictions that fall
                #    inside an ignore region.
                valid_inds = ~(fps & ignores)
                pred_ids = pred_ids[valid_inds]
                dist = dist[:, valid_inds]

            if dist.shape != (0, 0):
                accumulators[i].update(gt_ids, pred_ids, dist)

    return accumulators
def calc_tpfpfn(det_bboxes, gt_bboxes, iou_thr=0.5):
    """Count true positives, false positives and false negatives.

    Detections are visited in descending score order. Each one is matched to
    the not-yet-covered gt bbox it overlaps most, provided that overlap is
    at least ``iou_thr``.

    Args:
        det_bboxes (ndarray): Detected bboxes of this image, of shape (m, 5);
            the last column is the score.
        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).
        iou_thr (float): IoU threshold to be considered as matched.
            Default: 0.5.

    Returns:
        tuple: (tp, fp, fn).
    """
    num_dets = det_bboxes.shape[0]
    num_gts = gt_bboxes.shape[0]

    # Without any gt bbox every detection is a false positive.
    if num_gts == 0:
        return 0, num_dets, 0

    ious = bbox_overlaps(det_bboxes, gt_bboxes)
    gt_covered = np.zeros(num_gts, dtype=bool)
    tp = 0
    fp = 0

    # Highest-scoring detections get first pick of the gt bboxes.
    for det_idx in np.argsort(-det_bboxes[:, -1]):
        free_gts = np.flatnonzero(gt_covered == 0)
        if len(free_gts) == 0:
            fp += 1
            continue

        free_ious = ious[det_idx, free_gts]
        best = free_ious.argmax()
        if free_ious[best] >= iou_thr:
            gt_covered[free_gts[best]] = True
            tp += 1
        else:
            fp += 1

    fn = (gt_covered == 0).sum()
    return tp, fp, fn
def verify_match():
    """Dump side-by-side detection / ground-truth visualisations.

    For every entry of the validation split the image is resized to
    1280x800, run through the module-level ``model``, and the detected boxes
    are drawn on one copy and the ground-truth boxes on another. A detection
    whose best overlap with a ground-truth box exceeds 0.2 is drawn in that
    box's colour, otherwise in black. The two copies are stacked vertically
    and written to the output directory under the image's base name.
    """
    data_root = 'data/fabu3d'
    out_dir = '/private/ningqingqun/results/centernet2_results/match'
    os.makedirs(out_dir, exist_ok=True)

    split_file = os.path.join(data_root, 'pkl', 'val.pkl')
    # NOTE: unpickling executes arbitrary code; only load trusted split files.
    with open(split_file, 'rb') as split_f:
        dataset = pkl.load(split_f, encoding='latin1')

    for label in tqdm(dataset.values()):
        imf = os.path.join(data_root, 'images', label['filename'])
        im = cv2.imread(imf)
        if im is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            print('skip unreadable image {}'.format(imf))
            continue

        reiszed_im = cv2.resize(im, (1280, 800))
        gt_im = reiszed_im.copy()
        inputs = preprocess(reiszed_im)
        results = model(inputs.cuda(), return_loss=False, try_dummy=False)
        out_file = os.path.join(out_dir, os.path.basename(imf))

        det_bboxes = get_det_bboxes(results)
        # Annotations are divided by 1.5 to match the resized image.
        gt_bboxes = label['ann']['bboxes'] / 1.5

        if len(gt_bboxes) > 0:
            ious = bbox_overlaps(det_bboxes, gt_bboxes)
            ious_argmax = ious.argmax(axis=1)
            ious_max = ious.max(axis=1)
        else:
            # argmax over zero ground truths would raise; nothing can match.
            ious_argmax = np.zeros(len(det_bboxes), dtype=int)
            ious_max = np.zeros(len(det_bboxes))

        # One colour per ground-truth box (at least 100, as before).
        rand_colors = np.random.rand(max(100, len(gt_bboxes)), 3) * 255

        # draw det bboxes
        for i in range(len(det_bboxes)):
            b = det_bboxes[i]
            if ious_max[i] > 0.2:
                color = tuple(rand_colors[ious_argmax[i]].tolist())
            else:
                color = (0, 0, 0)
            # cv2 drawing functions expect integer pixel coordinates.
            cv2.rectangle(
                reiszed_im, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                color,
                thickness=2)

        # draw gt bboxes
        for i in range(len(gt_bboxes)):
            b = gt_bboxes[i]
            color = tuple(rand_colors[i].tolist())
            cv2.rectangle(
                gt_im, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                color,
                thickness=2)

        display_im = np.vstack([reiszed_im, gt_im])
        cv2.imwrite(out_file, display_im)
def __call__(self, results):
    """Paste random crops into ``results["img"]`` and register them as GTs.

    With probability ``self.p`` between 1 and ``self.n_crops`` crops are
    read from ``self.crop_paths``, augmented with ``self.aug`` and blended
    into the image with ``cv2.seamlessClone``. For each crop up to 10
    positions are tried; a position is accepted when the crop's overlap
    with every existing ground-truth box (``bbox_overlaps`` in ``iof`` mode)
    stays below ``self.iou_threshold``. Accepted crops are appended to
    ``gt_bboxes`` with label ``self.crop_label``.

    Args:
        results (dict): Must provide ``img``, ``gt_bboxes`` and
            ``gt_labels``.

    Returns:
        dict: The same ``results`` dict, possibly modified in place.
    """
    if not np.random.rand() <= self.p:
        return results

    for _ in range(np.random.randint(1, self.n_crops + 1)):
        filepath = random.choice(self.crop_paths)
        crop = mmcv.imread(filepath)
        crop = self.aug(image=crop)["image"]
        crop_h, crop_w = crop.shape[:2]
        h, w = results["img"].shape[:2]

        # The crop must be at least 3 px wide/high and smaller than the image.
        if not (3 <= crop_w < w and 3 <= crop_h < h):
            continue

        for _attempt in range(10):
            y_center = np.random.randint(crop_h // 2, h - crop_h // 2)
            x_center = np.random.randint(crop_w // 2, w - crop_w // 2)
            crop_bbox = np.array([
                x_center - crop_w // 2,
                y_center - crop_h // 2,
                x_center + crop_w // 2,
                y_center + crop_h // 2,
            ]).reshape(1, 4)

            ious = bbox_overlaps(
                results["gt_bboxes"], crop_bbox, mode="iof")
            # With no existing ground truth there is nothing to collide
            # with; the builtin ``max`` would raise on the empty result.
            if np.size(ious) > 0 and not np.max(ious) < self.iou_threshold:
                continue

            crop_mask = 255 * np.ones(crop.shape, crop.dtype)
            results["img"] = cv2.seamlessClone(
                src=crop,
                dst=results["img"],
                mask=crop_mask,
                p=(x_center, y_center),
                flags=cv2.NORMAL_CLONE,
            )
            results["gt_bboxes"] = np.concatenate(
                [results["gt_bboxes"], crop_bbox])
            results["gt_labels"] = np.concatenate([
                results["gt_labels"],
                np.full(len(crop_bbox), self.crop_label)
            ])
            break

    return results
def calc_tpfpfn(gt_bboxes, pred_bboxes, iou_thr=0.5):
    """Calculate tp, fp, fn.

    Predictions are processed from the most to the least confident. A
    prediction whose best overlap with the still-unmatched gt boxes is
    strictly greater than ``iou_thr`` is a true positive and consumes that
    gt box; otherwise it is a false positive.

    gt_bboxes: (N, 4) np.array in xyxy format
    pred_bboxes: (M, 5) np.array in xyxy+conf format

    Returns a ``(tp, fp, fn)`` tuple.
    """
    num_gt = len(gt_bboxes)
    num_pred = len(pred_bboxes)
    if num_gt == 0 or num_pred == 0:
        # No matching possible: every prediction is a false positive and
        # every gt box a false negative (one of the two counts is zero).
        return 0, num_pred, num_gt

    # sort by conf, highest first
    ranked = pred_bboxes[pred_bboxes[:, 4].argsort()[::-1]]
    unmatched = gt_bboxes.copy()

    tp = 0
    fp = 0
    for rank, pred in enumerate(ranked):
        ious = bbox_overlaps(unmatched, pred[None, :4])
        if ious.max() > iou_thr:
            tp += 1
            unmatched = np.delete(unmatched, ious.argmax(), axis=0)
        else:
            fp += 1

        if len(unmatched) == 0:
            # Every gt box is taken: all remaining predictions are fps.
            fp += len(ranked) - (rank + 1)
            break

    return tp, fp, len(unmatched)
def score(gt_boxes, anchors):
    """Return the maximum of ``bbox_overlaps(gt_boxes, anchors)`` along axis 1.

    Presumably this is, for every gt box, its best overlap with any anchor;
    that reading assumes ``bbox_overlaps`` yields a (num_gt, num_anchors)
    matrix -- TODO confirm against its definition.
    """
    return bbox_overlaps(gt_boxes, anchors).max(1)
def __call__(self, img, boxes, labels):
    """Randomly crop ``img`` while keeping every labeled box inside the crop.

    A coin flip (``random.randint(0, 1)``) decides whether the inputs are
    returned untouched. Otherwise up to 50 crop windows are sampled around
    the union of the "labeled" boxes (rows with ``labels[:, 1] > -1``; all
    rows if none qualify). A window is rejected when

    * its aspect ratio differs from the image's by more than
      ``self.range_ratio``,
    * any box's ``iof1`` overlap with it lies strictly between the two
      values of ``self.range_overlaps``, or
    * no box centre falls inside it.

    Args:
        img: Image array with three dimensions, indexed (row, column,
            channel).
        boxes: Box array read as ``[x1, y1, x2, y2]`` per row.
        labels: 2-D label array; column 1 is compared against ``-1``.

    Returns:
        tuple: ``(img, boxes, labels)``, cropped / filtered / shifted when a
        window was accepted.
    """
    h, w, _ = img.shape
    while True:
        # NOTE(review): the stdlib ``random.randint(0, 1)`` yields 0 or 1,
        # numpy's never yields 1 -- confirm which ``random`` this file uses.
        if random.randint(0, 1):
            return img, boxes, labels

        # The labeled extent does not change between tries, so it is
        # computed once per round instead of once per sampled window.
        idx_labeled = labels[:, 1] > -1
        if not idx_labeled.any():
            idx_labeled = ~idx_labeled
        boxes_labeled = boxes[idx_labeled]
        x1 = boxes_labeled[:, 0].min()
        y1 = boxes_labeled[:, 1].min()
        x2 = boxes_labeled[:, 2].max()
        y2 = boxes_labeled[:, 3].max()

        for _ in range(50):
            # Sample a window that fully contains the labeled extent.
            new_x1 = random.uniform(0, x1)
            new_y1 = random.uniform(0, y1)
            new_x2 = random.uniform(x2, w)
            new_y2 = random.uniform(y2, h)
            new_w = new_x2 - new_x1
            new_h = new_y2 - new_y1

            # keep the aspect ratio near the original one
            ratio = new_h / new_w
            if (ratio < h / w - self.range_ratio
                    or ratio > h / w + self.range_ratio):
                continue

            patch = np.array(
                (int(new_x1), int(new_y1), int(new_x2), int(new_y2)))
            overlaps = bbox_overlaps(
                patch.reshape(-1, 4), boxes.reshape(-1, 4),
                mode='iof1').reshape(-1)
            low, high = self.range_overlaps[0], self.range_overlaps[1]
            if np.any((low < overlaps) & (overlaps < high)):
                continue

            # center of boxes should be inside the crop img
            center = (boxes[:, :2] + boxes[:, 2:]) / 2
            mask = ((center[:, 0] > patch[0])
                    & (center[:, 1] > patch[1])
                    & (center[:, 0] < patch[2])
                    & (center[:, 1] < patch[3]))
            if not mask.any():
                continue
            boxes = boxes[mask]
            labels = labels[mask]

            # adjust boxes: clip to the window and shift to its origin
            img = img[patch[1]:patch[3], patch[0]:patch[2]]
            boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
            boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
            boxes -= np.tile(patch[:2], 2)
            return img, boxes, labels
def tpfp_imagenet(det_bboxes,
                  gt_bboxes,
                  gt_bboxes_ignore=None,
                  default_iou_thr=0.5,
                  area_ranges=None):
    """Check if detected bboxes are true positive or false positive.

    Args:
        det_bboxes (ndarray): Detected bboxes of this image, of shape (m, 5).
        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).
        gt_bboxes_ignore (ndarray | None): Ignored gt bboxes of this image,
            of shape (k, 4). ``None`` means there are none. Default: None
        default_iou_thr (float): IoU threshold to be considered as matched
            for medium and large bboxes (small ones have special rules).
            Default: 0.5.
        area_ranges (list[tuple] | None): Range of bbox areas to be
            evaluated, in the format [(min1, max1), (min2, max2), ...].
            Default: None.

    Returns:
        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of
        each array is (num_scales, m).
    """
    if gt_bboxes_ignore is None:
        # The documented default used to crash on ``.shape``.
        gt_bboxes_ignore = np.zeros((0, 4), dtype=gt_bboxes.dtype)

    # an indicator of ignored gts (``np.bool`` was removed in NumPy 1.24)
    gt_ignore_inds = np.concatenate(
        (np.zeros(gt_bboxes.shape[0], dtype=bool),
         np.ones(gt_bboxes_ignore.shape[0], dtype=bool)))
    # stack gt_bboxes and gt_bboxes_ignore for convenience
    gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))

    num_dets = det_bboxes.shape[0]
    num_gts = gt_bboxes.shape[0]
    if area_ranges is None:
        area_ranges = [(None, None)]
    num_scales = len(area_ranges)
    # tp and fp are of shape (num_scales, num_dets), each row is tp or fp
    # of a certain scale.
    tp = np.zeros((num_scales, num_dets), dtype=np.float32)
    fp = np.zeros((num_scales, num_dets), dtype=np.float32)

    if num_gts == 0:
        # Without any gt, every detection inside the area range is a fp.
        if area_ranges == [(None, None)]:
            fp[...] = 1
        else:
            det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0]) * (
                det_bboxes[:, 3] - det_bboxes[:, 1])
            for i, (min_area, max_area) in enumerate(area_ranges):
                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
        return tp, fp

    ious = bbox_overlaps(det_bboxes, gt_bboxes - 1)
    gt_w = gt_bboxes[:, 2] - gt_bboxes[:, 0]
    gt_h = gt_bboxes[:, 3] - gt_bboxes[:, 1]
    # Per-gt threshold: smaller boxes get a threshold below the default.
    iou_thrs = np.minimum((gt_w * gt_h) / ((gt_w + 10.0) * (gt_h + 10.0)),
                          default_iou_thr)
    # sort all detections by scores in descending order
    sort_inds = np.argsort(-det_bboxes[:, -1])

    for k, (min_area, max_area) in enumerate(area_ranges):
        gt_covered = np.zeros(num_gts, dtype=bool)
        # if no area range is specified, gt_area_ignore is all False
        if min_area is None:
            gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)
        else:
            gt_areas = gt_w * gt_h
            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)

        for i in sort_inds:
            max_iou = -1
            matched_gt = -1
            # find best overlapped available gt
            for j in range(num_gts):
                # different from PASCAL VOC: allow finding other gts if the
                # best overlapped ones are already matched by other det bboxes
                if gt_covered[j]:
                    continue
                if ious[i, j] >= iou_thrs[j] and ious[i, j] > max_iou:
                    max_iou = ious[i, j]
                    matched_gt = j

            # there are 4 cases for a det bbox:
            # 1. it matches a gt, tp = 1, fp = 0
            # 2. it matches an ignored gt, tp = 0, fp = 0
            # 3. it matches no gt and within area range, tp = 0, fp = 1
            # 4. it matches no gt but is beyond area range, tp = 0, fp = 0
            if matched_gt >= 0:
                gt_covered[matched_gt] = 1
                if not (gt_ignore_inds[matched_gt]
                        or gt_area_ignore[matched_gt]):
                    tp[k, i] = 1
            elif min_area is None:
                fp[k, i] = 1
            else:
                bbox = det_bboxes[i, :4]
                area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                if area >= min_area and area < max_area:
                    fp[k, i] = 1

    return tp, fp
def __call__(self, img, gt_bboxes, gt_labels, gt_masks, gt_ignore_bboxes,
             gt_ignore_masks, img_shape, pad_shape):
    """Crop a ``self.crop_size`` patch and rebuild boxes/masks for it.

    The image is cropped to ``tar_size`` (the element-wise minimum of
    ``img_shape`` and ``self.crop_size``) and padded with zeros at the
    bottom/right up to ``self.crop_size``. With probability 5/8 -- and only
    if any instance mask is non-empty -- the crop origin is sampled from a
    range derived from the extent of the mask pixels; otherwise it is
    sampled from the whole valid range. Up to 50 origins are tried; an
    origin is rejected while the smallest ``iob`` overlap between the patch
    and the gt boxes lies strictly between 0 and 0.3.

    Args:
        img: Image array laid out as [c, H, W].
        gt_bboxes: Array of gt boxes, reshaped to ``(-1, 4)``.
        gt_labels: Per-instance labels, indexed like ``gt_masks``.
        gt_masks: Sequence of 2-D instance masks.
        gt_ignore_bboxes: Ignored boxes; returned unchanged only on the
            "already crop-sized" early exit.
        gt_ignore_masks: Sequence of 2-D masks of ignored instances.
        img_shape: Shape of the valid image area, (h, w, ...).
        pad_shape: Shape of the padded image, (h, w, ...).

    Returns:
        tuple: ``(img, gt_bboxes, gt_labels, gt_ignore_bboxes, gt_masks,
        gt_ignore_masks, img_shape, pad_shape)`` for the cropped sample;
        ``img`` is again [c, H, W]. When no instance survives the crop the
        corresponding mask entry is an empty list.
    """
    pad_h, pad_w = pad_shape[:2]
    img_h, img_w = img_shape[:2]
    crp_h, crp_w = self.crop_size

    # img [c, H, W] -> [H, W, c]
    img = img.transpose(1, 2, 0).copy()
    # NOTE(review): the width check reads ``img_shape`` while the height
    # check reads ``img.shape``; possibly a typo, kept as-is -- confirm.
    assert img.shape[0] == pad_h and img_shape[1] == pad_w

    if pad_h == crp_h and pad_w == crp_w:
        # Already at crop size: nothing to do.
        img = img.transpose(2, 0, 1)
        return img, gt_bboxes, gt_labels, gt_ignore_bboxes, \
            gt_masks, gt_ignore_masks, img_shape, pad_shape

    tar_size = tuple(
        a if a < b else b
        for a, b in zip(img_shape[:2], self.crop_size[:2]))
    tar_h, tar_w = tar_size

    # Binary map of all pixels covered by any gt mask.
    label = np.zeros(img_shape[:2], dtype=np.uint8)
    for mask in gt_masks:
        label[mask[:img_h, :img_w] > 0] = 1

    # The foreground extent is loop-invariant, so derive the sampling range
    # once. ``br`` must be a mutable array: it is clamped element-wise
    # (assigning into a tuple raised TypeError).
    has_fg = bool(label.any())
    if has_fg:
        fg_coords = np.where(label > 0)
        tl = np.maximum(np.min(fg_coords, axis=1) - tar_size, 0)
        br = np.maximum(np.max(fg_coords, axis=1) - tar_size, 0)
        br[0] = min(br[0], img_h - tar_h)
        br[1] = min(br[1], img_w - tar_w)

    i = j = 0
    for _ in range(50):
        # randomly select a patch.
        if random.random() > 3.0 / 8.0 and has_fg:
            i = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
            # The fallback column must be 0 (it used to be 1, which pushes
            # the crop one pixel past the image when img_w == tar_w).
            j = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
        else:
            i = random.randint(0, img_h - tar_h) if img_h > tar_h else 0
            j = random.randint(0, img_w - tar_w) if img_w > tar_w else 0

        patch = np.array(
            (int(j), int(i), int(j + tar_w), int(i + tar_h)))
        overlaps = bbox_overlaps(
            patch.reshape(-1, 4), gt_bboxes.reshape(-1, 4),
            mode='iob').reshape(-1)
        partially_cut = (len(gt_masks) > 0 and overlaps.size > 0
                         and 0 < overlaps.min() < 0.3)
        if not partially_cut:
            break

    def crop_instances(masks):
        """Crop/pad every mask; return kept indices, boxes, padded masks."""
        kept, boxes, padded = [], [], []
        for idx in range(len(masks)):
            mask = masks[idx][i:i + tar_h, j:j + tar_w]
            if np.max(mask) > 0:
                cnt = get_cnt_from_mask(mask)
                if cnt is not None:
                    boxes.append(get_rect_from_cnt(cnt).copy())
                    mask_p = cv2.copyMakeBorder(
                        mask, 0, crp_h - tar_h, 0, crp_w - tar_w,
                        borderType=cv2.BORDER_CONSTANT, value=(0,))
                    padded.append(mask_p.copy())
                    kept.append(idx)
        return kept, boxes, padded

    kept, new_gt_bbox, new_gt_mask = crop_instances(gt_masks)
    new_gt_labels = [gt_labels[idx] for idx in kept]
    _, new_gt_ignore_bbox, new_gt_ignore_mask = crop_instances(
        gt_ignore_masks)

    if new_gt_bbox:
        new_gt_bbox = np.array(new_gt_bbox, dtype=np.float32)
        new_gt_bbox = clip_box(new_gt_bbox, img_w=tar_w, img_h=tar_h)
        new_gt_labels = np.array(new_gt_labels, dtype=np.int64)
        new_gt_mask = np.stack(new_gt_mask, axis=0)
    else:
        new_gt_bbox = np.zeros((0, 4), dtype=np.float32)
        new_gt_labels = np.array([], dtype=np.int64)

    if new_gt_ignore_mask:
        new_gt_ignore_bbox = np.array(new_gt_ignore_bbox, dtype=np.float32)
        new_gt_ignore_bbox = clip_box(
            new_gt_ignore_bbox, img_w=tar_w, img_h=tar_h)
        new_gt_ignore_mask = np.stack(new_gt_ignore_mask, axis=0)
    else:
        new_gt_ignore_bbox = np.zeros((0, 4), dtype=np.float32)

    img_crp = img[i:i + tar_h, j:j + tar_w]
    img_shape_new = img_crp.shape
    img_p = cv2.copyMakeBorder(
        img_crp, 0, crp_h - tar_h, 0, crp_w - tar_w,
        borderType=cv2.BORDER_CONSTANT, value=(0, 0, 0))
    pad_shape_new = img_p.shape

    # back to [c, H, W]
    img_p = img_p.transpose(2, 0, 1)
    assert len(img_p.shape) == 3 and img_p.shape[0] == 3

    return img_p, new_gt_bbox, new_gt_labels, new_gt_ignore_bbox, \
        new_gt_mask, new_gt_ignore_mask, img_shape_new, pad_shape_new
def bbox_distances(bboxes1, bboxes2, iou_thr=0.5):
    """Calculate the IoU distances of two sets of boxes.

    The distance is ``1 - IoU``; entries whose distance exceeds ``iou_thr``
    are replaced by ``np.nan``.
    """
    # NOTE(review): ``iou_thr`` is compared against the distance, not the
    # IoU. The two readings coincide only at the default of 0.5 -- confirm
    # which one callers intend before passing other values.
    dist = 1 - bbox_overlaps(bboxes1, bboxes2, mode='iou')
    too_far = dist > iou_thr
    return np.where(too_far, np.nan, dist)
def __call__(self, results):
    """Randomly crop the image and its annotations in ``results``.

    A mode is drawn from ``self.sample_mode``; ``1`` returns ``results``
    unchanged, any other value is the minimum overlap that
    ``bbox_overlaps`` must report between the crop window and every gt box.
    Up to 50 windows are tried per mode before a new mode is drawn.

    Args:
        results (dict): Must provide ``img``, ``gt_bboxes`` and
            ``gt_labels``; ``gt_masks`` and ``gt_semantic_seg`` are cropped
            too when present.

    Returns:
        dict: ``results`` with the image, boxes, labels (and optional masks
        / segmentation map) replaced by their cropped versions.
    """
    img, boxes, labels = [
        results[k] for k in ("img", "gt_bboxes", "gt_labels")
    ]
    h, w, _ = img.shape
    while True:
        mode = random.choice(self.sample_mode)
        if mode == 1:
            return results

        min_iou = mode
        for _ in range(50):
            new_w = random.uniform(self.min_crop_size * w, w)
            new_h = random.uniform(self.min_crop_size * h, h)

            # h / w in [0.5, 2]
            if new_h / new_w < 0.5 or new_h / new_w > 2:
                continue

            # Both bounds are passed explicitly: the one-argument form
            # ``uniform(w - new_w)`` means ``uniform(low=w - new_w, high=1.0)``
            # in numpy, which is documented as undefined when high < low.
            left = random.uniform(0, w - new_w)
            top = random.uniform(0, h - new_h)

            patch = np.array(
                (int(left), int(top), int(left + new_w), int(top + new_h)))
            # Line or point crop is not allowed
            if patch[2] == patch[0] or patch[3] == patch[1]:
                continue

            overlaps = bbox_overlaps(
                patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
            has_gt = len(overlaps) > 0
            if has_gt and overlaps.min() < min_iou:
                continue

            # only adjust boxes and instance masks when the gt is not empty
            if has_gt:
                # center of boxes should be inside the crop img
                center = (boxes[:, :2] + boxes[:, 2:]) / 2
                mask = ((center[:, 0] > patch[0])
                        & (center[:, 1] > patch[1])
                        & (center[:, 0] < patch[2])
                        & (center[:, 1] < patch[3]))
                if not mask.any():
                    continue

                boxes = boxes[mask]
                labels = labels[mask]
                # Clip to the window, then move into window coordinates.
                boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
                boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
                boxes -= np.tile(patch[:2], 2)

                results["gt_bboxes"] = boxes
                results["gt_labels"] = labels

                if "gt_masks" in results:
                    # ``mask.any()`` held above, so this list is not empty
                    valid_masks = [
                        results["gt_masks"][idx]
                        for idx in range(len(mask)) if mask[idx]
                    ]
                    results["gt_masks"] = np.stack([
                        gt_mask[patch[1]:patch[3], patch[0]:patch[2]]
                        for gt_mask in valid_masks
                    ])

            # adjust the img no matter whether the gt is empty before crop
            img = img[patch[1]:patch[3], patch[0]:patch[2]]
            results["img"] = img

            # not tested
            if "gt_semantic_seg" in results:
                results["gt_semantic_seg"] = results["gt_semantic_seg"][
                    patch[1]:patch[3], patch[0]:patch[2]]
            return results
def __call__(self, results, state=None):
    """Randomly crop ``results`` and report the sampled crop as ``state``.

    When ``state`` is given the call is delegated to
    ``self._call_with_state``. Otherwise a mode is drawn from
    ``self.sample_mode`` (forced to ``1`` when there are no boxes); ``1``
    returns ``results`` unchanged, any other value is the minimum overlap
    that ``bbox_overlaps`` must report between the crop window and every
    gt box. Up to 50 windows are tried per mode.

    Args:
        results (dict): Must provide ``img``, ``gt_bboxes`` and
            ``gt_labels``.
        state (dict | None): Previously returned state to apply instead of
            sampling a new crop.

    Returns:
        tuple: ``(results, state)``. ``state`` always holds ``mode``; after
        an actual crop it also holds ``new_w``, ``new_h``, ``left`` and
        ``top``.
    """
    if state is not None:
        return self._call_with_state(results, state)

    img, boxes, labels = [
        results[k] for k in ('img', 'gt_bboxes', 'gt_labels')
    ]
    h, w, _ = img.shape
    while True:
        mode = random.choice(self.sample_mode)
        # Force return origin for no annotation.
        if len(boxes) == 0:
            mode = 1
        state = dict(mode=mode)
        if mode == 1:
            return results, state

        min_iou = mode
        for _ in range(50):
            new_w = random.uniform(self.min_crop_size * w, w)
            new_h = random.uniform(self.min_crop_size * h, h)

            # h / w in [0.5, 2]
            if new_h / new_w < 0.5 or new_h / new_w > 2:
                continue

            # Both bounds are passed explicitly: the one-argument form
            # ``uniform(w - new_w)`` means ``uniform(low=w - new_w, high=1.0)``
            # in numpy, which is documented as undefined when high < low.
            left = random.uniform(0, w - new_w)
            top = random.uniform(0, h - new_h)

            patch = np.array(
                (int(left), int(top), int(left + new_w), int(top + new_h)))
            # A zero-width or zero-height window would yield an empty image.
            if patch[2] == patch[0] or patch[3] == patch[1]:
                continue

            overlaps = bbox_overlaps(
                patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
            if overlaps.min() < min_iou:
                continue

            # center of boxes should be inside the crop img
            center = (boxes[:, :2] + boxes[:, 2:]) / 2
            mask = ((center[:, 0] > patch[0])
                    & (center[:, 1] > patch[1])
                    & (center[:, 0] < patch[2])
                    & (center[:, 1] < patch[3]))
            if not mask.any():
                continue
            boxes = boxes[mask]
            labels = labels[mask]

            # adjust boxes: clip to the window and shift to its origin
            img = img[patch[1]:patch[3], patch[0]:patch[2]]
            boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
            boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
            boxes -= np.tile(patch[:2], 2)

            results['img'] = img
            results['gt_bboxes'] = boxes
            results['gt_labels'] = labels

            # Record the un-truncated sample for later replay via ``state``.
            state['new_w'] = new_w
            state['new_h'] = new_h
            state['left'] = left
            state['top'] = top
            return results, state
def main():
    """Evaluate a detector on a VOC-style list and export per-class stats.

    For every image named in ``test_path`` the ground truth is read from the
    matching XML file and the detector is run with a score threshold of 0.5.
    Each detection is associated with the gt box it overlaps most (IoU >
    0.5); per-class gt / tp / fp / fn counts are accumulated, printed with
    precision and recall, and written to ``output_xml_name`` via openpyxl.
    """
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    cfg.data.test.test_mode = True

    config_file = args.config
    checkpoint_file = args.checkpoint

    # build the model from a config file and a checkpoint file
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = init_detector(config_file, checkpoint_file, device=device)
    model.CLASSES = Ultra4CocoDataset.CLASSES
    num_classes = len(model.CLASSES)
    cls2id = dict(zip(model.CLASSES, range(num_classes)))

    acc = 0.0
    tot = 0.0
    gt_cls_num = np.zeros(num_classes)
    tp = np.zeros(num_classes)
    fp = np.zeros(num_classes)

    with open(test_path, "r") as f:
        filenames = f.readlines()

    for filename in filenames:
        stem = filename.strip()
        img = cv2.imread(os.path.join(test_img_path, stem + ".jpg"))
        if img is None:
            continue

        coords = read_xml(os.path.join(test_xml_path, stem + ".xml"))
        # ``is 0`` compared identity, not value.
        if len(coords) == 0:
            print("No annotations\n")
            continue

        gt_bboxes = [coord[:4] for coord in coords]
        gt_labels = [coord[4] for coord in coords]
        # NOTE(review): assumes ``tot`` counts only gt labels of known
        # classes -- the original nesting is ambiguous, confirm.
        for label in gt_labels:
            if label in cls2id:
                gt_cls_num[cls2id[label]] += 1
                tot += 1

        result = inference_detector(model, img)
        det_bboxes, det_labels, det_scores = get_result(
            result, score_thr=0.5)
        ious = bbox_overlaps(np.array(det_bboxes), np.array(gt_bboxes))
        ious_max = ious.max(axis=1)
        ious_argmax = ious.argmax(axis=1)

        # -1 marks a detection without an associated gt box.
        det_matched_gt = np.ones(len(det_bboxes)) * -1
        gt_matched_scores = np.zeros(len(gt_bboxes))
        for i in range(len(det_bboxes)):
            if ious_max[i] > 0.5:
                target_gt = ious_argmax[i]
                # Only a detection scoring higher than the gt's current
                # best one gets associated with it.
                if gt_matched_scores[target_gt] < det_scores[i]:
                    gt_matched_scores[target_gt] = det_scores[i]
                    det_matched_gt[i] = target_gt
            else:
                fp[cls2id[model.CLASSES[det_labels[i]]]] += 1

        for i in range(len(det_matched_gt)):
            gt = int(det_matched_gt[i])
            if gt > -1:
                det_cls = cls2id[model.CLASSES[det_labels[i]]]
                if model.CLASSES[det_labels[i]] == gt_labels[gt]:
                    tp[det_cls] += 1
                    acc += 1
                else:
                    fp[det_cls] += 1

    fn = gt_cls_num - tp

    # ``acc`` and ``tot`` are plain floats: guard the empty-dataset case.
    print("accuracy: %f" % (acc / tot if tot else float("nan")))

    # 0/0 yields nan for classes without detections / ground truth; the
    # warning is silenced, the printed value is unchanged.
    with np.errstate(divide="ignore", invalid="ignore"):
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)

    for i in range(num_classes):
        print("%s: %.0f gt, %.0f det, %.0f tp, precision: %.6f, recall: %.6f"
              % (model.CLASSES[i], gt_cls_num[i], tp[i] + fp[i], tp[i],
                 precision[i], recall[i]))

    if os.path.exists(output_xml_name):
        os.remove(output_xml_name)
    # The first positional parameter of ``Workbook`` is ``write_only``; the
    # file name previously passed there only acted as a truthy flag.
    workbook = openpyxl.Workbook(write_only=True)
    sheet = workbook.create_sheet("sheet")
    sheet.append(TABLE_HEAD)
    for i in range(num_classes):
        sheet.append([
            model.CLASSES[i],
            "%.0f" % gt_cls_num[i],
            "%.0f" % tp[i],
            "%.0f" % fp[i],
            "%.0f" % fn[i],
            "%.6f" % precision[i],
            "%.6f" % recall[i],
        ])
    workbook.save(output_xml_name)
def __call__(self, results):
    """Randomly crop the image and every annotation field in ``results``.

    A mode is drawn from ``self.sample_mode`` and stored on ``self.mode``;
    ``1`` returns ``results`` unchanged, any other value is the minimum
    overlap that ``bbox_overlaps`` must report between the crop window and
    every box. Up to 50 windows are tried per mode.

    Args:
        results (dict): Must provide ``img`` and ``bbox_fields``. For each
            bbox field the associated label / mask fields (looked up through
            ``self.bbox2label`` / ``self.bbox2mask``) are filtered as well,
            and every entry of ``seg_fields`` is cropped.

    Returns:
        dict: ``results`` with cropped image, ``img_shape`` and annotations.
    """
    if 'img_fields' in results:
        assert results['img_fields'] == ['img'], \
            'Only single img_fields is allowed'
    img = results['img']
    assert 'bbox_fields' in results
    boxes = np.concatenate(
        [results[key] for key in results['bbox_fields']], 0)
    h, w, _ = img.shape

    def is_center_of_bboxes_in_patch(boxes, patch):
        """Return a boolean mask of the boxes whose centre is in ``patch``."""
        center = (boxes[:, :2] + boxes[:, 2:]) / 2
        return ((center[:, 0] > patch[0])
                & (center[:, 1] > patch[1])
                & (center[:, 0] < patch[2])
                & (center[:, 1] < patch[3]))

    while True:
        mode = random.choice(self.sample_mode)
        self.mode = mode
        if mode == 1:
            return results

        min_iou = mode
        for _ in range(50):
            new_w = random.uniform(self.min_crop_size * w, w)
            new_h = random.uniform(self.min_crop_size * h, h)

            # h / w in [0.5, 2]
            if new_h / new_w < 0.5 or new_h / new_w > 2:
                continue

            # Both bounds are passed explicitly: the one-argument form
            # ``uniform(w - new_w)`` means ``uniform(low=w - new_w, high=1.0)``
            # in numpy, which is documented as undefined when high < low.
            left = random.uniform(0, w - new_w)
            top = random.uniform(0, h - new_h)

            patch = np.array(
                (int(left), int(top), int(left + new_w), int(top + new_h)))
            # Line or point crop is not allowed
            if patch[2] == patch[0] or patch[3] == patch[1]:
                continue

            overlaps = bbox_overlaps(
                patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
            has_gt = len(overlaps) > 0
            if has_gt and overlaps.min() < min_iou:
                continue

            # center of boxes should be inside the crop img
            # only adjust boxes and instance masks when the gt is not empty
            if has_gt:
                if not is_center_of_bboxes_in_patch(boxes, patch).any():
                    continue

                for key in results.get('bbox_fields', []):
                    field_boxes = results[key].copy()
                    mask = is_center_of_bboxes_in_patch(field_boxes, patch)
                    field_boxes = field_boxes[mask]
                    # Clip to the window, then move into its coordinates.
                    field_boxes[:, 2:] = field_boxes[:, 2:].clip(
                        max=patch[2:])
                    field_boxes[:, :2] = field_boxes[:, :2].clip(
                        min=patch[:2])
                    field_boxes -= np.tile(patch[:2], 2)
                    results[key] = field_boxes

                    # labels
                    label_key = self.bbox2label.get(key)
                    if label_key in results:
                        results[label_key] = results[label_key][mask]

                    # mask fields
                    mask_key = self.bbox2mask.get(key)
                    if mask_key in results:
                        results[mask_key] = results[mask_key][
                            mask.nonzero()[0]].crop(patch)

            # adjust the img no matter whether the gt is empty before crop
            img = img[patch[1]:patch[3], patch[0]:patch[2]]
            results['img'] = img
            results['img_shape'] = img.shape

            # seg fields
            for key in results.get('seg_fields', []):
                results[key] = results[key][patch[1]:patch[3],
                                            patch[0]:patch[2]]
            return results
def forward(self, mask_feats, mask_preds, classes, bbox_pair=None):
    """Compute logits for a pair of instances according to ``self.mode``.

    Modes:
        'feature+mask': conv/fc head on the mask features concatenated with
            the max-pooled sigmoid mask predictions.
        'feature+mask+label': as above, with a two-hot class encoding
            appended before the fc layers.
        'label': fc head on the two-hot class encoding only.
        'stats': table lookup in ``self.cat_occ`` by the two class ids.
        'SHR': heuristic based on ``bbox_overlaps(..., mode='iof')`` between
            the two boxes of ``bbox_pair``.

    Args:
        mask_feats: Tensor; (N, 512, 14, 14) unless ``self.only_label``.
        mask_preds: Tensor; (N, 2, 28, 28) unless ``self.only_label``.
        classes: Tensor of shape (N, 2) holding the two class ids.
        bbox_pair: Pair of box tensors, required by the 'SHR' mode.

    Returns:
        The logits tensor of the selected mode, or ``None`` when
        ``self.mode`` matches none of the modes above.

    Raises:
        Exception: In 'SHR' mode when ``bbox_pair`` is not given.
    """
    # modified to take in classes as input
    if not self.only_label:
        assert (len(mask_feats.size()) == 4
                and mask_feats.size()[1:] == (512, 14, 14))
        assert (len(mask_preds.size()) == 4
                and mask_preds.size()[1:] == (2, 28, 28))
        assert len(classes.size()) == 2 and classes.size()[1:] == (2,)

    def encode_classes():
        """Two-hot code: slot c0 and slot num_class + c1 are set per row."""
        ids = classes.long()
        encoded = torch.zeros(ids.shape[0], 2 * self.num_class).cuda()
        for row in range(ids.shape[0]):
            encoded[row, [ids[row, 0], self.num_class + ids[row, 1]]] = 1
        return encoded

    def conv_trunk():
        """Fuse features with pooled mask probabilities, conv, flatten."""
        pooled = self.max_pool(mask_preds.sigmoid())
        feat = torch.cat([mask_feats, pooled], dim=1)
        for conv in self.convs:
            feat = self.relu(conv(feat))
        return feat.view(feat.size(0), -1)

    def fc_head(feat):
        """Run the fc stack followed by the final projection."""
        for fc in self.fcs:
            feat = self.relu(fc(feat))
        return self.fc_final(feat)

    mode = self.mode

    if mode == 'feature+mask':
        return fc_head(conv_trunk())

    if mode == 'feature+mask+label':
        cls_pred = encode_classes()
        return fc_head(torch.cat([conv_trunk(), cls_pred], dim=-1))

    if mode == 'label':
        return fc_head(encode_classes())

    if mode == 'stats':
        ids = classes.int()
        looked_up = [
            self.cat_occ[ids[row, 0]][ids[row, 1]]
            for row in range(ids.shape[0])
        ]
        return torch.tensor(looked_up).float().cuda()

    if mode == 'SHR':
        logit_one = torch.ones(1).float().cuda()
        if bbox_pair is None:
            raise Exception("SHR is only available in testing")
        # bbox_overlaps take bbox1 as foreground obj by default!
        bbox1, bbox2 = [
            box.unsqueeze(dim=0).cpu().numpy() for box in bbox_pair
        ]
        if bbox_overlaps(bbox1, bbox2, mode='iof')[0] > self.min_thing_area:
            return torch.stack([logit_one * 1, logit_one * 0])
        if bbox_overlaps(bbox2, bbox1, mode='iof')[0] > self.min_thing_area:
            return torch.stack([logit_one * 0, logit_one * 1])
        return torch.stack([logit_one * -1, logit_one * -1])
def create_groundtruth_database(dataset_class_name,
                                data_path,
                                info_prefix,
                                info_path=None,
                                mask_anno_path=None,
                                used_classes=None,
                                database_save_path=None,
                                db_info_save_path=None,
                                relative_path=True,
                                add_rgb=False,
                                lidar_only=False,
                                bev_only=False,
                                coors_range=None,
                                with_mask=False):
    """Given the raw data, generate the ground truth database.

    For every annotated object the points inside its 3D box are shifted so
    the box centre is the origin and dumped to ``<database_save_path>/
    <sample>_<name>_<idx>.bin``. A dict mapping class name to a list of
    per-object info dicts is pickled to ``db_info_save_path``.

    Args:
        dataset_class_name (str): Name of the input dataset. Dedicated
            pipelines are configured for 'KittiDataset', 'NuScenesDataset'
            and 'WaymoDataset'.
        data_path (str): Path of the data.
        info_prefix (str): Prefix of the info file.
        info_path (str): Path of the info file. Default: None.
        mask_anno_path (str): Path of the mask_anno, relative to
            ``data_path``. Only read when ``with_mask`` is True.
            Default: None.
        used_classes (list[str]): Classes have been used. Objects of other
            classes are still written to disk but get no db info entry.
            Default: None (keep all).
        database_save_path (str): Path to save database. Default: None,
            i.e. ``<data_path>/<info_prefix>_gt_database``.
        db_info_save_path (str): Path to save db_info. Default: None,
            i.e. ``<data_path>/<info_prefix>_dbinfos_train.pkl``.
        relative_path (bool): Whether to use relative path. Not referenced
            in this function body. Default: True.
        add_rgb (bool): Not referenced in this function body.
            Default: False.
        lidar_only (bool): Not referenced in this function body.
            Default: False.
        bev_only (bool): Not referenced in this function body.
            Default: False.
        coors_range: Not referenced in this function body. Default: None.
        with_mask (bool): Whether to use mask. Default: False.
    """
    print(f'Create GT Database of {dataset_class_name}')
    dataset_cfg = dict(
        type=dataset_class_name, data_root=data_path, ann_file=info_path)
    if dataset_class_name == 'KittiDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(
            test_mode=False,
            split='training',
            modality=dict(
                use_lidar=True,
                use_depth=False,
                use_lidar_intensity=True,
                use_camera=with_mask,
            ),
            pipeline=[
                dict(
                    type='LoadPointsFromFile',
                    coord_type='LIDAR',
                    load_dim=4,
                    use_dim=4,
                    file_client_args=file_client_args),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True,
                    file_client_args=file_client_args)
            ])

    elif dataset_class_name == 'NuScenesDataset':
        dataset_cfg.update(
            use_valid_flag=True,
            pipeline=[
                dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
                dict(
                    type='LoadPointsFromMultiSweeps',
                    coord_type='LIDAR',
                    sweeps_num=10,
                    use_dim=[0, 1, 2, 3, 4],
                    pad_empty_sweeps=True,
                    remove_close=True),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True)
            ])

    elif dataset_class_name == 'WaymoDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(
            test_mode=False,
            split='training',
            modality=dict(
                use_lidar=True,
                use_depth=False,
                use_lidar_intensity=True,
                use_camera=False,
            ),
            pipeline=[
                dict(
                    type='LoadPointsFromFile',
                    coord_type='LIDAR',
                    load_dim=6,
                    use_dim=5,
                    file_client_args=file_client_args),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True,
                    file_client_args=file_client_args)
            ])

    dataset = build_dataset(dataset_cfg)

    if database_save_path is None:
        database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')
    if db_info_save_path is None:
        db_info_save_path = osp.join(data_path,
                                     f'{info_prefix}_dbinfos_train.pkl')
    mmcv.mkdir_or_exist(database_save_path)
    all_db_infos = dict()
    if with_mask:
        coco = COCO(osp.join(data_path, mask_anno_path))
        imgIds = coco.getImgIds()
        # image file name -> COCO image id
        file2id = dict()
        for i in imgIds:
            info = coco.loadImgs([i])[0]
            file2id[info['file_name']] = i

    # group ids are remapped to a counter that runs across all samples
    group_counter = 0
    for j in track_iter_progress(list(range(len(dataset)))):
        input_dict = dataset.get_data_info(j)
        dataset.pre_pipeline(input_dict)
        example = dataset.pipeline(input_dict)
        annos = example['ann_info']
        image_idx = example['sample_idx']
        points = example['points'].tensor.numpy()
        gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
        names = annos['gt_names']
        group_dict = dict()
        if 'group_ids' in annos:
            group_ids = annos['group_ids']
        else:
            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
        if 'difficulty' in annos:
            difficulty = annos['difficulty']

        num_obj = gt_boxes_3d.shape[0]
        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)

        if with_mask:
            # prepare masks
            gt_boxes = annos['gt_bboxes']
            img_path = osp.split(example['img_info']['filename'])[-1]
            if img_path not in file2id:
                print(f'skip image {img_path} for empty mask')
                continue
            img_id = file2id[img_path]
            kins_annIds = coco.getAnnIds(imgIds=img_id)
            kins_raw_info = coco.loadAnns(kins_annIds)
            kins_ann_info = _parse_coco_ann_info(kins_raw_info)
            h, w = annos['img_shape'][:2]
            gt_masks = [
                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']
            ]
            # get mask inds based on iou mapping
            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
            mask_inds = bbox_iou.argmax(axis=0)
            valid_inds = (bbox_iou.max(axis=0) > 0.5)

            # mask the image
            # use more precise crop when it is ready
            object_img_patches, object_masks = crop_image_patch(
                gt_boxes, gt_masks, mask_inds, annos['img'])

        for i in range(num_obj):
            filename = f'{image_idx}_{names[i]}_{i}.bin'
            abs_filepath = osp.join(database_save_path, filename)
            rel_filepath = osp.join(f'{info_prefix}_gt_database', filename)

            # save point clouds and image patches for each object
            gt_points = points[point_indices[:, i]]
            gt_points[:, :3] -= gt_boxes_3d[i, :3]

            if with_mask:
                if object_masks[i].sum() == 0 or not valid_inds[i]:
                    # Skip object for empty or invalid mask
                    continue
                img_patch_path = abs_filepath + '.png'
                mask_patch_path = abs_filepath + '.mask.png'
                mmcv.imwrite(object_img_patches[i], img_patch_path)
                mmcv.imwrite(object_masks[i], mask_patch_path)

            # The dump is raw binary data, so the handle must be opened in
            # binary mode rather than text mode.
            with open(abs_filepath, 'wb') as f:
                gt_points.tofile(f)

            if (used_classes is None) or names[i] in used_classes:
                db_info = {
                    'name': names[i],
                    'path': rel_filepath,
                    'image_idx': image_idx,
                    'gt_idx': i,
                    'box3d_lidar': gt_boxes_3d[i],
                    'num_points_in_gt': gt_points.shape[0],
                    'difficulty': difficulty[i],
                }
                local_group_id = group_ids[i]
                # if local_group_id >= 0:
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info['group_id'] = group_dict[local_group_id]
                if 'score' in annos:
                    db_info['score'] = annos['score'][i]
                if with_mask:
                    db_info.update({'box2d_camera': gt_boxes[i]})
                if names[i] in all_db_infos:
                    all_db_infos[names[i]].append(db_info)
                else:
                    all_db_infos[names[i]] = [db_info]

    for k, v in all_db_infos.items():
        print(f'load {len(v)} {k} database infos')

    with open(db_info_save_path, 'wb') as f:
        pickle.dump(all_db_infos, f)
def main():
    """Evaluate the two detectors on the test list and export statistics.

    For every file name listed in ``test_path`` the image and its XML
    annotation are loaded, both detectors are run, and their merged
    detections (score threshold 0.5) are matched to the ground truth at
    IoU > 0.5. Per-class gt / tp / fp / fn counts with precision and
    recall are printed and written to ``shell_statistics.xlsx``.

    Classes without any detection or ground truth produce ``nan`` for
    precision or recall, since the ratio is 0 / 0.
    """
    args = parse_args()

    # NOTE: these configs are loaded and flagged as test mode but are not
    # passed on; init_detector below receives the config file paths.
    rgcfg = mmcv.Config.fromfile(args.configrg)
    rgcfg.data.test.test_mode = True
    abcfg = mmcv.Config.fromfile(args.configab)
    abcfg.data.test.test_mode = True

    rg_config_file = args.configrg
    rg_checkpoint_file = args.checkpointrg
    ab_config_file = args.configab
    ab_checkpoint_file = args.checkpointab

    # build the model from a config file and a checkpoint file
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    modelrg = init_detector(rg_config_file, rg_checkpoint_file, device=device)
    modelrg.CLASSES = RoseGoldDataset.CLASSES
    modelab = init_detector(ab_config_file, ab_checkpoint_file, device=device)
    modelab.CLASSES = UltraABDataset.CLASSES

    acc = 0.0
    tot = 0.0
    num_slots = len(modelrg.CLASSES) + len(modelab.CLASSES) - 1
    gt_cls_num = np.zeros(num_slots)
    tp = np.zeros(num_slots)
    fp = np.zeros(num_slots)
    fn = np.zeros(num_slots)

    cls2id = {
        '壳牌恒护超凡喜力欧系专属 5W-30 1L': 0,
        '壳牌恒护超凡喜力欧系专属 5W-30 4L': 1,
        '壳牌恒护超凡喜力欧系专属 5W-40 1L': 2,
        '壳牌恒护超凡喜力欧系专属 5W-40 4L': 3,
        '壳牌恒护超凡喜力亚系专属 5W-30 1L': 4,
        '壳牌恒护超凡喜力亚系专属 5W-30 4L': 5,
        '壳牌先锋超凡喜力 SN PLUS 天然气全合成油 0W-20 4L': 6,
        '壳牌先锋超凡喜力 SN PLUS 天然气全合成油 0W-20 1L': 7,
        '壳牌先锋超凡喜力 SN PLUS 天然气全合成油 0W-30 4L': 8,
        '壳牌先锋超凡喜力 SN PLUS 天然气全合成油 0W-30 1L': 9,
        '壳牌先锋超凡喜力 ACEA C5 天然气全合成油 0W-20 4L': 10,
        '壳牌先锋超凡喜力 ACEA C5 天然气全合成油 0W-20 1L': 11,
        '壳牌先锋超凡喜力 ACEA C2 / C3 天然气全合成油 0W-30 4L': 12,
        '壳牌先锋超凡喜力 ACEA C2 / C3 天然气全合成油 0W-30 1L': 13,
        '壳牌先锋超凡喜力 ACEA A3 / B4 天然气全合成油 0W-40 4L': 14,
        '壳牌先锋超凡喜力 ACEA A3 / B4 天然气全合成油 0W-40 1L': 15,
        '其他': 16
    }
    id2cls = {value: key for key, value in cls2id.items()}

    with open(test_path, "r") as f:
        filenames = f.readlines()

    for filename in filenames:
        img_file = filename.strip() + ".jpg"
        xml_file = filename.strip() + ".xml"
        img = cv2.imread(os.path.join(test_img_path, img_file))
        if img is None:
            # unreadable or missing image: nothing to evaluate
            continue

        xml_path = os.path.join(test_xml_path, xml_file)
        coords = read_xml(xml_path)
        # compare by value; `is 0` tests object identity
        if len(coords) == 0:
            print("No annotations\n")
            continue

        gt_bboxes = [coord[:4] for coord in coords]
        gt_labels = [coord[4] for coord in coords]
        for label in gt_labels:
            gt_cls_num[cls2id[label]] += 1
            tot += 1

        resultrg = inference_detector(modelrg, img)
        resultab = inference_detector(modelab, img)
        det_bboxes, det_labels, det_scores = get_result(
            resultrg,
            resultab,
            modelrg.CLASSES,
            modelab.CLASSES,
            score_thr=0.5)

        ious = bbox_overlaps(np.array(det_bboxes), np.array(gt_bboxes))
        ious_max = ious.max(axis=1)
        ious_argmax = ious.argmax(axis=1)

        # index of the gt each detection is matched to, -1 when unmatched
        det_matched_gt = np.ones((len(det_bboxes))) * -1
        # best detection score seen so far for each gt
        gt_matched_scores = np.zeros((len(gt_bboxes)))

        # NOTE(review): the nesting of this matching loop is reconstructed;
        # confirm that the `else` pairs with the IoU test.
        for i in range(len(det_bboxes)):
            if ious_max[i] > 0.5:
                target_gt = ious_argmax[i]
                if gt_matched_scores[target_gt] < det_scores[i]:
                    gt_matched_scores[target_gt] = det_scores[i]
                    det_matched_gt[i] = target_gt
            else:
                fp[cls2id[det_labels[i]]] += 1

        for i in range(len(det_matched_gt)):
            gt = int(det_matched_gt[i])
            if gt > -1:
                if det_labels[i] == gt_labels[gt]:
                    tp[cls2id[det_labels[i]]] += 1
                    acc += 1
                else:
                    fp[cls2id[det_labels[i]]] += 1

    for i in range(len(cls2id)):
        fn[i] = gt_cls_num[i] - tp[i]

    # No counted annotation means the ratio is undefined; report nan
    # instead of raising ZeroDivisionError.
    accuracy = acc / tot if tot else float("nan")
    print("accuracy: %f" % accuracy)

    # Compute the ratios once; 0 / 0 deliberately stays nan.
    with np.errstate(divide="ignore", invalid="ignore"):
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)

    for i in range(len(cls2id)):
        print("%s: %.0f gt, %.0f det, %.0f tp, precision: %.6f, recall: %.6f" %
              (id2cls[i], gt_cls_num[i], tp[i] + fp[i], tp[i], precision[i],
               recall[i]))

    if os.path.exists("shell_statistics.xlsx"):
        os.remove("shell_statistics.xlsx")

    # The first positional parameter of Workbook is the write_only flag,
    # not a file name; the file name is only given to save().
    workbook = openpyxl.Workbook(write_only=True)
    sheet = workbook.create_sheet("sheet")
    sheet.append(TABLE_HEAD)
    for i in range(len(id2cls)):
        label = id2cls[i]
        sheet.append([
            label,
            "%.0f" % gt_cls_num[i],
            "%.0f" % tp[i],
            "%.0f" % fp[i],
            "%.0f" % fn[i],
            "%.6f" % precision[i],
            "%.6f" % recall[i]
        ])
    workbook.save("shell_statistics.xlsx")
def test_corner_head_encode_and_decode_heatmap():
    """Check that CornerHead targets decode back to the ground truth boxes.

    Targets are generated from three ground truth boxes, decoded again with
    ``decode_heatmap``, and the decoded boxes are compared to the ground
    truth per class; exactly three pairs must have an IoU of 1.
    """
    size = 256
    feat_size = size // 4
    meta = {
        'img_shape': (size, size, 3),
        'scale_factor': 1,
        'pad_shape': (size, size, 3),
        'border': (0, 0, 0, 0)
    }
    img_metas = [meta]

    gt_bboxes = [
        torch.Tensor([[10, 20, 200, 240], [40, 50, 100, 200],
                      [10, 20, 200, 240]])
    ]
    gt_labels = [torch.LongTensor([1, 1, 2])]

    head = CornerHead(num_classes=4, in_channels=1, corner_emb_channels=1)

    feat = [
        torch.rand(1, 1, feat_size, feat_size)
        for _ in range(head.num_feat_levels)
    ]

    targets = head.get_targets(
        gt_bboxes,
        gt_labels,
        feat[0].shape,
        img_metas[0]['pad_shape'],
        with_corner_emb=head.with_corner_emb)

    # Build embedding maps that are 1 only at the corners of the first
    # ground truth box of the first image.
    (top, left), (bottom, right) = targets['corner_embedding'][0][0]
    tl_emb = torch.zeros([1, 1, feat_size, feat_size])
    br_emb = torch.zeros([1, 1, feat_size, feat_size])
    tl_emb[0, 0, top, left] = 1
    br_emb[0, 0, bottom, right] = 1

    batch_bboxes, batch_scores, batch_clses = head.decode_heatmap(
        tl_heat=targets['topleft_heatmap'],
        br_heat=targets['bottomright_heatmap'],
        tl_off=targets['topleft_offset'],
        br_off=targets['bottomright_offset'],
        tl_emb=tl_emb,
        br_emb=br_emb,
        img_meta=img_metas[0],
        k=100,
        kernel=3,
        distance_threshold=0.5)

    bboxes = batch_bboxes.view(-1, 4)
    scores = batch_scores.view(-1, 1)
    clses = batch_clses.view(-1, 1)

    # sort everything by descending score
    order = scores.argsort(dim=0, descending=True)
    bboxes = bboxes[order].view(-1, 4)
    scores = scores[order].view(-1)
    clses = clses[order].view(-1)

    confident = torch.where(scores > 0.05)
    valid_bboxes = bboxes[confident]
    valid_labels = clses[confident]

    # Shift boxes by a per-class offset larger than any coordinate so that
    # boxes of different classes can never overlap.
    class_stride = valid_bboxes.max() + 1
    det_shift = valid_labels.to(valid_bboxes) * class_stride
    gt_shift = gt_labels[0].to(gt_bboxes[0]) * class_stride

    shifted_dets = valid_bboxes + det_shift[:, None]
    shifted_gts = gt_bboxes[0] + gt_shift[:, None]

    iou_matrix = bbox_overlaps(shifted_dets.numpy(), shifted_gts.numpy())
    assert (iou_matrix == 1).sum() == 3
def tpfp_default(det_bboxes,
                 gt_bboxes,
                 gt_bboxes_ignore=None,
                 iou_thr=0.5,
                 area_ranges=None,
                 class_index=None,
                 gt_bboxes_all_classes=None):
    """Check if detected bboxes are true positive or false positive.

    Besides the tp / fp flags this variant also reports redundant
    detections and, for every detection and every gt, the best overlapping
    counterpart.

    Args:
        det_bboxes (ndarray): Detected bboxes of this image, of shape
            (m, 5); the last column is used as the score.
        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).
        gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,
            of shape (k, 4). Default: None (no ignored boxes).
        iou_thr (float): IoU threshold to be considered as matched.
            Default: 0.5.
        area_ranges (list[tuple] | None): Range of bbox areas to be
            evaluated, in the format [(min1, max1), (min2, max2), ...].
            Default: None.
        class_index: Value written to ``matched_classes`` for detections
            whose best IoU with the gts of this class is non-zero.
            Default: None.
        gt_bboxes_all_classes (dict | None): Dict with 'bboxes' and
            'labels' arrays used to look up a match for detections that
            overlap no gt of this class. Default: None (lookup skipped).

    Returns:
        tuple: ``(tp, fp, fp_redundant, gt_covered, ious_max, ious_argmax,
        matched_classes, gt_iou_max, gt_iou_argmax)``.

        - tp, fp, fp_redundant (ndarray): 0/1 flags of shape
          (num_scales, m). ``fp_redundant`` marks false positives whose
          matched gt was already covered.
        - gt_covered (ndarray | list): Coverage flags of the gts for the
          last area range; an empty list if there are no gts.
        - ious_max, ious_argmax (ndarray): Per detection, best IoU and
          index of the best gt; -1 where nothing overlaps.
        - matched_classes (ndarray): Per detection, class of the best
          overlapping gt, -1 if none.
        - gt_iou_max, gt_iou_argmax (ndarray | list): Per gt, best IoU and
          index of the best detection, -1 if none; empty lists when there
          are no detections or no gts.
    """
    # The signature allows gt_bboxes_ignore=None; treat it as "nothing to
    # ignore" instead of failing on `.shape`.
    if gt_bboxes_ignore is None:
        gt_bboxes_ignore = np.zeros((0, 4), dtype=gt_bboxes.dtype)

    # an indicator of ignored gts (builtin bool: np.bool no longer exists
    # in current NumPy releases)
    gt_ignore_inds = np.concatenate(
        (np.zeros(gt_bboxes.shape[0], dtype=bool),
         np.ones(gt_bboxes_ignore.shape[0], dtype=bool)))
    # stack gt_bboxes and gt_bboxes_ignore for convenience
    gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))

    num_dets = det_bboxes.shape[0]
    num_gts = gt_bboxes.shape[0]
    if area_ranges is None:
        area_ranges = [(None, None)]
    num_scales = len(area_ranges)
    # tp and fp are of shape (num_scales, num_dets), each row is tp or fp of
    # a certain scale
    tp = np.zeros((num_scales, num_dets), dtype=np.float32)
    fp = np.zeros((num_scales, num_dets), dtype=np.float32)
    fp_redundant = np.zeros((num_scales, num_dets), dtype=np.float32)
    gt_covered = []
    ious_max = -1 * np.ones(num_dets, dtype=np.float32)
    ious_argmax = -1 * np.ones(num_dets, dtype=np.float32)
    matched_classes = -1 * np.ones(num_dets, dtype=np.float32)

    # if there is no gt bboxes in this image, then all det bboxes
    # within area range are false positives
    if gt_bboxes.shape[0] == 0:
        if area_ranges == [(None, None)]:
            fp[...] = 1
        else:
            det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0]) * (
                det_bboxes[:, 3] - det_bboxes[:, 1])
            for i, (min_area, max_area) in enumerate(area_ranges):
                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
    else:
        ious = bbox_overlaps(det_bboxes, gt_bboxes)
        # for each det, the max iou with all gts
        ious_max = ious.max(axis=1)
        # for each det, which gt overlaps most with it
        ious_argmax = ious.argmax(axis=1)
        # sort all dets in descending order by scores
        sort_inds = np.argsort(-det_bboxes[:, -1])
        for k, (min_area, max_area) in enumerate(area_ranges):
            gt_covered = np.zeros(num_gts, dtype=bool)
            # if no area range is specified, gt_area_ignore is all False
            if min_area is None:
                gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)
            else:
                gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (
                    gt_bboxes[:, 3] - gt_bboxes[:, 1])
                gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
            for i in sort_inds:
                if ious_max[i] >= iou_thr:
                    matched_gt = ious_argmax[i]
                    if not (gt_ignore_inds[matched_gt]
                            or gt_area_ignore[matched_gt]):
                        if not gt_covered[matched_gt]:
                            gt_covered[matched_gt] = True
                            tp[k, i] = 1
                        else:
                            fp[k, i] = 1
                            fp_redundant[k, i] = 1
                    # otherwise ignore this detected bbox, tp = 0, fp = 0
                elif min_area is None:
                    fp[k, i] = 1
                else:
                    bbox = det_bboxes[i, :4]
                    area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                    if area >= min_area and area < max_area:
                        fp[k, i] = 1

    if gt_bboxes.shape[0] != 0:
        matched_classes[np.where(ious_max != 0)] = class_index

    # Check if det matches gt from other class. The lookup table is
    # optional, so skip the step when it was not supplied.
    if (-1 in matched_classes and gt_bboxes_all_classes is not None
            and gt_bboxes_all_classes['bboxes'].shape[0] != 0):
        unmatched_inds = np.where(matched_classes == -1)
        ious_all_classes = bbox_overlaps(det_bboxes,
                                         gt_bboxes_all_classes['bboxes'])
        ious_max_all_classes = ious_all_classes.max(axis=1)[unmatched_inds]
        ious_argmax_all_classes = ious_all_classes.argmax(
            axis=1)[unmatched_inds]

        labels = gt_bboxes_all_classes['labels'][ious_argmax_all_classes]
        labels[np.where(ious_max_all_classes == 0)] = -1
        matched_classes[unmatched_inds] = labels

        new_iou_max = np.array(ious_max_all_classes, copy=True)
        new_iou_max[np.where(ious_max_all_classes == 0)] = -1
        ious_max[unmatched_inds] = new_iou_max

        new_ious_argmax = np.array(ious_argmax_all_classes, copy=True)
        new_ious_argmax[np.where(ious_max_all_classes == 0)] = -1
        # Fix matched_gt_index. Find index within each class
        for i in range(len(new_ious_argmax)):
            index_all_gts = new_ious_argmax[i]
            if index_all_gts != -1:
                label = gt_bboxes_all_classes['labels'][index_all_gts]
                new_ious_argmax[i] = np.where(
                    gt_bboxes_all_classes['labels'][:index_all_gts] ==
                    label)[0].shape[0]
        ious_argmax[unmatched_inds] = new_ious_argmax

    gt_iou_max, gt_iou_argmax = [], []
    if det_bboxes.shape[0] != 0 and gt_bboxes.shape[0] != 0:
        gt_iou_max, gt_iou_argmax = ious.max(axis=0), ious.argmax(axis=0)
        unmatched_inds = np.where(gt_iou_max == 0)
        gt_iou_max[unmatched_inds] = -1
        gt_iou_argmax[unmatched_inds] = -1

    return (tp, fp, fp_redundant, gt_covered, ious_max, ious_argmax,
            matched_classes, gt_iou_max, gt_iou_argmax)
def __call__(self, results):
    """Crop a random patch that meets a sampled minimum-IoU constraint.

    A threshold is drawn from ``self.sample_mode``. A value of 1 returns
    ``results`` unchanged. Otherwise up to 50 patches are sampled for that
    threshold; a patch is accepted when its aspect ratio lies in [0.5, 2],
    its overlap with every box is at least the threshold and at least one
    box centre lies strictly inside it. If all 50 attempts fail, a new
    threshold is drawn.

    Args:
        results (dict): Must hold 'img', 'gt_bboxes' and 'gt_labels';
            'gt_masks' and 'gt_semantic_seg' are cropped too if present.

    Returns:
        dict: ``results`` with the image, boxes, labels (and masks /
        semantic map) replaced by their cropped versions.
    """
    img = results['img']
    boxes = results['gt_bboxes']
    labels = results['gt_labels']
    height, width, _ = img.shape

    while True:
        mode = random.choice(self.sample_mode)
        if mode == 1:
            return results

        min_iou = mode
        for _ in range(50):
            crop_w = random.uniform(self.min_crop_size * width, width)
            crop_h = random.uniform(self.min_crop_size * height, height)

            # h / w in [0.5, 2]
            aspect = crop_h / crop_w
            if aspect < 0.5 or aspect > 2:
                continue

            # NOTE(review): uniform() gets a single argument here, so
            # `random` is presumably numpy.random and the value is the
            # lower bound -- confirm this is intended.
            left = random.uniform(width - crop_w)
            top = random.uniform(height - crop_h)

            patch = np.array([
                int(left),
                int(top),
                int(left + crop_w),
                int(top + crop_h),
            ])
            overlaps = bbox_overlaps(
                patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
            if overlaps.min() < min_iou:
                continue

            # center of boxes should inside the crop img
            center = (boxes[:, :2] + boxes[:, 2:]) / 2
            cx, cy = center[:, 0], center[:, 1]
            mask = ((cx > patch[0]) & (cy > patch[1]) & (cx < patch[2])
                    & (cy < patch[3]))
            if not mask.any():
                continue

            boxes = boxes[mask]
            labels = labels[mask]

            # adjust boxes: clip to the patch, then move the origin to its
            # top-left corner
            x1, y1, x2, y2 = patch
            img = img[y1:y2, x1:x2]
            boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
            boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
            boxes -= np.tile(patch[:2], 2)

            results['img'] = img
            results['gt_bboxes'] = boxes
            results['gt_labels'] = labels

            if 'gt_masks' in results:
                kept_masks = [
                    results['gt_masks'][i] for i, keep in enumerate(mask)
                    if keep
                ]
                results['gt_masks'] = [
                    gt_mask[y1:y2, x1:x2] for gt_mask in kept_masks
                ]

            # not tested
            if 'gt_semantic_seg' in results:
                results['gt_semantic_seg'] = results['gt_semantic_seg'][
                    y1:y2, x1:x2]
            return results
def __call__(self, results):
    """Crop a random, size-bounded patch under a minimum-IoU constraint.

    The untouched image is stored under ``results['raw_img']`` first. If
    the widest or tallest box exceeds half of the image width / height,
    ``results`` is returned without cropping. Otherwise a threshold is
    drawn from ``self.sample_mode`` (1 means: return as is) and up to 50
    patches with side lengths between ``min_crop_size`` and
    ``max_crop_size`` times the image size are tried per threshold.

    Args:
        results (dict): Must hold 'img', 'gt_bboxes' and 'gt_labels'.

    Returns:
        dict: ``results`` with 'raw_img' added and, when a patch was
        accepted, 'img', 'gt_bboxes' and 'gt_labels' replaced by their
        cropped versions.
    """
    img = results['img']
    boxes = results['gt_bboxes']
    labels = results['gt_labels']
    results['raw_img'] = img
    height, width, _ = img.shape

    # largest box width and height found among the gt boxes
    widest, tallest = 0, 0
    box_list = []
    # Small-box anchoring is disabled: nothing below ever sets this flag.
    flag_sm = False
    for box in results.get('gt_bboxes', []):
        widest = max(widest, int(box[2] - box[0]))
        tallest = max(tallest, int(box[3] - box[1]))
        box_list.append(box)

    if widest > width // 2 or tallest > height // 2:
        return results

    while True:
        mode = random.choice(self.sample_mode)
        if mode == 1:
            return results

        min_iou = mode
        for _ in range(50):
            crop_w = random.uniform(self.min_crop_size * width,
                                    self.max_crop_size * width)
            crop_h = random.uniform(self.min_crop_size * height,
                                    self.max_crop_size * height)

            # h / w in [0.5, 2]
            aspect = crop_h / crop_w
            if aspect < 0.5 or aspect > 2:
                continue

            left = random.uniform(width - crop_w)
            top = random.uniform(height - crop_h)

            if flag_sm:
                # Re-sample the patch origin around a randomly chosen box.
                anchor = rchoice(box_list)
                x_hi, y_hi = anchor[0], anchor[1]
                x_lo = round(x_hi - crop_w)
                y_lo = round(y_hi - crop_h)
                x_limit = width - crop_w
                y_limit = height - crop_h
                left = random.uniform(max(x_lo, 0), min(x_hi, x_limit))
                top = random.uniform(max(y_lo, 0), min(y_hi, y_limit))

            patch = np.array([
                int(left),
                int(top),
                int(left + crop_w),
                int(top + crop_h),
            ])
            overlaps = bbox_overlaps(
                patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
            if overlaps.min() < min_iou:
                continue

            # center of boxes should inside the crop img
            center = (boxes[:, :2] + boxes[:, 2:]) / 2
            cx, cy = center[:, 0], center[:, 1]
            mask = ((cx > patch[0]) & (cy > patch[1]) & (cx < patch[2])
                    & (cy < patch[3]))
            if not mask.any():
                continue

            boxes = boxes[mask]
            labels = labels[mask]

            # adjust boxes: clip to the patch, then move the origin to its
            # top-left corner
            x1, y1, x2, y2 = patch
            img = img[y1:y2, x1:x2]
            boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
            boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
            boxes -= np.tile(patch[:2], 2)

            results['img'] = img
            results['gt_bboxes'] = boxes
            results['gt_labels'] = labels
            return results