def compute_predicted_bboxes(rois, pred_cls, pred_loc, image_info, cfg):
    '''
    Decode per-class box regressions into boxes, then apply score
    thresholding, per-image / per-class NMS and an optional per-image top-N cut.

    :param rois: [N, k] k>=5, batch_ix, x1, y1, x2, y2
    :param pred_cls: [N, num_classes, 1, 1]
    :param pred_loc: [N, num_classes * 4, 1, 1]
    :param image_info: indexed by batch index; each row is passed to
        bbox_helper.clip_bbox
    :param cfg: config; reads 'bbox_normalize_stats_precomputed',
        'bbox_normalize_stds', 'bbox_normalize_means', 'score_thresh',
        'nms_iou_thresh' and 'top_n'
    :return: bboxes: CUDA FloatTensor [M, 7], batch_ix, x1, y1, x2, y2, score, cls.
        M is 0 when there are no rois or no detection survives filtering.
    '''
    rois, pred_cls, pred_loc = map(to_np_array, [rois, pred_cls, pred_loc])
    N, num_classes = pred_cls.shape[0:2]
    assert (N == rois.shape[0])

    def _empty_result():
        return torch.from_numpy(np.zeros((0, 7), dtype=np.float32)).float().cuda()

    if N == 0:
        # max() over an empty batch-index column would raise
        return _empty_result()

    B = int(rois[:, 0].astype(np.int32).max()) + 1

    # roi membership per image does not depend on the class: compute it once
    rois_ix_per_image = [np.where(rois[:, 0] == b_ix)[0] for b_ix in range(B)]

    if cfg['bbox_normalize_stats_precomputed']:
        stds = np.array(cfg['bbox_normalize_stds'])[np.newaxis, :]
        means = np.array(cfg['bbox_normalize_means'])[np.newaxis, :]
    else:
        stds = means = None

    nmsed_bboxes = []
    # class 0 is skipped (the loop starts at 1)
    for cls in range(1, num_classes):
        # explicit reshape instead of squeeze(): squeeze() would also drop the
        # batch axis when N == 1 and break the 2-D indexing below
        scores = pred_cls[:, cls].reshape(N)
        deltas = pred_loc[:, cls * 4:cls * 4 + 4].reshape(N, 4)
        if stds is not None:
            deltas = deltas * stds + means
        bboxes = bbox_helper.compute_loc_bboxes(rois[:, 1:1 + 4], deltas)
        bboxes = np.hstack([bboxes, scores[:, np.newaxis]])

        # for each image, do nms
        for b_ix in range(B):
            rois_ix = rois_ix_per_image[b_ix]
            pre_scores = scores[rois_ix]
            # fancy indexing copies, so clipping does not touch `bboxes`
            pre_bboxes = bboxes[rois_ix]
            pre_bboxes[:, :4] = bbox_helper.clip_bbox(pre_bboxes[:, :4],
                                                      image_info[b_ix])
            if cfg['score_thresh'] > 0:
                keep_ix = np.where(pre_scores > cfg['score_thresh'])[0]
                pre_scores = pre_scores[keep_ix]
                pre_bboxes = pre_bboxes[keep_ix]
            if pre_scores.size == 0:
                continue
            # boxes are handed to nms in descending score order
            order = pre_scores.argsort()[::-1]
            pre_bboxes = pre_bboxes[order, :]
            keep_index = nms(
                torch.from_numpy(pre_bboxes).float().cuda(),
                cfg['nms_iou_thresh']).numpy()
            post_bboxes = pre_bboxes[keep_index]
            batch_ix = np.full(post_bboxes.shape[0], b_ix)
            batch_cls = np.full(post_bboxes.shape[0], cls)
            post_bboxes = np.hstack([
                batch_ix[:, np.newaxis], post_bboxes, batch_cls[:, np.newaxis]
            ])
            nmsed_bboxes.append(post_bboxes)

    if not nmsed_bboxes:
        # np.vstack([]) raises; report "no detections" as an empty tensor
        return _empty_result()
    nmsed_bboxes = np.vstack(nmsed_bboxes)

    if cfg['top_n'] > 0:
        top_n_bboxes = []
        for b_ix in range(B):
            bboxes = nmsed_bboxes[nmsed_bboxes[:, 0] == b_ix]
            # column -2 is the score
            order = bboxes[:, -2].argsort()[::-1][:cfg['top_n']]
            top_n_bboxes.append(bboxes[order])
        nmsed_bboxes = np.vstack(top_n_bboxes)

    return torch.from_numpy(nmsed_bboxes).float().cuda()
def compute_rpn_proposals(conv_cls, conv_loc, cfg, image_info):
    '''
    Turn RPN conv outputs into scored, NMS-filtered proposals per image.

    :argument
        conv_cls: FloatTensor, [batch, num_anchors * x, h, w], conv output of classification
        conv_loc: FloatTensor, [batch, num_anchors * 4, h, w], conv output of localization
        cfg: configs; reads 'anchor_ratios', 'anchor_scales', 'anchor_stride',
            'pre_nms_top_n', 'roi_min_size', 'nms_iou_thresh', 'post_nms_top_n'
        image_info: FloatTensor or array, [batch, 3], image size
    :returns
        proposals: CPU FloatTensor, 2-dim, one row per kept proposal:
            batch_ix, box columns from bbox_helper, score
            (6 columns assuming the bbox helpers return x1, y1, x2, y2)
    '''
    batch_size, num_anchors_4, featmap_h, featmap_w = conv_loc.shape
    # [K*A, 4]
    anchors_overplane = anchor_helper.get_anchors_over_plane(
        featmap_h, featmap_w,
        cfg['anchor_ratios'], cfg['anchor_scales'], cfg['anchor_stride'])
    B = batch_size
    A = num_anchors_4 // 4
    assert (A * 4 == num_anchors_4)
    K = featmap_h * featmap_w

    # move channels last so that every (location, anchor) pair becomes one row
    cls_view = conv_cls.permute(0, 2, 3, 1).contiguous().view(B, K * A, -1).cpu().numpy()
    loc_view = conv_loc.permute(0, 2, 3, 1).contiguous().view(B, K * A, 4).cpu().numpy()
    if torch.is_tensor(image_info):
        image_info = image_info.cpu().numpy()

    batch_proposals = []
    pre_nms_top_n = cfg['pre_nms_top_n']
    min_size = cfg['roi_min_size']
    for b_ix in range(B):
        # last channel is used as the score, to be compatible with sigmoid
        scores = cls_view[b_ix, :, -1]
        # `>=`: np.argpartition requires kth < len(scores); when the budget
        # covers every anchor a full sort gives the same result
        if pre_nms_top_n <= 0 or pre_nms_top_n >= scores.shape[0]:
            order = scores.argsort()[::-1]
        else:
            inds = np.argpartition(-scores, pre_nms_top_n)[:pre_nms_top_n]
            order = inds[np.argsort(-scores[inds])]

        loc_delta = loc_view[b_ix, order, :]
        loc_anchors = anchors_overplane[order, :]
        scores = scores[order]

        boxes = bbox_helper.compute_loc_bboxes(loc_anchors, loc_delta)
        boxes = bbox_helper.clip_bbox(boxes, image_info[b_ix])
        proposals = np.hstack([boxes, scores[:, np.newaxis]])

        # drop boxes smaller than roi_min_size in either dimension
        widths = proposals[:, 2] - proposals[:, 0] + 1
        heights = proposals[:, 3] - proposals[:, 1] + 1
        proposals = proposals[(widths >= min_size) & (heights >= min_size)]

        if proposals.shape[0] == 0:
            # nothing left to suppress; avoid calling nms on an empty tensor
            keep_index = np.zeros(0, dtype=np.int64)
        else:
            keep_index = nms(torch.from_numpy(proposals).float().cuda(),
                             cfg['nms_iou_thresh']).numpy()
        if cfg['post_nms_top_n'] > 0:
            keep_index = keep_index[:cfg['post_nms_top_n']]
        proposals = proposals[keep_index]

        batch_ix = np.full(keep_index.shape, b_ix)
        proposals = np.hstack([batch_ix[:, np.newaxis], proposals])
        batch_proposals.append(proposals)

    batch_proposals = torch.from_numpy(np.vstack(batch_proposals)).float()
    if batch_proposals.dim() < 2:
        # unsqueeze is out-of-place: the result has to be assigned
        batch_proposals = batch_proposals.unsqueeze(dim=0)
    return batch_proposals
def compute_mask_targets(proposals, cfg, ground_truth_bboxes, ground_truth_masks, image_info, ignore_regions=None):
    '''
    Select positive rois per image and build their per-class mask labels.

    Args:
        proposals: tensor [N, k], k>=5 (b_ix, x1,y1,x2,y2, ...); its device is
            used for the returned tensors
        cfg: config; reads 'append_gts', 'positive_iou_thresh',
            'batch_size_per_image', 'num_classes', 'label_h', 'label_w'
        ground_truth_bboxes: [batch_size, max_gts, k], k>=5 (x1,y1,x2,y2,label)
        ground_truth_masks: [batch_size, max_gts, image_h, image_w]
        image_info: [batch_size, 3], (resized_image_h, resized_image_w, resize_scale)
        ignore_regions: accepted for interface compatibility; not used here
    Return:
        batch_rois: float tensor [R, 6] (b_ix, x1,y1,x2,y2, cls)
        batch_mask_labels: float tensor [R, num_classes, label_h, label_w];
            -1 everywhere except on the channel of the matched class
        When no positive roi exists in the whole batch, a single padded row is
        returned: zeros of shape [1, 5] and mask labels filled with -1.
        NOTE(review): the padded rois have 5 columns while regular rows have 6;
        kept as in the original, confirm what callers expect.
    '''
    proposals_device = proposals.device
    proposals = to_np_array(proposals)
    ground_truth_bboxes = to_np_array(ground_truth_bboxes)
    ground_truth_masks = to_np_array(ground_truth_masks)
    image_info = to_np_array(image_info)

    label_shape = (cfg['num_classes'], cfg['label_h'], cfg['label_w'])
    max_per_image = cfg['batch_size_per_image']

    B = ground_truth_bboxes.shape[0]
    batch_rois = []
    batch_mask_labels = []
    for b_ix in range(B):
        rois = proposals[proposals[:, 0] == b_ix][:, 1:1 + 4]
        gts = ground_truth_bboxes[b_ix]
        masks = ground_truth_masks[b_ix]

        # kick out padded gts: compare x2 with x1 and y2 with y1 (same rule
        # as compute_proposal_targets); the old test mixed x2 with y1
        keep_ix = np.where((gts[:, 2] > gts[:, 0] + 1) &
                           (gts[:, 3] > gts[:, 1] + 1))[0]
        if keep_ix.size == 0:
            continue
        gts = gts[keep_ix]
        masks = masks[keep_ix]

        if cfg['append_gts']:
            rois = np.vstack([rois, gts[:, :4]])
        rois = bbox_helper.clip_bbox(rois.astype(np.int32),
                                     image_info[b_ix].astype(np.int32))
        if rois.shape[0] == 0:
            continue

        # [R, G]
        overlaps = bbox_helper.bbox_iou_overlaps(rois, gts)

        # a roi whose best IoU exceeds positive_iou_thresh is positive and is
        # matched to the gt giving that best IoU
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        pos_r_ix = np.where(max_overlaps > cfg['positive_iou_thresh'])[0]
        pos_g_ix = argmax_overlaps[pos_r_ix]

        # sampling
        num_positives = pos_r_ix.shape[0]
        if num_positives == 0:
            continue
        if 0 < max_per_image < num_positives:
            keep_ix = np.random.choice(num_positives, size=max_per_image,
                                       replace=False)
            pos_r_ix = pos_r_ix[keep_ix]
            pos_g_ix = pos_g_ix[keep_ix]

        # gather positive bboxes and related masks
        pos_rois = rois[pos_r_ix]
        pos_target_classes = gts[pos_g_ix][:, 4].astype(np.int64)
        pos_target_masks = masks[pos_g_ix]
        N = pos_rois.shape[0]
        pos_mask_labels = generate_mask_labels(pos_rois, pos_target_masks,
                                               cfg['label_h'], cfg['label_w'])

        # only the channel of the matched class gets a real label
        mask_labels = -np.ones((N,) + label_shape)
        mask_labels[np.arange(N), pos_target_classes, ...] = pos_mask_labels

        batch_idx = np.full((N, 1), b_ix)
        pos_rois = np.hstack(
            [batch_idx, pos_rois, pos_target_classes[:, np.newaxis]])
        batch_rois.append(pos_rois)
        batch_mask_labels.append(mask_labels)

    if len(batch_rois) == 0:
        # if there's no positive rois, pad zeros
        n = 1
        batch_rois = np.zeros((n, 5), dtype=np.float32)
        batch_mask_labels = -np.ones((n,) + label_shape, dtype=np.float32)
    else:
        batch_rois = np.vstack(batch_rois)
        batch_mask_labels = np.vstack(batch_mask_labels)

    # hand results back on the device the proposals came from
    batch_rois = torch.from_numpy(batch_rois).to(proposals_device).float()
    batch_mask_labels = torch.from_numpy(batch_mask_labels).to(proposals_device).float()
    return batch_rois, batch_mask_labels
def compute_proposal_targets(proposals, cfg, ground_truth_bboxes, image_info, ignore_regions=None, use_ohem=False):
    '''
    Sample positive / negative rois per image and build their classification
    and box-regression targets.

    :argument
        proposals: [N, k], k>=5, batch_idx, x1, y1, x2, y2
        cfg: config; reads 'append_gts', 'positive_iou_thresh',
            'negative_iou_thresh_hi', 'negative_iou_thresh_lo',
            'ignore_iou_thresh', 'batch_size', 'positive_percent',
            'num_classes', 'bbox_normalize_stats_precomputed',
            'bbox_normalize_means', 'bbox_normalize_stds'
        ground_truth_bboxes: [batch, max_num_gts, k], k>=5, x1,y1,x2,y2,label
        image_info: indexed by batch index; each row is passed to
            bbox_helper.clip_bbox
        ignore_regions: optional, indexed by batch index; negatives whose IoF
            with an ignore region exceeds 'ignore_iou_thresh' are dropped
        use_ohem: when True, keep all positives and negatives (no sampling)
    :returns (all CUDA tensors)
        rois: float [M, 5]: batch_idx, x1, y1, x2, y2
        cls_targets: long [M], 0 for negatives, gt label for positives
        loc_targets, loc_weights: float [M, num_classes * 4]
        M is 0 when no image yields any sampled roi.

    Side effect: accumulates positive / negative counts into the module-level
    `history` list.
    '''
    proposals, ground_truth_bboxes, image_info, ignore_regions = \
        map(to_np_array, [proposals, ground_truth_bboxes, image_info, ignore_regions])
    B = ground_truth_bboxes.shape[0]
    logger.debug('proposals.shape:{}'.format(proposals.shape))
    logger.debug('ground_truth_bboxes.shape:{}'.format(
        ground_truth_bboxes.shape))

    batch_size_per_image = cfg['batch_size']
    num_classes = cfg['num_classes']

    batch_rois = []
    batch_labels = []
    batch_loc_targets = []
    batch_loc_weights = []
    for b_ix in range(B):
        rois = proposals[proposals[:, 0] == b_ix][:, 1:1 + 4]
        gts = ground_truth_bboxes[b_ix]
        # kick out padded empty ground truth bboxes
        gts = gts[(gts[:, 2] > gts[:, 0] + 1) & (gts[:, 3] > gts[:, 1] + 1)]
        if cfg['append_gts']:
            rois = np.vstack([rois, gts[:, :4]])
        rois = bbox_helper.clip_bbox(rois, image_info[b_ix])
        R = rois.shape[0]
        G = gts.shape[0]
        if R == 0 or G == 0:
            continue

        # [R, G]
        overlaps = bbox_helper.bbox_iou_overlaps(rois, gts)

        # a roi whose best IoU exceeds positive_iou_thresh is positive and is
        # matched to the gt giving that best IoU
        # [R]
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        pos_r_ix = np.where(max_overlaps > cfg['positive_iou_thresh'])[0]
        pos_g_ix = argmax_overlaps[pos_r_ix]

        # remove duplicate positives
        pos_r_ix, return_index = np.unique(pos_r_ix, return_index=True)
        pos_g_ix = pos_g_ix[return_index]

        # a roi is negative when its best IoU lies in
        # [negative_iou_thresh_lo, negative_iou_thresh_hi)
        neg_r_ix = np.where((max_overlaps < cfg['negative_iou_thresh_hi']) &
                            (max_overlaps >= cfg['negative_iou_thresh_lo']))[0]

        # remove negatives which are located in ignore regions
        if ignore_regions is not None:
            cur_ignore = ignore_regions[b_ix]
            # remove padded ignore regions
            cur_ignore = cur_ignore[cur_ignore[:, 2] - cur_ignore[:, 0] > 1]
            if cur_ignore.shape[0] > 0:
                iof_overlaps = bbox_helper.bbox_iof_overlaps(rois, cur_ignore)
                max_iof_overlaps = iof_overlaps.max(axis=1)
                ignore_rois_ix = np.where(
                    max_iof_overlaps > cfg['ignore_iou_thresh'])[0]
                # setdiff1d keeps an integer dtype even when the result is
                # empty, so the array stays usable as an index
                neg_r_ix = np.setdiff1d(neg_r_ix, ignore_rois_ix)

        # remove positives from negatives
        neg_r_ix = np.setdiff1d(neg_r_ix, pos_r_ix)

        # sampling; keep all pos and negs if use OHEM
        num_positives = len(pos_r_ix)
        if not use_ohem:
            num_pos_sampling = int(cfg['positive_percent'] * batch_size_per_image)
            if num_pos_sampling < num_positives:
                keep_ix = np.random.choice(num_positives, size=num_pos_sampling,
                                           replace=False)
                pos_r_ix = pos_r_ix[keep_ix]
                pos_g_ix = pos_g_ix[keep_ix]
                num_positives = num_pos_sampling

            num_negatives = len(neg_r_ix)
            # negatives fill whatever part of the budget positives left over
            num_neg_sampling = batch_size_per_image - num_positives
            if num_neg_sampling < num_negatives:
                keep_ix = np.random.choice(num_negatives, size=num_neg_sampling,
                                           replace=False)
                neg_r_ix = neg_r_ix[keep_ix]

        # gather positives, matched gts, and negatives
        pos_rois = rois[pos_r_ix]
        pos_target_gts = gts[pos_g_ix]
        neg_rois = rois[neg_r_ix]
        rois_sampling = np.vstack([pos_rois, neg_rois])
        num_pos, num_neg = pos_rois.shape[0], neg_rois.shape[0]
        num_sampling = num_pos + num_neg
        if num_sampling == 0:
            # nothing to learn from in this image; the padding step below
            # cannot draw repeats from an empty set
            continue

        # generate targets
        pos_labels = pos_target_gts[:, 4].astype(np.int32)
        neg_labels = np.zeros(num_neg)
        labels = np.concatenate([pos_labels, neg_labels]).astype(np.int32)
        loc_targets = np.zeros([num_sampling, num_classes, 4])
        loc_weights = np.zeros([num_sampling, num_classes, 4])
        pos_loc_targets = bbox_helper.compute_loc_targets(
            pos_rois, pos_target_gts)
        if cfg['bbox_normalize_stats_precomputed']:
            pos_loc_targets = (pos_loc_targets - np.array(cfg['bbox_normalize_means'])[np.newaxis, :]) \
                / np.array(cfg['bbox_normalize_stds'])[np.newaxis, :]
        # positives come first in rois_sampling; only the slot of the matched
        # class carries a regression target and a non-zero weight
        pos_rows = np.arange(num_pos)
        loc_targets[pos_rows, pos_labels, :] = pos_loc_targets
        loc_weights[pos_rows, pos_labels, :] = 1
        loc_targets = loc_targets.reshape([num_sampling, -1])
        loc_weights = loc_weights.reshape([num_sampling, -1])

        batch_ix = np.full(rois_sampling.shape[0], b_ix)
        rois_sampling = np.hstack([batch_ix[:, np.newaxis], rois_sampling])

        # pad up to the per-image budget by repeating random sampled rows
        if rois_sampling.shape[0] < batch_size_per_image:
            rep_num = batch_size_per_image - rois_sampling.shape[0]
            rep_index = np.random.choice(rois_sampling.shape[0], size=rep_num,
                                         replace=True)
            rois_sampling = np.vstack(
                [rois_sampling, rois_sampling[rep_index]])
            labels = np.concatenate([labels, labels[rep_index]])
            loc_targets = np.vstack([loc_targets, loc_targets[rep_index]])
            loc_weights = np.vstack([loc_weights, loc_weights[rep_index]])

        batch_rois.append(rois_sampling)
        batch_labels.append(labels)
        batch_loc_targets.append(loc_targets)
        batch_loc_weights.append(loc_weights)

    if batch_labels:
        all_rois = np.vstack(batch_rois)
        all_labels = np.concatenate(batch_labels)
        all_loc_targets = np.vstack(batch_loc_targets)
        all_loc_weights = np.vstack(batch_loc_weights)
    else:
        # every image was skipped: np.vstack / np.concatenate reject empty
        # lists, so build empty arrays with the right number of columns
        all_rois = np.zeros((0, 5))
        all_labels = np.zeros(0, dtype=np.int32)
        all_loc_targets = np.zeros((0, num_classes * 4))
        all_loc_weights = np.zeros((0, num_classes * 4))

    pos_num = int((all_labels > 0).sum())
    neg_num = all_labels.shape[0] - pos_num
    history[0] += pos_num
    history[1] += neg_num
    history_pos, history_neg = history
    history_total = history_neg + history_pos
    # guard the ratio against an all-zero history
    pos_percent = history_pos / history_total if history_total else 0.0
    neg_percent = history_neg / history_total if history_total else 0.0
    logger.debug(
        'proposal_target(pos/neg): %d=%d+%d, history ratio:%.5f/%.5f',
        pos_num + neg_num, pos_num, neg_num, pos_percent, neg_percent)

    batch_rois = torch.from_numpy(all_rois).float().cuda().contiguous()
    batch_labels = torch.from_numpy(all_labels).long().cuda().contiguous()
    batch_loc_targets = torch.from_numpy(all_loc_targets).float().cuda().contiguous()
    batch_loc_weights = torch.from_numpy(all_loc_weights).float().cuda().contiguous()
    return batch_rois, batch_labels, batch_loc_targets, batch_loc_weights
def validate_single(val_loader, model, cfg):
    '''
    Run the model over the validation loader on a single process, dump the
    detections to `<args.results_dir>/results.txt.rank0` and evaluate them.

    :param val_loader: iterable of batches; element 0 is the image tensor,
        1 the image info, 2 the ground truth boxes and the last one the
        file names
    :param model: called with a dict input; its output's 'predict' entry holds
        (proposals, bboxes)
    :param cfg: config; reads cfg['shared']['num_classes'] and is forwarded to
        the model
    :return: proposal recall, total_rc / total_gt (0.0 when there is no
        ground truth at all)

    Relies on the module-level `args` (results_dir, dataset, val_meta_file).
    '''
    logger = logging.getLogger('global')
    rank, world_size = 0, 1

    # switch to evaluate mode
    model.eval()

    total_rc = 0
    total_gt = 0

    logger.info('start validate')
    os.makedirs(args.results_dir, exist_ok=True)

    # remove result files left over by ranks that no longer exist
    for name in os.listdir(args.results_dir):
        if 'results.txt.rank' not in name:
            continue
        try:
            file_rank = int(name.split('k')[-1])
        except ValueError:
            # not a plain `results.txt.rank<N>` file: leave it alone
            continue
        if file_rank >= world_size:
            logger.info("remove %s" % name)
            os.remove(os.path.join(args.results_dir, name))

    is_coco = args.dataset == 'coco'
    line_format = 'val2017/{0}.jpg {1} {2}\n' if is_coco else '{0} {1} {2}\n'
    results_path = os.path.join(args.results_dir, 'results.txt.rank%d' % rank)

    # the context manager closes the file even if inference raises
    with open(results_path, 'w') as fout:
        for batch_no, batch in enumerate(val_loader):
            img = (batch[0]).cuda()
            img_info = batch[1]
            gt_boxes = batch[2]
            filenames = batch[-1]
            x = {
                'cfg': cfg,
                'image': img,
                'image_info': img_info,
                'ground_truth_bboxes': gt_boxes,
                'ignore_regions': None
            }
            batch_size = img.shape[0]

            t0 = time.time()
            outputs = model(x)['predict']
            t2 = time.time()

            proposals = outputs[0].data.cpu().numpy()
            bboxes = outputs[1].data.cpu().numpy()
            if torch.is_tensor(gt_boxes):
                gt_boxes = gt_boxes.cpu().numpy()

            for b_ix in range(batch_size):
                # file name without directory and extension
                img_id = filenames[b_ix].rsplit('/', 1)[-1].rsplit('.', 1)[0]
                img_resize_scale = img_info[b_ix, -1]
                if is_coco:
                    img_resize_scale = img_info[b_ix, 2]

                rois_per_image = proposals[proposals[:, 0] == b_ix]
                dts_per_image = bboxes[bboxes[:, 0] == b_ix]
                gts_per_image = gt_boxes[b_ix]

                # rpn recall
                num_rc, num_gt = bbox_helper.compute_recall(
                    rois_per_image[:, 1:1 + 4], gts_per_image)
                total_gt += num_gt
                total_rc += num_rc

                # keep the 100 highest scoring detections (column -2 is score)
                order = dts_per_image[:, -2].argsort()[::-1][:100]
                dts_per_image = dts_per_image[order]

                # faster-rcnn eval
                for cls in range(1, cfg['shared']['num_classes']):
                    dts_per_cls = dts_per_image[dts_per_image[:, -1] == cls]
                    # strip the batch index and class columns
                    dts_per_cls = dts_per_cls[:, 1:-1]
                    dts_per_cls = bbox_helper.clip_bbox(dts_per_cls,
                                                        img_info[b_ix, :2])
                    if len(dts_per_cls) > 0:
                        # undo the resize applied to the image
                        dts_per_cls[:, :4] = dts_per_cls[:, :4] / img_resize_scale
                    for bx in dts_per_cls:
                        fout.write(line_format.format(
                            img_id, ' '.join(map(str, bx)), cls))
            fout.flush()

            logger.info('Test: [%d/%d] Time: %.3f %d/%d' %
                        (batch_no, len(val_loader), t2 - t0, total_rc, total_gt))
            print_speed(batch_no + 1, t2 - t0, len(val_loader))

    # avoid ZeroDivisionError when the loader holds no ground truth boxes
    recall = total_rc / total_gt if total_gt else 0.0
    logger.info('rpn300 recall=%f' % recall)

    # eval coco ap with official python api, otherwise compute the mAP with
    # Cal_MAP
    if is_coco:
        eval_coco_ap(args.results_dir, 'bbox', args.val_meta_file)
    else:
        Cal_MAP(args.results_dir, args.val_meta_file,
                int(cfg['shared']['num_classes']))
    return recall