def compute_bbox_regression_targets(rois, overlaps, labels):
    """
    Compute bounding box regression targets from rois, overlaps and labels.

    ROIs whose overlap equals 1 are treated as ground truth. Every ROI whose
    overlap is at least ``config.TRAIN.BBOX_REGRESSION_THRESH`` is matched to
    the ground-truth ROI it overlaps most and receives a regression target
    from ``bbox_transform``; all other rows stay zero.

    :param rois: roidb[i]['boxes'] k * 4
    :param overlaps: roidb[i]['max_overlaps'] k * 1
    :param labels: roidb[i]['max_classes'] k * 1
    :return: targets[i][class, dx, dy, dw, dh] k * 5 (float32); all zeros when
        there is no ground-truth ROI
    """
    # Ensure ROIs are floats. np.float was removed in NumPy 1.24; float64 is
    # the type that alias resolved to.
    rois = rois.astype(np.float64, copy=False)
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)

    # Sanity check
    if len(rois) != len(overlaps):
        print('bbox regression: this should not happen')

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        print('something wrong : zero ground truth rois')
        # Nothing to regress towards; argmax over an empty ground-truth axis
        # below would raise ValueError, so bail out with zero targets.
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """
    Build a roidb from per-image proposal boxes and the ground-truth roidb.

    Each proposal is assigned the class of the ground-truth box it overlaps
    most (when that overlap is greater than zero), and the overlap value is
    stored in that class column of ``gt_overlaps``.

    :param box_list: [image_index] ndarray of [box_index][x1, x2, y1, y2];
        a fifth column, if present, is dropped
        (NOTE(review): the coordinate order is copied from the original
        docstring and may be a typo for x1, y1, x2, y2 -- confirm)
    :param gt_roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'];
        'image', 'height' and 'width' are read as well
    :return: roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
        plus 'image', 'height', 'width', 'max_classes' and 'max_overlaps'
    """
    assert len(box_list) == self.num_images, \
        'number of boxes matrix must match number of images'

    roidb = []
    for i in range(self.num_images):
        roi_rec = dict()
        roi_rec['image'] = gt_roidb[i]['image']
        roi_rec['height'] = gt_roidb[i]['height']
        roi_rec['width'] = gt_roidb[i]['width']

        boxes = box_list[i]
        if boxes.shape[1] == 5:
            boxes = boxes[:, :4]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)
        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # n boxes and k gt_boxes => n * k overlap
            # (np.float was removed in NumPy 1.24; float64 is its equivalent)
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            # for each box in n boxes, select only maximum overlap (must be greater than zero)
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            positive = np.where(maxes > 0)[0]
            overlaps[positive, gt_classes[argmaxes[positive]]] = maxes[positive]

        roi_rec.update({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes, ), dtype=np.int32),
            'gt_overlaps': overlaps,
            'max_classes': overlaps.argmax(axis=1),
            'max_overlaps': overlaps.max(axis=1),
            'flipped': False
        })

        # background roi => background class
        zero_indexes = np.where(roi_rec['max_overlaps'] == 0)[0]
        assert all(roi_rec['max_classes'][zero_indexes] == 0)
        # foreground roi => foreground class
        nonzero_indexes = np.where(roi_rec['max_overlaps'] > 0)[0]
        assert all(roi_rec['max_classes'][nonzero_indexes] != 0)

        roidb.append(roi_rec)

    return roidb
def single_image_single_class_confusion_matrix(gt_boxes, pred_boxes, scores, threshold):
    """
    Mark every predicted box of one image / one class as true or false positive.

    Predictions are visited in order of decreasing score. A prediction is a
    true positive when its best-overlapping ground-truth box reaches
    ``threshold`` IoU and has not been claimed by an earlier prediction;
    otherwise it is a false positive.

    :param gt_boxes: (#gt, 4)
    :param pred_boxes: (#box, 4)
    :param scores: (#box, )
    :param threshold: float, IoU threshold
    :return: tp, fp -- two (#box, ) indicator arrays
    """
    if pred_boxes.size == 0:
        return np.zeros(shape=(0, )), np.zeros(shape=(0, ))

    num_pred = pred_boxes.shape[0]
    if gt_boxes.size == 0:
        # no ground truth: every prediction is a false positive
        return np.zeros(shape=(num_pred, )), np.ones(shape=(num_pred, ))

    # (#box, #gt)
    ious = bbox_overlaps(pred_boxes.astype(np.float32, copy=False),
                         gt_boxes.astype(np.float32, copy=False))
    best_iou = np.max(ious, axis=1)
    best_gt = np.argmax(ious, axis=1)

    tp = np.zeros(shape=(num_pred, ))
    fp = np.zeros(shape=(num_pred, ))
    claimed = set()
    for idx in np.argsort(-scores):
        gt_idx = int(best_gt[idx])
        if best_iou[idx] >= threshold and gt_idx not in claimed:
            claimed.add(gt_idx)
            tp[idx] = 1
        else:
            fp[idx] = 1
    return tp, fp
def evalutate_detections(all_boxes, roidb, thresh=0.9):
    """
    Print the fraction of images whose top-scoring box matches the ground truth.

    For every image the predicted box with the highest score (column 4) is
    compared with the first ground-truth box; the image counts as a hit when
    their overlap exceeds ``thresh``. Images without predictions count as
    misses.

    :param all_boxes: per-image arrays of predicted boxes, columns 0-3 are the
        box and column 4 the score
    :param roidb: per-image records; ``roidb[i]['bbox']`` holds the ground truth
    :param thresh: overlap threshold a prediction must exceed (default 0.9)
    :return: None; the accuracy is printed
    """
    assert len(all_boxes) == len(roidb)
    pos_count = 0
    for pred_boxes, rec in zip(all_boxes, roidb):
        ground_truth = rec['bbox']
        if pred_boxes.shape[0] == 0:
            continue
        best = np.argmax(pred_boxes[:, 4])
        pred_box = pred_boxes[best, :][np.newaxis, :]
        # np.float was removed in NumPy 1.24; float64 is its equivalent
        overlap = bbox_overlaps(pred_box[:, :4].astype(np.float64),
                                ground_truth.astype(np.float64))
        if overlap[0][0] > thresh:
            pos_count += 1
    # an empty roidb has no images to score; report 0 instead of dividing by zero
    acc = float(pos_count) / len(roidb) if len(roidb) else 0.0
    print(acc)
def test(args):
    """
    Run the RetinaFace detector over this worker's share of the dataset and
    write one detection file per image.

    Image ``i`` is processed only when ``i % args.parts == args.part``. Each
    processed image gets a text file under ``args.output/<image parent dir>/``
    holding the image name, the number of boxes and one ``x y w h score`` line
    per box. When the roidb entry carries ground-truth ``'boxes'``, the
    per-image and running recall at IoU > 0.5 are printed to stderr; ground
    truth on images with no detections counts as missed.

    :param args: parsed options; reads output, prefix, epoch, gpu, network,
        nocrop, bbox_vote, dataset, image_set, root_path, dataset_path, parts,
        part and pyramid
    :return: None
    """
    print('test with', args)
    global detector
    output_folder = args.output
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    detector = RetinaFace(args.prefix,
                          args.epoch,
                          args.gpu,
                          network=args.network,
                          nocrop=args.nocrop,
                          vote=args.bbox_vote)
    # NOTE(review): eval() on a command-line string executes arbitrary code;
    # acceptable only while args.dataset comes from a trusted operator.
    imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path)
    roidb = imdb.gt_roidb()
    overall = [0.0, 0.0]  # [ground-truth boxes found, ground-truth boxes seen]
    print('roidb size', len(roidb))

    for i in range(len(roidb)):
        if i % args.parts != args.part:
            continue
        roi = roidb[i]
        boxes = get_boxes(roi, args.pyramid)
        if 'boxes' in roi:
            gt_boxes = roi['boxes'].copy()
            num_gt = gt_boxes.shape[0]
            # np.float was removed in NumPy 1.24; float64 is its equivalent
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))
            # best IoU per ground-truth box; stays zero when nothing was detected
            _gt_overlaps = np.zeros((num_gt))
            if boxes.shape[0] > 0:
                _gt_overlaps = overlaps.max(axis=0)

            found = (_gt_overlaps > 0.5).sum()
            recall = found / float(num_gt) if num_gt else float('nan')
            overall[0] += found
            overall[1] += num_gt
            recall_all = float(overall[0]) / overall[1] if overall[1] else float('nan')
            print('[%d]' % i,
                  'recall',
                  recall, (num_gt, boxes.shape[0]),
                  'all:',
                  recall_all,
                  file=sys.stderr)
        else:
            print('[%d]' % i, 'detect %d faces' % boxes.shape[0])

        _vec = roidb[i]['image'].split('/')
        out_dir = os.path.join(output_folder, _vec[-2])
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        out_file = os.path.join(out_dir, _vec[-1].replace('jpg', 'txt'))
        with open(out_file, 'w') as f:
            name = '/'.join(_vec[-2:])
            f.write("%s\n" % (name))
            f.write("%d\n" % (boxes.shape[0]))
            for b in range(boxes.shape[0]):
                box = boxes[b]
                # x, y, width, height, score
                f.write("%d %d %d %d %g \n" %
                        (box[0], box[1], box[2] - box[0], box[3] - box[1], box[4]))
def test(args):
    """
    Run the ESSH detector over the whole dataset, write one detection file per
    image and launch the MATLAB WIDER evaluation.

    Each image gets a text file under ``args.output/<image parent dir>/``
    holding the image name, the number of boxes and one ``x y w h score`` line
    per box. Per-image and running recall at ``config.TEST.IOU_THRESH`` are
    printed to stderr; ground truth on images with no detections counts as
    missed. Afterwards ``wider_eval`` is run through a ``matlab`` subprocess
    from the ``wider_eval_tools`` directory next to this file.

    :param args: parsed options; reads output, prefix, epoch, gpu, dataset,
        image_set, root_path, dataset_path, pyramid and method_name
    :return: None
    """
    print('test with', args)
    global detector
    output_folder = args.output
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    detector = ESSHDetector(args.prefix, args.epoch, args.gpu, test_mode=True)
    # NOTE(review): eval() on a command-line string executes arbitrary code;
    # acceptable only while args.dataset comes from a trusted operator.
    imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path)
    roidb = imdb.gt_roidb()
    overall = [0.0, 0.0]  # [ground-truth boxes found, ground-truth boxes seen]

    # range instead of xrange: xrange does not exist on Python 3
    for i in range(len(roidb)):
        if i % 10 == 0:
            print('processing', i, file=sys.stderr)
        roi = roidb[i]
        boxes = get_boxes(roi, args.pyramid)
        gt_boxes = roi['boxes'].copy()
        num_gt = gt_boxes.shape[0]
        gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * \
                   (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)

        # np.float was removed in NumPy 1.24; float64 is its equivalent
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        # best IoU per ground-truth box; stays zero when nothing was detected
        _gt_overlaps = np.zeros((num_gt))
        if boxes.shape[0] > 0:
            _gt_overlaps = overlaps.max(axis=0)
            for j in range(len(_gt_overlaps)):
                if _gt_overlaps[j] > config.TEST.IOU_THRESH:
                    continue
                print(j, 'failed', gt_boxes[j], 'max_overlap:', _gt_overlaps[j],
                      file=sys.stderr)

        found = (_gt_overlaps > config.TEST.IOU_THRESH).sum()
        _recall = found / float(num_gt) if num_gt else float('nan')
        print('recall', _recall, num_gt, boxes.shape[0], gt_areas, file=sys.stderr)
        overall[0] += found
        overall[1] += num_gt
        _recall = float(overall[0]) / overall[1] if overall[1] else float('nan')
        print('recall_all', _recall, file=sys.stderr)

        _vec = roidb[i]['image'].split('/')
        out_dir = os.path.join(output_folder, _vec[-2])
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        out_file = os.path.join(out_dir, _vec[-1].replace('jpg', 'txt'))
        with open(out_file, 'w') as f:
            name = '/'.join(_vec[-2:])
            f.write("%s\n" % (name))
            f.write("%d\n" % (boxes.shape[0]))
            for b in range(boxes.shape[0]):
                box = boxes[b]
                # x, y, width, height, score
                f.write("%d %d %d %d %g \n" %
                        (box[0], box[1], box[2] - box[0], box[3] - box[1], box[4]))

    print('Evaluating detections using official WIDER toolbox...')
    path = os.path.join(os.path.dirname(__file__), 'wider_eval_tools')
    eval_output_path = os.path.join(path, 'wider_plots')
    if not os.path.isdir(eval_output_path):
        os.mkdir(eval_output_path)
    cmd = 'cd {} && '.format(path)
    cmd += 'matlab -nodisplay -nodesktop '
    cmd += '-r "dbstop if error; '
    cmd += 'wider_eval(\'{:s}\',\'{:s}\',\'{:s}\'); quit;"'.format(
        args.output, args.method_name, eval_output_path)
    print('Running:\n{}'.format(cmd))
    # The command string is interpolated from CLI arguments and run through the
    # shell; only use with trusted arguments.
    subprocess.call(cmd, shell=True)
def test(args):
    """
    Run the SSH detector over the whole dataset and write one detection file
    per image.

    Each image gets a text file under ``args.output/<image parent dir>/``
    holding the image name, the number of boxes and one ``x y w h score`` line
    per box. Per-image and running recall at ``config.TEST.IOU_THRESH`` are
    printed to stderr; ground truth on images with no detections counts as
    missed.

    :param args: parsed options; reads output, prefix, epoch, gpu, dataset,
        image_set, root_path, dataset_path and pyramid
    :return: None
    """
    print('test with', args)
    global detector
    output_folder = args.output
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    detector = SSHDetector(args.prefix, args.epoch, args.gpu, test_mode=True)
    # NOTE(review): eval() on a command-line string executes arbitrary code;
    # acceptable only while args.dataset comes from a trusted operator.
    imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path)
    roidb = imdb.gt_roidb()
    overall = [0.0, 0.0]  # [ground-truth boxes found, ground-truth boxes seen]

    # range instead of xrange: xrange does not exist on Python 3
    for i in range(len(roidb)):
        if i % 10 == 0:
            print('processing', i, file=sys.stderr)
        roi = roidb[i]
        boxes = get_boxes(roi, args.pyramid)
        gt_boxes = roi['boxes'].copy()
        num_gt = gt_boxes.shape[0]
        gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * \
                   (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)

        # np.float was removed in NumPy 1.24; float64 is its equivalent
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        # best IoU per ground-truth box; stays zero when nothing was detected
        _gt_overlaps = np.zeros((num_gt))
        if boxes.shape[0] > 0:
            _gt_overlaps = overlaps.max(axis=0)
            for j in range(len(_gt_overlaps)):
                if _gt_overlaps[j] > config.TEST.IOU_THRESH:
                    continue
                print(j, 'failed', gt_boxes[j], 'max_overlap:', _gt_overlaps[j],
                      file=sys.stderr)

        found = (_gt_overlaps > config.TEST.IOU_THRESH).sum()
        _recall = found / float(num_gt) if num_gt else float('nan')
        print('recall', _recall, num_gt, boxes.shape[0], gt_areas, file=sys.stderr)
        overall[0] += found
        overall[1] += num_gt
        _recall = float(overall[0]) / overall[1] if overall[1] else float('nan')
        print('recall_all', _recall, file=sys.stderr)

        _vec = roidb[i]['image'].split('/')
        out_dir = os.path.join(output_folder, _vec[-2])
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        out_file = os.path.join(out_dir, _vec[-1].replace('jpg', 'txt'))
        with open(out_file, 'w') as f:
            name = '/'.join(_vec[-2:])
            f.write("%s\n" % (name))
            f.write("%d\n" % (boxes.shape[0]))
            for b in range(boxes.shape[0]):
                box = boxes[b]
                # x, y, width, height, score
                f.write("%d %d %d %d %g \n" %
                        (box[0], box[1], box[2] - box[0], box[3] - box[1], box[4]))
def test_proposals(predictor, test_data, imdb, roidb, vis=False):
    """
    Run the RPN over ``test_data``, report recall and write detection files.

    For every batch the proposals scoring above ``config.TEST.SCORE_THRESH``
    are kept, compared with the ground truth (scaled by the image scale taken
    from ``im_info[0, 2]``), and per-image / running recall at
    ``config.TEST.IOU_THRESH`` is printed to stderr; ground truth on images
    with no proposals counts as missed. The kept boxes are scaled back and
    written to ``<imdb.root_path>/output/<image parent dir>/<image>.txt`` as
    ``x y w h score`` lines.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffled (unless ``vis``)
    :param imdb: image database; ``root_path`` and ``num_images`` are read
    :param roidb: roidb, indexed in iteration order of ``test_data``
    :param vis: controls visualization
    :return: None
    """
    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data]

    t = time.time()
    output_folder = os.path.join(imdb.root_path, 'output')
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    overall = [0.0, 0.0]  # [ground-truth boxes found, ground-truth boxes seen]

    for i, (im_info, data_batch) in enumerate(test_data):
        t1 = time.time() - t
        t = time.time()

        oscale = im_info[0, 2]
        scale = 1.0  # fix scale=1.0 for SSH face detector
        scores, boxes, data_dict = im_proposal(predictor, data_batch,
                                               data_names, scale)
        t2 = time.time() - t
        t = time.time()

        # assemble proposals, then filter them by score
        dets = np.hstack((boxes, scores))
        keep = np.where(dets[:, 4:] > config.TEST.SCORE_THRESH)[0]
        dets = dets[keep, :]

        logger.info('generating %d/%d ' % (i + 1, imdb.num_images) +
                    'proposal %d ' % (dets.shape[0]) +
                    'data %.4fs net %.4fs' % (t1, t2))

        if vis:
            vis_all_detection(data_dict['data'].asnumpy(), [dets], ['obj'], scale)
        boxes = dets

        # as roidb is the original one, need to scale GT for SSH
        gt_boxes = roidb[i]['boxes'].copy() * oscale
        num_gt = gt_boxes.shape[0]
        gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * \
                   (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)

        # np.float was removed in NumPy 1.24; float64 is its equivalent
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        # best IoU per ground-truth box; stays zero when nothing was proposed
        _gt_overlaps = np.zeros((num_gt))
        if boxes.shape[0] > 0:
            _gt_overlaps = overlaps.max(axis=0)
            for j in range(len(_gt_overlaps)):
                if _gt_overlaps[j] > config.TEST.IOU_THRESH:
                    continue
                print(j, 'failed', gt_boxes[j], 'max_overlap:', _gt_overlaps[j],
                      file=sys.stderr)

        found = (_gt_overlaps > config.TEST.IOU_THRESH).sum()
        _recall = found / float(num_gt) if num_gt else float('nan')
        print('recall', _recall, num_gt, boxes.shape[0], gt_areas, file=sys.stderr)
        overall[0] += found
        overall[1] += num_gt
        _recall = float(overall[0]) / overall[1] if overall[1] else float('nan')
        print('recall_all', _recall, file=sys.stderr)

        # back to original image coordinates before writing
        boxes[:, 0:4] /= oscale
        _vec = roidb[i]['image'].split('/')
        out_dir = os.path.join(output_folder, _vec[-2])
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        out_file = os.path.join(out_dir, _vec[-1].replace('jpg', 'txt'))
        with open(out_file, 'w') as f:
            name = '/'.join(_vec[-2:])
            f.write("%s\n" % (name))
            f.write("%d\n" % (boxes.shape[0]))
            for b in range(boxes.shape[0]):
                box = boxes[b]
                # x, y, width, height, score
                f.write("%d %d %d %d %g \n" %
                        (box[0], box[1], box[2] - box[0], box[3] - box[1], box[4]))
def evaluate_recall(self, roidb, candidate_boxes=None, thresholds=None):
    """
    Evaluate detection proposal recall and print the results.

    First prints how the proposals are distributed over the area ranges, then
    for every area range greedily matches proposals to ground-truth boxes
    (best remaining IoU first, each proposal and each ground-truth box used
    once) and prints the recall at every IoU threshold and their mean.

    :param roidb: used to evaluate
    :param candidate_boxes: per-image proposals; if not given, use roidb's
        non-gt boxes (entries with gt_classes == 0)
    :param thresholds: array-like IoU thresholds; defaults to 0.5:0.05:0.95
    :return: None
    """
    area_names = ['all', '0-25', '25-50', '50-100',
                  '100-200', '200-300', '300-inf']
    area_ranges = [[0**2, 1e5**2], [0**2, 25**2], [25**2, 50**2],
                   [50**2, 100**2], [100**2, 200**2], [200**2, 300**2],
                   [300**2, 1e5**2]]

    def _proposals(index):
        # proposals to score for one image
        if candidate_boxes is None:
            # default is use the non-gt boxes from roidb
            non_gt_inds = np.where(roidb[index]['gt_classes'] == 0)[0]
            return roidb[index]['boxes'][non_gt_inds, :]
        return candidate_boxes[index]

    area_counts = []
    for area_range in area_ranges[1:]:
        area_count = 0
        for i in range(self.num_images):
            boxes = _proposals(i)
            boxes_areas = (boxes[:, 2] - boxes[:, 0] + 1) * \
                          (boxes[:, 3] - boxes[:, 1] + 1)
            valid_range_inds = np.where((boxes_areas >= area_range[0]) &
                                        (boxes_areas < area_range[1]))[0]
            area_count += len(valid_range_inds)
        area_counts.append(area_count)
    total_counts = float(sum(area_counts))
    for area_name, area_count in zip(area_names[1:], area_counts):
        print('percentage of', area_name, area_count / total_counts)
    print('average number of proposal', total_counts / self.num_images)

    for area_name, area_range in zip(area_names, area_ranges):
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            # check for max_overlaps == 1 avoids including crowd annotations
            max_gt_overlaps = roidb[i]['gt_overlaps'].max(axis=1)
            gt_inds = np.where((roidb[i]['gt_classes'] > 0) &
                               (max_gt_overlaps == 1))[0]
            gt_boxes = roidb[i]['boxes'][gt_inds, :]
            gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * \
                       (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)
            valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                     (gt_areas < area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            boxes = _proposals(i)
            if boxes.shape[0] == 0:
                continue

            # np.float was removed in NumPy 1.24; float64 is its equivalent
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            # choose whatever is smaller to iterate
            rounds = min(boxes.shape[0], gt_boxes.shape[0])
            for j in range(rounds):
                # find which proposal maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # get the IoU amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is covered by most IoU
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0), '%s\n%s\n%s' % (boxes, gt_boxes, overlaps)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the IoU coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded IoU coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        # explicit float array: zeros_like() on integer thresholds would
        # truncate every recall to 0 or 1
        recalls = np.zeros(len(thresholds), dtype=np.float64)

        # compute recall for each IoU threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        ar = recalls.mean()

        # print results
        print('average recall for {}: {:.3f}'.format(area_name, ar))
        for threshold, recall in zip(thresholds, recalls):
            print('recall @{:.2f}: {:.3f}'.format(threshold, recall))
def gpu_mask_voting(masks, boxes, scores, num_classes, max_per_image,
                    im_width, im_height, nms_thresh, merge_thresh,
                    binary_thresh=0.4, device_id=0):
    """
    A wrapper function for GPU mask voting, note we already know the class of
    boxes and masks.

    Per foreground class the boxes are reduced with NMS and a global score
    cut-off (at most ``max_per_image`` detections, score at least 1e-3). For
    each surviving box, all input boxes overlapping it by at least
    ``merge_thresh`` are collected with score-normalised weights and handed to
    ``mask_voting_kernel`` for merging.

    :param masks: per-box masks, indexed along axis 0 like ``boxes``
    :param boxes: candidate boxes, one row per box
    :param scores: per-box class scores, column ``c`` belongs to class ``c``
    :param num_classes: number of classes including background (class 0)
    :param max_per_image: maximum number of detections kept
    :param im_width: image width passed to the kernel
    :param im_height: image height passed to the kernel
    :param nms_thresh: threshold passed to ``gpu_nms_wrapper``
    :param merge_thresh: minimum overlap for a box to take part in a merge
    :param binary_thresh: threshold passed to the kernel
    :param device_id: GPU device id
    :return: (list_result_mask, list_result_box), both indexed by class;
        boxes carry the score as last column
    """
    nms = gpu_nms_wrapper(nms_thresh, device_id)
    # Intermediate results
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # inds array to record which mask should be aggregated together
    candidate_inds = []
    # weight for each element in the candidate inds
    candidate_weights = []
    # end position of each box's run inside the candidate arrays
    candidate_start = []
    candidate_scores = []
    # class_bar[c]: number of result rows up to and including class c
    class_bar = [0] * num_classes

    # organize helper variable for gpu mask voting
    # (np.float was removed in NumPy 1.24; the cast is loop-invariant)
    boxes_f64 = boxes.astype(np.float64)
    for c in range(1, num_classes):
        num_boxes = len(t_boxes[c])
        for i in range(num_boxes):
            cur_ov = bbox_overlaps(boxes_f64,
                                   t_boxes[c][i, np.newaxis].astype(np.float64))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            candidate_inds.extend(cur_inds)
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            candidate_weights.extend(cur_weights)
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(t_scores[c])
        class_bar[c] = len(candidate_scores)

    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)

    # the input masks/boxes are relatively large
    # select only a subset of them are useful for mask merge;
    # the inverse mapping re-indexes candidate_inds into that subset
    unique_inds, remapped = np.unique(candidate_inds, return_inverse=True)
    candidate_inds = remapped.astype(np.int32)
    boxes = boxes[unique_inds, ...]
    masks = masks[unique_inds, ...]

    boxes = np.round(boxes)
    result_mask, result_box = mask_voting_kernel(boxes, masks, candidate_inds,
                                                 candidate_start,
                                                 candidate_weights,
                                                 binary_thresh, im_height,
                                                 im_width, device_id)
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))

    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    cls_start = 0
    for i in range(1, num_classes):
        cls_end = class_bar[i]
        cls_box = result_box[cls_start:cls_end, :]
        cls_mask = result_mask[cls_start:cls_end, :]
        # drop degenerate boxes (non-positive width or height)
        valid_ind = np.where((cls_box[:, 2] > cls_box[:, 0]) &
                             (cls_box[:, 3] > cls_box[:, 1]))[0]
        list_result_box[i] = cls_box[valid_ind, :]
        list_result_mask[i] = cls_mask[valid_ind, :]
        cls_start = cls_end

    return list_result_mask, list_result_box
def cpu_mask_voting(masks, boxes, scores, num_classes, max_per_image,
                    im_width, im_height, nms_thresh, merge_thresh,
                    binary_thresh=0.4):
    """
    Wrapper function for mask voting, note we already know the class of boxes
    and masks.

    Per foreground class the boxes are reduced with NMS and a global score
    cut-off (at most ``max_per_image`` detections, score at least 1e-3). For
    each surviving box, all input boxes overlapping it by at least
    ``merge_thresh`` are merged by ``mask_aggregation`` using score-normalised
    weights, and the merged mask is resized back to the input mask size.

    :param masks: per-box masks; ``masks[i, 0]`` is the mask of box ``i`` and
        the last axis gives the mask size
    :param boxes: candidate boxes, one row per box
    :param scores: per-box class scores, column ``c`` belongs to class ``c``
    :param num_classes: number of classes including background (class 0)
    :param max_per_image: maximum number of detections kept
    :param im_width: image width passed to ``mask_aggregation``
    :param im_height: image height passed to ``mask_aggregation``
    :param nms_thresh: threshold passed to ``py_nms_wrapper``
    :param merge_thresh: minimum overlap for a box to take part in a merge
    :param binary_thresh: threshold passed to ``mask_aggregation``
    :return: (list_result_mask, list_result_box), both indexed by class;
        boxes carry the score as last column
    """
    masks = masks.astype(np.float32)
    mask_size = masks.shape[-1]
    nms = py_nms_wrapper(nms_thresh)
    # apply nms and sort to get first images according to their scores

    # Intermediate results
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # resize every input mask to the pixel size of its (rounded) box
    num_detect = boxes.shape[0]
    res_mask = [[] for _ in range(num_detect)]
    for i in range(num_detect):
        box = np.round(boxes[i]).astype(int)
        mask = cv2.resize(masks[i, 0].astype(np.float32),
                          (box[2] - box[0] + 1, box[3] - box[1] + 1))
        res_mask[i] = mask

    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    # np.float was removed in NumPy 1.24; the cast is loop-invariant
    boxes_f64 = boxes.astype(np.float64)
    for c in range(1, num_classes):
        num_boxes = len(t_boxes[c])
        masks_ar = np.zeros((num_boxes, 1, mask_size, mask_size))
        boxes_ar = np.zeros((num_boxes, 4))
        for i in range(num_boxes):
            # Get weights according to their segmentation scores
            cur_ov = bbox_overlaps(boxes_f64,
                                   t_boxes[c][i, np.newaxis].astype(np.float64))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            # Re-format mask when passing it to mask_aggregation
            p_mask = [res_mask[j] for j in list(cur_inds)]
            # do mask aggregation
            orig_mask, boxes_ar[i] = mask_aggregation(boxes[cur_inds], p_mask,
                                                      cur_weights, im_width,
                                                      im_height, binary_thresh)
            masks_ar[i, 0] = cv2.resize(orig_mask.astype(np.float32),
                                        (mask_size, mask_size))
        boxes_scored_ar = np.hstack((boxes_ar, t_scores[c][:, np.newaxis]))
        list_result_box[c] = boxes_scored_ar
        list_result_mask[c] = masks_ar

    return list_result_mask, list_result_box
def get_rois(rois, rois_per_image, num_classes, labels=None, overlaps=None,
             bbox_targets=None, gt_boxes=None):
    """
    Get top N ROIs, used in online hard example mining.

    Exactly ``rois_per_image`` ROIs are returned: a random subset when there
    are more, randomly repeated ROIs when there are fewer. ROIs whose overlap
    lies in [BG_THRESH_LO, BG_THRESH_HI) are labelled background (0).

    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param labels: maybe precomputed; when given together with
        ``bbox_targets=None`` the gt assignment is not available and the
        target computation fails, so pass ``bbox_targets`` as well
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)
    """
    if labels is None:
        if len(gt_boxes) == 0:
            # dummy background box so every roi still gets an assignment
            gt_boxes = np.array([[1, 1, 1, 1, 0]])
        # np.float was removed in NumPy 1.24; float64 is its equivalent
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                 gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # select indices
    keep_indexes = np.arange(rois.shape[0])
    if keep_indexes.shape[0] > rois_per_image:
        keep_indexes = npr.choice(keep_indexes, size=rois_per_image, replace=False)

    # if not enough, pad until rois_per_image is satisfied
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(rois_per_image - keep_indexes.shape[0], len(rois))
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # suppress any bg defined by overlap
    bg_indexes = np.where((overlaps < config.TRAIN.BG_THRESH_HI) &
                          (overlaps >= config.TRAIN.BG_THRESH_LO))[0]
    labels[bg_indexes] = 0

    labels = labels[keep_indexes]
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:],
                                 gt_boxes[gt_assignment[keep_indexes], :4])
        if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(config.TRAIN.BBOX_MEANS)) /
                       np.array(config.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes)

    return rois, labels, bbox_targets, bbox_weights
def sample_rois_fpn(rois, assign_levels, fg_rois_per_image, rois_per_image,
                    num_classes, labels=None, overlaps=None, bbox_targets=None,
                    mask_targets=None, mask_labels=None, mask_inds=None,
                    gt_boxes=None, im_info=None):
    """
    Generate a random sample of ROIs comprising foreground and background
    examples and split the result by FPN level.

    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param assign_levels: [n] feature stride each roi is assigned to
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param mask_targets: optional mask targets; requires ``mask_labels`` and
        ``mask_inds``
    :param mask_labels: class index of each mask target
    :param mask_inds: roi index of each mask target
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :param im_info: unused
    :return: dicts keyed by 'stride<s>' for every s in config.RCNN_FEAT_STRIDE:
        (rois, labels, bbox_targets, bbox_weights), followed by
        (mask_targets, mask_weights) when ``mask_targets`` is given
    """
    DEBUG = False
    if labels is None:
        if len(gt_boxes) == 0:
            # no ground truth: everything is background with zero overlap
            gt_boxes = np.zeros((1, 5))
            gt_assignment = np.zeros((len(rois), ), dtype=np.int32)
            overlaps = np.zeros((len(rois), ))
            labels = np.zeros((len(rois), ))
        else:
            # np.float was removed in NumPy 1.24; float64 is its equivalent
            overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                     gt_boxes[:, :4].astype(np.float64))
            gt_assignment = overlaps.argmax(axis=1)
            overlaps = overlaps.max(axis=1)
            labels = gt_boxes[gt_assignment, 4]

    num_rois = rois.shape[0]
    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= config.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)

    if DEBUG:
        print('fg total num:', len(fg_indexes))

    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image,
                                replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < config.TRAIN.BG_THRESH_HI) &
                          (overlaps >= config.TRAIN.BG_THRESH_LO))[0]
    if DEBUG:
        print('bg total num:', len(bg_indexes))

    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image,
                                replace=False)

    if DEBUG:
        print('fg num:', len(fg_indexes))
        print('bg num:', len(bg_indexes))
        # bg rois statistics
        bg_assign = assign_levels[bg_indexes]
        bg_rois_on_levels = dict()
        for i, s in enumerate(config.RCNN_FEAT_STRIDE):
            bg_rois_on_levels.update(
                {'stride%s' % s: len(np.where(bg_assign == s)[0])})
        print(bg_rois_on_levels)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size, drawing from every roi below
    # FG_THRESH (only the count is needed, so the rois are not copied)
    neg_idx = np.where(overlaps < config.TRAIN.FG_THRESH)[0]
    num_neg = len(neg_idx)
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(num_neg, rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(num_neg), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, neg_idx[gap_indexes])

    # select labels
    labels = labels[keep_indexes]
    # set labels of bg_rois to be 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]
    assign_levels = assign_levels[keep_indexes]

    if mask_targets is not None:
        assert mask_labels is not None
        assert mask_inds is not None

        def _mask_umap(mask_targets, mask_labels, mask_inds):
            # scatter the sparse mask targets into dense per-roi, per-class arrays
            _mask_targets = np.zeros((num_rois, num_classes, 28, 28), dtype=np.int8)
            _mask_weights = np.zeros((num_rois, num_classes, 1, 1), dtype=np.int8)
            _mask_targets[mask_inds, mask_labels] = mask_targets
            _mask_weights[mask_inds, mask_labels] = 1
            return _mask_targets, _mask_weights  # [num_rois, num_classes, 28, 28]

        mask_targets, mask_weights = _mask_umap(mask_targets, mask_labels, mask_inds)
        mask_targets = mask_targets[keep_indexes]
        mask_weights = mask_weights[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:],
                                 gt_boxes[gt_assignment[keep_indexes], :4])
        if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(config.TRAIN.BBOX_MEANS)) /
                       np.array(config.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes)

    # Assign to levels
    rois_on_levels = dict()
    labels_on_levels = dict()
    bbox_targets_on_levels = dict()
    bbox_weights_on_levels = dict()
    if mask_targets is not None:
        mask_targets_on_levels = dict()
        mask_weights_on_levels = dict()
    for i, s in enumerate(config.RCNN_FEAT_STRIDE):
        index = np.where(assign_levels == s)
        key = 'stride%s' % s
        rois_on_levels.update({key: rois[index]})
        labels_on_levels.update({key: labels[index]})
        bbox_targets_on_levels.update({key: bbox_targets[index]})
        bbox_weights_on_levels.update({key: bbox_weights[index]})
        if mask_targets is not None:
            mask_targets_on_levels.update({key: mask_targets[index]})
            mask_weights_on_levels.update({key: mask_weights[index]})

    if mask_targets is not None:
        return rois_on_levels, labels_on_levels, bbox_targets_on_levels, bbox_weights_on_levels, mask_targets_on_levels, mask_weights_on_levels
    else:
        return rois_on_levels, labels_on_levels, bbox_targets_on_levels, bbox_weights_on_levels
def sample_rois(self, rois, fg_rois_per_image, rois_per_image, num_classes,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None,
                gt_masks=None):
    """
    sample a fixed-size minibatch of rois and build its label, bbox regression and mask targets
    :param rois: n * 5 array; columns 1: are used as the box coordinates
        (column 0 is presumably the batch index -- confirm against caller)
    :param fg_rois_per_image: maximum number of foreground rois to keep
    :param rois_per_image: total number of rois in the returned minibatch
    :param num_classes: forwarded to expand_bbox_regression_targets
    :param labels: per-roi class labels; when None, labels and overlaps are
        derived from the IoU between rois and gt_boxes
    :param overlaps: per-roi max IoU with any gt box; required when labels is given
    :param bbox_targets: optional precomputed regression targets, indexed per roi;
        when None they are computed from gt_boxes
    :param gt_boxes: k * 5 array; columns :4 are box coordinates, column 4 is the class label
    :param gt_masks: indexable by gt index; gt_masks[j] is the mask of gt_boxes[j]
    :return: (rois, labels, bbox_targets, bbox_weights, mask_reg_targets) where
        mask_reg_targets is rois_per_image * 1 * mask_size * mask_size, filled with -1
        except for the sampled foreground rois which hold a binarized mask
    :raises ValueError: if the minibatch has to be padded but no roi is available
        for padding, or if gt_boxes is needed to match rois but was not given
    """
    gt_assignment = None
    if labels is None:
        # np.float64 instead of np.float: the alias was removed in NumPy 1.24
        iou = bbox_overlaps(rois[:, 1:].astype(np.float64),
                            gt_boxes[:, :4].astype(np.float64))
        gt_assignment = iou.argmax(axis=1)
        overlaps = iou.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= config.TRAIN.FG_THRESH)[0]
    if config.TRAIN.IGNORE_GAP:
        keep_inds = remove_repetition(rois[fg_indexes, 1:])
        fg_indexes = fg_indexes[keep_inds]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = np.random.choice(fg_indexes, size=fg_rois_per_this_image,
                                      replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < config.TRAIN.BG_THRESH_HI) &
                          (overlaps >= config.TRAIN.BG_THRESH_LO))[0]
    if config.TRAIN.IGNORE_GAP:
        keep_inds = remove_repetition(rois[bg_indexes, 1:])
        bg_indexes = bg_indexes[keep_inds]
    # Compute number of background RoIs to take from this image
    # (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = np.random.choice(bg_indexes, size=bg_rois_per_this_image,
                                      replace=False)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    if keep_indexes.shape[0] < rois_per_image:
        if config.TRAIN.GAP_SELECT_FROM_ALL:
            pad_pool = range(len(rois))
        else:
            # every roi that was not sampled as foreground
            pad_pool = list(set(range(len(rois))) - set(fg_indexes))
        if len(pad_pool) == 0:
            # without this guard the loop below could never make progress
            raise ValueError('sample_rois: no rois available to pad the minibatch')
        while keep_indexes.shape[0] < rois_per_image:
            # clamp to the pool actually sampled from; a smaller pool is
            # simply drawn from again on the next iteration
            gap = np.minimum(len(pad_pool), rois_per_image - keep_indexes.shape[0])
            gap_indexes = np.random.choice(pad_pool, size=gap, replace=False)
            keep_indexes = np.append(keep_indexes, gap_indexes)

    # When labels were supplied by the caller no roi -> gt matching exists yet.
    # It is needed to compute bbox targets and to pick the gt mask of every
    # foreground roi, so derive it here (before rois is re-indexed).
    if gt_assignment is None and (bbox_targets is None or len(fg_indexes) > 0):
        if gt_boxes is None:
            raise ValueError('sample_rois: gt_boxes is required to match rois to ground truth')
        gt_assignment = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                      gt_boxes[:, :4].astype(np.float64)).argmax(axis=1)

    # select labels
    labels = labels[keep_indexes]
    # set labels of bg_rois to be 0 (foreground rois come first in keep_indexes)
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:],
                                 gt_boxes[gt_assignment[keep_indexes], :4])
        if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(config.TRAIN.BBOX_MEANS)) /
                       np.array(config.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes)

    if config.TRAIN.IGNORE_GAP:
        # rois added only as padding get label -1 and zero bbox weight
        valid_rois_per_this_image = fg_rois_per_this_image + bg_rois_per_this_image
        labels[valid_rois_per_this_image:] = -1
        bbox_weights[valid_rois_per_this_image:] = 0

    # masks: -1 everywhere except for the sampled foreground rois
    mask_reg_targets = -np.ones((len(keep_indexes), 1, self._mask_size, self._mask_size))
    for idx, obj in enumerate(fg_indexes):
        # idx addresses the sampled arrays, obj the original (unsampled) roi
        gt_roi = np.round(gt_boxes[gt_assignment[obj], :-1]).astype(int)
        ex_roi = np.round(rois[idx, 1:]).astype(int)
        gt_mask = gt_masks[gt_assignment[obj]]
        mask_reg_target = intersect_box_mask(ex_roi, gt_roi, gt_mask)
        mask_reg_target = cv2.resize(mask_reg_target.astype(np.float64),
                                     (self._mask_size, self._mask_size))
        mask_reg_target = mask_reg_target >= self._binary_thresh
        mask_reg_targets[idx, ...] = mask_reg_target

    return rois, labels, bbox_targets, bbox_weights, mask_reg_targets