def single_scale_test_net(net, imdb, targe_size=320, vis=False):
    """Run single-scale detection over every image of ``imdb`` and score it.

    Detections are grouped as ``all_boxes[class][image]``, pickled to
    ``detections.pkl`` inside the output directory and finally either
    written out as VOC result files (``voc_2012_test``) or evaluated.

    Args:
        net: network passed to ``im_detect`` and ``get_output_dir``.
        imdb: image database supplying image paths, class names and the
            evaluation / result-writing hooks.
        targe_size: size argument forwarded unchanged to ``im_detect``.
        vis: when true, the detections of a few fixed images are drawn.
    """
    image_total = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(image_total)]
                 for _ in xrange(imdb.num_classes)]
    output_dir = get_output_dir(imdb, net)
    # 0-based indices of the only images that get visualised.
    vis_indices = (11, 47, 51)

    for img_idx in xrange(image_total):
        image = cv2.imread(imdb.image_path_at(img_idx))
        detections = im_detect(net, image, targe_size)

        # Class 0 is skipped; the last column of ``detections`` is the label.
        for cls_idx in xrange(1, imdb.num_classes):
            rows = np.where(detections[:, -1] == cls_idx)[0]
            if rows.shape[0] == 0:
                continue
            cls_dets = detections[rows, :-1].astype(np.float32)
            if 'coco' in imdb.name:
                # Soft-NMS is only applied for COCO-style datasets.
                survivors = soft_nms(cls_dets, sigma=0.5, Nt=0.30,
                                     threshold=cfg.confidence_threshold,
                                     method=1)
                cls_dets = cls_dets[survivors, :]
            all_boxes[cls_idx][img_idx] = cls_dets
            if vis and img_idx in vis_indices:
                vis_detections(image, imdb.classes[cls_idx], cls_dets)

        print('im_detect: {:d}/{:d}'.format(img_idx + 1, image_total))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    if imdb.name != 'voc_2012_test':
        print('Evaluating detections')
        imdb.evaluate_detections(all_boxes, output_dir)
        return

    # The VOC 2012 test split only gets result files written, no evaluation.
    print('Saving detections')
    imdb.config['use_salt'] = False
    imdb._write_voc_results_file(all_boxes)
def demo(net, image_name):
    """Detect object classes in one demo image and visualise them.

    The image is read from the hard-coded demo directory, passed through
    ``im_detect`` and, for every non-background class, filtered with
    soft-NMS before being handed to ``vis_detections``.

    Args:
        net: network passed to ``im_detect``.
        image_name: file name of the image inside the demo directory.

    Side effects:
        Adds the elapsed detection time to the module-level ``time``
        accumulator.
    """
    # Module-level running total of detection time (declared first so the
    # name is unambiguously global for the whole function).
    global time

    # Load the demo image
    im_file = os.path.join('/share/home/math4/img/', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    # Format first, then print: the original ``print(...).format(...)`` only
    # worked because Python 2 parsed it as a print *statement*.
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))
    time += timer.total_time

    # Visualize detections for each class
    CONF_THRESH = 0
    # Start the index at 1 because the background class is skipped.
    for cls_ind, cls in enumerate(CLASSES[1:], 1):
        # Columns 4:8 are used for every class.
        # NOTE(review): looks like class-agnostic box regression - confirm.
        cls_boxes = boxes[:, 4:8]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = soft_nms(dets, sigma=0.5, Nt=0.3, method=1, threshold=0.5)
        dets = dets[keep, :]
        vis_detections(image_name, im, cls, dets, thresh=CONF_THRESH)
def detect(self, net, img):
    """Detect the configured object classes in ``img``.

    Runs ``im_detect`` on ``self.net``, applies soft-NMS per class listed
    in ``CLASSES_FOR_DETECTION`` and keeps detections whose score reaches
    the confidence threshold.

    Args:
        net: accepted for interface compatibility.
            NOTE(review): this argument is never used; inference always
            runs on ``self.net`` - confirm that is intended.
        img: image handed to ``im_detect``.

    Returns:
        All retained detections of all selected classes stacked row-wise,
        each row being four box columns followed by the score.
    """
    min_score = 0.7
    nms_overlap = 0.2

    scores, boxes = im_detect(self.net, img)
    per_class = []
    for cls_ind, cls in enumerate(CLASSES):
        if cls not in CLASSES_FOR_DETECTION:
            continue
        first_col = 4 * cls_ind
        candidates = np.hstack(
            (boxes[:, first_col:first_col + 4],
             scores[:, cls_ind][:, np.newaxis])).astype(np.float32)
        kept = soft_nms(dets=candidates, Nt=nms_overlap, method=1)
        candidates = candidates[kept, :]
        confident = np.where(candidates[:, -1] >= min_score)[0]
        per_class.append(candidates[confident])

    dets_all = np.concatenate(per_class, axis=0)
    print('Detection found {:d} vehicles'.format(len(dets_all)))
    return dets_all
def single_scale_test_net(net, imdb, targe_size=320, vis=False, redoInference=True): num_images = len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) det_file = os.path.join(output_dir, 'detections.pkl') # Only redo inference when redo flag is set or detections file does not exist if redoInference or not os.path.isfile(det_file): print 'Detection files not existing OR redo parameter set --> Re-executing inference...' for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) det = im_detect(net, im, targe_size) for j in xrange(1, imdb.num_classes): inds = np.where(det[:, -1] == j)[0] if inds.shape[0] > 0: cls_dets = det[inds, :-1].astype(np.float32) if 'coco' in imdb.name: keep = soft_nms(cls_dets, sigma=0.5, Nt=0.30, threshold=cfg.confidence_threshold, method=1) cls_dets = cls_dets[keep, :] all_boxes[j][i] = cls_dets if vis: vis_detections(im, imdb.classes[j], cls_dets) print 'im_detect: {:d}/{:d}'.format(i + 1, num_images) with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) # Else: Load dumped detections file and proceed evaluation else: print 'Detection files already existing --> Loading detections from file...' with open(det_file, 'rb') as f: all_boxes = cPickle.load(f) if imdb.name == 'voc_2012_test': print 'Saving detections' imdb.config['use_salt'] = False imdb._write_voc_results_file(all_boxes) else: print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
def forward(self, bottom, top):
    """Merge per-scale detections into one class-labelled detection blob.

    Algorithm:
      * gather the detections of every pyramid level, class by class
      * apply soft-NMS to the gathered detections of the class
      * apply bounding box voting
      * emit rows of (class, box..., score) into ``top[0]``

    Each bottom blob is viewed as rows of 6 values whose first column is
    the class index.
    """
    scale_count = len(bottom)
    class_count = self._num_classes
    nms_thresh = self._nms_thresh  # read as in the original; unused below

    if DEBUG:
        print('classes: {}, scales {}'.format(class_count, scale_count))

    # Start from an empty (0, 6) array so stacking works with no detections.
    merged_parts = [np.zeros((0, 6), dtype=np.float32)]
    for cls in xrange(class_count):
        cls_parts = [np.zeros((0, 6), dtype=np.float32)]
        for scale in xrange(scale_count):
            scale_dets = bottom[scale].data.reshape((-1, 6))
            rows = np.where(scale_dets[:, 0] == cls)[0]
            if len(rows) > 0:
                cls_parts.append(scale_dets[rows, :])
        cls_dets = np.vstack(cls_parts)

        # NOTE(review): a class with exactly one detection is dropped by
        # this ``> 1`` test - confirm whether ``> 0`` was intended.
        if cls_dets.shape[0] <= 1:
            continue

        # With method=2 the result is used as an array of kept detections
        # (it has a ``shape``), not as an index list.
        kept = soft_nms(cls_dets[:, 1:], sigma=0.5, method=2)
        labels = cls * np.ones((kept.shape[0], 1), dtype=np.float32)
        voted_boxes, voted_scores = _bbox_voting(
            cls_dets[:, 1:], kept, self._num_neighbors, self._bbox_thresh)
        merged_parts.append(
            np.hstack((labels, voted_boxes, voted_scores[:, np.newaxis])))

    merge_dets = np.vstack(merged_parts)
    top[0].reshape(*(merge_dets.shape))
    top[0].data[...] = merge_dets
def single_scale_test_net(net, imdb, targe_size=(320, 320), vis=False):
    """Single-scale test pass with a two-component input size and timing.

    Identical in flow to the plain single-scale test: detect per image,
    collect ``all_boxes[class][image]``, dump ``detections.pkl`` and then
    write results (``voc_2012_test``) or evaluate. In addition the running
    average of image-load + detection time is printed per image.

    Args:
        net: network passed to ``im_detect_ratio`` and ``get_output_dir``.
        imdb: image database supplying image paths, class names and the
            evaluation / result-writing hooks.
        targe_size: two-element sequence; both elements are forwarded to
            ``im_detect_ratio`` in order.
        vis: when true, every per-class detection set is visualised.
    """
    image_total = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(image_total)]
                 for _ in xrange(imdb.num_classes)]
    output_dir = get_output_dir(imdb, net)

    elapsed_total = 0
    for img_idx in xrange(image_total):
        # The timed span covers reading the image and running the network.
        started = time.time()
        image = cv2.imread(imdb.image_path_at(img_idx))
        detections = im_detect_ratio(net, image, targe_size[0], targe_size[1])
        elapsed_total += time.time() - started

        # Class 0 is skipped; the last column holds the class label.
        for cls_idx in xrange(1, imdb.num_classes):
            rows = np.where(detections[:, -1] == cls_idx)[0]
            if rows.shape[0] == 0:
                continue
            cls_dets = detections[rows, :-1].astype(np.float32)
            if 'coco' in imdb.name:
                survivors = soft_nms(cls_dets, sigma=0.5, Nt=0.30,
                                     threshold=cfg.confidence_threshold,
                                     method=1)
                cls_dets = cls_dets[survivors, :]
            all_boxes[cls_idx][img_idx] = cls_dets
            if vis:
                vis_detections(image, imdb.classes[cls_idx], cls_dets)

        print('im_detect: {:d}/{:d} {:.4f}s'.format(
            img_idx + 1, image_total, elapsed_total / (img_idx + 1)))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    if imdb.name != 'voc_2012_test':
        print('Evaluating detections')
        imdb.evaluate_detections(all_boxes, output_dir)
        return

    print('Saving detections')
    imdb.config['use_salt'] = False
    imdb._write_voc_results_file(all_boxes)
def demo(net, image_name):
    """Detect object classes in one demo image and draw the result.

    The image is read from ``<cfg.DATA_DIR>/demo/vehicles``, passed through
    ``im_detect`` and, for every non-background class, filtered with
    soft-NMS and a confidence threshold. All surviving detections are
    drawn in a single ``vis_detections_cv`` call.

    Args:
        net: network passed to ``im_detect``.
        image_name: file name of the image inside the demo directory.
    """
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo/vehicles', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc(average=False)

    # Second timed span: per-class post-processing. ``timer.total_time``
    # printed below is read after both spans have been closed.
    timer.tic()
    dets_list = []
    cls_list = []
    CONF_THRESH = 0.7
    NMS_THRESH = 0.2
    # Start the index at 1 because the background class is skipped.
    for cls_ind, cls in enumerate(CLASSES[1:], 1):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = soft_nms(dets=dets, Nt=NMS_THRESH, method=1)
        dets = dets[keep, :]
        dets = dets[np.where(dets[:, -1] >= CONF_THRESH)[0]]
        dets_list.append(dets)
        # One class index per kept detection, aligned with ``dets_all`` rows.
        cls_list.extend([cls_ind] * len(dets))
    dets_all = np.concatenate(dets_list, axis=0)
    timer.toc(average=False)

    print('Detection took {:.3f}s and found {:d} objects'.format(
        timer.total_time, len(dets_all)))
    # Fixed: the original passed the undefined name ``im_name`` here, which
    # only worked if a global of that name happened to exist.
    vis_detections_cv(image_name, im, dets_all, cls_list, thresh=CONF_THRESH)
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
    """Test a Fast R-CNN network (optionally with viewpoints) on an imdb.

    If ``detections.pkl`` already exists in the output directory it is
    loaded and inference is skipped. Otherwise every image is run through
    ``im_detect``, per-class detections are thresholded, passed through the
    configured NMS variant and stored. Detections are then evaluated and a
    summary of the recorded ``im_detect`` timings is printed.

    Args:
        net: network passed to ``im_detect`` and ``get_output_dir``.
        imdb: image database to test on.
        max_per_image: cap on detections per image over all classes
            (values <= 0 disable the cap).
        thresh: minimum class score for a detection to be considered.
        vis: when true, detections are drawn together with the KITTI
            ground truth of the ``valsplit`` set.
    """
    if vis:
        from datasets.kitti import kitti
        kitti = kitti("valsplit")

    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score, [cfg.VIEWP_BINS x viewpoint prob. dist])
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    cache_file = os.path.join(output_dir, 'detections.pkl')
    times_vector_ = []

    if os.path.exists(cache_file):
        # NOTE: unpickling executes arbitrary code; only use a cache file
        # that this pipeline wrote itself.
        with open(cache_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        print('Detections cache loaded')
        warnings.warn(
            "PLEASE MAKE SURE THAT YOU REALLY WANT TO USE THE CACHE!",
            UserWarning)
    else:
        # timers
        _t = {'im_detect': Timer(), 'misc': Timer()}

        if not cfg.TEST.HAS_RPN:
            roidb = imdb.roidb

        ndetections = 0

        # Number of trailing viewpoint columns appended to each detection.
        if cfg.SMOOTH_L1_ANGLE or cfg.CONTINUOUS_ANGLE:
            viewp_bins = 1
        else:
            viewp_bins = cfg.VIEWP_BINS

        # Width of the viewpoint output over all classes.
        if cfg.SMOOTH_L1_ANGLE:
            allclasses_viewp_bins = imdb.num_classes
        elif cfg.CONTINUOUS_ANGLE:
            allclasses_viewp_bins = 1
        else:
            allclasses_viewp_bins = imdb.num_classes * cfg.VIEWP_BINS

        # Column holding the score: last one, or the one just before the
        # viewpoint columns when those are appended.
        score_col = -viewp_bins - 1 if cfg.VIEWPOINTS else -1

        for i, img_file in enumerate(imdb.image_index):
            if vis:
                detts = np.empty([0, 6])

            # filter out any ground truth boxes
            if cfg.TEST.HAS_RPN:
                box_proposals = None
            elif cfg.TEST.GTPROPOSALS:
                box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] > -1]
            else:
                # The roidb may contain ground-truth rois (for example, if
                # the roidb comes from the training or val split). We only
                # want to evaluate detection on the *non*-ground-truth
                # rois, i.e. those with the gt_classes field set to 0.
                box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

            if box_proposals is not None and box_proposals.shape[0] <= 0:
                # No proposals: produce empty outputs without running the net.
                # NOTE(review): ``im`` is not loaded on this path, so the
                # ``vis`` drawing below reuses a previous image (or fails on
                # the first one) - confirm whether that matters.
                scores = np.empty((0, imdb.num_classes), dtype=np.float32)
                boxes = np.empty((0, imdb.num_classes * 4), dtype=np.float32)
                if cfg.VIEWPOINTS:
                    assert cfg.CONTINUOUS_ANGLE == False and cfg.SMOOTH_L1_ANGLE == False, 'not implemented'
                    viewpoints = np.empty((0, allclasses_viewp_bins),
                                          dtype=np.float32)
            else:
                if cfg.TEST.FOURCHANNELS:
                    im = cv2.imread(imdb.image_path_at(i),
                                    cv2.IMREAD_UNCHANGED)
                else:
                    im = cv2.imread(imdb.image_path_at(i))

                _t['im_detect'].tic()
                if cfg.VIEWPOINTS:
                    scores, boxes, viewpoints = im_detect(
                        net, im, box_proposals)
                else:
                    scores, boxes = im_detect(net, im, box_proposals)
                if i > 3:  # CUDA warmup: the first images are not timed
                    times_vector_.append(
                        _t['im_detect'].toc(average=False))

            _t['misc'].tic()
            # skip j = 0, because it's the background class
            for j in xrange(1, imdb.num_classes):
                inds = np.where(scores[:, j] > thresh)[0]
                ndetections += len(inds)
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds, j * 4:(j + 1) * 4]

                if not cfg.VIEWPOINTS:
                    columns = (cls_boxes, cls_scores[:, np.newaxis])
                elif cfg.SMOOTH_L1_ANGLE:
                    viewp = viewpoints[inds, j]
                    columns = (cls_boxes, cls_scores[:, np.newaxis],
                               viewp[:, np.newaxis])
                elif cfg.CONTINUOUS_ANGLE:
                    viewp = viewpoints[inds]
                    columns = (cls_boxes, cls_scores[:, np.newaxis], viewp)
                else:
                    # Softmax is only performed over the class N_BINSx
                    # "slot" (that is why we apply it outside Caffe)
                    cls_viewp = softmax(
                        viewpoints[inds,
                                   j * cfg.VIEWP_BINS:(j + 1) * cfg.VIEWP_BINS])
                    # Assert that the result from softmax makes sense
                    assert (all(abs(np.sum(cls_viewp, axis=1) - 1) < 0.1))
                    columns = (cls_boxes, cls_scores[:, np.newaxis], cls_viewp)
                cls_dets = np.hstack(columns).astype(np.float32, copy=False)

                if cfg.TEST.DO_NMS:
                    # NMS only sees box + score, never the viewpoint columns.
                    if cfg.VIEWPOINTS:
                        nms_input = cls_dets[:, :-viewp_bins]
                    else:
                        nms_input = cls_dets

                    if cfg.USE_CUSTOM_NMS:
                        nms_returns = nms(nms_input, cfg.TEST.NMS,
                                          force_cpu=True)
                        # A falsy result means nothing survived; otherwise
                        # the first element is the list of kept indices.
                        keep = nms_returns[0] if nms_returns else []
                    elif cfg.TEST.SOFT_NMS > 0:
                        keep = soft_nms(nms_input, method=cfg.TEST.SOFT_NMS)
                    else:
                        keep = nms(nms_input, cfg.TEST.NMS)
                    cls_dets = cls_dets[keep, :]
                else:
                    # Without NMS just order detections by descending score.
                    cls_dets = cls_dets[
                        cls_dets[:, score_col].argsort()[::-1], :]

                if vis:
                    box_scores = np.array(cls_dets[:, :5])
                    pre_detts = np.hstack(
                        (box_scores, j * np.ones((box_scores.shape[0], 1))))
                    detts = np.vstack((detts, pre_detts))

                all_boxes[j][i] = cls_dets

            if vis:
                gt_roidb = kitti._load_kitti_annotation(img_file)
                vis_detections(im, imdb.classes, detts, gt_roidb)

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                image_scores = np.hstack([
                    all_boxes[j][i][:, score_col]
                    for j in xrange(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
                    # We usually don't want to do this
                    print("WARNING! Limiting the number of detections")
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in xrange(1, imdb.num_classes):
                        keep = np.where(
                            all_boxes[j][i][:, score_col] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]
            _t['misc'].toc()

            print('im_detect: {:d}/{:d} - {:d} detections - {:.3f}s {:.3f}s'
                  .format(i + 1, num_images, ndetections,
                          _t['im_detect'].average_time,
                          _t['misc'].average_time))

        # Only freshly computed detections are written; a loaded cache is
        # already on disk.
        det_file = os.path.join(output_dir, 'detections.pkl')
        with open(det_file, 'wb') as f:
            cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)

    # Fixed: no timings exist when the cache was used or when at most four
    # images were processed; reducing an empty sequence with np.max/np.min
    # raises, so the summary is only printed when there is data.
    if not times_vector_:
        print('Times: no im_detect timings were recorded')
        return

    print('Times:')
    print('mean: {:.3f}s'.format(np.mean(times_vector_)))
    print('std: {:.3f}s'.format(np.std(times_vector_)))
    print('quartiles: {:.3f}s / {:.3f}s / {:.3f}s'.format(
        np.percentile(times_vector_, 25), np.percentile(times_vector_, 50),
        np.percentile(times_vector_, 75)))
    print('max: {:.3f}s / min: {:.3f}s'.format(np.max(times_vector_),
                                               np.min(times_vector_)))
def psoft(cls_dets):
    """Apply soft-NMS with the configured method and return the survivors.

    Args:
        cls_dets: detections handed to ``soft_nms``; the value it returns
            is used to index this same array.

    Returns:
        ``cls_dets`` indexed by the result of ``soft_nms``.
    """
    return cls_dets[soft_nms(cls_dets, method=cfg.TEST.SOFT_NMS)]
def test_net(prototxt, caffemodel, imdb, gpus, rank, results_dict, roidb=None,
             max_per_image=100, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on this worker's share of an imdb.

    The worker binds to ``gpus[rank]``, loads the network and processes
    images ``rank, rank + len(gpus), rank + 2 * len(gpus), ...``. Its
    detections are stored in ``results_dict[rank]``.

    Args:
        prototxt: network definition passed to ``caffe.Net``.
        caffemodel: weights file; its base name becomes ``net.name``.
        imdb: image database to test on.
        gpus: sequence of GPU ids, one per worker.
        rank: index of this worker into ``gpus``.
        results_dict: mapping that receives ``(all_boxes, output_dir)``
            under the key ``rank``.
        roidb: proposals per image; indexed whenever ``cfg.TEST.HAS_RPN``
            is false, so it must be supplied in that mode.
        max_per_image: cap on detections per image over all classes
            (values <= 0 disable the cap).
        thresh: minimum class score for a detection to be considered.
        vis: unused in this function.
    """
    cfg.GPU_ID = gpus[rank]
    caffe.set_mode_gpu()
    caffe.set_device(cfg.GPU_ID)
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)
    net.name = os.path.splitext(os.path.basename(caffemodel))[0]

    output_dir = get_output_dir(imdb, net)

    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    # where ``image`` counts only the images handled by this worker.
    all_boxes = [[[] for _ in xrange(rank, num_images, len(gpus))]
                 for _ in xrange(imdb.num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    for result_index, i in enumerate(range(rank, num_images, len(gpus))):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the
            # roidb comes from the training or val split). We only want to
            # evaluate detection on the *non*-ground-truth rois, i.e. those
            # with the gt_classes field set to 0.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        if box_proposals is not None and box_proposals.shape[0] == 0:
            # No proposals: emit empty outputs instead of running the net.
            scores = np.zeros((0, imdb.num_classes), np.float32)
            boxes = np.zeros((0, 4 * imdb.num_classes), np.float32)
        else:
            scores, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = soft_nms(cls_dets, method=cfg.TEST.SOFT_NMS)
            dets_NMSed = cls_dets[keep, :]
            if cfg.TEST.BBOX_VOTE:
                cls_dets = bbox_vote(dets_NMSed, cls_dets)
            else:
                cls_dets = dets_NMSed
            # Fixed: ``keep`` indexes the pre-NMS array. The original applied
            # it a second time to the already filtered / voted array, which
            # picks wrong rows or raises IndexError unless ``keep`` happens
            # to be 0..N-1.
            save_detections(im, imdb.classes[j], cls_dets, j)
            all_boxes[j][result_index] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([
                all_boxes[j][result_index][:, -1]
                for j in xrange(1, imdb.num_classes)
            ])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(
                        all_boxes[j][result_index][:, -1] >= image_thresh)[0]
                    all_boxes[j][result_index] = \
                        all_boxes[j][result_index][keep, :]
        _t['misc'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time))

    results_dict[rank] = (all_boxes, output_dir)
def test_net(net, imdb, max_per_image=400, thresh=-np.inf, vis=False):
    """Run a Fast R-CNN network over an image database with soft-NMS.

    Detections are gathered as ``all_boxes[class][image]`` (rows of
    x1, y1, x2, y2, score), capped per image, pickled to
    ``detections.pkl`` in the output directory and evaluated.

    Args:
        net: network passed to ``im_detect`` and ``get_output_dir``.
        imdb: image database to test on.
        max_per_image: cap on detections per image over all classes
            (values <= 0 disable the cap).
        thresh: minimum class score for a detection to be considered.
        vis: when true, every per-class detection set is visualised.
    """
    image_total = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(image_total)]
                 for _ in xrange(imdb.num_classes)]
    output_dir = get_output_dir(imdb, net)

    timers = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for img_idx in xrange(image_total):
        if cfg.TEST.HAS_RPN:
            proposals = None
        else:
            # Only rois whose gt_classes entry is 0 (no ground truth) are
            # used, so ground-truth boxes stored in the roidb are left out.
            entry = roidb[img_idx]
            proposals = entry['boxes'][entry['gt_classes'] == 0]

        image = cv2.imread(imdb.image_path_at(img_idx))
        timers['im_detect'].tic()
        scores, boxes = im_detect(net, image, proposals)
        timers['im_detect'].toc()

        timers['misc'].tic()
        # Index 0 is the background class and is skipped.
        for cls_idx in xrange(1, imdb.num_classes):
            picked = np.where(scores[:, cls_idx] > thresh)[0]
            # Agnostic mode always reads box columns 4:8; otherwise each
            # class has its own group of four columns.
            first_col = 4 if cfg.TEST.AGNOSTIC else cls_idx * 4
            cls_dets = np.hstack(
                (boxes[picked, first_col:first_col + 4],
                 scores[picked, cls_idx][:, np.newaxis])
            ).astype(np.float32, copy=False)
            survivors = soft_nms(cls_dets, method=cfg.TEST.SOFT_NMS)
            cls_dets = cls_dets[survivors, :]
            if vis:
                vis_detections(image, imdb.classes[cls_idx], cls_dets)
            all_boxes[cls_idx][img_idx] = cls_dets

        # Cap the number of detections per image over all classes.
        if max_per_image > 0:
            pooled_scores = np.hstack(
                [all_boxes[cls_idx][img_idx][:, -1]
                 for cls_idx in xrange(1, imdb.num_classes)])
            if len(pooled_scores) > max_per_image:
                cutoff = np.sort(pooled_scores)[-max_per_image]
                for cls_idx in xrange(1, imdb.num_classes):
                    current = all_boxes[cls_idx][img_idx]
                    strong = np.where(current[:, -1] >= cutoff)[0]
                    all_boxes[cls_idx][img_idx] = current[strong, :]
        timers['misc'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(img_idx + 1, image_total,
                      timers['im_detect'].average_time,
                      timers['misc'].average_time))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
def test_net(net, imdb, max_per_image=100, thresh=0.05, boxes_num_per_batch=0,
             vis=False):
    """Test a Fast R-CNN network on an image database.

    Proposals can optionally be fed to the network in fixed-size batches.
    Detections before NMS are kept in ``raw_all_boxes`` and those after
    NMS (and optional box voting) in ``all_boxes``; both use the layout
    ``[cls][image] = N x 5`` rows of (x1, y1, x2, y2, score).

    Args:
        net: network passed to ``im_detect`` and ``get_output_dir``.
        imdb: image database to test on.
        max_per_image: cap on detections per image over all classes
            (values <= 0 disable the cap).
        thresh: minimum class score for a detection to be considered.
        boxes_num_per_batch: when > 0 and proposals are available, run the
            network on this many proposals at a time; the last batch is
            padded with copies of the first proposal.
        vis: when true, every per-class detection set is visualised.
    """
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    raw_all_boxes = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            # Fixed: there are no external proposals in RPN mode, so the
            # empty-proposal check and the batching below must not touch
            # ``box_proposals``.
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the
            # roidb comes from the training or val split). We only want to
            # evaluate detection on the *non*-ground-truth rois, i.e. those
            # with the gt_classes field set to 0.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
            if box_proposals.shape[0] < 1:
                print('Oops, {} does not have any bbox!'.format(
                    imdb.image_path_at(i)))
                continue

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        if boxes_num_per_batch > 0 and box_proposals is not None:
            num_boxes = box_proposals.shape[0]
            # Ceiling division. Fixed: ``//`` keeps this an integer even
            # when ``/`` means true division.
            num_batch = ((num_boxes + boxes_num_per_batch - 1)
                         // boxes_num_per_batch)
            padded = num_batch * boxes_num_per_batch
            scores_batch = np.zeros((padded, imdb.num_classes),
                                    dtype=np.float32)
            boxes_batch = np.zeros((padded, 4 * imdb.num_classes),
                                   dtype=np.float32)
            # Pre-fill with copies of the first box so every batch is full,
            # then overwrite the leading rows with the real proposals.
            rois = np.tile(box_proposals[0, :], (padded, 1))
            rois[:num_boxes, :] = box_proposals
            for b in xrange(num_batch):
                lo = b * boxes_num_per_batch
                hi = lo + boxes_num_per_batch
                score, box = im_detect(net, im, rois[lo:hi, :])
                scores_batch[lo:hi, :] = score
                boxes_batch[lo:hi, :] = box
            # discard the results of the padding rows
            scores = scores_batch[:num_boxes, :]
            boxes = boxes_batch[:num_boxes, :]
        else:
            scores, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        if cfg.TEST.SAVE_MAT:
            mat_dir = os.path.join(output_dir, imdb._image_set)
            if not os.path.exists(mat_dir):
                os.mkdir(mat_dir)
            im_name = os.path.splitext(
                os.path.basename(imdb.image_path_at(i)))[0]
            sio.savemat('%s/%s.mat' % (mat_dir, im_name),
                        {'scores': scores, 'boxes': boxes})

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            raw_all_boxes[j][i] = cls_dets

            if cfg.TEST.SOFT_NMS:
                keep = soft_nms(cls_dets, method=cfg.TEST.SOFT_NMS_METHOD)
            else:
                keep = nms(cls_dets, cfg.TEST.NMS)

            if cfg.TEST.BBOX_VOTE:
                # Voting receives the NMS survivors and all candidates.
                cls_dets_after_nms = cls_dets[keep, :]
                cls_dets = bbox_voting(cls_dets_after_nms, cls_dets,
                                       threshold=cfg.TEST.BBOX_VOTE_THRESH)
            else:
                cls_dets = cls_dets[keep, :]

            if vis:
                vis_detections(im, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    if cfg.TEST.CACHE_RAW_ABOXES:
        raw_aboxes_file = os.path.join(output_dir, 'raw_all_boxes.pkl')
        with open(raw_aboxes_file, 'wb') as f:
            cPickle.dump(raw_all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
def test_simple(net, fname_tst, img_dir, out_dir, max_per_image=400,
                thresh=-np.inf, vis=False):
    """Run detection on the images listed in a text file and pickle results.

    For every listed image the per-class detections that survive soft-NMS
    are stacked into one array, together with the full score rows of the
    same proposals. Results are written to ``out_dir`` in chunks.

    Args:
        net: network passed to ``im_detect``.
        fname_tst: text file with one image identifier per line.
        img_dir: prefix concatenated directly with ``<identifier>.jpg``
            (no path separator is inserted).
        out_dir: directory for the ``det_<index>.pkl`` files; created if
            missing.
        max_per_image: unused in this function.
        thresh: minimum class score for a detection to be considered.
        vis: unused in this function.

    Output:
        Each ``det_<index>.pkl`` maps image identifier to a dict with
        ``'bbox'`` (rows of x1, y1, x2, y2, score, class index) and
        ``'cls'`` (the score rows of the kept proposals). ``<index>`` is
        the 0-based index of the last image contained in that file.
    """
    save_count = 100
    # NOTE(review): classes 1..19 are iterated and score rows are assumed
    # to be 20 wide - confirm against the network's output size.
    num_classes = 20

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    data = {}
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    with open(fname_tst, 'r') as f_in:
        # Blank lines carry no identifier and would only yield a bad path.
        files = [name for name in (line.strip() for line in f_in) if name]

    for count, line in enumerate(files):
        path = img_dir + line + '.jpg'
        im = cv2.imread(path)
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im)
        _t['im_detect'].toc()

        _t['misc'].tic()
        dets_all = np.empty((0, 6), np.float32)
        scores_all = np.empty((0, num_classes), np.float32)
        # skip j = 0, because it's the background class
        for j in xrange(1, num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = soft_nms(cls_dets, method=1)
            # Append the class index as a sixth column.
            cls_dets = np.hstack(
                (cls_dets, j * np.ones((cls_dets.shape[0], 1))))
            if len(cls_dets) > 0:
                dets_all = np.vstack((dets_all, cls_dets[keep].copy()))
                scores_all = np.vstack(
                    (scores_all, scores[inds[keep]].copy()))

        data[line] = {'bbox': dets_all.copy(), 'cls': scores_all.copy()}
        _t['misc'].toc()
        print(count, line, _t['im_detect'].average_time,
              _t['misc'].average_time)

        # Fixed: the original ``count+1 % save_count == 0`` parsed as
        # ``count + (1 % save_count) == 0`` and was never true, so nothing
        # was flushed until the very last image.
        processed = count + 1
        if processed % save_count == 0 or processed == len(files):
            det_file = os.path.join(out_dir, 'det_%d.pkl' % count)
            with open(det_file, 'wb') as f:
                cPickle.dump(data, f, cPickle.HIGHEST_PROTOCOL)
            # Start a fresh chunk so files do not repeat earlier images.
            data = {}