def prepare_roidb(imdb):
    """Enrich the imdb's roidb by adding some derived quantities that
    are useful for training.

    For every RoI this records the maximum overlap with any ground-truth
    box and the class achieving that maximum, plus the image id/path
    (and, for non-COCO datasets, the image width/height).

    Modifies ``imdb.roidb`` in place; returns nothing.
    """
    roidb = imdb.roidb
    if not imdb.name.startswith('coco'):
        # COCO exposes image sizes directly; otherwise read them from the
        # image files once, up front.
        sizes = [PIL.Image.open(imdb.image_path_at(i)).size
                 for i in range(imdb.num_images)]
    for i in range(len(imdb.image_index)):
        roidb[i]['img_id'] = imdb.image_id_at(i)
        roidb[i]['image'] = imdb.image_path_at(i)
        if not imdb.name.startswith('coco'):
            roidb[i]['width'] = sizes[i][0]
            roidb[i]['height'] = sizes[i][1]
        # need gt_overlaps as a dense array for argmax
        gt_overlaps = roidb[i]['gt_overlaps'].toarray()
        # max overlap with gt over classes (columns)
        max_overlaps = gt_overlaps.max(axis=1)
        # gt class that had the max overlap
        max_classes = gt_overlaps.argmax(axis=1)
        roidb[i]['max_classes'] = max_classes
        roidb[i]['max_overlaps'] = max_overlaps
        # sanity checks (vectorized with np.all instead of builtin all over
        # np.where index arrays):
        # max overlap of 0 => class must be zero (background)
        assert np.all(max_classes[max_overlaps == 0] == 0)
        # max overlap > 0 => class must be a non-zero (foreground) class
        assert np.all(max_classes[max_overlaps > 0] != 0)
def prepare_roidb(imdb):
    """Augment ``imdb.roidb`` in place with derived training quantities.

    For every RoI this precomputes the maximum overlap with any
    ground-truth box and the class achieving that maximum, and records
    the image path (plus width/height for non-COCO datasets).
    """
    roidb = imdb.roidb
    needs_sizes = not imdb.name.startswith('coco')
    if needs_sizes:
        # COCO stores image sizes elsewhere; otherwise read them from disk.
        sizes = [PIL.Image.open(imdb.image_path_at(k)).size
                 for k in range(imdb.num_images)]
    for k in range(len(imdb.image_index)):
        entry = roidb[k]
        entry['image'] = imdb.image_path_at(k)
        if needs_sizes:
            entry['width'], entry['height'] = sizes[k]
        # densify gt_overlaps so max/argmax can run over class columns
        overlaps = entry['gt_overlaps'].toarray()
        best_overlap = overlaps.max(axis=1)
        best_class = overlaps.argmax(axis=1)
        entry['max_classes'] = best_class
        entry['max_overlaps'] = best_overlap
        # sanity check: zero overlap must map to the background class (0)
        bg = np.where(best_overlap == 0)[0]
        assert all(best_class[bg] == 0)
        # ...and any positive overlap must map to a foreground class
        fg = np.where(best_overlap > 0)[0]
        assert all(best_class[fg] != 0)
#num_images = 100 #del imdb.image_index[num_images:] #num_images = 10#len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) zl.tic() # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} max_per_image = 50 thresh = 0.7 for i in xrange(num_images): # filter out any ground truth boxes im_path = imdb.image_path_at(i) im_name = im_path.split('/')[-1] eb_roi = h5_rois[im_name] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() #scores, boxes = im_detect(net, im, box_proposals) scores, boxes = im_detect(net, im, eb_roi) attention = net.blobs['attention'].data.squeeze() #net.blobs['attention'].data #scores = np.multiply(scores,attention) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0]
# NOTE(review): headless fragment — the matching `try:` (and `fid`, `args`)
# are outside this chunk.
except:
    # fall back to byte-decoding for pickles written by Python 2
    content = pickle.load(fid, encoding='bytes')
# Build a cache directory mirroring the last few path components of args.box.
boxpathList = args.box.split('/')
save_base = '/'.join(boxpathList[-5:-1])
save_path = os.path.join('../cache', save_base)
save_path = os.path.join(save_path, boxpathList[-1].split('.')[0])
if not os.path.exists(save_path):
    os.makedirs(save_path)
# NOTE(review): this prefixes '../cache/' a second time on top of the
# os.path.join above — confirm the intended final path.
save_path = '../cache/' + save_path
imdbname = boxpathList[-5]
print('getting imdb {:s}'.format(imdbname))
# NOTE(review): hard-coded dataset — ignores imdbname parsed just above.
imdb = get_imdb('voc_2007_test')
for idx in range(len(imdb.image_index)):
    im = cv2.imread(imdb.image_path_at(idx))
    im = im[:, :, ::-1]  # BGR -> RGB for matplotlib
    height, width, depth = im.shape
    dpi = 80
    # size the figure so one pixel of the image maps to one figure pixel
    plt.figure(figsize=(width / dpi, height / dpi), dpi=dpi)
    colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
    plt.imshow(im)  # plot the image for matplotlib
    currentAxis = plt.gca()
    plt.axis('off')
    # scale each detection back up to the image
    # scale = torch.Tensor([rgb_image.shape[1::-1], rgb_image.shape[1::-1]])
    for i in range(20):
        for j in range(len(content[i][idx])):
            score = content[i][idx][j][-1]
            if score > 0.1:
                label_name = imdb._classes[i]
                # NOTE(review): fragment is cut off here — the drawing code
                # continues outside this chunk.
def image_path_at(self, i, domain=Domain.SOURCE):
    """Return the absolute path to image ``i`` in ``domain``'s image sequence."""
    # delegate to the imdb that backs the requested domain
    return self.get_imdb(domain).image_path_at(i)
# NOTE(review): headless fragment — `args`, `imdb`, `roidb`, `prefix_path`,
# `createNoisyBox` and `vis_dets` are defined outside this chunk.
if args.save:
    # prefer the roidb index at size 30; fall back to the sentinel lookup,
    # then to the full roidbSize list
    try:
        index = imdb._get_roidb_index_at_size(30)
    except:
        index = imdb._get_roidb_index_at_size(-1)
    if index == -1:
        index = len(imdb.roidbSize)
    print("saving {} imdb annotations to output folder...".format(index))
    print(prefix_path)
    for i in range(index):
        print(roidb[i])
        boxes = roidb[i]['boxes']
        if len(boxes) == 0:
            continue
        #img_path = roidb[i]['image']
        img_path = imdb.image_path_at(i)
        im = cv2.imread(img_path)
        if roidb[i]['flipped']:
            # horizontally flip pixels to match flipped annotations
            im = im[:, ::-1, :]
        cls = roidb[i]['gt_classes']
        if args.save_box:
            if args.save_noise > 0:
                # write several jittered copies of the boxes
                for j in range(4):
                    fn = osp.join(prefix_path,"{}_{}_{}.png".format(imdb.name,i,j))
                    n_boxes = boxes + createNoisyBox(30)
                    vis_dets(im,cls,n_boxes,i,fn=fn)
            else:
                fn = osp.join(prefix_path,"{}_{}.png".format(imdb.name,i))
                vis_dets(im,cls,boxes,i,fn=fn)
        else:
            # NOTE(review): fragment is cut off here — the else branch body
            # lives outside this chunk.
def eval():
    """Run RPN-based R-FCN detection on voc_0712_test and evaluate.

    Loads a fixed prototxt/caffemodel pair, runs im_detect over a truncated
    set of test images, applies per-class NMS plus a per-image detection cap,
    dumps detections.pkl and calls the imdb's evaluation.

    Python 2 / Caffe code; requires a GPU. NOTE(review): `eval` shadows the
    builtin — kept for caller compatibility.
    """
    cfg_from_file('experiments/cfgs/rfcn_end2end.yml')
    #cfg_from_file('experiments/cfgs/rfcn_end2end_iccv_eb.yml')
    imdb, roidb = combined_roidb('voc_0712_test')
    import cv2
    net =None
    prototxt = 'models/pascal_voc/ResNet-50/rfcn_end2end/test_iccv_rpn.prototxt'
    #model = 'data/rfcn_models/resnet50_rfcn_iter_1200.caffemodel'
    model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_iter_1600.caffemodel'
    #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_600.caffemodel'
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(prototxt, model, caffe.TEST)
    # (commented-out experiment: copy conv_new_1_zl weights from a second net)
    #prototxt = 'models/pascal_voc/ResNet-50/rfcn_end2end/test_iccv_rpn.prototxt'
    ##model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_800.caffemodel'
    #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_iter_1600.caffemodel'
    #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_800.caffemodel'
    #net2 = caffe.Net(prototxt, model, caffe.TEST)
    #net.params['conv_new_1_zl'][0].data[...] = net2.params['conv_new_1_zl'][0].data[...]
    #net.params['conv_new_1_zl'][1].data[...] = net2.params['conv_new_1_zl'][1].data[...]
    #net2 = None
    net.name = 'resnet50_rfcn_iter_1200'
    num_images = len(imdb.image_index)
    # debug: restrict evaluation to the first 100 images
    num_images = 100
    del imdb.image_index[num_images:]
    #num_images = 10#len(imdb.image_index)
    # all_boxes[cls][image] = N x 5 array of detections [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    output_dir = get_output_dir(imdb, net)
    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}
    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb
    max_per_image = 300
    thresh = 0.0
    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the
            # roidb comes from the training or val split). We only want to
            # evaluate detection on the *non*-ground-truth rois. We select
            # the rois that have the gt_classes field set to 0, which means
            # there's no ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, box_proposals)
        attention = net.blobs['attention'].data.squeeze()
        #net.blobs['attention'].data
        #scores = np.multiply(scores,attention)
        _t['im_detect'].toc()
        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            if cfg.TEST.AGNOSTIC:
                # class-agnostic regression: a single box per RoI
                cls_boxes = boxes[inds, 1:]
            else:
                cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS,force_cpu=True)
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i] = cls_dets
            cls_str = imdb.classes[j]
            for roi in all_boxes[j][i]:
                # NOTE(review): cv2 drawing expects integer pixel coords;
                # roi values here are float32 — confirm with the cv2 build
                # in use.
                cv2.putText(im,cls_str,(roi[0],roi[1]),cv2.FONT_HERSHEY_COMPLEX,1.0,(255,0,0),1)
                cv2.rectangle(im,(roi[0],roi[1]),(roi[2],roi[3]),(0,0,255),1)
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # keep only the detections above the max_per_image-th score
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        #cv2.imshow('vis',im)
        #cv2.waitKey(0)
        _t['misc'].toc()
        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)
    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)
    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
def eval():
    """Run the EdgeBox + WSDDN detector over sg_vrd_2016_test and evaluate.

    Loads pre-filtered EdgeBox proposals from an h5 file, runs
    im_detect_iccv per image, applies per-class NMS and a per-image cap,
    visualizes/saves every image, dumps detections.pkl and evaluates.

    Python 2 / Caffe code; requires a GPU and a display (cv2.imshow with
    blocking waitKey). NOTE(review): `eval` shadows the builtin — kept for
    caller compatibility.
    """
    cfg_from_file('experiments/cfgs/rfcn_end2end_iccv_eb.yml')
    #cfg_from_file('experiments/cfgs/rfcn_end2end_iccv_eb.yml')
    imdb, roidb = combined_roidb('sg_vrd_2016_test')
    import cv2
    #h5f = h5py.File('/media/zawlin/ssd/iccv2017/data/voc/gen_eb.h5',driver='core')
    h5path = 'data/sg_vrd_2016/EB/eb.h5'
    h5f = h5py.File(h5path,driver='core')
    h5_rois = {}
    # Pre-filter EdgeBox proposals per test image: sort by score (last
    # column, descending), drop boxes narrower/shorter than 20px, keep the
    # top 1000 box coordinates.
    for i in h5f['test/']:
        data=h5f['test/%s'%i][...].astype(np.float32)
        idx = np.argsort(data[:,-1],axis=0)
        data_sorted = data[idx][::-1]
        data_sorted_idx = np.where((data_sorted[:,2]-data_sorted[:,0]>20) & (data_sorted[:,3]-data_sorted[:,1]>20))
        data_sorted = data_sorted[data_sorted_idx]
        #print data_sorted
        h5_rois[i] = data_sorted[:1000,:4]
    #cfg.TEST.HAS_RPN=False
    net =None
    #prototxt = 'models/pascal_voc/ResNet-50/rfcn_end2end/test_iccv_eb_sigmoid.prototxt'
    prototxt = 'models/sg_vrd/wsd/test_eb_wsddn_s.prototxt'
    #model = 'data/rfcn_models/resnet50_rfcn_iter_1200.caffemodel'
    #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_iter_16000.caffemodel'
    #model = 'output/rfcn_end2end/voc_0712_train/eb_wsddn_s_iter_5000.caffemodel'
    model = 'output/rfcn_end2end/sg_vrd_2016_train/eb_wsddn_s_iter_9400.caffemodel'
    #model = 'data/rfcn_models/resnet50_rfcn_final.caffemodel'
    #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_eb_sigx_iter_100000.caffemodel'
    #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_600.caffemodel'
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(prototxt, model, caffe.TEST)
    # (commented-out experiment: copy conv_new_1_zl weights from a second net)
    #prototxt = 'models/pascal_voc/ResNet-50/rfcn_end2end/test_iccv_rpn.prototxt'
    #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_800.caffemodel'
    #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_iter_1600.caffemodel'
    #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_800.caffemodel'
    #net2 = caffe.Net(prototxt, model, caffe.TEST)
    #net.params['conv_new_1_zl'][0].data[...] = net2.params['conv_new_1_zl'][0].data[...]
    #net.params['conv_new_1_zl'][1].data[...] = net2.params['conv_new_1_zl'][1].data[...]
    #net2 = None
    net.name = 'resnet50_rfcn_iter_1200'
    num_images = len(imdb.image_index)
    # debug: restrict evaluation to the first 100 images
    num_images = 100
    del imdb.image_index[num_images:]
    # all_boxes[cls][image] = N x 5 array of detections [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    output_dir = get_output_dir(imdb, net)
    zl.tic()
    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}
    max_per_image =200
    thresh = 0.00001
    cv2.namedWindow('im',0)
    cnt = 0
    for i in xrange(num_images):
        # filter out any ground truth boxes
        im_path = imdb.image_path_at(i)
        im_name = im_path.split('/')[-1]
        # EdgeBox proposals for this image, keyed by image file name
        eb_roi = h5_rois[im_name]
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        #scores, boxes = im_detect(net, im, box_proposals)
        scores, boxes = im_detect_iccv(net, im, eb_roi)
        #attention = net.blobs['attention'].data.squeeze()
        #net.blobs['attention'].data
        #scores = np.multiply(scores,attention)
        _t['im_detect'].toc()
        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            if j == 15:
                # NOTE(review): debugging leftover — dfdfd is never read
                dfdfd=1
                dfdfd += 1
            # scores has no background column here, hence the j-1 offset
            inds = np.where(scores[:, j-1] > thresh)[0]
            cls_scores = scores[inds, j-1]
            if cfg.TEST.AGNOSTIC:
                # class-agnostic regression: a single box per RoI
                cls_boxes = boxes[inds, 1:]
            else:
                cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS,force_cpu=True)
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i] = cls_dets
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        # draw the surviving detections on the image
        for j in xrange(1, imdb.num_classes):
            cls_str = imdb.classes[j]
            for roi in all_boxes[j][i]:
                # NOTE(review): cv2 drawing expects integer pixel coords;
                # roi values here are float32 — confirm with the cv2 build
                # in use.
                cv2.putText(im,cls_str,(roi[0],roi[1]),cv2.FONT_HERSHEY_COMPLEX,1.0,(255,0,0),1)
                cv2.rectangle(im,(roi[0],roi[1]),(roi[2],roi[3]),(0,0,255),1)
        cnt += 1
        # save and show the visualization; waitKey(0) blocks on a keypress
        cv2.imwrite('/home/zawlin/%d.jpg'%cnt,im)
        cv2.imshow('vis',im)
        cv2.waitKey(0)
        _t['misc'].toc()
        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)
    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)
    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
    print zl.toc()
def gen_recall():
    """Generate detection results on sg_vrd_2016_test for recall evaluation.

    Runs the EdgeBox + WSDDN detector, collects per-image predicted
    boxes/labels/confidences and ground-truth object boxes/labels (from the
    sg_vrd meta h5 file), and saves everything to output/sg_vrd_objs.mat
    for the external (MATLAB) recall evaluation.

    Python 2 / Caffe code; requires a GPU and a display (blocking
    cv2.waitKey per image, ESC exits).
    """
    cfg_from_file('experiments/cfgs/rfcn_end2end_iccv_eb.yml')
    #cfg_from_file('experiments/cfgs/rfcn_end2end_iccv_eb.yml')
    imdb, roidb = combined_roidb('sg_vrd_2016_test')
    # dataset metadata: class-name lookup and ground-truth boxes/labels
    m = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r', 'core')
    import cv2
    h5path = 'data/sg_vrd_2016/EB/eb.h5'
    h5f = h5py.File(h5path,driver='core')
    h5_rois = {}
    # Pre-filter EdgeBox proposals per test image: sort by score (last
    # column, descending), drop boxes narrower/shorter than 20px, keep the
    # top 4000 box coordinates.
    for i in h5f['test/']:
        data=h5f['test/%s'%i][...].astype(np.float32)
        idx = np.argsort(data[:,-1],axis=0)
        data_sorted = data[idx][::-1]
        data_sorted_idx = np.where((data_sorted[:,2]-data_sorted[:,0]>20) & (data_sorted[:,3]-data_sorted[:,1]>20))
        data_sorted = data_sorted[data_sorted_idx]
        #print data_sorted
        h5_rois[i] = data_sorted[:4000,:4]
    #cfg.TEST.HAS_RPN=False
    net =None
    #prototxt = 'models/pascal_voc/ResNet-50/rfcn_end2end/test_iccv_eb_sigmoid.prototxt'
    prototxt = 'models/sg_vrd/wsd/test_eb_wsddn_s.prototxt'
    #model = 'data/rfcn_models/resnet50_rfcn_iter_1200.caffemodel'
    #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_iter_16000.caffemodel'
    #model = 'output/rfcn_end2end/voc_0712_train/eb_wsddn_s_iter_5000.caffemodel'
    model = 'output/rfcn_end2end/sg_vrd_2016_train/eb_wsddn_s_iter_11000.caffemodel'
    #model = 'data/rfcn_models/resnet50_rfcn_final.caffemodel'
    #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_eb_sigx_iter_100000.caffemodel'
    #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_600.caffemodel'
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(prototxt, model, caffe.TEST)
    # (commented-out experiment: copy conv_new_1_zl weights from a second net)
    #prototxt = 'models/pascal_voc/ResNet-50/rfcn_end2end/test_iccv_rpn.prototxt'
    #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_800.caffemodel'
    #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_iter_1600.caffemodel'
    #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_800.caffemodel'
    #net2 = caffe.Net(prototxt, model, caffe.TEST)
    #net.params['conv_new_1_zl'][0].data[...] = net2.params['conv_new_1_zl'][0].data[...]
    #net.params['conv_new_1_zl'][1].data[...] = net2.params['conv_new_1_zl'][1].data[...]
    #net2 = None
    net.name = 'resnet50_rfcn_iter_1200'
    num_images = len(imdb.image_index)
    #num_images = 100
    #del imdb.image_index[num_images:]
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    output_dir = get_output_dir(imdb, net)
    zl.tic()
    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}
    max_per_image =20
    thresh = 0.00001
    cv2.namedWindow('im',0)
    cnt = 0
    # accumulators for the .mat output (one list entry per image)
    mat_pred_label = []
    mat_pred_conf = []
    mat_pred_bb = []
    mat_gt_label = []
    mat_gt_bb = []
    for i in xrange(num_images):
        cnt+=1
        # filter out any ground truth boxes
        im_path = imdb.image_path_at(i)
        im_name = im_path.split('/')[-1]
        imid = im_name[:-4]  # strip the file extension to get the image id
        eb_roi = h5_rois[im_name]
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect_iccv(net, im, eb_roi)
        _t['im_detect'].toc()
        _t['misc'].tic()
        # per-class NMS over 100 object classes; scores has no background
        # column, hence the j-1 offset
        boxes_tosort = []
        for j in xrange(1, 101):
            inds = np.where(scores[:, j-1] > 0.00001)[0]
            cls_scores = scores[inds, j-1]
            cls_boxes = boxes[inds, 1:]
            #cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            # cls_boxes = boxes[inds]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, .7, force_cpu=True)  # nms threshold
            # keep = nms_fast(cls_dets,.3)
            cls_dets = cls_dets[keep, :]
            boxes_tosort.append(cls_dets)
        mat_pred_label_i = []
        mat_pred_conf_i = []
        mat_pred_bb_i = []
        for j in xrange(len(boxes_tosort)):
            cls_dets = boxes_tosort[j]
            # keep at most max_per_image top-scoring detections per class
            idx = np.argsort(cls_dets[:,-1],axis=0)[::-1]
            cls_dets = cls_dets[idx]
            if cls_dets.shape[0]>max_per_image:
                cls_dets = cls_dets[:max_per_image,:]
            for di in xrange(cls_dets.shape[0]):
                # print 'here'
                di = cls_dets[di]  # NOTE: rebinds the loop index to the detection row
                score = di[-1]
                cls_idx = j + 1
                cls_name = zl.idx2name_cls(m,cls_idx)
                #cls_name = str(m['meta/cls/idx2name/' + str(cls_idx)][...])
                if score > 1:
                    score = 1  # clamp scores to at most 1
                if score < thresh:
                    continue
                cv2.rectangle(im,(di[0],di[1]),(di[2],di[3]),(255,0,0),2)
                x, y = int(di[0]), int(di[1])
                # nudge the label position away from the image border
                if x < 10:
                    x = 15
                if y < 10:
                    y = 15
                mat_pred_label_i.append(cls_idx)
                mat_pred_conf_i.append(score)
                mat_pred_bb_i.append([di[0],di[1],di[2],di[3]])
                cv2.putText(im,cls_name,(x,y),cv2.FONT_HERSHEY_SIMPLEX,1.0,(0,0,255),2)
                # NOTE(review): res_line is built but never written anywhere
                res_line = '%s %d %f %d %d %d %d'%(imid,cls_idx,score,di[0],di[1],di[2],di[3])
        mat_pred_label.append(mat_pred_label_i)
        mat_pred_conf.append(mat_pred_conf_i)
        mat_pred_bb.append(mat_pred_bb_i)
        # ground truth: subject and object boxes of every relation triple
        obj_boxes = m['gt/test/%s/obj_boxes'%imid][...]
        sub_boxes = m['gt/test/%s/sub_boxes'%imid][...]
        rlp_labels = m['gt/test/%s/rlp_labels'%imid][...]
        mat_gt_label_i = []
        mat_gt_bb_i = []
        mat_gt_i = []
        for gti in xrange(obj_boxes.shape[0]):
            # one row per box: [label, x1, y1, x2, y2]; rlp_labels columns
            # 0 and 2 carry the subject and object class labels
            mat_gt_i.append([rlp_labels[gti,0],sub_boxes[gti,0],sub_boxes[gti,1],sub_boxes[gti,2],sub_boxes[gti,3]])
            mat_gt_i.append([rlp_labels[gti,2],obj_boxes[gti,0],obj_boxes[gti,1],obj_boxes[gti,2],obj_boxes[gti,3]])
        if len(mat_gt_i)>0:
            # de-duplicate: a box can appear in several relation triples
            mat_gt_i = np.array(mat_gt_i)
            mat_gt_i=zl.unique_arr(mat_gt_i)
            for gti in xrange(mat_gt_i.shape[0]):
                mat_gt_bb_i.append(mat_gt_i[gti,1:])
                mat_gt_label_i.append(mat_gt_i[gti,0])
        mat_gt_label.append(mat_gt_label_i)
        mat_gt_bb.append(mat_gt_bb_i)
        #matlab_gt.append(matlab_gt_i)
        #now get gt
        cv2.imshow('im',im)
        if cv2.waitKey(0) == 27:
            exit(0)  # ESC aborts the whole run
        _t['misc'].toc()
        print 'im_detect: {:d} {:.3f}s {:.3f}s' \
            .format(cnt, _t['im_detect'].average_time,
                    _t['misc'].average_time)
    sio.savemat('output/sg_vrd_objs.mat', {'pred_bb': mat_pred_bb,
                                           'pred_conf':mat_pred_conf,
                                           'pred_label':mat_pred_label,
                                           'gt_bb':mat_gt_bb,
                                           'gt_label':mat_gt_label
                                           })