def get_roidb(imdb_name):
    """Load the dataset ``imdb_name`` and return its training roidb.

    The dataset is fetched with ``get_imdb``, its proposal method is set from
    ``cfg.TRAIN.PROPOSAL_METHOD``, and the result of ``get_training_roidb``
    for that dataset is returned.
    """
    dataset = get_imdb(imdb_name)

    banner = '--------------------------------------------------------'
    print(banner)
    print('Loaded dataset `{:s}` for training'.format(dataset.name))

    dataset.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
    return get_training_roidb(dataset, imdb_name)
def extract_roidb(imdb_names, training=True):
    """Build one merged roidb from a '+'-separated string of dataset names.

    Args:
        imdb_names: one dataset name, or several joined with '+'.
        training: currently unused; the filtering step it used to guard is
            commented out below.

    Returns:
        A tuple ``(imdb, roidb, ratio_list, ratio_index)``. ``roidb`` is the
        first dataset's roidb extended in place with all the others;
        ``ratio_list`` and ``ratio_index`` come from ``rank_roidb_ratio``.
    """

    def get_training_roidb(imdb, imdb_name):
        """Prepare ``imdb`` for training and hand back its roidb."""
        flip_requested = cfg.TRAIN.USE_FLIPPED and 'train' in imdb_name
        if flip_requested:
            print('Appending horizontally-flipped training examples...')
            # Bare attribute access kept from the original; presumably it
            # forces the roidb to be built before flipping -- TODO confirm.
            imdb.roidb
            imdb.append_flipped_images()
            flipped_msg = '{} images loaded after flipping images'
            print(flipped_msg.format(len(imdb.roidb)))

        prepare_roidb(imdb)
        # ratio_index = rank_roidb_ratio(imdb)
        return imdb.roidb

    def get_roidb(imdb_name):
        """Load one dataset by name and return its training roidb."""
        dataset = get_imdb(imdb_name)
        print('--------------------------------------------------------')
        print('Loaded dataset `{:s}` for training'.format(dataset.name))
        dataset.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
        return get_training_roidb(dataset, imdb_name)

    per_dataset = [get_roidb(name) for name in imdb_names.split('+')]
    roidb = per_dataset[0]

    if len(per_dataset) > 1:
        # Fold every additional roidb into the first one (mutates it).
        for extra in per_dataset[1:]:
            roidb.extend(extra)
        # The class list of the combined dataset is taken from the second
        # named dataset.
        second = get_imdb(imdb_names.split('+')[1])
        imdb = datasets.imdb.imdb(imdb_names, second.classes)
    else:
        imdb = get_imdb(imdb_names)

    #if training:
    #    roidb = filter_roidb(roidb)

    ratio_list, ratio_index = rank_roidb_ratio(roidb)
    return imdb, roidb, ratio_list, ratio_index
def test():
    """Run a pretrained detector on one demo image and save/show the result.

    Reads ``demo/images.jpeg``, builds the detector that matches the
    pretrained model's file name, runs detection, draws every box with its
    class and score, writes ``demo/out.jpg`` and displays the image until a
    key is pressed.

    Raises:
        IOError: if the demo image cannot be read.
    """
    import os

    img_file = 'demo/images.jpeg'
    image = cv2.imread(img_file)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # stop here instead of failing later inside the detector.
        raise IOError('Could not read image file: {}'.format(img_file))

    # Other dataset names used with this demo:
    # 'CaltechPedestrians_train', 'voc_2007_trainval'
    imdb_name = 'coco_2017_train'
    imdb = get_imdb(imdb_name)

    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    # Other checkpoints used with this demo:
    # 'VGGnet_fast_rcnn_iter_70000.h5',
    # 'CaltechPedestrians_train_1_vgg16_0.7_b1.h5'
    pre_model_name = 'coco_2017_train_10_vgg16_0.7_b1.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)

    print(imdb.classes)

    # The backbone is encoded in the checkpoint name; VGG is used when the
    # name mentions 'vgg16' and also when no known backbone is named.
    name_parts = pre_model_name.split('_')
    if 'resnet50' in name_parts and 'vgg16' not in name_parts:
        detector_cls = FasterRCNN_RES
    else:
        detector_cls = FasterRCNN_VGG
    detector = detector_cls(classes=imdb.classes, debug=False)

    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()
    dets, scores, classes = detector.detect(image, blob, thr=0.7, nms_thresh=0.3)
    runtime = t.toc()
    print('total spend: {}s'.format(runtime))

    # Draw on a copy so the image passed to the detector stays untouched.
    im2show = np.copy(image)
    for i, det in enumerate(dets):
        det = tuple(int(x) for x in det)
        cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]),
                    (det[0], det[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)

    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
    cv2.imshow('demo', im2show)
    cv2.waitKey(0)
def __init__(self, vcoco_set, coco_root, vcoco_root):
    """Set up the V-COCO boxes together with the matching COCO 2014 roidb.

    Args:
        vcoco_set: either "vcoco_train" or "vcoco_val".
        coco_root: forwarded, with ``vcoco_set``, to the parent initializer.
        vcoco_root: written into the global config as ``DATA_DIR``.

    Raises:
        ValueError: if ``vcoco_set`` is not one of the two known names.
    """
    super(RoiVCocoBoxes, self).__init__(vcoco_set, coco_root)

    # TODO this sets a global config, which I prefer not to do. But the
    # faster_rcnn code depends on it.
    cf.cfg_from_list(["DATA_DIR", vcoco_root])

    # Translate the V-COCO set name into the COCO split it is built on.
    for known_set, split in (("vcoco_train", "train"), ("vcoco_val", "val")):
        if vcoco_set == known_set:
            coco_split = split
            break
    else:
        raise ValueError("Invalid vcoco_set '%s'" % vcoco_set)

    self._imdb = get_imdb("coco_2014_" + coco_split)
    rdl_roidb.prepare_roidb(self._imdb)
    self._roidb = self._imdb.roidb

    # Reverse lookup: entry of the imdb image index -> its position.
    self.cocoimgid_2_roidbindex = dict(
        (image_id, position)
        for position, image_id in enumerate(self._imdb._image_index)
    )
# Set to True to continue from the saved checkpoint loaded further below.
resume = False

# ------------

# Seed NumPy's RNG when a seed is configured.
if rand_seed is not None:
    np.random.seed(rand_seed)

# load config
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
imdb = get_imdb(imdb_name)
rdl_roidb.prepare_roidb(imdb)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# load net
net = RFCN(classes=imdb.classes, debug=_DEBUG)
#init_modules = [net.rpn.conv1, net.rpn.score_conv, net.rpn.bbox_conv, net.fc6, net.fc7, net.score_fc, net.bbox_fc]
#network.weights_normal_init(init_modules, dev=0.01)
network.weights_normal_init(net, dev=0.01)
network.load_pretrained_npy(net, pretrained_model)

if resume:
    # Load the saved checkpoint on top of the freshly initialised net and
    # continue counting steps from where that checkpoint was taken.
    pretrained_model_file = 'models/saved_model3/faster_rcnn_resnet101_20000.h5'
    network.load_net(pretrained_model_file, net)
    start_step = 20000
    # Parenthesised so the line parses on Python 3 as well as Python 2.
    print('Resume training...')
# ------------ if rand_seed is not None: np.random.seed(rand_seed) # load config cfg_from_file(cfg_file) lr = cfg.TRAIN.LEARNING_RATE momentum = cfg.TRAIN.MOMENTUM weight_decay = cfg.TRAIN.WEIGHT_DECAY disp_interval = cfg.TRAIN.DISPLAY log_interval = cfg.TRAIN.LOG_IMAGE_ITERS # load data imdb_0 = get_imdb(imdb_name_0) imdb_1 = get_imdb(imdb_name_1) rdl_roidb.prepare_roidb(imdb_0) rdl_roidb.prepare_roidb(imdb_1) roidb_0 = imdb_0.roidb roidb_1 = imdb_1.roidb data_layer = RoIDataLayer(roidb_0, roidb_1, imdb_0.num_classes) # In[5]: # load net net = FasterRCNN_y(classes=imdb_0.classes, debug=_DEBUG) network.weights_normal_init(net, dev=0.01) network.load_pretrained_npy_y(net, pretrained_model) # model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5' # model_file = 'models/saved_model3/faster_rcnn_60000.h5'
def test():
    """Run the pedestrian detector over a video and write an annotated copy.

    Frames are read from ``video_file``; every detection is drawn as a box,
    the frame is shown briefly and then written to ``output_file`` at ``fps``
    frames per second. Those three names are not defined in this function and
    are expected to come from the enclosing module.

    The capture and the writer are released even if processing fails, and the
    final summary reports the number of frames actually read.
    """
    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)

    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_10_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)

    # The backbone is encoded in the checkpoint name; VGG is used when the
    # name mentions 'vgg16' and also when no known backbone is named.
    name_parts = pre_model_name.split('_')
    if 'res' in name_parts and 'vgg16' not in name_parts:
        detector_cls = FasterRCNN_RES
    else:
        detector_cls = FasterRCNN_VGG
    detector = detector_cls(classes=imdb.classes, debug=False)

    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()
    cap = cv2.VideoCapture(video_file)
    out = None        # created lazily: the frame size is only known after a read
    frame_count = 0   # number of frames read so far
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            p = Timer()
            p.tic()
            frame_count += 1

            if out is None:
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_file, fourcc, fps,
                                      (frame.shape[1], frame.shape[0]))

            try:
                dets, scores, classes = detector.detect(
                    frame, blob, thr=0.7, nms_thresh=0.3)
                frame = np.copy(frame)
                for det in dets:
                    det = tuple(int(x) for x in det)
                    cv2.rectangle(frame, det[0:2], det[2:4], (255, 205, 51), 2)
                    # cv2.putText(frame, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15), \
                    #             cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
                cv2.imshow('demo', frame)
                cv2.waitKey(1000)
                cv2.destroyAllWindows()
            except IndexError as e:
                # Best effort: the frame is still written, just without
                # (all of) its boxes. Report it instead of hiding it.
                print('frame {}: skipped after IndexError: {}'.format(
                    frame_count, e))
            finally:
                print(frame_count, '-frame : {:.3f}s'.format(p.toc()))

            out.write(frame)

        runtime = t.toc()
        rate = frame_count / runtime if runtime > 0 else 0.0
        print('{} frames / total spend: {}s / {:2.1f} fps'.format(
            frame_count, int(runtime), rate))
    finally:
        cap.release()
        if out is not None:
            out.release()
# if vis: # cv2.imshow('test', im2show) # cv2.waitKey(1) if sav: cv2.imwrite(output_dir_detections + str(i) + '.png', im2show) with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb_0.evaluate_detections(all_boxes, output_dir) if __name__ == '__main__': imdb_0 = get_imdb(imdb_test_name_0) imdb_0.competition_mode(on=True) net_0 = FasterRCNN(classes=imdb_0.classes, debug=False) network.load_net(trained_model_0, net_0) print('load model 0 successfully!') net_0.cuda() net_0.eval() imdb_1 = get_imdb(imdb_test_name_1) imdb_1.competition_mode(on=True) net_1 = FasterRCNN(classes=imdb_1.classes, debug=False) network.load_net(trained_model_1, net_1) print('load model 1 successfully!') net_1.cuda() net_1.eval()
# Fixed seed for NumPy's RNG (applied below).
rand_seed = 1024

save_name = 'vg'
# Passed to get_preds below as the per-image limit and the threshold.
max_per_image = 300
thresh = 0.05

# ------------

if rand_seed is not None:
    np.random.seed(rand_seed)

# load config
cfg_from_file(cfg_file)

# For each dataset: load it, build a network for its class list, load the
# same trained weights, and run get_preds with an output name derived from
# the dataset name.
for imdb_fn in (
    'vg',
    'vg_val',
):
    imdb = get_imdb(imdb_fn)
    imdb.competition_mode(on=True)
    net = FasterRCNN(classes=imdb.classes, debug=True)
    network.load_net(trained_model, net)
    net.cuda()
    net.eval()
    print('load model successfully!')
    get_preds(imdb_fn + '_out', net, imdb, max_per_image, thresh=thresh, test_bbox_reg=cfg.TEST.BBOX_REG, vis=False)
print("DONE!")
# ------------ if rand_seed is not None: np.random.seed(rand_seed) # load config cfg_from_file(cfg_file) lr = cfg.TRAIN.LEARNING_RATE momentum = cfg.TRAIN.MOMENTUM weight_decay = cfg.TRAIN.WEIGHT_DECAY disp_interval = cfg.TRAIN.DISPLAY log_interval = cfg.TRAIN.LOG_IMAGE_ITERS # load data imdb = get_imdb(imdb_name) #是一个pascal_voc对象! #对imdb的roidb进行进一步的处理方便训练使用,在pascal_voc生成的roidb的字典中 #添加,“image”:图像的路径,“weight”,"height","max_overlaps","max_classes" rdl_roidb.prepare_roidb(imdb) roidb = imdb.roidb #返回的是一个列表 #用于生成数据,相当于pytorch中的dataloader的功能,会对原始的图像进行变换,以用于训练 data_layer = RoIDataLayer(roidb, imdb.num_classes) #forward方法会返回一个batch的图像数据,并同时对图像进行了以下操作。 #1.将原图像进行了缩放以进行训练。例如:(356,500,3) == > (600,843,3)在配置中 #用于训练的图片最短边为600,因此在保证原图的长宽比的情况下,对图像进行了缩放,并记录下 #了缩放的尺度上例中:scale为600/356 = 1.68539,同时相应的将GT_box的值也乘以相应的scale #============================================= #最终返回的blobs为一个字典:
print ('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, detect_time, nms_time)) if vis: cv2.imshow('test', im2show) cv2.waitKey(1) with open(det_file, 'wb') as f: pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) print('=====Evaluating detections=====') #评估检测结果,计算MAP。 imdb.evaluate_detections(all_boxes, output_dir) if __name__ == '__main__': # load data imdb = get_imdb(imdb_name) #返回一个pascal_voc对象 imdb.competition_mode(on=True) print("load data finished!") # load net net = FasterRCNN(classes=imdb.classes, debug=False) network.load_net(trained_model, net) #加载训练好的模型。 print('load model successfully!') # net.cuda() net.eval() # evaluation test_net(save_name, net, imdb, max_per_image, thresh=thresh, vis=vis)
# ------------

# Seed NumPy's RNG when a seed is configured.
if rand_seed is not None:
    np.random.seed(rand_seed)

# load config
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
imdb = get_imdb(imdb_name, img_set)
rdl_roidb.prepare_roidb(imdb)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)
# Parenthesised so the line parses on Python 3 as well as Python 2; with a
# single argument the printed text is the same on both.
print("number of classes: " + str(imdb.num_classes))

# load net
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.01)
network.load_pretrained_npy(net, pretrained_model)
# model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5'
# model_file = 'models/saved_model3/faster_rcnn_60000.h5'
# network.load_net(model_file, net)
# exp_name = 'vgg16_02-19_13-24'
# start_step = 60001
# lr /= 10.
# network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01)
rand_seed = 1024 save_name = 'vg' max_per_image = 300 thresh = 0.05 # ------------ if rand_seed is not None: np.random.seed(rand_seed) # load config cfg_from_file(cfg_file) # load data imdb = get_imdb('vg_val_small') imdb.competition_mode(on=True) # load net net = FasterRCNN(classes=imdb.classes, debug=True) network.load_net(trained_model, net) print('load model successfully!') net.cuda() net.eval() # # evaluation test_net(save_name, net, imdb, max_per_image,
def track():
    """Detect pedestrians in a video and label them with voted track ids.

    Frames are read from ``video_file`` and processed in cycles of ``tps``
    frames (``video_file``, ``output_file``, ``fps`` and ``tps`` are not
    defined in this function and are expected to come from the enclosing
    module):

    * first frame of a cycle: every detection gets its own index as id and
      its feature vector is stored;
    * intermediate frames: detections are matched to the previous frame's
      boxes by overlap (> 0.7) and stored again;
    * last frame of a cycle: for every detection the closest stored feature
      in each earlier frame of the cycle votes for an id, and the most common
      id is drawn next to the box.

    Every frame, annotated or not, is written to ``output_file``. The capture
    and the writer are released even if processing fails.
    """

    def id_track(dataset, features):
        """Return, per feature, the id most often nearest to it in ``dataset``.

        ``dataset`` is a list of frames, each a list of
        ``{'id': ..., 'feature': ...}`` dicts. The result has one entry per
        element of ``features``, in the same order; an entry is ``None`` when
        no stored frame offered a candidate.
        """
        from collections import Counter

        def dist(f1, f2):
            # sqrt of the squared difference is the absolute difference, so
            # this is a sum of absolute differences along dim 0.
            score = (torch.sqrt((f1 - f2) ** 2)).sum(0).data.cpu().numpy()[0]
            return score

        # One vote list per feature; a list keeps the votes aligned with
        # `features` independent of dict ordering.
        votes = [[] for _ in features]
        for dataframe in dataset:
            for idx, feat in enumerate(features):
                best_score = 1e15
                best_id = None
                for entry in dataframe:
                    score = dist(feat, entry['feature'])
                    if score < best_score:
                        best_score = score
                        best_id = entry['id']
                # Only vote when this frame actually had a candidate, rather
                # than reusing an id left over from a previous iteration.
                if best_id is not None:
                    votes[idx].append(best_id)

        return [
            Counter(ballot).most_common(1)[0][0] if ballot else None
            for ballot in votes
        ]

    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)

    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_2_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)

    # The backbone is encoded in the checkpoint name; VGG is used when the
    # name mentions 'vgg16' and also when no known backbone is named.
    name_blocks = pre_model_name.split('_')
    if 'resnet50' in name_blocks and 'vgg16' not in name_blocks:
        detector_cls = FasterRCNN_RES
    else:
        detector_cls = FasterRCNN_VGG
    detector = detector_cls(classes=imdb.classes, debug=False)
    relu = 'relu' in name_blocks

    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()
    cap = cv2.VideoCapture(video_file)
    out = None        # created lazily: the frame size is only known after a read
    frame_count = 0   # number of frames read so far
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            p = Timer()
            p.tic()
            frame_count += 1

            if out is None:
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_file, fourcc, fps,
                                      (frame.shape[1], frame.shape[0]))

            try:
                # detect
                tid = (frame_count - 1) % tps  # position inside the cycle
                dets, scores, classes = detector.detect(
                    frame, blob, thr=0.7, nms_thresh=0.3)
                frame = np.copy(frame)

                # feature extraction
                features = []
                for det in dets:
                    gt_box = det[np.newaxis, :]
                    features.append(detector.extract_feature_vector(
                        frame, blob, gt_box, relu=relu))
                    det = tuple(int(x) for x in det)
                    cv2.rectangle(frame, det[0:2], det[2:4], (255, 205, 51), 2)

                dataframe = []
                if tid == 0:
                    # Start of a cycle: reset the history, ids are indices.
                    dataset = []
                    for i, f in enumerate(features):
                        dataframe.append({'id': i, 'feature': f})
                    dataset.append(dataframe)
                    anchors = dets
                elif tid > 0 and tid < tps - 1:
                    # np.float64 is what the removed np.float alias meant.
                    overlaps = bbox_overlaps(
                        np.ascontiguousarray(anchors, dtype=np.float64),
                        np.ascontiguousarray(dets, dtype=np.float64))
                    # max : K max overlaps score about N dets
                    overlaps = np.multiply(overlaps, overlaps > 0.7)
                    max_arg = overlaps.argmax(axis=0)
                    # NOTE(review): `arg` is taken along axis 0 of overlaps
                    # (the `anchors` argument) yet is used to index the
                    # current `features`; kept as in the original -- confirm
                    # the intended pairing.
                    for arg in max_arg:
                        if arg >= len(features):
                            continue
                        dataframe.append(
                            {'id': arg, 'feature': features[arg]})
                    dataset.append(dataframe)
                    anchors = dets
                else:
                    # End of a cycle: vote and draw the winning ids.
                    id_list = id_track(dataset, features)
                    for i, track_id in enumerate(id_list):
                        if track_id is None:
                            continue
                        det = tuple(int(x) - 2 for x in dets[i])
                        cv2.putText(frame, 'id: ' + str(track_id), det[0:2],
                                    cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255))
                # cv2.imshow('demo', frame)
                # cv2.waitKey(1000)
                # cv2.destroyAllWindows()
            except Exception as err:
                # Best effort: a failing frame is still written, possibly
                # without annotations. Unlike a bare except this lets
                # KeyboardInterrupt through and makes the failure visible.
                print('frame {}: tracking skipped ({}: {})'.format(
                    frame_count, type(err).__name__, err))
            finally:
                if frame_count % 10 == 0:
                    print(frame_count, '-frame : {:.3f}s'.format(p.toc()))

            out.write(frame)

        runtime = t.toc()
        rate = frame_count / runtime if runtime > 0 else 0.0
        print('{} frames / total spend: {}s / {:2.1f} fps'.format(
            frame_count, int(runtime), rate))
    finally:
        cap.release()
        if out is not None:
            out.release()