def train_main(data_dir, model_dir, train_steps, input_yaml):
    """Train the VGGnet CTPN text-detection network.

    Parameters
    ----------
    data_dir : unused in the visible code — the dataset comes from the imdb
        factory.  NOTE(review): wire this in or drop it; kept for interface
        compatibility.
    model_dir : unused in the visible code — output paths come from
        get_output_dir() / get_log_dir().
    train_steps : number of training iterations.  When falsy, falls back to
        cfg.TRAIN.max_steps (the original behaviour, which always ignored
        this argument).
    input_yaml : path to the YAML file merged into the global `cfg`.
    """
    cfg_from_file(input_yaml)
    print('Using config:')
    pprint.pprint(cfg)

    # NOTE(review): dataset name is hard-coded; `data_dir` is not consulted.
    imdb = get_imdb('voc_2007_trainval')
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    roidb = get_training_roidb(imdb)

    output_dir = get_output_dir(imdb, None)
    log_dir = get_log_dir(imdb)
    print('Output will be saved to `{:s}`'.format(output_dir))
    print('Logs will be saved to `{:s}`'.format(log_dir))

    device_name = '/gpu:0'
    print(device_name)

    network = get_network('VGGnet_train')

    # Bug fix: the original ignored `train_steps` and always used the config
    # value.  Honour the argument when supplied; fall back to cfg otherwise
    # so existing callers that rely on the YAML value are unaffected.
    max_iters = int(train_steps) if train_steps else int(cfg.TRAIN.max_steps)
    train_net(network, imdb, roidb,
              output_dir=output_dir,
              log_dir=log_dir,
              pretrained_model='data/pretrain_model/VGG_imagenet.npy',
              max_iters=max_iters,
              restore=bool(int(cfg.TRAIN.restore)))
def test_net(sess, net, imdb, weights_filename):
    """Test a CTPN text-detection network on an image database.

    Runs detection on every image in `imdb`, collects the surviving
    text-line boxes, dumps them to ``detections.pkl`` in the output
    directory, then hands them to the dataset's evaluation routine.
    """
    # Bug fix: the docstring was a mid-body string statement in the original;
    # moved to the top so it is an actual docstring.
    timer = Timer()
    timer.tic()
    np.random.seed(cfg.RNG_SEED)

    num_images = len(imdb.image_index)
    output_dir = get_output_dir(imdb, weights_filename)

    # Per-phase timers; 'im_detect' covers proposal scoring + line grouping.
    # (Created but never used in the original.)
    _t = {'im_detect': Timer(), 'misc': Timer()}

    # NOTE(review): indexed as all_boxes[image][class], the transpose of the
    # usual Fast R-CNN all_boxes[class][image] layout — confirm that
    # imdb.evaluate_detections expects this orientation.
    all_boxes = [[[] for _ in range(imdb.num_classes)]
                 for _ in range(num_images)]

    for i in range(num_images):
        print('***********', imdb.image_path_at(i))
        img = cv2.imread(imdb.image_path_at(i))
        img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)

        _t['im_detect'].tic()
        scores, boxes = test_ctpn(sess, net, img)
        # Constructed per image as in the original, in case the detector
        # carries per-image state.
        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                    img.shape[:2])
        _t['im_detect'].toc()

        # Bug fix: the original printed timer.total_time, which is only
        # updated by toc() — called once at the very end — so the value was
        # stale inside the loop.  Report the per-image detection timer.
        print(('Detection took {:.3f}s for '
               '{:d} object proposals').format(_t['im_detect'].average_time,
                                               boxes.shape[0]))

        boxes = check_unreasonable_box(boxes, scale)
        all_boxes[i][1] += boxes

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    imdb.evaluate_detections(all_boxes, output_dir)
    timer.toc()
from lib.fast_rcnn.train import get_training_roidb, train_net
from lib.fast_rcnn.config import cfg_from_file, get_output_dir, get_log_dir
from lib.datasets.factory import get_imdb
from lib.networks.factory import get_network
from lib.fast_rcnn.config import cfg

if __name__ == '__main__':
    # Load the CTPN training configuration into the global cfg.
    cfg_from_file('ctpn/text.yml')
    print('Using config:')
    pprint.pprint(cfg)

    imdb = get_imdb('voc_2007_trainval')
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    roidb = get_training_roidb(imdb)

    output_dir = get_output_dir(imdb, None)
    log_dir = get_log_dir(imdb)
    print('Output will be saved to `{:s}`'.format(output_dir))
    print('Logs will be saved to `{:s}`'.format(log_dir))

    device_name = '/gpu:0'
    print(device_name)

    network = get_network('VGGnet_train')

    # Bug fix: the original train_net(...) call was truncated after
    # `pretrained_model=...,` (a SyntaxError).  Completed with the same
    # max_iters/restore arguments the sibling training script passes.
    # NOTE(review): the sibling script uses 'data/pretrain_model/...' for the
    # pretrained weights — confirm which path is correct here.
    train_net(network, imdb, roidb,
              output_dir=output_dir,
              log_dir=log_dir,
              pretrained_model='data/pretrain/VGG_imagenet.npy',
              max_iters=int(cfg.TRAIN.max_steps),
              restore=bool(int(cfg.TRAIN.restore)))
def test_net(sess, net, imdb, weights_filename, max_per_image=300, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database.

    Args (per visible use):
        sess: TensorFlow session passed through to im_detect.
        net: detection network passed through to im_detect.
        imdb: dataset object providing image paths, classes and evaluation.
        weights_filename: tag used to select the output directory.
        max_per_image: cap on detections kept per image across all classes.
        thresh: minimum class score for a detection to be considered.
        vis: when True, display each image's detections with matplotlib.

    Note: Python 2 code (``xrange``, ``print`` statements, ``cPickle``).
    """
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)

    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}

    # Without an RPN, proposals come from the precomputed roidb.
    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select those the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(sess, net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        if vis:
            # BGR -> RGB channel order for matplotlib display.
            image = im[:, :, (2, 1, 0)]
            plt.cla()
            plt.imshow(image)

        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            # Boxes are class-specific: columns [4j, 4j+4) hold class j's box.
            cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(image, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets

        if vis:
            plt.show()

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # Threshold that keeps exactly the top max_per_image scores.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
from lib.fast_rcnn.train import get_training_roidb, train_net
from lib.fast_rcnn.config import cfg_from_file, get_output_dir, get_log_dir
from lib.datasets.factory import get_imdb
from lib.networks.factory import get_network
from lib.fast_rcnn.config import cfg


def _run_training():
    """Load ctpn/text.yml into cfg and launch VGGnet CTPN training."""
    cfg_from_file('ctpn/text.yml')
    print('Using config:')
    pprint.pprint(cfg)

    dataset = get_imdb('voc_2007_trainval')
    print('Loaded dataset `{:s}` for training'.format(dataset.name))
    training_roidb = get_training_roidb(dataset)

    out_path = get_output_dir(dataset, None)
    tb_path = get_log_dir(dataset)
    print('Output will be saved to `{:s}`'.format(out_path))
    print('Logs will be saved to `{:s}`'.format(tb_path))

    device_name = '/gpu:0'
    print(device_name)

    net = get_network('VGGnet_train')
    train_net(net, dataset, training_roidb,
              output_dir=out_path,
              log_dir=tb_path,
              pretrained_model='data/pretrain_model/VGG_imagenet.npy',
              max_iters=int(cfg.TRAIN.max_steps),
              restore=bool(int(cfg.TRAIN.restore)))


if __name__ == '__main__':
    _run_training()
from easydict import EasyDict as edict

if __name__ == '__main__':
    # Load the CTPN training configuration into the global cfg.
    cfg_from_file('ctpn/text.yml')
    print('Using config:')

    imdb = get_imdb('voc_2007_train')
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    roidb = get_training_roidb(imdb)

    output_dir = get_output_dir(imdb, 'textline_v4')
    log_dir = get_log_dir(imdb)
    print('Output will be saved to `{:s}`'.format(output_dir))
    print('Logs will be saved to `{:s}`'.format(log_dir))

    device_name = '/gpu:0'
    print(device_name)

    network = get_network('VGGnet_train')
    pprint.pprint(cfg)

    # Bug fix: the train_net(...) call was truncated after its first
    # argument (a SyntaxError).  Completed with the keyword arguments the
    # sibling training scripts pass.
    # TODO(review): confirm the pretrained-model path for this variant —
    # none was visible in the truncated original.
    train_net(network, imdb, roidb,
              output_dir=output_dir,
              log_dir=log_dir,
              pretrained_model='data/pretrain_model/VGG_imagenet.npy',
              max_iters=int(cfg.TRAIN.max_steps),
              restore=bool(int(cfg.TRAIN.restore)))
# Tail of a Caffe Fast R-CNN training entry point (Python 2).  Relies on an
# `args` namespace and helpers (caffe, np, pprint, combined_roidb,
# get_output_dir, train_net, cfg, cfg_from_list) defined earlier in the file.
cfg_from_list(args.set_cfgs)
cfg.GPU_ID = args.gpu_id

print('Using config:')
pprint.pprint(cfg)

if not args.randomize:
    # fix the random seeds (numpy and caffe) for reproducibility
    np.random.seed(cfg.RNG_SEED)
    caffe.set_random_seed(cfg.RNG_SEED)

# set up caffe
if args.cpu_mode:
    caffe.set_mode_cpu()
else:
    caffe.set_mode_gpu()
    caffe.set_device(args.gpu_id)

# Build the (possibly combined) dataset and its training roidb.
imdb, roidb = combined_roidb(args.imdb_name)
print '{:d} roidb entries'.format(len(roidb))

output_dir = get_output_dir(imdb)
print 'Output will be saved to `{:s}`'.format(output_dir)

train_net(args.solver, roidb, output_dir,
          pretrained_model=args.pretrained_model,
          max_iters=args.max_iters)
def test_net(net, imdb):
    """Test a Fast R-CNN network on an image database.

    Runs im_detect over every image using the precomputed roidb proposals,
    keeps per-class detections subject to the adaptive per-set / per-image
    caps below, saves everything to ``detections.pkl`` in the output
    directory, applies NMS across all detections, and evaluates.

    Note: Python 2 code (``xrange``, ``print`` statements, ``cPickle``).
    """
    num_images = len(imdb.image_index)
    # heuristic: keep an average of 40 detections per class per images prior
    # to NMS
    max_per_set = 40 * num_images
    # heuristic: keep at most 100 detection per class per image prior to NMS
    max_per_image = 100
    # detection thresold for each class (this is adaptively set based on the
    # max_per_set constraint)
    thresh = -np.inf * np.ones(imdb.num_classes)
    # top_scores will hold one minheap of scores per class (used to enforce
    # the max_per_set constraint)
    top_scores = [[] for _ in xrange(imdb.num_classes)]
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb
    for i in xrange(num_images):
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        # Detect using the precomputed region proposals for this image.
        scores, boxes = im_detect(net, im, roidb[i]['boxes'])
        _t['im_detect'].toc()

        _t['misc'].tic()
        for j in xrange(1, imdb.num_classes):
            # Keep proposals above the adaptive class threshold, excluding
            # ground-truth rois (gt_classes == 0 marks non-GT proposals).
            inds = np.where((scores[:, j] > thresh[j]) &
                            (roidb[i]['gt_classes'] == 0))[0]
            cls_scores = scores[inds, j]
            # Boxes are class-specific: columns [4j, 4j+4) hold class j's box.
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            cls_boxes = cls_boxes[top_inds, :]
            # push new scores onto the minheap
            for val in cls_scores:
                heapq.heappush(top_scores[j], val)
            # if we've collected more than the max number of detection,
            # then pop items off the minheap and update the class threshold
            if len(top_scores[j]) > max_per_set:
                while len(top_scores[j]) > max_per_set:
                    heapq.heappop(top_scores[j])
                # The heap root is now the smallest retained score.
                thresh[j] = top_scores[j][0]

            all_boxes[j][i] = \
                np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)

            # NOTE(review): dead debug branch, kept byte-identical.
            if 0:
                keep = nms(all_boxes[j][i], 0.3)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    # Re-apply the final per-class thresholds to all collected detections;
    # thresholds may have risen after a given image was processed.
    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Applying NMS to all detections'
    nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)

    print 'Evaluating detections'
    imdb.evaluate_detections(nms_dets, output_dir)