def test():
    """Run the Faster R-CNN demo on a single image and write the result.

    Loads pretrained weights, detects objects in demo/004545.jpg, draws
    boxes/labels for detections scoring >= 0.3, and saves demo/out.jpg.
    """
    import os
    im_file = 'demo/004545.jpg'
    image = cv2.imread(im_file)

    detector = FasterRCNN()
    network.load_net('/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    detector.cuda()
    # Fix: switch to inference mode so Dropout/BatchNorm behave
    # deterministically (every other demo script in this project does this).
    detector.eval()
    print('load model successfully!')

    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    t = Timer()
    t.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.3)
    runtime = t.toc()
    print('total spend: {}s'.format(runtime))

    im2show = np.copy(image)
    for i, det in enumerate(dets):
        # Second-level confidence filter on top of detect()'s threshold.
        if scores[i] < 0.3:
            continue
        det = tuple(int(x) for x in det)
        cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]),
                    (det[0], det[1] + 15), cv2.FONT_HERSHEY_PLAIN, 1.0,
                    (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
def test():
    """Detect objects in a demo image, then save and display the result."""
    import os

    im_file = 'demo/004545.jpg'
    # im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009036.jpg'
    # im_file = '/media/longc/Data/data/2DMOT2015/test/ETH-Crossing/img1/000100.jpg'
    image = cv2.imread(im_file)

    model_file = 'models/saved_model3/faster_rcnn_200000.pth'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch3/faster_rcnn_100000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch2/faster_rcnn_2000.h5'
    detector = FasterRCNN()
    detector.load_state_dict(torch.load(model_file))
    detector.cuda()
    detector.eval()
    print('load model successfully!')
    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    timer = Timer()
    timer.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.7)
    elapsed = timer.toc()
    print('total spend: {}s'.format(elapsed))

    im2show = np.copy(image)
    for idx, box in enumerate(dets):
        box = tuple(int(v) for v in box)
        label = '%s: %.3f' % (classes[idx], scores[idx])
        cv2.rectangle(im2show, box[0:2], box[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, label, (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)

    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
    cv2.imshow('demo', im2show)
    cv2.waitKey(0)
def __init__(self, weights=None):
    """Build the detector, downloading pretrained weights if none are given.

    weights: path to a saved network file; when None, fetch the released
    faster-rcnn weights archive into ./weights/ (requires the external
    `wget` and `unzip` binaries on PATH).
    """
    if weights is None:
        if not os.path.exists('weights'):
            os.mkdir('weights')
        download_url = 'https://github.com/ArnholdInstitute/ColdSpots/releases/download/1.0/faster-rcnn.zip'
        if not os.path.exists('weights/faster-rcnn'):
            print('Downloading weights for faster-rcnn')
            # Skip the download when the archive is already on disk.
            if not os.path.exists(os.path.join('weights/faster-rcnn.zip')):
                check_output([
                    'wget', download_url, '-O', 'weights/faster-rcnn.zip'
                ])
            print('Unzipping...')
            check_output(
                ['unzip', 'weights/faster-rcnn.zip', '-d', 'weights'])
        # description.json names the actual weights file inside the archive.
        description = json.load(
            open('weights/faster-rcnn/description.json'))
        weights = os.path.join('weights/faster-rcnn', description['weights'])
    print('Building model...')
    # NOTE(review): '__backround__' is misspelled, but it presumably must
    # match the class list used at training time — confirm before fixing.
    self.model = FasterRCNNModel(classes=['__backround__', 'building'],
                                 debug=False)
    network.load_net(weights, self.model)
    self.model.cuda()
    self.model.eval()
def build_extractor(model_file, classes=None):
    """Create a FasterRCNN feature extractor on the GPU in eval mode.

    model_file: weights path handed to network.load_net.
    classes: optional class list forwarded to the FasterRCNN constructor;
    when None the default constructor is used.
    """
    extractor = FasterRCNN() if classes is None else FasterRCNN(classes)
    extractor.cuda()
    extractor.eval()
    network.load_net(model_file, extractor)
    print('load model successfully!')
    return extractor
def main():
    """Evaluate object-detection recall of a pretrained Faster R-CNN on the
    'small' visual_genome test split (Python 2 script)."""
    global args
    print "Loading training set and testing set..."
    # train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome('small', 'test')
    object_classes = test_set.object_classes
    print "Done."

    # train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)

    net = FasterRCNN(use_kmeans_anchors=args.use_kmeans_anchors, n_classes=len(object_classes), model=args.base_model)
    # Load the 12-epoch detection checkpoint trained on the small split.
    network.load_net('./output/detection/Faster_RCNN_small_vgg_12epoch_epoch_11.h5', net)
    # network.load_net('./output/detection/RPN_object1_best.h5', net)
    # network.set_trainable(net.features, requires_grad=False)
    net.cuda()

    # Testing
    recall = test(test_loader, net)

    print('Recall: '
          'object: {recall: .3f}%'.format(recall=recall*100))
def test():
    """Run detection on a demo image and visualize boxes with matplotlib."""
    import os

    im_file = 'demo/004545.jpg'
    # im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009036.jpg'
    # im_file = '/media/longc/Data/data/2DMOT2015/test/ETH-Crossing/img1/000100.jpg'
    image = cv2.imread(im_file)

    # model_file = './VGGnet_fast_rcnn_iter_70000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch3/faster_rcnn_100000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch2/faster_rcnn_2000.h5'
    model_file = './models/saved_model_max/faster_rcnn_100000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    timer = Timer()
    timer.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.7)
    elapsed = timer.toc()
    print('total spend: {}s'.format(elapsed))

    im2show = np.copy(image)
    img = mpimg.imread(im_file)
    # Create figure and axes, then display the image.
    fig, ax = plt.subplots(1)
    ax.imshow(img)
    # One rectangle patch (plus its class/score label) per detection.
    for idx, box in enumerate(dets):
        width = box[2] - box[0]
        height = box[3] - box[1]
        rect = patches.Rectangle(box[0:2], width, height, linewidth=1,
                                 edgecolor='r', facecolor='none')
        plt.text(box[0], box[1], '%s: %.3f' % (classes[idx], scores[idx]))
        ax.add_patch(rect)
    plt.show()
    print('aa')
def load_model(model_file_path):
    """Load Faster R-CNN weights from ``model_file_path`` and return the
    detector, moved to the GPU and set to evaluation mode."""
    net = FasterRCNN()
    network.load_net(model_file_path, net)
    net.cuda()
    net.eval()
    print('load model successfully!')
    return net
def test(visualize=False):
    """Detect on one cervix image, save the annotated output under
    demo/<experiment>/, and optionally show a downscaled preview window."""
    import os

    im_file = 'data/cervix/train/Type_2/1381.jpg'
    im_name = im_file.split('/')[-1]
    image = cv2.imread(im_file)

    # model_file = 'models/VGGnet_fast_rcnn_iter_70000.h5'
    model_file = 'models/saved_model3/faster_rcnn_100000.h5'
    # Output directory is named after the checkpoint file stem.
    expm = model_file.split('/')[-1].split('.')[0]
    expm_dir = os.path.join('demo', expm)
    if not os.path.exists(expm_dir):
        os.makedirs(expm_dir)

    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    # set model in evaluation mode, has effect on Dropout and Batchnorm.
    # Use train() to set train mode.
    detector.eval()
    print('load model successfully!')
    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    timer = Timer()
    timer.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.7)
    elapsed = timer.toc()
    print('total spend: {}s'.format(elapsed))

    im2show = np.copy(image)
    for idx, box in enumerate(dets):
        box = tuple(int(v) for v in box)
        cv2.rectangle(im2show, box[0:2], box[2:4], (255, 205, 51), 4)
        cv2.putText(im2show, '%s: %.3f' % (classes[idx], scores[idx]),
                    (box[0], box[1] + 15), cv2.FONT_HERSHEY_PLAIN, 1.0,
                    (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', expm, im_name), im2show)

    if visualize:
        im2show = cv2.resize(im2show, None, None, fx=0.15, fy=0.15,
                             interpolation=cv2.INTER_LINEAR)
        cv2.imshow('demo', im2show)
        cv2.waitKey(0)
def analysis_video(self, result_dir):
    """Run the detector over every frame in self.image_list, write annotated
    frames to result_dir, update the view table, and return per-frame info.

    Returns a dict {'pictures': [{'filename', 'size', 'objects': [...]}]}
    where each object entry has 'name', 'accuracy' and 'bbox'.
    """
    self.statusbar_stringvar.set('Analysis..Please wait..')
    model_file = 'model.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    info_dict = {}
    info_dict['pictures'] = []
    for index in range(len(self.image_list)):
        accuracy = 0.
        pic_info = {}
        pic_info['objects'] = []
        dets, scores, classes = detector.detect(self.image_list[index], 0.8)
        im2show = np.copy(self.image_list[index])
        for i, det in enumerate(dets):
            object_info = {}
            det = tuple(int(x) for x in det)
            cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
            cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]),
                        (det[0], det[1] + 15), cv2.FONT_HERSHEY_PLAIN, 1.0,
                        (0, 0, 255), thickness=1)
            accuracy += scores[i]
            # object info initial
            object_info['name'] = classes[i]
            object_info['accuracy'] = scores[i]
            object_info['bbox'] = det
            pic_info['objects'].append(object_info)
        # pic_info initial
        pic_info['filename'] = os.path.basename(self.video_path).split('.')[0] + '_' + str(index + 1) + '.jpg'
        pic_info['size'] = im2show.shape
        info_dict['pictures'].append(pic_info)
        cv2.imwrite(os.path.join(result_dir, pic_info['filename']), im2show)
        # Fix: guard against ZeroDivisionError on frames with no detections
        # (len(classes) == 0 made the old `accuracy / len(classes)` crash).
        mean_accuracy = accuracy / len(classes) if len(classes) else 0.
        self.view_table.update(index + 1, **{
            'name': pic_info['filename'],
            'accuracy': mean_accuracy,
            'state': 'yes'
        })
    self.statusbar_stringvar.set('Analysis done!')
    return info_dict
def __init__(self, classes, n_action_classes, n_action_nonagent_roles,
             **kwargs):
    """Assemble the human-object-interaction (HOI) model (Python 2).

    classes: detection class names; length must equal cf.cfg["NCLASSES"].
    n_action_classes / n_action_nonagent_roles: sizes forwarded to the
        human-centric and interaction branches.
    kwargs: optional 'faster_rcnn_config' (cfg file path) and
        'faster_rcnn_command_line' (list of cfg overrides).
    """
    super(HoiModel, self).__init__()
    print "Constructing HOI Model"
    # Apply Faster R-CNN config overrides before the detector is built.
    faster_rcnn_config = kwargs.get("faster_rcnn_config", None)
    if faster_rcnn_config is not None:
        cf.cfg_from_file(faster_rcnn_config)
    faster_rcnn_cle = kwargs.get("faster_rcnn_command_line", None)
    if faster_rcnn_cle is not None:
        cf.cfg_from_list(faster_rcnn_cle)
    # Sanity check: config class count must agree with the supplied list.
    assert(cf.cfg["NCLASSES"] == len(classes)), \
        "inconsistent FasterRCNN settings"
    self.detection_branch = FasterRCNN(classes=classes)
    self.human_centric_branch = HumanCentricBranch(
        n_action_classes, n_action_nonagent_roles)
    self.interaction_branch = InteractionBranch(n_action_nonagent_roles)
.format(i + 1, num_images, detect_time, nms_time) # if vis: # cv2.imshow('test', im2show) # cv2.waitKey(1) if sav: cv2.imwrite(output_dir_detections + str(i) + '.png', im2show) with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir) if __name__ == '__main__': # load data imdb = get_imdb(imdb_name) imdb.competition_mode(on=True) # load net net = FasterRCNN(classes=imdb.classes, debug=False) network.load_net(trained_model, net) print('load model successfully!') net.cuda() net.eval() # evaluation test_net(net, imdb, max_per_image, thresh=thresh, vis=vis)
print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, detect_time, nms_time) if vis: cv2.imshow('test', im2show) cv2.waitKey(1) with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir) if __name__ == '__main__': # load data imdb = get_imdb(imdb_name) imdb.competition_mode(on=True) # load net net = FasterRCNN(classes=imdb.classes, debug=False, backbone='VGG') network.load_net(trained_model, net) print('load model successfully!') net.cuda() net.eval() # evaluation test_net(save_name, net, imdb, max_per_image, thresh=thresh, vis=vis)
# load config cfg_from_file(cfg_file) # overrides default config params lr = cfg.TRAIN.LEARNING_RATE momentum = cfg.TRAIN.MOMENTUM weight_decay = cfg.TRAIN.WEIGHT_DECAY disp_interval = cfg.TRAIN.DISPLAY log_interval = cfg.TRAIN.LOG_IMAGE_ITERS # load data imdb = get_imdb(imdb_name) rdl_roidb.prepare_roidb(imdb) roidb = imdb.roidb data_layer = RoIDataLayer(roidb, imdb.num_classes) # load net net = FasterRCNN(classes=imdb.classes, debug=_DEBUG) network.weights_normal_init(net, dev=0.01) # network.load_pretrained_npy(net, pretrained_model) network.load_pretrained_model(net, 'vgg16') # model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5' # model_file = 'models/saved_model3/faster_rcnn_60000.h5' # network.load_net(model_file, net) # exp_name = 'vgg16_02-19_13-24' # start_step = 60001 # lr /= 10. # network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01) net.cuda() net.train() # set model in train mode, has effect on Dropout and Batchnorm. Use eval() to set evaluation mode. params = list(net.parameters())
bboxes_pick[i, 3], cur_scores[i] ) cur_rec += str_out cur_rec += "\n" else: cur_rec = "{} {}\n".format(file_path, 0) out_file.write(cur_rec) if __name__ == "__main__": ctx = mx.gpu(9) net = FasterRCNN( len(cfg.anchor_ratios) * len(cfg.anchor_scales), cfg.num_classes, pretrained_model="vgg16", feature_name="vgg0_conv12_fwd_output", # pretrained_model="mobilenetv2_0.25", # feature_name="mobilenetv20_features_linearbottleneck12_batchnorm2_fwd_output", ctx=ctx) net.init_params(ctx) net.collect_params().load("/world/data-gpu-112/zhanglinghan/face-detect-faster-rcnn-mx/faster-rcnn-vgg16-9anchors/faster-rcnn-vgg16-9anchors-280000.gluonmodel", ctx) global f_path path_lst = os.listdir("/world/data-c27/face_landmarks_hourglass/detection-input") path_lst.sort() for f_name in path_lst: f_path = os.path.join("/world/data-c27/face_landmarks_hourglass/detection-input", f_name) print("processing {}".format(f_path)) f_path_out = os.path.join("/world/data-c27/face_landmarks_hourglass/detection-output", f_name.strip()) benchmark(net, ctx, f_path_out)
def train():
    """Train Faster R-CNN (ResNet-50/101 backbone) on PASCAL VOC.

    Command-line args select dataset and backbone; checkpoints are written
    to args.output_dir every args.save_interval epochs, and losses are
    optionally logged to tensorboardX.
    """
    args = parse_args()
    args.decay_lrs = cfg.TRAIN.DECAY_LRS
    cfg.USE_GPU_NMS = True if args.use_cuda else False

    assert args.batch_size == 1, 'Only support single batch'

    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY
    gamma = cfg.TRAIN.GAMMA

    # initial tensorboardX writer
    if args.use_tfboard:
        if args.exp_name == 'default':
            writer = SummaryWriter()
        else:
            writer = SummaryWriter('runs/' + args.exp_name)

    if args.dataset == 'voc07trainval':
        args.imdb_name = 'voc_2007_trainval'
        args.imdbval_name = 'voc_2007_test'
    elif args.dataset == 'voc0712trainval':
        args.imdb_name = 'voc_2007_trainval+voc_2012_trainval'
        args.imdbval_name = 'voc_2007_test'
    else:
        raise NotImplementedError

    if args.net == 'res50':
        fname = 'resnet50-caffe.pth'
    elif args.net == 'res101':
        fname = 'resnet101-caffe.pth'
    else:
        raise NotImplementedError

    args.pretrained_model = os.path.join('data', 'pretrained', fname)

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # dataset_cachefile = os.path.join(output_dir, 'dataset.pickle')
    # if not os.path.exists(dataset_cachefile):
    #     imdb, roidb = combined_roidb(args.imdb_name)
    #     cache = [imdb, roidb]
    #     with open(dataset_cachefile, 'wb') as f:
    #         pickle.dump(cache, f)
    #     print('save dataset cache')
    # else:
    #     with open(dataset_cachefile, 'rb') as f:
    #         cache = pickle.load(f)
    #     imdb, roidb = cache[0], cache[1]
    #     print('loaded dataset from cache')

    imdb, roidb = combined_roidb(args.imdb_name)
    train_dataset = RoiDataset(roidb)
    train_dataloader = DataLoader(train_dataset, args.batch_size, shuffle=True)

    model = FasterRCNN(backbone=args.net, pretrained=args.pretrained_model)
    print('model loaded')

    # if cfg.PRETRAINED_RPN:
    #     rpn_model_path = 'output/rpn.pth'
    #     model.load_state_dict(torch.load(rpn_model_path)['model'])
    #     print('loaded rpn!')

    # optimizer
    # Biases get a scaled learning rate and optionally no weight decay,
    # controlled by cfg.TRAIN.DOUBLE_BIAS / cfg.TRAIN.BIAS_DECAY.
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and weight_decay or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': weight_decay
                }]
    optimizer = SGD(params, momentum=momentum)

    if args.use_cuda:
        model = model.cuda()

    model.train()

    iters_per_epoch = int(len(train_dataset) / args.batch_size)

    # start training
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        loss_temp = 0
        rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
        rcnn_tp, rcnn_tn, rcnn_fg, rcnn_bg = 0, 0, 0, 0
        tic = time.time()
        train_data_iter = iter(train_dataloader)

        # step the learning-rate schedule
        if epoch in args.decay_lrs:
            lr = lr * gamma
            adjust_learning_rate(optimizer, lr)
            print('adjust learning rate to {}'.format(lr))

        for step in range(iters_per_epoch):
            im_data, gt_boxes, im_info = next(train_data_iter)
            if args.use_cuda:
                im_data = im_data.cuda()
                gt_boxes = gt_boxes.cuda()
                im_info = im_info.cuda()

            im_data_variable = Variable(im_data)

            output = model(im_data_variable, gt_boxes, im_info)
            rois, _, _, \
            rcnn_cls_loss, rcnn_box_loss, \
            rpn_cls_loss, rpn_box_loss, _train_info = output

            # total loss = RCNN head losses + RPN losses
            loss = rcnn_cls_loss.mean() + rcnn_box_loss.mean() +\
                   rpn_cls_loss.mean() + rpn_box_loss.mean()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_temp += loss.item()

            if cfg.VERBOSE:
                rpn_tp += _train_info['rpn_tp']
                rpn_tn += _train_info['rpn_tn']
                rpn_fg += _train_info['rpn_num_fg']
                rpn_bg += _train_info['rpn_num_bg']
                rcnn_tp += _train_info['rcnn_tp']
                rcnn_tn += _train_info['rcnn_tn']
                rcnn_fg += _train_info['rcnn_num_fg']
                rcnn_bg += _train_info['rcnn_num_bg']

            if (step + 1) % args.display_interval == 0:
                toc = time.time()
                loss_temp /= args.display_interval

                rpn_cls_loss_v = rpn_cls_loss.mean().item()
                rpn_box_loss_v = rpn_box_loss.mean().item()
                rcnn_cls_loss_v = rcnn_cls_loss.mean().item()
                rcnn_box_loss_v = rcnn_box_loss.mean().item()

                print("[epoch %2d][step %4d/%4d] loss: %.4f, lr: %.2e, time cost %.1fs" \
                      % (epoch, step+1, iters_per_epoch, loss_temp, lr, toc - tic))
                print("\t\t\t rpn_cls_loss_v: %.4f, rpn_box_loss_v: %.4f\n\t\t\t "
                      "rcnn_cls_loss_v: %.4f, rcnn_box_loss_v: %.4f" \
                      % (rpn_cls_loss_v, rpn_box_loss_v, rcnn_cls_loss_v, rcnn_box_loss_v))
                if cfg.VERBOSE:
                    print('\t\t\t RPN : [FG/BG] [%d/%d], FG: %.4f, BG: %.4f' % (rpn_fg, rpn_bg, float(rpn_tp) / rpn_fg, float(rpn_tn) / rpn_bg))
                    print('\t\t\t RCNN: [FG/BG] [%d/%d], FG: %.4f, BG: %.4f' % (rcnn_fg, rcnn_bg, float(rcnn_tp) / rcnn_fg, float(rcnn_tn) / rcnn_bg))

                if args.use_tfboard:
                    n_iter = (epoch - 1) * iters_per_epoch + step + 1
                    writer.add_scalar('losses/loss', loss_temp, n_iter)
                    writer.add_scalar('losses/rpn_cls_loss_v', rpn_cls_loss_v, n_iter)
                    writer.add_scalar('losses/rpn_box_loss_v', rpn_box_loss_v, n_iter)
                    writer.add_scalar('losses/rcnn_cls_loss_v', rcnn_cls_loss_v, n_iter)
                    writer.add_scalar('losses/rcnn_box_loss_v', rcnn_box_loss_v, n_iter)
                    if cfg.VERBOSE:
                        writer.add_scalar('rpn/fg_acc', float(rpn_tp) / rpn_fg, n_iter)
                        writer.add_scalar('rpn/bg_acc', float(rpn_tn) / rpn_bg, n_iter)
                        writer.add_scalar('rcnn/fg_acc', float(rcnn_tp) / rcnn_fg, n_iter)
                        writer.add_scalar('rcnn/bg_acc', float(rcnn_tn) / rcnn_bg, n_iter)

                # reset the running display accumulators
                loss_temp = 0
                rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
                rcnn_tp, rcnn_tn, rcnn_fg, rcnn_bg = 0, 0, 0, 0
                tic = time.time()

        if epoch % args.save_interval == 0:
            save_name = os.path.join(
                output_dir, 'faster_{}_epoch_{}.pth'.format(args.net, epoch))
            torch.save({
                'model': model.state_dict(),
                'epoch': epoch,
                'lr': lr
            }, save_name)
from faster_rcnn.rpn_proposal import proposal_train def train_transformation(data, label): data, label = random_flip(data, label) data = imagenetNormalize(data) return data, label train_dataset = VOCDataset(annotation_dir=cfg.annotation_dir, img_dir=cfg.img_dir, dataset_index=cfg.dataset_index, transform=train_transformation, resize_func=img_resize) train_datait = mx.gluon.data.DataLoader(train_dataset, batch_size=1, shuffle=True) ctx = mx.gpu(0) net = FasterRCNN(len(cfg.anchor_ratios) * len(cfg.anchor_scales), cfg.num_classes) net.init_params(ctx) trainer = mx.gluon.trainer.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001, 'wd': 0.0005, 'momentum': 0.9}) anchors_count = len(cfg.anchor_ratios) * len(cfg.anchor_scales) first_iter_saved_toggle = True for epoch in range(1, 21): for it, (data, label) in enumerate(train_datait): data = data.as_in_context(ctx) _n, _c, h, w = data.shape label = label.as_in_context(ctx).reshape((1, -1, 5))
return data, label test_dataset = VOCDataset(annotation_dir=cfg.test_annotation_dir, img_dir=cfg.test_img_dir, dataset_index=cfg.test_dataset_index, transform=test_transformation, resize_func=None) # test_datait = mx.gluon.data.DataLoader(test_dataset, batch_size=1, shuffle=False) ctx = mx.gpu(0) args = parse_args() print("Load model: {}".format(args.model_file)) net = FasterRCNN(len(cfg.anchor_ratios) * len(cfg.anchor_scales), cfg.num_classes, feature_name=args.feature_name) net.init_params(ctx) net.collect_params().load(args.model_file, ctx) det_result = [] prograss_bar = tqdm(total=len(test_dataset)) for it, data_id in enumerate(range(len(test_dataset))): data, label = test_dataset[data_id] data = data.asnumpy() data = data.transpose(1, 2, 0) data, scale = img_resize(data) data = data.transpose(2, 0, 1) data_id = test_dataset.dataset_index[data_id] data = mx.nd.array(data, ctx=ctx) _c, h, w = data.shape
nms_time = _t['misc'].toc(average=False) print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, detect_time, nms_time) df = pandas.DataFrame(all_boxes) df.columns = ['x1', 'y1', 'x2', 'y2', 'score', 'image_id'] df.to_csv('predictions.csv', index=False) print('Total time: %.4f, per image: %.4f' % (total_time, total_time / num_images)) if __name__ == '__main__': # load net net = FasterRCNN(classes=['__backround__', 'building'], debug=False) network.load_net(args.weights, net) print('load model successfully!') net.cuda() net.eval() val_data = json.load(open(args.test_boxes)) # evaluation test_net(net, val_data, max_per_image, thresh=thresh, vis=vis, data_dir='../data')
def test():
    """Track one object per DAVIS sequence using Faster R-CNN proposals.

    For each validation sequence: take the ground-truth mask of the first
    frame, then propagate a single bounding box frame-to-frame by merging
    overlapping RPN proposals (falling back to full detection when no
    proposal overlaps), saving crops and visualizations under demo/ and the
    final per-frame boxes to crop.npy.
    """
    # Set up dataloader
    data_loader = DAVIS_seq_dataloader(split='val')

    model_file = './model/VGGnet_fast_rcnn_iter_70000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('Load Faster R-CNN model successfully!')

    # unet_model = './model/vgg_unet_1e-4_500.h5'
    # unet = UNet()
    # network.load_net(unet_model, unet)
    # unet.cuda()
    # network.weights_normal_init(unet, dev=0.01)
    # unet.load_from_faster_rcnn_h5(h5py.File(model_file))
    criterion_bce = torch.nn.BCELoss().cuda()
    weight_decay = 5e-5
    # optimizer = torch.optim.SGD(list(unet.parameters())[26:], lr=1e-4, weight_decay=weight_decay)
    # print('Load U-Net model successfully!')

    crop_set = []
    # Iterate
    for i in range(data_loader.num_seq):
        # Get the first frame info
        seq = data_loader.seq_list[data_loader.out_pointer]
        seq_len = data_loader.seq_len[seq]
        img_blobs, seg_blobs = data_loader.get_next_minibatch()
        img = img_blobs[0,:,:,:]
        im_data, im_scales = detector.get_image_blob(img)
        im_info = np.array([[im_data.shape[1], im_data.shape[2], im_scales[0]]], dtype=np.float32)

        # Get the category of the object in the first frame
        # (rmin/rmax = rows, cmin/cmax = columns of the mask bounding box)
        rmin, rmax, cmin, cmax = bbox(seg_blobs[0,:,:,0])
        features, rois = detector(im_data, im_info, rpn_only=True)
        new_rois_np = np.array([[0, cmin, rmin, cmax, rmax]], dtype=np.float32)
        new_rois_t = torch.from_numpy(new_rois_np).cuda()
        new_rois = Variable(new_rois_t, requires_grad=False)
        # Classify the ground-truth box through the RCNN head.
        pooled_features = detector.roi_pool(features, new_rois)
        x = pooled_features.view(pooled_features.size()[0], -1)
        x = detector.fc6(x)
        x = detector.fc7(x)
        cls_score = detector.score_fc(x)
        cls_prob = F.softmax(cls_score)
        bbox_pred = detector.bbox_fc(x)
        cls_prob_np = cls_prob.cpu().data.numpy()
        bbox_pred_np = bbox_pred.cpu().data.numpy()
        cls_idx = cls_prob_np.argmax()
        cls_conf = cls_prob_np.max()

        # Overfit U-Net with the first frame
        # for i in range(100):
        #     unet.train()
        #     img_t = torch.from_numpy(img_blobs).permute(0,3,1,2).float().cuda()
        #     img_v = Variable(img_t, requires_grad=False)
        #     seg_t = torch.from_numpy(seg_blobs).permute(0,3,1,2).float().cuda()
        #     seg_v = Variable(seg_t, requires_grad=False)
        #     pred = unet(img_v)
        #     loss = criterion_bce(pred, seg_v)
        #     pred_view = pred.view(-1, 1)
        #     seg_view = seg_v.view(-1, 1)
        #     EPS = 1e-6
        #     loss = 0.6 * seg_view.mul(torch.log(pred_view+EPS)) + 0.4 * seg_view.mul(-1).add(1).mul(torch.log(1-pred+EPS))
        #     loss = -torch.mean(loss)
        #     loss_val = loss.data[0]
        #     optimizer.zero_grad()
        #     loss.backward()
        #     optimizer.step()
        #     print('{}/100: {}'.format(i, loss_val))
        # unet.eval()

        # Merge region proposals overlapping with last frame proposal
        for j in range(1, seq_len):
            img_blobs, _ = data_loader.get_next_minibatch()
            img = img_blobs[0,:,:,:]
            im_data, im_scales = detector.get_image_blob(img)
            # 300 x 5, the first elements are useless here
            features, rois = detector(im_data, im_info, rpn_only=True)
            x1, y1, x2, y2 = merge_rois((rmin, rmax, cmin, cmax), rois.cpu().data.numpy(), thres=0.75)
            # Have overlapping proposals
            if x1 is not None:
                # Send to following layers to refine the bbox
                new_rois_np = np.array([[0, x1, y1, x2, y2]], dtype=np.float32)
                new_rois_t = torch.from_numpy(new_rois_np).cuda()
                new_rois = Variable(new_rois_t, requires_grad=False)
                pooled_features = detector.roi_pool(features, new_rois)
                x = pooled_features.view(pooled_features.size()[0], -1)
                x = detector.fc6(x)
                x = detector.fc7(x)
                cls_score = detector.score_fc(x)
                cls_prob = F.softmax(cls_score)
                bbox_pred = detector.bbox_fc(x)
                cls_prob_np = cls_prob.cpu().data.numpy()
                bbox_pred_np = bbox_pred.cpu().data.numpy()
                # Only regress bbox when confidence is greater than 0.8
                if cls_prob_np.max() > 0.8 and cls_prob_np.argmax() != 0:
                    keep = cls_prob_np.argmax()
                    pred_boxes, scores, classes = detector.interpret_faster_rcnn(cls_prob, bbox_pred, new_rois, im_info, im_data.shape, 0.8)
                    # Apply the (dx, dy, dw, dh) deltas of the winning class
                    # to the merged box.
                    cx = (x1 + x2) / 2
                    cy = (y1 + y2) / 2
                    width = x2 - x1 + 1
                    height = y2 - y1 + 1
                    dx = bbox_pred_np[0,keep*4+0]
                    dy = bbox_pred_np[0,keep*4+1]
                    dw = bbox_pred_np[0,keep*4+2]
                    dh = bbox_pred_np[0,keep*4+3]
                    pred_x = dx * width + cx
                    pred_y = dy * height + cy
                    pred_w = np.exp(dw) * width
                    pred_h = np.exp(dh) * height
                    x1 = pred_x - pred_w / 2
                    x2 = pred_x + pred_w / 2
                    y1 = pred_y - pred_h / 2
                    y2 = pred_y + pred_h / 2
            # No overlapping proposals
            if x1 is None:
                # Using Faster R-CNN again to find potential objects
                dets, scores, classes = detector.detect(img, 0.6)
                # Cannot find any salient object
                if dets.shape[0] == 0:
                    x1, y1, x2, y2 = cmin, rmin, cmax, rmax
                else:
                    x1 = dets[:,0]
                    y1 = dets[:,1]
                    x2 = dets[:,2]
                    y2 = dets[:,3]
                    # IoU of every detection against the previous box.
                    pred_area = (x2 - x1 + 1) * (y2 - y1 + 1)
                    init_area = (cmax - cmin + 1) * (rmax - rmin + 1)
                    xx1 = np.maximum(x1, cmin)
                    xx2 = np.minimum(x2, cmax)
                    yy1 = np.maximum(y1, rmin)
                    yy2 = np.minimum(y2, rmax)
                    inter = (xx2 - xx1 + 1) * (yy2 - yy1 + 1)
                    ovr = inter / (pred_area + init_area - inter)
                    # If there is overlapping, choose the largest IoU bbox
                    try:
                        ovr = ovr[ovr > 0.3]
                        ovr_idx = np.argsort(ovr)[-1]
                        x1 = dets[ovr_idx,0]
                        y1 = dets[ovr_idx,1]
                        x2 = dets[ovr_idx,2]
                        y2 = dets[ovr_idx,3]
                    # Else, choose the highest objectness score one
                    except:
                        if cls_idx == 0:
                            temp_idx = scores.argmax()
                            x1 = dets[temp_idx,0]
                            y1 = dets[temp_idx,1]
                            x2 = dets[temp_idx,2]
                            y2 = dets[temp_idx,3]
                        else:
                            # Prefer the same-class detection closest to the
                            # previous box center.
                            cx = (x1 + x2) / 2
                            cy = (y1 + y2) / 2
                            cc = (cmin + cmax) / 2
                            cr = (rmin + rmax) / 2
                            dist = np.sqrt(np.square(cx-cc) + np.square(cy-cr))
                            dist_idx = np.argsort(dist)
                            for di in dist_idx:
                                if classes[di] == _CLASSES[cls_idx]:
                                    x1 = dets[di,0]
                                    y1 = dets[di,1]
                                    x2 = dets[di,2]
                                    y2 = dets[di,3]
            # Crop the region and send it to U-Net
            # (the except branch handles x1..y2 still being arrays)
            try:
                x1 = int(max(x1, 0))
                x2 = int(min(x2, im_data.shape[2]))
                y1 = int(max(y1, 0))
                y2 = int(min(y2, im_data.shape[1]))
            except:
                x1 = int(max(x1[0], 0))
                x2 = int(min(x2[0], im_data.shape[2]))
                y1 = int(max(y1[0], 0))
                y2 = int(min(y2[0], im_data.shape[1]))
            # MEAN_PIXEL = np.array([103.939, 116.779, 123.68])
            # crop = img_blobs[:, y1:y2+1, x1:x2+1, :] - MEAN_PIXEL
            # crop = img_blobs[:,:,:,:] - MEAN_PIXEL
            # crop_v = Variable(torch.from_numpy(crop).permute(0, 3, 1, 2).cuda(), requires_grad=False)
            # pred = unet(crop_v)
            # pred_np = pred.cpu().data.numpy()[0,0,:,:]
            # pred_np[pred_np < 0.5] = 0
            # pred_np[pred_np >= 0.5] = 1
            # pred_np = pred_np * 255
            # res = pred_np.astype(int)
            # cv2.imwrite('test.png', res)
            # Degenerate box: drop into the debugger for inspection.
            if y2 - y1 <= 1 or x2 - x1 <= 1:
                ipdb.set_trace()
            cv2.imwrite(os.path.join('demo', 'crop_{}_{}.png'.format(i, j)), img[y1:y2+1,x1:x2+1,:])
            # The current box becomes the reference for the next frame.
            rmin = y1
            rmax = y2
            cmin = x1
            cmax = x2
            im2show = np.copy(img)
            cv2.rectangle(im2show, (int(x1),int(y1)), (int(x2),int(y2)), (0, 255, 0), 2)
            cv2.imwrite(os.path.join('demo', '{}_{}.jpg'.format(i, j)), im2show)
            temp = [i, j, x1, y1, x2, y2]
            crop_set.append(temp)
    # Save
    crop_set = np.array(crop_set)
    np.save('crop', crop_set)
# load config cfg_from_file(cfg_file) lr = cfg.TRAIN.LEARNING_RATE momentum = cfg.TRAIN.MOMENTUM weight_decay = cfg.TRAIN.WEIGHT_DECAY disp_interval = cfg.TRAIN.DISPLAY # load data imdb = get_imdb(imdb_name) rdl_roidb.prepare_roidb(imdb) roidb = imdb.roidb data_layer = RoIDataLayer(roidb, imdb.num_classes) # load net net = FasterRCNN(classes=imdb.classes, debug=_DEBUG, training=True) net.cuda() net.train() net.apply(weight_init) #-- parameters initialize #----download resnet101 weights----- pretrained_state = torch.load(pretrained_model) net.resnet.load_state_dict({ k: v for k, v in pretrained_state.items() if k in net.resnet.state_dict() }) for p in net.resnet.conv1.parameters(): p.requires_grad = False for p in net.resnet.bn1.parameters(): p.requires_grad = False
# load config cfg_from_file(cfg_file) lr = cfg.TRAIN.LEARNING_RATE momentum = cfg.TRAIN.MOMENTUM weight_decay = cfg.TRAIN.WEIGHT_DECAY disp_interval = cfg.TRAIN.DISPLAY log_interval = cfg.TRAIN.LOG_IMAGE_ITERS # load data imdb = get_imdb(imdb_name) rdl_roidb.prepare_roidb(imdb) roidb = imdb.roidb data_layer = RoIDataLayer(roidb, imdb.num_classes) # load net net = FasterRCNN(classes=imdb.classes, debug=_DEBUG) network.weights_normal_init(net, dev=0.01) network.load_pretrained_npy(net, pretrained_model) # model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5' # model_file = 'models/saved_model3/faster_rcnn_60000.h5' # network.load_net(model_file, net) # exp_name = 'vgg16_02-19_13-24' # start_step = 60001 # lr /= 10. # network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01) net.cuda() net.train() params = list(net.parameters()) # optimizer = torch.optim.Adam(params[-8:], lr=lr)
class FasterRCNN:
    """Inference wrapper around the project's FasterRCNNModel for building
    detection: handles weight auto-download, forward pass, bbox regression
    and NMS post-processing."""

    def __init__(self, weights=None):
        """Build the model; when weights is None, download and unzip the
        released faster-rcnn archive into ./weights/ (needs wget/unzip)."""
        if weights is None:
            if not os.path.exists('weights'):
                os.mkdir('weights')
            download_url = 'https://github.com/ArnholdInstitute/ColdSpots/releases/download/1.0/faster-rcnn.zip'
            if not os.path.exists('weights/faster-rcnn'):
                print('Downloading weights for faster-rcnn')
                # Skip the download when the archive is already on disk.
                if not os.path.exists(os.path.join('weights/faster-rcnn.zip')):
                    check_output([
                        'wget', download_url, '-O', 'weights/faster-rcnn.zip'
                    ])
                print('Unzipping...')
                check_output(
                    ['unzip', 'weights/faster-rcnn.zip', '-d', 'weights'])
            # description.json names the weights file inside the archive.
            description = json.load(
                open('weights/faster-rcnn/description.json'))
            weights = os.path.join('weights/faster-rcnn', description['weights'])
        print('Building model...')
        self.model = FasterRCNNModel(classes=['__backround__', 'building'],
                                     debug=False)
        network.load_net(weights, self.model)
        self.model.cuda()
        self.model.eval()

    def close_session(self):
        # No-op: kept for interface parity with session-based backends.
        pass

    def predict_image(self, image, threshold, eval_mode=False):
        """
        Infer buildings for a single image.

        Inputs:
            image :: n x m x 3 ndarray - Should be in RGB format
                     (or a path string, which is read with cv2.imread)
        Returns the detections with score >= threshold as a DataFrame with
        columns x1, y1, x2, y2, score; in eval_mode also returns all
        candidate boxes and the forward-pass runtime in seconds.
        """
        if type(image) is str:
            image = cv2.imread(image)
        else:
            image = image[:, :, (2, 1, 0)]  # RGB -> BGR
        im_data, im_scales = self.model.get_image_blob(image)
        im_info = np.array(
            [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
            dtype=np.float32)

        t0 = time.time()
        cls_prob, bbox_pred, rois = self.model(im_data, im_info)
        runtime = time.time() - t0

        scores = cls_prob.data.cpu().numpy()
        # Undo the input scaling to get boxes in original image coordinates.
        boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data.cpu().numpy()
            pred_boxes = bbox_transform_inv(boxes, box_deltas)
            pred_boxes = clip_boxes(pred_boxes, image.shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        current = np.concatenate(
            [
                pred_boxes[:, 4:8],  # (skip the background class)
                np.expand_dims(scores[:, 1], 1)
            ],
            axis=1)
        # Non-maximum suppression at IoU 0.3 over the 'building' class.
        suppressed = current[py_cpu_nms(current.astype(np.float32), 0.3)]
        suppressed = pandas.DataFrame(
            suppressed, columns=['x1', 'y1', 'x2', 'y2', 'score'])
        if eval_mode:
            return suppressed[
                suppressed['score'] >= threshold], suppressed, runtime
        else:
            return suppressed[suppressed['score'] >= threshold]

    def predict_all(self, test_boxes_file, threshold, data_dir=None):
        """Yield (pred, all_rects, annotation) per entry in the test-boxes
        JSON file; image paths are resolved relative to data_dir (defaults
        to the JSON file's directory)."""
        test_boxes = json.load(open(test_boxes_file))
        if data_dir is None:
            data_dir = os.path.join(os.path.dirname(test_boxes_file))
        total_time = 0.0
        for i, anno in enumerate(test_boxes):
            orig_img = cv2.imread(
                '%s/%s' % (data_dir, anno['image_path']))[:, :, (2, 1, 0)]
            # NOTE(review): 'time' shadows the time module inside this
            # method, and total_time is never accumulated — confirm intent.
            pred, all_rects, time = self.predict_image(orig_img, threshold,
                                                       eval_mode=True)
            pred['image_id'] = i
            all_rects['image_id'] = i
            yield pred, all_rects, test_boxes[i]
# create VGG model for state featurization print("Loading image embedding model...") if args.image_embedding_model_type == "resnet": im_emb_model = ResNet50() elif args.image_embedding_model_type == "vgg": im_emb_model = VGG16() else: print("--image_embedding_model_type must be either resnet or vgg") sys.exit(0) print("Done!") # create Faster-RCNN model for state featurization print("Loading Fast-RCNN...") model_file = 'VGGnet_fast_rcnn_iter_70000.h5' model_frcnn = FasterRCNN() network.load_net(model_file, model_frcnn) model_frcnn.cuda() model_frcnn.eval() print("Done!") # create DQN's for the next object, predicates, and attributes print("Creating DQN models...") DQN_next_object_main = DQN_MLP(2048*3+9600 + parameters["maximum_num_entities_per_image"], 1) DQN_next_object_target = DQN_MLP(2048*3+9600 + parameters["maximum_num_entities_per_image"], 1) DQN_predicate_main = DQN_MLP(2048*3+9600 + len(semantic_action_graph.predicate_nodes), 1) DQN_predicate_target = DQN_MLP(2048*3+9600 + len(semantic_action_graph.predicate_nodes), 1) DQN_attribute_main = DQN_MLP(2048*3+9600 + len(semantic_action_graph.attribute_nodes), 1) DQN_attribute_target = DQN_MLP(2048*3+9600 + len(semantic_action_graph.attribute_nodes), 1) print("Done!")
# load config cfg_from_file(cfg_file) lr = cfg.TRAIN.LEARNING_RATE momentum = cfg.TRAIN.MOMENTUM weight_decay = cfg.TRAIN.WEIGHT_DECAY disp_interval = cfg.TRAIN.DISPLAY log_interval = cfg.TRAIN.LOG_IMAGE_ITERS # load data imdb = get_imdb(imdb_name) rdl_roidb.prepare_roidb(imdb) roidb = imdb.roidb data_layer = RoIDataLayer(roidb, imdb.num_classes) # load net net = FasterRCNN(classes=imdb.classes, debug=_DEBUG) network.weights_normal_init(net, dev=0.01) #network.load_pretrained_npy(net, pretrained_model) # model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5' # model_file = 'models/saved_model3/faster_rcnn_60000.h5' # network.load_net(model_file, net) # exp_name = 'vgg16_02-19_13-24' # start_step = 60001 # lr /= 10. # network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01) if os.path.exists('pretrained_vgg.pkl'): pret_net = pkl.load(open('pretrained_vgg.pkl','rb')) else: pret_net = model_zoo.load_url('https://download.pytorch.org/models/vgg16-397923af.pth') pkl.dump(pret_net, open('pretrained_vgg.pkl','wb'), pkl.HIGHEST_PROTOCOL)
def train():
    """Train a Faster R-CNN detector on a Pascal-VOC split.

    Reads all hyper-parameters from ``parse_args()``; writes periodic
    checkpoints to ``args.output_dir`` and, optionally, scalar summaries
    to TensorBoard. Takes no arguments and returns nothing.
    """
    args = parse_args()
    lr = args.lr
    decay_lrs = args.decay_lrs
    momentum = args.momentum
    weight_decay = args.weight_decay
    bais_decay = args.bais_decay  # NOTE: arg name is misspelled upstream ("bias"); kept for compatibility
    gamma = args.gamma

    cfg.USE_GPU_NMS = True if args.use_gpu else False

    if args.use_tfboard:
        writer = SummaryWriter()

    # load data
    print('load data')
    if args.dataset == 'voc07trainval':
        dataset_name = 'voc_2007_trainval'
    elif args.dataset == 'voc12trainval':
        dataset_name = 'voc_2012_trainval'
    elif args.dataset == 'voc0712trainval':
        dataset_name = 'voc_2007_trainval+voc_2012_trainval'
    else:
        raise NotImplementedError

    imdb, roidb = combined_roidb(dataset_name)
    train_dataset = RoiDataset(roidb)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  shuffle=True)
    iter_per_epoch = int(len(train_dataset) / args.batch_size)

    # prepare model
    print('load model')
    model = FasterRCNN(backbone=args.backbone)

    # Build per-parameter optimizer groups: biases optionally get a doubled
    # learning rate (cfg.TRAIN.DOUBLE_BIAS) and decay only when bais_decay
    # is enabled.
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                # FIX: replaced the broken `cond and a or b` idiom with a
                # conditional expression (equivalent here, and safe even
                # when weight_decay == 0).
                params += [{
                    'params': [value],
                    'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),
                    'weight_decay': weight_decay if bais_decay else 0
                }]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': weight_decay
                }]

    if args.use_gpu:
        model = model.cuda()

    model.train()

    # define optimizer
    optimizer = SGD(params, momentum=momentum)

    # training
    print('start training...')
    for epoch in range(args.epochs):
        start_time = time.time()
        train_data_iter = iter(train_dataloader)
        temp_loss = 0
        rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
        faster_rcnn_tp, faster_rcnn_tn, faster_rcnn_fg, faster_rcnn_bg = 0, 0, 0, 0

        # Step-decay the learning rate at the scheduled epochs.
        if epoch in decay_lrs:
            lr = lr * gamma
            adjust_lr(optimizer, lr)
            print('adjusting learning rate to {}'.format(lr))

        for step in range(iter_per_epoch):
            im_data, gt_boxes, im_info = next(train_data_iter)
            if args.use_gpu:
                im_data = im_data.cuda()
                gt_boxes = gt_boxes.cuda()
                im_info = im_info.cuda()

            im_data_variable = Variable(im_data)
            outputs = model(im_data_variable, gt_boxes, im_info)
            rois, _, _, faster_rcnn_cls_loss, faster_rcnn_reg_loss, \
                rpn_cls_loss, rpn_reg_loss, _train_info = outputs

            # Total loss = RCNN head losses + RPN losses.
            loss = faster_rcnn_cls_loss.mean() + faster_rcnn_reg_loss.mean() + \
                rpn_cls_loss.mean() + rpn_reg_loss.mean()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            temp_loss += loss.item()

            if cfg.VERBOSE:
                rpn_tp += _train_info['rpn_tp']
                rpn_tn += _train_info['rpn_tn']
                rpn_fg += _train_info['rpn_num_fg']
                rpn_bg += _train_info['rpn_num_bg']
                faster_rcnn_tp += _train_info['faster_rcnn_tp']
                faster_rcnn_tn += _train_info['faster_rcnn_tn']
                faster_rcnn_fg += _train_info['faster_rcnn_num_fg']
                faster_rcnn_bg += _train_info['faster_rcnn_num_bg']

            if (step + 1) % args.display_interval == 0:
                end_time = time.time()
                temp_loss /= args.display_interval
                rpn_cls_loss_m = rpn_cls_loss.mean().item()
                rpn_reg_loss_m = rpn_reg_loss.mean().item()
                faster_rcnn_cls_loss_m = faster_rcnn_cls_loss.mean().item()
                faster_rcnn_reg_loss_m = faster_rcnn_reg_loss.mean().item()

                print('[epoch %2d][step %4d/%4d] loss: %.4f, time_cost: %.1f' %
                      (epoch, step + 1, iter_per_epoch, temp_loss, end_time - start_time))
                print(
                    'loss: rpn_cls_loss_m: %.4f, rpn_reg_loss_m: %.4f, faster_rcnn_cls_loss_m: %.4f, faster_rcnn_reg_loss_m: %.4f'
                    % (rpn_cls_loss_m, rpn_reg_loss_m, faster_rcnn_cls_loss_m, faster_rcnn_reg_loss_m))

                if args.use_tfboard:
                    n_iter = epoch * iter_per_epoch + step + 1
                    writer.add_scalar('losses/loss', temp_loss, n_iter)
                    writer.add_scalar('losses/rpn_cls_loss_m', rpn_cls_loss_m, n_iter)
                    writer.add_scalar('losses/rpn_reg_loss_m', rpn_reg_loss_m, n_iter)
                    writer.add_scalar('losses/faster_rcnn_cls_loss_m', faster_rcnn_cls_loss_m, n_iter)
                    writer.add_scalar('losses/faster_rcnn_reg_loss_m', faster_rcnn_reg_loss_m, n_iter)
                    if cfg.VERBOSE:
                        # FIX: guard the denominators — with no fg/bg samples
                        # accumulated yet these divisions raised
                        # ZeroDivisionError.
                        writer.add_scalar('rpn/fg_acc',
                                          float(rpn_tp) / max(rpn_fg, 1), n_iter)
                        writer.add_scalar('rpn/bg_acc',
                                          float(rpn_tn) / max(rpn_bg, 1), n_iter)
                        writer.add_scalar(
                            'rcnn/fg_acc',
                            float(faster_rcnn_tp) / max(faster_rcnn_fg, 1), n_iter)
                        writer.add_scalar(
                            'rcnn/bg_acc',
                            float(faster_rcnn_tn) / max(faster_rcnn_bg, 1), n_iter)

                # reset running statistics for the next display window
                temp_loss = 0
                rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
                faster_rcnn_tp, faster_rcnn_tn, faster_rcnn_fg, faster_rcnn_bg = 0, 0, 0, 0
                start_time = time.time()

        # FIX: os.makedirs(..., exist_ok=True) instead of exists()+mkdir:
        # handles nested output paths and is race-free.
        os.makedirs(args.output_dir, exist_ok=True)
        if epoch % args.save_interval == 0:
            save_name = os.path.join(
                args.output_dir, 'faster_rcnn101_epoch_{}.pth'.format(epoch))
            torch.save({
                'model': model.state_dict(),
                'epoch': epoch,
                'lr': lr
            }, save_name)
# ------------ if rand_seed is not None: np.random.seed(rand_seed) # load config cfg_from_file(cfg_file) lr = cfg.TRAIN.LEARNING_RATE momentum = cfg.TRAIN.MOMENTUM weight_decay = cfg.TRAIN.WEIGHT_DECAY disp_interval = cfg.TRAIN.DISPLAY log_interval = cfg.TRAIN.LOG_IMAGE_ITERS # load net net = FasterRCNN(classes=['__background__', 'building'], debug=_DEBUG) network.weights_normal_init(net, dev=0.01) network.load_pretrained_npy(net, pretrained_model) net.cuda() net.train() params = list(net.parameters()) optimizer = torch.optim.SGD(params[8:], lr=lr, momentum=momentum, weight_decay=weight_decay) if not os.path.exists(output_dir): os.makedirs(output_dir)
def main():
    """Train the gluon Faster R-CNN end to end (RPN + RCNN head).

    Hyper-parameters come from the module-level ``args``; progress is
    written through the module-level ``logger``. Checkpoints are saved to
    ``args.save_path`` after every epoch.
    """
    global args, logger
    train_ds = train_dataset()
    # only support batch_size = 1 so far
    train_datait = mx.gluon.data.DataLoader(train_ds, batch_size=1, shuffle=True)
    # Build one context per requested GPU, then keep only the first —
    # multi-GPU training is not wired up here.
    ctx = [mx.gpu(i) for i in range(len(args.gpus.split(",")))]
    ctx = ctx[0]
    net = FasterRCNN(
        len(cfg.anchor_ratios) * len(cfg.anchor_scales), cfg.num_classes,
        feature_name=args.feature_name)
    net.init_params(ctx)
    if args.pretrained_model != "":
        net.collect_params().load(args.pretrained_model, ctx)
        logger.info("loading {}".format(args.pretrained_model))
    # Multiply the LR by 0.9 every 20k updates.
    lr_schdl = mx.lr_scheduler.FactorScheduler(step=20000, factor=0.9)
    trainer = mx.gluon.trainer.Trainer(
        net.collect_params(), 'sgd',
        optimizer_params={
            'learning_rate': args.learning_rate,
            'wd': args.weight_decay,
            "lr_scheduler": lr_schdl,
            'momentum': 0.9
        })
    anchors_count = len(cfg.anchor_ratios) * len(cfg.anchor_scales)
    for epoch in range(0, args.epochs):
        last_iter_end_timestamp = time.time()
        for it, (data, label) in enumerate(train_datait):
            data_loaed_time = time.time()
            data = data.as_in_context(ctx)
            _n, _c, h, w = data.shape
            # label reshaped to (1, num_boxes, 5); presumably
            # [x1, y1, x2, y2, class] per box — TODO confirm with train_dataset.
            label = label.as_in_context(ctx).reshape((1, -1, 5))
            with mx.autograd.record():
                rpn_cls, rpn_reg, f = net.rpn(data)
                f_height = f.shape[2]
                f_width = f.shape[3]
                rpn_cls_gt, rpn_reg_gt = rpn_gt_opr(rpn_reg.shape, label, ctx, h, w)
                rpn_bbox_sampled, rcnn_reg_target, rcnn_cls_target = proposal_train(
                    rpn_cls, rpn_reg, label, f.shape, data.shape, ctx)

                # RPN Loss part
                # Reshape and transpose to the shape of gt
                rpn_cls = rpn_cls.reshape((1, -1, 2, f_height, f_width))
                rpn_cls = mx.nd.transpose(rpn_cls, (0, 1, 3, 4, 2))
                rpn_reg = mx.nd.transpose(
                    rpn_reg.reshape((1, -1, 4, f_height, f_width)),
                    (0, 1, 3, 4, 2))
                # Regression loss only on positive anchors; mask broadcast
                # over the 4 box coordinates.
                mask = (rpn_cls_gt == 1).reshape(
                    (1, anchors_count, f_height, f_width, 1)).broadcast_to(
                        (1, anchors_count, f_height, f_width, 4))
                rpn_loss_reg = mx.nd.sum(
                    mx.nd.smooth_l1(
                        (rpn_reg - rpn_reg_gt) * mask, 3.0)) / mx.nd.sum(mask)
                # Label -1 marks anchors ignored by the cls loss.
                rpn_loss_cls = softmax_celoss_with_ignore(
                    rpn_cls.reshape((-1, 2)), rpn_cls_gt.reshape((-1, )), -1)

                # RCNN part
                # add batch dimension
                rpn_bbox_sampled = mx.nd.concatenate([
                    mx.nd.zeros(
                        (rpn_bbox_sampled.shape[0], 1), ctx), rpn_bbox_sampled
                ], axis=1)
                f = mx.nd.ROIPooling(f, rpn_bbox_sampled, (7, 7),
                                     1.0 / 16)  # VGG16 based spatial stride=16
                rcnn_cls, rcnn_reg = net.rcnn(f)
                # Regression loss only on foreground RoIs (class > 0).
                mask = (rcnn_cls_target > 0).reshape(
                    (rcnn_cls_target.shape[0], 1)).broadcast_to(
                        (rcnn_cls_target.shape[0], 4 * cfg.num_classes))
                rcnn_loss_reg = mx.nd.sum(
                    mx.nd.smooth_l1((rcnn_reg - rcnn_reg_target) *
                                    mask, 1.0)) / mx.nd.sum(mask)
                rcnn_loss_cls = mx.nd.softmax_cross_entropy(
                    rcnn_cls, rcnn_cls_target) / rcnn_cls.shape[0]
                loss = rpn_loss_cls + rpn_loss_reg + rcnn_loss_cls + rcnn_loss_reg
            # NOTE(review): backward/step placed outside the autograd.record()
            # scope per the usual gluon convention — the collapsed original
            # did not preserve indentation; confirm against upstream history.
            loss.backward()
            trainer.step(data.shape[0])
            logger.info(
                "Epoch {} Iter {:>6d}: loss={:>6.5f}, rpn_loss_cls={:>6.5f}, rpn_loss_reg={:>6.5f}, rcnn_loss_cls={:>6.5f}, rcnn_loss_reg={:>6.5f}, lr={:>6.5f}"
                .format(epoch, it, loss.asscalar(), rpn_loss_cls.asscalar(),
                        rpn_loss_reg.asscalar(), rcnn_loss_cls.asscalar(),
                        rcnn_loss_reg.asscalar(), trainer.learning_rate))
        # Always refresh the rolling checkpoint, plus a numbered snapshot
        # every args.save_interval epochs.
        net.collect_params().save(
            os.path.join(args.save_path, "lastest.gluonmodel"))
        if epoch % args.save_interval == 0:
            save_schema = os.path.split(args.save_path)[1] + "-{}"
            net.collect_params().save(
                os.path.join(args.save_path,
                             save_schema.format(epoch) + ".gluonmodel"))
# load config cfg_from_file(cfg_file) lr = cfg.TRAIN.LEARNING_RATE momentum = cfg.TRAIN.MOMENTUM weight_decay = cfg.TRAIN.WEIGHT_DECAY disp_interval = cfg.TRAIN.DISPLAY log_interval = cfg.TRAIN.LOG_IMAGE_ITERS # load data imdb = VisualGenome(split=0, num_im=50) roidb = imdb.roidb data_layer = RoIDataLayer(roidb, imdb.num_classes) # load net net = FasterRCNN(classes=imdb.classes, debug=_DEBUG) network.weights_normal_init(net, dev=0.01) network.load_net(pretrained_model, net) # network.load_pretrained_npy(net, 'checkpoints/VGG_imagenet.npy') net.cuda() net.train() params = list(net.parameters()) print("Params are {}".format( '\n'.join(['{}: {}'.format(n, p.size()) for n,p in net.named_parameters()])) ) # optimizer = torch.optim.Adam(params, lr=0.001, eps=1e-4, weight_decay=weight_decay) optimizer = torch.optim.SGD(params[8:], lr=lr, momentum=momentum, weight_decay=weight_decay) if not os.path.exists(output_dir):
                    thickness=1)
    # NOTE(review): the lines above are the tail of a function whose header
    # lies before this chunk; indentation reconstructed — confirm upstream.
    im_name = os.path.basename(image_file)
    print(os.path.join('demo/det_results', im_name))
    cv2.imwrite(os.path.join('demo/det_results', im_name), im2show)
    #cv2.imshow('demo', im2show)
    #cv2.waitKey(0)


def folder_test(net, folder):
    """Run image_test on every image listed in <folder>/JPEGImages/file_name.txt.

    Each listed name is resolved to a .JPG image and its matching .xml
    annotation under the given dataset folder.
    """
    txt_file = folder + 'JPEGImages/file_name.txt'
    with open(txt_file) as f:
        for line in f:
            img_path = folder + 'JPEGImages/' + line.strip('\n') + '.JPG'
            anno_path = folder + 'Annotations/' + line.strip('\n') + '.xml'
            image_test(net, img_path, anno_path)


if __name__ == '__main__':
    # Load the trained detector weights and run it over the whole folder.
    model_file = 'models/saved_model3/faster_rcnn_100000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')
    #image_file = 'demo/000001.JPG'
    #image_test(detector, image_file, None)
    folder = '/data/jmtian/PlateData/PVW_WRM_CUT/'
    folder_test(detector, folder)