def train(**kwargs):
    """Train a Faster R-CNN (VGG16) detector, or only evaluate it.

    All keyword arguments are handed to ``opt._parse`` and thereby override
    the global options. Depending on the options, the function either runs a
    single evaluation pass over the test set (``opt.validate_only``) and
    returns, or trains from ``starting_epoch`` up to ``opt.num_epochs``,
    evaluating after every epoch and saving the model whenever the mAP
    improves.
    """
    opt._parse(kwargs)

    print('load data')
    train_set = Dataset(opt)
    train_loader = data_.DataLoader(
        train_set,
        batch_size=1,
        shuffle=True,
        num_workers=opt.num_workers,
    )
    testset = TestDataset(opt)
    test_loader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        pin_memory=True,
    )

    faster_rcnn = FasterRCNNVGG16(n_fg_class=train_set.get_class_count(),
                                  anchor_scales=[1])
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn,
                                n_fg_class=train_set.get_class_count())
    if opt.use_cuda:
        trainer = trainer.cuda()

    if opt.load_path:
        old_state = trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    # Evaluation-only mode: score the whole test set once and stop.
    if opt.validate_only:
        num_eval_images = len(testset)
        eval_result = eval(test_loader, faster_rcnn,
                           test_num=num_eval_images)
        report = 'Evaluation finished, obtained {} using {} out of {} images'
        print(report.format(eval_result, num_eval_images, len(testset)))
        return

    # Resume after the last epoch recorded in the loaded state, if any.
    starting_epoch = 0
    if opt.load_path and 'epoch' in old_state.keys():
        starting_epoch = old_state['epoch'] + 1
        print('Model was trained until epoch {}, continuing with epoch {}'
              .format(old_state['epoch'], starting_epoch))

    best_map = 0
    global_step = 0
    for epoch in range(starting_epoch, opt.num_epochs):
        # Step decay: multiply by lr_decay once per opt.epoch_decay epochs.
        epoch_lr = opt.lr * (opt.lr_decay ** (epoch // opt.epoch_decay))
        trainer.faster_rcnn.set_lr(epoch_lr)
        print('Starting epoch {} with learning rate {}'.format(epoch,
                                                               epoch_lr))
        trainer.reset_meters()

        batches = tqdm(enumerate(train_loader), total=len(train_set))
        for batch_idx, (img, bbox_, label_, scale) in batches:
            global_step += 1
            scale = at.scalar(scale)

            if opt.use_cuda:
                img = img.cuda().float()
                bbox = bbox_.float().cuda()
                label = label_.float().cuda()
            else:
                img = img.float()
                bbox = bbox_.float()
                label = label_.float()
            img = Variable(img)
            bbox = Variable(bbox)
            label = Variable(label)

            losses = trainer.train_step(img, bbox, label, scale)

            if (batch_idx + 1) % opt.plot_every == 0:
                # Drop into the debugger when the debug marker file exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Ground-truth boxes drawn on the de-normalized input image.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(
                    ori_img_,
                    at.tonumpy(bbox_[0]),
                    at.tonumpy(label_[0]),
                    label_names=train_set.get_class_names()+['BG'])
                trainer.vis.img('gt_img', gt_img)

                # Boxes predicted by the current model for the same image.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(
                    ori_img_,
                    at.tonumpy(_bboxes[0]),
                    at.tonumpy(_labels[0]).reshape(-1),
                    at.tonumpy(_scores[0]),
                    label_names=train_set.get_class_names()+['BG'])
                trainer.vis.img('pred_img', pred_img)

                # NOTE(review): the flattened source does not show whether
                # this plot belongs to the plot_every block or runs on every
                # step; it is kept with the other visualizations - confirm.
                trainer.vis.plot('train_total_loss', losses[-1].tolist())

            if global_step % opt.snapshot_every == 0:
                snapshot_path = trainer.save(epoch=epoch)
                print("Snapshotted to {}".format(snapshot_path))

        eval_result = eval(test_loader, faster_rcnn,
                           test_num=min(opt.test_num, len(testset)))
        print(eval_result)

        # TODO: this definitely is not good and will bias evaluation
        current_map = eval_result['map']
        if current_map > best_map:
            best_map = current_map
            best_path = trainer.save(best_map=eval_result['map'],
                                     epoch=epoch)
            print("After epoch {}: snapshotted to {}".format(epoch,
                                                             best_path))
        trainer.vis.plot('test_map', eval_result['map'])
def train(**kwargs):
    """Train a FasterRCNN-based detector, or only evaluate it.

    This is the main entry point for training. Keyword arguments are handed
    to ``opt._parse`` and override the global options.

    Behaviour, as driven by the options:

    * ``opt.load_path``: the checkpoint is read with ``torch.load``; its
      ``class_names`` are passed to the training dataset, and its ``epoch``
      (when present) determines the epoch to continue with.
    * ``opt.validate_only``: a single evaluation over the whole test set is
      run and the function returns without training.
    * otherwise: trains up to ``opt.num_epochs``, logging losses and images
      through the module-level ``writer``, snapshotting every
      ``opt.snapshot_every`` steps, evaluating after every epoch and saving
      the model whenever the mAP improves.

    Args:
        **kwargs: option overrides forwarded to ``opt._parse``.
    """
    opt._parse(kwargs)

    # The class names are stored in the checkpoint, so the checkpoint has to
    # be read here, before the datasets are constructed.
    if opt.load_path:
        old_state = torch.load(opt.load_path)
        class_names = old_state['class_names']
        # The periodic snapshots below are saved without a ``best_map``
        # argument, so a checkpoint may carry no usable value; fall back to 0
        # instead of failing. NOTE(review): depends on what trainer.save
        # stores - confirm.
        best_map = old_state.get('best_map') or 0
    else:
        old_state = None
        class_names = []
        best_map = 0

    print('load data')
    dataset = Dataset(opt, class_names)
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,
        num_workers=opt.num_workers,
    )
    testset = TestDataset(opt, dataset.get_class_names())
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        pin_memory=True,
    )

    faster_rcnn = FasterRCNNVGG16(n_fg_class=dataset.get_class_count())
    print('Model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn,
                                n_fg_class=dataset.get_class_count())
    if opt.use_cuda:
        trainer = trainer.cuda()

    if opt.load_path:
        trainer.load(old_state)
        print_log('load pretrained model from %s' % opt.load_path)

    # Must be bound before the validate-only branch, which passes it to
    # eval(); binding it only after that branch raised UnboundLocalError.
    global_step = 0

    if opt.validate_only:
        num_eval_images = len(testset)
        eval_result = eval(test_dataloader, faster_rcnn, trainer, testset,
                           global_step, test_num=num_eval_images)
        print_log(
            'Evaluation finished, obtained {} using {} out of {} images'
            .format(eval_result, num_eval_images, len(testset)))
        return

    # Continue after the last epoch recorded in the checkpoint, if any.
    if old_state and 'epoch' in old_state:
        starting_epoch = old_state['epoch'] + 1
        print_log(
            'Model was trained until epoch {}, continuing with epoch {}'
            .format(old_state['epoch'], starting_epoch))
    else:
        starting_epoch = 0

    for epoch in range(starting_epoch, opt.num_epochs):
        writer.add_scalar('epoch', epoch, global_step)

        # lr_decay is applied once for every entry of opt.lr_schedule that
        # the current epoch has reached.
        lr_ = opt.lr * (opt.lr_decay
                        ** np.sum(epoch >= np.array(opt.lr_schedule)))
        trainer.faster_rcnn.set_lr(lr_)
        print_log('Starting epoch {} with learning rate {}'.format(epoch,
                                                                   lr_))
        trainer.reset_meters()

        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader),
                                                    total=len(dataset)):
            global_step += 1
            scale = at.scalar(scale).item()

            if opt.use_cuda:
                img = img.cuda().float()
                label = label_.float().cuda()
            else:
                img, label = img.float(), label_.float()
            img, label = Variable(img), Variable(label)

            # Images without any ground-truth box are passed on as a plain
            # array instead of a float tensor.
            if len(bbox_[0]) > 0:
                bbox = bbox_.float()
                if opt.use_cuda:
                    bbox = bbox.cuda()
                bbox = Variable(bbox)
            else:
                bbox = np.asarray(bbox_)

            losses = trainer.train_step(img, bbox, label, scale)
            writer.add_scalars(
                'training/losses',
                dict(total_loss=losses.total_loss,
                     roi_cls_loss=losses.roi_cls_loss,
                     roi_loc_loss=losses.roi_loc_loss,
                     rpn_cls_loss=losses.rpn_cls_loss,
                     rpn_loc_loss=losses.rpn_loc_loss),
                global_step)

            if (ii + 1) % opt.plot_every == 0:
                # Drop into the debugger when the debug marker file exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Ground-truth boxes drawn on the de-normalized input image.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(
                    ori_img_,
                    at.tonumpy(bbox_[0]),
                    at.tonumpy(label_[0]),
                    label_names=dataset.get_class_names() + ['BG'])
                writer.add_image('gt_img', gt_img, global_step)

                # Boxes predicted by the current model for the same image.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(
                    ori_img_,
                    at.tonumpy(_bboxes[0]),
                    at.tonumpy(_labels[0]).reshape(-1),
                    at.tonumpy(_scores[0]),
                    label_names=dataset.get_class_names() + ['BG'])
                writer.add_image('pred_img', pred_img, global_step)

            if global_step % opt.snapshot_every == 0:
                snapshot_path = trainer.save(
                    epoch=epoch, class_names=testset.get_class_names())
                print_log("Snapshotted to {}".format(snapshot_path))

        # Drop every reference to the last training batch before evaluating
        # so that empty_cache() can actually hand the memory back. Rebinding
        # to None (rather than ``del``) also works when the loader yielded no
        # batch at all, and unlike ``for lo in losses: del lo`` it really
        # releases the loss tensors.
        # NOTE(review): the flattened source does not show whether this
        # cleanup ran per step or once per epoch; per epoch is assumed.
        losses = img = bbox = label = bbox_ = label_ = scale = None
        torch.cuda.empty_cache()

        eval_result = eval(test_dataloader, faster_rcnn, trainer, testset,
                           global_step,
                           test_num=min(opt.test_num, len(testset)))
        print_log(eval_result)

        # TODO: this definitely is not good and will bias evaluation
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=eval_result['map'],
                                     epoch=epoch,
                                     class_names=testset.get_class_names())
            print_log("After epoch {}: snapshotted to {}".format(
                epoch, best_path))

        del eval_result
        torch.cuda.empty_cache()