def train(**kwargs):
    """Train Faster R-CNN on the VOC-style dataset configured in config.py.

    Keyword args are forwarded to ``opt._parse`` and override the defaults in
    ``config.py`` (e.g. data paths, learning rate, epoch count).  Progress,
    losses and sample detections are streamed to a visdom dashboard.
    """
    # Parse CLI/keyword overrides into the global config object.
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    # VOCBboxDataset-backed loader: reads images, rescales and randomly flips.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    # Evaluation loader: deterministic order, pinned memory for faster H2D copy.
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()  # build the detection model
    print('model construct completed')
    # Wrap the model in the trainer (owns optimizer, meters, visdom) on GPU.
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:  # resume from a pretrained checkpoint if a path is given
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    # Number of epochs is a hyper-parameter defined in config.py.
    for epoch in range(opt.epoch):
        print ("---------------", epoch, " in ", opt.epoch, "-------------")
        trainer.reset_meters()  # clear loss meters / confusion matrices
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # Move the batch to the GPU.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # One optimization step (forward, losses, backward, optimizer step).
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                # If the debug flag-file exists, drop into ipdb for inspection.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # Plot the running losses.
                trainer.vis.plot_many(trainer.get_meter_data())
                # Plot the ground-truth boxes: undo dataset normalization first,
                # then render boxes + labels onto the image.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('ground_truth_img', gt_img)
                # Plot predicted boxes (box + class + score) on the same image.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('predict_img', pred_img)
                # Show the RPN confusion matrix as text ...
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # ... and the RoI-head confusion matrix as an image.
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        # End-of-epoch evaluation on the test split.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        # Read the current learning rate back from the optimizer.
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        # Push lr / mAP / losses to the visdom log window.
        trainer.vis.log(log_info)
        # Keep the checkpoint with the best mAP so far.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        # At epoch 9: reload the best checkpoint and decay the LR by lr_decay.
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break  # stop training after 14 epochs
def train(**kwargs):
    """Train Faster R-CNN on a custom list-file dataset under ``data_root``.

    Keyword args override config defaults via ``opt._parse``.  Per-epoch
    lr/mAP/loss lines are printed and appended to ``log.txt``.  The learning
    rate is decayed at epochs 9 and 19 (reloading the best checkpoint each
    time) and training stops after epoch 50.
    """
    opt._parse(kwargs)
    # device_num = 6
    data_root = "/home/lsm/TrainSet/"
    train_file = "train.txt"
    test_file = "test.txt"
    trainset = MyDataset(data_root, train_file, opt)
    testset = TestDataset(data_root, test_file, opt)
    print('load data')
    dataloader = data_.DataLoader(trainset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    # trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    # FIX: the log file was opened with a bare open()/close() pair, leaking
    # the handle if training raised; a context manager guarantees closing.
    with open('log.txt', 'w') as f:
        for epoch in range(opt.epoch):
            trainer.reset_meters()
            print("epoch " + str(epoch) + " ...")
            for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
                scale = at.scalar(scale)
                img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
                trainer.train_step(img, bbox, label, scale)
                # (visdom plotting of losses / GT boxes / predictions was
                # disabled in this variant.)
            # End-of-epoch evaluation on the test split.
            eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
            lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
            log_info = 'lr:{}, map:{},loss:{}'.format(
                str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
            print(log_info)
            # FIX: the original wrote log_info without a newline, so all
            # epochs' entries ran together on a single line of log.txt.
            f.write(log_info + '\n')
            # Keep the checkpoint with the best mAP so far.
            if eval_result['map'] > best_map:
                best_map = eval_result['map']
                best_path = trainer.save(best_map=best_map)
            # LR decay schedule: reload best checkpoint and scale the LR at
            # epochs 9 and 19; hard stop at epoch 50.
            if epoch == 9:
                trainer.load(best_path)
                trainer.faster_rcnn.scale_lr(opt.lr_decay)
                lr_ = lr_ * opt.lr_decay
            if epoch == 19:
                trainer.load(best_path)
                trainer.faster_rcnn.scale_lr(opt.lr_decay)
                lr_ = lr_ * opt.lr_decay
            if epoch == 50:
                break
def train(**kwargs):
    """Train Faster R-CNN with visdom visualization of GT and predictions.

    Keyword args override config defaults via ``opt._parse``.  Stops after
    epoch 13; LR is decayed once at epoch 9 after reloading the best model.
    """
    opt._parse(kwargs)
    data_set = TrainDataset()
    print('load data.')
    data_loader = data_.DataLoader(data_set, batch_size=1, shuffle=True)
    testset = TestDataset()
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct.')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    lr = opt.lr
    best_map = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters()  # clear loss meters / confusion matrices
        for ii, (img, bbox, label, scale) in tqdm(enumerate(data_loader)):
            # NOTE(review): only img is moved to the GPU here; bbox/label stay
            # on the CPU — presumably the trainer moves them itself; confirm.
            img = img.cuda()
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                # NOTE(review): unconditional breakpoint — this drops into the
                # debugger every `plot_every` iterations; looks like a leftover
                # from a debugging session — confirm before running unattended.
                ipdb.set_trace()
                """plot loss"""
                trainer.vis.plot_many(trainer.get_meter_data())
                """plot gt_bbox"""
                ori_img = inverse_normalize(img[0].cpu().numpy())
                gt_img = visdom_bbox(ori_img, bbox[0].numpy(), label[0].numpy())
                trainer.vis.img('gt_img', gt_img)
                """plot predicted bbox"""
                pred_bbox, pred_label, pred_score = trainer.faster_rcnn.predict(
                    [ori_img], visualize=True)
                pred_img = visdom_bbox(ori_img, pred_bbox[0], pred_label[0],
                                       pred_score[0])
                trainer.vis.img('pred_img', pred_img)
                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', trainer.roi_cm.conf.float().cpu())
        # End-of-epoch evaluation; keep the best-mAP checkpoint.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        # At epoch 9: reload the best checkpoint and decay the learning rate.
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr = lr * opt.lr_decay
        trainer.vis.plot('test_map', eval_result['map'])
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            print('finish!')
            break
# Demo: run a trained Faster R-CNN checkpoint on a single image and display
# the detections with matplotlib.
import os
import torch as t
from utils.config import Config
from model import FasterRCNNVGG16
from trainer import FasterRCNNTrainer
from data.util import read_image
from utils.vis_tool import vis_bbox
from utils import array_tool as at
import matplotlib.pyplot as plt

# FIX: the original script contained the IPython magic `%matplotlib inline`,
# which is a SyntaxError when the file is executed as a plain Python script.
# The magic only applies inside a notebook; `plt.show()` below covers both
# environments, so the magic line is removed.

img_name = 'demo.jpg'
# read_image returns a CHW float array; add a batch dimension for predict().
raw_img = read_image(f'/content/drive/My Drive/lq_det_hyper/lq_det/misc/{img_name}')
raw_img = t.from_numpy(raw_img).unsqueeze(dim=0)

faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn, using_visdom=False).cuda()
trainer.load('/content/drive/My Drive/lq_det_hyper/lq_det/ckpt/fasterrcnn_12222105_0.712649824453_caffe_pretrain.pth')
Config.caffe_vgg = True  # this model was trained from caffe-pretrained model

# predict() returns per-image lists of boxes, labels and scores.
_bboxes, _labels, _scores = trainer.faster_rcnn.predict(raw_img, visualize=True)
img, bbox, label, score = (at.tonumpy(raw_img[0]),
                           at.tonumpy(_bboxes[0]),
                           at.tonumpy(_labels[0]).reshape(-1),
                           at.tonumpy(_scores[0]).reshape(-1))
vis_bbox(img, bbox, label, score)
plt.show()
def test(**kwargs):
    """Tile-based detection over one large image.

    Cuts the input image into a 12x15 grid of tiles, runs the detector on
    each tile, converts the per-tile boxes to absolute coordinates on the
    full image, accumulates them in ``bboxes.txt`` and finally renders all
    boxes on the original image via visdom.
    """
    # opt._parse(kwargs)
    opt.env = 'test'
    opt.caffe_pretrain = True
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load(
        'C:/Users/86188/Desktop/fiber/checkpoints/fasterrcnn_05031937_0.9089769879243565'
    )
    print('成功加载神经网络')
    # 1. Load the full image to be tested.
    img2 = Image.open(test_img_path)
    img_end = read_image(test_img_path)
    img_end = t.from_numpy(img_end)[None]
    # 2. Cut the image into a 12 x 15 grid of tiles saved under save_path.
    ImageCut.imagecut(img2, 12, 15, save_path)
    # FIX: os.remove raised FileNotFoundError on the first run, when no
    # previous bboxes.txt existed; remove the stale file only if present.
    if os.path.exists('bboxes.txt'):
        os.remove('bboxes.txt')
    # 3. Detect objects tile by tile.
    for filename in os.listdir(save_path):
        img = read_image(os.path.join(save_path, filename))
        img = t.from_numpy(img)[None]
        opt.caffe_pretrain = False  # this model was trained from caffe-pretrained model
        _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True)
        bboxes = at.tonumpy(_bboxes[0])
        scores = at.tonumpy(_scores[0])
        labels = at.tonumpy(_labels[0])
        # 4. Recover the tile's grid position from its file name
        #    ("<row>-<col>.<ext>") so boxes can be shifted to absolute coords.
        fn = filename.split('-')
        row = fn[0]
        col1 = fn[1]
        col2 = col1.split('.')
        col = col2[0]
        print(row, col)
        # Show the per-tile detections.
        part_img = visdom_bbox(at.tonumpy(img[0]), bboxes,
                               labels.reshape(-1), scores.reshape(-1))
        trainer.vis.img('part_img', part_img)
        # Shift tile-local coordinates by the tile's offset in the full image.
        # NOTE(review): the tile stride constants (500.333333 / 528.0) must
        # match ImageCut.imagecut's tiling — confirm against that helper.
        bboxes[:, 0] = bboxes[:, 0] + int(row) * 500.333333
        bboxes[:, 1] = bboxes[:, 1] + int(col) * 528.000000
        bboxes[:, 2] = bboxes[:, 2] + int(row) * 500.333333
        bboxes[:, 3] = bboxes[:, 3] + int(col) * 528.000000
        # 5. Append the absolute coordinates to the accumulator file.
        with open('bboxes.txt', 'ab') as f:
            np.savetxt(f, bboxes, fmt="%f", delimiter=",")
        print('绝对坐标保存成功')
    # 6. Read back every accumulated box and display them on the full image.
    all_bboxes = np.loadtxt(open('bboxes.txt', "rb"), delimiter=",")
    test_img = visdom_bbox(
        at.tonumpy(img_end[0]),
        all_bboxes,
        # labels.reshape(-1),
        # scores.reshape(-1)
    )
    trainer.vis.img('test_img', test_img)
def train(**kwargs):
    """Train Faster R-CNN, logging to both visdom and TensorBoard.

    Keyword args override config defaults via ``opt._parse``.  Per-epoch
    lr/mAP/losses are written to a SummaryWriter under ``outputs/logs/``.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    writer = SummaryWriter('outputs/logs/')
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())
                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)
                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)
                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
            # Console progress print every 5 iterations.
            if ii % 5 == 4:
                meter_data_trainer = trainer.get_meter_data()
                rpn_loc_loss = meter_data_trainer['rpn_loc_loss']
                rpn_cls_loss = meter_data_trainer['rpn_cls_loss']
                roi_loc_loss = meter_data_trainer['roi_loc_loss']
                roi_cls_loss = meter_data_trainer['roi_cls_loss']
                total_loss = meter_data_trainer['total_loss']
                print(
                    'lr:{:>7.4f}, rpn_loc_loss:{:>7.6f}, rpn_cls_loss:{:>7.6f}, roi_loc_loss:{:>7.6f}, roi_cls_loss:{:>7.6f}, total_loss:{:>7.6f}'
                    .format(lr_, rpn_loc_loss, rpn_cls_loss, roi_loc_loss,
                            roi_cls_loss, total_loss))
        # End-of-epoch evaluation; keep the best-mAP checkpoint.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        trainer.vis.plot('test_map', eval_result['map'])
        log_info = 'lr:{:>10.4f}, map:{}, loss:{}'.format(
            lr_, str(eval_result['map']), str(trainer.get_meter_data()))
        print(log_info)
        meter_data_trainer = trainer.get_meter_data()
        rpn_loc_loss = meter_data_trainer['rpn_loc_loss']
        rpn_cls_loss = meter_data_trainer['rpn_cls_loss']
        roi_loc_loss = meter_data_trainer['roi_loc_loss']
        roi_cls_loss = meter_data_trainer['roi_cls_loss']
        total_loss = meter_data_trainer['total_loss']
        # FIX: the originals omitted global_step, so every epoch's point was
        # written at the same step; pass the epoch so curves plot correctly.
        writer.add_scalar("Learning Rate:", lr_, epoch)
        writer.add_scalar("Train map:", eval_result['map'], epoch)
        writer.add_scalar("Rpn Loc Loss:", rpn_loc_loss, epoch)
        writer.add_scalar("Rpn Cls Loss:", rpn_cls_loss, epoch)
        writer.add_scalar("Roi Loc Loss:", roi_loc_loss, epoch)
        writer.add_scalar("Roi Cls Loss:", roi_cls_loss, epoch)
        # FIX: the original logged rpn_loc_loss under "Total Loss:".
        writer.add_scalar("Total Loss:", total_loss, epoch)
        trainer.vis.log(log_info)
    writer.close()
def train(**kwargs):
    """Train a Faster R-CNN (ResNet18 backbone) on one Carrada sequence.

    Deliberately overfits a single sequence/frame for debugging.  Losses,
    images and mAP are logged to TensorBoard; the LR follows an
    ExponentialLR schedule stepped every ``opt.lr_step`` epochs.
    """
    opt._parse(kwargs)
    carrada = download('Carrada')
    train_dataset = Carrada().get('Train')
    # Try to overfit a sequence
    train_set = dict()  # overfit a single sequence
    train_set['2019-09-16-13-20-20'] = train_dataset['2019-09-16-13-20-20']
    val_set = train_set
    train_seqs = SequenceCarradaDataset(train_set)
    val_seqs = SequenceCarradaDataset(val_set)
    # test_seqs = SequenceCarradaDataset(test_set)
    train_seqs_loader = data_.DataLoader(train_seqs,
                                         batch_size=1,
                                         shuffle=True,
                                         # pin_memory=True,
                                         num_workers=opt.num_workers)
    val_seqs_loader = data_.DataLoader(val_seqs,
                                       batch_size=1,
                                       shuffle=False,
                                       # pin_memory=True,
                                       num_workers=opt.num_workers)
    # faster_rcnn = FasterRCNNVGG16(n_fg_class=3)
    # faster_rcnn = FasterRCNNRESNET101(n_fg_class=3)
    faster_rcnn = FasterRCNNRESNET18(n_fg_class=3)
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    scheduler = ExponentialLR(trainer.faster_rcnn.optimizer, gamma=0.9)
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    writer_path = os.path.join(opt.logs_path, opt.model_name)
    os.makedirs(writer_path, exist_ok=True)
    writer = SummaryWriter(writer_path)
    iteration = 0
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        print('Processing epoch: {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()
        for n_seq, sequence_data in tqdm(enumerate(train_seqs_loader)):
            seq_name, seq = sequence_data
            # overfit a single frame of the sequence
            # seq = [seq[155]]  # large
            seq = [seq[115]]  # medium
            # seq = [seq[28]]  # small
            path_to_frames = os.path.join(carrada, seq_name[0])
            train_frame_set = CarradaDataset(opt, seq, 'box', opt.signal_type,
                                             path_to_frames)
            train_frame_loader = data_.DataLoader(train_frame_set,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=opt.num_workers)
            for ii, (img, bbox_, label_, scale) in tqdm(enumerate(train_frame_loader)):
                iteration += 1
                scale = at.scalar(scale)
                img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
                img = normalize(img)
                # Optional debug mode: stop inside the train step every
                # `debug_step` epochs.
                if opt.debug_step and (epoch + 1) % opt.debug_step == 0:
                    trainer.train_step(img, bbox, label, scale, stop=True)
                else:
                    trainer.train_step(img, bbox, label, scale)
                if (iteration + 1) % opt.plot_every == 0:
                    if os.path.exists(opt.debug_file):
                        ipdb.set_trace()
                    train_results = trainer.get_meter_data()
                    writer.add_scalar('Losses/rpn_loc',
                                      train_results['rpn_loc_loss'], iteration)
                    writer.add_scalar('Losses/rpn_cls',
                                      train_results['rpn_cls_loss'], iteration)
                    writer.add_scalar('Losses/roi_loc',
                                      train_results['roi_loc_loss'], iteration)
                    writer.add_scalar('Losses/roi_cls',
                                      train_results['roi_cls_loss'], iteration)
                    writer.add_scalar('Losses/total',
                                      train_results['total_loss'], iteration)
                if (iteration + 1) % opt.img_every == 0:
                    # Log ground-truth and predicted boxes as images.
                    ori_img_ = at.tonumpy(img[0])
                    gt_img = visdom_bbox(ori_img_,
                                         at.tonumpy(bbox_[0]),
                                         at.tonumpy(label_[0]))
                    gt_img_grid = make_grid(torch.from_numpy(gt_img))
                    writer.add_image('Ground_truth_img', gt_img_grid, iteration)
                    # plot predicted bboxes
                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                        [ori_img_], opt.signal_type, visualize=True)
                    pred_img = visdom_bbox(ori_img_,
                                           at.tonumpy(_bboxes[0]),
                                           at.tonumpy(_labels[0]).reshape(-1),
                                           at.tonumpy(_scores[0]))
                    pred_img_grid = make_grid(torch.from_numpy(pred_img))
                    writer.add_image('Predicted_img', pred_img_grid, iteration)
                if opt.train_eval and (iteration + 1) % opt.train_eval == 0:
                    train_eval_result, train_best_iou = eval(
                        train_seqs_loader, faster_rcnn, opt.signal_type)
                    writer.add_scalar('Train/mAP', train_eval_result['map'],
                                      iteration)
                    writer.add_scalar('Train/Best_IoU', train_best_iou,
                                      iteration)
        if opt.debug_step and (epoch + 1) % opt.debug_step == 0:
            _, _ = eval(train_seqs_loader, faster_rcnn, opt.signal_type,
                        stop=True)
        # End-of-epoch validation.
        eval_result, best_iou = eval(val_seqs_loader, faster_rcnn,
                                     opt.signal_type, test_num=opt.test_num)
        writer.add_scalar('Validation/mAP', eval_result['map'], iteration)
        writer.add_scalar('Validation/Best_IoU', best_iou, iteration)
        lr_ = scheduler.get_lr()[0]
        writer.add_scalar('learning_rate', lr_, iteration)
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        print(log_info)
        if eval_result['map'] > best_map:
            """
            test_result, test_best_iou = eval(test_seqs_loader, faster_rcnn,
                                              opt.signal_type,
                                              test_num=opt.test_num)
            writer.add_scalar('Test/mAP', test_result['map'], iteration)
            writer.add_scalar('Test/Best_IoU', test_best_iou, iteration)
            """
            # FIX: best_map was never updated (only best_val_map was set), so
            # this branch fired — and saved a checkpoint — on *every* epoch
            # once map > 0 instead of only on improvements.
            best_map = eval_result['map']
            best_val_map = eval_result['map']
            # best_test_map = test_result['map']
            # best_path = trainer.save(best_val_map=best_val_map, best_test_map=best_test_map)
            best_path = trainer.save(best_val_map=best_val_map)
        if (epoch + 1) % opt.lr_step == 0:
            scheduler.step()
def train(**kwargs):
    """Train Faster R-CNN without visdom; LR decays every 2 epochs.

    Keyword args override config defaults via ``opt._parse``.  Visualization
    code is disabled (kept below as an inert string literal); evaluation
    results are printed to stdout instead.  Stops after epoch 13.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    #trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()  # clear loss meters / confusion matrices
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                # Drop into ipdb when the debug flag-file exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # The block below is a disabled copy of the visdom plotting
                # code, kept as a no-op string literal by the original author.
                '''
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())
                # plot groud truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)
                # plot predicti bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)
                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
                '''
        # End-of-epoch evaluation on the test split.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        print(eval_result)
        #trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        #trainer.vis.log(log_info)
        # Keep the checkpoint with the best mAP so far.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        # Every 2 epochs: reload the best checkpoint and decay the LR.
        if (epoch+1) % 2 == 0:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN for 7 epochs with visdom meters and full-test eval.

    Visualization of GT/predicted boxes is mostly disabled; the LR is decayed
    once at epoch 4 and a checkpoint is saved after every epoch's evaluation.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=2,
                                       shuffle=False,
                                       # pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    for epoch in range(7):
        trainer.reset_meters()
        # NOTE: this dataset variant yields the un-normalized ori_img as well.
        for ii, (img, bbox_, label_, scale, ori_img) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            losses = trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    # ipdb.set_trace()
                    pass
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())
                # Undo normalization for display (mean 0.45, std 0.225).
                ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
                # gt_img = visdom_bbox(at.tonumpy(ori_img_)[0],
                #                      at.tonumpy(bbox_)[0],
                #                      label_[0].numpy())
                # trainer.vis.img('gt_img', gt_img)
                # plot predicted bboxes (image display itself is disabled)
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img, visualize=True)
                # pred_img = visdom_bbox(at.tonumpy(ori_img[0]),
                #                        at.tonumpy(_bboxes[0]),
                #                        at.tonumpy(_labels[0]).reshape(-1),
                #                        at.tonumpy(_scores[0]))
                # trainer.vis.img('pred_img', pred_img)
                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        # Decay the LR once at epoch 4.
        if epoch == 4:
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
        # Evaluate on the whole test set (test_num=1e100 means "no limit").
        eval_result = eval(test_dataloader, faster_rcnn, test_num=1e100)
        # FIX: the original printed the literal string 'eval_result' instead
        # of the evaluation results themselves.
        print(eval_result)
        trainer.save(mAP=eval_result['map'])
class Detector:
    """Inference-only wrapper around a trained Faster R-CNN VGG16 model.

    Loads a checkpoint once and exposes single-image prediction and
    annotation helpers.  Bounding boxes are returned in
    [x_min, y_min, x_max, y_max] order (the underlying model works in
    [y_min, x_min, y_max, x_max]; the wrapper reorders on the way out).
    """

    def __init__(self, model_path, useGPU, n_fg_classes=1):
        '''
        Creates a new detection model using the weights
        stored in the file MODEL_PATH and initializes the GPU
        if USEGPU is set to true.

        MODEL_PATH: path to a trained detection model.
        USEGPU: if true, the GPU will be used for faster computations.
        '''
        # Keep CPU usage predictable for inference-only workloads.
        torch.set_num_threads(1)
        opt.load_path = model_path
        self.faster_rcnn = FasterRCNNVGG16(n_fg_class=n_fg_classes)
        self.trainer = FasterRCNNTrainer(self.faster_rcnn,
                                         n_fg_class=n_fg_classes)
        if useGPU:
            self.trainer = self.trainer.cuda()
        # NOTE(review): trainer.load is handed the path stored in
        # opt.load_path via torch.load's result — confirm trainer.load
        # accepts an already-loaded state dict and not just a path.
        state_dict = torch.load(opt.load_path)
        self.trainer.load(state_dict)
        self.transforms = transforms.ToTensor()
        self.useGPU = useGPU

    def predict_image(self, img, topk=1):
        '''
        Detects objects in the provided testing images.

        IMG: PIL image fitting the input of the trained model
        TOPK: the number of bounding boxes to return. We return the
        most confident bounding boxes first.

        RETURNs: (BBOXES, CONFS) where BBOXES is a n x 4 array, where each
        line corresponds to one bounding box. The bounding box
        coordniates are stored in the format [x_min, y_min, x_max, y_max],
        where x corresponds to the width and y to the height. CONFS are
        the confidence values for each bounding box and are a n x m array.
        Each row corresponds to the bounding box in the same row of BBOXES
        and provides the scores for the m classes, that the model was
        trained to detect.
        '''
        pred_bboxes, pred_labels, pred_scores = self._run_prediction(img)
        # Reorder columns (y_min, x_min, y_max, x_max) -> (x_min, y_min,
        # x_max, y_max) and keep only the topk most confident boxes.
        return pred_bboxes[:topk, [1, 0, 3, 2]], pred_scores[:topk]

    def annotate_image(self, img, topk):
        '''
        Detects objects in the provided testing images.

        IMG: PIL image fitting the input of the trained model
        TOPK: the number of bounding boxes to return. We return the
        most confident bounding boxes first.

        RETURNS: IMG: a PIL image with the detected bounding boxes
        annotated as rectangles.
        '''
        pred_bboxes, pred_labels, pred_scores = self._run_prediction(img)
        draw = PIL.ImageDraw.Draw(img)
        # One color per class; only two classes are supported here.
        colors = [(255, 0, 0), (0, 255, 0)]
        for bbox, label, score in zip(pred_bboxes, pred_labels, pred_scores):
            # bbox is (y_min, x_min, y_max, x_max); reorder for ImageDraw.
            draw.rectangle(bbox[[1, 0, 3, 2]], outline=colors[label])
            #font = PIL.ImageFont.truetype("sans-serif.ttf", 16)
            #draw.text(bbox[[1,0]],"Sample Text",colors[label])
        return img

    def _run_prediction(self, img, confidence_threshold=0.7):
        '''
        Prepare an input image for CNN processing.

        IMG: PIL image

        RETURN: IMG as pytorch tensor in the format 1xCxHxW
        normalized according to data.dataset.caffe_normalize.
        Only detections with score > CONFIDENCE_THRESHOLD are returned.
        '''
        img = img.convert('RGB')
        img = np.asarray(img, dtype=np.float32)
        if img.ndim == 2:
            # reshape (H, W) -> (1, H, W)
            img = img[np.newaxis]
        else:
            # transpose (H, W, C) -> (C, H, W)
            img = img.transpose((2, 0, 1))
        # Scale to [0, 1] then apply the caffe-style mean subtraction.
        proc_img = data.dataset.caffe_normalize(img / 255.)
        tensor_img = torch.from_numpy(proc_img).unsqueeze(0)
        if self.useGPU:
            tensor_img = tensor_img.cuda()
        # This preset filters bounding boxes with a score < *confidence_threshold*
        # and has to be set everytime before using predict()
        self.faster_rcnn.use_preset('visualize')
        pred_bboxes, pred_labels, pred_scores = self.faster_rcnn.predict(
            tensor_img, [(img.shape[1], img.shape[2])])
        # Keep only detections above the confidence threshold.
        box_filter = np.array(pred_scores[0]) > confidence_threshold
        return pred_bboxes[0][box_filter], pred_labels[0][
            box_filter], pred_scores[0][box_filter]
def train(**kwargs):
    """Train Faster R-CNN on the GAN-oversampled augmented dataset.

    Per-eval results are appended to a CSV (header written by init_cols),
    loss curves are persisted via save_losses, test mAP via save_map, and
    the best-mAP checkpoint is saved under checkpoints-gan-class-2/.
    """
    opt._parse(kwargs)
    results_file = 'fasterrcnn-oversampled-gan-class-2.csv'
    # FIX: the original branched on os.path.exists and opened the file with
    # "w+" (existing) or "w" (new), but wrote the CSV header only in the
    # "new" branch.  Both modes truncate, so a re-run produced a truncated,
    # header-less CSV.  Open once and always write the header.
    file = open(results_file, "w")
    columns = init_cols()
    writer = csv.DictWriter(file, fieldnames=columns)
    writer.writeheader()
    id_file_dir = 'ImageSets/Main/train_oversampled_gan_class_v2.txt'
    img_dir = 'JPEGImagesOversampledGANClassv2'
    anno_dir = 'AnnotationsOversampledGANClassv2'
    # Alternative dataset variants (kept for reference):
    # id_file_dir = 'ImageSets/Main/train_oversampled_gan.txt'
    # img_dir = 'JPEGImagesOversampledGAN'
    # anno_dir = 'AnnotationsOversampledGAN'
    # id_file_dir = 'ImageSets/Main/train_oversampled_orig_class.txt'
    # img_dir = 'JPEGImagesOversampledClass'
    # anno_dir = 'AnnotationsOversampledClass'
    # id_file_dir = 'ImageSets/Main/train_oversampled_orig_all.txt'
    # img_dir = 'JPEGImagesOrigOversampledAll'
    # anno_dir = 'AnnotationsOrigOversampledAll'
    dataset = DatasetAugmented(opt, id_file=id_file_dir, img_dir=img_dir,
                               anno_dir=anno_dir)
    print(len(dataset))
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=True,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    best_map = 0
    lr_ = opt.lr
    # Running loss histories, one point every `plot_every` iterations.
    rpn_loc_loss = []
    rpn_cls_loss = []
    roi_loc_loss = []
    roi_cls_loss = []
    total_loss = []
    test_map_list = []
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                # Record current meter values for the loss curves.
                losses_dict = trainer.get_meter_data()
                rpn_loc_loss.append(losses_dict['rpn_loc_loss'])
                roi_loc_loss.append(losses_dict['roi_loc_loss'])
                rpn_cls_loss.append(losses_dict['rpn_cls_loss'])
                roi_cls_loss.append(losses_dict['roi_cls_loss'])
                total_loss.append(losses_dict['total_loss'])
                save_losses('rpn_loc_loss', rpn_loc_loss, epoch)
                save_losses('roi_loc_loss', roi_loc_loss, epoch)
                save_losses('rpn_cls_loss', rpn_cls_loss, epoch)
                save_losses('total_loss', total_loss, epoch)
                save_losses('roi_cls_loss', roi_cls_loss, epoch)
        # print('conf matrix final')
        # print(trainer.roi_cm.conf)
        # save_conf_matrix(trainer.roi_cm.conf, 'confusion_matrix/oversampled-orig-class.csv')
        # End-of-epoch evaluation; per-epoch rows are appended to the CSV.
        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=opt.test_num, epoch=epoch, file=file)
        test_map_list.append(eval_result['map'])
        save_map(test_map_list, epoch)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        print(log_info)
        # Save a timestamped checkpoint whenever the mAP improves.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints-gan-class-2/fasterrcnn_%s' % timestr
            # save_path = 'checkpoints2/fasterrcnn_%s' % timestr
            # save_path = 'checkpoints-class/fasterrcnn_%s' % timestr
            best_path = trainer.save(best_map=best_map, save_path=save_path)
        # At epoch 9: reload the best checkpoint and decay the learning rate.
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
    file.close()
gt_bboxes, gt_labels, gt_difficults, use_07_metric=True) if result['map'] < result2['map']: result = result2 pred_b, pred_l, pred_s = pred_bboxes, pred_labels, pred_scores if result['map'] < result3['map']: result = result3 pred_b, pred_l, pred_s = pred_bboxes_, pred_labels_, pred_scores_ return pred_b, pred_l, pred_s, result # 加载权重 trainer.load('checkpoints/' + model_path) pred_bboxes1, pred_labels1, pred_scores1, gt_bboxes, gt_labels, gt_difficults, ID = bbox_result( test_dataloader, trainer.faster_rcnn) trainer.load('checkpoints/' + model_path2) pred_bboxes_, pred_labels_, pred_scores_, gt_bboxes, gt_labels, gt_difficults, ID = bbox_result( test_dataloader, trainer.faster_rcnn) pred_bboxes_, pred_labels_, pred_scores_, result = nms_reuslt( pred_bboxes1, pred_labels1, pred_scores1, pred_bboxes_, pred_labels_, pred_scores_, gt_bboxes, gt_labels, gt_difficults) print(result) trainer.load('checkpoints/' + model_path3) pred_bboxes1, pred_labels1, pred_scores1, gt_bboxes, gt_labels, gt_difficults, ID = bbox_result( test_dataloader, trainer.faster_rcnn)
def main(**kwargs):
    """Run Faster R-CNN inference on the Test700 dataset and dump results.

    Loads a ResNet-50 classifier checkpoint and a Faster R-CNN detector
    checkpoint, predicts bounding boxes for every test image, fuses/writes
    the detections to a text file, and saves per-image visualizations
    (predicted boxes plus the green ground-truth rectangle) as figures.

    Keyword args are forwarded to ``opt._parse`` (project config overrides).
    """
    opt._parse(kwargs)

    # Previously a SENet classifier was used; kept for reference.
    # checkpoint = t.load('se_0314_all')
    # classifier = t.hub.load(
    #     'moskomule/senet.pytorch',
    #     'se_resnet50',
    #     pretrained=True, )

    # Load the ResNet-50 classifier checkpoint and adapt its head to 8 classes.
    checkpoint = t.load('res50_0314_all')
    classifier = models.resnet50()
    num_classes = 8
    step = [112, 112]  # NOTE(review): unused in this function — confirm before removing
    num_ftrs = classifier.fc.in_features
    classifier.fc = nn.Linear(num_ftrs, num_classes)
    classifier.load_state_dict(checkpoint['state_dict'])
    classifier.eval()
    classifier = classifier.cuda()

    result_file = open('result0520_fasterrcnn.txt', 'w')
    save_root = './result/bbox/'
    makeDir()  # prepare the output directory tree

    # Build the detector and restore its trained weights.
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load('checkpoints/fasterrcnn_04081709_0.6626689194895079')

    data_root = '/home/lsm/testSamples700_new/'
    test_file = 'GT707.txt'
    test700 = Test700Dataset(data_root, test_file, opt)
    test_dataloader = data_.DataLoader(test700,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    print('data loaded!')

    for ii, (fn, imgs, sizes, gt_bboxes_) in tqdm(enumerate(test_dataloader)):
        # print(gt_bboxes_)
        # Ground-truth box arrives as (ymin, xmin, ymax, xmax); indices 1/0/3/2
        # convert it to (x1, y1, x2, y2) for drawing.
        gt_x1 = int(gt_bboxes_[0][0][1])
        gt_y1 = int(gt_bboxes_[0][0][0])
        gt_x2 = int(gt_bboxes_[0][0][3])
        gt_y2 = int(gt_bboxes_[0][0][2])
        # print([gt_x1,gt_y1,gt_x2,gt_y2])
        sizes = [sizes[0][0].item(), sizes[1][0].item()]

        pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(
            imgs, [sizes])
        result_file.write(fn[0])
        # drawBbox(data_root,fn[0],pred_bboxes_,save_root)

        img = Image.open(data_root + fn[0]).convert("RGB")
        plt.imshow(img)
        currentAxis = plt.gca()

        # Serialize every detection for this image into one whitespace-separated
        # line: "<filename> <name> <score> <x1> <y1> <x2> <y2> ..." (6 tokens per box).
        line = fn[0]
        for i in range(len(pred_bboxes_[0])):
            bbox = pred_bboxes_[0][i]
            score = pred_scores_[0][i]
            label = pred_labels_[0][i]
            # Predicted boxes are also (ymin, xmin, ymax, xmax).
            x1, y1, x2, y2 = bbox[1], bbox[0], bbox[3], bbox[2]
            # plt.text(x1, y1, toolNameList[decision]+" "+str(score), size=15, color='r')
            line = line + ' ' + toolNameList[label + 1] + ' ' + str(
                score) + ' ' + str(x1) + ' ' + str(y1) + ' ' + str(
                    x2) + ' ' + str(y2)
        words = line.split()
        # result_file.write(words[0])
        num_box = int((len(words) - 1) / 6)  # 6 tokens describe one detection
        if num_box > 0:
            # Fuse overlapping boxes, then write the merged result and draw them.
            new_boxes = fuseBoxes(words[1:], num_box)
            writeResult(result_file, new_boxes, currentAxis)

        # Draw the ground-truth box in green on top of the predictions.
        rect = patches.Rectangle((gt_x1, gt_y1),
                                 gt_x2 - gt_x1,
                                 gt_y2 - gt_y1,
                                 fill=False,
                                 edgecolor='g',
                                 linewidth=2)
        currentAxis.add_patch(rect)
        plt.savefig(save_root + fn[0])
        plt.close()
    result_file.close()
# In[3]: faster_rcnn = FasterRCNNVGG16() trainer = FasterRCNNTrainer(faster_rcnn).cuda() # You'll need to download pretrained model from [google dirve](https://drive.google.com/open?id=1cQ27LIn-Rig4-Uayzy_gH5-cW-NRGVzY) # # 1. model converted from chainer # In[4]: # in this machine the cupy isn't install correctly... # so it's a little slow trainer.load('/home/cy/chainer_best_model_converted_to_pytorch_0.7053.pth') opt.caffe_pretrain = True # this model was trained from caffe-pretrained model _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True) vis_bbox(at.tonumpy(img[0]), at.tonumpy(_bboxes[0]), at.tonumpy(_labels[0]).reshape(-1), at.tonumpy(_scores[0]).reshape(-1)) # it failed to find the dog, but if you set threshold from 0.7 to 0.6, you'll find it # # 2. model trained with torchvision pretrained model # In[5]: trainer.load('/home/cy/fasterrcnn_12211511_0.701052458187_torchvision_pretrain.pth')
def train(**kwargs):
    """
    The main entry point for training; trains a FasterRCNN-based detector.

    Keyword args are forwarded to ``opt._parse`` (project config overrides).
    Supports resuming from ``opt.load_path`` (restores class names, best mAP
    and the last finished epoch) and an evaluation-only mode via
    ``opt.validate_only``.  Scalars and images are logged through the
    module-level tensorboard ``writer``.
    """
    opt._parse(kwargs)

    # Loading class names from checkpoint, if available.
    # We need to load the checkpoint here so the Dataset can be built with
    # the same class list the model was trained on.
    if opt.load_path:
        old_state = torch.load(opt.load_path)
        class_names = old_state['class_names']
        best_map = old_state['best_map']
    else:
        class_names = []
        best_map = 0
        old_state = None

    print('load data')
    dataset = Dataset(opt, class_names)
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt, dataset.get_class_names())
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )

    faster_rcnn = FasterRCNNVGG16(n_fg_class=dataset.get_class_count())
    print('Model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn,
                                n_fg_class=dataset.get_class_count())
    if opt.use_cuda:
        trainer = trainer.cuda()
    if opt.load_path:
        trainer.load(old_state)
        print_log('load pretrained model from %s' % opt.load_path)

    # FIX: initialize global_step before the validate-only branch below —
    # previously it was only assigned right before the epoch loop, so running
    # with opt.validate_only crashed with a NameError here.
    global_step = 0

    if opt.validate_only:
        num_eval_images = len(testset)
        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           trainer,
                           testset,
                           global_step,
                           test_num=num_eval_images)
        print_log('Evaluation finished, obtained {} using {} out of {} images'.
                  format(eval_result, num_eval_images, len(testset)))
        return

    # Resume from the epoch after the one stored in the checkpoint.
    if old_state and 'epoch' in old_state.keys():
        starting_epoch = old_state['epoch'] + 1
        print_log('Model was trained until epoch {}, continuing with epoch {}'.
                  format(old_state['epoch'], starting_epoch))
    else:
        starting_epoch = 0

    lr_ = opt.lr
    for epoch in range(starting_epoch, opt.num_epochs):
        writer.add_scalar('epoch', epoch, global_step)
        # Step-decay schedule: multiply the base lr by lr_decay once for every
        # schedule milestone that has been passed.
        lr_ = opt.lr * (opt.lr_decay**
                        np.sum(epoch >= np.array(opt.lr_schedule)))
        trainer.faster_rcnn.set_lr(lr_)
        print_log('Starting epoch {} with learning rate {}'.format(epoch, lr_))
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader),
                                                    total=len(dataset)):
            global_step = global_step + 1
            scale = at.scalar(scale).item()
            # Move tensors to the right device; images with no boxes keep the
            # raw (empty) bbox tensor so train_step can detect the case.
            if opt.use_cuda:
                img = img.cuda().float()
                label = label_.float().cuda()
                if len(bbox_[0]) > 0:
                    bbox = bbox_.float().cuda()
                else:
                    bbox = bbox_
            else:
                img, label = img.float(), label_.float()
                if len(bbox_[0]) > 0:
                    bbox = bbox_.float()
                else:
                    bbox = bbox_
            img, label = Variable(img), Variable(label)
            if len(bbox[0]) > 0:
                bbox = Variable(bbox)
            else:
                bbox = np.asarray(bbox)
            #img, bbox, label = Variable(img), Variable(bbox), Variable(label)

            losses = trainer.train_step(img, bbox, label, scale)
            writer.add_scalars(
                'training/losses',
                dict(total_loss=losses.total_loss,
                     roi_cls_loss=losses.roi_cls_loss,
                     roi_loc_loss=losses.roi_loc_loss,
                     rpn_cls_loss=losses.rpn_cls_loss,
                     rpn_loc_loss=losses.rpn_loc_loss), global_step)

            if (ii + 1) % opt.plot_every == 0:
                # Drop into the debugger if the sentinel debug file exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                # trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]),
                                     label_names=dataset.get_class_names() +
                                     ['BG'])
                writer.add_image('gt_img', gt_img, global_step)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]),
                                       label_names=dataset.get_class_names() +
                                       ['BG'])
                writer.add_image('pred_img', pred_img, global_step)

                # rpn confusion matrix(meter)
                # trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                # trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

            if (global_step) % opt.snapshot_every == 0:
                snapshot_path = trainer.save(
                    epoch=epoch, class_names=testset.get_class_names())
                print_log("Snapshotted to {}".format(snapshot_path))

        #snapshot_path = trainer.save(epoch=epoch)
        #print("After epoch {}: snapshotted to {}".format(epoch,snapshot_path))

        # Release references from the last iteration before evaluation to
        # keep GPU memory pressure down.
        for lo in losses:
            del lo
        del img, bbox_, label_, scale
        torch.cuda.empty_cache()

        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           trainer,
                           testset,
                           global_step,
                           test_num=min(opt.test_num, len(testset)))
        print_log(eval_result)

        # TODO: this definitely is not good and will bias evaluation
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=eval_result['map'],
                                     epoch=epoch,
                                     class_names=testset.get_class_names())
            print_log("After epoch {}: snapshotted to {}".format(
                epoch, best_path))

        del eval_result
        torch.cuda.empty_cache()
class Trainer(object):
    """Wraps dataset loading, network construction and the training loop
    for a FasterRCNN-VGG16 detector, with optional checkpoint resuming and
    multi-GPU (DataParallel) support."""

    def __init__(self, **kwargs):
        """Parse config overrides, build the train/val loaders, the network
        and its FasterRCNNTrainer, then restore a checkpoint if given."""
        opt._parse(kwargs)
        self.opt = opt
        self.test_num = self.opt.test_num
        self.device, self.device_id = select_device(is_head=True)

        # Define Dataloader
        print("load data")
        self.train_dataset = Datasets(opt, mode='train')
        self.train_loader = DataLoader(self.train_dataset,
                                       batch_size=opt.batch_size,
                                       shuffle=True,
                                       num_workers=opt.num_workers)
        self.val_dataset = Datasets(opt, mode='val')
        self.val_loader = DataLoader(self.val_dataset,
                                     batch_size=opt.batch_size,
                                     shuffle=False,
                                     pin_memory=True,
                                     num_workers=opt.test_num_workers)
        self.num_batch = len(self.train_loader)

        # Define Network
        # initialize the network here.
        print("define network")
        faster_rcnn = FasterRCNNVGG16()
        self.trainer = FasterRCNNTrainer(faster_rcnn)

        # Resuming Checkpoint: restore epoch counter and best mAP so that
        # training continues where the previous run stopped.
        self.start_epoch = 0
        self.best_map = 0
        self.lr = opt.lr
        if opt.load_path:
            self.trainer.load(opt.load_path)
            self.start_epoch = self.trainer.start_epoch
            self.best_map = self.trainer.best_map
            print('load pretrained model from %s' % opt.load_path)

        # Use multiple GPU
        if opt.use_mgpu and len(self.device_id) > 1:
            self.trainer = torch.nn.DataParallel(self.trainer,
                                                 device_ids=self.device_id)
            print("Using multiple gpu")
        else:
            self.trainer = self.trainer.to(self.device)

        # Visdom: publish the class-name legend once.
        self.trainer.vis.text(self.train_dataset.classes, win='labels')

    def train(self):
        """Run one epoch over the training loader, plotting meters and
        confusion matrices to visdom every `opt.plot_every` iterations."""
        self.trainer.reset_meters()
        for ii, (img, bbox_, label_,
                 scale) in tqdm(enumerate(self.train_loader)):
            scale[0] = at.scalar(scale[0])
            scale[1] = at.scalar(scale[1])
            img = img.to(self.device)
            bbox = bbox_.to(self.device)
            label = label_.to(self.device)
            self.trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # FIX: the original guard was inverted
                # (`if os.path.exists(...): os.makedirs(...)`), which called
                # makedirs exactly when the path already existed and therefore
                # always raised FileExistsError. Create it only when missing.
                if not os.path.exists(opt.debug_file):
                    os.makedirs(opt.debug_file)

                self.trainer.vis.plot_many(self.trainer.get_meter_data())
                # plot ground truth bboxes
                # plot predicted bboxes
                # rpn confusion matrix(meter)
                self.trainer.vis.text(str(
                    self.trainer.rpn_cm.value().tolist()),
                                      win='rpn_cm')
                self.trainer.vis.img(
                    'roi_cm',
                    at.totensor(self.trainer.roi_cm.conf, False).float())

    def eval(self):
        # NOTE(review): this method appears truncated in the source chunk —
        # only the accumulator initialization is visible here.
        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
# Minimal demo script: load one image, run a pretrained Faster R-CNN over it
# and visualize the detections.
import os
import torch as t
from utils.config import opt
from model import FasterRCNNVGG16
from trainer import FasterRCNNTrainer
from data.util import read_image
from utils.vis_tool import vis_bbox
from utils import array_tool as at

# Read the demo image and add a batch dimension: (C, H, W) -> (1, C, H, W).
img = read_image('misc/demo.jpg')
img = t.from_numpy(img)[None]

faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn).cuda()

trainer.load('weights/chainer_best_model_converted_to_pytorch_0.7053.pth')
# The checkpoint comes from a caffe-pretrained backbone, so switch on the
# matching preprocessing before prediction.
opt.caffe_pretrain = True
_bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True)
vis_bbox(at.tonumpy(img[0]),
         at.tonumpy(_bboxes[0]),
         at.tonumpy(_labels[0]).reshape(-1),
         at.tonumpy(_scores[0]).reshape(-1))
def train(**kwargs):
    """Train a FasterRCNN-VGG16 detector with visdom monitoring.

    Keyword args override fields of the global config via ``opt._parse``.
    Evaluates on the test set after every epoch, keeps the best-mAP
    checkpoint, decays the learning rate once after epoch 9 and stops
    after epoch 13.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    # Publish the class-name legend to visdom once.
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0

    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            # One full forward/backward/optimizer-step on this image.
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Drop into the debugger if the sentinel debug file exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm',
                                at.totensor(trainer.roi_cm.conf,
                                            False).float())

        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # NOTE(review): best_path is unbound here if no epoch improved on
            # the initial best_map of 0 — confirm this cannot happen in practice.
            # Reload the best checkpoint and decay the learning rate.
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            break
def train(**kwargs):
    """Train a FasterRCNN-VGG16 detector with cutout (and optional cutmix)
    augmentation, logging progress through the ``logging`` module.

    Keyword args override fields of the global config via ``opt._parse``.
    Checkpoints are only saved once mAP reaches 0.4; the learning rate is
    decayed once after epoch 9.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print(f'load data, data:length {len(dataset)}')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    print(f'load test_data, test_data:length {len(testset)}')
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       # pin_memory=True
                                       )

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    best_map = 0
    lr_ = opt.lr
    best_path = None
    # Feature toggles: input visualization and cutmix are disabled here.
    plot_flag = False
    cutmix_flag = False

    for epoch in range(opt.epoch):
        trainer.reset_meters()
        # index = list(range(len(dataset)))
        # np.random.shuffle(index)
        loss_history = []
        for ii, (imgs, boxes, labels, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # Apply cutout augmentation before moving data to the GPU.
            imgs, boxes, labels = cutout_process(imgs, boxes, labels)
            if plot_flag:
                input_visual(imgs, boxes, labels)
            imgs, boxes, labels = imgs.cuda().float(), boxes.cuda(
            ), labels.cuda()
            # paste_img, paste_bboxes, paste_labels, paste_difficult = dataset.db.get_example(index[ii])
            # paste_img, paste_bboxes, paste_labels, paste_scale = dataset.tsf((paste_img, paste_bboxes, paste_labels))
            # copy_cache holds the current sample; paste_cache would hold a
            # second sample for cutmix but is disabled (all None).
            copy_cache = [imgs, boxes, labels]
            # paste_cache = [paste_img, paste_bboxes, paste_labels]
            paste_cache = [None] * 3
            losses, info, *_ = trainer.train_step(imgs, scale, None,
                                                  *copy_cache, *paste_cache,
                                                  cutmix_flag, plot_flag)
            # if info["use_cutmix"] == 1:
            #     count += 1
            #     # count_x.append(ii)
            #     # count_y.append(losses.total_loss.item())
            loss_history.append(losses.total_loss.item())
            if (ii + 1) % opt.plot_every == 0:
                logging.info(
                    f"[Batch: {epoch}/Iter {ii + 1}] training loss: {np.mean(loss_history):.2f}"
                )

        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=opt.test_num)
        logging.info(
            f"[Batch: {epoch}] eval loss: {eval_result['map']:.4f} lr:{lr_:.3f}"
        )  # note: the :.3f/:4f formats round the displayed values
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            # Only persist checkpoints once the model is reasonably good.
            if best_map < 0.4:
                best_path = None
            else:
                best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # Reload the best checkpoint (if any was saved) and decay the lr.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay