def train(**kwargs):
    """Train the detector, then validate and evaluate it after every epoch.

    Args:
        **kwargs: Configuration overrides handed to ``OPT._parse``.
    """
    # Apply the configuration overrides.
    OPT._parse(kwargs)

    # Training set.
    dataset = Dataset(opt=OPT)
    print("加载数据集")
    dataloader = DataLoader(
        dataset=dataset,
        batch_size=1,
        shuffle=True,
        num_workers=OPT.num_workers,
    )

    # Test set.
    testset = TestDataset(opt=OPT)
    test_dataloader = DataLoader(
        dataset=testset,
        batch_size=1,
        shuffle=False,
        num_workers=OPT.num_workers,
        pin_memory=True,
    )

    # Model and trainer.
    faster_rcnn = FasterRCNNVGG16()
    print("模型加载完成")
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    best_map = 0  # best mAP (never updated in this function)
    lr_ = OPT.lr  # configured learning rate

    for epoch in range(OPT.epoch):
        print("Epoch: %s/%s" % (epoch, OPT.epoch - 1))
        print("-" * 10)
        # Clear the accumulated losses at the start of each epoch.
        trainer.reset_meters()

        train_batches = pb.progressbar(
            enumerate(dataloader), max_value=len(dataloader)
        )
        for _, (img, bbox_, label_, scale) in train_batches:
            # Ratio between the original image and the preprocessed one.
            scale = scalar(scale)
            img = img.cuda()
            bbox = bbox_.cuda()
            label = label_.cuda()
            trainer.train_step(imgs=img, bboxes=bbox, labels=label, scale=scale)
        print("train:", trainer.get_meter_data())

        # Validation pass.
        trainer.eval()
        val_batches = pb.progressbar(
            enumerate(test_dataloader), max_value=len(test_dataloader)
        )
        for _, (img, size, _, bbox, label, _) in val_batches:
            img = img.cuda()
            bbox = bbox.cuda()
            label = label.cuda()
            trainer.val_step(img, size, bbox, label)
        print("val:", trainer.get_meter_data())

        eval_result = evaluate(
            dataloader=test_dataloader,
            faster_rcnn=faster_rcnn,
            test_num=OPT.test_num,
        )
        print("mAP: %.4f" % eval_result["mAP"])
        print()

        # Back to training mode for the next epoch.
        trainer.train()
def train(**kwargs):
    """Train Faster R-CNN and periodically dump ground-truth/predicted boxes as images.

    Every ``opt.plot_every`` iterations the current losses are printed and the
    ground-truth and predicted boxes of the current image are written to
    ``gt.png`` and ``pred.png``. No evaluation is run by this function.

    Args:
        **kwargs: Configuration overrides handed to ``opt._parse``.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    # NOTE(review): the test loader is built but never consumed here; kept so
    # that any dataset validation done in TestDataset still happens.
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    for epoch in range(opt.epoch):
        trainer.reset_meters()
        # enumerate() has no length, so pass the total explicitly to get a
        # meaningful progress bar.
        progress = tqdm(enumerate(dataloader), total=len(dataloader))
        for ii, (img, bbox_, label_, scale) in progress:
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Creating the debug file drops into the debugger.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Ground-truth boxes.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                losses = trainer.get_meter_data()
                print(losses)
                write_image(ori_img_, at.tonumpy(bbox[0]), 'gt.png')

                # Predicted boxes.
                _bboxes = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                _bboxes = at.tonumpy(_bboxes[0])
                write_image(ori_img_, _bboxes, 'pred.png')
                print('saved an image')

        if epoch == 13:
            break
def main():
    """Evaluate a saved checkpoint on the validation split and print the result.

    Command-line options:
        -p / --path:   checkpoint file to evaluate; without it nothing is evaluated.
        -s / --set_id: forwarded to ``TestDataset`` as ``set_id``.

    Raises:
        FileNotFoundError: if ``--path`` is given but is not an existing file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--path")
    parser.add_argument("-s", "--set_id")
    args = parser.parse_args()

    valset = TestDataset(opt, set_id=args.set_id, split='val')
    val_dataloader = data_.DataLoader(valset,
                                      batch_size=1,
                                      num_workers=opt.test_num_workers,
                                      shuffle=False,
                                      pin_memory=True)
    print(f"VAL SET: {len(val_dataloader)} ")
    print("Using Mask VGG" if opt.mask else "Using normal VGG16")

    faster_rcnn = FasterRCNNVGG16(mask=opt.mask)
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    if not args.path:
        print("No checkpoint to evaluate is specified")
        return

    # Explicit check instead of ``assert``: asserts are stripped under ``-O``.
    if not os.path.isfile(args.path):
        raise FileNotFoundError(
            'Checkpoint {} does not exist.'.format(args.path))

    checkpoint = torch.load(args.path)['other_info']
    best_map = checkpoint['best_map']
    trainer.load(args.path)
    print("=" * 30 + " Checkpoint " + "=" * 30)
    # The original passed best_map to format() without a placeholder for it.
    print("Loaded checkpoint '{}' (best_map: {})".format(args.path, best_map))

    eval_result = eval(val_dataloader, faster_rcnn, test_num=1000)
    lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
    log_info = 'lr:{}, loss:{},map:{}'.format(
        str(lr_), str(trainer.get_meter_data()), str(eval_result['map']))
    print("Evaluation Results on Validation Set: ")
    print(log_info)
    print("\n\n")
def train(**kwargs):
    """Train Faster R-CNN, optionally distilling from soft labels of a loaded model.

    When ``opt.is_distillation`` is set, the loaded model first predicts on
    every training image and the predictions are stored as ``.npy`` files under
    ``label/``, ``bbox/``, ``feature/`` and ``score/``. Those files are then
    read back during training and passed to ``trainer.train_step``.

    Args:
        **kwargs: Configuration overrides handed to ``opt._parse``.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    testset_all = TestDataset_all(opt, 'test2')
    test_all_dataloader = data_.DataLoader(testset_all,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False,
                                           pin_memory=True)

    tsf = Transform(opt.min_size, opt.max_size)
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    print('model construct completed')

    # Load a previously trained model; the path is set in the config.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    use_distillation = bool(opt.is_distillation)

    # Extract the soft labels needed for knowledge distillation.
    if use_distillation:
        opt.predict_socre = 0.3
        # np.save does not create missing directories.
        for soft_dir in ('label', 'bbox', 'feature', 'score'):
            os.makedirs(soft_dir, exist_ok=True)

        progress = tqdm(enumerate(dataloader), total=len(dataloader))
        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, scale, id_) in progress:
            if len(gt_bboxes_) == 0:
                continue
            sizes = [sizes[0][0].item(), sizes[1][0].item()]
            pred_bboxes_, pred_labels_, pred_scores_, features_ = \
                trainer.faster_rcnn.predict(imgs, [sizes])
            img_file = os.path.join(
                opt.voc_data_dir, 'JPEGImages', id_[0] + '.jpg')
            ori_img = read_image(img_file, color=True)
            img, pred_bboxes_, pred_labels_, scale_ = tsf(
                (ori_img, pred_bboxes_[0], pred_labels_[0]))
            # Drop soft labels that overlap too much with the ground truth,
            # and drop wrong soft labels.
            pred_bboxes_, pred_labels_, pred_scores_ = py_cpu_nms(
                gt_bboxes_[0], gt_labels_[0],
                pred_bboxes_, pred_labels_, pred_scores_[0])
            # Store the soft labels on disk so they do not occupy GPU memory.
            np.save('label/' + str(id_[0]) + '.npy', pred_labels_)
            np.save('bbox/' + str(id_[0]) + '.npy', pred_bboxes_)
            np.save('feature/' + str(id_[0]) + '.npy', features_)
            np.save('score/' + str(id_[0]) + '.npy', pred_scores_)
        opt.predict_socre = 0.05
    t.cuda.empty_cache()

    # Show all class label names in visdom.
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    best_path = None  # set once a checkpoint has been saved
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        print('epoch=%d' % epoch)
        # Reset the meters / confusion matrices.
        trainer.reset_meters()
        progress = tqdm(enumerate(dataloader), total=len(dataloader))
        for ii, (img, sizes, bbox_, label_, scale, id_) in progress:
            if len(bbox_) == 0:
                continue
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()

            if use_distillation:
                # Read the stored soft labels.
                teacher_pred_labels = np.load('label/' + str(id_[0]) + '.npy')
                teacher_pred_bboxes = np.load('bbox/' + str(id_[0]) + '.npy')
                teacher_pred_features_ = np.load(
                    'feature/' + str(id_[0]) + '.npy')
                teacher_pred_scores = np.load('score/' + str(id_[0]) + '.npy')
                # Convert dtypes.
                teacher_pred_bboxes = teacher_pred_bboxes.astype(np.float32)
                teacher_pred_labels = teacher_pred_labels.astype(np.int32)
                teacher_pred_scores = teacher_pred_scores.astype(np.float32)
                # Convert to tensors on the GPU.
                teacher_pred_bboxes_ = at.totensor(teacher_pred_bboxes).cuda()
                teacher_pred_labels_ = at.totensor(teacher_pred_labels).cuda()
                teacher_pred_scores_ = at.totensor(teacher_pred_scores).cuda()
                teacher_pred_features_ = at.totensor(
                    teacher_pred_features_).cuda()
                # If the dataset Transform flipped the image, flip the soft
                # labels as well.
                if teacher_pred_bboxes_[0][1] != bbox[0][0][1]:
                    _, o_C, o_H, o_W = img.shape
                    teacher_pred_bboxes_ = flip_bbox(
                        teacher_pred_bboxes_, (o_H, o_W), x_flip=True)
                # NOTE(review): the numpy scores are passed here, not the GPU
                # tensor teacher_pred_scores_ - confirm what train_step expects.
                trainer.train_step(img, bbox, label, scale, epoch,
                                   teacher_pred_bboxes_,
                                   teacher_pred_labels_,
                                   teacher_pred_features_,
                                   teacher_pred_scores)
            else:
                trainer.train_step(img, bbox, label, scale, epoch)

            # Everything below only feeds visdom.
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Plot loss.
                trainer.vis.plot_many(trainer.get_meter_data())

                # Plot ground-truth boxes.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # The teacher tensors exist only in distillation mode;
                # plotting them unconditionally raised NameError otherwise.
                if use_distillation:
                    gt_img = visdom_bbox(ori_img_,
                                         at.tonumpy(teacher_pred_bboxes_),
                                         at.tonumpy(teacher_pred_labels_),
                                         at.tonumpy(teacher_pred_scores_))
                    trainer.vis.img('gt_img_all', gt_img)

                # Plot predicted boxes.
                _bboxes, _labels, _scores, _ = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # RPN and RoI confusion matrices.
                trainer.vis.text(
                    str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                trainer.vis.text(
                    str(trainer.roi_cm.value().tolist()), win='roi_cm')

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{},ap:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['ap']), str(eval_result['map']),
            str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        # Save the best result and remember its path.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)

        if epoch == 20:
            trainer.save(best_map='20')
            result = eval(test_all_dataloader, trainer.faster_rcnn,
                          test_num=5000)
            print('20result={}'.format(str(result)))
            break

        # Reload the best weights so far and decay the learning rate.
        if epoch % 20 == 15:
            # best_path stays None when mAP never exceeded 0.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    """Train Faster R-CNN with visdom monitoring and per-epoch evaluation.

    After each epoch the model is evaluated on the test loader and the best
    checkpoint is saved. After epoch 9 the best checkpoint is reloaded and the
    learning rate is scaled by ``opt.lr_decay``; training stops after epoch 13.

    Args:
        **kwargs: Configuration overrides handed to ``opt._parse``.
    """
    # Parse the caller-supplied overrides into the global config.
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()  # the model
    print('model construct completed')
    # Wrap the model in the trainer and move it to the GPU.
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)  # load the pretrained model
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    best_path = None  # set once a checkpoint has been saved
    lr_ = opt.lr

    # The number of epochs is a hyper-parameter from the config.
    for epoch in range(opt.epoch):
        print("---------------", epoch, " in ", opt.epoch, "-------------")
        trainer.reset_meters()
        # enumerate() has no length; give tqdm the total explicitly.
        progress = tqdm(enumerate(dataloader), total=len(dataloader))
        for ii, (img, bbox_, label_, scale) in progress:
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # One optimisation step.
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Creating the debug file drops into the debugger.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Plot loss.
                trainer.vis.plot_many(trainer.get_meter_data())

                # Plot ground-truth boxes on the de-normalised image.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('ground_truth_img', gt_img)

                # Plot predicted boxes, labels and scores.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('predict_img', pred_img)

                # RPN confusion matrix as text.
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # RoI confusion matrix as an image.
                trainer.vis.img('roi_cm',
                                at.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        # Read back the current learning rate for logging.
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        # Keep the checkpoint with the best mAP.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)

        # After epoch 9: restart from the best weights with a decayed rate.
        if epoch == 9:
            # best_path stays None when mAP never exceeded 0.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break  # end of training
def train(**kwargs):
    """Train Faster R-CNN with visdom monitoring and per-epoch evaluation.

    The best checkpoint (by test mAP) is saved after each epoch. After epoch 9
    the best checkpoint is reloaded and the learning rate is scaled by
    ``opt.lr_decay``; training stops after epoch 13.

    Args:
        **kwargs: Configuration overrides handed to ``opt._parse``.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    best_path = None  # set once a checkpoint has been saved

    for epoch in range(opt.epoch):
        trainer.reset_meters()
        # enumerate() has no length; give tqdm the total explicitly.
        progress = tqdm(enumerate(dataloader), total=len(dataloader))
        for ii, (img, bbox_, label_, scale) in progress:
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Creating the debug file drops into the debugger.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Plot loss.
                trainer.vis.plot_many(trainer.get_meter_data())

                # Plot ground-truth boxes.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # Plot predicted boxes.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # RPN confusion matrix (meter).
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # RoI confusion matrix.
                trainer.vis.img('roi_cm',
                                at.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)

        if epoch == 9:
            # best_path stays None when mAP never exceeded 0.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if epoch == 13:
            break
def _set_requires_grad(module, enabled):
    """Enable or disable gradient tracking for every parameter of *module*."""
    for param in module.parameters():
        param.requires_grad = enabled


def train(**kwargs):
    """Train Faster R-CNN in four alternating stages with staged freezing.

    Stage schedule (by epoch index):
        0-2:  ``trainer.step1`` - extractor2[10:] and head frozen.
        3-6:  ``trainer.step2`` - extractor1[10:] and rpn frozen, extractor2
              and head trained.
        7-8:  ``trainer.step3`` - rpn trained, extractor2[10:] and head frozen.
        9-10: ``trainer.step4`` - rpn frozen, head trained.
    Epochs above 10 run the data loop without any training step.

    After each epoch the model weights are written to
    ``faster_rcnn_model_<epoch+1>.ckpt`` and the module-level loss histories
    are saved to ``prec_rec_loss/all_losses_<epoch>.npy``.

    Args:
        **kwargs: Configuration overrides handed to ``configurations._parse``.
    """
    configurations._parse(kwargs)

    dataset = Dataset(configurations)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=configurations.num_workers)
    # NOTE(review): the test loader is built but never used in this function.
    testset = TestDataset(configurations)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=configurations.test_num_workers,
        shuffle=False,
        pin_memory=True)

    faster_rcnn = FasterRCNN()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if configurations.load_path:
        trainer.load(configurations.load_path)
        print('load pretrained model from %s' % configurations.load_path)

    for epoch in range(configurations.epoch):
        trainer.reset_meters()
        model = trainer.faster_rcnn

        # Switch which sub-networks are frozen at each stage boundary.
        if epoch == 0:
            # Freeze extractor2 and head.
            _set_requires_grad(model.extractor2[10:], False)
            _set_requires_grad(model.head, False)
            model.extractor1.train()
            model.rpn.train()
        elif epoch == 3:
            # Unfreeze extractor2 and head.
            _set_requires_grad(model.extractor2[10:], True)
            _set_requires_grad(model.head, True)
            # Freeze extractor1 and rpn and put them in eval mode.
            _set_requires_grad(model.extractor1[10:], False)
            _set_requires_grad(model.rpn, False)
            model.extractor1.eval()
            model.rpn.eval()
            model.head.train()
            model.extractor2.train()
        elif epoch == 7:
            model.rpn.train()
            _set_requires_grad(model.rpn, True)
            _set_requires_grad(model.extractor2[10:], False)
            _set_requires_grad(model.head, False)
            model.extractor2.eval()
            model.head.eval()
        elif epoch == 9:
            _set_requires_grad(model.rpn, False)
            _set_requires_grad(model.head, True)
            model.rpn.eval()
            model.head.train()

        # Pick the training step of the current stage once per epoch.
        if epoch <= 2:
            stage_step = trainer.step1
        elif epoch <= 6:
            stage_step = trainer.step2
        elif epoch <= 8:
            stage_step = trainer.step3
        elif epoch <= 10:
            stage_step = trainer.step4
        else:
            stage_step = None  # no stage is defined beyond epoch 10

        # enumerate() has no length; give tqdm the total explicitly.
        progress = tqdm(enumerate(dataloader), total=len(dataloader))
        for ii, (img, bbox_, label_, scale) in progress:
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            if stage_step is not None:
                stage_step(img, bbox, label, scale, epoch)

            if (ii + 1) % 500 == 0:
                append_loss(trainer.get_meter_data())

            if (ii + 1) % configurations.plot_every == 0:
                # Creating the debug file drops into the debugger.
                if os.path.exists(configurations.debug_file):
                    ipdb.set_trace()
                # Plot loss; makedirs avoids the check-then-create race.
                os.makedirs(plot_dir, exist_ok=True)
                plot_loss(loss_list, plot_dir)

        torch.save(faster_rcnn.state_dict(),
                   'faster_rcnn_model_{}.ckpt'.format(epoch + 1))

        # Stack the module-level loss histories, one row per loss term.
        loss_rows = (rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss,
                     total_loss, total_rpn, total_roi)
        all_losses = np.zeros((len(loss_rows), len(total_loss)))
        for row, values in enumerate(loss_rows):
            all_losses[row, :] = values

        save_dir = 'prec_rec_loss/'
        os.makedirs(save_dir, exist_ok=True)
        np.save(save_dir + 'all_losses_' + str(epoch) + '.npy', all_losses)
        print("Epoch {} completed".format(epoch + 1))
def train(**kwargs):
    """Train Faster R-CNN and log losses and mAP to a ``SummaryWriter``.

    The meter losses are logged after every batch under ``Training(batch)``
    and the test mAP after every epoch under ``mAP``. The best checkpoint is
    saved after each epoch; after epoch 9 it is reloaded and the learning rate
    is scaled by ``opt.lr_decay``. Training stops after epoch 13.

    Args:
        **kwargs: Configuration overrides handed to ``opt._parse``.
    """
    opt._parse(kwargs)
    log = SummaryWriter(log_dir=opt.log_dir)
    try:
        dataset = Dataset(opt)
        print('load data')
        dataloader = data_.DataLoader(dataset,
                                      batch_size=1,
                                      shuffle=True,
                                      pin_memory=True,
                                      num_workers=opt.num_workers)
        testset = TestDataset(opt)
        test_dataloader = data_.DataLoader(testset,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False,
                                           pin_memory=True)

        faster_rcnn = FasterRCNNVGG16()
        print('model construct completed')
        trainer = FasterRCNNTrainer(faster_rcnn).cuda()
        if opt.load_path:
            trainer.load(opt.load_path)
            print('load pretrained model from %s' % opt.load_path)

        best_map = 0
        best_path = None  # set once a checkpoint has been saved
        lr_ = opt.lr
        idx = 0  # global batch counter used as the logging step
        for epoch in range(opt.epoch):
            trainer.reset_meters()
            for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
                scale = at.scalar(scale)
                img, bbox, label = (img.cuda().float(),
                                    bbox_.cuda(),
                                    label_.cuda())
                trainer.train_step(img, bbox, label, scale)

                # Log the current losses.
                losses = trainer.get_meter_data()
                log.add_scalars(main_tag='Training(batch)',
                                tag_scalar_dict=losses,
                                global_step=idx)
                idx = idx + 1

                if (ii + 1) % opt.plot_every == 0:
                    print(trainer.get_meter_data())

            eval_result = eval(test_dataloader, faster_rcnn,
                               test_num=opt.test_num)
            log.add_scalar(tag='mAP', scalar_value=eval_result['map'],
                           global_step=epoch)
            lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
            log_info = 'lr:{}, map:{},loss:{}'.format(
                str(lr_), str(eval_result['map']),
                str(trainer.get_meter_data()))
            print(log_info)

            if eval_result['map'] > best_map:
                best_map = eval_result['map']
                best_path = trainer.save(best_map=best_map)

            if epoch == 9:
                # best_path stays None when mAP never exceeded 0.
                if best_path is not None:
                    trainer.load(best_path)
                trainer.faster_rcnn.scale_lr(opt.lr_decay)
                lr_ = lr_ * opt.lr_decay

            if epoch == 13:
                break
    finally:
        # Flush pending events even when training is interrupted.
        log.close()
def train_val():
    """Train Faster R-CNN on a train/validation split with visdom monitoring.

    Samples that do not carry all five keys (``img_id``, ``image``, ``bbox``,
    ``scale``, ``label``) are skipped. After each epoch the validation mAP is
    computed and the best checkpoint is saved. Every 10th epoch the recorded
    checkpoint is reloaded and the learning rate is scaled by ``opt.lr_decay``.
    """
    print('load data')
    train_loader, val_loader = get_train_val_loader(
        opt.root_dir,
        batch_size=opt.batch_size,
        val_ratio=0.1,
        shuffle=opt.shuffle,
        num_workers=opt.num_workers,
        pin_memory=opt.pin_memory)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    best_map = 0
    best_path = None  # set once a checkpoint has been saved
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        tqdm.monitor_interval = 0
        for ii, sample in tqdm(enumerate(train_loader)):
            # A sample without annotations has fewer than five keys. The
            # original built empty numpy placeholders and skipped via
            # ``bbox.size == 0``, which only ever matched those placeholders
            # (on a tensor ``size`` is a method), and it moved the image to
            # the GPU first. Skip directly instead.
            if len(sample.keys()) != 5:
                continue

            img_id, scale = sample['img_id'], sample['scale']
            img = Variable(sample['image'].cuda().float())
            bbox = Variable(sample['bbox'].cuda())
            label = Variable(sample['label'].cuda())

            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Plot loss.
                trainer.vis.plot_many(trainer.get_meter_data())

                # Plot ground-truth boxes.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, img_id[0],
                                     at.tonumpy(bbox[0]),
                                     at.tonumpy(label[0]))
                trainer.vis.img('gt_img', gt_img)

                # Plot predicted boxes.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, img_id[0],
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # RPN confusion matrix (meter).
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # RoI confusion matrix.
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

        mAP = eval_mAP(trainer, val_loader)
        trainer.vis.plot('val_mAP', mAP)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(mAP), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if mAP > best_map:
            best_map = mAP
            best_path = trainer.save(best_map=best_map)

        # Always keep a checkpoint of the final epoch.
        if epoch == opt.epoch - 1:
            best_path = trainer.save()

        if (epoch + 1) % 10 == 0:
            # best_path stays None when nothing has been saved yet.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    """Train Faster R-CNN, logging progress to a timestamped file and the console.

    Logs go to ``logs/faster_rcnn_train_onGray/<timestamp>.log``. The best
    checkpoint (by test mAP) is saved under ``checkpoints/trainedOnGray/``;
    after epoch 9 it is reloaded and the learning rate is scaled by
    ``opt.lr_decay``.

    Args:
        **kwargs: Configuration overrides handed to ``opt._parse``.
    """
    opt._parse(kwargs)

    log_dir = os.path.join("logs", "faster_rcnn_train_onGray")
    os.makedirs(log_dir, exist_ok=True)
    # strftime() formats the current local time when no time tuple is given.
    log_path = os.path.join(log_dir, time.strftime("%Y-%m-%d-%H%M.log"))
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[logging.FileHandler(log_path), logging.StreamHandler()],
    )
    logger = logging.getLogger()

    dataset = Dataset(opt)
    print("load data")
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,
        # pin_memory=True,
        num_workers=opt.num_workers,
    )
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        pin_memory=True,
    )

    faster_rcnn = FasterRCNNVGG16()
    print("model construct completed")
    logger.info(faster_rcnn)
    logger.info("-" * 50)

    trainer = FasterRCNNTrainer(faster_rcnn, logger).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print("load pretrained model from %s" % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win="labels")
    best_map = 0
    best_path = None  # set once a checkpoint has been saved
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        trainer.reset_ave()
        for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()

            # Only every ``opt.print_freq``-th step reports progress.
            print_info = (ii + 1) % opt.print_freq == 0
            if print_info:
                logger.info(
                    "[Train] Epoch:{} [{:03d}/{:03d}]({:.0f}%)\t".format(
                        epoch,
                        ii + 1,
                        len(dataloader),
                        (ii + 1) / len(dataloader) * 100,
                    )
                )
            trainer.train_step(
                img, bbox, label, scale,
                print_epoch=epoch, print_info=print_info,
            )

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]["lr"]
        log_info = "lr:{}, map:{},loss:{}".format(
            str(lr_), str(eval_result["map"]), str(trainer.get_meter_data())
        )
        logger.info(log_info)

        if eval_result["map"] > best_map:
            best_map = eval_result["map"]
            best_path = trainer.save(
                best_map=best_map,
                save_path="checkpoints/trainedOnGray/fasterrcnn_%s"
                % time.strftime("%m%d%H%M"),
            )

        if epoch == 9:
            # best_path stays None when mAP never exceeded 0.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    """Train Faster R-CNN in rounds with optional example mining and distillation.

    Without distillation, the first 5000 lines of ``trainval.txt`` are split
    into ten 500-line files (``1.txt`` ... ``10.txt``) and ten rounds are run;
    with distillation a single round is run. Each round optionally mines
    examples, rebuilds the data loaders, trains up to epoch 20, and appends the
    final evaluation to ``result.txt``. Unless ``opt.example_type`` is
    ``'mAP'``, the per-sample losses of each epoch are written to ``loss.txt``
    in descending order.

    Args:
        **kwargs: Configuration overrides handed to ``opt._parse``.
    """
    opt._parse(kwargs)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    use_distillation = bool(opt.is_distilltion)

    if not use_distillation:
        iteration_number = 10
        path = opt.voc_data_dir + '/ImageSets/Main/trainval.txt'
        # Context managers close every handle; the original leaked them all.
        with open(path, "r") as src:
            for chunk_no in range(1, 11):
                chunk_path = (opt.voc_data_dir + '/ImageSets/Main/'
                              + str(chunk_no) + '.txt')
                with open(chunk_path, "w") as dst:
                    for _ in range(500):
                        dst.write(src.readline())
    else:
        iteration_number = 1

    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    best_path = None  # carried over between rounds
    for jj in range(iteration_number):
        t.cuda.empty_cache()
        if jj > 0:
            opt.datatxt = str(int(opt.datatxt) + 1)
            # Only point load_path at a checkpoint that actually exists.
            if best_path is not None:
                opt.load_path = best_path

        # Example mining.
        print(opt.datatxt)
        if opt.is_example_mining and opt.load_path is not None:
            if opt.example_type == 'mAP':
                example_mining_map(trainer, opt.datatxt)
            elif opt.example_type == 'loss':
                example_mining_loss(opt.datatxt)
            elif opt.example_type == 'diversity':
                example_mining_diversity(trainer, opt.datatxt)
            elif opt.example_type == 'mAP_diversity':
                example_mining_map_diversity(trainer, opt.datatxt)
            else:
                example_mining_map_loss(trainer, opt.datatxt)
            print('example mining completed')

        print('load data')
        dataset = Dataset(opt)
        dataloader = data_.DataLoader(dataset,
                                      batch_size=1,
                                      shuffle=True,
                                      # pin_memory=True,
                                      num_workers=opt.num_workers)
        testset = TestDataset(opt)
        test_dataloader = data_.DataLoader(testset,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False,
                                           pin_memory=True)
        testset_all = TestDataset(opt, 'test')
        test_all_dataloader = data_.DataLoader(
            testset_all,
            batch_size=1,
            num_workers=opt.test_num_workers,
            shuffle=False,
            pin_memory=True)

        # Show all class label names in visdom.
        trainer.vis.text(dataset.db.label_names, win='labels')
        best_map = 0
        lr_ = opt.lr
        t.cuda.empty_cache()
        for epoch in range(opt.epoch):
            t.cuda.empty_cache()
            print('epoch=%d' % epoch)

            track_losses = opt.example_type != 'mAP'
            if track_losses:
                # Parallel lists, so skipped samples cannot misalign ids and
                # losses and no fixed-size buffer can overflow.
                sample_ids = []
                sample_losses = []

            # Reset the meters / confusion matrices.
            trainer.reset_meters()
            for ii, (img, sizes, bbox_, label_, scale,
                     id_) in enumerate(dataloader):
                if len(bbox_) == 0:
                    continue
                t.cuda.empty_cache()
                scale = at.scalar(scale)
                img, bbox, label = (img.cuda().float(),
                                    bbox_.cuda(),
                                    label_.cuda())

                if use_distillation:
                    # Read the stored soft labels.
                    teacher_pred_labels = np.load(
                        'label/' + str(id_[0]) + '.npy')
                    teacher_pred_bboxes = np.load(
                        'bbox/' + str(id_[0]) + '.npy')
                    teacher_pred_features_ = np.load(
                        'feature/' + str(id_[0]) + '.npy')
                    teacher_pred_bboxes = teacher_pred_bboxes.astype(
                        np.float32)
                    teacher_pred_labels = teacher_pred_labels.astype(np.int32)
                    teacher_pred_bboxes_ = at.totensor(
                        teacher_pred_bboxes).cuda()
                    teacher_pred_labels_ = at.totensor(
                        teacher_pred_labels).cuda()
                    # np.load returns a numpy array, which has no .cuda();
                    # convert to a tensor first.
                    teacher_pred_features_ = at.totensor(
                        teacher_pred_features_).cuda()
                    losses = trainer.train_step(img, bbox, label, scale, epoch,
                                                teacher_pred_bboxes_,
                                                teacher_pred_labels_,
                                                teacher_pred_features_)
                else:
                    losses = trainer.train_step(img, bbox, label, scale, epoch)

                # Record the loss of every sample.
                if track_losses:
                    step_loss = float(losses.total_loss)
                    for sample_id in id_:
                        sample_ids.append(sample_id)
                        sample_losses.append(step_loss)

                # Everything below only feeds visdom.
                if (ii + 1) % opt.plot_every == 0:
                    if os.path.exists(opt.debug_file):
                        ipdb.set_trace()

                    # Plot loss.
                    trainer.vis.plot_many(trainer.get_meter_data())

                    # Plot ground-truth boxes.
                    ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                    gt_img = visdom_bbox(ori_img_,
                                         at.tonumpy(bbox_[0]),
                                         at.tonumpy(label_[0]))
                    trainer.vis.img('gt_img', gt_img)

                    # Plot predicted boxes.
                    _bboxes, _labels, _scores, _ = trainer.faster_rcnn.predict(
                        [ori_img_], visualize=True)
                    print(at.tonumpy(_bboxes[0]).reshape(-1).shape)
                    print(at.tonumpy(_labels[0]).shape)
                    pred_img = visdom_bbox(ori_img_,
                                           at.tonumpy(_bboxes[0]),
                                           at.tonumpy(_labels[0]).reshape(-1),
                                           at.tonumpy(_scores[0]))
                    trainer.vis.img('pred_img', pred_img)

                    # RPN and RoI confusion matrices.
                    trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                     win='rpn_cm')
                    trainer.vis.text(str(trainer.roi_cm.value().tolist()),
                                     win='roi_cm')

            eval_result = eval(test_dataloader, faster_rcnn,
                               test_num=opt.test_num)
            trainer.vis.plot('test_map', eval_result['map'])
            lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
            log_info = 'lr:{},ap:{}, map:{},loss:{}'.format(
                str(lr_), str(eval_result['ap']), str(eval_result['map']),
                str(trainer.get_meter_data()))
            trainer.vis.log(log_info)

            # Save the best result and remember its path.
            if eval_result['map'] > best_map:
                best_map = eval_result['map']
                best_path = trainer.save(best_map=best_map)

            if track_losses:
                # Write the sample ids sorted by descending loss.
                loss_values = np.asarray(sample_losses, dtype=np.float64)
                order = loss_values.argsort()[::-1]
                with open('loss.txt', "w") as f:
                    for idx in order:
                        f.write(sample_ids[idx] + ' '
                                + str(loss_values[idx]) + '\n')

            if epoch == 20:
                save_name = trainer.save(best_map='20')
                result = eval(test_all_dataloader, trainer.faster_rcnn,
                              test_num=5000)
                # The original never called close() (``f.close`` without
                # parentheses) and concatenated the raw result with a string.
                with open('result.txt', "a") as f:
                    f.write(opt.datatxt + '\n')
                    f.write(save_name + '\n')
                    f.write(str(result) + '\n')
                print(result)
                # Undo the decay applied after epoch 15 for the next round.
                trainer.faster_rcnn.scale_lr(10)
                lr_ = lr_ * 10
                break

            # Reload the best weights so far and decay the learning rate.
            if epoch % 20 == 15:
                trainer.save(best_map='15')
                # best_path stays None when mAP never exceeded 0.
                if best_path is not None:
                    trainer.load(best_path)
                trainer.faster_rcnn.scale_lr(opt.lr_decay)
                lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    """Train Faster R-CNN (VGG16) and log progress to TensorBoard.

    Args:
        **kwargs: Configuration overrides, forwarded to ``opt._parse``.

    Every ``opt.plot_every`` iterations the meter values, a ground-truth
    image, a prediction image and both confusion matrices are written to a
    ``SummaryWriter`` rooted at ``opt.logdir``.  Per-epoch evaluation and
    checkpointing are currently disabled (see the commented block below).
    """
    opt._parse(kwargs)

    writer = SummaryWriter(opt.logdir)
    # try/finally guarantees buffered events are flushed and the writer is
    # closed even when training aborts with an exception.
    try:
        init_logger(opt.logdir)
        global_step = 0

        dataset = Dataset(opt)
        logging.info('load data')
        dataloader = data_.DataLoader(dataset,
                                      batch_size=1,
                                      shuffle=True,
                                      # pin_memory=True,
                                      num_workers=opt.num_workers)
        testset = TestDataset(opt)
        test_dataloader = data_.DataLoader(testset,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False,
                                           pin_memory=True)

        faster_rcnn = FasterRCNNVGG16()
        logging.info('model construct completed')
        trainer = FasterRCNNTrainer(faster_rcnn).cuda()
        if opt.load_path:
            trainer.load(opt.load_path)
            logging.info('load pretrained model from %s' % opt.load_path)
        logging.info(dataset.db.label_names)

        # Only referenced by the disabled evaluation block at the end of the
        # epoch loop; kept so that block can be re-enabled as-is.
        best_map = 0
        lr_ = opt.lr

        for epoch in range(opt.epoch):
            trainer.reset_meters()
            for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
                global_step += 1
                # NOTE(review): this ends the current epoch whenever the
                # global step hits 99, 199, ... -- looks like a debugging
                # throttle; confirm it is intended before a full run.
                if global_step % 100 == 99:
                    break

                scale = at.scalar(scale)
                img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
                img, bbox, label = Variable(img), Variable(bbox), Variable(label)
                trainer.train_step(img, bbox, label, scale)

                if (ii) % opt.plot_every == 0:
                    # Drop into the debugger when the marker path exists.
                    if os.path.exists(opt.debug_file):
                        ipdb.set_trace()

                    # plot loss
                    losses = trainer.get_meter_data()
                    writer.add_scalars('losses', losses, global_step)
                    logging.info('epoch {}, step {}: loss {}'.format(
                        epoch, ii,
                        float(at.scalar(at.tonumpy(losses['total_loss'])))))

                    # plot ground truth bboxes
                    ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                    gt_img = tb_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                    writer.add_image('gt_img', gt_img, global_step)

                    # plot predicted bboxes
                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                        [ori_img_], visualize=True)
                    pred_img = tb_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                    writer.add_image('pred_img', pred_img, global_step)

                    # rpn confusion matrix (meter)
                    writer.add_text('rpn_cm',
                                    str(trainer.rpn_cm.value().tolist()),
                                    global_step)
                    # roi confusion matrix
                    writer.add_image(
                        'roi_cm',
                        at.totensor(trainer.roi_cm.conf, False).float(),
                        global_step)

            # Disabled: per-epoch evaluation, best-checkpoint tracking and
            # learning-rate decay.
            # eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
            #
            # if eval_result['map'] > best_map:
            #     best_map = eval_result['map']
            #     best_path = trainer.save(best_map=best_map)
            # if epoch == 9:
            #     trainer.load(best_path)
            #     trainer.faster_rcnn.scale_lr(opt.lr_decay)
            #     lr_ = lr_ * opt.lr_decay
            #
            # writer.add_scalar('test_map', eval_result['map'], global_step)
            # log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
            # logging.info('log', log_info, global_step)
    finally:
        writer.close()
def train(**kwargs):
    """Train Faster R-CNN (VGG16), evaluating on the full test set each epoch.

    Args:
        **kwargs: Configuration overrides, forwarded to ``opt._parse``.

    Progress is reported with ``print`` (the visdom calls of the original
    script are disabled).  The best checkpoint so far is saved whenever the
    evaluation mAP improves; after epoch 9 the best checkpoint is reloaded
    (if one exists) and ``scale_lr(opt.lr_decay)`` is applied.  Training stops
    after epoch 13.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    best_map = 0
    # Stays None until a checkpoint is saved; previously the name was unbound
    # at epoch 9 when mAP never rose above 0, raising UnboundLocalError.
    best_path = None
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Drop into the debugger when the marker path exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # The prediction results are unused while plotting is
                # disabled; the call is kept so runtime behaviour is unchanged.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)

        eval_result = eval(test_dataloader, faster_rcnn, test_num=len(testset))
        print("result: ", eval_result)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        print("log info: ", log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
            print("best: ", best_map)
        if epoch == 9:
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN (VGG16) with visdom monitoring.

    Args:
        **kwargs: Configuration overrides, forwarded to ``opt._parse``.

    Each epoch runs one pass over the training loader, then evaluates on the
    test loader.  The best checkpoint so far is saved whenever mAP improves;
    after epoch 9 the best checkpoint is reloaded (if one exists) and
    ``scale_lr(opt.lr_decay)`` is applied.  Training stops after epoch 13.
    """
    opt._parse(kwargs)  # apply all configuration overrides

    dataset = Dataset(opt)  # training set
    print('load data')
    # shuffle=True randomises sample order; num_workers sets the number of
    # loader worker processes.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        # Resume from the configured checkpoint.
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')

    best_map = 0
    # Stays None until a checkpoint is saved; previously the name was unbound
    # at epoch 9 when mAP never rose above 0, raising UnboundLocalError.
    best_path = None
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()  # clear the running meters for this epoch
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # Move the sample to the GPU before the optimisation step.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            # Refresh the visualisations every opt.plot_every iterations.
            if (ii + 1) % opt.plot_every == 0:
                # Drop into the debugger when the marker path exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes on the de-normalised image
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)  # publish lr / mAP / losses

        # Keep only the checkpoint with the best mAP so far.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
class Trainer(object):
    """Bundles data loaders, the Faster R-CNN trainer and the training loop.

    Construction parses the configuration, builds the train/val loaders,
    creates a ``FasterRCNNTrainer`` around ``FasterRCNNVGG16``, optionally
    restores a checkpoint, and places the trainer on the selected device(s).
    """

    def __init__(self, **kwargs):
        """Build loaders and the model from ``opt`` after applying *kwargs*.

        Args:
            **kwargs: Configuration overrides, forwarded to ``opt._parse``.
        """
        opt._parse(kwargs)
        self.opt = opt
        self.test_num = self.opt.test_num
        self.device, self.device_id = select_device(is_head=True)

        # Define Dataloader
        print("load data")
        self.train_dataset = Datasets(opt, mode='train')
        self.train_loader = DataLoader(self.train_dataset,
                                       batch_size=opt.batch_size,
                                       shuffle=True,
                                       num_workers=opt.num_workers)
        self.val_dataset = Datasets(opt, mode='val')
        self.val_loader = DataLoader(self.val_dataset,
                                     batch_size=opt.batch_size,
                                     shuffle=False,
                                     pin_memory=True,
                                     num_workers=opt.test_num_workers)
        self.num_batch = len(self.train_loader)

        # Define Network
        print("define network")
        faster_rcnn = FasterRCNNVGG16()
        self.trainer = FasterRCNNTrainer(faster_rcnn)

        # Resuming Checkpoint
        self.start_epoch = 0
        self.best_map = 0
        self.lr = opt.lr
        if opt.load_path:
            self.trainer.load(opt.load_path)
            self.start_epoch = self.trainer.start_epoch
            self.best_map = self.trainer.best_map
            print('load pretrained model from %s' % opt.load_path)

        # Use multiple GPU
        if opt.use_mgpu and len(self.device_id) > 1:
            # NOTE(review): torch.nn.DataParallel does not forward custom
            # attributes, so the ``self.trainer.vis`` / ``train_step`` /
            # ``reset_meters`` accesses below would need ``.module`` on this
            # path -- confirm the multi-GPU branch is actually exercised.
            self.trainer = torch.nn.DataParallel(self.trainer,
                                                 device_ids=self.device_id)
            print("Using multiple gpu")
        else:
            self.trainer = self.trainer.to(self.device)

        # Visdom
        self.trainer.vis.text(self.train_dataset.classes, win='labels')

    def train(self):
        """Run one pass over the training loader, plotting periodically."""
        self.trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(self.train_loader)):
            scale[0] = at.scalar(scale[0])
            scale[1] = at.scalar(scale[1])
            img = img.to(self.device)
            bbox = bbox_.to(self.device)
            label = label_.to(self.device)
            self.trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # The original test was inverted: calling makedirs on a path
                # that already exists always raises FileExistsError.  Create
                # the path only when it is missing.
                # NOTE(review): assumes opt.debug_file is meant to be a
                # directory here -- confirm.
                if not os.path.exists(opt.debug_file):
                    os.makedirs(opt.debug_file)

                # plot loss
                self.trainer.vis.plot_many(self.trainer.get_meter_data())

                # rpn confusion matrix (meter)
                self.trainer.vis.text(
                    str(self.trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                self.trainer.vis.img(
                    'roi_cm',
                    at.totensor(self.trainer.roi_cm.conf, False).float())

    def eval(self):
        """Initialise the prediction / ground-truth accumulators.

        Only the accumulator set-up is present in this method; it currently
        returns ``None``.
        """
        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
def train(**kwargs):
    """Train Faster R-CNN (VGG16) on the local train/test lists, logging to a file.

    Args:
        **kwargs: Configuration overrides, forwarded to ``opt._parse``.

    After each epoch the model is evaluated and one summary line
    (lr / mAP / losses) is printed and appended to ``log.txt``.  The best
    checkpoint so far is saved whenever mAP improves; after epochs 9 and 19
    the best checkpoint is reloaded (if one exists) and
    ``scale_lr(opt.lr_decay)`` is applied.  Training stops after epoch 50.
    """
    opt._parse(kwargs)

    data_root = "/home/lsm/TrainSet/"
    train_file = "train.txt"
    test_file = "test.txt"
    trainset = MyDataset(data_root, train_file, opt)
    testset = TestDataset(data_root, test_file, opt)
    print('load data')
    dataloader = data_.DataLoader(trainset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    best_map = 0
    # Stays None until a checkpoint is saved; previously the name was unbound
    # at epoch 9 when mAP never rose above 0, raising UnboundLocalError.
    best_path = None
    lr_ = opt.lr

    # The context manager closes the log even if training raises.
    with open('log.txt', 'w') as f:
        for epoch in range(opt.epoch):
            trainer.reset_meters()
            print("epoch " + str(epoch) + " ...")
            for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
                scale = at.scalar(scale)
                img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
                trainer.train_step(img, bbox, label, scale)

            eval_result = eval(test_dataloader, faster_rcnn,
                               test_num=opt.test_num)
            lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
            log_info = 'lr:{}, map:{},loss:{}'.format(
                str(lr_), str(eval_result['map']),
                str(trainer.get_meter_data()))
            print(log_info)
            # One entry per line; entries were previously written back to
            # back with no separator.
            f.write(log_info + '\n')

            if eval_result['map'] > best_map:
                best_map = eval_result['map']
                best_path = trainer.save(best_map=best_map)
            # Learning-rate decay milestones.
            if epoch in (9, 19):
                if best_path is not None:
                    trainer.load(best_path)
                trainer.faster_rcnn.scale_lr(opt.lr_decay)
                lr_ = lr_ * opt.lr_decay

            if epoch == 50:
                break
def train(**kwargs):
    """Train Faster R-CNN (VGG16), reporting to visdom, stdout and TensorBoard.

    Args:
        **kwargs: Configuration overrides, forwarded to ``opt._parse``.

    Every fifth iteration the running losses are printed.  After each epoch
    the model is evaluated, the best checkpoint is saved when mAP improves,
    and the learning rate, mAP and losses are written to a ``SummaryWriter``
    at ``outputs/logs/`` with the epoch index as the step.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    writer = SummaryWriter('outputs/logs/')
    # try/finally guarantees the event file is flushed and closed even when
    # training aborts with an exception.
    try:
        print('model construct completed')
        trainer = FasterRCNNTrainer(faster_rcnn).cuda()
        if opt.load_path:
            trainer.load(opt.load_path)
            print('load pretrained model from %s' % opt.load_path)
        trainer.vis.text(dataset.db.label_names, win='labels')

        best_map = 0
        # Stays None until a checkpoint is saved; previously the name was
        # unbound at epoch 9 when mAP never rose above 0.
        best_path = None
        lr_ = opt.lr
        for epoch in range(opt.epoch):
            trainer.reset_meters()
            for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
                scale = at.scalar(scale)
                img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
                img, bbox, label = Variable(img), Variable(bbox), Variable(label)
                trainer.train_step(img, bbox, label, scale)

                if (ii + 1) % opt.plot_every == 0:
                    # Drop into the debugger when the marker path exists.
                    if os.path.exists(opt.debug_file):
                        ipdb.set_trace()

                    # plot loss
                    trainer.vis.plot_many(trainer.get_meter_data())

                    # plot ground truth bboxes
                    ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                    gt_img = visdom_bbox(ori_img_,
                                         at.tonumpy(bbox_[0]),
                                         at.tonumpy(label_[0]))
                    trainer.vis.img('gt_img', gt_img)

                    # plot predicted bboxes
                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                        [ori_img_], visualize=True)
                    pred_img = visdom_bbox(ori_img_,
                                           at.tonumpy(_bboxes[0]),
                                           at.tonumpy(_labels[0]).reshape(-1),
                                           at.tonumpy(_scores[0]))
                    trainer.vis.img('pred_img', pred_img)

                    # rpn confusion matrix (meter)
                    trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                     win='rpn_cm')
                    # roi confusion matrix
                    trainer.vis.img(
                        'roi_cm',
                        at.totensor(trainer.roi_cm.conf, False).float())

                # Console progress line every fifth iteration.
                if ii % 5 == 4:
                    meter_data_trainer = trainer.get_meter_data()
                    print(
                        'lr:{:>7.4f}, rpn_loc_loss:{:>7.6f}, '
                        'rpn_cls_loss:{:>7.6f}, roi_loc_loss:{:>7.6f}, '
                        'roi_cls_loss:{:>7.6f}, total_loss:{:>7.6f}'.format(
                            lr_,
                            meter_data_trainer['rpn_loc_loss'],
                            meter_data_trainer['rpn_cls_loss'],
                            meter_data_trainer['roi_loc_loss'],
                            meter_data_trainer['roi_cls_loss'],
                            meter_data_trainer['total_loss']))

            eval_result = eval(test_dataloader, faster_rcnn,
                               test_num=opt.test_num)

            if eval_result['map'] > best_map:
                best_map = eval_result['map']
                best_path = trainer.save(best_map=best_map)
            if epoch == 9:
                if best_path is not None:
                    trainer.load(best_path)
                trainer.faster_rcnn.scale_lr(opt.lr_decay)
                lr_ = lr_ * opt.lr_decay

            trainer.vis.plot('test_map', eval_result['map'])
            meter_data_trainer = trainer.get_meter_data()
            log_info = 'lr:{:>10.4f}, map:{}, loss:{}'.format(
                lr_, str(eval_result['map']), str(meter_data_trainer))
            print(log_info)

            # Passing the epoch as the step gives each epoch its own point;
            # previously no step was supplied.
            writer.add_scalar("Learning Rate:", lr_, epoch)
            writer.add_scalar("Train map:", eval_result['map'], epoch)
            writer.add_scalar("Rpn Loc Loss:",
                              meter_data_trainer['rpn_loc_loss'], epoch)
            writer.add_scalar("Rpn Cls Loss:",
                              meter_data_trainer['rpn_cls_loss'], epoch)
            writer.add_scalar("Roi Loc Loss:",
                              meter_data_trainer['roi_loc_loss'], epoch)
            writer.add_scalar("Roi Cls Loss:",
                              meter_data_trainer['roi_cls_loss'], epoch)
            # Previously logged rpn_loc_loss under this tag (copy-paste slip).
            writer.add_scalar("Total Loss:",
                              meter_data_trainer['total_loss'], epoch)
            trainer.vis.log(log_info)
    finally:
        writer.close()
def train(**kwargs):
    """Train Faster R-CNN (VGG16) with visdom monitoring and a final save.

    Args:
        **kwargs: Configuration overrides, forwarded to ``opt._parse``.

    The test loader is built from ``TestDataset(opt, split='trainval')``.
    After each epoch the model is evaluated and the best checkpoint is saved
    when mAP improves; after epoch 9 the best checkpoint is reloaded (if one
    exists) and ``scale_lr(opt.lr_decay)`` is applied.  The loop runs for all
    ``opt.epoch`` epochs and the trainer is saved once more at the end.
    """
    opt._parse(kwargs)  # kwargs arrive as a dict and are parsed into opt

    dataset = Dataset(opt)  # project-defined training dataset
    print('读取数据中...')

    # Loader that iterates the dataset one sample per batch.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt, split='trainval')
    # Evaluation loader.
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()  # network definition
    print('模型构建完毕!')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()  # trainer moved to GPU
    if opt.load_path:
        # Start from the configured checkpoint.
        trainer.load(opt.load_path)
        print('已加载预训练参数 %s' % opt.load_path)
    else:
        print("未引入预训练参数, 随机初始化网络参数")
    trainer.vis.text(dataset.db.label_names, win='labels')  # show label names

    best_map = 0
    # Stays None until a checkpoint is saved; previously the name was unbound
    # at epoch 9 when mAP never rose above 0, raising UnboundLocalError.
    best_path = None
    for epoch in range(opt.epoch):
        trainer.reset_meters()  # reset the running meters
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)  # convert to a scalar
            # Move the sample to the GPU.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # Wrap as Variables for autograd.
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)  # one training step

            if (ii + 1) % opt.plot_every == 0:
                # Drop into the debugger when the marker path exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        # Evaluate on the test loader (this runs prediction internally).
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            # Persist the new best model.
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # Reload the best model (when one exists) and adjust the lr.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        # Early stop after epoch 13 is intentionally disabled in this variant.
        # if epoch == 13:
        #     break

    trainer.save(best_map=best_map)
def train(**kwargs):
    """Train Faster R-CNN (VGG16) with visdom monitoring.

    Args:
        **kwargs: Configuration overrides, forwarded to ``opt._parse``.

    Each epoch runs one pass over the training loader, then evaluates on the
    test loader.  The best checkpoint so far is saved whenever mAP improves;
    after epoch 9 the best checkpoint is reloaded (if one exists) and
    ``scale_lr(opt.lr_decay)`` is applied.  Training stops after epoch 13.
    """
    # Apply the call-time overrides to the global configuration object.
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    # shuffle=True randomises sample order; the test set is wrapped in a
    # second loader below without shuffling.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    # Build the model and hand it to the trainer, which is moved to the GPU.
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')

    best_map = 0
    # Stays None until a checkpoint is saved; previously the name was unbound
    # at epoch 9 when mAP never rose above 0, raising UnboundLocalError.
    best_path = None
    lr_ = opt.lr
    # The number of epochs comes from the configuration (opt.epoch).
    for epoch in range(opt.epoch):
        # Clear the running meters at the start of every epoch.
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # Move the sample to the GPU.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # One optimisation step.
            trainer.train_step(img, bbox, label, scale)

            # Refresh the visualisations every opt.plot_every iterations.
            if (ii + 1) % opt.plot_every == 0:
                # Drop into the debugger when the marker path exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes on the de-normalised image
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        # Evaluate and publish mAP.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        # Read back the optimizer's current learning rate for the log line.
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        # Keep only the checkpoint with the best mAP so far.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # Learning-rate decay milestone.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        # Stop after epoch 13.
        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN (VGG16) from a loader that yields dict samples.

    Args:
        **kwargs: Accepted for interface compatibility; currently ignored
            because the ``opt._parse(kwargs)`` call is disabled.

    Samples with five keys carry annotations; any other sample is treated as
    having none and is trained with empty box / label arrays.  A checkpoint
    is saved after every tenth epoch (0, 10, 20, ...).
    """
    # opt._parse(kwargs)
    print('load data')
    dataloader = get_train_loader(opt.root_dir,
                                  batch_size=opt.batch_size,
                                  shuffle=opt.shuffle,
                                  num_workers=opt.num_workers,
                                  pin_memory=opt.pin_memory)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    best_map = 0  # no evaluation in this variant; only passed to save()
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, sample in tqdm(enumerate(dataloader)):
            img_id = sample['img_id']
            img = Variable(sample['image'].cuda().float())
            scale = sample['scale']
            if len(sample) == 5:
                bbox_, label_ = sample['bbox'], sample['label']
                bbox, label = Variable(bbox_.cuda()), Variable(label_.cuda())
            else:
                # No annotations: use empty arrays.  bbox_/label_ are bound
                # as well so the plotting code below never reads an undefined
                # name or a stale value from an earlier sample.
                bbox_ = bbox = np.zeros((1, 0, 4))
                label_ = label = np.zeros((1, 0, 1))

            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Drop into the debugger when the marker path exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        if epoch % 10 == 0:
            trainer.save(best_map=best_map)
def train(**kwargs):
    """Train Faster R-CNN (VGG16) with visdom monitoring.

    Args:
        **kwargs: Configuration overrides, forwarded to ``opt._parse``.

    Each epoch runs one pass over the training loader, then evaluates on the
    test loader.  The best checkpoint so far is saved whenever mAP improves;
    after epoch 9 the best checkpoint is reloaded (if one exists) and
    ``scale_lr(opt.lr_decay)`` is applied.  Training stops after epoch 13.
    """
    # Parse call-time arguments into the configuration.
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    # data_ is the file's alias for the loader module.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    # Wrap the network in a trainer and move it to the GPU.
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # Load a saved model when a path is configured.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    # Show the class names in visdom.
    trainer.vis.text(dataset.db.label_names, win='labels')

    # Best mAP seen so far and the checkpoint that achieved it.
    best_map = 0
    # Stays None until a checkpoint is saved; previously the name was unbound
    # at epoch 9 when mAP never rose above 0, raising UnboundLocalError.
    best_path = None
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        # Zero the loss meters and confusion matrices.
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # Move the sample to the GPU.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # Wrap as Variables for autograd.
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            # Forward + backward + parameter update.
            trainer.train_step(img, bbox, label, scale)

            # Refresh the visualisations every opt.plot_every iterations.
            if (ii + 1) % opt.plot_every == 0:
                # Drop into the debugger when the marker path exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes; the image is de-normalised first
                # so it can be displayed
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes, labels and scores
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        # Evaluate the current network; the result dict is read via 'map'.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        # Save the model whenever it beats the best mAP so far.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        # After epoch 9: reload the best model (when one exists) and decay lr.
        if epoch == 9:
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        # Publish test mAP and the log line.
        trainer.vis.plot('test_map', eval_result['map'])
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN (VGG16) with visdom monitoring.

    Args:
        **kwargs: Configuration overrides, forwarded to ``opt._parse``.

    Both loaders use ``batch_size=1``.  After each epoch the model is
    evaluated; the best checkpoint so far is saved whenever mAP improves.
    After epoch 9 the best checkpoint is reloaded (if one exists) and
    ``scale_lr(opt.lr_decay)`` is applied.  Training stops after epoch 13.
    """
    opt._parse(kwargs)  # load configuration overrides

    dataset = Dataset(opt)  # training set built from the configuration
    print('load data')
    # Training loader.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)  # test set built from the configuration
    # Test loader.
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()  # Faster R-CNN with a VGG backbone
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        # Load weights from the configured checkpoint.
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')

    best_map = 0  # best mAP so far; decides when to save
    # Stays None until a checkpoint is saved; previously the name was unbound
    # at epoch 9 when mAP never rose above 0, raising UnboundLocalError.
    best_path = None
    lr_ = opt.lr  # configured initial learning rate
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # presumably bbox holds ground-truth boxes as
            # (ymin, xmin, ymax, xmax) and label holds class indices --
            # TODO confirm against the dataset
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            # Visualisation only.
            if (ii + 1) % opt.plot_every == 0:
                # Drop into the debugger when the marker path exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        # Evaluate once per epoch.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        # Current learning rate as seen by the optimizer.
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        # Save when this epoch's mAP beats the previous best.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        # After epoch 9: reload the best model (when one exists), lower lr.
        if epoch == 9:
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        # Stop after epoch 13.
        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN on the GAN-oversampled dataset, logging to CSV.

    A results CSV is (re)created, its header row written, and the open file
    handle is passed to ``eval`` every epoch. Meter readings are accumulated
    in lists and handed to ``save_losses`` / ``save_map``. The best
    checkpoint is saved under ``checkpoints-gan-class-2/``.

    Args:
        **kwargs: Configuration overrides forwarded to ``opt._parse``.
    """
    opt._parse(kwargs)

    results_file = 'fasterrcnn-oversampled-gan-class-2.csv'

    id_file_dir = 'ImageSets/Main/train_oversampled_gan_class_v2.txt'
    img_dir = 'JPEGImagesOversampledGANClassv2'
    anno_dir = 'AnnotationsOversampledGANClassv2'
    # Alternative dataset configurations:
    #
    # id_file_dir = 'ImageSets/Main/train_oversampled_gan.txt'
    # img_dir = 'JPEGImagesOversampledGAN'
    # anno_dir = 'AnnotationsOversampledGAN'

    # id_file_dir = 'ImageSets/Main/train_oversampled_orig_class.txt'
    # img_dir = 'JPEGImagesOversampledClass'
    # anno_dir = 'AnnotationsOversampledClass'

    # id_file_dir = 'ImageSets/Main/train_oversampled_orig_all.txt'
    # img_dir = 'JPEGImagesOrigOversampledAll'
    # anno_dir = 'AnnotationsOrigOversampledAll'

    # Both original open modes ("w+" if the file existed, "w" otherwise)
    # truncate the file, so a single "w+" covers both. The context manager
    # closes the handle even when training raises; newline="" is what the
    # csv module expects for files it writes to.
    with open(results_file, "w+", newline="") as file:
        columns = init_cols()
        writer = csv.DictWriter(file, fieldnames=columns)
        writer.writeheader()

        dataset = DatasetAugmented(opt, id_file=id_file_dir,
                                   img_dir=img_dir, anno_dir=anno_dir)
        print(len(dataset))
        print('load data')
        dataloader = data_.DataLoader(dataset,
                                      batch_size=1,
                                      shuffle=True,
                                      # pin_memory=True,
                                      num_workers=opt.num_workers)
        testset = TestDataset(opt)
        test_dataloader = data_.DataLoader(testset,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=True,
                                           pin_memory=True)

        faster_rcnn = FasterRCNNVGG16()
        print('model construct completed')
        trainer = FasterRCNNTrainer(faster_rcnn).cuda()
        if opt.load_path:
            trainer.load(opt.load_path)
            print('load pretrained model from %s' % opt.load_path)

        best_map = 0
        best_path = None  # stays None until some epoch improves on best_map
        lr_ = opt.lr

        # Histories of the meter readings sampled every opt.plot_every steps.
        rpn_loc_loss = []
        rpn_cls_loss = []
        roi_loc_loss = []
        roi_cls_loss = []
        total_loss = []
        test_map_list = []

        for epoch in range(opt.epoch):
            trainer.reset_meters()
            for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
                scale = at.scalar(scale)
                img, bbox, label = (img.cuda().float(),
                                    bbox_.cuda(),
                                    label_.cuda())
                trainer.train_step(img, bbox, label, scale)

                if (ii + 1) % opt.plot_every == 0:
                    # record the current meter values
                    losses_dict = trainer.get_meter_data()
                    rpn_loc_loss.append(losses_dict['rpn_loc_loss'])
                    roi_loc_loss.append(losses_dict['roi_loc_loss'])
                    rpn_cls_loss.append(losses_dict['rpn_cls_loss'])
                    roi_cls_loss.append(losses_dict['roi_cls_loss'])
                    total_loss.append(losses_dict['total_loss'])

            # Persist the loss histories once per epoch.
            save_losses('rpn_loc_loss', rpn_loc_loss, epoch)
            save_losses('roi_loc_loss', roi_loc_loss, epoch)
            save_losses('rpn_cls_loss', rpn_cls_loss, epoch)
            save_losses('total_loss', total_loss, epoch)
            save_losses('roi_cls_loss', roi_cls_loss, epoch)

            # print('conf matrix final')
            # print(trainer.roi_cm.conf)
            # save_conf_matrix(trainer.roi_cm.conf,
            #                  'confusion_matrix/oversampled-orig-class.csv')

            eval_result = eval(test_dataloader, faster_rcnn,
                               test_num=opt.test_num, epoch=epoch, file=file)
            test_map_list.append(eval_result['map'])
            save_map(test_map_list, epoch)

            lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
            log_info = 'lr:{}, map:{},loss:{}'.format(
                str(lr_), str(eval_result['map']),
                str(trainer.get_meter_data()))
            print(log_info)

            if eval_result['map'] > best_map:
                best_map = eval_result['map']
                timestr = time.strftime('%m%d%H%M')
                save_path = 'checkpoints-gan-class-2/fasterrcnn_%s' % timestr
                # save_path = 'checkpoints2/fasterrcnn_%s' % timestr
                # save_path = 'checkpoints-class/fasterrcnn_%s' % timestr
                best_path = trainer.save(best_map=best_map,
                                         save_path=save_path)

            if epoch == 9:
                # Reload the best checkpoint before decaying the learning
                # rate; skip the reload when no checkpoint was ever saved
                # rather than failing on an unbound best_path.
                if best_path is not None:
                    trainer.load(best_path)
                trainer.faster_rcnn.scale_lr(opt.lr_decay)
                lr_ = lr_ * opt.lr_decay

            if epoch == 13:
                break
def train(**kwargs):
    """Train Faster R-CNN (VGG16) with visdom monitoring.

    Evaluates on the test loader after each epoch, saves the checkpoint with
    the best mAP, reloads it and decays the learning rate after epoch 9, and
    stops after epoch 13 at the latest.

    Args:
        **kwargs: Configuration overrides; parsed by ``opt._parse``, after
            which ``opt`` is handed to the dataset constructors.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    # pin_memory (page-locked host memory) is left disabled here.
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        # pin_memory=True
    )

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    # Load a pretrained model when opt.load_path is set, then show the
    # training label names in the visualiser.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.dataset.label_names, win='labels')

    best_map = 0
    best_path = None  # stays None until some epoch improves on best_map
    lr_ = opt.lr

    # Number of epochs is the opt.epoch hyper-parameter.
    for epoch in range(opt.epoch):
        print('epoch {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()  # start each epoch with fresh meters
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = array_tool.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot the losses
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot the ground-truth bboxes
                ori_img_ = inverse_normalize(array_tool.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     array_tool.tonumpy(bbox_[0]),
                                     array_tool.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot the predicted bboxes: run faster_rcnn.predict and keep
                # its outputs in the underscore-prefixed names
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(
                    ori_img_,
                    array_tool.tonumpy(_bboxes[0]),
                    array_tool.tonumpy(_labels[0]).reshape(-1),
                    array_tool.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # show the RPN confusion matrix (rpn_cm) as text
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # show the RoI-head confusion matrix (roi_cm) as an image
                trainer.vis.img(
                    'roi_cm',
                    array_tool.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}, loss{}'.format(
            str(lr_), str(eval_result['map']),
            str(trainer.get_meter_data()))
        trainer.vis.log(log_info)  # publish the lr / mAP / loss summary

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)

        if epoch == 9:
            # After epoch 9, go back to the best checkpoint and scale the
            # learning rate by opt.lr_decay. The reload is skipped when no
            # checkpoint has been saved yet, which previously raised an
            # UnboundLocalError on best_path.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN (VGG16), tracking per-class AP and wall-clock time.

    After each epoch the model is evaluated; on a new best mAP the RoI
    confusion matrix is printed and plotted and a checkpoint is saved with
    both the mAP and the per-class AP mapping. The elapsed training time in
    seconds is printed at the end.

    Args:
        **kwargs: Configuration overrides forwarded to ``opt._parse``.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')

    best_map = 0
    best_ap = np.array([0.] * opt.label_number)
    best_path = None  # stays None until some epoch improves on best_map
    lr_ = opt.lr
    vis = trainer.vis
    starttime = datetime.datetime.now()

    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                roi_cm = at.totensor(trainer.roi_cm.conf, False).float()
                trainer.vis.img('roi_cm', roi_cm)

        eval_result = eval(test_dataloader, faster_rcnn, vis=vis,
                           test_num=opt.test_num)
        # Per-class AP of this epoch, keyed by class name.
        best_ap = dict(zip(opt.VOC_BBOX_LABEL_NAMES, eval_result['ap']))
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']),
            str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            print('roi_cm=\n', trainer.roi_cm.value())
            plot_confusion_matrix(trainer.roi_cm.value(),
                                  classes=('animal', 'plant', 'rock',
                                           'background'),
                                  normalize=False,
                                  title='Normalized Confusion Matrix')
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map, best_ap=best_ap)

        if epoch == 9:
            # Reload the best checkpoint before decaying the learning rate;
            # skip the reload when nothing has been saved yet instead of
            # failing on an unbound best_path.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        # if epoch == 13:
        #     break

    endtime = datetime.datetime.now()
    # timedelta.seconds only holds the sub-day remainder, so a run longer
    # than 24 hours would be under-reported; total_seconds() covers the
    # whole span. Truncated to int to keep the printed format.
    train_consum = int((endtime - starttime).total_seconds())
    print("train_consum=", train_consum)
def train(**kwargs):
    """Train Faster R-CNN (VGG16) and evaluate on the test set every epoch.

    The checkpoint with the best mAP is saved; after epoch 9 that checkpoint
    is reloaded (when one exists) and the learning rate is scaled by
    ``opt.lr_decay``. Training stops after epoch 13 at the latest.

    Args:
        **kwargs: Configuration overrides forwarded to ``opt._parse``.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')

    best_map = 0
    best_path = None  # stays None until some epoch improves on best_map
    lr_ = opt.lr

    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Enter the debugger only when the debug marker file exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']),
            str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)

        if epoch == 9:
            # best_path is only set once some epoch beats best_map; guard
            # the reload so a run whose mAP never rises above 0 still gets
            # its learning-rate decay instead of an UnboundLocalError.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN (ResNet-18) on Carrada sequences with TensorBoard logs.

    Each epoch iterates over the training sequences; for every sequence a
    per-frame ``CarradaDataset`` loader is built and trained on. Losses,
    images and evaluation metrics are written to a ``SummaryWriter``. After
    each epoch the validation split is evaluated; on a new best validation
    mAP the test split is evaluated too and a checkpoint is saved. The
    exponential LR scheduler is stepped every ``opt.lr_step`` epochs.

    Args:
        **kwargs: Configuration overrides forwarded to ``opt._parse``.
    """
    opt._parse(kwargs)

    carrada = download('Carrada')
    train_set = Carrada().get('Train')
    val_set = Carrada().get('Validation')
    test_set = Carrada().get('Test')

    train_seqs = SequenceCarradaDataset(train_set)
    val_seqs = SequenceCarradaDataset(val_set)
    test_seqs = SequenceCarradaDataset(test_set)

    train_seqs_loader = data_.DataLoader(train_seqs,
                                         batch_size=1,
                                         shuffle=True,
                                         # pin_memory=True,
                                         num_workers=opt.num_workers)
    val_seqs_loader = data_.DataLoader(val_seqs,
                                       batch_size=1,
                                       shuffle=False,
                                       # pin_memory=True,
                                       num_workers=opt.num_workers)
    test_seqs_loader = data_.DataLoader(test_seqs,
                                        batch_size=1,
                                        shuffle=False,
                                        # pin_memory=True,
                                        num_workers=opt.num_workers)

    # faster_rcnn = FasterRCNNVGG16(n_fg_class=3)
    # faster_rcnn = FasterRCNNRESNET101(n_fg_class=3)
    faster_rcnn = FasterRCNNRESNET18(n_fg_class=3)
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    scheduler = ExponentialLR(trainer.faster_rcnn.optimizer, gamma=0.9)
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    writer_path = os.path.join(opt.logs_path, opt.model_name)
    os.makedirs(writer_path, exist_ok=True)
    writer = SummaryWriter(writer_path)

    iteration = 0
    best_map = 0
    lr_ = opt.lr

    # try/finally so the event file is flushed and closed even when
    # training is interrupted by an exception.
    try:
        for epoch in range(opt.epoch):
            print('Processing epoch: {}/{}'.format(epoch, opt.epoch))
            trainer.reset_meters()
            for n_seq, sequence_data in tqdm(enumerate(train_seqs_loader)):
                seq_name, seq = sequence_data
                path_to_frames = os.path.join(carrada, seq_name[0])
                train_frame_set = CarradaDataset(opt, seq, 'box',
                                                 opt.signal_type,
                                                 path_to_frames)
                train_frame_loader = data_.DataLoader(
                    train_frame_set,
                    batch_size=1,
                    shuffle=False,
                    num_workers=opt.num_workers)

                for ii, (img, bbox_, label_, scale) in tqdm(
                        enumerate(train_frame_loader)):
                    iteration += 1
                    scale = at.scalar(scale)
                    img, bbox, label = (img.cuda().float(),
                                        bbox_.cuda(),
                                        label_.cuda())
                    img = normalize(img)

                    # Every opt.debug_step iterations the step is run with
                    # stop=True (semantics defined by the trainer).
                    if opt.debug_step and (iteration + 1) % opt.debug_step == 0:
                        trainer.train_step(img, bbox, label, scale, stop=True)
                    else:
                        trainer.train_step(img, bbox, label, scale)

                    if (iteration + 1) % opt.plot_every == 0:
                        if os.path.exists(opt.debug_file):
                            ipdb.set_trace()

                        train_results = trainer.get_meter_data()
                        writer.add_scalar('Losses/rpn_loc',
                                          train_results['rpn_loc_loss'],
                                          iteration)
                        writer.add_scalar('Losses/rpn_cls',
                                          train_results['rpn_cls_loss'],
                                          iteration)
                        writer.add_scalar('Losses/roi_loc',
                                          train_results['roi_loc_loss'],
                                          iteration)
                        writer.add_scalar('Losses/roi_cls',
                                          train_results['roi_cls_loss'],
                                          iteration)
                        writer.add_scalar('Losses/total',
                                          train_results['total_loss'],
                                          iteration)

                    if (iteration + 1) % opt.img_every == 0:
                        # ground-truth boxes drawn on the current frame
                        ori_img_ = at.tonumpy(img[0])
                        gt_img = visdom_bbox(ori_img_,
                                             at.tonumpy(bbox_[0]),
                                             at.tonumpy(label_[0]))
                        gt_img_grid = make_grid(torch.from_numpy(gt_img))
                        writer.add_image('Ground_truth_img', gt_img_grid,
                                         iteration)

                        # predicted boxes drawn on the current frame
                        _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                            [ori_img_], opt.signal_type, visualize=True)
                        # FLAG: vis
                        pred_img = visdom_bbox(
                            ori_img_,
                            at.tonumpy(_bboxes[0]),
                            at.tonumpy(_labels[0]).reshape(-1),
                            at.tonumpy(_scores[0]))
                        pred_img_grid = make_grid(torch.from_numpy(pred_img))
                        writer.add_image('Predicted_img', pred_img_grid,
                                         iteration)

                    # Optional periodic evaluation on the training sequences.
                    if opt.train_eval and (iteration + 1) % opt.train_eval == 0:
                        train_eval_result, train_best_iou = eval(
                            train_seqs_loader, faster_rcnn, opt.signal_type)
                        writer.add_scalar('Train/mAP',
                                          train_eval_result['map'], iteration)
                        writer.add_scalar('Train/Best_IoU', train_best_iou,
                                          iteration)

            # Validation after every epoch.
            eval_result, best_val_iou = eval(val_seqs_loader, faster_rcnn,
                                             opt.signal_type,
                                             test_num=opt.test_num)
            writer.add_scalar('Validation/mAP', eval_result['map'], iteration)
            writer.add_scalar('Validation/Best_IoU', best_val_iou, iteration)

            # Read the learning rate straight from the optimizer the
            # scheduler drives; scheduler.get_lr() is not meant to be called
            # outside scheduler.step() on recent PyTorch releases.
            lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
            writer.add_scalar('learning_rate', lr_, iteration)
            log_info = 'lr:{}, map:{},loss:{}'.format(
                str(lr_), str(eval_result['map']),
                str(trainer.get_meter_data()))
            print(log_info)

            # On a new best validation mAP, also score the test split and
            # save a checkpoint tagged with both values.
            if eval_result['map'] > best_map:
                test_result, test_best_iou = eval(test_seqs_loader,
                                                  faster_rcnn,
                                                  opt.signal_type,
                                                  test_num=opt.test_num)
                writer.add_scalar('Test/mAP', test_result['map'], iteration)
                writer.add_scalar('Test/Best_IoU', test_best_iou, iteration)
                best_map = eval_result['map']
                best_test_map = test_result['map']
                best_path = trainer.save(best_val_map=best_map,
                                         best_test_map=best_test_map)
                # best_path = trainer.save(best_map=best_map)

            if (epoch + 1) % opt.lr_step == 0:
                scheduler.step()
    finally:
        writer.close()
def train(**kwargs):
    """Train Faster R-CNN (VGG16) for 7 epochs, then evaluate and save once.

    The learning rate is scaled by ``opt.lr_decay`` after epoch 4. When
    training finishes, the model is evaluated on the test loader
    (``test_num=1e100``), the result is printed and a checkpoint tagged
    with the final mAP is saved.

    Args:
        **kwargs: Configuration overrides forwarded to ``opt._parse``.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print("load data")
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,
        # pin_memory=True,
        num_workers=opt.num_workers,
    )
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=2,
        shuffle=False,
        # pin_memory=True
    )

    faster_rcnn = FasterRCNNVGG16()
    print("model construct completed")
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print("load pretrained model from %s" % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win="labels")

    for epoch in range(7):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale, ori_img) in tqdm(
                enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes on the de-normalised network input
                ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
                gt_img = visdom_bbox(
                    at.tonumpy(ori_img_)[0],
                    at.tonumpy(bbox_)[0],
                    label_[0].numpy())
                trainer.vis.img("gt_img", gt_img)

                # plot predicted bboxes on the original image
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    ori_img, visualize=True)
                pred_img = visdom_bbox(
                    at.tonumpy(ori_img[0]),
                    at.tonumpy(_bboxes[0]),
                    at.tonumpy(_labels[0]).reshape(-1),
                    at.tonumpy(_scores[0]),
                )
                trainer.vis.img("pred_img", pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win="rpn_cm")
                # roi confusion matrix
                trainer.vis.img(
                    "roi_cm",
                    at.totensor(trainer.roi_cm.conf, False).float())

        if epoch == 4:
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

    eval_result = eval(test_dataloader, faster_rcnn, test_num=1e100)
    # Print the metrics themselves; the original printed only the literal
    # label and never showed the evaluation result.
    print("eval_result", eval_result)
    trainer.save(mAP=eval_result["map"])
def train(**kwargs):
    """Train Faster R-CNN (VGG16) and evaluate on the test set every epoch.

    The best-mAP checkpoint is saved; after epoch 9 it is reloaded (when one
    exists) and the learning rate is scaled by ``opt.lr_decay``. Training
    stops after epoch 13 at the latest.

    Args:
        **kwargs: Configuration overrides forwarded to ``opt._parse``.
    """
    opt._parse(kwargs)

    data_set = TrainDataset()
    print('load data.')
    data_loader = data_.DataLoader(data_set, batch_size=1, shuffle=True)
    testset = TestDataset()
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct.')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    lr = opt.lr
    best_map = 0
    best_path = None  # stays None until some epoch improves on best_map

    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox, label, scale) in tqdm(enumerate(data_loader)):
            # Only the image is moved to the GPU here; bbox/label/scale are
            # passed to the trainer as loaded.
            img = img.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # NOTE(review): this breakpoint is unconditional and halts
                # training every opt.plot_every iterations — confirm it is
                # intended outside of debugging sessions.
                ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot gt_bbox
                ori_img = inverse_normalize(img[0].cpu().numpy())
                gt_img = visdom_bbox(ori_img,
                                     bbox[0].numpy(),
                                     label[0].numpy())
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bbox
                pred_bbox, pred_label, pred_score = trainer.faster_rcnn.predict(
                    [ori_img], visualize=True)
                pred_img = visdom_bbox(ori_img,
                                       pred_bbox[0],
                                       pred_label[0],
                                       pred_score[0])
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', trainer.roi_cm.conf.float().cpu())

        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)

        if epoch == 9:
            # Reload the best checkpoint before decaying the learning rate;
            # skip the reload when nothing has been saved yet instead of
            # failing on an unbound best_path.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr = lr * opt.lr_decay

        trainer.vis.plot('test_map', eval_result['map'])
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr), str(eval_result['map']),
            str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if epoch == 13:
            print('finish!')
            break