def train(self):
    """Run one pass over the training loader, stepping the trainer and
    periodically plotting loss meters and confusion matrices via visdom.

    Assumes ``scale`` arrives as a two-element sequence (per-axis scale
    factors) — TODO confirm against the dataset's collate output.
    """
    self.trainer.reset_meters()
    for ii, (img, bbox_, label_, scale) in tqdm(enumerate(self.train_loader)):
        # Convert both scale factors from tensors to plain Python scalars.
        scale[0] = at.scalar(scale[0])
        scale[1] = at.scalar(scale[1])
        img = img.to(self.device)
        bbox = bbox_.to(self.device)
        label = label_.to(self.device)
        self.trainer.train_step(img, bbox, label, scale)
        if (ii + 1) % opt.plot_every == 0:
            # BUGFIX: the original called os.makedirs() only when the debug
            # path already existed, which raises FileExistsError; create the
            # directory only when it is missing.
            if not os.path.exists(opt.debug_file):
                os.makedirs(opt.debug_file)
            # plot training loss meters
            self.trainer.vis.plot_many(self.trainer.get_meter_data())
            # plot ground truth bboxes
            # plot predicted bboxes
            # rpn confusion matrix (meter)
            self.trainer.vis.text(str(
                self.trainer.rpn_cm.value().tolist()), win='rpn_cm')
            self.trainer.vis.img(
                'roi_cm',
                at.totensor(self.trainer.roi_cm.conf, False).float())
def train(**kwargs):
    """Scratch/debug entry point: parses CLI overrides, builds the dataset,
    then manually pushes one sample through a decomposed VGG16 extractor and
    a freshly built RPN under an interactive breakpoint.

    NOTE(review): this function references ``self`` (``self.feat_stride``,
    ``self.faster_rcnn``) but is not a method — those lines raise NameError
    as written. ``ratios`` / ``anchor_scales`` must come from module scope.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    print('Loading Model')
    # faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    # trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    lr_ = opt.lr
    # Build the VGG16 backbone pieces directly instead of the full model.
    extractor, classifier = decom_vgg16()
    # Pull a single fixed sample to drive the manual forward pass below.
    img, bbox_, label_, scale = dataset[1]
    _, H, W = img.shape
    img_size = (H, W)
    img, bbox_, label_ = to_tensor(img), to_tensor(bbox_), to_tensor(label_)
    scale = at.scalar(scale)
    img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
    # NOTE(review): Variable is a deprecated no-op wrapper in modern PyTorch.
    img, bbox, label = Variable(img), Variable(bbox), Variable(label)
    # NOTE(review): leftover interactive breakpoint — remove before real runs.
    pdb.set_trace()
    features = extractor(img)
    # NOTE(review): `self.feat_stride` is invalid here (no `self` in scope).
    rpn = RegionProposalNetwork(512, 512, ratios=ratios,
                                anchor_scales=anchor_scales,
                                feat_stride=self.feat_stride)
    # NOTE(review): `self.faster_rcnn` is likewise undefined in this function.
    rpn_locs, rpn_scores, rois, roi_indices, anchor = \
        self.faster_rcnn.rpn(features, img_size, scale)
def train_one_epoch(self):
    """Run one training epoch over ``self.dataloader``.

    Return:
        The running mean of the total loss for the epoch
        (``self.trainer.get_meter_data()['total_loss']``).

    NOTE(review): the pred_*/gt_* accumulators are filled every
    ``plot_every`` steps but never evaluated or returned, so no mAP is
    actually computed here despite what the collected lists suggest.
    """
    pred_bboxes, pred_labels, pred_scores = list(), list(), list()
    gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
    self.trainer.reset_meters()
    for ii, (img, sizes, bbox_, label_, scale, gt_difficults_) in \
            tqdm.tqdm(enumerate(self.dataloader)):
        scale = at.scalar(scale)
        img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
        self.trainer.train_step(img, bbox, label, scale)
        if (ii + 1) % self.opt.plot_every == 0:
            # `sizes` arrives batched as tensors; recover plain (H, W) ints.
            sizes = [sizes[0][0].item(), sizes[1][0].item()]
            pred_bboxes_, pred_labels_, pred_scores_ = \
                self.faster_rcnn.predict(img, [sizes])
            pred_bboxes += pred_bboxes_
            pred_labels += pred_labels_
            pred_scores += pred_scores_
            gt_bboxes += list(bbox_.numpy())
            gt_labels += list(label_.numpy())
            gt_difficults += list(gt_difficults_.numpy())
    return self.trainer.get_meter_data()['total_loss']
def update_meters(self, losses):
    """Average each individual loss into its running meter."""
    # train_step returns the losses as a namedtuple, so convert every field
    # to a plain scalar first.
    loss_values = dict(
        (name, scalar(value)) for name, value in losses._asdict().items())
    # Feed each meter its matching loss so per-key means accumulate.
    for name, meter in self.meters.items():
        meter.add(loss_values[name])
def evaluate_coco(data, data_loader, model):
    """Run the model over a COCO-style loader and dump predictions to JSON.

    Args:
        data: dataset wrapper exposing ``img_ids``, ``label_to_coco_label``
            and ``evaluate``.
        data_loader: iterable of (img, bbox, label, scale, size, _) batches,
            assumed aligned with ``data.img_ids`` — TODO confirm ordering.
        model: callable returning (pred_bbox, pred_label, pred_score).

    Returns:
        Whatever ``data.evaluate`` returns for the written result file.
    """
    n_ids = len(data.img_ids)
    result = []
    for i, (img, bbox, label, scale, size, _) in tqdm(
            zip(data.img_ids, data_loader), total=n_ids):
        scale = at.scalar(scale)
        # `size` arrives batched as tensors; recover the original (H, W).
        original_size = [size[0][0].item(), size[1][0].item()]
        pred_bbox, pred_label, pred_score = model(img, scale, None, None,
                                                  original_size)
        for b, l, s in zip(pred_bbox, pred_label, pred_score):
            ymin, xmin, ymax, xmax = b
            # COCO expects [x, y, width, height] boxes.
            obj = OrderedDict({
                'image_id': i,
                'category_id': data.label_to_coco_label(l),
                'bbox': [xmin, ymin, xmax - xmin, ymax - ymin],
                'score': float(s)
            })
            result.append(obj)
    result_path = f'./results/coco/predictions/{opt.model}.json'
    result_dir = os.path.dirname(result_path)
    # exist_ok avoids the check-then-create race of the original code.
    os.makedirs(result_dir, exist_ok=True)
    with open(result_path, 'w', encoding='utf-8') as fout:
        json.dump(result, fout, cls=COCOEncoder, ensure_ascii=False)
    eval_result = data.evaluate(result_path)
    return eval_result
def train(**kwargs):
    """Train a ResNet50 Faster R-CNN on VOC, logging the five loss terms to
    TensorBoard every step and keeping the checkpoint with the best
    validation mAP. ``kwargs`` are config overrides for ``opt._parse``.
    """
    opt._parse(kwargs)  # parse config overrides
    dataset = Dataset(opt)  # training set: VOC2007, ~5011 images
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt, split='val')  # validation set, ~2500 images
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNN_ResNet50()  # build a Faster R-CNN instance
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()  # move to GPU
    if opt.load_path:  # optionally resume from a pretrained checkpoint
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    best_map = 0
    # BUGFIX: ensure best_path exists even if mAP never improves; the
    # original referenced it unbound at the epoch-9 reload below.
    best_path = None
    lr_ = opt.lr
    writer = SummaryWriter('logs', comment='faster-rcnn-vgg16')
    global_step = 0
    for epoch in range(opt.epoch):  # e.g. 14 epochs (0-13)
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            loss = trainer.train_step(img, bbox, label, scale)
            rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss, total_loss = loss
            # Log every loss component at the current global step.
            writer.add_scalar('rpn_loc_loss', rpn_loc_loss.detach().cpu().numpy(), global_step)
            writer.add_scalar('rpn_cls_loss', rpn_cls_loss.detach().cpu().numpy(), global_step)
            writer.add_scalar('roi_loc_loss', roi_loc_loss.detach().cpu().numpy(), global_step)
            writer.add_scalar('roi_cls_loss', roi_cls_loss.detach().cpu().numpy(), global_step)
            writer.add_scalar('total_loss', total_loss.detach().cpu().numpy(), global_step)
            global_step += 1
            if (ii + 1) % opt.plot_every == 0:
                pass  # plotting intentionally disabled
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}'.format(str(lr_), str(eval_result['map']))
        print(log_info)
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # BUGFIX: only reload when a best checkpoint was actually saved.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
def update_meters(self, losses, BR=False):
    """Add every loss from the `losses` namedtuple to its running meter.

    When ``BR`` is true the BR meter bank is updated, otherwise the
    ordinary one.
    """
    # Convert the whole namedtuple to plain scalars up front.
    scalars = {name: at.scalar(value)
               for name, value in losses._asdict().items()}
    # Select the meter bank once instead of duplicating the loop.
    meters = self.BR_meters if BR else self.meters
    for name, meter in meters.items():
        meter.add(scalars[name])
def evaluate_voc(data_loader, model):
    """Evaluate a detector on a VOC-style loader with COCO-like metrics.

    Computes AP averaged over IoU thresholds 0.5:0.05:0.95, plus AP at 0.5
    and 0.75 and per-object-size APs (small/medium/large).

    Args:
        data_loader: iterable of (img, gt_bbox, gt_label, scale, size,
            gt_difficult) batches.
        model: callable returning (pred_bbox, pred_label, pred_score).

    Returns:
        The overall AP (float) averaged over the ten IoU thresholds.
    """
    pred_bboxes, pred_labels, pred_scores = [], [], []
    gt_bboxes, gt_labels, gt_difficults = [], [], []
    for img, gt_bbox, gt_label, scale, size, gt_difficult in tqdm(data_loader):
        scale = at.scalar(scale)
        # `size` arrives batched as tensors; recover the original (H, W).
        original_size = [size[0][0].item(), size[1][0].item()]
        pred_bbox, pred_label, pred_score = model(img, scale, None, None,
                                                  original_size)
        gt_bboxes += list(gt_bbox.numpy())
        gt_labels += list(gt_label.numpy())
        gt_difficults += list(gt_difficult.numpy())
        pred_bboxes += [pred_bbox]
        pred_labels += [pred_label]
        pred_scores += [pred_score]
    eval_results = {'AP': 0, 'AP_0.5': 0, 'AP_0.75': 0,
                    'AP_s': 0, 'AP_m': 0, 'AP_l': 0}
    iou_threshes = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
    area_names = ['s', 'm', 'l']
    area_ranges = [(0, 32 ** 2), (32 ** 2, 96 ** 2), (96 ** 2, np.inf)]
    # FIX: loop variable renamed from `range`, which shadowed the builtin.
    for name, area_range in zip(area_names, area_ranges):
        # evaluate predictions of this size class for every IoU threshold
        for iou_thresh in iou_threshes:
            result = eval_detection_voc(
                pred_bboxes, pred_labels, pred_scores,
                gt_bboxes, gt_labels, gt_difficults,
                iou_thresh, True, area_range
            )
            # accumulate results
            eval_results[f'AP_{name}'] += result['map']
        # average over the ten thresholds
        eval_results[f'AP_{name}'] /= 10.
    # evaluate results regardless of area size
    for iou_thresh in iou_threshes:
        result = eval_detection_voc(
            pred_bboxes, pred_labels, pred_scores,
            gt_bboxes, gt_labels, gt_difficults,
            iou_thresh, True
        )
        # accumulate results
        eval_results['AP'] += result['map']
        # save map for iou 0.5 & 0.75
        if iou_thresh == 0.5:
            eval_results['AP_0.5'] = result['map']
        elif iou_thresh == 0.75:
            eval_results['AP_0.75'] = result['map']
    eval_results['AP'] /= 10
    # print results
    eval_log = ''
    for k, v in eval_results.items():
        eval_log += f'{k}: {v * 100:.2f}, '
    print(eval_log)
    return eval_results['AP']
def train(**kwargs):
    """Training/debug loop for Faster R-CNN VGG16 without any ``.cuda()``
    calls; periodically renders ground-truth and predicted boxes.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    # img, bbox, label, scale = dataset[0]
    # The returned img is already rescaled and may have been randomly flipped.
    # The returned bbox is ordered as ymin, xmin, ymax, xmax.
    # H, W = size(im)
    # For an image on screen, a, b, c, d are the 4 corners:
    # a ... b    ymin
    # .     .
    # c ... d    ymax   (height H; y ranges over [0, H-1])
    # xmin  xmax
    #                   (width W;  x ranges over [0, W-1])
    print('load data')
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=True, \
                                             # pin_memory=True,
                                             num_workers=opt.num_workers)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn)
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    for epoch in range(opt.epoch):
        for ii, (img, bbox_, label_, scale) in (enumerate(dataloader)):
            print('step: ', ii)
            scale = at.scalar(scale)
            img, bbox, label = img.float(), bbox_, label_
            # NOTE(review): Variable is a deprecated no-op in modern PyTorch.
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)
            # Only start predicting after enough epochs — early predictions
            # are meaningless. NOTE(review): with a small opt.epoch this
            # branch can never run (requires epoch > 50); verify intent.
            if ((ii + 1) % opt.plot_every == 0) and (epoch > 50):
                # if os.path.exists(opt.debug_file):
                #     ipdb.set_trace()
                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                # gt_img is a numpy array in [0, 1], shaped 3 x H x W.
                # Save or display this annotated image somewhere.
                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
def train(train_set, val_set, load_path=False, epochs=1, lr=1e-3,
          record_every=300, lr_decay=1e-3, test_num=500):
    '''
    Uses the training set and validation set as arguments to create
    dataloaders, then loads and trains an R-FCN ResNet101 model.

    Args:
        train_set / val_set: datasets wrapped in td.DataLoader (batch 1).
        load_path: checkpoint path to resume from (False = fresh start).
        epochs: number of epochs to run.
        lr, lr_decay, test_num: kept for API compatibility (not used in the
            visible body).
        record_every: iterations between loss snapshots/saves.
    '''
    train_dataloader = td.DataLoader(train_set, batch_size=1,
                                     pin_memory=False, shuffle=True)
    test_dataloader = td.DataLoader(val_set, batch_size=1, pin_memory=True)
    faster_rcnn = RFCNResnet101().cuda()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    saved_loss = []
    iterations = []
    if load_path:
        trainer.load(load_path)
        print('load pretrained model from %s' % load_path)
        # Restore the loss history recorded alongside the checkpoint.
        state_dict = t.load(load_path)
        saved_loss = state_dict['losses']
        iterations = state_dict['iterations']
    for epoch in range(epochs):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(train_dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            losses = trainer.train_step(img, bbox, label, scale)
            loss_info = 'Iter {}; Losses: RPN loc {}, RPN cls: {}, ROI loc {}, ROI cls {}, Total:{}'.format(
                str(ii), "%.3f" % losses[0].cpu().data.numpy(),
                "%.3f" % losses[1].cpu().data.numpy(),
                "%.3f" % losses[2].cpu().data.numpy(),
                "%.3f" % losses[3].cpu().data.numpy(),
                "%.3f" % losses[4].cpu().data.numpy())
            print(loss_info)
            if (ii + 1) % record_every == 0:
                # Snapshot the five loss components for later plotting.
                iterations.append(ii + 1)
                saved_loss.append([
                    losses[0].cpu().item(),
                    losses[1].cpu().item(),
                    losses[2].cpu().item(),
                    losses[3].cpu().item(),
                    losses[4].cpu().item()
                ])
                # Removed an unused `kwargs` dict (and unused best_map/lr_
                # locals) the original built but never used.
                trainer.save(saved_loss=saved_loss, iterations=iterations)
                print("new model saved")
def update_meters(self, losses, BR=False):
    """Add each loss from the `losses` namedtuple into its running meter.

    Args:
        losses: namedtuple of loss values as returned by the train step.
        BR: when True, update the BR meter bank instead of the ordinary
            one; BR values remain on-device tensors and need an explicit
            host conversion before ``meter.add``.
    """
    loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
    if not BR:
        for key, meter in self.meters.items():
            meter.add(loss_d[key])
    else:
        # (removed commented-out debug prints from the original)
        for key, meter in self.BR_meters.items():
            # BR losses are still tensors here; move to host numpy first.
            meter.add(loss_d[key].cpu().numpy())
def train(**kwargs):
    """Train Faster R-CNN VGG16 on the configured dataset, periodically
    dumping ground-truth and predicted box renderings to gt.png / pred.png.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # NOTE(review): Variable is a deprecated no-op in modern PyTorch.
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                # ori_img_ = (at.tonumpy(img[0]))
                losses = trainer.get_meter_data()
                print(losses)
                write_image(ori_img_, at.tonumpy(bbox[0]), 'gt.png')
                # predict() returns (bboxes, labels, scores); only the first
                # element is used here — presumably just the boxes; verify.
                _bboxes = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                _bboxes = at.tonumpy(_bboxes[0])
                # plot predicted bboxes
                write_image(ori_img_, _bboxes, 'pred.png')
                print('saved an image')
        if epoch == 13:
            break
def train(individual, **kwargs):
    """Train a Faster R-CNN built from `individual` (a searched
    architecture) and record its best validation mAP on
    ``individual.accuracy``. ``kwargs`` are config overrides.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNN_mine(individual)
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    best_map = 0
    # BUGFIX: track the best checkpoint across epochs. The original reset
    # best_path to None inside every epoch, so trainer.load(None) could be
    # reached at the epoch-9 learning-rate decay.
    best_path = None
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # Reload the best checkpoint (if any) before decaying the LR.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
    individual.accuracy = best_map
def train(self):
    """
    Train ``self.net`` until ``self.conf.max_epoch``:
        forward / backward / record loss each step;
        every ``self.conf.se`` epochs, print timing and run ``summary()``.
    On Ctrl-C, optionally save the current progress interactively.
    """
    time_start = time.time()
    self.net.train()
    try:
        while self.epoch < self.conf.max_epoch:
            self.epoch_loss = 0
            self.reset_meters()
            for step, (img_name, img, bbox, label, index, relation, scale, _) in tqdm(enumerate(self.train_loader)):
                # bbox shape is [1, 2, 4]
                # label is [1,n]; label_attr is [1,m,n]
                # Skip samples with fewer than two boxes — presumably the
                # relation head needs at least a pair; confirm against
                # train_forward_net.
                if bbox.size(1) < 2:
                    continue
                self.optimizer.zero_grad()
                img, bbox, label, index, relation, scale = \
                    img.to(self.conf.dev), bbox.to(self.conf.dev), label.to(self.conf.dev), index.to(self.conf.dev),\
                    relation.to(self.conf.dev), at.scalar(scale)
                loss = self.train_forward_net(img_name, img, bbox, label, index, relation, scale)
                #print("loss {0}".format(loss.total_loss))
                loss.total_loss.backward()
                self.optimizer.step()
                self.update_meters(loss)
                # Weight the batch loss by batch size for the epoch average.
                self.epoch_loss += loss.total_loss.detach().cpu().numpy() * img.size(0)
                #if step == 3500:
                #    break
                # if step % 200 == 0:
                #     print('Step=', step)
            # ['epoch_loss', 'test_loss', 'training_loss']
            self.epoch_loss = self.epoch_loss / len(self.train_loader.dataset)
            # self.valid_loss = self.test(use_validation=True, display=True)
            self.tp.record_data({'epoch_loss': self.epoch_loss})
            # 'validation_loss': self.valid_loss})
            # Scheduler takes a dict payload (project-specific API).
            self.lr_scheduler.step({'loss': self.epoch_loss, 'epoch': self.epoch})
            # , 'torch': self.valid_loss})
            if self.epoch % self.conf.se == 0:
                print(timeSince(time_start), ': Trainer Summary Epoch=', self.epoch)
                self.summary()
            self.epoch += 1
        print(timeSince(time_start), ': Trainer Summary Epoch=', self.epoch)
        self.summary(save_optim=True)  # for resume training
    except KeyboardInterrupt:
        # Allow an interactive save when training is interrupted.
        save = input('Save Current Progress ? y for yes: ')
        if 'y' in save:
            print('Saving Progress...')
            self.save_progress(save_optim=True, display=True)
def train(self, train_set, test_set, num_epoch, B=1, lr=1e-3):
    """Train ``self.model`` on `train_set` for `num_epoch` epochs.

    Args:
        train_set / test_set: datasets wrapped in td.DataLoader.
        num_epoch: number of epochs to run.
        B: batch size for both loaders.
        lr: kept for API compatibility (unused in the visible body).
    """
    #device = 'cuda' if torch.cuda.is_available() else 'cpu'
    #model = model.to(device)
    #adam = torch.optim.Adam(model.parameters(), lr=lr)
    train_loader = td.DataLoader(train_set, batch_size=B,
                                 pin_memory=False, shuffle=True)
    test_loader = td.DataLoader(test_set, batch_size=B,
                                pin_memory=True, shuffle=False)
    # load stuff here from log file
    best_map = 0
    self.model.zero_grad()
    # set up plots here
    for epoch in range(num_epoch):
        # clear stuff (RFCNtrainer.reset_meters())
        for batch_ind, (image, bbox, bbox_labels, scale) in enumerate(train_loader):
            # move data to device
            scale = at.scalar(scale)
            img = image.to(self.device)
            bbox = bbox.to(self.device)
            lbl = bbox_labels.to(self.device)
            self.step(img, bbox, lbl, scale)
        # plot loss and stuff every 2 epochs
        if (epoch + 1) % 2 == 0:
            # plot stuff (loss, boxes, rpn confusion matrix, etc.) goes here
            # (removed an unused `emptyval` placeholder the original kept)
            # test with evaluation data, plot results
            #--> #result = eval(train_loader, self.model)
            #--> # log info to file here
            #--> #plot
            #--> #if (result['map'] > best_map):
            #--> #    best_map = result['map']
            pass
    return
def eval_mAP(trainer, val_loader):
    """Compute the mean of ``map_iou`` scores of trainer's detector over
    `val_loader`, rescaling both GT and predicted boxes back to the
    original image size.

    Samples with 5 keys carry GT boxes/labels; any other sample is treated
    as a negative image with empty ground truth.
    """
    tqdm.monitor_interval = 0
    mAP = []
    for ii, sample in tqdm(enumerate(val_loader)):
        if len(sample.keys()) == 5:
            img_id, img, bbox, scale, label = sample['img_id'], sample['image'], sample['bbox'], sample['scale'], \
                                              sample['label']
            img, bbox, label = img.cuda().float(), bbox.cuda(), label.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
        else:
            # No annotations: score predictions against empty ground truth.
            img_id, img, scale = sample['img_id'], sample['image'], sample[
                'scale']
            bbox = np.zeros((1, 0, 4))
            label = np.zeros((1, 0, 1))
            img = img.cuda().float()
            img = Variable(img)
        # if bbox is None:
        #     continue
        scale = at.scalar(scale)
        ori_img_ = inverse_normalize(at.tonumpy(img[0]))
        pred_boxes, pred_labels, pred_scores = trainer.faster_rcnn.predict(
            [ori_img_], visualize=True)
        # Single-image batch: take the first (only) result of each list.
        pred_boxes = pred_boxes[0]
        pred_labels = pred_labels[0]
        pred_scores = pred_scores[0]
        bbox = at.tonumpy(bbox[0])
        # Rescale image and boxes back to the pre-resize resolution.
        # NOTE(review): H * (1 / scale) is a float; transform.resize accepts
        # it, but confirm rounding matches the original image size.
        C, H, W = ori_img_.shape
        ori_img_ = transform.resize(ori_img_, (C, H * (1 / scale), W * (1 / scale)), mode='reflect')
        o_H, o_W = H * (1 / scale), W * (1 / scale)
        pred_boxes = resize_bbox(pred_boxes, (H, W), (o_H, o_W))
        bbox = resize_bbox(bbox, (H, W), (o_H, o_W))
        mAP.append(map_iou(bbox, pred_boxes, pred_scores))
        # if ii>=100:
        #     break
    # map_iou may return None; drop those entries before averaging.
    mAP = np.array(mAP)
    mAP = mAP[mAP != np.array(None)].astype(np.float32)
    return np.mean(mAP)
def pretrain(embedding_file):
    """Jointly pretrain a Faster R-CNN and an LSTM text classifier on a
    COCO-style dataset with word embeddings, saving both (with their
    optimizers) to ``pretrain.pth``.
    """
    dataset = COCODataset(embedding_file, opt, True)
    opt.n_class = dataset.n_class
    faster_rcnn = FasterRCNNVGG16()
    faster_rcnn_trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    opt.caffe_pretrain = False
    lstm = nn.LSTM(input_size=dataset.word_embedding.vector_size,
                   hidden_size=opt.hidden_size,
                   batch_first=True)
    predict_param = [lstm.hidden_size, opt.n_class]
    text_predictor = PredictNet(predict_param)
    # NOTE(review): this call has no effect (result discarded) — likely a
    # leftover; the parameters are presumably registered by LSTMTrainer.
    text_predictor.parameters()
    lstm_trainer = LSTMTrainer(lstm, text_predictor).cuda()
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    for epoch in range(opt.epoch):
        for ii, (img, bbox_, text, label_, scale) in tqdm(enumerate(dataloader)):
            # train faster rcnn
            scale = at.scalar(scale)
            img, bbox, text, label = img.cuda().float(), bbox_.cuda(
            ), text.cuda(), label_.cuda()
            faster_rcnn_trainer.train_step(img, bbox, label, scale)
            # train lstm
            lstm_trainer.lstm_step(text, label)
        # Snapshot both models and their optimizers (same path every time).
        # NOTE(review): original indentation was ambiguous; per-epoch saving
        # is assumed here — confirm this should not be inside the batch loop.
        state = {
            'rcnn': {
                'model': faster_rcnn_trainer.state_dict(),
                'optimizer': faster_rcnn_trainer.optimizer.state_dict()
            },
            'lstm': {
                'model': lstm_trainer.state_dict(),
                'optimizer': lstm_trainer.optimizer.state_dict()
            }
        }
        t.save(state, 'pretrain.pth')
def update_meters(self, losses):
    """Feed each loss from the `losses` namedtuple into its running meter."""
    # Materialize every loss as a plain scalar before touching the meters.
    scalar_losses = dict(
        (name, at.scalar(value)) for name, value in losses._asdict().items())
    for name, meter in self.meters.items():
        meter.add(scalar_losses[name])
def train(**kwargs):
    """Alternating-optimization training for a two-branch Faster R-CNN
    (extractor1+RPN vs. extractor2+head): branches are frozen/unfrozen at
    fixed epochs and stage-specific train steps are dispatched per epoch.

    NOTE(review): `precisions` and `recall` are allocated but never used in
    this function.
    """
    precisions = np.zeros([configurations.epoch, 20])
    recall = np.zeros([configurations.epoch, 20])
    configurations._parse(kwargs)
    dataset = Dataset(configurations)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  pin_memory=True,
                                  num_workers=configurations.num_workers)
    testset = TestDataset(configurations)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=configurations.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNN()
    #faster_rcnn.load_state_dict(torch.load('faster_rcnn_model_0.ckpt'))
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if configurations.load_path:
        trainer.load(configurations.load_path)
        print('load pretrained model from %s' % configurations.load_path)
    best_map = 0
    lr_ = configurations.lr
    for epoch in range(configurations.epoch):
        trainer.reset_meters()
        ########### FREEZING REQD MODEL #####################################
        if epoch == 0:
            ## stage 1: freeze ex2 and head; train extractor1 + RPN
            for param in trainer.faster_rcnn.extractor2[10:].parameters():
                param.requires_grad = False
            for param in trainer.faster_rcnn.head.parameters():
                param.requires_grad = False
            trainer.faster_rcnn.extractor1.train()
            trainer.faster_rcnn.rpn.train()
        elif epoch == 3:
            ## stage 2: freeze ex1 and rpn, unfreeze ex2 and head
            # unfreeze ex2 and head
            for param in trainer.faster_rcnn.extractor2[10:].parameters():
                param.requires_grad = True
            for param in trainer.faster_rcnn.head.parameters():
                param.requires_grad = True
            # make ex1 and rpn eval and frozen
            for param in trainer.faster_rcnn.extractor1[10:].parameters():
                param.requires_grad = False
            for param in trainer.faster_rcnn.rpn.parameters():
                param.requires_grad = False
            trainer.faster_rcnn.extractor1.eval()
            trainer.faster_rcnn.rpn.eval()
            trainer.faster_rcnn.head.train()
            trainer.faster_rcnn.extractor2.train()
        elif epoch == 7:
            # stage 3: fine-tune the RPN only
            trainer.faster_rcnn.rpn.train()
            for param in trainer.faster_rcnn.rpn.parameters():
                param.requires_grad = True
            for param in trainer.faster_rcnn.extractor2[10:].parameters():
                param.requires_grad = False
            for param in trainer.faster_rcnn.head.parameters():
                param.requires_grad = False
            trainer.faster_rcnn.extractor2.eval()
            trainer.faster_rcnn.head.eval()
        elif epoch == 9:
            # stage 4: fine-tune the head only
            for param in trainer.faster_rcnn.rpn.parameters():
                param.requires_grad = False
            for param in trainer.faster_rcnn.head.parameters():
                param.requires_grad = True
            trainer.faster_rcnn.rpn.eval()
            trainer.faster_rcnn.head.train()
        #####################################################################
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # Dispatch to the train step matching the current freeze stage.
            if epoch <= 2:
                trainer.step1(img, bbox, label, scale, epoch)
            elif epoch >= 3 and epoch <= 6:
                trainer.step2(img, bbox, label, scale, epoch)
            elif epoch >= 7 and epoch <= 8:
                trainer.step3(img, bbox, label, scale, epoch)
            elif epoch >= 9 and epoch <= 10:
                trainer.step4(img, bbox, label, scale, epoch)
            if ((ii + 1) % 500 == 0):
                append_loss(trainer.get_meter_data())
            if (ii + 1) % configurations.plot_every == 0:
                if os.path.exists(configurations.debug_file):
                    ipdb.set_trace()
                # plot loss
                if not os.path.exists(plot_dir):
                    os.mkdir(plot_dir)
                plot_loss(loss_list, plot_dir)
                # if ii == 7000:
                #     # plot groud truth bboxes
                #     ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                #     gt_img = fetch_image(ori_img_,
                #                          at.tonumpy(bbox_[0]),
                #                          at.tonumpy(label_[0]))
                #     gt_img = gt_img.transpose(1,2,0)
                #     if not os.path.exists(img_dir):
                #         os.mkdir(img_dir)
                #     plt.imsave('{}/actual_image_{}_{}.jpg'.format(img_dir, epoch, ii), gt_img)
                #     # plot prediction bboxes
                #     _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                #     pred_img = fetch_image(ori_img_,
                #                            at.tonumpy(_bboxes[0]),
                #                            at.tonumpy(_labels[0]).reshape(-1),
                #                            at.tonumpy(_scores[0]))
                #     pred_img = pred_img.transpose(1,2,0)
                #     plt.imsave('{}/predicted_image_{}_{}.jpg'.format(img_dir,epoch, ii), pred_img)
        torch.save(faster_rcnn.state_dict(), 'faster_rcnn_model_{}.ckpt'.format(epoch + 1))
        # NOTE(review): the names below (rpn_loc_loss ... total_roi) are not
        # defined in this function — presumably module-level histories filled
        # by append_loss; verify, otherwise this raises NameError.
        all_losses = np.zeros((7, len(total_loss)))
        all_losses[0, :] = rpn_loc_loss
        all_losses[1, :] = rpn_cls_loss
        all_losses[2, :] = roi_loc_loss
        all_losses[3, :] = roi_cls_loss
        all_losses[4, :] = total_loss
        all_losses[5, :] = total_rpn
        all_losses[6, :] = total_roi
        save_dir = 'prec_rec_loss/'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        np.save(save_dir + 'all_losses_' + str(epoch) + '.npy', all_losses)
        print("Epoch {} completed".format(epoch + 1))
def train(**kwargs):
    """Adversarial-robustness evaluation loop: optionally perturbs each image
    with PGD, renders predictions on clean vs. adversarial images, and counts
    how often the predicted labels agree.

    NOTE(review): the actual training call (trainer.train_step) is commented
    out — as written this only attacks and evaluates.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from {}'.format(opt.load_path))
    # trainer.vis.text(dataset.db.label_names, win='labels')
    adversary = None
    if opt.flagadvtrain:
        print("flagadvtrain turned: Adversarial training!")
        # PGD attack on the trainer (eps/alpha in [0,1] pixel scale).
        atk = PGD.PGD(trainer, eps=16/255, alpha=3/255, steps=4)
        # atk = torchattacks.PGD(trainer.faster_rcnn, eps=16, alpha=3, steps=4)
        # adversary = PGDAttack(trainer.faster_rcnn, loss_fn=nn.CrossEntropyLoss(), eps=16, nb_iter=4, eps_iter=3,
        #                       rand_init=True, clip_min=0.0, clip_max=1.0, targeted=False)
    best_map = 0
    lr_ = opt.lr
    normal_total_loss = []
    adv_total_loss = []
    total_time = 0.0
    total_imgs = 0
    true_imgs = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        once = True  # NOTE(review): unused
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # Keep an unperturbed copy for the clean-vs-adversarial compare.
            temp_img = copy.deepcopy(img).cuda()
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            if opt.flagadvtrain:
                before_time = time.time()
                img = atk(img, bbox, label, scale)
                after_time = time.time()
                # with ctx_noparamgrad_and_eval(trainer.faster_rcnn):
                #     img = adversary.perturb(img, label)
                # print("Adversarial training done!")
                total_time += after_time - before_time
            # print("Normal training starts\n")
            # trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                # adv_total_loss.append(trainer.get_meter_data()["total_loss"])
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # plot loss
                # trainer.vis.plot_many(trainer.get_meter_data())
                # plot ground truth bboxes
                temp_ori_img_ = inverse_normalize(at.tonumpy(temp_img[0]))
                # (commented-out visdom/img2jpg dumps of the clean GT image
                #  removed for brevity)
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                # (commented-out visdom/img2jpg dumps of the adversarial GT
                #  image removed for brevity)
                # plot predicted bboxes on the adversarial image
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                fig1 = plt.figure()
                ax1 = fig1.add_subplot(1, 1, 1)
                # final1 = (at.tonumpy(img[0].cpu()).transpose(1,2,0).astype(np.uint8))
                final1 = (ori_img_.transpose(1, 2, 0).astype(np.uint8))
                ax1.imshow(final1)
                gt_img = visdom_bbox(ax1, at.tonumpy(_bboxes[0]), at.tonumpy(_labels[0]))
                fig1.savefig("imgs/adv_images/adv_img{}".format(ii))
                plt.close()
                # predictions on the clean copy
                _temp_bboxes, _temp_labels, _temp_scores = trainer.faster_rcnn.predict([temp_ori_img_], visualize=True)
                fig2 = plt.figure()
                ax2 = fig2.add_subplot(1, 1, 1)
                final2 = (temp_ori_img_.transpose(1, 2, 0).astype(np.uint8))
                # final2 = (at.tonumpy(temp_img[0].cpu()).transpose(1, 2, 0).astype(np.uint8))
                ax2.imshow(final2)
                gt_img = visdom_bbox(ax2, at.tonumpy(_temp_bboxes[0]), at.tonumpy(_temp_labels[0]))
                fig2.savefig("imgs/orig_images/gt_img{}".format(ii))
                plt.close()
                # img2jpg(temp_gt_img, "imgs/orig_images/", "gt_img{}".format(ii))
                # print("gt labels is {}, pred_orig_labels is {} and pred_adv_labels is {}".format(label_, _labels, _temp_labels))
                total_imgs += 1
                if len(_temp_labels) == 0:
                    continue
                # NOTE(review): BUG — numpy's .all() returns np.bool_, and
                # `np.bool_(True) is True` is False, so this condition never
                # holds and true_imgs is never incremented. Drop the
                # `is True` comparison to fix.
                if _labels[0].shape[0] == _temp_labels[0].shape[0] and (_labels[0] == _temp_labels[0]).all() is True:
                    true_imgs += 1
                # (large commented-out block removed for brevity: visdom
                #  pred-image rendering, confusion-matrix plots, loss-curve
                #  figures, and the disabled eval/checkpoint/LR-decay logic)
        if epoch == 0:
            break
        if epoch == 13:
            break
    print("Total number of images is {}".format(total_imgs))
    print("True images is {}".format(true_imgs))
    print("Total time is {}".format(total_time))
    print("Avg time is {}".format(total_time/total_imgs))
def train(**kwargs):
    """Train Faster R-CNN on VOC-style data, evaluating test mAP each epoch.

    Keyword arguments override entries in the global ``opt`` config (parsed
    via ``opt._parse``). The best checkpoint (by test mAP) is saved; at
    epoch 9 it is reloaded and the learning rate is decayed by
    ``opt.lr_decay``. Training stops after epoch 13.
    """
    # Parse call-time overrides into the global config (data paths, hyperparams).
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    # The pipeline only supports batch_size=1 (images have variable sizes).
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       # pin_memory=True  # page-locked host memory for faster H2D copies
                                       )

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    # Optionally resume from a pretrained checkpoint.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.dataset.label_names, win='labels')

    best_map = 0
    best_path = None  # FIX: guard against NameError at epoch 9 if mAP never improves
    lr_ = opt.lr
    # opt.epoch (default 14) is a config-defined hyperparameter.
    for epoch in range(opt.epoch):
        print('epoch {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()  # reset all dashboard loss meters
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = array_tool.scalar(scale)
            # bbox_ holds ground-truth boxes (ymin, xmin, ymax, xmax);
            # label_ indexes the class-name list.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Plot the running losses.
                trainer.vis.plot_many(trainer.get_meter_data())

                # Plot ground-truth boxes over the de-normalized image.
                ori_img_ = inverse_normalize(array_tool.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     array_tool.tonumpy(bbox_[0]),
                                     array_tool.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # Plot predicted boxes (results land in the underscore-prefixed names).
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(
                    ori_img_,
                    array_tool.tonumpy(_bboxes[0]),
                    array_tool.tonumpy(_labels[0]).reshape(-1),
                    array_tool.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # RPN confusion matrix shown as text.
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # RoI-head confusion matrix shown as an image.
                trainer.vis.img(
                    'roi_cm',
                    array_tool.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}, loss{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)  # push lr / mAP / losses to the dashboard

        # Keep the checkpoint with the best test mAP seen so far.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9 and best_path is not None:
            # Reload the best checkpoint and decay the learning rate once.
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
def train(**kwargs):
    """Adversarially train a victim Faster R-CNN alongside a DCGAN attacker.

    Loads an optional pretrained attacker and detector, then alternates
    normal training with adversarial visualization: every ``opt.plot_every``
    steps the attacker's perturbed image and its predictions are plotted.
    Checkpoints are written every 500 steps (rcnn included) and every
    second epoch.

    FIX: removed the unconditional ``ipdb.set_trace()`` at the top of the
    inner loop — a debug leftover that halted training on every single
    iteration. The conditional breakpoint guarded by ``opt.debug_file``
    is kept.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')

    # Build the attacker; train_adv=False means it is used for perturbation,
    # not trained adversarially here.
    attacker = attacks.DCGAN(train_adv=False)
    if opt.load_attacker:
        attacker.load(opt.load_attacker)
        print('load attacker model from %s' % opt.load_attacker)

    trainer = VictimFasterRCNNTrainer(faster_rcnn, attacker,
                                      attack_mode=True).cuda()
    # trainer = VictimFasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')

    # eval_result = eval(test_dataloader, faster_rcnn, test_num=2000)
    best_map = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters(adv=True)  # reset both clean and adversarial meters
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Plot clean and adversarial loss meters.
                trainer.vis.plot_many(trainer.get_meter_data())
                trainer.vis.plot_many(trainer.get_meter_data(adv=True))

                # Plot ground-truth bboxes.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # Plot predicted bboxes on the clean image.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # Plot predictions on the attacker-perturbed image.
                if trainer.attacker is not None:
                    adv_img = trainer.attacker.perturb(img)
                    adv_img_ = inverse_normalize(at.tonumpy(adv_img[0]))
                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                        [adv_img_], visualize=True)
                    adv_pred_img = visdom_bbox(
                        adv_img_,
                        at.tonumpy(_bboxes[0]),
                        at.tonumpy(_labels[0]).reshape(-1),
                        at.tonumpy(_scores[0]))
                    trainer.vis.img('adv_img', adv_pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

            # Periodic checkpoint including the rcnn weights.
            if (ii) % 500 == 0:
                best_path = trainer.save(epochs=epoch, save_rcnn=True)

        # Checkpoint every second epoch.
        if epoch % 2 == 0:
            best_path = trainer.save(epochs=epoch)
def train(**kwargs):
    """Train Faster R-CNN (grayscale variant), logging to file and console.

    Sets up a timestamped log under ``logs/faster_rcnn_train_onGray``,
    trains for ``opt.epoch`` epochs printing periodic progress, evaluates
    test mAP each epoch, keeps the best checkpoint, and decays the
    learning rate once at epoch 9.
    """
    opt._parse(kwargs)

    # --- logging: one timestamped file plus stdout ---
    log_dir = os.path.join("logs", "faster_rcnn_train_onGray")
    os.makedirs(log_dir, exist_ok=True)
    log_path = os.path.join(
        log_dir,
        time.strftime("%Y-%m-%d-%H%M.log", time.localtime(time.time()))
    )
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[logging.FileHandler(log_path), logging.StreamHandler()],
    )
    logger = logging.getLogger()

    # --- data ---
    dataset = Dataset(opt)
    print("load data")
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,
        # pin_memory=True,
        num_workers=opt.num_workers,
    )
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        pin_memory=True,
    )

    # --- model / trainer ---
    faster_rcnn = FasterRCNNVGG16()
    print("model construct completed")
    logger.info(faster_rcnn)
    logger.info("-" * 50)
    trainer = FasterRCNNTrainer(faster_rcnn, logger).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print("load pretrained model from %s" % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win="labels")

    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        trainer.reset_ave()
        for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()

            # Every opt.print_freq steps, log progress and run a verbose step;
            # otherwise run a quiet step.
            verbose = (ii + 1) % opt.print_freq == 0
            if verbose:
                logger.info(
                    "[Train] Epoch:{} [{:03d}/{:03d}]({:.0f}%)\t".format(
                        epoch, ii + 1, len(dataloader),
                        (ii + 1) / len(dataloader) * 100
                    )
                )
            trainer.train_step(
                img, bbox, label, scale, print_epoch=epoch, print_info=verbose
            )

        # Evaluate on the test split once per epoch.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        # trainer.vis.plot("test_map", eval_result["map"])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]["lr"]
        log_info = "lr:{}, map:{},loss:{}".format(
            str(lr_), str(eval_result["map"]), str(trainer.get_meter_data())
        )
        logger.info(log_info)
        # trainer.vis.log(log_info)

        # Keep the best-mAP checkpoint under a timestamped path.
        if eval_result["map"] > best_map:
            best_map = eval_result["map"]
            best_path = trainer.save(
                best_map=best_map,
                save_path="checkpoints/trainedOnGray/fasterrcnn_%s"
                % time.strftime("%m%d%H%M"),
            )

        # Single LR decay step: reload the best checkpoint at epoch 9.
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    """Standard Faster R-CNN training loop with visdom visualization.

    Trains for ``opt.epoch`` epochs (hard-stopping after epoch 13),
    evaluates test mAP each epoch, checkpoints on improvement, and at
    epoch 9 reloads the best checkpoint and decays the learning rate.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    # batch_size must stay 1: images vary in size.
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,
        # pin_memory=True,
        num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')

    best_map = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # NOTE(review): Variable is a legacy no-op on modern PyTorch; kept as-is.
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Running losses.
                trainer.vis.plot_many(trainer.get_meter_data())

                # Ground-truth boxes over the de-normalized image.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # Predicted boxes.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # Confusion matrices: RPN as text, RoI head as an image.
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN on LargeImageDataset, dumping losses/mAP to disk.

    Instead of a live dashboard, per-interval loss histories are persisted
    with ``save_losses`` and the per-epoch test mAP with ``save_map``.
    """
    opt._parse(kwargs)

    dataset = LargeImageDataset(opt)
    print('load data')
    # Sequential (unshuffled) traversal of the large-image dataset.
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
        # pin_memory=True,
        num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    best_map = 0
    lr_ = opt.lr
    rpn_loc_loss = []
    rpn_cls_loss = []
    roi_loc_loss = []
    roi_cls_loss = []
    total_loss = []
    test_map_list = []

    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            # Every opt.plot_every steps: snapshot meters and persist them.
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                snapshot = trainer.get_meter_data()
                rpn_loc_loss.append(snapshot['rpn_loc_loss'])
                roi_loc_loss.append(snapshot['roi_loc_loss'])
                rpn_cls_loss.append(snapshot['rpn_cls_loss'])
                roi_cls_loss.append(snapshot['roi_cls_loss'])
                total_loss.append(snapshot['total_loss'])
                save_losses('rpn_loc_loss', rpn_loc_loss, epoch)
                save_losses('roi_loc_loss', roi_loc_loss, epoch)
                save_losses('rpn_cls_loss', rpn_cls_loss, epoch)
                save_losses('total_loss', total_loss, epoch)
                save_losses('roi_cls_loss', roi_cls_loss, epoch)

        # Per-epoch evaluation; mAP history goes to disk too.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        test_map_list.append(eval_result['map'])
        save_map(test_map_list, epoch)

        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        print(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # Reload best checkpoint and decay the learning rate once.
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
def train(**kwargs):
    """Train a VGG16-backed Faster R-CNN with visdom monitoring.

    Reads hyperparameters from the global ``opt`` config, evaluates test
    mAP after every epoch, checkpoints the best model, decays the learning
    rate once at epoch 9, and stops after epoch 13.
    """
    # Fold call-time overrides into the global config.
    opt._parse(kwargs)

    # Training data: batch_size=1 only (variable image sizes).
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,
        # pin_memory=True,
        num_workers=opt.num_workers)
    # Test data, also batch_size=1.
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        pin_memory=True)

    # Build the detector and wrap it in the trainer (moved to GPU).
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    # Resume from a pretrained checkpoint when configured.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')

    best_map = 0       # running best test mAP, used to decide checkpointing
    lr_ = opt.lr       # current learning rate (for logging)
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # bbox_: ground-truth boxes (ymin, xmin, ymax, xmax);
            # label_: indices into the label-name list.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            # Visualization every opt.plot_every steps.
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Loss curves.
                trainer.vis.plot_many(trainer.get_meter_data())

                # Ground-truth boxes on the de-normalized image.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # Predicted boxes (underscore-prefixed results).
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # RPN confusion matrix as text; RoI-head matrix as an image.
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        # One evaluation pass per epoch.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_),
            str(eval_result['map']),
            str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        # Checkpoint whenever the test mAP improves.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        # At epoch 9: reload the best model and shrink the learning rate.
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        # Stop after 14 epochs.
        if epoch == 13:
            break
def update_meters(self, losses): loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()} for key, meter in self.meters.items(): meter.add(loss_d[key])
def train(**kwargs):
    """Train Faster R-CNN, streaming metrics to TensorBoard.

    Per-step losses go to the 'Training(batch)' scalar group; per-epoch
    test mAP goes to the 'mAP' scalar. Checkpoints on mAP improvement,
    decays the learning rate at epoch 9, stops after epoch 13.
    """
    opt._parse(kwargs)
    log = SummaryWriter(log_dir=opt.log_dir)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    best_map = 0
    lr_ = opt.lr
    idx = 0  # global step counter for TensorBoard
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            # Push the current loss meters to TensorBoard at every step.
            losses = trainer.get_meter_data()
            log.add_scalars(main_tag='Training(batch)',
                            tag_scalar_dict=losses,
                            global_step=idx)
            idx = idx + 1

            if (ii + 1) % opt.plot_every == 0:
                # Console snapshot of the meters (visdom plotting disabled).
                print(trainer.get_meter_data())
                # De-normalized image; visdom-based plotting is disabled here,
                # the value is currently unused.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))

        # Per-epoch evaluation, logged to TensorBoard and stdout.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        log.add_scalar(tag='mAP',
                       scalar_value=eval_result['map'],
                       global_step=epoch)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        print(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN with the canonical visdom-monitored loop.

    Evaluates test mAP per epoch, keeps the best checkpoint, reloads it
    and decays the learning rate at epoch 9, stops after epoch 13.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,
        # pin_memory=True,
        num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')

    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Loss curves.
                trainer.vis.plot_many(trainer.get_meter_data())

                # Ground-truth boxes over the de-normalized image.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # Predicted boxes.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # Confusion matrices: RPN as text, RoI head as an image.
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN using an external train loader that may yield
    background-only samples (no boxes/labels).

    Samples with 5 keys carry annotations; otherwise empty box/label
    arrays are substituted. A checkpoint is written every 10th epoch.

    FIX: the background-only branch previously left ``bbox_``/``label_``
    unbound, so the plotting branch below raised NameError whenever it
    fired before any annotated sample had been seen. They are now bound
    to the empty arrays.
    """
    # opt._parse(kwargs)
    print('load data')
    dataloader = get_train_loader(opt.root_dir,
                                  batch_size=opt.batch_size,
                                  shuffle=opt.shuffle,
                                  num_workers=opt.num_workers,
                                  pin_memory=opt.pin_memory)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # if opt.load_path:
    #     trainer.load(opt.load_path)
    #     print('load pretrained model from %s' % opt.load_path)

    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, sample in tqdm(enumerate(dataloader)):
            if len(sample.keys()) == 5:
                # Annotated sample: move tensors to the GPU.
                img_id, img, bbox_, scale, label_ = (
                    sample['img_id'], sample['image'], sample['bbox'],
                    sample['scale'], sample['label'])
                img, bbox, label = (img.cuda().float(),
                                    bbox_.cuda(), label_.cuda())
                img, bbox, label = (Variable(img),
                                    Variable(bbox), Variable(label))
            else:
                # Background-only sample: substitute empty box/label arrays.
                img_id, img, bbox, scale, label = (
                    sample['img_id'], sample['image'], np.zeros((1, 0, 4)),
                    sample['scale'], np.zeros((1, 0, 1)))
                # Bug fix: keep the plotting variables bound in this branch.
                bbox_, label_ = bbox, label
                img = img.cuda().float()
                img = Variable(img)

            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Loss curves.
                trainer.vis.plot_many(trainer.get_meter_data())

                # Ground-truth boxes over the de-normalized image.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # Predicted boxes.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # Confusion matrices: RPN as text, RoI head as an image.
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        # Periodic checkpoint every 10th epoch (including epoch 0).
        if epoch % 10 == 0:
            best_path = trainer.save(best_map=best_map)
def train(**kwargs): opt._parse(kwargs) dataset = Dataset(opt) print("load data") dataloader = data_.DataLoader( dataset, batch_size=1, shuffle=True, # pin_memory=True, num_workers=opt.num_workers, ) testset = TestDataset(opt) test_dataloader = data_.DataLoader( testset, batch_size=1, num_workers=2, shuffle=False, # pin_memory=True ) faster_rcnn = FasterRCNNVGG16() print("model construct completed") trainer = FasterRCNNTrainer(faster_rcnn).cuda() if opt.load_path: trainer.load(opt.load_path) print("load pretrained model from %s" % opt.load_path) trainer.vis.text(dataset.db.label_names, win="labels") best_map = 0 for epoch in range(7): trainer.reset_meters() for ii, (img, bbox_, label_, scale, ori_img) in tqdm(enumerate(dataloader)): scale = at.scalar(scale) img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda() losses = trainer.train_step(img, bbox, label, scale) if (ii + 1) % opt.plot_every == 0: if os.path.exists(opt.debug_file): ipdb.set_trace() # plot loss trainer.vis.plot_many(trainer.get_meter_data()) # plot groud truth bboxes ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255 gt_img = visdom_bbox( at.tonumpy(ori_img_)[0], at.tonumpy(bbox_)[0], label_[0].numpy()) trainer.vis.img("gt_img", gt_img) # plot predicti bboxes _bboxes, _labels, _scores = trainer.faster_rcnn.predict( ori_img, visualize=True) pred_img = visdom_bbox( at.tonumpy(ori_img[0]), at.tonumpy(_bboxes[0]), at.tonumpy(_labels[0]).reshape(-1), at.tonumpy(_scores[0]), ) trainer.vis.img("pred_img", pred_img) # rpn confusion matrix(meter) trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win="rpn_cm") # roi confusion matrix trainer.vis.img( "roi_cm", at.totensor(trainer.roi_cm.conf, False).float()) if epoch == 4: trainer.faster_rcnn.scale_lr(opt.lr_decay) eval_result = eval(test_dataloader, faster_rcnn, test_num=1e100) print("eval_result") trainer.save(mAP=eval_result["map"])
def train_val():
    """Train Faster R-CNN with a 90/10 train/validation split.

    Evaluates validation mAP each epoch, keeps the best checkpoint,
    saves unconditionally on the last epoch, and every 10 epochs reloads
    the best checkpoint and decays the learning rate.
    """
    print('load data')
    train_loader, val_loader = get_train_val_loader(
        opt.root_dir,
        batch_size=opt.batch_size,
        val_ratio=0.1,
        shuffle=opt.shuffle,
        num_workers=opt.num_workers,
        pin_memory=opt.pin_memory)

    faster_rcnn = FasterRCNNVGG16()
    # faster_rcnn = FasterRCNNResNet50()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # if opt.load_path:
    #     trainer.load(opt.load_path)
    #     print('load pretrained model from %s' % opt.load_path)

    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        tqdm.monitor_interval = 0  # silence tqdm's monitor-thread warnings
        for ii, sample in tqdm(enumerate(train_loader)):
            if len(sample.keys()) == 5:
                # Annotated sample.
                img_id, img, bbox, scale, label = (
                    sample['img_id'], sample['image'], sample['bbox'],
                    sample['scale'], sample['label'])
                img, bbox, label = (img.cuda().float(),
                                    bbox.cuda(), label.cuda())
                img, bbox, label = (Variable(img),
                                    Variable(bbox), Variable(label))
            else:
                # Background-only sample: empty box/label arrays.
                img_id, img, bbox, scale, label = (
                    sample['img_id'], sample['image'], np.zeros((1, 0, 4)),
                    sample['scale'], np.zeros((1, 0, 1)))
                img = img.cuda().float()
                img = Variable(img)

            # NOTE(review): for tensors ``bbox.size`` is a method, so this
            # only skips the numpy empty-array branch — kept as-is.
            if bbox.size == 0:
                continue

            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Loss curves.
                trainer.vis.plot_many(trainer.get_meter_data())

                # Ground-truth boxes over the de-normalized image.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     img_id[0],
                                     at.tonumpy(bbox[0]),
                                     at.tonumpy(label[0]))
                trainer.vis.img('gt_img', gt_img)

                # Predicted boxes.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       img_id[0],
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # Confusion matrices: RPN as text, RoI head as an image.
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                trainer.vis.img(
                    'roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        # Per-epoch validation.
        mAP = eval_mAP(trainer, val_loader)
        trainer.vis.plot('val_mAP', mAP)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(mAP), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if mAP > best_map:
            best_map = mAP
            best_path = trainer.save(best_map=best_map)
        if epoch == opt.epoch - 1:
            best_path = trainer.save()
        if (epoch + 1) % 10 == 0:
            # Reload the best checkpoint and decay the learning rate.
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay