def train(**kwargs):
    """Train a FasterRCNNVGG16 model, then evaluate it and save a checkpoint.

    Keyword arguments are forwarded to ``Config._parse`` as configuration
    overrides. The function builds the training and test data loaders,
    wraps the model in a ``FasterRCNNTrainer`` moved to the GPU, optionally
    restores ``Config.frc_ckpt_path``, and trains for 7 epochs. The learning
    rate is scaled by ``Config.lr_decay`` at the end of epoch index 4.
    Every ``Config.plot_every`` iterations the current meters, the ground
    truth boxes and the predicted boxes are pushed to ``trainer.vis``.

    Args:
        **kwargs: Configuration overrides handed to ``Config._parse``.
    """
    Config._parse(kwargs)

    dataset = Dataset(Config)
    print('load data')
    # pin_memory is deliberately not passed; both loaders use the default.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=Config.num_workers)
    testset = TestDataset(Config)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=2,
                                       shuffle=False)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn, using_visdom=False).cuda()
    if Config.frc_ckpt_path:
        trainer.load(Config.frc_ckpt_path)
        print('load pretrained model from %s' % Config.frc_ckpt_path)

    trainer.vis.text(dataset.db.label_names, win='labels')

    for epoch in range(7):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale, ori_img) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % Config.plot_every != 0:
                continue

            # Drop into the debugger when the debug marker file exists.
            if os.path.exists(Config.debug_file):
                ipdb.set_trace()

            # Plot the running losses.
            trainer.vis.plot_many(trainer.get_meter_data())

            # Plot ground-truth boxes on the rescaled network input.
            # NOTE(review): 0.225 / 0.45 presumably undo the dataset's
            # normalisation -- confirm against the Dataset transform.
            ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
            gt_img = visdom_bbox(at.tonumpy(ori_img_)[0],
                                 at.tonumpy(bbox_)[0],
                                 label_[0].numpy())
            trainer.vis.img('gt_img', gt_img)

            # Plot predicted boxes on the original image from the loader.
            _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                ori_img, visualize=True)
            pred_img = visdom_bbox(at.tonumpy(ori_img[0]),
                                   at.tonumpy(_bboxes[0]),
                                   at.tonumpy(_labels[0]).reshape(-1),
                                   at.tonumpy(_scores[0]))
            trainer.vis.img('pred_img', pred_img)

            # RPN confusion matrix (meter), shown as text.
            trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                             win='rpn_cm')
            # RoI confusion matrix, shown as an image.
            trainer.vis.img('roi_cm',
                            at.totensor(trainer.roi_cm.conf, False).float())

        if epoch == 4:
            trainer.faster_rcnn.scale_lr(Config.lr_decay)

    # NOTE(review): the source formatting was lost; evaluation is assumed to
    # run once after the final epoch rather than inside the loop -- confirm.
    # `eval` is called with a `test_num` keyword, so it has to be the
    # project's evaluation helper rather than the builtin.
    eval_result = eval(test_dataloader, faster_rcnn, test_num=1e100)
    # Report the computed result, not just the literal label.
    print('eval_result', eval_result)
    trainer.save(mAP=eval_result['map'])
import datetime from utils.config import Config from model.fasterrcnn import FasterRCNNTrainer, FasterRCNN import tensorflow as tf from utils.data import Dataset physical_devices = tf.config.experimental.list_physical_devices('GPU') assert len(physical_devices) > 0, "Not enough GPU hardware devices available" tf.config.experimental.set_memory_growth(physical_devices[0], True) config = Config() config._parse({}) print("读取数据中....") dataset = Dataset(config) frcnn = FasterRCNN(21, (7, 7)) print('model construct completed') """ feature_map, rpn_locs, rpn_scores, rois, roi_indices, anchor = frcnn.rpn(x, scale) ''' feature_map : (1, 38, 50, 256) max= 0.0578503 rpn_locs : (1, 38, 50, 36) max= 0.058497224 rpn_scores : (1, 17100, 2) max= 0.047915094 rois : (2000, 4) max= 791.0 roi_indices :(2000,) max= 0 anchor : (17100, 4) max= 1154.0387 ''' bbox = bboxes label = labels
def train(**kwargs):
    """Train a FasterRCNNVGG16 model, evaluating after every epoch.

    Keyword arguments are forwarded to ``Config._parse`` as configuration
    overrides. For each of up to ``Config.epoch`` epochs the function runs
    one pass over the training loader, evaluates on the test loader, logs
    the learning rate / mAP / meters to ``trainer.vis`` and saves a
    checkpoint whenever the mAP improves on the best seen so far. At epoch
    index 9 the best checkpoint (if any was saved) is reloaded and the
    learning rate is scaled by ``Config.lr_decay``; training stops after
    epoch index 13.

    Args:
        **kwargs: Configuration overrides handed to ``Config._parse``.
    """
    Config._parse(kwargs)

    dataset = Dataset(Config)
    print('load data')
    # pin_memory is deliberately not passed for the training loader.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=Config.num_workers)
    testset = TestDataset(Config)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=Config.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if Config.frc_ckpt_path:
        trainer.load(Config.frc_ckpt_path)
        print('load pretrained model from %s' % Config.frc_ckpt_path)

    trainer.vis.text(dataset.db.label_names, win='labels')

    best_map = 0
    # Stays None until a checkpoint has actually been written, so the reload
    # at epoch 9 cannot hit an unbound name when mAP never exceeded 0.
    best_path = None

    for epoch in range(Config.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % Config.plot_every != 0:
                continue

            # Drop into the debugger when the debug marker file exists.
            if os.path.exists(Config.debug_file):
                ipdb.set_trace()

            # Plot the running losses.
            trainer.vis.plot_many(trainer.get_meter_data())

            # Plot ground-truth boxes on the de-normalised input image.
            ori_img_ = inverse_normalize(at.tonumpy(img[0]))
            gt_img = visdom_bbox(ori_img_,
                                 at.tonumpy(bbox_[0]),
                                 at.tonumpy(label_[0]))
            trainer.vis.img('gt_img', gt_img)

            # Plot predicted boxes for the same image.
            _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                [ori_img_], visualize=True)
            pred_img = visdom_bbox(ori_img_,
                                   at.tonumpy(_bboxes[0]),
                                   at.tonumpy(_labels[0]).reshape(-1),
                                   at.tonumpy(_scores[0]))
            trainer.vis.img('pred_img', pred_img)

            # RPN confusion matrix (meter), shown as text.
            trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                             win='rpn_cm')
            # RoI confusion matrix, shown as an image.
            trainer.vis.img('roi_cm',
                            at.totensor(trainer.roi_cm.conf, False).float())

        # `eval` is called with a `test_num` keyword, so it has to be the
        # project's evaluation helper rather than the builtin.
        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=Config.test_num)
        trainer.vis.plot('test_map', eval_result['map'])

        # Log the learning rate actually held by the optimizer.
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_),
            str(eval_result['map']),
            str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)

        if epoch == 9:
            # Restart from the best checkpoint before lowering the LR; skip
            # the reload when no checkpoint has been saved yet.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(Config.lr_decay)

        if epoch == 13:
            break