help='save model') parser.add_argument('--no-cuda', action='store_true', default=False, help='enables CUDA training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() # set gpu id used os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) all_left_img, all_right_img, all_left_disp, test_left_img, test_right_img, test_left_disp = lt.dataloader( args.datapath) TrainImgLoader = torch.utils.data.DataLoader( DA.myImageFloder(all_left_img, all_right_img, all_left_disp, True), batch_size=12, shuffle=True, num_workers=8, drop_last=False) TestImgLoader = torch.utils.data.DataLoader( DA.myImageFloder(test_left_img, test_right_img, test_left_disp, False), batch_size=8, shuffle=False, num_workers=4, drop_last=False) if args.model == 'stackhourglass': model = stackhourglass(args.maxdisp) elif args.model == 'basic': model = basic(args.maxdisp) else:
def main():
    """Train StereoNet end-to-end, then run one final evaluation pass.

    Uses module-level globals set up by the surrounding script (``args``,
    ``lt``, ``DA``, ``logger``, ``StereoNet``, ``weights_init``, ``train``,
    ``test``). Optionally resumes from ``args.resume``; a checkpoint is
    (over)written after every epoch.
    """
    global args
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    train_left_img, train_right_img, train_left_disp, test_left_img, \
        test_right_img, test_left_disp = lt.dataloader(args.datapath)
    # Sort each list so left/right/disparity files stay index-aligned.
    train_left_img.sort()
    train_right_img.sort()
    train_left_disp.sort()
    test_left_img.sort()
    test_right_img.sort()
    test_left_disp.sort()
    # Identity normalization: images are fed to the network unscaled.
    __normalize = {'mean': [0.0, 0.0, 0.0], 'std': [1.0, 1.0, 1.0]}
    TrainImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(train_left_img, train_right_img, train_left_disp,
                         True, normalize=__normalize),
        batch_size=args.train_bsize,
        shuffle=False,
        num_workers=1,
        drop_last=False)
    TestImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(test_left_img, test_right_img, test_left_disp,
                         False, normalize=__normalize),
        batch_size=args.test_bsize,
        shuffle=False,
        num_workers=4,
        drop_last=False)
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = logger.setup_logger(args.save_path + '/training.log')
    for key, value in sorted(vars(args).items()):
        log.info(str(key) + ':' + str(value))
    model = StereoNet(k=args.stages - 1, r=args.stages - 1,
                      maxdisp=args.maxdisp)
    model = nn.DataParallel(model).cuda()
    model.apply(weights_init)
    print('init with normal')
    optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.stepsize,
                                    gamma=args.gamma)
    log.info('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))
    args.start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            log.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            log.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            log.info("=> no checkpoint found at '{}'".format(args.resume))
            log.info("=> will start from scratch.")
    else:
        log.info("Not Resume")
    start_full_time = time.time()
    for epoch in range(args.start_epoch, args.epoch):
        log.info('This is {}-th epoch'.format(epoch))
        train(TrainImgLoader, model, optimizer, log, epoch)
        savefilename = args.save_path + '/checkpoint.pth'
        torch.save(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }, savefilename)
        scheduler.step()  # will adjust learning rate
    test(TestImgLoader, model, log)
    # BUG FIX: the spec was '{: 2f}' (space-sign, min width 2, default
    # precision); '.2f' gives the intended two decimal places and matches
    # the sibling training scripts.
    log.info('full training time = {:.2f} Hours'.format(
        (time.time() - start_full_time) / 3600))
def main():
    """Driver for the depth/disparity network: build loaders, create or
    restore the model, then run exactly one of three modes — depth-map
    generation, one-shot evaluation, or the full training loop.

    Relies on module-level globals: ``args``, ``best_RMSE``, the dataset
    loader modules, ``train``/``test``/``inference``, ``save_checkpoint``.
    """
    global best_RMSE
    # Losswise experiment tracker (project helper wrapping the service).
    lw = utils_func.LossWise(args.api_key, args.losswise_tag, args.epochs - 1)
    # set logger
    log = logger.setup_logger(os.path.join(args.save_path, 'training.log'))
    for key, value in sorted(vars(args).items()):
        log.info(str(key) + ': ' + str(value))
    # set tensorboard
    writer = SummaryWriter(args.save_path + '/tensorboardx')
    # Data Loader
    if args.generate_depth_map:
        # Submission mode: no training data, only the inference loader.
        TrainImgLoader = None
        import dataloader.KITTI_submission_loader as KITTI_submission_loader
        TestImgLoader = torch.utils.data.DataLoader(
            KITTI_submission_loader.SubmiteDataset(args.datapath,
                                                   args.data_list,
                                                   args.dynamic_bs),
            batch_size=args.bval,
            shuffle=False,
            num_workers=args.workers,
            drop_last=False)
    elif args.dataset == 'kitti':
        train_data, val_data = KITTILoader3D.dataloader(
            args.datapath,
            args.split_train,
            args.split_val,
            kitti2015=args.kitti2015)
        TrainImgLoader = torch.utils.data.DataLoader(
            KITTILoader_dataset3d.myImageFloder(train_data,
                                                True,
                                                kitti2015=args.kitti2015,
                                                dynamic_bs=args.dynamic_bs),
            batch_size=args.btrain,
            shuffle=True,
            num_workers=8,
            drop_last=False,
            pin_memory=True)
        TestImgLoader = torch.utils.data.DataLoader(
            KITTILoader_dataset3d.myImageFloder(val_data,
                                                False,
                                                kitti2015=args.kitti2015,
                                                dynamic_bs=args.dynamic_bs),
            batch_size=args.bval,
            shuffle=False,
            num_workers=8,
            drop_last=False,
            pin_memory=True)
    else:
        # SceneFlow (pre-training) path.
        train_data, val_data = listflowfile.dataloader(args.datapath)
        TrainImgLoader = torch.utils.data.DataLoader(
            SceneFlowLoader.myImageFloder(train_data, True,
                                          calib=args.calib_value),
            batch_size=args.btrain,
            shuffle=True,
            num_workers=8,
            drop_last=False)
        TestImgLoader = torch.utils.data.DataLoader(
            SceneFlowLoader.myImageFloder(val_data, False,
                                          calib=args.calib_value),
            batch_size=args.bval,
            shuffle=False,
            num_workers=8,
            drop_last=False)
    # Load Model — architecture is looked up by name in the module dict.
    if args.data_type == 'disparity':
        model = disp_models.__dict__[args.arch](maxdisp=args.maxdisp)
    elif args.data_type == 'depth':
        model = models.__dict__[args.arch](maxdepth=args.maxdepth,
                                           maxdisp=args.maxdisp,
                                           down=args.down,
                                           scale=args.scale)
    else:
        log.info('Model is not implemented')
        assert False
    # Number of parameters
    log.info('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))
    model = nn.DataParallel(model).cuda()
    torch.backends.cudnn.benchmark = True
    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    scheduler = MultiStepLR(optimizer,
                            milestones=args.lr_stepsize,
                            gamma=args.lr_gamma)
    if args.pretrain:
        if os.path.isfile(args.pretrain):
            log.info("=> loading pretrain '{}'".format(args.pretrain))
            checkpoint = torch.load(args.pretrain)
            # strict=False: pretrain weights may come from a different
            # variant, so missing/extra keys are tolerated here.
            model.load_state_dict(checkpoint['state_dict'], strict=False)
        else:
            log.info('[Attention]: Do not find checkpoint {}'.format(
                args.pretrain))
    if args.resume:
        # Full resume: weights, epoch counter, optimizer, best metric and
        # LR-scheduler state are all restored from the checkpoint.
        if os.path.isfile(args.resume):
            log.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            best_RMSE = checkpoint['best_RMSE']
            scheduler.load_state_dict(checkpoint['scheduler'])
            log.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            log.info('[Attention]: Do not find checkpoint {}'.format(
                args.resume))
    if args.generate_depth_map:
        # Inference-only path: write one .npy map per input frame, cropped
        # back to the original H x W of each image, then exit the process.
        os.makedirs(args.save_path + '/depth_maps/' + args.data_tag,
                    exist_ok=True)
        tqdm_eval_loader = tqdm(TestImgLoader, total=len(TestImgLoader))
        for batch_idx, (imgL_crop, imgR_crop, calib, H, W,
                        filename) in enumerate(tqdm_eval_loader):
            pred_disp = inference(imgL_crop, imgR_crop, calib, model)
            for idx, name in enumerate(filename):
                np.save(
                    args.save_path + '/depth_maps/' + args.data_tag + '/' +
                    name, pred_disp[idx][-H[idx]:, :W[idx]])
        import sys
        sys.exit()
    # evaluation — one pass over the validation loader, then exit.
    if args.evaluate:
        evaluate_metric = utils_func.Metric()
        ## training ##
        for batch_idx, (imgL_crop, imgR_crop, disp_crop_L,
                        calib) in enumerate(TestImgLoader):
            start_time = time.time()
            test(imgL_crop, imgR_crop, disp_crop_L, calib, evaluate_metric,
                 optimizer, model)
            log.info(
                evaluate_metric.print(batch_idx, 'EVALUATE') +
                ' Time:{:.3f}'.format(time.time() - start_time))
        import sys
        sys.exit()
    for epoch in range(args.start_epoch, args.epochs):
        # NOTE(review): scheduler is stepped before train() each epoch —
        # confirm this ordering is intentional for the targeted PyTorch
        # version (post-1.1 the recommended order is after the epoch).
        scheduler.step()
        ## training ##
        train_metric = utils_func.Metric()
        tqdm_train_loader = tqdm(TrainImgLoader, total=len(TrainImgLoader))
        for batch_idx, (imgL_crop, imgR_crop, disp_crop_L,
                        calib) in enumerate(tqdm_train_loader):
            # start_time = time.time()
            train(imgL_crop, imgR_crop, disp_crop_L, calib, train_metric,
                  optimizer, model, epoch)
            # log.info(train_metric.print(batch_idx, 'TRAIN') + ' Time:{:.3f}'.format(time.time() - start_time))
        log.info(train_metric.print(0, 'TRAIN Epoch' + str(epoch)))
        train_metric.tensorboard(writer, epoch, token='TRAIN')
        lw.update(train_metric.get_info(), epoch, 'Train')
        ## testing ##
        is_best = False
        # Evaluate at epoch 0 and then every `eval_interval` epochs.
        if epoch == 0 or ((epoch + 1) % args.eval_interval) == 0:
            test_metric = utils_func.Metric()
            tqdm_test_loader = tqdm(TestImgLoader, total=len(TestImgLoader))
            for batch_idx, (imgL_crop, imgR_crop, disp_crop_L,
                            calib) in enumerate(tqdm_test_loader):
                # start_time = time.time()
                test(imgL_crop, imgR_crop, disp_crop_L, calib, test_metric,
                     optimizer, model)
                # log.info(test_metric.print(batch_idx, 'TEST') + ' Time:{:.3f}'.format(time.time() - start_time))
            log.info(test_metric.print(0, 'TEST Epoch' + str(epoch)))
            test_metric.tensorboard(writer, epoch, token='TEST')
            lw.update(test_metric.get_info(), epoch, 'Test')
            # SAVE — lower RMSE (RMSELIs) is better.
            is_best = test_metric.RMSELIs.avg < best_RMSE
            best_RMSE = min(test_metric.RMSELIs.avg, best_RMSE)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_RMSE': best_RMSE,
                'scheduler': scheduler.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            epoch,
            folder=args.save_path)
    lw.done()
def main():
    """Train a stereo model with apex AMP, optionally distributed (DDP).

    Fixes relative to the original:
      * ``elif args.model_types == "Hybrid_Net_DSM" or "Hybrid_Net":`` —
        the ``or "Hybrid_Net"`` operand is always truthy, so this branch
        matched every remaining model type and the ``else`` was
        unreachable; replaced with a membership test.
      * ``AssertionError("model error")`` was a bare expression that never
        raised; it is now actually raised.
    """
    global args
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)
    if args.distributed:
        # Only stay distributed if the launcher exported more than one rank.
        if 'WORLD_SIZE' in os.environ:
            args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.world_size = 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."
    train_left_img, train_right_img, train_left_disp, test_left_img, \
        test_right_img, test_left_disp = lt.dataloader(args.datapath)
    train_set = DA.myImageFloder(train_left_img, train_right_img,
                                 train_left_disp, True)
    val_set = DA.myImageFloder(test_left_img, test_right_img,
                               test_left_disp, False)
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_set)
        val_sampler = torch.utils.data.distributed.DistributedSampler(val_set)
    else:
        train_sampler = None
        val_sampler = None
    TrainImgLoader = torch.utils.data.DataLoader(train_set,
                                                 batch_size=args.train_bsize,
                                                 shuffle=False,
                                                 num_workers=4,
                                                 pin_memory=True,
                                                 sampler=train_sampler,
                                                 drop_last=False)
    # NOTE(review): val_sampler is built but not passed here (sampler=None),
    # so every rank evaluates the full validation set — confirm intended.
    TestImgLoader = torch.utils.data.DataLoader(val_set,
                                                batch_size=args.test_bsize,
                                                shuffle=False,
                                                num_workers=4,
                                                pin_memory=True,
                                                sampler=None,
                                                drop_last=False)
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = logger.setup_logger(args.save_path + '/training.log')
    if args.local_rank == 0:
        log.info('len train_left_img: {}'.format(len(train_left_img)))
        log.info('len test_left_img: {}'.format(len(test_left_img)))
    if args.local_rank == 0:
        for key, value in sorted(vars(args).items()):
            log.info(str(key) + ': ' + str(value))
    if args.model_types == "PSMNet":
        model = PSMNet(args)
        args.loss_weights = [0.5, 0.7, 1.]
    elif args.model_types == "PSMNet_DSM":
        model = PSMNet_DSM(args)
        args.loss_weights = [0.5, 0.7, 1.]
    # BUG FIX: was `== "Hybrid_Net_DSM" or "Hybrid_Net"`, which is always
    # truthy and swallowed every other model type.
    elif args.model_types in ("Hybrid_Net_DSM", "Hybrid_Net"):
        model = Hybrid_Net(args)
        args.loss_weights = [0.5, 0.7, 1., 1., 1.]
    else:
        # BUG FIX: the exception was constructed but never raised.
        raise AssertionError("model error")
    if args.count_flops:
        FLOPs, param = count_flops(model.cuda())
        if args.local_rank == 0:
            log.info("macs:{}".format(FLOPs))
            log.info("parameters:{} ".format(param))
    if args.sync_bn:
        if args.local_rank == 0:
            log.info(
                "using apex synced BN-----------------------------------------------------"
            )
        model = apex.parallel.convert_syncbn_model(model)
    model = model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    # Mixed-precision initialization must wrap both model and optimizer.
    model, optimizer = amp.initialize(
        model,
        optimizer,
        opt_level=args.opt_level,
        keep_batchnorm_fp32=args.keep_batchnorm_fp32,
        loss_scale=args.loss_scale)
    if args.distributed:
        if args.local_rank == 0:
            log.info(
                "using distributed-----------------------------------------------------"
            )
        model = DDP(model, delay_allreduce=True)
    if args.local_rank == 0:
        log.info('Number of model parameters: {}'.format(
            sum(p.data.nelement() for p in model.parameters())))
    args.start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            checkpoint = torch.load(args.resume, map_location='cpu')
            args.start_epoch = checkpoint['epoch'] + 1
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            if args.local_rank == 0:
                log.info("=> loading checkpoint '{}'".format(args.resume))
                log.info("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
        else:
            if args.local_rank == 0:
                log.info("=> no checkpoint found at '{}'".format(args.resume))
                log.info("=> Will start from scratch.")
    else:
        if args.local_rank == 0:
            log.info('Not Resume')
    start_full_time = time.time()
    if args.train:
        for epoch in range(args.start_epoch, args.epochs):
            if args.distributed:
                # Reshuffle shards differently each epoch.
                train_sampler.set_epoch(epoch)
            if args.local_rank == 0:
                log.info('This is {}-th epoch'.format(epoch))
            adjust_learning_rate(optimizer, epoch)
            train(TrainImgLoader, model, optimizer, log, epoch)
            # SAVE — only rank 0 writes checkpoints.
            if args.local_rank == 0:
                savefilename = args.save_path + '/checkpoint_' + str(
                    epoch) + '.tar'
                torch.save(
                    {
                        'epoch': epoch,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                    }, savefilename)
            if not epoch % 10:
                test(TestImgLoader, model, log)
    test(TestImgLoader, model, log)
    if args.local_rank == 0:
        log.info('full training time = {:.2f} Hours'.format(
            (time.time() - start_full_time) / 3600))
def main():
    """Entry point: train AnyNet for ``args.epochs`` epochs, then evaluate.

    Builds the train/test loaders from ``args.datapath``, restores an
    optional checkpoint from ``args.resume``, and overwrites a single
    checkpoint file after every epoch.
    """
    global args
    (train_left_img, train_right_img, train_left_disp,
     test_left_img, test_right_img, test_left_disp) = lt.dataloader(
         args.datapath)

    train_set = DA.myImageFloder(train_left_img, train_right_img,
                                 train_left_disp, True)
    test_set = DA.myImageFloder(test_left_img, test_right_img,
                                test_left_disp, False)
    TrainImgLoader = torch.utils.data.DataLoader(train_set,
                                                 batch_size=args.train_bsize,
                                                 shuffle=True,
                                                 num_workers=4,
                                                 drop_last=False)
    TestImgLoader = torch.utils.data.DataLoader(test_set,
                                                batch_size=args.test_bsize,
                                                shuffle=False,
                                                num_workers=4,
                                                drop_last=False)

    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = logger.setup_logger(args.save_path + '/training.log')
    for key, value in sorted(vars(args).items()):
        log.info(str(key) + ': ' + str(value))

    model = nn.DataParallel(models.anynet.AnyNet(args)).cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    param_count = sum(p.data.nelement() for p in model.parameters())
    log.info('Number of model parameters: {}'.format(param_count))

    args.start_epoch = 0
    if not args.resume:
        log.info('Not Resume')
    elif os.path.isfile(args.resume):
        log.info("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        log.info("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
    else:
        log.info("=> no checkpoint found at '{}'".format(args.resume))
        log.info("=> Will start from scratch.")

    start_full_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        log.info('This is {}-th epoch'.format(epoch))
        train(TrainImgLoader, model, optimizer, log, epoch)
        # The same checkpoint file is overwritten each epoch.
        torch.save(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, args.save_path + '/checkpoint.tar')
    test(TestImgLoader, model, log)
    log.info('full training time = {:.2f} Hours'.format(
        (time.time() - start_full_time) / 3600))
'--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) all_left_img, all_right_img, all_left_disp, \ test_left_img, test_right_img, test_left_disp = lt.dataloader( args.datapath, only_train_on_flyingthings3d=False) TrainImgLoader = torch.utils.data.DataLoader( DA.myImageFloder( all_left_img, all_right_img, all_left_disp, True, normalize=__normalize), batch_size=args.batchsize, shuffle=True, num_workers=min(4, args.batchsize), drop_last=False) TestImgLoader = torch.utils.data.DataLoader( DA.myImageFloder(
def main():
    """Train AnyNet, or (with ``--test``) only evaluate a saved checkpoint.

    Test mode requires ``--resume`` and writes its outputs under
    ``<save_path>/Testing``; otherwise the model trains from scratch or
    resumes, saving a checkpoint after every epoch.
    """
    global args
    (train_left_img, train_right_img, train_left_disp,
     test_left_img, test_right_img, test_left_disp) = lt.dataloader(
         args.datapath)

    TrainImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(train_left_img, train_right_img, train_left_disp,
                         True),
        batch_size=args.train_bsize,
        shuffle=True,
        num_workers=4,
        drop_last=False)
    TestImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(test_left_img, test_right_img, test_left_disp,
                         False),
        batch_size=args.test_bsize,
        shuffle=False,
        num_workers=4,
        drop_last=False)

    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)

    # Log file name depends on the mode; test mode also gets its own
    # output directory.
    if args.test:
        log_name = "/testing.log"
        testing_dir = args.save_path + "/Testing"
        if not os.path.isdir(testing_dir):
            os.makedirs(testing_dir)
        print("\n=== Testing ===")
    else:
        log_name = "/training.log"
    log = logger.setup_logger(args.save_path + log_name)
    for key, value in sorted(vars(args).items()):
        log.info(str(key) + ': ' + str(value))

    model = models.anynet.AnyNet(args)
    # model = nn.DataParallel(model).cuda()
    model = model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    log.info('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))

    args.start_epoch = 0
    if args.test:
        # Only perform test. --resume option is assumed to be issued at the same time.
        if args.resume is None:
            raise Exception(
                "--resume arguments must be set while --test is issued.")
        ckpt_path = args.save_path + "/" + args.resume
        if not os.path.isfile(ckpt_path):
            raise Exception("Checkpoint %s does not exist." % (ckpt_path))
        log.info("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(ckpt_path)
        model.load_state_dict(checkpoint['state_dict'])
    elif args.resume:
        ckpt_path = args.save_path + "/" + args.resume
        if os.path.isfile(ckpt_path):
            log.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(ckpt_path)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            log.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            log.info("=> no checkpoint found at '{}'".format(args.resume))
            log.info("=> Will start from scratch.")
    else:
        log.info('Not Resume')

    start_full_time = time.time()
    if not args.test:
        for epoch in range(args.start_epoch, args.epochs):
            log.info('This is {}-th epoch'.format(epoch))
            train(TrainImgLoader, model, optimizer, log, epoch)
            torch.save(
                {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }, args.save_path + '/checkpoint.tar')
        test(TestImgLoader, model, log)
    else:
        test(TestImgLoader, model, log, args.test_stride,
             args.save_path + "/Testing")
    log.info('full training time = {:.2f} Hours'.format(
        (time.time() - start_full_time) / 3600))
def main():
    """Measure FLOPs/inference behaviour: load a model (optionally from a
    checkpoint) and run one evaluation pass over the test loader.

    Fixes relative to the original:
      * both ``else: AssertionError`` branches were bare expressions that
        never raised (execution fell through and later crashed with
        ``NameError`` on the undefined ``DA``/``model``); they now raise.
      * the 30-entry logging loop IndexError'd when the test split had
        fewer than 30 images; it is now bounded by the list length.
    """
    global args
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)
    if args.datatype == '2015':
        all_left_img, all_right_img, all_left_disp, test_left_img, \
            test_right_img, test_left_disp = ls2015.dataloader(
                args.datapath2015, split=args.split_for_val)
        from dataloader import KITTILoader as DA
    elif args.datatype == 'Sence Flow':
        train_left_img, train_right_img, train_left_disp, test_left_img, \
            test_right_img, test_left_disp = lt.dataloader(args.datapath)
        from dataloader import SecenFlowLoader as DA
    else:
        # BUG FIX: was a bare `AssertionError` expression, never raised.
        raise AssertionError('unknown datatype: {}'.format(args.datatype))
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = logger.setup_logger(args.save_path + '/FLOPs_inference_time.log')
    for key, value in sorted(vars(args).items()):
        log.info(str(key) + ': ' + str(value))
    if args.model_types == "PSMNet":
        model = PSMNet(args)
        args.loss_weights = [0.5, 0.7, 1.]
    elif args.model_types == "PSMNet_TSM":
        model = PSMNet_TSM(args)
        args.loss_weights = [0.5, 0.7, 1.]
    elif args.model_types == "Hybrid_Net":
        model = Hybrid_Net(args)
        args.loss_weights = [0.5, 0.7, 1., 1., 1.]
    elif args.model_types == "Hybrid_Net_DSM":
        model = Hybrid_Net(args)
        args.loss_weights = [0.5, 0.7, 1., 1., 1.]
    else:
        # BUG FIX: was a bare expression, never raised.
        raise AssertionError("model error")
    model = nn.DataParallel(model).cuda()
    # Log a sample of the evaluation inputs for traceability.
    # BUG FIX: bound by the list length (was a hard-coded range(30)).
    for i in range(min(30, len(test_left_img))):
        log.info("=> test_left_img '{}'".format(test_left_img[i]))
    TestImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(test_left_img, test_right_img, test_left_disp,
                         False),
        batch_size=args.test_bsize,
        shuffle=False,
        num_workers=4,
        drop_last=False)
    if args.resume:
        if os.path.isfile(args.resume):
            log.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            model.load_state_dict(checkpoint['state_dict'])
            log.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            log.info("=> no checkpoint found at '{}'".format(args.resume))
            log.info("=> Will start from scratch.")
    else:
        log.info('Not Resume')
    log.info('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))
    test(TestImgLoader, model, log)
type=int, default=1, metavar='S', help='random seed (default: 1)') args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() # set gpu id used os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) all_left_img, all_right_img, all_left_disp, test_left_img, \ test_right_img, test_left_disp = lt.dataloader( args.datapath, args.fraction) TrainImgLoader = torch.utils.data.DataLoader(DA.myImageFloder( all_left_img, all_right_img, all_left_disp, True), batch_size=args.batch_size, shuffle=True, num_workers=8, drop_last=False) train_loader_len = len(TrainImgLoader) TestImgLoader = torch.utils.data.DataLoader(DA.myImageFloder( test_left_img, test_right_img, test_left_disp, False), batch_size=args.batch_size, shuffle=False, num_workers=4,
def main():
    """Train iresNet on ``args.datapath`` and evaluate once at the end.

    Fixes relative to the original:
      * resume is gated on ``args.loadmodel`` (it was gated on
        ``args.savepath``, which is always set, so a missing ``--loadmodel``
        made ``os.path.isfile(None)`` raise);
      * the checkpoint's weights and optimizer state are actually restored
        (the original read the file but never applied it);
      * ``train``/``test`` now receive the correct lists — the training
        call used the *test* disparities, and the test call passed the
        disparity list where the left images belong;
      * the final log used the spec ``{:2f}`` (width 2, full precision)
        and misspelled "training".
    """
    global args
    train_left_img, train_right_img, train_left_disp, test_left_img, \
        test_right_img, test_left_disp = lt.dataloader(args.datapath)
    # Sort so left/right/disparity lists stay index-aligned.
    train_left_img.sort()
    train_right_img.sort()
    train_left_disp.sort()
    test_left_img.sort()
    test_right_img.sort()
    test_left_disp.sort()
    # NOTE(review): these loaders are constructed but train()/test() below
    # are called with the raw file lists — confirm against their signatures.
    TrainImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(train_left_img, train_right_img, train_left_disp,
                         True),
        batch_size=2,
        shuffle=True,
        num_workers=8,
        drop_last=False)
    TestImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(test_left_img, test_right_img, test_left_disp,
                         False),
        batch_size=2,
        shuffle=False,
        num_workers=4,
        drop_last=False)
    if not os.path.isdir(args.savepath):
        os.makedirs(args.savepath)
    log = logger.setup_logger(args.savepath + '/training.log')
    for key, value in sorted(vars(args).items()):
        log.info(str(key) + ':' + str(value))
    lr = args.lr
    model = iresNet()
    model = nn.DataParallel(model).cuda()
    optimizer = optim.Adam(model.parameters(),
                           lr=lr,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0)
    log.info('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))
    args.start_epoch = 0
    # BUG FIX: resume only when a checkpoint path was actually supplied
    # (was `if args.savepath:`).
    if args.loadmodel:
        if os.path.isfile(args.loadmodel):
            log.info("=> loading checkpoint '{}'".format(args.loadmodel))
            checkpoint = torch.load(args.loadmodel)
            args.start_epoch = checkpoint['epoch']
            # BUG FIX: the original never restored the saved state.
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            log.info("=> no checkpoint '{}'".format(args.loadmodel))
            log.info("=>will start from scratch.")
    else:
        log.info("Not Resume")
    # train
    start_full_time = time.time()  # count the time training used
    for epoch in range(args.start_epoch, args.epoch):
        log.info('This is {}-th epoch'.format(epoch))
        # BUG FIX: pass the *training* disparities (was test_left_disp).
        train(train_left_img, train_right_img, train_left_disp, model,
              optimizer, log)
        savefilename = args.savepath + '/checkpoint.pth'
        torch.save(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }, savefilename)
    # BUG FIX: first argument must be the left test images (was the
    # disparity list).
    test(test_left_img, test_right_img, test_left_disp, model, log)
    log.info('Full training time = {:.2f} Hours'.format(
        (time.time() - start_full_time) / 3600))
os.mkdir(args.log_dir) if not os.path.exists(args.savemodel): os.mkdir(args.savemodel) # set gpu id used #os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3" # if args.KITTI == '2015': # from dataloader import KITTIloader2015 as ls # else: # from dataloader import KITTIloader2012 as ls torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) all_left, all_right, all_gt, test_left, test_right, test_gt = lt.dataloader( args.datapath) Trainloader = torch.utils.data.DataLoader(DA.myImageFloder( all_left, all_right, all_gt, True), batch_size=12, shuffle=True, num_workers=8, drop_last=False) Testloader = torch.utils.data.DataLoader(DA.myImageFloder( all_left, all_right, all_gt, False), batch_size=8, shuffle=False, num_workers=4, drop_last=False)
def main():
    """Fine-tune AnyNet, optionally freezing the first ``args.fixnum``
    parameter tensors, starting from an optional pretrained checkpoint.

    Fix: ``args.start_epoch is not 1`` compared an int by *identity*
    (implementation-defined, and a SyntaxWarning on CPython >= 3.8);
    replaced with ``!=``. Dead commented-out code was removed.
    """
    global args
    train_left_img, train_right_img, train_left_disp, test_left_img, \
        test_right_img, test_left_disp = lt.dataloader(args.datapath)
    TrainImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(train_left_img, train_right_img, train_left_disp,
                         True),
        batch_size=args.train_bsize,
        shuffle=True,
        num_workers=4,
        drop_last=False)
    TestImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(test_left_img, test_right_img, test_left_disp,
                         False),
        batch_size=args.test_bsize,
        shuffle=False,
        num_workers=4,
        drop_last=False)
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    model = models.anynet.AnyNet(args)
    model = nn.DataParallel(model).cuda()
    num_pretrain_items = 0
    num_model_items = 0
    if args.loadmodel is not None:
        # Load only the pretrained weights whose names match this model.
        pretrained_dict = torch.load(args.loadmodel)
        model_dict = model.state_dict()
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if k in model_dict
        }
        num_pretrain_items = len(pretrained_dict.items())
        num_model_items = len(model_dict.items())
        print('Number of pretrained items: {:d}'.format(num_pretrain_items))
        print('Number of model items: {:d}'.format(num_model_items))
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
    else:
        start_epoch = 1
        model_dict = model.state_dict()
        num_model_items = len(model_dict.items())
        print('Number of model items: {:d}'.format(num_model_items))
    # BUG FIX: was `args.start_epoch is not 1` — identity comparison on an
    # int literal; `!=` expresses the intended value comparison.
    if args.start_epoch != 1:
        start_epoch = args.start_epoch
    else:
        start_epoch = 1
    print(model)
    print('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))
    if args.trainfull:
        optimizer = optim.Adam(model.parameters(),
                               lr=0.001,
                               betas=(0.9, 0.999))
    else:
        # Freeze the first `fixnum` parameter tensors; optimize the rest.
        for i, p in enumerate(model.parameters()):
            print(i, p.shape)
            if i < args.fixnum:
                p.requires_grad = False
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=0.001,
                               betas=(0.9, 0.999))
    train_step = 0
    test_step = 0
    start_full_time = time.time()
    for epoch in range(start_epoch, args.epochs + 1):
        print('This is {}-th epoch'.format(epoch))
        train_losses, train_step = train(TrainImgLoader, model, optimizer,
                                         epoch, train_step)
        test_losses, test_step = test(TestImgLoader, model, epoch, test_step)
        # NOTE(review): no '/' between save_path and the file name — this
        # assumes save_path ends with a separator; verify against callers.
        savefilename = args.save_path + 'sf_' + str(epoch) + '.tar'
        torch.save(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, savefilename)
    print('full training time = %.2f HR' %
          ((time.time() - start_full_time) / 3600))