def main():
    """Train (or evaluate) a scene-flow model from a YAML config.

    Usage: ``python <script> <config.yaml>``.  Builds datasets/loaders,
    creates the model named by ``args.arch``, optionally resumes from a
    checkpoint, then runs the train/validate loop, checkpointing the best
    model.  Returns a short result summary string.
    """
    # Ensure the numba JIT is on — it may have been disabled via the env.
    if 'NUMBA_DISABLE_JIT' in os.environ:
        del os.environ['NUMBA_DISABLE_JIT']

    # Parse arguments from the YAML config given on the command line.
    global args
    args = cmd_args.parse_args_from_yaml(sys.argv[1])

    # -------------------- logging args --------------------
    if osp.exists(args.ckpt_dir):
        # Guard against silently clobbering a previous run's checkpoints.
        to_continue = query_yes_no(
            'Attention!!!, ckpt_dir already exists! Whether to continue?',
            default=None)
        if not to_continue:
            sys.exit(1)
    os.makedirs(args.ckpt_dir, mode=0o777, exist_ok=True)

    logger = Logger(osp.join(args.ckpt_dir, 'log'))
    logger.log('sys.argv:\n' + ' '.join(sys.argv))

    # numba reads this env var; must be set before the kernels are compiled.
    os.environ['NUMBA_NUM_THREADS'] = str(args.workers)
    logger.log('NUMBA NUM THREADS\t' + os.environ['NUMBA_NUM_THREADS'])

    # Dump the full config for reproducibility.
    for arg in sorted(vars(args)):
        logger.log('{:20s} {}'.format(arg, getattr(args, arg)))
    logger.log('')

    # -------------------- dataset & loader --------------------
    if not args.evaluate:
        train_dataset = datasets.__dict__[args.dataset](
            train=True,
            transform=transforms.Augmentation(args.aug_together,
                                              args.aug_pc2,
                                              args.data_process,
                                              args.num_points,
                                              args.allow_less_points),
            gen_func=transforms.GenerateDataUnsymmetric(args),
            args=args)
        logger.log('train_dataset: ' + str(train_dataset))

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=True,
            # Re-seed numpy per worker so augmentation differs across workers.
            worker_init_fn=lambda x: np.random.seed(
                (torch.initial_seed()) % (2**32)))

    val_dataset = datasets.__dict__[args.dataset](
        train=False,
        transform=transforms.ProcessData(args.data_process,
                                         args.num_points,
                                         args.allow_less_points),
        gen_func=transforms.GenerateDataUnsymmetric(args),
        args=args)
    logger.log('val_dataset: ' + str(val_dataset))

    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=lambda x: np.random.seed(
            (torch.initial_seed()) % (2**32)))

    # -------------------- create model --------------------
    logger.log("=> creating model '{}'".format(args.arch))

    model = models.__dict__[args.arch](args)

    if not args.evaluate:
        # Custom weight init only makes sense when training from scratch.
        init_func = partial(init_weights_multi,
                            init_type=args.init,
                            gain=args.gain)
        model.apply(init_func)
    logger.log(model)

    model = torch.nn.DataParallel(model).cuda()
    criterion = EPE3DLoss().cuda()

    if args.evaluate:
        torch.backends.cudnn.enabled = False
    else:
        cudnn.benchmark = True
    # https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
    # But if your input sizes changes at each iteration,
    # then cudnn will benchmark every time a new size appears,
    # possibly leading to worse runtime performances.

    # -------------------- resume --------------------
    # BUGFIX: default start_epoch/checkpoint up front.  Previously, when
    # args.resume pointed at a missing file (or later, when resume was unset
    # only the else branch set start_epoch), args.start_epoch could be left
    # unassigned and the epoch loop below crashed with AttributeError.
    args.start_epoch = 0
    checkpoint = None
    if args.resume:
        if osp.isfile(args.resume):
            logger.log("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            logger.log(
                "=> loaded checkpoint '{}' (start epoch {}, min loss {})".
                format(args.resume, checkpoint['epoch'],
                       checkpoint['min_loss']))
        else:
            logger.log("=> no checkpoint found at '{}'".format(args.resume))

    # -------------------- evaluation --------------------
    if args.evaluate:
        res_str = evaluate(val_loader, model, logger, args)
        logger.close()
        return res_str

    # -------------------- optimizer --------------------
    optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad,
                                               model.parameters()),
                                 lr=args.lr,
                                 weight_decay=0)

    if args.resume and (checkpoint is not None):
        optimizer.load_state_dict(checkpoint['optimizer'])
        if hasattr(args, 'reset_lr') and args.reset_lr:
            print('reset lr')
            reset_learning_rate(optimizer, args)

    # -------------------- main loop --------------------
    min_train_loss = None
    best_train_epoch = None
    best_val_epoch = None
    do_eval = True

    for epoch in range(args.start_epoch, args.epochs):
        old_lr = optimizer.param_groups[0]['lr']
        adjust_learning_rate(optimizer, epoch, args)
        lr = optimizer.param_groups[0]['lr']
        if old_lr != lr:
            print('Switch lr!')
        logger.log('lr: ' + str(optimizer.param_groups[0]['lr']))

        train_loss = train(train_loader, model, criterion, optimizer, epoch,
                           logger)
        gc.collect()

        is_train_best = True if best_train_epoch is None else (
            train_loss < min_train_loss)
        if is_train_best:
            min_train_loss = train_loss
            best_train_epoch = epoch

        if do_eval:
            val_loss = validate(val_loader, model, criterion, logger)
            gc.collect()

            is_val_best = True if best_val_epoch is None else (
                val_loss < min_val_loss)
            if is_val_best:
                min_val_loss = val_loss
                best_val_epoch = epoch
                logger.log("New min val loss!")

        # Track "best" by validation loss when evaluating, else train loss.
        min_loss = min_val_loss if do_eval else min_train_loss
        is_best = is_val_best if do_eval else is_train_best
        save_checkpoint(
            {
                'epoch': epoch + 1,  # next start epoch
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'min_loss': min_loss,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.ckpt_dir)

    train_str = 'Best train loss: {:.5f} at epoch {:3d}'.format(
        min_train_loss, best_train_epoch)
    logger.log(train_str)

    if do_eval:
        val_str = 'Best val loss: {:.5f} at epoch {:3d}'.format(
            min_val_loss, best_val_epoch)
        logger.log(val_str)

    logger.close()
    result_str = val_str if do_eval else train_str
    return result_str
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import transforms
import dataset
import models
import cmd_args
import open3d as o3d
from utils_dataset import lines
from torch.utils.tensorboard import SummaryWriter
from loss_fn import iou_projected_to_2d

# NOTE(review): hard-coded absolute paths — these only resolve on the original
# author's machine.  Consider taking the config path / data root from argv.
args = cmd_args.parse_args_from_yaml("/home/mayank/Mayank/TrackThisFlow/configs/test_ours_KITTI.yaml")
basedir = "/home/mayank/Data/KITTI/training/"

# TensorBoard writer; logs to the default ./runs/<timestamp> directory.
writer = SummaryWriter()

# KITTI tracking + scene-flow dataset built at import time (module-level side
# effect: reads from basedir as soon as this file is imported).
val_dataset = dataset.track_and_flow_dataset(
    basedir,
    transform=transforms.ProcessData(args.data_process,
                                     args.num_points,
                                     args.allow_less_points),
    gen_func=transforms.GenerateDataUnsymmetric(args),
    args=args)
print("Length of dataset:", len(val_dataset))
def main():
    """Train PointConvSceneFlow from a YAML config.

    Usage: ``python <script> <config.yaml>``.  Creates a timestamped
    experiment directory, snapshots the source files into it, trains with
    the multi-scale loss, evaluates EPE3D each epoch, and saves the model
    whenever the evaluation EPE3D improves.
    """
    # Ensure the numba JIT is on — it may have been disabled via the env.
    if 'NUMBA_DISABLE_JIT' in os.environ:
        del os.environ['NUMBA_DISABLE_JIT']

    global args
    args = cmd_args.parse_args_from_yaml(sys.argv[1])

    # Must be set before any CUDA context is created.
    os.environ[
        'CUDA_VISIBLE_DEVICES'] = args.gpu if args.multi_gpu is None else '0,1'

    '''CREATE DIR'''
    experiment_dir = Path('./experiment/')
    experiment_dir.mkdir(exist_ok=True)
    file_dir = Path(
        str(experiment_dir) + '/PointConv%sFlyingthings3d-' % args.model_name
        + str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')))
    file_dir.mkdir(exist_ok=True)
    checkpoints_dir = file_dir.joinpath('checkpoints/')
    checkpoints_dir.mkdir(exist_ok=True)
    log_dir = file_dir.joinpath('logs/')
    log_dir.mkdir(exist_ok=True)
    # Snapshot the exact sources used for this run, for reproducibility.
    # NOTE(review): `cp` is POSIX-only; shutil.copy would be portable.
    os.system('cp %s %s' % ('models.py', log_dir))
    os.system('cp %s %s' % ('pointconv_util.py', log_dir))
    os.system('cp %s %s' % ('train.py', log_dir))
    os.system('cp %s %s' % ('config_train.yaml', log_dir))

    '''LOG'''
    logger = logging.getLogger(args.model_name)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler(
        str(log_dir) + '/train_%s_sceneflow.txt' % args.model_name)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.info(
        '----------------------------------------TRAINING----------------------------------'
    )
    logger.info('PARAMETER ...')
    logger.info(args)

    blue = lambda x: '\033[94m' + x + '\033[0m'  # ANSI blue for console tags
    model = PointConvSceneFlow()

    train_dataset = datasets.__dict__[args.dataset](
        train=True,
        transform=transforms.Augmentation(args.aug_together, args.aug_pc2,
                                          args.data_process, args.num_points),
        num_points=args.num_points,
        data_root=args.data_root,
        full=args.full)
    logger.info('train_dataset: ' + str(train_dataset))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        # Re-seed numpy per worker so augmentation differs across workers.
        worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) %
                                                (2**32)))

    val_dataset = datasets.__dict__[args.dataset](
        train=False,
        transform=transforms.ProcessData(args.data_process, args.num_points,
                                         args.allow_less_points),
        num_points=args.num_points,
        data_root=args.data_root)
    logger.info('val_dataset: ' + str(val_dataset))

    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) %
                                                (2**32)))

    '''GPU selection and multi-GPU'''
    if args.multi_gpu is not None:
        device_ids = [int(x) for x in args.multi_gpu.split(',')]
        torch.backends.cudnn.benchmark = True
        model.cuda(device_ids[0])
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    else:
        model.cuda()

    if args.pretrain is not None:
        model.load_state_dict(torch.load(args.pretrain))
        print('load model %s' % args.pretrain)
        logger.info('load model %s' % args.pretrain)
    else:
        print('Training from scratch')
        logger.info('Training from scratch')

    # The starting epoch is encoded in the pretrained checkpoint's filename
    # ('%s_%.3d_%.4f.pth' -> chars [-14:-11] are the zero-padded epoch).
    pretrain = args.pretrain
    init_epoch = int(pretrain[-14:-11]) if args.pretrain is not None else 0

    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.learning_rate,
                                    momentum=0.9)
    elif args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.learning_rate,
                                     betas=(0.9, 0.999),
                                     eps=1e-08,
                                     weight_decay=args.weight_decay)
    else:
        # BUGFIX: previously an unknown optimizer name fell through with
        # `optimizer` unbound, producing a confusing NameError below.
        raise ValueError('Unsupported optimizer: %r (expected SGD or Adam)' %
                         args.optimizer)

    # initial_lr is required by StepLR when resuming with last_epoch >= 0.
    optimizer.param_groups[0]['initial_lr'] = args.learning_rate
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=80,
                                                gamma=0.5,
                                                last_epoch=init_epoch - 1)
    LEARNING_RATE_CLIP = 1e-5

    history = defaultdict(lambda: list())
    best_epe = 1000.0
    for epoch in range(init_epoch, args.epochs):
        # Clip the scheduled LR so it never decays below the floor.
        lr = max(optimizer.param_groups[0]['lr'], LEARNING_RATE_CLIP)
        print('Learning rate:%f' % lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        total_loss = 0
        total_seen = 0
        optimizer.zero_grad()
        for i, data in tqdm(enumerate(train_loader, 0),
                            total=len(train_loader),
                            smoothing=0.9):
            pos1, pos2, norm1, norm2, flow, _ = data
            # move to cuda
            pos1 = pos1.cuda()
            pos2 = pos2.cuda()
            norm1 = norm1.cuda()
            norm2 = norm2.cuda()
            flow = flow.cuda()

            model = model.train()
            pred_flows, fps_pc1_idxs, _, _, _ = model(pos1, pos2, norm1,
                                                      norm2)

            loss = multiScaleLoss(pred_flows, flow, fps_pc1_idxs)

            history['loss'].append(loss.cpu().data.numpy())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_loss += loss.cpu().data * args.batch_size
            total_seen += args.batch_size

        scheduler.step()

        train_loss = total_loss / total_seen
        str_out = 'EPOCH %d %s mean loss: %f' % (epoch, blue('train'),
                                                 train_loss)
        print(str_out)
        logger.info(str_out)

        eval_epe3d, eval_loss = eval_sceneflow(model.eval(), val_loader)
        str_out = 'EPOCH %d %s mean epe3d: %f mean eval loss: %f' % (
            epoch, blue('eval'), eval_epe3d, eval_loss)
        print(str_out)
        logger.info(str_out)

        if eval_epe3d < best_epe:
            best_epe = eval_epe3d
            # DataParallel wraps the real model in .module; unwrap for saving.
            if args.multi_gpu is not None:
                torch.save(
                    model.module.state_dict(), '%s/%s_%.3d_%.4f.pth' %
                    (checkpoints_dir, args.model_name, epoch, best_epe))
            else:
                torch.save(
                    model.state_dict(), '%s/%s_%.3d_%.4f.pth' %
                    (checkpoints_dir, args.model_name, epoch, best_epe))
            logger.info('Save model ...')
            print('Save model ...')
        print('Best epe loss is: %.5f' % (best_epe))
        logger.info('Best epe loss is: %.5f' % (best_epe))
def main():
    """Evaluate a pretrained PointConvSceneFlow model on the validation set.

    Usage: ``python <script> <config.yaml>``.  Loads the checkpoint named by
    ``args.ckpt_dir + args.pretrain``, runs one pass over the validation
    loader, and logs 3D (EPE3D / ACC3DS / ACC3DR / outliers) and 2D
    (EPE2D / ACC2D) scene-flow metrics.
    """
    #import ipdb; ipdb.set_trace()
    # Ensure the numba JIT is on — it may have been disabled via the env.
    if 'NUMBA_DISABLE_JIT' in os.environ:
        del os.environ['NUMBA_DISABLE_JIT']

    global args
    args = cmd_args.parse_args_from_yaml(sys.argv[1])

    # Must be set before any CUDA context is created.
    os.environ[
        'CUDA_VISIBLE_DEVICES'] = args.gpu if args.multi_gpu is None else '0,1,2,3'

    '''CREATE DIR'''
    experiment_dir = Path('./Evaluate_experiment/')
    experiment_dir.mkdir(exist_ok=True)
    file_dir = Path(
        str(experiment_dir) + '/%sFlyingthings3d-' % args.model_name +
        str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')))
    file_dir.mkdir(exist_ok=True)
    checkpoints_dir = file_dir.joinpath('checkpoints/')
    checkpoints_dir.mkdir(exist_ok=True)
    log_dir = file_dir.joinpath('logs/')
    log_dir.mkdir(exist_ok=True)
    # Snapshot the exact sources used for this run, for reproducibility.
    os.system('cp %s %s' % ('models.py', log_dir))
    os.system('cp %s %s' % ('pointconv_util.py', log_dir))
    os.system('cp %s %s' % ('evaluate.py', log_dir))
    os.system('cp %s %s' % ('config_evaluate.yaml', log_dir))

    '''LOG'''
    logger = logging.getLogger(args.model_name)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # BUGFIX: the original concatenated str(log_dir) directly with the file
    # name (no '/'), writing e.g. '.../logstrain_...txt' BESIDE the logs dir
    # instead of inside it.  The training script at this point uses
    # '/train_...'; made consistent here.
    file_handler = logging.FileHandler(
        str(log_dir) + '/train_%s_sceneflow.txt' % args.model_name)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    # BUGFIX: banner said TRAINING in this evaluation-only script.
    logger.info(
        '----------------------------------------EVALUATE----------------------------------'
    )
    logger.info('PARAMETER ...')
    logger.info(args)

    blue = lambda x: '\033[94m' + x + '\033[0m'  # ANSI blue for console tags
    model = PointConvSceneFlow()

    val_dataset = datasets.__dict__[args.dataset](
        train=False,
        transform=transforms.ProcessData(args.data_process, args.num_points,
                                         args.allow_less_points),
        num_points=args.num_points,
        data_root=args.data_root)
    logger.info('val_dataset: ' + str(val_dataset))

    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) %
                                                (2**32)))

    # load pretrained model
    pretrain = args.ckpt_dir + args.pretrain
    model.load_state_dict(torch.load(pretrain))
    print('load model %s' % pretrain)
    logger.info('load model %s' % pretrain)

    model.cuda()

    # 3D metrics
    epe3ds = AverageMeter()
    acc3d_stricts = AverageMeter()
    acc3d_relaxs = AverageMeter()
    outliers = AverageMeter()
    # 2D metrics
    epe2ds = AverageMeter()
    acc2ds = AverageMeter()

    total_loss = 0
    total_seen = 0
    total_epe = 0
    metrics = defaultdict(lambda: list())
    for i, data in tqdm(enumerate(val_loader, 0),
                        total=len(val_loader),
                        smoothing=0.9):
        pos1, pos2, norm1, norm2, flow, path = data

        # move to cuda
        pos1 = pos1.cuda()
        pos2 = pos2.cuda()
        norm1 = norm1.cuda()
        norm2 = norm2.cuda()
        flow = flow.cuda()

        model = model.eval()
        with torch.no_grad():
            pred_flows, fps_pc1_idxs, _, _, _ = model(pos1, pos2, norm1,
                                                      norm2)

            loss = multiScaleLoss(pred_flows, flow, fps_pc1_idxs)

            # pred_flows[0] is the full-resolution prediction, (B, 3, N);
            # permute to (B, N, 3) to match the ground-truth flow layout.
            full_flow = pred_flows[0].permute(0, 2, 1)
            epe3d = torch.norm(full_flow - flow, dim=2).mean()

        total_loss += loss.cpu().data * args.batch_size
        total_epe += epe3d.cpu().data * args.batch_size
        total_seen += args.batch_size

        pc1_np = pos1.cpu().numpy()
        pc2_np = pos2.cpu().numpy()
        sf_np = flow.cpu().numpy()
        pred_sf = full_flow.cpu().numpy()

        EPE3D, acc3d_strict, acc3d_relax, outlier = evaluate_3d(pred_sf,
                                                                sf_np)

        epe3ds.update(EPE3D)
        acc3d_stricts.update(acc3d_strict)
        acc3d_relaxs.update(acc3d_relax)
        outliers.update(outlier)

        # 2D evaluation metrics: project the 3D flow into image space.
        flow_pred, flow_gt = geometry.get_batch_2d_flow(
            pc1_np, pc1_np + sf_np, pc1_np + pred_sf, path)
        EPE2D, acc2d = evaluate_2d(flow_pred, flow_gt)

        epe2ds.update(EPE2D)
        acc2ds.update(acc2d)

    mean_loss = total_loss / total_seen
    mean_epe = total_epe / total_seen
    str_out = '%s mean loss: %f mean epe: %f' % (blue('Evaluate'), mean_loss,
                                                 mean_epe)
    print(str_out)
    logger.info(str_out)

    res_str = (' * EPE3D {epe3d_.avg:.4f}\t'
               'ACC3DS {acc3d_s.avg:.4f}\t'
               'ACC3DR {acc3d_r.avg:.4f}\t'
               'Outliers3D {outlier_.avg:.4f}\t'
               'EPE2D {epe2d_.avg:.4f}\t'
               'ACC2D {acc2d_.avg:.4f}'.format(epe3d_=epe3ds,
                                               acc3d_s=acc3d_stricts,
                                               acc3d_r=acc3d_relaxs,
                                               outlier_=outliers,
                                               epe2d_=epe2ds,
                                               acc2d_=acc2ds))

    print(res_str)
    logger.info(res_str)