def evaluate(dataset, data_loader, model, depth_model, batch_size, gpu=False,
             logger=None, args=None, epoch=0, processes=None,
             grid_3D_extended=None):
    """Run one evaluation pass of the PIXOR detector.

    In e2e mode the stereo ``depth_model`` first predicts depth, which is
    lifted to a pseudo-lidar point cloud and voxelized before being fed to
    ``model``. Accumulates class/regression losses, optionally writes
    KITTI-format prediction files and launches the official evaluation
    binaries as background subprocesses.

    Args:
        dataset: eval dataset wrapper; ``dataset.dataset`` exposes
            ``lidar_dir`` and ``get_calibration`` (KITTI helpers).
        data_loader: iterable of evaluation batches (dicts).
        model: PIXOR detection network (already on GPU).
        depth_model: stereo depth network; only used when ``args.e2e``.
        batch_size: unused here; kept for interface compatibility.
        gpu: if True, outputs are moved to CPU before decoding predictions.
        logger: logger instance; a fresh "eval" logger is created when None.
        args: parsed command-line options controlling evaluation behavior.
        epoch: current epoch number (used for loss scheduling and paths).
        processes: list collecting launched evaluation subprocesses so the
            caller can wait on them; a new list is created when None
            (avoids the original shared mutable default ``[]``).
        grid_3D_extended: precomputed 3D voxel grid (e2e mode only).
    """
    if processes is None:
        processes = []
    # Fix every RNG seed so evaluation runs are reproducible.
    torch.manual_seed(666)
    torch.cuda.manual_seed(666)
    random.seed(666)
    np.random.seed(666)
    os.environ['PYTHONHASHSEED'] = str(666)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # sanity check
    global label_grid
    model.eval()
    depth_model.eval()
    if logger is None:
        # BUGFIX: the original called logger.set_logger("eval") here, which
        # can only raise AttributeError because logger is None; build a real
        # logger instead (get_logger is this module's logger factory).
        logger = get_logger(name="eval")
    logger.info("=> eval lidar_dir: {}".format(dataset.dataset.lidar_dir))
    class_criterion = nn.BCELoss(reduction='none')
    reg_criterion = nn.SmoothL1Loss(reduction='none')
    avg_class_loss = 0
    avg_reg_loss = 0
    avg_loss = 0
    count = 0
    predictions = []
    with torch.no_grad():
        test_metric = utils_func.Metric()
        for batch in tqdm(data_loader) \
                if args.show_eval_progress else data_loader:
            count += 1
            if args.pixor_fusion:
                if not args.e2e:
                    inputs = batch['X'].cuda()
                else:
                    imgL = batch['imgL'].cuda()
                    imgR = batch['imgR'].cuda()
                    f = batch['f']
                    depth_map = batch['depth_map'].cuda()
                    idxx = batch['idx']
                    h_shift = batch['h_shift']
                    ori_shape = batch['ori_shape']
                    images = batch['image'].cuda()
                    img_index = batch['img_index'].cuda()
                    bev_index = batch['bev_index'].cuda()
            else:
                inputs = batch['X'].cuda()
            if not args.no_cal_loss:
                class_labels = batch['cl'].cuda()
                reg_labels = batch['rl'].cuda()
            if args.pixor_fusion:
                if not args.e2e:
                    # BUGFIX: the original called the undefined global
                    # `pixor` here; the network is passed in as `model`
                    # (the e2e branch below already uses `model`).
                    class_outs, reg_outs = model(inputs, images,
                                                 img_index, bev_index)
                else:
                    depth_loss, depth_map = forward_depth_model(
                        imgL, imgR, depth_map, f, test_metric,
                        depth_model, 'test')
                    inputs = []
                    for i in range(depth_map.shape[0]):
                        calib = utils_func.torchCalib(
                            dataset.dataset.get_calibration(idxx[i]),
                            h_shift[i])
                        H, W = ori_shape[0][i], ori_shape[1][i]
                        depth = depth_map[i][-H:, :W]
                        # Lift predicted depth to a pseudo-lidar point cloud
                        # in rect coordinates, then voxelize it.
                        # (Removed dead locals: the original also built an
                        # unused float32 numpy copy `ptc_np` here.)
                        ptc = depth_to_pcl(calib, depth, max_high=1.)
                        ptc = calib.lidar_to_rect(ptc[:, 0:3])
                        inputs.append(
                            gen_feature_diffused_tensor(
                                ptc, 700, 800, grid_3D_extended,
                                diffused=args.diffused))
                    inputs = torch.stack(inputs)
                    class_outs, reg_outs = model(inputs, images,
                                                 img_index, bev_index)
            else:
                class_outs, reg_outs = model(inputs)
            class_outs = class_outs.squeeze(1)
            if not args.no_cal_loss:
                class_loss, reg_loss, loss = \
                    compute_loss(epoch, class_outs, reg_outs, class_labels,
                                 reg_labels, class_criterion, reg_criterion,
                                 args)
                avg_class_loss += class_loss.item()
                avg_reg_loss += reg_loss.item() \
                    if not isinstance(reg_loss, int) else reg_loss
                avg_loss += loss.item()
            if args.gen_predict_file:
                if gpu:
                    class_outs, reg_outs = class_outs.cpu(), reg_outs.cpu()
                predictions += gen_single_prediction_fast(
                    class_outs, reg_outs, label_grid,
                    args.throw_threshold, args.nms_threshold)
    if not args.no_cal_loss:
        # Guard against an empty loader (count == 0) instead of dividing
        # by zero; the averages are then reported as 0.
        denom = max(count, 1)
        avg_class_loss /= denom
        avg_reg_loss /= denom
        avg_loss /= denom
        logger.info("Finish evaluation: class_loss: {:.5f}, "
                    "reg_loss: {:.5f}, total_loss: {:.5f}".format(
                        avg_class_loss, avg_reg_loss, avg_loss))
        logger.info(test_metric.print(epoch, ''))
    else:
        logger.info("Finish evaluation!")
    if args.gen_predict_file:
        logger.info("Generating predictions to files")
        logger.info(len(dataset.data))
        savefile_path = osp.join(args.saverootpath, args.run_name,
                                 'predicted_label_{}'.format(epoch))
        predict_kitti_to_file(predictions, dataset.data,
                              osp.join(savefile_path, "data"),
                              dataset.dataset)
        if args.run_official_evaluate:
            label_path = osp.join(args.root_dir, "training/label_2")
            # Launch the official KITTI evaluation binaries (IoU 0.7 and
            # 0.5) in the background; the caller waits on `processes`.
            with open(osp.join(savefile_path, "outputs_07.txt"), "w") as f, \
                    open(os.devnull, 'w') as FNULL:
                processes.append(
                    subprocess.Popen(
                        [args.evaluate_bin7, label_path, savefile_path],
                        stdout=f, stderr=FNULL))
            with open(osp.join(savefile_path, "outputs_05.txt"), "w") as f, \
                    open(os.devnull, 'w') as FNULL:
                processes.append(
                    subprocess.Popen(
                        [args.evaluate_bin5, label_path, savefile_path],
                        stdout=f, stderr=FNULL))
def main():
    """Entry point for stereo depth/disparity training.

    Reads the module-level ``args``; builds data loaders for KITTI or
    SceneFlow, constructs the disparity or depth model, optionally restores
    pretrained/resumed weights, then either (a) dumps predicted depth maps,
    (b) runs a one-off evaluation, or (c) trains with periodic testing,
    TensorBoard/Losswise logging and best-RMSE checkpointing.
    """
    global best_RMSE
    lw = utils_func.LossWise(args.api_key, args.losswise_tag, args.epochs - 1)
    # set logger
    log = logger.setup_logger(os.path.join(args.save_path, 'training.log'))
    for key, value in sorted(vars(args).items()):
        log.info(str(key) + ': ' + str(value))
    # set tensorboard
    writer = SummaryWriter(args.save_path + '/tensorboardx')
    # Data Loader
    if args.generate_depth_map:
        TrainImgLoader = None
        import dataloader.KITTI_submission_loader as KITTI_submission_loader
        TestImgLoader = torch.utils.data.DataLoader(
            KITTI_submission_loader.SubmiteDataset(args.datapath,
                                                   args.data_list,
                                                   args.dynamic_bs),
            batch_size=args.bval, shuffle=False,
            num_workers=args.workers, drop_last=False)
    elif args.dataset == 'kitti':
        train_data, val_data = KITTILoader3D.dataloader(
            args.datapath, args.split_train, args.split_val,
            kitti2015=args.kitti2015)
        TrainImgLoader = torch.utils.data.DataLoader(
            KITTILoader_dataset3d.myImageFloder(train_data, True,
                                                kitti2015=args.kitti2015,
                                                dynamic_bs=args.dynamic_bs),
            batch_size=args.btrain, shuffle=True, num_workers=8,
            drop_last=False, pin_memory=True)
        TestImgLoader = torch.utils.data.DataLoader(
            KITTILoader_dataset3d.myImageFloder(val_data, False,
                                                kitti2015=args.kitti2015,
                                                dynamic_bs=args.dynamic_bs),
            batch_size=args.bval, shuffle=False, num_workers=8,
            drop_last=False, pin_memory=True)
    else:
        train_data, val_data = listflowfile.dataloader(args.datapath)
        TrainImgLoader = torch.utils.data.DataLoader(
            SceneFlowLoader.myImageFloder(train_data, True,
                                          calib=args.calib_value),
            batch_size=args.btrain, shuffle=True, num_workers=8,
            drop_last=False)
        TestImgLoader = torch.utils.data.DataLoader(
            SceneFlowLoader.myImageFloder(val_data, False,
                                          calib=args.calib_value),
            batch_size=args.bval, shuffle=False, num_workers=8,
            drop_last=False)
    # Load Model
    if args.data_type == 'disparity':
        model = disp_models.__dict__[args.arch](maxdisp=args.maxdisp)
    elif args.data_type == 'depth':
        model = models.__dict__[args.arch](maxdepth=args.maxdepth,
                                           maxdisp=args.maxdisp,
                                           down=args.down,
                                           scale=args.scale)
    else:
        # BUGFIX: was `assert False`, which is stripped under `python -O`
        # and would then fall through with `model` undefined.
        log.info('Model is not implemented')
        raise NotImplementedError(
            'Unsupported data_type: {}'.format(args.data_type))
    # Number of parameters
    log.info('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))
    model = nn.DataParallel(model).cuda()
    torch.backends.cudnn.benchmark = True
    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    scheduler = MultiStepLR(optimizer, milestones=args.lr_stepsize,
                            gamma=args.lr_gamma)
    if args.pretrain:
        if os.path.isfile(args.pretrain):
            log.info("=> loading pretrain '{}'".format(args.pretrain))
            checkpoint = torch.load(args.pretrain)
            # strict=False: pretrain checkpoints may lack some heads.
            model.load_state_dict(checkpoint['state_dict'], strict=False)
        else:
            log.info('[Attention]: Do not find checkpoint {}'.format(
                args.pretrain))
    if args.resume:
        if os.path.isfile(args.resume):
            log.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            best_RMSE = checkpoint['best_RMSE']
            scheduler.load_state_dict(checkpoint['scheduler'])
            log.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            log.info('[Attention]: Do not find checkpoint {}'.format(
                args.resume))
    if args.generate_depth_map:
        # Inference-only mode: dump per-image depth maps and exit.
        os.makedirs(args.save_path + '/depth_maps/' + args.data_tag,
                    exist_ok=True)
        tqdm_eval_loader = tqdm(TestImgLoader, total=len(TestImgLoader))
        for batch_idx, (imgL_crop, imgR_crop, calib, H, W,
                        filename) in enumerate(tqdm_eval_loader):
            pred_disp = inference(imgL_crop, imgR_crop, calib, model)
            for idx, name in enumerate(filename):
                # Crop back to the original image size before saving.
                np.save(
                    args.save_path + '/depth_maps/' + args.data_tag + '/' +
                    name, pred_disp[idx][-H[idx]:, :W[idx]])
        import sys
        sys.exit()
    # evaluation
    if args.evaluate:
        evaluate_metric = utils_func.Metric()
        ## evaluation ##  (comment fixed: this loop evaluates, not trains)
        for batch_idx, (imgL_crop, imgR_crop, disp_crop_L,
                        calib) in enumerate(TestImgLoader):
            start_time = time.time()
            test(imgL_crop, imgR_crop, disp_crop_L, calib,
                 evaluate_metric, optimizer, model)
            log.info(
                evaluate_metric.print(batch_idx, 'EVALUATE') +
                ' Time:{:.3f}'.format(time.time() - start_time))
        import sys
        sys.exit()
    for epoch in range(args.start_epoch, args.epochs):
        # NOTE(review): stepping the scheduler at the start of the epoch is
        # the pre-PyTorch-1.1 convention; on newer PyTorch this shifts the
        # LR schedule by one epoch — confirm against the target version.
        scheduler.step()
        ## training ##
        train_metric = utils_func.Metric()
        tqdm_train_loader = tqdm(TrainImgLoader, total=len(TrainImgLoader))
        for batch_idx, (imgL_crop, imgR_crop, disp_crop_L,
                        calib) in enumerate(tqdm_train_loader):
            train(imgL_crop, imgR_crop, disp_crop_L, calib, train_metric,
                  optimizer, model, epoch)
        log.info(train_metric.print(0, 'TRAIN Epoch' + str(epoch)))
        train_metric.tensorboard(writer, epoch, token='TRAIN')
        lw.update(train_metric.get_info(), epoch, 'Train')
        ## testing ##
        is_best = False
        if epoch == 0 or ((epoch + 1) % args.eval_interval) == 0:
            test_metric = utils_func.Metric()
            tqdm_test_loader = tqdm(TestImgLoader, total=len(TestImgLoader))
            for batch_idx, (imgL_crop, imgR_crop, disp_crop_L,
                            calib) in enumerate(tqdm_test_loader):
                test(imgL_crop, imgR_crop, disp_crop_L, calib, test_metric,
                     optimizer, model)
            log.info(test_metric.print(0, 'TEST Epoch' + str(epoch)))
            test_metric.tensorboard(writer, epoch, token='TEST')
            lw.update(test_metric.get_info(), epoch, 'Test')
            # SAVE: track the best RMSE across evaluation epochs.
            is_best = test_metric.RMSELIs.avg < best_RMSE
            best_RMSE = min(test_metric.RMSELIs.avg, best_RMSE)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_RMSE': best_RMSE,
                'scheduler': scheduler.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            epoch,
            folder=args.save_path)
    lw.done()
def train(args):
    """Jointly train the PIXOR detector (and, in e2e mode, the stereo depth
    network) on KITTI.

    Builds the training dataset/loader according to ``args``, optionally
    restores pretrained or resumed weights, then runs the epoch loop with
    periodic evaluation (including the official KITTI evaluation binaries
    run as subprocesses) and checkpointing.

    Args:
        args: parsed command-line options; also mutated in place
            (``args.saverootpath`` is made absolute).

    Raises:
        RuntimeError: if no CUDA device is available.
        NotImplementedError: for an unsupported ``args.opt_method``.
    """
    use_gpu = torch.cuda.is_available()
    num_gpu = list(range(torch.cuda.device_count()))
    # BUGFIX: was `assert use_gpu, ...` — asserts are stripped under
    # `python -O`; raise explicitly for input validation instead.
    if not use_gpu:
        raise RuntimeError("Please use gpus.")
    logger = get_logger(name=args.shortname)
    display_args(args, logger)
    # create dir for saving
    args.saverootpath = osp.abspath(args.saverootpath)
    savepath = osp.join(args.saverootpath, args.run_name)
    if not osp.exists(savepath):
        os.makedirs(savepath)
    train_file = os.path.join(args.image_sets,
                              "{}.txt".format(args.train_dataset))
    # 35 input channels without the reflectance layer, 36 with it.
    n_features = 35 if args.no_reflex else 36
    if args.pixor_fusion:
        if args.e2e:
            train_data = KittiDataset_Fusion_stereo(
                txt_file=train_file,
                flip_rate=args.flip_rate,
                lidar_dir=args.eval_lidar_dir,
                label_dir=args.eval_label_dir,
                calib_dir=args.eval_calib_dir,
                image_dir=args.eval_image_dir,
                root_dir=args.root_dir,
                only_feature=args.no_cal_loss,
                split=args.split,
                image_downscale=args.image_downscale,
                crop_height=args.crop_height,
                random_shift_scale=args.random_shift_scale)
        else:
            train_data = KittiDataset_Fusion(
                txt_file=train_file,
                flip_rate=args.flip_rate,
                lidar_dir=args.train_lidar_dir,
                label_dir=args.train_label_dir,
                calib_dir=args.train_calib_dir,
                n_features=n_features,
                random_shift_scale=args.random_shift_scale,
                root_dir=args.root_dir,
                image_downscale=args.image_downscale)
    else:
        train_data = KittiDataset(
            txt_file=train_file,
            flip_rate=args.flip_rate,
            lidar_dir=args.train_lidar_dir,
            label_dir=args.train_label_dir,
            calib_dir=args.train_calib_dir,
            image_dir=args.train_image_dir,
            n_features=n_features,
            random_shift_scale=args.random_shift_scale,
            root_dir=args.root_dir)
    train_loader = DataLoader(train_data, batch_size=args.batch_size,
                              shuffle=True, num_workers=8)
    eval_data, eval_loader = get_eval_dataset(args)
    if args.pixor_fusion:
        pixor = PixorNet_Fusion(n_features, groupnorm=args.groupnorm,
                                resnet_type=args.resnet_type,
                                image_downscale=args.image_downscale,
                                resnet_chls=args.resnet_chls)
    else:
        pixor = PixorNet(n_features, groupnorm=args.groupnorm)
    pixor = pixor.cuda()
    pixor = nn.DataParallel(pixor, device_ids=num_gpu)
    class_criterion = nn.BCELoss(reduction='none')
    reg_criterion = nn.SmoothL1Loss(reduction='none')
    if args.opt_method == 'RMSprop':
        optimizer = optim.RMSprop(pixor.parameters(), lr=args.lr,
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay)
    else:
        raise NotImplementedError()
    depth_model = PSMNet(maxdepth=80, maxdisp=192, down=args.depth_down)
    depth_model = nn.DataParallel(depth_model).cuda()
    # torch.backends.cudnn.benchmark = True
    depth_optimizer = optim.Adam(depth_model.parameters(), lr=args.depth_lr,
                                 betas=(0.9, 0.999))
    grid_3D_extended = get_3D_global_grid_extended(700, 800,
                                                   35).cuda().float()
    if args.depth_pretrain:
        if os.path.isfile(args.depth_pretrain):
            logger.info("=> loading depth pretrain '{}'".format(
                args.depth_pretrain))
            checkpoint = torch.load(args.depth_pretrain)
            depth_model.load_state_dict(checkpoint['state_dict'])
            depth_optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            logger.info('[Attention]: Do not find checkpoint {}'.format(
                args.depth_pretrain))
    depth_scheduler = MultiStepLR(depth_optimizer,
                                  milestones=args.depth_lr_stepsize,
                                  gamma=args.depth_lr_gamma)
    if args.pixor_pretrain:
        if os.path.isfile(args.pixor_pretrain):
            # BUGFIX: this message previously said "depth pretrain" while
            # loading the pixor checkpoint.
            logger.info("=> loading pixor pretrain '{}'".format(
                args.pixor_pretrain))
            checkpoint = torch.load(args.pixor_pretrain)
            pixor.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            optimizer.param_groups[0]['lr'] *= 10
        else:
            logger.info('[Attention]: Do not find checkpoint {}'.format(
                args.pixor_pretrain))
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.lr_milestones,
                                         gamma=args.gamma)
    if args.resume:
        logger.info("Resuming...")
        checkpoint_path = osp.join(savepath, args.checkpoint)
        if os.path.isfile(checkpoint_path):
            logger.info("Loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path)
            pixor.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            scheduler.load_state_dict(checkpoint['scheduler'])
            depth_model.load_state_dict(checkpoint['depth_state_dict'])
            depth_optimizer.load_state_dict(checkpoint['depth_optimizer'])
            depth_scheduler.load_state_dict(checkpoint['depth_scheduler'])
            start_epoch = checkpoint['epoch'] + 1
            logger.info(
                "Resumed successfully from epoch {}.".format(start_epoch))
        else:
            logger.warning("Model {} not found. "
                           "Train from scratch".format(checkpoint_path))
            start_epoch = 0
    else:
        start_epoch = 0
    class_criterion = class_criterion.cuda()
    reg_criterion = reg_criterion.cuda()
    processes = []
    last_eval_epoches = []
    for epoch in range(start_epoch, args.epochs):
        pixor.train()
        depth_model.train()
        # NOTE(review): pre-PyTorch-1.1 convention steps the schedulers at
        # the start of the epoch; on newer versions this shifts the LR
        # schedule — confirm against the installed torch version.
        scheduler.step()
        depth_scheduler.step()
        ts = time.time()
        logger.info("Start epoch {}, depth lr {:.6f} pixor lr {:.7f}".format(
            epoch, depth_optimizer.param_groups[0]['lr'],
            optimizer.param_groups[0]['lr']))
        avg_class_loss = AverageMeter()
        avg_reg_loss = AverageMeter()
        avg_total_loss = AverageMeter()
        train_metric = utils_func.Metric()
        for iteration, batch in enumerate(train_loader):
            if args.pixor_fusion:
                if not args.e2e:
                    inputs = batch['X'].cuda()
                else:
                    imgL = batch['imgL'].cuda()
                    imgR = batch['imgR'].cuda()
                    f = batch['f']
                    depth_map = batch['depth_map'].cuda()
                    idxx = batch['idx']
                    h_shift = batch['h_shift']
                    ori_shape = batch['ori_shape']
                    a_shift = batch['a_shift']
                    flip = batch['flip']
                    images = batch['image'].cuda()
                    img_index = batch['img_index'].cuda()
                    bev_index = batch['bev_index'].cuda()
            else:
                inputs = batch['X'].cuda()
            class_labels = batch['cl'].cuda()
            reg_labels = batch['rl'].cuda()
            if args.pixor_fusion:
                if not args.e2e:
                    class_outs, reg_outs = pixor(inputs, images,
                                                 img_index, bev_index)
                else:
                    depth_loss, depth_map = forward_depth_model(
                        imgL, imgR, depth_map, f, train_metric, depth_model)
                    inputs = []
                    for i in range(depth_map.shape[0]):
                        calib = utils_func.torchCalib(
                            train_data.dataset.get_calibration(idxx[i]),
                            h_shift[i])
                        H, W = ori_shape[0][i], ori_shape[1][i]
                        depth = depth_map[i][-H:, :W]
                        # Lift predicted depth to pseudo-lidar points in
                        # rect coordinates.
                        ptc = depth_to_pcl(calib, depth, max_high=1.)
                        ptc = calib.lidar_to_rect(ptc[:, 0:3])
                        # Re-apply the dataset's rotation augmentation to
                        # the pseudo-lidar points.
                        if torch.abs(a_shift[i]).item() > 1e-6:
                            roty = utils_func.roty_pth(a_shift[i]).cuda()
                            ptc = torch.mm(ptc, roty.t())
                        voxel = gen_feature_diffused_tensor(
                            ptc, 700, 800, grid_3D_extended,
                            diffused=args.diffused)
                        # Mirror the voxel grid for flipped samples.
                        if flip[i] > 0:
                            voxel = torch.flip(voxel, [2])
                        inputs.append(voxel)
                    inputs = torch.stack(inputs)
                    class_outs, reg_outs = pixor(inputs, images,
                                                 img_index, bev_index)
            else:
                class_outs, reg_outs = pixor(inputs)
            class_outs = class_outs.squeeze(1)
            class_loss, reg_loss, loss = \
                compute_loss(epoch, class_outs, reg_outs, class_labels,
                             reg_labels, class_criterion, reg_criterion,
                             args)
            avg_class_loss.update(class_loss.item())
            avg_reg_loss.update(reg_loss.item()
                                if not isinstance(reg_loss, int)
                                else reg_loss)
            avg_total_loss.update(loss.item())
            optimizer.zero_grad()
            depth_optimizer.zero_grad()
            # BUGFIX: `depth_loss` is only defined on the e2e path; the
            # original added it unconditionally and raised NameError on
            # every non-e2e run. Only combine losses in e2e mode.
            if args.pixor_fusion and args.e2e:
                loss = depth_loss + 0.1 * loss
            loss.backward()
            optimizer.step()
            depth_optimizer.step()
            if not isinstance(reg_loss, int):
                reg_loss = reg_loss.item()
            if iteration % args.logevery == 0:
                logger.info("epoch {:d}, iter {:d}, class_loss: {:.5f},"
                            " reg_loss: {:.5f}, loss: {:.5f}".format(
                                epoch, iteration, avg_class_loss.avg,
                                avg_reg_loss.avg, avg_total_loss.avg))
                logger.info(train_metric.print(epoch, iteration))
        logger.info("Finish epoch {}, time elapsed {:.3f} s".format(
            epoch, time.time() - ts))
        if epoch % args.eval_every_epoch == 0 and epoch >= args.start_eval:
            logger.info("Evaluation begins at epoch {}".format(epoch))
            evaluate(eval_data, eval_loader, pixor, depth_model,
                     args.batch_size, gpu=use_gpu, logger=logger, args=args,
                     epoch=epoch, processes=processes,
                     grid_3D_extended=grid_3D_extended)
            if args.run_official_evaluate:
                last_eval_epoches.append((epoch, 7))
                last_eval_epoches.append((epoch, 5))
        if len(last_eval_epoches) > 0:
            # Harvest results from official-evaluation subprocesses that
            # have finished writing their output files.
            for e, iou in last_eval_epoches[:]:
                predicted_results = osp.join(
                    args.saverootpath, args.run_name,
                    'predicted_label_{}'.format(e),
                    'outputs_{:02d}.txt'.format(iou))
                if osp.exists(predicted_results):
                    with open(predicted_results, 'r') as f:
                        for line in f.readlines():
                            if line.startswith('car_detection_ground AP'):
                                # TODO(review): `results` is parsed but never
                                # used/logged — confirm intended use.
                                results = [
                                    float(num) for num in
                                    line.strip('\n').split(' ')[-3:]
                                ]
                                last_eval_epoches.remove((e, iou))
        if epoch % args.save_every == 0:
            saveto = osp.join(savepath,
                              "checkpoint_{}.pth.tar".format(epoch))
            torch.save(
                {
                    'state_dict': pixor.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'depth_state_dict': depth_model.state_dict(),
                    'depth_optimizer': depth_optimizer.state_dict(),
                    'depth_scheduler': depth_scheduler.state_dict(),
                    'epoch': epoch
                }, saveto)
            logger.info("model saved to {}".format(saveto))
            symlink_force(saveto, osp.join(savepath, "checkpoint.pth.tar"))
    # Wait for any still-running official evaluation subprocesses.
    for p in processes:
        if p.wait() != 0:
            logger.warning("There was an error")
    if len(last_eval_epoches) > 0:
        # Final harvest after all subprocesses have completed.
        for e, iou in last_eval_epoches[:]:
            predicted_results = osp.join(
                args.saverootpath, args.run_name,
                'predicted_label_{}'.format(e),
                'outputs_{:02d}.txt'.format(iou))
            if osp.exists(predicted_results):
                with open(predicted_results, 'r') as f:
                    for line in f.readlines():
                        if line.startswith('car_detection_ground AP'):
                            results = [
                                float(num) for num in
                                line.strip('\n').split(' ')[-3:]
                            ]
                            last_eval_epoches.remove((e, iou))