def load_model(input_args):
    """Build the HSM stereo model and its Adam optimizer.

    Seeds the CPU RNG, wraps the model in ``nn.DataParallel`` and moves it to
    GPU, then (optionally) loads pretrained weights from
    ``input_args.loadmodel`` — downloading them to a local cache first when
    the path is an ``s3://`` URI.

    Args:
        input_args: parsed CLI namespace; must provide ``seed``, ``maxdisp``,
            ``loadmodel`` and ``savemodel``.

    Returns:
        Tuple ``(model, optimizer)``.
    """
    torch.manual_seed(input_args.seed)
    model = hsm(input_args.maxdisp, clean=False, level=1)
    model = nn.DataParallel(model)
    model.cuda()

    # load model
    if input_args.loadmodel is not None:
        base_weights = input_args.loadmodel
        if base_weights.startswith('s3://'):
            # Cache the remote checkpoint under the save dir.
            # BUG FIX: the cache path previously ended in a literal
            # placeholder instead of the checkpoint's basename, so the
            # os.path.exists() check could never match the downloaded file
            # and the S3 copy re-ran every call.
            filename = os.path.basename(base_weights)
            model_path = f'{input_args.savemodel}/initial_weights/{filename}'
            if not os.path.exists(model_path):
                # NOTE(review): shells out to the aws CLI without quoting;
                # breaks on paths with spaces — consider subprocess.run.
                command = f'aws s3 cp {base_weights} {model_path}'
                os.system(command)
            base_weights = model_path
        pretrained_dict = torch.load(base_weights)
        # Drop disparity-regression entries so a different maxdisp can be
        # used; strict=False then tolerates the missing keys.
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if ('disp' not in k)
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)

    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # NOTE(review): lr=0.1 is unusually high for Adam — confirm intended.
    optimizer = optim.Adam(model.parameters(), lr=0.1, betas=(0.9, 0.999))

    # Re-seed so callers start from a deterministic RNG state regardless of
    # how many random draws model construction consumed.
    torch.manual_seed(input_args.seed)  # set again
    torch.cuda.manual_seed(input_args.seed)
    return model, optimizer
help='test time resolution ratio 0-x') parser.add_argument('--max_disp', type=float, default=-1, help='maximum disparity to search for') parser.add_argument('--level', type=int, default=1, help='output level of output, default is level 1 (stage 3),\ can also use level 2 (stage 2) or level 3 (stage 1)') args = parser.parse_args() # dataloader from dataloader import listfiles as DA test_left_img, test_right_img, _, _ = DA.dataloader(args.datapath) # construct model model = hsm(128,args.clean,level=args.level) model = nn.DataParallel(model, device_ids=[0]) model.cuda() if args.loadmodel is not None: pretrained_dict = torch.load(args.loadmodel) pretrained_dict['state_dict'] = {k:v for k,v in pretrained_dict['state_dict'].items() if 'disp' not in k} model.load_state_dict(pretrained_dict['state_dict'],strict=False) else: print('run with random init') print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()]))) # dry run multip = 48 imgL = np.zeros((1,3,24*multip,32*multip)) imgR = np.zeros((1,3,24*multip,32*multip))
# Training-script setup: CLI options, seeding, model construction and
# optimizer. NOTE(review): `parser` is created earlier in this script
# (outside this chunk); these lines only extend it.
parser.add_argument('--database', default='/ssd//',
                    help='data path')
parser.add_argument('--epochs', type=int, default=10,
                    help='number of epochs to train')
parser.add_argument('--batchsize', type=int, default=24,
                    help='samples per batch')
parser.add_argument('--loadmodel', default=None,
                    help='weights path')
parser.add_argument('--savemodel', default='./',
                    help='save path')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
args = parser.parse_args()

# Seed the CPU RNG before model construction so weight init is reproducible.
torch.manual_seed(args.seed)

model = hsm(args.maxdisp, clean=False, level=1)
model = nn.DataParallel(model)
model.cuda()

# load model
if args.loadmodel is not None:
    pretrained_dict = torch.load(args.loadmodel)
    # Drop checkpoint entries whose key contains 'disp' (disparity
    # regression); strict=False below tolerates the missing keys.
    pretrained_dict['state_dict'] = {k: v
                                     for k, v in list(pretrained_dict['state_dict'].items())
                                     if ('disp' not in k)}
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)

print('Number of model parameters: {}'.format(
    sum([p.data.nelement() for p in model.parameters()])))

# NOTE(review): lr=0.1 is unusually high for Adam — confirm intended.
optimizer = optim.Adam(model.parameters(), lr=0.1, betas=(0.9, 0.999))


def _init_fn(worker_id):
    # Re-seed numpy in each DataLoader worker so workers do not all inherit
    # the parent process's RNG state.
    np.random.seed()
def main():
    """Train HSM-Net on replicated KITTI-raw pseudo-GT samples, validating on
    a single fixed KITTI 2015 image and logging scalars/images per iteration.

    Checkpoints every ``--save_epoch`` epochs and writes a final checkpoint
    when training completes.
    """
    parser = argparse.ArgumentParser(description='HSM-Net')
    parser.add_argument('--maxdisp', type=int, default=384,
                        help='maxium disparity')
    parser.add_argument('--name', default='name')
    parser.add_argument('--database', default='/data/private',
                        help='data path')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of epochs to train')
    parser.add_argument(
        '--batch_size',
        type=int,
        default=16,  # when maxdisp is 768, 18 is the most you can fit in 2 V100s (with syncBN on)
        help='samples per batch')
    parser.add_argument(
        '--val_batch_size',
        type=int,
        default=4,  # when maxdisp is 768, 18 is the most you can fit in 2 V100s (with syncBN on)
        help='samples per batch')
    parser.add_argument('--loadmodel', default=None,
                        help='weights path')
    parser.add_argument('--log_dir', default="/data/private/logs/high-res-stereo")
    # parser.add_argument('--savemodel', default=os.path.join(os.getcwd(),'/trained_model'),
    #                     help='save path')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--val_epoch', type=int, default=4)
    parser.add_argument('--save_epoch', type=int, default=10)
    parser.add_argument("--val", action="store_true", default=False)
    parser.add_argument("--save_numpy", action="store_true", default=False)
    parser.add_argument("--testres", type=float, default=1.8)
    parser.add_argument("--threshold", type=float, default=0.7)
    parser.add_argument("--use_pseudoGT", default=False, action="store_true")
    parser.add_argument("--lr", default=1e-3, type=float)
    parser.add_argument("--lr_decay", default=2, type=int)
    parser.add_argument("--gpu", default=[0], nargs="+")
    parser.add_argument("--no_aug", default=False, action="store_true")
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    torch.manual_seed(args.seed)  # set again
    torch.cuda.manual_seed(args.seed)

    batch_size = args.batch_size
    scale_factor = args.maxdisp / 384.  # controls training resolution

    # Timestamp the run name so repeated runs do not collide in the log dir.
    args.name = args.name + "_" + time.strftime('%l:%M%p_%Y%b%d').strip(" ")

    # --gpu arrives as strings from argparse (nargs="+"); convert to ints.
    gpu = []
    for i in args.gpu:
        gpu.append(int(i))
    args.gpu = gpu

    root_dir = "/data/private/KITTI_raw/2011_09_26/2011_09_26_drive_0013_sync"
    disp_dir = "final-768px_testres-3.3/disp"
    entp_dir = "final-768px_testres-3.3/entropy"
    mode = "image"
    image_name = "0000000040.npy"  #* this is the 4th image in the validation set
    train_left, train_right, train_disp, train_entp = kitti_raw_loader(
        root_dir, disp_dir, entp_dir, mode=mode, image_name=image_name)

    # Replicate the (small) sample lists so one epoch yields many iterations.
    # NOTE(review): assumes kitti_raw_loader returns Python lists — confirm.
    train_left = train_left * args.batch_size * 16
    train_right = train_right * args.batch_size * 16
    train_disp = train_disp * args.batch_size * 16
    train_entp = train_entp * args.batch_size * 16

    all_left_img, all_right_img, all_left_disp, left_val, right_val, disp_val_L = lk15.dataloader(
        '%s/KITTI2015/data_scene_flow/training/' % args.database, val=args.val)
    # Validate on a single fixed image (index 3 of the val split).
    left_val = [left_val[3]]
    right_val = [right_val[3]]
    disp_val_L = [disp_val_L[3]]

    loader_kitti15 = DA.myImageFloder(train_left, train_right, train_disp,
                                      rand_scale=[0.9, 2.4 * scale_factor],
                                      order=0,
                                      use_pseudoGT=args.use_pseudoGT,
                                      entropy_threshold=args.threshold,
                                      left_entropy=train_entp,
                                      no_aug=args.no_aug)
    val_loader_kitti15 = DA.myImageFloder(left_val, right_val, disp_val_L,
                                          is_validation=True,
                                          testres=args.testres)
    train_data_inuse = loader_kitti15
    val_data_inuse = val_loader_kitti15

    # ! For internal bug in Pytorch, if you are going to set num_workers >0 in one dataloader, it must also be set to
    # ! n >0 for the other data loader as well (ex. 1 for valLoader and 10 for trainLoader)
    ValImgLoader = torch.utils.data.DataLoader(
        val_data_inuse,
        drop_last=False,
        batch_size=args.val_batch_size,
        shuffle=False,
        worker_init_fn=_init_fn,
        num_workers=args.val_batch_size)
    TrainImgLoader = torch.utils.data.DataLoader(
        train_data_inuse,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
        worker_init_fn=_init_fn,
        num_workers=args.batch_size)
    print('%d batches per epoch' % (len(train_data_inuse) // batch_size))

    model = hsm(args.maxdisp, clean=False, level=1)
    if len(args.gpu) > 1:
        # Multi-GPU: swap in synchronized BatchNorm layers.
        from sync_batchnorm.sync_batchnorm import convert_model
        model = nn.DataParallel(model, device_ids=args.gpu)
        model = convert_model(model)
    else:
        model = nn.DataParallel(model, device_ids=args.gpu)
    model.cuda()

    # load model
    if args.loadmodel is not None:
        print("loading pretrained model: " + str(args.loadmodel))
        pretrained_dict = torch.load(args.loadmodel)
        # Drop 'disp' entries so a different maxdisp can be used;
        # strict=False tolerates the resulting missing keys.
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if ('disp' not in k)
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)

    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    log = logger.Logger(args.log_dir, args.name, save_numpy=args.save_numpy)

    total_iters = 0
    val_sample_count = 0
    val_batch_count = 0  # NOTE(review): never incremented below — dead?
    save_path = os.path.join(args.log_dir, os.path.join(args.name, "saved_model"))
    os.makedirs(save_path, exist_ok=True)

    for epoch in range(1, args.epochs + 1):
        total_train_loss = 0
        train_score_accum_dict = {
        }  # accumulates scores throughout a batch to get average score
        train_score_accum_dict["num_scored"] = 0
        adjust_learning_rate(optimizer, args.lr, args.lr_decay, epoch,
                             args.epochs, decay_rate=0.1)
        print('Epoch %d / %d' % (epoch, args.epochs))

        # SAVE — checkpoint is written BEFORE this epoch's training, so it
        # reflects the previous epoch's weights.
        if epoch != 1 and epoch % args.save_epoch == 0:
            print("saving weights at epoch: " + str(epoch))
            savefilename = os.path.join(save_path,
                                        'ckpt_' + str(total_iters) + '.tar')
            torch.save(
                {
                    'iters': total_iters,
                    'state_dict': model.state_dict(),
                    'train_loss': total_train_loss / len(TrainImgLoader),
                    "optimizer": optimizer.state_dict()
                }, savefilename)

        ## val ##
        if epoch == 1 or epoch % args.val_epoch == 0:
            print("validating at epoch: " + str(epoch))
            val_score_accum_dict = {}
            val_img_idx = 0
            for batch_idx, (imgL_crop, imgR_crop,
                            disp_crop_L) in enumerate(ValImgLoader):
                vis, scores_list, err_map_list = val_step(
                    model, imgL_crop, imgR_crop, disp_crop_L, args.maxdisp,
                    args.testres)
                for score, err_map in zip(scores_list, err_map_list):
                    for (score_tag, score_val), (map_tag, map_val) in zip(
                            score.items(), err_map.items()):
                        log.scalar_summary(
                            "val/im_" + str(val_img_idx) + "/" + score_tag,
                            score_val, val_sample_count)
                        log.image_summary("val/" + map_tag, map_val,
                                          val_sample_count)
                        if score_tag not in val_score_accum_dict.keys():
                            val_score_accum_dict[score_tag] = 0
                        val_score_accum_dict[score_tag] += score_val
                    val_img_idx += 1
                    val_sample_count += 1
                log.image_summary('val/left', imgL_crop[0:1], val_sample_count)
                log.image_summary('val/right', imgR_crop[0:1], val_sample_count)
                log.disp_summary('val/gt0', disp_crop_L[0:1],
                                 val_sample_count)  # <-- GT disp
                log.entp_summary('val/entropy', vis['entropy'],
                                 val_sample_count)
                log.disp_summary('val/output3', vis['output3'][0],
                                 val_sample_count)
            for score_tag, score_val in val_score_accum_dict.items():
                log.scalar_summary("val/" + score_tag + "_batch_avg",
                                   score_val, epoch)

        ## training ##
        for batch_idx, (imgL_crop, imgR_crop,
                        disp_crop_L) in enumerate(TrainImgLoader):
            print("training at epoch: " + str(epoch))
            # Only score/log every 10th iteration to keep training fast.
            is_scoring = total_iters % 10 == 0
            loss, vis, scores_list, maps = train_step(model,
                                                      optimizer,
                                                      imgL_crop,
                                                      imgR_crop,
                                                      disp_crop_L,
                                                      args.maxdisp,
                                                      is_scoring=is_scoring)
            total_train_loss += loss
            if is_scoring:
                log.scalar_summary('train/loss_batch', loss, total_iters)
                for score in scores_list:
                    for tag, val in score.items():
                        log.scalar_summary("train/" + tag + "_batch", val,
                                           total_iters)
                        if tag not in train_score_accum_dict.keys():
                            train_score_accum_dict[tag] = 0
                        train_score_accum_dict[tag] += val
                        # NOTE(review): incremented once per tag, not once per
                        # batch — the "num_scored" denominator below may be
                        # inflated when a score dict has several tags; confirm.
                        train_score_accum_dict[
                            "num_scored"] += imgL_crop.shape[0]
                for tag, err_map in maps[0].items():
                    log.image_summary("train/" + tag, err_map, total_iters)
            if total_iters % 10 == 0:
                log.image_summary('train/left', imgL_crop[0:1], total_iters)
                log.image_summary('train/right', imgR_crop[0:1], total_iters)
                log.disp_summary('train/gt0', disp_crop_L[0:1],
                                 total_iters)  # <-- GT disp
                log.entp_summary('train/entropy', vis['entropy'][0:1],
                                 total_iters)
                log.disp_summary('train/output3', vis['output3'][0:1],
                                 total_iters)
            total_iters += 1

        log.scalar_summary('train/loss',
                           total_train_loss / len(TrainImgLoader), epoch)
        for tag, val in train_score_accum_dict.items():
            log.scalar_summary("train/" + tag + "_avg",
                               val / train_score_accum_dict["num_scored"],
                               epoch)
        torch.cuda.empty_cache()

    # Save final checkpoint
    print("Finished training!\n Saving the last checkpoint...")
    savefilename = os.path.join(save_path, 'final' + '.tar')
    torch.save(
        {
            'iters': total_iters,
            'state_dict': model.state_dict(),
            'train_loss': total_train_loss / len(TrainImgLoader),
            "optimizer": optimizer.state_dict()
        }, savefilename)
wandb_logger = wandb.init(name="submission.py", project="rvc_stereo", save_code=True, magic=True, config=args) # dataloader from dataloader import listfiles as DA # test_left_img, test_right_img, _, _ = DA.dataloader(args.datapath) # print("total test images: " + str(len(test_left_img))) # print("output path: " + args.outdir) # construct model model = hsm(args.max_disp, args.clean, level=args.level) model = nn.DataParallel(model, device_ids=[0]) model.cuda() if args.loadmodel is not None: pretrained_dict = torch.load(args.loadmodel) pretrained_dict['state_dict'] = { k: v for k, v in pretrained_dict['state_dict'].items() if 'disp' not in k } model.load_state_dict(pretrained_dict['state_dict'], strict=False) else: print('run with random init') print('Number of model parameters: {}'.format( sum([p.data.nelement() for p in model.parameters()])))
def main():
    """Evaluate HSM on RVC-style datasets (Middlebury / ETH3D / KITTI).

    For each stereo pair: adjusts the model's max disparity, runs inference,
    writes disparity/entropy PNGs, PFM output and diagnostic colormaps, and
    optionally accumulates per-dataset metrics and prepares a KITTI
    submission.
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument(
        '--datapath',
        default="/home/isaac/rvc_devkit/stereo/datasets_middlebury2014",
        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--name', default='rvc_highres_output',
                        help='output dir')
    parser.add_argument('--clean', type=float, default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument(
        '--testres',
        type=float,
        default=0.5,  #default used to be 0.5
        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp', type=float, default=-1,
                        help='maximum disparity to search for')
    parser.add_argument(
        '--level',
        type=int,
        default=1,
        help='output level of output, default is level 1 (stage 3),\
                can also use level 2 (stage 2) or level 3 (stage 1)'
    )
    parser.add_argument('--debug_image', type=str, default=None)
    parser.add_argument("--eth_testres", type=float, default=3.5)
    parser.add_argument("--score_results", action="store_true", default=False)
    parser.add_argument("--save_weights", action="store_true", default=False)
    parser.add_argument("--kitti", action="store_true", default=False)
    parser.add_argument("--eth", action="store_true", default=False)
    parser.add_argument("--mb", action="store_true", default=False)
    parser.add_argument("--all_data", action="store_true", default=False)
    parser.add_argument("--eval_train_only", action="store_true",
                        default=False)
    parser.add_argument("--debug", action="store_true", default=False)
    parser.add_argument("--batchsize", type=int, default=16)
    parser.add_argument("--prepare_kitti", action="store_true", default=False)
    args = parser.parse_args()
    # wandb.init(name=args.name, project="high-res-stereo", save_code=True, magic=True, config=args)

    if not os.path.exists("output"):
        os.mkdir("output")

    # Per-dataset metric accumulators, keyed by metric name.
    kitti_merics = {}
    eth_metrics = {}
    mb_metrics = {}

    # construct model
    model = hsm(128, args.clean, level=args.level)
    model = convert_model(model)
    # wandb.watch(model)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # Drop 'disp' entries so maxdisp can differ from the checkpoint's.
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    model.eval()

    if not args.prepare_kitti:
        dataset = RVCDataset(args)
    if args.prepare_kitti:
        _, _, _, left_val, right_val, disp_val_L = lk15.dataloader(
            '/data/private/KITTI2015/data_scene_flow/training/',
            val=True)  # change to trainval when finetuning on KITTI
        dataset = DA.myImageFloder(left_val,
                                   right_val,
                                   disp_val_L,
                                   rand_scale=[1, 1],
                                   order=0)

    dataloader = DataLoader(dataset,
                            batch_size=args.batchsize,
                            shuffle=False,
                            num_workers=0)

    steps = 0
    max_disp = None
    origianl_image_size = None
    top_pad = None
    left_pad = None
    testres = [args.testres]
    dataset_type = None
    data_path = [args.datapath]

    # for (imgL, imgR, gt_disp_raw, max_disp, origianl_image_size, top_pad, left_pad, testres, dataset_type , data_path) in dataloader:
    for (imgL, imgR, gt_disp_raw) in dataloader:
        # Todo: this is a hot fix. Must be fixed to handle batchsize greater than 1
        data_path = data_path[0]
        img_name = os.path.basename(os.path.normpath(data_path))
        testres = float(testres[0])
        gt_disp_raw = gt_disp_raw[0]

        # Pick the metric accumulator for this sample's dataset.
        cum_metrics = None
        if dataset_type == 0:
            cum_metrics = mb_metrics
        elif dataset_type == 1:
            cum_metrics = eth_metrics
        elif dataset_type == 2:
            cum_metrics = kitti_merics

        print(img_name)
        if args.max_disp > 0:
            max_disp = int(args.max_disp)

        ## change max disp — round the scaled disparity range up to a
        ## multiple of 64 and rebuild the regression heads to match.
        tmpdisp = int(max_disp * testres // 64 * 64)
        if (max_disp * testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp,
                                                     16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp,
                                                      16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp,
                                                      32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp,
                                                      64).cuda()
        print(" max disparity = " + str(model.module.maxdisp))

        # wandb.log({"imgL": wandb.Image(imgL, caption=img_name + ", " + str(tuple(imgL.shape))),
        #            "imgR": wandb.Image(imgR, caption=img_name + ", " + str(tuple(imgR.shape)))}, step=steps)
        with torch.no_grad():
            torch.cuda.synchronize()
            start_time = time.time()
            # * output dimensions same as input dimensions
            # * (ex: imgL[1, 3, 704, 2240] then pred_disp[1, 704, 2240])
            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()
            ttime = (time.time() - start_time)
            print(' time = %.2f' % (ttime * 1000))

        # * squeeze (remove dimensions with size 1) (ex: pred_disp[1, 704, 2240] ->[704, 2240])
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # Strip the padding that was added before inference.
        top_pad = int(top_pad[0])
        left_pad = int(left_pad[0])
        entropy = entropy[top_pad:, :pred_disp.shape[1] -
                          left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # save predictions
        idxname = img_name
        if not os.path.exists('output/%s/%s' % (args.name, idxname)):
            os.makedirs('output/%s/%s' % (args.name, idxname))
        idxname = '%s/disp0%s' % (idxname, args.name)

        # * shrink image back to the GT size (ex: pred_disp[675, 2236] -> [375, 1242])
        # ! we element-wise divide pred_disp by testres becasue the image is shrinking,
        # ! so the distance between pixels should also shrink by the same factor
        pred_disp_raw = cv2.resize(
            pred_disp / testres,
            (origianl_image_size[1], origianl_image_size[0]),
            interpolation=cv2.INTER_LINEAR)
        pred_disp = pred_disp_raw  # raw is to use for scoring
        gt_disp = gt_disp_raw.numpy()

        # * clip while keep inf
        # ? `pred_disp != pred_disp` is always true, right??  (NaN test)
        # ? `pred_disp[pred_invalid] = np.inf` why do this?
        pred_invalid = np.logical_or(pred_disp == np.inf,
                                     pred_disp != pred_disp)
        pred_disp[pred_invalid] = np.inf
        pred_disp_png = (pred_disp * 256).astype("uint16")
        gt_invalid = np.logical_or(gt_disp == np.inf, gt_disp != gt_disp)
        gt_disp[gt_invalid] = 0
        gt_disp_png = (gt_disp * 256).astype("uint16")
        entorpy_png = (entropy * 256).astype('uint16')

        # ! raw output to png
        pred_disp_path = 'output/%s/%s/disp.png' % (args.name,
                                                    idxname.split('/')[0])
        gt_disp_path = 'output/%s/%s/gt_disp.png' % (args.name,
                                                     idxname.split('/')[0])
        assert (cv2.imwrite(pred_disp_path, pred_disp_png))
        assert (cv2.imwrite(gt_disp_path, gt_disp_png))
        assert (cv2.imwrite(
            'output/%s/%s/ent.png' % (args.name, idxname.split('/')[0]),
            entorpy_png))

        # ! Experimental color maps
        gt_disp_color_path = 'output/%s/%s/gt_disp_color.png' % (
            args.name, idxname.split('/')[0])
        pred_disp_color_path = 'output/%s/%s/disp_color.png' % (
            args.name, idxname.split('/')[0])
        gt_colormap = convert_to_colormap(gt_disp_png)
        pred_colormap = convert_to_colormap(pred_disp_png)
        entropy_colormap = convert_to_colormap(entorpy_png)
        assert (cv2.imwrite(gt_disp_color_path, gt_colormap))
        assert (cv2.imwrite(pred_disp_color_path, pred_colormap))

        # ! diff colormaps — signed difference split into over-/under-
        # prediction maps; invalid GT pixels are copied from the prediction
        # first so they contribute zero difference.
        diff_colormap_path = 'output/%s/%s/diff_color.png' % (
            args.name, idxname.split('/')[0])
        false_positive_path = 'output/%s/%s/false_positive_color.png' % (
            args.name, idxname.split('/')[0])
        false_negative_path = 'output/%s/%s/false_negative_color.png' % (
            args.name, idxname.split('/')[0])
        gt_disp_png[gt_invalid] = pred_disp_png[gt_invalid]
        gt_disp_png = gt_disp_png.astype("int32")
        pred_disp_png = pred_disp_png.astype("int32")
        diff_colormap = convert_to_colormap(np.abs(gt_disp_png -
                                                   pred_disp_png))
        false_positive_colormap = convert_to_colormap(
            np.abs(np.clip(gt_disp_png - pred_disp_png, None, 0)))
        false_negative_colormap = convert_to_colormap(
            np.abs(np.clip(gt_disp_png - pred_disp_png, 0, None)))
        assert (cv2.imwrite(diff_colormap_path, diff_colormap))
        assert (cv2.imwrite(false_positive_path, false_positive_colormap))
        assert (cv2.imwrite(false_negative_path, false_negative_colormap))

        out_pfm_path = 'output/%s/%s.pfm' % (args.name, idxname)
        with open(out_pfm_path, 'w') as f:
            # PFM stores rows bottom-up, hence the vertical flip.
            save_pfm(f, pred_disp[::-1, :])
        with open(
                'output/%s/%s/time_%s.txt' %
            (args.name, idxname.split('/')[0], args.name), 'w') as f:
            f.write(str(ttime))
        print(" output = " + out_pfm_path)

        caption = img_name + ", " + str(
            tuple(pred_disp_png.shape)) + ", max disparity = " + str(
                int(max_disp[0])) + ", time = " + str(ttime)
        # read GT depthmap and upload as jpg
        # wandb.log({"disparity": wandb.Image(pred_colormap, caption=caption) , "gt": wandb.Image(gt_colormap), "entropy": wandb.Image(entropy_colormap, caption= str(entorpy_png.shape)),
        #            "diff":wandb.Image(diff_colormap), "false_positive":wandb.Image(false_positive_colormap), "false_negative":wandb.Image(false_negative_colormap)}, step=steps)
        torch.cuda.empty_cache()
        steps += 1

        # Todo: find out what mask0nocc does. It's probably not the same as KITTI's object map
        if dataset_type == 2:
            obj_map_path = os.path.join(data_path, "obj_map.png")
        else:
            obj_map_path = None

        if args.score_results:
            if pred_disp_raw.shape != gt_disp_raw.shape:
                # pred_disp_raw[375 x 1242] gt_disp_raw[675 x 2236]
                # Rescale prediction (and its disparity values) to GT size.
                ratio = float(gt_disp_raw.shape[1]) / pred_disp_raw.shape[1]
                disp_resized = cv2.resize(
                    pred_disp_raw,
                    (gt_disp_raw.shape[1], gt_disp_raw.shape[0])) * ratio
                pred_disp_raw = disp_resized  # [675 x 2236]

            # if args.debug:
            #     out_resized_pfm_path = 'output/%s/%s/pred_scored.pfm' % (args.name, img_name)
            #     with open(out_resized_pfm_path, 'w') as f:
            #         save_pfm(f, pred_disp_raw)
            #     out_resized_gt_path = 'output/%s/%s/gt_scored.pfm' % (args.name, img_name)
            #     with open(out_resized_gt_path, 'w') as f:
            #         save_pfm(f, gt_disp_raw.numpy())

            metrics = score_rvc.get_metrics(
                pred_disp_raw,
                gt_disp_raw,
                int(max_disp[0]),
                dataset_type,
                ('output/%s/%s' % (args.name, idxname.split('/')[0])),
                disp_path=pred_disp_path,
                gt_path=gt_disp_path,
                obj_map_path=obj_map_path,
                debug=args.debug)

            # Fold this sample's metrics into the dataset accumulator and
            # recompute running averages.
            avg_metrics = {}
            for (key, val) in metrics.items():
                if cum_metrics.get(key) == None:
                    cum_metrics[key] = []
                cum_metrics[key].append(val)
                avg_metrics["avg_" + key] = sum(cum_metrics[key]) / len(
                    cum_metrics[key])
            # wandb.log(metrics, step=steps)
            # wandb.log(avg_metrics, step=steps)

    # if args.save_weights and os.path.exists(args.loadmodel):
    #     wandb.save(args.loadmodel)

    if args.prepare_kitti and (args.all_data or args.kitti):
        in_path = 'output/%s' % (args.name)
        out_path = "/home/isaac/high-res-stereo/kitti_submission_output"
        out_path = prepare_kitti(in_path, out_path)
        subprocess.run(
            ["/home/isaac/KITTI2015_devkit/cpp/eval_scene_flow", out_path])
        print("KITTI submission evaluation saved to: " + out_path)
def main():
    """Validate a pretrained HSM-Net on one fixed KITTI 2015 image at
    multiple test resolutions (``--testres``), logging per-resolution scores
    and visualizations."""
    parser = argparse.ArgumentParser(description='HSM-Net')
    parser.add_argument('--maxdisp', type=int, default=384,
                        help='maxium disparity')
    parser.add_argument('--name', default='name')
    parser.add_argument('--database', default='/data/private',
                        help='data path')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--val_batch_size', type=int, default=1,
                        help='samples per batch')
    parser.add_argument('--loadmodel', default=None,
                        help='weights path')
    parser.add_argument('--log_dir', default="/data/private/logs/high-res-stereo")
    parser.add_argument("--testres", default=[0], nargs="+")
    parser.add_argument("--no_aug", default=False, action="store_true")
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    torch.manual_seed(args.seed)  # set again
    torch.cuda.manual_seed(args.seed)

    # Timestamp the run name so repeated runs do not collide.
    args.name = args.name + "_" + time.strftime('%l:%M%p_%Y%b%d').strip(" ")

    # --testres arrives as strings from argparse (nargs="+"); convert.
    testres = []
    for i in args.testres:
        testres.append(float(i))
    args.testres = testres

    all_left_img, all_right_img, all_left_disp, left_val, right_val, disp_val_L = lk15.dataloader(
        '%s/KITTI2015/data_scene_flow/training/' % args.database, val=True)
    # Validate on a single fixed image (index 3 of the val split).
    left_val = [left_val[3]]
    right_val = [right_val[3]]
    disp_val_L = [disp_val_L[3]]

    # all_l = all_left_disp + left_val
    # all_r = all_right_img + right_val
    # all_d = all_left_disp + disp_val_L
    # correct_shape = (1242, 375)
    # for i in range(len(all_l)):
    #     l = np.array(Image.open(all_l[i]).convert("RGB"))
    #     r = np.array(Image.open(all_r[i]).convert("RGB"))
    #     d = Image.open(all_d[i])
    #     if l.shape != (375, 1242, 3):
    #         l2 = cv2.resize(l, correct_shape, interpolation=cv2.INTER_CUBIC)
    #         r2 = cv2.resize(r, correct_shape, interpolation=cv2.INTER_CUBIC)
    #         d2 = np.array(torchvision.transforms.functional.resize(d, [375, 1242]))
    #         cv2.imwrite(all_l[i], cv2.cvtColor(l2, cv2.COLOR_RGB2BGR))
    #         cv2.imwrite(all_r[i], cv2.cvtColor(r2, cv2.COLOR_RGB2BGR))
    #         cv2.imwrite(all_d[i], d2)

    model = hsm(args.maxdisp, clean=False, level=1)
    model.cuda()

    # load model — NOTE(review): unlike the other entry points there is no
    # `--loadmodel is None` guard here; torch.load(None) will raise.
    print("loading pretrained model: " + str(args.loadmodel))
    pretrained_dict = torch.load(args.loadmodel)
    # Drop 'disp' entries so maxdisp can differ from the checkpoint's.
    pretrained_dict['state_dict'] = {k: v
                                     for k, v in pretrained_dict['state_dict'].items()
                                     if ('disp' not in k)}
    model = nn.DataParallel(model, device_ids=[0])
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)

    name = "val_at_many_res" + "_" + datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    log = logger.Logger(args.log_dir, name)
    val_sample_count = 0
    for res in args.testres:
        # Rebuild the loader per resolution so images are resampled.
        val_loader_kitti15 = DA.myImageFloder(left_val, right_val, disp_val_L,
                                              is_validation=True,
                                              testres=res)
        ValImgLoader = torch.utils.data.DataLoader(val_loader_kitti15,
                                                   drop_last=False,
                                                   batch_size=args.val_batch_size,
                                                   shuffle=False,
                                                   worker_init_fn=_init_fn,
                                                   num_workers=0)
        print("================ res: " + str(res) + " ============================")
        ## val ##
        val_score_accum_dict = {}
        val_img_idx = 0
        for batch_idx, (imgL_crop, imgR_crop, disp_crop_L) in enumerate(ValImgLoader):
            vis, scores_list, err_map_list = val_step(model, imgL_crop,
                                                      imgR_crop, disp_crop_L,
                                                      args.maxdisp, res)
            for score, err_map in zip(scores_list, err_map_list):
                for (score_tag, score_val), (map_tag, map_val) in zip(score.items(), err_map.items()):
                    log.scalar_summary("val/im_" + str(val_img_idx) + "/" + str(res) + "/" + score_tag,
                                       score_val, val_sample_count)
                    log.image_summary("val/" + str(res) + "/" + map_tag,
                                      map_val, val_sample_count)
                    if score_tag not in val_score_accum_dict.keys():
                        val_score_accum_dict[score_tag] = 0
                    val_score_accum_dict[score_tag] += score_val
                    print("res: " + str(res) + " " + score_tag + ": " + str(score_val))
                val_img_idx += 1
                val_sample_count += 1
            log.image_summary('val/left', imgL_crop[0:1], val_sample_count)
            # log.image_summary('val/right', imgR_crop[0:1], val_sample_count)
            log.disp_summary('val/gt0', disp_crop_L[0:1], val_sample_count)  # <-- GT disp
            log.entp_summary('val/entropy', vis['entropy'], val_sample_count)
            log.disp_summary('val/output3', vis['output3'][0], val_sample_count)
def main():
    """Run HSM inference over a directory of rectified stereo pairs
    (``stereo_front_left`` / ``stereo_front_right``) and write disparity and
    entropy PNGs per image."""
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument("--name", required=True)
    parser.add_argument('--datapath', default='./data-mbtest/',
                        help='test data path')
    parser.add_argument('--loadmodel', default=None,
                        help='model path')
    parser.add_argument('--clean', type=float, default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument('--testres', type=float, default=1.8,
                        # Too low for images. Sometimes turn it to 2 ~ 3
                        # for ETH3D we need to use different resolution
                        # 1 - nothibg, 0,5 halves the image, 2 doubles the size of the iamge. We need to
                        # middleburry 1 (3000, 3000)
                        # ETH (3~4) since (1000, 1000)
                        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp', type=int, default=2056,
                        help='maximum disparity to search for')
    parser.add_argument('--level', type=int, default=1,
                        help='output level of output, default is level 1 (stage 3),\
            can also use level 2 (stage 2) or level 3 (stage 1)')
    args = parser.parse_args()

    args.max_disp = int(args.max_disp)  # max_disp = 2056 * testres
    # Round max_disp down to a multiple of 16.
    args.max_disp = 16 * math.floor(args.max_disp / 16)
    args.name = args.name + "_" + datetime.now().strftime("%Y-%m-%d_%H:%M:%S")

    # dataloader
    from dataloader import listfiles as DA

    # test_left_img, test_right_img, _, _ = DA.dataloader(args.datapath)
    # print("total test images: " + str(len(test_left_img)))
    # print("output path: " + args.outdir)

    # construct model
    model = hsm(args.max_disp, args.clean, level=args.level)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # Drop 'disp' entries so maxdisp can differ from the checkpoint's.
        pretrained_dict['state_dict'] = {k: v
                                         for k, v in pretrained_dict['state_dict'].items()
                                         if 'disp' not in k}
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # dry run — warm up CUDA kernels/allocations before timing real pairs.
    multip = 48
    imgL = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgR = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgL = Variable(torch.FloatTensor(imgL).cuda())
    imgR = Variable(torch.FloatTensor(imgR).cuda())
    with torch.no_grad():
        model.eval()
        pred_disp, entropy = model(imgL, imgR)

    left_img_dir = os.path.join(args.datapath, "stereo_front_left")
    right_img_dir = os.path.join(args.datapath, "stereo_front_right")
    # Sort both listings so left/right files pair up by name.
    left_img_path_list = os.listdir(left_img_dir)
    left_img_path_list.sort()
    right_img_path_list = os.listdir(right_img_dir)
    right_img_path_list.sort()

    processed = get_transform()
    model.eval()

    # save predictions
    out_path = os.path.join(args.datapath, args.name)
    if not os.path.exists(out_path):
        os.mkdir(out_path)
    disp_path = os.path.join(out_path, "disp")
    entp_path = os.path.join(out_path, "entropy")
    if not os.path.exists(disp_path):
        os.mkdir(disp_path)
    if not os.path.exists(entp_path):
        os.mkdir(entp_path)

    for (left_img_name, right_img_name) in zip(left_img_path_list,
                                               right_img_path_list):
        left_img_path = os.path.join(left_img_dir, left_img_name)
        right_img_path = os.path.join(right_img_dir, right_img_name)
        print(left_img_path)
        imgL_o = (skimage.io.imread(left_img_path).astype('float32'))[:, :, :3]
        imgR_o = (skimage.io.imread(right_img_path).astype('float32'))[:, :, :3]
        imgsize = imgL_o.shape[:2]

        max_disp = int(args.max_disp)

        ## change max disp — round the scaled disparity range up to a
        ## multiple of 64 and rebuild the regression heads to match.
        tmpdisp = int(max_disp * args.testres // 64 * 64)
        if (max_disp * args.testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp, 32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp, 64).cuda()

        # resize
        imgL_o = cv2.resize(imgL_o, None, fx=args.testres, fy=args.testres,
                            interpolation=cv2.INTER_CUBIC)
        imgR_o = cv2.resize(imgR_o, None, fx=args.testres, fy=args.testres,
                            interpolation=cv2.INTER_CUBIC)
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        ##fast pad — pad H/W up to multiples of 64 (top and left-pad layout).
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]:
            max_h += 64
        if max_w < imgL.shape[3]:
            max_w += 64
        top_pad = max_h - imgL.shape[2]
        left_pad = max_w - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                          mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                          mode='constant', constant_values=0)

        # test
        imgL = torch.FloatTensor(imgL)
        imgR = torch.FloatTensor(imgR)
        imgL = imgL.cuda()
        imgR = imgR.cuda()
        with torch.no_grad():
            torch.cuda.synchronize()
            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # Strip the padding added above.
        top_pad = max_h - imgL_o.shape[0]
        left_pad = max_w - imgL_o.shape[1]
        entropy = entropy[top_pad:, :pred_disp.shape[1] - left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # resize to highres — disparity values scale with image width, hence
        # the division by testres.
        pred_disp = cv2.resize(pred_disp / args.testres,
                               (imgsize[1], imgsize[0]),
                               interpolation=cv2.INTER_LINEAR)

        # clip while keep inf  (x != x is the NaN test)
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        # Derive the output name from the fixed-width timestamp filename,
        # swapping the "jpg" suffix for "png".
        # NOTE(review): assumes every input name matches the length of
        # "315970554564438888.jpg" — confirm for other datasets.
        out_file_name = left_img_path[len(left_img_path) -
                                      len("315970554564438888.jpg"):
                                      len(left_img_path) - len("jpg")]
        out_file_name = os.path.join(out_file_name + "png", )
        pred_disp_png = (pred_disp * 256).astype('uint16')
        cv2.imwrite(os.path.join(disp_path, out_file_name), pred_disp_png)
        entropy_png = (entropy * 256).astype('uint16')
        cv2.imwrite(os.path.join(entp_path, out_file_name), entropy_png)
        torch.cuda.empty_cache()
def main():
    """Evaluate HSM on a KITTI-2015 validation list.

    For every left/right pair listed in KITTI2015_val.txt, runs the model at
    --testres resolution, then saves the disparity and entropy maps as 16-bit
    PNGs and .npy arrays under <sequence_root>/<args.name>/{disp,entropy}/ and
    logs visual summaries through `Logger`.
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument('--name', required=True, type=str)
    parser.add_argument('--datapath', default='/data/privateKITTI_raw/',
                        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--clean', type=float, default=-1,
                        help='clean up output using entropy estimation')
    # Resolution-ratio notes (from the original author): 1 = unchanged,
    # 0.5 halves the image, 2 doubles it. Middlebury (~3000x3000): 1;
    # ETH3D (~1000x1000): 3~4; KITTI: 1.8 (sometimes 2~3).
    parser.add_argument('--testres', type=float, default=1.8,
                        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp', type=float, default=384,
                        help='maximum disparity to search for')
    parser.add_argument('--level', type=int, default=1,
                        help='output level of output, default is level 1 (stage 3),\
                can also use level 2 (stage 2) or level 3 (stage 1)')
    parser.add_argument('--save_err', action="store_true")
    args = parser.parse_args()

    name = "eval" + "_" + datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    logger = Logger("/data/private/logs/high-res-stereo", name)
    print("Saving log at: " + name)

    # construct model
    model = hsm(args.max_disp, args.clean, level=args.level)
    model = nn.DataParallel(model)
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # drop disparity-regression weights; they are rebuilt per image below
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items() if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # dry run to warm up CUDA before timing-sensitive inference
    multip = 48
    imgL = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgR = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgL = Variable(torch.FloatTensor(imgL).cuda())
    imgR = Variable(torch.FloatTensor(imgR).cuda())
    with torch.no_grad():
        model.eval()
        pred_disp, entropy = model(imgL, imgR)

    processed = get_transform()
    model.eval()

    with open("KITTI2015_val.txt") as file:
        lines = file.readlines()
    left_img_paths = [x.strip() for x in lines]
    right_img_paths = [p.replace("image_2", "image_3") for p in left_img_paths]

    # BUG FIX: removed leftover debug code that restricted evaluation to a
    # single sample (left_img_paths = [left_img_paths[3]]); the full
    # validation list is now evaluated.

    for i, (left_img_path, right_img_path) in enumerate(
            zip(left_img_paths, right_img_paths)):
        print(left_img_path)
        imgL_o = (skimage.io.imread(left_img_path).astype('float32'))[:, :, :3]
        imgR_o = (skimage.io.imread(right_img_path).astype('float32'))[:, :, :3]
        imgsize = imgL_o.shape[:2]

        max_disp = int(args.max_disp)

        ## change max disp: round the scaled search range up to a multiple of 64
        tmpdisp = int(max_disp * args.testres // 64 * 64)
        if (max_disp * args.testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp, 32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp, 64).cuda()

        # resize to test resolution
        imgL_o = cv2.resize(imgL_o, None, fx=args.testres, fy=args.testres,
                            interpolation=cv2.INTER_CUBIC)
        imgR_o = cv2.resize(imgR_o, None, fx=args.testres, fy=args.testres,
                            interpolation=cv2.INTER_CUBIC)
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        ## fast pad: grow height/width to the next multiple of 64
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]:
            max_h += 64
        if max_w < imgL.shape[3]:
            max_w += 64
        top_pad = max_h - imgL.shape[2]
        left_pad = max_w - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                          mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                          mode='constant', constant_values=0)

        # test
        imgL = torch.FloatTensor(imgL)
        imgR = torch.FloatTensor(imgR)
        imgL = imgL.cuda()
        imgR = imgR.cuda()
        with torch.no_grad():
            torch.cuda.synchronize()
            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # crop away the padding added above
        top_pad = max_h - imgL_o.shape[0]
        left_pad = max_w - imgL_o.shape[1]
        entropy = entropy[top_pad:, :pred_disp.shape[1] - left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # resize back to the original resolution; disparities scale inversely
        pred_disp = cv2.resize(pred_disp / args.testres,
                               (imgsize[1], imgsize[0]),
                               interpolation=cv2.INTER_LINEAR)

        # clip while keeping inf: mark inf or NaN pixels invalid
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        # output root is three directories above the left image, tagged by name
        out_base_path = left_img_path.split("/")[:-3]
        out_base_path = "/" + os.path.join(*out_base_path)
        out_base_path = os.path.join(out_base_path, args.name)
        img_name = left_img_path.split("/")[-1][:-3] + "png"

        disp_path = os.path.join(out_base_path, "disp")
        os.makedirs(disp_path, exist_ok=True)
        pred_disp_png = (pred_disp * 256).astype("uint16")
        cv2.imwrite(os.path.join(disp_path, img_name), pred_disp_png)
        logger.disp_summary("disp" + "/" + img_name[:-4], pred_disp, i)
        np.save(os.path.join(disp_path, img_name[:-len(".png")]), pred_disp)

        entp_path = os.path.join(out_base_path, "entropy")
        os.makedirs(entp_path, exist_ok=True)
        # saving entropy as png, normalized; BUG FIX: cast to uint16 so a
        # valid image dtype is written (the float array was passed before)
        entropy_png = ((entropy / entropy.max()) * 256).astype("uint16")
        cv2.imwrite(os.path.join(entp_path, img_name), entropy_png)
        logger.disp_summary("entropy" + "/" + img_name[:-4], entropy, i)
        np.save(os.path.join(entp_path, img_name[:-len(".png")]), entropy)

        torch.cuda.empty_cache()
def main():
    """Train HSM-Net (currently on a hard-coded Middlebury 'Cable-perfect'
    sample repeated to fill batches), with periodic validation, scalar/image
    logging, and checkpointing to <log_dir>/<name>/saved_model/.
    """
    parser = argparse.ArgumentParser(description='HSM-Net')
    parser.add_argument('--maxdisp', type=int, default=384,
                        help='maxium disparity')
    parser.add_argument('--name', default='name')
    parser.add_argument('--database', default='/data/private',
                        help='data path')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of epochs to train')
    # when maxdisp is 768, 18 is the most you can fit in 2 V100s (with syncBN on)
    parser.add_argument('--batch_size', type=int, default=18,
                        help='samples per batch')
    parser.add_argument('--val_batch_size', type=int, default=2,
                        help='validation samples per batch')
    parser.add_argument('--loadmodel', default=None, help='weights path')
    parser.add_argument('--log_dir', default="/data/private/logs/high-res-stereo")
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--val_epoch', type=int, default=2)
    parser.add_argument('--save_epoch', type=int, default=1)
    parser.add_argument("--val", action="store_true", default=False)
    parser.add_argument("--save_numpy", action="store_true", default=False)
    parser.add_argument("--testres", type=float, default=1.8)
    parser.add_argument("--threshold", type=float, default=0.7)
    parser.add_argument("--use_pseudoGT", default=False, action="store_true")
    parser.add_argument("--lr", default=1e-3, type=float)
    parser.add_argument("--lr_decay", default=2, type=int)
    parser.add_argument("--gpu", default=[0], nargs="+")
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    torch.manual_seed(args.seed)  # set again
    torch.cuda.manual_seed(args.seed)

    scale_factor = args.maxdisp / 384.  # controls training resolution
    args.name = args.name + "_" + time.strftime('%l:%M%p_%Y%b%d').strip(" ")

    # --gpu values arrive as strings from the command line; normalize to ints
    args.gpu = [int(i) for i in args.gpu]

    # hard-coded single-scene training set, repeated to fill 16 batches
    all_left_img = [
        "/data/private/Middlebury/mb-ex/trainingF/Cable-perfect/im0.png"
    ] * args.batch_size * 16
    all_right_img = [
        "/data/private/Middlebury/mb-ex/trainingF/Cable-perfect/im1.png"
    ] * args.batch_size * 16
    all_left_disp = [
        "/data/private/Middlebury/kitti_testres1.15_maxdisp384/disp/Cable-perfect.npy"
    ] * args.batch_size * 16
    all_left_entp = [
        "/data/private/Middlebury/kitti_testres1.15_maxdisp384/entropy/Cable-perfect.npy"
    ] * args.batch_size * 16
    loader_mb = DA.myImageFloder(all_left_img,
                                 all_right_img,
                                 all_left_disp,
                                 rand_scale=[0.225, 0.6 * scale_factor],
                                 order=0,
                                 use_pseudoGT=args.use_pseudoGT,
                                 entropy_threshold=args.threshold,
                                 left_entropy=all_left_entp)

    val_left_img = [
        "/data/private/Middlebury/mb-ex/trainingF/Cable-perfect/im0.png"
    ]
    val_right_img = [
        "/data/private/Middlebury/mb-ex/trainingF/Cable-perfect/im1.png"
    ]
    val_disp = [
        "/data/private/Middlebury/mb-ex/trainingF/Cable-perfect/disp0GT.pfm"
    ]
    val_loader_mb = DA.myImageFloder(val_left_img,
                                     val_right_img,
                                     val_disp,
                                     is_validation=True,
                                     testres=args.testres)

    TrainImgLoader = torch.utils.data.DataLoader(
        loader_mb,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        worker_init_fn=_init_fn,
        num_workers=args.batch_size)
    ValImgLoader = torch.utils.data.DataLoader(val_loader_mb,
                                               batch_size=1,
                                               shuffle=False,
                                               drop_last=False,
                                               worker_init_fn=_init_fn,
                                               num_workers=1)
    print('%d batches per epoch' % (len(loader_mb) // args.batch_size))

    model = hsm(args.maxdisp, clean=False, level=1)
    # BUG FIX: honor the GPUs requested via --gpu; previously a hard-coded
    # gpus = [0, 1] silently ignored the parsed argument.
    gpus = args.gpu
    if len(gpus) > 1:
        # sync batch-norm is only needed for multi-GPU data parallelism
        from sync_batchnorm.sync_batchnorm import convert_model
        model = nn.DataParallel(model, device_ids=gpus)
        model = convert_model(model)
    else:
        model = nn.DataParallel(model, device_ids=gpus)
    model.cuda()

    # load model
    if args.loadmodel is not None:
        print("loading pretrained model: " + str(args.loadmodel))
        pretrained_dict = torch.load(args.loadmodel)
        # drop disparity-regression weights; they depend on maxdisp
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if ('disp' not in k)
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    log = logger.Logger(args.log_dir, args.name, save_numpy=args.save_numpy)

    total_iters = 0
    val_sample_count = 0
    val_batch_count = 0
    save_path = os.path.join(args.log_dir,
                             os.path.join(args.name, "saved_model"))
    os.makedirs(save_path, exist_ok=True)

    for epoch in range(1, args.epochs + 1):
        total_train_loss = 0
        # accumulates scores throughout a batch to get average score
        train_score_accum_dict = {}
        train_score_accum_dict["num_scored"] = 0
        adjust_learning_rate(optimizer, args.lr, args.lr_decay, epoch,
                             args.epochs, decay_rate=0.1)
        print('Epoch %d / %d' % (epoch, args.epochs))

        # SAVE
        if epoch != 1 and epoch % args.save_epoch == 0:
            print("saving weights at epoch: " + str(epoch))
            savefilename = os.path.join(save_path,
                                        'ckpt_' + str(total_iters) + '.tar')
            torch.save(
                {
                    'iters': total_iters,
                    'state_dict': model.state_dict(),
                    'train_loss': total_train_loss / len(TrainImgLoader),
                    "optimizer": optimizer.state_dict()
                }, savefilename)

        ## val ##
        if epoch % args.val_epoch == 0:
            print("validating at epoch: " + str(epoch))
            # accumulates scores throughout a batch to get average score
            val_score_accum_dict = {}
            for batch_idx, (imgL_crop, imgR_crop,
                            disp_crop_L) in enumerate(ValImgLoader):
                vis, scores_list, err_map_list = val_step(
                    model, imgL_crop, imgR_crop, disp_crop_L, args.maxdisp,
                    args.testres)
                for score, err_map in zip(scores_list, err_map_list):
                    for (score_tag, score_val), (map_tag, map_val) in zip(
                            score.items(), err_map.items()):
                        log.scalar_summary("val/" + score_tag, score_val,
                                           val_sample_count)
                        log.image_summary("val/" + map_tag, map_val,
                                          val_sample_count)
                        if score_tag not in val_score_accum_dict.keys():
                            val_score_accum_dict[score_tag] = 0
                        val_score_accum_dict[score_tag] += score_val
                    val_sample_count += 1
                log.image_summary('val/left', imgL_crop[0:1], val_sample_count)
                log.image_summary('val/right', imgR_crop[0:1],
                                  val_sample_count)
                log.disp_summary('val/gt0', disp_crop_L[0:1],
                                 val_sample_count)  # <-- GT disp
                log.entp_summary('val/entropy', vis['entropy'],
                                 val_sample_count)
                log.disp_summary('val/output3', vis['output3'][0],
                                 val_sample_count)
            for score_tag, score_val in val_score_accum_dict.items():
                log.scalar_summary("val/" + score_tag + "_batch_avg",
                                   score_val, val_batch_count)
            val_batch_count += 1

        ## training ##
        for batch_idx, (imgL_crop, imgR_crop,
                        disp_crop_L) in enumerate(TrainImgLoader):
            print("training at epoch: " + str(epoch))
            is_scoring = total_iters % 10 == 0
            loss, vis, scores_list, maps = train_step(model,
                                                      optimizer,
                                                      imgL_crop,
                                                      imgR_crop,
                                                      disp_crop_L,
                                                      args.maxdisp,
                                                      is_scoring=is_scoring)
            total_train_loss += loss

            if is_scoring:
                log.scalar_summary('train/loss_batch', loss, total_iters)
                for score in scores_list:
                    for tag, val in score.items():
                        log.scalar_summary("train/" + tag + "_batch", val,
                                           total_iters)
                        if tag not in train_score_accum_dict.keys():
                            train_score_accum_dict[tag] = 0
                        train_score_accum_dict[tag] += val
                        train_score_accum_dict[
                            "num_scored"] += imgL_crop.shape[0]
                for tag, err_map in maps[0].items():
                    log.image_summary("train/" + tag, err_map, total_iters)

            if total_iters % 10 == 0:
                log.image_summary('train/left', imgL_crop[0:1], total_iters)
                log.image_summary('train/right', imgR_crop[0:1], total_iters)
                log.disp_summary('train/gt0', disp_crop_L[0:1],
                                 total_iters)  # <-- GT disp
                log.entp_summary('train/entropy', vis['entropy'][0:1],
                                 total_iters)
                log.disp_summary('train/output3', vis['output3'][0:1],
                                 total_iters)
            total_iters += 1

        log.scalar_summary('train/loss',
                           total_train_loss / len(TrainImgLoader), epoch)
        for tag, val in train_score_accum_dict.items():
            log.scalar_summary("train/" + tag + "_avg",
                               val / train_score_accum_dict["num_scored"],
                               epoch)
        torch.cuda.empty_cache()

    # Save final checkpoint
    print("Finished training!\n Saving the last checkpoint...")
    savefilename = os.path.join(save_path, 'final' + '.tar')
    torch.save(
        {
            'iters': total_iters,
            'state_dict': model.state_dict(),
            'train_loss': total_train_loss / len(TrainImgLoader),
            "optimizer": optimizer.state_dict()
        }, savefilename)
def main():
    """Run HSM over Middlebury-2014-style scene folders and write, per scene,
    disparity/entropy .npy and .png files plus a submission-format disp0<method>.pfm
    and a time<method>.txt under --outdir.
    """
    parser = argparse.ArgumentParser(description='HSM')
    # BUG FIX: the method name used to be read from sys.argv[1] after
    # parse_args(), but argparse rejects unknown positional arguments, so the
    # script could never be invoked that way. Declare it properly instead.
    parser.add_argument('method_name', type=str,
                        help='method name used to tag submission files')
    parser.add_argument('--datapath', default='./data-mbtest/',
                        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--outdir', default='output', help='output dir')
    parser.add_argument('--clean', type=float, default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument('--testres', type=float, default=0.5,
                        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp', type=float, default=-1,
                        help='maximum disparity to search for')
    parser.add_argument('--level', type=int, default=1,
                        help='output level of output, default is level 1 (stage 3),\
                can also use level 2 (stage 2) or level 3 (stage 1)')
    # BUG FIX: --training_only was referenced below but never declared,
    # raising AttributeError on args.training_only.
    parser.add_argument('--training_only', action='store_true',
                        help='only process the training split')
    parser.add_argument('--dtype', type=int)
    args = parser.parse_args()

    # construct model
    model = hsm(128, args.clean, level=args.level)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # drop disparity-regression weights; they are rebuilt per scene below
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items() if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # dry run to warm up CUDA before the timed inference below
    multip = 48
    imgL = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgR = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgL = Variable(torch.FloatTensor(imgL).cuda())
    imgR = Variable(torch.FloatTensor(imgR).cuda())
    with torch.no_grad():
        model.eval()
        pred_disp, entropy = model(imgL, imgR)

    method_name = args.method_name

    # dataset-specific test resolution overrides
    if args.dtype == 0:    # KITTI
        args.testres = 1.8
    elif args.dtype == 1:  # Middlebury
        args.testres = 1
    elif args.dtype == 2:  # ETH; between 3~4, find with linear grid search
        args.testres = 3.5

    processed = get_transform()
    model.eval()

    datasets_dir_path = "datasets_middlebury2014"
    folders = [os.path.join(datasets_dir_path, 'training')]
    if not args.training_only:
        folders.append(os.path.join(datasets_dir_path, 'test'))

    for folder in folders:
        datasets = [
            dataset for dataset in os.listdir(folder)
            if os.path.isdir(os.path.join(folder, dataset))
        ]
        for dataset_name in datasets:
            im0_path = os.path.join(folder, dataset_name, 'im0.png')
            im1_path = os.path.join(folder, dataset_name, 'im1.png')
            calib = ReadMiddlebury2014CalibFile(
                os.path.join(folder, dataset_name, 'calib.txt'))
            output_dir_path = os.path.join(folder, dataset_name)

            imgL_o = (skimage.io.imread(im0_path).astype('float32'))[:, :, :3]
            imgR_o = (skimage.io.imread(im1_path).astype('float32'))[:, :, :3]
            imgsize = imgL_o.shape[:2]

            if args.max_disp > 0:
                max_disp = int(args.max_disp)
            else:
                # read the disparity range from the scene's calib.txt
                # (line 7 has the form "ndisp=N" — TODO confirm format)
                path_to_replace = os.path.basename(os.path.normpath(im0_path))
                with open(im0_path.replace(path_to_replace, 'calib.txt')) as f:
                    lines = f.readlines()
                max_disp = int(int(lines[6].split('=')[-1]))

            ## change max disp: round the scaled range up to a multiple of 64
            tmpdisp = int(max_disp * args.testres // 64 * 64)
            if (max_disp * args.testres / 64 * 64) > tmpdisp:
                model.module.maxdisp = tmpdisp + 64
            else:
                model.module.maxdisp = tmpdisp
            if model.module.maxdisp == 64:
                model.module.maxdisp = 128
            model.module.disp_reg8 = disparityregression(
                model.module.maxdisp, 16).cuda()
            model.module.disp_reg16 = disparityregression(
                model.module.maxdisp, 16).cuda()
            model.module.disp_reg32 = disparityregression(
                model.module.maxdisp, 32).cuda()
            model.module.disp_reg64 = disparityregression(
                model.module.maxdisp, 64).cuda()
            print("max disparity = " + str(model.module.maxdisp))

            # resize to test resolution
            imgL_o = cv2.resize(imgL_o, None, fx=args.testres,
                                fy=args.testres,
                                interpolation=cv2.INTER_CUBIC)
            imgR_o = cv2.resize(imgR_o, None, fx=args.testres,
                                fy=args.testres,
                                interpolation=cv2.INTER_CUBIC)
            imgL = processed(imgL_o).numpy()
            imgR = processed(imgR_o).numpy()
            imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
            imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

            ## fast pad: grow height/width to the next multiple of 64
            max_h = int(imgL.shape[2] // 64 * 64)
            max_w = int(imgL.shape[3] // 64 * 64)
            if max_h < imgL.shape[2]:
                max_h += 64
            if max_w < imgL.shape[3]:
                max_w += 64
            top_pad = max_h - imgL.shape[2]
            left_pad = max_w - imgL.shape[3]
            imgL = np.lib.pad(imgL,
                              ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                              mode='constant', constant_values=0)
            imgR = np.lib.pad(imgR,
                              ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                              mode='constant', constant_values=0)

            # test
            imgL = Variable(torch.FloatTensor(imgL).cuda())
            imgR = Variable(torch.FloatTensor(imgR).cuda())
            with torch.no_grad():
                torch.cuda.synchronize()
                start_time = time.time()
                pred_disp, entropy = model(imgL, imgR)
                torch.cuda.synchronize()
                ttime = (time.time() - start_time)
                print('time = %.2f' % (ttime * 1000))
            pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

            # crop away the padding added above
            top_pad = max_h - imgL_o.shape[0]
            left_pad = max_w - imgL_o.shape[1]
            entropy = entropy[top_pad:, :pred_disp.shape[1] -
                              left_pad].cpu().numpy()
            pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

            # save predictions
            idxname = im0_path.split('/')[-2]
            if not os.path.exists('%s/%s' % (args.outdir, idxname)):
                os.makedirs('%s/%s' % (args.outdir, idxname))
            idxname = '%s/disp0%s' % (idxname, method_name)

            # resize to highres; disparities scale inversely with testres
            pred_disp = cv2.resize(pred_disp / args.testres,
                                   (imgsize[1], imgsize[0]),
                                   interpolation=cv2.INTER_LINEAR)

            # clip while keep inf: mark inf or NaN pixels invalid
            invalid = np.logical_or(pred_disp == np.inf,
                                    pred_disp != pred_disp)
            pred_disp[invalid] = np.inf

            np.save('%s/%s-disp.npy' % (args.outdir, idxname.split('/')[0]),
                    (pred_disp))
            np.save('%s/%s-ent.npy' % (args.outdir, idxname.split('/')[0]),
                    (entropy))
            cv2.imwrite(
                '%s/%s-disp.png' % (args.outdir, idxname.split('/')[0]),
                pred_disp / pred_disp[~invalid].max() * 255)
            cv2.imwrite('%s/%s-ent.png' % (args.outdir, idxname.split('/')[0]),
                        entropy / entropy.max() * 255)
            with open('%s/%s.pfm' % (args.outdir, idxname), 'w') as f:
                save_pfm(f, pred_disp[::-1, :])
            with open(
                    '%s/%s/time%s.txt' %
                (args.outdir, idxname.split('/')[0], method_name), 'w') as f:
                f.write(str(ttime))

            torch.cuda.empty_cache()
def main():
    """Run HSM over the RVC dataset collection (Middlebury / KITTI / ETH3D).

    Chooses a per-dataset-type test resolution when --testres is -1, writes
    disparity/entropy PNGs and a submission .pfm per image under --name, and
    logs inputs/outputs to wandb.
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument(
        '--datapath',
        default="/home/isaac/rvc_devkit/stereo/datasets_middlebury2014",
        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--name', default='rvc_highres_output',
                        help='output dir')
    parser.add_argument('--clean', type=float, default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument(
        '--testres', type=float,
        default=-1,  # default used to be 0.5; -1 = adaptive per dataset type
        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp', type=float, default=-1,
                        help='maximum disparity to search for')
    parser.add_argument('--level', type=int, default=1,
                        help='output level of output, default is level 1 (stage 3),\
                can also use level 2 (stage 2) or level 3 (stage 1)')
    parser.add_argument('--debug_image', type=str, default=None)
    # BUG FIX: was type=int with a float default (3.5); any user-supplied
    # value would have been truncated to an integer, unlike the default.
    parser.add_argument("--eth_testres", type=float, default=3.5)
    args = parser.parse_args()

    wandb.init(name=args.name, project="rvc_stereo", save_code=True,
               magic=True, config=args)

    # testres == -1 means "choose per dataset type" inside the loop below
    use_adaptive_testres = False
    if args.testres == -1:
        use_adaptive_testres = True

    # construct model
    model = hsm(128, args.clean, level=args.level)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # drop disparity-regression weights; they are rebuilt per image below
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items() if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))
    model.eval()

    if args.testres > 0:
        dataset = RVCDataset(args.datapath, testres=args.testres)
    else:
        dataset = RVCDataset(args.datapath, eth_testres=args.eth_testres)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False,
                            num_workers=0)

    steps = 0
    # TODO: hot fix — must be fixed to handle batch sizes greater than 1
    for (imgL, imgR, max_disp, original_image_size, dataset_type,
         img_name) in dataloader:
        img_name = img_name[0]
        if args.debug_image is not None and args.debug_image not in img_name:
            continue
        print(img_name)

        if use_adaptive_testres:
            if dataset_type == 0:  # Middlebury
                args.testres = 1
            elif dataset_type == 2:  # presumably KITTI — verify in RVCDataset
                args.testres = 1.8
            elif dataset_type == 1:  # ETH3D; between 3~4, found by grid search
                args.testres = 3.5
            else:
                raise ValueError(
                    "name of the folder does not contain any of: kitti, middlebury, eth3d"
                )

        if args.max_disp > 0:
            max_disp = int(args.max_disp)

        ## change max disp: round the scaled range up to a multiple of 64
        tmpdisp = int(max_disp * args.testres // 64 * 64)
        if (max_disp * args.testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp,
                                                     16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp,
                                                      16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp,
                                                      32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp,
                                                      64).cuda()
        print(" max disparity = " + str(model.module.maxdisp))

        ## fast pad: target sizes are the next multiples of 64
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]:
            max_h += 64
        if max_w < imgL.shape[3]:
            max_w += 64

        wandb.log(
            {
                "imgL":
                wandb.Image(imgL,
                            caption=img_name + ", " + str(tuple(imgL.shape))),
                "imgR":
                wandb.Image(imgR,
                            caption=img_name + ", " + str(tuple(imgR.shape)))
            },
            step=steps)

        with torch.no_grad():
            torch.cuda.synchronize()
            start_time = time.time()
            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()
            ttime = (time.time() - start_time)
        # BUG FIX: removed a leftover debug torch.save() to a hard-coded
        # /home/isaac/... path that would crash on any other machine.
        print(' time = %.2f' % (ttime * 1000))
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # crop away the padding added by the dataset
        top_pad = max_h - original_image_size[0][0]
        left_pad = max_w - original_image_size[1][0]
        entropy = entropy[top_pad:, :pred_disp.shape[1] -
                          left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # save predictions
        idxname = img_name
        if not os.path.exists('%s/%s' % (args.name, idxname)):
            os.makedirs('%s/%s' % (args.name, idxname))
        idxname = '%s/disp0%s' % (idxname, args.name)

        # resize to highres; BUG FIX: cv2.resize needs plain ints for dsize,
        # not the 1-element tensors the DataLoader yields
        pred_disp = cv2.resize(
            pred_disp / args.testres,
            (int(original_image_size[1]), int(original_image_size[0])),
            interpolation=cv2.INTER_LINEAR)

        # clip while keep inf: mark inf or NaN pixels invalid
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        pred_disp_png = pred_disp / pred_disp[~invalid].max() * 255
        cv2.imwrite('%s/%s/disp.png' % (args.name, idxname.split('/')[0]),
                    pred_disp_png)
        entropy_png = entropy / entropy.max() * 255
        cv2.imwrite('%s/%s/ent.png' % (args.name, idxname.split('/')[0]),
                    entropy / entropy.max() * 255)

        out_pfm_path = '%s/%s.pfm' % (args.name, idxname)
        with open(out_pfm_path, 'w') as f:
            save_pfm(f, pred_disp[::-1, :])
        with open(
                '%s/%s/time%s.txt' %
            (args.name, idxname.split('/')[0], args.name), 'w') as f:
            f.write(str(ttime))
        print(" output = " + out_pfm_path)

        caption = (img_name + ", " + str(tuple(pred_disp_png.shape)) +
                   ", max disparity = " + str(max_disp) + ", time = " +
                   str(ttime))
        wandb.log(
            {
                "disparity": wandb.Image(pred_disp_png, caption=caption),
                "entropy": wandb.Image(entropy_png,
                                       caption=str(entropy_png.shape))
            },
            step=steps)

        torch.cuda.empty_cache()
        steps += 1