def main():
    args = parser.parse_args()
    if not (args.output_disp or args.output_depth):
        print('You must at least output one value !')
        return

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    dataset_dir = Path(args.dataset_dir)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = [dataset_dir / file for file in f.read().splitlines()]
    else:
        test_files = sum([list(dataset_dir.walkfiles('*.{}'.format(ext)))
                          for ext in args.img_exts], [])

    print('{} files to test'.format(len(test_files)))

    for file in tqdm(test_files):
        img = imread(file)

        h, w, _ = img.shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            img = np.array(
                Image.fromarray(img).resize((args.img_width, args.img_height)))
        img = np.transpose(img, (2, 0, 1))

        tensor_img = torch.from_numpy(img.astype(np.float32)).unsqueeze(0)
        tensor_img = ((tensor_img / 255 - 0.5) / 0.5).to(device)

        output = disp_net(tensor_img)[0]

        file_path, file_ext = file.relpath(args.dataset_dir).splitext()
        print(file_path)
        print(file_path.splitall())
        file_name = '-'.join(file_path.splitall()[1:])
        print(file_name)

        if args.output_disp:
            disp = (255 * tensor2array(output, max_value=None,
                                       colormap='bone')).astype(np.uint8)
            imsave(output_dir / '{}_disp{}'.format(file_name, file_ext),
                   np.transpose(disp, (1, 2, 0)))
        if args.output_depth:
            depth = 1 / output
            depth = (255 * tensor2array(depth, max_value=None,
                                        colormap='magma')).astype(np.uint8)
            imsave(output_dir / '{}_depth{}'.format(file_name, file_ext),
                   np.transpose(depth, (1, 2, 0)))
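# tensor2array() is used throughout these scripts to turn a single-channel disparity or
# depth tensor into a colormapped image array. A minimal sketch of that behaviour,
# assuming a matplotlib colormap and the channel-first output that the callers above
# transpose with (1, 2, 0), is given below; the repository's own implementation may
# differ in details (e.g. the channel_first flag seen in other scripts in this file).
import numpy as np
import matplotlib.cm as cm


def tensor2array_sketch(tensor, max_value=None, colormap='bone'):
    arr = tensor.detach().cpu().numpy()
    if max_value is None:
        max_value = arr.max()
    arr = (arr / max_value).clip(0, 1)
    colored = cm.get_cmap(colormap)(arr.squeeze())   # H x W x 4 (RGBA), floats in [0, 1]
    return np.transpose(colored, (2, 0, 1))          # channel-first, 4 x H x W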
def main():
    args = parser.parse_args()
    if not (args.output_disp or args.output_depth):
        print('You must at least output one value !')
        return

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    dataset_dir = Path(args.dataset_dir)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = [dataset_dir / file for file in f.read().splitlines()]
    else:
        test_files = sum(
            [dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])

    print('{} files to test'.format(len(test_files)))

    for file in tqdm(test_files):
        img = imread(file).astype(np.float32)

        h, w, _ = img.shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            img = imresize(img, (args.img_height, args.img_width)).astype(np.float32)
        img = np.transpose(img, (2, 0, 1))

        tensor_img = torch.from_numpy(img).unsqueeze(0)
        tensor_img = ((tensor_img / 255 - 0.5) / 0.2).to(device)

        output = disp_net(tensor_img)[0]

        if args.output_disp:
            disp = (255 * tensor2array(output, max_value=None, colormap='bone',
                                       channel_first=False)).astype(np.uint8)
            imsave(output_dir / '{}_disp{}'.format(file.namebase, file.ext), disp)
        if args.output_depth:
            depth = 1 / output
            depth = (255 * tensor2array(depth, max_value=10, colormap='rainbow',
                                        channel_first=False)).astype(np.uint8)
            imsave(output_dir / '{}_depth{}'.format(file.namebase, file.ext), depth)
def main():
    args = parser.parse_args()
    if args.gt_type == 'KITTI':
        from kitti_eval.depth_evaluation_utils import test_framework_KITTI as test_framework
    elif args.gt_type == 'stillbox':
        from stillbox_eval.depth_evaluation_utils import test_framework_stillbox as test_framework

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained_dispnet)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    if args.pretrained_posenet is None:
        print('no PoseNet specified, scale_factor will be determined by median ratio, '
              'which is kiiinda cheating (but consistent with original paper)')
        seq_length = 0
    else:
        weights = torch.load(args.pretrained_posenet)
        seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
        pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
        pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = list(f.read().splitlines())
    else:
        test_files = [file.relpathto(dataset_dir) for file in
                      sum([dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])]

    framework = test_framework(dataset_dir, test_files, seq_length,
                               args.min_depth, args.max_depth)

    print('{} files to test'.format(len(test_files)))
    errors = np.zeros((2, 7, len(test_files)), np.float32)

    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()

    for j, sample in enumerate(tqdm(framework)):
        tgt_img = sample['tgt']
        ref_imgs = sample['ref']

        h, w, _ = tgt_img.shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            tgt_img = imresize(tgt_img, (args.img_height, args.img_width)).astype(np.float32)
            ref_imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32)
                        for img in ref_imgs]

        tgt_img = np.transpose(tgt_img, (2, 0, 1))
        ref_imgs = [np.transpose(img, (2, 0, 1)) for img in ref_imgs]

        tgt_img = torch.from_numpy(tgt_img).unsqueeze(0)
        tgt_img = ((tgt_img / 255 - 0.5) / 0.5).to(device)

        for i, img in enumerate(ref_imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.5).to(device)
            ref_imgs[i] = img

        pred_disp = disp_net(tgt_img).cpu().numpy()[0, 0]

        if args.output_dir is not None:
            if j == 0:
                predictions = np.zeros((len(test_files), *pred_disp.shape))
            predictions[j] = 1 / pred_disp

        gt_depth = sample['gt_depth']

        pred_depth = 1 / pred_disp
        pred_depth_zoomed = zoom(pred_depth,
                                 (gt_depth.shape[0] / pred_depth.shape[0],
                                  gt_depth.shape[1] / pred_depth.shape[1])
                                 ).clip(args.min_depth, args.max_depth)

        if sample['mask'] is not None:
            pred_depth_zoomed = pred_depth_zoomed[sample['mask']]
            gt_depth = gt_depth[sample['mask']]

        if seq_length > 0:
            # Reorganize ref_imgs: tgt is the middle frame, but not necessarily the one
            # used in DispNetS (in case the tested sample was at the end or beginning
            # of the image sequence).
            middle_index = seq_length // 2
            tgt = ref_imgs[middle_index]
            reorganized_refs = ref_imgs[:middle_index] + ref_imgs[middle_index + 1:]
            _, poses = pose_net(tgt, reorganized_refs)
            mean_displacement_magnitude = poses[0, :, :3].norm(2, 1).mean().item()

            scale_factor = sample['displacement'] / mean_displacement_magnitude
            errors[0, :, j] = compute_errors(gt_depth, pred_depth_zoomed * scale_factor)

        # scale_factor = np.median(gt_depth)/np.median(pred_depth_zoomed)
        scale_factor = 1
        errors[1, :, j] = compute_errors(gt_depth, pred_depth_zoomed * scale_factor)

    mean_errors = errors.mean(2)
    error_names = ['abs_rel', 'sq_rel', 'rms', 'log_rms', 'a1', 'a2', 'a3']
    if args.pretrained_posenet:
        print("Results with scale factor determined by PoseNet : ")
        print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
        print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[0]))

    print("Results with scale factor determined by GT/prediction ratio (like the original paper) : ")
    print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
    print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[1]))

    if args.output_dir is not None:
        np.save(output_dir / 'predictions.npy', predictions)
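# The seven metrics printed above come from compute_errors(). A minimal sketch of what
# such a function typically computes for this error table (the standard Eigen et al.
# KITTI depth metrics, matching the error_names used here) is given below; this is an
# assumption based on the metric names, not the repository's exact implementation.
# numpy is assumed imported as np, as in the scripts above.
def compute_errors_sketch(gt, pred):
    thresh = np.maximum(gt / pred, pred / gt)
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()

    rms = np.sqrt(np.mean((gt - pred) ** 2))
    log_rms = np.sqrt(np.mean((np.log(gt) - np.log(pred)) ** 2))

    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)

    return abs_rel, sq_rel, rms, log_rms, a1, a2, a3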
def main():
    global tgt_img, disp_net
    args = parser.parse_args()

    '''Load the trained model'''
    weights = torch.load(args.pretrained_posenet)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    # MD5 ID of the network model
    net_ID = MD5_ID(args.pretrained_posenet)
    # LiDAR-to-camera (L2C) transform matrix, used to align the input poses to the LiDAR frame
    Transform_matrix_L2C = np.identity(4)

    '''Kitti switch'''
    if args.isKitti:
        if not args.isDynamic:
            from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework
        else:
            from kitti_eval.pose_evaluation_utils_forDynamicTest import test_framework_KITTI as test_framework
        save_dir = os.path.join(args.output_dir, "kitti", args.sequences[0], 'net_' + net_ID)
        if args.trainedOnMydataset:
            downsample_img_height = args.img_height
            downsample_img_width = args.img_width
        else:
            # on kitti train set
            downsample_img_height = 128
            downsample_img_width = 416

        Transform_matrix_L2C[:3, :3] = np.array(
            [[7.533745e-03, -9.999714e-01, -6.166020e-04],
             [1.480249e-02, 7.280733e-04, -9.998902e-01],
             [9.998621e-01, 7.523790e-03, 1.480755e-02]])
        Transform_matrix_L2C[:3, -1:] = np.array(
            [-4.069766e-03, -7.631618e-02, -2.717806e-01]).reshape(3, 1)
    else:
        from mydataset_eval.pose_evaluation_utils import test_framework_MYDATASET as test_framework
        save_dir = os.path.join(args.output_dir, "mydataset", args.sequences[0], 'net_' + net_ID)
        if args.trainedOnMydataset:
            downsample_img_height = args.img_height
            downsample_img_width = args.img_width
        else:
            # on kitti train set
            downsample_img_height = 128
            downsample_img_width = 416

        Transform_matrix_L2C[:3, :3] = np.array(
            [[-1.51482698e-02, -9.99886648e-01, 5.36310553e-03],
             [-4.65337018e-03, -5.36307196e-03, -9.99969412e-01],
             [9.99870070e-01, -1.56647995e-02, -4.48880010e-03]])
        Transform_matrix_L2C[:3, -1:] = np.array(
            [4.29029924e-03, -6.08539196e-02, -9.20346161e-02]).reshape(3, 1)

    Transform_matrix_L2C = GramSchmidtHelper(Transform_matrix_L2C)
    Transform_matrix_C2L = np.linalg.inv(Transform_matrix_L2C)

    # ************************* can be removed *********************************
    # For photometric mask evaluation in dynamic scenes, the disp net is needed here
    if args.isDynamic:
        from models import DispNetS
        disp_net = DispNetS().to(device)
        weights = torch.load(args.pretrained_dispnet)
        disp_net.load_state_dict(weights['state_dict'])
        disp_net.eval()

    # normalize = custom_transforms.Normalize(mean=[0.5, 0.5, 0.5],
    #                                         std=[0.5, 0.5, 0.5])
    # valid_transform = custom_transforms.Compose([custom_transforms.ArrayToTensor(), normalize])
    # from datasets.sequence_folders import SequenceFolder
    # val_set = SequenceFolder(
    #     '/home/sda/mydataset/preprocessing/formatted/data/',
    #     transform=valid_transform,
    #     seed=0,
    #     train=False,
    #     sequence_length=3,
    # )
    # val_loader = torch.utils.data.DataLoader(
    #     val_set, batch_size=1, shuffle=False,
    #     num_workers=4, pin_memory=True)
    #
    # intrinsics = None
    # for i, (tgt_img, ref_imgs, intrinsics, intrinsics_inv) in enumerate(val_loader):
    #     intrinsics = intrinsics.to(device)
    #     break
    # *************************************************************************

    '''Load the test dataset'''
    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)
    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)

    '''Data written to the output folder'''
    num_poses = len(framework) - (seq_length - 2)
    predictions_array = np.zeros((len(framework), seq_length, 3, 4))
    processing_time = np.zeros((num_poses - 1, 1))

    # Output folder
    save_dir = Path(save_dir)
    print('Output files will be saved in: ' + save_dir)
    if not os.path.exists(save_dir):
        save_dir.makedirs_p()

    # Pose Graph Manager (for back-end optimization) initialization
    PGM = PoseGraphManager()
    PGM.addPriorFactor()

    # Result saver
    num_frames = len(framework)
    ResultSaver = PoseGraphResultSaver(init_pose=PGM.curr_se3,
                                       save_gap=args.save_gap,
                                       num_frames=num_frames,
                                       seq_idx=args.sequences[0],
                                       save_dir=save_dir)

    # for saving the results as a video
    fig_idx = 1
    fig = plt.figure(fig_idx)
    writer = FFMpegWriter(fps=15)
    video_path = save_dir + '/' + args.sequences[0] + ".mp4"
    num_frames_to_skip_to_show = 5
    num_frames_to_save = np.floor(num_frames / num_frames_to_skip_to_show)

    with writer.saving(fig, video_path, num_frames_to_save):  # this video saving part is optional
        for j, sample in enumerate(tqdm(framework)):
            '''VO part'''
            imgs = sample['imgs']

            w, h = imgs[0].size
            if (not args.no_resize) and (h != downsample_img_height or w != downsample_img_width):
                imgs = [imresize(img, (downsample_img_height, downsample_img_width)).astype(np.float32)
                        for img in imgs]

            imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

            ref_imgs = []
            for i, img in enumerate(imgs):
                img = torch.from_numpy(img).unsqueeze(0)
                img = ((img / 255 - 0.5) / 0.5).to(device)
                if i == len(imgs) // 2:
                    tgt_img = img
                else:
                    ref_imgs.append(img)

            startTimeVO = time.time()
            _, poses = pose_net(tgt_img, ref_imgs)
            processing_time[j] = (time.time() - startTimeVO) / (seq_length - 1)

            # ************************** can be removed ********************************
            if args.isDynamic:
                '''Test the effect of the photometric mask'''
                if args.isKitti:
                    intrinsics = [[2.416744631239935472e+02, 0.000000000000000000e+00, 2.041680103059581199e+02],
                                  [0.000000000000000000e+00, 2.462848682666666491e+02, 5.900083200000000261e+01],
                                  [0.000000000000000000e+00, 0.000000000000000000e+00, 1.000000000000000000e+00]]
                else:
                    intrinsics = [[279.1911, 0.0000, 210.8265],
                                  [0.0000, 279.3980, 172.3114],
                                  [0.0000, 0.0000, 1.0000]]
                PhotoMask_Output(_, disp_net, intrinsics, j, poses, ref_imgs, save_dir)
            # ***************************************************************

            final_poses = pose2tf_mat(args.rotation_mode, imgs, poses)
            predictions_array[j] = final_poses

            # rel_VO_pose takes the 2nd entry of final_poses; over the whole run this yields T10, T21, T32, ...
            rel_VO_pose = np.identity(4)
            rel_VO_pose[:3, :] = final_poses[1]

            # Apply a scale factor to correct the monocular VO pose, and align its frame to the LiDAR frame
            scale_factor = 7
            rel_VO_pose[:3, -1:] = rel_VO_pose[:3, -1:] * scale_factor
            rel_VO_pose = Transform_matrix_C2L @ rel_VO_pose @ np.linalg.inv(Transform_matrix_C2L)
            rel_VO_pose = GramSchmidtHelper(rel_VO_pose)

            ResultSaver.saveRelativePose(rel_VO_pose)

            PGM.curr_node_idx = j + 1
            PGM.curr_se3 = np.matmul(PGM.curr_se3, rel_VO_pose)
            PGM.addOdometryFactor(rel_VO_pose)
            PGM.prev_node_idx = PGM.curr_node_idx

            ResultSaver.saveUnoptimizedPoseGraphResult(PGM.curr_se3, PGM.curr_node_idx)

            # if (j % num_frames_to_skip_to_show == 0):
            #     ResultSaver.vizCurrentTrajectory(fig_idx=fig_idx)
            #     writer.grab_frame()

            if args.isKitti:
                ATE, RE = compute_pose_error(sample['poses'], final_poses)
                errors[j] = ATE, RE

    '''save output files'''
    if save_dir is not None:
        # np.save(save_dir / 'predictions.npy', predictions_array)
        ResultSaver.saveFinalPoseGraphResult(filename='abs_VO_poses.txt')
        ResultSaver.saveRelativePosesResult(filename='rel_VO_poses.txt')
        np.savetxt(save_dir / 'processing_time.txt', processing_time)
        if args.isKitti:
            np.savetxt(save_dir / 'errors.txt', errors)

    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE', 'RE']
    print('')
    print("Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))
def main():
    args = parser.parse_args()
    if not (args.output_disp or args.output_depth):
        # print("args.output_disp:\n", args.output_disp)
        # print("args.output_depth:\n", args.output_depth)
        print('You must at least output one value !')
        return

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    dataset_dir = Path(args.dataset_dir)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    print("dataset_list:\n", args.dataset_list)
    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = [dataset_dir / file for file in f.read().splitlines()]
    else:
        print("Else!")
        test_files = sum([list(dataset_dir.walkfiles('*.{}'.format(ext)))
                          for ext in args.img_exts], [])
        print(dataset_dir)
        print("dataset_list:\n", args.dataset_list)
        print("test_files:\n", test_files)

    print('{} files to test'.format(len(test_files)))

    for file in tqdm(test_files):
        # print("file:\n", file)
        img = imread(file)

        h, w, _ = img.shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            # PIL's resize expects a (width, height) tuple
            img = np.array(Image.fromarray(img).resize((args.img_width, args.img_height)))
        img = np.transpose(img, (2, 0, 1))

        tensor_img = torch.from_numpy(img.astype(np.float32)).unsqueeze(0)
        tensor_img = ((tensor_img / 255 - 0.5) / 0.5).to(device)

        output = disp_net(tensor_img)[0]

        file_path, file_ext = file.relpath(args.dataset_dir).splitext()
        print(file_path)
        print(file_path.splitall())
        file_name = '-'.join(file_path.splitall()[1:])
        print(file_name)

        if args.output_disp:
            disp = (255 * tensor2array(output, max_value=None, colormap='bone')).astype(np.uint8)
            # imsave(output_dir/'{}_disp{}'.format(file_name, file_ext), np.transpose(disp, (1,2,0)))
        if args.output_depth:
            depth = 1 / output
            # depth = (255*tensor2array(depth, max_value=10, colormap='rainbow')).astype(np.uint8)
            # depth = (2550*tensor2array(depth, max_value=10, colormap='bone')).astype(np.uint8)
            # print(depth.shape)
            # imsave(output_dir/'{}_depth{}'.format(file_name, file_ext), np.transpose(depth, (1,2,0)))
            depth = depth.to(device)

    errors = np.zeros((2, 9, len(test_files)), np.float32)
    mean_errors = errors.mean(2)

    gt = tifffile.imread('/home/zyd/respository/sfmlearner_results/endo_testset/left_depth_map_d4k1_000000.tiff')
    gt = gt[:, :, 2]

    abs_diff, abs_rel, sq_rel, a1, a2, a3 = 0, 0, 0, 0, 0, 0

    if 1:
        crop_mask = gt[0] != gt[0]
        y1, y2 = int(0.40810811 * 1024), int(0.99189189 * 1024)
        x1, x2 = int(0.03594771 * 1280), int(0.96405229 * 1280)
        crop_mask[y1:y2, x1:x2] = 1

    # NOTE: `pred` is assumed to hold the predicted depth maps corresponding to `gt`;
    # it is not defined in this snippet.
    for current_gt, current_pred in zip(gt, pred):
        valid = (current_gt > 0) & (current_gt < 80)
        if 1:
            valid = valid & crop_mask

        valid_gt = current_gt[valid]
        valid_pred = current_pred[valid].clamp(1e-3, 80)

        valid_pred = valid_pred * torch.median(valid_gt) / torch.median(valid_pred)

        thresh = torch.max((valid_gt / valid_pred), (valid_pred / valid_gt))
        a1 += (thresh < 1.25).float().mean()
        a2 += (thresh < 1.25 ** 2).float().mean()
        a3 += (thresh < 1.25 ** 3).float().mean()

        abs_diff += torch.mean(torch.abs(valid_gt - valid_pred))
        abs_rel += torch.mean(torch.abs(valid_gt - valid_pred) / valid_gt)
        sq_rel += torch.mean(((valid_gt - valid_pred) ** 2) / valid_gt)

    error_names = ['abs_diff', 'abs_rel', 'sq_rel', 'rms', 'log_rms', 'abs_log', 'a1', 'a2', 'a3']
    print("Results with scale factor determined by GT/prediction ratio (like the original paper) : ")
    print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
    print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[1]))
def main():
    args = parser.parse_args()
    if args.gt_type == 'KITTI':
        from kitti_eval.depth_evaluation_utils import test_framework_KITTI as test_framework

    disp_net = DispNetS().cuda()
    weights = torch.load(args.pretrained_dispnet)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    if args.pretrained_posenet is None:
        print('no PoseNet specified, scale_factor will be determined by median ratio, '
              'which is kiiinda cheating (but consistent with original paper)')
        seq_length = 0
    else:
        weights = torch.load(args.pretrained_posenet)
        seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
        pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).cuda()
        pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = list(f.read().splitlines())
    else:
        test_files = [file.relpathto(dataset_dir) for file in
                      sum([dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])]

    framework = test_framework(dataset_dir, test_files, seq_length,
                               args.min_depth, args.max_depth)

    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    print('{} files to test'.format(len(test_files)))
    errors = np.zeros((2, 7, len(test_files)), np.float32)

    for j, sample in enumerate(tqdm(framework)):
        tgt_img = sample['tgt']
        ref_imgs = sample['ref']

        h, w, _ = tgt_img.shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            tgt_img = imresize(tgt_img, (args.img_height, args.img_width)).astype(np.float32)
            ref_imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32)
                        for img in ref_imgs]

        tgt_img = np.transpose(tgt_img, (2, 0, 1))
        ref_imgs = [np.transpose(img, (2, 0, 1)) for img in ref_imgs]

        tgt_img = torch.from_numpy(tgt_img).unsqueeze(0)
        tgt_img = ((tgt_img / 255 - 0.5) / 0.2).cuda()
        tgt_img_var = Variable(tgt_img, volatile=True)

        ref_imgs_var = []
        for i, img in enumerate(ref_imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.2).cuda()
            ref_imgs_var.append(Variable(img, volatile=True))

        pred_disp = disp_net(tgt_img_var).data.cpu().numpy()[0, 0]
        gt_depth = sample['gt_depth']

        pred_depth = 1 / pred_disp
        pred_depth_zoomed = zoom(pred_depth,
                                 (gt_depth.shape[0] / pred_depth.shape[0],
                                  gt_depth.shape[1] / pred_depth.shape[1])
                                 ).clip(args.min_depth, args.max_depth)

        if sample['mask'] is not None:
            pred_depth_zoomed = pred_depth_zoomed[sample['mask']]
            gt_depth = gt_depth[sample['mask']]

        if seq_length > 0:
            _, poses = pose_net(tgt_img_var, ref_imgs_var)
            displacements = poses[0, :, :3].norm(2, 1).cpu().data.numpy()  # shape [seq_length - 1]

            scale_factors = (sample['displacements'] / displacements)[sample['displacements'] > 0]
            scale_factors = [s1 / s2 for s1, s2 in zip(sample['displacements'], displacements) if s1 > 0]
            scale_factor = np.mean(scale_factors) if len(scale_factors) > 0 else 0
            if len(scale_factors) == 0:
                print('not good ! ', sample['path'], sample['displacements'])
            errors[0, :, j] = compute_errors(gt_depth, pred_depth_zoomed * scale_factor)

        scale_factor = np.median(gt_depth) / np.median(pred_depth_zoomed)
        errors[1, :, j] = compute_errors(gt_depth, pred_depth_zoomed * scale_factor)

    mean_errors = errors.mean(2)
    error_names = ['abs_rel', 'sq_rel', 'rms', 'log_rms', 'a1', 'a2', 'a3']
    if args.pretrained_posenet:
        print("Results with scale factor determined by PoseNet : ")
        print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
        print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[0]))

    print("Results with scale factor determined by GT/prediction ratio (like the original paper) : ")
    print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
    print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[1]))
def main():
    args = parser.parse_args()
    '''
    # Originally you could choose which output to generate; here both are written out.
    if not (args.output_disp or args.output_depth):
        print('You must at least output one value !')
        return
    '''
    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    dataset_dir = Path(args.dataset_dir)  # str to Path
    output_disp_dir = Path(args.output_dir + '/disp')
    output_depth_dir = Path(args.output_dir + '/depth')
    output_disp_dir.makedirs_p()  # create if missing, including nested subfolders
    output_depth_dir.makedirs_p()

    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = [dataset_dir / file for file in f.read().splitlines()]
    else:
        test_files = sum(
            [dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])

    print('{} files to test'.format(len(test_files)))

    for file in tqdm(test_files):  # test images
        img = imread(file).astype(np.float32)

        h, w, _ = img.shape  # h: 375, w: 1242, _: 3
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            img = imresize(img, (args.img_height, args.img_width)).astype(np.float32)
        img = np.transpose(img, (2, 0, 1))

        tensor_img = torch.from_numpy(img).unsqueeze(0)
        tensor_img = ((tensor_img / 255 - 0.5) / 0.2).to(device)  # network input

        output = disp_net(tensor_img)  # 1, 1, h, w
        output = output[0]

        file_path, file_ext = file.relpath(args.dataset_dir).splitext()
        file_name = '-'.join(file_path.splitall())

        # save to disk
        disp = (255 * tensor2array(output, max_value=None, colormap='bone')).astype(np.uint8)  # 4 x 375 x 1242
        imsave(output_disp_dir / '{}_disp{}'.format(file_name, file_ext),
               np.transpose(disp, (1, 2, 0)))  # move channels last: 375 x 1242 x 4

        depth = 1 / output
        depth = (255 * tensor2array(depth, max_value=10, colormap='rainbow')).astype(np.uint8)
        imsave(output_depth_dir / '{}_depth{}'.format(file_name, file_ext),
               np.transpose(depth, (1, 2, 0))[:, :, 1])
def init_disp_net(pretrained):
    weights = torch.load(pretrained, map_location=device_name)
    disp_net = DispNetS().to(device)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()
    return disp_net
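# A minimal usage sketch for init_disp_net(). The checkpoint path mirrors the one used in
# the last snippet of this file, the 128x416 input size is the KITTI training resolution
# used above, and `device` / `device_name` are expected to be defined at module level as
# in the surrounding scripts; everything else here is illustrative, not the repository's code.
disp_net = init_disp_net('pretrained/dispnet_model_best.pth.tar')
with torch.no_grad():
    dummy = torch.zeros(1, 3, 128, 416, device=device)  # (B, C, H, W) layout expected by DispNetS
    disp = disp_net(dummy)                               # disparity output in eval mode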
def main():
    args = parser.parse_args()
    if not (args.output_disp or args.output_depth):
        # print("args.output_disp:\n", args.output_disp)
        # print("args.output_depth:\n", args.output_depth)
        print('You must at least output one value !')
        return

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    dataset_dir = Path(args.dataset_dir)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    print("dataset_list:\n", args.dataset_list)
    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = [dataset_dir / file for file in f.read().splitlines()]
    else:
        print("Else!")
        test_files = sum([list(dataset_dir.walkfiles('*.{}'.format(ext)))
                          for ext in args.img_exts], [])
        print(dataset_dir)
        print("dataset_list:\n", args.dataset_list)
        print("test_files:\n", test_files)

    print('{} files to test'.format(len(test_files)))

    for file in tqdm(test_files):
        # print("file:\n", file)
        img = imread(file)

        h, w, _ = img.shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            # PIL's resize expects a (width, height) tuple
            img = np.array(Image.fromarray(img).resize((args.img_width, args.img_height)))
        img = np.transpose(img, (2, 0, 1))

        tensor_img = torch.from_numpy(img.astype(np.float32)).unsqueeze(0)
        tensor_img = ((tensor_img / 255 - 0.5) / 0.5).to(device)

        output = disp_net(tensor_img)[0]

        file_path, file_ext = file.relpath(args.dataset_dir).splitext()
        print(file_path)
        print(file_path.splitall())
        file_name = '-'.join(file_path.splitall()[1:])
        print(file_name)

        if args.output_disp:
            disp = (255 * tensor2array(output, max_value=None, colormap='bone')).astype(np.uint8)
            # imsave(output_dir/'{}_disp{}'.format(file_name, file_ext), np.transpose(disp, (1,2,0)))
        if args.output_depth:
            depth = 1 / output
            depth = (255 * tensor2array(depth, max_value=10, colormap='rainbow')).astype(np.uint8)
            depth = (2550 * tensor2array(depth, max_value=10, colormap='bone')).astype(np.uint8)
            print(depth.shape)
            imsave(output_dir / '{}_depth{}'.format(file_name, file_ext),
                   np.transpose(depth, (1, 2, 0)))

        # added by ZYD
        gt = tifffile.imread(
            '/home/zyd/respository/sfmlearner_results/endo_testset/left_depth_map_d3k1_000000.tiff')
        gt = gt[:, :, 2]
        # np.savetxt('d4k1_gt.txt', gt, fmt='%0.8f')
        print("groundtruth:\n", gt)
        print("gt's mean:\n", np.mean(gt))

        tensor = depth.detach().cpu()
        arr = tensor.squeeze().numpy()

        mask = (gt > 1e-3)
        # mask = np.logical_and(gt > 1e-3, gt < 80)
        gt_mask = gt[mask]
        arr_mask = arr[mask]
        scale_factor = np.median(gt_mask) / np.median(arr_mask)
        print("scale_factor:\n", scale_factor)
        arr = scale_factor * arr
        print("array's mean:\n", np.mean(arr))
        np.savetxt('d4k1_pred_depth_1epoch.txt', arr, fmt='%0.8f')

        rmse = np.sqrt(mean_squared_error(arr, gt))
        print("RMSE without masks:\n", rmse)

        RMSE, logR, AbsRel, SqRel, count = 0, 0, 0, 0, 0
        b1, b2, b3 = 0, 0, 0
        for i in range(1024):
            for j in range(1280):
                # if (1e-3 < gt[i, j] < 80):
                if (gt[i, j] > 1e-3):
                    RMSE = RMSE + (gt[i, j] - arr[i, j]) ** 2
                    logR = logR + (np.log(gt[i, j]) - np.log(arr[i, j])) ** 2
                    AbsRel = AbsRel + abs(gt[i, j] - arr[i, j]) / gt[i, j]
                    SqRel = SqRel + ((gt[i, j] - arr[i, j]) ** 2) / gt[i, j]
                    count = count + 1
                    if (0.75 * gt[i, j] < arr[i, j] and arr[i, j] < 1.25 * gt[i, j]):
                        b1 = b1 + 1
                    if (0.4375 * gt[i, j] < arr[i, j] and arr[i, j] < 1.5625 * gt[i, j]):
                        b2 = b2 + 1
                    if (0.046875 * gt[i, j] < arr[i, j] and arr[i, j] < 1.953125 * gt[i, j]):
                        b3 = b3 + 1

        RMSE = (RMSE / count) ** 0.5
        logR = (logR / count) ** 0.5
        AbsRel = AbsRel / count
        SqRel = SqRel / count
        print("count = ", count)
        print("RMSE = ", RMSE)
        print("logR = ", logR)
        print("AbsRel = ", AbsRel)
        print("SqRel = ", SqRel)
        print("1.25 percentage: ", b1 / count)
        print("1.25^2 percentage: ", b2 / count)
        print("1.25^3 percentage: ", b3 / count)
def main():
    args = parser.parse_args()

    o_dir = Path(args.output_dir)
    o_dir.makedirs_p()

    if not (args.output_disp or args.output_depth):
        print('You must at least output one value !')
        return

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    vid_list = [i for i in list(Path(args.dataset_dir).walkdirs()) if i[-4:] == 'data']
    vid_list.sort()
    N = len(vid_list)
    print('{} videos to demo'.format(N))

    for ii, vid_path in enumerate(vid_list):
        vid_name = vid_path.split('/')[-2]
        dataset_dir = Path(vid_path)
        output_dir = Path(args.output_dir + vid_name)
        output_dir.makedirs_p()

        test_files = sum([list(dataset_dir.walkfiles('*.{}'.format(ext)))
                          for ext in args.img_exts], [])
        vid_save_name = str(args.output_dir) + '{}.mp4'.format(vid_name)
        print('{}/{} - {} - {} files to test::video saved to \'{}\''.format(
            ii, N, vid_name, len(test_files), vid_save_name))

        for file in tqdm(test_files):
            img = imread(file)

            h, w, _ = img.shape
            if (not args.no_resize) and (h != args.img_height or w != args.img_width):
                img = cv2.resize(img, (args.img_width, args.img_height))
            img = np.transpose(img, (2, 0, 1))

            tensor_img = torch.from_numpy(img.astype(np.float32)).unsqueeze(0)
            tensor_img = ((tensor_img / 255 - 0.5) / 0.5).to(device)

            output = disp_net(tensor_img)[0]

            file_path, file_ext = file.relpath(args.dataset_dir).splitext()
            # print(file_path)
            # print(file_path.splitall())
            file_name = '-'.join(file_path.splitall()[1:])
            # print(file_name)

            if args.output_disp:
                disp = (255 * tensor2array(output, max_value=None, colormap='bone')).astype(np.uint8)
                imsave(output_dir / '{}_disp{}'.format(file_name, file_ext),
                       np.transpose(disp, (1, 2, 0)))
            if args.output_depth:
                depth = 1 / output
                depth = (255 * tensor2array(depth, max_value=10, colormap='rainbow')).astype(np.uint8)
                imsave(output_dir / '{}_depth{}'.format(file_name, file_ext),
                       np.transpose(depth, (1, 2, 0)))

        # make vid
        img_dir = output_dir
        test_files.sort()
        # 0x7634706d is the 'mp4v' FourCC code
        video = cv2.VideoWriter(vid_save_name, 0x7634706d, 10,
                                (args.img_width, args.img_height * 2))
        for file in test_files:
            file_core_name, file_ext = str(file).split('.')[-2:]
            file_core_name = '-'.join(file_core_name.split('/')[-3:])

            input_img = cv2.imread(file)
            h, w, _ = input_img.shape
            if (not args.no_resize) and (h != args.img_height or w != args.img_width):
                input_img = cv2.resize(input_img, (args.img_width, args.img_height))
            disp_img = cv2.imread(output_dir + '/{}_disp.{}'.format(file_core_name, file_ext))

            video.write(np.concatenate((input_img, disp_img)))

        cv2.destroyAllWindows()
        video.release()
class time_lapse:
    def __init__(self):
        self.start = time.time()
        self.count = 0

    def now(self):
        self.count += 1
        print("[%d]time elapsed = %f" % (self.count, time.time() - self.start))


tl = time_lapse()
tl.now()

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

disp_net = DispNetS().to(device)
weights = torch.load("pretrained/dispnet_model_best.pth.tar", map_location='cpu')
disp_net.load_state_dict(weights['state_dict'])
disp_net.eval()

tgt_img0 = io.imread("samples/street1.jpeg")
h, w, c = tgt_img0.shape
# print(h, w, c)
print(h, w)

# Resize to a fixed width of 600 px while keeping the aspect ratio
ww = 600
hh = int(h * ww / w + 0.5)
tgt_img0 = cv2.resize(tgt_img0, (ww, hh))
print(ww, hh)
# tgt_img0 = transform.resize(tgt_img0, (hh, ww))

tgt_img0 = torch.from_numpy(tgt_img0)
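# The snippet above stops after converting the image to a tensor. A minimal sketch of the
# preprocessing that the other scripts in this file apply before calling disp_net
# (HWC -> CHW, add a batch dimension, normalize with mean 0.5 / std 0.5) is shown below;
# treat it as an assumption about how tgt_img0 would be fed to the network, not as the
# original continuation of this script.
tgt_img0 = tgt_img0.permute(2, 0, 1).unsqueeze(0).float()  # (1, 3, hh, ww)
tgt_img0 = ((tgt_img0 / 255 - 0.5) / 0.5).to(device)
with torch.no_grad():
    disp0 = disp_net(tgt_img0)                             # predicted disparity map
tl.now()                                                   # report elapsed time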