Example #1
def main():
    args = parser.parse_args()
    if not (args.output_disp or args.output_depth):
        print('You must at least output one value !')
        return

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    dataset_dir = Path(args.dataset_dir)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = [dataset_dir / file for file in f.read().splitlines()]
    else:
        test_files = sum([
            list(dataset_dir.walkfiles('*.{}'.format(ext)))
            for ext in args.img_exts
        ], [])

    print('{} files to test'.format(len(test_files)))

    for file in tqdm(test_files):

        img = imread(file)

        h, w, _ = img.shape
        if (not args.no_resize) and (h != args.img_height
                                     or w != args.img_width):
            img = np.array(
                Image.fromarray(img).resize((args.img_width, args.img_height)))
        img = np.transpose(img, (2, 0, 1))

        tensor_img = torch.from_numpy(img.astype(np.float32)).unsqueeze(0)
        tensor_img = ((tensor_img / 255 - 0.5) / 0.5).to(device)

        output = disp_net(tensor_img)[0]

        file_path, file_ext = file.relpath(args.dataset_dir).splitext()
        print(file_path)
        print(file_path.splitall())
        file_name = '-'.join(file_path.splitall()[1:])
        print(file_name)

        if args.output_disp:
            disp = (255 * tensor2array(output, max_value=None,
                                       colormap='bone')).astype(np.uint8)
            imsave(output_dir / '{}_disp{}'.format(file_name, file_ext),
                   np.transpose(disp, (1, 2, 0)))
        if args.output_depth:
            depth = 1 / output
            depth = (255 * tensor2array(
                depth, max_value=None, colormap='magma')).astype(np.uint8)
            imsave(output_dir / '{}_depth{}'.format(file_name, file_ext),
                   np.transpose(depth, (1, 2, 0)))
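Note: most of these snippets rely on a tensor2array helper from the surrounding project to turn a single-channel disparity tensor into a colormapped image. A minimal sketch of what such a helper might look like, assuming matplotlib colormaps and the channel-first RGBA output implied by the transposes above (the project's actual implementation may differ):

import numpy as np
from matplotlib import cm

def tensor2array_sketch(tensor, max_value=None, colormap='bone'):
    # hypothetical stand-in for the project's tensor2array helper
    arr = tensor.detach().cpu().squeeze().numpy()
    if max_value is None:
        max_value = arr.max()  # normalize by the tensor's own maximum
    arr = np.clip(arr / max_value, 0, 1)
    colored = cm.get_cmap(colormap)(arr)  # (H, W, 4) RGBA floats in [0, 1]
    return colored.transpose(2, 0, 1)     # channel-first, as expected by the calls above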
Example #2
def main():
    args = parser.parse_args()
    if not (args.output_disp or args.output_depth):
        print('You must at least output one value !')
        return

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    dataset_dir = Path(args.dataset_dir)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = [dataset_dir / file for file in f.read().splitlines()]
    else:
        test_files = sum(
            [dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts],
            [])

    print('{} files to test'.format(len(test_files)))

    for file in tqdm(test_files):

        img = imread(file).astype(np.float32)

        h, w, _ = img.shape
        if (not args.no_resize) and (h != args.img_height
                                     or w != args.img_width):
            img = imresize(img, (args.img_height, args.img_width)).astype(
                np.float32)
        img = np.transpose(img, (2, 0, 1))

        tensor_img = torch.from_numpy(img).unsqueeze(0)
        tensor_img = ((tensor_img / 255 - 0.5) / 0.2).to(device)

        output = disp_net(tensor_img)[0]

        if args.output_disp:
            disp = (255 * tensor2array(
                output, max_value=None, colormap='bone',
                channel_first=False)).astype(np.uint8)
            imsave(output_dir / '{}_disp{}'.format(file.namebase, file.ext),
                   disp)
        if args.output_depth:
            depth = 1 / output
            depth = (255 * tensor2array(
                depth, max_value=10, colormap='rainbow',
                channel_first=False)).astype(np.uint8)
            imsave(output_dir / '{}_depth{}'.format(file.namebase, file.ext),
                   depth)
Example #3
def main():
    args = parser.parse_args()
    if args.gt_type == 'KITTI':
        from kitti_eval.depth_evaluation_utils import test_framework_KITTI as test_framework
    elif args.gt_type == 'stillbox':
        from stillbox_eval.depth_evaluation_utils import test_framework_stillbox as test_framework

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained_dispnet)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    if args.pretrained_posenet is None:
        print('no PoseNet specified, scale_factor will be determined by median ratio, which is kiiinda cheating\
            (but consistent with original paper)')
        seq_length = 0
    else:
        weights = torch.load(args.pretrained_posenet)
        seq_length = int(weights['state_dict']['conv1.0.weight'].size(1)/3)
        pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
        pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = list(f.read().splitlines())
    else:
        test_files = [file.relpathto(dataset_dir) for file in sum([dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])]

    framework = test_framework(dataset_dir, test_files, seq_length, args.min_depth, args.max_depth)

    print('{} files to test'.format(len(test_files)))
    errors = np.zeros((2, 7, len(test_files)), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()

    for j, sample in enumerate(tqdm(framework)):
        tgt_img = sample['tgt']

        ref_imgs = sample['ref']

        h,w,_ = tgt_img.shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            tgt_img = imresize(tgt_img, (args.img_height, args.img_width)).astype(np.float32)
            ref_imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in ref_imgs]

        tgt_img = np.transpose(tgt_img, (2, 0, 1))
        ref_imgs = [np.transpose(img, (2,0,1)) for img in ref_imgs]

        tgt_img = torch.from_numpy(tgt_img).unsqueeze(0)
        tgt_img = ((tgt_img/255 - 0.5)/0.5).to(device)

        for i, img in enumerate(ref_imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img/255 - 0.5)/0.5).to(device)
            ref_imgs[i] = img

        pred_disp = disp_net(tgt_img).cpu().numpy()[0,0]

        if args.output_dir is not None:
            if j == 0:
                predictions = np.zeros((len(test_files), *pred_disp.shape))
            predictions[j] = 1/pred_disp

        gt_depth = sample['gt_depth']

        pred_depth = 1/pred_disp
        pred_depth_zoomed = zoom(pred_depth,
                                 (gt_depth.shape[0]/pred_depth.shape[0],
                                  gt_depth.shape[1]/pred_depth.shape[1])
                                 ).clip(args.min_depth, args.max_depth)
        if sample['mask'] is not None:
            pred_depth_zoomed = pred_depth_zoomed[sample['mask']]
            gt_depth = gt_depth[sample['mask']]

        if seq_length > 0:
            # Reorganize ref_imgs : tgt is middle frame but not necessarily the one used in DispNetS
            # (in case sample to test was in end or beginning of the image sequence)
            middle_index = seq_length//2
            tgt = ref_imgs[middle_index]
            reorganized_refs = ref_imgs[:middle_index] + ref_imgs[middle_index + 1:]
            _, poses = pose_net(tgt, reorganized_refs)
            mean_displacement_magnitude = poses[0,:,:3].norm(2,1).mean().item()

            scale_factor = sample['displacement'] / mean_displacement_magnitude
            errors[0,:,j] = compute_errors(gt_depth, pred_depth_zoomed*scale_factor)

        # scale_factor = np.median(gt_depth)/np.median(pred_depth_zoomed)
        scale_factor = 1
        errors[1,:,j] = compute_errors(gt_depth, pred_depth_zoomed*scale_factor)

    mean_errors = errors.mean(2)
    error_names = ['abs_rel','sq_rel','rms','log_rms','a1','a2','a3']
    if args.pretrained_posenet:
        print("Results with scale factor determined by PoseNet : ")
        print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
        print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[0]))

    print("Results with scale factor determined by GT/prediction ratio (like the original paper) : ")
    print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
    print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[1]))

    if args.output_dir is not None:
        np.save(output_dir/'predictions.npy', predictions)
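The evaluation above fills errors via a compute_errors helper and reports seven metrics in the order abs_rel, sq_rel, rms, log_rms, a1, a2, a3. A minimal sketch of such a helper, assuming the standard Eigen-style depth metric definitions rather than the repository's exact code:

import numpy as np

def compute_errors_sketch(gt, pred):
    # hypothetical helper returning the seven metrics printed above
    thresh = np.maximum(gt / pred, pred / gt)
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()

    rms = np.sqrt(((gt - pred) ** 2).mean())
    log_rms = np.sqrt(((np.log(gt) - np.log(pred)) ** 2).mean())
    abs_rel = (np.abs(gt - pred) / gt).mean()
    sq_rel = (((gt - pred) ** 2) / gt).mean()
    return abs_rel, sq_rel, rms, log_rms, a1, a2, a3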
Example #4
def main():
    global tgt_img, disp_net
    args = parser.parse_args()
    '''Load the trained model'''
    weights = torch.load(args.pretrained_posenet)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1,
                          output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)
    # MD5 ID of the network model
    net_ID = MD5_ID(args.pretrained_posenet)
    # LiDAR-to-camera (L-to-C) transformation matrix, used to align the
    # predicted poses to the LiDAR coordinate frame
    Transform_matrix_L2C = np.identity(4)
    '''Kitti switch'''
    if args.isKitti:
        if not args.isDynamic:
            from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework
        else:
            from kitti_eval.pose_evaluation_utils_forDynamicTest import test_framework_KITTI as test_framework
        save_dir = os.path.join(args.output_dir, "kitti", args.sequences[0],
                                'net_' + net_ID)
        if args.trainedOnMydataset:
            downsample_img_height = args.img_height
            downsample_img_width = args.img_width
        else:
            # on kitti train set
            downsample_img_height = 128
            downsample_img_width = 416

        Transform_matrix_L2C[:3, :3] = np.array(
            [[7.533745e-03, -9.999714e-01, -6.166020e-04],
             [1.480249e-02, 7.280733e-04, -9.998902e-01],
             [9.998621e-01, 7.523790e-03, 1.480755e-02]])
        Transform_matrix_L2C[:3, -1:] = np.array(
            [-4.069766e-03, -7.631618e-02, -2.717806e-01]).reshape(3, 1)
    else:
        from mydataset_eval.pose_evaluation_utils import test_framework_MYDATASET as test_framework
        save_dir = os.path.join(args.output_dir, "mydataset",
                                args.sequences[0], 'net_' + net_ID)
        if args.trainedOnMydataset:
            downsample_img_height = args.img_height
            downsample_img_width = args.img_width
        else:
            # on kitti train set
            downsample_img_height = 128
            downsample_img_width = 416
        Transform_matrix_L2C[:3, :3] = np.array(
            [[-1.51482698e-02, -9.99886648e-01, 5.36310553e-03],
             [-4.65337018e-03, -5.36307196e-03, -9.99969412e-01],
             [9.99870070e-01, -1.56647995e-02, -4.48880010e-03]])
        Transform_matrix_L2C[:3, -1:] = np.array(
            [4.29029924e-03, -6.08539196e-02, -9.20346161e-02]).reshape(3, 1)
    Transform_matrix_L2C = GramSchmidtHelper(Transform_matrix_L2C)
    Transform_matrix_C2L = np.linalg.inv(Transform_matrix_L2C)
    # ************************* optional (can be removed) *********************************
    # The disp net is needed here to evaluate the photometric mask in dynamic scenes
    if args.isDynamic:
        from models import DispNetS
        disp_net = DispNetS().to(device)
        weights = torch.load(args.pretrained_dispnet)
        disp_net.load_state_dict(weights['state_dict'])
        disp_net.eval()

    # normalize = custom_transforms.Normalize(mean=[0.5, 0.5, 0.5],
    #                                         std=[0.5, 0.5, 0.5])
    # valid_transform = custom_transforms.Compose([custom_transforms.ArrayToTensor(), normalize])
    # from datasets.sequence_folders import SequenceFolder
    # val_set = SequenceFolder(
    #     '/home/sda/mydataset/preprocessing/formatted/data/',
    #     transform=valid_transform,
    #     seed=0,
    #     train=False,
    #     sequence_length=3,
    # )
    # val_loader = torch.utils.data.DataLoader(
    #     val_set, batch_size=1, shuffle=False,
    #     num_workers=4, pin_memory=True)
    #
    # intrinsics = None
    # for i, (tgt_img, ref_imgs, intrinsics, intrinsics_inv) in enumerate(val_loader):
    #     intrinsics = intrinsics.to(device)
    #     break
    # *************************************************************************
    '''Load the test dataset'''
    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)
    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)
    '''Data written to the output folder'''
    num_poses = len(framework) - (seq_length - 2)
    predictions_array = np.zeros((len(framework), seq_length, 3, 4))
    processing_time = np.zeros((num_poses - 1, 1))
    # Output folder
    save_dir = Path(save_dir)
    print('Output files will be saved in: ' + save_dir)
    if not os.path.exists(save_dir): save_dir.makedirs_p()
    # Pose Graph Manager (for back-end optimization) initialization
    PGM = PoseGraphManager()
    PGM.addPriorFactor()
    # Result saver
    num_frames = len(framework)
    ResultSaver = PoseGraphResultSaver(init_pose=PGM.curr_se3,
                                       save_gap=args.save_gap,
                                       num_frames=num_frames,
                                       seq_idx=args.sequences[0],
                                       save_dir=save_dir)

    # for save the results as a video
    fig_idx = 1
    fig = plt.figure(fig_idx)
    writer = FFMpegWriter(fps=15)
    video_path = save_dir + '/' + args.sequences[0] + ".mp4"
    num_frames_to_skip_to_show = 5
    num_frames_to_save = np.floor(num_frames / num_frames_to_skip_to_show)
    with writer.saving(
            fig, video_path,
            num_frames_to_save):  # this video saving part is optional
        for j, sample in enumerate(tqdm(framework)):
            '''
            VO (visual odometry) part
            '''
            imgs = sample['imgs']
            w, h = imgs[0].size
            if (not args.no_resize) and (h != downsample_img_height
                                         or w != downsample_img_width):
                imgs = [
                    imresize(img, (downsample_img_height,
                                   downsample_img_width)).astype(np.float32)
                    for img in imgs
                ]
            imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

            ref_imgs = []
            for i, img in enumerate(imgs):
                img = torch.from_numpy(img).unsqueeze(0)
                img = ((img / 255 - 0.5) / 0.5).to(device)
                if i == len(imgs) // 2:
                    tgt_img = img
                else:
                    ref_imgs.append(img)

            startTimeVO = time.time()
            _, poses = pose_net(tgt_img, ref_imgs)
            processing_time[j] = (time.time() - startTimeVO) / (seq_length - 1)

            # ************************** optional (can be removed) ********************************
            if args.isDynamic:
                '''Evaluate the effect of the photometric mask'''
                if args.isKitti:
                    intrinsics = [[
                        2.416744631239935472e+02, 0.000000000000000000e+00,
                        2.041680103059581199e+02
                    ],
                                  [
                                      0.000000000000000000e+00,
                                      2.462848682666666491e+02,
                                      5.900083200000000261e+01
                                  ],
                                  [
                                      0.000000000000000000e+00,
                                      0.000000000000000000e+00,
                                      1.000000000000000000e+00
                                  ]]
                else:
                    intrinsics = [[279.1911, 0.0000, 210.8265],
                                  [0.0000, 279.3980, 172.3114],
                                  [0.0000, 0.0000, 1.0000]]
                PhotoMask_Output(_, disp_net, intrinsics, j, poses, ref_imgs,
                                 save_dir)
            # ***************************************************************

            final_poses = pose2tf_mat(args.rotation_mode, imgs, poses)
            predictions_array[j] = final_poses
            # rel_VO_pose takes the 2nd entry of final_poses; over the whole sequence this yields T10, T21, T32, ...
            rel_VO_pose = np.identity(4)
            rel_VO_pose[:3, :] = final_poses[1]
            # Apply a scale factor to correct the monocular VO poses and align them to the LiDAR coordinate frame
            scale_factor = 7
            rel_VO_pose[:3, -1:] = rel_VO_pose[:3, -1:] * scale_factor
            rel_VO_pose = Transform_matrix_C2L @ rel_VO_pose @ np.linalg.inv(
                Transform_matrix_C2L)
            rel_VO_pose = GramSchmidtHelper(rel_VO_pose)
            ResultSaver.saveRelativePose(rel_VO_pose)

            PGM.curr_node_idx = j + 1
            PGM.curr_se3 = np.matmul(PGM.curr_se3, rel_VO_pose)
            PGM.addOdometryFactor(rel_VO_pose)
            PGM.prev_node_idx = PGM.curr_node_idx
            ResultSaver.saveUnoptimizedPoseGraphResult(PGM.curr_se3,
                                                       PGM.curr_node_idx)

            # if (j % num_frames_to_skip_to_show == 0):
            #     ResultSaver.vizCurrentTrajectory(fig_idx=fig_idx)
            #     writer.grab_frame()

            if args.isKitti:
                ATE, RE = compute_pose_error(sample['poses'], final_poses)
                errors[j] = ATE, RE
        '''save output files'''
        if save_dir is not None:
            # np.save(save_dir / 'predictions.npy', predictions_array)
            ResultSaver.saveFinalPoseGraphResult(filename='abs_VO_poses.txt')
            ResultSaver.saveRelativePosesResult(filename='rel_VO_poses.txt')
            np.savetxt(save_dir / 'processing_time.txt', processing_time)
            if args.isKitti:
                np.savetxt(save_dir / 'errors.txt', errors)

        mean_errors = errors.mean(0)
        std_errors = errors.std(0)
        error_names = ['ATE', 'RE']
        print('')
        print("Results")
        print("\t {:>10}, {:>10}".format(*error_names))
        print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
        print("std \t {:10.4f}, {:10.4f}".format(*std_errors))
Example #5
def main():
    args = parser.parse_args()
    if not(args.output_disp or args.output_depth):
        # print("args.output_disp:\n", args.output_disp)
        # print("args.output_depth:\n", args.output_depth)
        print('You must at least output one value !')
        return

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    dataset_dir = Path(args.dataset_dir)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()
    print("dataset_list:\n", args.dataset_list)
    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = [dataset_dir/file for file in f.read().splitlines()]
    else:
        print("Else!")
        test_files = sum([list(dataset_dir.walkfiles('*.{}'.format(ext))) for ext in args.img_exts], [])
    print(dataset_dir)
    print("dataset_list:\n", args.dataset_list)
    print("test_files:\n", test_files)
    print('{} files to test'.format(len(test_files)))

    for file in tqdm(test_files):
        # print("file:\n", file)
        img = imread(file)

        h,w,_ = img.shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            img = np.array(Image.fromarray(img).resize((args.img_width, args.img_height)))
        img = np.transpose(img, (2, 0, 1))

        tensor_img = torch.from_numpy(img.astype(np.float32)).unsqueeze(0)
        tensor_img = ((tensor_img/255 - 0.5)/0.5).to(device)

        output = disp_net(tensor_img)[0]
        file_path, file_ext = file.relpath(args.dataset_dir).splitext()
        print(file_path)
        print(file_path.splitall())
        file_name = '-'.join(file_path.splitall()[1:])
        print(file_name)

        if args.output_disp:
            disp = (255*tensor2array(output, max_value=None, colormap='bone')).astype(np.uint8)
            # imsave(output_dir/'{}_disp{}'.format(file_name, file_ext), np.transpose(disp, (1,2,0)))
        if args.output_depth:
            depth = 1/output
            # depth = (255*tensor2array(depth, max_value=10, colormap='rainbow')).astype(np.uint8)
            # depth = (2550*tensor2array(depth, max_value=10, colormap='bone')).astype(np.uint8)
            # print(depth.shape)
            # imsave(output_dir/'{}_depth{}'.format(file_name, file_ext), np.transpose(depth, (1,2,0)))
            depth = depth.to(device)
            errors = np.zeros((2, 9, len(test_files)), np.float32)
            mean_errors = errors.mean(2)

            gt = tifffile.imread('/home/zyd/respository/sfmlearner_results/endo_testset/left_depth_map_d4k1_000000.tiff')
            gt = gt[:, :, 2]

            abs_diff, abs_rel, sq_rel, a1, a2, a3 = 0,0,0,0,0,0
            if 1:
                # Garg/Eigen crop fractions applied to a 1024x1280 image
                crop_mask = gt[0] != gt[0]
                y1, y2 = int(0.40810811 * 1024), int(0.99189189 * 1024)
                x1, x2 = int(0.03594771 * 1280), int(0.96405229 * 1280)
                crop_mask[y1:y2, x1:x2] = 1

            for current_gt, current_pred in zip(gt, pred):
                valid = (current_gt > 0) & (current_gt < 80)
            if 1:
                valid = valid & crop_mask

            valid_gt = current_gt[valid]
            valid_pred = current_pred[valid].clamp(1e-3, 80)

            valid_pred = valid_pred * torch.median(valid_gt)/torch.median(valid_pred)

            thresh = torch.max((valid_gt / valid_pred), (valid_pred / valid_gt))
            a1 += (thresh < 1.25).float().mean()
            a2 += (thresh < 1.25 ** 2).float().mean()
            a3 += (thresh < 1.25 ** 3).float().mean()

            abs_diff += torch.mean(torch.abs(valid_gt - valid_pred))
            abs_rel += torch.mean(torch.abs(valid_gt - valid_pred) / valid_gt)

            sq_rel += torch.mean(((valid_gt - valid_pred)**2) / valid_gt)
            
            error_names = ['abs_diff', 'abs_rel', 'sq_rel', 'rms', 'log_rms', 'abs_log', 'a1', 'a2', 'a3']

            print("Results with scale factor determined by GT/prediction ratio (like the original paper) : ")
            print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
            print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[1]))
Example #6
def main():
    args = parser.parse_args()
    if args.gt_type == 'KITTI':
        from kitti_eval.depth_evaluation_utils import test_framework_KITTI as test_framework

    disp_net = DispNetS().cuda()
    weights = torch.load(args.pretrained_dispnet)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    if args.pretrained_posenet is None:
        print(
            'no PoseNet specified, scale_factor will be determined by median ratio, which is kiiinda cheating\
            (but consistent with original paper)')
        seq_length = 0
    else:
        weights = torch.load(args.pretrained_posenet)
        seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
        pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1,
                              output_exp=False).cuda()
        pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = list(f.read().splitlines())
    else:
        test_files = [
            file.relpathto(dataset_dir) for file in sum([
                dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts
            ], [])
        ]

    framework = test_framework(dataset_dir, test_files, seq_length,
                               args.min_depth, args.max_depth)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    print('{} files to test'.format(len(test_files)))
    errors = np.zeros((2, 7, len(test_files)), np.float32)

    for j, sample in enumerate(tqdm(framework)):
        tgt_img = sample['tgt']

        ref_imgs = sample['ref']

        h, w, _ = tgt_img.shape
        if (not args.no_resize) and (h != args.img_height
                                     or w != args.img_width):
            tgt_img = imresize(
                tgt_img, (args.img_height, args.img_width)).astype(np.float32)
            ref_imgs = [
                imresize(img,
                         (args.img_height, args.img_width)).astype(np.float32)
                for img in ref_imgs
            ]

        tgt_img = np.transpose(tgt_img, (2, 0, 1))
        ref_imgs = [np.transpose(img, (2, 0, 1)) for img in ref_imgs]

        tgt_img = torch.from_numpy(tgt_img).unsqueeze(0)
        tgt_img = ((tgt_img / 255 - 0.5) / 0.2).cuda()
        tgt_img_var = Variable(tgt_img, volatile=True)  # legacy pre-0.4 PyTorch API; torch.no_grad() is the modern equivalent

        ref_imgs_var = []
        for i, img in enumerate(ref_imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.2).cuda()
            ref_imgs_var.append(Variable(img, volatile=True))

        pred_disp = disp_net(tgt_img_var).data.cpu().numpy()[0, 0]

        gt_depth = sample['gt_depth']

        pred_depth = 1 / pred_disp
        pred_depth_zoomed = zoom(
            pred_depth,
            (gt_depth.shape[0] / pred_depth.shape[0], gt_depth.shape[1] /
             pred_depth.shape[1])).clip(args.min_depth, args.max_depth)
        if sample['mask'] is not None:
            pred_depth_zoomed = pred_depth_zoomed[sample['mask']]
            gt_depth = gt_depth[sample['mask']]
        if seq_length > 0:
            _, poses = pose_net(tgt_img_var, ref_imgs_var)
            displacements = poses[0, :, :3].norm(
                2, 1).cpu().data.numpy()  # shape: (seq_length - 1,)

            scale_factors = (sample['displacements'] /
                             displacements)[sample['displacements'] > 0]
            scale_factors = [
                s1 / s2
                for s1, s2 in zip(sample['displacements'], displacements)
                if s1 > 0
            ]
            scale_factor = np.mean(
                scale_factors) if len(scale_factors) > 0 else 0
            if len(scale_factors) == 0:
                print('not good ! ', sample['path'], sample['displacements'])
            errors[0, :, j] = compute_errors(gt_depth,
                                             pred_depth_zoomed * scale_factor)

        scale_factor = np.median(gt_depth) / np.median(pred_depth_zoomed)
        errors[1, :, j] = compute_errors(gt_depth,
                                         pred_depth_zoomed * scale_factor)

    mean_errors = errors.mean(2)
    error_names = ['abs_rel', 'sq_rel', 'rms', 'log_rms', 'a1', 'a2', 'a3']
    if args.pretrained_posenet:
        print("Results with scale factor determined by PoseNet : ")
        print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(
            *error_names))
        print(
            "{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}"
            .format(*mean_errors[0]))

    print(
        "Results with scale factor determined by GT/prediction ratio (like the original paper) : "
    )
    print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(
        *error_names))
    print(
        "{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".
        format(*mean_errors[1]))
Example #7
def main():
    args = parser.parse_args()
    '''
    # no longer choosing which one to generate; both are output here
    if not(args.output_disp or args.output_depth):
        print('You must at least output one value !')
        return
    '''
    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    dataset_dir = Path(args.dataset_dir)  # convert str to Path
    output_disp_dir = Path(args.output_dir + '/disp')
    output_depth_dir = Path(args.output_dir + '/depth')

    output_disp_dir.makedirs_p()  # create the directory (and any missing parents) if it does not exist
    output_depth_dir.makedirs_p()

    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = [dataset_dir / file for file in f.read().splitlines()]
    else:
        test_files = sum(
            [dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts],
            [])

    print('{} files to test'.format(len(test_files)))

    for file in tqdm(test_files):  # test images

        img = imread(file).astype(np.float32)

        h, w, _ = img.shape  # e.g. h: 375, w: 1242, _: 3
        if (not args.no_resize) and (h != args.img_height
                                     or w != args.img_width):
            img = imresize(img, (args.img_height, args.img_width)).astype(
                np.float32)
        img = np.transpose(img, (2, 0, 1))

        tensor_img = torch.from_numpy(img).unsqueeze(0)
        tensor_img = ((tensor_img / 255 - 0.5) / 0.2).to(device)

        # network input
        output = disp_net(tensor_img)  # 1,1,h,w
        output = output[0]
        file_path, file_ext = file.relpath(args.dataset_dir).splitext()
        file_name = '-'.join(file_path.splitall())

        #save to disk

        disp = (255 *
                tensor2array(output, max_value=None, colormap='bone')).astype(
                    np.uint8)  # 4x375x1242
        imsave(output_disp_dir / '{}_disp{}'.format(file_name, file_ext),
               np.transpose(disp, (1, 2, 0)))  # transpose the multi-channel image to (1, 2, 0): 375x1242x4

        depth = 1 / output
        depth = (255 *
                 tensor2array(depth, max_value=10, colormap='rainbow')).astype(
                     np.uint8)
        imsave(output_depth_dir / '{}_depth{}'.format(file_name, file_ext),
               np.transpose(depth, (1, 2, 0))[:, :, 1])
Example #8
def init_disp_net(pretrained):
	weights = torch.load(pretrained, map_location=device_name)
	disp_net = DispNetS().to(device)
	disp_net.load_state_dict(weights['state_dict'])
	disp_net.eval()
	return disp_net
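A short usage sketch for the helper above, assuming device, device_name and DispNetS are defined as in the other examples; the checkpoint and sample image paths are borrowed from Example #11:

import numpy as np
import torch
from imageio import imread

disp_net = init_disp_net('pretrained/dispnet_model_best.pth.tar')

img = imread('samples/street1.jpeg').astype(np.float32)
img = np.transpose(img, (2, 0, 1))
tensor_img = torch.from_numpy(img).unsqueeze(0)
tensor_img = ((tensor_img / 255 - 0.5) / 0.5).to(device)

with torch.no_grad():
    disp = disp_net(tensor_img)[0]  # predicted disparity map
    depth = 1 / disp                # depth is the inverse of the predicted disparity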
Example #9
def main():
    args = parser.parse_args()
    if not (args.output_disp or args.output_depth):
        # print("args.output_disp:\n", args.output_disp)
        # print("args.output_depth:\n", args.output_depth)
        print('You must at least output one value !')
        return

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    dataset_dir = Path(args.dataset_dir)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()
    print("dataset_list:\n", args.dataset_list)
    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = [dataset_dir / file for file in f.read().splitlines()]
    else:
        print("Else!")
        test_files = sum([
            list(dataset_dir.walkfiles('*.{}'.format(ext)))
            for ext in args.img_exts
        ], [])
    print(dataset_dir)
    print("dataset_list:\n", args.dataset_list)
    print("test_files:\n", test_files)
    print('{} files to test'.format(len(test_files)))

    for file in tqdm(test_files):
        # print("file:\n", file)
        img = imread(file)

        h, w, _ = img.shape
        if (not args.no_resize) and (h != args.img_height
                                     or w != args.img_width):
            img = np.array(
                Image.fromarray(img).resize(
                    (args.img_width, args.img_height)))
        img = np.transpose(img, (2, 0, 1))

        tensor_img = torch.from_numpy(img.astype(np.float32)).unsqueeze(0)
        tensor_img = ((tensor_img / 255 - 0.5) / 0.5).to(device)

        output = disp_net(tensor_img)[0]
        file_path, file_ext = file.relpath(args.dataset_dir).splitext()
        print(file_path)
        print(file_path.splitall())
        file_name = '-'.join(file_path.splitall()[1:])
        print(file_name)

        if args.output_disp:
            disp = (255 * tensor2array(output, max_value=None,
                                       colormap='bone')).astype(np.uint8)
            # imsave(output_dir/'{}_disp{}'.format(file_name, file_ext), np.transpose(disp, (1,2,0)))
        if args.output_depth:
            depth = 1 / output

            # keep `depth` as a tensor for the evaluation below; use a separate
            # array for the colormapped visualization
            depth_viz = (255 * tensor2array(
                depth, max_value=10, colormap='rainbow')).astype(np.uint8)
            print(depth_viz.shape)
            imsave(output_dir / '{}_depth{}'.format(file_name, file_ext),
                   np.transpose(depth_viz, (1, 2, 0)))

            # added by ZYD
            gt = tifffile.imread(
                '/home/zyd/respository/sfmlearner_results/endo_testset/left_depth_map_d3k1_000000.tiff'
            )
            gt = gt[:, :, 2]
            # np.savetxt('d4k1_gt.txt',gt,fmt='%0.8f')
            print("groundtruth:\n", gt)
            print("gt's mean:\n", np.mean(gt))

            tensor = depth.detach().cpu()
            arr = tensor.squeeze().numpy()

            mask = (gt > 1e-3)
            # mask = np.logical_and(gt > 1e-3, gt < 80)
            gt_mask = gt[mask]
            arr_mask = arr[mask]

            scale_factor = np.median(gt_mask) / np.median(arr_mask)
            print("scale_factor:\n", scale_factor)

            arr = scale_factor * arr
            print("array's mean:\n", np.mean(arr))
            np.savetxt('d4k1_pred_depth_1epoch.txt', arr, fmt='%0.8f')

            rmse = np.sqrt(mean_squared_error(arr, gt))
            print("RMSE without masks:\n", rmse)

            RMSE, logR, AbsRel, SqRel, count = 0, 0, 0, 0, 0
            b1, b2, b3 = 0, 0, 0

            for i in range(1024):
                for j in range(1280):
                    # if (1e-3 < gt[i, j] < 80):
                    if (gt[i, j] > 1e-3):
                        RMSE = RMSE + (gt[i, j] - arr[i, j])**2
                        logR = logR + (np.log(gt[i, j]) - np.log(arr[i, j]))**2
                        AbsRel = AbsRel + abs(gt[i, j] - arr[i, j]) / gt[i, j]
                        SqRel = SqRel + ((gt[i, j] - arr[i, j])**2) / gt[i, j]
                        count = count + 1
                        if (0.75 * gt[i, j] < arr[i, j]
                                and arr[i, j] < 1.25 * gt[i, j]):
                            b1 = b1 + 1
                        if (0.4375 * gt[i, j] < arr[i, j]
                                and arr[i, j] < 1.5625 * gt[i, j]):
                            b2 = b2 + 1
                        if (0.046875 * gt[i, j] < arr[i, j]
                                and arr[i, j] < 1.953125 * gt[i, j]):
                            b3 = b3 + 1

            RMSE = (RMSE / count)**0.5
            logR = (logR / count)**0.5
            AbsRel = AbsRel / count
            SqRel = SqRel / count
            print("count = ", count)
            print("RMSE = ", RMSE)
            print("logR = ", logR)
            print("AbsRel = ", AbsRel)
            print("SqRel = ", SqRel)
            print("1.25 percentage: ", b1 / count)
            print("1.25^2 percentage: ", b2 / count)
            print("1.25^3 percentage: ", b3 / count)
Example #10
def main():
    args = parser.parse_args()
    o_dir = Path(args.output_dir)
    o_dir.makedirs_p()
    if not(args.output_disp or args.output_depth):
        print('You must at least output one value !')
        return

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    vid_list = [i for i in list(Path(args.dataset_dir).walkdirs()) if i[-4:] == 'data']
    vid_list.sort()
    N = len(vid_list)
    print('{} videos to demo'.format(N))

    for ii, vid_path in enumerate(vid_list):
        
        vid_name = vid_path.split('/')[-2]

        dataset_dir = Path(vid_path)
        output_dir = Path(args.output_dir + vid_name)
        output_dir.makedirs_p()
        
        test_files = sum([list(dataset_dir.walkfiles('*.{}'.format(ext))) for ext in args.img_exts], [])
        vid_save_name = str(args.output_dir)+'{}.mp4'.format(vid_name)

        print('{}/{} - {} - {} files to test::video saved to \'{}\''.format(ii, N, vid_name, len(test_files), vid_save_name))

        for file in tqdm(test_files):

            img = imread(file)

            h,w,_ = img.shape

            if (not args.no_resize) and (h != args.img_height or w != args.img_width):
                img = cv2.resize(img, (args.img_width, args.img_height))
            
            img = np.transpose(img, (2, 0, 1))

            tensor_img = torch.from_numpy(img.astype(np.float32)).unsqueeze(0)
            tensor_img = ((tensor_img/255 - 0.5)/0.5).to(device)

            output = disp_net(tensor_img)[0]

            file_path, file_ext = file.relpath(args.dataset_dir).splitext()
            # print(file_path)
            # print(file_path.splitall())
            file_name = '-'.join(file_path.splitall()[1:])
            # print(file_name)

            if args.output_disp:
                disp = (255*tensor2array(output, max_value=None, colormap='bone')).astype(np.uint8)
                imsave(output_dir/'{}_disp{}'.format(file_name, file_ext), np.transpose(disp, (1,2,0)))
            if args.output_depth:
                depth = 1/output
                depth = (255*tensor2array(depth, max_value=10, colormap='rainbow')).astype(np.uint8)
                imsave(output_dir/'{}_depth{}'.format(file_name, file_ext), np.transpose(depth, (1,2,0)))
        

        #make vid
        img_dir = output_dir

        test_files.sort()

        video = cv2.VideoWriter(vid_save_name, 0x7634706d, 10, (args.img_width, args.img_height*2))  # 0x7634706d is the 'mp4v' fourcc

        for file in test_files:
            file_core_name, file_ext = str(file).split('.')[-2:]
            file_core_name = '-'.join(file_core_name.split('/')[-3:])
            input_img = cv2.imread(file)
            h,w,_ = input_img.shape
            if (not args.no_resize) and (h != args.img_height or w != args.img_width):
                input_img = cv2.resize(input_img, (args.img_width, args.img_height))
            disp_img = cv2.imread(output_dir + '/{}_disp.{}'.format(file_core_name, file_ext))

            video.write(np.concatenate((input_img, disp_img)))

        cv2.destroyAllWindows()
        video.release()
Example #11
# imports and class header inferred from the usage below (the snippet is truncated here)
import time

import cv2
import numpy as np
import torch
from skimage import io

from models import DispNetS


class time_lapse:
    def __init__(self):
        self.start = time.time()
        self.count = 0

    def now(self):
        self.count += 1
        print("[%d]time elasped = %f" % (self.count, time.time() - self.start))


tl = time_lapse()
tl.now()
device = torch.device("cuda") if torch.cuda.is_available() else torch.device(
    "cpu")
disp_net = DispNetS().to(device)
weights = torch.load("pretrained/dispnet_model_best.pth.tar",
                     map_location='cpu')
disp_net.load_state_dict(weights['state_dict'])
disp_net.eval()

tgt_img0 = io.imread("samples/street1.jpeg")
h, w, c = tgt_img0.shape
#print(h, w, c)
print(h, w)
ww = 600
hh = int(h * ww / w + 0.5)
tgt_img0 = cv2.resize(tgt_img0, (ww, hh))
print(ww, hh)
#tgt_img0 = transform.resize(tgt_img0, (hh, ww))
tgt_img0 = torch.from_numpy(tgt_img0)
tgt_img = tgt_img0.permute(2, 0, 1)  # HWC -> CHW (permute keeps it a torch tensor)
print(tgt_img.shape)
tgt_img = tgt_img.unsqueeze(0)
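The snippet is cut off at this point; following the pattern of the earlier examples, the remaining steps would presumably be the normalization and the forward pass, e.g.:

# presumed continuation (not part of the original snippet)
tgt_img = ((tgt_img.float() / 255 - 0.5) / 0.5).to(device)

with torch.no_grad():
    output = disp_net(tgt_img)[0]  # predicted disparity map
tl.now()                           # report elapsed time since the script started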