def fuse_poses(opt, outputs, pose_fuse_mlp):
    def transformation_to_tensor(tr_batch):
        R, t = rot_translation_from_transformation(tr_batch)
        return torch.cat([R.reshape(-1, 9), t.reshape(-1, 3)], dim=1)

    for f_id in opt.frame_ids[1:]:
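        # Flatten each relative transform to a 12-D [R | t] vector, concatenate the
        # network and IMU estimates, and let the MLP regress a fused axis-angle + translation.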
        pose_net = transformation_to_tensor(outputs[("cam_T_cam", 0, f_id)])
        pose_imu = transformation_to_tensor(outputs[("cam_T_cam_imu", 0,
                                                     f_id)])
        pose_fuse_input = torch.cat([pose_net, pose_imu], dim=1)
        pose_fuse_output = pose_fuse_mlp(pose_fuse_input)
        axisangle = pose_fuse_output[:, :3].reshape(-1, 1, 3)
        tr = pose_fuse_output[:, 3:6].reshape(-1, 1, 3)
        T = transformation_from_parameters(axisangle, tr)
        outputs[("cam_T_cam_fuse", 0, f_id)] = T
Example #2
def get_gt_poses(configs: List[Config]):
    for config in configs:
        with config.pose_data as d:
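            # Each absolute_pose row holds a 3-D position followed by a 3-D direction vector.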
            for j in range(d.absolute_pose.shape[0] - 2):
                i = j + 1
                start = d.absolute_pose[i]
                end = d.absolute_pose[i + 1]
                transform = end[:3] - start[:3]

                start_dir = start[3:]
                end_dir = end[3:]

                # http://www.euclideanspace.com/maths/algebra/vectors/angleBetween/index.htm
                # arccos of the (clipped) dot product gives the angle between the unit direction vectors
                angle = np.arccos(np.clip(np.dot(start_dir, end_dir), -1.0, 1.0))
                axis = np.cross(start_dir, end_dir)
                # normalize to unit vector
                axis = axis / np.linalg.norm(axis)
                yield transformation_from_parameters(angle * axis, transform)
Example #3
def pose_infer(self, img1, img2):
    feed_height = self.opt.height
    feed_width = self.opt.width
    input_image1_resized = img1.resize((feed_width, feed_height),
                                       pil.LANCZOS)
    input_image2_resized = img2.resize((feed_width, feed_height),
                                       pil.LANCZOS)
    input_image1_pytorch = transforms.ToTensor()(
        input_image1_resized).unsqueeze(0)
    input_image2_pytorch = transforms.ToTensor()(
        input_image2_resized).unsqueeze(0)
    input_images_pytorch = torch.cat(
        [input_image1_pytorch, input_image2_pytorch], 1)
    with torch.no_grad():
        features = self.pose_encoder(input_images_pytorch)
        axisangle, translation = self.pose_decoder([features])
        transf_mat = transformation_from_parameters(
            axisangle[:, 0], translation[:, 0]).cpu().numpy()
    return transf_mat
Example #4
def evaluate(opt):
    pose_errors = []
    pose_encoder, pose_decoder = prepare_model_for_test(opt)

    filenames = readlines('./splits/scannet_test_pose_deepv2d.txt')
    dataset = ScannetTestPoseDataset(
        opt.data_path,
        filenames,
        opt.height,
        opt.width,
        frame_idxs=opt.frame_ids,
    )

    dataloader = DataLoader(
        dataset,
        1,
        shuffle=False,
        num_workers=opt.num_workers,
    )

    print("-> Computing pose predictions")

    with torch.no_grad():
        for ind, inputs in enumerate(tqdm(dataloader)):
            for key, ipt in inputs.items():
                inputs[key] = ipt.cuda()
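            # Stack the frames along the channel dimension; the pose encoder takes them as one input.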
            color = torch.cat(
                [inputs[("color", i, 0)] for i in opt.frame_ids],
                axis=1,
            )
            features = pose_encoder(color)
            axisangle, translation = pose_decoder([features])
            this_pose = transformation_from_parameters(axisangle[:, 0],
                                                       translation[:, 0])
            this_pose = this_pose.data.cpu().numpy()[0]
            gt_pose = inputs['pose_gt'].data.cpu().numpy()[0]
            pose_errors.append(compute_pose_errors(this_pose, gt_pose))

    mean_pose_errors = np.array(pose_errors).mean(0)
    print("\n  " + ("{:>8} | " * 3).format("rot", "tdeg", "tcm"))
    print(("&{: 8.3f}  " * 3).format(*mean_pose_errors.tolist()) + "\\\\")
    print("\n-> Done!")
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    assert opt.eval_split == "odom_9" or opt.eval_split == "odom_10", \
        "eval_split should be either odom_9 or odom_10"

    sequence_id = int(opt.eval_split.split("_")[1])

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.data_path, filenames, opt.height, opt.width,
                               [0, 1], 4, is_train=False)
    dataloader = DataLoader(dataset, opt.batch_size, shuffle=False,
                            num_workers=opt.num_workers, pin_memory=True, drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    with torch.no_grad():
        for inputs in dataloader:
            for key, ipt in inputs.items():
                inputs[key] = ipt.cuda()

            all_color_aug = torch.cat([inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

            features = [pose_encoder(all_color_aug)]
            axisangle, translation = pose_decoder(features)

            pred_poses.append(
                transformation_from_parameters(axisangle[:, 0], translation[:, 0]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)

    gt_poses_path = os.path.join(opt.data_path, "poses", "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
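        # Relative GT transform between consecutive frames, inverted so it is directly
        # comparable with the predicted poses.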
        gt_local_poses.append(
            np.linalg.inv(np.dot(np.linalg.inv(gt_global_poses[i - 1]), gt_global_poses[i])))

    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 5
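    # Accumulate ATE over overlapping windows of up to track_length frames.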
    for i in range(0, num_frames - 1):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1]))

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(np.mean(ates), np.std(ates)))

    save_path = os.path.join(opt.load_weights_folder, "poses.npy")
    np.save(save_path, pred_poses)
    print("-> Predictions saved to", save_path)
Example #6
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    sequence_id = int(opt.eval_split.split("_")[1])
    opt.batch_size = 1

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.data_path,
                               filenames,
                               opt.height,
                               opt.width, [0, -1, 1],
                               4,
                               1,
                               is_train=False,
                               img_ext='.png')
    dataloader = DataLoader(dataset,
                            opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    # pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    config_file = "./configs/e2e_mask_rcnn_R_50_FPN_1x.yaml"
    cfg.merge_from_file(config_file)
    cfg.freeze()
    maskrcnn_path = "./e2e_mask_rcnn_R_50_FPN_1x.pth"
    pose_encoder = networks.ResnetEncoder(cfg, maskrcnn_path)
    # pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    # pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(len(opt.frame_ids))
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")

    # opt.frame_ids = [0, 1]  # pose network only takes two frames as input
    ii = 0
    with torch.no_grad():
        for inputs in dataloader:
            for key, ipt in inputs.items():
                if isinstance(ipt, torch.Tensor):
                    inputs[key] = ipt.cuda()

            all_color_aug = torch.cat(
                [inputs[("color_aug", i, 0)] for i in opt.frame_ids])

            all_features = pose_encoder(all_color_aug)
            all_features = [
                torch.split(f, opt.batch_size) for f in all_features
            ]

            features = {}
            for i, k in enumerate(opt.frame_ids):
                features[k] = [f[i] for f in all_features]
            pose_inputs = [features[i] for i in opt.frame_ids if i != "s"]

            axisangle, translation = pose_decoder(pose_inputs)
            if ii == 0:
                pred_poses.append(
                    transformation_from_parameters(axisangle[:, 0],
                                                   translation[:, 0],
                                                   True).cpu().numpy())
            pred_poses.append(
                transformation_from_parameters(axisangle[:, 1],
                                               translation[:,
                                                           1]).cpu().numpy())
            if ii % opt.log_frequency == 0:
                print("{:04d}-th image processing".format(ii))
            ii += 1
        # pred_poses.append(
        #     transformation_from_parameters(axisangle[:, 1], translation[:, 1]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)

    gt_poses_path = os.path.join(
        "/usr/stud/linp/storage/user/linp/results/kitti", "poses",
        "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape((-1, 3, 4))
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.linalg.inv(
                np.dot(np.linalg.inv(gt_global_poses[i - 1]),
                       gt_global_poses[i])))

    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 3

    for i in range(0, num_frames - 1):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(
            dump_xyz(gt_local_poses[i:i + track_length - 1]))

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))
    '''
    for i in range(0, num_frames - 2):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i + 1:i + track_length]))

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))
    '''

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(
        np.mean(ates), np.std(ates)))

    save_path = os.path.join(opt.load_weights_folder, "poses.npy")
    np.save(save_path, pred_poses)
    print("-> Predictions saved to", save_path)
Example #7
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    K = np.array(
        [[0.5, 0, 0.5, 0], [0, 1.656, 0.5, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
        dtype=np.float32)
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", opt.eval_split,
                     "test_files.txt"))

    dataset = AirSimDataset(opt.data_path,
                            filenames,
                            opt.height,
                            opt.width, [0, 1],
                            4,
                            is_train=False)
    dataloader = DataLoader(dataset,
                            opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder,
                                     "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")
    depth_encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    depth_decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))
    depth_encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_encoder_dict = torch.load(depth_encoder_path)
    model_dict = depth_encoder.state_dict()
    depth_encoder.load_state_dict(
        {k: v
         for k, v in depth_encoder_dict.items() if k in model_dict})

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))
    depth_decoder = networks.DepthDecoder(depth_encoder.num_ch_enc)
    depth_decoder.load_state_dict(torch.load(depth_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()
    depth_encoder.cuda()
    depth_encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    pred_poses = []
    pred_disps = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    with torch.no_grad():
        for inputs in dataloader:
            input_color = inputs[("color", 0, 0)].cuda()
            depth_output = depth_decoder(depth_encoder(input_color))

            pred_disp, _ = disp_to_depth(depth_output[("disp", 0)],
                                         opt.min_depth, opt.max_depth)
            pred_disp = pred_disp.cpu()[:, 0].numpy()

            pred_disps.append(pred_disp)

            for key, ipt in inputs.items():
                inputs[key] = ipt.cuda()

            all_color_aug = torch.cat(
                [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

            features = [pose_encoder(all_color_aug)]
            axisangle, translation = pose_decoder(features)

            pred_poses.append(
                transformation_from_parameters(axisangle[:, 0],
                                               translation[:,
                                                           0]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)
    pred_disps = np.concatenate(pred_disps)

    gt_norms_div = []
    gt_norms = []
    pred_norms = []
    trans_pred = pred_poses[:, :3, 3]

    gt_poses_path = os.path.join(opt.data_path, "poses.txt")
    gt_local_poses = read_pose(gt_poses_path)
    num_frames = gt_local_poses.shape[0]
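    # The ratio of ground-truth to predicted translation norms gives a per-frame scale estimate.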
    for i in range(num_frames):
        local_xyzs = pred_poses[i, :3, 3]
        gt_local_xyzs = gt_local_poses[i, :3, 3]
        gt_norm_div = np.linalg.norm(gt_local_xyzs) / np.linalg.norm(
            local_xyzs)
        gt_norms_div.append(gt_norm_div)

    save_path = os.path.join(os.path.dirname(__file__),
                             "gt_norms_div_AirSim.npy")
    np.save(save_path, gt_norms_div)

    print("-> Predictions saved to", save_path)
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    # Depth
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)
    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    # Pose
    pose_encoder_path = os.path.join(opt.load_weights_folder,
                                     "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    if opt.use_imu:
        imu_lstm = nn.LSTM(6, opt.lstm_hidden_size, opt.lstm_num_layers)
        imu_lstm.cuda()
        imu_lstm.eval()
        lstm_hs = None

        hidden_to_imu = torch.nn.Sequential(
            torch.nn.Linear(opt.lstm_hidden_size, 6), )
        hidden_to_imu.cuda()
        hidden_to_imu.eval()

        if opt.pose_fuse:
            pose_fuse_mlp = torch.nn.Sequential(
                torch.nn.Linear(24, opt.pose_mlp_hidden_size),
                torch.nn.Sigmoid(),
                torch.nn.Linear(opt.pose_mlp_hidden_size, 6),
            )
            pose_fuse_mlp.cuda()
            pose_fuse_mlp.eval()

    img_ext = '.png' if opt.png else '.jpg'

    pred_disps = []
    scale_factors = []

    kitty_odom = False
    if opt.eval_split.startswith("odom"):
        kitty_odom = True

    # splits_dir is needed on both the odometry and raw-data branches below
    splits_dir = os.path.join(os.path.dirname(__file__), "splits")
    if kitty_odom:
        ids = [int(opt.eval_split.split("_")[1])]
    else:
        videonames = readlines(
            os.path.join(splits_dir, opt.eval_split, "test_video_list.txt"))
        ids = videonames

    for videoname in ids:
        if kitty_odom:
            filenames = readlines(
                os.path.join(splits_dir, opt.eval_split,
                             "test_files_{:02d}.txt".format(videoname)))
        else:
            filenames = readlines(
                os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
        if kitty_odom:

            dataset = KITTIOdomDataset(opt.data_path,
                                       filenames,
                                       opt.height,
                                       opt.width, [0, 1],
                                       4,
                                       is_train=False,
                                       use_imu=False)
            dataloader = DataLoader(dataset,
                                    opt.batch_size,
                                    shuffle=False,
                                    num_workers=opt.num_workers,
                                    pin_memory=True,
                                    drop_last=False)
        else:
            if opt.use_imu:
                dataset = SequenceRawKittiDataset(
                    opt.data_path, [videoname],
                    filenames,
                    1,
                    imu_data_path=opt.imu_data_path,
                    img_ext=img_ext,
                    frame_idxs=[0, 1],
                    height=encoder_dict['height'],
                    width=encoder_dict['width'],
                    num_scales=4,
                    is_train=False)
                dataloader = DataLoader(dataset, shuffle=False, num_workers=0)
            else:
                filenames = list(
                    filter(lambda f: f.startswith(videoname), filenames))
                dataset = KITTIRAWDataset(opt.data_path,
                                          filenames,
                                          opt.height,
                                          opt.width, [0, 1],
                                          4,
                                          is_train=False,
                                          use_imu=False)
                dataloader = DataLoader(dataset,
                                        opt.batch_size,
                                        shuffle=False,
                                        num_workers=opt.num_workers,
                                        pin_memory=True,
                                        drop_last=False)
        # pred_poses = [np.eye(4).reshape(1, 4, 4)]
        pred_poses = []
        imu_scale_factors = []

        print("EVALUATING ", opt.model_name)

        print("-> Computing pose predictions")

        opt.frame_ids = [0, 1]  # pose network only takes two frames as input

        with torch.no_grad():
            for inputs in dataloader:
                for key, ipt in inputs.items():
                    inputs[key] = ipt.cuda()
                    if opt.use_imu:
                        inputs[key] = inputs[key].squeeze(0)
                input_color = inputs[("color", 0, 0)]
                feature = encoder(input_color)
                output = depth_decoder(feature)

                pred_disp, _ = disp_to_depth(output[("disp", 0)],
                                             opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                pred_disps.append(pred_disp)

                all_color_aug = torch.cat([
                    inputs[("color_aug", i, 0)] for i in sorted(opt.frame_ids)
                ], 1)

                features = [pose_encoder(all_color_aug)]
                axisangle, translation = pose_decoder(features)
                outputs = {}
                outputs[("cam_T_cam", 0,
                         1)] = transformation_from_parameters(axisangle[:, 0],
                                                              translation[:,
                                                                          0],
                                                              invert=False)

                T = outputs[("cam_T_cam", 0, 1)]
                if opt.use_imu:
                    outputs = predict_poses_from_imu2(opt, inputs, imu_lstm,
                                                      lstm_hs, hidden_to_imu)
                    T_better = outputs[("cam_T_cam_imu", 0, 1)]
                    if opt.pose_fuse:
                        fuse_poses(opt, outputs, pose_fuse_mlp)
                        T_better = outputs[("cam_T_cam_fuse", 0, 1)]

                    R, t = rot_translation_from_transformation(T)
                    Rb, tb = rot_translation_from_transformation(T_better)
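                    # Least-squares scale that best aligns the network translation t with the
                    # IMU/fused translation tb.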
                    imu_scale_factor = torch.sum(tb * t) / torch.sum(t**2)

                    imu_scale_factors.append(imu_scale_factor.cpu().numpy())
                    # scale_factors.append(imu_scale_factors)

                    T = T_better

                pred_poses.append(T.cpu().numpy())

            pred_poses = np.concatenate(pred_poses)

            if opt.eval_split.startswith("odom"):
                gt_poses_path = os.path.join(opt.data_path, "poses",
                                             "{:02d}.txt".format(videoname))
            else:
                gt_poses_path = os.path.join(opt.data_path, videoname, "oxts",
                                             "poses.txt")

            eval_pose(opt, pred_poses, gt_poses_path)
        scale_factors = {}
        if imu_scale_factors:
            scale_factors["IMU factor"] = imu_scale_factors
    pred_disps = np.concatenate(pred_disps)
    if not kitty_odom:
        eval_depth(opt, pred_disps, scale_factors)
Example #9
def test_depth_pose(args):
    """Function to predict depth and pose
    """
    assert args.model_name is not None, \
        "You must specify the --model_name parameter; see README.md for an example"

    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    download_model_if_doesnt_exist(args.model_name)
    model_path = os.path.join("models", args.model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")
    pose_encoder_path = os.path.join(model_path, "pose_encoder.pth")
    pose_decoder_path = os.path.join(model_path, "pose.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained depth encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    print("   Loading pretrained pose encoder")
    pose_encoder = networks.ResnetEncoder(18, False, 2)
    loaded_dict_pose_enc = torch.load(pose_encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)

    pose_encoder.load_state_dict(loaded_dict_pose_enc)

    encoder.to(device)
    pose_encoder.to(device)
    encoder.eval()
    pose_encoder.eval()

    print("   Loading pretrained depth decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    print("   Loading pretrained pose decoder")
    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    loaded_dict_pose = torch.load(pose_decoder_path, map_location=device)
    pose_decoder.load_state_dict(loaded_dict_pose)

    depth_decoder.to(device)
    pose_decoder.to(device)
    depth_decoder.eval()
    pose_decoder.eval()

    print("-> Predicting on test images")

    pred_depths = []
    pred_poses = []

    backproject_depth = BackprojectDepth(1, feed_height, feed_width)
    backproject_depth.to(device)
    project_3d = Project3D(1, feed_height, feed_width)
    project_3d.to(device)

    K = np.array(
        [[0.58, 0, 0.5, 0], [0, 1.92, 0.5, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
        dtype=np.float32)
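    # The intrinsics above are normalized; scale them to the network input resolution.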
    K[0, :] *= feed_width
    K[1, :] *= feed_height
    inv_K = np.linalg.pinv(K)

    K = torch.from_numpy(K)
    K = K.unsqueeze(0).to(device)
    inv_K = torch.from_numpy(inv_K)
    inv_K = inv_K.unsqueeze(0).to(device)

    # PREDICTING ON EACH IMAGE IN TURN
    with torch.no_grad():

        for i in range(107):

            # Load image and preprocess
            image_0_path = './kitti_data/01/{:010d}.jpg'.format(i)
            input_image_0 = Image.open(image_0_path).convert('RGB')
            original_width, original_height = input_image_0.size
            input_image_0 = input_image_0.resize((feed_width, feed_height),
                                                 Image.LANCZOS)
            input_image_0 = transforms.ToTensor()(input_image_0).unsqueeze(0)

            image_1_path = './kitti_data/01/{:010d}.jpg'.format(i + 1)
            input_image_1 = Image.open(image_1_path).convert('RGB')
            input_image_1 = input_image_1.resize((feed_width, feed_height),
                                                 Image.LANCZOS)
            input_image_1 = transforms.ToTensor()(input_image_1).unsqueeze(0)

            # PREDICTION for depth
            input_image_0 = input_image_0.to(device)
            features = encoder(input_image_0)
            outputs = depth_decoder(features)

            disp = outputs[("disp", 0)]
            #disp_resized = torch.nn.functional.interpolate(
            #    disp, (original_height, original_width), mode="bilinear", align_corners=False)

            _, pred_depth = disp_to_depth(disp, 0.1, 100)
            pred_depth_tensor = pred_depth  # keep the tensor for the optional reconstruction below
            pred_depth = pred_depth.cpu()[:, 0].numpy()

            pred_depths.append(pred_depth[0])

            print("   Predict Depth {:d}".format(i))

            # PREDICTION for pose
            input_image_1 = input_image_1.to(device)
            input_image_pose = torch.cat([input_image_0, input_image_1], 1)
            features_pose = pose_encoder(input_image_pose)
            features_pose = [features_pose]
            axisangle, translation = pose_decoder(features_pose)

            pred_pose = transformation_from_parameters(axisangle[:, 0],
                                                       translation[:, 0])

            pred_poses.append(pred_pose.cpu()[0].numpy())

            print("   Predict Pose {:d}".format(i))
            print(pred_pose)

            # WARPED image
            if RECONSTRUCTION:
                print("   Reconstruct image {:d}".format(i))
                cam_points = backproject_depth(pred_depth_tensor, inv_K)
                pix_coords = project_3d(cam_points, K, pred_pose)
                reconstruct_image_0 = torch.nn.functional.grid_sample(
                    input_image_1, pix_coords, padding_mode="border")
                print("   Saving resonstructed image...")

                reconstruct_image_0 = torch.nn.functional.interpolate(
                    reconstruct_image_0, (original_height, original_width),
                    mode="bilinear",
                    align_corners=False)
                reconstruct_image_0_np = reconstruct_image_0.squeeze().cpu(
                ).numpy()
                reconstruct_image_0_np = (reconstruct_image_0_np * 255).astype(
                    np.uint8)
                reconstruct_image_0_np = np.concatenate([
                    np.expand_dims(reconstruct_image_0_np[i], 2)
                    for i in range(3)
                ], 2)
                im = Image.fromarray(reconstruct_image_0_np, mode='RGB')
                name_dest_im = os.path.join("kitti_data/01", "warped",
                                            "{:010d}_warped.jpg".format(i))
                im.save(name_dest_im)
            print("...")

    np.save('kitti_data/pred_depth_01.npy', np.array(pred_depths))
    np.save('kitti_data/pred_pose_01.npy', np.array(pred_poses))
    print('-> Done!')
Example #10
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """

    conv_layer, data_lambda, intrinsics = get_params(opt)
    configs = load_csv(opt.test_data)
    dataset = CarlaDataset(configs,
                           data_lambda,
                           intrinsics, [0, 1],
                           4,
                           is_train=False,
                           is_cubemap=opt.mode is Mode.Cubemap,
                           width=opt.width,
                           height=opt.height)
    dataloader = DataLoader(dataset,
                            16,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    if opt.eval_model is None:
        opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)
    else:
        if opt.load_weights_folder is not None:
            raise ValueError(
                "Can't specify eval_model and load_weights_folder, they conflict"
            )

        opt.eval_model = Path(opt.eval_model)
        models = Path(opt.eval_model) / "models"
        weights = [p for p in models.iterdir() if p.name.startswith("weights")]
        weights = [int(p.name.split("_")[1]) for p in weights]
        opt.load_weights_folder = models / f"weights_{max(weights)}"

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    pose_encoder_path = os.path.join(opt.load_weights_folder,
                                     "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(conv_layer, opt.num_layers, False, 2)
    pose_encoder.load_state_dict(un_mod(torch.load(pose_encoder_path)))

    pose_decoder = networks.PoseDecoder(conv_layer, pose_encoder.num_ch_enc, 1,
                                        2)
    pose_decoder.load_state_dict(un_mod(torch.load(pose_decoder_path)))

    if opt.mode is Mode.Cubemap:
        cube_poses = CubePosesAndLoss(include_loss=False)
        cube_poses.cuda()
        cube_poses.eval()

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    with torch.no_grad():
        for inputs in dataloader:
            for key, ipt in inputs.items():
                inputs[key] = ipt.cuda()

            all_color_aug = torch.cat(
                [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

            features = [pose_encoder(all_color_aug)]
            axisangle, translation = pose_decoder(features)

            cam_T_cam = transformation_from_parameters(axisangle[:, 0],
                                                       translation[:, 0])

            if opt.mode is Mode.Cubemap:
                cam_T_cam = cube_poses(cam_T_cam)

            pred_poses.append(cam_T_cam.cpu().numpy())

    pred_poses = np.concatenate(pred_poses)

    ates = []
    num_frames = pred_poses.shape[0]
    gt_poses = get_gt_poses(configs)
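    # Compare each predicted relative pose with the corresponding ground-truth transform.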
    for i in range(0, num_frames - 1):
        gt_pose = next(gt_poses)
        local_xyzs = np.array(dump_xyz(pred_poses[np.newaxis, i]))
        gt_local_xyzs = np.array(dump_xyz(gt_pose[np.newaxis, ...]))

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(
        np.mean(ates), np.std(ates)))

    save_path = os.path.join(opt.load_weights_folder, "poses.npy")
    np.save(save_path, pred_poses)
    print("-> Predictions saved to", save_path)
Example #11
def evaluate_pose(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    assert opt.eval_split == "odom_09" or opt.eval_split == "odom_10", \
        "eval_split should be either odom_9 or odom_10"

    device = torch.device("cpu" if opt.no_cuda else "cuda")

    sequence_id = int(opt.eval_split.split("_")[-1])

    if opt.pose_model_input == "pairs":
        opt.frame_ids = [1, 0]  # pose network only takes two frames as input
        num_poses = 1
        filenames = readlines(
            os.path.join(
                os.path.dirname(__file__), "splits", "odom",
                "test_files_{}_{:02d}.txt".format("pairs", sequence_id)))
    else:
        opt.frame_ids = [i for i in opt.frame_ids if i != "s"]
        num_poses = len(opt.frame_ids) - 1
        filenames = readlines(
            os.path.join(
                os.path.dirname(__file__), "splits", "odom",
                "test_files_{}_{:02d}.txt".format("all" + str(num_poses + 1),
                                                  sequence_id)))

    img_ext = '.png' if opt.png else '.jpg'
    dataset = datasets_dict[opt.eval_split](opt.data_path,
                                            filenames,
                                            opt.height,
                                            opt.width,
                                            opt.frame_ids,
                                            4,
                                            is_train=False,
                                            img_ext=img_ext)
    dataloader = DataLoader(dataset,
                            opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder,
                                     "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, num_poses + 1)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, num_poses, 1)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.to(device)
    pose_encoder.eval()
    pose_decoder.to(device)
    pose_decoder.eval()

    pred_poses = []
    flip_pred_poses = []

    print("-> Computing pose predictions")

    with torch.no_grad():
        for inputs in dataloader:
            for key, ipt in inputs.items():
                inputs[key] = ipt.to(device)

            all_color_aug = torch.cat(
                [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

            if opt.post_process:
                # Left-Right Flip as Post-processing to further improve accuracy of pose estimation
                all_color_aug = torch.cat(
                    (all_color_aug, torch.flip(all_color_aug, [3])), 0)

            features = pose_encoder(all_color_aug)
            axisangle, translation = pose_decoder(features)

            if opt.post_process:
                N = axisangle.shape[0] // 2
                pred_poses.append(
                    transformation_from_parameters(
                        axisangle[:N].view(N * num_poses, 1, 3),
                        translation[:N].view(N * num_poses, 1, 3),
                        invert=True).cpu().numpy().reshape(N, num_poses, 4, 4))
                flip_pred_poses.append(
                    transformation_from_parameters(
                        axisangle[N:].view(N * num_poses, 1, 3),
                        translation[N:].view(N * num_poses, 1, 3),
                        invert=True).cpu().numpy().reshape(N, num_poses, 4, 4))
            else:
                N = axisangle.shape[0]
                pred_poses.append(
                    transformation_from_parameters(
                        axisangle.view(N * num_poses, 1, 3),
                        translation.view(N * num_poses, 1, 3),
                        invert=True).cpu().numpy().reshape(N, num_poses, 4, 4))

    pred_poses = np.concatenate(pred_poses)

    if opt.post_process:
        flip_pred_poses = np.concatenate(flip_pred_poses)
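        # Map poses predicted from horizontally flipped inputs back to the original
        # camera frame before averaging with the unflipped predictions.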
        flip_pred_poses[:, :, 1:3, 0] *= -1
        flip_pred_poses[:, :, 0, 1:] *= -1
        pred_poses = average_poses(np.array([pred_poses, flip_pred_poses]))

    gt_poses_path = os.path.join(opt.data_path, "poses",
                                 "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.dot(np.linalg.inv(gt_global_poses[i - 1]), gt_global_poses[i]))
    gt_local_poses = np.expand_dims(np.array(gt_local_poses), axis=1)

    ATEs = []
    REs = []
    num_frames = gt_global_poses.shape[0]
    track_length = 5
    for i in range(0, num_frames - track_length):
        gt_odometry = local_poses_to_odometry(gt_local_poses[i:i +
                                                             track_length - 1])
        pred_odometry = local_poses_to_odometry(pred_poses[i:i + track_length -
                                                           num_poses])
        ATE, RE = compute_pose_error(gt_odometry, pred_odometry)
        ATEs.append(ATE)
        REs.append(RE)

    print("\n Trajectory error: \n"
          "    ATE: {:0.4f}, std: {:0.4f} \n"
          "    RE: {:0.4f}, std: {:0.4f}  \n ".format(np.mean(ATEs),
                                                      np.std(ATEs),
                                                      np.mean(REs),
                                                      np.std(REs)))

    # compute the global monocular visual odometry and save it
    global_pred_odometry = local_poses_to_odometry(pred_poses)

    save_filename = opt.eval_split
    if opt.post_process:
        save_filename = save_filename + "_pp"
    save_path = os.path.join(opt.load_weights_folder, save_filename + ".txt")
    np.savetxt(save_path,
               global_pred_odometry[:, :-1, :].reshape(
                   global_pred_odometry.shape[0], -1),
               delimiter=' ',
               fmt='%1.8e')
    print("-> Predictions saved to", save_path)
Example #12
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    K = np.array(
        [[0.58, 0, 0.5, 0], [0, 1.92, 0.5, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
        dtype=np.float32)
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    assert opt.eval_split == "odom_9" or opt.eval_split == "odom_10" or opt.eval_split == "odom_0", \
        "eval_split should be either odom_9 or odom_10"

    sequence_id = int(opt.eval_split.split("_")[1])

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.data_path,
                               filenames,
                               opt.height,
                               opt.width, [0, 1],
                               4,
                               is_train=False)
    dataloader = DataLoader(dataset,
                            opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder,
                                     "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")
    depth_encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    depth_decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))
    depth_encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_encoder_dict = torch.load(depth_encoder_path)
    model_dict = depth_encoder.state_dict()
    depth_encoder.load_state_dict(
        {k: v
         for k, v in depth_encoder_dict.items() if k in model_dict})

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))
    depth_decoder = networks.DepthDecoder(depth_encoder.num_ch_enc)
    depth_decoder.load_state_dict(torch.load(depth_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()
    depth_encoder.cuda()
    depth_encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    pred_poses = []
    pred_disps = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    with torch.no_grad():
        for inputs in dataloader:
            input_color = inputs[("color", 0, 0)].cuda()
            depth_output = depth_decoder(depth_encoder(input_color))

            pred_disp, _ = disp_to_depth(depth_output[("disp", 0)],
                                         opt.min_depth, opt.max_depth)
            pred_disp = pred_disp.cpu()[:, 0].numpy()

            pred_disps.append(pred_disp)

            for key, ipt in inputs.items():
                inputs[key] = ipt.cuda()

            all_color_aug = torch.cat(
                [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

            features = [pose_encoder(all_color_aug)]
            axisangle, translation = pose_decoder(features)

            pred_poses.append(
                transformation_from_parameters(axisangle[:, 0],
                                               translation[:,
                                                           0]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)
    pred_disps = np.concatenate(pred_disps)
    pred_poses_scaled = []
    ratios_d = []
    gt_norms_div = []
    gt_norms = []
    pred_norms = []
    td_divs_dgc = []
    poses_pred = []
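    # Estimate a per-frame scale factor from the predicted depth (ScaleRecovery) and use it
    # to rescale the predicted translation.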
    for i in range(pred_poses.shape[0]):
        pred_pose = pred_poses[i]
        pred_disp = pred_disps[i + 1]
        pred_depth = 1 / pred_disp
        scale_recovery = ScaleRecovery(1, 192, 640, K).cuda()
        pred_depth = torch.from_numpy(pred_depth).unsqueeze(0).cuda()
        ratio = scale_recovery(pred_depth).cpu().item()
        pred_pose_scaled = pred_pose[:3, 3] * ratio
        poses_pred.append(pred_pose[:3, 3])
        pred_poses_scaled.append(pred_pose_scaled)
        ratios_d.append(ratio)

    gt_poses_path = os.path.join(opt.data_path, "poses",
                                 "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.linalg.inv(
                np.dot(np.linalg.inv(gt_global_poses[i - 1]),
                       gt_global_poses[i])))

    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 5
    for i in range(0, num_frames - 1):
        local_xyzs = np.array(
            dump_xyz(pred_poses_scaled[i:i + track_length - 1]))
        gt_local_xyzs = np.array(
            dump_xyz(gt_local_poses[i:i + track_length - 1]))
        gt_norm_div = np.linalg.norm(gt_local_xyzs) / np.linalg.norm(
            local_xyzs)
        ates.append(compute_ate(gt_local_xyzs, local_xyzs))
        gt_norms_div.append(gt_norm_div)
        gt_norms.append(np.linalg.norm(gt_local_xyzs))

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(
        np.mean(ates), np.std(ates)))

    save_path = os.path.join(os.path.dirname(__file__),
                             "poses_scaled{:02d}.npy".format(sequence_id))
    np.save(save_path, pred_poses_scaled)
    save_path = os.path.join(os.path.dirname(__file__),
                             "poses_gt{:02d}.npy".format(sequence_id))
    np.save(save_path, gt_xyzs)
    save_path = os.path.join(os.path.dirname(__file__),
                             "poses_pred{:02d}.npy".format(sequence_id))
    np.save(save_path, poses_pred)
    save_path = os.path.join(os.path.dirname(__file__),
                             "gt_norms{:02d}.npy".format(sequence_id))
    np.save(save_path, gt_norms)
    save_path = os.path.join(os.path.dirname(__file__),
                             "gt_norms_div{:02d}.npy".format(sequence_id))
    np.save(save_path, gt_norms_div)
    save_path = os.path.join(os.path.dirname(__file__),
                             "ratios_d{:02d}.npy".format(sequence_id))
    np.save(save_path, ratios_d)
    save_path = os.path.join(os.path.dirname(__file__),
                             "pred_norms{:02d}.npy".format(sequence_id))
    np.save(save_path, pred_norms)
    print("-> Predictions saved to", save_path)
Example #13
def test_simple(args):
    """Function to predict for a single image or folder of images
    """
    assert args.model_name is not None, \
        "You must specify the --model_name parameter; see README.md for an example"

    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    download_model_if_doesnt_exist(args.model_name)
    model_path = os.path.join("models", args.model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained depth encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print("   Loading pretrained depth decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    # FINDING INPUT IMAGES
    if os.path.isfile(args.image_path):
        # Only testing on a single image
        paths = [args.image_path]
        output_directory = os.path.dirname(args.image_path)
    elif os.path.isdir(args.image_path):
        # Searching folder for images
        paths = glob.glob(
            os.path.join(args.image_path, '*.{}'.format(args.ext)))
        output_directory = args.image_path
    else:
        raise Exception("Can not find args.image_path: {}".format(
            args.image_path))

    # don't try to predict disparity for a disparity image!
    paths = [img for img in paths if not img.endswith("_disp.jpg")]

    if len(paths) > 3:
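        # Only load the pose network when several images are available to form consecutive pairs.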
        print("   Loading Pose network")
        pose_encoder_path = os.path.join(model_path, "pose_encoder.pth")
        pose_decoder_path = os.path.join(model_path, "pose.pth")

        pose_encoder = networks.ResnetEncoder(18, False, 2)
        pose_encoder.load_state_dict(
            torch.load(pose_encoder_path, map_location=device))

        pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
        pose_decoder.load_state_dict(
            torch.load(pose_decoder_path, map_location=device))

        pose_encoder.to(device)
        pose_encoder.eval()
        pose_decoder.to(device)
        pose_decoder.eval()

    # PREDICTING ON EACH IMAGE IN TURN
    with torch.no_grad():
        print("-> Predicting disparities on {:d} test images".format(
            len(paths)))
        processed_images = []
        for idx, image_path in enumerate(paths):
            # Load image and preprocess
            input_image = pil.open(image_path).convert('RGB')
            original_width, original_height = input_image.size
            input_image = input_image.resize((feed_width, feed_height),
                                             pil.LANCZOS)
            input_image = transforms.ToTensor()(input_image).unsqueeze(0)

            # PREDICTION
            input_image = input_image.to(device)
            processed_images += [input_image]

            features = encoder(input_image)
            outputs = depth_decoder(features)

            disp = outputs[("disp", 0)]
            disp_resized = torch.nn.functional.interpolate(
                disp, (original_height, original_width),
                mode="bilinear",
                align_corners=False)

            # Saving numpy file
            output_name = os.path.splitext(os.path.basename(image_path))[0]
            name_dest_npy = os.path.join(output_directory,
                                         "{}_disp.npy".format(output_name))
            scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
            np.save(name_dest_npy, scaled_disp.cpu().numpy())

            # Saving colormapped depth image
            disp_resized_np = disp_resized.squeeze().cpu().numpy()
            vmax = np.percentile(disp_resized_np, 95)
            normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(),
                                              vmax=vmax)
            mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
            colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] *
                              255).astype(np.uint8)
            im = pil.fromarray(colormapped_im)

            name_dest_im = os.path.join(output_directory,
                                        "{}_disp.jpg".format(output_name))
            im.save(name_dest_im)

            print("   Processed {:d} of {:d} images - saved prediction to {}".
                  format(idx + 1, len(paths), name_dest_im))

        if len(processed_images) > 3:
            pred_poses = []
            rotations = []
            translations = []
            print("-> Predicting poses on {:d} test images".format(
                len(processed_images)))
            for idx, (a, b) in enumerate(
                    zip(processed_images[:-1], processed_images[1:])):
                all_color_aug = torch.cat([a, b], 1)

                features = [pose_encoder(all_color_aug)]
                axisangle, translation = pose_decoder(features)

                rotations += [axisangle[:, 0].cpu().numpy()]
                translations += [translation[:, 0].cpu().numpy()]

                pred_poses.append(
                    transformation_from_parameters(
                        axisangle[:, 0], translation[:, 0]).cpu().numpy())
            pred_poses = np.concatenate(pred_poses)
            save_path = os.path.join(args.image_path, "pred_poses.npy")
            np.save(save_path, pred_poses)
            print("-> Pose Predictions saved to", save_path)
            local_xyzs = np.array(dump_xyz(pred_poses))
            save_path = os.path.join(args.image_path, "pred_xyzs.npy")
            np.save(save_path, local_xyzs)
            print("-> Predicted path saved to", save_path)

            save_path = os.path.join(args.image_path, "axisangle.npy")
            np.save(save_path, np.concatenate(rotations))
            print("-> Predicted axis angles saved to", save_path)
            save_path = os.path.join(args.image_path, "translation.npy")
            np.save(save_path, np.concatenate(translations))
            print("-> Predicted translations saved to", save_path)

    print('-> Done!')
Example #14
def test_simple(args):
    """Function to predict for a single image or folder of images
    """
    assert args.model_name is not None, \
        "You must specify the --model_name parameter; see README.md for an example"

    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    download_model_if_doesnt_exist(args.model_name)
    model_path = os.path.join("models", args.model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print("Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(
        num_ch_enc=encoder.num_ch_enc, scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    print("Loading pose networks")
    pose_encoder_path = os.path.join(model_path, "pose_encoder.pth")
    pose_decoder_path = os.path.join(model_path, "pose.pth")

    pose_encoder = networks.ResnetEncoder(18, False, 2)
    pose_encoder.load_state_dict(
        torch.load(pose_encoder_path, map_location=device))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(
        torch.load(pose_decoder_path, map_location=device))

    pose_encoder.to(device)
    pose_encoder.eval()
    pose_decoder.to(device)
    pose_decoder.eval()

    bag_name = '2019-12-17-13-24-03'
    map_name = "feature=base&ver=2019121700&base_pt=(32.75707,-111.55757)&end_pt=(32.092537212,-110.7892506)"
    begin = '0:36:00'
    end = '0:37:00'
    output_directory = "assets/"

    dataset = TSDataset(bag_name, begin, end)
    pred_depth = []
    pred_poses = []
    last_img = None

    # PREDICTING ON EACH IMAGE IN TURN
    with torch.no_grad():
        for idx, input_image in enumerate(dataset):

            # Load image and preprocess
            original_width, original_height = input_image.size
            input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS)
            input_image = transforms.ToTensor()(input_image).unsqueeze(0)

            # PREDICTION
            input_image = input_image.to(device)
            features = encoder(input_image)
            outputs = depth_decoder(features)

            disp = outputs[("disp", 0)]
            disp_resized = torch.nn.functional.interpolate(
                disp, (original_height, original_width), mode="bilinear", align_corners=False)

            # Saving colormapped depth image
            disp_resized_np = disp_resized.squeeze().cpu().numpy()
            vmax = np.percentile(disp_resized_np, 95)
            normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(), vmax=vmax)
            mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
            colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] * 255).astype(np.uint8)
            im = pil.fromarray(colormapped_im)
            pred_depth.append(im)

            # Handle pose
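            # Pair the previous frame with the current one; the very first frame is paired with itself.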
            if last_img is None:
                last_img = input_image
            all_color_aug = torch.cat([last_img, input_image], 1)
            last_img = input_image

            features = [pose_encoder(all_color_aug)]
            axisangle, translation = pose_decoder(features)
            pose = transformation_from_parameters(axisangle[:, 0], translation[:, 0]).cpu().numpy()
            pred_poses.append(pose)
            
            print("   Processed {:d} of {:d} images".format(
                idx + 1, len(dataset)))
    pred_poses = np.concatenate(pred_poses, axis=0)
    print(pred_poses.shape)
    np.save("poses.npy", pred_poses)

    # save_video(pred_depth)

    print('-> Done!')