# Imports assumed from the monodepth2 project layout; dump_xyz and compute_ate
# are helpers defined alongside evaluate() (see the sketch after this example).
import os

import numpy as np
import torch
from torch.utils.data import DataLoader

import networks
from datasets import KITTIOdomDataset
from layers import transformation_from_parameters
from options import MonodepthOptions
from utils import readlines


def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    assert opt.eval_split == "odom_9" or opt.eval_split == "odom_10", \
        "eval_split should be either odom_9 or odom_10"

    sequence_id = int(opt.eval_split.split("_")[1])

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.data_path, filenames, opt.height, opt.width,
                               [0, 1], 4, is_train=False)
    dataloader = DataLoader(dataset, opt.batch_size, shuffle=False,
                            num_workers=opt.num_workers, pin_memory=True, drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    with torch.no_grad():
        for inputs in dataloader:
            for key, ipt in inputs.items():
                inputs[key] = ipt.cuda()

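            # The pose network takes the two input frames concatenated along
            # the channel dimension (2 x 3 = 6 channels).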
            all_color_aug = torch.cat([inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

            features = [pose_encoder(all_color_aug)]
            axisangle, translation = pose_decoder(features)

            pred_poses.append(
                transformation_from_parameters(axisangle[:, 0], translation[:, 0]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)

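    # KITTI ground-truth poses: one flattened 3x4 camera-to-world matrix per
    # line; pad with a [0, 0, 0, 1] row to get homogeneous 4x4 transforms.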
    gt_poses_path = os.path.join(opt.data_path, "poses", "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

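    # Relative ground-truth motion between consecutive frames, inverted to
    # match the convention of the predicted poses.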
    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.linalg.inv(np.dot(np.linalg.inv(gt_global_poses[i - 1]), gt_global_poses[i])))

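    # Monodepth2-style local evaluation: ATE is averaged over all overlapping
    # 5-frame snippets rather than computed on the full trajectory.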
    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 5
    for i in range(0, num_frames - 1):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1]))

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(np.mean(ates), np.std(ates)))

    save_path = os.path.join(opt.load_weights_folder, "poses.npy")
    np.save(save_path, pred_poses)
    print("-> Predictions saved to", save_path)
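This example assumes the helpers dump_xyz and compute_ate are in scope. A minimal sketch of both, modeled on the monodepth2 versions this code appears to derive from, together with the usual entry point (MonodepthOptions is assumed from the same project):

def dump_xyz(source_to_target_transformations):
    """Chain relative poses and collect the camera position at each step."""
    xyzs = []
    cam_to_world = np.eye(4)
    xyzs.append(cam_to_world[:3, 3])
    for source_to_target_transformation in source_to_target_transformations:
        cam_to_world = np.dot(cam_to_world, source_to_target_transformation)
        xyzs.append(cam_to_world[:3, 3])
    return xyzs


def compute_ate(gtruth_xyz, pred_xyz_o):
    """RMSE between two tracks after translation and scale alignment."""
    # Align the first frames; the snippets are forward-facing, so no
    # rotational alignment is applied.
    offset = gtruth_xyz[0] - pred_xyz_o[0]
    pred_xyz = pred_xyz_o + offset[None, :]

    # Optimize the scaling factor in closed form.
    scale = np.sum(gtruth_xyz * pred_xyz) / np.sum(pred_xyz ** 2)
    alignment_error = pred_xyz * scale - gtruth_xyz
    rmse = np.sqrt(np.sum(alignment_error ** 2)) / gtruth_xyz.shape[0]
    return rmse


if __name__ == "__main__":
    options = MonodepthOptions()
    evaluate(options.parse())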
Example 2
def evaluate(opt):
    """Evaluate odometry (and, on non-odometry splits, depth) on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    # Depth
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)
    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    # Pose
    pose_encoder_path = os.path.join(opt.load_weights_folder,
                                     "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    if opt.use_imu:
        imu_lstm = nn.LSTM(6, opt.lstm_hidden_size, opt.lstm_num_layers)
        imu_lstm.cuda()
        imu_lstm.eval()
        lstm_hs = None

        hidden_to_imu = torch.nn.Sequential(
            torch.nn.Linear(opt.lstm_hidden_size, 6), )
        hidden_to_imu.cuda()
        hidden_to_imu.eval()

        if opt.pose_fuse:
            pose_fuse_mlp = torch.nn.Sequential(
                torch.nn.Linear(24, opt.pose_mlp_hidden_size),
                torch.nn.Sigmoid(),
                torch.nn.Linear(opt.pose_mlp_hidden_size, 6),
            )
            pose_fuse_mlp.cuda()
            pose_fuse_mlp.eval()

    img_ext = '.png' if opt.png else '.jpg'

    pred_disps = []
    scale_factors = {}

    kitti_odom = opt.eval_split.startswith("odom")

    # splits_dir is used by both the odometry and raw-KITTI branches below.
    splits_dir = os.path.join(os.path.dirname(__file__), "splits")
    if kitti_odom:
        ids = [int(opt.eval_split.split("_")[1])]
    else:
        videonames = readlines(
            os.path.join(splits_dir, opt.eval_split, "test_video_list.txt"))
        ids = videonames

    for videoname in ids:
        if kitti_odom:
            filenames = readlines(
                os.path.join(splits_dir, opt.eval_split,
                             "test_files_{:02d}.txt".format(videoname)))
            dataset = KITTIOdomDataset(opt.data_path,
                                       filenames,
                                       opt.height,
                                       opt.width, [0, 1],
                                       4,
                                       is_train=False,
                                       use_imu=False)
            dataloader = DataLoader(dataset,
                                    opt.batch_size,
                                    shuffle=False,
                                    num_workers=opt.num_workers,
                                    pin_memory=True,
                                    drop_last=False)
        else:
            filenames = readlines(
                os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
            if opt.use_imu:
                dataset = SequenceRawKittiDataset(
                    opt.data_path, [videoname],
                    filenames,
                    1,
                    imu_data_path=opt.imu_data_path,
                    img_ext=img_ext,
                    frame_idxs=[0, 1],
                    height=encoder_dict['height'],
                    width=encoder_dict['width'],
                    num_scales=4,
                    is_train=False)
                dataloader = DataLoader(dataset, shuffle=False, num_workers=0)
            else:
                filenames = list(
                    filter(lambda f: f.startswith(videoname), filenames))
                dataset = KITTIRAWDataset(opt.data_path,
                                          filenames,
                                          opt.height,
                                          opt.width, [0, 1],
                                          4,
                                          is_train=False,
                                          use_imu=False)
                dataloader = DataLoader(dataset,
                                        opt.batch_size,
                                        shuffle=False,
                                        num_workers=opt.num_workers,
                                        pin_memory=True,
                                        drop_last=False)
        # pred_poses = [np.eye(4).reshape(1, 4, 4)]
        pred_poses = []
        imu_scale_factors = []

        print("EVALUATING ", opt.model_name)

        print("-> Computing pose predictions")

        opt.frame_ids = [0, 1]  # pose network only takes two frames as input

        with torch.no_grad():
            for inputs in dataloader:
                for key, ipt in inputs.items():
                    inputs[key] = ipt.cuda()
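                    # The sequence dataset presumably adds a leading batch
                    # dimension of size 1; drop it for IMU runs.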
                    if opt.use_imu:
                        inputs[key] = inputs[key].squeeze(0)
                input_color = inputs[("color", 0, 0)]
                feature = encoder(input_color)
                output = depth_decoder(feature)

                pred_disp, _ = disp_to_depth(output[("disp", 0)],
                                             opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                pred_disps.append(pred_disp)

                all_color_aug = torch.cat([
                    inputs[("color_aug", i, 0)] for i in sorted(opt.frame_ids)
                ], 1)

                features = [pose_encoder(all_color_aug)]
                axisangle, translation = pose_decoder(features)
                outputs = {}
                outputs[("cam_T_cam", 0, 1)] = transformation_from_parameters(
                    axisangle[:, 0], translation[:, 0], invert=False)

                T = outputs[("cam_T_cam", 0, 1)]
                if opt.use_imu:
                    outputs = predict_poses_from_imu2(opt, inputs, imu_lstm,
                                                      lstm_hs, hidden_to_imu)
                    T_better = outputs[("cam_T_cam_imu", 0, 1)]
                    if opt.pose_fuse:
                        fuse_poses(opt, outputs, pose_fuse_mlp)
                        T_better = outputs[("cam_T_cam_fuse", 0, 1)]

                    R, t = rot_translation_from_transformation(T)
                    Rb, tb = rot_translation_from_transformation(T_better)
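                    # Closed-form least squares: the s minimizing
                    # ||s * t - tb||^2 is sum(tb * t) / sum(t * t).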
                    imu_scale_factor = torch.sum(tb * t) / torch.sum(t**2)

                    imu_scale_factors.append(imu_scale_factor.cpu().numpy())

                    T = T_better

                pred_poses.append(T.cpu().numpy())

            pred_poses = np.concatenate(pred_poses)

            if opt.eval_split.startswith("odom"):
                gt_poses_path = os.path.join(opt.data_path, "poses",
                                             "{:02d}.txt".format(videoname))
            else:
                gt_poses_path = os.path.join(opt.data_path, videoname, "oxts",
                                             "poses.txt")

            eval_pose(opt, pred_poses, gt_poses_path)
        if imu_scale_factors:
            scale_factors["IMU factor"] = imu_scale_factors
    pred_disps = np.concatenate(pred_disps)
    if not kitti_odom:
        eval_depth(opt, pred_disps, scale_factors)
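A note on the IMU scale factor above: it is the closed-form least-squares fit of the network translation t to the IMU-informed translation tb, i.e. the s minimizing ||s*t - tb||^2. A small self-contained numpy check with toy values (not project data):

import numpy as np

t = np.array([0.1, 0.0, 0.8])                 # network translation (toy values)
tb = 5.0 * t + np.array([0.01, -0.02, 0.0])   # IMU-informed, roughly metric

s = np.sum(tb * t) / np.sum(t ** 2)           # same formula as imu_scale_factor
# Brute-force the 1-D least-squares problem to confirm the closed form:
candidates = np.linspace(0.0, 10.0, 100001)
errors = [np.sum((c * t - tb) ** 2) for c in candidates]
assert abs(candidates[int(np.argmin(errors))] - s) < 1e-3
print("recovered scale:", s)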
Example 3
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    sequence_id = int(opt.eval_split.split("_")[1])
    opt.batch_size = 1

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.data_path,
                               filenames,
                               opt.height,
                               opt.width, [0, -1, 1],
                               4,
                               1,
                               is_train=False,
                               img_ext='.png')
    dataloader = DataLoader(dataset,
                            opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    # pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    config_file = "./configs/e2e_mask_rcnn_R_50_FPN_1x.yaml"
    cfg.merge_from_file(config_file)
    cfg.freeze()
    maskrcnn_path = "./e2e_mask_rcnn_R_50_FPN_1x.pth"
    pose_encoder = networks.ResnetEncoder(cfg, maskrcnn_path)
    # pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    # pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(len(opt.frame_ids))
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")

    # opt.frame_ids = [0, 1]  # pose network only takes two frames as input
    ii = 0
    with torch.no_grad():
        for inputs in dataloader:
            for key, ipt in inputs.items():
                if isinstance(ipt, torch.Tensor):
                    inputs[key] = ipt.cuda()

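            # Unlike the other examples, frames are stacked along the batch
            # dimension here (not channels); the encoder features are split
            # back per-frame below.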
            all_color_aug = torch.cat(
                [inputs[("color_aug", i, 0)] for i in opt.frame_ids])

            all_features = pose_encoder(all_color_aug)
            all_features = [
                torch.split(f, opt.batch_size) for f in all_features
            ]

            features = {}
            for i, k in enumerate(opt.frame_ids):
                features[k] = [f[i] for f in all_features]
            pose_inputs = [features[i] for i in opt.frame_ids if i != "s"]

            axisangle, translation = pose_decoder(pose_inputs)
            if ii == 0:
                pred_poses.append(
                    transformation_from_parameters(
                        axisangle[:, 0], translation[:, 0],
                        True).cpu().numpy())
            pred_poses.append(
                transformation_from_parameters(
                    axisangle[:, 1], translation[:, 1]).cpu().numpy())
            if ii % opt.log_frequency == 0:
                print("Processing image {:04d}".format(ii))
            ii += 1
        # pred_poses.append(
        #     transformation_from_parameters(axisangle[:, 1], translation[:, 1]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)

    gt_poses_path = os.path.join(
        "/usr/stud/linp/storage/user/linp/results/kitti", "poses",
        "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape((-1, 3, 4))
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.linalg.inv(
                np.dot(np.linalg.inv(gt_global_poses[i - 1]),
                       gt_global_poses[i])))

    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 3

    for i in range(0, num_frames - 1):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(
            dump_xyz(gt_local_poses[i:i + track_length - 1]))

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))
    '''
    for i in range(0, num_frames - 2):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i + 1:i + track_length]))

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))
    '''

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(
        np.mean(ates), np.std(ates)))

    save_path = os.path.join(opt.load_weights_folder, "poses.npy")
    np.save(save_path, pred_poses)
    print("-> Predictions saved to", save_path)
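For reference, transformation_from_parameters turns an axis-angle rotation and a translation into a 4x4 rigid transform (optionally inverted, as in the ii == 0 branch above). A single-sample numpy sketch of the underlying math; the names here are hypothetical, and this is not the project's batched PyTorch implementation:

import numpy as np


def transform_from_axisangle_translation(axisangle, translation):
    """Rodrigues' formula: axis-angle (3,) + translation (3,) -> 4x4 transform."""
    theta = np.linalg.norm(axisangle)
    if theta < 1e-8:
        R = np.eye(3)  # near-zero rotation
    else:
        k = axisangle / theta  # unit rotation axis
        K = np.array([[0.0, -k[2], k[1]],
                      [k[2], 0.0, -k[0]],
                      [-k[1], k[0], 0.0]])  # cross-product matrix
        R = np.eye(3) + np.sin(theta) * K + (1.0 - np.cos(theta)) * (K @ K)
    T = np.eye(4)
    T[:3, :3] = R
    T[:3, 3] = translation
    return T


# Example: yaw of 0.2 rad while moving 1 m forward.
T = transform_from_axisangle_translation(np.array([0.0, 0.2, 0.0]),
                                         np.array([0.0, 0.0, 1.0]))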
Example 4
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

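    # Normalized KITTI intrinsics (monodepth2 convention: entries are fractions
    # of image width/height); ScaleRecovery presumably rescales them to the
    # 192x640 input internally.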
    K = np.array(
        [[0.58, 0, 0.5, 0], [0, 1.92, 0.5, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
        dtype=np.float32)
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    assert opt.eval_split in ("odom_0", "odom_9", "odom_10"), \
        "eval_split should be odom_0, odom_9 or odom_10"

    sequence_id = int(opt.eval_split.split("_")[1])

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.data_path,
                               filenames,
                               opt.height,
                               opt.width, [0, 1],
                               4,
                               is_train=False)
    dataloader = DataLoader(dataset,
                            opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder,
                                     "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")
    depth_encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    depth_decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))
    depth_encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_encoder_dict = torch.load(depth_encoder_path)
    model_dict = depth_encoder.state_dict()
    depth_encoder.load_state_dict(
        {k: v
         for k, v in depth_encoder_dict.items() if k in model_dict})

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))
    depth_decoder = networks.DepthDecoder(depth_encoder.num_ch_enc)
    depth_decoder.load_state_dict(torch.load(depth_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()
    depth_encoder.cuda()
    depth_encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    pred_poses = []
    pred_disps = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    with torch.no_grad():
        for inputs in dataloader:
            input_color = inputs[("color", 0, 0)].cuda()
            depth_output = depth_decoder(depth_encoder(input_color))

            pred_disp, _ = disp_to_depth(depth_output[("disp", 0)],
                                         opt.min_depth, opt.max_depth)
            pred_disp = pred_disp.cpu()[:, 0].numpy()

            pred_disps.append(pred_disp)

            for key, ipt in inputs.items():
                inputs[key] = ipt.cuda()

            all_color_aug = torch.cat(
                [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

            features = [pose_encoder(all_color_aug)]
            axisangle, translation = pose_decoder(features)

            pred_poses.append(
                transformation_from_parameters(
                    axisangle[:, 0], translation[:, 0]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)
    pred_disps = np.concatenate(pred_disps)
    pred_poses_scaled = []
    ratios_d = []
    gt_norms_div = []
    gt_norms = []
    pred_norms = []
    poses_pred = []
    # ScaleRecovery (external helper) estimates a metric scale factor from the
    # predicted depth; build it once instead of once per frame.
    scale_recovery = ScaleRecovery(1, 192, 640, K).cuda()
    for i in range(pred_poses.shape[0]):
        pred_pose = pred_poses[i]
        pred_disp = pred_disps[i + 1]
        pred_depth = 1 / pred_disp
        pred_depth = torch.from_numpy(pred_depth).unsqueeze(0).cuda()
        ratio = scale_recovery(pred_depth).cpu().item()
        # Keep the full 4x4 transform with a rescaled translation so that
        # dump_xyz() below can chain the poses.
        pred_pose_scaled = pred_pose.copy()
        pred_pose_scaled[:3, 3] *= ratio
        poses_pred.append(pred_pose[:3, 3])
        pred_poses_scaled.append(pred_pose_scaled)
        ratios_d.append(ratio)

    gt_poses_path = os.path.join(opt.data_path, "poses",
                                 "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.linalg.inv(
                np.dot(np.linalg.inv(gt_global_poses[i - 1]),
                       gt_global_poses[i])))

    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 5
    for i in range(0, num_frames - 1):
        local_xyzs = np.array(
            dump_xyz(pred_poses_scaled[i:i + track_length - 1]))
        gt_local_xyzs = np.array(
            dump_xyz(gt_local_poses[i:i + track_length - 1]))
        gt_norm_div = np.linalg.norm(gt_local_xyzs) / np.linalg.norm(
            local_xyzs)
        ates.append(compute_ate(gt_local_xyzs, local_xyzs))
        gt_norms_div.append(gt_norm_div)
        gt_norms.append(np.linalg.norm(gt_local_xyzs))
        pred_norms.append(np.linalg.norm(local_xyzs))

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(
        np.mean(ates), np.std(ates)))

    save_path = os.path.join(os.path.dirname(__file__),
                             "poses_scaled{:02d}.npy".format(sequence_id))
    np.save(save_path, pred_poses_scaled)
    save_path = os.path.join(os.path.dirname(__file__),
                             "poses_gt{:02d}.npy".format(sequence_id))
    np.save(save_path, gt_xyzs)
    save_path = os.path.join(os.path.dirname(__file__),
                             "poses_pred{:02d}.npy".format(sequence_id))
    np.save(save_path, poses_pred)
    save_path = os.path.join(os.path.dirname(__file__),
                             "gt_norms{:02d}.npy".format(sequence_id))
    np.save(save_path, gt_norms)
    save_path = os.path.join(os.path.dirname(__file__),
                             "gt_norms_div{:02d}.npy".format(sequence_id))
    np.save(save_path, gt_norms_div)
    save_path = os.path.join(os.path.dirname(__file__),
                             "ratios_d{:02d}.npy".format(sequence_id))
    np.save(save_path, ratios_d)
    save_path = os.path.join(os.path.dirname(__file__),
                             "pred_norms{:02d}.npy".format(sequence_id))
    np.save(save_path, pred_norms)
    print("-> Predictions saved to", save_path)
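disp_to_depth, used in Examples 2 and 4, maps the decoder's sigmoid output to a disparity bounded by [1/max_depth, 1/min_depth] and the corresponding depth; the callers above keep the scaled disparity and discard the depth. The monodepth2 implementation is essentially:

def disp_to_depth(disp, min_depth, max_depth):
    """Convert the network's sigmoid output to (scaled_disp, depth)."""
    min_disp = 1 / max_depth
    max_disp = 1 / min_depth
    scaled_disp = min_disp + (max_disp - min_disp) * disp
    depth = 1 / scaled_disp
    return scaled_disp, depth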