Example #1
def getGroundTruthPose(image_name, dataset_dir):
    dataset_path = os.path.join(dataset_dir, "real")
    pose_name = os.path.join(dataset_path, image_name + "_modelview.txt")
    projection_name = os.path.join(dataset_path,
                                   image_name + "_projection.txt")
    img_name = os.path.join(dataset_path, image_name + ".png")
    if not os.path.isfile(img_name):
        img_name = os.path.join(dataset_path, image_name + ".jpg")
    if not os.path.isfile(img_name):
        raise RuntimeError("Unable to find the query image.", image_name)

    scene_info_name = os.path.join(dataset_dir, "scene_info.txt")

    pose = FUtil.loadMatrixFromFile(pose_name)

    if not os.path.isfile(scene_info_name):
        # no scene_info.txt: derive the scene center from the pose itself
        R = pose[:3, :3]
        t = pose[:3, 3]
        center = np.dot(-R.transpose(), t)  # camera center C = -R^T t
        # t - (-R C) = t - t, i.e. zero the translation before re-centering
        pose[:3, 3] = pose[:3, 3] - np.dot(-R, center)
    else:
        center = MultimodalPatchesDataset.getSceneCenter(scene_info_name)

    pose = movePoseToCenter(pose, center)
    projection = FUtil.loadMatrixFromFile(projection_name)
    img = cv2.imread(img_name)
    intrinsics = FUtil.projectiveToIntrinsics(projection, img.shape[1],
                                              img.shape[0])
    return pose, intrinsics
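The center/translation juggling above relies on the pinhole identity C = -R^T t for a modelview [R | t]. A minimal self-contained check of that round trip (plain NumPy, no FUtil):

import numpy as np

# a valid rotation (90 degrees about z) and an arbitrary camera center
R = np.array([[0., -1., 0.], [1., 0., 0.], [0., 0., 1.]])
C = np.array([10., 20., 5.])
t = -R.dot(C)                                  # modelview translation t = -R C

pose = np.eye(4)
pose[:3, :3] = R
pose[:3, 3] = t

center = np.dot(-R.transpose(), pose[:3, 3])   # recover C = -R^T t
assert np.allclose(center, C)
# subtracting -R C from t zeroes the translation, as done above
assert np.allclose(pose[:3, 3] - np.dot(-R, center), np.zeros(3))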
Example #2
    def loadRenderedCameraParamsNoNeg(name):
        img_path = name.replace("_texture", "")
        P_name = os.path.splitext(img_path)[0] + "_projection.txt"
        MV_name = os.path.splitext(img_path)[0] + "_modelview.txt"
        if not os.path.isfile(MV_name):
            MV_name = os.path.splitext(img_path)[0] + "_pose.txt"
        P = FUtil.loadMatrixFromFile(P_name)
        pose = FUtil.loadMatrixFromFile(MV_name)

        R = pose[:3, :3]

        # one step of first-order re-orthogonalization: if R drifted from a
        # true rotation (e.g. through text-file rounding), (3I - R R^T)/2 * R
        # pulls it back toward the nearest orthonormal matrix
        corr_R = ((3 * np.eye(3)) - (np.dot(R, R.transpose()))) / 2.0
        R_corrected = np.dot(corr_R, R)
        pose[:3, :3] = R_corrected
        return P, pose
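The (3I - R R^T)/2 factor is one Newton step toward the nearest rotation matrix; pyquaternion's Quaternion(matrix=...) rejects matrices that fail its orthonormality check, which is presumably why the loaders in this codebase apply the correction first. A quick NumPy check (perturbation magnitude illustrative):

import numpy as np

R = np.array([[0., -1., 0.], [1., 0., 0.], [0., 0., 1.]])
R_noisy = R + 1e-3 * np.random.randn(3, 3)   # simulate rounding drift

corr = ((3 * np.eye(3)) - np.dot(R_noisy, R_noisy.T)) / 2.0
R_corr = np.dot(corr, R_noisy)

err_before = np.linalg.norm(np.dot(R_noisy, R_noisy.T) - np.eye(3))
err_after = np.linalg.norm(np.dot(R_corr, R_corr.T) - np.eye(3))
print(err_before, err_after)   # the orthogonality residual drops to ~O(err^2)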
Example #3
    def getRenderedImageParams(self, name):
        abs_name = os.path.join(self.input_dir, name)
        img = cv2.imread(abs_name)
        # channel flip BGR -> RGB; cv2.flip(img, 2) would mirror the image
        # horizontally instead (np.flip over axis 2 matches the rest of the code)
        img = np.flip(img, 2)

        # add the render camera
        width = img.shape[1]
        height = img.shape[0]

        P, pose = Matcher.loadRenderedCameraParams(abs_name)
        R_corrected = pose[:3, :3]
        t = pose[:3, 3]

        intr = FUtil.projectiveToIntrinsics(P, width, height)
        focal = intr[0, 0]
        cx = abs(intr[0, 2])
        cy = abs(intr[1, 2])
        params1 = np.array((focal, cx, cy, 0.0))  # radial distortion 0

        cam_center = self.scene_center + np.dot(-R_corrected.transpose(), t)

        quat = Quaternion(matrix=R_corrected)
        rot = np.array([quat.w, quat.x, quat.y, quat.z])

        return width, height, params1, rot, t, cam_center
Example #4
def poseFrom2D3DWithFOVEPNPOurRansac(src_pts,
                                     fov,
                                     img1_shape,
                                     coords3d,
                                     reprojection_error=10):
    """Estimates the pose using 2D-3D correspondences, EPnP algorithm and
       our implementation of RANSAC in python.
       src_pts, fov [radians] correspond to left view, which should be
                              the photgraph
       dst_pts, intr2, and img2_depth correspond to the rendered image.
       @type array @param src_pts 2D points in the photograph
       @type array @param projection1 OpenGL projection matrix of the camera
                          which took the photo.
       @type array @param dst_pts 2D points in the rendered image
       @type array @param projection2 OpenGL projection matrix of the camera
                          which rendered the synthetic image.
       @type array @param MV2 modelview matrix of the camera which rendered the
                          synthetic image.
       @type array @param img2_depth depth map of the rendered image.
    """
    intr1 = FUtil.fovToIntrinsics(fov, img1_shape[1], img1_shape[0])
    intr1[:, 1:3] = -intr1[:, 1:3]  # OpenGL -> OpenCV camera convention
    ret, R, t, inliers_idx = solveEPNPRansac(
        coords3d, src_pts, intr1, reprojection_error=reprojection_error)
    mask = np.zeros((src_pts.shape[0], 1)).astype(int)
    if ret:
        R[1:3, :] = -R[1:3, :]  # flip the y and z axes back to OpenGL
        t[1:3] = -t[1:3]
        mask[inliers_idx] = 1

    return ret, R, t, mask
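The sign manipulations wrap the solver because the codebase stores OpenGL cameras (y up, looking down -z) while PnP solvers expect OpenCV cameras (y down, looking down +z); both flips are the change of basis S = diag(1, -1, -1). A standalone sketch of that equivalence (intrinsics values illustrative):

import numpy as np

S = np.diag([1., -1., -1.])            # flips the y and z axes

K = np.array([[800., 0., 400.],
              [0., 800., 300.],
              [0., 0., 1.]])

# negating columns 1 and 2 of K is K @ S, i.e. intr1[:, 1:3] = -intr1[:, 1:3]
assert np.allclose(np.dot(K, S)[:, 1:3], -K[:, 1:3])

# negating rows 1 and 2 of [R | t] is S @ R and S @ t, i.e. the post-solve flip
R = np.eye(3)
t = np.array([1., 2., 3.])
assert np.allclose(np.dot(S, R)[1:3, :], -R[1:3, :])
assert np.allclose(np.dot(S, t)[1:3], -t[1:3])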
Example #5
def project(pt_4d, h, w, modelview, projection):
    """Projects Nx4 homogeneous 3D points to Nx2 image coordinates using
    4x4 GL modelview and projection matrices."""
    intr = FUtil.projectiveToIntrinsics(projection, w, h)
    pt_3d = np.dot(modelview, pt_4d.transpose())
    pt_3d = pt_3d / pt_3d[3, :]
    pt_3d = pt_3d[:3, :]
    pt_2d = np.dot(intr, pt_3d)
    pt_2d = (pt_2d / pt_2d[2, :]).transpose()
    return pt_2d[:, :2]
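project() leans on FUtil for the intrinsics; the same chain with a hand-built K (values illustrative) makes the world -> camera -> image steps explicit:

import numpy as np

K = np.array([[800., 0., 400.],
              [0., 800., 300.],
              [0., 0., 1.]])
modelview = np.eye(4)
modelview[:3, 3] = [0., 0., 5.]          # points end up 5 units in front

pts_4d = np.array([[0., 0., 0., 1.],     # Nx4 homogeneous world points
                   [1., 1., 0., 1.]])

pt_3d = np.dot(modelview, pts_4d.transpose())   # world -> camera
pt_3d = (pt_3d / pt_3d[3, :])[:3, :]
pt_2d = np.dot(K, pt_3d)                        # camera -> image plane
pt_2d = (pt_2d / pt_2d[2, :]).transpose()
print(pt_2d[:, :2])    # first point lands on the principal point (400, 300)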
Example #6
    def __init__(self,
                 video_path,
                 initial_gps,
                 fov,
                 working_directory,
                 snapshot,
                 model,
                 earth_file,
                 log_dir,
                 cuda=True,
                 normalize_output=True):
        self.video_path = video_path
        self.initial_gps = initial_gps
        self.fov = fov
        self.working_directory = working_directory
        self.video_frames_dir = os.path.join(self.working_directory,
                                             "video_frames")
        if not os.path.isdir(self.video_frames_dir):
            os.makedirs(self.video_frames_dir)

        self.video_output_dir = os.path.join(self.working_directory,
                                             "video_output")
        if not os.path.isdir(self.video_output_dir):
            os.makedirs(self.video_output_dir)

        self.snapshot = snapshot
        self.model = model
        self.log_dir = log_dir
        self.normalize_output = normalize_output
        self.cuda = cuda
        self.earth_file = earth_file
        self.video = cv2.VideoCapture(self.video_path)
        self.curr_gps = self.initial_gps
        self.framenum = 0

        cmd = "itr --pose-from-gps " + str(self.initial_gps[0]) + " " + str(
            self.initial_gps[1]) + " initcam " + self.earth_file
        os.system(cmd)

        init_pose = FUtil.loadMatrixFromFile("initcam_modelview.txt")
        R = init_pose[:3, :3]
        self.scene_center = np.dot(-R.transpose(), init_pose[:3, 3])
        self.curr_center = np.zeros((3, 1))

        success, image = self.video.read()
        if not success:
            raise RuntimeError("Unable to read the first frame from " +
                               self.video_path)
        frame_base = "frame%d" % self.framenum
        frame_name = frame_base + ".jpg"
        frame_name = os.path.join(self.video_frames_dir, frame_name)
        if not os.path.isfile(frame_name):
            cv2.imwrite(frame_name, image)

        pfargs = self.buildPFArgs(frame_name, self.curr_gps, self.fov)
        self.poseFinder = fp.PoseFinder(pfargs)

        self.KF = self.initKalmanFilter()
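The constructor shells out via os.system with string concatenation; a safer equivalent with subprocess.run (a sketch reusing the same itr CLI; the GPS values and scene file below are illustrative):

import subprocess

initial_gps = (46.55, 7.98)      # illustrative latitude/longitude
earth_file = "scene.earth"       # illustrative .earth scene file

# same command as above, without going through a shell
subprocess.run(
    ["itr", "--pose-from-gps",
     str(initial_gps[0]), str(initial_gps[1]),
     "initcam", earth_file],
    check=True)   # raises CalledProcessError on a non-zero exit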
Example #7
def getGPSPose(image_name, dataset_dir):
    dataset_path = os.path.join(dataset_dir, "real_gps")
    if os.path.isdir(dataset_path):
        pose_name = os.path.join(dataset_path, image_name + "_modelview.txt")
        pose = FUtil.loadMatrixFromFile(pose_name)
        R = pose[:3, :3]
        t = pose[:3, 3]
        center = np.dot(-R.transpose(), t)  # camera center C = -R^T t
        pose[:3, 3] = center  # store the center in the translation slot
        return pose
    return None
Example #8
def poseFrom2D3DWithFOV(src_pts,
                        fov,
                        img1_shape,
                        coords3d,
                        reprojection_error=10,
                        iterationsCount=100000,
                        pnp_flags=cv2.SOLVEPNP_EPNP,
                        use_ransac=True):
    """Estimates the pose using 2D-3D correspondences and PnP algorithm.
       src_pts, fov [radians] correspond to left view, which should be
                              the photgraph
       dst_pts, intr2, and img2_depth correspond to the rendered image.
       @type array @param src_pts 2D points in the photograph
       @type array @param projection1 OpenGL projection matrix of the camera
                          which took the photo.
       @type array @param dst_pts 2D points in the rendered image
       @type array @param projection2 OpenGL projection matrix of the camera
                          which rendered the synthetic image.
       @type array @param MV2 modelview matrix of the camera which rendered the
                          synthetic image.
       @type array @param img2_depth depth map of the rendered image.
    """
    print("fov", fov)
    print("src pts", src_pts.shape, coords3d.shape)
    intr1 = FUtil.fovToIntrinsics(fov, img1_shape[1], img1_shape[0])
    intr1[:, 1:3] = -intr1[:, 1:3]  # OpenGL -> OpenCV camera convention
    dist = np.array([])
    if use_ransac:
        ret, Rvec, t, inliers = cv2.solvePnPRansac(
            coords3d,
            src_pts,
            intr1,
            dist,
            reprojectionError=reprojection_error,
            iterationsCount=iterationsCount,
            flags=pnp_flags)
    else:
        inliers = np.ones((src_pts.shape[0], 1)).astype(int)  # np.int is removed in NumPy >= 1.24
        ret, Rvec, t = cv2.solvePnP(coords3d,
                                    src_pts,
                                    intr1,
                                    dist,
                                    flags=pnp_flags)
    R = cv2.Rodrigues(Rvec, jacobian=0)[0]
    R[1:3, :] = -R[1:3, :]  # flip the y and z axes back to OpenGL
    t[1:3] = -t[1:3]
    t = t.reshape(-1)
    mask = np.zeros((src_pts.shape[0], 1)).astype(int)
    if ret:
        mask[inliers[:, 0]] = 1
    return ret, R, t, mask
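A self-contained round trip for the RANSAC branch: synthesize 3D points, project them with a known camera, and check that cv2.solvePnPRansac recovers the pose (pure OpenCV conventions, so none of the OpenGL sign flips above are needed):

import cv2
import numpy as np

rng = np.random.default_rng(0)
pts3d = rng.uniform(-1, 1, (50, 3)) + np.array([0., 0., 6.])
K = np.array([[800., 0., 400.], [0., 800., 300.], [0., 0., 1.]])
rvec_gt = np.array([0.1, -0.2, 0.05])
tvec_gt = np.array([0.3, -0.1, 0.5])
dist = np.array([])

pts2d, _ = cv2.projectPoints(pts3d, rvec_gt, tvec_gt, K, dist)

ret, rvec, tvec, inliers = cv2.solvePnPRansac(
    pts3d.astype(np.float32), pts2d.astype(np.float32), K, dist,
    reprojectionError=2.0, iterationsCount=1000, flags=cv2.SOLVEPNP_EPNP)

assert ret
print(np.allclose(rvec.ravel(), rvec_gt, atol=1e-3),
      np.allclose(tvec.ravel(), tvec_gt, atol=1e-3))   # True True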
Example #9
def getRenderedPose(image_name, result_dir):
    rendered_dir = os.path.join(result_dir, image_name)
    pose_name = os.path.join(rendered_dir, "*_0.000000_pose.txt")
    pose_files = glob.glob(pose_name)

    if not pose_files:
        return None, None

    if not os.path.isfile(pose_files[0]):
        raise RuntimeError("Unable to find rendered pose: " + pose_files[0])
    scene_info_name = os.path.join(rendered_dir, "scene_info.txt")
    center = MultimodalPatchesDataset.getSceneCenter(scene_info_name)
    pose = FUtil.loadMatrixFromFile(pose_files[0])
    pose = movePoseToCenter(pose, center)
    return pose, center
Example #10
    def __init__(self, dataset_dir, images_path, dataset_list, radius=500):
        self.dataset_dir = dataset_dir
        self.images_path = images_path
        # the parameter was named 'list', which shadows the Python builtin
        self.dataset_items = dataset_list

        # create list of images
        self.imgs_base = set()
        print("Preparing list of images...")
        for pair in tqdm(self.dataset_items):
            parts = pair.split("-")
            self.imgs_base.update({parts[0], parts[1]})

        # load camera center for each image
        self.all_names = []
        self.all_centers = []
        print("Loading camera info...")
        for image_base in tqdm(self.imgs_base):
            MV_name = image_base + "_modelview.txt"
            MV_path = os.path.join(self.images_path, MV_name)
            RT = FUtil.loadMatrixFromFile(MV_path)
            C = np.dot(-RT[:3, :3].transpose(), RT[:3, 3])
            self.all_names.append(image_base)
            self.all_centers.append(C)
        self.all_centers = np.array(self.all_centers)
        self.kdt = KDTree(self.all_centers, leaf_size=40, metric='euclidean')

        count = self.kdt.query_radius(self.all_centers,
                                      radius,
                                      count_only=True)
        cnt_max = np.max(count)
        self.max_num_points = cnt_max
        count = count / cnt_max

        density = np.log(count)
        norm = matplotlib.colors.Normalize()
        colors = np.floor(plt.cm.jet(norm(density)) * 255)
        ply_name = "dataset_camera_centers_heatmap_" + str(radius) + ".ply"
        # savePointCloudToPly(self.all_centers, colors[:, :3], ply_name)

        self.name_density = {}
        for idx, name in enumerate(self.all_names):
            self.name_density[name] = count[idx]
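The density bookkeeping above is easy to exercise in isolation; a minimal sketch with fake camera centers (sklearn's KDTree, exactly as used above):

import numpy as np
from sklearn.neighbors import KDTree

centers = np.random.uniform(0, 1000, (200, 3))   # fake camera centers
kdt = KDTree(centers, leaf_size=40, metric='euclidean')

# neighbours of each center within the radius (each center counts itself)
count = kdt.query_radius(centers, r=500, count_only=True)
density = count / np.max(count)                  # normalized to (0, 1]
print(density.min(), density.max())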
Example #11
def unproject(coords_2d, depth, modelview, projection):
    """Unprojects 2d image coordinates to 3D based on a depth map.
    @coords_2d [numpy.array] of shape Nx2, where coordinates are stored
    as Nx(x,y)
    @depth [numpy.array] is the depth map of the source image. Must have the
    same size as the source image.
    @param modelview numpy array 4x4 of GL modelview matrix to define
    transformation from 3D to camera coordinates.
    @param projection numpy array 4x4 of GL projection matrix to define
    transformation from camera coordinates to image coordinates.

    @return [np.array] Nx4 array of 3D points in homogeneous coordinates (w=1).
    """

    h = depth.shape[0]
    w = depth.shape[1]
    intr = FUtil.projectiveToIntrinsics(projection, w, h)
    intr_inv = np.linalg.inv(intr)
    mv_inv = np.linalg.inv(modelview)

    z1 = np.ones((coords_2d.shape[0], 1))
    c2d = coords_2d.astype(int)
    # per-point depth lookup; note the (row, col) = (y, x) indexing
    depths = (depth[c2d[:, 1], c2d[:, 0]]).reshape(-1, 1)
    depths = np.hstack([depths, depths, depths])
    # scale the homogeneous image coordinates by depth and invert the intrinsics
    coords_im = np.hstack([c2d, z1])
    coords_im = coords_im * depths
    coords_3d = np.dot(intr_inv, coords_im.transpose())
    # append w=1 and transform from camera back to world coordinates
    coords_3d = np.concatenate((coords_3d, np.ones((1, coords_3d.shape[1]))),
                               axis=0)
    coords_3d = np.dot(mv_inv, coords_3d).transpose()

    return coords_3d
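unproject() inverts project() from Example #5 up to the depth lookup; a standalone consistency check with an identity modelview and a synthetic depth map (values illustrative):

import numpy as np

K = np.array([[800., 0., 400.],
              [0., 800., 300.],
              [0., 0., 1.]])
depth = np.full((600, 800), 5.0)                 # constant 5-unit depth

coords_2d = np.array([[400, 300], [500, 350]])   # (x, y) pixel coordinates
d = depth[coords_2d[:, 1], coords_2d[:, 0]].reshape(-1, 1)

# image -> camera: scale homogeneous pixels by depth, invert the intrinsics
coords_im = np.hstack([coords_2d, np.ones((2, 1))]) * d
coords_3d = np.dot(np.linalg.inv(K), coords_im.transpose()).transpose()

# reprojecting must give the original pixels back (modelview = identity)
back = np.dot(K, coords_3d.transpose())
back = (back / back[2, :]).transpose()
assert np.allclose(back[:, :2], coords_2d)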
Example #12
def poseFrom2D3D(src_pts,
                 projection1,
                 img1_shape,
                 dst_pts,
                 projection2,
                 MV2,
                 img2_depth,
                 pnp_flags=cv2.SOLVEPNP_EPNP):
    """Estimates the pose using 2D-3D correspondences and PnP algorithm.
       src_pts, intr1 correspond to left view, which should be the photograph
       dst_pts, intr2, and img2_depth correspond to the rendered image.
       @type array @param src_pts 2D points in the photograph
       @type array @param projection1 OpenGL projection matrix of the camera
                          which took the photo.
       @type array @param dst_pts 2D points in the rendered image
       @type array @param projection2 OpenGL projection matrix of the camera
                          which rendered the synthetic image.
       @type array @param MV2 modelview matrix of the camera which rendered the
                          synthetic image.
       @type array @param img2_depth depth map of the rendered image.
    """
    coords3d = unproject(dst_pts, img2_depth, MV2, projection2)[:, :3]
    intr1 = FUtil.projectiveToIntrinsics(projection1, img1_shape[1],
                                         img1_shape[0])
    intr1[:, 1:3] = -intr1[:, 1:3]  # OpenGL -> OpenCV camera convention
    dist = np.array([])
    ret, Rvec, t, inliers = cv2.solvePnPRansac(coords3d,
                                               src_pts,
                                               intr1,
                                               dist,
                                               reprojectionError=10,
                                               iterationsCount=100000,
                                               flags=pnp_flags)
    R = cv2.Rodrigues(Rvec, jacobian=0)[0]
    R[1:3, :] = -R[1:3, :]  # flip the y and z axes back to OpenGL
    t[1:3] = -t[1:3]
    t = t.reshape(-1)
    mask = np.zeros((src_pts.shape[0], 1))

    if ret:
        mask[inliers[:, 0]] = 1
    return ret, R, t, mask
Example #13
    def loadImages(self, ext=".png"):
        img1_base = os.path.splitext(self.info['img1_name'])[0]
        img2_base = os.path.splitext(self.info['img2_name'])[0]
        #print("IMG BASE", img2_base)
        MV1_name = os.path.join(self.images_path, img1_base + "_modelview.txt")
        MV2_name = os.path.join(
            self.images_path,
            img2_base.replace("_texture", "") + "_modelview.txt")

        P1_name = os.path.join(self.images_path, img1_base + "_projection.txt")
        P2_name = os.path.join(
            self.images_path,
            img2_base.replace("_texture", "") + "_projection.txt")

        self.RT1 = (FUtil.loadMatrixFromFile(MV1_name))
        self.RT2 = (FUtil.loadMatrixFromFile(MV2_name))

        self.P1 = FUtil.loadMatrixFromFile(P1_name)
        self.P2 = FUtil.loadMatrixFromFile(P2_name)

        # photograph
        if self.render_only:
            img1_path = os.path.join(self.images_path,
                                     img1_base + "_texture" + ext)
        else:
            img1_path = os.path.join(self.images_path, img1_base + ext)
        # render
        img2_path = os.path.join(self.images_path, img2_base + ext)

        # render
        img1_render_path = os.path.join(self.images_path,
                                        img1_base + "_texture" + ext)
        if not self.render_only:
            # remove _texture to convert the path to the photo
            img2_base = img2_base.replace("_texture", "")
        img2_photo_path = os.path.join(self.images_path, img2_base + ext)

        # start = timer()
        img1 = cv2.imread(img1_path)
        img2 = cv2.imread(img2_path)
        #print(img1_path)
        img1 = np.flip(img1, 2)  # was needed with cv2.imread
        img2 = np.flip(img2, 2)  # was needed with cv2.imread

        img1_render = cv2.imread(img1_render_path)
        img2_photo = cv2.imread(img2_photo_path)
        img1_render = np.flip(img1_render, 2)  # was needed with cv2.imread
        img2_photo = np.flip(img2_photo, 2)  # was needed with cv2.imread

        #if self.photo_jitter:
        # apply gamma to simulate different camera response functions
        #gamma = np.random.uniform(0.8, 1.1)
        #img1 = np.clip(img1 ** (1 / gamma), 0, 255)
        #gamma = np.random.uniform(0.8, 1.1)
        #img2_photo = np.clip(img2_photo ** (1 / gamma), 0, 255)

        w1, h1, s1 = getSizeFOV(self.info['img1_shape'][1],
                                self.info['img1_shape'][0],
                                self.info['fov_1'],
                                maxres=self.left_maxres)
        w2, h2, s2 = getSizeFOV(self.info['img2_shape'][1],
                                self.info['img2_shape'][0],
                                self.info['fov_2'],
                                maxres=self.right_maxres)

        img1 = cv2.resize(img1, (w1, h1), interpolation=cv2.INTER_AREA)
        img2 = cv2.resize(img2, (w2, h2), interpolation=cv2.INTER_AREA)
        img1_render = cv2.resize(img1_render, (w1, h1),
                                 interpolation=cv2.INTER_AREA)

        if self.color_jitter:
            # jitter color of rendered images
            img2 = self.transform(torch.from_numpy(img2.transpose(
                (2, 0, 1)))).numpy().transpose((1, 2, 0))
            img1_render = self.transform(
                torch.from_numpy(img1_render.transpose(
                    (2, 0, 1)))).numpy().transpose((1, 2, 0))

        img2_photo = cv2.resize(img2_photo, (w2, h2),
                                interpolation=cv2.INTER_AREA)

        # print("image loaded in: ", timer() - start)

        img1_depth_path_npy = os.path.join(self.images_path,
                                           img1_base + "_texture_depth.npy")
        img1_depth_path = os.path.join(self.images_path,
                                       img1_base + "_texture_depth.txt.gz")
        img2_depth_path_npy = os.path.join(self.images_path,
                                           img2_base + "_texture_depth.npy")
        img2_depth_path = os.path.join(self.images_path,
                                       img2_base + "_texture_depth.txt.gz")
        img1_normals_path = os.path.join(self.images_path,
                                         img1_base + "_texture_normals.png")
        img2_normals_path = os.path.join(self.images_path,
                                         img2_base + "_texture_normals.png")
        img1_silhouettes_path = os.path.join(
            self.images_path, img1_base + "_texture_silhouettes.jpg")
        img2_silhouettes_path = os.path.join(
            self.images_path, img2_base + "_texture_silhouettes.jpg")

        img1_depth = None
        img2_depth = None
        img1_normals = None
        img2_normals = None

        if self.use_depth:
            # load depths
            img1_depth = self.loadAndCacheDepth(img1_depth_path,
                                                img1_depth_path_npy, w1, h1)
            img2_depth = self.loadAndCacheDepth(img2_depth_path,
                                                img2_depth_path_npy, w2, h2)

            # divide by max depth so that we feed comparable data to the network
            img1_render = np.concatenate([img1_render, img1_depth / 500000.0],
                                         axis=2)
            img2 = np.concatenate([img2, img2_depth / 500000.0], axis=2)

        if self.use_normals:
            img1_normals = self.loadAndCacheNormals(img1_normals_path,
                                                    img1_depth,
                                                    img1_depth_path,
                                                    img1_depth_path_npy, w1,
                                                    h1)
            img2_normals = self.loadAndCacheNormals(img2_normals_path,
                                                    img2_depth,
                                                    img2_depth_path,
                                                    img2_depth_path_npy, w2,
                                                    h2)

            img1_render = np.concatenate([img1_render, img1_normals], axis=2)
            img2 = np.concatenate([img2, img2_normals], axis=2)

        if self.use_silhouettes:
            img1_silhouettes = self.loadAndCacheSilhouettes(
                img1_silhouettes_path, img1_depth, img1_depth_path,
                img1_depth_path_npy, w1, h1)
            img2_silhouettes = self.loadAndCacheSilhouettes(
                img2_silhouettes_path, img2_depth, img2_depth_path,
                img2_depth_path_npy, w2, h2)

            img1_render = np.concatenate([img1_render, img1_silhouettes],
                                         axis=2)
            img2 = np.concatenate([img2, img2_silhouettes], axis=2)

        #project 3D coords to first view for distance calculations
        v1_d = self.info['coords3d_1']
        v2_d = self.info['coords3d_2']
        # distances in 3D for negative patch selection (not used anymore, so
        # that the validation loss is comparable across experiments)
        #self.dists = np.linalg.norm(v1_d[:, None] - v2_d, axis=2)

        v1_1_2d = project(v1_d, h1, w1, self.RT1, self.P1)
        v2_2_2d = project(v2_d, h2, w2, self.RT2, self.P2)
        self.v2_2_2d = v2_2_2d
        self.v2_1_2d = project(v2_d, h1, w1, self.RT1, self.P1)
        self.v1_2_2d = project(v1_d, h2, w2, self.RT2, self.P2)
        #distances in 2D for negative patch selection (used always)
        self.dists = np.linalg.norm(v1_1_2d[:, None] - self.v2_1_2d, axis=2)

        # patchsize = self.patch_radius
        # sel1 = np.logical_and(v1_1_2d > patchsize, v1_1_2d < (np.array([w1, h1]) - patchsize))
        # sel1 = np.logical_and(sel1[:, 0], sel1[:, 1])
        # sel2 = np.logical_and(v2_2_2d > patchsize, v2_2_2d < (np.array([w2, h2]) - patchsize))
        # sel2 = np.logical_and(sel2[:, 0], sel2[:, 1])
        # sel = sel1 #np.logical_and(sel1, sel2)
        # v1_1_2d = v1_1_2d[sel]
        # v2_2_2d = v2_2_2d[sel]

        self.info['coords2d_1'] = np.flip(v1_1_2d, axis=1) / s1
        self.info['coords2d_2'] = np.flip(v2_2_2d, axis=1) / s2

        self.uniform_randcoords = np.array([
            np.random.randint(self.patch_radius,
                              self.info['img2_shape'][0] - self.patch_radius,
                              self.numpatches),
            np.random.randint(self.patch_radius,
                              self.info['img2_shape'][1] - self.patch_radius,
                              self.numpatches)
        ]).astype(float).transpose()  # np.float is removed in NumPy >= 1.24
        self.uniform_randcoords_out = np.flip(self.uniform_randcoords,
                                              axis=1).copy() * s2
        self.dists_neg = np.linalg.norm(self.v1_2_2d[:, None] -
                                        self.uniform_randcoords_out,
                                        axis=2)

        if self.uniform_negatives and self.dist_type == '3D':
            if img2_depth is None:
                img2_depth = self.loadAndCacheDepth(img2_depth_path,
                                                    img2_depth_path_npy, w2,
                                                    h2)
            self.uniform_randcoords_out_3D = unproject(
                self.uniform_randcoords_out, img2_depth, self.RT2, self.P2)

        return img1, img2, img1_render, img2_photo
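self.transform above jitters CHW tensors; the dataset's exact transform isn't shown, so as an assumption, here is the same HWC -> CHW -> HWC dance with torchvision's ColorJitter (parameters illustrative):

import numpy as np
import torch
from torchvision import transforms

transform = transforms.ColorJitter(brightness=0.2, contrast=0.2,
                                   saturation=0.2, hue=0.05)

img = np.random.rand(240, 320, 3).astype(np.float32)   # HWC, values in [0, 1]
# HWC numpy -> CHW tensor, jitter, back to HWC (mirrors the calls above)
out = transform(torch.from_numpy(img.transpose((2, 0, 1))))
out = out.numpy().transpose((1, 2, 0))
print(out.shape)   # (240, 320, 3)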
Example #14
def getResultForImage(image_name, result_dir, dataset_dir, matching_dir_name):
    # get translation and rotation result for best pose
    bp_R_gt_err = 180
    bp_t_gt_err = 100000
    bp_t_ren_err = 100000
    rp_R_gt_err = 180
    rp_t_gt_err = 100000
    rp_t_ren_err = 100000
    gps_t_gt_err = 100000
    rendered_pose = None
    gps_pose = None
    gt_fov = -1
    bp_fov = -1
    bp_fov_ba = -1
    rp_fov = -1
    rp_fov_ba = -1

    bp_min_err = -1
    bp_num_inliers = -1
    rp = None
    rp_min_err = -1
    rp_num_inliers = -1
    gt_t_ren_err = 100000
    matched_dist = np.array([-1])
    all_reproj_dist = np.array([-1])
    rp_err_axis = -1
    rp_err_perp = -1
    bp_filename = None

    gps_pose = getGPSPose(image_name, dataset_dir)

    rendered_pose, ren_center = getRenderedPose(image_name, result_dir)
    #if rendered_pose is None:
    #    rendered_pose = rendered_pose_gt

    if rendered_pose is None and ren_center is None:
        return (bp_R_gt_err, bp_t_gt_err, bp_t_ren_err, bp_min_err,
                bp_num_inliers, rp_R_gt_err, rp_t_gt_err, rp_t_ren_err,
                rp_min_err, rp_num_inliers, gt_t_ren_err, gps_t_gt_err,
                rp_err_axis, rp_err_perp, gt_fov, bp_fov, bp_fov_ba, rp_fov,
                rp_fov_ba, matched_dist, all_reproj_dist, bp_filename)
    try:
        gt_pose, gt_intrinsics = getGroundTruthPose(image_name, dataset_dir)
    except Exception:
        print("Ground truth not found, skipping.")
        return (bp_R_gt_err, bp_t_gt_err, bp_t_ren_err, bp_min_err,
                bp_num_inliers, rp_R_gt_err, rp_t_gt_err, rp_t_ren_err,
                rp_min_err, rp_num_inliers, gt_t_ren_err, gps_t_gt_err,
                rp_err_axis, rp_err_perp, gt_fov, bp_fov, bp_fov_ba, rp_fov,
                rp_fov_ba, matched_dist, all_reproj_dist, bp_filename)

    gt_fov = FUtil.intrinsicsToFov(gt_intrinsics)

    matching_dir = os.path.join(result_dir, matching_dir_name)
    result_img_dir = os.path.join(matching_dir, image_name)
    bp, bp_min_err, bp_num_inliers, bp_filename, bp_fov, bp_fov_ba = PoseFinder.findBestPose(
        result_img_dir)

    if bp_fov is None:
        bp_fov = -1
    if bp_fov_ba is None:
        bp_fov_ba = -1
    rp_path = os.path.join(result_img_dir, image_name + "_bestpose_pose.txt")
    rp_info_path = os.path.join(result_img_dir,
                                image_name + "_bestpose_info.txt")
    rp = None
    rp_min_err = -1
    rp_num_inliers = -1
    if os.path.isfile(rp_path) and os.path.isfile(rp_info_path):
        rp = np.loadtxt(rp_path)
        rp_min_err, rp_num_inliers, rp_fov, rp_fov_ba = PoseFinder.parseInfoFile(
            rp_info_path)
        if rp_fov is None:
            rp_fov = -1
        if rp_fov_ba is None:
            rp_fov_ba = -1

    gt_t_ren_err = np.linalg.norm(gt_pose[:3, 3] - rendered_pose[:3, 3])
    if bp is not None:
        bp = movePoseToCenter(bp, ren_center)
        bp_R_gt_err, bp_t_gt_err = calculateErrors(gt_pose[:3, :3],
                                                   gt_pose[:3, 3],
                                                   bp[:3, :3], bp[:3, 3])
        bp_t_ren_err = np.linalg.norm(bp[:3, 3] - rendered_pose[:3, 3])
    else:
        bp_min_err = -1
        bp_num_inliers = -1

    # get translation and rotation result for refined pose
    matched_dist = np.array([-1])
    all_reproj_dist = np.array([-1])
    rp_err_axis = -1
    rp_err_perp = -1
    if rp is not None and not np.any(np.isnan(rp[:3, 3])):
        reproj_3D_path = os.path.join(result_img_dir,
                                      image_name + "_reproj_pt3D.npy")
        matched_3D_path = os.path.join(result_img_dir,
                                       image_name + "_matched_pt3D.npy")
        matched_3D = np.load(matched_3D_path)

        R_rp = rp[:3, :3]
        rp_center = np.dot(-R_rp.transpose(), rp[:3, 3])
        matched_dist = np.linalg.norm(matched_3D - rp_center, axis=1)
        if os.path.isfile(reproj_3D_path):
            reproj_3D = np.load(reproj_3D_path)
            all_reproj_dist = np.linalg.norm(reproj_3D - rp_center, axis=1)
        else:
            all_reproj_dist = np.array([-1])
        rp = movePoseToCenter(rp, ren_center)

        rp_err_axis, rp_err_perp = calculatePosErrAlongCameraAxis(
            gt_pose[:3, 3], rp, gt_intrinsics)

        rp_R_gt_err, rp_t_gt_err = calculateErrors(gt_pose[:3, :3],
                                                   gt_pose[:3, 3],
                                                   rp[:3, :3], rp[:3, 3])
        rp_t_ren_err = np.linalg.norm(rp[:3, 3] - rendered_pose[:3, 3])
    if gps_pose is not None:
        gps_t_gt_err = np.linalg.norm(gps_pose[:3, 3] - gt_pose[:3, 3])

    return (bp_R_gt_err, bp_t_gt_err, bp_t_ren_err, bp_min_err,
            bp_num_inliers, rp_R_gt_err, rp_t_gt_err, rp_t_ren_err,
            rp_min_err, rp_num_inliers, gt_t_ren_err, gps_t_gt_err,
            rp_err_axis, rp_err_perp, gt_fov, bp_fov, bp_fov_ba, rp_fov,
            rp_fov_ba, matched_dist, all_reproj_dist, bp_filename)
Example #15
    def loadPositionalInfo(self):
        num_imgs = len(self.image_names)
        image_coords_path = os.path.join(
            self.input_dir, "image_coords_ecef" + self.suffix + ".npz")
        recompute = True
        if os.path.exists(image_coords_path):
            positional_info = np.load(image_coords_path, allow_pickle=True)
            self.image_ecef = positional_info['image_ecef']
            self.image_have_rot = positional_info['image_have_rot']
            self.image_rot = positional_info['image_rot']
            if self.image_ecef.shape[0] == num_imgs:
                recompute = self.recompute

        if recompute:
            self.image_ecef = np.zeros([num_imgs, 3])
            self.image_have_rot = np.zeros([num_imgs, 1])
            self.image_rot = [
                Quaternion(axis=[1, 0, 0], angle=0)
                for i in range(0, num_imgs)
            ]
            for idx in tqdm(range(0, num_imgs)):  # 'idx' avoids shadowing id()
                name = self.image_names[idx]
                abs_name = os.path.join(self.input_dir, name)
                if self.isRender(name):
                    # load the modelview matrix, get cam center,
                    # add scene center
                    # and convert to WGS84 coordinates, then convert back to
                    # ECEF with altitude=0 since we usually don't have
                    # precise altitude for real photographs.
                    img_path = abs_name.replace("_texture", "")
                    MV_name = os.path.splitext(img_path)[0] + "_modelview.txt"
                    if not os.path.isfile(MV_name):
                        MV_name = os.path.splitext(img_path)[0] + "_pose.txt"
                    pose = FUtil.loadMatrixFromFile(MV_name)
                    R = pose[:3, :3]
                    # first-order re-orthogonalization of R, as in
                    # loadRenderedCameraParamsNoNeg above
                    corr_R = (((3 * np.eye(3)) - (np.dot(R, R.transpose()))) /
                              2.0)
                    R_corrected = np.dot(corr_R, R)
                    self.image_rot[idx] = Quaternion(matrix=R_corrected)
                    self.image_have_rot[idx] = 1
                    C = np.dot(-R.transpose(), pose[:3, 3]) + self.scene_center
                    lat, lon, alt = pm3d.ecef.ecef2geodetic(C[0], C[1], C[2])
                    #print("rendered lat lon alt: ", lat, lon, alt)
                    x, y, z = pm3d.ecef.geodetic2ecef(lat, lon, 0)
                else:
                    # load GPS from the photograph
                    x, y, z = (0, 0, 0)
                    lat, lon, alt = (-1, -1, -1)
                    with exiftool.ExifTool() as et:
                        res = et.execute_json("-n", "-GPSLatitude",
                                              "-GPSLatitudeRef",
                                              "-GPSLongitudeRef",
                                              "-GPSLongitude", "-GPSAltitude",
                                              abs_name)
                        if (('EXIF:GPSLatitude' in res[0])
                                and ('EXIF:GPSLongitude' in res[0])
                                and ('EXIF:GPSLatitudeRef' in res[0])
                                and ('EXIF:GPSLongitudeRef' in res[0])):
                            lat = res[0]['EXIF:GPSLatitude']
                            lon = res[0]['EXIF:GPSLongitude']
                            lat_ref = res[0]['EXIF:GPSLatitudeRef']
                            lon_ref = res[0]['EXIF:GPSLongitudeRef']
                            if lat_ref == 'S':
                                lat = -lat
                            if lon_ref == 'W':
                                lon = -lon
                            # if 'EXIF:GPSAltitude' in res[0]:
                            # alt = res[0]['EXIF:GPSAltitude']
                            #print("photo lat lon lat_ref lon_ref: ", lat, lon, lat_ref, lon_ref)
                            try:
                                x, y, z = pm3d.ecef.geodetic2ecef(lat, lon, 0)
                            except ValueError as ve:
                                print("Unable to use GPS from the image",
                                      name, "reason:", ve,
                                      "resetting the position to unknown.")
                                x, y, z = (0, 0, 0)

                # Set image_ecef to zeros if x, y, z are zeros
                # in order to mask them correctly in the match() method.
                if x == y == z == 0:
                    self.image_ecef[idx, :] = np.zeros(3)
                else:
                    # coords in km due to double imprecision (/1000)
                    self.image_ecef[idx, :] = (
                        (np.array([x, y, z]) - self.scene_center) / 1000.0)

                np.savez(image_coords_path,
                         image_ecef=self.image_ecef,
                         image_rot=self.image_rot,
                         image_have_rot=self.image_have_rot)
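The geodetic/ECEF conversions come from pymap3d; a round trip of the pattern used above, including the altitude reset applied to photographs (coordinates illustrative):

import pymap3d as pm3d

lat, lon, alt = 46.55, 7.98, 1200.0               # illustrative position
x, y, z = pm3d.ecef.geodetic2ecef(lat, lon, alt)

# drop the altitude to 0, as done for real photographs above
x0, y0, z0 = pm3d.ecef.geodetic2ecef(lat, lon, 0)

lat2, lon2, alt2 = pm3d.ecef.ecef2geodetic(x0, y0, z0)
print(lat2, lon2, alt2)                           # lat/lon preserved, alt ~ 0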
Example #16
def poseEPNPBAIterative(R,
                        t,
                        fov,
                        mask,
                        photo_shape,
                        coords2d,
                        coords3d,
                        reprojection_error=10):
    num_inliers = 0
    cnt_same = 0
    while True:
        if cnt_same > 5:
            break
        # bundle adjustment
        sel = mask[:, 0] == 1
        pose = np.eye(4)  # bottom row [0, 0, 0, 1]; np.ones gave an invalid pose
        pose[:3, :3] = R
        pose[:3, 3] = t
        intr1 = FUtil.fovToIntrinsics(fov, photo_shape[1], photo_shape[0])
        left_cam_idx = np.zeros(coords2d[sel].shape[0]).astype(int)
        left_cam_pt_idx = np.arange(0, coords2d[sel].shape[0]).astype(int)
        points2d = coords2d - np.array(
            [[photo_shape[1] / 2.0, photo_shape[0] / 2.0]])
        res_R, res_t, res_intr, success = BundleAdjustment.bundleAdjustment(
            [pose], [intr1],
            coords3d[sel],
            points2d[sel],
            left_cam_idx,
            left_cam_pt_idx,
            show=False)
        print("res intr BA", res_intr)
        if success:
            # update FOV with refined estimate
            fov = 2 * np.arctan2(photo_shape[1] / 2.0, res_intr[0])
            print("estim FOV BA:", (fov * 180) / np.pi)
            print("R BA:", res_R)
            print("t BA:", res_t)

            # measure the number of inliers by running another
            # RANSAC round
            ret, R_epnp, t_epnp, mask_epnp = poseFrom2D3DWithFOV(
                coords2d, fov, photo_shape, coords3d, reprojection_error)
            new_num_inliers = np.sum(mask_epnp)  # count inliers of the new estimate
            print("EPNP R", R_epnp)
            print("EPNP t", t_epnp)

            print("EPNP num inliers", new_num_inliers)
            # count consecutive iterations with the same number of inliers
            if (new_num_inliers == num_inliers):
                cnt_same += 1
            else:
                # if the number of inliers increases or decreases, reset counter
                cnt_same = 0
            if new_num_inliers >= num_inliers:
                num_inliers = new_num_inliers
                R = R_epnp
                t = t_epnp
                mask = mask_epnp
            else:
                break
        else:
            break
    return R, t, fov, mask
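The FOV refresh inside the loop inverts the pinhole relation f = (w/2) / tan(fov/2), with res_intr[0] playing the role of the refined focal length; a two-line check of that identity:

import numpy as np

w = 1024.0
fov = np.radians(60.0)
f = (w / 2.0) / np.tan(fov / 2.0)        # focal length in pixels
fov_back = 2 * np.arctan2(w / 2.0, f)    # the update used above
assert np.isclose(fov_back, fov)
print(np.degrees(fov_back))              # 60.0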
Example #17
def getPatches(img1_name,
               img2_name,
               show=False,
               max_w=1024,
               patch_size=64,
               npatches=100,
               measure=False,
               save_ply=False):

    print("getPatches, ", img1_name, img2_name)
    img1_base, img1_ext = os.path.splitext(img1_name)
    img2_base, img2_ext = os.path.splitext(img2_name)

    img1_base = re.sub("_texture", "", img1_base)
    img2_base = re.sub("_texture", "", img2_base)

    img1_depth_name = img1_base + "_texture_depth.txt.gz"
    img2_depth_name = img2_base + "_texture_depth.txt.gz"

    MV1_name = img1_base + "_modelview.txt"
    MV2_name = img2_base + "_modelview.txt"

    P1_name = img1_base + "_projection.txt"
    P2_name = img2_base + "_projection.txt"

    RT1 = (FUtil.loadMatrixFromFile(MV1_name))
    RT2 = (FUtil.loadMatrixFromFile(MV2_name))

    P1 = FUtil.loadMatrixFromFile(P1_name)
    P2 = FUtil.loadMatrixFromFile(P2_name)

    img1_depth = loadDepth(img1_depth_name)
    img2_depth = loadDepth(img2_depth_name)

    img_1_depth_wrong = (np.min(img1_depth) == np.max(img1_depth))
    img_2_depth_wrong = (np.min(img2_depth) == np.max(img2_depth))
    if img_1_depth_wrong or img_2_depth_wrong:
        msg = "One of the two images has empty depth map."
        raise RuntimeError(msg)

    img1 = cv2.imread(img1_name)
    img2 = cv2.imread(img2_name)

    img1 = np.flip(img1, 2)
    img2 = np.flip(img2, 2)

    img1_aspect = float(img1.shape[0]) / img1.shape[1]
    img2_aspect = float(img2.shape[0]) / img2.shape[1]

    h1 = int(img1_aspect * max_w)
    h2 = int(img2_aspect * max_w)

    img1_resized = cv2.resize(img1, (max_w, h1))
    img2_resized = cv2.resize(img2, (max_w, h2))
    img1_depth = cv2.resize(img1_depth, (max_w, h1))
    img2_depth = cv2.resize(img2_depth, (max_w, h2))

    img1_3d, imgv1 = unproject_image(img1_resized, img1_depth, RT1, P1)
    img2_3d, imgv2 = unproject_image(img2_resized, img2_depth, RT2, P2)

    if save_ply:
        savePointCloudToPly(img1_3d[:, :3], imgv1[:, :], 'model_v1.ply')
        savePointCloudToPly(img2_3d[:, :3], imgv2[:, :], 'model_v2.ply')

    v1_idx, v2_idx = findIndicesOfCorresponding3DPoints(img1_3d, img2_3d)

    if v1_idx.shape[0] <= 0 or v2_idx.shape[0] <= 0:
        msg = "These two images do not contain any corresponding points."
        raise RuntimeError(msg)

    fov_1, fovy_1 = FUtil.projectiveToFOV(P1)
    fov_2, fovy_2 = FUtil.projectiveToFOV(P2)

    patch_radius = patch_size // 2  # integer division; / 2 yields a float in Python 3
    coords1, coords2, sel = genRandomCorrespondingPoints2D(img1_resized.shape,
                                                           img2_resized.shape,
                                                           fov_1,
                                                           fov_2,
                                                           v1_idx,
                                                           v2_idx,
                                                           radius=patch_radius,
                                                           N=npatches)

    if measure:
        numiter = 1
        start = timer()
        for i in tqdm(range(0, numiter)):
            patches_1, patches_2 = generatePatchesFast(
                img1, img2, img1_resized.shape, img2_resized.shape, coords1,
                coords2, fov_1, fov_2, show, patch_radius)
        end = timer()
        elapsed = end - start
        print("Total runtime patches fast: ", elapsed, "one iteration: ",
              elapsed / numiter)

        # grid1 = utils.make_grid(torch.from_numpy(patches_1).float() / 255.0, nrow=30)
        # grid2 = utils.make_grid(torch.from_numpy(patches_2).float() / 255.0, nrow=30)
        # fig = plt.figure(dpi=600)
        # plt.subplot(1,2,1)
        # plt.imshow(grid1.numpy().transpose((1, 2, 0)))
        # plt.axis('off')
        # plt.subplot(1,2,2)
        # plt.imshow(grid2.numpy().transpose((1, 2, 0)))
        # plt.axis('off')

        start = timer()
        for i in tqdm(range(0, numiter)):
            patches_1, patches_2 = generatePatches(img1, img2,
                                                   img1_resized.shape,
                                                   img2_resized.shape, coords1,
                                                   coords2, fov_1, fov_2, show,
                                                   patch_radius)
        end = timer()
        elapsed = end - start
        print("Total runtime patches loop: ", elapsed, "one iteration: ",
              elapsed / numiter)

        # grid3 = utils.make_grid(torch.from_numpy(patches_1).float(), nrow=30)
        # grid4 = utils.make_grid(torch.from_numpy(patches_2).float(), nrow=30)
        # fig2 = plt.figure(dpi=600)
        # plt.subplot(1,2,1)
        # plt.imshow(grid3.numpy().transpose((1, 2, 0)))
        # plt.axis('off')
        # plt.subplot(1,2,2)
        # plt.imshow(grid4.numpy().transpose((1, 2, 0)))
        # plt.axis('off')
        # plt.show()
        # plt.close(fig)
        # plt.close(fig2)

    # dists = calculateDistances3D(img1_3d, img2_3d, v1_idx, v2_idx, sel)

    info = {
        'coords2d_1': coords1,
        'coords2d_2': coords2,
        'coords3d_1': img1_3d[v1_idx[sel]],
        'coords3d_2': img2_3d[v2_idx[sel]],
        'img1_shape': img1_resized.shape,
        'img2_shape': img2_resized.shape,
        'patch_radius': patch_radius,
        'fov_1': fov_1,
        'fov_2': fov_2,
        'img1_name': os.path.basename(img1_name),
        'img2_name': os.path.basename(img2_name)
    }

    # test if projected points correspond well to image coords
    # img1_2 = project(img1_3d[v1_idx[sel]], h2, max_w, RT2, P2)
    # img1_1 = project(img1_3d[v1_idx[sel]], h1, max_w, RT1, P1)
    # img2_1 = project(img2_3d[v2_idx[sel]], h1, max_w, RT1, P1)
    # img2_2 = project(img2_3d[v2_idx[sel]], h2, max_w, RT2, P2)
    # #
    # plt.figure()
    # plt.imshow(img2_resized)
    # plt.scatter(img1_2[:, 0], img1_2[:, 1], color='orange', s=1.0)
    # plt.scatter(img2_2[:, 0], img2_2[:, 1], color='blue', s=1.0)
    # plt.scatter(coords2[:, 1], coords2[:, 0], color='red', s=1.0)
    #
    # plt.figure()
    # plt.imshow(img1_resized)
    # plt.scatter(img2_1[:, 0], img2_1[:, 1], color='orange', s=1.0)
    # plt.scatter(img1_1[:, 0], img1_1[:, 1], color='blue', s=1.0)
    # plt.scatter(coords1[:, 1], coords1[:, 0], color='red', s=1.0)
    if show:
        plt.show()

    #showNegatives(patches_1, patches_2, negatives,
    #              img1_resized, img2_resized, v1_idx, v2_idx, sel)

    # join both point clouds  together for visualization
    # both_3d = np.concatenate((img1_3d[:, :3], img2_3d[:, :3]), axis=0)
    # both_c = np.concatenate((imgv1, imgv2), axis=0)
    # savePointCloudToPly(both_3d, both_c, 'model_real.ply')

    # save filtered 3D points
    if save_ply:
        savePointCloudToPly(img1_3d[v1_idx, :3], imgv1[v1_idx, :],
                            'model_v1_filtered.ply')
        savePointCloudToPly(img2_3d[v2_idx, :3], imgv2[v2_idx, :],
                            'model_v2_filtered.ply')

    #return patches_1, patches_2, info
    return info