def getGroundTruthPose(image_name, dataset_dir):
    dataset_path = os.path.join(dataset_dir, "real")
    pose_name = os.path.join(dataset_path, image_name + "_modelview.txt")
    projection_name = os.path.join(dataset_path,
                                   image_name + "_projection.txt")
    img_name = os.path.join(dataset_path, image_name + ".png")
    if not os.path.isfile(img_name):
        img_name = os.path.join(dataset_path, image_name + ".jpg")
        if not os.path.isfile(img_name):
            raise RuntimeError("Unable to find the query image.", image_name)

    scene_info_name = os.path.join(dataset_dir, "scene_info.txt")
    pose = FUtil.loadMatrixFromFile(pose_name)
    if not os.path.isfile(scene_info_name):
        R = pose[:3, :3]
        t = pose[:3, 3]
        center = np.dot(-R.transpose(), t)
        pose[:3, 3] = pose[:3, 3] - np.dot(-R, center)
        # print("Loaded center from pose, pose: ", pose, "center", center)
    else:
        center = MultimodalPatchesDataset.getSceneCenter(scene_info_name)
        pose = movePoseToCenter(pose, center)

    projection = FUtil.loadMatrixFromFile(projection_name)
    img = cv2.imread(img_name)
    intrinsics = FUtil.projectiveToIntrinsics(projection, img.shape[1],
                                              img.shape[0])
    return pose, intrinsics
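# Informal note (not from the original comments): throughout this file a
# modelview pose [R | t] relates to the camera center C in world coordinates
# by C = -R^T t and, conversely, t = -R C. Recentring a pose on its own camera
# center therefore zeroes its translation, which is effectively what the
# branch above does when no scene_info.txt is available, e.g.:
#
#   C = np.dot(-pose[:3, :3].transpose(), pose[:3, 3])
#   t_back = np.dot(-pose[:3, :3], C)   # recovers the original translation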
def loadRenderedCameraParamsNoNeg(name):
    img_path = name.replace("_texture", "")
    P_name = os.path.splitext(img_path)[0] + "_projection.txt"
    MV_name = os.path.splitext(img_path)[0] + "_modelview.txt"
    if not os.path.isfile(MV_name):
        MV_name = os.path.splitext(img_path)[0] + "_pose.txt"
    P = FUtil.loadMatrixFromFile(P_name)
    pose = FUtil.loadMatrixFromFile(MV_name)
    R = pose[:3, :3]
    corr_R = ((3 * np.eye(3)) - (np.dot(R, R.transpose()))) / 2.0
    R_corrected = np.dot(corr_R, R)
    pose[:3, :3] = R_corrected
    return P, pose
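# Illustrative sketch (not part of the original pipeline): the correction used
# above is the first-order re-orthonormalization R_corrected = ((3I - RR^T)/2) R,
# which pulls a rotation matrix that drifted slightly from orthonormality
# (e.g. through repeated text parsing) back towards a proper rotation. The
# helper below only demonstrates the effect and is never called by the
# surrounding code.
def _example_reorthonormalize(R_noisy):
    # apply the same first-order correction as loadRenderedCameraParamsNoNeg
    corr = ((3 * np.eye(3)) - np.dot(R_noisy, R_noisy.transpose())) / 2.0
    R_fixed = np.dot(corr, R_noisy)
    # orthonormality error before and after the correction
    err_before = np.linalg.norm(np.dot(R_noisy, R_noisy.T) - np.eye(3))
    err_after = np.linalg.norm(np.dot(R_fixed, R_fixed.T) - np.eye(3))
    return R_fixed, err_before, err_after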
def getRenderedImageParams(self, name):
    abs_name = os.path.join(self.input_dir, name)
    img = cv2.imread(abs_name)
    img = cv2.flip(img, 2)
    # add the render camera
    width = img.shape[1]
    height = img.shape[0]
    P, pose = Matcher.loadRenderedCameraParams(abs_name)
    R_corrected = pose[:3, :3]
    t = pose[:3, 3]
    intr = FUtil.projectiveToIntrinsics(P, width, height)
    focal = intr[0, 0]
    cx = abs(intr[0, 2])
    cy = abs(intr[1, 2])
    params1 = np.array((focal, cx, cy, 0.0))  # radial distortion 0
    cam_center = self.scene_center + np.dot(-R_corrected.transpose(), t)
    quat = Quaternion(matrix=R_corrected)
    rot = np.array([quat.w, quat.x, quat.y, quat.z])
    return width, height, params1, rot, t, cam_center
def poseFrom2D3DWithFOVEPNPOurRansac(src_pts, fov, img1_shape, coords3d,
                                     reprojection_error=10):
    """Estimates the pose using 2D-3D correspondences, the EPnP algorithm and
    our Python implementation of RANSAC.

    src_pts and fov [radians] correspond to the left view, which should be
    the photograph; coords3d are the matching 3D points obtained from the
    rendered image.

    @type array
    @param src_pts 2D points in the photograph
    @type float
    @param fov horizontal field of view of the photograph in radians
    @type tuple
    @param img1_shape shape (height, width, ...) of the photograph
    @type array
    @param coords3d 3D points corresponding to src_pts
    @type float
    @param reprojection_error RANSAC inlier threshold in pixels
    """
    intr1 = FUtil.fovToIntrinsics(fov, img1_shape[1], img1_shape[0])
    intr1[:, 1:3] = -intr1[:, 1:3]
    ret, R, t, inliers_idx = solveEPNPRansac(
        coords3d, src_pts, intr1, reprojection_error=reprojection_error)
    mask = np.zeros((src_pts.shape[0], 1)).astype(int)
    if ret:
        R[1:3, :] = -R[1:3, :]
        t[1:3] = -t[1:3]
        mask[inliers_idx] = int(1)
    return ret, R, t, mask
def project(pt_4d, h, w, modelview, projection):
    intr = FUtil.projectiveToIntrinsics(projection, w, h)
    pt_3d = np.dot(modelview, pt_4d.transpose())
    pt_3d = pt_3d / pt_3d[3, :]
    pt_3d = pt_3d[:3, :]
    pt_2d = np.dot(intr, pt_3d)
    pt_2d = (pt_2d / pt_2d[2, :]).transpose()
    return pt_2d[:, :2]
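# Minimal usage sketch for project() (illustrative only; the file names and
# point values below are placeholders, the matrices are assumed to come from
# FUtil.loadMatrixFromFile as elsewhere in this file):
#
#   pose = FUtil.loadMatrixFromFile("view_modelview.txt")
#   proj = FUtil.loadMatrixFromFile("view_projection.txt")
#   pts_4d = np.array([[10.0, 20.0, 30.0, 1.0]])        # Nx4 homogeneous
#   pts_2d = project(pts_4d, height, width, pose, proj)  # Nx2 pixel coords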
def __init__(self, video_path, initial_gps, fov, working_directory,
             snapshot, model, earth_file, log_dir, cuda=True,
             normalize_output=True):
    self.video_path = video_path
    self.initial_gps = initial_gps
    self.fov = fov
    self.working_directory = working_directory
    self.video_frames_dir = os.path.join(self.working_directory,
                                         "video_frames")
    if not os.path.isdir(self.video_frames_dir):
        os.makedirs(self.video_frames_dir)
    self.video_output_dir = os.path.join(self.working_directory,
                                         "video_output")
    if not os.path.isdir(self.video_output_dir):
        os.makedirs(self.video_output_dir)
    self.snapshot = snapshot
    self.model = model
    self.log_dir = log_dir
    self.normalize_output = normalize_output
    self.cuda = cuda
    self.earth_file = earth_file
    self.video = cv2.VideoCapture(self.video_path)
    self.curr_gps = self.initial_gps
    self.framenum = 0

    cmd = ("itr --pose-from-gps " + str(self.initial_gps[0]) + " "
           + str(self.initial_gps[1]) + " initcam " + self.earth_file)
    os.system(cmd)
    init_pose = FUtil.loadMatrixFromFile("initcam_modelview.txt")
    R = init_pose[:3, :3]
    self.scene_center = np.dot(-R.transpose(), init_pose[:3, 3])
    self.curr_center = np.zeros((3, 1))

    success, image = self.video.read()
    frame_base = "frame%d" % self.framenum
    frame_name = frame_base + ".jpg"
    frame_name = os.path.join(self.video_frames_dir, frame_name)
    if not os.path.isfile(frame_name):
        cv2.imwrite(frame_name, image)

    pfargs = self.buildPFArgs(frame_name, self.curr_gps, self.fov)
    self.poseFinder = fp.PoseFinder(pfargs)
    self.KF = self.initKalmanFilter()
def getGPSPose(image_name, dataset_dir):
    dataset_path = os.path.join(dataset_dir, "real_gps")
    if os.path.isdir(dataset_path):
        pose_name = os.path.join(dataset_path, image_name + "_modelview.txt")
        pose = FUtil.loadMatrixFromFile(pose_name)
        R = pose[:3, :3]
        t = pose[:3, 3]
        center = np.dot(-R.transpose(), t)
        pose[:3, 3] = center
        return pose
    return None
def poseFrom2D3DWithFOV(src_pts, fov, img1_shape, coords3d,
                        reprojection_error=10, iterationsCount=100000,
                        pnp_flags=cv2.SOLVEPNP_EPNP, use_ransac=True):
    """Estimates the pose using 2D-3D correspondences and the PnP algorithm.

    src_pts and fov [radians] correspond to the left view, which should be
    the photograph; coords3d are the matching 3D points obtained from the
    rendered image.

    @type array
    @param src_pts 2D points in the photograph
    @type float
    @param fov horizontal field of view of the photograph in radians
    @type tuple
    @param img1_shape shape (height, width, ...) of the photograph
    @type array
    @param coords3d 3D points corresponding to src_pts
    @type float
    @param reprojection_error RANSAC inlier threshold in pixels
    @type int
    @param iterationsCount maximum number of RANSAC iterations
    @type int
    @param pnp_flags OpenCV solvePnP method flag
    @type bool
    @param use_ransac if False, solvePnP is run on all correspondences
    """
    print("fov", fov)
    print("src pts", src_pts.shape, coords3d.shape)
    intr1 = FUtil.fovToIntrinsics(fov, img1_shape[1], img1_shape[0])
    intr1[:, 1:3] = -intr1[:, 1:3]
    dist = np.array([])
    if use_ransac:
        ret, Rvec, t, inliers = cv2.solvePnPRansac(
            coords3d, src_pts, intr1, dist,
            reprojectionError=reprojection_error,
            iterationsCount=iterationsCount, flags=pnp_flags)
    else:
        inliers = np.ones((src_pts.shape[0], 1)).astype(int)
        ret, Rvec, t = cv2.solvePnP(coords3d, src_pts, intr1, dist,
                                    flags=pnp_flags)
    R = cv2.Rodrigues(Rvec, jacobian=0)[0]
    R[1:3, :] = -R[1:3, :]
    t[1:3] = -t[1:3]
    t = t.reshape(-1)
    mask = np.zeros((src_pts.shape[0], 1)).astype(int)
    if ret:
        mask[inliers[:, 0]] = int(1)
    return ret, R, t, mask
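# Hypothetical usage sketch for poseFrom2D3DWithFOV() (variable names are
# placeholders, not taken from the original code): given N photo keypoints and
# their matched 3D points, it returns a rotation, translation and inlier mask.
#
#   fov = (60.0 / 180.0) * np.pi                 # horizontal FOV in radians
#   ret, R, t, mask = poseFrom2D3DWithFOV(
#       pts_2d, fov, photo.shape, pts_3d, reprojection_error=10)
#   if ret:
#       inlier_pts = pts_2d[mask[:, 0] == 1]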
def getRenderedPose(image_name, result_dir):
    rendered_dir = os.path.join(result_dir, image_name)
    pose_name = os.path.join(rendered_dir, "*_0.000000_pose.txt")
    pose_files = glob.glob(pose_name)
    if not pose_files:
        return None, None
    if not os.path.isfile(pose_files[0]):
        raise RuntimeError("Unable to find rendered pose: " + pose_files[0])
    scene_info_name = os.path.join(rendered_dir, "scene_info.txt")
    center = MultimodalPatchesDataset.getSceneCenter(scene_info_name)
    pose = FUtil.loadMatrixFromFile(pose_files[0])
    pose = movePoseToCenter(pose, center)
    return pose, center
def __init__(self, dataset_dir, images_path, list, radius=500):
    self.dataset_dir = dataset_dir
    self.images_path = images_path
    self.dataset_items = list

    # create list of images
    self.imgs_base = set()
    print("Preparing list of images...")
    for pair in tqdm(self.dataset_items):
        parts = pair.split("-")
        self.imgs_base.update({parts[0], parts[1]})

    # load camera center for each image
    self.all_names = []
    self.all_centers = []
    print("Loading camera info...")
    for image_base in tqdm(self.imgs_base):
        MV_name = image_base + "_modelview.txt"
        MV_path = os.path.join(self.images_path, MV_name)
        RT = FUtil.loadMatrixFromFile(MV_path)
        C = np.dot(-RT[:3, :3].transpose(), RT[:3, 3])
        self.all_names.append(image_base)
        self.all_centers.append(C)

    self.all_centers = np.array(self.all_centers)
    self.kdt = KDTree(self.all_centers, leaf_size=40, metric='euclidean')
    count = self.kdt.query_radius(self.all_centers, radius, count_only=True)
    cnt_max = np.max(count)
    self.max_num_points = cnt_max
    count = count / cnt_max
    density = np.log(count)
    norm = matplotlib.colors.Normalize()
    colors = np.floor(plt.cm.jet(norm(density)) * 255)
    ply_name = "dataset_camera_centers_heatmap_" + str(radius) + ".ply"
    # savePointCloudToPly(self.all_centers, colors[:, :3], ply_name)

    self.name_density = {}
    idx = 0
    for name in self.all_names:
        self.name_density.update({name: count[idx]})
        idx += 1
def unproject(coords_2d, depth, modelview, projection):
    """Unprojects 2d image coordinates to 3D based on a depth map.

    @coords_2d [numpy.array] of shape Nx2, where coordinates are stored as
    Nx(x,y)
    @depth [numpy.array] is the depth map of the source image. Must have the
    same size as the source image.
    @param modelview numpy array 4x4 of GL modelview matrix to define
    transformation from 3D to camera coordinates.
    @param projection numpy array 4x4 of GL projection matrix to define
    transformation from camera coordinates to image coordinates.
    @return [np.array] Nx4 array of 3D points in homogeneous coordinates
    (w=1).
    """
    h = depth.shape[0]
    w = depth.shape[1]
    intr = FUtil.projectiveToIntrinsics(projection, w, h)
    intr_inv = np.linalg.inv(intr)
    mv_inv = np.linalg.inv(modelview)
    # coords_2d = np.array([[400, 300]])
    z1 = np.ones((coords_2d.shape[0], 1))
    c2d = coords_2d.astype(int)
    depths = (depth[c2d[:, 1], c2d[:, 0]]).reshape(-1, 1)
    # print("depths")
    # print(depths)
    depths = np.hstack([depths, depths, depths])
    coords_im = np.hstack([c2d, z1])
    coords_im = (coords_im * depths)
    # print("coords im depths")
    # print(coords_im)
    coords_3d = (np.dot(intr_inv, coords_im.transpose()))
    # print("intr inv")
    # print(coords_3d)
    coords_3d = np.concatenate((coords_3d, np.ones((1, coords_3d.shape[1]))),
                               axis=0)
    coords_3d = np.dot(mv_inv, coords_3d).transpose()
    # print(coords_3d)
    # c = np.random.randint(0, 255, (coords_3d.shape[0], 3))
    # savePointCloudToPly(coords_3d[:, :3], c, "mat_desktop.ply")
    return coords_3d
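# Illustrative round-trip sketch (not called anywhere in this file): running
# unproject() followed by project() with the same modelview/projection should
# reproduce the input pixel coordinates up to the integer rounding applied to
# the depth-map lookup. All variable names here are assumptions.
def _example_unproject_roundtrip(coords_2d, depth, modelview, projection):
    pts_3d = unproject(coords_2d, depth, modelview, projection)   # Nx4, w=1
    pts_2d = project(pts_3d, depth.shape[0], depth.shape[1],
                     modelview, projection)                        # Nx2
    # per-point reprojection error, expected to be close to zero
    return np.linalg.norm(pts_2d - coords_2d, axis=1)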
def poseFrom2D3D(src_pts, projection1, img1_shape, dst_pts, projection2, MV2,
                 img2_depth, pnp_flags=cv2.SOLVEPNP_EPNP):
    """Estimates the pose using 2D-3D correspondences and the PnP algorithm.

    src_pts and intr1 correspond to the left view, which should be the
    photograph; dst_pts, intr2, and img2_depth correspond to the rendered
    image.

    @type array
    @param src_pts 2D points in the photograph
    @type array
    @param projection1 OpenGL projection matrix of the camera which took the
    photo.
    @type array
    @param dst_pts 2D points in the rendered image
    @type array
    @param projection2 OpenGL projection matrix of the camera which rendered
    the synthetic image.
    @type array
    @param MV2 modelview matrix of the camera which rendered the synthetic
    image.
    @type array
    @param img2_depth depth map of the rendered image.
    """
    coords3d = unproject(dst_pts, img2_depth, MV2, projection2)[:, :3]
    intr1 = FUtil.projectiveToIntrinsics(projection1, img1_shape[1],
                                         img1_shape[0])
    intr1[:, 1:3] = -intr1[:, 1:3]
    dist = np.array([])
    ret, Rvec, t, inliers = cv2.solvePnPRansac(
        coords3d, src_pts, intr1, dist, reprojectionError=10,
        iterationsCount=100000, flags=pnp_flags)
    R = cv2.Rodrigues(Rvec, jacobian=0)[0]
    R[1:3, :] = -R[1:3, :]
    t[1:3] = -t[1:3]
    t = t.reshape(-1)
    mask = np.zeros((src_pts.shape[0], 1))
    if ret:
        mask[inliers[:, 0]] = 1
    return ret, R, t, mask
def loadImages(self, ext=".png"):
    img1_base = os.path.splitext(self.info['img1_name'])[0]
    img2_base = os.path.splitext(self.info['img2_name'])[0]
    # print("IMG BASE", img2_base)
    MV1_name = os.path.join(self.images_path, img1_base + "_modelview.txt")
    MV2_name = os.path.join(
        self.images_path,
        img2_base.replace("_texture", "") + "_modelview.txt")
    P1_name = os.path.join(self.images_path, img1_base + "_projection.txt")
    P2_name = os.path.join(
        self.images_path,
        img2_base.replace("_texture", "") + "_projection.txt")

    self.RT1 = (FUtil.loadMatrixFromFile(MV1_name))
    self.RT2 = (FUtil.loadMatrixFromFile(MV2_name))
    self.P1 = FUtil.loadMatrixFromFile(P1_name)
    self.P2 = FUtil.loadMatrixFromFile(P2_name)

    # photograph
    if self.render_only:
        img1_path = os.path.join(self.images_path,
                                 img1_base + "_texture" + ext)
    else:
        img1_path = os.path.join(self.images_path, img1_base + ext)
    # render
    img2_path = os.path.join(self.images_path, img2_base + ext)
    # render
    img1_render_path = os.path.join(self.images_path,
                                    img1_base + "_texture" + ext)
    if not self.render_only:
        # remove _texture to convert the path to photo
        img2_base = img2_base.replace("_texture", "")
    img2_photo_path = os.path.join(self.images_path, img2_base + ext)

    # start = timer()
    img1 = cv2.imread(img1_path)
    img2 = cv2.imread(img2_path)
    # print(img1_path)
    img1 = np.flip(img1, 2)  # was needed with cv2.imread
    img2 = np.flip(img2, 2)  # was needed with cv2.imread
    img1_render = cv2.imread(img1_render_path)
    img2_photo = cv2.imread(img2_photo_path)
    img1_render = np.flip(img1_render, 2)  # was needed with cv2.imread
    img2_photo = np.flip(img2_photo, 2)  # was needed with cv2.imread

    # if self.photo_jitter:
    #     # apply gamma to simulate different camera response functions
    #     gamma = np.random.uniform(0.8, 1.1)
    #     img1 = np.clip(img1 ** (1 / gamma), 0, 255)
    #     gamma = np.random.uniform(0.8, 1.1)
    #     img2_photo = np.clip(img2_photo ** (1 / gamma), 0, 255)

    w1, h1, s1 = getSizeFOV(self.info['img1_shape'][1],
                            self.info['img1_shape'][0],
                            self.info['fov_1'], maxres=self.left_maxres)
    w2, h2, s2 = getSizeFOV(self.info['img2_shape'][1],
                            self.info['img2_shape'][0],
                            self.info['fov_2'], maxres=self.right_maxres)
    img1 = cv2.resize(img1, (w1, h1), interpolation=cv2.INTER_AREA)
    img2 = cv2.resize(img2, (w2, h2), interpolation=cv2.INTER_AREA)
    img1_render = cv2.resize(img1_render, (w1, h1),
                             interpolation=cv2.INTER_AREA)

    if self.color_jitter:
        # jitter color of rendered images
        img2 = self.transform(torch.from_numpy(img2.transpose(
            (2, 0, 1)))).numpy().transpose((1, 2, 0))
        img1_render = self.transform(
            torch.from_numpy(img1_render.transpose(
                (2, 0, 1)))).numpy().transpose((1, 2, 0))

    img2_photo = cv2.resize(img2_photo, (w2, h2),
                            interpolation=cv2.INTER_AREA)
    # print("image loaded in: ", timer() - start)

    img1_depth_path_npy = os.path.join(self.images_path,
                                       img1_base + "_texture_depth.npy")
    img1_depth_path = os.path.join(self.images_path,
                                   img1_base + "_texture_depth.txt.gz")
    img2_depth_path_npy = os.path.join(self.images_path,
                                       img2_base + "_texture_depth.npy")
    img2_depth_path = os.path.join(self.images_path,
                                   img2_base + "_texture_depth.txt.gz")
    img1_normals_path = os.path.join(self.images_path,
                                     img1_base + "_texture_normals.png")
    img2_normals_path = os.path.join(self.images_path,
                                     img2_base + "_texture_normals.png")
    img1_silhouettes_path = os.path.join(
        self.images_path, img1_base + "_texture_silhouettes.jpg")
    img2_silhouettes_path = os.path.join(
        self.images_path, img2_base + "_texture_silhouettes.jpg")

    img1_depth = None
    img2_depth = None
    img1_normals = None
    img2_normals = None

    if self.use_depth:
        # load depths
        img1_depth = self.loadAndCacheDepth(img1_depth_path,
                                            img1_depth_path_npy, w1, h1)
        img2_depth = self.loadAndCacheDepth(img2_depth_path,
                                            img2_depth_path_npy, w2, h2)
        # divide by max depth so that we feed comparable data to the network
        img1_render = np.concatenate([img1_render, img1_depth / 500000.0],
                                     axis=2)
        img2 = np.concatenate([img2, img2_depth / 500000.0], axis=2)

    if self.use_normals:
        img1_normals = self.loadAndCacheNormals(img1_normals_path, img1_depth,
                                                img1_depth_path,
                                                img1_depth_path_npy, w1, h1)
        img2_normals = self.loadAndCacheNormals(img2_normals_path, img2_depth,
                                                img2_depth_path,
                                                img2_depth_path_npy, w2, h2)
        img1_render = np.concatenate([img1_render, img1_normals], axis=2)
        img2 = np.concatenate([img2, img2_normals], axis=2)

    if self.use_silhouettes:
        img1_silhouettes = self.loadAndCacheSilhouettes(
            img1_silhouettes_path, img1_depth, img1_depth_path,
            img1_depth_path_npy, w1, h1)
        img2_silhouettes = self.loadAndCacheSilhouettes(
            img2_silhouettes_path, img2_depth, img2_depth_path,
            img2_depth_path_npy, w2, h2)
        img1_render = np.concatenate([img1_render, img1_silhouettes], axis=2)
        img2 = np.concatenate([img2, img2_silhouettes], axis=2)

    # project 3D coords to first view for distance calculations
    v1_d = self.info['coords3d_1']
    v2_d = self.info['coords3d_2']
    # distances in 3D for negative patch selection (not used anymore so that
    # validation loss is comparable throughout different experiments)
    # self.dists = np.linalg.norm(v1_d[:, None] - v2_d, axis=2)

    v1_1_2d = project(v1_d, h1, w1, self.RT1, self.P1)
    v2_2_2d = project(v2_d, h2, w2, self.RT2, self.P2)
    self.v2_2_2d = v2_2_2d
    self.v2_1_2d = project(v2_d, h1, w1, self.RT1, self.P1)
    self.v1_2_2d = project(v1_d, h2, w2, self.RT2, self.P2)

    # distances in 2D for negative patch selection (used always)
    self.dists = np.linalg.norm(v1_1_2d[:, None] - self.v2_1_2d, axis=2)

    # patchsize = self.patch_radius
    # sel1 = np.logical_and(v1_1_2d > patchsize,
    #                       v1_1_2d < (np.array([w1, h1]) - patchsize))
    # sel1 = np.logical_and(sel1[:, 0], sel1[:, 1])
    # sel2 = np.logical_and(v2_2_2d > patchsize,
    #                       v2_2_2d < (np.array([w2, h2]) - patchsize))
    # sel2 = np.logical_and(sel2[:, 0], sel2[:, 1])
    # sel = sel1  # np.logical_and(sel1, sel2)
    # v1_1_2d = v1_1_2d[sel]
    # v2_2_2d = v2_2_2d[sel]

    self.info['coords2d_1'] = np.flip(v1_1_2d, axis=1) / s1
    self.info['coords2d_2'] = np.flip(v2_2_2d, axis=1) / s2

    self.uniform_randcoords = np.array([
        np.random.randint(self.patch_radius,
                          self.info['img2_shape'][0] - self.patch_radius,
                          self.numpatches),
        np.random.randint(self.patch_radius,
                          self.info['img2_shape'][1] - self.patch_radius,
                          self.numpatches)
    ]).astype(float).transpose()
    self.uniform_randcoords_out = np.flip(self.uniform_randcoords,
                                          axis=1).copy() * s2
    self.dists_neg = np.linalg.norm(
        self.v1_2_2d[:, None] - self.uniform_randcoords_out, axis=2)

    if self.uniform_negatives and self.dist_type == '3D':
        if img2_depth is None:
            img2_depth = self.loadAndCacheDepth(img2_depth_path,
                                                img2_depth_path_npy, w2, h2)
        self.uniform_randcoords_out_3D = unproject(
            self.uniform_randcoords_out, img2_depth, self.RT2, self.P2)

    return img1, img2, img1_render, img2_photo
def getResultForImage(image_name, result_dir, dataset_dir, matching_dir_name):
    # get translation and rotation result for best pose
    bp_R_gt_err = 180
    bp_t_gt_err = 100000
    bp_t_ren_err = 100000
    rp_R_gt_err = 180
    rp_t_gt_err = 100000
    rp_t_ren_err = 100000
    gps_t_gt_err = 100000
    rendered_pose = None
    gps_pose = None
    gt_fov = -1
    bp_fov = -1
    bp_fov_ba = -1
    rp_fov = -1
    rp_fov_ba = -1
    bp_min_err = -1
    bp_num_inliers = -1
    rp = None
    rp_min_err = -1
    rp_num_inliers = -1
    gt_t_ren_err = 100000
    matched_dist = np.array([-1])
    all_reproj_dist = np.array([-1])
    rp_err_axis = -1
    rp_err_perp = -1
    bp_filename = None

    gps_pose = getGPSPose(image_name, dataset_dir)
    rendered_pose, ren_center = getRenderedPose(image_name, result_dir)
    # if rendered_pose is None:
    #     rendered_pose = rendered_pose_gt
    if rendered_pose is None and ren_center is None:
        return (bp_R_gt_err, bp_t_gt_err, bp_t_ren_err, bp_min_err,
                bp_num_inliers, rp_R_gt_err, rp_t_gt_err, rp_t_ren_err,
                rp_min_err, rp_num_inliers, gt_t_ren_err, gps_t_gt_err,
                rp_err_axis, rp_err_perp, gt_fov, bp_fov, bp_fov_ba, rp_fov,
                rp_fov_ba, matched_dist, all_reproj_dist, bp_filename)

    try:
        gt_pose, gt_intrinsics = getGroundTruthPose(image_name, dataset_dir)
    except Exception:
        print("Ground truth not found, skipping.")
        return (bp_R_gt_err, bp_t_gt_err, bp_t_ren_err, bp_min_err,
                bp_num_inliers, rp_R_gt_err, rp_t_gt_err, rp_t_ren_err,
                rp_min_err, rp_num_inliers, gt_t_ren_err, gps_t_gt_err,
                rp_err_axis, rp_err_perp, gt_fov, bp_fov, bp_fov_ba, rp_fov,
                rp_fov_ba, matched_dist, all_reproj_dist, bp_filename)

    gt_fov = FUtil.intrinsicsToFov(gt_intrinsics)

    matching_dir = os.path.join(result_dir, matching_dir_name)
    result_img_dir = os.path.join(matching_dir, image_name)
    (bp, bp_min_err, bp_num_inliers, bp_filename,
     bp_fov, bp_fov_ba) = PoseFinder.findBestPose(result_img_dir)
    if bp_fov is None:
        bp_fov = -1
    if bp_fov_ba is None:
        bp_fov_ba = -1

    rp_path = os.path.join(result_img_dir, image_name + "_bestpose_pose.txt")
    rp_info_path = os.path.join(result_img_dir,
                                image_name + "_bestpose_info.txt")
    rp = None
    rp_min_err = -1
    rp_num_inliers = -1
    if os.path.isfile(rp_path) and os.path.isfile(rp_info_path):
        rp = np.loadtxt(rp_path)
        (rp_min_err, rp_num_inliers,
         rp_fov, rp_fov_ba) = PoseFinder.parseInfoFile(rp_info_path)
        if rp_fov is None:
            rp_fov = -1
        if rp_fov_ba is None:
            rp_fov_ba = -1

    gt_t_ren_err = np.linalg.norm(gt_pose[:3, 3] - rendered_pose[:3, 3])

    if bp is not None:
        bp = movePoseToCenter(bp, ren_center)
        bp_R_gt_err, bp_t_gt_err = calculateErrors(gt_pose[:3, :3],
                                                   gt_pose[:3, 3],
                                                   bp[:3, :3], bp[:3, 3])
        bp_t_ren_err = np.linalg.norm(bp[:3, 3] - rendered_pose[:3, 3])
    else:
        bp_min_err = -1
        bp_num_inliers = -1

    # get translation and rotation result for refined pose
    matched_dist = np.array([-1])
    all_reproj_dist = np.array([-1])
    rp_err_axis = -1
    rp_err_perp = -1
    if rp is not None and not np.any(np.isnan(rp[:3, 3])):
        reproj_3D_path = os.path.join(result_img_dir,
                                      image_name + "_reproj_pt3D.npy")
        matched_3D_path = os.path.join(result_img_dir,
                                       image_name + "_matched_pt3D.npy")
        matched_3D = np.load(matched_3D_path)
        R_rp = rp[:3, :3]
        rp_center = np.dot(-R_rp.transpose(), rp[:3, 3])
        matched_dist = np.linalg.norm(matched_3D - rp_center, axis=1)
        if os.path.isfile(reproj_3D_path):
            reproj_3D = np.load(reproj_3D_path)
            all_reproj_dist = np.linalg.norm(reproj_3D - rp_center, axis=1)
        else:
            all_reproj_dist = np.array([-1])
        rp = movePoseToCenter(rp, ren_center)
        rp_err_axis, rp_err_perp = calculatePosErrAlongCameraAxis(
            gt_pose[:3, 3], rp, gt_intrinsics)
        rp_R_gt_err, rp_t_gt_err = calculateErrors(gt_pose[:3, :3],
                                                   gt_pose[:3, 3],
                                                   rp[:3, :3], rp[:3, 3])
        rp_t_ren_err = np.linalg.norm(rp[:3, 3] - rendered_pose[:3, 3])

    if gps_pose is not None:
        gps_t_gt_err = np.linalg.norm(gps_pose[:3, 3] - gt_pose[:3, 3])

    return (bp_R_gt_err, bp_t_gt_err, bp_t_ren_err, bp_min_err,
            bp_num_inliers, rp_R_gt_err, rp_t_gt_err, rp_t_ren_err,
            rp_min_err, rp_num_inliers, gt_t_ren_err, gps_t_gt_err,
            rp_err_axis, rp_err_perp, gt_fov, bp_fov, bp_fov_ba, rp_fov,
            rp_fov_ba, matched_dist, all_reproj_dist, bp_filename)
def loadPositionalInfo(self):
    num_imgs = len(self.image_names)
    image_coords_path = os.path.join(
        self.input_dir, "image_coords_ecef" + self.suffix + ".npz")
    recompute = True
    if os.path.exists(image_coords_path):
        positional_info = np.load(image_coords_path, allow_pickle=True)
        self.image_ecef = positional_info['image_ecef']
        self.image_have_rot = positional_info['image_have_rot']
        self.image_rot = positional_info['image_rot']
        if self.image_ecef.shape[0] == num_imgs:
            recompute = self.recompute
    if recompute:
        self.image_ecef = np.zeros([num_imgs, 3])
        self.image_have_rot = np.zeros([num_imgs, 1])
        self.image_rot = [
            Quaternion(axis=[1, 0, 0], angle=0) for i in range(0, num_imgs)
        ]
        for id in tqdm(range(0, num_imgs)):
            name = self.image_names[id]
            abs_name = os.path.join(self.input_dir, name)
            if self.isRender(name):
                # load the modelview matrix, get cam center,
                # add scene center
                # and convert to WGS84 coordinates, then convert back to
                # ECEF with altitude=0 since we usually don't have
                # precise altitude for real photographs.
                img_path = abs_name.replace("_texture", "")
                MV_name = os.path.splitext(img_path)[0] + "_modelview.txt"
                if not os.path.isfile(MV_name):
                    MV_name = os.path.splitext(img_path)[0] + "_pose.txt"
                pose = FUtil.loadMatrixFromFile(MV_name)
                R = pose[:3, :3]
                corr_R = (((3 * np.eye(3))
                           - (np.dot(R, R.transpose()))) / 2.0)
                R_corrected = np.dot(corr_R, R)
                self.image_rot[id] = Quaternion(matrix=R_corrected)
                self.image_have_rot[id] = 1
                C = np.dot(-R.transpose(), pose[:3, 3]) + self.scene_center
                lat, lon, alt = pm3d.ecef.ecef2geodetic(C[0], C[1], C[2])
                # print("rendered lat lon alt: ", lat, lon, alt)
                x, y, z = pm3d.ecef.geodetic2ecef(lat, lon, 0)
            else:
                # load GPS from the photograph
                x, y, z = (0, 0, 0)
                lat, lon, alt = (-1, -1, -1)
                with exiftool.ExifTool() as et:
                    res = et.execute_json("-n", "-GPSLatitude",
                                          "-GPSLatitudeRef",
                                          "-GPSLongitudeRef",
                                          "-GPSLongitude",
                                          "-GPSAltitude", abs_name)
                if (('EXIF:GPSLatitude' in res[0])
                        and ('EXIF:GPSLongitude' in res[0])
                        and ('EXIF:GPSLatitudeRef' in res[0])
                        and ('EXIF:GPSLongitudeRef' in res[0])):
                    lat = res[0]['EXIF:GPSLatitude']
                    lon = res[0]['EXIF:GPSLongitude']
                    lat_ref = res[0]['EXIF:GPSLatitudeRef']
                    lon_ref = res[0]['EXIF:GPSLongitudeRef']
                    if lat_ref == 'S':
                        lat = -lat
                    if lon_ref == 'W':
                        lon = -lon
                    # if 'EXIF:GPSAltitude' in res[0]:
                    #     alt = res[0]['EXIF:GPSAltitude']
                    # print("photo lat lon lat_ref lon_ref: ",
                    #       lat, lon, lat_ref, lon_ref)
                    try:
                        x, y, z = pm3d.ecef.geodetic2ecef(lat, lon, 0)
                    except ValueError as ve:
                        print("Unable to use GPS from the image", name,
                              "reason:", ve,
                              "resetting the position to unknown.")
                        x, y, z = (0, 0, 0)
            # Set image_ecef to zeros if x, y, z are zeros
            # in order to mask them correctly in the match() method.
            if x == y == z == 0:
                self.image_ecef[id, :] = np.array([x, y, z])
                continue
            # coords in km due to double imprecision (/1000)
            self.image_ecef[id, :] = (
                (np.array([x, y, z]) - self.scene_center) / 1000.0)
        np.savez(image_coords_path, image_ecef=self.image_ecef,
                 image_rot=self.image_rot,
                 image_have_rot=self.image_have_rot)
def poseEPNPBAIterative(R, t, fov, mask, photo_shape, coords2d, coords3d,
                        reprojection_error=10):
    num_inliers = 0
    cnt_same = 0
    while True:
        if cnt_same > 5:
            break
        # bundle adjustment
        sel = mask[:, 0] == 1
        pose = np.ones([4, 4])
        pose[:3, :3] = R
        pose[:3, 3] = t
        intr1 = FUtil.fovToIntrinsics(fov, photo_shape[1], photo_shape[0])
        left_cam_idx = np.zeros(coords2d[sel].shape[0]).astype(int)
        left_cam_pt_idx = np.arange(0, coords2d[sel].shape[0]).astype(int)
        points2d = coords2d - np.array(
            [[photo_shape[1] / 2.0, photo_shape[0] / 2.0]])
        res_R, res_t, res_intr, success = BundleAdjustment.bundleAdjustment(
            [pose], [intr1], coords3d[sel], points2d[sel], left_cam_idx,
            left_cam_pt_idx, show=False)
        print("res intr BA", res_intr)
        if success:
            cnt_same_wrong = 0
            # update FOV with refined estimate
            fov = 2 * np.arctan2(photo_shape[1] / 2.0, res_intr[0])
            print("estim FOV BA:", (fov * 180) / np.pi)
            print("R BA:", res_R)
            print("t BA:", res_t)

            # measure the number of inliers by running another RANSAC round
            ret, R_epnp, t_epnp, mask_epnp = poseFrom2D3DWithFOV(
                coords2d, fov, photo_shape, coords3d, reprojection_error)
            new_num_inliers = np.sum(mask)
            print("EPNP R", R)
            print("EPNP t", t)
            print("EPNP num inliers", new_num_inliers)

            # count the number of iterations for which the number of inliers
            # stays the same
            if (new_num_inliers == num_inliers):
                cnt_same += 1
            else:
                # if the number of inliers increases or decreases,
                # reset the counter
                cnt_same = 0

            if new_num_inliers >= num_inliers:
                num_inliers = new_num_inliers
                R = R_epnp
                t = t_epnp
                mask = mask_epnp
            else:
                break
        else:
            break
    return R, t, fov, mask
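# Hypothetical usage sketch for poseEPNPBAIterative() (variable names are
# placeholders): an initial EPnP pose estimate is alternately refined by
# bundle adjustment (which also re-estimates the FOV) and re-verified by
# another RANSAC-PnP round until the inlier count stops changing.
#
#   ret, R, t, mask = poseFrom2D3DWithFOV(pts_2d, fov, photo.shape, pts_3d)
#   if ret:
#       R, t, fov, mask = poseEPNPBAIterative(
#           R, t, fov, mask, photo.shape, pts_2d, pts_3d,
#           reprojection_error=10)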
def getPatches(img1_name, img2_name, show=False, max_w=1024, patch_size=64,
               npatches=100, measure=False, save_ply=False):
    print("getPatches, ", img1_name, img2_name)
    img1_base, img1_ext = os.path.splitext(img1_name)
    img2_base, img2_ext = os.path.splitext(img2_name)
    img1_base = re.sub("_texture", "", img1_base)
    img2_base = re.sub("_texture", "", img2_base)

    img1_depth_name = img1_base + "_texture_depth.txt.gz"
    img2_depth_name = img2_base + "_texture_depth.txt.gz"
    MV1_name = img1_base + "_modelview.txt"
    MV2_name = img2_base + "_modelview.txt"
    P1_name = img1_base + "_projection.txt"
    P2_name = img2_base + "_projection.txt"

    RT1 = (FUtil.loadMatrixFromFile(MV1_name))
    RT2 = (FUtil.loadMatrixFromFile(MV2_name))
    P1 = FUtil.loadMatrixFromFile(P1_name)
    P2 = FUtil.loadMatrixFromFile(P2_name)

    img1_depth = loadDepth(img1_depth_name)
    img2_depth = loadDepth(img2_depth_name)
    img_1_depth_wrong = (np.min(img1_depth) == np.max(img1_depth))
    img_2_depth_wrong = (np.min(img2_depth) == np.max(img2_depth))
    if img_1_depth_wrong or img_2_depth_wrong:
        msg = "One of the two images has empty depth map."
        raise RuntimeError(msg)

    img1 = cv2.imread(img1_name)
    img2 = cv2.imread(img2_name)
    img1 = np.flip(img1, 2)
    img2 = np.flip(img2, 2)

    img1_aspect = float(img1.shape[0]) / img1.shape[1]
    img2_aspect = float(img2.shape[0]) / img2.shape[1]
    h1 = int(img1_aspect * max_w)
    h2 = int(img2_aspect * max_w)
    img1_resized = cv2.resize(img1, (max_w, h1))
    img2_resized = cv2.resize(img2, (max_w, h2))
    img1_depth = cv2.resize(img1_depth, (max_w, h1))
    img2_depth = cv2.resize(img2_depth, (max_w, h2))

    img1_3d, imgv1 = unproject_image(img1_resized, img1_depth, RT1, P1)
    img2_3d, imgv2 = unproject_image(img2_resized, img2_depth, RT2, P2)

    if save_ply:
        savePointCloudToPly(img1_3d[:, :3], imgv1[:, :], 'model_v1.ply')
        savePointCloudToPly(img2_3d[:, :3], imgv2[:, :], 'model_v2.ply')

    v1_idx, v2_idx = findIndicesOfCorresponding3DPoints(img1_3d, img2_3d)
    if v1_idx.shape[0] <= 0 or v2_idx.shape[0] <= 0:
        msg = "These two images do not contain any corresponding points."
        raise RuntimeError(msg)

    fov_1, fovy_1 = FUtil.projectiveToFOV(P1)
    fov_2, fovy_2 = FUtil.projectiveToFOV(P2)

    patch_radius = patch_size / 2
    coords1, coords2, sel = genRandomCorrespondingPoints2D(
        img1_resized.shape, img2_resized.shape, fov_1, fov_2, v1_idx, v2_idx,
        radius=patch_radius, N=npatches)

    if measure:
        numiter = 1
        start = timer()
        for i in tqdm(range(0, numiter)):
            patches_1, patches_2 = generatePatchesFast(
                img1, img2, img1_resized.shape, img2_resized.shape, coords1,
                coords2, fov_1, fov_2, show, patch_radius)
        end = timer()
        elapsed = end - start
        print("Total runtime patches fast: ", elapsed, "one iteration: ",
              elapsed / numiter)

        # grid1 = utils.make_grid(
        #     torch.from_numpy(patches_1).float() / 255.0, nrow=30)
        # grid2 = utils.make_grid(
        #     torch.from_numpy(patches_2).float() / 255.0, nrow=30)
        # fig = plt.figure(dpi=600)
        # plt.subplot(1, 2, 1)
        # plt.imshow(grid1.numpy().transpose((1, 2, 0)))
        # plt.axis('off')
        # plt.subplot(1, 2, 2)
        # plt.imshow(grid2.numpy().transpose((1, 2, 0)))
        # plt.axis('off')

        start = timer()
        for i in tqdm(range(0, numiter)):
            patches_1, patches_2 = generatePatches(
                img1, img2, img1_resized.shape, img2_resized.shape, coords1,
                coords2, fov_1, fov_2, show, patch_radius)
        end = timer()
        elapsed = end - start
        print("Total runtime patches loop: ", elapsed, "one iteration: ",
              elapsed / numiter)

        # grid3 = utils.make_grid(torch.from_numpy(patches_1).float(),
        #                         nrow=30)
        # grid4 = utils.make_grid(torch.from_numpy(patches_2).float(),
        #                         nrow=30)
        # fig2 = plt.figure(dpi=600)
        # plt.subplot(1, 2, 1)
        # plt.imshow(grid3.numpy().transpose((1, 2, 0)))
        # plt.axis('off')
        # plt.subplot(1, 2, 2)
        # plt.imshow(grid4.numpy().transpose((1, 2, 0)))
        # plt.axis('off')
        # plt.show()
        # plt.close(fig)
        # plt.close(fig2)

    # dists = calculateDistances3D(img1_3d, img2_3d, v1_idx, v2_idx, sel)
    info = {
        'coords2d_1': coords1,
        'coords2d_2': coords2,
        'coords3d_1': img1_3d[v1_idx[sel]],
        'coords3d_2': img2_3d[v2_idx[sel]],
        'img1_shape': img1_resized.shape,
        'img2_shape': img2_resized.shape,
        'patch_radius': patch_radius,
        'fov_1': fov_1,
        'fov_2': fov_2,
        'img1_name': os.path.basename(img1_name),
        'img2_name': os.path.basename(img2_name)
    }

    # test if projected points correspond well to image coords
    # img1_2 = project(img1_3d[v1_idx[sel]], h2, max_w, RT2, P2)
    # img1_1 = project(img1_3d[v1_idx[sel]], h1, max_w, RT1, P1)
    # img2_1 = project(img2_3d[v2_idx[sel]], h1, max_w, RT1, P1)
    # img2_2 = project(img2_3d[v2_idx[sel]], h2, max_w, RT2, P2)
    #
    # plt.figure()
    # plt.imshow(img2_resized)
    # plt.scatter(img1_2[:, 0], img1_2[:, 1], color='orange', s=1.0)
    # plt.scatter(img2_2[:, 0], img2_2[:, 1], color='blue', s=1.0)
    # plt.scatter(coords2[:, 1], coords2[:, 0], color='red', s=1.0)
    #
    # plt.figure()
    # plt.imshow(img1_resized)
    # plt.scatter(img2_1[:, 0], img2_1[:, 1], color='orange', s=1.0)
    # plt.scatter(img1_1[:, 0], img1_1[:, 1], color='blue', s=1.0)
    # plt.scatter(coords1[:, 1], coords1[:, 0], color='red', s=1.0)

    if show:
        plt.show()
        # showNegatives(patches_1, patches_2, negatives,
        #               img1_resized, img2_resized, v1_idx, v2_idx, sel)

    # join both point clouds together for visualization
    # both_3d = np.concatenate((img1_3d[:, :3], img2_3d[:, :3]), axis=0)
    # both_c = np.concatenate((imgv1, imgv2), axis=0)
    # savePointCloudToPly(both_3d, both_c, 'model_real.ply')

    # save filtered 3D points
    if save_ply:
        savePointCloudToPly(img1_3d[v1_idx, :3], imgv1[v1_idx, :],
                            'model_v1_filtered.ply')
        savePointCloudToPly(img2_3d[v2_idx, :3], imgv2[v2_idx, :],
                            'model_v2_filtered.ply')

    # return patches_1, patches_2, info
    return info
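# Hypothetical usage sketch for getPatches() (file names are placeholders):
# given a photo/render pair with the *_modelview.txt, *_projection.txt and
# *_texture_depth.txt.gz files stored next to them, it returns the 'info'
# dictionary with corresponding 2D/3D coordinates used to sample patches.
#
#   info = getPatches("photo_00001.png", "render_00001_texture.png",
#                     max_w=1024, patch_size=64, npatches=100)
#   pts3d_photo = info['coords3d_1']    # 3D points seen in the photo view
#   pts2d_render = info['coords2d_2']   # matching 2D coords in the render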