def values_to_gtsfm_data(values: Values, initial_data: GtsfmData) -> GtsfmData:
    """Cast results from the optimization to GtsfmData object.

    Args:
        values: results of factor graph optimization.
        initial_data: data used to generate the factor graph; used to extract
            information about poses and 3d points in the graph.

    Returns:
        optimized poses and landmarks.
    """
    result = GtsfmData(initial_data.number_images())

    # Copy each optimized camera back under its original index.
    for cam_idx in initial_data.get_valid_camera_indices():
        result.add_camera(cam_idx, values.atPinholeCameraCal3Bundler(C(cam_idx)))

    # Rebuild every track around its optimized 3D point, carrying over the
    # original 2D measurements unchanged.
    for track_idx in range(initial_data.number_tracks()):
        source_track = initial_data.get_track(track_idx)
        optimized_track = SfmTrack(values.atPoint3(P(track_idx)))
        for k in range(source_track.number_measurements()):
            cam_idx, uv = source_track.measurement(k)
            optimized_track.add_measurement(cam_idx, uv)
        result.add_track(optimized_track)

    return result
def plot_sfm_data_3d(sfm_data: GtsfmData, ax: Axes, max_plot_radius: float = 50) -> None:
    """Plot the camera poses and landmarks in 3D matplotlib plot.

    Args:
        sfm_data: SfmData object with camera and tracks.
        ax: axis to plot on.
        max_plot_radius: maximum distance threshold away from any camera for
            which a point will be plotted.
    """
    camera_poses = [sfm_data.get_camera(idx).pose() for idx in sfm_data.get_valid_camera_indices()]
    plot_poses_3d(camera_poses, ax)

    # Gather all track points, then restrict to those within the given radius
    # of at least one camera so outliers do not blow up the plot extents.
    points_3d = np.array([list(sfm_data.get_track(j).point3()) for j in range(sfm_data.number_tracks())])
    nearby_points_3d = comp_utils.get_points_within_radius_of_cameras(camera_poses, points_3d, max_plot_radius)

    # Render each surviving landmark as a small green dot.
    for landmark in nearby_points_3d:
        x, y, z = landmark[0], landmark[1], landmark[2]
        ax.plot(x, y, z, "g.", markersize=1)
def save_camera_poses_viz(
    pre_ba_sfm_data: GtsfmData,
    post_ba_sfm_data: GtsfmData,
    gt_pose_graph: List[Optional[Pose3]],
    folder_name: str,
) -> None:
    """Visualize the camera pose and save to disk.

    Args:
        pre_ba_sfm_data: data input to bundle adjustment.
        post_ba_sfm_data: output of bundle adjustment.
        gt_pose_graph: ground truth poses.
        folder_name: folder to save the visualization at.
    """
    # Extract the pose of every estimated camera from both reconstructions.
    pre_ba_poses = [pre_ba_sfm_data.get_camera(i).pose() for i in pre_ba_sfm_data.get_valid_camera_indices()]
    post_ba_poses = [post_ba_sfm_data.get_camera(i).pose() for i in post_ba_sfm_data.get_valid_camera_indices()]

    fig = plt.figure()
    ax = fig.add_subplot(projection="3d")

    plot_poses_3d(gt_pose_graph, ax, center_marker_color="m", label_name="GT")
    plot_poses_3d(pre_ba_poses, ax, center_marker_color="c", label_name="Pre-BA")
    plot_poses_3d(post_ba_poses, ax, center_marker_color="k", label_name="Post-BA")
    ax.legend(loc="upper left")
    set_axes_equal(ax)

    # save the 3D plot in the original view
    fig.savefig(os.path.join(folder_name, "poses_3d.png"))

    # save the BEV representation by looking straight down from above
    default_camera_elevation = 100  # in metres above ground
    ax.view_init(azim=0, elev=default_camera_elevation)
    fig.savefig(os.path.join(folder_name, "poses_bev.png"))

    plt.close(fig)
def __cameras_to_model(
    self,
    initial_data: GtsfmData,
    absolute_pose_priors: List[Optional[PosePrior]],
    relative_pose_priors: Dict[Tuple[int, int], PosePrior],
) -> List[int]:
    """Get the cameras which are to be modeled in the factor graph.

    We are using ability to add initial values as proxy for this function, so
    the prior arguments are currently accepted for interface compatibility but
    unused.

    Args:
        initial_data: initialized cameras and tracks.
        absolute_pose_priors: priors to be used on cameras (currently unused).
        relative_pose_priors: priors on the pose between two cameras (currently unused).

    Returns:
        Sorted list of camera indices to model in the factor graph.
    """
    # `sorted` already returns a new list, so the intermediate `list(...)` of
    # the original was redundant; the set still deduplicates defensively.
    return sorted(set(initial_data.get_valid_camera_indices()))
def values_to_gtsfm_data(values: Values, initial_data: GtsfmData, shared_calib: bool) -> GtsfmData:
    """Cast results from the optimization to GtsfmData object.

    Args:
        values: results of factor graph optimization.
        initial_data: data used to generate the factor graph; used to extract
            information about poses and 3d points in the graph.
        shared_calib: flag indicating if calibrations were shared between the cameras.

    Returns:
        optimized poses and landmarks.
    """
    result = GtsfmData(initial_data.number_images())

    # Detect the calibration type from the first *valid* camera. The previous
    # hard-coded `get_camera(0)` broke whenever camera 0 was not part of the
    # reconstruction (it would silently fall back to the Cal3Bundler branch).
    valid_indices = initial_data.get_valid_camera_indices()
    is_fisheye_calibration = bool(valid_indices) and isinstance(
        initial_data.get_camera(valid_indices[0]), PinholeCameraCal3Fisheye
    )
    camera_class = PinholeCameraCal3Fisheye if is_fisheye_calibration else PinholeCameraCal3Bundler

    def extract_calibration(i: int):
        """Read the optimized calibration for camera i (slot 0 if calibrations are shared)."""
        calib_key = K(0 if shared_calib else i)
        if is_fisheye_calibration:
            return values.atCal3Fisheye(calib_key)
        return values.atCal3Bundler(calib_key)

    # add cameras
    for i in valid_indices:
        result.add_camera(i, camera_class(values.atPose3(X(i)), extract_calibration(i)))

    # add tracks
    for j in range(initial_data.number_tracks()):
        input_track = initial_data.get_track(j)

        # populate the result with optimized 3D point
        result_track = SfmTrack(values.atPoint3(P(j)))

        for measurement_idx in range(input_track.numberMeasurements()):
            i, uv = input_track.measurement(measurement_idx)
            result_track.addMeasurement(i, uv)

        result.add_track(result_track)

    return result
def write_cameras(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> None:
    """Writes the camera data file in the COLMAP format.

    Reference: https://colmap.github.io/format.html#cameras-txt

    Args:
        gtsfm_data: scene data to write.
        images: list of all images for this scene, in order of image index
        save_dir: folder to put the cameras.txt file in.
    """
    os.makedirs(save_dir, exist_ok=True)

    # TODO: handle shared intrinsics
    # Assumes all camera models have five intrinsic parameters
    camera_model = "RADIAL"

    file_path = os.path.join(save_dir, "cameras.txt")
    with open(file_path, "w") as f:
        f.write("# Camera list with one line of data per camera:\n")
        f.write("# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n")
        # note that we save the number of estimated cameras, not the number of input images,
        # which would instead be gtsfm_data.number_images().
        f.write(f"# Number of cameras: {len(gtsfm_data.get_valid_camera_indices())}\n")

        for cam_idx in gtsfm_data.get_valid_camera_indices():
            calibration = gtsfm_data.get_camera(cam_idx).calibration()
            img = images[cam_idx]
            # RADIAL parameter order: f, cx, cy, k1, k2.
            params = (calibration.fx(), calibration.px(), calibration.py(), calibration.k1(), calibration.k2())
            param_str = " ".join(str(p) for p in params)
            f.write(f"{cam_idx} {camera_model} {img.width} {img.height} {param_str}\n")
def __initial_values(self, initial_data: GtsfmData) -> Values:
    """Initialize all the variables in the factor graph."""
    initial_values = gtsam.Values()

    # Camera poses: one X(i) per valid camera. Calibrations K(...) are inserted
    # per-camera, unless they are shared, in which case only the first camera
    # seen contributes the single shared calibration value.
    for loop_idx, cam_idx in enumerate(initial_data.get_valid_camera_indices()):
        camera = initial_data.get_camera(cam_idx)
        initial_values.insert(X(cam_idx), camera.pose())
        if loop_idx == 0 or not self._shared_calib:
            initial_values.insert(K(self.__map_to_calibration_variable(cam_idx)), camera.calibration())

    # Landmarks: one P(j) per track, initialized at its triangulated point.
    for track_idx in range(initial_data.number_tracks()):
        initial_values.insert(P(track_idx), initial_data.get_track(track_idx).point3())

    return initial_values
def get_stats_for_sfmdata(gtsfm_data: GtsfmData, suffix: str) -> List[GtsfmMetric]:
    """Helper to get bundle adjustment metrics from a GtsfmData object with a suffix for metric names."""
    # NOTE: "number_cameras" carries no suffix, unlike the other metric names.
    return [
        GtsfmMetric(name="number_cameras", data=len(gtsfm_data.get_valid_camera_indices())),
        GtsfmMetric("number_tracks" + suffix, gtsfm_data.number_tracks()),
        GtsfmMetric(
            "3d_track_lengths" + suffix,
            gtsfm_data.get_track_lengths(),
            plot_type=GtsfmMetric.PlotType.HISTOGRAM,
        ),
        GtsfmMetric(f"reprojection_errors{suffix}_px", gtsfm_data.get_scene_reprojection_errors()),
    ]
def write_images(gtsfm_data: GtsfmData, save_dir: str) -> None:
    """Writes the image data file in the COLMAP format.

    Reference: https://colmap.github.io/format.html#images-txt

    Args:
        gtsfm_data: scene data to write.
        save_dir: folder to put the images.txt file in.
    """
    os.makedirs(save_dir, exist_ok=True)

    num_imgs = gtsfm_data.number_images()
    # TODO: compute this (from keypoint data? or from track data?)
    mean_obs_per_img = 0
    # TODO: compute this
    img_fname = "dummy.jpg"

    file_path = os.path.join(save_dir, "images.txt")
    with open(file_path, "w") as f:
        f.write("# Image list with two lines of data per image:\n")
        f.write("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n")
        f.write("# POINTS2D[] as (X, Y, POINT3D_ID)\n")
        f.write(f"# Number of images: {num_imgs}, mean observations per image: {mean_obs_per_img}\n")

        for i in gtsfm_data.get_valid_camera_indices():
            camera = gtsfm_data.get_camera(i)
            # COLMAP stores the world-to-camera transform (extrinsics), not the
            # camera pose wTc, so the pose must be inverted before export.
            # (Previously the pose was written directly, producing an incorrect
            # images.txt.)
            iTw = camera.pose().inverse()
            qw, qx, qy, qz = iTw.rotation().quaternion()
            tx, ty, tz = iTw.translation()
            f.write(f"{i} {qw} {qx} {qy} {qz} {tx} {ty} {tz} {i} {img_fname}\n")
def write_images(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> None:
    """Writes the image data file in the COLMAP format.

    Reference: https://colmap.github.io/format.html#images-txt

    Note: the "Number of images" saved to the .txt file is not the number of images
    fed to the SfM algorithm, but rather the number of localized camera poses/images,
    which COLMAP refers to as the "reconstructed cameras".

    Args:
        gtsfm_data: scene data to write.
        images: list of all images for this scene, in order of image index.
        save_dir: folder to put the images.txt file in.
    """
    os.makedirs(save_dir, exist_ok=True)

    num_imgs = gtsfm_data.number_images()

    # Tally how many track measurements land in each image, so we can report
    # the mean number of observations per localized image.
    obs_count_per_image = defaultdict(int)
    for track_idx in range(gtsfm_data.number_tracks()):
        track = gtsfm_data.get_track(track_idx)
        for k in range(track.numberMeasurements()):
            obs_count_per_image[track.measurement(k)[0]] += 1
    if obs_count_per_image:
        mean_obs_per_img = sum(obs_count_per_image.values()) / len(obs_count_per_image)
    else:
        mean_obs_per_img = 0

    file_path = os.path.join(save_dir, "images.txt")
    with open(file_path, "w") as f:
        f.write("# Image list with two lines of data per image:\n")
        f.write("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n")
        f.write("# POINTS2D[] as (X, Y, POINT3D_ID)\n")
        f.write(f"# Number of images: {num_imgs}, mean observations per image: {mean_obs_per_img:.3f}\n")

        for cam_idx in gtsfm_data.get_valid_camera_indices():
            # COLMAP exports camera extrinsics (cTw), not the poses (wTc), so must invert
            iTw = gtsfm_data.get_camera(cam_idx).pose().inverse()
            quat = iTw.rotation().toQuaternion()
            tx, ty, tz = iTw.translation()
            f.write(
                f"{cam_idx} {quat.w()} {quat.x()} {quat.y()} {quat.z()} {tx} {ty} {tz} "
                f"{cam_idx} {images[cam_idx].file_name}\n"
            )

            # Second line: every 2D measurement observed in this image, as
            # (X, Y, POINT3D_ID) triplets.
            for track_idx in range(gtsfm_data.number_tracks()):
                track = gtsfm_data.get_track(track_idx)
                for k in range(track.numberMeasurements()):
                    image_id, uv_measured = track.measurement(k)
                    if image_id == cam_idx:
                        f.write(f" {uv_measured[0]:.3f} {uv_measured[1]:.3f} {track_idx}")
            f.write("\n")
def run(
    self,
    initial_data: GtsfmData,
    absolute_pose_priors: List[Optional[PosePrior]],
    relative_pose_priors: Dict[Tuple[int, int], PosePrior],
    verbose: bool = True,
) -> Tuple[GtsfmData, GtsfmData, List[bool]]:
    """Run the bundle adjustment by forming factor graph and optimizing using Levenberg–Marquardt optimization.

    Args:
        initial_data: initialized cameras, tracks w/ their 3d landmark from triangulation.
        absolute_pose_priors: priors to be used on cameras.
        relative_pose_priors: priors on the pose between two cameras.
        verbose: Boolean flag to print out additional info for debugging.

    Results:
        Optimized camera poses, 3D point w/ tracks, and error metrics, aligned to GT (if provided).
        Optimized camera poses after filtering landmarks (and cameras with no remaining landmarks).
        Valid mask as a list of booleans, indicating for each input track whether it was below the
            re-projection threshold.
    """
    num_input_tracks = initial_data.number_tracks()
    num_input_cameras = len(initial_data.get_valid_camera_indices())
    logger.info(f"Input: {num_input_tracks} tracks on {num_input_cameras} cameras\n")

    if num_input_tracks == 0 or num_input_cameras == 0:
        # no cameras or tracks to optimize, so bundle adjustment is not possible
        logger.error(
            "Bundle adjustment aborting, optimization cannot be performed without any tracks or any cameras."
        )
        return initial_data, initial_data, [False] * num_input_tracks

    cameras_to_model = self.__cameras_to_model(initial_data, absolute_pose_priors, relative_pose_priors)

    # Build the factor graph, seed it with the triangulated initialization, and optimize.
    graph = self.__construct_factor_graph(
        cameras_to_model=cameras_to_model,
        initial_data=initial_data,
        absolute_pose_priors=absolute_pose_priors,
        relative_pose_priors=relative_pose_priors,
    )
    initial_values = self.__initial_values(initial_data=initial_data)
    result_values = self.__optimize_factor_graph(graph, initial_values)

    final_error = graph.error(result_values)  # Error drops from ~2764.22 to ~0.046
    if verbose:
        logger.info(f"initial error: {graph.error(initial_values):.2f}")
        logger.info(f"final error: {final_error:.2f}")

    # construct the results
    optimized_data = values_to_gtsfm_data(result_values, initial_data, self._shared_calib)

    if verbose:
        logger.info("[Result] Number of tracks before filtering: %d", optimized_data.number_tracks())

    # filter the largest errors
    if self._output_reproj_error_thresh:
        filtered_result, valid_mask = optimized_data.filter_landmarks(self._output_reproj_error_thresh)
    else:
        valid_mask = [True] * optimized_data.number_tracks()
        filtered_result = optimized_data

    logger.info("[Result] Number of tracks after filtering: %d", filtered_result.number_tracks())

    return optimized_data, filtered_result, valid_mask
def run(self, initial_data: GtsfmData) -> GtsfmData:
    """Run the bundle adjustment by forming factor graph and optimizing using Levenberg–Marquardt optimization.

    Args:
        initial_data: initialized cameras, tracks w/ their 3d landmark from triangulation.

    Results:
        optimized camera poses, 3D point w/ tracks, and error metrics.
    """
    logger.info(
        f"Input: {initial_data.number_tracks()} tracks on {len(initial_data.get_valid_camera_indices())} cameras\n"
    )

    # noise model for measurements -- one pixel in u and v
    measurement_noise = gtsam.noiseModel.Isotropic.Sigma(IMG_MEASUREMENT_DIM, 1.0)

    # Create a factor graph
    graph = gtsam.NonlinearFactorGraph()

    # Add measurements to the factor graph: one reprojection factor per
    # (camera, landmark) observation.
    for j in range(initial_data.number_tracks()):
        track = initial_data.get_track(j)  # SfmTrack
        # retrieve the SfmMeasurement objects
        for m_idx in range(track.number_measurements()):
            # i represents the camera index, and uv is the 2d measurement
            i, uv = track.measurement(m_idx)
            # note use of shorthand symbols C and P
            graph.add(GeneralSFMFactorCal3Bundler(uv, measurement_noise, C(i), P(j)))

    # get all the valid camera indices, which need to be added to the graph.
    valid_camera_indices = initial_data.get_valid_camera_indices()

    # Add a prior on first pose. This indirectly specifies where the origin is.
    graph.push_back(
        gtsam.PriorFactorPinholeCameraCal3Bundler(
            C(valid_camera_indices[0]),
            initial_data.get_camera(valid_camera_indices[0]),
            gtsam.noiseModel.Isotropic.Sigma(PINHOLE_CAM_CAL3BUNDLER_DOF, 0.1),
        )
    )
    # Also add a prior on the position of the first landmark to fix the scale
    graph.push_back(
        gtsam.PriorFactorPoint3(
            P(0),
            initial_data.get_track(0).point3(),
            gtsam.noiseModel.Isotropic.Sigma(POINT3_DOF, 0.1),
        )
    )

    # Create initial estimate
    initial = gtsam.Values()

    # add each PinholeCameraCal3Bundler
    for i in valid_camera_indices:
        camera = initial_data.get_camera(i)
        initial.insert(C(i), camera)

    # add each SfmTrack
    for j in range(initial_data.number_tracks()):
        track = initial_data.get_track(j)
        initial.insert(P(j), track.point3())

    # Optimize the graph and print results
    try:
        params = gtsam.LevenbergMarquardtParams()
        params.setVerbosityLM("ERROR")
        lm = gtsam.LevenbergMarquardtOptimizer(graph, initial, params)
        result_values = lm.optimize()
    except Exception:
        logger.exception("LM Optimization failed")
        # as we did not perform the bundle adjustment, we skip computing the total reprojection error
        return GtsfmData(initial_data.number_images())

    final_error = graph.error(result_values)  # Error drops from ~2764.22 to ~0.046
    logger.info(f"initial error: {graph.error(initial):.2f}")
    logger.info(f"final error: {final_error:.2f}")

    # construct the results
    optimized_data = values_to_gtsfm_data(result_values, initial_data)

    # Collect per-stage metrics (before/after the reprojection-error filter)
    # and persist them to disk for later inspection.
    metrics_dict = {}
    metrics_dict["before_filtering"] = optimized_data.aggregate_metrics()
    logger.info("[Result] Number of tracks before filtering: %d", metrics_dict["before_filtering"]["number_tracks"])

    # filter the largest errors
    filtered_result = optimized_data.filter_landmarks(self.output_reproj_error_thresh)

    metrics_dict["after_filtering"] = filtered_result.aggregate_metrics()
    io_utils.save_json_file(os.path.join(METRICS_PATH, "bundle_adjustment_metrics.json"), metrics_dict)

    logger.info("[Result] Number of tracks after filtering: %d", metrics_dict["after_filtering"]["number_tracks"])
    logger.info("[Result] Mean track length %.3f", metrics_dict["after_filtering"]["3d_track_lengths"]["mean"])
    logger.info("[Result] Median track length %.3f", metrics_dict["after_filtering"]["3d_track_lengths"]["median"])
    filtered_result.log_scene_reprojection_error_stats()

    return filtered_result