def __calibration_priors(
    self, initial_data: GtsfmData, cameras_to_model: List[int], is_fisheye_calibration: bool
) -> NonlinearFactorGraph:
    """Generate prior factors on calibration parameters of the cameras.

    Args:
        initial_data: initialized cameras whose current calibrations seed the priors.
        cameras_to_model: indices of the cameras being modeled in the factor graph.
        is_fisheye_calibration: if True use Cal3Fisheye factor/DOF/sigma, else Cal3Bundler.

    Returns:
        Factor graph containing one calibration prior per calibration variable.
    """
    graph = NonlinearFactorGraph()

    calibration_prior_factor_class = PriorFactorCal3Fisheye if is_fisheye_calibration else PriorFactorCal3Bundler
    calibration_prior_factor_dof = CAM_CAL3FISHEYE_DOF if is_fisheye_calibration else CAM_CAL3BUNDLER_DOF
    calibration_prior_noise_sigma = (
        CAM_CAL3FISHEYE_PRIOR_NOISE_SIGMA if is_fisheye_calibration else CAM_CAL3BUNDLER_PRIOR_NOISE_SIGMA
    )
    noise_model = gtsam.noiseModel.Isotropic.Sigma(calibration_prior_factor_dof, calibration_prior_noise_sigma)

    # With shared calibration there is a single calibration variable, seeded from the first
    # modeled camera; otherwise every camera gets its own prior. Collapsing both cases into one
    # loop removes the previously duplicated factor-construction code.
    camera_indices = cameras_to_model[:1] if self._shared_calib else cameras_to_model
    for i in camera_indices:
        graph.push_back(
            calibration_prior_factor_class(
                K(self.__map_to_calibration_variable(i)),
                initial_data.get_camera(i).calibration(),
                noise_model,
            )
        )
    return graph
def plot_sfm_data_3d(sfm_data: GtsfmData, ax: Axes, max_plot_radius: float = 50) -> None:
    """Render the scene's camera poses and nearby landmarks into a 3D matplotlib axis.

    Args:
        sfm_data: SfmData object with camera and tracks.
        ax: axis to plot on.
        max_plot_radius: maximum distance threshold away from any camera for which a point will be plotted.
    """
    # Gather every valid camera pose and draw the cameras.
    camera_poses = []
    for cam_idx in sfm_data.get_valid_camera_indices():
        camera_poses.append(sfm_data.get_camera(cam_idx).pose())
    plot_poses_3d(camera_poses, ax)

    # Collect all landmark coordinates, then keep only those within the radius of some camera.
    all_points_3d = np.array(
        [list(sfm_data.get_track(track_idx).point3()) for track_idx in range(sfm_data.number_tracks())]
    )
    nearby_points_3d = comp_utils.get_points_within_radius_of_cameras(camera_poses, all_points_3d, max_plot_radius)

    # Draw the retained 3D points as small green dots.
    for pt in nearby_points_3d:
        ax.plot(pt[0], pt[1], pt[2], "g.", markersize=1)
def save_camera_poses_viz(
    pre_ba_sfm_data: GtsfmData,
    post_ba_sfm_data: GtsfmData,
    gt_pose_graph: List[Optional[Pose3]],
    folder_name: str,
) -> None:
    """Visualize the camera pose and save to disk.

    Args:
        pre_ba_sfm_data: data input to bundle adjustment.
        post_ba_sfm_data: output of bundle adjustment.
        gt_pose_graph: ground truth poses.
        folder_name: folder to save the visualization at.
    """
    # extract camera poses from both reconstructions
    pre_ba_poses = [pre_ba_sfm_data.get_camera(i).pose() for i in pre_ba_sfm_data.get_valid_camera_indices()]
    post_ba_poses = [post_ba_sfm_data.get_camera(i).pose() for i in post_ba_sfm_data.get_valid_camera_indices()]

    fig = plt.figure()
    ax = fig.add_subplot(projection="3d")

    # overlay GT, pre-BA, and post-BA poses with distinct marker colors
    plot_poses_3d(gt_pose_graph, ax, center_marker_color="m", label_name="GT")
    plot_poses_3d(pre_ba_poses, ax, center_marker_color="c", label_name="Pre-BA")
    plot_poses_3d(post_ba_poses, ax, center_marker_color="k", label_name="Post-BA")
    ax.legend(loc="upper left")
    set_axes_equal(ax)

    # save the 3D plot in the original view
    fig.savefig(os.path.join(folder_name, "poses_3d.png"))

    # save the BEV representation
    default_camera_elevation = 100  # in metres above ground
    ax.view_init(azim=0, elev=default_camera_elevation)
    fig.savefig(os.path.join(folder_name, "poses_bev.png"))

    plt.close(fig)
def test_get_camera_valid(self):
    """Test for get_camera for a valid index."""
    cam_index = 0
    expected = PinholeCameraCal3Bundler(Pose3(), Cal3Bundler(fx=900, k1=0, k2=0, u0=100, v0=100))

    data = GtsfmData(1)
    data.add_camera(index=cam_index, camera=expected)

    # the camera read back must equal the one inserted
    retrieved = data.get_camera(cam_index)
    self.assertTrue(retrieved.equals(expected, EQUALITY_TOLERANCE))
def values_to_gtsfm_data(values: Values, initial_data: GtsfmData, shared_calib: bool) -> GtsfmData:
    """Cast results from the optimization to GtsfmData object.

    Args:
        values: results of factor graph optimization.
        initial_data: data used to generate the factor graph; used to extract information about
            poses and 3d points in the graph.
        shared_calib: flag indicating if calibrations were shared between the cameras.

    Returns:
        optimized poses and landmarks.
    """
    result = GtsfmData(initial_data.number_images())

    is_fisheye_calibration = isinstance(initial_data.get_camera(0), PinholeCameraCal3Fisheye)
    camera_class = PinholeCameraCal3Fisheye if is_fisheye_calibration else PinholeCameraCal3Bundler

    def _optimized_calibration(cam_idx: int):
        """Fetch the optimized calibration for a camera (variable 0 when calibration is shared)."""
        key = K(0 if shared_calib else cam_idx)
        return values.atCal3Fisheye(key) if is_fisheye_calibration else values.atCal3Bundler(key)

    # add cameras: combine each optimized pose with its optimized calibration
    for cam_idx in initial_data.get_valid_camera_indices():
        result.add_camera(cam_idx, camera_class(values.atPose3(X(cam_idx)), _optimized_calibration(cam_idx)))

    # add tracks: copy the measurements over, attaching the optimized 3D point
    for track_idx in range(initial_data.number_tracks()):
        source_track = initial_data.get_track(track_idx)
        optimized_track = SfmTrack(values.atPoint3(P(track_idx)))
        for meas_idx in range(source_track.numberMeasurements()):
            cam_idx, uv = source_track.measurement(meas_idx)
            optimized_track.addMeasurement(cam_idx, uv)
        result.add_track(optimized_track)

    return result
def get_dummy_gtsfm_data(self) -> GtsfmData:
    """Build a GtsfmData instance populated with the default toy cameras and tracks."""
    sfm_result = GtsfmData(self._num_images)

    # Treat every default camera as valid and register it with the result.
    for cam_idx in range(DEFAULT_NUM_CAMERAS):
        sfm_result.add_camera(cam_idx, DEFAULT_CAMERAS[cam_idx])

    # Project each toy world point into every camera and record the measurements as a track.
    for track_idx in range(DEFAULT_NUM_TRACKS):
        point_world = DEFAULT_TRACK_POINTS[track_idx]
        track = SfmTrack(point_world)
        for cam_idx in range(DEFAULT_NUM_CAMERAS):
            projection = sfm_result.get_camera(cam_idx).project(point_world)
            track.addMeasurement(idx=cam_idx, m=projection)
        sfm_result.add_track(track)

    return sfm_result
def write_cameras(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> None:
    """Writes the camera data file in the COLMAP format.

    Reference: https://colmap.github.io/format.html#cameras-txt

    Args:
        gtsfm_data: scene data to write.
        images: list of all images for this scene, in order of image index.
        save_dir: folder to put the cameras.txt file in.
    """
    os.makedirs(save_dir, exist_ok=True)

    # TODO: handle shared intrinsics
    # Assumes all camera models have five intrinsic parameters
    camera_model = "RADIAL"

    file_path = os.path.join(save_dir, "cameras.txt")
    with open(file_path, "w") as f:
        f.write("# Camera list with one line of data per camera:\n")
        f.write("# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n")
        # note that we save the number of estimated cameras, not the number of input images,
        # which would instead be gtsfm_data.number_images().
        f.write(f"# Number of cameras: {len(gtsfm_data.get_valid_camera_indices())}\n")

        for cam_idx in gtsfm_data.get_valid_camera_indices():
            calibration = gtsfm_data.get_camera(cam_idx).calibration()
            fx = calibration.fx()
            u0, v0 = calibration.px(), calibration.py()
            k1, k2 = calibration.k1(), calibration.k2()

            image_width = images[cam_idx].width
            image_height = images[cam_idx].height

            f.write(f"{cam_idx} {camera_model} {image_width} {image_height} {fx} {u0} {v0} {k1} {k2}\n")
def __initial_values(self, initial_data: GtsfmData) -> Values:
    """Initialize all the variables in the factor graph."""
    values = gtsam.Values()

    # add each camera: always the pose, and the calibration unless it is shared
    # (with shared calibration only the first valid camera contributes the single calibration value)
    for position, cam_idx in enumerate(initial_data.get_valid_camera_indices()):
        camera = initial_data.get_camera(cam_idx)
        values.insert(X(cam_idx), camera.pose())
        if position == 0 or not self._shared_calib:
            values.insert(K(self.__map_to_calibration_variable(cam_idx)), camera.calibration())

    # add the 3D point of each SfmTrack
    for track_idx in range(initial_data.number_tracks()):
        values.insert(P(track_idx), initial_data.get_track(track_idx).point3())

    return values
def __construct_factor_graph(
    self,
    cameras_to_model: List[int],
    initial_data: GtsfmData,
    absolute_pose_priors: List[Optional[PosePrior]],
    relative_pose_priors: Dict[Tuple[int, int], PosePrior],
) -> NonlinearFactorGraph:
    """Construct the factor graph with reprojection factors, BetweenFactors, and prior factors."""
    first_camera = initial_data.get_camera(cameras_to_model[0])
    is_fisheye_calibration = isinstance(first_camera, PinholeCameraCal3Fisheye)

    graph = NonlinearFactorGraph()  # Create a factor graph

    # Assemble the graph from its constituent factor groups, in order:
    # reprojection terms, relative-pose (Between) terms, pose priors, calibration priors.
    subgraphs = (
        self.__reprojection_factors(initial_data=initial_data, is_fisheye_calibration=is_fisheye_calibration),
        self._between_factors(relative_pose_priors=relative_pose_priors, cameras_to_model=cameras_to_model),
        self.__pose_priors(
            absolute_pose_priors=absolute_pose_priors,
            initial_data=initial_data,
            camera_for_origin=cameras_to_model[0],
        ),
        self.__calibration_priors(initial_data, cameras_to_model, is_fisheye_calibration),
    )
    for subgraph in subgraphs:
        graph.push_back(subgraph)

    # Also add a prior on the position of the first landmark to fix the scale
    graph.push_back(
        gtsam.PriorFactorPoint3(
            P(0),
            initial_data.get_track(0).point3(),
            gtsam.noiseModel.Isotropic.Sigma(POINT3_DOF, 0.1),
        )
    )

    return graph
def __pose_priors(
    self,
    absolute_pose_priors: List[Optional[PosePrior]],
    initial_data: GtsfmData,
    camera_for_origin: int,
) -> NonlinearFactorGraph:
    """Generate prior factors (in the world frame) on pose variables.

    Args:
        absolute_pose_priors: per-camera absolute pose priors (currently unused, see TODO below).
        initial_data: initialized cameras; supplies the pose used for the gauge-fixing prior.
        camera_for_origin: index of the camera whose initial pose anchors the origin when no
            absolute prior exists. (Annotation fixed: this is an int index — it is used as
            X(camera_for_origin) and get_camera(camera_for_origin) — not a camera object.)

    Returns:
        Factor graph containing the pose prior factors.
    """
    graph = NonlinearFactorGraph()

    # TODO(Ayush): start using absolute prior factors.
    num_priors_added = 0

    if num_priors_added == 0:
        # Adding a prior to fix origin as no absolute prior exists.
        graph.push_back(
            PriorFactorPose3(
                X(camera_for_origin),
                initial_data.get_camera(camera_for_origin).pose(),
                gtsam.noiseModel.Isotropic.Sigma(CAM_POSE3_DOF, CAM_POSE3_PRIOR_NOISE_SIGMA),
            )
        )
    return graph
def get_dummy_gtsfm_data(self) -> GtsfmData:
    """Create a new GtsfmData instance, add dummy cameras and tracks, and draw the track points on all images."""
    sfm_result = GtsfmData(self._num_images)

    # Register every dummy camera as a valid camera.
    for cam_idx in range(NUM_CAMERAS):
        sfm_result.add_camera(cam_idx, CAMERAS[cam_idx])

    # Project each dummy world point into every camera, paint that pixel, and record the measurement.
    for track_idx in range(NUM_TRACKS):
        point_world = DUMMY_TRACK_PTS_WORLD[track_idx]
        track = SfmTrack(point_world)
        for cam_idx in range(NUM_CAMERAS):
            u, v = sfm_result.get_camera(cam_idx).project(point_world)
            row = np.round(v).astype(np.uint32)
            col = np.round(u).astype(np.uint32)
            # color the track point with (r, g, b) = (255, 255, 255)
            self._img_dict[cam_idx].value_array[row, col, :] = 255
            track.addMeasurement(idx=cam_idx, m=[u, v])
        sfm_result.add_track(track)

    return sfm_result
def write_images(gtsfm_data: GtsfmData, save_dir: str) -> None:
    """Writes the image data file in the COLMAP format.

    Reference: https://colmap.github.io/format.html#images-txt

    Args:
        gtsfm_data: scene data to write.
        save_dir: folder to put the images.txt file in.
    """
    os.makedirs(save_dir, exist_ok=True)

    num_imgs = gtsfm_data.number_images()
    # TODO: compute this (from keypoint data? or from track data?)
    mean_obs_per_img = 0
    # TODO: compute this
    img_fname = "dummy.jpg"

    file_path = os.path.join(save_dir, "images.txt")
    with open(file_path, "w") as f:
        f.write("# Image list with two lines of data per image:\n")
        f.write("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n")
        f.write("# POINTS2D[] as (X, Y, POINT3D_ID)\n")
        f.write(
            f"# Number of images: {num_imgs}, mean observations per image: {mean_obs_per_img}\n"
        )

        for i in gtsfm_data.get_valid_camera_indices():
            camera = gtsfm_data.get_camera(i)
            # BUGFIX: COLMAP's images.txt stores the camera extrinsics (world-to-camera, iTw),
            # not the camera pose (wTi), so the pose must be inverted before export.
            iTw = camera.pose().inverse()
            iRw_quaternion = iTw.rotation().quaternion()
            itw = iTw.translation()
            tx, ty, tz = itw
            qw, qx, qy, qz = iRw_quaternion
            f.write(f"{i} {qw} {qx} {qy} {qz} {tx} {ty} {tz} {i} {img_fname}\n")
def write_images(gtsfm_data: GtsfmData, images: List[Image], save_dir: str) -> None:
    """Writes the image data file in the COLMAP format.

    Reference: https://colmap.github.io/format.html#images-txt

    Note: the "Number of images" saved to the .txt file is not the number of images fed to the
    SfM algorithm, but rather the number of localized camera poses/images, which COLMAP refers
    to as the "reconstructed cameras".

    Args:
        gtsfm_data: scene data to write.
        images: list of all images for this scene, in order of image index.
        save_dir: folder to put the images.txt file in.
    """
    os.makedirs(save_dir, exist_ok=True)

    num_imgs = gtsfm_data.number_images()

    # Tally how many 2d measurements each image observes, for the mean-observations header.
    obs_count_per_image = defaultdict(int)
    for track_idx in range(gtsfm_data.number_tracks()):
        track = gtsfm_data.get_track(track_idx)
        for meas_idx in range(track.numberMeasurements()):
            image_id, uv_measured = track.measurement(meas_idx)
            obs_count_per_image[image_id] += 1
    if obs_count_per_image:
        mean_obs_per_img = sum(obs_count_per_image.values()) / len(obs_count_per_image)
    else:
        mean_obs_per_img = 0

    file_path = os.path.join(save_dir, "images.txt")
    with open(file_path, "w") as f:
        f.write("# Image list with two lines of data per image:\n")
        f.write("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n")
        f.write("# POINTS2D[] as (X, Y, POINT3D_ID)\n")
        f.write(
            f"# Number of images: {num_imgs}, mean observations per image: {mean_obs_per_img:.3f}\n"
        )

        for i in gtsfm_data.get_valid_camera_indices():
            img_fname = images[i].file_name
            camera = gtsfm_data.get_camera(i)
            # COLMAP exports camera extrinsics (cTw), not the poses (wTc), so must invert
            iTw = camera.pose().inverse()
            quaternion = iTw.rotation().toQuaternion()
            tx, ty, tz = iTw.translation()
            qw, qx, qy, qz = quaternion.w(), quaternion.x(), quaternion.y(), quaternion.z()

            f.write(f"{i} {qw} {qx} {qy} {qz} {tx} {ty} {tz} {i} {img_fname}\n")

            # write out points2d: each measurement of each track that falls in image i
            for track_idx in range(gtsfm_data.number_tracks()):
                track = gtsfm_data.get_track(track_idx)
                for meas_idx in range(track.numberMeasurements()):
                    image_id, uv_measured = track.measurement(meas_idx)
                    if image_id == i:
                        f.write(f" {uv_measured[0]:.3f} {uv_measured[1]:.3f} {track_idx}")
            f.write("\n")
def run(self, initial_data: GtsfmData) -> GtsfmData:
    """Run the bundle adjustment by forming factor graph and optimizing using Levenberg–Marquardt optimization.

    Args:
        initial_data: initialized cameras, tracks w/ their 3d landmark from triangulation.

    Returns:
        optimized camera poses, 3D point w/ tracks, and error metrics.
    """
    logger.info(
        f"Input: {initial_data.number_tracks()} tracks on {len(initial_data.get_valid_camera_indices())} cameras\n"
    )

    # noise model for measurements -- one pixel in u and v
    measurement_noise = gtsam.noiseModel.Isotropic.Sigma(IMG_MEASUREMENT_DIM, 1.0)

    # Create a factor graph
    graph = gtsam.NonlinearFactorGraph()

    # Add measurements to the factor graph: one reprojection factor per (camera, track) observation.
    for j in range(initial_data.number_tracks()):
        track = initial_data.get_track(j)  # SfmTrack
        # retrieve the SfmMeasurement objects
        # NOTE(review): number_measurements() (snake_case) differs from the numberMeasurements()
        # spelling used elsewhere in this codebase — confirm against the installed gtsam version.
        for m_idx in range(track.number_measurements()):
            # i represents the camera index, and uv is the 2d measurement
            i, uv = track.measurement(m_idx)
            # note use of shorthand symbols C and P
            graph.add(GeneralSFMFactorCal3Bundler(uv, measurement_noise, C(i), P(j)))

    # get all the valid camera indices, which need to be added to the graph.
    valid_camera_indices = initial_data.get_valid_camera_indices()

    # Add a prior on first pose. This indirectly specifies where the origin is.
    graph.push_back(
        gtsam.PriorFactorPinholeCameraCal3Bundler(
            C(valid_camera_indices[0]),
            initial_data.get_camera(valid_camera_indices[0]),
            gtsam.noiseModel.Isotropic.Sigma(PINHOLE_CAM_CAL3BUNDLER_DOF, 0.1),
        ))
    # Also add a prior on the position of the first landmark to fix the scale
    graph.push_back(
        gtsam.PriorFactorPoint3(
            P(0), initial_data.get_track(0).point3(),
            gtsam.noiseModel.Isotropic.Sigma(POINT3_DOF, 0.1)))

    # Create initial estimate
    initial = gtsam.Values()

    # add each PinholeCameraCal3Bundler (pose + calibration bundled into one variable per camera)
    for i in valid_camera_indices:
        camera = initial_data.get_camera(i)
        initial.insert(C(i), camera)

    # add each SfmTrack's 3D point
    for j in range(initial_data.number_tracks()):
        track = initial_data.get_track(j)
        initial.insert(P(j), track.point3())

    # Optimize the graph and print results
    try:
        params = gtsam.LevenbergMarquardtParams()
        params.setVerbosityLM("ERROR")
        lm = gtsam.LevenbergMarquardtOptimizer(graph, initial, params)
        result_values = lm.optimize()
    except Exception:
        logger.exception("LM Optimization failed")
        # as we did not perform the bundle adjustment, we skip computing the total reprojection error
        return GtsfmData(initial_data.number_images())

    final_error = graph.error(result_values)

    # Error drops from ~2764.22 to ~0.046
    logger.info(f"initial error: {graph.error(initial):.2f}")
    logger.info(f"final error: {final_error:.2f}")

    # construct the results
    # NOTE(review): called here with two positional args — verify this matches the current
    # values_to_gtsfm_data signature.
    optimized_data = values_to_gtsfm_data(result_values, initial_data)

    # Record metrics before and after reprojection-error filtering.
    metrics_dict = {}
    metrics_dict["before_filtering"] = optimized_data.aggregate_metrics()
    logger.info("[Result] Number of tracks before filtering: %d",
                metrics_dict["before_filtering"]["number_tracks"])

    # filter the largest errors
    filtered_result = optimized_data.filter_landmarks(self.output_reproj_error_thresh)

    metrics_dict["after_filtering"] = filtered_result.aggregate_metrics()
    io_utils.save_json_file(
        os.path.join(METRICS_PATH, "bundle_adjustment_metrics.json"), metrics_dict)

    logger.info("[Result] Number of tracks after filtering: %d",
                metrics_dict["after_filtering"]["number_tracks"])
    logger.info(
        "[Result] Mean track length %.3f",
        metrics_dict["after_filtering"]["3d_track_lengths"]["mean"])
    logger.info(
        "[Result] Median track length %.3f",
        metrics_dict["after_filtering"]["3d_track_lengths"]["median"])
    filtered_result.log_scene_reprojection_error_stats()

    return filtered_result