def __init__(self, pose : Pose, additional_data : Dict, in_pyrender_coords : bool, distance_scale : float = distance_scale_px):
    if in_pyrender_coords:
        self.pose_pyrender = pose
        self.pose_cv = pose.get_converted_between_cv_and_pyrender_coords()
    else:
        self.pose_cv = pose
        self.pose_pyrender = pose.get_converted_between_cv_and_pyrender_coords()
    self.additional_data = additional_data
    self.distance_scale = distance_scale
    self.comparison_points_cv = None
    self.comparison_camera_matrix = None
    self.comparison_distortion_coefficients = None
    self.projected_points = None
    self.comparison_projected_distances = None
    self.comparison_soft_l1_distances = None
    self.comparison_cauchy_distances = None
    self.comparison_arctan_distances = None
    self.assignment_score_function = None
    self.assignment_scores = None
    self.comparison_indices = None
    self.projected_indices = None
    self.matched_scores = None
    self.matched_scores_rms = None
    self.calculate_inliers_within_bounding_box = False
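#A minimal usage sketch (not part of the original module): constructing a
#PoseResult from a CV-space pose so both coordinate conventions are available.
#The Pose(True, rvec, tvec) signature is assumed from its other uses in this module.
def _demo_pose_result_usage():
    rvec = np.zeros((3, 1))                  #Rodrigues rotation, CV convention
    tvec = np.array([[0.0], [0.0], [100.0]]) #translation in front of the camera
    pose_cv = Pose(True, rvec, tvec)
    result = PoseResult(pose_cv, {}, in_pyrender_coords = False)
    #Both frames are populated eagerly by __init__, so no later conversion is needed.
    return result.pose_cv, result.pose_pyrender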
def _get_die_pose_from_projected(bounding_box_cv : np.ndarray, dot_centers_cv : np.ndarray, local_dots_to_project : np.ndarray, approximate_pose_result : PoseResult, camera_matrix : np.ndarray, distortion_coefficients : np.ndarray) -> PoseResult:
    """Determines a die's pose by matching the projection of local dots to the found dot_centers_cv and solving the Perspective-n-Point problem. Uses an approximate pose result as the initial guess."""
    #Note that we need a candidate pose to start performing matching, since our knowns are model points (in 3D) and image-space dot locations (in 2D).
    #Therefore, we must either project the model points onto the image to 2D (given a candidate pose), or reproject the dot locations back to 3D (again, given a candidate pose).
    #Since the former might yield some model-space dots incorrectly omitted (since they face away from the camera in the candidate pose), the 3D matching problem gives more flexibility.
    #However, without knowing the dot size (in image and model space) there's no way to determine each dot's z-distance from the camera, so we'd need a projective matching/registration algorithm, which would supersede doing something like PnP.
    if approximate_pose_result.comparison_indices is not None and approximate_pose_result.projected_indices is not None:
        projected_indices = approximate_pose_result.projected_indices
        found_indices = approximate_pose_result.comparison_indices
        additional_data = {}
        #Note that poorer matches here will be handled with RANSAC/outlier exclusion below.
    else:
        projected_indices, found_indices, additional_data = _match_points_with_point_cloud_registration(dot_centers_cv, local_dots_to_project, approximate_pose_result, camera_matrix, distortion_coefficients)
    local_dots_for_pnp_masked = local_dots_to_project[projected_indices, :]
    dot_centers_cv_masked = dot_centers_cv[found_indices, :]
    extrinsic_rvec = approximate_pose_result.pose_cv.rotation_rodrigues.copy()
    extrinsic_tvec = approximate_pose_result.pose_cv.translation.copy()
    num_dots_min = len(found_indices)
    inlier_distance = inlier_cutoff_px
    perform_iterative = False
    #NB It seems solvePnP may not work for < 4 points, even in the Iterative/useExtrinsicGuess case it claims to handle, and correspondingly the RANSAC version needs at least one more point to be meaningful.
    if num_dots_min >= 5:
        pnp_flags = cv2.SOLVEPNP_ITERATIVE
        matched_pnp = cv2.solvePnPRansac(local_dots_for_pnp_masked, dot_centers_cv_masked, camera_matrix, distortion_coefficients, reprojectionError = inlier_distance, rvec = extrinsic_rvec, tvec = extrinsic_tvec, useExtrinsicGuess = True, flags = pnp_flags)
        pose_cv = Pose.create_from_cv_results(matched_pnp)
        if pose_cv:
            pose_result = PoseResult(pose_cv, additional_data, in_pyrender_coords = False)
        else:
            perform_iterative = True
    elif num_dots_min == 4:
        four_dot_pnp_flags = cv2.SOLVEPNP_AP3P
        four_dot_pnp = cv2.solvePnP(local_dots_for_pnp_masked, dot_centers_cv_masked, camera_matrix, distortion_coefficients, flags = four_dot_pnp_flags)
        four_dot_pnp_pose_cv = Pose.create_from_cv_results(four_dot_pnp)
        if four_dot_pnp_pose_cv:
            pose_result = PoseResult(four_dot_pnp_pose_cv, additional_data, in_pyrender_coords = False)
            pose_result.calculate_comparison(dot_centers_cv_masked, camera_matrix, distortion_coefficients)
            pose_result_distances = pose_result.comparison_projected_distances[pose_result.comparison_indices, pose_result.projected_indices]
            all_distances_inliers = np.all(pose_result_distances < inlier_distance)
            perform_iterative = not all_distances_inliers
        else:
            perform_iterative = True
    else:
        perform_iterative = True
    if perform_iterative:
        iterative_pose_result = _get_die_pose_iterative(bounding_box_cv, dot_centers_cv, approximate_pose_result, camera_matrix, distortion_coefficients, get_reprojection_error_sum_assignment_arctan)
        pose_result = iterative_pose_result
    return pose_result
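#A self-contained sketch (not from the original module) of the RANSAC PnP call
#used above, on synthetic data, to make the expected array shapes explicit:
#object points are (N, 3) float32, image points (N, 2), and cv2.solvePnPRansac
#returns the tuple (retval, rvec, tvec, inliers).
def _demo_solve_pnp_ransac_shapes():
    object_points = np.array([[-5, -5, 0], [5, -5, 0], [5, 5, 0], [-5, 5, 0], [0, 0, 5]], dtype = np.float32)
    camera_matrix = np.array([[800.0, 0.0, 320.0], [0.0, 800.0, 240.0], [0.0, 0.0, 1.0]])
    distortion_coefficients = np.zeros(5)
    rvec_guess = np.zeros((3, 1))
    tvec_guess = np.array([[0.0], [0.0], [120.0]])
    #Synthesise exact image observations from the guess pose, then recover it.
    image_points, _ = cv2.projectPoints(object_points, rvec_guess, tvec_guess, camera_matrix, distortion_coefficients)
    image_points = np.squeeze(image_points, axis = 1)
    retval, rvec, tvec, inliers = cv2.solvePnPRansac(object_points, image_points, camera_matrix, distortion_coefficients, rvec = rvec_guess.copy(), tvec = tvec_guess.copy(), useExtrinsicGuess = True, reprojectionError = 8.0, flags = cv2.SOLVEPNP_ITERATIVE)
    return retval, rvec, tvec, inliers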
def _get_local_dots_projected(trial_pose_cv : Pose, camera_matrix : np.ndarray, distortion_coefficients : np.ndarray):
    """Gets the projected visible dot positions of a die given its pose and camera information."""
    local_dots_facing_pyrender = get_local_dots_facing_camera_from_eye_space_pose(trial_pose_cv.get_converted_between_cv_and_pyrender_coords())
    local_dots_to_project = local_dots_facing_pyrender
    local_dots_projected, local_dots_projected_jacobian = cv2.projectPoints(local_dots_to_project, trial_pose_cv.rotation_rodrigues, trial_pose_cv.translation, camera_matrix, distortion_coefficients)
    local_dots_projected = np.squeeze(local_dots_projected, axis = 1)
    return local_dots_projected
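#Shape note (a sketch, not original code): cv2.projectPoints returns an
#(N, 1, 2) array, which is why the squeeze on axis 1 above yields plain
#(N, 2) pixel coordinates.
def _demo_project_points_shape():
    points_3d = np.zeros((5, 3), dtype = np.float32)
    camera_matrix = np.array([[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]])
    projected, jacobian = cv2.projectPoints(points_3d, np.zeros(3), np.array([0.0, 0.0, 10.0]), camera_matrix, np.zeros(5))
    assert projected.shape == (5, 1, 2)
    return np.squeeze(projected, axis = 1)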
def get_dots_mask_facing_camera_from_eye_space_pose(die_eye_space_pose : Pose):
    """Gets a true-false mask of which dots face the camera, given the die's eye-space pose."""
    dice_transform = die_eye_space_pose.as_transformation_matrix()
    dot_points = DiceConfig.get_local_dot_positions()
    dot_normals = DiceConfig.get_local_dot_normals()
    dot_points_transformed = transform_points_3d(dot_points, dice_transform, at_infinity = False)
    dot_normals_transformed = transform_points_3d(dot_normals, dice_transform, at_infinity = True)
    dots_are_visible = get_are_eye_space_point_normals_facing_camera(dot_points_transformed, dot_normals_transformed)
    return dots_are_visible
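#A hypothetical sketch of the visibility test assumed by the function above
#(the real get_are_eye_space_point_normals_facing_camera may differ): in eye
#space the camera sits at the origin, so a dot faces the camera when its
#outward normal points back towards the origin.
def _sketch_are_point_normals_facing_camera(points_eye, normals_eye):
    #Row-wise dot product of each normal with the point-to-camera direction (-point).
    return np.einsum('ij,ij->i', normals_eye, -points_eye) > 0.0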
def _get_die_pose_iterative(bounding_box_cv : np.ndarray, dot_centers_cv : np.ndarray, approximate_pose_result : PoseResult, camera_matrix : np.ndarray, distortion_coefficients : np.ndarray, reprojection_error_function) -> PoseResult:
    """Gets a die pose iteratively given a reprojection error function, an initial pose estimate, camera information, a bounding box and the found dot positions dot_centers_cv."""
    def get_reprojection_error(trial_pose_rodrigues_translation_cv):
        trial_pose_rodrigues_cv = trial_pose_rodrigues_translation_cv[0:3]
        trial_pose_translation_cv = trial_pose_rodrigues_translation_cv[3:]
        trial_pose_cv = Pose(True, trial_pose_rodrigues_cv, trial_pose_translation_cv)
        reproj_error = reprojection_error_function(bounding_box_cv, dot_centers_cv, trial_pose_cv, camera_matrix, distortion_coefficients)
        return reproj_error
    from scipy.optimize import minimize
    initial_guess = approximate_pose_result.pose_cv.as_numpy_array()
    #NB Nelder-Mead may not be the most efficient method, but a gradient-free method seems best suited to this particular cost function.
    minimization_results = minimize(get_reprojection_error, initial_guess, method = 'Nelder-Mead')
    minimized_pose = Pose.create_from_numpy_array(minimization_results.x)
    return PoseResult(minimized_pose, {}, in_pyrender_coords = False)
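#Assumed packing (a sketch, not the actual Pose implementation) behind
#Pose.as_numpy_array / Pose.create_from_numpy_array: a flat 6-vector
#[rx, ry, rz, tx, ty, tz], matching the slicing in get_reprojection_error above.
def _sketch_pose_vector_round_trip(rodrigues, translation):
    vector = np.concatenate([np.ravel(rodrigues), np.ravel(translation)])
    return Pose(True, vector[0:3], vector[3:6])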
def _get_die_image_bounding_box_pose(bounding_box, camera_matrix : np.ndarray, distortion_coefficients : np.ndarray) -> PoseResult:
    """Gets an approximate pose (particularly for translation) by solving the PnP problem for the corners of an image-space bounding box."""
    box_size = tf.gather(bounding_box, [2, 3], axis = -1) - tf.gather(bounding_box, [0, 1], axis = -1)
    max_box_dimension = tf.math.reduce_max(tf.math.abs(box_size))
    dice_local_bb_min_max_abs = tf.math.abs(DiceConfig.get_local_bounding_box_min_max())
    dice_local_bb_extent = tf.math.reduce_max(dice_local_bb_min_max_abs)
    dice_local_bb_extent = tf.cast(dice_local_bb_extent, tf.float32)
    #NB Since the distance between dots on face 2 is actually slightly greater than to neighbouring dots on other faces, we can't simply cluster based on dot-to-dot distance within each face.
    #Fitting a slightly larger quad than the dice extent tends to give more accurate distance results when fitting a quad to the image bounding box.
    quad_scaling_factor = 1.2
    quad_with_dice_size = (np.array([[-1, -1, 0], [-1, 1, 0], [1, -1, 0], [1, 1, 0]]) * quad_scaling_factor * dice_local_bb_extent.numpy()).astype(np.float32)
    bounding_box_corners = tf.stack([tf.gather(bounding_box, [0, 1], axis = -1), tf.gather(bounding_box, [2, 1], axis = -1), tf.gather(bounding_box, [0, 3], axis = -1), tf.gather(bounding_box, [2, 3], axis = -1)], axis = -1)
    bounding_box_points = _convert_tensorflow_points_to_opencv(bounding_box_corners, transpose = True)
    quad_pnp_results = cv2.solvePnP(quad_with_dice_size, bounding_box_points, camera_matrix, distortion_coefficients)
    quad_pnp_pose_cv = Pose.create_from_cv_results(quad_pnp_results)
    quad_pnp_pose_results = PoseResult(quad_pnp_pose_cv, {}, in_pyrender_coords = False)
    return quad_pnp_pose_results
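#A hypothetical sketch of _convert_tensorflow_points_to_opencv (the real helper
#may differ): the tf.stack(..., axis = -1) above yields a (2, 4) tensor, so a
#transpose gives the contiguous float32 (4, 2) array that cv2.solvePnP expects.
def _sketch_convert_tensorflow_points_to_opencv(points, transpose = False):
    array = points.numpy()
    if transpose:
        array = array.T
    return np.ascontiguousarray(array, dtype = np.float32)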
def _get_approximate_die_pose(die_class : int, y_angle_deg : float, bounding_box_pose_result : PoseResult, x_axis_rotations_deg : Sequence[float], y_rot_offsets_deg : Sequence[float]) -> Sequence[PoseResult]:
    """Gets approximate poses given the die class, the rotation around the vertical axis, and a rough pose result estimated from the bounding box. Checks the given set of rotation offsets around the x and y axes."""
    bb_pnp_res, bb_rotation, bb_translation = bounding_box_pose_result.pose_cv
    if bb_pnp_res:
        from scipy.spatial.transform import Rotation
        bb_translation_pyrender_coords = bb_translation * np.array([1, -1, -1])[:, np.newaxis]
        angle_of_translation_deg = np.rad2deg(np.arctan2(-bb_translation_pyrender_coords[0], -bb_translation_pyrender_coords[2]))
        y_angle_deg_with_position_offset = y_angle_deg + angle_of_translation_deg
        pose_results = []
        die_local_up_axis, die_local_forward_axis, die_local_right_axis = _get_die_local_up_forward_right_axes(die_class)
        for y_rot_offset_deg in y_rot_offsets_deg:
            y_angle_final = y_angle_deg_with_position_offset + y_rot_offset_deg
            angle_cos = np.cos(np.deg2rad(y_angle_final))
            angle_sin = np.sin(np.deg2rad(y_angle_final))
            die_local_to_scene_rotation = np.eye(3)
            die_local_to_scene_rotation[0, 0:3] = angle_cos * die_local_right_axis + angle_sin * die_local_forward_axis
            die_local_to_scene_rotation[1, 0:3] = die_local_up_axis
            die_local_to_scene_rotation[2, 0:3] = -angle_sin * die_local_right_axis + angle_cos * die_local_forward_axis
            for x_axis_rotation_deg in x_axis_rotations_deg:
                x_axis_rotation = Rotation.from_euler('x', x_axis_rotation_deg, degrees = True)
                x_axis_rotation_matrix = x_axis_rotation.as_matrix()
                die_combined_rotation = x_axis_rotation_matrix @ die_local_to_scene_rotation
                die_rotation_rodrigues, die_rotation_rodrigues_jacobian = cv2.Rodrigues(die_combined_rotation)
                #NB Return in 'pyrender' coordinates.
                approx_pose = Pose(True, die_rotation_rodrigues, bb_translation_pyrender_coords)
                pose_result = PoseResult(approx_pose, {}, in_pyrender_coords = True)
                pose_result.y_angle = y_angle_final
                pose_result.y_angle_rel = y_angle_deg + y_rot_offset_deg
                pose_result.x_angle = x_axis_rotation_deg
                pose_results.append(pose_result)
        return pose_results
    else:
        raise NotImplementedError("Cannot get approximate die pose if bounding box PNP was not found.")
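#A quick check (illustrative only, with placeholder axes) of the rotation
#construction above: with orthonormal local axes, the stacked rows form a
#proper rotation that maps the die's local up axis onto world +Y while
#rotating right/forward about it by y_angle_final.
def _demo_die_rotation_rows():
    right, up, forward = np.eye(3)
    angle = np.deg2rad(30.0)
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    rotation = np.stack([cos_a * right + sin_a * forward, up, -sin_a * right + cos_a * forward])
    assert np.allclose(rotation @ up, [0.0, 1.0, 0.0])   #local up -> world up
    assert np.allclose(rotation @ rotation.T, np.eye(3)) #orthonormal rows
    return rotation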