def reproject_disparity_to_3D(disparity_tensor: torch.Tensor, Q_matrix: torch.Tensor) -> torch.Tensor:
    r"""Reproject the disparity tensor to a 3D point cloud.

    Args:
        disparity_tensor: Disparity tensor of shape :math:`(B, H, W, 1)`.
        Q_matrix: Tensor of Q matrices of shapes :math:`(B, 4, 4)`.

    Returns:
        The 3D point cloud of shape :math:`(B, H, W, 3)`.
    """
    _check_Q_matrix(Q_matrix)
    _check_disparity_tensor(disparity_tensor)

    batch_size, rows, cols, _ = disparity_tensor.shape
    dtype = disparity_tensor.dtype
    device = disparity_tensor.device

    uv = create_meshgrid(rows, cols, normalized_coordinates=False, device=device, dtype=dtype)
    uv = uv.expand(batch_size, -1, -1, -1)
    v, u = torch.unbind(uv, dim=-1)
    v, u = torch.unsqueeze(v, -1), torch.unsqueeze(u, -1)

    uvd = torch.stack((u, v, disparity_tensor), 1).reshape(batch_size, 3, -1).permute(0, 2, 1)
    points = transform_points(Q_matrix, uvd).reshape(batch_size, rows, cols, 3)

    # Final check that everything went well.
    if not points.shape == (batch_size, rows, cols, 3):
        raise StereoException(
            f"Something went wrong in `reproject_disparity_to_3D`. Expected the final output "
            f"to be of shape {(batch_size, rows, cols, 3)}. "
            f"But the computed point cloud had shape {points.shape}. "
            f"Please ensure inputs are correct. If this is an error, please submit an issue."
        )
    return points

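# A minimal usage sketch (illustrative, not from the original source): assuming the module's
# internal checks accept the shapes below, a disparity map of shape (B, H, W, 1) and a batch of
# 4x4 Q matrices (e.g. as produced by OpenCV's stereoRectify) can be reprojected like this.
# The intrinsic/baseline values are made-up placeholders, not calibrated data.
import torch

def _example_reproject_disparity() -> torch.Tensor:
    batch_size, rows, cols = 1, 4, 6
    disparity = torch.rand(batch_size, rows, cols, 1) + 1.0  # keep disparities strictly positive
    fx, cx, cy, baseline = 320.0, 160.0, 120.0, 0.1
    Q = torch.tensor(
        [[1.0, 0.0, 0.0, -cx],
         [0.0, 1.0, 0.0, -cy],
         [0.0, 0.0, 0.0, fx],
         [0.0, 0.0, 1.0 / baseline, 0.0]]
    )[None]  # (1, 4, 4)
    return reproject_disparity_to_3D(disparity, Q)  # (B, H, W, 3)
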
def track_next_frame(self, x: torch.Tensor) -> Tuple[torch.Tensor, bool]:
    """Track the frame `x`: it is pre-warped according to the previous frame homography,
    matched with `fast_matcher`, and verified with RANSAC."""
    if self.previous_homography is not None:  # mypy, shut up
        Hwarp = self.previous_homography.clone()[None]
        # make a bit of border for safety
        Hwarp[:, 0:2, 0:2] = Hwarp[:, 0:2, 0:2] / 0.8
        Hwarp[:, 0:2, 2] -= 10.0
        Hinv = torch.inverse(Hwarp)
        h, w = self.target.shape[2:]
        frame_warped = warp_perspective(x, Hinv, (h, w))
        input_dict: Dict[str, torch.Tensor] = {
            "image0": self.target,
            "image1": frame_warped,
        }
        for k, v in self.target_fast_representation.items():
            input_dict[f'{k}0'] = v
        match_dict = self.fast_matcher(input_dict)
        keypoints0 = match_dict['keypoints0'][match_dict['batch_indexes'] == 0]
        keypoints1 = match_dict['keypoints1'][match_dict['batch_indexes'] == 0]
        keypoints1 = transform_points(Hwarp, keypoints1)
        if len(keypoints0) < self.minimum_inliers_num:
            self.reset_tracking()
            return self.no_match()
        H, inliers = self.ransac(keypoints0, keypoints1)
        if inliers.sum().item() < self.minimum_inliers_num:
            self.reset_tracking()
            return self.no_match()
        self.previous_homography = H.clone()
        return H, True

def warp_grid(self, dst_homo_src: torch.Tensor) -> torch.Tensor:
    r"""Computes the grid to warp the coordinates grid by a homography.

    Args:
        dst_homo_src (torch.Tensor): Homography or homographies (stacked) to
          transform all points in the grid. Shape of the homography
          has to be :math:`(N, 3, 3)`.

    Returns:
        torch.Tensor: the transformed grid of shape :math:`(N, H, W, 2)`.
    """
    batch_size: int = dst_homo_src.shape[0]
    device: torch.device = dst_homo_src.device
    dtype: torch.dtype = dst_homo_src.dtype
    # expand grid to match the input batch size
    grid: torch.Tensor = self.grid.expand(batch_size, -1, -1, -1)  # NxHxWx2
    if len(dst_homo_src.shape) == 3:  # local homography case
        dst_homo_src = dst_homo_src.view(batch_size, 1, 3, 3)  # Nx1x3x3
    # perform the actual grid transformation,
    # the grid is copied to input device and casted to the same type
    flow: torch.Tensor = transform_points(
        dst_homo_src, grid.to(device).to(dtype))  # NxHxWx2
    return flow.view(batch_size, self.height, self.width, 2)  # NxHxWx2

def pixel2cam(depth: torch.Tensor, intrinsics_inv: torch.Tensor,
              pixel_coords: torch.Tensor) -> torch.Tensor:
    r"""Transform coordinates in the pixel frame to the camera frame.

    Args:
        depth (torch.Tensor): the source depth maps. Shape must be Bx1xHxW.
        intrinsics_inv (torch.Tensor): the inverse intrinsics camera matrix.
          Shape must be Bx4x4.
        pixel_coords (torch.Tensor): the grid with the homogeneous camera coordinates.
          Shape must be BxHxWx3.

    Returns:
        torch.Tensor: tensor of (x, y, z) camera coordinates with shape BxHxWx3.
    """
    if not (len(depth.shape) == 4 and depth.shape[1] == 1):
        raise ValueError("Input depth has to be in the shape of "
                         "Bx1xHxW. Got {}".format(depth.shape))
    if not len(intrinsics_inv.shape) == 3:
        raise ValueError("Input intrinsics_inv has to be in the shape of "
                         "Bx4x4. Got {}".format(intrinsics_inv.shape))
    if not (len(pixel_coords.shape) == 4 and pixel_coords.shape[3] == 3):
        raise ValueError("Input pixel_coords has to be in the shape of "
                         "BxHxWx3. Got {}".format(pixel_coords.shape))
    cam_coords: torch.Tensor = transform_points(intrinsics_inv[:, None], pixel_coords)
    return cam_coords * depth.permute(0, 2, 3, 1)

def warp_grid(self, src_homo_dst: torch.Tensor) -> torch.Tensor:
    r"""Compute the grid to warp the coordinates grid by the homography/ies.

    Args:
        src_homo_dst (torch.Tensor): Homography or homographies (stacked) to
          transform all points in the grid. Shape of the homography
          has to be :math:`(1, 3, 3)` or :math:`(N, 1, 3, 3)`.
          The homography assumes normalized coordinates [-1, 1] if
          normalized_coordinates is True.

    Returns:
        torch.Tensor: the transformed grid of shape :math:`(N, H, W, 2)`.
    """
    batch_size: int = src_homo_dst.shape[0]
    device: torch.device = src_homo_dst.device
    dtype: torch.dtype = src_homo_dst.dtype
    # expand grid to match the input batch size
    grid: torch.Tensor = self.grid.expand(batch_size, -1, -1, -1)  # NxHxWx2
    if len(src_homo_dst.shape) == 3:  # local homography case
        src_homo_dst = src_homo_dst.view(batch_size, 1, 3, 3)  # Nx1x3x3
    # perform the actual grid transformation,
    # the grid is copied to input device and casted to the same type
    flow: torch.Tensor = transform_points(
        src_homo_dst, grid.to(device).to(dtype))  # NxHxWx2
    return flow.view(batch_size, self.height, self.width, 2)  # NxHxWx2

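# A minimal standalone sketch (not part of the original class): the same grid-warping pattern
# as `warp_grid` above, written as a free function. `create_meshgrid` is assumed to come from
# kornia.utils and `transform_points` from kornia.geometry; the resulting (N, H, W, 2) grid
# can be fed directly to torch.nn.functional.grid_sample.
import torch
from kornia.utils import create_meshgrid
from kornia.geometry import transform_points

def warp_grid_example(homography: torch.Tensor, height: int, width: int) -> torch.Tensor:
    """Return the (N, H, W, 2) sampling grid obtained by warping a normalized meshgrid."""
    batch_size = homography.shape[0]
    grid = create_meshgrid(height, width, normalized_coordinates=True,
                           device=homography.device, dtype=homography.dtype)
    grid = grid.expand(batch_size, -1, -1, -1)  # NxHxWx2
    flow = transform_points(homography.view(batch_size, 1, 3, 3), grid)  # NxHxWx2
    return flow.view(batch_size, height, width, 2)
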
def cam2pixel(cam_coords_src: torch.Tensor, dst_proj_src: torch.Tensor,
              eps: float = 1e-12) -> torch.Tensor:
    r"""Transform coordinates in the camera frame to the pixel frame.

    Args:
        cam_coords_src: (x, y, z) coordinates defined in the first camera coordinates
          system. Shape must be BxHxWx3.
        dst_proj_src: the projection matrix between the reference and the non reference
          camera frame. Shape must be Bx4x4.
        eps: small value to avoid division by zero error.

    Returns:
        tensor of shape BxHxWx2 with (u, v) pixel coordinates.
    """
    if not (len(cam_coords_src.shape) == 4 and cam_coords_src.shape[3] == 3):
        raise ValueError(
            "Input cam_coords_src has to be in the shape of "
            "BxHxWx3. Got {}".format(cam_coords_src.shape)
        )
    if not (len(dst_proj_src.shape) == 3 and dst_proj_src.shape[-2:] == (4, 4)):
        raise ValueError("Input dst_proj_src has to be in the shape of "
                         "Bx4x4. Got {}".format(dst_proj_src.shape))
    # apply projection matrix to points
    point_coords: torch.Tensor = transform_points(dst_proj_src[:, None], cam_coords_src)
    x_coord: torch.Tensor = point_coords[..., 0]
    y_coord: torch.Tensor = point_coords[..., 1]
    z_coord: torch.Tensor = point_coords[..., 2]

    # compute pixel coordinates
    u_coord: torch.Tensor = x_coord / (z_coord + eps)
    v_coord: torch.Tensor = y_coord / (z_coord + eps)

    # stack and return the coordinates, that's the actual flow
    pixel_coords_dst: torch.Tensor = torch.stack([u_coord, v_coord], dim=-1)
    return pixel_coords_dst  # BxHxWx2

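# A small self-contained sketch (illustrative, not from the original source): unproject a pixel
# grid with `pixel2cam` and project it back with `cam2pixel`. With a 4x4 intrinsic matrix K and
# its inverse, the roundtrip reproduces the original pixel grid up to numerical error. The
# intrinsic values are made-up placeholders; `create_meshgrid` comes from kornia.utils.
import torch
from kornia.utils import create_meshgrid

def _example_pixel_cam_roundtrip() -> torch.Tensor:
    B, H, W = 1, 4, 5
    K = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)  # Bx4x4 intrinsics
    K[:, 0, 0] = 100.0  # fx
    K[:, 1, 1] = 100.0  # fy
    K[:, 0, 2] = W / 2  # cx
    K[:, 1, 2] = H / 2  # cy
    depth = torch.ones(B, 1, H, W) * 2.0
    uv = create_meshgrid(H, W, normalized_coordinates=False)      # 1xHxWx2
    pixel_coords = torch.cat([uv, torch.ones_like(uv[..., :1])], dim=-1)  # 1xHxWx3
    cam_coords = pixel2cam(depth, torch.inverse(K), pixel_coords)  # BxHxWx3
    return cam2pixel(cam_coords, K)  # BxHxWx2, approximately equal to `uv`
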
def _transform_boxes(boxes: torch.Tensor, M: torch.Tensor) -> torch.Tensor:
    """Transform 3D and 2D boxes in kornia format by applying the transformation matrix M.

    Boxes and the transformation matrix could be batched or not.

    Args:
        boxes: 2D quadrilaterals or 3D hexahedrons in kornia format.
        M: the transformation matrix of shape :math:`(3, 3)` or :math:`(B, 3, 3)` for 2D and
            :math:`(4, 4)` or :math:`(B, 4, 4)` for 3D hexahedron.
    """
    M = M if M.is_floating_point() else M.float()

    # Work with batch as kornia.transform_points only supports a batch of points.
    boxes_per_batch, n_points_per_box, coordinates_dimension = boxes.shape[-3:]
    points = boxes.view(-1, n_points_per_box * boxes_per_batch, coordinates_dimension)
    M = M if M.ndim == 3 else M.unsqueeze(0)

    if points.shape[0] != M.shape[0]:
        raise ValueError(
            f"Batch size mismatch. Got {points.shape[0]} for boxes and {M.shape[0]} for the transformation matrix."
        )

    transformed_boxes: torch.Tensor = transform_points(M, points)
    transformed_boxes = transformed_boxes.view_as(boxes)
    return transformed_boxes

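# A toy usage sketch (illustrative only): translate a single 2D box, given as its four corners
# in (x, y) order, by (tx, ty) using the helper above. An unbatched 3x3 matrix is accepted and
# is broadcast to the batch internally.
import torch

def _example_transform_boxes() -> torch.Tensor:
    # One box with corners (0,0), (2,0), (2,1), (0,1); shape (1, 4, 2).
    boxes = torch.tensor([[[0.0, 0.0], [2.0, 0.0], [2.0, 1.0], [0.0, 1.0]]])
    tx, ty = 5.0, -1.0
    M = torch.tensor([[1.0, 0.0, tx],
                      [0.0, 1.0, ty],
                      [0.0, 0.0, 1.0]])  # (3, 3)
    return _transform_boxes(boxes, M)  # corners shifted by (5, -1)
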
def cam2pixel(cam_coords_src: torch.Tensor, dst_proj_src: torch.Tensor,
              eps: Optional[float] = 1e-6) -> torch.Tensor:
    r"""Transform coordinates in the camera frame to the pixel frame.

    Args:
        cam_coords_src (torch.Tensor): (x, y, z) coordinates defined in the first camera
          coordinates system. Shape must be BxHxWx3.
        dst_proj_src (torch.Tensor): the projection matrix between the reference
          and the non reference camera frame. Shape must be Bx4x4.
        eps (Optional[float]): small value to avoid division by zero error.

    Returns:
        torch.Tensor: tensor of (u, v) coordinates of shape BxHxWx2.
    """
    if not (len(cam_coords_src.shape) == 4 and cam_coords_src.shape[3] == 3):
        raise ValueError(
            "Input cam_coords_src has to be in the shape of "
            "BxHxWx3. Got {}".format(cam_coords_src.shape)
        )
    if not (len(dst_proj_src.shape) == 3 and dst_proj_src.shape[-2:] == (4, 4)):
        raise ValueError("Input dst_proj_src has to be in the shape of "
                         "Bx4x4. Got {}".format(dst_proj_src.shape))
    b, h, w, _ = cam_coords_src.shape
    # apply projection matrix to points
    point_coords: torch.Tensor = transform_points(dst_proj_src[:, None], cam_coords_src)
    x_coord: torch.Tensor = point_coords[..., 0]
    y_coord: torch.Tensor = point_coords[..., 1]
    z_coord: torch.Tensor = point_coords[..., 2]

    # compute pixel coordinates
    u_coord: torch.Tensor = x_coord / (z_coord + eps)
    v_coord: torch.Tensor = y_coord / (z_coord + eps)

    # stack and return the coordinates, that's the actual flow
    pixel_coords_dst: torch.Tensor = torch.stack([u_coord, v_coord], dim=-1)
    return pixel_coords_dst  # BxHxWx2

def inverse_keypoints(self, input: torch.Tensor, module: nn.Module,
                      param: Optional[ParamItem] = None) -> torch.Tensor:
    if isinstance(module, GeometricAugmentationBase2D):
        transform = module.compute_inverse_transformation(
            module.get_transformation_matrix(
                input, None if param is None else cast(Dict, param.data)))
        input = transform_points(
            torch.as_tensor(transform, device=input.device, dtype=input.dtype), input)
    return input

def apply_to_keypoints(self, input: torch.Tensor, module: nn.Module,
                       param: Optional[ParamItem] = None) -> torch.Tensor:
    if param is not None:
        _param = cast(Dict[str, torch.Tensor], param.data)
    else:
        _param = None  # type: ignore

    if isinstance(module, GeometricAugmentationBase2D) and _param is None:
        raise ValueError(f"Transformation matrix for {module} has not been computed.")
    if isinstance(module, GeometricAugmentationBase2D) and _param is not None:
        input = transform_points(
            module.get_transformation_matrix(input, _param), input)
    else:
        pass  # No need to update anything
    return input

def _mean_isotropic_scale_normalize(
        points: torch.Tensor, eps: float = 1e-8) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""Normalize points with isotropic scaling so that their centroid is at the origin
    and their mean distance to the centroid is :math:`\sqrt{D}`.

    Args:
        points : Tensor containing the points to be normalized with shape :math:`(B, N, D)`.
        eps : Small value to avoid division by zero error.

    Returns:
        Tuple containing the normalized points in the shape :math:`(B, N, D)` and
        the transformation matrix in the shape :math:`(B, D+1, D+1)`.
    """
    if not isinstance(points, torch.Tensor):
        raise AssertionError(
            f"points is not an instance of torch.Tensor. Type of points is {type(points)}"
        )

    if len(points.shape) != 3:
        raise AssertionError(
            f"points must be of shape (B, N, D). Got shape {points.shape}.")

    x_mean = torch.mean(points, dim=1, keepdim=True)  # Bx1xD
    scale = (points - x_mean).norm(dim=-1, p=2).mean(dim=-1)  # B

    D_int = points.shape[-1]
    D_float = torch.tensor(points.shape[-1], dtype=torch.float64, device=points.device)
    scale = torch.sqrt(D_float) / (scale + eps)  # B

    transform = eye_like(D_int + 1, points)  # (B, D+1, D+1)

    idxs = torch.arange(D_int, dtype=torch.int64, device=points.device)
    transform[:, idxs, idxs] = transform[:, idxs, idxs] * scale[:, None]
    transform[:, idxs, D_int] = transform[:, idxs, D_int] + (-scale[:, None] * x_mean[:, 0, idxs])

    points_norm = transform_points(transform, points)  # BxNxD

    return (points_norm, transform)

def normalize_points(points: torch.Tensor, eps: float = 1e-8) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""Normalizes points (isotropic).

    Computes the transformation matrix such that the two principal moments of the set of points
    are equal to unity, forming an approximately symmetric circular cloud of points of radius 1
    about the origin. Reference: Hartley/Zisserman 4.4.4 pag.107

    This operation is an essential step before applying the DLT algorithm in order to consider
    the result as optimal.

    Args:
        points: Tensor containing the points to be normalized with shape :math:`(B, N, 2)`.
        eps: epsilon value to avoid numerical instabilities.

    Returns:
        tuple containing the normalized points in the shape :math:`(B, N, 2)` and the
        transformation matrix in the shape :math:`(B, 3, 3)`.
    """
    if len(points.shape) != 3:
        raise AssertionError(points.shape)
    if points.shape[-1] != 2:
        raise AssertionError(points.shape)

    x_mean = torch.mean(points, dim=1, keepdim=True)  # Bx1x2

    scale = (points - x_mean).norm(dim=-1, p=2).mean(dim=-1)  # B
    scale = torch.sqrt(torch.tensor(2.0)) / (scale + eps)  # B

    ones, zeros = torch.ones_like(scale), torch.zeros_like(scale)

    transform = torch.stack([
        scale, zeros, -scale * x_mean[..., 0, 0],
        zeros, scale, -scale * x_mean[..., 0, 1],
        zeros, zeros, ones
    ], dim=-1)  # Bx9

    transform = transform.view(-1, 3, 3)  # Bx3x3
    points_norm = transform_points(transform, points)  # BxNx2

    return (points_norm, transform)

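# A short usage sketch (illustrative): after normalization the point cloud is centered at the
# origin and its mean distance to the origin is close to sqrt(2), which is the precondition
# Hartley normalization establishes before running the DLT.
import torch

def _example_normalize_points() -> None:
    points = torch.rand(2, 10, 2) * 100.0  # two batches of 10 random pixel coordinates
    points_norm, transform = normalize_points(points)
    print(points_norm.mean(dim=1))               # ~0 per batch
    print(points_norm.norm(dim=-1).mean(dim=1))  # ~sqrt(2) per batch
    # `transform` maps the original points to the normalized ones:
    # points_norm == transform_points(transform, points)
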
def transform_grid(self, voxel_grid, grid_to_lidar, lidar_to_cam, cam_to_img):
    """
    Transforms voxel sampling grid into frustum sampling grid
    Args:
        voxel_grid: (B, X, Y, Z, 3), Voxel sampling grid
        grid_to_lidar: (4, 4), Voxel grid to LiDAR unprojection matrix
        lidar_to_cam: (B, 4, 4), LiDAR to camera frame transformation
        cam_to_img: (B, 3, 4), Camera projection matrix
    Returns:
        frustum_grid: (B, X, Y, Z, 3), Frustum sampling grid
    """
    B = lidar_to_cam.shape[0]

    # Create transformation matrices
    V_G = grid_to_lidar  # Voxel Grid -> LiDAR (4, 4)
    C_V = lidar_to_cam   # LiDAR -> Camera (B, 4, 4)
    I_C = cam_to_img     # Camera -> Image (B, 3, 4)
    trans = C_V @ V_G

    # Reshape to match dimensions
    trans = trans.reshape(B, 1, 1, 4, 4)
    voxel_grid = voxel_grid.repeat_interleave(repeats=B, dim=0)

    # Transform to camera frame
    camera_grid = transform_points(trans_01=trans, points_1=voxel_grid)

    # Project to image
    I_C = I_C.reshape(B, 1, 1, 3, 4)
    image_grid, image_depths = transform_utils.project_to_image(project=I_C, points=camera_grid)

    # Convert depths to depth bins
    image_depths = transform_utils.bin_depths(depth_map=image_depths, **self.disc_cfg)

    # Stack to form frustum grid
    image_depths = image_depths.unsqueeze(-1)
    frustum_grid = torch.cat((image_grid, image_depths), dim=-1)
    return frustum_grid

def warp_grid3d(grid: torch.Tensor, src_homo_dst: torch.Tensor) -> torch.Tensor:
    r"""Compute the grid to warp the coordinates grid by the homography/ies.

    Args:
        grid: Unwrapped grid of the shape :math:`(1, D, H, W, 3)`.
        src_homo_dst (torch.Tensor): Homography or homographies (stacked) to
          transform all points in the grid. Shape of the homography
          has to be :math:`(1, 4, 4)` or :math:`(N, 1, 4, 4)`.

    Returns:
        torch.Tensor: the transformed grid of shape :math:`(N, D, H, W, 3)`.
    """
    batch_size: int = src_homo_dst.size(0)
    _, depth, height, width, _ = grid.size()
    # expand grid to match the input batch size
    grid = grid.expand(batch_size, -1, -1, -1, -1)  # NxDxHxWx3
    if len(src_homo_dst.shape) == 3:  # local homography case
        src_homo_dst = src_homo_dst.view(batch_size, 1, 4, 4)  # Nx1x4x4
    # perform the actual grid transformation,
    # the grid is copied to input device and casted to the same type
    flow: torch.Tensor = transform_points(src_homo_dst, grid.to(src_homo_dst))  # NxDxHxWx3
    return flow.view(batch_size, depth, height, width, 3)  # NxDxHxWx3

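# A brief usage sketch (illustrative): build a normalized 3D meshgrid and warp it with an
# identity 4x4 homography; the output equals the input grid expanded to the batch size.
# `create_meshgrid3d` is assumed to come from kornia.utils.
import torch
from kornia.utils import create_meshgrid3d

def _example_warp_grid3d() -> torch.Tensor:
    depth, height, width = 2, 3, 4
    grid = create_meshgrid3d(depth, height, width, normalized_coordinates=True)  # (1, D, H, W, 3)
    homo = torch.eye(4)[None]  # (1, 4, 4)
    return warp_grid3d(grid, homo)  # (1, D, H, W, 3), identical to `grid`
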
def solve_pnp_dlt(
    world_points: torch.Tensor,
    img_points: torch.Tensor,
    intrinsics: torch.Tensor,
    weights: Optional[torch.Tensor] = None,
    svd_eps: float = 1e-4,
) -> torch.Tensor:
    r"""This function attempts to solve the Perspective-n-Point (PnP) problem using
    Direct Linear Transform (DLT).

    Given a batch (where batch size is :math:`B`) of :math:`N` 3D points (where :math:`N \geq 6`) in the world
    space, a batch of :math:`N` corresponding 2D points in the image space and a batch of intrinsic matrices,
    this function tries to estimate a batch of world to camera transformation matrices.

    This implementation needs at least 6 points (i.e. :math:`N \geq 6`) to provide solutions.

    This function cannot be used if all the 3D world points (of any element of the batch) lie on a line or if
    all the 3D world points (of any element of the batch) lie on a plane. This function attempts to check for
    these conditions and throws an AssertionError if found. Do note that this check is sensitive to the value
    of the svd_eps parameter.

    Another bad condition occurs when the camera and the points lie on a twisted cubic. However, this function
    does not check for this condition.

    Args:
        world_points : A tensor with shape :math:`(B, N, 3)` representing
          the points in the world space.
        img_points : A tensor with shape :math:`(B, N, 2)` representing
          the points in the image space.
        intrinsics : A tensor with shape :math:`(B, 3, 3)` representing
          the intrinsic matrices.
        weights : This parameter is not used currently and is just a
          placeholder for API consistency.
        svd_eps : A small float value to avoid numerical precision issues.

    Returns:
        A tensor with shape :math:`(B, 3, 4)` representing the estimated world to
        camera transformation matrices (also known as the extrinsic matrices).

    Example:
        >>> world_points = torch.tensor([[
        ...     [ 5. , -5. ,  0. ], [ 0. ,  0. ,  1.5],
        ...     [ 2.5,  3. ,  6. ], [ 9. , -2. ,  3. ],
        ...     [-4. ,  5. ,  2. ], [-5. ,  5. ,  1. ],
        ... ]], dtype=torch.float64)
        >>>
        >>> img_points = torch.tensor([[
        ...     [1409.1504, -800.936 ], [ 407.0207, -182.1229],
        ...     [ 392.7021,  177.9428], [1016.838 ,   -2.9416],
        ...     [ -63.1116,  142.9204], [-219.3874,   99.666 ],
        ... ]], dtype=torch.float64)
        >>>
        >>> intrinsics = torch.tensor([[
        ...     [ 500.,    0.,  250.],
        ...     [   0.,  500.,  250.],
        ...     [   0.,    0.,    1.],
        ... ]], dtype=torch.float64)
        >>>
        >>> print(world_points.shape, img_points.shape, intrinsics.shape)
        torch.Size([1, 6, 3]) torch.Size([1, 6, 2]) torch.Size([1, 3, 3])
        >>>
        >>> pred_world_to_cam = kornia.geometry.solve_pnp_dlt(world_points, img_points, intrinsics)
        >>>
        >>> print(pred_world_to_cam.shape)
        torch.Size([1, 3, 4])
        >>>
        >>> pred_world_to_cam
        tensor([[[ 0.9392, -0.3432, -0.0130,  1.6734],
                 [ 0.3390,  0.9324, -0.1254, -4.3634],
                 [ 0.0552,  0.1134,  0.9920,  3.7785]]], dtype=torch.float64)
    """
    # This function was implemented based on ideas inspired from multiple references.
    # ============
    # References:
    # ============
    # 1. https://team.inria.fr/lagadic/camera_localization/tutorial-pose-dlt-opencv.html
    # 2. https://github.com/opencv/opencv/blob/68d15fc62edad980f1ffa15ee478438335f39cc3/modules/calib3d/src/calibration.cpp  # noqa: E501
    # 3. http://rpg.ifi.uzh.ch/docs/teaching/2020/03_camera_calibration.pdf
    # 4. http://www.cs.cmu.edu/~16385/s17/Slides/11.3_Pose_Estimation.pdf
    # 5. https://www.ece.mcmaster.ca/~shirani/vision/hartley_ch7.pdf

    if not isinstance(world_points, torch.Tensor):
        raise AssertionError(
            f"world_points is not an instance of torch.Tensor. "
            f"Type of world_points is {type(world_points)}")

    if not isinstance(img_points, torch.Tensor):
        raise AssertionError(
            f"img_points is not an instance of torch.Tensor. Type of img_points is {type(img_points)}")

    if not isinstance(intrinsics, torch.Tensor):
        raise AssertionError(
            f"intrinsics is not an instance of torch.Tensor. Type of intrinsics is {type(intrinsics)}")

    if (weights is not None) and (not isinstance(weights, torch.Tensor)):
        raise AssertionError(
            f"If weights is not None, then weights should be an instance "
            f"of torch.Tensor. Type of weights is {type(weights)}")

    if type(svd_eps) is not float:
        raise AssertionError(f"Type of svd_eps is not float. Got {type(svd_eps)}")

    accepted_dtypes = (torch.float32, torch.float64)

    if world_points.dtype not in accepted_dtypes:
        raise AssertionError(
            f"world_points must have one of the following dtypes {accepted_dtypes}. "
            f"Currently it has {world_points.dtype}.")

    if img_points.dtype not in accepted_dtypes:
        raise AssertionError(
            f"img_points must have one of the following dtypes {accepted_dtypes}. "
            f"Currently it has {img_points.dtype}.")

    if intrinsics.dtype not in accepted_dtypes:
        raise AssertionError(
            f"intrinsics must have one of the following dtypes {accepted_dtypes}. "
            f"Currently it has {intrinsics.dtype}.")

    if (len(world_points.shape) != 3) or (world_points.shape[2] != 3):
        raise AssertionError(
            f"world_points must be of shape (B, N, 3). Got shape {world_points.shape}.")

    if (len(img_points.shape) != 3) or (img_points.shape[2] != 2):
        raise AssertionError(
            f"img_points must be of shape (B, N, 2). Got shape {img_points.shape}.")

    if (len(intrinsics.shape) != 3) or (intrinsics.shape[1:] != (3, 3)):
        raise AssertionError(
            f"intrinsics must be of shape (B, 3, 3). Got shape {intrinsics.shape}.")

    if world_points.shape[1] != img_points.shape[1]:
        raise AssertionError("world_points and img_points must have equal number of points.")

    if (world_points.shape[0] != img_points.shape[0]) or (world_points.shape[0] != intrinsics.shape[0]):
        raise AssertionError("world_points, img_points and intrinsics must have the same batch size.")

    if world_points.shape[1] < 6:
        raise AssertionError(
            f"At least 6 points are required to use this function. "
            f"Got {world_points.shape[1]} points.")

    B, N = world_points.shape[:2]

    # Getting normalized world points.
    world_points_norm, world_transform_norm = _mean_isotropic_scale_normalize(world_points)

    # Checking if world_points_norm (of any element of the batch) has rank = 3. This
    # function cannot be used if all world points (of any element of the batch) lie
    # on a line or if all world points (of any element of the batch) lie on a plane.
    _, s, _ = torch.svd(world_points_norm)
    if torch.any(s[:, -1] < svd_eps):
        raise AssertionError(
            f"The last singular value of one/more of the elements of the batch is smaller "
            f"than {svd_eps}. This function cannot be used if all world_points (of any "
            f"element of the batch) lie on a line or if all world_points (of any "
            f"element of the batch) lie on a plane.")

    intrinsics_inv = torch.inverse(intrinsics)
    world_points_norm_h = convert_points_to_homogeneous(world_points_norm)

    # Transforming img_points with intrinsics_inv to get img_points_inv
    img_points_inv = transform_points(intrinsics_inv, img_points)

    # Normalizing img_points_inv
    img_points_norm, img_transform_norm = _mean_isotropic_scale_normalize(img_points_inv)
    inv_img_transform_norm = torch.inverse(img_transform_norm)

    # Setting up the system (the matrix A in Ax=0)
    system = torch.zeros((B, 2 * N, 12), dtype=world_points.dtype, device=world_points.device)
    system[:, 0::2, 0:4] = world_points_norm_h
    system[:, 1::2, 4:8] = world_points_norm_h
    system[:, 0::2, 8:12] = world_points_norm_h * (-1) * img_points_norm[..., 0:1]
    system[:, 1::2, 8:12] = world_points_norm_h * (-1) * img_points_norm[..., 1:2]

    # Getting the solution vectors.
    _, _, v = torch.svd(system)
    solution = v[..., -1]

    # Reshaping the solution vectors to the correct shape.
    solution = solution.reshape(B, 3, 4)

    # Creating solution_4x4
    solution_4x4 = eye_like(4, solution)
    solution_4x4[:, :3, :] = solution

    # De-normalizing the solution
    intermediate = torch.bmm(solution_4x4, world_transform_norm)
    solution = torch.bmm(inv_img_transform_norm, intermediate[:, :3, :])

    # We obtained one solution for each element of the batch. We may
    # need to multiply each solution with a scalar. This is because
    # if x is a solution to Ax=0, then cx is also a solution. We can
    # find the required scalars by using the properties of
    # rotation matrices. We do this in two parts:

    # First, we fix the sign by making sure that the determinant of
    # all the rotation matrices is non-negative (since the determinant
    # of a rotation matrix should be 1).
    det = torch.det(solution[:, :3, :3])
    ones = torch.ones_like(det)
    sign_fix = torch.where(det < 0, ones * -1, ones)
    solution = solution * sign_fix[:, None, None]

    # Then, we make sure that the norm of the 0th columns of the rotation
    # matrices is 1. Do note that the norm of any column of a rotation
    # matrix should be 1. Here we use the 0th column to calculate norm_col.
    # We then multiply solution with mul_factor.
    norm_col = torch.norm(input=solution[:, :3, 0], p=2, dim=1)
    mul_factor = (1 / norm_col)[:, None, None]
    temp = solution * mul_factor

    # To make sure that the rotation matrix would be orthogonal, we apply
    # QR decomposition.
    ortho, right = linalg_qr(temp[:, :3, :3])

    # We may need to fix the signs of the columns of the ortho matrix.
    # If right[i, j, j] is negative, then we need to flip the signs of
    # the column ortho[i, :, j]. The below code performs the necessary
    # operations in a better way.
    mask = eye_like(3, ortho)
    col_sign_fix = torch.sign(mask * right)
    rot_mat = torch.bmm(ortho, col_sign_fix)

    # Preparing the final output.
    pred_world_to_cam = torch.cat([rot_mat, temp[:, :3, 3:4]], dim=-1)

    # TODO: Implement algorithm to refine the solution.

    return pred_world_to_cam

def warp_perspective(
    src: torch.Tensor,
    M: torch.Tensor,
    dsize: Tuple[int, int],
    mode: str = 'bilinear',
    padding_mode: str = 'zeros',
    align_corners: Optional[bool] = None,
) -> torch.Tensor:
    r"""Applies a perspective transformation to an image.

    .. image:: https://kornia-tutorials.readthedocs.io/en/latest/_images/warp_perspective_10_2.png

    The function warp_perspective transforms the source image using
    the specified matrix:

    .. math::
        \text{dst} (x, y) = \text{src} \left(
        \frac{M^{-1}_{11} x + M^{-1}_{12} y + M^{-1}_{13}}{M^{-1}_{31} x + M^{-1}_{32} y + M^{-1}_{33}} ,
        \frac{M^{-1}_{21} x + M^{-1}_{22} y + M^{-1}_{23}}{M^{-1}_{31} x + M^{-1}_{32} y + M^{-1}_{33}}
        \right )

    Args:
        src: input image with shape :math:`(B, C, H, W)`.
        M: transformation matrix with shape :math:`(B, 3, 3)`.
        dsize: size of the output image (height, width).
        mode: interpolation mode to calculate output values ``'bilinear'`` | ``'nearest'``.
        padding_mode: padding mode for outside grid values ``'zeros'`` | ``'border'`` | ``'reflection'``.
        align_corners(bool, optional): interpolation flag.

    Returns:
        the warped input image :math:`(B, C, H, W)`.

    Example:
       >>> img = torch.rand(1, 4, 5, 6)
       >>> H = torch.eye(3)[None]
       >>> out = warp_perspective(img, H, (4, 2), align_corners=True)
       >>> print(out.shape)
       torch.Size([1, 4, 4, 2])

    .. note::
        This function is often used in conjunction with :func:`get_perspective_transform`.

    .. note::
        See a working example `here <https://kornia-tutorials.readthedocs.io/en/
        latest/warp_perspective.html>`_.
    """
    if not isinstance(src, torch.Tensor):
        raise TypeError("Input src type is not a torch.Tensor. Got {}".format(type(src)))

    if not isinstance(M, torch.Tensor):
        raise TypeError("Input M type is not a torch.Tensor. Got {}".format(type(M)))

    if not len(src.shape) == 4:
        raise ValueError("Input src must be a BxCxHxW tensor. Got {}".format(src.shape))

    if not (len(M.shape) == 3 and M.shape[-2:] == (3, 3)):
        raise ValueError("Input M must be a Bx3x3 tensor. Got {}".format(M.shape))

    # TODO: remove the statement below in kornia v0.6
    if align_corners is None:
        message: str = (
            "The align_corners default value has been changed. By default now is set True "
            "in order to match cv2.warpPerspective. In case you want to keep your previous "
            "behaviour set it to False. This warning will disappear in kornia > v0.6."
        )
        warnings.warn(message)
        # set default value for align corners
        align_corners = True

    B, C, H, W = src.size()
    h_out, w_out = dsize

    # we normalize the 3x3 transformation matrix and convert to 3x4
    dst_norm_trans_src_norm: torch.Tensor = normalize_homography(M, (H, W), (h_out, w_out))  # Bx3x3

    src_norm_trans_dst_norm = _torch_inverse_cast(dst_norm_trans_src_norm)  # Bx3x3

    # this piece of code substitutes F.affine_grid since it does not support 3x3
    grid = (create_meshgrid(h_out, w_out, normalized_coordinates=True,
                            device=src.device).to(src.dtype).repeat(B, 1, 1, 1))
    grid = transform_points(src_norm_trans_dst_norm[:, None, None], grid)

    return F.grid_sample(src, grid, align_corners=align_corners, mode=mode, padding_mode=padding_mode)

def perspective_transform_lafs(trans_01: torch.Tensor, lafs_1: torch.Tensor) -> torch.Tensor:
    r"""Function that applies perspective transformations to a set of local affine frames (LAFs).

    Args:
        trans_01: tensor for perspective transformations of shape :math:`(B, 3, 3)`.
        lafs_1: tensor of lafs of shape :math:`(B, N, 2, 3)`.

    Returns:
        tensor of N-dimensional points of shape :math:`(B, N, 2, 3)`.

    Examples:
        >>> rng = torch.manual_seed(0)
        >>> lafs_1 = torch.rand(2, 4, 2, 3)  # BxNx2x3
        >>> lafs_1
        tensor([[[[0.4963, 0.7682, 0.0885],
                  [0.1320, 0.3074, 0.6341]],
        <BLANKLINE>
                 [[0.4901, 0.8964, 0.4556],
                  [0.6323, 0.3489, 0.4017]],
        <BLANKLINE>
                 [[0.0223, 0.1689, 0.2939],
                  [0.5185, 0.6977, 0.8000]],
        <BLANKLINE>
                 [[0.1610, 0.2823, 0.6816],
                  [0.9152, 0.3971, 0.8742]]],
        <BLANKLINE>
        <BLANKLINE>
                [[[0.4194, 0.5529, 0.9527],
                  [0.0362, 0.1852, 0.3734]],
        <BLANKLINE>
                 [[0.3051, 0.9320, 0.1759],
                  [0.2698, 0.1507, 0.0317]],
        <BLANKLINE>
                 [[0.2081, 0.9298, 0.7231],
                  [0.7423, 0.5263, 0.2437]],
        <BLANKLINE>
                 [[0.5846, 0.0332, 0.1387],
                  [0.2422, 0.8155, 0.7932]]]])
        >>> trans_01 = torch.eye(3).repeat(2, 1, 1)  # Bx3x3
        >>> trans_01.shape
        torch.Size([2, 3, 3])
        >>> lafs_0 = perspective_transform_lafs(trans_01, lafs_1)  # BxNx2x3
    """
    raise_error_if_laf_is_not_valid(lafs_1)
    if not torch.is_tensor(trans_01):
        raise TypeError("Input type is not a torch.Tensor")
    if not trans_01.device == lafs_1.device:
        raise TypeError("Tensor must be in the same device")
    if not trans_01.shape[0] == lafs_1.shape[0]:
        raise ValueError("Input batch size must be the same for both tensors")
    if (not (trans_01.shape[-1] == 3)) or (not (trans_01.shape[-2] == 3)):
        raise ValueError("Transformation should be homography")
    bs, n, _, _ = lafs_1.size()

    # First, we convert LAF to points
    threepts_1 = laf_to_three_points(lafs_1)
    points_1 = threepts_1.permute(0, 1, 3, 2).reshape(bs, n * 3, 2)

    # Then, transform the points
    points_0 = transform_points(trans_01, points_1)

    # Back to LAF format
    threepts_0 = points_0.view(bs, n, 3, 2).permute(0, 1, 3, 2)
    return laf_from_three_points(threepts_0)

def undistort_points(points: torch.Tensor, K: torch.Tensor, dist: torch.Tensor) -> torch.Tensor:
    r"""Compensate for lens distortion a set of 2D image points.

    Radial :math:`(k_1, k_2, k_3, k_4, k_5, k_6)`, tangential :math:`(p_1, p_2)`, thin prism
    :math:`(s_1, s_2, s_3, s_4)`, and tilt :math:`(\tau_x, \tau_y)` distortion models are
    considered in this function.

    Args:
        points: Input image points with shape :math:`(*, N, 2)`.
        K: Intrinsic camera matrix with shape :math:`(*, 3, 3)`.
        dist: Distortion coefficients
            :math:`(k_1,k_2,p_1,p_2[,k_3[,k_4,k_5,k_6[,s_1,s_2,s_3,s_4[,\tau_x,\tau_y]]]])`. This is
            a vector with 4, 5, 8, 12 or 14 elements with shape :math:`(*, n)`.

    Returns:
        Undistorted 2D points with shape :math:`(*, N, 2)`.

    Example:
        >>> _ = torch.manual_seed(0)
        >>> x = torch.rand(1, 4, 2)
        >>> K = torch.eye(3)[None]
        >>> dist = torch.rand(1, 4)
        >>> undistort_points(x, K, dist)
        tensor([[[-0.1513, -0.1165],
                 [ 0.0711,  0.1100],
                 [-0.0697,  0.0228],
                 [-0.1843, -0.1606]]])
    """
    if points.dim() < 2 or points.shape[-1] != 2:
        raise ValueError(f'points shape is invalid. Got {points.shape}.')

    if K.shape[-2:] != (3, 3):
        raise ValueError(f'K matrix shape is invalid. Got {K.shape}.')

    if dist.shape[-1] not in [4, 5, 8, 12, 14]:
        raise ValueError(f"Invalid number of distortion coefficients. Got {dist.shape[-1]}")

    # Adding zeros to obtain vector with 14 coeffs.
    if dist.shape[-1] < 14:
        dist = torch.nn.functional.pad(dist, [0, 14 - dist.shape[-1]])

    # Convert 2D points from pixels to normalized camera coordinates
    cx: torch.Tensor = K[..., 0:1, 2]  # principal point in x (Bx1)
    cy: torch.Tensor = K[..., 1:2, 2]  # principal point in y (Bx1)
    fx: torch.Tensor = K[..., 0:1, 0]  # focal in x (Bx1)
    fy: torch.Tensor = K[..., 1:2, 1]  # focal in y (Bx1)

    # This is equivalent to K^-1 [u,v,1]^T
    x: torch.Tensor = (points[..., 0] - cx) / fx  # (BxN - Bx1)/Bx1 -> BxN
    y: torch.Tensor = (points[..., 1] - cy) / fy  # (BxN - Bx1)/Bx1 -> BxN

    # Compensate for tilt distortion
    if torch.any(dist[..., 12] != 0) or torch.any(dist[..., 13] != 0):
        inv_tilt = tilt_projection(dist[..., 12], dist[..., 13], True)

        # Transposed untilt points (instead of [x,y,1]^T, we obtain [x,y,1])
        x, y = transform_points(inv_tilt, torch.stack([x, y], dim=-1)).unbind(-1)

    # Iteratively undistort points
    x0, y0 = x, y
    for _ in range(5):
        r2 = x * x + y * y

        inv_rad_poly = (1 + dist[..., 5:6] * r2 + dist[..., 6:7] * r2 * r2 + dist[..., 7:8] * r2 ** 3) / (
            1 + dist[..., 0:1] * r2 + dist[..., 1:2] * r2 * r2 + dist[..., 4:5] * r2 ** 3
        )
        deltaX = (
            2 * dist[..., 2:3] * x * y
            + dist[..., 3:4] * (r2 + 2 * x * x)
            + dist[..., 8:9] * r2
            + dist[..., 9:10] * r2 * r2
        )
        deltaY = (
            dist[..., 2:3] * (r2 + 2 * y * y)
            + 2 * dist[..., 3:4] * x * y
            + dist[..., 10:11] * r2
            + dist[..., 11:12] * r2 * r2
        )

        x = (x0 - deltaX) * inv_rad_poly
        y = (y0 - deltaY) * inv_rad_poly

    # Convert points from normalized camera coordinates to pixel coordinates
    x = fx * x + cx
    y = fy * y + cy

    return torch.stack([x, y], -1)

def warp_frame_depth(
    image_src: torch.Tensor,
    depth_dst: torch.Tensor,
    src_trans_dst: torch.Tensor,
    camera_matrix: torch.Tensor,
    normalize_points: bool = False,
) -> torch.Tensor:
    """Warp a tensor from a source to destination frame by the depth in the destination.

    Compute 3d points from the depth, transform them using given transformation, then project the
    point cloud to an image plane.

    Args:
        image_src: image tensor in the source frame with shape :math:`(B,D,H,W)`.
        depth_dst: depth tensor in the destination frame with shape :math:`(B,1,H,W)`.
        src_trans_dst: transformation matrix from destination to source with shape :math:`(B,4,4)`.
        camera_matrix: tensor containing the camera intrinsics with shape :math:`(B,3,3)`.
        normalize_points: whether to normalise the pointcloud. This must be set to ``True`` when the depth
           is represented as the Euclidean ray length from the camera position.

    Return:
        the warped tensor in the source frame with shape :math:`(B,3,H,W)`.
    """
    if not isinstance(image_src, torch.Tensor):
        raise TypeError(f"Input image_src type is not a torch.Tensor. Got {type(image_src)}.")

    if not len(image_src.shape) == 4:
        raise ValueError(f"Input image_src must have a shape (B, D, H, W). Got: {image_src.shape}")

    if not isinstance(depth_dst, torch.Tensor):
        raise TypeError(f"Input depth_dst type is not a torch.Tensor. Got {type(depth_dst)}.")

    if not (len(depth_dst.shape) == 4 and depth_dst.shape[-3] == 1):
        raise ValueError(f"Input depth_dst must have a shape (B, 1, H, W). Got: {depth_dst.shape}")

    if not isinstance(src_trans_dst, torch.Tensor):
        raise TypeError(f"Input src_trans_dst type is not a torch.Tensor. "
                        f"Got {type(src_trans_dst)}.")

    if not (len(src_trans_dst.shape) == 3 and src_trans_dst.shape[-2:] == (4, 4)):
        raise ValueError(f"Input src_trans_dst must have a shape (B, 4, 4). "
                         f"Got: {src_trans_dst.shape}.")

    if not isinstance(camera_matrix, torch.Tensor):
        raise TypeError(f"Input camera_matrix type is not a torch.Tensor. "
                        f"Got {type(camera_matrix)}.")

    if not (len(camera_matrix.shape) == 3 and camera_matrix.shape[-2:] == (3, 3)):
        raise ValueError(f"Input camera_matrix must have a shape (B, 3, 3). "
                         f"Got: {camera_matrix.shape}.")

    # unproject points in the destination frame to the camera frame
    points_3d_dst: torch.Tensor = depth_to_3d(depth_dst, camera_matrix, normalize_points)  # Bx3xHxW

    # rearrange the points for the transformation
    points_3d_dst = points_3d_dst.permute(0, 2, 3, 1)  # BxHxWx3

    # apply transformation to the 3d points
    points_3d_src = transform_points(src_trans_dst[:, None], points_3d_dst)  # BxHxWx3

    # project back to pixels
    camera_matrix_tmp: torch.Tensor = camera_matrix[:, None, None]  # Bx1x1x3x3
    points_2d_src: torch.Tensor = project_points(points_3d_src, camera_matrix_tmp)  # BxHxWx2

    # normalize points between [-1 / 1]
    height, width = depth_dst.shape[-2:]
    points_2d_src_norm: torch.Tensor = normalize_pixel_coordinates(points_2d_src, height, width)  # BxHxWx2

    return F.grid_sample(image_src, points_2d_src_norm, align_corners=True)  # type: ignore

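# A compact usage sketch (illustrative, identity-pose case): warping a frame into itself.
# With the identity destination-to-source pose the warped output should match the source image
# in the valid region. `depth_to_3d`, `project_points` and `normalize_pixel_coordinates` are
# assumed to be the kornia helpers used above; the intrinsics are made-up placeholders.
import torch

def _example_warp_frame_depth() -> torch.Tensor:
    B, C, H, W = 1, 3, 8, 10
    image_src = torch.rand(B, C, H, W)
    depth_dst = torch.ones(B, 1, H, W)   # constant unit depth
    src_trans_dst = torch.eye(4)[None]   # identity destination-to-source pose (B, 4, 4)
    camera_matrix = torch.tensor([[[10.0, 0.0, 5.0],
                                   [0.0, 10.0, 4.0],
                                   [0.0, 0.0, 1.0]]])  # (B, 3, 3)
    return warp_frame_depth(image_src, depth_dst, src_trans_dst, camera_matrix)
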