def test_dirty_points_and_gradcheck(self, batch_size, device, dtype):
    # generate input data
    points_src = torch.rand(batch_size, 10, 2, device=device, dtype=dtype)
    H = kornia.eye_like(3, points_src)
    H = H * 0.3 * torch.rand_like(H)
    H = H / H[:, 2:3, 2:3]

    points_src = 100. * torch.rand(batch_size, 20, 2, device=device, dtype=dtype)
    points_dst = kornia.transform_points(H, points_src)

    # making last point an outlier
    points_dst[:, -1, :] += 20
    weights = torch.ones(batch_size, 20, device=device, dtype=dtype)

    # compute transform from source to target
    dst_homo_src = find_homography_dlt_iterated(points_src, points_dst, weights, 0.5, 10)

    assert_allclose(kornia.transform_points(dst_homo_src, points_src[:, :-1]),
                    points_dst[:, :-1], rtol=1e-3, atol=1e-3)
def test_clean_points_and_gradcheck(self, batch_size, device):
    # generate input data
    dtype = torch.float64
    H = (torch.eye(3, device=device)[None].repeat(batch_size, 1, 1) +
         0.3 * torch.rand(batch_size, 3, 3, device=device))
    H = H / H[:, 2:3, 2:3]
    points_src = torch.rand(batch_size, 10, 2).to(device)
    points_dst = kornia.transform_points(H, points_src)
    weights = torch.ones(batch_size, 10, device=device)

    # compute transform from source to target
    dst_homo_src = find_homography_dlt_iterated(points_src, points_dst, weights, 10)

    assert_allclose(kornia.transform_points(dst_homo_src, points_src),
                    points_dst, rtol=1e-3, atol=1e-4)

    # compute gradient check
    points_src = utils.tensor_to_gradcheck_var(points_src)  # to var
    points_dst = utils.tensor_to_gradcheck_var(points_dst)  # to var
    weights = utils.tensor_to_gradcheck_var(weights)  # to var
    assert gradcheck(kornia.find_homography_dlt_iterated,
                     (points_src, points_dst, weights,),
                     rtol=1e-3, atol=1e-4, raise_exception=True)
def test_normalize_pixel_grid(device, dtype):
    if device.type == 'cuda' and dtype == torch.float16:
        pytest.skip('"inverse_cuda" not implemented for "Half"')

    # generate input data
    batch_size = 1
    height, width = 2, 4

    # create points grid
    grid_norm = kornia.utils.create_meshgrid(height, width,
                                             normalized_coordinates=True,
                                             device=device, dtype=dtype)
    assert grid_norm.device == device
    assert grid_norm.dtype == dtype
    grid_norm = torch.unsqueeze(grid_norm, dim=0)

    grid_pix = kornia.utils.create_meshgrid(height, width,
                                            normalized_coordinates=False,
                                            device=device, dtype=dtype)
    assert grid_pix.device == device
    assert grid_pix.dtype == dtype
    grid_pix = torch.unsqueeze(grid_pix, dim=0)

    # grid from pixel space to normalized
    norm_trans_pix = kornia.normal_transform_pixel(height, width, device=device, dtype=dtype)  # 1x3x3
    pix_trans_norm = torch.inverse(norm_trans_pix)  # 1x3x3

    # transform grids
    grid_pix_to_norm = kornia.transform_points(norm_trans_pix, grid_pix)
    grid_norm_to_pix = kornia.transform_points(pix_trans_norm, grid_norm)

    assert_close(grid_pix, grid_norm_to_pix)
    assert_close(grid_norm, grid_pix_to_norm)
def test_get_perspective_transform3d(self, batch_size, device, dtype):
    # generate input data
    d_max, h_max, w_max = 16, 64, 32  # depth, height, width
    d = torch.ceil(d_max * torch.rand(batch_size, device=device, dtype=dtype))
    h = torch.ceil(h_max * torch.rand(batch_size, device=device, dtype=dtype))
    w = torch.ceil(w_max * torch.rand(batch_size, device=device, dtype=dtype))
    norm = torch.rand(batch_size, 8, 3, device=device, dtype=dtype)
    points_src = torch.rand_like(norm, device=device, dtype=dtype)
    points_dst = points_src + norm

    # compute transform from source to target
    dst_homo_src = kornia.get_perspective_transform3d(points_src, points_dst)

    # TODO: get_perspective_transform3d seems to be correct since it would result in the
    # expected output for cropping volumes. Not sure what is going on here.
    assert_allclose(
        kornia.transform_points(dst_homo_src, points_src), points_dst, rtol=1e-4, atol=1e-4)

    # compute gradient check
    points_src = utils.tensor_to_gradcheck_var(points_src)  # to var
    points_dst = utils.tensor_to_gradcheck_var(points_dst)  # to var
    assert gradcheck(kornia.get_perspective_transform3d,
                     (points_src, points_dst,), raise_exception=True)
def test_transform2d_apply(self, device, dtype):
    height, width = 2, 5
    input = torch.tensor([[0., 0.], [width - 1, height - 1]], device=device, dtype=dtype)
    expected = torch.tensor([[-1., -1.], [1., 1.]], device=device, dtype=dtype)
    transform = kornia.normal_transform_pixel(height, width, device=device, dtype=dtype)
    output = kornia.transform_points(transform, input)
    assert_allclose(output, expected.to(device=device, dtype=dtype), atol=1e-4, rtol=1e-4)
def test_transform3d_apply(self, device, dtype):
    depth, height, width = 3, 2, 5
    input = torch.tensor([[0.0, 0.0, 0.0], [width - 1, height - 1, depth - 1]],
                         device=device, dtype=dtype)
    expected = torch.tensor([[-1.0, -1.0, -1.0], [1.0, 1.0, 1.0]], device=device, dtype=dtype)
    transform = kornia.normal_transform_pixel3d(depth, height, width, device=device, dtype=dtype)
    output = kornia.transform_points(transform, input)
    assert_close(output, expected.to(device=device, dtype=dtype), atol=1e-4, rtol=1e-4)
def test_get_perspective_transform(batch_size, device):
    # generate input data
    h_max, w_max = 64, 32  # height, width
    h = torch.ceil(h_max * torch.rand(batch_size)).to(device)
    w = torch.ceil(w_max * torch.rand(batch_size)).to(device)

    norm = torch.rand(batch_size, 4, 2).to(device)
    points_src = torch.zeros_like(norm)
    points_src[:, 1, 0] = h
    points_src[:, 2, 1] = w
    points_src[:, 3, 0] = h
    points_src[:, 3, 1] = w
    points_dst = points_src + norm

    # compute transform from source to target
    dst_homo_src = kornia.get_perspective_transform(points_src, points_dst)

    assert_allclose(kornia.transform_points(dst_homo_src, points_src), points_dst)

    # compute gradient check
    points_src = utils.tensor_to_gradcheck_var(points_src)  # to var
    points_dst = utils.tensor_to_gradcheck_var(points_dst)  # to var
    assert gradcheck(kornia.get_perspective_transform,
                     (points_src, points_dst,), raise_exception=True)
def generate_scene(num_views: int, num_points: int) -> Dict[str, torch.Tensor]:
    # Generate the 3d points
    points3d = torch.rand(1, num_points, 3)  # NxMx3

    # Create random camera matrix
    K = epipolar.random_intrinsics(0.0, 100.0)  # 1x3x3

    # Create random rotation per view
    ang = torch.rand(num_views, 1) * kornia.pi * 2.0
    rvec = torch.rand(num_views, 3)
    rvec = ang * rvec / torch.norm(rvec, dim=1, keepdim=True)  # Nx3
    rot_mat = kornia.angle_axis_to_rotation_matrix(rvec)  # Nx3x3
    # matches with cv2.Rodrigues -> yay !

    # Create random translation per view
    tx = torch.empty(num_views).uniform_(-0.5, 0.5)
    ty = torch.empty(num_views).uniform_(-0.5, 0.5)
    tz = torch.empty(num_views).uniform_(-1.0, 2.0)
    tvec = torch.stack([tx, ty, tz], dim=1)[..., None]

    # Make sure the shape is in front of the camera
    points3d_trans = (rot_mat @ points3d.transpose(-2, -1)) + tvec
    min_dist = torch.min(points3d_trans[:, 2], dim=1)[0]
    tvec[:, 2, 0] = torch.where(min_dist < 0, tz - min_dist + 1.0, tz)

    # compute projection matrices
    P = epipolar.projection_from_KRt(K, rot_mat, tvec)

    # project points3d and backproject to image plane
    points2d = kornia.transform_points(P, points3d.expand(num_views, -1, -1))

    return dict(K=K, R=rot_mat, t=tvec, P=P, points3d=points3d, points2d=points2d)
def find_homography_dlt_iterated(points1: torch.Tensor,
                                 points2: torch.Tensor,
                                 weights: torch.Tensor,
                                 soft_inl_th: float = 3.0,
                                 n_iter: int = 5) -> torch.Tensor:
    r"""Computes the homography matrix using iteratively-reweighted least squares (IRWLS).

    The linear system is solved by using the Reweighted Least Squares Solution for the
    4 Points algorithm.

    Args:
        points1: A set of points in the first image with a tensor shape :math:`(B, N, 2)`.
        points2: A set of points in the second image with a tensor shape :math:`(B, N, 2)`.
        weights: Tensor containing the weights per point correspondence with a shape of
            :math:`(B, N)`. Used for the first iteration of the IRWLS.
        soft_inl_th: Soft inlier threshold used for weight calculation.
        n_iter: number of iterations.

    Returns:
        the computed homography matrix with shape :math:`(B, 3, 3)`.
    """
    # TODO: add citation
    H: torch.Tensor = find_homography_dlt(points1, points2, weights)
    for i in range(n_iter - 1):
        pts1_in_2: torch.Tensor = kornia.transform_points(H, points1)
        error_squared: torch.Tensor = (pts1_in_2 - points2).pow(2).sum(dim=-1)
        weights_new: torch.Tensor = torch.exp(-error_squared / (2.0 * (soft_inl_th ** 2)))
        H = find_homography_dlt(points1, points2, weights_new)
    return H
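# Minimal usage sketch for find_homography_dlt_iterated (assumes torch/kornia imports and
# find_homography_dlt as in the snippets above; the ground-truth homography values are
# illustrative only). Shapes follow the docstring: (B, N, 2) points, (B, N) weights.
def _example_find_homography_dlt_iterated() -> torch.Tensor:
    points1 = torch.rand(1, 10, 2)                    # (B, N, 2) source points
    H_gt = torch.tensor([[[1.0, 0.1, 2.0],
                          [0.0, 1.0, -1.0],
                          [0.0, 0.0, 1.0]]])          # (B, 3, 3) ground-truth homography
    points2 = kornia.transform_points(H_gt, points1)  # (B, N, 2) target points
    weights = torch.ones(1, 10)                       # (B, N) initial weights
    H_est = find_homography_dlt_iterated(points1, points2, weights,
                                         soft_inl_th=3.0, n_iter=5)
    return H_est                                      # (B, 3, 3), close to H_gt up to scale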
def test_transform2d_apply(self):
    height, width = 2, 5
    input = torch.tensor([[0., 0.], [width - 1, height - 1]])
    expected = torch.tensor([[-1., -1.], [1., 1.]])
    transform = kornia.normal_transform_pixel(height, width)
    output = kornia.transform_points(transform, input)
    assert_allclose(output, expected)
def draw_rectangle(image, dst_homo_src):
    height, width = image.shape[:2]
    pts_src = torch.FloatTensor([[
        [-1, -1],  # top-left
        [1, -1],   # bottom-left
        [1, 1],    # bottom-right
        [-1, 1],   # top-right
    ]]).to(dst_homo_src.device)

    # transform points
    pts_dst = dgm.transform_points(torch.inverse(dst_homo_src), pts_src)

    def compute_factor(size):
        return 1.0 * size / 2

    def convert_coordinates_to_pixel(coordinates, factor):
        return factor * (coordinates + 1.0)

    # compute conversion factor
    x_factor = compute_factor(width - 1)
    y_factor = compute_factor(height - 1)

    pts_dst = pts_dst.cpu().squeeze().detach().numpy()
    pts_dst[..., 0] = convert_coordinates_to_pixel(pts_dst[..., 0], x_factor)
    pts_dst[..., 1] = convert_coordinates_to_pixel(pts_dst[..., 1], y_factor)

    # do the actual drawing
    for i in range(4):
        pt_i, pt_ii = tuple(pts_dst[i % 4]), tuple(pts_dst[(i + 1) % 4])
        image = cv2.line(image, pt_i, pt_ii, (255, 0, 0), 3)
    return image
def test_transform_points(self, batch_size, num_points, num_dims, device, dtype):
    # generate input data
    eye_size = num_dims + 1
    points_src = torch.rand(batch_size, num_points, num_dims, device=device, dtype=dtype)
    dst_homo_src = utils.create_random_homography(batch_size, eye_size).to(device=device, dtype=dtype)
    dst_homo_src = dst_homo_src.to(device)

    # transform the points from dst to ref
    points_dst = kornia.transform_points(dst_homo_src, points_src)

    # transform the points from ref to dst
    src_homo_dst = torch.inverse(dst_homo_src)
    points_dst_to_src = kornia.transform_points(src_homo_dst, points_dst)

    # projected should be equal as initial
    assert_allclose(points_src, points_dst_to_src, atol=1e-4, rtol=1e-4)
def test_clean_points(self, batch_size, device, dtype):
    # generate input data
    points_src = torch.rand(batch_size, 10, 2, device=device, dtype=dtype)
    H = kornia.eye_like(3, points_src)
    H = H * 0.3 * torch.rand_like(H)
    H = H / H[:, 2:3, 2:3]
    points_dst = kornia.transform_points(H, points_src)
    weights = torch.ones(batch_size, 10, device=device, dtype=dtype)

    # compute transform from source to target
    dst_homo_src = find_homography_dlt(points_src, points_dst, weights)

    assert_allclose(kornia.transform_points(dst_homo_src, points_src),
                    points_dst, rtol=1e-3, atol=1e-4)
def test_jit(self):
    @torch.jit.script
    def op_script(transform, points):
        return kornia.transform_points(transform, points)

    points = torch.ones(1, 2, 2)
    transform = torch.eye(3)[None]

    actual = op_script(transform, points)
    expected = kornia.transform_points(transform, points)
    assert_allclose(actual, expected)
def warp_frame_depth(
        image_src: torch.Tensor,
        depth_dst: torch.Tensor,
        src_trans_dst: torch.Tensor,
        camera_matrix: torch.Tensor,
        normalize_points: bool = False,
        sampling_mode='bilinear') -> torch.Tensor:
    # TAKEN FROM KORNIA LIBRARY
    if not isinstance(image_src, torch.Tensor):
        raise TypeError(f"Input image_src type is not a torch.Tensor. Got {type(image_src)}.")

    if not len(image_src.shape) == 4:
        raise ValueError(f"Input image_src must have a shape (B, D, H, W). Got: {image_src.shape}")

    if not isinstance(depth_dst, torch.Tensor):
        raise TypeError(f"Input depth_dst type is not a torch.Tensor. Got {type(depth_dst)}.")

    if not len(depth_dst.shape) == 4 and depth_dst.shape[-3] == 1:
        raise ValueError(f"Input depth_dst must have a shape (B, 1, H, W). Got: {depth_dst.shape}")

    if not isinstance(src_trans_dst, torch.Tensor):
        raise TypeError(f"Input src_trans_dst type is not a torch.Tensor. "
                        f"Got {type(src_trans_dst)}.")

    if not len(src_trans_dst.shape) == 3 and src_trans_dst.shape[-2:] == (3, 3):
        raise ValueError(f"Input src_trans_dst must have a shape (B, 3, 3). "
                         f"Got: {src_trans_dst.shape}.")

    if not isinstance(camera_matrix, torch.Tensor):
        raise TypeError(f"Input camera_matrix type is not a torch.Tensor. "
                        f"Got {type(camera_matrix)}.")

    if not len(camera_matrix.shape) == 3 and camera_matrix.shape[-2:] == (3, 3):
        raise ValueError(f"Input camera_matrix must have a shape (B, 3, 3). "
                         f"Got: {camera_matrix.shape}.")

    # unproject source points to camera frame
    points_3d_dst: torch.Tensor = kornia.depth_to_3d(depth_dst, camera_matrix, normalize_points)  # Bx3xHxW

    # transform points from source to destination
    points_3d_dst = points_3d_dst.permute(0, 2, 3, 1)  # BxHxWx3

    # apply transformation to the 3d points
    points_3d_src = kornia.transform_points(src_trans_dst[:, None], points_3d_dst)  # BxHxWx3
    points_3d_src[:, :, :, 2] = torch.relu(points_3d_src[:, :, :, 2])

    # project back to pixels
    camera_matrix_tmp: torch.Tensor = camera_matrix[:, None, None]  # Bx1x1xHxW
    points_2d_src: torch.Tensor = kornia.project_points(points_3d_src, camera_matrix_tmp)  # BxHxWx2

    # normalize points between [-1 / 1]
    height, width = depth_dst.shape[-2:]
    points_2d_src_norm: torch.Tensor = kornia.normalize_pixel_coordinates(points_2d_src, height, width)  # BxHxWx2

    return torch.nn.functional.grid_sample(image_src, points_2d_src_norm,
                                           align_corners=True, mode=sampling_mode)
def test_transform_points(self, batch_size, num_points, num_dims, device_type):
    # generate input data
    eye_size = num_dims + 1
    points_src = torch.rand(batch_size, num_points, num_dims)
    points_src = points_src.to(torch.device(device_type))

    dst_homo_src = utils.create_random_homography(batch_size, eye_size)
    dst_homo_src = dst_homo_src.to(torch.device(device_type))

    # transform the points from dst to ref
    points_dst = kornia.transform_points(dst_homo_src, points_src)

    # transform the points from ref to dst
    src_homo_dst = torch.inverse(dst_homo_src)
    points_dst_to_src = kornia.transform_points(src_homo_dst, points_dst)

    # projected should be equal as initial
    error = utils.compute_mse(points_src, points_dst_to_src)
    assert pytest.approx(error.item(), 0.0)
def test_normalize_pixel_grid():
    # generate input data
    batch_size = 1
    height, width = 2, 4

    # create points grid
    grid_norm = kornia.utils.create_meshgrid(
        height, width, normalized_coordinates=True)
    grid_norm = torch.unsqueeze(grid_norm, dim=0)

    grid_pix = kornia.utils.create_meshgrid(
        height, width, normalized_coordinates=False)
    grid_pix = torch.unsqueeze(grid_pix, dim=0)

    # grid from pixel space to normalized
    norm_trans_pix = kornia.normal_transform_pixel(height, width)  # 1x3x3
    pix_trans_norm = torch.inverse(norm_trans_pix)  # 1x3x3

    # transform grids
    grid_pix_to_norm = kornia.transform_points(norm_trans_pix, grid_pix)
    grid_norm_to_pix = kornia.transform_points(pix_trans_norm, grid_norm)

    assert_allclose(grid_pix, grid_norm_to_pix)
    assert_allclose(grid_norm, grid_pix_to_norm)
def transform_boxes(trans_mat: torch.Tensor, boxes: torch.Tensor, mode: str = "xyxy") -> torch.Tensor:
    r"""Function that applies a transformation matrix to a box or batch of boxes.

    Boxes must be a tensor of the shape (N, 4) or a batch of boxes (B, N, 4), and
    trans_mat must be a (3, 3) transformation matrix or a batch of transformation
    matrices (B, 3, 3).

    Args:
        trans_mat (torch.Tensor): The transformation matrix to be applied.
        boxes (torch.Tensor): The boxes to be transformed.
        mode (str): The format in which the boxes are provided. If set to 'xyxy' the boxes
            are assumed to be in the format (xmin, ymin, xmax, ymax). If set to 'xywh' the
            boxes are assumed to be in the format (xmin, ymin, width, height). Default: 'xyxy'.

    Returns:
        torch.Tensor: The set of transformed points in the specified mode.
    """
    if not torch.is_tensor(boxes):
        raise TypeError(f"Boxes type is not a torch.Tensor. Got {type(boxes)}")

    if not torch.is_tensor(trans_mat):
        raise TypeError(
            f"Transformation matrix type is not a torch.Tensor. Got {type(trans_mat)}")

    if not isinstance(mode, str):
        raise TypeError(f"Mode must be a string. Got {type(mode)}")

    if mode not in ("xyxy", "xywh"):
        raise ValueError(f"Mode must be one of 'xyxy', 'xywh'. Got {mode}")

    # convert boxes to format xyxy
    if mode == "xywh":
        boxes[..., -2] = boxes[..., 0] + boxes[..., -2]  # x + w
        boxes[..., -1] = boxes[..., 1] + boxes[..., -1]  # y + h

    transformed_boxes: torch.Tensor = kornia.transform_points(
        trans_mat, boxes.view(boxes.shape[0], -1, 2))
    transformed_boxes = transformed_boxes.view_as(boxes)

    if mode == 'xywh':
        transformed_boxes[..., 2] = transformed_boxes[..., 2] - transformed_boxes[..., 0]
        transformed_boxes[..., 3] = transformed_boxes[..., 3] - transformed_boxes[..., 1]

    return transformed_boxes
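# Minimal usage sketch for transform_boxes (illustrative values, not from the original
# source): mirrors a single xyxy box across a 512-pixel-wide image using a (B, 3, 3)
# horizontal-flip matrix, matching the shapes described in the docstring above.
def _example_transform_boxes() -> torch.Tensor:
    boxes = torch.tensor([[139.2640, 103.0150, 258.0480, 307.5075]])  # (N, 4) in xyxy
    trans_mat = torch.tensor([[[-1.0, 0.0, 512.0],
                               [0.0, 1.0, 0.0],
                               [0.0, 0.0, 1.0]]])                     # (B, 3, 3) horizontal flip
    return transform_boxes(trans_mat, boxes, mode="xyxy")             # (N, 4) transformed boxes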
def test_jit_trace(self, device):
    @torch.jit.script
    def op_script(transform, points):
        return kornia.transform_points(transform, points)

    points = torch.ones(1, 2, 2).to(device)
    transform = torch.eye(3)[None].to(device)

    op_script_trace = torch.jit.trace(op_script, (transform, points,))

    actual = op_script_trace(transform, points)
    expected = kornia.transform_points(transform, points)
    assert_allclose(actual, expected)
def test_jit(self, device):
    @torch.jit.script
    def op_script(transform, boxes):
        return kornia.transform_boxes(transform, boxes)

    boxes = torch.tensor([139.2640, 103.0150, 258.0480, 307.5075]).to(device)
    trans_mat = torch.tensor([[[-1., 0., 512.], [0., 1., 0.], [0., 0., 1.]]]).to(device)

    actual = op_script(trans_mat, boxes)
    expected = kornia.transform_boxes(trans_mat, boxes)
    assert_allclose(actual, expected)
def get_differentiable_square_depth_estimation(reference_pose_torch,
                                               measurement_pose_torch,
                                               previous_depth_torch,
                                               full_K_torch,
                                               half_K_torch,
                                               original_image_size,
                                               device):
    batch_size, _, _ = full_K_torch.size()

    R_render = torch.eye(3, dtype=torch.float, device=device)
    T_render = torch.zeros(3, dtype=torch.float, device=device)
    R_render = torch.stack(batch_size * [R_render], dim=0)
    T_render = torch.stack(batch_size * [T_render], dim=0)
    R_render[:, 0, 0] *= -1
    R_render[:, 1, 1] *= -1

    trans = torch.bmm(torch.inverse(reference_pose_torch), measurement_pose_torch)
    points_3d_src = kornia.depth_to_3d(previous_depth_torch, full_K_torch, normalize_points=False)
    points_3d_src = points_3d_src.permute(0, 2, 3, 1)
    points_3d_dst = kornia.transform_points(trans[:, None], points_3d_src).view(batch_size, -1, 3)
    point_cloud_p3d = structures.Pointclouds(points=points_3d_dst, features=None)

    width_normalizer = original_image_size / 4.0
    height_normalizer = original_image_size / 4.0
    px_ndc = (half_K_torch[:, 0, 2] - width_normalizer) / width_normalizer
    py_ndc = (half_K_torch[:, 1, 2] - height_normalizer) / height_normalizer
    fx_ndc = half_K_torch[:, 0, 0] / width_normalizer
    fy_ndc = half_K_torch[:, 1, 1] / height_normalizer

    principal_point = torch.stack([px_ndc, py_ndc], dim=-1)
    focal_length = torch.stack([fx_ndc, fy_ndc], dim=-1)

    cameras = renderer.SfMPerspectiveCameras(focal_length=focal_length,
                                             principal_point=principal_point,
                                             R=R_render,
                                             T=T_render,
                                             device=torch.device('cuda'))
    raster_settings = renderer.PointsRasterizationSettings(
        image_size=int(original_image_size / 2.0),
        radius=0.02,
        points_per_pixel=3)
    depth_renderer = renderer.PointsRasterizer(cameras=cameras, raster_settings=raster_settings)

    rendered_depth = torch.min(depth_renderer(point_cloud_p3d).zbuf, dim=-1)[0]
    depth_hypothesis = torch.relu(rendered_depth).unsqueeze(1)
    return depth_hypothesis
def transform_grid(self, voxel_grid, grid_to_lidar, lidar_to_cam, cam_to_img):
    """
    Transforms voxel sampling grid into frustum sampling grid
    Args:
        voxel_grid [torch.Tensor(B, X, Y, Z, 3)]: Voxel sampling grid
        grid_to_lidar [torch.Tensor(4, 4)]: Voxel grid to LiDAR unprojection matrix
        lidar_to_cam [torch.Tensor(B, 4, 4)]: LiDAR to camera frame transformation
        cam_to_img [torch.Tensor(B, 3, 4)]: Camera projection matrix
    Returns:
        frustum_grid [torch.Tensor(B, X, Y, Z, 3)]: Frustum sampling grid
    """
    # B is the number of cameras
    B = lidar_to_cam.shape[0]

    # Create transformation matrices
    V_G = grid_to_lidar  # Voxel Grid -> LiDAR (4, 4)
    C_V = lidar_to_cam   # LiDAR -> Camera (B, 4, 4)
    I_C = cam_to_img     # Camera -> Image (B, 3, 4)
    # Map the grid to real LiDAR coordinates, then to camera coordinates and finally to
    # pixels, so that grid cells and image pixels correspond.
    trans = C_V @ V_G

    # Reshape to match dimensions
    trans = trans.reshape(B, 1, 1, 4, 4)
    voxel_grid = voxel_grid.repeat_interleave(repeats=B, dim=0)

    # Transform to camera frame
    # camera_grid shape: B X Y Z 3
    camera_grid = kornia.transform_points(trans_01=trans, points_1=voxel_grid)

    # Project to image
    I_C = I_C.reshape(B, 1, 1, 3, 4)
    # image_grid shape: B X Y Z 2; image_depths shape: B X Y Z 1
    image_grid, image_depths = transform_utils.project_to_image(
        project=I_C, points=camera_grid)

    # Convert depths to depth bins
    # image_depths shape: B X Y Z 1 -- which bin each depth falls into
    image_depths = depth_utils.bin_depths(depth_map=image_depths, **self.disc_cfg)

    # Stack to form frustum grid
    image_depths = image_depths.unsqueeze(-1)
    # frustum_grid shape: B X Y Z 3
    frustum_grid = torch.cat((image_grid, image_depths), dim=-1)
    return frustum_grid
def test_two_view(self, device, dtype):
    num_views: int = 2
    num_points: int = 10
    scene: Dict[str, torch.Tensor] = epi.generate_scene(num_views, num_points)

    P1 = scene['P'][0:1]
    P2 = scene['P'][1:2]
    x1 = scene['points2d'][0:1]
    x2 = scene['points2d'][1:2]

    X = epi.triangulate_points(P1, P2, x1, x2)
    x_reprojected = kornia.transform_points(scene['P'], X.expand(num_views, -1, -1))

    assert_allclose(scene['points3d'], X, rtol=1e-4, atol=1e-4)
    assert_allclose(scene['points2d'], x_reprojected)
def get_non_differentiable_rectangle_depth_estimation(reference_pose_torch,
                                                      measurement_pose_torch,
                                                      previous_depth_torch,
                                                      full_K_torch,
                                                      half_K_torch,
                                                      original_width,
                                                      original_height):
    batch_size, _, _ = reference_pose_torch.shape
    half_width = int(original_width / 2)
    half_height = int(original_height / 2)

    trans = torch.bmm(torch.inverse(reference_pose_torch), measurement_pose_torch)
    points_3d_src = kornia.depth_to_3d(previous_depth_torch, full_K_torch, normalize_points=False)
    points_3d_src = points_3d_src.permute(0, 2, 3, 1)
    points_3d_dst = kornia.transform_points(trans[:, None], points_3d_src)
    points_3d_dst = points_3d_dst.view(batch_size, -1, 3)

    z_values = points_3d_dst[:, :, -1]
    z_values = torch.relu(z_values)
    sorting_indices = torch.argsort(z_values, descending=True)
    z_values = torch.gather(z_values, dim=1, index=sorting_indices)

    sorting_indices_for_points = torch.stack([sorting_indices] * 3, dim=-1)
    points_3d_dst = torch.gather(points_3d_dst, dim=1, index=sorting_indices_for_points)

    projections = torch.round(kornia.project_points(points_3d_dst, half_K_torch.unsqueeze(1))).long()
    is_valid_below = (projections[:, :, 0] >= 0) & (projections[:, :, 1] >= 0)
    is_valid_above = (projections[:, :, 0] < half_width) & (projections[:, :, 1] < half_height)
    is_valid = is_valid_below & is_valid_above

    depth_hypothesis = torch.zeros(size=(batch_size, 1, half_height, half_width)).cuda()
    for projection_index in range(0, batch_size):
        valid_points_zs = z_values[projection_index][is_valid[projection_index]]
        valid_projections = projections[projection_index][is_valid[projection_index]]
        i_s = valid_projections[:, 1]
        j_s = valid_projections[:, 0]
        ij_combined = i_s * half_width + j_s
        _, ij_combined_unique_indices = np.unique(ij_combined.cpu().numpy(), return_index=True)
        ij_combined_unique_indices = torch.from_numpy(ij_combined_unique_indices).long().cuda()
        i_s = i_s[ij_combined_unique_indices]
        j_s = j_s[ij_combined_unique_indices]
        valid_points_zs = valid_points_zs[ij_combined_unique_indices]
        torch.index_put_(depth_hypothesis[projection_index, 0], (i_s, j_s), valid_points_zs)
    return depth_hypothesis
def transform_grid(self, voxel_grid, grid_to_lidar, lidar_to_cam, cam_to_img):
    """
    Transforms voxel sampling grid into frustum sampling grid
    Args:
        voxel_grid: (B, X, Y, Z, 3), Voxel sampling grid
        grid_to_lidar: (4, 4), Voxel grid to LiDAR unprojection matrix
        lidar_to_cam: (B, 4, 4), LiDAR to camera frame transformation
        cam_to_img: (B, 3, 4), Camera projection matrix
    Returns:
        frustum_grid: (B, X, Y, Z, 3), Frustum sampling grid
    """
    B = lidar_to_cam.shape[0]

    # Create transformation matrices
    V_G = grid_to_lidar  # Voxel Grid -> LiDAR (4, 4)
    C_V = lidar_to_cam   # LiDAR -> Camera (B, 4, 4)
    I_C = cam_to_img     # Camera -> Image (B, 3, 4)
    trans = C_V @ V_G

    # Reshape to match dimensions
    trans = trans.reshape(B, 1, 1, 4, 4)
    voxel_grid = voxel_grid.repeat_interleave(repeats=B, dim=0)

    # Transform to camera frame
    camera_grid = kornia.transform_points(trans_01=trans, points_1=voxel_grid)

    # Project to image
    I_C = I_C.reshape(B, 1, 1, 3, 4)
    image_grid, image_depths = transform_utils.project_to_image(
        project=I_C, points=camera_grid)

    # Convert depths to depth bins
    image_depths = transform_utils.bin_depths(depth_map=image_depths, **self.disc_cfg)

    # Stack to form frustum grid
    image_depths = image_depths.unsqueeze(-1)
    frustum_grid = torch.cat((image_grid, image_depths), dim=-1)
    return frustum_grid
def normalize_points(points: torch.Tensor, eps: float = 1e-8) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""Normalizes points (isotropic).

    Computes the transformation matrix such that the two principal moments of the set of
    points are equal to unity, forming an approximately symmetric circular cloud of points
    of radius 1 about the origin. Reference: Hartley/Zisserman 4.4.4 pag.107

    This operation is an essential step before applying the DLT algorithm in order to
    consider the result as optimal.

    Args:
        points: Tensor containing the points to be normalized with shape :math:`(B, N, 2)`.
        eps: epsilon value to avoid numerical instabilities.

    Returns:
        tuple containing the normalized points in the shape :math:`(B, N, 2)` and the
        transformation matrix in the shape :math:`(B, 3, 3)`.
    """
    assert len(points.shape) == 3, points.shape
    assert points.shape[-1] == 2, points.shape

    x_mean = torch.mean(points, dim=1, keepdim=True)  # Bx1x2
    scale = (points - x_mean).norm(dim=-1).mean(dim=-1)  # B
    scale = torch.sqrt(torch.tensor(2.0)) / (scale + eps)  # B

    ones, zeros = torch.ones_like(scale), torch.zeros_like(scale)

    transform = torch.stack([
        scale, zeros, -scale * x_mean[..., 0, 0],
        zeros, scale, -scale * x_mean[..., 0, 1],
        zeros, zeros, ones], dim=-1)  # Bx9

    transform = transform.view(-1, 3, 3)  # Bx3x3
    points_norm = kornia.transform_points(transform, points)  # BxNx2

    return (points_norm, transform)
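# Minimal usage sketch for normalize_points (illustrative values, not from the original
# source): checks the property stated in the docstring that the normalized cloud is
# centered at the origin.
def _example_normalize_points() -> None:
    points = 100.0 * torch.rand(2, 50, 2)               # (B, N, 2) raw pixel coordinates
    points_norm, transform = normalize_points(points)   # (B, N, 2), (B, 3, 3)
    assert points_norm.shape == (2, 50, 2)
    assert transform.shape == (2, 3, 3)
    # mean of the normalized cloud is approximately zero
    assert torch.allclose(points_norm.mean(dim=1), torch.zeros(2, 2), atol=1e-4)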
def oneway_transfer_error(pts1: torch.Tensor,
                          pts2: torch.Tensor,
                          H: torch.Tensor,
                          squared: bool = True,
                          eps: float = 1e-8) -> torch.Tensor:
    r"""Return transfer error in image 2 for correspondences given the homography matrix.

    Args:
        pts1: correspondences from the left images with shape (B, N, 2 or 3). If they are
            homogeneous, converted automatically.
        pts2: correspondences from the right images with shape (B, N, 2 or 3). If they are
            homogeneous, converted automatically.
        H: Homographies with shape :math:`(B, 3, 3)`.
        squared: if True (default), the squared distance is returned.
        eps: Small constant for safe sqrt.

    Returns:
        the computed distance with shape :math:`(B, N)`.
    """
    if not isinstance(H, torch.Tensor):
        raise TypeError(f"H type is not a torch.Tensor. Got {type(H)}")

    if (len(H.shape) != 3) or not H.shape[-2:] == (3, 3):
        raise ValueError(f"H must be a (*, 3, 3) tensor. Got {H.shape}")

    if pts1.size(-1) == 3:
        pts1 = kornia.convert_points_from_homogeneous(pts1)

    if pts2.size(-1) == 3:
        pts2 = kornia.convert_points_from_homogeneous(pts2)

    # From Hartley and Zisserman, Error in one image (4.6)
    # dist = \sum_{i} ( d(x', Hx)**2)
    pts1_in_2: torch.Tensor = kornia.transform_points(H, pts1)
    error_squared: torch.Tensor = (pts1_in_2 - pts2).pow(2).sum(dim=-1)
    if squared:
        return error_squared
    return (error_squared + eps).sqrt()
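# Minimal usage sketch for oneway_transfer_error (illustrative values, not from the
# original source): for exact correspondences related by H, the returned transfer error
# should be approximately zero for every point.
def _example_oneway_transfer_error() -> torch.Tensor:
    H = torch.tensor([[[1.0, 0.0, 5.0],
                       [0.0, 1.0, -3.0],
                       [0.0, 0.0, 1.0]]])          # (B, 3, 3) pure translation
    pts1 = torch.rand(1, 8, 2)                     # (B, N, 2) points in image 1
    pts2 = kornia.transform_points(H, pts1)        # exact correspondences in image 2
    return oneway_transfer_error(pts1, pts2, H)    # (B, N), all entries ~0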
def op_script(transform, points):
    return kornia.transform_points(transform, points)