def to_depth_mode(points, bboxes):
    """Convert points and bboxes to Depth Coord and Depth Box mode."""
    if points is not None:
        points = Coord3DMode.convert_point(points.copy(), Coord3DMode.LIDAR,
                                           Coord3DMode.DEPTH)
    if bboxes is not None:
        bboxes = Box3DMode.convert(bboxes.clone(), Box3DMode.LIDAR,
                                   Box3DMode.DEPTH)
    return points, bboxes
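# Illustrative usage sketch (not part of the original script): `to_depth_mode`
# expects a numpy point array (so that `.copy()` works) and a box object or
# tensor (so that `.clone()` works). The random inputs below are placeholders.
def _example_to_depth_mode():
    import numpy as np
    import torch
    from mmdet3d.core.bbox import LiDARInstance3DBoxes

    lidar_points = np.random.rand(100, 4).astype(np.float32)  # x, y, z, intensity
    lidar_boxes = LiDARInstance3DBoxes(torch.rand(5, 7))  # (N, 7): center, size, yaw
    depth_points, depth_boxes = to_depth_mode(lidar_points, lidar_boxes)
    return depth_points, depth_boxes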
def main():
    args = parse_args()

    if args.output_dir is not None:
        mkdir_or_exist(args.output_dir)

    cfg = retrieve_data_cfg(args.config, args.skip_type, args.cfg_options)
    if cfg.data.train['type'] == 'RepeatDataset':
        cfg.data.train.dataset['pipeline'] = get_loading_pipeline(
            cfg.train_pipeline)
    else:
        cfg.data.train['pipeline'] = get_loading_pipeline(cfg.train_pipeline)
    dataset = build_dataset(
        cfg.data.train, default_args=dict(filter_empty_gt=False))
    # For RepeatDataset type, the infos are stored in dataset.dataset
    if cfg.data.train['type'] == 'RepeatDataset':
        dataset = dataset.dataset
    data_infos = dataset.data_infos

    for idx, data_info in enumerate(track_iter_progress(data_infos)):
        if cfg.dataset_type in ['KittiDataset', 'WaymoDataset']:
            pts_path = data_info['point_cloud']['velodyne_path']
        elif cfg.dataset_type in ['ScanNetDataset', 'SUNRGBDDataset']:
            pts_path = data_info['pts_path']
        elif cfg.dataset_type in ['NuScenesDataset', 'LyftDataset']:
            pts_path = data_info['lidar_path']
        else:
            raise NotImplementedError(
                f'unsupported dataset type {cfg.dataset_type}')

        file_name = osp.splitext(osp.basename(pts_path))[0]
        save_path = osp.join(args.output_dir,
                             f'{file_name}.png') if args.output_dir else None

        example = dataset.prepare_train_data(idx)
        points = example['points']._data.numpy()
        points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
                                           Coord3DMode.DEPTH)
        gt_bboxes = dataset.get_ann_info(idx)['gt_bboxes_3d'].tensor
        if gt_bboxes is not None:
            gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
                                          Box3DMode.DEPTH)

        vis = Visualizer(points, save_path='./show.png')
        vis.add_bboxes(bbox3d=gt_bboxes, bbox_color=(0, 0, 1))

        vis.show(save_path)
        del vis
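# Typical invocation of this dataset-browsing tool (a sketch; the flag names
# are inferred from the `args.config`, `args.skip_type`, `args.output_dir` and
# `args.cfg_options` accesses above and may differ in your version):
#
#   python browse_dataset.py ${CONFIG_FILE} --output-dir ${OUTPUT_DIR}
#
# When `--output-dir` is omitted, `save_path` stays None and each sample is
# only shown interactively through the Visualizer window.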
def convert_to(self, dst, rt_mat=None):
    """Convert self to ``dst`` mode.

    Args:
        dst (:obj:`CoordMode`): The target Point mode.
        rt_mat (np.ndarray | torch.Tensor): The rotation and translation
            matrix between different coordinates. Defaults to None.
            The conversion from `src` coordinates to `dst` coordinates
            usually comes along the change of sensors, e.g., from camera
            to LiDAR. This requires a transformation matrix.

    Returns:
        :obj:`BasePoints`: The converted point of the same type \
            in the `dst` mode.
    """
    from mmdet3d.core.bbox import Coord3DMode
    return Coord3DMode.convert_point(
        point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat)
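# Illustrative sketch (my addition, not part of the points module): for a
# CameraPoints instance, `convert_to` fixes the source mode to CAM and
# dispatches to `Coord3DMode.convert_point`, so LiDAR- or depth-frame copies
# are obtained in a single call. The inputs below are random placeholders.
def _example_convert_to():
    import torch
    from mmdet3d.core.bbox import Coord3DMode
    from mmdet3d.core.points import CameraPoints

    cam_points = CameraPoints(torch.rand(10, 3))
    lidar_points = cam_points.convert_to(Coord3DMode.LIDAR)  # LiDARPoints
    depth_points = cam_points.convert_to(Coord3DMode.DEPTH)  # DepthPoints
    return lidar_points, depth_points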
def forward(self, imgs, bboxes_2d_rescaled, seeds_3d_depth, img_metas, calibs):
    """Forward function.

    Args:
        imgs (list[torch.Tensor]): Image features.
        bboxes_2d_rescaled (list[torch.Tensor]): 2D bboxes.
        seeds_3d_depth (torch.Tensor): 3D seeds.
        img_metas (list[dict]): Meta information of images.
        calibs: Camera calibration information of the images.

    Returns:
        torch.Tensor: Concatenated cues of each point.
        torch.Tensor: Validity mask of each feature.
    """
    img_features = []
    masks = []
    for i, data in enumerate(
            zip(imgs, bboxes_2d_rescaled, seeds_3d_depth, img_metas)):
        img, bbox_2d_rescaled, seed_3d_depth, img_meta = data
        bbox_num = bbox_2d_rescaled.shape[0]
        seed_num = seed_3d_depth.shape[0]

        img_shape = img_meta['img_shape']
        img_h, img_w, _ = img_shape

        # first reverse the data transformations
        xyz_depth = apply_3d_transformation(
            seed_3d_depth, 'DEPTH', img_meta, reverse=True)

        # then convert from depth coords to camera coords
        xyz_cam = Coord3DMode.convert_point(
            xyz_depth,
            Coord3DMode.DEPTH,
            Coord3DMode.CAM,
            rt_mat=calibs['Rt'][i])

        # project to 2d to get image coords (uv)
        uv_origin = points_cam2img(xyz_cam, calibs['K'][i])
        uv_origin = (uv_origin - 1).round()

        # rescale 2d coordinates and bboxes
        uv_rescaled = coord_2d_transform(img_meta, uv_origin, True)
        bbox_2d_origin = bbox_2d_transform(img_meta, bbox_2d_rescaled, False)

        if bbox_num == 0:
            imvote_num = seed_num * self.max_imvote_per_pixel

            # use zero features
            two_cues = torch.zeros((15, imvote_num),
                                   device=seed_3d_depth.device)
            mask_zero = torch.zeros(
                imvote_num - seed_num, device=seed_3d_depth.device).bool()
            mask_one = torch.ones(
                seed_num, device=seed_3d_depth.device).bool()
            mask = torch.cat([mask_one, mask_zero], dim=0)
        else:
            # expand bboxes and seeds
            bbox_expanded = bbox_2d_origin.view(1, bbox_num, -1).expand(
                seed_num, -1, -1)
            seed_2d_expanded = uv_origin.view(seed_num, 1,
                                              -1).expand(-1, bbox_num, -1)
            seed_2d_expanded_x, seed_2d_expanded_y = \
                seed_2d_expanded.split(1, dim=-1)

            bbox_expanded_l, bbox_expanded_t, bbox_expanded_r, \
                bbox_expanded_b, bbox_expanded_conf, bbox_expanded_cls = \
                bbox_expanded.split(1, dim=-1)
            bbox_expanded_midx = (bbox_expanded_l + bbox_expanded_r) / 2
            bbox_expanded_midy = (bbox_expanded_t + bbox_expanded_b) / 2

            seed_2d_in_bbox_x = (seed_2d_expanded_x > bbox_expanded_l) * \
                (seed_2d_expanded_x < bbox_expanded_r)
            seed_2d_in_bbox_y = (seed_2d_expanded_y > bbox_expanded_t) * \
                (seed_2d_expanded_y < bbox_expanded_b)
            seed_2d_in_bbox = seed_2d_in_bbox_x * seed_2d_in_bbox_y

            # semantic cues, dim=class_num
            sem_cue = torch.zeros_like(bbox_expanded_conf).expand(
                -1, -1, self.num_classes)
            sem_cue = sem_cue.scatter(-1, bbox_expanded_cls.long(),
                                      bbox_expanded_conf)

            # bbox center - uv
            delta_u = bbox_expanded_midx - seed_2d_expanded_x
            delta_v = bbox_expanded_midy - seed_2d_expanded_y

            seed_3d_expanded = seed_3d_depth.view(seed_num, 1, -1).expand(
                -1, bbox_num, -1)

            z_cam = xyz_cam[..., 2:3].view(seed_num, 1,
                                           1).expand(-1, bbox_num, -1)
            delta_u = delta_u * z_cam / calibs['K'][i, 0, 0]
            delta_v = delta_v * z_cam / calibs['K'][i, 0, 0]

            imvote = torch.cat(
                [delta_u, delta_v,
                 torch.zeros_like(delta_v)], dim=-1).view(-1, 3)

            # convert from camera coords to depth coords
            imvote = Coord3DMode.convert_point(
                imvote.view((-1, 3)),
                Coord3DMode.CAM,
                Coord3DMode.DEPTH,
                rt_mat=calibs['Rt'][i])

            # apply transformation to lifted imvotes
            imvote = apply_3d_transformation(
                imvote, 'DEPTH', img_meta, reverse=False)

            seed_3d_expanded = seed_3d_expanded.reshape(imvote.shape)

            # ray angle
            ray_angle = seed_3d_expanded + imvote
            ray_angle /= torch.sqrt(torch.sum(ray_angle**2, -1) +
                                    EPS).unsqueeze(-1)

            # imvote lifted to 3d
            xz = ray_angle[:, [0, 2]] / (ray_angle[:, [1]] + EPS) \
                * seed_3d_expanded[:, [1]] - seed_3d_expanded[:, [0, 2]]

            # geometric cues, dim=5
            geo_cue = torch.cat([xz, ray_angle],
                                dim=-1).view(seed_num, -1, 5)

            two_cues = torch.cat([geo_cue, sem_cue], dim=-1)
            # mask to 0 if seed not in bbox
            two_cues = two_cues * seed_2d_in_bbox.float()

            feature_size = two_cues.shape[-1]
            # if bbox number is too small, append zeros
            if bbox_num < self.max_imvote_per_pixel:
                append_num = self.max_imvote_per_pixel - bbox_num
                append_zeros = torch.zeros(
                    (seed_num, append_num, 1),
                    device=seed_2d_in_bbox.device).bool()
                seed_2d_in_bbox = torch.cat(
                    [seed_2d_in_bbox, append_zeros], dim=1)
                append_zeros = torch.zeros(
                    (seed_num, append_num, feature_size),
                    device=two_cues.device)
                two_cues = torch.cat([two_cues, append_zeros], dim=1)
                append_zeros = torch.zeros((seed_num, append_num, 1),
                                           device=two_cues.device)
                bbox_expanded_conf = torch.cat(
                    [bbox_expanded_conf, append_zeros], dim=1)

            # sort the valid seed-bbox pair according to confidence
            pair_score = seed_2d_in_bbox.float() + bbox_expanded_conf
            # and find the largest
            mask, indices = pair_score.topk(
                self.max_imvote_per_pixel,
                dim=1,
                largest=True,
                sorted=True)

            indices_img = indices.expand(-1, -1, feature_size)
            two_cues = two_cues.gather(dim=1, index=indices_img)
            two_cues = two_cues.transpose(1, 0)
            two_cues = two_cues.reshape(-1, feature_size).transpose(
                1, 0).contiguous()

            # since conf is ~ (0, 1), floor gives us validity
            mask = mask.floor().int()
            mask = mask.transpose(1, 0).reshape(-1).bool()

        # clear the padding
        img = img[:, :img_shape[0], :img_shape[1]]
        img_flatten = img.reshape(3, -1).float()
        img_flatten /= 255.

        # take the normalized pixel value as texture cue
        uv_flatten = uv_rescaled[:, 1].round() * \
            img_shape[1] + uv_rescaled[:, 0].round()
        uv_expanded = uv_flatten.unsqueeze(0).expand(3, -1).long()
        txt_cue = torch.gather(img_flatten, dim=-1, index=uv_expanded)
        txt_cue = txt_cue.unsqueeze(1).expand(-1, self.max_imvote_per_pixel,
                                              -1).reshape(3, -1)

        # append texture cue
        img_feature = torch.cat([two_cues, txt_cue], dim=0)
        img_features.append(img_feature)
        masks.append(mask)

    return torch.stack(img_features, 0), torch.stack(masks, 0)
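# Minimal sketch (my addition) of the depth -> camera -> image projection chain
# used in the forward pass above. `rt_mat` and `K` are identity placeholders,
# not real calibration data.
def _example_depth_to_uv():
    import torch
    from mmdet3d.core.bbox import Coord3DMode, points_cam2img

    xyz_depth = torch.rand(8, 3)  # seeds in depth coordinates
    rt_mat = torch.eye(3)  # placeholder depth->camera rotation
    K = torch.eye(3)  # placeholder 3x3 camera intrinsics
    xyz_cam = Coord3DMode.convert_point(
        xyz_depth, Coord3DMode.DEPTH, Coord3DMode.CAM, rt_mat=rt_mat)
    uv = points_cam2img(xyz_cam, K)  # (N, 2) pixel coordinates
    return uv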
def test_points_conversion():
    """Test the conversion of points between different modes."""
    points_np = np.array(
        [[-5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 0.6666, 0.1956,
          0.4974, 0.9409],
         [-2.66751588e+01, 5.59499564e+00, -9.14345860e-01, 0.1502, 0.3707,
          0.1086, 0.6297],
         [-5.80979675e+00, 3.54092357e+01, 2.00889888e-01, 0.6565, 0.6248,
          0.6954, 0.2538],
         [-3.13086877e+01, 1.09007628e+00, -1.94612112e-01, 0.2803, 0.0258,
          0.4896, 0.3269]],
        dtype=np.float32)

    # test CAM to LIDAR and DEPTH
    cam_points = CameraPoints(
        points_np,
        points_dim=7,
        attribute_dims=dict(color=[3, 4, 5], height=6))

    convert_lidar_points = cam_points.convert_to(Coord3DMode.LIDAR)
    expected_tensor = torch.tensor(
        [[2.9757e-01, 5.2422e+00, -4.0021e+01, 6.6660e-01, 1.9560e-01,
          4.9740e-01, 9.4090e-01],
         [-9.1435e-01, 2.6675e+01, -5.5950e+00, 1.5020e-01, 3.7070e-01,
          1.0860e-01, 6.2970e-01],
         [2.0089e-01, 5.8098e+00, -3.5409e+01, 6.5650e-01, 6.2480e-01,
          6.9540e-01, 2.5380e-01],
         [-1.9461e-01, 3.1309e+01, -1.0901e+00, 2.8030e-01, 2.5800e-02,
          4.8960e-01, 3.2690e-01]])

    lidar_point_tensor = Coord3DMode.convert_point(cam_points.tensor,
                                                   Coord3DMode.CAM,
                                                   Coord3DMode.LIDAR)
    assert torch.allclose(expected_tensor, convert_lidar_points.tensor, 1e-4)
    assert torch.allclose(lidar_point_tensor, convert_lidar_points.tensor,
                          1e-4)

    convert_depth_points = cam_points.convert_to(Coord3DMode.DEPTH)
    expected_tensor = torch.tensor(
        [[-5.2422e+00, 2.9757e-01, -4.0021e+01, 6.6660e-01, 1.9560e-01,
          4.9740e-01, 9.4090e-01],
         [-2.6675e+01, -9.1435e-01, -5.5950e+00, 1.5020e-01, 3.7070e-01,
          1.0860e-01, 6.2970e-01],
         [-5.8098e+00, 2.0089e-01, -3.5409e+01, 6.5650e-01, 6.2480e-01,
          6.9540e-01, 2.5380e-01],
         [-3.1309e+01, -1.9461e-01, -1.0901e+00, 2.8030e-01, 2.5800e-02,
          4.8960e-01, 3.2690e-01]])

    depth_point_tensor = Coord3DMode.convert_point(cam_points.tensor,
                                                   Coord3DMode.CAM,
                                                   Coord3DMode.DEPTH)
    assert torch.allclose(expected_tensor, convert_depth_points.tensor, 1e-4)
    assert torch.allclose(depth_point_tensor, convert_depth_points.tensor,
                          1e-4)

    # test LIDAR to CAM and DEPTH
    lidar_points = LiDARPoints(
        points_np,
        points_dim=7,
        attribute_dims=dict(color=[3, 4, 5], height=6))

    convert_cam_points = lidar_points.convert_to(Coord3DMode.CAM)
    expected_tensor = torch.tensor(
        [[-4.0021e+01, -2.9757e-01, -5.2422e+00, 6.6660e-01, 1.9560e-01,
          4.9740e-01, 9.4090e-01],
         [-5.5950e+00, 9.1435e-01, -2.6675e+01, 1.5020e-01, 3.7070e-01,
          1.0860e-01, 6.2970e-01],
         [-3.5409e+01, -2.0089e-01, -5.8098e+00, 6.5650e-01, 6.2480e-01,
          6.9540e-01, 2.5380e-01],
         [-1.0901e+00, 1.9461e-01, -3.1309e+01, 2.8030e-01, 2.5800e-02,
          4.8960e-01, 3.2690e-01]])

    cam_point_tensor = Coord3DMode.convert_point(lidar_points.tensor,
                                                 Coord3DMode.LIDAR,
                                                 Coord3DMode.CAM)
    assert torch.allclose(expected_tensor, convert_cam_points.tensor, 1e-4)
    assert torch.allclose(cam_point_tensor, convert_cam_points.tensor, 1e-4)

    convert_depth_points = lidar_points.convert_to(Coord3DMode.DEPTH)
    expected_tensor = torch.tensor(
        [[-4.0021e+01, -5.2422e+00, 2.9757e-01, 6.6660e-01, 1.9560e-01,
          4.9740e-01, 9.4090e-01],
         [-5.5950e+00, -2.6675e+01, -9.1435e-01, 1.5020e-01, 3.7070e-01,
          1.0860e-01, 6.2970e-01],
         [-3.5409e+01, -5.8098e+00, 2.0089e-01, 6.5650e-01, 6.2480e-01,
          6.9540e-01, 2.5380e-01],
         [-1.0901e+00, -3.1309e+01, -1.9461e-01, 2.8030e-01, 2.5800e-02,
          4.8960e-01, 3.2690e-01]])

    depth_point_tensor = Coord3DMode.convert_point(lidar_points.tensor,
                                                   Coord3DMode.LIDAR,
                                                   Coord3DMode.DEPTH)
    assert torch.allclose(expected_tensor, convert_depth_points.tensor, 1e-4)
    assert torch.allclose(depth_point_tensor, convert_depth_points.tensor,
                          1e-4)

    # test DEPTH to CAM and LIDAR
    depth_points = DepthPoints(
        points_np,
        points_dim=7,
        attribute_dims=dict(color=[3, 4, 5], height=6))

    convert_cam_points = depth_points.convert_to(Coord3DMode.CAM)
    expected_tensor = torch.tensor(
        [[-5.2422e+00, -2.9757e-01, 4.0021e+01, 6.6660e-01, 1.9560e-01,
          4.9740e-01, 9.4090e-01],
         [-2.6675e+01, 9.1435e-01, 5.5950e+00, 1.5020e-01, 3.7070e-01,
          1.0860e-01, 6.2970e-01],
         [-5.8098e+00, -2.0089e-01, 3.5409e+01, 6.5650e-01, 6.2480e-01,
          6.9540e-01, 2.5380e-01],
         [-3.1309e+01, 1.9461e-01, 1.0901e+00, 2.8030e-01, 2.5800e-02,
          4.8960e-01, 3.2690e-01]])

    cam_point_tensor = Coord3DMode.convert_point(depth_points.tensor,
                                                 Coord3DMode.DEPTH,
                                                 Coord3DMode.CAM)
    assert torch.allclose(expected_tensor, convert_cam_points.tensor, 1e-4)
    assert torch.allclose(cam_point_tensor, convert_cam_points.tensor, 1e-4)

    rt_mat_provided = torch.tensor([[0.99789, -0.012698, -0.063678],
                                    [-0.012698, 0.92359, -0.38316],
                                    [0.063678, 0.38316, 0.92148]])

    depth_points_new = torch.cat([
        depth_points.tensor[:, :3] @ rt_mat_provided.t(),
        depth_points.tensor[:, 3:]
    ],
                                 dim=1)
    mat = rt_mat_provided.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
    rt_mat_provided = mat @ rt_mat_provided.transpose(1, 0)
    cam_point_tensor_new = Coord3DMode.convert_point(
        depth_points_new,
        Coord3DMode.DEPTH,
        Coord3DMode.CAM,
        rt_mat=rt_mat_provided)
    assert torch.allclose(expected_tensor, cam_point_tensor_new, 1e-4)

    convert_lidar_points = depth_points.convert_to(Coord3DMode.LIDAR)
    expected_tensor = torch.tensor(
        [[4.0021e+01, 5.2422e+00, 2.9757e-01, 6.6660e-01, 1.9560e-01,
          4.9740e-01, 9.4090e-01],
         [5.5950e+00, 2.6675e+01, -9.1435e-01, 1.5020e-01, 3.7070e-01,
          1.0860e-01, 6.2970e-01],
         [3.5409e+01, 5.8098e+00, 2.0089e-01, 6.5650e-01, 6.2480e-01,
          6.9540e-01, 2.5380e-01],
         [1.0901e+00, 3.1309e+01, -1.9461e-01, 2.8030e-01, 2.5800e-02,
          4.8960e-01, 3.2690e-01]])

    lidar_point_tensor = Coord3DMode.convert_point(depth_points.tensor,
                                                   Coord3DMode.DEPTH,
                                                   Coord3DMode.LIDAR)
    assert torch.allclose(expected_tensor, convert_lidar_points.tensor, 1e-4)
    assert torch.allclose(lidar_point_tensor, convert_lidar_points.tensor,
                          1e-4)
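# Sketch (my addition) of the convention exercised by the test above: the
# default DEPTH -> CAM conversion keeps x, maps y -> -z and z -> y, i.e. it
# multiplies by the same permutation matrix the test uses when composing
# `rt_mat_provided`.
def _example_default_depth_to_cam():
    import torch
    from mmdet3d.core.bbox import Coord3DMode

    pts_depth = torch.rand(16, 3)
    mat = torch.tensor([[1., 0., 0.], [0., 0., -1.], [0., 1., 0.]])
    converted = Coord3DMode.convert_point(pts_depth, Coord3DMode.DEPTH,
                                          Coord3DMode.CAM)
    assert torch.allclose(converted, pts_depth @ mat.t())
    return converted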
def test_boxes_conversion():
    """Test the conversion of boxes between different modes."""
    # test CAM to LIDAR and DEPTH
    cam_boxes = CameraInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    convert_lidar_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
                                              Coord3DMode.LIDAR)
    expected_tensor = torch.tensor(
        [[-1.7501, -1.7802, -2.5162, 1.6500, 1.7500, 3.3900, 1.4800],
         [-1.6357, -8.9594, -2.4567, 1.5700, 1.5400, 4.0100, 1.6200],
         [-1.3033, -28.2967, 0.5558, 1.4800, 1.4700, 2.2300, -1.5700],
         [-1.7361, -26.6690, -21.8230, 1.4000, 1.5600, 3.4800, -1.6900],
         [-1.6218, -31.3198, -8.1621, 1.4800, 1.7400, 3.7700, 2.7900]])
    assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)

    convert_depth_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
                                              Coord3DMode.DEPTH)
    expected_tensor = torch.tensor(
        [[1.7802, 1.7501, 2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
         [8.9594, 1.6357, 2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
         [28.2967, 1.3033, -0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
         [26.6690, 1.7361, 21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
         [31.3198, 1.6218, 8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
    assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)

    # test LIDAR to CAM and DEPTH
    lidar_boxes = LiDARInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    convert_cam_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
                                            Coord3DMode.CAM)
    expected_tensor = torch.tensor(
        [[-2.5162, 1.7501, 1.7802, 3.3900, 1.6500, 1.7500, 1.4800],
         [-2.4567, 1.6357, 8.9594, 4.0100, 1.5700, 1.5400, 1.6200],
         [0.5558, 1.3033, 28.2967, 2.2300, 1.4800, 1.4700, -1.5700],
         [-21.8230, 1.7361, 26.6690, 3.4800, 1.4000, 1.5600, -1.6900],
         [-8.1621, 1.6218, 31.3198, 3.7700, 1.4800, 1.7400, 2.7900]])
    assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)

    convert_depth_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
                                              Coord3DMode.DEPTH)
    expected_tensor = torch.tensor(
        [[-2.5162, 1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
         [-2.4567, 8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
         [0.5558, 28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
         [-21.8230, 26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
         [-8.1621, 31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
    assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)

    # test DEPTH to CAM and LIDAR
    depth_boxes = DepthInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    convert_cam_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
                                            Coord3DMode.CAM)
    expected_tensor = torch.tensor(
        [[1.7802, -1.7501, -2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
         [8.9594, -1.6357, -2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
         [28.2967, -1.3033, 0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
         [26.6690, -1.7361, -21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
         [31.3198, -1.6218, -8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
    assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)

    convert_lidar_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
                                              Coord3DMode.LIDAR)
    expected_tensor = torch.tensor(
        [[2.5162, -1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
         [2.4567, -8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
         [-0.5558, -28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
         [21.8230, -26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
         [8.1621, -31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
    assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)
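# Sketch (my addition): `Coord3DMode.convert` also accepts box instances, and a
# LIDAR -> DEPTH -> LIDAR round trip only permutes/negates coordinates and
# swaps box sizes (yaw unchanged, per the expected tensors above), so it should
# reproduce the original boxes. The random boxes below are placeholders.
def _example_box_round_trip():
    import torch
    from mmdet3d.core.bbox import Coord3DMode, LiDARInstance3DBoxes

    lidar_boxes = LiDARInstance3DBoxes(torch.rand(4, 7))
    depth_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
                                      Coord3DMode.DEPTH)
    lidar_again = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
                                      Coord3DMode.LIDAR)
    assert torch.allclose(lidar_boxes.tensor, lidar_again.tensor, atol=1e-6)
    return lidar_again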