def render_kitti(self, render_2d: bool) -> None:
    """
    Renders the annotations in the KITTI dataset from a lidar and a camera view.
    :param render_2d: Whether to render 2d boxes (only works for camera data).
    """
    if render_2d:
        print('Rendering 2d boxes from KITTI format')
    else:
        print('Rendering 3d boxes projected from 3d KITTI format')

    # Load the KITTI dataset.
    kitti = KittiDB(root=self.nusc_kitti_dir, splits=(self.split,))

    # Create output folder.
    render_dir = os.path.join(self.nusc_kitti_dir, 'render')
    if not os.path.isdir(render_dir):
        os.mkdir(render_dir)

    # Render each image.
    for token in kitti.tokens[:self.image_count]:
        for sensor in ['lidar', 'camera']:
            out_path = os.path.join(render_dir, '%s_%s.png' % (token, sensor))
            print('Rendering file to disk: %s' % out_path)
            kitti.render_sample_data(token, sensor_modality=sensor, out_path=out_path, render_2d=render_2d)

            # Close the windows to avoid a warning of too many open windows.
            plt.close()
def kitti_res_to_nuscenes(self, meta: Dict[str, bool] = None) -> None:
    """
    Converts a KITTI detection result to the nuScenes detection results format.
    :param meta: Meta data describing the method used to generate the result. See nuscenes.org/object-detection.
    """
    # Dummy meta data, please adjust accordingly.
    if meta is None:
        meta = {
            'use_camera': False,
            'use_lidar': True,
            'use_radar': False,
            'use_map': False,
            'use_external': False,
        }

    # Init.
    results = {}

    # Load the KITTI dataset.
    kitti = KittiDB(root=self.nusc_kitti_dir, splits=(self.split,))

    # Get assignment of scenes to splits.
    split_logs = create_splits_logs(self.split, self.nusc)

    # Use only the samples from the current split.
    sample_tokens = self._split_to_samples(split_logs)
    sample_tokens = sample_tokens[:self.image_count]

    for sample_token in sample_tokens:
        # Get the KITTI boxes we just generated in LIDAR frame.
        kitti_token = '%s_%s' % (self.split, sample_token)
        boxes = kitti.get_boxes(token=kitti_token)

        # Convert KITTI boxes to nuScenes detection challenge result format.
        sample_results = [self._box_to_sample_result(sample_token, box) for box in boxes]

        # Store all results for this image.
        results[sample_token] = sample_results

    # Store submission file to disk.
    submission = {
        'meta': meta,
        'results': results
    }
    submission_path = os.path.join(self.nusc_kitti_dir, 'submission.json')
    print('Writing submission to: %s' % submission_path)
    with open(submission_path, 'w') as f:
        json.dump(submission, f, indent=2)
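# Hedged illustration (not part of the original code): each entry produced by the helper
# self._box_to_sample_result (not shown here) is assumed to follow the nuScenes detection
# results format, one dict per box, roughly:
example_sample_result = {
    'sample_token': '<nuScenes sample token>',  # hypothetical placeholder
    'translation': [975.3, 596.9, 0.9],         # box center in the global frame [m]
    'size': [1.9, 4.5, 1.7],                    # width, length, height [m]
    'rotation': [0.707, 0.0, 0.0, 0.707],       # orientation quaternion (w, x, y, z)
    'velocity': [0.0, 0.0],                     # global x/y velocity [m/s]
    'detection_name': 'car',
    'detection_score': 0.5,
    'attribute_name': 'vehicle.moving',
}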
def get_transforms(token: str, root: str) -> dict:
    calib_filename = KittiDB.get_filepath(token, 'calib', root=root)

    lines = [line.rstrip() for line in open(calib_filename)]
    velo_to_cam = np.array(lines[5].strip().split(' ')[1:], dtype=np.float64)
    velo_to_cam.resize((3, 4))

    r0_rect = np.array(lines[4].strip().split(' ')[1:], dtype=np.float64)
    r0_rect.resize((3, 3))
    p_left = np.array(lines[2].strip().split(' ')[1:], dtype=np.float64)
    p_left.resize((3, 4))

    # Merge rectification and projection into one matrix.
    p_combined = np.eye(4)
    p_combined[:3, :3] = r0_rect
    p_combined = np.dot(p_left, p_combined)

    return {
        'velo_to_cam': {
            'R': velo_to_cam[:, :3],
            'T': velo_to_cam[:, 3]
        },
        'r0_rect': r0_rect,
        'p_left': p_left,
        'p_combined': p_combined,
    }
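# Hedged usage sketch (assumes the calib layout parsed above and Nx3 lidar points): the
# returned transforms are typically chained as p_img ~ p_left @ r0_rect @ [R | T] @ x_velo.
import numpy as np


def project_velo_to_image(transforms: dict, points_velo: np.ndarray) -> np.ndarray:
    """Project Nx3 KITTI lidar points into pixel coordinates using the dict from get_transforms."""
    n = points_velo.shape[0]
    velo_to_cam = np.hstack((transforms['velo_to_cam']['R'],
                             transforms['velo_to_cam']['T'].reshape(3, 1)))      # 3x4 [R | T]
    pts_cam = velo_to_cam @ np.vstack((points_velo.T, np.ones((1, n))))          # 3xN, camera frame
    pts_rect = transforms['r0_rect'] @ pts_cam                                   # 3xN, rectified frame
    pts_img = transforms['p_left'] @ np.vstack((pts_rect, np.ones((1, n))))      # 3xN, homogeneous pixels
    return (pts_img[:2] / pts_img[2:3]).T                                        # Nx2 (u, v)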
def render_kitti(self) -> None:
    """
    Renders the annotations in the KITTI dataset from a lidar and a camera view.
    """
    # Load the KITTI dataset.
    kitti = KittiDB(root=self.nusc_kitti_dir, splits=(self.split,))

    # Create output folder.
    render_dir = os.path.join(self.nusc_kitti_dir, 'render')
    if not os.path.isdir(render_dir):
        os.mkdir(render_dir)

    # Render each image.
    for token in kitti.tokens[:self.image_count]:
        for sensor in ['lidar', 'camera']:
            out_path = os.path.join(render_dir, '%s_%s.png' % (token, sensor))
            print('Rendering file to disk: %s' % out_path)
            kitti.render_sample_data(token, sensor_modality=sensor, out_path=out_path)

            # Close the windows to avoid a warning of too many open windows.
            plt.close()
def __getitem__(self, idx):
    # Set defaults here.
    kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
    imsize = (1600, 900)

    # sample_token based on index.
    sample_token = self.sample_tokens[idx]

    # Get sample data.
    sample = self.nusc.get('sample', sample_token)
    sample_annotation_tokens = sample['anns']
    cam_front_token = sample['data'][self.cam_name]
    lidar_token = sample['data'][self.lidar_name]

    # Retrieve sensor records.
    sd_record_cam = self.nusc.get('sample_data', cam_front_token)
    sd_record_lid = self.nusc.get('sample_data', lidar_token)
    cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
    cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token'])

    # Combine transformations and convert to KITTI format.
    # Note: cam uses same conventions in KITTI and nuScenes.
    lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']),
                                  inverse=False)
    ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']),
                                  inverse=True)
    velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

    # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
    velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix)

    r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

    # Projection matrix.
    p_left_kitti = np.zeros((3, 4))
    p_left_kitti[:3, :3] = cs_record_cam['camera_intrinsic']  # Cameras are always rectified.

    # Create KITTI style transforms.
    velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
    velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

    # Check that the rotation has the same format as in KITTI.
    assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all()
    assert (velo_to_cam_trans[1:3] < 0).all()

    # Retrieve the token from the lidar.
    # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
    # not the camera.
    filename_cam_full = sd_record_cam['filename']

    # Set the img variable to its data here.
    src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
    img = Image.open(src_im_path)

    # Create calibration matrix.
    K = p_left_kitti
    K = [float(i) for i in K.flatten()]  # Flatten first; iterating a (3, 4) array directly yields rows.
    K = np.array(K, dtype=np.float32).reshape(3, 4)
    K = K[:3, :3]

    # Populate the list of object annotations for this sample.
    anns = []
    for sample_annotation_token in sample_annotation_tokens:
        sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token)

        # Get box in LIDAR frame.
        _, box_lidar_nusc, _ = self.nusc.get_sample_data(lidar_token, box_vis_level=BoxVisibility.NONE,
                                                         selected_anntokens=[sample_annotation_token])
        box_lidar_nusc = box_lidar_nusc[0]

        # Truncated: Set all objects to 0 which means untruncated.
        truncated = 0.0

        # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
        occluded = 0

        # Convert nuScenes category to nuScenes detection challenge category.
        detection_name = category_to_detection_name(sample_annotation['category_name'])

        # Skip categories that are not part of the nuScenes detection challenge.
        if detection_name is None:
            continue

        # Convert from nuScenes to KITTI box format.
        box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
            box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)

        # Project 3d box to 2d box in image, ignore box if it does not fall inside.
        bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize)
        if bbox_2d is None:
            continue

        # Set dummy score so we can use this file as result.
        box_cam_kitti.score = 0

        # Convert box to output string format.
        output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d,
                                       truncation=truncated, occlusion=occluded)

        fieldnames = ['type', 'truncated', 'occluded', 'alpha', 'xmin', 'ymin', 'xmax', 'ymax',
                      'dh', 'dw', 'dl', 'lx', 'ly', 'lz', 'ry']

        if self.is_train:
            f = StringIO(output)
            reader = csv.DictReader(f, delimiter=' ', fieldnames=fieldnames)
            for line, row in enumerate(reader):
                if row["type"] in self.classes:
                    anns.append({
                        "class": row["type"],
                        "label": TYPE_ID_CONVERSION[row["type"]],
                        "truncation": float(row["truncated"]),
                        "occlusion": float(row["occluded"]),
                        "alpha": float(row["alpha"]),
                        "dimensions": [float(row['dl']), float(row['dh']), float(row['dw'])],
                        "locations": [float(row['lx']), float(row['ly']), float(row['lz'])],
                        "rot_y": float(row["ry"])
                    })

    center = np.array([i / 2 for i in img.size], dtype=np.float32)
    size = np.array([i for i in img.size], dtype=np.float32)

    # Resize, horizontal flip, and affine augmentation are performed here,
    # since it is complicated to compute the heatmap w.r.t. the transform.
    flipped = False
    if (self.is_train) and (random.random() < self.flip_prob):
        flipped = True
        img = img.transpose(Image.FLIP_LEFT_RIGHT)
        center[0] = size[0] - center[0] - 1
        K[0, 2] = size[0] - K[0, 2] - 1

    affine = False
    if (self.is_train) and (random.random() < self.aug_prob):
        affine = True
        shift, scale = self.shift_scale[0], self.shift_scale[1]
        shift_ranges = np.arange(-shift, shift + 0.1, 0.1)
        center[0] += size[0] * random.choice(shift_ranges)
        center[1] += size[1] * random.choice(shift_ranges)

        scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1)
        size *= random.choice(scale_ranges)

    center_size = [center, size]
    trans_affine = get_transfrom_matrix(center_size, [self.input_width, self.input_height])
    trans_affine_inv = np.linalg.inv(trans_affine)
    img = img.transform(
        (self.input_width, self.input_height),
        method=Image.AFFINE,
        data=trans_affine_inv.flatten()[:6],
        resample=Image.BILINEAR,
    )

    trans_mat = get_transfrom_matrix(center_size, [self.output_width, self.output_height])

    if not self.is_train:
        # For inference we parametrize with the original size.
        target = ParamsList(image_size=size, is_train=self.is_train)
        target.add_field("trans_mat", trans_mat)
        target.add_field("K", K)
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target, idx

    heat_map = np.zeros([self.num_classes, self.output_height, self.output_width], dtype=np.float32)
    regression = np.zeros([self.max_objs, 3, 8], dtype=np.float32)
    cls_ids = np.zeros([self.max_objs], dtype=np.int32)
    proj_points = np.zeros([self.max_objs, 2], dtype=np.int32)
    p_offsets = np.zeros([self.max_objs, 2], dtype=np.float32)
    dimensions = np.zeros([self.max_objs, 3], dtype=np.float32)
    locations = np.zeros([self.max_objs, 3], dtype=np.float32)
    rotys = np.zeros([self.max_objs], dtype=np.float32)
    reg_mask = np.zeros([self.max_objs], dtype=np.uint8)
    flip_mask = np.zeros([self.max_objs], dtype=np.uint8)

    for i, a in enumerate(anns):
        a = a.copy()
        cls = a["label"]

        locs = np.array(a["locations"])
        rot_y = np.array(a["rot_y"])
        if flipped:
            locs[0] *= -1
            rot_y *= -1

        point, box2d, box3d = encode_label(K, rot_y, a["dimensions"], locs)
        point = affine_transform(point, trans_mat)
        box2d[:2] = affine_transform(box2d[:2], trans_mat)
        box2d[2:] = affine_transform(box2d[2:], trans_mat)
        box2d[[0, 2]] = box2d[[0, 2]].clip(0, self.output_width - 1)
        box2d[[1, 3]] = box2d[[1, 3]].clip(0, self.output_height - 1)
        h, w = box2d[3] - box2d[1], box2d[2] - box2d[0]

        if (0 < point[0] < self.output_width) and (0 < point[1] < self.output_height):
            point_int = point.astype(np.int32)
            p_offset = point - point_int
            radius = gaussian_radius(h, w)
            radius = max(0, int(radius))
            heat_map[cls] = draw_umich_gaussian(heat_map[cls], point_int, radius)

            cls_ids[i] = cls
            regression[i] = box3d
            proj_points[i] = point_int
            p_offsets[i] = p_offset
            dimensions[i] = np.array(a["dimensions"])
            locations[i] = locs
            rotys[i] = rot_y
            reg_mask[i] = 1 if not affine else 0
            flip_mask[i] = 1 if not affine and flipped else 0

    target = ParamsList(image_size=img.size, is_train=self.is_train)
    target.add_field("hm", heat_map)
    target.add_field("reg", regression)
    target.add_field("cls_ids", cls_ids)
    target.add_field("proj_p", proj_points)
    target.add_field("dimensions", dimensions)
    target.add_field("locations", locations)
    target.add_field("rotys", rotys)
    target.add_field("trans_mat", trans_mat)
    target.add_field("K", K)
    target.add_field("reg_mask", reg_mask)
    target.add_field("flip_mask", flip_mask)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    return img, target, idx
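# Hedged illustration (a minimal re-implementation under the usual CenterNet-style assumptions,
# not the project's draw_umich_gaussian): the heat map filled above places a small 2D gaussian
# peak on the class channel at each projected, quantized 3D box center.
import numpy as np


def splat_gaussian(heatmap: np.ndarray, center_xy: np.ndarray, radius: int) -> np.ndarray:
    """Write a gaussian bump of the given radius onto heatmap at the integer center (x, y)."""
    sigma = (2 * radius + 1) / 6.0
    y, x = np.ogrid[-radius:radius + 1, -radius:radius + 1]
    gauss = np.exp(-(x * x + y * y) / (2 * sigma * sigma))

    cx, cy = int(center_xy[0]), int(center_xy[1])
    h, w = heatmap.shape
    left, right = min(cx, radius), min(w - cx, radius + 1)
    top, bottom = min(cy, radius), min(h - cy, radius + 1)
    heatmap[cy - top:cy + bottom, cx - left:cx + right] = np.maximum(
        heatmap[cy - top:cy + bottom, cx - left:cx + right],
        gauss[radius - top:radius + bottom, radius - left:radius + right])
    return heatmap


# Example: a single peak at (u=160, v=48) on a 96x320 feature map.
hm = splat_gaussian(np.zeros((96, 320), dtype=np.float32), np.array([160, 48]), radius=4)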
def load_annotations(self, idx):
    """ Converts nuScenes GT annotations to KITTI format. """
    annotations = []

    kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse

    sample_token = self.sample_tokens[idx]
    sample = self.nusc.get('sample', sample_token)
    sample_annotation_tokens = sample['anns']
    cam_front_token = sample['data'][self.cam_name]
    lidar_token = sample['data'][self.lidar_name]

    # Retrieve sensor records.
    sd_record_cam = self.nusc.get('sample_data', cam_front_token)
    sd_record_lid = self.nusc.get('sample_data', lidar_token)
    cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
    cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token'])

    # Combine transformations and convert to KITTI format.
    # Note: cam uses same conventions in KITTI and nuScenes.
    lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']),
                                  inverse=False)
    ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']),
                                  inverse=True)
    velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

    # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
    velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix)

    # Currently not used.
    imu_to_velo_kitti = np.zeros((3, 4), dtype=np.float32)  # Dummy values.
    r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

    # Projection matrix.
    p_left_kitti = np.zeros((3, 4), dtype=np.float32)
    p_left_kitti[:3, :3] = cs_record_cam['camera_intrinsic']  # Cameras are always rectified.
    p_left_kitti = p_left_kitti[:3, :3]

    # Create KITTI style transforms.
    velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
    velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

    # Check that the rotation has the same format as in KITTI.
    assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all()
    assert (velo_to_cam_trans[1:3] < 0).all()

    # Retrieve the token from the lidar.
    # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
    # not the camera.
    filename_cam_full = sd_record_cam['filename']
    filename_lid_full = sd_record_lid['filename']

    if self.is_train:
        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token)

            # Convert nuScenes category to nuScenes detection challenge category.
            detection_name = category_to_detection_name(sample_annotation['category_name'])
            if detection_name in self.classes:
                # Get box in LIDAR frame.
                _, box_lidar_nusc, _ = self.nusc.get_sample_data(lidar_token, box_vis_level=BoxVisibility.NONE,
                                                                 selected_anntokens=[sample_annotation_token])
                box_lidar_nusc = box_lidar_nusc[0]

                # Convert from nuScenes to KITTI box format.
                box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                    box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)

                # Convert quaternion to yaw angle.
                v = np.dot(box_cam_kitti.rotation_matrix, np.array([1, 0, 0]))
                yaw = -np.arctan2(v[2], v[0])

                annotations.append({
                    "class": detection_name,
                    "label": TYPE_ID_CONVERSION[detection_name],
                    "dimensions": [float(box_cam_kitti.wlh[2]), float(box_cam_kitti.wlh[0]),
                                   float(box_cam_kitti.wlh[1])],
                    "locations": [float(box_cam_kitti.center[0]), float(box_cam_kitti.center[1]),
                                  float(box_cam_kitti.center[2])],
                    "rot_y": float(yaw)
                })

    return annotations, p_left_kitti
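# Hedged sanity check (not from the original code): the yaw recovered above is the KITTI
# rotation_y. For a box rotated by theta about the camera y-axis, rotation_matrix @ [1, 0, 0]
# equals [cos(theta), 0, -sin(theta)], so -arctan2(v[2], v[0]) returns theta.
import numpy as np
from pyquaternion import Quaternion

theta = 0.3
v = np.dot(Quaternion(axis=[0, 1, 0], angle=theta).rotation_matrix, np.array([1, 0, 0]))
assert abs(-np.arctan2(v[2], v[0]) - theta) < 1e-9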
def nuscenes_gt_to_kitti(self) -> None:
    """
    Converts nuScenes GT annotations to KITTI format.
    """
    kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
    imsize = (1600, 900)

    token_idx = 0  # Start tokens from 0.

    # Get assignment of scenes to splits.
    split_logs = create_splits_logs(self.split, self.nusc)

    # Create output folders.
    label_folder = os.path.join(self.nusc_kitti_dir, self.split, 'label_2')
    calib_folder = os.path.join(self.nusc_kitti_dir, self.split, 'calib')
    image_folder = os.path.join(self.nusc_kitti_dir, self.split, 'image_2')
    lidar_folder = os.path.join(self.nusc_kitti_dir, self.split, 'velodyne')
    for folder in [label_folder, calib_folder, image_folder, lidar_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # Use only the samples from the current split.
    sample_tokens = self._split_to_samples(split_logs)
    sample_tokens = sample_tokens[:self.image_count]

    tokens = []
    for sample_token in sample_tokens:

        # Get sample data.
        sample = self.nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']
        cam_front_token = sample['data'][self.cam_name]
        lidar_token = sample['data'][self.lidar_name]

        # Retrieve sensor records.
        sd_record_cam = self.nusc.get('sample_data', cam_front_token)
        sd_record_lid = self.nusc.get('sample_data', lidar_token)
        cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
        cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token'])

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']),
                                      inverse=False)
        ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']),
                                      inverse=True)
        velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix)

        # Currently not used.
        imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4))
        p_left_kitti[:3, :3] = cs_record_cam['camera_intrinsic']  # Cameras are always rectified.

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all()
        assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
        # not the camera.
        filename_cam_full = sd_record_cam['filename']
        filename_lid_full = sd_record_lid['filename']
        # token = '%06d' % token_idx # Alternative to use KITTI names.
        token_idx += 1

        # Convert image (jpg to png).
        src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
        dst_im_path = os.path.join(image_folder, sample_token + '.png')
        if not os.path.exists(dst_im_path):
            im = Image.open(src_im_path)
            im.save(dst_im_path, "PNG")

        # Convert lidar.
        # Note that we are only using a single sweep, instead of the commonly used n sweeps.
        src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full)
        dst_lid_path = os.path.join(lidar_folder, sample_token + '.bin')
        assert not dst_lid_path.endswith('.pcd.bin')
        pcl = LidarPointCloud.from_file(src_lid_path)
        pcl.rotate(kitti_to_nu_lidar_inv.rotation_matrix)  # In KITTI lidar frame.
        with open(dst_lid_path, "w") as lid_file:
            pcl.points.T.tofile(lid_file)

        # Add to tokens.
        tokens.append(sample_token)

        # Create calibration file.
        kitti_transforms = dict()
        kitti_transforms['P0'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P1'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P2'] = p_left_kitti  # Left camera transform.
        kitti_transforms['P3'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['R0_rect'] = r0_rect.rotation_matrix  # Cameras are already rectified.
        kitti_transforms['Tr_velo_to_cam'] = np.hstack((velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
        kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti
        calib_path = os.path.join(calib_folder, sample_token + '.txt')
        with open(calib_path, "w") as calib_file:
            for (key, val) in kitti_transforms.items():
                val = val.flatten()
                val_str = '%.12e' % val[0]
                for v in val[1:]:
                    val_str += ' %.12e' % v
                calib_file.write('%s: %s\n' % (key, val_str))

        # Write label file.
        label_path = os.path.join(label_folder, sample_token + '.txt')
        if os.path.exists(label_path):
            print('Skipping existing file: %s' % label_path)
            continue
        else:
            print('Writing file: %s' % label_path)
        with open(label_path, "w") as label_file:
            for sample_annotation_token in sample_annotation_tokens:
                sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token)

                # Get box in LIDAR frame.
                _, box_lidar_nusc, _ = self.nusc.get_sample_data(lidar_token, box_vis_level=BoxVisibility.NONE,
                                                                 selected_anntokens=[sample_annotation_token])
                box_lidar_nusc = box_lidar_nusc[0]

                # Truncated: Set all objects to 0 which means untruncated.
                truncated = 0.0

                # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
                occluded = 0

                # Convert nuScenes category to nuScenes detection challenge category.
                detection_name = category_to_detection_name(sample_annotation['category_name'])

                # Skip categories that are not part of the nuScenes detection challenge.
                if detection_name is None:
                    continue

                # Convert from nuScenes to KITTI box format.
                box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                    box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)

                # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize)
                if bbox_2d is None:
                    continue

                # Set dummy score so we can use this file as result.
                box_cam_kitti.score = 0

                # Convert box to output string format.
                output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d,
                                               truncation=truncated, occlusion=occluded)

                # Write to disk.
                label_file.write(output + '\n')
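# Illustrative only: each line written above by KittiDB.box_to_string is assumed to follow the
# standard KITTI object-label layout of 15 space-separated fields plus an optional score:
#
#   type truncated occluded alpha  bbox_left bbox_top bbox_right bbox_bottom \
#   height width length  x y z  rotation_y  [score]
#
# with the 3D dimensions in meters, the location (x, y, z) in the rectified camera frame and
# rotation_y the yaw about the camera y-axis; the exact formatting comes from the devkit helper.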
def process_token_to_kitti(self, sample_token: str) -> None:
    # Get sample data.
    sample = self.nusc.get('sample', sample_token)
    sample_annotation_tokens = sample['anns']

    lidar_token = sample['data'][self.lidar_name]
    sd_record_lid = self.nusc.get('sample_data', lidar_token)
    cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token'])

    for cam_name in self.cams_to_see:
        cam_front_token = sample['data'][cam_name]
        token_to_write = cam_front_token

        # Retrieve sensor records.
        sd_record_cam = self.nusc.get('sample_data', cam_front_token)
        cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
        cam_height = sd_record_cam['height']
        cam_width = sd_record_cam['width']
        imsize = (cam_width, cam_height)

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']),
                                      inverse=False)
        ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']),
                                      inverse=True)
        velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam, self.kitti_to_nu_lidar.transformation_matrix)

        # Currently not used.
        imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4))
        p_left_kitti[:3, :3] = cs_record_cam['camera_intrinsic']  # Cameras are always rectified.

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
        # not the camera.
        filename_cam_full = sd_record_cam['filename']
        filename_lid_full = sd_record_lid['filename']

        # Convert image (jpg to png).
        src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
        dst_im_path = os.path.join(self.image_folder, token_to_write + '.png')
        if not os.path.exists(dst_im_path):
            im = Image.open(src_im_path)
            im.save(dst_im_path, "PNG")

        # Convert lidar.
        # Note that we are only using a single sweep, instead of the commonly used n sweeps.
        src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full)
        dst_lid_path = os.path.join(self.lidar_folder, token_to_write + '.bin')
        pcl = LidarPointCloud.from_file(src_lid_path)
        pcl.rotate(self.kitti_to_nu_lidar_inv.rotation_matrix)  # In KITTI lidar frame.
        with open(dst_lid_path, "w") as lid_file:
            pcl.points.T.tofile(lid_file)

        # Add to tokens.
        # tokens.append(token_to_write)

        # Create calibration file.
        kitti_transforms = dict()
        kitti_transforms['P0'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P1'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P2'] = p_left_kitti  # Left camera transform.
        kitti_transforms['P3'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['R0_rect'] = r0_rect.rotation_matrix  # Cameras are already rectified.
        kitti_transforms['Tr_velo_to_cam'] = np.hstack((velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
        kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti
        calib_path = os.path.join(self.calib_folder, token_to_write + '.txt')
        with open(calib_path, "w") as calib_file:
            for (key, val) in kitti_transforms.items():
                val = val.flatten()
                val_str = '%.12e' % val[0]
                for v in val[1:]:
                    val_str += ' %.12e' % v
                calib_file.write('%s: %s\n' % (key, val_str))

        # Write label file.
        label_path = os.path.join(self.label_folder, token_to_write + '.txt')
        if os.path.exists(label_path):
            print('Skipping existing file: %s' % label_path)
            continue
        # else:
        #     print('Writing file: %s' % label_path)
        with open(label_path, "w") as label_file:
            for sample_annotation_token in sample_annotation_tokens:
                sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token)

                # Get box in LIDAR frame.
                _, box_lidar_nusc, _ = self.nusc.get_sample_data(lidar_token, box_vis_level=BoxVisibility.NONE,
                                                                 selected_anntokens=[sample_annotation_token])
                box_lidar_nusc = box_lidar_nusc[0]

                # Truncated: Set all objects to 0 which means untruncated.
                truncated = 0.0

                # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
                occluded = 0

                # Keep the raw nuScenes category name; mapping via category_to_detection_name is disabled here,
                # so categories outside the nuScenes detection challenge are not skipped.
                detection_name = sample_annotation['category_name']
                if detection_name is None:
                    continue

                # Convert from nuScenes to KITTI box format.
                box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                    box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)

                # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize)
                if bbox_2d is None and not self.get_all_detections:
                    continue
                elif bbox_2d is None and self.get_all_detections:
                    bbox_2d = (-1.0, -1.0, -1.0, -1.0)  # Default KITTI bbox.

                # Set dummy score so we can use this file as result.
                box_cam_kitti.score = 0

                # Convert box to output string format.
                output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d,
                                               truncation=truncated, occlusion=occluded)

                # Write to disk.
                label_file.write(output + '\n')
def render_kitti(self, render_2d: bool = False) -> None:
    """
    Renders the annotations in the KITTI dataset from a lidar and a camera view.
    :param render_2d: Whether to render 2d boxes (only works for camera data).
    """
    if render_2d:
        print('Rendering 2d boxes from KITTI format')
    else:
        print('Rendering 3d boxes projected from 3d KITTI format')

    # Load the KITTI dataset.
    kitti = KittiDB(root=self.nusc_kitti_dir, splits=[''])

    def get_transforms(token: str, root: str) -> dict:
        calib_filename = KittiDB.get_filepath(token, 'calib', root=root)

        lines = [line.rstrip() for line in open(calib_filename)]
        velo_to_cam = np.array(lines[5].strip().split(' ')[1:], dtype=np.float64)
        velo_to_cam.resize((3, 4))

        r0_rect = np.array(lines[4].strip().split(' ')[1:], dtype=np.float64)
        r0_rect.resize((3, 3))
        p_left = np.array(lines[2].strip().split(' ')[1:], dtype=np.float64)
        p_left.resize((3, 4))

        # Merge rectification and projection into one matrix.
        p_combined = np.eye(4)
        p_combined[:3, :3] = r0_rect
        p_combined = np.dot(p_left, p_combined)

        return {
            'velo_to_cam': {
                'R': velo_to_cam[:, :3],
                'T': velo_to_cam[:, 3]
            },
            'r0_rect': r0_rect,
            'p_left': p_left,
            'p_combined': p_combined,
        }

    kitti.get_transforms = get_transforms  # Monkeypatching: np.float32 -> np.float64.

    # Create output folder.
    render_dir = os.path.join(self.nusc_kitti_dir, 'render')
    if not os.path.isdir(render_dir):
        os.mkdir(render_dir)

    # Render each image.
    tokens = kitti.tokens
    if self.samples_count is not None:
        tokens = tokens[:self.samples_count]

    # Currently supports only single-thread processing.
    for token in tqdm(tokens):
        for sensor in ['lidar', 'camera']:
            out_path = os.path.join(render_dir, '%s_%s.png' % (token, sensor))
            kitti.render_sample_data(token, sensor_modality=sensor, out_path=out_path, render_2d=render_2d)

            # Close the windows to avoid a warning of too many open windows.
            plt.close()
def nuscenes_gt_to_kitti(self) -> None:
    """
    Converts nuScenes GT annotations to KITTI format.
    """
    kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
    imsize = (1600, 900)

    token_idx = 0  # Start tokens from 0.

    # Get assignment of scenes to splits.
    split_logs = create_splits_logs(self.split, self.nusc)
    scene_splits = create_splits_scenes(verbose=False)
    scene_to_log = {scene['name']: self.nusc.get('log', scene['log_token'])['logfile']
                    for scene in self.nusc.scene}
    logs = set()
    scenes = scene_splits[self.split]
    for scene in scenes:
        logs.add(scene_to_log[scene])
    # print(len(scenes), len(logs))

    split_mapping = {"train": "training", "val": "testing"}

    # Create output folders.
    label_folder = os.path.join(self.nusc_kitti_dir, split_mapping[self.split], 'label_2')
    calib_folder = os.path.join(self.nusc_kitti_dir, split_mapping[self.split], 'calib')
    image_folder = os.path.join(self.nusc_kitti_dir, split_mapping[self.split], 'image_2')
    lidar_folder = os.path.join(self.nusc_kitti_dir, split_mapping[self.split], 'velodyne')
    for folder in [label_folder, calib_folder, image_folder, lidar_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # Use only the samples from the current split.
    sample_tokens = self._split_to_samples(split_logs)
    # sample_tokens = sample_tokens[:self.image_count]
    # print(len(sample_tokens))

    tokens = []
    if self.split == "train":
        split_file = [os.path.join(self.nusc_kitti_dir, "train.txt"),
                      os.path.join(self.nusc_kitti_dir, "val.txt")]
    elif self.split == 'val':
        split_file = os.path.join(self.nusc_kitti_dir, "test.txt")
    # if os.path.isfile(split_file):
    #     os.remove(split_file)

    if self.split == "train":
        cnt = 0
        with open(split_file[0], "w") as f:
            for seq in list(self.sequence_mapping.keys())[:-150]:
                for tk in self.sequence_mapping[seq]:
                    f.write("%06d" % tk + "\n")
                    cnt += 1
        # print(cnt)

        cnt = 0
        with open(split_file[1], "w") as f:
            for seq in list(self.sequence_mapping.keys())[-150:]:
                for tk in self.sequence_mapping[seq]:
                    f.write("%06d" % tk + "\n")
                    cnt += 1
        # print(cnt)
    elif self.split == "val":
        with open(split_file, "w") as f:
            for seq in self.sequence_mapping.keys():
                for tk in self.sequence_mapping[seq]:
                    f.write("%06d" % tk + "\n")

    for idx, sample_token in enumerate(sample_tokens):

        # Get sample data.
        sample = self.nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']
        cam_front_token = sample['data'][self.cam_name]
        lidar_token = sample['data'][self.lidar_name]
        sample_name = "%06d" % idx

        # Retrieve sensor records.
        sd_record_cam = self.nusc.get('sample_data', cam_front_token)
        sd_record_lid = self.nusc.get('sample_data', lidar_token)
        cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
        cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token'])

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']),
                                      inverse=False)
        ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']),
                                      inverse=True)
        velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix)

        # Currently not used.
        imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4))
        p_left_kitti[:3, :3] = cs_record_cam['camera_intrinsic']  # Cameras are always rectified.

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all()
        assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
        # not the camera.
        filename_cam_full = sd_record_cam['filename']
        filename_lid_full = sd_record_lid['filename']
        # token = '%06d' % token_idx # Alternative to use KITTI names.
        token_idx += 1

        # Convert image (jpg to png).
        src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
        dst_im_path = os.path.join(image_folder, sample_name + '.png')
        if not os.path.exists(dst_im_path):
            im = Image.open(src_im_path)
            im.save(dst_im_path, "PNG")

        # Convert lidar.
        # Note that we are only using a single sweep, instead of the commonly used n sweeps.
        src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full)
        dst_lid_path = os.path.join(lidar_folder, sample_name + '.bin')
        assert not dst_lid_path.endswith('.pcd.bin')
        pcl = LidarPointCloud.from_file(src_lid_path)
        # pcl, _ = LidarPointCloud.from_file_multisweep_future(self.nusc, sample, self.lidar_name,
        #                                                      self.lidar_name, nsweeps=5)
        pcl.rotate(kitti_to_nu_lidar_inv.rotation_matrix)  # In KITTI lidar frame.
        with open(dst_lid_path, "w") as lid_file:
            pcl.points.T.tofile(lid_file)

        # Add to tokens.
        tokens.append(sample_token)

        # Create calibration file.
        kitti_transforms = dict()
        kitti_transforms['P0'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P1'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P2'] = p_left_kitti  # Left camera transform.
        kitti_transforms['P3'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['R0_rect'] = r0_rect.rotation_matrix  # Cameras are already rectified.
        kitti_transforms['Tr_velo_to_cam'] = np.hstack((velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
        kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti
        calib_path = os.path.join(calib_folder, sample_name + '.txt')
        with open(calib_path, "w") as calib_file:
            for (key, val) in kitti_transforms.items():
                val = val.flatten()
                val_str = '%.12e' % val[0]
                for v in val[1:]:
                    val_str += ' %.12e' % v
                calib_file.write('%s: %s\n' % (key, val_str))

        # Write label file.
        label_path = os.path.join(label_folder, sample_name + '.txt')
        if os.path.exists(label_path):
            # print('Skipping existing file: %s' % label_path)
            continue
        # else:
        #     print('Writing file: %s' % label_path)

        objects = []
        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token)

            # Get box in LIDAR frame.
            _, box_lidar_nusc, _ = self.nusc.get_sample_data(lidar_token, box_vis_level=BoxVisibility.NONE,
                                                             selected_anntokens=[sample_annotation_token])
            box_lidar_nusc = box_lidar_nusc[0]

            # Truncated: Set all objects to 0 which means untruncated.
            truncated = 0.0

            # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
            occluded = 0

            obj = dict()

            # Convert nuScenes category to nuScenes detection challenge category.
            obj["detection_name"] = category_to_detection_name(sample_annotation['category_name'])

            # Skip categories that are not part of the nuScenes detection challenge.
            if obj["detection_name"] is None or obj["detection_name"] not in CLASS_MAP.keys():
                continue

            obj["detection_name"] = CLASS_MAP[obj["detection_name"]]

            # Convert from nuScenes to KITTI box format.
            obj["box_cam_kitti"] = KittiDB.box_nuscenes_to_kitti(
                box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)

            # Project 3d box to 2d box in image, ignore box if it does not fall inside.
            bbox_2d = project_to_2d(obj["box_cam_kitti"], p_left_kitti, imsize[1], imsize[0])
            if bbox_2d is None:
                continue
            obj["bbox_2d"] = bbox_2d["bbox"]
            obj["truncated"] = bbox_2d["truncated"]

            # Set dummy score so we can use this file as result.
            obj["box_cam_kitti"].score = 0

            v = np.dot(obj["box_cam_kitti"].rotation_matrix, np.array([1, 0, 0]))
            rot_y = -np.arctan2(v[2], v[0])
            obj["alpha"] = -np.arctan2(obj["box_cam_kitti"].center[0], obj["box_cam_kitti"].center[2]) + rot_y
            obj["depth"] = np.linalg.norm(np.array(obj["box_cam_kitti"].center[:3]))
            objects.append(obj)

        objects = postprocessing(objects, imsize[1], imsize[0])

        with open(label_path, "w") as label_file:
            for obj in objects:
                # Convert box to output string format.
                output = box_to_string(name=obj["detection_name"], box=obj["box_cam_kitti"],
                                       bbox_2d=obj["bbox_2d"], truncation=obj["truncated"],
                                       occlusion=obj["occluded"], alpha=obj["alpha"])
                label_file.write(output + '\n')
def nuscenes_gt_to_kitti(self) -> None:
    """
    Converts nuScenes GT annotations to KITTI format.
    """
    kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
    imsize = (1600, 900)

    token_idx = 0  # Start tokens from 0.

    # Create output folders.
    label_folder = os.path.join(self.output_dir, self.split, 'label_2')
    calib_folder = os.path.join(self.output_dir, self.split, 'calib')
    image_folder = os.path.join(self.output_dir, self.split, 'image_2')
    lidar_folder = os.path.join(self.output_dir, self.split, 'velodyne')
    radar_folder = os.path.join(self.output_dir, self.split, 'radar')
    for folder in [label_folder, calib_folder, image_folder, lidar_folder, radar_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)

    id_to_token_file = os.path.join(self.output_dir, self.split, 'id2token.txt')
    id2token = open(id_to_token_file, "w+")

    # Use only the samples from the current split.
    split_logs = create_splits_logs(self.split, self.nusc)
    sample_tokens = self._split_to_samples(split_logs)
    sample_tokens = sample_tokens[:self.image_count]

    out_filenames = []
    for sample_token in tqdm(sample_tokens):

        # Get sample data.
        sample = self.nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']
        cam_token = sample['data'][self.cam_name]
        lidar_token = sample['data'][self.lidar_name]
        radar_tokens = []
        for radar_name in _C.RADARS.keys():
            radar_tokens.append(sample['data'][radar_name])

        # Retrieve sensor records.
        sd_record_cam = self.nusc.get('sample_data', cam_token)
        sd_record_lid = self.nusc.get('sample_data', lidar_token)
        cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
        cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token'])
        sd_record_rad = []
        cs_record_rad = []
        for i, radar_token in enumerate(radar_tokens):
            sd_record_rad.append(self.nusc.get('sample_data', radar_token))
            cs_record_rad.append(self.nusc.get('calibrated_sensor', sd_record_rad[i]['calibrated_sensor_token']))

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']),
                                      inverse=False)
        ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']),
                                      inverse=True)
        rad_to_ego = []
        for cs_rec_rad in cs_record_rad:
            rad_to_ego.append(transform_matrix(cs_rec_rad['translation'], Quaternion(cs_rec_rad['rotation']),
                                               inverse=False))

        velo_to_cam = np.dot(ego_to_cam, lid_to_ego)
        # # TODO: Assuming Radar points are going to be in ego coordinates.
        # radar_to_cam = ego_to_cam

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix)
        # # nuScenes radars use the same convention as KITTI lidar.
        # radar_to_cam_kitti = radar_to_cam

        # Currently not used.
        imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4))
        p_left_kitti[:3, :3] = cs_record_cam['camera_intrinsic']  # Cameras are always rectified.

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]
        # radar_to_cam_rot = radar_to_cam_kitti[:3, :3]
        # radar_to_cam_trans = radar_to_cam_kitti[:3, 3]

        # Check that the lidar rotation has the same format as in KITTI.
        assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all()
        assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
        # not the camera.
        filename_cam_full = sd_record_cam['filename']
        filename_lid_full = sd_record_lid['filename']
        filename_rad_full = []
        for sd_rec_rad in sd_record_rad:
            filename_rad_full.append(sd_rec_rad['filename'])
        out_filename = '%06d' % token_idx  # Alternative to use KITTI names.
        # out_filename = sample_token

        # Write token to disk.
        text = sample_token
        id2token.write(text + '\n')
        id2token.flush()
        token_idx += 1

        # Convert image (jpg to png).
        src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
        dst_im_path = os.path.join(image_folder, out_filename + '.png')
        if self.use_symlinks:
            # Create symbolic links to nuScenes images.
            os.symlink(os.path.abspath(src_im_path), dst_im_path)
        else:
            im = Image.open(src_im_path)
            im.save(dst_im_path, "PNG")

        # Convert lidar.
        src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full)
        dst_lid_path = os.path.join(lidar_folder, out_filename + '.bin')
        assert not dst_lid_path.endswith('.pcd.bin')
        # pcl = LidarPointCloud.from_file(src_lid_path)
        pcl, _ = LidarPointCloud.from_file_multisweep(nusc=self.nusc,
                                                      sample_rec=sample,
                                                      chan=self.lidar_name,
                                                      ref_chan=self.lidar_name,
                                                      nsweeps=self.lidar_sweeps,
                                                      min_distance=1)
        pcl.rotate(kitti_to_nu_lidar_inv.rotation_matrix)  # In KITTI lidar frame.
        pcl.points = pcl.points.astype('float32')
        with open(dst_lid_path, "w") as lid_file:
            pcl.points.T.tofile(lid_file)

        # # Visualize pointclouds.
        # _, ax = plt.subplots(1, 1, figsize=(9, 9))
        # points = view_points(pcl.points[:3, :], np.eye(4), normalize=False)
        # dists = np.sqrt(np.sum(pcl.points[:2, :] ** 2, axis=0))
        # colors = np.minimum(1, dists / 50)
        # ax.scatter(points[0, :], points[1, :], c=colors, s=0.2)
        # # plt.show(block=False)
        # plt.show()

        # Convert radar.
        src_rad_path = []
        for filename_radar in filename_rad_full:
            src_rad_path.append(os.path.join(self.nusc.dataroot, filename_radar))
        dst_rad_path = os.path.join(radar_folder, out_filename + '.bin')
        assert not dst_rad_path.endswith('.pcd.bin')
        pcl = RadarPointCloud(np.zeros((18, 0)))

        # Get radar points in the lidar coordinate system.
        for i, rad_path in enumerate(src_rad_path):
            pc, _ = RadarPointCloud.from_file_multisweep(self.nusc,
                                                         sample_rec=sample,
                                                         chan=sd_record_rad[i]['channel'],
                                                         ref_chan=self.lidar_name,
                                                         nsweeps=self.radar_sweeps,
                                                         min_distance=0)
            # rot_matrix = Quaternion(cs_record_rad[i]['rotation']).rotation_matrix
            # pc.rotate(rot_matrix)
            # pc.translate(np.array(cs_record_rad[i]['translation']))
            pcl.points = np.hstack((pcl.points, pc.points))
        pcl.rotate(kitti_to_nu_lidar_inv.rotation_matrix)  # In KITTI lidar frame.

        # # Visualize pointclouds.
        # _, ax = plt.subplots(1, 1, figsize=(9, 9))
        # points = view_points(pcl.points[:3, :], np.eye(4), normalize=False)
        # dists = np.sqrt(np.sum(pcl.points[:2, :] ** 2, axis=0))
        # colors = np.minimum(1, dists / 50)
        # ax.scatter(points[0, :], points[1, :], c=colors, s=0.2)
        # plt.show()

        pcl.points = pcl.points.astype('float32')
        with open(dst_rad_path, "w") as rad_file:
            pcl.points.T.tofile(rad_file)

        # Add to tokens.
        out_filenames.append(out_filename)

        # Create calibration file.
        kitti_transforms = dict()
        kitti_transforms['P0'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P1'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P2'] = p_left_kitti  # Left camera transform.
        kitti_transforms['P3'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['R0_rect'] = r0_rect.rotation_matrix  # Cameras are already rectified.
        kitti_transforms['Tr_velo_to_cam'] = np.hstack((velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
        # kitti_transforms['Tr_radar_to_cam'] = np.hstack((radar_to_cam_rot, radar_to_cam_trans.reshape(3, 1)))
        kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti
        calib_path = os.path.join(calib_folder, out_filename + '.txt')
        with open(calib_path, "w") as calib_file:
            for (key, val) in kitti_transforms.items():
                val = val.flatten()
                val_str = '%.12e' % val[0]
                for v in val[1:]:
                    val_str += ' %.12e' % v
                calib_file.write('%s: %s\n' % (key, val_str))

        # Write label file.
        label_path = os.path.join(label_folder, out_filename + '.txt')
        if os.path.exists(label_path):
            print('Skipping existing file: %s' % label_path)
            continue
        with open(label_path, "w") as label_file:
            for sample_annotation_token in sample_annotation_tokens:
                sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token)

                # Get box in LIDAR frame.
                _, box_lidar_nusc, _ = self.nusc.get_sample_data(lidar_token, box_vis_level=BoxVisibility.NONE,
                                                                 selected_anntokens=[sample_annotation_token])
                box_lidar_nusc = box_lidar_nusc[0]

                # Truncated: Set all objects to 0 which means untruncated.
                truncated = 0.0

                # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
                occluded = 0

                # Convert nuScenes category to nuScenes detection challenge category.
                detection_name = _C.KITTI_CLASSES.get(sample_annotation['category_name'])

                # Skip categories that are not in the KITTI classes.
                if detection_name is None:
                    continue

                # Convert from nuScenes to KITTI box format.
                box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                    box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)

                # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize)
                if bbox_2d is None:
                    # continue
                    # If the box is not inside the image, the 2D box is set to zero.
                    bbox_2d = (0, 0, 0, 0)

                # Set dummy score so we can use this file as result.
                box_cam_kitti.score = 0

                # Convert box to output string format.
                output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d,
                                               truncation=truncated, occlusion=occluded)

                # Write to disk.
                label_file.write(output + '\n')

    id2token.close()
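# Illustrative only: the calibration files written above mimic the KITTI calib layout, one
# flattened row-major matrix per line, each value formatted as '%.12e', e.g.
#
#   P0: 0 0 0 0 0 0 0 0 0 0 0 0                <- dummy values
#   P2: fx 0 cx 0  0 fy cy 0  0 0 1 0          <- nuScenes camera intrinsics, zero last column
#   R0_rect: 1 0 0 0 1 0 0 0 1                 <- identity, nuScenes cameras are already rectified
#   Tr_velo_to_cam: r11 r12 r13 tx ...         <- 3x4 [R | t] from KITTI lidar frame to camera frame
#   Tr_imu_to_velo: 0 0 0 0 0 0 0 0 0 0 0 0    <- dummy values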