def generateXYZRGB(ds: LyftDataset, sample_token: str, config: dict, folder: str): sample = ds.get("sample", sample_token) sample_lidar_token = sample["data"]["LIDAR_TOP"] lidar_data = ds.get("sample_data", sample_lidar_token) lidar_filepath = ds.get_sample_data_path(sample_lidar_token) ego_pose = ds.get("ego_pose", lidar_data["ego_pose_token"]) calibrated_sensor = ds.get("calibrated_sensor", lidar_data["calibrated_sensor_token"]) global_from_car = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']), inverse=False) car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion( calibrated_sensor['rotation']), inverse=False) try: lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath) pc = copy.deepcopy(lidar_pointcloud) lidar_pointcloud.transform(car_from_sensor) except Exception as e: print("Failed to load Lidar Pointcloud for {}: {}:".format( sample_token, e)) pass XYZRGB = getXYZRGB(ds, pc, sample, lidar_data) np.savez(folder + sample_lidar_token + ".xyzrgb")
def get_sensor_to_world_transform_matrix_from_sample_data_token(sample_data_token, lyftd: LyftDataset): sd_record = lyftd.get("sample_data", sample_data_token) cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"]) sensor_record = lyftd.get("sensor", cs_record["sensor_token"]) pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"]) sensor_to_ego_mtx = transform_matrix(translation=np.array(cs_record["translation"]), rotation=Quaternion(cs_record["rotation"])) ego_to_world_mtx = transform_matrix(translation=np.array(pose_record["translation"]), rotation=Quaternion(pose_record["rotation"])) return np.dot(ego_to_world_mtx, sensor_to_ego_mtx)
def __getitem__(self, item): sample_idx = item // len(CAMS) sample = self.lyft_data.sample[sample_idx] sd_lidar = self.lyft_data.get('sample_data', sample['data']['LIDAR_TOP']) cs_lidar = self.lyft_data.get('calibrated_sensor', sd_lidar['calibrated_sensor_token']) pc = LidarPointCloud.from_file(self.lyft_data.data_path / sd_lidar['filename']) cam = CAMS[item % len(CAMS)] cam_token = sample['data'][cam] sd_cam = self.lyft_data.get('sample_data', cam_token) cs_cam = self.lyft_data.get('calibrated_sensor', sd_cam['calibrated_sensor_token']) img = Image.open(str(self.lyft_data.data_path / cam["filename"])) img = transform(img) lidar_2_ego = transform_matrix(cs_lidar['translation'], Quaternion(cs_lidar['rotation']), inverse=False) ego_2_cam = transform_matrix(cs_cam['translation'], Quaternion(cs_cam['rotation']), inverse=True) cam_2_bev = Quaternion(axis=[1, 0, 0], angle=-np.pi / 2).transformation_matrix # lidar_2_cam = ego_2_cam @ lidar_2_ego lidar_2_bevcam = cam_2_bev @ ego_2_cam @ lidar_2_ego points = view_points(pc.points[:3, :], lidar_2_bevcam, normalize=False) points = clip_points(points) points = pd.DataFrame(np.swapaxes(points, 0, 1), columns=['x', 'y', 'z']) points = PyntCloud(points) voxelgrid_id = points.add_structure("voxelgrid", size_x=0.1, size_y=0.1, size_z=0.2, regular_bounding_box=False) voxelgrid = points.structures[voxelgrid_id] occ_map = voxelgrid.get_feature_vector(mode='binary') padded_occ = np.zeros((800, 700, 30)) padded_occ[:occ_map.shape[0], :occ_map.shape[1], :occ_map. shape[2]] = occ_map return img, padded_occ
def prepare_training_data_for_scene(level5data, first_sample_token, output_folder, bev_shape, voxel_size, z_offset, box_scale, classes): """ Given a first sample token (in a scene), output rasterized input volumes and targets in birds-eye-view perspective. """ sample_token = first_sample_token while sample_token: sample = level5data.get("sample", sample_token) sample_lidar_token = sample["data"]["LIDAR_TOP"] lidar_data = level5data.get("sample_data", sample_lidar_token) lidar_filepath = level5data.get_sample_data_path(sample_lidar_token) ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"]) calibrated_sensor = level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"]) global_from_car = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']), inverse=False) car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']), inverse=False) try: lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath) lidar_pointcloud.transform(car_from_sensor) except Exception as e: print("Failed to load Lidar Pointcloud for {}: {}:".format(sample_token, e)) sample_token = sample["next"] continue bev = create_voxel_pointcloud(lidar_pointcloud.points, bev_shape, voxel_size=voxel_size, z_offset=z_offset) bev = normalize_voxel_intensities(bev) boxes = level5data.get_boxes(sample_lidar_token) target = np.zeros_like(bev) move_boxes_to_car_space(boxes, ego_pose) scale_boxes(boxes, box_scale) draw_boxes(target, voxel_size, boxes=boxes, classes=classes, z_offset=z_offset) bev_im = np.round(bev * 255).astype(np.uint8) target_im = target[:, :, 0] # take one channel only cv2.imwrite(os.path.join(output_folder, "{}_input.png".format(sample_token)), bev_im) cv2.imwrite(os.path.join(output_folder, "{}_target.png".format(sample_token)), target_im) sample_token = sample["next"]
def load_position_raw(self, sample_lidar_token): lidar_data = self.level5data.get("sample_data", sample_lidar_token) ego_pose = self.level5data.get("ego_pose", lidar_data["ego_pose_token"]) calibrated_sensor = self.level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"]) # Homogeneous transformation matrix from car frame to world frame. global_from_car = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']), inverse=False) return global_from_car
def prepare_test_data_for_scene(first_sample_token, output_folder, level5data, bev_shape, voxel_size, z_offset, box_scale): """ Given a first sample token (in a scene), output rasterized input volumes and targets in birds-eye-view perspective. """ # get the first sample tken for the scene sample_token = first_sample_token while sample_token: sample = level5data.get("sample", sample_token) sample_lidar_token = sample["data"]["LIDAR_TOP"] lidar_data = level5data.get("sample_data", sample_lidar_token) lidar_filepath = level5data.get_sample_data_path(sample_lidar_token) ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"]) calibrated_sensor = level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"]) global_from_car = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']), inverse=False) car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']), inverse=False) try: lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath) lidar_pointcloud.transform(car_from_sensor) except Exception as e: print ("Failed to load Lidar Pointcloud for {}: {}:".format(sample_token, e)) sample_token = sample["next"] continue bev = create_voxel_pointcloud(lidar_pointcloud.points, bev_shape, voxel_size=voxel_size, z_offset=z_offset) bev = normalize_voxel_intensities(bev) bev_im = np.round(bev*255).astype(np.uint8) cv2.imwrite(os.path.join(output_folder, "{}_input.png".format(sample_token)), bev_im) # go to the next sample token sample_token = sample["next"]
def load_cloud_raw(self, sample_lidar_token): lidar_data = self.level5data.get("sample_data", sample_lidar_token) lidar_filepath = self.level5data.get_sample_data_path(sample_lidar_token) ego_pose = self.level5data.get("ego_pose", lidar_data["ego_pose_token"]) calibrated_sensor = self.level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"]) # Homogeneous transformation matrix from sensor coordinate frame to ego car frame. car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']), inverse=False) lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath) # The lidar pointcloud is defined in the sensor's reference frame. # We want it in the car's reference frame, so we transform each point lidar_pointcloud.transform(car_from_sensor) return lidar_pointcloud.points[:3,:].T
def make_box_db(train_df,l5d): first_samples = train_df.first_sample_token.values box_db = {} box_scale = cfg.DATA.BOX_SCALE num_sweeps = cfg.DATA.NUM_SWEEPS min_distance = cfg.DATA.MIN_DISTANCE for sample_token in tqdm(first_samples): while sample_token: sample = l5d.get('sample',sample_token) sample_lidar_token = sample['data']['LIDAR_TOP'] lidar_data = l5d.get('sample_data',sample_lidar_token) ego_pose = l5d.get("ego_pose", lidar_data["ego_pose_token"]) calibrated_sensor = l5d.get("calibrated_sensor", lidar_data["calibrated_sensor_token"]) car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']), inverse=False) try: lidar_pointcloud = LidarPointCloud.from_file_multisweep(l5d,sample,'LIDAR_TOP','LIDAR_TOP',num_sweeps=num_sweeps,min_distance=min_distance)[0] lidar_pointcloud.transform(car_from_sensor) except Exception as e: print ("Failed to load Lidar Pointcloud for {}: {}:".format(sample_token, e)) sample_token = sample["next"] continue boxes = l5d.get_boxes(sample_lidar_token) move_boxes_to_car_space(boxes, ego_pose) scale_boxes(boxes, box_scale) for box in boxes: point_mask = points_in_box(box,lidar_pointcloud.points[:3,:]) box_points = lidar_pointcloud.points[:,point_mask] if box.name not in box_db: box_db[box.name] = [{'lidar':box_points,'box':box}] else: box_db[box.name].append({'lidar':box_points,'box':box}) sample_token = sample['next'] pickle.dump(box_db,open(cfg.DATA.BOX_DB_FILE,'wb'))
def get_pointcloud(sample_token, level5data): """Get sample of lidar point cloud Transform it to car coordinates """ sample = level5data.get("sample", sample_token) sample_lidar_token = sample["data"]["LIDAR_TOP"] lidar_data = level5data.get("sample_data", sample_lidar_token) lidar_filepath = level5data.get_sample_data_path(sample_lidar_token) lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath) # sensor (lidar) token calibrated_sensor = level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"]) # Homogeneous transformation matrix from sensor coordinate frame to ego car frame. car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']), inverse=False) # The lidar pointcloud is defined in the sensor's reference frame. # We want it in the car's reference frame, so we transform each point lidar_pointcloud.transform(car_from_sensor) return lidar_pointcloud
def process_token_to_kitti(self, sample_token: str) -> None: # Get sample data. sample = self.lyft_ds.get("sample", sample_token) sample_annotation_tokens = sample["anns"] lidar_token = sample["data"][self.lidar_name] sd_record_lid = self.lyft_ds.get("sample_data", lidar_token) cs_record_lid = self.lyft_ds.get( "calibrated_sensor", sd_record_lid["calibrated_sensor_token"]) for cam_name in self.cams_to_see: cam_front_token = sample["data"][cam_name] if self.get_all_detections: token_to_write = sample_token else: token_to_write = cam_front_token # Retrieve sensor records. sd_record_cam = self.lyft_ds.get("sample_data", cam_front_token) cs_record_cam = self.lyft_ds.get( "calibrated_sensor", sd_record_cam["calibrated_sensor_token"]) cam_height = sd_record_cam["height"] cam_width = sd_record_cam["width"] imsize = (cam_width, cam_height) # Combine transformations and convert to KITTI format. # Note: cam uses same conventions in KITTI and nuScenes. lid_to_ego = transform_matrix(cs_record_lid["translation"], Quaternion( cs_record_lid["rotation"]), inverse=False) ego_to_cam = transform_matrix(cs_record_cam["translation"], Quaternion( cs_record_cam["rotation"]), inverse=True) velo_to_cam = np.dot(ego_to_cam, lid_to_ego) # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam. velo_to_cam_kitti = np.dot( velo_to_cam, self.kitti_to_nu_lidar.transformation_matrix) # Currently not used. imu_to_velo_kitti = np.zeros((3, 4)) # Dummy values. r0_rect = Quaternion(axis=[1, 0, 0], angle=0) # Dummy values. # Projection matrix. p_left_kitti = np.zeros((3, 4)) # Cameras are always rectified. p_left_kitti[:3, :3] = cs_record_cam["camera_intrinsic"] # Create KITTI style transforms. velo_to_cam_rot = velo_to_cam_kitti[:3, :3] velo_to_cam_trans = velo_to_cam_kitti[:3, 3] # Check that the rotation has the same format as in KITTI. if self.lyft_ds.get( "sensor", cs_record_cam["sensor_token"])["channel"] == "CAM_FRONT": expected_kitti_velo_to_cam_rot = np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]]) assert ( velo_to_cam_rot.round(0) == expected_kitti_velo_to_cam_rot ).all(), velo_to_cam_rot.round(0) assert (velo_to_cam_trans[1:3] < 0).all() # Retrieve the token from the lidar. # Note that this may be confusing as the filename of the camera will # include the timestamp of the lidar, # not the camera. filename_cam_full = sd_record_cam["filename"] filename_lid_full = sd_record_lid["filename"] # Convert image (jpg to png). src_im_path = self.lyft_ds.data_path.joinpath(filename_cam_full) dst_im_path = self.image_folder.joinpath(f"{token_to_write}.png") if not dst_im_path.exists(): im = Image.open(src_im_path) im.save(dst_im_path, "PNG") # Convert lidar. # Note that we are only using a single sweep, instead of the commonly used n sweeps. src_lid_path = self.lyft_ds.data_path.joinpath(filename_lid_full) dst_lid_path = self.lidar_folder.joinpath(f"{token_to_write}.bin") pcl = LidarPointCloud.from_file(Path(src_lid_path)) # In KITTI lidar frame. pcl.rotate(self.kitti_to_nu_lidar_inv.rotation_matrix) with open(dst_lid_path, "w") as lid_file: pcl.points.T.tofile(lid_file) # Add to tokens. # tokens.append(token_to_write) # Create calibration file. kitti_transforms = dict() kitti_transforms["P0"] = np.zeros((3, 4)) # Dummy values. kitti_transforms["P1"] = np.zeros((3, 4)) # Dummy values. kitti_transforms["P2"] = p_left_kitti # Left camera transform. kitti_transforms["P3"] = np.zeros((3, 4)) # Dummy values. # Cameras are already rectified. kitti_transforms["R0_rect"] = r0_rect.rotation_matrix kitti_transforms["Tr_velo_to_cam"] = np.hstack( (velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1))) kitti_transforms["Tr_imu_to_velo"] = imu_to_velo_kitti calib_path = self.calib_folder.joinpath(f"{token_to_write}.txt") with open(calib_path, "w") as calib_file: for (key, val) in kitti_transforms.items(): val = val.flatten() val_str = "%.12e" % val[0] for v in val[1:]: val_str += " %.12e" % v calib_file.write("%s: %s\n" % (key, val_str)) # Write label file. label_path = self.label_folder.joinpath(f"{token_to_write}.txt") if label_path.exists(): print("Skipping existing file: %s" % label_path) continue with open(label_path, "w") as label_file: for sample_annotation_token in sample_annotation_tokens: sample_annotation = self.lyft_ds.get( "sample_annotation", sample_annotation_token) # Get box in LIDAR frame. _, box_lidar_nusc, _ = self.lyft_ds.get_sample_data( lidar_token, box_vis_level=BoxVisibility.NONE, selected_anntokens=[sample_annotation_token]) box_lidar_nusc = box_lidar_nusc[0] # Truncated: Set all objects to 0 which means untruncated. truncated = 0.0 # Occluded: Set all objects to full visibility as this information is # not available in nuScenes. occluded = 0 detection_name = sample_annotation["category_name"] # Convert from nuScenes to KITTI box format. box_cam_kitti = KittiDB.box_nuscenes_to_kitti( box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect) # Project 3d box to 2d box in image, ignore box if it does not fall inside. bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize) if bbox_2d is None and not self.get_all_detections: continue elif bbox_2d is None and self.get_all_detections: # default KITTI bbox bbox_2d = (-1.0, -1.0, -1.0, -1.0) # Set dummy score so we can use this file as result. box_cam_kitti.score = 0 # Convert box to output string format. output = KittiDB.box_to_string( name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d, truncation=truncated, occlusion=occluded, ) # Write to disk. label_file.write(output + "\n")
def fill_trainval_infos(data_path, lyft, train_scenes, val_scenes, test=False): train_lyft_infos = [] val_lyft_infos = [] progress_bar = tqdm.tqdm(total=len(lyft.sample), desc='create_info', dynamic_ncols=True) ref_chans = ["LIDAR_TOP", "LIDAR_FRONT_LEFT", "LIDAR_FRONT_RIGHT"] for index, sample in enumerate(lyft.sample): progress_bar.update() ref_info = {} for ref_chan in ref_chans: if ref_chan not in sample["data"]: continue ref_sd_token = sample["data"][ref_chan] ref_sd_rec = lyft.get("sample_data", ref_sd_token) ref_cs_token = ref_sd_rec["calibrated_sensor_token"] ref_cs_rec = lyft.get("calibrated_sensor", ref_cs_token) ref_to_car = transform_matrix( ref_cs_rec["translation"], Quaternion(ref_cs_rec["rotation"]), inverse=False, ) ref_from_car = transform_matrix( ref_cs_rec["translation"], Quaternion(ref_cs_rec["rotation"]), inverse=True, ) ref_lidar_path = lyft.get_sample_data_path(ref_sd_token) ref_info[ref_chan] = { "lidar_path": Path(ref_lidar_path).relative_to(data_path).__str__(), "ref_from_car": ref_from_car, "ref_to_car": ref_to_car, } if ref_chan == "LIDAR_TOP": ref_boxes, ref_pose_rec = get_sample_data(lyft, ref_sd_token) ref_time = 1e-6 * ref_sd_rec["timestamp"] car_from_global = transform_matrix( ref_pose_rec["translation"], Quaternion(ref_pose_rec["rotation"]), inverse=True, ) info = { "ref_info": ref_info, 'token': sample['token'], 'car_from_global': car_from_global, 'timestamp': ref_time, } if not test: annotations = [ lyft.get("sample_annotation", token) for token in sample["anns"] ] locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3) dims = np.array([b.wlh for b in ref_boxes]).reshape(-1, 3)[:, [1, 0, 2]] rots = np.array([quaternion_yaw(b.orientation) for b in ref_boxes]).reshape(-1, 1) velocity = np.array([b.velocity for b in ref_boxes]).reshape(-1, 3) names = np.array([b.name for b in ref_boxes]) tokens = np.array([b.token for b in ref_boxes]).reshape(-1, 1) gt_boxes = np.concatenate([locs, dims, rots], axis=1) assert len(annotations) == len(gt_boxes) info["gt_boxes"] = gt_boxes info["gt_boxes_velocity"] = velocity info["gt_names"] = names info["gt_boxes_token"] = tokens if sample["scene_token"] in train_scenes: train_lyft_infos.append(info) else: val_lyft_infos.append(info) progress_bar.close() return train_lyft_infos, val_lyft_infos
lidar_filepath = level5data.get_sample_data_path(sample_lidar_token) print(lidar_filepath) print(os.path.exists(lidar_filepath)) # 在ego_pose 表中拿到 lidar_data 对应的ego_pose ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"]) # 拿到calibrated_sensor 这些token 都可以在https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110257#latest-636505的sample_data.json里面找到 calibrated_sensor = level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"]) # 坐标系的转换 从车坐标系转到世界坐标系 # Homogeneous transformation matrix from car frame to world frame. global_from_car = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']), inverse=False) # 从雷达转到车坐标系 # Homogeneous transformation matrix from sensor coordinate frame to ego car frame. car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']), inverse=False) # 拿到的坐标是相机坐标系的坐标 所以不是以车为中心 lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath) # 将相机坐标系 转到 车的坐标系, 点就以原点为中心 可以通过直方图看出来, X代表在车的X轴上的坐标 Y代表Y轴的坐标, 可以看到XY都以0为中心 # The lidar pointcloud is defined in the sensor's reference frame. # We want it in the car's reference frame, so we transform each point lidar_pointcloud.transform(car_from_sensor) print(lidar_pointcloud.nbr_points())
def _fill_trainval_infos(lyft, train_scenes, val_scenes, test=False): from lyft_dataset_sdk.utils.geometry_utils import transform_matrix train_lyft_infos = [] val_lyft_infos = [] ref_chans = ["LIDAR_TOP", "LIDAR_FRONT_LEFT", "LIDAR_FRONT_RIGHT"] for sample in tqdm(lyft.sample): ref_info = {} for ref_chan in ref_chans: if ref_chan not in sample["data"]: continue ref_sd_token = sample["data"][ref_chan] ref_sd_rec = lyft.get("sample_data", ref_sd_token) ref_cs_token = ref_sd_rec["calibrated_sensor_token"] ref_cs_rec = lyft.get("calibrated_sensor", ref_cs_token) ref_to_car = transform_matrix( ref_cs_rec["translation"], Quaternion(ref_cs_rec["rotation"]), inverse=False, ) ref_from_car = transform_matrix( ref_cs_rec["translation"], Quaternion(ref_cs_rec["rotation"]), inverse=True, ) ref_lidar_path = lyft.get_sample_data_path(ref_sd_token) ref_info[ref_chan] = { "lidar_path": ref_lidar_path, "ref_from_car": ref_from_car, "ref_to_car": ref_to_car, } if ref_chan == "LIDAR_TOP": ref_boxes, ref_pose_rec = get_sample_data(lyft, ref_sd_token) ref_time = 1e-6 * ref_sd_rec["timestamp"] car_from_global = transform_matrix( ref_pose_rec["translation"], Quaternion(ref_pose_rec["rotation"]), inverse=True, ) info = { "ref_info": ref_info, "token": sample["token"], "car_from_global": car_from_global, "timestamp": ref_time, } if not test: annotations = [ lyft.get("sample_annotation", token) for token in sample["anns"] ] locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3) dims = np.array([b.wlh for b in ref_boxes]).reshape(-1, 3) rots = np.array([quaternion_yaw(b.orientation) for b in ref_boxes]).reshape( -1, 1 ) velocity = np.array([b.velocity for b in ref_boxes]).reshape(-1, 3) names = np.array([b.name for b in ref_boxes]) tokens = np.array([b.token for b in ref_boxes]).reshape(-1, 1) gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1) assert len(annotations) == len(gt_boxes) info["gt_boxes"] = gt_boxes info["gt_boxes_velocity"] = velocity info["gt_names"] = names info["gt_boxes_token"] = tokens if sample["scene_token"] in train_scenes: train_lyft_infos.append(info) else: val_lyft_infos.append(info) return train_lyft_infos, val_lyft_infos
def make_prediction_boxes(model, dataloader, num_inputs, batch_size, level5data): classes = cfg.DATA.CLASSES height = cfg.DATA.BEV_SHAPE[0] width = cfg.DATA.BEV_SHAPE[1] device = cfg.TRAIN.DEVICE bev_shape = cfg.DATA.BEV_SHAPE voxel_size = cfg.DATA.VOXEL_SIZE z_offset = cfg.DATA.Z_OFFSET # we quantize to uint8 here to conserve memory. we're allocating >20GB of memory otherwise. predictions = np.zeros((num_inputs, 1 + len(classes), height, width), dtype=np.uint8) # [N,C,H,W] sample_tokens = [] progress_bar = tqdm(dataloader) # evaluate samples with loaded model - predictions are gathered in 'predictions' with torch.no_grad(): model.eval() for ii, (X, target, batch_sample_tokens) in enumerate(progress_bar): offset = ii * batch_size sample_tokens.extend(batch_sample_tokens) X = X.to(device) # [N, 1, H, W] prediction = model(X) # [N, 2, H, W] prediction = F.softmax(prediction, dim=1) prediction_cpu = prediction.cpu().numpy() predictions[offset:offset + batch_size] = np.round( prediction_cpu * 255).astype(np.uint8) predictions_non_class0 = 255 - predictions[:, 0] # [N,H,W] background_threshold = 255 // 2 kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) predictions_opened = np.zeros((predictions_non_class0.shape), dtype=np.uint8) # [N,H,W] for i, p in enumerate(tqdm(predictions_non_class0)): thresholded_p = (p > background_threshold).astype(np.uint8) predictions_opened[i] = cv2.morphologyEx(thresholded_p, cv2.MORPH_OPEN, kernel) # [H,W] detection_boxes = [] detection_scores = [] detection_classes = [] for i in tqdm(range(len(predictions))): prediction_opened = predictions_opened[i] # [H,W] probability_non_class0 = predictions_non_class0[i] # [H,W] class_probability = predictions[i] # [C,H,W] sample_boxes = [] sample_detection_scores = [] sample_detection_classes = [] contours, hierarchy = cv2.findContours(prediction_opened, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) for cnt in contours: rect = cv2.minAreaRect(cnt) box = cv2.boxPoints(rect) # Let's take the center pixel value as the confidence value box_center_index = np.int0(np.mean(box, axis=0)) for class_index in range(len(classes)): box_center_value = class_probability[class_index + 1, box_center_index[1], box_center_index[0]] # Let's remove candidates with very low probability if box_center_value < 0.01: continue box_center_class = classes[class_index] box_detection_score = box_center_value sample_detection_classes.append(box_center_class) sample_detection_scores.append(box_detection_score) sample_boxes.append(box) detection_boxes.append(np.array(sample_boxes)) detection_scores.append(sample_detection_scores) detection_classes.append(sample_detection_classes) pred_box3ds = [] height_dict = cfg.DATA.AVG_CAT_HEIGHT # This could use some refactoring.. for (sample_token, sample_boxes, sample_detection_scores, sample_detection_class) in tqdm(zip(sample_tokens, detection_boxes, detection_scores, detection_classes), total=len(sample_tokens)): sample_boxes = sample_boxes.reshape(-1, 2) # (N, 4, 2) -> (N*4, 2) sample_boxes = sample_boxes.transpose(1, 0) # (N*4, 2) -> (2, N*4) # Add Z dimension sample_boxes = np.vstack(( sample_boxes, np.zeros(sample_boxes.shape[1]), )) # (2, N*4) -> (3, N*4) sample = level5data.get("sample", sample_token) sample_lidar_token = sample["data"]["LIDAR_TOP"] lidar_data = level5data.get("sample_data", sample_lidar_token) lidar_filepath = level5data.get_sample_data_path(sample_lidar_token) ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"]) ego_translation = np.array(ego_pose['translation']) global_from_car = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']), inverse=False) car_from_voxel = np.linalg.inv( create_transformation_matrix_to_voxel_space( bev_shape, voxel_size, (0, 0, z_offset))) global_from_voxel = np.dot(global_from_car, car_from_voxel) sample_boxes = transform_points(sample_boxes, global_from_voxel) # We don't know at where the boxes are in the scene on the z-axis (up-down), let's assume all of them are at # the same height as the ego vehicle. sample_boxes[2, :] = ego_pose["translation"][2] # (3, N*4) -> (N, 4, 3) sample_boxes = sample_boxes.transpose(1, 0).reshape(-1, 4, 3) box_height = [height_dict[name] for name in sample_detection_class] # Note: Each of these boxes describes the ground corners of a 3D box. # To get the center of the box in 3D, we'll have to add half the height to it. sample_boxes_centers = sample_boxes.mean(axis=1) sample_boxes_centers[:, 2] += box_height / 2 # Width and height is arbitrary - we don't know what way the vehicles are pointing from our prediction segmentation # It doesn't matter for evaluation, so no need to worry about that here. # Note: We scaled our targets to be 0.8 the actual size, we need to adjust for that sample_lengths = np.linalg.norm( sample_boxes[:, 0, :] - sample_boxes[:, 1, :], axis=1) * 1 / box_scale sample_widths = np.linalg.norm( sample_boxes[:, 1, :] - sample_boxes[:, 2, :], axis=1) * 1 / box_scale sample_boxes_dimensions = np.zeros_like(sample_boxes_centers) sample_boxes_dimensions[:, 0] = sample_widths sample_boxes_dimensions[:, 1] = sample_lengths sample_boxes_dimensions[:, 2] = box_height for i in range(len(sample_boxes)): translation = sample_boxes_centers[i] size = sample_boxes_dimensions[i] class_name = sample_detection_class[i] ego_distance = float(np.linalg.norm(ego_translation - translation)) # Determine the rotation of the box v = (sample_boxes[i, 0] - sample_boxes[i, 1]) v /= np.linalg.norm(v) r = R.from_dcm([ [v[0], -v[1], 0], [v[1], v[0], 0], [0, 0, 1], ]) quat = r.as_quat() # XYZW -> WXYZ order of elements quat = quat[[3, 0, 1, 2]] detection_score = float(sample_detection_scores[i]) box3d = Box3D(sample_token=sample_token, translation=list(translation), size=list(size), rotation=list(quat), name=class_name, score=detection_score) pred_box3ds.append(box3d) return pred_box3ds
def __getitem__(self,ind): """ This function takes an index into the list of tokens and creates the pillar tensor, scatter tensor and training targets for the model. self.num_sweeps lidar point clouds are aggerated and transformed to the current reference coordinates. The resulting point cloud is then used to create the pillar and scatter tensors (pillars, indices). """ token = self.tokens[ind] # get the reference pose for the sample at token and create transformation matrix from the global # coordinate system to the reference car's coordinate system ref_pose = self.data_dict[token]['ego_pose'] ref_car_from_global = transform_matrix(ref_pose['translation'],Quaternion(ref_pose['rotation']), inverse=True) agg_pc = np.zeros((4,0)) curr_token = token # aggregate num_sweeps previous lidars and move them to the reference car's coordinate system for _ in range(self.num_sweeps): if not self.data_dict[curr_token]['lidar_fp']: curr_token = self.data_dict[curr_token]['prev_token'] continue # get the current point cloud for curr_token lidar_fp = pathlib.Path(self.data_dict[curr_token]['lidar_fp']) curr_pc = LidarPointCloud.from_file(lidar_fp) curr_pose = self.data_dict[curr_token]['ego_pose'] curr_sensor = self.data_dict[curr_token]['cal_sensor'] # create transformation matrices from current sensor to current car and from # current car to global global_from_curr_car = transform_matrix(curr_pose['translation'],Quaternion(curr_pose['rotation']), inverse=False) curr_car_from_curr_sensor = transform_matrix(curr_sensor['translation'], Quaternion(curr_sensor['rotation']), inverse=False) # combine transformation matrices into a single matrix that moves the point cloud # from current sensor -> current car -> global -> reference car transmat = reduce(np.dot,[ref_car_from_global,global_from_curr_car,curr_car_from_curr_sensor]) curr_pc.transform(transmat) curr_pc.remove_close(self.min_dist) # aggreate the resulting pointcloud agg_pc = np.hstack((agg_pc,curr_pc.points)) curr_token = self.data_dict[curr_token]['prev_token'] if not curr_token: break # create pillar tensor and scatter tensor lidar_points = agg_pc.transpose([1,0]) pillar = np.zeros((cfg.DATA.MAX_PILLARS,cfg.DATA.MAX_POINTS_PER_PILLAR,9)) indices = np.zeros((cfg.DATA.MAX_PILLARS,3)) pillars.create_pillars(lidar_points,pillar, indices,cfg.DATA.MAX_POINTS_PER_PILLAR, cfg.DATA.MAX_PILLARS,cfg.DATA.X_STEP,cfg.DATA.Y_STEP, cfg.DATA.X_MIN,cfg.DATA.Y_MIN,cfg.DATA.Z_MIN, cfg.DATA.X_MAX,cfg.DATA.Y_MAX,cfg.DATA.Z_MAX, cfg.DATA.CANVAS_HEIGHT) pillar = pillar.transpose([2,0,1]) pillar_size = pillar.shape pillar = torch.from_numpy(pillar).float() if self.data_mean is not None: # subtract the data mean from the pillar tensor pillar = pillar.reshape(-1) - self.data_mean pillar = pillar.reshape(pillar_size) indices = torch.from_numpy(indices).long() if self.training: # get the ground truth boxes of the token box_fp = self.data_dict[token]['boxes'] boxes = pickle.load(open(box_fp,'rb')) gt_centers,gt_corners = boxes_to_image_space(boxes) # create training labels c_target,r_target = create_target(self.anchor_corners,gt_corners, self.anchor_centers,gt_centers, self.anchor_boxes,boxes) c_target = torch.from_numpy(c_target).float() r_target = torch.from_numpy(r_target).float() gc.collect() return (pillar,indices,c_target,r_target) return (pillar,indices)
def process_token_to_kitti(self, sample_token: str) -> None: rotate = False # Get sample data. sample = self.lyft_ds.get("sample", sample_token) sample_annotation_tokens = sample["anns"] norm = np.array([[0, 1, 0], [-1, 0, 0], [0, 0, 1]]) norm_4 = np.eye(4) norm_4[:3, :3] = norm lidar_token = sample["data"][self.lidar_name] sd_record_lid = self.lyft_ds.get("sample_data", lidar_token) cs_record_lid = self.lyft_ds.get( "calibrated_sensor", sd_record_lid["calibrated_sensor_token"]) ego_record_lid = self.lyft_ds.get("ego_pose", sd_record_lid["ego_pose_token"]) for idx, cam_name in enumerate(self.cams_to_see): cam_front_token = sample["data"][cam_name] if self.get_all_detections: token_to_write = sample_token else: token_to_write = cam_front_token sample_name = "%06d" % self.tokens.index(token_to_write) # Retrieve sensor records. sd_record_cam = self.lyft_ds.get("sample_data", cam_front_token) cs_record_cam = self.lyft_ds.get( "calibrated_sensor", sd_record_cam["calibrated_sensor_token"]) ego_record_cam = self.lyft_ds.get("ego_pose", sd_record_cam["ego_pose_token"]) cam_height = sd_record_cam["height"] cam_width = sd_record_cam["width"] imsize = (cam_width, cam_height) # Combine transformations and convert to KITTI format. # Note: cam uses same conventions in KITTI and nuScenes. lid_to_ego = transform_matrix(cs_record_lid["translation"], Quaternion( cs_record_lid["rotation"]), inverse=False) lid_ego_to_world = transform_matrix( ego_record_lid["translation"], Quaternion(ego_record_lid["rotation"]), inverse=False) world_to_cam_ego = transform_matrix( ego_record_cam["translation"], Quaternion(ego_record_cam["rotation"]), inverse=True) ego_to_cam = transform_matrix(cs_record_cam["translation"], Quaternion( cs_record_cam["rotation"]), inverse=True) velo_to_cam = np.dot( ego_to_cam, np.dot(world_to_cam_ego, np.dot(lid_ego_to_world, lid_to_ego))) # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam. velo_to_cam_kitti = np.dot( velo_to_cam, self.kitti_to_nu_lidar.transformation_matrix) # Currently not used. imu_to_velo_kitti = np.zeros((3, 4)) # Dummy values. r0_rect = Quaternion(axis=[1, 0, 0], angle=0) # Dummy values. # Projection matrix. p_left_kitti = np.zeros((3, 4)) # Cameras are always rectified. p_left_kitti[:3, :3] = cs_record_cam["camera_intrinsic"] # Create KITTI style transforms. velo_to_cam_rot = velo_to_cam_kitti[:3, :3] velo_to_cam_trans = velo_to_cam_kitti[:3, 3] # Check that the rotation has the same format as in KITTI. if self.lyft_ds.get( "sensor", cs_record_cam["sensor_token"])["channel"] == "CAM_FRONT": expected_kitti_velo_to_cam_rot = np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]]) assert ( velo_to_cam_rot.round(0) == expected_kitti_velo_to_cam_rot ).all(), velo_to_cam_rot.round(0) # Retrieve the token from the lidar. # Note that this may be confusing as the filename of the camera will # include the timestamp of the lidar, # not the camera. filename_cam_full = sd_record_cam["filename"] filename_lid_full = sd_record_lid["filename"] # Convert image (jpg to png). src_im_path = self.lyft_ds.data_path.joinpath(filename_cam_full) dst_im_path = self.image_folder.joinpath(f"{sample_name}.png") if not dst_im_path.exists(): im = Image.open(src_im_path) im.save(dst_im_path, "PNG") # Convert lidar. # Note that we are only using a single sweep, instead of the commonly used n sweeps. src_lid_path = self.lyft_ds.data_path.joinpath(filename_lid_full) dst_lid_path = self.lidar_folder.joinpath(f"{sample_name}.bin") pcl = LidarPointCloud.from_file(Path(src_lid_path)) # In KITTI lidar frame. pcl.rotate(self.kitti_to_nu_lidar_inv.rotation_matrix) if rotate: pcl = np.dot(norm_4, pcl.points).T.astype(np.float32).reshape(-1) with open(dst_lid_path, "w") as lid_file: pcl.points.T.tofile(lid_file) # Add to tokens. # tokens.append(token_to_write) # Create calibration file. kitti_transforms = dict() kitti_transforms["P0"] = np.zeros((3, 4)) # Dummy values. kitti_transforms["P1"] = np.zeros((3, 4)) # Dummy values. kitti_transforms["P2"] = p_left_kitti # Left camera transform. kitti_transforms["P3"] = np.zeros((3, 4)) # Dummy values. # Cameras are already rectified. kitti_transforms["R0_rect"] = r0_rect.rotation_matrix if rotate: velo_to_cam_rot = np.dot(velo_to_cam_rot, norm.T) kitti_transforms["Tr_velo_to_cam"] = np.hstack( (velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1))) kitti_transforms["Tr_imu_to_velo"] = imu_to_velo_kitti calib_path = self.calib_folder.joinpath(f"{sample_name}.txt") with open(calib_path, "w") as calib_file: for (key, val) in kitti_transforms.items(): val = val.flatten() val_str = "%.12e" % val[0] for v in val[1:]: val_str += " %.12e" % v calib_file.write("%s: %s\n" % (key, val_str)) # Write label file. label_path = self.label_folder.joinpath(f"{sample_name}.txt") if label_path.exists(): # print("Skipping existing file: %s" % label_path) continue objects = [] for sample_annotation_token in sample_annotation_tokens: sample_annotation = self.lyft_ds.get("sample_annotation", sample_annotation_token) # Get box in LIDAR frame. _, box_lidar_nusc, _ = self.lyft_ds.get_sample_data( lidar_token, box_vis_level=BoxVisibility.NONE, selected_anntokens=[sample_annotation_token]) box_lidar_nusc = box_lidar_nusc[0] # Truncated: Set all objects to 0 which means untruncated. truncated = 0.0 # Occluded: Set all objects to full visibility as this information is # not available in nuScenes. occluded = 0 obj = dict() obj["detection_name"] = sample_annotation["category_name"] if obj["detection_name"] is None or obj[ "detection_name"] not in CLASS_MAP.keys(): continue obj["detection_name"] = CLASS_MAP[obj["detection_name"]] # Convert from nuScenes to KITTI box format. obj["box_cam_kitti"] = KittiDB.box_nuscenes_to_kitti( box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect) # Project 3d box to 2d box in image, ignore box if it does not fall inside. bbox_2d = project_to_2d(obj["box_cam_kitti"], p_left_kitti, cam_height, cam_width) if bbox_2d is None: continue obj["bbox_2d"] = bbox_2d["bbox"] obj["truncated"] = bbox_2d["truncated"] # Set dummy score so we can use this file as result. obj["box_cam_kitti"].score = 0 v = np.dot(obj["box_cam_kitti"].rotation_matrix, np.array([1, 0, 0])) rot_y = -np.arctan2(v[2], v[0]) obj["alpha"] = -np.arctan2( obj["box_cam_kitti"].center[0], obj["box_cam_kitti"].center[2]) + rot_y obj["depth"] = np.linalg.norm( np.array(obj["box_cam_kitti"].center[:3])) objects.append(obj) objects = postprocessing(objects, cam_height, cam_width) with open(label_path, "w") as label_file: for obj in objects: # Convert box to output string format. output = box_to_string(name=obj["detection_name"], box=obj["box_cam_kitti"], bbox_2d=obj["bbox_2d"], truncation=obj["truncated"], occlusion=obj["occluded"], alpha=obj["alpha"]) label_file.write(output + '\n') with open(self.split_file, "a") as f: f.write(sample_name + "\n")
def main(): # get data level5data = LyftDataset(data_path = '.', json_path='../../input/train_data', verbose=True) classes = ["car", "motorcycle", "bus", "bicycle", "truck", "pedestrian", "other_vehicle", "animal", "emergency_vehicle"] df = pd.read_csv('host_scenes.csv') host_count_df = df.groupby("host")['scene_token'].count() print(host_count_df) sample_token = df.first_sample_token.values[0] sample = level5data.get("sample", sample_token) sample_lidar_token = sample["data"]["LIDAR_TOP"] lidar_data = level5data.get("sample_data", sample_lidar_token) lidar_filepath = level5data.get_sample_data_path(sample_lidar_token) # car and sensor coords ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"]) print("ego_pose: ", ego_pose) calibrated_sensor = level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"]) # Homogeneous transformation matrix from car frame to world frame. global_from_car = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']), inverse=False) # Homogeneous transformation matrix from sensor coordinate frame to ego car frame. car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']), inverse=False) lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath) # The lidar pointcloud is defined in the sensor's reference frame. # We want it in the car's reference frame, so we transform each point lidar_pointcloud.transform(car_from_sensor) # Define hyper parameters #bev_shape = (768, 768, 3) #voxel_size = (0.2,0.2,1.5) bev_shape = (1024, 1024, 3) voxel_size = (0.2,0.2,1.5) z_offset = -2.0 box_scale = 0.8 #map_mask = level5data.map[0]["mask"] #print(map_mask) #plot_semantic_map(map_mask, ego_pose) #sample_token = train_df.first_sample_token.values[10] #bev = plot_bev_and_boxes(sample_token, level5data) train_data_folder = os.path.join(OUTPUT_ROOT, "bev_data_1024") validation_data_folder = os.path.join(OUTPUT_ROOT, "bev_data_1024") os.makedirs(train_data_folder, exist_ok=True) os.makedirs(validation_data_folder, exist_ok=True) # test on a single scene prepare_training_data_for_scene(df.first_sample_token.values[50], train_data_folder, level5data, classes, bev_shape, voxel_size, z_offset, box_scale) test_prepare_training_data(df.first_sample_token.values[50], train_data_folder) # get for all scenes first_samples = df.first_sample_token.values print(first_samples) for sample in first_samples: print(sample) prepare_training_data_for_scene(sample, train_data_folder, level5data, classes, bev_shape, voxel_size, z_offset, box_scale) # multiprocessing option #for df, data_folder in [(train_df, train_data_folder), (validation_df, validation_data_folder)]: # prepare_data_pool(df, data_folder, bev_shape, voxel_size, z_offset, box_scale) print('Mission accomplished!')
def fill_trainval_infos(data_path, lyft, train_scenes, val_scenes, test=False, max_sweeps=10): train_lyft_infos = [] val_lyft_infos = [] progress_bar = tqdm.tqdm(total=len(lyft.sample), desc='create_info', dynamic_ncols=True) # ref_chans = ["LIDAR_TOP", "LIDAR_FRONT_LEFT", "LIDAR_FRONT_RIGHT"] ref_chan = "LIDAR_TOP" for index, sample in enumerate(lyft.sample): progress_bar.update() ref_info = {} ref_sd_token = sample["data"][ref_chan] ref_sd_rec = lyft.get("sample_data", ref_sd_token) ref_cs_token = ref_sd_rec["calibrated_sensor_token"] ref_cs_rec = lyft.get("calibrated_sensor", ref_cs_token) ref_to_car = transform_matrix( ref_cs_rec["translation"], Quaternion(ref_cs_rec["rotation"]), inverse=False, ) ref_from_car = transform_matrix( ref_cs_rec["translation"], Quaternion(ref_cs_rec["rotation"]), inverse=True, ) ref_lidar_path = lyft.get_sample_data_path(ref_sd_token) ref_boxes, ref_pose_rec = get_sample_data(lyft, ref_sd_token) ref_time = 1e-6 * ref_sd_rec["timestamp"] car_from_global = transform_matrix( ref_pose_rec["translation"], Quaternion(ref_pose_rec["rotation"]), inverse=True, ) car_to_global = transform_matrix( ref_pose_rec["translation"], Quaternion(ref_pose_rec["rotation"]), inverse=False, ) info = { "lidar_path": Path(ref_lidar_path).relative_to(data_path).__str__(), "ref_from_car": ref_from_car, "ref_to_car": ref_to_car, 'token': sample['token'], 'car_from_global': car_from_global, 'car_to_global': car_to_global, 'timestamp': ref_time, 'sweeps': [] } sample_data_token = sample['data'][ref_chan] curr_sd_rec = lyft.get('sample_data', sample_data_token) sweeps = [] while len(sweeps) < max_sweeps - 1: if curr_sd_rec['prev'] == '': if len(sweeps) == 0: sweep = { 'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(), 'sample_data_token': curr_sd_rec['token'], 'transform_matrix': None, 'time_lag': curr_sd_rec['timestamp'] * 0, } sweeps.append(sweep) else: sweeps.append(sweeps[-1]) else: curr_sd_rec = lyft.get('sample_data', curr_sd_rec['prev']) # Get past pose current_pose_rec = lyft.get('ego_pose', curr_sd_rec['ego_pose_token']) global_from_car = transform_matrix( current_pose_rec['translation'], Quaternion(current_pose_rec['rotation']), inverse=False, ) # Homogeneous transformation matrix from sensor coordinate frame to ego car frame. current_cs_rec = lyft.get( 'calibrated_sensor', curr_sd_rec['calibrated_sensor_token']) car_from_current = transform_matrix( current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']), inverse=False, ) tm = reduce(np.dot, [ ref_from_car, car_from_global, global_from_car, car_from_current ]) lidar_path = lyft.get_sample_data_path(curr_sd_rec['token']) time_lag = ref_time - 1e-6 * curr_sd_rec['timestamp'] sweep = { 'lidar_path': Path(lidar_path).relative_to(data_path).__str__(), 'sample_data_token': curr_sd_rec['token'], 'transform_matrix': tm, 'global_from_car': global_from_car, 'car_from_current': car_from_current, 'time_lag': time_lag, } sweeps.append(sweep) info['sweeps'] = sweeps if not test: annotations = [ lyft.get("sample_annotation", token) for token in sample["anns"] ] locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3) dims = np.array([b.wlh for b in ref_boxes]).reshape(-1, 3)[:, [1, 0, 2]] rots = np.array([quaternion_yaw(b.orientation) for b in ref_boxes]).reshape(-1, 1) velocity = np.array([b.velocity for b in ref_boxes]).reshape(-1, 3) names = np.array([b.name for b in ref_boxes]) tokens = np.array([b.token for b in ref_boxes]).reshape(-1, 1) gt_boxes = np.concatenate([locs, dims, rots], axis=1) assert len(annotations) == len(gt_boxes) info["gt_boxes"] = gt_boxes info["gt_boxes_velocity"] = velocity info["gt_names"] = names info["gt_boxes_token"] = tokens if sample["scene_token"] in train_scenes: train_lyft_infos.append(info) else: val_lyft_infos.append(info) progress_bar.close() return train_lyft_infos, val_lyft_infos
def create_submit(): pred_box3ds = [] # This could use some refactoring.. for (sample_token, sample_boxes, sample_detection_scores, sample_detection_class) in tqdm_notebook( zip(sample_tokens, detection_boxes, detection_scores, detection_classes), total=len(sample_tokens)): sample_boxes = sample_boxes.reshape(-1, 2) # (N, 4, 2) -> (N*4, 2) sample_boxes = sample_boxes.transpose(1, 0) # (N*4, 2) -> (2, N*4) # Add Z dimension sample_boxes = np.vstack((sample_boxes, np.zeros(sample_boxes.shape[1]),)) # (2, N*4) -> (3, N*4) sample = level5data.get("sample", sample_token) sample_lidar_token = sample["data"]["LIDAR_TOP"] lidar_data = level5data.get("sample_data", sample_lidar_token) lidar_filepath = level5data.get_sample_data_path(sample_lidar_token) ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"]) ego_translation = np.array(ego_pose['translation']) global_from_car = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']), inverse=False) car_from_voxel = np.linalg.inv(create_transformation_matrix_to_voxel_space(bev_shape, voxel_size, (0, 0, z_offset))) global_from_voxel = np.dot(global_from_car, car_from_voxel) sample_boxes = transform_points(sample_boxes, global_from_voxel) # We don't know at where the boxes are in the scene on the z-axis (up-down), let's assume all of them are at # the same height as the ego vehicle. sample_boxes[2, :] = ego_pose["translation"][2] # (3, N*4) -> (N, 4, 3) sample_boxes = sample_boxes.transpose(1, 0).reshape(-1, 4, 3) # empirical heights box_height = np.array([class_heights[cls] for cls in sample_detection_class]) # To get the center of the box in 3D, we'll have to add half the height to it. sample_boxes_centers = sample_boxes.mean(axis=1) sample_boxes_centers[:, 2] += box_height / 2 # Width and height is arbitrary - we don't know what way the vehicles are pointing from our prediction segmentation # It doesn't matter for evaluation, so no need to worry about that here. # Note: We scaled our targets to be 0.8 the actual size, we need to adjust for that sample_lengths = np.linalg.norm(sample_boxes[:, 0, :] - sample_boxes[:, 1, :], axis=1) * 1 / box_scale sample_widths = np.linalg.norm(sample_boxes[:, 1, :] - sample_boxes[:, 2, :], axis=1) * 1 / box_scale sample_boxes_dimensions = np.zeros_like(sample_boxes_centers) sample_boxes_dimensions[:, 0] = sample_widths sample_boxes_dimensions[:, 1] = sample_lengths sample_boxes_dimensions[:, 2] = box_height for i in range(len(sample_boxes)): translation = sample_boxes_centers[i] size = sample_boxes_dimensions[i] class_name = sample_detection_class[i] ego_distance = float(np.linalg.norm(ego_translation - translation)) # Determine the rotation of the box v = (sample_boxes[i, 0] - sample_boxes[i, 1]) v /= np.linalg.norm(v) r = R.from_dcm([ [v[0], -v[1], 0], [v[1], v[0], 0], [0, 0, 1], ]) quat = r.as_quat() # XYZW -> WXYZ order of elements quat = quat[[3, 0, 1, 2]] detection_score = float(sample_detection_scores[i]) box3d = Box3D( sample_token=sample_token, translation=list(translation), size=list(size), rotation=list(quat), name=class_name, score=detection_score ) pred_box3ds.append(box3d) sub = {} for i in tqdm_notebook(range(len(pred_box3ds))): # yaw = -np.arctan2(pred_box3ds[i].rotation[2], pred_box3ds[i].rotation[0]) yaw = 2 * np.arccos(pred_box3ds[i].rotation[0]); pred = str(pred_box3ds[i].score / 255) + ' ' + str(pred_box3ds[i].center_x) + ' ' + \ str(pred_box3ds[i].center_y) + ' ' + str(pred_box3ds[i].center_z) + ' ' + \ str(pred_box3ds[i].width) + ' ' \ + str(pred_box3ds[i].length) + ' ' + str(pred_box3ds[i].height) + ' ' + str(yaw) + ' ' \ + str(pred_box3ds[i].name) + ' ' if pred_box3ds[i].sample_token in sub.keys(): sub[pred_box3ds[i].sample_token] += pred else: sub[pred_box3ds[i].sample_token] = pred sample_sub = pd.read_csv(base_path + 'sample_submission.csv') for token in set(sample_sub.Id.values).difference(sub.keys()): print(token) sub[token] = '' sub = pd.DataFrame(list(sub.items())) sub.columns = sample_sub.columns sub.head() sub.tail() sub.to_csv(f'{submit_name}.csv', index=False)
def process_token_to_groundtruth(self, sample_tokens, sample_n_in_seqs): ### output groundtruth pose to the scence folder started = False # loop through all the samples for sample_token, sample_n in zip(sample_tokens, sample_n_in_seqs): # Get sample data. sample = self.lyft_ds.get("sample", sample_token) # Get lidar pose data lidar_token = sample["data"][self.lidar_name] sd_record_lid = self.lyft_ds.get("sample_data", lidar_token) cs_record_lid = self.lyft_ds.get("calibrated_sensor", sd_record_lid["calibrated_sensor_token"]) ego_record_lid = self.lyft_ds.get("ego_pose", sd_record_lid["ego_pose_token"]) # cam_front_token = sample["data"]["CAM_FRONT"] # sd_record_cam = self.lyft_ds.get("sample_data", cam_front_token) # cs_record_cam = self.lyft_ds.get("calibrated_sensor", sd_record_cam["calibrated_sensor_token"]) # ego_record_cam = self.lyft_ds.get("ego_pose", sd_record_cam["ego_pose_token"]) lid_to_ego = transform_matrix( cs_record_lid["translation"], Quaternion(cs_record_lid["rotation"]), inverse=False ) lid_ego_to_world = transform_matrix( ego_record_lid["translation"], Quaternion(ego_record_lid["rotation"]), inverse=False ) # world_to_cam_ego = transform_matrix( # ego_record_cam["translation"], Quaternion(ego_record_cam["rotation"]), inverse=True # ) # ego_to_cam = transform_matrix( # cs_record_cam["translation"], Quaternion(cs_record_cam["rotation"]), inverse=True # ) # velo_to_cam = np.dot(ego_to_cam, np.dot(world_to_cam_ego, np.dot(lid_ego_to_world, lid_to_ego))) # # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam. # velo_to_cam_kitti = np.dot(velo_to_cam, self.kitti_to_nu_lidar.transformation_matrix) velo_pose = np.dot(lid_ego_to_world, lid_to_ego) velo_pose_kitti = np.dot(velo_pose, self.kitti_to_nu_lidar.transformation_matrix) # If this is the start of the new sequence, update scence name, and update the first pose if sample_n == 0: ### get scene information scene = self.lyft_ds.get("scene", sample["scene_token"] ) scene_name = scene["name"] scene_folder = self.store_dir.joinpath(scene_name) if not scene_folder.is_dir(): scene_folder.mkdir(parents=True) if started: gt_file.close() gt_path = scene_folder.joinpath("groundtruth.txt") gt_file = open(gt_path, "w") car_pose = np.identity(4) first_pose = velo_pose started = True else: car_pose = np.dot(np.linalg.inv(first_pose), velo_pose) # Write to disk. gt_file.write(" ".join(str(value) for pose_row in car_pose[0:3, :] for value in pose_row)) gt_file.write('\n')
# Add Z dimension sample_boxes = np.vstack(( sample_boxes, np.zeros(sample_boxes.shape[1]), )) # (2, N*4) -> (3, N*4) sample = level5data.get("sample", sample_token) sample_lidar_token = sample["data"]["LIDAR_TOP"] lidar_data = level5data.get("sample_data", sample_lidar_token) lidar_filepath = level5data.get_sample_data_path(sample_lidar_token) ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"]) ego_translation = np.array(ego_pose['translation']) global_from_car = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']), inverse=False) car_from_voxel = np.linalg.inv( create_transformation_matrix_to_voxel_space(bev_shape, voxel_size, (0, 0, z_offset))) global_from_voxel = np.dot(global_from_car, car_from_voxel) sample_boxes = transform_points(sample_boxes, global_from_voxel) # We don't know at where the boxes are in the scene on the z-axis (up-down), let's assume all of them are at # the same height as the ego vehicle. sample_boxes[2, :] = ego_pose["translation"][2] # (3, N*4) -> (N, 4, 3) sample_boxes = sample_boxes.transpose(1, 0).reshape(-1, 4, 3)
data_dict[token] = { 'lidar_fp': None, 'ego_pose': None, 'cal_sensor': None, 'boxes': None, 'prev_token': prev_token } token = sample['next'] continue # get the lidar, ego and sensor objects for the token lidar_data = l5d.get('sample_data', lidar_token) ego_pose = l5d.get('ego_pose', lidar_data['ego_pose_token']) cal_sensor = l5d.get('calibrated_sensor', lidar_data['calibrated_sensor_token']) car_from_sensor = transform_matrix(cal_sensor['translation'], Quaternion( cal_sensor['rotation']), inverse=False) lidar_pointcloud.transform(car_from_sensor) lidar_points = lidar_pointcloud.points[:3, :] boxes = l5d.get_boxes(lidar_token) # collect the ground truth boxes canv_boxes = move_boxes_to_canvas_space(boxes, ego_pose, lidar_points) boxes_fp = osp.join(box_dir, token + '_boxes.pkl') pickle.dump(canv_boxes, open(boxes_fp, 'wb')) prev_token = sample['prev'] data_dict[token] = { 'lidar_fp': str(lidar_filepath), 'ego_pose': ego_pose, 'cal_sensor': cal_sensor, 'boxes': boxes_fp,
def from_file_multisweep( cls, lyftd, sample_rec: Dict, chan: str, ref_chan: str, num_sweeps: int = 26, min_distance: float = 1.0) -> Tuple["PointCloud", np.ndarray]: """Return a point cloud that aggregates multiple sweeps. As every sweep is in a different coordinate frame, we need to map the coordinates to a single reference frame. As every sweep has a different timestamp, we need to account for that in the transformations and timestamps. Args: lyftd: A LyftDataset instance. sample_rec: The current sample. chan: The radar channel from which we track back n sweeps to aggregate the point cloud. ref_chan: The reference channel of the current sample_rec that the point clouds are mapped to. num_sweeps: Number of sweeps to aggregated. min_distance: Distance below which points are discarded. Returns: (all_pc, all_times). The aggregated point cloud and timestamps. """ # Init points = np.zeros((cls.nbr_dims(), 0)) all_pc = cls(points) all_times = np.zeros((1, 0)) # Get reference pose and timestamp ref_sd_token = sample_rec["data"][ref_chan] ref_sd_rec = lyftd.get("sample_data", ref_sd_token) ref_pose_rec = lyftd.get("ego_pose", ref_sd_rec["ego_pose_token"]) ref_cs_rec = lyftd.get("calibrated_sensor", ref_sd_rec["calibrated_sensor_token"]) ref_time = 1e-6 * ref_sd_rec["timestamp"] # Homogeneous transform from ego car frame to reference frame ref_from_car = transform_matrix(ref_cs_rec["translation"], Quaternion(ref_cs_rec["rotation"]), inverse=True) # Homogeneous transformation matrix from global to _current_ ego car frame car_from_global = transform_matrix(ref_pose_rec["translation"], Quaternion( ref_pose_rec["rotation"]), inverse=True) # Aggregate current and previous sweeps. sample_data_token = sample_rec["data"][chan] current_sd_rec = lyftd.get("sample_data", sample_data_token) for _ in range(num_sweeps): # Load up the pointcloud. current_pc = cls.from_file(lyftd.data_path / current_sd_rec["filename"]) # Get past pose. current_pose_rec = lyftd.get("ego_pose", current_sd_rec["ego_pose_token"]) global_from_car = transform_matrix( current_pose_rec["translation"], Quaternion(current_pose_rec["rotation"]), inverse=False) # Homogeneous transformation matrix from sensor coordinate frame to ego car frame. current_cs_rec = lyftd.get( "calibrated_sensor", current_sd_rec["calibrated_sensor_token"]) car_from_current = transform_matrix( current_cs_rec["translation"], Quaternion(current_cs_rec["rotation"]), inverse=False) # Fuse four transformation matrices into one and perform transform. trans_matrix = reduce(np.dot, [ ref_from_car, car_from_global, global_from_car, car_from_current ]) current_pc.transform(trans_matrix) # Remove close points and add timevector. current_pc.remove_close(min_distance) time_lag = ref_time - 1e-6 * current_sd_rec[ "timestamp"] # positive difference times = time_lag * np.ones((1, current_pc.nbr_points())) all_times = np.hstack((all_times, times)) # Merge with key pc. all_pc.points = np.hstack((all_pc.points, current_pc.points)) # Abort if there are no previous sweeps. if current_sd_rec["prev"] == "": break else: current_sd_rec = lyftd.get("sample_data", current_sd_rec["prev"]) return all_pc, all_times
def prepare_training_data_for_scene(sample_token, level5data, output_folder, bev_shape=bev_shape, voxel_size=voxel_size, z_offset=z_offset, box_scale=box_scale): """ Given a sample token (in a scene), output rasterized input volumes in birds-eye-view perspective. """ if (os.path.exists( os.path.join(output_folder, "{}_target.png".format(sample_token)))): print("file processed, skip") else: sample = level5data.get("sample", sample_token) sample_lidar_token = sample["data"]["LIDAR_TOP"] lidar_data = level5data.get("sample_data", sample_lidar_token) lidar_filepath = level5data.get_sample_data_path( sample_lidar_token) ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"]) calibrated_sensor = level5data.get( "calibrated_sensor", lidar_data["calibrated_sensor_token"]) global_from_car = transform_matrix(ego_pose['translation'], Quaternion( ego_pose['rotation']), inverse=False) car_from_sensor = transform_matrix( calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']), inverse=False) try: # generate input images which is the bird eye view data lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath) lidar_pointcloud.transform(car_from_sensor) bev = create_voxel_pointcloud(lidar_pointcloud.points, bev_shape, voxel_size=voxel_size, z_offset=z_offset) bev = normalize_voxel_intensities(bev) bev_im = np.round(bev * 255).astype(np.uint8) cv2.imwrite( os.path.join(output_folder, "{}_input.png".format(sample_token)), bev_im) # generate target images which is the bird eye view label data boxes = level5data.get_boxes(sample_lidar_token) target = np.zeros_like(bev) move_boxes_to_car_space(boxes, ego_pose) scale_boxes(boxes, box_scale) draw_boxes(target, voxel_size, boxes=boxes, classes=classes, z_offset=z_offset) target_im = target[:, :, 0] # take one channel only cv2.imwrite( os.path.join(output_folder, "{}_target.png".format(sample_token)), target_im) # generate map mask image for better performance map_mask = level5data.map[0]["mask"] semantic_im = get_semantic_map_around_ego( map_mask, ego_pose, voxel_size[0], target_im.shape) semantic_im = np.round(semantic_im * 255).astype(np.uint8) cv2.imwrite( os.path.join(output_folder, "{}_map.png".format(sample_token)), semantic_im) except Exception as e: print("Failed to load Lidar Pointcloud for {}: {}:".format( sample_token, e))
def main(): # get data #level5data = LyftDataset(data_path = '../input/', json_path='../input/train_data', verbose=True) # local laptop level5data = LyftDataset(data_path = '.', json_path='../../input/train_data', verbose=True) # server classes = ["car", "motorcycle", "bus", "bicycle", "truck", "pedestrian", "other_vehicle", "animal", "emergency_vehicle"] df = pd.read_csv('host_scenes.csv') host_count_df = df.groupby("host")['scene_token'].count() print(host_count_df) sample_token = df.first_sample_token.values[0] sample = level5data.get("sample", sample_token) sample_lidar_token = sample["data"]["LIDAR_TOP"] lidar_data = level5data.get("sample_data", sample_lidar_token) lidar_filepath = level5data.get_sample_data_path(sample_lidar_token) # car and sensor coords ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"]) print("ego_pose: ", ego_pose) calibrated_sensor = level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"]) # Homogeneous transformation matrix from car frame to world frame. global_from_car = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']), inverse=False) # Homogeneous transformation matrix from sensor coordinate frame to ego car frame car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']), inverse=False) lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath) lidar_pointcloud.transform(car_from_sensor) # Define hyper parameters bev_shape = (768, 768, 3) voxel_size = (0.2,0.2,1.5) #bev_shape = (2048, 2048, 3) #voxel_size = (0.1,0.1,1.5) z_offset = -2.0 box_scale = 0.8 test_coco_targets_for_sample(sample_token=sample_token, output_folder=OUTPUT_ROOT, level5data=level5data, classes=classes, bev_shape=bev_shape, voxel_size=voxel_size, z_offset=z_offset, box_scale=box_scale) train_data_folder = os.path.join(OUTPUT_ROOT, "coco_768") os.makedirs(train_data_folder, exist_ok=True) """ # test on a single scene prepare_coco_targets_for_scene(df.first_sample_token.values[50], train_data_folder, level5data, classes, bev_shape, voxel_size, z_offset, box_scale) test_prepare_coco_targets(df.first_sample_token.values[50], train_data_folder, level5data, bev_shape, voxel_size, z_offset) """ # get for all scenes first_samples = df.first_sample_token.values for sample in first_samples: print(sample) prepare_coco_targets_for_scene(sample, train_data_folder, level5data, classes, bev_shape, voxel_size, z_offset, box_scale) print('Mission accomplished!')