def save_lidar(self, frame, frame_num):
    """
    Parse and save the lidar data in binary (.bin) format.
    :param frame: open dataset frame proto
    :param frame_num: the current frame number
    :return:
    """
    range_images, camera_projections, range_image_top_pose = \
        frame_utils.parse_range_image_and_camera_projection(frame)
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose)
    # 3D points in vehicle frame, concatenated across all lidars.
    points_all = np.concatenate(points, axis=0)
    # Placeholder reflectance: set the intensity of every point to 1.0.
    intensity_all = np.ones((points_all.shape[0], 1))
    point_cloud = np.column_stack((points_all, intensity_all))
    pc_path = LIDAR_PATH + '/' + str(frame_num).zfill(INDEX_LENGTH) + '.bin'
    point_cloud.tofile(pc_path)
def save_lidar(self, frame, file_idx, frame_idx):
    """Parse and save the lidar data in binary (.bin) format.

    Args:
        frame (:obj:`Frame`): Open dataset frame proto.
        file_idx (int): Current file index.
        frame_idx (int): Current frame index.
    """
    (range_images, camera_projections, range_image_top_pose
     ) = frame_utils.parse_range_image_and_camera_projection(frame)
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
        frame,
        range_images,
        camera_projections,
        range_image_top_pose,
        keep_polar_features=True)
    # 3D points in vehicle frame; combines the data of all five lidars.
    points_all = np.concatenate(points, axis=0)
    # With keep_polar_features=True each row is
    # (range, intensity, elongation, x, y, z); reorder to (x, y, z, intensity).
    column_index = [3, 4, 5, 1]
    points_xyz_intensity = points_all[:, column_index]
    pc_path = f'{self.point_cloud_save_dir}/' + \
        f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.bin'
    points_xyz_intensity.astype(np.float32).tofile(pc_path)
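# A brief usage sketch (not from the original sources): both save_lidar variants
# above write a flat float32 stream, so the .bin files can be read back
# KITTI-style as an (N, 4) array of x, y, z, intensity. The path is hypothetical.
pc = np.fromfile('000000.bin', dtype=np.float32).reshape(-1, 4)
xyz, intensity = pc[:, :3], pc[:, 3]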
def lidar(self, frame):
    (range_images, camera_projections, range_image_top_pose) = \
        frame_utils.parse_range_image_and_camera_projection(frame)
    points, _ = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose)
    points_all = np.concatenate(points, axis=0)
    return points_all
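# A minimal driver sketch, assuming the imports used throughout these snippets
# (tensorflow as tf, open_dataset, frame_utils); 'extractor' and the segment
# path are hypothetical.
dataset = tf.data.TFRecordDataset('segment-xxxx.tfrecord', compression_type='')
for data in dataset:
    frame = open_dataset.Frame()
    frame.ParseFromString(bytearray(data.numpy()))
    points_all = extractor.lidar(frame)  # (N, 3) points in vehicle frame
    break  # first frame only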
def extract(i):
    FILENAME = segments[i]
    run = FILENAME.split('segment-')[-1].split('.')[0]
    out_base_dir = sys.argv[2] + '/%s/' % run
    if not os.path.exists(out_base_dir):
        os.makedirs(out_base_dir)
    dataset = tf.data.TFRecordDataset(FILENAME, compression_type='')
    print(FILENAME)
    pc, pc_c = [], []
    camID2extr_v2c = {}
    camID2intr = {}
    all_static_pc = []
    for frame_cnt, data in enumerate(dataset):
        if frame_cnt % 2 != 0:
            continue  # set the sampling rate here
        print('frame ', frame_cnt)
        frame = open_dataset.Frame()
        frame.ParseFromString(bytearray(data.numpy()))
        extr_laser2v = np.array(
            frame.context.laser_calibrations[0].extrinsic.transform).reshape(4, 4)
        extr_v2w = np.array(frame.pose.transform).reshape(4, 4)
        if frame_cnt == 0:
            for k in range(len(frame.context.camera_calibrations)):
                calib = frame.context.camera_calibrations[k]
                cameraID = calib.name
                extr_c2v = np.array(calib.extrinsic.transform).reshape(4, 4)
                extr_v2c = np.linalg.inv(extr_c2v)
                # Key by the calibration's own camera name; indexing
                # frame.images with k assumes images and calibrations share an
                # order, which is not guaranteed.
                camID2extr_v2c[cameraID] = extr_v2c
                fx, fy, cx, cy = calib.intrinsic[0:4]
                k1, k2, p1, p2, k3 = calib.intrinsic[4:9]
                camID2intr[cameraID] = np.array([[fx, 0, cx],
                                                 [0, fy, cy],
                                                 [0, 0, 1]])
        # lidar point cloud
        (range_images, camera_projections, range_image_top_pose) = \
            frame_utils.parse_range_image_and_camera_projection(frame)
        points, cp_points = frame_utils.convert_range_image_to_point_cloud(
            frame, range_images, camera_projections, range_image_top_pose)
        points_all = np.concatenate(points, axis=0)
        np.save('%s/frame_%03d.npy' % (out_base_dir, frame_cnt), points_all)
        datalist.append(
            os.path.abspath('%s/frame_%03d.npy' % (out_base_dir, frame_cnt)))
def save_pc(frame, dst):
    range_images, camera_projections, range_image_top_pose = \
        frame_utils.parse_range_image_and_camera_projection(frame)
    # First return (ri_index=0).
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose)
    # Second return (ri_index=1).
    points_ri2, cp_points_ri2 = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose,
        ri_index=1)
    points = np.concatenate(points + points_ri2, axis=0)
    # norm = np.array([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
    # points = np.dot(points, norm)
    # Set the reflectance to 1.0 for every point.
    points = np.concatenate(
        [points, np.ones((points.shape[0], 1), dtype=np.float32)], axis=1)
    points = points.reshape(-1).astype(np.float32)
    points.tofile(dst)
def get_lidar(frames, frame_index):
    (range_images, camera_projections, range_image_top_pose
     ) = frame_utils.parse_range_image_and_camera_projection(frames[frame_index])
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
        frames[frame_index], range_images, camera_projections,
        range_image_top_pose)
    # Intensity of the top lidar's first return (return index 0).
    intensity_top_lidar_r0 = get_intensity(
        range_images[open_dataset.LaserName.TOP][0])
    return points, intensity_top_lidar_r0
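# get_lidar above assumes a get_intensity helper. A plausible sketch: channel 0
# of a decoded Waymo range image is range and channel 1 is intensity, and
# masking by range > 0 keeps the ordering consistent with the points returned
# by convert_range_image_to_point_cloud.
def get_intensity(range_image):
    ri_tensor = tf.reshape(
        tf.convert_to_tensor(range_image.data), range_image.shape.dims)
    mask = ri_tensor[..., 0] > 0  # valid returns only
    return tf.boolean_mask(ri_tensor[..., 1], mask).numpy()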
def extract_laser(self, ndx, frame):
    # Extract the range images, camera projections, and top pose.
    range_images, camera_projections, range_image_top_pose = \
        frame_utils.parse_range_image_and_camera_projection(frame)
    frame.lasers.sort(key=lambda laser: laser.name)
    # Use the helper provided by Waymo OD to convert the range images into
    # point clouds, then save the data for later visualization.
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose)
    # Point cloud data.
    with open("{}/{}_points.data".format(self.laser_images_dir, ndx), 'wb') as f_p:
        pickle.dump(points, f_p)
    # Camera projections for each point cloud.
    with open("{}/{}_cp_points.data".format(self.laser_images_dir, ndx), 'wb') as f_cp:
        pickle.dump(cp_points, f_cp)
def get_lidar(frame):
    """
    Parse the lidar data and return it as an (N, 4) point cloud.
    :param frame: open dataset frame proto
    """
    range_images, camera_projections, range_image_top_pose = \
        frame_utils.parse_range_image_and_camera_projection(frame)
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose)
    points_all = np.concatenate(points, axis=0)
    # Placeholder reflectance: set the intensity of every point to 1.0.
    intensity_all = np.ones((points_all.shape[0], 1))
    point_cloud = np.column_stack((points_all, intensity_all))
    return point_cloud
def dataExtractor(self):
    frame = open_dataset.Frame()
    # Convert the tfrecord to a list.
    datasetAsList = list(self.dataset.as_numpy_iterator())
    self.totalFrames = len(datasetAsList)
    # Parse each serialized frame.
    for frameIdx in range(self.totalFrames):
        frame.ParseFromString(datasetAsList[frameIdx])
        range_images, camera_projections, range_image_top_pose = \
            frame_utils.parse_range_image_and_camera_projection(frame)
        frame.lasers.sort(key=lambda laser: laser.name)
        points, cp_points = frame_utils.convert_range_image_to_point_cloud(
            frame, range_images, camera_projections, range_image_top_pose)
        self.d = {
            "type_unknown": 0,
            "type_vehicle": 0,
            "type_pedestrian": 0,
            "type_sign": 0,
            "type_cyclist": 0,
            "label_unknown": 0,
            "label_level_1": 0,
            "label_level_2": 0
        }
        self.process_frames(frame, frame.camera_labels)
        for index, image in enumerate(frame.images):
            self.extract_image(image)
            self.extract_labels(image, frame)
            self.assign_camera_labels(image.name)
        # Write camera data to folder.
        self.write_camera_data(self.front_labels, self.front, 'FRONT')
        self.write_camera_data(self.front_left_labels, self.front_left,
                               'FRONT_LEFT')
        self.write_camera_data(self.front_right_labels, self.front_right,
                               'FRONT_RIGHT')
        self.write_camera_data(self.side_left_labels, self.side_left,
                               'SIDE_LEFT')
        self.write_camera_data(self.side_right_labels, self.side_right,
                               'SIDE_RIGHT')
        # Write range data to folder.
        self.write_range_data(points, cp_points)
        self.count += 1
def frame_points(self, frame, projection=False):
    """
    Return a Tensor of points in the vehicle coordinate system for a given
    frame. If projection is True, return the points' camera projections
    instead.
    """
    (range_images, camera_projections, range_image_top_pose
     ) = frame_utils.parse_range_image_and_camera_projection(frame)
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose,
        ri_index=0)
    if projection:
        return tf.constant(np.concatenate(cp_points))
    else:
        return tf.constant(np.concatenate(points, axis=0))
def parse_lidar_data(frame):
    (range_images, camera_projections, range_image_top_pose) = \
        frame_utils.parse_range_image_and_camera_projection(frame)
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose)
    # The second return could be fetched the same way with ri_index=1.
    # 3D points in vehicle frame.
    points_all = np.concatenate(points, axis=0)
    # Camera projection corresponding to each point.
    cp_points_all = np.concatenate(cp_points, axis=0)
    images = sorted(frame.images, key=lambda i: i.name)
    cp_points_all_concat = np.concatenate([cp_points_all, points_all], axis=-1)
    # The distance between lidar points and the vehicle frame origin.
    points_all_tensor = tf.norm(points_all, axis=-1, keepdims=True)
    cp_points_all_tensor = tf.constant(cp_points_all, dtype=tf.int32)
    # Keep only points that project into the first camera.
    mask = tf.equal(cp_points_all_tensor[..., 0], images[0].name)
    cp_points_all_tensor = tf.cast(
        tf.gather_nd(cp_points_all_tensor, tf.where(mask)), dtype=tf.float32)
    points_all_tensor = tf.gather_nd(points_all_tensor, tf.where(mask))
    # (u, v, range) per projected point.
    projected_points_all_from_raw_data = tf.concat(
        [cp_points_all_tensor[..., 1:3], points_all_tensor], axis=-1).numpy()
    # plot_points_on_image(projected_points_all_from_raw_data, images[0],
    #                      rgba, point_size=5.0)
    # plt.show()
    return projected_points_all_from_raw_data
def main(data_folder, output_folder, multi_process_token=(0, 1)):
    tf_records = os.listdir(data_folder)
    tf_records = [x for x in tf_records if 'tfrecord' in x]
    tf_records = sorted(tf_records)
    for record_index, tf_record_name in enumerate(tf_records):
        if record_index % multi_process_token[1] != multi_process_token[0]:
            continue
        print('starting for raw pc', record_index + 1, ' / ', len(tf_records),
              ' ', tf_record_name)
        FILE_NAME = os.path.join(data_folder, tf_record_name)
        dataset = tf.data.TFRecordDataset(FILE_NAME, compression_type='')
        segment_name = tf_record_name.split('.')[0]
        # if os.path.exists(os.path.join(output_folder, '{}.npz'.format(segment_name))):
        #     continue
        frame_num = 0
        pcs = dict()
        for data in dataset:
            frame = open_dataset.Frame()
            frame.ParseFromString(bytearray(data.numpy()))
            # extract the points
            (range_images, camera_projections, range_image_top_pose) = \
                frame_utils.parse_range_image_and_camera_projection(frame)
            points, cp_points = frame_utils.convert_range_image_to_point_cloud(
                frame, range_images, camera_projections, range_image_top_pose,
                ri_index=0)
            points_all = np.concatenate(points, axis=0)
            pcs[str(frame_num)] = points_all
            frame_num += 1
            if frame_num % 10 == 0:
                print('Record {} / {} FNumber {:}'.format(
                    record_index + 1, len(tf_records), frame_num))
        print('{:} frames in total'.format(frame_num))
        np.savez_compressed(
            os.path.join(output_folder, '{}.npz'.format(segment_name)), **pcs)
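# A short read-back sketch for the archives written above (using the same names
# as in main): np.savez_compressed stores one (N, 3) array per frame, keyed by
# the frame number as a string.
data = np.load(os.path.join(output_folder, segment_name + '.npz'))
first_frame_pc = data['0']  # point cloud of frame 0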
    FLImages.append(frame_images[1])
    SLImages.append(frame_images[2])
    FRImages.append(frame_images[3])
    SRImages.append(frame_images[4])
    # Labels.
    FImageLabels.append(frames[i].camera_labels[0])
    FLImageLabels.append(frames[i].camera_labels[1])
    SLImageLabels.append(frames[i].camera_labels[2])
    FRImageLabels.append(frames[i].camera_labels[3])
    SRImageLabels.append(frames[i].camera_labels[4])
    # Lidar points.
    (range_images, camera_projections, range_image_top_pose
     ) = frame_utils.parse_range_image_and_camera_projection(frames[i])
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
        frames[i], range_images, camera_projections, range_image_top_pose)
    lidars_frames.append(points)
    vehicle_poses.append(np.array(frames[i].pose.transform).reshape((4, 4)))
    # Laser labels (3D bounding boxes).
    laser_labels.append(frames[i].laser_labels)
    counter += 1
    rospy.loginfo("Processing Frame: " + str(counter))

TF_lidarF = np.array(
    frames[0].context.laser_calibrations[0].extrinsic.transform).reshape((4, 4))
TF_lidarR = np.array(
    frames[0].context.laser_calibrations[1].extrinsic.transform).reshape(
def main(args: argparse.Namespace) -> None:
    """Main script to convert Waymo object labels, LiDAR, images, pose, and
    calibration to the Argoverse data format on disk."""
    TFRECORD_DIR = args.waymo_dir
    ARGO_WRITE_DIR = args.argo_dir
    track_id_dict = {}
    img_count = 0
    log_ids = get_log_ids_from_files(TFRECORD_DIR)
    for log_id, tf_fpath in log_ids.items():
        dataset = tf.data.TFRecordDataset(tf_fpath, compression_type="")
        log_calib_json = None
        for data in dataset:
            frame = open_dataset.Frame()
            frame.ParseFromString(bytearray(data.numpy()))
            # Check that we extracted the correct log ID.
            assert log_id == frame.context.name
            # Frame start time: the timestamp of the first top lidar spin
            # within this frame, in microseconds.
            timestamp_us = frame.timestamp_micros
            timestamp_ns = int(timestamp_us * 1000)  # to nanoseconds
            SE3_flattened = np.array(frame.pose.transform)
            city_SE3_egovehicle = SE3_flattened.reshape(4, 4)
            if args.save_poses:
                dump_pose(city_SE3_egovehicle, timestamp_ns, log_id,
                          ARGO_WRITE_DIR)
            # Read the lidar data and save it in point cloud format. Only one
            # of the two range images Waymo provides is used; the second one
            # can be selected via the script arguments.
            (
                range_images,
                camera_projections,
                range_image_top_pose,
            ) = frame_utils.parse_range_image_and_camera_projection(frame)
            if args.range_image == 1:
                points_ri, cp_points_ri = frame_utils.convert_range_image_to_point_cloud(
                    frame, range_images, camera_projections,
                    range_image_top_pose
                )
            elif args.range_image == 2:
                points_ri, cp_points_ri = frame_utils.convert_range_image_to_point_cloud(
                    frame,
                    range_images,
                    camera_projections,
                    range_image_top_pose,
                    ri_index=1,
                )
            points_all_ri = np.concatenate(points_ri, axis=0)
            if args.save_cloud:
                dump_point_cloud(points_all_ri, timestamp_ns, log_id,
                                 ARGO_WRITE_DIR)
            # Save labels.
            if args.save_labels:
                dump_object_labels(
                    frame.laser_labels,
                    timestamp_ns,
                    log_id,
                    ARGO_WRITE_DIR,
                    track_id_dict,
                )
            if args.save_calibration:
                calib_json = form_calibration_json(
                    frame.context.camera_calibrations)
                if log_calib_json is None:
                    log_calib_json = calib_json
                    calib_json_fpath = (
                        f"{ARGO_WRITE_DIR}/{log_id}/vehicle_calibration_info.json"
                    )
                    check_mkdir(str(Path(calib_json_fpath).parent))
                    save_json_dict(calib_json_fpath, calib_json)
                else:
                    assert calib_json == log_calib_json
            # 5 images per frame.
            for index, tf_cam_image in enumerate(frame.images):
                # 4x4 row-major transform matrix that transforms
                # 3d points from one frame to another.
                SE3_flattened = np.array(tf_cam_image.pose.transform)
                city_SE3_egovehicle = SE3_flattened.reshape(4, 4)
                # In seconds.
                timestamp_s = tf_cam_image.pose_timestamp
                # TODO: this looks buggy, need to confirm.
                timestamp_ns = int(
                    round_to_micros(int(timestamp_s * 1e9)) * 1000)  # to nanoseconds
                if args.save_poses:
                    dump_pose(city_SE3_egovehicle, timestamp_ns, log_id,
                              ARGO_WRITE_DIR)
                if args.save_images:
                    camera_name = CAMERA_NAMES[tf_cam_image.name]
                    img = tf.image.decode_jpeg(tf_cam_image.image)
                    new_img = undistort_image(
                        np.asarray(img),
                        frame.context.camera_calibrations,
                        tf_cam_image.name,
                    )
                    img_save_fpath = f"{ARGO_WRITE_DIR}/{log_id}/{camera_name}/"
                    img_save_fpath += f"{camera_name}_{timestamp_ns}.jpg"
                    check_mkdir(str(Path(img_save_fpath).parent))
                    imageio.imwrite(img_save_fpath, new_img)
                    img_count += 1
                    if img_count % 100 == 0:
                        print(f"\tSaved {img_count}'th image for log = {log_id}")
def label_ext_with_occlusion(i, filename, Label_all, Label):
    FileName = filename
    dataset = tf.data.TFRecordDataset(FileName, compression_type='')
    __lidar_list = [
        '_FRONT', '_FRONT_RIGHT', '_FRONT_LEFT', '_SIDE_RIGHT', '_SIDE_LEFT'
    ]
    __type_list = ['unknown', 'Vehicle', 'Pedestrian', 'Sign', 'Cyclist']
    for data in dataset:
        i_str = '{0:06}'.format(i)
        frame = open_dataset.Frame()
        frame.ParseFromString(bytearray(data.numpy()))
        file_path = os.path.join(Label_all, "%s.txt" % i_str)
        f_open = open(file_path, 'w+')
        id_to_bbox = dict()
        id_to_name = dict()
        for labels in frame.projected_lidar_labels:
            name = labels.name
            for label in labels.labels:
                bbox = [
                    label.box.center_x - label.box.length / 2,
                    label.box.center_y - label.box.width / 2,
                    label.box.center_x + label.box.length / 2,
                    label.box.center_y + label.box.width / 2
                ]
                id_to_bbox[label.id] = bbox
                id_to_name[label.id] = name - 1
        Tr_velo_to_cam = []
        waymo_cam_RT = np.array(
            [0, -1, 0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 1]).reshape(4, 4)
        for camera in frame.context.camera_calibrations:
            tmp = np.array(camera.extrinsic.transform).reshape(4, 4)
            tmp = np.linalg.inv(tmp)
            tmp = np.matmul(waymo_cam_RT, tmp)
            Tr_velo_to_cam.append(tmp)
        laser = open_dataset.LaserName.TOP
        laser_calib = [
            obj for obj in frame.context.laser_calibrations if obj.name == laser
        ][0]
        RI, CP, RITP = frame_utils.parse_range_image_and_camera_projection(frame)
        pcl, pcl_attr = frame_utils.convert_range_image_to_point_cloud(
            frame, RI, CP, RITP)
        pcl = np.concatenate(pcl, axis=0)
        # Project the point cloud into each label's box coordinates and count
        # the points that fall inside; boxes with enough hits count as visible.
        vehicle_to_labels = [
            np.linalg.inv(get_box_transformation_matrix(label.box))
            for label in frame.laser_labels
        ]
        vehicle_to_labels = np.stack(vehicle_to_labels)
        pcl1 = np.concatenate((pcl, np.ones_like(pcl[:, 0:1])), axis=1)
        proj_pcl = np.einsum('lij,bj->lbi', vehicle_to_labels, pcl1)
        mask = np.logical_and.reduce(
            np.logical_and(proj_pcl >= -1, proj_pcl <= 1), axis=2)
        counts = mask.sum(1)
        visibility = counts > 10
        for obj, visib in zip(frame.laser_labels, visibility):
            # Calculate the 2D bounding box.
            bounding_box = None
            name = None
            id = obj.id
            for lidar in __lidar_list:
                if id + lidar in id_to_bbox:
                    bounding_box = id_to_bbox.get(id + lidar)
                    name = str(id_to_name.get(id + lidar))
                    break
            if bounding_box is None or name is None:
                continue
            my_type = __type_list[obj.type]
            occluded = 0 if visib else 1
            truncated = 0.00
            height = obj.box.height
            width = obj.box.width
            length = obj.box.length
            x = obj.box.center_x
            y = obj.box.center_y
            z = obj.box.center_z
            rotation_y = obj.box.heading
            z -= height / 2
            transform_box_to_cam = Tr_velo_to_cam[int(name)] @ \
                get_box_transformation_matrix(obj.box)
            pt1 = np.array([-0.5, 0.5, 0, 1.])
            pt2 = np.array([0.5, 0.5, 0, 1.])
            pt1 = np.matmul(transform_box_to_cam, pt1)
            pt2 = np.matmul(transform_box_to_cam, pt2)
            new_ry = math.atan2(pt2[2] - pt1[2], pt2[0] - pt1[0])
            rotation_y = -new_ry
            new_loc = np.matmul(Tr_velo_to_cam[int(name)],
                                np.array([x, y, z, 1]).T)
            x, y, z = new_loc[:3]
            beta = math.atan2(x, z)
            alpha = (rotation_y + beta - math.pi / 2) % (2 * math.pi)
            # Save the labels.
            line = my_type + ' {} {} {} {} {} {} {} {} {} {} {} {} {} {}\n'.format(
                round(truncated, 2), round(occluded, 2), round(alpha, 2),
                round(bounding_box[0], 2), round(bounding_box[1], 2),
                round(bounding_box[2], 2), round(bounding_box[3], 2),
                round(height, 2), round(width, 2), round(length, 2),
                round(x, 2), round(y, 2), round(z, 2), round(rotation_y, 2))
            line_all = line[:-1] + ' ' + '\n'
            # Store the per-camera label.
            name_of_file = "%s.txt" % i_str
            fp_label = open(Label + name + '/' + name_of_file, 'a')
            fp_label.write(line)
            fp_label.close()
            f_open.write(line_all)
        f_open.close()
        i = i + 1
    return i
def main(data_folder, output_folder, multi_process_token=(0, 1)):
    tf_records = os.listdir(data_folder)
    tf_records = [x for x in tf_records if 'tfrecord' in x]
    tf_records = sorted(tf_records)
    pc_folder = os.path.join(output_folder, 'pc', 'raw_pc')
    for record_index, tf_record_name in enumerate(tf_records):
        if record_index % multi_process_token[1] != multi_process_token[0]:
            continue
        print('starting for raw pc', record_index + 1, ' / ', len(tf_records),
              ' ', tf_record_name)
        FILE_NAME = os.path.join(data_folder, tf_record_name)
        dataset = tf.data.TFRecordDataset(FILE_NAME, compression_type='')
        segment_name = tf_record_name.split('.')[0]
        frame_num = 0
        pcs = dict()
        gt_info = dict()
        for data in dataset:
            frame = open_dataset.Frame()
            frame.ParseFromString(bytearray(data.numpy()))
            context = frame.context.name
            ts = frame.timestamp_micros
            # name = str(context) + '/' + str(ts)
            # extract the points
            (range_images, camera_projections, range_image_top_pose) = \
                frame_utils.parse_range_image_and_camera_projection(frame)
            points, cp_points = frame_utils.convert_range_image_to_point_cloud(
                frame, range_images, camera_projections, range_image_top_pose,
                ri_index=0)
            points_ri2, cp_points_ri2 = frame_utils.convert_range_image_to_point_cloud(
                frame, range_images, camera_projections, range_image_top_pose,
                ri_index=1)
            points_all = np.concatenate(points, axis=0)
            points_all_ri2 = np.concatenate(points_ri2, axis=0)
            laser_labels = frame.laser_labels
            frame_ids = list()
            frame_boxes = list()
            frame_types = list()
            frame_nums = list()
            for laser_label in laser_labels:
                id = laser_label.id
                box = laser_label.box
                box_dict = pb2dict(box)
                box_array = bbox_dict2array(box_dict)
                frame_boxes.append(box_array[np.newaxis, :])
                frame_ids.append(id)
                frame_types.append(laser_label.type)
                frame_nums.append(laser_label.num_lidar_points_in_box)
            gt_info = {
                'boxes': frame_boxes,
                'ids': frame_ids,
                'types': frame_types,
                'pts_nums': frame_nums
            }
            frame_num += 1
            if frame_num % 10 == 0:
                print('Record {} / {} FNumber {:}'.format(
                    record_index + 1, len(tf_records), frame_num))
            pc_folder = os.path.join(output_folder, 'pc', segment_name)
            gt_folder = os.path.join(output_folder, 'gt', segment_name)
            if not os.path.exists(pc_folder):
                os.makedirs(pc_folder)
            if not os.path.exists(gt_folder):
                os.makedirs(gt_folder)
            pc_path1 = os.path.join(pc_folder, str(ts) + "_1")
            pc_path2 = os.path.join(pc_folder, str(ts) + "_2")
            gt_path = os.path.join(gt_folder, str(ts))
            np.savez_compressed(pc_path1, pc=points_all)
            np.savez_compressed(pc_path2, pc=points_all_ri2)
            np.savez_compressed(gt_path, **gt_info)
        print('{:} frames in total'.format(frame_num))
        # test_object_detection(image.image)
        show_camera_image(image, frame.camera_labels, [3, 3, index + 1])

    # 3. Print range images.
    plt.figure(figsize=(64, 20))
    frame.lasers.sort(key=lambda laser: laser.name)
    show_range_image(get_range_image(open_dataset.LaserName.TOP, 0), 1)
    show_range_image(get_range_image(open_dataset.LaserName.TOP, 1), 4)

    # 4. Convert images to point cloud and view their content.
    # points = num lasers * [[x, y, z in vehicle frame] * num_points_with_range > 0]
    # cp_points = num lasers * [[name of camera 1, x in cam 1, y in cam 1], ...same for cam 2]
    # The indices and shapes are the same, so points[P] gives coordinates in the
    # vehicle frame corresponding to cp_points[P], which supplies the camera
    # name of the projection plus the x, y in that camera. There are two camera
    # projections because a point can project into up to two cameras.
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose)
    # Same as above, but for return index 1.
    points_ri2, cp_points_ri2 = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose,
        ri_index=1)
    # 3D points in vehicle frame.
    points_all_ri0 = np.concatenate(points, axis=0)
    points_all_ri2 = np.concatenate(points_ri2, axis=0)
    # Camera projection corresponding to each point.
    cp_points_all_ri0 = np.concatenate(cp_points, axis=0)
    cp_points_all_ri2 = np.concatenate(cp_points_ri2, axis=0)
    returnsIndicesToUse = [0, 1]
    points_byreturn = [points_all_ri0, points_all_ri2]
    cp_points_byreturn = [cp_points_all_ri0, cp_points_all_ri2]
def getPointCloudPointsAndCameraProjections(frame, thisFramePose,
                                            worldToReferencePointTransform,
                                            useEnvPoints=True):
    # Get the range images and camera projections from the frame data.
    (range_images, camera_projections, range_image_top_pose
     ) = frame_utils.parse_range_image_and_camera_projection(frame)
    if IS_VISUAL_DEBUG_ENABLED:
        # 2. Print some camera images in the data.
        plt.figure(figsize=(25, 20))
        for index, image in enumerate(frame.images):
            # test_object_detection(image.image)
            show_camera_image(frame, image, frame.camera_labels,
                              [3, 3, index + 1])
        # 3. Print range images.
        plt.figure(figsize=(64, 20))
        frame.lasers.sort(key=lambda laser: laser.name)
        show_range_image(
            get_range_image(range_images, open_dataset.LaserName.TOP, 0), 1)
        show_range_image(
            get_range_image(range_images, open_dataset.LaserName.TOP, 1), 4)

    # Step 2: Convert images to point cloud and view their content.
    # points = num lasers * [[x, y, z in vehicle frame] * num_points_with_range > 0]
    # cp_points = num lasers * [[name of camera 1, x in cam 1, y in cam 1], ...same for cam 2]
    # The indices and shapes are the same, so points[P] gives coordinates in the
    # vehicle frame corresponding to cp_points[P], which supplies the camera
    # name of the projection plus the x, y in that camera. There are two camera
    # projections because a point can project into up to two cameras.
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose)
    # Same as above, but for return index 1.
    points_ri2, cp_points_ri2 = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose,
        ri_index=1)
    # 3D points in vehicle frame.
    points_all_ri0 = np.concatenate(points, axis=0)
    points_all_ri2 = np.concatenate(points_ri2, axis=0)
    # Camera projection corresponding to each point.
    cp_points_all_ri0 = np.concatenate(cp_points, axis=0)
    cp_points_all_ri2 = np.concatenate(cp_points_ri2, axis=0)
    # The 3D points and camera projection data for each return.
    points_byreturn = [points_all_ri0, points_all_ri2]
    cp_points_byreturn = [cp_points_all_ri0, cp_points_all_ri2]
    # Process both returns (defined locally rather than relying on a global).
    returnsIndicesToUse = [0, 1]

    # Mask out all point cloud points that are inside a box containing either a
    # pedestrian or a car. Acts as an inverse if useEnvPoints is False, so that
    # only the points inside those boxes are kept.
    if IGNORE_POINTS_IN_CAR_OR_PEDESTRIAN_BBOXES:
        lidarLabels = frame.laser_labels
        allBoxes = np.zeros((len(lidarLabels), 7))
        for entityIndex, label in enumerate(lidarLabels):
            box = label.box
            allBoxes[entityIndex][:] = [
                box.center_x, box.center_y, box.center_z,
                box.length * BBOX_EXTENSION_F, box.width * BBOX_EXTENSION_F,
                box.height * BBOX_EXTENSION_F, box.heading
            ]
        for returnIndex in returnsIndicesToUse:
            points = points_byreturn[returnIndex]
            if useEnvPoints:
                mask_inBoxOfPedestrianOrCar = np.logical_not(
                    box_utils.is_within_any_box_3d(points, allBoxes))
            else:
                mask_inBoxOfPedestrianOrCar = box_utils.is_within_any_box_3d(
                    points, allBoxes)
            points_byreturn[returnIndex] = \
                points_byreturn[returnIndex][mask_inBoxOfPedestrianOrCar]
            cp_points_byreturn[returnIndex] = \
                cp_points_byreturn[returnIndex][mask_inBoxOfPedestrianOrCar]

    # Last step: convert all points from vehicle frame space to world space,
    # relative to the first vehicle frame pose.
    if DO_OUTPUT_IN_WORLD_SPACE:
        # Get the vehicle-to-world transform for this frame.
        vehicleToWorldTransform = thisFramePose
        vehicleToWorldRotation = vehicleToWorldTransform[0:3, 0:3]
        vehicleToWorldTranslation = vehicleToWorldTransform[0:3, 3]
        # Then the world transform back to the reference point.
        worldToReferenceRotation = worldToReferencePointTransform[0:3, 0:3]
        worldToReferenceTranslation = worldToReferencePointTransform[0:3, 3]
        for returnIndex, allPoints in enumerate(points_byreturn):
            # Transform all points to world coordinates first.
            allPoints = tf.einsum('jk,ik->ij', vehicleToWorldRotation,
                                  allPoints) + tf.expand_dims(
                                      vehicleToWorldTranslation, axis=-2)
            # Then from world space to the world reference frame.
            allPoints = tf.einsum('jk,ik->ij', worldToReferenceRotation,
                                  allPoints) + tf.expand_dims(
                                      worldToReferenceTranslation, axis=-2)
            # Copy back.
            points_byreturn[returnIndex] = allPoints.numpy()
    return points_byreturn, cp_points_byreturn
def decode_tf(FILENAME):
    day_file = open('./day_segment_val.txt', 'a')
    basename = os.path.basename(FILENAME)[:-9]
    if not os.path.exists('./waymo_decode_val/' + basename):
        os.makedirs('./waymo_decode_val/' + basename + '/image/')
        os.makedirs('./waymo_decode_val/' + basename + '/depth/')
        os.makedirs('./waymo_decode_val/' + basename + '/pose/')
    dataset = tf.data.TFRecordDataset(FILENAME, compression_type='')
    count = 0
    frame = open_dataset.Frame()
    for data in dataset:
        frame.ParseFromString(bytearray(data.numpy()))
        (range_images, camera_projections, range_image_top_pose
         ) = frame_utils.parse_range_image_and_camera_projection(frame)
        # Write the 4x4 vehicle pose, flattened row-major, one frame per file.
        gpspose_file = open(
            './waymo_decode_val/' + basename + '/pose/' + str(count).zfill(5) +
            '_gpspose.txt', 'w')
        pose = np.array([frame.pose.transform])
        gpspose_file.write(' '.join('%f' % v for v in pose[0, :16]))
        gpspose_file.close()
        if count == 0 and frame.context.stats.time_of_day == 'Day':
            day_file.write('%s\n' % basename)
            day_file.close()
        for index, image in enumerate(frame.images):
            if index == 0:
                imageio.imwrite(
                    './waymo_decode_val/' + basename + '/image/' +
                    str(count).zfill(5) + '.jpg',
                    tf.image.decode_jpeg(image.image))
                break
        frame.lasers.sort(key=lambda laser: laser.name)
        # show_range_image(get_range_image(open_dataset.LaserName.TOP, 0), 1)
        # show_range_image(get_range_image(open_dataset.LaserName.TOP, 1), 4)
        points, cp_points = frame_utils.convert_range_image_to_point_cloud(
            frame, range_images, camera_projections, range_image_top_pose)
        points_ri2, cp_points_ri2 = frame_utils.convert_range_image_to_point_cloud(
            frame, range_images, camera_projections, range_image_top_pose,
            ri_index=1)
        # 3D points in vehicle frame.
        points_all = np.concatenate(points, axis=0)
        points_all_ri2 = np.concatenate(points_ri2, axis=0)
        # Camera projection corresponding to each point.
        cp_points_all = np.concatenate(cp_points, axis=0)
        cp_points_all_ri2 = np.concatenate(cp_points_ri2, axis=0)
        images = sorted(frame.images, key=lambda i: i.name)
        cp_points_all_concat = np.concatenate([cp_points_all, points_all],
                                              axis=-1)
        cp_points_all_concat_tensor = tf.constant(cp_points_all_concat)
        # The distance between lidar points and the vehicle frame origin.
        points_all_tensor = tf.norm(points_all, axis=-1, keepdims=True)
        cp_points_all_tensor = tf.constant(cp_points_all, dtype=tf.int32)
        # Keep only points that project into the first camera.
        mask = tf.equal(cp_points_all_tensor[..., 0], images[0].name)
        cp_points_all_tensor = tf.cast(
            tf.gather_nd(cp_points_all_tensor, tf.where(mask)),
            dtype=tf.float32)
        points_all_tensor = tf.gather_nd(points_all_tensor, tf.where(mask))
        # (u, v, range) per projected point.
        projected_points_all_from_raw_data = tf.concat(
            [cp_points_all_tensor[..., 1:3], points_all_tensor],
            axis=-1).numpy()
        saved_depth = save_depth_image(projected_points_all_from_raw_data,
                                       tf.image.decode_jpeg(image.image))
        imageio.imwrite(
            './waymo_decode_val/' + basename + '/depth/' +
            str(count).zfill(5) + '.png', saved_depth)
        count += 1
def get_pc(self, i):
    points, _ = frame_utils.convert_range_image_to_point_cloud(
        self.framelist[i], self.range_imagelist[i], self.camera_imagelist[i],
        self.range_image_toplist[i])
    points_all = np.concatenate(points, axis=0)
    return points_all
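# A hedged sketch of how the cached per-frame lists used by get_pc above might
# be populated; the attribute names mirror those referenced there, and the
# loading pattern follows the other snippets in this collection.
def load_frames(self, filename):
    dataset = tf.data.TFRecordDataset(filename, compression_type='')
    for data in dataset:
        frame = open_dataset.Frame()
        frame.ParseFromString(bytearray(data.numpy()))
        (range_images, camera_projections, range_image_top_pose
         ) = frame_utils.parse_range_image_and_camera_projection(frame)
        self.framelist.append(frame)
        self.range_imagelist.append(range_images)
        self.camera_imagelist.append(camera_projections)
        self.range_image_toplist.append(range_image_top_pose)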
def prep_data(frame, num_points=40000):
    mesh_vertices, instance_labels, semantic_labels, instance_bboxes = \
        None, None, None, None
    # Transform the frame to a point cloud.
    (range_images, camera_projections, range_image_top_pose
     ) = frame_utils.parse_range_image_and_camera_projection(frame)
    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose)
    # 3D points in vehicle frame.
    points_all = np.concatenate(points, axis=0)
    # Subsample points.
    if num_points < 0:
        mesh_vertices = points_all
    else:
        sub_samp_points = points_all[
            np.random.choice(np.arange(points_all.shape[0]), num_points), :]
        mesh_vertices = sub_samp_points
    num_detected_objects = len(frame.laser_labels)
    instance_labels = np.zeros((len(mesh_vertices),))
    semantic_labels = np.zeros((len(mesh_vertices),))
    instance_bboxes = np.zeros((num_detected_objects, 8))
    instance_id_map = {}
    # Loop through each ground-truth object.
    for instance_id, detected_object in enumerate(frame.laser_labels):
        if detected_object.type not in instance_id_map:
            instance_id_map[detected_object.type] = (instance_id,
                                                     detected_object)
        box = detected_object.box
        TYPE_2_SIZE_DICT[detected_object.type].append(
            np.array([box.width, box.length, box.height]))
        points_in_bbox = is_within_box_3d(
            point=tf.convert_to_tensor(mesh_vertices, dtype=tf.float32),
            box=tf.convert_to_tensor(
                np.array([
                    box.center_x, box.center_y, box.center_z,
                    box.width, box.length, box.height, box.heading,
                ]).reshape((1, 7)),
                dtype=tf.float32)).numpy().ravel()
        # Offset by one so that "not part of any instance" is not confused
        # with being part of instance zero.
        instance_labels[points_in_bbox] = instance_id + 1
        semantic_labels[points_in_bbox] = detected_object.type
        instance_bboxes[instance_id, :] = np.array([
            box.center_x, box.center_y, box.center_z,
            box.width, box.length, box.height, box.heading,
            # Map to 0..num_classes-1 instead of 1..num_classes.
            detected_object.type - 1,
        ]).reshape((1, 8))
        # Sanity check that we kept all points.
        if num_points < 0:
            print(np.sum(points_in_bbox > 0),
                  detected_object.num_lidar_points_in_box)
    # Yield the full-scene results.
    yield ('FULL_SCENE',
           (mesh_vertices, instance_labels, semantic_labels, instance_bboxes))
    # Loop through and generate data for each frustum. TODO: is padding needed?
    for instance_id, detected_object in instance_id_map.values():
        box = detected_object.box
        center_vec = np.array([box.center_x, box.center_y, box.center_z])
        normalized_cone_dic_vec = center_vec / np.linalg.norm(center_vec)
        # Frustum/cone geometry around the object.
        near_dist = np.linalg.norm(center_vec)
        fov_angle = 2 * np.arctan(box.height / (2 * near_dist))
        aspect_ratio = box.width / box.height
        # The base radius of the cone is the max of the height/width of the
        # far frustum, with a 50% fudge factor.
        base_radius = max(box.height, box.width) * 1.5
        # This per-point loop is very inefficient; TODO: vectorize this code.
        mesh_vertices_mask = np.zeros((mesh_vertices.shape[0])).astype(np.int32)
        for i in range(mesh_vertices.shape[0]):
            p = mesh_vertices[i, :]
            # TODO: need to do this for all objects in the scene.
            cone_dist = np.dot(p, normalized_cone_dic_vec)
            cone_radius = (cone_dist / near_dist) * base_radius
            orth_distance = np.linalg.norm(
                p - cone_dist * normalized_cone_dic_vec)
            mesh_vertices_mask[i] = orth_distance < cone_radius
        yield ('FRUSTUM',
               (mesh_vertices[mesh_vertices_mask == 1, :],
                instance_labels[mesh_vertices_mask == 1],
                semantic_labels[mesh_vertices_mask == 1],
                instance_bboxes[instance_id, :]))
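# A sketch of the vectorization the TODO above asks for: the same cone test as
# the per-point loop, expressed with NumPy broadcasting over all vertices at
# once (algebraically equivalent; drop-in replacement for the loop body).
cone_dist = mesh_vertices @ normalized_cone_dic_vec      # (N,) axial distance
cone_radius = (cone_dist / near_dist) * base_radius      # (N,) radius at that distance
orth_distance = np.linalg.norm(
    mesh_vertices - cone_dist[:, None] * normalized_cone_dic_vec, axis=1)
mesh_vertices_mask = (orth_distance < cone_radius).astype(np.int32)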
def waymo_to_pytorch_offline(data_root='', idx_dataset_batch=-1):
    '''
    Converts tfrecords from the Waymo open dataset to:
    (1) Images -> torch Tensor
    (2) Lidar Range Image -> torch Tensor
    (3) Labels -> dictionary of dictionaries with the following keys:
        'type':   1=TYPE_VEHICLE; 2=TYPE_PEDESTRIAN; 4=TYPE_CYCLIST
        'x':      upper left corner x  !! labeling differs from the original !!
        'y':      upper left corner y  !! labeling differs from the original !!
        'width':  width of the corresponding bounding box  !! labeling differs from the original !!
        'height': height of the corresponding bounding box !! labeling differs from the original !!
    (4) Heat maps from labels -> torch Tensor; image like

    Colab: there are a lot of unnecessary subdirs because of Google Filestream
    limitations. Data is not stored in batches because it comes in sequences
    of 20 s.
    '''
    # Allows __iter__() for the tf record dataset.
    tf.compat.v1.enable_eager_execution()
    # Get the config.
    if not data_root:
        config = get_config()
        data_root = config.dir.data.root
    # Read all entries in the data root directory.
    tf_dirs = [tfd for tfd in os.listdir(data_root) if tfd.startswith('tf_')]
    for idx_tf_dir, tf_dir in enumerate(tf_dirs):
        tf_data_path = os.path.join(data_root, tf_dir)
        for file in os.listdir(tf_data_path):
            # Skip all non-tfrecord files.
            if not file.endswith('.tfrecord'):
                continue
            # Directory names.
            save_path_labels = os.path.join(tf_data_path, 'labels')
            save_path_images = os.path.join(tf_data_path, 'images')
            save_path_lidar = os.path.join(tf_data_path, 'lidar')
            save_path_heat_maps = os.path.join(tf_data_path, 'heat_maps')
            # Create the save dirs if they do not exist.
            Path(save_path_labels).mkdir(exist_ok=True)
            Path(save_path_images).mkdir(exist_ok=True)
            Path(save_path_lidar).mkdir(exist_ok=True)
            Path(save_path_heat_maps).mkdir(exist_ok=True)
            # Read the tfrecord.
            dataset = tf.data.TFRecordDataset(
                os.path.join(data_root, tf_dir, file), compression_type='')
            # For all frames stored in the tfrecord.
            for idx_data, data in enumerate(dataset):
                frame = open_dataset.Frame()
                # Pass data from the tfrecord to the frame.
                frame.ParseFromString(bytearray(data.numpy()))
                # For all images of the current frame; only the FRONT camera
                # (name == 1) is kept, since it has the best lidar/rgb match.
                # (This loop plus the check could be reduced to
                # image = frame.images[0].)
                for idx_img, image in enumerate(frame.images):
                    if image.name != 1:
                        continue
                    # Retrieve, convert, and save the rgb data
                    # (np array with tensor-like dims: channels, y, x).
                    np_img = np.moveaxis(
                        tf.image.decode_jpeg(image.image).numpy(), -1, 0)
                    downsized_img_tensor = avgpool_tensor(torch.Tensor(np_img))
                    img_filename = 'img_%d_%d_%d_%d' % (
                        idx_dataset_batch, idx_tf_dir, idx_data, idx_img)
                    torch.save(downsized_img_tensor,
                               os.path.join(save_path_images, img_filename))
                    # Retrieve, convert, and save the lidar data.
                    (range_images, camera_projections, range_image_top_pose
                     ) = frame_utils.parse_range_image_and_camera_projection(
                         frame)
                    points, cp_points = frame_utils.convert_range_image_to_point_cloud(
                        frame, range_images, camera_projections,
                        range_image_top_pose)
                    # The lidar corresponds to the image due to the mask
                    # applied in this function.
                    lidar_array = extract_lidar_array_from_point_cloud(
                        points, cp_points)
                    range_tensor = lidar_array_to_image_like_tensor(lidar_array)
                    downsized_range_tensor = pool_lidar_tensor(range_tensor)
                    lidar_filename = 'lidar_' + img_filename
                    torch.save(downsized_range_tensor,
                               os.path.join(save_path_lidar, lidar_filename))
                    # Retrieve, convert, and save the labels.
                    label_dict = {}  # dict of dicts
                    labels_filename = 'labels_' + img_filename
                    for camera_labels in frame.camera_labels:
                        # Ignore labels corresponding to other images.
                        if camera_labels.name != image.name:
                            continue
                        # For all labels: convert Waymo's center/length/width
                        # boxes to upper-left corner plus width/height.
                        for idx_label, label in enumerate(camera_labels.labels):
                            label_dict[str(idx_label)] = {
                                'type': label.type,
                                'x': int(label.box.center_x -
                                         0.5 * label.box.length),
                                'y': int(label.box.center_y -
                                         0.5 * label.box.width),
                                'height': int(label.box.width),
                                'width': int(label.box.length)
                            }
                    save_dict(label_dict,
                              os.path.join(save_path_labels, labels_filename))
                    # Create ground-truth maps from the labels and save them.
                    heat_map_filename = 'heat_map_' + img_filename
                    heat_map = create_ground_truth_maps(label_dict)
                    downsized_heat_map = maxpool_tensor(heat_map)
                    torch.save(
                        downsized_heat_map,
                        os.path.join(save_path_heat_maps, heat_map_filename))
                want_small_dataset_for_testing = False
                if idx_data == 9 and want_small_dataset_for_testing:
                    return 1
                print(idx_data + 1, ' IMAGES PROCESSED')
async def transmit_frame(websocket, segment_id, frame_index):
    record_path = os.path.join(global_settings['segments_dir'],
                               get_segment_filename(segment_id))
    dataset = tf.data.TFRecordDataset([record_path])
    dataset = dataset.skip(frame_index).take(1)
    frame = None
    for data in dataset:
        frame = open_dataset.Frame()
        frame.ParseFromString(bytearray(data.numpy()))
    (range_images, camera_projections, range_image_top_pose
     ) = frame_utils.parse_range_image_and_camera_projection(frame)
    intensities = get_intensities(range_images)
    intensities_all = np.concatenate(intensities, axis=0)
    points, _ = frame_utils.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose)
    num_points_per_laser = [len(laser_points) for laser_points in points]
    points_all = np.concatenate(points, axis=0)
    if global_settings['label_points']:
        boxes = np.array([[
            l.box.center_x, l.box.center_y, l.box.center_z,
            l.box.length, l.box.width, l.box.height,
            l.box.heading, l.type,
        ] for l in frame.laser_labels])
        points = np.array(points_all)
        point_labels = is_within_box_3d(
            tf.convert_to_tensor(points, dtype=tf.double),
            tf.convert_to_tensor(boxes, dtype=tf.double)).numpy()
    # Header: message type, segment id, frame index; then one
    # (x, y, z, intensity, laser index, label) tuple per point.
    output = [0, float(int(segment_id)), float(frame_index)]
    current_laser = 0
    last_laser_index = 0
    for index, point in enumerate(points_all):
        if index - last_laser_index > num_points_per_laser[current_laser] - 1:
            current_laser += 1
            last_laser_index = index
        label = point_labels[index] if global_settings['label_points'] else 0
        x, y, z = point
        intensity = intensities_all[index]
        output.extend((x, y, z, intensity, current_laser, label))
    # Named 'payload' to avoid shadowing the builtin 'bytes'.
    payload = np.array(output, dtype=np.float32).tobytes()
    print(f"Sending segment {segment_id} frame {frame_index} points")
    await websocket.send(payload)