def get_pose_intrinsic( save_path='/public/MARS/datasets/nuScenes-SF/meta/cam_pose_intrinsic.json' ): split = 'train' data_path = 'data/nuscenes/' nusc = NuScenes(version=SPLITS[split], dataroot=data_path, verbose=True) samples = nusc.sample cam_token2cam_intext = {} for sample in tqdm(samples): for cam_name in CamNames: cam_token = sample['data'][cam_name] cam_data = nusc.get('sample_data', cam_token) ego_pose = nusc.get('ego_pose', cam_data['ego_pose_token']) cam_cs = nusc.get('calibrated_sensor', cam_data['calibrated_sensor_token']) # used to transform from ego to global pose_matrix = quat_trans2matrix(ego_pose['rotation'], ego_pose['translation']) # used to transform from cameral to ego cam_pose = quat_trans2matrix(cam_cs['rotation'], cam_cs['translation']) cam_pose_world = np.matmul(pose_matrix, cam_pose) ret = {'pose': cam_pose_world.tolist()} ret['intrinsic'] = cam_cs['camera_intrinsic'] cam_token2cam_intext[cam_token] = ret with open(save_path, 'w') as f: json.dump(cam_token2cam_intext, f)
def load_keyframe_rad_tokens(nusc: NuScenes) -> (List[str]): """ This method takes a Nuscenes instance and returns two lists with the sample_tokens of all RADAR_FRONT sample_data which have (almost) the same timestamp as their corresponding sample (is_key_frame = True). :param nusc: Nuscenes instance :return: rad_sd_tokens List of radar sample data tokens """ rad_sd_tokens = [] for scene_rec in nusc.scene: print('Loading samples of scene %s....' % scene_rec['name'], end = '') start_sample_rec = nusc.get('sample', scene_rec['first_sample_token']) rad_front_sd_rec = nusc.get('sample_data', start_sample_rec['data']['RADAR_FRONT']) cur_rad_front_sd_rec = rad_front_sd_rec rad_sd_tokens.append(cur_rad_front_sd_rec['token']) #Append all keyframe radar sample tokens in list while cur_rad_front_sd_rec['next'] != '': cur_rad_front_sd_rec = nusc.get('sample_data', cur_rad_front_sd_rec['next']) if cur_rad_front_sd_rec['is_key_frame']: rad_sd_tokens.append(cur_rad_front_sd_rec['token']) print("done!") return rad_sd_tokens
def get_egoposes_on_drivable_ratio(nusc: NuScenes, nusc_map: NuScenesMap, scene_token: str) -> float: """ Get the ratio of ego poses on the drivable area. :param nusc: A NuScenes instance. :param nusc_map: The NuScenesMap instance of a particular map location. :param scene_token: The token of the current scene. :return: The ratio of poses that fall on the driveable area. """ # Go through each sample in the scene. sample_tokens = nusc.field2token('sample', 'scene_token', scene_token) poses_all = 0 poses_valid = 0 for sample_token in sample_tokens: # Poses are associated with the sample_data. Here we use the lidar sample_data. sample_record = nusc.get('sample', sample_token) sample_data_record = nusc.get('sample_data', sample_record['data']['LIDAR_TOP']) pose_record = nusc.get('ego_pose', sample_data_record['ego_pose_token']) # Check if the ego pose is on the driveable area. ego_pose = pose_record['translation'][:2] record = nusc_map.record_on_point(ego_pose[0], ego_pose[1], 'drivable_area') if len(record) > 0: poses_valid += 1 poses_all += 1 ratio_valid = poses_valid / poses_all return ratio_valid
def load_keyframe_rad_cam_data( nusc: NuScenes) -> (List[str], List[str], List[str]): """ This method takes a Nuscenes instance and returns two lists with the sample_tokens of all CAM_FRONT and RADAR_FRONT sample_data which have (almost) the same timestamp as their corresponding sample (is_key_frame = True). In addition, it returns the sample_names which are set equal to the filename of each CAM_FRONT sample_data. :param nusc: Nuscenes instance :return: (cam_sd_tokens, rad_sd_tokens, sample_names). Tuple with lists of camera and radar tokens as well as sample_names """ #Lists to hold tokens of all cam and rad sample_data that have is_key_frame = True #These have (almost) the same timestamp as their corresponding sample and #correspond to the files in the ..sets/nuscenes/samples/ folder cam_sd_tokens = [] rad_sd_tokens = [] sample_names = [] for scene_rec in nusc.scene: #scene_name = scene_rec["name"] + "_sample_" print('Loading samples of scene %s....' % scene_rec['name'], end='') start_sample_rec = nusc.get('sample', scene_rec['first_sample_token']) #sample_name = scene_name + str(start_sample_rec["timestamp"]) cam_front_sd_rec = nusc.get('sample_data', start_sample_rec['data']['CAM_FRONT']) rad_front_sd_rec = nusc.get('sample_data', start_sample_rec['data']['RADAR_FRONT']) cur_cam_front_sd_rec = cam_front_sd_rec cur_rad_front_sd_rec = rad_front_sd_rec sample_name = cur_cam_front_sd_rec["filename"].replace( 'samples/CAM_FRONT/', '').replace('.jpg', '') #Append the first sample_name, cam and rad sample_data tokens in lists sample_names.append(sample_name) cam_sd_tokens.append(cur_cam_front_sd_rec['token']) rad_sd_tokens.append(cur_rad_front_sd_rec['token']) #Append all keyframe sample_names and camera sample tokens in list while cur_cam_front_sd_rec['next'] != '': cur_cam_front_sd_rec = nusc.get('sample_data', cur_cam_front_sd_rec['next']) sample_name = cur_cam_front_sd_rec["filename"].replace( 'samples/CAM_FRONT/', '').replace('.jpg', '') if cur_cam_front_sd_rec['is_key_frame']: sample_names.append(sample_name) cam_sd_tokens.append(cur_cam_front_sd_rec['token']) #Append all keyframe radar sample tokens in list while cur_rad_front_sd_rec['next'] != '': cur_rad_front_sd_rec = nusc.get('sample_data', cur_rad_front_sd_rec['next']) if cur_rad_front_sd_rec['is_key_frame']: rad_sd_tokens.append(cur_rad_front_sd_rec['token']) print("done!") assert (len(cam_sd_tokens) == len(rad_sd_tokens) == len(sample_names)) return (cam_sd_tokens, rad_sd_tokens, sample_names)
def main(): root_path = '/extssd/jiaxin/nuscenes/test' nusc = NuScenes(version='v1.0-test', dataroot=root_path, verbose=True) sensor = 'CAM_FRONT' counter = 0 for i, scene in enumerate(nusc.scene): scene_token = scene['token'] scene = nusc.get('scene', scene_token) first_sample = nusc.get('sample', scene['first_sample_token']) camera = nusc.get('sample_data', first_sample['data'][sensor]) img = np.array( Image.open(os.path.join(nusc.dataroot, camera['filename'])).convert('L')) H, W = img.shape[0], img.shape[1] img_mean = np.mean(img.astype(np.float32)) white_mask = img > 150 white_area = np.sum(white_mask.astype(np.float32)) if img_mean < 110 and white_area < (H * W) * 0.1: print('\'%s\',' % (scene_token)) counter += 1 plt.figure() plt.gray() plt.imshow(img) plt.show() print('%d night scenes' % counter)
def get_rad_to_cam(nusc: NuScenes, cam_sd_token: str, rad_sd_token: str): """ Method to get the extrinsic calibration matrix from radar_front to camera_front for a specifi sample. Every sample_data has a record on which calibrated - sensor the data is collected from ("calibrated_sensor_token" key) The calibrated_sensor record consists of the definition of a particular sensor (lidar/radar/camera) as calibrated on a particular vehicle :param nusc: Nuscenes instance :param cam_sd_token : A token of a specific camera_front sample_data :param rad_sd_token : A token of a specific radar_front sample_data :param nuscenes_way : Temporal debug param until transformation order is clear :return: rad_to_cam <np.float: 4, 4> Returns homogeneous transform matrix from radar to camera """ cam_cs_token = nusc.get('sample_data', cam_sd_token)["calibrated_sensor_token"] cam_cs_rec = nusc.get('calibrated_sensor', cam_cs_token) rad_cs_token = nusc.get('sample_data', rad_sd_token)["calibrated_sensor_token"] rad_cs_rec = nusc.get('calibrated_sensor', rad_cs_token) #Based on how transforms are handled in nuScenes scripts like scripts/export_kitti.py rad_to_ego = transform_matrix(rad_cs_rec['translation'], Quaternion(rad_cs_rec['rotation']), inverse=False) ego_to_cam = transform_matrix(cam_cs_rec['translation'], Quaternion(cam_cs_rec['rotation']), inverse=True) rad_to_cam = np.dot(ego_to_cam, rad_to_ego) return rad_to_cam
def create_tf_record_train_as_val(fn_out, split, vis_results): label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map) writer = tf.python_io.TFRecordWriter(fn_out) params = read_params(FLAGS.param) logging.debug('Params: ' + str(params)) nusc = NuScenes(version='v1.0-trainval', dataroot=FLAGS.nuscenes, verbose=True) sensor = 'LIDAR_TOP' nu_to_kitti_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2).inverse split_logs = create_splits_logs(split, nusc) sample_tokens = split_to_samples(nusc, split_logs) random.shuffle(sample_tokens) print('Number of samples:', len(sample_tokens)) for sample_token in sample_tokens[1:100]: sample = nusc.get('sample', sample_token) lidar_top_data = nusc.get('sample_data', sample['data'][sensor]) if not lidar_top_data['prev']: continue lidar_top_data_prev = nusc.get('sample_data', lidar_top_data['prev']) labels_corners, labels_center, labels_data = compute_labels_image(nusc, sample, sensor, nu_to_kitti_lidar, params) filename = os.path.splitext(os.path.splitext(lidar_top_data['filename'])[0])[0] filename_prev = os.path.splitext(os.path.splitext(lidar_top_data_prev['filename'])[0])[0] tf_example = dict_to_tf_example(labels_corners, labels_center, labels_data, params, label_map_dict, FLAGS.data, FLAGS.data_beliefs, filename, filename_prev) writer.write(tf_example.SerializeToString()) if (vis_results): visualize_results(FLAGS.data, filename, labels_corners, os.path.join(FLAGS.output, 'Debug'))
def get_boxes2d(sample_data_token: str, image_annotations_token2ind = {}, image_annotations = []): nusc = NuScenes(version='v1.0-mini', dataroot='dataset/nuScenes/v1.0-mini', verbose=True) # Retrieve sensor & pose records sd_record = nusc.get('sample_data', sample_data_token) curr_sample_record = nusc.get('sample', sd_record['sample_token']) #curr_sample_record['image_anns'] boxes2d = [] if curr_sample_record['prev'] == "" or sd_record['is_key_frame']: # If no previous annotations available, or if sample_data is keyframe just return the current ones. for i,x in enumerate(curr_sample_record['anns']): record = nusc.get('sample_annotation', x) instance_token = record['instance_token'] record2d = image_annotations[image_annotations_token2ind[instance_token]] box2d = Box2d(record2d['bbox_corners'], name=record2d['category_name'], token=record2d['sample_annotation_token'], visibility=record2d['visibility_token'],filename=record2d['filename']) boxes2d.append(box2d) else: prev_sample_record = nusc.get('sample', curr_sample_record['prev']) curr_ann_recs = [nusc.get('sample_annotation', token) for token in curr_sample_record['anns']] prev_ann_recs = [nusc.get('sample_annotation', token) for token in prev_sample_record['anns']] # Maps instance tokens to prev_ann records prev_inst_map = {entry['instance_token']: entry for entry in prev_ann_recs} t0 = prev_sample_record['timestamp'] t1 = curr_sample_record['timestamp'] t = sd_record['timestamp'] # There are rare situations where the timestamps in the DB are off so ensure that t0 < t < t1. t = max(t0, min(t1, t)) boxes = [] for curr_ann_rec in curr_ann_recs: if curr_ann_rec['instance_token'] in prev_inst_map: # If the annotated instance existed in the previous frame, interpolate center & orientation. prev_ann_rec = prev_inst_map[curr_ann_rec['instance_token']] # Interpolate center. center = [np.interp(t, [t0, t1], [c0, c1]) for c0, c1 in zip(prev_ann_rec['translation'], curr_ann_rec['translation'])] # Interpolate orientation. rotation = Quaternion.slerp(q0=Quaternion(prev_ann_rec['rotation']), q1=Quaternion(curr_ann_rec['rotation']), amount=(t - t0) / (t1 - t0)) box = Box(center, curr_ann_rec['size'], rotation, name=curr_ann_rec['category_name'], token=curr_ann_rec['token']) else: # If not, simply grab the current annotation. box = self.get_box(curr_ann_rec['token']) boxes.append(box) return boxes2d
def test_egoposes_on_map(self): """ Test that all ego poses land on """ nusc = NuScenes(version=self.version, dataroot=os.environ['NUSCENES'], verbose=False) whitelist = [ 'scene-0499', 'scene-0501', 'scene-0502', 'scene-0515', 'scene-0517' ] invalid_scenes = [] for scene in tqdm.tqdm(nusc.scene, leave=False): if scene['name'] in whitelist: continue log = nusc.get('log', scene['log_token']) map_name = log['location'] nusc_map = self.nusc_maps[map_name] ratio_valid = get_egoposes_on_drivable_ratio( nusc, nusc_map, scene['token']) if ratio_valid != 1.0: print( 'Error: Scene %s has a ratio of %f ego poses on the driveable area!' % (scene['name'], ratio_valid)) invalid_scenes.append(scene['name']) self.assertEqual(len(invalid_scenes), 0)
def merge_depth_sf(depth_meta_path, sf_meta_path, save_path): with open(depth_meta_path, 'r') as f: depth_meta = json.load(f) with open(sf_meta_path, 'r') as f: sf_meta = json.load(f) split = 'train' data_path = 'data/nuscenes/' nusc = NuScenes( version=SPLITS[split], dataroot=data_path, verbose=True) imgpath2paths = {} for depth_info in depth_meta: sample_token = depth_info['sample_token'] depth_path = depth_info['depth_path'] img_path = depth_info['img_path'] cam_name = img_path.split('/')[-2] cam_token = nusc.get('sample', sample_token)['data'][cam_name] sf_path = sf_meta[cam_token]['points_path'] img_path = sf_meta[cam_token]['img_path'] # use this version of img path tmp = {'token': cam_token, 'depth_path': depth_path, 'cam_name': cam_name, 'sf_path': sf_path, 'img_path': img_path} imgpath2paths[img_path] = tmp with open(save_path, 'w') as f: json.dump(imgpath2paths, f)
def convert_second_results(second_result_dir, dataset_dir, output_dir): with open(second_result_dir, 'rb') as f: data = json.load(f) prediction_template = { 'sample_token': '', 'translation': [], 'size': [], 'rotation': [], 'name': '', 'score': 0 } nusc = NuScenes(version='v1.0-trainval', dataroot=dataset_dir) predictions = list() ground_truth = list() print('Converting reuslts and ground truth ...') for token, value in data['results'].items(): for item in value: pred = copy(prediction_template) for k, v in key_mapping.items(): pred[v] = item[k] predictions.append(pred) anns = nusc.get('sample', token)['anns'] for ann_token in anns: gt = copy(prediction_template) ann_meta = nusc.get('sample_annotation', ann_token) for k, v in nusc_key_mappings.items(): gt[v] = ann_meta[k] ground_truth.append(gt) with open(Path(output_dir) / 'predictions.json', 'w') as f: print('Saving Predictions ...') json.dump(predictions, f) with open(Path(output_dir) / 'ground_truth.json', 'w') as f: print('Saving Ground Truth ...') json.dump(ground_truth, f) print('Done Converting.')
def nuscenes_extract_detection(detection_file, unpack_dir): print("Generating detection as txt files from {}".format(detection_file)) nusc = NuScenes(dataroot=GlobalConfig.nuscenes_root_trainval, version='v1.0-trainval', verbose=True) with open(detection_file, 'r') as f: detections = json.load(f) print(detections['meta']) processed_samples = set() for sample_token in tqdm(detections['results'].keys()): if sample_token in processed_samples: continue sample = nusc.get('sample', sample_token) scene = nusc.get('scene', sample['scene_token']) scene_detections_file = open( os.path.join(unpack_dir, '{}.txt'.format(sample['scene_token'])), 'w') # main loop for processing a scene scene_sample_token = scene['first_sample_token'] scene_timestamp = 0 while scene_sample_token != '': # get detections of this sample sample_dets = detections['results'][ scene_sample_token] # list(sample_results) for det in sample_dets: if det['detection_name'] in nuscenes_tracking_names: scene_detections_file.write( format_detection(scene_timestamp, det)) # save this sample token into processes_samples processed_samples.add(scene_sample_token) # move on scene_sample = nusc.get('sample', scene_sample_token) scene_timestamp += 1 scene_sample_token = scene_sample['next'] # finish for this scene scene_detections_file.close()
def main(): nusc = NuScenes(version='v1.0-mini', dataroot='../../data/datasets/nuscenes', verbose=True) my_sample = nusc.sample[10] my_sample_token = my_sample['token'] sample_record = nusc.get('sample', my_sample_token) pointsensor_token = sample_record['data']['LIDAR_TOP'] camera_token = sample_record['data']['CAM_FRONT'] # Get point cloud pointsensor = nusc.get('sample_data', pointsensor_token) pcl_path = os.path.join(nusc.dataroot, pointsensor['filename']) if pointsensor['sensor_modality'] == 'lidar': pc_data = LidarPointCloud.from_file(pcl_path) else: raise NotImplementedError() pc = pc_data.points[0:3] vtk_window_size = (1280, 720) vtk_renderer = demo_utils.setup_vtk_renderer() vtk_render_window = demo_utils.setup_vtk_render_window( 'Point Cloud', vtk_window_size, vtk_renderer) vtk_interactor = vtk.vtkRenderWindowInteractor() vtk_interactor.SetRenderWindow(vtk_render_window) vtk_interactor.SetInteractorStyle( vtk_utils.ToggleActorsInteractorStyle(None, vtk_renderer)) vtk_interactor.Initialize() vtk_pc = VtkPointCloudGlyph() vtk_pc.set_points(pc.T) vtk_renderer.AddActor(vtk_pc.vtk_actor) vtk_render_window.Render() vtk_interactor.Start() # Blocking
def main(dataroot: str, version: str, output_prefix: str, output_format: str = 'kml') -> None: """ Extract the latlon coordinates for each available pose and write the results to a file. The file is organized by location and scene_name. :param dataroot: Path of the nuScenes dataset. :param version: NuScenes version. :param output_format: The output file format, kml or json. :param output_prefix: Where to save the output file (without the file extension). """ # Init nuScenes. nusc = NuScenes(dataroot=dataroot, version=version, verbose=False) coordinates_per_location = {} print(f'Extracting coordinates...') for scene in tqdm(nusc.scene): # Retrieve nuScenes poses. scene_name = scene['name'] scene_token = scene['token'] location = nusc.get('log', scene['log_token'])[ 'location'] # Needed to extract the reference coordinate. poses = get_poses( nusc, scene_token ) # For each pose, we will extract the corresponding coordinate. # Compute and store coordinates. coordinates = derive_latlon(location, poses) if location not in coordinates_per_location: coordinates_per_location[location] = {} coordinates_per_location[location][scene_name] = coordinates # Create output directory if necessary. dest_dir = os.path.dirname(output_prefix) if dest_dir != '' and not os.path.exists(dest_dir): os.makedirs(dest_dir) # Write to json. output_path = f'{output_prefix}_{version}.{output_format}' if output_format == 'json': with open(output_path, 'w') as fh: json.dump(coordinates_per_location, fh, sort_keys=True, indent=4) elif output_format == 'kml': # Write to kml. export_kml(coordinates_per_location, output_path) else: raise Exception('Error: Invalid output format: %s' % output_format) print(f"Saved the coordinates in {output_path}")
def process_data(data_path, version, val_split): nusc = NuScenes(version=version, dataroot=data_path, verbose=True) splits = create_splits_scenes() train_scenes, val_scenes = train_test_split(splits['train' if 'mini' not in version else 'mini_train'], test_size=val_split) train_scene_names = splits['train' if 'mini' not in version else 'mini_train'] val_scene_names = splits['val' if 'mini' not in version else 'mini_val'] ns_scene_names = dict() ns_scene_names['train'] = train_scene_names ns_scene_names['val'] = val_scene_names scenes = [] for data_class in ['train', 'val']: for ns_scene_name in tqdm(ns_scene_names[data_class]): ns_scene = nusc.get('scene', nusc.field2token('scene', 'name', ns_scene_name)[0]) scene_id = int(ns_scene['name'].replace('scene-', '')) if scene_id in scene_blacklist: # Some scenes have bad localization continue scene = process_scene(ns_scene, nusc) if scene is not None: scenes.append(scene) print(f'Processed {len(scenes):.2f} scenes')
def get_poses(nusc: NuScenes, scene_token: str) -> List[dict]: """ Return all ego poses for the current scene. :param nusc: The NuScenes instance to load the ego poses from. :param scene_token: The token of the scene. :return: A list of the ego pose dicts. """ pose_list = [] scene_rec = nusc.get('scene', scene_token) sample_rec = nusc.get('sample', scene_rec['first_sample_token']) sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP']) ego_pose = nusc.get('ego_pose', sd_rec['token']) pose_list.append(ego_pose) while sd_rec['next'] != '': sd_rec = nusc.get('sample_data', sd_rec['next']) ego_pose = nusc.get('ego_pose', sd_rec['token']) pose_list.append(ego_pose) return pose_list
nusc = NuScenes(version='v1.0-mini', dataroot="/home/ubuntu/data/nuscenes", verbose=True) # Specify sensors to use radar_channel = 'RADAR_FRONT' camera_channel = 'CAM_FRONT' # Get all scene tokens in a list scene_tokens = [s['token'] for s in nusc.scene] # Choose a scene token (between 0 and 100) scene_token = scene_tokens[5] # Get the first sample of this scene to be demonstrated scene_rec = nusc.get('scene', scene_token) sample = nusc.get('sample', scene_rec['first_sample_token']) # Get the sample token and the records for sensor tokens sample_token = scene_rec['first_sample_token'] sample_record = nusc.get('sample', sample_token) # Grab the front camera and the radar sensor. radar_token = sample_record['data'][radar_channel] camera_token = sample_record['data'][camera_channel] # Get radar and image data radar_data = get_sensor_sample_data(nusc, sample, radar_channel) image_data = get_sensor_sample_data(nusc, sample, camera_channel) ## Define parameters for image_plus_creation
def create_dataset(dataroot, save_dir, width=672, height=672, grid_range=70., nusc_version='v1.0-mini', use_constant_feature=True, use_intensity_feature=True, end_id=None): os.makedirs(os.path.join(save_dir, 'in_feature'), exist_ok=True) os.makedirs(os.path.join(save_dir, 'out_feature'), exist_ok=True) nusc = NuScenes(version=nusc_version, dataroot=dataroot, verbose=True) ref_chan = 'LIDAR_TOP' if width == height: size = width else: raise Exception( 'Currently only supported if width and height are equal') grid_length = 2. * grid_range / size label_half_length = 10 data_id = 0 grid_ticks = np.arange(-grid_range, grid_range + grid_length, grid_length) grid_centers \ = (grid_ticks + grid_length / 2)[:len(grid_ticks) - 1] for my_scene in nusc.scene: first_sample_token = my_scene['first_sample_token'] token = first_sample_token while (token != ''): print('--- {} '.format(data_id) + token + ' ---') my_sample = nusc.get('sample', token) sd_record = nusc.get('sample_data', my_sample['data'][ref_chan]) sample_rec = nusc.get('sample', sd_record['sample_token']) chan = sd_record['channel'] pc, times = LidarPointCloud.from_file_multisweep(nusc, sample_rec, chan, ref_chan, nsweeps=10) _, boxes, _ = nusc.get_sample_data(sd_record['token'], box_vis_level=0) out_feature = np.zeros((size, size, 7), dtype=np.float32) for box_idx, box in enumerate(boxes): label = 0 if box.name.split('.')[0] == 'vehicle': if box.name.split('.')[1] == 'car': label = 1 elif (box.name.split('.')[1] in ['bus', 'construction', 'trailer', 'truck']): label = 2 elif box.name.split('.')[1] == 'emergency': if box.name.split('.')[2] == 'ambulance': label = 2 # police else: label = 1 elif box.name.split('.')[1] in ['bicycle', 'motorcycle']: label = 3 elif box.name.split('.')[0] == 'human': label = 4 elif (box.name.split('.')[0] in ['animal', 'movable_object', 'static_object']): label = 5 else: continue height_pt = np.linalg.norm(box.corners().T[0] - box.corners().T[3]) corners2d = box.corners()[:2, :] box2d = corners2d.T[[2, 3, 7, 6]] box2d_center = box2d.mean(axis=0) generate_out_feature(width, height, size, grid_centers, box2d, box2d_center, height_pt, label, label_half_length, out_feature) out_feature = out_feature.astype(np.float16) feature_generator = fg.Feature_generator(grid_range, width, height, use_constant_feature, use_intensity_feature) feature_generator.generate(pc.points.T) in_feature = feature_generator.feature if use_constant_feature and use_intensity_feature: in_feature = in_feature.reshape(size, size, 8) elif use_constant_feature or use_intensity_feature: in_feature = in_feature.reshape(size, size, 6) else: in_feature = in_feature.reshape(size, size, 4) # instance_pt is flipped due to flip out_feature = np.flip(np.flip(out_feature, axis=0), axis=1) out_feature[:, :, 1:3] *= -1 np.save(os.path.join(save_dir, 'in_feature/{:05}'.format(data_id)), in_feature) np.save( os.path.join(save_dir, 'out_feature/{:05}'.format(data_id)), out_feature) token = my_sample['next'] data_id += 1 if data_id == end_id: return
class NuScenesDataset(Dataset): def __init__(self, cfg, root, is_train=True, transforms=None): super(NuScenesDataset, self).__init__() self.root = root self.image_dir = os.path.join(root, "image_2") self.label_dir = os.path.join(root, "label_2") self.calib_dir = os.path.join(root, "calib") self.split = cfg.DATASETS.TRAIN_SPLIT if is_train else cfg.DATASETS.TEST_SPLIT self.is_train = is_train self.transforms = transforms if self.split == "train": imageset_txt = os.path.join(root, "ImageSets", "train.txt") elif self.split == "val": imageset_txt = os.path.join(root, "ImageSets", "val.txt") elif self.split == "trainval": imageset_txt = os.path.join(root, "ImageSets", "trainval.txt") elif self.split == "test": imageset_txt = os.path.join(root, "ImageSets", "test.txt") else: raise ValueError("Invalid split!") image_files = [] for line in open(imageset_txt, "r"): base_name = line.replace("\n", "") image_name = base_name + ".png" image_files.append(image_name) self.image_files = image_files self.label_files = [ i.replace(".png", ".txt") for i in self.image_files ] self.num_samples = len(self.image_files) self.classes = cfg.DATASETS.DETECT_CLASSES self.flip_prob = cfg.INPUT.FLIP_PROB_TRAIN if is_train else 0 self.aug_prob = cfg.INPUT.SHIFT_SCALE_PROB_TRAIN if is_train else 0 self.shift_scale = cfg.INPUT.SHIFT_SCALE_TRAIN self.num_classes = len(self.classes) self.input_width = cfg.INPUT.WIDTH_TRAIN self.input_height = cfg.INPUT.HEIGHT_TRAIN self.output_width = self.input_width // cfg.MODEL.BACKBONE.DOWN_RATIO self.output_height = self.input_height // cfg.MODEL.BACKBONE.DOWN_RATIO self.max_objs = cfg.DATASETS.MAX_OBJECTS self.logger = logging.getLogger(__name__) self.logger.info( "Initializing KITTI {} set with {} files loaded".format( self.split, self.num_samples)) # NuScenes specific stuff here # KITTI unwanted lines not yet deleted above # Select subset of the data to look at. nusc_version = 'v1.0' self.nusc = NuScenes(version=nusc_version) # Get assignment of scenes to splits. self.split_logs = create_split_logs(self.split, self.nusc) # Use only the samples from the current split. self.sample_tokens = self._split_to_samples(self.split_logs) self.image_count = len(self.sample_tokens) self.image_count = 100 self.sample_tokens = self.sample_tokens[:self.image_count] def __len__(self): return self.image_count def _split_to_samples(self, split_logs: List[str]) -> List[str]: """ Convenience function to get the samples in a particular split. :param split_logs: A list of the log names in this split. :return: The list of samples. """ samples = [] for sample in self.nusc.sample: scene = self.nusc.get('scene', sample['scene_token']) log = self.nusc.get('log', scene['log_token']) logfile = log['logfile'] if logfile in split_logs: samples.append(sample['token']) return samples def __getitem__(self, idx): # set defaults here kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2) kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse imsize = (1600, 900) # sample_token based on index sample_token = self.sample_tokens[idx] # Get sample data. sample = self.nusc.get('sample', sample_token) sample_annotation_tokens = sample['anns'] cam_front_token = sample['data'][self.cam_name] lidar_token = sample['data'][self.lidar_name] # Retrieve sensor records. sd_record_cam = self.nusc.get('sample_data', cam_front_token) sd_record_lid = self.nusc.get('sample_data', lidar_token) cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token']) cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token']) # Combine transformations and convert to KITTI format. # Note: cam uses same conventions in KITTI and nuScenes. lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']), inverse=False) ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']), inverse=True) velo_to_cam = np.dot(ego_to_cam, lid_to_ego) # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam. velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix) r0_rect = Quaternion(axis=[1, 0, 0], angle=0) # Dummy values. # Projection matrix. p_left_kitti = np.zeros((3, 4)) p_left_kitti[:3, :3] = cs_record_cam[ 'camera_intrinsic'] # Cameras are always rectified. # Create KITTI style transforms. velo_to_cam_rot = velo_to_cam_kitti[:3, :3] velo_to_cam_trans = velo_to_cam_kitti[:3, 3] # Check that the rotation has the same format as in KITTI. assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all() assert (velo_to_cam_trans[1:3] < 0).all() # Retrieve the token from the lidar. # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar, # not the camera. filename_cam_full = sd_record_cam['filename'] # set the img variable to its data here src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full) img = Image.open(src_im_path) # Create calibration matrix. K = p_left_kitti K = [float(i) for i in K] K = np.array(K, dtype=np.float32).reshape(3, 4) K = K[:3, :3] # populate the list of object annotations for this sample anns = [] for sample_annotation_token in sample_annotation_tokens: sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token) # Get box in LIDAR frame. _, box_lidar_nusc, _ = self.nusc.get_sample_data( lidar_token, box_vis_level=BoxVisibility.NONE, selected_anntokens=[sample_annotation_token]) box_lidar_nusc = box_lidar_nusc[0] # Truncated: Set all objects to 0 which means untruncated. truncated = 0.0 # Occluded: Set all objects to full visibility as this information is not available in nuScenes. occluded = 0 # Convert nuScenes category to nuScenes detection challenge category. detection_name = category_to_detection_name( sample_annotation['category_name']) # Skip categories that are not part of the nuScenes detection challenge. if detection_name is None: continue # Convert from nuScenes to KITTI box format. box_cam_kitti = KittiDB.box_nuscenes_to_kitti( box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect) # Project 3d box to 2d box in image, ignore box if it does not fall inside. bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize) if bbox_2d is None: continue # Set dummy score so we can use this file as result. box_cam_kitti.score = 0 # Convert box to output string format. output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d, truncation=truncated, occlusion=occluded) fieldnames = [ 'type', 'truncated', 'occluded', 'alpha', 'xmin', 'ymin', 'xmax', 'ymax', 'dh', 'dw', 'dl', 'lx', 'ly', 'lz', 'ry' ] if self.is_train: f = StringIO(output) reader = csv.DictReader(f, delimiter=' ', fieldnames=fieldnames) for line, row in enumerate(reader): if row["type"] in self.classes: anns.append({ "class": row["type"], "label": TYPE_ID_CONVERSION[row["type"]], "truncation": float(row["truncated"]), "occlusion": float(row["occluded"]), "alpha": float(row["alpha"]), "dimensions": [ float(row['dl']), float(row['dh']), float(row['dw']) ], "locations": [ float(row['lx']), float(row['ly']), float(row['lz']) ], "rot_y": float(row["ry"]) }) center = np.array([i / 2 for i in img.size], dtype=np.float32) size = np.array([i for i in img.size], dtype=np.float32) """ resize, horizontal flip, and affine augmentation are performed here. since it is complicated to compute heatmap w.r.t transform. """ flipped = False if (self.is_train) and (random.random() < self.flip_prob): flipped = True img = img.transpose(Image.FLIP_LEFT_RIGHT) center[0] = size[0] - center[0] - 1 K[0, 2] = size[0] - K[0, 2] - 1 affine = False if (self.is_train) and (random.random() < self.aug_prob): affine = True shift, scale = self.shift_scale[0], self.shift_scale[1] shift_ranges = np.arange(-shift, shift + 0.1, 0.1) center[0] += size[0] * random.choice(shift_ranges) center[1] += size[1] * random.choice(shift_ranges) scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1) size *= random.choice(scale_ranges) center_size = [center, size] trans_affine = get_transfrom_matrix( center_size, [self.input_width, self.input_height]) trans_affine_inv = np.linalg.inv(trans_affine) img = img.transform( (self.input_width, self.input_height), method=Image.AFFINE, data=trans_affine_inv.flatten()[:6], resample=Image.BILINEAR, ) trans_mat = get_transfrom_matrix( center_size, [self.output_width, self.output_height]) if not self.is_train: # for inference we parametrize with original size target = ParamsList(image_size=size, is_train=self.is_train) target.add_field("trans_mat", trans_mat) target.add_field("K", K) if self.transforms is not None: img, target = self.transforms(img, target) return img, target, idx heat_map = np.zeros( [self.num_classes, self.output_height, self.output_width], dtype=np.float32) regression = np.zeros([self.max_objs, 3, 8], dtype=np.float32) cls_ids = np.zeros([self.max_objs], dtype=np.int32) proj_points = np.zeros([self.max_objs, 2], dtype=np.int32) p_offsets = np.zeros([self.max_objs, 2], dtype=np.float32) dimensions = np.zeros([self.max_objs, 3], dtype=np.float32) locations = np.zeros([self.max_objs, 3], dtype=np.float32) rotys = np.zeros([self.max_objs], dtype=np.float32) reg_mask = np.zeros([self.max_objs], dtype=np.uint8) flip_mask = np.zeros([self.max_objs], dtype=np.uint8) for i, a in enumerate(anns): a = a.copy() cls = a["label"] locs = np.array(a["locations"]) rot_y = np.array(a["rot_y"]) if flipped: locs[0] *= -1 rot_y *= -1 point, box2d, box3d = encode_label(K, rot_y, a["dimensions"], locs) point = affine_transform(point, trans_mat) box2d[:2] = affine_transform(box2d[:2], trans_mat) box2d[2:] = affine_transform(box2d[2:], trans_mat) box2d[[0, 2]] = box2d[[0, 2]].clip(0, self.output_width - 1) box2d[[1, 3]] = box2d[[1, 3]].clip(0, self.output_height - 1) h, w = box2d[3] - box2d[1], box2d[2] - box2d[0] if (0 < point[0] < self.output_width) and (0 < point[1] < self.output_height): point_int = point.astype(np.int32) p_offset = point - point_int radius = gaussian_radius(h, w) radius = max(0, int(radius)) heat_map[cls] = draw_umich_gaussian(heat_map[cls], point_int, radius) cls_ids[i] = cls regression[i] = box3d proj_points[i] = point_int p_offsets[i] = p_offset dimensions[i] = np.array(a["dimensions"]) locations[i] = locs rotys[i] = rot_y reg_mask[i] = 1 if not affine else 0 flip_mask[i] = 1 if not affine and flipped else 0 target = ParamsList(image_size=img.size, is_train=self.is_train) target.add_field("hm", heat_map) target.add_field("reg", regression) target.add_field("cls_ids", cls_ids) target.add_field("proj_p", proj_points) target.add_field("dimensions", dimensions) target.add_field("locations", locations) target.add_field("rotys", rotys) target.add_field("trans_mat", trans_mat) target.add_field("K", K) target.add_field("reg_mask", reg_mask) target.add_field("flip_mask", flip_mask) if self.transforms is not None: img, target = self.transforms(img, target) return img, target, idx
# Add sensors to VCD for sensor in nusc.sensor: if sensor['modality'] == 'camera': vcd.add_stream(sensor['channel'], '', sensor['token'], core.StreamType.camera) elif sensor['modality'] == 'lidar': vcd.add_stream(sensor['channel'], '', sensor['token'], core.StreamType.lidar) else: vcd.add_stream(sensor['channel'], '', sensor['token'], core.StreamType.other) # Dictionary to map between frame nº and sample_token token_sample = my_scene['first_sample_token'] current_sample = nusc.get('sample', token_sample) # Add metadata to VCD vcd.data['vcd']['metadata'].setdefault('properties', dict()) vcd.data['vcd']['metadata']['properties']['scene_token'] = my_scene[ 'token'] vcd.data['vcd']['metadata']['properties']['scene_name'] = my_scene['name'] vcd.data['vcd']['metadata']['properties']['scene_description'] = my_scene[ 'description'] log = nusc.get('log', my_scene['log_token']) vcd.data['vcd']['metadata']['properties']['log_token'] = log['token'] vcd.data['vcd']['metadata']['properties']['log_file'] = log['logfile'] vcd.data['vcd']['metadata']['properties']['vehicle'] = log['vehicle'] vcd.data['vcd']['metadata']['properties']['date'] = log['date_captured'] vcd.data['vcd']['metadata']['properties']['location'] = log['location']
nuscenes_path = '/mrtstorage/datasets/nuscenes/data/data_LIDAR/v1.0-trainval/v1.0-trainval_meta' nusc = NuScenes(version='v1.0-trainval', dataroot=nuscenes_path, verbose=True) x_positions = dict.fromkeys(class_list) y_positions = dict.fromkeys(class_list) length = dict.fromkeys(class_list) width = dict.fromkeys(class_list) num_lidar_points = dict.fromkeys(class_list) y_translation = [] yaw = [] sensor = 'LIDAR_TOP' nu_to_kitti_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2).inverse for scene in nusc.scene: current_sample_token = scene['first_sample_token'] last_sample_token = scene['last_sample_token'] while current_sample_token != last_sample_token: sample = nusc.get('sample', current_sample_token) lidar_top_data = nusc.get('sample_data', sample['data'][sensor]) if lidar_top_data['prev']: ego_pose = nusc.get('ego_pose', lidar_top_data['ego_pose_token']) calib_sensor = nusc.get('calibrated_sensor', lidar_top_data['calibrated_sensor_token']) lidar_top_data_prev = nusc.get('sample_data', lidar_top_data['prev']) ego_pose_prev = nusc.get('ego_pose', lidar_top_data_prev['ego_pose_token']) calib_sensor_prev = nusc.get('calibrated_sensor', lidar_top_data_prev['calibrated_sensor_token']) ego_to_global = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation'])) lidar_to_ego = transform_matrix(calib_sensor['translation'], Quaternion(calib_sensor['rotation'])) ego_to_global_prev = transform_matrix(ego_pose_prev['translation'], Quaternion(ego_pose_prev['rotation'])) lidar_to_ego_prev = transform_matrix(calib_sensor_prev['translation'], Quaternion(calib_sensor_prev['rotation'])) lidar_to_global = np.dot(ego_to_global, lidar_to_ego) lidar_to_global_prev = np.dot(ego_to_global_prev, lidar_to_ego_prev) delta = inv(lidar_to_global_prev).dot(lidar_to_global)
'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT', ] lll = [] splits = create_splits_scenes() for camera_channel in valid_channels: for token in tokens_list: first_time = True Dict = {} current_recs = {} num_objects = 0 scene_token = token[:-1] scene_record = nusc.get('scene', scene_token) scene_rec = scene_record scene = nusc.get('scene', scene_token) scene_name = scene_record['name'] scene_id = int(scene_name.replace('scene-', '')) if scene_name in splits['val']: spl = '/test/' else: spl = '/train/' os.mkdir(data_directory + spl + str(scene_id) + camera_channel) os.mkdir(data_directory + spl + str(scene_id) + camera_channel + '/img1') os.mkdir(data_directory + spl + str(scene_id) + camera_channel + '/gt') file_w = open(
print("No scene with name {} found.".format(sys.argv[2])) print("Available scenes: {}".format(" ".join(name2id.keys()))) exit(1) scenes2parse.append(sys.argv[3]) else: scenes2parse = name2id.keys() for scene_name in scenes2parse: print("Converting {} ...".format(scene_name)) output_folder = os.path.join(sys.argv[2], scene_name) velodyne_folder = os.path.join(output_folder, "velodyne/") first_lidar = nusc.get('sample', nusc.scene[name2id[scene_name]]["first_sample_token"])["data"]["LIDAR_TOP"] last_lidar = nusc.get('sample', nusc.scene[name2id[scene_name]]["last_sample_token"])["data"]["LIDAR_TOP"] current_lidar = first_lidar lidar_filenames = [] poses = [] if not os.path.exists(velodyne_folder): print("Creating output folder: {}".format(output_folder)) os.makedirs(velodyne_folder) original = [] while current_lidar != "": lidar_data = nusc.get('sample_data', current_lidar) calib_data = nusc.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])
def create_tracks(all_boxes: EvalBoxes, nusc: NuScenes, eval_split: str, gt: bool) \ -> Dict[str, Dict[int, List[TrackingBox]]]: """ Returns all tracks for all scenes. Samples within a track are sorted in chronological order. This can be applied either to GT or predictions. :param all_boxes: Holds all GT or predicted boxes. :param nusc: The NuScenes instance to load the sample information from. :param eval_split: The evaluation split for which we create tracks. :param gt: Whether we are creating tracks for GT or predictions :return: The tracks. """ # Only keep samples from this split. splits = create_splits_scenes() scene_tokens = set() for sample_token in all_boxes.sample_tokens: scene_token = nusc.get('sample', sample_token)['scene_token'] scene = nusc.get('scene', scene_token) if scene['name'] in splits[eval_split]: scene_tokens.add(scene_token) # Tracks are stored as dict {scene_token: {timestamp: List[TrackingBox]}}. tracks = defaultdict(lambda: defaultdict(list)) # Init all scenes and timestamps to guarantee completeness. for scene_token in scene_tokens: # Init all timestamps in this scene. scene = nusc.get('scene', scene_token) cur_sample_token = scene['first_sample_token'] while True: # Initialize array for current timestamp. cur_sample = nusc.get('sample', cur_sample_token) tracks[scene_token][cur_sample['timestamp']] = [] # Abort after the last sample. if cur_sample_token == scene['last_sample_token']: break # Move to next sample. cur_sample_token = cur_sample['next'] # Group annotations wrt scene and timestamp. for sample_token in all_boxes.sample_tokens: sample_record = nusc.get('sample', sample_token) scene_token = sample_record['scene_token'] tracks[scene_token][sample_record['timestamp']] = all_boxes.boxes[sample_token] # Replace box scores with track score (average box score). This only affects the compute_thresholds method and # should be done before interpolation to avoid diluting the original scores with interpolated boxes. if not gt: for scene_id, scene_tracks in tracks.items(): # For each track_id, collect the scores. track_id_scores = defaultdict(list) for timestamp, boxes in scene_tracks.items(): for box in boxes: track_id_scores[box.tracking_id].append(box.tracking_score) # Compute average scores for each track. track_id_avg_scores = {} for tracking_id, scores in track_id_scores.items(): track_id_avg_scores[tracking_id] = np.mean(scores) # Apply average score to each box. for timestamp, boxes in scene_tracks.items(): for box in boxes: box.tracking_score = track_id_avg_scores[box.tracking_id] # Interpolate GT and predicted tracks. for scene_token in tracks.keys(): tracks[scene_token] = interpolate_tracks(tracks[scene_token]) if not gt: # Make sure predictions are sorted in in time. (Always true for GT). tracks[scene_token] = defaultdict(list, sorted(tracks[scene_token].items(), key=lambda kv: kv[0])) return tracks
class NuscDataset(data.Dataset): """Superclass for monocular dataloaders Args: data_root filenames height width frame_idxs num_scales is_train img_ext """ def __init__(self, data_root, filenames, height, width, frame_idxs, num_scales, version='v1.0-mini', sensor='CAM_FRONT', is_train=False, img_ext='.jpg'): super(NuscDataset, self).__init__() self.data_path = data_root self.data_path = '/share/nuscenes' self.filenames = filenames self.height = height self.width = width self.num_scales = num_scales self.interp = Image.ANTIALIAS self.frame_idxs = frame_idxs self.is_train = is_train self.img_ext = img_ext self.loader = pil_loader self.to_tensor = transforms.ToTensor() self.nusc = NuScenes(version=version, dataroot=self.data_path, verbose=True) self.sensor = sensor self.data_root = '/share/nuscenes' self.full_res_shape = (1600, 640) # We need to specify augmentations differently in newer versions of torchvision. # We first try the newer tuple version; if this fails we fall back to scalars try: self.brightness = (0.8, 1.2) self.contrast = (0.8, 1.2) self.saturation = (0.8, 1.2) self.hue = (-0.1, 0.1) transforms.ColorJitter.get_params(self.brightness, self.contrast, self.saturation, self.hue) except TypeError: self.brightness = 0.2 self.contrast = 0.2 self.saturation = 0.2 self.hue = 0.1 self.resize = {} for i in range(self.num_scales): s = 2**i self.resize[i] = transforms.Resize( (self.height // s, self.width // s), interpolation=self.interp) self.load_depth = self.check_depth() def get_image_path(self, folder, frame_index, side): f_str = "{:010d}{}".format(frame_index, self.img_ext) image_path = os.path.join(self.data_path, folder, "image_0{}/data".format(self.side_map[side]), f_str) return image_path def preprocess(self, inputs, color_aug): """Resize colour images to the required scales and augment if required We create the color_aug object in advance and apply the same augmentation to all images in this item. This ensures that all images input to the pose network receive the same augmentation. """ for k in list(inputs): frame = inputs[k] if "color" in k: n, im, i = k # for nuscenes dataset we crop the image to resolution 1600x640 for i in range(self.num_scales): inputs[(n, im, i)] = self.resize[i](inputs[(n, im, i - 1)]) for k in list(inputs): f = inputs[k] if "color" in k: n, im, i = k inputs[(n, im, i)] = self.to_tensor(f) inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f)) def __len__(self): return len(self.nusc.sample) def __getitem__(self, index): """Returns a single training item from the dataset as a dictionary. Values correspond to torch tensors. Keys in the dictionary are either strings or tuples: ("color", <frame_id>, <scale>) for raw colour images, ("color_aug", <frame_id>, <scale>) for augmented colour images, ("K", scale) or ("inv_K", scale) for camera intrinsics, "stereo_T" for camera extrinsics, and "depth_gt" for ground truth depth maps. <frame_id> is either: an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index', or "s" for the opposite image in the stereo pair. <scale> is an integer representing the scale of the image relative to the fullsize image: -1 images at native resolution as loaded from disk 0 images resized to (self.width, self.height ) 1 images resized to (self.width // 2, self.height // 2) 2 images resized to (self.width // 4, self.height // 4) 3 images resized to (self.width // 8, self.height // 8) """ inputs = {} do_color_aug = self.is_train and random.random() > 0.5 do_flip = self.is_train and random.random() > 0.5 sample = self.nusc.sample[index] if sample['prev'] == '': sample = self.nusc.get('sample', token=sample['next']) elif sample['next'] == '': sample = self.nusc.get('sample', token=sample['prev']) elif sample['prev'] == '' and sample['next'] == '': raise FileNotFoundError('Can not find three consecutive samples') for i in self.frame_idxs: if i == "s": raise NotImplementedError( 'nuscenes dataset does not support stereo depth') else: inputs[("color", i, -1)] = self.get_color(sample, i, do_flip) # # for inp in inputs: # if 'color' in inp: # inputs[inp].save('/home/user/xwh/monodepth2-master/dataset_check/{}.png'.format(str(index)+'_'+str(i))) # adjusting intrinsics to match each scale in the pyramid SENSOR_DATA = self.nusc.get('sample_data', sample['data'][self.sensor]) sensor_calib_data = self.nusc.get( 'calibrated_sensor', SENSOR_DATA['calibrated_sensor_token']) K = np.zeros((4, 4), dtype=np.float32) K[:, 3] = [0, 0, 0, 1] K[:3, :3] = np.array(sensor_calib_data['camera_intrinsic']) K[0, :] /= 1600 K[1, :] /= 900 self.K = K for scale in range(self.num_scales): K = self.K.copy() # nusc camera instrinsic has not been normalized by orinigal image size K[0, :] *= self.width // (2**scale) K[1, :] *= self.height // (2**scale) inv_K = np.linalg.pinv(K) inputs[("K", scale)] = torch.from_numpy(K) # print('-'*20) # print('Nusc ("K", {}) : '.format(scale), K) # print('-' * 20) inputs[("inv_K", scale)] = torch.from_numpy(inv_K) if do_color_aug: color_aug = transforms.ColorJitter.get_params( self.brightness, self.contrast, self.saturation, self.hue) else: color_aug = (lambda x: x) self.preprocess(inputs, color_aug) for i in self.frame_idxs: del inputs[("color", i, -1)] del inputs[("color_aug", i, -1)] if self.load_depth: LIDAR_RECORD = sample['data']['LIDAR_TOP'] CAMERA_RECORD = sample['data'][self.sensor] points, depth, img = self.map_pointcloud_to_image( self.nusc, LIDAR_RECORD, CAMERA_RECORD, render_intensity=False) depth_gt = self.get_depth(self.nusc, sample, do_flip) # depth_gt[points[:, 1].astype(np.int), points[:, 0].astype(np.int)] = depth # TODO FINISH THE NUSCENES DEPTH MAP GENERATION # depth_gt = self.get_depth(folder, frame_index, side, do_flip) inputs["depth_gt"] = np.expand_dims(depth_gt, 0) inputs["depth_gt"] = torch.from_numpy(inputs["depth_gt"].astype( np.float32)) if "s" in self.frame_idxs: stereo_T = np.eye(4, dtype=np.float32) baseline_sign = -1 if do_flip else 1 side_sign = -1 if side == "l" else 1 stereo_T[0, 3] = side_sign * baseline_sign * 0.1 inputs["stereo_T"] = torch.from_numpy(stereo_T) return inputs def get_color(self, sample, i, do_flip): if i == 0: color_path = self.nusc.get( 'sample_data', token=sample['data'][self.sensor])['filename'] full_color_path = os.path.join(self.data_root, color_path) color = self.loader(full_color_path) # color = color.crop((0,2,1600,898)) color = color.crop((0, 240, 1600, 880)) elif i == -1: prev_sample = self.nusc.get('sample', token=sample['prev']) color_path = self.nusc.get( 'sample_data', token=prev_sample['data'][self.sensor])['filename'] full_color_path = os.path.join(self.data_root, color_path) color = self.loader(full_color_path) # color = color.crop((0,2,1600,898)) color = color.crop((0, 240, 1600, 880)) if i == 1: next_sample = self.nusc.get('sample', token=sample['next']) color_path = self.nusc.get( 'sample_data', token=next_sample['data'][self.sensor])['filename'] full_color_path = os.path.join(self.data_root, color_path) color = self.loader(full_color_path) # color = color.crop((0,2,1600,898)) color = color.crop((0, 240, 1600, 880)) if do_flip: color = color.transpose(pil.FLIP_LEFT_RIGHT) return color # def get_relative_pose(self, current_sample, next_sample): def check_depth(self): return True def get_depth(self, nusc, sample, do_flip): pointsensor_token = sample['data']['LIDAR_TOP'] camsensor_token = sample['data'][self.sensor] pts, depth, img = self.map_pointcloud_to_image(nusc, pointsensor_token, camsensor_token) depth_gt = np.zeros((img.size[0], img.size[1])) pts_int = np.array(pts, dtype=int) depth_gt[pts_int[0, :], pts_int[1, :]] = depth # apply maxpool on nusc depth gt to make the lidar point denser # depth_gt = max_pool2d(torch.from_numpy(depth_gt).unsqueeze(dim=0), kernel_size=3, stride=1, padding=1) # depth_gt = depth_gt.squeeze(0).numpy() # we crop nuscenes depth_gt to match the orinigal color image shape # depth_gt = depth_gt[:,2:898] depth_gt = depth_gt[:, 240:880] if do_flip: depth_gt = np.fliplr(depth_gt) # depth_gt = skimage.transform.resize( # depth_gt, self.full_res_shape[::-1], order=0, preserve_range=True, mode='constant') return np.transpose(depth_gt, (1, 0)) # return depth_gt def get_relative_pose(self, pose1, pose2): """ calculate relative from pose1 to pose2 in the global frame :param from_pose: :param to_pose: :return: """ relative_pose_matrix = np.ones((4, 4)) pose1_rot = pose1[0:3, 0:3] pose1_tran = pose1[3, 0:3] pose2_rot = pose1[0:3, 0:3] pose2_tran = pose2[3, 0:3] pose_1_inv = np.linalg.inv(pose1_rot) rot_pose1_to_pose2 = np.dot(pose_1_inv, pose2_rot) tran_pose1_to_pose2 = pose2_tran - pose1_tran relative_pose_matrix[0:3, 0:3] = rot_pose1_to_pose2 relative_pose_matrix[3, 0:3] = tran_pose1_to_pose2 return relative_pose_matrix def map_pointcloud_to_image(self, nusc, pointsensor_token: str, camera_token: str, min_dist: float = 1.0, render_intensity: bool = False, show_lidarseg: bool = False): """ Given a point sensor (lidar/radar) token and camera sample_data token, load pointcloud and map it to the image plane. :param pointsensor_token: Lidar/radar sample_data token. :param camera_token: Camera sample_data token. :param min_dist: Distance from the camera below which points are discarded. :param render_intensity: Whether to render lidar intensity instead of point depth. :param show_lidarseg: Whether to render lidar intensity instead of point depth. :param filter_lidarseg_labels: Only show lidar points which belong to the given list of classes. If None or the list is empty, all classes will be displayed. :param lidarseg_preds_bin_path: A path to the .bin file which contains the user's lidar segmentation predictions for the sample. :return (pointcloud <np.float: 2, n)>, coloring <np.float: n>, image <Image>). """ cam = nusc.get('sample_data', camera_token) pointsensor = nusc.get('sample_data', pointsensor_token) pcl_path = osp.join(nusc.dataroot, pointsensor['filename']) if pointsensor['sensor_modality'] == 'lidar': # Ensure that lidar pointcloud is from a keyframe. assert pointsensor['is_key_frame'], \ 'Error: Only pointclouds which are keyframes have lidar segmentation labels. Rendering aborted.' pc = LidarPointCloud.from_file(pcl_path) else: pc = RadarPointCloud.from_file(pcl_path) im = Image.open(osp.join(nusc.dataroot, cam['filename'])) # Points live in the point sensor frame. So they need to be transformed via global to the image plane. # First step: transform the pointcloud to the ego vehicle frame for the timestamp of the sweep. cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token']) pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix) pc.translate(np.array(cs_record['translation'])) # Second step: transform from ego to the global frame. poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token']) pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix) pc.translate(np.array(poserecord['translation'])) # Third step: transform from global into the ego vehicle frame for the timestamp of the image. poserecord = nusc.get('ego_pose', cam['ego_pose_token']) pc.translate(-np.array(poserecord['translation'])) pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T) # Fourth step: transform from ego into the camera. cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token']) pc.translate(-np.array(cs_record['translation'])) pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T) # Fifth step: actually take a "picture" of the point cloud. # Grab the depths (camera frame z axis points away from the camera). depths = pc.points[2, :] if render_intensity: assert pointsensor['sensor_modality'] == 'lidar', 'Error: Can only render intensity for lidar, ' \ 'not %s!' % pointsensor['sensor_modality'] # Retrieve the color from the intensities. # Performs arbitary scaling to achieve more visually pleasing results. intensities = pc.points[3, :] intensities = (intensities - np.min(intensities)) / ( np.max(intensities) - np.min(intensities)) intensities = intensities**0.1 intensities = np.maximum(0, intensities - 0.5) coloring = intensities else: # Retrieve the color from the depth. coloring = depths # Take the actual picture (matrix multiplication with camera-matrix + renormalization). points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True) # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons. # Also make sure points are at least 1m in front of the camera to avoid seeing the lidar points on the camera # casing for non-keyframes which are slightly out of sync. mask = np.ones(depths.shape[0], dtype=bool) mask = np.logical_and(mask, depths > min_dist) mask = np.logical_and(mask, points[0, :] > 1) mask = np.logical_and(mask, points[0, :] < im.size[0] - 1) mask = np.logical_and(mask, points[1, :] > 1) mask = np.logical_and(mask, points[1, :] < im.size[1] - 1) points = points[:, mask] coloring = coloring[mask] return points, coloring, im
def visualize_sample(nusc: NuScenes, sample_token: str, all_annotations: Dict, all_results: Dict, nsweeps: int=1, conf_th: float=0.15, eval_range: float=40, verbose=True) -> None: """ Visualizes a sample from BEV with annotations and detection results. :param nusc: NuScenes object. :param sample_token: The nuScenes sample token. :param all_annotations: Maps each sample token to its annotations. :param all_results: Maps each sample token to its results. :param nsweeps: Number of sweeps used for lidar visualization. :param conf_th: The confidence threshold used to filter negatives. :param eval_range: Range in meters beyond which boxes are ignored. :param verbose: Whether to print to stdout. """ # Retrieve sensor & pose records. sample_rec = nusc.get('sample', sample_token) sd_record = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP']) cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token']) pose_record = nusc.get('ego_pose', sd_record['ego_pose_token']) # Get boxes. boxes_gt_global = all_annotations[sample_token] boxes_est_global = all_results[sample_token] # Map GT boxes to lidar. boxes_gt = boxes_to_sensor(boxes_gt_global, pose_record, cs_record) # Map EST boxes to lidar. boxes_est = boxes_to_sensor(boxes_est_global, pose_record, cs_record) # Get point cloud in lidar frame. pc, _ = LidarPointCloud.from_file_multisweep(nusc, sample_rec, 'LIDAR_TOP', 'LIDAR_TOP', nsweeps=nsweeps) # Init axes. _, ax = plt.subplots(1, 1, figsize=(9, 9)) # Show point cloud. points = view_points(pc.points[:3, :], np.eye(4), normalize=False) dists = np.sqrt(np.sum(pc.points[:2, :] ** 2, axis=0)) colors = np.minimum(1, dists / eval_range) ax.scatter(points[0, :], points[1, :], c=colors, s=0.2) # Show ego vehicle. ax.plot(0, 0, 'x', color='black') # Show GT boxes. for box in boxes_gt: box.render(ax, view=np.eye(4), colors=('g', 'g', 'g'), linewidth=2) # Show EST boxes. for box in boxes_est: # Show only predictions with a high score. if box.score >= conf_th: box.render(ax, view=np.eye(4), colors=('b', 'b', 'b'), linewidth=1) # Limit visible range. axes_limit = eval_range + 3 # Slightly bigger to include boxes that extend beyond the range. ax.set_xlim(-axes_limit, axes_limit) ax.set_ylim(-axes_limit, axes_limit) # Show plot. if verbose: print('Showing sample token %s' % sample_token) plt.title(sample_token) plt.show()
class NuScenesEvaluator(Evaluator): """ NuScenes Model Evaluator Evaluates models based on the nuScenes dataset. The evaluator logs per sample F1 scores as well as visualizations of the validation data samples. Arguments: logdir: Folder path to store the evaluation log files, <str>. batch_size: Batch size used for the model prediction, <int>. verbose: Verbosity mode (0 = silent, 1 = progress bar, 2 = one line per epoch), <int>. steps: Total number of epoch before stopping the evaluation, <int>. callbacks: List of callbacks to apply during evaluation, <list>. max_queue_size: Maximum size for the generator queue (used for generator input only), <int>. workers: Maximum number of processes to spin up when using process-based threading (used for generator input only), <int>. use_multiprocessing: Whether use process-based threading (used for generator input only), <bool>. visualize: Whether to visualize the prediction or not, <bool>. velocities: Whether to visualize the velocity vectors or not, <bool>. version: Version of the nuScenes dataset, <str>. dataroot: Path to the raw nuScenes dataset files, <str>. class_matching_dict: Key-Value-Pairs of the original category names and the user configured class names, <dict>. preprocessor_attributes: Keyword attributes for the nuScenes pre-processor, <dict>. """ def __init__(self, logdir: str = None, batch_size: int = 1, verbose: int = 2, steps: int = None, callbacks: list = None, max_queue_size: int = 10, workers: int = 1, use_multiprocessing: bool = False, visualize: bool = True, velocities: bool = True, version: str = 'v1.0-mini', dataroot: str = None, class_matching_dict: dict = None, preprocessor_attributes: dict = None): # Initialize base class super(NuScenesEvaluator, self).__init__(logdir=logdir, batch_size=batch_size, steps=steps, verbose=verbose, callbacks=callbacks, max_queue_size=max_queue_size, workers=workers, use_multiprocessing=use_multiprocessing) # Initialize NuScenesEvaluator self.visualize = visualize self.velocities = velocities self.version = version self.dataroot = dataroot self.class_matching_dict = class_matching_dict if class_matching_dict is not None else {} self.preprocessor_attributes = preprocessor_attributes if preprocessor_attributes is not None else {} # Load nuScenes dataset self.nusc = NuScenes(version=self.version, dataroot=self.dataroot, verbose=False) # Get nuScenes pre-processor self.nusc_preprocessor = NuScenesPreProcessor( self.class_matching_dict, **self.preprocessor_attributes) def _write_to_csv(self, filename, data): with open(filename, "w", newline="") as file: writer = csv.writer(file) writer.writerow( ["Scene token"] + ["Val F1 score sample " + str(i) for i in range(41)]) writer.writerows(data) def _get_bounding_boxes(self, files): """ Returns the bounding boxes of the scenes specified in files. Hint: The filenames must correspond to the nuScenes scene token. Attributes: files: Dataset of filenames of the nuScenes scenes, <tf.data.Dataset>. Returns: boxes: Nested list of bounding boxes (scenes, samples, boxes), <List>. """ # Define boxes = [] # Get bounding boxes for filename in files: # Define sample_boxes = [] # Get current scene scene_token = os.path.basename( os.path.splitext(filename.numpy())[0]).decode("utf-8") scene = self.nusc.get('scene', scene_token) sample = self.nusc.get('sample', scene['first_sample_token']) # Skip first sample since it has less radar sweeps available sample = self.nusc.get('sample', sample['next']) # Iterate over all samples within the scene while True: # Get bounding boxes of the sample sample_boxes.append( self.nusc_preprocessor.get_annotations(dataset=self.nusc, sample=sample)) # Check if sample is the last sample in the current scene if sample['next']: sample = self.nusc.get('sample', sample['next']) else: break boxes.append(sample_boxes) return boxes def _get_f1_scores(self, confidence, dataset, num_classes): """ Returns sample-wise F1 scores for all scenes. Arguments: confidence: Confidance of the predicted label, <tf.Tensor>. dataset: Dataset with features and labels, <tf.data.Dataset>. num_classes: Number of classes, <int>. Returns: F1: List of F1 scores per sample, <List>. """ F1 = [] # Iterate over scenes for (_, lables), score in zip(dataset, confidence): sample_F1 = [] # Iterate over samples for sample_labels, sample_scores in zip(tf.unstack(lables, axis=1), tf.unstack(score, axis=1)): sample_F1.append( float( metrics.F1Score(num_classes=num_classes, average='macro', top_k=1)(sample_labels, sample_scores))) F1.append(sample_F1) return F1 def log_f1_scores(self, F1, files): data = [[ os.path.basename(os.path.splitext( token.numpy())[0]).decode("utf-8") ] + scores for token, scores in zip(files, F1)] filename = os.path.join(self.logdir, 'F1Scores.csv') self._write_to_csv(filename, data) def visualize_results(self, dataset, prediction, confidence, boxes, files): """ Visualizes the model predictions and the ground truth data per sample. Arguments: dataset: Dataset with features and labels, <tf.data.Dataset>. prediction: Predicted labels, <tf.Tensor>. confidence: Confidance of the predicted labels, <tf.Tensor>. boxes: Nested list of bounding boxes (scenes, samples, boxes), <List>. files: Dataset of filenames of the nuScenes scenes, <tf.data.Dataset>. """ # Define colors = utils.get_colors('blues') i = 0 # Visualize samples with progressbar.ProgressBar(max_value=len(prediction)) as bar: for filename, ( features, _ ), scene_predictions, scene_confidence, scene_boxes in zip( files, dataset, prediction, confidence, boxes): n_samples_dataset = features["x"].shape[1] n_samples_script_loader = len(scene_boxes) assert n_samples_dataset == n_samples_script_loader, f"""Number of scene samples in dataset: {n_samples_dataset} is different than that from nuScenes loader {n_samples_script_loader}. Make sure to skip first sample in dataset creation.""" # Update progressbar bar.update(i) # Get current scene scene_token = os.path.basename( os.path.splitext(filename.numpy())[0]).decode("utf-8") scene = self.nusc.get('scene', scene_token) # Get map log = self.nusc.get('log', scene['log_token']) map_ = self.nusc.get('map', log['map_token']) # Get sample sample = self.nusc.get('sample', scene['first_sample_token']) # Skip first sample since it has less radar sweeps available sample = self.nusc.get('sample', sample['next']) for j, sample_boxes in enumerate(scene_boxes): # Get sample data ref_data = self.nusc.get('sample_data', sample['data']['LIDAR_TOP']) pose = self.nusc.get('ego_pose', ref_data['ego_pose_token']) sample_points = tf.concat( (features['x'][:, j, :], features['y'][:, j, :], features['z'][:, j, :]), axis=0).numpy() sample_predictions = np.squeeze(scene_predictions[:, j, :]) sample_confidence = np.amax(np.squeeze( scene_confidence[:, j, :]), axis=-1) # Get velocities if self.velocities: velocities = tf.concat((features['vx_comp'][:, j, :], features['vy_comp'][:, j, :]), axis=0).numpy() else: velocities = None # Set image name out_path = os.path.join( self.logdir, 'images', scene_token + '_' + str(sample['timestamp']) + '_' + sample['token'] + '.svg') # Render sample render_sample.render_sample(sample_points, sample_predictions, sample_boxes, score=sample_confidence, velocities=velocities, map_=map_, pose=pose, colors=colors, out_path=out_path, out_format='svg') # Get next sample if sample['next']: sample = self.nusc.get('sample', sample['next']) i += 1 def evaluate(self, model, dataset, files, num_classes): # Make prediction prediction, confidence = self.predict(model=model, dataset=dataset) # Get bounding boxes boxes = self._get_bounding_boxes(files) # Log F1 scores F1 = self._get_f1_scores(confidence, dataset, num_classes) self.log_f1_scores(F1, files) # Visualize prediction if self.visualize: self.visualize_results(dataset, prediction, confidence, boxes, files) def get_config(self): # Get instance configuration config = { 'visualize': self.visualize, 'version': self.version, 'dataroot': self.dataroot, 'class_matching_dict': self.class_matching_dict, 'preprocessor_attributes': self.preprocessor_attributes } # Get base class configuration base_config = super(NuScenesEvaluator, self).get_config() return dict(list(base_config.items()) + list(config.items()))
import numpy as np nusc = NuScenes(version='v1.0-mini', dataroot='D:\\v1.0-mini', verbose=True) print('all scene') nusc.list_scenes() # 列出所有的场景 my_scene = nusc.scene[0] print('\nmy_scene\n', my_scene) # 列出所有场景中的第一个场景对应的所有 token first_sample_token = my_scene[ 'first_sample_token'] # scene.json 中第一个元素组中的 first_sample_token # sample.json 中第一帧的所有元素(token、timestamp、prev、next、scene_token) 和 # 汽车所有传感器对应的数据 my_sample = nusc.get('sample', first_sample_token) print('my_sample\n', my_sample) # 只取汽车所有传感器对应的数据的方法 print('my_sample\n', my_sample['data']) # 只取CAM_FRONT 的方法 # sensor = 'CAM_FRONT' # cam_front_data = nusc.get('sample_data', my_sample['data'][sensor]) # print(cam_front_data) # 以此类推 # 获取传感器对应的标注框的方法(以前一个例子为例)[可视化] # nusc.render_sample_data(cam_front_data['token']) nusc.list_sample( my_sample['token']) # 列出了与示例相关的所有sample_data关键帧和sample_annotation my_annotation_token = my_sample['anns'][ 18] # 取第19个 sample_annotation_token(其它的好像没标注好)
'vehicle.truck': 'truck', 'vehicle.construction': 'construction_vehicle', 'vehicle.emergency.ambulance': 'ignore', 'vehicle.emergency.police': 'ignore', 'vehicle.trailer': 'trailer', 'movable_object.barrier': 'barrier', 'movable_object.trafficcone': 'traffic_cone', 'movable_object.pushable_pullable': 'ignore', 'movable_object.debris': 'ignore', 'static_object.bicycle_rack': 'ignore', } for sample in nusc.sample: cam_token = sample['data']['CAM_FRONT'] pc_token = sample['data']['LIDAR_TOP'] pointsensor = nusc.get('sample_data', pc_token) # pcl_path = osp.join(nusc.dataroot, pointsensor['filename']) cam = nusc.get('sample_data', cam_token) # print(cam) # im = Image.open(osp.join(nusc.dataroot, cam['filename'])) # print(im.width, im.height) data_path, box_list, cam_intrinsic = nusc.get_sample_data(pc_token) # print(data_path) # print(pcl_path) # fname = pcl_path.split('/')[-1].split('.')[0] + '.txt' # print(fname)
def evaluate(split): params = read_params(FLAGS.param) nusc = NuScenes(version='v1.0-trainval', dataroot=FLAGS.nuscenes, verbose=True) sensor = 'LIDAR_TOP' kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2) meta = { 'use_camera': False, 'use_lidar': True, 'use_radar': False, 'use_map': False, 'use_external': False, } results = {} results_0_3 = {} detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(FLAGS.graph, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) for node in od_graph_def.node: if 'BatchMultiClassNonMaxSuppression' in node.name: node.device = '/device:CPU:0' tf.import_graph_def(od_graph_def, name='') with detection_graph.as_default(): config = tf.ConfigProto() config.gpu_options.allow_growth = False with tf.Session(graph=detection_graph, config=config) as sess: image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0') detection_boxes_inclined = detection_graph.get_tensor_by_name('detection_boxes_3d:0') detection_scores = detection_graph.get_tensor_by_name('detection_scores:0') detection_classes = detection_graph.get_tensor_by_name('detection_classes:0') num_detections = detection_graph.get_tensor_by_name('num_detections:0') scene_splits = create_splits_scenes() # os.system('touch {}'.format()) inf_time_list = [] for scene in nusc.scene: if scene['name'] not in scene_splits[split]: continue current_sample_token = scene['first_sample_token'] last_sample_token = scene['last_sample_token'] sample_in_scene = True while sample_in_scene: if current_sample_token == last_sample_token: sample_in_scene = False sample = nusc.get('sample', current_sample_token) lidar_top_data = nusc.get('sample_data', sample['data'][sensor]) # Get global pose and calibration data ego_pose = nusc.get('ego_pose', lidar_top_data['ego_pose_token']) calib_sensor = nusc.get('calibrated_sensor', lidar_top_data['calibrated_sensor_token']) ego_to_global = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation'])) lidar_to_ego = transform_matrix(calib_sensor['translation'], Quaternion(calib_sensor['rotation'])) # Read input data filename_prefix = os.path.splitext(os.path.splitext(lidar_top_data['filename'])[0])[0] image_stacked, det_mask, image_ground, image_zmax = read_images(FLAGS.data, filename_prefix) # Inference start_time = time.time() (boxes_aligned, boxes_inclined, scores, classes, num) = sess.run( [detection_boxes, detection_boxes_inclined, detection_scores, detection_classes, num_detections], feed_dict={image_tensor: image_stacked}) inf_time = time.time() - start_time print('Inference time:', inf_time) inf_time_list.append(inf_time) # Evaluate object detection label_map = label_map_util.load_labelmap(FLAGS.label_map) categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=10, use_display_name=True) category_index = label_map_util.create_category_index(categories) boxes = [] boxes_0_3 = [] scores = np.squeeze(scores) for i in range(scores.shape[0]): if scores[i] > .230: object_class = category_index[int(np.squeeze(classes)[i])]['name'] box = calculate_object_box(tuple(np.squeeze(boxes_aligned)[i]), tuple(np.squeeze(boxes_inclined)[i]), image_ground, image_zmax, object_class, scores[i], params) # Transformation box coordinate system to nuscenes lidar coordinate system box.rotate(kitti_to_nu_lidar) # Transformation nuscenes lidar coordinate system to ego vehicle frame box.rotate(Quaternion(matrix=lidar_to_ego[:3, :3])) box.translate(lidar_to_ego[:3, 3]) # Transformation ego vehicle frame to global frame box.rotate(Quaternion(matrix=ego_to_global[:3, :3])) box.translate(ego_to_global[:3, 3]) boxes.append(box) for i in range(scores.shape[0]): if scores[i] > .225: object_class = category_index[int(np.squeeze(classes)[i])]['name'] box = calculate_object_box(tuple(np.squeeze(boxes_aligned)[i]), tuple(np.squeeze(boxes_inclined)[i]), image_ground, image_zmax, object_class, scores[i], params) # Transformation box coordinate system to nuscenes lidar coordinate system box.rotate(kitti_to_nu_lidar) # Transformation nuscenes lidar coordinate system to ego vehicle frame box.rotate(Quaternion(matrix=lidar_to_ego[:3, :3])) box.translate(lidar_to_ego[:3, 3]) # Transformation ego vehicle frame to global frame box.rotate(Quaternion(matrix=ego_to_global[:3, :3])) box.translate(ego_to_global[:3, 3]) boxes_0_3.append(box) # Convert boxes to nuScenes detection challenge result format. sample_results = [box_to_sample_result(current_sample_token, box) for box in boxes] results[current_sample_token] = sample_results sample_results_0_3 = [box_to_sample_result(current_sample_token, box) for box in boxes_0_3] results_0_3[current_sample_token] = sample_results_0_3 current_sample_token = sample['next'] f_info = open( os.path.join(FLAGS.output, 'inference_time.txt'),'w+') average_inf_time = sum(inf_time_list) / float(len(inf_time_list)) f_info.write('average time:{}\n'.format(average_inf_time)) f_info.write(str(inf_time_list)) submission = { 'meta': meta, 'results': results } submission_path = os.path.join(FLAGS.output, 'submission_0_3nn30.json') with open(submission_path, 'w') as f: json.dump(submission, f, indent=2) submission_0_3 = { 'meta': meta, 'results': results_0_3 } submission_path_0_3 = os.path.join(FLAGS.output, 'submission_0_3nn25.json') with open(submission_path_0_3, 'w') as f: json.dump(submission_0_3, f, indent=2)