Example #1
def get_pose_intrinsic(
    save_path='/public/MARS/datasets/nuScenes-SF/meta/cam_pose_intrinsic.json'
):

    split = 'train'
    data_path = 'data/nuscenes/'
    nusc = NuScenes(version=SPLITS[split], dataroot=data_path, verbose=True)
    samples = nusc.sample

    cam_token2cam_intext = {}

    for sample in tqdm(samples):
        for cam_name in CamNames:
            cam_token = sample['data'][cam_name]
            cam_data = nusc.get('sample_data', cam_token)
            ego_pose = nusc.get('ego_pose', cam_data['ego_pose_token'])
            cam_cs = nusc.get('calibrated_sensor',
                              cam_data['calibrated_sensor_token'])

            # used to transform from ego to global
            pose_matrix = quat_trans2matrix(ego_pose['rotation'],
                                            ego_pose['translation'])
            # used to transform from camera to ego
            cam_pose = quat_trans2matrix(cam_cs['rotation'],
                                         cam_cs['translation'])

            cam_pose_world = np.matmul(pose_matrix, cam_pose)

            ret = {'pose': cam_pose_world.tolist()}
            ret['intrinsic'] = cam_cs['camera_intrinsic']

            cam_token2cam_intext[cam_token] = ret

    with open(save_path, 'w') as f:
        json.dump(cam_token2cam_intext, f)
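Example #1 relies on helpers and constants defined elsewhere in its source file. A minimal sketch of what they might look like, assuming the nuScenes quaternion convention [w, x, y, z] and the six standard camera channels (hypothetical values for illustration):

import numpy as np
from pyquaternion import Quaternion

# Assumed module-level constants.
SPLITS = {'train': 'v1.0-trainval', 'val': 'v1.0-trainval', 'test': 'v1.0-test'}
CamNames = ['CAM_FRONT', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT',
            'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT']

def quat_trans2matrix(rotation, translation):
    # Build a 4x4 homogeneous transform from a [w, x, y, z] quaternion and a translation.
    matrix = np.eye(4)
    matrix[:3, :3] = Quaternion(rotation).rotation_matrix
    matrix[:3, 3] = translation
    return matrix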
Example #2
def load_keyframe_rad_tokens(nusc: NuScenes) -> List[str]:
    """
    This method takes a NuScenes instance and returns a list with the
    sample_tokens of all RADAR_FRONT sample_data which have (almost) the same
    timestamp as their corresponding sample (is_key_frame = True).
    :param nusc: NuScenes instance
    :return: rad_sd_tokens, a list of radar sample_data tokens
    """

    rad_sd_tokens = []
    for scene_rec in nusc.scene:
        print('Loading samples of scene %s....' % scene_rec['name'], end = '')
        start_sample_rec = nusc.get('sample', scene_rec['first_sample_token'])

        rad_front_sd_rec = nusc.get('sample_data', start_sample_rec['data']['RADAR_FRONT'])

        cur_rad_front_sd_rec = rad_front_sd_rec
        rad_sd_tokens.append(cur_rad_front_sd_rec['token'])

        #Append all keyframe radar sample tokens in list
        while cur_rad_front_sd_rec['next'] != '':
            cur_rad_front_sd_rec = nusc.get('sample_data', cur_rad_front_sd_rec['next'])
            if cur_rad_front_sd_rec['is_key_frame']:
                rad_sd_tokens.append(cur_rad_front_sd_rec['token'])
        print("done!")

    return rad_sd_tokens
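A minimal usage sketch for the function above (the dataroot path is a placeholder):

from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='data/nuscenes', verbose=True)
rad_sd_tokens = load_keyframe_rad_tokens(nusc)
print('Collected %d keyframe RADAR_FRONT sample_data tokens' % len(rad_sd_tokens))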
Example #3
def get_egoposes_on_drivable_ratio(nusc: NuScenes, nusc_map: NuScenesMap,
                                   scene_token: str) -> float:
    """
    Get the ratio of ego poses on the drivable area.
    :param nusc: A NuScenes instance.
    :param nusc_map: The NuScenesMap instance of a particular map location.
    :param scene_token: The token of the current scene.
    :return: The ratio of poses that fall on the driveable area.
    """

    # Go through each sample in the scene.
    sample_tokens = nusc.field2token('sample', 'scene_token', scene_token)
    poses_all = 0
    poses_valid = 0
    for sample_token in sample_tokens:

        # Poses are associated with the sample_data. Here we use the lidar sample_data.
        sample_record = nusc.get('sample', sample_token)
        sample_data_record = nusc.get('sample_data',
                                      sample_record['data']['LIDAR_TOP'])
        pose_record = nusc.get('ego_pose',
                               sample_data_record['ego_pose_token'])

        # Check if the ego pose is on the driveable area.
        ego_pose = pose_record['translation'][:2]
        record = nusc_map.record_on_point(ego_pose[0], ego_pose[1],
                                          'drivable_area')
        if len(record) > 0:
            poses_valid += 1
        poses_all += 1
    ratio_valid = poses_valid / poses_all

    return ratio_valid
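A minimal usage sketch for get_egoposes_on_drivable_ratio, assuming a local nuScenes installation plus the map expansion package (paths are placeholders). The map is looked up from the scene's log location, as in the test further below:

from nuscenes.nuscenes import NuScenes
from nuscenes.map_expansion.map_api import NuScenesMap

nusc = NuScenes(version='v1.0-mini', dataroot='data/nuscenes', verbose=False)
scene = nusc.scene[0]
location = nusc.get('log', scene['log_token'])['location']
nusc_map = NuScenesMap(dataroot='data/nuscenes', map_name=location)
print(get_egoposes_on_drivable_ratio(nusc, nusc_map, scene['token']))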
def load_keyframe_rad_cam_data(
        nusc: NuScenes) -> (List[str], List[str], List[str]):
    """
    This method takes a Nuscenes instance and returns two lists with the
    sample_tokens of all CAM_FRONT and RADAR_FRONT sample_data which
    have (almost) the same timestamp as their corresponding sample
    (is_key_frame = True). In addition, it returns the sample_names which are set
    equal to the filename of each CAM_FRONT sample_data.
    :param nusc: Nuscenes instance
    :return: (cam_sd_tokens, rad_sd_tokens, sample_names).
    Tuple with lists of camera and radar tokens as well as sample_names
    """

    #Lists to hold tokens of all cam and rad sample_data that have is_key_frame = True
    #These have (almost) the same timestamp as their corresponding sample and
    #correspond to the files in the ..sets/nuscenes/samples/ folder
    cam_sd_tokens = []
    rad_sd_tokens = []
    sample_names = []
    for scene_rec in nusc.scene:
        #scene_name = scene_rec["name"] + "_sample_"
        print('Loading samples of scene %s....' % scene_rec['name'], end='')
        start_sample_rec = nusc.get('sample', scene_rec['first_sample_token'])
        #sample_name = scene_name + str(start_sample_rec["timestamp"])

        cam_front_sd_rec = nusc.get('sample_data',
                                    start_sample_rec['data']['CAM_FRONT'])
        rad_front_sd_rec = nusc.get('sample_data',
                                    start_sample_rec['data']['RADAR_FRONT'])

        cur_cam_front_sd_rec = cam_front_sd_rec
        cur_rad_front_sd_rec = rad_front_sd_rec
        sample_name = cur_cam_front_sd_rec["filename"].replace(
            'samples/CAM_FRONT/', '').replace('.jpg', '')
        #Append the first sample_name, cam and rad sample_data tokens in lists
        sample_names.append(sample_name)
        cam_sd_tokens.append(cur_cam_front_sd_rec['token'])
        rad_sd_tokens.append(cur_rad_front_sd_rec['token'])

        #Append all keyframe sample_names and camera sample tokens in list
        while cur_cam_front_sd_rec['next'] != '':
            cur_cam_front_sd_rec = nusc.get('sample_data',
                                            cur_cam_front_sd_rec['next'])
            sample_name = cur_cam_front_sd_rec["filename"].replace(
                'samples/CAM_FRONT/', '').replace('.jpg', '')
            if cur_cam_front_sd_rec['is_key_frame']:
                sample_names.append(sample_name)
                cam_sd_tokens.append(cur_cam_front_sd_rec['token'])

        #Append all keyframe radar sample tokens in list
        while cur_rad_front_sd_rec['next'] != '':
            cur_rad_front_sd_rec = nusc.get('sample_data',
                                            cur_rad_front_sd_rec['next'])
            if cur_rad_front_sd_rec['is_key_frame']:
                rad_sd_tokens.append(cur_rad_front_sd_rec['token'])
        print("done!")

    assert (len(cam_sd_tokens) == len(rad_sd_tokens) == len(sample_names))

    return (cam_sd_tokens, rad_sd_tokens, sample_names)
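A minimal usage sketch for the function above (dataroot is a placeholder):

from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='data/nuscenes', verbose=True)
cam_sd_tokens, rad_sd_tokens, sample_names = load_keyframe_rad_cam_data(nusc)
print('%d synchronized CAM_FRONT / RADAR_FRONT keyframes' % len(cam_sd_tokens))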
Example #5
def main():
    root_path = '/extssd/jiaxin/nuscenes/test'
    nusc = NuScenes(version='v1.0-test', dataroot=root_path, verbose=True)

    sensor = 'CAM_FRONT'

    counter = 0
    for i, scene in enumerate(nusc.scene):
        scene_token = scene['token']
        scene = nusc.get('scene', scene_token)
        first_sample = nusc.get('sample', scene['first_sample_token'])
        camera = nusc.get('sample_data', first_sample['data'][sensor])

        img = np.array(
            Image.open(os.path.join(nusc.dataroot,
                                    camera['filename'])).convert('L'))
        H, W = img.shape[0], img.shape[1]

        img_mean = np.mean(img.astype(np.float32))

        white_mask = img > 150
        white_area = np.sum(white_mask.astype(np.float32))

        if img_mean < 110 and white_area < (H * W) * 0.1:
            print('\'%s\',' % (scene_token))
            counter += 1
            plt.figure()
            plt.gray()
            plt.imshow(img)
            plt.show()

    print('%d night scenes' % counter)
def get_rad_to_cam(nusc: NuScenes, cam_sd_token: str, rad_sd_token: str):
    """
    Method to get the extrinsic calibration matrix from radar_front to camera_front
    for a specifi sample.
    Every sample_data has a record on which calibrated - sensor the
    data is collected from ("calibrated_sensor_token" key)
    The calibrated_sensor record consists of the definition of a
    particular sensor (lidar/radar/camera) as calibrated on a particular vehicle
    :param nusc: Nuscenes instance
    :param cam_sd_token : A token of a specific camera_front sample_data
    :param rad_sd_token : A token of a specific radar_front sample_data
    :param nuscenes_way : Temporal debug param until transformation order is clear
    :return: rad_to_cam <np.float: 4, 4> Returns homogeneous transform matrix from radar to camera
    """
    cam_cs_token = nusc.get('sample_data',
                            cam_sd_token)["calibrated_sensor_token"]
    cam_cs_rec = nusc.get('calibrated_sensor', cam_cs_token)

    rad_cs_token = nusc.get('sample_data',
                            rad_sd_token)["calibrated_sensor_token"]
    rad_cs_rec = nusc.get('calibrated_sensor', rad_cs_token)

    #Based on how transforms are handled in nuScenes scripts like scripts/export_kitti.py
    rad_to_ego = transform_matrix(rad_cs_rec['translation'],
                                  Quaternion(rad_cs_rec['rotation']),
                                  inverse=False)
    ego_to_cam = transform_matrix(cam_cs_rec['translation'],
                                  Quaternion(cam_cs_rec['rotation']),
                                  inverse=True)
    rad_to_cam = np.dot(ego_to_cam, rad_to_ego)
    return rad_to_cam
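A usage sketch for the transform above, assuming nusc, cam_sd_token and rad_sd_token are already available and refer to the same sample:

import numpy as np

rad_to_cam = get_rad_to_cam(nusc, cam_sd_token, rad_sd_token)
point_radar = np.array([10.0, 0.0, 0.5, 1.0])  # homogeneous point in the radar frame
point_cam = rad_to_cam @ point_radar           # same point expressed in the camera frame
print(point_cam[:3])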
Example #7
def create_tf_record_train_as_val(fn_out, split, vis_results):
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map)
    writer = tf.python_io.TFRecordWriter(fn_out)
    params = read_params(FLAGS.param)
    logging.debug('Params: ' + str(params))
    nusc = NuScenes(version='v1.0-trainval', dataroot=FLAGS.nuscenes, verbose=True)
    sensor = 'LIDAR_TOP'
    nu_to_kitti_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2).inverse
    split_logs = create_splits_logs(split, nusc)
    sample_tokens = split_to_samples(nusc, split_logs)
    random.shuffle(sample_tokens)
    print('Number of samples:', len(sample_tokens))

    for sample_token in sample_tokens[1:100]:
        sample = nusc.get('sample', sample_token)
        lidar_top_data = nusc.get('sample_data', sample['data'][sensor])
        if not lidar_top_data['prev']:
            continue
        lidar_top_data_prev = nusc.get('sample_data', lidar_top_data['prev'])
        labels_corners, labels_center, labels_data = compute_labels_image(nusc, sample, sensor,
                                                                          nu_to_kitti_lidar, params)
        filename = os.path.splitext(os.path.splitext(lidar_top_data['filename'])[0])[0]
        filename_prev = os.path.splitext(os.path.splitext(lidar_top_data_prev['filename'])[0])[0]
        tf_example = dict_to_tf_example(labels_corners, labels_center, labels_data, params, label_map_dict,
                                        FLAGS.data, FLAGS.data_beliefs, filename, filename_prev)
        writer.write(tf_example.SerializeToString())
        if (vis_results):
            visualize_results(FLAGS.data, filename, labels_corners, os.path.join(FLAGS.output, 'Debug'))
def get_boxes2d(sample_data_token: str, image_annotations_token2ind=None, image_annotations=None):
    # Avoid mutable default arguments.
    if image_annotations_token2ind is None:
        image_annotations_token2ind = {}
    if image_annotations is None:
        image_annotations = []
    nusc = NuScenes(version='v1.0-mini', dataroot='dataset/nuScenes/v1.0-mini', verbose=True)
    # Retrieve sensor & pose records
    sd_record = nusc.get('sample_data', sample_data_token)
    curr_sample_record = nusc.get('sample', sd_record['sample_token'])
    #curr_sample_record['image_anns']

    boxes2d = []
    if curr_sample_record['prev'] == "" or sd_record['is_key_frame']:
        # If no previous annotations available, or if sample_data is keyframe just return the current ones.
        for i,x in enumerate(curr_sample_record['anns']):
            record = nusc.get('sample_annotation', x)
            instance_token = record['instance_token']
            record2d = image_annotations[image_annotations_token2ind[instance_token]]
            box2d = Box2d(record2d['bbox_corners'], name=record2d['category_name'], token=record2d['sample_annotation_token'],
                          visibility=record2d['visibility_token'],filename=record2d['filename'])
            boxes2d.append(box2d)

    else:
        prev_sample_record = nusc.get('sample', curr_sample_record['prev'])

        curr_ann_recs = [nusc.get('sample_annotation', token) for token in curr_sample_record['anns']]
        prev_ann_recs = [nusc.get('sample_annotation', token) for token in prev_sample_record['anns']]

        # Maps instance tokens to prev_ann records
        prev_inst_map = {entry['instance_token']: entry for entry in prev_ann_recs}

        t0 = prev_sample_record['timestamp']
        t1 = curr_sample_record['timestamp']
        t = sd_record['timestamp']

        # There are rare situations where the timestamps in the DB are off so ensure that t0 < t < t1.
        t = max(t0, min(t1, t))

        boxes = []
        for curr_ann_rec in curr_ann_recs:

            if curr_ann_rec['instance_token'] in prev_inst_map:
                # If the annotated instance existed in the previous frame, interpolate center & orientation.
                prev_ann_rec = prev_inst_map[curr_ann_rec['instance_token']]

                # Interpolate center.
                center = [np.interp(t, [t0, t1], [c0, c1]) for c0, c1 in zip(prev_ann_rec['translation'],
                                                                             curr_ann_rec['translation'])]

                # Interpolate orientation.
                rotation = Quaternion.slerp(q0=Quaternion(prev_ann_rec['rotation']),
                                            q1=Quaternion(curr_ann_rec['rotation']),
                                            amount=(t - t0) / (t1 - t0))

                box = Box(center, curr_ann_rec['size'], rotation, name=curr_ann_rec['category_name'],
                          token=curr_ann_rec['token'])
            else:
                # If not, simply grab the current annotation.
                box = nusc.get_box(curr_ann_rec['token'])

            boxes.append(box)
    return boxes2d
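Note that in the non-keyframe branch above the interpolated 3D boxes are collected in boxes but never converted to 2D, so only the keyframe branch fills the returned list. The Box2d class comes from the original project; a minimal container matching the call site (hypothetical, for illustration) could look like this:

class Box2d:
    def __init__(self, bbox_corners, name=None, token=None,
                 visibility=None, filename=None):
        self.bbox_corners = bbox_corners  # [xmin, ymin, xmax, ymax] in pixels
        self.name = name                  # category name
        self.token = token                # sample_annotation token
        self.visibility = visibility      # nuScenes visibility token
        self.filename = filename          # image file the box belongs to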
    def test_egoposes_on_map(self):
        """ Test that all ego poses land on """
        nusc = NuScenes(version=self.version,
                        dataroot=os.environ['NUSCENES'],
                        verbose=False)
        whitelist = [
            'scene-0499', 'scene-0501', 'scene-0502', 'scene-0515',
            'scene-0517'
        ]

        invalid_scenes = []
        for scene in tqdm.tqdm(nusc.scene, leave=False):
            if scene['name'] in whitelist:
                continue

            log = nusc.get('log', scene['log_token'])
            map_name = log['location']
            nusc_map = self.nusc_maps[map_name]
            ratio_valid = get_egoposes_on_drivable_ratio(
                nusc, nusc_map, scene['token'])
            if ratio_valid != 1.0:
                print(
                    'Error: Scene %s has a ratio of %f ego poses on the driveable area!'
                    % (scene['name'], ratio_valid))
                invalid_scenes.append(scene['name'])

        self.assertEqual(len(invalid_scenes), 0)
Example #10
def merge_depth_sf(depth_meta_path, 
                sf_meta_path, save_path):
    
    with open(depth_meta_path, 'r') as f:
        depth_meta = json.load(f)

    with open(sf_meta_path, 'r') as f:
        sf_meta = json.load(f)
    
    split = 'train'
    data_path = 'data/nuscenes/'
    nusc = NuScenes(
        version=SPLITS[split], dataroot=data_path, verbose=True)
    
    imgpath2paths = {}
    for depth_info in depth_meta:
        sample_token = depth_info['sample_token']
    
        depth_path = depth_info['depth_path']
        img_path = depth_info['img_path']
        cam_name = img_path.split('/')[-2]

        cam_token = nusc.get('sample', sample_token)['data'][cam_name]
        sf_path = sf_meta[cam_token]['points_path']
        img_path = sf_meta[cam_token]['img_path'] # use this version of img path

        tmp = {'token': cam_token, 'depth_path': depth_path, 
                'cam_name': cam_name, 'sf_path': sf_path, 
                'img_path': img_path}
        imgpath2paths[img_path] = tmp

    with open(save_path, 'w') as f:
        json.dump(imgpath2paths, f)
Example #11
def convert_second_results(second_result_dir, dataset_dir, output_dir):
    with open(second_result_dir, 'rb') as f:
        data = json.load(f)

    prediction_template = {
        'sample_token': '',
        'translation': [],
        'size': [],
        'rotation': [],
        'name': '',
        'score': 0
    }

    nusc = NuScenes(version='v1.0-trainval', dataroot=dataset_dir)

    predictions = list()
    ground_truth = list()

    print('Converting results and ground truth ...')
    for token, value in data['results'].items():
        for item in value:
            pred = copy(prediction_template)
            for k, v in key_mapping.items():
                pred[v] = item[k]
            predictions.append(pred)

        anns = nusc.get('sample', token)['anns']
        for ann_token in anns:
            gt = copy(prediction_template)
            ann_meta = nusc.get('sample_annotation', ann_token)
            for k, v in nusc_key_mappings.items():
                gt[v] = ann_meta[k]
            ground_truth.append(gt)

    with open(Path(output_dir) / 'predictions.json', 'w') as f:
        print('Saving Predictions ...')
        json.dump(predictions, f)

    with open(Path(output_dir) / 'ground_truth.json', 'w') as f:
        print('Saving Ground Truth ...')
        json.dump(ground_truth, f)

    print('Done Converting.')
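Example #11 depends on two mapping dictionaries defined elsewhere. Hypothetical versions, assuming the standard nuScenes detection submission schema, might look like this:

# Maps keys of a detection result entry to the flat prediction_template keys.
key_mapping = {
    'sample_token': 'sample_token',
    'translation': 'translation',
    'size': 'size',
    'rotation': 'rotation',
    'detection_name': 'name',
    'detection_score': 'score',
}

# Maps keys of a nuScenes sample_annotation record to the same template keys.
nusc_key_mappings = {
    'sample_token': 'sample_token',
    'translation': 'translation',
    'size': 'size',
    'rotation': 'rotation',
    'category_name': 'name',
}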
Example #12
def nuscenes_extract_detection(detection_file, unpack_dir):
    print("Generating detection as txt files from {}".format(detection_file))
    nusc = NuScenes(dataroot=GlobalConfig.nuscenes_root_trainval,
                    version='v1.0-trainval',
                    verbose=True)

    with open(detection_file, 'r') as f:
        detections = json.load(f)
    print(detections['meta'])

    processed_samples = set()
    for sample_token in tqdm(detections['results'].keys()):
        if sample_token in processed_samples:
            continue

        sample = nusc.get('sample', sample_token)
        scene = nusc.get('scene', sample['scene_token'])
        scene_detections_file = open(
            os.path.join(unpack_dir, '{}.txt'.format(sample['scene_token'])),
            'w')

        # main loop for processing a scene
        scene_sample_token = scene['first_sample_token']
        scene_timestamp = 0
        while scene_sample_token != '':
            # get detections of this sample
            sample_dets = detections['results'][
                scene_sample_token]  # list(sample_results)
            for det in sample_dets:
                if det['detection_name'] in nuscenes_tracking_names:
                    scene_detections_file.write(
                        format_detection(scene_timestamp, det))

            # save this sample token into processes_samples
            processed_samples.add(scene_sample_token)

            # move on
            scene_sample = nusc.get('sample', scene_sample_token)
            scene_timestamp += 1
            scene_sample_token = scene_sample['next']
        # finish for this scene
        scene_detections_file.close()
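Example #12 uses a class whitelist and a line formatter that are defined elsewhere. The class list below matches the official nuScenes tracking challenge; format_detection is a hypothetical formatter whose exact column layout depends on the downstream tracker:

nuscenes_tracking_names = ['bicycle', 'bus', 'car', 'motorcycle',
                           'pedestrian', 'trailer', 'truck']

def format_detection(timestamp, det):
    # One detection per line: frame index, class, center (x, y, z), size (w, l, h), score.
    x, y, z = det['translation']
    w, l, h = det['size']
    return '%d %s %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n' % (
        timestamp, det['detection_name'], x, y, z, w, l, h, det['detection_score'])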
Example #13
def main():
    nusc = NuScenes(version='v1.0-mini',
                    dataroot='../../data/datasets/nuscenes',
                    verbose=True)

    my_sample = nusc.sample[10]
    my_sample_token = my_sample['token']
    sample_record = nusc.get('sample', my_sample_token)

    pointsensor_token = sample_record['data']['LIDAR_TOP']
    camera_token = sample_record['data']['CAM_FRONT']

    # Get point cloud
    pointsensor = nusc.get('sample_data', pointsensor_token)
    pcl_path = os.path.join(nusc.dataroot, pointsensor['filename'])
    if pointsensor['sensor_modality'] == 'lidar':
        pc_data = LidarPointCloud.from_file(pcl_path)
    else:
        raise NotImplementedError()

    pc = pc_data.points[0:3]

    vtk_window_size = (1280, 720)
    vtk_renderer = demo_utils.setup_vtk_renderer()
    vtk_render_window = demo_utils.setup_vtk_render_window(
        'Point Cloud', vtk_window_size, vtk_renderer)

    vtk_interactor = vtk.vtkRenderWindowInteractor()
    vtk_interactor.SetRenderWindow(vtk_render_window)
    vtk_interactor.SetInteractorStyle(
        vtk_utils.ToggleActorsInteractorStyle(None, vtk_renderer))
    vtk_interactor.Initialize()

    vtk_pc = VtkPointCloudGlyph()
    vtk_pc.set_points(pc.T)
    vtk_renderer.AddActor(vtk_pc.vtk_actor)

    vtk_render_window.Render()
    vtk_interactor.Start()  # Blocking
Example #14
def main(dataroot: str,
         version: str,
         output_prefix: str,
         output_format: str = 'kml') -> None:
    """
    Extract the latlon coordinates for each available pose and write the results to a file.
    The file is organized by location and scene_name.
    :param dataroot: Path of the nuScenes dataset.
    :param version: NuScenes version.
    :param output_prefix: Where to save the output file (without the file extension).
    :param output_format: The output file format, kml or json.
    """
    # Init nuScenes.
    nusc = NuScenes(dataroot=dataroot, version=version, verbose=False)

    coordinates_per_location = {}
    print(f'Extracting coordinates...')
    for scene in tqdm(nusc.scene):
        # Retrieve nuScenes poses.
        scene_name = scene['name']
        scene_token = scene['token']
        location = nusc.get('log', scene['log_token'])[
            'location']  # Needed to extract the reference coordinate.
        poses = get_poses(
            nusc, scene_token
        )  # For each pose, we will extract the corresponding coordinate.

        # Compute and store coordinates.
        coordinates = derive_latlon(location, poses)
        if location not in coordinates_per_location:
            coordinates_per_location[location] = {}
        coordinates_per_location[location][scene_name] = coordinates

    # Create output directory if necessary.
    dest_dir = os.path.dirname(output_prefix)
    if dest_dir != '' and not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    # Write to json.
    output_path = f'{output_prefix}_{version}.{output_format}'
    if output_format == 'json':
        with open(output_path, 'w') as fh:
            json.dump(coordinates_per_location, fh, sort_keys=True, indent=4)
    elif output_format == 'kml':
        # Write to kml.
        export_kml(coordinates_per_location, output_path)
    else:
        raise Exception('Error: Invalid output format: %s' % output_format)

    print(f"Saved the coordinates in {output_path}")
def process_data(data_path, version, val_split):
    nusc = NuScenes(version=version, dataroot=data_path, verbose=True)
    splits = create_splits_scenes()
    train_scenes, val_scenes = train_test_split(splits['train' if 'mini' not in version else 'mini_train'], test_size=val_split)
    train_scene_names = splits['train' if 'mini' not in version else 'mini_train']
    val_scene_names = splits['val' if 'mini' not in version else 'mini_val']

    ns_scene_names = dict()
    ns_scene_names['train'] = train_scene_names
    ns_scene_names['val'] = val_scene_names
    scenes = []
    for data_class in ['train', 'val']:
        for ns_scene_name in tqdm(ns_scene_names[data_class]):
            ns_scene = nusc.get('scene', nusc.field2token('scene', 'name', ns_scene_name)[0])
            scene_id = int(ns_scene['name'].replace('scene-', ''))
            if scene_id in scene_blacklist:  # Some scenes have bad localization
                continue

            scene = process_scene(ns_scene, nusc)
            if scene is not None:
                scenes.append(scene)
    
    print(f'Processed {len(scenes)} scenes')
Example #16
def get_poses(nusc: NuScenes, scene_token: str) -> List[dict]:
    """
    Return all ego poses for the current scene.
    :param nusc: The NuScenes instance to load the ego poses from.
    :param scene_token: The token of the scene.
    :return: A list of the ego pose dicts.
    """
    pose_list = []
    scene_rec = nusc.get('scene', scene_token)
    sample_rec = nusc.get('sample', scene_rec['first_sample_token'])
    sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])

    # In nuScenes, ego_pose records share their token with the corresponding
    # sample_data record, so the sample_data token can be used for the lookup.
    ego_pose = nusc.get('ego_pose', sd_rec['token'])
    pose_list.append(ego_pose)

    while sd_rec['next'] != '':
        sd_rec = nusc.get('sample_data', sd_rec['next'])
        ego_pose = nusc.get('ego_pose', sd_rec['token'])
        pose_list.append(ego_pose)

    return pose_list
Example #17
    nusc = NuScenes(version='v1.0-mini',
                    dataroot="/home/ubuntu/data/nuscenes",
                    verbose=True)

    # Specify sensors to use
    radar_channel = 'RADAR_FRONT'
    camera_channel = 'CAM_FRONT'

    # Get all scene tokens in a list
    scene_tokens = [s['token'] for s in nusc.scene]

    # Choose a scene token by index (v1.0-mini contains 10 scenes)
    scene_token = scene_tokens[5]

    # Get the first sample of this scene to be demonstrated
    scene_rec = nusc.get('scene', scene_token)
    sample = nusc.get('sample', scene_rec['first_sample_token'])

    # Get the sample token and the records for sensor tokens
    sample_token = scene_rec['first_sample_token']
    sample_record = nusc.get('sample', sample_token)

    # Grab the front camera and the radar sensor.
    radar_token = sample_record['data'][radar_channel]
    camera_token = sample_record['data'][camera_channel]

    # Get radar and image data
    radar_data = get_sensor_sample_data(nusc, sample, radar_channel)
    image_data = get_sensor_sample_data(nusc, sample, camera_channel)

    ## Define parameters for image_plus_creation
def create_dataset(dataroot,
                   save_dir,
                   width=672,
                   height=672,
                   grid_range=70.,
                   nusc_version='v1.0-mini',
                   use_constant_feature=True,
                   use_intensity_feature=True,
                   end_id=None):

    os.makedirs(os.path.join(save_dir, 'in_feature'), exist_ok=True)
    os.makedirs(os.path.join(save_dir, 'out_feature'), exist_ok=True)

    nusc = NuScenes(version=nusc_version, dataroot=dataroot, verbose=True)
    ref_chan = 'LIDAR_TOP'

    if width == height:
        size = width
    else:
        raise Exception(
            'Currently only supported if width and height are equal')

    grid_length = 2. * grid_range / size
    label_half_length = 10

    data_id = 0
    grid_ticks = np.arange(-grid_range, grid_range + grid_length, grid_length)
    grid_centers \
        = (grid_ticks + grid_length / 2)[:len(grid_ticks) - 1]

    for my_scene in nusc.scene:
        first_sample_token = my_scene['first_sample_token']
        token = first_sample_token

        while (token != ''):
            print('--- {} '.format(data_id) + token + ' ---')
            my_sample = nusc.get('sample', token)
            sd_record = nusc.get('sample_data', my_sample['data'][ref_chan])
            sample_rec = nusc.get('sample', sd_record['sample_token'])
            chan = sd_record['channel']

            pc, times = LidarPointCloud.from_file_multisweep(nusc,
                                                             sample_rec,
                                                             chan,
                                                             ref_chan,
                                                             nsweeps=10)
            _, boxes, _ = nusc.get_sample_data(sd_record['token'],
                                               box_vis_level=0)

            out_feature = np.zeros((size, size, 7), dtype=np.float32)
            for box_idx, box in enumerate(boxes):
                label = 0
                if box.name.split('.')[0] == 'vehicle':
                    if box.name.split('.')[1] == 'car':
                        label = 1
                    elif (box.name.split('.')[1]
                          in ['bus', 'construction', 'trailer', 'truck']):
                        label = 2
                    elif box.name.split('.')[1] == 'emergency':
                        if box.name.split('.')[2] == 'ambulance':
                            label = 2
                        # police
                        else:
                            label = 1
                    elif box.name.split('.')[1] in ['bicycle', 'motorcycle']:
                        label = 3
                elif box.name.split('.')[0] == 'human':
                    label = 4
                elif (box.name.split('.')[0]
                      in ['animal', 'movable_object', 'static_object']):
                    label = 5
                else:
                    continue

                height_pt = np.linalg.norm(box.corners().T[0] -
                                           box.corners().T[3])
                corners2d = box.corners()[:2, :]
                box2d = corners2d.T[[2, 3, 7, 6]]

                box2d_center = box2d.mean(axis=0)
                generate_out_feature(width, height, size, grid_centers, box2d,
                                     box2d_center, height_pt, label,
                                     label_half_length, out_feature)

            out_feature = out_feature.astype(np.float16)
            feature_generator = fg.Feature_generator(grid_range, width, height,
                                                     use_constant_feature,
                                                     use_intensity_feature)
            feature_generator.generate(pc.points.T)
            in_feature = feature_generator.feature
            if use_constant_feature and use_intensity_feature:
                in_feature = in_feature.reshape(size, size, 8)
            elif use_constant_feature or use_intensity_feature:
                in_feature = in_feature.reshape(size, size, 6)
            else:
                in_feature = in_feature.reshape(size, size, 4)

            # The output feature map is flipped, so negate the instance offset channels to match.
            out_feature = np.flip(np.flip(out_feature, axis=0), axis=1)
            out_feature[:, :, 1:3] *= -1

            np.save(os.path.join(save_dir, 'in_feature/{:05}'.format(data_id)),
                    in_feature)
            np.save(
                os.path.join(save_dir, 'out_feature/{:05}'.format(data_id)),
                out_feature)

            token = my_sample['next']
            data_id += 1
            if data_id == end_id:
                return
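A hypothetical invocation of the generator above (paths are placeholders):

create_dataset(dataroot='data/nuscenes',
               save_dir='out/bev_features',
               nusc_version='v1.0-mini',
               end_id=10)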
Example #19
class NuScenesDataset(Dataset):
    def __init__(self, cfg, root, is_train=True, transforms=None):
        super(NuScenesDataset, self).__init__()
        self.root = root
        self.image_dir = os.path.join(root, "image_2")
        self.label_dir = os.path.join(root, "label_2")
        self.calib_dir = os.path.join(root, "calib")

        self.split = cfg.DATASETS.TRAIN_SPLIT if is_train else cfg.DATASETS.TEST_SPLIT
        self.is_train = is_train
        self.transforms = transforms

        if self.split == "train":
            imageset_txt = os.path.join(root, "ImageSets", "train.txt")
        elif self.split == "val":
            imageset_txt = os.path.join(root, "ImageSets", "val.txt")
        elif self.split == "trainval":
            imageset_txt = os.path.join(root, "ImageSets", "trainval.txt")
        elif self.split == "test":
            imageset_txt = os.path.join(root, "ImageSets", "test.txt")
        else:
            raise ValueError("Invalid split!")

        image_files = []
        for line in open(imageset_txt, "r"):
            base_name = line.replace("\n", "")
            image_name = base_name + ".png"
            image_files.append(image_name)
        self.image_files = image_files
        self.label_files = [
            i.replace(".png", ".txt") for i in self.image_files
        ]
        self.num_samples = len(self.image_files)
        self.classes = cfg.DATASETS.DETECT_CLASSES

        self.flip_prob = cfg.INPUT.FLIP_PROB_TRAIN if is_train else 0
        self.aug_prob = cfg.INPUT.SHIFT_SCALE_PROB_TRAIN if is_train else 0
        self.shift_scale = cfg.INPUT.SHIFT_SCALE_TRAIN
        self.num_classes = len(self.classes)

        self.input_width = cfg.INPUT.WIDTH_TRAIN
        self.input_height = cfg.INPUT.HEIGHT_TRAIN
        self.output_width = self.input_width // cfg.MODEL.BACKBONE.DOWN_RATIO
        self.output_height = self.input_height // cfg.MODEL.BACKBONE.DOWN_RATIO
        self.max_objs = cfg.DATASETS.MAX_OBJECTS

        self.logger = logging.getLogger(__name__)
        self.logger.info(
            "Initializing KITTI {} set with {} files loaded".format(
                self.split, self.num_samples))

        # NuScenes specific stuff here
        # KITTI unwanted lines not yet deleted above
        # Select subset of the data to look at.
        nusc_version = 'v1.0'
        self.nusc = NuScenes(version=nusc_version)

        # Sensor channels used in __getitem__ (assumed defaults; the original
        # file defines these elsewhere).
        self.cam_name = 'CAM_FRONT'
        self.lidar_name = 'LIDAR_TOP'

        # Get assignment of scenes to splits.
        self.split_logs = create_split_logs(self.split, self.nusc)

        # Use only the samples from the current split.
        self.sample_tokens = self._split_to_samples(self.split_logs)
        self.image_count = len(self.sample_tokens)
        # Debug limit: restrict the dataset to the first 100 samples.
        self.image_count = 100
        self.sample_tokens = self.sample_tokens[:self.image_count]

    def __len__(self):
        return self.image_count

    def _split_to_samples(self, split_logs: List[str]) -> List[str]:
        """
        Convenience function to get the samples in a particular split.
        :param split_logs: A list of the log names in this split.
        :return: The list of samples.
        """
        samples = []
        for sample in self.nusc.sample:
            scene = self.nusc.get('scene', sample['scene_token'])
            log = self.nusc.get('log', scene['log_token'])
            logfile = log['logfile']
            if logfile in split_logs:
                samples.append(sample['token'])
        return samples

    def __getitem__(self, idx):
        # set defaults here
        kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
        kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
        imsize = (1600, 900)

        # sample_token based on index
        sample_token = self.sample_tokens[idx]

        # Get sample data.
        sample = self.nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']
        cam_front_token = sample['data'][self.cam_name]
        lidar_token = sample['data'][self.lidar_name]

        # Retrieve sensor records.
        sd_record_cam = self.nusc.get('sample_data', cam_front_token)
        sd_record_lid = self.nusc.get('sample_data', lidar_token)
        cs_record_cam = self.nusc.get('calibrated_sensor',
                                      sd_record_cam['calibrated_sensor_token'])
        cs_record_lid = self.nusc.get('calibrated_sensor',
                                      sd_record_lid['calibrated_sensor_token'])

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(cs_record_lid['translation'],
                                      Quaternion(cs_record_lid['rotation']),
                                      inverse=False)
        ego_to_cam = transform_matrix(cs_record_cam['translation'],
                                      Quaternion(cs_record_cam['rotation']),
                                      inverse=True)
        velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam,
                                   kitti_to_nu_lidar.transformation_matrix)

        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4))
        p_left_kitti[:3, :3] = cs_record_cam[
            'camera_intrinsic']  # Cameras are always rectified.

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1],
                                                      [1, 0, 0]])).all()
        assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
        # not the camera.
        filename_cam_full = sd_record_cam['filename']

        # set the img variable to its data here
        src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
        img = Image.open(src_im_path)

        # Create calibration matrix: take the 3x3 intrinsics from the 3x4 projection matrix.
        K = np.array(p_left_kitti, dtype=np.float32).reshape(3, 4)
        K = K[:3, :3]

        # populate the list of object annotations for this sample
        anns = []
        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = self.nusc.get('sample_annotation',
                                              sample_annotation_token)

            # Get box in LIDAR frame.
            _, box_lidar_nusc, _ = self.nusc.get_sample_data(
                lidar_token,
                box_vis_level=BoxVisibility.NONE,
                selected_anntokens=[sample_annotation_token])
            box_lidar_nusc = box_lidar_nusc[0]

            # Truncated: Set all objects to 0 which means untruncated.
            truncated = 0.0

            # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
            occluded = 0

            # Convert nuScenes category to nuScenes detection challenge category.
            detection_name = category_to_detection_name(
                sample_annotation['category_name'])

            # Skip categories that are not part of the nuScenes detection challenge.
            if detection_name is None:
                continue

            # Convert from nuScenes to KITTI box format.
            box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot),
                velo_to_cam_trans, r0_rect)

            # Project 3d box to 2d box in image, ignore box if it does not fall inside.
            bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti,
                                                         p_left_kitti,
                                                         imsize=imsize)
            if bbox_2d is None:
                continue

            # Set dummy score so we can use this file as result.
            box_cam_kitti.score = 0

            # Convert box to output string format.
            output = KittiDB.box_to_string(name=detection_name,
                                           box=box_cam_kitti,
                                           bbox_2d=bbox_2d,
                                           truncation=truncated,
                                           occlusion=occluded)
            fieldnames = [
                'type', 'truncated', 'occluded', 'alpha', 'xmin', 'ymin',
                'xmax', 'ymax', 'dh', 'dw', 'dl', 'lx', 'ly', 'lz', 'ry'
            ]
            if self.is_train:
                f = StringIO(output)
                reader = csv.DictReader(f,
                                        delimiter=' ',
                                        fieldnames=fieldnames)
                for line, row in enumerate(reader):
                    if row["type"] in self.classes:
                        anns.append({
                            "class":
                            row["type"],
                            "label":
                            TYPE_ID_CONVERSION[row["type"]],
                            "truncation":
                            float(row["truncated"]),
                            "occlusion":
                            float(row["occluded"]),
                            "alpha":
                            float(row["alpha"]),
                            "dimensions": [
                                float(row['dl']),
                                float(row['dh']),
                                float(row['dw'])
                            ],
                            "locations": [
                                float(row['lx']),
                                float(row['ly']),
                                float(row['lz'])
                            ],
                            "rot_y":
                            float(row["ry"])
                        })

        center = np.array([i / 2 for i in img.size], dtype=np.float32)
        size = np.array([i for i in img.size], dtype=np.float32)
        """
        Resize, horizontal flip, and affine augmentation are performed here,
        since it is complicated to compute the heatmap w.r.t. the transform.
        """
        flipped = False
        if (self.is_train) and (random.random() < self.flip_prob):
            flipped = True
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            center[0] = size[0] - center[0] - 1
            K[0, 2] = size[0] - K[0, 2] - 1

        affine = False
        if (self.is_train) and (random.random() < self.aug_prob):
            affine = True
            shift, scale = self.shift_scale[0], self.shift_scale[1]
            shift_ranges = np.arange(-shift, shift + 0.1, 0.1)
            center[0] += size[0] * random.choice(shift_ranges)
            center[1] += size[1] * random.choice(shift_ranges)

            scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1)
            size *= random.choice(scale_ranges)

        center_size = [center, size]
        trans_affine = get_transfrom_matrix(
            center_size, [self.input_width, self.input_height])
        trans_affine_inv = np.linalg.inv(trans_affine)
        img = img.transform(
            (self.input_width, self.input_height),
            method=Image.AFFINE,
            data=trans_affine_inv.flatten()[:6],
            resample=Image.BILINEAR,
        )

        trans_mat = get_transfrom_matrix(
            center_size, [self.output_width, self.output_height])

        if not self.is_train:
            # for inference we parametrize with original size
            target = ParamsList(image_size=size, is_train=self.is_train)
            target.add_field("trans_mat", trans_mat)
            target.add_field("K", K)
            if self.transforms is not None:
                img, target = self.transforms(img, target)

            return img, target, idx

        heat_map = np.zeros(
            [self.num_classes, self.output_height, self.output_width],
            dtype=np.float32)
        regression = np.zeros([self.max_objs, 3, 8], dtype=np.float32)
        cls_ids = np.zeros([self.max_objs], dtype=np.int32)
        proj_points = np.zeros([self.max_objs, 2], dtype=np.int32)
        p_offsets = np.zeros([self.max_objs, 2], dtype=np.float32)
        dimensions = np.zeros([self.max_objs, 3], dtype=np.float32)
        locations = np.zeros([self.max_objs, 3], dtype=np.float32)
        rotys = np.zeros([self.max_objs], dtype=np.float32)
        reg_mask = np.zeros([self.max_objs], dtype=np.uint8)
        flip_mask = np.zeros([self.max_objs], dtype=np.uint8)

        for i, a in enumerate(anns):
            a = a.copy()
            cls = a["label"]

            locs = np.array(a["locations"])
            rot_y = np.array(a["rot_y"])
            if flipped:
                locs[0] *= -1
                rot_y *= -1

            point, box2d, box3d = encode_label(K, rot_y, a["dimensions"], locs)
            point = affine_transform(point, trans_mat)
            box2d[:2] = affine_transform(box2d[:2], trans_mat)
            box2d[2:] = affine_transform(box2d[2:], trans_mat)
            box2d[[0, 2]] = box2d[[0, 2]].clip(0, self.output_width - 1)
            box2d[[1, 3]] = box2d[[1, 3]].clip(0, self.output_height - 1)
            h, w = box2d[3] - box2d[1], box2d[2] - box2d[0]

            if (0 < point[0] < self.output_width) and (0 < point[1] <
                                                       self.output_height):
                point_int = point.astype(np.int32)
                p_offset = point - point_int
                radius = gaussian_radius(h, w)
                radius = max(0, int(radius))
                heat_map[cls] = draw_umich_gaussian(heat_map[cls], point_int,
                                                    radius)

                cls_ids[i] = cls
                regression[i] = box3d
                proj_points[i] = point_int
                p_offsets[i] = p_offset
                dimensions[i] = np.array(a["dimensions"])
                locations[i] = locs
                rotys[i] = rot_y
                reg_mask[i] = 1 if not affine else 0
                flip_mask[i] = 1 if not affine and flipped else 0

        target = ParamsList(image_size=img.size, is_train=self.is_train)
        target.add_field("hm", heat_map)
        target.add_field("reg", regression)
        target.add_field("cls_ids", cls_ids)
        target.add_field("proj_p", proj_points)
        target.add_field("dimensions", dimensions)
        target.add_field("locations", locations)
        target.add_field("rotys", rotys)
        target.add_field("trans_mat", trans_mat)
        target.add_field("K", K)
        target.add_field("reg_mask", reg_mask)
        target.add_field("flip_mask", flip_mask)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, idx
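The loader in Example #19 references a TYPE_ID_CONVERSION mapping defined elsewhere; a hypothetical version keyed on nuScenes detection names could look like this (the real project defines its own class list and ids):

TYPE_ID_CONVERSION = {
    'car': 0,
    'pedestrian': 1,
    'bicycle': 2,
}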
Example #20
    # Add sensors to VCD
    for sensor in nusc.sensor:
        if sensor['modality'] == 'camera':
            vcd.add_stream(sensor['channel'], '', sensor['token'],
                           core.StreamType.camera)
        elif sensor['modality'] == 'lidar':
            vcd.add_stream(sensor['channel'], '', sensor['token'],
                           core.StreamType.lidar)
        else:
            vcd.add_stream(sensor['channel'], '', sensor['token'],
                           core.StreamType.other)

    # Dictionary to map between frame number and sample_token
    token_sample = my_scene['first_sample_token']
    current_sample = nusc.get('sample', token_sample)
    # Add metadata to VCD
    vcd.data['vcd']['metadata'].setdefault('properties', dict())
    vcd.data['vcd']['metadata']['properties']['scene_token'] = my_scene[
        'token']
    vcd.data['vcd']['metadata']['properties']['scene_name'] = my_scene['name']
    vcd.data['vcd']['metadata']['properties']['scene_description'] = my_scene[
        'description']

    log = nusc.get('log', my_scene['log_token'])
    vcd.data['vcd']['metadata']['properties']['log_token'] = log['token']
    vcd.data['vcd']['metadata']['properties']['log_file'] = log['logfile']
    vcd.data['vcd']['metadata']['properties']['vehicle'] = log['vehicle']
    vcd.data['vcd']['metadata']['properties']['date'] = log['date_captured']
    vcd.data['vcd']['metadata']['properties']['location'] = log['location']
Example #21
nuscenes_path = '/mrtstorage/datasets/nuscenes/data/data_LIDAR/v1.0-trainval/v1.0-trainval_meta'
nusc = NuScenes(version='v1.0-trainval', dataroot=nuscenes_path, verbose=True)
x_positions = dict.fromkeys(class_list)
y_positions = dict.fromkeys(class_list)
length = dict.fromkeys(class_list)
width = dict.fromkeys(class_list)
num_lidar_points = dict.fromkeys(class_list)
y_translation = []
yaw = []
sensor = 'LIDAR_TOP'
nu_to_kitti_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2).inverse
for scene in nusc.scene:
    current_sample_token = scene['first_sample_token']
    last_sample_token = scene['last_sample_token']
    while current_sample_token != last_sample_token:
        sample = nusc.get('sample', current_sample_token)
        lidar_top_data = nusc.get('sample_data', sample['data'][sensor])
        if lidar_top_data['prev']:
            ego_pose = nusc.get('ego_pose', lidar_top_data['ego_pose_token'])
            calib_sensor = nusc.get('calibrated_sensor', lidar_top_data['calibrated_sensor_token'])
            lidar_top_data_prev = nusc.get('sample_data', lidar_top_data['prev'])
            ego_pose_prev = nusc.get('ego_pose', lidar_top_data_prev['ego_pose_token'])
            calib_sensor_prev = nusc.get('calibrated_sensor', lidar_top_data_prev['calibrated_sensor_token'])
            ego_to_global = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']))
            lidar_to_ego = transform_matrix(calib_sensor['translation'], Quaternion(calib_sensor['rotation']))
            ego_to_global_prev = transform_matrix(ego_pose_prev['translation'], Quaternion(ego_pose_prev['rotation']))
            lidar_to_ego_prev = transform_matrix(calib_sensor_prev['translation'],
                                                 Quaternion(calib_sensor_prev['rotation']))
            lidar_to_global = np.dot(ego_to_global, lidar_to_ego)
            lidar_to_global_prev = np.dot(ego_to_global_prev, lidar_to_ego_prev)
            delta = inv(lidar_to_global_prev).dot(lidar_to_global)
        'CAM_FRONT_LEFT',
        'CAM_FRONT_RIGHT',
        'CAM_BACK_LEFT',
        'CAM_BACK',
        'CAM_BACK_RIGHT',
    ]
    lll = []
    splits = create_splits_scenes()
    for camera_channel in valid_channels:
        for token in tokens_list:
            first_time = True
            Dict = {}
            current_recs = {}
            num_objects = 0
            scene_token = token[:-1]
            scene_record = nusc.get('scene', scene_token)
            scene_rec = scene_record
            scene = nusc.get('scene', scene_token)
            scene_name = scene_record['name']

            scene_id = int(scene_name.replace('scene-', ''))
            if scene_name in splits['val']:
                spl = '/test/'
            else:
                spl = '/train/'
            os.mkdir(data_directory + spl + str(scene_id) + camera_channel)
            os.mkdir(data_directory + spl + str(scene_id) + camera_channel +
                     '/img1')
            os.mkdir(data_directory + spl + str(scene_id) + camera_channel +
                     '/gt')
            file_w = open(
Example #23
            print("No scene with name {} found.".format(sys.argv[2]))
            print("Available scenes: {}".format(" ".join(name2id.keys())))

            exit(1)
        scenes2parse.append(sys.argv[3])
    else:
        scenes2parse = name2id.keys()

    for scene_name in scenes2parse:

        print("Converting {} ...".format(scene_name))

        output_folder = os.path.join(sys.argv[2], scene_name)
        velodyne_folder = os.path.join(output_folder, "velodyne/")

        first_lidar = nusc.get('sample', nusc.scene[name2id[scene_name]]["first_sample_token"])["data"]["LIDAR_TOP"]
        last_lidar = nusc.get('sample', nusc.scene[name2id[scene_name]]["last_sample_token"])["data"]["LIDAR_TOP"]

        current_lidar = first_lidar
        lidar_filenames = []
        poses = []

        if not os.path.exists(velodyne_folder):
            print("Creating output folder: {}".format(output_folder))
            os.makedirs(velodyne_folder)

        original = []

        while current_lidar != "":
            lidar_data = nusc.get('sample_data', current_lidar)
            calib_data = nusc.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])
Example #24
def create_tracks(all_boxes: EvalBoxes, nusc: NuScenes, eval_split: str, gt: bool) \
        -> Dict[str, Dict[int, List[TrackingBox]]]:
    """
    Returns all tracks for all scenes. Samples within a track are sorted in chronological order.
    This can be applied either to GT or predictions.
    :param all_boxes: Holds all GT or predicted boxes.
    :param nusc: The NuScenes instance to load the sample information from.
    :param eval_split: The evaluation split for which we create tracks.
    :param gt: Whether we are creating tracks for GT or predictions.
    :return: The tracks.
    """
    # Only keep samples from this split.
    splits = create_splits_scenes()
    scene_tokens = set()
    for sample_token in all_boxes.sample_tokens:
        scene_token = nusc.get('sample', sample_token)['scene_token']
        scene = nusc.get('scene', scene_token)
        if scene['name'] in splits[eval_split]:
            scene_tokens.add(scene_token)

    # Tracks are stored as dict {scene_token: {timestamp: List[TrackingBox]}}.
    tracks = defaultdict(lambda: defaultdict(list))

    # Init all scenes and timestamps to guarantee completeness.
    for scene_token in scene_tokens:
        # Init all timestamps in this scene.
        scene = nusc.get('scene', scene_token)
        cur_sample_token = scene['first_sample_token']
        while True:
            # Initialize array for current timestamp.
            cur_sample = nusc.get('sample', cur_sample_token)
            tracks[scene_token][cur_sample['timestamp']] = []

            # Abort after the last sample.
            if cur_sample_token == scene['last_sample_token']:
                break

            # Move to next sample.
            cur_sample_token = cur_sample['next']

    # Group annotations wrt scene and timestamp.
    for sample_token in all_boxes.sample_tokens:
        sample_record = nusc.get('sample', sample_token)
        scene_token = sample_record['scene_token']
        tracks[scene_token][sample_record['timestamp']] = all_boxes.boxes[sample_token]

    # Replace box scores with track score (average box score). This only affects the compute_thresholds method and
    # should be done before interpolation to avoid diluting the original scores with interpolated boxes.
    if not gt:
        for scene_id, scene_tracks in tracks.items():
            # For each track_id, collect the scores.
            track_id_scores = defaultdict(list)
            for timestamp, boxes in scene_tracks.items():
                for box in boxes:
                    track_id_scores[box.tracking_id].append(box.tracking_score)

            # Compute average scores for each track.
            track_id_avg_scores = {}
            for tracking_id, scores in track_id_scores.items():
                track_id_avg_scores[tracking_id] = np.mean(scores)

            # Apply average score to each box.
            for timestamp, boxes in scene_tracks.items():
                for box in boxes:
                    box.tracking_score = track_id_avg_scores[box.tracking_id]

    # Interpolate GT and predicted tracks.
    for scene_token in tracks.keys():
        tracks[scene_token] = interpolate_tracks(tracks[scene_token])

        if not gt:
            # Make sure predictions are sorted in time. (Always true for GT.)
            tracks[scene_token] = defaultdict(list, sorted(tracks[scene_token].items(), key=lambda kv: kv[0]))

    return tracks
Example #25
class NuscDataset(data.Dataset):
    """Superclass for monocular dataloaders

    Args:
        data_root
        filenames
        height
        width
        frame_idxs
        num_scales
        is_train
        img_ext
    """
    def __init__(self,
                 data_root,
                 filenames,
                 height,
                 width,
                 frame_idxs,
                 num_scales,
                 version='v1.0-mini',
                 sensor='CAM_FRONT',
                 is_train=False,
                 img_ext='.jpg'):
        super(NuscDataset, self).__init__()

        self.data_path = data_root
        # NOTE: the provided data_root is immediately overridden with a hard-coded path.
        self.data_path = '/share/nuscenes'
        self.filenames = filenames
        self.height = height
        self.width = width
        self.num_scales = num_scales
        self.interp = Image.ANTIALIAS

        self.frame_idxs = frame_idxs

        self.is_train = is_train
        self.img_ext = img_ext

        self.loader = pil_loader
        self.to_tensor = transforms.ToTensor()
        self.nusc = NuScenes(version=version,
                             dataroot=self.data_path,
                             verbose=True)
        self.sensor = sensor
        self.data_root = '/share/nuscenes'
        self.full_res_shape = (1600, 640)

        # We need to specify augmentations differently in newer versions of torchvision.
        # We first try the newer tuple version; if this fails we fall back to scalars
        try:
            self.brightness = (0.8, 1.2)
            self.contrast = (0.8, 1.2)
            self.saturation = (0.8, 1.2)
            self.hue = (-0.1, 0.1)
            transforms.ColorJitter.get_params(self.brightness, self.contrast,
                                              self.saturation, self.hue)
        except TypeError:
            self.brightness = 0.2
            self.contrast = 0.2
            self.saturation = 0.2
            self.hue = 0.1

        self.resize = {}
        for i in range(self.num_scales):
            s = 2**i
            self.resize[i] = transforms.Resize(
                (self.height // s, self.width // s), interpolation=self.interp)

        self.load_depth = self.check_depth()

    def get_image_path(self, folder, frame_index, side):
        # Note: KITTI-style helper inherited from the parent codebase; it is not used for
        # nuScenes and relies on self.side_map, which is not defined in this class.
        f_str = "{:010d}{}".format(frame_index, self.img_ext)
        image_path = os.path.join(self.data_path, folder,
                                  "image_0{}/data".format(self.side_map[side]),
                                  f_str)
        return image_path

    def preprocess(self, inputs, color_aug):
        """Resize colour images to the required scales and augment if required

        We create the color_aug object in advance and apply the same augmentation to all
        images in this item. This ensures that all images input to the pose network receive the
        same augmentation.
        """
        for k in list(inputs):
            frame = inputs[k]
            if "color" in k:
                n, im, i = k
                # nuScenes images are cropped to 1600x640 in get_color; here we build the multi-scale pyramid
                for i in range(self.num_scales):

                    inputs[(n, im, i)] = self.resize[i](inputs[(n, im, i - 1)])

        for k in list(inputs):
            f = inputs[k]
            if "color" in k:
                n, im, i = k
                inputs[(n, im, i)] = self.to_tensor(f)
                inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f))

    def __len__(self):
        return len(self.nusc.sample)

    def __getitem__(self, index):
        """Returns a single training item from the dataset as a dictionary.

        Values correspond to torch tensors.
        Keys in the dictionary are either strings or tuples:

            ("color", <frame_id>, <scale>)          for raw colour images,
            ("color_aug", <frame_id>, <scale>)      for augmented colour images,
            ("K", scale) or ("inv_K", scale)        for camera intrinsics,
            "stereo_T"                              for camera extrinsics, and
            "depth_gt"                              for ground truth depth maps.

        <frame_id> is either:
            an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index',
        or
            "s" for the opposite image in the stereo pair.

        <scale> is an integer representing the scale of the image relative to the fullsize image:
            -1      images at native resolution as loaded from disk
            0       images resized to (self.width,      self.height     )
            1       images resized to (self.width // 2, self.height // 2)
            2       images resized to (self.width // 4, self.height // 4)
            3       images resized to (self.width // 8, self.height // 8)
        """
        inputs = {}

        do_color_aug = self.is_train and random.random() > 0.5
        do_flip = self.is_train and random.random() > 0.5

        sample = self.nusc.sample[index]
        if sample['prev'] == '' and sample['next'] == '':
            raise FileNotFoundError('Cannot find three consecutive samples')
        elif sample['prev'] == '':
            sample = self.nusc.get('sample', token=sample['next'])
        elif sample['next'] == '':
            sample = self.nusc.get('sample', token=sample['prev'])

        for i in self.frame_idxs:
            if i == "s":
                raise NotImplementedError(
                    'nuscenes dataset does not support stereo depth')
            else:
                inputs[("color", i, -1)] = self.get_color(sample, i, do_flip)
            #
            # for inp in inputs:
            #     if 'color' in inp:
            #         inputs[inp].save('/home/user/xwh/monodepth2-master/dataset_check/{}.png'.format(str(index)+'_'+str(i)))

        # adjusting intrinsics to match each scale in the pyramid
        SENSOR_DATA = self.nusc.get('sample_data', sample['data'][self.sensor])
        sensor_calib_data = self.nusc.get(
            'calibrated_sensor', SENSOR_DATA['calibrated_sensor_token'])
        K = np.zeros((4, 4), dtype=np.float32)
        K[:, 3] = [0, 0, 0, 1]
        K[:3, :3] = np.array(sensor_calib_data['camera_intrinsic'])
        K[0, :] /= 1600
        K[1, :] /= 900
        self.K = K

        for scale in range(self.num_scales):
            K = self.K.copy()
            # The raw nuScenes intrinsics are in pixels, so they were normalized by the
            # original image size (1600x900) above before being rescaled to each pyramid level.
            K[0, :] *= self.width // (2**scale)
            K[1, :] *= self.height // (2**scale)

            inv_K = np.linalg.pinv(K)

            inputs[("K", scale)] = torch.from_numpy(K)
            # print('-'*20)
            # print('Nusc ("K", {}) : '.format(scale), K)
            # print('-' * 20)
            inputs[("inv_K", scale)] = torch.from_numpy(inv_K)

        if do_color_aug:
            color_aug = transforms.ColorJitter.get_params(
                self.brightness, self.contrast, self.saturation, self.hue)
        else:
            color_aug = (lambda x: x)

        self.preprocess(inputs, color_aug)

        for i in self.frame_idxs:
            del inputs[("color", i, -1)]
            del inputs[("color_aug", i, -1)]

        if self.load_depth:
            LIDAR_RECORD = sample['data']['LIDAR_TOP']
            CAMERA_RECORD = sample['data'][self.sensor]
            points, depth, img = self.map_pointcloud_to_image(
                self.nusc, LIDAR_RECORD, CAMERA_RECORD, render_intensity=False)
            depth_gt = self.get_depth(self.nusc, sample, do_flip)
            # depth_gt[points[:, 1].astype(np.int), points[:, 0].astype(np.int)] = depth
            # TODO FINISH THE NUSCENES DEPTH MAP GENERATION
            # depth_gt = self.get_depth(folder, frame_index, side, do_flip)
            inputs["depth_gt"] = np.expand_dims(depth_gt, 0)
            inputs["depth_gt"] = torch.from_numpy(inputs["depth_gt"].astype(
                np.float32))

        if "s" in self.frame_idxs:
            stereo_T = np.eye(4, dtype=np.float32)
            baseline_sign = -1 if do_flip else 1
            side_sign = -1 if side == "l" else 1
            stereo_T[0, 3] = side_sign * baseline_sign * 0.1

            inputs["stereo_T"] = torch.from_numpy(stereo_T)

        return inputs

    def get_color(self, sample, i, do_flip):
        if i == 0:
            color_path = self.nusc.get(
                'sample_data', token=sample['data'][self.sensor])['filename']
            full_color_path = os.path.join(self.data_root, color_path)
            color = self.loader(full_color_path)
            # color = color.crop((0,2,1600,898))
            color = color.crop((0, 240, 1600, 880))

        elif i == -1:
            prev_sample = self.nusc.get('sample', token=sample['prev'])
            color_path = self.nusc.get(
                'sample_data',
                token=prev_sample['data'][self.sensor])['filename']
            full_color_path = os.path.join(self.data_root, color_path)
            color = self.loader(full_color_path)
            # color = color.crop((0,2,1600,898))
            color = color.crop((0, 240, 1600, 880))

        elif i == 1:
            next_sample = self.nusc.get('sample', token=sample['next'])
            color_path = self.nusc.get(
                'sample_data',
                token=next_sample['data'][self.sensor])['filename']
            full_color_path = os.path.join(self.data_root, color_path)
            color = self.loader(full_color_path)
            # color = color.crop((0,2,1600,898))
            color = color.crop((0, 240, 1600, 880))
        else:
            raise ValueError('Unsupported frame index: {}'.format(i))

        if do_flip:
            color = color.transpose(pil.FLIP_LEFT_RIGHT)

        return color

    # def get_relative_pose(self, current_sample, next_sample):

    def check_depth(self):
        return True

    def get_depth(self, nusc, sample, do_flip):
        pointsensor_token = sample['data']['LIDAR_TOP']
        camsensor_token = sample['data'][self.sensor]
        pts, depth, img = self.map_pointcloud_to_image(nusc, pointsensor_token,
                                                       camsensor_token)
        depth_gt = np.zeros((img.size[0], img.size[1]))
        pts_int = np.array(pts, dtype=int)
        depth_gt[pts_int[0, :], pts_int[1, :]] = depth

        # apply maxpool on nusc depth gt to make the lidar point denser
        # depth_gt = max_pool2d(torch.from_numpy(depth_gt).unsqueeze(dim=0), kernel_size=3, stride=1, padding=1)
        # depth_gt = depth_gt.squeeze(0).numpy()

        # Crop the nuScenes depth_gt to match the cropped color image (see get_color).
        # depth_gt = depth_gt[:,2:898]
        depth_gt = depth_gt[:, 240:880]

        if do_flip:
            depth_gt = np.fliplr(depth_gt)
        # depth_gt = skimage.transform.resize(
        #     depth_gt, self.full_res_shape[::-1], order=0, preserve_range=True, mode='constant')
        return np.transpose(depth_gt, (1, 0))
        # return depth_gt

    def get_relative_pose(self, pose1, pose2):
        """
        Calculate the relative transform from pose1 to pose2.
        :param pose1: 4x4 pose matrix of the first frame.
        :param pose2: 4x4 pose matrix of the second frame.
        :return: 4x4 relative pose matrix.
        """
        # The rotation is composed as R1^-1 * R2, while the translation difference is kept
        # in the global frame, as stated in the docstring.
        relative_pose_matrix = np.eye(4)
        pose1_rot = pose1[0:3, 0:3]
        pose1_tran = pose1[0:3, 3]
        pose2_rot = pose2[0:3, 0:3]
        pose2_tran = pose2[0:3, 3]
        pose1_inv = np.linalg.inv(pose1_rot)
        rot_pose1_to_pose2 = np.dot(pose1_inv, pose2_rot)
        tran_pose1_to_pose2 = pose2_tran - pose1_tran
        relative_pose_matrix[0:3, 0:3] = rot_pose1_to_pose2
        relative_pose_matrix[0:3, 3] = tran_pose1_to_pose2
        return relative_pose_matrix

    def map_pointcloud_to_image(self,
                                nusc,
                                pointsensor_token: str,
                                camera_token: str,
                                min_dist: float = 1.0,
                                render_intensity: bool = False,
                                show_lidarseg: bool = False):
        """
        Given a point sensor (lidar/radar) token and camera sample_data token, load pointcloud and map it to the image
        plane.
        :param pointsensor_token: Lidar/radar sample_data token.
        :param camera_token: Camera sample_data token.
        :param min_dist: Distance from the camera below which points are discarded.
        :param render_intensity: Whether to render lidar intensity instead of point depth.
        :param show_lidarseg: Whether to render lidar intensity instead of point depth.
        :param filter_lidarseg_labels: Only show lidar points which belong to the given list of classes. If None
            or the list is empty, all classes will be displayed.
        :param lidarseg_preds_bin_path: A path to the .bin file which contains the user's lidar segmentation
                                        predictions for the sample.
        :return (pointcloud <np.float: 2, n)>, coloring <np.float: n>, image <Image>).
        """

        cam = nusc.get('sample_data', camera_token)
        pointsensor = nusc.get('sample_data', pointsensor_token)
        pcl_path = osp.join(nusc.dataroot, pointsensor['filename'])
        if pointsensor['sensor_modality'] == 'lidar':
            # Ensure that lidar pointcloud is from a keyframe.
            assert pointsensor['is_key_frame'], \
                'Error: Only pointclouds which are keyframes have lidar segmentation labels. Rendering aborted.'
            pc = LidarPointCloud.from_file(pcl_path)
        else:
            pc = RadarPointCloud.from_file(pcl_path)
        im = Image.open(osp.join(nusc.dataroot, cam['filename']))

        # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
        # First step: transform the pointcloud to the ego vehicle frame for the timestamp of the sweep.
        cs_record = nusc.get('calibrated_sensor',
                             pointsensor['calibrated_sensor_token'])
        pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
        pc.translate(np.array(cs_record['translation']))

        # Second step: transform from ego to the global frame.
        poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
        pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
        pc.translate(np.array(poserecord['translation']))

        # Third step: transform from global into the ego vehicle frame for the timestamp of the image.
        poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
        pc.translate(-np.array(poserecord['translation']))
        pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

        # Fourth step: transform from ego into the camera.
        cs_record = nusc.get('calibrated_sensor',
                             cam['calibrated_sensor_token'])
        pc.translate(-np.array(cs_record['translation']))
        pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

        # Fifth step: actually take a "picture" of the point cloud.
        # Grab the depths (camera frame z axis points away from the camera).
        depths = pc.points[2, :]

        if render_intensity:
            assert pointsensor['sensor_modality'] == 'lidar', 'Error: Can only render intensity for lidar, ' \
                                                              'not %s!' % pointsensor['sensor_modality']
            # Retrieve the color from the intensities.
            # Perform arbitrary scaling to achieve more visually pleasing results.
            intensities = pc.points[3, :]
            intensities = (intensities - np.min(intensities)) / (
                np.max(intensities) - np.min(intensities))
            intensities = intensities**0.1
            intensities = np.maximum(0, intensities - 0.5)
            coloring = intensities

        else:
            # Retrieve the color from the depth.
            coloring = depths

        # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
        points = view_points(pc.points[:3, :],
                             np.array(cs_record['camera_intrinsic']),
                             normalize=True)

        # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons.
        # Also make sure points are at least 1m in front of the camera to avoid seeing the lidar points on the camera
        # casing for non-keyframes which are slightly out of sync.
        mask = np.ones(depths.shape[0], dtype=bool)
        mask = np.logical_and(mask, depths > min_dist)
        mask = np.logical_and(mask, points[0, :] > 1)
        mask = np.logical_and(mask, points[0, :] < im.size[0] - 1)
        mask = np.logical_and(mask, points[1, :] > 1)
        mask = np.logical_and(mask, points[1, :] < im.size[1] - 1)
        points = points[:, mask]
        coloring = coloring[mask]

        return points, coloring, im
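A hypothetical usage sketch for the NuscDataset above follows; the data root, the empty filenames argument, the frame indices and the batch size are placeholders rather than values taken from this listing.

# Hedged usage sketch (placeholder paths and parameters; not part of the original class).
from torch.utils.data import DataLoader

if __name__ == '__main__':
    dataset = NuscDataset(data_root='/share/nuscenes',   # assumed nuScenes root
                          filenames=[],                  # unused by __getitem__ in this snippet
                          height=640,
                          width=1600,
                          frame_idxs=[0, -1, 1],
                          num_scales=4,
                          version='v1.0-mini',
                          sensor='CAM_FRONT',
                          is_train=True)
    loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=0)
    batch = next(iter(loader))
    # Keys follow the __getitem__ docstring: ("color", frame_id, scale), ("color_aug", ...),
    # ("K", scale), ("inv_K", scale) and "depth_gt".
    print(batch[("color", 0, 0)].shape, batch[("K", 0)].shape, batch["depth_gt"].shape)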
Exemplo n.º 26
0
def visualize_sample(nusc: NuScenes, sample_token: str, all_annotations: Dict, all_results: Dict, nsweeps: int=1,
                     conf_th: float=0.15, eval_range: float=40, verbose=True) -> None:
    """
    Visualizes a sample from BEV with annotations and detection results.
    :param nusc: NuScenes object.
    :param sample_token: The nuScenes sample token.
    :param all_annotations: Maps each sample token to its annotations.
    :param all_results: Maps each sample token to its results.
    :param nsweeps: Number of sweeps used for lidar visualization.
    :param conf_th: The confidence threshold used to filter negatives.
    :param eval_range: Range in meters beyond which boxes are ignored.
    :param verbose: Whether to print to stdout.
    """

    # Retrieve sensor & pose records.
    sample_rec = nusc.get('sample', sample_token)
    sd_record = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])
    cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])

    # Get boxes.
    boxes_gt_global = all_annotations[sample_token]
    boxes_est_global = all_results[sample_token]

    # Map GT boxes to lidar.
    boxes_gt = boxes_to_sensor(boxes_gt_global, pose_record, cs_record)

    # Map EST boxes to lidar.
    boxes_est = boxes_to_sensor(boxes_est_global, pose_record, cs_record)

    # Get point cloud in lidar frame.
    pc, _ = LidarPointCloud.from_file_multisweep(nusc, sample_rec, 'LIDAR_TOP', 'LIDAR_TOP', nsweeps=nsweeps)

    # Init axes.
    _, ax = plt.subplots(1, 1, figsize=(9, 9))

    # Show point cloud.
    points = view_points(pc.points[:3, :], np.eye(4), normalize=False)
    dists = np.sqrt(np.sum(pc.points[:2, :] ** 2, axis=0))
    colors = np.minimum(1, dists / eval_range)
    ax.scatter(points[0, :], points[1, :], c=colors, s=0.2)

    # Show ego vehicle.
    ax.plot(0, 0, 'x', color='black')

    # Show GT boxes.
    for box in boxes_gt:
        box.render(ax, view=np.eye(4), colors=('g', 'g', 'g'), linewidth=2)

    # Show EST boxes.
    for box in boxes_est:
        # Show only predictions with a high score.
        if box.score >= conf_th:
            box.render(ax, view=np.eye(4), colors=('b', 'b', 'b'), linewidth=1)

    # Limit visible range.
    axes_limit = eval_range + 3  # Slightly bigger to include boxes that extend beyond the range.
    ax.set_xlim(-axes_limit, axes_limit)
    ax.set_ylim(-axes_limit, axes_limit)

    # Show plot.
    if verbose:
        print('Showing sample token %s' % sample_token)
    plt.title(sample_token)
    plt.show()
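boxes_to_sensor is imported from the nuScenes evaluation utilities and is not shown in this listing. A minimal sketch of the global-to-sensor frame change it presumably performs, assuming nuScenes Box objects, might look like this:

# Hedged sketch of a global-frame -> (lidar) sensor-frame box mapping (assumption; the
# actual boxes_to_sensor helper also converts the eval box type).
import numpy as np
from pyquaternion import Quaternion
from nuscenes.utils.data_classes import Box


def global_box_to_sensor_sketch(box: Box, pose_record: dict, cs_record: dict) -> Box:
    """Move a box given in the global frame into the sensor frame of a sample_data record."""
    box = box.copy()
    # Global frame -> ego vehicle frame.
    box.translate(-np.array(pose_record['translation']))
    box.rotate(Quaternion(pose_record['rotation']).inverse)
    # Ego vehicle frame -> sensor frame.
    box.translate(-np.array(cs_record['translation']))
    box.rotate(Quaternion(cs_record['rotation']).inverse)
    return box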
Exemplo n.º 27
0
class NuScenesEvaluator(Evaluator):
    """
    NuScenes Model Evaluator

    Evaluates models based on the nuScenes dataset. The evaluator logs per-sample F1 scores as well as visualizations
    of the validation data samples.

    Arguments:
        logdir: Folder path to store the evaluation log files, <str>.
        batch_size: Batch size used for the model prediction, <int>.
        verbose: Verbosity mode (0 = silent, 1 = progress bar, 2 = one line per epoch), <int>.
        steps: Total number of epoch before stopping the evaluation, <int>.
        callbacks: List of callbacks to apply during evaluation, <list>.
        max_queue_size: Maximum size for the generator queue (used for generator input only), <int>.
        workers: Maximum number of processes to spin up when using process-based threading (used for generator input only), <int>.
        use_multiprocessing: Whether use process-based threading (used for generator input only), <bool>.
        visualize: Whether to visualize the prediction or not, <bool>.
        velocities: Whether to visualize the velocity vectors or not, <bool>.
        version: Version of the nuScenes dataset, <str>.
        dataroot: Path to the raw nuScenes dataset files, <str>.
        class_matching_dict: Key-Value-Pairs of the original category names and the user configured class names, <dict>.
        preprocessor_attributes: Keyword attributes for the nuScenes pre-processor, <dict>.
    """
    def __init__(self,
                 logdir: str = None,
                 batch_size: int = 1,
                 verbose: int = 2,
                 steps: int = None,
                 callbacks: list = None,
                 max_queue_size: int = 10,
                 workers: int = 1,
                 use_multiprocessing: bool = False,
                 visualize: bool = True,
                 velocities: bool = True,
                 version: str = 'v1.0-mini',
                 dataroot: str = None,
                 class_matching_dict: dict = None,
                 preprocessor_attributes: dict = None):
        # Initialize base class
        super(NuScenesEvaluator,
              self).__init__(logdir=logdir,
                             batch_size=batch_size,
                             steps=steps,
                             verbose=verbose,
                             callbacks=callbacks,
                             max_queue_size=max_queue_size,
                             workers=workers,
                             use_multiprocessing=use_multiprocessing)

        # Initialize NuScenesEvaluator
        self.visualize = visualize
        self.velocities = velocities
        self.version = version
        self.dataroot = dataroot
        self.class_matching_dict = class_matching_dict if class_matching_dict is not None else {}
        self.preprocessor_attributes = preprocessor_attributes if preprocessor_attributes is not None else {}

        # Load nuScenes dataset
        self.nusc = NuScenes(version=self.version,
                             dataroot=self.dataroot,
                             verbose=False)

        # Get nuScenes pre-processor
        self.nusc_preprocessor = NuScenesPreProcessor(
            self.class_matching_dict, **self.preprocessor_attributes)

    def _write_to_csv(self, filename, data):
        with open(filename, "w", newline="") as file:
            writer = csv.writer(file)
            writer.writerow(
                ["Scene token"] +
                ["Val F1 score sample " + str(i) for i in range(41)])
            writer.writerows(data)

    def _get_bounding_boxes(self, files):
        """
        Returns the bounding boxes of the scenes specified in files.

        Hint: The filenames must correspond to the nuScenes scene token.

        Attributes:
            files: Dataset of filenames of the nuScenes scenes, <tf.data.Dataset>.

        Returns:
            boxes: Nested list of bounding boxes (scenes, samples, boxes), <List>.
        """
        # Define
        boxes = []

        # Get bounding boxes
        for filename in files:
            # Define
            sample_boxes = []

            # Get current scene
            scene_token = os.path.basename(
                os.path.splitext(filename.numpy())[0]).decode("utf-8")
            scene = self.nusc.get('scene', scene_token)
            sample = self.nusc.get('sample', scene['first_sample_token'])
            # Skip first sample since it has less radar sweeps available
            sample = self.nusc.get('sample', sample['next'])

            # Iterate over all samples within the scene
            while True:
                # Get bounding boxes of the sample
                sample_boxes.append(
                    self.nusc_preprocessor.get_annotations(dataset=self.nusc,
                                                           sample=sample))

                # Check if sample is the last sample in the current scene
                if sample['next']:
                    sample = self.nusc.get('sample', sample['next'])
                else:
                    break

            boxes.append(sample_boxes)

        return boxes

    def _get_f1_scores(self, confidence, dataset, num_classes):
        """
        Returns sample-wise F1 scores for all scenes.

        Arguments:
            confidence: Confidence of the predicted labels, <tf.Tensor>.
            dataset: Dataset with features and labels, <tf.data.Dataset>.
            num_classes: Number of classes, <int>.

        Returns:
            F1: List of F1 scores per sample, <List>.
        """
        F1 = []
        # Iterate over scenes
        for (_, labels), score in zip(dataset, confidence):
            sample_F1 = []
            # Iterate over samples
            for sample_labels, sample_scores in zip(tf.unstack(labels, axis=1),
                                                    tf.unstack(score, axis=1)):
                sample_F1.append(
                    float(
                        metrics.F1Score(num_classes=num_classes,
                                        average='macro',
                                        top_k=1)(sample_labels,
                                                 sample_scores)))

            F1.append(sample_F1)

        return F1

    def log_f1_scores(self, F1, files):
        data = [[
            os.path.basename(os.path.splitext(
                token.numpy())[0]).decode("utf-8")
        ] + scores for token, scores in zip(files, F1)]
        filename = os.path.join(self.logdir, 'F1Scores.csv')
        self._write_to_csv(filename, data)

    def visualize_results(self, dataset, prediction, confidence, boxes, files):
        """
        Visualizes the model predictions and the ground truth data per sample.

        Arguments:
            dataset: Dataset with features and labels, <tf.data.Dataset>.
            prediction: Predicted labels, <tf.Tensor>.
            confidence: Confidence of the predicted labels, <tf.Tensor>.
            boxes: Nested list of bounding boxes (scenes, samples, boxes), <List>.
            files: Dataset of filenames of the nuScenes scenes, <tf.data.Dataset>.
        """
        # Define
        colors = utils.get_colors('blues')
        i = 0

        # Visualize samples
        with progressbar.ProgressBar(max_value=len(prediction)) as bar:
            for filename, (
                    features, _
            ), scene_predictions, scene_confidence, scene_boxes in zip(
                    files, dataset, prediction, confidence, boxes):
                n_samples_dataset = features["x"].shape[1]
                n_samples_script_loader = len(scene_boxes)
                assert n_samples_dataset == n_samples_script_loader, f"""Number of scene samples in the dataset ({n_samples_dataset})
                    differs from that of the nuScenes loader ({n_samples_script_loader}). Make sure to skip the first sample during dataset creation."""
                # Update progressbar
                bar.update(i)

                # Get current scene
                scene_token = os.path.basename(
                    os.path.splitext(filename.numpy())[0]).decode("utf-8")
                scene = self.nusc.get('scene', scene_token)

                # Get map
                log = self.nusc.get('log', scene['log_token'])
                map_ = self.nusc.get('map', log['map_token'])

                # Get sample
                sample = self.nusc.get('sample', scene['first_sample_token'])
                # Skip first sample since it has less radar sweeps available
                sample = self.nusc.get('sample', sample['next'])

                for j, sample_boxes in enumerate(scene_boxes):
                    # Get sample data
                    ref_data = self.nusc.get('sample_data',
                                             sample['data']['LIDAR_TOP'])
                    pose = self.nusc.get('ego_pose',
                                         ref_data['ego_pose_token'])
                    sample_points = tf.concat(
                        (features['x'][:, j, :], features['y'][:, j, :],
                         features['z'][:, j, :]),
                        axis=0).numpy()
                    sample_predictions = np.squeeze(scene_predictions[:, j, :])
                    sample_confidence = np.amax(np.squeeze(
                        scene_confidence[:, j, :]),
                                                axis=-1)

                    # Get velocities
                    if self.velocities:
                        velocities = tf.concat((features['vx_comp'][:, j, :],
                                                features['vy_comp'][:, j, :]),
                                               axis=0).numpy()
                    else:
                        velocities = None

                    # Set image name
                    out_path = os.path.join(
                        self.logdir, 'images',
                        scene_token + '_' + str(sample['timestamp']) + '_' +
                        sample['token'] + '.svg')

                    # Render sample
                    render_sample.render_sample(sample_points,
                                                sample_predictions,
                                                sample_boxes,
                                                score=sample_confidence,
                                                velocities=velocities,
                                                map_=map_,
                                                pose=pose,
                                                colors=colors,
                                                out_path=out_path,
                                                out_format='svg')

                    # Get next sample
                    if sample['next']:
                        sample = self.nusc.get('sample', sample['next'])

                i += 1

    def evaluate(self, model, dataset, files, num_classes):
        # Make prediction
        prediction, confidence = self.predict(model=model, dataset=dataset)

        # Get bounding boxes
        boxes = self._get_bounding_boxes(files)

        # Log F1 scores
        F1 = self._get_f1_scores(confidence, dataset, num_classes)
        self.log_f1_scores(F1, files)

        # Visualize prediction
        if self.visualize:
            self.visualize_results(dataset, prediction, confidence, boxes,
                                   files)

    def get_config(self):
        # Get instance configuration
        config = {
            'visualize': self.visualize,
            'version': self.version,
            'dataroot': self.dataroot,
            'class_matching_dict': self.class_matching_dict,
            'preprocessor_attributes': self.preprocessor_attributes
        }

        # Get base class configuration
        base_config = super(NuScenesEvaluator, self).get_config()

        return dict(list(base_config.items()) + list(config.items()))
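The F1 logging above writes one CSV row per scene: the scene token followed by 41 per-sample scores. A small illustration of the row layout _write_to_csv expects is shown below; the tokens and score values are made up purely for illustration.

# Illustrative only: fabricated scene tokens and scores showing the expected row layout for
# NuScenesEvaluator._write_to_csv (one scene token followed by 41 per-sample F1 scores).
example_rows = [
    ['scene_token_a'] + [0.71, 0.69, 0.73] + [0.0] * 38,
    ['scene_token_b'] + [0.65, 0.66, 0.68] + [0.0] * 38,
]
# evaluator._write_to_csv(os.path.join(logdir, 'F1Scores.csv'), example_rows)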
import numpy as np
from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='D:\\v1.0-mini', verbose=True)
print('all scenes')
nusc.list_scenes()  # list all scenes

my_scene = nusc.scene[0]
print('\nmy_scene\n', my_scene)  # print all fields of the first scene

first_sample_token = my_scene[
    'first_sample_token']  # the first_sample_token of the first record in scene.json

# All fields of the first frame in sample.json (token, timestamp, prev, next, scene_token) and
# the data of all the vehicle's sensors.

my_sample = nusc.get('sample', first_sample_token)
print('my_sample\n', my_sample)
#       To get only the sensor data of the sample: print('my_sample\n', my_sample['data'])
#       To get only CAM_FRONT:
# sensor = 'CAM_FRONT'
# cam_front_data = nusc.get('sample_data', my_sample['data'][sensor])
# print(cam_front_data)
#               The other sensors work analogously.
#       To render the annotation boxes of a sensor (using the example above) [visualization]:
# nusc.render_sample_data(cam_front_data['token'])

nusc.list_sample(
    my_sample['token'])  # list all sample_data keyframes and sample_annotations of this sample

my_annotation_token = my_sample['anns'][
    18]  # take the 19th sample_annotation token (the others do not seem to be well annotated)
    'vehicle.truck': 'truck',
    'vehicle.construction': 'construction_vehicle',
    'vehicle.emergency.ambulance': 'ignore',
    'vehicle.emergency.police': 'ignore',
    'vehicle.trailer': 'trailer',
    'movable_object.barrier': 'barrier',
    'movable_object.trafficcone': 'traffic_cone',
    'movable_object.pushable_pullable': 'ignore',
    'movable_object.debris': 'ignore',
    'static_object.bicycle_rack': 'ignore',
}

for sample in nusc.sample:
    cam_token = sample['data']['CAM_FRONT']
    pc_token = sample['data']['LIDAR_TOP']
    pointsensor = nusc.get('sample_data', pc_token)
    # pcl_path = osp.join(nusc.dataroot, pointsensor['filename'])

    cam = nusc.get('sample_data', cam_token)
    # print(cam)

    # im = Image.open(osp.join(nusc.dataroot, cam['filename']))
    # print(im.width, im.height)

    data_path, box_list, cam_intrinsic = nusc.get_sample_data(pc_token)

    # print(data_path)
    # print(pcl_path)

    # fname =  pcl_path.split('/')[-1].split('.')[0] + '.txt'
    # print(fname)
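    # Hedged sketch completing the commented-out lines above (illustrative only; the
    # LidarPointCloud import is an assumption and not part of the original snippet):
    from nuscenes.utils.data_classes import LidarPointCloud
    pc = LidarPointCloud.from_file(data_path)
    print(data_path, pc.points.shape, len(box_list))  # points are 4 x N (x, y, z, intensity)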
Exemplo n.º 30
0
def evaluate(split):
    params = read_params(FLAGS.param)
    nusc = NuScenes(version='v1.0-trainval', dataroot=FLAGS.nuscenes, verbose=True)
    sensor = 'LIDAR_TOP'
    kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    meta = {
        'use_camera': False,
        'use_lidar': True,
        'use_radar': False,
        'use_map': False,
        'use_external': False,
    }
    results = {}
    results_0_3 = {}

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(FLAGS.graph, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            for node in od_graph_def.node:
                if 'BatchMultiClassNonMaxSuppression' in node.name:
                    node.device = '/device:CPU:0'
            tf.import_graph_def(od_graph_def, name='')
    with detection_graph.as_default():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = False
        with tf.Session(graph=detection_graph, config=config) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_boxes_inclined = detection_graph.get_tensor_by_name('detection_boxes_3d:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            scene_splits = create_splits_scenes()

            # os.system('touch {}'.format())

            inf_time_list = []

            for scene in nusc.scene:
                if scene['name'] not in scene_splits[split]:
                    continue
                current_sample_token = scene['first_sample_token']
                last_sample_token = scene['last_sample_token']
                sample_in_scene = True
                while sample_in_scene:
                    if current_sample_token == last_sample_token:
                        sample_in_scene = False
                    sample = nusc.get('sample', current_sample_token)
                    lidar_top_data = nusc.get('sample_data', sample['data'][sensor])
                    # Get global pose and calibration data
                    ego_pose = nusc.get('ego_pose', lidar_top_data['ego_pose_token'])
                    calib_sensor = nusc.get('calibrated_sensor', lidar_top_data['calibrated_sensor_token'])
                    ego_to_global = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']))
                    lidar_to_ego = transform_matrix(calib_sensor['translation'], Quaternion(calib_sensor['rotation']))

                    # Read input data
                    filename_prefix = os.path.splitext(os.path.splitext(lidar_top_data['filename'])[0])[0]
                    image_stacked, det_mask, image_ground, image_zmax = read_images(FLAGS.data, filename_prefix)
                    # Inference
                    start_time = time.time()
                    (boxes_aligned, boxes_inclined, scores, classes, num) = sess.run(
                        [detection_boxes, detection_boxes_inclined, detection_scores, detection_classes,
                         num_detections],
                        feed_dict={image_tensor: image_stacked})
                    inf_time = time.time() - start_time
                    print('Inference time:', inf_time)
                    inf_time_list.append(inf_time)



                    # Evaluate object detection
                    label_map = label_map_util.load_labelmap(FLAGS.label_map)
                    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=10,
                                                                                use_display_name=True)
                    category_index = label_map_util.create_category_index(categories)
                    boxes = []
                    boxes_0_3 = []
                    scores = np.squeeze(scores)
                    for i in range(scores.shape[0]):
                        if scores[i] > .230:
                            object_class = category_index[int(np.squeeze(classes)[i])]['name']
                            box = calculate_object_box(tuple(np.squeeze(boxes_aligned)[i]),
                                                       tuple(np.squeeze(boxes_inclined)[i]), image_ground, image_zmax,
                                                       object_class, scores[i], params)
                            # Transformation box coordinate system to nuscenes lidar coordinate system
                            box.rotate(kitti_to_nu_lidar)
                            # Transformation nuscenes lidar coordinate system to ego vehicle frame
                            box.rotate(Quaternion(matrix=lidar_to_ego[:3, :3]))
                            box.translate(lidar_to_ego[:3, 3])
                            # Transformation ego vehicle frame to global frame
                            box.rotate(Quaternion(matrix=ego_to_global[:3, :3]))
                            box.translate(ego_to_global[:3, 3])
                            boxes.append(box)
                    for i in range(scores.shape[0]):
                        if scores[i] > .225:
                            object_class = category_index[int(np.squeeze(classes)[i])]['name']
                            box = calculate_object_box(tuple(np.squeeze(boxes_aligned)[i]),
                                                       tuple(np.squeeze(boxes_inclined)[i]), image_ground, image_zmax,
                                                       object_class, scores[i], params)
                            # Transformation box coordinate system to nuscenes lidar coordinate system
                            box.rotate(kitti_to_nu_lidar)
                            # Transformation nuscenes lidar coordinate system to ego vehicle frame
                            box.rotate(Quaternion(matrix=lidar_to_ego[:3, :3]))
                            box.translate(lidar_to_ego[:3, 3])
                            # Transformation ego vehicle frame to global frame
                            box.rotate(Quaternion(matrix=ego_to_global[:3, :3]))
                            box.translate(ego_to_global[:3, 3])
                            boxes_0_3.append(box)
                    # Convert boxes to nuScenes detection challenge result format.
                    sample_results = [box_to_sample_result(current_sample_token, box) for box in boxes]
                    results[current_sample_token] = sample_results

                    sample_results_0_3 = [box_to_sample_result(current_sample_token, box) for box in boxes_0_3]
                    results_0_3[current_sample_token] = sample_results_0_3

                    current_sample_token = sample['next']
    average_inf_time = sum(inf_time_list) / float(len(inf_time_list))
    with open(os.path.join(FLAGS.output, 'inference_time.txt'), 'w+') as f_info:
        f_info.write('average time: {}\n'.format(average_inf_time))
        f_info.write(str(inf_time_list))
    submission = {
        'meta': meta,
        'results': results
    }
    submission_path = os.path.join(FLAGS.output, 'submission_0_3nn30.json')
    with open(submission_path, 'w') as f:
        json.dump(submission, f, indent=2)

    submission_0_3 = {
        'meta': meta,
        'results': results_0_3
    }
    submission_path_0_3 = os.path.join(FLAGS.output, 'submission_0_3nn25.json')
    with open(submission_path_0_3, 'w') as f:
        json.dump(submission_0_3, f, indent=2)
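box_to_sample_result is not shown in this listing; each entry it produces is presumably a record in the standard nuScenes detection submission format, illustrated below with placeholder values.

# Illustrative placeholder record (values are made up); the `results` dict above maps each
# sample_token to a list of such records, alongside the `meta` dict defined in evaluate().
example_sample_result = {
    'sample_token': '<sample_token>',        # token of the sample the box belongs to
    'translation': [971.8, 1371.6, 0.8],     # box center in the global frame [m]
    'size': [0.6, 1.9, 1.7],                 # width, length, height [m]
    'rotation': [1.0, 0.0, 0.0, 0.0],        # quaternion (w, x, y, z) in the global frame
    'velocity': [0.0, 0.0],                  # vx, vy in the global frame [m/s]
    'detection_name': 'car',
    'detection_score': 0.85,
    'attribute_name': 'vehicle.moving',
}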