def get_camera_data(nusc: NuScenes, annotation_token: str,
                    box_vis_level: BoxVisibility = BoxVisibility.ANY):
    """
    Given an annotation token (a 3D detection in the world coordinate frame), this method
    returns the camera in which the annotation is visible. If the box is split between
    two cameras, it returns the first one found.
    :param nusc: NuScenes instance.
    :param annotation_token: Annotation token.
    :param box_vis_level: If sample_data is an image, this sets the required visibility for boxes.
    :return: Camera channel.
    """
    # Get the sample annotation and its parent sample.
    ann_record = nusc.get('sample_annotation', annotation_token)
    sample_record = nusc.get('sample', ann_record['sample_token'])
    boxes, cam = [], []

    # Collect every camera channel of this sample.
    cams = [key for key in sample_record['data'].keys() if 'CAM' in key]

    # Try every camera until one contains the annotation.
    # Note: if no camera matches, the last channel checked is returned.
    for cam in cams:
        _, boxes, _ = nusc.get_sample_data(sample_record['data'][cam],
                                           box_vis_level=box_vis_level,
                                           selected_anntokens=[annotation_token])
        if len(boxes) > 0:
            break  # Stop at the first camera that matches.

    assert len(boxes) < 2, "Found multiple annotations. Something is wrong!"
    return cam
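# A minimal usage sketch for get_camera_data, assuming a local nuScenes mini split.
# The dataroot path and version are illustrative; any annotation token works.
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import BoxVisibility

nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
ann_token = nusc.sample_annotation[0]['token']
channel = get_camera_data(nusc, ann_token, box_vis_level=BoxVisibility.ANY)
print('Annotation {} is visible in {}'.format(ann_token, channel))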
annotations = []
counter = 0
max_v = 0
min_v = 999999
for im_token in image_token:
    sample_data = nusc.get('sample_data', im_token)
    image_name = sample_data['filename']
    img = cv2.imread('/home/fengjia/data/sets/nuscenes/' + image_name)
    im = np.array(img)
    sample = nusc.get('sample', sample_data['sample_token'])
    lidar_token = sample['data']['LIDAR_TOP']

    # Get ground-truth boxes for this image.
    _, boxes, img_camera_intrinsic = nusc.get_sample_data(im_token, box_vis_level=BoxVisibility.ALL)
    for box in boxes:
        visibility_token = nusc.get('sample_annotation', box.token)['visibility_token']
        # Visibility tokens '1'-'4' index increasing visibility bins; keep only levels 3 and 4.
        vis_level = int(nusc.get('visibility', visibility_token)['token'])
        if (vis_level != 3) and (vis_level != 4):
            continue
        # Homogeneous 3D corners (4 x 8) of the box.
        corners_4dim = np.ones((4, box.corners().shape[1]))
        corners_4dim[:3, :] = box.corners()[:, :]
        # ori_corners_ = get_gt(nusc=nusc, corners=np.array(corners_4dim, copy=True),
        #                       camera_token=im_token, pointsensor_token=lidar_token)
        # Project the 3D corners into the image plane.
        ori_corners = view_points(box.corners(), view=np.array(img_camera_intrinsic, copy=True), normalize=True)
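        # Hedged continuation sketch (an assumption, not original code): min/max over the
        # eight projected corners gives the 2D image-plane box for this annotation;
        # appending it to `annotations` above is illustrative.
        x_min, y_min = ori_corners[0].min(), ori_corners[1].min()
        x_max, y_max = ori_corners[0].max(), ori_corners[1].max()
        annotations.append(np.array([x_min, y_min, x_max, y_max]))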
def __init__(self, batch_size, num_classes, training=True, normalize=None):
    self._num_classes = num_classes
    # Trim every image to a consistent trim_height x trim_width.
    self.trim_height = cfg.TRAIN.TRIM_HEIGHT
    self.trim_width = cfg.TRAIN.TRIM_WIDTH
    self.max_num_box = cfg.MAX_NUM_GT_BOXES
    self.training = training
    self.normalize = normalize
    self.batch_size = batch_size
    self.classes = ('__background__', 'pedestrian', 'barrier', 'trafficcone',
                    'bicycle', 'bus', 'car', 'construction', 'motorcycle',
                    'trailer', 'truck')
    self.max_num_box = 50  # Note: overrides the cfg value set above.

    # Check whether a pickled roidb already exists; if not, build and save it.
    if os.path.exists('lib/roi_data_layer/roidb_CAMFRONT.pkl'):
        print("Reading roidb..")
        with open("lib/roi_data_layer/roidb_CAMFRONT.pkl", "rb") as pickle_in:
            self.roidb = pickle.load(pickle_in)
        trainsize = 20000
        if training:
            self.roidb = self.roidb[:trainsize]
            print("roidb size: " + str(len(self.roidb)))
        else:
            self.roidb = self.roidb[trainsize:]
    else:
        nusc_path = '/data/sets/nuscenes'
        nusc = NuScenes(version='v1.0-trainval', dataroot=nusc_path, verbose=True)
        file_dir = os.path.dirname(os.path.abspath(__file__))
        roots = file_dir.split('/')[:-2]
        root_dir = ""
        for folder in roots:
            if folder != "":
                root_dir = root_dir + "/" + folder
        PATH = root_dir + '/data/CAMFRONT.txt'
        with open(PATH) as f:
            image_token = [x.strip() for x in f.readlines()]

        roidb = []
        # Load image and ground-truth box information, then pickle it for faster reloading.
        print("Loading roidb...")
        for i in range(len(image_token)):
            im_token = image_token[i]
            sample_data = nusc.get('sample_data', im_token)
            image_name = sample_data['filename']
            image_path = nusc_path + '/' + image_name
            data_path, boxes, camera_intrinsic = nusc.get_sample_data(
                im_token, box_vis_level=BoxVisibility.ALL)

            # Keep only boxes with visibility level 3 or 4 and classes with
            # more than 1000 instances (rarer categories are mapped to '').
            gt_boxes = []
            gt_cls = []
            for box in boxes:
                visibility_token = nusc.get('sample_annotation', box.token)['visibility_token']
                vis_level = int(nusc.get('visibility', visibility_token)['token'])
                visible = vis_level == 3 or vis_level == 4
                if visible:
                    if box.name.split('.')[0] == 'vehicle':
                        if box.name.split('.')[1] != 'emergency':
                            name = box.name.split('.')[1]
                        else:
                            name = ''
                    elif box.name.split('.')[0] == 'human':
                        name = 'pedestrian'
                    elif box.name.split('.')[0] == 'movable_object':
                        if box.name.split('.')[1] != 'debris' and box.name.split('.')[1] != 'pushable_pullable':
                            name = box.name.split('.')[1]
                        else:
                            name = ''
                    else:
                        name = ''
                    if name != '':
                        # Project the 3D box to a 2D axis-aligned box in the image.
                        corners = view_points(box.corners(), view=camera_intrinsic, normalize=True)[:2, :]
                        box = np.zeros(4)
                        box[0] = np.min(corners[0])
                        box[1] = np.min(corners[1])
                        box[2] = np.max(corners[0])
                        box[3] = np.max(corners[1])
                        gt_boxes = gt_boxes + [box]
                        gt_cls = gt_cls + [self.classes.index(name)]

            # Keep only images with at least two ground-truth objects.
            if len(gt_boxes) >= 2:
                image = {}
                image['image'] = image_path
                image['width'] = 1600
                image['height'] = 900
                image['boxes'] = np.asarray(gt_boxes)
                image['gt_classes'] = np.asarray(gt_cls)
                roidb = roidb + [image]
        print(len(roidb))
        print("Saving roidb")
        with open("lib/roi_data_layer/roidb_CAMFRONT.pkl", "wb") as pickle_out:
            pickle.dump(roidb, pickle_out)
        self.roidb = roidb
        trainsize = int(len(self.roidb) * 0.8)
        if training:
            self.roidb = self.roidb[:trainsize]
        else:
            self.roidb = self.roidb[trainsize:]
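# Hedged refactor sketch: the category-name mapping above is duplicated in
# prepare_roidb below, so it could be factored into one helper. The function name
# is hypothetical, not part of the original code; the logic mirrors the branches above.
def nuscenes_name_to_class(category_name):
    # Map a raw nuScenes category (e.g. 'vehicle.car', 'human.pedestrian.adult')
    # to one of the loader's class names, or '' if the category is filtered out.
    parts = category_name.split('.')
    if parts[0] == 'vehicle' and parts[1] != 'emergency':
        return parts[1]
    if parts[0] == 'human':
        return 'pedestrian'
    if parts[0] == 'movable_object' and parts[1] not in ('debris', 'pushable_pullable'):
        return parts[1]
    return ''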
def load_merge_from_pkl(nusc: NuScenes, pkl_path: str, box_cls, verbose: bool = False) -> EvalBoxes:
    # Init.
    if box_cls == DetectionBox:
        attribute_map = {a['token']: a['name'] for a in nusc.attribute}

    if verbose:
        print('Loading annotations for {} split from nuScenes version: {}'.format(pkl_path, nusc.version))

    # Local import: mmcv is only needed for this loader.
    import mmcv
    infos = mmcv.load(pkl_path)['infos']
    samples = [nusc.get('sample', info['token']) for info in infos]

    all_annotations = EvalBoxes()

    # Merge fine-grained detection classes into coarser groups.
    merge_map = dict(car='vehicle',
                     truck='vehicle',
                     bus='vehicle',
                     trailer='vehicle',
                     construction_vehicle='vehicle',
                     pedestrian='pedestrian',
                     motorcycle='bike',
                     bicycle='bike',
                     traffic_cone='traffic_boundary',
                     barrier='traffic_boundary')

    # Load annotations and filter predictions and annotations.
    for sample in tqdm.tqdm(samples, leave=verbose):
        sample_token = sample['token']
        cam_token = sample['data']['CAM_FRONT']
        _, boxes_cam, _ = nusc.get_sample_data(cam_token)
        # Only keep annotations visible in the front camera.
        sample_annotation_tokens = [box.token for box in boxes_cam]
        # sample = nusc.get('sample', sample_token)
        # sample_annotation_tokens = sample['anns']

        sample_boxes = []
        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = nusc.get('sample_annotation', sample_annotation_token)
            if box_cls == DetectionBox:
                # Get label name in detection task and filter unused labels.
                detection_name = category_to_detection_name(sample_annotation['category_name'])
                if detection_name is None:
                    continue
                detection_name = merge_map[detection_name]

                # Get attribute_name.
                attr_tokens = sample_annotation['attribute_tokens']
                attr_count = len(attr_tokens)
                if attr_count == 0:
                    attribute_name = ''
                elif attr_count == 1:
                    attribute_name = attribute_map[attr_tokens[0]]
                else:
                    raise Exception('Error: GT annotations must not have more than one attribute!')

                sample_boxes.append(
                    box_cls(
                        sample_token=sample_token,
                        translation=sample_annotation['translation'],
                        size=sample_annotation['size'],
                        rotation=sample_annotation['rotation'],
                        velocity=nusc.box_velocity(sample_annotation['token'])[:2],
                        num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],
                        detection_name=detection_name,
                        detection_score=-1.0,  # GT samples do not have a score.
                        attribute_name=attribute_name))
            else:
                raise NotImplementedError('Error: Invalid box_cls %s!' % box_cls)

        all_annotations.add_boxes(sample_token, sample_boxes)

    if verbose:
        print("Loaded ground truth annotations for {} samples.".format(len(all_annotations.sample_tokens)))

    return all_annotations
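# Hedged usage sketch (paths and version are illustrative): load merged-class GT boxes
# from an mmdetection3d-style info pickle whose 'infos' entries carry sample tokens.
nusc = NuScenes(version='v1.0-trainval', dataroot='/data/sets/nuscenes', verbose=False)
gt = load_merge_from_pkl(nusc, 'data/nuscenes/nuscenes_infos_val.pkl', DetectionBox, verbose=True)
print('Samples with GT:', len(gt.sample_tokens))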
def load_gt_front_cam(nusc: NuScenes, eval_split: str, box_cls, verbose: bool = False) -> EvalBoxes:
    # Init.
    if box_cls == DetectionBox:
        attribute_map = {a['token']: a['name'] for a in nusc.attribute}

    if verbose:
        print('Loading annotations for {} split from nuScenes version: {}'.format(eval_split, nusc.version))

    # Only keep samples from this split.
    splits = create_splits_scenes()

    # Check compatibility of split with nusc_version.
    version = nusc.version
    if eval_split in {'train', 'val', 'train_detect', 'train_track'}:
        assert version.endswith('trainval'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    elif eval_split in {'mini_train', 'mini_val'}:
        assert version.endswith('mini'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    elif eval_split == 'test':
        assert version.endswith('test'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    else:
        raise ValueError('Error: Requested split {} which this function cannot map to the correct NuScenes version.'
                         .format(eval_split))

    if eval_split == 'test':
        # Check that you aren't trying to cheat :).
        assert len(nusc.sample_annotation) > 0, \
            'Error: You are trying to evaluate on the test set but you do not have the annotations!'

    samples = []
    for sample in nusc.sample:
        scene_record = nusc.get('scene', sample['scene_token'])
        if scene_record['name'] in splits[eval_split]:
            samples.append(sample)

    all_annotations = EvalBoxes()

    # Load annotations and filter predictions and annotations.
    tracking_id_set = set()
    for sample in tqdm.tqdm(samples, leave=verbose):
        sample_token = sample['token']
        cam_token = sample['data']['CAM_FRONT']
        _, boxes_cam, _ = nusc.get_sample_data(cam_token)
        # Only keep annotations visible in the front camera.
        sample_annotation_tokens = [box.token for box in boxes_cam]
        # sample = nusc.get('sample', sample_token)
        # sample_annotation_tokens = sample['anns']

        sample_boxes = []
        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = nusc.get('sample_annotation', sample_annotation_token)
            if box_cls == DetectionBox:
                # Get label name in detection task and filter unused labels.
                detection_name = category_to_detection_name(sample_annotation['category_name'])
                if detection_name is None:
                    continue

                # Get attribute_name.
                attr_tokens = sample_annotation['attribute_tokens']
                attr_count = len(attr_tokens)
                if attr_count == 0:
                    attribute_name = ''
                elif attr_count == 1:
                    attribute_name = attribute_map[attr_tokens[0]]
                else:
                    raise Exception('Error: GT annotations must not have more than one attribute!')

                sample_boxes.append(
                    box_cls(
                        sample_token=sample_token,
                        translation=sample_annotation['translation'],
                        size=sample_annotation['size'],
                        rotation=sample_annotation['rotation'],
                        velocity=nusc.box_velocity(sample_annotation['token'])[:2],
                        num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],
                        detection_name=detection_name,
                        detection_score=-1.0,  # GT samples do not have a score.
                        attribute_name=attribute_name))
            elif box_cls == TrackingBox:
                # Use the nuScenes instance token as the tracking id.
                tracking_id = sample_annotation['instance_token']
                tracking_id_set.add(tracking_id)

                # Get label name in tracking task and filter unused labels.
                tracking_name = category_to_tracking_name(sample_annotation['category_name'])
                if tracking_name is None:
                    continue

                sample_boxes.append(
                    box_cls(
                        sample_token=sample_token,
                        translation=sample_annotation['translation'],
                        size=sample_annotation['size'],
                        rotation=sample_annotation['rotation'],
                        velocity=nusc.box_velocity(sample_annotation['token'])[:2],
                        num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],
                        tracking_id=tracking_id,
                        tracking_name=tracking_name,
                        tracking_score=-1.0))  # GT samples do not have a score.
            else:
                raise NotImplementedError('Error: Invalid box_cls %s!' % box_cls)

        all_annotations.add_boxes(sample_token, sample_boxes)

    if verbose:
        print("Loaded ground truth annotations for {} samples.".format(len(all_annotations.sample_tokens)))

    return all_annotations
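# Hedged usage sketch: load front-camera GT for the 'val' split. This requires a
# trainval installation, since the split check above asserts version compatibility;
# the dataroot path is illustrative.
nusc_trainval = NuScenes(version='v1.0-trainval', dataroot='/data/sets/nuscenes', verbose=False)
gt_val = load_gt_front_cam(nusc_trainval, 'val', DetectionBox, verbose=True)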
class NuscenesHelper:
    def __init__(self, version: str = 'v1.0-mini', dataroot: str = '/data/sets/nuscenes'):
        self.nusc = NuScenes(version=version, dataroot=dataroot, verbose=True)

    def filter_occluded_objects(self, occluded_tokens_file_path, sensor='CAM_FRONT', report_file_path=None):
        occluded_tokens_file = open(occluded_tokens_file_path, "w+")
        report_file = None  # Fix: avoid a NameError when no report path is given.
        if report_file_path is not None:
            report_file = open(report_file_path, "w+")
            report_file.write("===================SENSOR=" + sensor + "============================\n")
        scenes = self.nusc.scene
        for scene in scenes:
            print("processing scene ", scene['token'])
            occluded_anns = self.get_occluded_annos_in_scence(scene, sensor)
            for ann in occluded_anns:
                occluded_tokens_file.write(ann['token'] + "\n")
            if report_file is not None:
                for line in self.get_report_string(scene, occluded_anns):
                    report_file.write(line + "\n")
        occluded_tokens_file.close()
        if report_file is not None:
            report_file.close()

    def get_occluded_annos_in_scence(self, scene, sensor):
        occluded_anns = []
        samples = self.get_samples(scene)
        for sample in samples:
            occluded_anns_in_sample = self.get_occluded_anns_in_sample(sample, sensor)
            occluded_anns += occluded_anns_in_sample
            print("----There are ", len(occluded_anns_in_sample), " in sample ",
                  sample['token'], "of scene", scene['token'])
        return occluded_anns

    def get_samples(self, scene):
        # Walk the linked list of samples from the first sample of the scene.
        samples = []
        sample = self.nusc.get("sample", scene["first_sample_token"])
        if sample is not None:
            samples.append(sample)
            while sample["next"] != "":
                sample = self.nusc.get("sample", sample["next"])
                samples.append(sample)
        return samples

    def get_anns_in_scene(self, scene):
        samples = self.get_samples(scene)
        annos = []
        for sample in samples:
            annos += self.get_annos(sample)
        return annos

    # TODO: handle the sensor parameter properly.
    def get_occluded_anns_in_sample(self, sample, sensor):
        occluded_anns = []
        anns = self.get_annos(sample)
        camera_data = self.nusc.get('sample_data', sample['data'][sensor])
        for ann in anns:
            if self.is_occluded(ann, anns, camera_data):
                occluded_anns.append(ann)
        return occluded_anns

    def get_annos(self, sample):
        anns = []
        ann_tokens = sample["anns"]
        for ann_token in ann_tokens:
            ann = self.nusc.get("sample_annotation", ann_token)
            anns.append(ann)
        return anns

    def is_occluded(self, anno, anns, camera_data):
        """Check whether `anno` is occluded by another annotation in `anns`."""
        # Plot CAMERA view.
        sample_record = self.nusc.get('sample', anno['sample_token'])
        assert 'LIDAR_TOP' in sample_record['data'].keys(), \
            'Error: No LIDAR_TOP in data, unable to render.'
        cam_path, boxes, camera_intrinsic_matrix = self.nusc.get_sample_data(
            camera_data['token'], selected_anntokens=[anno['token']])
        if len(boxes) != 1:
            return True
        box = boxes[0]
        corners = view_points(box.corners(), camera_intrinsic_matrix, normalize=True)[:2, :]
        # two_d_bb1 = NuscenesHelper.min_max_x_y(corners)
        for annotation in anns:
            _, boxes, other_camera_intrinsic_matrix = self.nusc.get_sample_data(
                camera_data['token'], selected_anntokens=[annotation['token']])
            if len(boxes) != 1:
                continue
            other_box = boxes[0]
            # Skip candidates farther from the camera than the target box; they cannot occlude it.
            if box.center[2] < other_box.center[2]:
                continue
            other_corners = view_points(other_box.corners(), other_camera_intrinsic_matrix,
                                        normalize=True)[:2, :]
            # two_d_bb2 = NuscenesHelper.min_max_x_y(other_corners)
            if NuscenesHelper.is_occluded_2D(corners, other_corners):
                return True
        return False

    def get_scenes(self):
        return self.nusc.scene

    @staticmethod
    def is_occluded_2D(points1, points2):
        # Indices 0-7 of the combined point set are the corners of the first box.
        # If none of them is a hull vertex, the first box lies entirely inside the
        # convex hull of the second box's corners, i.e. it is occluded.
        convex_hull = spatial.ConvexHull(np.concatenate((points1.T, points2.T)))
        if np.amin(convex_hull.vertices) > 7:
            return True
        return False

    @staticmethod
    def get_categories(anns):
        categories = []
        for ann in anns:
            if ann['category_name'] not in categories:
                categories.append(ann['category_name'])
        return categories

    @staticmethod
    def get_number_of_anns_in_category(anns, category_name):
        number = 0
        for ann in anns:
            if ann['category_name'] == category_name:
                number += 1
        return number

    def get_report_string(self, scene, occluded_anns):
        lines = []
        all_annos = self.get_anns_in_scene(scene)
        categories = NuscenesHelper.get_categories(all_annos)
        lines.append("====scene " + scene['token'])
        lines.append("  number of samples = " + str(len(self.get_samples(scene))))
        lines.append("  number of anns = " + str(len(all_annos)))
        for category in categories:
            number_of_anns_in_category = NuscenesHelper.get_number_of_anns_in_category(all_annos, category)
            number_of_anns_occluded_in_category = NuscenesHelper.get_number_of_anns_in_category(
                occluded_anns, category)
            lines.append("  All " + category + " : " + str(number_of_anns_in_category) +
                         ", occluded : " + str(number_of_anns_occluded_in_category))
        return lines
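# Hedged usage sketch: write occluded-annotation tokens (and an optional per-scene
# report) for the front camera. The output file paths are illustrative.
helper = NuscenesHelper(version='v1.0-mini', dataroot='/data/sets/nuscenes')
helper.filter_occluded_objects('occluded_tokens.txt',
                               sensor='CAM_FRONT',
                               report_file_path='occlusion_report.txt')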
class nuscenes_dataloader(data.Dataset):
    def __init__(self, batch_size, num_classes, training=True, normalize=None):
        self._num_classes = num_classes
        self.training = training
        self.normalize = normalize
        self.batch_size = batch_size
        self.data_path = "/home/fengjia/data/sets/nuscenes"
        self.nusc = NuScenes(version='v1.0-trainval', dataroot=self.data_path, verbose=True)
        self.explorer = NuScenesExplorer(self.nusc)
        self.classes = ('__background__', 'pedestrian', 'barrier', 'trafficcone',
                        'bicycle', 'bus', 'car', 'construction', 'motorcycle',
                        'trailer', 'truck')
        # PATH = self.data_path + '/annotations_list.txt'
        PATH = self.data_path + '/car_pedestrian_annotations_list.txt'
        with open(PATH) as f:
            self.token = [x.strip() for x in f.readlines()]
        self.token = self.token[:400]

    def __getitem__(self, index):
        # Gather the tokens and samples needed for data extraction.
        # Each entry is '<image_token>_<annotation_token>'.
        tokens = self.token[index]
        if len(tokens.split('_')) < 2:
            print(tokens)  # Debug: flag malformed token entries.
        im_token = tokens.split('_')[0]
        annotation_token = tokens.split('_')[1]
        sample_data = self.nusc.get('sample_data', im_token)
        image_name = sample_data['filename']
        sample = self.nusc.get('sample', sample_data['sample_token'])
        lidar_token = sample['data']['LIDAR_TOP']

        # Read the image for this sample_data record.
        # image_path = '/data/sets/nuscenes/' + image_name
        img = imread('/home/fengjia/data/sets/nuscenes/' + image_name)
        im = np.array(img)

        # Get ground-truth boxes. Assumes annotation_token always matches one of them.
        _, boxes, camera_intrinsic = self.nusc.get_sample_data(im_token, box_vis_level=BoxVisibility.ALL)
        for box in boxes:
            corners = view_points(box.corners(), view=camera_intrinsic, normalize=True)
            if box.token == annotation_token:
                # Find the square crop area around the box, with random scale jitter.
                width = corners[0].max() - corners[0].min()
                height = corners[1].max() - corners[1].min()
                x_mid = (corners[0].max() + corners[0].min()) / 2
                y_mid = (corners[1].max() + corners[1].min()) / 2
                side = max(width, height) * random.uniform(1.0, 1.2)
                # Shrink the square if it would extend past the image border.
                if (x_mid - side / 2) < 0:
                    side = x_mid * 2
                if (y_mid - side / 2) < 0:
                    side = y_mid * 2

                # Crop the image.
                bottom_left = [int(x_mid - side / 2), int(y_mid - side / 2)]
                top_right = [int(x_mid + side / 2), int(y_mid + side / 2)]
                corners[0] = corners[0] - bottom_left[0]
                corners[1] = corners[1] - bottom_left[1]
                crop_img = im[bottom_left[1]:top_right[1], bottom_left[0]:top_right[0]]

                # Scale every crop to the same size and normalize to [0, 1].
                scale = 128 / side
                scaled = cv2.resize(crop_img, (128, 128))
                crop_img = np.transpose(scaled, (2, 0, 1))
                crop_img = crop_img.astype(np.float32)
                crop_img /= 255

                # Get the corresponding point cloud for the crop.
                pcl, m, offset, camera_intrinsic, box_corners = get_pointcloud(
                    self.nusc, bottom_left, top_right, box, lidar_token, im_token)
                break
        pcl = pcl.astype(np.float32)
        box_corners = box_corners.astype(np.float32)
        return crop_img, pcl, offset, m, camera_intrinsic, box_corners

    def __len__(self):
        return len(self.token)
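# Hedged usage sketch: wrap the dataset in a PyTorch DataLoader. batch_size=1 sidesteps
# collation of variable-length point clouds; num_classes=11 matches self.classes above,
# and the dataset constructor's hard-coded paths must point at a local trainval install.
import torch

dataset = nuscenes_dataloader(batch_size=1, num_classes=11, training=True)
loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)
crop_img, pcl, offset, m, camera_intrinsic, box_corners = next(iter(loader))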
def prepare_roidb():
    """Build the nuScenes roidb, or load it from a pickle if one already exists.

    Each roidb entry stores the image path, image size, the 2D ground-truth boxes
    obtained by projecting the 3D annotations into the image, and their class indices.
    """
    classes = ('__background__', 'pedestrian', 'barrier', 'trafficcone',
               'bicycle', 'bus', 'car', 'construction', 'motorcycle',
               'trailer', 'truck')
    nusc_path = '/data/sets/nuscenes'
    nusc = NuScenes(version='v1.0-trainval', dataroot=nusc_path, verbose=True)
    if os.path.exists('lib/roi_data_layer/roidb_nuscenes_mini.pkl'):
        print("Reading roidb..")
        with open("lib/roi_data_layer/roidb_nuscenes_mini.pkl", "rb") as pickle_in:
            roidb = pickle.load(pickle_in)
        return nusc, roidb
    else:
        file_dir = os.path.dirname(os.path.abspath(__file__))
        roots = file_dir.split('/')[:-2]
        root_dir = ""
        for folder in roots:
            if folder != "":
                root_dir = root_dir + "/" + folder
        PATH = root_dir + '/data/train_mini.txt'
        with open(PATH) as f:
            image_token = [x.strip() for x in f.readlines()]

        roidb = []
        print("Loading roidb...")
        for i in range(len(image_token)):
            im_token = image_token[i]
            sample_data = nusc.get('sample_data', im_token)
            image_name = sample_data['filename']
            image_path = nusc_path + '/' + image_name
            data_path, boxes, camera_intrinsic = nusc.get_sample_data(
                im_token, box_vis_level=BoxVisibility.ALL)

            gt_boxes = []
            gt_cls = []
            for box in boxes:
                visibility_token = nusc.get('sample_annotation', box.token)['visibility_token']
                vis_level = int(nusc.get('visibility', visibility_token)['token'])
                # Keep only boxes with visibility level 3 or 4.
                visible = vis_level == 3 or vis_level == 4
                if visible:
                    if box.name.split('.')[0] == 'vehicle':
                        if box.name.split('.')[1] != 'emergency':
                            name = box.name.split('.')[1]
                        else:
                            name = ''
                    elif box.name.split('.')[0] == 'human':
                        name = 'pedestrian'
                    elif box.name.split('.')[0] == 'movable_object':
                        if box.name.split('.')[1] != 'debris' and box.name.split('.')[1] != 'pushable_pullable':
                            name = box.name.split('.')[1]
                        else:
                            name = ''
                    else:
                        name = ''
                    if name != '':
                        corners = view_points(box.corners(), view=camera_intrinsic, normalize=True)[:2, :]
                        box = np.zeros(4)
                        box[0] = np.min(corners[0])
                        box[1] = np.min(corners[1])
                        box[2] = np.max(corners[0])
                        box[3] = np.max(corners[1])
                        gt_boxes = gt_boxes + [box]
                        gt_cls = gt_cls + [classes.index(name)]

            # Keep only images with at least two ground-truth objects.
            if len(gt_boxes) >= 2:
                image = {}
                image['image'] = image_path
                image['width'] = 1600
                image['height'] = 900
                image['boxes'] = np.asarray(gt_boxes)
                image['gt_classes'] = np.asarray(gt_cls)
                roidb = roidb + [image]
        print(len(roidb))
        print("Saving roidb")
        with open("lib/roi_data_layer/roidb_nuscenes_mini.pkl", "wb") as pickle_out:
            pickle.dump(roidb, pickle_out)
        return nusc, roidb
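# Hedged usage sketch: build (or load) the roidb and inspect one entry.
nusc, roidb = prepare_roidb()
print(len(roidb), 'entries; first entry keys:', sorted(roidb[0].keys()))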