def extract_datapoints(root_dir, test_set=False):
    argoverse_loader = ArgoverseTrackingLoader(root_dir=root_dir)
    data = []
    for log_id in argoverse_loader.log_list:
        # print(f"Processing log: {os.path.join(root_dir, log_id)}", end="\r")
        argoverse_data = argoverse_loader.get(log_id=log_id)
        # calibL = argoverse_data.get_calibration(camera=camL, log_id=log_id)
        # calibR = argoverse_data.get_calibration(camera=camR, log_id=log_id)
        calibs = {}
        for cam in STEREO_CAMERA_LIST + RING_CAMERA_LIST:
            calibs[cam] = argoverse_data.get_calibration(camera=cam, log_id=log_id)
        count = 0
        for lidar_timestamp in argoverse_data.lidar_timestamp_list:
            data_point = dict()
            data_point["log_id"] = log_id
            data_point["frame_id"] = count
            count += 1
            for cam in STEREO_CAMERA_LIST + RING_CAMERA_LIST:
                cam_timestamp = argoverse_loader.sync.get_closest_cam_channel_timestamp(
                    lidar_timestamp=lidar_timestamp, camera_name=cam, log_id=log_id)
                if cam_timestamp is not None:
                    data_point[cam] = argoverse_loader.get_image_at_timestamp(
                        timestamp=cam_timestamp, camera=cam, log_id=log_id, load=False)
                else:
                    data_point[cam] = None
            data_point["timestamp"] = lidar_timestamp
            data_point["lidar"] = argoverse_loader.timestamp_lidar_dict[lidar_timestamp]
            data_point["calibs"] = calibs
            d = argoverse_data.get_pose(
                argoverse_data.get_idx_from_timestamp(lidar_timestamp)).translation
            r = Rotation.from_dcm(
                argoverse_data.get_pose(
                    argoverse_data.get_idx_from_timestamp(lidar_timestamp)).rotation)
            data_point["pose"] = (d, r.as_euler('xyz'))
            if not test_set:
                data_point["labels"] = argoverse_loader.get_label_object(
                    idx=argoverse_loader.lidar_timestamp_list.index(lidar_timestamp),
                    log_id=log_id)
            data.append(data_point)
    return data
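A minimal usage sketch for the function above, assuming the standard argoverse-tracking directory layout; the root path is illustrative, and only keys defined by the function itself are read.

# Sketch (assumed root path): extract_datapoints walks every log under root_dir
# and returns one dict per LiDAR sweep with synced image paths, calibrations,
# pose, and (outside the test set) the per-sweep labels.
datapoints = extract_datapoints("/path/to/argoverse-tracking/train1", test_set=False)
first = datapoints[0]
print(first["log_id"], first["frame_id"], first["timestamp"])
print(first["pose"])          # (translation, xyz Euler angles)
print(len(first["labels"]))   # number of annotated objects in this sweep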
class ArgoDataset(Dataset):
    def __init__(self, args, transform=None):
        self.args = args
        self.transform = transform
        self.argoverse_loader = ArgoverseTrackingLoader(args.root_dir)
        self.image_list = []
        for log_id in self.argoverse_loader.log_list:
            argoverse_data = self.argoverse_loader.get(log_id)
            for idx in range(len(argoverse_data.image_list_sync['ring_front_center'])):
                self.image_list.append(str(log_id) + '@' + str(idx))
        self.camera_name = [
            'ring_front_center', 'ring_side_left', 'ring_side_right',
            'ring_front_left', 'ring_front_right', 'ring_rear_right',
            'ring_rear_left'
        ]

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, index):
        lidar_index = int(str(self.image_list[index]).split('@')[1])
        log_id = str(self.image_list[index]).split('@')[0]
        argoverse_data = self.argoverse_loader.get(log_id)
        image_7 = []
        for camera in self.camera_name:
            img = argoverse_data.get_image_sync(lidar_index, camera=camera)
            img = Image.fromarray(img)
            img = self.transform(img) if self.transform is not None else img
            image_7.append(img)
        sample = {
            'image_7': image_7,
            'video_idxs': lidar_index,
            'image_names': log_id
        }
        return sample
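A short, hedged sketch of how this dataset might be wrapped in a PyTorch DataLoader. The SimpleNamespace-style args object, the root path, and the resize dimensions are assumptions for illustration, not part of the original code.

from types import SimpleNamespace

from torch.utils.data import DataLoader
from torchvision import transforms

# Assumed root_dir; the dataset indexes frames internally as "<log_id>@<idx>".
args = SimpleNamespace(root_dir="/path/to/argoverse-tracking/sample")
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])
dataset = ArgoDataset(args, transform=transform)
loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=2)

batch = next(iter(loader))
# batch['image_7'] is a list of 7 batched tensors, one per ring camera
print(len(batch['image_7']), batch['image_7'][0].shape)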
def __init__(self, root_dir, split='train'):
    self.split = split
    is_test = (self.split == 'test')
    self.lidar_pathlist = []
    self.label_pathlist = []
    if split == 'train':
        for i in np.arange(1, 5):
            self.imageset_dir = os.path.join(root_dir, split + str(i))
            data_loader = ArgoverseTrackingLoader(os.path.join(root_dir, split + str(i)))
            self.log_list = data_loader.log_list
            for log in self.log_list:
                self.lidar_pathlist.extend(data_loader.get(log).lidar_list)
                self.label_pathlist.extend(data_loader.get(log).label_list)
        print(len(self.lidar_pathlist))
    else:
        self.imageset_dir = os.path.join(root_dir, split)
        data_loader = ArgoverseTrackingLoader(os.path.join(root_dir, split))
        self.lidar_pathlist.extend(data_loader.lidar_list)
        self.label_pathlist.extend(data_loader.label_list)
    self.calib_file = data_loader.calib_filename
    assert len(self.lidar_pathlist) == len(self.label_pathlist)
    # z = list(zip(self.lidar_pathlist, self.label_pathlist))
    # random.shuffle(z)
    # self.lidar_pathlist[:], self.label_pathlist[:] = zip(*z)
    self.num_sample = len(self.lidar_pathlist)
    self.image_idx_list = np.arange(self.num_sample)
    self.argo_to_kitti = np.array(
        [[6.927964e-03, -9.999722e-01, -2.757829e-03],
         [-1.162982e-03, 2.749836e-03, -9.999955e-01],
         [9.999753e-01, 6.931141e-03, -1.143899e-03]])
    self.ground_removal = True
    self.image_dir = os.path.join('/data/')
    self.lidar_dir = os.path.join('/data/')
    self.calib_dir = os.path.join('/data/')
    self.label_dir = os.path.join('/data/')
def generate_weak_static(dataset_dir="", log_id="", output_dir=""):
    argoverse_loader = ArgoverseTrackingLoader(dataset_dir)
    argoverse_data = argoverse_loader.get(log_id)
    camera = argoverse_loader.CAMERA_LIST[7]
    calib = argoverse_data.get_calibration(camera)
    ply_fpath = os.path.join(dataset_dir, log_id, 'lidar')
    ply_locs = []
    for idx, ply_name in enumerate(os.listdir(ply_fpath)):
        ply_loc = np.array([idx, int(ply_name.split('.')[0].split('_')[-1])])
        ply_locs.append(ply_loc)
    ply_locs = np.array(ply_locs)
    lidar_timestamps = sorted(ply_locs[:, 1])
    calib_path = f"{dataset_dir}/{log_id}/vehicle_calibration_info.json"
    calib_data = read_json_file(calib_path)
    ind = 0
    for i in range(9):
        if calib_data['camera_data_'][i]['key'] == \
                'image_raw_stereo_front_left':
            ind = i
            break
    rotation = np.array(calib_data['camera_data_'][ind]['value']
                        ['vehicle_SE3_camera_']['rotation']['coefficients'])
    translation = np.array(calib_data['camera_data_'][ind]['value']
                           ['vehicle_SE3_camera_']['translation'])
    sparse_road_bev_loc = os.path.join(output_dir, log_id, 'sparse_road_bev')
    try:
        os.makedirs(sparse_road_bev_loc)
    except BaseException:
        pass
    dense_city_road_pts = []
    # First pass: build a sparse per-frame road BEV from the segmentation mask
    # and accumulate the road points in the city frame.
    for idx in range(len(lidar_timestamps)):
        occupancy_map = np.zeros((256, 256))
        lidar_timestamp = lidar_timestamps[idx]
        try:
            img_loc = argoverse_data.get_image_list_sync(camera=camera)[idx]
        except BaseException:
            continue
        cam_timestamp = int(img_loc.split('.')[0].split('_')[-1])
        segment_loc = os.path.join(
            dataset_dir, log_id, 'segment',
            'stereo_front_left_' + str(cam_timestamp) + '.png')
        segment_img = cv2.imread(segment_loc, 0)
        pc = argoverse_data.get_lidar(idx)
        uv = calib.project_ego_to_image(pc).T
        idx_ = np.where(
            np.logical_and.reduce(
                (uv[0, :] >= 0.0, uv[0, :] < np.shape(segment_img)[1] - 1.0,
                 uv[1, :] >= 0.0, uv[1, :] < np.shape(segment_img)[0] - 1.0,
                 uv[2, :] > 0)))
        idx_ = idx_[0]
        uv1 = uv[:, idx_]
        uv1 = uv1.T
        x = uv1[:, 0].astype(np.int32)
        y = uv1[:, 1].astype(np.int32)
        col = segment_img[y, x]
        # keep projected points whose segmentation class marks road surface
        filt = np.logical_or(col == 13, np.logical_or(col == 24, col == 24))
        road_uv1 = uv1[filt, :]
        lidar_road_pts = calib.project_image_to_ego(road_uv1)
        cam_road_pts = calib.project_ego_to_cam(lidar_road_pts)
        X = cam_road_pts[:, 0]
        Z = cam_road_pts[:, 2]
        filt2 = np.logical_and(X > -20, X < 20)
        filt2 = np.logical_and(filt2, np.logical_and(Z > 0, Z < 40))
        y_img = (-Z[filt2] / res).astype(np.int32)
        x_img = (X[filt2] / res).astype(np.int32)
        x_img -= int(np.floor(-20 / res))
        y_img += int(np.floor(40 / res))
        occupancy_map[y_img, x_img] = 255
        occ_map_loc = os.path.join(
            sparse_road_bev_loc,
            'stereo_front_left_' + str(cam_timestamp) + '.png')
        cv2.imwrite(occ_map_loc, occupancy_map)
        pose_fpath = os.path.join(
            dataset_dir, log_id, "poses",
            "city_SE3_egovehicle_" + str(lidar_timestamp) + ".json")
        if not Path(pose_fpath).exists():
            print("not a path")
            continue
        pose_data = read_json_file(pose_fpath)
        rotation = np.array(pose_data["rotation"])
        translation = np.array(pose_data["translation"])
        city_to_egovehicle_se3 = SE3(rotation=quat2rotmat(rotation),
                                     translation=translation)
        sparse_city_road_pts = city_to_egovehicle_se3.transform_point_cloud(
            lidar_road_pts)
        try:
            dense_city_road_pts = np.concatenate(
                (dense_city_road_pts, sparse_city_road_pts), axis=0)
        except BaseException:
            dense_city_road_pts = sparse_city_road_pts
    dense_road_bev_loc = os.path.join(output_dir, log_id, 'dense_road_bev')
    try:
        os.makedirs(dense_road_bev_loc)
    except BaseException:
        pass
    # Second pass: reproject the accumulated city-frame road points into each
    # frame to produce the dense road BEV.
    for idx in range(len(lidar_timestamps)):
        occupancy_map = np.zeros((256, 256))
        lidar_timestamp = lidar_timestamps[idx]
        try:
            img_loc = argoverse_data.get_image_list_sync(camera=camera)[idx]
        except BaseException:
            continue
        cam_timestamp = int(img_loc.split('.')[0].split('_')[-1])
        pose_fpath = os.path.join(
            dataset_dir, log_id, "poses",
            "city_SE3_egovehicle_" + str(lidar_timestamp) + ".json")
        if not Path(pose_fpath).exists():
            print("not a path")
            continue
        pose_data = read_json_file(pose_fpath)
        rotation = np.array(pose_data["rotation"])
        translation = np.array(pose_data["translation"])
        city_to_egovehicle_se3 = SE3(rotation=quat2rotmat(rotation),
                                     translation=translation)
        current_ego_frame_road_pts = \
            city_to_egovehicle_se3.inverse_transform_point_cloud(
                dense_city_road_pts)
        current_cam_frame_road_pts = calib.project_ego_to_cam(
            current_ego_frame_road_pts)
        X = current_cam_frame_road_pts[:, 0]
        Z = current_cam_frame_road_pts[:, 2]
        filt2 = np.logical_and(X > -20, X < 20)
        filt2 = np.logical_and(filt2, np.logical_and(Z > 0, Z < 40))
        y_img = (-Z[filt2] / res).astype(np.int32)
        x_img = (X[filt2] / res).astype(np.int32)
        x_img -= int(np.floor(-20 / res))
        y_img += int(np.floor(40 / res)) - 1
        occupancy_map[y_img, x_img] = 255
        occ_map_loc = os.path.join(
            dense_road_bev_loc,
            'stereo_front_left_' + str(cam_timestamp) + '.png')
        cv2.imwrite(occ_map_loc, occupancy_map)
]
data_loader = ArgoverseTrackingLoader(os.path.join(root_dir, split))
log_list = data_loader.log_list
path_count = 0
actual_idx_list = []
logidx_to_count_map = {}
log_to_count_map = {}
map_objectid_bool = {}
map_saveid_objectid = {}
map_class_saveid_count = {}
for log_id, log in enumerate(log_list):
    print("converting log {} {}".format(log, log_id))
    argoverse_data = data_loader.get(log)
    city_name = argoverse_data.city_name
    lidar_lst = data_loader.get(log).lidar_list
    label_lst = data_loader.get(log).label_list
    assert len(lidar_lst) == len(label_lst)
    pbar = tqdm(lidar_lst)
    for idx, each_path in enumerate(pbar):
        ide = log_id + idx
        # lidar_pts = argoverse_data.get_lidar(ide)
        lidar_pts = argoverse_data.get_lidar_ring(ide)
        if (GROUND_REMOVED):
            city_to_egovehicle_se3 = argoverse_data.get_pose(ide)
            roi_area_pts = city_to_egovehicle_se3.transform_point_cloud(
class ArgoDataset(Dataset): def __init__(self, question_h5, image_feature_h5_path, lidar_feature_h5_path,vocab,load_lidar=True,npoint=1024,normal_channel=True,uniform=False,cache_size=15000,drivable_area=False, mode='prefix', image_h5=None, lidar_h5=None, max_samples=None, question_families=None, image_idx_start_from=None): #############read whole question_h5 file in memory############################# self.all_questions = question_h5['questions'][:] self.all_answers = get_answer_classes(question_h5['answers'][:], vocab) self.all_image_idxs = question_h5['image_idxs'][:] self.all_video_names = (question_h5['video_names'][:]).astype(str) self.questions_length = question_h5['question_length'][:] self.image_feature_h5 = image_feature_h5_path self.load_lidar=load_lidar ############for lidar########################################################## if self.load_lidar: self.argoverse_loader = ArgoverseTrackingLoader(lidar_feature_h5_path) self.am = ArgoverseMap() self.drivable_area=drivable_area ############################################################################### def __len__(self): print(self.all_questions.shape[0]) return self.all_questions.shape[0] def __getitem__(self, index): question = self.all_questions[index] question_length = self.questions_length[index] answer = self.all_answers[index] image_name = self.all_video_names[index] + '@'+str(self.all_image_idxs[index]) #####image feature ############################################### with h5py.File(self.image_feature_h5, 'r') as img_feat_file: image_feature = img_feat_file[image_name][:] ###########lidar feature########################################### if self.load_lidar: if index in self.cache: lidar_pts = self.cache[index] else: lidar_index=self.all_image_idxs[index] log_id=self.all_video_names[index] argoverse_data=self.argoverse_loader.get(log_id) lidar_pts = argoverse_data.get_lidar(lidar_index,load=True) city_to_egovehicle_se3 = argoverse_data.get_pose(lidar_index) city_name = argoverse_data.city_name roi_area_pts = copy.deepcopy(lidar_pts) roi_area_pts = city_to_egovehicle_se3.transform_point_cloud(roi_area_pts) # put into city coords roi_area_pts = self.am.remove_non_roi_points(roi_area_pts, city_name) roi_area_pts = self.am.remove_ground_surface(roi_area_pts, city_name) roi_area_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(roi_area_pts) if self.drivable_area: driveable_area_pts = copy.deepcopy(roi_area_pts) driveable_area_pts = city_to_egovehicle_se3.transform_point_cloud(driveable_area_pts) # put into city coords driveable_area_pts = self.am.remove_non_driveable_area_points(driveable_area_pts, city_name) driveable_area_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(driveable_area_pts) point_set=driveable_area_pts else: point_set=roi_area_pts del lidar_pts ############################################################################################################ else: point_set=5 return (question, image_feature, question_length,point_set,answer,image_name)
def main():
    # set root_dir to the correct path to your dataset folder
    root_dir = core.data_dir() + '/datasets/argoverse/argoverse-tracking/sample/'
    vtk_window_size = (1280, 720)

    argoverse_loader = ArgoverseTrackingLoader(root_dir)
    print('Total number of logs:', len(argoverse_loader))
    argoverse_loader.print_all()

    for i, argoverse_data in enumerate(argoverse_loader):
        if i >= 3:
            break
        print(argoverse_data)

    argoverse_data = argoverse_loader[0]
    print(argoverse_data)

    argoverse_data = argoverse_loader.get('c6911883-1843-3727-8eaa-41dc8cda8993')
    print(argoverse_data)

    log_id = 'c6911883-1843-3727-8eaa-41dc8cda8993'  # argoverse_loader.log_list[55]
    frame_idx = 150
    camera = argoverse_loader.CAMERA_LIST[0]

    argoverse_data = argoverse_loader.get(log_id)
    city_name = argoverse_data.city_name

    print('-------------------------------------------------------')
    print(f'Log: {log_id}, \n\tframe: {frame_idx}, camera: {camera}')
    print('-------------------------------------------------------\n')

    lidar_points = argoverse_data.get_lidar(frame_idx)
    img = argoverse_data.get_image_sync(frame_idx, camera=camera)
    objects = argoverse_data.get_label_object(frame_idx)
    calib = argoverse_data.get_calibration(camera)

    # TODO: Calculate point colours
    vtk_pc = VtkPointCloudGlyph()
    vtk_pc.set_points(lidar_points)
    vtk_pc.set_point_size(2)

    vtk_renderer = demo_utils.setup_vtk_renderer()
    vtk_render_window = demo_utils.setup_vtk_render_window(
        'Argoverse Demo', vtk_window_size, vtk_renderer)

    vtk_axes = vtk.vtkAxesActor()
    vtk_axes.SetTotalLength(2, 2, 2)
    vtk_renderer.AddActor(vtk_axes)
    vtk_renderer.AddActor(vtk_pc.vtk_actor)

    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.SetViewUp(0, 0, 1)
    current_cam.SetPosition(-50, 0, 15)
    current_cam.SetFocalPoint(30, 0, 0)

    vtk_interactor = demo_utils.setup_vtk_interactor(vtk_render_window)
    vtk_interactor.Start()
def __init__(self, root_dir, split='train'):
    self.split = split
    is_test = (self.split == 'test')
    self.lidar_pathlist = []
    self.label_pathlist = []
    print("[LOG] Data dir is ", root_dir)
    self.lidar_dir = os.path.join(root_dir)
    if os.path.exists("lidar_pathlist.npy"):
        self.lidar_pathlist = np.load("lidar_pathlist.npy").tolist()
    if not is_test and os.path.exists("label_pathlist.npy"):
        self.label_pathlist = np.load("label_pathlist.npy").tolist()
    if not self.lidar_pathlist and not self.label_pathlist:
        data_loader = ArgoverseTrackingLoader(os.path.join(root_dir))
        for each in data_loader.log_list:
            data = data_loader.get(each)
            self.lidar_pathlist.extend(data.lidar_list)
            if not is_test:
                self.label_pathlist.extend(data.label_list)
        if not is_test:
            # if prune needed
            pruned_lidar_pathlist = []
            pruned_label_pathlist = []
            classs, local_classs = 0, 0
            t = tqdm.tqdm(range(len(self.label_pathlist)))
            for each in t:
                rand = random.uniform(0, 1)
                objs = self.get_label(each)
                local_classs = 0
                for obj in objs:
                    if obj.cls_id == 1:
                        classs += 1
                        local_classs += 1
                if local_classs > 0 or rand > 0.5:
                    pruned_lidar_pathlist.append(self.lidar_pathlist[each])
                    pruned_label_pathlist.append(self.label_pathlist[each])
                t.set_description(f" Peds: {classs}")
            np.save("lidar_pathlist", pruned_lidar_pathlist)
            np.save("label_pathlist", pruned_label_pathlist)
            print(f"[LOG] selected {len(pruned_lidar_pathlist)} out of {len(self.lidar_pathlist)}")
            self.lidar_pathlist = pruned_lidar_pathlist
            self.label_pathlist = pruned_label_pathlist
    # import pdb; pdb.set_trace()
    # self.calib_file = data_loader.calib_filename
    self.lidar_filename = [
        x.split('.')[0].rsplit('/', 1)[1] for x in self.lidar_pathlist
    ]
    # assert len(self.lidar_pathlist) == len(self.label_pathlist)
    self.num_sample = len(self.lidar_pathlist)
    self.lidar_idx_list = ['%06d' % l for l in range(self.num_sample)]
    self.lidar_idx_table = dict(zip(self.lidar_idx_list, self.lidar_filename))
    self.argo_to_kitti = np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
    self.ground_removal = False
    self.lidar_dir = os.path.join('/data/')
    self.label_dir = os.path.join('/data/')
def main():
    args = parser.parse_args()
    pdb.set_trace()
    if not (args.output_disp or args.output_depth):
        print('You must at least output one value !')
        return

    argoverse_loader = ArgoverseTrackingLoader(args.argoverse_data_path)
    camera = argoverse_loader.CAMERA_LIST[0]
    argoverse_data = argoverse_loader.get(args.argoverse_log)
    num_lidar = len(argoverse_data.get_image_list_sync(camera))

    disp_net = DispResNet(args.resnet_layers, False).to(device)
    weights = torch.load(args.pretrained, map_location=device)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    # dataset_dir = Path(args.dataset_dir)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    # if args.dataset_list is not None:
    #     with open(args.dataset_list, 'r') as f:
    #         test_files = [dataset_dir/file for file in f.read().splitlines()]
    # else:
    #     test_files = sum([dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])
    # print('{} files to test'.format(len(test_files)))

    for frame in tqdm(range(0, num_lidar - 1)):
        file = argoverse_data.get_image_sync(frame, camera, load=False)
        img = imread(file).astype(np.float32)
        h, w, _ = img.shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            img = imresize(img, (args.img_height, args.img_width)).astype(np.float32)
        img = np.transpose(img, (2, 0, 1))
        tensor_img = torch.from_numpy(img).unsqueeze(0)
        tensor_img = ((tensor_img / 255 - 0.45) / 0.225).to(device)

        output = disp_net(tensor_img)[0]

        file_path, file_ext = file.relpath(args.dataset_dir).splitext()
        file_name = '-'.join(file_path.splitall())

        if args.output_disp:
            disp = (255 * tensor2array(output, max_value=None,
                                       colormap='bone')).astype(np.uint8)
            imsave(output_dir / '{}_disp{}'.format(file_name, file_ext),
                   np.transpose(disp, (1, 2, 0)))
        if args.output_depth:
            depth = 1 / output
            depth = (255 * tensor2array(depth, max_value=10,
                                        colormap='rainbow')).astype(np.uint8)
            imsave(output_dir / '{}_depth{}'.format(file_name, file_ext),
                   np.transpose(depth, (1, 2, 0)))
class Lidar2Depth: """ Convert 360 degree LiDAR point cloud to depth map corresponding to each ring camera for monocular depth estimation. To use: >>> input_log_dir = "path/to/3d20ae25-5b29-320d-8bae-f03e9dc177b9/" >>> output_save_path = "path/to/depth_dataset/" >>> Lidar2Depth(input_log_dir, output_save_path) """ def __init__(self, input_log_dir: str, output_save_path: str) -> None: self.input_log_dir = input_log_dir self.output_save_path = output_save_path self.log_id = os.path.basename(input_log_dir) print("Log ID ", self.log_id) # Load Argo data dataset = os.path.dirname(self.input_log_dir) self.argoverse_loader = ArgoverseTrackingLoader(dataset) self.argoverse_data = self.argoverse_loader.get(self.log_id) # Count the number of LiDAR ply files in the log dir self.lidar_frame_counter = len( glob.glob1(os.path.join(self.input_log_dir, "lidar"), "*.ply")) # Setup depth dataset dir self.depth_data_dir_setup() # Extract depth data and ring camera frames self.depth_extraction() def depth_data_dir_setup(self) -> None: """ Depth dataset structure +-- train/val/test | +-- depth | | +-- 00c561b9-2057-358d-82c6-5b06d76cebcf | | | +-- ring_front_center | | | | +-- 1.png | | | | +-- 2.png | | | | +-- . | | | | +-- . | | | | +-- n.png | | | +-- ring_front_left | | | +-- . | | | +-- . | | | +-- ring_side_right | | +-- 0ef28d5c-ae34-370b-99e7-6709e1c4b929 | | | +-- ring_front_center | | | +-- . | | | +-- . | | | +-- ring_side_right | | +-- . | | +-- . | +-- rgb | | +-- 00c561b9-2057-358d-82c6-5b06d76cebcf | | | +-- ring_front_center | | | | +-- 1.png | | | | +-- . | | | | +-- n.png | | | +-- ring_front_left | | | +-- . | | | +-- ring_side_right | | +-- 0ef28d5c-ae34-370b-99e7-6709e1c4b929 | | | +-- ring_front_center | | | +-- ring_front_left | | | +-- . | | | +-- ring_side_right | | +-- . | | +-- . """ if fnmatch.fnmatchcase(self.input_log_dir, "*" + "train" + "*"): self.save_name = os.path.join(self.output_save_path, "train") self.logid_type = "train" elif fnmatch.fnmatchcase(self.input_log_dir, "*" + "val" + "*"): self.save_name = os.path.join(self.output_save_path, "val") self.logid_type = "val" elif fnmatch.fnmatchcase(self.input_log_dir, "*" + "test" + "*"): self.save_name = os.path.join(self.output_save_path, "test") self.logid_type = "test" for camera_name in RING_CAMERA_LIST: paths = [ os.path.join(self.save_name, "depth", self.log_id, camera_name), os.path.join(self.save_name, "rgb", self.log_id, camera_name), ] for sub_path in paths: if not os.path.exists(sub_path): os.makedirs(sub_path) def extract_lidar_image_pair( self, camera_ID: int, lidar_frame_idx: str) -> Optional[Tuple[np.ndarray, np.ndarray]]: """ For the provided camera_ID and LiDAR ply file, extract rgb image and corresponding LiDAR points in the fov. 
""" img = self.argoverse_data.get_image_sync(lidar_frame_idx, camera=camera_ID) self.calib = self.argoverse_data.get_calibration(camera_ID) pc = self.argoverse_data.get_lidar(lidar_frame_idx) uv = self.calib.project_ego_to_image(pc).T lidar_frame_idx_ = np.where( np.logical_and.reduce(( uv[0, :] >= 0.0, uv[0, :] < np.shape(img)[1] - 1.0, uv[1, :] >= 0.0, uv[1, :] < np.shape(img)[0] - 1.0, uv[2, :] > 0, ))) lidar_image_projection_points = uv[:, lidar_frame_idx_] if lidar_image_projection_points is None: print("No point image projection") return np.array(img), None else: return np.array(img), lidar_image_projection_points def save_image_pair( self, camera_ID: int, img: np.ndarray, lidar_frame_idx: str, lidar_image_projection_points: np.ndarray, ) -> None: """ Save the depth images and camera frame to the created dataset dir. """ x_values = np.round(lidar_image_projection_points[0], 0).astype(int) y_values = np.round(lidar_image_projection_points[1], 0).astype(int) lidar_depth_val = lidar_image_projection_points[2] # Create a blank image to place lidar points as pixels with depth information sparse_depth_img = np.zeros( [img.shape[0], img.shape[1]]) # keeping it float to maintain precision sparse_depth_img[y_values, x_values] = lidar_depth_val # Multiple to maintain precision, while model training, remember to divide by 256 # NOTE: 0 denotes a null value, rather than actually zero depth in the saved depth map depth_rescaled = sparse_depth_img * 256.0 depth_scaled = depth_rescaled.astype(np.uint16) depth_scaled = Image.fromarray(depth_scaled) raw_depth_path = os.path.join( self.save_name, "depth", self.log_id, str(camera_ID), str(lidar_frame_idx) + ".png", ) depth_scaled.save(raw_depth_path) # Save Depth image img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) raw_img_path = os.path.join( self.save_name, "rgb", self.log_id, str(camera_ID), str(lidar_frame_idx) + ".png", ) cv2.imwrite(raw_img_path, img_rgb, [cv2.IMWRITE_PNG_COMPRESSION, 0]) # Save RGB image def frame2depth_mapping(self, camera_ID: int, lidar_frame_idx: str) -> None: """ For your training dataloader, you will likely find it helpful to read image paths from a .txt file. We explicitly write to a .txt file all rgb image paths that have a corresponding sparse ground truth depth file along with focal length. """ mapping_file = open( os.path.join(self.output_save_path, "argo_" + self.logid_type + "_files_with_gt.txt"), "a", ) file_path = os.path.join(str(self.log_id), camera_ID, str(lidar_frame_idx) + ".png") gt_string = file_path + " " + file_path + " " + str( np.round(self.calib.fv, 4)) mapping_file.write(gt_string + "\n") def depth_extraction(self) -> None: """ For every lidar file, extract ring camera frames and store it in the save dir along with depth map """ for lidar_frame_idx in tqdm(range(self.lidar_frame_counter)): for camera_ID in RING_CAMERA_LIST: # Extract camera frames and associated lidar points img, lidar_image_projection_points = self.extract_lidar_image_pair( camera_ID, lidar_frame_idx) # Save image and depth map if LiDAR projection points exist if lidar_image_projection_points is not None: # Save the above extracted images self.save_image_pair(camera_ID, img, lidar_frame_idx, lidar_image_projection_points) # Write path of rgb image, depth image along with focal length # in a txt file for data loader self.frame2depth_mapping(camera_ID, lidar_frame_idx) else: continue
def make_att_files(root_dir: str) -> None: """Write a .pkl file with difficulty attributes per track""" path_output_vis = "vis_output" filename_output = "att_file.npy" if not os.path.exists(path_output_vis): os.mkdir(path_output_vis) list_folders = ["test"] list_name_class = ["VEHICLE", "PEDESTRIAN"] count_track = 0 dict_att_all: Dict[str, Any] = {} for name_folder in list_folders: dict_att_all[name_folder] = {} list_log_folders = glob.glob(os.path.join(root_dir, name_folder, "*")) for ind_log, path_log in enumerate(list_log_folders): id_log = f"{Path(path_log).name}" print("%s %s %d/%d" % (name_folder, id_log, ind_log, len(list_log_folders))) if check_track_label_folder: list_path_label_persweep = glob.glob( os.path.join(path_log, "per_sweep_annotations_amodal", "*")) list_path_label_persweep.sort() dict_track_labels: Dict[str, Any] = {} for path_label_persweep in list_path_label_persweep: data = read_json_file(path_label_persweep) for data_obj in data: id_obj = data_obj["track_label_uuid"] if id_obj not in dict_track_labels.keys(): dict_track_labels[id_obj] = [] dict_track_labels[id_obj].append(data_obj) data_amodal: Dict[str, Any] = {} for key in dict_track_labels.keys(): dict_amodal: Dict[str, Any] = {} data_amodal[key] = dict_amodal data_amodal[key]["label_class"] = dict_track_labels[key][ 0]["label_class"] data_amodal[key]["uuid"] = dict_track_labels[key][0][ "track_label_uuid"] data_amodal[key]["log_id"] = id_log data_amodal[key]["track_label_frames"] = dict_track_labels[ key] argoverse_loader = ArgoverseTrackingLoader( os.path.join(root_dir, name_folder)) data_log = argoverse_loader.get(id_log) list_lidar_timestamp = data_log.lidar_timestamp_list dict_tracks: Dict[str, Any] = {} for id_track in data_amodal.keys(): data = data_amodal[id_track] if data["label_class"] not in list_name_class: continue data_per_frame = data["track_label_frames"] dict_per_track: Dict[str, Any] = {} dict_tracks[id_track] = dict_per_track dict_tracks[id_track]["ind_lidar_min"] = -1 dict_tracks[id_track]["ind_lidar_max"] = -1 length_log = len(list_lidar_timestamp) dict_tracks[id_track]["list_city_se3"] = [None] * length_log dict_tracks[id_track]["list_bbox"] = [None] * length_log count_track += 1 dict_tracks[id_track]["list_center"] = np.full([length_log, 3], np.nan) dict_tracks[id_track]["list_center_w"] = np.full( [length_log, 3], np.nan) dict_tracks[id_track]["list_dist"] = np.full([length_log], np.nan) dict_tracks[id_track]["exists"] = np.full([length_log], False) for box in data_per_frame: if box["timestamp"] in list_lidar_timestamp: ind_lidar = list_lidar_timestamp.index( box["timestamp"]) else: continue if dict_tracks[id_track]["ind_lidar_min"] == -1: dict_tracks[id_track]["ind_lidar_min"] = ind_lidar dict_tracks[id_track]["ind_lidar_max"] = max( ind_lidar, dict_tracks[id_track]["ind_lidar_max"]) center = np.array([ box["center"]["x"], box["center"]["y"], box["center"]["z"] ]) city_SE3_egovehicle = argoverse_loader.get_pose( ind_lidar, id_log) if city_SE3_egovehicle is None: print("Pose not found!") continue center_w = city_SE3_egovehicle.transform_point_cloud( center[np.newaxis, :])[0] dict_tracks[id_track]["list_center"][ind_lidar] = center dict_tracks[id_track]["list_center_w"][ ind_lidar] = center_w dict_tracks[id_track]["list_dist"][ ind_lidar] = np.linalg.norm(center[0:2]) dict_tracks[id_track]["exists"][ind_lidar] = True dict_tracks[id_track]["list_city_se3"][ ind_lidar] = city_SE3_egovehicle dict_tracks[id_track]["list_bbox"][ind_lidar] = box length_track = dict_tracks[id_track][ 
"ind_lidar_max"] - dict_tracks[id_track][ "ind_lidar_min"] + 1 assert not (dict_tracks[id_track]["ind_lidar_max"] == -1 and dict_tracks[id_track]["ind_lidar_min"] == -1), "zero-length track" dict_tracks[id_track]["length_track"] = length_track ( dict_tracks[id_track]["list_vel"], dict_tracks[id_track]["list_acc"], ) = compute_v_a(dict_tracks[id_track]["list_center_w"]) dict_tracks[id_track]["num_missing"] = ( dict_tracks[id_track]["length_track"] - dict_tracks[id_track]["exists"].sum()) dict_tracks[id_track]["difficult_att"] = [] # get scalar velocity per timestamp as 2-norm of (vx, vy) vel_abs = np.linalg.norm( dict_tracks[id_track]["list_vel"][:, 0:2], axis=1) acc_abs = np.linalg.norm( dict_tracks[id_track]["list_acc"][:, 0:2], axis=1) ind_valid = np.nonzero( 1 - np.isnan(dict_tracks[id_track]["list_dist"]))[0] ind_close = np.nonzero(dict_tracks[id_track]["list_dist"] [ind_valid] < NEAR_DISTANCE_THRESH)[0] if len(ind_close) > 0: ind_close_max = ind_close.max() + 1 ind_close_min = ind_close.min() # Only compute "fast" and "occluded" tags for near objects # The thresholds are not very meaningful for faraway objects, since they are usually pretty short. if dict_tracks[id_track]["list_dist"][ind_valid].min( ) > NEAR_DISTANCE_THRESH: dict_tracks[id_track]["difficult_att"].append("far") else: is_short_len_track1 = dict_tracks[id_track][ "length_track"] < SHORT_TRACK_LENGTH_THRESH is_short_len_track2 = dict_tracks[id_track]["exists"].sum( ) < SHORT_TRACK_COUNT_THRESH if is_short_len_track1 or is_short_len_track2: dict_tracks[id_track]["difficult_att"].append("short") else: if (ind_close_max - ind_close_min) - dict_tracks[id_track][ "exists"][ind_close_min:ind_close_max].sum( ) > MAX_OCCLUSION_PCT: dict_tracks[id_track]["difficult_att"].append( "occ") if np.quantile(vel_abs[ind_valid][ind_close], 0.9) > FAST_TRACK_THRESH: dict_tracks[id_track]["difficult_att"].append( "fast") if len(dict_tracks[id_track]["difficult_att"]) == 0: dict_tracks[id_track]["difficult_att"].append("easy") if visualize: for ind_lidar, timestamp_lidar in enumerate( list_lidar_timestamp): list_bboxes = [] list_difficulty_att = [] for id_track in dict_tracks.keys(): if dict_tracks[id_track]["exists"][ind_lidar]: list_bboxes.append( dict_tracks[id_track]["list_bbox"][ind_lidar]) list_difficulty_att.append( dict_tracks[id_track]["difficult_att"]) path_lidar = os.path.join(path_log, "lidar", "PC_%s.ply" % timestamp_lidar) pc = np.asarray(o3d.io.read_point_cloud(path_lidar).points) list_lidar_timestamp = data_log.lidar_timestamp_list save_bev_img( path_output_vis, list_bboxes, list_difficulty_att, "argoverse_%s" % name_folder, id_log, timestamp_lidar, pc, ) for id_track in dict_tracks.keys(): list_key = list(dict_tracks[id_track].keys()).copy() for key in list_key: if key != "difficult_att": del dict_tracks[id_track][key] dict_att_all[name_folder][id_log] = dict_tracks save_pkl_dictionary(filename_output, dict_att_all)
def process_a_split(data_dir, target_data_dir, split_file_path, bev_bnds, bev_meter_per_pixel, bev_wanted_classes, offset=0): """ Args: data_dir: directory that contains data corresponding to A SPECIFIC SPLIT (train, validation, test) target_data_dir: the dir to write to. Contains data corresponding to A SPECIFIC SPLIT in KITTI (training, testing) split_file_path: location to write all the sample_idx of a split bev_bnds: [4] containing [x_min, x_max, y_min, y_max] in meter for the bev bev_meter_per_pixel: number of meters in a pixel in bev, most often fractional bev_wanted_classes: [VEHICLE, BICYCLIST, PEDESTRIAN, ...] the classes to be for bev offset: first sample_idx to start counting from, needed to differentiate training and validation sample_idx because in KITTI they are under the same dir """ print("Processing", data_dir) target_velodyne_dir = os.path.join(target_data_dir, 'velodyne') target_velodyne_reduced_dir = os.path.join(target_data_dir, 'velodyne_reduced') target_calib_dir = os.path.join(target_data_dir, 'calib') target_image_2_dir = os.path.join(target_data_dir, 'image_2') if 'test' not in split_file_path: target_label_2_dir = os.path.join(target_data_dir, 'label_2') os.makedirs(target_velodyne_dir, exist_ok=True) os.makedirs(target_velodyne_reduced_dir, exist_ok=True) os.makedirs(target_calib_dir, exist_ok=True) os.makedirs(target_image_2_dir, exist_ok=True) if 'test' not in split_file_path: os.makedirs(target_label_2_dir, exist_ok=True) ############################## for saving BEV segmentation masks paths target_bev_drivable_dir = os.path.join(target_data_dir, 'bev_DRIVABLE') os.makedirs(target_bev_drivable_dir, exist_ok=True) target_bev_fov_dir = os.path.join(target_data_dir, 'bev_FOV') os.makedirs(target_bev_fov_dir, exist_ok=True) target_bev_cls_dirs = [] for wanted_cls in bev_wanted_classes: target_bev_cls_dir = os.path.join(target_data_dir, 'bev_{}'.format(wanted_cls)) os.makedirs(target_bev_cls_dir, exist_ok=True) target_bev_cls_dirs.append(target_bev_cls_dir) ############################## end for saving BEV segmentation masks paths # Check the number of logs, defined as one continuous trajectory argoverse_loader = ArgoverseTrackingLoader(data_dir) print('Total number of logs:', len(argoverse_loader)) argoverse_loader.print_all() print('\n') cams = [ 'ring_front_center', 'ring_front_left', 'ring_front_right', 'ring_rear_left', 'ring_rear_right', 'ring_side_left', 'ring_side_right' ] # count total number of files total_number = 0 for q in argoverse_loader.log_list: log_lidar_path = os.path.join(data_dir, q, 'lidar') path, dirs, files = next(os.walk(log_lidar_path)) total_number = total_number + len(files) total_number = total_number * len(cams) print('Now write sample indices to split file.') write_split_file(split_file_path, offset, total_number) bar = progressbar.ProgressBar(maxval=total_number, \ widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()]) print('Total number of files: {}. 
Translation starts...'.format( total_number)) print('Progress:') bar.start() i = 0 kitti_to_argo_mapping = {} for log_id in sorted(argoverse_loader.log_list): argoverse_data = argoverse_loader.get(log_id) from argoverse.map_representation.map_api import ArgoverseMap argoverse_map = ArgoverseMap() for cam in cams: ############################## Calibration for this whole log ############################## log_calib_path = os.path.join(data_dir, log_id, 'vehicle_calibration_info.json') calibration_data = calibration.load_calib(log_calib_path)[cam] calib_file_content = construct_calib_str(calibration_data, pts_in_cam_ego_coord=True) log_lidar_dir = os.path.join(data_dir, log_id, 'lidar') # Loop through the each lidar frame (10Hz) to rename, copy, and adapt # all images, lidars, calibration files, and label files. for frame_idx, timestamp in enumerate( sorted(argoverse_data.lidar_timestamp_list)): idx = str(i + offset).zfill(9) # recording the mapping from kitti to argo # (log index, the lidar frame index) uniquely identify a sample kitti_to_argo_mapping[idx] = (log_id, frame_idx) i += 1 if i < total_number: bar.update(i + 1) ############################## Lidar ############################## # Save lidar file into .bin format under the new directory lidar_file_path = os.path.join( log_lidar_dir, 'PC_{}.ply'.format(str(timestamp))) target_lidar_file_path = os.path.join(target_velodyne_dir, idx + '.bin') lidar_data = load_ply(lidar_file_path) # run this block of code only if we are using the cam coord instead of the ego coord if True: in_cam_coord = calibration_data.project_ego_to_cam( lidar_data) in_cam_coord = in_cam_coord[:, [2, 0, 1]] # tranpose the xyz in_cam_coord[:, [1, 2]] *= -1 # flip 2 of the axes lidar_data = in_cam_coord lidar_data_augmented = np.concatenate( (lidar_data, np.zeros([lidar_data.shape[0], 1])), axis=1) lidar_data_augmented = lidar_data_augmented.astype('float32') lidar_data_augmented.tofile(target_lidar_file_path) ############################## Image ############################## # Save the image file into .png format under the new directory cam_file_path = argoverse_data.image_list_sync[cam][frame_idx] target_cam_file_path = os.path.join(target_image_2_dir, idx + '.png') copyfile(cam_file_path, target_cam_file_path) target_calib_file_path = os.path.join(target_calib_dir, idx + '.txt') file = open(target_calib_file_path, 'w+') file.write(calib_file_content) file.close() ############################## BEV binary masks ############################## bev_drivable_save_path = os.path.join(target_bev_drivable_dir, idx + '.png') bev_wanted_save_paths = [ os.path.join(cls_dir, idx + '.png') for cls_dir in target_bev_cls_dirs ] bev_fov_save_path = os.path.join(target_bev_fov_dir, idx + '.png') get_bev(argoverse_data, argoverse_map, log_id, frame_idx, bev_bnds, bev_meter_per_pixel, bev_drivable_save_path, bev_wanted_classes, bev_wanted_save_paths, bev_fov_save_path, visualize=False, camera_calib=calibration_data) ############################## Labels ############################## if 'test' in split_file_path: continue label_object_list = argoverse_data.get_label_object(frame_idx) target_label_2_file_path = os.path.join( target_label_2_dir, idx + '.txt') file = open(target_label_2_file_path, 'w+') # DontCare objects must appear at the end as per KITTI label files object_lines = [] dontcare_lines = [] for detected_object in label_object_list: classes = detected_object.label_class classes = OBJ_CLASS_MAPPING_DICT[ classes] # map class type from artoverse to KITTI occulusion = 
round(detected_object.occlusion / 25) height = detected_object.height length = detected_object.length width = detected_object.width truncated = 0 center = detected_object.translation # in ego frame corners_ego_frame = detected_object.as_3d_bbox( ) # all eight points in ego frame corners_cam_frame = calibration_data.project_ego_to_cam( corners_ego_frame ) # all eight points in the camera frame image_corners = calibration_data.project_ego_to_image( corners_ego_frame) image_bbox = [ min(image_corners[:, 0]), min(image_corners[:, 1]), max(image_corners[:, 0]), max(image_corners[:, 1]) ] # the four coordinates we need for KITTI image_bbox = [round(x) for x in image_bbox] center_cam_frame = calibration_data.project_ego_to_cam( np.array([center])) # if 0 < center_cam_frame[0][2] < max_d and 0 < image_bbox[0] < 1920 and 0 < image_bbox[1] < 1200 and 0 < \ # image_bbox[2] < 1920 and 0 < image_bbox[3] < 1200: if True: # the center coordinates in cam frame we need for KITTI # for the orientation, we choose point 1 and point 5 for application p1 = corners_cam_frame[1] p5 = corners_cam_frame[5] # dz = p1[2] - p5[2] # dx = p1[0] - p5[0] # angle = math.atan2(dz, dx) angle_vec = p1 - p5 # norm vec along the x axis, points to the right in KITTI cam rect coordinate origin_vec = np.array([1, 0, 0]) import vg angle = vg.signed_angle(origin_vec, angle_vec, look=vg.basis.y, units='rad') # the orientation angle of the car beta = math.atan2(center_cam_frame[0][2], center_cam_frame[0][0]) alpha = angle + beta - math.pi / 2 line = classes + ' {} {} {} {} {} {} {} {} {} {} {} {} {} {} \n'.format( round(truncated, 2), occulusion, round(alpha, 2), round(image_bbox[0], 2), round(image_bbox[1], 2), round(image_bbox[2], 2), round(image_bbox[3], 2), round(height, 2), round(width, 2), round( length, 2), round(center_cam_frame[0][0], 2), round(center_cam_frame[0][1], 2) + height / 2, round(center_cam_frame[0][2], 2), round(angle, 2)) # separate the object lines so we can move all the dontcare lines at the end if classes == DONT_CARE: dontcare_lines.append(line) else: object_lines.append(line) for line in object_lines: file.write(line) for line in dontcare_lines: file.write(line) file.close() # write kitti_to_argo_mapping to a file split_dir = os.path.dirname(split_file_path) split_name = os.path.basename(split_file_path) split_name = os.path.splitext(split_name)[0] prefix = 'kitti_to_argo_mapping' kitti_to_argo_mapping_path = os.path.join( split_dir, "{}_{}.json".format(prefix, split_name)) file_handle = open(kitti_to_argo_mapping_path, 'w') json.dump(kitti_to_argo_mapping, file_handle) file_handle.close() bar.finish() print('Translation finished, processed {} files'.format(i))
class Argo_MonoDataset(data.Dataset): """Superclass for monocular dataloaders Args: data_path filenames height width frame_idxs num_scales is_train img_ext """ def __init__(self, data_path, filenames, height, width, frame_idxs, num_scales, is_train=False, img_ext='.jpg'): super(Argo_MonoDataset, self).__init__() self.data_path = data_path self.filenames = filenames self.height = height #2056 self.width = width #2464 self.num_scales = num_scales self.interp = Image.ANTIALIAS self.frame_idxs = frame_idxs self.is_train = is_train self.img_ext = img_ext self.loader = argoverse_load_image self.argoverse_loader = ArgoverseTrackingLoader(self.data_path) self.camera = self.argoverse_loader.CAMERA_LIST[0] self.argoverse_data = 0 self.to_tensor = transforms.ToTensor() # We need to specify augmentations differently in newer versions of torchvision. # We first try the newer tuple version; if this fails we fall back to scalars try: self.brightness = (0.8, 1.2) self.contrast = (0.8, 1.2) self.saturation = (0.8, 1.2) self.hue = (-0.1, 0.1) transforms.ColorJitter.get_params(self.brightness, self.contrast, self.saturation, self.hue) except TypeError: self.brightness = 0.2 self.contrast = 0.2 self.saturation = 0.2 self.hue = 0.1 self.resize = {} for i in range(self.num_scales): s = 2**i self.resize[i] = transforms.Resize( (self.height // s, self.width // s), interpolation=self.interp) self.load_depth = self.check_depth() def preprocess(self, inputs, color_aug): """Resize colour images to the required scales and augment if required We create the color_aug object in advance and apply the same augmentation to all images in this item. This ensures that all images input to the pose network receive the same augmentation. """ for k in list(inputs): frame = inputs[k] if "color" in k: n, im, i = k for i in range(self.num_scales): inputs[(n, im, i)] = self.resize[i](inputs[(n, im, i - 1)]) for k in list(inputs): f = inputs[k] if "color" in k: n, im, i = k inputs[(n, im, i)] = self.to_tensor(f) inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f)) def __len__(self): return len(self.filenames) def __getitem__(self, index): """Returns a single training item from the dataset as a dictionary. Values correspond to torch tensors. Keys in the dictionary are either strings or tuples: ("color", <frame_id>, <scale>) for raw colour images, ("color_aug", <frame_id>, <scale>) for augmented colour images, ("K", scale) or ("inv_K", scale) for camera intrinsics, "stereo_T" for camera extrinsics, and "depth_gt" for ground truth depth maps. <frame_id> is either: an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index', or "s" for the opposite image in the stereo pair. 
<scale> is an integer representing the scale of the image relative to the fullsize image: -1 images at native resolution as loaded from disk 0 images resized to (self.width, self.height ) 1 images resized to (self.width // 2, self.height // 2) 2 images resized to (self.width // 4, self.height // 4) 3 images resized to (self.width // 8, self.height // 8) """ inputs = {} do_color_aug = self.is_train and random.random() > 0.5 do_flip = self.is_train and random.random() > 0.5 line = self.filenames[index].split() folder = line[0] self.argoverse_data = self.argoverse_loader.get(folder) last_row = np.array([[0.0, 0.0, 0.0, 1.0]]) K_ = self.argoverse_data.get_calibration(self.camera).K K = np.vstack((K_, last_row)) H = 2056 W = 2464 K[0] = K[0] / W K[1] = K[1] / H if len(line) == 3: frame_index = int(line[1]) else: frame_index = 0 if len(line) == 3: side = line[2] else: side = None for i in self.frame_idxs: if i == "s": other_side = {"r": "l", "l": "r"}[side] inputs[("color", i, -1)] = self.get_color(folder, frame_index, other_side, do_flip) else: inputs[("color", i, -1)] = self.get_color(folder, frame_index + i, side, do_flip) # adjusting intrinsics to match each scale in the pyramid for scale in range(self.num_scales): K[0, :] *= self.width // (2**scale) K[1, :] *= self.height // (2**scale) inv_K = np.linalg.pinv(K) inputs[("K", scale)] = torch.from_numpy(K).type(torch.float32) inputs[("inv_K", scale)] = torch.from_numpy(inv_K).type(torch.float32) if do_color_aug: color_aug = transforms.ColorJitter.get_params( self.brightness, self.contrast, self.saturation, self.hue) else: color_aug = (lambda x: x) self.preprocess(inputs, color_aug) for i in self.frame_idxs: del inputs[("color", i, -1)] del inputs[("color_aug", i, -1)] if self.load_depth: depth_gt = self.get_depth(folder, frame_index, side, do_flip) inputs["depth_gt"] = np.expand_dims(depth_gt, 0) inputs["depth_gt"] = torch.from_numpy(inputs["depth_gt"].astype( np.float32)) if "s" in self.frame_idxs: stereo_T = np.eye(4, dtype=np.float32) baseline_sign = -1 if do_flip else 1 side_sign = -1 if side == "l" else 1 stereo_T[0, 3] = side_sign * baseline_sign * 0.1 inputs["stereo_T"] = torch.from_numpy(stereo_T) return inputs def get_color(self, folder, frame_index, side, do_flip): raise NotImplementedError def check_depth(self): raise NotImplementedError def get_depth(self, folder, frame_index, side, do_flip): raise NotImplementedError
actual_idx_list = []
logidx_to_count_map = {}
log_to_count_map = {}
print("____________SPLIT IS : {} ______________".format(split))
if split == 'train':
    imageset_dir = os.path.join(root_dir, split)
    splitname = lambda x: [
        x[len(imageset_dir + "/"):-4].split("/")[0],
        x[len(imageset_dir + "/"):-4].split("/")[2].split("_")[1]
    ]
    data_loader = ArgoverseTrackingLoader(os.path.join(root_dir, split))
    log_list = data_loader.log_list
    path_count = 0
    for log_id, log in enumerate(log_list):
        lidar_lst = data_loader.get(log).lidar_list
        lidar_pathlist.extend(lidar_lst)
        label_pathlist.extend(data_loader.get(log).label_list)
        actual_idx_list.extend([splitname(each) for each in lidar_lst])
        logidx_to_count_map[log_id] = np.arange(path_count,
                                                path_count + len(lidar_lst))
        log_to_count_map[log] = np.arange(path_count,
                                          path_count + len(lidar_lst))
        path_count += len(lidar_lst)
    assert len(lidar_pathlist) == len(label_pathlist)
elif split == 'test':
    imageset_dir = os.path.join(root_dir, split)
    splitname = lambda x: [
        x[len(imageset_dir + "/"):-4].split("/")[0],
        x[len(imageset_dir + "/"):-4].split("/")[2].split("_")[1]
data_amodel: Dict[str, Any] = {}
for key in dict_track_labels.keys():
    dict_amodal: Dict[str, Any] = {}
    data_amodel[key] = dict_amodal
    data_amodel[key]["label_class"] = dict_track_labels[key][0]["label_class"]
    data_amodel[key]["uuid"] = dict_track_labels[key][0]["track_label_uuid"]
    data_amodel[key]["log_id"] = id_log
    data_amodel[key]["track_label_frames"] = dict_track_labels[key]

argoverse_loader = ArgoverseTrackingLoader(os.path.join(root_dir, name_folder))
data_log = argoverse_loader.get(id_log)
list_lidar_timestamp = data_log.lidar_timestamp_list

dict_tracks: Dict[str, Any] = {}
for id_track in data_amodel.keys():
    data = data_amodel[id_track]
    if data["label_class"] not in list_name_class:
        continue
    data_per_frame = data["track_label_frames"]
    dict_per_track: Dict[str, Any] = {}
    dict_tracks[id_track] = dict_per_track
    dict_tracks[id_track]["ind_lidar_min"] = -1
    dict_tracks[id_track]["ind_lidar_max"] = -1
class ArgoDataset(Dataset): def __init__(self, question_h5, image_feature_h5_path, lidar_feature_h5_path, vocab, load_lidar=True, npoint=1024, normal_channel=True, uniform=False, cache_size=15000, driveable_area=False, mode='prefix', image_h5=None, lidar_h5=None, max_samples=None, question_families=None, image_idx_start_from=None): #############read whole question_h5 file in memory############################# self.all_questions = question_h5['questions'][:] self.all_answers = get_answer_classes(question_h5['answers'][:], vocab) self.all_image_idxs = question_h5['image_idxs'][:] self.all_video_names = (question_h5['video_names'][:]).astype(str) self.questions_length = question_h5['question_length'][:] self.image_feature_h5 = image_feature_h5_path self.load_lidar = load_lidar ###########for lidar########################################################## if self.load_lidar: self.lidar_feature_h5 = lidar_feature_h5_path self.argoverse_loader = ArgoverseTrackingLoader( self.lidar_feature_h5) self.am = ArgoverseMap() self.driveable_area = driveable_area ############################################################################# self.npoints = npoint self.uniform = uniform self.normal_channel = normal_channel self.cache_size = cache_size # how many data points to cache in memory self.cache = {} # from index to (point_set, cls) tuple def __len__(self): print(self.all_questions.shape[0]) return self.all_questions.shape[0] def __getitem__(self, index): question = self.all_questions[index] question_length = self.questions_length[index] answer = self.all_answers[index] image_name = self.all_video_names[index] + '@' + str( self.all_image_idxs[index]) #####image feature ############################################### with h5py.File(self.image_feature_h5, 'r') as img_feat_file: image_feature = img_feat_file[image_name][:] if self.load_lidar: # if index in self.cache: # point_set = self.cache[index] # else: lidar_index = self.all_image_idxs[index] log_id = self.all_video_names[index] argoverse_data = self.argoverse_loader.get(log_id) lidar_pts = argoverse_data.get_lidar(lidar_index, load=True) city_to_egovehicle_se3 = argoverse_data.get_pose(lidar_index) city_name = argoverse_data.city_name roi_area_pts = copy.deepcopy(lidar_pts) roi_area_pts = city_to_egovehicle_se3.transform_point_cloud( roi_area_pts) # put into city coords roi_area_pts = self.am.remove_non_roi_points( roi_area_pts, city_name) roi_area_pts = self.am.remove_ground_surface( roi_area_pts, city_name) roi_area_pts = city_to_egovehicle_se3.inverse_transform_point_cloud( roi_area_pts) if self.driveable_area: driveable_area_pts = roi_area_pts driveable_area_pts = city_to_egovehicle_se3.transform_point_cloud( driveable_area_pts) # put into city coords driveable_area_pts = self.am.remove_non_driveable_area_points( driveable_area_pts, city_name) driveable_area_pts = city_to_egovehicle_se3.inverse_transform_point_cloud( driveable_area_pts) point_set = driveable_area_pts else: point_set = roi_area_pts del lidar_pts if self.uniform: point_set = furthest_point_sample(point_set, self.npoints) else: point_set = point_set[0:self.npoints, :] point_set[:, 0:3] = pc_normalize(point_set[:, 0:3]) if not self.normal_channel: point_set = point_set[:, 0:3] if len(self.cache) < self.cache_size: self.cache[index] = point_set # print(point_set.shape,type(point_set)) point_set = point_set.T else: point_set = torch.randn(5) # image # image here means all 7 image # question feature like pretrained bert or word2vec or learned embeddings return (question, 
image_feature, question_length, point_set, answer)
import argoverse
from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader
import pdb

pdb.set_trace()
root_dir = '/ssd_scratch/cvit/raghava.modhugu/argoverse-tracking/'
argoverse_loader = ArgoverseTrackingLoader(root_dir)
camera = 'ring_front_center'
with open('splits/argoverse/val_files.txt') as f:
    for cnt, line in enumerate(f):
        log, index, _ = line.split()
        print(log, index)
        print(argoverse_loader.get(log).get_image(int(index),
                                                  camera=camera,
                                                  load=False))
def generate_road_bev(dataset_dir="", log_id="", output_dir=""):
    argoverse_loader = ArgoverseTrackingLoader(dataset_dir)
    argoverse_data = argoverse_loader.get(log_id)
    city_name = argoverse_data.city_name
    avm = ArgoverseMap()
    sdb = SynchronizationDB(dataset_dir, collect_single_log_id=log_id)
    try:
        path = os.path.join(output_dir, log_id, 'road_gt')
        command = "rm -r " + path
        os.system(command)
    except BaseException:
        pass
    try:
        os.makedirs(os.path.join(output_dir, log_id, 'road_gt'))
    except BaseException:
        pass
    ply_fpath = os.path.join(dataset_dir, log_id, 'lidar')
    ply_locs = []
    for idx, ply_name in enumerate(os.listdir(ply_fpath)):
        ply_loc = np.array([idx, int(ply_name.split('.')[0].split('_')[-1])])
        ply_locs.append(ply_loc)
    ply_locs = np.array(ply_locs)
    lidar_timestamps = sorted(ply_locs[:, 1])
    calib_path = f"{dataset_dir}/{log_id}/vehicle_calibration_info.json"
    calib_data = read_json_file(calib_path)
    ind = 0
    for i in range(9):
        if calib_data['camera_data_'][i]['key'] == \
                'image_raw_stereo_front_left':
            ind = i
            break
    rotation = np.array(calib_data['camera_data_'][ind]['value']
                        ['vehicle_SE3_camera_']['rotation']['coefficients'])
    translation = np.array(calib_data['camera_data_'][ind]['value']
                           ['vehicle_SE3_camera_']['translation'])
    egovehicle_SE3_cam = SE3(rotation=quat2rotmat(rotation),
                             translation=translation)
    for idx in range(len(lidar_timestamps)):
        lidar_timestamp = lidar_timestamps[idx]
        cam_timestamp = sdb.get_closest_cam_channel_timestamp(
            lidar_timestamp, "stereo_front_left", str(log_id))
        occupancy_map = np.zeros((256, 256))
        pose_fpath = os.path.join(
            dataset_dir, log_id, "poses",
            "city_SE3_egovehicle_" + str(lidar_timestamp) + ".json")
        if not Path(pose_fpath).exists():
            continue
        pose_data = read_json_file(pose_fpath)
        rotation = np.array(pose_data["rotation"])
        translation = np.array(pose_data["translation"])
        xcenter = translation[0]
        ycenter = translation[1]
        city_to_egovehicle_se3 = SE3(rotation=quat2rotmat(rotation),
                                     translation=translation)
        ego_car_nearby_lane_ids = avm.get_lane_ids_in_xy_bbox(
            xcenter, ycenter, city_name, 50.0)
        occupancy_map = get_lane_bev(ego_car_nearby_lane_ids, avm, city_name,
                                     city_to_egovehicle_se3,
                                     egovehicle_SE3_cam, occupancy_map, res,
                                     255)
        output_loc = os.path.join(
            output_dir, log_id, 'road_gt',
            'stereo_front_left_' + str(cam_timestamp) + '.png')
        cv2.imwrite(output_loc, occupancy_map)
    path, dirs, files = next(os.walk(data_dir + q + '/lidar'))
    total_number = total_number + len(files)

total_number = total_number * 7

bar = progressbar.ProgressBar(maxval=total_number,
                              widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                       progressbar.Percentage()])

print('Total number of files: {}. Translation starts...'.format(total_number))
print('Progress:')
bar.start()

i = 0
for log_id in argoverse_loader.log_list:
    argoverse_data = argoverse_loader.get(log_id)
    for cam in cams:
        # Recreate the calibration file content
        calibration_data = calibration.load_calib(
            data_dir + log_id + '/vehicle_calibration_info.json')[cam]
        L3 = 'P2: '
        for j in calibration_data.K.reshape(1, 12)[0]:
            L3 = L3 + str(j) + ' '
        L3 = L3[:-1]
        L6 = 'Tr_velo_to_cam: '
        for k in calibration_data.extrinsic.reshape(1, 16)[0][0:12]:
            L6 = L6 + str(k) + ' '
        L6 = L6[:-1]
        L1 = 'P0: 0 0 0 0 0 0 0 0 0 0 0 0'
import argoverse
from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader
import argoverse.visualization.visualization_utils as viz_util
import matplotlib.pyplot as plt  # plotting
import cv2  # opencv
from PIL import Image

# set root_dir to the correct path to your dataset folder
root_dir = '/home/iago/Documents/argoverse/data/tracking/sample/sample'

argoverse_loader = ArgoverseTrackingLoader(root_dir)

argoverse_data = argoverse_loader[0]
print(argoverse_data)

argoverse_data = argoverse_loader.get('c6911883-1843-3727-8eaa-41dc8cda8993')
print(argoverse_data)

camera = "ring_front_center"
num_imgs = len(argoverse_data.image_list[camera])
for idx in range(0, num_imgs):
    img = argoverse_data.get_image_sync(idx, camera=camera)
    objects = argoverse_data.get_label_object(idx)
    calib = argoverse_data.get_calibration(camera)
    img_vis = Image.fromarray(
        viz_util.show_image_with_boxes(img, objects, calib))
    plt.imshow(img_vis)
    plt.title("Ring Front Center")
def test_data_loader_get(data_loader: ArgoverseTrackingLoader) -> None:
    data_0 = data_loader[0].current_log
    data_1 = data_loader.get("1").current_log
    assert data_0 == data_1
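The test above expects a `data_loader` fixture. A plausible conftest-style sketch is shown below; the test-data path is purely hypothetical and assumes the first (and only) log in that directory is named "1", so that indexing and `get("1")` return the same log.

import pytest
from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader

# Hypothetical path to a tiny tracking dataset used only for tests.
TEST_DATA_LOC = "tests/test_data/tracking"


@pytest.fixture
def data_loader() -> ArgoverseTrackingLoader:
    return ArgoverseTrackingLoader(TEST_DATA_LOC)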
class BagConverter:
    def __init__(self, args):
        # Setup Argoverse loader
        self.dataset_dir = args.dataset_dir
        self.log_id = args.log_id
        self.argoverse_loader = ArgoverseTrackingLoader(self.dataset_dir)
        self.argoverse_data = self.argoverse_loader.get(self.log_id)
        print(self.argoverse_data)

        # List of cameras to publish
        self.cameras_list = [camera for camera in self.argoverse_loader.CAMERA_LIST
                             if camera in args.cameras]
        self.load_camera_info()

        # Image timestamps
        self.image_files_dict = {
            camera: self.argoverse_data.timestamp_image_dict[camera]
            for camera in self.cameras_list}
        self.image_timestamps = {
            camera: list(self.image_files_dict[camera].keys())
            for camera in self.cameras_list}

        # Save image files dict to be added into the bag file later using Python2
        with open('/tmp/image_data_%s.json' % self.log_id, 'w') as f:
            data = {'image_files_dict': self.image_files_dict,
                    'image_timestamps': self.image_timestamps,
                    'cameras_list': self.cameras_list}
            json.dump(data, f, indent=4)

        # LiDAR timestamps
        self.lidar_files_dict = self.argoverse_data.timestamp_lidar_dict
        self.lidar_timestamps = list(self.lidar_files_dict.keys())

        # ROSBAG output path
        if not os.path.isdir(args.output_dir):
            os.makedirs(args.output_dir)
        self.output_filename = os.path.join(args.output_dir,
                                            '%s_no_images.bag' % self.log_id)
        self.bag = rosbag.Bag(self.output_filename, 'w')

        # Topic names
        self.lidar_topic = '/argoverse/lidar/pointcloud'
        self.camera_info_topic_template = '/argoverse/%s/camera_info'

        # TF frames
        self.map_frame = 'city'
        self.vehicle_frame = 'egovehicle'
        self.load_camera_static_tf()

    def load_camera_info(self):
        # Make a dictionary of CameraInfo messages for all cameras listed
        self.camera_info_dict = {
            camera: utils.make_camera_info_message(
                self.argoverse_data.get_calibration(camera))
            for camera in self.cameras_list}

    def load_camera_static_tf(self):
        # Make a dictionary of static TF messages for all cameras listed
        self.camera_static_tf_dict = {
            camera: utils.make_transform_stamped_message(
                self.vehicle_frame, camera,
                self.argoverse_data.get_calibration(camera)
                .calib_data['value']['vehicle_SE3_camera_'])
            for camera in self.argoverse_loader.CAMERA_LIST}

    def get_camera_info_message(self, camera, timestamp):
        # Make camera info message
        camera_info_msg = self.camera_info_dict[camera]
        camera_info_msg.header.stamp = utils.ros_time_from_nsecs(timestamp)
        return camera_info_msg

    def get_lidar_message(self, timestamp):
        # Load ply file
        filename = self.lidar_files_dict[timestamp]
        with open(filename, 'rb') as f:
            plydata = PlyData.read(f)

        # Make header message
        header = Header()
        header.frame_id = self.vehicle_frame  # LiDAR frame is ego vehicle frame
        header.stamp = utils.ros_time_from_nsecs(timestamp)

        # Make point cloud message (both LiDARs combined - 64 rings)
        pcl_msg = utils.make_pointcloud_message(header, plydata)
        return pcl_msg

    def convert(self):
        # Keep first timestamp for publishing static TF message
        static_timestamp = self.lidar_timestamps[0]

        # Publish LiDAR PointCloud messages
        for timestamp in tqdm(self.lidar_timestamps, desc='LiDAR, Pose'):
            lidar_msg = self.get_lidar_message(timestamp)
            self.bag.write(self.lidar_topic, lidar_msg,
                           utils.ros_time_from_nsecs(timestamp))

            # Publish ego vehicle pose
            tf_msg = utils.argoverse_pose_to_transform_message(
                self.dataset_dir, self.log_id, self.map_frame,
                self.vehicle_frame, timestamp, read_json_file)
            self.bag.write('/tf', tf_msg, utils.ros_time_from_nsecs(timestamp))

        # Publish camera messages
        for camera in tqdm(self.cameras_list, desc='CameraInfo'):
            static_timestamp = min(static_timestamp,
                                   self.image_timestamps[camera][0])
            for timestamp in tqdm(self.image_timestamps[camera], desc=camera,
                                  leave=False):
                camera_info_msg = self.get_camera_info_message(camera, timestamp)
                self.bag.write(self.camera_info_topic_template % camera,
                               camera_info_msg,
                               utils.ros_time_from_nsecs(timestamp))

        # Publish static TF messages
        tf_msg = TFMessage()
        for camera in tqdm(self.camera_static_tf_dict, desc='Static TF'):
            msg = self.camera_static_tf_dict[camera]
            msg.header.stamp = utils.ros_time_from_nsecs(static_timestamp)
            tf_msg.transforms.append(msg)
        self.bag.write('/tf_static', tf_msg,
                       utils.ros_time_from_nsecs(static_timestamp))

        # Close rosbag file
        self.bag.close()
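A minimal, hedged driver sketch for the converter above; the argument names mirror the attributes read in `__init__` (`dataset_dir`, `log_id`, `cameras`, `output_dir`), but the entry point itself is an assumption, not part of the original snippet.

import argparse

if __name__ == '__main__':
    # Hypothetical CLI wrapper; flag names follow the fields consumed by
    # BagConverter.__init__ above.
    parser = argparse.ArgumentParser(description='Convert an Argoverse log to a rosbag.')
    parser.add_argument('--dataset_dir', required=True, help='Argoverse tracking split root')
    parser.add_argument('--log_id', required=True, help='Log UUID to convert')
    parser.add_argument('--cameras', nargs='+', default=[], help='Subset of CAMERA_LIST to include')
    parser.add_argument('--output_dir', required=True, help='Directory for the output .bag file')
    args = parser.parse_args()

    converter = BagConverter(args)
    converter.convert()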
def generate_vehicle_bev(dataset_dir="", log_id="", output_dir=""):
    argoverse_loader = ArgoverseTrackingLoader(dataset_dir)
    argoverse_data = argoverse_loader.get(log_id)
    camera = argoverse_loader.CAMERA_LIST[7]
    calib = argoverse_data.get_calibration(camera)
    sdb = SynchronizationDB(dataset_dir, collect_single_log_id=log_id)

    calib_path = f"{dataset_dir}/{log_id}/vehicle_calibration_info.json"
    calib_data = read_json_file(calib_path)
    ind = 0
    for i in range(9):
        if calib_data['camera_data_'][i]['key'] == \
                'image_raw_stereo_front_left':
            ind = i
            break
    rotation = np.array(calib_data['camera_data_'][ind]['value']
                        ['vehicle_SE3_camera_']['rotation']['coefficients'])
    translation = np.array(calib_data['camera_data_'][ind]['value']
                           ['vehicle_SE3_camera_']['translation'])
    egovehicle_SE3_cam = SE3(rotation=quat2rotmat(rotation),
                             translation=translation)

    if not os.path.exists(os.path.join(output_dir, log_id, "car_bev_gt")):
        os.makedirs(os.path.join(output_dir, log_id, "car_bev_gt"))

    lidar_dir = os.path.join(dataset_dir, log_id, "lidar")
    ply_list = os.listdir(lidar_dir)

    pfa = PerFrameLabelAccumulator(dataset_dir, dataset_dir,
                                   "argoverse_bev_viz", save=False,
                                   bboxes_3d=True)
    pfa.accumulate_per_log_data(log_id)
    log_timestamp_dict = pfa.log_timestamp_dict

    for i, ply_name in enumerate(ply_list):
        lidar_timestamp = ply_name.split('.')[0].split('_')[1]
        lidar_timestamp = int(lidar_timestamp)

        cam_timestamp = sdb.get_closest_cam_channel_timestamp(
            lidar_timestamp, "stereo_front_left", str(log_id))
        image_path = os.path.join(
            output_dir, str(log_id), "car_bev_gt",
            "stereo_front_left_" + str(cam_timestamp) + ".jpg")
        objects = log_timestamp_dict[log_id][lidar_timestamp]

        top_view = np.zeros((256, 256))

        all_occluded = True
        for frame_rec in objects:
            if frame_rec.occlusion_val != IS_OCCLUDED_FLAG:
                all_occluded = False

        if not all_occluded:
            for i, frame_rec in enumerate(objects):
                bbox_ego_frame = frame_rec.bbox_ego_frame

                uv = calib.project_ego_to_image(bbox_ego_frame).T
                # `size` is the (height, width) of the camera image; it is
                # assumed to be defined at module level in the original script.
                idx_ = np.all(
                    np.logical_and(
                        np.logical_and(
                            np.logical_and(uv[0, :] >= 0.0,
                                           uv[0, :] < size[1] - 1.0),
                            np.logical_and(uv[1, :] >= 0.0,
                                           uv[1, :] < size[0] - 1.0)),
                        uv[2, :] > 0))
                if not idx_:
                    continue

                bbox_cam_fr = egovehicle_SE3_cam.inverse().\
                    transform_point_cloud(bbox_ego_frame)
                X = bbox_cam_fr[:, 0]
                Z = bbox_cam_fr[:, 2]

                if (frame_rec.occlusion_val != IS_OCCLUDED_FLAG and
                        frame_rec.obj_class_str == "VEHICLE"):
                    # `res` (metres per BEV pixel) is likewise assumed to be a
                    # module-level constant of the original script.
                    y_img = (-Z / res).astype(np.int32)
                    x_img = (X / res).astype(np.int32)

                    x_img -= int(np.floor(-20 / res))
                    y_img += int(np.floor(40 / res))

                    box = np.array([x_img[2], y_img[2]])
                    box = np.vstack((box, [x_img[6], y_img[6]]))
                    box = np.vstack((box, [x_img[7], y_img[7]]))
                    box = np.vstack((box, [x_img[3], y_img[3]]))

                    cv2.drawContours(top_view, [box], 0, 255, -1)

        cv2.imwrite(image_path, top_view)
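A short, assumed invocation of the BEV ground-truth generator above; every path and the log ID are placeholders, not values from the original code.

# Hypothetical invocation; directory layout follows the standard
# Argoverse tracking split structure.
generate_vehicle_bev(
    dataset_dir="/data/argoverse-tracking/train1",
    log_id="<log-uuid>",
    output_dir="/data/bev_gt",
)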
def visualize_loop(args, val_loader):
    image_feature_size = 512
    lidar_feature_size = 1024

    if args.model_type == 'SAN':
        question_feat_size = 512
        model = SAN(args, question_feat_size, image_feature_size,
                    lidar_feature_size, num_classes=34, qa=None,
                    encoder=args.encoder_type, method='hierarchical')
    if args.model_type == 'MCB':
        question_feat_size = 512
        model = MCB(args, question_feat_size, image_feature_size,
                    lidar_feature_size, num_classes=34, qa=None,
                    encoder=args.encoder_type, method='hierarchical')
    if args.model_type == 'MFB':
        question_feat_size = 512
        # image_feature_size = 512
        model = MFB(args, question_feat_size, image_feature_size,
                    lidar_feature_size, num_classes=34, qa=None,
                    encoder=args.encoder_type, method='hierarchical')
    if args.model_type == 'MLB':
        question_feat_size = 1024
        image_feature_size = 512
        model = MLB(args, question_feat_size, image_feature_size,
                    lidar_feature_size, num_classes=34, qa=None,
                    encoder=args.encoder_type, method='hierarchical')
    if args.model_type == 'MUTAN':
        question_feat_size = 1024
        image_feature_size = 512
        model = MUTAN(args, question_feat_size, image_feature_size,
                      lidar_feature_size, num_classes=34, qa=None,
                      encoder=args.encoder_type, method='hierarchical')
    if args.model_type == 'DAN':
        question_feat_size = 512
        model = DAN(args, question_feat_size, image_feature_size,
                    lidar_feature_size, num_classes=34, qa=None,
                    encoder=args.encoder_type, method='hierarchical')

    data = load_weights(args, model, optimizer=None)
    if type(data) == list:
        model, optimizer, start_epoch, loss, accuracy = data
        print("Loaded weights")
        print("Epoch: %d, loss: %.3f, Accuracy: %.4f " %
              (start_epoch, loss, accuracy), flush=True)
    else:
        print("Error occurred while loading model; training freshly")
        model = data
        return

    # Multiple-GPU use
    # if torch.cuda.device_count() > 1:
    #     print("Using ", torch.cuda.device_count(), "GPUs!")
    #     model = nn.DataParallel(model)

    model.to(device=args.device)
    model.eval()

    import argoverse
    from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader
    from argoverse.utils.json_utils import read_json_file
    from argoverse.map_representation.map_api import ArgoverseMap

    vocab = load_vocab(os.path.join(args.input_base, args.vocab))
    argoverse_loader = ArgoverseTrackingLoader(
        '../../../Data/train/argoverse-tracking')

    k = 1
    with torch.no_grad():
        for data in tqdm(val_loader):
            question, image_feature, ques_lengths, point_set, answer, image_name = data
            question = question.to(device=args.device)
            ques_lengths = ques_lengths.to(device=args.device)
            image_feature = image_feature.to(device=args.device)
            point_set = point_set.to(device=args.device)

            pred, wgt, energies = model(question, image_feature,
                                        ques_lengths, point_set)

            question = question.cpu().data.numpy()
            answer = answer.cpu().data.numpy()
            pred = F.softmax(pred, dim=1)
            pred = torch.argmax(pred, dim=1)
            pred = np.asarray(pred.cpu().data)
            wgt = wgt.cpu().data.numpy()
            energies = energies.squeeze(1).cpu().data.numpy()
            ques_lengths = ques_lengths.cpu().data.numpy()

            pat = re.compile(r'(.*)@(.*)')
            _, keep = np.where([answer == pred])
            temp_batch_size = question.shape[0]

            for b in range(temp_batch_size):
                q = get_ques(question[b], ques_lengths[b], vocab)
                ans = get_ans(answer[b])
                pred_ans = get_ans(pred[b])
                # print(q, ans)

                # image_name is encoded as "<log_id>@<frame_index>"
                c = list(re.findall(pat, image_name[b]))[0]
                log_id = c[0]
                idx = int(c[1])
                print(k)
                argoverse_data = argoverse_loader.get(log_id)

                if args.model_type == 'SAN':
                    plot_att(argoverse_data, idx, wgt[b, :, 1, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MCB':
                    plot_att(argoverse_data, idx, wgt[b], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MFB':
                    plot_att(argoverse_data, idx, wgt[b, :, :, 1], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MLB':
                    plot_att(argoverse_data, idx, wgt[b, :, 3, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MUTAN':
                    # only two glimpses
                    plot_att(argoverse_data, idx, wgt[b, :, 1, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'DAN':
                    # only two memory steps
                    plot_att(argoverse_data, idx, wgt[b, :, 1, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                k = k + 1
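A hedged sketch of how visualize_loop might be driven; `get_args` and `build_val_dataset` are hypothetical project-specific helpers, and only the fields that visualize_loop actually reads from `args` (model_type, encoder_type, device, input_base, vocab, save_dir) are assumed to exist on the parsed arguments.

import torch

if __name__ == '__main__':
    args = get_args()                      # assumed project-specific argument parser
    val_dataset = build_val_dataset(args)  # assumed dataset factory yielding
                                           # (question, image_feature, ques_lengths,
                                           #  point_set, answer, image_name) tuples
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False)
    visualize_loop(args, val_loader)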