# Shared imports for the snippets below (the originals live in separate l5kit
# modules; constants such as SCENE_LENGTH come from their own test files).
from pathlib import Path
from typing import Callable, Dict, Optional

import numpy as np
import pytest
from torch.utils.data import ConcatDataset, DataLoader, Subset

from l5kit.data import (AGENT_DTYPE, FRAME_DTYPE, SCENE_DTYPE, TL_FACE_DTYPE, ChunkedDataset, DataManager,
                        get_agents_slice_from_frames, get_combined_scenes, get_tl_faces_slice_from_frames)
from l5kit.evaluation import compute_mse_error_csv, export_zarr_to_ground_truth_csv
from l5kit.kinematic import Perturbation
from l5kit.rasterization import Rasterizer


def _get_simple_dataset(self) -> ChunkedDataset:
    # build a simple dataset with 3 frames:
    # frame 0:
    #   agent 0
    #   agent 1
    #   agent 2
    # frame 1:
    #   agent 0
    #   agent 1
    # frame 2:
    #   agent 0
    dataset = ChunkedDataset("")
    dataset.scenes = np.zeros(1, dtype=SCENE_DTYPE)
    dataset.frames = np.zeros(3, dtype=FRAME_DTYPE)
    dataset.agents = np.zeros(6, dtype=AGENT_DTYPE)
    dataset.scenes[0]["frame_index_interval"] = (0, 3)
    dataset.frames["agent_index_interval"] = [(0, 3), (3, 5), (5, 6)]
    dataset.agents["track_id"] = [0, 1, 2, 0, 1, 0]
    # set properties to something other than zero
    dataset.agents["centroid"] = np.random.rand(*dataset.agents["centroid"].shape)
    dataset.agents["yaw"] = np.random.rand(*dataset.agents["yaw"].shape)
    dataset.agents["extent"] = np.random.rand(*dataset.agents["extent"].shape)
    return dataset
def test_compute_mse_error(tmp_path: Path) -> None:
    data = ChunkedDataset(path="./l5kit/tests/artefacts/single_scene.zarr")
    data.open()
    export_zarr_to_ground_truth_csv(data, str(tmp_path / "gt1.csv"), 0, 50, 0.5)
    data.open()  # avoid double select_agents
    export_zarr_to_ground_truth_csv(data, str(tmp_path / "gt2.csv"), 0, 50, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt2.csv"))
    assert np.all(err == 0.0)

    data_fake = ChunkedDataset(str(tmp_path))
    data_fake.scenes = np.asarray(data.scenes).copy()
    data_fake.frames = np.asarray(data.frames).copy()
    data_fake.agents = np.asarray(data.agents).copy()
    data_fake.root = data.root
    # perturb the centroids so the error becomes non-zero
    data_fake.agents["centroid"] += np.random.rand(*data_fake.agents["centroid"].shape)
    export_zarr_to_ground_truth_csv(data_fake, str(tmp_path / "gt3.csv"), 0, 50, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt3.csv"))
    assert np.any(err > 0.0)

    # test inconsistent inputs by removing lines from gt1
    with open(str(tmp_path / "gt1.csv")) as src:
        lines = src.readlines()
    with open(str(tmp_path / "gt4.csv"), "w") as fp:
        fp.writelines(lines[:-10])
    with pytest.raises(ValueError):
        compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt4.csv"))
def test_compute_mse_error(tmp_path: Path, zarr_dataset: ChunkedDataset) -> None:
    export_zarr_to_ground_truth_csv(zarr_dataset, str(tmp_path / "gt1.csv"), 10, 50, 0.5)
    export_zarr_to_ground_truth_csv(zarr_dataset, str(tmp_path / "gt2.csv"), 10, 50, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt2.csv"))
    assert np.all(err == 0.0)

    data_fake = ChunkedDataset(str(tmp_path))
    data_fake.scenes = np.asarray(zarr_dataset.scenes).copy()
    data_fake.frames = np.asarray(zarr_dataset.frames).copy()
    data_fake.agents = np.asarray(zarr_dataset.agents).copy()
    # perturb the centroids slightly so the error becomes non-zero
    data_fake.agents["centroid"] += np.random.rand(*data_fake.agents["centroid"].shape) * 1e-2
    export_zarr_to_ground_truth_csv(data_fake, str(tmp_path / "gt3.csv"), 10, 50, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt3.csv"))
    assert np.any(err > 0.0)

    # test inconsistent inputs by removing lines from gt1
    with open(str(tmp_path / "gt1.csv")) as src:
        lines = src.readlines()
    with open(str(tmp_path / "gt4.csv"), "w") as fp:
        fp.writelines(lines[:-10])
    with pytest.raises(ValueError):
        compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt4.csv"))
def get_frames_subset(dataset: ChunkedDataset, frame_start_idx: int, frame_end_idx: int) -> ChunkedDataset:
    """Get a new dataset with frames between start (included) and end (excluded).

    Assumptions:
    - the dataset has only 1 scene
    - the dataset is in numpy format and not zarr anymore

    :param dataset: the single-scene dataset.
    :param frame_start_idx: first frame to keep.
    :param frame_end_idx: where to stop taking frames (excluded).
    """
    if len(dataset.scenes) != 1:
        raise ValueError(f"dataset should have a single scene, got {len(dataset.scenes)}")
    if not isinstance(dataset.agents, np.ndarray):
        raise ValueError("dataset agents should be an editable np array")
    if not isinstance(dataset.tl_faces, np.ndarray):
        raise ValueError("dataset tl_faces should be an editable np array")
    if not isinstance(dataset.frames, np.ndarray):
        raise ValueError("dataset frames should be an editable np array")
    if frame_start_idx >= len(dataset.frames):
        raise ValueError(f"frame start {frame_start_idx} is beyond the length of the dataset")
    if frame_end_idx > len(dataset.frames):
        raise ValueError(f"frame end {frame_end_idx} is beyond the length of the dataset")
    if frame_start_idx >= frame_end_idx:
        raise ValueError(f"end frame {frame_end_idx} should be greater than start frame {frame_start_idx}")
    if frame_start_idx < 0:
        raise ValueError(f"start frame {frame_start_idx} should not be negative")

    new_dataset = ChunkedDataset("")
    new_dataset.scenes = dataset.scenes.copy()
    new_dataset.scenes[0]["start_time"] = dataset.frames[frame_start_idx]["timestamp"]
    new_dataset.scenes[0]["end_time"] = dataset.frames[frame_end_idx - 1]["timestamp"]
    new_dataset.frames = dataset.frames[frame_start_idx:frame_end_idx].copy()
    new_dataset.scenes[0]["frame_index_interval"] = (0, len(new_dataset.frames))

    # compute the agent and traffic-light slices spanned by the boundary frames
    agent_slice = get_agents_slice_from_frames(*dataset.frames[[frame_start_idx, frame_end_idx - 1]])
    tls_slice = get_tl_faces_slice_from_frames(*dataset.frames[[frame_start_idx, frame_end_idx - 1]])

    # re-base the per-frame index intervals so they start at 0 in the new dataset
    new_dataset.frames["agent_index_interval"] -= new_dataset.frames["agent_index_interval"][0, 0]
    new_dataset.frames["traffic_light_faces_index_interval"] -= new_dataset.frames["traffic_light_faces_index_interval"][0, 0]
    new_dataset.agents = dataset.agents[agent_slice].copy()
    new_dataset.tl_faces = dataset.tl_faces[tls_slice].copy()
    return new_dataset
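# A minimal, hedged sketch (not part of the source) showing what get_frames_subset
# returns for a tiny in-memory dataset; it reuses the dtypes imported above and the
# same mock-building pattern as the tests in this file.
def _frames_subset_sketch() -> None:
    ds = ChunkedDataset("")
    ds.scenes = np.zeros(1, dtype=SCENE_DTYPE)
    ds.scenes[0]["frame_index_interval"] = (0, 3)
    ds.frames = np.zeros(3, dtype=FRAME_DTYPE)
    ds.frames["timestamp"] = [10, 20, 30]
    ds.frames["agent_index_interval"] = [(0, 2), (2, 3), (3, 4)]
    ds.agents = np.zeros(4, dtype=AGENT_DTYPE)
    ds.tl_faces = np.zeros(0, dtype=TL_FACE_DTYPE)

    cut = get_frames_subset(ds, 1, 3)  # keep frames 1 and 2
    # scene times come from the boundary frames...
    assert cut.scenes[0]["start_time"] == 20 and cut.scenes[0]["end_time"] == 30
    # ...and per-frame agent intervals are re-based to start at 0
    assert np.all(cut.frames["agent_index_interval"] == [(0, 1), (1, 2)])
    assert len(cut.agents) == 2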
def dataset(tmp_path: Path) -> ChunkedDataset:
    dataset = ChunkedDataset(str(tmp_path))
    dataset.scenes = np.zeros(1, dtype=dataset.scenes.dtype)
    dataset.frames = np.zeros(SCENE_LENGTH, dtype=dataset.frames.dtype)
    dataset.agents = np.zeros(SCENE_LENGTH, dtype=dataset.agents.dtype)
    dataset.scenes[0]["frame_index_interval"] = (0, SCENE_LENGTH)

    # one agent per frame, with consecutive timestamps
    for idx in range(len(dataset.frames)):
        dataset.frames[idx]["agent_index_interval"] = (idx, idx + 1)
        dataset.frames[idx]["timestamp"] = idx

    for idx in range(len(dataset.agents)):
        # we don't check moving anymore, so the agent can stay still
        dataset.agents[idx]["extent"] = (5, 5, 5)
        dataset.agents[idx]["yaw"] = 0
        dataset.agents[idx]["track_id"] = 1
        dataset.agents[idx]["label_probabilities"][3] = 1.0  # label index 3 is the car class

    return dataset
def _mock_dataset() -> ChunkedDataset:
    zarr_dt = ChunkedDataset("")
    zarr_dt.scenes = np.zeros(1, dtype=SCENE_DTYPE)
    zarr_dt.scenes["frame_index_interval"][0] = (0, 4)

    zarr_dt.frames = np.zeros(4, dtype=FRAME_DTYPE)
    zarr_dt.frames["agent_index_interval"][0] = (0, 3)
    zarr_dt.frames["agent_index_interval"][1] = (3, 5)
    zarr_dt.frames["agent_index_interval"][2] = (5, 6)
    zarr_dt.frames["agent_index_interval"][3] = (6, 6)

    zarr_dt.agents = np.zeros(6, dtype=AGENT_DTYPE)
    # all agents except the first one are valid
    zarr_dt.agents["label_probabilities"][1:, 3] = 1

    # FRAME 0
    # second agent is close to ego and has id 1
    zarr_dt.agents["track_id"][1] = 1
    zarr_dt.agents["centroid"][1] = (1, 1)
    # third agent is too far and has id 2
    zarr_dt.agents["track_id"][2] = 2
    zarr_dt.agents["centroid"][2] = (100, 100)

    # FRAME 1
    # track 1 agent is still close to ego
    zarr_dt.agents["track_id"][3] = 1
    zarr_dt.agents["centroid"][3] = (1, 2)
    # track 2 is now close enough
    zarr_dt.agents["track_id"][4] = 2
    zarr_dt.agents["centroid"][4] = (1, 1)

    # FRAME 2
    # track 1 agent is far
    zarr_dt.agents["track_id"][5] = 1
    zarr_dt.agents["centroid"][5] = (100, 100)

    # FRAME 3 is empty
    zarr_dt.tl_faces = np.zeros(0, dtype=TL_FACE_DTYPE)
    return zarr_dt
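# A small hedged consistency check (not in the source) that makes the interval
# invariant of the mock explicit: per-frame agent intervals start at 0, are
# contiguous, and together cover every agent in the dataset.
def _check_mock_consistency() -> None:
    zarr_dt = _mock_dataset()
    intervals = zarr_dt.frames["agent_index_interval"]
    assert intervals[0, 0] == 0
    assert intervals[-1, 1] == len(zarr_dt.agents)
    # each frame's interval starts where the previous one ended (frame 3 is empty: (6, 6))
    assert np.all(intervals[1:, 0] == intervals[:-1, 1])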
def _get_simple_dataset(self) -> ChunkedDataset:
    # build a simple dataset with 3 frames:
    # frame 0:
    #   agent 0
    #   agent 1
    #   agent 2
    # frame 1:
    #   agent 0
    #   agent 1
    # frame 2:
    #   agent 0
    dataset = ChunkedDataset("")
    dataset.scenes = np.zeros(1, dtype=SCENE_DTYPE)
    dataset.frames = np.zeros(3, dtype=FRAME_DTYPE)
    dataset.agents = np.zeros(6, dtype=AGENT_DTYPE)
    dataset.scenes[0]["frame_index_interval"] = (0, 3)
    dataset.frames["agent_index_interval"] = [(0, 3), (3, 5), (5, 6)]
    dataset.agents["track_id"] = [0, 1, 2, 0, 1, 0]
    return dataset
def test_mock_dataset_frames_subset() -> None:
    zarr_dataset = ChunkedDataset("")
    zarr_dataset.scenes = np.zeros(1, dtype=SCENE_DTYPE)
    zarr_dataset.scenes[0]["frame_index_interval"] = (0, 4)
    zarr_dataset.frames = np.zeros(4, dtype=FRAME_DTYPE)
    zarr_dataset.frames["agent_index_interval"] = [(0, 1), (1, 2), (2, 3), (3, 4)]
    zarr_dataset.agents = np.zeros(4, dtype=AGENT_DTYPE)
    zarr_dataset.agents["track_id"] = np.arange(4)
    zarr_dataset.tl_faces = np.zeros(0, dtype=TL_FACE_DTYPE)

    frame_start = 1
    frame_end = 3
    zarr_cut = get_frames_subset(zarr_dataset, frame_start, frame_end)
    assert np.all(zarr_cut.agents["track_id"] == [1, 2])

    frame_start = 0
    frame_end = 3
    zarr_cut = get_frames_subset(zarr_dataset, frame_start, frame_end)
    assert np.all(zarr_cut.agents["track_id"] == [0, 1, 2])

    frame_start = 2
    frame_end = 4
    zarr_cut = get_frames_subset(zarr_dataset, frame_start, frame_end)
    assert np.all(zarr_cut.agents["track_id"] == [2, 3])
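# A hedged companion sketch (not among the source tests) for the validation
# branches of get_frames_subset: out-of-range or inverted bounds raise ValueError.
def test_frames_subset_invalid_bounds_sketch() -> None:
    zarr_dataset = ChunkedDataset("")
    zarr_dataset.scenes = np.zeros(1, dtype=SCENE_DTYPE)
    zarr_dataset.scenes[0]["frame_index_interval"] = (0, 4)
    zarr_dataset.frames = np.zeros(4, dtype=FRAME_DTYPE)
    zarr_dataset.frames["agent_index_interval"] = [(0, 1), (1, 2), (2, 3), (3, 4)]
    zarr_dataset.agents = np.zeros(4, dtype=AGENT_DTYPE)
    zarr_dataset.tl_faces = np.zeros(0, dtype=TL_FACE_DTYPE)

    with pytest.raises(ValueError):
        get_frames_subset(zarr_dataset, 3, 2)  # start >= end
    with pytest.raises(ValueError):
        get_frames_subset(zarr_dataset, -1, 2)  # negative start
    with pytest.raises(ValueError):
        get_frames_subset(zarr_dataset, 0, 5)  # end beyond the dataset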
def build_dataloader(
    cfg: Dict,
    split: str,
    data_manager: DataManager,
    dataset_class: Callable,
    rasterizer: Rasterizer,
    perturbation: Optional[Perturbation] = None,
    combine_scenes: bool = False,
) -> DataLoader:
    """Build a dataloader from a dataset of dataset_class.

    Note we have to pass rasterizer and perturbation because the factory functions
    for those are likely to change between repos.

    Args:
        cfg (dict): configuration dict
        split (str): used to index the cfg to get the correct datasets (train or val currently)
        data_manager (DataManager): manager for resolving paths
        dataset_class (Callable): a class object (EgoDataset or AgentDataset currently) to build the dataset
        rasterizer (Rasterizer): the rasterizer for the dataset
        perturbation (Optional[Perturbation]): an optional perturbation object
        combine_scenes (bool): whether to combine scenes that follow each other perfectly

    Returns:
        DataLoader: pytorch DataLoader object built with Concat and Sub datasets
    """
    data_loader_cfg = cfg[f"{split}_data_loader"]

    datasets = []
    for dataset_param in data_loader_cfg["datasets"]:
        zarr_dataset_path = data_manager.require(key=dataset_param["key"])
        zarr_dataset = ChunkedDataset(path=zarr_dataset_path)
        zarr_dataset.open()
        if combine_scenes:  # possible future deprecation
            zarr_dataset.scenes = get_combined_scenes(zarr_dataset.scenes)

        # Let's load the zarr dataset with our dataset.
        dataset = dataset_class(cfg, zarr_dataset, rasterizer, perturbation=perturbation)

        scene_indices = dataset_param["scene_indices"]
        scene_subsets = []

        if scene_indices[0] == -1:  # TODO replace with empty
            scene_subset = Subset(dataset, np.arange(0, len(dataset)))
            scene_subsets.append(scene_subset)
        else:
            for scene_idx in scene_indices:
                valid_indices = dataset.get_scene_indices(scene_idx)
                scene_subset = Subset(dataset, valid_indices)
                scene_subsets.append(scene_subset)

        datasets.extend(scene_subsets)

    # Let's concatenate the training scenes into one dataset for the data loader to load from.
    concat_dataset: ConcatDataset = ConcatDataset(datasets)

    # Initialize the data loader that our training loop will iterate on.
    batch_size = data_loader_cfg["batch_size"]
    shuffle = data_loader_cfg["shuffle"]
    num_workers = data_loader_cfg["num_workers"]
    dataloader = DataLoader(dataset=concat_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
    return dataloader
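# A hedged usage sketch for build_dataloader. The cfg below contains only the keys
# this function itself reads; a real config also needs the sections consumed by the
# dataset class and rasterizer (e.g. "raster_params"), and "scenes/train.zarr" is an
# illustrative dataset key, not a guaranteed path.
from l5kit.data import LocalDataManager
from l5kit.dataset import EgoDataset
from l5kit.rasterization import build_rasterizer

cfg = {
    "train_data_loader": {
        "datasets": [{"key": "scenes/train.zarr", "scene_indices": [-1]}],  # -1 -> use all scenes
        "batch_size": 12,
        "shuffle": True,
        "num_workers": 4,
    },
    # ... "raster_params", "model_params", etc. would go here ...
}
dm = LocalDataManager()  # resolves keys relative to the L5KIT_DATA_FOLDER env var
rasterizer = build_rasterizer(cfg, dm)
train_loader = build_dataloader(cfg, "train", dm, EgoDataset, rasterizer)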