class SatBoxRasterizerTest(unittest.TestCase):
    def __init__(self, *args, **kwargs):  # type: ignore
        super(SatBoxRasterizerTest, self).__init__(*args, **kwargs)
        self.dataset = ChunkedStateDataset(path="./l5kit/tests/data/single_scene.zarr")
        self.dataset.open()

    def test_shape(self) -> None:
        # just a translation and scale
        map_to_sat = np.block(
            [[np.eye(3) / 100, np.asarray([[1000], [1000], [1]])], [np.asarray([[0, 0, 0, 1]])]]
        )
        rast = SatBoxRasterizer(
            (224, 224),
            np.asarray((0.25, 0.25)),
            np.asarray((0.25, 0.5)),
            filter_agents_threshold=-1,
            map_im=np.zeros((10000, 10000, 3), dtype=np.uint8),
            map_to_sat=map_to_sat,
        )
        hist_length = 10
        out = rast.rasterize(self.dataset.frames[:hist_length], self.dataset.agents)
        assert out.shape == (224, 224, 10 * 2 + 3)
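
# Worked check (illustrative, not part of the original test): the np.block call
# above assembles a 4x4 homogeneous matrix from a 3x3 scale block, a 3x1
# translation column, and a [0, 0, 0, 1] bottom row.
def check_map_to_sat_layout() -> None:
    map_to_sat = np.block([[np.eye(3) / 100, np.asarray([[1000], [1000], [1]])], [np.asarray([[0, 0, 0, 1]])]])
    assert map_to_sat.shape == (4, 4)
    assert map_to_sat[0, 0] == 0.01  # scale entry from np.eye(3) / 100
    assert map_to_sat[0, 3] == 1000  # translation entry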
def test_compute_mse_error(tmp_path: Path) -> None:
    data = ChunkedStateDataset(path="./l5kit/tests/data/single_scene.zarr")
    data.open()
    export_zarr_to_ground_truth_csv(data, str(tmp_path / "gt1.csv"), 0, 12, 0.5)
    export_zarr_to_ground_truth_csv(data, str(tmp_path / "gt2.csv"), 0, 12, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt2.csv"))
    assert err == 0.0

    data_fake = ChunkedStateDataset("")
    data_fake.scenes = np.asarray(data.scenes).copy()
    data_fake.frames = np.asarray(data.frames).copy()
    data_fake.agents = np.asarray(data.agents).copy()
    data_fake.root = data.root
    data_fake.agents["centroid"] += np.random.rand(*data_fake.agents["centroid"].shape)

    export_zarr_to_ground_truth_csv(data_fake, str(tmp_path / "gt3.csv"), 0, 12, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt3.csv"))
    assert err > 0.0

    # test an invalid csv by dropping the last lines from a copy of gt1
    with open(str(tmp_path / "gt1.csv")) as fp_in, open(str(tmp_path / "gt4.csv"), "w") as fp_out:
        fp_out.writelines(fp_in.readlines()[:-10])
    with pytest.raises(ValueError):
        compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt4.csv"))
def hist_data() -> tuple:
    zarr_dataset = ChunkedStateDataset(path="./l5kit/tests/artefacts/single_scene.zarr")
    zarr_dataset.open()
    hist_frames = zarr_dataset.frames[100:111][::-1]  # reverse to get them as history
    hist_agents = filter_agents_by_frames(hist_frames, zarr_dataset.agents)
    return hist_frames, hist_agents
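
# Illustrative consumer of hist_data (not in the original file; assumes hist_data
# is the plain helper above rather than a pytest fixture): frames 100..110 are
# reversed, so index 0 is the most recent frame, and filter_agents_by_frames
# yields one agent array per frame.
def check_hist_data_alignment() -> None:
    hist_frames, hist_agents = hist_data()
    assert len(hist_frames) == len(hist_agents) == 11  # 11 frames: 100..110 inclusive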
class BoxRasterizerTest(unittest.TestCase):
    def __init__(self, *args, **kwargs):  # type: ignore
        super(BoxRasterizerTest, self).__init__(*args, **kwargs)
        self.dataset = ChunkedStateDataset(path="./l5kit/tests/data/single_scene.zarr")
        self.dataset.open()
        self.hist_frames = self.dataset.frames[100:101]  # we know this has agents

    def test_ego_center(self) -> None:
        values = [(0.5, 0.5), (0.25, 0.5), (0.75, 0.5), (0.5, 0.25), (0.5, 0.75)]
        for v in values:
            rast = BoxRasterizer(
                (224, 224), np.asarray((0.25, 0.25)), ego_center=np.asarray(v), filter_agents_threshold=0.0
            )
            out = rast.rasterize(self.hist_frames, self.dataset.agents)
            assert out[..., -1].sum() > 0

    def test_agents_map(self) -> None:
        rast = BoxRasterizer((224, 224), np.asarray((0.25, 0.25)), np.asarray((0.25, 0.5)), filter_agents_threshold=1)
        out = rast.rasterize(self.hist_frames, self.dataset.agents)
        assert out[..., 0].sum() == 0

        rast = BoxRasterizer((224, 224), np.asarray((0.25, 0.25)), np.asarray((0.25, 0.5)), filter_agents_threshold=0.0)
        out = rast.rasterize(self.hist_frames, self.dataset.agents)
        assert out[..., 0].sum() > 0

    def test_agent_ego(self) -> None:
        rast = BoxRasterizer((224, 224), np.asarray((0.25, 0.25)), np.asarray((0.25, 0.5)), filter_agents_threshold=-1)
        agents = self.dataset.agents[slice(*self.hist_frames[0]["agent_index_interval"])]
        for ag in agents:
            out = rast.rasterize(self.hist_frames, self.dataset.agents, ag)
            assert out[..., -1].sum() > 0

    def test_shape(self) -> None:
        rast = BoxRasterizer((224, 224), np.asarray((0.25, 0.25)), np.asarray((0.25, 0.5)), filter_agents_threshold=-1)
        hist_length = 10
        out = rast.rasterize(self.dataset.frames[:hist_length], self.dataset.agents)
        assert out.shape == (224, 224, 10 * 2)
@pytest.mark.parametrize("perturb_prob", [1.0])  # a parametrization is assumed here so pytest can supply perturb_prob
def test_perturbation_is_applied(perturb_prob: float) -> None:
    cfg = load_config_data("./l5kit/tests/artefacts/config.yaml")
    zarr_dataset = ChunkedStateDataset(path="./l5kit/tests/artefacts/single_scene.zarr")
    zarr_dataset.open()
    dm = LocalDataManager("./l5kit/tests/artefacts/")
    rasterizer = build_rasterizer(cfg, dm)

    dataset = EgoDataset(cfg, zarr_dataset, rasterizer, None)  # no perturb
    data_no_perturb = dataset[0]

    # note we cannot change the object we already have as a partial is built at init time
    perturb = AckermanPerturbation(ReplayRandomGenerator(np.asarray([[4.0, 0.33]])), perturb_prob=perturb_prob)
    dataset = EgoDataset(cfg, zarr_dataset, rasterizer, perturb)  # perturb
    data_perturb = dataset[0]

    assert np.linalg.norm(data_no_perturb["target_positions"] - data_perturb["target_positions"]) > 0
    assert np.linalg.norm(data_no_perturb["target_yaws"] - data_perturb["target_yaws"]) > 0
def select_agents(
    input_folder: str,
    th_agent_prob: float,
    th_history_num_frames: int,
    th_future_num_frames: int,
    th_yaw_degree: float,
    th_extent_ratio: float,
    th_movement: float,
    th_distance_av: float,
    num_workers: int,
) -> None:
    """
    Filter agents from zarr INPUT_FOLDER according to multiple thresholds and store a boolean array of the same shape.
    """
    assert th_future_num_frames > 0

    # ===== LOAD
    dm = LocalDataManager()
    input_folder = dm.require(input_folder)
    zarr_dataset = ChunkedStateDataset(path=input_folder)
    zarr_dataset.open()

    output_group = f"{th_history_num_frames}_{th_future_num_frames}_{th_agent_prob}"
    if "agents_mask" in zarr_dataset.root and f"agents_mask/{output_group}" in zarr_dataset.root:
        raise FileExistsError(f"{output_group} exists already! only one is supported for now!")

    frame_index_intervals = zarr_dataset.scenes["frame_index_interval"]

    # build a partial with all args except the first one (will be supplied by the pool workers)
    get_valid_agents_partial = partial(
        get_valid_agents,
        dataset=zarr_dataset,
        th_frames_past=th_history_num_frames,
        th_frames_future=th_future_num_frames,
        th_agent_filter_probability_threshold=th_agent_prob,
        th_yaw_degree=th_yaw_degree,
        th_extent_ratio=th_extent_ratio,
        th_movement=th_movement,
        th_distance_av=th_distance_av,
    )

    try:
        root = zarr.open(zarr_dataset.path, mode="a")
        root.create_group("agents_mask")
    except ValueError:
        pass  # group is already there

    agents_mask = zarr.open_array(
        str(Path(zarr_dataset.path) / "agents_mask" / output_group),
        mode="w",
        shape=(len(zarr_dataset.agents),),
        chunks=(10000,),
        dtype=bool,  # np.bool in the original; that alias was deprecated and later removed from NumPy
        synchronizer=zarr.ProcessSynchronizer(f"/tmp/ag_mask_{str(uuid4())}.sync"),
    )

    report: Counter = Counter()
    print("starting pool...")
    with Pool(num_workers) as pool:
        tasks = tqdm(enumerate(pool.imap_unordered(get_valid_agents_partial, frame_index_intervals)))
        for idx, (mask, count, agents_range) in tasks:
            report += count
            agents_mask[agents_range[0]:agents_range[1]] = mask
            tasks.set_description(f"{idx + 1}/{len(frame_index_intervals)}")
        print("collecting results..")

    assert (
        report["total_agent_frames"] == report["selected_agent_frames"] + report["total_reject"]
    ), "something went REALLY wrong"

    agents_cfg = {
        "th_history_num_frames": th_history_num_frames,
        "th_future_num_frames": th_future_num_frames,
        "th_agent_filter_probability_threshold": th_agent_prob,
        "th_yaw_degree": th_yaw_degree,
        "th_extent_ratio": th_extent_ratio,
        "th_movement": th_movement,
        "th_distance_av": th_distance_av,
    }

    # print report
    pp = pprint.PrettyPrinter(indent=4)
    print(f"start report for {input_folder}")
    pp.pprint({**agents_cfg, **report})
    print(f"end report for {input_folder}")
    print("==============================")
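
# Usage sketch (an assumption, not part of the original script): once
# select_agents has run, the boolean mask can be read back from the group it was
# written to. The default group name follows the
# f"{th_history_num_frames}_{th_future_num_frames}_{th_agent_prob}" format above.
def load_agents_mask(zarr_path: str, output_group: str = "0_12_0.5") -> np.ndarray:
    mask = zarr.open_array(str(Path(zarr_path) / "agents_mask" / output_group), mode="r")
    return mask[:]  # one boolean per agent, True if it passed all thresholds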
def zarr_dataset() -> ChunkedStateDataset:
    zarr_dataset = ChunkedStateDataset(path="./l5kit/tests/artefacts/single_scene.zarr")
    zarr_dataset.open()
    return zarr_dataset
def build_dataloader(
    cfg: Dict,
    split: str,
    data_manager: DataManager,
    dataset_class: Callable,
    rasterizer: Rasterizer,
    perturbation: Optional[Perturbation] = None,
) -> DataLoader:
    """
    Util function to build a dataloader from a dataset of dataset_class.
    Note we have to pass rasterizer and perturbation as the factory functions for those are likely to change
    between repos.

    Args:
        cfg (dict): configuration dict
        split (str): this will be used to index the cfg to get the correct datasets (train or val currently)
        data_manager (DataManager): manager for resolving paths
        dataset_class (Callable): a class object (EgoDataset or AgentDataset currently) to build the dataset
        rasterizer (Rasterizer): the rasterizer for the dataset
        perturbation (Optional[Perturbation]): an optional perturbation object

    Returns:
        DataLoader: pytorch Dataloader object built with Concat and Sub datasets
    """
    data_loader_cfg = cfg[f"{split}_data_loader"]
    datasets = []
    for dataset_param in data_loader_cfg["datasets"]:
        zarr_dataset_path = data_manager.require(key=dataset_param["key"])
        zarr_dataset = ChunkedStateDataset(path=zarr_dataset_path)
        zarr_dataset.open()
        zarr_dataset.scenes = get_combined_scenes(zarr_dataset.scenes)

        # Let's load the zarr dataset with our dataset.
        dataset = dataset_class(cfg, zarr_dataset, rasterizer, perturbation=perturbation)

        scene_indices = dataset_param["scene_indices"]
        scene_subsets = []

        if scene_indices[0] == -1:  # TODO replace with empty
            scene_subset = Subset(dataset, np.arange(0, len(dataset)))
            scene_subsets.append(scene_subset)
        else:
            for scene_idx in scene_indices:
                valid_indices = dataset.get_scene_indices(scene_idx)
                scene_subset = Subset(dataset, valid_indices)
                scene_subsets.append(scene_subset)

        datasets.extend(scene_subsets)

    # Let's concatenate the training scenes into one dataset for the data loader to load from.
    concat_dataset: ConcatDataset = ConcatDataset(datasets)

    # Initialize the data loader that our training loop will iterate on.
    batch_size = data_loader_cfg["batch_size"]
    shuffle = data_loader_cfg["shuffle"]
    num_workers = data_loader_cfg["num_workers"]
    dataloader = DataLoader(dataset=concat_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    return dataloader
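
# Illustrative wiring (an assumption about the surrounding config, not part of
# the original module): cfg must contain a f"{split}_data_loader" section with
# "datasets", "batch_size", "shuffle" and "num_workers" keys, matching the
# lookups performed above. EgoDataset is assumed importable here; it is the
# dataset class used elsewhere in this repo.
def build_train_loader(cfg: Dict, dm: DataManager, rasterizer: Rasterizer) -> DataLoader:
    return build_dataloader(cfg, "train", dm, EgoDataset, rasterizer, perturbation=None)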
class TestDeepPredictionSampling(unittest.TestCase):
    def __init__(self, *args, **kwargs):  # type: ignore
        super(TestDeepPredictionSampling, self).__init__(*args, **kwargs)
        self.dataset = ChunkedStateDataset(path="./l5kit/tests/data/single_scene.zarr")
        self.dataset.open()
        self.raster_size = (100, 100)
        self.pixel_size = np.array([1.0, 1.0])
        self.ego_center = np.array([0.5, 0.25])
        self.filter_agents_threshold = 0.5
        self.rast = StubRasterizer(self.raster_size, self.pixel_size, self.ego_center, self.filter_agents_threshold)

    def get_partial(
        self, history_num_frames: int, history_step_size: int, future_num_frames: int, future_step_size: int
    ) -> Callable:
        return functools.partial(
            generate_agent_sample,
            raster_size=self.raster_size,
            pixel_size=self.pixel_size,
            ego_center=self.ego_center,
            history_num_frames=history_num_frames,
            history_step_size=history_step_size,
            future_num_frames=future_num_frames,
            future_step_size=future_step_size,
            filter_agents_threshold=self.filter_agents_threshold,
            rasterizer=self.rast,
        )

    def test_no_frames(self) -> None:
        gen_partial = self.get_partial(2, 1, 4, 1)
        with self.assertRaises(IndexError):
            gen_partial(
                state_index=0,
                frames=np.zeros(0, FRAME_DTYPE),
                all_agents=np.zeros(0, AGENT_DTYPE),
                selected_track_id=None,
            )

    def test_out_bounds(self) -> None:
        gen_partial = self.get_partial(0, 1, 10, 1)
        data = gen_partial(
            state_index=0,
            frames=np.asarray(self.dataset.frames[90:96]),
            all_agents=self.dataset.agents,
            selected_track_id=None,
        )
        assert bool(np.all(data["target_availabilities"][:5])) is True
        assert bool(np.all(data["target_availabilities"][5:])) is False

    def test_future(self) -> None:
        steps = [(1, 1), (2, 2), (4, 4)]  # all of these should work
        for step, step_size in steps:
            gen_partial = self.get_partial(2, 1, step, step_size)
            data = gen_partial(
                state_index=10,
                frames=np.asarray(self.dataset.frames[90:150]),
                all_agents=self.dataset.agents,
                selected_track_id=None,
            )
            assert data["target_positions"].shape == (step, 2)
            assert data["target_yaws"].shape == (step, 1)
            assert data["target_availabilities"].shape == (step, 3)
            assert data["centroid"].shape == (2,)
            assert isinstance(data["yaw"], float)
            assert data["extent"].shape == (3,)
            assert bool(np.all(data["target_availabilities"])) is True
# Tail of a per-dataset reporting routine: for each (past, future) frame-threshold
# pair, count the agents whose mask satisfies both requirements. `past_steps`,
# `future_steps`, `agents_mask_np` and `zarr_dataset` come from the enclosing scope.
table = PrettyTable(field_names=["past/future"] + [str(step) for step in future_steps])
for step_p in tqdm(past_steps, desc="computing past/future table"):
    row = [step_p]
    for step_f in future_steps:
        past_mask = agents_mask_np[:, 0] >= step_p
        future_mask = agents_mask_np[:, 1] >= step_f
        row.append(np.sum(past_mask * future_mask))
    table.add_row(row)
print(table)
print(f"end report for {zarr_dataset.path}")
print("==============================")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_folders", nargs="+", type=str, required=True, help="zarr path")
    parser.add_argument("--th_agent_prob", type=float, default=0.5, help="perception threshold on agents of interest")
    parser.add_argument("--th_yaw_degree", type=float, default=TH_YAW_DEGREE, help="max absolute yaw change in degrees")
    parser.add_argument("--th_extent_ratio", type=float, default=TH_EXTENT_RATIO, help="max change in area allowed")
    parser.add_argument("--th_distance_av", type=float, default=TH_DISTANCE_AV, help="max distance from AV in meters")
    # the next four arguments are assumed here so the call below matches the
    # select_agents signature defined above; their defaults are illustrative
    parser.add_argument("--th_history_num_frames", type=int, default=0, help="frames required in the past")
    parser.add_argument("--th_future_num_frames", type=int, default=12, help="frames required in the future")
    parser.add_argument("--th_movement", type=float, default=3.0, help="movement threshold in meters")
    parser.add_argument("--num_workers", type=int, default=16, help="number of worker processes")
    args = parser.parse_args()

    for input_folder in args.input_folders:
        # select_agents resolves and opens the zarr itself, so it takes the path
        select_agents(
            input_folder,
            args.th_agent_prob,
            args.th_history_num_frames,
            args.th_future_num_frames,
            args.th_yaw_degree,
            args.th_extent_ratio,
            args.th_movement,
            args.th_distance_av,
            args.num_workers,
        )
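
# Example invocation (illustrative; the script name and dataset path are
# placeholders, and the threshold flags fall back to the defaults defined above):
#
#   python select_agents.py --input_folders ./scenes/sample.zarr --th_agent_prob 0.5
#
# This writes a boolean zarr array under agents_mask/<hist>_<future>_<prob>
# inside each input dataset, as done by select_agents above.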