Example #1
def test_compute_mse_error(tmp_path: Path,
                           zarr_dataset: ChunkedDataset) -> None:
    export_zarr_to_ground_truth_csv(zarr_dataset, str(tmp_path / "gt1.csv"),
                                    10, 50, 0.5)
    export_zarr_to_ground_truth_csv(zarr_dataset, str(tmp_path / "gt2.csv"),
                                    10, 50, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"),
                                str(tmp_path / "gt2.csv"))
    assert np.all(err == 0.0)

    data_fake = ChunkedDataset(str(tmp_path))
    data_fake.scenes = np.asarray(zarr_dataset.scenes).copy()
    data_fake.frames = np.asarray(zarr_dataset.frames).copy()
    data_fake.agents = np.asarray(zarr_dataset.agents).copy()
    data_fake.agents["centroid"] += np.random.rand(
        *data_fake.agents["centroid"].shape) * 1e-2

    export_zarr_to_ground_truth_csv(data_fake, str(tmp_path / "gt3.csv"), 10,
                                    50, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"),
                                str(tmp_path / "gt3.csv"))
    assert np.any(err > 0.0)

    # test invalid conf by removing lines from gt1
    with open(str(tmp_path / "gt1.csv")) as fp_in, \
            open(str(tmp_path / "gt4.csv"), "w") as fp_out:
        fp_out.writelines(fp_in.readlines()[:-10])

    with pytest.raises(ValueError):
        compute_mse_error_csv(str(tmp_path / "gt1.csv"),
                              str(tmp_path / "gt4.csv"))
Example #2
def test_graph_rasterizer_no_error():
    # index of a sample that contains a traffic light
    index = 150

    cfg = load_config_data(config_file)
    cfg["raster_params"]["map_type"] = "semantic_graph"

    data_loader_conf = cfg.get("val_data_loader")
    dm = LocalDataManager()
    dataset_path = dm.require(data_loader_conf.get("key"))

    zarr_dataset = ChunkedDataset(dataset_path)
    zarr_dataset.open()

    rasterizer = build_rasterizer(cfg=cfg, data_manager=dm)
    dataset = AgentGraphDataset(cfg=cfg,
                                zarr_dataset=zarr_dataset,
                                rasterizer=rasterizer)
    data_point = dataset[index]

    assert "graph" in data_point
    assert "lanes" in data_point["graph"]
    assert isinstance(data_point["graph"]["lanes"], list)

    print()
    print(data_point.keys())
    element_types = SemGraphRasterizer.keys
    for e in element_types:
        print(f"---- {e} ----")
        if len(data_point["graph"][e]) > 0:
            print(data_point["graph"][e][0])
Example #3
def uncompress_zar(fn_src, fn_dst):
    print(fn_src)
    print(fn_dst)
    print(zarr.storage.default_compressor)
    zarr.storage.default_compressor = None
    ds = ChunkedDataset(fn_src).open(cached=False)

    dst_dataset = ChunkedDataset(fn_dst)
    dst_dataset.initialize()
    # dst_dataset.initialize(
    #     'w',
    #     # num_scenes=len(ds.scenes),
    #     # num_frames=len(ds.frames),
    #     # num_agents=len(ds.agents),
    #     # num_tl_faces=len(ds.tl_faces)
    # )

    with utils.timeit_context("copy scenes"):
        dst_dataset.scenes.append(ds.scenes[:])
    with utils.timeit_context("copy frames"):
        dst_dataset.frames.append(ds.frames[:])
    with utils.timeit_context("copy agents"):
        for i in tqdm(range(0, len(ds.agents), 1024 * 1024)):
            dst_dataset.agents.append(ds.agents[i:i + 1024 * 1024])
    with utils.timeit_context("copy tl_faces"):
        dst_dataset.tl_faces.append(ds.tl_faces[:])
Example #4
    def setup(self, stage=None):
        train_zarr = ChunkedDataset(self.dm.require(
            self.train_cfg["key"])).open()
        self.train_dataset = AgentDataset(self.cfg, train_zarr,
                                          self.rasterizer)

        val_zarr = ChunkedDataset(self.dm.require(self.val_cfg["key"])).open()
        self.val_dataset = AgentDataset(self.cfg, val_zarr, self.rasterizer)
Example #5
def hist_data() -> tuple:
    zarr_dataset = ChunkedDataset(
        path="./l5kit/tests/artefacts/single_scene.zarr")
    zarr_dataset.open()
    hist_frames = zarr_dataset.frames[
        100:111][::-1]  # reverse to get them as history
    hist_agents = filter_agents_by_frames(hist_frames, zarr_dataset.agents)
    return hist_frames, hist_agents
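
filter_agents_by_frames returns one agents array per input frame, so the two returned sequences stay aligned. A minimal consumption sketch (illustrative only; field names follow the l5kit zarr schema):

hist_frames, hist_agents = hist_data()
# newest frame first, since the slice above was reversed
for frame, agents in zip(hist_frames, hist_agents):
    print(frame["timestamp"], len(agents))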
Example #6
def zarr_cat_dataset(dmg: LocalDataManager, tmp_path: Path) -> ChunkedDataset:
    concat_count = 4
    zarr_input_path = dmg.require("single_scene.zarr")
    zarr_output_path = str(tmp_path / f"{uuid4()}.zarr")

    zarr_concat([zarr_input_path] * concat_count, zarr_output_path)
    zarr_cat_dataset = ChunkedDataset(zarr_output_path)
    zarr_cat_dataset.open()
    return zarr_cat_dataset
Example #7
def get_frames_subset(dataset: ChunkedDataset, frame_start_idx: int,
                      frame_end_idx: int) -> ChunkedDataset:
    """Get a new dataset with frames between start (included) and end (excluded).
    Assumptions:
    - the dataset has only 1 scene
    - the dataset is in numpy format and not zarr anymore

    :param dataset: the single-scene dataset.
    :param frame_start_idx: first frame to keep.
    :param frame_end_idx: where to stop taking frames (excluded).

    """
    if not len(dataset.scenes) == 1:
        raise ValueError(
            f"dataset should have a single scene, got {len(dataset.scenes)}")
    if not isinstance(dataset.agents, np.ndarray):
        raise ValueError("dataset agents should be an editable np array")
    if not isinstance(dataset.tl_faces, np.ndarray):
        raise ValueError("dataset tls should be an editable np array")
    if not isinstance(dataset.frames, np.ndarray):
        raise ValueError("dataset frames should be an editable np array")
    if frame_start_idx >= len(dataset.frames):
        raise ValueError(
            f"frame start {frame_start_idx} is over the length of the dataset")
    if frame_end_idx > len(dataset.frames):
        raise ValueError(
            f"frame end {frame_end_idx} is over the length of the dataset")
    if frame_start_idx >= frame_end_idx:
        raise ValueError(
            f"end frame {frame_end_idx} should be higher than start {frame_start_idx}"
        )
    if frame_start_idx < 0:
        raise ValueError(f"start frame {frame_start_idx} should be positive")

    new_dataset = ChunkedDataset("")
    new_dataset.scenes = dataset.scenes.copy()
    new_dataset.scenes[0]["start_time"] = dataset.frames[frame_start_idx][
        "timestamp"]
    new_dataset.scenes[0]["end_time"] = dataset.frames[frame_end_idx -
                                                       1]["timestamp"]

    new_dataset.frames = dataset.frames[frame_start_idx:frame_end_idx].copy()
    new_dataset.scenes[0]["frame_index_interval"] = (0,
                                                     len(new_dataset.frames))

    agent_slice = get_agents_slice_from_frames(
        *dataset.frames[[frame_start_idx, frame_end_idx - 1]])
    tls_slice = get_tl_faces_slice_from_frames(
        *dataset.frames[[frame_start_idx, frame_end_idx - 1]])
    new_dataset.frames["agent_index_interval"] -= new_dataset.frames[
        "agent_index_interval"][0, 0]
    new_dataset.frames[
        "traffic_light_faces_index_interval"] -= new_dataset.frames[
            "traffic_light_faces_index_interval"][0, 0]
    new_dataset.agents = dataset.agents[agent_slice].copy()
    new_dataset.tl_faces = dataset.tl_faces[tls_slice].copy()
    return new_dataset
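A minimal usage sketch for get_frames_subset (the zarr path and the 0..10 frame range are hypothetical; the records are copied into editable numpy arrays first, as the function's assumptions require):

import numpy as np
from l5kit.data import ChunkedDataset

zarr_dt = ChunkedDataset("./single_scene.zarr").open()  # hypothetical path

dataset = ChunkedDataset("")
dataset.scenes = np.asarray(zarr_dt.scenes).copy()
dataset.frames = np.asarray(zarr_dt.frames).copy()
dataset.agents = np.asarray(zarr_dt.agents).copy()
dataset.tl_faces = np.asarray(zarr_dt.tl_faces).copy()

subset = get_frames_subset(dataset, 0, 10)  # keep frames 0..9 of the single scene
assert len(subset.frames) == 10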
def create_chopped_mask(zarr_path: str, th_agent_prob: float,
                        num_frames_to_copy: int, min_frame_future: int) -> str:
    """Create mask to emulate chopped dataset with gt data.

    Args:
        zarr_path (str): input zarr path to be chopped
        th_agent_prob (float): threshold over agents probabilities used in select_agents function
        num_frames_to_copy (int):  number of frames to copy from the beginning of each scene, others will be discarded
        min_frame_future (int): minimum number of frames that must be available in the future for an agent

    Returns:
        str: Path to saved mask
    """
    zarr_path = Path(zarr_path)
    mask_chopped_path = get_mask_chopped_path(zarr_path, th_agent_prob,
                                              num_frames_to_copy,
                                              min_frame_future)

    # Create standard mask for the dataset so we can use it to filter out unreliable agents
    zarr_dt = ChunkedDataset(str(zarr_path))
    zarr_dt.open()

    agents_mask_path = Path(zarr_path) / f"agents_mask/{th_agent_prob}"
    if not agents_mask_path.exists(
    ):  # don't check in root but check for the path
        select_agents(
            zarr_dt,
            th_agent_prob=th_agent_prob,
            th_yaw_degree=TH_YAW_DEGREE,
            th_extent_ratio=TH_EXTENT_RATIO,
            th_distance_av=TH_DISTANCE_AV,
        )
    agents_mask_origin = np.asarray(convenience.load(str(agents_mask_path)))

    # compute the chopped boolean mask, but also the original one limited to frames of interest for GT csv
    agents_mask_orig_bool = np.zeros(len(zarr_dt.agents), dtype=bool)

    for idx in range(len(zarr_dt.scenes)):
        scene = zarr_dt.scenes[idx]

        frame_original = zarr_dt.frames[scene["frame_index_interval"][0] +
                                        num_frames_to_copy - 1]
        slice_agents_original = get_agents_slice_from_frames(frame_original)

        mask = agents_mask_origin[slice_agents_original][:,
                                                         1] >= min_frame_future
        agents_mask_orig_bool[slice_agents_original] = mask.copy()

    # store the mask for the frames of interest
    np.savez(str(mask_chopped_path), agents_mask_orig_bool)
    return str(mask_chopped_path)
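A sketch of consuming the saved mask (arguments are hypothetical and assume get_mask_chopped_path yields a .npz filename; np.savez stores a positional array under the "arr_0" key, matching the loading pattern used elsewhere in these examples):

mask_path = create_chopped_mask("./scenes/validate.zarr", 0.5, 100, 10)
agents_mask = np.load(mask_path)["arr_0"]
# the boolean mask can then be passed to AgentDataset via agents_mask=agents_mask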
Example #9
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare loaders.

    Args:
        train_batch_size (int, optional): batch size for training dataset.
            Default is `32`.
        valid_batch_size (int, optional): batch size for validation dataset.
            Default is `64`.

    Returns:
        train and validation data loaders
    """
    rasterizer = build_rasterizer(cfg, dm)
    DATASET_CLASS = AgentDataset

    train_zarr = ChunkedDataset(
        dm.require("scenes/train_chopped_100/train.zarr")).open()
    train_dataset = DATASET_CLASS(cfg, train_zarr, rasterizer)

    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=True,
        worker_init_fn=seed_all,
        drop_last=True,
    )
    # train_loader = BatchPrefetchLoaderWrapper(train_loader, num_prefetches=6)
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    valid_zarr_path = dm.require("scenes/validate_chopped_100/validate.zarr")
    mask_path = dm.require("scenes/validate_chopped_100/mask.npz")
    valid_mask = np.load(mask_path)["arr_0"]
    valid_gt_path = dm.require("scenes/validate_chopped_100/gt.csv")

    valid_zarr = ChunkedDataset(valid_zarr_path).open()
    valid_dataset = DATASET_CLASS(cfg,
                                  valid_zarr,
                                  rasterizer,
                                  agents_mask=valid_mask)
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=valid_batch_size,
        shuffle=False,
        num_workers=NUM_WORKERS,
    )
    print(f" * Number of elements in valid dataset - {len(valid_dataset)}")
    print(f" * Number of elements in valid loader - {len(valid_loader)}")

    return train_loader, (valid_loader, valid_gt_path)
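A short sketch of consuming the returned pair; the ground-truth csv path travels with the validation loader so predictions can later be scored against it (batch keys follow the AgentDataset output dict):

train_loader, (valid_loader, valid_gt_path) = get_loaders(train_batch_size=32,
                                                          valid_batch_size=64)
batch = next(iter(train_loader))
print(batch["image"].shape, batch["target_positions"].shape)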
def test_zarr_scenes_chunk(dmg: LocalDataManager, tmp_path: Path,
                           zarr_dataset: ChunkedDataset,
                           num_frames_to_copy: int) -> None:
    # first let's concat so we have multiple scenes
    concat_count = 10
    zarr_input_path = dmg.require("single_scene.zarr")
    zarr_concatenated_path = str(tmp_path / f"{uuid4()}.zarr")
    zarr_concat([zarr_input_path] * concat_count, zarr_concatenated_path)

    # now let's chunk it
    zarr_chopped_path = str(tmp_path / f"{uuid4()}.zarr")
    zarr_scenes_chop(zarr_concatenated_path,
                     zarr_chopped_path,
                     num_frames_to_copy=num_frames_to_copy)

    # open both and compare
    zarr_concatenated = ChunkedDataset(zarr_concatenated_path)
    zarr_concatenated.open()
    zarr_chopped = ChunkedDataset(zarr_chopped_path)
    zarr_chopped.open()

    assert len(zarr_concatenated.scenes) == len(zarr_chopped.scenes)
    assert len(
        zarr_chopped.frames) == num_frames_to_copy * len(zarr_chopped.scenes)

    for idx in range(len(zarr_concatenated.scenes)):
        scene_cat = zarr_concatenated.scenes[idx]
        scene_chopped = zarr_chopped.scenes[idx]

        frames_cat = zarr_concatenated.frames[
            scene_cat["frame_index_interval"][0]:
            scene_cat["frame_index_interval"][0] + num_frames_to_copy]

        frames_chopped = zarr_chopped.frames[get_frames_slice_from_scenes(
            scene_chopped)]

        agents_cat = zarr_concatenated.agents[get_agents_slice_from_frames(
            *frames_cat[[0, -1]])]
        tl_faces_cat = zarr_concatenated.tl_faces[
            get_tl_faces_slice_from_frames(*frames_cat[[0, -1]])]

        agents_chopped = zarr_chopped.agents[get_agents_slice_from_frames(
            *frames_chopped[[0, -1]])]
        tl_faces_chopped = zarr_chopped.tl_faces[
            get_tl_faces_slice_from_frames(*frames_chopped[[0, -1]])]

        assert scene_chopped["host"] == scene_cat["host"]
        assert scene_chopped["start_time"] == scene_cat["start_time"]
        assert scene_chopped["end_time"] == scene_cat["end_time"]

        assert len(frames_chopped) == num_frames_to_copy

        assert np.all(frames_chopped["ego_translation"] ==
                      frames_cat["ego_translation"][:num_frames_to_copy])
        assert np.all(frames_chopped["ego_rotation"] ==
                      frames_cat["ego_rotation"][:num_frames_to_copy])

        assert np.all(agents_chopped == agents_cat)
        assert np.all(tl_faces_chopped == tl_faces_cat)
Example #11
    def setup(self, stage=None):
        if self.data_manager is None:
            self.data_manager = LocalDataManager(self.data_root)
        if self.rasterizer is None:
            self.rasterizer = build_rasterizer(self.config, self.data_manager)
        if stage == 'fit' or stage is None:
            train_zarr = ChunkedDataset(
                self.data_manager.require(self.train_split)).open(
                    cache_size_bytes=int(self.cache_size))
            train_data = AgentDataset(self.config, train_zarr, self.rasterizer)

            if self.train_idxs is not None:
                train_data = Subset(train_data, self.train_idxs)
            if self.val_split is None or self.val_split == self.train_split:
                tl = len(train_data)
                vl = int(tl * self.val_proportion)
                self.train_data, self.val_data = random_split(
                    train_data, [tl - vl, vl])
            else:
                self.train_data = train_data
                val_zarr = ChunkedDataset(
                    self.data_manager.require(self.val_split)).open(
                        cache_size_bytes=int(self.cache_size))
                self.val_data = AgentDataset(self.config, val_zarr,
                                             self.rasterizer)
                if self.val_idxs is not None:
                    self.val_data = Subset(self.val_data, self.val_idxs)
            if self.raster_cache_size:
                self.train_data = CachedDataset(self.train_data,
                                                self.raster_cache_size)
                self.val_data = CachedDataset(self.val_data,
                                              self.raster_cache_size)
        if stage == 'test' or stage is None:
            test_zarr = ChunkedDataset(
                self.data_manager.require(self.test_split)).open(
                    cache_size_bytes=int(self.cache_size))
            if self.test_mask is not None:
                test_data = AgentDataset(self.config,
                                         test_zarr,
                                         self.rasterizer,
                                         agents_mask=self.test_mask)
            else:
                test_data = AgentDataset(self.config, test_zarr,
                                         self.rasterizer)
            if self.test_idxs is not None:
                test_data = Subset(test_data, self.test_idxs)
            else:
                self.test_idxs = np.arange(start=1, stop=len(test_data) + 1)
            self.test_data = IndexedDataset(test_data, self.test_idxs)
Example #12
    def __init__(self):
        print("Visualization Class initialized.")
        # get config
        self.cfg = load_config_data("/mnt/extra/kaggle/competitions/2020lyft/ProjectLyft/Modules/visualisation_config.yaml")
        print(self.cfg)

        dm = LocalDataManager()
        self.dataset_path = dm.require(self.cfg["val_data_loader"]["key"])
        self.zarr_dataset = ChunkedDataset(self.dataset_path)
        self.zarr_dataset.open()


        # Dataset package
        self.rast = build_rasterizer(self.cfg, dm)
        self.dataset = EgoDataset(self.cfg, self.zarr_dataset, self.rast)
def test_zarr_split(dmg: LocalDataManager, tmp_path: Path,
                    zarr_dataset: ChunkedDataset) -> None:
    concat_count = 10
    zarr_input_path = dmg.require("single_scene.zarr")
    zarr_concatenated_path = str(tmp_path / f"{uuid4()}.zarr")
    zarr_concat([zarr_input_path] * concat_count, zarr_concatenated_path)

    split_infos = [
        {
            "name": f"{uuid4()}.zarr",
            "split_size_GB": 0.002
        },  # cut around 2MB
        {
            "name": f"{uuid4()}.zarr",
            "split_size_GB": 0.001
        },  # cut around 0.5MB
        {
            "name": f"{uuid4()}.zarr",
            "split_size_GB": -1
        },  # everything else
    ]

    scene_splits = zarr_split(zarr_concatenated_path, str(tmp_path),
                              split_infos)

    # load the zarrs and check elements
    zarr_concatenated = ChunkedDataset(zarr_concatenated_path)
    zarr_concatenated.open()

    for scene_split, split_info in zip(scene_splits, split_infos):
        zarr_out = ChunkedDataset(str(tmp_path / str(split_info["name"])))
        zarr_out.open()

        # compare elements at the start and end of each scene in both zarrs
        for idx_scene in range(len(zarr_out.scenes)):
            # compare elements in the scene
            input_scene = zarr_concatenated.scenes[scene_split[0] + idx_scene]
            input_frames = zarr_concatenated.frames[
                get_frames_slice_from_scenes(input_scene)]
            input_agents = zarr_concatenated.agents[
                get_agents_slice_from_frames(*input_frames[[0, -1]])]
            input_tl_faces = zarr_concatenated.tl_faces[
                get_tl_faces_slice_from_frames(*input_frames[[0, -1]])]

            output_scene = zarr_out.scenes[idx_scene]
            output_frames = zarr_out.frames[get_frames_slice_from_scenes(
                output_scene)]
            output_agents = zarr_out.agents[get_agents_slice_from_frames(
                *output_frames[[0, -1]])]
            output_tl_faces = zarr_out.tl_faces[get_tl_faces_slice_from_frames(
                *output_frames[[0, -1]])]

            assert np.all(input_frames["ego_translation"] ==
                          output_frames["ego_translation"])
            assert np.all(
                input_frames["ego_rotation"] == output_frames["ego_rotation"])
            assert np.all(input_agents == output_agents)
            assert np.all(input_tl_faces == output_tl_faces)
Example #14
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare loaders.

    Args:
        train_batch_size (int, optional): batch size for training dataset.
            Default is `32`.
        valid_batch_size (int, optional): batch size for validation dataset.
            Default is `64`.

    Returns:
        train and validation data loaders
    """
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require("scenes/train.zarr")).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    n_samples = len(train_dataset) // 5
    # n_samples = 100
    train_dataset = Subset(train_dataset, list(range(n_samples)))
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=True,
        worker_init_fn=seed_all,
        drop_last=True,
    )
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    eval_zarr_path = dm.require("scenes/validate_chopped_100/validate.zarr")
    eval_gt_path = "scenes/validate_chopped_100/gt.csv"
    eval_mask_path = "./data/scenes/validate_chopped_100/mask.npz"
    eval_mask = np.load(eval_mask_path)["arr_0"]

    valid_zarr = ChunkedDataset(eval_zarr_path).open()
    valid_dataset = AgentDataset(cfg, valid_zarr, rasterizer)
    # valid_dataset = Subset(valid_dataset, list(range(200_000)))
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=valid_batch_size,
        shuffle=False,
        num_workers=NUM_WORKERS,
    )
    print(f" * Number of elements in valid dataset - {len(valid_dataset)}")
    print(f" * Number of elements in valid loader - {len(valid_loader)}")

    return train_loader, valid_loader
Example #15
    def val_dataloader(self):
        # created chopped dataset

        rasterizer = build_rasterizer(cfg, dm)
        eval_cfg = cfg["valid_data_loader"]
        num_frames_to_chop = 100
        eval_base_path = create_chopped_dataset(
            dm.require(eval_cfg["key"]),
            cfg["raster_params"]["filter_agents_threshold"],
            num_frames_to_chop, cfg["model_params"]["future_num_frames"],
            MIN_FUTURE_STEPS)

        eval_zarr_path = str(
            Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
        eval_mask_path = str(Path(eval_base_path) / "mask.npz")
        eval_gt_path = str(Path(eval_base_path) / "gt.csv")
        self.eval_gt_path = eval_gt_path

        eval_zarr = ChunkedDataset(eval_zarr_path).open(cache_size_bytes=10e9)
        eval_mask = np.load(eval_mask_path)["arr_0"]

        eval_dataset = AgentDataset(cfg,
                                    eval_zarr,
                                    rasterizer,
                                    agents_mask=eval_mask)
        eval_dataloader = DataLoader(eval_dataset,
                                     shuffle=False,
                                     batch_size=eval_cfg["batch_size"],
                                     num_workers=8)

        return eval_dataloader
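Once predictions for this chopped set have been written with write_pred_csv, the stored eval_gt_path can be used for offline scoring. A hedged sketch, assuming the l5kit evaluation helpers and a hypothetical "pred.csv":

from l5kit.evaluation import compute_metrics_csv
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace

metrics = compute_metrics_csv(eval_gt_path, "pred.csv",
                              [neg_multi_log_likelihood, time_displace])
for metric_name, metric_mean in metrics.items():
    print(metric_name, metric_mean)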
Example #16
def disable_agents(dataset: ChunkedDataset, allowlist: np.ndarray) -> None:
    """Disable all agents in dataset except for the ones in allowlist
    Assumptions:
    - the dataset has only 1 scene
    - the dataset is in numpy format and not zarr anymore

    :param dataset: the single-scene dataset
    :param allowlist: 1D np array of track_ids to keep

    """
    if not len(dataset.scenes) == 1:
        raise ValueError(
            f"dataset should have a single scene, got {len(dataset.scenes)}")
    if not isinstance(dataset.agents, np.ndarray):
        raise ValueError("dataset agents should be an editable np array")
    if not len(allowlist.shape) == 1:
        raise ValueError("allow list should be 1D")

    agent_track_ids = dataset.agents["track_id"]

    mask_disable = ~np.in1d(agent_track_ids, allowlist)

    # this will set those agents as invisible
    # we also zero their pose and extent
    dataset.agents["centroid"][mask_disable] *= 0
    dataset.agents["yaw"][mask_disable] *= 0
    dataset.agents["extent"][mask_disable] *= 0
    dataset.agents["label_probabilities"][mask_disable] = -1
Example #17
def test_simulation_dataset_build(zarr_cat_dataset: ChunkedDataset,
                                  dmg: LocalDataManager, cfg: dict,
                                  tmp_path: Path) -> None:
    # modify one frame to ensure everything works also when scenes are different
    zarr_cat_dataset.frames = np.asarray(zarr_cat_dataset.frames)
    for scene_idx in range(len(zarr_cat_dataset.scenes)):
        frame_slice = get_frames_slice_from_scenes(
            zarr_cat_dataset.scenes[scene_idx])
        zarr_cat_dataset.frames[
            frame_slice.start]["ego_translation"] += np.random.randn(3)

    rasterizer = build_rasterizer(cfg, dmg)
    ego_dataset = EgoDataset(cfg, zarr_cat_dataset, rasterizer)
    sim_cfg = SimulationConfig(use_ego_gt=True,
                               use_agents_gt=True,
                               disable_new_agents=False,
                               distance_th_far=30,
                               distance_th_close=10)
    # we should be able to create the same object by using both constructor and factory
    scene_indices = list(range(len(zarr_cat_dataset.scenes)))

    scene_dataset_batch: Dict[int, EgoDataset] = {}
    for scene_idx in scene_indices:
        scene_dataset = ego_dataset.get_scene_dataset(scene_idx)
        scene_dataset_batch[scene_idx] = scene_dataset
    sim_1 = SimulationDataset(scene_dataset_batch, sim_cfg)

    sim_2 = SimulationDataset.from_dataset_indices(ego_dataset, scene_indices,
                                                   sim_cfg)

    for (k_1, v_1), (k_2, v_2) in zip(sim_1.scene_dataset_batch.items(),
                                      sim_2.scene_dataset_batch.items()):
        assert k_1 == k_2
        assert np.allclose(v_1.dataset.frames["ego_translation"],
                           v_2.dataset.frames["ego_translation"])
Example #18
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare loaders.

    Args:
        train_batch_size (int, optional): batch size for training dataset.
            Default is `32`.
        valid_batch_size (int, optional): batch size for validation dataset.
            Default is `64`.

    Returns:
        train and validation data loaders
    """
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require("scenes/train.zarr")).open()
    train_dataset = CubicAgentDataset(cfg, train_zarr, rasterizer)
    n_samples = len(train_dataset) // 5
    # n_samples = 100
    train_dataset = Subset(train_dataset, list(range(n_samples)))
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=True,
        worker_init_fn=seed_all,
        drop_last=True,
    )
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")
    return train_loader, None
Example #19
    def evaluate(self, data_path, file_name="submission.csv"):

        # set env variable for data
        os.environ["L5KIT_DATA_FOLDER"] = data_path
        dm = LocalDataManager(None)

        cfg = self.cfg

        # ===== INIT DATASET
        test_cfg = cfg["test_data_loader"]

        # Rasterizer
        rasterizer = build_rasterizer(cfg, dm)

        # Test dataset/dataloader
        test_zarr = ChunkedDataset(dm.require(test_cfg["key"])).open()
        test_mask = np.load(f"{data_path}/scenes/mask.npz")["arr_0"]
        test_dataset = AgentDataset(cfg,
                                    test_zarr,
                                    rasterizer,
                                    agents_mask=test_mask)
        test_dataloader = DataLoader(test_dataset,
                                     shuffle=test_cfg["shuffle"],
                                     batch_size=test_cfg["batch_size"],
                                     num_workers=test_cfg["num_workers"])
        print(test_dataloader)

        # ==== EVAL LOOP
        self.model.eval()
        torch.set_grad_enabled(False)
        criterion = nn.MSELoss(reduction="none")

        # store information for evaluation
        future_coords_offsets_pd = []
        timestamps = []
        pred_coords = []
        confidences_list = []

        agent_ids = []
        progress_bar = tqdm(test_dataloader)
        for data in progress_bar:
            _, pred, confidences = self.forward(data, criterion)

            # future_coords_offsets_pd.append(outputs.cpu().numpy().copy())
            timestamps.append(data["timestamp"].numpy().copy())
            agent_ids.append(data["track_id"].numpy().copy())
            #
            # pred, confidences = predictor(image)

            pred_coords.append(pred.cpu().numpy().copy())
            confidences_list.append(confidences.cpu().numpy().copy())

        # ==== Save Results
        pred_path = f"{os.getcwd()}/{file_name}"
        write_pred_csv(pred_path,
                       timestamps=np.concatenate(timestamps),
                       track_ids=np.concatenate(agent_ids),
                       coords=np.concatenate(pred_coords),
                       confs=np.concatenate(confidences_list))
Example #20
def load_tune_data():
    dm = get_dm()

    eval_cfg = cfg["val_data_loader"]

    eval_base_path = '/home/axot/lyft/data/scenes/validate_chopped_31'

    eval_zarr_path = str(Path(eval_base_path) /
                         Path(dm.require(eval_cfg["key"])).name)
    eval_mask_path = str(Path(eval_base_path) / "mask.npz")
    eval_gt_path = str(Path(eval_base_path) / "gt.csv")

    rasterizer = build_rasterizer(cfg, dm)
    eval_zarr = ChunkedDataset(eval_zarr_path).open()
    eval_mask = np.load(eval_mask_path)["arr_0"]
    # ===== INIT DATASET AND LOAD MASK
    eval_dataset = AgentDataset(
        cfg, eval_zarr, rasterizer, agents_mask=eval_mask)

    gt_dict = OrderedDict()
    for el in read_gt_csv(eval_gt_path):
        gt_dict[el["track_id"] + el["timestamp"]] = el
    
    eval_dataloader = DataLoader(eval_dataset,
                                 shuffle=eval_cfg["shuffle"],
                                 batch_size=eval_cfg["batch_size"],
                                 num_workers=eval_cfg["num_workers"])

    return eval_dataloader, gt_dict
Example #21
    def __init__(self,
                 data_root: str,
                 config_path: str,
                 split: str,
                 show_progress=True,
                 turn_thresh=3.,
                 speed_thresh=0.5,
                 static_thresh=1.,
                 output_folder='preprocess',
                 autosave=True,
                 cache_size=1e9):
        self.autosave = autosave
        self.show_progress = show_progress
        self.turn_thresh = turn_thresh
        self.speed_thresh = speed_thresh
        self.static_thresh = static_thresh
        self.split = split
        self.config = load_config_data(config_path)
        self.output_folder = output_folder

        self.data_manager = LocalDataManager(data_root)
        self.rasterizer = build_rasterizer(self.config, self.data_manager)
        self.data_zarr = ChunkedDataset(self.data_manager.require(split)).open(
            cache_size_bytes=int(cache_size))
        self.dataset = AgentDataset(self.config, self.data_zarr,
                                    self.rasterizer)

        self.data = defaultdict(list)
        self.junk = defaultdict(list)

        self.progress = None
    def load_zarr_dataset(
        self,
        loader_name: str = "train_data_loader"
    ) -> Tuple[str, ChunkedDataset, AgentDataset]:

        zarr_path = self.dm.require(self.cfg[loader_name]["key"])
        print("load zarr data:", zarr_path)
        zarr_dataset = ChunkedDataset(zarr_path).open()
        if loader_name == "test_data_loader":
            mask_path = os.path.join(os.path.dirname(zarr_path), "mask.npz")
            agents_mask = np.load(mask_path)["arr_0"]
            agent_dataset = AgentDataset(self.cfg,
                                         zarr_dataset,
                                         self.rasterizer,
                                         agents_mask=agents_mask)
        else:
            agent_dataset = AgentDataset(
                self.cfg,
                zarr_dataset,
                self.rasterizer,
                min_frame_history=MIN_FRAME_HISTORY,
                min_frame_future=MIN_FRAME_FUTURE,
            )
        print(zarr_dataset)
        return zarr_path, zarr_dataset, agent_dataset
Example #23
    def setup(self):
        self.dm = LocalDataManager(None)
        self.rasterizer = self.fn_rasterizer(self.cfg, self.dm)
        self.data_zarr = ChunkedDataset(
            self.dm.require(self.cfg[self.loader_key]["key"])).open()

        self.ds = AgentDataset(self.cfg, self.data_zarr, self.rasterizer)
Example #24
    def train_dataloader(self):
        train_cfg = cfg["train_data_loader"]

        try:
            dataset_meta = _load_metadata(train_cfg["dataset_meta_key"], dm)
            world_to_ecef = np.array(dataset_meta["world_to_ecef"],
                                     dtype=np.float64)
        except (KeyError, FileNotFoundError):
            world_to_ecef = get_hardcoded_world_to_ecef()

        semantic_map_filepath = dm.require(train_cfg["semantic_map_key"])

        rasterizer = OpenGLSemanticRasterizer(
            raster_size=train_cfg["raster_size"],
            pixel_size=train_cfg["pixel_size"],
            ego_center=train_cfg["ego_center"],
            filter_agents_threshold=0.5,
            history_num_frames=train_cfg['history_num_frames'],
            semantic_map_path=semantic_map_filepath,
            world_to_ecef=world_to_ecef,
        )

        train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
        train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
        train_dataloader = DataLoader(
            train_dataset,
            sampler=RandomSampler(
                train_dataset,
                num_samples=cfg["train_params"]["max_num_steps"],
                replacement=True,
            ),
            batch_size=train_cfg["batch_size"],
            num_workers=train_cfg["num_workers"])
        return train_dataloader
Example #25
def load_val_data():
    dm = get_dm()

    eval_cfg = cfg["val_data_loader"]

    # MIN_FUTURE_STEPS = 10
    # num_frames_to_chop = cfg['model_params']['history_num_frames']+1

    # eval_base_path = create_chopped_dataset(dm.require(eval_cfg["key"]),
    #                                         cfg["raster_params"]["filter_agents_threshold"],
    #                                         num_frames_to_chop,
    #                                         cfg["model_params"]["future_num_frames"],
    #                                         MIN_FUTURE_STEPS)

    eval_base_path = '/home/axot/lyft/data/scenes/validate_chopped_31'

    eval_zarr_path = str(
        Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
    eval_mask_path = str(Path(eval_base_path) / "mask.npz")
    eval_gt_path = str(Path(eval_base_path) / "gt.csv")

    rasterizer = build_rasterizer(cfg, dm)
    eval_zarr = ChunkedDataset(eval_zarr_path).open()
    eval_mask = np.load(eval_mask_path)["arr_0"]
    # ===== INIT DATASET AND LOAD MASK
    eval_dataset = AgentDataset(cfg,
                                eval_zarr,
                                rasterizer,
                                agents_mask=eval_mask)
    eval_dataloader = DataLoader(eval_dataset,
                                 shuffle=eval_cfg["shuffle"],
                                 batch_size=eval_cfg["batch_size"],
                                 num_workers=eval_cfg["num_workers"])

    return eval_dataloader
Example #26
def test_get_valid_agents_multi_annot_hole(dataset: ChunkedDataset) -> None:
    frames_range = np.asarray([0, len(dataset.frames)])
    # put an annotation hole at 10 and 25
    dataset.agents[10]["track_id"] = 2
    dataset.agents[25]["track_id"] = 2

    agents_mask, *_ = get_valid_agents_p(frames_range, dataset)
    agents_mask = agents_mask.astype(int)

    assert np.all(np.diff(agents_mask[:10, 0]) == 1)
    assert np.all(np.diff(agents_mask[:10, 1]) == -1)
    assert agents_mask[10, 0] == agents_mask[10, 1] == 0

    assert np.all(np.diff(agents_mask[11:25, 0]) == 1)
    assert np.all(np.diff(agents_mask[11:25, 1]) == -1)
    assert agents_mask[25, 0] == agents_mask[25, 1] == 0
def test_get_valid_agents_yaw_change(dataset: ChunkedDataset) -> None:
    frames_range = np.asarray([0, len(dataset.frames)])
    # change yaw
    dataset.agents[10]["yaw"] = np.radians(50)
    dataset.agents[20]["yaw"] = np.radians(29)  # under yaw threshold

    agents_mask, *_ = get_valid_agents_p(frames_range, dataset)
    agents_mask = agents_mask.astype(int)

    assert np.all(np.diff(agents_mask[:10, 0]) == 1)
    assert np.all(np.diff(agents_mask[:10, 1]) == -1)

    assert agents_mask[10, 0] == agents_mask[10, 1] == 0

    assert np.all(np.diff(agents_mask[11:, 0]) == 1)
    assert np.all(np.diff(agents_mask[11:, 1]) == -1)
Example #28
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare loaders.

    Args:
        train_batch_size (int, optional): batch size for training dataset.
            Default is `32`.
        valid_batch_size (int, optional): batch size for validation dataset.
            Default is `64`.

    Returns:
        train and validation data loaders
    """
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require("scenes/train.zarr")).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    train_sampler = RandomSampler(train_dataset,
                                  replacement=True,
                                  num_samples=100_000)
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=False,
        sampler=train_sampler,
        worker_init_fn=seed_all,
    )
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    valid_zarr = ChunkedDataset(dm.require("scenes/validate.zarr")).open()
    valid_dataset = AgentDataset(cfg, valid_zarr, rasterizer)
    valid_sampler = RandomSampler(valid_dataset,
                                  replacement=True,
                                  num_samples=10_000)
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=valid_batch_size,
        shuffle=False,
        sampler=valid_sampler,
        num_workers=NUM_WORKERS,
    )
    print(f" * Number of elements in valid dataset - {len(valid_dataset)}")
    print(f" * Number of elements in valid loader - {len(valid_loader)}")

    return train_loader, valid_loader
Example #29
    def prepare_train_data(self):
        train_cfg = cfg["train_data_loader"]
        rasterizer = build_rasterizer(cfg, dm)
        train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
        train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
        train_dataloader = DataLoader(train_dataset,
                                      shuffle=train_cfg["shuffle"],
                                      batch_size=train_cfg["batch_size"],
                                      num_workers=train_cfg["num_workers"])
        return train_dataloader
Example #30
def test_perturbation_is_applied(perturb_prob: float) -> None:
    cfg = load_config_data("./l5kit/tests/artefacts/config.yaml")

    zarr_dataset = ChunkedDataset(path="./l5kit/tests/artefacts/single_scene.zarr")
    zarr_dataset.open()

    dm = LocalDataManager("./l5kit/tests/artefacts/")
    rasterizer = build_rasterizer(cfg, dm)

    dataset = EgoDataset(cfg, zarr_dataset, rasterizer, None)  # no perturb
    data_no_perturb = dataset[0]

    # note we cannot change the object we already have as a partial is built at init time
    perturb = AckermanPerturbation(ReplayRandomGenerator(np.asarray([[4.0, 0.33]])), perturb_prob=perturb_prob)
    dataset = EgoDataset(cfg, zarr_dataset, rasterizer, perturb)  # perturb
    data_perturb = dataset[0]

    assert np.linalg.norm(data_no_perturb["target_positions"] - data_perturb["target_positions"]) > 0
    assert np.linalg.norm(data_no_perturb["target_yaws"] - data_perturb["target_yaws"]) > 0