def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    """Split the archive into images and scene files and join them into samples."""
    images, scenes = Demultiplexer(
        resource_dps[0],
        2,
        self._classify_archive,
        drop_none=True,
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    # Keep only the images of the configured split, then shuffle and shard.
    images = Filter(images, path_comparator("parent.name", self._split))
    images = hint_shuffling(images)
    images = hint_sharding(images)

    if self._split == "test":
        # The test split ships no scene annotations; attach empty ones.
        samples = Mapper(images, self._add_empty_anns)
    else:
        scenes = Filter(scenes, path_comparator("name", f"CLEVR_{self._split}_scenes.json"))
        scenes = JsonParser(scenes)
        scenes = Mapper(scenes, getitem(1, "scenes"))
        scenes = UnBatcher(scenes)
        # Join each image with its scene entry via the image file name.
        samples = IterKeyZipper(
            images,
            scenes,
            key_fn=path_accessor("name"),
            ref_key_fn=getitem("image_filename"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )

    return Mapper(samples, self._prepare_sample)
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    """Join split ids with their joint-category row and the matching image."""
    split_lines, categories, images = Demultiplexer(
        resource_dps[0], 3, self._classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
    )

    # Select the split file for the configured split/fold and read its lines.
    split_lines = Filter(split_lines, path_comparator("name", f"{config.split}{config.fold}.txt"))
    split_lines = LineReader(split_lines, decode=True, return_path=False)
    split_lines = Shuffler(split_lines, buffer_size=INFINITE_BUFFER_SIZE)
    split_lines = hint_sharding(split_lines)

    categories = CSVParser(categories, delimiter=" ")

    # First attach the category row, then the image, both keyed by the id.
    samples = IterKeyZipper(
        split_lines,
        categories,
        key_fn=getitem(),
        ref_key_fn=getitem(0),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    samples = IterKeyZipper(
        samples,
        images,
        key_fn=getitem(0),
        ref_key_fn=self._image_key_fn,
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    return Mapper(samples, functools.partial(self._collate_and_decode_sample, decoder=decoder))
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
) -> IterDataPipe[Dict[str, Any]]:
    """Zip the ids of the split file with the images and then the annotations.

    ``resource_dps`` holds the main archive plus the extra ``train_noval``
    split file, which is not part of the main archive.
    """
    archive_dp, extra_split_dp = resource_dps
    # FIX: removed the dead `archive_dp = resource_dps[0]` that immediately
    # followed the tuple unpacking above — it re-assigned the same value.
    split_dp, images_dp, anns_dp = Demultiplexer(
        archive_dp,
        3,
        self._classify_archive,
        buffer_size=INFINITE_BUFFER_SIZE,
        drop_none=True,
    )

    if config.split == "train_noval":
        split_dp = extra_split_dp

    split_dp = Filter(split_dp, path_comparator("name", f"{config.split}.txt"))
    split_dp = LineReader(split_dp, decode=True)
    # NOTE(review): sharding is applied before shuffling here, while the
    # newer pipelines in this file shuffle first — confirm intended order.
    split_dp = hint_sharding(split_dp)
    split_dp = hint_shuffling(split_dp)

    dp = split_dp
    for level, data_dp in enumerate((images_dp, anns_dp)):
        # Each zip nests the sample one tuple deeper, so the id is reached
        # through `level` leading zeros.
        dp = IterKeyZipper(
            dp,
            data_dp,
            key_fn=getitem(*[0] * level, 1),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
    return Mapper(dp, self._prepare_sample)
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    """Pair every ``.ppm`` image with its CSV annotation row, by position."""
    if config.split == "train":
        # The train resource is one archive holding both images and the
        # annotation CSVs; demux it into the two streams.
        images_dp, ann_dp = Demultiplexer(
            resource_dps[0], 2, self._classify_train_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
        )
    else:
        images_dp, ann_dp = resource_dps

    images_dp = Filter(images_dp, path_comparator("suffix", ".ppm"))

    # The order of the image files in the .zip archives perfectly matches the order of the
    # entries in the (possibly concatenated) .csv files. So we're able to use Zipper here
    # instead of an IterKeyZipper.
    ann_dp = CSVDictParser(ann_dp, delimiter=";")
    dp = Zipper(images_dp, ann_dp)

    dp = hint_sharding(dp)
    dp = hint_shuffling(dp)

    dp = Mapper(dp, partial(self._collate_and_decode, decoder=decoder))
    return dp
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    """Zip split-file ids first with the images and then with the annotations."""
    split, images, anns = Demultiplexer(
        resource_dps[0],
        3,
        self._classify_archive,
        drop_none=True,
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    # Narrow down to the split file inside the configured folder.
    split = Filter(split, functools.partial(self._is_in_folder, name=self._split_folder))
    split = Filter(split, path_comparator("name", f"{self._split}.txt"))
    split = LineReader(split, decode=True)
    split = hint_shuffling(split)
    split = hint_sharding(split)

    samples = split
    for depth, reference in enumerate((images, anns)):
        # Every zip wraps the sample in another tuple, so the id sits behind
        # `depth` leading zeros.
        samples = IterKeyZipper(
            samples,
            reference,
            key_fn=getitem(*[0] * depth, 1),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
    return Mapper(samples, self._prepare_sample)
def _generate_categories(self) -> List[str]:
    """Load the category names stored under ``_CATEGORIES_KEY`` in the meta file."""
    meta_dp = self._resources()[0].load(self._root)
    meta_dp = Filter(meta_dp, path_comparator("name", self._META_FILE_NAME))
    meta_dp = Mapper(meta_dp, self._unpickle)

    meta = next(iter(meta_dp))
    return cast(List[str], meta[self._CATEGORIES_KEY])
def _make_datapipe(
    self, resource_dps: List[IterDataPipe], *, config: DatasetConfig
) -> IterDataPipe[Dict[str, Any]]:
    """Keep only the files of the configured split's folder and build samples."""
    split_folder = self._SPLIT_NAME_MAPPER[config.split]
    samples = Filter(resource_dps[0], path_comparator("parent.parent.name", split_folder))
    samples = hint_sharding(samples)
    samples = hint_shuffling(samples)
    return Mapper(samples, self._prepare_sample)
def _generate_categories(self) -> List[str]:
    """Read the class names from ``cars_meta.mat`` inside the devkit resource."""
    devkit = self._resources()[1].load(self._root)
    meta_files = Filter(devkit, path_comparator("name", "cars_meta.mat"))
    _, meta_file = next(iter(meta_files))
    return list(read_mat(meta_file, squeeze_me=True)["class_names"])
def _generate_categories(self, root: pathlib.Path) -> List[str]:
    """Read the class names from ``cars_meta.mat`` inside the devkit resource."""
    resources = self.resources(self.info.make_config(split="train"))
    devkit = resources[1].load(root)
    meta_files = Filter(devkit, path_comparator("name", "cars_meta.mat"))
    _, meta_file = next(iter(meta_files))
    return list(read_mat(meta_file, squeeze_me=True)["class_names"])
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    """Restrict the archive to the configured split folder and emit samples."""
    split_images = Filter(
        resource_dps[0], path_comparator("parent.parent.name", self._split_folder_name)
    )
    split_images = hint_shuffling(split_images)
    split_images = hint_sharding(split_images)
    return Mapper(split_images, self._prepare_sample)
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    """Pair each image with its annotation record, relying on matching order."""
    images, targets = resource_dps

    if self._split == "train":
        # Only the train split needs to single out its annotation file first.
        targets = Filter(targets, path_comparator("name", "cars_train_annos.mat"))
    targets = StanfordCarsLabelReader(targets)

    samples = Zipper(images, targets)
    samples = hint_shuffling(samples)
    samples = hint_sharding(samples)
    return Mapper(samples, self._prepare_sample)
def _generate_categories(self) -> List[str]:
    """Parse the 2011 ``classes.txt`` into plain category names."""
    self._year = "2011"
    classes_dp = self._resources()[0].load(self._root)
    classes_dp = Filter(classes_dp, path_comparator("name", "classes.txt"))
    classes_dp = CSVDictParser(classes_dp, fieldnames=("label", "category"), dialect="cub200")
    # The category field carries a numeric prefix separated by a dot;
    # keep only the part after it.
    return [row["category"].split(".")[1] for row in classes_dp]
def _generate_categories(self, root: pathlib.Path) -> List[str]:
    """Parse the 2011 ``classes.txt`` into plain category names."""
    resources = self.resources(self.info.make_config(year="2011"))
    classes_dp = resources[0].load(root)
    classes_dp = Filter(classes_dp, path_comparator("name", "classes.txt"))
    classes_dp = CSVDictParser(classes_dp, fieldnames=("label", "category"), dialect="cub200")
    # The category field carries a numeric prefix separated by a dot;
    # keep only the part after it.
    return [row["category"].split(".")[1] for row in classes_dp]
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    """Join split/classification rows, segmentations, and images into samples."""
    images_dp, anns_dp = resource_dps

    images_dp = Filter(images_dp, self._filter_images)

    # The annotation archive holds both the split/classification lists and the
    # segmentation files; route them into separate streams.
    split_and_classification_dp, segmentations_dp = Demultiplexer(
        anns_dp,
        2,
        self._classify_anns,
        drop_none=True,
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    split_and_classification_dp = Filter(
        split_and_classification_dp, path_comparator("name", f"{config.split}.txt")
    )
    split_and_classification_dp = CSVDictParser(
        split_and_classification_dp, fieldnames=("image_id", "label", "species"), delimiter=" "
    )
    split_and_classification_dp = hint_sharding(split_and_classification_dp)
    split_and_classification_dp = hint_shuffling(split_and_classification_dp)

    segmentations_dp = Filter(segmentations_dp, self._filter_segmentations)

    # First zip the classification row with its segmentation by image id, ...
    anns_dp = IterKeyZipper(
        split_and_classification_dp,
        segmentations_dp,
        key_fn=getitem("image_id"),
        ref_key_fn=path_accessor("stem"),
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    # ... then attach the actual image, again keyed by image id.
    dp = IterKeyZipper(
        anns_dp,
        images_dp,
        key_fn=getitem(0, "image_id"),
        ref_key_fn=path_accessor("stem"),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
def _generate_categories(self) -> List[str]:
    """Derive sorted, title-cased category names from the trainval annotations."""
    anns = self._resources()[1].load(self._root)
    anns = Filter(anns, self._filter_split_and_classification_anns)
    anns = Filter(anns, path_comparator("name", "trainval.txt"))
    anns = CSVDictParser(anns, fieldnames=("image_id", "label"), delimiter=" ")

    # Image ids have the shape "<raw_category>_<number>"; collect every unique
    # (raw category, label) pair and order the pairs by their numeric label.
    pairs = {(row["image_id"].rsplit("_", 1)[0], row["label"]) for row in anns}
    ordered = sorted(pairs, key=lambda pair: int(pair[1]))
    return [" ".join(word.title() for word in raw.split("_")) for raw, _ in ordered]
def _generate_categories(self, root: pathlib.Path) -> List[Tuple[str, ...]]:
    """Return (category, wnid) pairs from the devkit's ``meta.mat``, sorted by wnid."""
    resources = self.resources(self.info.make_config(split="val"))
    meta_dp = Filter(resources[1].load(root), path_comparator("name", "meta.mat"))
    meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)

    pairs = cast(List[Tuple[str, ...]], next(iter(meta_dp)))
    pairs.sort(key=lambda pair: pair[1])
    return pairs
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    """Split the archive into images and scene files and join them into samples."""
    archive_dp = resource_dps[0]
    images_dp, scenes_dp = Demultiplexer(
        archive_dp,
        2,
        self._classify_archive,
        drop_none=True,
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    images_dp = Filter(images_dp, path_comparator("parent.name", config.split))
    # FIX: shuffle before sharding so the shuffle operates on the whole split
    # instead of on each worker's shard; this matches the `_datapipe` variant
    # of this dataset and the other pipelines in this file.
    images_dp = hint_shuffling(images_dp)
    images_dp = hint_sharding(images_dp)

    if config.split != "test":
        scenes_dp = Filter(scenes_dp, path_comparator("name", f"CLEVR_{config.split}_scenes.json"))
        scenes_dp = JsonParser(scenes_dp)
        scenes_dp = Mapper(scenes_dp, getitem(1, "scenes"))
        scenes_dp = UnBatcher(scenes_dp)
        # Join each image with its scene entry via the image file name.
        dp = IterKeyZipper(
            images_dp,
            scenes_dp,
            key_fn=path_accessor("name"),
            ref_key_fn=getitem("image_filename"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
    else:
        # The test split ships no scene annotations; attach empty ones.
        dp = Mapper(images_dp, self._add_empty_anns)

    return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
def _generate_categories(self, root: pathlib.Path) -> List[str]:
    """Derive sorted, title-cased category names from the split annotation file."""
    config = self.default_config
    anns = self.resources(config)[1].load(pathlib.Path(root) / self.name)
    anns = Filter(anns, self._filter_split_and_classification_anns)
    anns = Filter(anns, path_comparator("name", f"{config.split}.txt"))
    anns = CSVDictParser(anns, fieldnames=("image_id", "label"), delimiter=" ")

    # Image ids have the shape "<raw_category>_<number>"; collect every unique
    # (raw category, label) pair and order the pairs by their numeric label.
    pairs = {(row["image_id"].rsplit("_", 1)[0], row["label"]) for row in anns}
    ordered = sorted(pairs, key=lambda pair: int(pair[1]))
    return [" ".join(word.title() for word in raw.split("_")) for raw, _ in ordered]
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    """Join split/classification rows, segmentations, and images into samples."""
    images_dp, anns_dp = resource_dps

    images_dp = Filter(images_dp, self._filter_images)

    # The annotation archive holds both the split/classification lists and the
    # segmentation files; route them into separate streams.
    split_and_classification_dp, segmentations_dp = Demultiplexer(
        anns_dp,
        2,
        self._classify_anns,
        drop_none=True,
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    split_and_classification_dp = Filter(
        split_and_classification_dp, path_comparator("name", f"{self._split}.txt")
    )
    split_and_classification_dp = CSVDictParser(
        split_and_classification_dp, fieldnames=("image_id", "label", "species"), delimiter=" "
    )
    split_and_classification_dp = hint_shuffling(split_and_classification_dp)
    split_and_classification_dp = hint_sharding(split_and_classification_dp)

    segmentations_dp = Filter(segmentations_dp, self._filter_segmentations)

    # First zip the classification row with its segmentation by image id, ...
    anns_dp = IterKeyZipper(
        split_and_classification_dp,
        segmentations_dp,
        key_fn=getitem("image_id"),
        ref_key_fn=path_accessor("stem"),
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    # ... then attach the actual image, again keyed by image id.
    dp = IterKeyZipper(
        anns_dp,
        images_dp,
        key_fn=getitem(0, "image_id"),
        ref_key_fn=path_accessor("stem"),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    return Mapper(dp, self._prepare_sample)
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    """Pair every ``.ppm`` image with its CSV annotation row, by position."""
    if self._split == "train":
        # The train resource is one archive holding both images and the
        # annotation CSVs; demux it into the two streams.
        images, anns = Demultiplexer(
            resource_dps[0],
            2,
            self._classify_train_archive,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )
    else:
        images, anns = resource_dps

    images = Filter(images, path_comparator("suffix", ".ppm"))

    # The order of the image files in the .zip archives perfectly match the order of the entries in the
    # (possibly concatenated) .csv files. So we're able to use Zipper here instead of a IterKeyZipper.
    anns = CSVDictParser(anns, delimiter=";")
    samples = Zipper(images, anns)

    samples = hint_shuffling(samples)
    samples = hint_sharding(samples)
    return Mapper(samples, self._prepare_sample)
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    """Build the per-split pipeline from the image and devkit resources."""
    images_dp, devkit_dp = resource_dps

    if config.split == "train":
        # the train archive is a tar of tars
        dp = TarArchiveReader(images_dp)
        dp = hint_sharding(dp)
        dp = hint_shuffling(dp)
        dp = Mapper(dp, self._collate_train_data)
    elif config.split == "val":
        devkit_dp = Filter(
            devkit_dp, path_comparator("name", "ILSVRC2012_validation_ground_truth.txt")
        )
        devkit_dp = LineReader(devkit_dp, return_path=False)
        devkit_dp = Mapper(devkit_dp, int)
        # Enumerate the labels starting at 1; that index is the join key
        # matched against `_val_test_image_key` of the image path below.
        devkit_dp = Enumerator(devkit_dp, 1)
        devkit_dp = hint_sharding(devkit_dp)
        devkit_dp = hint_shuffling(devkit_dp)
        dp = IterKeyZipper(
            devkit_dp,
            images_dp,
            key_fn=getitem(0),
            ref_key_fn=self._val_test_image_key,
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        dp = Mapper(dp, self._collate_val_data)
    else:  # config.split == "test"
        # The test split has no labels at all; just pass the images through.
        dp = hint_sharding(images_dp)
        dp = hint_shuffling(dp)
        dp = Mapper(dp, self._collate_test_data)

    return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
) -> IterDataPipe[Dict[str, Any]]:
    """Zip split-file ids with images and annotations for the configured task."""
    archive_dp = resource_dps[0]
    split_dp, images_dp, anns_dp = Demultiplexer(
        archive_dp,
        3,
        functools.partial(self._classify_archive, config=config),
        drop_none=True,
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    # The split lists live in a task-specific folder; narrow down first to
    # that folder and then to the configured split.
    split_dp = Filter(
        split_dp, functools.partial(self._is_in_folder, name=self._SPLIT_FOLDER[config.task])
    )
    split_dp = Filter(split_dp, path_comparator("name", f"{config.split}.txt"))
    split_dp = LineReader(split_dp, decode=True)
    split_dp = hint_sharding(split_dp)
    split_dp = hint_shuffling(split_dp)

    dp = split_dp
    for level, data_dp in enumerate((images_dp, anns_dp)):
        # Each zip nests the sample one tuple deeper, so the id is reached
        # through `level` leading zeros.
        dp = IterKeyZipper(
            dp,
            data_dp,
            key_fn=getitem(*[0] * level, 1),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
    return Mapper(
        dp,
        functools.partial(
            self._prepare_sample,
            # Detection and segmentation share the pipeline but parse their
            # annotations differently.
            prepare_ann_fn=self._prepare_detection_ann
            if config.task == "detection"
            else self._prepare_segmentation_ann,
        ),
    )
def _generate_categories(self) -> Tuple[str, ...]:
    """Parse the category names out of ``category_names.m``, ordered by label."""
    resources = self._resources()

    dp = resources[0].load(self._root)
    dp = Filter(dp, path_comparator("name", "category_names.m"))
    dp = LineReader(dp)
    dp = Mapper(dp, bytes.decode, input_col=1)
    # Collect only the decoded line texts, dropping the paths.
    lines = tuple(zip(*iter(dp)))[1]

    # Relevant lines look like `'<category>'; %<label>`.
    pattern = re.compile(r"\s*'(?P<category>\w+)';\s*%(?P<label>\d+)")
    categories_and_labels = cast(
        List[Tuple[str, ...]],
        [
            pattern.match(line).groups()  # type: ignore[union-attr]
            # the first and last line contain no information
            for line in lines[1:-1]
        ],
    )
    categories_and_labels.sort(key=lambda category_and_label: int(category_and_label[1]))
    categories, _ = zip(*categories_and_labels)

    return categories
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    """Zip the ids of the split file with the images and then the annotations.

    ``resource_dps`` holds the main archive plus the extra ``train_noval``
    split file, which is not part of the main archive.
    """
    archive_dp, extra_split_dp = resource_dps
    # FIX: removed the dead `archive_dp = resource_dps[0]` that immediately
    # followed the tuple unpacking above — it re-assigned the same value.
    split_dp, images_dp, anns_dp = Demultiplexer(
        archive_dp,
        3,
        self._classify_archive,
        buffer_size=INFINITE_BUFFER_SIZE,
        drop_none=True,
    )

    if config.split == "train_noval":
        split_dp = extra_split_dp

    split_dp = Filter(split_dp, path_comparator("stem", config.split))
    split_dp = LineReader(split_dp, decode=True)
    # NOTE(review): sharding is applied before shuffling here, while the
    # newer pipelines in this file shuffle first — confirm intended order.
    split_dp = hint_sharding(split_dp)
    split_dp = hint_shuffling(split_dp)

    dp = split_dp
    for level, data_dp in enumerate((images_dp, anns_dp)):
        # Each zip nests the sample one tuple deeper, so the id is reached
        # through `level` leading zeros.
        dp = IterKeyZipper(
            dp,
            data_dp,
            key_fn=getitem(*[0] * level, 1),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
    return Mapper(dp, functools.partial(self._collate_and_decode_sample, config=config, decoder=decoder))
def _generate_categories(self, root: pathlib.Path) -> List[Tuple[str, ...]]:
    """Extract (category, wnid) pairs from the devkit's ``meta.mat``, sorted by wnid."""
    resources = self.resources(self.default_config)

    devkit_dp = resources[1].load(root)
    devkit_dp = Filter(devkit_dp, path_comparator("name", "meta.mat"))

    meta = next(iter(devkit_dp))[1]
    synsets = read_mat(meta, squeeze_me=True)["synsets"]
    categories_and_wnids = cast(
        List[Tuple[str, ...]],
        [
            # An explicit override in `_WNID_MAP` wins; otherwise take the
            # first comma-separated name of the category string.
            (self._WNID_MAP.get(wnid, category.split(",", 1)[0]), wnid)
            for _, wnid, category, _, num_children, *_ in synsets
            # if num_children > 0, we are looking at a superclass that has no direct instance
            if num_children == 0
        ],
    )
    categories_and_wnids.sort(key=lambda category_and_wnid: category_and_wnid[1])
    return categories_and_wnids
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    """Join the ids listed in the split file with the corresponding images."""
    images, split = Demultiplexer(
        resource_dps[0],
        2,
        self._classify_archive,
        drop_none=True,
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    split = Filter(split, path_comparator("name", f"{self._split}.txt"))
    split = LineReader(split, decode=True, return_path=False)
    split = hint_sharding(split)
    split = hint_shuffling(split)

    samples = IterKeyZipper(
        split,
        images,
        key_fn=getitem(),
        ref_key_fn=self._image_key,
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    return Mapper(samples, self._prepare_sample)
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    """Assemble the pipeline; the 2010 and 2011 releases have different layouts."""
    if config.year == "2011":
        archive_dp, segmentations_dp = resource_dps
        images_dp, split_dp, image_files_dp, bounding_boxes_dp = Demultiplexer(
            archive_dp, 4, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
        )

        # The image-files list maps image ids to relative POSIX paths; keep
        # only the trailing file name to use as the join key below.
        image_files_dp = CSVParser(image_files_dp, dialect="cub200")
        image_files_map = dict(
            (image_id, rel_posix_path.rsplit("/", maxsplit=1)[1])
            for image_id, rel_posix_path in image_files_dp
        )

        split_dp = CSVParser(split_dp, dialect="cub200")
        split_dp = Filter(split_dp, functools.partial(self._2011_filter_split, split=config.split))
        split_dp = Mapper(split_dp, getitem(0))
        # Translate the numeric image id into the image file name.
        split_dp = Mapper(split_dp, image_files_map.get)

        bounding_boxes_dp = CSVParser(bounding_boxes_dp, dialect="cub200")
        bounding_boxes_dp = Mapper(bounding_boxes_dp, image_files_map.get, input_col=0)

        # Zip the bounding box with its segmentation, keeping the file-name key.
        anns_dp = IterKeyZipper(
            bounding_boxes_dp,
            segmentations_dp,
            key_fn=getitem(0),
            ref_key_fn=self._2011_segmentation_key,
            keep_key=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )
    else:  # config.year == "2010"
        split_dp, images_dp, anns_dp = resource_dps

        split_dp = Filter(split_dp, path_comparator("name", f"{config.split}.txt"))
        split_dp = LineReader(split_dp, decode=True, return_path=False)
        split_dp = Mapper(split_dp, self._2010_split_key)

        anns_dp = Mapper(anns_dp, self._2010_anns_key)

    split_dp = hint_sharding(split_dp)
    split_dp = hint_shuffling(split_dp)

    # Attach the image first, then the annotations, both keyed on file name.
    dp = IterKeyZipper(
        split_dp,
        images_dp,
        getitem(),
        path_accessor("name"),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    dp = IterKeyZipper(
        dp,
        anns_dp,
        getitem(0),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    return Mapper(dp, functools.partial(self._collate_and_decode_sample, year=config.year, decoder=decoder))
def _generate_categories(self) -> List[str]:
    """Read the category list from ``classes.txt``, one name per line."""
    classes_dp = self._resources()[0].load(self._root)
    classes_dp = Filter(classes_dp, path_comparator("name", "classes.txt"))
    classes_dp = LineReader(classes_dp, decode=True, return_path=False)
    return [category for category in classes_dp]
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    """Assemble the pipeline; the 2010 and 2011 releases have different layouts."""
    prepare_ann_fn: Callable
    if self._year == "2011":
        archive_dp, segmentations_dp = resource_dps
        images_dp, split_dp, image_files_dp, bounding_boxes_dp = Demultiplexer(
            archive_dp, 4, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
        )

        # The image-files list maps image ids to relative POSIX paths; keep
        # only the trailing file name to use as the join key below.
        image_files_dp = CSVParser(image_files_dp, dialect="cub200")
        image_files_map = dict(
            (image_id, rel_posix_path.rsplit("/", maxsplit=1)[1])
            for image_id, rel_posix_path in image_files_dp
        )

        split_dp = CSVParser(split_dp, dialect="cub200")
        split_dp = Filter(split_dp, self._2011_filter_split)
        split_dp = Mapper(split_dp, getitem(0))
        # Translate the numeric image id into the image file name.
        split_dp = Mapper(split_dp, image_files_map.get)

        bounding_boxes_dp = CSVParser(bounding_boxes_dp, dialect="cub200")
        bounding_boxes_dp = Mapper(bounding_boxes_dp, image_files_map.get, input_col=0)

        # Zip the bounding box with its segmentation, keeping the file-name key.
        anns_dp = IterKeyZipper(
            bounding_boxes_dp,
            segmentations_dp,
            key_fn=getitem(0),
            ref_key_fn=self._2011_segmentation_key,
            keep_key=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        prepare_ann_fn = self._2011_prepare_ann
    else:  # self._year == "2010"
        split_dp, images_dp, anns_dp = resource_dps

        split_dp = Filter(split_dp, path_comparator("name", f"{self._split}.txt"))
        split_dp = LineReader(split_dp, decode=True, return_path=False)
        split_dp = Mapper(split_dp, self._2010_split_key)

        anns_dp = Mapper(anns_dp, self._2010_anns_key)

        prepare_ann_fn = self._2010_prepare_ann

    split_dp = hint_shuffling(split_dp)
    split_dp = hint_sharding(split_dp)

    # Attach the image first, then the annotations, both keyed on file name.
    dp = IterKeyZipper(
        split_dp,
        images_dp,
        getitem(),
        path_accessor("name"),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    dp = IterKeyZipper(
        dp,
        anns_dp,
        getitem(0),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    return Mapper(dp, functools.partial(self._prepare_sample, prepare_ann_fn=prepare_ann_fn))