def __init__(
    self,
    iterable: Iterable,
    batch_size: Optional[int] = None,
    is_shuffle: bool = True,
    transforms_dict: Optional[Dict[str, List[Transform]]] = None,
    batcher=None,
    collate_fn=None,
    chunk_size: int = 1000,
    is_cycle: bool = False,
    length: Optional[int] = None,
    rank: int = 0,
    num_workers: int = 1,
):
    """Set up the chunked, batched view over ``iterable``.

    Args:
        iterable: Source of items. Wrapped in ``itertools.cycle`` when
            ``is_cycle`` is true.
        batch_size: Batch size. When falsy, ``batcher.batch_size`` is used
            instead.
        is_shuffle: Stored on the instance as ``self.is_shuffle``.
        transforms_dict: Mapping of names to transform lists; an empty dict
            is stored when omitted.
        batcher: Batcher to use. Defaults to ``Batcher(self.batch_size)``.
        collate_fn: Stored as ``self.collate_fun``; defaults to
            ``default_collate_fn``.
        chunk_size: Number of batches per chunk.
        is_cycle: Whether to repeat ``iterable`` endlessly.
        length: Forwarded to ``ChunkIterator`` as its third argument.
        rank: Passed to ``shard`` together with ``num_workers``.
        num_workers: When greater than 1, the iterable is passed through
            ``shard(iterable, rank, num_workers)``.

    Raises:
        ValueError: If ``batch_size`` is falsy and no ``batcher`` is given,
            so no batch size can be determined.
    """
    # Fail fast with a clear message instead of dereferencing ``None`` below.
    if not batch_size and batcher is None:
        raise ValueError(
            "either a non-zero batch_size or a batcher must be provided"
        )

    self.iterable = itertools.cycle(iterable) if is_cycle else iterable
    if num_workers > 1:
        # presumably restricts the stream to this rank's share -- confirm in shard()
        self.iterable = shard(self.iterable, rank, num_workers)

    self.batch_size = batch_size or batcher.batch_size
    self.batcher = batcher or Batcher(self.batch_size)
    self.is_shuffle = is_shuffle
    self.transforms_dict = transforms_dict or {}
    # NOTE: the attribute is spelled ``collate_fun`` (not ``collate_fn``);
    # kept as-is because code outside this method may read it.
    self.collate_fun = collate_fn or default_collate_fn
    self.chunk_size = chunk_size  # num of batches per chunk
    self.is_cycle = is_cycle
    self.length = length

    # chunk_size counts batches, so the second argument is items per chunk.
    self.iterable = ChunkIterator(
        self.iterable, self.chunk_size * self.batch_size, self.length
    )
def __init__(
    self,
    iterable: Iterable,
    batch_size: int = 1,
    is_shuffle: bool = True,
    transform: Optional[Union[nn.Module, Callable]] = None,
    custom_batcher: Optional[Batcher] = None,
    collate_fn: Optional[Callable] = None,
    chunk_size: Optional[int] = 1000,
    is_cycle: bool = False,
    length: Optional[int] = None,
    rank: int = 0,
    world_size: int = 1,
):
    """Set up the (optionally sharded and chunked) view over ``iterable``.

    Args:
        iterable: Source of items. Wrapped in ``itertools.cycle`` when
            ``is_cycle`` is true.
        batch_size: Forwarded to ``self.batch`` together with
            ``custom_batcher``.
        is_shuffle: Stored on the instance as ``self.is_shuffle``.
        transform: Module or callable wrapped in
            ``RowsToColumnarTransform``; ``IdentityTransform()`` is wrapped
            when omitted.
        custom_batcher: Forwarded to ``self.batch``.
        collate_fn: Stored as ``self.collate_fn``.
        chunk_size: Number of batches per chunk. A falsy value skips the
            ``ChunkIterator`` wrapping.
        is_cycle: Whether to repeat ``iterable`` endlessly.
        length: Forwarded to ``ChunkIterator`` as its third argument.
        rank: Passed to ``shard`` together with ``world_size``.
        world_size: When greater than 1, the iterable is passed through
            ``shard(iterable, rank, world_size)``.
    """
    self.iterable = itertools.cycle(iterable) if is_cycle else iterable
    if world_size > 1:
        # Sharding is normal operation, so report it at INFO, not ERROR.
        logger.info(
            "data sharding for rank: %s, world_size: %s", rank, world_size
        )
        self.iterable = shard(self.iterable, rank, world_size)

    # presumably sets self.batch_size (read below) -- confirm in batch()
    self.batch(batch_size, custom_batcher)
    self.is_shuffle = is_shuffle
    self.transform = RowsToColumnarTransform(transform or IdentityTransform())
    self.collate_fn = collate_fn
    self.chunk_size = chunk_size  # num of batches per chunk
    self.is_cycle = is_cycle
    self.length = length

    if self.chunk_size and self.batch_size:
        # chunk_size counts batches, so the second argument is items per chunk.
        self.iterable = ChunkIterator(
            self.iterable, self.chunk_size * self.batch_size, self.length
        )
def __init__(
    self,
    iterable: Iterable,
    batch_size: Optional[int] = None,
    is_shuffle: bool = True,
    transform: Optional[Transform] = None,
    custom_batcher: Optional[Batcher] = None,
    collate_fn=None,
    chunk_size: Optional[int] = 1000,
    is_cycle: bool = False,
    length: Optional[int] = None,
    rank: int = 0,
    world_size: int = 1,
):
    """Set up the (optionally sharded and chunked) view over ``iterable``.

    Args:
        iterable: Source of items. Wrapped in ``itertools.cycle`` when
            ``is_cycle`` is true.
        batch_size: Forwarded to ``self.batch`` together with
            ``custom_batcher``.
        is_shuffle: Stored on the instance as ``self.is_shuffle``.
        transform: Transform wrapped in ``RowsToColumnarTransform``;
            ``IdentityTransform()`` is wrapped when omitted.
        custom_batcher: Forwarded to ``self.batch``.
        collate_fn: Stored as ``self.collate_fn``.
        chunk_size: Number of batches per chunk. A falsy value skips the
            ``ChunkIterator`` wrapping.
        is_cycle: Whether to repeat ``iterable`` endlessly.
        length: Forwarded to ``ChunkIterator`` as its third argument.
        rank: Passed to ``shard`` together with ``world_size``.
        world_size: When greater than 1, the iterable is passed through
            ``shard(iterable, rank, world_size)``.
    """
    self.iterable = itertools.cycle(iterable) if is_cycle else iterable
    if world_size > 1:
        self.iterable = shard(self.iterable, rank, world_size)

    # presumably sets self.batch_size (read below) -- confirm in batch()
    self.batch(batch_size, custom_batcher)
    self.is_shuffle = is_shuffle
    # Pick the fallback BEFORE wrapping: applying ``or`` to the wrapper
    # itself would wrap ``None`` and never reach the identity default.
    self.transform = RowsToColumnarTransform(transform or IdentityTransform())
    self.collate_fn = collate_fn
    self.chunk_size = chunk_size  # num of batches per chunk
    self.is_cycle = is_cycle
    self.length = length

    if self.chunk_size and self.batch_size:
        # chunk_size counts batches, so the second argument is items per chunk.
        self.iterable = ChunkIterator(
            self.iterable, self.chunk_size * self.batch_size, self.length
        )