def pipeline(self, dataset: tf.data.Dataset) -> tf.data.Dataset:
  """Builds a pipeline that fetches, shuffles, and preprocesses the dataset.

  Args:
    dataset: A `tf.data.Dataset` that loads raw files.

  Returns:
    A TensorFlow dataset outputting batched images and labels.
  """
  if self._num_gpus > 1:
    dataset = dataset.shard(self._num_gpus, hvd.rank())

  if self.is_training:
    # Shuffle the input files (the result must be reassigned; `shuffle` is not
    # in-place).
    dataset = dataset.shuffle(buffer_size=self._file_shuffle_buffer_size)

  if self.is_training and not self._cache:
    dataset = dataset.repeat()

  # Read the data from disk in parallel.
  dataset = dataset.interleave(
      tf.data.TFRecordDataset,
      cycle_length=10,
      block_length=1,
      num_parallel_calls=tf.data.experimental.AUTOTUNE)

  if self._cache:
    dataset = dataset.cache()

  if self.is_training:
    dataset = dataset.shuffle(self._shuffle_buffer_size)
    dataset = dataset.repeat()

  # Parse, pre-process, and batch the data in parallel.
  preprocess = self.parse_record
  dataset = dataset.map(preprocess,
                        num_parallel_calls=tf.data.experimental.AUTOTUNE)

  if self._num_gpus > 1:
    # The batch size of the dataset will be multiplied by the number of
    # replicas automatically when strategy.distribute_datasets_from_function
    # is called, so we use the local batch size here.
    dataset = dataset.batch(self.local_batch_size,
                            drop_remainder=self.is_training)
  else:
    dataset = dataset.batch(self.global_batch_size,
                            drop_remainder=self.is_training)

  # Apply Mixup (disabled at evaluation time via alpha=0.0).
  mixup_alpha = self.mixup_alpha if self.is_training else 0.0
  dataset = dataset.map(
      functools.partial(self.mixup, self.local_batch_size, mixup_alpha),
      num_parallel_calls=64)

  # Prefetching overlaps the input feed with training.
  dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

  return dataset
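# A minimal sketch of a mixup map function compatible with the
# `functools.partial(self.mixup, batch_size, alpha)` call above. This is NOT
# the original `self.mixup`; the name, the one-hot-label assumption, and the
# Beta sampling via two Gamma draws are illustrative assumptions.
import tensorflow as tf


def mixup_sketch(batch_size: int, alpha: float, images: tf.Tensor,
                 labels: tf.Tensor):
  """Mixes each example in a batch with a reversed copy of the batch."""
  if alpha == 0.0:
    # Evaluation path: the pipeline passes alpha=0.0, so mixing is a no-op.
    return images, labels
  # Sample one coefficient per example from Beta(alpha, alpha), using the
  # identity Beta(a, a) = G1 / (G1 + G2) with G1, G2 ~ Gamma(a, 1).
  gamma_1 = tf.random.gamma([batch_size], alpha)
  gamma_2 = tf.random.gamma([batch_size], alpha)
  lam = gamma_1 / (gamma_1 + gamma_2)
  lam_img = tf.reshape(lam, [batch_size, 1, 1, 1])
  lam_lab = tf.reshape(lam, [batch_size, 1])
  # Convex combination of the batch with its reverse (a cheap pairing scheme).
  images = lam_img * images + (1.0 - lam_img) * tf.reverse(images, axis=[0])
  labels = lam_lab * labels + (1.0 - lam_lab) * tf.reverse(labels, axis=[0])
  return images, labels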
def _maybe_apply_data_service(
    self,
    dataset: tf.data.Dataset,
    input_context: Optional[tf.distribute.InputContext] = None
) -> tf.data.Dataset:
  """Potentially distributes a dataset."""
  if self._enable_tf_data_service and input_context:
    if self._enable_round_robin_tf_data_service:
      replicas_per_input_pipeline = input_context.num_replicas_in_sync // (
          input_context.num_input_pipelines)
      base_consumer_index = input_context.input_pipeline_id * (
          replicas_per_input_pipeline)
      num_consumers = input_context.num_input_pipelines * (
          replicas_per_input_pipeline)
      range_dataset = tf.data.Dataset.range(replicas_per_input_pipeline)
      tfds_kwargs = {
          'processing_mode': 'parallel_epochs',
          'service': self._tf_data_service_address,
          'job_name': self._tf_data_service_job_name,
          'num_consumers': num_consumers,
      }
      if self._enable_shared_tf_data_service_between_parallel_trainers:
        raise ValueError('Shared tf.data service does not support round-robin'
                         ' tf.data service.')
      dataset = range_dataset.map(
          lambda i: dataset.apply(  # pylint: disable=g-long-lambda
              tf.data.experimental.service.distribute(
                  consumer_index=base_consumer_index + i, **tfds_kwargs)))
      # Use parallel interleave to read multiple batches from a tf.data
      # service worker in parallel.
      dataset = dataset.interleave(
          lambda x: x,
          cycle_length=replicas_per_input_pipeline,
          num_parallel_calls=replicas_per_input_pipeline,
          deterministic=True)
    else:
      tfds_kwargs = {
          'processing_mode': 'parallel_epochs',
          'service': self._tf_data_service_address,
          'job_name': self._tf_data_service_job_name,
      }
      if self._enable_shared_tf_data_service_between_parallel_trainers:
        tfds_kwargs.update({
            'processing_mode':
                tf.data.experimental.service.ShardingPolicy.OFF,
            'cross_trainer_cache':
                tf.data.experimental.service.CrossTrainerCache(
                    trainer_id=self._trainer_id),
        })
      dataset = dataset.apply(
          tf.data.experimental.service.distribute(**tfds_kwargs))
  return dataset
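# A small, self-contained sketch of the round-robin consumer bookkeeping used
# above: with R replicas in sync spread over N input pipelines, each pipeline
# drives R // N consumers, starting at pipeline_id * (R // N). The concrete
# numbers below are illustrative assumptions, not values from the original.
num_replicas_in_sync = 8   # R
num_input_pipelines = 2    # N
input_pipeline_id = 1      # This host's input pipeline.

replicas_per_input_pipeline = num_replicas_in_sync // num_input_pipelines  # 4
base_consumer_index = input_pipeline_id * replicas_per_input_pipeline      # 4
num_consumers = num_input_pipelines * replicas_per_input_pipeline          # 8

# Pipeline 1 therefore reads consumer indices 4..7, one per local replica,
# which is exactly the range that `range_dataset.map(...)` iterates over.
assert [base_consumer_index + i
        for i in range(replicas_per_input_pipeline)] == [4, 5, 6, 7]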
def episodes_to_timestep_batched_transitions(
    episode_dataset: tf.data.Dataset,
    return_horizon: int = 10,
    drop_return_horizon: bool = False,
    min_return_filter: Optional[float] = None) -> tf.data.Dataset:
  """Processes an existing episode dataset, converting it to 3-transitions.

  A 3-transition is a Transition in which each attribute has an extra leading
  dimension of size 3, representing 3 consecutive timesteps. The 3-step
  objects are in random order relative to each other. See
  `episode_to_timestep_batch` for more information.

  Args:
    episode_dataset: An RLDS dataset to process.
    return_horizon: The horizon over which to calculate Monte-Carlo returns.
    drop_return_horizon: Whether to drop the last `return_horizon` steps.
    min_return_filter: Minimum episode return below which an episode is
      dropped.

  Returns:
    A tf.data.Dataset of 3-transitions.
  """
  dataset = episode_dataset.interleave(
      functools.partial(
          episode_to_timestep_batch,
          return_horizon=return_horizon,
          drop_return_horizon=drop_return_horizon,
          calculate_episode_return=min_return_filter is not None),
      num_parallel_calls=tf.data.experimental.AUTOTUNE,
      deterministic=False)

  if min_return_filter is not None:

    def filter_on_return(step):
      return step[EPISODE_RETURN][0][0] > min_return_filter

    dataset = dataset.filter(filter_on_return)

  dataset = dataset.map(
      _step_to_transition, num_parallel_calls=tf.data.experimental.AUTOTUNE)

  return dataset
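# A hedged usage sketch: an RLDS-formatted episode dataset is converted to
# shuffled 3-transitions and then batched for a learner. The TFDS dataset
# name 'my_rlds_dataset', the split, and the batch/shuffle sizes are
# hypothetical placeholders, not part of the original code.
import tensorflow_datasets as tfds

episodes = tfds.load('my_rlds_dataset', split='train')  # hypothetical name
transitions = episodes_to_timestep_batched_transitions(
    episodes, return_horizon=10, min_return_filter=None)
transitions = transitions.shuffle(10_000).batch(256).prefetch(2)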
def _maybe_apply_data_service(
    self,
    dataset: tf.data.Dataset,
    input_context: Optional[tf.distribute.InputContext] = None
) -> tf.data.Dataset:
  """Potentially distributes a dataset."""
  if self._enable_tf_data_service and input_context:
    if self._enable_round_robin_tf_data_service:
      replicas_per_input_pipeline = input_context.num_replicas_in_sync // (
          input_context.num_input_pipelines)
      base_consumer_index = input_context.input_pipeline_id * (
          replicas_per_input_pipeline)
      num_consumers = input_context.num_input_pipelines * (
          replicas_per_input_pipeline)
      range_dataset = tf.data.Dataset.range(replicas_per_input_pipeline)
      dataset = range_dataset.map(
          lambda i: dataset.apply(  # pylint: disable=g-long-lambda
              tf.data.experimental.service.distribute(
                  processing_mode='parallel_epochs',
                  service=self._tf_data_service_address,
                  job_name=self._tf_data_service_job_name,
                  consumer_index=base_consumer_index + i,
                  num_consumers=num_consumers)))
      # Use parallel interleave to read multiple batches from a tf.data
      # service worker in parallel.
      dataset = dataset.interleave(
          lambda x: x,
          cycle_length=replicas_per_input_pipeline,
          num_parallel_calls=replicas_per_input_pipeline,
          deterministic=True)
    else:
      dataset = dataset.apply(
          tf.data.experimental.service.distribute(
              processing_mode='parallel_epochs',
              service=self._tf_data_service_address,
              job_name=self._tf_data_service_job_name))
  return dataset
def pipeline(self, dataset: tf.data.Dataset) -> tf.data.Dataset:
  """Builds a pipeline that fetches, shuffles, and preprocesses the dataset.

  Args:
    dataset: A `tf.data.Dataset` that loads raw files.

  Returns:
    A TensorFlow dataset outputting batched images and labels.
  """
  if (self.config.builder != 'tfds' and self.input_context and
      self.input_context.num_input_pipelines > 1):
    dataset = dataset.shard(self.input_context.num_input_pipelines,
                            self.input_context.input_pipeline_id)
    logging.info(
        'Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d',
        self.input_context.input_pipeline_id,
        self.input_context.num_input_pipelines)

  if self.is_training and self.config.builder == 'records':
    # Shuffle the input files (the result must be reassigned; `shuffle` is not
    # in-place).
    dataset = dataset.shuffle(
        buffer_size=self.config.file_shuffle_buffer_size)

  if self.is_training and not self.config.cache:
    dataset = dataset.repeat()

  if self.config.builder == 'records':
    # Read the data from disk in parallel.
    dataset = dataset.interleave(
        tf.data.TFRecordDataset,
        cycle_length=10,
        block_length=1,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)

  if self.config.cache:
    dataset = dataset.cache()

  if self.is_training:
    dataset = dataset.shuffle(self.config.shuffle_buffer_size)
    dataset = dataset.repeat()

  # Parse, pre-process, and batch the data in parallel.
  if self.config.builder == 'records':
    preprocess = self.parse_record
  else:
    preprocess = self.preprocess
  dataset = dataset.map(preprocess,
                        num_parallel_calls=tf.data.experimental.AUTOTUNE)

  if self.input_context and self.config.num_devices > 1:
    if not self.config.use_per_replica_batch_size:
      raise ValueError(
          'The builder does not support a global batch size with more than '
          'one replica. Got {} replicas. Please set a '
          '`per_replica_batch_size` and enable '
          '`use_per_replica_batch_size=True`.'.format(self.config.num_devices))

    # The batch size of the dataset will be multiplied by the number of
    # replicas automatically when strategy.distribute_datasets_from_function
    # is called, so we use the local batch size here.
    dataset = dataset.batch(self.local_batch_size,
                            drop_remainder=self.is_training)
  else:
    dataset = dataset.batch(self.global_batch_size,
                            drop_remainder=self.is_training)

  # Prefetching overlaps the input feed with training.
  dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

  if self.config.tf_data_service:
    if not hasattr(tf.data.experimental, 'service'):
      raise ValueError('The tf_data_service flag requires TensorFlow version '
                       '>= 2.3.0, but the version is {}'.format(
                           tf.__version__))
    dataset = dataset.apply(
        tf.data.experimental.service.distribute(
            processing_mode='parallel_epochs',
            service=self.config.tf_data_service,
            job_name='resnet_train'))
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

  return dataset
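# A hedged sketch of standing up an in-process tf.data service to exercise the
# `tf_data_service` path above locally. It uses the public
# tf.data.experimental.service dispatcher/worker APIs; the tiny range dataset
# and the reuse of the 'resnet_train' job name are illustrative only.
import tensorflow as tf

dispatcher = tf.data.experimental.service.DispatchServer()
worker = tf.data.experimental.service.WorkerServer(
    tf.data.experimental.service.WorkerConfig(
        dispatcher_address=dispatcher.target.split('://')[1]))

dataset = tf.data.Dataset.range(10)
dataset = dataset.apply(
    tf.data.experimental.service.distribute(
        processing_mode='parallel_epochs',
        service=dispatcher.target,
        job_name='resnet_train'))
print(list(dataset.as_numpy_iterator()))  # Elements now come via the service.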
def pipeline(
    self,
    dataset: tf.data.Dataset,
    input_context: tf.distribute.InputContext = None
) -> tf.data.Dataset:
  """Builds a pipeline that fetches, shuffles, and preprocesses the dataset.

  Args:
    dataset: A `tf.data.Dataset` that loads raw files.
    input_context: An optional context provided by `tf.distribute` for
      cross-replica training. This isn't necessary if using Keras
      compile/fit.

  Returns:
    A TensorFlow dataset outputting batched images and labels.
  """
  if input_context and input_context.num_input_pipelines > 1:
    dataset = dataset.shard(input_context.num_input_pipelines,
                            input_context.input_pipeline_id)

  if self.is_training and not self.config.cache:
    dataset = dataset.repeat()

  if self.config.builder == 'records':
    # Read the data from disk in parallel.
    buffer_size = 8 * 1024 * 1024  # Use 8 MiB per file.
    dataset = dataset.interleave(
        lambda name: tf.data.TFRecordDataset(name, buffer_size=buffer_size),
        cycle_length=16,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)

  dataset = dataset.prefetch(self.global_batch_size)

  if self.config.cache:
    dataset = dataset.cache()

  if self.is_training:
    dataset = dataset.shuffle(self.config.shuffle_buffer_size)
    dataset = dataset.repeat()

  # Parse, pre-process, and batch the data in parallel.
  if self.config.builder == 'records':
    preprocess = self.parse_record
  else:
    preprocess = self.preprocess
  dataset = dataset.map(preprocess,
                        num_parallel_calls=tf.data.experimental.AUTOTUNE)

  dataset = dataset.batch(self.batch_size, drop_remainder=self.is_training)

  # Note: we could do image normalization here, but we defer it to the model,
  # which can perform it much faster on a GPU/TPU.
  # TODO(dankondratyuk): if we fix prefetching, we can do it here.

  if self.is_training and self.config.deterministic_train is not None:
    options = tf.data.Options()
    options.experimental_deterministic = self.config.deterministic_train
    options.experimental_slack = self.config.use_slack
    options.experimental_optimization.parallel_batch = True
    options.experimental_optimization.map_fusion = True
    options.experimental_optimization.map_vectorization.enabled = True
    options.experimental_optimization.map_parallelization = True
    dataset = dataset.with_options(options)

  # Prefetching overlaps the input feed with training.
  # Note: autotune here is not recommended, as it can lead to memory leaks.
  # Instead, use a constant prefetch size such as the number of devices.
  dataset = dataset.prefetch(self.config.num_devices)

  return dataset
def pipeline(
    self,
    dataset: tf.data.Dataset,
    input_context: tf.distribute.InputContext = None
) -> tf.data.Dataset:
  """Builds a pipeline that fetches, shuffles, and preprocesses the dataset.

  Args:
    dataset: A `tf.data.Dataset` that loads raw files.
    input_context: An optional context provided by `tf.distribute` for
      cross-replica training. If set with more than one replica, this
      function assumes `use_per_replica_batch_size=True`.

  Returns:
    A TensorFlow dataset outputting batched images and labels.
  """
  if input_context and input_context.num_input_pipelines > 1:
    dataset = dataset.shard(input_context.num_input_pipelines,
                            input_context.input_pipeline_id)

  if self.is_training and not self.config.cache:
    dataset = dataset.repeat()

  if self.config.builder == 'records':
    # Read the data from disk in parallel.
    buffer_size = 8 * 1024 * 1024  # Use 8 MiB per file.
    dataset = dataset.interleave(
        lambda name: tf.data.TFRecordDataset(name, buffer_size=buffer_size),
        cycle_length=16,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)

  if self.config.cache:
    dataset = dataset.cache()

  if self.is_training:
    dataset = dataset.shuffle(self.config.shuffle_buffer_size)
    dataset = dataset.repeat()

  # Parse, pre-process, and batch the data in parallel.
  if self.config.builder == 'records':
    preprocess = self.parse_record
  else:
    preprocess = self.preprocess
  dataset = dataset.map(preprocess,
                        num_parallel_calls=tf.data.experimental.AUTOTUNE)

  if input_context and self.config.num_devices > 1:
    if not self.config.use_per_replica_batch_size:
      raise ValueError(
          'The builder does not support a global batch size with more than '
          'one replica. Got {} replicas. Please set a '
          '`per_replica_batch_size` and enable '
          '`use_per_replica_batch_size=True`.'.format(self.config.num_devices))

    # The batch size of the dataset will be multiplied by the number of
    # replicas automatically when strategy.distribute_datasets_from_function
    # is called, so we use the local batch size here.
    dataset = dataset.batch(self.local_batch_size,
                            drop_remainder=self.is_training)
  else:
    dataset = dataset.batch(self.global_batch_size,
                            drop_remainder=self.is_training)

  if self.is_training:
    options = tf.data.Options()
    options.experimental_deterministic = self.config.deterministic_train
    options.experimental_slack = self.config.use_slack
    options.experimental_optimization.parallel_batch = True
    options.experimental_optimization.map_fusion = True
    options.experimental_optimization.map_vectorization.enabled = True
    options.experimental_optimization.map_parallelization = True
    dataset = dataset.with_options(options)

  # Prefetching overlaps the input feed with training.
  dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

  return dataset
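# A hedged sketch of how a per-replica-batched pipeline like the one above is
# typically consumed: tf.distribute supplies the InputContext and the local
# batch size is multiplied across replicas automatically. The trivial range
# dataset and the global batch size of 256 are illustrative assumptions; the
# real code would call builder.pipeline(raw_dataset, input_context) instead.
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()


def dataset_fn(input_context: tf.distribute.InputContext) -> tf.data.Dataset:
  # Derive the per-replica (local) batch size from the global one.
  per_replica_batch_size = input_context.get_per_replica_batch_size(
      global_batch_size=256)
  return tf.data.Dataset.range(1024).batch(per_replica_batch_size,
                                           drop_remainder=True)


dist_dataset = strategy.distribute_datasets_from_function(dataset_fn)
for batch in dist_dataset:
  break  # Each element holds one per-replica batch per device.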
def transform(self, ds: tf.data.Dataset) -> tf.data.Dataset:
  ds = ds.interleave(
      self.item_to_dataset,
      cycle_length=self.cycle_length,
      block_length=self.block_length,
      num_parallel_calls=tf.data.experimental.AUTOTUNE)
  return ds
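# A hedged usage sketch of an interleave transform like the one above:
# `item_to_dataset` turns each element (here a filename) into its own dataset,
# and interleave flattens the per-item datasets while reading `cycle_length`
# of them concurrently. The glob path and the parameter values are
# hypothetical placeholders.
import tensorflow as tf

filenames = tf.data.Dataset.list_files(
    '/path/to/records/*.tfrecord', shuffle=False)  # hypothetical glob
records = filenames.interleave(
    lambda path: tf.data.TFRecordDataset(path),
    cycle_length=4,
    block_length=16,
    num_parallel_calls=tf.data.experimental.AUTOTUNE)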
def pipeline(self, dataset: tf.data.Dataset) -> tf.data.Dataset:
  """Builds a pipeline that fetches, shuffles, and preprocesses the dataset.

  Args:
    dataset: A `tf.data.Dataset` that loads raw files.

  Returns:
    A TensorFlow dataset outputting batched images and labels.
  """
  # This can help resolve OOM issues when using only 1 GPU for training.
  options = tf.data.Options()
  options.experimental_optimization.map_parallelization = (
      not self.disable_map_parallelization)
  dataset = dataset.with_options(options)

  if self._num_gpus > 1:
    # For multi-host training, we want each host to always process the same
    # subset of files. Each host only sees a subset of the entire dataset,
    # allowing us to cache larger datasets in memory.
    dataset = dataset.shard(self._num_gpus, hvd.rank())

  if self.is_training:
    # Shuffle the input files (the result must be reassigned; `shuffle` is not
    # in-place).
    dataset = dataset.shuffle(buffer_size=self._file_shuffle_buffer_size)

  if self.is_training and not self._cache:
    dataset = dataset.repeat()

  # Read the data from disk in parallel.
  dataset = dataset.interleave(
      tf.data.TFRecordDataset,
      cycle_length=10,
      block_length=1,
      num_parallel_calls=tf.data.experimental.AUTOTUNE)

  if self._cache:
    dataset = dataset.cache()

  if self.is_training:
    dataset = dataset.shuffle(self._shuffle_buffer_size)
    dataset = dataset.repeat()

  # Parse, pre-process, and batch the data in parallel.
  preprocess = self.parse_record
  dataset = dataset.map(preprocess,
                        num_parallel_calls=tf.data.experimental.AUTOTUNE)

  if self._num_gpus > 1:
    # The batch size of the dataset will be multiplied by the number of
    # replicas automatically when strategy.distribute_datasets_from_function
    # is called, so we use the local batch size here.
    dataset = dataset.batch(self.local_batch_size,
                            drop_remainder=self.is_training)
  else:
    dataset = dataset.batch(self.global_batch_size,
                            drop_remainder=self.is_training)

  # Apply Mixup/CutMix only during training if requested in the data pipeline;
  # otherwise they are applied in the model module on device.
  mixup_alpha = self.mixup_alpha if self.is_training else 0.0
  cutmix_alpha = self.cutmix_alpha if self.is_training else 0.0
  dataset = dataset.map(
      functools.partial(mixing, self.local_batch_size, mixup_alpha,
                        cutmix_alpha, self.defer_img_mixing),
      num_parallel_calls=64)

  # Assign a static batch size dimension.
  # dataset = dataset.map(
  #     functools.partial(self.set_shapes, batch_size),
  #     num_parallel_calls=tf.data.experimental.AUTOTUNE)

  # Prefetching overlaps the input feed with training.
  dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

  return dataset
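# A hedged sketch of the commented-out `set_shapes` helper referenced above:
# pinning a static batch dimension on the batched tensors can help graph/XLA
# compilation. The name, the signature, and the one-hot label layout are
# illustrative assumptions, not taken from the original module.
import tensorflow as tf


def set_shapes_sketch(batch_size, images, labels):
  """Statically sets the batch dimension of a batched (images, labels) pair."""
  images.set_shape(images.get_shape().merge_with(
      tf.TensorShape([batch_size, None, None, None])))
  labels.set_shape(labels.get_shape().merge_with(
      tf.TensorShape([batch_size, None])))
  return images, labels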