class MapVectorizationOptions(options.OptionsBase):
  """Represents options for the MapVectorization optimization."""
  # TODO(rachelim): Other configuration parameters can go here, for example,
  # how many "experiments" to run with ChooseFastestBranchDataset.
  enabled = options.create_option(
      name="enabled",
      ty=bool,
      docstring=
      "Whether to vectorize map transformations. If None, defaults to False.")

  use_choose_fastest = options.create_option(
      name="use_choose_fastest",
      ty=bool,
      docstring="Whether to use ChooseFastestBranchDataset with this "
      "transformation. If True, the pipeline picks between the vectorized and "
      "original segment at runtime based on their iteration speed. If None, "
      "defaults to False.")

  def _graph_rewrites(self):
    if self.enabled:
      return ["map_vectorization"]
    return []

  def _graph_rewrite_configs(self):
    if not self.enabled:
      return []
    if self.use_choose_fastest:
      return ["map_vectorization:use_choose_fastest:true"]
    else:
      return ["map_vectorization:use_choose_fastest:false"]
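# A minimal usage sketch for the class above (hedged; assumes a TF build of
# this era where `tf.data.experimental.OptimizationOptions` exposes
# `map_vectorization` as a `MapVectorizationOptions` instance):
import tensorflow as tf

options = tf.data.Options()
options.experimental_optimization.map_vectorization.enabled = True
options.experimental_optimization.map_vectorization.use_choose_fastest = True

dataset = tf.data.Dataset.range(8).map(lambda x: x * 2).batch(4)
# The rewrite names/configs computed above are consumed when the dataset
# graph is optimized.
dataset = dataset.with_options(options)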
class DistributeOptions(options.OptionsBase):
  """Represents options for distributed data processing.

  You can set the distribution options of a dataset through the
  `experimental_distribute` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.DistributeOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_distribute.auto_shard = False
  dataset = dataset.with_options(options)
  ```
  """

  auto_shard = options.create_option(
      name="auto_shard",
      ty=bool,
      docstring=
      "Whether the dataset should be automatically sharded when processed "
      "in a distributed fashion. This is applicable when using Keras with a "
      "multi-worker/TPU distribution strategy, or when using "
      "strategy.experimental_distribute_dataset(). In other cases, this "
      "option does nothing. If None, defaults to True.",
      default_factory=lambda: True)

  num_devices = options.create_option(
      name="num_devices",
      ty=int,
      docstring=
      "The number of devices attached to this input pipeline. This will be "
      "automatically set by MultiDeviceIterator.")
class ThreadingOptions(options.OptionsBase):
  """Represents options for dataset threading.

  You can set the threading options of a dataset through the
  `experimental_threading` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.ThreadingOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_threading.private_threadpool_size = 10
  dataset = dataset.with_options(options)
  ```
  """

  max_intra_op_parallelism = options.create_option(
      name="max_intra_op_parallelism",
      ty=int,
      docstring=
      "If set, it overrides the maximum degree of intra-op parallelism.")

  private_threadpool_size = options.create_option(
      name="private_threadpool_size",
      ty=int,
      docstring=
      "If set, the dataset will use a private threadpool of the given size.")
class ThreadingOptions(options.OptionsBase):
  """Represents options for dataset threading.

  To apply `ThreadingOptions` to a `dataset` object, use the following
  pattern:

  ```python
  options = tf.data.Options()
  options.experimental_threading = tf.data.experimental.ThreadingOptions()
  options.experimental_threading.private_threadpool_size = 10
  dataset = dataset.with_options(options)
  ```
  """

  max_intra_op_parallelism = options.create_option(
      name="max_intra_op_parallelism",
      ty=int,
      docstring=
      "If set, it overrides the maximum degree of intra-op parallelism.")

  private_threadpool_size = options.create_option(
      name="private_threadpool_size",
      ty=int,
      docstring=
      "If set, the dataset will use a private threadpool of the given size.",
      default=None)
class _TestOptions(options.OptionsBase):
  x = options.create_option(
      name="x",
      ty=int,
      docstring="the answer to everything",
      default_factory=lambda: 42)
  y = options.create_option(
      name="y",
      ty=float,
      docstring="a tasty pie",
      default_factory=lambda: 3.14)
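# Sketch of what the `default_factory` arguments above imply: a freshly
# constructed options object reports the factory-produced defaults until a
# value is explicitly assigned.
opts = _TestOptions()
assert opts.x == 42 and opts.y == 3.14
opts.x = 7  # `create_option` defines a typed, documented property
assert opts.x == 7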
class DistributeOptions(options.OptionsBase):
  """Represents options for distributed data processing.

  You can set the distribution options of a dataset through the
  `experimental_distribute` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.DistributeOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_distribute.auto_shard = False
  dataset = dataset.with_options(options)
  ```
  """

  auto_shard = options.create_option(
      name="auto_shard",
      ty=bool,
      docstring="Whether the dataset should be automatically sharded when "
      "processed in a distributed fashion. This is applicable when using "
      "Keras with a multi-worker/TPU distribution strategy, or when using "
      "strategy.experimental_distribute_dataset(). You can control the "
      "behavior of the auto sharder via the `auto_shard_policy` option. In "
      "other cases, this option does nothing. If None, defaults to True.",
      default_factory=lambda: True)

  auto_shard_policy = options.create_option(
      name="auto_shard_policy",
      ty=AutoShardPolicy,
      docstring="The type of sharding that auto-shard should attempt. If this "
      "is set to FILE, then we will attempt to shard by files (each worker "
      "will get a set of files to process). If we cannot find a set of files "
      "to shard for at least one file per worker, we will error out. When "
      "this option is selected, make sure that you have enough files so that "
      "each worker gets at least one file. There will be a runtime error "
      "thrown if there are insufficient files. "
      "If this is set to DATA, then we will shard by elements produced by "
      "the dataset, and each worker will process the whole dataset and "
      "discard the portion that is not for itself. "
      "This option is set to AUTO by default; AUTO will first attempt to "
      "shard by FILE, and fall back to sharding by DATA if we cannot find a "
      "set of files to shard.",
      default_factory=lambda: AutoShardPolicy.AUTO)

  _make_stateless = options.create_option(
      name="_make_stateless",
      ty=bool,
      docstring=
      "Determines whether the input pipeline should be rewritten to not "
      "contain stateful transformations (so that its graph can be moved "
      "between devices).")

  num_devices = options.create_option(
      name="num_devices",
      ty=int,
      docstring=
      "The number of devices attached to this input pipeline. This will be "
      "automatically set by MultiDeviceIterator.")
class AutotuneOptions(options_lib.OptionsBase):
  """Represents options for autotuning dataset performance.

  ```python
  options = tf.data.Options()
  options.autotune.enabled = False
  dataset = dataset.with_options(options)
  ```
  """

  enabled = options_lib.create_option(
      name="enabled",
      ty=bool,
      docstring="Whether to automatically tune performance knobs. If None, "
      "defaults to True.")

  cpu_budget = options_lib.create_option(
      name="cpu_budget",
      ty=int,
      docstring="When autotuning is enabled (through `autotune`), determines "
      "the CPU budget to use. Values greater than the number of schedulable "
      "CPU cores are allowed but may result in CPU contention. If None, "
      "defaults to the number of schedulable CPU cores.")

  ram_budget = options_lib.create_option(
      name="ram_budget",
      ty=int,
      docstring="When autotuning is enabled (through `autotune`), determines "
      "the RAM budget to use. Values greater than the available RAM in bytes "
      "may result in OOM. If None, defaults to half of the available RAM in "
      "bytes.")

  def _to_proto(self):
    pb = dataset_options_pb2.AutotuneOptions()
    if self.enabled is not None:
      pb.enabled = self.enabled
    if self.cpu_budget is not None:
      pb.cpu_budget = self.cpu_budget
    if self.ram_budget is not None:
      pb.ram_budget = self.ram_budget
    return pb

  def _from_proto(self, pb):
    if pb.WhichOneof("optional_enabled") is not None:
      self.enabled = pb.enabled
    if pb.WhichOneof("optional_cpu_budget") is not None:
      self.cpu_budget = pb.cpu_budget
    if pb.WhichOneof("optional_ram_budget") is not None:
      self.ram_budget = pb.ram_budget

  def _set_mutable(self, mutable):
    """Change the mutability value to `mutable` on this options and children."""
    # pylint: disable=protected-access
    object.__setattr__(self, "_mutable", mutable)
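# Hedged sketch of the serialization contract implied by `_to_proto` /
# `_from_proto` above: only explicitly set fields survive a round trip,
# because unset options stay None and are skipped on both sides.
src = AutotuneOptions()
src.enabled = False
src.cpu_budget = 4

restored = AutotuneOptions()
restored._from_proto(src._to_proto())  # pylint: disable=protected-access
assert restored.enabled is False and restored.cpu_budget == 4
assert restored.ram_budget is None  # never set, so never serialized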
class OptimizationOptions(options.OptionsBase):
  """Represents options for dataset optimizations.

  You can apply `OptimizationOptions` to a `dataset` object, as follows:

  ```python
  options = tf.data.Options()
  options.optimization = tf.data.experimental.OptimizationOptions()
  options.optimization.map_and_batch_fusion = True
  dataset = dataset.with_options(options)
  ```
  """

  filter_fusion = options.create_option(
      name="filter_fusion",
      ty=bool,
      docstring="Whether to fuse filter transformations.")

  hoist_random_uniform = options.create_option(
      name="hoist_random_uniform",
      ty=bool,
      docstring=
      "Whether to hoist `tf.random_uniform()` ops out of map transformations.")

  map_and_batch_fusion = options.create_option(
      name="map_and_batch_fusion",
      ty=bool,
      docstring="Whether to fuse map and batch transformations.")

  map_and_filter_fusion = options.create_option(
      name="map_and_filter_fusion",
      ty=bool,
      docstring="Whether to fuse map and filter transformations.")

  map_fusion = options.create_option(
      name="map_fusion",
      ty=bool,
      docstring="Whether to fuse map transformations.")

  map_parallelization = options.create_option(
      name="map_parallelization",
      ty=bool,
      docstring="Whether to parallelize stateless map transformations.")

  map_vectorization = options.create_option(
      name="map_vectorization",
      ty=bool,
      docstring="Whether to vectorize map transformations.")

  noop_elimination = options.create_option(
      name="noop_elimination",
      ty=bool,
      docstring="Whether to eliminate no-op transformations.")

  shuffle_and_repeat_fusion = options.create_option(
      name="shuffle_and_repeat_fusion",
      ty=bool,
      docstring="Whether to fuse shuffle and repeat transformations.")
class DistributeOptions(options.OptionsBase):
  """Represents options for distributed data processing.

  You can set the distribution options of a dataset through the
  `experimental_distribute` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.DistributeOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_distribute.auto_shard_policy = AutoShardPolicy.OFF
  dataset = dataset.with_options(options)
  ```
  """

  auto_shard_policy = options.create_option(
      name="auto_shard_policy",
      ty=AutoShardPolicy,
      docstring="The type of sharding that auto-shard should attempt. If this "
      "is set to FILE, then we will attempt to shard by files (each worker "
      "will get a set of files to process). If we cannot find a set of files "
      "to shard for at least one file per worker, we will error out. When "
      "this option is selected, make sure that you have enough files so that "
      "each worker gets at least one file. There will be a runtime error "
      "thrown if there are insufficient files. "
      "If this is set to DATA, then we will shard by elements produced by "
      "the dataset, and each worker will process the whole dataset and "
      "discard the portion that is not for itself. "
      "If this is set to OFF, then we will not autoshard, and each worker "
      "will receive a copy of the full dataset. "
      "This option is set to AUTO by default; AUTO will first attempt to "
      "shard by FILE, and fall back to sharding by DATA if we cannot find a "
      "set of files to shard.",
      default_factory=lambda: AutoShardPolicy.AUTO)

  num_devices = options.create_option(
      name="num_devices",
      ty=int,
      docstring=
      "The number of devices attached to this input pipeline. This will be "
      "automatically set by MultiDeviceIterator.")

  def _to_proto(self):
    pb = dataset_options_pb2.DistributeOptions()
    pb.auto_shard_policy = AutoShardPolicy._to_proto(self.auto_shard_policy)  # pylint: disable=protected-access
    if self.num_devices is not None:
      pb.num_devices = self.num_devices
    return pb

  def _from_proto(self, pb):
    self.auto_shard_policy = AutoShardPolicy._from_proto(pb.auto_shard_policy)  # pylint: disable=protected-access
    if pb.WhichOneof("optional_num_devices") is not None:
      self.num_devices = pb.num_devices
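# Usage sketch for the variant above (hedged; assumes a TF build where
# `tf.data.experimental.AutoShardPolicy` is exported publicly):
import tensorflow as tf

dataset = tf.data.Dataset.range(100)
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = (
    tf.data.experimental.AutoShardPolicy.OFF)  # every worker sees all data
dataset = dataset.with_options(options)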
class StatsOptions(options.OptionsBase):
  """Represents options for collecting dataset stats using `StatsAggregator`.

  To apply `StatsOptions` with a `tf.data.Dataset` object, use the following
  pattern:

  ```python
  aggregator = tf.data.experimental.StatsAggregator()

  options = tf.data.Options()
  options.experimental_stats = tf.data.experimental.StatsOptions()
  options.experimental_stats.aggregator = aggregator
  dataset = dataset.with_options(options)
  iterator = dataset.make_one_shot_iterator()
  ```

  Note: A `StatsAggregator` object can be attached either during construction
  or provided later, as in the example above.

  ```python
  aggregator = tf.data.experimental.StatsAggregator()
  # attach aggregator during construction
  options.experimental_stats = tf.data.experimental.StatsOptions(aggregator)
  .....
  ```
  """

  aggregator = options.create_option(
      name="aggregator",
      ty=stats_aggregator.StatsAggregator,
      docstring=
      "Associates the given statistics aggregator with the dataset pipeline.")

  prefix = options.create_option(
      name="prefix",
      ty=str,
      docstring=
      "Prefix to prepend to all statistics recorded for the input `dataset`.",
      default="")

  counter_prefix = options.create_option(
      name="counter_prefix",
      ty=str,
      docstring="Prefix for the statistics recorded as counter.",
      default="")

  latency_all_edges = options.create_option(
      name="latency_all_edges",
      ty=bool,
      docstring="Whether to add latency measurements on all edges.",
      default=True)
class MapVectorizationOptions(options.OptionsBase):
  """Represents options for the MapVectorization optimization."""
  # TODO(rachelim): Other configuration parameters can go here, for example,
  # how many "experiments" to run with ChooseFastestBranchDataset.
  enabled = options.create_option(
      name="enabled",
      ty=bool,
      docstring=
      "Whether to vectorize map transformations. If None, defaults to False.")

  use_choose_fastest = options.create_option(
      name="use_choose_fastest",
      ty=bool,
      docstring="Whether to use ChooseFastestBranchDataset with this "
      "transformation. If True, the pipeline picks between the vectorized and "
      "original segment at runtime based on their iteration speed. If None, "
      "defaults to False.")

  def _graph_rewrites(self):
    graph_rewrites = options.graph_rewrites()
    result = graph_rewrites(enabled=[], disabled=[], default=[])
    if self.enabled is True:  # pylint: disable=g-bool-id-comparison
      result.enabled.append("map_vectorization")
    elif self.enabled is False:  # pylint: disable=g-bool-id-comparison
      result.disabled.append("map_vectorization")
    return result

  def _graph_rewrite_configs(self):
    if not self.enabled:
      return []
    if self.use_choose_fastest:
      return ["map_vectorization:use_choose_fastest:true"]
    else:
      return ["map_vectorization:use_choose_fastest:false"]

  def _to_proto(self):
    pb = dataset_options_pb2.MapVectorization()
    if self.enabled is not None:
      pb.enabled = self.enabled
    if self.use_choose_fastest is not None:
      pb.use_choose_fastest = self.use_choose_fastest
    return pb

  def _from_proto(self, pb):
    if pb.WhichOneof("optional_enabled") is not None:
      self.enabled = pb.enabled
    if pb.WhichOneof("optional_use_choose_fastest") is not None:
      self.use_choose_fastest = pb.use_choose_fastest
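# Sketch of the enabled/disabled/default triple produced by `_graph_rewrites`
# above (`options.graph_rewrites()` is the namedtuple factory used by the
# surrounding module; this is private API, shown only to illustrate shape):
mv = MapVectorizationOptions()
mv.enabled = False
rewrites = mv._graph_rewrites()  # pylint: disable=protected-access
assert rewrites.disabled == ["map_vectorization"]
assert rewrites.enabled == [] and rewrites.default == []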
class StatsOptions(options.OptionsBase):
  """Represents options for collecting dataset stats using `StatsAggregator`.

  You can set the stats options of a dataset through the `experimental_stats`
  property of `tf.data.Options`; the property is an instance of
  `tf.data.experimental.StatsOptions`. For example, to collect latency stats
  on all dataset edges, use the following pattern:

  ```python
  aggregator = tf.data.experimental.StatsAggregator()

  options = tf.data.Options()
  options.experimental_stats.aggregator = aggregator
  options.experimental_stats.latency_all_edges = True
  dataset = dataset.with_options(options)
  ```
  """

  aggregator = options.create_option(
      name="aggregator",
      ty=(stats_aggregator.StatsAggregatorV2,
          stats_aggregator.StatsAggregatorV1),
      docstring=
      "Associates the given statistics aggregator with the dataset pipeline.")

  prefix = options.create_option(
      name="prefix",
      ty=str,
      docstring=
      "Prefix to prepend to all statistics recorded for the input `dataset`.",
      default_factory=lambda: "")

  counter_prefix = options.create_option(
      name="counter_prefix",
      ty=str,
      docstring="Prefix for the statistics recorded as counter.",
      default_factory=lambda: "")

  latency_all_edges = options.create_option(
      name="latency_all_edges",
      ty=bool,
      docstring=
      "Whether to add latency measurements on all edges. Defaults to False.")
class DistributeOptions(options.OptionsBase):
  """Represents options for distributed data processing.

  You can set the distribution options of a dataset through the
  `experimental_distribute` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.DistributeOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_distribute.auto_shard_policy = AutoShardPolicy.OFF
  dataset = dataset.with_options(options)
  ```
  """

  auto_shard_policy = options.create_option(
      name="auto_shard_policy",
      ty=AutoShardPolicy,
      docstring="The type of sharding to use. See "
      "`tf.data.experimental.AutoShardPolicy` for additional information.",
      default_factory=lambda: AutoShardPolicy.AUTO)

  num_devices = options.create_option(
      name="num_devices",
      ty=int,
      docstring=
      "The number of devices attached to this input pipeline. This will be "
      "automatically set by `MultiDeviceIterator`.")

  def _to_proto(self):
    pb = dataset_options_pb2.DistributeOptions()
    pb.auto_shard_policy = AutoShardPolicy._to_proto(self.auto_shard_policy)  # pylint: disable=protected-access
    if self.num_devices is not None:
      pb.num_devices = self.num_devices
    return pb

  def _from_proto(self, pb):
    self.auto_shard_policy = AutoShardPolicy._from_proto(pb.auto_shard_policy)  # pylint: disable=protected-access
    if pb.WhichOneof("optional_num_devices") is not None:
      self.num_devices = pb.num_devices
class ThreadingOptions(options.OptionsBase):
  """Represents options for dataset threading.

  You can set the threading options of a dataset through the
  `experimental_threading` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.ThreadingOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_threading.private_threadpool_size = 10
  dataset = dataset.with_options(options)
  ```
  """

  max_intra_op_parallelism = options.create_option(
      name="max_intra_op_parallelism",
      ty=int,
      docstring=
      "If set, it overrides the maximum degree of intra-op parallelism.")

  private_threadpool_size = options.create_option(
      name="private_threadpool_size",
      ty=int,
      docstring=
      "If set, the dataset will use a private threadpool of the given size. "
      "The value 0 can be used to indicate that the threadpool size should "
      "be determined at runtime based on the number of available CPU cores.")

  def _to_proto(self):
    pb = dataset_options_pb2.ThreadingOptions()
    if self.max_intra_op_parallelism is not None:
      pb.max_intra_op_parallelism = self.max_intra_op_parallelism
    if self.private_threadpool_size is not None:
      pb.private_threadpool_size = self.private_threadpool_size
    return pb

  def _from_proto(self, pb):
    if pb.WhichOneof("optional_max_intra_op_parallelism") is not None:
      self.max_intra_op_parallelism = pb.max_intra_op_parallelism
    if pb.WhichOneof("optional_private_threadpool_size") is not None:
      self.private_threadpool_size = pb.private_threadpool_size
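# Per the docstring above, a `private_threadpool_size` of 0 asks the runtime
# to size the private threadpool from the available CPU cores (hedged sketch
# using the public option names declared in the class above):
import tensorflow as tf

options = tf.data.Options()
options.experimental_threading.private_threadpool_size = 0  # auto-size
options.experimental_threading.max_intra_op_parallelism = 1
dataset = tf.data.Dataset.range(4).with_options(options)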
class OptimizationOptions(options.OptionsBase):
  """Represents options for dataset optimizations.

  You can set the optimization options of a dataset through the
  `experimental_optimization` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.OptimizationOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_optimization.noop_elimination = True
  options.experimental_optimization.map_vectorization.enabled = True
  options.experimental_optimization.apply_default_optimizations = False
  dataset = dataset.with_options(options)
  ```
  """
  apply_default_optimizations = options.create_option(
      name="apply_default_optimizations",
      ty=bool,
      docstring=
      "Whether to apply default graph optimizations. If False, only graph "
      "optimizations that have been explicitly enabled will be applied.")

  autotune = options.create_option(
      name="autotune",
      ty=bool,
      docstring=
      "Whether to automatically tune performance knobs. If None, defaults to "
      "True.")

  autotune_buffers = options.create_option(
      name="autotune_buffers",
      ty=bool,
      docstring=
      "When autotuning is enabled (through `autotune`), determines whether "
      "to also autotune buffer sizes for datasets with parallelism. If None, "
      "defaults to False.")

  autotune_cpu_budget = options.create_option(
      name="autotune_cpu_budget",
      ty=int,
      docstring=
      "When autotuning is enabled (through `autotune`), determines the CPU "
      "budget to use. Values greater than the number of schedulable CPU "
      "cores are allowed but may result in CPU contention. If None, defaults "
      "to the number of schedulable CPU cores.")

  filter_fusion = options.create_option(
      name="filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse filter transformations. If None, defaults to False.")

  filter_with_random_uniform_fusion = options.create_option(
      name="filter_with_random_uniform_fusion",
      ty=bool,
      docstring=
      "Whether to fuse filter dataset that predicts random_uniform < rate "
      "into a sampling dataset. If None, defaults to False.")

  hoist_random_uniform = options.create_option(
      name="hoist_random_uniform",
      ty=bool,
      docstring=
      "Whether to hoist `tf.random_uniform()` ops out of map "
      "transformations. If None, defaults to False.")

  map_and_batch_fusion = options.create_option(
      name="map_and_batch_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and batch transformations. If None, defaults to "
      "True.")

  map_and_filter_fusion = options.create_option(
      name="map_and_filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and filter transformations. If None, defaults to "
      "False.")

  map_fusion = options.create_option(
      name="map_fusion",
      ty=bool,
      docstring="Whether to fuse map transformations. If None, defaults to "
      "False.")

  map_parallelization = options.create_option(
      name="map_parallelization",
      ty=bool,
      docstring=
      "Whether to parallelize stateless map transformations. If None, "
      "defaults to False.")

  map_vectorization = options.create_option(
      name="map_vectorization",
      ty=MapVectorizationOptions,
      docstring=
      "The map vectorization options associated with the dataset. See "
      "`tf.data.experimental.MapVectorizationOptions` for more details.",
      default_factory=MapVectorizationOptions)

  noop_elimination = options.create_option(
      name="noop_elimination",
      ty=bool,
      docstring=
      "Whether to eliminate no-op transformations. If None, defaults to True.")

  parallel_batch = options.create_option(
      name="parallel_batch",
      ty=bool,
      docstring="Whether to parallelize copying of batch elements. This "
      "optimization is highly experimental and can cause performance "
      "degradation (e.g. when the parallelization overhead exceeds the "
      "benefits of performing the data copies in parallel). You should only "
      "enable this optimization if a) your input pipeline is bottlenecked on "
      "batching and b) you have validated that this optimization improves "
      "performance. If None, defaults to False.")

  reorder_data_discarding_ops = options.create_option(
      name="reorder_data_discarding_ops",
      ty=bool,
      docstring=
      "Whether to reorder ops that will discard data to the front of "
      "unary cardinality preserving transformations, e.g. "
      "dataset.map(...).take(3) will be optimized to dataset.take(3).map(...). "
      "For now this optimization will move `skip`, `shard` and `take` to the "
      "front of `map` and `prefetch`. This optimization is only for "
      "performance; it will not affect the output of the dataset. "
      "If None, defaults to True.")

  shuffle_and_repeat_fusion = options.create_option(
      name="shuffle_and_repeat_fusion",
      ty=bool,
      docstring="Whether to fuse shuffle and repeat transformations. If None, "
      "defaults to True.")

  def _autotune_buffers(self):
    if self.autotune_buffers is not None:
      return self.autotune_buffers
    # The default setting for autotune_buffers is based on
    # _ENABLE_AUTOTUNE_BUFFERS_BY_DEFAULT.
    return _ENABLE_AUTOTUNE_BUFFERS_BY_DEFAULT

  def _autotune_settings(self):
    # Default autotune settings.
    autotune = True

    # If autotune_buffers is enabled, we use the GRADIENT_DESCENT algorithm by
    # default, which is more performant for tuning heterogeneous parameters.
    algorithm = (
        _AutotuneAlgorithm.GRADIENT_DESCENT
        if self._autotune_buffers() else _AutotuneAlgorithm.HILL_CLIMB)
    cpu_budget = 0  # Indicates that all CPU cores should be used by default.

    # Set these options if they are explicitly set by the user.
    if self.autotune is False:  # pylint: disable=g-bool-id-comparison
      autotune = False
    if self.autotune_cpu_budget is not None:
      cpu_budget = self.autotune_cpu_budget

    return autotune, algorithm, cpu_budget

  def _graph_rewrites(self):
    """Produces the list of enabled graph optimizations."""
    result = set()
    all_optimizations = [
        "filter_fusion",
        "filter_with_random_uniform_fusion",
        "hoist_random_uniform",
        "map_and_batch_fusion",
        "map_and_filter_fusion",
        "map_parallelization",
        "map_fusion",
        "noop_elimination",
        "parallel_batch",
        "reorder_data_discarding_ops",
        "shuffle_and_repeat_fusion",
    ]
    for optimization in all_optimizations:
      if getattr(self, optimization):
        result.add(optimization)

    if self.apply_default_optimizations is not False:
      # The following optimizations are turned on by default, unless the user
      # explicitly disables them.
      optimizations_to_disable = [
          "map_and_batch_fusion",
          "noop_elimination",
          "shuffle_and_repeat_fusion",
      ]
      for optimization in optimizations_to_disable:
        if getattr(self, optimization) is not False:
          result.add(optimization)

    if self.map_vectorization is not None:
      result.update(self.map_vectorization._graph_rewrites())  # pylint: disable=protected-access

    autotune_buffers = self._autotune_buffers()
    if self.autotune is not False and autotune_buffers:  # pylint: disable=g-bool-id-comparison
      # When autotuning buffer sizes is enabled, we inject a `prefetch`
      # transformation after asynchronous dataset ops. Only the buffer sizes
      # of prefetch transformations will be autotuned, though this is
      # practically equivalent to tuning the buffer sizes of the other
      # asynchronous transformations.
      result.add("inject_prefetch")
    return sorted(list(result))

  def _graph_rewrite_configs(self):
    if self.map_vectorization is not None:
      return self.map_vectorization._graph_rewrite_configs()  # pylint: disable=protected-access
    return []
class OptimizationOptions(options.OptionsBase):
  """Represents options for dataset optimizations.

  You can set the optimization options of a dataset through the
  `experimental_optimization` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.OptimizationOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_optimization.noop_elimination = True
  options.experimental_optimization.apply_default_optimizations = False
  dataset = dataset.with_options(options)
  ```
  """
  apply_default_optimizations = options.create_option(
      name="apply_default_optimizations",
      ty=bool,
      docstring=
      "Whether to apply default graph optimizations. If False, only graph "
      "optimizations that have been explicitly enabled will be applied.")

  autotune = options.create_option(
      name="autotune",
      ty=bool,
      docstring=
      "Whether to automatically tune performance knobs. If None, defaults to "
      "True.")

  autotune_buffers = options.create_option(
      name="autotune_buffers",
      ty=bool,
      docstring=
      "When autotuning is enabled (through `autotune`), determines whether "
      "to also autotune buffer sizes for datasets with parallelism. If None, "
      "defaults to False.")

  autotune_cpu_budget = options.create_option(
      name="autotune_cpu_budget",
      ty=int,
      docstring=
      "When autotuning is enabled (through `autotune`), determines the CPU "
      "budget to use. Values greater than the number of schedulable CPU "
      "cores are allowed but may result in CPU contention. If None, defaults "
      "to the number of schedulable CPU cores.")

  autotune_ram_budget = options.create_option(
      name="autotune_ram_budget",
      ty=int,
      docstring=
      "When autotuning is enabled (through `autotune`), determines the RAM "
      "budget to use. Values greater than the available RAM in bytes may "
      "result in OOM. If None, defaults to half of the available RAM in "
      "bytes.")

  filter_fusion = options.create_option(
      name="filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse filter transformations. If None, defaults to False.")

  filter_with_random_uniform_fusion = options.create_option(
      name="filter_with_random_uniform_fusion",
      ty=bool,
      docstring=
      "Whether to fuse filter dataset that predicts random_uniform < rate "
      "into a sampling dataset. If None, defaults to False.")

  hoist_random_uniform = options.create_option(
      name="hoist_random_uniform",
      ty=bool,
      docstring=
      "Whether to hoist `tf.random_uniform()` ops out of map "
      "transformations. If None, defaults to False.")

  map_and_batch_fusion = options.create_option(
      name="map_and_batch_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and batch transformations. If None, defaults to "
      "True.")

  map_and_filter_fusion = options.create_option(
      name="map_and_filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and filter transformations. If None, defaults to "
      "False.")

  map_fusion = options.create_option(
      name="map_fusion",
      ty=bool,
      docstring="Whether to fuse map transformations. If None, defaults to "
      "False.")

  map_parallelization = options.create_option(
      name="map_parallelization",
      ty=bool,
      docstring=
      "Whether to parallelize stateless map transformations. If None, "
      "defaults to True.")

  noop_elimination = options.create_option(
      name="noop_elimination",
      ty=bool,
      docstring=
      "Whether to eliminate no-op transformations. If None, defaults to True.")

  parallel_batch = options.create_option(
      name="parallel_batch",
      ty=bool,
      docstring="Whether to parallelize copying of batch elements. This "
      "optimization is highly experimental and can cause performance "
      "degradation (e.g. when the parallelization overhead exceeds the "
      "benefits of performing the data copies in parallel). You should only "
      "enable this optimization if a) your input pipeline is bottlenecked on "
      "batching and b) you have validated that this optimization improves "
      "performance. If None, defaults to False.")

  reorder_data_discarding_ops = options.create_option(
      name="reorder_data_discarding_ops",
      ty=bool,
      docstring=
      "Whether to reorder ops that will discard data to the front of "
      "unary cardinality preserving transformations, e.g. "
      "dataset.map(...).take(3) will be optimized to dataset.take(3).map(...). "
      "For now this optimization will move `skip`, `shard` and `take` to the "
      "front of `map` and `prefetch`. This optimization is only for "
      "performance; it will not affect the output of the dataset. "
      "If None, defaults to True.")

  shuffle_and_repeat_fusion = options.create_option(
      name="shuffle_and_repeat_fusion",
      ty=bool,
      docstring="Whether to fuse shuffle and repeat transformations. If None, "
      "defaults to True.")

  def _to_proto(self):
    pb = dataset_options_pb2.OptimizationOptions()
    if self.apply_default_optimizations is not None:
      pb.apply_default_optimizations = self.apply_default_optimizations
    if self.autotune is not None:
      pb.autotune = self.autotune
    if self.autotune_buffers is not None:
      pb.autotune_buffers = self.autotune_buffers
    if self.autotune_cpu_budget is not None:
      pb.autotune_cpu_budget = self.autotune_cpu_budget
    if self.autotune_ram_budget is not None:
      pb.autotune_ram_budget = self.autotune_ram_budget
    if self.filter_fusion is not None:
      pb.filter_fusion = self.filter_fusion
    if self.filter_with_random_uniform_fusion is not None:
      pb.filter_with_random_uniform_fusion = (
          self.filter_with_random_uniform_fusion)
    if self.hoist_random_uniform is not None:
      pb.hoist_random_uniform = self.hoist_random_uniform
    if self.map_and_batch_fusion is not None:
      pb.map_and_batch_fusion = self.map_and_batch_fusion
    if self.map_and_filter_fusion is not None:
      pb.map_and_filter_fusion = self.map_and_filter_fusion
    if self.map_fusion is not None:
      pb.map_fusion = self.map_fusion
    if self.map_parallelization is not None:
      pb.map_parallelization = self.map_parallelization
    if self.noop_elimination is not None:
      pb.noop_elimination = self.noop_elimination
    if self.parallel_batch is not None:
      pb.parallel_batch = self.parallel_batch
    if self.reorder_data_discarding_ops is not None:
      pb.reorder_data_discarding_ops = self.reorder_data_discarding_ops
    if self.shuffle_and_repeat_fusion is not None:
      pb.shuffle_and_repeat_fusion = self.shuffle_and_repeat_fusion
    return pb

  def _from_proto(self, pb):
    if pb.WhichOneof("optional_apply_default_optimizations") is not None:
      self.apply_default_optimizations = pb.apply_default_optimizations
    if pb.WhichOneof("optional_autotune") is not None:
      self.autotune = pb.autotune
    if pb.WhichOneof("optional_autotune_buffers") is not None:
      self.autotune_buffers = pb.autotune_buffers
    if pb.WhichOneof("optional_autotune_cpu_budget") is not None:
      self.autotune_cpu_budget = pb.autotune_cpu_budget
    if pb.WhichOneof("optional_autotune_ram_budget") is not None:
      self.autotune_ram_budget = pb.autotune_ram_budget
    if pb.WhichOneof("optional_filter_fusion") is not None:
      self.filter_fusion = pb.filter_fusion
    if pb.WhichOneof("optional_filter_with_random_uniform_fusion") is not None:
      self.filter_with_random_uniform_fusion = (
          pb.filter_with_random_uniform_fusion)
    if pb.WhichOneof("optional_hoist_random_uniform") is not None:
      self.hoist_random_uniform = pb.hoist_random_uniform
    if pb.WhichOneof("optional_map_and_batch_fusion") is not None:
      self.map_and_batch_fusion = pb.map_and_batch_fusion
    if pb.WhichOneof("optional_map_and_filter_fusion") is not None:
      self.map_and_filter_fusion = pb.map_and_filter_fusion
    if pb.WhichOneof("optional_map_fusion") is not None:
      self.map_fusion = pb.map_fusion
    if pb.WhichOneof("optional_map_parallelization") is not None:
      self.map_parallelization = pb.map_parallelization
    if pb.WhichOneof("optional_noop_elimination") is not None:
      self.noop_elimination = pb.noop_elimination
    if pb.WhichOneof("optional_parallel_batch") is not None:
      self.parallel_batch = pb.parallel_batch
    if pb.WhichOneof("optional_reorder_data_discarding_ops") is not None:
      self.reorder_data_discarding_ops = pb.reorder_data_discarding_ops
    if pb.WhichOneof("optional_shuffle_and_repeat_fusion") is not None:
      self.shuffle_and_repeat_fusion = pb.shuffle_and_repeat_fusion

  def _set_mutable(self, mutable):
    """Change the mutability value to `mutable` on this options and children."""
    # pylint: disable=protected-access
    object.__setattr__(self, "_mutable", mutable)
class OptimizationOptions(options.OptionsBase):
  """Represents options for dataset optimizations.

  You can set the optimization options of a dataset through the
  `experimental_optimization` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.OptimizationOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_optimization.map_vectorization.enabled = True
  options.experimental_optimization.apply_default_optimizations = False
  dataset = dataset.with_options(options)
  ```
  """
  apply_default_optimizations = options.create_option(
      name="apply_default_optimizations",
      ty=bool,
      docstring=
      "Whether to apply default static optimizations. If False, only static "
      "optimizations that have been explicitly enabled will be applied.")

  autotune = options.create_option(
      name="autotune",
      ty=bool,
      docstring=
      "Whether to automatically tune performance knobs. If None, defaults to "
      "True.")

  autotune_cpu_budget = options.create_option(
      name="autotune_cpu_budget",
      ty=int,
      docstring=
      "When autotuning is enabled (through `autotune`), determines the CPU "
      "budget to use. Values greater than the number of schedulable CPU "
      "cores are allowed but may result in CPU contention. If None, defaults "
      "to the number of schedulable CPU cores.")

  filter_fusion = options.create_option(
      name="filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse filter transformations. If None, defaults to False.")

  hoist_random_uniform = options.create_option(
      name="hoist_random_uniform",
      ty=bool,
      docstring=
      "Whether to hoist `tf.random_uniform()` ops out of map "
      "transformations. If None, defaults to False.")

  map_and_batch_fusion = options.create_option(
      name="map_and_batch_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and batch transformations. If None, defaults to "
      "True.")

  map_and_filter_fusion = options.create_option(
      name="map_and_filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and filter transformations. If None, defaults to "
      "False.")

  map_fusion = options.create_option(
      name="map_fusion",
      ty=bool,
      docstring="Whether to fuse map transformations. If None, defaults to "
      "False.")

  map_parallelization = options.create_option(
      name="map_parallelization",
      ty=bool,
      docstring=
      "Whether to parallelize stateless map transformations. If None, "
      "defaults to False.")

  map_vectorization = options.create_option(
      name="map_vectorization",
      ty=MapVectorizationOptions,
      docstring=
      "The map vectorization options associated with the dataset. See "
      "`tf.data.experimental.MapVectorizationOptions` for more details.",
      default_factory=MapVectorizationOptions)

  noop_elimination = options.create_option(
      name="noop_elimination",
      ty=bool,
      docstring=
      "Whether to eliminate no-op transformations. If None, defaults to True.")

  shuffle_and_repeat_fusion = options.create_option(
      name="shuffle_and_repeat_fusion",
      ty=bool,
      docstring="Whether to fuse shuffle and repeat transformations. If None, "
      "defaults to True.")

  def _static_optimizations(self):
    """Produces the list of enabled static optimizations."""
    result = set()
    all_optimizations = [
        "filter_fusion",
        "hoist_random_uniform",
        "map_and_batch_fusion",
        "map_and_filter_fusion",
        "map_parallelization",
        "map_fusion",
        "noop_elimination",
        "shuffle_and_repeat_fusion",
    ]
    for optimization in all_optimizations:
      if getattr(self, optimization):
        result.add(optimization)

    if self.apply_default_optimizations is not False:
      # The following optimizations are turned on by default, unless the
      # user explicitly disables them.
      optimizations_to_disable = [
          "map_and_batch_fusion",
          "noop_elimination",
          "shuffle_and_repeat_fusion",
      ]
      for optimization in optimizations_to_disable:
        if getattr(self, optimization) is not False:
          result.add(optimization)

    if self.map_vectorization is not None:
      result.update(self.map_vectorization._static_optimizations())  # pylint: disable=protected-access
    return sorted(list(result))

  def _static_optimization_configs(self):
    if self.map_vectorization is not None:
      return self.map_vectorization._static_optimization_configs()  # pylint: disable=protected-access
    return []
class OptimizationOptions(options.OptionsBase):
  """Represents options for dataset optimizations.

  You can set the optimization options of a dataset through the
  `experimental_optimization` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.OptimizationOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_optimization.noop_elimination = True
  options.experimental_optimization.apply_default_optimizations = False
  dataset = dataset.with_options(options)
  ```
  """
  apply_default_optimizations = options.create_option(
      name="apply_default_optimizations",
      ty=bool,
      docstring=
      "Whether to apply default graph optimizations. If False, only graph "
      "optimizations that have been explicitly enabled will be applied.")

  autotune = options.create_option(
      name="autotune",
      ty=bool,
      docstring=
      "Whether to automatically tune performance knobs. If None, defaults to "
      "True.")

  autotune_buffers = options.create_option(
      name="autotune_buffers",
      ty=bool,
      docstring=
      "When autotuning is enabled (through `autotune`), determines whether "
      "to also autotune buffer sizes for datasets with parallelism. If None, "
      "defaults to False.")

  autotune_cpu_budget = options.create_option(
      name="autotune_cpu_budget",
      ty=int,
      docstring=
      "When autotuning is enabled (through `autotune`), determines the CPU "
      "budget to use. Values greater than the number of schedulable CPU "
      "cores are allowed but may result in CPU contention. If None, defaults "
      "to the number of schedulable CPU cores.")

  autotune_ram_budget = options.create_option(
      name="autotune_ram_budget",
      ty=int,
      docstring=
      "When autotuning is enabled (through `autotune`), determines the RAM "
      "budget to use. Values greater than the available RAM in bytes may "
      "result in OOM. If None, defaults to half of the available RAM in "
      "bytes.")

  filter_fusion = options.create_option(
      name="filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse filter transformations. If None, defaults to False.")

  map_and_batch_fusion = options.create_option(
      name="map_and_batch_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and batch transformations. If None, defaults to "
      "True.")

  map_and_filter_fusion = options.create_option(
      name="map_and_filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and filter transformations. If None, defaults to "
      "False.")

  map_fusion = options.create_option(
      name="map_fusion",
      ty=bool,
      docstring="Whether to fuse map transformations. If None, defaults to "
      "False.")

  map_parallelization = options.create_option(
      name="map_parallelization",
      ty=bool,
      docstring=
      "Whether to parallelize stateless map transformations. If None, "
      "defaults to True.")

  noop_elimination = options.create_option(
      name="noop_elimination",
      ty=bool,
      docstring=
      "Whether to eliminate no-op transformations. If None, defaults to True.")

  parallel_batch = options.create_option(
      name="parallel_batch",
      ty=bool,
      docstring="Whether to parallelize copying of batch elements. This "
      "optimization is highly experimental and can cause performance "
      "degradation (e.g. when the parallelization overhead exceeds the "
      "benefits of performing the data copies in parallel). You should only "
      "enable this optimization if a) your input pipeline is bottlenecked on "
      "batching and b) you have validated that this optimization improves "
      "performance. If None, defaults to False.")

  shuffle_and_repeat_fusion = options.create_option(
      name="shuffle_and_repeat_fusion",
      ty=bool,
      docstring="Whether to fuse shuffle and repeat transformations. If None, "
      "defaults to True.")

  """
  append_forty_two = options.create_option(
      name="append_forty_two",
      ty=bool,
      docstring="Whether to append the custom forty_two_dataset_op at the "
      "end of the pipeline. If None, defaults to False.")

  add_put_op = options.create_option(
      name="add_put_op",
      ty=bool,
      docstring="Whether to append the custom add_put_op after the "
      "ModelDatasetOp node.")

  add_get_op = options.create_option(
      name="add_get_op",
      ty=bool,
      docstring="Whether to append the custom add_get_op after the "
      "ModelDatasetOp node.")
  """

  def _to_proto(self):
    pb = dataset_options_pb2.OptimizationOptions()
    if self.apply_default_optimizations is not None:
      pb.apply_default_optimizations = self.apply_default_optimizations
    if self.autotune is not None:
      pb.autotune = self.autotune
    if self.autotune_buffers is not None:
      pb.autotune_buffers = self.autotune_buffers
    if self.autotune_cpu_budget is not None:
      pb.autotune_cpu_budget = self.autotune_cpu_budget
    if self.autotune_ram_budget is not None:
      pb.autotune_ram_budget = self.autotune_ram_budget
    if self.filter_fusion is not None:
      pb.filter_fusion = self.filter_fusion
    if self.map_and_batch_fusion is not None:
      pb.map_and_batch_fusion = self.map_and_batch_fusion
    if self.map_and_filter_fusion is not None:
      pb.map_and_filter_fusion = self.map_and_filter_fusion
    if self.map_fusion is not None:
      pb.map_fusion = self.map_fusion
    if self.map_parallelization is not None:
      pb.map_parallelization = self.map_parallelization
    if self.noop_elimination is not None:
      pb.noop_elimination = self.noop_elimination
    if self.parallel_batch is not None:
      pb.parallel_batch = self.parallel_batch
    if self.shuffle_and_repeat_fusion is not None:
      pb.shuffle_and_repeat_fusion = self.shuffle_and_repeat_fusion
    return pb

  def _from_proto(self, pb):
    if pb.WhichOneof("optional_apply_default_optimizations") is not None:
      self.apply_default_optimizations = pb.apply_default_optimizations
    if pb.WhichOneof("optional_autotune") is not None:
      self.autotune = pb.autotune
    if pb.WhichOneof("optional_autotune_buffers") is not None:
      self.autotune_buffers = pb.autotune_buffers
    if pb.WhichOneof("optional_autotune_cpu_budget") is not None:
      self.autotune_cpu_budget = pb.autotune_cpu_budget
    if pb.WhichOneof("optional_autotune_ram_budget") is not None:
      self.autotune_ram_budget = pb.autotune_ram_budget
    if pb.WhichOneof("optional_filter_fusion") is not None:
      self.filter_fusion = pb.filter_fusion
    if pb.WhichOneof("optional_map_and_batch_fusion") is not None:
      self.map_and_batch_fusion = pb.map_and_batch_fusion
    if pb.WhichOneof("optional_map_and_filter_fusion") is not None:
      self.map_and_filter_fusion = pb.map_and_filter_fusion
    if pb.WhichOneof("optional_map_fusion") is not None:
      self.map_fusion = pb.map_fusion
    if pb.WhichOneof("optional_map_parallelization") is not None:
      self.map_parallelization = pb.map_parallelization
    if pb.WhichOneof("optional_noop_elimination") is not None:
      self.noop_elimination = pb.noop_elimination
    if pb.WhichOneof("optional_parallel_batch") is not None:
      self.parallel_batch = pb.parallel_batch
    if pb.WhichOneof("optional_shuffle_and_repeat_fusion") is not None:
      self.shuffle_and_repeat_fusion = pb.shuffle_and_repeat_fusion

  def _set_mutable(self, mutable):
    """Change the mutability value to `mutable` on this options and children."""
    # pylint: disable=protected-access
    object.__setattr__(self, "_mutable", mutable)
class OptimizationOptions(options.OptionsBase):
  """Represents options for dataset optimizations.

  You can set the optimization options of a dataset through the
  `experimental_optimization` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.OptimizationOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_optimization.map_vectorization = True
  options.experimental_optimization.apply_default_optimizations = False
  dataset = dataset.with_options(options)
  ```
  """
  apply_default_optimizations = options.create_option(
      name="apply_default_optimizations",
      ty=bool,
      docstring=
      "Whether to apply default static optimizations. If False, only static "
      "optimizations that have been explicitly enabled will be applied.")

  filter_fusion = options.create_option(
      name="filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse filter transformations. If None, defaults to False.")

  hoist_random_uniform = options.create_option(
      name="hoist_random_uniform",
      ty=bool,
      docstring=
      "Whether to hoist `tf.random_uniform()` ops out of map "
      "transformations. If None, defaults to False.")

  map_and_batch_fusion = options.create_option(
      name="map_and_batch_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and batch transformations. If None, defaults to "
      "True.")

  map_and_filter_fusion = options.create_option(
      name="map_and_filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and filter transformations. If None, defaults to "
      "False.")

  map_fusion = options.create_option(
      name="map_fusion",
      ty=bool,
      docstring="Whether to fuse map transformations. If None, defaults to "
      "False.")

  map_parallelization = options.create_option(
      name="map_parallelization",
      ty=bool,
      docstring=
      "Whether to parallelize stateless map transformations. If None, "
      "defaults to False.")

  map_vectorization = options.create_option(
      name="map_vectorization",
      ty=bool,
      docstring=
      "Whether to vectorize map transformations. If None, defaults to False.")

  noop_elimination = options.create_option(
      name="noop_elimination",
      ty=bool,
      docstring=
      "Whether to eliminate no-op transformations. If None, defaults to True.")

  shuffle_and_repeat_fusion = options.create_option(
      name="shuffle_and_repeat_fusion",
      ty=bool,
      docstring="Whether to fuse shuffle and repeat transformations. If None, "
      "defaults to True.")

  def _static_optimizations(self):
    """Produces the list of enabled static optimizations."""
    result = []
    optimizations_to_enable = [
        "filter_fusion",
        "hoist_random_uniform",
        "map_and_filter_fusion",
        "map_fusion",
        "map_parallelization",
        "map_vectorization",
    ]
    for optimization in optimizations_to_enable:
      if getattr(self, optimization):
        result.append(optimization)

    if self.apply_default_optimizations is not False:
      # The following optimizations are turned on by default, unless the
      # user explicitly disables them.
      optimizations_to_disable = [
          "map_and_batch_fusion",
          "noop_elimination",
          "shuffle_and_repeat_fusion",
      ]
      for optimization in optimizations_to_disable:
        if getattr(self, optimization) is not False:
          result.append(optimization)
    return result
class Options(options_lib.OptionsBase):
  """Represents options for `tf.data.Dataset`.

  A `tf.data.Options` object can be, for instance, used to control which
  static optimizations to apply to the input pipeline graph or whether to use
  performance modeling to dynamically tune the parallelism of operations such
  as `tf.data.Dataset.map` or `tf.data.Dataset.interleave`.

  The options are set for the entire dataset and are carried over to datasets
  created through tf.data transformations.

  The options can be set by constructing an `Options` object and using the
  `tf.data.Dataset.with_options(options)` transformation, which returns a
  dataset with the options set.

  >>> dataset = tf.data.Dataset.range(42)
  >>> options = tf.data.Options()
  >>> options.deterministic = False
  >>> dataset = dataset.with_options(options)
  >>> print(dataset.options().deterministic)
  False

  Note: A known limitation of the `tf.data.Options` implementation is that
  the options are not preserved across tf.function boundaries. In particular,
  to set options for a dataset that is iterated within a tf.function, the
  options need to be set within the same tf.function.
  """

  autotune = options_lib.create_option(
      name="autotune",
      ty=AutotuneOptions,
      docstring="The autotuning options associated with the dataset. See "
      "`tf.data.experimental.AutotuneOptions` for more details.",
      default_factory=AutotuneOptions)

  deterministic = options_lib.create_option(
      name="deterministic",
      ty=bool,
      docstring=
      "Whether the outputs need to be produced in deterministic order. If "
      "None, defaults to True.")

  experimental_deterministic = options_lib.create_option(
      name="experimental_deterministic",
      ty=bool,
      docstring="DEPRECATED. Use `deterministic` instead.")

  experimental_distribute = options_lib.create_option(
      name="experimental_distribute",
      ty=DistributeOptions,
      docstring=
      "The distribution strategy options associated with the dataset. See "
      "`tf.data.experimental.DistributeOptions` for more details.",
      default_factory=DistributeOptions)

  experimental_external_state_policy = options_lib.create_option(
      name="experimental_external_state_policy",
      ty=ExternalStatePolicy,
      docstring="This option can be used to override the default policy for "
      "how to handle external state when serializing a dataset or "
      "checkpointing its iterator. There are three settings available - "
      "IGNORE: External state is ignored without a warning; WARN: External "
      "state is ignored and a warning is logged; FAIL: External state "
      "results in an error.")

  experimental_optimization = options_lib.create_option(
      name="experimental_optimization",
      ty=OptimizationOptions,
      docstring="The optimization options associated with the dataset. See "
      "`tf.data.experimental.OptimizationOptions` for more details.",
      default_factory=OptimizationOptions)

  experimental_slack = options_lib.create_option(
      name="experimental_slack",
      ty=bool,
      docstring="Whether to introduce 'slack' in the last `prefetch` of the "
      "input pipeline, if it exists. This may reduce CPU contention with "
      "accelerator host-side activity at the start of a step. The slack "
      "frequency is determined by the number of devices attached to this "
      "input pipeline. If None, defaults to False.")

  experimental_threading = options_lib.create_option(
      name="experimental_threading",
      ty=ThreadingOptions,
      docstring="DEPRECATED. Use `threading` instead.")

  threading = options_lib.create_option(
      name="threading",
      ty=ThreadingOptions,
      docstring="The threading options associated with the dataset. See "
      "`tf.data.ThreadingOptions` for more details.",
      default_factory=ThreadingOptions)

  def __getattribute__(self, name):
    if name == "experimental_threading":
      logging.warning("options.experimental_threading is deprecated. "
                      "Use options.threading instead.")
      return getattr(self, "threading")
    if name == "experimental_deterministic":
      # TODO(aaudibert): Uncomment after internal uses have been updated.
      # logging.warning("options.experimental_deterministic is deprecated. "
      #                 "Use options.deterministic instead.")
      return getattr(self, "deterministic")
    return super(Options, self).__getattribute__(name)

  def __setattr__(self, name, value):
    if name == "experimental_threading":
      logging.warning("options.experimental_threading is deprecated. "
                      "Use options.threading instead.")
      super(Options, self).__setattr__("threading", value)
      return
    if name == "experimental_deterministic":
      # TODO(aaudibert): Uncomment after internal uses have been updated.
      # logging.warning("options.experimental_deterministic is deprecated. "
      #                 "Use options.deterministic instead.")
      super(Options, self).__setattr__("deterministic", value)
      return
    super(Options, self).__setattr__(name, value)

  def _to_proto(self):
    pb = dataset_options_pb2.Options()
    if self.deterministic is not None:
      pb.deterministic = self.deterministic
    pb.autotune_options.CopyFrom(self.autotune._to_proto())  # pylint: disable=protected-access
    pb.distribute_options.CopyFrom(self.experimental_distribute._to_proto())  # pylint: disable=protected-access
    if self.experimental_external_state_policy is not None:
      pb.external_state_policy = (
          ExternalStatePolicy._to_proto(  # pylint: disable=protected-access
              self.experimental_external_state_policy))
    pb.optimization_options.CopyFrom(
        self.experimental_optimization._to_proto())  # pylint: disable=protected-access
    if self.experimental_slack is not None:
      pb.slack = self.experimental_slack
    pb.threading_options.CopyFrom(self.threading._to_proto())  # pylint: disable=protected-access
    return pb

  def _from_proto(self, pb):
    if pb.WhichOneof("optional_deterministic") is not None:
      self.deterministic = pb.deterministic
    self.autotune._from_proto(pb.autotune_options)  # pylint: disable=protected-access
    self.experimental_distribute._from_proto(pb.distribute_options)  # pylint: disable=protected-access
    if pb.WhichOneof("optional_external_state_policy") is not None:
      self.experimental_external_state_policy = (
          ExternalStatePolicy._from_proto(  # pylint: disable=protected-access
              pb.external_state_policy))
    self.experimental_optimization._from_proto(pb.optimization_options)  # pylint: disable=protected-access
    if pb.WhichOneof("optional_slack") is not None:
      self.experimental_slack = pb.slack
    self.threading._from_proto(pb.threading_options)  # pylint: disable=protected-access

  def _set_mutable(self, mutable):
    """Change the mutability value to `mutable` on this options and children."""
    # pylint: disable=protected-access
    object.__setattr__(self, "_mutable", mutable)
    self.autotune._set_mutable(mutable)
    self.experimental_distribute._set_mutable(mutable)
    self.experimental_optimization._set_mutable(mutable)
    self.threading._set_mutable(mutable)

  def merge(self, options):
    """Merges itself with the given `tf.data.Options`.

    If this object and the `options` to merge set an option differently, a
    warning is generated and this object's value is updated with the
    `options` object's value.

    Args:
      options: The `tf.data.Options` to merge with.

    Returns:
      New `tf.data.Options` object which is the result of merging self with
      the input `tf.data.Options`.
    """
    return options_lib.merge_options(self, options)
class OptimizationOptions(options_lib.OptionsBase):
  """Represents options for dataset optimizations.

  You can set the optimization options of a dataset through the
  `experimental_optimization` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.OptimizationOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_optimization.noop_elimination = True
  options.experimental_optimization.apply_default_optimizations = False
  dataset = dataset.with_options(options)
  ```
  """
  apply_default_optimizations = options_lib.create_option(
      name="apply_default_optimizations",
      ty=bool,
      docstring=
      "Whether to apply default graph optimizations. If False, only graph "
      "optimizations that have been explicitly enabled will be applied.")

  filter_fusion = options_lib.create_option(
      name="filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse filter transformations. If None, defaults to False.")

  map_and_batch_fusion = options_lib.create_option(
      name="map_and_batch_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and batch transformations. If None, defaults to "
      "True.")

  map_and_filter_fusion = options_lib.create_option(
      name="map_and_filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and filter transformations. If None, defaults to "
      "False.")

  map_fusion = options_lib.create_option(
      name="map_fusion",
      ty=bool,
      docstring="Whether to fuse map transformations. If None, defaults to "
      "False.")

  map_parallelization = options_lib.create_option(
      name="map_parallelization",
      ty=bool,
      docstring=
      "Whether to parallelize stateless map transformations. If None, "
      "defaults to True.")

  noop_elimination = options_lib.create_option(
      name="noop_elimination",
      ty=bool,
      docstring=
      "Whether to eliminate no-op transformations. If None, defaults to True.")

  parallel_batch = options_lib.create_option(
      name="parallel_batch",
      ty=bool,
      docstring="Whether to parallelize copying of batch elements. This "
      "optimization is highly experimental and can cause performance "
      "degradation (e.g. when the parallelization overhead exceeds the "
      "benefits of performing the data copies in parallel). You should only "
      "enable this optimization if a) your input pipeline is bottlenecked on "
      "batching and b) you have validated that this optimization improves "
      "performance. If None, defaults to False.")

  shuffle_and_repeat_fusion = options_lib.create_option(
      name="shuffle_and_repeat_fusion",
      ty=bool,
      docstring="Whether to fuse shuffle and repeat transformations. If None, "
      "defaults to True.")

  def _to_proto(self):
    pb = dataset_options_pb2.OptimizationOptions()
    if self.apply_default_optimizations is not None:
      pb.apply_default_optimizations = self.apply_default_optimizations
    if self.filter_fusion is not None:
      pb.filter_fusion = self.filter_fusion
    if self.map_and_batch_fusion is not None:
      pb.map_and_batch_fusion = self.map_and_batch_fusion
    if self.map_and_filter_fusion is not None:
      pb.map_and_filter_fusion = self.map_and_filter_fusion
    if self.map_fusion is not None:
      pb.map_fusion = self.map_fusion
    if self.map_parallelization is not None:
      pb.map_parallelization = self.map_parallelization
    if self.noop_elimination is not None:
      pb.noop_elimination = self.noop_elimination
    if self.parallel_batch is not None:
      pb.parallel_batch = self.parallel_batch
    if self.shuffle_and_repeat_fusion is not None:
      pb.shuffle_and_repeat_fusion = self.shuffle_and_repeat_fusion
    return pb

  def _from_proto(self, pb):
    if pb.WhichOneof("optional_apply_default_optimizations") is not None:
      self.apply_default_optimizations = pb.apply_default_optimizations
    if pb.WhichOneof("optional_filter_fusion") is not None:
      self.filter_fusion = pb.filter_fusion
    if pb.WhichOneof("optional_map_and_batch_fusion") is not None:
      self.map_and_batch_fusion = pb.map_and_batch_fusion
    if pb.WhichOneof("optional_map_and_filter_fusion") is not None:
      self.map_and_filter_fusion = pb.map_and_filter_fusion
    if pb.WhichOneof("optional_map_fusion") is not None:
      self.map_fusion = pb.map_fusion
    if pb.WhichOneof("optional_map_parallelization") is not None:
      self.map_parallelization = pb.map_parallelization
    if pb.WhichOneof("optional_noop_elimination") is not None:
      self.noop_elimination = pb.noop_elimination
    if pb.WhichOneof("optional_parallel_batch") is not None:
      self.parallel_batch = pb.parallel_batch
    if pb.WhichOneof("optional_shuffle_and_repeat_fusion") is not None:
      self.shuffle_and_repeat_fusion = pb.shuffle_and_repeat_fusion

  def _set_mutable(self, mutable):
    """Change the mutability value to `mutable` on this options and children."""
    # pylint: disable=protected-access
    object.__setattr__(self, "_mutable", mutable)
class OptimizationOptions(options.OptionsBase):
  """Represents options for dataset optimizations.

  You can set the optimization options of a dataset through the
  `experimental_optimization` property of `tf.data.Options`; the property is
  an instance of `tf.data.experimental.OptimizationOptions`.

  ```python
  options = tf.data.Options()
  options.experimental_optimization.noop_elimination = True
  options.experimental_optimization.map_vectorization.enabled = True
  options.experimental_optimization.apply_default_optimizations = False
  dataset = dataset.with_options(options)
  ```
  """

  apply_default_optimizations = options.create_option(
      name="apply_default_optimizations",
      ty=bool,
      docstring=
      "Whether to apply default graph optimizations. If False, only graph "
      "optimizations that have been explicitly enabled will be applied.")

  autotune = options.create_option(
      name="autotune",
      ty=bool,
      docstring=
      "Whether to automatically tune performance knobs. If None, defaults to "
      "True.")

  autotune_buffers = options.create_option(
      name="autotune_buffers",
      ty=bool,
      docstring=
      "When autotuning is enabled (through `autotune`), determines whether "
      "to also autotune buffer sizes for datasets with parallelism. If None, "
      "defaults to False.")

  autotune_cpu_budget = options.create_option(
      name="autotune_cpu_budget",
      ty=int,
      docstring=
      "When autotuning is enabled (through `autotune`), determines the CPU "
      "budget to use. Values greater than the number of schedulable CPU "
      "cores are allowed but may result in CPU contention. If None, defaults "
      "to the number of schedulable CPU cores.")

  autotune_ram_budget = options.create_option(
      name="autotune_ram_budget",
      ty=int,
      docstring=
      "When autotuning is enabled (through `autotune`), determines the RAM "
      "budget to use. Values greater than the available RAM in bytes may "
      "result in OOM. If None, defaults to half of the available RAM in "
      "bytes.")

  filter_fusion = options.create_option(
      name="filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse filter transformations. If None, defaults to False.")

  filter_with_random_uniform_fusion = options.create_option(
      name="filter_with_random_uniform_fusion",
      ty=bool,
      docstring=
      "Whether to fuse filter dataset that predicts random_uniform < rate "
      "into a sampling dataset. If None, defaults to False.")

  hoist_random_uniform = options.create_option(
      name="hoist_random_uniform",
      ty=bool,
      docstring=
      "Whether to hoist `tf.random_uniform()` ops out of map "
      "transformations. If None, defaults to False.")

  map_and_batch_fusion = options.create_option(
      name="map_and_batch_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and batch transformations. If None, defaults to "
      "True.")

  map_and_filter_fusion = options.create_option(
      name="map_and_filter_fusion",
      ty=bool,
      docstring=
      "Whether to fuse map and filter transformations. If None, defaults to "
      "False.")

  map_fusion = options.create_option(
      name="map_fusion",
      ty=bool,
      docstring="Whether to fuse map transformations. If None, defaults to "
      "False.")

  map_parallelization = options.create_option(
      name="map_parallelization",
      ty=bool,
      docstring=
      "Whether to parallelize stateless map transformations. If None, "
      "defaults to True.")

  map_vectorization = options.create_option(
      name="map_vectorization",
      ty=MapVectorizationOptions,
      docstring=
      "The map vectorization options associated with the dataset. See "
      "`tf.data.experimental.MapVectorizationOptions` for more details.",
      default_factory=MapVectorizationOptions)

  noop_elimination = options.create_option(
      name="noop_elimination",
      ty=bool,
      docstring=
      "Whether to eliminate no-op transformations. If None, defaults to "
      "True.")

  parallel_batch = options.create_option(
      name="parallel_batch",
      ty=bool,
      docstring="Whether to parallelize copying of batch elements. This "
      "optimization is highly experimental and can cause performance "
      "degradation (e.g. when the parallelization overhead exceeds the "
      "benefits of performing the data copies in parallel). You should only "
      "enable this optimization if a) your input pipeline is bottlenecked on "
      "batching and b) you have validated that this optimization improves "
      "performance. If None, defaults to False.")

  reorder_data_discarding_ops = options.create_option(
      name="reorder_data_discarding_ops",
      ty=bool,
      docstring=
      "Whether to reorder ops that will discard data to the front of unary "
      "cardinality preserving transformations, e.g. "
      "dataset.map(...).take(3) will be optimized to "
      "dataset.take(3).map(...). For now this optimization will move `skip`, "
      "`shard` and `take` to the front of `map` and `prefetch`. This "
      "optimization is only for performance; it will not affect the output "
      "of the dataset. If None, defaults to True.")

  shuffle_and_repeat_fusion = options.create_option(
      name="shuffle_and_repeat_fusion",
      ty=bool,
      docstring="Whether to fuse shuffle and repeat transformations. If None, "
      "defaults to True.")

  def _autotune_buffers(self):
    if self.autotune_buffers is not None:
      return self.autotune_buffers
    # The default setting for autotune_buffers is based on
    # _ENABLE_AUTOTUNE_BUFFERS_BY_DEFAULT.
    return _ENABLE_AUTOTUNE_BUFFERS_BY_DEFAULT

  def _autotune_settings(self):
    # Default autotune settings.
    autotune = True

    # If autotune_buffers is enabled, we use the GRADIENT_DESCENT algorithm
    # by default, which is more performant for tuning heterogeneous
    # parameters.
    algorithm = (
        _AutotuneAlgorithm.GRADIENT_DESCENT
        if self._autotune_buffers() else _AutotuneAlgorithm.HILL_CLIMB)
    cpu_budget = 0  # Indicates that all CPU cores should be used by default.
    ram_budget = 0  # Indicates that default value of RAM budget should be used.

    # Set these options if they are explicitly set by the user.
    if self.autotune is False:  # pylint: disable=g-bool-id-comparison
      autotune = False
    if self.autotune_cpu_budget is not None:
      cpu_budget = self.autotune_cpu_budget
    if self.autotune_ram_budget is not None:
      ram_budget = self.autotune_ram_budget

    return autotune, algorithm, cpu_budget, ram_budget

  def _graph_rewrites(self):
    """Produces lists of enabled, disabled and default graph optimizations.

    Returns:
      result: a namedtuple with three attributes. `result.enabled` is the
        list of user enabled optimizations. `result.disabled` is the list of
        user disabled optimizations. `result.default` is the list of
        optimizations that are enabled by default (the user has not
        explicitly enabled or disabled them).
    """
    if self.map_vectorization is not None:
      result = self.map_vectorization._graph_rewrites()  # pylint: disable=protected-access
    else:
      result = MapVectorizationOptions()._graph_rewrites()  # pylint: disable=protected-access

    all_optimizations = [
        "filter_fusion",
        "filter_with_random_uniform_fusion",
        "hoist_random_uniform",
        "map_and_batch_fusion",
        "map_and_filter_fusion",
        "map_parallelization",
        "map_fusion",
        "noop_elimination",
        "parallel_batch",
        "reorder_data_discarding_ops",
        "shuffle_and_repeat_fusion",
    ]

    if self.apply_default_optimizations is not False:  # pylint: disable=g-bool-id-comparison
      # The following optimizations are turned on by default, unless the user
      # explicitly disables them.
      optimizations_to_disable = [
          "map_and_batch_fusion",
          "map_parallelization",
          "noop_elimination",
          "shuffle_and_repeat_fusion",
      ]
      for optimization in optimizations_to_disable:
        if getattr(self, optimization) is None:
          result.default.append(optimization)

    # Each of these attributes on the Options object is either True
    # (explicitly enabled), False (explicitly disabled), or None (default).
    for optimization in all_optimizations:
      if getattr(self, optimization) is True:  # pylint: disable=g-bool-id-comparison
        result.enabled.append(optimization)
      elif getattr(self, optimization) is False:  # pylint: disable=g-bool-id-comparison
        result.disabled.append(optimization)

    autotune_buffers = self._autotune_buffers()
    if self.autotune is not False and autotune_buffers is True:  # pylint: disable=g-bool-id-comparison
      # When autotuning buffer sizes is enabled, we inject a `prefetch`
      # transformation after asynchronous dataset ops. Only the buffer sizes
      # of prefetch transformations will be autotuned, though this is
      # practically equivalent to tuning the buffer sizes of the other
      # asynchronous transformations.
      result.enabled.append("autotune_buffer_sizes")
      result.enabled.append("disable_prefetch_legacy_autotune")

    if self.autotune is False:  # pylint: disable=g-bool-id-comparison
      result.disabled.append("autotune_buffer_sizes")
      result.disabled.append("disable_prefetch_legacy_autotune")

    return result

  def _graph_rewrite_configs(self, autotune):
    if self.map_vectorization is not None:
      graph_rewrite_configs = self.map_vectorization._graph_rewrite_configs()  # pylint: disable=protected-access
    else:
      graph_rewrite_configs = []

    autotune_only_optimizations = [
        "autotune_buffer_sizes",
        "batch_parallelization",
        "disable_prefetch_legacy_autotune",
        "enable_gradient_descent",
        "map_parallelization",
    ]
    if autotune is False:  # pylint: disable=g-bool-id-comparison
      for optimization in autotune_only_optimizations:
        graph_rewrite_configs.append(optimization + ":autotune:false")
    else:
      for optimization in autotune_only_optimizations:
        graph_rewrite_configs.append(optimization + ":autotune:true")

    return graph_rewrite_configs

  def _to_proto(self):
    pb = dataset_options_pb2.OptimizationOptions()
    if self.apply_default_optimizations is not None:
      pb.apply_default_optimizations = self.apply_default_optimizations
    if self.autotune is not None:
      pb.autotune = self.autotune
    if self.autotune_buffers is not None:
      pb.autotune_buffers = self.autotune_buffers
    if self.autotune_cpu_budget is not None:
      pb.autotune_cpu_budget = self.autotune_cpu_budget
    if self.autotune_ram_budget is not None:
      pb.autotune_ram_budget = self.autotune_ram_budget
    if self.filter_fusion is not None:
      pb.filter_fusion = self.filter_fusion
    if self.filter_with_random_uniform_fusion is not None:
      pb.filter_with_random_uniform_fusion = (
          self.filter_with_random_uniform_fusion)
    if self.hoist_random_uniform is not None:
      pb.hoist_random_uniform = self.hoist_random_uniform
    if self.map_and_batch_fusion is not None:
      pb.map_and_batch_fusion = self.map_and_batch_fusion
    if self.map_and_filter_fusion is not None:
      pb.map_and_filter_fusion = self.map_and_filter_fusion
    if self.map_fusion is not None:
      pb.map_fusion = self.map_fusion
    if self.map_parallelization is not None:
      pb.map_parallelization = self.map_parallelization
    pb.map_vectorization.CopyFrom(self.map_vectorization._to_proto())  # pylint: disable=protected-access
    if self.noop_elimination is not None:
      pb.noop_elimination = self.noop_elimination
    if self.parallel_batch is not None:
      pb.parallel_batch = self.parallel_batch
    if self.reorder_data_discarding_ops is not None:
      pb.reorder_data_discarding_ops = self.reorder_data_discarding_ops
    if self.shuffle_and_repeat_fusion is not None:
      pb.shuffle_and_repeat_fusion = self.shuffle_and_repeat_fusion
    return pb

  def _from_proto(self, pb):
    if pb.WhichOneof("optional_apply_default_optimizations") is not None:
      self.apply_default_optimizations = pb.apply_default_optimizations
    if pb.WhichOneof("optional_autotune") is not None:
      self.autotune = pb.autotune
    if pb.WhichOneof("optional_autotune_buffers") is not None:
      self.autotune_buffers = pb.autotune_buffers
    if pb.WhichOneof("optional_autotune_cpu_budget") is not None:
      self.autotune_cpu_budget = pb.autotune_cpu_budget
    if pb.WhichOneof("optional_autotune_ram_budget") is not None:
      self.autotune_ram_budget = pb.autotune_ram_budget
    if pb.WhichOneof("optional_filter_fusion") is not None:
      self.filter_fusion = pb.filter_fusion
    if pb.WhichOneof("optional_filter_with_random_uniform_fusion") is not None:
      self.filter_with_random_uniform_fusion = (
          pb.filter_with_random_uniform_fusion)
    if pb.WhichOneof("optional_hoist_random_uniform") is not None:
      self.hoist_random_uniform = pb.hoist_random_uniform
    if pb.WhichOneof("optional_map_and_batch_fusion") is not None:
      self.map_and_batch_fusion = pb.map_and_batch_fusion
    if pb.WhichOneof("optional_map_and_filter_fusion") is not None:
      self.map_and_filter_fusion = pb.map_and_filter_fusion
    if pb.WhichOneof("optional_map_fusion") is not None:
      self.map_fusion = pb.map_fusion
    if pb.WhichOneof("optional_map_parallelization") is not None:
      self.map_parallelization = pb.map_parallelization
    self.map_vectorization._from_proto(pb.map_vectorization)  # pylint: disable=protected-access
    if pb.WhichOneof("optional_noop_elimination") is not None:
      self.noop_elimination = pb.noop_elimination
    if pb.WhichOneof("optional_parallel_batch") is not None:
      self.parallel_batch = pb.parallel_batch
    if pb.WhichOneof("optional_reorder_data_discarding_ops") is not None:
      self.reorder_data_discarding_ops = pb.reorder_data_discarding_ops
    if pb.WhichOneof("optional_shuffle_and_repeat_fusion") is not None:
      self.shuffle_and_repeat_fusion = pb.shuffle_and_repeat_fusion

  def _set_mutable(self, mutable):
    """Change the mutability value to `mutable` on this options and children."""
    # pylint: disable=protected-access
    object.__setattr__(self, "_mutable", mutable)
    self.map_vectorization._set_mutable(mutable)
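
# Illustrative sketch (the option values are assumptions for the example):
# how the three lists produced by `_graph_rewrites` above partition the
# optimizations.
#
#   opts = OptimizationOptions()
#   opts.filter_fusion = True   # explicitly enabled
#   opts.map_fusion = False     # explicitly disabled
#   result = opts._graph_rewrites()
#   # "filter_fusion" lands in result.enabled, "map_fusion" in
#   # result.disabled, and untouched defaults such as "noop_elimination"
#   # and "map_and_batch_fusion" land in result.default (because
#   # apply_default_optimizations was not set to False).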
class _NestedTestOptions(options.OptionsBase):
  opts = options.create_option(
      name="opts", ty=_TestOptions, docstring="nested options")
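
# Illustrative sketch (assumed behavior, based on the `ty` arguments in the
# option definitions): nested options compose, and each level enforces the
# declared type of its options.
#
#   nested = _NestedTestOptions()
#   nested.opts = _TestOptions()
#   nested.opts.x = 100      # accepted: `x` is declared with ty=int
#   nested.opts.y = "pie"    # assumed to raise TypeError: `y` has ty=float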