def benchmarkMapFanOut(self):
  fan_outs = [1, 2, 5, 10, 20, 50, 100]
  for fan_out in fan_outs:
    for mode in ["general", "single-threaded", "short-circuit"]:
      if mode == "general":
        map_fn = lambda *xs: [x + 1 for x in xs]
        use_inter_op_parallelism = True
        benchmark_label = ""
      if mode == "single-threaded":
        map_fn = lambda *xs: [x + 1 for x in xs]
        use_inter_op_parallelism = False
        benchmark_label = "_single_threaded"
      if mode == "short-circuit":
        map_fn = lambda *xs: xs
        use_inter_op_parallelism = True  # should not have any significance
        benchmark_label = "_short_circuit"

      with ops.Graph().as_default():
        dataset = dataset_ops.Dataset.from_tensors(
            tuple(0 for _ in range(fan_out))).repeat(None)
        dataset = dataset_ops.MapDataset(
            dataset,
            map_fn,
            use_inter_op_parallelism=use_inter_op_parallelism)
        options = dataset_ops.Options()
        options.experimental_optimization.apply_default_optimizations = False
        dataset = dataset.with_options(options)
        iterator = dataset_ops.make_one_shot_iterator(dataset)
        next_element = iterator.get_next()

        with session.Session() as sess:
          for _ in range(5):
            sess.run(next_element[0].op)
          deltas = []
          for _ in range(100):
            start = time.time()
            for _ in range(100):
              sess.run(next_element[0].op)
            end = time.time()
            deltas.append(end - start)

          median_wall_time = np.median(deltas) / 100
          self.report_benchmark(
              iters=1000,
              wall_time=median_wall_time,
              name="fan_out_%d%s" % (fan_out, benchmark_label))
def testOptimizationNestedDatasetWithModifiedRetval(self):

  def flat_map_fn(_):
    dataset = dataset_ops.Dataset.from_tensors(0)
    dataset = dataset.apply(testing.assert_next(["MapAndBatch"]))
    # Should be fused by map and batch fusion.
    dataset = dataset.map(lambda x: x)
    dataset = dataset.batch(1)
    return dataset

  dataset = dataset_ops.Dataset.range(1)
  dataset = dataset.flat_map(flat_map_fn)

  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False
  options.experimental_optimization.map_and_batch_fusion = True
  dataset = dataset.with_options(options)
  self.assertDatasetProduces(dataset, expected_output=[[0]])
def benchmarkChainOfMaps(self):
  chain_lengths = [0, 1, 2, 5, 10, 20, 50]
  for chain_length in chain_lengths:
    for mode in ["general", "single-threaded", "short-circuit"]:
      if mode == "general":
        map_fn = lambda x: x + 1
        use_inter_op_parallelism = True
        benchmark_label = ""
      if mode == "single-threaded":
        map_fn = lambda x: x + 1
        use_inter_op_parallelism = False
        benchmark_label = "_single_threaded"
      if mode == "short-circuit":
        map_fn = lambda x: x
        use_inter_op_parallelism = True  # should not have any significance
        benchmark_label = "_short_circuit"

      with ops.Graph().as_default():
        dataset = dataset_ops.Dataset.from_tensors(0).repeat(None)
        for _ in range(chain_length):
          dataset = dataset_ops.MapDataset(
              dataset,
              map_fn,
              use_inter_op_parallelism=use_inter_op_parallelism)
        options = dataset_ops.Options()
        options.experimental_optimization.apply_default_optimizations = False
        dataset = dataset.with_options(options)
        iterator = dataset_ops.make_one_shot_iterator(dataset)
        next_element = iterator.get_next()

        with session.Session() as sess:
          for _ in range(5):
            sess.run(next_element.op)
          deltas = []
          for _ in range(100):
            start = time.time()
            for _ in range(100):
              sess.run(next_element.op)
            end = time.time()
            deltas.append(end - start)

          median_wall_time = np.median(deltas) / 100
          self.report_benchmark(
              iters=1000,
              wall_time=median_wall_time,
              name="chain_length_%d%s" % (chain_length, benchmark_label))
def testOptimizationWithCapturedRefVar(self, dataset_fn):
  """Tests that default optimizations are disabled with ref variables."""
  variable = variable_scope.get_variable(
      "v", initializer=0, use_resource=False)
  assign_op = variable.assign_add(1)

  # Check that warning is logged.
  warnings.simplefilter("always")
  with warnings.catch_warnings(record=True) as w:
    unoptimized_dataset = dataset_fn(variable)

    options = dataset_ops.Options()
    options.experimental_optimization.apply_default_optimizations = False
    options.experimental_optimization.noop_elimination = True
    options.experimental_optimization.map_and_batch_fusion = True
    optimized_dataset = unoptimized_dataset.with_options(options)
    optimized_it = dataset_ops.make_initializable_iterator(optimized_dataset)

  self.assertGreaterEqual(len(w), 1)
  graph_rewrites = options._graph_rewrites()
  expected = (
      "tf.data graph rewrites are not compatible with "
      "tf.Variable. The following rewrites will be disabled: %s."
      " To enable rewrites, use resource variables instead by "
      "calling `tf.enable_resource_variables()` at the start of the "
      "program." %
      (", ".join(graph_rewrites.enabled + graph_rewrites.default)))
  self.assertTrue(any(expected in str(warning) for warning in w))

  # Check that outputs are the same in the optimized and unoptimized cases,
  # when the variable value is changing.
  unoptimized_it = dataset_ops.make_initializable_iterator(
      unoptimized_dataset)
  with ops.control_dependencies([assign_op]):
    unoptimized_output = unoptimized_it.get_next()
    optimized_output = optimized_it.get_next()

  self.evaluate(variable.initializer)
  self.evaluate((unoptimized_it.initializer, optimized_it.initializer))
  while True:
    try:
      unoptimized, optimized = self.evaluate(
          (unoptimized_output, optimized_output))
      self.assertEqual(unoptimized, optimized)
    except errors.OutOfRangeError:
      break
def testSimpleReorderingV1(self):
  dataset = dataset_ops.Dataset.range(100)
  # Map ops have preserve_cardinality=false in tensorflow v1.
  dataset = dataset.apply(
      testing.assert_next(
          ["ParallelMap", "FiniteSkip", "FiniteTake", "Shard", "Prefetch"]))
  dataset = dataset.map(lambda x: x + 1, num_parallel_calls=10)
  dataset = dataset.skip(10)
  dataset = dataset.prefetch(1)
  dataset = dataset.take(50)
  dataset = dataset.shard(2, 0)
  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False
  options.experimental_optimization.reorder_data_discarding_ops = True
  dataset = dataset.with_options(options)
  self.assertDatasetProduces(dataset, range(11, 61, 2))
def testCapturedInputs(self):
  a = constant_op.constant(1, dtype=dtypes.float32)
  b = constant_op.constant(0, dtype=dtypes.float32)
  some_tensor = math_ops.mul(a, b)

  def random_with_capture(_):
    return some_tensor + random_ops.random_uniform(
        [], minval=1, maxval=10, dtype=dtypes.float32, seed=42)

  dataset = dataset_ops.Dataset.range(5).apply(
      optimization.assert_next(["Zip[0]", "Map"])).map(random_with_capture)
  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False
  options.experimental_optimization.hoist_random_uniform = True
  dataset = dataset.with_options(options)
  self._testDataset(dataset)
def benchmark_stats(self):
  for stats in [True, False]:
    dataset = dataset_ops.Dataset.range(1000).repeat()
    dataset = dataset.map(lambda x: x + 1, num_parallel_calls=32)
    options = dataset_ops.Options()
    options.experimental_deterministic = False
    if stats:
      aggregator = stats_aggregator.StatsAggregator()
      options.experimental_stats.aggregator = aggregator
    dataset = dataset.with_options(options)
    self.run_and_report_benchmark(
        dataset,
        num_elements=10000,
        extras={
            "model_name": "map.benchmark.7",
            "parameters": "%s" % stats,
        },
        name="stats_%s" % stats)
def _build_shuffle_dataset(self,
                           range_limit=10,
                           num_repeats=5,
                           buffer_size=5,
                           seed=None,
                           reshuffle_each_iteration=None):
  dataset = dataset_ops.Dataset.range(range_limit).shuffle(
      buffer_size,
      seed=seed,
      reshuffle_each_iteration=reshuffle_each_iteration).repeat(num_repeats)
  # TODO(b/138399725): Re-enable default optimizations.
  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False
  return dataset.with_options(options)
def testMapAndBatchTypes(self, element, dtype, numa_aware):

  def gen():
    yield element

  dataset = dataset_ops.Dataset.from_generator(gen, dtype).repeat(100).apply(
      batching.map_and_batch(lambda x: x, batch_size=10))

  if numa_aware:
    options = dataset_ops.Options()
    options.experimental_numa_aware = True
    dataset = dataset.with_options(options)

  get_next = self.getNext(dataset)
  for _ in range(10):
    self.assertAllEqual([element for _ in range(10)],
                        self.evaluate(get_next()))
def dataset_fn(delay_ms):

  def interleave_fn(x):
    ds = dataset_ops.Dataset.from_tensors(x)
    if math_ops.equal(x, 0):
      ds = ds.apply(testing.sleep(delay_ms * 1000))
    else:
      ds = ds.apply(testing.sleep(0))
    return ds

  ds = dataset_ops.Dataset.from_tensor_slices(elements)
  ds = ds.interleave(interleave_fn, cycle_length=10, num_parallel_calls=10)
  opts = dataset_ops.Options()
  opts.experimental_deterministic = False
  ds = ds.with_options(opts)
  ds = self.make_distributed_dataset(ds, cluster)
  return ds
def _create_iterators_per_worker_with_input_context(input_contexts,
                                                    input_workers,
                                                    dataset_fn):
  """Creates a multidevice iterator per worker given a dataset function."""
  iterators = []
  for i, ctx in enumerate(input_contexts):
    worker = input_workers.worker_devices[i]
    with ops.device(worker):
      dataset = dataset_fn(ctx)
      # TODO(b/138745411): Remove once stateful transformations are supported.
      options = dataset_ops.Options()
      options.experimental_distribute._make_stateless = True  # pylint: disable=protected-access
      dataset = dataset.with_options(options)
      devices = input_workers.compute_devices_for_worker(i)
      iterator = _SingleWorkerDatasetIterator(dataset, worker, devices)
      iterators.append(iterator)
  return iterators
def testOptimizationDisabled(self):
  """Tests the optimization settings by disabling all."""
  options = dataset_ops.Options()
  options.experimental_optimization.filter_fusion = False
  options.experimental_optimization.filter_with_random_uniform_fusion = False
  options.experimental_optimization.hoist_random_uniform = False
  options.experimental_optimization.map_and_batch_fusion = False
  options.experimental_optimization.map_and_filter_fusion = False
  options.experimental_optimization.map_parallelization = False
  options.experimental_optimization.map_fusion = False
  options.experimental_optimization.noop_elimination = False
  options.experimental_optimization.parallel_batch = False
  options.experimental_optimization.shuffle_and_repeat_fusion = False
  options.experimental_optimization.map_vectorization.enabled = False
  options.experimental_optimization.autotune = False
  options.experimental_deterministic = True
  options.experimental_stats.latency_all_edges = False
  options.experimental_slack = False

  expected_optimizations_enabled = []
  expected_optimizations_disabled = [
      "filter_fusion",
      "filter_with_random_uniform_fusion",
      "hoist_random_uniform",
      "map_and_batch_fusion",
      "map_and_filter_fusion",
      "map_parallelization",
      "map_fusion",
      "noop_elimination",
      "parallel_batch",
      "shuffle_and_repeat_fusion",
      "map_vectorization",
      "autotune_buffer_sizes",
      "make_sloppy",
      "latency_all_edges",
      "slack",
      "disable_prefetch_legacy_autotune",
  ]
  expected_optimizations_default = []

  graph_rewrites = options._graph_rewrites()
  self.assertEqual(set(graph_rewrites.enabled),
                   set(expected_optimizations_enabled))
  self.assertEqual(set(graph_rewrites.disabled),
                   set(expected_optimizations_disabled))
  self.assertEqual(set(graph_rewrites.default),
                   set(expected_optimizations_default))
def _create_device_dataset(self, i):
  """Uses _prototype_device_datasets[i] to build a dataset for the device."""
  ds = self._prototype_device_datasets[i]
  ds = _ReincarnatedPerDeviceGenerator(ds, self._incarnation_id)
  if self._prefetch_buffer_size > 0:
    if self._experimental_slack:
      ds = dataset_ops.PrefetchDataset(
          ds, self._prefetch_buffer_size, slack_period=1)
    else:
      ds = ds.prefetch(self._prefetch_buffer_size)
  # TODO(jsimsa): Enable auto-tuning and optimizations when supported for
  # non-CPU devices.
  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False
  options.experimental_optimization.autotune = False
  ds = ds.with_options(options)
  return ds
def testMapParallelizationWithCapturedVariable(self):
  """Tests that functions with captured variables are not parallelized."""
  captured_t = variables.Variable(42, dtype=dtypes.int64)

  def fn(x):
    return x + captured_t

  dataset = dataset_ops.Dataset.range(5).apply(
      optimization.assert_next(["Map"])).map(fn)
  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False
  options.experimental_optimization.map_parallelization = True
  dataset = dataset.with_options(options)
  self.evaluate(variables.global_variables_initializer())
  self.assertDatasetProduces(
      dataset,
      expected_output=[x + 42 for x in range(5)],
      requires_initialization=True)
def _apply_fn(dataset):  # pylint: disable=missing-docstring
  external_state_policy = dataset.options().experimental_external_state_policy
  if external_state_policy is None:
    external_state_policy = ExternalStatePolicy.WARN

  uncompressed_spec = dataset.element_spec
  # Compress the dataset elements to reduce the amount of data that needs to
  # be sent over the network.
  # TODO(b/157105111): Make this an autotuned parallel map when we have a way
  # to limit memory usage.
  dataset = dataset.map(lambda *x: compression_ops.compress(x))
  # Prefetch one compressed element to reduce latency when requesting data
  # from tf.data workers.
  # TODO(b/157105111): Set this to autotune when we have a way to limit
  # memory usage.
  dataset = dataset.prefetch(1)
  # Apply options so that the dataset executed in the tf.data service will
  # be optimized and support autotuning.
  dataset = dataset._apply_options()  # pylint: disable=protected-access

  dataset_id = gen_experimental_dataset_ops.register_dataset(
      dataset._variant_tensor,  # pylint: disable=protected-access
      address=address,
      protocol=protocol,
      external_state_policy=external_state_policy.value)
  dataset = _DataServiceDataset(
      input_dataset=dataset,
      dataset_id=dataset_id,
      processing_mode=processing_mode,
      address=address,
      protocol=protocol,
      job_name=job_name,
      max_outstanding_requests=max_outstanding_requests,
      task_refresh_interval_hint_ms=task_refresh_interval_hint_ms)
  # TODO(b/157105111): Make this an autotuned parallel map when we have a way
  # to limit memory usage.
  dataset = dataset.map(
      lambda x: compression_ops.uncompress(x, output_spec=uncompressed_spec))

  # Disable autosharding for shared jobs.
  if job_name:
    options = dataset_ops.Options()
    options.experimental_distribute.auto_shard_policy = AutoShardPolicy.OFF
    dataset = dataset.with_options(options)
  return dataset
def _get_test_datasets(self,
                       base_dataset,
                       map_fn,
                       num_parallel_calls=None,
                       expect_optimized=True):
  """Given base dataset and map fn, creates test datasets.

  Returns a tuple of (unoptimized dataset, optimized dataset). The
  unoptimized dataset has the assertion that Batch follows Map. The optimized
  dataset has the assertion that Map follows Batch, and has the
  "map_vectorization" optimization applied.

  Args:
    base_dataset: Input dataset to map->batch.
    map_fn: Map function to use.
    num_parallel_calls: (Optional.) num_parallel_calls argument for map.
    expect_optimized: (Optional.) Whether we expect the optimization to take
      place, in which case we will assert that Batch is followed by Map,
      otherwise Map followed by Batch. Defaults to True.

  Returns:
    Tuple of (unoptimized dataset, optimized dataset).
  """
  map_node_name = "Map" if num_parallel_calls is None else "ParallelMap"

  def _make_dataset(node_names):
    dataset = base_dataset.apply(optimization.assert_next(node_names))
    dataset = dataset.map(map_fn, num_parallel_calls)
    dataset = dataset.batch(100)
    options = dataset_ops.Options()
    options.experimental_optimization.apply_default_optimizations = False
    options.experimental_optimization.map_and_batch_fusion = False
    dataset = dataset.with_options(options)
    return dataset

  unoptimized = _make_dataset([map_node_name, "Batch"])
  # Note that because of the `ChooseDataset` fork, we can't use `assert_next`
  # to verify the optimization result.
  optimized = _make_dataset(
      [] if expect_optimized else [map_node_name, "Batch"])
  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False
  options.experimental_optimization.map_vectorization = True
  optimized = optimized.with_options(options)
  return unoptimized, optimized
def benchmark(label, series):
  """Runs the benchmark for the given series."""

  def make_dataset(element_size, num_calls, batch_size):  # pylint: disable=missing-docstring
    k = 1024 * 1024
    x = constant_op.constant(np.random.rand(element_size, 4 * k))
    y = constant_op.constant(np.random.rand(4 * k, 1))
    dataset = dataset_ops.Dataset.range(1000000000000).map(lambda _: (x, y))
    dataset = dataset.map(
        math_ops.matmul,
        num_parallel_calls=num_calls).batch(batch_size=batch_size)
    options = dataset_ops.Options()
    options.experimental_optimization.apply_default_optimizations = False
    return dataset.with_options(options)

  for num_calls, inter_op, element_size, batch_size in series:
    num_iters = 1024 // (
        (element_size * batch_size) // min(num_calls, inter_op))
    # By default the chained map().batch() calls will not be fused.
    chained_dataset = make_dataset(element_size, num_calls, batch_size)
    session_config = config_pb2.ConfigProto(
        inter_op_parallelism_threads=inter_op, use_per_session_threads=True)

    self.run_and_report_benchmark(
        dataset=chained_dataset,
        iters=num_iters,
        num_elements=batch_size,
        warmup=True,
        session_config=session_config,
        name=name("chained", label, num_calls, inter_op, element_size,
                  batch_size))

    # Apply an option to the default dataset that will fuse map().batch().
    options = dataset_ops.Options()
    options.experimental_optimization.map_and_batch_fusion = True
    fused_dataset = chained_dataset.with_options(options)

    self.run_and_report_benchmark(
        dataset=fused_dataset,
        iters=num_iters,
        num_elements=batch_size,
        warmup=True,
        session_config=session_config,
        name=name("fused", label, num_calls, inter_op, element_size,
                  batch_size))
def run_core_tests(self, ds_fn1, ds_fn2, num_outputs, sparse_tensors=False):
  """Runs the core tests.

  Args:
    ds_fn1: 0-argument function that returns a Dataset.
    ds_fn2: 0-argument function that returns a Dataset different from ds_fn1.
      If None, verify_restore_in_modified_graph test is not run.
    num_outputs: Total number of outputs expected from this Dataset.
    sparse_tensors: Whether dataset is built from SparseTensor(s).

  Raises:
    AssertionError if any test fails.
  """
  # NOTE: We disable all default optimizations in serialization tests in
  # order to test the actual dataset in question.
  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False

  def ds_fn1_no_opt():
    return ds_fn1().with_options(options)

  self.verify_unused_iterator(
      ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
  self.verify_fully_used_iterator(
      ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
  self.verify_exhausted_iterator(
      ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
  self.verify_init_before_restore(
      ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
  self.verify_multiple_breaks(
      ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
  self.verify_reset_restored_iterator(
      ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
  self.verify_restore_in_empty_graph(
      ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)

  if ds_fn2:

    def ds_fn2_no_opt():
      return ds_fn2().with_options(options)

    self.verify_restore_in_modified_graph(
        ds_fn1_no_opt,
        ds_fn2_no_opt,
        num_outputs,
        sparse_tensors=sparse_tensors)
def _benchmark(self, dataset_fn, iters, num_elements):
  with ops.Graph().as_default():
    options = dataset_ops.Options()
    options.experimental_optimization.apply_default_optimizations = False
    dataset = dataset_fn().with_options(options)
    next_element = dataset_ops.make_one_shot_iterator(dataset).get_next()
    with session.Session() as sess:
      deltas = []
      for _ in range(iters):
        start = time.time()
        for _ in range(num_elements):
          sess.run(next_element.op)
        end = time.time()
        deltas.append(end - start)

  mean_wall_time = np.mean(deltas) / num_elements
  self.report_benchmark(iters=iters, wall_time=mean_wall_time)
def _run_benchmark(self, dataset, autotune, autotune_buffers,
                   benchmark_iters, benchmark_label):
  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False
  options.experimental_optimization.autotune = autotune
  options.experimental_optimization.autotune_buffers = autotune_buffers
  dataset = dataset.with_options(options)

  autotune_string = "_autotune_{}".format(
      "parallelism_and_buffer_sizes"
      if autotune_buffers else "parallelism_only")
  wall_time = self.run_and_report_benchmark(
      dataset=dataset,
      num_elements=benchmark_iters,
      warmup=True,
      iters=1,
      name=benchmark_label + (autotune_string if autotune else ""))
  return wall_time
def testCapturedInputs(self):
  a = constant_op.constant(3, dtype=dtypes.int64)
  b = constant_op.constant(4, dtype=dtypes.int64)
  some_tensor = math_ops.mul(a, b)
  function = lambda x: x * x

  def predicate(y):
    return math_ops.less(math_ops.cast(y, dtypes.int64), some_tensor)

  # We are currently not supporting functions with captured inputs.
  dataset = dataset_ops.Dataset.range(10).apply(
      testing.assert_next(["Map", "Filter"])).map(function).filter(predicate)
  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False
  options.experimental_optimization.map_and_filter_fusion = True
  dataset = dataset.with_options(options)
  self._testDataset(dataset, function, predicate)
def testMapAndBatchFails(self, numa_aware):
  """Test a dataset that maps a TF function across its input elements."""
  dataset = dataset_ops.Dataset.from_tensors(
      array_ops.check_numerics(
          constant_op.constant(1.0) / constant_op.constant(0.0), "oops"))
  batch_size = array_ops.placeholder(dtypes.int64, shape=[])
  dataset = dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
  if numa_aware:
    options = dataset_ops.Options()
    options.experimental_numa_aware = True
    dataset = dataset.with_options(options)
  iterator = dataset.make_initializable_iterator()

  init_op = iterator.initializer
  with self.cached_session() as sess:
    with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
      sess.run(init_op, feed_dict={batch_size: 14})
def dataset_fn(delay_ms):

  def interleave_fn(x):
    ds = dataset_ops.Dataset.from_tensors(x)
    if math_ops.equal(x, 0):
      ds = ds.apply(testing.sleep(delay_ms * 1000))
    else:
      ds = ds.apply(testing.sleep(0))
    return ds

  dataset = dataset_ops.Dataset.from_tensor_slices(elements)
  dataset = dataset.interleave(
      interleave_fn,
      cycle_length=10,
      num_parallel_calls=10,
      deterministic=local_determinism)
  opts = dataset_ops.Options()
  opts.experimental_deterministic = global_determinism
  dataset = dataset.with_options(opts)
  return dataset
def testPrefetchWithSlackOption(self):
  """Determines slack_period based on num devices attached to iterator."""
  dataset = dataset_ops.Dataset.range(10)
  dataset = dataset.prefetch(1)
  options = dataset_ops.Options()
  options.experimental_slack = True
  dataset = dataset.with_options(options)
  multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
      dataset, [self._devices[1], self._devices[2]])
  self.evaluate(multi_device_iterator.initializer)
  for i in range(0, 10, 2):
    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
    self.assertEqual(i, self.evaluate(elem_on_1))
    self.assertEqual(i + 1, self.evaluate(elem_on_2))
  with self.assertRaises(errors.OutOfRangeError):
    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
    self.evaluate(elem_on_1)
    self.evaluate(elem_on_2)
def testShuffleAndRepeatFusion(self):
  dataset = dataset_ops.Dataset.range(10).apply(
      optimization.assert_next(["ShuffleAndRepeat"])).shuffle(10).repeat(2)
  options = dataset_ops.Options()
  options.experimental_optimization.shuffle_and_repeat_fusion = True
  dataset = dataset.with_options(options)
  iterator = dataset.make_one_shot_iterator()
  get_next = iterator.get_next()

  with self.cached_session() as sess:
    for _ in range(2):
      results = []
      for _ in range(10):
        results.append(sess.run(get_next))
      self.assertAllEqual([x for x in range(10)], sorted(results))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)
def testAutotuningSettings(self):
  options = dataset_ops.Options()
  options.experimental_optimization.autotune_cpu_budget = 1000
  options.experimental_optimization.autotune_ram_budget = 999999999
  options.experimental_optimization.autotune_buffers = True
  self.assertIn("autotune_buffer_sizes", options._graph_rewrites().enabled)
  self.assertIn("disable_prefetch_legacy_autotune",
                options._graph_rewrites().enabled)

  autotune, algorithm, cpu_budget, ram_budget = options._autotune_settings()
  self.assertTrue(autotune)
  self.assertEqual(algorithm,
                   optimization_options._AutotuneAlgorithm.GRADIENT_DESCENT)
  self.assertEqual(cpu_budget, 1000)
  self.assertEqual(ram_budget, 999999999)
def testOptimizationMapParallelization(self, autotune, map_parallelization):
  dataset = dataset_ops.Dataset.range(5)
  if autotune is not False and map_parallelization is not False:  # pylint: disable=g-bool-id-comparison
    dataset = dataset.apply(testing.assert_next(["ParallelMap"]))
  else:
    dataset = dataset.apply(testing.assert_next(["Map"]))
  dataset = dataset.map(lambda x: x + 1)

  options = dataset_ops.Options()
  if autotune is not None:
    options.experimental_optimization.autotune = autotune
  if map_parallelization is not None:
    options.experimental_optimization.map_parallelization = (
        map_parallelization)
  dataset = dataset.with_options(options)

  self.assertDatasetProduces(dataset, expected_output=list(range(1, 6)))
def benchmark_batch_dense(self):
  for element_exp in [10, 12, 14, 16, 18, 20, 22]:
    for batch_exp in [3, 6, 9]:
      for parallel_copy in [True, False]:
        element_size = 1 << element_exp
        batch_size = 1 << batch_exp
        dataset = dataset_ops.Dataset.from_tensors(
            np.random.rand(element_size)).repeat().batch(batch_size)
        options = dataset_ops.Options()
        options.experimental_optimization.parallel_batch = parallel_copy
        dataset = dataset.with_options(options)
        tag = "_parallel" if parallel_copy else ""
        self.run_and_report_benchmark(
            dataset,
            num_elements=(1 << (22 - batch_exp - element_exp // 2)),
            iters=1,
            name="batch_element_size_%d_batch_size_%d%s" %
            (element_size, batch_size, tag))
def testSloppyInterleaveInOrder(self, input_values, cycle_length,
                                block_length, num_parallel_calls):
  dataset, coordination_events = _make_coordinated_sloppy_dataset(
      input_values, cycle_length, block_length, num_parallel_calls)
  options = dataset_ops.Options()
  options.experimental_threading = threading_options.ThreadingOptions()
  options.experimental_threading.private_threadpool_size = (
      num_parallel_calls + 1)
  dataset = dataset.with_options(options)
  get_next = self.getNext(dataset, requires_initialization=True)
  for expected_element in _interleave(
      _repeat(input_values, 2), cycle_length, block_length):
    coordination_events[expected_element].set()
    self.assertEqual(expected_element * expected_element,
                     self.evaluate(get_next()))
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
def testUseLegacyRebatchWithDataSharding(self, sharding_policy,
                                         with_prefetch):
  # This test simulates a distributed environment with 3 workers, each with
  # 1 replica.
  dataset = dataset_ops.Dataset.range(8)
  dataset = dataset.batch(4)
  options = dataset_ops.Options()
  options.experimental_distribute.auto_shard_policy = sharding_policy
  dataset = dataset.with_options(options)

  # We expect the auto-shard rewrite to rewrite RebatchDatasetV2 to
  # RebatchDataset(V1) for correctness reasons. This will modify the output
  # of the dataset.
  worker_a_dataset = distribute._RebatchDataset(
      dataset, batch_sizes=[2, 1, 1])
  if with_prefetch:
    worker_a_dataset = worker_a_dataset.prefetch(1)
  worker_a_dataset = distribute._AutoShardDataset(
      worker_a_dataset, 3, 0, num_replicas=3)
  expected = [[0, 1], [4, 5]]
  self.assertDatasetProduces(worker_a_dataset, expected)

  worker_b_dataset = distribute._RebatchDataset(
      dataset, batch_sizes=[1, 1, 2])
  if with_prefetch:
    worker_b_dataset = worker_b_dataset.prefetch(1)
  worker_b_dataset = distribute._AutoShardDataset(
      worker_b_dataset, 3, 1, num_replicas=3)
  expected = [[2, 3], [6, 7]]
  self.assertDatasetProduces(worker_b_dataset, expected)

  worker_c_dataset = distribute._RebatchDataset(
      dataset, batch_sizes=[1, 2, 1])
  if with_prefetch:
    worker_c_dataset = worker_c_dataset.prefetch(1)
  worker_c_dataset = distribute._AutoShardDataset(
      worker_c_dataset, 3, 2, num_replicas=3)
  expected = [[], []]
  self.assertDatasetProduces(worker_c_dataset, expected)