def testFilterFusion(self, map_function, predicates):
  """Checks that a chain of filter()s after a map() is fused into one Filter."""
  dataset = dataset_ops.Dataset.range(5).apply(
      optimization.assert_next(["Map", "Filter",
                                "MemoryCacheImpl"])).map(map_function)
  for predicate in predicates:
    dataset = dataset.filter(predicate)
  dataset = dataset.cache()
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.filter_fusion = True
  dataset = dataset.with_options(options)

  def _passes_all(value):
    """True iff every predicate accepts `value` (tuples are splatted)."""
    for predicate in predicates:
      if isinstance(value, tuple):
        keep = predicate(*value)  # Pass tuple as multiple arguments.
      else:
        keep = predicate(value)
      if not self.evaluate(keep):
        return False
    return True

  expected_output = [
      r for r in (map_function(x) for x in range(5)) if _passes_all(r)
  ]
  self.assertDatasetProduces(dataset, expected_output=expected_output)
def run_core_tests(self, ds_fn1, ds_fn2, num_outputs, sparse_tensors=False):
  """Runs the core tests.

  Args:
    ds_fn1: 0-argument function that returns a Dataset.
    ds_fn2: 0-argument function that returns a Dataset different from ds_fn1.
      If None, verify_restore_in_modified_graph test is not run.
    num_outputs: Total number of outputs expected from this Dataset.
    sparse_tensors: Whether dataset is built from SparseTensor(s).

  Raises:
    AssertionError if any test fails.
  """
  # NOTE: We disable all default optimizations in serialization tests in order
  # to test the actual dataset in question.
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.apply_default_optimizations = False

  def ds_fn1_no_opt():
    return ds_fn1().with_options(options)

  # All single-dataset checks take the same arguments; run them in sequence.
  for verify in (self.verify_unused_iterator,
                 self.verify_fully_used_iterator,
                 self.verify_exhausted_iterator,
                 self.verify_init_before_restore,
                 self.verify_multiple_breaks,
                 self.verify_reset_restored_iterator,
                 self.verify_restore_in_empty_graph):
    verify(ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)

  if ds_fn2:

    def ds_fn2_no_opt():
      return ds_fn2().with_options(options)

    self.verify_restore_in_modified_graph(
        ds_fn1_no_opt,
        ds_fn2_no_opt,
        num_outputs,
        sparse_tensors=sparse_tensors)
def testMapFilterFusion(self, function, predicate):
  """map() followed by filter() should fuse into FilterByLastComponent."""
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.map_and_filter_fusion = True
  dataset = (
      dataset_ops.Dataset.range(10)
      .apply(optimization.assert_next(["Map", "FilterByLastComponent"]))
      .map(function)
      .filter(predicate)
      .with_options(options))
  self._testMapAndFilter(dataset, function, predicate)
def testHoisting(self, function, will_optimize):
  """hoist_random_uniform rewrites the graph exactly when `will_optimize`."""
  expected_nodes = ["Zip[0]", "Map"] if will_optimize else ["Map"]
  dataset = dataset_ops.Dataset.range(5).apply(
      optimization.assert_next(expected_nodes)).map(function)
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.hoist_random_uniform = True
  self._testDataset(dataset.with_options(options))
def testMapParallelization(self, function, should_optimize):
  """Parallelizable map() should be rewritten to ParallelMap."""
  expected_nodes = ["ParallelMap" if should_optimize else "Map"]
  dataset = dataset_ops.Dataset.range(5).apply(
      optimization.assert_next(expected_nodes)).map(function)
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.map_parallelization = True
  dataset = dataset.with_options(options)
  # Output values are only checked when the rewrite is expected to apply.
  if not should_optimize:
    return
  self.assertDatasetProduces(
      dataset, expected_output=[function(x) for x in range(5)])
def testMapAndBatchFusion(self):
  """map().batch() should fuse into a single MapAndBatch node."""
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.map_and_batch_fusion = True
  dataset = (
      dataset_ops.Dataset.range(10)
      .apply(optimization.assert_next(["MapAndBatch"]))
      .map(lambda x: x * x)
      .batch(10)
      .with_options(options))
  self.assertDatasetProduces(
      dataset, expected_output=[[x * x for x in range(10)]])
def testNoopElimination(self):
  """No-op stages (skip(0), take(-1), repeat(1), prefetch(0)) are removed."""
  one = constant_op.constant(1, dtype=dtypes.int64)
  two = constant_op.constant(2, dtype=dtypes.int64)
  # A non-constant repeat count so FiniteRepeat cannot be folded away.
  repeat_count = math_ops.mul(one, two)
  dataset = dataset_ops.Dataset.range(5)
  dataset = dataset.apply(
      optimization.assert_next(
          ["FiniteRepeat", "FiniteSkip", "Prefetch", "MemoryCacheImpl"]))
  dataset = (dataset.repeat(repeat_count).skip(5).take(-1).skip(0)
             .repeat(1).prefetch(0).prefetch(1).cache())
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.noop_elimination = True
  dataset = dataset.with_options(options)
  self.assertDatasetProduces(dataset, expected_output=range(5))
def testCapturedInputs(self):
  """Hoisting still applies when the map function captures an outer tensor."""
  captured = math_ops.mul(
      constant_op.constant(1, dtype=dtypes.float32),
      constant_op.constant(0, dtype=dtypes.float32))

  def random_with_capture(_):
    return captured + random_ops.random_uniform(
        [], minval=1, maxval=10, dtype=dtypes.float32, seed=42)

  dataset = dataset_ops.Dataset.range(5).apply(
      optimization.assert_next(["Zip[0]", "Map"])).map(random_with_capture)
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.hoist_random_uniform = True
  self._testDataset(dataset.with_options(options))
def testCapturedInputs(self):
  """Fusion must not fire when the predicate captures an outer tensor."""
  threshold = math_ops.mul(
      constant_op.constant(3, dtype=dtypes.int64),
      constant_op.constant(4, dtype=dtypes.int64))
  function = lambda x: x * x

  def predicate(y):
    return math_ops.less(math_ops.cast(y, dtypes.int64), threshold)

  # We are currently not supporting functions with captured inputs.
  dataset = dataset_ops.Dataset.range(10).apply(
      optimization.assert_next(["Map",
                                "Filter"])).map(function).filter(predicate)
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.map_and_filter_fusion = True
  self._testMapAndFilter(dataset.with_options(options), function, predicate)
def testShuffleAndRepeatFusion(self):
  """shuffle(10).repeat(2) should fuse into one ShuffleAndRepeat node."""
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.shuffle_and_repeat_fusion = True
  dataset = (
      dataset_ops.Dataset.range(10)
      .apply(optimization.assert_next(["ShuffleAndRepeat"]))
      .shuffle(10)
      .repeat(2)
      .with_options(options))
  get_next = self.getNext(dataset)
  # Two epochs, each a permutation of [0, 10).
  for _ in range(2):
    epoch = [self.evaluate(get_next()) for _ in range(10)]
    self.assertAllEqual(list(range(10)), sorted(epoch))
  # The iterator stays exhausted on repeated pulls.
  for _ in range(2):
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(get_next())
def testMapFusion(self, functions):
  """A chain of map()s ahead of cache() should fuse into a single Map."""
  dataset = dataset_ops.Dataset.range(5).apply(
      optimization.assert_next(["Map", "MemoryCacheImpl"]))
  for function in functions:
    dataset = dataset.map(function)
  dataset = dataset.cache()
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.map_fusion = True
  dataset = dataset.with_options(options)

  def _apply_all(value):
    """Applies each function in order (tuple results are splatted)."""
    for function in functions:
      if isinstance(value, tuple):
        value = function(*value)  # Pass tuple as multiple arguments.
      else:
        value = function(value)
    return value

  expected_output = [_apply_all(x) for x in range(5)]
  self.assertDatasetProduces(dataset, expected_output=expected_output)
def _get_test_datasets(self,
                       base_dataset,
                       map_fn,
                       num_parallel_calls=None,
                       expect_optimized=True):
  """Given base dataset and map fn, creates test datasets.

  Returns a tuple of (unoptimized dataset, optimized dataset). The
  unoptimized dataset has the assertion that Batch follows Map. The optimized
  dataset has the assertion that Map follows Batch, and has the
  "map_vectorization" optimization applied.

  Args:
    base_dataset: Input dataset to map->batch
    map_fn: Map function to use
    num_parallel_calls: (Optional.) num_parallel_calls argument for map
    expect_optimized: (Optional.) Whether we expect the optimization to take
      place, in which case we will assert that Batch is followed by Map,
      otherwise Map followed by Batch. Defaults to True.

  Returns:
    Tuple of (unoptimized dataset, optimized dataset).
  """
  map_node_name = "Map" if num_parallel_calls is None else "ParallelMap"
  batch_size = 100

  def _make_dataset(node_names):
    asserted = base_dataset.apply(optimization.assert_next(node_names))
    return asserted.map(
        map_fn, num_parallel_calls=num_parallel_calls).batch(batch_size)

  unoptimized = _make_dataset([map_node_name, "Batch"])
  if expect_optimized:
    optimized_order = ["Batch", map_node_name]
  else:
    optimized_order = [map_node_name, "Batch"]
  optimized = _make_dataset(optimized_order)
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.map_vectorization = True
  optimized = optimized.with_options(options)
  return unoptimized, optimized
def testOptimization(self):
  """noop_elimination drops skip(0) when feeding a MultiDeviceIterator."""
  dataset = dataset_ops.Dataset.range(10)
  dataset = dataset.apply(optimization.assert_next(["MemoryCacheImpl"]))
  dataset = dataset.skip(0)  # this should be optimized away
  dataset = dataset.cache()
  options = dataset_ops.Options()
  options.experimental_optimization = OptimizationOptions()
  options.experimental_optimization.noop_elimination = True
  dataset = dataset.with_options(options)
  multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
      dataset, ["/cpu:1", "/cpu:2"])
  elem_on_1, elem_on_2 = multi_device_iterator.get_next()
  # Three CPU devices: one host plus the two iterator targets.
  config = config_pb2.ConfigProto(device_count={"CPU": 3})
  with self.test_session(config=config):
    self.evaluate(multi_device_iterator.initializer)
    for i in range(0, 10, 2):
      self.assertEqual(i, self.evaluate(elem_on_1))
      self.assertEqual(i + 1, self.evaluate(elem_on_2))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(elem_on_1)
      self.evaluate(elem_on_2)
def benchmark(label, series):
  """Runs benchmark the given series."""
  print("%s:" % label)

  def make_base_dataset(element_size):
    k = 1024 * 1024
    x = constant_op.constant(np.random.rand(element_size, 4 * k))
    y = constant_op.constant(np.random.rand(4 * k, 1))
    return dataset_ops.Dataset.range(1000000000000).map(lambda _: (x, y))

  def measure(dataset, inter_op, num_iters):
    """Warms up, then records per-step wall times for one dataset variant."""
    get_next = dataset_ops.make_one_shot_iterator(dataset).get_next()
    deltas = []
    with session.Session(
        config=config_pb2.ConfigProto(
            inter_op_parallelism_threads=inter_op,
            use_per_session_threads=True)) as sess:
      for _ in range(5):  # warm-up steps, not timed
        sess.run(get_next.op)
      for _ in range(num_iters):
        start = time.time()
        sess.run(get_next.op)
        end = time.time()
        deltas.append(end - start)
    return deltas

  for num_calls, inter_op, element_size, batch_size in series:
    num_iters = 1024 // (
        (element_size * batch_size) // min(num_calls, inter_op))
    fused_dataset = make_base_dataset(element_size).map(
        math_ops.matmul,
        num_parallel_calls=num_calls).batch(batch_size=batch_size)
    fused_deltas = measure(fused_dataset, inter_op, num_iters)

    # `map_and_batch_fusion` is optimized by default. To get the chained
    # dataset, we have to disable it.
    options = dataset_ops.Options()
    options.experimental_optimization = OptimizationOptions()
    options.experimental_optimization.map_and_batch_fusion = False
    chained_deltas = measure(
        fused_dataset.with_options(options), inter_op, num_iters)

    print(
        "batch size: %d, num parallel calls: %d, inter-op parallelism: %d, "
        "element size: %d, num iters: %d\nchained wall time: %f (median), "
        "%f (mean), %f (stddev), %f (min), %f (max)\n fused wall time: "
        "%f (median), %f (mean), %f (stddev), %f (min), %f (max)\n "
        "chained/fused: %.2fx (median), %.2fx (mean)" %
        (batch_size, num_calls, inter_op, element_size, num_iters,
         np.median(chained_deltas), np.mean(chained_deltas),
         np.std(chained_deltas), np.min(chained_deltas),
         np.max(chained_deltas), np.median(fused_deltas),
         np.mean(fused_deltas), np.std(fused_deltas), np.min(fused_deltas),
         np.max(fused_deltas),
         np.median(chained_deltas) / np.median(fused_deltas),
         np.mean(chained_deltas) / np.mean(fused_deltas)))
    self.report_benchmark(
        iters=num_iters,
        wall_time=np.median(chained_deltas),
        name=name("chained", label, num_calls, inter_op, element_size,
                  batch_size))
    self.report_benchmark(
        iters=num_iters,
        wall_time=np.median(fused_deltas),
        name=name("fused", label, num_calls, inter_op, element_size,
                  batch_size))
    print()