def make_dataset(config, take_count, batch_size, shard=False, synthetic=False):
  shuffle_buffer_size = 10000
  if synthetic:
    # Build a constant in-memory batch so the pipeline can be exercised
    # without reading any TFRecord files from disk.
    height = 224
    width = 224
    input_shape = [batch_size, height, width, 3]
    input_element = nest.map_structure(
        lambda s: tf.constant(0.5, tf.float32, s), tf.TensorShape(input_shape))
    label_element = nest.map_structure(
        lambda s: tf.constant(1, tf.int32, s), tf.TensorShape([batch_size, 1]))
    element = (input_element, label_element)
    ds = tf.data.Dataset.from_tensors(element).repeat()
    return ds

  filename_pattern = os.path.join(config.data_dir, '%s-*')
  filenames = tf.gfile.Glob(filename_pattern % 'train')
  ds = tf.data.Dataset.list_files(filenames, shuffle=False)
  ds = ds.interleave(tf.data.TFRecordDataset, cycle_length=10, block_length=1,
                     num_parallel_calls=tf.data.experimental.AUTOTUNE)
  ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
  counter = tf.data.Dataset.range(sys.maxsize)
  ds = tf.data.Dataset.zip((ds, counter))
  ds = ds.apply(tf.data.experimental.shuffle_and_repeat(shuffle_buffer_size))
  ds = ds.map(lambda image, counter: parse_record(image, True, config),
              num_parallel_calls=216)
  ds = ds.batch(batch_size, drop_remainder=True)
  ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
  ds = threadpool.override_threadpool(
      ds,
      threadpool.PrivateThreadPool(128, display_name='input_pipeline_thread_pool'))
  return ds
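# NOTE: a minimal, hypothetical smoke test for make_dataset's synthetic path.
# It needs no TFRecord files; `SimpleNamespace` merely stands in for whatever
# config object the surrounding training script actually passes in, and the
# data_dir value is a made-up path.
from types import SimpleNamespace

import tensorflow as tf

_config = SimpleNamespace(data_dir='/tmp/imagenet_tfrecords')
_ds = make_dataset(_config, take_count=0, batch_size=32, synthetic=True)
_images, _labels = _ds.make_one_shot_iterator().get_next()  # TF 1.x iterator
with tf.Session() as sess:
  image_batch, label_batch = sess.run([_images, _labels])
  print(image_batch.shape, label_batch.shape)  # (32, 224, 224, 3) (32, 1)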
def testOverrideThreadPool(self):

  def get_thread_id(_):
    # Python creates a dummy thread object to represent the current
    # thread when called from an "alien" thread (such as a
    # `PrivateThreadPool` thread in this case). It does not include
    # the TensorFlow-given display name, but it has a unique
    # identifier that maps one-to-one with the underlying OS thread.
    return np.array(threading.current_thread().ident).astype(np.int64)

  for num_threads in [1, 2, 4, 8, 16]:
    dataset = (
        Dataset.range(1000).map(
            lambda x: script_ops.py_func(get_thread_id, [x], dtypes.int64),
            num_parallel_calls=32).apply(unique.unique()))
    dataset = threadpool.override_threadpool(
        dataset,
        threadpool.PrivateThreadPool(
            num_threads, display_name='private_thread_pool_%d' % num_threads))

    thread_ids = []
    for next_element in datasets.Iterator(dataset):
      thread_ids.append(next_element)
    self.assertEqual(len(thread_ids), len(set(thread_ids)))
    self.assertGreater(len(thread_ids), 0)
    # NOTE(mrry): We don't control the thread pool scheduling, and
    # so cannot guarantee that all of the threads in the pool will
    # perform work.
    self.assertLessEqual(len(thread_ids), num_threads)
def override_threadpool_fn(dataset):
  # `num_threads` and `max_intra_op_parallelism` are closed over from the
  # enclosing scope (e.g. parameterized test arguments).
  return threadpool.override_threadpool(
      dataset,
      threadpool.PrivateThreadPool(
          num_threads,
          max_intra_op_parallelism=max_intra_op_parallelism,
          display_name="private_thread_pool_%d" % num_threads))
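# NOTE: a hypothetical sketch of how override_threadpool_fn could be used.
# Since it maps a dataset to a dataset, it composes directly with
# Dataset.apply; the two closed-over names are bound here to example values,
# and the dataset_ops / threadpool imports are assumed to match the
# surrounding tests.
num_threads = 4
max_intra_op_parallelism = 1
dataset = dataset_ops.Dataset.range(1000).apply(override_threadpool_fn)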
def testOptimizationThreadPoolDataset(self):
  dataset = dataset_ops.Dataset.range(10).batch(10)

  dataset = threadpool.override_threadpool(
      dataset,
      threadpool.PrivateThreadPool(
          2, display_name="private_thread_pool_%d" % 2))

  dataset = dataset_ops._OptimizeDataset(dataset, [])

  self.assertDatasetProduces(
      dataset,
      expected_output=[list(range(10))],
      requires_initialization=True)
def testOptimizationThreadPoolDataset(self):
  dataset = dataset_ops.Dataset.range(10).batch(10)

  dataset = threadpool.override_threadpool(
      dataset,
      threadpool.PrivateThreadPool(
          2, display_name="private_thread_pool_%d" % 2))

  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False
  dataset = dataset.with_options(options)

  self.assertDatasetProduces(
      dataset,
      expected_output=[list(range(10))],
      requires_initialization=True)
def make_dataset(args, take_count, batch_size, training=False, shard=False):
  shuffle_buffer_size = 10000
  num_readers = 10
  rank_size = int(os.getenv('RANK_SIZE'))
  # rank_id = int(os.getenv('DEVICE_INDEX'))
  rank_id = int(os.getenv('DEVICE_ID'))

  if training:
    filename_pattern = os.path.join(args.data_dir, '%s-*')
    filenames = sorted(tf.gfile.Glob(filename_pattern % 'train'))
  else:
    filename_pattern = os.path.join(args.data_dir, '%s-*')
    filenames = sorted(tf.gfile.Glob(filename_pattern % 'validation'))

  ds = tf.data.Dataset.from_tensor_slices(filenames)

  if not training:
    ds = ds.take(take_count)

  if training:
    # ds = ds.shuffle(1024, seed=7 * (1 + rank_id))
    ds = ds.shuffle(1024)

  ds = ds.interleave(tf.data.TFRecordDataset, cycle_length=num_readers,
                     block_length=1,
                     num_parallel_calls=tf.data.experimental.AUTOTUNE)

  # counter = tf.data.Dataset.range(sys.maxsize)
  # ds = tf.data.Dataset.zip((ds, counter))

  options = tf.data.Options()
  options.experimental_threading.max_intra_op_parallelism = 1
  ds = ds.with_options(options)

  ds = ds.prefetch(buffer_size=batch_size)

  if training:
    # ds = ds.apply(tf.data.experimental.shuffle_and_repeat(shuffle_buffer_size, seed=5 * (1 + rank_id)))
    ds = ds.shuffle(buffer_size=shuffle_buffer_size)
    ds = ds.repeat()

  ds = ds.map(lambda image: parse_record(image, training),
              num_parallel_calls=tf.data.experimental.AUTOTUNE)
  # ds = ds.map(lambda image, counter: parse_record(image, training), num_parallel_calls=24)
  ds = ds.batch(batch_size, drop_remainder=True)
  ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
  ds = threadpool.override_threadpool(
      ds,
      threadpool.PrivateThreadPool(128, display_name='input_pipeline_thread_pool'))
  return ds
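# NOTE: a hypothetical invocation of the make_dataset variant above. It reads
# RANK_SIZE and DEVICE_ID from the environment, so both must be set (here to
# single-device defaults) before it is called; `argparse` and the data_dir
# path only stand in for however the real training script builds its `args`
# object.
import argparse
import os

os.environ.setdefault('RANK_SIZE', '1')
os.environ.setdefault('DEVICE_ID', '0')

_parser = argparse.ArgumentParser()
_parser.add_argument('--data_dir', default='/data/imagenet_tfrecords')
_args = _parser.parse_args([])
train_ds = make_dataset(_args, take_count=0, batch_size=256, training=True)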
def testOptimizationThreadPoolDataset(self):
  dataset = dataset_ops.Dataset.range(10).batch(10)

  dataset = threadpool.override_threadpool(
      dataset,
      threadpool.PrivateThreadPool(
          2, display_name="private_thread_pool_%d" % 2))

  dataset = dataset_ops._OptimizeDataset(dataset, [])

  iterator = dataset.make_initializable_iterator()
  get_next = iterator.get_next()

  with self.cached_session() as sess:
    sess.run(iterator.initializer)
    self.assertAllEqual(list(range(10)), sess.run(get_next))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)
def testNumThreads(self, num_threads, max_intra_op_parallelism):

  def get_thread_id(_):
    # Python creates a dummy thread object to represent the current
    # thread when called from an "alien" thread (such as a
    # `PrivateThreadPool` thread in this case). It does not include
    # the TensorFlow-given display name, but it has a unique
    # identifier that maps one-to-one with the underlying OS thread.
    return np.array(threading.current_thread().ident).astype(np.int64)

  dataset = (
      dataset_ops.Dataset.range(1000).map(
          lambda x: script_ops.py_func(get_thread_id, [x], dtypes.int64),
          num_parallel_calls=32).apply(unique.unique()))
  dataset = threadpool.override_threadpool(
      dataset,
      threadpool.PrivateThreadPool(
          num_threads,
          max_intra_op_parallelism=max_intra_op_parallelism,
          display_name="private_thread_pool_%d" % num_threads))

  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()

  with self.cached_session() as sess:
    sess.run(iterator.initializer)
    thread_ids = []
    try:
      while True:
        thread_ids.append(sess.run(next_element))
    except errors.OutOfRangeError:
      pass
    self.assertEqual(len(thread_ids), len(set(thread_ids)))
    self.assertGreater(len(thread_ids), 0)
    # NOTE(mrry): We don't control the thread pool scheduling, and
    # so cannot guarantee that all of the threads in the pool will
    # perform work.
    self.assertLessEqual(len(thread_ids), num_threads)
def testNumThreads(self, num_threads, max_intra_op_parallelism):

  def get_thread_id(_):
    # Python creates a dummy thread object to represent the current
    # thread when called from an "alien" thread (such as a
    # `PrivateThreadPool` thread in this case). It does not include
    # the TensorFlow-given display name, but it has a unique
    # identifier that maps one-to-one with the underlying OS thread.
    return np.array(threading.current_thread().ident).astype(np.int64)

  dataset = (
      dataset_ops.Dataset.range(1000).map(
          lambda x: script_ops.py_func(get_thread_id, [x], dtypes.int64),
          num_parallel_calls=32).apply(unique.unique()))
  dataset = threadpool.override_threadpool(
      dataset,
      threadpool.PrivateThreadPool(
          num_threads,
          max_intra_op_parallelism=max_intra_op_parallelism,
          display_name="private_thread_pool_%d" % num_threads))

  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()

  with self.cached_session() as sess:
    self.evaluate(iterator.initializer)
    thread_ids = []
    try:
      while True:
        thread_ids.append(sess.run(next_element))
    except errors.OutOfRangeError:
      pass
    self.assertEqual(len(thread_ids), len(set(thread_ids)))
    self.assertGreater(len(thread_ids), 0)
    # NOTE(mrry): We don't control the thread pool scheduling, and
    # so cannot guarantee that all of the threads in the pool will
    # perform work.
    self.assertLessEqual(len(thread_ids), num_threads)
def make_dataset_eval(config, take_count, batch_size, shard=False, synthetic=False):
  filename_pattern = os.path.join(config.data_dir, '%s-*')
  filenames = sorted(tf.gfile.Glob(filename_pattern % 'validation'))
  ds = tf.data.Dataset.list_files(filenames, shuffle=False)
  ds = ds.interleave(tf.data.TFRecordDataset, cycle_length=10, block_length=1)
  ds = ds.take(take_count)  # make sure all ranks have the same amount
  ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
  counter = tf.data.Dataset.range(sys.maxsize)
  ds = tf.data.Dataset.zip((ds, counter))
  ds = ds.map(lambda image, counter: parse_record(image, False, config),
              num_parallel_calls=64)
  ds = ds.batch(batch_size, drop_remainder=True)
  ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
  ds = threadpool.override_threadpool(
      ds,
      threadpool.PrivateThreadPool(128, display_name='input_pipeline_thread_pool'))
  return ds
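# NOTE: a hypothetical consumption sketch for make_dataset_eval. As the tests
# above hint (requires_initialization=True), a dataset wrapped by
# threadpool.override_threadpool is driven through an initializable iterator
# in TF 1.x graph mode. The config object, the data_dir path, and the
# assumption that parse_record yields (image, label) pairs are all stand-ins
# for the real code.
from types import SimpleNamespace

import tensorflow as tf

_eval_config = SimpleNamespace(data_dir='/data/imagenet_tfrecords')
eval_ds = make_dataset_eval(_eval_config, take_count=50000, batch_size=100)
eval_iterator = eval_ds.make_initializable_iterator()
eval_images, eval_labels = eval_iterator.get_next()

with tf.Session() as sess:
  sess.run(eval_iterator.initializer)
  try:
    while True:
      sess.run([eval_images, eval_labels])  # run the actual eval step here
  except tf.errors.OutOfRangeError:
    pass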