def _benchmark_eager_apply(self, label, device_and_format, defun=False,
                           execution_mode=None, compiled=False):
  with tfe.execution_mode(execution_mode):
    device, data_format = device_and_format
    model = resnet50.ResNet50(data_format)
    if defun:
      model.call = tfe.defun(model.call, compiled=compiled)
    batch_size = 64
    num_burn = 5
    num_iters = 30
    with tf.device(device):
      images, _ = random_batch(batch_size, data_format)
      for _ in xrange(num_burn):
        model(images, training=False).cpu()
      if execution_mode:
        tfe.async_wait()
      gc.collect()
      start = time.time()
      for _ in xrange(num_iters):
        model(images, training=False).cpu()
      if execution_mode:
        tfe.async_wait()
      self._report(label, start, num_iters, device, batch_size, data_format)
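# `_report` is defined elsewhere in the benchmark class. A minimal sketch of
# what the eager benchmarks appear to assume (the name format and the
# 'examples_per_sec' extra are assumptions, not the verbatim helper): it turns
# the elapsed wall time into per-iteration numbers for tf.test.Benchmark.
def _report(self, label, start, num_iters, device, batch_size, data_format):
  avg_time = (time.time() - start) / num_iters  # seconds per iteration
  dev = tf.DeviceSpec.from_string(device).device_type.lower()
  name = '%s_%s_batch_%d_%s' % (label, dev, batch_size, data_format)
  extras = {'examples_per_sec': batch_size / avg_time}
  self.report_benchmark(
      iters=num_iters, wall_time=avg_time, name=name, extras=extras)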
def _benchmark_eager_train(self, label, make_iterator, defun=False):
  device, data_format = device_and_data_format()
  for batch_size in self._train_batch_sizes():
    (images, labels) = random_batch(batch_size)
    num_burn = 3
    num_iters = 10
    model = resnet50.ResNet50(data_format)
    if defun:
      model.call = tfe.defun(model.call)
    optimizer = tf.train.GradientDescentOptimizer(0.1)

    with tf.device(device):
      iterator = make_iterator((images, labels))
      for _ in xrange(num_burn):
        (images, labels) = iterator.next()
        train_one_step(model, images, labels, optimizer)
      self._force_gpu_sync()
      gc.collect()

      start = time.time()
      for _ in xrange(num_iters):
        (images, labels) = iterator.next()
        train_one_step(model, images, labels, optimizer)
      self._force_gpu_sync()
      self._report(label, start, num_iters, device, batch_size, data_format)
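# `device_and_data_format` and `train_one_step` are module-level helpers not
# shown in this excerpt. A minimal sketch, assuming one training step means a
# forward pass, a softmax cross-entropy loss, and a gradient update recorded
# with tf.GradientTape:
def device_and_data_format():
  # channels_first is the faster layout on GPU; channels_last on CPU.
  return (('/gpu:0', 'channels_first') if tfe.num_gpus()
          else ('/cpu:0', 'channels_last'))


def train_one_step(model, images, labels, optimizer):
  with tf.GradientTape() as tape:
    logits = model(images, training=True)
    loss = tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=labels)
  grads = tape.gradient(loss, model.variables)
  optimizer.apply_gradients(zip(grads, model.variables))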
def test_apply_with_pooling(self):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format, include_top=False, pooling='avg')
  with tf.device(device):
    images, _ = random_batch(2)
    output = model(images)
  self.assertEqual((2, 2048), output.shape)
def testTrainWithSummary(self):
  with tf.Graph().as_default():
    images = tf.placeholder(tf.float32, image_shape(None), name='images')
    labels = tf.placeholder(tf.float32, [None, 1000], name='labels')
    tf.train.get_or_create_global_step()
    logdir = tempfile.mkdtemp()
    with tf.contrib.summary.always_record_summaries():
      with tf.contrib.summary.create_file_writer(
          logdir, max_queue=0, name='t0').as_default():
        model = resnet50.ResNet50(data_format())
        logits = model(images, training=True)
        loss = tf.losses.softmax_cross_entropy(
            logits=logits, onehot_labels=labels)
        tf.contrib.summary.scalar(name='loss', tensor=loss)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
        train_op = optimizer.minimize(loss)

    init = tf.global_variables_initializer()
    self.assertEqual(321, len(tf.global_variables()))

    # Use small batches for tests because the OSS version runs
    # in constrained GPU environment with 1-2GB of memory.
    batch_size = 2
    with tf.Session() as sess:
      sess.run(init)
      sess.run(tf.contrib.summary.summary_writer_initializer_op())
      np_images, np_labels = random_batch(batch_size)
      sess.run([train_op, tf.contrib.summary.all_summary_ops()],
               feed_dict={images: np_images, labels: np_labels})
      events = summary_test_util.events_from_logdir(logdir)
      self.assertEqual(len(events), 2)
      self.assertEqual(events[1].summary.value[0].tag, 'loss')
def test_no_garbage(self):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format)
  optimizer = tf.train.GradientDescentOptimizer(0.1)
  with tf.device(device):
    images, labels = random_batch(2, data_format)
    gc.disable()
    # Warm up. Note that this first run does create significant amounts of
    # garbage to be collected. The hope is that this is a build-only effect,
    # and a subsequent training loop will create nothing which needs to be
    # collected.
    apply_gradients(model, optimizer,
                    compute_gradients(model, images, labels))
    gc.collect()
    previous_gc_debug_flags = gc.get_debug()
    gc.set_debug(gc.DEBUG_SAVEALL)
    for _ in range(2):
      # Run twice to ensure that garbage that is created on the first
      # iteration is no longer accessible.
      apply_gradients(model, optimizer,
                      compute_gradients(model, images, labels))
    gc.collect()
    # There should be no garbage requiring collection.
    self.assertEqual(0, len(gc.garbage))
    gc.set_debug(previous_gc_debug_flags)
    gc.enable()
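# `compute_gradients` and `apply_gradients` are module-level helpers not shown
# here. A plausible sketch; the split into two functions matters because the
# defun benchmarks below wrap `apply_gradients` separately:
def compute_gradients(model, images, labels):
  with tf.GradientTape() as tape:
    logits = model(images, training=True)
    loss = tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=labels)
  return tape.gradient(loss, model.variables)


def apply_gradients(model, optimizer, gradients):
  optimizer.apply_gradients(zip(gradients, model.variables))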
def test_apply(self):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format)
  with tf.device(device):
    images, _ = random_batch(2)
    output = model(images)
  self.assertEqual((2, 1000), output.shape)
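# `random_batch` in the eager tests is assumed to synthesize an ImageNet-shaped
# batch of tensors. A sketch: the 224x224x3 images and 1000 classes follow from
# the output-shape assertions above, and the data_format default is an
# assumption so that both the `random_batch(2)` and
# `random_batch(2, data_format)` call sites work:
def random_batch(batch_size, data_format='channels_last'):
  shape = (3, 224, 224) if data_format == 'channels_first' else (224, 224, 3)
  shape = (batch_size,) + shape
  num_classes = 1000
  images = tf.random_uniform(shape)
  labels = tf.random_uniform(
      [batch_size], minval=0, maxval=num_classes, dtype=tf.int32)
  one_hot = tf.one_hot(labels, num_classes)
  return images, one_hot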
def benchmark_graph_train(self):
  for batch_size in [16, 32, 64]:
    with tf.Graph().as_default():
      np_images, np_labels = random_batch(batch_size)
      dataset = tf.data.Dataset.from_tensors((np_images, np_labels)).repeat()
      (images, labels) = dataset.make_one_shot_iterator().get_next()

      model = resnet50.ResNet50(data_format())
      logits = model(images, training=True)
      loss = tf.losses.softmax_cross_entropy(
          logits=logits, onehot_labels=labels)
      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
      train_op = optimizer.minimize(loss)

      init = tf.global_variables_initializer()
      with tf.Session() as sess:
        sess.run(init)
        (num_burn, num_iters) = (5, 10)
        for _ in range(num_burn):
          sess.run(train_op)
        start = time.time()
        for _ in range(num_iters):
          sess.run(train_op)
        self._report('train', start, num_iters, batch_size)
def _apply(self, defun=False):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format)
  if defun:
    model.call = tfe.defun(model.call)
  with tf.device(device):
    images, _ = random_batch(2)
    output = model(images)
  self.assertEqual((2, 1000), output.shape)
def test_apply_no_top(self):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format, include_top=False)
  with tf.device(device):
    images, _ = random_batch(2)
    output = model(images)
  output_shape = ((2, 2048, 1, 1)
                  if data_format == 'channels_first' else (2, 1, 1, 2048))
  self.assertEqual(output_shape, output.shape)
def _apply(self, defun=False, execution_mode=None):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format)
  if defun:
    model.call = tfe.defun(model.call)
  with tf.device(device), tfe.execution_mode(execution_mode):
    images, _ = random_batch(2, data_format)
    output = model(images, training=False)
    tfe.async_wait()
  self.assertEqual((2, 1000), output.shape)
def testApply(self):
  batch_size = 64
  with tf.Graph().as_default():
    images = tf.placeholder(tf.float32, image_shape(None))
    model = resnet50.ResNet50(data_format())
    predictions = model(images)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
      sess.run(init)
      np_images, _ = random_batch(batch_size)
      out = sess.run(predictions, feed_dict={images: np_images})
      self.assertAllEqual([batch_size, 1000], out.shape)
def test_train(self):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format)
  tf.train.get_or_create_global_step()
  logdir = tempfile.mkdtemp()
  with tf.contrib.summary.create_file_writer(
      logdir, max_queue=0,
      name='t0').as_default(), tf.contrib.summary.always_record_summaries():
    with tf.device(device):
      optimizer = tf.train.GradientDescentOptimizer(0.1)
      images, labels = random_batch(2)
      train_one_step(model, images, labels, optimizer)
      self.assertEqual(320, len(model.variables))
  events = summary_test_util.events_from_logdir(logdir)
  self.assertEqual(len(events), 2)
  self.assertEqual(events[1].summary.value[0].tag, 'loss')
def testApply(self):
  # Use small batches for tests because the OSS version runs
  # in constrained GPU environment with 1-2GB of memory.
  batch_size = 8
  with tf.Graph().as_default():
    images = tf.placeholder(tf.float32, image_shape(None))
    model = resnet50.ResNet50(data_format())
    predictions = model(images, training=False)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
      sess.run(init)
      np_images, _ = random_batch(batch_size)
      out = sess.run(predictions, feed_dict={images: np_images})
      self.assertAllEqual([batch_size, 1000], out.shape)
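# The graph tests rely on three small module-level helpers. A sketch, assuming
# numpy feeds (`import numpy as np`) and the same ImageNet-shaped inputs as the
# eager tests; exact implementations may differ:
def data_format():
  return 'channels_first' if tf.test.is_gpu_available() else 'channels_last'


def image_shape(batch_size):
  # batch_size may be None, which yields a placeholder-compatible shape.
  if data_format() == 'channels_first':
    return [batch_size, 3, 224, 224]
  return [batch_size, 224, 224, 3]


def random_batch(batch_size):
  images = np.random.rand(*image_shape(batch_size)).astype(np.float32)
  num_classes = 1000
  labels = np.random.randint(
      low=0, high=num_classes, size=[batch_size]).astype(np.int32)
  one_hot = np.zeros((batch_size, num_classes)).astype(np.float32)
  one_hot[np.arange(batch_size), labels] = 1.
  return images, one_hot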
def benchmark_eager_apply(self):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format)
  batch_size = 64
  num_burn = 5
  num_iters = 30
  with tf.device(device):
    images, _ = random_batch(batch_size)
    for _ in xrange(num_burn):
      model(images).cpu()
    gc.collect()
    start = time.time()
    for _ in xrange(num_iters):
      model(images).cpu()
    self._report('eager_apply', start, num_iters, device, batch_size,
                 data_format)
def _benchmark_eager_apply(self, label, defun=False):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format)
  if defun:
    model.call = tfe.defun(model.call)
  batch_size = 64
  num_burn = 5
  num_iters = 30
  with tf.device(device):
    images, _ = random_batch(batch_size)
    for _ in xrange(num_burn):
      model(images).cpu()
    gc.collect()
    start = time.time()
    for _ in xrange(num_iters):
      model(images).cpu()
    self._report(label, start, num_iters, device, batch_size, data_format)
def _benchmark_eager_train(self,
                           label,
                           make_iterator,
                           device_and_format,
                           defun=False,
                           execution_mode=None,
                           compiled=False):
  with tfe.execution_mode(execution_mode):
    device, data_format = device_and_format
    for batch_size in self._train_batch_sizes():
      (images, labels) = random_batch(batch_size, data_format)
      model = resnet50.ResNet50(data_format)
      optimizer = tf.train.GradientDescentOptimizer(0.1)
      apply_grads = apply_gradients
      if defun:
        model.call = tfe.defun(model.call, compiled=compiled)
        apply_grads = tfe.defun(apply_gradients, compiled=compiled)

      num_burn = 3
      num_iters = 10
      with tf.device(device):
        iterator = make_iterator((images, labels))
        for _ in xrange(num_burn):
          (images, labels) = iterator.next()
          apply_grads(model, optimizer,
                      compute_gradients(model, images, labels))
        if execution_mode:
          tfe.async_wait()
        self._force_device_sync()
        gc.collect()

        start = time.time()
        for _ in xrange(num_iters):
          (images, labels) = iterator.next()
          apply_grads(model, optimizer,
                      compute_gradients(model, images, labels))
        if execution_mode:
          tfe.async_wait()
        self._force_device_sync()
        self._report(label, start, num_iters, device, batch_size, data_format)
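# `make_iterator` lets the same benchmark body time either a real tf.data
# input pipeline or a no-op feed. A sketch of the synthetic variant (the class
# name is an assumption): it hands back the same pre-built tensors on every
# `next()`, so the measured time is dominated by the model rather than input.
class MockIterator(object):

  def __init__(self, tensors):
    self._tensors = [tf.identity(x) for x in tensors]

  def next(self):
    return self._tensors
#
# Hypothetical call site:
#   self._benchmark_eager_train('eager_train', MockIterator,
#                               device_and_data_format())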
def benchmark_graph_apply(self):
  with tf.Graph().as_default():
    images = tf.placeholder(tf.float32, image_shape(None))
    model = resnet50.ResNet50(data_format())
    predictions = model(images, training=False)

    init = tf.global_variables_initializer()

    batch_size = 64
    with tf.Session() as sess:
      sess.run(init)
      np_images, _ = random_batch(batch_size)
      num_burn, num_iters = (3, 30)
      for _ in range(num_burn):
        sess.run(predictions, feed_dict={images: np_images})
      start = time.time()
      for _ in range(num_iters):
        # Comparison with the eager execution benchmark in resnet50_test.py
        # isn't entirely fair as the time here includes the cost of copying
        # the feeds from CPU memory to GPU.
        sess.run(predictions, feed_dict={images: np_images})
      self._report('apply', start, num_iters, batch_size)
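# The graph benchmarks' `_report` takes fewer arguments than the eager
# variant. A sketch along the same lines (the name format and the
# 'examples_per_sec' extra are assumptions):
def _report(self, label, start, num_iters, batch_size):
  avg_time = (time.time() - start) / num_iters
  dev = 'gpu' if tf.test.is_gpu_available() else 'cpu'
  name = 'graph_%s_%s_batch_%d_%s' % (label, dev, batch_size, data_format())
  extras = {'examples_per_sec': batch_size / avg_time}
  self.report_benchmark(
      iters=num_iters, wall_time=avg_time, name=name, extras=extras)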