def _benchmark_eager_apply(self, label, device_and_format, defun=False, execution_mode=None, compiled=False): with tfe.execution_mode(execution_mode): device, data_format = device_and_format model = densenet.DenseNet(self.depth, self.growth_rate, self.num_blocks, self.output_classes, self.num_layers_in_each_block, data_format, bottleneck=True, compression=0.5, weight_decay=1e-4, dropout_rate=0, pool_initial=True, include_top=True) if defun: model.call = tfe.defun(model.call, compiled=compiled) batch_size = 64 num_burn = 5 num_iters = 30 with tf.device(device): images, _ = random_batch(batch_size, data_format) for _ in xrange(num_burn): model(images, training=False).cpu() if execution_mode: tfe.async_wait() gc.collect() start = time.time() for _ in xrange(num_iters): model(images, training=False).cpu() if execution_mode: tfe.async_wait() self._report(label, start, num_iters, device, batch_size, data_format)
def _benchmark_eager_apply(self, label, device_and_format, defun=False, execution_mode=None, compiled=False): with tfe.execution_mode(execution_mode): device, data_format = device_and_format model = resnet50.ResNet50(data_format) if defun: model.call = tfe.defun(model.call, compiled=compiled) batch_size = 64 num_burn = 5 num_iters = 30 with tf.device(device): images, _ = random_batch(batch_size, data_format) for _ in xrange(num_burn): model(images, training=False).cpu() if execution_mode: tfe.async_wait() gc.collect() start = time.time() for _ in xrange(num_iters): model(images, training=False).cpu() if execution_mode: tfe.async_wait() self._report(label, start, num_iters, device, batch_size, data_format)
def _benchmark_eager_train(self, label, make_iterator, defun=False, execution_mode=None): with tfe.execution_mode(execution_mode): device, data_format = device_and_data_format() for batch_size in self._train_batch_sizes(): (images, labels) = random_batch(batch_size) num_burn = 3 num_iters = 10 model = resnet50.ResNet50(data_format) if defun: model.call = tfe.defun(model.call) optimizer = tf.train.GradientDescentOptimizer(0.1) with tf.device(device): iterator = make_iterator((images, labels)) for _ in xrange(num_burn): (images, labels) = iterator.next() train_one_step(model, images, labels, optimizer) if execution_mode: tfe.async_wait() self._force_gpu_sync() gc.collect() start = time.time() for _ in xrange(num_iters): (images, labels) = iterator.next() train_one_step(model, images, labels, optimizer) if execution_mode: tfe.async_wait() self._force_gpu_sync() self._report(label, start, num_iters, device, batch_size, data_format)
def _apply(self, defun=False, execution_mode=None): device, data_format = device_and_data_format() model = resnet50.ResNet50(data_format) if defun: model.call = tfe.defun(model.call) with tf.device(device), tfe.execution_mode(execution_mode): images, _ = random_batch(2, data_format) output = model(images, training=False) tfe.async_wait() self.assertEqual((2, 1000), output.shape)
def _benchmark_eager_train(self, label, make_iterator, device_and_format, defun=False, execution_mode=None, compiled=False): with tfe.execution_mode(execution_mode): device, data_format = device_and_format for batch_size in self._train_batch_sizes(): (images, labels) = random_batch(batch_size, data_format) model = densenet.DenseNet(self.depth, self.growth_rate, self.num_blocks, self.output_classes, self.num_layers_in_each_block, data_format, bottleneck=True, compression=0.5, weight_decay=1e-4, dropout_rate=0, pool_initial=True, include_top=True) optimizer = tf.train.GradientDescentOptimizer(0.1) apply_grads = apply_gradients if defun: model.call = tfe.defun(model.call, compiled=compiled) apply_grads = tfe.defun(apply_gradients, compiled=compiled) num_burn = 3 num_iters = 10 with tf.device(device): iterator = make_iterator((images, labels)) for _ in xrange(num_burn): (images, labels) = iterator.next() apply_grads(model, optimizer, compute_gradients(model, images, labels)) if execution_mode: tfe.async_wait() self._force_device_sync() gc.collect() start = time.time() for _ in xrange(num_iters): (images, labels) = iterator.next() apply_grads(model, optimizer, compute_gradients(model, images, labels)) if execution_mode: tfe.async_wait() self._force_device_sync() self._report(label, start, num_iters, device, batch_size, data_format)
def _test_train(self, execution_mode=None): device, data_format = device_and_data_format() model = resnet50.ResNet50(data_format) tf.train.get_or_create_global_step() logdir = tempfile.mkdtemp() with tf.contrib.summary.create_file_writer( logdir, max_queue=0, name='t0').as_default( ), tf.contrib.summary.always_record_summaries(): with tf.device(device), tfe.execution_mode(execution_mode): optimizer = tf.train.GradientDescentOptimizer(0.1) images, labels = random_batch(2, data_format) train_one_step(model, images, labels, optimizer) self.assertEqual(320, len(model.variables)) tfe.async_wait() events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'loss')
def _test_train(self, execution_mode=None): device, data_format = device_and_data_format() model = resnet50.ResNet50(data_format) tf.train.get_or_create_global_step() logdir = tempfile.mkdtemp() with tf.contrib.summary.create_file_writer( logdir, max_queue=0, name='t0').as_default(), tf.contrib.summary.always_record_summaries(): with tf.device(device), tfe.execution_mode(execution_mode): optimizer = tf.train.GradientDescentOptimizer(0.1) images, labels = random_batch(2, data_format) train_one_step(model, images, labels, optimizer) self.assertEqual(320, len(model.variables)) tfe.async_wait() events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'loss')
def _benchmark_eager_train(self, label, make_iterator, device_and_format, defun=False, execution_mode=None, compiled=False): with tfe.execution_mode(execution_mode): device, data_format = device_and_format for batch_size in self._train_batch_sizes(): (images, labels) = random_batch(batch_size, data_format) model = resnet50.ResNet50(data_format) optimizer = tf.train.GradientDescentOptimizer(0.1) apply_grads = apply_gradients if defun: model.call = tfe.defun(model.call, compiled=compiled) apply_grads = tfe.defun(apply_gradients, compiled=compiled) num_burn = 3 num_iters = 10 with tf.device(device): iterator = make_iterator((images, labels)) for _ in xrange(num_burn): (images, labels) = iterator.next() apply_grads(model, optimizer, compute_gradients(model, images, labels)) if execution_mode: tfe.async_wait() self._force_device_sync() gc.collect() start = time.time() for _ in xrange(num_iters): (images, labels) = iterator.next() apply_grads(model, optimizer, compute_gradients(model, images, labels)) if execution_mode: tfe.async_wait() self._force_device_sync() self._report(label, start, num_iters, device, batch_size, data_format)
def _benchmark_eager_train(self, label, make_iterator, device_and_format, defun=False, execution_mode=None): with tfe.execution_mode(execution_mode): device, data_format = device_and_format for batch_size in self._train_batch_sizes(): (images, labels) = random_batch(batch_size, data_format) model = resnet50.ResNet50(data_format) optimizer = tf.train.GradientDescentOptimizer(0.1) apply_grads = apply_gradients if defun: model.call = tfe.function(model.call) # TODO(apassos) enable tf.function here apply_grads = tfe.defun(apply_gradients) num_burn = 3 num_iters = 10 with tf.device(device): iterator = make_iterator((images, labels)) for _ in xrange(num_burn): (images, labels) = iterator.next() apply_grads(model, optimizer, compute_gradients(model, images, labels)) if execution_mode: tfe.async_wait() self._force_device_sync() gc.collect() start = time.time() for _ in xrange(num_iters): (images, labels) = iterator.next() apply_grads(model, optimizer, compute_gradients(model, images, labels)) if execution_mode: tfe.async_wait() self._force_device_sync() self._report(label, start, num_iters, device, batch_size, data_format)
def _benchmark_eager_train(self, label, make_iterator, device_and_format, defun=False, execution_mode=None, compiled=False): with tfe.execution_mode(execution_mode): device, data_format = device_and_format for batch_size in self._train_batch_sizes(): (images, labels) = random_batch(batch_size, data_format) num_burn = 3 num_iters = 10 model = resnet50.ResNet50(data_format) if defun: model.call = tfe.defun(model.call, compiled=compiled) optimizer = tf.train.GradientDescentOptimizer(0.1) with tf.device(device): iterator = make_iterator((images, labels)) for _ in xrange(num_burn): (images, labels) = iterator.next() train_one_step(model, images, labels, optimizer) if execution_mode: tfe.async_wait() self._force_device_sync() gc.collect() start = time.time() for _ in xrange(num_iters): (images, labels) = iterator.next() train_one_step(model, images, labels, optimizer) if execution_mode: tfe.async_wait() self._force_device_sync() self._report(label, start, num_iters, device, batch_size, data_format)