Example #1
 def test_no_garbage(self):
     device, data_format = resnet50_test_util.device_and_data_format()
     model = resnet50.ResNet50(data_format)
     optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.1)
     with tf.device(device):
         images, labels = resnet50_test_util.random_batch(2, data_format)
         gc.disable()
         # Warm up. Note that this first run does create significant amounts of
         # garbage to be collected. The hope is that this is a build-only effect,
         # and a subsequent training loop will create nothing which needs to be
         # collected.
         apply_gradients(model, optimizer,
                         compute_gradients(model, images, labels))
         gc.collect()
         previous_gc_debug_flags = gc.get_debug()
         gc.set_debug(gc.DEBUG_SAVEALL)
         for _ in range(2):
             # Run twice to ensure that garbage that is created on the first
             # iteration is no longer accessible.
             apply_gradients(model, optimizer,
                             compute_gradients(model, images, labels))
         gc.collect()
         # There should be no garbage requiring collection.
         self.assertEqual(0, len(gc.garbage))
         gc.set_debug(previous_gc_debug_flags)
         gc.enable()
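The warm-up-then-DEBUG_SAVEALL pattern above generalizes to any reference-cycle regression test: gc.DEBUG_SAVEALL makes the collector keep every object it would have freed in gc.garbage, so an empty list after the measured iterations means no cycles were created. A minimal standalone sketch (run_leak_check is a hypothetical helper, not part of the test above):

 import gc

 def run_leak_check(fn, warmup=1, iters=2):
     # Warm-up runs may legitimately create build-time garbage.
     for _ in range(warmup):
         fn()
     gc.collect()
     previous_flags = gc.get_debug()
     gc.set_debug(gc.DEBUG_SAVEALL)  # keep collected objects in gc.garbage
     try:
         for _ in range(iters):
             fn()
         gc.collect()
         return len(gc.garbage)  # 0 means fn created no reference cycles
     finally:
         gc.set_debug(previous_flags)
         del gc.garbage[:]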
Example #2
 def _benchmark_eager_apply(self,
                            label,
                            device_and_format,
                            defun=False,
                            execution_mode=None):
     with context.execution_mode(execution_mode):
         device, data_format = device_and_format
         model = resnet50.ResNet50(data_format)
         if defun:
             model.call = tf.function(model.call)
         batch_size = 64
         num_burn = 5
         num_iters = 30
         with tf.device(device):
             images, _ = resnet50_test_util.random_batch(
                 batch_size, data_format)
             for _ in range(num_burn):
                 model(images, training=False).cpu()
             if execution_mode:
                 context.async_wait()
             gc.collect()
             start = time.time()
             for _ in range(num_iters):
                 model(images, training=False).cpu()
             if execution_mode:
                 context.async_wait()
             self._report(label, start, num_iters, device, batch_size,
                          data_format)
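In the TensorFlow benchmark suites, a private helper like this is normally driven by thin public benchmark_* methods. A plausible pair of wrappers (the method names are illustrative, not taken from the snippet):

 def benchmark_eager_apply_sync(self):
     self._benchmark_eager_apply(
         'eager_apply', resnet50_test_util.device_and_data_format())

 def benchmark_eager_apply_with_defun(self):
     self._benchmark_eager_apply(
         'eager_apply_with_defun',
         resnet50_test_util.device_and_data_format(), defun=True)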
Example #3
 def _hvp_benchmark(self,
                    hvp_fn,
                    label,
                    batch_sizes,
                    num_iters=30,
                    num_burn=5):
     device, data_format = resnet50_test_util.device_and_data_format()
     model = resnet50.ResNet50(data_format)
     for batch_size in batch_sizes:
         with tf.device(device):
             images, labels = resnet50_test_util.random_batch(
                 batch_size, data_format)
             images = tf.constant(images)
             labels = tf.constant(labels)
             model.build(images.shape)
             vector = [tf.ones_like(v) for v in model.trainable_variables]
             for _ in range(num_burn):
                 results = hvp_fn(model, images, labels, vector)
                 for result in results:
                     result.cpu()
             self._force_device_sync()
             gc.collect()
             start = time.time()
             for _ in range(num_iters):
                 results = hvp_fn(model, images, labels, vector)
                 for result in results:
                     result.cpu()
             self._force_device_sync()
             resnet50_test_util.report(self, label, start, num_iters,
                                       device, batch_size, data_format)
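The benchmark is agnostic to how hvp_fn computes the Hessian-vector product; it only assumes the signature (model, images, labels, vector) and one result tensor per trainable variable. One way to satisfy that contract is forward-over-reverse autodiff with tf.autodiff.ForwardAccumulator; a sketch, assuming the labels from random_batch are one-hot (the function name and loss choice are assumptions, not taken from the snippet):

 def forward_over_back_hvp(model, images, labels, vector):
     # JVP (forward mode) of the gradient (reverse mode) yields H·v.
     with tf.autodiff.ForwardAccumulator(
             model.trainable_variables, vector) as acc:
         with tf.GradientTape() as tape:
             logits = model(images, training=True)
             loss = tf.reduce_sum(
                 tf.nn.softmax_cross_entropy_with_logits(
                     labels=labels, logits=logits))
         grads = tape.gradient(loss, model.trainable_variables)
     return acc.jvp(grads)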
Example #4
    def test_apply_retrieve_intermediates(self):
        device, data_format = resnet50_test_util.device_and_data_format()
        model = resnet50.ResNet50(data_format,
                                  block3_strides=True,
                                  include_top=False)
        intermediates_dict = {}
        with tf.device(device):
            images, _ = resnet50_test_util.random_batch(2, data_format)
            output = model(images,
                           training=False,
                           intermediates_dict=intermediates_dict)
        output_shape = ((2, 2048, 1, 1) if data_format == 'channels_first' else
                        (2, 1, 1, 2048))
        self.assertEqual(output_shape, output.shape)

        if data_format == 'channels_first':
            block_shapes = {
                'block0': (2, 64, 112, 112),
                'block0mp': (2, 64, 55, 55),
                'block1': (2, 256, 55, 55),
                'block2': (2, 512, 28, 28),
                'block3': (2, 1024, 7, 7),
                'block4': (2, 2048, 1, 1),
            }
        else:
            block_shapes = {
                'block0': (2, 112, 112, 64),
                'block0mp': (2, 55, 55, 64),
                'block1': (2, 55, 55, 256),
                'block2': (2, 28, 28, 512),
                'block3': (2, 7, 7, 1024),
                'block4': (2, 1, 1, 2048),
            }
        for (block_name, block) in intermediates_dict.items():
            self.assertEqual(block_shapes[block_name], block.shape)
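Beyond shape checking, the populated dictionary gives direct access to each stage's feature map. For instance, with channels_last data (purely illustrative):

 block3 = intermediates_dict['block3']         # shape (2, 7, 7, 1024)
 pooled = tf.reduce_mean(block3, axis=[1, 2])  # global average pool -> (2, 1024)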
Example #5
 def test_apply_with_pooling(self):
   device, data_format = resnet50_test_util.device_and_data_format()
   model = resnet50.ResNet50(data_format, include_top=False, pooling='avg')
   with tf.device(device):
     images, _ = resnet50_test_util.random_batch(2, data_format)
     output = model(images, training=False)
   self.assertEqual((2, 2048), output.shape)
Example #6
 def test_apply_no_top(self):
     device, data_format = resnet50_test_util.device_and_data_format()
     model = resnet50.ResNet50(data_format, include_top=False)
     with tf.device(device):
         images, _ = resnet50_test_util.random_batch(2, data_format)
         output = model(images, training=False)
     output_shape = ((2, 2048, 1, 1) if data_format == 'channels_first' else
                     (2, 1, 1, 2048))
     self.assertEqual(output_shape, output.shape)
Example #7
 def _apply(self, defun=False, execution_mode=None):
     device, data_format = resnet50_test_util.device_and_data_format()
     model = resnet50.ResNet50(data_format)
     if defun:
         model.call = tf.function(model.call)
     with tf.device(device), context.execution_mode(execution_mode):
         images, _ = resnet50_test_util.random_batch(2, data_format)
         output = model(images, training=False)
         context.async_wait()
     self.assertEqual((2, 1000), output.shape)
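A helper like this is presumably shared by several public tests covering the synchronous, asynchronous, and tf.function variants. Likely callers (the test names are a guess based on the parameters):

 def test_apply(self):
     self._apply(defun=False)

 def test_apply_async(self):
     self._apply(defun=False, execution_mode=context.ASYNC)

 def test_apply_with_defun(self):
     self._apply(defun=True)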
Example #8
    def test_hvp_shapes(self, hvp_function):
        device, data_format = resnet50_test_util.device_and_data_format()
        model = resnet50.ResNet50(data_format)
        with tf.device(device):
            images, labels = resnet50_test_util.random_batch(2, data_format)
            images = tf.constant(images)
            labels = tf.constant(labels)
            model.build(images.shape)
            vector = [tf.ones_like(v) for v in model.trainable_variables]

            # Note that numerical error accumulates into quite large differences
            # in the final hvp here. tensorflow/python/eager:forwardprop_test
            # checks, on a much smaller but otherwise similar model, that the
            # computations agree closely.
            hvp = hvp_function(model, images, labels, vector)
            for hvp_component, variable in zip(hvp, model.trainable_variables):
                self.assertEqual(hvp_component.shape, variable.shape)
                self.assertEqual(hvp_component.dtype, variable.dtype)
Example #9
 def _test_train(self, execution_mode=None):
   device, data_format = resnet50_test_util.device_and_data_format()
   model = resnet50.ResNet50(data_format)
   tf.compat.v2.summary.experimental.set_step(
       tf.compat.v1.train.get_or_create_global_step())
   logdir = tempfile.mkdtemp()
   with tf.compat.v2.summary.create_file_writer(
       logdir, max_queue=0,
       name='t0').as_default(), tf.compat.v2.summary.record_if(True):
     with tf.device(device), context.execution_mode(execution_mode):
       optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.1)
       images, labels = resnet50_test_util.random_batch(2, data_format)
       apply_gradients(model, optimizer,
                       compute_gradients(model, images, labels))
       self.assertEqual(320, len(model.variables))
       context.async_wait()
   events = events_from_logdir(logdir)
   self.assertEqual(len(events), 2)
   self.assertEqual(events[1].summary.value[0].tag, 'loss')
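events_from_logdir is not shown in the snippet. The first record in an events file is always the file-version header, which is why the test expects two events and finds the loss summary at events[1]. A hypothetical reimplementation of the helper using the public tf.compat.v1.train.summary_iterator:

 import glob
 import os

 def events_from_logdir(logdir):
     # Expect exactly one events file, written by the writer created above.
     paths = glob.glob(os.path.join(logdir, 'events.out.tfevents*'))
     assert len(paths) == 1, paths
     return list(tf.compat.v1.train.summary_iterator(paths[0]))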
Example #10
    def _benchmark_eager_train(self,
                               label,
                               make_iterator,
                               device_and_format,
                               defun=False,
                               execution_mode=None):
        with context.execution_mode(execution_mode):
            device, data_format = device_and_format
            for batch_size in self._train_batch_sizes():
                (images, labels) = resnet50_test_util.random_batch(
                    batch_size, data_format)
                model = resnet50.ResNet50(data_format)
                # TODO(b/161911585): tf_to_corert MLIR lowering pipeline should handle
                # case when momentum is not set.
                optimizer = tf.keras.optimizers.SGD(0.1, 0.1)
                apply_grads = apply_gradients
                if defun:
                    model.call = tf.function(model.call)
                    apply_grads = tf.function(apply_gradients)

                num_burn = 3
                num_iters = 10
                with tf.device(device):
                    iterator = make_iterator((images, labels))
                    for _ in range(num_burn):
                        (images, labels) = iterator.next()
                        apply_grads(model, optimizer,
                                    compute_gradients(model, images, labels))
                    if execution_mode:
                        context.async_wait()
                    self._force_device_sync()
                    gc.collect()

                    start = time.time()
                    for _ in range(num_iters):
                        (images, labels) = iterator.next()
                        apply_grads(model, optimizer,
                                    compute_gradients(model, images, labels))
                    if execution_mode:
                        context.async_wait()
                    self._force_device_sync()
                    self._report(label, start, num_iters, device, batch_size,
                                 data_format)
Example #11
    def _benchmark_eager_train(self,
                               label,
                               make_iterator,
                               device_and_format,
                               defun=False,
                               execution_mode=None):
        with context.execution_mode(execution_mode):
            device, data_format = device_and_format
            for batch_size in self._train_batch_sizes():
                (images, labels) = resnet50_test_util.random_batch(
                    batch_size, data_format)
                model = resnet50.ResNet50(data_format)
                optimizer = tf.compat.v1.train.GradientDescentOptimizer(0.1)
                apply_grads = apply_gradients
                if defun:
                    model.call = tf.function(model.call)
                    apply_grads = tf.function(apply_gradients)

                num_burn = 3
                num_iters = 10
                with tf.device(device):
                    iterator = make_iterator((images, labels))
                    for _ in range(num_burn):
                        (images, labels) = iterator.next()
                        apply_grads(model, optimizer,
                                    compute_gradients(model, images, labels))
                    if execution_mode:
                        context.async_wait()
                    self._force_device_sync()
                    gc.collect()

                    start = time.time()
                    for _ in range(num_iters):
                        (images, labels) = iterator.next()
                        apply_grads(model, optimizer,
                                    compute_gradients(model, images, labels))
                    if execution_mode:
                        context.async_wait()
                    self._force_device_sync()
                    self._report(label, start, num_iters, device, batch_size,
                                 data_format)
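Examples #10 and #11 differ only in the optimizer (tf.keras.optimizers.SGD with momentum versus the v1 GradientDescentOptimizer); the timing harness is identical. In both, make_iterator only needs to return an object whose next() method yields (images, labels). The simplest benchmark-friendly choice hands back the same pre-built tensors every time, keeping input-pipeline cost out of the measurement; a sketch (the class name is illustrative):

 class MockIterator(object):

     def __init__(self, tensors):
         # tf.identity copies onto the current device (the benchmark constructs
         # the iterator inside `with tf.device(device)`), so the timed loop
         # never pays for a host-to-device transfer.
         self._tensors = [tf.identity(x) for x in tensors]

     def next(self):
         return self._tensors

A tf.data-backed iterator would work just as well if the goal were to measure the input pipeline too.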