def testMinimize(self):
        """Check that a single minimize step strictly reduces the loss.

        The loss is the sum of squares of a randomly initialized 10-element
        variable; it is evaluated before and after running one update op.
        """
        with self.test_session():
            weights = tf.Variable(np.random.randn(10), dtype=dtypes.float32)
            objective = tf.reduce_sum(weights * weights)

            optimizer = exp_igt_optimizer.ExpIgtOptimizer(
                learning_rate=0.01, tail_fraction=2.)
            train_step = optimizer.minimize(objective)

            tf_variables.global_variables_initializer().run()

            # Evaluate the objective on both sides of exactly one update.
            before = objective.eval()
            train_step.run()
            after = objective.eval()
            self.assertLess(after, before)
    def testSwap(self):
        """Check that swap_true_and_shifted exchanges a variable and its slot.

        After initialization both the variable and its 'true_param' slot hold
        the initial value. The variable is then overwritten with zeros, and the
        swap op is expected to leave the variable with the initial value and
        the slot with the zeros.
        """
        with self.cached_session() as sess:
            initial = np.random.randn(10)
            var = tf.Variable(initial, dtype=dtypes.float32)
            objective = tf.reduce_sum(var * var)

            optimizer = exp_igt_optimizer.ExpIgtOptimizer(
                learning_rate=0.01, tail_fraction=2.)
            # minimize() is called only so that the optimizer creates its
            # slots; the returned update op is never run.
            unused_update = optimizer.minimize(objective)
            true_param = optimizer.get_slot(var, 'true_param')

            def check_state(expected_var, expected_slot):
                # Compare the variable first, then its slot.
                self.assertAllCloseAccordingToType(expected_var, var.eval())
                self.assertAllCloseAccordingToType(expected_slot,
                                                   true_param.eval())

            tf_variables.global_variables_initializer().run()
            check_state(initial, initial)

            zero_values = np.zeros(10)
            sess.run(var.assign(zero_values))
            check_state(zero_values, initial)

            optimizer.swap_true_and_shifted().run()
            check_state(initial, zero_values)
示例#3
0
def resnet_model_fn(features, labels, mode, params):
    """The model_fn for ResNet to be used with TPUEstimator.

    Normalizes the input images, builds the ResNet network and, depending on
    `mode`, returns predictions, a training op (optionally with a host call
    that writes summaries) or evaluation metrics.

    Args:
      features: `Tensor` of batched images, or a `dict` holding that tensor
        under the key 'feature'. If transpose_input is enabled, it is
        transposed to device layout and reshaped to 1D tensor.
      labels: `Tensor` of labels for the data samples
      mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`
      params: `dict` of parameters passed to the model from the TPUEstimator,
        `params['batch_size']` is always provided and should be used as the
        effective batch size.

    Returns:
      A `tf.estimator.EstimatorSpec` in PREDICT mode, otherwise a
      `TPUEstimatorSpec` for the model.

    Raises:
      ValueError: if `params['dropblock_groups']` names a group outside 1..4,
        if `params['precision']` is neither 'bfloat16' nor 'float32', if
        `params['enable_lars']` is set in TRAIN mode, or if `FLAGS.optimizer`
        is not one of 'momentum', 'adam' or 'eigt' in TRAIN mode.
    """
    if isinstance(features, dict):
        features = features['feature']

    # In most cases, the default data format NCHW instead of NHWC should be
    # used for a significant performance boost on GPU/TPU. NHWC should be used
    # only if the network needs to be run on CPU since the pooling operations
    # are only supported on NHWC.
    if params['data_format'] == 'channels_first':
        assert not params['transpose_input']  # channels_first only for GPU
        features = tf.transpose(features, [0, 3, 1, 2])
        # After the transpose the channel axis precedes height and width, so
        # the per-channel statistics must broadcast as [C, 1, 1].
        stats_shape = [3, 1, 1]
    else:
        stats_shape = [1, 1, 3]

    if params['transpose_input'] and mode != tf.estimator.ModeKeys.PREDICT:
        # NOTE(review): the division and tf.sqrt look like they produce a
        # float tensor, while tf.reshape expects integer dimensions - confirm
        # this path works on the targeted TensorFlow version.
        image_size = tf.sqrt(tf.shape(features)[0] / (3 * tf.shape(labels)[0]))
        features = tf.reshape(features, [image_size, image_size, 3, -1])
        features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

    # Normalize the image to zero mean and unit variance.
    features -= tf.constant(MEAN_RGB, shape=stats_shape, dtype=features.dtype)
    features /= tf.constant(STDDEV_RGB,
                            shape=stats_shape,
                            dtype=features.dtype)

    # DropBlock keep_prob for the 4 block groups of ResNet architecture.
    # None means applying no DropBlock at the corresponding block group.
    dropblock_keep_probs = [None] * 4
    if params['dropblock_groups']:
        # Scheduled keep_prob for DropBlock.
        train_steps = tf.cast(params['train_steps'], tf.float32)
        current_step = tf.cast(tf.train.get_global_step(), tf.float32)
        current_ratio = current_step / train_steps
        dropblock_keep_prob = (1 - current_ratio *
                               (1 - params['dropblock_keep_prob']))

        # Computes DropBlock keep_prob for different block groups of ResNet.
        dropblock_groups = [
            int(x) for x in params['dropblock_groups'].split(',')
        ]
        for block_group in dropblock_groups:
            if block_group < 1 or block_group > 4:
                raise ValueError(
                    'dropblock_groups should be a comma separated list of integers '
                    'between 1 and 4 (dropblock_groups: {}).'.format(
                        params['dropblock_groups']))
            dropblock_keep_probs[block_group - 1] = 1 - (
                (1 - dropblock_keep_prob) / 4.0**(4 - block_group))

    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        network = resnet_model.resnet_v1(
            resnet_depth=params['resnet_depth'],
            num_classes=params['num_label_classes'],
            dropblock_size=params['dropblock_size'],
            dropblock_keep_probs=dropblock_keep_probs,
            data_format=params['data_format'])
        return network(inputs=features,
                       is_training=(mode == tf.estimator.ModeKeys.TRAIN))

    if params['precision'] == 'bfloat16':
        with tf.contrib.tpu.bfloat16_scope():
            logits = build_network()
        logits = tf.cast(logits, tf.float32)
    elif params['precision'] == 'float32':
        logits = build_network()
    else:
        # Fail here with a clear message instead of hitting an unbound
        # `logits` further down.
        raise ValueError('{} is not a supported precision'.format(
            params['precision']))

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'classes': tf.argmax(logits, axis=1),
            'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    # If necessary, in the model_fn, use params['batch_size'] instead the batch
    # size flags (--train_batch_size or --eval_batch_size).
    batch_size = params['batch_size']  # pylint: disable=unused-variable

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    one_hot_labels = tf.one_hot(labels, params['num_label_classes'])
    cross_entropy = tf.losses.softmax_cross_entropy(
        logits=logits,
        onehot_labels=one_hot_labels,
        label_smoothing=params['label_smoothing'])

    # Add weight decay to the loss for non-batch-normalization variables.
    loss = cross_entropy + params['weight_decay'] * tf.add_n([
        tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Compute the current epoch and associated learning rate from global_step.
        global_step = tf.train.get_global_step()
        steps_per_epoch = params['num_train_images'] / params[
            'train_batch_size']
        current_epoch = (tf.cast(global_step, tf.float32) / steps_per_epoch)

        # LARS is rejected for these experiments; raise before building any
        # optimizer ops that would be thrown away.
        if params['enable_lars']:
            raise ValueError(
                'LARS unexpected in the context of IGT experiments.')

        learning_rate = linear_learning_rate_schedule(params, global_step)

        if FLAGS.optimizer == 'momentum':
            tf.logging.info('Using MomentumOptimizer ({}).'.format(
                params['momentum']))
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=learning_rate,
                momentum=params['momentum'],
                use_nesterov=False)

        elif FLAGS.optimizer == 'adam':
            tf.logging.info('Using AdamOptimizer')
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

        elif FLAGS.optimizer == 'eigt':
            tf.logging.info('Using ExpIgtOptimizer {} tail: {}'.format(
                FLAGS.igt_optimizer, FLAGS.tail_fraction))
            optimizer = exp_igt_optimizer.ExpIgtOptimizer(
                learning_rate,
                tail_fraction=FLAGS.tail_fraction,
                optimizer=FLAGS.igt_optimizer)

        else:
            raise ValueError('{} is not a supported optimizer'.format(
                FLAGS.optimizer))

        if params['use_tpu']:
            # When using TPU, wrap the optimizer with CrossShardOptimizer which
            # handles synchronization details between different TPU cores. To the
            # user, this should look like regular synchronous training.
            optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

        # Batch normalization requires UPDATE_OPS to be added as a dependency to
        # the train operation.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)

        if not params['skip_host_call']:

            def host_call_fn(gs, loss, lr, ce):
                """Training host call.

                Creates scalar summaries for training metrics.

                This function is executed on the CPU and should not directly
                reference any Tensors in the rest of the `model_fn`. To pass
                Tensors from the model to the `metric_fn`, provide as part of
                the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                  gs: `Tensor` with shape `[batch]` for the global_step
                  loss: `Tensor` with shape `[batch]` for the training loss.
                  lr: `Tensor` with shape `[batch]` for the learning_rate.
                  ce: `Tensor` with shape `[batch]` for the current_epoch.

                Returns:
                  List of summary ops to run on the CPU host.
                """
                gs = gs[0]
                # Host call fns are executed params['iterations_per_loop'] times after
                # one TPU loop is finished, setting max_queue value to the same as
                # number of iterations will make the summary writer only flush the data
                # to storage once per loop.
                with summary.create_file_writer(
                        get_model_dir(params),
                        max_queue=params['iterations_per_loop']).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)

                        return summary.all_summary_ops()

            # To log the loss, current learning rate, and epoch for Tensorboard, the
            # summary op needs to be run on the host CPU via host_call. host_call
            # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
            # dimension. These Tensors are implicitly concatenated to
            # [params['batch_size']].
            gs_t = tf.reshape(global_step, [1])
            loss_t = tf.reshape(loss, [1])
            lr_t = tf.reshape(learning_rate, [1])
            ce_t = tf.reshape(current_epoch, [1])

            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    else:
        train_op = None

    eval_metrics = None
    scaffold_fn = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits):
            """Evaluation metric function.

            Evaluates top-1 and top-5 accuracy.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `eval_metrics`.

            Args:
              labels: `Tensor` with shape `[batch]`.
              logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
              A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'top_1_accuracy': top_1_accuracy,
                'top_5_accuracy': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

        if FLAGS.mode == 'eval_igt' and FLAGS.igt_eval_mode == 'true':
            tf.logging.info('Using true param loading saver.')

            def scaffold_fn_true_params():
                """Returns a scaffold that loads the true values into vars."""
                # Trainable variables are restored from the checkpoint entry
                # '<var name>/true_param'; all other global variables are
                # restored from the entry carrying their own name.
                var_mapping = {}
                trainable_vars = set(tf.trainable_variables())
                for var in tf.global_variables():
                    if var in trainable_vars:
                        var_mapping[var.op.name + '/true_param'] = var
                    else:
                        var_mapping[var.op.name] = var

                tf.logging.info('Mapping: {}'.format(var_mapping))
                saver = tf.train.Saver(var_list=var_mapping, sharded=True)
                return tf.train.Scaffold(saver=saver)

            scaffold_fn = scaffold_fn_true_params

    return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           host_call=host_call,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
    def doTestApplyGradients(self, use_resource=False):
        """Compare the IGT apply_gradients op with a python implementation.

        Two 2-element variables are driven through two identical gradient
        steps. After initialization and after every step the optimizer is
        checked through `self._validate` against one `IgtValidator` per
        variable.

        Args:
          use_resource: If true the variables are created as
            `ResourceVariable`s, otherwise as regular `Variable`s.
        """
        # TODO(manzagop): try dtypes.half and dtypes.float64:
        for dtype in [dtypes.float32]:
            print('running for dtype {}'.format(dtype))

            with self.test_session():
                # Two variables, each receiving its gradient via a placeholder.
                start_values = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]
                if use_resource:
                    make_variable = resource_variable_ops.ResourceVariable
                else:
                    make_variable = tf_variables.Variable
                variables = [
                    make_variable(value, dtype=dtype) for value in start_values
                ]
                grad_inputs = [
                    tf.placeholder(dtype, shape=variable.get_shape())
                    for variable in variables
                ]

                # TODO(manzagop): use a different tail fraction once validator support.
                optimizer = exp_igt_optimizer.ExpIgtOptimizer(
                    learning_rate=LEARNING_RATE, tail_fraction=1.)
                apply_op = optimizer.apply_gradients(
                    list(zip(grad_inputs, variables)),
                    global_step=tf.train.get_global_step())
                tf_variables.global_variables_initializer().run()

                # Every expected slot exists, has the shape of its variable
                # and is not registered as a trainable variable.
                slot_names = {'estimate', 'true_param', 'update'}
                self.assertEqual(slot_names, set(optimizer.get_slot_names()))
                for slot_name in slot_names:
                    for variable in variables:
                        slot = optimizer.get_slot(variable, slot_name)
                        self.assertEqual(slot.get_shape(),
                                         variable.get_shape())
                        self.assertNotIn(slot,
                                         tf_variables.trainable_variables())

                # Check the state right after initialization.
                references = [
                    IgtValidator(value, LEARNING_RATE)
                    for value in start_values
                ]
                self._validate(optimizer, variables, references)

                # Apply the same gradients twice, validating after each step.
                for _ in range(2):
                    step_grads = [np.array([0.1, 0.1]), np.array([0.01, 0.01])]
                    apply_op.run(dict(zip(grad_inputs, step_grads)))

                    for reference, grad in zip(references, step_grads):
                        reference.update(grad)
                    self._validate(optimizer, variables, references)