    def append_apply_gradients_ops(self, gradient_state, opt, grads,
                                   training_ops, loss_scale_params):
        """Adds training ops for grads to 'training_ops'.

        Args:
          gradient_state: from previous call to apply_gradients_devices.
          opt: the underlying optimizer.
          grads: [(grad, var)] to apply.
          training_ops: list to which to add ops.
          loss_scale_params: parameters for loss scaling.
        """
        del gradient_state  # unused by this implementation

        def get_apply_gradients_ops_func():
            """Returns the apply_gradients op."""
            return [opt.apply_gradients(grads)]

        variable_mgr_util.append_gradients_with_loss_scale(
            training_ops, get_apply_gradients_ops_func, loss_scale_params,
            self.grad_has_inf_nan)
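
    # A minimal sketch (an assumption, not the library implementation) of the
    # control flow that variable_mgr_util.append_gradients_with_loss_scale is
    # expected to provide, consistent with the tests below: non-chief workers
    # (or runs without auto loss scaling) apply gradients unconditionally,
    # while the chief applies them only when no gradient overflowed. The name
    # _sketch_append_gradients_with_loss_scale is illustrative only.
    def _sketch_append_gradients_with_loss_scale(
            training_ops, get_apply_gradients_ops_func, params,
            grad_has_inf_nan):
        if not params.enable_auto_loss_scale or not params.is_chief:
            # No auto loss scaling, or not the chief: just apply the gradients.
            training_ops.extend(get_apply_gradients_ops_func())
            return

        def apply_grads():
            # No inf/NaN: run the apply ops. (The real helper is also expected
            # to bump loss_scale_normal_steps and raise the loss scale every
            # inc_loss_scale_every_n successful steps.)
            with tf.control_dependencies(get_apply_gradients_ops_func()):
                return tf.constant(True)

        def skip_grads():
            # Overflow: drop this step's update. (The real helper is also
            # expected to lower the loss scale here.)
            return tf.constant(False)

        training_ops.append(tf.cond(grad_has_inf_nan, skip_grads, apply_grads))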

    def testAppendGradientsWithLossScaleForNonChiefWorker(self):
        v = tf.Variable(0)
        training_ops = []
        get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
        loss_scale_params = variable_mgr_util.AutoLossScaleParams(
            enable_auto_loss_scale=True,
            loss_scale=tf.Variable(4),
            loss_scale_normal_steps=tf.Variable(10),
            inc_loss_scale_every_n=10,
            is_chief=False)  # Non-chief
        variable_mgr_util.append_gradients_with_loss_scale(
            training_ops,
            get_apply_gradients_ops_func,
            loss_scale_params,
            grad_has_inf_nan=False)

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(training_ops)
            self.assertEqual(sess.run(v), 1)
            self.assertEqual(sess.run(loss_scale_params.loss_scale), 4)
            self.assertEqual(
                sess.run(loss_scale_params.loss_scale_normal_steps), 10)

    def testAppendGradientsWithLossScaleWithoutNan(self):
        v = tf.Variable(0)
        training_ops = []
        get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
        loss_scale_params = variable_mgr_util.AutoLossScaleParams(
            enable_auto_loss_scale=True,
            loss_scale=tf.Variable(4, dtype=tf.float32),
            loss_scale_normal_steps=tf.Variable(10),
            inc_loss_scale_every_n=10,
            is_chief=True)
        variable_mgr_util.append_gradients_with_loss_scale(
            training_ops,
            get_apply_gradients_ops_func,
            loss_scale_params,
            grad_has_inf_nan=tf.constant(False))

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(training_ops)
            self.assertEqual(sess.run(v), 1)
            self.assertEqual(sess.run(loss_scale_params.loss_scale), 8)
            self.assertEqual(
                sess.run(loss_scale_params.loss_scale_normal_steps), 0)
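
    # A sketch of the complementary overflow case, under the assumption that
    # append_gradients_with_loss_scale skips the apply ops when
    # grad_has_inf_nan is True. Only the "gradients not applied" behavior is
    # asserted; the exact post-overflow loss_scale policy (e.g. halving) is
    # deliberately left unasserted because it is an assumption, not something
    # shown by the tests above.
    def testAppendGradientsWithLossScaleWithNanSketch(self):
        v = tf.Variable(0)
        training_ops = []
        get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
        loss_scale_params = variable_mgr_util.AutoLossScaleParams(
            enable_auto_loss_scale=True,
            loss_scale=tf.Variable(4, dtype=tf.float32),
            loss_scale_normal_steps=tf.Variable(10),
            inc_loss_scale_every_n=10,
            is_chief=True)
        variable_mgr_util.append_gradients_with_loss_scale(
            training_ops,
            get_apply_gradients_ops_func,
            loss_scale_params,
            grad_has_inf_nan=tf.constant(True))

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(training_ops)
            # Gradients should not have been applied on an overflow step.
            self.assertEqual(sess.run(v), 0)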

    def append_apply_gradients_ops(self, gradient_state, opt, grads,
                                   training_ops, loss_scale_params):
        """Adds training ops for grads to 'training_ops'."""
        device_grads = gradient_state  # From 2nd result of preprocess_device_grads.

        def get_apply_gradients_ops_func():
            """Returns a list of ops for updating gradients."""
            apply_gradients_ops = []
            # For each variable, apply the combined gradients for this server on
            # the parameter server, and then wait for all other servers to do this.
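            # The per-variable barrier ensures the parameter-server copy has
            # been updated by every worker before its value is read back and
            # assigned into each device's local replica, so all replicas start
            # the next step from identical weights.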
            for i, (g, v) in enumerate(grads):
                apply_gradient_op = opt.apply_gradients([(g, v)])
                barrier = self.benchmark_cnn.add_sync_queues_and_barrier(
                    'replicate_variable_%s' % i, [apply_gradient_op])
                with tf.control_dependencies([barrier]):
                    with tf.device(self.benchmark_cnn.cpu_device):
                        updated_value = v.read_value()
                        for my_d in range(len(self.benchmark_cnn.devices)):
                            apply_gradients_ops.append(
                                device_grads[my_d][i][1].assign(updated_value))
            return apply_gradients_ops

        variable_mgr_util.append_gradients_with_loss_scale(
            training_ops, get_apply_gradients_ops_func, loss_scale_params,
            self.grad_has_inf_nan)