Example 1
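These snippets are test methods from TensorFlow 1.x contrib, so they are not self-contained. A minimal sketch of the imports they rely on, assuming the TF 1.x source layout used by the contrib test suite:

import numpy as np

from tensorflow.contrib.layers.python.layers import optimizers as optimizers_lib
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variables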
  def testAverages(self):
    with self.cached_session() as session:
      scale = 2.
      grad = array_ops.ones([3, 4]) * scale
      log_norm = np.log(np.sqrt(scale**2 * grad.get_shape().num_elements()))
      grads_and_vars = [(grad, grad)]
      grads_and_vars = optimizers_lib.adaptive_clipping_fn(
          decay=0.5)(grads_and_vars)

      # adaptive_clipping_fn creates two moving-average variables that track
      # the mean and squared mean of log(||grad||).
      var_dict = {}
      for var in variables.global_variables():
        if var.name.startswith("AdaptiveMaxNorm"):
          var_dict[var.name.split(":")[0]] = var
      self.assertEqual(2, len(var_dict))
      moving_mean = var_dict["AdaptiveMaxNorm/mean"]
      moving_sq_mean = var_dict["AdaptiveMaxNorm/sq_mean"]
      variables.global_variables_initializer().run()
      mean, sq_mean = session.run([moving_mean, moving_sq_mean])
      self.assertEqual([0], mean)
      self.assertEqual([0], sq_mean)
      for i in range(20):
        mean, sq_mean, _ = session.run(
            [moving_mean, moving_sq_mean, grads_and_vars[0][0]])
        if i == 0:
          # After one update the averages are still well below their targets.
          self.assertLess(mean, 0.9 * log_norm)
          self.assertLess(sq_mean, 0.9 * log_norm**2)

      # With a constant gradient, 20 updates at decay=0.5 have converged to
      # log(||grad||) and its square within four decimal places.
      self.assertAlmostEqual(float(mean), log_norm, places=4)
      self.assertAlmostEqual(float(sq_mean), log_norm**2, places=4)
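Why the final assertions hold: with a constant gradient, an exponential moving average of log(||grad||) approaches the true log-norm geometrically at rate decay, so the residual after 20 steps is about log_norm * 0.5**20 ≈ 2e-6. A standalone sketch of that arithmetic (illustrative only; it mirrors the biased moving-average values the test reads directly):

import numpy as np

decay = 0.5
log_norm = np.log(np.sqrt(2.**2 * 12))  # the same [3, 4] gradient of 2s
mean = 0.
for _ in range(20):
  mean = decay * mean + (1 - decay) * log_norm
print(abs(mean - log_norm) < 1e-4)  # True, matching places=4 above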
Example 2
  def testClip(self):
    with self.cached_session() as session:
      spike = 1000.
      multiplier = array_ops.placeholder(dtypes.float32, [], "multiplier")
      step = array_ops.placeholder(dtypes.int32, [], "step")

      grad = array_ops.ones([3, 4]) * multiplier
      grads_and_vars = [(grad, grad)]
      grads_and_vars = optimizers_lib.adaptive_clipping_fn(
          decay=0.9, global_step=step)(grads_and_vars)

      variables.global_variables_initializer().run()

      def run(scale, i):
        return session.run(grads_and_vars[0][0],
                           feed_dict={multiplier: scale,
                                      step: i})

      for i in range(20):
        scale = [1., -2.][i % 2]
        clipped_grad = run(scale, i)
        if i > 3:
          # Once the moving statistics warm up, ordinary gradients pass
          # through (numerically) unchanged.
          self.assertAllClose(np.ones(clipped_grad.shape) * scale, clipped_grad)

      # Assert that a single spike has little influence on the adaptive bound.
      clipped_grad = run(spike, 20)
      self.assertTrue((clipped_grad < 25.).all())

      # Assert that a repeated spike drives the clipped gradient to the new value.
      for i in range(10):
        clipped_grad = run(spike, i + 21)

      self.assertAllClose(np.ones(clipped_grad.shape) * spike, clipped_grad)
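The behavior above comes from an adaptive clip threshold derived from moving statistics of log(||grad||); in the contrib implementation the bound is roughly exp(mean + std_factor * std). The simulation below is a simplified standalone model of that rule (not the exact variable-update code): after a history of unit-scale gradients the bound sits far below a 1000x spike, and only repeated spikes drag it up.

import numpy as np

decay, std_factor = 0.9, 2.0  # std_factor=2.0 is the documented default

def update(state, norm):
  """One simplified moving-statistics step; returns (state, clip threshold)."""
  mean, sq_mean = state
  log_norm = np.log(norm)
  mean = decay * mean + (1 - decay) * log_norm
  sq_mean = decay * sq_mean + (1 - decay) * log_norm**2
  std = np.sqrt(max(sq_mean - mean**2, 0.))
  return (mean, sq_mean), np.exp(mean + std_factor * std)

state = (0., 0.)
for _ in range(20):  # warm up on unit-scale gradients, ||g|| = sqrt(12)
  state, threshold = update(state, np.sqrt(12.))
spike_norm = 1000. * np.sqrt(12.)
print(threshold < spike_norm)   # True: the first spike is clipped hard
for _ in range(30):             # repeated spikes shift the statistics
  state, threshold = update(state, spike_norm)
print(threshold > spike_norm)   # True: the bound has adapted upward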
Example 3
  def testAdaptiveGradientClip(self):
    with self.cached_session() as session:
      x, var, loss, global_step = _setup_model()
      clip_gradients = optimizers_lib.adaptive_clipping_fn()
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer="SGD",
          clip_gradients=clip_gradients)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
      self.assertAlmostEqual(var_value, 9.8916, 4)
      self.assertEqual(global_step_value, 1)
      # optimize_loss nests the two AdaptiveMaxNorm moving averages under its
      # own "OptimizeLoss" name scope.
      var_count = 0
      for var in variables.global_variables():
        if var.name.startswith("OptimizeLoss/AdaptiveMaxNorm"):
          var_count += 1
      self.assertEqual(2, var_count)
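Outside of optimize_loss, the callable returned by adaptive_clipping_fn can be applied to any (gradient, variable) list before apply_gradients. A minimal sketch, assuming the same TF 1.x modules; loss and global_step stand in for your own graph, and the std_factor/decay values are the defaults from the contrib docstring, spelled out for clarity:

import tensorflow as tf  # TF 1.x

opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)
grads_and_vars = opt.compute_gradients(loss)
clipped = optimizers_lib.adaptive_clipping_fn(
    std_factor=2., decay=0.95)(grads_and_vars)
train_op = opt.apply_gradients(clipped, global_step=global_step)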