Пример #1
0
 def testDpgGradientNormClipping(self):
   """Tests the gradient dq/da is clipped using norm clipping.

   Calls `dpg_ops.dpg` with `clip_norm=True` and `dqda_clipping=0.01`, then
   checks that every row of the returned `dqda` has a norm of 0.01 as
   computed by `np.linalg.norm`.
   """
   clip_value = 0.01
   _, dpg_extra = dpg_ops.dpg(
       self.q_tm1_max, self.a_tm1_max, dqda_clipping=clip_value,
       clip_norm=True)
   with self.test_session() as sess:
     sess.run(tf.global_variables_initializer())
     # Evaluate once up front: nothing inside the loop changes the graph
     # inputs, so re-running the op for every row only repeats work.
     dqda = dpg_extra.dqda.eval()
     for i in range(int(self.batch_size)):
       self.assertAllClose(np.linalg.norm(dqda[i]), clip_value)
Пример #2
0
 def testDpgGradientNormClipping(self):
   """Tests the gradient dq/da is clipped using norm clipping.

   Calls `dpg_ops.dpg` with `clip_norm=True` and `dqda_clipping=0.01`, then
   checks that every row of the returned `dqda` has a norm of 0.01 as
   computed by `np.linalg.norm`.
   """
   clip_value = 0.01
   _, dpg_extra = dpg_ops.dpg(
       self.q_tm1_max, self.a_tm1_max, dqda_clipping=clip_value,
       clip_norm=True)
   with self.test_session() as sess:
     sess.run(tf.global_variables_initializer())
     # Evaluate once up front: nothing inside the loop changes the graph
     # inputs, so re-running the op for every row only repeats work.
     dqda = dpg_extra.dqda.eval()
     for i in range(int(self.batch_size)):
       self.assertAllClose(np.linalg.norm(dqda[i]), clip_value)
Пример #3
0
 def testDpgGradientClipping(self):
   """Tests the gradient dq/da is clipped.

   Calls `dpg_ops.dpg` with `dqda_clipping=0.01` and checks that every row of
   the returned `dqda` equals the first row of `w` transposed, clipped
   element-wise to [-0.01, 0.01].
   """
   clip_value = 0.01
   _, dpg_extra = dpg_ops.dpg(
       self.q_tm1_max, self.a_tm1_max, dqda_clipping=clip_value)
   with self.test_session() as sess:
     sess.run(tf.global_variables_initializer())
     value_grad = np.transpose(self.w.eval())[0]
     # Guard against a vacuous pass: at least one component must exceed the
     # threshold, otherwise clipping would be a no-op. This does not depend on
     # the row index, so it is checked once rather than per iteration.
     self.assertTrue(np.greater(np.absolute(value_grad), clip_value).any())
     # Both the expected value and the evaluated gradient are the same for
     # every row, so compute them once outside the loop.
     expected_row = np.clip(value_grad, -clip_value, clip_value)
     dqda = dpg_extra.dqda.eval()
     for i in range(int(self.batch_size)):
       self.assertAllClose(dqda[i], expected_row)
Пример #4
0
 def testDpgGradientClipping(self):
   """Tests the gradient dq/da is clipped.

   Calls `dpg_ops.dpg` with `dqda_clipping=0.01` and checks that every row of
   the returned `dqda` equals the first row of `w` transposed, clipped
   element-wise to [-0.01, 0.01].
   """
   clip_value = 0.01
   _, dpg_extra = dpg_ops.dpg(
       self.q_tm1_max, self.a_tm1_max, dqda_clipping=clip_value)
   with self.test_session() as sess:
     sess.run(tf.global_variables_initializer())
     value_grad = np.transpose(self.w.eval())[0]
     # Guard against a vacuous pass: at least one component must exceed the
     # threshold, otherwise clipping would be a no-op. This does not depend on
     # the row index, so it is checked once rather than per iteration.
     self.assertTrue(np.greater(np.absolute(value_grad), clip_value).any())
     # Both the expected value and the evaluated gradient are the same for
     # every row, so compute them once outside the loop.
     expected_row = np.clip(value_grad, -clip_value, clip_value)
     dqda = dpg_extra.dqda.eval()
     for i in range(int(self.batch_size)):
       self.assertAllClose(dqda[i], expected_row)
Пример #5
0
  def setUp(self):
    """Builds the linear graph that the tests in this class operate on.

    The graph computes:
      a_tm1_max = s_tm1 * w_s + b_s
      q_tm1_max = a_tm1_max * w + b
    and feeds `q_tm1_max` and `a_tm1_max` to `dpg_ops.dpg` with its default
    arguments. Every intermediate tensor and variable is stored on `self`.
    """
    super(DpgTest, self).setUp()

    # Constant input: two rows of three features each.
    states = tf.constant([[0, 1, 0], [1, 1, 2]], dtype=tf.float32)

    # First linear layer: [2, 3] x [3, 2] + [2].
    state_weights = tf.Variable(tf.random_normal([3, 2]), dtype=tf.float32)
    state_bias = tf.Variable(tf.zeros([2]), dtype=tf.float32)
    actions = tf.matmul(states, state_weights) + state_bias

    # Second linear layer: [2, 2] x [2, 1] + [1].
    value_weights = tf.Variable(tf.random_normal([2, 1]), dtype=tf.float32)
    value_bias = tf.Variable(tf.zeros([1]), dtype=tf.float32)
    values = tf.matmul(actions, value_weights) + value_bias

    loss, extra = dpg_ops.dpg(values, actions)

    # Publish everything under the attribute names the tests read.
    self.s_tm1 = states
    self.w_s = state_weights
    self.b_s = state_bias
    self.a_tm1_max = actions
    self.w = value_weights
    self.b = value_bias
    self.q_tm1_max = values
    self.loss = loss
    self.dpg_extra = extra
    # Leading dimension of the action tensor, i.e. the number of input rows.
    self.batch_size = actions.get_shape()[0]
Пример #6
0
  def setUp(self):
    """Builds the linear graph that the tests in this class operate on.

    The graph computes:
      a_tm1_max = s_tm1 * w_s + b_s
      q_tm1_max = a_tm1_max * w + b
    and feeds `q_tm1_max` and `a_tm1_max` to `dpg_ops.dpg` with its default
    arguments. Every intermediate tensor and variable is stored on `self`.
    """
    super(DpgTest, self).setUp()

    # Constant input: two rows of three features each.
    states = tf.constant([[0, 1, 0], [1, 1, 2]], dtype=tf.float32)

    # First linear layer: [2, 3] x [3, 2] + [2].
    state_weights = tf.Variable(tf.random_normal([3, 2]), dtype=tf.float32)
    state_bias = tf.Variable(tf.zeros([2]), dtype=tf.float32)
    actions = tf.matmul(states, state_weights) + state_bias

    # Second linear layer: [2, 2] x [2, 1] + [1].
    value_weights = tf.Variable(tf.random_normal([2, 1]), dtype=tf.float32)
    value_bias = tf.Variable(tf.zeros([1]), dtype=tf.float32)
    values = tf.matmul(actions, value_weights) + value_bias

    loss, extra = dpg_ops.dpg(values, actions)

    # Publish everything under the attribute names the tests read.
    self.s_tm1 = states
    self.w_s = state_weights
    self.b_s = state_bias
    self.a_tm1_max = actions
    self.w = value_weights
    self.b = value_bias
    self.q_tm1_max = values
    self.loss = loss
    self.dpg_extra = extra
    # Leading dimension of the action tensor, i.e. the number of input rows.
    self.batch_size = actions.get_shape()[0]