  def testDpgGradientNormClipping(self):
    """Tests that the gradient dq/da is clipped using norm clipping."""
    _, dpg_extra = dpg_ops.dpg(
        self.q_tm1_max, self.a_tm1_max, dqda_clipping=0.01, clip_norm=True)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      for i in range(int(self.batch_size)):
        self.assertAllClose(np.linalg.norm(dpg_extra.dqda.eval()[i]), 0.01)
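  # Note: with clip_norm=True, dqda_clipping is assumed here to behave like a
  # per-row norm clip, i.e. roughly:
  #
  #   dqda = tf.gradients(q_max, a_max)[0]
  #   dqda = tf.clip_by_norm(dqda, dqda_clipping, axes=[-1])
  #
  # so each row of dqda has norm exactly 0.01 whenever the unclipped
  # gradient's norm exceeds 0.01, which is what the assertion above checks.
  # (This is an explanatory sketch, not the library's implementation.)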
  def testDpgGradientClipping(self):
    """Tests that the gradient dq/da is clipped element-wise."""
    _, dpg_extra = dpg_ops.dpg(
        self.q_tm1_max, self.a_tm1_max, dqda_clipping=0.01)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      value_grad = np.transpose(self.w.eval())[0]
      for i in range(int(self.batch_size)):
        self.assertAllClose(dpg_extra.dqda.eval()[i],
                            np.clip(value_grad, -0.01, 0.01))
      self.assertTrue(np.greater(np.absolute(value_grad), 0.01).any())
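  # Because setUp defines q_tm1_max = a_tm1_max * w + b (a linear layer),
  # the analytic gradient dq/da for every batch row is simply w transposed,
  # which is why value_grad above is np.transpose(self.w.eval())[0].
  # With a scalar dqda_clipping and clip_norm left at its default, the
  # clipping is presumed to act element-wise, i.e. roughly:
  #
  #   dqda = tf.clip_by_value(dqda, -0.01, 0.01)
  #
  # The final assertTrue confirms at least one component of the unclipped
  # gradient exceeds 0.01 in magnitude, so the clipping is actually exercised.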
  def setUp(self):
    """Sets up test scenario.

    a_tm1_max = s_tm1 * w_s + b_s
    q_tm1_max = a_tm1_max * w + b
    """
    super(DpgTest, self).setUp()
    self.s_tm1 = tf.constant([[0, 1, 0], [1, 1, 2]], dtype=tf.float32)
    self.w_s = tf.Variable(tf.random_normal([3, 2]), dtype=tf.float32)
    self.b_s = tf.Variable(tf.zeros([2]), dtype=tf.float32)
    self.a_tm1_max = tf.matmul(self.s_tm1, self.w_s) + self.b_s
    self.w = tf.Variable(tf.random_normal([2, 1]), dtype=tf.float32)
    self.b = tf.Variable(tf.zeros([1]), dtype=tf.float32)
    self.q_tm1_max = tf.matmul(self.a_tm1_max, self.w) + self.b
    self.loss, self.dpg_extra = dpg_ops.dpg(self.q_tm1_max, self.a_tm1_max)
    self.batch_size = self.a_tm1_max.get_shape()[0]
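  # Illustrative sketch (not part of the original suite): in this linear
  # setup the unclipped dq/da returned in dpg_extra should equal w transposed
  # for every batch row. The method name and structure are hypothetical,
  # mirroring the tests above.
  def testDpgDqdaMatchesAnalyticGradientSketch(self):
    """Checks the unclipped dq/da equals w^T in the linear test scenario."""
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      value_grad = np.transpose(self.w.eval())[0]
      dqda = self.dpg_extra.dqda.eval()
      for i in range(int(self.batch_size)):
        self.assertAllClose(dqda[i], value_grad)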