示例#1
0
 def testCompatibilityCheck(self):
   """Checks that an incompatible reward tensor is rejected at build time.

   A float32 placeholder of shape `[2]` is passed in the position where the
   other tests pass `self.r_t`; every remaining argument is read from `self`.
   Building the op is expected to raise a `ValueError` whose message matches
   the rank/compatibility pattern below.
   """
   mismatched_r_t = tf.placeholder(tf.float32, [2])
   expected_message = (
       "PersistentQLearning: Error in rank and/or compatibility check")
   with self.assertRaisesRegexp(ValueError, expected_message):
     op_args = (self.q_tm1, self.a_tm1, mismatched_r_t, self.pcont_t,
                self.q_t, self.action_gap_scale)
     # The assignment is only reached if no error is raised, in which case
     # the surrounding context manager fails the test.
     self.persistent_qlearning = rl.persistent_qlearning(*op_args)
示例#2
0
 def testScalarCheck(self):
   """Checks that an `action_gap_scale` of 2 is rejected.

   All tensor arguments are read from `self`; only the scale is replaced by
   the integer 2. Building the op is expected to raise a `ValueError` whose
   message says the scale has to lie in [0, 1].
   """
   out_of_range_scale = 2
   expected_message = (
       r"PersistentQLearning: action_gap_scale has to lie in \[0, 1\]\.")
   with self.assertRaisesRegexp(ValueError, expected_message):
     op_args = (self.q_tm1, self.a_tm1, self.r_t, self.pcont_t, self.q_t,
                out_of_range_scale)
     # Only reached if validation unexpectedly passes; the context manager
     # then reports the missing exception.
     self.persistent_qlearning = rl.persistent_qlearning(*op_args)
示例#3
0
 def testCompatibilityCheck(self):
     """Expects a `ValueError` when the reward argument has shape `[2]`.

     The reward is replaced by a float32 placeholder of shape `[2]`, while
     the other five arguments are taken unchanged from `self`. The raised
     message must match the rank/compatibility pattern used below.
     """
     bad_reward = tf.placeholder(tf.float32, [2])
     error_pattern = (
         "PersistentQLearning: Error in rank and/or compatibility check")
     with self.assertRaisesRegexp(ValueError, error_pattern):
         call_args = (
             self.q_tm1,
             self.a_tm1,
             bad_reward,
             self.pcont_t,
             self.q_t,
             self.action_gap_scale,
         )
         self.persistent_qlearning = rl.persistent_qlearning(*call_args)
示例#4
0
 def testScalarCheck(self):
     """Expects a `ValueError` when `action_gap_scale` is set to 2.

     The tensors stored on `self` are passed through unchanged; the scale is
     the only argument overridden. The raised message must match the pattern
     stating that the scale has to lie in [0, 1].
     """
     invalid_scale = 2
     error_pattern = (
         r"PersistentQLearning: action_gap_scale has to lie in \[0, 1\]\.")
     with self.assertRaisesRegexp(ValueError, error_pattern):
         call_args = (
             self.q_tm1,
             self.a_tm1,
             self.r_t,
             self.pcont_t,
             self.q_t,
             invalid_scale,
         )
         self.persistent_qlearning = rl.persistent_qlearning(*call_args)
示例#5
0
 def setUp(self):
   """Creates the constant inputs and builds the op shared by the tests.

   Stores on `self`:
     q_tm1: float32 constant, 3 rows by 2 columns.
     a_tm1: int32 constant with 3 entries.
     pcont_t: float32 constant with 3 entries.
     r_t: float32 constant with 3 entries.
     q_t: float32 constant, 3 rows by 2 columns.
     action_gap_scale: Python float 0.25.
     persistent_qlearning: result of `rl.persistent_qlearning` on the above.
   """
   super(PersistentQLearningTest, self).setUp()

   def float_constant(values):
     # Most inputs share the float32 dtype; keep the dtype in one place.
     return tf.constant(values, dtype=tf.float32)

   # Constants are created in the same order as before so that any
   # graph-level naming of the ops is unaffected.
   self.q_tm1 = float_constant([[1, 2], [3, 4], [5, 6]])
   self.a_tm1 = tf.constant([0, 1, 1], dtype=tf.int32)
   self.pcont_t = float_constant([0, 1, 0.5])
   self.r_t = float_constant([3, 2, 7])
   self.q_t = float_constant([[11, 12], [20, 16], [-8, -4]])
   self.action_gap_scale = 0.25

   op_inputs = (self.q_tm1, self.a_tm1, self.r_t, self.pcont_t, self.q_t,
                self.action_gap_scale)
   self.persistent_qlearning = rl.persistent_qlearning(*op_inputs)
示例#6
0
 def setUp(self):
     """Prepares the tensors and the op that the test methods rely on.

     Five constant tensors (four float32, one int32) and a scalar scale of
     0.25 are created and stored on `self`, then passed positionally to
     `rl.persistent_qlearning`; the result is kept as
     `self.persistent_qlearning`.
     """
     super(PersistentQLearningTest, self).setUp()

     # Build the tensors first, keeping the original creation order.
     q_tm1 = tf.constant([[1, 2], [3, 4], [5, 6]], dtype=tf.float32)
     a_tm1 = tf.constant([0, 1, 1], dtype=tf.int32)
     pcont_t = tf.constant([0, 1, 0.5], dtype=tf.float32)
     r_t = tf.constant([3, 2, 7], dtype=tf.float32)
     q_t = tf.constant([[11, 12], [20, 16], [-8, -4]], dtype=tf.float32)
     action_gap_scale = 0.25

     # Expose everything on the instance for use by the individual tests.
     self.q_tm1 = q_tm1
     self.a_tm1 = a_tm1
     self.pcont_t = pcont_t
     self.r_t = r_t
     self.q_t = q_t
     self.action_gap_scale = action_gap_scale

     self.persistent_qlearning = rl.persistent_qlearning(
         q_tm1, a_tm1, r_t, pcont_t, q_t, action_gap_scale)