Python persistent_qlearning示例

编程语言: Python

命名空间/包名称: trfl.action_value_ops

方法/功能: persistent_qlearning

hotexamples.com的示例: 6

Python persistent_qlearning - 已找到6个示例。以下是从开源项目中提取的、最受好评的 trfl.action_value_ops.persistent_qlearning 真实 Python 代码示例。您可以对示例进行评价，以帮助我们提高示例质量。

示例#1

显示文件

文件： action_value_ops_test.py 项目： wmiao1769/trfl

 def testCompatibilityCheck(self):
   """Expects ValueError when `r_t` is a shape-[2] float32 placeholder."""
   expected_error = (
       "PersistentQLearning: Error in rank and/or compatibility check")
   # Reward placeholder that replaces the fixture's `self.r_t` in this call.
   mismatched_r_t = tf.placeholder(tf.float32, [2])
   with self.assertRaisesRegexp(ValueError, expected_error):
     op_args = (self.q_tm1, self.a_tm1, mismatched_r_t, self.pcont_t,
                self.q_t, self.action_gap_scale)
     self.persistent_qlearning = rl.persistent_qlearning(*op_args)

示例#2

显示文件

文件： action_value_ops_test.py 项目： wmiao1769/trfl

 def testScalarCheck(self):
   """Expects ValueError when `action_gap_scale` is passed as 2."""
   expected_error = (
       r"PersistentQLearning: action_gap_scale has to lie in \[0, 1\]\.")
   # The error message under test states the allowed range is [0, 1].
   out_of_range_scale = 2
   with self.assertRaisesRegexp(ValueError, expected_error):
     op_args = (self.q_tm1, self.a_tm1, self.r_t, self.pcont_t, self.q_t,
                out_of_range_scale)
     self.persistent_qlearning = rl.persistent_qlearning(*op_args)

示例#3

显示文件

文件： action_value_ops_test.py 项目： zhuanglineu/trfl

 def testCompatibilityCheck(self):
     """Building the op with a shape-[2] `r_t` must raise ValueError."""
     # Float32 placeholder used in place of the fixture's `self.r_t`.
     bad_rewards = tf.placeholder(tf.float32, [2])
     error_pattern = (
         "PersistentQLearning: Error in rank and/or compatibility check")
     raises_ctx = self.assertRaisesRegexp(ValueError, error_pattern)
     with raises_ctx:
         self.persistent_qlearning = rl.persistent_qlearning(
             self.q_tm1,
             self.a_tm1,
             bad_rewards,
             self.pcont_t,
             self.q_t,
             self.action_gap_scale)

示例#4

显示文件

文件： action_value_ops_test.py 项目： zhuanglineu/trfl

 def testScalarCheck(self):
     """Building the op with `action_gap_scale` = 2 must raise ValueError."""
     error_pattern = (
         r"PersistentQLearning: action_gap_scale has to lie in \[0, 1\]\.")
     # Value outside the [0, 1] range named in the expected error message.
     invalid_scale = 2
     raises_ctx = self.assertRaisesRegexp(ValueError, error_pattern)
     with raises_ctx:
         self.persistent_qlearning = rl.persistent_qlearning(
             self.q_tm1,
             self.a_tm1,
             self.r_t,
             self.pcont_t,
             self.q_t,
             invalid_scale)

示例#5

显示文件

文件： action_value_ops_test.py 项目： wmiao1769/trfl

 def setUp(self):
   """Creates constant input tensors and builds the op under test.

   Stores the inputs (`q_tm1`, `a_tm1`, `pcont_t`, `r_t`, `q_t`,
   `action_gap_scale`) and the result of `rl.persistent_qlearning` on them
   as attributes of the test case.
   """
   super(PersistentQLearningTest, self).setUp()

   # Constants are created in the same order as they are stored below.
   q_tm1 = tf.constant([[1, 2], [3, 4], [5, 6]], dtype=tf.float32)
   a_tm1 = tf.constant([0, 1, 1], dtype=tf.int32)
   pcont_t = tf.constant([0, 1, 0.5], dtype=tf.float32)
   r_t = tf.constant([3, 2, 7], dtype=tf.float32)
   q_t = tf.constant([[11, 12], [20, 16], [-8, -4]], dtype=tf.float32)
   action_gap_scale = 0.25

   self.q_tm1 = q_tm1
   self.a_tm1 = a_tm1
   self.pcont_t = pcont_t
   self.r_t = r_t
   self.q_t = q_t
   self.action_gap_scale = action_gap_scale
   self.persistent_qlearning = rl.persistent_qlearning(
       q_tm1, a_tm1, r_t, pcont_t, q_t, action_gap_scale)

示例#6

显示文件

文件： action_value_ops_test.py 项目： zhuanglineu/trfl

 def setUp(self):
     """Prepares the fixture: constant inputs plus the op built from them.

     Each input is stored on the test case, then passed positionally to
     `rl.persistent_qlearning`; the returned value is stored as
     `self.persistent_qlearning`.
     """
     super(PersistentQLearningTest, self).setUp()

     float32 = tf.float32
     self.q_tm1 = tf.constant([[1, 2], [3, 4], [5, 6]], dtype=float32)
     self.a_tm1 = tf.constant([0, 1, 1], dtype=tf.int32)
     self.pcont_t = tf.constant([0, 1, 0.5], dtype=float32)
     self.r_t = tf.constant([3, 2, 7], dtype=float32)
     self.q_t = tf.constant([[11, 12], [20, 16], [-8, -4]], dtype=float32)
     self.action_gap_scale = 0.25

     # Positional order matches the original call.
     op_inputs = (
         self.q_tm1,
         self.a_tm1,
         self.r_t,
         self.pcont_t,
         self.q_t,
         self.action_gap_scale,
     )
     self.persistent_qlearning = rl.persistent_qlearning(*op_inputs)