def test_target_var_init(self):
    """ test target_var_init op, sets target and main variables equal """
    with tf.variable_scope(TARGET):
        target_val = tf_utils.mlp(self.obs_ph, (4,), activation=tf.tanh)
    with tf.variable_scope(MAIN):
        main_val = tf_utils.mlp(self.obs_ph, (4,), activation=tf.tanh)
    with self.agent.sess as sess:
        sess.run(tf.global_variables_initializer())
        target_vars = tf_utils.var_list(TARGET)
        main_vars = tf_utils.var_list(MAIN)
        target_nps, main_nps = sess.run((target_vars, main_vars))
        for targ, upd in zip(target_nps, main_nps):
            assert targ.shape == upd.shape
            # the biases should actually be the same, all zeros
            if len(targ.shape) > 1:
                assert not (targ == upd).all()
        # now set target and main equal; target_var_init builds and runs
        # the assign ops itself, so there is no op to capture here
        self.agent.target_var_init()
        # now make sure all target and main parameters are equal
        target_vars = tf_utils.var_list(TARGET)
        main_vars = tf_utils.var_list(MAIN)
        target_nps, main_nps = sess.run((target_vars, main_vars))
        for targ, upd in zip(target_nps, main_nps):
            assert targ.shape == upd.shape
            np.testing.assert_allclose(targ, upd)
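# The tests lean on tf_utils.var_list to gather variables by scope. A minimal
# sketch of what it presumably does is below; this is an assumption for
# illustration, not the repo's actual implementation.
def var_list_sketch(scope):
    """ return the trainable variables whose names fall under `scope` """
    return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)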
def target_var_init(self):
    """ builds and runs a tensorflow op that sets each target variable
    equal to its corresponding main (updated) variable """
    op_list = [
        tf.assign(target_var, updated_var)
        for target_var, updated_var in zip(
            tf_utils.var_list(TARGET), tf_utils.var_list(MAIN))
    ]
    self.sess.run(tf.group(*op_list))
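# Typical usage (a sketch; `agent` is illustrative): run once right after
# global initialization, so the target network starts identical to the main
# network before any polyak updates happen.
#
#   agent.sess.run(tf.global_variables_initializer())
#   agent.target_var_init()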
def test_build_policy_and_qval(self):
    """ smoke test, make sure the number of parameters is right """
    pi, qval, qval_pi = self.agent.build_policy_and_qval(
        self.obs_ph, self.act_ph, self.env.action_space)
    with self.cached_session() as sess:
        sess.run(tf.global_variables_initializer())
        pi_vars = tf_utils.var_list(POLICY)
        assert len(pi_vars) == 4  # 2 kernels and 2 biases
        qval_vars = tf_utils.var_list(QVAL)
        assert len(qval_vars) == 4  # 2 kernels and 2 biases
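# For context, a plausible sketch of the kind of graph build_policy_and_qval
# constructs: a policy head under POLICY and a Q head under QVAL, each a
# two-layer mlp (so var_list finds 2 kernels and 2 biases per scope, matching
# the assertions above). Hidden sizes, activations, and output scaling are
# assumptions for illustration, not the repo's actual architecture; the reuse
# below assumes tf_utils.mlp builds layers with deterministic variable names.
def build_policy_and_qval_sketch(obs_ph, act_ph, action_space):
    act_dim = action_space.shape[0]
    with tf.variable_scope(POLICY):
        # tanh keeps actions in [-1, 1]; scale up to the env's action bounds
        pi = action_space.high[0] * tf_utils.mlp(
            obs_ph, (64, act_dim), activation=tf.tanh)
    with tf.variable_scope(QVAL):
        qval = tf_utils.mlp(
            tf.concat([obs_ph, act_ph], axis=-1), (64, 1),
            activation=tf.nn.relu)
    with tf.variable_scope(QVAL, reuse=True):
        # same Q weights, evaluated at the policy's own action
        qval_pi = tf_utils.mlp(
            tf.concat([obs_ph, pi], axis=-1), (64, 1),
            activation=tf.nn.relu)
    return pi, qval, qval_pi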
def build_target_update_op(self):
    """ returns tensorflow operation to update target parameters
    toward the main (updated) parameters by polyak averaging """
    op_list = [
        tf.assign(target_var,
                  self.polyak * target_var + (1 - self.polyak) * updated_var)
        for target_var, updated_var in zip(
            tf_utils.var_list(TARGET), tf_utils.var_list(MAIN))
    ]
    return tf.group(*op_list)
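# A quick numeric illustration of the polyak average above (pure numpy, no
# graph; the polyak value and step count are assumptions): with polyak close
# to 1, the target parameters only slowly track the main parameters, which
# stabilizes the bootstrapped Q targets.
def polyak_update_example():
    polyak, target, main = 0.995, np.zeros(3), np.ones(3)
    for _ in range(1000):
        target = polyak * target + (1 - polyak) * main
    return target  # ~0.993 after 1000 steps, still short of main's 1.0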
def build_policy_loss(self, qval_pi):
    """ build loss function and train op for the deterministic policy """
    # maximize Q(s, pi(s)) by minimizing its negative mean
    loss = -tf.reduce_mean(qval_pi)
    train_op = tf.train.AdamOptimizer(learning_rate=self.pi_lr).minimize(
        loss, var_list=tf_utils.var_list(MAIN + '/' + POLICY))
    return loss, train_op
def build_qval_loss(self, qval, qval_target):
    """ build loss and train op for the action-value function """
    # tf.losses.mean_squared_error takes labels first; name the arguments
    # so the target is unambiguously the label
    loss = tf.losses.mean_squared_error(labels=qval_target, predictions=qval)
    train_op = tf.train.AdamOptimizer(learning_rate=self.q_lr).minimize(
        loss, var_list=tf_utils.var_list(MAIN + '/' + QVAL))
    return loss, train_op
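# A hedged sketch of how the pieces above fit together in one training step;
# the attribute names on `agent` (q_train_op, pi_train_op, target_update_op)
# are assumptions for illustration. Because each minimize() call was given
# its own var_list, the policy step cannot modify the Q-network weights and
# vice versa; the target update runs last, after both gradient steps.
def train_step_sketch(agent, sess, feed_dict):
    sess.run(agent.q_train_op, feed_dict=feed_dict)
    sess.run(agent.pi_train_op, feed_dict=feed_dict)
    sess.run(agent.target_update_op)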