示例#1
0
  def update_targets(self, tau=1.0):
    """Builds the op that soft-updates the target actor/critic variables.

    The trainable variables found under the actor and critic scopes are
    passed, together with the trainable variables found under the matching
    target scopes, to `utils.soft_variables_update`. For each source weight
    w_s and its corresponding target weight w_t, a soft update is:
      w_t = (1 - tau) * w_t + tau * w_s

    Args:
      tau: A float scalar in [0, 1].
    Returns:
      An operation, named 'update_targets', that groups the actor and the
      critic target updates.
    Raises:
      ValueError: If `tau` is not in [0, 1]; this includes NaN.
    """
    # Written as a negated range test on purpose: every ordering comparison
    # against NaN is False, so `tau < 0 or tau > 1` would let NaN through.
    if not 0 <= tau <= 1:
      raise ValueError('Input `tau` should be in [0, 1].')
    update_actor = utils.soft_variables_update(
        slim.get_trainable_variables(
            utils.join_scope(self._scope, self.ACTOR_NET_SCOPE)),
        slim.get_trainable_variables(
            utils.join_scope(self._scope, self.TARGET_ACTOR_NET_SCOPE)),
        tau)
    # NOTE: This updates both critic networks.
    update_critic = utils.soft_variables_update(
        slim.get_trainable_variables(
            utils.join_scope(self._scope, self.CRITIC_NET_SCOPE)),
        slim.get_trainable_variables(
            utils.join_scope(self._scope, self.TARGET_CRITIC_NET_SCOPE)),
        tau)
    return tf.group(update_actor, update_critic, name='update_targets')
示例#2
0
  def update_targets(self, tau=1.0):
    """Returns an op that moves the target networks toward the live networks.

    Source variables are the trainable variables under the actor and critic
    scopes; target variables are the trainable variables under the
    corresponding target scopes. Each (source, target) collection is handed
    to `utils.soft_variables_update`. For a source weight w_s and its target
    weight w_t, a soft update is:
      w_t = (1 - tau) * w_t + tau * w_s

    Args:
      tau: A float scalar in [0, 1].
    Returns:
      A single grouped operation, named 'update_targets', covering both the
      actor and the critic target updates.
    Raises:
      ValueError: If `tau` is not in [0, 1]. NaN is rejected as well.
    """
    # A NaN compares False against everything, so the range is tested
    # positively and negated; `tau < 0 or tau > 1` would accept NaN.
    tau_in_range = 0 <= tau <= 1
    if not tau_in_range:
      raise ValueError('Input `tau` should be in [0, 1].')

    def scoped_variables(net_scope):
      # Trainable variables living under `<self._scope>/<net_scope>`.
      return slim.get_trainable_variables(
          utils.join_scope(self._scope, net_scope))

    actor_update_op = utils.soft_variables_update(
        scoped_variables(self.ACTOR_NET_SCOPE),
        scoped_variables(self.TARGET_ACTOR_NET_SCOPE),
        tau)
    # NOTE: This updates both critic networks.
    critic_update_op = utils.soft_variables_update(
        scoped_variables(self.CRITIC_NET_SCOPE),
        scoped_variables(self.TARGET_CRITIC_NET_SCOPE),
        tau)
    return tf.group(actor_update_op, critic_update_op, name='update_targets')