예제 #1
0
 def update_all_targets(self):
     """
     Refresh the target critic and advance the iteration counter.

     Intended to be called once the normal updates have been performed
     for each agent. Only the critic pair reachable through
     ``self.agents`` is passed to ``soft_update``; this method does not
     touch any policy network.
     """
     # NOTE(review): ``soft_update`` is defined outside this block; it
     # presumably blends the second network into the first at rate
     # ``self.tau`` -- confirm against its definition.
     holder = self.agents
     soft_update(holder.target_critic, holder.critic, self.tau)
     self.niter += 1
예제 #2
0
 def update_all_targets(self):
     """
     Soft-update every agent's target networks, then bump ``self.niter``.

     Intended to be called after the normal updates have been performed
     for each agent. For each agent the critic pair is handled before
     the policy pair, and the counter is incremented exactly once per
     call regardless of how many agents there are.
     """
     for agent in self.agents:
         # Each call receives (target_<kind>, <kind>, tau); critic first.
         for kind in ("critic", "policy"):
             soft_update(getattr(agent, "target_" + kind),
                         getattr(agent, kind),
                         self.tau)
     self.niter += 1
예제 #3
0
 def update_all_agent(self):
     """
     Soft-update the target actor and target critic of every agent.

     For each agent the actor pair is handled before the critic pair.
     ``self.num_iteration`` is incremented once per call, after all
     agents have been processed.
     """
     for agent in self.agents:
         # Each call receives (target_<kind>, <kind>, tau); actor first.
         for kind in ("actor", "critic"):
             soft_update(getattr(agent, "target_" + kind),
                         getattr(agent, kind),
                         self.tau)
     self.num_iteration += 1
예제 #4
0
 def update_all_targets(self):
     """
     Update the target networks (called after normal updates have been
     performed for each agent).

     Only the first agent is ever processed: after its critic and policy
     targets are passed to ``soft_update``, the loop either stops (its
     algorithm type is in ``SHARED_ALGOS``) or raises.

     Raises:
         ValueError: if the first agent's algorithm type is not in
             ``SHARED_ALGOS``. The first agent's targets have already
             been updated at that point and ``self.niter`` is left
             unchanged.
     """
     for idx, agent in enumerate(self.agents):
         soft_update(agent.target_critic, agent.critic, self.tau)
         soft_update(agent.target_policy, agent.policy, self.tau)
         if self.alg_types[idx] not in SHARED_ALGOS:
             raise ValueError('Only shared algo on this branch')
         # NOTE(review): stopping after one agent presumably relies on
         # the networks being shared between agents -- confirm.
         break
     self.niter += 1
예제 #5
0
 def update_all_targets(self):
     """
     Update the target critic and every agent's target policy (called
     after normal updates have been performed for each agent).

     Behaviour depends on ``self.hard_update_interval``:

     * ``None`` -- ``soft_update`` is applied on every call using
       ``self.tau``.
     * otherwise -- ``hard_update`` is applied only when ``self.niter``
       is a multiple of the interval; on all other calls nothing is
       done.

     This method does not modify ``self.niter``.
     """
     interval = self.hard_update_interval

     if interval is None:
         soft_update(self.target_critic, self.critic, self.tau)
         for agent in self.agents:
             soft_update(agent.target_policy, agent.policy, self.tau)
         return

     # Hard updates are periodic; skip iterations that are off-schedule.
     if self.niter % interval != 0:
         return

     hard_update(self.target_critic, self.critic)
     for agent in self.agents:
         hard_update(agent.target_policy, agent.policy)
예제 #6
0
 def update_all_targets(self):
     """
     Update all target networks (called after normal updates have been
     performed for each agent).

     The target critic is updated in place, parameter by parameter, as
     ``target = target * (1 - rate) + source * rate``. Parameters from
     ``nonattend_parameters()`` use ``self.tau`` as the rate; parameters
     from ``attend_parameters()`` use ``self.attend_tau``. Afterwards
     each agent's target policy is passed to ``soft_update`` with
     ``self.tau``.
     """
     # Each entry pairs a critic parameter-group accessor with the name
     # of the attribute holding that group's interpolation rate.
     schedule = (
         ("nonattend_parameters", "tau"),
         ("attend_parameters", "attend_tau"),
     )
     for accessor, rate_name in schedule:
         rate = getattr(self, rate_name)
         targets = getattr(self.target_critic, accessor)()
         sources = getattr(self.critic, accessor)()
         for tgt, src in zip(targets, sources):
             blended = tgt.data * (1.0 - rate) + src.data * rate
             tgt.data.copy_(blended)

     for agent in self.agents:
         soft_update(agent.target_policy, agent.policy, self.tau)
예제 #7
0
 def update_all_targets(self):
     """
     Update all target networks (called after normal updates have been
     performed for each agent).

     * When ``self.commonCritic`` is truthy, the single critic pair held
       on ``self`` is soft-updated once; otherwise each agent's own
       critic pair is soft-updated.
     * Every agent other than the first has ``hard_update`` called with
       its policy and the first agent's policy before its target policy
       is soft-updated.

     ``self.niter`` is incremented once at the end.
     """
     shared_critic = self.commonCritic
     if shared_critic:
         soft_update(self.target_critic, self.critic, self.tau)

     for idx, agent in enumerate(self.agents):
         if not shared_critic:
             soft_update(agent.target_critic, agent.critic, self.tau)
         if idx != 0:
             # NOTE(review): presumably overwrites this agent's policy
             # with the first agent's weights -- confirm the argument
             # order of ``hard_update``.
             hard_update(agent.policy, self.agents[0].policy)
         soft_update(agent.target_policy, agent.policy, self.tau)

     self.niter += 1
예제 #8
0
 def update_target(self):
     """
     Soft-update the target critic, then the target actor.

     Each pair is passed to ``soft_update`` as
     ``(target_<kind>, <kind>, self.tau)``.
     """
     # NOTE(review): ``soft_update`` is defined outside this block;
     # confirm its blending semantics there.
     for kind in ("critic", "actor"):
         soft_update(getattr(self, "target_" + kind),
                     getattr(self, kind),
                     self.tau)
예제 #9
0
 def update_all_targets(self):
     """
     Soft-update the target critic and every agent's target policy.

     The critic pair held on ``self`` is processed first, followed by
     each agent's policy pair in ``self.agents`` order. All calls use
     ``self.tau``.
     """
     # Yield the critic pair first, then one policy pair per agent, so
     # the call order is unchanged.
     def _pairs():
         yield self.target_critic, self.critic
         for agent in self.agents:
             yield agent.target_policy, agent.policy

     for target_net, source_net in _pairs():
         soft_update(target_net, source_net, self.tau)