Exemplo n.º 1
0
 def _compute_loss(self, observations, actions, rewards, observations_n,
                   dones):
     '''Compute the loss of the agents for one batch of transitions.

     The five arguments are packed, in this order, into
     ``self.placeholders`` (presumably a namedtuple type, given the
     ``_make``/``_asdict`` calls -- confirm against the class definition),
     flattened with ``flatten_map`` and handed to ``self._compute_loss_h``.

     Returns:
         The output of ``self._compute_loss_h`` after ``unflatten_map``.
     '''
     # Positional order must line up with the fields of self.placeholders.
     batch = self.placeholders._make(
         [observations, actions, rewards, observations_n, dones])
     flat_feed = flatten_map(batch._asdict())
     flat_losses = self._compute_loss_h(flat_feed)
     return unflatten_map(flat_losses)
Exemplo n.º 2
0
 def _train_step(self,
                 observations,
                 actions,
                 rewards,
                 observations_n,
                 dones,
                 step=None):
     '''Train the agents on one batch, alternating the update routine.

     The five data arguments are packed, in this order, into
     ``self.placeholders`` (presumably a namedtuple type -- confirm),
     then flattened with ``flatten_map`` to form the feed.

     ``self._train`` is run only when ``step`` is truthy and even; for any
     other value (odd steps, ``0`` and the default ``None``)
     ``self._train_critic`` is run instead.

     Returns:
         The output of the selected routine after ``unflatten_map``.
     '''
     batch = self.placeholders._make(
         [observations, actions, rewards, observations_n, dones])
     flat_feed = flatten_map(batch._asdict())
     # A falsy step (None or 0) short-circuits before the modulo is taken.
     use_full_update = bool(step) and step % 2 == 0
     run_update = self._train if use_full_update else self._train_critic
     return unflatten_map(run_update(flat_feed))
Exemplo n.º 3
0
 def _train_step(self,
                 observations,
                 actions,
                 rewards,
                 observations_n,
                 dones,
                 step=None):
     '''Train the agents on one batch of transitions.

     The five data arguments are packed, in this order, into
     ``self.placeholders`` (presumably a namedtuple type -- confirm),
     flattened with ``flatten_map`` and passed to ``self._train``.
     ``step`` is accepted but not used by this implementation.

     Returns:
         The output of ``self._train`` after ``unflatten_map``.
     '''
     batch = self.placeholders._make(
         [observations, actions, rewards, observations_n, dones])
     flat_feed = flatten_map(batch._asdict())
     flat_results = self._train(flat_feed)
     return unflatten_map(flat_results)