def calculate_log_probability_of_actions(self, policy, states, actions):
     """Calculates the log probability of an action occuring given a policy and starting state"""
     policy_output = policy.forward(states).cpu().to(self.device)
     policy_distribution = create_actor_distribution(
         self.action_types, policy_output, self.action_size)
     actions_tensor = torch.from_numpy(np.array(actions))
     policy_distribution_log_prob = policy_distribution.log_prob(
         actions_tensor)
     return policy_distribution_log_prob
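Each method in this example, and the one below, calls a create_actor_distribution helper that is not included in the excerpt. A rough sketch of what such a helper can look like, assuming discrete actors output a vector of action probabilities and continuous actors output concatenated means and standard deviations (both assumptions, not confirmed by these snippets):

import torch
from torch.distributions import Categorical, Normal

def create_actor_distribution(action_types, actor_output, action_size):
    """Sketch: wrap the actor's raw output in a torch.distributions object to sample from."""
    if action_types == "DISCRETE":
        # assume actor_output holds one probability per action, shape (batch, action_size)
        return Categorical(probs=actor_output)
    # assume continuous actors output [means, stds] concatenated along the last dimension
    means = actor_output[:, :action_size]
    stds = torch.abs(actor_output[:, action_size:]) + 1e-6
    return Normal(means, stds)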
def pick_action(self, policy, state):
     """Picks an action using the policy"""
     state = torch.from_numpy(state).float().unsqueeze(0)  # add a batch dimension
     actor_output = policy.forward(state)
     action_distribution = create_actor_distribution(
         self.action_types, actor_output, self.action_size)
     action = action_distribution.sample().numpy()
     if self.action_types == "CONTINUOUS":
         action += self.noise.sample()  # add exploration noise to continuous actions
     return action
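The self.noise object used in the continuous branch is not defined here either. A common choice for this kind of exploration noise is an Ornstein-Uhlenbeck process; the class below is an illustrative sketch under that assumption (the name OUNoise and the default parameters are not taken from the source):

import numpy as np

class OUNoise:
    """Sketch of an Ornstein-Uhlenbeck process, a typical candidate for the noise object above."""
    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # start each episode from the long-run mean
        self.state = np.copy(self.mu)

    def sample(self):
        # mean-reverting step plus Gaussian perturbation
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.standard_normal(len(self.state))
        self.state = self.state + dx
        return self.state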
Example no. 3
def produce_action_and_action_info(self, state):
     """Given the state, produces an action, the probability of the action, the log probability of the action, and
     the argmax action"""
     action_probabilities = self.actor_local(state)
     max_probability_action = torch.argmax(action_probabilities).unsqueeze(0)
     action_distribution = create_actor_distribution(
         self.action_types, action_probabilities, self.action_size)
     action = action_distribution.sample().cpu()
     # guard against log(0) for actions that receive exactly zero probability
     z = (action_probabilities == 0.0).float() * 1e-8
     log_action_probabilities = torch.log(action_probabilities + z)
     return action, (action_probabilities,
                     log_action_probabilities), max_probability_action
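Returning the full probability vector together with its log is what lets a discrete soft actor-critic style update take the expectation over actions in closed form instead of relying on a single sampled action. The function below sketches that use of the returned tuple; the loss form, the alpha temperature, and the q_values input are illustrative assumptions rather than part of this code:

import torch

def discrete_policy_loss(action_probabilities, log_action_probabilities, q_values, alpha=0.2):
    """Sketch: closed-form expectation over a discrete action set, all inputs shaped (batch, action_size)."""
    inside_term = alpha * log_action_probabilities - q_values
    return (action_probabilities * inside_term).sum(dim=1).mean()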