def actions(self, states, agent_indices):
    """Sample one action per (state, agent-index) pair.

    Queries the underlying policy for a probability vector per agent, then
    samples each action independently.

    Returns:
        list of (action, info) tuples, where info carries the
        ``action_probs`` vector the action was drawn from.
    """
    probs_per_agent = self.policy.multi_state_policy(states, agent_indices)
    return [
        (Action.sample(probs), {"action_probs": probs})
        for probs in probs_per_agent
    ]
def action(self, state):
    """Return a uniformly random legal action and its distribution.

    The legal set is the motion actions, extended with INTERACT when
    ``self.interact`` is enabled. The state argument is ignored.

    Returns:
        (action, info) where info["action_probs"] is the uniform
        probability vector over the legal actions.
    """
    candidates = list(Action.MOTION_ACTIONS)
    if self.interact:
        candidates.append(Action.INTERACT)
    probs = np.zeros(Action.NUM_ACTIONS)
    uniform_p = 1 / len(candidates)
    for a in candidates:
        probs[Action.ACTION_TO_INDEX[a]] = uniform_p
    return Action.sample(probs), {"action_probs": probs}
def action(self, state):
    """Sample uniformly over legal actions, with an optional stay override.

    Legal actions are the motion actions, or all actions when
    ``self.all_actions`` is set. If ``self.custom_wait_prob`` is not None,
    STAY is taken with exactly that probability; otherwise STAY is removed
    from the distribution and the rest is renormalized. The state argument
    is ignored.

    Returns:
        (action, info) where info["action_probs"] is the distribution
        the action was drawn from.
    """
    candidates = Action.ALL_ACTIONS if self.all_actions else list(Action.MOTION_ACTIONS)
    probs = np.zeros(Action.NUM_ACTIONS)
    candidate_idxs = np.array([Action.ACTION_TO_INDEX[a] for a in candidates])
    probs[candidate_idxs] = 1 / len(candidate_idxs)

    if self.custom_wait_prob is not None:
        stay = Action.STAY
        # Flip a coin for the forced-wait branch first; otherwise drop STAY
        # from the uniform distribution and renormalize.
        if np.random.random() < self.custom_wait_prob:
            return stay, {"action_probs": Agent.a_probs_from_action(stay)}
        probs = Action.remove_indices_and_renormalize(
            probs, [Action.ACTION_TO_INDEX[stay]]
        )

    return Action.sample(probs), {"action_probs": probs}
def action(self, state):
    """Sample from the mean of the member agents' action distributions.

    Each agent in ``self.agents`` is queried for its action-probability
    vector on the given state; the vectors are averaged and a single
    action is drawn from the mixture.

    Returns:
        (action, info) where info["action_probs"] is the averaged
        distribution.
    """
    member_probs = [agent.action(state)[1]["action_probs"] for agent in self.agents]
    mean_probs = np.sum(member_probs, axis=0) / len(self.agents)
    return Action.sample(mean_probs), {"action_probs": mean_probs}