def _render_actions_probs(self, surface, players, action_probs):
    """Draw every player's action probabilities onto ``surface`` as scaled icons.

    ``players`` and ``action_probs`` are walked in lockstep; a player whose
    entry is ``None`` is skipped. For each action in ``Action.ALL_ACTIONS``
    one icon is drawn, zoomed by the square root of that action's probability:

    * interact -> interact icon, left/center aligned on the player's tile
    * stay     -> stay icon, right/center aligned on the player's tile
    * anything else is treated as a direction -> arrow icon, rotated for that
      direction and drawn on the tile ``Action.move_in_direction`` returns,
      aligned towards the player's tile.
    """
    rotation_by_direction = {
        Direction.NORTH: 0,
        Direction.WEST: 90,
        Direction.SOUTH: 180,
        Direction.EAST: 270,
    }
    aligns_by_direction = {
        Direction.NORTH: {"horizontal_align": "center", "vertical_align": "bottom"},
        Direction.WEST: {"horizontal_align": "right", "vertical_align": "center"},
        Direction.SOUTH: {"horizontal_align": "center", "vertical_align": "top"},
        Direction.EAST: {"horizontal_align": "left", "vertical_align": "center"},
    }

    arrow_icon = pygame.transform.scale(self.ARROW_IMG, (self.tile_size, self.tile_size))
    # Width is shrunk by sqrt(2) so that the interact icon and the stay icon
    # always fit side by side inside a single tile.
    narrow_size = (int(self.tile_size / math.sqrt(2)), self.tile_size)
    interact_icon = pygame.transform.scale(self.INTERACT_IMG, narrow_size)
    stay_icon = pygame.transform.scale(self.STAY_IMG, narrow_size)

    for player, probs in zip(players, action_probs):
        if probs is None:
            continue
        for action in Action.ALL_ACTIONS:
            # Zooming by sqrt(p) makes the icon's *area* proportional to p.
            zoom = math.sqrt(probs[Action.ACTION_TO_INDEX[action]])
            if action == "interact":
                icon, angle, target = interact_icon, 0, player.position
                aligns = {"horizontal_align": "left", "vertical_align": "center"}
            elif action == Action.STAY:
                icon, angle, target = stay_icon, 0, player.position
                aligns = {"horizontal_align": "right", "vertical_align": "center"}
            else:
                target = Action.move_in_direction(player.position, action)
                icon, angle = arrow_icon, rotation_by_direction[action]
                aligns = aligns_by_direction[action]
            scaled_icon = pygame.transform.rotozoom(icon, angle, zoom)
            self._render_on_tile_position(surface, scaled_icon, target, **aligns)
def action(self, state):
    """Sample an action uniformly from the legal set, with an optional wait override.

    The legal set is ``Action.ALL_ACTIONS`` when ``self.all_actions`` is truthy,
    otherwise the motion actions only. ``state`` is not read.

    When ``self.custom_wait_prob`` is not ``None``, a uniform random draw below
    that value returns ``Action.STAY`` directly (with the probabilities given by
    ``Agent.a_probs_from_action``); otherwise the stay index is removed from the
    distribution and it is renormalized before sampling.

    Returns a ``(action, {"action_probs": probs})`` tuple.
    """
    candidates = Action.ALL_ACTIONS if self.all_actions else list(Action.MOTION_ACTIONS)
    candidate_idx = np.array([Action.ACTION_TO_INDEX[a] for a in candidates])

    probs = np.zeros(Action.NUM_ACTIONS)
    probs[candidate_idx] = 1 / len(candidate_idx)

    if self.custom_wait_prob is None:
        return Action.sample(probs), {"action_probs": probs}

    wait = Action.STAY
    if np.random.random() < self.custom_wait_prob:
        return wait, {"action_probs": Agent.a_probs_from_action(wait)}

    # Not waiting this step: drop the stay action and renormalize the rest.
    probs = Action.remove_indices_and_renormalize(probs, [Action.ACTION_TO_INDEX[wait]])
    return Action.sample(probs), {"action_probs": probs}
def actions(self, states, agent_indices):
    """Sample one action per distribution returned by the policy.

    ``self.policy.multi_state_policy(states, agent_indices)`` is queried once;
    for each distribution it yields, an action is drawn with ``Action.sample``.

    Returns a list of ``(action, {"action_probs": probs})`` tuples, in the
    order the policy produced the distributions.
    """
    return [
        (Action.sample(probs), {"action_probs": probs})
        for probs in self.policy.multi_state_policy(states, agent_indices)
    ]
def get_near_locations(self, location):
    """Return the in-bounds neighbours of ``location``.

    One candidate is produced per entry of ``Direction.ALL_DIRECTIONS`` via
    ``Action.move_in_direction``; candidates rejected by ``self.is_in_bounds``
    are dropped. Order follows ``Direction.ALL_DIRECTIONS``.
    """
    # Lazy generator: each candidate is bounds-checked right after it is computed.
    candidates = (
        Action.move_in_direction(location, direction)
        for direction in Direction.ALL_DIRECTIONS
    )
    return [spot for spot in candidates if self.is_in_bounds(spot)]
def action(self, state):
    """Sample uniformly among the motion actions, plus interact if enabled.

    ``Action.INTERACT`` joins the candidate set only when ``self.interact`` is
    truthy. ``state`` is not read.

    Returns a ``(action, {"action_probs": probs})`` tuple, where ``probs`` has
    equal mass on every candidate index and zero elsewhere.
    """
    allowed = [*Action.MOTION_ACTIONS]
    if self.interact:
        allowed.append(Action.INTERACT)

    allowed_idx = np.array([Action.ACTION_TO_INDEX[a] for a in allowed])
    uniform_mass = 1 / len(allowed_idx)

    probs = np.zeros(Action.NUM_ACTIONS)
    probs[allowed_idx] = uniform_mass

    chosen = Action.sample(probs)
    return chosen, {"action_probs": probs}
def action(self, state):
    """Sample from the average of the sub-agents' action distributions.

    Every agent in ``self.agents`` is asked for its action on ``state``; only
    the ``"action_probs"`` entry of each returned info dict is used. The
    distributions are summed and divided by the number of agents.

    Returns a ``(action, {"action_probs": mean_probs})`` tuple.
    """
    summed = np.zeros(Action.NUM_ACTIONS)
    for member in self.agents:
        info = member.action(state)[1]
        # Accumulate in place so the running total keeps the zeros array's dtype.
        summed += info["action_probs"]

    mean_probs = summed / len(self.agents)
    return Action.sample(mean_probs), {"action_probs": mean_probs}