def _render_actions_probs(self, surface, players, action_probs):
        """Draw one icon per action around each player, scaled by the action's probability.

        Movement actions are drawn as arrows on the tile the move would lead to,
        rotated to point in the direction of travel. The interact and stay icons
        are drawn on the player's own tile (interact on the left, stay on the right).

        Args:
            surface: target passed through to ``self._render_on_tile_position``.
            players: iterable of objects exposing a ``position`` attribute.
            action_probs: one entry per player; each entry is either ``None``
                (nothing is drawn for that player) or a sequence indexed by
                ``Action.ACTION_TO_INDEX``.
        """
        tile = self.tile_size
        # Interact/stay icons are narrowed by sqrt(2) so that both always fit
        # side by side inside a single tile.
        narrow_size = (int(tile / math.sqrt(2)), tile)

        arrow_icon = pygame.transform.scale(self.ARROW_IMG, (tile, tile))
        interact_icon = pygame.transform.scale(self.INTERACT_IMG, narrow_size)
        stay_icon = pygame.transform.scale(self.STAY_IMG, narrow_size)

        # direction -> (rotation in degrees, horizontal_align, vertical_align)
        arrow_layout = {
            Direction.NORTH: (0, "center", "bottom"),
            Direction.WEST: (90, "right", "center"),
            Direction.SOUTH: (180, "center", "top"),
            Direction.EAST: (270, "left", "center"),
        }

        for player, probs in zip(players, action_probs):
            if probs is None:
                continue
            for action in Action.ALL_ACTIONS:
                # Scale the side length by sqrt(p) so the icon's *area* is
                # proportional to the probability.
                zoom = math.sqrt(probs[Action.ACTION_TO_INDEX[action]])
                if action == "interact":
                    icon = pygame.transform.rotozoom(interact_icon, 0, zoom)
                    target = player.position
                    h_align, v_align = "left", "center"
                elif action == Action.STAY:
                    icon = pygame.transform.rotozoom(stay_icon, 0, zoom)
                    target = player.position
                    h_align, v_align = "right", "center"
                else:
                    target = Action.move_in_direction(player.position, action)
                    angle, h_align, v_align = arrow_layout[action]
                    icon = pygame.transform.rotozoom(arrow_icon, angle, zoom)
                self._render_on_tile_position(
                    surface, icon, target,
                    horizontal_align=h_align, vertical_align=v_align)
# Example #2
# 0
    def action(self, state):
        """Pick a random action and report the distribution it was drawn from.

        The candidate set is ``Action.ALL_ACTIONS`` when ``self.all_actions`` is
        truthy, otherwise ``Action.MOTION_ACTIONS``; candidates are weighted
        uniformly. When ``self.custom_wait_prob`` is set, ``Action.STAY`` is
        returned with that probability; otherwise STAY is removed from the
        distribution, which is renormalized before sampling.

        Args:
            state: unused by this method.

        Returns:
            Tuple ``(action, info)`` where ``info["action_probs"]`` holds the
            probabilities associated with the returned action.
        """
        candidates = Action.ALL_ACTIONS if self.all_actions else list(Action.MOTION_ACTIONS)
        candidate_idx = np.array([Action.ACTION_TO_INDEX[a] for a in candidates])

        # Uniform mass over the candidates, zero everywhere else.
        distribution = np.zeros(Action.NUM_ACTIONS)
        distribution[candidate_idx] = 1 / len(candidate_idx)

        wait_prob = self.custom_wait_prob
        if wait_prob is None:
            return Action.sample(distribution), {"action_probs": distribution}

        wait = Action.STAY
        if np.random.random() < wait_prob:
            return wait, {"action_probs": Agent.a_probs_from_action(wait)}

        # The wait branch was not taken: sample among the remaining actions only.
        distribution = Action.remove_indices_and_renormalize(
            distribution, [Action.ACTION_TO_INDEX[wait]])
        return Action.sample(distribution), {"action_probs": distribution}
# Example #3
# 0
 def actions(self, states, agent_indices):
     """Sample one action per entry returned by the policy.

     Args:
         states: forwarded to ``self.policy.multi_state_policy``.
         agent_indices: forwarded to ``self.policy.multi_state_policy``.

     Returns:
         List of ``(action, info)`` tuples, one per distribution returned by
         the policy, where ``info["action_probs"]`` is that distribution.
     """
     probs_per_state = self.policy.multi_state_policy(states, agent_indices)
     return [
         (Action.sample(probs), {"action_probs": probs})
         for probs in probs_per_state
     ]
 def get_near_locations(self, location):
     """Return the in-bounds locations one step away from ``location``.

     One candidate is produced per entry of ``Direction.ALL_DIRECTIONS`` (in
     that order) and kept only if ``self.is_in_bounds`` accepts it.
     """
     # Generator keeps the move/bounds-check interleaving lazy, one direction at a time.
     candidates = (
         Action.move_in_direction(location, direction)
         for direction in Direction.ALL_DIRECTIONS
     )
     return [spot for spot in candidates if self.is_in_bounds(spot)]
# Example #5
# 0
 def action(self, state):
     """Sample uniformly from the motion actions, plus interact if enabled.

     Args:
         state: unused by this method.

     Returns:
         Tuple ``(action, info)`` where ``info["action_probs"]`` is the
         distribution the action was sampled from.
     """
     choices = list(Action.MOTION_ACTIONS)
     if self.interact:
         choices.append(Action.INTERACT)
     choice_idx = np.array([Action.ACTION_TO_INDEX[choice] for choice in choices])

     # Equal weight on every allowed action, zero on the rest.
     distribution = np.zeros(Action.NUM_ACTIONS)
     distribution[choice_idx] = 1 / len(choice_idx)

     sampled = Action.sample(distribution)
     return sampled, {"action_probs": distribution}
# Example #6
# 0
 def action(self, state):
     """Sample from the mean of the action distributions of ``self.agents``.

     Each agent in ``self.agents`` is asked for its action on ``state``; only
     the ``"action_probs"`` entry of its info dict is used. The distributions
     are summed and divided by the number of agents.

     Returns:
         Tuple ``(action, info)`` where ``info["action_probs"]`` is the
         averaged distribution the action was sampled from.
     """
     summed = np.zeros(Action.NUM_ACTIONS)
     for member in self.agents:
         member_info = member.action(state)[1]
         summed += member_info["action_probs"]

     mean_probs = summed / len(self.agents)
     return Action.sample(mean_probs), {"action_probs": mean_probs}