Exemplo n.º 1
0
 def _what_to_break(cls, board, my_position, blast_strength):
     """Return the first breakable item met along each axis direction.

     Starting next to ``my_position``, each of the four axis directions is
     probed for up to ``blast_strength - 1`` cells. Probing a direction ends
     at the board edge, at a rigid cell, or at the first wood/agent cell;
     in the last case that cell's item is recorded.

     Args:
         board: 2-D grid indexed as ``board[(x, y)]``.
             NOTE(review): presumably a numpy array; confirm with callers.
         my_position: ``(x, y)`` cell the probe starts from.
         blast_strength: probe range; cells at offsets
             ``1 .. blast_strength - 1`` are inspected.

     Returns:
         list of ``constants.Item``, at most one per direction, in the
         order +x, -x, +y, -y.
     """
     x, y = my_position
     # board[position] is indexed as board[x, y]: x is bounded by the size of
     # the first axis and y by the size of the second. The previous code had
     # these two limits swapped, which only works on square boards.
     x_limit, y_limit = len(board), len(board[0])
     to_break = []
     # Same probing order as before: +x, -x, +y, -y.
     for step_x, step_y in ((1, 0), (-1, 0), (0, 1), (0, -1)):
         for distance in range(1, blast_strength):
             position = (x + step_x * distance, y + step_y * distance)
             on_board = (0 <= position[0] < x_limit
                         and 0 <= position[1] < y_limit)
             if not on_board:
                 break
             if utility.position_is_rigid(board, position):
                 # a rigid cell ends the search in this direction
                 break
             if (utility.position_is_wood(board, position)
                     or utility.position_is_agent(board, position)):
                 to_break.append(constants.Item(board[position]))
                 break
     return to_break
Exemplo n.º 2
0
    def act(self, obs, action_space):
        """Convert a raw observation in place and delegate to the wrapped agent.

        ``action_space`` is wrapped in ``spaces.Discrete``; the grid-like
        entries of ``obs`` become uint8 numpy arrays, ``position`` becomes a
        tuple, and ``teammate`` / ``enemies`` become ``constants.Item`` values.
        ``obs`` itself is mutated. Returns whatever ``self._agent.act``
        returns.
        """
        discrete_space = spaces.Discrete(action_space)

        # Grid-like observation entries are turned into uint8 arrays.
        for array_field in ("board", "bomb_blast_strength", "bomb_life"):
            obs[array_field] = np.array(obs[array_field], dtype="uint8")

        obs["position"] = tuple(obs["position"])

        # Raw item codes are turned into enum members.
        obs["teammate"] = constants.Item(obs["teammate"])
        obs["enemies"] = list(map(constants.Item, obs["enemies"]))

        return self._agent.act(obs, discrete_space)
Exemplo n.º 3
0
    def _djikstra(board, my_position, bombs, enemies, depth=None, exclude=None):
        """Shortest-path search from ``my_position`` over nearby board cells.

        Only cells within Manhattan distance ``depth`` of ``my_position`` and
        whose item is not in ``exclude`` take part in the search. Cells are
        expanded only when ``utility.position_is_passable`` accepts them, so
        an impassable cell can receive a distance but is never walked through.

        Args:
            board: 2-D grid indexed as ``board[(row, col)]``.
            my_position: ``(row, col)`` start cell.
            bombs: iterable of dicts with a ``'position'`` key; a bomb on
                ``my_position`` is recorded under ``constants.Item.Bomb``.
            enemies: forwarded to ``utility.position_is_passable``.
            depth: required search radius (Manhattan distance).
            exclude: items to ignore entirely; defaults to Fog, Rigid and
                Flames.

        Returns:
            ``(items, dist, prev)`` where ``items`` maps each item to the
            list of considered positions holding it, ``dist`` maps each
            considered position to its step count from ``my_position``
            (``np.inf`` if unreachable) and ``prev`` maps each position to
            its predecessor on a shortest path (``None`` if it has none).
        """
        assert (depth is not None)

        if exclude is None:
            exclude = [
                constants.Item.Fog, constants.Item.Rigid, constants.Item.Flames
            ]

        def out_of_range(p_1, p_2):
            '''Determines if two points are out of range of each other'''
            x_1, y_1 = p_1
            x_2, y_2 = p_2
            return abs(y_2 - y_1) + abs(x_2 - x_1) > depth

        items = defaultdict(list)
        dist = {}
        prev = {}
        Q = queue.PriorityQueue()

        my_x, my_y = my_position
        # The upper bounds are "+ 1" because range() is exclusive: without it
        # cells exactly `depth` away on the positive side were silently
        # dropped while those on the negative side were kept. The column
        # bound uses the second board axis rather than assuming a square grid.
        rows = range(max(0, my_x - depth), min(len(board), my_x + depth + 1))
        cols = range(max(0, my_y - depth),
                     min(len(board[0]), my_y + depth + 1))
        for r in rows:
            for c in cols:
                position = (r, c)
                if (out_of_range(my_position, position)
                        or utility.position_in_items(board, position,
                                                     exclude)):
                    continue

                dist[position] = 0 if position == my_position else np.inf
                prev[position] = None
                Q.put((dist[position], position))

        for bomb in bombs:
            if bomb['position'] == my_position:
                items[constants.Item.Bomb].append(my_position)

        # The queue cannot re-prioritise an entry, so a relaxed cell is pushed
        # again with its new distance and outdated entries are skipped when
        # popped. Previously the initial inf-priority entries were never
        # refreshed, so cells were processed in coordinate order and paths
        # leading back towards smaller coordinates were missed.
        settled = set()
        while not Q.empty():
            _, position = Q.get()
            if position in settled:
                continue
            settled.add(position)

            if utility.position_is_passable(board, position, enemies):
                x, y = position
                val = dist[(x, y)] + 1
                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    new_position = (row + x, col + y)
                    if new_position not in dist:
                        continue

                    if val < dist[new_position]:
                        dist[new_position] = val
                        prev[new_position] = position
                        Q.put((val, new_position))

            # Every considered cell is recorded exactly once, reachable or not.
            item = constants.Item(board[position])
            items[item].append(position)

        return items, dist, prev
Exemplo n.º 4
0
def move2wooden(obs):
    """Vote for the direction that leads towards wood.

    Returns a list of six numbers, all 0 except for a 1 at the index given
    by the ``value`` of the direction returned by ``_near_wood`` (if that
    result is truthy).
    """
    position = tuple(obs['position'])
    grid = np.array(obs['board'])
    active_bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    foes = [consts.Item(e) for e in obs['enemies']]
    items, dist, prev = _djikstra(grid, position, active_bombs, foes, depth=10)

    votes = [0] * 6
    # NOTE(review): the trailing 2 is presumably a search radius; confirm
    # against _near_wood.
    heading = _near_wood(position, items, dist, prev, 2)
    if heading:
        votes[heading.value] = 1
    return votes
Exemplo n.º 5
0
def _filter_legal_actions(state):
    """List the actions that are currently legal for the agent in ``state``.

    ``constants.Action.Bomb`` is always included first; each entry of the
    module-level ``directions`` follows if the cell it leads to is on the
    board and passable.
    """
    origin = tuple(state['position'])
    grid = np.array(state['board'])
    foes = [constants.Item(e) for e in state['enemies']]

    def can_enter(cell):
        # On-board is tested first so passability is never asked about an
        # off-board cell.
        return (utility.position_on_board(grid, cell)
                and utility.position_is_passable(grid, cell, foes))

    legal = [constants.Action.Bomb]
    legal.extend(
        move for move in directions
        if can_enter(utility.get_next_position(origin, move)))
    return legal
Exemplo n.º 6
0
def unsafe_directions(obs):
    """Penalise directions that are in range of a bomb.

    Returns a list of six numbers, all 0 except for -1 at the index of
    every direction for which ``_directions_in_range_of_bomb`` reports a
    value greater than zero.
    """
    position = tuple(obs['position'])
    grid = np.array(obs['board'])
    active_bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    foes = [consts.Item(e) for e in obs['enemies']]
    _, dist, _ = _djikstra(grid, position, active_bombs, foes, depth=10)
    danger = _directions_in_range_of_bomb(grid, position, active_bombs, dist)

    penalties = [0] * 6
    for direction, level in danger.items():
        if level > 0:
            penalties[direction.value] = -1
    return penalties
Exemplo n.º 7
0
def wooden_wall(obs):
    """Vote for the last action slot when wood is near and bombing is allowed.

    Returns a list of six numbers, all 0 unless both ``_near_wood`` and
    ``_maybe_bomb`` give a truthy answer, in which case the last entry is 1.
    """
    votes = [0] * 6
    position = tuple(obs['position'])
    grid = np.array(obs['board'])
    active_bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    foes = [consts.Item(e) for e in obs['enemies']]
    bombs_left = int(obs['ammo'])
    blast = int(obs['blast_strength'])
    items, dist, prev = _djikstra(grid, position, active_bombs, foes, depth=10)

    # _maybe_bomb is only consulted when wood was found nearby.
    should_bomb = (_near_wood(position, items, dist, prev, 1)
                   and _maybe_bomb(bombs_left, blast, items, dist, position))
    if should_bomb:
        votes[-1] = 1
    return votes
        def action():  #pylint: disable=W0612
            '''Handle one action request over HTTP.

            Reads the JSON request body, decodes its "obs" and
            "action_space" entries (each itself a JSON string), converts the
            observation fields to the types the agent expects, and returns
            the chosen action as a JSON response.
            '''
            payload = request.get_json()
            observation = json.loads(payload.get("obs"))

            observation['teammate'] = constants.Item(observation['teammate'])
            # Replace raw enemy codes with enum members, in place.
            enemies = observation['enemies']
            for idx, raw_enemy in enumerate(enemies):
                enemies[idx] = constants.Item(raw_enemy)
            observation['position'] = tuple(observation['position'])

            array_fields = (
                ('board', np.uint8),
                ('bomb_life', np.float64),
                ('bomb_blast_strength', np.float64),
            )
            for field, dtype in array_fields:
                observation[field] = np.array(observation[field], dtype=dtype)

            action_space = json.loads(payload.get("action_space"))
            chosen = self.act(observation, action_space)
            return jsonify({"action": chosen})
Exemplo n.º 9
0
def kill_enemy(obs):
    """Vote for the last action slot when an enemy is adjacent.

    Returns a list of six numbers, all 0 unless ``_is_adjacent_enemy`` and
    ``_maybe_bomb`` both give a truthy answer, in which case the last entry
    is 1.
    """
    position = tuple(obs['position'])
    grid = np.array(obs['board'])
    active_bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    foes = [consts.Item(e) for e in obs['enemies']]
    bombs_left = int(obs['ammo'])
    blast = int(obs['blast_strength'])
    items, dist, prev = _djikstra(grid, position, active_bombs, foes, depth=10)

    votes = [0] * 6
    if not _is_adjacent_enemy(items, dist, foes):
        return votes
    # Only reached when an enemy is adjacent.
    if _maybe_bomb(bombs_left, blast, items, dist, position):
        votes[-1] = 1
    return votes
Exemplo n.º 10
0
    def __init__(self, obs, init=False, bombing_agents=None, board_size=11):
        """Initialise the state from one agent's observation dict.

        Args:
            obs: observation dict; the keys 'position', 'board',
                'bomb_life', 'teammate', 'enemies' and 'ammo' are read here,
                plus 'bomb_blast_strength' when ``init`` is true.
            init: when true, also derive ``curr_flames``, ``curr_bombs``,
                ``curr_items``, ``curr_agents`` and ``last_items`` from the
                observation.
            bombing_agents: optional mapping stored as a deep copy on
                ``self.bombing_agents``. When it is non-empty and ``init``
                is true, ``curr_bombs`` is rebuilt via
                ``convert_bombs_two``. ``None`` (the default) means empty.
            board_size: stored on ``self._board_size``.
        """
        # A literal `{}` default is a single dict shared by every call;
        # use a None sentinel and create a fresh dict instead.
        if bombing_agents is None:
            bombing_agents = {}

        self._game_mode = constants.GameType.FFA
        self.move = None

        self._board_size = board_size
        self._obs = obs
        self._my_position = tuple(obs['position'])
        self._board = np.array(obs['board'])
        self._bomb_life = np.array(self._obs['bomb_life'])
        self._teammate = obs['teammate']
        self._enemies = [constants.Item(e) for e in obs['enemies']]
        self._ammo = int(obs['ammo'])
        self.fm = forward_model.ForwardModel()

        self.self_agent = self.find_self_agent(self._obs)

        agents_id = [
            constants.Item.Agent0, constants.Item.Agent1, constants.Item.Agent2,
            constants.Item.Agent3
        ]

        self._agents = [
            characters.Bomber(aid.value, "FFA") for aid in agents_id
        ]  # remember to modify if it is team or radio mode

        # Deep copy so later changes by the caller do not leak into this state.
        self.bombing_agents = copy.deepcopy(bombing_agents)

        self.score = 0
        if init:
            self.curr_flames = self.convert_flames(
                self._board)  # determine by confirming the map
            self.curr_bombs = self.convert_bombs(
                np.array(obs['bomb_blast_strength']),
                np.array(obs['bomb_life']))
            self.curr_items = self.convert_items(self._board)
            self.curr_agents = self.convert_agents(self._board)
            self.last_items = self.curr_items
            # With known bombing agents, the bombs are rebuilt with that
            # extra information.
            if (bombing_agents != {}):
                self.curr_bombs = self.convert_bombs_two(
                    np.array(self._obs['bomb_blast_strength']), self._bomb_life,
                    bombing_agents)
Exemplo n.º 11
0
    def find_next_move(self, obs, action_space, win_condition, score_func,
                       bombing_agents):
        """Search from ``obs`` and return the move of the best-scoring child.

        Stores the arguments on ``self``, builds a ``gn.Tree`` rooted at the
        observation, runs ``self.bfs`` from the root, and then picks the
        root child with the highest ``score``.

        Args:
            obs: observation dict; 'board' and 'enemies' are read here and
                the whole dict is handed to ``gn.Tree``.
            action_space: stored on ``self.action_space``.
            win_condition: stored on ``self.win_condition``.
            score_func: stored on ``self.score_func``.
            bombing_agents: stored on ``self.bombing_agents`` and passed to
                ``gn.Tree``.

        Returns:
            ``winner_node.state.move`` of the best child, or
            ``constants.Action.Stop.value`` when no child scores above -1.
        """
        self.action_space = action_space
        self.win_condition = win_condition
        self.score_func = score_func
        self.bombing_agents = bombing_agents

        self.board = np.array(obs['board'])
        self._enemies = [constants.Item(e) for e in obs['enemies']]

        tree = gn.Tree(obs, True, self.bombing_agents)
        # get the root node
        self.rootNode = tree.get_root_node()

        # need way to find terminating condition
        # NOTE(review): presumably a time budget read by self.bfs together
        # with start_time; confirm the unit there.
        self.end_time = 30
        start_time = time.time()

        self.bfs(self.rootNode, start_time)

        # Only children scoring strictly above -1 can win; ties keep the
        # earlier child.
        max_score = -1
        winner_node = None
        for child in self.rootNode.childArray:
            if child.score > max_score:
                max_score = child.score
                winner_node = child

        if winner_node is None:
            return constants.Action.Stop.value
        return winner_node.state.move
Exemplo n.º 12
0
    def reward(self, id, current_obs, info):
        """Compute the shaped reward for agent ``id`` and update its stats.

        Contributions, all summed:
          * +0.01 for stepping onto an IncrRange or an ExtraBomb cell of the
            previous board;
          * +0.02 for stepping onto a Kick cell when the agent could not
            kick before;
          * for every agent value 10..13 that is in ``self.alive_agents``
            but no longer in the current 'alive' list: +0.5 if it is one of
            this agent's enemies, -1 if it is this agent itself, -0.5
            otherwise;
          * -1 when ``info['result']`` is a tie.

        The matching counters in ``self.stat[id]`` are updated alongside.
        """
        total = 0
        prev_board = self.prev_obs[id]['board']
        position = current_obs[id]['position']

        # Counted power-ups share the same bonus and bookkeeping.
        counted_pickups = (
            (constants.Item.IncrRange, Metrics.IncrRange),
            (constants.Item.ExtraBomb, Metrics.ExtraBomb),
        )
        for pickup, metric in counted_pickups:
            if utility._position_is_item(prev_board, position, pickup):
                total += 0.01
                self.stat[id][metric.name] += 1

        # Kick is only rewarded the first time and is stored as a flag.
        picked_kick = utility._position_is_item(prev_board, position,
                                                constants.Item.Kick)
        if picked_kick and not self.prev_obs[id]['can_kick']:
            total += 0.02
            self.stat[id][Metrics.Kick.name] = True

        for agent_value in range(10, 14):
            if agent_value not in self.alive_agents:
                continue
            if agent_value in current_obs[id]['alive']:
                continue
            # This agent was alive before and is gone now.
            if constants.Item(value=agent_value) in current_obs[id]['enemies']:
                total += 0.5
                self.stat[id][Metrics.EnemyDeath.name] += 1
            elif agent_value - 10 == id:
                total -= 1
                self.stat[id][Metrics.DeadOrSuicide.name] += 1
            else:
                total -= 0.5

        if info['result'] == constants.Result.Tie:
            total -= 1

        return total
Exemplo n.º 13
0
    def _find_reachable_items(self, list_boards, my_position, time_positions):

        """
        Scan the survivable time-positions and record what can be reached.

        Parameters
        ----------
        list_boards : list
            boards indexed by time step, generated by _board_sequence
        my_position : tuple
            my position, where the search starts (not read in this body)
        time_positions : list
            time_positions[t] holds the positions survivable at time t,
            generated by _search_time_expanded_network

        Return
        ------
        items : dict
            items[item] : list of (t, x, y) entries at which item was found,
                          including adjacent agents and wood
        reached : array
            earliest time each board position is reached (np.inf if never)
        next_to_items : dict
            next_to_items[item] : list of (t, x, y) entries of reached
                                  positions that are adjacent to item
        """

        neighbor_offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))

        found = defaultdict(list)
        adjacent = defaultdict(list)

        # earliest[position] : first time step at which position is reached
        earliest = np.full(self.board_shape, np.inf)
        # seen[position] : position was already reached or inspected as a
        # neighbour, so it is not inspected as a neighbour again
        seen = np.full(self.board_shape, False)

        for step, survivable in enumerate(time_positions):
            board_now = list_boards[step]
            for cell in survivable:
                if earliest[cell] < np.inf:
                    # already reached at an earlier time step
                    continue
                earliest[cell] = step
                stamped_cell = (step,) + cell
                found[constants.Item(board_now[cell])].append(stamped_cell)
                seen[cell] = True

                cell_x, cell_y = cell
                for off_x, off_y in neighbor_offsets:
                    neighbor = (cell_x + off_x, cell_y + off_y)
                    if not self._on_board(neighbor) or seen[neighbor]:
                        continue
                    seen[neighbor] = True
                    stamped_neighbor = (step,) + neighbor

                    if utility.position_is_agent(board_now, neighbor):
                        kind = constants.Item(board_now[neighbor])
                        found[kind].append(stamped_neighbor)
                        adjacent[kind].append(stamped_cell)
                    # wood is tested on the last board, so walls that will
                    # have been destroyed by then are ignored
                    if utility.position_is_wood(list_boards[-1], neighbor):
                        kind = constants.Item(board_now[neighbor])
                        found[kind].append(stamped_neighbor)
                        adjacent[kind].append(stamped_cell)

        return found, earliest, adjacent
Exemplo n.º 14
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        #board = obs['board']
        board = info["recently_seen"]
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [
            constants.Item(e) for e in obs['enemies']
            if e != constants.Item.AgentDummy
        ]
        if obs["teammate"] != constants.Item.AgentDummy:
            my_teammate = obs["teammate"]
        else:
            my_teammate = None
        my_kick = obs["can_kick"]  # whether I can kick

        #
        # Understand current situation
        #

        # positions that might be blocked
        if info["teammate_position"] is None:
            agent_positions = info["enemy_positions"]
        else:
            agent_positions = info["enemy_positions"] + [
                info["teammate_position"]
            ]

        # survivable actions

        if len(info["enemy_positions"]) > 0:
            mobility = self._enemy_mobility
        else:
            mobility = 0

        n_survivable, is_survivable, list_boards \
            = self._get_survivable(obs, info, my_position, info["my_next_position"], agent_positions,
                                   info["all_kickable"], allow_kick_to_fog=False,
                                   enemy_mobility=mobility, enemy_bomb=self._enemy_bomb,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        for a in info["might_block_actions"]:
            n_survivable[a] = np.zeros(self._search_range)
            is_survivable[a] = False

        survivable_actions = list()
        for a in is_survivable:
            if not is_survivable[a]:
                continue
            if info["might_blocked"][a] and not is_survivable[
                    constants.Action.Stop]:
                continue
            if n_survivable[a][-1] <= 1:
                is_survivable[a] = False
                continue
            survivable_actions.append(a)

        #
        # Choose action
        #

        if len(survivable_actions) == 0:

            #
            # return None, if no survivable actions
            #

            return None

        elif len(survivable_actions) == 1:

            #
            # Choose the survivable action, if it is the only choice
            #

            action = survivable_actions[0]
            return action.value

        if all([
                info["prev_action"]
                not in [constants.Action.Stop, constants.Action.Bomb],
                info["prev_position"] == my_position
        ]):
            # if previously blocked, do not reapeat with some probability
            self._inv_tmp *= self._backoff
        else:
            self._inv_tmp = self._inv_tmp_init

        #
        # Bomb at a target
        #

        # fraction of blocked node in the survival trees of enemies
        _list_boards = info["list_boards_no_move"]
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes, blocked_time_positions \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

        if info["teammate_position"] is not None:
            total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
                = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

            if n_survivable_nodes_teammate[my_teammate] > 0:
                LB = self._teammate_survivability_threshold / n_survivable_nodes_teammate[
                    my_teammate]
                positions_teammate_safe = np.where(
                    total_frac_blocked_teammate < LB)
                total_frac_blocked_teammate[positions_teammate_safe] = 0

        p_survivable = defaultdict(float)
        for action in n_survivable:
            p_survivable[action] = sum(
                n_survivable[action]) / self._my_survivability_threshold
            if p_survivable[action] > 1:
                p_survivable[action] = 1

        block = defaultdict(float)
        for action in [
                constants.Action.Stop, constants.Action.Up,
                constants.Action.Down, constants.Action.Left,
                constants.Action.Right
        ]:
            next_position = info["my_next_position"][action]
            if next_position is None:
                continue
            if next_position in info["all_kickable"]:
                # kick will be considered later
                continue
            if all([
                    utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1,
                    is_survivable[constants.Action.Stop]
            ]):
                # if the next position is flames,
                # I want to stop to wait, which must be feasible
                block[action] = total_frac_blocked[
                    next_position] * p_survivable[constants.Action.Stop]
                if info["teammate_position"] is not None:
                    block[action] *= (
                        1 - total_frac_blocked_teammate[next_position])
                if block[action] > 0:
                    block[action] *= self._inv_tmp
                    block[action] -= np.log(-np.log(self.random.uniform()))
                continue
            elif not is_survivable[action]:
                continue
            if all([
                    info["might_blocked"][action],
                    not is_survivable[constants.Action.Stop]
            ]):
                continue

            block[action] = total_frac_blocked[next_position] * p_survivable[
                action]
            if info["teammate_position"] is not None:
                block[action] *= (1 -
                                  total_frac_blocked_teammate[next_position])
            if block[action] > 0:
                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

            if info["might_blocked"][action]:
                block[action] = (total_frac_blocked[my_position] *
                                 p_survivable[constants.Action.Stop] +
                                 total_frac_blocked[next_position] *
                                 p_survivable[action]) / 2
                if info["teammate_position"] is not None:
                    block[action] *= (
                        1 - total_frac_blocked_teammate[next_position])
                if block[action] > 0:
                    block[action] *= self._inv_tmp
                    block[action] -= np.log(-np.log(self.random.uniform()))

        if is_survivable[constants.Action.Bomb]:
            list_boards_with_bomb, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=constants.Action.Bomb,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])

            n_survivable_nodes_with_bomb = defaultdict(int)
            for enemy_position in info["enemy_positions"]:
                # get survivable tree of the enemy
                _survivable = search_time_expanded_network(
                    list_boards_with_bomb, enemy_position)
                n_survivable_nodes_with_bomb[enemy_position] = sum(
                    [len(positions) for positions in _survivable])

            n_with_bomb = sum([
                n_survivable_nodes_with_bomb[enemy_position]
                for enemy_position in info["enemy_positions"]
            ])
            n_with_none = sum(
                [n_survivable_nodes[enemy] for enemy in my_enemies])
            if n_with_none == 0:
                total_frac_blocked_with_bomb = 0

                # place more bombs, so the stacked enemy cannot kick
                x, y = my_position
                for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    next_position = (x + dx, y + dy)
                    following_position = (x + 2 * dx, y + 2 * dy)
                    if not self._on_board(following_position):
                        continue
                    if all([
                            obs["bomb_life"][next_position] > 0,
                            board[following_position] >
                            constants.Item.AgentDummy.value
                    ]):
                        total_frac_blocked_with_bomb = 1
            else:
                total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none

            action = constants.Action.Bomb
            block[action] = total_frac_blocked_with_bomb
            # block[action] += total_frac_blocked[my_position] * (eisenachAgents - total_frac_blocked_with_bomb)
            block[action] *= p_survivable[action]

            block_teammate_with_bomb = None
            if block[action] > 0:
                if info["teammate_position"] is not None:
                    block_teammate_with_bomb \
                        = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                           n_survivable_nodes_teammate,
                                                           board,
                                                           [my_teammate],
                                                           ignore_dying_agent=True)

                    block_teammate_with_bomb \
                        += total_frac_blocked_teammate[my_position] * (1 - block_teammate_with_bomb)
                    block[action] *= (1 - block_teammate_with_bomb)

                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

        for next_position in info["kickable"]:

            action = self._get_direction(my_position, next_position)
            if not is_survivable[action]:
                continue

            list_boards_with_kick, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=action,
                                       can_kick=True,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])

            block[action] \
                = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies,
                                                   ignore_dying_agent=True)
            block[action] += total_frac_blocked[next_position] * (
                1 - block[action])
            block[action] *= p_survivable[action]

            if block[action] > 0:
                if info["teammate_position"] is not None:
                    block_teammate_with_kick \
                        = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                           n_survivable_nodes_teammate,
                                                           board, [my_teammate],
                                                           ignore_dying_agent=True)
                    block_teammate_with_kick \
                        += total_frac_blocked_teammate[next_position] * (1 - block_teammate_with_kick)
                    block[action] *= (1 - block_teammate_with_kick)

                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

        max_block = 0  # do not choose zero blocking action as the best
        best_action = None
        for action in block:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp:
            if info["teammate_position"] is not None:
                if block_teammate_with_bomb is None:
                    block_teammate_with_bomb \
                        = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                           n_survivable_nodes_teammate,
                                                           board,
                                                           [my_teammate],
                                                           ignore_dying_agent=True)

                teammate_safety = block_teammate_with_bomb * n_survivable_nodes_teammate[
                    my_teammate]
                if any([
                        teammate_safety >
                        self._teammate_survivability_threshold,
                        block_teammate_with_bomb < self._interfere_threshold,
                        block_teammate_with_bomb <
                        total_frac_blocked_teammate[my_position]
                ]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if teammate_ok:
                if best_action == constants.Action.Bomb:
                    return constants.Action.Bomb.value

                if best_action == constants.Action.Stop:
                    return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #

        if best_action not in [None, constants.Action.Bomb]:
            next_position = info["my_next_position"][best_action]

            should_chase = (total_frac_blocked[next_position] >
                            self._chase_threshold)

            if info["teammate_position"] is not None:
                teammate_safety = total_frac_blocked_teammate[
                    next_position] * n_survivable_nodes_teammate[my_teammate]
                if any([
                        teammate_safety >
                        self._teammate_survivability_threshold,
                        total_frac_blocked_teammate[next_position] <
                        self._interfere_threshold,
                        total_frac_blocked_teammate[next_position] <
                        total_frac_blocked_teammate[my_position]
                ]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if should_chase and teammate_ok:
                if all([
                        utility.position_is_flames(board, next_position),
                        info["flame_life"][next_position] > 1,
                        is_survivable[constants.Action.Stop]
                ]):
                    action = constants.Action.Stop
                    return action.value
                else:
                    return best_action.value

        # Exclude the action representing stop to wait
        max_block = 0  # do not choose zero blocking action as the best
        best_action = None
        for action in survivable_actions:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        #
        # Do not take risky actions when not interacting with enemies
        #

        most_survivable_action = self._action_most_survivable(n_survivable)

        if total_frac_blocked[my_position] > 0:
            # ignore actions with low survivability
            _survivable_actions = list()
            for action in n_survivable:
                n = sum(n_survivable[action])
                if not is_survivable[action]:
                    continue
                elif n > self._my_survivability_threshold:
                    _survivable_actions.append(action)
                else:
                    is_survivable[action] = False

            if len(_survivable_actions) > 1:
                survivable_actions = _survivable_actions
            elif best_action is not None:
                return best_action.value
            else:
                # Take the most survivable action
                return most_survivable_action.value

        #
        # Do not interfere with teammate
        #

        if all([
                info["teammate_position"] is not None,
                len(info["enemy_positions"]) > 0 or len(info["curr_bombs"]) > 0
        ]):
            # ignore actions that interfere with teammate
            min_interfere = np.inf
            least_interfere_action = None
            _survivable_actions = list()
            for action in survivable_actions:
                if action == constants.Action.Bomb:
                    """
                    if block_teammate_with_bomb is None:
                        block_teammate_with_bomb \
                            = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                               n_survivable_nodes_teammate,
                                                               board,
                                                               [my_teammate],
                                                               ignore_dying_agent=True)                        
                    frac = block_teammate_with_bomb 
                    """
                    continue
                else:
                    next_position = info["my_next_position"][action]
                    frac = total_frac_blocked_teammate[next_position]
                if frac < min_interfere:
                    min_interfere = frac
                    least_interfere_action = action
                if frac < self._interfere_threshold:
                    _survivable_actions.append(action)
                else:
                    is_survivable[action] = False

            if len(_survivable_actions) > 1:
                survivable_actions = _survivable_actions
            elif best_action is not None:
                # Take the least interfering action
                return best_action.value
            else:
                return least_interfere_action.value

        consider_bomb = True
        if not is_survivable[constants.Action.Bomb]:
            consider_bomb = False

        #
        # Find reachable items
        #

        # List of boards simulated
        list_boards, _ = self._board_sequence(
            board,
            info["curr_bombs"],
            info["curr_flames"],
            self._search_range,
            my_position,
            enemy_mobility=mobility,
            enemy_bomb=self._enemy_bomb,
            enemy_positions=agent_positions,
            agent_blast_strength=info["agent_blast_strength"],
            step_to_collapse=info["step_to_collapse"],
            collapse_ring=info["collapse_ring"])

        # some bombs may explode with extra bombs, leading to under estimation
        for t in range(len(list_boards)):
            flame_positions = np.where(
                info["list_boards_no_move"][t] == constants.Item.Flames.value)
            list_boards[t][flame_positions] = constants.Item.Flames.value

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, _, _ = self._search_time_expanded_network(
            list_boards, my_position)
        if len(survivable[-1]) == 0:
            survivable = [set() for _ in range(len(survivable))]

        # Items and bomb target that can be reached in a survivable manner
        if "escape" in info:
            reachable_items, _, next_to_items \
                = self._find_reachable_items(list_boards,
                                             my_position,
                                             survivable,
                                             might_powerup=info["escape"])  # might_powerup is the escape from collapse
        else:
            _, _, next_to_items \
                = self._find_reachable_items(list_boards,
                                             my_position,
                                             survivable)

        #
        # If I have seen an enemy recently and cannot see him now, them move to the last seen position
        #

        action = self._action_to_enemy(my_position,
                                       next_to_items[constants.Item.Fog], prev,
                                       is_survivable, info)
        if action is not None:
            return action.value

        #
        # If I have seen a teammate recently, them move away from the last seen position
        #

        action = self._action_away_from_teammate(
            my_position, next_to_items[constants.Item.Fog], prev,
            is_survivable, info)
        if action is not None:
            return action.value

        #
        # Move to the places that will not be collapsed
        #

        if "escape" in info:
            # might_powerup is the escape from collapse
            action = self._action_to_might_powerup(my_position,
                                                   reachable_items, prev,
                                                   is_survivable)
            if action is not None:
                print("Escape from collapse", action)
                return action.value

        #
        # Move towards a fog where we have not seen longest
        #

        action = self._action_to_fog(my_position,
                                     next_to_items[constants.Item.Fog], prev,
                                     is_survivable, info)

        if action is not None:
            #if True:
            if self.random.uniform() < 0.8:
                return action.value

        #
        # Choose most survivable action
        #

        max_block = 0
        best_action = None
        for action in survivable_actions:
            if action == constants.Action.Bomb:
                continue
            score = block[action]
            if action != constants.Action.Bomb:
                score += np.random.uniform(0, 1e-3)
            if score > max_block:
                max_block = score
                best_action = action

        if best_action is None:
            max_p = 0
            best_action = None
            for action in p_survivable:
                score = p_survivable[action]
                if action != constants.Action.Bomb:
                    score += np.random.uniform(0, 1e-3)
                if score > max_p:
                    max_p = score
                    best_action = action

        if best_action is None:
            # this should not be the case
            return None
        else:
            return best_action.value
Exemplo n.º 15
0
    def _find_safe_directions(self, board, my_position, unsafe_directions,
                              bombs, enemies, item):
        """Return the directions the agent may take given the known bomb threats.

        Args:
            board: 2-D board array, indexed with ``(x, y)`` position tuples.
            my_position: ``(x, y)`` tuple of the agent's current cell.
            unsafe_directions: mapping whose keys are threatened directions;
                each value is used as the range of the threatening bomb.
            bombs: iterable of dicts providing ``'position'``, ``'bomb_life'``
                and ``'blast_strength'``.
            enemies: forwarded to ``utility.position_is_passable``.
            item: mapping indexed by ``constants.Item``; only the length of
                the ``constants.Item(3)`` entry is read (presumably the list
                of visible bombs -- TODO confirm against the caller).

        Returns:
            A non-empty list of directions. ``[constants.Action.Stop]`` is
            returned when no better option exists.
        """

        def is_stuck_direction(next_position, bomb_range, next_board, enemies):
            """Return False as soon as the search from ``next_position`` finds
            a cell that is off both its row and its column, or whose distance
            exceeds ``bomb_range``; return True when the search runs dry."""
            frontier = queue.PriorityQueue()
            frontier.put((0, next_position))
            seen = set()

            next_x, next_y = next_position
            while not frontier.empty():
                dist, position = frontier.get()
                seen.add(position)

                # NOTE(review): the original carried a "FIXME is_stuck=False"
                # marker here; its intent is not visible from this block.
                position_x, position_y = position
                if next_x != position_x and next_y != position_y:
                    # Left both the row and the column of next_position.
                    return False

                if dist > bomb_range:
                    return False

                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    new_position = (row + position_x, col + position_y)
                    if new_position in seen:
                        continue

                    if not utility.position_on_board(next_board, new_position):
                        continue

                    if not utility.position_is_passable(
                            next_board, new_position, enemies):
                        continue

                    # Manhattan distance from the candidate first step; kept in
                    # its own name so the popped ``dist`` is not shadowed.
                    new_dist = (abs(new_position[0] - next_x) +
                                abs(new_position[1] - next_y))
                    frontier.put((new_dist, new_position))
            return True

        def in_imminent_blast(position):
            """True when a bomb with ``bomb_life == 1`` shares a row or column
            with ``position`` within ``blast_strength`` cells (obstacles in
            between are not taken into account)."""
            for bomb in bombs:
                if bomb['bomb_life'] != 1:
                    continue
                bomb_x, bomb_y = bomb['position']
                strength = bomb['blast_strength']
                if bomb_x == position[0] and \
                   abs(bomb_y - position[1]) <= strength:
                    return True
                if bomb_y == position[1] and \
                   abs(bomb_x - position[0]) <= strength:
                    return True
            return False

        # All directions are unsafe. Return a position that won't leave us locked.
        if len(unsafe_directions) == 4:
            next_board = board.copy()
            # The search below treats our own cell as occupied by a bomb.
            next_board[my_position] = constants.Item.Bomb.value
            disallowed = []
            for direction, bomb_range in unsafe_directions.items():
                next_position = utility.get_next_position(
                    my_position, direction)
                if not utility.position_on_board(next_board, next_position) or \
                   not utility.position_is_passable(next_board, next_position, enemies):
                    disallowed.append(direction)
                    continue

                if not is_stuck_direction(next_position, bomb_range,
                                          next_board, enemies):
                    # We found a direction that works. The .items provided
                    # a small bit of randomness. So let's go with this one.
                    return [direction]

            # Before settling on Stop: if exactly one entry is listed under
            # constants.Item(3) and a bomb sits on our own cell, offer every
            # unsafe direction that is not disallowed instead of standing still.
            safe = []
            if len(item[constants.Item(3)]) == 1:
                if any(my_position == bomb['position'] for bomb in bombs):
                    safe = [
                        k for k in unsafe_directions if k not in disallowed
                    ]
            if not safe:
                safe = [constants.Action.Stop]
            return safe

        x, y = my_position
        safe = []
        disallowed = []  # The directions that will go off the board.

        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            position = (x + row, y + col)
            direction = utility.get_direction(my_position, position)

            # Don't include any direction that will go off of the board.
            if not utility.position_on_board(board, position):
                disallowed.append(direction)
                continue

            # Don't include any direction that we know is unsafe.
            if direction in unsafe_directions:
                # An unsafe cell that is also impassable is disallowed
                # outright, so the fallback below never walks into it
                # (per the original note: to avoid kicking a bomb).
                if not utility.position_is_passable(board, position, enemies):
                    disallowed.append(direction)

                # Original author's open idea: also reject an unsafe move that
                # would get us killed at exactly that moment, decided from the
                # bomb's remaining life. Not implemented.
                continue

            if utility.position_is_passable(
                    board, position, enemies) or utility.position_is_fog(
                        board, position):
                # The cell itself is free, but a bomb nearby may be about to
                # explode onto it; keep the direction only if none is.
                if not in_imminent_blast(position):
                    safe.append(direction)

        if not safe:
            # We don't have any safe directions, so return something that is allowed.
            safe = [k for k in unsafe_directions if k not in disallowed]

        if not safe:
            # We don't have ANY directions. So return the stop choice.
            return [constants.Action.Stop]

        return safe
Exemplo n.º 16
0
    def act(self, obs, action_space):
        """Preprocess the observation, update the agent's memory, and delegate to a slave agent.

        The method refreshes the persistent state kept on ``self`` (known
        rigid cells, last-seen board contents, previous bomb/flame data,
        estimated blast strengths, last-seen agent positions), collects the
        derived data in an ``info`` dict, and then asks one slave agent for
        the action: ``isolated_slave`` while ``self._isolated`` holds,
        otherwise ``generic_slave``; ``surviving_slave`` is the fallback when
        the chosen slave returns ``None``.

        Args:
            obs: observation dict. This method reads the keys "board",
                "bomb_life", "bomb_blast_strength", "position", "enemies"
                and "teammate", and forwards the whole dict to the slave.
            action_space: forwarded unchanged to the slave agents.

        Returns:
            Whatever the selected slave agent's ``act`` returns.
        """

        # Wall-clock start; used at the end to update self.max_time
        t0 = time.perf_counter()
        
        # The number of steps
        self._steps += 1

        # TODO: deepcopy are not needed with Docker
        board = deepcopy(obs["board"])
        bomb_life = deepcopy(obs["bomb_life"])
        bomb_blast_strength = deepcopy(obs["bomb_blast_strength"])
        my_position = obs["position"]
        # AgentDummy entries in obs['enemies'] are dropped
        my_enemies = [constants.Item(e) for e in obs['enemies'] if e != constants.Item.AgentDummy]
        if obs["teammate"] != constants.Item.AgentDummy:
            my_teammate = obs["teammate"]
        else:
            my_teammate = None

        # Teammate position is the first board cell holding the teammate's
        # value, or None when the teammate is absent or not visible
        teammate_position = None
        if my_teammate is not None:
            rows, cols = np.where(board==my_teammate.value)
            if len(rows):
                teammate_position = (rows[0], cols[0])

        # Everything the slave agents need beyond obs is collected here
        info = dict()
        info["prev_action"] = self._prev_action
        info["prev_position"] = self._prev_position
        
        #
        # Whether each location is Rigid
        #
        #  False may be an unknown
        #
        # Keep refining the map until NUM_RIGID rigid cells have been found.
        # The transposed board is accumulated as well -- presumably the rigid
        # layout is symmetric about the main diagonal (TODO confirm).
        # NOTE(review): assumes self._is_rigid is a boolean array, so that
        # += acts as a logical OR -- confirm where it is initialised.
        if self._num_rigid_found < constants.NUM_RIGID:
            self._is_rigid += (board == constants.Item.Rigid.value)
            self._is_rigid += (board.T == constants.Item.Rigid.value)
            self._num_rigid_found = np.sum(self._is_rigid)
            self._rigid_locations = np.where(self._is_rigid)
            # Complement of what _get_reachable reports for the known rigid cells
            self._unreachable = ~self._get_reachable(self._is_rigid)
            self._unreachable_locations = np.where(self._unreachable)
            
        #
        # What we have seen last time, and how many steps have passed since then
        #
        #  Once we see a Rigid, we always see it
        #
        visible_locations = np.where(board != constants.Item.Fog.value)
        self._last_seen[visible_locations] = board[visible_locations]
        #self._last_seen[self._rigid_locations] = constants.Item.Rigid.value
        # unreachable -> rigid
        self._last_seen[self._unreachable_locations] = constants.Item.Rigid.value
        # Age every cell by one step, then reset the age of cells that are
        # visible now or known to be rigid
        self._since_last_seen += 1
        self._since_last_seen[visible_locations] = 0
        self._since_last_seen[np.where(self._is_rigid)] = 0
        # NOTE(review): self._steps was already incremented above, so this
        # branch only runs on the first call if the counter starts at -1 --
        # confirm against the initialisation code.
        if self._steps == 0:
            # We have some knowledge about the initial configuration of the board
            C = constants.BOARD_SIZE - 2
            # The four agents are recorded at the four cells one step in from the corners
            self._last_seen[(1, 1)] = constants.Item.Agent0.value
            self._last_seen[(C, 1)] = constants.Item.Agent1.value
            self._last_seen[(C, C)] = constants.Item.Agent2.value
            self._last_seen[(1, C)] = constants.Item.Agent3.value
            rows = np.array([1, C, 1, C])
            cols = np.array([1, 1, C, C])
            self._since_last_seen[(rows, cols)] = 0
            # Two cells next to each of those starting cells, along both
            # axes, are recorded as Passage
            rows = np.array([1, 1, 1, 1, 2, 3, C - 1, C - 2, C, C, C, C, 2, 3, C - 1, C - 2])
            cols = np.array([2, 3, C - 1, C - 2, 1, 1, 1, 1, 2, 3, C - 1, C - 2, C, C, C, C])
            self._last_seen[(rows, cols)] = constants.Item.Passage.value
            self._since_last_seen[(rows, cols)] = 0

        # These are the live arrays held on self, not copies
        info["since_last_seen"] = self._since_last_seen
        info["last_seen"] = self._last_seen
            
        #
        # Modify the board
        #

        #fog_positions = np.where(board==constants.Item.Fog.value)
        #board[fog_positions] = self._last_seen[fog_positions]

        # Unreachable cells are treated as Rigid on the working copy of the board
        board[self._unreachable_locations] = constants.Item.Rigid.value

        #
        # Summarize information about bombs
        #
        #  curr_bombs : list of current bombs
        #  moving_direction : array of moving direction of bombs
        info["curr_bombs"], info["moving_direction"] \
            = self._get_bombs(board,
                              bomb_blast_strength, self._prev_bomb_blast_strength,
                              bomb_life, self._prev_bomb_life)

        # Remember this step's bomb arrays for the next call
        self._prev_bomb_life = bomb_life.copy()
        self._prev_bomb_blast_strength = bomb_blast_strength.copy()

        #
        # (position, strength) of every cell with a positive blast strength;
        # stored below as _prev_bomb_position_strength for the next call
        #
        curr_bomb_position_strength = list()
        rows, cols = np.where(bomb_blast_strength > 0)
        for position in zip(rows, cols):
            strength = int(bomb_blast_strength[position])
            curr_bomb_position_strength.append((position, strength))

        #
        # Summarize information about flames
        #
        # Flames are only estimated once a board history exists; the most
        # recent stored board (self._prev_board[-1]) is passed to _get_flames
        if self._prev_board is not None:
            info["curr_flames"], self._prev_flame_life \
                = self._get_flames(board,
                                   self._prev_board[-1],
                                   self._prev_flame_life,
                                   self._prev_bomb_position_strength,
                                   curr_bomb_position_strength,
                                   self._prev_moving_direction)
        else:
            info["curr_flames"] = []
        # When no history exists this is the value _prev_flame_life already had
        info["flame_life"] = self._prev_flame_life

        self._prev_moving_direction = deepcopy(info["moving_direction"])

        self._prev_bomb_position_strength = curr_bomb_position_strength
        
        #
        # List of simulated boards, assuming enemies stay unmoved
        #

        info["list_boards_no_move"], _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   enemy_mobility=0)

        #
        # Might appear item from flames
        #

        info["might_powerup"] = np.full(self.board_shape, False)
        if self._prev_board is None:
            # Flame life is 2
            # flame life is hardcoded in pommmerman/characters.py class Flame
            # The history starts as three copies of the current board
            self._prev_board = [deepcopy(board), deepcopy(board), deepcopy(board)]
        else:
            # The history is a three-entry FIFO: drop the oldest board and
            # append the current one
            old_board = self._prev_board.pop(0)
            self._prev_board.append(deepcopy(board))
            if self._might_remaining_powerup:
                # was wood and now flames
                was_wood = (old_board == constants.Item.Wood.value)
                now_flames = (board == constants.Item.Flames.value)
                info["might_powerup"] = was_wood * now_flames

                # now wood and will passage
                now_wood = (board == constants.Item.Wood.value)
                become_passage = (info["list_boards_no_move"][-1] ==constants.Item.Passage.value)
                info["might_powerup"] += now_wood * become_passage

                # Stop tracking power-ups once no cell is a candidate and no
                # last-seen cell holds wood or a power-up item
                maybe_powerup = info["might_powerup"] \
                                + (self._last_seen == constants.Item.Wood.value) \
                                + (self._last_seen == constants.Item.ExtraBomb.value) \
                                + (self._last_seen == constants.Item.IncrRange.value) \
                                + (self._last_seen == constants.Item.Kick.value)            
                if not maybe_powerup.any():
                    self._might_remaining_powerup = False

        # update the estimate of enemy blast strength
        # Only bombs whose life equals DEFAULT_BOMB_LIFE - 1 are used
        # (presumably bombs laid on the previous step -- TODO confirm)
        rows, cols = np.where(bomb_life == constants.DEFAULT_BOMB_LIFE - 1)
        for position in zip(rows, cols):
            if position == my_position:
                continue
            # The board value at the bomb's cell is used as the dictionary key
            # NOTE(review): presumably the agent is still standing on its own
            # bomb so this value is the agent id -- confirm
            enemy = board[position]
            self._agent_blast_strength[enemy] = bomb_blast_strength[position]
        info["agent_blast_strength"] = self._agent_blast_strength

        # update the last seen enemy position
        for agent in self._since_last_seen_agent:
            self._since_last_seen_agent[agent] += 1

        # Enemies visible on the board get their position recorded and their counter reset
        for enemy in my_enemies:
            rows, cols = np.where(board == enemy.value)
            if len(rows):
                self._last_seen_agent_position[enemy] = (rows[0], cols[0])
                self._since_last_seen_agent[enemy] = 0
                continue

        # Same bookkeeping for the teammate when it is visible
        if teammate_position is not None:
            self._last_seen_agent_position[my_teammate] = teammate_position
            self._since_last_seen_agent[my_teammate] = 0            

        info["last_seen_agent_position"] = self._last_seen_agent_position
        info["since_last_seen_agent"] = self._since_last_seen_agent
        
        #
        # Choose a slave to act
        #

        # Isolated mode is switched off as soon as no wood is visible or
        # _is_closed returns False; nothing in this method turns it back on
        if self._isolated:
            is_wood_visible = (constants.Item.Wood.value in board)
            is_closed = self._is_closed(board, my_position)
            if any([not is_wood_visible, not is_closed]):
                self._isolated = False
                
        # `verbose` is not defined in this method; presumably a module-level
        # debug flag (TODO confirm)
        action = None
        if self._isolated:
            # Act with an agent who do not consider other agents
            if verbose:
                print("IsolatedAgent: ", end="")
            action = self.isolated_slave.act(obs, action_space, info)
#        elif not self._might_remaining_powerup:
            # Act with an agent who do not consider powerups
#            print("BattleAgent: ", end="")
#            action = self.battle_slave.act(obs, action_space, info)
        else:
            if verbose:
                print("GenericAgent: ", end="")
            action = self.generic_slave.act(obs, action_space, info)
        
        if action is None:
            # Act with a special agent, who only seeks to survive
            if verbose:
                print("\nSurvivingAgent: ", end="")
            action = self.surviving_slave.act(obs, action_space, info)

        # Track the longest time a single call has taken
        this_time = time.perf_counter() - t0
        if this_time > self.max_time:
            self.max_time = this_time

        # Remembered for the next call (exposed to slaves via info["prev_*"])
        self._prev_action = action
        self._prev_position = my_position
        
        return action
Exemplo n.º 17
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        board = obs['board']
        recently_seen_positions = (info["since_last_seen"] < 3)
        board[recently_seen_positions] = info["last_seen"][recently_seen_positions]
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies'] if e != constants.Item.AgentDummy]
        if obs["teammate"] != constants.Item.AgentDummy:
            my_teammate = obs["teammate"]
        else:
            my_teammate = None
        my_kick = obs["can_kick"]  # whether I can kick

        if verbose:
            print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t")

        my_next_position = {constants.Action.Stop: my_position,
                            constants.Action.Bomb: my_position}
        for action in [constants.Action.Up, constants.Action.Down,
                       constants.Action.Left, constants.Action.Right]:
            next_position = self._get_next_position(my_position, action)
            if self._on_board(next_position):
                if board[next_position] == constants.Item.Rigid.value:
                    my_next_position[action] = None
                else:
                    my_next_position[action] = next_position
            else:
                my_next_position[action] = None

        #
        # Understand current situation
        #

        if all([info["prev_action"] not in [constants.Action.Stop, constants.Action.Bomb],
                info["prev_position"] == my_position]):
            # if previously blocked, do not reapeat with some probability
            self._inv_tmp *= self._backoff
        else:
            self._inv_tmp = self._inv_tmp_init

        
        # enemy positions
        enemy_positions = list()
        for enemy in my_enemies:
            rows, cols = np.where(board==enemy.value)
            if len(rows) == 0:
                continue
            enemy_positions.append((rows[0], cols[0]))

        # teammate position
        teammate_position = None
        if my_teammate is not None:
            rows, cols = np.where(board==my_teammate.value)
            if len(rows):
                teammate_position = (rows[0], cols[0])
        
        # Positions where we kick a bomb if we move to
        if my_kick:
            kickable, might_kickable = self._kickable_positions(obs, info["moving_direction"])
        else:
            kickable = set()
            might_kickable = set()

        # positions that might be blocked
        if teammate_position is None:
            agent_positions = enemy_positions
        else:
            agent_positions = enemy_positions + [teammate_position]
        might_blocked = self._get_might_blocked(board, my_position, agent_positions, might_kickable)

        # enemy positions over time
        # these might be dissappeared due to extra flames
        if len(enemy_positions):
            rows = [p[0] for p in enemy_positions]
            cols = [p[1] for p in enemy_positions]
            list_enemy_positions = [(rows, cols)]
            _enemy_positions = list()
            for t in range(self._enemy_mobility):
                rows, cols = list_enemy_positions[-1]
                for x, y in zip(rows, cols):
                    for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1)]:
                        next_position = (x + dx, y + dy)
                        if not self._on_board(next_position):
                            continue
                        _board = info["list_boards_no_move"][t]
                        if utility.position_is_passage(_board, next_position):
                            _enemy_positions.append(next_position)
            _enemy_positions = set(_enemy_positions)
            rows = [p[0] for p in _enemy_positions]
            cols = [p[1] for p in _enemy_positions]
            list_enemy_positions.append((rows, cols))
        else:
            list_enemy_positions = []
            
        
        # survivable actions
        is_survivable = dict()
        for a in self._get_all_actions():
            is_survivable[a] = False
        n_survivable = dict()
        list_boards = dict()
        for my_action in self._get_all_actions():

            next_position = my_next_position[my_action]

            if next_position is None:
                continue

            if my_action == constants.Action.Bomb:
                if any([my_ammo == 0,
                        obs["bomb_blast_strength"][next_position] > 0]):
                    continue
            
            if all([utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1]):
                continue

            if all([my_action != constants.Action.Stop,
                    obs["bomb_blast_strength"][next_position] > 0,
                    next_position not in set.union(kickable, might_kickable)]):
                continue

            if next_position in set.union(kickable, might_kickable):
                # do not kick into fog
                dx = next_position[0] - my_position[0]
                dy = next_position[1] - my_position[1]
                position = next_position
                is_fog = False
                while self._on_board(position):
                    if utility.position_is_fog(board, position):
                        is_fog = True
                        break
                    position = (position[0] + dx, position[1] + dy)
                if is_fog:
                    continue
            
            # list of boards from next steps
            list_boards[my_action], _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=my_action,
                                       can_kick=my_kick,
                                       enemy_mobility=self._enemy_mobility,
                                       enemy_bomb=self._enemy_bomb,
                                       agent_blast_strength=info["agent_blast_strength"])

            # agents might be disappeared, because of overestimated bombs
            for t, positions in enumerate(list_enemy_positions):
                list_boards[my_action][t][positions] = constants.Item.AgentDummy.value
            
            # some bombs may explode with extra bombs, leading to under estimation
            for t in range(len(list_boards[my_action])):
                flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value)
                list_boards[my_action][t][flame_positions] = constants.Item.Flames.value
                
        """
        processed = Parallel(n_jobs=-1, verbose=0)(
            [delayed(search_time_expanded_network)(list_boards[action][1:], my_next_position[action], action)
             for action in list_boards]
        )
        for survivable, my_action in processed:
            if my_next_position[my_action] in survivable[0]:
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]]
        """
        
        for my_action in list_boards:
            survivable = search_time_expanded_network(list_boards[my_action][1:],
                                                      my_next_position[my_action])
            if my_next_position[my_action] in survivable[0]:
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]]
        
        survivable_actions = list()
        for a in is_survivable:
            if not is_survivable[a]:
                continue
            if might_blocked[a] and not is_survivable[constants.Action.Stop]:
                continue
            if n_survivable[a][-1] <= 1:
                is_survivable[a] = False
                continue
            survivable_actions.append(a)

        #
        # Choose action
        #
                
        if len(survivable_actions) == 0:

            #
            # return None, if no survivable actions
            #
        
            return None

        elif len(survivable_actions) == 1:

            #
            # Choose the survivable action, if it is the only choice
            #
            
            action = survivable_actions[0]
            if verbose:
                print("The only survivable action", action)
            return action.value


        #
        # Bomb at a target
        #

        # fraction of blocked node in the survival trees of enemies
        _list_boards = deepcopy(info["list_boards_no_move"])
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes, blocked_time_positions \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])
    
        if teammate_position is not None:
            total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
                = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

            """
            np.set_printoptions(precision=3)
            print("enemy")
            print(total_frac_blocked)
            print("teammate")
            print(total_frac_blocked_teammate)
            print("product")
            prod = total_frac_blocked * (1 - total_frac_blocked_teammate)
            print(prod[:5,:5])
            """

        p_survivable = defaultdict(float)
        for action in n_survivable:
            p_survivable[action] = sum(n_survivable[action]) / self._my_survivability_threshold
            if p_survivable[action] > 1:
                p_survivable[action] = 1

        block = defaultdict(float)
        for action in [constants.Action.Stop,
                       constants.Action.Up, constants.Action.Down,
                       constants.Action.Left, constants.Action.Right]:
            next_position = my_next_position[action]
            if next_position is None:
                continue
            if next_position in set.union(kickable, might_kickable):
                # kick will be considered later
                continue
            if all([utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1,
                    is_survivable[constants.Action.Stop]]):
                # if the next position is flames,
                # I want to stop to wait, which must be feasible
                block[action] = total_frac_blocked[next_position] * p_survivable[constants.Action.Stop]
                if teammate_position is not None:
                    block[action] *= (1 - total_frac_blocked_teammate[next_position])
                block[action] *= self._inv_tmp
                block[action] -=  np.log(-np.log(self.random.uniform()))
                continue
            elif not is_survivable[action]:
                continue
            if all([might_blocked[action],
                    not is_survivable[constants.Action.Stop]]):
                continue

            block[action] = total_frac_blocked[next_position] * p_survivable[action]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp            
            block[action] -=  np.log(-np.log(self.random.uniform()))
            if might_blocked[action]:
                block[action] = (total_frac_blocked[my_position] * p_survivable[constants.Action.Stop]
                                 + total_frac_blocked[next_position] * p_survivable[action]) / 2
                if teammate_position is not None:                    
                    block[action] *= (1 - total_frac_blocked_teammate[next_position])
                block[action] *= self._inv_tmp                
                block[action] -=  np.log(-np.log(self.random.uniform()))

        if is_survivable[constants.Action.Bomb]:
            list_boards_with_bomb, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=constants.Action.Bomb)

            n_survivable_nodes_with_bomb = defaultdict(int)
            for enemy in my_enemies:
                # get survivable tree of the enemy
                rows, cols = np.where(board==enemy.value)
                if len(rows) == 0:
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable = search_time_expanded_network(list_boards_with_bomb,
                                                           enemy_position)
                n_survivable_nodes_with_bomb[enemy] = sum([len(positions) for positions in _survivable])

            n_with_bomb = sum([n_survivable_nodes_with_bomb[enemy] for enemy in my_enemies])
            n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies])
            if n_with_none == 0:
                total_frac_blocked_with_bomb = 0

                # place more bombs, so the stacked enemy cannot kick
                x, y = my_position
                for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    next_position = (x + dx, y + dy)
                    following_position = (x + 2 * dx, y + 2 * dy)
                    if not self._on_board(following_position):
                        continue
                    if all([obs["bomb_life"][next_position] > 0,
                            board[following_position] > constants.Item.AgentDummy.value]):
                        total_frac_blocked_with_bomb = 1
            else:
                total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none

            if teammate_position is not None:
                # get survivable tree of the teammate
                _survivable = search_time_expanded_network(list_boards_with_bomb, teammate_position)
                n_survivable_nodes_with_bomb_teammate = sum([len(positions) for positions in _survivable])

                n_with_bomb = n_survivable_nodes_with_bomb_teammate
                n_with_none = n_survivable_nodes_teammate[my_teammate]
                if n_with_none == 0:
                    total_frac_blocked_with_bomb_teammate = 0
                else:
                    total_frac_blocked_with_bomb_teammate = 1 - n_with_bomb / n_with_none

            action = constants.Action.Bomb
            block[action] = total_frac_blocked_with_bomb * p_survivable[action]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_with_bomb_teammate)
            block[action] *= self._inv_tmp
            block[action] -=  np.log(-np.log(self.random.uniform()))

        for next_position in kickable:

            action = self._get_direction(my_position, next_position)
            if not is_survivable[action]:
                continue

            list_boards_with_kick, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=action,
                                       can_kick=True)

            n_survivable_nodes_with_kick = defaultdict(int)
            for enemy in my_enemies:
                # get survivable tree of the enemy
                rows, cols = np.where(board==enemy.value)
                if len(rows) == 0:
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable = search_time_expanded_network(list_boards_with_kick,
                                                           enemy_position)
                n_survivable_nodes_with_kick[enemy] = sum([len(positions) for positions in _survivable])

                n_with_kick = sum([n_survivable_nodes_with_kick[enemy] for enemy in my_enemies])
                n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies])
                if n_with_none == 0:
                    total_frac_blocked[next_position] = 0
                else:
                    total_frac_blocked[next_position] = 1 - n_with_kick / n_with_none

            if teammate_position is not None:
                # get survivable tree of the teammate
                _survivable = search_time_expanded_network(list_boards_with_kick, teammate_position)
                n_survivable_nodes_with_kick_teammate = sum([len(positions) for positions in _survivable])

                n_with_kick = n_survivable_nodes_with_kick_teammate
                n_with_none = n_survivable_nodes_teammate[my_teammate]
                if n_with_none == 0:
                    total_frac_blocked_teammate[next_position] = 0
                else:
                    total_frac_blocked_teammate[next_position] = 1 - n_with_kick / n_with_none
            
            block[action] = total_frac_blocked[next_position] * p_survivable[action]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp
            block[action] -=  np.log(-np.log(self.random.uniform()))

        max_block = -np.inf
        best_action = None
        for action in block:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp:
            if teammate_position is not None:
                teammate_safety = total_frac_blocked_with_bomb_teammate * n_survivable_nodes_with_bomb_teammate
                if any([teammate_safety > self._teammate_survivability_threshold,
                        total_frac_blocked_with_bomb_teammate < self._interfere_threshold,
                        total_frac_blocked_with_bomb_teammate < total_frac_blocked_teammate[my_position]]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if teammate_ok:
                if best_action == constants.Action.Bomb:                    
                    if verbose:
                        print("Bomb is best", constants.Action.Bomb)
                    return constants.Action.Bomb.value

                if best_action == constants.Action.Stop:
                    if verbose:
                        print("Place a bomb at a locally optimal position", constants.Action.Bomb)
                    return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #

        if best_action not in [None, constants.Action.Bomb]:
            next_position = my_next_position[best_action]

            should_chase = (total_frac_blocked[next_position] > self._chase_threshold)

            if teammate_position is not None:
                teammate_safety = total_frac_blocked_teammate[next_position] * n_survivable_nodes_teammate[my_teammate]
                if any([teammate_safety > self._teammate_survivability_threshold,
                        total_frac_blocked_teammate[next_position] < self._interfere_threshold,
                        total_frac_blocked_teammate[next_position] < total_frac_blocked_teammate[my_position]]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if should_chase and teammate_ok:
                if all([utility.position_is_flames(board, next_position),
                        info["flame_life"][next_position] > 1,
                        is_survivable[constants.Action.Stop]]):
                    action = constants.Action.Stop
                    if verbose:
                        print("Wait flames life", action)
                    return action.value
                else:
                    if verbose:
                        print("Move towards better place to bomb", best_action)
                    return best_action.value                

        # Exclude the action representing stop to wait
        max_block = -np.inf
        best_action = None
        for action in survivable_actions:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action
                
        #
        # Do not take risky actions
        #

        most_survivable_action = self._action_most_survivable(n_survivable)

        # ignore actions with low survivability
        _survivable_actions = list()
        for action in n_survivable:
            n = sum(n_survivable[action])
            if not is_survivable[action]:
                continue
            elif n > self._my_survivability_threshold:
                _survivable_actions.append(action)
            else:
                print("RISKY", action)
                is_survivable[action] = False

        if len(_survivable_actions) > 1:
            survivable_actions = _survivable_actions
        elif best_action is not None:
            if verbose:
                print("Take the best action in danger", best_action)
            return best_action.value
        else:
            # Take the most survivable action
            if verbose:
                print("Take the most survivable action", most_survivable_action)
            return most_survivable_action.value

        #
        # Do not interfere with teammate
        #

        if all([teammate_position is not None,
                len(enemy_positions) > 0 or len(info["curr_bombs"]) > 0]):
            # ignore actions that interfere with teammate
            min_interfere = np.inf
            least_interfere_action = None
            _survivable_actions = list()
            for action in survivable_actions:
                if action == constants.Action.Bomb:
                    frac = total_frac_blocked_with_bomb_teammate
                else:
                    next_position = my_next_position[action]
                    frac = total_frac_blocked_teammate[next_position]
                if frac < min_interfere:
                    min_interfere = frac
                    least_interfere_action = action
                if frac < self._interfere_threshold:
                    _survivable_actions.append(action)
                else:
                    print("INTERFERE", action)
                    is_survivable[action] = False

            if len(_survivable_actions) > 1:
                survivable_actions = _survivable_actions
            elif best_action is not None:
                # Fewer than two non-interfering actions remain: fall back to the best action
                if verbose:
                    print("Take the best action in intereference", best_action)
                return best_action.value
            else:
                if verbose:
                    print("Take the least interfering action", least_interfere_action)
                return least_interfere_action.value

        consider_bomb = True
        if not is_survivable[constants.Action.Bomb]:
            consider_bomb = False

        #
        # Find reachable items
        #

        # List of boards simulated
        list_boards, _ = self._board_sequence(board,
                                              info["curr_bombs"],
                                              info["curr_flames"],
                                              self._search_range,
                                              my_position,
                                              enemy_mobility=self._enemy_mobility,
                                              enemy_bomb=self._enemy_bomb,
                                              agent_blast_strength=info["agent_blast_strength"])

        # some bombs may explode with extra bombs, leading to under estimation
        for t in range(len(list_boards)):
            flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value)
            list_boards[t][flame_positions] = constants.Item.Flames.value
        
        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, _, _ = self._search_time_expanded_network(list_boards,
                                                                    my_position)        
        if len(survivable[-1]) == 0:
            survivable = [set() for _ in range(len(survivable))]

        # Items and bomb target that can be reached in a survivable manner
        _, _, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable)

        #
        # If I have seen an enemy recently and cannot see him now, then move to the last seen position
        #

        action = self._action_to_enemy(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info,
                                       my_enemies)
        if action is not None:
            if verbose:
                print("Moving toward last seen enemy", action)
            return action.value            
        
        #
        # If I have seen a teammate recently, then move away from the last seen position
        #

        action = self._action_away_from_teammate(my_position,
                                                 next_to_items[constants.Item.Fog],
                                                 prev,
                                                 is_survivable,
                                                 info,
                                                 my_teammate)
        if action is not None:
            if verbose:
                print("Moving away from last seen teammate", action)
            return action.value            
        
        #
        # Move towards the fog that we have not seen for the longest time
        #
        
        action = self._action_to_fog(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info)

        if action is not None:
            #if True:
            if self.random.uniform() < 0.8:
                if verbose:
                    print("Moving toward oldest fog", action)
                return action.value            

        #
        # Choose most survivable action
        #

        max_block = -np.inf
        best_action = None
        for action in survivable_actions:
            if action == constants.Action.Bomb:
                continue
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if verbose:
            print("Take the best action among safe actions (nothing else to do)", best_action)

        if best_action is None:
            # this should not be the case
            return None
        else:
            return best_action.value
Exemplo n.º 18
0
    def act(self, obs, action_space):

        # The number of steps
        self._steps += 1

        # Collapse the board if just collapsed in the previous step
        info = dict()
        info["steps"] = self._steps
        info["recently_seen"] = deepcopy(obs["board"])
        if self._just_collapsed is not None:
            L = self._just_collapsed
            U = obs["board"].shape[0] - 1 - L

            flames_positions = np.where(
                obs["board"] == constants.Item.Flames.value)

            info["recently_seen"][L, :][L:U + 1] = constants.Item.Rigid.value
            info["recently_seen"][U, :][L:U + 1] = constants.Item.Rigid.value
            info["recently_seen"][:, L][L:U + 1] = constants.Item.Rigid.value
            info["recently_seen"][:, U][L:U + 1] = constants.Item.Rigid.value

            info["recently_seen"][
                flames_positions] = constants.Item.Flames.value

            obs["bomb_life"][L, :][L:U + 1] = 0
            obs["bomb_life"][U, :][L:U + 1] = 0
            obs["bomb_life"][:, L][L:U + 1] = 0
            obs["bomb_life"][:, U][L:U + 1] = 0

            obs["bomb_blast_strength"][L, :][L:U + 1] = 0
            obs["bomb_blast_strength"][U, :][L:U + 1] = 0
            obs["bomb_blast_strength"][:, L][L:U + 1] = 0
            obs["bomb_blast_strength"][:, U][L:U + 1] = 0

        #
        # Whether each location is Rigid
        #
        #  False may be an unknown
        #
        if self._num_rigid_found < constants.NUM_RIGID:
            self._is_rigid += (obs["board"] == constants.Item.Rigid.value)
            self._is_rigid += (obs["board"].T == constants.Item.Rigid.value)
            self._num_rigid_found = np.sum(self._is_rigid)
            self._unreachable = ~self._get_reachable(self._is_rigid)
            self._unreachable_locations = np.where(self._unreachable)

        #
        # What we have seen last time, and how many steps have passed since then
        #
        visible_locations = np.where(obs["board"] != constants.Item.Fog.value)
        self._last_seen[visible_locations] = obs["board"][visible_locations]
        self._last_seen[
            self._unreachable_locations] = constants.Item.Rigid.value
        self._since_last_seen += 1
        self._since_last_seen[visible_locations] = 0
        self._since_last_seen[np.where(self._is_rigid)] = 0
        if self._steps == 0:
            # We have some knowledge about the initial configuration of the board
            C = constants.BOARD_SIZE - 2
            self._last_seen[(1, 1)] = constants.Item.Agent0.value
            self._last_seen[(C, 1)] = constants.Item.Agent1.value
            self._last_seen[(C, C)] = constants.Item.Agent2.value
            self._last_seen[(1, C)] = constants.Item.Agent3.value
            rows = np.array([1, C, 1, C])
            cols = np.array([1, 1, C, C])
            self._since_last_seen[(rows, cols)] = 0
            rows = np.array([
                1, 1, 1, 1, 2, 3, C - 1, C - 2, C, C, C, C, 2, 3, C - 1, C - 2
            ])
            cols = np.array([
                2, 3, C - 1, C - 2, 1, 1, 1, 1, 2, 3, C - 1, C - 2, C, C, C, C
            ])
            self._last_seen[(rows, cols)] = constants.Item.Passage.value
            self._since_last_seen[(rows, cols)] = 0

        #
        # We know exactly how my teammate is digging
        #
        my_position = obs["position"]

        if self._steps == 33:
            passage_under_fog \
                = (self._last_seen.T == constants.Item.Passage.value) * (self._last_seen == constants.Item.Fog.value)
            positions = np.where(passage_under_fog)
            self._last_seen[positions] = constants.Item.Passage.value
            self._since_last_seen[positions] = 0

        info["since_last_seen"] = self._since_last_seen
        info["last_seen"] = self._last_seen

        if not self._just_collapsed:
            # then we do not see the true board, so skip
            recently_seen_positions = (info["since_last_seen"] <
                                       self._use_last_seen)
            info["recently_seen"][recently_seen_positions] = info["last_seen"][
                recently_seen_positions]

        # TODO: deepcopy are not needed with Docker
        board = info["recently_seen"]
        bomb_life = obs["bomb_life"]
        bomb_blast_strength = obs["bomb_blast_strength"]
        my_enemies = [
            constants.Item(e) for e in obs['enemies']
            if e != constants.Item.AgentDummy
        ]
        if obs["teammate"] != constants.Item.AgentDummy:
            my_teammate = obs["teammate"]
        else:
            my_teammate = None

        info["prev_action"] = self._prev_action
        info["prev_position"] = self._prev_position

        #
        # Modify the board
        #

        board[self._unreachable_locations] = constants.Item.Rigid.value

        #
        # Summarize information about bombs
        #
        #  curr_bombs : list of current bombs
        #  moving_direction : array of moving direction of bombs
        info["curr_bombs"], info["moving_direction"] \
            = self._get_bombs(obs["board"],  # use observation to keep the bombs under fog
                              bomb_blast_strength, self._prev_bomb_blast_strength,
                              bomb_life, self._prev_bomb_life)

        self._prev_bomb_life = bomb_life.copy()
        self._prev_bomb_blast_strength = bomb_blast_strength.copy()

        #
        # Bombs to be exploded in the next step
        #
        curr_bomb_position_strength = list()
        rows, cols = np.where(bomb_blast_strength > 0)
        for position in zip(rows, cols):
            strength = int(bomb_blast_strength[position])
            curr_bomb_position_strength.append((position, strength))

        #
        # Summarize information about flames
        #
        if self._prev_board is not None:
            info["curr_flames"], self._prev_flame_life \
                = self._get_flames(obs["board"],  # use observation to keep the bombs under fog
                                   self._prev_board[-1],
                                   self._prev_flame_life,
                                   self._prev_bomb_position_strength,
                                   curr_bomb_position_strength,
                                   self._prev_moving_direction)
        else:
            info["curr_flames"] = []
        info["flame_life"] = self._prev_flame_life

        self._prev_moving_direction = deepcopy(info["moving_direction"])

        self._prev_bomb_position_strength = curr_bomb_position_strength

        #
        # List of simulated boards, assuming enemies stay unmoved
        #

        step_to_collapse = None
        collapse_ring = None
        if obs["game_env"] == 'pommerman.envs.v1:Pomme':
            # Collapse mode

            # cannot trust the board just collapsed, so skip
            if self._just_collapsed is None:
                already_collapsed = (self._when_collapse < self._steps)
                not_rigid = (obs["board"] != constants.Item.Rigid.value) * (
                    obs["board"] != constants.Item.Fog.value)
                not_collapsed_positions = np.where(already_collapsed *
                                                   not_rigid)
                self._when_collapse[not_collapsed_positions] = np.inf

            collapse_steps = [
                step for step in self._collapse_steps if step >= self._steps
            ]
            if len(collapse_steps):
                step_to_collapse = min(collapse_steps) - self._steps
                collapse_ring = len(self._collapse_steps) - len(collapse_steps)
                if step_to_collapse == 0:
                    self._just_collapsed = collapse_ring
                else:
                    self._just_collapsed = None
            else:
                self._just_collapsed = None

        info["step_to_collapse"] = step_to_collapse
        info["collapse_ring"] = collapse_ring

        info["list_boards_no_move"], _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   enemy_mobility=0,
                                   step_to_collapse=step_to_collapse,
                                   collapse_ring=collapse_ring)

        #
        # Might appear item from flames
        #

        info["might_powerup"] = np.full(self.board_shape, False)
        if self._prev_board is None:
            # Flame life is 2
            # flame life is hardcoded in pommerman/characters.py class Flame
            self._prev_board = [
                deepcopy(board),
                deepcopy(board),
                deepcopy(board)
            ]
        else:
            old_board = self._prev_board.pop(0)
            self._prev_board.append(deepcopy(board))
            if self._might_remaining_powerup:
                # was wood and now flames
                was_wood = (old_board == constants.Item.Wood.value)
                now_flames = (board == constants.Item.Flames.value)
                info["might_powerup"] = was_wood * now_flames

                # now wood and will passage
                now_wood = (board == constants.Item.Wood.value)
                become_passage = (info["list_boards_no_move"][-1] ==
                                  constants.Item.Passage.value)
                info["might_powerup"] += now_wood * become_passage

                maybe_powerup = info["might_powerup"] \
                                + (self._last_seen == constants.Item.Wood.value) \
                                + (self._last_seen == constants.Item.ExtraBomb.value) \
                                + (self._last_seen == constants.Item.IncrRange.value) \
                                + (self._last_seen == constants.Item.Kick.value)
                if not maybe_powerup.any():
                    self._might_remaining_powerup = False

        # update the estimate of enemy blast strength
        rows, cols = np.where(bomb_life == constants.DEFAULT_BOMB_LIFE - 1)
        for position in zip(rows, cols):
            if position == my_position:
                continue
            enemy = board[position]
            self._agent_blast_strength[enemy] = bomb_blast_strength[position]
        info["agent_blast_strength"] = self._agent_blast_strength

        # enemy positions
        info["enemy_positions"] = list()
        for enemy in my_enemies:
            rows, cols = np.where(board == enemy.value)
            if len(rows) == 1:
                info["enemy_positions"].append((rows[0], cols[0]))
            elif len(rows) > 1:
                # choose the most recently seen enemy of this ID, because only one can exist
                time_passed = info["since_last_seen"][(rows, cols)]
                idx = np.argmin(time_passed)
                enemy_position = (rows[idx], cols[idx])
                board[(
                    rows, cols
                )] = constants.Item.Passage.value  # overwrite old teammates by passage
                board[enemy_position] = enemy.value
                info["enemy_positions"].append(enemy_position)

        # teammate position
        info["teammate_position"] = None
        if my_teammate is not None:
            rows, cols = np.where(board == my_teammate.value)
            if len(rows) == 1:
                info["teammate_position"] = (rows[0], cols[0])
            elif len(rows) > 1:
                # choose the most recently seen teammate, because only one can exist
                time_passed = info["since_last_seen"][(rows, cols)]
                idx = np.argmin(time_passed)
                info["teammate_position"] = (rows[idx], cols[idx])
                board[(
                    rows, cols
                )] = constants.Item.Passage.value  # overwrite old teammates by passage
                board[info["teammate_position"]] = my_teammate.value

        # next positions
        info["my_next_position"] = {constants.Action.Stop: my_position}
        if all([obs["ammo"] > 0,
                obs["bomb_blast_strength"][my_position] == 0]):
            info["my_next_position"][constants.Action.Bomb] = my_position
        else:
            info["my_next_position"][constants.Action.Bomb] = None
        for action in [
                constants.Action.Up, constants.Action.Down,
                constants.Action.Left, constants.Action.Right
        ]:
            next_position = self._get_next_position(my_position, action)
            if self._on_board(next_position):
                if board[next_position] in [
                        constants.Item.Rigid.value, constants.Item.Wood.value
                ]:
                    info["my_next_position"][action] = None
                else:
                    info["my_next_position"][action] = next_position
            else:
                info["my_next_position"][action] = None

        # kickable positions
        if obs["can_kick"]:
            is_bomb = np.full(self.board_shape, False)
            is_bomb[np.where(obs["bomb_blast_strength"] > 0)] = True
            info["kickable"], info["might_kickable"] \
                = self._kickable_positions(obs, is_bomb, info["moving_direction"])
            info["all_kickable"] = set.union(info["kickable"],
                                             info["might_kickable"])
        else:
            info["kickable"] = set()
            info["might_kickable"] = set()
            info["all_kickable"] = set()

        # might block/blocked actions
        # I am the leader if agent0 or agent1
        # I am the follower otherwise
        # If leader, not blocked by teammate
        # If follower, do not block teammate
        info["might_blocked"] = self._get_might_blocked(
            board, my_position, info["enemy_positions"],
            info["might_kickable"])
        if all([
                board[my_position]
                in [constants.Item.Agent2.value, constants.Item.Agent3.value],
                info["teammate_position"] is not None
        ]):
            info["might_block_teammate"] = self._get_might_blocked(
                board, my_position, [info["teammate_position"]],
                info["might_kickable"])
        else:
            info["might_block_teammate"] = defaultdict(bool)
        info["might_block_actions"] = set([
            a for a in info["might_block_teammate"]
            if info["might_block_teammate"][a]
        ])

        #
        # Choose a slave to act
        #

        if self._isolated:
            is_wood_visible = (constants.Item.Wood.value in board)
            is_closed = self._is_closed(board, my_position)
            if any([not is_wood_visible, not is_closed]):
                self._isolated = False

        action = None
        if self._isolated:
            # Act with an agent who do not consider other dypmAgents
            action = self.isolated_slave.act(obs, action_space, info)
        elif not self._might_remaining_powerup:
            # Act with an agent who do not consider powerups

            if obs["game_env"] == 'pommerman.envs.v1:Pomme':
                info["escape"] = (self._when_collapse == np.inf) * (
                    info["last_seen"] == constants.Item.Passage.value)

            action = self.battle_slave.act(obs, action_space, info)
        else:
            action = self.generic_slave.act(obs, action_space, info)

        if action is None:
            # Act with a special agent, who only seeks to survive
            action = self.surviving_slave.act(obs, action_space, info)

        self._prev_action = action
        self._prev_position = my_position

        return action
    def obs_to_net_in(self, obs):
        """Turn an observation dictionary into board-shaped feature planes.

        Channel layout along the last axis of the result:

        * one plane for each non-board key of ``key_list`` (in list order);
        * for the ``"board"`` key (last in ``key_list``): one one-hot plane per
          non-agent item, then the enemy plane, the teammate plane, the
          own-agent plane and the desire plane of the previous call;
        * the very last channel holds the current desire plane.

        Side effect: ``self.last_board`` is overwritten with a copy of the
        current desire plane (and initialised from it when it is ``None``).

        Args:
            obs: observation dictionary. Every key of ``key_list`` is read,
                plus ``"enemies"``, ``"teammate"`` and ``"alive"``.

        Returns:
            ``np.ndarray`` of shape
            ``(*obs['board'].shape, len(key_list) + len(constants.Item) - 1)``.
        """

        # Order of the keys defines the order of the leading channels.
        # "board" must stay last because it expands into several planes.
        key_list = [
            "bomb_life",
            "bomb_blast_strength",
            "bomb_moving_direction",
            "flame_life",
            "blast_strength",
            "can_kick",
            "ammo",
            #"game_type",
            #"game_env",
            #"step_count",
            #"alive",
            #"position",
            #"teammate",
            #"enemies",
            #"message",
            "board"
        ]

        # How the value stored under each key is converted into a plane.
        type_dict = {
            # Full board to break down
            "board": "full_board",

            # Board type values:
            "bomb_blast_strength": "board",
            "bomb_life": "board",
            "bomb_moving_direction": "board",
            "flame_life": "board",

            # Single value type values:
            "blast_strength": "val",
            "can_kick": "val",
            "ammo": "val",
            "game_type": "val",
            "game_env": "val",
            "step_count": "val",

            # Special type values:
            "alive": None,
            "position": "pos",
            "message": "pos",
            "teammate": "mate",
            "enemies": "enemy"
        }

        # Rating of items (prefer power-ups, avoid flames)
        desire_dict = {
            constants.Item.ExtraBomb.value: 0,
            constants.Item.Kick.value: 0,
            constants.Item.IncrRange.value: 0,
            constants.Item.Wood.value: 1,
            constants.Item.Passage.value: 2,
            constants.Item.Fog.value: 3,
            constants.Item.Rigid.value: 5,
            constants.Item.Agent0.value: 6,
            constants.Item.Agent1.value: 6,
            constants.Item.Agent2.value: 6,
            constants.Item.Agent3.value: 6,
            constants.Item.AgentDummy.value: 6,
            constants.Item.Bomb.value: 7,
            constants.Item.Flames.value: 8
        }

        board_shape = obs['board'].shape
        agents = [
            constants.Item.Agent0, constants.Item.Agent1,
            constants.Item.Agent2, constants.Item.Agent3,
            constants.Item.AgentDummy
        ]
        enemies = obs['enemies']
        mate = obs['teammate']
        alive = obs['alive']

        # Alive agents minus enemies and teammate: what is left is treated as
        # "self" when the board is decomposed below.
        player = [constants.Item(e) for e in alive]
        for e in enemies:
            desire_dict[e.value] = 4  # enemies are rated 4 instead of 6
            try:
                player.remove(e)
            except ValueError:
                # enemy is not alive any more
                pass

        try:
            player.remove(mate)
        except ValueError:
            # teammate is not alive (or there is none among the alive agents)
            pass

        net_in = np.zeros(
            (*board_shape, len(key_list) + len(constants.Item) - 1))

        for idx, key in enumerate(key_list):
            # Determine current value_type to handle value accordingly
            value = obs[key]
            value_type = type_dict[key]

            if value_type == 'full_board':
                # Break board down in its different features
                desire_plane = np.zeros(shape=board_shape, dtype=np.float32)
                enemy_plane = np.zeros(shape=board_shape, dtype=np.float32)
                mate_plane = np.zeros(shape=board_shape, dtype=np.float32)
                self_plane = np.zeros(shape=board_shape, dtype=np.float32)
                i = 0  # offset of the next free channel relative to idx
                # One-hot encode every item
                for e in constants.Item:
                    feature_plane = (value == e.value).astype(np.float32)
                    if e in agents:
                        # Agents are merged into role planes instead of
                        # getting one channel each.
                        if e in enemies:
                            enemy_plane += feature_plane
                        if e == mate:
                            mate_plane += feature_plane
                        if e in player:
                            self_plane = feature_plane
                    else:
                        net_in[:, :, idx + i] = feature_plane
                        i += 1

                    # Encode desired positions
                    desire_plane += feature_plane * desire_dict[e.value]

                if self.last_board is None:
                    # First call: there is no previous desire plane yet.
                    self.last_board = desire_plane.copy()
                net_in[:, :, idx + i] = enemy_plane
                i += 1
                net_in[:, :, idx + i] = mate_plane
                i += 1
                net_in[:, :, idx + i] = self_plane
                i += 1
                net_in[:, :, idx + i] = self.last_board
                net_in[:, :, -1] = desire_plane
                self.last_board = desire_plane.copy()

            else:
                if value_type == 'board':
                    # Value already board_shape
                    feature_plane = np.array(value, dtype=np.float32)

                elif value_type == 'val':
                    # Fill array of board_shape with value
                    feature_plane = np.zeros(shape=board_shape,
                                             dtype=np.float32)
                    feature_plane.fill(np.float32(value))

                elif value_type == 'pos':
                    # Mark position in zero plane
                    feature_plane = np.zeros(shape=board_shape,
                                             dtype=np.float32)
                    feature_plane[value[0], value[1]] = 1

                elif value_type == 'mate':
                    # Fill array of board_shape with mate value for alive mate
                    feature_plane = np.zeros(shape=board_shape,
                                             dtype=np.float32)
                    if value.value in obs['alive']:
                        feature_plane.fill(np.float32(value.value))

                elif value_type == 'enemy':
                    # Sum up enemy values for alive enemies
                    feature_plane = np.zeros(shape=board_shape,
                                             dtype=np.float32)
                    for val in value:
                        if val.value in obs['alive']:
                            feature_plane += val.value

                else:
                    # Create zero plane if value is not handled
                    feature_plane = np.zeros(shape=board_shape,
                                             dtype=np.float32)

                net_in[:, :, idx] = feature_plane

        return net_in
Exemplo n.º 20
0
    def act(self, obs, action_space, info):
        """Choose the action that is expected to keep this agent alive.

        Survivability of each action is estimated with
        ``self._get_n_survivable`` under three scenarios:

        * "bomb": extra bombs may be added at enemy positions
          (``enemy_mobility=0``);
        * "move": the current bombs, with ``enemy_mobility=1``;
        * "none": all other agents are erased from the board.

        Selection order:

        1. actions that survive in all three scenarios;
        2. otherwise, actions that appear in any of the three results;
        3. otherwise, the action with the highest randomly perturbed
           "blocking" score against the enemies (discounted by how much it
           blocks the teammate);
        4. last-resort fallbacks (bomb, kick, random move, stop).

        Args:
            obs: observation dictionary. Keys read here: ``"position"``,
                ``"ammo"``, ``"blast_strength"``, ``"can_kick"``,
                ``"enemies"``, ``"teammate"``, ``"bomb_blast_strength"``,
                ``"bomb_life"`` and ``"board"``.
            action_space: not used by this method.
            info: dictionary prepared by the caller. Keys read here:
                ``"last_seen"``, ``"moving_direction"``, ``"curr_bombs"``,
                ``"curr_flames"`` and ``"list_boards_no_move"``.

        Returns:
            The ``.value`` of the chosen ``constants.Action``.
        """
        # NOTE(review): `verbose` is not defined in this method; presumably
        # it is a module-level flag -- confirm.

        #
        # Definitions
        #

        board = info['last_seen']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_kick = obs["can_kick"]  # whether I can kick
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        my_teammate = obs["teammate"]

        kickable, might_kickable \
            = self._kickable_positions(obs, info["moving_direction"],
                                       consider_agents=True)

        # enemy positions
        enemy_positions = list()
        for enemy in my_enemies:
            rows, cols = np.where(board == enemy.value)
            if len(rows) == 0:
                continue
            enemy_positions.append((rows[0], cols[0]))

        # teammate position
        teammate_position = None
        if my_teammate is not None:
            rows, cols = np.where(board == my_teammate.value)
            if len(rows):
                teammate_position = (rows[0], cols[0])

        # positions that might be blocked
        if teammate_position is None:
            agent_positions = enemy_positions
        else:
            agent_positions = enemy_positions + [teammate_position]
        might_blocked = self._get_might_blocked(board, my_position,
                                                agent_positions,
                                                might_kickable)

        #
        # Survivability, when enemy is replaced by a bomb, and no move afterwards
        #

        # replace enemy with bomb
        _bombs = deepcopy(info["curr_bombs"])
        rows, cols = np.where(board > constants.Item.AgentDummy.value)
        for position in zip(rows, cols):
            # NOTE(review): `my_enemies` holds constants.Item members while
            # board cells look like raw values -- confirm this membership
            # test can ever succeed.
            if board[position] not in my_enemies:
                continue
            if obs["bomb_blast_strength"][position]:
                # already a bomb
                continue
            # NOTE(review): `enemy_blast_strength_map` is not defined in this
            # method; unless it exists at module level this raises NameError
            # when reached.
            bomb = characters.Bomb(
                characters.Bomber(),  # dummy owner of the bomb
                position,
                constants.DEFAULT_BOMB_LIFE,
                enemy_blast_strength_map[position],
                None)
            _bombs.append(bomb)

        n_survivable_bomb = self._get_n_survivable(board,
                                                   _bombs,
                                                   info["curr_flames"],
                                                   obs,
                                                   my_position,
                                                   set.union(
                                                       kickable,
                                                       might_kickable),
                                                   enemy_mobility=0)

        #
        # Survivability, when enemy moves one position or stay unmoved
        #

        n_survivable_move = self._get_n_survivable(board,
                                                   info["curr_bombs"],
                                                   info["curr_flames"],
                                                   obs,
                                                   my_position,
                                                   set.union(
                                                       kickable,
                                                       might_kickable),
                                                   enemy_mobility=1)

        #
        # Survivability, when no enemies
        #

        # Erase every other agent from a copy of the board, keep myself.
        _board = deepcopy(board)
        agent_positions = np.where(_board > constants.Item.AgentDummy.value)
        _board[agent_positions] = constants.Item.Passage.value
        _board[my_position] = board[my_position]

        _obs = {
            "position": obs["position"],
            "blast_strength": obs["blast_strength"],
            "ammo": obs["ammo"],
            "bomb_life": obs["bomb_life"],
            "board": _board
        }

        n_survivable_none = self._get_n_survivable(_board,
                                                   info["curr_bombs"],
                                                   info["curr_flames"],
                                                   _obs,
                                                   my_position,
                                                   set.union(
                                                       kickable,
                                                       might_kickable),
                                                   enemy_mobility=0)

        #
        # Survivable actions
        #

        # An action counts as survivable when the last entry of its
        # survivability sequence is positive.
        survivable_actions_bomb = set(
            [a for a in n_survivable_bomb if n_survivable_bomb[a][-1] > 0])
        survivable_actions_move = set(
            [a for a in n_survivable_move if n_survivable_move[a][-1] > 0])
        survivable_actions_none = set(
            [a for a in n_survivable_none if n_survivable_none[a][-1] > 0])

        survivable_actions = set.intersection(survivable_actions_bomb,
                                              survivable_actions_move,
                                              survivable_actions_none)

        # if can survive without possibility of being blocked, then do so
        # NOTE(review): the filtered lists below are stored in
        # `survivable_action*` (singular) and never read again, so this
        # section currently has no effect on the chosen action. It looks like
        # `survivable_actions*` was intended -- confirm before changing, as
        # that would alter the agent's policy.
        if constants.Action.Stop not in survivable_actions:
            _survivable_actions = [
                action for action in survivable_actions
                if not might_blocked[action]
            ]
            if len(_survivable_actions):
                survivable_action = _survivable_actions

            _survivable_actions_bomb = [
                action for action in survivable_actions_bomb
                if not might_blocked[action]
            ]
            _survivable_actions_move = [
                action for action in survivable_actions_move
                if not might_blocked[action]
            ]
            _survivable_actions_none = [
                action for action in survivable_actions_none
                if not might_blocked[action]
            ]
            if all([
                    len(_survivable_actions_bomb) > 0,
                    len(_survivable_actions_move) > 0,
                    len(_survivable_actions_none) > 0
            ]):
                survivable_action_bomb = _survivable_actions_bomb
                survivable_action_move = _survivable_actions_move
                survivable_action_none = _survivable_actions_none

        #
        # Choose actions
        #

        if len(survivable_actions) == 1:

            action = survivable_actions.pop()
            if verbose:
                print("Only survivable action", action)
            return action.value

        if len(survivable_actions) > 1:

            n_survivable_expected = dict()
            for a in survivable_actions:
                if might_blocked[a]:
                    # If blocked I end up standing still, so the "move"
                    # scenario is represented by the Stop action.
                    n_survivable_expected[a] \
                        = np.array(n_survivable_bomb[a]) \
                        + np.array(n_survivable_move[constants.Action.Stop]) \
                        + np.array(n_survivable_none[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 3
                elif a in [constants.Action.Stop, constants.Action.Bomb]:
                    n_survivable_expected[a] = np.array(
                        n_survivable_bomb[a]) + np.array(n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 2
                else:
                    n_survivable_expected[a] = np.array(
                        n_survivable_bomb[a]) + np.array(n_survivable_none[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 2
            action = self._get_most_survivable_action(n_survivable_expected)
            if verbose:
                print("Most survivable action", action)
            return action.value

        # no survivable actions for all cases
        survivable_actions = set(
            list(n_survivable_bomb.keys()) + list(n_survivable_move.keys()) +
            list(n_survivable_none.keys()))

        if len(survivable_actions) == 1:

            action = survivable_actions.pop()
            if verbose:
                print("Only might survivable action", action)
            return action.value

        if len(survivable_actions) > 1:

            # Fill the gaps with zeros so that every candidate (and Stop,
            # which is used for blocked moves) exists in all three results.
            for a in set.union(survivable_actions, {constants.Action.Stop}):
                if a not in n_survivable_bomb:
                    n_survivable_bomb[a] = np.zeros(self._search_range)
                if a not in n_survivable_move:
                    n_survivable_move[a] = np.zeros(self._search_range)
                if a not in n_survivable_none:
                    n_survivable_none[a] = np.zeros(self._search_range)

            n_survivable_expected = dict()
            for a in survivable_actions:
                if might_blocked[a]:
                    n_survivable_expected[a] \
                        = np.array(n_survivable_bomb[a]) \
                        + np.array(n_survivable_move[constants.Action.Stop]) \
                        + np.array(n_survivable_none[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 3
                elif a in [constants.Action.Stop, constants.Action.Bomb]:
                    n_survivable_expected[a] = np.array(
                        n_survivable_bomb[a]) + np.array(n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 2
                else:
                    n_survivable_expected[a] = np.array(
                        n_survivable_bomb[a]) + np.array(n_survivable_none[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 2
            action = self._get_most_survivable_action(n_survivable_expected)
            if verbose:
                print("Most might survivable action", action)
            return action.value

        # no survivable action found for any cases
        # TODO : Then consider killing enemies or helping teammate

        # fraction of blocked node in the survival trees of enemies
        # Remove myself from the predicted boards: my cell becomes a bomb if
        # one lies under me, a passage otherwise.
        _list_boards = deepcopy(info["list_boards_no_move"])
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes, blocked_time_positions \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

        if teammate_position is not None:
            total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
                = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

        block = defaultdict(float)
        for action in [
                constants.Action.Stop, constants.Action.Up,
                constants.Action.Down, constants.Action.Left,
                constants.Action.Right
        ]:

            next_position = self._get_next_position(my_position, action)
            if not self._on_board(next_position):
                continue

            if board[next_position] in [
                    constants.Item.Rigid.value, constants.Item.Wood.value
            ]:
                continue

            if next_position in set.union(kickable, might_kickable):
                # kick will be considered later
                continue

            block[action] = total_frac_blocked[next_position]
            if teammate_position is not None:
                block[action] *= (1 -
                                  total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp
            # random perturbation -log(-log(U)), U drawn by self.random
            block[action] -= np.log(-np.log(self.random.uniform()))

        # A bomb can only be placed with ammo left AND no bomb already under
        # me, so both conditions are required.
        if all([my_ammo > 0, obs["bomb_blast_strength"][my_position] == 0]):

            list_boards_with_bomb, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=constants.Action.Bomb)

            block[constants.Action.Bomb] \
                = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies)

            if teammate_position is not None:
                # NOTE(review): this passes the enemies' `n_survivable_nodes`
                # although `n_survivable_nodes_teammate` exists -- confirm.
                block_teammate = self._get_frac_blocked_two_lists(
                    list_boards_with_bomb, n_survivable_nodes, board,
                    [my_teammate])
                block[constants.Action.Bomb] *= (1 - block_teammate)

            block[constants.Action.Bomb] *= self._inv_tmp
            block[constants.Action.Bomb] -= np.log(
                -np.log(self.random.uniform()))

        for next_position in set.union(kickable, might_kickable):

            my_action = self._get_direction(my_position, next_position)

            list_boards_with_kick, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True)

            block[my_action] \
                = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies)

            if teammate_position is not None:
                # NOTE(review): same use of the enemies' `n_survivable_nodes`
                # for the teammate as in the bomb case -- confirm.
                block_teammate = self._get_frac_blocked_two_lists(
                    list_boards_with_kick, n_survivable_nodes, board,
                    [my_teammate])
                block[my_action] *= (1 - block_teammate)

            block[my_action] *= self._inv_tmp
            block[my_action] -= np.log(-np.log(self.random.uniform()))

        max_block = -np.inf
        best_action = None
        for action in block:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if best_action is not None:
            if verbose:
                print(
                    "Best action to kill enemies or help teammate (cannot survive)"
                )
            return best_action.value

        # The following will not be used

        # Placing a bomb requires ammo and no bomb already under me (the
        # per-cell `bomb_blast_strength`, not my scalar `blast_strength`).
        if obs["ammo"] > 0 and obs["bomb_blast_strength"][my_position] == 0:
            action = constants.Action.Bomb
            if verbose:
                print("Suicide", action)
            return action.value

        kickable_positions = list(set.union(kickable, might_kickable))
        if kickable_positions:
            self.random.shuffle(kickable_positions)
            action = self._get_direction(my_position, kickable_positions[0])
            if verbose:
                print("Suicide kick", action)
            return action.value

        all_actions = [
            constants.Action.Stop, constants.Action.Up, constants.Action.Down,
            constants.Action.Right, constants.Action.Left
        ]
        self.random.shuffle(all_actions)
        for action in all_actions:
            next_position = self._get_next_position(my_position, action)
            if not self._on_board(next_position):
                continue
            if utility.position_is_wall(board, next_position):
                continue
            if verbose:
                print("Random action", action)
            # Return regardless of the logging flag.
            return action.value

        action = constants.Action.Stop
        if verbose:
            print("No action found", action)
        return action.value
Exemplo n.º 21
0
    def act(self, obs, action_space, info):
        """Choose an action that survives both enemy bombs and enemy moves.

        Survivability is estimated with ``self._get_n_survivable`` for two
        scenarios: extra bombs possibly added at enemy positions
        (``enemy_mobility=0``) and the current bombs with
        ``enemy_mobility=1``. Actions present in both results are preferred.
        If there is none, the method falls back, in order, to the "bomb"
        result, the "move" result, a board with all other agents erased,
        placing a bomb, a random non-wall move, and finally Stop.

        Progress is reported with unconditional ``print`` calls.

        Args:
            obs: observation dictionary. Keys read here: ``"board"``,
                ``"position"``, ``"can_kick"``, ``"enemies"``,
                ``"teammate"``, ``"bomb_blast_strength"``, ``"ammo"``,
                ``"blast_strength"`` and ``"bomb_life"``.
            action_space: not used by this method.
            info: dictionary prepared by the caller. Keys read here:
                ``"moving_direction"``, ``"curr_bombs"`` and
                ``"curr_flames"``.

        Returns:
            The ``.value`` of the chosen ``constants.Action``.
        """

        #
        # Definitions
        #

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_kick = obs["can_kick"]  # whether I can kick
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        my_teammate = obs["teammate"]

        kickable, might_kickable \
            = self._kickable_positions(obs, info["moving_direction"],
                                       consider_agents=True)

        #
        # Survivability, when enemy is replaced by a bomb, and no move afterwards
        #

        # replace enemy with bomb
        _bombs = deepcopy(info["curr_bombs"])
        rows, cols = np.where(board > constants.Item.AgentDummy.value)
        for position in zip(rows, cols):
            # NOTE(review): `my_enemies` holds constants.Item members while
            # board cells look like raw values -- confirm this membership
            # test can ever succeed.
            if board[position] not in my_enemies:
                continue
            if obs["bomb_blast_strength"][position]:
                # already a bomb
                continue
            # NOTE(review): `enemy_blast_strength_map` is not defined in this
            # method; unless it exists at module level this raises NameError
            # when reached.
            bomb = characters.Bomb(
                characters.Bomber(),  # dummy owner of the bomb
                position,
                constants.DEFAULT_BOMB_LIFE,
                enemy_blast_strength_map[position],
                None)
            _bombs.append(bomb)

        n_survivable_bomb = self._get_n_survivable(board,
                                                   _bombs,
                                                   info["curr_flames"],
                                                   obs,
                                                   my_position,
                                                   set.union(
                                                       kickable,
                                                       might_kickable),
                                                   enemy_mobility=0)
        print("survivable bomb")
        for a in n_survivable_bomb:
            print(a, n_survivable_bomb[a])

        survivable_actions_bomb = set(n_survivable_bomb)

        #
        # Survivability, when enemy moves one position or stay unmoved
        #

        n_survivable_move = self._get_n_survivable(board,
                                                   info["curr_bombs"],
                                                   info["curr_flames"],
                                                   obs,
                                                   my_position,
                                                   set.union(
                                                       kickable,
                                                       might_kickable),
                                                   enemy_mobility=1)

        # If my move is survivable with bomb but not with move,
        # then my move must be blocked by an enemy.
        # I might be blocked by an enemy with such my move,
        # it will end up in stop and enemy is also stop,
        # so my survivability with such my move should be the
        # same as my survivability with stop when enemy stops

        if constants.Action.Stop in survivable_actions_bomb:
            for action in survivable_actions_bomb:
                if action in [constants.Action.Stop, constants.Action.Bomb]:
                    continue
                if action not in n_survivable_move:
                    n_survivable_move[action] = n_survivable_bomb[
                        constants.Action.Stop]

        survivable_actions_move = set(n_survivable_move)

        # if survivable by not stopping when enemy place a bomb,
        # then do not stop
        if survivable_actions_bomb - {constants.Action.Stop}:
            survivable_actions_bomb -= {constants.Action.Stop}
            survivable_actions_move -= {constants.Action.Stop}

        survivable_actions = set.intersection(survivable_actions_bomb,
                                              survivable_actions_move)

        if len(survivable_actions) == 0:
            if survivable_actions_bomb:
                action = self._get_most_survivable_action(n_survivable_bomb)
                print("Most survivable action when enemy place a bomb", action)
                return action.value
            elif survivable_actions_move:
                action = self._get_most_survivable_action(n_survivable_move)
                print("Most survivable action when enemy moves", action)
                return action.value
            else:
                #
                # Survivability with no enemies or teammate
                #

                # Erase every other agent from a copy of the board, keep
                # myself.
                _board = deepcopy(board)
                agent_positions = np.where(
                    _board > constants.Item.AgentDummy.value)
                _board[agent_positions] = constants.Item.Passage.value
                _board[my_position] = board[my_position]

                _obs = {
                    "position": obs["position"],
                    "blast_strength": obs["blast_strength"],
                    "ammo": obs["ammo"],
                    "bomb_life": obs["bomb_life"],
                    "board": _board
                }

                n_survivable = self._get_n_survivable(_board,
                                                      info["curr_bombs"],
                                                      info["curr_flames"],
                                                      _obs,
                                                      my_position,
                                                      set.union(
                                                          kickable,
                                                          might_kickable),
                                                      enemy_mobility=0)

                survivable_actions = list(n_survivable)

                if survivable_actions:
                    action = self._get_most_survivable_action(n_survivable)
                    print("Most survivable action when no enemy", action)
                    return action.value
                else:
                    # Placing a bomb requires ammo and no bomb already under
                    # me (the per-cell `bomb_blast_strength`, not my scalar
                    # `blast_strength`).
                    if obs["ammo"] > 0 and \
                            obs["bomb_blast_strength"][my_position] == 0:
                        action = constants.Action.Bomb
                        print("Suicide", action)
                        return action.value
                    else:
                        all_actions = [
                            constants.Action.Stop, constants.Action.Up,
                            constants.Action.Down, constants.Action.Right,
                            constants.Action.Left
                        ]
                        random.shuffle(all_actions)
                        for action in all_actions:
                            next_position = self._get_next_position(
                                my_position, action)
                            if not self._on_board(next_position):
                                continue
                            # Skip moves into a wall; only non-wall targets
                            # are valid random actions.
                            if utility.position_is_wall(
                                    board, next_position):
                                continue
                            print("Random action", action)
                            return action.value
                        # no valid random move: fall through to Stop below

        elif len(survivable_actions) == 1:

            action = survivable_actions.pop()
            print("Only survivable action", action)
            return action.value

        else:

            n_survivable_min = dict()
            for a in survivable_actions:
                # NOTE(review): built-in min() compares the two sequences as
                # wholes, not element-wise -- confirm this is intended.
                n_survivable_min[a] = min(
                    [n_survivable_bomb[a], n_survivable_move[a]])
            action = self._get_most_survivable_action(n_survivable_min)
            print("Most survivable action", action)
            return action.value

        action = constants.Action.Stop
        print("No action found", action)
        return action.value
Exemplo n.º 22
0
    def _djikstra(board,
                  my_position,
                  bombs,
                  enemies,
                  bomb_timer=None,
                  depth=None,
                  exclude=None):
        """Run a unit-cost shortest-path search outward from ``my_position``.

        NOTE(review): there is no ``self`` parameter although callers invoke
        this as ``self._djikstra(board, ...)``; presumably a ``@staticmethod``
        decorator sits just above this definition -- confirm.

        Args:
            board: 2-D grid of item values, indexable by ``(row, col)`` tuples.
            my_position: ``(row, col)`` cell the search starts from.
            bombs: iterable of dicts with a ``'position'`` key; only used to
                record a bomb lying on ``my_position``.
            enemies: forwarded to ``utility.position_is_passable``.
            bomb_timer: optional map indexable by position. A step that would
                arrive at a cell whose timer ``t`` is positive and within one
                tick of the arrival time is not taken.
            depth: maximum Manhattan distance from ``my_position`` that is
                considered; defaults to ``len(board) * 2``.
            exclude: items whose cells are ignored entirely; defaults to
                Fog, Rigid and Flames.

        Returns:
            A tuple ``(items, dist, prev)``:
            ``items`` maps each ``constants.Item`` to the list of considered
            positions holding it, ``dist`` maps each considered position to
            its step count (``np.inf`` when never reached) and ``prev`` maps
            each considered position to its predecessor on the path found
            (``None`` for the start and for unreached cells).
        """
        if depth is None:
            depth = len(board) * 2

        if exclude is None:
            exclude = [
                constants.Item.Fog, constants.Item.Rigid, constants.Item.Flames
            ]

        def out_of_range(p1, p2):
            """Return True when the Manhattan distance p1 -> p2 exceeds depth."""
            x1, y1 = p1
            x2, y2 = p2
            return abs(y2 - y1) + abs(x2 - x1) > depth

        items = defaultdict(list)

        # The agent hides whatever is underneath it on the board, so a bomb on
        # the start cell is registered from the bomb list instead.
        for bomb in bombs:
            if bomb['position'] == my_position:
                items[constants.Item.Bomb].append(my_position)

        dist = {}
        prev = {}

        mx, my = my_position
        # Upper bounds are exclusive, hence the "+ 1": cells exactly `depth`
        # away in the positive direction are in range, just like those at
        # `mx - depth` / `my - depth`.
        row_lo, row_hi = max(0, mx - depth), min(len(board), mx + depth + 1)
        col_lo, col_hi = max(0, my - depth), min(len(board[0]), my + depth + 1)
        for r in range(row_lo, row_hi):
            for c in range(col_lo, col_hi):
                position = (r, c)
                if out_of_range(my_position, position) or \
                        utility.position_in_items(board, position, exclude):
                    continue

                dist[position] = 0 if position == my_position else np.inf
                prev[position] = None

                item = constants.Item(board[position])
                items[item].append(position)

        # Dijkstra with unit edge weights, driven by a (distance, cell) heap.
        H = []
        heapq.heappush(H, (0, my_position))
        while H:
            min_dist, position = heapq.heappop(H)

            # Do not expand from an impassable cell, except when it holds a
            # bomb.
            if (board[position] != constants.Item.Bomb.value
                    and not utility.position_is_passable(
                        board, position, enemies)):
                continue

            x, y = position
            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                new_position = (row + x, col + y)
                # Cells outside the window / excluded cells were never added.
                if new_position not in dist:
                    continue

                if not utility.position_is_passable(board, new_position,
                                                    enemies):
                    continue

                if bomb_timer is not None:
                    t = bomb_timer[new_position]
                    # Avoid arriving within one tick of the cell's timer.
                    if t > 0 and abs((min_dist + 1) - t) < 2:
                        continue

                if min_dist + 1 < dist[new_position]:
                    dist[new_position] = min_dist + 1
                    prev[new_position] = position
                    heapq.heappush(H, (dist[new_position], new_position))

        return items, dist, prev
Exemplo n.º 23
0
    def act(self, obs, action_space):
        """Score every board cell and act on the best reachable one.

        All grids here are hard-coded to 11x11. The method runs in phases:

        1. Shortest paths from the agent via ``self._djikstra``.
        2. Refresh ``self.bomb_time``, a per-cell countdown for cells on a
           bomb or along one of its four blast arms.
        3. Score cells by breakable wood, power-ups, bomb blast and straight
           unobstructed lines from located enemies.
        4. Pick the best-scoring reachable cell: if the agent already stands
           on a cell with that score it bombs (when ``self._can_escape``
           agrees) or stops, otherwise it takes the step returned by
           ``self._backtrack``.

        Args:
            obs: observation dict; the keys read are ``'position'``,
                ``'board'``, ``'bomb_blast_strength'``, ``'enemies'``,
                ``'ammo'`` and ``'blast_strength'``.
            action_space: unused.

        Returns:
            The ``.value`` of the chosen ``constants.Action``.

        Side effects:
            Updates ``self.bomb_time``, ``self.prev_action`` and
            ``self.prev_pos``; prints diagnostics when ``self.logging`` is
            truthy.
        """

        def convert_bombs(bomb_map):
            """Return one dict per cell of ``bomb_map`` holding a value > 0."""
            ret = []
            locations = np.where(bomb_map > 0)
            for r, c in zip(locations[0], locations[1]):
                ret.append({
                    'position': (r, c),
                    'blast_strength': int(bomb_map[(r, c)])
                })
            return ret

        depth = 20
        # The four axis-aligned unit steps, as (row, col) offsets.
        directions = ((-1, 0), (1, 0), (0, -1), (0, 1))

        my_position = tuple(obs['position'])
        board = np.array(obs['board'])
        bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
        enemies = [constants.Item(e) for e in obs['enemies']]
        ammo = int(obs['ammo'])
        blast_strength = int(obs['blast_strength'])

        # The previous action had a value in 1..4 (presumably the four move
        # actions -- confirm) yet the position did not change: treat the
        # agent's own cell as rigid for this step.
        if (self.prev_pos is not None and self.prev_pos == my_position
                and 1 <= self.prev_action.value <= 4):
            if self.logging:
                print('freeze')
            board[self.prev_pos] = constants.Item.Rigid.value

        items, dist, prev = self._djikstra(board,
                                           my_position,
                                           bombs,
                                           enemies,
                                           bomb_timer=self.bomb_time,
                                           depth=depth)

        if self.logging:
            print('my_position =', my_position)
            print('board =')
            print(board)
            print('dist =')
            print(dist)
            print('bombs =', bombs)
            print('enemies =', enemies)
            for e in enemies:
                print(e)
                pos = items.get(e, [])
                print('pos =', pos)
                print('pos_len=', len(pos))
                if len(pos) > 0:
                    print('xy=', pos[0][0], ',', pos[0][1])
            print('ammo =', ammo)
            print('blast_strength =', blast_strength)

            # Dense view of `dist` for debugging only (-1 = not considered).
            dist_mat = np.ones((11, 11))
            for c in range(11):
                for r in range(11):
                    if (r, c) in dist:
                        dist_mat[r, c] = dist[(r, c)]
                    else:
                        dist_mat[r, c] = -1
            print("dist_mat:")
            print(dist_mat)

        # update bomb_time map
        bomb_life = 8
        has_bomb = {}
        already_breakable = np.zeros((11, 11))
        for b in bombs:
            r, c = b['position']
            strength = b['blast_strength']

            # A timer of 0 means "not tracked yet": start the countdown,
            # otherwise tick it down by one.
            # NOTE(review): the bomb's own cell is never recorded in
            # `has_bomb`, so the clean-up below resets this timer every step
            # unless the cell lies on another bomb's blast arm -- confirm
            # whether that is intended.
            if self.bomb_time[(r, c)] == 0:
                self.bomb_time[(r, c)] = bomb_life
            else:
                self.bomb_time[(r, c)] -= 1

            for row, col in directions:
                for d in range(1, strength):

                    new_pos = (r + d * row, c + d * col)

                    if TestSimpleAgent._out_of_board(new_pos):
                        continue

                    # NOTE(review): a rigid cell is skipped but the arm keeps
                    # going beyond it -- confirm that this is intended.
                    if utility.position_is_rigid(board, new_pos):
                        continue

                    # Wood already covered by an existing bomb is not counted
                    # again as a bombing target below.
                    if utility.position_is_wood(board, new_pos):
                        already_breakable[new_pos] = 1

                    if self.bomb_time[new_pos] == 0:
                        self.bomb_time[new_pos] = bomb_life
                    else:
                        self.bomb_time[new_pos] -= 1

                    has_bomb[new_pos] = 1

        # clear up table: cells on no blast arm lose their timer
        for c in range(11):
            for r in range(11):
                if (r, c) not in has_bomb:
                    self.bomb_time[(r, c)] = 0

        if self.logging:
            print("bomb_time:")
            print(self.bomb_time)

        # evaluate each position in terms of breakable woods
        # num_breakable[p]: number of not-yet-targeted wood cells with p on
        #   one of their arms (arm length blast_strength - 1).
        # num_breakable_inside[w]: for a wood cell w, the number of adjacent
        #   wood cells, minus one when w has no passable neighbour.
        num_breakable = np.zeros((11, 11))
        num_breakable_inside = np.zeros((11, 11))
        for c in range(11):
            for r in range(11):
                if not utility.position_is_wood(board, (r, c)):
                    continue
                if already_breakable[(r, c)]:
                    continue

                for row, col in directions:
                    for d in range(1, blast_strength):
                        new_pos = (r + d * row, c + d * col)

                        if TestSimpleAgent._out_of_board(new_pos):
                            continue

                        if utility.position_is_passable(
                                board, new_pos,
                                enemies) or utility.position_is_flames(
                                    board, new_pos):
                            num_breakable[new_pos] += 1
                        else:
                            # The arm is blocked from here on.
                            break

                tmp_num = 0
                has_passable = False
                for row, col in directions:
                    new_pos = (r + row, c + col)
                    if TestSimpleAgent._out_of_board(new_pos):
                        continue

                    if utility.position_is_wood(board, new_pos):
                        tmp_num += 1
                    elif utility.position_is_passable(board, new_pos,
                                                      enemies):
                        has_passable = True

                if (not has_passable) and tmp_num > 0:
                    tmp_num -= 1

                num_breakable_inside[(r, c)] = tmp_num

        if self.logging:
            print('num_breakable:')
            print(num_breakable)

            print('num_breakable_inside:')
            print(num_breakable_inside)

        # Total = direct targets + half of the "inside" value of each
        # neighbouring cell.
        num_breakable_total = np.zeros((11, 11))
        for c in range(11):
            for r in range(11):
                num_breakable_total[(r, c)] = num_breakable[(r, c)]

                if num_breakable_total[(r, c)] == -1 or num_breakable_total[(
                        r, c)] == np.inf:
                    continue

                for row, col in directions:
                    new_pos = (r + row, c + col)

                    if new_pos[0] < 0 or new_pos[0] > 10:
                        continue
                    if new_pos[1] < 0 or new_pos[1] > 10:
                        continue

                    num_breakable_total[(
                        r, c)] += num_breakable_inside[new_pos] * 0.5

        if self.logging:
            print('num_breakable_total:')
            print(num_breakable_total)

        # evaluate each position in total
        # -1 marks cells absent from `dist`, inf marks unreached cells.
        pos_scores = np.zeros((11, 11))
        for c in range(11):
            for r in range(11):
                if (r, c) not in dist:
                    pos_scores[(r, c)] = -1
                    continue
                elif dist[(r, c)] == np.inf:
                    pos_scores[(r, c)] = np.inf
                    continue

                if num_breakable_total[(r, c)] > 0:
                    pos_scores[(r, c)] += num_breakable_total[(r, c)]
                    # Closer cells get a small bonus.
                    pos_scores[(r, c)] += (depth - dist[(r, c)]) * 0.2

                # consider power-up items
                if board[(r, c)] in {
                        constants.Item.ExtraBomb.value,
                        constants.Item.IncrRange.value
                }:
                    pos_scores[(r, c)] += 50

        if self.logging:
            print('pos_score:')
            print(pos_scores)

        # consider degree of freedom
        # Manhattan distance to the nearest located enemy (100 if none).
        dis_to_ene = 100
        for e in enemies:
            pos = items.get(e, [])
            if len(pos) > 0:
                d = abs(pos[0][0] - my_position[0]) + abs(pos[0][1] -
                                                          my_position[1])
                if dis_to_ene > d:
                    dis_to_ene = d
        # NOTE(review): `dis_to_ene` is never negative, so this branch cannot
        # run as written; the threshold looks like a switch that turns the
        # dead-end penalty off -- confirm before changing it.
        if dis_to_ene <= -4:
            deg_frees = np.zeros((11, 11))
            for c in range(11):
                for r in range(11):
                    if not utility.position_is_passable(
                            board, (r, c), enemies):
                        continue

                    deg_free = 0
                    for row, col in directions:
                        new_pos = (r + row, c + col)
                        if new_pos[0] < 0 or new_pos[0] > 10:
                            continue
                        if new_pos[1] < 0 or new_pos[1] > 10:
                            continue

                        if utility.position_is_passable(
                                board, new_pos,
                                enemies) or utility.position_is_flames(
                                    board, new_pos):
                            deg_free += 1

                    deg_frees[(r, c)] = deg_free

                    if deg_free <= 1:
                        pos_scores[(r, c)] -= 5

            if self.logging:
                print('deg_free')
                print(deg_frees)

        # consider bomb blast: bomb cells and their arms get a flat -20
        for b in bombs:
            r, c = b['position']
            strength = b['blast_strength']

            pos_scores[(r, c)] = -20

            for row, col in directions:
                for d in range(1, strength):

                    new_pos = (r + d * row, c + d * col)
                    if new_pos[0] < 0 or new_pos[0] > 10:
                        continue
                    if new_pos[1] < 0 or new_pos[1] > 10:
                        continue

                    # NOTE(review): unreached cells (dist == inf) are
                    # overwritten as well. A tuple-vs-inf comparison that
                    # could never be true used to sit here; it presumably
                    # meant to skip them, but `_can_escape` reads these
                    # scores, so the effective behaviour is kept.
                    if new_pos not in dist:
                        continue

                    pos_scores[new_pos] = -20

        if self.logging:
            print('consider blast pos_score:')
            print(pos_scores)

        # consider enemies: small bonus for passable cells on a straight,
        # unobstructed line from each located enemy
        for e in enemies:
            pos = items.get(e, [])
            if len(pos) > 0:
                r = pos[0][0]
                c = pos[0][1]

                for row, col in directions:
                    for d in range(1, blast_strength * 2):
                        new_pos = (r + d * row, c + d * col)
                        if new_pos[0] < 0 or new_pos[0] > 10:
                            continue
                        if new_pos[1] < 0 or new_pos[1] > 10:
                            continue

                        if not utility.position_is_passable(
                                board, new_pos, enemies):
                            break

                        pos_scores[new_pos] += 0.3

        if self.logging:
            print('consider enemy:')
            print(pos_scores)

        # Best reachable cell. Only scores above -1 qualify, and ties keep
        # the first cell met in column-major order.
        h_r, h_c = -1, -1
        h_score = -1
        for c in range(11):
            for r in range(11):
                if (r, c) not in dist:
                    continue
                elif dist[(r, c)] == np.inf:
                    continue

                if h_score < pos_scores[(r, c)]:
                    h_score = pos_scores[(r, c)]
                    h_r, h_c = (r, c)

        if self.logging:
            print('h_score and pos:', h_score, '(r, c) =', h_r, ',', h_c)
            print('prev:')
            print(prev)

        # if current position is not the highest score position, move to the highest position.
        if h_r == -1:
            # Nothing worth going to.
            self.prev_action = constants.Action.Stop
        elif pos_scores[my_position] == h_score:
            # Already on a best-scoring cell: bomb only if escape is possible.
            if self._can_escape(pos_scores, my_position, blast_strength):
                self.prev_action = constants.Action.Bomb
            else:
                self.prev_action = constants.Action.Stop
        else:
            self.prev_action = self._backtrack(my_position, (h_r, h_c), prev)

        self.prev_pos = my_position
        if self.logging:
            print('action: ', self.prev_action)
        return self.prev_action.value
Exemplo n.º 24
0
    def act(self, obs, action_space):

        #
        # Definitions
        #

        self._search_range = 10

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies']]

        #
        # Prepare extended observations
        # - bomb moving direction
        # - flame remaining life
        #

        # Summarize information about bombs
        # curr_bombs : list of current bombs
        # moving_direction : array of moving direction of bombs
        curr_bombs, moving_direction, self._prev_bomb_life \
            = self._get_bombs(obs, self._prev_bomb_life)

        # Summarize information about flames
        curr_flames, self._prev_flame_life \
            = self._get_flames(board, self._prev_flame_life, self._prev_bomb_position_strength)

        # bombs to be exploded in the next step
        self._prev_bomb_position_strength = list()
        rows, cols = np.where(obs["bomb_blast_strength"] > 0)
        for position in zip(rows, cols):
            strength = int(obs["bomb_blast_strength"][position])
            self._prev_bomb_position_strength.append((position, strength))

        #
        # Understand current situation
        #

        # Simulation assuming enemies stay unmoved

        # List of simulated boards
        list_boards_no_move, _ \
            = self._board_sequence(board,
                                   curr_bombs,
                                   curr_flames,
                                   self._search_range,
                                   my_position,
                                   enemy_mobility=0)

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable_no_move, prev_no_move \
            = self._search_time_expanded_network(list_boards_no_move,
                                                 my_position)

        # Items that can be reached in a survivable manner
        reachable_items_no_move, reached_no_move, next_to_items_no_move \
            = self._find_reachable_items(list_boards_no_move,
                                         my_position,
                                         survivable_no_move)

        # Simulation assuming enemies move

        for enemy_mobility in range(3, -1, -1):
            # List of boards simulated
            list_boards, _ = self._board_sequence(board,
                                                  curr_bombs,
                                                  curr_flames,
                                                  self._search_range,
                                                  my_position,
                                                  enemy_mobility=enemy_mobility)

            # List of the set of survivable time-positions at each time
            # and preceding positions
            survivable, prev = self._search_time_expanded_network(list_boards,
                                                                  my_position)

            if len(survivable[1]) > 0:
                # Gradually reduce the mobility of enemy, so we have at least one survivable action
                break

        # Items that can be reached in a survivable manner
        reachable_items, reached, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable)

        # Survivable actions
        is_survivable, survivable_with_bomb \
            = self._get_survivable_actions(survivable,
                                           obs,
                                           curr_bombs,
                                           curr_flames)

        survivable_actions = [a for a in is_survivable if is_survivable[a]]
        
        if verbose:
            print("survivable actions are", survivable_actions)

        # Positions where we kick a bomb if we move to
        kickable = self._kickable_positions(obs, moving_direction)

        print()
        for t in range(0):
            print(list_boards[t])
            print(survivable[t])
            for key in prev[t]:
                print(key, prev[t][key])

        #
        # Choose an action
        #

        """
        # This is not effective in the current form
        if len(survivable_actions) > 1:
            # avoid the position if only one position at the following step
            # the number of positions that can be reached from the next position
            next = defaultdict(set)
            next_count = defaultdict(int)
            for position in survivable[1]:
                next[position] = set([p for p in prev[2] if position in prev[2][p]])
                next_count[position] = len(next[position])
            print("next count", next_count)
            if max(next_count.values()) > 1:
                for position in survivable[1]:
                    if next_count[position] == 1:
                        risky_action = self._get_direction(my_position, position)
                        is_survivable[risky_action] = False
                survivable_actions = [a for a in is_survivable if is_survivable[a]]                
        """

        # Do not stay on a bomb if I can
        if all([obs["bomb_life"][my_position] > 0,
                len(survivable_actions) > 1,
                is_survivable[constants.Action.Stop]]):
            is_survivable[constants.Action.Stop] = False
            survivable_actions = [a for a in is_survivable if is_survivable[a]]

        if len(survivable_actions) == 0:

            # must die
            # TODO: might want to do something that can help team mate
            # TODO: kick if possible
            print("Must die", constants.Action.Stop)
            return super().act(obs, action_space)
            # return constants.Action.Stop.value

        elif len(survivable_actions) == 1:

            # move to the position if it is the only survivable position
            action = survivable_actions[0]
            print("The only survivable action", action)
            return action.value

        # Move towards good items
        good_items = [constants.Item.ExtraBomb, constants.Item.IncrRange]
        # TODO : kick may be a good item only if I cannot kick yet
        # TODO : might want to destroy
        good_items.append(constants.Item.Kick)
        # positions with good items
        good_time_positions = set()
        for item in good_items:
            good_time_positions = good_time_positions.union(reachable_items[item])
        if len(good_time_positions) > 0:
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions,
                                                   prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward good item", action)
                return action.value

        # TODO : shoud check the survivability of all agents in one method

        # Place a bomb if
        # - it does not significantly reduce my survivability
        # - it can break wood
        # - it can reduce the survivability of enemies
        if is_survivable[constants.Action.Bomb]:
            # if survavable now after bomb, consider bomb
            if all([len(s) > 0 for s in survivable_with_bomb]):
                # if survivable all the time after bomb, consider bomb
                if all([self._can_break_wood(list_boards_no_move[-1],
                                             my_position,
                                             my_blast_strength)]
                       + [not utility.position_is_flames(board, my_position)
                          for board in list_boards_no_move[:10]]):
                    # place bomb if can break wood
                    print("Bomb to break wood", constants.Action.Bomb)
                    return constants.Action.Bomb.value

                for enemy in my_enemies:
                    # check if the enemy is reachable
                    if len(reachable_items_no_move[enemy]) == 0:
                        continue

                    # can reach the enemy at enemy_position in enemy_time step
                    enemy_time = reachable_items_no_move[enemy][0][0]
                    enemy_position = reachable_items_no_move[enemy][0][1:3]

                    # find direction towards enemy
                    positions = set([x[1:3] for x in next_to_items_no_move[enemy]])
                    for t in range(enemy_time, 1, -1):
                        _positions = set()
                        for position in positions:
                            _positions = _positions.union(prev_no_move[t][position])
                        positions = _positions.copy()

                    #if enemy_time <= my_blast_strength:
                    if True:
                        positions.add(my_position)
                        positions_after_bomb = set(survivable[1]).difference(positions)
                        if positions_after_bomb:
                            print("Bomb to kill an enemy", enemy, constants.Action.Bomb)
                            return constants.Action.Bomb.value
                    else:
                        # bomb to kick
                        x0, y0 = my_position
                        positions_against = [(2*x0-x, 2*y0-y) for (x, y) in positions]
                        positions_after_bomb = set(survivable[1]).intersection(positions_against)

                        if positions_after_bomb:
                            print("Bomb to kick", enemy, constants.Action.Bomb)
                            return constants.Action.Bomb.value

                    """
                    # check if placing a bomb can reduce the survivability
                    # of the enemy
                    survivable_before, _ = self._search_time_expanded_network(list_boards_no_move,
                                                                              enemy_position)

                    board_with_bomb = deepcopy(obs["board"])
                    curr_bombs_with_bomb = deepcopy(curr_bombs)
                    # lay a bomb
                    board_with_bomb[my_position] = constants.Item.Bomb.value
                    bomb = characters.Bomb(characters.Bomber(),  # dummy owner of the bomb
                                           my_position,
                                           constants.DEFAULT_BOMB_LIFE,
                                           my_blast_strength,
                                           None)
                    curr_bombs_with_bomb.append(bomb)
                    list_boards_with_bomb, _ \
                        = self._board_sequence(board_with_bomb,
                                               curr_bombs_with_bomb,
                                               curr_flames,
                                               self._search_range,
                                               my_position,
                                               enemy_mobility=0)
                    survivable_after, _ \
                        = self._search_time_expanded_network(list_boards_with_bomb,
                                                             enemy_position)

                    good_before = np.array([len(s) for s in survivable_before])
                    good_after = np.array([len(s) for s in survivable_after])
                    # TODO : what are good criteria?
                    if any(good_after < good_before):
                        # place a bomb if it makes sense
                        print("Bomb to kill an enemy", constants.Action.Bomb)
                        return constants.Action.Bomb.value
                    """

        # Move towards a wood
        if len(next_to_items_no_move[constants.Item.Wood]) > 0:
            # positions next to wood
            good_time_positions = next_to_items_no_move[constants.Item.Wood]
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions,
                                                   prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward wood", action)
                return action.value

        # kick whatever I can kick
        # -- tentative, this is generally not a good strategy
        if len(kickable) > 0:

            while kickable:
                # then consider what happens if I kick a bomb
                next_position = kickable.pop()

                # do not kick a bomb if it will break a wall
                if all([moving_direction[next_position] is None,
                        self._can_break_wood(board, next_position, my_blast_strength)]):
                    # if it is a static bomb
                    # do not kick if it is breaking a wall
                    continue

                my_action = self._get_direction(my_position, next_position)
                list_boards_with_kick, next_position \
                    = self._board_sequence(obs["board"],
                                           curr_bombs,
                                           curr_flames,
                                           self._search_range,
                                           my_position,
                                           my_action=my_action,
                                           can_kick=True,
                                           enemy_mobility=3)
                survivable_with_kick, prev_kick \
                    = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                         next_position)
                if next_position in survivable_with_kick[0]:
                    print("Kicking", my_action)
                    return my_action.value

        # Move towards an enemy
        good_time_positions = set()
        for enemy in my_enemies:
            good_time_positions = good_time_positions.union(next_to_items[enemy])
        if len(good_time_positions) > 0:
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions,
                                                   prev,
                                                   is_survivable)

            if obs["bomb_life"][my_position] > 0:
                # if on a bomb, move away
                if action == constants.Action.Down and is_survivable[constants.Action.Up]:
                    action = constants.Action.Up
                elif action == constants.Action.Up and is_survivable[constants.Action.Down]:
                    action = constants.Action.Down
                elif action == constants.Action.Right and is_survivable[constants.Action.Left]:
                    action = constants.Action.Left
                elif action == constants.Action.Left and is_survivable[constants.Action.Right]:
                    action = constants.Action.Right
                else:
                    action = None

            if action is not None:
                print("Moving toward/against enemy", action)
                return action.value

        #
        # as in the agent from the previous competition
        #
        action = super().act(obs, action_space)
        if is_survivable[constants.Action(action)]:
            print("Action from prev. agent", constants.Action(action))
            return action
        else:
            action = random.choice(survivable_actions)
            print("Random action", action)
            return action.value
Exemplo n.º 25
0
    def _djikstra(board,
                  my_position,
                  bombs,
                  enemies,
                  depth=None,
                  exclude=None):
        """
        Shortest-path search from ``my_position`` over the nearby board.

        NOTE(review): the signature has no ``self``; presumably this is
        decorated as a static method outside the visible snippet -- confirm.

        Parameters
        ----------
        board : 2-D array of item values, e.g. np.array(obs['board'])

        my_position : (row, col) tuple, e.g. tuple(obs['position'])

        bombs : iterable of dicts that have a 'position' key, e.g.
            convert_bombs(np.array(obs['bomb_blast_strength']))

        enemies : forwarded to utility.position_is_passable, e.g.
            [constants.Item(e) for e in obs['enemies']]

        depth : largest Manhattan distance from ``my_position`` that is
            considered; defaults to ``len(board) * 2``

        exclude : items whose cells are ignored altogether; defaults to
            Fog, Rigid and Flames

        Returns
        -------
        items : defaultdict(list) mapping constants.Item to the positions
            where that item was found (plus ``my_position`` under Bomb for
            every bomb located on it)

        dist : dict mapping position to the number of steps needed to
            reach it (np.inf if it was never reached)

        prev : dict mapping position to its predecessor on the path found
            (None for the start and for positions never reached)
        """

        if depth is None:
            depth = len(board) * 2

        if exclude is None:
            exclude = [
                constants.Item.Fog, constants.Item.Rigid, constants.Item.Flames
            ]

        def out_of_range(p1, p2):
            # Manhattan distance strictly greater than depth
            x1, y1 = p1
            x2, y2 = p2
            return abs(y2 - y1) + abs(x2 - x1) > depth

        items = defaultdict(list)

        # The board shows the agent on its own cell, so a bomb lying under
        # the agent has to be recorded from the bomb list instead.
        for bomb in bombs:
            if bomb['position'] == my_position:
                items[constants.Item.Bomb].append(my_position)

        dist = {}
        prev = {}

        mx, my = my_position
        # range() excludes its upper bound, hence the "+ 1": without it the
        # cells exactly `depth` rows below / columns right of the agent were
        # dropped although out_of_range() accepts them and the lower bound
        # includes their mirror images.
        for r in range(max(0, mx - depth), min(len(board), mx + depth + 1)):
            for c in range(max(0, my - depth), min(len(board), my + depth + 1)):
                position = (r, c)
                if (out_of_range(my_position, position)
                        or utility.position_in_items(board, position,
                                                     exclude)):
                    continue

                dist[position] = 0 if position == my_position else np.inf
                prev[position] = None

                item = constants.Item(board[position])
                items[item].append(position)

        # Djikstra
        H = []
        heapq.heappush(H, (0, my_position))
        while H:
            min_dist, position = heapq.heappop(H)

            # A cell that cannot be walked through still keeps the distance
            # it was reached with, but the search does not continue past it.
            if not utility.position_is_passable(board, position, enemies):
                continue

            x, y = position
            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                new_position = (row + x, col + y)
                if new_position not in dist:
                    # off the board, out of range, or excluded
                    continue

                if min_dist + 1 < dist[new_position]:
                    dist[new_position] = min_dist + 1
                    prev[new_position] = position
                    heapq.heappush(H, (dist[new_position], new_position))

        return items, dist, prev
Exemplo n.º 26
0
    def act(self, obs, action_space):
        """Choose the next action by walking a fixed list of heuristics.

        The first rule that fires decides the action:

        1. If the current cell has a safety score of ``-inf``, head for the
           nearest reachable cell scoring above 1 (or, when none is close
           enough, one scoring exactly 1).
        2. Step towards a good power-up when the next cell scores above 1.
        3. Lay a bomb when ``_what_to_break`` finds something and
           ``_maybe_bomb`` agrees.
        4. Step towards an enemy; repeating the previous chase direction is
           only accepted with probability 0.5.
        5. Step towards nearby wood when the next cell scores above 1.
        6. Otherwise draw one of the remaining valid, safe, not recently
           visited directions, weighted by the softmax of the safety score
           of the cell each direction leads to.

        Parameters
        ----------
        obs : dict; the keys 'position', 'board', 'bomb_blast_strength',
            'enemies', 'ammo' and 'blast_strength' are read.
        action_space : unused here.

        Returns
        -------
        The ``.value`` of the chosen ``constants.Action``.
        """

        def convert_bombs(bomb_map):
            """List each cell with positive blast strength as a bomb dict."""
            locations = np.where(bomb_map > 0)
            return [{
                'position': (r, c),
                'blast_strength': int(bomb_map[(r, c)])
            } for r, c in zip(locations[0], locations[1])]

        my_position = tuple(obs['position'])
        board = np.array(obs['board'])
        bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
        enemies = [constants.Item(e) for e in obs['enemies']]
        ammo = int(obs['ammo'])
        blast_strength = int(obs['blast_strength'])
        items, dist, prev = self._djikstra(board,
                                           my_position,
                                           bombs,
                                           enemies,
                                           depth=20)

        # safety score
        safety_score = self._make_safety_score(board, items, bombs, enemies)
        if safety_score[my_position] == -np.inf:
            max_distance = 5
            safe_positions = []
            maybe_positions = []
            mx, my = my_position
            # range() excludes its upper bound; the "+ 1" makes the window
            # symmetric so cells max_distance below / to the right are
            # searched just like those above / to the left.
            for x in range(max(0, mx - max_distance),
                           min(len(board), mx + max_distance + 1)):
                for y in range(max(0, my - max_distance),
                               min(len(board), my + max_distance + 1)):
                    if (x, y) not in dist:
                        # unreachable
                        continue
                    score = safety_score[(x, y)]
                    if score > 1:
                        safe_positions.append((x, y))
                    elif score == 1:
                        maybe_positions.append((x, y))
            nearest = None
            dist_to = max(dist.values())
            for position in safe_positions:
                d = dist[position]
                if d <= dist_to:
                    nearest = position
                    dist_to = d
            if dist_to > max_distance:
                # no clearly safe cell close by: settle for a "maybe" cell
                for position in maybe_positions:
                    d = dist[position]
                    if d <= dist_to:
                        nearest = position
                        dist_to = d
            # prev[nearest] is None when nearest is the current cell or was
            # never reached by the search; fall through in that case.
            if nearest is not None and prev[nearest] is not None:
                # found a way to escape
                direction = self._get_direction_towards_position(
                    my_position, nearest, prev)
                if verbose:
                    print("escaping")
                return direction.value

        # Move towards a powerup
        direction = self._near_good_powerup(my_position, items, dist, prev, 20)
        if direction is not None:
            if safety_score[utility.get_next_position(my_position,
                                                      direction)] > 1:
                if verbose:
                    print("item")
                return direction.value
            if verbose:
                print("item but unsafe")

        # Break whatever you can break
        to_break = self._what_to_break(board, my_position, blast_strength)
        maybe = self._maybe_bomb(ammo, blast_strength, items, dist,
                                 my_position)
        if len(to_break) > 0 and maybe:
            if verbose:
                print("to break", to_break)
            return constants.Action.Bomb.value

        # Move towards an enemy
        # The original chose this value depending on whether any wood,
        # extra-bomb or kick item was visible, but both branches used 3.
        to_chase = 3
        direction = self._near_enemy(my_position, items, dist, prev, enemies,
                                     to_chase)
        if direction is not None and (self._prev_direction != direction
                                      or random.random() < .5):
            self._prev_direction = direction
            if verbose:
                print("enemy")
            return direction.value

        # Move towards a wooden wall if there is one within two reachable spaces and you have a bomb.
        direction = self._near_wood(my_position, items, dist, prev, 2)
        if direction is not None:
            if safety_score[utility.get_next_position(my_position,
                                                      direction)] > 1:
                if verbose:
                    print("wood")
                return direction.value

        # Choose a random but valid direction.
        directions = [
            constants.Action.Stop, constants.Action.Left,
            constants.Action.Right, constants.Action.Up, constants.Action.Down
        ]
        valid_directions = self._filter_invalid_directions(
            board, my_position, directions, enemies)
        directions = self._filter_unsafe_directions(board, my_position,
                                                    valid_directions, bombs)
        directions = self._filter_recently_visited(
            directions, my_position, self._recently_visited_positions)
        if len(directions) > 1:
            # prefer moving over standing still when there is a choice
            directions = [k for k in directions if k != constants.Action.Stop]
        if not len(directions):
            directions = [constants.Action.Stop]

        # Add this position to the recently visited uninteresting positions so we don't return immediately.
        self._recently_visited_positions.append(my_position)
        self._recently_visited_positions = self._recently_visited_positions[
            -self._recently_visited_length:]

        p = [
            safety_score[utility.get_next_position(my_position, d)]
            for d in directions
        ]
        if verbose:
            print("random", p, directions)
        # softmax over the safety scores
        p = np.exp(p)
        if len(p) == 1:
            p = [1]
        else:
            p /= np.sum(p)
        try:
            # random.choices returns a list of samples (one by default), so
            # the single pick has to be taken out with [0] before .value.
            return random.choices(directions, weights=p)[0].value
        except (ValueError, IndexError, TypeError):
            # unusable weights (e.g. all zero or NaN): fall back to uniform
            return random.choice(directions).value
Exemplo n.º 27
0
    def act(self, obs, action_space, info):
        """Choose the next action from simulated future boards.

        The method simulates board sequences, determines which actions are
        survivable, and then walks a priority list; the first rule that
        fires returns:

        1. no survivable action            -> ``None``
        2. exactly one survivable action   -> that action
        3. bomb is survivable and the action with the largest blocked
           fraction is Stop or Bomb        -> Bomb
        4. the best blocking move leads to a cell whose blocked fraction
           exceeds 0.1                     -> that move
        5. standing on a wood bomb target and bombing looks safe -> Bomb
        6. a good item is reachable        -> move towards it
        7. a bomb target is reachable      -> move towards it
        8. a kick lowers an enemy's number of survivable nodes -> kick
        9. fog is reachable                -> move towards the fog cell
           whose neighbourhood has been unseen the longest
        10. otherwise                      -> ``_get_most_survivable_action``

        Parameters
        ----------
        obs : dict; the keys 'board', 'position', 'ammo', 'blast_strength',
            'enemies', 'teammate', 'can_kick', 'bomb_blast_strength' and
            'bomb_life' are read.
        action_space : unused here.
        info : dict; the keys 'list_boards_no_move', 'curr_bombs',
            'curr_flames', 'enemy_blast_strength', 'moving_direction',
            'might_powerup' and 'since_last_seen' are read.

        Returns
        -------
        The ``.value`` of the chosen ``constants.Action``, or ``None`` when
        no action is survivable.
        """

        #
        # Definitions
        #

        # parameters forwarded to the board simulation helpers
        enemy_mobility = 4
        enemy_bomb = 1

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        my_teammate = obs["teammate"]
        my_kick = obs["can_kick"]  # whether I can kick

        print("my position",
              my_position,
              "ammo",
              my_ammo,
              "blast",
              my_blast_strength,
              "kick",
              my_kick,
              end="\t")

        #
        # Understand current situation
        #

        # fraction of blocked node in the survival trees of enemies
        # Work on a deep copy so that info["list_boards_no_move"] itself is
        # left untouched; on the copy, my own agent is replaced by a bomb
        # (if one lies under me) or by a passage.
        _list_boards = deepcopy(info["list_boards_no_move"])
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

        #np.set_printoptions(precision=2)
        #print("frac")
        #print(total_frac_blocked)

        # where to place bombs to break wood
        bomb_target_wood, n_breakable \
            = self._get_bomb_target(info["list_boards_no_move"][-1],
                                    my_position,
                                    my_blast_strength,
                                    constants.Item.Wood,
                                    max_breakable=False)

        #bomb_target_enemy = (total_frac_blocked > 0)
        #bomb_target = bomb_target_enemy + bomb_target_wood
        bomb_target = bomb_target_wood

        # List of boards simulated
        list_boards, _ = self._board_sequence(
            board,
            info["curr_bombs"],
            info["curr_flames"],
            self._search_range,
            my_position,
            enemy_mobility=enemy_mobility,
            enemy_bomb=enemy_bomb,
            enemy_blast_strength=info["enemy_blast_strength"])

        # some bombs may explode with extra bombs, leading to under estimation
        # so copy every flame cell of the no-move simulation into the
        # simulated board of the same time step
        for t in range(len(list_boards)):
            flame_positions = np.where(
                info["list_boards_no_move"][t] == constants.Item.Flames.value)
            list_boards[t][flame_positions] = constants.Item.Flames.value

        #print("boards")
        #for t, b in enumerate(list_boards):
        #    print(t)
        #    print(b[-3:,:])
        #    if t > 2:
        #        break

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, succ, _ \
            = self._search_time_expanded_network(list_boards,
                                                 my_position)

        # Survivable actions
        is_survivable, survivable_with_bomb \
            = self._get_survivable_actions(survivable,
                                           obs,
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           enemy_mobility=enemy_mobility,
                                           enemy_bomb=enemy_bomb,
                                           enemy_blast_strength=info["enemy_blast_strength"])

        # action -> list of survivable-position counts along the horizon
        n_survivable = dict()
        # kick moves found to be survivable below
        kick_actions = list()
        if my_kick:
            # Positions where we kick a bomb if we move to
            kickable, _ = self._kickable_positions(obs,
                                                   info["moving_direction"])
            for next_position in kickable:
                # consider what happens if I kick a bomb
                my_action = self._get_direction(my_position, next_position)

                # do not kick into fog
                # (walk from the bomb along the kick direction to the edge
                # of the board, looking for a fog cell)
                dx = next_position[0] - my_position[0]
                dy = next_position[1] - my_position[1]
                position = next_position
                is_fog = False
                while self._on_board(position):
                    if utility.position_is_fog(board, position):
                        is_fog = True
                        break
                    position = (position[0] + dx, position[1] + dy)
                if is_fog:
                    continue

                # NOTE: next_position is rebound here to the second value
                # returned by _board_sequence.
                list_boards_with_kick, next_position \
                    = self._board_sequence(obs["board"],
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           self._search_range,
                                           my_position,
                                           my_action=my_action,
                                           can_kick=True,
                                           enemy_mobility=enemy_mobility,
                                           enemy_bomb=enemy_bomb,
                                           enemy_blast_strength=info["enemy_blast_strength"])
                #print(list_boards_with_kick)
                survivable_with_kick, prev_kick, succ_kick, _ \
                    = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                         next_position)
                if next_position in survivable_with_kick[0]:
                    is_survivable[my_action] = True
                    n_survivable[my_action] = [1] + [
                        len(s) for s in survivable_with_kick[1:]
                    ]
                    kick_actions.append(my_action)
        else:
            kickable = set()

        survivable_actions = [a for a in is_survivable if is_survivable[a]]

        #print("survivable actions", survivable_actions)

        # nothing survivable: signal that with None instead of an action
        if len(survivable_actions) == 0:
            return None

        #
        # Items and bomb target that can be reached in a survivable manner
        #

        reachable_items, reached, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable,
                                         bomb_target)

        #
        # Evaluate the survivability of each action
        #

        x, y = my_position
        for action in survivable_actions:
            # for each survivable action, check the survivability
            if action == constants.Action.Bomb:
                n_survivable[action] = [
                    len(s) for s in survivable_with_bomb[1:]
                ]
                continue

            # translate the action into a (row, col) offset
            if action == constants.Action.Up:
                dx = -1
                dy = 0
            elif action == constants.Action.Down:
                dx = 1
                dy = 0
            elif action == constants.Action.Left:
                dx = 0
                dy = -1
            elif action == constants.Action.Right:
                dx = 0
                dy = 1
            elif action == constants.Action.Stop:
                dx = 0
                dy = 0
            else:
                raise ValueError()
            next_position = (x + dx, y + dy)
            # NOTE(review): this also overwrites the counts stored above for
            # survivable kick moves -- confirm that this is intended.
            n_survivable[action], _info = self._count_survivable(
                succ, 1, next_position)

        if verbose:
            print("n_survivable")
            for a in n_survivable:
                print(a, n_survivable[a])

        #
        # Avoid the action leading to no choice if possible
        #
        updated = False

        # drop actions whose final count is at most half of the best one
        max_survivable_positions = max([n[-1] for n in n_survivable.values()])
        if max_survivable_positions > 1:
            for a in n_survivable:
                if n_survivable[a][-1] > max_survivable_positions / 2:
                    continue
                is_survivable[a] = False
                updated = True

        # drop actions that narrow down to a single position somewhere from
        # index enemy_mobility on, provided another action never does
        minn = defaultdict(int)
        for a in n_survivable:
            minn[a] = min(n_survivable[a][enemy_mobility:])
        maxmin = max(minn.values())
        if maxmin > 1:
            for a in minn:
                if minn[a] == 1:
                    is_survivable[a] = False
                    updated = True

        if updated:
            survivable_actions = [a for a in is_survivable if is_survivable[a]]

        #
        # Choose the survivable action, if it is the only choice
        #

        if len(survivable_actions) == 1:

            # move to the position if it is the only survivable position
            action = survivable_actions[0]
            print("The only survivable action", action)
            return action.value
        # The bare string literal below is disabled code kept by the author;
        # it is evaluated as an expression and has no effect.
        """
        #
        # Bomb if it has dominating survivability
        #

        if is_survivable[constants.Action.Bomb]:
            bomb_is_most_survivable = True
            bomb_sorted = np.array(sorted(n_survivable[constants.Action.Bomb]))
            for action in n_survivable:
                if action == constants.Action.Bomb:
                    continue
                action_sorted = np.array(sorted(n_survivable[action]))
                if any(action_sorted > bomb_sorted):
                    bomb_is_most_survivable = False
                    break
            if bomb_is_most_survivable:
                action = constants.Action.Bomb
                print("Bomb to survive", action)
                return action.value
        """

        #
        # Bomb at a target
        #

        # survivable action whose next position has the largest (strictly
        # positive) blocked fraction; None if every fraction is 0
        best_action = None
        max_block = 0
        for action in survivable_actions:
            next_position = self._get_next_position(my_position, action)
            block = total_frac_blocked[next_position]
            if block > max_block:
                max_block = block
                best_action = action

        if all([
                is_survivable[constants.Action.Bomb], best_action
                in [constants.Action.Stop, constants.Action.Bomb]
        ]):
            print("Place a bomb at a locally optimal position",
                  constants.Action.Bomb)
            return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #

        if best_action not in [None, constants.Action.Bomb]:
            next_position = self._get_next_position(my_position, best_action)
            # TODO : PARAMETER TO OPTIMIZE
            if total_frac_blocked[next_position] > 0.1:
                print("Move towards better place to bomb", best_action)
                return best_action.value

        #
        # Bomb to break wood
        #

        consider_bomb = True
        if survivable_with_bomb is None:
            consider_bomb = False
        elif any([len(s) <= 1 for s in survivable_with_bomb[2:]]):
            # if not sufficiently survivable all the time after bomb, do not bomb
            consider_bomb = False
        elif self._might_break_powerup(info["list_boards_no_move"][-1],
                                       my_position, my_blast_strength,
                                       info["might_powerup"]):
            # if might break an item, do not bomb
            consider_bomb = False

        if consider_bomb and bomb_target[my_position]:
            # place bomb if I am at a bomb target
            print("Bomb at a bomb target", constants.Action.Bomb)
            return constants.Action.Bomb.value

        #
        # Move towards good items
        #

        good_items = [
            constants.Item.ExtraBomb, constants.Item.IncrRange,
            constants.Item.Kick
        ]
        good_time_positions = set()  # positions with good items
        for item in good_items:
            good_time_positions = good_time_positions.union(
                reachable_items[item])
        if len(good_time_positions) > 0:
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions, prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward good item", action)
                return action.value

        #
        # Move towards where to bomb to break wood
        #

        good_time_positions = reachable_items["target"]
        print("good time positions", good_time_positions)
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions, prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward where to bomb", action)
            return action.value

        #
        # Kick
        #

        for my_action in kick_actions:
            # cell that holds the bomb this kick would hit
            if my_action == constants.Action.Up:
                next_position = (my_position[0] - 1, my_position[1])
            elif my_action == constants.Action.Down:
                next_position = (my_position[0] + 1, my_position[1])
            elif my_action == constants.Action.Right:
                next_position = (my_position[0], my_position[1] + 1)
            elif my_action == constants.Action.Left:
                next_position = (my_position[0], my_position[1] - 1)
            # do not kick a bomb if it will break a wall, enemies
            if info["moving_direction"][next_position] is None:
                print("checking static bomb")
                # if it is a static bomb
                if self._can_break(info["list_boards_no_move"][0],
                                   next_position, my_blast_strength,
                                   [constants.Item.Wood] + my_enemies):
                    continue

            # simulate the kick with enemy_mobility=0
            list_boards_with_kick_no_move, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=0)

            for enemy in my_enemies:
                rows, cols = np.where(board == enemy.value)
                if len(rows) == 0:
                    # this enemy is not on the observed board
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable, _, _, _ \
                    = self._search_time_expanded_network(list_boards_with_kick_no_move,
                                                         enemy_position)

                # kick as soon as it leaves some enemy with fewer survivable
                # nodes than computed before the kick
                n_survivable_nodes_with_kick = sum(
                    [len(positions) for positions in _survivable])
                if n_survivable_nodes_with_kick < n_survivable_nodes[enemy]:
                    print("Kicking to reduce the survivability",
                          n_survivable_nodes[enemy], "->",
                          n_survivable_nodes_with_kick, my_action)
                    return my_action.value

        #
        # TODO : move toward might powerups
        #

        #
        # Move towards a fog where we have not seen longest
        #

        best_time_position = None
        oldest = 0
        for t, x, y in next_to_items[constants.Item.Fog]:
            neighbors = [(x + dx, y + dy)
                         for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
            # largest "since last seen" value among the on-board neighbours
            age = max([
                info["since_last_seen"][position] for position in neighbors
                if self._on_board(position)
            ])
            if age > oldest:
                oldest = age
                best_time_position = (t, x, y)

        if best_time_position is not None:
            action = self._find_distance_minimizer(my_position,
                                                   [best_time_position], prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward oldest fog", action)
                return action.value

        #
        # Choose most survivable action
        #

        action = self._get_most_survivable_action(n_survivable)
        print("Most survivable action", action)
        return action.value
Exemplo n.º 28
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        board = info['recently_seen']
        #board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_kick = obs["can_kick"]  # whether I can kick
        my_enemies = [
            constants.Item(e) for e in obs['enemies']
            if e != constants.Item.AgentDummy
        ]
        if obs["teammate"] != constants.Item.AgentDummy:
            my_teammate = obs["teammate"]
        else:
            my_teammate = None

        all_feasible_actions = [
            a for a in info["my_next_position"] if info["my_next_position"][a]
        ]

        # positions that might be blocked
        if info["teammate_position"] is None:
            agent_positions = info["enemy_positions"]
        else:
            agent_positions = info["enemy_positions"] + [
                info["teammate_position"]
            ]

        #
        # Fraction of blocked node in the survival trees of enemies
        #

        _list_boards = info["list_boards_no_move"]
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes, blocked_time_positions \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"],
                                     ignore_dying_agent=False)

        if info["teammate_position"] is not None:
            total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
                = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"],
                                         ignore_dying_agent=True)

        block = defaultdict(float)
        for action in [
                constants.Action.Stop, constants.Action.Up,
                constants.Action.Down, constants.Action.Left,
                constants.Action.Right
        ]:

            next_position = info["my_next_position"][action]

            if next_position is None:
                continue

            if next_position in info["all_kickable"]:
                # kick will be considered later
                continue

            block[action] = total_frac_blocked[next_position]
            if info["teammate_position"] is not None and block[action] > 0:
                block[action] *= (1 -
                                  total_frac_blocked_teammate[next_position])

            if block[action] > 0:
                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

        if all([my_ammo > 0, obs["bomb_blast_strength"][my_position] == 0]):

            list_boards_with_bomb, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=constants.Action.Bomb,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])

            block[constants.Action.Bomb] \
                = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies,
                                                   ignore_dying_agent=False)
            block[constants.Action.Bomb] \
                += total_frac_blocked[my_position] * (1 - block[constants.Action.Bomb])

            if info["teammate_position"] is not None:
                block_teammate_with_bomb = self._get_frac_blocked_two_lists(
                    list_boards_with_bomb,
                    n_survivable_nodes_teammate,
                    board, [my_teammate],
                    ignore_dying_agent=True)
                # this is an approximation
                block_teammate_with_bomb \
                    += total_frac_blocked_teammate[my_position] * (1 - block_teammate_with_bomb)

                block[constants.Action.Bomb] *= (1 - block_teammate_with_bomb)

            if block[constants.Action.Bomb] > 0:
                block[constants.Action.Bomb] *= self._inv_tmp
                block[constants.Action.Bomb] -= np.log(
                    -np.log(self.random.uniform()))

        block_teammate_with_kick = defaultdict(float)
        for next_position in info["all_kickable"]:

            my_action = self._get_direction(my_position, next_position)

            backedup = False
            if board[next_position] != constants.Item.Bomb.value:
                backup_cell = board[next_position]
                board[
                    next_position] = constants.Item.Bomb.value  # an agent will be overwritten
                backedup = True

            list_boards_with_kick, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])

            if backedup:
                board[next_position] = backup_cell

            block[my_action] \
                = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies)
            block[my_action] \
                += total_frac_blocked[next_position] * (1 - block[my_action])

            if block[my_action] > 0 and info["teammate_position"] is not None:
                block_teammate_with_kick[next_position] \
                    = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                       n_survivable_nodes_teammate,
                                                       board,
                                                       [my_teammate],
                                                       ignore_dying_agent=True)

                # this is an approximation
                block_teammate_with_kick[next_position] \
                    += total_frac_blocked_teammate[next_position] * (1 - block_teammate_with_kick[next_position])

                block[my_action] *= (1 -
                                     block_teammate_with_kick[next_position])

            if block[my_action] > 0:
                block[my_action] *= self._inv_tmp
                block[my_action] -= np.log(-np.log(self.random.uniform()))

        n_survivable_move, is_survivable_move, list_boards_move \
            = self._get_survivable(obs, info, my_position, info["my_next_position"], info["enemy_positions"],
                                   info["all_kickable"], allow_kick_to_fog=True,
                                   enemy_mobility=1, enemy_bomb=0,
                                   ignore_dying_agent=False,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        for a in all_feasible_actions:
            if a not in n_survivable_move:
                n_survivable_move[a] = np.zeros(self._search_range)

        enemy_can_place_bomb = any([
            obs["bomb_blast_strength"][position] == 0
            for position in info["enemy_positions"]
        ])

        if enemy_can_place_bomb:

            n_survivable_bomb, is_survivable_bomb, list_boards_bomb \
                = self._get_survivable(obs, info, my_position, info["my_next_position"], info["enemy_positions"],
                                       info["all_kickable"], allow_kick_to_fog=True,
                                       enemy_mobility=0, enemy_bomb=1,
                                       ignore_dying_agent=False,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])

            for a in all_feasible_actions:
                if a not in n_survivable_bomb:
                    n_survivable_bomb[a] = np.zeros(self._search_range)

            might_survivable_actions = set(
                [a
                 for a in n_survivable_bomb if n_survivable_bomb[a][-1] > 0] +
                [a for a in n_survivable_move if n_survivable_move[a][-1] > 0])

            might_survivable_actions -= info["might_block_actions"]
            for a in info["might_block_actions"]:
                n_survivable_bomb[a] = np.zeros(self._search_range)
                n_survivable_move[a] = np.zeros(self._search_range)

            for a in might_survivable_actions:
                if a not in n_survivable_bomb:
                    n_survivable_bomb[a] = np.zeros(self._search_range)
                if a not in n_survivable_move:
                    n_survivable_move[a] = np.zeros(self._search_range)

            survivable_actions = list()
            for action in might_survivable_actions:
                if n_survivable_move[action][-1] > 0 and n_survivable_bomb[
                        action][-1] > 0:
                    if not info["might_blocked"][action] or n_survivable_move[
                            constants.Action.Stop][-1] > 0:
                        survivable_actions.append(action)

            n_survivable_expected = dict()
            for a in survivable_actions:
                if info["might_blocked"][a]:
                    n_survivable_expected[a] \
                        = np.array(n_survivable_bomb[a]) \
                        + np.array(n_survivable_move[constants.Action.Stop]) \
                        + np.array(n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 3
                else:
                    n_survivable_expected[a] = np.array(
                        n_survivable_bomb[a]) + 2 * np.array(
                            n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 3
                n_survivable_expected[a] = n_survivable_expected[a]

        else:

            might_survivable_actions = set(
                [a for a in n_survivable_move if n_survivable_move[a][-1] > 0])

            might_survivable_actions -= info["might_block_actions"]
            for a in info["might_block_actions"]:
                n_survivable_move[a] = np.zeros(self._search_range)

            survivable_actions = list()
            for action in might_survivable_actions:
                if n_survivable_move[action][-1] > 0:
                    if not info["might_blocked"][action] or n_survivable_move[
                            constants.Action.Stop][-1] > 0:
                        survivable_actions.append(action)

            for a in might_survivable_actions:
                if a not in n_survivable_move:
                    n_survivable_move[a] = np.zeros(self._search_range)

            n_survivable_expected = dict()
            for a in survivable_actions:
                if info["might_blocked"][a]:
                    n_survivable_expected[a] \
                        = np.array(n_survivable_move[constants.Action.Stop]) \
                        + np.array(n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 2
                else:
                    n_survivable_expected[a] = np.array(n_survivable_move[a])

        #
        # Choose actions
        #

        if len(survivable_actions) == 1:

            action = survivable_actions.pop()
            return action.value

        if len(survivable_actions) > 1:

            most_survivable_actions = self._get_most_survivable_actions(
                n_survivable_expected)

            if len(most_survivable_actions) == 1:

                return most_survivable_actions[0].value

            elif len(most_survivable_actions) > 1:

                # tie break by block score
                max_block = 0  # do not choose 0
                best_action = None
                for action in all_feasible_actions:
                    if action not in most_survivable_actions:
                        # for deterministic behavior
                        continue
                    if info["might_block_teammate"][action]:
                        continue
                    if block[action] > max_block:
                        max_block = block[action]
                        best_action = action
                if best_action is not None:
                    return best_action.value

        #
        # no survivable actions for all cases
        #

        if enemy_can_place_bomb:

            n_survivable_expected = dict()
            for a in all_feasible_actions:
                if info["might_blocked"][a]:
                    if is_survivable_move[constants.Action.Stop]:
                        n_survivable_expected[a] \
                            = np.array(n_survivable_bomb[a]) \
                            + np.array(n_survivable_move[constants.Action.Stop]) \
                            + np.array(n_survivable_move[a])
                    else:
                        n_survivable_expected[a] \
                            = np.array(n_survivable_bomb[a]) \
                            + np.array(n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 3
                else:
                    n_survivable_expected[a] = np.array(
                        n_survivable_bomb[a]) + 2 * np.array(
                            n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 3

        else:

            n_survivable_expected = dict()
            for a in all_feasible_actions:
                if info["my_next_position"][a] is None:
                    continue
                if info["might_blocked"][a]:
                    if is_survivable_move[constants.Action.Stop]:
                        n_survivable_expected[a] \
                            = np.array(n_survivable_move[constants.Action.Stop]) \
                            + np.array(n_survivable_move[a])
                    else:
                        n_survivable_expected[a] = np.array(
                            n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 2
                else:
                    n_survivable_expected[a] = np.array(n_survivable_move[a])

        if len(might_survivable_actions) == 1:

            action = might_survivable_actions.pop()
            return action.value

        if len(might_survivable_actions) > 1:

            most_survivable_actions = self._get_most_survivable_actions(
                n_survivable_expected)

            if len(most_survivable_actions) == 1:

                return most_survivable_actions[0].value

            elif len(most_survivable_actions) > 1:

                # tie break by block score
                max_block = 0  # do not choose 0
                best_action = None
                for action in all_feasible_actions:
                    if action not in most_survivable_actions:
                        # for deterministic behavior
                        continue
                    if info["might_block_teammate"][action]:
                        continue
                    if block[action] > max_block:
                        max_block = block[action]
                        best_action = action

                if best_action is not None:
                    return best_action.value

        # no survivable action found for any cases
        # TODO : Then consider killing enemies or helping teammate

        max_block = 0  # do not choose 0
        best_action = None
        for action in all_feasible_actions:
            if action not in block:
                # for deterministic behavior
                continue
            if info["might_block_teammate"][action]:
                continue
            if all([
                    action == constants.Action.Bomb, info["teammate_position"]
                    is not None
            ]):
                if block_teammate_with_bomb > 0:
                    continue
            next_position = info["my_next_position"][action]
            if all([
                    next_position in info["all_kickable"],
                    block_teammate_with_kick[next_position] > 0
            ]):
                continue
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if best_action is not None:
            return best_action.value

        # longest survivable action

        longest_survivable_actions = self._get_longest_survivable_actions(
            n_survivable_expected)

        if len(longest_survivable_actions) == 1:

            return longest_survivable_actions[0].value

        elif len(longest_survivable_actions) > 1:

            # break tie by most survivable actions
            for a in n_survivable_expected:
                if a not in longest_survivable_actions:
                    n_survivable_expected[a] = np.zeros(self._search_range)
            most_survivable_actions = self._get_most_survivable_actions(
                n_survivable_expected)

            if len(most_survivable_actions) == 1:

                return most_survivable_actions[0].value

            elif len(most_survivable_actions) > 1:

                if info["teammate_position"] is not None:
                    min_block = np.inf
                    best_action = None
                    for a in all_feasible_actions:
                        if a not in most_survivable_actions:
                            # for deterministic behavior
                            continue
                        if a == constants.Action.Bomb:
                            score = block_teammate_with_bomb  # do not choose Bomb unless it is strictly better than others
                        else:
                            next_position = info["my_next_position"][a]
                            if next_position in info["all_kickable"]:
                                score = block_teammate_with_kick[
                                    next_position] - self.random.uniform(
                                        0, 1e-6)
                            else:
                                score = total_frac_blocked_teammate[
                                    next_position] - self.random.uniform(
                                        0, 1e-6)
                        if score < min_block:
                            min_block = score
                            best_action = a
                    if best_action is not None:
                        return best_action.value
                else:
                    # remove Bomb (as it is most affected by bugs)
                    #most_survivable_actions = list(set(most_survivable_actions) - {constants.Action.Bomb})
                    most_survivable_actions = [
                        a for a in all_feasible_actions
                        if a in most_survivable_actions
                        and a != constants.Action.Bomb
                    ]

                    index = self.random.randint(len(most_survivable_actions))
                    random_action = most_survivable_actions[index]
                    return random_action.value

        # The following will not be used

        self.random.shuffle(all_feasible_actions)
        if len(all_feasible_actions):
            action = all_feasible_actions[0]
            return action.value

        action = constants.Action.Stop
        return action.value
Exemplo n.º 29
0
    def act(self, obs, action_space):
        """Store the latest transition and pick the next action by fixed rules.

        The observation is first run through ``self.process_observation`` and
        the tuple ``(self.prev_state, state, 0, self.prev_action)`` is pushed
        into ``self.memory``.  The action is then chosen by the first rule
        that fires, in this order:

        1. escape if standing in range of a bomb,
        2. bomb when adjacent to an enemy (if ``_maybe_bomb`` agrees),
        3. walk towards an enemy within three reachable spaces,
        4. walk towards a good power-up within two reachable spaces,
        5. bomb (or wait) when next to a wooden wall,
        6. walk towards a wooden wall within two reachable spaces,
        7. otherwise take a random valid, safe, not recently visited move.

        Args:
            obs: observation dict; the keys read here are ``'board'``,
                ``'position'``, ``'bomb_life'``, ``'bomb_blast_strength'``,
                ``'enemies'``, ``'ammo'`` and ``'blast_strength'``.
            action_space: not used by this method.

        Returns:
            The ``.value`` of the chosen action; the same value is also
            remembered in ``self.prev_action``.
        """

        def _commit(chosen):
            # Remember the action so the next call can store it together
            # with the transition, then hand it back to the caller.
            self.prev_action = chosen
            return chosen

        # Very first call: seed the "previous" snapshot before the
        # observation is processed.
        if self.prev_ammo is None:
            self.prev_ammo = self.ammo
            self.prev_blast_strength = self.blast_strength
            self.prev_can_kick = self.can_kick
            # presumably board code 2 marks wooden walls -- TODO confirm
            self.prev_wood_wall = np.sum(obs['board'] == 2)

        self.steps += 1
        state = self.process_observation(
            obs['board'], obs['position'], obs['bomb_life'],
            obs['bomb_blast_strength'], obs['enemies'])
        # The reward slot of the stored transition is always 0 here.
        self.memory.store(self.prev_state, state, 0, self.prev_action)

        # Refresh the snapshot that the next call will compare against.
        self.prev_state = state
        self.prev_ammo = self.ammo
        self.prev_blast_strength = self.blast_strength
        self.prev_can_kick = self.can_kick
        self.prev_wood_wall = np.sum(obs['board'] == 2)

        def convert_bombs(bomb_map):
            """List every cell with a positive entry as a bomb record."""
            hits = np.where(bomb_map > 0)
            return [{
                'position': (r, c),
                'blast_strength': int(bomb_map[(r, c)])
            } for r, c in zip(hits[0], hits[1])]

        my_position = tuple(obs['position'])
        board = np.array(obs['board'])
        bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
        enemies = [constants.Item(e) for e in obs['enemies']]
        ammo = int(obs['ammo'])
        blast_strength = int(obs['blast_strength'])
        items, dist, prev = self._djikstra(
            board, my_position, bombs, enemies, depth=10)

        # Rule 1: get out of the way if we are in an unsafe place.
        unsafe_directions = self._directions_in_range_of_bomb(
            board, my_position, bombs, dist)
        if unsafe_directions:
            escape_routes = self._find_safe_directions(
                board, my_position, unsafe_directions, bombs, enemies)
            return _commit(random.choice(escape_routes).value)

        # Rule 2: lay a bomb if we are adjacent to an enemy.
        if self._is_adjacent_enemy(items, dist, enemies) and self._maybe_bomb(
                ammo, blast_strength, items, dist, my_position):
            return _commit(constants.Action.Bomb.value)

        # Rule 3: approach an enemy within three reachable spaces.  When the
        # heading is unchanged from last time, only follow it half the time.
        heading = self._near_enemy(my_position, items, dist, prev, enemies, 3)
        if heading is not None:
            if self._prev_direction != heading or random.random() < .5:
                self._prev_direction = heading
                return _commit(heading.value)

        # Rule 4: approach a good item within two reachable spaces.
        heading = self._near_good_powerup(my_position, items, dist, prev, 2)
        if heading is not None:
            return _commit(heading.value)

        # Rule 5: next to a wooden wall -- bomb it if allowed, otherwise wait.
        if self._near_wood(my_position, items, dist, prev, 1):
            if self._maybe_bomb(ammo, blast_strength, items, dist,
                                my_position):
                wall_choice = constants.Action.Bomb
            else:
                wall_choice = constants.Action.Stop
            return _commit(wall_choice.value)

        # Rule 6: approach a wooden wall within two reachable spaces, but only
        # if that step survives the unsafe-direction filter.
        heading = self._near_wood(my_position, items, dist, prev, 2)
        if heading is not None:
            safe_headings = self._filter_unsafe_directions(
                board, my_position, [heading], bombs)
            if safe_headings:
                return _commit(safe_headings[0].value)

        # Rule 7: random but valid direction.
        candidates = [
            constants.Action.Stop, constants.Action.Left,
            constants.Action.Right, constants.Action.Up, constants.Action.Down
        ]
        candidates = self._filter_invalid_directions(
            board, my_position, candidates, enemies)
        candidates = self._filter_unsafe_directions(
            board, my_position, candidates, bombs)
        candidates = self._filter_recently_visited(
            candidates, my_position, self._recently_visited_positions)
        # Prefer moving over standing still whenever there is a choice.
        if len(candidates) > 1:
            candidates = [k for k in candidates if k != constants.Action.Stop]
        if len(candidates) == 0:
            candidates = [constants.Action.Stop]

        # Record this cell as recently visited (bounded history) so we do not
        # come straight back to it.
        self._recently_visited_positions.append(my_position)
        self._recently_visited_positions = self._recently_visited_positions[
            -self._recently_visited_length:]
        return _commit(random.choice(candidates).value)
Exemplo n.º 30
0
    def act(self, obs, action_space):
        """Choose a move: tree search when an enemy is engaged, else delegate.

        Every call refreshes the cached board and enemy list, updates the
        visit-count board ``self.copy_board`` and calls
        ``self.find_bombing_agents``.  If
        ``self.enemy_in_my_sights_and_ammo(obs, 5)`` is true, a search tree is
        built from the observation and grown until the root has been visited
        25 times; the root child with the highest ``self.UCB`` score supplies
        the move.  Otherwise a fresh ``agents.YichenAgent`` is configured and
        its ``act`` result is returned.

        Args:
            obs: observation dict; ``'position'``, ``'board'``, ``'enemies'``
                and ``'bomb_life'`` are read here, and the whole dict is
                forwarded to the tree / fallback agent.
            action_space: stored on ``self.action_space`` and forwarded to the
                fallback agent.

        Returns:
            ``winner_node.state.move`` from the search, or whatever the
            fallback agent's ``act`` returns.
        """
        self.action_space = action_space
        my_pos = tuple(obs['position'])
        # Two independent copies: a local working board and the cached one.
        board = np.array(obs['board'])
        self.board = np.array(obs['board'])
        self._enemies = [constants.Item(e) for e in obs['enemies']]

        if self.copy_walls:
            # Give cells whose board code is 1 a huge visit count.
            # presumably code 1 is a rigid wall, so these are never preferred
            # -- TODO confirm against the board encoding.
            for i, visit_row in enumerate(self.copy_board):
                for j in range(len(visit_row)):
                    if board[i][j] == 1:
                        self.copy_board[i][j] = 9999

        # Count this step's visit to our current cell.
        self.copy_board[my_pos[0]][my_pos[1]] += 1

        # Check for new bombs on the field first.
        bomb_life_map = np.array(obs['bomb_life'])
        self.find_bombing_agents(bomb_life_map, board)

        if not self.enemy_in_my_sights_and_ammo(obs, 5):
            # Search is skipped: hand the decision to a freshly built
            # YichenAgent that shares our visit board.
            self.agt = agents.YichenAgent()
            self.agt.make_a_visit_board(self.copy_board)
            # The board value at our own cell is passed as the agent id.
            aid = board[my_pos[0]][my_pos[1]]
            self.agt.init_agent(aid, constants.GameType.FFA)
            self.agt.set_start_position(my_pos)
            self.agt.reset(is_alive=True)
            return self.agt.act(obs, action_space)

        # --- Monte Carlo tree search ---------------------------------------
        tree = gn.Tree(obs, True, self.bombing_agents)
        self.rootNode = tree.get_root_node()

        # Kept as an attribute in case other code reads it; the loop below is
        # bounded by the root's visit count, not by wall-clock time.
        self.end_time = 30
        while self.rootNode.visit_count < 25:
            # select -> expand -> simulate from a random child -> back-propagate
            promising_node = self.select_promising_node(self.rootNode)
            self.expand_node(promising_node)
            node_to_explore = promising_node.get_random_child_node()
            simulation_result = self.simulate_random_play(node_to_explore)
            self.back_propogation(node_to_explore, simulation_result)

        # The winner is the root child with the highest UCB score.
        # NOTE(review): winner_node stays None if the root has no children or
        # no score compares greater than -inf; the attribute access below then
        # raises AttributeError -- confirm that cannot happen in practice.
        winner_node = None
        max_ucb = float('-inf')
        root_visits_source = self.rootNode
        for child in root_visits_source.childArray:
            ucb_score = self.UCB(child, child.get_win_score(),
                                 child.get_visit_count(),
                                 self.rootNode.get_visit_count())
            if ucb_score > max_ucb:
                max_ucb = ucb_score
                winner_node = child

        # Carry the winner's view of who has been bombing into the next step.
        self.bombing_agents = winner_node.state.bombing_agents
        return winner_node.state.move