Пример #1
0
 def _what_to_break(cls, board, my_position, blast_strength):
     """
     List the items that a bomb placed at my_position would hit first.

     The board is scanned in a straight line along each of the four axis
     directions, for offsets 1 .. blast_strength - 1.  The scan in a
     direction ends at the board edge, at a rigid cell (nothing is
     recorded), or at the first wood/agent cell (which is recorded).

     Parameters
     ----------
     board : array
         board indexed as board[(x, y)], so x runs along the first axis
         and y along the second
     my_position : tuple
         (x, y) where the bomb would be placed
     blast_strength : int
         scan offsets are range(1, blast_strength)

     Return
     ------
     to_break : list
         constants.Item of every wood/agent hit, in the order the
         directions are scanned: +x, -x, +y, -y
     """
     x, y = my_position
     # x indexes the first axis and y the second, so each coordinate is
     # bounded by the size of its own axis (matters for non-square boards).
     n_rows = len(board)
     n_cols = len(board[0]) if n_rows else 0
     to_break = list()
     for step_x, step_y in ((1, 0), (-1, 0), (0, 1), (0, -1)):
         for distance in range(1, blast_strength):
             next_x = x + step_x * distance
             next_y = y + step_y * distance
             if not (0 <= next_x < n_rows and 0 <= next_y < n_cols):
                 # ran off the board
                 break
             position = (next_x, next_y)
             if utility.position_is_rigid(board, position):
                 # stop searching this direction
                 break
             if (utility.position_is_wood(board, position)
                     or utility.position_is_agent(board, position)):
                 # the first breakable thing absorbs the blast
                 to_break.append(constants.Item(board[position]))
                 break
     return to_break
Пример #2
0
    def _is_closed(self, board, position):
        """
        Check whether the position is enclosed, i.e. no agent can be reached
        from it by moving through cells that are neither wall nor fog.

        The start position itself is never tested for an agent; only the
        cells discovered from it are.

        Parameters
        ----------
        board = np.array(obs['board'])

        position = tuple(obs['position'])

        Return
        ------
        False as soon as an agent cell is discovered, True otherwise
        """

        visited = np.full(board.shape, False)
        visited[position] = True
        stack = [position]
        offsets = [(-1, 0), (1, 0), (0, -1), (0, 1)]

        while stack:
            row, col = stack.pop()
            for d_row, d_col in offsets:
                candidate = (row + d_row, col + d_col)
                if not self._on_board(candidate) or visited[candidate]:
                    continue
                # mark before classifying so each cell is examined once
                visited[candidate] = True
                if utility.position_is_agent(board, candidate):
                    return False
                blocked = (utility.position_is_wall(board, candidate)
                           or utility.position_is_fog(board, candidate))
                if not blocked:
                    # free cell: keep expanding from it
                    stack.append(candidate)

        return True
Пример #3
0
def _check_if_flame_will_gone(obs, prev_two_obs, flame_pos):
    """
    Decide whether the flame at flame_pos is expected to be gone next step.

    The flame counts as "about to go" only if every cell of the connected
    flame group it belongs to (in the current board) was also a flame in
    both previous observations.  A cell that was a flame in both previous
    observations may still be an old flame overlapped by a newer one, so
    the whole group is checked rather than the single cell.

    Parameters
    ----------
    obs : dict
        current observation; only obs['board'] is read
    prev_two_obs : sequence
        the two previous observations (both must not be None); only their
        'board' entries are read
    flame_pos : tuple
        position of the flame to check

    Return
    ------
    bool
        True if the whole flame group was a flame in both previous boards
    """
    assert (prev_two_obs[0] is not None)
    assert (prev_two_obs[1] is not None)
    prev_boards = (prev_two_obs[0]['board'], prev_two_obs[1]['board'])

    def _was_flame_in_both(pos):
        return all(utility.position_is_flames(b, pos) for b in prev_boards)

    if not _was_flame_in_both(flame_pos):
        return False
    board = obs['board']
    # maxlen=121 presumably matches an 11x11 board -- TODO confirm
    Q = deque(maxlen=121)
    Q.append(flame_pos)
    visited = [flame_pos]
    dirs = _all_directions(exclude_stop=True)
    while len(Q) > 0:
        pos = Q.popleft()
        if not _was_flame_in_both(pos):
            return False
        for d in dirs:
            next_pos = utility.get_next_position(pos, d)
            # Expand over neighbouring *flame* cells so that the connected
            # flame group is actually traversed.
            if utility.position_on_board(board, next_pos) \
                    and utility.position_is_flames(board, next_pos):
                if next_pos not in visited:
                    Q.append(next_pos)
                    visited.append(next_pos)
    return True
Пример #4
0
def position_is_bombable(board, position, bombs):
    """Return True if any of the five checks below accepts ``position``.

    The checks are: agent, powerup, passage (all on ``board``), flame
    (on ``board``) and bomb (against ``bombs``).  All five are always
    evaluated, in this order, before the result is combined.
    """
    has_agent = utility.position_is_agent(board, position)
    has_powerup = utility.position_is_powerup(board, position)
    is_passage = utility.position_is_passage(board, position)
    has_flame = position_is_flame(board, position)
    has_bomb = position_is_bomb(bombs, position)
    return bool(
        has_agent or has_powerup or is_passage or has_flame or has_bomb)
Пример #5
0
def position_is_passable(board, position, enemies):
    '''Determine whether a position can be passed.

    A position is passable when it is an agent, powerup, passage or fog
    cell and is not reported as an enemy by utility.position_is_enemy.
    Every check is evaluated before the result is combined.
    '''
    has_agent = utility.position_is_agent(board, position)
    has_powerup = utility.position_is_powerup(board, position)
    is_passage = utility.position_is_passage(board, position)
    is_fog = utility.position_is_fog(board, position)
    is_enemy = utility.position_is_enemy(board, position, enemies)
    if is_enemy:
        return False
    return bool(has_agent or has_powerup or is_passage or is_fog)
Пример #6
0
def _stop_condition(board, pos, exclude_agent=True):
    """Return True if a scan should stop at ``pos``.

    The scan stops when ``pos`` is off the board, is fog, or is a wall.
    Agent cells stop the scan only when ``exclude_agent`` is false.
    """
    if not utility.position_on_board(board, pos):
        return True
    if utility.position_is_fog(board, pos) \
            or utility.position_is_wall(board, pos):
        return True
    # agents are only consulted when the caller did not exclude them
    return bool(not exclude_agent and utility.position_is_agent(board, pos))
Пример #7
0
    def act(self, obs, action_space, info):
        """
        Choose the action for this step from a cascade of prioritized rules.

        The method simulates future boards, determines which actions are
        survivable, and then returns the first action selected by the rules
        below (in this order): the only survivable action, bomb/move by
        blocked-fraction score, bomb at a wood target, move toward good
        items, move toward a bomb target, kick, move toward the oldest fog,
        and finally the most survivable action.

        Parameters
        ----------
        obs : dict
            observation; the keys read here are 'board', 'position', 'ammo',
            'blast_strength', 'enemies', 'teammate', 'can_kick',
            'bomb_blast_strength' and 'bomb_life'
        action_space :
            not used in this method
        info : dict
            precomputed data; the keys read here are 'list_boards_no_move',
            'curr_bombs', 'curr_flames', 'enemy_blast_strength',
            'moving_direction', 'might_powerup' and 'since_last_seen'

        Return
        ------
        The ``.value`` of the chosen constants.Action, or None when no
        action is survivable.
        """

        #
        # Definitions
        #

        # passed to the simulation helpers below as enemy_mobility / enemy_bomb
        enemy_mobility = 4
        enemy_bomb = 1

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        my_teammate = obs["teammate"]  # read but not used below
        my_kick = obs["can_kick"]  # whether I can kick

        print("my position",
              my_position,
              "ammo",
              my_ammo,
              "blast",
              my_blast_strength,
              "kick",
              my_kick,
              end="\t")

        #
        # Understand current situation
        #

        # fraction of blocked node in the survival trees of enemies
        # Work on a copy: my own cell is replaced by Bomb (if there is a bomb
        # under me) or Passage in every board where it shows an agent.
        _list_boards = deepcopy(info["list_boards_no_move"])
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

        #np.set_printoptions(precision=2)
        #print("frac")
        #print(total_frac_blocked)

        # where to place bombs to break wood
        bomb_target_wood, n_breakable \
            = self._get_bomb_target(info["list_boards_no_move"][-1],
                                    my_position,
                                    my_blast_strength,
                                    constants.Item.Wood,
                                    max_breakable=False)

        #bomb_target_enemy = (total_frac_blocked > 0)
        #bomb_target = bomb_target_enemy + bomb_target_wood
        bomb_target = bomb_target_wood

        # List of boards simulated
        list_boards, _ = self._board_sequence(
            board,
            info["curr_bombs"],
            info["curr_flames"],
            self._search_range,
            my_position,
            enemy_mobility=enemy_mobility,
            enemy_bomb=enemy_bomb,
            enemy_blast_strength=info["enemy_blast_strength"])

        # some bombs may explode with extra bombs, leading to under estimation
        # so copy every flame of the no-move boards into the simulated boards
        for t in range(len(list_boards)):
            flame_positions = np.where(
                info["list_boards_no_move"][t] == constants.Item.Flames.value)
            list_boards[t][flame_positions] = constants.Item.Flames.value

        #print("boards")
        #for t, b in enumerate(list_boards):
        #    print(t)
        #    print(b[-3:,:])
        #    if t > 2:
        #        break

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, succ, _ \
            = self._search_time_expanded_network(list_boards,
                                                 my_position)

        # Survivable actions
        is_survivable, survivable_with_bomb \
            = self._get_survivable_actions(survivable,
                                           obs,
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           enemy_mobility=enemy_mobility,
                                           enemy_bomb=enemy_bomb,
                                           enemy_blast_strength=info["enemy_blast_strength"])

        # n_survivable[action] : list of survivable-position counts per time
        n_survivable = dict()
        kick_actions = list()
        if my_kick:
            # Positions where we kick a bomb if we move to
            kickable, _ = self._kickable_positions(obs,
                                                   info["moving_direction"])
            for next_position in kickable:
                # consider what happens if I kick a bomb
                my_action = self._get_direction(my_position, next_position)

                # do not kick into fog
                # (walk from the bomb along the kick direction to the edge)
                dx = next_position[0] - my_position[0]
                dy = next_position[1] - my_position[1]
                position = next_position
                is_fog = False
                while self._on_board(position):
                    if utility.position_is_fog(board, position):
                        is_fog = True
                        break
                    position = (position[0] + dx, position[1] + dy)
                if is_fog:
                    continue

                # next_position is rebound here to the second value returned
                # by _board_sequence
                list_boards_with_kick, next_position \
                    = self._board_sequence(obs["board"],
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           self._search_range,
                                           my_position,
                                           my_action=my_action,
                                           can_kick=True,
                                           enemy_mobility=enemy_mobility,
                                           enemy_bomb=enemy_bomb,
                                           enemy_blast_strength=info["enemy_blast_strength"])
                #print(list_boards_with_kick)
                survivable_with_kick, prev_kick, succ_kick, _ \
                    = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                         next_position)
                if next_position in survivable_with_kick[0]:
                    is_survivable[my_action] = True
                    n_survivable[my_action] = [1] + [
                        len(s) for s in survivable_with_kick[1:]
                    ]
                    kick_actions.append(my_action)
        else:
            kickable = set()

        survivable_actions = [a for a in is_survivable if is_survivable[a]]

        #print("survivable actions", survivable_actions)

        # nothing survivable: give up and return None
        if len(survivable_actions) == 0:
            return None

        #
        # Items and bomb target that can be reached in a survivable manner
        #

        reachable_items, reached, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable,
                                         bomb_target)

        #
        # Evaluate the survivability of each action
        #

        x, y = my_position
        for action in survivable_actions:
            # for each survivable action, check the survivability
            if action == constants.Action.Bomb:
                n_survivable[action] = [
                    len(s) for s in survivable_with_bomb[1:]
                ]
                continue

            # Up/Down change the first coordinate, Left/Right the second
            if action == constants.Action.Up:
                dx = -1
                dy = 0
            elif action == constants.Action.Down:
                dx = 1
                dy = 0
            elif action == constants.Action.Left:
                dx = 0
                dy = -1
            elif action == constants.Action.Right:
                dx = 0
                dy = 1
            elif action == constants.Action.Stop:
                dx = 0
                dy = 0
            else:
                raise ValueError()
            next_position = (x + dx, y + dy)
            # note: this overwrites any count stored for a kick action above
            n_survivable[action], _info = self._count_survivable(
                succ, 1, next_position)

        # NOTE(review): `verbose` is not defined in this method; presumably a
        # module-level flag -- confirm it exists, otherwise this is a NameError
        if verbose:
            print("n_survivable")
            for a in n_survivable:
                print(a, n_survivable[a])

        #
        # Avoid the action leading to no choice if possible
        #
        updated = False

        # drop actions whose final count is at most half of the best final count
        max_survivable_positions = max([n[-1] for n in n_survivable.values()])
        if max_survivable_positions > 1:
            for a in n_survivable:
                if n_survivable[a][-1] > max_survivable_positions / 2:
                    continue
                is_survivable[a] = False
                updated = True

        # drop actions that bottleneck to a single position at or after index
        # enemy_mobility, provided some action avoids such a bottleneck
        # NOTE(review): min() raises ValueError if a list has no entries from
        # index enemy_mobility on -- confirm the lists are always long enough
        minn = defaultdict(int)
        for a in n_survivable:
            minn[a] = min(n_survivable[a][enemy_mobility:])
        maxmin = max(minn.values())
        if maxmin > 1:
            for a in minn:
                if minn[a] == 1:
                    is_survivable[a] = False
                    updated = True

        if updated:
            survivable_actions = [a for a in is_survivable if is_survivable[a]]

        #
        # Choose the survivable action, if it is the only choice
        #

        if len(survivable_actions) == 1:

            # move to the position if it is the only survivable position
            action = survivable_actions[0]
            print("The only survivable action", action)
            return action.value
        """
        #
        # Bomb if it has dominating survivability
        #

        if is_survivable[constants.Action.Bomb]:
            bomb_is_most_survivable = True
            bomb_sorted = np.array(sorted(n_survivable[constants.Action.Bomb]))
            for action in n_survivable:
                if action == constants.Action.Bomb:
                    continue
                action_sorted = np.array(sorted(n_survivable[action]))
                if any(action_sorted > bomb_sorted):
                    bomb_is_most_survivable = False
                    break
            if bomb_is_most_survivable:
                action = constants.Action.Bomb
                print("Bomb to survive", action)
                return action.value
        """

        #
        # Bomb at a target
        #

        # pick the survivable action whose next position has the largest
        # (strictly positive) total_frac_blocked
        best_action = None
        max_block = 0
        for action in survivable_actions:
            next_position = self._get_next_position(my_position, action)
            block = total_frac_blocked[next_position]
            if block > max_block:
                max_block = block
                best_action = action

        if all([
                is_survivable[constants.Action.Bomb], best_action
                in [constants.Action.Stop, constants.Action.Bomb]
        ]):
            print("Place a bomb at a locally optimal position",
                  constants.Action.Bomb)
            return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #

        if best_action not in [None, constants.Action.Bomb]:
            next_position = self._get_next_position(my_position, best_action)
            # TODO : PARAMETER TO OPTIMIZE
            if total_frac_blocked[next_position] > 0.1:
                print("Move towards better place to bomb", best_action)
                return best_action.value

        #
        # Bomb to break wood
        #

        consider_bomb = True
        if survivable_with_bomb is None:
            consider_bomb = False
        elif any([len(s) <= 1 for s in survivable_with_bomb[2:]]):
            # if not sufficiently survivable all the time after bomb, do not bomb
            consider_bomb = False
        elif self._might_break_powerup(info["list_boards_no_move"][-1],
                                       my_position, my_blast_strength,
                                       info["might_powerup"]):
            # if might break an item, do not bomb
            consider_bomb = False

        if consider_bomb and bomb_target[my_position]:
            # place bomb if I am at a bomb target
            print("Bomb at a bomb target", constants.Action.Bomb)
            return constants.Action.Bomb.value

        #
        # Move towards good items
        #

        good_items = [
            constants.Item.ExtraBomb, constants.Item.IncrRange,
            constants.Item.Kick
        ]
        good_time_positions = set()  # positions with good items
        for item in good_items:
            good_time_positions = good_time_positions.union(
                reachable_items[item])
        if len(good_time_positions) > 0:
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions, prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward good item", action)
                return action.value

        #
        # Move towards where to bomb to break wood
        #

        good_time_positions = reachable_items["target"]
        print("good time positions", good_time_positions)
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions, prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward where to bomb", action)
            return action.value

        #
        # Kick
        #

        for my_action in kick_actions:
            # NOTE(review): if my_action is none of the four moves,
            # next_position keeps whatever value it had before this loop
            if my_action == constants.Action.Up:
                next_position = (my_position[0] - 1, my_position[1])
            elif my_action == constants.Action.Down:
                next_position = (my_position[0] + 1, my_position[1])
            elif my_action == constants.Action.Right:
                next_position = (my_position[0], my_position[1] + 1)
            elif my_action == constants.Action.Left:
                next_position = (my_position[0], my_position[1] - 1)
            # do not kick a bomb if it will break a wall, enemies
            if info["moving_direction"][next_position] is None:
                print("checking static bomb")
                # if it is a static bomb
                if self._can_break(info["list_boards_no_move"][0],
                                   next_position, my_blast_strength,
                                   [constants.Item.Wood] + my_enemies):
                    continue

            # simulate the kick with enemies kept still (enemy_mobility=0)
            list_boards_with_kick_no_move, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=0)

            for enemy in my_enemies:
                rows, cols = np.where(board == enemy.value)
                if len(rows) == 0:
                    # this enemy is not visible on the current board
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable, _, _, _ \
                    = self._search_time_expanded_network(list_boards_with_kick_no_move,
                                                         enemy_position)

                # kick if it lowers this enemy's count of survivable nodes
                n_survivable_nodes_with_kick = sum(
                    [len(positions) for positions in _survivable])
                if n_survivable_nodes_with_kick < n_survivable_nodes[enemy]:
                    print("Kicking to reduce the survivability",
                          n_survivable_nodes[enemy], "->",
                          n_survivable_nodes_with_kick, my_action)
                    return my_action.value

        #
        # TODO : move toward might powerups
        #

        #
        # Move towards a fog where we have not seen longest
        #

        best_time_position = None
        oldest = 0
        for t, x, y in next_to_items[constants.Item.Fog]:
            neighbors = [(x + dx, y + dy)
                         for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
            # largest since_last_seen value among the on-board neighbors
            age = max([
                info["since_last_seen"][position] for position in neighbors
                if self._on_board(position)
            ])
            if age > oldest:
                oldest = age
                best_time_position = (t, x, y)

        if best_time_position is not None:
            action = self._find_distance_minimizer(my_position,
                                                   [best_time_position], prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward oldest fog", action)
                return action.value

        #
        # Choose most survivable action
        #

        action = self._get_most_survivable_action(n_survivable)
        print("Most survivable action", action)
        return action.value
Пример #8
0
    def _board_sequence(self, board, bombs, flames, length, my_position,
                        my_action=None, can_kick=False, enemy_mobility=3):
        """
        Simulate the sequence of boards, assuming agents stay unmoved
        (apart from my optional first action and the dummy spreading of
        enemy positions during the first enemy_mobility steps)

        Parameters
        ----------
        board : array
            initial board
        bombs : list
            list of initial bombs
        flames : list
            list of initial flames
        length : int
            length of the board sequence to simulate
        my_position : tuple
            position of my agent
        my_action : Action, optional
            my action at the first step
        can_kick : boolean, optional
            whether I can kick
        enemy_mobility : int, optional
            number of steps where enemies move nondeterministically

        Return
        ------
        list_boards : list
            list of boards: the initial board followed by one board per
            simulated step (length + 1 boards)
        my_next_position : tuple or None
            position of the first simulated agent after the first step;
            None if there is no agent to simulate or length is 0
        """

        # Forward model to simulate
        model = ForwardModel()

        # Prepare initial state
        # (copies, so that the caller's board, bombs and flames are untouched)
        _board = board.copy()
        _bombs = deepcopy(bombs)
        _flames = deepcopy(flames)
        _items = dict()  # we never know hidden items
        _actions = [constants.Action.Stop.value] * 4
        if my_action is not None:
            agent = characters.Bomber()
            # presumably agent board values start at 10 -- TODO confirm
            agent.agent_id = board[my_position] - 10
            agent.position = my_position
            agent.can_kick = can_kick
            _agents = [agent]
            _actions[agent.agent_id] = my_action
        else:
            _agents = list()

        my_next_position = None

        # Get enemy positions to take into account their mobility
        # (every cell above AgentDummy except my own position)
        rows, cols = np.where(_board > constants.Item.AgentDummy.value)
        enemy_positions = [position for position in zip(rows, cols)
                           if position != my_position]

        # List of enemies
        enemies = list()
        for position in enemy_positions:
            agent = characters.Bomber()
            agent.agent_id = board[position] - 10
            agent.position = position
            enemies.append(agent)

        # my agent (if any) comes first, followed by the enemies
        _agents = _agents + enemies

        # Overwrite bomb over agent if they overlap
        for bomb in _bombs:
            _board[bomb.position] = constants.Item.Bomb.value

        # Simulate
        list_boards = [_board.copy()]
        for t in range(length):
            # Standard simulation step
            _board, _agents, _bombs, _, _flames \
                = model.step(_actions,
                             _board,
                             _agents,
                             _bombs,
                             _items,
                             _flames)

            # Overwrite passage over my agent when it has moved to a passage
            # NOTE(review): _agents[0] is my agent only when my_action was
            # given; with my_action=None it is the first enemy -- confirm
            # that this is intended
            if t == 0 and len(_agents) > 0:
                agent = _agents[0]
                my_next_position = agent.position
                if all([agent.position != my_position,
                        _board[agent.position] != constants.Item.Flames.value,
                        _board[agent.position] != constants.Item.Bomb.value]):
                    # I did not die and did not stay on a bomb
                    _board[agent.position] = constants.Item.Passage.value

            # Overwrite bomb over agent if they overlap
            for bomb in _bombs:
                _board[bomb.position] = constants.Item.Bomb.value

            # Take into account the nondeterministic mobility of enemies
            if t < enemy_mobility:
                _enemy_positions = set()
                for x, y in enemy_positions:
                    # for each enemy position in the previous step
                    for dx, dy in [(0, 0), (1, 0), (-1, 0), (0, 1), (0, -1)]:
                        # consider the next possible position
                        next_position = (x + dx, y + dy)
                        if not self._on_board(next_position):
                            # ignore if out of board
                            continue
                        if any([utility.position_is_passage(_board, next_position),
                                utility.position_is_powerup(_board, next_position),
                                (next_position == my_position
                                 and utility.position_is_agent(_board, next_position)
                                )]):
                            # possible as a next position
                            # TODO : what to do with my position
                            _enemy_positions.add(next_position)
                            # mark the cell as possibly occupied by an enemy
                            _board[next_position] = constants.Item.AgentDummy.value
                enemy_positions = _enemy_positions

            # after the first step everybody stops and only enemies are simulated
            _actions = [constants.Action.Stop.value] * 4
            _agents = enemies
            list_boards.append(_board.copy())

        return list_boards, my_next_position
Пример #9
0
    def _find_reachable_items(self, list_boards, my_position, time_positions):

        """
        Collect the items found on, or right next to, reachable positions

        Positions are visited in time order; each position is recorded only
        at the first time it appears in time_positions, and each neighboring
        cell is classified only once.

        Parameters
        ----------
        list_boards : list
            list of boards, generated by _board_sequence
        my_position : tuple
            my position (not used in the body)
        time_positions : list
            survivable time-positions, generated by _search_time_expanded_network

        Return
        ------
        items : dict
            items[item] : list of (t, x, y) at which item is found, either on
                          a reached position or on a neighboring agent/wood cell
        reached : array
            minimum time to reach each position on the board (inf if never)
        next_to_items : dict
            next_to_items[item] : list of reached (t, x, y) that are adjacent
                                  to an agent or wood cell holding item
        """

        items = defaultdict(list)
        next_to_items = defaultdict(list)

        # earliest arrival time per cell; inf means "not reached"
        reached = np.full(self.board_shape, np.inf)

        # cells that have already been classified (reached or neighbor)
        inspected = np.full(self.board_shape, False)

        offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))

        for t, positions in enumerate(time_positions):
            board_t = list_boards[t]
            for position in positions:
                if reached[position] < np.inf:
                    # already recorded at an earlier time
                    continue
                reached[position] = t
                kind = constants.Item(board_t[position])
                here = (t,) + position
                items[kind].append(here)
                inspected[position] = True

                x, y = position
                for d_row, d_col in offsets:
                    neighbor = (x + d_row, y + d_col)
                    if not self._on_board(neighbor) or inspected[neighbor]:
                        continue
                    inspected[neighbor] = True
                    there = (t,) + neighbor

                    if utility.position_is_agent(board_t, neighbor):
                        kind = constants.Item(board_t[neighbor])
                        items[kind].append(there)
                        next_to_items[kind].append(here)

                    # wood is judged on the final board, so wood that gets
                    # destroyed during the sequence is ignored
                    if utility.position_is_wood(list_boards[-1], neighbor):
                        kind = constants.Item(board_t[neighbor])
                        items[kind].append(there)
                        next_to_items[kind].append(here)

        return items, reached, next_to_items
Пример #10
0
    def _search_time_expanded_network(self, list_boards, my_position):

        """
        Find survivable time-positions in the list of boards from my position

        Parameters
        ----------
        list_boards : list
            list of boards, generated by _board_sequence
        my_position : tuple
            my position, where the search starts

        Return
        ------
        survivable : list
            list of the set of survivable time-positions at each time
            survivable[t] : set of survivable positions at time t
        prev : list
            prev[t] : dict
            prev[t][position] : list of positions from which
                                one can reach the position at time t
        """

        depth = len(list_boards)

        # TODO : what to do with Fog?
        exclude = [constants.Item.Fog,
                   constants.Item.Rigid,
                   constants.Item.Wood,
                   constants.Item.Bomb,
                   constants.Item.Flames,
                   constants.Item.AgentDummy]

        if list_boards[0][my_position] == constants.Item.Flames.value:
            return [set()] * depth, [list()] * depth
        
        # Forward search for reachable positions
        # reachable[(t,x,y]): whether can reach (x,y) at time t
        reachable = np.full((depth,) + self.board_shape, False)
        reachable[(0,)+my_position] = True
        next_positions = set([my_position])
        my_position_get_flame = False
        for t in range(1, depth):
            if list_boards[t][my_position] == constants.Item.Flames.value:
                my_position_get_flame = True
            curr_positions = next_positions
            next_positions = set()

            # add all possible positions
            for curr_position in curr_positions:
                next_positions.add(curr_position)
                x, y = curr_position
                for row, col in [(0, 0), (-1, 0), (1, 0), (0, -1), (0, 1)]:
                    next_positions.add((x + row, y + col))

            for position in next_positions.copy():
                if not self._on_board(position):
                    # remove out of positions
                    next_positions.remove(position)
                elif list_boards[t][position] == constants.Item.AgentDummy.value:
                    # TODO: this may be too conservative
                    # avoid contact to other agents
                    next_positions.remove(position)
                elif position == my_position and not my_position_get_flame:
                    # can stay even on bomb until getting flame
                    continue
                elif utility.position_in_items(list_boards[t], position, exclude):
                    # remove blocked
                    next_positions.remove(position)
                elif utility.position_is_agent(list_boards[t], position):
                    # if occupied by another agent
                    next_positions.remove(position)

            for position in next_positions:
                reachable[(t,)+position] = True

        # Backward search for survivable positions
        # survivable[t]: set of survavable positions at time t
        # prev[t][position]: list of positions from which
        #                    one can reach the position at time t
        survivable = [set() for _ in range(depth)]
        survivable[-1] = next_positions
        prev = [defaultdict(list) for _ in range(depth+1)]
        for t in range(depth-1, 0, -1):
            for position in survivable[t]:
                # for each position surviving at time t
                # if the position is on a bomb, I must have stayed there since I placed the bomb
                if list_boards[t][position] == constants.Item.Bomb.value:
                    if reachable[(t-1,)+position]:
                        prev[t][position].append(position)
                        continue

                # otherwise, standard case
                x, y = position
                for row, col in [(0, 0), (-1, 0), (1, 0), (0, -1), (0, 1)]:
                    # consider the prev_position at time t - 1
                    prev_position = (x + row, y + col)
                    if not self._on_board(prev_position):
                        # discard the prev_position if out of board
                        continue
                    if reachable[(t-1,)+prev_position]:
                        # can reach the position at time t
                        # from the prev_position at time t-1
                        prev[t][position].append(prev_position)

            # the set of prev_positions at time t-1
            # from which one can reach the surviving positions at time t
            survivable[t-1] = set([position for prevs in prev[t].values()
                                   for position in prevs])

        return survivable, prev
Пример #11
0
    def act(self, obs, action_space, info):
        """
        Choose the action that keeps me alive for the longest time

        Every position in the survivable set at time 1 is mapped to the
        action that moves me there and is scored by the latest time step
        linked to it in the survivable tree.  Laying a bomb is scored in the
        same way on a board sequence that contains the new bomb.  One of the
        best-scoring actions is then chosen uniformly at random.

        Parameters
        ----------
        obs : dict
            observation; the keys read here are 'board', 'position',
            'enemies', 'ammo', 'blast_strength', 'bomb_blast_strength'
            and 'bomb_life'
        action_space
            not used
        info : dict
            the keys read here are 'list_boards_no_move', 'curr_bombs'
            and 'curr_flames'

        Return
        ------
        action
            the `.value` of the chosen constants.Action
            (constants.Action.Stop.value if there is no candidate action)
        """

        # NOTE: the kick-aware analysis and the fallback moves that used to
        # follow the return statements were unreachable (and referenced
        # names never defined in this method), so they have been dropped.

        def longest_life(survivable_with_enemy, prev):
            # life[(t, x, y)]: the largest time step of a node in
            # survivable_with_enemy that is linked to (x, y) at time t
            # through the prev relation (0 if there is none)
            life = defaultdict(int)
            # walk backward in time so that a node is final
            # before its value is pushed to its predecessors
            for t in range(self._search_range, 0, -1):
                for position in survivable_with_enemy[t]:
                    if not life[(t, ) + position]:
                        life[(t, ) + position] = t
                    for prev_position in prev[t][position]:
                        life[(t - 1, ) + prev_position] = max(
                            life[(t - 1, ) + prev_position],
                            life[(t, ) + position])
            return life

        #
        # Definitions
        #

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']

        # position of each enemy that is visible on the board
        enemy_position = dict()
        for enemy in my_enemies:
            positions = np.argwhere(board == enemy.value)
            if len(positions) == 0:
                continue
            enemy_position[enemy] = tuple(positions[0])

        # survivable_steps[action]: score of the action (larger is better)
        survivable_steps = defaultdict(int)

        #
        # survivable tree in standard case
        #

        # work on a copy, because the boards are modified below
        list_boards_no_kick = deepcopy(info["list_boards_no_move"])

        # remove myself: what remains at my position is a bomb
        # if there is a blast strength recorded there, a passage otherwise
        if obs["bomb_blast_strength"][my_position]:
            under_me = constants.Item.Bomb.value
        else:
            under_me = constants.Item.Passage.value
        for b in list_boards_no_kick:
            if utility.position_is_agent(b, my_position):
                b[my_position] = under_me

        my_survivable, my_prev, _, my_survivable_with_enemy \
            = self._get_survivable_with_enemy(list_boards_no_kick, my_position, enemy_position)

        life = longest_life(my_survivable_with_enemy, my_prev)

        for next_position in my_survivable[1]:
            my_action = self._get_direction(my_position, next_position)
            survivable_steps[my_action] = life[(1, ) + next_position]

        #
        # survivable tree if I lay bomb
        #

        if my_ammo > 0 and obs["bomb_life"][my_position] == 0:
            # if I can lay a bomb

            board_with_bomb = deepcopy(obs["board"])
            curr_bombs_with_bomb = deepcopy(info["curr_bombs"])
            # lay a bomb
            board_with_bomb[my_position] = constants.Item.Bomb.value
            bomb = characters.Bomb(
                characters.Bomber(),  # dummy owner of the bomb
                my_position,
                constants.DEFAULT_BOMB_LIFE,
                my_blast_strength,
                None)
            curr_bombs_with_bomb.append(bomb)
            list_boards_with_bomb, _ \
                = self._board_sequence(board_with_bomb,
                                       curr_bombs_with_bomb,
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       enemy_mobility=0)

            _, my_prev_with_bomb, _, my_survivable_with_bomb_enemy \
                = self._get_survivable_with_enemy(list_boards_with_bomb, my_position, enemy_position)

            life = longest_life(my_survivable_with_bomb_enemy,
                                my_prev_with_bomb)

            # after laying a bomb, I am still at my position at time 1
            survivable_steps[constants.Action.Bomb] = life[(1, ) + my_position]

        #
        # Choose one of the most survivable actions
        #

        print("survivable steps")
        print(survivable_steps)

        if not survivable_steps:
            print("No actions: stop")
            return constants.Action.Stop.value

        values = np.array(list(survivable_steps.values()))
        print(values)
        best_value = np.max(values)

        # keep the candidates in a plain list (same order as the dict keys):
        # random.choice on a NumPy array raises ValueError on CPython 3.11
        # when the array has more than one element
        best_actions = [action
                        for action, steps in survivable_steps.items()
                        if steps == best_value]

        best_action = random.choice(best_actions)
        print("Most survivable action", best_action)

        return best_action.value
Пример #12
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        board = info['recently_seen']
        #board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_kick = obs["can_kick"]  # whether I can kick
        my_enemies = [
            constants.Item(e) for e in obs['enemies']
            if e != constants.Item.AgentDummy
        ]
        if obs["teammate"] != constants.Item.AgentDummy:
            my_teammate = obs["teammate"]
        else:
            my_teammate = None

        all_feasible_actions = [
            a for a in info["my_next_position"] if info["my_next_position"][a]
        ]

        # positions that might be blocked
        if info["teammate_position"] is None:
            agent_positions = info["enemy_positions"]
        else:
            agent_positions = info["enemy_positions"] + [
                info["teammate_position"]
            ]

        #
        # Fraction of blocked node in the survival trees of enemies
        #

        _list_boards = info["list_boards_no_move"]
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes, blocked_time_positions \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"],
                                     ignore_dying_agent=False)

        if info["teammate_position"] is not None:
            total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
                = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"],
                                         ignore_dying_agent=True)

        block = defaultdict(float)
        for action in [
                constants.Action.Stop, constants.Action.Up,
                constants.Action.Down, constants.Action.Left,
                constants.Action.Right
        ]:

            next_position = info["my_next_position"][action]

            if next_position is None:
                continue

            if next_position in info["all_kickable"]:
                # kick will be considered later
                continue

            block[action] = total_frac_blocked[next_position]
            if info["teammate_position"] is not None and block[action] > 0:
                block[action] *= (1 -
                                  total_frac_blocked_teammate[next_position])

            if block[action] > 0:
                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

        if all([my_ammo > 0, obs["bomb_blast_strength"][my_position] == 0]):

            list_boards_with_bomb, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=constants.Action.Bomb,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])

            block[constants.Action.Bomb] \
                = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies,
                                                   ignore_dying_agent=False)
            block[constants.Action.Bomb] \
                += total_frac_blocked[my_position] * (1 - block[constants.Action.Bomb])

            if info["teammate_position"] is not None:
                block_teammate_with_bomb = self._get_frac_blocked_two_lists(
                    list_boards_with_bomb,
                    n_survivable_nodes_teammate,
                    board, [my_teammate],
                    ignore_dying_agent=True)
                # this is an approximation
                block_teammate_with_bomb \
                    += total_frac_blocked_teammate[my_position] * (1 - block_teammate_with_bomb)

                block[constants.Action.Bomb] *= (1 - block_teammate_with_bomb)

            if block[constants.Action.Bomb] > 0:
                block[constants.Action.Bomb] *= self._inv_tmp
                block[constants.Action.Bomb] -= np.log(
                    -np.log(self.random.uniform()))

        block_teammate_with_kick = defaultdict(float)
        for next_position in info["all_kickable"]:

            my_action = self._get_direction(my_position, next_position)

            backedup = False
            if board[next_position] != constants.Item.Bomb.value:
                backup_cell = board[next_position]
                board[
                    next_position] = constants.Item.Bomb.value  # an agent will be overwritten
                backedup = True

            list_boards_with_kick, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])

            if backedup:
                board[next_position] = backup_cell

            block[my_action] \
                = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies)
            block[my_action] \
                += total_frac_blocked[next_position] * (1 - block[my_action])

            if block[my_action] > 0 and info["teammate_position"] is not None:
                block_teammate_with_kick[next_position] \
                    = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                       n_survivable_nodes_teammate,
                                                       board,
                                                       [my_teammate],
                                                       ignore_dying_agent=True)

                # this is an approximation
                block_teammate_with_kick[next_position] \
                    += total_frac_blocked_teammate[next_position] * (1 - block_teammate_with_kick[next_position])

                block[my_action] *= (1 -
                                     block_teammate_with_kick[next_position])

            if block[my_action] > 0:
                block[my_action] *= self._inv_tmp
                block[my_action] -= np.log(-np.log(self.random.uniform()))

        n_survivable_move, is_survivable_move, list_boards_move \
            = self._get_survivable(obs, info, my_position, info["my_next_position"], info["enemy_positions"],
                                   info["all_kickable"], allow_kick_to_fog=True,
                                   enemy_mobility=1, enemy_bomb=0,
                                   ignore_dying_agent=False,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        for a in all_feasible_actions:
            if a not in n_survivable_move:
                n_survivable_move[a] = np.zeros(self._search_range)

        enemy_can_place_bomb = any([
            obs["bomb_blast_strength"][position] == 0
            for position in info["enemy_positions"]
        ])

        if enemy_can_place_bomb:

            n_survivable_bomb, is_survivable_bomb, list_boards_bomb \
                = self._get_survivable(obs, info, my_position, info["my_next_position"], info["enemy_positions"],
                                       info["all_kickable"], allow_kick_to_fog=True,
                                       enemy_mobility=0, enemy_bomb=1,
                                       ignore_dying_agent=False,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])

            for a in all_feasible_actions:
                if a not in n_survivable_bomb:
                    n_survivable_bomb[a] = np.zeros(self._search_range)

            might_survivable_actions = set(
                [a
                 for a in n_survivable_bomb if n_survivable_bomb[a][-1] > 0] +
                [a for a in n_survivable_move if n_survivable_move[a][-1] > 0])

            might_survivable_actions -= info["might_block_actions"]
            for a in info["might_block_actions"]:
                n_survivable_bomb[a] = np.zeros(self._search_range)
                n_survivable_move[a] = np.zeros(self._search_range)

            for a in might_survivable_actions:
                if a not in n_survivable_bomb:
                    n_survivable_bomb[a] = np.zeros(self._search_range)
                if a not in n_survivable_move:
                    n_survivable_move[a] = np.zeros(self._search_range)

            survivable_actions = list()
            for action in might_survivable_actions:
                if n_survivable_move[action][-1] > 0 and n_survivable_bomb[
                        action][-1] > 0:
                    if not info["might_blocked"][action] or n_survivable_move[
                            constants.Action.Stop][-1] > 0:
                        survivable_actions.append(action)

            n_survivable_expected = dict()
            for a in survivable_actions:
                if info["might_blocked"][a]:
                    n_survivable_expected[a] \
                        = np.array(n_survivable_bomb[a]) \
                        + np.array(n_survivable_move[constants.Action.Stop]) \
                        + np.array(n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 3
                else:
                    n_survivable_expected[a] = np.array(
                        n_survivable_bomb[a]) + 2 * np.array(
                            n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 3
                n_survivable_expected[a] = n_survivable_expected[a]

        else:

            might_survivable_actions = set(
                [a for a in n_survivable_move if n_survivable_move[a][-1] > 0])

            might_survivable_actions -= info["might_block_actions"]
            for a in info["might_block_actions"]:
                n_survivable_move[a] = np.zeros(self._search_range)

            survivable_actions = list()
            for action in might_survivable_actions:
                if n_survivable_move[action][-1] > 0:
                    if not info["might_blocked"][action] or n_survivable_move[
                            constants.Action.Stop][-1] > 0:
                        survivable_actions.append(action)

            for a in might_survivable_actions:
                if a not in n_survivable_move:
                    n_survivable_move[a] = np.zeros(self._search_range)

            n_survivable_expected = dict()
            for a in survivable_actions:
                if info["might_blocked"][a]:
                    n_survivable_expected[a] \
                        = np.array(n_survivable_move[constants.Action.Stop]) \
                        + np.array(n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 2
                else:
                    n_survivable_expected[a] = np.array(n_survivable_move[a])

        #
        # Choose actions
        #

        if len(survivable_actions) == 1:

            action = survivable_actions.pop()
            return action.value

        if len(survivable_actions) > 1:

            most_survivable_actions = self._get_most_survivable_actions(
                n_survivable_expected)

            if len(most_survivable_actions) == 1:

                return most_survivable_actions[0].value

            elif len(most_survivable_actions) > 1:

                # tie break by block score
                max_block = 0  # do not choose 0
                best_action = None
                for action in all_feasible_actions:
                    if action not in most_survivable_actions:
                        # for deterministic behavior
                        continue
                    if info["might_block_teammate"][action]:
                        continue
                    if block[action] > max_block:
                        max_block = block[action]
                        best_action = action
                if best_action is not None:
                    return best_action.value

        #
        # no survivable actions for all cases
        #

        if enemy_can_place_bomb:

            n_survivable_expected = dict()
            for a in all_feasible_actions:
                if info["might_blocked"][a]:
                    if is_survivable_move[constants.Action.Stop]:
                        n_survivable_expected[a] \
                            = np.array(n_survivable_bomb[a]) \
                            + np.array(n_survivable_move[constants.Action.Stop]) \
                            + np.array(n_survivable_move[a])
                    else:
                        n_survivable_expected[a] \
                            = np.array(n_survivable_bomb[a]) \
                            + np.array(n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 3
                else:
                    n_survivable_expected[a] = np.array(
                        n_survivable_bomb[a]) + 2 * np.array(
                            n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 3

        else:

            n_survivable_expected = dict()
            for a in all_feasible_actions:
                if info["my_next_position"][a] is None:
                    continue
                if info["might_blocked"][a]:
                    if is_survivable_move[constants.Action.Stop]:
                        n_survivable_expected[a] \
                            = np.array(n_survivable_move[constants.Action.Stop]) \
                            + np.array(n_survivable_move[a])
                    else:
                        n_survivable_expected[a] = np.array(
                            n_survivable_move[a])
                    n_survivable_expected[a] = n_survivable_expected[a] / 2
                else:
                    n_survivable_expected[a] = np.array(n_survivable_move[a])

        if len(might_survivable_actions) == 1:

            action = might_survivable_actions.pop()
            return action.value

        if len(might_survivable_actions) > 1:

            most_survivable_actions = self._get_most_survivable_actions(
                n_survivable_expected)

            if len(most_survivable_actions) == 1:

                return most_survivable_actions[0].value

            elif len(most_survivable_actions) > 1:

                # tie break by block score
                max_block = 0  # do not choose 0
                best_action = None
                for action in all_feasible_actions:
                    if action not in most_survivable_actions:
                        # for deterministic behavior
                        continue
                    if info["might_block_teammate"][action]:
                        continue
                    if block[action] > max_block:
                        max_block = block[action]
                        best_action = action

                if best_action is not None:
                    return best_action.value

        # no survivable action found for any cases
        # TODO : Then consider killing enemies or helping teammate

        max_block = 0  # do not choose 0
        best_action = None
        for action in all_feasible_actions:
            if action not in block:
                # for deterministic behavior
                continue
            if info["might_block_teammate"][action]:
                continue
            if all([
                    action == constants.Action.Bomb, info["teammate_position"]
                    is not None
            ]):
                if block_teammate_with_bomb > 0:
                    continue
            next_position = info["my_next_position"][action]
            if all([
                    next_position in info["all_kickable"],
                    block_teammate_with_kick[next_position] > 0
            ]):
                continue
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if best_action is not None:
            return best_action.value

        # longest survivable action

        longest_survivable_actions = self._get_longest_survivable_actions(
            n_survivable_expected)

        if len(longest_survivable_actions) == 1:

            return longest_survivable_actions[0].value

        elif len(longest_survivable_actions) > 1:

            # break tie by most survivable actions
            for a in n_survivable_expected:
                if a not in longest_survivable_actions:
                    n_survivable_expected[a] = np.zeros(self._search_range)
            most_survivable_actions = self._get_most_survivable_actions(
                n_survivable_expected)

            if len(most_survivable_actions) == 1:

                return most_survivable_actions[0].value

            elif len(most_survivable_actions) > 1:

                if info["teammate_position"] is not None:
                    min_block = np.inf
                    best_action = None
                    for a in all_feasible_actions:
                        if a not in most_survivable_actions:
                            # for deterministic behavior
                            continue
                        if a == constants.Action.Bomb:
                            score = block_teammate_with_bomb  # do not choose Bomb unless it is strictly better than others
                        else:
                            next_position = info["my_next_position"][a]
                            if next_position in info["all_kickable"]:
                                score = block_teammate_with_kick[
                                    next_position] - self.random.uniform(
                                        0, 1e-6)
                            else:
                                score = total_frac_blocked_teammate[
                                    next_position] - self.random.uniform(
                                        0, 1e-6)
                        if score < min_block:
                            min_block = score
                            best_action = a
                    if best_action is not None:
                        return best_action.value
                else:
                    # remove Bomb (as it is most affected by bugs)
                    #most_survivable_actions = list(set(most_survivable_actions) - {constants.Action.Bomb})
                    most_survivable_actions = [
                        a for a in all_feasible_actions
                        if a in most_survivable_actions
                        and a != constants.Action.Bomb
                    ]

                    index = self.random.randint(len(most_survivable_actions))
                    random_action = most_survivable_actions[index]
                    return random_action.value

        # The following will not be used

        self.random.shuffle(all_feasible_actions)
        if len(all_feasible_actions):
            action = all_feasible_actions[0]
            return action.value

        action = constants.Action.Stop
        return action.value
Пример #13
0
    def act(self, obs, action_space, info):
        """Pick an action with survival as the first priority.

        Survivability of each action is evaluated under three scenarios:
        every enemy replaced by a bomb (``enemy_mobility=0``), enemies
        allowed to move (``enemy_mobility=1``), and all other agents removed
        from the board.  The decision then falls through the stages below
        and returns at the first one that yields an action:

        1. actions whose last survivability entry is positive in all three
           scenarios (restricted to actions that cannot be blocked when
           stopping is not among them and such actions exist);
        2. any action that appears in at least one scenario's table;
        3. the action with the highest randomly perturbed "block" score
           against enemies, discounted by how much it blocks the teammate;
        4. last-resort fallbacks: bomb, kick, random walkable move, stop.

        Args:
            obs: observation dict.  Keys read here: "position", "ammo",
                "blast_strength", "enemies", "teammate",
                "bomb_blast_strength", "bomb_life" and "board".
            action_space: not used by this method.
            info: dict of tracked state.  Keys read here: "last_seen",
                "moving_direction", "curr_bombs", "curr_flames" and
                "list_boards_no_move".

        Returns:
            The ``.value`` of the chosen ``constants.Action``.
        """

        # NOTE(review): `verbose` and `enemy_blast_strength_map` are not
        # defined inside this method; presumably they are module-level
        # names -- confirm they exist where this method lives.

        #
        # Definitions
        #

        board = info['last_seen']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        my_teammate = obs["teammate"]

        kickable, might_kickable \
            = self._kickable_positions(obs, info["moving_direction"],
                                       consider_agents=True)
        # Union used for local membership tests and iteration.  Callees get
        # their own copy so they never share a set with this method.
        all_kickable = set.union(kickable, might_kickable)

        # enemy positions
        enemy_positions = list()
        for enemy in my_enemies:
            rows, cols = np.where(board == enemy.value)
            if len(rows) == 0:
                continue
            enemy_positions.append((rows[0], cols[0]))

        # teammate position
        teammate_position = None
        if my_teammate is not None:
            rows, cols = np.where(board == my_teammate.value)
            if len(rows):
                teammate_position = (rows[0], cols[0])

        # positions that might be blocked
        if teammate_position is None:
            agent_positions = enemy_positions
        else:
            agent_positions = enemy_positions + [teammate_position]
        might_blocked = self._get_might_blocked(board, my_position,
                                                agent_positions,
                                                might_kickable)

        #
        # Survivability, when enemy is replaced by a bomb, and no move afterwards
        #

        # replace enemy with bomb
        _bombs = deepcopy(info["curr_bombs"])
        rows, cols = np.where(board > constants.Item.AgentDummy.value)
        for position in zip(rows, cols):
            # NOTE(review): this tests a raw board cell against Item
            # members, whereas the lookups above compare against `.value`;
            # verify that this membership test can actually succeed.
            if board[position] not in my_enemies:
                continue
            if obs["bomb_blast_strength"][position]:
                # already a bomb
                continue
            bomb = characters.Bomb(
                characters.Bomber(),  # dummy owner of the bomb
                position,
                constants.DEFAULT_BOMB_LIFE,
                enemy_blast_strength_map[position],
                None)
            _bombs.append(bomb)

        n_survivable_bomb = self._get_n_survivable(board,
                                                   _bombs,
                                                   info["curr_flames"],
                                                   obs,
                                                   my_position,
                                                   set(all_kickable),
                                                   enemy_mobility=0)

        #
        # Survivability, when enemy moves one position or stay unmoved
        #

        n_survivable_move = self._get_n_survivable(board,
                                                   info["curr_bombs"],
                                                   info["curr_flames"],
                                                   obs,
                                                   my_position,
                                                   set(all_kickable),
                                                   enemy_mobility=1)

        #
        # Survivability, when no enemies
        #

        # Copy of the board with every agent except myself turned into a
        # passage.
        _board = deepcopy(board)
        other_agents = np.where(_board > constants.Item.AgentDummy.value)
        _board[other_agents] = constants.Item.Passage.value
        _board[my_position] = board[my_position]

        _obs = {
            "position": obs["position"],
            "blast_strength": obs["blast_strength"],
            "ammo": obs["ammo"],
            "bomb_life": obs["bomb_life"],
            "board": _board
        }

        n_survivable_none = self._get_n_survivable(_board,
                                                   info["curr_bombs"],
                                                   info["curr_flames"],
                                                   _obs,
                                                   my_position,
                                                   set(all_kickable),
                                                   enemy_mobility=0)

        def expected_survivability(actions):
            """Average the scenario tables that apply to each action."""
            expected = dict()
            for a in actions:
                if might_blocked[a]:
                    # A blocked move leaves me in place, so the "enemy
                    # moves" scenario is scored as if I had stopped.
                    total = np.array(n_survivable_bomb[a]) \
                        + np.array(n_survivable_move[constants.Action.Stop]) \
                        + np.array(n_survivable_none[a])
                    expected[a] = total / 3
                elif a in [constants.Action.Stop, constants.Action.Bomb]:
                    total = np.array(n_survivable_bomb[a]) \
                        + np.array(n_survivable_move[a])
                    expected[a] = total / 2
                else:
                    total = np.array(n_survivable_bomb[a]) \
                        + np.array(n_survivable_none[a])
                    expected[a] = total / 2
            return expected

        #
        # Survivable actions
        #

        survivable_actions_bomb = {
            a for a in n_survivable_bomb if n_survivable_bomb[a][-1] > 0
        }
        survivable_actions_move = {
            a for a in n_survivable_move if n_survivable_move[a][-1] > 0
        }
        survivable_actions_none = {
            a for a in n_survivable_none if n_survivable_none[a][-1] > 0
        }

        survivable_actions = set.intersection(survivable_actions_bomb,
                                              survivable_actions_move,
                                              survivable_actions_none)

        # if can survive without possibility of being blocked, then do so
        # (the original stored this filter in a misspelled, never-read
        # variable, so it had no effect)
        if constants.Action.Stop not in survivable_actions:
            unblocked_actions = {
                action for action in survivable_actions
                if not might_blocked[action]
            }
            if unblocked_actions:
                survivable_actions = unblocked_actions

        #
        # Choose actions
        #

        if len(survivable_actions) == 1:

            action = survivable_actions.pop()
            if verbose:
                print("Only survivable action", action)
            return action.value

        if len(survivable_actions) > 1:

            n_survivable_expected = expected_survivability(survivable_actions)
            action = self._get_most_survivable_action(n_survivable_expected)
            if verbose:
                print("Most survivable action", action)
            return action.value

        # no survivable actions for all cases
        survivable_actions = set(n_survivable_bomb) \
            | set(n_survivable_move) \
            | set(n_survivable_none)

        if len(survivable_actions) == 1:

            action = survivable_actions.pop()
            if verbose:
                print("Only might survivable action", action)
            return action.value

        if len(survivable_actions) > 1:

            # Pad missing entries with zeros so that every table can be
            # indexed by every candidate action (and by Stop).
            for a in set.union(survivable_actions, {constants.Action.Stop}):
                if a not in n_survivable_bomb:
                    n_survivable_bomb[a] = np.zeros(self._search_range)
                if a not in n_survivable_move:
                    n_survivable_move[a] = np.zeros(self._search_range)
                if a not in n_survivable_none:
                    n_survivable_none[a] = np.zeros(self._search_range)

            n_survivable_expected = expected_survivability(survivable_actions)
            action = self._get_most_survivable_action(n_survivable_expected)
            if verbose:
                print("Most might survivable action", action)
            return action.value

        # no survivable action found for any cases
        # TODO : Then consider killing enemies or helping teammate

        # fraction of blocked node in the survival trees of enemies
        _list_boards = deepcopy(info["list_boards_no_move"])
        if obs["bomb_blast_strength"][my_position]:
            # I am standing on a bomb: what remains at my cell is the bomb
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes, blocked_time_positions \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

        if teammate_position is not None:
            total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
                = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

        block = defaultdict(float)
        for action in [
                constants.Action.Stop, constants.Action.Up,
                constants.Action.Down, constants.Action.Left,
                constants.Action.Right
        ]:

            next_position = self._get_next_position(my_position, action)
            if not self._on_board(next_position):
                continue

            if board[next_position] in [
                    constants.Item.Rigid.value, constants.Item.Wood.value
            ]:
                continue

            if next_position in all_kickable:
                # kick will be considered later
                continue

            block[action] = total_frac_blocked[next_position]
            if teammate_position is not None:
                block[action] *= (1 -
                                  total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp
            # random perturbation so that the argmax below is randomized
            block[action] -= np.log(-np.log(self.random.uniform()))

        # NOTE(review): `any` lets Bomb be scored with no ammo or with a
        # bomb already under me; confirm whether `all` was intended.
        if any([my_ammo > 0, obs["bomb_blast_strength"][my_position] == 0]):

            list_boards_with_bomb, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=constants.Action.Bomb)

            block[constants.Action.Bomb] \
                = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies)

            if teammate_position is not None:
                # NOTE(review): this reuses `n_survivable_nodes` computed
                # for the enemies, not `n_survivable_nodes_teammate`;
                # confirm which one the helper expects here.
                block_teammate = self._get_frac_blocked_two_lists(
                    list_boards_with_bomb, n_survivable_nodes, board,
                    [my_teammate])
                block[constants.Action.Bomb] *= (1 - block_teammate)

            block[constants.Action.Bomb] *= self._inv_tmp
            block[constants.Action.Bomb] -= np.log(
                -np.log(self.random.uniform()))

        for next_position in all_kickable:

            my_action = self._get_direction(my_position, next_position)

            list_boards_with_kick, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True)

            block[my_action] \
                = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies)

            if teammate_position is not None:
                # NOTE(review): same table question as for the Bomb case.
                block_teammate = self._get_frac_blocked_two_lists(
                    list_boards_with_kick, n_survivable_nodes, board,
                    [my_teammate])
                block[my_action] *= (1 - block_teammate)

            block[my_action] *= self._inv_tmp
            block[my_action] -= np.log(-np.log(self.random.uniform()))

        max_block = -np.inf
        best_action = None
        for action in block:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if best_action is not None:
            if verbose:
                print(
                    "Best action to kill enemies or help teammate (cannot survive)"
                )
            return best_action.value

        # The following will not be used
        #
        # In the original, each `return` below sat inside `if verbose:`, so
        # with verbose off the chosen fallback was discarded and control
        # always fell through to Stop.

        # NOTE(review): the condition tests obs["blast_strength"] == 0; it
        # is kept as written -- confirm that this is the intended check.
        if obs["ammo"] > 0 and obs["blast_strength"] == 0:
            action = constants.Action.Bomb
            if verbose:
                print("Suicide", action)
            return action.value

        kickable_positions = list(all_kickable)
        if kickable_positions:
            self.random.shuffle(kickable_positions)
            action = self._get_direction(my_position, kickable_positions[0])
            if verbose:
                print("Suicide kick", action)
            return action.value

        all_actions = [
            constants.Action.Stop, constants.Action.Up, constants.Action.Down,
            constants.Action.Right, constants.Action.Left
        ]
        self.random.shuffle(all_actions)
        for action in all_actions:
            next_position = self._get_next_position(my_position, action)
            if not self._on_board(next_position):
                continue
            if utility.position_is_wall(board, next_position):
                continue
            if verbose:
                print("Random action", action)
            return action.value

        action = constants.Action.Stop
        if verbose:
            print("No action found", action)
        return action.value
Пример #14
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        board = obs['board']
        recently_seen_positions = (info["since_last_seen"] < 3)
        board[recently_seen_positions] = info["last_seen"][recently_seen_positions]
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies'] if e != constants.Item.AgentDummy]
        if obs["teammate"] != constants.Item.AgentDummy:
            my_teammate = obs["teammate"]
        else:
            my_teammate = None
        my_kick = obs["can_kick"]  # whether I can kick

        if verbose:
            print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t")

        my_next_position = {constants.Action.Stop: my_position,
                            constants.Action.Bomb: my_position}
        for action in [constants.Action.Up, constants.Action.Down,
                       constants.Action.Left, constants.Action.Right]:
            next_position = self._get_next_position(my_position, action)
            if self._on_board(next_position):
                if board[next_position] == constants.Item.Rigid.value:
                    my_next_position[action] = None
                else:
                    my_next_position[action] = next_position
            else:
                my_next_position[action] = None

        #
        # Understand current situation
        #

        if all([info["prev_action"] not in [constants.Action.Stop, constants.Action.Bomb],
                info["prev_position"] == my_position]):
            # if previously blocked, do not reapeat with some probability
            self._inv_tmp *= self._backoff
        else:
            self._inv_tmp = self._inv_tmp_init

        
        # enemy positions
        enemy_positions = list()
        for enemy in my_enemies:
            rows, cols = np.where(board==enemy.value)
            if len(rows) == 0:
                continue
            enemy_positions.append((rows[0], cols[0]))

        # teammate position
        teammate_position = None
        if my_teammate is not None:
            rows, cols = np.where(board==my_teammate.value)
            if len(rows):
                teammate_position = (rows[0], cols[0])
        
        # Positions where we kick a bomb if we move to
        if my_kick:
            kickable, might_kickable = self._kickable_positions(obs, info["moving_direction"])
        else:
            kickable = set()
            might_kickable = set()

        # positions that might be blocked
        if teammate_position is None:
            agent_positions = enemy_positions
        else:
            agent_positions = enemy_positions + [teammate_position]
        might_blocked = self._get_might_blocked(board, my_position, agent_positions, might_kickable)

        # enemy positions over time
        # these might be dissappeared due to extra flames
        if len(enemy_positions):
            rows = [p[0] for p in enemy_positions]
            cols = [p[1] for p in enemy_positions]
            list_enemy_positions = [(rows, cols)]
            _enemy_positions = list()
            for t in range(self._enemy_mobility):
                rows, cols = list_enemy_positions[-1]
                for x, y in zip(rows, cols):
                    for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1)]:
                        next_position = (x + dx, y + dy)
                        if not self._on_board(next_position):
                            continue
                        _board = info["list_boards_no_move"][t]
                        if utility.position_is_passage(_board, next_position):
                            _enemy_positions.append(next_position)
            _enemy_positions = set(_enemy_positions)
            rows = [p[0] for p in _enemy_positions]
            cols = [p[1] for p in _enemy_positions]
            list_enemy_positions.append((rows, cols))
        else:
            list_enemy_positions = []
            
        
        # survivable actions
        is_survivable = dict()
        for a in self._get_all_actions():
            is_survivable[a] = False
        n_survivable = dict()
        list_boards = dict()
        for my_action in self._get_all_actions():

            next_position = my_next_position[my_action]

            if next_position is None:
                continue

            if my_action == constants.Action.Bomb:
                if any([my_ammo == 0,
                        obs["bomb_blast_strength"][next_position] > 0]):
                    continue
            
            if all([utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1]):
                continue

            if all([my_action != constants.Action.Stop,
                    obs["bomb_blast_strength"][next_position] > 0,
                    next_position not in set.union(kickable, might_kickable)]):
                continue

            if next_position in set.union(kickable, might_kickable):
                # do not kick into fog
                dx = next_position[0] - my_position[0]
                dy = next_position[1] - my_position[1]
                position = next_position
                is_fog = False
                while self._on_board(position):
                    if utility.position_is_fog(board, position):
                        is_fog = True
                        break
                    position = (position[0] + dx, position[1] + dy)
                if is_fog:
                    continue
            
            # list of boards from next steps
            list_boards[my_action], _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=my_action,
                                       can_kick=my_kick,
                                       enemy_mobility=self._enemy_mobility,
                                       enemy_bomb=self._enemy_bomb,
                                       agent_blast_strength=info["agent_blast_strength"])

            # agents might be disappeared, because of overestimated bombs
            for t, positions in enumerate(list_enemy_positions):
                list_boards[my_action][t][positions] = constants.Item.AgentDummy.value
            
            # some bombs may explode with extra bombs, leading to under estimation
            for t in range(len(list_boards[my_action])):
                flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value)
                list_boards[my_action][t][flame_positions] = constants.Item.Flames.value
                
        """
        processed = Parallel(n_jobs=-1, verbose=0)(
            [delayed(search_time_expanded_network)(list_boards[action][1:], my_next_position[action], action)
             for action in list_boards]
        )
        for survivable, my_action in processed:
            if my_next_position[my_action] in survivable[0]:
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]]
        """
        
        for my_action in list_boards:
            survivable = search_time_expanded_network(list_boards[my_action][1:],
                                                      my_next_position[my_action])
            if my_next_position[my_action] in survivable[0]:
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]]
        
        survivable_actions = list()
        for a in is_survivable:
            if not is_survivable[a]:
                continue
            if might_blocked[a] and not is_survivable[constants.Action.Stop]:
                continue
            if n_survivable[a][-1] <= 1:
                is_survivable[a] = False
                continue
            survivable_actions.append(a)

        #
        # Choose action
        #
                
        if len(survivable_actions) == 0:

            #
            # return None, if no survivable actions
            #
        
            return None

        elif len(survivable_actions) == 1:

            #
            # Choose the survivable action, if it is the only choice
            #
            
            action = survivable_actions[0]
            if verbose:
                print("The only survivable action", action)
            return action.value


        #
        # Bomb at a target
        #

        # fraction of blocked node in the survival trees of enemies
        _list_boards = deepcopy(info["list_boards_no_move"])
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes, blocked_time_positions \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])
    
        if teammate_position is not None:
            total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
                = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

            """
            np.set_printoptions(precision=3)
            print("enemy")
            print(total_frac_blocked)
            print("teammate")
            print(total_frac_blocked_teammate)
            print("product")
            prod = total_frac_blocked * (1 - total_frac_blocked_teammate)
            print(prod[:5,:5])
            """

        p_survivable = defaultdict(float)
        for action in n_survivable:
            p_survivable[action] = sum(n_survivable[action]) / self._my_survivability_threshold
            if p_survivable[action] > 1:
                p_survivable[action] = 1

        block = defaultdict(float)
        for action in [constants.Action.Stop,
                       constants.Action.Up, constants.Action.Down,
                       constants.Action.Left, constants.Action.Right]:
            next_position = my_next_position[action]
            if next_position is None:
                continue
            if next_position in set.union(kickable, might_kickable):
                # kick will be considered later
                continue
            if all([utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1,
                    is_survivable[constants.Action.Stop]]):
                # if the next position is flames,
                # I want to stop to wait, which must be feasible
                block[action] = total_frac_blocked[next_position] * p_survivable[constants.Action.Stop]
                if teammate_position is not None:
                    block[action] *= (1 - total_frac_blocked_teammate[next_position])
                block[action] *= self._inv_tmp
                block[action] -=  np.log(-np.log(self.random.uniform()))
                continue
            elif not is_survivable[action]:
                continue
            if all([might_blocked[action],
                    not is_survivable[constants.Action.Stop]]):
                continue

            block[action] = total_frac_blocked[next_position] * p_survivable[action]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp            
            block[action] -=  np.log(-np.log(self.random.uniform()))
            if might_blocked[action]:
                block[action] = (total_frac_blocked[my_position] * p_survivable[constants.Action.Stop]
                                 + total_frac_blocked[next_position] * p_survivable[action]) / 2
                if teammate_position is not None:                    
                    block[action] *= (1 - total_frac_blocked_teammate[next_position])
                block[action] *= self._inv_tmp                
                block[action] -=  np.log(-np.log(self.random.uniform()))

        if is_survivable[constants.Action.Bomb]:
            list_boards_with_bomb, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=constants.Action.Bomb)

            n_survivable_nodes_with_bomb = defaultdict(int)
            for enemy in my_enemies:
                # get survivable tree of the enemy
                rows, cols = np.where(board==enemy.value)
                if len(rows) == 0:
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable = search_time_expanded_network(list_boards_with_bomb,
                                                           enemy_position)
                n_survivable_nodes_with_bomb[enemy] = sum([len(positions) for positions in _survivable])

            n_with_bomb = sum([n_survivable_nodes_with_bomb[enemy] for enemy in my_enemies])
            n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies])
            if n_with_none == 0:
                total_frac_blocked_with_bomb = 0

                # place more bombs, so the stacked enemy cannot kick
                x, y = my_position
                for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    next_position = (x + dx, y + dy)
                    following_position = (x + 2 * dx, y + 2 * dy)
                    if not self._on_board(following_position):
                        continue
                    if all([obs["bomb_life"][next_position] > 0,
                            board[following_position] > constants.Item.AgentDummy.value]):
                        total_frac_blocked_with_bomb = 1
            else:
                total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none

            if teammate_position is not None:
                # get survivable tree of the teammate
                _survivable = search_time_expanded_network(list_boards_with_bomb, teammate_position)
                n_survivable_nodes_with_bomb_teammate = sum([len(positions) for positions in _survivable])

                n_with_bomb = n_survivable_nodes_with_bomb_teammate
                n_with_none = n_survivable_nodes_teammate[my_teammate]
                if n_with_none == 0:
                    total_frac_blocked_with_bomb_teammate = 0
                else:
                    total_frac_blocked_with_bomb_teammate = 1 - n_with_bomb / n_with_none

            action = constants.Action.Bomb
            block[action] = total_frac_blocked_with_bomb * p_survivable[action]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_with_bomb_teammate)
            block[action] *= self._inv_tmp
            block[action] -=  np.log(-np.log(self.random.uniform()))

        for next_position in kickable:

            action = self._get_direction(my_position, next_position)
            if not is_survivable[action]:
                continue

            list_boards_with_kick, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=action,
                                       can_kick=True)

            n_survivable_nodes_with_kick = defaultdict(int)
            for enemy in my_enemies:
                # get survivable tree of the enemy
                rows, cols = np.where(board==enemy.value)
                if len(rows) == 0:
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable = search_time_expanded_network(list_boards_with_kick,
                                                           enemy_position)
                n_survivable_nodes_with_kick[enemy] = sum([len(positions) for positions in _survivable])

                n_with_kick = sum([n_survivable_nodes_with_kick[enemy] for enemy in my_enemies])
                n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies])
                if n_with_none == 0:
                    total_frac_blocked[next_position] = 0
                else:
                    total_frac_blocked[next_position] = 1 - n_with_kick / n_with_none

            if teammate_position is not None:
                # get survivable tree of the teammate
                _survivable = search_time_expanded_network(list_boards_with_kick, teammate_position)
                n_survivable_nodes_with_kick_teammate = sum([len(positions) for positions in _survivable])

                n_with_kick = n_survivable_nodes_with_kick_teammate
                n_with_none = n_survivable_nodes_teammate[my_teammate]
                if n_with_none == 0:
                    total_frac_blocked_teammate[next_position] = 0
                else:
                    total_frac_blocked_teammate[next_position] = 1 - n_with_kick / n_with_none
            
            block[action] = total_frac_blocked[next_position] * p_survivable[action]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp
            block[action] -=  np.log(-np.log(self.random.uniform()))

        max_block = -np.inf
        best_action = None
        for action in block:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp:
            if teammate_position is not None:
                teammate_safety = total_frac_blocked_with_bomb_teammate * n_survivable_nodes_with_bomb_teammate
                if any([teammate_safety > self._teammate_survivability_threshold,
                        total_frac_blocked_with_bomb_teammate < self._interfere_threshold,
                        total_frac_blocked_with_bomb_teammate < total_frac_blocked_teammate[my_position]]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if teammate_ok:
                if best_action == constants.Action.Bomb:                    
                    if verbose:
                        print("Bomb is best", constants.Action.Bomb)
                    return constants.Action.Bomb.value

                if best_action == constants.Action.Stop:
                    if verbose:
                        print("Place a bomb at a locally optimal position", constants.Action.Bomb)
                    return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #

        if best_action not in [None, constants.Action.Bomb]:
            next_position = my_next_position[best_action]

            should_chase = (total_frac_blocked[next_position] > self._chase_threshold)

            if teammate_position is not None:
                teammate_safety = total_frac_blocked_teammate[next_position] * n_survivable_nodes_teammate[my_teammate]
                if any([teammate_safety > self._teammate_survivability_threshold,
                        total_frac_blocked_teammate[next_position] < self._interfere_threshold,
                        total_frac_blocked_teammate[next_position] < total_frac_blocked_teammate[my_position]]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if should_chase and teammate_ok:
                if all([utility.position_is_flames(board, next_position),
                        info["flame_life"][next_position] > 1,
                        is_survivable[constants.Action.Stop]]):
                    action = constants.Action.Stop
                    if verbose:
                        print("Wait flames life", action)
                    return action.value
                else:
                    if verbose:
                        print("Move towards better place to bomb", best_action)
                    return best_action.value                

        # Exclude the action representing stop to wait
        max_block = -np.inf
        best_action = None
        for action in survivable_actions:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action
                
        #
        # Do not take risky actions
        #

        most_survivable_action = self._action_most_survivable(n_survivable)

        # ignore actions with low survivability
        _survivable_actions = list()
        for action in n_survivable:
            n = sum(n_survivable[action])
            if not is_survivable[action]:
                continue
            elif n > self._my_survivability_threshold:
                _survivable_actions.append(action)
            else:
                print("RISKY", action)
                is_survivable[action] = False

        if len(_survivable_actions) > 1:
            survivable_actions = _survivable_actions
        elif best_action is not None:
            if verbose:
                print("Take the best action in danger", best_action)
            return best_action.value
        else:
            # Take the most survivable action
            if verbose:
                print("Take the most survivable action", most_survivable_action)
            return most_survivable_action.value

        #
        # Do not interfere with teammate
        #

        if all([teammate_position is not None,
                len(enemy_positions) > 0 or len(info["curr_bombs"]) > 0]):
            # ignore actions that interfere with teammate
            min_interfere = np.inf
            least_interfere_action = None
            _survivable_actions = list()
            for action in survivable_actions:
                if action == constants.Action.Bomb:
                    frac = total_frac_blocked_with_bomb_teammate
                else:
                    next_position = my_next_position[action]
                    frac = total_frac_blocked_teammate[next_position]
                if frac < min_interfere:
                    min_interfere = frac
                    least_interfere_action = action
                if frac < self._interfere_threshold:
                    _survivable_actions.append(action)
                else:
                    print("INTERFERE", action)
                    is_survivable[action] = False

            if len(_survivable_actions) > 1:
                survivable_actions = _survivable_actions
            elif best_action is not None:
                # Take the least interfering action
                if verbose:
                    print("Take the best action in intereference", best_action)
                return best_action.value
            else:
                if verbose:
                    print("Take the least interfering action", least_interfere_action)
                return least_interfere_action.value

        consider_bomb = True
        if not is_survivable[constants.Action.Bomb]:
            consider_bomb = False

        #
        # Find reachable items
        #

        # List of boards simulated
        list_boards, _ = self._board_sequence(board,
                                              info["curr_bombs"],
                                              info["curr_flames"],
                                              self._search_range,
                                              my_position,
                                              enemy_mobility=self._enemy_mobility,
                                              enemy_bomb=self._enemy_bomb,
                                              agent_blast_strength=info["agent_blast_strength"])

        # some bombs may explode with extra bombs, leading to under estimation
        for t in range(len(list_boards)):
            flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value)
            list_boards[t][flame_positions] = constants.Item.Flames.value
        
        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, _, _ = self._search_time_expanded_network(list_boards,
                                                                    my_position)        
        if len(survivable[-1]) == 0:
            survivable = [set() for _ in range(len(survivable))]

        # Items and bomb target that can be reached in a survivable manner
        _, _, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable)

        #
        # If I have seen an enemy recently and cannot see him now, them move to the last seen position
        #

        action = self._action_to_enemy(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info,
                                       my_enemies)
        if action is not None:
            if verbose:
                print("Moving toward last seen enemy", action)
            return action.value            
        
        #
        # If I have seen a teammate recently, them move away from the last seen position
        #

        action = self._action_away_from_teammate(my_position,
                                                 next_to_items[constants.Item.Fog],
                                                 prev,
                                                 is_survivable,
                                                 info,
                                                 my_teammate)
        if action is not None:
            if verbose:
                print("Moving away from last seen teammate", action)
            return action.value            
        
        #
        # Move towards a fog where we have not seen longest
        #
        
        action = self._action_to_fog(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info)

        if action is not None:
            #if True:
            if self.random.uniform() < 0.8:
                if verbose:
                    print("Moving toward oldest fog", action)
                return action.value            

        #
        # Choose most survivable action
        #

        max_block = -np.inf
        best_action = None
        for action in survivable_actions:
            if action == constants.Action.Bomb:
                continue
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if verbose:
            print("Take the best action among safe actions (nothing else to do)", best_action)

        if best_action is None:
            # this should not be the case
            return None
        else:
            return best_action.value
Пример #15
0
    def _kickable_positions(self, obs, moving_direction, consider_agents=True):
        """
        Parameters
        ----------
        obs : dict
            pommerman observation
        """

        if not obs["can_kick"]:
            return set()

        kickable = set()
        # my position
        x, y = obs["position"]

        # Find neigoboring positions around me
        on_board_next_positions = list()
        for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            next_position = (x + dx, y + dy)
            if self._on_board(next_position):
                on_board_next_positions.append(next_position)

        # Check if can kick a static bomb
        for next_position in on_board_next_positions:
            if obs["board"][next_position] != constants.Item.Bomb.value:
                # not a bomb
                continue
            if moving_direction[next_position] is not None:
                # moving
                continue
            if obs["bomb_life"][next_position] <= 1:
                # kick and die
                continue
            following_position = (2 * next_position[0] - x,
                                  2 * next_position[1] - y)
            if not self._on_board(following_position):
                # cannot kick to that direction
                continue
            if not utility.position_is_passage(obs["board"],
                                               following_position):
                # cannot kick to that direction
                continue
            might_blocked = False
            if consider_agents:
                # neighboring agent might block (or change the direction) immediately
                for dx, dy in [(-1, -1), (1, -1), (-1, 1), (1, 1)]:
                    neighboring_position = (x + dx, y + dy)
                    if not self._on_board(neighboring_position):
                        continue
                    if np.sum(
                            np.abs(
                                np.array(neighboring_position) -
                                np.array(next_position))) != 1:
                        continue
                    if utility.position_is_agent(obs["board"],
                                                 neighboring_position):
                        print("agent is blocking at", neighboring_position)
                        might_blocked = True
                        break
                if might_blocked:
                    continue
                for dx, dy in [(-1, -1), (1, -1), (-1, 1), (1, 1)]:
                    neighboring_position = (next_position[0] + dx,
                                            next_position[1] + dy)
                    if not self._on_board(neighboring_position):
                        continue
                    if np.sum(
                            np.abs(
                                np.array(neighboring_position) -
                                np.array(following_position))) != 1:
                        continue
                    if utility.position_is_agent(obs["board"],
                                                 neighboring_position):
                        print("agent is blocking at", neighboring_position)
                        might_blocked = True
                        break
                if might_blocked:
                    continue
            print("can kick a static bomb at", next_position)
            kickable.add(next_position)

        # Check if can kick a moving bomb
        for next_position in on_board_next_positions:
            if next_position in kickable:
                # can kick a static bomb
                continue
            x, y = next_position
            for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                coming_position = (x + dx, y + dy)
                if coming_position == obs["position"]:
                    # cannot come from my position
                    continue
                if not self._on_board(coming_position):
                    # cannot come from out of board
                    continue
                #if obs["bomb_life"][coming_position] <= 1:
                #    # kick and die
                #    continue
                if all([
                        moving_direction[coming_position] ==
                        constants.Action.Up, dx == 1, dy == 0
                ]):
                    # coming from below
                    print("can kick a moving bomb coming from below at",
                          next_position)
                    kickable.add(next_position)
                    break
                if all([
                        moving_direction[coming_position] ==
                        constants.Action.Down, dx == -1, dy == 0
                ]):
                    # coming from above
                    print("can kick a moving bomb coming from",
                          coming_position, "above to", next_position)
                    kickable.add(next_position)
                    break
                if all([
                        moving_direction[coming_position] ==
                        constants.Action.Right, dx == 0, dy == -1
                ]):
                    # coming from left
                    print("can kick a moving bomb coming from left at",
                          next_position)
                    kickable.add(next_position)
                    break
                if all([
                        moving_direction[coming_position] ==
                        constants.Action.Left, dx == 0, dy == 1
                ]):
                    # coming from right
                    print("can kick a moving bomb coming from right at",
                          next_position)
                    break

        return kickable
Пример #16
0
    def _get_bombs(self, board, bomb_blast_strength, prev_bomb_blast_strength,
                   bomb_life, prev_bomb_life):
        """
        Summarize information about bombs

        Note that bomb_life and bomb_blast_strength are updated in place:
        a bomb that had more than one step of life at the previous step and
        whose position is now under fog is kept, with its life decremented.

        Parameters
        ----------
        board : array
        bomb_blast_strength : array
            updated in place for bombs kept under fog
        prev_bomb_blast_strength : array
            blast strength of bombs at the previous step
        bomb_life : array
            updated in place for bombs kept under fog
        prev_bomb_life : array
            remaining life of bombs at the previous step

        Return
        ------
        curr_bombs : list
            list of bombs
        moving_direction : array
            array of moving direction of bombs
            moving_direction[position] : direction of bomb at position
            (None where no moving bomb is detected)
        """

        # Keep bombs under fog
        under_fog = np.where(
            (prev_bomb_life > 1) * (board == constants.Item.Fog.value))
        bomb_life[under_fog] = prev_bomb_life[under_fog] - 1
        bomb_blast_strength[under_fog] = prev_bomb_blast_strength[under_fog]

        # Prepare information about moving bombs

        # diff = 0 if no bomb -> no bomb
        # diff = 1 if the remaining life of a bomb is decremented
        # diff = -9 if no bomb -> new bomb
        diff = prev_bomb_life - bomb_life

        moving = (diff != 0) * (diff != 1) * (diff != -9)

        # move_from: previous positions of moving bombs
        rows, cols = np.where(moving * (diff > 0))
        move_from = list(zip(rows, cols))

        # move_to: current positions of moving bombs
        # (a set, because it is only used for membership tests)
        rows, cols = np.where(moving * (diff < 0))
        move_to = set(zip(rows, cols))

        # TODO : Consider bombs moving into fog

        curr_bombs = list()
        rows, cols = np.where(bomb_life > 0)
        moving_direction = np.full(self.board_shape, None)
        for position in zip(rows, cols):
            this_bomb_life = bomb_life[position]
            if position in move_to:
                # then the bomb is moving, so find the moving direction
                for prev_position in move_from:
                    if prev_bomb_life[prev_position] != this_bomb_life + 1:
                        # the previous life of the bomb at the previous position
                        # must be +1 of the life of this bomb
                        continue
                    dx = position[0] - prev_position[0]
                    dy = position[1] - prev_position[1]
                    distance = abs(dx) + abs(dy)
                    if distance == 2:
                        # this can be a moving bomb whose direction is changed by kick;
                        # look for the kicking agent at the two candidate cells,
                        # first along the row offset, then along the column offset
                        kicked = False
                        for agent_position in [
                                (prev_position[0] + dx, prev_position[1]),
                                (prev_position[0], prev_position[1] + dy)
                        ]:
                            if utility.position_is_agent(board, agent_position):
                                # the agent must have kicked
                                print("agent must have kicked at", agent_position)
                                moving_direction[position] = self._get_direction(
                                    agent_position, position)
                                kicked = True
                                break
                        if kicked:
                            break
                    if distance != 1:
                        # the previous position must be 1 manhattan distance
                        # from this position
                        continue
                    moving_direction[position] = self._get_direction(
                        prev_position, position)
                    # TODO: there might be multiple possibilities of
                    # where the bomb came from
                    break
            bomb = characters.Bomb(
                characters.Bomber(),  # dummy owner of the bomb
                position,
                this_bomb_life,
                int(bomb_blast_strength[position]),
                moving_direction[position])
            curr_bombs.append(bomb)

        return curr_bombs, moving_direction
Пример #17
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        #board = obs['board']
        board = info["recently_seen"]
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [
            constants.Item(e) for e in obs['enemies']
            if e != constants.Item.AgentDummy
        ]
        if obs["teammate"] != constants.Item.AgentDummy:
            my_teammate = obs["teammate"]
        else:
            my_teammate = None
        my_kick = obs["can_kick"]  # whether I can kick

        #
        # Understand current situation
        #

        # positions that might be blocked
        if info["teammate_position"] is None:
            agent_positions = info["enemy_positions"]
        else:
            agent_positions = info["enemy_positions"] + [
                info["teammate_position"]
            ]

        # survivable actions

        if len(info["enemy_positions"]) > 0:
            mobility = self._enemy_mobility
        else:
            mobility = 0

        n_survivable, is_survivable, list_boards \
            = self._get_survivable(obs, info, my_position, info["my_next_position"], agent_positions,
                                   info["all_kickable"], allow_kick_to_fog=False,
                                   enemy_mobility=mobility, enemy_bomb=self._enemy_bomb,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        for a in info["might_block_actions"]:
            n_survivable[a] = np.zeros(self._search_range)
            is_survivable[a] = False

        survivable_actions = list()
        for a in is_survivable:
            if not is_survivable[a]:
                continue
            if info["might_blocked"][a] and not is_survivable[
                    constants.Action.Stop]:
                continue
            if n_survivable[a][-1] <= 1:
                is_survivable[a] = False
                continue
            survivable_actions.append(a)

        #
        # Choose action
        #

        if len(survivable_actions) == 0:

            #
            # return None, if no survivable actions
            #

            return None

        elif len(survivable_actions) == 1:

            #
            # Choose the survivable action, if it is the only choice
            #

            action = survivable_actions[0]
            return action.value

        if all([
                info["prev_action"]
                not in [constants.Action.Stop, constants.Action.Bomb],
                info["prev_position"] == my_position
        ]):
            # if previously blocked, do not reapeat with some probability
            self._inv_tmp *= self._backoff
        else:
            self._inv_tmp = self._inv_tmp_init

        #
        # Bomb at a target
        #

        # fraction of blocked node in the survival trees of enemies
        _list_boards = info["list_boards_no_move"]
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes, blocked_time_positions \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

        if info["teammate_position"] is not None:
            total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
                = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

            if n_survivable_nodes_teammate[my_teammate] > 0:
                LB = self._teammate_survivability_threshold / n_survivable_nodes_teammate[
                    my_teammate]
                positions_teammate_safe = np.where(
                    total_frac_blocked_teammate < LB)
                total_frac_blocked_teammate[positions_teammate_safe] = 0

        p_survivable = defaultdict(float)
        for action in n_survivable:
            p_survivable[action] = sum(
                n_survivable[action]) / self._my_survivability_threshold
            if p_survivable[action] > 1:
                p_survivable[action] = 1

        block = defaultdict(float)
        for action in [
                constants.Action.Stop, constants.Action.Up,
                constants.Action.Down, constants.Action.Left,
                constants.Action.Right
        ]:
            next_position = info["my_next_position"][action]
            if next_position is None:
                continue
            if next_position in info["all_kickable"]:
                # kick will be considered later
                continue
            if all([
                    utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1,
                    is_survivable[constants.Action.Stop]
            ]):
                # if the next position is flames,
                # I want to stop to wait, which must be feasible
                block[action] = total_frac_blocked[
                    next_position] * p_survivable[constants.Action.Stop]
                if info["teammate_position"] is not None:
                    block[action] *= (
                        1 - total_frac_blocked_teammate[next_position])
                if block[action] > 0:
                    block[action] *= self._inv_tmp
                    block[action] -= np.log(-np.log(self.random.uniform()))
                continue
            elif not is_survivable[action]:
                continue
            if all([
                    info["might_blocked"][action],
                    not is_survivable[constants.Action.Stop]
            ]):
                continue

            block[action] = total_frac_blocked[next_position] * p_survivable[
                action]
            if info["teammate_position"] is not None:
                block[action] *= (1 -
                                  total_frac_blocked_teammate[next_position])
            if block[action] > 0:
                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

            if info["might_blocked"][action]:
                block[action] = (total_frac_blocked[my_position] *
                                 p_survivable[constants.Action.Stop] +
                                 total_frac_blocked[next_position] *
                                 p_survivable[action]) / 2
                if info["teammate_position"] is not None:
                    block[action] *= (
                        1 - total_frac_blocked_teammate[next_position])
                if block[action] > 0:
                    block[action] *= self._inv_tmp
                    block[action] -= np.log(-np.log(self.random.uniform()))

        if is_survivable[constants.Action.Bomb]:
            list_boards_with_bomb, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=constants.Action.Bomb,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])

            n_survivable_nodes_with_bomb = defaultdict(int)
            for enemy_position in info["enemy_positions"]:
                # get survivable tree of the enemy
                _survivable = search_time_expanded_network(
                    list_boards_with_bomb, enemy_position)
                n_survivable_nodes_with_bomb[enemy_position] = sum(
                    [len(positions) for positions in _survivable])

            n_with_bomb = sum([
                n_survivable_nodes_with_bomb[enemy_position]
                for enemy_position in info["enemy_positions"]
            ])
            n_with_none = sum(
                [n_survivable_nodes[enemy] for enemy in my_enemies])
            if n_with_none == 0:
                total_frac_blocked_with_bomb = 0

                # place more bombs, so the stacked enemy cannot kick
                x, y = my_position
                for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    next_position = (x + dx, y + dy)
                    following_position = (x + 2 * dx, y + 2 * dy)
                    if not self._on_board(following_position):
                        continue
                    if all([
                            obs["bomb_life"][next_position] > 0,
                            board[following_position] >
                            constants.Item.AgentDummy.value
                    ]):
                        total_frac_blocked_with_bomb = 1
            else:
                total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none

            action = constants.Action.Bomb
            block[action] = total_frac_blocked_with_bomb
            # block[action] += total_frac_blocked[my_position] * (eisenachAgents - total_frac_blocked_with_bomb)
            block[action] *= p_survivable[action]

            block_teammate_with_bomb = None
            if block[action] > 0:
                if info["teammate_position"] is not None:
                    block_teammate_with_bomb \
                        = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                           n_survivable_nodes_teammate,
                                                           board,
                                                           [my_teammate],
                                                           ignore_dying_agent=True)

                    block_teammate_with_bomb \
                        += total_frac_blocked_teammate[my_position] * (1 - block_teammate_with_bomb)
                    block[action] *= (1 - block_teammate_with_bomb)

                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

        for next_position in info["kickable"]:

            action = self._get_direction(my_position, next_position)
            if not is_survivable[action]:
                continue

            list_boards_with_kick, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=action,
                                       can_kick=True,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])

            block[action] \
                = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies,
                                                   ignore_dying_agent=True)
            block[action] += total_frac_blocked[next_position] * (
                1 - block[action])
            block[action] *= p_survivable[action]

            if block[action] > 0:
                if info["teammate_position"] is not None:
                    block_teammate_with_kick \
                        = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                           n_survivable_nodes_teammate,
                                                           board, [my_teammate],
                                                           ignore_dying_agent=True)
                    block_teammate_with_kick \
                        += total_frac_blocked_teammate[next_position] * (1 - block_teammate_with_kick)
                    block[action] *= (1 - block_teammate_with_kick)

                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

        max_block = 0  # do not choose zero blocking action as the best
        best_action = None
        for action in block:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp:
            if info["teammate_position"] is not None:
                if block_teammate_with_bomb is None:
                    block_teammate_with_bomb \
                        = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                           n_survivable_nodes_teammate,
                                                           board,
                                                           [my_teammate],
                                                           ignore_dying_agent=True)

                teammate_safety = block_teammate_with_bomb * n_survivable_nodes_teammate[
                    my_teammate]
                if any([
                        teammate_safety >
                        self._teammate_survivability_threshold,
                        block_teammate_with_bomb < self._interfere_threshold,
                        block_teammate_with_bomb <
                        total_frac_blocked_teammate[my_position]
                ]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if teammate_ok:
                if best_action == constants.Action.Bomb:
                    return constants.Action.Bomb.value

                if best_action == constants.Action.Stop:
                    return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #

        if best_action not in [None, constants.Action.Bomb]:
            next_position = info["my_next_position"][best_action]

            should_chase = (total_frac_blocked[next_position] >
                            self._chase_threshold)

            if info["teammate_position"] is not None:
                teammate_safety = total_frac_blocked_teammate[
                    next_position] * n_survivable_nodes_teammate[my_teammate]
                if any([
                        teammate_safety >
                        self._teammate_survivability_threshold,
                        total_frac_blocked_teammate[next_position] <
                        self._interfere_threshold,
                        total_frac_blocked_teammate[next_position] <
                        total_frac_blocked_teammate[my_position]
                ]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if should_chase and teammate_ok:
                if all([
                        utility.position_is_flames(board, next_position),
                        info["flame_life"][next_position] > 1,
                        is_survivable[constants.Action.Stop]
                ]):
                    action = constants.Action.Stop
                    return action.value
                else:
                    return best_action.value

        # Exclude the action representing stop to wait
        max_block = 0  # do not choose zero blocking action as the best
        best_action = None
        for action in survivable_actions:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        #
        # Do not take risky actions when not interacting with enemies
        #

        most_survivable_action = self._action_most_survivable(n_survivable)

        if total_frac_blocked[my_position] > 0:
            # ignore actions with low survivability
            _survivable_actions = list()
            for action in n_survivable:
                n = sum(n_survivable[action])
                if not is_survivable[action]:
                    continue
                elif n > self._my_survivability_threshold:
                    _survivable_actions.append(action)
                else:
                    is_survivable[action] = False

            if len(_survivable_actions) > 1:
                survivable_actions = _survivable_actions
            elif best_action is not None:
                return best_action.value
            else:
                # Take the most survivable action
                return most_survivable_action.value

        #
        # Do not interfere with teammate
        #

        if all([
                info["teammate_position"] is not None,
                len(info["enemy_positions"]) > 0 or len(info["curr_bombs"]) > 0
        ]):
            # ignore actions that interfere with teammate
            min_interfere = np.inf
            least_interfere_action = None
            _survivable_actions = list()
            for action in survivable_actions:
                if action == constants.Action.Bomb:
                    """
                    if block_teammate_with_bomb is None:
                        block_teammate_with_bomb \
                            = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                               n_survivable_nodes_teammate,
                                                               board,
                                                               [my_teammate],
                                                               ignore_dying_agent=True)                        
                    frac = block_teammate_with_bomb 
                    """
                    continue
                else:
                    next_position = info["my_next_position"][action]
                    frac = total_frac_blocked_teammate[next_position]
                if frac < min_interfere:
                    min_interfere = frac
                    least_interfere_action = action
                if frac < self._interfere_threshold:
                    _survivable_actions.append(action)
                else:
                    is_survivable[action] = False

            if len(_survivable_actions) > 1:
                survivable_actions = _survivable_actions
            elif best_action is not None:
                # Take the least interfering action
                return best_action.value
            else:
                return least_interfere_action.value

        consider_bomb = True
        if not is_survivable[constants.Action.Bomb]:
            consider_bomb = False

        #
        # Find reachable items
        #

        # List of boards simulated
        list_boards, _ = self._board_sequence(
            board,
            info["curr_bombs"],
            info["curr_flames"],
            self._search_range,
            my_position,
            enemy_mobility=mobility,
            enemy_bomb=self._enemy_bomb,
            enemy_positions=agent_positions,
            agent_blast_strength=info["agent_blast_strength"],
            step_to_collapse=info["step_to_collapse"],
            collapse_ring=info["collapse_ring"])

        # some bombs may explode with extra bombs, leading to under estimation
        for t in range(len(list_boards)):
            flame_positions = np.where(
                info["list_boards_no_move"][t] == constants.Item.Flames.value)
            list_boards[t][flame_positions] = constants.Item.Flames.value

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, _, _ = self._search_time_expanded_network(
            list_boards, my_position)
        if len(survivable[-1]) == 0:
            survivable = [set() for _ in range(len(survivable))]

        # Items and bomb target that can be reached in a survivable manner
        if "escape" in info:
            reachable_items, _, next_to_items \
                = self._find_reachable_items(list_boards,
                                             my_position,
                                             survivable,
                                             might_powerup=info["escape"])  # might_powerup is the escape from collapse
        else:
            _, _, next_to_items \
                = self._find_reachable_items(list_boards,
                                             my_position,
                                             survivable)

        #
        # If I have seen an enemy recently and cannot see him now, them move to the last seen position
        #

        action = self._action_to_enemy(my_position,
                                       next_to_items[constants.Item.Fog], prev,
                                       is_survivable, info)
        if action is not None:
            return action.value

        #
        # If I have seen a teammate recently, them move away from the last seen position
        #

        action = self._action_away_from_teammate(
            my_position, next_to_items[constants.Item.Fog], prev,
            is_survivable, info)
        if action is not None:
            return action.value

        #
        # Move to the places that will not be collapsed
        #

        if "escape" in info:
            # might_powerup is the escape from collapse
            action = self._action_to_might_powerup(my_position,
                                                   reachable_items, prev,
                                                   is_survivable)
            if action is not None:
                print("Escape from collapse", action)
                return action.value

        #
        # Move towards a fog where we have not seen longest
        #

        action = self._action_to_fog(my_position,
                                     next_to_items[constants.Item.Fog], prev,
                                     is_survivable, info)

        if action is not None:
            #if True:
            if self.random.uniform() < 0.8:
                return action.value

        #
        # Choose most survivable action
        #

        max_block = 0
        best_action = None
        for action in survivable_actions:
            if action == constants.Action.Bomb:
                continue
            score = block[action]
            if action != constants.Action.Bomb:
                score += np.random.uniform(0, 1e-3)
            if score > max_block:
                max_block = score
                best_action = action

        if best_action is None:
            max_p = 0
            best_action = None
            for action in p_survivable:
                score = p_survivable[action]
                if action != constants.Action.Bomb:
                    score += np.random.uniform(0, 1e-3)
                if score > max_p:
                    max_p = score
                    best_action = action

        if best_action is None:
            # this should not be the case
            return None
        else:
            return best_action.value
Пример #18
0
    def act(self, obs, action_space, info):
        """Pick this step's action by running a fixed cascade of heuristics.

        The checks run in the order below and the first one that yields an
        action returns immediately:

        1. the only survivable action, when there is exactly one;
        2. a bomb, when no other action's sorted survivable counts exceed the
           bomb's at any index;
        3. the survivable non-Stop action whose next position has the largest
           positive ``total_frac_blocked`` value;
        4. a step chosen by ``_find_distance_minimizer`` toward the
           best-scoring entry of ``reachable_items["target"]``;
        5. a kick that lowers some visible enemy's survivable-node count;
        6. a step toward the ``next_to_items[Item.Fog]`` entry whose
           neighbours have the largest ``since_last_seen`` value;
        7. otherwise an action with the largest summed survivable count.

        Args:
            obs: Observation dict. Keys read here: 'board', 'position',
                'ammo', 'blast_strength', 'enemies', 'teammate', 'can_kick',
                'bomb_blast_strength' and 'bomb_life'.
            action_space: Not used by this method.
            info: Dict of precomputed state. Keys read here:
                'list_boards_no_move', 'curr_bombs', 'curr_flames',
                'moving_direction' and 'since_last_seen'.

        Returns:
            The ``.value`` of the chosen ``constants.Action`` member, or
            ``None`` when no action is survivable.
        """

        #
        # Definitions
        #

        # Forwarded as `enemy_mobility` to the simulation helpers below.
        enemy_mobility = 4

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        # NOTE(review): my_teammate is not referenced again in this method.
        my_teammate = obs["teammate"]
        my_kick = obs["can_kick"]  # whether I can kick

        print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t")

        #
        # Understand current situation
        #

        # fraction of blocked node in the survival trees of enemies
        # Work on a private copy of the no-move board sequence so that
        # info["list_boards_no_move"] itself is left unmodified.
        _list_boards = deepcopy(info["list_boards_no_move"])
        # On every board that still shows an agent at my position, replace
        # that cell: with a bomb if the observation reports a blast strength
        # at my position, with a passage otherwise.
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value
        # total_frac_blocked is indexed by board position below;
        # n_survivable_nodes is indexed by enemy in the kick section.
        total_frac_blocked, n_survivable_nodes \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])
        # Positions with a positive blocked fraction are the bombing targets
        # handed to _find_reachable_items.
        bomb_target_enemy = (total_frac_blocked > 0)

        # List of boards simulated
        list_boards, _ = self._board_sequence(board,
                                              info["curr_bombs"],
                                              info["curr_flames"],
                                              self._search_range,
                                              my_position,
                                              enemy_mobility=enemy_mobility)

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, succ, _ \
            = self._search_time_expanded_network(list_boards,
                                                 my_position)

        # Survivable actions
        is_survivable, survivable_with_bomb \
            = self._get_survivable_actions(survivable,
                                           obs,
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           enemy_mobility=enemy_mobility)

        survivable_actions = [a for a in is_survivable if is_survivable[a]]

        # n_survivable maps an action to a list of survivable counts; it is
        # filled for kick actions here and for all survivable actions in the
        # evaluation loop further down.
        n_survivable = dict()
        kick_actions = list()
        if my_kick:
            # Positions where we kick a bomb if we move to
            kickable = self._kickable_positions(obs, info["moving_direction"])
            for next_position in kickable:
                # consider what happens if I kick a bomb
                my_action = self._get_direction(my_position, next_position)

                # do not kick into fog
                # Walk from the bomb's cell along the kick direction until
                # leaving the board; any fog cell on that line vetoes the kick.
                dx = next_position[0] - my_position[0]
                dy = next_position[1] - my_position[1]
                position = next_position
                is_fog = False
                while self._on_board(position):
                    if utility.position_is_fog(board, position):
                        is_fog = True
                        break
                    position = (position[0] + dx, position[1] + dy)
                if is_fog:
                    continue

                # The second return value rebinds next_position; it is the
                # start position for the survivability search below.
                list_boards_with_kick, next_position \
                    = self._board_sequence(obs["board"],
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           self._search_range,
                                           my_position,
                                           my_action=my_action,
                                           can_kick=True,
                                           enemy_mobility=enemy_mobility)
                #print(list_boards_with_kick)
                # The search starts from the second simulated board.
                # prev_kick and succ_kick are not used afterwards.
                survivable_with_kick, prev_kick, succ_kick, _ \
                    = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                         next_position)
                if next_position in survivable_with_kick[0]:
                    survivable_actions.append(my_action)
                    is_survivable[my_action] = True
                    # NOTE(review): my_action was just appended to
                    # survivable_actions, so the evaluation loop below assigns
                    # n_survivable[my_action] again from _count_survivable and
                    # replaces this kick-based value - confirm that is intended.
                    n_survivable[my_action] = [1] + [len(s) for s in survivable_with_kick[1:]]
                    kick_actions.append(my_action)
        else:
            # NOTE(review): kickable is not read after this point.
            kickable = set()

        # Nothing is survivable: give up and return no action.
        if len(survivable_actions) == 0:
            return None

        #
        # bomb target that can be reached in a survivable manner
        #

        # NOTE(review): `reached` is not used afterwards.
        reachable_items, reached, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable,
                                         bomb_target_enemy)

        #
        # Evaluate the survivability of each action
        #

        x, y = my_position
        for action in survivable_actions:
            # for each survivable action, check the survivability
            if action == constants.Action.Bomb:
                n_survivable[action] = [len(s) for s in survivable_with_bomb[1:]]
                continue

            # Offset applied to the first coordinate (dx) and the second
            # coordinate (dy) of my_position for each movement action.
            if action == constants.Action.Up:
                dx = -1
                dy = 0
            elif action == constants.Action.Down:
                dx = 1
                dy = 0
            elif action == constants.Action.Left:
                dx = 0
                dy = -1
            elif action == constants.Action.Right:
                dx = 0
                dy = 1
            elif action == constants.Action.Stop:
                dx = 0
                dy = 0
            else:
                # Any other action is unexpected here.
                raise ValueError()
            next_position = (x + dx, y + dy)
            # The second return value (_info) is discarded.
            n_survivable[action], _info = self._count_survivable(succ, 1, next_position)

        #if True:
        # NOTE(review): `verbose` is not defined in this method; presumably a
        # module-level flag - confirm it exists in the enclosing module.
        if verbose:
            print("n_survivable")
            for a in n_survivable:
                print(a, n_survivable[a])

        #
        # Choose the survivable action, if it is the only choice
        #

        if len(survivable_actions) == 1:

            # move to the position if it is the only survivable position
            action = survivable_actions[0]
            print("The only survivable action", action)
            return action.value

        #
        # Bomb if it has dominating survivability
        #

        if is_survivable[constants.Action.Bomb]:
            # Compare the sorted count lists element by element: the bomb
            # loses as soon as any other action exceeds it at some index.
            bomb_is_most_survivable = True
            bomb_sorted = np.array(sorted(n_survivable[constants.Action.Bomb]))
            for action in n_survivable:
                if action == constants.Action.Bomb:
                    continue
                action_sorted = np.array(sorted(n_survivable[action]))
                if any(action_sorted > bomb_sorted):
                    bomb_is_most_survivable = False
                    break
            if bomb_is_most_survivable:
                action = constants.Action.Bomb
                print("Bomb to survive", action)
                return action.value

        #
        # Bomb at a target
        #

        consider_bomb = True
        if survivable_with_bomb is None:
            consider_bomb = False
        elif any([len(s) <= 1 for s in survivable_with_bomb[2:]]):
            # if not sufficiently survivable all the time after bomb, do not bomb
            consider_bomb = False

        #
        # Place a bomb
        #

        # Among survivable actions other than Stop, keep the one whose next
        # position has the largest strictly positive total_frac_blocked value.
        # NOTE(review): Action.Bomb is not excluded here; what
        # _get_next_position returns for it is not visible from this method.
        best_action = None
        max_block = 0
        for action in survivable_actions:
            if action == constants.Action.Stop:
                continue
            next_position = self._get_next_position(my_position, action)
            block = total_frac_blocked[next_position]
            if block > max_block:
                max_block = block
                best_action = action

        # NOTE(review): the loop above skips Action.Stop, so best_action can
        # never equal Action.Stop; this branch cannot fire and consider_bomb
        # currently has no effect. Confirm whether skipping Stop is intended.
        if consider_bomb and best_action == constants.Action.Stop:
            print("Place a bomb at a locally optimal position", constants.Action.Bomb)
            return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #

        if best_action not in [None, constants.Action.Stop]:
            print("Move towards better place to bomb", best_action)
            return best_action.value

        # No neighbouring position scored positively: fall back to the
        # reachable "target" time-positions, scoring each (t, x, y) by its
        # blocked fraction divided by (t + 1), and head for the top scorers.
        good_time_positions = reachable_items["target"]
        if good_time_positions:
            score = [total_frac_blocked[(x,y)] / (t+1) for t, x, y in good_time_positions]
            # Keep every time-position that ties for the maximum score.
            argmax = np.argwhere(score==np.max(score))
            best_time_positions = [good_time_positions[i[0]] for i in argmax]
            action = self._find_distance_minimizer(my_position,
                                                   best_time_positions,
                                                   #good_time_positions,
                                                   prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward where to bomb", action)
                return action.value

        #
        # Kick
        #

        for my_action in kick_actions:
            # Cell I would step onto for this kick action.
            if my_action == constants.Action.Up:
                next_position = (my_position[0] - 1, my_position[1])
            elif my_action == constants.Action.Down:
                next_position = (my_position[0] + 1, my_position[1])
            elif my_action == constants.Action.Right:
                next_position = (my_position[0], my_position[1] + 1)
            elif my_action == constants.Action.Left:
                next_position = (my_position[0], my_position[1] - 1)
            # do not kick a bomb if it will break enemies
            if info["moving_direction"][next_position] is None:
                print("checking static bomb")
                # if it is a static bomb
                if self._can_break(info["list_boards_no_move"][0],
                                   next_position,
                                   my_blast_strength,
                                   my_enemies):
                    continue

            # Re-simulate the kick with enemy_mobility=0.
            list_boards_with_kick_no_move, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=0)

            for enemy in my_enemies:
                # Enemies that do not appear on the observed board are skipped.
                rows, cols = np.where(board==enemy.value)
                if len(rows) == 0:
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable, _, _, _ \
                    = self._search_time_expanded_network(list_boards_with_kick_no_move,
                                                         enemy_position)

                # Kick as soon as it lowers one enemy's survivable-node count
                # below the count obtained from _get_frac_blocked above.
                n_survivable_nodes_with_kick = sum([len(positions) for positions in _survivable])
                if n_survivable_nodes_with_kick < n_survivable_nodes[enemy]:
                    print("Kicking to reduce the survivability",
                          n_survivable_nodes[enemy], "->", n_survivable_nodes_with_kick,
                          my_action)
                    return my_action.value

        #
        # Move towards a fog where we have not seen longest
        #

        # For each candidate take the largest since_last_seen value among its
        # on-board neighbours and keep the candidate with the largest such
        # value (it must be strictly positive to be selected).
        # NOTE: this loop rebinds x and y, which held my_position's
        # coordinates; they are not read again afterwards.
        best_time_position = None
        oldest = 0
        for t, x, y in next_to_items[constants.Item.Fog]:
            neighbors = [(x+dx, y+dy) for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
            age = max([info["since_last_seen"][position] for position in neighbors if self._on_board(position)])
            if age > oldest:
                oldest = age
                best_time_position = (t, x, y)

        if best_time_position is not None:
            action = self._find_distance_minimizer(my_position,
                                                   [best_time_position],
                                                   prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward oldest fog", action)
                return action.value

        #
        # Choose most survivable action
        #

        # Score each evaluated action by the sum of its survivable counts.
        survivable_score = dict()
        for action in n_survivable:
            # survivable_score[action] = sum([-n**(-5) for n in n_survivable[action]])
            survivable_score[action] = sum([n for n in n_survivable[action]])
            if verbose:
                print(action, survivable_score[action], n_survivable[action])
        best_survivable_score = max(survivable_score.values())

        # Shuffle in place so that ties for the best score are broken at
        # random: the first action reaching the best score is taken.
        most_survivable_action = None
        random.shuffle(survivable_actions)
        for action in survivable_actions:
            if survivable_score[action] == best_survivable_score:
                most_survivable_action = action
                break

        print("Most survivable action", most_survivable_action)
        return most_survivable_action.value